def add_sqlite_table(table_name, sqlite_meta, dbc_map, ds, refyear=max(pc.WORKING_PARTITIONS['ferc1']['years']), bad_cols=()): """Adds a new Table to the FERC Form 1 database schema. Creates a new sa.Table object named ``table_name`` and add it to the database schema contained in ``sqlite_meta``. Use the information in the dictionary ``dbc_map`` to translate between the DBF filenames in the datastore (e.g. ``F1_31.DBF``), and the full name of the table in the FoxPro database (e.g. ``f1_fuel``) and also between truncated column names extracted from that DBF file, and the full column names extracted from the DBC file. Read the column datatypes out of each DBF file and use them to define the columns in the new Table object. Args: table_name (str): The name of the new table to be added to the database schema. sqlite_meta (:class:`sqlalchemy.schema.MetaData`): The database schema to which the newly defined :class:`sqlalchemy.Table` will be added. dbc_map (dict): A dictionary of dictionaries ds (:class:`Ferc1Datastore`): Initialized datastore bad_cols (iterable of 2-tuples): A list or other iterable containing pairs of strings of the form (table_name, column_name), indicating columns (and their parent tables) which should *not* be cloned into the SQLite database for some reason. Returns: None """ # Create the new table object new_table = sa.Table(table_name, sqlite_meta) dbf_filename = DBF_TABLES_FILENAMES[table_name] filedata = ds.get_file(refyear, dbf_filename) ferc1_dbf = dbfread.DBF(dbf_filename, ignore_missing_memofile=True, filedata=filedata) # Add Columns to the table for field in ferc1_dbf.fields: if field.name == '_NullFlags': continue col_name = dbc_map[table_name][field.name] if (table_name, col_name) in bad_cols: continue col_type = DBF_TYPES[field.type] if col_type == sa.String: col_type = sa.String(length=field.length) new_table.append_column(sa.Column(col_name, col_type)) col_names = [c.name for c in new_table.columns] if table_name == 'f1_respondent_id': new_table.append_constraint( sa.PrimaryKeyConstraint('respondent_id', sqlite_on_conflict='REPLACE')) if (('respondent_id' in col_names) and (table_name != 'f1_respondent_id')): new_table.append_constraint( sa.ForeignKeyConstraint( columns=[ 'respondent_id', ], refcolumns=['f1_respondent_id.respondent_id']))
def get_dbc_map(ds, year, min_length=4): """ Extract names of all tables and fields from a FERC Form 1 DBC file. Read the DBC file associated with the FERC Form 1 database for the given ``year``, and extract all printable strings longer than ``min_lengh``. Select those strings that appear to be database table names, and their associated field for use in re-naming the truncated column names extracted from the corresponding DBF files (those names are limited to having only 10 characters in their names.) Args: ds (:class:`Ferc1Datastore`): Initialized datastore year (int): The year of data from which the database table and column names are to be extracted. Typically this is expected to be the most recently available year of FERC Form 1 data. min_length (int): The minimum number of consecutive printable characters that should be considered a meaningful string and extracted. Returns: dict: a dictionary whose keys are the long table names extracted from the DBC file, and whose values are lists of pairs of values, the first of which is the full name of each field in the table with the same name as the key, and the second of which is the truncated (<=10 character) long name of that field as found in the DBF file. """ # Extract all the strings longer than "min" from the DBC file dbc = ds.get_file(year, "F1_PUB.DBC") tf_dict = get_fields(dbc) dbc_map = {} for table, dbf_filename in DBF_TABLES_FILENAMES.items(): try: dbc = ds.get_file(year, dbf_filename) except KeyError: # Not all tables exist in all years, so this is acceptable dbc = None if dbc is None: continue dbf_fields = dbfread.DBF("", filedata=dbc, ignore_missing_memofile=True).field_names dbf_fields = [f for f in dbf_fields if f != '_NullFlags'] dbc_map[table] = dict(zip(dbf_fields, tf_dict[table])) if len(tf_dict[table]) != len(dbf_fields): raise ValueError( f"Number of DBF fields in {table} does not match what was " f"found in the FERC Form 1 DBC index file for {year}.") # Insofar as we are able, make sure that the fields match each other for k in dbc_map: for sn, ln in zip(dbc_map[k].keys(), dbc_map[k].values()): if ln[:8] != sn.lower()[:8]: raise ValueError( f"DBF field name mismatch: {ln[:8]} != {sn.lower()[:8]}") return dbc_map
def extract_dbc_tables(year, minstring=4): """Extract the names of all the tables and fields from FERC Form 1 DB. This function reads all the strings in the given DBC database file for the and picks out the ones that appear to be database table names, and their subsequent table field names, for use in re-naming the truncated columns extracted from the corresponding DBF files (which are limited to having only 10 characters in their names.) Strings must have at least min printable characters. Returns: dict: A dictionary whose keys are the long table names extracted from the DBC file, and whose values are lists of pairs of values, the first of which is the full name of each field in the table with the same name as the key, and the second of which is the truncated (<=10 character) long name of that field as found in the DBF file. TODO: This routine shouldn't refer to any particular year of data, but right now it depends on the ferc1_dbf2tbl dictionary, which was generated from the 2015 Form 1 database. """ # Extract all the strings longer than "min" from the DBC file dbc_strs = list(get_strings(dbc_filename(year), min=minstring)) # Get rid of leading & trailing whitespace in the strings: dbc_strs = [s.strip() for s in dbc_strs] # Get rid of all the empty strings: dbc_strs = [s for s in dbc_strs if s is not ''] # Collapse all whitespace to a single space: dbc_strs = [re.sub('\s+', ' ', s) for s in dbc_strs] # Pull out only strings that begin with Table or Field dbc_strs = [s for s in dbc_strs if re.match('(^Table|^Field)', s)] # Split each string by whitespace, and retain only the first two elements. # This eliminates some weird dangling junk characters dbc_strs = [' '.join(s.split()[:2]) for s in dbc_strs] # Remove all of the leading Field keywords dbc_strs = [re.sub('Field ', '', s) for s in dbc_strs] # Join all the strings together (separated by spaces) and then split the # big string on Table, so each string is now a table name followed by the # associated field names, separated by spaces dbc_list = ' '.join(dbc_strs).split('Table ') # strip leading & trailing whitespace from the lists, and get rid of empty # strings: dbc_list = [s.strip() for s in dbc_list if s is not ''] # Create a dictionary using the first element of these strings (the table # name) as the key, and the list of field names as the values, and return # it: tf_dict = {} for tbl in dbc_list: x = tbl.split() tf_dict[x[0]] = x[1:] tf_doubledict = {} for dbf in pc.ferc1_dbf2tbl.keys(): filename = os.path.join(datadir(year), '{}.DBF'.format(dbf)) if os.path.isfile(filename): dbf_fields = dbfread.DBF(filename).field_names dbf_fields = [f for f in dbf_fields if f != '_NullFlags'] tf_doubledict[pc.ferc1_dbf2tbl[dbf]] = \ {k: v for k, v in zip(dbf_fields, tf_dict[pc.ferc1_dbf2tbl[dbf]])} assert(len(tf_dict[pc.ferc1_dbf2tbl[dbf]]) == len(dbf_fields)) # Insofar as we are able, make sure that the fields match each other for k in tf_doubledict.keys(): for sn, ln in zip(tf_doubledict[k].keys(), tf_doubledict[k].values()): assert(ln[:8] == sn.lower()[:8]) return(tf_doubledict)
import pickle from sklearn import linear_model #Changing the directory MyLoc = r'D:\Dropbox\Dropbox\PublicCode_Git\PatrolRedistrict\PatrolRedistrict\DataCreated' os.chdir(MyLoc) print(os.getcwd()) #Get the Reporting Areas table, include call weights and XY coordinates ra_data = [] vars = [ 'PDGrid', 'NumberId', 'XFeet', 'YFeet', 'Cnt_ra', 'Sum_Minute', 'Sum_Minu_1' ] for record in dbfread.DBF('RA_Centroids.dbf'): temp = [record[i] for i in vars] ra_data.append(temp) #I need to make a dictionary from this that maps the PDGrid to the NumberId value nid_map = {} nid_invmap = {} cont_dict = {} #contiguity dictionary call_dict = {} #call dictionary dist_dict = {} #distance dictionary areas = [] pair_data = [] for i in ra_data: areas.append(i[0]) nid_map[i[1]] = i[0] nid_invmap[i[0]] = i[1]
def define_db(refyear, ferc1_tables, ferc1_meta, verbose=True): """ Given a list of FERC Form 1 DBF files, create analogous database tables. Based on strings extracted from the master F1_PUB.DBC file corresponding to the year indicated by refyear, and the list of DBF files specified in dbfs recreate a subset of the FERC Form 1 database as a Postgres database using SQLAlchemy. Args: refyear (int): Year of FERC Form 1 data to use as the database template. ferc1_tables (list): List of FERC Form 1 tables to ingest. ferc1_meta (SQLAlchemy MetaData): SQLAlchemy MetaData object to store the schema in. """ ferc1_tblmap = extract_dbc_tables(refyear) # Translate the list of FERC Form 1 database tables that has # been passed in into a list of DBF files prefixes: dbfs = [pc.ferc1_tbl2dbf[table] for table in ferc1_tables] if verbose: print("Defining new FERC Form 1 DB based on {}...".format(refyear)) if verbose: print("Clearing any existing FERC Form 1 database MetaData...") # This keeps us from having collisions when re-initializing the DB. ferc1_meta.clear() for dbf in dbfs: dbf_filename = os.path.join(datadir(refyear), '{}.DBF'.format(dbf)) ferc1_dbf = dbfread.DBF(dbf_filename) # And the corresponding SQLAlchemy Table object: table_name = pc.ferc1_dbf2tbl[dbf] ferc1_sql = sa.Table(table_name, ferc1_meta) # _NullFlags isn't a "real" data field... remove it. fields = [f for f in ferc1_dbf.fields if not re.match( '_NullFlags', f.name)] for field in fields: col_name = ferc1_tblmap[pc.ferc1_dbf2tbl[dbf]][field.name] col_type = pc.dbf_typemap[field.type] # String/VarChar is the only type that really NEEDS a length if(col_type == sa.String): col_type = col_type(length=field.length) # This eliminates the "footnote" fields which all mirror database # fields, but end with _f. We have not yet integrated the footnotes # into the rest of the DB, and so why clutter it up? if(not re.match('(.*_f$)', col_name)): ferc1_sql.append_column(sa.Column(col_name, col_type)) # Append primary key constraints to the table: if (table_name == 'f1_respondent_id'): ferc1_sql.append_constraint( sa.PrimaryKeyConstraint('respondent_id')) if (table_name in pc.ferc1_data_tables): # All the "real" data tables use the same 5 fields as a composite # primary key: [ respondent_id, report_year, report_prd, # row_number, spplmnt_num ] ferc1_sql.append_constraint(sa.PrimaryKeyConstraint( 'respondent_id', 'report_year', 'report_prd', 'row_number', 'spplmnt_num') ) # They also all have respondent_id as their foreign key: ferc1_sql.append_constraint(sa.ForeignKeyConstraint( columns=['respondent_id', ], refcolumns=['f1_respondent_id.respondent_id']) )
def init_db(ferc1_tables=pc.ferc1_default_tables, refyear=2015, years=[2015, ], def_db=True, verbose=True, testing=False): """Assuming an empty FERC Form 1 DB, create tables and insert data. This function uses dbfread and SQLAlchemy to migrate a set of FERC Form 1 database tables from the provided DBF format into a postgres database. Args: ferc1_tables (list): The set of tables to read from the FERC Form 1 dbf database into the FERC Form 1 DB. refyear (int): Year of FERC Form 1 data to use as the database template. years (list): The set of years to read from FERC Form 1 dbf database into the FERC Form 1 DB. """ ferc1_engine = db_connect_ferc1(testing=testing) # This function (see below) uses metadata from the DBF files to define a # postgres database structure suitable for accepting the FERC Form 1 data if def_db: define_db(refyear, ferc1_tables, ferc1_meta) # Wipe the DB and start over... drop_tables_ferc1(ferc1_engine) create_tables_ferc1(ferc1_engine) # Create a DB connection to use for the record insertions below: conn = ferc1_engine.connect() # This awkward dictionary of dictionaries lets us map from a DBF file # to a couple of lists -- one of the short field names from the DBF file, # and the other the full names that we want to have the SQL database... ferc1_tblmap = extract_dbc_tables(refyear) # Translate the list of FERC Form 1 database tables that has # been passed in into a list of DBF files prefixes: dbfs = [pc.ferc1_tbl2dbf[table] for table in ferc1_tables] for year in years: if verbose: print("Ingesting FERC Form 1 Data from {}...".format(year)) for dbf in dbfs: dbf_filename = os.path.join(datadir(year), '{}.DBF'.format(dbf)) dbf_table = dbfread.DBF(dbf_filename, load=True) # pc.ferc1_dbf2tbl is a dictionary mapping DBF files to SQL table # names sql_table_name = pc.ferc1_dbf2tbl[dbf] sql_stmt = sa.dialects.postgresql.insert( ferc1_meta.tables[sql_table_name]) # Build up a list of dictionaries to INSERT into the postgres # database. Each dictionary is one record. Within each dictionary # the keys are the field names, and the values are the values for # that field. sql_records = [] for dbf_rec in dbf_table.records: sql_rec = {} for d, s in ferc1_tblmap[sql_table_name].items(): sql_rec[s] = dbf_rec[d] sql_records.append(sql_rec) # If we're reading in multiple years of FERC Form 1 data, we # need to avoid collisions in the f1_respondent_id table, which # does not have a year field... F1_1 is the DBF file that stores # this table: if(dbf == 'F1_1'): sql_stmt = sql_stmt.on_conflict_do_nothing() # insert the new records! conn.execute(sql_stmt, sql_records) conn.close()
def dBase2sqllite(db, table, encoding="cp437", overwrite_table=None, fLOG=noLOG): """ Put all rows from a dBase database into sqlite Add a dbase table to an open sqlite database. @param db cursor on SQLite or file name @param table DBF object or filename @param encoding encoding if table is a filename @param overwrite_table overwrite the table name @param fLOG logging function, to see the progress The table will be removed if it exists. """ typemap = { 'F': 'FLOAT', 'L': 'BOOLEAN', 'I': 'INTEGER', 'C': 'TEXT', 'N': 'REAL', # because it can be integer or float 'M': 'TEXT', 'D': 'DATE', 'T': 'DATETIME', '0': 'INTEGER', } if isinstance(db, str): cursor = Database(db, LOG=fLOG) cursor.connect() else: cursor = db if isinstance(table, str): import dbfread # pylint: disable=C0415 table = dbfread.DBF(table, load=False, encoding=encoding) cursor.execute('drop table if exists %s' % table.name) field_types = {} for f in table.fields: field_types[f.name] = typemap.get(f.type, 'TEXT') table_name = overwrite_table if overwrite_table is not None else table.name # Create the table # defs = ', '.join( ['%s %s' % (f, field_types[f]) for f in table.field_names]) sql = 'create table %s (%s)' % (table_name, defs) cursor.execute(sql) # Create data rows refs = ', '.join([':' + f for f in table.field_names]) sql = 'insert into %s values (%s)' % (table_name, refs) for i, rec in enumerate(table): cursor._connection.execute(sql, list(rec.values())) if i % 20000 == 0: fLOG("moving line ", i, " to table", table_name) if isinstance(db, str): cursor.commit() cursor.close()
import dbfread import numpy as np from osgeo import gdal import matplotlib.pyplot as plt file = r'E:/Public/JiangJun/标准流程数据/shuju_chuli/TAVG/TAVG_fenxi/TAVG_R2_mask' data = [ 'Qinghai', 'Haibei', 'Yushudong', 'Guoluodong', 'Yushuxi', 'Huangnan', 'Tanggula', 'Guoluoxi', 'Hainan', 'Haidong', 'Haixi' ] ds = gdal.Open(file + '.tif') data1 = ds.ReadAsArray() data1[data1 == data1[0][0]] = np.nan data2 = [np.std(data1[~np.isnan(data1)])] data1 = [data1[~np.isnan(data1)].mean()] table = dbfread.DBF(file + '.dbf') for i in table: data1.append(i['MEAN']) table = dbfread.DBF(file + '_std.dbf') for i in table: data2.append(i['STD']) class para: bar_width = 0.6 bar_align = 'center' ticks_font = 'Times New Roman' xtick_direction = 'out' ytick_direction = 'in' frames = ['right', 'top'] error_kw = {'ecolor': 'k', 'capsize': 2, 'elinewidth': 0.5}
def open_dbf(name): return dbfread.DBF("%s/%s.DBF" % (PATH, name.upper()), encoding="latin1", parserclass=ExtraFieldParser)
# (team72, Dongfang, Wang, 906257) from matplotlib.path import Path import numpy as np import geopandas as gpd import dbfread as dbr NSW_map = gpd.read_file('NSW_LGA_POLYGON_shp.shp') NSW_name_table1 = dbr.DBF('NSW_LGA_POLYGON_shp.dbf') NSW_name_table2 = dbr.DBF('NSW_LGA_shp.dbf') NT_map = gpd.read_file('NT_LGA_POLYGON_shp.shp') NT_name_table1 = dbr.DBF('NT_LGA_POLYGON_shp.dbf') NT_name_table2 = dbr.DBF('NT_LGA_shp.dbf') OT_map = gpd.read_file('OT_LGA_POLYGON_shp.shp') OT_name_table1 = dbr.DBF('OT_LGA_POLYGON_shp.dbf') OT_name_table2 = dbr.DBF('OT_LGA_shp.dbf') QLD_map = gpd.read_file('QLD_LGA_POLYGON_shp.shp') QLD_name_table1 = dbr.DBF('QLD_LGA_POLYGON_shp.dbf') QLD_name_table2 = dbr.DBF('QLD_LGA_shp.dbf') SA_map = gpd.read_file('SA_LGA_POLYGON_shp.shp') SA_name_table1 = dbr.DBF('SA_LGA_POLYGON_shp.dbf') SA_name_table2 = dbr.DBF('SA_LGA_shp.dbf') TAS_map = gpd.read_file('TAS_LGA_POLYGON_shp.shp') TAS_name_table1 = dbr.DBF('TAS_LGA_POLYGON_shp.dbf') TAS_name_table2 = dbr.DBF('TAS_LGA_shp.dbf') VIC_map = gpd.read_file('VIC_LGA_POLYGON_shp.shp')
import matplotlib.pyplot as plt class para1(object): Folder_path = r'E:/Public/JiangJun/标准流程数据/shuju_chuli/TAVG/TAVG_ddq/' File_extension = '.dbf' ini_year = 1980 end_year = 2018 data1 = [] data2 = [] for year in range(para1.ini_year, para1.end_year + 1): file = glob(para1.Folder_path + '*' + str(year) + '*' + para1.File_extension) table = dbfread.DBF(file[0]) b = [i['MEAN'] for i in table] data2.append(b) data1.append(year) data2 = np.array(data2).T data1 = np.array(data1) class para(object): Line_marker = ['^', 'o', 'p', 's'] Line_ls = ['-.', '--', '-', '-'] Legend = ['果洛东', '黄南', '果洛西', '海东'] frames = ['right', 'top'] Legend1_prop = {'family': 'SimSun', 'size': 12} # Legend2_prop = {'family': 'Times New Roman', 'style': 'italic', 'size': 12} # Line2_label = 'y = {0} x - {1}\n(R = {2} , n = {3} , P < 0.01)'.format(s, d, r, n)
def import_dbf(filename, limit=0): logger.info(f'Import {filename} (limit={limit}):') field_map = { 'DT_VRSV': 'TEMP_POLICY_DATE', 'NO_VRSV': 'TEMP_POLICY_NUMBER', 'EDINNP': 'ENP', 'SER_POL': 'POLICY_SERIAL', 'NOM_POL': 'POLICY_NUMBER', 'FAM': 'SURNAME', 'IM': 'NAME1', 'OT': 'NAME2', 'D_R': 'BIRTHDAY', 'POL': 'SEX', 'RAION': 'REGION', 'KOD_NP': 'LOCALITY_CODE', 'NAS_P': 'LOCALITY', 'KOD_UL': 'STREET_CODE', 'DOM': 'HOUSE', 'KV': 'FLAT', 'KDLPU': 'MO_CODE', 'KOD_SMO': 'SMO_CODE', 'SS': 'CLIENT_SNILS', 'SS_UCHVR': 'MEDIC_SNILS', } search_field_map = { 'FAM': 'SURNAME', 'IM': 'NAME1', 'OT': 'NAME2', 'D_R': 'BIRTHDAY', } count = 0 countAdded = 0 dbf = dbfread.DBF(filename, encoding='cp866') for recs in split_every(500, dbf): for rec in recs: count += 1 exists = db.session.query(ClientModel.id).filter_by( **{ db_field: rec[dbf_field] for dbf_field, db_field in search_field_map.items() if rec[dbf_field] is not None }).scalar() is not None if not exists: fields = { db_field: (rec[dbf_field] or None) for dbf_field, db_field in field_map.items() } client = ClientModel(**fields) db.session.add(client) countAdded += 1 if count >= limit: break logger.debug(f'processed: {count}, added: {countAdded}') db.session.commit() if count >= limit: break logger.info(f'Total records: {count}, added: {countAdded}')
in_file = None if len(sys.argv) < 1: print("Error: usage: vri_dbf_to_csv [input dbf file]") else: in_file = sys.argv[1] out_file = in_file + ".csv" print("+w " + out_file) ci = 0 count = {} keys = None keytype = {} f = open(out_file, "wb") # "VRI_KLoops.csv", "wb") for record in dbfread.DBF(in_file): #"VRI_KLoops.dbf"): if keys is None: keys = record.keys() keys = [k.replace(',', ';') for k in keys] f.write(",".join(keys)) else: if str(keys) != str(record.keys()): print "Error: record length mismatch" sys.exit(1) r = [] for k in keys: r.append(str(record[k])) if k not in count: count[k] = {} keytype[k] = type(record[k])
def read_file(self): ''' 读取输入文件 ''' table = dbf.DBF(self.file_name, load=True) return table
_author_ = "aman" import dbfread def convert_rec(dbf, lst_of_key, lst_of_value): i = 0 for records in dbf: lst_value = list(records.values()) lst_of_value.insert(i, lst_value) if i == 0: for key in records.keys(): lst_of_key.append(key) i += 1 if (i < 30): print(lst_value[5]) return lst_of_key, lst_of_value input_path = "C:/Users/aman.raj/Documents/BCN0220.DBF" dbf = dbfread.DBF(input_path, encoding="ibm850") lst_of_key = [] lst_of_value = [[]] print(dbf) lst_of_key, lst_of_value = convert_rec(dbf, lst_of_key, lst_of_value)
def get_dbc_map(year, data_dir, min_length=4): """ Extract names of all tables and fields from a FERC Form 1 DBC file. Read the DBC file associated with the FERC Form 1 database for the given ``year``, and extract all printable strings longer than ``min_lengh``. Select those strings that appear to be database table names, and their associated field for use in re-naming the truncated column names extracted from the corresponding DBF files (those names are limited to having only 10 characters in their names.) Args: year (int): The year of data from which the database table and column names are to be extracted. Typically this is expected to be the most recently available year of FERC Form 1 data. data_dir (str): A string representing the full path to the top level of the PUDL datastore containing the FERC Form 1 data to be used. min_length (int): The minimum number of consecutive printable characters that should be considered a meaningful string and extracted. Returns: dict: a dictionary whose keys are the long table names extracted from the DBC file, and whose values are lists of pairs of values, the first of which is the full name of each field in the table with the same name as the key, and the second of which is the truncated (<=10 character) long name of that field as found in the DBF file. """ # Extract all the strings longer than "min" from the DBC file dbc_strings = list( get_strings(dbc_filename(year, data_dir), min_length=min_length)) # Get rid of leading & trailing whitespace in the strings: dbc_strings = [s.strip() for s in dbc_strings] # Get rid of all the empty strings: dbc_strings = [s for s in dbc_strings if s != ''] # Collapse all whitespace to a single space: dbc_strings = [re.sub(r'\s+', ' ', s) for s in dbc_strings] # Pull out only strings that begin with Table or Field dbc_strings = [s for s in dbc_strings if re.match('(^Table|^Field)', s)] # Split strings by whitespace, and retain only the first two elements. # This eliminates some weird dangling junk characters dbc_strings = [' '.join(s.split()[:2]) for s in dbc_strings] # Remove all of the leading Field keywords dbc_strings = [re.sub('Field ', '', s) for s in dbc_strings] # Join all the strings together (separated by spaces) and then split the # big string on Table, so each string is now a table name followed by the # associated field names, separated by spaces dbc_table_strings = ' '.join(dbc_strings).split('Table ') # strip leading & trailing whitespace from the lists # and get rid of empty strings: dbc_table_strings = [s.strip() for s in dbc_table_strings if s != ''] # Create a dictionary using the first element of these strings (the table # name) as the key, and the list of field names as the values, and return # it: tf_dict = {} for table_string in dbc_table_strings: table_and_fields = table_string.split() tf_dict[table_and_fields[0]] = table_and_fields[1:] dbc_map = {} for table in pc.ferc1_tbl2dbf: dbf_path = get_dbf_path(table, year, data_dir=data_dir) if os.path.isfile(dbf_path): dbf_fields = dbfread.DBF(dbf_path).field_names dbf_fields = [f for f in dbf_fields if f != '_NullFlags'] dbc_map[table] = \ {k: v for k, v in zip(dbf_fields, tf_dict[table])} if len(tf_dict[table]) != len(dbf_fields): raise ValueError( f"Number of DBF fields in {table} does not match what was " f"found in the FERC Form 1 DBC index file for {year}.") # Insofar as we are able, make sure that the fields match each other for k in dbc_map: for sn, ln in zip(dbc_map[k].keys(), dbc_map[k].values()): if ln[:8] != sn.lower()[:8]: raise ValueError( f"DBF field name mismatch: {ln[:8]} != {sn.lower()[:8]}") return dbc_map
import dbfread as dr records = [p for p in dr.DBF('vendas.dbf')] print("Quantidade de registros: ", len(records)) records = [dict(p) for p in records] print(type(records[0])) db_sequencias = [] for record in records: db_sequencias.append(record) produtos = [] for record in db_sequencias: produtos.append([record['PRODUTO'], record['OPERACAO']]) print("Quantiadde de itens", len(produtos)) dataset = {} for record in produtos: dataset[record[1]] = [] for record in produtos: dataset[record[1]].append(record[0]) cont = 0 line = '' with open('input_apriori.csv', 'w') as fl: for key in dataset: cont += 1