def shammun_dbf_files(root_folder='shammun_geography_dbf', division_file='division.dbf',
                      district_file='district.dbf', upazila_file='upazila.dbf'):
    division_file_path = os.path.join(root_folder, division_file)
    district_file_path = os.path.join(root_folder, district_file)
    upazila_file_path = os.path.join(root_folder, upazila_file)

    division, district, upazila = set(), set(), set()
    under_division, under_district = defaultdict(list), defaultdict(list)

    for record in DBF(division_file_path, parserclass=MyFieldParser):
        div = shamnun_get_div(record)
        division.add(div)
    print(division)

    for record in DBF(district_file_path, parserclass=MyFieldParser):
        div = shamnun_get_div(record)
        if div not in division:
            raise Exception('Incorrect division in district.dbf', div)
        dist = shamnun_get_dist(record)
        district.add(dist)
        under_division[div].append(dist)
    print(district)

    for record in DBF(upazila_file_path, parserclass=MyFieldParser):
        div = shamnun_get_div(record)
        if div not in division:
            raise Exception('Incorrect division in upazila.dbf', div)
        dist = shamnun_get_dist(record)
        if dist not in district:
            raise Exception('Incorrect district in upazila.dbf', dist)
        upa = shamnun_get_upa(record)
        upazila.add(upa)
        under_district[dist].append(upa)
    print(upazila)

# shammun_dbf_files()
def dbf2csv(dbfname, csvname):
    # Load the code -> value lookup dictionaries from the UTF-16 dictionary dumps.
    reader = csv.reader(open('OUCHNSYS.EAS_DIC_GENDER.txt', 'r', encoding='utf-16'))
    gender = {rows[1]: rows[0] for rows in reader}
    reader = csv.reader(open('OUCHNSYS.EAS_DIC_ETHNICNAME.txt', 'r', encoding='utf-16'))
    ethnic = {rows[1]: rows[0] for rows in reader}
    reader = csv.reader(open('OUCHNSYS.EAS_DIC_POLITICALSTATUS.txt', 'r', encoding='utf-16'))
    polica = {rows[1]: rows[0] for rows in reader}

    with open(csvname, 'w', newline='') as csvfile:
        wr = csv.writer(csvfile, quoting=csv.QUOTE_NONE, quotechar='', escapechar='\\')
        table = DBF(dbfname)
        table.encoding = 'gb18030'
        fidnames = table.field_names
        for row in table:
            dic = {key: '' for key in fidnames}
            #print('begin')
            for x in fidnames:
                dic[x] = row[x]
                if x == 'XB':
                    dic[x] = gender[row[x]]
                elif x == 'MZ':
                    dic[x] = ethnic[row[x]]
                elif x == 'ZZMM':
                    dic[x] = polica[row[x]]
            csvlist = [dic[x] for x in ['KSH', 'XH', 'XM', 'CSRQ', 'SFZH', 'MZ', 'ZZMM', 'XB']]
            wr.writerow(csvlist)
def TRMM():
    workbook = xlwt.Workbook(encoding='ascii')
    worksheet = workbook.add_sheet('IMERG')
    worksheet1 = workbook.add_sheet('TRMM')
    worksheet2 = workbook.add_sheet('RG')
    for y in range(15, 18):
        dbf = DBF("F:/MSA/RAIN/YE/" + "IYE" + str(2000 + y) + ".dbf", load=True)
        worksheet.write((y - 15), 0, dbf.records[0]['CenterX'])
        worksheet.write((y - 15), 1, dbf.records[0]['CenterY'])
        worksheet.write((y - 15), 2, dbf.records[0]['XStdDist'])
        worksheet.write((y - 15), 3, dbf.records[0]['YStdDist'])
        worksheet.write((y - 15), 4, dbf.records[0]['Rotation'])
        dbf1 = DBF("F:/MSA/RAIN/YE/" + "TYE" + str(2000 + y) + ".dbf", load=True)
        worksheet1.write((y - 15), 0, dbf1.records[0]['CenterX'])
        worksheet1.write((y - 15), 1, dbf1.records[0]['CenterY'])
        worksheet1.write((y - 15), 2, dbf1.records[0]['XStdDist'])
        worksheet1.write((y - 15), 3, dbf1.records[0]['YStdDist'])
        worksheet1.write((y - 15), 4, dbf1.records[0]['Rotation'])
        dbf2 = DBF("F:/MSA/RAIN/YE/" + "RYE" + str(2000 + y) + ".dbf", load=True)
        worksheet2.write((y - 15), 0, dbf2.records[0]['CenterX'])
        worksheet2.write((y - 15), 1, dbf2.records[0]['CenterY'])
        worksheet2.write((y - 15), 2, dbf2.records[0]['XStdDist'])
        worksheet2.write((y - 15), 3, dbf2.records[0]['YStdDist'])
        worksheet2.write((y - 15), 4, dbf2.records[0]['Rotation'])
    out = "F:/MSA/RAIN/" + "test.xls"
    workbook.save(out)
def parser(request):
    """
    Author: Daniil Kozlov
    Purpose: update the database
    :param request:
    :return: the updated database as a DataFrame
    """
    result = requests.get(
        'https://cbr.ru/banking_sector/otchetnost-kreditnykh-organizaciy/')
    html = result.text
    soup = BeautifulSoup(html)
    # read the old database (CSV)
    data = pd.read_csv('BD.csv')
    # parse the CBR site and check whether data for a given period is published;
    # if not, the corresponding s == []
    s2 = soup.find_all('a', href="/vfs/credit/forms/102-20200401.rar")
    s1 = soup.find_all('a', href="/vfs/credit/forms/102-20200101.rar")
    s3 = soup.find_all('a', href="/vfs/credit/forms/102-20200701.rar")
    s4 = soup.find_all('a', href="/vfs/credit/forms/102-20201001.rar")
    if s1 == []:
        print('No data for 01.01.2020')
    elif s2 == []:
        print('No data for 04.01.2020')
    elif s3 == []:
        print('No data for 07.01.2020')
        # Load the data for the previous period.
        # The rar archive has to be downloaded from the CBR site at
        # https://cbr.ru/vfs/credit/forms/102-20200401.rar
        # and the two files NP1 and P1 extracted from it.
        table1 = DBF('22020NP1.DBF', load=True, encoding='cp866')
        frame1 = pd.DataFrame(iter(table1))
        table2 = DBF('22020_P1.DBF', load=True, encoding='cp866')
        frame2 = pd.DataFrame(iter(table2))
        frame2 = frame2.fillna(0)
        result = pd.merge(frame1, frame2, on='REGN')
        new = result.groupby(['NAME_B']).sum().reset_index()
        new = new.drop(['REGN'], axis=1)
        new_result = pd.merge(new, frame1, on='NAME_B')
        new_result['DT'] = '2020-04-01'
        final = pd.concat([new_result, data], ignore_index=True)
        final.to_csv('BD.csv')
    elif s4 == []:
        # The rar archive has to be downloaded from the CBR site at
        # https://cbr.ru/vfs/credit/forms/102-20200701.rar
        # and the two files NP1 and P1 extracted from it.
        print('No data for 10.01.2020')
        table1 = DBF('32020NP1.DBF', load=True, encoding='cp866')
        frame1 = pd.DataFrame(iter(table1))
        table2 = DBF('32020_P1.DBF', load=True, encoding='cp866')
        frame2 = pd.DataFrame(iter(table2))
        frame2 = frame2.fillna(0)
        result = pd.merge(frame1, frame2, on='REGN')
        new = result.groupby(['NAME_B']).sum().reset_index()
        new = new.drop(['REGN'], axis=1)
        new_result = pd.merge(new, frame1, on='NAME_B')
        new_result['DT'] = '2020-07-01'
        final = pd.concat([new_result, data], ignore_index=True)
        final.to_csv('BD.csv')
    return final
def test_missing_memofile():
    with raises(MissingMemoFile):
        DBF('tests/cases/no_memofile.dbf')

    # This should succeed.
    table = DBF('tests/cases/no_memofile.dbf', ignore_missing_memofile=True)

    # Memo fields should be returned as None.
    record = next(iter(table))
    assert record['MEMO'] is None
def __init__(self, filename, encoding=None, ignorecase=True, lowernames=False,
             parserclass=FieldParser, recfactory=collections.OrderedDict,
             load=False, raw=False, ignore_missing_memofile=False,
             char_decode_errors='strict'):
    if isinstance(filename, str):
        DBF.__init__(self, filename, encoding=encoding, ignorecase=ignorecase,
                     lowernames=lowernames, parserclass=parserclass,
                     recfactory=recfactory, load=load, raw=raw,
                     ignore_missing_memofile=ignore_missing_memofile,
                     char_decode_errors=char_decode_errors)
    else:
        self.encoding = encoding
        self.ignorecase = ignorecase
        self.lowernames = lowernames
        self.parserclass = parserclass
        self.raw = raw
        self.ignore_missing_memofile = ignore_missing_memofile
        self.char_decode_errors = char_decode_errors

        if recfactory is None:
            self.recfactory = lambda items: items
        else:
            self.recfactory = recfactory

        self.name = None
        self.filename = None
        self.content = filename

        self._records = None
        self._deleted = None

        # Filled in by self._read_headers()
        self.memofilename = None
        self.header = None
        self.fields = []       # namedtuples
        self.field_names = []  # strings

        obj = BytesIO(filename)
        self._read_header(obj)
        self._read_field_headers(obj)
        self._check_headers()

        try:
            self.date = datetime.date(expand_year(self.header.year),
                                      self.header.month,
                                      self.header.day)
        except ValueError:  # pragma: no cover
            # Invalid date or '\x00\x00\x00'.
            self.date = None

        self.memofilename = self._get_memofilename()

        if load:
            self.load()
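# A hedged usage sketch for the constructor above: it accepts either a filename
# (str) or raw DBF bytes, which are parsed entirely in memory via BytesIO. The
# class name `InMemoryDBF` below is hypothetical; substitute whatever this
# subclass is actually called in the surrounding module.
with open('files/people.dbf', 'rb') as fh:
    raw = fh.read()
table = InMemoryDBF(raw)   # parsed from bytes, no file on disk needed
for record in table:
    print(record)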
def download_table_dbf(file_name):
    """
    Downloads a SINASC auxiliary data file in "dbf" format, or the "zip"
    folder that contains it (if that "zip" folder has not been downloaded
    yet), then reads it into a pandas DataFrame and finally deletes it.

    Parameters
    ----------
    file_name: str object
        Name of the "dbf" file

    Returns
    -------
    df: pandas DataFrame object
        DataFrame with the data of an auxiliary data file originally in
        "dbf" format
    """
    ftp = FTP('ftp.datasus.gov.br')
    ftp.login()
    fname = file_name + '.DBF'
    try:
        if file_name == 'CNESDN18':
            # This "dbf" file should be in the directory
            # ".../dissemin/publicos/SINASC/NOV/TABELAS/" of the "ftp" address
            ftp.cwd('/dissemin/publicos/SINASC/NOV/TAB/')
            folder = 'NASC_NOV_TAB.zip'
            ftp.retrbinary(f'RETR {folder}', open(folder, 'wb').write)
            zip = ZipFile(folder, 'r')
            zip.extract(fname)
        elif file_name == 'rl_municip_regsaud':
            folder = 'base_territorial.zip'
            ftp.cwd('/territorio/tabelas/')
            ftp.retrbinary(f'RETR {folder}', open(folder, 'wb').write)
            zip = ZipFile(folder, 'r')
            fname = file_name + '.dbf'
            zip.extract(fname)
        else:
            ftp.cwd('/dissemin/publicos/SINASC/NOV/TABELAS/')
            ftp.retrbinary(f'RETR {fname}', open(fname, 'wb').write)
    except:
        raise Exception(f'Could not access {fname}.')

    if ((file_name == 'CNESDN18') or (file_name == 'TABOCUP') or (file_name == 'CID10')):
        dbf = DBF(fname, encoding='iso-8859-1')
    else:
        dbf = DBF(fname)
    df = pd.DataFrame(iter(dbf))

    os.unlink(fname)
    return df
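# A minimal usage sketch for download_table_dbf, assuming FTP access to
# ftp.datasus.gov.br is available; 'CID10' is one of the auxiliary tables
# handled above.
cid10 = download_table_dbf('CID10')
print(cid10.head())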
def convertDBF(sqliteCon, dbfFilename, dbfTableName, tableDescription):
    convertedFields = []
    cursor = sqliteCon.cursor()
    cursor.execute("BEGIN TRANSACTION")

    dbfFields = DBF(dbfFilename).fields
    createString = "CREATE TABLE '" + dbfTableName + "' ('fid' INTEGER PRIMARY KEY AUTOINCREMENT "
    firstField = True
    for fieldno in range(len(dbfFields)):
        # add column
        field = dbfFields[fieldno]
        convertedFields.append(field)
        createString += ','
        createString += "'" + field.name + "' "
        createFieldTypeString = "TEXT"
        if (field.type == 'F' or field.type == 'O' or field.type == 'N'):
            createFieldTypeString = "REAL"
        elif (field.type == 'I'):
            createFieldTypeString = "INTEGER"
        firstField = False
        createString += createFieldTypeString
    createString += ")"
    #print(createString)
    cursor.execute(createString)

    contentsString = "insert into gpkg_contents (table_name,data_type,identifier,description,last_change) VALUES(?,'attributes',?,?,strftime('%Y-%m-%dT%H:%M:%fZ','now'))"
    contentsAttrs = (dbfTableName, dbfTableName, dbfTableName + " " + tableDescription)
    cursor.execute(contentsString, contentsAttrs)

    for record in DBF(dbfFilename):
        #print(record)
        insertValues = []
        insertValuesString = ""
        insertString = ""
        for key, value in record.items():
            if (len(insertString) > 0):
                insertString += ","
                insertValuesString += ","
            else:
                insertString = "INSERT INTO " + dbfTableName + " ("
                insertValuesString += " VALUES ("
            insertString += key
            insertValues.append(value)
            insertValuesString += "?"
        insertValuesString += ")"
        insertString += ") "
        insertString += insertValuesString
        #print(insertString)
        cursor.execute(insertString, tuple(insertValues))

    cursor.execute("COMMIT TRANSACTION")
    return convertedFields
def __init__(self, barcode, barcodes, liqcode):
    self.price = None
    self.qty = None
    self.brand = ''
    self.desc = ''
    self.barcode = barcode.upper()
    self.codeNum = ''
    self.singlePrice = None
    self.casePrice = None
    self.barcodes = DBF(barcodes, encoding='latin-1')
    self.liqcode = DBF(liqcode, encoding='latin-1')
    self.deposit = None
    self.dep = False
    self.found = False
def readDBF(dbfFilename):
    cNameRecords = {}
    dbfFields = DBF(dbfFilename).fields
    for record in DBF(dbfFilename, load=True):
        recordFields = {}
        for field in record.keys():
            recordFields[field] = record[field]
        #print(record)
        cNameRecords[record['CNAM']] = recordFields
    return cNameRecords
def read_dbf(dbf_file):
    class MyFieldParser(FieldParser):
        def parse(self, field, data):
            try:
                return FieldParser.parse(self, field, data)
            except ValueError:
                return None

    try:
        dbf = DBF(dbf_file)
        table = pd.DataFrame(iter(dbf))
    except ValueError:
        dbf = DBF(dbf_file, parserclass=MyFieldParser)
        table = pd.DataFrame(iter(dbf))

    table.rename(columns={column: column.lower() for column in table.columns},
                 inplace=True)
    return table
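# A small usage sketch for read_dbf: clean files go through the default parser,
# while files with unparseable values are retried with MyFieldParser, which
# turns them into None. The file name below is hypothetical.
df = read_dbf('legacy_table.dbf')
print(df.columns.tolist())   # column names are lower-cased by the rename above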
def main():
    """
    Step 1. Creating copies of a file using names from an excel sheet
    """
    file_to_copy_name = "some.file"
    path_to_copy_file = os.path.join(get_input_dir(), file_to_copy_name)

    workbook_path = os.path.join(get_input_dir(), "workbook_with_file_names.xlsx")
    excel_workbook = xlrd.open_workbook(workbook_path)
    sheet_with_file_names = excel_workbook.sheet_by_index(0)
    file_names = sheet_with_file_names.row_values(0)
    print("found these file-names in excel", file_names)

    for name in file_names:
        output_directory = get_output_dir()
        path_to_destination_file = os.path.join(output_directory, name)
        shutil.copyfile(src=path_to_copy_file, dst=path_to_destination_file)
        print("successfully wrote [%s] to [%s]"
              % (path_to_copy_file, path_to_destination_file))

    """
    Step 2. Editing .dbf files in two directories

    I can't test this as I don't have .dbf files :/
    The idea is just to iterate over the 'zone' files, read a row
    and write that row to the 'surroundings' file
    """
    dbf_zones_directory = os.path.join(get_input_dir(), "zone_files")
    dbf_surroundings_directory = os.path.join(get_input_dir(), "surroundings_files")

    zone_files = os.listdir(dbf_zones_directory)
    for zone in zone_files:
        zone_file_path = os.path.join(dbf_zones_directory, zone)
        surrounding_file_path = \
            os.path.join(dbf_surroundings_directory, zone.replace("zone", "surroundings"))
        assert os.path.exists(zone_file_path), \
            "missing .dbf zone file at absolute path [%s]" % zone_file_path

        zone_table = DBF(zone_file_path)
        for record in zone_table:
            print(record)

        surrounding_table = DBF(surrounding_file_path)
        for record in surrounding_table:
            print(record)
def merge_dbf(file1, file2):
    table1 = DBF(file1)
    records1 = list(table1)
    table2 = DBF(file2)
    records2 = list(table2)

    # Delete unnecessary fields from the lists
    for records in [records1, records2]:
        for record in records:
            del record['INTERRUPT']
            if record['Suma opadu'] < 0:
                record['Suma opadu'] = 0.0

    # Check whether it is possible to connect the lists:
    # take the last record of the first list, add one hour to its time and
    # build the expected next record. Measurements are made every hour.
    last_record = records1[-1]
    next_hour = str(int(last_record['TIME'][:2]) + 1)
    new_day = last_record['DATE']
    if len(next_hour) == 1:
        next_hour = '0' + next_hour
    # Check whether it is a new day
    elif next_hour == '24':
        next_hour = '00'
        new_day = last_record['DATE'] + datetime.timedelta(days=1)
    else:
        pass

    # Specify search date and search time
    search_date = new_day
    search_time = next_hour + last_record['TIME'][2:]

    # Search for the date and time at which the values connect
    for index, record in enumerate(records2):
        if (record['DATE'] == search_date
                and record['TIME'] == search_time):
            print("Possible to connect")
            # TODO Check this case
            records2 = records2[index:]
            merged_list = records1 + records2
            break
    return merged_list
def process(self, products, overwrite=False, **kwargs):
    for asset_type, asset in self.assets.items():
        if asset_type != _cdlmkii:
            # with older cdl products, the asset is the product
            continue
        fname = self.temp_product_filename(_cdl, _cdlmkii)
        fname_without_ext, _ = os.path.splitext(fname)

        with ZipFile(asset.filename, 'r') as zipfile:
            for member in zipfile.infolist():
                member_ext = member.filename.split('.', 1)[1]
                extracted = zipfile.extract(member, fname_without_ext)
                os.rename(extracted, fname_without_ext + '.' + member_ext)

        image = GeoImage(fname, True)
        image[0].SetNoData(0)
        image = None

        image = gdal.Open(fname, gdal.GA_Update)
        dbf = DBF(fname + '.vat.dbf')
        for i, record in enumerate(dbf):
            image.SetMetadataItem(str("CLASS_NAME_%s" % record['CLASS_NAME']), str(i))
        image = None

        archive_fp = self.archive_temp_path(fname)
        self.AddFile(_cdl, _cdl, archive_fp)
def extract_files(data_folder, filters):
    corrupted_files = []
    database_folder = os.path.join(data_folder, '.database')
    os.makedirs(database_folder, exist_ok=True)

    data_files = glob.glob(os.path.join(data_folder, '*.dbc'))
    for filename in tqdm(data_files):
        converted = dbc2dbf_single(filename)
        if not converted:
            continue
        db = converted['database']
        database = os.path.join(database_folder, db)
        try:
            dbf = DBF(converted['filename'])
        except ValueError:
            corrupted_files.append(converted['filename'])
            continue
        except Exception as e:
            # print(f"Problem file {filename}")
            continue
        if not dbf:
            continue
        try:
            df = pd.DataFrame(dbf, columns=dbf.field_names)
            df['month'] = converted['month']
            df['year'] = converted['year']
            df['uf'] = converted['uf']
            with pd.HDFStore(database) as hdf:
                if db in hdf.keys():
                    hdf.append(db, df, data_columns=True)
                else:
                    hdf.put(db, df, data_columns=True)
        except Exception as e:
            print(e)
def import_db(db):
    try:
        for record in DBF(db):
            values = get_values(record, fields)
            print(insert.format(*values))
    except:
        pass
def read_dbc(filename, signature='utf-8'):
    """
    Uncompresses a "dbc" file to "dbf", then reads it as such, converts it
    into a pandas DataFrame and finally deletes both files.

    Parameters
    ----------
    filename: str object
        Name of the "dbc" file

    signature: str object
        Name of the encoding of the "dbc" file

    Returns
    -------
    df: pandas DataFrame object
        DataFrame with the data of a main data file originally in "dbc"
        format
    """
    if isinstance(filename, str):
        filename = filename.encode()
    with NamedTemporaryFile(delete=False) as tf:
        dbc2dbf(filename, tf.name.encode())
        dbf = DBF(tf.name, encoding=signature)
        df = pd.DataFrame(list(dbf))
    os.unlink(tf.name)
    os.unlink(filename)
    return df
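# A minimal usage sketch for read_dbc, assuming a local DATASUS "dbc" file
# (the file name below is hypothetical). Note that the function deletes the
# input file after conversion, so work on a copy if you need to keep it.
df = read_dbc('DNSP2019.dbc', signature='iso-8859-1')
print(df.shape)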
def __init__(self, dbf_fname, ano, encoding="iso-8859-1"):
    """
    Instantiates a SINAN object, loading it from the given file
    :param dbf_fname: name of the Sinan dbf file
    :param ano: year of the data
    :return:
    """
    self.ano = ano
    self.dbf = DBF(dbf_fname, encoding=encoding)
    self.colunas_entrada = self.dbf.field_names
    self.tabela = pd.DataFrame(list(self.dbf))
    self.tabela.drop_duplicates('NU_NOTIFIC', keep='first', inplace=True)
    if "ID_MUNICIP" in self.tabela.columns:
        self.geocodigos = self.tabela.ID_MUNICIP.dropna().unique()
    elif "ID_MN_RESI" in self.tabela.columns:
        # print(self.tabela.columns)
        self.geocodigos = self.tabela.ID_MN_RESI.dropna().unique()
        self.tabela["ID_MUNICIP"] = self.tabela.ID_MN_RESI
        del self.tabela['ID_MN_RESI']
    self._parse_date_cols()
    if not (self.time_span[0].year == self.ano and self.time_span[1].year == self.ano):
        raise ValidationError(
            _("This file contains notifications that are inconsistent with "
              "the notification year provided. Please make sure that the "
              "notification year is the same for all records in the file "
              "and that it is the year given at the time of upload."))
def read_one_dbf_file(rows, path):
    table = DBF(path, load=True)
    times = 0
    for record in table.records:
        if times > 10:
            break
        row = []
        # change category type to number
        if record['CATEGORY'] not in category_to_number:
            continue
        row.append(category_to_number[record['CATEGORY']])
        # print(category_to_number[record['CATEGORY']])
        row.append(record['CALL_GROUP'])
        if record['final_case'] not in final_case_type_to_number:
            print('missed final_case type:')
            print(record['final_case'])
        row.append(final_case_type_to_number[record['final_case']])
        row.append(record['CASE_DESC'])
        row.append(record['occ_date'])
        row.append(record['x_coordina'])
        row.append(record['y_coordina'])
        row.append(record['census_tra'])
        #times = times + 1
        #print(row)
        rows.append(row)
    return rows
def dbf_to_xlsx(self):
    """
    Opens and reads a DBF file, using the encoding given in the
    file_encoding variable, then writes its records to an xlsx workbook.
    """
    workbook = xlsxwriter.Workbook('C:\\\\zakaz\\zakaz_dbf.xlsx')
    worksheet = workbook.add_worksheet()
    # formats
    header_format = workbook.add_format({'bold': False})
    column_letter = 65
    with DBF('C:\\\\zakaz\\1.dbf', encoding=self.file_encoding) as dbf_table:
        heads = list(list(dbf_table)[0].keys())
        for head in heads:
            worksheet.write(str(chr(column_letter)) + '1', head, header_format)
            column_letter += 1
        row = 1
        col = 0
        for record in dbf_table:
            for head in heads:
                worksheet.write(row, col, record[head])
                col += 1
            col = 0
            row += 1
    workbook.close()
def dbf_open(infile, load=False, verbose=False):
    ''' dbf_open() - Open a DBF file

    Parameters
    ----------
    infile : str
        Name of existing DBF file

    load : bool, default=False
        Read into memory?

    verbose : bool, default=False
        Turn command-line output on or off

    Returns
    -------
    db : obj
        Database

    '''
    from dbfread import DBF

    db = DBF(infile, load=load)
    if verbose:
        print(f'  Opened file {infile}, contains {len(db):,} records')
    return db
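# A minimal usage sketch for dbf_open (the file name below is hypothetical).
# With load=True the records are read into memory and available via db.records.
db = dbf_open('wells.dbf', load=True, verbose=True)
for rec in db.records[:5]:
    print(rec)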
def data_view(file_path, worksheet, nb_rows_beg, nb_rows_end, nb_col_max=10, **kwargs):
    # pd.set_option('display.height', 1000)
    # pd.set_option('display.max_rows', 2500)
    # pd.set_option('display.max_columns', 1500)
    # pd.set_option('display.width', 2000)
    if is_correct_kwargs({'type': ['excel', 'csv', 'geo']}, kwargs) == True:
        print(True)
    else:
        print(False)
    # set_max_columns(nb_col_max)
    type_of_file = kwargs.get("type")
    if type_of_file == "excel":
        results = pd.read_excel(file_path, sheet_name=worksheet)
    elif type_of_file == "csv":
        results = pd.read_csv(file_path)
    elif type_of_file == "geo":
        dbf = DBF(file_path)
        results = pd.DataFrame(iter(dbf))
    else:
        results = pd.read_csv(file_path)
    head = results.head(nb_rows_beg)
    tail = results.tail(nb_rows_end)
    display_dataframe(head, tail)
def geoms(input: 'Name of the files to be converted (dbf, shp)',
          output: 'Name of the output GeoJSON file' = 'geometries') -> int:
    """
    Convert ArcGIS data files into a GeoJSON output

    The input files must all have the same name
    Ex: python factory.py geoms resources/precinct_files/May2016Precinct_region.shp
    """
    # Remove the file ext
    input = '.'.join(input.split('.')[:-1])

    # Import meta data and shapes
    dbf = DBF(input + '.dbf')
    shapes = shapefile.Reader(input + '.shp').shapes()
    if len(dbf) != len(shapes):
        raise Exception('Files do not have the same number of elements')

    # Format shape files into an ID key dict of GeoJSON objects
    features = []
    for meta, shape in zip(dbf, shapes):
        if shape.shapeType == shapefile.POLYGONZ:
            features.append({
                'type': 'Feature',
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [shape.points]
                },
                'properties': {k.lower(): v for k, v in meta.items()}
            })

    # Export as a JSON file
    json.dump({
        'type': 'FeatureCollection',
        'features': features
    }, open(output + '.geojson', 'w'))

    return 0
def createAttributeTable(self, dbf_path, field_name):
    attr_table = {}
    for row in DBF(dbf_path):
        if len(row) < 3:
            return None
        items = list(row.items())
        attr_table.update({items[0][1]: [items[-1][1]]})
    return attr_table
def get_NY_StreetsGraph():
    dir = "/home/ingared/Documents/NS_IP/UndergroundData"
    ny_streets = "NY_STREETS.dbf"

    g = nx.DiGraph()
    count = 0
    test1 = DBF(os.path.join(dir, ny_streets))
    print(" Started reading NY Street data\n")

    for record in test1:
        edge_id = record['edge_ID']
        fn_id = record['FN_ID']
        tn_id = record['TN_ID']
        length = record['length']
        #print(fn_id, tn_id, length, edge_id)

        if not g.has_node(fn_id):
            g.add_node(fn_id)
        if not g.has_node(tn_id):
            g.add_node(tn_id)

        g.add_weighted_edges_from([(fn_id, tn_id, length)], edge_id=edge_id)
        g.add_weighted_edges_from([(tn_id, fn_id, length)], edge_id=edge_id)

        count += 1
        if count % 1000 == 0:
            print("No of edges read :", count)

    print('\n')
    print(" Total Edges in Streets : ", count)
    print('\n')
    return g
def concat_dfs(list_df_names, list_state_names, list_of_paths):
    """
    Concatenate passed dataframes to create bigger ones.

    Args:
        list_df_names: list with the names of the resulting dfs. They are the
            keys of the resulting dictionary where the resulting dfs will be
            stored
        list_state_names: list with the state names (Nacional included or not)
        list_of_paths: list with the paths where the data to create the
            auxiliary dataframes is located
    """
    dic_of_aux_dfs = {}
    dic_final_dfs = {}
    x = 0
    for l in list_df_names:
        dic_of_aux_dfs[l] = {
            k: pd.DataFrame(iter(DBF(v)))
            for k, v in zip(list_state_names, list_of_paths[x])
        }
        dic_final_dfs[l] = pd.concat(dic_of_aux_dfs[l])
        x += 1
    return dic_final_dfs
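# A minimal usage sketch for concat_dfs with hypothetical state names and file
# paths: one resulting DataFrame named 'births', built from two per-state DBF files.
final = concat_dfs(
    list_df_names=['births'],
    list_state_names=['SP', 'RJ'],
    list_of_paths=[['DNSP2019.dbf', 'DNRJ2019.dbf']],
)
print(final['births'].shape)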
def draw(magnitudeDictionary, title):
    plt.close()
    sf = shp.Reader("Shape/county.shp")
    table = DBF('Shape/county.dbf')
    maxVal = max(magnitudeDictionary.values())
    names = []
    for record in table:
        names.append(record["COUNTY"])
    plt.figure()
    plt.suptitle(title)
    # numShapes = len(sf.shapeRecords())
    if maxVal != 0:
        step = 1 / maxVal
    else:
        step = 0
    current = 0
    for shape in sf.shapeRecords():
        curMag = magnitudeDictionary[names[current]]
        c = (curMag * step, 1 - curMag * step, 0)
        x = [i[0] for i in shape.shape.points[:]]
        y = [i[1] for i in shape.shape.points[:]]
        plt.fill(x, y, color=c)
        plt.plot(x, y, color='black')
        current += 1
    plt.show()

# draw()
def data_info(file_path):
    if get_extension(file_path) == "dbf":
        print("allo")
        # results = pd.read_excel("commune.xls", sheet_name="Donnees")
        # head = results.head(5)
        # tail = results.tail(5)
        # print(head)
        # print(".. | .." + "\n.. | .." + "\n.. | .." + "\n.. v ..")
        # print(tail)
        # result1 = results.loc[0][0]
        # result2 = results.loc[0][1]
        # print(type(result2))
        # print("[{}][{}] --> {}".format(0, 0, result1))
        # print("[{}][{}] --> {}".format(0, 1, result2))
        print(type('00.1'))
        dbf = DBF("81-.dbf")
        # print("{}".format(dbf.fields))
        # print(dbf.records[0])
        print("type" + str(ast.literal_eval('1')))
        print("{}".format(type(ast.literal_eval('200.0'))))
        # print(type(df.dtypes))  # 'df' is not defined in this scope
        # Number of rows in .dbf file
        print(len(dbf))
        # Number of columns in .dbf file
        print(len(dbf.fields))
def get_CID10_table(cache=True):
    """
    Fetch the CID10 table
    :param cache: if True, save the downloaded table to a local parquet cache
    :return: pandas DataFrame with the CID10 table
    """
    ftp = FTP('ftp.datasus.gov.br')
    ftp.login()
    ftp.cwd('/dissemin/publicos/SIM/CID10/TABELAS')
    fname = 'CID10.DBF'
    cachefile = os.path.join(CACHEPATH, 'SIM_' + fname.split('.')[0] + '_.parquet')
    if os.path.exists(cachefile):
        df = pd.read_parquet(cachefile)
        return df
    try:
        ftp.retrbinary('RETR {}'.format(fname), open(fname, 'wb').write)
    except:
        raise Exception('Could not download {}'.format(fname))
    dbf = DBF(fname, encoding='iso-8859-1')
    df = pd.DataFrame(list(dbf))
    if cache:
        df.to_parquet(cachefile)
    os.unlink(fname)
    return df
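# A minimal usage sketch for get_CID10_table: the first call downloads CID10.DBF
# from the DATASUS FTP and caches it as parquet; later calls read the cache.
cid10 = get_CID10_table(cache=True)
print(len(cid10))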
def get_dbf_counts(folder_path=None):
    if not folder_path:
        return
    else:
        files = [f for f in os.listdir(os.path.join(".", folder_path))
                 if f[-10:] == 'Counts.dbf']
        return DBF(os.path.join(folder_path, files[0]))
def dbf2eod(filepath, obj):
    hasil = []
    gagal = 0
    JML = 0
    DQTY = 0
    DTTL = 0
    filename = os.path.basename(filepath)
    linking = '<a href="/felino/eod/' + filename + '">' + filename + '</a>'
    objects = http.request.env['felino.eoddetail'].search([], limit=30)
    for item in DBF(filepath, encoding='iso-8859-1'):
        bisa = ['PLU', 'D%1', 'RTN', 'VOD', 'DS1']
        tampil = False
        if item['FLAG'] in bisa:
            tampil = True
            JML = JML + 1
            idx = bisa.index(item['FLAG'])
            if item['FLAG'] == 'RTN':
                QTY = -1 * item['QTY']
            elif item['FLAG'] == 'D%1':
                QTY = 0
            elif item['FLAG'] == 'DS1':
                QTY = 0
            else:
                QTY = item['QTY']
            DQTY = DQTY + QTY
            DTTL = DTTL + (QTY * item['PRICE'])
            product = http.request.env['felino.felino'].search([('barcode', '=', item['CODE'])])
            eod = {'name': filename, 'code': item['CODE'], 'barcode': item['CODE'],
                   'desc': item['DESC'], 'qty': QTY, 'price': item['PRICE'],
                   'norcp': item['NORCP'], 'etype': item['ETYPE'], 'flag': item['FLAG'],
                   'cprice': item['CPRICE'], 'hide': tampil, 'category': product['catagory']}
            objects.sudo().create(eod)
            hasil.append(eod)
    eod = {'name': filename, 'link': linking, 'Child': JML, 'Child1': DQTY, 'totalsales': DTTL}
    obj.sudo().create(eod)
    return hasil
def get_fields(db: str) -> List[str]:
    fields = list()
    record = DBF(db).__iter__().__next__()
    for name in record.keys():
        fields.append(name)
    return sorted(fields)
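# A minimal usage sketch for get_fields (the file name is hypothetical):
# it returns the field names of the first record, sorted alphabetically.
print(get_fields('people.dbf'))   # e.g. ['BIRTHDATE', 'NAME']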
import sys

kshcsv = sys.argv[1]
xhlist = []
f1 = open(kshcsv, 'r')
i = 0
for line in f1:
    if len(line) > 0:
        xhlist.append(line[0:-1])
        i = i + 1
        #print(line[0:-1])
print("total:{}".format(i))

f = open("exec_{}.sql".format(kshcsv), 'w+')

table0 = DBF('ZXSMD_0000_50z.dbf')
table0.encoding = 'gb18030'
for row in table0:
    #print("{},{}".format(str(row['XH']), (str(row['XH']) in xhlist)))
    if str(row['XH']) in xhlist:
        print("{},{}".format(row['XH'], row['KSH']))
        print("update eas_schroll_student set examno='{}' where studentcode='{}';".format(row['KSH'], row['XH']), file=f)
        i = i - 1

table = DBF('ZXSMD_0000_50b.dbf')
table.encoding = 'gb18030'
for row in table:
    if str(row['XH']) in xhlist:
        print("{},{}".format(row['XH'], row['KSH']))
        print("update eas_schroll_student set examno='{}' where studentcode='{}';".format(row['KSH'], row['XH']), file=f)
""" Return records as named tuples. This saves a lot of memory. """ from collections import namedtuple from dbfread import DBF table = DBF('files/people.dbf', lowernames=True) # Set record factory. This must be done after # the table is opened because it needs the field # names. Record = namedtuple('Record', table.field_names) factory = lambda lst: Record(**dict(lst)) table.recfactory = factory for record in table: print(record.name)