def download(self, engine=None, debug=False):
    """Download the Global Wood Density Database and load it into two tables.

    The workbook "GlobalWoodDensityDatabase.xls" is fetched from
    ``self.urls["GWDD"]``. The sheet at index 1 is written to
    "gwdd_data.csv" and loaded into the "data" table; the sheet at index 2
    is written to "gwdd_ref.csv" and loaded into the "reference" table.

    Args:
        engine: Forwarded to ``Script.download``. May be ``None``; all work
            below goes through ``self.engine`` (presumably set up by
            ``Script.download`` -- confirm against the base class).
        debug: Forwarded to ``Script.download``.

    Returns:
        ``self.engine``.
    """
    Script.download(self, engine, debug)

    # Legacy default-encoding hack: the hasattr() guard makes the call a
    # no-op wherever sys.setdefaultencoding is unavailable.
    # NOTE(review): `reload` must be provided by a file-level import that
    # is not visible in this block.
    reload(sys)
    if hasattr(sys, 'setdefaultencoding'):
        sys.setdefaultencoding("utf-8")

    self.engine.download_file(self.urls["GWDD"], "GlobalWoodDensityDatabase.xls")
    filename = os.path.basename("GlobalWoodDensityDatabase.xls")
    book = xlrd.open_workbook(self.engine.format_filename(filename))

    # Creating data file
    sh = book.sheet_by_index(1)
    rows = sh.nrows
    file_path = self.engine.format_filename("gwdd_data.csv")
    gwdd_data = open_fw(file_path)
    try:
        csv_writer = open_csvw(gwdd_data)
        csv_writer.writerow(["Number", "Family", "Binomial", "Wood_Density",
                             "Region", "Reference_Number"])
        # Row 0 is skipped (presumably the sheet's own header row).
        for index in range(1, rows):
            row = sh.row(index)
            # Get each row and format the cell values.
            row_as_list = [to_str(column_value.value) for column_value in row]
            csv_writer.writerow(row_as_list)
    finally:
        # Close the handle even if a row fails to convert or write.
        gwdd_data.close()

    table = Table("data", delimiter=",")
    table.columns = [("Number", ("pk-int",)),
                     ("Family", ("char",)),
                     ("Binomial", ("char",)),
                     ("Wood_Density", ("double",)),
                     ("Region", ("char",)),
                     ("Reference_Number", ("int",))]
    table.pk = 'Number'
    table.contains_pk = True

    self.engine.table = table
    self.engine.create_table()
    # file_path is already a formatted path; use it directly rather than
    # re-formatting it through the (possibly None) `engine` argument.
    self.engine.insert_data_from_file(file_path)

    # Creating reference table file
    file_path = self.engine.format_filename("gwdd_ref.csv")
    ref_file = open_fw(file_path)
    try:
        csv_writerd = open_csvw(ref_file)
        csv_writerd.writerow(["Reference_Number", "Reference"])
        sh = book.sheet_by_index(2)
        rows = sh.nrows
        for index in range(1, rows):
            row = sh.row(index)
            # Get each row and format the cell values.
            row_as_list = [to_str(column_value.value, object_encoding=sys.stdout)
                           for column_value in row]
            csv_writerd.writerow(row_as_list)
    finally:
        ref_file.close()

    table = Table("reference", delimiter=",")
    table.columns = [("Reference_Number", ("pk-int",)),
                     ("Reference", ("char",))]
    table.pk = 'Reference_Number'
    table.contains_pk = True

    self.engine.table = table
    self.engine.create_table()
    self.engine.insert_data_from_file(file_path)

    return self.engine
def download(self, engine=None, debug=False):
    """Fetch the Global Wood Density Database workbook and load its sheets.

    Downloads "GlobalWoodDensityDatabase.xls" from ``self.urls["GWDD"]``,
    exports the sheet at index 1 to "gwdd_data.csv" (table "data") and the
    sheet at index 2 to "gwdd_ref.csv" (table "reference"), then inserts
    each CSV through ``self.engine``.

    Args:
        engine: Passed through to ``Script.download``; may be ``None``, so
            it is never dereferenced here -- ``self.engine`` is used
            instead (presumably populated by ``Script.download``; confirm
            against the base class).
        debug: Passed through to ``Script.download``.

    Returns:
        ``self.engine``.
    """
    Script.download(self, engine, debug)

    # Legacy default-encoding hack; guarded so it is skipped when
    # sys.setdefaultencoding does not exist.
    # NOTE(review): `reload` has to come from a file-level import that is
    # not visible from this block.
    reload(sys)
    if hasattr(sys, 'setdefaultencoding'):
        sys.setdefaultencoding("utf-8")

    self.engine.download_file(self.urls["GWDD"],
                              "GlobalWoodDensityDatabase.xls")
    filename = os.path.basename("GlobalWoodDensityDatabase.xls")
    book = xlrd.open_workbook(self.engine.format_filename(filename))

    def export_sheet(sheet_index, csv_name, header, **to_str_kwargs):
        """Write one sheet (minus its first row) to a CSV; return its path."""
        sheet = book.sheet_by_index(sheet_index)
        csv_path = self.engine.format_filename(csv_name)
        out_file = open_fw(csv_path)
        try:
            writer = open_csvw(out_file)
            writer.writerow(header)
            # Row 0 is skipped (presumably the sheet's own header row).
            for row_index in range(1, sheet.nrows):
                writer.writerow([
                    to_str(cell.value, **to_str_kwargs)
                    for cell in sheet.row(row_index)
                ])
        finally:
            # Release the handle even when a cell fails to convert.
            out_file.close()
        return csv_path

    def load_table(table, csv_path):
        """Register the table on the engine, create it and insert the CSV."""
        table.contains_pk = True
        self.engine.table = table
        self.engine.create_table()
        # csv_path is already a formatted path; the original re-formatted
        # it through the `engine` argument, which fails when engine is None.
        self.engine.insert_data_from_file(csv_path)

    # Wood density measurements.
    data_path = export_sheet(1, "gwdd_data.csv", [
        "Number", "Family", "Binomial", "Wood_Density", "Region",
        "Reference_Number"
    ])
    data_table = Table("data", delimiter=",")
    data_table.columns = [("Number", ("pk-int", )),
                          ("Family", ("char", )),
                          ("Binomial", ("char", )),
                          ("Wood_Density", ("double", )),
                          ("Region", ("char", )),
                          ("Reference_Number", ("int", ))]
    data_table.pk = 'Number'
    load_table(data_table, data_path)

    # Reference table.
    ref_path = export_sheet(2, "gwdd_ref.csv",
                            ["Reference_Number", "Reference"],
                            object_encoding=sys.stdout)
    ref_table = Table("reference", delimiter=",")
    ref_table.columns = [("Reference_Number", ("pk-int", )),
                         ("Reference", ("char", ))]
    ref_table.pk = 'Reference_Number'
    load_table(ref_table, ref_path)

    return self.engine
def download(self, engine=None, debug=False):
    """Download the capture archive and load its files into tables.

    Steps, in order:
      1. Fetch and unpack the zip archive at ``self.urls["capture"]``.
      2. Convert the first sheet of "DSD_FI_CAPTURE.xlsx" to
         "DSD_CAPTURE.csv" (rows from index 2 onward, under a fixed header).
      3. Create and fill one table per CSV file. Four tables are created
         without preset columns; "country_groups" and "ts_capture_data"
         get explicit column definitions.

    Args:
        engine: Forwarded to ``Script.download``; the local name is then
            rebound to ``self.engine``, so a ``None`` argument is fine.
        debug: Forwarded to ``Script.download``.
    """
    Script.download(self, engine, debug)
    engine = self.engine
    engine.download_files_from_archive(self.urls["capture"], archive_type="zip")

    # Convert xlsx to csv.
    xlsx_file = engine.format_filename("DSD_FI_CAPTURE.xlsx")
    file_path = engine.format_filename("DSD_CAPTURE.csv")
    book = xlrd.open_workbook(xlsx_file)
    sh = book.sheet_by_index(0)
    rows = sh.nrows

    # Creating data files
    new_data = open_fw(file_path)
    try:
        csv_writer = open_csvw(new_data)
        csv_writer.writerow(["Order", "Concept_id", "Role_Type", "Codelist_id",
                             "Codelist_Code_id", "Description"])
        # Data starts at index 2.
        for index in range(2, rows):
            # Get each row and format the cell values.
            row_as_list = [to_str(cell.value) for cell in sh.row(index)]
            csv_writer.writerow(row_as_list)
    finally:
        # Close the handle even if a row fails to convert or write.
        new_data.close()

    # Per the original author's note, CL_FI_COUNTRY_GROUPS.csv has mixed
    # encodings, so its columns are declared explicitly.
    # NOTE(review): Official_Name_Ar is '1100' wide, far larger than its
    # sibling columns -- kept as in the original; confirm it is intentional.
    country_groups_columns = [
        ('UN_Code', ('int', )),
        ('Identifier', ('int', )),
        ('ISO2_Code', ('char', '5')),
        ('ISO3_Code', ('char', '5')),
        ('Name_En', ('char', '50')),
        ('Name_Fr', ('char', '50')),
        ('Name_Es', ('char', '50')),
        ('Name_Ar', ('char', '120')),
        ('Name_Cn', ('char', '90')),
        ('Name_Ru', ('char', '150')),
        ('Official_Name_En', ('char', '70')),
        ('Official_Name_Fr', ('char', '70')),
        ('Official_Name_Es', ('char', '70')),
        ('Official_Name_Ar', ('char', '1100')),
        ('Official_Name_Cn', ('char', '70')),
        ('Official_Name_Ru', ('char', '130')),
        ('Continent_Group', ('char', '15')),
        ('EcoClass_Group', ('char', '50')),
        ('GeoRegion_Group', ('char', '30')),
    ]

    # TS_FI_CAPTURE.csv is also loaded with an explicit column list.
    ts_capture_columns = [
        ('COUNTRY', ('int', )),
        ('FISHING_AREA', ('int', )),
        ('SPECIES', ('char', '10')),
        ('YEAR', ('int', )),
        ('UNIT', ('char', '5')),
        ('QUANTITY', ('double', )),
        ('SYMBOL', ('char', '4')),
    ]

    # (file name, table name, explicit columns or None); processing order
    # matches the original three loops.
    table_specs = [
        ('CL_FI_UNIT.csv', 'unit_data', None),
        ('CL_FI_WATERAREA_GROUPS.csv', 'waterarea_groups', None),
        ('DSD_CAPTURE.csv', 'dsd_capture_data', None),
        ('CL_FI_SPECIES_GROUPS.csv', 'species_group', None),
        ('CL_FI_COUNTRY_GROUPS.csv', 'country_groups', country_groups_columns),
        ('TS_FI_CAPTURE.csv', 'ts_capture_data', ts_capture_columns),
    ]

    for filename, tablename, columns in table_specs:
        data_path = engine.format_filename(filename)
        table = Table(tablename, delimiter=',', cleanup=self.cleanup_func_table)
        if columns is not None:
            table.columns = columns
        engine.auto_create_table(table, filename=filename)
        engine.insert_data_from_file(data_path)