            except ValueError:
                pass

        if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000
            with connection.cursor() as c:
                c.executemany(insert_string, data_values_tuple_list)
            data_values_tuple_list = []

if len(data_values_tuple_list):  # insert any leftover data_values
    with connection.cursor() as c:
        c.executemany(insert_string, data_values_tuple_list)
    data_values_tuple_list = []

# for dataset in existing_datasets_list:
#     write_dataset_csv(dataset.pk, dataset.name, dataset.name, 'gbd_cause_fetcher', '')
# for dataset in new_datasets_list:
#     write_dataset_csv(dataset.pk, dataset.name, None, 'gbd_cause_fetcher', '')

newimport = ImportHistory(
    import_type='unaids',
    import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
    import_notes='A unaids import was performed',
    import_state='There are a total of %s unaids variables after the import'
                 % Variable.objects.filter(datasetId__namespace='unaids').count())
newimport.save()

print("--- %s seconds ---" % (time.time() - start_time))
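# The pattern above recurs throughout these importers: accumulate value tuples in a
# list, flush them with executemany() once the list grows past a threshold, then flush
# the leftovers at the end. A minimal sketch of that pattern as a reusable helper
# (the name `insert_in_batches` and the batch_size argument are illustrative
# assumptions, not part of the importer code):
def insert_in_batches(connection, insert_string, rows, batch_size=3000):
    buffer = []
    for row in rows:
        buffer.append(row)
        if len(buffer) > batch_size:  # flush once the buffer grows past the threshold
            with connection.cursor() as c:
                c.executemany(insert_string, buffer)
            buffer = []
    if buffer:  # flush any leftover rows
        with connection.cursor() as c:
            c.executemany(insert_string, buffer)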
                        with connection.cursor() as c:
                            c.executemany(insert_string, data_values_tuple_list)
                        logger.info("Dumping data values...")
                        data_values_tuple_list = []
                    column_number = 0
                    if row_number % 10 == 0:
                        time.sleep(0.001)  # sleep 1 ms after every 10th row so we don't keep the CPU busy the whole time

            if len(data_values_tuple_list):  # insert any leftover data_values
                with connection.cursor() as c:
                    c.executemany(insert_string, data_values_tuple_list)
                logger.info("Dumping data values...")

            newimport = ImportHistory(
                import_type=DATASET_NAMESPACE,
                import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                import_notes='Initial import of WDI',
                import_state=json.dumps({'file_hash': file_checksum(WDI_DOWNLOADS_PATH + 'wdi.zip')}))
            newimport.save()

            # for dataset in datasets_list:
            #     write_dataset_csv(dataset.pk, dataset.name, None, 'wdi_fetcher', '')

            logger.info("Import complete.")
        else:
            last_import = import_history.last()
            if json.loads(last_import.import_state)['file_hash'] == file_checksum(WDI_DOWNLOADS_PATH + 'wdi.zip'):
                logger.info('No updates available.')
                sys.exit(0)
            logger.info('New data is available.')
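# The update check above relies on file_checksum() to decide whether the downloaded
# wdi.zip changed since the last recorded import. Its implementation is not shown in
# this excerpt; a minimal sketch of such a helper, assuming a chunked MD5 over the
# file contents, could look like this:
import hashlib

def file_checksum(filename, blocksize=2 ** 20):
    md5 = hashlib.md5()
    with open(filename, "rb") as f:
        for chunk in iter(lambda: f.read(blocksize), b""):  # read in 1 MB chunks
            md5.update(chunk)
    return md5.hexdigest()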
                column_number = 0

                if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000
                    with connection.cursor() as c:
                        c.executemany(insert_string, data_values_tuple_list)
                    data_values_tuple_list = []

        if len(data_values_tuple_list):  # insert any leftover data_values
            with connection.cursor() as c:
                c.executemany(insert_string, data_values_tuple_list)

        newimport = ImportHistory(
            import_type='unwpp',
            import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
            import_notes='Importing file %s' % file_to_parse,
            import_state=json.dumps({
                'file_hash': file_checksum(os.path.join(wpp_downloads_save_location, file_to_parse)),
                'file_name': file_to_parse
            }))
        newimport.save()

        write_dataset_csv(newdataset.pk, newdataset.name, None, 'unwpp_fetcher', '')
    else:
        if imported_before_hash == file_checksum(os.path.join(wpp_downloads_save_location, file_to_parse)):
            sys.exit('No updates available.')

        country_name_entity_ref = process_entities(country_names_dict)

        existing_categories = DatasetCategory.objects.values('name')
        existing_categories_list = {item['name'] for item in existing_categories}
            column_number = 0

            data_values_tuple_list = []
            for country, data_value in data_values_dict.items():
                for year, value in data_value.items():
                    data_values_tuple_list.append((value, year, country, newvariable.pk))

            with connection.cursor() as c:
                c.executemany(insert_string, data_values_tuple_list)

            newimport = ImportHistory(
                import_type='clioinfra',
                import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                import_notes='Importing file %s' % one_file,
                import_state=json.dumps({
                    'file_hash': file_checksum(file),
                    'file_name': one_file
                }))
            newimport.save()
        else:
            if imported_before_hash == file_checksum(file):
                print('No updates available for file %s.' % one_file)
            else:
                if 'historical' in one_file.lower():
                    print('Processing: %s' % one_file)
                    wb = load_workbook(file, read_only=True)
                    data_ws = wb['Data']
                                                variable_name_to_object[variable_name].pk))

                if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000
                    with connection.cursor() as c:
                        c.executemany(insert_string, data_values_tuple_list)
                    data_values_tuple_list = []

                if row_number % 100 == 0:
                    time.sleep(0.001)  # sleep 1 ms after every 100th row so we don't keep the CPU busy the whole time

        if len(data_values_tuple_list):  # insert any leftover data_values
            with connection.cursor() as c:
                c.executemany(insert_string, data_values_tuple_list)
            data_values_tuple_list = []

        os.remove(csv_filename)

        # for dataset in existing_datasets_list:
        #     write_dataset_csv(dataset.pk, dataset.name, dataset.name, 'gbd_risk_fetcher', '')
        # for dataset in new_datasets_list:
        #     write_dataset_csv(dataset.pk, dataset.name, None, 'gbd_risk_fetcher', '')

        newimport = ImportHistory(
            import_type='gbd_risk',
            import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
            import_notes='A gbd import was performed',
            import_state='There are a total of %s gbd_risk variables after the import'
                         % Variable.objects.filter(fk_dst_id__namespace='gbd_risk').count())
        newimport.save()

print("--- %s seconds ---" % (time.time() - start_time))
                        with connection.cursor() as c:
                            c.executemany(insert_string, data_values_tuple_list)
                        logger.info("Dumping data values...")
                        data_values_tuple_list = []
                    column_number = 0
                    if row_number % 10 == 0:
                        time.sleep(0.001)  # sleep 1 ms after every 10th row so we don't keep the CPU busy the whole time

            if len(data_values_tuple_list):  # insert any leftover data_values
                with connection.cursor() as c:
                    c.executemany(insert_string, data_values_tuple_list)
                logger.info("Dumping data values...")

            newimport = ImportHistory(
                import_type='povstats',
                import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                import_notes='Initial import of POVSTATS datasets',
                import_state=json.dumps({'file_hash': file_checksum(povstats_downloads_save_location + 'povstats.zip')}))
            newimport.save()

            for dataset in datasets_list:
                write_dataset_csv(dataset.pk, dataset.name, None, 'povstats_fetcher', '')

            logger.info("Import complete.")
        else:
            last_import = import_history.last()
            deleted_indicators = {}  # keeps track of which variables' data values have already been deleted before writing new values
            if json.loads(last_import.import_state)['file_hash'] == file_checksum(povstats_downloads_save_location + 'povstats.zip'):
                logger.info('No updates available.')
                sys.exit('No updates available.')
            logger.info('New data is available.')
                data_values_tuple_list.append((
                    str(float(row['val']) * 100) if row['metric_name'] == 'Percent' else row['val'],
                    int(row['year']),
                    c_name_entity_ref[row['location_name']].pk,
                    variable_name_to_object[variable_name].pk))

                if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000
                    with connection.cursor() as c:
                        c.executemany(insert_string, data_values_tuple_list)
                    data_values_tuple_list = []

                if row_number % 100 == 0:
                    time.sleep(0.001)  # sleep 1 ms after every 100th row so we don't keep the CPU busy the whole time

        if len(data_values_tuple_list):  # insert any leftover data_values
            with connection.cursor() as c:
                c.executemany(insert_string, data_values_tuple_list)
            data_values_tuple_list = []

        os.remove(csv_filename)

        # for dataset in existing_datasets_list:
        #     write_dataset_csv(dataset.pk, dataset.name, dataset.name, 'gbd_prevalence_by_gender', '')
        # for dataset in new_datasets_list:
        #     write_dataset_csv(dataset.pk, dataset.name, None, 'gbd_prevalence_by_gender', '')

        newimport = ImportHistory(
            import_type='gbd_prevalence_by_gender',
            import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
            import_notes='A gbd import was performed',
            import_state='There are a total of %s gbd_prevalence_by_gender variables after the import'
                         % Variable.objects.filter(datasetId__namespace='gbd_prevalence_by_gender').count())
        newimport.save()

print("--- %s seconds ---" % (time.time() - start_time))
                        time.sleep(0.001)  # sleep 1 ms after every 10th row so we don't keep the CPU busy the whole time

            if len(data_values_tuple_list):  # insert any leftover data_values
                with connection.cursor() as dbconnection:
                    dbconnection.executemany(insert_string, data_values_tuple_list)
                logger.info("Dumping data values...")

            logger.info("Imported a total of %s data values." % total_data_values)

            newimport = ImportHistory(
                import_type='qog',
                import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                import_notes='Initial import of QoG data. %s data values imported.' % total_data_values,
                import_state=json.dumps({
                    'file_hash': file_checksum(qog_downloads_save_location + 'qog.csv')
                }))
            newimport.save()

            # now export the csvs to the repo
            for category, dataset in datasets_ref_models.items():
                write_dataset_csv(dataset.pk, dataset.name, None, 'qog_fetcher', '')

            logger.info("Import complete.")
        else:
            logger.info("Importing the QoG dataset.")
            last_import = import_history.last()
                        data_values_tuple_list = []
                    column_number = 0
                    if row_number % 10 == 0:
                        time.sleep(0.001)  # sleep 1 ms after every 10th row so we don't keep the CPU busy the whole time

            if len(data_values_tuple_list):  # insert any leftover data_values
                with connection.cursor() as c:
                    c.executemany(insert_string, data_values_tuple_list)
                logger.info("Dumping data values...")

            newimport = ImportHistory(
                import_type='climatech',
                import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                import_notes='Initial import of climatech datasets',
                import_state=json.dumps({'file_hash': file_checksum(excel_filename)}))
            newimport.save()

            for dataset in datasets_list:
                write_dataset_csv(dataset.pk, dataset.name, None, 'climatech_fetcher', '')

            logger.info("Import complete.")
        else:
            last_import = import_history.last()
            deleted_indicators = {}  # keeps track of which variables' data values have already been deleted before writing new values
            if json.loads(last_import.import_state)['file_hash'] == file_checksum(excel_filename):
            for oneimport in import_history:
                if json.loads(oneimport.import_state)['file_name'] == os.path.basename(eachfile):
                    file_imported_before = True
                    imported_before_hash = json.loads(oneimport.import_state)['file_hash']

            if not file_imported_before:
                process_csv_file_insert("/tmp/%s" % csv_filename, os.path.basename(eachfile))
                os.remove("/tmp/%s" % csv_filename)

                newimport = ImportHistory(
                    import_type='faostat',
                    import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                    import_notes='Importing file %s' % os.path.basename(eachfile),
                    import_state=json.dumps({
                        'file_hash': file_checksum(eachfile),
                        'file_name': os.path.basename(eachfile)
                    }))
                newimport.save()
            else:
                if imported_before_hash == file_checksum(eachfile):
                    print('No updates available for file %s.' % os.path.basename(eachfile))

    for eachfile in glob.glob(all_dataset_files_dir + "/*.csv"):
        if os.path.basename(eachfile) not in files_to_exclude:
            file_imported_before = False
            for oneimport in import_history:
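# The loop above scans the ImportHistory records once per file to decide whether a
# file was imported before and, if so, with which hash. A hedged sketch of the same
# lookup factored into a small helper (the name `previous_import_hash` is an
# assumption for illustration, not part of the importer):
import json

def previous_import_hash(import_history, filename):
    """Return the stored file_hash for `filename`, or None if it was never imported."""
    for oneimport in import_history:
        state = json.loads(oneimport.import_state)
        if state.get('file_name') == filename:
            return state['file_hash']
    return None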