def validate_quantile_csv_file_app(quantile_csv_file):
    """
    Simple CLI wrapper of `validate_quantile_csv_file()`.

    :param quantile_csv_file: CSV filepath as passed to
        `json_io_dict_from_quantile_csv_file()`
    :return: None
    """
    validate_quantile_csv_file(quantile_csv_file)
def check_formatting(my_path):
    """
    Validate every not-yet-checked forecast CSV under `my_path`.

    Files that validate cleanly are appended to
    code/validation/validated_files.csv so they are skipped on future runs.
    If any file has validation errors, they are printed and the process
    exits with a non-zero status (fails the CI build).

    :param my_path: root directory to scan (two directory levels deep)
    """
    output_errors = {}
    df = pd.read_csv('code/validation/validated_files.csv')
    previous_checked = list(df['file_path'])
    new_rows = []

    # Iterate through processed csvs
    for path in glob.iglob(my_path + "**/**/", recursive=False):
        for filepath in glob.iglob(path + "*.csv", recursive=False):
            if filepath in previous_checked:
                continue
            file_error = validate_quantile_csv_file(filepath)
            if file_error != 'no errors':
                output_errors[filepath] = file_error
            else:
                # add to previously checked files
                new_rows.append({'file_path': filepath,
                                 'validation_date': datetime.datetime.now()})

    # DataFrame.append was removed in pandas 2.0 — collect the new rows and
    # concatenate once instead (also avoids quadratic re-copying per file)
    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

    # update previously checked files
    df.to_csv('code/validation/validated_files.csv', index=False)

    # Output list of Errors
    if len(output_errors) > 0:
        for filename, errors in output_errors.items():
            print("\n* ERROR IN '", filename, "'")
            for error in errors:
                print(error)
        sys.exit("\n ERRORS FOUND EXITING BUILD...")
    else:
        print("✓ no errors")
def check_formatting(my_path):
    """
    Validate every not-yet-checked forecast CSV under `my_path`.

    Extends the basic check with two extras:
    - verifies the filename date matches the forecast_date column
      (`filename_match_forecast_date`);
    - prunes entries for files deleted from the repository out of the
      central code/validation/validated_files.csv.

    Successfully validated files are recorded in
    code/validation/locally_validated_files.csv. Exits with a non-zero
    status if any file fails validation.

    :param my_path: root directory to scan (two directory levels deep)
    """
    output_errors = {}
    df = pd.read_csv('code/validation/validated_files.csv')
    previous_checked = list(df['file_path'])
    files_in_repository = []
    new_rows = []

    # Iterate through processed csvs
    for path in glob.iglob(my_path + "**/**/", recursive=False):
        for filepath in glob.iglob(path + "*.csv", recursive=False):
            files_in_repository += [filepath]
            if filepath in previous_checked:
                continue
            file_error = validate_quantile_csv_file(filepath)

            # Check forecast file date = forecast_date column
            forecast_date_error = filename_match_forecast_date(filepath)
            if forecast_date_error is not None:
                if file_error == 'no errors':
                    file_error = [forecast_date_error]
                else:
                    file_error += [forecast_date_error]

            if file_error != 'no errors':
                output_errors[filepath] = file_error
            else:
                # add to previously checked files
                new_rows.append({'file_path': filepath,
                                 'validation_date': datetime.datetime.now()})

    # DataFrame.append was removed in pandas 2.0 — collect the new rows and
    # concatenate once instead (also avoids quadratic re-copying per file)
    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

    # Remove files that have been deleted from repo:
    # files that are in verify checks but NOT in repository
    deleted_files = np.setdiff1d(previous_checked, files_in_repository)
    df = df[~df['file_path'].isin(deleted_files)]

    # deleted files should be removed from the central validated_files.csv file
    if len(deleted_files) > 0:
        df.to_csv('code/validation/validated_files.csv', index=False)

    # update previously checked files
    df.to_csv('code/validation/locally_validated_files.csv', index=False)

    # Output list of Errors
    if len(output_errors) > 0:
        for filename, errors in output_errors.items():
            print("\n* ERROR IN '", filename, "'")
            for error in errors:
                print(error)
        sys.exit("\n ERRORS FOUND EXITING BUILD...")
    else:
        print("✓ no errors")
def validate_forecast_file(filepath, silent=False):
    """
    Validate a forecast file with zoltpy.

    link: https://github.com/reichlab/zoltpy/blob/master/zoltpy/covid19.py

    :param filepath: full filepath of the forecast
    :param silent: passed through to `validate_quantile_csv_file()`
    :return: tuple (is_invalid, file_error) where is_invalid is True when
        the validator reported anything other than "no errors"
    """
    file_error = validate_quantile_csv_file(filepath, silent=silent)
    # Express the two-branch if/else as a direct boolean on the result
    is_invalid = file_error != "no errors"
    return is_invalid, file_error
def upload_forecast(forecast_name):
    """
    Validate and upload a single forecast file to Zoltar.

    Creates the Zoltar model (from the sibling metadata-*.txt file) and the
    timezero if they do not already exist, validates the CSV, then uploads
    it and records its checksum in the validation db.

    :param forecast_name: forecast CSV filename; its first three
        dash-separated parts are the YYYY-MM-DD timezero date
    :return: (errors, True) when validation or transformation fails,
        (None, upload result) on success
    :raises Exception: any error raised by the upload call itself propagates
    """
    path = get_forecast_info(forecast_name)
    db = read_validation_db()
    metadata = metadata_dict_for_file(
        list(Path(path).parent.glob('metadata-*.txt'))[0])

    # Create the model on Zoltar if this team-model abbreviation is new
    if f"{metadata['team_abbr']}-{metadata['model_abbr']}" not in [
            m.abbreviation for m in models
    ]:
        create_model(path, metadata)

    # First three dash-separated filename parts form the timezero date
    time_zero_date = '-'.join(forecast_name.split('-')[:3])
    if time_zero_date not in [
            timezero.timezero_date for timezero in project_obj.timezeros
    ]:
        create_timezero(time_zero_date)

    if path is not None:
        errors_from_validation = validate_quantile_csv_file(path)
        if errors_from_validation != "no errors":
            print(errors_from_validation)
            return errors_from_validation, True
        with open(path) as fp:
            print('uploading %s' % path)
            checksum = hashlib.md5(str(fp.read()).encode('utf-8')).hexdigest()
            fp.seek(0)
            quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
            if len(error_from_transformation) > 0:
                return error_from_transformation, True
            # The original wrapped this in `except Exception as e: raise e`
            # followed by an unreachable `return e, True`; the no-op
            # re-raise and dead return are removed — failures still propagate.
            # NOTE(review): `forecast` (not forecast_name) is read from an
            # outer scope here — confirm this is the intended source name.
            fr = util.upload_forecast(
                conn, quantile_json, forecast, project_name,
                f"{metadata['team_abbr']}-{metadata['model_abbr']}",
                time_zero_date)
            db[forecast_name] = checksum
            write_db(db)
            return None, fr
list_of_model_directories = os.listdir('./data-processed/')
# Load the checksum database once; the original re-fetched it with
# get_db() on every file inside the inner loop. Entries are updated in
# place and persisted once at the end via dump_db().
db = get_db()
for directory in list_of_model_directories:
    # Skip non-model entries (anything containing a dot, e.g. files)
    if "." in directory:
        continue

    # Get all forecasts in the directory of this model
    path = './data-processed/' + directory + '/'
    forecasts = glob.glob(path + "*.csv")
    for forecast in forecasts:
        with open(forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

        # In update mode, skip files whose stored hash already matches
        if UPDATE and db.get(get_filename_from_path(forecast), None) == checksum:
            continue

        # Validate covid19 file
        errors_from_validation = validate_quantile_csv_file(forecast)
        if "no errors" == errors_from_validation:
            # Record the new hash only when it differs from the stored one
            if db.get(get_filename_from_path(forecast), None) != checksum:
                db[get_filename_from_path(forecast)] = checksum
        else:
            print(errors_from_validation)
print('Dumping db')
dump_db()
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    """
    Upload every forecast CSV in one model directory to Zoltar.

    Creates the model (from its metadata-<dir_name>.txt file) if it is not
    in the project yet, creates missing timezeros, validates each forecast,
    and uploads forecasts whose timezero is not already on Zoltar.

    :param path_to_processed_model_forecasts: directory holding the model's
        forecast CSVs and metadata file (must end with '/')
    :param dir_name: model directory name; the filename prefix before it is
        the forecast's timezero date
    :return: "Pass" on success, otherwise the first error/exception hit
    """
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)
    conn.re_authenticate_if_necessary()

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                          'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex

    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {}
        model_config['name'], model_config['abbreviation'], model_config['team_name'], \
            model_config['description'], model_config['home_url'], model_config['aux_data_url'] \
            = metadata['model_name'], metadata['team_abbr'] + '-' + metadata['model_abbr'], \
            metadata['team_name'], metadata['methods'], metadata['website_url'] if metadata.get(
                'website_url') != None else url + dir_name, 'NA'
        try:
            print('Create model %s' % model_name)
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex

    print('Time: %s \t Model: %s' % (datetime.now(), model_name))
    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    # NOTE(review): these may be Date objects while time_zero_date below is
    # a string — confirm the membership test ever matches (the later variant
    # converts with strftime).
    existing_time_zeros = [
        forecast.timezero.timezero_date for forecast in model.forecasts
    ]

    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []
    for forecast in forecasts:
        over_write = False
        time_zero_date = forecast.split(dir_name)[0][:-1]

        # Skip if a forecast already exists for this timezero
        if time_zero_date in existing_time_zeros:
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        # Hash the processed file (hoisted after the skip checks so skipped
        # files are not read; `with` handles closing — no explicit close)
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Create timezero on zoltar if not existed
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                try:
                    print('Upload forecast for model: %s \t|\t File: %s' %
                          (model_name, forecast))
                    print()
                    util.upload_forecast(conn, quantile_json, forecast,
                                         project_name, model_name,
                                         time_zero_date,
                                         overwrite=over_write)
                    db[forecast] = checksum
                except Exception as ex:
                    print(ex)
                    return ex
                json_io_dict_batch.append(quantile_json)
                timezero_date_batch.append(time_zero_date)
                forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    return "Pass"
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    """
    Upload every changed forecast CSV in one model directory to Zoltar.

    Creates the model (from its metadata-<dir_name>.txt file) if needed,
    then uploads each forecast whose MD5 checksum differs from the stored
    one in `db`, overwriting the Zoltar copy when the source name already
    exists there.

    :param path_to_processed_model_forecasts: directory holding the model's
        forecast CSVs and metadata file (must end with '/')
    :param dir_name: model directory name; the filename prefix before it is
        the forecast's timezero date
    :return: "Pass" on success, otherwise the first error/exception hit
    """
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                          'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex

    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {}
        model_config['name'], model_config['abbreviation'], model_config['team_name'], \
            model_config['description'], model_config['home_url'], model_config['aux_data_url'] \
            = metadata['model_name'], metadata['team_abbr'] + '-' + metadata['model_abbr'], \
            metadata['team_name'], metadata['methods'], url + dir_name, 'NA'
        try:
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex

    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []
    for forecast in forecasts:
        over_write = False

        # Get the current hash of a processed file (`with` closes the
        # handle — the original's explicit f.close() was redundant)
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()

        # Skip files whose stored hash is unchanged; overwrite on Zoltar
        # when a forecast with this source name already exists there
        if db.get(forecast, None) != checksum:
            print(forecast)
            if forecast in existing_forecasts:
                over_write = True
        else:
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, VALID_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                try:
                    util.upload_forecast(conn, quantile_json, forecast,
                                         project_name, model_name,
                                         time_zero_date,
                                         overwrite=over_write)
                    db[forecast] = checksum
                except Exception as ex:
                    print(ex)
                    return ex
                json_io_dict_batch.append(quantile_json)
                timezero_date_batch.append(time_zero_date)
                forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    return "Pass"
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    """
    Upload every changed forecast CSV in one model directory to Zoltar.

    Creates the model from its metadata file if missing, syncs metadata
    edits via `model.edit`, and for each forecast whose MD5 checksum
    differs from the stored one in `db`, either overwrites the existing
    Zoltar forecast (same issue date) or adds a newer version.

    :param path_to_processed_model_forecasts: directory holding the model's
        forecast CSVs and metadata file (must end with '/')
    :param dir_name: model directory name; the filename prefix before it is
        the forecast's timezero date
    :return: "Pass" on success, otherwise the first error/exception hit
    """
    # Hoisted to function scope: the original ran this import inside the
    # per-forecast loop
    from datetime import date

    global models
    global model_abbrs

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                          'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex

    model_abbreviation = metadata['model_abbr']

    # get the corresponding model_config for the metadata file
    model_config = zoltar_config_from_metadata(metadata)

    if model_abbreviation not in model_abbrs:
        pprint.pprint('%s not in models' % model_abbreviation)
        if 'home_url' not in model_config:
            model_config['home_url'] = url + dir_name
        try:
            logger.info(f"Creating model {model_config}")
            models.append(project_obj.create_model(model_config))
            model_abbrs = [model.abbreviation for model in models]
        except Exception as ex:
            return ex

    # fetch model based on model_abbr
    model = [
        model for model in models if model.abbreviation == model_abbreviation
    ][0]

    if has_changed(metadata, model):
        # model metadata has changed, call the edit function in zoltpy to update metadata
        print(
            f"{metadata['model_abbr']!r} model has changed metadata contents. Updating on Zoltar..."
        )
        model.edit(model_config)

    # Get names of existing forecasts to avoid re-upload
    existing_time_zeros = [
        forecast.timezero.timezero_date for forecast in model.forecasts
    ]
    # Convert all timezeros from Date type to str type
    existing_time_zeros = [
        existing_time_zero.strftime(YYYY_MM_DD_DATE_FORMAT)
        for existing_time_zero in existing_time_zeros
    ]

    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []
    for forecast in forecasts:
        # Skip metadata text file
        if not forecast.endswith('.csv'):
            continue

        over_write = False
        time_zero_date = forecast.split(dir_name)[0][:-1]

        # Get the current hash of a processed file (`with` closes the
        # handle — the original's explicit f.close() was redundant)
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()

        # Skip files whose stored hash is unchanged
        if db.get(forecast, None) != checksum:
            print(forecast, db.get(forecast, None))
            if time_zero_date in existing_time_zeros:
                # Check if the already existing forecast has the same issue date
                local_issue_date = date.today().strftime("%Y-%m-%d")
                uploaded_forecast = [
                    forecast for forecast in model.forecasts
                    if forecast.timezero.timezero_date.strftime(
                        YYYY_MM_DD_DATE_FORMAT) == time_zero_date
                ][0]
                uploaded_issue_date = uploaded_forecast.issue_date
                if local_issue_date == uploaded_issue_date:
                    # Overwrite the existing forecast if has the same issue date
                    over_write = True
                    logger.info(
                        f"Overwrite existing forecast={forecast} with newer version because the new issue_date={local_issue_date} is the same as the uploaded file issue_date={uploaded_issue_date}"
                    )
                else:
                    logger.info(
                        f"Add newer version to forecast={forecast} because the new issue_date={local_issue_date} is different from uploaded file issue_date={uploaded_issue_date}"
                    )
        else:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Create timezero on zoltar if not existed
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    print(ex)
                    return ex

            # Validate covid19 file
            print(f"Validating {forecast}")
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                try:
                    logger.debug(
                        'Upload forecast for model: %s \t|\t File: %s\n' %
                        (metadata['model_abbr'], forecast))
                    upload_covid_forecast_by_model(conn, quantile_json,
                                                   forecast, project_name,
                                                   model,
                                                   metadata['model_abbr'],
                                                   time_zero_date,
                                                   overwrite=over_write)
                    db[forecast] = checksum
                except Exception as ex:
                    logger.error(ex)
                    return ex
                json_io_dict_batch.append(quantile_json)
                timezero_date_batch.append(time_zero_date)
                forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    return "Pass"
def upload_covid_all_forecasts(path_to_processed_model_forecasts):
    """
    Upload every new forecast CSV in one model directory to the
    'COVID-19 Forecasts' Zoltar project.

    Authenticates, locates the project and its timezeros, derives the model
    directory name from the first forecast filename, then validates and
    uploads each forecast whose source name is not already on Zoltar.
    Errors are printed; nothing is returned.

    :param path_to_processed_model_forecasts: directory holding the model's
        forecast CSVs and metadata file (must end with '/')
    """
    # meta info
    project_name = 'COVID-19 Forecasts'
    project_obj = None
    project_timezeros = []
    forecasts = os.listdir(path_to_processed_model_forecasts)
    conn = util.authenticate()

    # Get all existing timezeros in the project
    for project in conn.projects:
        if project.name == project_name:
            project_obj = project
            for timezero in project.timezeros:
                project_timezeros.append(timezero.timezero_date)
            break

    # Get model name: strip the YYYY-MM-DD prefix and the .csv suffix from
    # the first forecast filename
    separator = '-'
    dir_name = separator.join(
        forecasts[0].split(separator)[3:]).split('.csv')[0]
    metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                      'metadata-' + dir_name + '.txt')
    model_name = metadata['model_name']
    model = [
        model for model in project_obj.models if model.name == model_name
    ][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    for forecast in forecasts:
        # Skip if forecast is already on zoltar
        if forecast in existing_forecasts:
            continue
        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    # Track locally so later forecasts for the same timezero
                    # don't retry creation (matches the sibling variants;
                    # the original omitted this append)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    print(ex)

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast (`with` closes fp — the original's explicit
            # fp.close() was redundant)
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID19_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    print(error_from_transformation)
                else:
                    try:
                        util.upload_forecast(conn, quantile_json, forecast,
                                             project_name, model_name,
                                             time_zero_date, overwrite=False)
                    except Exception as ex:
                        print(ex)
            else:
                print(errors_from_validation)
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    """
    Validate every new forecast CSV in one model directory and upload them
    to Zoltar in a single batch.

    Creates the model (from its metadata-<dir_name>.txt file) if missing,
    creates missing timezeros, validates and transforms each forecast not
    already on Zoltar, then uploads the whole batch at the end via
    `util.upload_forecast_batch` for better performance.

    :param path_to_processed_model_forecasts: directory holding the model's
        forecast CSVs and metadata file (must end with '/')
    :param dir_name: model directory name; the filename prefix before it is
        the forecast's timezero date
    :return: "Pass" on success, otherwise the first error/exception hit
    """
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                      'metadata-' + dir_name + '.txt')
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {}
        model_config['name'], model_config['abbreviation'], model_config['team_name'], \
            model_config['description'], model_config['home_url'], model_config['aux_data_url'] \
            = metadata['model_name'], metadata['model_abbr'], metadata['team_name'], \
            metadata['methods'], url + dir_name, 'NA'
        try:
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex

    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    # Collect everything for one batch upload at the end
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []
    for forecast in forecasts:
        # Skip if forecast is already on zoltar
        if forecast in existing_forecasts:
            continue
        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Queue forecast for the batch upload (`with` closes fp — the
            # original's explicit fp.close() was redundant)
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, VALID_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                json_io_dict_batch.append(quantile_json)
                timezero_date_batch.append(time_zero_date)
                forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    # Batch upload for better performance
    if len(json_io_dict_batch) > 0:
        try:
            util.upload_forecast_batch(conn, json_io_dict_batch,
                                       forecast_filename_batch, project_name,
                                       model_name, timezero_date_batch)
        except Exception as ex:
            return ex
    return "Pass"