def check_formatting(my_path):
    """
    purpose: Validates every forecast csv located two directory levels below
    `my_path` that is not yet listed in the validated-files record, and stops
    the build when any of them has errors.

    params:
    * my_path: Root directory to scan. It is concatenated directly with the
      glob pattern, so it must end with a path separator.

    returns: None when no errors are found (a confirmation is printed).

    raises: SystemExit (through `sys.exit`) after printing the errors of
    every failing file.
    """
    output_errors = {}
    df = pd.read_csv('code/validation/validated_files.csv')
    previous_checked = list(df['file_path'])
    # Set copy of the record so the per-file membership test is O(1).
    already_validated = set(previous_checked)
    files_in_repository = []
    newly_validated = []

    # With recursive=False each "**" behaves like "*", so this visits the
    # directories exactly two levels below my_path.
    for path in glob.iglob(my_path + "**/**/", recursive=False):
        for filepath in glob.iglob(path + "*.csv", recursive=False):
            files_in_repository.append(filepath)

            # Files already present in the record are not validated again.
            # (A file reaching the code below has no row in df, so there is
            # no stale record entry to delete for it.)
            if filepath in already_validated:
                continue

            # validate file
            file_error = covid19.validate_quantile_csv_file(filepath)

            # Check forecast file date = forecast_date column
            forecast_date_error = filename_match_forecast_date(filepath)
            if forecast_date_error is not None:
                if file_error == 'no errors':
                    file_error = [forecast_date_error]
                else:
                    file_error += [forecast_date_error]

            if file_error != 'no errors':
                output_errors[filepath] = file_error
            else:
                # Remember the file as validated; the rows are added to df in
                # a single concat below (DataFrame.append no longer exists in
                # pandas >= 2.0).
                newly_validated.append(
                    {
                        'file_path': filepath,
                        'validation_date': datetime.now()
                    })

    # Remove files that have been deleted from repo:
    # files that are in the record but NOT in the repository
    deleted_files = already_validated.difference(files_in_repository)
    df = df[~df['file_path'].isin(deleted_files)]

    # add the newly validated files to the record
    if newly_validated:
        df = pd.concat([df, pd.DataFrame(newly_validated)], ignore_index=True)

    # NOTE: writing the updated record back is currently disabled:
    # df.to_csv('code/validation/locally_validated_files.csv', index=False)

    # Output list of Errors
    if output_errors:
        for filename, errors in output_errors.items():
            print("\n* ERROR IN '", filename, "'")
            for error in errors:
                print(error)
        sys.exit("\n ERRORS FOUND EXITING BUILD...")
    else:
        print("✓ no errors")
def validate_forecast_file(filepath, silent=False):
    """
    purpose: Runs the zoltpy quantile csv validation on one forecast file
    and reports whether it failed.

    params:
    * filepath: Full filepath of the forecast
    * silent: Accepted for callers that pass it; not used in this function

    returns: A tuple (has_errors, result) where `result` is whatever
    `validate_quantile_csv_file` returned and `has_errors` is False only
    when that result equals "no errors".
    """
    validation_result = validate_quantile_csv_file(filepath)
    has_errors = bool(validation_result != "no errors")
    return has_errors, validation_result