def validate_quantile_csv_file_app(quantile_csv_file):
    """
    Simple CLI wrapper of `validate_quantile_csv_file()`

    :param quantile_csv_file: CSV file path, as passed to `json_io_dict_from_quantile_csv_file()`
    :return: None
    """
    validate_quantile_csv_file(quantile_csv_file)
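
A minimal sketch of exposing this wrapper as a script; the __main__ guard and sys.argv handling below are assumptions for illustration, not part of the original excerpt.

import sys

if __name__ == '__main__':
    # usage: python validate.py <quantile_csv_file>
    validate_quantile_csv_file_app(sys.argv[1])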
Example #2
def check_formatting(my_path):
    output_errors = {}
    df = pd.read_csv('code/validation/validated_files.csv')
    previous_checked = list(df['file_path'])
    # Iterate through processed csvs (note: without recursive=True, '**'
    # matches like '*', so this walks exactly two directory levels)
    for path in glob.iglob(my_path + "**/**/", recursive=False):
        for filepath in glob.iglob(path + "*.csv", recursive=False):
            if filepath not in previous_checked:
                file_error = validate_quantile_csv_file(filepath)
                if file_error != 'no errors':
                    output_errors[filepath] = file_error
                else:
                    # add to previously checked files
                    current_time = datetime.datetime.now()
                    # DataFrame.append was removed in pandas 2.0; use pd.concat
                    df = pd.concat(
                        [df, pd.DataFrame([{'file_path': filepath,
                                            'validation_date': current_time}])],
                        ignore_index=True)
    # update previously checked files
    df.to_csv('code/validation/validated_files.csv', index=False)

    # Output list of Errors
    if len(output_errors) > 0:
        for filename, errors in output_errors.items():
            print(f"\n* ERROR IN '{filename}'")
            for error in errors:
                print(error)
        sys.exit("\nERRORS FOUND, EXITING BUILD...")
    else:
        print("✓ no errors")
def check_formatting(my_path):
    output_errors = {}
    df = pd.read_csv('code/validation/validated_files.csv')
    previous_checked = list(df['file_path'])
    files_in_repository = []
    # Iterate through processed csvs
    for path in glob.iglob(my_path + "**/**/", recursive=False):
        for filepath in glob.iglob(path + "*.csv", recursive=False):
            files_in_repository += [filepath]
            if filepath not in previous_checked:
                file_error = validate_quantile_csv_file(filepath)
                # Check forecast file date = forecast_date column
                forecast_date_error = filename_match_forecast_date(filepath)
                if forecast_date_error is not None:
                    if file_error == 'no errors':
                        file_error = [forecast_date_error]
                    else:
                        file_error += [forecast_date_error]

                if file_error != 'no errors':
                    output_errors[filepath] = file_error
                else:
                    # add to previously checked files
                    current_time = datetime.datetime.now()
                    # DataFrame.append was removed in pandas 2.0; use pd.concat
                    df = pd.concat(
                        [df, pd.DataFrame([{'file_path': filepath,
                                            'validation_date': current_time}])],
                        ignore_index=True)

    # Remove files that have been deleted from the repo:
    # i.e., files recorded in previous checks but NOT in the repository
    deleted_files = np.setdiff1d(previous_checked, files_in_repository)
    df = df[~df['file_path'].isin(deleted_files)]

    # deleted files should be removed from the central validated_files.csv file
    if len(deleted_files) > 0:
        df.to_csv('code/validation/validated_files.csv', index=False)

    # update previously checked files
    df.to_csv('code/validation/locally_validated_files.csv', index=False)

    # Output list of Errors
    if len(output_errors) > 0:
        for filename, errors in output_errors.items():
            print(f"\n* ERROR IN '{filename}'")
            for error in errors:
                print(error)
        sys.exit("\nERRORS FOUND, EXITING BUILD...")
    else:
        print("✓ no errors")
def validate_forecast_file(filepath, silent=False):
    """
    purpose: Validates the forecast file with zoltpy 
    link: https://github.com/reichlab/zoltpy/blob/master/zoltpy/covid19.py

    params:
    * filepath: Full filepath of the forecast
    """
    file_error = validate_quantile_csv_file(filepath, silent=silent)

    if file_error != "no errors":
        return True, file_error
    else:
        return False, file_error
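
A usage sketch for the tuple returned above: the first element flags whether validation failed, and the second is either the string 'no errors' or a list of error messages (the file path here is hypothetical).

is_invalid, errors = validate_forecast_file('2020-06-01-TeamX-ModelY.csv')
if is_invalid:
    for error in errors:
        print(error)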
Example #5
def upload_forecast(forecast_name):
    path = get_forecast_info(forecast_name)
    db = read_validation_db()

    metadata = metadata_dict_for_file(
        list(Path(path).parent.glob('metadata-*.txt'))[0])
    if f"{metadata['team_abbr']}-{metadata['model_abbr']}" not in [
            m.abbreviation for m in models
    ]:
        create_model(path, metadata)

    time_zero_date = '-'.join(forecast_name.split('-')[:3])

    if time_zero_date not in [
            timezero.timezero_date for timezero in project_obj.timezeros
    ]:
        create_timezero(time_zero_date)

    # print(forecast_name, metadata, time_zero_date)
    if path is not None:
        errors_from_validation = validate_quantile_csv_file(path)
        if errors_from_validation != "no errors":
            print(errors_from_validation)
            return errors_from_validation, True
        with open(path) as fp:
            print('uploading %s' % path)
            checksum = hashlib.md5(str(fp.read()).encode('utf-8')).hexdigest()
            fp.seek(0)
            quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)

            if len(error_from_transformation) > 0:
                return error_from_transformation, True

            try:
                fr = util.upload_forecast(
                    conn, quantile_json, forecast, project_name,
                    f"{metadata['team_abbr']}-{metadata['model_abbr']}",
                    time_zero_date)
                db[forecast_name] = checksum
                write_db(db)
                return None, fr
            except Exception:
                # propagate upload failures to the caller
                raise

list_of_model_directories = os.listdir('./data-processed/')
for directory in list_of_model_directories:
    if "." in directory:
        continue
    # Get all forecasts in the directory of this model
    path = './data-processed/' + directory + '/'
    forecasts = glob.glob(path + "*.csv")
    for forecast in forecasts:

        with open(forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

        db = get_db()
        # Validate covid19 file
        if UPDATE and db.get(get_filename_from_path(forecast),
                             None) == checksum:
            continue
        errors_from_validation = validate_quantile_csv_file(forecast)

        # Upload forecast
        if "no errors" == errors_from_validation:
            # Check this hash against the previous version of hash
            if db.get(get_filename_from_path(forecast), None) != checksum:
                db[get_filename_from_path(forecast)] = checksum
        else:
            print(errors_from_validation)
print('Dumping db')
dump_db()
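
The checksum bookkeeping used above can be factored into a small predicate; this is a sketch under the assumption that the db maps filenames to md5 hex digests, as the script implies (file_is_unchanged is a hypothetical name).

import hashlib

def file_is_unchanged(db, filename, filepath):
    # A stored digest equal to the file's current digest means the forecast
    # was already validated and uploaded, so it can be skipped.
    with open(filepath, "rb") as f:
        checksum = hashlib.md5(f.read()).hexdigest()
    return db.get(filename, None) == checksum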
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)
    conn.re_authenticate_if_necessary()
    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                          'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {
            'name': metadata['model_name'],
            'abbreviation': metadata['team_abbr'] + '-' + metadata['model_abbr'],
            'team_name': metadata['team_name'],
            'description': metadata['methods'],
            'home_url': metadata['website_url'] if metadata.get('website_url') is not None else url + dir_name,
            'aux_data_url': 'NA',
        }
        try:
            print('Create model %s' % model_name)
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex
    print('Time: %s \t Model: %s' % (datetime.now(), model_name))
    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_time_zeros = [
        forecast.timezero.timezero_date for forecast in model.forecasts
    ]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:

        # Default config
        over_write = False
        checksum = 0
        time_zero_date = forecast.split(dir_name)[0][:-1]

        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

        # Check this hash against the previous version of hash
        # if db.get(forecast, None) != checksum:
        #     print(forecast, db.get(forecast, None))
        #     if time_zero_date in existing_time_zeros:
        #         over_write = True
        # else:
        #     continue

        # if the timezero already exists, don't write it again
        if time_zero_date in existing_time_zeros:
            # update checksum
            # db[forecast] = checksum
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Create timezero on zoltar if not existed
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID_TARGETS, covid19_row_validator,
                    COVID_ADDL_REQ_COLS)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        print('Upload forecast for model: %s \t|\t File: %s' %
                              (model_name, forecast))
                        print()
                        util.upload_forecast(conn,
                                             quantile_json,
                                             forecast,
                                             project_name,
                                             model_name,
                                             time_zero_date,
                                             overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        print(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name, model_name, timezero_date_batch, overwrite = over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
Example #8
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts+'metadata-'+dir_name+'.txt')
    except Exception as ex:
        return ex 
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {
            'name': metadata['model_name'],
            'abbreviation': metadata['team_abbr'] + '-' + metadata['model_abbr'],
            'team_name': metadata['team_name'],
            'description': metadata['methods'],
            'home_url': url + dir_name,
            'aux_data_url': 'NA',
        }
        try:
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex  
    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:
        over_write = False
        checksum = 0
        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts+forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

        # Check this hash against the previous version of hash
        if db.get(forecast, None) != checksum:
            print(forecast)
            if forecast in existing_forecasts:
                over_write = True
        else:
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts+forecast) as fp:

            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            # if time_zero_date != "2020-05-25":
            #     continue
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(path_to_processed_model_forecasts+forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(fp, VALID_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        util.upload_forecast(conn, quantile_json, forecast,
                                             project_name, model_name, time_zero_date,
                                             overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        print(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation
    
    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name, model_name, timezero_date_batch, overwrite = over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_abbrs

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                          'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex
    model_abbreviation = metadata['model_abbr']

    # get the corresponding model_config for the metadata file
    model_config = zoltar_config_from_metadata(metadata)

    if model_abbreviation not in model_abbrs:
        pprint.pprint('%s not in models' % model_abbreviation)
        if 'home_url' not in model_config:
            model_config['home_url'] = url + dir_name

        try:
            logger.info(f"Creating model {model_config}")
            models.append(project_obj.create_model(model_config))
            model_abbrs = [model.abbreviation for model in models]
        except Exception as ex:
            return ex

    # fetch model based on model_abbr
    model = [
        model for model in models if model.abbreviation == model_abbreviation
    ][0]

    if has_changed(metadata, model):
        # model metadata has changed, call the edit function in zoltpy to update metadata
        print(
            f"{metadata['model_abbr']!r} model has changed metadata contents. Updating on Zoltar..."
        )
        model.edit(model_config)

    # Get names of existing forecasts to avoid re-upload
    existing_time_zeros = [
        forecast.timezero.timezero_date for forecast in model.forecasts
    ]

    # Convert all timezeros from Date type to str type
    existing_time_zeros = [
        existing_time_zero.strftime(YYYY_MM_DD_DATE_FORMAT)
        for existing_time_zero in existing_time_zeros
    ]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:

        # Skip metadata text file
        if not forecast.endswith('.csv'):
            continue

        # Default config
        over_write = False
        checksum = 0
        time_zero_date = forecast.split(dir_name)[0][:-1]

        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

            # Check this hash against the previous version of hash
            if db.get(forecast, None) != checksum:
                print(forecast, db.get(forecast, None))
                if time_zero_date in existing_time_zeros:

                    # Check if the already existing forecast has the same issue date

                    from datetime import date
                    local_issue_date = date.today().strftime("%Y-%m-%d")

                    uploaded_forecast = [
                        forecast for forecast in model.forecasts
                        if forecast.timezero.timezero_date.strftime(
                            YYYY_MM_DD_DATE_FORMAT) == time_zero_date
                    ][0]
                    uploaded_issue_date = uploaded_forecast.issue_date

                    if local_issue_date == uploaded_issue_date:
                        # Overwrite the existing forecast if it has the same issue date
                        over_write = True
                        logger.info(
                            f"Overwrite existing forecast={forecast} with newer version because the new issue_date={local_issue_date} is the same as the uploaded file issue_date={uploaded_issue_date}"
                        )
                    else:
                        logger.info(
                            f"Add newer version to forecast={forecast} because the new issue_date={local_issue_date} is different from uploaded file issue_date={uploaded_issue_date}"
                        )

            else:
                continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Create timezero on zoltar if not existed
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    print(ex)
                    return ex

            # Validate covid19 file
            print(f"Validating {forecast}")
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID_TARGETS, covid19_row_validator,
                    COVID_ADDL_REQ_COLS)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        logger.debug(
                            'Upload forecast for model: %s \t|\t File: %s\n' %
                            (metadata['model_abbr'], forecast))
                        upload_covid_forecast_by_model(conn,
                                                       quantile_json,
                                                       forecast,
                                                       project_name,
                                                       model,
                                                       metadata['model_abbr'],
                                                       time_zero_date,
                                                       overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        logger.error(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name, model_name, timezero_date_batch, overwrite = over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
Example #10
def upload_covid_all_forecasts(path_to_processed_model_forecasts):
    # meta info
    project_name = 'COVID-19 Forecasts'
    project_obj = None
    project_timezeros = []
    forecasts = os.listdir(path_to_processed_model_forecasts)
    conn = util.authenticate()

    # Get all existing timezeros in the project
    for project in conn.projects:
        if project.name == project_name:
            project_obj = project
            for timezero in project.timezeros:
                project_timezeros.append(timezero.timezero_date)
            break

    # Get model name
    separator = '-'
    dir_name = separator.join(
        forecasts[0].split(separator)[3:]).split('.csv')[0]
    metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                      'metadata-' + dir_name + '.txt')
    model_name = metadata['model_name']
    model = [
        model for model in project_obj.models if model.name == model_name
    ][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    for forecast in forecasts:

        # Skip if forecast is already on zoltar
        if forecast in existing_forecasts:
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:

            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                except Exception as ex:
                    print(ex)

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID19_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    print(error_from_transformation)
                else:
                    try:
                        util.upload_forecast(conn,
                                             quantile_json,
                                             forecast,
                                             project_name,
                                             model_name,
                                             time_zero_date,
                                             overwrite=False)
                    except Exception as ex:
                        print(ex)
            else:
                print(errors_from_validation)
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                      'metadata-' + dir_name + '.txt')
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {
            'name': metadata['model_name'],
            'abbreviation': metadata['model_abbr'],
            'team_name': metadata['team_name'],
            'description': metadata['methods'],
            'home_url': url + dir_name,
            'aux_data_url': 'NA',
        }
        try:
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex
    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:

        # Skip if forecast is already on zoltar
        if forecast in existing_forecasts:
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:

            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, VALID_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    # try:
                    #     util.upload_forecast(conn, quantile_json, forecast,
                    #                             project_name, model_name , time_zero_date, overwrite=False)
                    # except Exception as ex:
                    #     print(ex)
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    # Batch upload for better performance
    if len(json_io_dict_batch) > 0:
        try:
            util.upload_forecast_batch(conn, json_io_dict_batch,
                                       forecast_filename_batch, project_name,
                                       model_name, timezero_date_batch)
        except Exception as ex:
            return ex
    return "Pass"