Beispiel #1
0
def check_envelope(envelope_data, eligible_location_ids, eligible_year_ids,
                   eligible_sex_ids, eligible_age_group_ids):
    """Validate the envelope draws, terminating the process on failure.

    Two checks run in order:

    1. No value in the ``env_0`` .. ``env_999`` columns is below zero.
    2. Every ID combination in the template built from the eligible
       location, year, age group and sex IDs has a matching row in
       ``envelope_data``.

    The caller's DataFrame is not modified: the minimum is kept in a local
    variable and the presence flag is added to a separate ID-only frame.

    Args:
        envelope_data: DataFrame holding ``location_id``, ``year_id``,
            ``sex_id``, ``age_group_id`` and the ``env_0`` .. ``env_999``
            columns.
        eligible_location_ids: Location IDs that must be present.
        eligible_year_ids: Year IDs that must be present.
        eligible_sex_ids: Sex IDs that must be present.
        eligible_age_group_ids: Age group IDs that must be present.

    Raises:
        SystemExit: With status 1 when either check fails.
    """
    id_columns = ['location_id', 'year_id', 'sex_id', 'age_group_id']

    # Make sure data is above 0
    print("Making sure all draws are above or equal to 0")
    data_columns = ['env_{}'.format(x) for x in range(1000)]
    # Row-wise minimum, then the minimum of those; computed once and held
    # locally instead of being written back as a 'min' column.
    minimum = envelope_data[data_columns].min(axis=1).min()
    print("Minimum envelope value: {}".format(minimum))
    if minimum < 0:
        print("ERROR: Draw/pop values in envelope that are less than 0")
        sys.exit(1)

    # Make sure all unique IDs are present
    print("Checking for unique IDs in envelope")
    # expand_id_set is defined elsewhere; it is used here to add the year,
    # age group and sex IDs to the location template.
    uid_template = pd.DataFrame(eligible_location_ids, columns=['location_id'])
    uid_template = expand_id_set(uid_template, eligible_year_ids, 'year_id')
    uid_template = expand_id_set(uid_template, eligible_age_group_ids,
                                 'age_group_id')
    uid_template = expand_id_set(uid_template, eligible_sex_ids, 'sex_id')

    # Only the ID columns matter for the presence test, so the draw columns
    # are left out of the merge. assign() returns a new frame, which keeps
    # the '_check' flag off the caller's data.
    present = envelope_data[id_columns].assign(_check=1)
    merged = pd.merge(uid_template, present, on=id_columns, how='left')
    # After a left merge, a null flag marks a template row with no match.
    if merged['_check'].isnull().any():
        print("ERROR: Missing unique IDs from envelope")
        sys.exit(1)
    print("No missing unique IDs in envelope")
Beispiel #2
0
def check_pred_ex(eligible_location_ids,
                  eligible_year_ids,
                  eligible_sex_ids,
                  eligible_age_group_ids,
                  fail=True):
    """Fetch the pred_ex life table, validate it and return its version id.

    The table is pulled with ``get_life_table(location_set_id=35,
    life_table_parameter_id=6)``. A table with no usable
    ``process_version_map_id`` always terminates the process. The remaining
    checks (single version, non-negative ``mean``, all eligible ID
    combinations present) only terminate the process when ``fail`` is true;
    otherwise the problem is logged as a warning and the version id is
    still returned.

    Args:
        eligible_location_ids: Location IDs that must be present.
        eligible_year_ids: Year IDs that must be present.
        eligible_sex_ids: Sex IDs that must be present.
        eligible_age_group_ids: Age group IDs that must be present.
        fail: When true, exit the process if validation fails.

    Returns:
        int: The first non-null ``process_version_map_id`` in the table.

    Raises:
        SystemExit: With status 1 when no version id is available, or when
            validation fails and ``fail`` is true.
    """
    logger = logging.getLogger('error_check.check_pred_ex')
    id_columns = ['location_id', 'year_id', 'sex_id', 'age_group_id']

    pred_ex_data = get_life_table(location_set_id=35,
                                  life_table_parameter_id=6)
    # expand_id_set is defined elsewhere; it is used here to add the year,
    # age group and sex IDs to the location template.
    uid_template = pd.DataFrame(eligible_location_ids, columns=['location_id'])
    uid_template = expand_id_set(uid_template, eligible_year_ids, 'year_id')
    uid_template = expand_id_set(uid_template, eligible_age_group_ids,
                                 'age_group_id')
    uid_template = expand_id_set(uid_template, eligible_sex_ids, 'sex_id')

    # all of CoDCorrect will break if there is no version id at all
    # Nulls are dropped so that an empty table and an all-null column are
    # both treated as "no version", rather than failing later in int().
    version_ids = pred_ex_data['process_version_map_id'].dropna().unique()
    if len(version_ids) == 0:
        logger.error('Failed to get life_table version: {}'.format(
            "No version id uploaded for pred_ex"))
        sys.exit(1)
    # Captured before any merge so the return value cannot be affected by
    # template rows that have no matching data.
    version_id = int(version_ids[0])

    try:  # non-yll CoDcorrect will be fine and deaths will still be calculated
        # Explicit raise instead of assert so the check survives python -O.
        if len(version_ids) != 1:
            raise ValueError("More than one life table version returned")
        # Make sure data is above 0
        logger.info("Making sure all draws are above or equal to 0")
        minimum = pred_ex_data['mean'].min()
        logger.info("Minimum pred_ex value: {}".format(minimum))
        if minimum < 0:
            raise ValueError('ERROR: Draw/pop values in pred_ex that are less '
                             'than 0')
        # Make sure all unique IDs are present
        logger.info("Checking for unique IDs in pred_ex")
        present = pred_ex_data[id_columns].assign(_check=1)
        merged = pd.merge(uid_template, present, on=id_columns, how='left')
        # After a left merge, a null flag marks a template row with no match.
        if merged['_check'].isnull().any():
            raise ValueError("ERROR: Missing unique IDs from pred_ex")
        logger.info("No missing unique IDs in pred_ex")
    except (AssertionError, ValueError) as e:
        logger.warning("Failed to validate pred_ex: {}".format(e))
        if fail:
            # Non-zero status so a failed validation is not reported as
            # success to the calling process.
            sys.exit(1)
    return version_id
Beispiel #3
0
def check_pred_ex(pred_ex_data,
                  eligible_location_ids,
                  eligible_year_ids,
                  eligible_sex_ids,
                  eligible_age_group_ids,
                  fail=True):
    """Validate a pred_ex table without modifying the caller's DataFrame.

    Checks that no ``mean`` value is below zero and that every ID
    combination in the template built from the eligible location, year, age
    group and sex IDs has a matching row in ``pred_ex_data``. A failed check
    is logged as a warning; the process is terminated only when ``fail`` is
    true.

    Args:
        pred_ex_data: DataFrame holding ``location_id``, ``year_id``,
            ``sex_id``, ``age_group_id`` and ``mean`` columns. It is only
            read, never written to.
        eligible_location_ids: Location IDs that must be present.
        eligible_year_ids: Year IDs that must be present.
        eligible_sex_ids: Sex IDs that must be present.
        eligible_age_group_ids: Age group IDs that must be present.
        fail: When true, exit the process if validation fails.

    Raises:
        SystemExit: With status 1 when validation fails and ``fail`` is
            true.
    """
    logger = logging.getLogger('error_check.check_pred_ex')
    id_columns = ['location_id', 'year_id', 'sex_id', 'age_group_id']

    # expand_id_set is defined elsewhere; it is used here to add the year,
    # age group and sex IDs to the location template.
    uid_template = pd.DataFrame(eligible_location_ids, columns=['location_id'])
    uid_template = expand_id_set(uid_template, eligible_year_ids, 'year_id')
    uid_template = expand_id_set(uid_template, eligible_age_group_ids,
                                 'age_group_id')
    uid_template = expand_id_set(uid_template, eligible_sex_ids, 'sex_id')

    try:  # non-yll CoDcorrect will be fine and deaths will still be calculated
        # Make sure data is above 0
        logger.info("Making sure all draws are above or equal to 0")
        # Held in a local rather than written back as a 'min' column, so the
        # input frame stays untouched.
        minimum = pred_ex_data['mean'].min()
        logger.info("Minimum pred_ex value: {}".format(minimum))
        if minimum < 0:
            raise ValueError('ERROR: Draw/pop values in pred_ex that are less '
                             'than 0')
        # Make sure all unique IDs are present
        logger.info("Checking for unique IDs in pred_ex")
        # assign() returns a new frame, which keeps the '_check' flag off
        # the caller's data.
        present = pred_ex_data[id_columns].assign(_check=1)
        merged = pd.merge(uid_template, present, on=id_columns, how='left')
        # After a left merge, a null flag marks a template row with no match.
        if merged['_check'].isnull().any():
            raise ValueError("ERROR: Missing unique IDs from pred_ex")
        logger.info("No missing unique IDs in pred_ex")
    except (AssertionError, ValueError) as e:
        logger.warning("Failed to validate pred_ex: {}".format(e))
        if fail:
            sys.exit(1)
Beispiel #4
0
        # Set the eligible locations, years, sexes, and ages that will appear in the input data
        eligible_age_group_ids = range(2, 22)
        eligible_sex_ids = [1, 2]
        eligible_cause_ids = cause_data.ix[cause_data['level'] > 0,
                                           'cause_id'].tolist()
        eligible_year_ids = range(1980, 2016)
        eligible_location_ids = location_data.ix[
            location_data['is_estimate'] == 1, 'location_id'].tolist()

        # Pull Space-Time (Geographic) restrictions
        spacetime_restrictions = get_spacetime_restrictions()

        # Create a DataFrame of all eligible cause, age, sex combinations
        eligible_data = pd.DataFrame(eligible_cause_ids, columns=['cause_id'])
        eligible_data = expand_id_set(eligible_data, eligible_age_group_ids,
                                      'age_group_id')
        eligible_data = expand_id_set(eligible_data, eligible_sex_ids,
                                      'sex_id')

        # Add a restriction variable to the eligible DataFrame to factor in age-sex restrictions of causes
        eligible_data['restricted'] = True
        for cause_id in eligible_cause_ids:
            non_restricted_age_group_ids = get_eligible_age_group_ids(
                cause_metadata[cause_id]['yll_age_start'],
                cause_metadata[cause_id]['yll_age_end'])
            non_restricted_sex_ids = get_eligible_sex_ids(
                cause_metadata[cause_id]['male'],
                cause_metadata[cause_id]['female'])
            eligible_data.ix[(eligible_data['cause_id'] == cause_id) & (
                (eligible_data['age_group_id'].
                 isin(non_restricted_age_group_ids)) &
Beispiel #5
0
        # Read in config variables
        eligible_year_ids = config['eligible_year_ids']
        index_columns = config['index_columns']
        index_columns.remove('measure_id')
        data_columns = config['data_columns']
        envelope_index_columns = config['envelope_index_columns']
        envelope_column = config['envelope_column']
        raw_data_columns = (['model_version_id'] + [envelope_column] +
                            index_columns + data_columns)

        # Make eligible data for data
        logging.info("Make eligible data list")
        eligible_data = eligible_data.loc[
            eligible_data['sex_id'] == int(sex_id)]
        eligible_data = expand_id_set(eligible_data, eligible_year_ids,
                                      'year_id')
        eligible_data['location_id'] = int(location_id)

        # Merge on space-time restrictions
        spacetime_restriction_data['spacetime_restriction'] = True
        eligible_data = pd.merge(eligible_data,
                                 spacetime_restriction_data,
                                 on=['location_id', 'year_id', 'cause_id'],
                                 how='left')

        # Apply space-time restrictions
        eligible_data.loc[eligible_data['spacetime_restriction'] == True,
                          'restricted'] = True
        eligible_data = eligible_data.loc[:, ['cause_id', 'age_group_id',
                                              'sex_id', 'restricted', 'level',
                                              'parent_id', 'year_id',