def import_cod_model_draws(model_version_id, location_id, cause_id, sex_id,
                           required_columns, filter_years=None, db_env=None,
                           envelope=None):
    """Import model draws from CODEm/custom models.

    Read the draws for one cause/location/sex through ``draws``, keep only
    age groups 2-21, 30, 31, 32 and 235, pass the result through
    ``add_envelope`` and validate it with ``check_data_format``.

    Args:
        model_version_id: Model version to read; passed to ``draws`` as
            ``status``.
        location_id: Location to read; cast to ``int``.
        cause_id: Cause to read; passed to ``draws`` positionally.
        sex_id: Sex to read; cast to ``int``.
        required_columns: Columns handed to ``check_data_format`` and kept
            in the returned data.
        filter_years: Years passed to ``draws`` as ``yids``; ``None`` is
            forwarded unchanged.
        db_env: Environment passed to ``draws`` on the first attempt. If
            that attempt raises ``DirectoryNotFoundException`` the read is
            retried once with ``db_env='prod'``.
        envelope: Forwarded to ``add_envelope``.

    Returns:
        The draws restricted to ``required_columns``, or ``None`` when
        ``check_data_format`` returns a falsy value.

    Raises:
        SystemExit: With status 1 when the first read fails with anything
            other than ``DirectoryNotFoundException``.
    """
    logger = logging.getLogger('io.import_cod_model_draws')
    try:
        data = draws(cause_id,
                     lids=[int(location_id)],
                     sids=[int(sex_id)],
                     yids=filter_years,
                     status=model_version_id,
                     n_draws='max',
                     db_env=db_env)
    except DirectoryNotFoundException:
        # Draws are not available under the requested environment; retry
        # against 'prod'. Errors from this retry propagate to the caller.
        data = draws(cause_id,
                     lids=[int(location_id)],
                     sids=[int(sex_id)],
                     yids=filter_years,
                     status=model_version_id,
                     n_draws='max',
                     db_env='prod')
    except Exception:
        # filter_years defaults to None (not iterable). Fall back to str()
        # so that building the message can never mask the original error.
        try:
            years = ','.join(str(y) for y in filter_years)
        except TypeError:
            years = str(filter_years)
        logger.exception(
            'Failed to read\n'
            'Problem demographics were mvid {} cause {}, '
            'location {}, sex {}, and years {}'
            .format(model_version_id, cause_id, location_id, sex_id, years))
        sys.exit(1)

    age_group_ids = list(range(2, 22)) + [30, 31, 32, 235]
    data = data[data.age_group_id.isin(age_group_ids)]
    data = add_envelope(data, envelope)

    r = check_data_format(data, required_columns)
    if not r:
        print(model_version_id, r)
        return None
    data = data.loc[:, required_columns]
    return data
def import_cod_model_draws(model_version_id, location_id, cause_id, sex_id,
                           required_columns, filter_years=None):
    """Import model draws from CODEm/custom models.

    Read the CODEm draws for one cause/location/sex through ``draws``, keep
    only age groups 2-21, 30, 31, 32 and 235, and validate the result with
    ``check_data_format``.

    Args:
        model_version_id: Model version to read; passed to ``draws`` as
            ``status``.
        location_id: Location to read; cast to ``int``.
        cause_id: Cause to read; passed as ``gbd_ids={'cause_ids': [...]}``.
        sex_id: Sex to read; cast to ``int``.
        required_columns: Columns handed to ``check_data_format`` and kept
            in the returned data.
        filter_years: Years passed to ``draws`` as ``year_ids``; ``None`` is
            forwarded unchanged.

    Returns:
        The draws restricted to ``required_columns``, or ``None`` when
        ``check_data_format`` returns a falsy value.

    Raises:
        SystemExit: With status 1 when reading or age-filtering the draws
            raises any exception.
    """
    logger = logging.getLogger('io.import_cod_model_draws')
    try:
        data = draws(gbd_ids={'cause_ids': [cause_id]},
                     source='codem',
                     location_ids=[int(location_id)],
                     sex_ids=[int(sex_id)],
                     year_ids=filter_years,
                     status=model_version_id)
        # list(...) keeps the concatenation valid on Python 3, where range
        # is not a list; .loc replaces the removed DataFrame.ix indexer.
        age_group_ids = list(range(2, 22)) + [30, 31, 32, 235]
        data = data.loc[data.age_group_id.isin(age_group_ids)]
    except Exception:
        # filter_years defaults to None (not iterable). Fall back to str()
        # so that building the message can never mask the original error.
        try:
            years = ','.join(str(y) for y in filter_years)
        except TypeError:
            years = str(filter_years)
        logger.exception(
            'Failed to read\n'
            'Problem demographics were mvid {} cause {}, '
            'location {}, sex {}, and years {}'
            .format(model_version_id, cause_id, location_id, sex_id, years))
        # Exit non-zero so the failure is visible to whatever launched us.
        sys.exit(1)

    r = check_data_format(data, required_columns)
    if not r:
        # Single pre-formatted argument prints identically on Python 2 and 3.
        print('{} {}'.format(model_version_id, r))
        return None
    data = data.loc[:, required_columns]
    return data
def import_cod_model_draws(model_version_id, location_id, acause, sex_name,
                           required_columns, filter_years=None):
    """Import model draws from CODEm/custom models.

    Read the draws stored at ``DRAWS_PATH`` through ``read_hdf_draws``
    (filtered to one location, one sex and age groups 2-21), tag them with
    ``model_version_id`` and validate the result with ``check_data_format``.

    Args:
        model_version_id: Value written to the ``model_version_id`` column
            of the data that was read.
        location_id: Location passed to ``read_hdf_draws``.
        acause: Not used by this function.
        sex_name: ``'male'`` or ``'female'``; mapped to sex id 1 or 2.
        required_columns: Columns handed to ``check_data_format`` and kept
            in the returned data.
        filter_years: Years forwarded to ``read_hdf_draws``.

    Returns:
        The draws restricted to ``required_columns``, or ``None`` when the
        read raises ``IOError`` or ``check_data_format`` returns a falsy
        value.

    Raises:
        KeyError: If ``sex_name`` is neither ``'male'`` nor ``'female'``
            (only ``IOError`` is handled here).
    """
    sex_dict = {'male': 1, 'female': 2}
    logger = logging.getLogger('io.import_cod_model_draws')
    try:
        # Get file path for CoD model
        draws_filepath = DRAWS_PATH
        # Read in file; list(...) keeps filter_ages a real list on Python 3.
        data = read_hdf_draws(draws_filepath,
                              location_id,
                              key="data",
                              filter_sexes=[sex_dict[sex_name]],
                              filter_ages=list(range(2, 22)),
                              filter_years=filter_years)
        data['model_version_id'] = model_version_id
    except IOError:
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning('Failed to read {}'.format(draws_filepath))
        print(model_version_id)
        return None
    logger.info('Reading {}'.format(draws_filepath))

    r = check_data_format(data, required_columns)
    if not r:
        # Single pre-formatted argument prints identically on Python 2 and 3.
        print('{} {}'.format(model_version_id, r))
        return None
    # .loc replaces the removed DataFrame.ix indexer.
    data = data.loc[:, required_columns]
    return data
'parent_id', 'year_id', 'location_id']] # Make envelope object logging.info("Make envelope object") envelope = Envelope(envelope_data, envelope_index_columns, data_columns) # Read in draw files logging.info("Reading in best model draws") raw_data = read_all_model_draws(best_models, raw_data_columns, filter_years=eligible_year_ids, env_df=envelope_summ) # Check formatting logging.info("Checking in best model draws") check_data_format(raw_data, raw_data_columns, fail=True) # Filter out zeros logging.info("Filtering out zeroes") data, zeroes = filter_zeros(raw_data, data_columns) # Format data for rescale logging.info("Formatting data for rescale") formatted_data, model_data = format_for_rescale( data, eligible_data, index_columns, data_columns, envelope_column, parent_dir + '/debug', '{}_{}_{}'.format(output_version_id, location_id, sex_id))