# ===== Example 1 (snippet separator) =====

def transform_add_measure(df, measure_id):
    """Stamp every row of *df* with the given measure_id (in place) and return it.

    Intended for use as a draw-sink transform: the frame is mutated and
    handed back so the call can be chained.
    """
    df.loc[:, 'measure_id'] = measure_id
    return df


if __name__ == '__main__':
    # Entry point: appears to aggregate draws for one measure/location-set/year
    # (logged under the 'agg_location' tag) — confirm against the full script.
    # Get command line arguments
    version_id, df_type, measure_id, location_set_id, year_id = parse_args()
    # Set paths
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    # Start logging
    cc_log_utils.setup_logging(log_dir, 'agg_location', str(version_id),
                               df_type, str(measure_id), str(location_set_id),
                               str(year_id))

    # parse_args() may hand measure_id back as a string; downstream code
    # needs an int (it was stringified above only for the log name).
    measure_id = int(measure_id)

    try:
        # Read in helper files
        logging.info("Reading in helper files.")
        config = read_helper_files(parent_dir)
        # Read in config variables
        index_cols = config['index_columns']
        draw_cols = config['data_columns']
        sex_id = config['eligible_sex_ids']

        # Create draw source/sink
        logging.info("Creating draw source and sink.")
        # NOTE(review): snippet is truncated here — the matching except/finally
        # for this try block is not visible in this excerpt.
# ===== Example 2 (snippet separator) =====
    shocks_sink = DrawSink(shocks_params)
    shocks_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
    shocks_sink.push(shock_data, append=False)


if __name__ == '__main__':
    # Entry point: cause aggregation for a single location (logged as
    # 'agg_cause') — confirm against the full script.

    # Get command line arguments
    output_version_id, location = parse_args()

    # Set paths
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')

    # Start logging
    cc_log_utils.setup_logging(log_dir, 'agg_cause', output_version_id,
                               location)

    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config, cause_hierarchy = read_helper_files(parent_dir, location)

        # Read in config variables
        index_columns = config['index_columns']
        # NOTE(review): this mutates the list stored in config — fine if the
        # config dict is not reused later; verify in the full script.
        index_columns.remove('measure_id')
        data_columns = config['data_columns']

        # Read in rescaled draw files
        logging.info("Reading in rescaled draw files")
        rescaled_data = read_rescaled_draw_files(parent_dir, location)
        # NOTE(review): the stray continuation line below ("poolsize=8)") is
        # scrape damage — the original call it belonged to was lost when the
        # snippet was extracted.
                            poolsize=8)


if __name__ == '__main__':
    # Entry point: location aggregation for one measure over a set of years
    # (logged as 'agg_location') — confirm against the full script.

    # Get command line arguments
    output_version_id, df_type, measure, location_set_id, years = parse_args()

    # Set paths
    parent_dir = r'FILEPATH'
    log_dir = parent_dir + r'/logs'
    in_dir = r'FILEPATH'
    out_dir = in_dir

    # Start logging
    l.setup_logging(log_dir, 'agg_location', output_version_id, df_type,
                    measure, location_set_id, "".join(str(yr) for yr in years))

    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config = read_helper_files(parent_dir)

        # Read in config variables
        index_columns = config['index_columns']
        sexes = config['eligible_sex_ids']

        # measure arrives as a string from the command line, hence the
        # string comparison ("1" presumably means deaths — TODO confirm).
        if measure == "1":
            input_file_pattern = 'FILEPATH.h5'
            output_file_pattern = 'FILEPATH.h5'
        else:
            input_file_pattern = 'FILEPATH.h5'
            # NOTE(review): snippet truncated here (else-branch incomplete).
# ===== Example 4 (snippet separator) =====
                    parent_dir,
                    'summaries/gbd/single/{m}/*'.format(m=measure_id))))
    upload_gbd_summaries(process_version, gbd_conn_def, directories)
    logging.info("Finished upload to gbd")


if __name__ == '__main__':
    # Entry point: dispatch a summary upload to one of three databases
    # ('cod', 'gbd', or the diagnostic fallback).
    # parse args
    output_version_id, db, measure_id, conn_def, change = parse_args()

    # Set paths
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')

    # Start logging
    cc_log_utils.setup_logging(log_dir, 'upload', output_version_id, db,
                               measure_id, str(change))
    logging.info("conn_def is {}".format(conn_def))

    try:
        config, causes = read_helper_files(parent_dir)
        envelope_version_id = config['envelope_version_id']
        years = config['eligible_year_ids']
        # Route on the target database; any value other than 'cod'/'gbd'
        # falls through to the diagnostic upload.
        if db == 'cod':
            upload_cod_summary(output_version_id, envelope_version_id, causes,
                               years, conn_def)
        elif db == 'gbd':
            process_version = config['process_version_id']
            upload_gbd_summary(process_version, measure_id, change, conn_def)
        else:
            upload_diagnostic_summary(output_version_id)
        logging.info('All done!')
        # NOTE(review): snippet truncated — the except clause for this try
        # block is not visible in this excerpt.
# ===== Example 5 (snippet separator) =====
                        key='best_output_version_{}'.format(state),
                        mode='a',
                        format='table',
                        data_columns=['output_version_id'])


def read_helper_files(parent_dir):
    """Load and return the run configuration stored under *parent_dir*."""
    config_path = os.path.join(parent_dir, '_temp/config.json')
    return read_json(config_path)


if __name__ == '__main__':
    # Entry point: mark a run as best; caches DB state before making changes.
    output_version_id, test = parse_args()

    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    # Log file is keyed by wall-clock hour, not by version id.
    cc_log.setup_logging(log_dir, 'mark_best', time.strftime("%m_%d_%Y_%H"))

    config = read_helper_files(parent_dir)

    process_version_id = config['process_version_id']

    # Test runs target the dev database environment; otherwise prod.
    if test:
        db_env = DBEnv.DEV
    else:
        db_env = DBEnv.PROD

    gbd_conn_def = _GBD_CONN_DEF_MAP[db_env.value]
    cod_conn_def = _COD_CONN_DEF_MAP[db_env.value]

    # Snapshot both databases before mutating anything, so the change can
    # be audited (see the 'before'/'after' cache keys).
    cache_cod_db('before', cod_conn_def, parent_dir)
    cache_gbd_db('before', gbd_conn_def, parent_dir)
    # NOTE(review): snippet truncated here.
# ===== Example 6 (snippet separator) =====
            sex_id=sex_id),
        'draws', mode='w', format='table',
        data_columns=index_columns)


if __name__ == '__main__':
    # Entry point: shock processing for one location/sex (logged as 'shocks').

    # Get command line arguments
    output_version_id, location_id, sex_id = parse_args()

    # Set paths
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')

    # Start logging
    cc_log_utils.setup_logging(log_dir, 'shocks', output_version_id,
                               location_id, sex_id)

    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config, best_models = read_helper_files(parent_dir, location_id,
                                                sex_id)

        # Read in config variables
        eligible_year_ids = config['eligible_year_ids']
        index_columns = config['index_columns']
        data_columns = config['data_columns']
        # Full column set expected in the raw draw files.
        raw_data_columns = index_columns + data_columns

        # Read in draw files
        logging.info("Reading in best model draws")
        # NOTE(review): snippet truncated here (try block incomplete).
# ===== Example 7 (snippet separator) =====
        logger.exception('Failed to save all outputs: {}'.format(e))


if __name__ == '__main__':
    # Entry point: append shock draws onto rescaled draws for one location
    # (logged as 'append_shocks').

    # Get command line arguments
    output_version_id, location = parse_args()

    # Set paths
    parent_dir = r'FILEPATH'
    log_dir = parent_dir + r'FILEPATH'
    shock_dir = r'FILEPATH'
    rescaled_dir = r'FILEPATH'

    # Start logging
    l.setup_logging(log_dir, 'append_shocks', output_version_id, location)

    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config, most_detailed = read_helper_files(parent_dir, location)

        # Read in config variables
        index_cols = config['index_columns']
        data_cols = config['data_columns']
        years = config['eligible_year_ids']
        sexes = config['eligible_sex_ids']

        # Read in all inputs
        logging.info("Reading in all inputs for {}".format(location))
        rescaled, shocks, rescaled_yll, shocks_yll = read_all_inputs(
        # NOTE(review): the keyword arguments below (mode/format/data_columns)
        # belong to an HDF-save call from a different snippet — the
        # read_all_inputs(...) argument list was lost in extraction.
             mode='w',
             format='table',
             data_columns=index_columns)


if __name__ == '__main__':
    # Entry point: location aggregation for one location, both sexes
    # (logged as 'agg_location').
    # NOTE(review): this snippet is Python 2 — bare `print` statements below.

    # Get command line arguments
    output_version_id, location_id = parse_args()

    # Set paths
    parent_dir = PARENT_DIRECTORY
    log_dir = parent_dir + r'/logs'

    # Start logging
    l.setup_logging(log_dir, 'agg_location', output_version_id, location_id,
                    'both')

    try:
        # Read in helper files
        print "Reading in helper files"
        logging.info("Reading in helper files")
        config, child_locations = read_helper_files(parent_dir, location_id)

        # Read in config variables
        index_columns = config['index_columns']
        data_columns = config['data_columns']

        # Read in rescaled draw files
        print "Reading in child location draw files"
        logging.info("Reading in child location draw files")
        logging.info("{}".format(', '.join([str(x) for x in child_locations])))
        # NOTE(review): the dangling argument lines below are the tail of a
        # call whose head was lost when the snippet was extracted.
                            location_id,
                            parent_dir,
                            save=False)


if __name__ == '__main__':
    # Entry point: collect per-location diagnostic files into one dataset
    # (logged as 'append_diagnostics').

    # Get command line arguments
    output_version_id = parse_args()

    # Set paths
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')

    # Start logging
    cc_log_utils.setup_logging(log_dir, 'append_diagnostics',
                               output_version_id)

    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config, location_ids, est_locations = read_helper_files(parent_dir)

        # Read in config variables
        diag_years = config['diagnostic_year_ids']
        index_columns = config['index_columns']
        data_columns = config['data_columns']

        logging.info('Reading in diagnostic files and creating ones that '
                     'dont exist')
        # Accumulator for per-location diagnostic frames.
        data = []
        for location_id in location_ids:
        # NOTE(review): snippet truncated here (loop body not visible).
# ===== Example 10 (snippet separator) =====
    shocks_sink = DrawSink(agg_shocks_params)
    shocks_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
    shocks_sink.push(yll_shocks, append=False)


if __name__ == '__main__':
    # Entry point: compute YLLs for one location (logged as 'ylls').

    # Get command line arguments
    output_version_id, location_id = parse_args()

    # Set paths
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')

    # Start logging
    cc_log_utils.setup_logging(log_dir, 'ylls', output_version_id, location_id)

    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config = read_helper_files(parent_dir)
        envelope_version_id = config['envelope_version_id']

        # Read in config variables
        index_columns = config['index_columns']
        # Mutates the list held in config; verify config is not reused later.
        index_columns.remove('measure_id')
        # YLL index excludes cause_id; note set() does not preserve order.
        yll_index_columns = list(set(index_columns) - set(['cause_id']))
        data_columns = config['data_columns']

        # Read in rescaled draw files
        logging.info("Reading in cause/loc aggregated with shock+hiv draws")
        # NOTE(review): snippet truncated here (try block incomplete).
# ===== Example 11 (snippet separator) =====
            'draws', mode='w', format='table',
        data_columns=index_columns)



if __name__ == '__main__':
    # Entry point: shock processing for one location/sex, Python 2 variant
    # (bare `print` statements below).

    # Get command line arguments
    output_version_id, location_id, sex_name = parse_args()

    # Set paths
    parent_dir = PARENT_DIRECTORY
    log_dir = parent_dir + r'/logs'

    # Start logging
    l.setup_logging(log_dir, 'shocks', output_version_id, location_id, sex_name)

    # Sex dictionary
    # Map the CLI sex name to its numeric id; raises KeyError on any other value.
    sex_dict = {'male': 1, 'female': 2}
    sex_id = sex_dict[sex_name]

    try:
        # Read in helper files
        print "Reading in helper files"
        logging.info("Reading in helper files")
        config, best_models = read_helper_files(parent_dir, location_id, sex_name)

        # Read in config variables
        eligible_year_ids = config['eligible_year_ids']
        index_columns = config['index_columns']
        data_columns = config['data_columns']
        # NOTE(review): snippet truncated here (try block incomplete).
# ===== Example 12 (snippet separator) =====
if __name__ == '__main__':
    # Entry point: pipeline launcher — sets up folders, logging, and (when
    # not resuming) fetches cause-hierarchy resources from the database.
    # Set some core variables
    code_directory = os.path.dirname(os.path.abspath(__file__))
    output_directory = 'FILEPATH'

    # set up folders
    (output_version_id, codcorrect_years, location_set_ids, resume,
     upload_to_cod, upload_to_gbd, upload_to_diagnostics, db_env, best,
     upload_to_concurrent) = parse_args()

    parent_dir = set_up_folders(output_directory, output_version_id)
    log_dir = os.path.join(parent_dir, 'logs')

    # Start logging
    cc_log.setup_logging(log_dir, 'launch', time.strftime("%m_%d_%Y_%H"))

    # Years used for percent-change summaries — TODO confirm these stay in
    # sync with the change years used by the summary step.
    change_years = [1990, 2007, 2017]

    if not resume:
        # Retrieve cause resources from database.
        # Uses the codcorrect cause set (1) to create the cause data and
        # metadata for the current round. Used in the correct step to rescale
        # the cause fractions down the hierarchy.
        (cause_set_version_id,
         cause_metadata_version_id) = get_cause_hierarchy_version(
             1, GBD.GBD_ROUND)
        cause_data = get_cause_hierarchy(cause_set_version_id)
        cause_metadata = get_cause_metadata(cause_metadata_version_id)
        # Same retrieval for the aggregation cause set.
        # NOTE(review): snippet truncated here mid-statement.
        (cause_agg_set_version_id,
# ===== Example 13 (snippet separator) =====
    draw_filepath = parent_dir + r'/unaggregated/rescaled/rescaled_{location_id}_{sex_name}.h5'.format(location_id=location_id, sex_name=sex_name)
    save_hdf(data, draw_filepath, key='draws', mode='w',
             format='table', data_columns=index_columns)


if __name__ == '__main__':
    # Entry point: the 'correct' (rescaling) step for one location/sex,
    # Python 2 variant (bare `print` statements below).

    # Get command line arguments
    output_version_id, location_id, sex_name = parse_args()

    # Set paths
    parent_dir = PARENT_DIRECTORY
    log_dir = parent_dir + r'/logs'

    # Start logging
    l.setup_logging(log_dir, 'correct', output_version_id, location_id, sex_name)

    # Sex dictionary
    # Map the CLI sex name to its numeric id; raises KeyError on any other value.
    sex_dict = {'male': 1, 'female': 2}
    sex_id = sex_dict[sex_name]

    try:
        # Read in helper files
        print "Reading in helper files"
        logging.info("Reading in helper files")
        config, best_models, eligible_data, spacetime_restriction_data, envelope_data = read_helper_files(parent_dir, location_id, sex_name)

        # Read in config variables
        eligible_year_ids = config['eligible_year_ids']
        index_columns = config['index_columns']
        data_columns = config['data_columns']
        # NOTE(review): snippet truncated here (try block incomplete).
# ===== Example 14 (snippet separator) =====
        logger.exception("Summarizing GBD failed: {}".format(e))
        rc = e
    return rc


if __name__ == '__main__':
    # Entry point: summarization for one location/database (logged as 'summary').

    # Get command line arguments
    output_version_id, location_id, db = parse_args()

    # Set paths
    parent_dir = r'FILEPATH'
    log_dir = parent_dir + r'/logs'

    # Start logging
    l.setup_logging(log_dir, 'summary', output_version_id, location_id, db)

    try:
        # Read in helper files
        logging.info("Reading in helper files")
        age_weights, most_detailed_location, config = read_helper_files(
            parent_dir, location_id)

        # Read in config variables
        index_columns = config['index_columns']
        data_columns = config['data_columns']
        # Years used for percent-change summaries; processed separately from
        # the plain eligible years below.
        change_years = [1990, 2006, 2016]
        years = [yr for yr in config['eligible_year_ids']
                 if yr not in change_years]
        # ensure that the change years get grouped together
        for i, y in enumerate(change_years):
        # NOTE(review): snippet truncated here (loop body not visible).
# ===== Example 15 (snippet separator) =====
    update_status(output_version_id, 1, cod_conn)

    cache_cod_db('after', cod_conn, parent_dir)
    logging.info("Cache saved to _temp. keys: 'output_version_before' "
                 "'and output_version_after'")
    logging.info("Cod Post Scriptum Finished.")


if __name__ == '__main__':
    # Command-line arguments: run version, target database, test flag.
    output_version_id, db, test_env = parse_args()

    # Test runs hit the dev environment; everything else goes to prod.
    db_env = DBEnv.DEV if test_env else DBEnv.PROD

    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    # Log file is keyed by wall-clock hour plus the target database name.
    run_stamp = time.strftime("%m_%d_%Y_%H")
    cc_log.setup_logging(log_dir, 'post_scriptum', run_stamp, db)

    # 'database' routes to the GBD post-scriptum; any other value to CoD.
    if db == 'database':
        gbd_main(parent_dir, db_env)
    else:
        cod_main(parent_dir, output_version_id, db_env)

    logging.info("Done.")
# ===== Example 16 (snippet separator) =====
        .format(location_id=location_id, sex_id=sex_id))
    save_hdf(data, draw_filepath, key='draws', mode='w',
             format='table', data_columns=index_columns)


if __name__ == '__main__':
    # Entry point: the 'correct' (rescaling) step for one location/sex.

    # Get command line arguments
    output_version_id, location_id, sex_id = parse_args()

    # Set paths
    parent_dir = 'FILEPATH'
    log_dir = parent_dir + r'/logs'

    # Start logging
    cc_log_utils.setup_logging(log_dir, 'correct', output_version_id,
                               location_id, sex_id)

    try:
        # Read in helper files
        logging.info("Reading in helper files")
        (config, best_models, eligible_data, spacetime_restriction_data,
            envelope_data, envelope_summ) = read_helper_files(parent_dir,
                                                              location_id,
                                                              sex_id)

        # Read in config variables
        eligible_year_ids = config['eligible_year_ids']
        index_columns = config['index_columns']
        # Mutates the list held in config; verify config is not reused later.
        index_columns.remove('measure_id')
        data_columns = config['data_columns']
        envelope_index_columns = config['envelope_index_columns']
        # NOTE(review): snippet truncated here (try block incomplete).
# ===== Example 17 (snippet separator) =====
        return output_files
    except Exception as e:
        logger.exception('Failed to save output files: {}'.format(e))


if __name__ == '__main__':
    # Entry point: append per-location summary files into one output,
    # Python 2 variant (bare `print` statements below).

    # Get command line arguments
    output_version_id = parse_args()

    # Set paths
    parent_dir = PARENT_DIRECTORY
    log_dir = parent_dir + r'/logs'

    # Start logging
    l.setup_logging(log_dir, 'append_summaries', output_version_id)

    try:
        # Read in helper files
        print "Reading in helper files"
        logging.info("Reading in helper files")
        config, location_ids = read_helper_files(parent_dir)

        # Read in summary files
        print "Reading in summary files"
        logging.info("Reading in summary files")
        # NOTE(review): the commented-out loop below appears superseded; kept
        # as found. Snippet truncated after it.
        # for location_id in location_ids:
        #     file_path = parent_dir + r'/summaries/summary_{location_id}.csv'.format(location_id=location_id)
        #     print "Reading in {}".format(file_path)
        #     logging.info("Reading in {}".format(file_path))
        #     data.append(pd.read_csv(file_path))