def transform_add_measure(df, measure_id):
    """Add a ``measure_id`` column to *df* and return it (mutates in place)."""
    df['measure_id'] = measure_id
    return df

if __name__ == '__main__':
    # Entry point: location aggregation for one measure/location-set/year.
    # Get command line arguments
    version_id, df_type, measure_id, location_set_id, year_id = parse_args()
    # Set paths ('FILEPATH' is a scrubbed placeholder for the real directory)
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    # Start logging
    cc_log_utils.setup_logging(log_dir, 'agg_location', str(version_id),
                               df_type, str(measure_id), str(location_set_id),
                               str(year_id))
    # parse_args() values are used as strings for logging above; downstream
    # code needs measure_id as an int.
    measure_id = int(measure_id)
    try:
        # Read in helper files
        logging.info("Reading in helper files.")
        config = read_helper_files(parent_dir)
        # Read in config variables
        index_cols = config['index_columns']
        draw_cols = config['data_columns']
        sex_id = config['eligible_sex_ids']
        # Create draw source/sink
        logging.info("Creating draw source and sink.")
        # NOTE(review): chunk truncated here — the rest of this try block and
        # its except handler are outside the visible source.
# NOTE(review): truncated tail of a function that writes shock draws — its
# enclosing `def` is above this chunk.
shocks_sink = DrawSink(shocks_params)
# Stamp each pushed row with the measure_id before writing to the sink.
shocks_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
shocks_sink.push(shock_data, append=False)

if __name__ == '__main__':
    # Entry point: cause aggregation for one location.
    # Get command line arguments
    output_version_id, location = parse_args()
    # Set paths ('FILEPATH' is a scrubbed placeholder)
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    # Start logging
    cc_log_utils.setup_logging(log_dir, 'agg_cause', output_version_id,
                               location)
    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config, cause_hierarchy = read_helper_files(parent_dir, location)
        # Read in config variables
        index_columns = config['index_columns']
        # NOTE(review): .remove() mutates the list held inside config —
        # presumably measure_id is fixed for this step; confirm config is not
        # reused later expecting 'measure_id' to still be present.
        index_columns.remove('measure_id')
        data_columns = config['data_columns']
        # Read in rescaled draw files
        logging.info("Reading in rescaled draw files")
        rescaled_data = read_rescaled_draw_files(parent_dir, location)
        # NOTE(review): chunk truncated here — remainder of the try block and
        # its except handler are not visible.
# NOTE(review): truncated tail of a call whose opening line is above this
# chunk (only the final keyword argument is visible).
poolsize=8)

if __name__ == '__main__':
    # Entry point: location aggregation over a set of years for one measure.
    # Get command line arguments
    output_version_id, df_type, measure, location_set_id, years = parse_args()
    # Set paths ('FILEPATH' is a scrubbed placeholder)
    parent_dir = r'FILEPATH'
    log_dir = parent_dir + r'/logs'
    in_dir = r'FILEPATH'
    out_dir = in_dir
    # Start logging; years are concatenated into one token for the log name
    l.setup_logging(log_dir, 'agg_location', output_version_id, df_type,
                    measure, location_set_id,
                    "".join(str(yr) for yr in years))
    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config = read_helper_files(parent_dir)
        # Read in config variables
        index_columns = config['index_columns']
        sexes = config['eligible_sex_ids']
        # measure arrives as a string from the CLI; "1" presumably selects a
        # different file layout — TODO confirm which measure "1" denotes.
        if measure == "1":
            input_file_pattern = 'FILEPATH.h5'
            output_file_pattern = 'FILEPATH.h5'
        else:
            input_file_pattern = 'FILEPATH.h5'
            # NOTE(review): chunk truncated here — the else branch and the
            # rest of the try block are not visible.
# NOTE(review): truncated tail of an upload helper — the opening of this call
# (a glob over per-measure summary directories) is above this chunk.
parent_dir, 'summaries/gbd/single/{m}/*'.format(m=measure_id))))
upload_gbd_summaries(process_version, gbd_conn_def, directories)
logging.info("Finished upload to gbd")

if __name__ == '__main__':
    # Entry point: upload summaries to one of the cod/gbd/diagnostic DBs.
    # parse args
    output_version_id, db, measure_id, conn_def, change = parse_args()
    # Set paths ('FILEPATH' is a scrubbed placeholder)
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    # Start logging
    cc_log_utils.setup_logging(log_dir, 'upload', output_version_id, db,
                               measure_id, str(change))
    logging.info("conn_def is {}".format(conn_def))
    try:
        config, causes = read_helper_files(parent_dir)
        envelope_version_id = config['envelope_version_id']
        years = config['eligible_year_ids']
        # Dispatch on target database; anything other than 'cod'/'gbd' falls
        # through to the diagnostics upload.
        if db == 'cod':
            upload_cod_summary(output_version_id, envelope_version_id, causes,
                               years, conn_def)
        elif db == 'gbd':
            process_version = config['process_version_id']
            upload_gbd_summary(process_version, measure_id, change, conn_def)
        else:
            upload_diagnostic_summary(output_version_id)
        logging.info('All done!')
        # NOTE(review): chunk truncated here — the except handler for this
        # try block is not visible.
# NOTE(review): truncated tail of an HDF-write call — its opening (object and
# path arguments) is above this chunk. mode='a' appends the cached snapshot.
key='best_output_version_{}'.format(state), mode='a', format='table',
data_columns=['output_version_id'])

def read_helper_files(parent_dir):
    """Load the run configuration dict from ``<parent_dir>/_temp/config.json``."""
    return read_json(os.path.join(parent_dir, '_temp/config.json'))

if __name__ == '__main__':
    # Entry point: mark a run as "best", caching DB state before the change.
    output_version_id, test = parse_args()
    # 'FILEPATH' is a scrubbed placeholder for the real directory
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    cc_log.setup_logging(log_dir, 'mark_best', time.strftime("%m_%d_%Y_%H"))
    config = read_helper_files(parent_dir)
    process_version_id = config['process_version_id']
    # Choose the DB environment: test runs go to DEV, real runs to PROD.
    if test:
        db_env = DBEnv.DEV
    else:
        db_env = DBEnv.PROD
    gbd_conn_def = _GBD_CONN_DEF_MAP[db_env.value]
    cod_conn_def = _COD_CONN_DEF_MAP[db_env.value]
    # Snapshot both databases before marking best, for later comparison.
    cache_cod_db('before', cod_conn_def, parent_dir)
    cache_gbd_db('before', gbd_conn_def, parent_dir)
    # NOTE(review): chunk truncated here — the actual mark-best step is not
    # visible in this chunk.
# NOTE(review): truncated tail of an HDF-write call — its opening (data and
# file path) is above this chunk.
sex_id=sex_id), 'draws', mode='w', format='table',
data_columns=index_columns)

if __name__ == '__main__':
    # Entry point: pull shock draws for one location/sex.
    # Get command line arguments
    output_version_id, location_id, sex_id = parse_args()
    # Set paths ('FILEPATH' is a scrubbed placeholder)
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    # Start logging
    cc_log_utils.setup_logging(log_dir, 'shocks', output_version_id,
                               location_id, sex_id)
    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config, best_models = read_helper_files(parent_dir, location_id,
                                                sex_id)
        # Read in config variables
        eligible_year_ids = config['eligible_year_ids']
        index_columns = config['index_columns']
        data_columns = config['data_columns']
        raw_data_columns = index_columns + data_columns
        # Read in draw files
        logging.info("Reading in best model draws")
        # NOTE(review): chunk truncated here — remainder of the try block and
        # its except handler are not visible.
# NOTE(review): truncated tail of a save function's except handler — the
# enclosing `def`/`try` are above this chunk.
logger.exception('Failed to save all outputs: {}'.format(e))

if __name__ == '__main__':
    # Entry point: append shock draws to rescaled draws for one location.
    # Get command line arguments
    output_version_id, location = parse_args()
    # Set paths ('FILEPATH' is a scrubbed placeholder)
    parent_dir = r'FILEPATH'
    log_dir = parent_dir + r'FILEPATH'
    shock_dir = r'FILEPATH'
    rescaled_dir = r'FILEPATH'
    # Start logging
    l.setup_logging(log_dir, 'append_shocks', output_version_id, location)
    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config, most_detailed = read_helper_files(parent_dir, location)
        # Read in config variables
        index_cols = config['index_columns']
        data_cols = config['data_columns']
        years = config['eligible_year_ids']
        sexes = config['eligible_sex_ids']
        # Read in all inputs (deaths and YLLs, rescaled and shock variants)
        logging.info("Reading in all inputs for {}".format(location))
        rescaled, shocks, rescaled_yll, shocks_yll = read_all_inputs(
        # NOTE(review): chunk truncated mid-call — the arguments to
        # read_all_inputs and the rest of the try block are not visible.
# NOTE(review): truncated tail of an HDF-write call — its opening is above
# this chunk.
mode='w', format='table', data_columns=index_columns)

if __name__ == '__main__':
    # Entry point: aggregate child-location draws up to one parent location.
    # NOTE(review): `print "..."` statements below are Python 2 syntax — this
    # script predates a py3 migration; confirm target interpreter.
    # Get command line arguments
    output_version_id, location_id = parse_args()
    # Set paths
    parent_dir = PARENT_DIRECTORY
    log_dir = parent_dir + r'/logs'
    # Start logging
    l.setup_logging(log_dir, 'agg_location', output_version_id, location_id,
                    'both')
    try:
        # Read in helper files
        print "Reading in helper files"
        logging.info("Reading in helper files")
        config, child_locations = read_helper_files(parent_dir, location_id)
        # Read in config variables
        index_columns = config['index_columns']
        data_columns = config['data_columns']
        # Read in rescaled draw files
        print "Reading in child location draw files"
        logging.info("Reading in child location draw files")
        logging.info("{}".format(', '.join([str(x) for x in child_locations])))
        # NOTE(review): chunk truncated here — remainder of the try block and
        # its except handler are not visible.
# NOTE(review): truncated tail of a call — its opening (function name and
# leading arguments) is above this chunk.
location_id, parent_dir, save=False)

if __name__ == '__main__':
    # Entry point: collect per-location diagnostic files into one output.
    # Get command line arguments
    output_version_id = parse_args()
    # Set paths ('FILEPATH' is a scrubbed placeholder)
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    # Start logging
    cc_log_utils.setup_logging(log_dir, 'append_diagnostics',
                               output_version_id)
    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config, location_ids, est_locations = read_helper_files(parent_dir)
        # Read in config variables
        diag_years = config['diagnostic_year_ids']
        index_columns = config['index_columns']
        data_columns = config['data_columns']
        logging.info('Reading in diagnostic files and creating ones that '
                     'dont exist')
        data = []
        for location_id in location_ids:
        # NOTE(review): chunk truncated here — the loop body and the rest of
        # the try block are not visible.
# NOTE(review): truncated tail of a function writing aggregated YLL shock
# draws — its enclosing `def` is above this chunk.
shocks_sink = DrawSink(agg_shocks_params)
# Stamp each pushed row with the measure_id before writing to the sink.
shocks_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
shocks_sink.push(yll_shocks, append=False)

if __name__ == '__main__':
    # Entry point: compute YLLs for one location.
    # Get command line arguments
    output_version_id, location_id = parse_args()
    # Set paths ('FILEPATH' is a scrubbed placeholder)
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    # Start logging
    cc_log_utils.setup_logging(log_dir, 'ylls', output_version_id,
                               location_id)
    try:
        # Read in helper files
        logging.info("Reading in helper files")
        config = read_helper_files(parent_dir)
        envelope_version_id = config['envelope_version_id']
        # Read in config variables
        index_columns = config['index_columns']
        # NOTE(review): .remove() mutates the list held inside config —
        # confirm config is not reused expecting 'measure_id' present.
        index_columns.remove('measure_id')
        # YLL index drops cause_id as well (order is not preserved by the
        # set round-trip).
        yll_index_columns = list(set(index_columns) - set(['cause_id']))
        data_columns = config['data_columns']
        # Read in rescaled draw files
        logging.info("Reading in cause/loc aggregated with shock+hiv draws")
        # NOTE(review): chunk truncated here — remainder of the try block and
        # its except handler are not visible.
# NOTE(review): truncated tail of an HDF-write call — its opening is above
# this chunk.
'draws', mode='w', format='table', data_columns=index_columns)

if __name__ == '__main__':
    # Entry point: pull shock draws for one location/sex (sex given by name).
    # NOTE(review): `print "..."` below is Python 2 syntax — confirm target
    # interpreter before modernizing.
    # Get command line arguments
    output_version_id, location_id, sex_name = parse_args()
    # Set paths
    parent_dir = PARENT_DIRECTORY
    log_dir = parent_dir + r'/logs'
    # Start logging
    l.setup_logging(log_dir, 'shocks', output_version_id, location_id,
                    sex_name)
    # Sex dictionary: map CLI sex name to the numeric sex_id used downstream.
    # Raises KeyError for any value other than 'male'/'female'.
    sex_dict = {'male': 1, 'female': 2}
    sex_id = sex_dict[sex_name]
    try:
        # Read in helper files
        print "Reading in helper files"
        logging.info("Reading in helper files")
        config, best_models = read_helper_files(parent_dir, location_id,
                                                sex_name)
        # Read in config variables
        eligible_year_ids = config['eligible_year_ids']
        index_columns = config['index_columns']
        data_columns = config['data_columns']
        # NOTE(review): chunk truncated here — remainder of the try block and
        # its except handler are not visible.
if __name__ == '__main__':
    # Entry point: launcher that sets up the run's folder structure, logging,
    # and cause-hierarchy metadata before submitting the pipeline.
    # Set some core variables
    code_directory = os.path.dirname(os.path.abspath(__file__))
    output_directory = 'FILEPATH'  # scrubbed placeholder for the real path
    # set up folders
    (output_version_id, codcorrect_years, location_set_ids, resume,
     upload_to_cod, upload_to_gbd, upload_to_diagnostics, db_env, best,
     upload_to_concurrent) = parse_args()
    parent_dir = set_up_folders(output_directory, output_version_id)
    log_dir = os.path.join(parent_dir, 'logs')
    # Start logging
    cc_log.setup_logging(log_dir, 'launch', time.strftime("%m_%d_%Y_%H"))
    # Years used for percent-change summaries — presumably must match the
    # change_years hard-coded in the summary step; TODO confirm they agree.
    change_years = [1990, 2007, 2017]
    if not resume:
        # Retrieve cause resources from database.
        # Uses the codcorrect cause set (1) to create the cause data and
        # metadata for the current round. Used in the correct step to rescale
        # the cause fractions down the hierarchy.
        (cause_set_version_id,
         cause_metadata_version_id) = get_cause_hierarchy_version(
            1, GBD.GBD_ROUND)
        cause_data = get_cause_hierarchy(cause_set_version_id)
        cause_metadata = get_cause_metadata(cause_metadata_version_id)
        # (cause_agg_set_version_id,
        # NOTE(review): chunk truncated here — the aggregation cause-set
        # retrieval and the rest of the launcher are not visible.
# NOTE(review): truncated tail of a save function — its enclosing `def` is
# above this chunk. Writes rescaled draws keyed by location and sex name.
draw_filepath = parent_dir + r'/unaggregated/rescaled/rescaled_{location_id}_{sex_name}.h5'.format(location_id=location_id, sex_name=sex_name)
save_hdf(data, draw_filepath, key='draws', mode='w', format='table',
         data_columns=index_columns)

if __name__ == '__main__':
    # Entry point: the "correct" (rescale) step for one location/sex.
    # NOTE(review): `print "..."` below is Python 2 syntax — confirm target
    # interpreter before modernizing.
    # Get command line arguments
    output_version_id, location_id, sex_name = parse_args()
    # Set paths
    parent_dir = PARENT_DIRECTORY
    log_dir = parent_dir + r'/logs'
    # Start logging
    l.setup_logging(log_dir, 'correct', output_version_id, location_id,
                    sex_name)
    # Sex dictionary: map CLI sex name to numeric sex_id; raises KeyError for
    # anything other than 'male'/'female'.
    sex_dict = {'male': 1, 'female': 2}
    sex_id = sex_dict[sex_name]
    try:
        # Read in helper files
        print "Reading in helper files"
        logging.info("Reading in helper files")
        config, best_models, eligible_data, spacetime_restriction_data, envelope_data = read_helper_files(parent_dir, location_id, sex_name)
        # Read in config variables
        eligible_year_ids = config['eligible_year_ids']
        index_columns = config['index_columns']
        data_columns = config['data_columns']
        # NOTE(review): chunk truncated here — remainder of the try block and
        # its except handler are not visible.
# NOTE(review): truncated tail of a summarize function's except handler — the
# enclosing `def`/`try` are above this chunk. The exception object itself is
# used as the return code.
        logger.exception("Summarizing GBD failed: {}".format(e))
        rc = e
    return rc

if __name__ == '__main__':
    # Entry point: summarize draws for one location into one of the DBs.
    # Get command line arguments
    output_version_id, location_id, db = parse_args()
    # Set paths ('FILEPATH' is a scrubbed placeholder)
    parent_dir = r'FILEPATH'
    log_dir = parent_dir + r'/logs'
    # Start logging
    l.setup_logging(log_dir, 'summary', output_version_id, location_id, db)
    try:
        # Read in helper files
        logging.info("Reading in helper files")
        age_weights, most_detailed_location, config = read_helper_files(
            parent_dir, location_id)
        # Read in config variables
        index_columns = config['index_columns']
        data_columns = config['data_columns']
        # Years used for percent-change summaries; handled separately from
        # the single-year summaries below.
        change_years = [1990, 2006, 2016]
        years = [yr for yr in config['eligible_year_ids']
                 if yr not in change_years]
        # ensure that the change years get grouped together
        for i, y in enumerate(change_years):
        # NOTE(review): chunk truncated here — the loop body and the rest of
        # the try block are not visible.
# NOTE(review): truncated tail of cod_main — its enclosing `def` is above
# this chunk. Marks the run complete and snapshots the cod DB afterwards.
update_status(output_version_id, 1, cod_conn)
cache_cod_db('after', cod_conn, parent_dir)
logging.info("Cache saved to _temp. keys: 'output_version_before' "
             "'and output_version_after'")
logging.info("Cod Post Scriptum Finished.")

if __name__ == '__main__':
    # Entry point: post-run bookkeeping against either the gbd or cod DB.
    output_version_id, db, test_env = parse_args()
    # Choose the DB environment: test runs go to DEV, real runs to PROD.
    if test_env:
        db_env = DBEnv.DEV
    else:
        db_env = DBEnv.PROD
    # 'FILEPATH' is a scrubbed placeholder for the real directory
    parent_dir = 'FILEPATH'
    log_dir = os.path.join(parent_dir, 'logs')
    cc_log.setup_logging(log_dir, 'post_scriptum',
                         time.strftime("%m_%d_%Y_%H"), db)
    # Dispatch: the literal 'database' selects the gbd path, anything else
    # the cod path — TODO confirm 'database' is the intended sentinel.
    if db == 'database':
        gbd_main(parent_dir, db_env)
    else:
        cod_main(parent_dir, output_version_id, db_env)
    logging.info("Done.")
# NOTE(review): truncated tail of a save function — the path template being
# formatted and the enclosing `def` are above this chunk.
.format(location_id=location_id, sex_id=sex_id))
save_hdf(data, draw_filepath, key='draws', mode='w', format='table',
         data_columns=index_columns)

if __name__ == '__main__':
    # Entry point: the "correct" (rescale) step for one location/sex.
    # Get command line arguments
    output_version_id, location_id, sex_id = parse_args()
    # Set paths ('FILEPATH' is a scrubbed placeholder)
    parent_dir = 'FILEPATH'
    log_dir = parent_dir + r'/logs'
    # Start logging
    cc_log_utils.setup_logging(log_dir, 'correct', output_version_id,
                               location_id, sex_id)
    try:
        # Read in helper files
        logging.info("Reading in helper files")
        (config, best_models, eligible_data, spacetime_restriction_data,
         envelope_data, envelope_summ) = read_helper_files(parent_dir,
                                                           location_id,
                                                           sex_id)
        # Read in config variables
        eligible_year_ids = config['eligible_year_ids']
        index_columns = config['index_columns']
        # NOTE(review): .remove() mutates the list held inside config —
        # confirm config is not reused expecting 'measure_id' present.
        index_columns.remove('measure_id')
        data_columns = config['data_columns']
        envelope_index_columns = config['envelope_index_columns']
        # NOTE(review): chunk truncated here — remainder of the try block and
        # its except handler are not visible.
# NOTE(review): truncated tail of a save-outputs function — its `def` and the
# opening `try:` are above this chunk.
        return output_files
    except Exception as e:
        logger.exception('Failed to save output files: {}'.format(e))

if __name__ == '__main__':
    # Entry point: append per-location summary files into one output.
    # NOTE(review): `print "..."` below is Python 2 syntax — confirm target
    # interpreter before modernizing.
    # Get command line arguments
    output_version_id = parse_args()
    # Set paths
    parent_dir = PARENT_DIRECTORY
    log_dir = parent_dir + r'/logs'
    # Start logging
    l.setup_logging(log_dir, 'append_summaries', output_version_id)
    try:
        # Read in helper files
        print "Reading in helper files"
        logging.info("Reading in helper files")
        config, location_ids = read_helper_files(parent_dir)
        # Read in summary files
        print "Reading in summary files"
        logging.info("Reading in summary files")
        # Commented-out per-location CSV read loop retained from the original
        # source (candidate for deletion once the replacement is confirmed):
        # for location_id in location_ids:
        #     file_path = parent_dir + r'/summaries/summary_{location_id}.csv'.format(location_id=location_id)
        #     print "Reading in {}".format(file_path)
        #     logging.info("Reading in {}".format(file_path))
        #     data.append(pd.read_csv(file_path))
        # NOTE(review): chunk truncated here — remainder of the try block and
        # its except handler are not visible.