コード例 #1
0
def pull_vr_data(code_dir,
                 vr_temp_folder,
                 compiled_vr_filepath,
                 events_map_filepath,
                 start_year,
                 end_year,
                 encoding='utf8'):
    """Submit jobs that pull VR data in year windows, then one compile job.

    The events map spreadsheet is read to find every unique ``cause_id``
    whose ``pull_vr`` column equals 1. One pull job is submitted per window
    of at most six consecutive years between ``start_year`` and ``end_year``
    (both inclusive). A final compile job is then submitted that holds on
    every pull job.

    Args:
        code_dir: Directory that is joined with the submitted program names.
        vr_temp_folder: Folder that receives the per-window CSV files; it is
            created when it does not already exist.
        compiled_vr_filepath: Passed to the compile job as ``--outfile``.
        events_map_filepath: Excel file with ``pull_vr`` and ``cause_id``
            columns.
        start_year: First year to pull; must be exactly an ``int``.
        end_year: Last year to pull; must be exactly an ``int``.
        encoding: Forwarded to every job as ``--encoding``.

    Returns:
        The value returned by ``qsub.qsub`` for the final compile job.

    Raises:
        AssertionError: If ``start_year`` or ``end_year`` is not an ``int``.
    """
    # Collect every unique cause ID that is flagged for VR pulling
    events_map = pd.read_excel(events_map_filepath)
    flagged_rows = events_map['pull_vr'] == 1
    cause_ids = events_map.loc[flagged_rows, 'cause_id'].unique().tolist()
    cause_ids_str = ','.join(map(str, cause_ids))

    # The temp directory must exist before any pull job writes into it
    if not os.path.exists(vr_temp_folder):
        os.mkdir(vr_temp_folder)

    assert type(start_year) is int
    assert type(end_year) is int

    pull_template = ("--causes {0} --startyear {1} --endyear {2} "
                     "--outfile {3} --encoding {4}")
    pull_jids = []
    window_start = start_year
    while window_start <= end_year:
        # A window covers at most six years and never runs past end_year
        window_end = min(window_start + 5, end_year)
        print("Pulling VR for years {} to {}".format(window_start,
                                                     window_end))
        window_csv = join(
            vr_temp_folder,
            "vr_{}_to_{}.csv".format(window_start, window_end))
        pull_args = pull_template.format(
            cause_ids_str, window_start, window_end, window_csv, encoding)
        pull_jids.append(
            qsub.qsub(program_name=join(code_dir, "FILEPATH"),
                      program_args=pull_args,
                      python_filepath="",
                      slots=35,
                      hold_jids=None,
                      qsub_name="vr_{}to{}".format(window_start,
                                                   window_end)))
        # The next window starts the year after this one ended
        window_start = window_end + 1

    # Every pull job is now queued. The last job waits on all of them and,
    # per the original author's note, compiles the individual VR files and
    # deletes the temporary ones.
    compile_args = "--indir {0} --outfile {1} --encoding {2}".format(
        vr_temp_folder, compiled_vr_filepath, encoding)
    return qsub.qsub(program_name=join(code_dir, "FILEPATH"),
                     program_args=compile_args,
                     slots=35,
                     hold_jids=pull_jids,
                     qsub_name="vr_compile")
コード例 #2
0
def submit_all_geocoding(geocode_dir, outdir, code_dir,
                         location_col='location_name', iso2_col='iso2'):
    """Submit one geocoding job per Excel workbook found in ``geocode_dir``.

    Every ``.xlsx``/``.xls`` file (case-insensitive) directly inside
    ``geocode_dir`` gets its own job, and the file at position ``i`` in the
    directory listing uses the Google Maps key at position ``i``.

    Args:
        geocode_dir: Directory scanned for workbooks to geocode.
        outdir: Directory joined with each file name to form ``--outfile``.
        code_dir: Not referenced by this function.
        location_col: Passed to each job as ``--address``.
        iso2_col: Passed to each job as ``--iso``.

    Returns:
        A list with the ``qsub.qsub`` return value of every submitted job,
        in submission order.

    Raises:
        AssertionError: If there are more workbooks than Google Maps keys.
    """
    # Add all Google Maps keys directly
    gmaps_keys = ["PASSWORD"]
    # Gather the workbooks that need a job
    excel_suffixes = ('.xlsx', '.xls')
    files_to_submit = [fname for fname in os.listdir(geocode_dir)
                       if fname.lower().endswith(excel_suffixes)]
    # Each job needs a key of its own
    assert len(gmaps_keys) >= len(files_to_submit)

    submit_code_path = 'FILEPATH'
    arg_template = ("--infile {} --outfile {} --address {} --iso {} "
                    "--keygm {} --geonames --nomaps")
    job_jids = []
    for idx, fname in enumerate(files_to_submit):
        query_args = arg_template.format(join(geocode_dir, fname),
                                         join(outdir, fname),
                                         location_col,
                                         iso2_col,
                                         gmaps_keys[idx])
        job_jids.append(qsub.qsub(program_name=submit_code_path,
                                  program_args=query_args,
                                  slots=10,
                                  project="",
                                  qsub_name="geocode{}".format(idx),
                                  email_after=False))
    return job_jids
コード例 #3
0
def gen_draws_save_results(in_filepath, cause_id, encoding, upload_only,
                           mark_best, message):
    """Submit the draw-generation job (optional) and the save-results job.

    Unless ``upload_only`` is true, a first job running ``draws2.py`` is
    submitted. A second job running ``save_shocks_results.py`` is always
    submitted and holds on the first job when there is one. ``--best`` is
    appended to the second job's arguments when ``mark_best`` is true.

    Args:
        in_filepath: Input file handed to the draws job as ``--infile``; the
            name of its parent directory is used as the save folder name.
        cause_id: Cause ID forwarded to both jobs and used in the job names.
        encoding: Forwarded to the draws job as ``--encoding``.
        upload_only: When true, skip the draws job and only submit the save
            job.
        mark_best: When true, add ``--best`` to the save job's arguments.
        message: Forwarded to the save job as ``--message``.

    Returns:
        None. The job IDs are not handed back to the caller.
    """
    # Make the folder where all HDF results will be saved for upload
    # NOTE(review): base_dir is an empty string here, so save_dir is a bare
    # relative folder name -- looks like a redacted path; confirm.
    base_dir = ''
    input_dir_name = os.path.dirname(
        os.path.abspath(in_filepath)).split("/")[-1]
    save_dir = join(base_dir, input_dir_name)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    GBD_PYTHON_PATH = ("FILEPATH")
    # Both submitted scripts live next to this file
    this_dir = os.path.dirname(os.path.abspath(__file__))

    # ** SUBMIT THE FIRST JOB, WHERE DRAWS ARE GENERATED **
    if not upload_only:
        splitting_code = join(this_dir, 'draws2.py')
        program_args_a = (
            '--cause_id {} --infile {} --encoding {} --savedir {} '.format(
                cause_id, in_filepath, encoding, save_dir))
        draws_jid = qsub.qsub(splitting_code,
                              program_args=program_args_a,
                              python_filepath=GBD_PYTHON_PATH,
                              slots=40,
                              mem_free_gb=70,
                              project="proj_shocks",
                              qsub_name="draws_{}".format(cause_id))
    else:
        # Nothing to wait for when only uploading
        draws_jid = None

    # ** SUBMIT THE SECOND JOB, WHERE THE RESULTS ARE SAVED **
    save_results_code = join(this_dir, 'save_shocks_results.py')
    # NOTE(review): count_space_dir is empty, so --savedir receives no value
    # -- presumably another redacted path; confirm.
    count_space_dir = ""
    # NOTE(review): message is interpolated unquoted; whether a message
    # containing spaces survives depends on how qsub.qsub builds the command
    # line -- confirm.
    program_args_b = '--cause_id {} --savedir {} --message {}'.format(
        cause_id, count_space_dir, message)
    if mark_best:
        program_args_b = "{} --best".format(program_args_b)
    # The save job must be submitted whether or not the results are marked
    # best. It used to be nested under ``if mark_best`` above, which meant
    # nothing was saved (and with upload_only nothing ran at all) when
    # mark_best was false.
    qsub.qsub(save_results_code,
              program_args=program_args_b,
              python_filepath=GBD_PYTHON_PATH,
              slots=50,
              mem_free_gb=180,
              hold_jids=draws_jid,
              project="proj_shocks",
              qsub_name="save_{}".format(cause_id))
    return None
コード例 #4
0
def submit_first_overlay(infile, outfile, long_col, lat_col,
                         code_dir="FILEPATH",
                         hold_jids=None):
    """Submit the "first_overlay" job for one input file.

    Note that the signature takes the longitude column before the latitude
    column, while the submitted arguments list ``--latitude_col`` first.

    Args:
        infile: Passed to the job as ``--infile``.
        outfile: Passed to the job as ``--outfile``.
        long_col: Passed to the job as ``--longitude_col``.
        lat_col: Passed to the job as ``--latitude_col``.
        code_dir: Directory joined with the submitted program name.
        hold_jids: Forwarded unchanged to ``qsub.qsub``.

    Returns:
        The value returned by ``qsub.qsub`` for the submitted job.
    """
    arg_template = ("--infile {} --outfile {} --latitude_col {} "
                    "--longitude_col {}")
    overlay_args = arg_template.format(infile, outfile, lat_col, long_col)
    overlay_program = join(code_dir, "FILEPATH")
    return qsub.qsub(program_name=overlay_program,
                     program_args=overlay_args,
                     slots=15,
                     hold_jids=hold_jids,
                     qsub_name="first_overlay")
コード例 #5
0
def submit_geocoding_compiler(in_dir, outfile, code_dir,
                              encoding='latin1', hold_jids=None):
    """Submit the "geocode_compile" job.

    Args:
        in_dir: Passed to the job as ``--indir``.
        outfile: Passed to the job as ``--outfile``.
        code_dir: Root directory joined with the submitted program's path.
        encoding: Passed to the job as ``--encoding``.
        hold_jids: Forwarded unchanged to ``qsub.qsub``.

    Returns:
        The value returned by ``qsub.qsub`` for the submitted job.
    """
    arg_template = "--indir {} --outfile {} --encoding {}"
    compile_args = arg_template.format(in_dir, outfile, encoding)
    compiler_path = join(code_dir, 'FILEPATH', 'FILEPATH')
    return qsub.qsub(program_name=compiler_path,
                     program_args=compile_args,
                     slots=15,
                     project="",
                     qsub_name="geocode_compile",
                     email_after=False,
                     hold_jids=hold_jids)
コード例 #6
0
def submit_location_compiler_all(storage_dir, outfile, outfile_split, code_dir,
                                 encoding='latin1', hold_jids=None):
    """Submit the "loc_comp_all" job.

    This submission passes ``email_after=True`` to ``qsub.qsub``.

    Args:
        storage_dir: Passed to the job as ``--storage_dir``.
        outfile: Passed to the job as ``--outfile``.
        outfile_split: Passed to the job as ``--outfile_split``.
        code_dir: Root directory joined with the submitted program's path.
        encoding: Passed to the job as ``--encoding``.
        hold_jids: Forwarded unchanged to ``qsub.qsub``.

    Returns:
        The value returned by ``qsub.qsub`` for the submitted job.
    """
    arg_template = ("--storage_dir {} --outfile {} --outfile_split {}"
                    " --encoding {}")
    compile_args = arg_template.format(storage_dir, outfile,
                                       outfile_split, encoding)
    compiler_path = join(code_dir, 'FILEPATH', 'FILEPATH')
    return qsub.qsub(program_name=compiler_path,
                     program_args=compile_args,
                     slots=15,
                     project="",
                     qsub_name="loc_comp_all",
                     email_after=True,
                     hold_jids=hold_jids)