Esempio n. 1
0
def main():
    """Regrid the outstanding custom outputs onto the population grid.

    Creates an ``SGECluster`` with 35 single-process jobs, lists the
    ``ds*{output}.nc`` files under ``path`` that do not yet have a
    ``_popgrid_0.05deg.nc`` counterpart, and maps ``regrid_to_pop`` over at
    most 2,500 of them with a dask bag.

    Relies on names defined outside this function: ``path``, ``output`` and
    ``regrid_to_pop``.
    """
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs

    cluster = SGECluster(interface='ib0',
                         walltime='01:00:00',
                         memory='64 G',
                         resource_spec='h_vmem=64G',
                         scheduler_options={
                             'dashboard_address': ':5757',
                         },
                         job_extra=['-cwd', '-V', f'-pe smp {n_processes}'],
                         local_directory=os.sep.join(
                             [os.environ.get('PWD'), 'dask-worker-space']))

    client = None
    try:
        client = Client(cluster)
        cluster.scale(jobs=n_jobs)

        time_start = time.time()

        # regrid custom outputs to pop grid
        completed_suffix = '_popgrid_0.05deg.nc'
        custom_outputs = glob.glob(path + 'ds*' + output + '.nc')
        # map every finished file back to the name of the input it came from
        custom_outputs_completed = [
            f'{item[:-len(completed_suffix)]}.nc'
            for item in glob.glob(path + 'ds*' + output + completed_suffix)
        ]
        custom_outputs_remaining = list(
            set(custom_outputs) - set(custom_outputs_completed))
        print(
            f'custom outputs remaining for {output}: {len(custom_outputs_remaining)}'
        )

        # dask bag and process
        # cap each run at 2,500 outputs (original note: 2,500 chunks over
        # 30 cores, each chunk taking 5 minutes)
        custom_outputs_remaining = custom_outputs_remaining[:2500]
        print(
            f'predicting for {len(custom_outputs_remaining)} custom outputs ...'
        )
        bag_custom_outputs = db.from_sequence(custom_outputs_remaining,
                                              npartitions=n_workers)
        bag_custom_outputs.map(regrid_to_pop).compute()

        elapsed = time.time() - time_start
        print(
            f'completed in {elapsed:0.2f} seconds, or {elapsed / 60:0.2f} minutes, or {elapsed / 3600:0.2f} hours'
        )
        # nothing left to regrid would otherwise divide by zero
        if custom_outputs_remaining:
            print(
                f'average time per custom output is {elapsed / len(custom_outputs_remaining):0.2f} seconds'
            )
    finally:
        # always release the client and the cluster, even on failure
        if client is not None:
            client.close()
        cluster.close()
def main():
    """Run the health impact assessment for ``output`` on a dask SGE cluster.

    Creates an ``SGECluster`` with 20 single-process jobs and maps
    ``health_impact_assessment_pm25`` (``output == "PM2_5_DRY"``) or
    ``health_impact_assessment_o3`` (``output == "o3_6mDM8h"``) over the
    simulation names. For any other ``output`` nothing is computed.

    Relies on names defined outside this function: ``output`` and the two
    assessment functions.
    """
    # dask cluster and client
    n_jobs = 20
    n_processes = 1
    n_workers = n_processes * n_jobs

    cluster = SGECluster(
        interface="ib0",
        walltime="02:00:00",
        memory="48 G",
        resource_spec="h_vmem=48G",
        scheduler_options={
            "dashboard_address": ":7777",
        },
        job_extra=[
            "-cwd",
            "-V",
            f"-pe smp {n_processes}",
            "-l disk=48G",
        ],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-hia-space"]),
    )

    client = None
    try:
        client = Client(cluster)
        cluster.scale(jobs=n_jobs)
        time_start = time.time()

        # dask bag and process
        simulations = [f'emulator_Base_CLE_2020_{output}']

        #simulations = []
        #simulations.append(f'wrfchem_Base_CLE_2020_{output}')
        #simulations.append(f'wrfchem_Base_CLE_2050_{output}')
        #simulations.append(f'wrfchem_Base_MFR_2050_{output}')
        #simulations.append(f'wrfchem_SDS_MFR_2050_{output}')

        #for year in ['2020', '2030', '2040', '2050']:
        #    for scenario in ['Base_CLE', 'Base_MFR', 'SDS_MFR']:
        #        for sim in ['', '_RES', '_IND', '_TRA', '_AGR', '_ENE', '_NO_RES', '_NO_IND', '_NO_TRA', '_NO_AGR', '_NO_ENE']:
        #            simulations.append(f'emulator_{scenario}_{year}{sim}_{output}')

        print(f"predicting for {len(simulations)} custom outputs ...")
        bag_simulations = db.from_sequence(simulations, npartitions=n_workers)

        # pick the assessment matching the pollutant metric
        if output == "PM2_5_DRY":
            assessment = health_impact_assessment_pm25
        elif output == "o3_6mDM8h":
            assessment = health_impact_assessment_o3
        else:
            assessment = None

        if assessment is not None:
            bag_simulations.map(assessment).compute()

        elapsed = time.time() - time_start
        print(
            f"completed in {elapsed:0.2f} seconds, or {elapsed / 60:0.2f} minutes, or {elapsed / 3600:0.2f} hours"
        )
    finally:
        # always release the client and the cluster, even on failure
        if client is not None:
            client.close()
        cluster.close()
Esempio n. 3
0
def main():
    """Population-weight the outputs for every input on an 8**5 grid.

    Creates an ``SGECluster`` with 35 single-process jobs, builds all
    combinations of five scaling factors taken from ``linspace(0, 1.4, 8)``,
    maps ``popweight_outputs_for_input`` over them with a dask bag and dumps
    the resulting list with ``joblib``.

    Relies on names defined outside this function: ``output``, ``region`` and
    ``popweight_outputs_for_input``.
    """
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs

    cluster = SGECluster(
        interface="ib0",
        walltime="48:00:00",
        memory="12 G",
        resource_spec="h_vmem=12G",
        scheduler_options={
            "dashboard_address": ":5757",
        },
        job_extra=[
            "-cwd",
            "-V",
            f"-pe smp {n_processes}",
            "-l disk=1G",
        ],
        local_directory=os.sep.join(
            [os.environ.get("PWD"), "dask-worker-space_popweighted_region"]),
    )

    client = None
    try:
        client = Client(cluster)
        cluster.scale(jobs=n_jobs)

        # main processing
        # the same axis is used for each of the five input dimensions
        axes = [np.linspace(0, 1.4, 8)] * 5
        matrix_stacked = np.array(np.meshgrid(*axes)).T.reshape(-1, 5)

        # one (1, 5) array per combination
        custom_inputs = [
            np.array(item).reshape(1, -1) for item in matrix_stacked
        ]

        print(f"processing for {output} over {region} ...")
        bag_custom_inputs = db.from_sequence(custom_inputs,
                                             npartitions=n_workers)
        outputs_popweighted = bag_custom_inputs.map(
            popweight_outputs_for_input).compute()

        print("saving ...")
        joblib.dump(
            outputs_popweighted,
            f"/nobackup/earlacoa/machinelearning/data_annual/popweighted/popweighted_{region}_{output}_0.25deg_adjusted_scaled.joblib",
        )
    finally:
        # always release the client and the cluster, even on failure
        if client is not None:
            client.close()
        cluster.close()
def main():
    """Population-weight the outputs for every input on a 16**5 grid.

    Creates an ``SGECluster`` with 35 single-process jobs, builds all
    combinations of five scaling factors taken from ``linspace(0, 1.5, 16)``,
    maps ``popweight_outputs_for_input`` over them with a dask bag and dumps
    the resulting list with ``joblib``.

    Relies on names defined outside this function: ``output``, ``region`` and
    ``popweight_outputs_for_input``.
    """
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs

    cluster = SGECluster(interface='ib0',
                         walltime='48:00:00',
                         memory='12 G',
                         resource_spec='h_vmem=12G',
                         scheduler_options={
                             'dashboard_address': ':5757',
                         },
                         job_extra=[
                             '-cwd',
                             '-V',
                             f'-pe smp {n_processes}',
                             '-l disk=1G',
                         ],
                         local_directory=os.sep.join(
                             [os.environ.get('PWD'), 'dask-worker-space']))

    client = None
    try:
        client = Client(cluster)
        cluster.scale(jobs=n_jobs)

        # main processing
        # the same axis is used for each of the five input dimensions
        axes = [np.linspace(0, 1.5, 16)] * 5
        matrix_stacked = np.array(np.meshgrid(*axes)).T.reshape(-1, 5)

        # one (1, 5) array per combination
        custom_inputs = [
            np.array(item).reshape(1, -1) for item in matrix_stacked
        ]

        print(f'processing for {output} over {region} ...')
        bag_custom_inputs = db.from_sequence(custom_inputs,
                                             npartitions=n_workers)
        outputs_popweighted = bag_custom_inputs.map(
            popweight_outputs_for_input).compute()

        print('saving ...')
        joblib.dump(
            outputs_popweighted,
            f'/nobackup/earlacoa/machinelearning/data/popweighted/popweighted_{region}_{output}.joblib'
        )
    finally:
        # always release the client and the cluster, even on failure
        if client is not None:
            client.close()
        cluster.close()
Esempio n. 5
0
def main():
    """Map ``weird_function`` over ``nums`` on a dask SGE cluster and save it.

    Creates an ``SGECluster`` with 35 single-process jobs, runs the function
    over a dask bag built from ``nums`` and dumps the list of results to
    ``/nobackup/<user>/results.joblib``.

    Relies on names defined outside this function: ``nums`` and
    ``weird_function``.

    Raises:
        KeyError: if the ``USER`` environment variable is not set.
    """
    # dask cluster and client
    number_processes = 1
    number_jobs = 35
    number_workers = number_processes * number_jobs

    cluster = SGECluster(
        interface="ib0",
        walltime="04:00:00",
        memory="2 G",
        resource_spec="h_vmem=2G",
        scheduler_options={
            "dashboard_address": ":2727",
        },
        job_extra=[
            "-cwd",
            "-V",
            f"-pe smp {number_processes}",
            "-l disk=1G",
        ],
        local_directory=os.sep.join(
            [os.environ.get("PWD"), "dask-worker-space"]),
    )

    client = None
    try:
        client = Client(cluster)
        cluster.scale(jobs=number_jobs)

        # main processing
        print("processing ...")
        bag = db.from_sequence(nums, npartitions=number_workers)
        results = bag.map(weird_function).compute()

        print("saving ...")
        # The original f-string used the shell form "${USER}", which emits a
        # literal "$" and interpolates a Python name; read the environment
        # variable explicitly instead.
        joblib.dump(results,
                    f"/nobackup/{os.environ['USER']}/results.joblib")
    finally:
        # always release the client and the cluster, even on failure
        if client is not None:
            client.close()
        cluster.close()
Esempio n. 6
0
def main():
    """Map ``create_ozone_metric`` over ``sims`` on a dask SGE cluster.

    Creates an ``SGECluster`` with 35 single-process jobs and runs the
    function over a dask bag built from ``sims``. The computed values are
    discarded, so the function is run for whatever side effects it has.

    Relies on names defined outside this function: ``sims`` and
    ``create_ozone_metric``.
    """
    # dask cluster and client
    number_processes = 1
    number_jobs = 35
    number_workers = number_processes * number_jobs

    cluster = SGECluster(
        interface="ib0",
        walltime="04:00:00",
        memory="12 G",
        resource_spec="h_vmem=12G",
        scheduler_options={
            "dashboard_address": ":2727",
        },
        job_extra=[
            "-cwd",
            "-V",
            f"-pe smp {number_processes}",
            "-l disk=1G",
        ],
        local_directory=os.sep.join(
            [os.environ.get("PWD"), "dask-worker-space"]),
    )

    client = None
    try:
        client = Client(cluster)
        cluster.scale(jobs=number_jobs)

        # main processing
        print("processing ...")
        bag = db.from_sequence(sims, npartitions=number_workers)
        bag.map(create_ozone_metric).compute()
        print("complete")
    finally:
        # always release the client and the cluster, even on failure
        if client is not None:
            client.close()
        cluster.close()
Esempio n. 7
0
def main():
    """Run ``custom_predicts`` for the grid inputs that have no summary yet.

    Creates an ``SGECluster`` with 35 single-process jobs, builds all
    combinations of five scaling factors taken from ``linspace(0, 1.5, 16)``
    (rounded to one decimal), removes the combinations whose numbers already
    appear in a summary file name, and maps ``custom_predicts`` over at most
    5,000 of the remaining inputs with a dask bag.

    Relies on names defined outside this function: ``output`` and
    ``custom_predicts``.
    """
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs

    cluster = SGECluster(interface='ib0',
                         walltime='01:00:00',
                         memory='2 G',
                         resource_spec='h_vmem=2G',
                         scheduler_options={
                             'dashboard_address': ':5757',
                         },
                         project='admiralty',
                         job_extra=[
                             '-cwd',
                             '-V',
                             f'-pe smp {n_processes}',
                             '-l disk=1G',
                         ],
                         local_directory=os.sep.join(
                             [os.environ.get('PWD'), 'dask-worker-space']))

    client = None
    try:
        client = Client(cluster)
        cluster.scale(jobs=n_jobs)

        time_start = time.time()

        # custom inputs
        # 1.5 and 16 for 0.1, 1.5 and 6 for 0.3, 1.4 and 8 for 0.2
        axes = [np.linspace(0, 1.5, 16)] * 5
        matrix_stacked = np.array(np.meshgrid(*axes)).T.reshape(-1, 5)
        # round through a one-decimal string so the tuples compare equal to
        # the numbers parsed from the file names below
        custom_inputs_set = set(
            tuple(map(float, map("{:.1f}".format, item)))
            for item in matrix_stacked)

        # every decimal number found in a completed file's path forms its key
        # NOTE(review): any other decimal in the path would also be picked up
        # and prevent a match - confirm against the real file names
        custom_inputs_completed_set = {
            tuple(float(item) for item in re.findall(r'\d+\.\d+', filename))
            for filename in glob.glob(
                '/nobackup/earlacoa/machinelearning/data/summary/ds*' +
                output + '*')
        }
        custom_inputs = [
            np.array(item).reshape(1, -1)
            for item in custom_inputs_set - custom_inputs_completed_set
        ]
        print(f'custom inputs remaining for {output}: {len(custom_inputs)}')

        # dask bag and process
        # cap each run at 5,000 inputs
        custom_inputs = custom_inputs[:5000]
        print(f'predicting for {len(custom_inputs)} custom inputs ...')
        bag_custom_inputs = db.from_sequence(custom_inputs,
                                             npartitions=n_workers)
        bag_custom_inputs.map(custom_predicts).compute()

        elapsed = time.time() - time_start
        print(
            f'completed in {elapsed:0.2f} seconds, or {elapsed / 60:0.2f} minutes, or {elapsed / 3600:0.2f} hours'
        )
        # nothing left to predict would otherwise divide by zero
        if custom_inputs:
            print(
                f'average time per custom input is {elapsed / len(custom_inputs):0.2f} seconds'
            )
    finally:
        # always release the client and the cluster, even on failure
        if client is not None:
            client.close()
        cluster.close()
Esempio n. 8
0
def main():
    """Regrid the outstanding annual predictions onto the population grid.

    Creates an ``SGECluster`` with 35 single-process jobs, lists the
    ``ds*{output}.nc`` prediction files that do not yet have a
    ``_popgrid_0.25deg.nc`` counterpart, and maps ``regrid_to_pop`` over at
    most 5,000 of them with a dask bag.

    Relies on names defined outside this function: ``output`` and
    ``regrid_to_pop``.
    """
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs

    cluster = SGECluster(
        interface="ib0",
        walltime="01:00:00",
        memory="64 G",
        resource_spec="h_vmem=64G",
        scheduler_options={
            "dashboard_address": ":5757",
        },
        job_extra=[
            "-cwd",
            "-V",
            f"-pe smp {n_processes}",
            "-l disk=32G",
        ],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-worker-space"]),
    )

    client = None
    try:
        client = Client(cluster)
        cluster.scale(jobs=n_jobs)

        time_start = time.time()

        # regrid custom outputs to pop grid
        completed_suffix = "_popgrid_0.25deg.nc"
        custom_outputs = glob.glob(
            f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}/ds*{output}.nc"
        )
        # map every finished file back to the name of the input it came from
        custom_outputs_completed = [
            f"{item[:-len(completed_suffix)]}.nc"
            for item in glob.glob(
                f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}/ds*{output}_popgrid_0.25deg.nc"
            )
        ]
        custom_outputs_remaining = list(
            set(custom_outputs) - set(custom_outputs_completed)
        )
        print(f"custom outputs remaining for {output}: {len(custom_outputs_remaining)}")

        # dask bag and process
        # cap each run at 5,000 outputs (original note: 5,000 chunks over
        # 30 cores, each chunk taking 2 minutes)
        custom_outputs_remaining = custom_outputs_remaining[:5000]
        print(f"predicting for {len(custom_outputs_remaining)} custom outputs ...")
        bag_custom_outputs = db.from_sequence(
            custom_outputs_remaining, npartitions=n_workers
        )
        bag_custom_outputs.map(regrid_to_pop).compute()

        elapsed = time.time() - time_start
        print(
            f"completed in {elapsed:0.2f} seconds, or {elapsed / 60:0.2f} minutes, or {elapsed / 3600:0.2f} hours"
        )
        # nothing left to regrid would otherwise divide by zero
        if custom_outputs_remaining:
            print(
                f"average time per custom output is {elapsed / len(custom_outputs_remaining):0.2f} seconds"
            )
    finally:
        # always release the client and the cluster, even on failure
        if client is not None:
            client.close()
        cluster.close()
def _sector_variants(custom_input):
    """Return ``custom_input`` followed by its single-sector variants.

    ``custom_input`` is a ``(1, n)`` array of per-sector scaling factors
    (``n`` is 5 for every caller here: RES, IND, TRA, AGR, ENE). The result
    is, in order: the input itself; one copy per sector with every *other*
    sector set to 1.0; one copy per sector with *that* sector set to 0.0; one
    copy per sector with every *other* sector set to 0.0.
    """
    sectors = range(custom_input.shape[1])

    def keep_only(sector, fill):
        # overwrite everything except ``sector`` with ``fill``
        variant = np.copy(custom_input)
        variant[0][:sector] = fill
        variant[0][sector + 1:] = fill
        return variant

    def without(sector):
        # switch a single sector off
        variant = np.copy(custom_input)
        variant[0][sector] = 0.0
        return variant

    return ([custom_input] +
            [keep_only(sector, 1.0) for sector in sectors] +
            [without(sector) for sector in sectors] +
            [keep_only(sector, 0.0) for sector in sectors])


def _labels_with_sector_variants(custom_inputs_main):
    """Expand each input into its sector variants and format their labels."""
    custom_inputs = []
    for custom_input in custom_inputs_main:
        custom_inputs.extend(_sector_variants(custom_input))
    return [
        f'RES{custom_input[0][0]:0.3f}_IND{custom_input[0][1]:0.3f}_TRA{custom_input[0][2]:0.3f}_AGR{custom_input[0][3]:0.3f}_ENE{custom_input[0][4]:0.3f}'
        for custom_input in custom_inputs
    ]


def main():
    """Run ``scale`` for the emission configurations not yet scaled.

    Creates an ``SGECluster`` with 35 single-process jobs, builds the list of
    emission-configuration labels (``RESx_INDx_TRAx_AGRx_ENEx``) selected by
    the ``normal``, ``extra``, ``climate_cobenefits`` and
    ``top_down_2020_baseline`` flags, removes those that already have an
    ``*_adjusted_scaled.nc`` file, and maps ``scale`` over at most 35,000 of
    the remaining labels with a dask bag.

    The flags are checked in the order above and each one that is set
    replaces the label list built by the previous ones. If none is set the
    list is empty and nothing is processed.

    Relies on names defined outside this function: ``output``, ``scale`` and
    the four flags.
    """
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs

    cluster = SGECluster(
        interface="ib0",
        walltime="01:00:00",
        memory="64 G",
        resource_spec="h_vmem=64G",
        scheduler_options={
            "dashboard_address": ":5757",
        },
        job_extra=[
            "-cwd",
            "-V",
            f"-pe smp {n_processes}",
            "-l disk=32G",
        ],
        local_directory=os.sep.join(
            [os.environ.get("PWD"), "dask-worker-scale-space"]),
    )

    client = None
    try:
        client = Client(cluster)
        cluster.scale(jobs=n_jobs)

        time_start = time.time()

        # scale custom outputs
        # default so that running with no flag set is a no-op, not a NameError
        emission_configs_20percentintervals = []

        if normal:
            axes = [np.linspace(0.0, 1.4, 8)] * 5
            emission_configs = np.array(np.meshgrid(*axes)).T.reshape(-1, 5)
            emission_configs_20percentintervals = [
                f'RES{round(emission_config[0], 1)}_IND{round(emission_config[1], 1)}_TRA{round(emission_config[2], 1)}_AGR{round(emission_config[3], 1)}_ENE{round(emission_config[4], 1)}'
                for emission_config in emission_configs
            ]

        if extra:
            custom_inputs_main = [
                np.array([[1.15, 1.27, 0.98, 0.98, 1.36]]),  # bottom-up 2010
                np.array([[1.19, 1.30, 1.01, 1.01, 1.46]]),  # bottom-up 2011
                np.array([[1.20, 1.30, 1.01, 1.02, 1.39]]),  # bottom-up 2012
                np.array([[1.13, 1.29, 1.02, 1.01, 1.29]]),  # bottom-up 2013
                np.array([[1.06, 1.12, 0.99, 1.01, 1.12]]),  # bottom-up 2014
                np.array([[0.92, 0.84, 0.97, 0.99, 0.94]]),  # bottom-up 2016
                np.array([[0.84, 0.81, 0.99, 0.99, 0.89]]),  # bottom-up 2017
                np.array([[0.76, 0.934, 0.735, 0.683, 0.708]]),
                np.array([[0.704, 0.786, 0.73, 0.659, 0.6]]),
                np.array([[0.712, 0.703, 0.725, 0.676, 0.649]]),
                np.array([[0.739, 0.668, 0.701, 0.686, 0.682]]),
                np.array([[0.67, 0.609, 0.709, 0.621, 0.661]]),
                np.array([[0.744, 0.904, 0.778, 0.678, 0.716]]),
                np.array([[0.771, 0.835, 0.711, 0.685, 0.544]]),
                np.array([[0.647, 0.945, 0.746, 0.588, 0.473]]),
                np.array([[0.657, 0.745, 0.714, 0.613, 0.591]]),
                np.array([[0.582, 0.7, 0.672, 0.5, 0.492]]),
                np.array([[0.803, 0.835, 0.742, 0.71, 0.717]]),
                np.array([[0.721, 0.863, 0.712, 0.74, 0.709]]),
                np.array([[0.661, 0.674, 0.694, 0.742, 0.715]]),
                np.array([[0.701, 0.642, 0.669, 0.681, 0.679]]),
                np.array([[0.604, 0.399, 0.659, 0.613, 0.724]]),
                np.array([[0.769, 1.009, 0.697, 0.69, 0.72]]),
                np.array([[0.824, 0.759, 0.767, 0.641, 0.429]]),
                np.array([[0.858, 1.092, 0.794, 0.604, 0.475]]),
                np.array([[0.8, 0.987, 0.648, 0.57, 0.493]]),
                np.array([[0.867, 0.957, 0.677, 0.558, 0.477]]),
            ]
            emission_configs_20percentintervals = _labels_with_sector_variants(
                custom_inputs_main)

        if climate_cobenefits:
            custom_inputs_main = [
                np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_CLE_2020
                np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_MFR_2020
                np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # SDS_MFR_2020
                np.array([[0.68, 0.84, 0.71, 1.16, 0.93]]),  # Base_CLE_2030
                np.array([[0.33, 0.47, 0.48, 0.81, 0.69]]),  # Base_MFR_2030
                np.array([[0.27, 0.45, 0.41, 0.81, 0.55]]),  # SDS_MFR_2030
                np.array([[0.57, 0.75, 0.69, 1.2, 0.94]]),  # Base_CLE_2040
                np.array([[0.24, 0.41, 0.31, 0.83, 0.73]]),  # Base_MFR_2040
                np.array([[0.19, 0.38, 0.22, 0.83, 0.5]]),  # SDS_MFR_2040
                np.array([[0.52, 0.72, 0.65, 1.24, 0.91]]),  # Base_CLE_2050
                np.array([[0.2, 0.38, 0.29, 0.86, 0.72]]),  # Base_MFR_2050
                np.array([[0.18, 0.35, 0.2, 0.86, 0.46]]),  # SDS_MFR_2050
            ]
            emission_configs_20percentintervals = _labels_with_sector_variants(
                custom_inputs_main)

        if top_down_2020_baseline:
            emission_config_2020_baseline = np.array(
                [0.604, 0.399, 0.659, 0.613,
                 0.724])  # matching to PM2.5 only, top 1,000
            # 10% reduction increments from 2020 baseline up to 50%
            emission_configs = np.array(
                np.meshgrid(*[
                    np.linspace(baseline * 0.50, baseline, 6)
                    for baseline in emission_config_2020_baseline
                ])).T.reshape(-1, 5)
            # add a couple more for larger reductions in RES and IND to reach WHO-IT2
            emission_configs = list(emission_configs)
            emission_configs.append(
                np.array([0.242, 0.160, 0.659, 0.613, 0.724]))
            emission_configs.append(
                np.array([0.181, 0.120, 0.659, 0.613, 0.724]))
            emission_configs.append(
                np.array([0.121, 0.080, 0.659, 0.613, 0.724]))
            emission_configs.append(
                np.array([0.060, 0.040, 0.659, 0.613, 0.724]))

            emission_configs_20percentintervals = [
                f'RES{round(emission_config[0], 3):.3f}_IND{round(emission_config[1], 3):.3f}_TRA{round(emission_config[2], 3):.3f}_AGR{round(emission_config[3], 3):.3f}_ENE{round(emission_config[4], 3):.3f}'
                for emission_config in emission_configs
            ]

        emission_configs_completed = glob.glob(
            f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}_adjusted_scaled/ds*{output}_popgrid_0.25deg_adjusted_scaled.nc"
        )
        # Cut the label out of each completed path. The fixed offsets 88 and
        # -45 line up with the glob pattern above only when ``output`` is 9
        # characters long and the file name starts with "ds" plus one
        # separator character.
        emission_configs_completed = [
            item[88:-45] for item in emission_configs_completed
        ]

        emission_configs_20percentintervals_remaining_set = set(
            emission_configs_20percentintervals) - set(
                emission_configs_completed)
        emission_configs_remaining = list(
            emission_configs_20percentintervals_remaining_set)

        # an empty label list would otherwise divide by zero
        n_total = len(emission_configs_20percentintervals)
        if n_total:
            percent_remaining = int(
                100 * len(emission_configs_20percentintervals_remaining_set) /
                n_total)
        else:
            percent_remaining = 0
        print(
            f"custom outputs remaining for {output}: {len(emission_configs_remaining)} - 20% intervals with {percent_remaining}% remaining"
        )

        # dask bag and process
        emission_configs_remaining = emission_configs_remaining[:35000]
        print(
            f"predicting for {len(emission_configs_remaining)} custom outputs ..."
        )
        bag_emission_configs = db.from_sequence(emission_configs_remaining,
                                                npartitions=n_workers)
        bag_emission_configs.map(scale).compute()

        elapsed = time.time() - time_start
        print(
            f"completed in {elapsed:0.2f} seconds, or {elapsed / 60:0.2f} minutes, or {elapsed / 3600:0.2f} hours"
        )
        # nothing left to scale would otherwise divide by zero
        if emission_configs_remaining:
            print(
                f"average time per custom output is {elapsed / len(emission_configs_remaining):0.2f} seconds"
            )
    finally:
        # always release the client and the cluster, even on failure
        if client is not None:
            client.close()
        cluster.close()
def main():
    """Filter the emission configs for one station and save the results.

    Creates an ``SGECluster`` with ``n_jobs`` single-process jobs, maps
    ``filter_emission_configs`` over ``emission_configs`` with a dask bag, and
    merges the per-config results into one absolute and one percentage
    dictionary under the first key of ``baselines``. The merged dictionaries
    are dumped with ``joblib`` alongside ``obs_change_abs``,
    ``obs_change_per``, ``baselines``, ``targets`` and ``target_diffs``.

    Each element returned by ``filter_emission_configs`` is indexed as
    ``result[0][key]`` (absolute) and ``result[1][key]`` (percentage); both
    are merged as mappings.

    Relies on names defined outside this function: ``n_jobs``, ``walltime``,
    ``emission_configs``, ``station_id``, ``output``, ``sub_folder``,
    ``filter_emission_configs`` and the objects being saved.
    """
    # dask cluster and client
    n_processes = 1
    n_workers = n_processes * n_jobs

    cluster = SGECluster(
        interface="ib0",
        walltime=walltime,
        memory="32 G",
        resource_spec="h_vmem=32G",
        scheduler_options={
            "dashboard_address": ":5761",
        },
        job_extra=[
            "-cwd",
            "-V",
            f"-pe smp {n_processes}",
            "-l disk=32G",
        ],
        local_directory=os.sep.join(
            [os.environ.get("PWD"), "dask-find-emis-pm-space"]),
    )

    client = None
    try:
        client = Client(cluster)
        cluster.scale(jobs=n_jobs)

        time_start = time.time()

        # dask bag over emission_configs
        print(
            f"predicting over {len(emission_configs)} emission configs for {station_id} ..."
        )
        bag_emission_configs = db.from_sequence(emission_configs,
                                                npartitions=n_workers)
        results = bag_emission_configs.map(filter_emission_configs).compute()

        # only the first key of ``baselines`` is used
        key = list(baselines)[0]

        # Merge the non-empty per-config dictionaries in result order; later
        # entries win on duplicate keys. ``update`` avoids rebuilding the
        # whole merged dict for every result.
        merged_abs = {}
        merged_per = {}
        for result in results:
            station_diff_abs = result[0][key]
            station_diff_per = result[1][key]
            if len(station_diff_abs) > 0:
                merged_abs.update(station_diff_abs)
            if len(station_diff_per) > 0:
                merged_per.update(station_diff_per)

        station_diffs_per = {key: merged_per}
        station_diffs_abs = {key: merged_abs}

        # every object goes to <output_dir>/<name>_<output>_<station_id>.joblib
        output_dir = f"/nobackup/earlacoa/machinelearning/data_annual/find_emissions_that_match_change_air_quality/{sub_folder}_adjusted_scaled"
        to_save = {
            "obs_change_abs": obs_change_abs,
            "obs_change_per": obs_change_per,
            "baselines": baselines,
            "targets": targets,
            "target_diffs": target_diffs,
            "station_diffs_abs": station_diffs_abs,
            "station_diffs_per": station_diffs_per,
        }
        for name, obj in to_save.items():
            joblib.dump(
                obj, f"{output_dir}/{name}_{output}_{station_id}.joblib")

        elapsed = time.time() - time_start
        print(
            f"completed in {elapsed:0.2f} seconds, or {elapsed / 60:0.2f} minutes, or {elapsed / 3600:0.2f} hours"
        )
    finally:
        # always release the client and the cluster, even on failure
        if client is not None:
            client.close()
        cluster.close()
# Emission sectors, in the column order used by every (1, 5) custom input.
_SECTORS = ("RES", "IND", "TRA", "AGR", "ENE")


def _isolate_sector(custom_input, sector, fill):
    """Return a copy of *custom_input* with every column except *sector* set to *fill*."""
    variant = np.copy(custom_input)
    variant[0][:sector] = fill
    variant[0][sector + 1:] = fill
    return variant


def _zero_sector(custom_input, sector):
    """Return a copy of *custom_input* with column *sector* set to 0.0."""
    variant = np.copy(custom_input)
    variant[0][sector] = 0.0
    return variant


def _expand_sector_scenarios(base_inputs):
    """Expand each base input into itself plus its 15 per-sector variants.

    For every base input the result holds, in order: the base input itself,
    one variant per sector keeping only that sector's scaling (others 1.0),
    one variant per sector with that sector zeroed, and one variant per
    sector keeping only that sector's scaling (others 0.0).
    """
    sectors = range(len(_SECTORS))
    expanded = []
    for base in base_inputs:
        expanded.append(base)
        expanded.extend(_isolate_sector(base, s, 1.0) for s in sectors)
        expanded.extend(_zero_sector(base, s) for s in sectors)
        expanded.extend(_isolate_sector(base, s, 0.0) for s in sectors)
    return expanded


def _with_sector_only_variants(custom_inputs):
    """Return *custom_inputs* followed by the five sector-only variants of each entry."""
    sectors = range(len(_SECTORS))
    extended = list(custom_inputs)
    for custom_input in custom_inputs:
        extended.extend(_isolate_sector(custom_input, s, 0.0) for s in sectors)
    return extended


def _emission_config_label(custom_input):
    """Format a (1, 5) input as ``RESx.xxx_INDx.xxx_TRAx.xxx_AGRx.xxx_ENEx.xxx``."""
    return "_".join(f"{name}{value:0.3f}"
                    for name, value in zip(_SECTORS, custom_input[0]))


def _parse_emission_config(label):
    """Parse the decimal numbers in an emission-config label into a (1, n) array."""
    # The dot is escaped so that only real decimals match (the unescaped
    # pattern would mis-parse any scaling of 10 or more).
    return np.array([float(number)
                     for number in re.findall(r"\d+\.\d+", label)
                     ]).reshape(1, -1)


def _deduplicate_inputs(custom_inputs):
    """Drop duplicate inputs by round-tripping them through their 3-decimal labels.

    The returned arrays therefore carry values rounded to three decimals, and
    their order is the (arbitrary) iteration order of a set.
    """
    labels = {_emission_config_label(item) for item in custom_inputs}
    return [_parse_emission_config(label) for label in labels]


def main():
    """Run ``custom_predicts`` over a batch of emission configurations on an SGE dask cluster.

    The configurations are chosen by the flags ``normal``, ``extra``,
    ``climate_cobenefits`` and ``top_down_2020_baseline`` and by ``output``,
    all of which are read from outside this function. The flags are checked
    in that order and a later truthy flag replaces the inputs built by an
    earlier one. At most 5,000 inputs are processed per call.

    The client and cluster are always closed, even when building the inputs
    or running the predictions raises.
    """
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs

    cluster = SGECluster(
        interface="ib0",
        walltime="01:00:00",
        memory="2 G",
        resource_spec="h_vmem=2G",
        scheduler_options={
            "dashboard_address": ":5757",
        },
        job_extra=[
            "-cwd",
            "-V",
            f"-pe smp {n_processes}",
            "-l disk=1G",
        ],
        # fall back to the working directory when PWD is not exported
        local_directory=os.sep.join(
            [os.environ.get("PWD", os.getcwd()), "dask-worker-space"]),
    )

    client = None
    try:
        client = Client(cluster)

        cluster.scale(jobs=n_jobs)

        time_start = time.time()

        # stays empty when no mode flag is set, so the run ends cleanly
        custom_inputs = []

        # custom inputs
        if normal:
            # 1.5 and 16 for 0.1, 1.5 and 6 for 0.3, 1.4 and 8 for 0.2
            axes = [np.linspace(0, 1.5, 16) for _ in _SECTORS]
            matrix_stacked = np.array(np.meshgrid(*axes)).T.reshape(
                -1, len(_SECTORS))
            # round to one decimal so the grid values compare equal to the
            # numbers parsed back out of the completed filenames
            custom_inputs_set = {
                tuple(float(f"{value:.1f}") for value in item)
                for item in matrix_stacked
            }

            custom_inputs_completed_filenames = glob.glob(
                f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}/ds*{output}*"
            )
            # NOTE(review): every decimal in the path is collected, so a
            # filename carrying an extra decimal (e.g. a grid resolution)
            # yields a longer tuple that never matches a grid point - confirm
            # the glob only returns the intended prediction files.
            custom_inputs_completed_set = {
                tuple(float(number)
                      for number in re.findall(r"\d+\.\d+", filename))
                for filename in custom_inputs_completed_filenames
            }

            custom_inputs_remaining_set = custom_inputs_set - custom_inputs_completed_set
            custom_inputs = [
                np.array(item).reshape(1, -1)
                for item in custom_inputs_remaining_set
            ]
            print(
                f"custom inputs remaining for {output}: {len(custom_inputs)}")

        if extra:
            custom_inputs_main = [
                np.array([[1.15, 1.27, 0.98, 0.98, 1.36]]),  # bottom-up 2010
                np.array([[1.19, 1.30, 1.01, 1.01, 1.46]]),  # bottom-up 2011
                np.array([[1.20, 1.30, 1.01, 1.02, 1.39]]),  # bottom-up 2012
                np.array([[1.13, 1.29, 1.02, 1.01, 1.29]]),  # bottom-up 2013
                np.array([[1.06, 1.12, 0.99, 1.01, 1.12]]),  # bottom-up 2014
                np.array([[0.92, 0.84, 0.97, 0.99, 0.94]]),  # bottom-up 2016
                np.array([[0.84, 0.81, 0.99, 0.99, 0.89]]),  # bottom-up 2017
                np.array([[0.76, 0.934, 0.735, 0.683, 0.708]]),  # top-down 2016 - both
                np.array([[0.704, 0.786, 0.73, 0.659, 0.6]]),  # top-down 2017 - both
                np.array([[0.712, 0.703, 0.725, 0.676, 0.649]]),  # top-down 2018 - both
                np.array([[0.739, 0.668, 0.701, 0.686, 0.682]]),  # top-down 2019 - both
                np.array([[0.67, 0.609, 0.709, 0.621, 0.661]]),  # top-down 2020 - both
                np.array([[0.744, 0.904, 0.778, 0.678, 0.716]]),  # top-down 2016 - either
                np.array([[0.771, 0.835, 0.711, 0.685, 0.544]]),  # top-down 2017 - either
                np.array([[0.647, 0.945, 0.746, 0.588, 0.473]]),  # top-down 2018 - either
                np.array([[0.657, 0.745, 0.714, 0.613, 0.591]]),  # top-down 2019 - either
                np.array([[0.582, 0.7, 0.672, 0.5, 0.492]]),  # top-down 2020 - either
                np.array([[0.803, 0.835, 0.742, 0.71, 0.717]]),  # top-down 2016 - pm25 only
                np.array([[0.721, 0.863, 0.712, 0.74, 0.709]]),  # top-down 2017 - pm25 only
                np.array([[0.661, 0.674, 0.694, 0.742, 0.715]]),  # top-down 2018 - pm25 only
                np.array([[0.701, 0.642, 0.669, 0.681, 0.679]]),  # top-down 2019 - pm25 only
                np.array([[0.604, 0.399, 0.659, 0.613, 0.724]]),  # top-down 2020 - pm25 only
                np.array([[0.769, 1.009, 0.697, 0.69, 0.72]]),  # top-down 2016 - o3 only
                np.array([[0.824, 0.759, 0.767, 0.641, 0.429]]),  # top-down 2017 - o3 only
                np.array([[0.858, 1.092, 0.794, 0.604, 0.475]]),  # top-down 2018 - o3 only
                np.array([[0.8, 0.987, 0.648, 0.57, 0.493]]),  # top-down 2019 - o3 only
                np.array([[0.867, 0.957, 0.677, 0.558, 0.477]]),  # top-down 2020 - o3 only
            ]
            custom_inputs = _expand_sector_scenarios(custom_inputs_main)

            # just for emulator_predictions.py as this is required in order to adjust for double emissions
            custom_inputs = _with_sector_only_variants(custom_inputs)

            custom_inputs = _deduplicate_inputs(custom_inputs)

        if climate_cobenefits:
            custom_inputs_main = [
                np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_CLE_2020
                np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_MFR_2020
                np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # SDS_MFR_2020
                np.array([[0.68, 0.84, 0.71, 1.16, 0.93]]),  # Base_CLE_2030
                np.array([[0.33, 0.47, 0.48, 0.81, 0.69]]),  # Base_MFR_2030
                np.array([[0.27, 0.45, 0.41, 0.81, 0.55]]),  # SDS_MFR_2030
                np.array([[0.57, 0.75, 0.69, 1.2, 0.94]]),  # Base_CLE_2040
                np.array([[0.24, 0.41, 0.31, 0.83, 0.73]]),  # Base_MFR_2040
                np.array([[0.19, 0.38, 0.22, 0.83, 0.5]]),  # SDS_MFR_2040
                np.array([[0.52, 0.72, 0.65, 1.24, 0.91]]),  # Base_CLE_2050
                np.array([[0.2, 0.38, 0.29, 0.86, 0.72]]),  # Base_MFR_2050
                np.array([[0.18, 0.35, 0.2, 0.86, 0.46]]),  # SDS_MFR_2050
            ]
            custom_inputs = _expand_sector_scenarios(custom_inputs_main)

            # just for emulator_predictions.py as this is required in order to adjust for double emissions
            custom_inputs = _with_sector_only_variants(custom_inputs)

            custom_inputs = _deduplicate_inputs(custom_inputs)

        if top_down_2020_baseline:
            emission_config_2020_baseline = np.array(
                [0.604, 0.399, 0.659, 0.613,
                 0.724])  # matching to PM2.5 only, top 1,000
            # 10% reduction increments from 2020 baseline up to 50%
            axes = [
                np.linspace(scaling * 0.50, scaling, 6)
                for scaling in emission_config_2020_baseline
            ]
            emission_configs = np.array(np.meshgrid(*axes)).T.reshape(
                -1, len(_SECTORS))
            custom_inputs = [
                np.array(item).reshape(1, -1) for item in emission_configs
            ]
            # add a couple more for larger reductions in RES and IND to reach WHO-IT2
            custom_inputs.append(
                np.array([[0.242, 0.160, 0.659, 0.613, 0.724]]))
            custom_inputs.append(
                np.array([[0.181, 0.120, 0.659, 0.613, 0.724]]))
            custom_inputs.append(
                np.array([[0.121, 0.080, 0.659, 0.613, 0.724]]))
            custom_inputs.append(
                np.array([[0.060, 0.040, 0.659, 0.613, 0.724]]))

            # just for emulator_predictions.py as this is required in order to adjust for double emissions
            custom_inputs = _with_sector_only_variants(custom_inputs)

            requested_labels = {
                _emission_config_label(item) for item in custom_inputs
            }

            # labels of the configurations that already have a prediction file
            label_pattern = re.compile(
                r"RES\d+\.\d+_IND\d+\.\d+_TRA\d+\.\d+_AGR\d+\.\d+_ENE\d+\.\d+"
            )
            custom_inputs_completed_filenames = glob.glob(
                f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}/ds*{output}.nc"
            )
            completed_labels = set()
            for filename in custom_inputs_completed_filenames:
                match = label_pattern.search(filename)
                if match:
                    completed_labels.add(match.group(0))

            custom_inputs = [
                _parse_emission_config(label)
                for label in requested_labels - completed_labels
            ]

        # dask bag and process
        # first batch of at most 5,000; use custom_inputs[5000:] for the rest
        custom_inputs = custom_inputs[:5000]

        print(f"predicting for {len(custom_inputs)} custom inputs ...")
        if custom_inputs:
            bag_custom_inputs = db.from_sequence(custom_inputs,
                                                 npartitions=n_workers)
            bag_custom_inputs.map(custom_predicts).compute()

        time_end = time.time() - time_start
        print(
            f"completed in {time_end:0.2f} seconds, or {time_end / 60:0.2f} minutes, or {time_end / 3600:0.2f} hours"
        )
        # no average when nothing was processed (would divide by zero)
        if custom_inputs:
            print(
                f"average time per custom input is {time_end / len(custom_inputs):0.2f} seconds"
            )
    finally:
        # always release the scheduler connection and the SGE jobs
        if client is not None:
            client.close()
        cluster.close()
Esempio n. 12
0
def main():
    # dask cluster and client
    if output == 'PM2_5_DRY':
        n_jobs = 20
        n_outputs = 1000
    elif output == 'o3_6mDM8h':
        n_jobs = 20
        n_outputs = 2000

    n_processes = 1
    n_workers = n_processes * n_jobs

    cluster = SGECluster(
        interface="ib0",
        walltime="02:00:00",
        memory=f"48 G",
        resource_spec=f"h_vmem=48G",
        scheduler_options={
            "dashboard_address": ":7777",
        },
        job_extra=[
            "-cwd",
            "-V",
            f"-pe smp {n_processes}",
            f"-l disk=48G",
        ],
        local_directory=os.sep.join(
            [os.environ.get("PWD"), "dask-hia-ozone-space"]),
    )

    client = Client(cluster)

    cluster.scale(jobs=n_jobs)

    time_start = time.time()

    # find remaining inputs
    if normal:
        custom_outputs = glob.glob(
            f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}_adjusted_scaled/ds*{output}_popgrid_0.25deg_adjusted_scaled.nc"
        )
        custom_outputs_completed = glob.glob(
            f"/nobackup/earlacoa/machinelearning/data_annual/health_impact_assessments/{output}_adjusted_scaled/df_country_hia_*.csv"
        )
        custom_outputs_remaining_set = set([
            item.split("/")[-1][3:-1 - len(output) - 19 - 7]
            for item in custom_outputs
        ]) - set([
            item.split("/")[-1][15 + len(output) + 1:-4 - 7]
            for item in custom_outputs_completed
        ])
        custom_outputs_remaining = [
            item for item in custom_outputs_remaining_set
        ]
        print(
            f"custom outputs remaining for {output}: {len(custom_outputs_remaining)} - 10% intervals with {int(100 * len(custom_outputs_remaining_set) / 16**5)}% remaining"
        )

        reduce_to_20percent_intervals = True
        if reduce_to_20percent_intervals:
            emission_configs = np.array(
                np.meshgrid(
                    np.linspace(0.0, 1.4, 8),
                    np.linspace(0.0, 1.4, 8),
                    np.linspace(0.0, 1.4, 8),
                    np.linspace(0.0, 1.4, 8),
                    np.linspace(0.0, 1.4, 8),
                )).T.reshape(-1, 5)
            emission_configs_20percentintervals = []
            for emission_config in emission_configs:
                emission_configs_20percentintervals.append(
                    f'RES{round(emission_config[0], 1)}_IND{round(emission_config[1], 1)}_TRA{round(emission_config[2], 1)}_AGR{round(emission_config[3], 1)}_ENE{round(emission_config[4], 1)}'
                )

            emission_configs_completed = []
            for custom_output_completed in custom_outputs_completed:
                emission_configs_completed.append(
                    re.findall(
                        r'RES\d+.\d+_IND\d+.\d+_TRA\d+.\d+_AGR\d+.\d+_ENE\d+.\d+',
                        custom_output_completed)[0])

            emission_configs_20percentintervals_remaining_set = set(
                emission_configs_20percentintervals) - set(
                    emission_configs_completed)
            custom_outputs_remaining = [
                item
                for item in emission_configs_20percentintervals_remaining_set
            ]
            print(
                f"custom outputs remaining for {output}: {len(custom_outputs_remaining)} - 20% intervals with {int(100 * len(emission_configs_20percentintervals_remaining_set) / len(emission_configs_20percentintervals))}% remaining"
            )

    if extra:
        if year == '2010':
            custom_inputs_main = [
                np.array([[1.15, 1.27, 0.98, 0.98, 1.36]]),  # bottom-up 2010
            ]
        elif year == '2011':
            custom_inputs_main = [
                np.array([[1.19, 1.30, 1.01, 1.01, 1.46]]),  # bottom-up 2011
            ]
        elif year == '2012':
            custom_inputs_main = [
                np.array([[1.20, 1.30, 1.01, 1.02, 1.39]]),  # bottom-up 2012
            ]
        elif year == '2013':
            custom_inputs_main = [
                np.array([[1.13, 1.29, 1.02, 1.01, 1.29]]),  # bottom-up 2013
            ]
        elif year == '2014':
            custom_inputs_main = [
                np.array([[1.06, 1.12, 0.99, 1.01, 1.12]]),  # bottom-up 2014
            ]
        elif year == '2015':
            custom_inputs_main = [
                np.array([[1.0, 1.0, 1.0, 1.0, 1.0]]),  # control
            ]
        elif year == '2016':
            custom_inputs_main = [
                np.array([[0.92, 0.84, 0.97, 0.99, 0.94]]),  # bottom-up 2016
                np.array([[0.76, 0.934, 0.735, 0.683,
                           0.708]]),  # top-down 2016 - both
                np.array([[0.744, 0.904, 0.778, 0.678,
                           0.716]]),  # top-down 2016 - either
                np.array([[0.803, 0.835, 0.742, 0.71,
                           0.717]]),  # top-down 2016 - pm25 only
                np.array([[0.769, 1.009, 0.697, 0.69,
                           0.72]]),  # top-down 2016 - o3 only
            ]
        elif year == '2017':
            custom_inputs_main = [
                np.array([[0.84, 0.81, 0.99, 0.99, 0.89]]),  # bottom-up 2017
                np.array([[0.704, 0.786, 0.73, 0.659,
                           0.6]]),  # top-down 2017 - both
                np.array([[0.771, 0.835, 0.711, 0.685,
                           0.544]]),  # top-down 2017 - either
                np.array([[0.721, 0.863, 0.712, 0.74,
                           0.709]]),  # top-down 2017 - pm25 only
                np.array([[0.824, 0.759, 0.767, 0.641,
                           0.429]]),  # top-down 2017 - o3 only
            ]
        elif year == '2018':
            custom_inputs_main = [
                np.array([[0.712, 0.703, 0.725, 0.676,
                           0.649]]),  # top-down 2018 - both
                np.array([[0.647, 0.945, 0.746, 0.588,
                           0.473]]),  # top-down 2018 - either
                np.array([[0.661, 0.674, 0.694, 0.742,
                           0.715]]),  # top-down 2018 - pm25 only
                np.array([[0.858, 1.092, 0.794, 0.604,
                           0.475]]),  # top-down 2018 - o3 only
            ]
        elif year == '2019':
            custom_inputs_main = [
                np.array([[0.739, 0.668, 0.701, 0.686,
                           0.682]]),  # top-down 2019 - both
                np.array([[0.657, 0.745, 0.714, 0.613,
                           0.591]]),  # top-down 2019 - either
                np.array([[0.701, 0.642, 0.669, 0.681,
                           0.679]]),  # top-down 2019 - pm25 only
                np.array([[0.8, 0.987, 0.648, 0.57,
                           0.493]]),  # top-down 2019 - o3 only
            ]
        elif year == '2020':
            custom_inputs_main = [
                np.array([[0.67, 0.609, 0.709, 0.621,
                           0.661]]),  # top-down 2020 - both
                np.array([[0.582, 0.7, 0.672, 0.5,
                           0.492]]),  # top-down 2020 - either
                np.array([[0.604, 0.399, 0.659, 0.613,
                           0.724]]),  # top-down 2020 - pm25 only
                np.array([[0.867, 0.957, 0.677, 0.558,
                           0.477]]),  # top-down 2020 - o3 only
            ]
        custom_inputs = []
        for custom_input in custom_inputs_main:
            custom_input_res = np.copy(custom_input)
            custom_input_ind = np.copy(custom_input)
            custom_input_tra = np.copy(custom_input)
            custom_input_agr = np.copy(custom_input)
            custom_input_ene = np.copy(custom_input)
            custom_input_nores = np.copy(custom_input)
            custom_input_noind = np.copy(custom_input)
            custom_input_notra = np.copy(custom_input)
            custom_input_noagr = np.copy(custom_input)
            custom_input_noene = np.copy(custom_input)

            custom_input_res[0][1:] = 1.0
            custom_input_ind[0][0] = 1.0
            custom_input_ind[0][2:] = 1.0
            custom_input_tra[0][:2] = 1.0
            custom_input_tra[0][3:] = 1.0
            custom_input_agr[0][:3] = 1.0
            custom_input_agr[0][4:] = 1.0
            custom_input_ene[0][:4] = 1.0

            custom_input_nores[0][0] = 0.0
            custom_input_noind[0][1] = 0.0
            custom_input_notra[0][2] = 0.0
            custom_input_noagr[0][3] = 0.0
            custom_input_noene[0][4] = 0.0

            custom_inputs.append(custom_input)
            custom_inputs.append(custom_input_res)
            custom_inputs.append(custom_input_ind)
            custom_inputs.append(custom_input_tra)
            custom_inputs.append(custom_input_agr)
            custom_inputs.append(custom_input_ene)
            custom_inputs.append(custom_input_nores)
            custom_inputs.append(custom_input_noind)
            custom_inputs.append(custom_input_notra)
            custom_inputs.append(custom_input_noagr)
            custom_inputs.append(custom_input_noene)

        custom_outputs_remaining = []
        for custom_input in custom_inputs:
            emission_config = f'RES{custom_input[0][0]:0.3f}_IND{custom_input[0][1]:0.3f}_TRA{custom_input[0][2]:0.3f}_AGR{custom_input[0][3]:0.3f}_ENE{custom_input[0][4]:0.3f}'
            custom_outputs_remaining.append(emission_config)

    if climate_cobenefits:
        custom_inputs_main = [
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_CLE_2020
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_MFR_2020
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # SDS_MFR_2020
            np.array([[0.68, 0.84, 0.71, 1.16, 0.93]]),  # Base_CLE_2030
            np.array([[0.33, 0.47, 0.48, 0.81, 0.69]]),  # Base_MFR_2030
            np.array([[0.27, 0.45, 0.41, 0.81, 0.55]]),  # SDS_MFR_2030
            np.array([[0.57, 0.75, 0.69, 1.2, 0.94]]),  # Base_CLE_2040
            np.array([[0.24, 0.41, 0.31, 0.83, 0.73]]),  # Base_MFR_2040
            np.array([[0.19, 0.38, 0.22, 0.83, 0.5]]),  # SDS_MFR_2040
            np.array([[0.52, 0.72, 0.65, 1.24, 0.91]]),  # Base_CLE_2050
            np.array([[0.2, 0.38, 0.29, 0.86, 0.72]]),  # Base_MFR_2050
            np.array([[0.18, 0.35, 0.2, 0.86, 0.46]]),  # SDS_MFR_2050
        ]
        custom_inputs = []
        for custom_input in custom_inputs_main:
            custom_input_res = np.copy(custom_input)
            custom_input_ind = np.copy(custom_input)
            custom_input_tra = np.copy(custom_input)
            custom_input_agr = np.copy(custom_input)
            custom_input_ene = np.copy(custom_input)
            custom_input_nores = np.copy(custom_input)
            custom_input_noind = np.copy(custom_input)
            custom_input_notra = np.copy(custom_input)
            custom_input_noagr = np.copy(custom_input)
            custom_input_noene = np.copy(custom_input)
            custom_input_resonly = np.copy(custom_input)
            custom_input_indonly = np.copy(custom_input)
            custom_input_traonly = np.copy(custom_input)
            custom_input_agronly = np.copy(custom_input)
            custom_input_eneonly = np.copy(custom_input)

            custom_input_res[0][1:] = 1.0
            custom_input_ind[0][0] = 1.0
            custom_input_ind[0][2:] = 1.0
            custom_input_tra[0][:2] = 1.0
            custom_input_tra[0][3:] = 1.0
            custom_input_agr[0][:3] = 1.0
            custom_input_agr[0][4:] = 1.0
            custom_input_ene[0][:4] = 1.0

            custom_input_nores[0][0] = 0.0
            custom_input_noind[0][1] = 0.0
            custom_input_notra[0][2] = 0.0
            custom_input_noagr[0][3] = 0.0
            custom_input_noene[0][4] = 0.0

            custom_input_resonly[0][1:] = 0.0
            custom_input_indonly[0][0] = 0.0
            custom_input_indonly[0][2:] = 0.0
            custom_input_traonly[0][:2] = 0.0
            custom_input_traonly[0][3:] = 0.0
            custom_input_agronly[0][:3] = 0.0
            custom_input_agronly[0][4:] = 0.0
            custom_input_eneonly[0][:4] = 0.0

            custom_inputs.append(custom_input)
            custom_inputs.append(custom_input_res)
            custom_inputs.append(custom_input_ind)
            custom_inputs.append(custom_input_tra)
            custom_inputs.append(custom_input_agr)
            custom_inputs.append(custom_input_ene)
            custom_inputs.append(custom_input_nores)
            custom_inputs.append(custom_input_noind)
            custom_inputs.append(custom_input_notra)
            custom_inputs.append(custom_input_noagr)
            custom_inputs.append(custom_input_noene)
            custom_inputs.append(custom_input_resonly)
            custom_inputs.append(custom_input_indonly)
            custom_inputs.append(custom_input_traonly)
            custom_inputs.append(custom_input_agronly)
            custom_inputs.append(custom_input_eneonly)

        custom_outputs_remaining = []
        for custom_input in custom_inputs:
            emission_config = f'RES{custom_input[0][0]:0.3f}_IND{custom_input[0][1]:0.3f}_TRA{custom_input[0][2]:0.3f}_AGR{custom_input[0][3]:0.3f}_ENE{custom_input[0][4]:0.3f}'
            custom_outputs_remaining.append(emission_config)

    if top_down_2020_baseline:
        emission_config_2020_baseline = np.array(
            [0.604, 0.399, 0.659, 0.613,
             0.724])  # matching to PM2.5 only, top 1,000
        emission_configs = np.array(
            np.meshgrid(
                np.linspace(
                    emission_config_2020_baseline[0] * 0.50,
                    emission_config_2020_baseline[0], 6
                ),  # 10% reduction increments from 2020 baseline up to 50%
                np.linspace(emission_config_2020_baseline[1] * 0.50,
                            emission_config_2020_baseline[1], 6),
                np.linspace(emission_config_2020_baseline[2] * 0.50,
                            emission_config_2020_baseline[2], 6),
                np.linspace(emission_config_2020_baseline[3] * 0.50,
                            emission_config_2020_baseline[3], 6),
                np.linspace(emission_config_2020_baseline[4] * 0.50,
                            emission_config_2020_baseline[4], 6),
            )).T.reshape(-1, 5)
        # add a couple more for larger reductions in RES and IND to reach WHO-IT2
        emission_configs = list(emission_configs)
        emission_configs.append(np.array([[0.242, 0.160, 0.659, 0.613,
                                           0.724]]))
        emission_configs.append(np.array([[0.181, 0.120, 0.659, 0.613,
                                           0.724]]))
        emission_configs.append(np.array([[0.121, 0.080, 0.659, 0.613,
                                           0.724]]))
        emission_configs.append(np.array([[0.060, 0.040, 0.659, 0.613,
                                           0.724]]))

        emission_configs_total = []
        for emission_config in emission_configs:
            emission_configs_total.append(
                f'RES{round(emission_config[0], 3):.3f}_IND{round(emission_config[1], 3):.3f}_TRA{round(emission_config[2], 3):.3f}_AGR{round(emission_config[3], 3):.3f}_ENE{round(emission_config[4], 3):.3f}'
            )

        custom_outputs_completed = glob.glob(
            f"/nobackup/earlacoa/machinelearning/data_annual/health_impact_assessments/{output}_adjusted_scaled/df_country_hia_*.csv"
        )
        emission_configs_completed = []
        for custom_output_completed in custom_outputs_completed:
            emission_configs_completed.append(
                re.findall(
                    r'RES\d+.\d+_IND\d+.\d+_TRA\d+.\d+_AGR\d+.\d+_ENE\d+.\d+',
                    custom_output_completed)[0])

        emission_configs_remaining_set = set(emission_configs_total) - set(
            emission_configs_completed)
        custom_outputs_remaining = [
            item for item in emission_configs_remaining_set
        ]
        print(
            f"custom outputs remaining: {len(custom_outputs_remaining)}, {int(100 * len(emission_configs_remaining_set) / len(emission_configs_total))}%"
        )

    # --------------------------------------------------

    # dask bag and process
    # run in 10 chunks over 10 cores, each chunk taking 2 minutes
    custom_outputs_remaining = custom_outputs_remaining[0:n_outputs]
    print(f"predicting for {len(custom_outputs_remaining)} custom outputs ...")
    bag_custom_outputs = db.from_sequence(custom_outputs_remaining,
                                          npartitions=n_workers)
    if output == "PM2_5_DRY":
        bag_custom_outputs.map(health_impact_assessment_pm25).compute()
    elif output == "o3_6mDM8h":
        bag_custom_outputs.map(health_impact_assessment_o3).compute()

    time_end = time.time() - time_start
    print(
        f"completed in {time_end:0.2f} seconds, or {time_end / 60:0.2f} minutes, or {time_end / 3600:0.2f} hours"
    )

    client.close()
    cluster.close()