def main():
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="01:00:00",
        memory="64 G",
        resource_spec="h_vmem=64G",
        scheduler_options={"dashboard_address": ":5757"},
        job_extra=["-cwd", "-V", f"-pe smp {n_processes}"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-worker-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=n_jobs)

    time_start = time.time()

    # regrid custom outputs to the population grid
    # `path` and `output` are module-level globals
    custom_outputs = glob.glob(path + "ds*" + output + ".nc")
    custom_outputs_completed = glob.glob(path + "ds*" + output + "_popgrid_0.05deg.nc")
    custom_outputs_completed = [f"{item[0:-19]}.nc" for item in custom_outputs_completed]
    custom_outputs_remaining_set = set(custom_outputs) - set(custom_outputs_completed)
    custom_outputs_remaining = list(custom_outputs_remaining_set)
    print(f"custom outputs remaining for {output}: {len(custom_outputs_remaining)}")

    # dask bag and process
    # process in batches of 2,500 over 30 cores; each batch takes ~5 minutes
    custom_outputs_remaining = custom_outputs_remaining[0:2500]
    print(f"predicting for {len(custom_outputs_remaining)} custom outputs ...")
    bag_custom_outputs = db.from_sequence(custom_outputs_remaining, npartitions=n_workers)
    bag_custom_outputs.map(regrid_to_pop).compute()

    time_end = time.time() - time_start
    print(
        f"completed in {time_end:0.2f} seconds, or {time_end / 60:0.2f} minutes, or {time_end / 3600:0.2f} hours"
    )
    print(
        f"average time per custom output is {time_end / len(custom_outputs_remaining):0.2f} seconds"
    )

    client.close()
    cluster.close()
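
# The per-file worker `regrid_to_pop` is defined elsewhere in the repo. A
# minimal, hypothetical sketch of what such a worker could look like, assuming
# xarray plus xesmf and an illustrative 0.05 degree target grid (the grid
# extent and library choice are assumptions, not the original implementation):
import numpy as np
import xarray as xr
import xesmf as xe

def regrid_to_pop_sketch(filename):
    """Regrid one emulator output onto a 0.05 degree population grid (sketch)."""
    ds = xr.open_dataset(filename)
    target = xr.Dataset(
        {
            "lat": (["lat"], np.arange(-60.0, 85.0, 0.05)),
            "lon": (["lon"], np.arange(-180.0, 180.0, 0.05)),
        }
    )
    regridder = xe.Regridder(ds, target, "bilinear")
    ds_regridded = regridder(ds)
    # mirror the "*_popgrid_0.05deg.nc" naming convention used above
    ds_regridded.to_netcdf(f"{filename[:-3]}_popgrid_0.05deg.nc")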
def main():
    # dask cluster and client
    n_jobs = 20
    n_processes = 1
    n_workers = n_processes * n_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="02:00:00",
        memory="48 G",
        resource_spec="h_vmem=48G",
        scheduler_options={"dashboard_address": ":7777"},
        job_extra=["-cwd", "-V", f"-pe smp {n_processes}", "-l disk=48G"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-hia-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=n_jobs)

    time_start = time.time()

    # dask bag and process
    simulations = [f"emulator_Base_CLE_2020_{output}"]
    # alternative simulation lists:
    # simulations = []
    # simulations.append(f"wrfchem_Base_CLE_2020_{output}")
    # simulations.append(f"wrfchem_Base_CLE_2050_{output}")
    # simulations.append(f"wrfchem_Base_MFR_2050_{output}")
    # simulations.append(f"wrfchem_SDS_MFR_2050_{output}")
    # for year in ["2020", "2030", "2040", "2050"]:
    #     for scenario in ["Base_CLE", "Base_MFR", "SDS_MFR"]:
    #         for sim in ["", "_RES", "_IND", "_TRA", "_AGR", "_ENE",
    #                     "_NO_RES", "_NO_IND", "_NO_TRA", "_NO_AGR", "_NO_ENE"]:
    #             simulations.append(f"emulator_{scenario}_{year}{sim}_{output}")
    print(f"predicting for {len(simulations)} custom outputs ...")
    bag_simulations = db.from_sequence(simulations, npartitions=n_workers)
    if output == "PM2_5_DRY":
        bag_simulations.map(health_impact_assessment_pm25).compute()
    elif output == "o3_6mDM8h":
        bag_simulations.map(health_impact_assessment_o3).compute()

    time_end = time.time() - time_start
    print(
        f"completed in {time_end:0.2f} seconds, or {time_end / 60:0.2f} minutes, or {time_end / 3600:0.2f} hours"
    )

    client.close()
    cluster.close()
def main():
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="48:00:00",
        memory="12 G",
        resource_spec="h_vmem=12G",
        scheduler_options={"dashboard_address": ":5757"},
        job_extra=["-cwd", "-V", f"-pe smp {n_processes}", "-l disk=1G"],
        local_directory=os.sep.join(
            [os.environ.get("PWD"), "dask-worker-space_popweighted_region"]
        ),
    )
    client = Client(cluster)
    cluster.scale(jobs=n_jobs)

    # main processing
    matrix_stacked = np.array(
        np.meshgrid(
            np.linspace(0, 1.4, 8),
            np.linspace(0, 1.4, 8),
            np.linspace(0, 1.4, 8),
            np.linspace(0, 1.4, 8),
            np.linspace(0, 1.4, 8),
        )
    ).T.reshape(-1, 5)
    custom_inputs = [np.array(item).reshape(1, -1) for item in matrix_stacked]

    print(f"processing for {output} over {region} ...")
    bag_custom_inputs = db.from_sequence(custom_inputs, npartitions=n_workers)
    outputs_popweighted = bag_custom_inputs.map(popweight_outputs_for_input).compute()

    print("saving ...")
    joblib.dump(
        outputs_popweighted,
        f"/nobackup/earlacoa/machinelearning/data_annual/popweighted/popweighted_{region}_{output}_0.25deg_adjusted_scaled.joblib",
    )

    client.close()
    cluster.close()
def main():
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="48:00:00",
        memory="12 G",
        resource_spec="h_vmem=12G",
        scheduler_options={"dashboard_address": ":5757"},
        job_extra=["-cwd", "-V", f"-pe smp {n_processes}", "-l disk=1G"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-worker-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=n_jobs)

    # main processing
    matrix_stacked = np.array(
        np.meshgrid(
            np.linspace(0, 1.5, 16),
            np.linspace(0, 1.5, 16),
            np.linspace(0, 1.5, 16),
            np.linspace(0, 1.5, 16),
            np.linspace(0, 1.5, 16),
        )
    ).T.reshape(-1, 5)
    custom_inputs = [np.array(item).reshape(1, -1) for item in matrix_stacked]

    print(f"processing for {output} over {region} ...")
    bag_custom_inputs = db.from_sequence(custom_inputs, npartitions=n_workers)
    outputs_popweighted = bag_custom_inputs.map(popweight_outputs_for_input).compute()

    print("saving ...")
    joblib.dump(
        outputs_popweighted,
        f"/nobackup/earlacoa/machinelearning/data/popweighted/popweighted_{region}_{output}.joblib",
    )

    client.close()
    cluster.close()
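
# `popweight_outputs_for_input` is defined elsewhere in the repo. The core of a
# population-weighted regional mean looks like the following sketch, assuming
# output and population xarray DataArrays on the same grid plus a boolean
# region mask (all names here are hypothetical stand-ins):
def population_weighted_mean(da_output, da_pop, region_mask):
    """Population-weighted mean of da_output over the masked region (sketch)."""
    weighted_sum = (da_output * da_pop).where(region_mask).sum()
    return float(weighted_sum / da_pop.where(region_mask).sum())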
def main():
    # dask cluster and client
    number_processes = 1
    number_jobs = 35
    number_workers = number_processes * number_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="04:00:00",
        memory="2 G",
        resource_spec="h_vmem=2G",
        scheduler_options={"dashboard_address": ":2727"},
        job_extra=["-cwd", "-V", f"-pe smp {number_processes}", "-l disk=1G"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-worker-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=number_jobs)

    # main processing
    print("processing ...")
    bag = db.from_sequence(nums, npartitions=number_workers)
    results = bag.map(weird_function).compute()

    print("saving ...")
    # the original f"/nobackup/${USER}/..." mixed shell and Python
    # interpolation; resolve the username via the environment instead
    joblib.dump(results, f"/nobackup/{os.environ.get('USER')}/results.joblib")

    client.close()
    cluster.close()
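
# `nums` and `weird_function` above are template placeholders: any picklable
# input sequence and per-item function will do, e.g. (hypothetical):
nums = list(range(100))

def weird_function(n):
    return n ** 2 + 1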
def main():
    # dask cluster and client
    number_processes = 1
    number_jobs = 35
    number_workers = number_processes * number_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="04:00:00",
        memory="12 G",
        resource_spec="h_vmem=12G",
        scheduler_options={"dashboard_address": ":2727"},
        job_extra=["-cwd", "-V", f"-pe smp {number_processes}", "-l disk=1G"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-worker-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=number_jobs)

    # main processing
    print("processing ...")
    bag = db.from_sequence(sims, npartitions=number_workers)
    results = bag.map(create_ozone_metric).compute()
    print("complete")

    client.close()
    cluster.close()
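
# `create_ozone_metric` is defined elsewhere in the repo. o3_6mDM8h is commonly
# the annual maximum of the 6-month running mean of the daily maximum 8-hour
# ozone; a sketch under that assumption, for hourly o3 in an xarray DataArray
# with a 'time' dimension (this is not the original implementation):
def create_ozone_metric_sketch(o3_hourly):
    mda8 = o3_hourly.rolling(time=8).mean().resample(time="D").max()  # daily max 8-h mean
    return mda8.rolling(time=183).mean().max("time")  # annual max of ~6-month running mean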
def main():
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="01:00:00",
        memory="2 G",
        resource_spec="h_vmem=2G",
        scheduler_options={"dashboard_address": ":5757"},
        project="admiralty",
        job_extra=["-cwd", "-V", f"-pe smp {n_processes}", "-l disk=1G"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-worker-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=n_jobs)

    time_start = time.time()

    # custom inputs
    # grid spacings: 1.5 and 16 for 0.1, 1.5 and 6 for 0.3, 1.4 and 8 for 0.2
    matrix_stacked = np.array(
        np.meshgrid(
            np.linspace(0, 1.5, 16),
            np.linspace(0, 1.5, 16),
            np.linspace(0, 1.5, 16),
            np.linspace(0, 1.5, 16),
            np.linspace(0, 1.5, 16),
        )
    ).T.reshape(-1, 5)
    custom_inputs_set = set(
        tuple(map(float, map("{:.1f}".format, item))) for item in matrix_stacked
    )

    custom_inputs_completed_filenames = glob.glob(
        f"/nobackup/earlacoa/machinelearning/data/summary/ds*{output}*"
    )
    custom_inputs_completed_list = []
    for custom_inputs_completed_filename in custom_inputs_completed_filenames:
        custom_inputs_completed_list.append(
            [
                float(item)
                for item in re.findall(r"\d+\.\d+", custom_inputs_completed_filename)
            ]
        )
    custom_inputs_completed_set = set(
        tuple(item) for item in custom_inputs_completed_list
    )

    custom_inputs_remaining_set = custom_inputs_set - custom_inputs_completed_set
    custom_inputs = [
        np.array(item).reshape(1, -1) for item in custom_inputs_remaining_set
    ]
    print(f"custom inputs remaining for {output}: {len(custom_inputs)}")

    # dask bag and process
    # process in batches of up to 5,000 over 30 cores
    custom_inputs = custom_inputs[0:5000]
    print(f"predicting for {len(custom_inputs)} custom inputs ...")
    bag_custom_inputs = db.from_sequence(custom_inputs, npartitions=n_workers)
    bag_custom_inputs.map(custom_predicts).compute()

    time_end = time.time() - time_start
    print(
        f"completed in {time_end:0.2f} seconds, or {time_end / 60:0.2f} minutes, or {time_end / 3600:0.2f} hours"
    )
    print(
        f"average time per custom input is {time_end / len(custom_inputs):0.2f} seconds"
    )

    client.close()
    cluster.close()
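
# For reference, the filename round-trip used above recovers the scaling
# factors from a completed filename via re.findall (filename illustrative):
#
#   >>> re.findall(r"\d+\.\d+", "ds_RES0.5_IND1.0_TRA1.5_AGR0.0_ENE1.0_PM2_5_DRY.nc")
#   ['0.5', '1.0', '1.5', '0.0', '1.0']
#
# This assumes the only dotted numbers in the filename are the five scaling
# factors; a substring like "0.25deg" would also match.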
def main():
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="01:00:00",
        memory="64 G",
        resource_spec="h_vmem=64G",
        scheduler_options={"dashboard_address": ":5757"},
        job_extra=["-cwd", "-V", f"-pe smp {n_processes}", "-l disk=32G"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-worker-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=n_jobs)

    time_start = time.time()

    # regrid custom outputs to the population grid
    custom_outputs = glob.glob(
        f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}/ds*{output}.nc"
    )
    custom_outputs_completed = glob.glob(
        f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}/ds*{output}_popgrid_0.25deg.nc"
    )
    custom_outputs_completed = [f"{item[0:-19]}.nc" for item in custom_outputs_completed]
    custom_outputs_remaining_set = set(custom_outputs) - set(custom_outputs_completed)
    custom_outputs_remaining = list(custom_outputs_remaining_set)
    print(f"custom outputs remaining for {output}: {len(custom_outputs_remaining)}")

    # dask bag and process
    # process in batches of 5,000 over 30 cores; each batch takes ~2 minutes
    custom_outputs_remaining = custom_outputs_remaining[0:5000]
    print(f"predicting for {len(custom_outputs_remaining)} custom outputs ...")
    bag_custom_outputs = db.from_sequence(custom_outputs_remaining, npartitions=n_workers)
    bag_custom_outputs.map(regrid_to_pop).compute()

    time_end = time.time() - time_start
    print(
        f"completed in {time_end:0.2f} seconds, or {time_end / 60:0.2f} minutes, or {time_end / 3600:0.2f} hours"
    )
    print(
        f"average time per custom output is {time_end / len(custom_outputs_remaining):0.2f} seconds"
    )

    client.close()
    cluster.close()
def main():
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="01:00:00",
        memory="64 G",
        resource_spec="h_vmem=64G",
        scheduler_options={"dashboard_address": ":5757"},
        job_extra=["-cwd", "-V", f"-pe smp {n_processes}", "-l disk=32G"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-worker-scale-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=n_jobs)

    time_start = time.time()

    # scale custom outputs
    # which branch runs is controlled by the module-level flags
    # normal, extra, climate_cobenefits and top_down_2020_baseline
    if normal:
        emission_configs = np.array(
            np.meshgrid(
                np.linspace(0.0, 1.4, 8),
                np.linspace(0.0, 1.4, 8),
                np.linspace(0.0, 1.4, 8),
                np.linspace(0.0, 1.4, 8),
                np.linspace(0.0, 1.4, 8),
            )
        ).T.reshape(-1, 5)
        emission_configs_20percentintervals = []
        for emission_config in emission_configs:
            emission_configs_20percentintervals.append(
                f"RES{round(emission_config[0], 1)}_IND{round(emission_config[1], 1)}_TRA{round(emission_config[2], 1)}_AGR{round(emission_config[3], 1)}_ENE{round(emission_config[4], 1)}"
            )

    if extra:
        custom_inputs_main = [
            np.array([[1.15, 1.27, 0.98, 0.98, 1.36]]),  # bottom-up 2010
            np.array([[1.19, 1.30, 1.01, 1.01, 1.46]]),  # bottom-up 2011
            np.array([[1.20, 1.30, 1.01, 1.02, 1.39]]),  # bottom-up 2012
            np.array([[1.13, 1.29, 1.02, 1.01, 1.29]]),  # bottom-up 2013
            np.array([[1.06, 1.12, 0.99, 1.01, 1.12]]),  # bottom-up 2014
            np.array([[0.92, 0.84, 0.97, 0.99, 0.94]]),  # bottom-up 2016
            np.array([[0.84, 0.81, 0.99, 0.99, 0.89]]),  # bottom-up 2017
            np.array([[0.76, 0.934, 0.735, 0.683, 0.708]]),
            np.array([[0.704, 0.786, 0.73, 0.659, 0.6]]),
            np.array([[0.712, 0.703, 0.725, 0.676, 0.649]]),
            np.array([[0.739, 0.668, 0.701, 0.686, 0.682]]),
            np.array([[0.67, 0.609, 0.709, 0.621, 0.661]]),
            np.array([[0.744, 0.904, 0.778, 0.678, 0.716]]),
            np.array([[0.771, 0.835, 0.711, 0.685, 0.544]]),
            np.array([[0.647, 0.945, 0.746, 0.588, 0.473]]),
            np.array([[0.657, 0.745, 0.714, 0.613, 0.591]]),
            np.array([[0.582, 0.7, 0.672, 0.5, 0.492]]),
            np.array([[0.803, 0.835, 0.742, 0.71, 0.717]]),
            np.array([[0.721, 0.863, 0.712, 0.74, 0.709]]),
            np.array([[0.661, 0.674, 0.694, 0.742, 0.715]]),
            np.array([[0.701, 0.642, 0.669, 0.681, 0.679]]),
            np.array([[0.604, 0.399, 0.659, 0.613, 0.724]]),
            np.array([[0.769, 1.009, 0.697, 0.69, 0.72]]),
            np.array([[0.824, 0.759, 0.767, 0.641, 0.429]]),
            np.array([[0.858, 1.092, 0.794, 0.604, 0.475]]),
            np.array([[0.8, 0.987, 0.648, 0.57, 0.493]]),
            np.array([[0.867, 0.957, 0.677, 0.558, 0.477]]),
        ]
        custom_inputs = []
        for custom_input in custom_inputs_main:
            # per-sector variants: others at 1.0, sector zeroed, and sector only
            custom_input_res = np.copy(custom_input)
            custom_input_ind = np.copy(custom_input)
            custom_input_tra = np.copy(custom_input)
            custom_input_agr = np.copy(custom_input)
            custom_input_ene = np.copy(custom_input)
            custom_input_nores = np.copy(custom_input)
            custom_input_noind = np.copy(custom_input)
            custom_input_notra = np.copy(custom_input)
            custom_input_noagr = np.copy(custom_input)
            custom_input_noene = np.copy(custom_input)
            custom_input_resonly = np.copy(custom_input)
            custom_input_indonly = np.copy(custom_input)
            custom_input_traonly = np.copy(custom_input)
            custom_input_agronly = np.copy(custom_input)
            custom_input_eneonly = np.copy(custom_input)

            custom_input_res[0][1:] = 1.0
            custom_input_ind[0][0] = 1.0
            custom_input_ind[0][2:] = 1.0
            custom_input_tra[0][:2] = 1.0
            custom_input_tra[0][3:] = 1.0
            custom_input_agr[0][:3] = 1.0
            custom_input_agr[0][4:] = 1.0
            custom_input_ene[0][:4] = 1.0

            custom_input_nores[0][0] = 0.0
            custom_input_noind[0][1] = 0.0
            custom_input_notra[0][2] = 0.0
            custom_input_noagr[0][3] = 0.0
            custom_input_noene[0][4] = 0.0

            custom_input_resonly[0][1:] = 0.0
            custom_input_indonly[0][0] = 0.0
            custom_input_indonly[0][2:] = 0.0
            custom_input_traonly[0][:2] = 0.0
            custom_input_traonly[0][3:] = 0.0
            custom_input_agronly[0][:3] = 0.0
            custom_input_agronly[0][4:] = 0.0
            custom_input_eneonly[0][:4] = 0.0

            custom_inputs.append(custom_input)
            custom_inputs.append(custom_input_res)
            custom_inputs.append(custom_input_ind)
            custom_inputs.append(custom_input_tra)
            custom_inputs.append(custom_input_agr)
            custom_inputs.append(custom_input_ene)
            custom_inputs.append(custom_input_nores)
            custom_inputs.append(custom_input_noind)
            custom_inputs.append(custom_input_notra)
            custom_inputs.append(custom_input_noagr)
            custom_inputs.append(custom_input_noene)
            custom_inputs.append(custom_input_resonly)
            custom_inputs.append(custom_input_indonly)
            custom_inputs.append(custom_input_traonly)
            custom_inputs.append(custom_input_agronly)
            custom_inputs.append(custom_input_eneonly)

        emission_configs_20percentintervals = []
        for custom_input in custom_inputs:
            emission_config = f"RES{custom_input[0][0]:0.3f}_IND{custom_input[0][1]:0.3f}_TRA{custom_input[0][2]:0.3f}_AGR{custom_input[0][3]:0.3f}_ENE{custom_input[0][4]:0.3f}"
            emission_configs_20percentintervals.append(emission_config)

    if climate_cobenefits:
        custom_inputs_main = [
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_CLE_2020
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_MFR_2020
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # SDS_MFR_2020
            np.array([[0.68, 0.84, 0.71, 1.16, 0.93]]),  # Base_CLE_2030
            np.array([[0.33, 0.47, 0.48, 0.81, 0.69]]),  # Base_MFR_2030
            np.array([[0.27, 0.45, 0.41, 0.81, 0.55]]),  # SDS_MFR_2030
            np.array([[0.57, 0.75, 0.69, 1.2, 0.94]]),  # Base_CLE_2040
            np.array([[0.24, 0.41, 0.31, 0.83, 0.73]]),  # Base_MFR_2040
            np.array([[0.19, 0.38, 0.22, 0.83, 0.5]]),  # SDS_MFR_2040
            np.array([[0.52, 0.72, 0.65, 1.24, 0.91]]),  # Base_CLE_2050
            np.array([[0.2, 0.38, 0.29, 0.86, 0.72]]),  # Base_MFR_2050
            np.array([[0.18, 0.35, 0.2, 0.86, 0.46]]),  # SDS_MFR_2050
        ]
        custom_inputs = []
        for custom_input in custom_inputs_main:
            # per-sector variants: others at 1.0, sector zeroed, and sector only
            custom_input_res = np.copy(custom_input)
            custom_input_ind = np.copy(custom_input)
            custom_input_tra = np.copy(custom_input)
            custom_input_agr = np.copy(custom_input)
            custom_input_ene = np.copy(custom_input)
            custom_input_nores = np.copy(custom_input)
            custom_input_noind = np.copy(custom_input)
            custom_input_notra = np.copy(custom_input)
            custom_input_noagr = np.copy(custom_input)
            custom_input_noene = np.copy(custom_input)
            custom_input_resonly = np.copy(custom_input)
            custom_input_indonly = np.copy(custom_input)
            custom_input_traonly = np.copy(custom_input)
            custom_input_agronly = np.copy(custom_input)
            custom_input_eneonly = np.copy(custom_input)

            custom_input_res[0][1:] = 1.0
            custom_input_ind[0][0] = 1.0
            custom_input_ind[0][2:] = 1.0
            custom_input_tra[0][:2] = 1.0
            custom_input_tra[0][3:] = 1.0
            custom_input_agr[0][:3] = 1.0
            custom_input_agr[0][4:] = 1.0
            custom_input_ene[0][:4] = 1.0

            custom_input_nores[0][0] = 0.0
            custom_input_noind[0][1] = 0.0
            custom_input_notra[0][2] = 0.0
            custom_input_noagr[0][3] = 0.0
            custom_input_noene[0][4] = 0.0

            custom_input_resonly[0][1:] = 0.0
            custom_input_indonly[0][0] = 0.0
            custom_input_indonly[0][2:] = 0.0
            custom_input_traonly[0][:2] = 0.0
            custom_input_traonly[0][3:] = 0.0
            custom_input_agronly[0][:3] = 0.0
            custom_input_agronly[0][4:] = 0.0
            custom_input_eneonly[0][:4] = 0.0

            custom_inputs.append(custom_input)
            custom_inputs.append(custom_input_res)
            custom_inputs.append(custom_input_ind)
            custom_inputs.append(custom_input_tra)
            custom_inputs.append(custom_input_agr)
            custom_inputs.append(custom_input_ene)
            custom_inputs.append(custom_input_nores)
            custom_inputs.append(custom_input_noind)
            custom_inputs.append(custom_input_notra)
            custom_inputs.append(custom_input_noagr)
            custom_inputs.append(custom_input_noene)
            custom_inputs.append(custom_input_resonly)
            custom_inputs.append(custom_input_indonly)
            custom_inputs.append(custom_input_traonly)
            custom_inputs.append(custom_input_agronly)
            custom_inputs.append(custom_input_eneonly)

        emission_configs_20percentintervals = []
        for custom_input in custom_inputs:
            emission_config = f"RES{custom_input[0][0]:0.3f}_IND{custom_input[0][1]:0.3f}_TRA{custom_input[0][2]:0.3f}_AGR{custom_input[0][3]:0.3f}_ENE{custom_input[0][4]:0.3f}"
            emission_configs_20percentintervals.append(emission_config)

    if top_down_2020_baseline:
        # matching to PM2.5 only, top 1,000
        emission_config_2020_baseline = np.array([0.604, 0.399, 0.659, 0.613, 0.724])
        # 10% reduction increments from the 2020 baseline, up to 50%
        emission_configs = np.array(
            np.meshgrid(
                np.linspace(emission_config_2020_baseline[0] * 0.50, emission_config_2020_baseline[0], 6),
                np.linspace(emission_config_2020_baseline[1] * 0.50, emission_config_2020_baseline[1], 6),
                np.linspace(emission_config_2020_baseline[2] * 0.50, emission_config_2020_baseline[2], 6),
                np.linspace(emission_config_2020_baseline[3] * 0.50, emission_config_2020_baseline[3], 6),
                np.linspace(emission_config_2020_baseline[4] * 0.50, emission_config_2020_baseline[4], 6),
            )
        ).T.reshape(-1, 5)
        # add a few more with larger reductions in RES and IND to reach WHO-IT2
        emission_configs = list(emission_configs)
        emission_configs.append(np.array([0.242, 0.160, 0.659, 0.613, 0.724]))
        emission_configs.append(np.array([0.181, 0.120, 0.659, 0.613, 0.724]))
        emission_configs.append(np.array([0.121, 0.080, 0.659, 0.613, 0.724]))
        emission_configs.append(np.array([0.060, 0.040, 0.659, 0.613, 0.724]))
        emission_configs_20percentintervals = []
        for emission_config in emission_configs:
            emission_configs_20percentintervals.append(
                f"RES{round(emission_config[0], 3):.3f}_IND{round(emission_config[1], 3):.3f}_TRA{round(emission_config[2], 3):.3f}_AGR{round(emission_config[3], 3):.3f}_ENE{round(emission_config[4], 3):.3f}"
            )

    emission_configs_completed = glob.glob(
        f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}_adjusted_scaled/ds*{output}_popgrid_0.25deg_adjusted_scaled.nc"
    )
    # strip the directory prefix and filename suffix (slice values are path-length specific)
    emission_configs_completed = [item[88:-45] for item in emission_configs_completed]

    emission_configs_20percentintervals_remaining_set = set(
        emission_configs_20percentintervals
    ) - set(emission_configs_completed)
    emission_configs_remaining = list(emission_configs_20percentintervals_remaining_set)
    print(
        f"custom outputs remaining for {output}: {len(emission_configs_remaining)} - 20% intervals with {int(100 * len(emission_configs_20percentintervals_remaining_set) / len(emission_configs_20percentintervals))}% remaining"
    )

    # dask bag and process
    emission_configs_remaining = emission_configs_remaining[:35000]
    print(f"predicting for {len(emission_configs_remaining)} custom outputs ...")
    bag_emission_configs = db.from_sequence(emission_configs_remaining, npartitions=n_workers)
    bag_emission_configs.map(scale).compute()

    time_end = time.time() - time_start
    print(
        f"completed in {time_end:0.2f} seconds, or {time_end / 60:0.2f} minutes, or {time_end / 3600:0.2f} hours"
    )
    print(
        f"average time per custom output is {time_end / len(emission_configs_remaining):0.2f} seconds"
    )

    client.close()
    cluster.close()
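
# The 16-variant block above (base, per-sector "others at 1.0", "no sector",
# and "sector only") is repeated verbatim for each scenario list. A helper
# like this hypothetical one (not in the original scripts; it reuses the
# module-level numpy import) expresses the same expansion once:
def sector_variants(base):
    """Yield base plus its 15 per-sector variants (order differs from above)."""
    sectors = range(5)  # RES, IND, TRA, AGR, ENE
    yield base
    for fill in (1.0, 0.0):  # others at 1.0 ("sector varies"), then sector only
        for i in sectors:
            variant = np.copy(base)
            variant[0, [j for j in sectors if j != i]] = fill
            yield variant
    for i in sectors:  # no-sector variants
        variant = np.copy(base)
        variant[0, i] = 0.0
        yield variant

# usage sketch:
#     for custom_input in custom_inputs_main:
#         custom_inputs.extend(sector_variants(custom_input))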
def main():
    # dask cluster and client
    # n_jobs, walltime, emission_configs, station_id, output, sub_folder,
    # baselines, targets, target_diffs, obs_change_abs and obs_change_per
    # are expected to be defined at module level elsewhere in the script
    n_processes = 1
    n_workers = n_processes * n_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime=walltime,
        memory="32 G",
        resource_spec="h_vmem=32G",
        scheduler_options={"dashboard_address": ":5761"},
        job_extra=["-cwd", "-V", f"-pe smp {n_processes}", "-l disk=32G"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-find-emis-pm-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=n_jobs)

    time_start = time.time()

    # dask bag over emission_configs
    print(f"predicting over {len(emission_configs)} emission configs for {station_id} ...")
    bag_emission_configs = db.from_sequence(emission_configs, npartitions=n_workers)
    results = bag_emission_configs.map(filter_emission_configs).compute()

    station_diffs_abs = [result[0] for result in results]
    station_diffs_per = [result[1] for result in results]
    key = next(iter(baselines))  # first (only) station key
    station_diffs_abs = [
        station_diff_abs
        for station_diff_abs in station_diffs_abs
        if len(station_diff_abs[key]) > 0
    ]
    station_diffs_per = [
        station_diff_per
        for station_diff_per in station_diffs_per
        if len(station_diff_per[key]) > 0
    ]

    merged_per = {}
    for station_diff_per in station_diffs_per:
        merged_per = {**merged_per, **station_diff_per[key]}

    merged_abs = {}
    for station_diff_abs in station_diffs_abs:
        merged_abs = {**merged_abs, **station_diff_abs[key]}

    station_diffs_per = {key: merged_per}
    station_diffs_abs = {key: merged_abs}

    results_path = f"/nobackup/earlacoa/machinelearning/data_annual/find_emissions_that_match_change_air_quality/{sub_folder}_adjusted_scaled"
    joblib.dump(obs_change_abs, f"{results_path}/obs_change_abs_{output}_{station_id}.joblib")
    joblib.dump(obs_change_per, f"{results_path}/obs_change_per_{output}_{station_id}.joblib")
    joblib.dump(baselines, f"{results_path}/baselines_{output}_{station_id}.joblib")
    joblib.dump(targets, f"{results_path}/targets_{output}_{station_id}.joblib")
    joblib.dump(target_diffs, f"{results_path}/target_diffs_{output}_{station_id}.joblib")
    joblib.dump(station_diffs_abs, f"{results_path}/station_diffs_abs_{output}_{station_id}.joblib")
    joblib.dump(station_diffs_per, f"{results_path}/station_diffs_per_{output}_{station_id}.joblib")

    time_end = time.time() - time_start
    print(
        f"completed in {time_end:0.2f} seconds, or {time_end / 60:0.2f} minutes, or {time_end / 3600:0.2f} hours"
    )

    client.close()
    cluster.close()
def main():
    # dask cluster and client
    n_processes = 1
    n_jobs = 35
    n_workers = n_processes * n_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="01:00:00",
        memory="2 G",
        resource_spec="h_vmem=2G",
        scheduler_options={"dashboard_address": ":5757"},
        job_extra=["-cwd", "-V", f"-pe smp {n_processes}", "-l disk=1G"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-worker-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=n_jobs)

    time_start = time.time()

    # custom inputs
    # which branch runs is controlled by the module-level flags
    # normal, extra, climate_cobenefits and top_down_2020_baseline
    if normal:
        # grid spacings: 1.5 and 16 for 0.1, 1.5 and 6 for 0.3, 1.4 and 8 for 0.2
        matrix_stacked = np.array(
            np.meshgrid(
                np.linspace(0, 1.5, 16),
                np.linspace(0, 1.5, 16),
                np.linspace(0, 1.5, 16),
                np.linspace(0, 1.5, 16),
                np.linspace(0, 1.5, 16),
            )
        ).T.reshape(-1, 5)
        custom_inputs_set = set(
            tuple(map(float, map("{:.1f}".format, item))) for item in matrix_stacked
        )

        custom_inputs_completed_filenames = glob.glob(
            f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}/ds*{output}*"
        )
        custom_inputs_completed_list = []
        for custom_inputs_completed_filename in custom_inputs_completed_filenames:
            custom_inputs_completed_list.append(
                [
                    float(item)
                    for item in re.findall(r"\d+\.\d+", custom_inputs_completed_filename)
                ]
            )
        custom_inputs_completed_set = set(
            tuple(item) for item in custom_inputs_completed_list
        )

        custom_inputs_remaining_set = custom_inputs_set - custom_inputs_completed_set
        custom_inputs = [
            np.array(item).reshape(1, -1) for item in custom_inputs_remaining_set
        ]
        print(f"custom inputs remaining for {output}: {len(custom_inputs)}")

    if extra:
        custom_inputs_main = [
            np.array([[1.15, 1.27, 0.98, 0.98, 1.36]]),  # bottom-up 2010
            np.array([[1.19, 1.30, 1.01, 1.01, 1.46]]),  # bottom-up 2011
            np.array([[1.20, 1.30, 1.01, 1.02, 1.39]]),  # bottom-up 2012
            np.array([[1.13, 1.29, 1.02, 1.01, 1.29]]),  # bottom-up 2013
            np.array([[1.06, 1.12, 0.99, 1.01, 1.12]]),  # bottom-up 2014
            np.array([[0.92, 0.84, 0.97, 0.99, 0.94]]),  # bottom-up 2016
            np.array([[0.84, 0.81, 0.99, 0.99, 0.89]]),  # bottom-up 2017
            np.array([[0.76, 0.934, 0.735, 0.683, 0.708]]),
            np.array([[0.704, 0.786, 0.73, 0.659, 0.6]]),
            np.array([[0.712, 0.703, 0.725, 0.676, 0.649]]),
            np.array([[0.739, 0.668, 0.701, 0.686, 0.682]]),
            np.array([[0.67, 0.609, 0.709, 0.621, 0.661]]),
            np.array([[0.744, 0.904, 0.778, 0.678, 0.716]]),
            np.array([[0.771, 0.835, 0.711, 0.685, 0.544]]),
            np.array([[0.647, 0.945, 0.746, 0.588, 0.473]]),
            np.array([[0.657, 0.745, 0.714, 0.613, 0.591]]),
            np.array([[0.582, 0.7, 0.672, 0.5, 0.492]]),
            np.array([[0.803, 0.835, 0.742, 0.71, 0.717]]),
            np.array([[0.721, 0.863, 0.712, 0.74, 0.709]]),
            np.array([[0.661, 0.674, 0.694, 0.742, 0.715]]),
            np.array([[0.701, 0.642, 0.669, 0.681, 0.679]]),
            np.array([[0.604, 0.399, 0.659, 0.613, 0.724]]),
            np.array([[0.769, 1.009, 0.697, 0.69, 0.72]]),
            np.array([[0.824, 0.759, 0.767, 0.641, 0.429]]),
            np.array([[0.858, 1.092, 0.794, 0.604, 0.475]]),
            np.array([[0.8, 0.987, 0.648, 0.57, 0.493]]),
            np.array([[0.867, 0.957, 0.677, 0.558, 0.477]]),
        ]
        custom_inputs = []
        for custom_input in custom_inputs_main:
            # per-sector variants: others at 1.0, sector zeroed, and sector only
            custom_input_res = np.copy(custom_input)
            custom_input_ind = np.copy(custom_input)
            custom_input_tra = np.copy(custom_input)
            custom_input_agr = np.copy(custom_input)
            custom_input_ene = np.copy(custom_input)
            custom_input_nores = np.copy(custom_input)
            custom_input_noind = np.copy(custom_input)
            custom_input_notra = np.copy(custom_input)
            custom_input_noagr = np.copy(custom_input)
            custom_input_noene = np.copy(custom_input)
            custom_input_resonly = np.copy(custom_input)
            custom_input_indonly = np.copy(custom_input)
            custom_input_traonly = np.copy(custom_input)
            custom_input_agronly = np.copy(custom_input)
            custom_input_eneonly = np.copy(custom_input)

            custom_input_res[0][1:] = 1.0
            custom_input_ind[0][0] = 1.0
            custom_input_ind[0][2:] = 1.0
            custom_input_tra[0][:2] = 1.0
            custom_input_tra[0][3:] = 1.0
            custom_input_agr[0][:3] = 1.0
            custom_input_agr[0][4:] = 1.0
            custom_input_ene[0][:4] = 1.0

            custom_input_nores[0][0] = 0.0
            custom_input_noind[0][1] = 0.0
            custom_input_notra[0][2] = 0.0
            custom_input_noagr[0][3] = 0.0
            custom_input_noene[0][4] = 0.0

            custom_input_resonly[0][1:] = 0.0
            custom_input_indonly[0][0] = 0.0
            custom_input_indonly[0][2:] = 0.0
            custom_input_traonly[0][:2] = 0.0
            custom_input_traonly[0][3:] = 0.0
            custom_input_agronly[0][:3] = 0.0
            custom_input_agronly[0][4:] = 0.0
            custom_input_eneonly[0][:4] = 0.0

            custom_inputs.append(custom_input)
            custom_inputs.append(custom_input_res)
            custom_inputs.append(custom_input_ind)
            custom_inputs.append(custom_input_tra)
            custom_inputs.append(custom_input_agr)
            custom_inputs.append(custom_input_ene)
            custom_inputs.append(custom_input_nores)
            custom_inputs.append(custom_input_noind)
            custom_inputs.append(custom_input_notra)
            custom_inputs.append(custom_input_noagr)
            custom_inputs.append(custom_input_noene)
            custom_inputs.append(custom_input_resonly)
            custom_inputs.append(custom_input_indonly)
            custom_inputs.append(custom_input_traonly)
            custom_inputs.append(custom_input_agronly)
            custom_inputs.append(custom_input_eneonly)

        # only for emulator_predictions.py: the sector-only runs are required
        # to adjust for double-counted emissions
        custom_inputs_temp = custom_inputs.copy()
        for custom_input in custom_inputs_temp:
            custom_input_resonly = np.copy(custom_input)
            custom_input_indonly = np.copy(custom_input)
            custom_input_traonly = np.copy(custom_input)
            custom_input_agronly = np.copy(custom_input)
            custom_input_eneonly = np.copy(custom_input)

            custom_input_resonly[0][1:] = 0.0
            custom_input_indonly[0][0] = 0.0
            custom_input_indonly[0][2:] = 0.0
            custom_input_traonly[0][:2] = 0.0
            custom_input_traonly[0][3:] = 0.0
            custom_input_agronly[0][:3] = 0.0
            custom_input_agronly[0][4:] = 0.0
            custom_input_eneonly[0][:4] = 0.0

            custom_inputs.append(custom_input_resonly)
            custom_inputs.append(custom_input_indonly)
            custom_inputs.append(custom_input_traonly)
            custom_inputs.append(custom_input_agronly)
            custom_inputs.append(custom_input_eneonly)

        emission_configs_20percentintervals = []
        for custom_input in custom_inputs:
            emission_config = f"RES{custom_input[0][0]:0.3f}_IND{custom_input[0][1]:0.3f}_TRA{custom_input[0][2]:0.3f}_AGR{custom_input[0][3]:0.3f}_ENE{custom_input[0][4]:0.3f}"
            emission_configs_20percentintervals.append(emission_config)

        # de-duplicate, then rebuild the inputs from the config strings
        # (dots escaped in the regex)
        emission_configs_20percentintervals = list(set(emission_configs_20percentintervals))
        custom_inputs = []
        for emission_config in emission_configs_20percentintervals:
            custom_input = np.array(
                [float(num) for num in re.findall(r"\d+\.\d+", emission_config)]
            ).reshape(1, -1)
            custom_inputs.append(custom_input)

    if climate_cobenefits:
        custom_inputs_main = [
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_CLE_2020
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_MFR_2020
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # SDS_MFR_2020
            np.array([[0.68, 0.84, 0.71, 1.16, 0.93]]),  # Base_CLE_2030
            np.array([[0.33, 0.47, 0.48, 0.81, 0.69]]),  # Base_MFR_2030
            np.array([[0.27, 0.45, 0.41, 0.81, 0.55]]),  # SDS_MFR_2030
            np.array([[0.57, 0.75, 0.69, 1.2, 0.94]]),  # Base_CLE_2040
            np.array([[0.24, 0.41, 0.31, 0.83, 0.73]]),  # Base_MFR_2040
            np.array([[0.19, 0.38, 0.22, 0.83, 0.5]]),  # SDS_MFR_2040
            np.array([[0.52, 0.72, 0.65, 1.24, 0.91]]),  # Base_CLE_2050
            np.array([[0.2, 0.38, 0.29, 0.86, 0.72]]),  # Base_MFR_2050
            np.array([[0.18, 0.35, 0.2, 0.86, 0.46]]),  # SDS_MFR_2050
        ]
        custom_inputs = []
        for custom_input in custom_inputs_main:
            # per-sector variants: others at 1.0, sector zeroed, and sector only
            custom_input_res = np.copy(custom_input)
            custom_input_ind = np.copy(custom_input)
            custom_input_tra = np.copy(custom_input)
            custom_input_agr = np.copy(custom_input)
            custom_input_ene = np.copy(custom_input)
            custom_input_nores = np.copy(custom_input)
            custom_input_noind = np.copy(custom_input)
            custom_input_notra = np.copy(custom_input)
            custom_input_noagr = np.copy(custom_input)
            custom_input_noene = np.copy(custom_input)
            custom_input_resonly = np.copy(custom_input)
            custom_input_indonly = np.copy(custom_input)
            custom_input_traonly = np.copy(custom_input)
            custom_input_agronly = np.copy(custom_input)
            custom_input_eneonly = np.copy(custom_input)

            custom_input_res[0][1:] = 1.0
            custom_input_ind[0][0] = 1.0
            custom_input_ind[0][2:] = 1.0
            custom_input_tra[0][:2] = 1.0
            custom_input_tra[0][3:] = 1.0
            custom_input_agr[0][:3] = 1.0
            custom_input_agr[0][4:] = 1.0
            custom_input_ene[0][:4] = 1.0

            custom_input_nores[0][0] = 0.0
            custom_input_noind[0][1] = 0.0
            custom_input_notra[0][2] = 0.0
            custom_input_noagr[0][3] = 0.0
            custom_input_noene[0][4] = 0.0

            custom_input_resonly[0][1:] = 0.0
            custom_input_indonly[0][0] = 0.0
            custom_input_indonly[0][2:] = 0.0
            custom_input_traonly[0][:2] = 0.0
            custom_input_traonly[0][3:] = 0.0
            custom_input_agronly[0][:3] = 0.0
            custom_input_agronly[0][4:] = 0.0
            custom_input_eneonly[0][:4] = 0.0

            custom_inputs.append(custom_input)
            custom_inputs.append(custom_input_res)
            custom_inputs.append(custom_input_ind)
            custom_inputs.append(custom_input_tra)
            custom_inputs.append(custom_input_agr)
            custom_inputs.append(custom_input_ene)
            custom_inputs.append(custom_input_nores)
            custom_inputs.append(custom_input_noind)
            custom_inputs.append(custom_input_notra)
            custom_inputs.append(custom_input_noagr)
            custom_inputs.append(custom_input_noene)
            custom_inputs.append(custom_input_resonly)
            custom_inputs.append(custom_input_indonly)
            custom_inputs.append(custom_input_traonly)
            custom_inputs.append(custom_input_agronly)
            custom_inputs.append(custom_input_eneonly)

        # only for emulator_predictions.py: the sector-only runs are required
        # to adjust for double-counted emissions
        custom_inputs_temp = custom_inputs.copy()
        for custom_input in custom_inputs_temp:
            custom_input_resonly = np.copy(custom_input)
            custom_input_indonly = np.copy(custom_input)
            custom_input_traonly = np.copy(custom_input)
            custom_input_agronly = np.copy(custom_input)
            custom_input_eneonly = np.copy(custom_input)

            custom_input_resonly[0][1:] = 0.0
            custom_input_indonly[0][0] = 0.0
            custom_input_indonly[0][2:] = 0.0
            custom_input_traonly[0][:2] = 0.0
            custom_input_traonly[0][3:] = 0.0
            custom_input_agronly[0][:3] = 0.0
            custom_input_agronly[0][4:] = 0.0
            custom_input_eneonly[0][:4] = 0.0

            custom_inputs.append(custom_input_resonly)
            custom_inputs.append(custom_input_indonly)
            custom_inputs.append(custom_input_traonly)
            custom_inputs.append(custom_input_agronly)
            custom_inputs.append(custom_input_eneonly)

        emission_configs_20percentintervals = []
        for custom_input in custom_inputs:
            emission_config = f"RES{custom_input[0][0]:0.3f}_IND{custom_input[0][1]:0.3f}_TRA{custom_input[0][2]:0.3f}_AGR{custom_input[0][3]:0.3f}_ENE{custom_input[0][4]:0.3f}"
            emission_configs_20percentintervals.append(emission_config)

        # de-duplicate, then rebuild the inputs from the config strings
        emission_configs_20percentintervals = list(set(emission_configs_20percentintervals))
        custom_inputs = []
        for emission_config in emission_configs_20percentintervals:
            custom_input = np.array(
                [float(num) for num in re.findall(r"\d+\.\d+", emission_config)]
            ).reshape(1, -1)
            custom_inputs.append(custom_input)

    if top_down_2020_baseline:
        # matching to PM2.5 only, top 1,000
        emission_config_2020_baseline = np.array([0.604, 0.399, 0.659, 0.613, 0.724])
        # 10% reduction increments from the 2020 baseline, up to 50%
        emission_configs = np.array(
            np.meshgrid(
                np.linspace(emission_config_2020_baseline[0] * 0.50, emission_config_2020_baseline[0], 6),
                np.linspace(emission_config_2020_baseline[1] * 0.50, emission_config_2020_baseline[1], 6),
                np.linspace(emission_config_2020_baseline[2] * 0.50, emission_config_2020_baseline[2], 6),
                np.linspace(emission_config_2020_baseline[3] * 0.50, emission_config_2020_baseline[3], 6),
                np.linspace(emission_config_2020_baseline[4] * 0.50, emission_config_2020_baseline[4], 6),
            )
        ).T.reshape(-1, 5)
        custom_inputs = [np.array(item).reshape(1, -1) for item in emission_configs]
        # add a few more with larger reductions in RES and IND to reach WHO-IT2
        custom_inputs.append(np.array([[0.242, 0.160, 0.659, 0.613, 0.724]]))
        custom_inputs.append(np.array([[0.181, 0.120, 0.659, 0.613, 0.724]]))
        custom_inputs.append(np.array([[0.121, 0.080, 0.659, 0.613, 0.724]]))
        custom_inputs.append(np.array([[0.060, 0.040, 0.659, 0.613, 0.724]]))

        # only for emulator_predictions.py: the sector-only runs are required
        # to adjust for double-counted emissions
        custom_inputs_temp = custom_inputs.copy()
        for custom_input in custom_inputs_temp:
            custom_input_resonly = np.copy(custom_input)
            custom_input_indonly = np.copy(custom_input)
            custom_input_traonly = np.copy(custom_input)
            custom_input_agronly = np.copy(custom_input)
            custom_input_eneonly = np.copy(custom_input)

            custom_input_resonly[0][1:] = 0.0
            custom_input_indonly[0][0] = 0.0
            custom_input_indonly[0][2:] = 0.0
            custom_input_traonly[0][:2] = 0.0
            custom_input_traonly[0][3:] = 0.0
            custom_input_agronly[0][:3] = 0.0
            custom_input_agronly[0][4:] = 0.0
            custom_input_eneonly[0][:4] = 0.0

            custom_inputs.append(custom_input_resonly)
            custom_inputs.append(custom_input_indonly)
            custom_inputs.append(custom_input_traonly)
            custom_inputs.append(custom_input_agronly)
            custom_inputs.append(custom_input_eneonly)

        emission_configs_20percentintervals = []
        for custom_input in custom_inputs:
            emission_config = f"RES{custom_input[0][0]:0.3f}_IND{custom_input[0][1]:0.3f}_TRA{custom_input[0][2]:0.3f}_AGR{custom_input[0][3]:0.3f}_ENE{custom_input[0][4]:0.3f}"
            emission_configs_20percentintervals.append(emission_config)
        emission_configs_20percentintervals = set(emission_configs_20percentintervals)

        custom_inputs_completed_filenames = glob.glob(
            f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}/ds*{output}.nc"
        )
        custom_inputs_completed_list = []
        for custom_inputs_completed_filename in custom_inputs_completed_filenames:
            emission_config = re.findall(
                r"RES\d+\.\d+_IND\d+\.\d+_TRA\d+\.\d+_AGR\d+\.\d+_ENE\d+\.\d+",
                custom_inputs_completed_filename,
            )
            if len(emission_config) > 0:
                custom_inputs_completed_list.append(emission_config)
        custom_inputs_completed_set = set(item[0] for item in custom_inputs_completed_list)

        custom_inputs_remaining_set = (
            emission_configs_20percentintervals - custom_inputs_completed_set
        )
        custom_inputs = [
            np.array([float(n) for n in re.findall(r"\d+\.\d+", item)]).reshape(1, -1)
            for item in custom_inputs_remaining_set
        ]

    # dask bag and process
    custom_inputs = custom_inputs[:5000]
    # custom_inputs = custom_inputs[5000:]
    print(f"predicting for {len(custom_inputs)} custom inputs ...")
    bag_custom_inputs = db.from_sequence(custom_inputs, npartitions=n_workers)
    bag_custom_inputs.map(custom_predicts).compute()

    time_end = time.time() - time_start
    print(
        f"completed in {time_end:0.2f} seconds, or {time_end / 60:0.2f} minutes, or {time_end / 3600:0.2f} hours"
    )
    print(
        f"average time per custom input is {time_end / len(custom_inputs):0.2f} seconds"
    )

    client.close()
    cluster.close()
def main():
    # dask cluster and client
    if output == "PM2_5_DRY":
        n_jobs = 20
        n_outputs = 1000
    elif output == "o3_6mDM8h":
        n_jobs = 20
        n_outputs = 2000
    n_processes = 1
    n_workers = n_processes * n_jobs
    cluster = SGECluster(
        interface="ib0",
        walltime="02:00:00",
        memory="48 G",
        resource_spec="h_vmem=48G",
        scheduler_options={"dashboard_address": ":7777"},
        job_extra=["-cwd", "-V", f"-pe smp {n_processes}", "-l disk=48G"],
        local_directory=os.sep.join([os.environ.get("PWD"), "dask-hia-ozone-space"]),
    )
    client = Client(cluster)
    cluster.scale(jobs=n_jobs)

    time_start = time.time()

    # find remaining inputs
    if normal:
        custom_outputs = glob.glob(
            f"/nobackup/earlacoa/machinelearning/data_annual/predictions/{output}_adjusted_scaled/ds*{output}_popgrid_0.25deg_adjusted_scaled.nc"
        )
        custom_outputs_completed = glob.glob(
            f"/nobackup/earlacoa/machinelearning/data_annual/health_impact_assessments/{output}_adjusted_scaled/df_country_hia_*.csv"
        )
        custom_outputs_remaining_set = {
            item.split("/")[-1][3 : -1 - len(output) - 19 - 7] for item in custom_outputs
        } - {
            item.split("/")[-1][15 + len(output) + 1 : -4 - 7]
            for item in custom_outputs_completed
        }
        custom_outputs_remaining = list(custom_outputs_remaining_set)
        print(
            f"custom outputs remaining for {output}: {len(custom_outputs_remaining)} - 10% intervals with {int(100 * len(custom_outputs_remaining_set) / 16**5)}% remaining"
        )

        reduce_to_20percent_intervals = True
        if reduce_to_20percent_intervals:
            emission_configs = np.array(
                np.meshgrid(
                    np.linspace(0.0, 1.4, 8),
                    np.linspace(0.0, 1.4, 8),
                    np.linspace(0.0, 1.4, 8),
                    np.linspace(0.0, 1.4, 8),
                    np.linspace(0.0, 1.4, 8),
                )
            ).T.reshape(-1, 5)
            emission_configs_20percentintervals = []
            for emission_config in emission_configs:
                emission_configs_20percentintervals.append(
                    f"RES{round(emission_config[0], 1)}_IND{round(emission_config[1], 1)}_TRA{round(emission_config[2], 1)}_AGR{round(emission_config[3], 1)}_ENE{round(emission_config[4], 1)}"
                )
            emission_configs_completed = []
            for custom_output_completed in custom_outputs_completed:
                emission_configs_completed.append(
                    re.findall(
                        r"RES\d+\.\d+_IND\d+\.\d+_TRA\d+\.\d+_AGR\d+\.\d+_ENE\d+\.\d+",
                        custom_output_completed,
                    )[0]
                )
            emission_configs_20percentintervals_remaining_set = set(
                emission_configs_20percentintervals
            ) - set(emission_configs_completed)
            custom_outputs_remaining = list(
                emission_configs_20percentintervals_remaining_set
            )
            print(
                f"custom outputs remaining for {output}: {len(custom_outputs_remaining)} - 20% intervals with {int(100 * len(emission_configs_20percentintervals_remaining_set) / len(emission_configs_20percentintervals))}% remaining"
            )

    if extra:
        if year == "2010":
            custom_inputs_main = [
                np.array([[1.15, 1.27, 0.98, 0.98, 1.36]]),  # bottom-up 2010
            ]
        elif year == "2011":
            custom_inputs_main = [
                np.array([[1.19, 1.30, 1.01, 1.01, 1.46]]),  # bottom-up 2011
            ]
        elif year == "2012":
            custom_inputs_main = [
                np.array([[1.20, 1.30, 1.01, 1.02, 1.39]]),  # bottom-up 2012
            ]
        elif year == "2013":
            custom_inputs_main = [
                np.array([[1.13, 1.29, 1.02, 1.01, 1.29]]),  # bottom-up 2013
            ]
        elif year == "2014":
            custom_inputs_main = [
                np.array([[1.06, 1.12, 0.99, 1.01, 1.12]]),  # bottom-up 2014
            ]
        elif year == "2015":
            custom_inputs_main = [
                np.array([[1.0, 1.0, 1.0, 1.0, 1.0]]),  # control
            ]
        elif year == "2016":
            custom_inputs_main = [
                np.array([[0.92, 0.84, 0.97, 0.99, 0.94]]),  # bottom-up 2016
                np.array([[0.76, 0.934, 0.735, 0.683, 0.708]]),  # top-down 2016 - both
                np.array([[0.744, 0.904, 0.778, 0.678, 0.716]]),  # top-down 2016 - either
                np.array([[0.803, 0.835, 0.742, 0.71, 0.717]]),  # top-down 2016 - pm25 only
                np.array([[0.769, 1.009, 0.697, 0.69, 0.72]]),  # top-down 2016 - o3 only
            ]
        elif year == "2017":
            custom_inputs_main = [
                np.array([[0.84, 0.81, 0.99, 0.99, 0.89]]),  # bottom-up 2017
                np.array([[0.704, 0.786, 0.73, 0.659, 0.6]]),  # top-down 2017 - both
                np.array([[0.771, 0.835, 0.711, 0.685, 0.544]]),  # top-down 2017 - either
                np.array([[0.721, 0.863, 0.712, 0.74, 0.709]]),  # top-down 2017 - pm25 only
                np.array([[0.824, 0.759, 0.767, 0.641, 0.429]]),  # top-down 2017 - o3 only
            ]
        elif year == "2018":
            custom_inputs_main = [
                np.array([[0.712, 0.703, 0.725, 0.676, 0.649]]),  # top-down 2018 - both
                np.array([[0.647, 0.945, 0.746, 0.588, 0.473]]),  # top-down 2018 - either
                np.array([[0.661, 0.674, 0.694, 0.742, 0.715]]),  # top-down 2018 - pm25 only
                np.array([[0.858, 1.092, 0.794, 0.604, 0.475]]),  # top-down 2018 - o3 only
            ]
        elif year == "2019":
            custom_inputs_main = [
                np.array([[0.739, 0.668, 0.701, 0.686, 0.682]]),  # top-down 2019 - both
                np.array([[0.657, 0.745, 0.714, 0.613, 0.591]]),  # top-down 2019 - either
                np.array([[0.701, 0.642, 0.669, 0.681, 0.679]]),  # top-down 2019 - pm25 only
                np.array([[0.8, 0.987, 0.648, 0.57, 0.493]]),  # top-down 2019 - o3 only
            ]
        elif year == "2020":
            custom_inputs_main = [
                np.array([[0.67, 0.609, 0.709, 0.621, 0.661]]),  # top-down 2020 - both
                np.array([[0.582, 0.7, 0.672, 0.5, 0.492]]),  # top-down 2020 - either
                np.array([[0.604, 0.399, 0.659, 0.613, 0.724]]),  # top-down 2020 - pm25 only
                np.array([[0.867, 0.957, 0.677, 0.558, 0.477]]),  # top-down 2020 - o3 only
            ]
        custom_inputs = []
        for custom_input in custom_inputs_main:
            # per-sector variants: others at 1.0 and sector zeroed
            custom_input_res = np.copy(custom_input)
            custom_input_ind = np.copy(custom_input)
            custom_input_tra = np.copy(custom_input)
            custom_input_agr = np.copy(custom_input)
            custom_input_ene = np.copy(custom_input)
            custom_input_nores = np.copy(custom_input)
            custom_input_noind = np.copy(custom_input)
            custom_input_notra = np.copy(custom_input)
            custom_input_noagr = np.copy(custom_input)
            custom_input_noene = np.copy(custom_input)

            custom_input_res[0][1:] = 1.0
            custom_input_ind[0][0] = 1.0
            custom_input_ind[0][2:] = 1.0
            custom_input_tra[0][:2] = 1.0
            custom_input_tra[0][3:] = 1.0
            custom_input_agr[0][:3] = 1.0
            custom_input_agr[0][4:] = 1.0
            custom_input_ene[0][:4] = 1.0

            custom_input_nores[0][0] = 0.0
            custom_input_noind[0][1] = 0.0
            custom_input_notra[0][2] = 0.0
            custom_input_noagr[0][3] = 0.0
            custom_input_noene[0][4] = 0.0

            custom_inputs.append(custom_input)
            custom_inputs.append(custom_input_res)
            custom_inputs.append(custom_input_ind)
            custom_inputs.append(custom_input_tra)
            custom_inputs.append(custom_input_agr)
            custom_inputs.append(custom_input_ene)
            custom_inputs.append(custom_input_nores)
            custom_inputs.append(custom_input_noind)
            custom_inputs.append(custom_input_notra)
            custom_inputs.append(custom_input_noagr)
            custom_inputs.append(custom_input_noene)

        custom_outputs_remaining = []
        for custom_input in custom_inputs:
            emission_config = f"RES{custom_input[0][0]:0.3f}_IND{custom_input[0][1]:0.3f}_TRA{custom_input[0][2]:0.3f}_AGR{custom_input[0][3]:0.3f}_ENE{custom_input[0][4]:0.3f}"
            custom_outputs_remaining.append(emission_config)

    if climate_cobenefits:
        custom_inputs_main = [
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_CLE_2020
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # Base_MFR_2020
            np.array([[0.91, 0.95, 0.85, 1.05, 0.96]]),  # SDS_MFR_2020
            np.array([[0.68, 0.84, 0.71, 1.16, 0.93]]),  # Base_CLE_2030
            np.array([[0.33, 0.47, 0.48, 0.81, 0.69]]),  # Base_MFR_2030
            np.array([[0.27, 0.45, 0.41, 0.81, 0.55]]),  # SDS_MFR_2030
            np.array([[0.57, 0.75, 0.69, 1.2, 0.94]]),  # Base_CLE_2040
            np.array([[0.24, 0.41, 0.31, 0.83, 0.73]]),  # Base_MFR_2040
            np.array([[0.19, 0.38, 0.22, 0.83, 0.5]]),  # SDS_MFR_2040
            np.array([[0.52, 0.72, 0.65, 1.24, 0.91]]),  # Base_CLE_2050
            np.array([[0.2, 0.38, 0.29, 0.86, 0.72]]),  # Base_MFR_2050
            np.array([[0.18, 0.35, 0.2, 0.86, 0.46]]),  # SDS_MFR_2050
        ]
        custom_inputs = []
        for custom_input in custom_inputs_main:
            # per-sector variants: others at 1.0, sector zeroed, and sector only
            custom_input_res = np.copy(custom_input)
            custom_input_ind = np.copy(custom_input)
            custom_input_tra = np.copy(custom_input)
            custom_input_agr = np.copy(custom_input)
            custom_input_ene = np.copy(custom_input)
            custom_input_nores = np.copy(custom_input)
            custom_input_noind = np.copy(custom_input)
            custom_input_notra = np.copy(custom_input)
            custom_input_noagr = np.copy(custom_input)
            custom_input_noene = np.copy(custom_input)
            custom_input_resonly = np.copy(custom_input)
            custom_input_indonly = np.copy(custom_input)
            custom_input_traonly = np.copy(custom_input)
            custom_input_agronly = np.copy(custom_input)
            custom_input_eneonly = np.copy(custom_input)

            custom_input_res[0][1:] = 1.0
            custom_input_ind[0][0] = 1.0
            custom_input_ind[0][2:] = 1.0
            custom_input_tra[0][:2] = 1.0
            custom_input_tra[0][3:] = 1.0
            custom_input_agr[0][:3] = 1.0
            custom_input_agr[0][4:] = 1.0
            custom_input_ene[0][:4] = 1.0

            custom_input_nores[0][0] = 0.0
            custom_input_noind[0][1] = 0.0
            custom_input_notra[0][2] = 0.0
            custom_input_noagr[0][3] = 0.0
            custom_input_noene[0][4] = 0.0

            custom_input_resonly[0][1:] = 0.0
            custom_input_indonly[0][0] = 0.0
            custom_input_indonly[0][2:] = 0.0
            custom_input_traonly[0][:2] = 0.0
            custom_input_traonly[0][3:] = 0.0
            custom_input_agronly[0][:3] = 0.0
            custom_input_agronly[0][4:] = 0.0
            custom_input_eneonly[0][:4] = 0.0

            custom_inputs.append(custom_input)
            custom_inputs.append(custom_input_res)
            custom_inputs.append(custom_input_ind)
            custom_inputs.append(custom_input_tra)
            custom_inputs.append(custom_input_agr)
            custom_inputs.append(custom_input_ene)
            custom_inputs.append(custom_input_nores)
            custom_inputs.append(custom_input_noind)
            custom_inputs.append(custom_input_notra)
            custom_inputs.append(custom_input_noagr)
            custom_inputs.append(custom_input_noene)
            custom_inputs.append(custom_input_resonly)
            custom_inputs.append(custom_input_indonly)
            custom_inputs.append(custom_input_traonly)
            custom_inputs.append(custom_input_agronly)
            custom_inputs.append(custom_input_eneonly)

        custom_outputs_remaining = []
        for custom_input in custom_inputs:
            emission_config = f"RES{custom_input[0][0]:0.3f}_IND{custom_input[0][1]:0.3f}_TRA{custom_input[0][2]:0.3f}_AGR{custom_input[0][3]:0.3f}_ENE{custom_input[0][4]:0.3f}"
            custom_outputs_remaining.append(emission_config)

    if top_down_2020_baseline:
        # matching to PM2.5 only, top 1,000
        emission_config_2020_baseline = np.array([0.604, 0.399, 0.659, 0.613, 0.724])
        # 10% reduction increments from the 2020 baseline, up to 50%
        emission_configs = np.array(
            np.meshgrid(
                np.linspace(emission_config_2020_baseline[0] * 0.50, emission_config_2020_baseline[0], 6),
                np.linspace(emission_config_2020_baseline[1] * 0.50, emission_config_2020_baseline[1], 6),
                np.linspace(emission_config_2020_baseline[2] * 0.50, emission_config_2020_baseline[2], 6),
                np.linspace(emission_config_2020_baseline[3] * 0.50, emission_config_2020_baseline[3], 6),
                np.linspace(emission_config_2020_baseline[4] * 0.50, emission_config_2020_baseline[4], 6),
            )
        ).T.reshape(-1, 5)
        # add a few more with larger reductions in RES and IND to reach WHO-IT2
        # (1-D arrays, matching the meshgrid rows, so round() works below)
        emission_configs = list(emission_configs)
        emission_configs.append(np.array([0.242, 0.160, 0.659, 0.613, 0.724]))
        emission_configs.append(np.array([0.181, 0.120, 0.659, 0.613, 0.724]))
        emission_configs.append(np.array([0.121, 0.080, 0.659, 0.613, 0.724]))
        emission_configs.append(np.array([0.060, 0.040, 0.659, 0.613, 0.724]))
        emission_configs_total = []
        for emission_config in emission_configs:
            emission_configs_total.append(
                f"RES{round(emission_config[0], 3):.3f}_IND{round(emission_config[1], 3):.3f}_TRA{round(emission_config[2], 3):.3f}_AGR{round(emission_config[3], 3):.3f}_ENE{round(emission_config[4], 3):.3f}"
            )

        custom_outputs_completed = glob.glob(
            f"/nobackup/earlacoa/machinelearning/data_annual/health_impact_assessments/{output}_adjusted_scaled/df_country_hia_*.csv"
        )
        emission_configs_completed = []
        for custom_output_completed in custom_outputs_completed:
            emission_configs_completed.append(
                re.findall(
                    r"RES\d+\.\d+_IND\d+\.\d+_TRA\d+\.\d+_AGR\d+\.\d+_ENE\d+\.\d+",
                    custom_output_completed,
                )[0]
            )

        emission_configs_remaining_set = set(emission_configs_total) - set(
            emission_configs_completed
        )
        custom_outputs_remaining = list(emission_configs_remaining_set)
        print(
            f"custom outputs remaining: {len(custom_outputs_remaining)}, {int(100 * len(emission_configs_remaining_set) / len(emission_configs_total))}%"
        )

    # --------------------------------------------------
    # dask bag and process
    # process in chunks of n_outputs, each chunk taking ~2 minutes
    custom_outputs_remaining = custom_outputs_remaining[0:n_outputs]
    print(f"predicting for {len(custom_outputs_remaining)} custom outputs ...")
    bag_custom_outputs = db.from_sequence(custom_outputs_remaining, npartitions=n_workers)
    if output == "PM2_5_DRY":
        bag_custom_outputs.map(health_impact_assessment_pm25).compute()
    elif output == "o3_6mDM8h":
        bag_custom_outputs.map(health_impact_assessment_o3).compute()

    time_end = time.time() - time_start
    print(
        f"completed in {time_end:0.2f} seconds, or {time_end / 60:0.2f} minutes, or {time_end / 3600:0.2f} hours"
    )

    client.close()
    cluster.close()
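
# Each of these scripts is presumably invoked via the standard entry-point
# guard (not shown in the excerpts above), so main() runs on direct execution:
if __name__ == "__main__":
    main()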