def create_cleanup_jobs(self):
    """Saves rake summaries and removes temp files that are no longer needed."""
    runtime = 600 if self.draws == 0 else 7200
    for ko in range(self.holdouts + 1):
        task = PythonTask(
            script=CLEANUP_SCRIPT,
            args=[
                self.run_id, self.output_path, self.run_type, ko, self.draws
            ],
            name=f'clean_{self.run_id}_{ko}',
            num_cores=1,
            m_mem_free='1G',
            max_runtime_seconds=runtime,
            max_attempts=1,
            tag='stgpr_clean',
            queue='all.q')

        if ko == 0:
            # KO 0 is the full-data run: cleanup waits on every rake job.
            for loc in self.subnat_locations:
                task.add_upstream(self.rake_jobs['rake_{}_{}'.format(
                    self.run_id, loc)])
        else:
            # Holdout KOs only need the eval job to have finished.
            task.add_upstream(self.eval_jobs['eval_{}'.format(self.run_id)])

        self.workflow.add_task(task)
        self.cleanup_jobs[task.name] = task
def create_post_jobs(self):
    """Depends on GPR jobs. Calculates fit statistics and cleans up file
    folders."""
    for ko in range(self.holdouts + 1):
        for param_group in range(len(self.param_groups)):
            submit_params = ','.join(
                [str(x) for x in self.param_groups[param_group]])
            task = PythonTask(
                script=POST_SCRIPT,
                args=[
                    self.run_id, self.output_path, ko, self.run_type,
                    self.holdouts, submit_params
                ],
                name=f'post_{self.run_id}_{ko}_{param_group}',
                num_cores=1,
                m_mem_free='2G',
                max_runtime_seconds=300,
                max_attempts=2,
                tag='stgpr_post',
                queue='all.q',
                resource_scales=RESOURCE_SCALES,
                hard_limits=True)

            # add GPR upstreams
            for loc_group in range(self.nparallel):
                gp_label = 'gpr_{}_{}_{}_{}'.format(
                    self.run_id, ko, param_group, loc_group)
                upstream_job = self.gpr_jobs[gp_label]
                task.add_upstream(upstream_job)

            self.workflow.add_task(task)
            self.post_jobs[task.name] = task
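# Several ST-GPR tasks in this module pass a module-level RESOURCE_SCALES
# constant along with hard_limits=True. Its definition is not shown in this
# section; a minimal sketch, assuming Jobmon's convention of mapping each
# resource name to the fraction by which that resource is scaled up when a
# task is retried after a resource kill, might be:
RESOURCE_SCALES = {
    'm_mem_free': 0.5,           # assumed: ask for ~50% more memory on retry
    'max_runtime_seconds': 0.5,  # assumed: allow ~50% more runtime on retry
}
# With hard_limits=True, the scaled requests are presumably still capped at
# the queue maximums.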
def create_descanso_jobs(self):
    """Depends on the spacetime jobs; estimates amplitude and non-sampling
    variance (NSV)."""
    for ko in range(self.holdouts + 1):
        for param_group in range(len(self.param_groups)):
            submit_params = ','.join(
                [str(x) for x in self.param_groups[param_group]])
            runtime = 3600 if self.is_diet_model else 300
            task = PythonTask(
                script=IM_SCRIPT,
                args=[
                    self.run_id, self.output_path, ko, self.draws,
                    self.nparallel, submit_params
                ],
                name=f'descanso_{self.run_id}_{ko}_{param_group}',
                num_cores=1,
                m_mem_free='20G',  # upped from 5 to 20 for variance simulation
                max_runtime_seconds=runtime,
                max_attempts=2,
                tag='stgpr_amp_nsv',
                queue='all.q',
                resource_scales=RESOURCE_SCALES,
                hard_limits=True)

            # add ST upstreams
            for loc_group in range(self.nparallel):
                st_label = 'st_{}_{}_{}_{}'.format(
                    self.run_id, ko, param_group, loc_group)
                upstream_job = self.st_jobs[st_label]
                task.add_upstream(upstream_job)

            self.workflow.add_task(task)
            self.descanso_jobs[task.name] = task
def create_apply_correction_tasks(self) -> None:
    """
    Adds tasks to apply the correction for each sex and most-detailed
    location. Dependent on the envelope caching tasks and on draw
    validation for every best model version.
    """
    for sex in self.parameters.sex_ids:
        for location in self.parameters.most_detailed_location_ids:
            correct_task = PythonTask(
                script=os.path.join(
                    self.code_dir, DAG.Executables.CORRECT
                ),
                args=[
                    '--action', DAG.Tasks.Type.CORRECT,
                    '--parent_dir', self.parameters.parent_dir,
                    '--gbd_round_id', self.parameters.gbd_round_id,
                    '--version_id', self.parameters.version_id,
                    '--location_id', location,
                    '--sex_id', sex,
                    '--env_version_id', self.parameters.envelope_version_id
                ],
                name=DAG.Tasks.Name.APPLY_CORRECTION.format(
                    location=location, sex=sex
                ),
                num_cores=DAG.Tasks.Cores.APPLY_CORRECTION,
                m_mem_free=DAG.Tasks.Memory.APPLY_CORRECTION,
                upstream_tasks=[
                    self.task_map[DAG.Tasks.Type.CACHE][
                        DAG.Tasks.Name.CACHE_MORTALITY.format(
                            mort_process=MortalityInputs.ENVELOPE_DRAWS
                        )
                    ],
                    self.task_map[DAG.Tasks.Type.CACHE][
                        DAG.Tasks.Name.CACHE_MORTALITY.format(
                            mort_process=(
                                MortalityInputs.ENVELOPE_SUMMARY
                            )
                        )
                    ]
                ],
                max_runtime_seconds=DAG.Tasks.Runtime.APPLY_CORRECTION,
                tag=DAG.Tasks.Type.CORRECT
            )
            # Draw validation for every best model version must finish first.
            for mvid in self.parameters.best_model_version_ids:
                upstream_name = DAG.Tasks.Name.VALIDATE_DRAWS.format(
                    model_version_id=mvid
                )
                correct_task.add_upstream(
                    self.task_map[DAG.Tasks.Type.VALIDATE][upstream_name]
                )

            self.task_map[DAG.Tasks.Type.CORRECT][
                correct_task.name
            ] = correct_task
            self.workflow.add_task(correct_task)
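# The FauxCorrect methods in this module register every task in
# self.task_map, keyed first by task type and then by task name, so later
# methods can look their upstreams up by name. The map's initialization is
# not shown in this section; a minimal sketch, assuming it is simply a nested
# dict created in the class constructor, might be:
from collections import defaultdict


def _build_task_map():
    """Hypothetical helper: one {task_name: task} dict per task type."""
    return defaultdict(dict)

# e.g. in __init__ (assumed, not shown):
#     self.task_map = _build_task_map()
# so that registration reads:
#     self.task_map[DAG.Tasks.Type.CORRECT][correct_task.name] = correct_task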
def create_gpr_jobs(self):
    # set runtime and memory based on draws
    gpr_runtime = 1200
    gpr_memory = 4
    if self.draws == 100:
        gpr_runtime = 1500
        gpr_memory = 7
    elif self.draws == 1000:
        gpr_runtime = 1800
        gpr_memory = 10
    if self.is_diet_model:
        gpr_runtime *= 3
        gpr_memory *= 3

    for ko in range(self.holdouts + 1):
        for param_group in range(len(self.param_groups)):
            upstream_job = self.descanso_jobs['descanso_{}_{}_{}'.format(
                self.run_id, ko, param_group)]
            for loc_group in range(self.nparallel):
                submit_params = ','.join(
                    [str(x) for x in self.param_groups[param_group]])
                jname = 'gpr_{}_{}_{}_{}'.format(
                    self.run_id, ko, param_group, loc_group)
                task = PythonTask(
                    script=GPR_SCRIPT,
                    args=[
                        self.run_id, self.output_path, ko, self.draws,
                        submit_params, self.nparallel, loc_group
                    ],
                    name=jname,
                    num_cores=1,
                    m_mem_free=f'{gpr_memory}G',
                    max_runtime_seconds=gpr_runtime,
                    max_attempts=2,
                    tag='stgpr_gpr',
                    queue='all.q',
                    resource_scales=RESOURCE_SCALES,
                    hard_limits=True)
                task.add_upstream(upstream_job)
                self.workflow.add_task(task)
                self.gpr_jobs[task.name] = task
def create_st_jobs(self):
    for ko in range(self.holdouts + 1):
        upstream_job = self.stage1_jobs['stage1_{}_{}'.format(
            self.run_id, ko)]
        for param_group in range(len(self.param_groups)):
            for loc_group in range(self.nparallel):
                submit_params = ','.join(
                    [str(x) for x in self.param_groups[param_group]])
                jname = 'st_{}_{}_{}_{}'.format(
                    self.run_id, ko, param_group, loc_group)

                memory = 50
                runtime = 1500
                if self.is_diet_model:
                    memory = 120
                    runtime = 28800  # 8 hours

                task = PythonTask(
                    script=ST_SCRIPT,
                    args=[
                        self.run_id, self.output_path, ko, self.run_type,
                        submit_params, self.nparallel, loc_group
                    ],
                    name=jname,
                    num_cores=6,
                    m_mem_free=f'{memory}G',
                    max_attempts=3,
                    max_runtime_seconds=runtime,
                    tag='stgpr_spacetime',
                    queue='all.q',
                    resource_scales=RESOURCE_SCALES,
                    hard_limits=True)
                task.add_upstream(upstream_job)
                self.workflow.add_task(task)
                self.st_jobs[task.name] = task
def create_eval_jobs(self):
    """
    For hyperparameter selection runs only, determine the best
    hyperparameter set based on in-sample or out-of-sample RMSE:

    - run_type = "in_sample_selection"
    - run_type = "oos_selection"

    For runs with only one set of parameters, set best_param_set to the
    *only* param set (param_set 0) for consistency in rake inputs.

    Lastly, for all run types, collect the separate fit_stats files and
    combine them into a single fit_stats.csv.
    """
    task = PythonTask(
        script=EVAL_SCRIPT,
        args=[
            self.run_id, self.output_path, self.run_type, self.holdouts,
            self.n_parameter_sets
        ],
        name=f'eval_{self.run_id}',
        num_cores=1,
        m_mem_free='500M',
        max_runtime_seconds=180,
        max_attempts=2,
        tag='stgpr_eval',
        queue='all.q',
        resource_scales=RESOURCE_SCALES,
        hard_limits=True)

    # eval waits on every post job across all KOs and parameter groups
    for ko in range(self.holdouts + 1):
        for param_group in range(len(self.param_groups)):
            post_label = 'post_{}_{}_{}'.format(self.run_id, ko, param_group)
            task.add_upstream(self.post_jobs[post_label])

    self.workflow.add_task(task)
    self.eval_jobs[task.name] = task
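# The ST-GPR create_* methods look their upstreams up in dicts populated by
# earlier calls (self.stage1_jobs, self.st_jobs, self.descanso_jobs,
# self.gpr_jobs, self.post_jobs, self.eval_jobs, self.rake_jobs), so they
# must be invoked in dependency order. The orchestration method is not part
# of this section; a hypothetical sketch, assuming a create_stage1_jobs
# method exists elsewhere in the class and that self.workflow.run() launches
# the Jobmon workflow, is:
def build_and_run(self):
    """Hypothetical driver: build ST-GPR tasks in dependency order, then run."""
    self.create_stage1_jobs()    # assumed to exist; fills self.stage1_jobs
    self.create_st_jobs()        # reads self.stage1_jobs
    self.create_descanso_jobs()  # reads self.st_jobs
    self.create_gpr_jobs()       # reads self.descanso_jobs
    self.create_post_jobs()      # reads self.gpr_jobs
    self.create_eval_jobs()      # reads self.post_jobs
    self.create_rake_jobs()      # reads self.gpr_jobs / self.eval_jobs
    self.create_cleanup_jobs()   # reads self.rake_jobs / self.eval_jobs
    return self.workflow.run()   # assumed Jobmon entry point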
def create_upload_tasks(self) -> None:
    """
    Adds tasks to upload summaries to the gbd and/or cod databases.
    """
    # Determine if percent change is part of the workflow
    if self.parameters.year_start_ids is not None:
        upload_types = ['single', 'multi']
    else:
        upload_types = ['single']

    # gbd upload tasks
    for measure in self.parameters.measure_ids:
        for upload_type in upload_types:
            upload_gbd_task = PythonTask(
                script=os.path.join(
                    self.code_dir, DAG.Executables.UPLOAD
                ),
                args=[
                    '--machine_process', self.parameters.process,
                    '--version_id', self.parameters.version_id,
                    '--database', DataBases.GBD,
                    '--measure_id', measure,
                    '--upload_type', upload_type
                ],
                name=DAG.Tasks.Name.UPLOAD.format(
                    database=DataBases.GBD,
                    uploadtype=upload_type,
                    measure=measure
                ),
                num_cores=DAG.Tasks.Cores.UPLOAD,
                m_mem_free=DAG.Tasks.Memory.UPLOAD,
                max_runtime_seconds=DAG.Tasks.Runtime.UPLOAD,
                queue=DAG.UPLOAD_QUEUE,
                tag=DAG.Tasks.Type.UPLOAD
            )
            # add gbd summarization tasks as upstream dependencies
            if upload_type == 'single':
                for year in self.parameters.year_ids:
                    for location in self.parameters.location_ids:
                        upload_gbd_task.add_upstream(
                            self.task_map[DAG.Tasks.Type.SUMMARIZE][
                                DAG.Tasks.Name.SUMMARIZE_GBD.format(
                                    measure=measure,
                                    location=location,
                                    year=year
                                )
                            ]
                        )
            else:
                for year_index in range(len(self.parameters.year_start_ids)):
                    for location in self.parameters.location_ids:
                        upload_gbd_task.add_upstream(
                            self.task_map[DAG.Tasks.Type.SUMMARIZE][
                                DAG.Tasks.Name.SUMMARIZE_PCT_CHANGE.format(
                                    measure=measure,
                                    location=location,
                                    year_start=self.parameters.year_start_ids[
                                        year_index],
                                    year_end=self.parameters.year_end_ids[
                                        year_index]
                                )
                            ]
                        )

            self.task_map[DAG.Tasks.Type.UPLOAD][
                upload_gbd_task.name
            ] = upload_gbd_task
            self.workflow.add_task(upload_gbd_task)

    # cod upload tasks - DEATHS only
    if DataBases.COD in self.parameters.databases:
        upload_cod_task = PythonTask(
            script=os.path.join(
                self.code_dir, DAG.Executables.UPLOAD),
            args=[
                '--machine_process', self.parameters.process,
                '--version_id', self.parameters.version_id,
                '--database', DataBases.COD,
                '--measure_id', Measures.Ids.DEATHS,
            ],
            name=DAG.Tasks.Name.UPLOAD.format(
                database=DataBases.COD,
                uploadtype='single',
                measure=Measures.Ids.DEATHS
            ),
            num_cores=DAG.Tasks.Cores.UPLOAD,
            m_mem_free=DAG.Tasks.Memory.UPLOAD,
            max_runtime_seconds=DAG.Tasks.Runtime.UPLOAD,
            queue=DAG.QUEUE,
            tag=DAG.Tasks.Type.UPLOAD
        )
        # add cod summarization tasks as upstream dependencies
        for year in self.parameters.year_ids:
            for location in self.parameters.location_ids:
                upload_cod_task.add_upstream(
                    self.task_map[DAG.Tasks.Type.SUMMARIZE][
                        DAG.Tasks.Name.SUMMARIZE_COD.format(
                            measure=Measures.Ids.DEATHS,
                            location=location,
                            year=year
                        )
                    ]
                )

        self.task_map[DAG.Tasks.Type.UPLOAD][
            upload_cod_task.name
        ] = upload_cod_task
        self.workflow.add_task(upload_cod_task)
def create_summarize_tasks(self) -> None:
    """
    Adds tasks to summarize draw-level estimates for each location, year,
    and measure in our FauxCorrect run.

    Dependent on the mortality input caching tasks as well as the append
    shocks tasks.
    """
    for measure in self.parameters.measure_ids:
        for year in self.parameters.year_ids:
            for location in self.parameters.location_ids:
                # Create summarize tasks for gbd schema.
                summarize_gbd_task = PythonTask(
                    script=os.path.join(
                        self.code_dir,
                        DAG.Executables.SUMMARIZE_GBD
                    ),
                    args=[
                        '--parent_dir', self.parameters.parent_dir,
                        '--gbd_round_id', self.parameters.gbd_round_id,
                        '--location_id', location,
                        '--year_id', year,
                        '--measure_id', measure,
                        '--machine_process', self.parameters.process
                    ],
                    name=DAG.Tasks.Name.SUMMARIZE_GBD.format(
                        measure=measure, location=location, year=year
                    ),
                    num_cores=DAG.Tasks.Cores.SUMMARIZE,
                    m_mem_free=DAG.Tasks.Memory.SUMMARIZE,
                    max_runtime_seconds=DAG.Tasks.Runtime.SUMMARIZE,
                    queue=DAG.QUEUE,
                    upstream_tasks=[
                        self.task_map[DAG.Tasks.Type.CACHE][
                            DAG.Tasks.Name.CACHE_MORTALITY.format(
                                mort_process=MortalityInputs.ENVELOPE_DRAWS
                            )
                        ],
                        self.task_map[DAG.Tasks.Type.CACHE][
                            DAG.Tasks.Name.CACHE_MORTALITY.format(
                                mort_process=(
                                    MortalityInputs.ENVELOPE_SUMMARY
                                )
                            )
                        ],
                        self.task_map[DAG.Tasks.Type.CACHE][
                            DAG.Tasks.Name.CACHE_MORTALITY.format(
                                mort_process=MortalityInputs.POPULATION
                            )
                        ]
                    ],
                    tag=DAG.Tasks.Type.SUMMARIZE
                )
                for append_sex in self.parameters.sex_ids:
                    summarize_gbd_task.add_upstream(
                        self.task_map[DAG.Tasks.Type.APPEND][
                            DAG.Tasks.Name.APPEND_SHOCKS.format(
                                location=location,
                                sex=append_sex
                            )
                        ]
                    )
                self.task_map[DAG.Tasks.Type.SUMMARIZE][
                    summarize_gbd_task.name
                ] = summarize_gbd_task
                self.workflow.add_task(summarize_gbd_task)

                if (
                        measure == Measures.Ids.YLLS
                        or DataBases.COD not in self.parameters.databases
                ):
                    continue

                # Create summarize tasks for deaths and cod schema.
                summarize_cod_task = PythonTask(
                    script=os.path.join(
                        self.code_dir,
                        DAG.Executables.SUMMARIZE_COD
                    ),
                    args=[
                        '--version_id', self.parameters.version_id,
                        '--parent_dir', self.parameters.parent_dir,
                        '--gbd_round_id', self.parameters.gbd_round_id,
                        '--location_id', location,
                        '--year_id', year
                    ],
                    name=DAG.Tasks.Name.SUMMARIZE_COD.format(
                        measure=Measures.Ids.DEATHS,
                        location=location,
                        year=year
                    ),
                    num_cores=DAG.Tasks.Cores.SUMMARIZE,
                    m_mem_free=DAG.Tasks.Memory.SUMMARIZE,
                    max_runtime_seconds=DAG.Tasks.Runtime.SUMMARIZE,
                    queue=DAG.QUEUE,
                    upstream_tasks=[
                        self.task_map[DAG.Tasks.Type.CACHE][
                            DAG.Tasks.Name.CACHE_MORTALITY.format(
                                mort_process=MortalityInputs.ENVELOPE_DRAWS
                            )
                        ],
                        self.task_map[DAG.Tasks.Type.CACHE][
                            DAG.Tasks.Name.CACHE_MORTALITY.format(
                                mort_process=(
                                    MortalityInputs.ENVELOPE_SUMMARY
                                )
                            )
                        ],
                        self.task_map[DAG.Tasks.Type.CACHE][
                            DAG.Tasks.Name.CACHE_MORTALITY.format(
                                mort_process=MortalityInputs.POPULATION
                            )
                        ]
                    ],
                    tag=DAG.Tasks.Type.SUMMARIZE
                )
                for append_sex in self.parameters.sex_ids:
                    summarize_cod_task.add_upstream(
                        self.task_map[DAG.Tasks.Type.APPEND][
                            DAG.Tasks.Name.APPEND_SHOCKS.format(
                                location=location,
                                sex=append_sex
                            )
                        ]
                    )
                self.task_map[DAG.Tasks.Type.SUMMARIZE][
                    summarize_cod_task.name
                ] = summarize_cod_task
                self.workflow.add_task(summarize_cod_task)

        # pct_change summarization tasks
        if self.parameters.year_start_ids:
            for year_index in range(len(self.parameters.year_start_ids)):
                for pctc_location in self.parameters.location_ids:
                    summarize_pct_change_task = PythonTask(
                        script=os.path.join(
                            self.code_dir,
                            DAG.Executables.SUMMARIZE_PCT_CHANGE
                        ),
                        args=[
                            '--parent_dir', self.parameters.parent_dir,
                            '--gbd_round_id', self.parameters.gbd_round_id,
                            '--location_id', pctc_location,
                            '--year_start_id',
                            self.parameters.year_start_ids[year_index],
                            '--year_end_id',
                            self.parameters.year_end_ids[year_index],
                            '--measure_id', measure,
                            '--machine_process', self.parameters.process
                        ],
                        name=DAG.Tasks.Name.SUMMARIZE_PCT_CHANGE.format(
                            measure=measure,
                            location=pctc_location,
                            year_start=self.parameters.year_start_ids[
                                year_index],
                            year_end=self.parameters.year_end_ids[year_index]
                        ),
                        num_cores=DAG.Tasks.Cores.PCT_CHANGE,
                        m_mem_free=DAG.Tasks.Memory.PCT_CHANGE,
                        max_runtime_seconds=DAG.Tasks.Runtime.SUMMARIZE,
                        queue=DAG.QUEUE,
                        upstream_tasks=[
                            self.task_map[DAG.Tasks.Type.CACHE][
                                DAG.Tasks.Name.CACHE_MORTALITY.format(
                                    mort_process=(
                                        MortalityInputs.ENVELOPE_DRAWS
                                    )
                                )
                            ],
                            self.task_map[DAG.Tasks.Type.CACHE][
                                DAG.Tasks.Name.CACHE_MORTALITY.format(
                                    mort_process=(
                                        MortalityInputs.ENVELOPE_SUMMARY
                                    )
                                )
                            ],
                            self.task_map[DAG.Tasks.Type.CACHE][
                                DAG.Tasks.Name.CACHE_MORTALITY.format(
                                    mort_process=MortalityInputs.POPULATION
                                )
                            ]
                        ],
                        tag=DAG.Tasks.Type.SUMMARIZE
                    )
                    for append_sex in self.parameters.sex_ids:
                        summarize_pct_change_task.add_upstream(
                            self.task_map[DAG.Tasks.Type.APPEND][
                                DAG.Tasks.Name.APPEND_SHOCKS.format(
                                    location=pctc_location,
                                    sex=append_sex
                                )
                            ]
                        )
                    self.task_map[DAG.Tasks.Type.SUMMARIZE][
                        summarize_pct_change_task.name
                    ] = summarize_pct_change_task
                    self.workflow.add_task(summarize_pct_change_task)
def create_append_shocks_tasks(self) -> None:
    """
    Adds tasks to append shocks to Deaths and YLLs by location, sex, and
    year. Dependent on completed correction application for respective
    locations, sexes, and years. Also dependent on location aggregation.
    """
    for sex in self.parameters.sex_ids:
        for location in self.parameters.location_ids:
            most_detailed_location = (
                location in self.parameters.most_detailed_location_ids
            )
            append_shocks = PythonTask(
                script=os.path.join(
                    self.code_dir, DAG.Executables.APPEND_SHOCKS
                ),
                args=[
                    '--parent_dir', self.parameters.parent_dir,
                    '--machine_process', self.parameters.process,
                    '--measure_ids',
                    " ".join([str(x) for x in self.parameters.measure_ids]),
                    '--location_id', location,
                    '--most_detailed_location', most_detailed_location,
                    '--sex_id', sex
                ],
                name=DAG.Tasks.Name.APPEND_SHOCKS.format(
                    location=location, sex=sex
                ),
                num_cores=DAG.Tasks.Cores.APPEND_SHOCKS,
                m_mem_free=DAG.Tasks.Memory.APPEND_SHOCKS,
                max_runtime_seconds=DAG.Tasks.Runtime.APPEND,
                tag=DAG.Tasks.Type.APPEND
            )
            if (
                    most_detailed_location
                    and Measures.Ids.YLLS in self.parameters.measure_ids
            ):
                # attach calc ylls jobs as upstream
                append_shocks.add_upstream(
                    self.task_map[DAG.Tasks.Type.CALCULATE][
                        DAG.Tasks.Name.CALC_YLLS.format(
                            location=location, sex=sex
                        )
                    ]
                )
            elif (
                    most_detailed_location
                    and Measures.Ids.YLLS not in self.parameters.measure_ids
            ):
                # add cause agg jobs as upstream.
                append_shocks.add_upstream(
                    self.task_map[DAG.Tasks.Type.CAUSE_AGG][
                        DAG.Tasks.Name.CAUSE_AGGREGATION.format(
                            location=location, sex=sex
                        )
                    ]
                )
            else:
                # Add location aggregation tasks as upstream dependency.
                for location_set_id in self.parameters.location_set_ids:
                    for loc_agg_year in self.parameters.year_ids:
                        for loc_agg_measure in self.parameters.measure_ids:
                            for loc_agg_type in (
                                    LocationAggregation.Type.CODCORRECT
                            ):
                                if (
                                        FilePaths.UNSCALED_DIR in loc_agg_type
                                        and loc_agg_measure
                                        == Measures.Ids.YLLS
                                ):
                                    continue
                                append_shocks.add_upstream(
                                    self.task_map[DAG.Tasks.Type.LOC_AGG][
                                        DAG.Tasks.Name.LOCATION_AGGREGATION
                                        .format(
                                            aggregation_type=(
                                                loc_agg_type.replace("/", "_")
                                            ),
                                            location_set=location_set_id,
                                            measure=loc_agg_measure,
                                            year=loc_agg_year,
                                        )
                                    ]
                                )
            self.task_map[DAG.Tasks.Type.APPEND][
                append_shocks.name
            ] = append_shocks
            self.workflow.add_task(append_shocks)
def create_location_aggregation_tasks(self) -> None:
    """
    Adds tasks to aggregate up the location hierarchy for each location
    set id, sex, year, and measure in our FauxCorrect run.

    Dependent on each location, sex, and year specific group of scalar or
    calculate ylls tasks to be completed for their respective measure ids.
    """
    for location_set_id in self.parameters.location_set_ids:
        for year in self.parameters.year_ids:
            for measure in self.parameters.measure_ids:
                for loc_agg_type in LocationAggregation.Type.CODCORRECT:
                    if (
                            FilePaths.UNSCALED_DIR in loc_agg_type
                            and measure == Measures.Ids.YLLS
                    ):
                        continue
                    agg_task = PythonTask(
                        script=os.path.join(
                            self.code_dir, DAG.Executables.LOC_AGG
                        ),
                        args=[
                            '--action', DAG.Tasks.Type.LOC_AGG,
                            '--parent_dir', self.parameters.parent_dir,
                            '--gbd_round_id', self.parameters.gbd_round_id,
                            '--aggregation_type', loc_agg_type,
                            '--location_set_id', location_set_id,
                            '--year_id', year,
                            '--measure_id', measure
                        ],
                        name=DAG.Tasks.Name.LOCATION_AGGREGATION.format(
                            aggregation_type=(
                                loc_agg_type.replace("/", "_")
                            ),
                            location_set=location_set_id,
                            measure=measure,
                            year=year
                        ),
                        num_cores=DAG.Tasks.Cores.LOCATION_AGGREGATION,
                        m_mem_free=(
                            DAG.Tasks.Memory.LOCATION_AGGREGATION
                        ),
                        max_runtime_seconds=(
                            DAG.Tasks.Runtime.LOCATION_AGGREGATION
                        ),
                        queue=DAG.QUEUE,
                        upstream_tasks=[
                            self.task_map[DAG.Tasks.Type.CACHE][
                                DAG.Tasks.Name.CACHE_REGIONAL_SCALARS
                            ]
                        ],
                        tag=DAG.Tasks.Type.LOC_AGG
                    )
                    # Attach upstream dependencies: all most-detailed
                    # locations for each sex and year.
                    for sex in self.parameters.sex_ids:
                        for loc in (
                                self.parameters.most_detailed_location_ids
                        ):
                            if measure == Measures.Ids.YLLS:
                                # If aggregating measure 4 (YLLs), attach
                                # calc ylls jobs as upstream
                                agg_task.add_upstream(
                                    self.task_map[DAG.Tasks.Type.CALCULATE][
                                        DAG.Tasks.Name.CALC_YLLS.format(
                                            location=loc, sex=sex
                                        )
                                    ]
                                )
                            else:
                                # If measure is not 4 (YLLs), then add
                                # cause agg jobs as upstream.
                                agg_task.add_upstream(
                                    self.task_map[DAG.Tasks.Type.CAUSE_AGG][
                                        DAG.Tasks.Name.CAUSE_AGGREGATION
                                        .format(location=loc, sex=sex)
                                    ]
                                )
                    if location_set_id not in [
                            LocationSetId.OUTPUTS,
                            LocationSetId.SDI,
                            LocationSetId.STANDARD
                    ]:
                        # If the location set is one of the special sets that
                        # central computation only aggregates at the end of a
                        # round, add the outputs location set as an upstream
                        # dependency so it finishes first.
                        agg_task.add_upstream(
                            self.task_map[DAG.Tasks.Type.LOC_AGG][
                                DAG.Tasks.Name.LOCATION_AGGREGATION.format(
                                    aggregation_type=(
                                        loc_agg_type.replace("/", "_")
                                    ),
                                    location_set=LocationSetId.OUTPUTS,
                                    measure=measure,
                                    year=year
                                )
                            ]
                        )
                    self.task_map[DAG.Tasks.Type.LOC_AGG][
                        agg_task.name
                    ] = agg_task
                    self.workflow.add_task(agg_task)
def create_rake_jobs(self):
    """Depends on the GPR jobs covering all subnational and national
    locations needed for each rake job, parallelized out by parent_id.
    Raking is only done on the first KO (KO 0), which does not hold out
    any data from the dataset."""
    for loc in self.subnat_locations:
        mem = int(
            np.ceil(
                self.rake_memory_df.query(f'location == {loc}')
                ['mem'].iat[0]))
        rt = int(
            np.ceil(
                self.rake_memory_df.query(f'location == {loc}')
                ['runtime'].iat[0]))
        if self.draws == 1000:
            mem *= 2
            rt *= 3
            rt = max(rt, 7200)
        if self.is_diet_model:
            mem *= 2
            rt *= 3
            rt = max(rt, 14400)

        task = PythonTask(
            script=RAKE_SCRIPT,
            args=[
                self.run_id, self.output_path, 0, self.draws, self.run_type,
                self.rake_logit, loc
            ],
            name=f'rake_{self.run_id}_{loc}',
            num_cores=1,
            m_mem_free=f'{mem}G',
            max_runtime_seconds=rt,
            max_attempts=2,
            tag='stgpr_rake',
            queue='all.q',
            resource_scales=RESOURCE_SCALES,
            hard_limits=True)

        # grab all subnational and national location_ids associated
        # with this country
        lvl = 'level_{}'.format(NATIONAL_LEVEL)
        all_needed_locs = self.locs.loc[self.locs[lvl] == loc,
                                        'location_id'].unique()

        # add each gpr job containing a needed national/subnational
        # location to the upstreams
        if self.holdouts == 0:
            for param_group in range(len(self.param_groups)):
                for loc_group in range(self.nparallel):
                    loc_group_vals = self.parallel_groups[loc_group]
                    common_elements = len(
                        intersection(all_needed_locs.tolist(),
                                     loc_group_vals.tolist()))
                    if common_elements > 0:
                        task.add_upstream(
                            self.gpr_jobs['gpr_{}_0_{}_{}'.format(
                                self.run_id, param_group, loc_group)])
        else:
            task.add_upstream(self.eval_jobs['eval_{}'.format(self.run_id)])

        self.workflow.add_task(task)
        self.rake_jobs[task.name] = task
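# create_rake_jobs above relies on an intersection helper that is not defined
# in this section and only uses the length of its result. A minimal sketch of
# what such a helper presumably does (return the elements common to both
# lists) is:
def intersection(list_a, list_b):
    """Hypothetical helper: elements appearing in both input lists."""
    set_b = set(list_b)
    return [item for item in list_a if item in set_b]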