def load_tasks(self, cmd, opt_values, pos_args):
    task_list = []
    for task_name in tasks:
        print('Configuring task %s' % task_name)
        func = globals()['task_' + task_name]
        # call the task function once and dispatch on its return type
        result = func()
        if isinstance(result, dict):
            result['verbosity'] = config.test.verbosity
            task_list.append(dict_to_task(result))
        else:
            for task_dict in result:
                task_dict['verbosity'] = config.test.verbosity
                task_list.append(dict_to_task(task_dict))
    task_config = {'verbosity': config.test.verbosity}
    return task_list, task_config
def reciprocal_best_last_task(self):
    def do_reciprocals():
        rbh_df, qvd_df, dvq_df = RBL.get_reciprocals(self.translated_x_db_fn,
                                                     self.db_x_translated_fn,
                                                     self.bh)
        q_names = pd.read_csv(self.name_map_fn)
        d_names = pd.read_csv(self.database_name_map_fn)

        rbh_df.to_csv(self.unmapped_output_fn, index=False)
        qvd_df.to_csv(self.translated_x_db_fn + '.csv', index=False)
        dvq_df.to_csv(self.db_x_translated_fn + '.csv', index=False)

        rbh_df = self.backmap(rbh_df, q_names, d_names)
        qvd_df = self.backmap(qvd_df, q_names, d_names)
        dvq_df = self.backmap(dvq_df, q_names, d_names)

        rbh_df.to_csv(self.output_fn, index=False)
        qvd_df.to_csv(self.translated_x_db_fn + '.mapped.csv', index=False)
        dvq_df.to_csv(self.db_x_translated_fn + '.mapped.csv', index=False)

    td = {'name': 'reciprocal_best_last',
          'title': title,
          'actions': [ShortenedPythonAction(do_reciprocals)],
          'file_dep': [self.translated_x_db_fn,
                       self.db_x_translated_fn],
          'targets': [self.output_fn + '.csv',
                      self.translated_x_db_fn + '.csv',
                      self.db_x_translated_fn + '.csv',
                      self.output_fn + '.mapped.csv',
                      self.translated_x_db_fn + '.mapped.csv',
                      self.db_x_translated_fn + '.mapped.csv'],
          'clean': [clean_targets]}
    return dict_to_task(td)
def crbl_filter_task(self):
    def do_crbl_filter():
        model_df = pd.read_csv(self.model_fn)
        rbh_df = pd.read_csv(self.unmapped_output_fn)
        hits_df = pd.read_csv(self.translated_x_db_fn + '.csv')

        filtered_df = self.filter_from_model(model_df, rbh_df, hits_df)
        results = pd.concat([rbh_df, filtered_df], axis=0)

        q_names = pd.read_csv(self.name_map_fn)
        d_names = pd.read_csv(self.database_name_map_fn)
        results = self.backmap(results, q_names, d_names)
        results.to_csv(self.crbl_output_fn, index=False)

        self.plot_crbl_fit(model_df, rbh_df, hits_df, self.model_plot_fn)

    td = {'name': 'filter_crbl_hits',
          'title': title,
          'actions': [ShortenedPythonAction(do_crbl_filter)],
          'file_dep': [self.output_fn,
                       self.translated_x_db_fn + '.csv',
                       self.db_x_translated_fn + '.csv',
                       self.model_fn],
          'targets': [self.crbl_output_fn, self.model_plot_fn]}
    return dict_to_task(td)
def generate_tasks(name, gen_result, gen_doc=None):
    """Create tasks from a task generator result.

    @param name: (string) name of taskgen function
    @param gen_result: value returned by a task generator function
    @param gen_doc: (string/None) docstring from the task generator function
    @return: (list) Task and list of subtasks
    """
    # task described as a dictionary
    if isinstance(gen_result, dict):
        if 'name' in gen_result:
            raise InvalidTask("Task %s. Only subtasks use field name." % name)
        gen_result['name'] = name

        # use task generator docstring if no doc present in task dict
        if 'doc' not in gen_result:
            gen_result['doc'] = gen_doc
        return [dict_to_task(gen_result)]

    # a generator
    if isgenerator(gen_result):
        group_task = Task(name, None, doc=gen_doc)
        tasks = [group_task]
        # the generator returns subtasks as dictionaries
        for task_dict in gen_result:
            # check valid input
            if not isinstance(task_dict, dict):
                raise InvalidTask("Task %s must yield dictionaries" % name)
            if 'name' not in task_dict:
                raise InvalidTask("Task %s must contain field name. %s" %
                                  (name, task_dict))
            # name is task.subtask
            task_dict['name'] = "%s:%s" % (name, task_dict.get('name'))
            sub_task = dict_to_task(task_dict)
            sub_task.is_subtask = True
            tasks.append(sub_task)
        # add task dependencies to group task
        group_task.task_dep = [task.name for task in tasks[1:]]
        return tasks

    raise InvalidTask("Task %s. Must return a dictionary, got %s" %
                      (name, type(gen_result)))
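# A minimal sketch (hypothetical task functions, not from the source) of the
# two shapes generate_tasks() accepts: a plain dict yields a single task,
# while a generator of dicts yields a group task plus named subtasks.
def gen_build():
    return {'actions': ['make build']}

def gen_tests():
    yield {'name': 'unit', 'actions': ['pytest tests/unit']}
    yield {'name': 'integration', 'actions': ['pytest tests/integration']}

build_tasks = generate_tasks('build', gen_build(), gen_build.__doc__)
# -> [Task('build')]
test_tasks = generate_tasks('tests', gen_tests(), gen_tests.__doc__)
# -> [Task('tests'), Task('tests:unit'), Task('tests:integration')]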
def load_tasks(self, cmd, opt_values, pos_args):
    del cmd
    del opt_values
    del pos_args
    tasks = []
    for task_generator in self.task_generators:
        tasks.append(dict_to_task(task_generator()))
    return tasks, {}
def visit_job(self, job):
    yield dict_to_task({
        'name': self.names[job],
        'uptodate': [lambda: self.uptodate[job]],
        'task_dep': self.task_dep,
        'getargs': self.getargs,
        'actions': [(job.submit_fn, [job.code, self.names[job]])],
    })
def load_tasks(cmd, opt_values, pos_args):
    cpus = multiprocessing.cpu_count()
    task_list = [dict_to_task(task.doit_dict) for task in tasks]
    config = {'verbosity': 2}
    if cpus > 1:
        config['num_process'] = cpus
        print("Using multiprocessing with", cpus, "processes.")
    return task_list, config
def process_tasklist(self, tasklist):
    """Process task list and create task objects"""
    ret = []
    for task in tasklist:
        ret.append(dict_to_task(task))
    return ret
def _processed_dicts_to_tasks(self, dicts):
    '''Transform a list of task dicts into a tuple of doit Task objects

    List must first be processed by _process_makeit_extensions
    '''
    tasks = []
    for taskdict in dicts:
        tasks.append(dict_to_task(taskdict))
    return tuple(tasks)
def create_task(self, task):
    group_task = dict_to_task({
        'name': task.name,
        'actions': None,
    })
    group_task.has_subtask = True
    creator = DistributeSubtaskCreator(task)
    return creator.create_subtasks()
def load_tasks(cmd, opt_values, pos_args):
    cpus = multiprocessing.cpu_count()
    task_list = [dict_to_task(task.doit_dict) for task in tasks]
    config = {'verbosity': 2,
              'dep_file': '{}/.doit.db'.format(EOD_CONTAINER_BASE)}
    if cpus > 1:
        config['num_process'] = cpus
        print("Using multiprocessing with {} processes.".format(cpus))
    return task_list, config
def d_to_t(*args, **kwargs):
    global _task_count
    ret_dict = task_dict_func(*args, **kwargs)
    if 'name' not in ret_dict:
        name = "{0}.func<{1}>".format(str(_task_count),
                                      task_dict_func.__name__)
        _task_count += 1
        ret_dict['name'] = name
    return dict_to_task(ret_dict)
def load_tasks(self, cmd, pos_args):
    return [
        dict_to_task(
            dict(
                name=task.get('name', uid),
                task_dep=task.get('deps', []),
                actions=self.load_actions(task),
                uptodate=task.get('uptodate', []),
            ))
        for uid, task in self.tasks.items()
    ]
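# A sketch of the self.tasks mapping the loader above assumes (the exact
# schema and load_actions() are defined elsewhere in that codebase): keys
# serve as fallback task names, and 'name'/'deps'/'uptodate' are optional.
example_tasks = {
    'fetch': {'uptodate': [True]},
    'build': {'name': 'compile', 'deps': ['fetch']},
}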
def process_tasklist(self, tasklist):
    """Process task list and create task objects"""
    ret = []
    for task in tasklist:
        ret.append(dict_to_task(task))
    print('Starting execution of %s tasks...' % len(ret))
    return ret
def conv_pyflakes(data):
    """convert a yaml entry into a Pyflakes task"""
    if isinstance(data, str):
        pattern = '*' in data
        module = data
    else:
        assert isinstance(data, dict)
        pattern = False
        module = data['file']
    flakes = Pyflakes()
    list_taskd = flakes.tasks(module) if pattern else [flakes(module)]
    for taskd in list_taskd:
        taskd['name'] = 'pyflakes:{}'.format(taskd['name'])
        yield dict_to_task(taskd)
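# Usage sketch for conv_pyflakes() above, covering both accepted entry
# shapes (file names are hypothetical; the task names shown assume
# Pyflakes() reports one entry per checked file):
for t in conv_pyflakes('mypkg/*.py'):        # glob pattern -> one task per file
    print(t.name)                            # e.g. 'pyflakes:mypkg/mod.py'
for t in conv_pyflakes({'file': 'setup.py'}):
    print(t.name)                            # e.g. 'pyflakes:setup.py'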
def crbl_fit_task(self):
    def do_crbl_fit():
        rbh_df = pd.read_csv(self.unmapped_output_fn)
        model_df = self.fit_crbh_model(rbh_df)
        model_df.to_csv(self.model_fn, index=False)

    td = {'name': 'fit_crbl_model',
          'title': title,
          'actions': [ShortenedPythonAction(do_crbl_fit)],
          'file_dep': [self.output_fn,
                       self.translated_x_db_fn + '.csv',
                       self.db_x_translated_fn + '.csv'],
          'targets': [self.model_fn]}
    return dict_to_task(td)
def visit_group(self, group):
    subtasks = itertools.chain.from_iterable(
        self.visit(job) for job in group.jobs)
    task_dict = {
        'name': self.names[group],
        'uptodate': [lambda: self.uptodate[group]],
        'task_dep': [self.names[job] for job in group.jobs],
        'actions': [],
    }
    task = dict_to_task(task_dict)
    task.has_subtask = True
    yield task
    for task in subtasks:
        task.is_subtask = True
        yield task
def visit_chain(self, chain):
    subtasks = []
    old_task_dep = self.task_dep
    old_getargs = self.getargs
    for job in chain.jobs:
        subtasks.extend(self.visit(job))
        self.task_dep = old_task_dep + [self.names[job]]
        self.getargs = {'depends_on': (self.names[job], 'id')}
    self.task_dep = old_task_dep
    self.getargs = old_getargs
    task_dict = {
        'name': self.names[chain],
        'uptodate': [lambda: self.uptodate[chain]],
        'task_dep': [self.names[job] for job in chain.jobs],
        'actions': [],
    }
    task = dict_to_task(task_dict)
    task.has_subtask = True
    yield task
    for task in subtasks:
        task.is_subtask = True
        yield task
def load_tasks(self, cmd, pos_args):
    task_list = [dict_to_task(my_builtin_task)]
    return task_list
def load_tasks(cmd, opt_values, pos_args):
    task_list = [dict_to_task(my_builtin_task)]
    config = {'verbosity': 2}
    return task_list, config
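# A runnable sketch of wiring a loader like the ones above into doit's
# public entry point via TaskLoader2 (the task dict here is an assumed
# stand-in for my_builtin_task):
import sys

from doit.cmd_base import TaskLoader2
from doit.doit_cmd import DoitMain
from doit.task import dict_to_task

sample_task = {'name': 'sample_task',
               'actions': ['echo hello from a custom loader']}

class SampleLoader(TaskLoader2):
    def setup(self, opt_values):
        pass

    def load_doit_config(self):
        return {'verbosity': 2}

    def load_tasks(self, cmd, pos_args):
        return [dict_to_task(sample_task)]

if __name__ == '__main__':
    sys.exit(DoitMain(SampleLoader()).run(sys.argv[1:]))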
def testDictOkMinimum(self):
    dict_ = {"name": "simple", "actions": ["xpto 14"]}
    assert isinstance(task.dict_to_task(dict_), task.Task)
def load_tasks(cmd, opt_values, pos_args):
    task_list = []
    i = 0
    # Make workspace
    mkdir_p(MyLoader.args.brute_dir)
    # Job task names
    job_names = []
    # Basename
    base_name = os.path.splitext(
        os.path.basename(MyLoader.args.brute_script))[0]
    # Run jobs
    for param in MyLoader.params:
        i += 1
        job_name = base_name + str(i)
        job_names += [job_name]
        job_cmd_str = None
        # Prepend fixed options:
        if len(MyLoader.args.brute_script_arg) > 0:
            args = []
            for arg in MyLoader.args.brute_script_arg:
                args += [arg.replace('#', str(i))]
            args = ' '.join(args)
            param = args + ' ' + param
        # Replace any # symbols in param with job index:
        param = param.replace('#', str(i))
        # Write parameters to work directory:
        with open(os.path.join(MyLoader.args.brute_dir,
                               job_name + ".params"), 'w') as f:
            f.write(param + "\n")
        e = MyLoader.config.get("brute", "env")
        if e == 'local':
            job_cmd_str = '%s %s' % (MyLoader.args.brute_script, param)
        elif e == 'slurm':
            job_cmd_str = 'srun %s %s' % (MyLoader.args.brute_script, param)
        elif e == 'sge':
            job_cmd_str = '%s %s' % (MyLoader.args.brute_script, param)
        else:
            print("[FATAL] unknown env: " + str(e))
            sys.exit(1)
        # Job work directory
        job_work_dir = os.path.join(MyLoader.args.brute_dir, job_name)
        # Make job work dir
        mkdir_task = {
            'name': 'mkdir' + str(i),
            'actions': [(mkdir_p, [job_work_dir])],
        }
        task_list.append(dict_to_task(mkdir_task))
        # Write the job script
        script = get_submission_script(job_cmd_str, job_name, job_work_dir,
                                       MyLoader.config)
        # Write script task
        script_path = os.path.join(job_work_dir, "run.sh")
        write_script_task = {
            'name': 'script' + str(i),
            'actions': [(write_script_to_file, [script, script_path])],
        }
        task_list.append(dict_to_task(write_script_task))
        # Run script job
        run_script = {
            'name': 'run' + str(i),
            'actions': ["bash %s" % script_path],
            'targets': [],
        }
        task_list.append(dict_to_task(run_script))
    config = {'verbosity': 2}
    return task_list, config
def testDictOkMinimum(self):
    dict_ = {'name': 'simple', 'actions': ['xpto 14']}
    assert isinstance(task.dict_to_task(dict_), task.Task)
def d_to_t(*args, **kwargs):
    ret_dict = task_dict_func(*args, **kwargs)
    return dict_to_task(ret_dict)
def load_tasks(self, cmd, opt_values, pos_args):
    task_list = [dict_to_task(my_builtin_task)]
    config = {'verbosity': 2}
    return task_list, config
def d_to_t(*args, **kwargs):
    global _task_count
    ret_dict = task_dict_func(*args, **kwargs)
    return dict_to_task(ret_dict)
def add_task2(task):
    DoitLoader.task_list.append(dict_to_task(task))
def add_task(targets, file_dep, actions, name=None, pipe=False, **kwargs):
    if not isinstance(targets, (list, tuple)):
        targets = [targets]
    if not isinstance(actions, (list, tuple)):
        actions = [actions]
    if not isinstance(file_dep, (list, tuple)):
        file_dep = [file_dep]
    if name is None:
        DoitLoader.task_id_counter += 1
        name = 'task_%d' % DoitLoader.task_id_counter
    actions = DoitLoader.format_actions(actions)
    if pipe:
        # connect consecutive actions stdout -> stdin
        for i in range(len(actions) - 1):
            actions[i][1]['stdout'] = subprocess.PIPE
    params = [{'name': 'actions', 'default': copy.deepcopy(actions)},
              {'name': 'pipe', 'default': pipe}]

    def run(actions):
        # open any file names given for the standard streams
        for i in range(len(actions)):
            for k in ['stdin', 'stdout', 'stderr']:
                if k in actions[i][1] and isinstance(actions[i][1][k], str):
                    mode = 'r' if k == 'stdin' else 'w'
                    actions[i][1][k] = open(actions[i][1][k], mode)
        finished = True
        for i in range(len(actions)):
            if callable(actions[i][0]):
                returncode = actions[i][0](**actions[i][1])
                finished = finished and returncode
            else:
                p = subprocess.Popen(actions[i][0], **actions[i][1])
                connect = False
                if 'stdout' in actions[i][1]:
                    if actions[i][1]['stdout'] == subprocess.PIPE \
                            and i < len(actions) - 1:
                        connect = True
                if connect:
                    actions[i + 1][1]['stdin'] = p.stdout
                else:
                    p.communicate()
                    finished = finished and p.returncode is not None
        # close file objects opened above (io.IOBase; needs 'import io')
        for i in range(len(actions)):
            for k in ['stdin', 'stdout', 'stderr']:
                if k in actions[i][1] and isinstance(actions[i][1][k],
                                                     io.IOBase):
                    actions[i][1][k].close()
        return finished

    task = {'name': name,
            'actions': [(run, [actions])],
            'targets': targets,
            'file_dep': file_dep,
            'params': params,
            'clean': [clean_targets],
            'title': DoitLoader.print_action}
    for k, v in kwargs.items():
        task[k] = v
    DoitLoader.task_list.append(dict_to_task(task))
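# Hypothetical usage of the add_task() helper above: with pipe=True the
# first command's stdout is wired into the second's stdin via
# subprocess.PIPE (assumes DoitLoader.format_actions() accepts argv lists;
# file names are illustrative).
DoitLoader.add_task(
    targets='sorted.txt',
    file_dep='raw.txt',
    actions=[['cat', 'raw.txt'], ['sort']],
    name='sort_raw',
    pipe=True,
)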
def load_tasks(self, cmd: DoitCommand, pos_args: list[str]) -> list[Task]:
    return [dict_to_task(test_task)]
def tasks(self):
    """Call this method to get tasks (not task dicts) from a pipeline."""
    for d in self.task_dicts:
        yield dict_to_task(d)
def d_to_t(*args, **kwargs):
    for ret_dict in task_dict_func(*args, **kwargs):
        yield dict_to_task({'name': task_dict_func.__name__, **ret_dict})
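# The d_to_t variants above are closures produced by a decorator over
# task_dict_func; a self-contained sketch of that pattern (yields_tasks and
# copy_config are illustrative names, not from the source):
from doit.task import dict_to_task

def yields_tasks(task_dict_func):
    def d_to_t(*args, **kwargs):
        for ret_dict in task_dict_func(*args, **kwargs):
            # an explicit 'name' in ret_dict overrides the function name
            yield dict_to_task({'name': task_dict_func.__name__, **ret_dict})
    return d_to_t

@yields_tasks
def copy_config(paths):
    for src, dst in paths:
        yield {'name': 'copy:%s' % dst,
               'actions': ['cp %s %s' % (src, dst)],
               'file_dep': [src],
               'targets': [dst]}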