def parse_args_uargs(self):
    args, config = parse_args_uargs(self.args, [])
    config = merge_dicts_smart(config, self.grid_config)
    config = merge_dicts_smart(config, self.params)

    if self.distr_info:
        self.set_dist_env(config)
    return args, config
def _dag(config: str, debug: bool = False, control_reqs=True,
         params: Tuple[str] = ()):
    logger = create_logger(_session, name='_dag')
    logger.info('started', ComponentType.Client)

    with open(config, 'r') as f:
        config_text = f.read()
    config_parsed = yaml_load(config_text)
    params = dict_from_list_str(params)
    config_parsed = merge_dicts_smart(config_parsed, params)
    config_text = yaml_dump(config_parsed)

    logger.info('config parsed', ComponentType.Client)

    type_name = config_parsed['info'].get('type', 'standard')
    if type_name == DagType.Standard.name.lower():
        return dag_standard(
            session=_session,
            config=config_parsed,
            debug=debug,
            config_text=config_text,
            config_path=config,
            control_reqs=control_reqs,
            logger=logger,
            component=ComponentType.Client
        )

    return dag_pipe(
        session=_session,
        config=config_parsed,
        config_text=config_text
    )
def parse_args_uargs(self):
    args, config = parse_args_uargs(self.args, [])
    config = merge_dicts_smart(config, self.grid_config)

    # restrict the process to the GPUs assigned to this task
    os.environ['CUDA_VISIBLE_DEVICES'] = self.task.gpu_assigned or ''

    if self.distr_info:
        self.set_dist_env(config)
    return args, config
def _dag(config: str, debug: bool = False, control_reqs=True,
         params: Tuple[str] = ()):
    logger = create_logger(_session, name='_dag')
    logger.info('started', ComponentType.Client)

    with open(config, 'r') as f:
        config_text = f.read()
    config_parsed = yaml_load(config_text)
    params = dict_from_list_str(params)
    config_parsed = merge_dicts_smart(config_parsed, params)
    config_text = yaml_dump(config_parsed)

    logger.info('config parsed', ComponentType.Client)

    # append the short git commit hash to the dag name, if available
    try:
        commit = subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()
        config_parsed['info']['name'] += f'_{commit.decode("utf-8")[:6]}'
    except Exception:
        logger.info('commit not parsed')

    type_name = config_parsed['info'].get('type', 'standard')
    if type_name == DagType.Standard.name.lower():
        # expand the optional grid section into one dag per grid cell
        cells = (grid_cells(config_parsed['grid'])
                 if 'grid' in config_parsed else [None])
        dags = []
        for cell in cells:
            dag = dag_standard(
                session=_session,
                config=config_parsed,
                debug=debug,
                config_text=config_text,
                config_path=config,
                control_reqs=control_reqs,
                logger=logger,
                component=ComponentType.Client,
                grid_cell=cell
            )
            dags.append(dag)
        return dags

    return [
        dag_pipe(
            session=_session,
            config=config_parsed,
            config_text=config_text
        )
    ]
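# Hypothetical usage sketch (not part of the source): how the _dag helper above
# might be called from code. The config path and the 'dotted.key=value' format
# for params are assumptions for illustration; dict_from_list_str is expected to
# turn such strings into a dict that gets merged into the parsed config.
def _example_submit_dag():
    return _dag(
        config='configs/experiment.yml',       # assumed path
        debug=False,
        control_reqs=True,
        params=('info.name=my_experiment',)    # assumed parameter format
    )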
def _dag(config: str, debug: bool = False, control_reqs=True,
         params: Tuple[str] = ()):
    migrate()

    with open(config, 'r') as f:
        config_text = f.read()
    config_parsed = yaml_load(config_text)
    params = dict_from_list_str(params)
    config_parsed = merge_dicts_smart(config_parsed, params)
    config_text = yaml_dump(config_parsed)

    type_name = config_parsed['info'].get('type', 'standard')
    if type_name == DagType.Standard.name.lower():
        return dag_standard(
            session=_session,
            config=config_parsed,
            debug=debug,
            config_text=config_text,
            config_path=config,
            control_reqs=control_reqs
        )

    return dag_pipe(
        session=_session,
        config=config_parsed,
        config_text=config_text
    )
def create_tasks(self):
    tasks = self.task_provider.by_dag(self.dag)
    tasks_new = []
    tasks_old = []

    # copy every top-level task of the source dag into the new dag
    for t in tasks:
        if t.parent:
            continue

        task = Task(
            name=t.name,
            status=TaskStatus.NotRan.value,
            computer=t.computer,
            gpu=t.gpu,
            gpu_max=t.gpu_max,
            cpu=t.cpu,
            executor=t.executor,
            memory=t.memory,
            steps=t.steps,
            dag=self.dag_db.id,
            debug=t.debug,
            type=t.type,
        )
        task.additional_info = t.additional_info
        tasks_new.append(task)
        tasks_old.append(t)

    self.task_provider.bulk_save_objects(tasks_new, return_defaults=True)

    # map old task ids to the freshly inserted ones
    old2new = {
        t_old.id: t_new.id
        for t_new, t_old in zip(tasks_new, tasks_old)
    }

    # recreate task dependencies with the new ids
    dependencies = self.task_provider.get_dependencies(self.dag)
    dependencies_new = []
    for d in dependencies:
        d_new = TaskDependence(task_id=old2new[d.task_id],
                               depend_id=old2new[d.depend_id])
        dependencies_new.append(d_new)

    self.task_provider.bulk_save_objects(dependencies_new,
                                         return_defaults=False)

    # copy the dag storage, applying the requested file changes
    changes = yaml_load(self.file_changes)
    storages = self.dag_storage_provider.by_dag(self.dag)
    storages_new = []

    for s, f in storages:
        if not isinstance(changes, dict):
            continue

        replace = self.find_replace(changes, s.path)
        if replace is not None and f:
            content = f.content.decode('utf-8')
            if s.path.endswith('.yml'):
                # yaml files are merged key by key
                data = yaml_load(content)
                data = merge_dicts_smart(data, replace)
                content = yaml_dump(data)
            else:
                # other files get a literal text replacement
                for k, v in replace.items():
                    if k not in content:
                        raise Exception(f'{k} is not in the content')
                    content = content.replace(k, v)

            content = content.encode('utf-8')
            md5 = hashlib.md5(content).hexdigest()
            f = self.file_provider.by_md5(md5)
            if not f:
                f = File(content=content,
                         created=now(),
                         project=self.dag_db.project,
                         md5=md5,
                         dag=self.dag_db.id)
                self.file_provider.add(f)

        s_new = DagStorage(dag=self.dag_db.id,
                           file=f.id,
                           path=s.path,
                           is_dir=s.is_dir)
        storages_new.append(s_new)

    self.dag_storage_provider.bulk_save_objects(storages_new,
                                                return_defaults=False)
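# Simplified, self-contained sketch (an assumption, not the project's code) of the
# file-change rule used in create_tasks above: for '.yml' files the replacement
# dict is merged into the parsed YAML, for any other file each key is applied as
# a literal str.replace. merge_dicts_smart from the source presumably performs a
# nested merge; a flat dict.update is used here to keep the sketch short.
import yaml


def _apply_replace_sketch(path: str, content: str, replace: dict) -> str:
    if path.endswith('.yml'):
        data = yaml.safe_load(content) or {}
        data.update(replace)          # stand-in for merge_dicts_smart
        return yaml.dump(data)

    for k, v in replace.items():
        if k not in content:
            raise Exception(f'{k} is not in the content')
        content = content.replace(k, v)
    return content


# example: _apply_replace_sketch('train.yml', 'lr: 0.01\n', {'lr': 0.001})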