def test_dag_pickle(self):
    """Save a two-process DAG to disk, then load it back from that file.

    The test asserts that ``save()`` returns the path of an existing file.
    The file is removed afterwards whether or not loading succeeds.
    """
    import os
    from dag import DAG, Process, load

    # PICKLE: build the graph one process at a time, then save it.
    process_specs = (
        ("ple", ['a', 'b'], ['c', 'd'], "-brief"),
        ("ple", ['c', 'e'], ['f']),
    )
    graph = DAG()
    for spec in process_specs:
        graph.add_process(Process(*spec))

    pickle_path = graph.save()
    self.assertTrue(os.path.isfile(pickle_path))

    # UNPICKLE: the loaded object is not inspected; loading must simply
    # complete without raising.
    try:
        with open(pickle_path, "rb") as pickled:
            load(pickled)
    finally:
        os.unlink(pickle_path)
def create_dag(input_filename, parsers, init_file=None,
               engine=dag.Engine.SHELL, num_cores=None):
    """
    Parse a file containing a list of commands and build a DAG from it.

    Blank lines and lines starting with '#' are skipped. Lines starting
    with '%' are handed to preprocess_line(), which may update the parser
    keyword map, contribute extra processes and declare explicit
    dependencies. Every other line is split on spaces into a command name
    and its arguments; a leading '@name' token sets the workunit name of
    the resulting process(es) explicitly.

    @param input_filename: Filename to be parsed into a DAG
    @type input_filename: str

    @param parsers: Dictionary that maps string command names to
    functions that are used to create DAG.
    @type parsers: dict

    @param init_file: Optional file to be used for startup variables
    and routines. When omitted, dag.util.open_user_init() is used.
    @type init_file: file

    @param engine: Engine assigned to the DAG. With Engine.SHELL every
    line is parsed by dag.shell.parse_shell(); otherwise the command name
    is looked up in parsers.
    @type engine: dag.Engine

    @param num_cores: Optional value stored on the DAG as num_cores.
    @type num_cores: int

    @return: The populated DAG object.
    @rtype: dag.DAG

    @raise DagException: If no init file is available, if an '@name'
    token is not followed by a command, or if a command has no entry in
    parsers.
    """
    import dag.util as dag_utils
    from dag import DAG, Engine, DagException

    # PROJECT SPECIFIC DEFINES. FACTOR OUT.
    opened_here = False
    if not init_file:
        init_file = dag_utils.open_user_init()
        opened_here = bool(init_file)

    # If we still don't have the init file, there is a problem.
    if not init_file:
        if init_file is None:
            raise DagException("No init file provided.")
        # A falsy non-None value may not be a file object, so do not
        # assume it has a .name attribute.
        raise DagException("Could not open init file ({0}). File not found."
                           .format(getattr(init_file, "name", init_file)))

    try:
        init_code = compile(init_file.read(), init_file.name, "exec")
    finally:
        # Only close a file this function opened; a caller-supplied file
        # stays under the caller's control.
        if opened_here:
            init_file.close()

    # SECURITY NOTE(review): the init file is executed and parser
    # expressions are eval'd below. Both must come from trusted sources.
    #
    # The init code runs in an explicit namespace so that names it defines
    # stay visible to the eval() further down on every Python version.
    # Relying on implicit function locals stopped working in Python 3.13
    # (PEP 667).
    init_scope = dict(locals())
    exec(init_code, globals(), init_scope)

    root_dag = DAG()
    root_dag.engine = engine
    root_dag.num_cores = num_cores
    parser_kmap = {}  # used as the second argument of parser functions (below)

    # dependencies dict is used to allow the user
    # to define explicit dependencies.
    dependencies = {}

    with open(input_filename, "r") as infile:
        for line in infile:
            line = line.strip()
            if not line or line[0] == '#':
                # Blank or comment line
                continue
            if line[0] == '%':
                # Preprocess
                (parser_kmap, extra_processes, dependencies) = \
                    preprocess_line(line, parser_kmap, dependencies)
                for extra_proc in extra_processes:
                    root_dag.add_process(extra_proc)
                continue

            # Build a new list instead of removing items while iterating:
            # in-place removal skips elements and leaves empty tokens
            # behind when words are separated by several spaces.
            tokens = [token for token in line.split(' ') if token]
            pname = tokens[0]
            parser_args = tokens[1:]  # used by function below

            # Is the process name set explicitly?
            # This is an optional internal name for the process,
            # AKA workunit_name.
            process_name = None
            if pname[0] == '@':
                process_name = pname[1:]
                if not parser_args:
                    raise DagException(
                        "No command given after '{0}'".format(pname))
                pname = parser_args[0]
                parser_args = parser_args[1:]

            if root_dag.engine == Engine.SHELL:
                # NOTE: this import binds the local name 'dag'; do not
                # reference 'dag' earlier in this function body.
                import dag.shell
                proc_list = dag.shell.parse_shell(pname, parser_args,
                                                  parser_kmap, parsers,
                                                  init_code)
                # Default workunit names continue numbering from the
                # processes already in the DAG.
                first_index = len(root_dag.processes)
                for num_procs, proc in enumerate(proc_list or (), first_index):
                    proc.workunit_name = "%s-%d" % (proc.cmd, num_procs)
            else:
                if pname not in parsers:
                    print("No function for %s" % pname)
                    print("Known functions: ", parsers.keys())
                    raise DagException("Unknown Function: {0}".format(pname))
                funct = "%s(parser_args,parser_kmap)" % parsers[pname]
                print("Running %s" % funct)
                # Names from the init file plus the current locals
                # (parser_args and parser_kmap are used by funct).
                eval_scope = dict(init_scope)
                eval_scope.update(locals())
                proc_list = eval(funct, globals(), eval_scope)

            if proc_list is None:
                continue

            # If given explicitly set workunit name; a numeric suffix is
            # added only when the line produced more than one process.
            if process_name:
                use_suffix = len(proc_list) > 1
                for proc_count, proc in enumerate(proc_list, 1):
                    if use_suffix:
                        proc.workunit_name = "%s-%d" % (process_name,
                                                        proc_count)
                    else:
                        proc.workunit_name = process_name

            for proc in proc_list:
                root_dag.add_process(proc)

    # Set explicit dependencies, if any
    NO_SUCH_FMT = "No such process '%s'"
    for parent_name in dependencies:
        print("Added dependency of %s" % parent_name)
        parent_process = root_dag.get_process(parent_name)
        if not parent_process:
            print(NO_SUCH_FMT % parent_name)
            continue
        for child in dependencies[parent_name]:
            child_proc = root_dag.get_process(child)
            if not child_proc:
                print(NO_SUCH_FMT % child)
                continue
            # Skip children that are already linked under this parent.
            already_linked = any(kid.workunit_name == child
                                 for kid in parent_process.children)
            if not already_linked:
                parent_process.children.append(child_proc)
                print("%s depends on %s" % (child, parent_name))

    return root_dag