def test_basic():
    """
    Check topological_sort on sortable and on cyclic graphs.

    Each edge (a, b) means "a must come before b". A sortable graph yields
    the ordered list of nodes; a graph containing a cycle yields a falsy
    result.
    """
    sortable = [
        ([1, 2, 3], [(1, 2), (1, 3), (3, 2)], [1, 3, 2]),
        # The duplicated edge is deliberate.
        ([1, 2], [(2, 1), (2, 1)], [2, 1]),
        ([0, 1, 2, 3], [(1, 0), (1, 2), (1, 3), (3, 2)], [1, 3, 2, 0]),
    ]
    for nodes, edges, expected in sortable:
        assert topological_sort(nodes, edges) == expected

    cyclic = [
        ([1, 2], [(1, 2), (2, 1)]),
        ([0, 1, 2], [(0, 1), (1, 2), (2, 1)]),
    ]
    for nodes, edges in cyclic:
        assert not topological_sort(nodes, edges)
def process_config(self):
    """
    Turn the populated config dict into an ordered set of Documents.

    For every (path, directive) pair in ``self.config`` the directive's glob
    is expanded into files and a Document is created per file, recursing
    into any 'inputs', 'ifinput' and 'ifnoinput' directives. Dependencies
    between documents are recorded in ``self.depends`` and ``self.members``
    is finally replaced by an OrderedDict in topologically sorted order,
    which establishes the order of the batch run.

    Side effects: assigns ``self.members``, ``self.depends`` and
    ``self.batch_id``; prints progress and diagnostic messages. The config
    dict itself is only read, never modified.

    Raises:
        Exception: if a directive is malformed (a "/" in the file pattern,
            'inputs' given as a string, an invalid 'except' regex, an empty
            nested directive), if an "@" document without 'contents' is
            requested while ``self.args['danger']`` is not set, or if the
            dependencies contain a cycle.
    """

    # Define the parse_doc nested function which we will call recursively.
    def parse_doc(path, input_directive, args=None):
        """
        Create (or look up) the Documents described by one directive.

        path is the config directory the directive belongs to,
        input_directive is either a "glob|filter|filter" string or a
        single-entry {directive: args} dict, and args holds the options for
        the directive. Returns the list of Documents created or found; each
        one is also registered in self.members under its key.
        """
        # If a specification is nested in a dependency, then input_directive
        # may be a dict. Read it without popitem(): inputs are re-parsed for
        # every file the parent glob matches, and emptying the config dict
        # made the second pass fail with KeyError.
        if hasattr(input_directive, 'items'):
            nested = list(input_directive.items())
            if not nested:
                raise Exception("empty dict passed as an input directive")
            # popitem() on an ordered mapping takes the last entry.
            input_directive, args = nested[-1]

        # Work on a private copy so neither the caller's dict nor a shared
        # default is modified or aliased onto documents.
        args = {} if args is None else args.copy()

        # 'priority' is kept out of doc.args, but it has to reach every
        # document of this directive, not just the first one created.
        has_priority = 'priority' in args
        priority = args.pop('priority', None)

        tokens = input_directive.split("|")
        if "/" in tokens[0]:
            raise Exception("paths not allowed in tokens: %s" % tokens[0])
        if path == '.':
            glob_string = tokens[0]
        else:
            glob_string = os.path.join(re.sub(r"^\./", "", path), tokens[0])
        filters = tokens[1:]

        docs = []

        # virtual document
        if re.search("@", glob_string):
            virtual = True
            dangerous = 'contents' not in args
            if dangerous and not self.args['danger']:
                raise Exception(
                    " You are attempting to access a remote file %s. \n"
                    "You must enable --danger flag to do this." % glob_string)
            glob_string = glob_string.replace("@", "")
        else:
            virtual = False

        # Turn the ".*" that fnmatch emits for wildcards into capture groups
        # so the text matched by the first wildcard can be reused below.
        regex = fnmatch.translate(glob_string).replace(".*", "(.*)")
        matcher = re.compile(regex)

        files = glob.glob(glob_string)

        # A virtual document need not exist on disk.
        if not files and virtual:
            files = [glob_string]

        for f in files:
            # NOTE: inputs are parsed even when create ends up False,
            # because parsing registers the input documents as members.
            create = True
            if not virtual:
                if os.path.isdir(f):
                    create = False

            if 'disabled' in args:
                if args['disabled']:
                    create = False
                    print("document %s|%s disabled" % (f, "|".join(filters)))

            inputs = []
            if 'inputs' in args:
                # unicode is checked too, consistent with 'ifinput' below;
                # a unicode string would otherwise be iterated per character.
                if isinstance(args['inputs'], (str, unicode)):
                    raise Exception(
                        "this input should be an array, not a string: %s"
                        % args['inputs'])
                for i in args['inputs']:
                    for doc in parse_doc(path, i):
                        inputs.append(doc.key())

            # NOTE(review): rootname is only bound when the glob contains a
            # wildcard; 'ifinput'/'ifnoinput' on a wildcard-free directive
            # will fail below - confirm the intended behaviour.
            m = matcher.match(f)
            if m and len(m.groups()) > 0:
                rootname = m.group(1)

            # The 'ifinput' directive says that if an input exists matching
            # the specified pattern, we should create this document and it
            # will depend on the specified input.
            if 'ifinput' in args:
                self.log.debug(f)
                if isinstance(args['ifinput'], (str, unicode)):
                    ifinputs = [args['ifinput']]
                else:
                    self.log.debug("treating input %s as iterable. class: %s" % (
                        args['ifinput'], args['ifinput'].__class__.__name__))
                    ifinputs = args['ifinput']

                # Start empty so an empty pattern list cannot leave
                # input_docs unbound or stale from a previous file.
                input_docs = []
                for s in ifinputs:
                    self.log.debug("evaluating ifinput %s" % s)
                    ifinput = s.replace("%", rootname)
                    self.log.debug("evaluating ifinput %s" % ifinput)
                    input_docs = parse_doc(path, ifinput, {})
                    for input_doc in input_docs:
                        self.log.debug(input_doc.key())
                        inputs.append(input_doc.key())

                # NOTE(review): only the last pattern decides whether the
                # document is created - confirm this is intended.
                if len(input_docs) == 0:
                    create = False

            if 'ifnoinput' in args:
                ifinput = args['ifnoinput'].replace("%", rootname)
                input_docs = parse_doc(path, ifinput, {})

                if len(input_docs) > 0:
                    create = False

            if 'except' in args:
                try:
                    except_re = re.compile(args['except'])
                except re.error as e:
                    raise Exception(
                        "You passed 'except' value of %s. \n"
                        "Please pass a valid Python-style regular expression"
                        " for 'except', NOT a glob-style matcher."
                        " Error message from re.compile: %s"
                        % (args['except'], e))
                if re.match(except_re, f):
                    print("skipping %s for %s as it matches except pattern %s" % (
                        f,
                        input_directive,
                        args['except']
                    ))
                    create = False

            if create:
                doc = dexy.document.Document()
                doc.set_controller(self)

                # Filters can either be included in the name...
                doc.set_name_and_filters(f, filters)
                # ...or they may be listed explicitly.
                if 'filters' in args:
                    doc.filters += args['filters']

                doc.setup_log() # After name has been set
                doc.virtual = virtual

                # Here we are assuming that if we get a key with blank args
                # this should not override a previous key. A key which does
                # have args should override any previous key.
                key = doc.key()
                self.log.debug("creating doc %s for glob %s" % (key, glob_string))

                if key in self.members:
                    self.log.debug("found existing key %s" % key)
                    doc = self.members[key]
                else:
                    self.log.debug("no existing key %s" % key)

                if has_priority:
                    doc.priority = priority

                if len(args) > 0:
                    self.log.debug("args: %s" % args)
                    doc.args = args
                    doc.use_all_inputs = 'allinputs' in args
                    for i in inputs:
                        doc.add_input_key(i)

                if not hasattr(doc, 'args'):
                    doc.args = args

                self.members[key] = doc
                docs.append(doc) # just a local list
        return docs
    # end of parse_doc nested function

    def get_pos(member):
        """Return the position of member within self.members."""
        key = member.key()
        return list(self.members.keys()).index(key)

    def depend(parent, child):
        """Record that child has to be processed before parent."""
        self.depends.append((get_pos(child), get_pos(parent)))

    # The real processing starts here.
    self.members = OrderedDict()
    self.depends = []

    self.batch_id = self.db.next_batch_id()
    print("batch id is %s" % (self.batch_id,))

    self.log.debug("About to process config")
    self.log.debug(self.config)

    for path, config in self.config.items():
        ### @export "features-global-args-1"
        # Copy so that command-line globals are not written back into the
        # config.
        if "$globals" in config:
            global_args = config["$globals"].copy()
        else:
            global_args = {}

        if 'globals' in self.args:
            global_args.update(self.args['globals'])

        for k, v in config.items():
            local_args = global_args.copy()
            local_args.update(v)
            # Where a global key holds a dict locally, the global entries
            # are merged into it. The merge happens on a copy so the config
            # is not altered.
            for kg in global_args.keys():
                if kg in local_args:
                    if isinstance(local_args[kg], dict):
                        merged = local_args[kg].copy()
                        merged.update(global_args[kg])
                        local_args[kg] = merged
            parse_doc(path, k, local_args)
        ### @end

    # Determine dependencies
    for doc in self.members.values():
        doc.finalize_inputs(self.members)
        for input_doc in doc.inputs:
            depend(doc, input_doc)

    ordering, leftover_graph_items = topological_sort(
        list(range(len(self.members))), self.depends)
    if leftover_graph_items and not ordering:
        # circular references! print debugging help before stopping
        member_docs = list(self.members.values())
        for doc, depends_on in leftover_graph_items:
            print("%s depends on" % (member_docs[doc].key(),))
            for i in depends_on:
                print("  %s" % (member_docs[i].key(),))
            print("")
        print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
        print("The above dependencies were not able to be resolved.\n\n")
        raise Exception("There are circular references, can't do topological sort!")

    # Rebuild the members in sorted order; the key list is taken once
    # rather than on every iteration.
    member_keys = list(self.members.keys())
    ordered_members = OrderedDict()
    for i in ordering:
        key = member_keys[i]
        ordered_members[key] = self.members[key]
    self.members = ordered_members
def test_topological_sort_circular_references():
    """
    A graph containing a cycle must produce a falsy ordering.

    The ordering is the first element of topological_sort's return value.
    """
    cyclic_graphs = (
        ([1, 2], [(1, 2), (2, 1)]),
        ([0, 1, 2], [(0, 1), (1, 2), (2, 1)]),
    )
    for nodes, edges in cyclic_graphs:
        ordering = topological_sort(nodes, edges)[0]
        assert not ordering
def test_topological_sort_valid():
    """
    Sortable graphs must produce the expected ordering.

    Each edge (a, b) means "a must come before b"; the ordering is the first
    element of topological_sort's return value.
    """
    cases = (
        ([1, 2, 3], [(1, 2), (1, 3), (3, 2)], [1, 3, 2]),
        # The duplicated edge is deliberate.
        ([1, 2], [(2, 1), (2, 1)], [2, 1]),
        ([0, 1, 2, 3], [(1, 0), (1, 2), (1, 3), (3, 2)], [1, 3, 2, 0]),
    )
    for nodes, edges, expected in cases:
        ordering = topological_sort(nodes, edges)[0]
        assert ordering == expected
def process_config(self):
    """
    Turn the populated config dict into an ordered set of Documents.

    For every (path, directive) pair in ``self.config`` the directive's glob
    is expanded into files and a Document is created per file, recursing
    into any 'inputs', 'ifinput' and 'ifnoinput' directives. Dependencies
    between documents are recorded in ``self.depends`` and ``self.members``
    is finally replaced by an OrderedDict in topologically sorted order.

    Side effects: assigns ``self.members`` and ``self.depends``; prints
    messages for disabled and skipped documents.

    Raises:
        Exception: if a directive is malformed (a "/" in the file pattern,
            'inputs' given as a string, an invalid 'except' regex, an empty
            nested directive), if an "@" document is requested while
            ``self.allow_remote`` is not set, or if the dependencies cannot
            be sorted because they contain a cycle.
    """
    def parse_doc(path, input_directive, args=None):
        """
        Create (or look up) the Documents described by one directive.

        path is the config directory the directive belongs to,
        input_directive is either a "glob|filter|filter" string or a
        single-entry {directive: args} dict, and args holds the options for
        the directive. Returns the list of Documents created or found; each
        one is also registered in self.members under its key.
        """
        # If a specification is nested in a dependency, then input_directive
        # may be a dict. Read it without popitem(): inputs are re-parsed for
        # every file the parent glob matches, and emptying the config dict
        # made the second pass fail with KeyError.
        if hasattr(input_directive, 'items'):
            nested = list(input_directive.items())
            if not nested:
                raise Exception("empty dict passed as an input directive")
            # popitem() on an ordered mapping takes the last entry.
            input_directive, args = nested[-1]

        # A fresh dict per call: a shared default would end up aliased as
        # doc.args on unrelated documents.
        if args is None:
            args = {}

        tokens = input_directive.split("|")
        if "/" in tokens[0]:
            raise Exception("paths not allowed in tokens: %s" % tokens[0])
        if path == '.':
            glob_string = tokens[0]
        else:
            glob_string = os.path.join(re.sub(r"^\./", "", path), tokens[0])
        filters = tokens[1:]

        docs = []

        # virtual document
        if re.search("@", glob_string):
            # TODO some virtual files are local, not remote. test on
            # presence of 'url' or something more appropriate.
            virtual = True
            if not self.allow_remote:
                raise Exception(
                    "You are attempting to access a remote file."
                    " You must enable --dangerous mode to do this."
                    " Please check Dexy help and call the dexy command again.")
            glob_string = glob_string.replace("@", "")
        else:
            virtual = False

        # Turn the ".*" that fnmatch emits for wildcards into capture groups
        # so the text matched by the first wildcard can be reused below.
        regex = fnmatch.translate(glob_string).replace(".*", "(.*)")
        matcher = re.compile(regex)

        files = glob.glob(glob_string)

        # A virtual document need not exist on disk.
        if not files and virtual:
            files = [glob_string]

        for f in files:
            # NOTE: inputs are parsed even when create ends up False,
            # because parsing registers the input documents as members.
            create = True
            if not virtual:
                if os.path.isdir(f):
                    create = False

            if 'disabled' in args:
                if args['disabled']:
                    create = False
                    print("document %s|%s disabled" % (f, "|".join(filters)))

            inputs = []
            if 'inputs' in args:
                # unicode is checked too, consistent with 'ifinput' below;
                # a unicode string would otherwise be iterated per character.
                if isinstance(args['inputs'], (str, unicode)):
                    raise Exception(
                        "this input should be an array, not a string: %s"
                        % args['inputs'])
                for i in args['inputs']:
                    for doc in parse_doc(path, i):
                        inputs.append(doc.key())

            # NOTE(review): rootname is only bound when the glob contains a
            # wildcard; 'ifinput'/'ifnoinput' on a wildcard-free directive
            # will fail below - confirm the intended behaviour.
            m = matcher.match(f)
            if m and len(m.groups()) > 0:
                rootname = m.group(1)

            # The 'ifinput' directive says that if an input exists matching
            # the specified pattern, we should create this document and it
            # will depend on the specified input.
            if 'ifinput' in args:
                self.log.debug(f)
                if isinstance(args['ifinput'], (str, unicode)):
                    ifinputs = [args['ifinput']]
                else:
                    self.log.debug("treating input %s as iterable. class: %s" % (
                        args['ifinput'], args['ifinput'].__class__.__name__))
                    ifinputs = args['ifinput']

                # Start empty so an empty pattern list cannot leave
                # input_docs unbound or stale from a previous file.
                input_docs = []
                for s in ifinputs:
                    self.log.debug("evaluating ifinput %s" % s)
                    ifinput = s.replace("%", rootname)
                    self.log.debug("evaluating ifinput %s" % ifinput)
                    input_docs = parse_doc(path, ifinput, {})
                    for input_doc in input_docs:
                        self.log.debug(input_doc.key())
                        inputs.append(input_doc.key())

                # NOTE(review): only the last pattern decides whether the
                # document is created - confirm this is intended.
                if len(input_docs) == 0:
                    create = False

            if 'ifnoinput' in args:
                ifinput = args['ifnoinput'].replace("%", rootname)
                input_docs = parse_doc(path, ifinput, {})

                if len(input_docs) > 0:
                    create = False

            if 'except' in args:
                try:
                    except_re = re.compile(args['except'])
                except re.error as e:
                    raise Exception(
                        "You passed 'except' value of %s."
                        " Please pass a valid Python-style regular expression"
                        " for 'except', NOT a glob-style matcher."
                        " Error message from re.compile: %s"
                        % (args['except'], e))
                if re.match(except_re, f):
                    print("skipping %s as it matches except pattern %s" % (f, args['except']))
                    create = False

            if create:
                # Filters can either be included in the name...
                doc = Document(self.artifact_class, f, filters)
                # ...or they may be listed explicitly.
                if 'filters' in args:
                    doc.filters += args['filters']

                # Here we are assuming that if we get a key with blank args
                # this should not override a previous key. A key which does
                # have args should override any previous key.
                key = doc.key()
                self.log.debug("creating doc %s for glob %s" % (key, glob_string))

                if key in self.members:
                    self.log.debug("found existing key %s" % key)
                    doc = self.members[key]
                else:
                    self.log.debug("no existing key %s" % key)

                if len(args) > 0:
                    self.log.debug("args: %s" % args)
                    doc.args = args
                    doc.use_all_inputs = 'allinputs' in args
                    for i in inputs:
                        doc.add_input_key(i)

                if not hasattr(doc, 'args'):
                    doc.args = args

                self.members[key] = doc
                docs.append(doc) # just a local list
        return docs

    def get_pos(member):
        """Return the position of member within self.members."""
        key = member.key()
        return list(self.members.keys()).index(key)

    def depend(parent, child):
        """Record that child has to be processed before parent."""
        self.depends.append((get_pos(child), get_pos(parent)))

    self.members = OrderedDict()
    self.depends = []

    # Create Document objects for all docs.
    self.log.debug("About to process config\n")
    self.log.debug(self.config)
    for path, config in self.config.items():
        for k, v in config.items():
            parse_doc(path, k, v)

    # Determine dependencies
    for doc in self.members.values():
        doc.finalize_inputs(self.members)
        for input_doc in doc.inputs:
            depend(doc, input_doc)

    ordering = topological_sort(list(range(len(self.members))), self.depends)
    # No ordering for a non-empty set of members means the sort failed.
    # Stop with a clear message instead of failing on the loop below or
    # silently dropping every document.
    if self.members and not ordering:
        raise Exception("There are circular references, can't do topological sort!")

    # Rebuild the members in sorted order; the key list is taken once
    # rather than on every iteration.
    member_keys = list(self.members.keys())
    ordered_members = OrderedDict()
    for i in ordering:
        key = member_keys[i]
        ordered_members[key] = self.members[key]
    self.members = ordered_members