def precheck(self):
    """
    Checks to see if the preconditions for this HappyJob have been met.
    If so, returns STATUS_READY and the HappyJob is executed. It is
    expected that this method will be overridden to implement custom
    checks in a subclass (use lambdas instead?)

    @return: STATUS_READY if the HappyJob's preconditions are met and the
                 job can be run.
             STATUS_WAIT if the job is not ready to be run.
             STATUS_SKIP if the job has already been run.
             STATUS_ERROR if we should abort.
    """
    if not dfs.exists(self.outputpath):
        logger.debug("precheck(%s): outputpath %s does not exist, ready to run." % (self, self.outputpath))
        return 'ready'
    inTSs = [dfs.modtime(file) for file in self.inputpaths]
    outTS = dfs.modtime(self.outputpath)
    newer = reduce(lambda x, y: x or y, [(inTS > outTS) for inTS in inTSs])
    logger.debug("Input timestamps: %s" % inTSs)
    logger.debug("Output timestamp: %s" % outTS)
    if newer:
        logger.debug("At least one input file is newer than the output file, ready to run.")
        dfs.delete(self.outputpath)
        return 'ready'
    else:
        logger.debug("No input file is newer than the output file, skipping.")
        return 'skip'
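# Illustrative sketch (not part of the module): the docstring above invites
# overriding precheck() in a subclass. A hypothetical node that skips the
# timestamp comparison and always reruns might look like this; the class
# name AlwaysRunNode is invented for the example.
class AlwaysRunNode(HappyJobNode):
    def precheck(self):
        # Clear any stale output, then report ready unconditionally.
        if dfs.exists(self.outputpath):
            dfs.delete(self.outputpath)
        return 'ready'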
def testTopoSort(self):
    """ Topological sort of the DAG """
    logger.info("In TestFlow.testTopoSort ...")
    logger.debug("Setting up DAG ...")
    a = HappyJobNode()
    b = HappyJobNode()
    c = HappyJobNode()
    d = HappyJobNode()
    e = HappyJobNode()
    f = HappyJobNode()
    g = HappyJobNode()
    h = HappyJobNode()
    i = HappyJobNode()
    j = HappyJobNode()
    a.addChild(b)
    a.addChild(c)
    b.addChild(d)
    b.addChild(e)
    c.addChild(f)
    c.addChild(g)
    d.addChild(h)
    g.addChild(i)
    i.addChild(j)
    e.addChild(j)
    b.addChild(i)
    a.addChild(j)
    logger.debug("Testing topological sort ...")
    sort = a.sort()
    self.assertEqual(sort, [a, b, c, d, e, f, g, h, i, j])
    logger.info("DONE.")
def __init__(self, **kwargs):
    logger.debug("Creating TripleQueryNode.")
    DAG.__init__(self, **kwargs)
    self.inputpaths = kwargs.get("inputpaths", None)
    self.outputpath = kwargs.get("outputpath", None)
    self.force = kwargs.get("force", False)
    self.query = kwargs.get("query", None)
    self.status = None
def __init__(self, job=NullNode(name="root"), workingDir="tmp", cleanupTemp=False, inputpaths=None, outputpath=None, **kwargs): logger.debug("Creating Flow() object, workingDir=%s" % workingDir) self.startNode = job self.lastNode = job self.workingDir = workingDir self.cleanupTemp = cleanupTemp self.default_inputpaths = inputpaths self.default_outputpath = outputpath
def dictize(self):
    """
    Create a dictionary representation of this DAG. Requires subclasses
    to implement _kwargs().
    """
    d = {}
    for node in self.sort():
        logger.debug("Dictize: id %s has name %s" % (node._id, node.name))
        d[node._id] = {"klass": node.__class__.__name__,
                       "kwargs": node._kwargs(),
                       "children": [child._id for child in node.children()]}
    return d
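# For illustration only: calling dictize() on a two-node chain a -> b yields
# a mapping shaped roughly like this (ids and kwargs depend on the nodes):
#
#   {1: {"klass": "HappyJobNode", "kwargs": {...}, "children": [2]},
#    2: {"klass": "HappyJobNode", "kwargs": {...}, "children": []}}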
def fire(self):
    """ Runs this node's TripleQuery job. Blocks until completed. """
    job = TripleQuery(self.query, self.inputpaths, self.outputpath)
    try:
        job.run()
        logger.debug("TripleQuery run. Setting status to done.")
        self.status = 'done'
    except Exception:
        logger.error("Caught exception in TripleQuery. Setting status to fail and deleting output.")
        dfs.delete(self.outputpath)
        self.status = 'fail'
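# Usage sketch (hypothetical query and paths): a TripleQueryNode is normally
# fired by the DAG runner, but standalone use looks like this.
#   node = TripleQueryNode(query="...", inputpaths=["/data/triples"],
#                          outputpath="/tmp/query.out")
#   node.fire()
#   assert node.status in ('done', 'fail')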
def __copy__(self):
    """ Jython copy.copy() does not work by default, so copy fields explicitly. """
    logger.debug("Copying Flow() object.")
    c = Flow()
    c.workingDir = self.workingDir
    c.cleanupTemp = self.cleanupTemp
    c.default_inputpaths = self.default_inputpaths
    c.default_outputpath = self.default_outputpath
    c.startNode = self.startNode
    c.lastNode = self.lastNode
    return c
def testDAG(self):
    """ Set up with many relationships """
    logger.info("In TestFlow.testDAG ...")
    logger.debug("Setting up DAG ...")
    a = HappyJobNode()
    b = HappyJobNode()
    c = HappyJobNode()
    d = HappyJobNode()
    e = HappyJobNode()
    f = HappyJobNode()
    g = HappyJobNode()
    h = HappyJobNode()
    i = HappyJobNode()
    a.addChild(b)
    a.addChild(c)
    b.addChild(c)
    d.addChild(f)
    e.addChild(f)
    c.addChild(g)
    f.addChild(g)
    h.addChild(g)
    g.addChild(i)
    logger.debug("Testing parent/child relationships ...")
    self.assertEqual(a.parents(), [])
    self.assertEqual(a.children(), [b, c])
    self.assertEqual(c.parents(), [a, b])
    self.assertEqual(f.parents(), [d, e])
    self.assertEqual(g.parents(), [c, f, h])
    self.assertEqual(g.children(), [i])
    logger.debug("Testing node retrieval ...")
    nodes0 = set([a, b, c, d, e, f, g, h, i])
    nodes1 = a.nodes()
    nodes2 = e.nodes()
    self.assertEqual(nodes0, nodes1)
    self.assertEqual(nodes0, nodes2)
    logger.debug("Testing sinks and sources ...")
    sinks = a.sinks()
    self.assertEqual(sinks, [i])
    sources = a.sources()
    self.assertEqual(sources, [a, d, e, h])
    logger.debug("Testing isAncestorOf() and isDecendentOf() ...")
    self.assert_(a.isAncestorOf(b))
    self.assert_(a.isAncestorOf(g))
    self.assert_(a.isAncestorOf(i))
    self.assert_(not a.isAncestorOf(d))
    self.assert_(i.isDecendentOf(g))
    self.assert_(i.isDecendentOf(a))
    self.assert_(i.isDecendentOf(b))
    self.assert_(i.isDecendentOf(e))
    self.assert_(not f.isDecendentOf(a))
    logger.info("DONE.")
def fire(self):
    """ Runs this node's HappyJob. Blocks until completed. """
    if self.job:
        job = self.job
        try:
            job.run()
            logger.debug("Job run. Setting status to done.")
            self.status = 'done'
        except Exception:
            logger.error("Caught exception. Setting status to fail and deleting output.")
            dfs.delete(self.outputpath)
            self.status = 'fail'
def postcheck(self):
    """
    Checks to see if the postconditions for this HappyJob have been met.
    If so, this node's children are fired. It is expected that this
    method will be overridden to implement custom checks in a subclass
    (use lambdas instead?)

    The default implementation returns the status set by fire():
    'done' if the job completed, 'fail' if the job threw an exception.

    @return: 'done' if this HappyJob's postconditions are met and the
                 children should be fired.
             'fail' if the children jobs should not be fired.
    """
    logger.debug("Postcheck status is %s" % self.status)
    return self.status
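# Illustrative sketch: postcheck() can be overridden to verify that the
# output actually exists before children are fired. The class name
# OutputCheckedNode is invented for the example.
class OutputCheckedNode(HappyJobNode):
    def postcheck(self):
        # Downgrade a 'done' status if the output file never materialized.
        if self.status == 'done' and not dfs.exists(self.outputpath):
            return 'fail'
        return self.status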
def run(self, force=False, workingDir=None):
    """ Runs the entire job chain (i.e. the DAG) that contains this node. """
    logger.debug("Calling HappyJobNode.run(), workingDir=%s" % workingDir)
    self.linkNodes(workingDir)
    if force:
        self.deleteOutFiles(onlytmp=False)
    # stack = self.sources()
    stack = self.sort()
    logger.info("Stack order is: %s" % ", ".join([str(x._id) for x in stack]))
    ok_children = self.sources()
    while stack:
        node = stack.pop(0)
        putChildren = False
        if node not in ok_children:
            logger.warn("Branch terminated: node %s not in ok_children list %s." % (node, ok_children))
            continue
        pre = node.precheck()
        if node.force:
            logger.info("FORCING %s [%s --> %s] (delete %s first)" % (node, node.inputpaths, node.outputpath, node.outputpath))
            dfs.delete(node.outputpath)
            node.fire()
        elif pre == 'ready':
            logger.info("Running %s [%s --> %s]" % (node, node.inputpaths, node.outputpath))
            node.fire()
        else:
            logger.info("Skipping job %s: already done" % node)
            putChildren = True
            node.status = 'skip'
        post = node.postcheck()
        if post == 'done':
            logger.info("Job %s completed successfully." % node)
            putChildren = True
        elif post == 'fail':
            logger.info("Job %s failed. Not adding children." % node)
        if putChildren:
            if node.isSink():
                logger.info("Job %s is a sink, no children." % node)
            else:
                newChildren = [child for child in node.children() if child not in ok_children]
                logger.info("Placing children %s of job %s on stack." % (newChildren, node))
                ok_children.extend(newChildren)
def __init__(self, **kwargs):
    logger.debug("Creating NullNode.")
    self._inputpaths = []
    self._outputpath = None
    self._parents = []
    self._children = []
    self.job = NullJob()
    HappyJobNode.__init__(self, **kwargs)
    self._name = kwargs.get("name", None)
    self.force = kwargs.get("force", False)
    if kwargs.has_key("inputpaths"):
        self._inputpaths = kwargs.get("inputpaths")
        logger.debug("Using inputpaths=%s" % self._inputpaths)
        if len(self._inputpaths) > 1:
            raise ValueError, "NullNodes can only handle a single inputpath."
        self._outputpath = self.inputpaths[0]
    self.status = None
def __init__(self, **kwargs):
    logger.debug("Creating HappyJobNode.")
    jobparam = kwargs.get("job", None)
    if not jobparam:
        self.job = NullJob()
    elif type(jobparam) == str:
        self.job = eval(jobparam)
    else:
        self.job = jobparam
    DAG.__init__(self, **kwargs)
    self.force = kwargs.get("force", False)
    if kwargs.has_key("inputpaths"):
        self.job.inputpaths = kwargs.get("inputpaths")
    if kwargs.has_key("outputpath"):
        if kwargs.get("outputpath") and not type(kwargs.get("outputpath")) == str:
            raise ValueError, "HappyJobNode.outputpath only accepts a single file; got %s." % kwargs.get("outputpath")
        self.job.outputpath = kwargs.get("outputpath")
    self.status = None
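# For illustration: the job parameter may be a job instance or a string that
# eval()s to one, so both of the following are equivalent (FilterExact is
# used in testFlow2 in this file; arguments abbreviated with '...').
#   HappyJobNode(name='get_names', job=FilterExact(filterkey='propname', ...))
#   HappyJobNode(name='get_names', job="FilterExact(filterkey='propname', ...)")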
def __init__(self, **kwargs):
    """
    Create a new DAG node.

    @param name: the name of the node
    @param parents: parents of this node
    @param children: children of this node
    @param _id: the id of the node (for deserialization only)
    """
    logger.debug("Creating DAG: %s" % kwargs)
    self._id = kwargs.get("_id", DAG._nextId())
    self.name = kwargs.get("name", None)
    self._parents = []
    self._children = []
    if kwargs.has_key("parents"):
        self.addParent(kwargs["parents"])
    if kwargs.has_key("children"):
        self.addChild(kwargs["children"])
    if not self.name:
        self.name = self._auto_name()
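# Usage sketch: parents/children can be supplied at construction time instead
# of via addParent()/addChild() afterwards (assuming, as the kwargs handling
# above suggests, that a single node or a list is accepted).
#   a = HappyJobNode(name='a')
#   b = HappyJobNode(name='b', parents=a)
#   c = HappyJobNode(name='c', parents=[a, b])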
def linkNodes(self, workingDir=None):
    """
    Assures that every parent/child pair have a matching file in their
    inFile / outFile lists. Creates files if necessary.

    @param workingDir: the directory to create temp files in.
    """
    if workingDir:
        logger.info("Linking nodes, using workingDir = %s" % workingDir)
        if dfs.exists(workingDir):
            fs = dfs.fileStatus(workingDir)
            if not fs.isDir():
                raise FlowException, "%s is a file, not a directory." % workingDir
        else:
            logger.info("Creating working directory %s." % workingDir)
            # dfs.mkdir(workingDir)
    stack = self.sources()
    for source in stack:
        if (not source.inputpaths) or len(source.inputpaths) < 1:
            raise FlowException, "Source node %s has no inputpaths defined." % source
    while stack:
        node = stack.pop(0)
        if node.outputpath:
            logger.trace("linkNodes(): %s has an outputpath '%s'. Using it." % (node, node.outputpath))
            filename = node.outputpath
        else:
            filename = "tmp.%s" % node.name
            if workingDir:
                filename = "%s/%s" % (workingDir, filename)
            logger.trace("linkNodes(): Created temp outfile '%s' for %s." % (filename, node))
            node.outputpath = filename
        for child in node.children():
            # Link parent and child unless the child already lists this file.
            if (not child.inputpaths) or (len(set([node.outputpath]) & set(child.inputpaths)) == 0):
                logger.debug("linkNodes(): Linked %s and %s with file '%s'." % (node, child, filename))
                child.inputpaths = castList(child.inputpaths) + [filename]
            stack.append(child)
        logger.debug("%s has inputs %s and outputs %s" % (node, node.inputpaths, node.outputpath))
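# For illustration: given a chain a -> b where a has no outputpath and b has
# no inputpaths, linkNodes(workingDir='tmp') assigns a the temp outputpath
# 'tmp/tmp.a' and appends that same path to b.inputpaths, so the pair share
# a linking file.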
def testFlow2(self):
    logger.info("In TestFlow.testFlow2() ...")
    test_flow = Flow2(inputpaths=['/data/graph/latest/crawl'], outputpath='namelist')
    (names, types) = test_flow.split()
    names.chain(HappyJobNode(name='get_names',
                             job=FilterExact(filterkey='propname',
                                             filtervalues=['/type/object/name', '/common/topic/alias'],
                                             keyfield='a:guid',
                                             mapfields={'value': 'name'})))
    types.chain(HappyJobNode(name='get_types',
                             job=FilterExact(filterkey='propname',
                                             filtervalues=['/type/object/type'],
                                             keyfield='b:guid',
                                             mapfields={'target': 'type'})))
    names.chain(HappyJobNode(name='join_name_types',
                             job=InnerJoin(joinkeys=['a:guid', 'b:guid'], outkey='guid'),
                             force=True),
                join=types)
    names.chain(HappyJobNode(name='filter_people',
                             job=FilterExact(filterkey='type',
                                             filtervalues=['/people/person'],
                                             keyfield='guid',
                                             mapfields={'type': 'type', 'name': 'name'})))
    names.chain(HappyJobNode(name='invert_names',
                             job=AggregateJson(aggkey='name', aggfunc='agg.list("guid")')))
    logger.debug("DAG: \n%s\n" % names.startNode.dictize())
    names.run(force=False)
def _auto_name(self, job):
    """ Generates a unique name for this node, if one was not provided. """
    root = job.__class__.__name__
    nodes = list(self.lastNode.nodes())
    matches = [node.name for node in nodes if node.name.startswith(root)]
    logger.debug("Node names: %s" % nodes)
    if len(matches) == 0:
        return root + '_1'
    try:
        iter_str = [name.split('_')[-1] for name in matches]
        logger.debug("Node iter_str: %s" % iter_str)
        iters = [int(i) for i in iter_str]
        logger.debug("Node iters: %s" % iters)
        max_iter = max(iters) + 1
        logger.debug("max_iter: %s" % max_iter)
        return root + '_' + str(max_iter)
    except Exception:
        logger.warn("Could not determine iteration: %s" % matches)
        return root + '_1'
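# For illustration: with nodes named FilterExact_1 and FilterExact_2 already
# in the flow, _auto_name(job) for another FilterExact returns
# 'FilterExact_3'; with no matching names it returns 'FilterExact_1'.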
def chain(self, node=None, join=None):
    """
    Add a new node to the chain at lastNode. Returns the modified Flow
    object.
    """
    if not node:
        node = NullNode()
    if join:
        logger.debug("In Flow.chain(): join=%s" % join)
        if not isinstance(join, list):
            join = [join]
        for jn in join:
            logger.debug("Joining node %s into chain %s." % (jn, node))
            jn.lastNode.addChild(node)
    logger.debug("Chaining %s to %s." % (self, node))
    self.lastNode.addChild(node)
    self.lastNode = node
    return self
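# Usage sketch (someJob, joinJob, and otherBranch are hypothetical): chain()
# appends to the linear flow, and join= merges another branch's lastNode into
# the new node, as testFlow2 does with its types branch.
#   flow = Flow()
#   flow.chain(HappyJobNode(name='step1', job=someJob))
#   flow.chain(HappyJobNode(name='merge', job=joinJob), join=otherBranch)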
def run(self, force=False):
    """ Runs the flow. """
    logger.debug("Preparing to run Flow.")
    sources = self.startNode.sources()
    logger.debug("Sources: %s" % sources)
    for node in sources:
        if not node.inputpaths:
            logger.debug("Source %s does not have inputpaths, setting to: %s" % (node, self.default_inputpaths))
            node.inputpaths = self.default_inputpaths
    sinks = self.startNode.sinks()
    logger.debug("Sinks: %s" % sinks)
    for node in sinks:
        if not node.outputpath:
            logger.debug("Sink %s does not have an outputpath, setting to: %s" % (node, self.default_outputpath))
            node.outputpath = self.default_outputpath
    logger.debug("Calling HappyJobNode.run(), workingDir = %s" % self.workingDir)
    self.startNode.run(force=force, workingDir=self.workingDir)
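# Minimal end-to-end sketch (paths and someJob are hypothetical): defaults
# supplied to the Flow are pushed onto unset source inputpaths and sink
# outputpaths before the DAG is run.
#   flow = Flow(workingDir='tmp', inputpaths=['/data/in'], outputpath='/data/out')
#   flow.chain(HappyJobNode(name='only_step', job=someJob))
#   flow.run()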
def _getoutputpath(self):
    logger.debug("Called NullNode._getoutputpath() = %s" % self._outputpath)
    return self._outputpath

def _setoutputpath(self, outputpath):
    logger.debug("Called NullNode._setoutputpath(%s)" % outputpath)
    self._inputpaths = [outputpath]
    self._outputpath = outputpath

def _getinputpaths(self):
    logger.debug("Called NullNode._getinputpaths() = %s" % self._inputpaths)
    return self._inputpaths

def _setinputpaths(self, inputpaths):
    logger.debug("Called NullNode._setinputpaths(%s)" % inputpaths)
    self._inputpaths = uniq(inputpaths)
    self._outputpath = inputpaths[0]
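# For illustration (assuming inputpaths/outputpath are properties wired to
# the accessors above): a NullNode keeps its input and output paths mirrored,
# so it acts as a pass-through marker in the DAG.
#   n = NullNode(name='marker', inputpaths=['/data/in'])
#   n.outputpath              # -> '/data/in'
#   n.outputpath = '/data/other'
#   n.inputpaths              # -> ['/data/other']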