Example #1
0
    def run(self, force=False, workingDir=None):
        """
        Runs the entire job chain (ie DAG) that contains this node.

        Nodes are visited in the order returned by self.sort().  A node is
        only processed while it appears in the eligible list, which starts
        out as self.sources() and grows with the children of every node that
        was skipped by its precheck or whose postcheck reports 'done'.

        force      -- when true, self.deleteOutFiles(onlytmp=False) is called
                      before any node is visited.
        workingDir -- handed unchanged to self.linkNodes().
        """
        logger.debug("Calling HappyJobNode.run(), workingDir=%s" % workingDir)
        self.linkNodes(workingDir)
        if force:
            self.deleteOutFiles(onlytmp=False)

        pending = self.sort()
        visit_order = [str(entry._id) for entry in pending]
        logger.info("Stack order is: %s" % ", ".join(visit_order))

        eligible = self.sources()
        while pending:
            node = pending.pop(0)

            # A node whose parents never released it is dropped here.
            if node not in eligible:
                logger.warn("Branch terminated: node %s not in ok_children list %s." % (node, eligible))
                continue

            # precheck() is evaluated before node.force is consulted.
            precheck_state = node.precheck()
            release_children = False
            if node.force:
                logger.info("FORCING %s [%s --> %s] (delete %s first)" % (node, node.inputpaths, node.outputpath, node.outputpath))
                dfs.delete(node.outputpath)
                node.fire()
            elif precheck_state == 'ready':
                logger.info("Running %s [%s --> %s]" % (node, node.inputpaths, node.outputpath))
                node.fire()
            else:
                logger.info("Skipping job %s: already done" % node)
                release_children = True
                # NOTE(review): this sets status on self (the node run() was
                # called on), not on the node being skipped -- confirm intent.
                self.status = 'skip'

            postcheck_state = node.postcheck()
            if postcheck_state == 'done':
                logger.info("Job %s completed successfully. " % node)
                release_children = True
            elif postcheck_state == 'fail':
                logger.info("Job %s failed.  Not adding children." % node)

            if not release_children:
                continue

            if node.isSink():
                logger.info("Job %s is a sink, no children." % node)
                continue

            # Collect first, extend afterwards: the membership test must see
            # the eligible list as it was before this node's children join it.
            released = []
            for child in node.children():
                if child not in eligible:
                    released.append(child)
            logger.info("Placing children %s of job %s on stack." %  (released, node))
            eligible.extend(released)
Example #2
0
 def _auto_name(self, job):
     """
     Generates a unique name for this node, if one was not provided.

     The result has the form "<ClassName>_<n>", where <ClassName> is the
     class name of job and <n> is one more than the largest integer suffix
     found among the names of self.lastNode.nodes() that look like
     "<ClassName>_<integer>".  When no such name exists, <n> is 1.

     Names that start with "<ClassName>_" but do not end in an integer are
     reported with a warning and ignored.  They can never be equal to the
     generated name, so the result stays unique.
     """
     root = job.__class__.__name__
     # Include the separator so that e.g. root "Map" does not pick up
     # the counters of "MapReduce_5".
     prefix = root + '_'
     nodes = list(self.lastNode.nodes())
     logger.debug("Node names: %s" % nodes)

     iters = []
     unparsed = []
     for node in nodes:
         name = node.name
         if not name.startswith(prefix):
             continue
         try:
             iters.append(int(name[len(prefix):]))
         except ValueError:
             # Non-numeric suffix: remember it for the warning below, but do
             # not let it reset the counter (that could hand out a name that
             # is already taken).
             unparsed.append(name)

     if unparsed:
         logger.warn("Could not determine iteration: %s " % unparsed)
     logger.debug("Node iters: %s" % iters)

     # The leading 0 makes the first generated name "<root>_1" and keeps
     # max() from failing on an empty list.
     max_iter = max([0] + iters) + 1
     logger.debug("max_iter: %s" % max_iter)
     return prefix + str(max_iter)
Example #3
0
 def smap(self, records, task):
     """
     Map step of the join: tags each record with the side it belongs to.

     Every (key, json) pair in records is decoded.  When the task's input
     path contains self.file1 and the record has self.key1, the record is
     passed through self._modkeys with self.keymod1, marked with
     __joinorder__ 1 and collected under its key1 value with sort order 1.
     The same happens for self.file2 / self.key2 / self.keymod2 with
     order 2, so that for a given join key the file1 records come first
     in the secondary sort.  The two checks are independent: a record that
     passes both is collected twice.
     """
     wrapped_path = "/" + task.getInputPath() + "/"
     logger.warn("INPATH: %s" % wrapped_path)

     def join_key(value):
         # Join-key values that happy.flow.isIterable accepts are collapsed
         # into a single ':|:'-separated string; anything else is used as is.
         if happy.flow.isIterable(value):
             return ':|:'.join(value)
         return value

     for _unused_key, encoded in records:
         record = happy.json.decode(encoded)

         if wrapped_path.find(self.file1) != -1:
             if record.has_key(self.key1):
                 left = self._modkeys(record, self.keymod1)
                 # NOTE(review): '__infile__' is written to the decoded
                 # record, not to `left`; whether it reaches the emitted
                 # record depends on what _modkeys returns -- confirm.
                 record['__infile__'] = task.getInputPath()
                 left['__joinorder__'] = 1
                 left_key = join_key(record[self.key1])
                 task.collect(left_key, 1, happy.json.encode(left))

         if wrapped_path.find(self.file2) != -1:
             if record.has_key(self.key2):
                 right = self._modkeys(record, self.keymod2)
                 right['__joinorder__'] = 2
                 right_key = join_key(record[self.key2])
                 task.collect(right_key, 2, happy.json.encode(right))