コード例 #1
0
ファイル: flow.py プロジェクト: tristanbuckner/happy
 def precheck(self):
     """
     Decide from file timestamps whether this HappyJob needs to run.

     The job is ready when its output path does not exist, or when at
     least one input path has a modification time later than the output's.
     In the latter case the stale output is deleted before returning.

     It is expected that this method will be overridden to implement custom
     checks in a subclass.

     @return: 'ready' if the job should be run,
              'skip' if the output exists and no input is newer than it
              (including the case where there are no inputs at all).
     """
     if not dfs.exists(self.outputpath):
         logger.debug("precheck(%s): outputpath %s does not exist, ready to run."
                      % (self, self.outputpath))
         return 'ready'
     inTSs = [dfs.modtime(path) for path in self.inputpaths]
     outTS = dfs.modtime(self.outputpath)
     # Explicit scan instead of reduce(): reduce() without an initial value
     # raises TypeError on an empty input list.
     newer = False
     for inTS in inTSs:
         if inTS > outTS:
             newer = True
             break
     logger.debug("Input timestamps: %s" % inTSs)
     logger.debug("Output timestamp: %s" % outTS)
     if newer:
         logger.debug("At least one input file is newer than outputfile, ready to run.")
         # Remove the stale output so the job can recreate it.
         dfs.delete(self.outputpath)
         return 'ready'
     logger.debug("No input file is newer than outputfile, skipping.")
     return 'skip'
コード例 #2
0
ファイル: test_flow.py プロジェクト: tristanbuckner/happy
 def _testHappyRun(self):
     """
     Run an IdentityJob from input "small" to output "crap".

     The output path is deleted before the run and again afterwards, even
     when the run raises.
     """
     # NOTE(review): the log message names testSingleRun, not this method;
     # confirm which name is intended before changing the text.
     logger.info("In TestFlow.testSingleRun() ...")
     h = IdentityJob()
     h.inputpaths = "small"
     h.outputpath = "crap"
     dfs.delete('crap')
     try:
         h.run()
     finally:
         # Clean up even on failure so a broken run does not leave output
         # behind for later tests.
         dfs.delete('crap')
コード例 #3
0
ファイル: flow.py プロジェクト: tristanbuckner/happy
 def deleteOutFiles(self, onlytmp=True):
     """
     Delete the output files of the nodes returned by self.sort().

     @param onlytmp: when true (the default), only output paths whose first
                     four characters are 'tmp.' are deleted; when false,
                     every node's output path is deleted.
     """
     self.linkNodes()
     for node in self.sort():
         outpath = node.outputpath
         # Skip non-temporary outputs unless the caller asked for everything.
         if onlytmp and outpath[0:4] != 'tmp.':
             continue
         logger.info("Deleting output file '%s'" % outpath)
         dfs.delete(outpath)
コード例 #4
0
ファイル: flow.py プロジェクト: tristanbuckner/happy
 def fire(self):
     """
     Runs this node's TripleQuery job.  Blocks until completed.

     Sets self.status to 'done' when the job's run() returns normally.
     If an Exception is raised, it is logged, self.status is set to 'fail'
     and the output path is deleted; the exception is not re-raised.
     """
     import sys
     job = TripleQuery(self.query, self.inputpaths, self.outputpath)
     try:
         job.run()
         logger.debug("TripleQuery run.  Setting status to done.")
         self.status = 'done'
     except Exception:
         logger.error("Caught exception in TripleQuery.  Setting status to fail and deleting output.")
         # Log the exception itself so the cause of the failure is not lost.
         logger.error("Exception was: %r" % (sys.exc_info()[1],))
         # Record the failure before cleaning up, so the status is correct
         # even if deleting the output raises.
         self.status = 'fail'
         dfs.delete(self.outputpath)
コード例 #5
0
ファイル: flow.py プロジェクト: tristanbuckner/happy
 def fire(self):
     """
     Runs this node's HappyJob.  Blocks until completed.

     Does nothing when self.job is falsy.  Otherwise sets self.status to
     'done' when the job's run() returns normally.  If an Exception is
     raised, it is logged, self.status is set to 'fail' and the output path
     is deleted; the exception is not re-raised.
     """
     import sys
     if not self.job:
         return
     job = self.job
     try:
         job.run()
         logger.debug("Job run.  Setting status to done.")
         self.status = 'done'
     except Exception:
         logger.error("Caught exception.  Setting status to fail and deleting output.")
         # Log the exception itself so the cause of the failure is not lost.
         logger.error("Exception was: %r" % (sys.exc_info()[1],))
         # Record the failure before cleaning up, so the status is correct
         # even if deleting the output raises.
         self.status = 'fail'
         dfs.delete(self.outputpath)
コード例 #6
0
ファイル: flow.py プロジェクト: tristanbuckner/happy
    def run(self, force=False, workingDir=None):
        """
        Runs the entire job chain (ie DAG) that contains this node.

        Nodes are visited in the order returned by self.sort().  A node is
        only processed when it is in the ok_children list, which starts as
        self.sources() and is extended with the children of every node that
        was skipped or whose postcheck() returned 'done'.  Nodes that are
        never added to that list are logged as a terminated branch.

        @param force: when true, deleteOutFiles(onlytmp=False) is called
                      before any node is processed.
        @param workingDir: passed through to linkNodes().
        """
        logger.debug("Calling HappyJobNode.run(), workingDir=%s" % workingDir)
        self.linkNodes(workingDir)
        if force:
            self.deleteOutFiles(onlytmp=False)
        stack = self.sort()
        logger.info("Stack order is: %s" % (", ".join([str(x._id) for x in stack],)))
        ok_children = self.sources()
        while stack:
            node = stack.pop(0)
            putChildren = False

            if node not in ok_children:
                logger.warn("Branch terminated: node %s not in ok_children list %s." % (node, ok_children))
                continue

            pre = node.precheck()
            if node.force:
                logger.info("FORCING %s [%s --> %s] (delete %s first)" % (node, node.inputpaths, node.outputpath, node.outputpath))
                dfs.delete(node.outputpath)
                node.fire()
            elif (pre == 'ready'):
                logger.info("Running %s [%s --> %s]" % (node, node.inputpaths, node.outputpath))
                node.fire()
            else:
                logger.info("Skipping job %s: already done" % node)
                putChildren = True
                # The skip applies to the node being visited, not to the
                # node run() was invoked on.
                node.status = 'skip'

            post = node.postcheck()
            if (post == 'done'):
                logger.info("Job %s completed successfully. " % node)
                putChildren = True
            elif (post == 'fail'):
                logger.info("Job %s failed.  Not adding children." % node)

            if putChildren:
                if (node.isSink()):
                    logger.info("Job %s is a sink, no children." % node)
                else:
                    # Children become eligible by joining ok_children; they
                    # are already present in the sorted stack.
                    newChildren = [child for child in node.children() if child not in ok_children]
                    logger.info("Placing children %s of job %s on stack." %  (newChildren, node))
                    ok_children.extend(newChildren)
コード例 #7
0
ファイル: test.py プロジェクト: tristanbuckner/happy
 def testSequenceFiles(self):
     """
     Write 1000 identical records to one sequence file per compression
     type, then read the whole "testcase" directory back and check every
     record and the total count.
     """
     directory = "testcase"
     codecs = ["lzo", "gzip", "zlib"]
     records_per_file = 1000

     # prep: remove anything left over under the test directory
     dfs.delete(directory)

     # write: one sequence file per compression type
     for index, codec in enumerate(codecs):
         path = "%s/testcase%d.seq" % (directory, index)
         writer = dfs.createCollector(path, type="sequence", compressiontype=codec)
         for _ in range(records_per_file):
             writer.collect("key", "value")
         writer.close()

     # read: iterate over every record found under the directory
     reader = dfs.readSequenceFile(directory)
     seen = 0
     for key, value in reader:
         seen += 1
         self.assertEqual(key, "key", "Wrong key")
         self.assertEqual(value, "value", "Wrong value")
     reader.close()
     self.assertEqual(seen, len(codecs) * records_per_file, "Wrong number of values")

     # cleanup:
     dfs.delete(directory)
コード例 #8
0
ファイル: test.py プロジェクト: tristanbuckner/happy
 def testReadWrite(self):
     """
     Write three lines to a file, rename it, then read and grep the renamed
     file and check the results.
     """
     source = "testcase.txt"
     target = "testcase2.txt"

     # prep: make sure neither file exists
     for name in (source, target):
         dfs.delete(name)

     # write:
     out = dfs.write(source)
     self.assertNotEqual(out, None, "No file found")
     for text in ("test1\n", "test2\n", "test3\n"):
         out.write(text)
     out.close()

     # rename:
     dfs.rename(source, target)

     # read:
     inp = dfs.read(target)
     self.assertNotEqual(inp, None, "No file found")
     lines = inp.readlines()
     inp.close()
     self.assertEqual(len(lines), 3, "Wrong number of lines was read")
     for index, line in enumerate(lines):
         expected = "test%d\n" % (index + 1)
         self.assertEqual(line, expected, "Wrong line value")

     # grep:
     matches = list(dfs.grep(target, "t2"))
     self.assertEqual(matches, ["test2"], "grep failed")

     # cleanup:
     dfs.delete(target)