Example #1
0
 def extract_features(self,bin_pid):
     """Run the Matlab ``bin_features`` job for one bin and deposit its output.

     The bin is stitched into a zip inside a fresh temporary directory, the
     blob zip named ``bin_pid + '_blob.zip'`` is copied next to it, and
     ``bin_features`` is run over both. The features CSV (and the multiblob
     CSV, when one was produced) is then deposited unless ``self.complete``
     reports that the bin has been finished in the meantime.

     Returns SKIP when the bin is already complete on entry, DIE when a
     KeyboardInterrupt arrives while loading or processing, and None in
     every other case (including a JobExit raised along the way). Any other
     exception propagates to the caller after the temporary directories
     have been cleaned up.
     """
     jobid = gen_id()[:5]
     def selflog(line):
         # Prefix every message with a short per-job id.
         self.log('[%s] %s' % (jobid, line))
     def self_check_log(line,bin_pid):
         """Log one line of Matlab output and periodically poll for completion."""
         selflog(line)
         now = time.time()
         # Reset the timestamp only when a check is actually performed, so
         # a check happens once CHECK_EVERY seconds have passed since the
         # previous check. Resetting it on every line (as before) meant the
         # check only fired after a silent gap longer than CHECK_EVERY.
         if now - self.last_check > CHECK_EVERY:
             self.last_check = now
             if self.complete(bin_pid):
                 msg = 'STOPPING JOB - %s completed by another worker' % bin_pid
                 selflog(msg)
                 raise JobExit(msg, SKIP)
     if self.complete(bin_pid):
         selflog('SKIPPING %s - already completed' % bin_pid)
         return SKIP
     job_dir = os.path.join(self.config.tmp_dir, gen_id())
     zip_dir = os.path.join(self.config.tmp_dir, gen_id())
     bin_zip_path = os.path.join(zip_dir, binzipname(bin_pid))
     for tmp in (job_dir, zip_dir):
         # Best effort: a failure is only logged. "except Exception" keeps
         # KeyboardInterrupt / SystemExit from being swallowed here.
         try:
             os.makedirs(tmp)
             selflog('CREATED temporary directory %s for %s' % (tmp, bin_pid))
         except Exception:
             selflog('WARNING cannot create temporary directory %s for %s' % (tmp, bin_pid))
     try:
         # Loading happens inside the try block so that the temporary
         # directories are removed even when stitching or the blob copy fails.
         selflog('LOADING and STITCHING %s' % bin_pid)
         with open(bin_zip_path,'wb') as binzip:
             represent.binpid2zip(bin_pid, binzip, resolver=self.resolver)
         blobzipurl = bin_pid + '_blob.zip'
         blobzipfile = re.sub(r'\.zip','_blob.zip',bin_zip_path)
         selflog('LOADING blob zip from %s -> %s' % (blobzipurl, blobzipfile))
         drain(UrlSource(blobzipurl), LocalFileSink(blobzipfile))
         feature_csv = os.path.join(job_dir, csvname(bin_pid))
         multiblob_csv = os.path.join(job_dir, 'multiblob', multiblobname(bin_pid))
         matlab = Matlab(self.config.matlab_exec_path,self.config.matlab_path,output_callback=lambda l: self_check_log(l, bin_pid))
         # bin_features is given the zip's directory (with trailing slash),
         # the zip's file name, and the output directory (with trailing slash).
         namespace = os.path.dirname(bin_zip_path) + '/'
         lid = os.path.basename(bin_zip_path)
         cmd = 'bin_features(\'%s\',\'%s\',\'%s\',\'chatty\')' % (namespace, lid, job_dir + '/')
         selflog('RUNNING %s' % cmd)
         # NOTE(review): output_check is not read anywhere in this method;
         # kept because code outside this block may rely on it.
         self.output_check = CHECK_EVERY
         matlab.run(cmd)
         if not os.path.exists(feature_csv):
             msg = 'WARNING bin_features succeeded but no output file found at %s' % feature_csv
             selflog(msg)
             raise JobExit(msg,FAIL)
         if not self.complete(bin_pid): # check to make sure another worker hasn't finished it in the meantime
             selflog('DEPOSITING features csv for %s to deposit service at %s' % (bin_pid, self.config.features_deposit))
             self.deposit.deposit(bin_pid,feature_csv)
             selflog('DEPOSITED features csv for %s ' % bin_pid)
             if os.path.exists(multiblob_csv):
                 selflog('DEPOSITING multiblob csv for %s to deposit service at %s' % (bin_pid, self.config.features_deposit))
                 self.multiblob_deposit.deposit(bin_pid,multiblob_csv)
                 selflog('DEPOSITED multiblob csv for %s ' % bin_pid)
         else:
             selflog('NOT SAVING - features for %s already present at output destination' % bin_pid)
     except KeyboardInterrupt:
         selflog('KeyboardInterrupt, exiting')
         return DIE
     except JobExit:
         # The reason was already logged where the JobExit was raised.
         pass
     finally:
         # Always remove both temporary directories, whatever happened above.
         for tmp in (job_dir, zip_dir):
             try:
                 shutil.rmtree(tmp)
                 selflog('DELETED temporary directory %s for %s' % (tmp, bin_pid))
             except Exception:
                 selflog('WARNING cannot remove temporary directory %s for %s' % (tmp, bin_pid))
         selflog('DONE - no more actions for %s' % bin_pid)
Example #2
0
 def extract_blobs(self,bin_pid):
     """Run the Matlab ``bin_blobs`` job for one bin and deposit the blob zip.

     The bin is stitched into a zip inside a fresh temporary directory and
     ``bin_blobs`` is run over it. The resulting zip is deposited unless
     ``self.exists`` reports that the bin's output is already present.

     Returns SKIP when the output already exists on entry or when the bin
     contains no targets, DIE when a KeyboardInterrupt arrives while loading
     or processing, and None in every other case (including a JobExit raised
     along the way). Any other exception propagates to the caller after the
     temporary directories have been cleaned up.
     """
     # Prefer the configured task id for log prefixes; fall back to a short
     # random id when it cannot be read. "except Exception" keeps
     # KeyboardInterrupt / SystemExit from being swallowed by the fallback.
     try:
         jobid = self.config.task_id
     except Exception:
         jobid = gen_id()[:5]
     def selflog(line):
         # Prefix every message with the job id.
         self.log('[%s] %s' % (jobid, line))
     def self_check_log(line,bin_pid):
         """Log one line of Matlab output and periodically poll for completion."""
         selflog(line)
         now = time.time()
         # Reset the timestamp only when a check is actually performed, so
         # a check happens once CHECK_EVERY seconds have passed since the
         # previous check. Resetting it on every line (as before) meant the
         # check only fired after a silent gap longer than CHECK_EVERY.
         if now - self.last_check > CHECK_EVERY:
             self.last_check = now
             if self.exists(bin_pid):
                 msg = 'STOPPING JOB - %s completed by another worker' % bin_pid
                 selflog(msg)
                 raise JobExit(msg, SKIP)
     if self.exists(bin_pid):
         selflog('SKIPPING %s - already completed' % bin_pid)
         return SKIP
     job_dir = os.path.join(self.config.tmp_dir, gen_id())
     zip_dir = os.path.join(self.config.tmp_dir, gen_id())
     bin_zip_path = os.path.join(zip_dir, binzipname(bin_pid))
     for tmp in (job_dir, zip_dir):
         # Best effort: a failure is only logged, not raised.
         try:
             os.makedirs(tmp)
             selflog('CREATED temporary directory %s for %s' % (tmp, bin_pid))
         except Exception:
             selflog('WARNING cannot create temporary directory %s for %s' % (tmp, bin_pid))
     try:
         selflog('LOADING and STITCHING %s' % bin_pid)
         with open(bin_zip_path,'wb') as binzip:
             targets = represent.binpid2zip(bin_pid, binzip, resolver=self.resolver)
             if len(targets)==0:
                 selflog('SKIPPING %s - no targets in bin' % bin_pid)
                 # The finally clause below still removes the temp directories.
                 return SKIP
         tmp_file = os.path.join(job_dir, zipname(bin_pid))
         matlab = Matlab(self.config.matlab_exec_path,self.config.matlab_path,output_callback=lambda l: self_check_log(l, bin_pid))
         cmd = 'bin_blobs(\'%s\',\'%s\',\'%s\')' % (bin_pid, bin_zip_path, job_dir)
         # NOTE(review): output_check is not read anywhere in this method;
         # kept because code outside this block may rely on it.
         self.output_check = CHECK_EVERY
         matlab.run(cmd)
         if not os.path.exists(tmp_file):
             selflog('WARNING bin_blobs succeeded but no output file found at %s' % tmp_file)
         elif not self.exists(bin_pid): # check to make sure another worker hasn't finished it in the meantime
             selflog('DEPOSITING blob zip for %s to deposit service at %s' % (bin_pid, self.config.blob_deposit))
             self.deposit.deposit(bin_pid,tmp_file)
             selflog('DEPOSITED blob zip for %s ' % bin_pid)
         else:
             selflog('NOT SAVING - blobs for %s already present at output destination' % bin_pid)
     except KeyboardInterrupt:
         selflog('KeyboardInterrupt, exiting')
         return DIE
     except JobExit:
         # The reason was already logged where the JobExit was raised.
         pass
     finally:
         # Always remove both temporary directories, whatever happened above.
         for tmp in (job_dir, zip_dir):
             try:
                 shutil.rmtree(tmp)
                 selflog('DELETED temporary directory %s for %s' % (tmp, bin_pid))
             except Exception:
                 selflog('WARNING cannot remove temporary directory %s for %s' % (tmp, bin_pid))
         selflog('DONE - no more actions for %s' % bin_pid)