Esempio n. 1
0
def binpid2zip(pid, outfile, log_callback=None):
    """Generate a zip file given a canonical pid.

    The bin's ``.hdr``, ``.adc`` and ``.roi`` files are each copied from
    their URLs into temporary files, the header and ADC data are parsed,
    and the result is handed to ``bin2zip`` together with an open handle
    on ``outfile``.

    pid - pid of the bin; parsed with ``parse_pid``
    outfile - path of the zip file to create (opened here in ``'wb'`` mode)
    log_callback - optional callable invoked with each progress message;
        when None, progress messages are discarded

    Returns whatever ``bin2zip`` returns.
    """
    def log(msg):
        # Progress reporting is optional: silently drop messages when the
        # caller supplied no callback.
        if log_callback is not None:
            log_callback(msg)

    parsed = parse_pid(pid)
    bin_pid = ''.join([parsed[NAMESPACE], parsed[BIN_LID]])
    timestamp = iso8601(strptime(parsed[TIMESTAMP], parsed[TIMESTAMP_FORMAT]))
    log('copying raw data for %s to temp files ...' % bin_pid)
    # The header temp file is only needed until it has been parsed.
    with tempfile.NamedTemporaryFile() as hdr_tmp:
        hdr_path = hdr_tmp.name
        drain(UrlSource(bin_pid+'.hdr'), LocalFileSink(hdr_path))
        hdr = parse_hdr_file(hdr_path)
    # Targets are computed while the ADC temp file still exists.
    with tempfile.NamedTemporaryFile() as adc_tmp:
        adc_path = adc_tmp.name
        drain(UrlSource(bin_pid+'.adc'), LocalFileSink(adc_path))
        adc = Adc(adc_path, parsed[SCHEMA_VERSION])
        unstitched_targets = add_pids(adc.get_targets(), bin_pid)
        stitched_targets = list_stitched_targets(unstitched_targets)
    # bin2zip is given the ROI file by path, so the zip has to be written
    # before the ROI temp file is closed (and thereby removed).
    with tempfile.NamedTemporaryFile() as roi_tmp:
        roi_path = roi_tmp.name
        drain(UrlSource(bin_pid+'.roi'), LocalFileSink(roi_path))
        log('copied raw data for %s' % bin_pid)
        # Arguments passed to bin2zip, in order:
        #   parsed           - result of parsing pid
        #   bin_pid          - namespace + bin lid, used as the canonical pid
        #   stitched_targets - list of (stitched) targets
        #   hdr              - result of parsing header file
        #   timestamp        - value returned by iso8601() for the bin's time
        #   roi_path         - path to ROI file
        #   fout             - open binary handle on outfile
        log('creating zip file for %s' % bin_pid)
        with open(outfile,'wb') as fout:
            return bin2zip(parsed,bin_pid,stitched_targets,hdr,timestamp,roi_path,fout)
Esempio n. 2
0
def binpid2zip(bin_pid, outfile, resolver_file='oii/ifcb/mvco.xml', resolver=None):
    """Build the zip for ``bin_pid``, looking the pid up through a resolver.

    bin_pid - pid of the bin; also the URL prefix of its raw files
    outfile - passed through unchanged to ``bin_zip``
    resolver_file - parsed with ``parse_stream`` when no resolver is given
    resolver - already-parsed resolver; takes precedence over resolver_file

    The ``.hdr``, ``.adc`` and ``.roi`` files are copied, in that order,
    into temporary files that stay open until ``bin_zip`` has returned.
    Returns whatever ``bin_zip`` returns.
    """
    pid_resolver = parse_stream(resolver_file) if resolver is None else resolver
    resolved = pid_resolver['pid'].resolve(pid=bin_pid)

    def fetch(extension, tmp_file):
        # Copy the raw file with this extension into tmp_file and hand
        # back the local path it was written to.
        local_path = tmp_file.name
        drain(UrlSource(bin_pid+extension), LocalFileSink(local_path))
        return local_path

    # Each temp file is created only once the previous download finished.
    with tempfile.NamedTemporaryFile() as hdr_tmp:
        hdr_path = fetch('.hdr', hdr_tmp)
        with tempfile.NamedTemporaryFile() as adc_tmp:
            adc_path = fetch('.adc', adc_tmp)
            with tempfile.NamedTemporaryFile() as roi_tmp:
                roi_path = fetch('.roi', roi_tmp)
                return bin_zip(resolved, hdr_path, adc_path, roi_path, outfile)
Esempio n. 3
0
 def extract_features(self,bin_pid):
     """Run MATLAB feature extraction for one bin and deposit the results.

     Steps: skip if the bin is already complete; create two temporary
     directories; write the bin zip and download the blob zip into one of
     them; run ``bin_features`` in MATLAB with the other as output
     directory; deposit the features csv (and the multiblob csv when it
     exists) unless the bin has been completed in the meantime; finally
     remove both temporary directories.

     bin_pid - pid of the bin to process

     Returns SKIP when the bin is already complete on entry, DIE when a
     KeyboardInterrupt arrives during the MATLAB/deposit phase, and None
     otherwise. A JobExit raised during that phase is swallowed after
     being logged by its raiser. Exceptions from the download phase
     propagate to the caller, but the temporary directories are removed
     first.
     """
     jobid = gen_id()[:5]
     def selflog(line):
         self.log('[%s] %s' % (jobid, line))
     def self_check_log(line,bin_pid):
         # Used as MATLAB's output callback: log the line and, at most once
         # per CHECK_EVERY seconds, abort if another worker finished the bin.
         selflog(line)
         now = time.time()
         elapsed = now - self.last_check
         self.last_check = now
         if elapsed > CHECK_EVERY:
             if self.complete(bin_pid):
                 msg = 'STOPPING JOB - %s completed by another worker' % bin_pid
                 selflog(msg)
                 raise JobExit(msg, SKIP)
     if self.complete(bin_pid):
         selflog('SKIPPING %s - already completed' % bin_pid)
         return SKIP
     job_dir = os.path.join(self.config.tmp_dir, gen_id())
     zip_dir = os.path.join(self.config.tmp_dir, gen_id())
     bin_zip_path = os.path.join(zip_dir, binzipname(bin_pid))
     work_dirs = (job_dir, zip_dir)
     for work_dir in work_dirs:
         # Best effort: a failure is logged and surfaces later when the
         # directory is actually used.
         try:
             os.makedirs(work_dir)
         except OSError:
             selflog('WARNING cannot create temporary directory %s for %s' % (work_dir, bin_pid))
         else:
             selflog('CREATED temporary directory %s for %s' % (work_dir, bin_pid))
     try:
         # Everything from here on runs under the cleanup 'finally' so a
         # failed download does not leave the temporary directories behind.
         selflog('LOADING and STITCHING %s' % bin_pid)
         with open(bin_zip_path,'wb') as binzip:
             represent.binpid2zip(bin_pid, binzip, resolver=self.resolver)
         blobzipurl = bin_pid + '_blob.zip'
         # NOTE(review): the pattern is unanchored, so every '.zip' in the
         # path (including directory components) is rewritten - confirm
         # that tmp_dir can never contain '.zip'.
         blobzipfile = re.sub(r'\.zip','_blob.zip',bin_zip_path)
         selflog('LOADING blob zip from %s -> %s' % (blobzipurl, blobzipfile))
         drain(UrlSource(blobzipurl), LocalFileSink(blobzipfile))
         feature_csv = os.path.join(job_dir, csvname(bin_pid))
         multiblob_csv = os.path.join(job_dir, 'multiblob', multiblobname(bin_pid))
         matlab = Matlab(self.config.matlab_exec_path,self.config.matlab_path,output_callback=lambda l: self_check_log(l, bin_pid))
         namespace = os.path.dirname(bin_zip_path) + '/'
         lid = os.path.basename(bin_zip_path)
         cmd = 'bin_features(\'%s\',\'%s\',\'%s\',\'chatty\')' % (namespace, lid, job_dir + '/')
         selflog('RUNNING %s' % cmd)
         try:
             # NOTE(review): self_check_log reads self.last_check, while
             # this sets self.output_check - confirm both are intended.
             self.output_check = CHECK_EVERY
             matlab.run(cmd)
             if not os.path.exists(feature_csv):
                 msg = 'WARNING bin_features succeeded but no output file found at %s' % feature_csv
                 selflog(msg)
                 raise JobExit(msg,FAIL)
             if not self.complete(bin_pid): # check to make sure another worker hasn't finished it in the meantime
                 selflog('DEPOSITING features csv for %s to deposit service at %s' % (bin_pid, self.config.features_deposit))
                 self.deposit.deposit(bin_pid,feature_csv)
                 selflog('DEPOSITED features csv for %s ' % bin_pid)
                 if os.path.exists(multiblob_csv):
                     selflog('DEPOSITING multiblob csv for %s to deposit service at %s' % (bin_pid, self.config.features_deposit))
                     self.multiblob_deposit.deposit(bin_pid,multiblob_csv)
                     selflog('DEPOSITED multiblob csv for %s ' % bin_pid)
             else:
                 selflog('NOT SAVING - features for %s already present at output destination' % bin_pid)
         except KeyboardInterrupt:
             selflog('KeyboardInterrupt, exiting')
             return DIE
         except JobExit:
             # Already logged where it was raised; nothing more to do.
             pass
     finally:
         for work_dir in work_dirs:
             # Only filesystem errors are tolerated here, so that
             # KeyboardInterrupt/SystemExit are not swallowed by cleanup.
             try:
                 shutil.rmtree(work_dir)
             except OSError:
                 selflog('WARNING cannot remove temporary directory %s for %s' % (work_dir, bin_pid))
             else:
                 selflog('DELETED temporary directory %s for %s' % (work_dir, bin_pid))
         selflog('DONE - no more actions for %s' % bin_pid)