def extract_features(pid, job):
    """Run the MATLAB ``bin_features`` job for one bin and upload its output.

    If ``<bin_pid>_features.csv`` already exists the function logs a skip
    message and returns immediately. Otherwise it downloads the bin zip and
    the blob zip into a temporary directory, runs ``bin_features`` with a
    second temporary directory as its output location, uploads the features
    CSV and (when MATLAB produced one) the multiblob CSV, and finally calls
    ``client.wakeup()``.

    Every progress message is both logged and sent as a heartbeat for ``pid``.

    Args:
        pid: identifier of the bin; it is split with ``parse_pid``.
        job: not used by this function.

    Raises:
        Exception: if MATLAB finishes without producing the features CSV.
    """
    def log_callback(msg):
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling and behaves the same.
        logging.warning('FEATURES %s' % msg)
        client.heartbeat(pid, message=msg)

    parsed_pid = parse_pid(pid)
    bin_lid = parsed_pid[LID]
    bin_pid = ''.join([parsed_pid[NAMESPACE], parsed_pid[LID]])
    binzip_url = bin_pid + '_binzip.zip'
    blob_url = bin_pid + '_blob.zip'
    features_url = bin_pid + '_features.csv'
    multiblob_url = bin_pid + '_multiblob.csv'

    if exists(features_url):
        log_callback('skipping %s - features exist' % pid)
        return

    log_callback('computing features for %s' % pid)
    with safe_tempdir() as binzip_dir:
        # download bin zip
        binzip_path = os.path.join(binzip_dir, '%s.zip' % bin_lid)
        log_callback('downloading %s to %s' % (binzip_url, binzip_path))
        download(binzip_url, binzip_path)
        # download blob zip; it is placed next to the bin zip
        blob_path = os.path.join(binzip_dir, '%s_blob.zip' % bin_lid)
        log_callback('downloading %s to %s' % (blob_url, blob_path))
        download(blob_url, blob_path)
        # compute features
        with safe_tempdir() as job_dir:
            # expected output of the matlab job
            feature_csv = os.path.join(job_dir, csvname(bin_pid))
            multiblob_csv = os.path.join(job_dir, 'multiblob', multiblobname(bin_pid))
            # params for the matlab job: directory (with trailing slash)
            # and file name of the bin zip, plus the output directory
            namespace = os.path.dirname(binzip_path) + '/'
            lid = os.path.basename(binzip_path)
            matlab = Matlab(MATLAB_EXEC_PATH, MATLAB_PATH, output_callback=log_callback)
            cmd = "bin_features('%s','%s','%s','chatty')" % (namespace, lid, job_dir + '/')
            log_callback('running %s' % cmd)
            matlab.run(cmd)
            log_callback('matlab exited')
            if not os.path.exists(feature_csv):
                raise Exception('no features found')
            log_callback('features found at %s' % feature_csv)
            log_callback('uploading %s' % features_url)
            upload(feature_csv, features_url)
            # the multiblob CSV is optional output
            if os.path.exists(multiblob_csv):
                log_callback('multiblob found at %s' % multiblob_csv)
                log_callback('uploading %s' % multiblob_url)
                upload(multiblob_csv, multiblob_url)
    log_callback('complete')
    client.wakeup()
def run_callback(self, message):
    """Run the MATLAB ``bin_blobs`` job for the bin named by ``message``.

    The bin is skipped when ``self.exists`` already reports output for it.
    Otherwise a fresh job directory is created under ``self.config.tmp_dir``,
    ``bin_blobs`` is run with that directory as its output location, and the
    resulting zip is deposited: through ``self.deposit`` when one is
    configured, otherwise through ``local_deposit``. The job directory is
    removed afterwards whatever the outcome.

    While MATLAB runs, every CHECK_EVERY-th output line triggers a check of
    ``self.exists``; if the output has appeared in the meantime the callback
    raises ``JobExit(bin_pid, SKIP)``. That exception is not caught here.

    Args:
        message: the bin pid to process.

    Returns:
        SKIP if the output already existed on entry, DIE if a
        KeyboardInterrupt arrived while the job was running, otherwise None.
    """
    jobid = gen_id()[:5]

    def selflog(line):
        self.log('%s %s' % (jobid, line))

    def self_check_log(line, bin_pid):
        # Log the output line, then count down towards the next
        # "did another worker finish this?" check.
        selflog(line)
        self.output_check -= 1
        if self.output_check <= 0:
            if self.exists(bin_pid):
                selflog('STOPPING JOB - %s completed by another worker' % bin_pid)
                raise JobExit(bin_pid, SKIP)
            self.output_check = CHECK_EVERY

    bin_pid = message
    dest_file = self.storage.dest(bin_pid)
    if self.exists(bin_pid):
        selflog('SKIPPING %s - already completed' % bin_pid)
        return SKIP

    job_dir = os.path.join(self.config.tmp_dir, gen_id())
    try:
        os.makedirs(job_dir)
    except Exception:
        # Best effort: carry on and let the job itself fail if the directory
        # is really unusable. Exception (not a bare except) so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        selflog('WARNING cannot create temporary directory %s' % job_dir)

    tmp_file = os.path.join(job_dir, self.storage.zipname(bin_pid))
    matlab = Matlab(self.config.matlab_exec_path, self.config.matlab_path,
                    output_callback=lambda l: self_check_log(l, bin_pid))
    cmd = "bin_blobs('%s','%s')" % (bin_pid, job_dir)
    try:
        self.output_check = CHECK_EVERY
        matlab.run(cmd)
        if not os.path.exists(tmp_file):
            selflog('WARNING bin_blobs succeeded but produced no output for %s' % bin_pid)
        elif not self.exists(bin_pid):
            # check to make sure another worker hasn't finished it in the meantime
            if self.deposit is not None:
                selflog('DEPOSITING blob zip for %s to deposit service' % bin_pid)
                self.deposit.deposit(bin_pid, tmp_file)
            else:
                selflog('SAVING completed blob zip for %s to %s' % (bin_pid, dest_file))
                local_deposit(bin_pid, tmp_file)
        else:
            selflog('NOT SAVING - blobs for %s already present at output destination' % bin_pid)
    except KeyboardInterrupt:
        selflog('KeyboardInterrupt, requeueing job before exit')
        return DIE
    finally:
        try:
            shutil.rmtree(job_dir)
        except Exception:
            # Cleanup failure is only worth a warning, but an interrupt
            # during cleanup must still get through.
            selflog('WARNING cannot remove temporary directory %s' % job_dir)
def extract_blobs(pid, job):
    """Run the MATLAB ``bin_blobs`` job for one bin and upload the blob zip.

    If ``<bin_pid>_blobs.zip`` already exists the function logs a skip
    message and returns immediately. Otherwise it downloads the bin zip into
    a temporary directory, runs ``bin_blobs`` with a second temporary
    directory as its output location, uploads the resulting zip to
    ``<bin_pid>_blobs.zip`` and finally calls ``client.wakeup()``.

    Every progress message is both logged and sent as a heartbeat for ``pid``.

    Args:
        pid: identifier of the bin; it is split with ``parse_pid``.
        job: not used by this function.

    Raises:
        Exception: if MATLAB finishes without producing the expected zip.
    """
    def log_callback(msg):
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling and behaves the same.
        logging.warning('BLOBS %s' % msg)
        client.heartbeat(pid, message=msg)

    parsed_pid = parse_pid(pid)
    bin_lid = parsed_pid[LID]
    bin_pid = ''.join([parsed_pid[NAMESPACE], parsed_pid[LID]])
    binzip_url = bin_pid + '_binzip.zip'
    # NOTE(review): the features task in this file downloads
    # '<bin_pid>_blob.zip' (singular) - confirm both names resolve to the
    # same resource.
    deposit_url = '%s_blobs.zip' % bin_pid

    if exists(deposit_url):
        log_callback('skipping %s - blobs exist' % pid)
        return

    log_callback('computing blobs for %s' % pid)
    with safe_tempdir() as binzip_dir:
        # first, copy the zipfile to a temp dir
        binzip_path = os.path.join(binzip_dir, '%s.zip' % bin_lid)
        log_callback('downloading %s to %s' % (binzip_url, binzip_path))
        download(binzip_url, binzip_path)
        # now run bin_blobs
        with safe_tempdir() as job_dir:
            # configure matlab
            matlab = Matlab(MATLAB_EXEC_PATH, MATLAB_PATH, output_callback=log_callback)
            # run command
            blobs_file = os.path.join(job_dir, blob_zip_name(bin_pid))
            cmd = "bin_blobs('%s','%s','%s')" % (bin_pid, binzip_path, job_dir)
            log_callback('running %s' % cmd)
            matlab.run(cmd)
            log_callback('MATLAB done, checking for %s' % blobs_file)
            if not os.path.exists(blobs_file):
                raise Exception('missing output file')
            log_callback('depositing %s' % blobs_file)
            upload(blobs_file, deposit_url)
            log_callback('deposited %s' % blobs_file)
    log_callback('completed %s' % bin_pid)
    client.wakeup()
def extract_features(self, bin_pid):
    """Run the MATLAB ``bin_features`` job for ``bin_pid`` and deposit the output.

    The bin is skipped when ``self.complete`` already reports it done.
    Otherwise two temporary directories are created under
    ``self.config.tmp_dir`` (one for MATLAB output, one for the input zips),
    the bin zip is written with ``represent.binpid2zip``, the blob zip is
    fetched from ``<bin_pid>_blob.zip``, and ``bin_features`` is run. The
    features CSV is deposited through ``self.deposit`` and, when present, the
    multiblob CSV through ``self.multiblob_deposit``. Both temporary
    directories are removed afterwards whatever the outcome.

    Args:
        bin_pid: pid of the bin to process.

    Returns:
        SKIP if the bin was already complete on entry, DIE if a
        KeyboardInterrupt arrived during loading or the MATLAB run,
        otherwise None (a JobExit raised during the run is swallowed).
    """
    jobid = gen_id()[:5]

    def selflog(line):
        self.log('[%s] %s' % (jobid, line))

    def self_check_log(line, bin_pid):
        selflog(line)
        now = time.time()
        elapsed = now - self.last_check
        # NOTE(review): last_check is refreshed on every output line, so the
        # completion check below only fires when more than CHECK_EVERY
        # seconds pass between two consecutive lines - confirm this is
        # intended rather than a periodic check.
        self.last_check = now
        if elapsed > CHECK_EVERY:
            if self.complete(bin_pid):
                msg = 'STOPPING JOB - %s completed by another worker' % bin_pid
                selflog(msg)
                raise JobExit(msg, SKIP)

    if self.complete(bin_pid):
        selflog('SKIPPING %s - already completed' % bin_pid)
        return SKIP

    job_dir = os.path.join(self.config.tmp_dir, gen_id())
    zip_dir = os.path.join(self.config.tmp_dir, gen_id())
    bin_zip_path = os.path.join(zip_dir, binzipname(bin_pid))
    # Directory creation is best effort; Exception (not a bare except) keeps
    # KeyboardInterrupt / SystemExit from being swallowed.
    try:
        os.makedirs(job_dir)
        selflog('CREATED temporary directory %s for %s' % (job_dir, bin_pid))
    except Exception:
        selflog('WARNING cannot create temporary directory %s for %s' % (job_dir, bin_pid))
    try:
        os.makedirs(zip_dir)
        selflog('CREATED temporary directory %s for %s' % (zip_dir, bin_pid))
    except Exception:
        selflog('WARNING cannot create temporary directory %s for %s' % (zip_dir, bin_pid))

    try:
        # Loading happens inside the try so that a failed download or stitch
        # still reaches the cleanup in `finally` instead of leaking both
        # temporary directories.
        selflog('LOADING and STITCHING %s' % bin_pid)
        with open(bin_zip_path, 'wb') as binzip:
            represent.binpid2zip(bin_pid, binzip, resolver=self.resolver)
        blobzipurl = bin_pid + '_blob.zip'
        blobzipfile = re.sub(r'\.zip', '_blob.zip', bin_zip_path)
        selflog('LOADING blob zip from %s -> %s' % (blobzipurl, blobzipfile))
        drain(UrlSource(blobzipurl), LocalFileSink(blobzipfile))

        feature_csv = os.path.join(job_dir, csvname(bin_pid))
        multiblob_csv = os.path.join(job_dir, 'multiblob', multiblobname(bin_pid))
        matlab = Matlab(self.config.matlab_exec_path, self.config.matlab_path,
                        output_callback=lambda l: self_check_log(l, bin_pid))
        # bin_features takes the directory (with trailing slash) and file
        # name of the bin zip, plus the output directory
        namespace = os.path.dirname(bin_zip_path) + '/'
        lid = os.path.basename(bin_zip_path)
        cmd = "bin_features('%s','%s','%s','chatty')" % (namespace, lid, job_dir + '/')
        selflog('RUNNING %s' % cmd)

        self.output_check = CHECK_EVERY  # not read anywhere in this method
        matlab.run(cmd)
        if not os.path.exists(feature_csv):
            msg = 'WARNING bin_features succeeded but no output file found at %s' % feature_csv
            selflog(msg)
            # caught just below; used to skip the deposit steps
            raise JobExit(msg, FAIL)
        if not self.complete(bin_pid):
            # check to make sure another worker hasn't finished it in the meantime
            selflog('DEPOSITING features csv for %s to deposit service at %s' % (bin_pid, self.config.features_deposit))
            self.deposit.deposit(bin_pid, feature_csv)
            selflog('DEPOSITED features csv for %s ' % bin_pid)
            if os.path.exists(multiblob_csv):
                selflog('DEPOSITING multiblob csv for %s to deposit service at %s' % (bin_pid, self.config.features_deposit))
                self.multiblob_deposit.deposit(bin_pid, multiblob_csv)
                selflog('DEPOSITED multiblob csv for %s ' % bin_pid)
        else:
            selflog('NOT SAVING - features for %s already present at output destination' % bin_pid)
    except KeyboardInterrupt:
        selflog('KeyboardInterrupt, exiting')
        return DIE
    except JobExit:
        pass
    finally:
        try:
            shutil.rmtree(job_dir)
            selflog('DELETED temporary directory %s for %s' % (job_dir, bin_pid))
        except Exception:
            selflog('WARNING cannot remove temporary directory %s for %s' % (job_dir, bin_pid))
        try:
            shutil.rmtree(zip_dir)
            selflog('DELETED temporary directory %s for %s' % (zip_dir, bin_pid))
        except Exception:
            selflog('WARNING cannot remove temporary directory %s for %s' % (zip_dir, bin_pid))
        selflog('DONE - no more actions for %s' % bin_pid)
def extract_blobs(self, bin_pid):
    """Run the MATLAB ``bin_blobs`` job for ``bin_pid`` and deposit the blob zip.

    The bin is skipped when ``self.exists`` already reports output for it, or
    when ``represent.binpid2zip`` returns no targets. Otherwise two temporary
    directories are created under ``self.config.tmp_dir`` (one for MATLAB
    output, one for the input zip), the bin zip is written, ``bin_blobs`` is
    run, and the resulting zip is deposited through ``self.deposit``. Both
    temporary directories are removed afterwards whatever the outcome.

    Log lines are tagged with ``self.config.task_id`` when that is available,
    otherwise with a freshly generated short id.

    Args:
        bin_pid: pid of the bin to process.

    Returns:
        SKIP if the output already existed on entry or the bin has no
        targets, DIE if a KeyboardInterrupt arrived during loading or the
        MATLAB run, otherwise None (a JobExit raised during the run is
        swallowed).
    """
    try:
        jobid = self.config.task_id
    except Exception:
        # No task id available; fall back to a generated tag. Exception
        # (not a bare except) keeps KeyboardInterrupt / SystemExit visible.
        jobid = gen_id()[:5]

    def selflog(line):
        self.log('[%s] %s' % (jobid, line))

    def self_check_log(line, bin_pid):
        selflog(line)
        now = time.time()
        elapsed = now - self.last_check
        # NOTE(review): last_check is refreshed on every output line, so the
        # existence check below only fires when more than CHECK_EVERY
        # seconds pass between two consecutive lines - confirm this is
        # intended rather than a periodic check.
        self.last_check = now
        if elapsed > CHECK_EVERY:
            if self.exists(bin_pid):
                msg = 'STOPPING JOB - %s completed by another worker' % bin_pid
                selflog(msg)
                raise JobExit(msg, SKIP)

    if self.exists(bin_pid):
        selflog('SKIPPING %s - already completed' % bin_pid)
        return SKIP

    job_dir = os.path.join(self.config.tmp_dir, gen_id())
    zip_dir = os.path.join(self.config.tmp_dir, gen_id())
    bin_zip_path = os.path.join(zip_dir, binzipname(bin_pid))
    # Directory creation is best effort: a failure is logged and the job
    # carries on.
    try:
        os.makedirs(job_dir)
        selflog('CREATED temporary directory %s for %s' % (job_dir, bin_pid))
    except Exception:
        selflog('WARNING cannot create temporary directory %s for %s' % (job_dir, bin_pid))
    try:
        os.makedirs(zip_dir)
        selflog('CREATED temporary directory %s for %s' % (zip_dir, bin_pid))
    except Exception:
        selflog('WARNING cannot create temporary directory %s for %s' % (zip_dir, bin_pid))

    try:
        selflog('LOADING and STITCHING %s' % bin_pid)
        with open(bin_zip_path, 'wb') as binzip:
            targets = represent.binpid2zip(bin_pid, binzip, resolver=self.resolver)
        if len(targets) == 0:
            selflog('SKIPPING %s - no targets in bin' % bin_pid)
            return SKIP
        tmp_file = os.path.join(job_dir, zipname(bin_pid))
        matlab = Matlab(self.config.matlab_exec_path, self.config.matlab_path,
                        output_callback=lambda l: self_check_log(l, bin_pid))
        cmd = "bin_blobs('%s','%s','%s')" % (bin_pid, bin_zip_path, job_dir)
        self.output_check = CHECK_EVERY  # not read anywhere in this method
        matlab.run(cmd)
        if not os.path.exists(tmp_file):
            selflog('WARNING bin_blobs succeeded but no output file found at %s' % tmp_file)
        elif not self.exists(bin_pid):
            # check to make sure another worker hasn't finished it in the meantime
            selflog('DEPOSITING blob zip for %s to deposit service at %s' % (bin_pid, self.config.blob_deposit))
            self.deposit.deposit(bin_pid, tmp_file)
            selflog('DEPOSITED blob zip for %s ' % bin_pid)
        else:
            selflog('NOT SAVING - blobs for %s already present at output destination' % bin_pid)
    except KeyboardInterrupt:
        selflog('KeyboardInterrupt, exiting')
        return DIE
    except JobExit:
        pass
    finally:
        try:
            shutil.rmtree(job_dir)
            selflog('DELETED temporary directory %s for %s' % (job_dir, bin_pid))
        except Exception:
            selflog('WARNING cannot remove temporary directory %s for %s' % (job_dir, bin_pid))
        try:
            shutil.rmtree(zip_dir)
            selflog('DELETED temporary directory %s for %s' % (zip_dir, bin_pid))
        except Exception:
            selflog('WARNING cannot remove temporary directory %s for %s' % (zip_dir, bin_pid))
        selflog('DONE - no more actions for %s' % bin_pid)