def extract_features(pid, job):
    """Compute the features CSV for the bin identified by ``pid`` and upload it.

    Steps, in order:

    1. Derive the bin's URLs from ``parse_pid(pid)``; return early (after
       logging) when the features URL already exists.
    2. Download the bin zip and the blob zip into one temporary directory.
    3. Run the MATLAB ``bin_features`` command, writing into a second
       temporary directory.
    4. Upload the features CSV, and the multiblob CSV when MATLAB produced one.
    5. Log completion and call ``client.wakeup()``.

    Every progress message is logged at WARNING level with a ``FEATURES``
    prefix and also forwarded via ``client.heartbeat``.

    :param pid: identifier of the bin to process; parsed with ``parse_pid``.
    :param job: not used by this function; kept so the signature matches
        what callers pass.
    :raises Exception: with message ``'no features found'`` when MATLAB exits
        without producing the expected features CSV.
    """
    def log_callback(msg):
        # logging.warn is a deprecated alias; logging.warning emits the same record.
        logging.warning('FEATURES %s', msg)
        client.heartbeat(pid, message=msg)

    parsed_pid = parse_pid(pid)
    bin_lid = parsed_pid[LID]
    bin_pid = parsed_pid[NAMESPACE] + bin_lid

    binzip_url = bin_pid + '_binzip.zip'
    # NOTE(review): extract_blobs in this file deposits to '<bin_pid>_blobs.zip'
    # while this downloads '<bin_pid>_blob.zip' - confirm both resolve to the
    # same resource.
    blob_url = bin_pid + '_blob.zip'
    features_url = bin_pid + '_features.csv'
    multiblob_url = bin_pid + '_multiblob.csv'

    if exists(features_url):
        log_callback('skipping %s - features exist' % pid)
        return

    log_callback('computing features for %s' % pid)
    with safe_tempdir() as binzip_dir:
        # download bin zip
        binzip_path = os.path.join(binzip_dir, '%s.zip' % bin_lid)
        log_callback('downloading %s to %s' % (binzip_url, binzip_path))
        download(binzip_url, binzip_path)

        # download blob zip into the same directory as the bin zip; it is not
        # passed to MATLAB explicitly (presumably bin_features locates it next
        # to the bin zip - confirm against the MATLAB code)
        blob_path = os.path.join(binzip_dir, '%s_blob.zip' % bin_lid)
        log_callback('downloading %s to %s' % (blob_url, blob_path))
        download(blob_url, blob_path)

        # compute features
        with safe_tempdir() as job_dir:
            # output of matlab job
            feature_csv = os.path.join(job_dir, csvname(bin_pid))
            multiblob_csv = os.path.join(job_dir, 'multiblob', multiblobname(bin_pid))

            # params for matlab job: directory (with trailing slash) and file
            # name of the downloaded bin zip
            namespace = os.path.dirname(binzip_path) + '/'
            lid = os.path.basename(binzip_path)

            matlab = Matlab(MATLAB_EXEC_PATH, MATLAB_PATH, output_callback=log_callback)
            cmd = "bin_features('%s','%s','%s','chatty')" % (namespace, lid, job_dir + '/')
            log_callback('running %s' % cmd)
            matlab.run(cmd)
            log_callback('matlab exited')

            # the features CSV is mandatory; fail before attempting any upload
            if not os.path.exists(feature_csv):
                raise Exception('no features found')
            log_callback('features found at %s' % feature_csv)
            log_callback('uploading %s' % features_url)
            upload(feature_csv, features_url)

            # the multiblob CSV is optional; upload it only when present
            if os.path.exists(multiblob_csv):
                log_callback('multiblob found at %s' % multiblob_csv)
                log_callback('uploading %s' % multiblob_url)
                upload(multiblob_csv, multiblob_url)

    log_callback('complete')
    client.wakeup()
def extract_blobs(pid, job):
    """Compute the blob zip for the bin identified by ``pid`` and deposit it.

    Steps, in order:

    1. Derive the bin's URLs from ``parse_pid(pid)``; return early (after
       logging) when the deposit URL already exists.
    2. Download the bin zip into a temporary directory.
    3. Run the MATLAB ``bin_blobs`` command, writing into a second temporary
       directory.
    4. Upload the resulting blob zip to the deposit URL.
    5. Log completion and call ``client.wakeup()``.

    Every progress message is logged at WARNING level with a ``BLOBS``
    prefix and also forwarded via ``client.heartbeat``.

    :param pid: identifier of the bin to process; parsed with ``parse_pid``.
    :param job: not used by this function; kept so the signature matches
        what callers pass.
    :raises Exception: with message ``'missing output file'`` when MATLAB
        exits without producing the expected blob zip.
    """
    def log_callback(msg):
        # logging.warn is a deprecated alias; logging.warning emits the same record.
        logging.warning('BLOBS %s', msg)
        client.heartbeat(pid, message=msg)

    parsed_pid = parse_pid(pid)
    bin_lid = parsed_pid[LID]
    bin_pid = parsed_pid[NAMESPACE] + bin_lid

    binzip_url = bin_pid + '_binzip.zip'
    # NOTE(review): extract_features in this file downloads
    # '<bin_pid>_blob.zip' while this deposits to '<bin_pid>_blobs.zip' -
    # confirm both resolve to the same resource.
    deposit_url = '%s_blobs.zip' % bin_pid

    if exists(deposit_url):
        log_callback('skipping %s - blobs exist' % pid)
        return

    log_callback('computing blobs for %s' % pid)
    with safe_tempdir() as binzip_dir:
        # first, copy the zipfile to a temp dir
        binzip_path = os.path.join(binzip_dir, '%s.zip' % bin_lid)
        log_callback('downloading %s to %s' % (binzip_url, binzip_path))
        download(binzip_url, binzip_path)

        # now run bin_blobs
        with safe_tempdir() as job_dir:
            # configure matlab
            matlab = Matlab(MATLAB_EXEC_PATH, MATLAB_PATH, output_callback=log_callback)

            # run command
            blobs_file = os.path.join(job_dir, blob_zip_name(bin_pid))
            cmd = "bin_blobs('%s','%s','%s')" % (bin_pid, binzip_path, job_dir)
            log_callback('running %s' % cmd)
            matlab.run(cmd)

            # the blob zip is the only expected output; fail if it is absent
            log_callback('MATLAB done, checking for %s' % blobs_file)
            if not os.path.exists(blobs_file):
                raise Exception('missing output file')

            log_callback('depositing %s' % blobs_file)
            upload(blobs_file, deposit_url)
            log_callback('deposited %s' % blobs_file)

    log_callback('completed %s' % bin_pid)
    client.wakeup()