Beispiel #1
0
def extract_features(pid,job):
    """Run the MATLAB ``bin_features`` job for one bin and upload its output.

    The bin zip and blob zip are downloaded into a temporary directory,
    MATLAB is run against them, and the resulting features CSV (plus the
    multiblob CSV, if MATLAB produced one) is uploaded.  Nothing is done if
    the features URL already exists.

    Parameters:
        pid: identifier of the bin; it is passed to ``parse_pid`` and used as
            the heartbeat key.
        job: not used by this function (kept for the caller's signature).

    Raises:
        Exception: if MATLAB exits without producing the features CSV.
    """
    def log_callback(msg):
        # Log locally and forward the same text as a heartbeat for this pid.
        # logging.warning is used because logging.warn is a deprecated alias.
        logging.warning('FEATURES %s' % msg)
        client.heartbeat(pid,message=msg)
    parsed_pid = parse_pid(pid)
    bin_lid = parsed_pid[LID]
    bin_pid = ''.join([parsed_pid[NAMESPACE], parsed_pid[LID]])
    # all remote resources are named by suffixing the namespace+lid
    binzip_url = ''.join([bin_pid,'_binzip.zip'])
    # NOTE(review): extract_blobs deposits to '<bin_pid>_blobs.zip' whereas
    # this reads '<bin_pid>_blob.zip' -- confirm both name the same resource.
    blob_url = ''.join([bin_pid,'_blob.zip'])
    features_url = ''.join([bin_pid,'_features.csv'])
    multiblob_url = ''.join([bin_pid,'_multiblob.csv'])
    if exists(features_url):
        log_callback('skipping %s - features exist' % pid)
        return
    log_callback('computing features for %s' % pid)
    with safe_tempdir() as binzip_dir:
        # download bin zip
        binzip_path = os.path.join(binzip_dir, '%s.zip' % bin_lid)
        log_callback('downloading %s to %s' % (binzip_url, binzip_path))
        download(binzip_url, binzip_path)
        # download blob zip into the same directory as the bin zip; the MATLAB
        # command below is only given that directory, not this path
        blob_path = os.path.join(binzip_dir, '%s_blob.zip' % bin_lid)
        log_callback('downloading %s to %s' % (blob_url, blob_path))
        download(blob_url, blob_path)
        # compute features
        with safe_tempdir() as job_dir:
            # output of matlab job
            feature_csv = os.path.join(job_dir, csvname(bin_pid))
            multiblob_csv = os.path.join(job_dir, 'multiblob', multiblobname(bin_pid))
            # params for matlab job: the local download directory (with a
            # trailing slash) and the bin zip's file name
            namespace = os.path.dirname(binzip_path) + '/'
            lid = os.path.basename(binzip_path)
            matlab = Matlab(MATLAB_EXEC_PATH, MATLAB_PATH, output_callback=log_callback)
            cmd = 'bin_features(\'%s\',\'%s\',\'%s\',\'chatty\')' % (namespace, lid, job_dir + '/')
            log_callback('running %s' % cmd)
            matlab.run(cmd)
            log_callback('matlab exited')
            # the features CSV is mandatory; its absence means the job failed
            if not os.path.exists(feature_csv):
                raise Exception('no features found')
            log_callback('features found at %s' % feature_csv)
            log_callback('uploading %s' % features_url)
            upload(feature_csv, features_url)
            # the multiblob CSV is optional and only uploaded when present
            if os.path.exists(multiblob_csv):
                log_callback('multiblob found at %s' % multiblob_csv)
                log_callback('uploading %s' % multiblob_url)
                upload(multiblob_csv, multiblob_url)
            # report completion whether or not a multiblob file was produced
            log_callback('complete')
            client.wakeup()
Beispiel #2
0
def extract_blobs(pid,job):
    """Run the MATLAB ``bin_blobs`` job for one bin and deposit the blob zip.

    The bin zip is downloaded into a temporary directory, MATLAB is run
    against it, and the blob zip it writes is uploaded to the deposit URL.
    Nothing is done if the deposit URL already exists.

    Parameters:
        pid: identifier of the bin; it is passed to ``parse_pid`` and used as
            the heartbeat key.
        job: not used by this function (kept for the caller's signature).

    Raises:
        Exception: if MATLAB exits without producing the expected blob zip.
    """
    def log_callback(msg):
        # Log locally and forward the same text as a heartbeat for this pid.
        # logging.warning is used because logging.warn is a deprecated alias.
        logging.warning('BLOBS %s' % msg)
        client.heartbeat(pid,message=msg)
    parsed_pid = parse_pid(pid)
    bin_lid = parsed_pid[LID]
    bin_pid = ''.join([parsed_pid[NAMESPACE], parsed_pid[LID]])
    binzip_url = ''.join([bin_pid,'_binzip.zip'])
    deposit_url = '%s_blobs.zip' % bin_pid
    if exists(deposit_url):
        log_callback('skipping %s - blobs exist' % pid)
        return
    log_callback('computing blobs for %s' % pid)
    with safe_tempdir() as binzip_dir:
        # first, copy the zipfile to a temp dir
        binzip_path = os.path.join(binzip_dir, '%s.zip' % bin_lid)
        log_callback('downloading %s to %s' % (binzip_url, binzip_path))
        download(binzip_url, binzip_path)
        # now run bin_blobs
        with safe_tempdir() as job_dir:
            # configure matlab; its output is routed through log_callback
            matlab = Matlab(MATLAB_EXEC_PATH, MATLAB_PATH, output_callback=log_callback)
            # run command; the output zip is then looked for in job_dir
            blobs_file = os.path.join(job_dir, blob_zip_name(bin_pid))
            cmd = 'bin_blobs(\'%s\',\'%s\',\'%s\')' % (bin_pid, binzip_path, job_dir)
            log_callback('running %s' % cmd)
            matlab.run(cmd)
            log_callback('MATLAB done, checking for %s' % blobs_file)
            # a missing output file means the job failed
            if not os.path.exists(blobs_file):
                raise Exception('missing output file')
            log_callback('depositing %s' % blobs_file)
            upload(blobs_file, deposit_url)
            log_callback('deposited %s' % blobs_file)
    log_callback('completed %s' % bin_pid)
    client.wakeup()