Example #1
0
def binpid2zip(pid, outfile, log_callback=None):
    """Generate a zip file for a bin, given a canonical pid.

    The bin's .hdr, .adc and .roi URLs are drained into temporary files,
    the header and targets are parsed from them, and the result is handed
    to bin2zip, which writes to outfile.

    pid - canonical pid of the bin
    outfile - path of the zip file to create (opened in binary write mode)
    log_callback - optional callable receiving one progress message string

    Returns whatever bin2zip returns.
    """
    def log(msg):
        # progress reporting is optional
        if log_callback is not None:
            log_callback(msg)
    parsed = parse_pid(pid)
    bin_pid = ''.join([parsed[NAMESPACE], parsed[BIN_LID]])
    timestamp = iso8601(strptime(parsed[TIMESTAMP], parsed[TIMESTAMP_FORMAT]))
    log('copying raw data for %s to temp files ...' % bin_pid)
    # each temp file is removed when its "with" block exits, so everything
    # that reads the file by path must happen inside that block
    with tempfile.NamedTemporaryFile() as hdr_tmp:
        hdr_path = hdr_tmp.name
        drain(UrlSource(bin_pid+'.hdr'), LocalFileSink(hdr_path))
        hdr = parse_hdr_file(hdr_path)
    with tempfile.NamedTemporaryFile() as adc_tmp:
        adc_path = adc_tmp.name
        drain(UrlSource(bin_pid+'.adc'), LocalFileSink(adc_path))
        adc = Adc(adc_path, parsed[SCHEMA_VERSION])
        unstitched_targets = add_pids(adc.get_targets(), bin_pid)
        stitched_targets = list_stitched_targets(unstitched_targets)
    with tempfile.NamedTemporaryFile() as roi_tmp:
        roi_path = roi_tmp.name
        drain(UrlSource(bin_pid+'.roi'), LocalFileSink(roi_path))
        log('copied raw data for %s' % bin_pid)
        log('creating zip file for %s' % bin_pid)
        # bin2zip arguments, in order:
        #   parsed - result of parsing pid
        #   bin_pid - canonical pid (namespace + bin lid)
        #   stitched_targets - list of (stitched) targets
        #   hdr - result of parsing header file
        #   timestamp - timestamp (FIXME in what format?)
        #   roi_path - path to ROI file (must still exist, hence the nesting)
        #   fout - open file object the zip is written to
        with open(outfile,'wb') as fout:
            return bin2zip(parsed,bin_pid,stitched_targets,hdr,timestamp,roi_path,fout)
Example #2
0
 def test_integrity(self,b):
     """Return True if the files of bin b pass check_fileset, else False.

     b - object with a lid attribute and a files collection whose items
         carry filetype and local_path attributes

     Any Exception raised by check_fileset is reported as False.
     Interpreter-level exits such as KeyboardInterrupt are not swallowed.
     Errors from parsing b.lid propagate to the caller.
     """
     parsed = parse_pid(b.lid)
     schema_version = parsed[SCHEMA_VERSION]
     # map file type -> local path; a later file of the same type wins
     fs = dict((f.filetype, f.local_path) for f in b.files)
     try:
         check_fileset(fs, schema_version)
     except Exception:
         return False
     return True
Example #3
0
def accepts_product(product_pid):
    """Query the accepts_products endpoint for the product named in a pid.

    product_pid - pid whose parsed form supplies the namespace and product

    Returns the value stored under the product name in the endpoint's JSON
    response, or False if the request, JSON decoding or lookup fails.
    Errors from parsing the pid itself propagate to the caller.
    """
    parsed = parse_pid(product_pid)

    namespace = parsed[NAMESPACE]
    product = parsed[PRODUCT]

    ep = '%sapi/accepts_products/%s' % (namespace, product)

    try:
        return requests.get(ep).json()[product]
    except Exception:
        # best effort: any failure counts as "not accepted", but do not
        # swallow KeyboardInterrupt / SystemExit
        return False
Example #4
0
def do_webcache(pid,job):
    """Request the first mosaic page of a bin as JSON and as JPEG.

    Both responses are read and discarded; presumably the point is to make
    the server populate its cache (TODO confirm).

    pid - pid of the bin
    job - not used in this function
    """
    parsed = parse_pid(pid)
    bin_pid = ''.join([parsed[NAMESPACE], parsed[BIN_LID]])
    base_url = '%sapi/mosaic/size/800x600/scale/0.33/page/1' % parsed[NAMESPACE]
    json_url = '%s/%s.json' % (base_url, bin_pid)
    jpg_url = '%s/%s.jpg' % (base_url, bin_pid)

    logging.warn('WEBCACHE hitting %s' % json_url)
    json_response = requests.get(json_url)
    json_response.json() # decode it, and throw it away

    logging.warn('WEBCACHE hitting %s' % jpg_url)
    jpg_response = requests.get(jpg_url)
    StringIO(jpg_response.content) # read it, and throw it away

    logging.warn('WEBCACHE done for %s' % pid)
Example #5
0
def get_product_destination(session, pid, product_type=None):
    """Return the file path where a product for the given pid belongs.

    session - passed through to get_data_roots
    pid - pid to parse; its parsed fields are forwarded to the resolver
    product_type - overrides the product named in the pid when given

    Raises NotFound when get_data_roots returns no roots.
    """
    parsed = parse_pid(pid)
    wanted = parsed[PRODUCT] if product_type is None else product_type
    ts_label = parsed[TS_LABEL]
    # sidecar files for features
    if wanted == 'multiblob':
        wanted = 'features'
    roots = get_data_roots(session, ts_label, product_type=wanted)
    if not roots:
        raise NotFound('no product destination found')
    # only the first root and the first resolver solution are used
    solutions = get_resolver().ifcb.files.product_path(root=roots[0], **parsed)
    return next(solutions)[FILE_PATH]
Example #6
0
def extract_features(pid,job):
    """Compute features for a bin with MATLAB and upload the results.

    Returns early if the features CSV URL already exists. Otherwise the bin
    zip and the blob zip are downloaded into a temp dir, the MATLAB
    bin_features command is run, and the features CSV is uploaded, along
    with the multiblob CSV when MATLAB produced one.

    pid - pid of the bin
    job - not used in this function

    Raises Exception('no features found') if MATLAB left no features CSV.
    """
    def log_callback(msg):
        # every progress message also goes out as a heartbeat for this pid
        logging.warn('FEATURES %s' % msg)
        client.heartbeat(pid,message=msg)
    parsed_pid = parse_pid(pid)
    bin_lid = parsed_pid[LID]
    bin_pid = ''.join([parsed_pid[NAMESPACE], parsed_pid[LID]])
    binzip_url = ''.join([bin_pid,'_binzip.zip'])
    # NOTE(review): extract_blobs in this file deposits '<bin_pid>_blobs.zip';
    # confirm that '_blob.zip' resolves to the same product
    blob_url = ''.join([bin_pid,'_blob.zip'])
    features_url = ''.join([bin_pid,'_features.csv'])
    multiblob_url = ''.join([bin_pid,'_multiblob.csv'])
    if exists(features_url):
        log_callback('skipping %s - features exist' % pid)
        return
    log_callback('computing features for %s' % pid)
    with safe_tempdir() as binzip_dir:
        # download bin zip
        binzip_path = os.path.join(binzip_dir, '%s.zip' % bin_lid)
        log_callback('downloading %s to %s' % (binzip_url, binzip_path))
        download(binzip_url, binzip_path)
        # download blob zip
        blob_path = os.path.join(binzip_dir, '%s_blob.zip' % bin_lid)
        log_callback('downloading %s to %s' % (blob_url, blob_path))
        download(blob_url, blob_path)
        # compute features
        with safe_tempdir() as job_dir:
            # output of matlab job
            feature_csv = os.path.join(job_dir, csvname(bin_pid))
            multiblob_csv = os.path.join(job_dir, 'multiblob', multiblobname(bin_pid))
            # params for matlab job: the local temp dir (with trailing slash)
            # and the zip's file name stand in for namespace and lid
            namespace = os.path.dirname(binzip_path) + '/'
            lid = os.path.basename(binzip_path)
            matlab = Matlab(MATLAB_EXEC_PATH, MATLAB_PATH, output_callback=log_callback)
            cmd = 'bin_features(\'%s\',\'%s\',\'%s\',\'chatty\')' % (namespace, lid, job_dir + '/')
            log_callback('running %s' % cmd)
            matlab.run(cmd)
            log_callback('matlab exited')
            if not os.path.exists(feature_csv):
                raise Exception('no features found')
            log_callback('features found at %s' % feature_csv)
            log_callback('uploading %s' % features_url)
            upload(feature_csv, features_url)
            # the multiblob CSV is optional output
            if os.path.exists(multiblob_csv):
                log_callback('multiblob found at %s' % multiblob_csv)
                log_callback('uploading %s' % multiblob_url)
                upload(multiblob_csv, multiblob_url)
            # report completion whether or not a multiblob CSV was produced
            log_callback('complete')
            client.wakeup()
Example #7
0
def do_binzip(pid, job):
    """Build the zip for a bin in a temp file and upload it to its binzip URL.

    pid - pid of the bin
    job - not used in this function
    """
    def report(msg):
        # log the message and send it as a heartbeat for this pid
        logging.warn('BINZIP %s' % msg)
        client.heartbeat(pid,message=msg)

    parsed = parse_pid(pid)
    # construct binzip URL
    namespace, bin_lid = parsed[NAMESPACE], parsed[BIN_LID]
    binzip_url = '%s%s_binzip.zip' % (namespace, bin_lid)

    report('creating zipfile for %s' % pid)
    with tempfile.NamedTemporaryFile() as zip_tmp:
        # the upload must finish before the temp file is removed on exit
        binpid2zip(pid, zip_tmp.name)
        report('depositing %s' % binzip_url)
        upload(zip_tmp.name, binzip_url)
    report('deposited %s' % binzip_url)
    client.wakeup()
Example #8
0
def do_acc(pid, job):
    """Run accession for the fileset identified by pid.

    When add_fileset reports 'ADDED', products are scheduled, the session
    is committed and the client is woken up. 'FAILED' raises an Exception.
    Any other result is ignored.

    pid - pid of the bin
    job - not used in this function
    """
    parsed = parse_pid(pid)
    lid = parsed[LID]
    ts_label = parsed[TS_LABEL]
    raw_roots = get_data_roots(session, ts_label) # get raw data roots
    fileset = parsed_pid2fileset(parsed, raw_roots)
    fileset[LID] = lid
    session.expire_all() # don't be stale!
    # FIXME fast=True disables checksumming
    accession = Accession(session,ts_label)#,fast=True)
    client.update(pid,ttl=3600) # allow 1hr for accession
    outcome = accession.add_fileset(fileset)
    if outcome=='ADDED':
        schedule_products(pid, client)
        session.commit()
        client.wakeup()
        return
    if outcome=='FAILED':
        raise Exception('accession failed')
Example #9
0
def extract_blobs(pid,job):
    """Compute blobs for a bin with MATLAB and upload the resulting zip.

    Returns early if the deposit URL already exists. Otherwise the bin zip
    is downloaded into a temp dir, the MATLAB bin_blobs command is run, and
    the blobs zip it writes is uploaded to the deposit URL.

    pid - pid of the bin
    job - not used in this function

    Raises Exception('missing output file') if MATLAB left no blobs zip.
    """
    def log_callback(msg):
        # every progress message also goes out as a heartbeat for this pid
        logging.warn('BLOBS %s' % msg)
        client.heartbeat(pid,message=msg)
    parsed_pid = parse_pid(pid)
    bin_lid = parsed_pid[LID]
    bin_pid = ''.join([parsed_pid[NAMESPACE], parsed_pid[LID]])
    binzip_url = ''.join([bin_pid,'_binzip.zip'])
    deposit_url = '%s_blobs.zip' % bin_pid
    if exists(deposit_url):
        log_callback('skipping %s - blobs exist' % pid)
        return
    log_callback('computing blobs for %s' % pid)
    with safe_tempdir() as binzip_dir:
        # first, copy the zipfile to a temp dir
        binzip_path = os.path.join(binzip_dir, '%s.zip' % bin_lid)
        log_callback('downloading %s to %s' % (binzip_url, binzip_path))
        download(binzip_url, binzip_path)
        # now run bin_blobs
        with safe_tempdir() as job_dir:
            # configure matlab
            matlab = Matlab(MATLAB_EXEC_PATH, MATLAB_PATH, output_callback=log_callback)
            # run command
            blobs_file = os.path.join(job_dir, blob_zip_name(bin_pid))
            cmd = 'bin_blobs(\'%s\',\'%s\',\'%s\')' % (bin_pid, binzip_path, job_dir)
            log_callback('running %s' % cmd)
            matlab.run(cmd)
            log_callback('MATLAB done, checking for %s' % blobs_file)
            if not os.path.exists(blobs_file):
                raise Exception('missing output file')
            # upload while job_dir (and the output file in it) still exists
            log_callback('depositing %s' % blobs_file)
            upload(blobs_file, deposit_url)
            log_callback('deposited %s' % blobs_file)
    log_callback('completed %s' % bin_pid)
    client.wakeup()
Example #10
0
def accession_demo(session,ts_label,root):
    """Add a Bin, with placeholder fixity entries, for each raw fileset under root.

    session - session that receives the new Bin objects; committed once at the end
    ts_label - label stored on every Bin created
    root - passed to the resolver's list_raw_filesets

    A lid that cannot be parsed is printed and the exception re-raised, so
    nothing is committed in that case.
    """
    # now accede
    for fs in get_resolver().ifcb.files.list_raw_filesets(root):
        lid = fs['lid']
        try:
            parsed = parse_pid(lid)
        except Exception:
            # single-argument parenthesised print is valid on Python 2 and 3
            print('barf %s' % lid)
            raise
        ts = text2utcdatetime(parsed['timestamp'], parsed['timestamp_format'])
        b = Bin(ts_label=ts_label, lid=lid, sample_time=ts)
        session.add(b)
        # now make mostly bogus fixity entries
        now = datetime.now()
        # look up all three paths before touching the filesystem
        typed_paths = [
            (fs['hdr_path'], 'hdr'),
            (fs['adc_path'], 'adc'),
            (fs['roi_path'], 'roi'),
        ]
        for path, filetype in typed_paths:
            length = os.stat(path).st_size
            name = os.path.basename(path)
            # real checksumming is skipped; a fixed placeholder is stored
            checksum = 'placeholder'
            f = File(local_path=path, filename=name, length=length, filetype=filetype, sha1=checksum, fix_time=now)
            b.files.append(f)
    session.commit()
Example #11
0
 def new_bin(self,lid):
     """Build a Bin for lid, with its sample time taken from the parsed lid.

     The Bin is created with this object's ts_label.
     """
     sample_time = get_timestamp(parse_pid(lid))
     bin_fields = dict(ts_label=self.ts_label, lid=lid, sample_time=sample_time)
     return Bin(**bin_fields)
Example #12
0
def pid2fileset(pid,roots):
    """Parse pid and return parsed_pid2fileset's result for it and roots."""
    return parsed_pid2fileset(parse_pid(pid), roots)