def plan(config, executor):
    """Plan the acgt_gamma step: identifying ORFs with significant 3-base periodicities.

    Does nothing and returns None when ``config['skip_prediction']`` is
    truthy.  Otherwise derives an output directory name from a hash of the
    relevant config entries plus the file times of the input file and the
    binary, records the expected output paths in ``config`` (mutated in
    place), and enqueues ``_acgt_gamma`` on ``executor``.

    Returns whatever ``enqueue`` returns for the scheduled job.
    """
    if config.get('skip_prediction', False):
        return None

    assert os.path.exists(DATAPATH), (
        "Missing pynpact/data for acgt_gamma prediction. "
        "Expected at " + DATAPATH)

    # Only these settings take part in the hash / are handed to the job.
    hashed_keys = [
        'filename', 'significance', 'GeneDescriptorSkip1', 'mycoplasma', 'stderr']
    rconfig = reducedict(config, hashed_keys)

    hasher = Hasher().hashdict(rconfig)
    for path in (config['filename'], BIN):
        hasher.hashfiletime(path)
    outdir = parsing.derive_filename(config, hasher.hexdigest(), '.predict')

    log.debug("Adding prediction filenames to config dict.")
    # strip 4 characters off here b/c that's how acgt_gamma does
    # it at about lines 262-270
    stem = os.path.basename(config['filename'])[:-4]
    # Each output file is published under two config keys.
    outputs = (
        (('NewOrfsFile', 'File_of_new_CDSs'), ".newcds"),
        (('ModifiedOrfsFile', 'File_of_published_rejected_CDSs'), ".modified"),
        (('HitsFile', 'File_of_G+C_coding_potential_regions'), '.profiles'),
    )
    for keys, ext in outputs:
        path = os.path.join(outdir, stem + ext)
        for key in keys:
            config[key] = path
    config[OUTPUTKEY] = outdir

    return enqueue(_acgt_gamma, executor, rconfig, outdir)
# Example #2
0
def get_hash(config):
    """Return ``(reduced_config, hexdigest)`` for the given config.

    The config is first reduced to ``KEYS``; the digest is built from that
    reduced dict, the file time of ``BIN`` and the file time of the input
    file, in that order.
    """
    reduced = reducedict(config, KEYS)
    hasher = Hasher()
    hasher.hashdict(reduced)
    # 'filename' is read from the reduced dict, so it presumably has to be
    # one of KEYS -- confirm against reducedict.
    for path_source in (lambda: BIN, lambda: reduced['filename']):
        hasher.hashfiletime(path_source())
    return reduced, hasher.hexdigest()
def plan(config, executor):
    """Plan the nprofile step and its JSON conversion.

    Returns None immediately if ``'nprofile'`` is already a key of
    ``config`` (the step has been planned before).  Otherwise marks the
    config as planned, records the output path and the JSON output path in
    ``config`` (mutated in place), enqueues ``_nprofile`` and then
    ``_nprofile_to_json`` (scheduled after the former), and returns the
    result of the first ``enqueue`` call.
    """
    if 'nprofile' in config:
        return None
    config['nprofile'] = True

    parsing.length(config)
    rconfig = reducedict(config, KEYS)

    # NOTE(review): the digest covers the reduced config and BIN's file time
    # only; the input file's time is not hashed here.
    hasher = Hasher()
    hasher.hashdict(rconfig)
    hasher.hashfiletime(BIN)
    digest = hasher.hexdigest()

    target = parsing.derive_filename(config, digest, 'nprofile')
    json_target = target + '.json'
    config[OUTPUTKEY] = target
    config[JSONOUTPUTKEY] = json_target

    jobs = enqueue(_nprofile, executor, rconfig, target)
    # The JSON conversion only needs the nprofile output path as its config.
    enqueue(_nprofile_to_json, executor, {OUTPUTKEY: target},
            json_target, after=jobs)
    return jobs