Beispiel #1
0
def get_hash(config):
    """Reduce ``config`` to ``KEYS`` and compute a digest for the result.

    The hasher is fed, in this order: the reduced config (``hashdict``),
    ``BIN`` (``hashfiletime``) and the reduced config's ``'filename'``
    entry (``hashfiletime``).

    Returns:
        A ``(reduced_config, hexdigest)`` tuple.
    """
    relevant = reducedict(config, KEYS)

    hasher = Hasher()
    hasher.hashdict(relevant)
    hasher.hashfiletime(BIN)
    # 'filename' is looked up in the reduced config, not the caller's one.
    hasher.hashfiletime(relevant['filename'])

    digest = hasher.hexdigest()
    return relevant, digest
Beispiel #2
0
def allplots(config, executor):
    """Enqueue one allplots PostScript job per page and return those jobs.

    Prerequisite jobs are planned first through ``extract.plan``,
    ``nprofile.plan`` and ``acgt_gamma.plan``; every page job is enqueued
    with the collected prerequisites as its ``after`` argument.  The span
    from ``startBase`` to ``endBase`` is then cut into pages of
    ``basesPerGraph * graphsPerPage`` bases, with the last page clipped at
    ``endBase``.

    Side effects on ``config``: whatever the planners and the ``parsing``
    helpers store in it, plus ``config['psnames']``, which receives the
    derived output filenames in page order.

    Args:
        config: master configuration mapping; mutated in place.
        executor: passed through to the planners and to ``enqueue``.

    Returns:
        A list accumulated from the ``enqueue`` result of every page.

    Raises:
        KeyError: if the reduced config lacks one of the page-layout keys.
        ValueError: if the computed number of bases per page is not
            positive (the paging loop could never advance).
    """
    after = []
    try:
        after.extend(extract.plan(config, executor) or ())
    except Exception:
        # Extraction stays best-effort, but the failure is now recorded
        # instead of vanishing, and KeyboardInterrupt/SystemExit propagate.
        log.info("extract.plan failed; continuing without its jobs",
                 exc_info=True)
    for planner in (nprofile, acgt_gamma):
        # A planner that bails out with a bare ``return`` yields None,
        # which list.extend cannot consume; treat that as "no jobs".
        after.extend(planner.plan(config, executor) or ())

    parsing.length(config)
    parsing.first_page_title(config)
    parsing.following_page_title(config)
    parsing.endBase(config)

    # Strip down to the config for this task only
    rconfig = reducedict(config, KEYS + FILE_KEYS)

    basesPerGraph = rconfig['basesPerGraph']
    graphsPerPage = rconfig['graphsPerPage']
    # The overall range is removed here because every page config gets
    # its own startBase/endBase below.
    startBase = rconfig.pop('startBase')
    endBase = rconfig.pop('endBase')
    bp_per_page = rconfig['bp_per_page'] = basesPerGraph * graphsPerPage
    if bp_per_page <= 0:
        raise ValueError(
            "basesPerGraph * graphsPerPage must be positive, got %r"
            % (bp_per_page,))

    page_count = int(math.ceil(float(endBase - startBase) / bp_per_page))
    log.info("Generating %d pages of allplots", page_count)

    page_num = 1  # page number offset
    filenames = []
    waiton = []
    # per-page loop
    while startBase < endBase:
        page_end = startBase + bp_per_page
        pconfig = dict(rconfig)
        pconfig['page_num'] = page_num
        pconfig['startBase'] = startBase
        # Clip the final page so it never runs past the requested end.
        pconfig['endBase'] = page_end if page_end < endBase else endBase

        # The output name is derived from the page config plus BIN and
        # this source file, as fed to the hasher.
        h = Hasher().hashdict(pconfig).hashfiletime(BIN).hashfiletime(__file__)
        psname = parsing.derive_filename(config, h.hexdigest(), 'ps')
        filenames.append(psname)
        waiton.extend(enqueue(_ap, executor, pconfig, psname, after=after))

        page_num += 1
        startBase = page_end

    # Finally set the output filenames into the master config dict
    config['psnames'] = filenames
    return waiton
def plan(config, executor):
    """Identifying ORFs with significant 3-base periodicities.

    Returns ``None`` without doing anything when
    ``config['skip_prediction']`` is truthy.  Otherwise derives an output
    directory name from a hash of the reduced config, the input file and
    ``BIN``, records the expected prediction output paths in ``config``
    (each under two keys), stores the directory under ``OUTPUTKEY`` and
    returns the result of enqueueing ``_acgt_gamma``.

    Asserts that ``DATAPATH`` exists.
    """
    if config.get('skip_prediction', False):
        return None

    assert os.path.exists(DATAPATH), (
        "Missing pynpact/data for acgt_gamma prediction. "
        "Expected at " + DATAPATH)

    task_keys = [
        'filename', 'significance', 'GeneDescriptorSkip1', 'mycoplasma',
        'stderr',
    ]
    rconfig = reducedict(config, task_keys)

    hasher = Hasher().hashdict(rconfig)
    hasher.hashfiletime(config['filename'])
    hasher.hashfiletime(BIN)
    outdir = parsing.derive_filename(config, hasher.hexdigest(), '.predict')

    log.debug("Adding prediction filenames to config dict.")
    # The last 4 characters of the input's basename are dropped because
    # that is how acgt_gamma itself builds its output names (at about
    # lines 262-270 of its source).
    stem = os.path.basename(config['filename'])[:-4]
    # (primary key, alias key, extension) for each predicted output file.
    outputs = (
        ('NewOrfsFile', 'File_of_new_CDSs', ".newcds"),
        ('ModifiedOrfsFile', 'File_of_published_rejected_CDSs', ".modified"),
        ('HitsFile', 'File_of_G+C_coding_potential_regions', '.profiles'),
    )
    for primary, alias, ext in outputs:
        path = os.path.join(outdir, stem + ext)
        config[primary] = path
        config[alias] = path
    config[OUTPUTKEY] = outdir

    return enqueue(_acgt_gamma, executor, rconfig, outdir)
def plan(config, executor):
    """Enqueue the nprofile job and its JSON conversion, at most once.

    If ``config`` already contains an ``'nprofile'`` key the call returns
    ``None`` immediately; otherwise the key is set to ``True`` before any
    work is planned.  The output filename is derived from a hash of the
    reduced config and ``BIN`` and stored under ``OUTPUTKEY``, with the
    ``.json`` companion stored under ``JSONOUTPUTKEY``.

    Returns:
        The result of enqueueing ``_nprofile``.  The follow-up
        ``_nprofile_to_json`` job is enqueued after it but is not
        included in the return value.
    """
    if 'nprofile' in config:
        return None
    config['nprofile'] = True

    parsing.length(config)
    rconfig = reducedict(config, KEYS)

    hasher = Hasher()
    hasher.hashdict(rconfig)
    hasher.hashfiletime(BIN)
    digest = hasher.hexdigest()

    target = parsing.derive_filename(config, digest, 'nprofile')
    config[OUTPUTKEY] = target
    config[JSONOUTPUTKEY] = target + '.json'

    jobs = enqueue(_nprofile, executor, rconfig, target)
    # The JSON conversion reads the nprofile output, so it is scheduled
    # to run after the nprofile jobs.
    json_source = {OUTPUTKEY: target}
    enqueue(_nprofile_to_json, executor, json_source,
            config[JSONOUTPUTKEY], after=jobs)
    return jobs