def plan(config, executor):
    """Plan the acgt_gamma ORF-prediction step.

    Identifies ORFs with significant 3-base periodicities.  A no-op
    (returns None) when ``config['skip_prediction']`` is set.

    Side effects: derives and stores the prediction output filenames
    (new/modified ORF files, hits file, output directory) in *config*.

    Returns the result of scheduling ``_acgt_gamma`` via ``enqueue``.
    """
    if config.get('skip_prediction', False):
        return

    # NOTE(review): assert is stripped under ``python -O``; kept as-is so
    # the exception type seen by callers (AssertionError) is unchanged.
    assert os.path.exists(DATAPATH), \
        "Missing pynpact/data for acgt_gamma prediction. " \
        "Expected at " + DATAPATH

    # Hash only the keys that affect this task, so unrelated config
    # changes don't invalidate cached results.
    rconfig = reducedict(config, [
        'filename', 'significance', 'GeneDescriptorSkip1', 'mycoplasma', 'stderr'])
    h = Hasher().hashdict(rconfig)
    h.hashfiletime(config['filename'])
    h.hashfiletime(BIN)
    outdir = parsing.derive_filename(config, h.hexdigest(), '.predict')

    log.debug("Adding prediction filenames to config dict.")

    def derived(ext):
        # strip 4 characters off here b/c that's how acgt_gamma does
        # it at about lines 262-270
        return os.path.join(
            outdir, os.path.basename(config['filename'])[:-4] + ext)

    config['NewOrfsFile'] = config['File_of_new_CDSs'] = derived(".newcds")
    config['ModifiedOrfsFile'] = config['File_of_published_rejected_CDSs'] = derived(".modified")
    config['HitsFile'] = config['File_of_G+C_coding_potential_regions'] = derived('.profiles')
    config[OUTPUTKEY] = outdir

    return enqueue(_acgt_gamma, executor, rconfig, outdir)
# Example #2
# 0
def plan(config, executor):
    """Plan extraction of known gene names from a GenBank file.

    A no-op (returns None) for non-GBK inputs.  Side effect: stores
    the derived target filename in ``config[OUTPUTKEY]``.

    Returns the result of scheduling ``_extract`` via ``enqueue``.
    """
    if parsing.isgbk(config):
        logger.debug(
            "GBK file, extracting known gene names %s", config['filename'])
        # 'digest' rather than 'hash' -- avoid shadowing the builtin.
        rconfig, digest = get_hash(config)
        target_file = parsing.derive_filename(config, digest, 'genes')
        config[OUTPUTKEY] = target_file
        return enqueue(_extract, executor, rconfig, target_file)
def plan(config, executor):
    """Plan extraction of known CDS entries to a JSON track file.

    A no-op (returns None) for non-GBK inputs.  Side effect: stores
    the derived target filename in ``config['InputCDSFileJson']``.

    Returns the result of scheduling ``_extract`` via ``enqueue``.
    """
    if parsing.isgbk(config):
        logger.debug(
            "GBK file, extracting known CDS to json %s", config['filename'])
        # 'digest' rather than 'hash' -- avoid shadowing the builtin.
        rconfig, digest = get_hash(config)
        target_file = parsing.derive_filename(config, digest, 'track.genes.json')
        config['InputCDSFileJson'] = target_file
        return enqueue(_extract, executor, rconfig, target_file)
# Example #4
# 0
def combine_ps_files(config, executor):
    """Schedule merging of the per-page postscript files into one.

    Runs after the allplots jobs, which populate ``config['psnames']``;
    records the merged filename under ``config['combined_ps_name']``.
    """
    prerequisites = allplots(config, executor)
    pages = config['psnames']
    log.debug("Going to combine %d postscript files", len(pages))
    digest = Hasher().hashlist(pages).hexdigest()
    target = parsing.derive_filename(config, digest, 'ps')
    config['combined_ps_name'] = target
    return enqueue(
        _combine_ps_files, executor, config, target, after=prerequisites)
def _new_track_file_name(pathconfig, gbkfilename):
    """Derive a timestamped track-file name from *gbkfilename*.

    The extension is taken from the last dot-segment of the basename
    and wrapped as ``track.<ext>.json`` unless it already ends in
    'json'; the hash slot of the derived name carries a sanitized
    current timestamp.
    """
    ext = None
    m = re.search(r'\.([^/\\]*)$', gbkfilename)
    if m:
        ext = m.groups()[0]
    # Bug fix: a filename with no dot left ext as None, which crashed
    # on ext.endswith below; fall back to a plain json track extension.
    if ext is None:
        ext = "track.json"
    elif not ext.endswith('json'):
        ext = "track.%s.json" % ext
    dt = datetime.datetime.now()
    # derive_filename uses '-' and '.' as separators; strip them (and
    # ':') out of the ISO timestamp so the name stays unambiguous.
    ds = re.sub(r':|\.|-', '_', dt.isoformat("_"))
    return parsing.derive_filename(pathconfig, ds, ext)
# Example #6
# 0
def allplots(config, executor):
    """Plan the per-page Allplots runs for *config*.

    Schedules one ``_ap`` job per page of the sequence, each depending
    on the extract/nprofile/acgt_gamma planning jobs.  Side effect:
    stores the generated postscript filenames in ``config['psnames']``.

    Returns the list of job handles for the scheduled pages.
    """
    after = []
    # The sub-planners may return None when their step is skipped (see
    # the skip_prediction / 'nprofile' guards in their plan functions);
    # guard with ``or []`` so extend() doesn't raise TypeError.
    try:
        after.extend(extract.plan(config, executor) or [])
    except Exception:
        # Extraction is best-effort; log instead of a silent bare except
        # so real failures are at least visible in debug output.
        log.debug("extract.plan failed; continuing without it",
                  exc_info=True)
    after.extend(nprofile.plan(config, executor) or [])
    after.extend(acgt_gamma.plan(config, executor) or [])

    parsing.length(config)
    parsing.first_page_title(config)
    parsing.following_page_title(config)
    parsing.endBase(config)

    # Strip down to the config for this task only.  (The unused
    # ``h = Hasher()`` that used to sit here was dead code; the hasher
    # is built fresh per page below.)
    rconfig = reducedict(config, KEYS + FILE_KEYS)

    basesPerGraph = rconfig['basesPerGraph']
    graphsPerPage = rconfig['graphsPerPage']
    startBase = rconfig.pop('startBase')
    endBase = rconfig.pop('endBase')
    bp_per_page = rconfig['bp_per_page'] = basesPerGraph * graphsPerPage
    page_count = math.ceil(float(endBase - startBase) / bp_per_page)
    log.info("Generating %d pages of allplots", page_count)
    page_num = 1  # page number offset
    filenames = []
    waiton = []
    # per-page loop
    while startBase < endBase:
        pconfig = dict(rconfig.items())
        pconfig['page_num'] = page_num
        pconfig['startBase'] = startBase
        # Clamp the final page to the true end of the sequence.
        pconfig['endBase'] = min(startBase + bp_per_page, endBase)
        h = Hasher().hashdict(pconfig).hashfiletime(BIN).hashfiletime(__file__)
        psname = parsing.derive_filename(config, h.hexdigest(), 'ps')
        filenames.append(psname)
        waiton.extend(enqueue(_ap, executor, pconfig, psname, after=after))
        page_num += 1
        startBase += bp_per_page

    # Finally set the output filenames into the master config dict
    config['psnames'] = filenames
    return waiton
def plan(config, executor):
    """Plan the nprofile computation (idempotent per config dict).

    Side effects: marks ``config['nprofile']``, and stores the derived
    output filenames in ``config[OUTPUTKEY]`` / ``config[JSONOUTPUTKEY]``.

    Returns the job handles for the nprofile run, or None when this
    config has already been planned.
    """
    if 'nprofile' in config:
        return
    config['nprofile'] = True

    parsing.length(config)
    rconfig = reducedict(config, KEYS)
    # 'digest' rather than 'hash' -- avoid shadowing the builtin.
    h = Hasher()
    h.hashdict(rconfig)
    h.hashfiletime(BIN)
    digest = h.hexdigest()
    target = parsing.derive_filename(config, digest, 'nprofile')
    config[OUTPUTKEY] = target
    config[JSONOUTPUTKEY] = target + '.json'
    jobs = enqueue(_nprofile, executor, rconfig, target)
    # The JSON conversion runs after the nprofile job but is
    # deliberately not part of the returned dependency set.
    enqueue(_nprofile_to_json, executor, {OUTPUTKEY: target},
            config[JSONOUTPUTKEY], after=jobs)
    return jobs
def _new_gbk_file_name(pathconfig):
    """Return a fresh .gbk filename stamped with the current time.

    The ISO timestamp is sanitized (':', '.', '-' become '_') before
    being used as the hash slot of the derived filename.
    """
    stamp = datetime.datetime.now().isoformat("_")
    safe_stamp = re.sub(r':|\.|-', '_', stamp)
    return parsing.derive_filename(pathconfig, safe_stamp, "gbk")
def test_derive_filename(gbkconfig):
    """derive_filename joins basename, hash and extension as expected."""
    derived = parsing.derive_filename(gbkconfig, 'asdf', 'foo')
    assert derived.endswith('/NC_017123-asdf.foo')