Ejemplo n.º 1
0
import logging
import math
from subprocess import PIPE

from pynpact.steps import extract, nprofile, acgt_gamma
from pynpact import binfile
from pynpact import capproc, parsing
from pynpact.util import Hasher, reducedict, which, replace_ext
from pynpact.steps import producer, enqueue


# Module-level logger for this step, plus the package-wide status logger.
log = logging.getLogger('pynpact.steps.allplots')
statuslog = logging.getLogger('pynpact.statuslog')


# Handle on the 'Allplots' binary as returned by binfile().
# NOTE(review): presumably a filesystem path -- confirm against pynpact.binfile.
BIN = binfile('Allplots')

# Names of configuration entries associated with this step.
# NOTE(review): how KEYS is consumed is not visible in this excerpt.
KEYS = [
    'first_page_title',
    'following_page_title',
    'length',
    'startBase',
    'endBase',
    'period',
    'basesPerGraph',
    'graphsPerPage',
    'x-tics',
    'nucleotides',
    'alternate_colors',
    'basename',
]
FILE_KEYS = ['File_of_unbiased_CDSs',
             'File_of_conserved_CDSs',
             'File_of_new_CDSs',
             'File_of_published_rejected_CDSs',
             'File_of_stretches_where_CG_is_asymmetric',
             'File_of_published_accepted_CDSs',
             'File_of_potential_new_CDSs',
             'File_of_blocks_from_new_ORFs_as_cds',
             'File_of_blocks_from_annotated_genes_as_cds',
             'File_of_GeneMark_regions',
import logging
import sys
import json

from path import Path
from pynpact import binfile
from pynpact import capproc, parsing
from pynpact.util import Hasher, reducedict
from pynpact.steps import producer, enqueue


# Module-level logger for this step, plus the package-wide status logger.
logger = logging.getLogger('pynpact.steps.nprofile')
statuslog = logging.getLogger('pynpact.statuslog')


# Handle on the 'nprofile' binary as returned by binfile().
# NOTE(review): presumably a filesystem path -- confirm against pynpact.binfile.
BIN = binfile('nprofile')

# Configuration entries that plan() passes to reducedict() before hashing.
KEYS = [
    'nucleotides',
    'length',
    'window_size',
    'step',
    'period',
    'ddna',
    'stderr',
]

# Key names for this step's outputs.
# NOTE(review): where these are written is not visible in this excerpt; the
# space inside OUTPUTKEY is part of the original key and must be preserved.
OUTPUTKEY = 'File_list_of_nucleotides_in_200bp windows'
JSONOUTPUTKEY = 'nprofileData'


def plan(config, executor):
    """Begin planning the nprofile step for ``config``.

    Returns ``None`` immediately when ``config`` already contains an
    ``'nprofile'`` key. Otherwise sets ``config['nprofile'] = True``, calls
    ``parsing.length(config)`` and starts hashing the subset of ``config``
    named by ``KEYS``.

    NOTE(review): only the opening of this function is visible in this
    excerpt; how ``executor`` and the hasher ``h`` are used afterwards
    cannot be determined from here.
    """
    # The key is set just below, so finding it already present means this
    # function has run for this config before (or the caller pre-set it).
    if 'nprofile' in config:
        return
    config['nprofile'] = True

    # presumably ensures config['length'] is populated -- TODO confirm
    # against parsing.length
    parsing.length(config)
    # Only the entries listed in KEYS take part in the hash.
    rconfig = reducedict(config, KEYS)
    h = Hasher()
    h.hashdict(rconfig)
Ejemplo n.º 3
0
import os.path
import sys
import logging
from pynpact import capproc, parsing
from pynpact import binfile, DATAPATH
from pynpact.util import Hasher, reducedict, mkdtemp_rename
from pynpact.steps import producer, enqueue


# Module-level logger for this step, plus the package-wide status logger.
log = logging.getLogger("pynpact.steps.acgt_gamma")
statuslog = logging.getLogger("pynpact.statuslog")

# Handle on the acgt_gamma binary; plan() feeds it to Hasher.hashfiletime().
BIN = binfile("acgt_gamma")
# Key name for this step's output.
# NOTE(review): where it is written is not visible in this excerpt.
OUTPUTKEY = "acgt_gamma_output"


def plan(config, executor):
    """Identifying ORFs with significant 3-base periodicities.

    Returns ``None`` without doing anything when ``config`` has a truthy
    ``"skip_prediction"`` entry. Otherwise checks that ``DATAPATH`` exists,
    hashes the relevant configuration entries together with the input file
    and the acgt_gamma binary, and derives an output directory name with a
    ``.predict`` suffix from that hash.

    Raises ``AssertionError`` if ``DATAPATH`` does not exist and ``KeyError``
    if ``config`` has no ``"filename"`` entry.

    NOTE(review): only the opening of this function is visible in this
    excerpt; how ``executor`` and ``outdir`` are used afterwards cannot be
    determined from here.
    """
    if config.get("skip_prediction", False):
        return

    # NOTE(review): assert statements are removed when Python runs with -O,
    # so this existence check disappears in optimized mode.
    assert os.path.exists(DATAPATH), "Missing pynpact/data for acgt_gamma prediction. " "Expected at " + DATAPATH

    # Only these three configuration entries take part in the hash.
    rconfig = reducedict(config, ["filename", "significance", "GeneDescriptorSkip1"])
    # hashdict()'s return value is used as the hasher from here on.
    h = Hasher().hashdict(rconfig)
    # presumably folds the files' modification times into the hash so that a
    # changed input file or binary produces a new output name -- TODO confirm
    h.hashfiletime(config["filename"])
    h.hashfiletime(BIN)
    outdir = parsing.derive_filename(config, h.hexdigest(), ".predict")

    log.debug("Adding prediction filenames to config dict.")
    # Strip 4 characters off here because that is how acgt_gamma does it.
Ejemplo n.º 4
0
"""
from __future__ import absolute_import
import logging
import os.path
import sys

from pynpact import binfile, InvalidGBKException
from pynpact import capproc, parsing
from pynpact.util import Hasher, reducedict
from pynpact.steps import producer, enqueue


# Module-level logger for this step, plus the package-wide status logger.
logger = logging.getLogger('pynpact.steps.extract')
statuslog = logging.getLogger('pynpact.statuslog')

# Handle on the 'extract' binary as returned by binfile().
# NOTE(review): presumably a filesystem path -- confirm against pynpact.binfile.
BIN = binfile("extract")

# Names of configuration entries associated with this step.
# NOTE(review): how KEYS is consumed is not visible in this excerpt.
KEYS = [
    'GeneDescriptorKey1',
    'GeneDescriptorKey2',
    'GeneDescriptorSkip1',
    'GeneDescriptorSkip2',
    'filename',
]

# Config key under which plan() stores the derived target file name.
OUTPUTKEY = 'File_of_published_accepted_CDSs'


def plan(config, executor):
    """Begin planning the extract step for ``config``.

    When ``parsing.isgbk(config)`` is true, computes a hash via
    ``get_hash(config)``, derives a target file name from it with
    ``parsing.derive_filename`` and stores that name in
    ``config[OUTPUTKEY]``.

    NOTE(review): only the opening of this function is visible in this
    excerpt; what happens after the assignment, what happens for non-GBK
    input, and how ``executor`` is used cannot be determined from here.
    """
    if parsing.isgbk(config):
        logger.debug(
            "GBK file, extracting known gene names %s", config['filename'])
        # get_hash is defined outside this excerpt; it returns a pair whose
        # second element is used as the hash for the derived file name.
        # NOTE(review): the local name `hash` shadows the builtin hash().
        rconfig, hash = get_hash(config)
        target_file = parsing.derive_filename(config, hash, 'genes')
        config[OUTPUTKEY] = target_file