# Module-level setup for the "allplots" step: imports, loggers, the
# binary handle and the lists of config key names this module refers to.
# NOTE(review): this fragment was collapsed onto one line in the source
# and is truncated inside FILE_KEYS; it is reflowed here token-for-token.
import logging
import math
from subprocess import PIPE

from pynpact.steps import extract, nprofile, acgt_gamma
from pynpact import binfile
from pynpact import capproc, parsing
from pynpact.util import Hasher, reducedict, which, replace_ext
from pynpact.steps import producer, enqueue

# Step-specific logger plus the shared 'pynpact.statuslog' logger.
log = logging.getLogger('pynpact.steps.allplots')
statuslog = logging.getLogger('pynpact.statuslog')

# Presumably the location of the 'Allplots' executable as resolved by
# pynpact.binfile -- TODO confirm against binfile's implementation.
BIN = binfile('Allplots')

# Names of plain config entries associated with this step. How they are
# consumed is not visible in this chunk.
KEYS = ['first_page_title', 'following_page_title', 'length',
        'startBase', 'endBase', 'period', 'basesPerGraph',
        'graphsPerPage', 'x-tics', 'nucleotides', 'alternate_colors',
        'basename']

# Names of config entries that, judging by their 'File_of_*' spelling,
# refer to files -- presumably inputs produced by other steps; verify
# against the code that reads them.
FILE_KEYS = ['File_of_unbiased_CDSs', 'File_of_conserved_CDSs',
             'File_of_new_CDSs', 'File_of_published_rejected_CDSs',
             'File_of_stretches_where_CG_is_asymmetric',
             'File_of_published_accepted_CDSs',
             'File_of_potential_new_CDSs',
             'File_of_blocks_from_new_ORFs_as_cds',
             'File_of_blocks_from_annotated_genes_as_cds',
             'File_of_GeneMark_regions',
             # NOTE(review): the list is cut off here in the visible
             # chunk; remaining entries and the closing bracket are out
             # of view.
# Module-level setup for the "nprofile" step followed by the beginning
# of its plan() function.
# NOTE(review): this fragment was collapsed onto one line in the source
# and is truncated inside plan(); it is reflowed here token-for-token.
import logging
import sys
import json

from path import Path

from pynpact import binfile
from pynpact import capproc, parsing
from pynpact.util import Hasher, reducedict
from pynpact.steps import producer, enqueue

# Step-specific logger plus the shared 'pynpact.statuslog' logger.
logger = logging.getLogger('pynpact.steps.nprofile')
statuslog = logging.getLogger('pynpact.statuslog')

# Presumably the location of the 'nprofile' executable as resolved by
# pynpact.binfile -- TODO confirm.
BIN = binfile('nprofile')

# Config entries passed to reducedict() in plan() below.
KEYS = ['nucleotides', 'length', 'window_size', 'step', 'period', 'ddna',
        'stderr']

# Key names for this step's outputs; where they are written is not
# visible in this chunk. The space inside the first key is part of the
# runtime string and must be kept as is.
OUTPUTKEY = 'File_list_of_nucleotides_in_200bp windows'
JSONOUTPUTKEY = 'nprofileData'


def plan(config, executor):
    # The 'nprofile' entry doubles as an "already planned" marker: if it
    # is present there is nothing to do, otherwise it is set right away.
    if 'nprofile' in config:
        return
    config['nprofile'] = True

    # Presumably makes sure config carries a 'length' value -- confirm in
    # pynpact.parsing.length.
    parsing.length(config)

    # Restrict the config to the entries named in KEYS and feed that
    # subset into a Hasher (presumably to derive a cache key -- the use
    # of the hash is beyond the visible part of this function).
    rconfig = reducedict(config, KEYS)
    h = Hasher()
    h.hashdict(rconfig)
    # NOTE(review): the function body continues beyond the visible chunk.
# Module-level setup for the "acgt_gamma" prediction step followed by
# the beginning of its plan() function.
# NOTE(review): this fragment was collapsed onto one line in the source
# and is truncated inside plan(); it is reflowed here token-for-token.
import os.path
import sys
import logging

from pynpact import capproc, parsing
from pynpact import binfile, DATAPATH
from pynpact.util import Hasher, reducedict, mkdtemp_rename
from pynpact.steps import producer, enqueue

# Step-specific logger plus the shared "pynpact.statuslog" logger.
log = logging.getLogger("pynpact.steps.acgt_gamma")
statuslog = logging.getLogger("pynpact.statuslog")

# Path-like handle for the "acgt_gamma" executable; plan() feeds it to
# Hasher.hashfiletime() below.
BIN = binfile("acgt_gamma")

# Key name for this step's output; where it is written is not visible in
# this chunk.
OUTPUTKEY = "acgt_gamma_output"


def plan(config, executor):
    "Identifying ORFs with significant 3-base periodicities."
    # Callers can opt out of prediction entirely via config.
    if config.get("skip_prediction", False):
        return

    # Fail early when the data directory is missing.
    # NOTE(review): assert statements are removed under `python -O`, so
    # this check disappears in optimized runs.
    assert os.path.exists(DATAPATH), "Missing pynpact/data for acgt_gamma prediction. " "Expected at " + DATAPATH

    # Only these three config entries take part in the hash.
    rconfig = reducedict(config, ["filename", "significance", "GeneDescriptorSkip1"])

    # hashdict() must return the Hasher itself for the chained use below
    # to work. The input file and the binary are then mixed in via
    # hashfiletime() -- presumably their modification times; confirm in
    # pynpact.util.Hasher.
    h = Hasher().hashdict(rconfig)
    h.hashfiletime(config["filename"])
    h.hashfiletime(BIN)

    # Name derived from the config and the hash digest, with a ".predict"
    # suffix argument.
    outdir = parsing.derive_filename(config, h.hexdigest(), ".predict")
    log.debug("Adding prediction filenames to config dict.")
    # strip 4 characters off here b/c that's how acgt_gamma does
    # NOTE(review): the comment above and the function body are cut off
    # here in the visible chunk.
""" from __future__ import absolute_import import logging import os.path import sys from pynpact import binfile, InvalidGBKException from pynpact import capproc, parsing from pynpact.util import Hasher, reducedict from pynpact.steps import producer, enqueue logger = logging.getLogger('pynpact.steps.extract') statuslog = logging.getLogger('pynpact.statuslog') BIN = binfile("extract") KEYS = ['GeneDescriptorKey1', 'GeneDescriptorKey2', 'GeneDescriptorSkip1', 'GeneDescriptorSkip2', 'filename'] OUTPUTKEY = 'File_of_published_accepted_CDSs' def plan(config, executor): if parsing.isgbk(config): logger.debug( "GBK file, extracting known gene names %s", config['filename']) rconfig, hash = get_hash(config) target_file = parsing.derive_filename(config, hash, 'genes') config[OUTPUTKEY] = target_file