예제 #1
0
 def test_cluster(self):
     """Run detect_clusters and reduceloci on the files in data/examples."""
     # The example directory is resolved from the location of the
     # imported seqcluster package, then made the working directory.
     package_dir = os.path.dirname(inspect.getfile(seqcluster))
     base_dir = package_dir.replace("seqcluster/", "")
     os.chdir(os.path.join(base_dir, "data/examples"))

     # Stand-in for the parsed command-line options the helpers expect.
     Options = namedtuple('args', ['debug', 'print_debug', 'MIN_SEQ'])
     opts = Options(debug=True, print_debug=True, MIN_SEQ=1)

     sequences = parse_ma_file("seqs_set.ma")
     alignments = pybedtools.BedTool("2_clusters_2_seqs_shared")

     initialize_logger(".", opts.debug, opts.print_debug)
     mylog.getLogger(__name__).info("Start reduceloci test")

     clusters = detect_clusters(alignments, sequences, opts.MIN_SEQ)
     reduceloci(clusters, ".")
예제 #2
0
 def test_database(self):
     """Load the example JSON and hand it to make_database.

     Does nothing when find_cmd("sqlite3") returns a falsy value.
     """
     if not find_cmd("sqlite3"):
         return
     with make_workdir():
         # Stand-in for the parsed command-line options.
         Options = namedtuple('args', ['debug', 'print_debug', 'json'])
         opts = Options(debug=True, print_debug=True,
                        json="../../data/examples/seqcluster.json")
         initialize_logger(".", opts.debug, opts.print_debug)
         log = mylog.getLogger(__name__)
         log.info(opts)
         log.info("Reading data")
         loaded = load_data(opts.json)
         log.info("Create database")
         make_database(loaded)
예제 #3
0
 def test_database(self):
     """Load the example JSON and hand it to make_database.

     Does nothing when find_cmd("sqlite3") returns a falsy value.
     """
     if not find_cmd("sqlite3"):
         return
     with make_workdir():
         # Stand-in for the parsed command-line options.
         Options = namedtuple('args', ['debug', 'print_debug', 'json'])
         opts = Options(debug=True, print_debug=True,
                        json="../../data/examples/seqcluster.json")
         initialize_logger(".", opts.debug, opts.print_debug)
         log = mylog.getLogger(__name__)
         log.info(opts)
         log.info("Reading data")
         loaded = load_data(opts.json)
         log.info("Create database")
         make_database(loaded)
예제 #4
0
 def test_databse(self):
     """Build a database from data/examples/seqcluster.json.

     Does nothing when find_cmd("sqlite3") returns a falsy value.
     """
     if not find_cmd("sqlite3"):
         return

     # Work from the example directory, resolved from the location of
     # the imported seqcluster package.
     package_dir = os.path.dirname(inspect.getfile(seqcluster))
     base_dir = package_dir.replace("seqcluster/", "")
     os.chdir(os.path.join(base_dir, "data/examples"))

     Options = namedtuple('args', ['debug', 'print_debug', 'json'])
     opts = Options(debug=True, print_debug=True, json="seqcluster.json")

     initialize_logger(".", opts.debug, opts.print_debug)
     log = mylog.getLogger(__name__)
     log.info(opts)
     log.info("Reading data")
     loaded = load_data(opts.json)
     log.info("Create databse")
     make_database(loaded)
예제 #5
0
 def test_predict(self):
     """Run make_predictions on data/examples/seqcluster.json.

     Does nothing when find_cmd("tRNAscan-SE") returns a falsy value.
     """
     if not find_cmd("tRNAscan-SE"):
         return

     # Work from the example directory, resolved from the location of
     # the imported seqcluster package.
     package_dir = os.path.dirname(inspect.getfile(seqcluster))
     base_dir = package_dir.replace("seqcluster/", "")
     os.chdir(os.path.join(base_dir, "data/examples"))
     predictions_dir = os.path.join(base_dir, "data/examples/predictions")

     Options = namedtuple(
         'args', ['debug', 'print_debug', 'MIN_SEQ', 'json', 'reference'])
     opts = Options(debug=True, print_debug=True, MIN_SEQ=1,
                    json="seqcluster.json", reference="../genomes/genome.fa")

     initialize_logger(".", opts.debug, opts.print_debug)
     log = mylog.getLogger(__name__)
     log.info(opts)
     log.info("Reading data")
     loaded = load_data(opts.json)
     log.info("Start prediction")
     make_predictions(loaded, predictions_dir, opts)
예제 #6
0
 def test_databse(self):
     """Build a database from data/examples/seqcluster.json.

     Does nothing when find_cmd("sqlite3") returns a falsy value.
     """
     if not find_cmd("sqlite3"):
         return

     # Work from the example directory, resolved from the location of
     # the imported seqcluster package.
     package_dir = os.path.dirname(inspect.getfile(seqcluster))
     base_dir = package_dir.replace("seqcluster/", "")
     os.chdir(os.path.join(base_dir, "data/examples"))

     Options = namedtuple('args', ['debug', 'print_debug', 'json'])
     opts = Options(debug=True, print_debug=True, json="seqcluster.json")

     initialize_logger(".", opts.debug, opts.print_debug)
     log = mylog.getLogger(__name__)
     log.info(opts)
     log.info("Reading data")
     loaded = load_data(opts.json)
     log.info("Create databse")
     make_database(loaded)
예제 #7
0
 def test_predict(self):
     """Run is_tRNA on data/examples/seqcluster.json.

     Does nothing when find_cmd("tRNAscan-SE") returns a falsy value.
     """
     if not find_cmd("tRNAscan-SE"):
         return

     # Work from the example directory, resolved from the location of
     # the imported seqcluster package.
     package_dir = os.path.dirname(inspect.getfile(seqcluster))
     base_dir = package_dir.replace("seqcluster/", "")
     os.chdir(os.path.join(base_dir, "data/examples"))
     predictions_dir = os.path.join(
         base_dir, "data/examples/test_out_predictions")

     Options = namedtuple(
         'args', ['debug', 'print_debug', 'MIN_SEQ', 'json', 'reference'])
     opts = Options(debug=True, print_debug=True, MIN_SEQ=1,
                    json="seqcluster.json", reference="../genomes/genome.fa")

     initialize_logger(".", opts.debug, opts.print_debug)
     log = mylog.getLogger(__name__)
     log.info(opts)
     log.info("Reading data")
     loaded = load_data(opts.json)
     log.info("Start prediction")
     is_tRNA(loaded, predictions_dir, opts)
예제 #8
0
def main(**kwargs):
    """Parse the command line and run the selected sub-command.

    The incoming keyword arguments are ignored: ``kwargs`` is replaced by
    the result of ``parse_cl(sys.argv[1:])``. That result must hold the
    parsed options under ``"args"`` plus a key naming the sub-command.
    The first sub-command key found, in the order listed below, is run;
    if none is present nothing runs. The elapsed time is logged last.
    """
    kwargs = parse_cl(sys.argv[1:])
    options = kwargs['args']
    initialize_logger(options.out, options.debug, options.print_debug)
    logger = mylog.getLogger(__name__)
    start = time.time()

    # (key in kwargs, log message, handler); order decides precedence.
    commands = (
        ("prepare", "Run prepare", prepare),
        ("cluster", "Run cluster", cluster),
        ("report", "Run report", report),
        ("predict", "Run predictions", predictions),
        ("target", "Run target annotation", targets_enrichment),
        ("seqbuster", "Run seqbuster", miraligner),
        ("explore", "Run explore", explore),
        ("stats", "Run stats", stats),
        ("collapse", "Run collapse", collapse_fastq),
        ("simulator", "Run simulator", simulate),
    )
    for key, message, handler in commands:
        if key in kwargs:
            logger.info(message)
            handler(options)
            break

    elapsed_minutes = (time.time() - start) / 60
    logger.info('It took %.3f minutes' % elapsed_minutes)
예제 #9
0
from seqcluster.libs.utils import file_exists
import seqcluster.libs.logger as mylog
from seqcluster.libs import do
from seqcluster.libs.read import load_data
from seqcluster.libs.mystats import up_threshold
from seqcluster.detect.cluster import detect_clusters, clean_bam_file, peak_calling, detect_complexity
from seqcluster.detect.description import best_precursor
from seqcluster.libs.annotation import anncluster
from seqcluster.libs.inputs import parse_ma_file
from seqcluster.detect.metacluster import reduceloci, _get_seqs
from seqcluster.libs.tool import generate_position_bed
from seqcluster.libs.classes import *
import seqcluster.libs.parameters as param
from seqcluster.db import make_database

logger = mylog.getLogger(__name__)


def cluster(args):
    """
    Create clusters from the inputs described by ``args``.

    ``args`` is first replaced by the result of ``_check_args(args)``; that
    result must expose ``dir_out``. Any ``read_stats.tsv`` already present
    in ``dir_out`` is deleted before ``_clean_alignment`` runs.
    """

    args = _check_args(args)
    # Remove the stats file left by a previous run.
    # NOTE(review): presumably later steps append to this file -- confirm.
    read_stats_file = op.join(args.dir_out, "read_stats.tsv")
    if file_exists(read_stats_file):
        os.remove(read_stats_file)

    # _clean_alignment returns a pair, unpacked here as the BAM file and
    # the sequence object used by the following steps.
    bam_file, seq_obj = _clean_alignment(args)

    logger.info("Parsing matrix file")
예제 #10
0
from operator import itemgetter

from  seqcluster.libs import pysen
import numpy as np

import seqcluster.libs.logger as mylog
from seqcluster.libs.classes import *
# from seqcluster.function.peakdetect import peakdetect as peakdetect


logger = mylog.getLogger(__name__)


def sort_precursor(c, loci):
    """
    Return one row per locus of the cluster, most-covered locus first.

    Each row is ``[locus_id, chr, start, end, strand, n_seqs]`` where
    ``start``/``end`` are converted to ``int`` and ``n_seqs`` is the number
    of sequences listed for that locus in ``c.loci2seq``. Loci with the
    same ``n_seqs`` keep the iteration order of ``c.loci2seq``.
    """
    rows = []
    for locus_id, seqs in c.loci2seq.items():
        locus = loci[locus_id]
        rows.append([
            locus_id,
            locus.chr,
            int(locus.start),
            int(locus.end),
            locus.strand,
            len(seqs),
        ])
    # Stable sort on the sequence count (last column), largest first.
    rows.sort(key=lambda row: row[5], reverse=True)
    return rows

def best_precursor(clus, loci):
    """
    Select best precursor assuming size around 100 nt

    ``clus`` and ``loci`` are passed unchanged to ``sort_precursor``.
    Raises ``IndexError`` if that call returns no rows.
    """
    data_loci = sort_precursor(clus, loci)
    # Rows come back sorted by sequence count (column 5), largest first,
    # so this starts from the count of the most-covered locus.
    current_size = data_loci[0][5]
    # Index into data_loci of the current best candidate.
    best = 0
예제 #11
0
import seqcluster.libs.logger as mylog
import os

from seqcluster.libs.classes import annotation, dbannotation

logger = mylog.getLogger("run")


def read_gtf_line(cols, field="name"):
    """Parse the columns of one GTF line to get class/name information.

    Args:
        cols: sequence of column strings; columns 0, 2, 3, 4, 6 and 8 are
            read (sequence name, feature, start, end, strand, attributes).
        field: case-insensitive suffix of the attribute key whose value is
            used as the name (``"name"`` matches e.g. ``gene_name``).

    Returns:
        ``[chrom, start, end, strand, group, name]`` with ``start`` and
        ``end`` as ``int``. ``name`` falls back to the ``*gene_id``
        attribute and then to the string ``"None"``. ``group`` is the
        value of the first ``*biotype`` attribute when there is one,
        otherwise column 2. Attribute values are returned verbatim,
        including any surrounding quotes.

    Any exception raised while parsing is caught and the offending columns
    plus a description of the expected format are logged.
    """
    field = field.lower()
    try:
        group = cols[2]
        # Split every "key value" attribute once. The value is only indexed
        # for keys that match, so empty trailing attributes are harmless.
        attrs = [attr.strip().split(" ") for attr in cols[8].split(";")]

        def values_for(suffix):
            """Return the values of attributes whose key ends with suffix."""
            return [attr[1] for attr in attrs
                    if attr[0].lower().endswith(suffix)]

        name = values_for(field) or values_for("gene_id") or ["None"]
        biotype = values_for("biotype")
        if biotype:
            group = biotype[0]
        chrom = cols[0]
        start = int(cols[3])
        end = int(cols[4])
        strand = cols[6]
        return [chrom, start, end, strand, group, name[0]]
    # Bind the exception with ``as``: the tuple form ``except (Exception, e)``
    # evaluates the local ``e`` as an exception class and fails itself.
    except Exception as e:
        logger.error(cols)
        logger.error("File is not in correct format")
        logger.error("Expect chr source feature start end . strand attributes")