def test_cluster(self):
    """Smoke-test cluster detection followed by loci reduction.

    The working directory is switched to ``data/examples`` below the
    directory derived from the seqcluster package location, and it is not
    restored afterwards. Nothing is asserted: the test passes as long as
    no call raises.
    """
    package_dir = os.path.dirname(inspect.getfile(seqcluster))
    base_dir = package_dir.replace("seqcluster/", "")
    os.chdir(os.path.join(base_dir, "data/examples"))

    options = namedtuple('args', 'debug print_debug MIN_SEQ')(
        debug=True, print_debug=True, MIN_SEQ=1)

    sequences = parse_ma_file("seqs_set.ma")
    alignments = pybedtools.BedTool("2_clusters_2_seqs_shared")

    initialize_logger(".", options.debug, options.print_debug)
    log = mylog.getLogger(__name__)
    log.info("Start reduceloci test")

    clusters = detect_clusters(alignments, sequences, options.MIN_SEQ)
    # The reduced loci are not inspected; only successful execution matters.
    reduceloci(clusters, ".")
def test_database(self):
    """Load the example JSON and build a database from it.

    Does nothing when the ``sqlite3`` command cannot be found. The work
    happens inside the context provided by ``make_workdir()``; the JSON
    path is relative, so it presumably resolves against the directory
    that context switches into (confirm against ``make_workdir``).
    """
    if not find_cmd("sqlite3"):
        return

    with make_workdir():
        options = namedtuple('args', 'debug print_debug json ')(
            debug=True,
            print_debug=True,
            json="../../data/examples/seqcluster.json")

        initialize_logger(".", options.debug, options.print_debug)
        log = mylog.getLogger(__name__)
        log.info(options)

        log.info("Reading data")
        records = load_data(options.json)

        log.info("Create database")
        make_database(records)
def test_database(self):
    """Load the example JSON and build a database from it.

    Does nothing when the ``sqlite3`` command cannot be found. The work
    happens inside the context provided by ``make_workdir()``; the JSON
    path is relative, so it presumably resolves against the directory
    that context switches into (confirm against ``make_workdir``).
    """
    if not find_cmd("sqlite3"):
        return

    with make_workdir():
        options = namedtuple('args', 'debug print_debug json ')(
            debug=True,
            print_debug=True,
            json="../../data/examples/seqcluster.json")

        initialize_logger(".", options.debug, options.print_debug)
        log = mylog.getLogger(__name__)
        log.info(options)

        log.info("Reading data")
        records = load_data(options.json)

        log.info("Create database")
        make_database(records)
def test_databse(self):
    """Build a database from ``seqcluster.json`` in the examples folder.

    Does nothing when the ``sqlite3`` command cannot be found. The working
    directory is switched to ``data/examples`` below the directory derived
    from the seqcluster package location and is not restored afterwards.
    The method name keeps its historical spelling.
    """
    if not find_cmd("sqlite3"):
        return

    package_dir = os.path.dirname(inspect.getfile(seqcluster))
    base_dir = package_dir.replace("seqcluster/", "")
    os.chdir(os.path.join(base_dir, "data/examples"))

    options = namedtuple('args', 'debug print_debug json ')(
        debug=True, print_debug=True, json="seqcluster.json")

    initialize_logger(".", options.debug, options.print_debug)
    log = mylog.getLogger(__name__)
    log.info(options)

    log.info("Reading data")
    records = load_data(options.json)

    log.info("Create databse")
    make_database(records)
def test_predict(self):
    """Run ``make_predictions`` on the example JSON.

    Does nothing when the ``tRNAscan-SE`` command cannot be found. The
    working directory is switched to ``data/examples`` below the directory
    derived from the seqcluster package location and is not restored.
    Output goes to the ``predictions`` folder inside that examples folder.
    """
    if not find_cmd("tRNAscan-SE"):
        return

    package_dir = os.path.dirname(inspect.getfile(seqcluster))
    base_dir = package_dir.replace("seqcluster/", "")
    os.chdir(os.path.join(base_dir, "data/examples"))
    out_dir = os.path.join(base_dir, "data/examples/predictions")

    options = namedtuple(
        'args', 'debug print_debug MIN_SEQ json reference')(
        debug=True,
        print_debug=True,
        MIN_SEQ=1,
        json="seqcluster.json",
        reference="../genomes/genome.fa")

    initialize_logger(".", options.debug, options.print_debug)
    log = mylog.getLogger(__name__)
    log.info(options)

    log.info("Reading data")
    records = load_data(options.json)

    log.info("Start prediction")
    make_predictions(records, out_dir, options)
def test_databse(self):
    """Build a database from ``seqcluster.json`` in the examples folder.

    Does nothing when the ``sqlite3`` command cannot be found. The working
    directory is switched to ``data/examples`` below the directory derived
    from the seqcluster package location and is not restored afterwards.
    The method name keeps its historical spelling.
    """
    if not find_cmd("sqlite3"):
        return

    package_dir = os.path.dirname(inspect.getfile(seqcluster))
    base_dir = package_dir.replace("seqcluster/", "")
    os.chdir(os.path.join(base_dir, "data/examples"))

    options = namedtuple('args', 'debug print_debug json ')(
        debug=True, print_debug=True, json="seqcluster.json")

    initialize_logger(".", options.debug, options.print_debug)
    log = mylog.getLogger(__name__)
    log.info(options)

    log.info("Reading data")
    records = load_data(options.json)

    log.info("Create databse")
    make_database(records)
def test_predict(self):
    """Run ``is_tRNA`` on the example JSON.

    Does nothing when the ``tRNAscan-SE`` command cannot be found. The
    working directory is switched to ``data/examples`` below the directory
    derived from the seqcluster package location and is not restored.
    Output goes to the ``test_out_predictions`` folder inside that
    examples folder.
    """
    if not find_cmd("tRNAscan-SE"):
        return

    package_dir = os.path.dirname(inspect.getfile(seqcluster))
    base_dir = package_dir.replace("seqcluster/", "")
    os.chdir(os.path.join(base_dir, "data/examples"))
    out_dir = os.path.join(base_dir, "data/examples/test_out_predictions")

    options = namedtuple(
        'args', 'debug print_debug MIN_SEQ json reference')(
        debug=True,
        print_debug=True,
        MIN_SEQ=1,
        json="seqcluster.json",
        reference="../genomes/genome.fa")

    initialize_logger(".", options.debug, options.print_debug)
    log = mylog.getLogger(__name__)
    log.info(options)

    log.info("Reading data")
    records = load_data(options.json)

    log.info("Start prediction")
    is_tRNA(records, out_dir, options)
def main(**kwargs):
    """Command-line entry point: run the selected sub-command.

    Any keyword arguments passed in are discarded; the options are always
    re-read from ``sys.argv`` through ``parse_cl``. The parsed mapping is
    expected to hold the option namespace under ``"args"`` plus a key
    naming the sub-command. The first sub-command found, in the order
    listed below, is executed; if none is present only the timing line is
    logged.
    """
    kwargs = parse_cl(sys.argv[1:])
    options = kwargs['args']
    initialize_logger(options.out, options.debug, options.print_debug)
    log = mylog.getLogger(__name__)
    started = time.time()

    # (key in parsed options, log message, handler) -- order is significant.
    subcommands = (
        ("prepare", "Run prepare", prepare),
        ("cluster", "Run cluster", cluster),
        ("report", "Run report", report),
        ("predict", "Run predictions", predictions),
        ("target", "Run target annotation", targets_enrichment),
        ("seqbuster", "Run seqbuster", miraligner),
        ("explore", "Run explore", explore),
        ("stats", "Run stats", stats),
        ("collapse", "Run collapse", collapse_fastq),
        ("simulator", "Run simulator", simulate),
    )
    for key, message, handler in subcommands:
        if key in kwargs:
            log.info(message)
            handler(options)
            break

    elapsed_minutes = (time.time() - started) / 60
    log.info('It took %.3f minutes' % elapsed_minutes)
from seqcluster.libs.utils import file_exists import seqcluster.libs.logger as mylog from seqcluster.libs import do from seqcluster.libs.read import load_data from seqcluster.libs.mystats import up_threshold from seqcluster.detect.cluster import detect_clusters, clean_bam_file, peak_calling, detect_complexity from seqcluster.detect.description import best_precursor from seqcluster.libs.annotation import anncluster from seqcluster.libs.inputs import parse_ma_file from seqcluster.detect.metacluster import reduceloci, _get_seqs from seqcluster.libs.tool import generate_position_bed from seqcluster.libs.classes import * import seqcluster.libs.parameters as param from seqcluster.db import make_database logger = mylog.getLogger(__name__) def cluster(args): """ Creating clusters """ args = _check_args(args) read_stats_file = op.join(args.dir_out, "read_stats.tsv") if file_exists(read_stats_file): os.remove(read_stats_file) bam_file, seq_obj = _clean_alignment(args) logger.info("Parsing matrix file")
from operator import itemgetter
from seqcluster.libs import pysen
import numpy as np

import seqcluster.libs.logger as mylog
from seqcluster.libs.classes import *
# from seqcluster.function.peakdetect import peakdetect as peakdetect

logger = mylog.getLogger(__name__)


def sort_precursor(c, loci):
    """
    Rank the loci of a cluster by how many sequences map to each.

    One row is built per key of ``c.loci2seq``:
    ``[locus id, chr, start, end, strand, number of sequences]``, with
    start and end converted to int. Rows are returned in descending order
    of the sequence count; ties keep their original relative order.
    """
    rows = []
    for locus_id in c.loci2seq:
        locus = loci[locus_id]
        rows.append([
            locus_id,
            locus.chr,
            int(locus.start),
            int(locus.end),
            locus.strand,
            len(c.loci2seq[locus_id]),
        ])
    # Column 5 holds the per-locus sequence count.
    rows.sort(key=itemgetter(5), reverse=True)
    return rows


def best_precursor(clus, loci):
    """
    Select best precursor asuming size around 100 nt
    """
    data_loci = sort_precursor(clus, loci)
    current_size = data_loci[0][5]
    best = 0
import seqcluster.libs.logger as mylog
import os
from seqcluster.libs.classes import annotation, dbannotation

logger = mylog.getLogger("run")


def _attr_values(attrs, suffix):
    """Return the values of attributes whose key ends with *suffix*.

    Each attribute is stripped and split on single spaces; the first token
    is the key (compared case-insensitively) and the second is the value.
    A matching attribute without a value raises IndexError, which the
    caller treats as a malformed line.
    """
    values = []
    for attr in attrs:
        tokens = attr.strip().split(" ")
        if tokens[0].lower().endswith(suffix):
            values.append(tokens[1])
    return values


def read_gtf_line(cols, field="name"):
    """Parse the columns of a GTF line to get class/name information.

    Parameters:
        cols: sequence of column strings for one line; columns 0, 2, 3, 4,
            6 and 8 are read (chromosome, feature, start, end, strand and
            the ``;``-separated attributes).
        field: attribute-key suffix used to pick the name; compared
            case-insensitively.

    Returns:
        ``[chrom, start, end, strand, group, name]`` with start and end as
        int. ``name`` comes from the first attribute whose key ends with
        *field*, falling back to a key ending in ``gene_id`` and then to
        the string ``"None"``. ``group`` is the third column unless a key
        ending in ``biotype`` is present, in which case its value is used.
        When the line cannot be parsed the problem is logged and, as far
        as the code visible here shows, ``None`` is returned.
    """
    field = field.lower()
    try:
        group = cols[2]
        attrs = cols[8].split(";")
        name = (_attr_values(attrs, field)
                or _attr_values(attrs, "gene_id")
                or ["None"])
        biotype = _attr_values(attrs, "biotype")
        if biotype:
            group = biotype[0]
        return [cols[0], int(cols[3]), int(cols[4]), cols[6], group, name[0]]
    except Exception as err:
        # The previous ``except(Exception, e)`` evaluated the local ``e``
        # (the end coordinate), which is normally still unbound when
        # parsing fails, so the handler itself raised instead of logging.
        logger.error(cols)
        logger.error("File is not in correct format")
        logger.error("Expect chr source feature start end . strand attributes")
        logger.error(err)