def test_identify_transcript_singal(self):
    """Check the isoform reported for the first gene cluster of a single-isoform region."""
    region = ("Chr01", 4648477, 4653385)
    expected = "[Isoform: segment array: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]; tss=True; tes=True; invalid=False]"

    # No TGS input and no external TSS/TES lists are supplied: only the
    # fixture's NGS data and annotation drive the identification.
    clusters = identify_element(*region, self.ngs, [], [], [], self.ann)
    first_cluster = clusters[0]
    transcripts = identify_transcript(first_cluster, self.ann)

    self.assertEqual(str(transcripts.isoA), expected)
def test_identify_transcript_multi(self):
    """Check the isoforms reported for the first gene cluster of a two-isoform region."""
    region = ("Chr01", 795113, 808059)
    # Two isoforms are expected; adjacent literals concatenate into the
    # single string that str(trans.isoA) must equal.
    expected = (
        "[Isoform: segment array: [1, 0, 1, 0, 1, 0]; tss=True; tes=True; invalid=False, "
        "Isoform: segment array: [1, 0, 1, 0, 1, 1]; tss=True; tes=True; invalid=False]"
    )

    # No TGS input and no external TSS/TES lists are supplied: only the
    # fixture's NGS data and annotation drive the identification.
    clusters = identify_element(*region, self.ngs, [], [], [], self.ann)
    first_cluster = clusters[0]
    transcripts = identify_transcript(first_cluster, self.ann)

    self.assertEqual(str(transcripts.isoA), expected)
def do(self, out_dir, ngs_obj_list, tgs_obj_list, ext_tss_list, ext_tes_list, ann, f_genome, paraclu_path=None):
    """Process regions received via ``recv_data`` until the finish message arrives.

    Each received item is unpacked as ``(chrom, start, end)``. Elements are
    identified in that region, every gene cluster that has elements gets a
    name and is written out together with its transcripts, and then
    ``(self.rank, self.finish_msg)`` is sent to ``self.root`` before the
    next item is received. The same message is sent once more after the
    finish message has been received.

    Output goes through an ``OutputHandle`` opened on
    ``<out_dir>/node_<rank>``.
    """
    node_dir = os.path.join(out_dir, "node_{0}".format(self.rank))
    named_clusters = 0

    with OutputHandle(node_dir) as f_out:
        task = self.recv_data()
        while task != self.finish_msg:
            chrom, start, end = task
            clusters = identify_element(
                chrom, start, end,
                ngs_obj_list, tgs_obj_list,
                ext_tss_list, ext_tes_list,
                ann, f_genome, paraclu_path)

            for cluster in clusters:
                if cluster.has_element():
                    # Only clusters with elements consume an index. The
                    # name uses rank + 1, while the directory uses rank.
                    named_clusters += 1
                    name = "n_{0}_c_{1}".format(self.rank + 1, named_clusters)
                    cluster.write_element2bed6(*f_out.element_handles(), name)
                    identify_transcript(cluster, ann).write2bed12(
                        name, *f_out.isoform_handles())

            self.debug("LBIdentifyIsoWorker: finish identification\n")
            # Report completion of this region, then wait for the next one.
            self.comm.send(
                (self.rank, self.finish_msg),
                dest=self.root,
                tag=self.worker_tag)
            task = self.recv_data()

    # Final notification, sent once the finish message has been received.
    self.comm.send(
        (self.rank, self.finish_msg),
        dest=self.root,
        tag=self.worker_tag)
def main(args):
    """Main entry point allowing external calls

    Parses the command line, wraps the NGS/TGS inputs in ``SeqFile``
    objects, loads the external TSS/TES lists and the annotation, builds
    the linkage over all inputs and then, for every linked region,
    identifies elements and transcripts and writes them through an
    ``OutputHandle`` opened on ``args.out_dir``.

    When ``args.time_out`` is set, each region is processed under a
    ``SIGALRM`` alarm of that many seconds. A region that raises
    ``TimeOutError`` is reported on stdout and appended to
    ``igia_debug_timeout.log`` in the output directory; processing then
    continues with the next region. The alarm is always cancelled when a
    region is left, including when an unrelated exception propagates, so
    no pending alarm outlives this function.

    Args:
      args ([str]): command line parameter list
    """
    args = parse_args(args)
    setup_logging(args.loglevel)
    check_paraclu(args)
    _logger.debug("Starting IGIA ...")

    ngs_obj_list = [SeqFile(x, "NGS") for x in args.ngs_file]
    tgs_obj_list = [SeqFile(x, "TGS") for x in args.tgs_file]
    ext_tss_list = load_txs(args.tss)
    ext_tes_list = load_txs(args.tes)
    out_dir = args.out_dir
    ann = load_ann(args.ann, args.size, out_dir, "ANN")

    # Update Global variables.
    GVAR.RULE = args.rule
    GVAR.TXS_DIFF = args.dtxs
    GVAR.SPLICED_INTRON_PIR_CUTOFF = args.pir

    f_genome = args.f_genome
    paraclu_path = args.paraclu_path
    load_seqinfo(ngs_obj_list)

    _logger.info("Start building linkage ...")
    bam_list = ngs_obj_list + tgs_obj_list
    if ann is not None:
        bam_list += [ann]
    linkage = find_linkage(bam_list)
    _logger.info("Finish building linkage")

    cluster_indx = 0
    with OutputHandle(out_dir) as f_out:
        for chrom, start, end in linkage.iterlinkage():
            try:
                if args.time_out is not None:
                    signal.signal(signal.SIGALRM, time_out_handler)
                    signal.alarm(args.time_out)

                _logger.debug(
                    "Start identifying elements in {0}:{1}-{2}".format(
                        chrom, start, end))
                gene_cluster_list = identify_element(
                    chrom, start, end, ngs_obj_list, tgs_obj_list,
                    ext_tss_list, ext_tes_list, ann, f_genome, paraclu_path)
                _logger.debug(
                    "Finish identifying elements in {0}:{1}-{2}".format(
                        chrom, start, end))

                # list of gene cluster without any common exon
                for gene_cluster in gene_cluster_list:
                    if not gene_cluster.has_element():
                        continue
                    cluster_indx += 1
                    cluster_name = "c_{0}".format(cluster_indx)
                    gene_cluster.write_element2bed6(
                        *f_out.element_handles(), cluster_name)

                    _logger.debug(
                        "Start identifying transcript for {0}".format(
                            gene_cluster))
                    trans = identify_transcript(gene_cluster, ann)
                    trans.write2bed12(cluster_name, *f_out.isoform_handles())
                    _logger.debug(
                        "Finish identifying transcript for {0}".format(
                            gene_cluster))
            except TimeOutError:
                print("TimeOut: {0}\t{1}\t{2}\n".format(chrom, start, end))
                with open(os.path.join(args.out_dir, "igia_debug_timeout.log"), "a") as f:
                    f.write("TimeOut ({0}s): {1}\t{2}\t{3}\n".format(
                        args.time_out, chrom, start, end))
            finally:
                # Cancel the alarm on every exit path. Previously it was
                # only cancelled on success, so any other exception left a
                # pending SIGALRM armed for whoever called main().
                if args.time_out is not None:
                    signal.alarm(0)
    _logger.info("End")