Example #1
0
 def test_identify_transcript_singal(self):
     """Check the isoform reported for region Chr01:4648477-4653385.

     Runs identify_element with empty lists for the three list arguments
     that follow self.ngs, takes the first returned cluster, and compares
     the string form of the resulting ``isoA`` with the expected value.
     """
     expected = "[Isoform: segment array: [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]; tss=True; tes=True; invalid=False]"
     clusters = identify_element("Chr01", 4648477, 4653385, self.ngs,
                                 [], [], [], self.ann)
     transcripts = identify_transcript(clusters[0], self.ann)
     self.assertEqual(str(transcripts.isoA), expected)
Example #2
0
 def test_identify_transcript_multi(self):
     """Check the isoforms reported for region Chr01:795113-808059.

     Runs identify_element with empty lists for the three list arguments
     that follow self.ngs, takes the first returned cluster, and compares
     the string form of the resulting ``isoA`` with the expected
     two-isoform value.
     """
     # Adjacent literals are joined into the single expected string.
     expected = (
         "[Isoform: segment array: [1, 0, 1, 0, 1, 0]; tss=True; tes=True; invalid=False, "
         "Isoform: segment array: [1, 0, 1, 0, 1, 1]; tss=True; tes=True; invalid=False]"
     )
     clusters = identify_element("Chr01", 795113, 808059, self.ngs,
                                 [], [], [], self.ann)
     transcripts = identify_transcript(clusters[0], self.ann)
     self.assertEqual(str(transcripts.isoA), expected)
Example #3
0
    def do(self,
           out_dir,
           ngs_obj_list,
           tgs_obj_list,
           ext_tss_list,
           ext_tes_list,
           ann,
           f_genome,
           paraclu_path=None):
        """Identify isoforms for each received region until the finish message.

        Regions are obtained from ``self.recv_data()`` and unpacked as
        ``(chrom, start, end)``. For every region, ``identify_element`` is
        called and each returned gene cluster that has elements is written
        through an ``OutputHandle`` opened on ``<out_dir>/node_<rank>``:
        first its elements (``write_element2bed6``), then its transcripts
        (``write2bed12``). Clusters are named ``n_<rank + 1>_c_<counter>``,
        where the counter runs across all regions handled by this call.

        After each region, and once more after the receive loop ends,
        ``(self.rank, self.finish_msg)`` is sent to ``self.root`` with
        ``self.worker_tag``.

        Args:
            out_dir: directory under which the per-node output is created.
            ngs_obj_list, tgs_obj_list, ext_tss_list, ext_tes_list, ann,
            f_genome, paraclu_path: forwarded unchanged to
                ``identify_element``; ``ann`` is also passed to
                ``identify_transcript``.
        """

        def report_done():
            # Send (rank, finish message) to the root on the worker tag.
            self.comm.send((self.rank, self.finish_msg),
                           dest=self.root,
                           tag=self.worker_tag)

        node_dir = os.path.join(out_dir, "node_{0}".format(self.rank))
        n_named = 0  # clusters named so far by this worker
        with OutputHandle(node_dir) as f_out:
            region = self.recv_data()
            while region != self.finish_msg:
                chrom, start, end = region
                clusters = identify_element(
                    chrom, start, end, ngs_obj_list, tgs_obj_list,
                    ext_tss_list, ext_tes_list, ann, f_genome, paraclu_path)

                for cluster in clusters:
                    if cluster.has_element():
                        n_named += 1
                        name = "n_{0}_c_{1}".format(self.rank + 1, n_named)
                        cluster.write_element2bed6(
                            *f_out.element_handles(), name)
                        identify_transcript(cluster, ann).write2bed12(
                            name, *f_out.isoform_handles())

                self.debug("LBIdentifyIsoWorker: finish identification\n")
                report_done()
                region = self.recv_data()
        report_done()
Example #4
0
def main(args):
    """Main entry point allowing external calls.

    Parses the command line, loads the NGS/TGS inputs, the external TSS/TES
    lists and the annotation, builds the linkage over all inputs, and then,
    for each ``(chrom, start, end)`` region yielded by the linkage,
    identifies elements and transcripts and writes them through an
    ``OutputHandle`` opened on the output directory. Clusters that have
    elements are named ``c_<n>`` with a counter running across all regions.

    When ``args.time_out`` is not None, each region is processed under a
    SIGALRM alarm of that many seconds. A region that raises
    ``TimeOutError`` is reported on stdout and appended to
    ``igia_debug_timeout.log`` in the output directory, and processing
    continues with the next region.

    Args:
      args ([str]): command line parameter list
    """
    args = parse_args(args)
    setup_logging(args.loglevel)
    check_paraclu(args)

    _logger.debug("Starting IGIA ...")

    ngs_obj_list = [SeqFile(x, "NGS") for x in args.ngs_file]
    tgs_obj_list = [SeqFile(x, "TGS") for x in args.tgs_file]
    ext_tss_list = load_txs(args.tss)
    ext_tes_list = load_txs(args.tes)

    out_dir = args.out_dir
    ann = load_ann(args.ann, args.size, out_dir, "ANN")

    # Update Global variables.
    GVAR.RULE = args.rule
    GVAR.TXS_DIFF = args.dtxs
    GVAR.SPLICED_INTRON_PIR_CUTOFF = args.pir
    f_genome = args.f_genome
    paraclu_path = args.paraclu_path

    load_seqinfo(ngs_obj_list)
    _logger.info("Start building linkage ...")
    bam_list = ngs_obj_list + tgs_obj_list
    if ann is not None:
        bam_list += [ann]
    linkage = find_linkage(bam_list)
    _logger.info("Finish building linkage")

    # The handler does not change between regions, so install it once
    # instead of on every loop iteration.
    use_timeout = args.time_out is not None
    if use_timeout:
        signal.signal(signal.SIGALRM, time_out_handler)

    cluster_indx = 0
    with OutputHandle(out_dir) as f_out:
        for chrom, start, end in linkage.iterlinkage():
            try:
                try:
                    if use_timeout:
                        signal.alarm(args.time_out)
                    _logger.debug(
                        "Start identifying elements in {0}:{1}-{2}".format(
                            chrom, start, end))
                    gene_cluster_list = identify_element(
                        chrom, start, end, ngs_obj_list, tgs_obj_list,
                        ext_tss_list, ext_tes_list, ann, f_genome,
                        paraclu_path)
                    _logger.debug(
                        "Finish identifying elements in {0}:{1}-{2}".format(
                            chrom, start, end))

                    # list of gene cluster without any common exon
                    for gene_cluster in gene_cluster_list:
                        if not gene_cluster.has_element():
                            continue
                        cluster_indx += 1
                        cluster_name = "c_{0}".format(cluster_indx)
                        gene_cluster.write_element2bed6(
                            *f_out.element_handles(), cluster_name)

                        _logger.debug(
                            "Start identifying transcript for {0}".format(
                                gene_cluster))
                        trans = identify_transcript(gene_cluster, ann)
                        trans.write2bed12(cluster_name,
                                          *f_out.isoform_handles())
                        _logger.debug(
                            "Finish identifying transcript for {0}".format(
                                gene_cluster))
                finally:
                    # Always disarm the alarm, including when an unexpected
                    # exception escapes; otherwise a stale SIGALRM would
                    # fire later, outside the TimeOutError guard. Keeping
                    # this inside the outer try means an alarm that fires
                    # just before it is disarmed is still handled below.
                    if use_timeout:
                        signal.alarm(0)
            except TimeOutError:
                print("TimeOut: {0}\t{1}\t{2}\n".format(chrom, start, end))
                with open(os.path.join(out_dir, "igia_debug_timeout.log"),
                          "a") as f:
                    f.write("TimeOut ({0}s): {1}\t{2}\t{3}\n".format(
                        args.time_out, chrom, start, end))

    _logger.info("End")