def make_chimera(cluster_pair, cluster_shelve, transcript_dict,
                 genome_tx_trees, annotation_source):
    """Build a Chimera object describing one pair of 5'/3' clusters.

    Args:
        cluster_pair: object exposing ``id5p``, ``id3p``, ``pair_id``,
            ``qnames`` and ``spanning_qnames``.
        cluster_shelve: mapping indexed by ``str(cluster id)`` that yields
            the cluster objects.
        transcript_dict: forwarded to ``lookup_transcripts`` and
            ``get_chimera_type``.
        genome_tx_trees: forwarded to the same two helpers.
        annotation_source: forwarded to ``get_transcript_info``.

    Returns:
        A new ``Chimera`` carrying the cluster coordinates and strands, the
        chimera type and distance, the fragment counts, and the sorted
        biotype / gene / transcript name lists for each side.
    """
    # fetch both clusters; the shelve is indexed by the id's string form
    clust5p = cluster_shelve[str(cluster_pair.id5p)]
    clust3p = cluster_shelve[str(cluster_pair.id3p)]

    # transcripts associated with each cluster
    txs5p = lookup_transcripts(clust5p, transcript_dict, genome_tx_trees)
    txs3p = lookup_transcripts(clust3p, transcript_dict, genome_tx_trees)

    # classify the pair and obtain the distance reported with the type
    kind, dist = get_chimera_type(clust5p, clust3p, txs5p, txs3p,
                                  transcript_dict, genome_tx_trees)

    # annotation details for each side
    names5p, genes5p, types5p = get_transcript_info(txs5p, annotation_source)
    names3p, genes3p, types3p = get_transcript_info(txs3p, annotation_source)

    # a fragment seen as both discordant and spanning is counted once
    all_frags = set(cluster_pair.qnames).union(cluster_pair.spanning_qnames)

    # populate the result object
    c = Chimera()
    c.rname5p = clust5p.rname
    c.start5p = clust5p.start
    c.end5p = clust5p.end
    c.rname3p = clust3p.rname
    c.start3p = clust3p.start
    c.end3p = clust3p.end
    c.chimera_id = "CHIMERA%d" % cluster_pair.pair_id
    c.num_frags = len(all_frags)
    c.strand5p = clust5p.strand
    c.strand3p = clust3p.strand
    c.chimera_type = kind
    c.distance = dist
    c.num_discordant_frags = len(cluster_pair.qnames)
    c.num_spanning_frags = len(cluster_pair.spanning_qnames)
    c.num_discordant_frags_5p = len(clust5p.qnames)
    c.num_discordant_frags_3p = len(clust3p.qnames)
    c.num_concordant_frags_5p = clust5p.concordant_frags
    c.num_concordant_frags_3p = clust3p.concordant_frags
    c.biotypes_5p = sorted(types5p)
    c.biotypes_3p = sorted(types3p)
    c.genes_5p = sorted(genes5p)
    c.genes_3p = sorted(genes3p)
    c.transcripts_5p = sorted(names5p)
    c.transcripts_3p = sorted(names3p)
    return c
def make_chimera(cluster_pair, cluster_shelve, transcript_dict,
                 genome_tx_trees, annotation_source):
    """Create and fill a Chimera from a 5'/3' cluster pair.

    The two clusters are read from ``cluster_shelve`` using the string form
    of ``cluster_pair.id5p`` / ``cluster_pair.id3p``.  Their transcripts,
    the chimera type/distance and the annotation names are obtained from
    ``lookup_transcripts``, ``get_chimera_type`` and ``get_transcript_info``
    respectively, and the results are copied onto a fresh ``Chimera``.

    Args:
        cluster_pair: supplies ``id5p``, ``id3p``, ``pair_id``, ``qnames``
            and ``spanning_qnames``.
        cluster_shelve: lookup table of clusters keyed by ``str(id)``.
        transcript_dict: passed through to the transcript helpers.
        genome_tx_trees: passed through to the transcript helpers.
        annotation_source: passed through to ``get_transcript_info``.

    Returns:
        The populated ``Chimera`` instance.
    """
    # NOTE(review): an identically named make_chimera is defined directly
    # above this one in the file; being later, this definition is the one
    # the module name ends up bound to. Consider removing the duplicate.

    # resolve the 5' and 3' clusters
    c5 = cluster_shelve[str(cluster_pair.id5p)]
    c3 = cluster_shelve[str(cluster_pair.id3p)]

    # resolve the transcripts for each cluster
    tx_list5 = lookup_transcripts(c5, transcript_dict, genome_tx_trees)
    tx_list3 = lookup_transcripts(c3, transcript_dict, genome_tx_trees)

    # chimera category plus its distance value
    category, span = get_chimera_type(c5, c3, tx_list5, tx_list3,
                                      transcript_dict, genome_tx_trees)

    # transcript names, gene names and biotypes per side
    tx5, gene5, bio5 = get_transcript_info(tx_list5, annotation_source)
    tx3, gene3, bio3 = get_transcript_info(tx_list3, annotation_source)

    # union of discordant and spanning fragment names (duplicates collapse)
    merged_qnames = set(cluster_pair.qnames)
    merged_qnames |= set(cluster_pair.spanning_qnames)

    # fill in the chimera record
    c = Chimera()
    c.rname5p = c5.rname
    c.start5p = c5.start
    c.end5p = c5.end
    c.rname3p = c3.rname
    c.start3p = c3.start
    c.end3p = c3.end
    c.chimera_id = "CHIMERA%d" % cluster_pair.pair_id
    c.num_frags = len(merged_qnames)
    c.strand5p = c5.strand
    c.strand3p = c3.strand
    c.chimera_type = category
    c.distance = span
    c.num_discordant_frags = len(cluster_pair.qnames)
    c.num_spanning_frags = len(cluster_pair.spanning_qnames)
    c.num_discordant_frags_5p = len(c5.qnames)
    c.num_discordant_frags_3p = len(c3.qnames)
    c.num_concordant_frags_5p = c5.concordant_frags
    c.num_concordant_frags_3p = c3.concordant_frags
    c.biotypes_5p = sorted(bio5)
    c.biotypes_3p = sorted(bio3)
    c.genes_5p = sorted(gene5)
    c.genes_3p = sorted(gene3)
    c.transcripts_5p = sorted(tx5)
    c.transcripts_3p = sorted(tx3)
    return c
def read_pairs_to_chimera(chimera_name, tid5p, tid3p, readpairs, tid_tx_map,
                          genome_tx_trees, trim_bp):
    """Build a Chimera from the discordant read pairs joining two transcripts.

    Args:
        chimera_name: value stored as the chimera's ``name``.
        tid5p: key of the 5' transcript in ``tid_tx_map``.
        tid3p: key of the 3' transcript in ``tid_tx_map``.
        readpairs: iterable of indexable pairs; element 0 of each pair is fed
            to the 5' partner and element 1 to the 3' partner.  It is
            iterated twice, so a one-shot iterator would be exhausted by the
            5' pass -- presumably callers pass a list; verify against caller.
        tid_tx_map: mapping from transcript id to transcript object.
        genome_tx_trees: forwarded to ``get_chimera_type``.
        trim_bp: forwarded to ``ChimeraPartner.from_discordant_reads``.

    Returns:
        A new ``Chimera`` with ``partner5p``, ``partner3p``, ``name``,
        ``chimera_type``, ``distance`` and ``encomp_read_pairs`` set.

    Raises:
        KeyError: if ``tid5p`` or ``tid3p`` is missing from a dict-like
            ``tid_tx_map``.
    """
    # get gene information
    tx5p = tid_tx_map[tid5p]
    tx3p = tid_tx_map[tid3p]

    # categorize chimera type
    chimera_type, distance = get_chimera_type(tx5p, tx3p, genome_tx_trees)

    # Lazy per-side views of the read pairs.  Generator expressions are used
    # instead of itertools.imap, which was removed in Python 3; they are
    # equally lazy and work unchanged on Python 2.
    reads5p = (pair[0] for pair in readpairs)
    reads3p = (pair[1] for pair in readpairs)

    # create chimera object
    c = Chimera()
    c.partner5p = ChimeraPartner.from_discordant_reads(reads5p, tx5p, trim_bp)
    c.partner3p = ChimeraPartner.from_discordant_reads(reads3p, tx3p, trim_bp)
    c.name = chimera_name
    c.chimera_type = chimera_type
    c.distance = distance
    # raw reads
    c.encomp_read_pairs = readpairs
    return c