def make_chimera(cluster_pair, cluster_shelve, transcript_dict,
                 genome_tx_trees, annotation_source):
    """Assemble a ``Chimera`` record from a pair of 5'/3' clusters.

    Args:
        cluster_pair: object providing ``id5p``, ``id3p``, ``pair_id``,
            ``qnames`` and ``spanning_qnames``.
        cluster_shelve: mapping indexed by ``str(cluster id)`` that yields
            the cluster objects.
        transcript_dict: passed through to ``lookup_transcripts`` and
            ``get_chimera_type``.
        genome_tx_trees: passed through to ``lookup_transcripts`` and
            ``get_chimera_type``.
        annotation_source: passed through to ``get_transcript_info``.

    Returns:
        A freshly constructed ``Chimera`` with its location, strand, type,
        distance, fragment-count and annotation attributes filled in.
    """
    # fetch the two clusters; the shelve is keyed by the stringified id
    five_prime = cluster_shelve[str(cluster_pair.id5p)]
    three_prime = cluster_shelve[str(cluster_pair.id3p)]

    # transcripts associated with each cluster
    txs_five = lookup_transcripts(five_prime, transcript_dict,
                                  genome_tx_trees)
    txs_three = lookup_transcripts(three_prime, transcript_dict,
                                   genome_tx_trees)

    # chimera classification and distance between the partners
    kind, dist = get_chimera_type(five_prime, three_prime,
                                  txs_five, txs_three,
                                  transcript_dict, genome_tx_trees)

    # transcript names, gene names and biotypes for each side
    names5, genes5, types5 = get_transcript_info(txs_five,
                                                 annotation_source)
    names3, genes3, types3 = get_transcript_info(txs_three,
                                                 annotation_source)

    # populate the chimera object
    chimera = Chimera()
    chimera.rname5p = five_prime.rname
    chimera.start5p = five_prime.start
    chimera.end5p = five_prime.end
    chimera.rname3p = three_prime.rname
    chimera.start3p = three_prime.start
    chimera.end3p = three_prime.end
    chimera.chimera_id = "CHIMERA%d" % cluster_pair.pair_id
    # total support = distinct names across discordant and spanning sets
    supporting = set(cluster_pair.qnames).union(cluster_pair.spanning_qnames)
    chimera.num_frags = len(supporting)
    chimera.strand5p = five_prime.strand
    chimera.strand3p = three_prime.strand
    chimera.chimera_type = kind
    chimera.distance = dist
    chimera.num_discordant_frags = len(cluster_pair.qnames)
    chimera.num_spanning_frags = len(cluster_pair.spanning_qnames)
    chimera.num_discordant_frags_5p = len(five_prime.qnames)
    chimera.num_discordant_frags_3p = len(three_prime.qnames)
    chimera.num_concordant_frags_5p = five_prime.concordant_frags
    chimera.num_concordant_frags_3p = three_prime.concordant_frags
    chimera.biotypes_5p = sorted(types5)
    chimera.biotypes_3p = sorted(types3)
    chimera.genes_5p = sorted(genes5)
    chimera.genes_3p = sorted(genes3)
    chimera.transcripts_5p = sorted(names5)
    chimera.transcripts_3p = sorted(names3)
    return chimera
def make_chimera(cluster_pair, cluster_shelve, transcript_dict,
                 genome_tx_trees, annotation_source):
    """Create and return a ``Chimera`` describing one cluster pair.

    The 5' and 3' clusters are read from ``cluster_shelve`` using the
    string form of ``cluster_pair.id5p`` / ``cluster_pair.id3p``. Their
    transcripts are resolved with ``lookup_transcripts``, the pair is
    classified with ``get_chimera_type``, and annotation labels are
    obtained from ``get_transcript_info`` before being copied, in sorted
    order, onto the result.

    Returns:
        The populated ``Chimera`` instance.
    """
    # --- gather inputs (helper calls happen in this order) ---------------
    clust5 = cluster_shelve[str(cluster_pair.id5p)]
    clust3 = cluster_shelve[str(cluster_pair.id3p)]
    tx5 = lookup_transcripts(clust5, transcript_dict, genome_tx_trees)
    tx3 = lookup_transcripts(clust3, transcript_dict, genome_tx_trees)
    ctype, cdist = get_chimera_type(
        clust5, clust3, tx5, tx3, transcript_dict, genome_tx_trees
    )
    tx_labels5, gene_labels5, biotype_labels5 = get_transcript_info(
        tx5, annotation_source
    )
    tx_labels3, gene_labels3, biotype_labels3 = get_transcript_info(
        tx3, annotation_source
    )

    def _ordered(values):
        # sorted copy as a list, same result as sorted(values)
        ordered = list(values)
        ordered.sort()
        return ordered

    result = Chimera()

    # --- identity and classification -------------------------------------
    result.chimera_id = "CHIMERA%d" % (cluster_pair.pair_id)
    result.chimera_type = ctype
    result.distance = cdist

    # --- genomic location of each partner --------------------------------
    result.rname5p, result.start5p, result.end5p = (
        clust5.rname, clust5.start, clust5.end
    )
    result.rname3p, result.start3p, result.end3p = (
        clust3.rname, clust3.start, clust3.end
    )
    result.strand5p = clust5.strand
    result.strand3p = clust3.strand

    # --- fragment support ------------------------------------------------
    # a name present in both qnames and spanning_qnames is counted once
    result.num_frags = len(
        set(cluster_pair.qnames) | set(cluster_pair.spanning_qnames)
    )
    result.num_discordant_frags = len(cluster_pair.qnames)
    result.num_spanning_frags = len(cluster_pair.spanning_qnames)
    result.num_discordant_frags_5p = len(clust5.qnames)
    result.num_discordant_frags_3p = len(clust3.qnames)
    result.num_concordant_frags_5p = clust5.concordant_frags
    result.num_concordant_frags_3p = clust3.concordant_frags

    # --- annotation labels, stored sorted --------------------------------
    result.biotypes_5p = _ordered(biotype_labels5)
    result.biotypes_3p = _ordered(biotype_labels3)
    result.genes_5p = _ordered(gene_labels5)
    result.genes_3p = _ordered(gene_labels3)
    result.transcripts_5p = _ordered(tx_labels5)
    result.transcripts_3p = _ordered(tx_labels3)
    return result