def extend_UTRs(old_fn, new_fn, UTR_fn, genome_dir):
    """Write a copy of an annotation file with gene boundaries moved to UTR ends.

    Features are loaded from ``old_fn`` and grouped into genes. For every name
    returned by ``call_UTRs.read_UTR_file(UTR_fn)``, the outermost exon, the
    single mRNA and the top-level feature of that gene are stretched (or
    shrunk) so that they begin/end at the recorded 5' and 3' positions. All
    features are then passed through ``mark_nearby`` and written to ``new_fn``
    one per line, preceded by the leading ``#`` lines of ``old_fn`` and two
    extra comment lines describing the modifications.

    Args:
        old_fn: Path of the existing annotation file.
        new_fn: Path of the file to write.
        UTR_fn: Path handed to ``call_UTRs.read_UTR_file``. The result is
            indexed by gene name and each value unpacks into four items, of
            which the last two are used as the 5' and 3' positions.
        genome_dir: Forwarded to ``mark_nearby``.

    Raises:
        KeyError: A name in the UTR file has no matching gene in ``old_fn``.
        ValueError: A gene to be extended does not have exactly one mRNA.
    """
    UTR_boundaries = call_UTRs.read_UTR_file(UTR_fn)
    all_features = get_all_features(old_fn)
    # NOTE(review): '/dev/null' is passed where other callers pass genome_dir,
    # presumably because no sequence is needed here - confirm.
    genes = transcript.get_gff_transcripts(all_features, '/dev/null')
    genes = {g.name: g for g in genes}

    for name in UTR_boundaries:
        gene = genes[name]
        if len(gene.mRNAs) != 1:
            raise ValueError('not exactly one mRNA')

        _, _, five_pos, three_pos = UTR_boundaries[name]

        # On the '+' strand the 5' end is the lower coordinate; on the '-'
        # strand the roles of start and end are swapped.
        if gene.strand == '+':
            new_start, new_end = five_pos, three_pos
        elif gene.strand == '-':
            new_start, new_end = three_pos, five_pos
        else:
            # Genes with any other strand value are left untouched.
            continue

        # Picking by start is unaffected by changes to end (and vice versa),
        # so both boundary exons can be located up front.
        first_exon = min(gene.exons, key=lambda e: e.start)
        last_exon = max(gene.exons, key=lambda e: e.end)
        mRNA = gene.mRNAs[0]

        first_exon.start = new_start
        mRNA.start = new_start
        gene.top_level_feature.start = new_start

        last_exon.end = new_end
        mRNA.end = new_end
        gene.top_level_feature.end = new_end

    # Collect the leading comment block before the output file is opened, so
    # the input handle is closed promptly and the header survives even when
    # new_fn names the same file as old_fn.
    header_lines = []
    with open(old_fn) as old_fh:
        for line in old_fh:
            if not line.startswith('#'):
                break
            header_lines.append(line)

    # Annotate before opening the output so a failure here does not leave a
    # truncated file behind.
    mark_nearby(all_features, genome_dir)

    with open(new_fn, 'w') as new_fh:
        new_fh.writelines(header_lines)
        new_fh.write(
            '# UTRs have been extended.\n'
            '# Top-level features have had distances to the closest other '
            'top-level features annotated.\n'
        )
        for feature in all_features:
            new_fh.write(str(feature) + '\n')
def get_CDSs(gff_fn, genome_dir, annotate_nearby=False):
    """Return the genes from an annotation file that have a non-empty ``CDSs``.

    Args:
        gff_fn: Path handed to ``get_all_features``.
        genome_dir: Forwarded to ``transcript.get_gff_transcripts`` and, when
            requested, to ``mark_nearby``.
        annotate_nearby: If true, run ``mark_nearby`` on the features before
            they are grouped into genes.

    Returns:
        A list of the genes whose ``CDSs`` attribute is truthy, in the order
        produced by ``transcript.get_gff_transcripts``.
    """
    features = get_all_features(gff_fn)

    if annotate_nearby:
        mark_nearby(features, genome_dir)

    coding = []
    for candidate in transcript.get_gff_transcripts(features, genome_dir):
        if candidate.CDSs:
            coding.append(candidate)
    return coding
def get_noncoding_RNA_transcripts(gff_fn):
    """Split the genes of an annotation file into rRNA, tRNA and other RNA groups.

    Each gene is classified by the ``feature`` value of its top-level feature:
    exactly ``'rRNA'``, exactly ``'tRNA'``, or any other value containing the
    substring ``'RNA'``. Genes matching none of these are dropped. Note that
    the substring test would also accept a type such as ``'mRNA'`` if one
    appeared as a top-level feature.

    Args:
        gff_fn: Path handed to ``get_all_features``.

    Returns:
        A 3-tuple of lists ``(rRNA, tRNA, other)``, each preserving the order
        produced by ``transcript.get_gff_transcripts``.
    """
    features = get_all_features(gff_fn)

    rRNAs, tRNAs, others = [], [], []
    for entry in transcript.get_gff_transcripts(features, '/dev/null'):
        kind = entry.top_level_feature.feature
        if kind == 'rRNA':
            bucket = rRNAs
        elif kind == 'tRNA':
            bucket = tRNAs
        elif 'RNA' in kind:
            bucket = others
        else:
            # Not an RNA feature type; ignore it.
            continue
        bucket.append(entry)

    return rRNAs, tRNAs, others