def t_parent_child(self):
    """Summarize parent-child relationships in a GFF file.

    Builds the parent-child map of ``self._test_gff_file`` with a
    ``GFFExaminer`` and pretty-prints it to standard output.
    """
    gff_examiner = GFFExaminer()
    pc_map = gff_examiner.parent_child_map(self._test_gff_file)
    # pprint.pprint() writes the formatted map itself and returns None, so it
    # must not be wrapped in a print: that only emitted a stray "None" line
    # (and the print-statement form does not parse on Python 3).
    pprint.pprint(pc_map)
def explore_gff(self, gff_path):
    """Pretty-print two GFFExaminer summaries of the file at ``gff_path``.

    First the parent-child map is printed, then the available limits.
    The file is opened separately for each of the two examiner calls.
    """
    from BCBio.GFF import GFFExaminer

    gff_examiner = GFFExaminer()

    # First pass: parent-child relationships.
    with open(gff_path) as gff_handle:
        relationships = gff_examiner.parent_child_map(gff_handle)
        pprint.pprint(relationships)

    # Second pass: available limits, read from a fresh handle.
    with open(gff_path) as gff_handle:
        limits = gff_examiner.available_limits(gff_handle)
        pprint.pprint(limits)
def t_examiner_with_fasta(self):
    """Perform high level examination of files with FASTA directives.

    Checks the parent-child map and the available limits that
    ``GFFExaminer`` reports for ``self._gff_file``.
    """
    examiner = GFFExaminer()
    pc_map = examiner.parent_child_map(self._gff_file)
    assert pc_map[('UCSC', 'mRNA')] == [('UCSC', 'CDS')]
    limits = examiner.available_limits(self._gff_file)
    # dict.keys() is a non-indexable view on Python 3; materialize it as a
    # list before taking the first key (same result as before on Python 2).
    assert list(limits['gff_id'].keys())[0][0] == 'chr17'
    assert sorted(limits['gff_source_type'].keys()) == \
        [('UCSC', 'CDS'), ('UCSC', 'mRNA')]
def t_parent_child_file_modes(self):
    """Compare parent_child_map results across the ways of passing the file.

    The map built from the file name is the reference. A text-mode handle
    must reproduce it. A binary-mode handle must reproduce it on Python 2;
    otherwise the call must raise ``TypeError`` with a fixed message.
    """
    examiner = GFFExaminer()
    # Reference result, loaded directly from the file name.
    expected = examiner.parent_child_map(self._test_gff_file)

    with open(self._test_gff_file, "rt") as text_handle:
        assert expected == examiner.parent_child_map(text_handle)

    with open(self._test_gff_file, "rb") as binary_handle:
        if not six.PY2:
            text_mode_msg = "input handle must be opened in text mode"
            try:
                examiner.parent_child_map(binary_handle)
            except TypeError as err:
                assert str(err) == text_mode_msg, err
            else:
                # Reaching this branch means no exception was raised at all.
                assert False, "expected TypeError to be raised"
        else:
            assert expected == examiner.parent_child_map(binary_handle)
dest="prefix", help="Prefix of output files", default="prefix") parser.add_argument("-l", "--length_distribution_file_prefix", action="store", dest="len_distr_file", help="Output file with lengths distibutions", default="length_distribution") args = parser.parse_args() examiner = GFFExaminer() with open(args.gff, "r") as in_fd: pprint.pprint(examiner.parent_child_map(in_fd)) with open(args.gff, "r") as in_fd: record_dict = dict([(record.id, record) for record in GFF.parse(in_fd)]) gene_dict = OrderedDict({}) for record_id in record_dict: for feature in record_dict[record_id].features: if feature.type == "gene": gene_dict[feature.qualifiers["Name"][0]] = OrderedDict({}) for sub_feature in feature.sub_features: gene_dict[feature.qualifiers["Name"][0]][ sub_feature.type] = len(sub_feature) if feature.type in ("snoRNA", "ncRNA", "snRNA"): gene_dict[feature.qualifiers["Name"][0]] = OrderedDict( {"ncRNA": len(feature)})
# -*- coding: utf-8 -*-
"""
Getting gene sequences from our PG files

Author: Daniel Martinez-Martinez
"""
import os
import pprint

from BCBio import GFF
from BCBio.GFF import GFFExaminer

# Build the folder path with os.path so the separator matches the platform
# (yields '.\\annotations' on Windows, exactly as before).
ann_folder = os.path.join(os.curdir, 'annotations')
file = "NT12004_22.gff"
in_file = os.path.join(ann_folder, file)

# to explore gff files
examiner = GFFExaminer()
# The context manager closes the handle; previously it was left open because
# the close() call was commented out.
with open(in_file) as in_handle:
    pprint.pprint(examiner.parent_child_map(in_handle))

# to parse the document
# Open the same path that was explored above (in_file); the bare file name
# does not include the annotations folder.
with open(in_file) as in_handle:
    for rec in GFF.parse(in_handle):
        print(rec)
import pprint

from BCBio import GFF
from BCBio.GFF import GFFExaminer

in_file = "Nagalakshmi_2008_UTRs.gff3"

# Pretty-print the parent-child map that GFFExaminer builds for the file.
examiner = GFFExaminer()
with open(in_file) as in_handle:
    pprint.pprint(examiner.parent_child_map(in_handle))

# Parse the file and print every record it yields. print() is called as a
# function so the script parses on Python 3 as well as Python 2.
with open(in_file) as in_handle:
    for rec in GFF.parse(in_handle):
        print(rec)