Exemple #1
0
 def t_parent_child(self):
     """Print the parent-child feature relationships of the test GFF file.

     Builds the map with ``GFFExaminer.parent_child_map`` from
     ``self._test_gff_file`` and pretty-prints it after a blank separator
     line.
     """
     gff_examiner = GFFExaminer()
     pc_map = gff_examiner.parent_child_map(self._test_gff_file)
     # A bare ``print`` only emits a newline as a Python 2 statement; on
     # Python 3 it is a no-op expression. Printing an empty string gives the
     # blank separator line on both versions.
     print("")
     pprint.pprint(pc_map)
    def explore_gff(self, gff_path):
        """Pretty-print a structural overview of the GFF file at ``gff_path``.

        Two reports are written to standard output, in this order: the
        examiner's parent-child map, then its available limits. The file is
        opened separately for each report.
        """
        from BCBio.GFF import GFFExaminer

        gff_examiner = GFFExaminer()
        with open(gff_path) as handle:
            pprint.pprint(gff_examiner.parent_child_map(handle))
        # A fresh handle is used for the second report
        # (presumably the first pass consumes the handle -- TODO confirm).
        with open(gff_path) as handle:
            limits = gff_examiner.available_limits(handle)
            pprint.pprint(limits)
Exemple #3
0
 def t_examiner_with_fasta(self):
     """Perform high level examination of files with FASTA directives.

     Checks the parent-child map and the available limits that
     ``GFFExaminer`` reports for ``self._gff_file``.
     """
     examiner = GFFExaminer()
     pc_map = examiner.parent_child_map(self._gff_file)
     assert pc_map[('UCSC', 'mRNA')] == [('UCSC', 'CDS')]
     limits = examiner.available_limits(self._gff_file)
     # ``dict.keys()`` is a non-indexable view on Python 3, so materialise
     # it as a list before picking the first key.
     assert list(limits['gff_id'].keys())[0][0] == 'chr17'
     assert sorted(limits['gff_source_type'].keys()) == \
             [('UCSC', 'CDS'), ('UCSC', 'mRNA')]
    def t_parent_child_file_modes(self):
        """Check ``parent_child_map`` against differently opened inputs.

        The map built from the file name is the reference. A text-mode
        handle must give the same map. A binary-mode handle must give the
        same map on Python 2 and must raise ``TypeError`` with a fixed
        message otherwise.
        """
        examiner = GFFExaminer()
        # Reference result, loaded straight from the file name.
        expected = examiner.parent_child_map(self._test_gff_file)

        with open(self._test_gff_file, "rt") as text_handle:
            assert expected == examiner.parent_child_map(text_handle)

        with open(self._test_gff_file, "rb") as binary_handle:
            if six.PY2:
                assert expected == examiner.parent_child_map(binary_handle)
            else:
                # Keep the exception in a separate name: the ``as`` target
                # is unbound once the ``except`` clause finishes.
                caught = None
                try:
                    examiner.parent_child_map(binary_handle)
                except TypeError as err:
                    caught = err
                assert caught is not None, "expected TypeError to be raised"
                assert str(caught) == \
                    "input handle must be opened in text mode", caught
Exemple #5
0
                    dest="prefix",
                    help="Prefix of output files",
                    default="prefix")
parser.add_argument("-l", "--length_distribution_file_prefix",
                    action="store", dest="len_distr_file",
                    default="length_distribution",
                    help="Output file with lengths distibutions")

args = parser.parse_args()

examiner = GFFExaminer()

# First pass: print the parent-child map of the annotation file.
with open(args.gff, "r") as in_fd:
    pprint.pprint(examiner.parent_child_map(in_fd))

# Second pass: load every record, indexed by record id.
with open(args.gff, "r") as in_fd:
    record_dict = {record.id: record for record in GFF.parse(in_fd)}

# Maps a feature's first "Name" qualifier to an ordered mapping of lengths:
# sub-feature type -> length for genes, a single "ncRNA" entry for the
# listed RNA feature types.
gene_dict = OrderedDict()
for record_id, record in record_dict.items():
    for feature in record.features:
        if feature.type == "gene":
            sub_lengths = OrderedDict()
            gene_dict[feature.qualifiers["Name"][0]] = sub_lengths
            for sub_feature in feature.sub_features:
                # A later sub-feature of the same type overwrites the
                # earlier entry.
                sub_lengths[sub_feature.type] = len(sub_feature)
        elif feature.type in ("snoRNA", "ncRNA", "snRNA"):
            gene_dict[feature.qualifiers["Name"][0]] = OrderedDict(
                [("ncRNA", len(feature))])
Exemple #6
0
# -*- coding: utf-8 -*-
"""
Getting gene sequences from our PG files

Author: Daniel Martinez-Martinez
"""

import os

import pprint
from BCBio import GFF
from BCBio.GFF import GFFExaminer

# NOTE(review): the backslash makes this folder name Windows-specific --
# confirm before running on another platform.
ann_folder = '.\\annotations'
file = "NT12004_22.gff"

in_file = os.path.join(ann_folder, file)

# Explore the GFF file: print its parent-child feature map. The ``with``
# block closes the handle even if the examiner raises.
examiner = GFFExaminer()
with open(in_file) as in_handle:
    pprint.pprint(examiner.parent_child_map(in_handle))

# Parse the document and print every record. This opens ``in_file`` (the
# path inside the annotations folder), i.e. the same file that was explored
# above, rather than the bare file name.
with open(in_file) as in_handle:
    for rec in GFF.parse(in_handle):
        print(rec)
import pprint
from BCBio.GFF import GFFExaminer
 
in_file = "Nagalakshmi_2008_UTRs.gff3"
examiner = GFFExaminer()
# Print the parent-child feature map; the ``with`` block closes the handle
# even if the examiner raises.
with open(in_file) as in_handle:
    pprint.pprint(examiner.parent_child_map(in_handle))

from BCBio import GFF
 
in_file = "Nagalakshmi_2008_UTRs.gff3"

# Print every record parsed from the file. ``print(rec)`` is valid on both
# Python 2 and Python 3, and the ``with`` block closes the handle even if
# parsing fails part-way.
with open(in_file) as in_handle:
    for rec in GFF.parse(in_handle):
        print(rec)