def get_gene_coverage(predicted_file, actual_file, predicted_coords_file=None):
    """ Returns gene coverage information for genbank+gff3 files.

        predicted_file holds the predicted features.  When
          predicted_coords_file is given, the predicted features are first
          passed through get_translated_annotations together with that
          coords file; otherwise they are read directly with
          parselib.get_feature_locations.
        actual_file holds the features the predictions are compared to.

        Returns an array with one entry per feature in actual_file, in file
        order: the value of find_max_overlap(start, end, strand) queried
        against an overlap detector built from the predicted features.
    """
    if predicted_coords_file is None:
        predicted_features = parselib.get_feature_locations(predicted_file)
    else:
        predicted_features = get_translated_annotations(predicted_file,
                                                        predicted_coords_file)
    detector = overlap.overlap_detector(predicted_features)

    # Collect into a real list before wrapping it, so array() receives a
    # sequence rather than a lazy iterator.
    best_overlaps = []
    for feature in parselib.get_feature_locations(actual_file):
        best_overlaps.append(
            detector.find_max_overlap(feature.start, feature.end, feature.strand))

    return array(best_overlaps)
Exemple #2
0
def get_gene_coverage(predicted_file, actual_file, predicted_coords_file=None):
    """ Returns gene coverage information for genbank+gff3 files.

        The predicted features come from predicted_file.  If
        predicted_coords_file is supplied they are first run through
        get_translated_annotations with that coords file; if it is None they
        are loaded as-is via parselib.get_feature_locations.

        Every feature read from actual_file is then looked up in an overlap
        detector built from the predicted features, and the
        find_max_overlap results are returned as an array (one entry per
        actual feature, in the order they are read).
    """
    use_translation = predicted_coords_file is not None
    predicted = (
        get_translated_annotations(predicted_file, predicted_coords_file)
        if use_translation
        else parselib.get_feature_locations(predicted_file)
    )
    overlapper = overlap.overlap_detector(predicted)

    def max_overlap_for(gene):
        # Query the detector with the gene's span and strand.
        return overlapper.find_max_overlap(gene.start, gene.end, gene.strand)

    actual_genes = parselib.get_feature_locations(actual_file)
    # A list (not a generator) is handed to array(), as in a plain list build.
    return array([max_overlap_for(gene) for gene in actual_genes])
def get_translated_annotations(predicted_genes_file, alignment_coords_file):
    """ Translate annotation coordinates back to reference sequences.
        predicted_genes_file should be a GFF file containing annotations
          for a collection of contigs.
        alignment_coords_file should be a NUCMER alignment coords file,
          mapping said contigs to a reference genome.

        This is a generator.  For every feature whose seq_id has alignment
        records, translate_annotation is called once per record, and each
        result that is not None is yielded.  A single feature can therefore
        produce zero, one or several translated locations; features whose
        seq_id has no records are skipped.
    """
    records_by_contig = nm.get_nucmer_records_by_contig(alignment_coords_file)
    for feature in pl.get_feature_locations(predicted_genes_file):
        contig_id = feature.seq_id
        # Features on contigs without any alignment record cannot be mapped.
        if contig_id not in records_by_contig:
            continue
        for alignment in records_by_contig[contig_id]:
            translated = translate_annotation(feature, alignment)
            # None means this alignment produced no location for the feature.
            if translated is None:
                continue
            yield translated
Exemple #4
0
def get_translated_annotations(predicted_genes_file, alignment_coords_file):
    """ Map features annotated on contigs back onto the reference sequence.
        predicted_genes_file should be a GFF file containing annotations
          for a collection of contigs.
        alignment_coords_file should be a NUCMER alignment coords file,
          mapping said contigs to a reference genome.

        Yields, lazily, every non-None result of translate_annotation for
        each (feature, alignment record) pair where the record is listed
        under the feature's seq_id.  Features whose seq_id has no alignment
        records yield nothing.  The output can be used to compare the genes
        found in contigs with those found in the reference genome.
    """
    alignments = nm.get_nucmer_records_by_contig(alignment_coords_file)
    for gene in pl.get_feature_locations(predicted_genes_file):
        if gene.seq_id in alignments:
            # Lazy on purpose: each translation is computed only when the
            # consumer asks for the next location.
            candidates = (translate_annotation(gene, hit)
                          for hit in alignments[gene.seq_id])
            for location in candidates:
                if location is not None:
                    yield location