Ejemplo n.º 1
0
def merge_gffs(rna_gff_files, cds_gff_files, contigs_fasta_file,
               output_file_prefix):
    """Combine RNA and CDS GFF annotations and write them to three files.

    Hits from every RNA GFF file are loaded with ``get_gff_hits``. CDS hits
    are read contig by contig with ``generate_hits`` (sorted by score) and a
    hit is kept only when element 1 of ``hit.checkForOverlap(rna_regions)``
    is ``None`` (presumably meaning it overlaps no RNA region on that
    contig -- confirm against ``checkForOverlap``).

    The per-file RNA hit dicts, followed by the single merged CDS dict, are
    handed to ``write_annotations_to_files`` together with
    ``contigs_fasta_file`` and three output handles opened for writing:
    ``<output_file_prefix>.gff``, ``.fna`` and ``.faa``.
    """
    # One entry per RNA GFF file, each holding whatever get_gff_hits returns.
    rna_hits = {}
    for rna_gff_file in rna_gff_files:
        rna_hits[rna_gff_file] = get_gff_hits(rna_gff_file)

    # CDS hits from all files are pooled per contig.
    cds_hits = {}
    for cds_gff_file in cds_gff_files:
        hits_by_contig = generate_hits(cds_gff_file,
                                       format=GFF,
                                       sort='score')
        for contig, hits in hits_by_contig:
            # RNA regions are looked up across all RNA files for this contig
            rna_regions = get_rna_regions(rna_hits, contig)

            kept = cds_hits.setdefault(contig, [])
            kept.extend(
                h for h in hits
                if h.checkForOverlap(rna_regions)[1] is None)

    # RNA dicts first, merged CDS dict last
    hit_list_dicts = [*rna_hits.values(), cds_hits]

    gff_path = output_file_prefix + ".gff"
    fna_path = output_file_prefix + ".fna"
    faa_path = output_file_prefix + ".faa"
    with open(gff_path, 'w') as GFFOUT, \
            open(fna_path, 'w') as FNAOUT, \
            open(faa_path, 'w') as FAAOUT:
        write_annotations_to_files(hit_list_dicts,
                                   contigs_fasta_file,
                                   GFFOUT,
                                   FNAOUT,
                                   FAAOUT)
Ejemplo n.º 2
0
def get_gff_hits(hit_table_gff, **filter_args):
    """Load the hits of a GFF hit table into a dict keyed by contig.

    Any keyword arguments are forwarded to ``generate_hits``. Unless the
    caller supplies them, ``sort`` defaults to ``'score'`` and
    ``nonoverlapping`` defaults to ``True``.

    Returns a dict mapping each first item yielded by ``generate_hits``
    (named contig here) to a list built from the matching second item.
    """
    # Fill in defaults only where the caller did not pass a value.
    if 'sort' not in filter_args:
        filter_args['sort'] = 'score'
    if 'nonoverlapping' not in filter_args:
        filter_args['nonoverlapping'] = True

    hits_by_contig = {}
    for contig, hits in generate_hits(hit_table_gff,
                                      format=GFF,
                                      **filter_args):
        # Materialise the hits so they can be reused after iteration ends.
        hits_by_contig[contig] = list(hits)
    return hits_by_contig
Ejemplo n.º 3
0
def merge_gffs(rna_gff_files, cds_gff_files, contigs_fasta_file,
               output_file_prefix):
    """Merge RNA and CDS annotations from GFF files and write the results.

    RNA hits are loaded per file via ``get_gff_hits``. CDS hits come from
    ``generate_hits`` (sorted by score) and are retained only if
    ``checkForOverlap`` against the contig's RNA regions yields ``None`` as
    the second element of its result (presumably "no overlap" -- verify
    against ``checkForOverlap``).

    Output goes through ``write_annotations_to_files``, which receives the
    list of hit dicts (RNA dicts first, the pooled CDS dict last),
    ``contigs_fasta_file``, and writable handles for
    ``output_file_prefix`` + ``".gff"``, ``".fna"`` and ``".faa"``.
    """
    # RNA hits, one dict entry per input file
    rna_hits = {path: get_gff_hits(path) for path in rna_gff_files}

    # CDS hits, pooled per contig across all CDS files
    cds_hits = {}
    for path in cds_gff_files:
        for contig, hits in generate_hits(path, format=GFF, sort='score'):
            rna_regions = get_rna_regions(rna_hits, contig)
            bucket = cds_hits.setdefault(contig, [])
            for hit in hits:
                # keep only hits for which no overlapping region is reported
                if hit.checkForOverlap(rna_regions)[1] is None:
                    bucket.append(hit)

    hit_list_dicts = list(rna_hits.values()) + [cds_hits]

    gff_name, fna_name, faa_name = [
        output_file_prefix + extension
        for extension in (".gff", ".fna", ".faa")
    ]
    with open(gff_name, 'w') as GFFOUT, open(fna_name, 'w') as FNAOUT, \
            open(faa_name, 'w') as FAAOUT:
        write_annotations_to_files(
            hit_list_dicts, contigs_fasta_file, GFFOUT, FNAOUT, FAAOUT)
Ejemplo n.º 4
0
def get_gff_hits(hit_table_gff, **filter_args):
    """Return the hits in ``hit_table_gff`` grouped into a dict.

    Keyword arguments are passed through to ``generate_hits``; ``sort``
    falls back to ``'score'`` and ``nonoverlapping`` to ``True`` when the
    caller does not set them. Each value in the returned dict is a list
    built from the second element of a pair yielded by ``generate_hits``,
    keyed by the first element.
    """
    # Defaults applied in this order, and only for keys not already given.
    fallback_options = (('sort', 'score'), ('nonoverlapping', True))
    for option, value in fallback_options:
        filter_args.setdefault(option, value)

    contig_hit_pairs = generate_hits(hit_table_gff, format=GFF, **filter_args)
    return dict((contig, list(hits)) for contig, hits in contig_hit_pairs)
Ejemplo n.º 5
0
def get_gff_hits(hit_table_gff, **filter_args):
    """Collect the hits of a GFF hit table into a dict.

    All keyword arguments are forwarded unchanged to ``generate_hits``; no
    defaults are added here. The result maps the first element of each
    yielded pair to a list made from the second element.
    """
    collected = {}
    for contig, hits in generate_hits(hit_table_gff,
                                      format=GFF,
                                      **filter_args):
        collected[contig] = list(hits)
    return collected