Exemple #1
0
    def test_embl_to_gb(self):
        # Round trip under test:
        #   EMBL file -> DNA object -> GenBank text -> DNA object -> EMBL text
        # The EMBL text produced at the end must equal the input file.
        #
        # Original author's note: EMBL records can hold more than GenBank
        # (e.g. more than one date, the EMBL class, DOI cross references), so
        # an arbitrary EMBL record cannot be converted to GenBank and back
        # without losing data.
        source_record = DNA.read(self.single_rna_simple_fp, format="embl")

        # serialize the EMBL-derived record as GenBank in memory, then rewind
        # the buffer and parse it back
        with io.StringIO() as gb_buffer:
            DNA.write(source_record, format="genbank", file=gb_buffer)
            gb_buffer.seek(0)
            roundtrip_record = DNA.read(gb_buffer, format="genbank")

        # serialize the GenBank-derived record as EMBL and keep the text
        with io.StringIO() as embl_buffer:
            DNA.write(roundtrip_record, format="embl", file=embl_buffer)
            obs = embl_buffer.getvalue()

        # expected output is the untouched content of the input EMBL file
        with open(self.single_rna_simple_fp) as embl_file:
            exp = embl_file.read()

        self.assertEqual(exp, obs)
Exemple #2
0
    def test_embl_to_gb(self):
        # Round trip under test:
        #   EMBL file -> DNA object -> GenBank text -> DNA object -> EMBL text
        # The EMBL text produced at the end must equal the input file.
        #
        # Original author's note: EMBL records can hold more than GenBank
        # (e.g. more than one date, the EMBL class, DOI cross references), so
        # an arbitrary EMBL record cannot be converted to GenBank and back
        # without losing data.
        source_record = DNA.read(self.single_rna_simple_fp, format="embl")

        # serialize the EMBL-derived record as GenBank in memory, then rewind
        # the buffer and parse it back
        with io.StringIO() as gb_buffer:
            DNA.write(source_record, format="genbank", file=gb_buffer)
            gb_buffer.seek(0)
            roundtrip_record = DNA.read(gb_buffer, format="genbank")

        # serialize the GenBank-derived record as EMBL and keep the text
        with io.StringIO() as embl_buffer:
            DNA.write(roundtrip_record, format="embl", file=embl_buffer)
            obs = embl_buffer.getvalue()

        # expected output is the untouched content of the input EMBL file
        with open(self.single_rna_simple_fp) as embl_file:
            exp = embl_file.read()

        self.assertEqual(exp, obs)
Exemple #3
0
def reformat_egid(genbank_fp,
                  output_dir):
    """ Reformat input genome to the formats accepted by EGID.

    Five files are written into ``output_dir``: ``id.fna`` and ``id.gbk``
    (the merged genome as FASTA and GenBank), ``id.faa`` (protein
    sequences), ``id.ffn`` (nucleotide sequences of the genes) and ``id.ptt``
    (tab-separated protein table).

    Parameters
    ----------
    genbank_fp: string
        file path to genome in GenBank format
    output_dir: string
        output directory path

    Notes
    -----
    Input to EGID are five obsolete NCBI standard files: gbk, fna, faa, ffn
    and ptt.

    Each value of the gene mapping returned by ``_merge_genbank_seqs`` is
    indexed here as: [0] protein sequence, [1] start, [2] end, [3] strand
    ('+' is treated as the positive strand, anything else as negative).
    Start/end are used as 1-based, inclusive coordinates when slicing the
    nucleotide sequence.
    """
    (gb, genes) = _merge_genbank_seqs(genbank_fp)
    DNA.write(gb, join(output_dir, 'id.fna'), format='fasta')
    DNA.write(gb, join(output_dir, 'id.gbk'), format='genbank')
    nucl_seq = str(gb)
    # a ptt file contains the following columns:
    fields = ('Location', 'Strand', 'Length', 'PID', 'Gene', 'Synonym',
              'Code', 'COG', 'Product')
    # The context manager guarantees all three handles are closed even if a
    # record is malformed or a write fails part-way through.
    with open(join(output_dir, 'id.faa'), 'w') as faa_f, \
            open(join(output_dir, 'id.ffn'), 'w') as ffn_f, \
            open(join(output_dir, 'id.ptt'), 'w') as ptt_f:
        ptt_f.write('locus001\n' + str(len(genes)) + ' proteins\n')
        ptt_f.write('\t'.join(fields) + '\n')
        # genes are emitted in order of start position; gid is an incremental
        # integer (starting at 1) assigned to the current gene
        ordered = sorted(genes.items(), key=lambda x: x[1][1])
        for gid, (gene, rec) in enumerate(ordered, 1):
            prot = rec[0]
            start = rec[1]
            end = rec[2]
            strand = rec[3]
            faa_f.write('>' + gene + '\n' + prot + '\n')
            ptt_f.write(str(start) + '..' + str(end) + '\t' +
                        strand + '\t' + str(len(prot)) + '\t' +
                        str(gid) + '\t-\tgene' + str(gid) +
                        '\t-\t-\t-\n')
            if strand == '+':  # positive strand
                ffn_f.write('>locus001:' + str(start) + '-' +
                            str(end) + '\n' +
                            nucl_seq[start-1:end] + '\n')
            else:  # negative strand (reverse complement)
                rc_seq = str(DNA(nucl_seq[start-1:end]).reverse_complement())
                ffn_f.write('>locus001:c' + str(end) + '-' +
                            str(start) + '\n' + rc_seq + '\n')
Exemple #4
0
def reformat_genemark(genbank_fp, output_dir):
    """ Write the input genome in the file formats used for GeneMark.

    The GenBank input is passed through ``_merge_genbank_seqs``; the first
    element of its result is written into ``output_dir`` twice, as FASTA
    (``id.fna``) and as GenBank (``id.gbk``).

    Parameters
    ----------
    genbank_fp: string
        file path to genome in GenBank format
    output_dir: string
        output directory path

    Notes
    -----
    GeneMark's acceptable input file format is FASTA (genome sequence).
    """
    merged_genome = _merge_genbank_seqs(genbank_fp)[0]
    # (file name, writer format) pairs, written in this order
    targets = (('id.fna', 'fasta'), ('id.gbk', 'genbank'))
    for file_name, file_format in targets:
        out_fp = join(output_dir, file_name)
        DNA.write(merged_genome, out_fp, format=file_format)
Exemple #5
0
def reformat_genemark(genbank_fp,
                      output_dir):
    """ Write the input genome in the file formats used for GeneMark.

    The GenBank input is passed through ``_merge_genbank_seqs``; the first
    element of its result is written into ``output_dir`` twice, as FASTA
    (``id.fna``) and as GenBank (``id.gbk``).

    Parameters
    ----------
    genbank_fp: string
        file path to genome in GenBank format
    output_dir: string
        output directory path

    Notes
    -----
    GeneMark's acceptable input file format is FASTA (genome sequence).
    """
    merged_genome = _merge_genbank_seqs(genbank_fp)[0]
    # (file name, writer format) pairs, written in this order
    targets = (('id.fna', 'fasta'), ('id.gbk', 'genbank'))
    for file_name, file_format in targets:
        out_fp = join(output_dir, file_name)
        DNA.write(merged_genome, out_fp, format=file_format)
Exemple #6
0
    def test_gb_to_embl(self):
        # Round trip under test:
        #   GenBank file -> DNA object -> EMBL text -> DNA object -> GenBank
        # The GenBank text produced at the end must equal the input file.
        source_record = DNA.read(self.genbank_fp, format="genbank")

        # Original author's note: EMBL can't deal with genbank version
        # (ie M14399.1  GI:145229).
        # Serialize as EMBL in memory, rewind, and parse the EMBL text back.
        with io.StringIO() as embl_buffer:
            DNA.write(source_record, format="embl", file=embl_buffer)
            embl_buffer.seek(0)
            roundtrip_record = DNA.read(embl_buffer, format="embl")

        # serialize the EMBL-derived record as GenBank and keep the text
        with io.StringIO() as gb_buffer:
            DNA.write(roundtrip_record, format="genbank", file=gb_buffer)
            obs = gb_buffer.getvalue()

        # expected output is the untouched content of the input GenBank file
        with open(self.genbank_fp) as gb_file:
            exp = gb_file.read()

        self.assertEqual(exp, obs)
Exemple #7
0
    def test_gb_to_embl(self):
        # Round trip under test:
        #   GenBank file -> DNA object -> EMBL text -> DNA object -> GenBank
        # The GenBank text produced at the end must equal the input file.
        source_record = DNA.read(self.genbank_fp, format="genbank")

        # Original author's note: EMBL can't deal with genbank version
        # (ie M14399.1  GI:145229).
        # Serialize as EMBL in memory, rewind, and parse the EMBL text back.
        with io.StringIO() as embl_buffer:
            DNA.write(source_record, format="embl", file=embl_buffer)
            embl_buffer.seek(0)
            roundtrip_record = DNA.read(embl_buffer, format="embl")

        # serialize the EMBL-derived record as GenBank and keep the text
        with io.StringIO() as gb_buffer:
            DNA.write(roundtrip_record, format="genbank", file=gb_buffer)
            obs = gb_buffer.getvalue()

        # expected output is the untouched content of the input GenBank file
        with open(self.genbank_fp) as gb_file:
            exp = gb_file.read()

        self.assertEqual(exp, obs)