コード例 #1
0
    def write_as_fasta(self, fh, n=None):
        """-----------------------------------------------------------------------------------------
        Write ORFs from self.orf to a file in fasta format. If n is defined, write only the ORF at
        that index; otherwise write every ORF in the list.

        Each record uses the ORF's 'id' as the fasta id, its 'sequence' as the sequence, and a
        documentation string built from its 'length', 'direction', 'frame', 'begin' and 'end'.
        Every record is followed by a newline.

        :param fh: open filehandle for writing
        :param n: integer, index of ORF to write, write all if not specified. Negative indices
                  count from the end of the list, as in normal list indexing
        :return: integer, number of ORFs written (0 if n is outside the list)
        -----------------------------------------------------------------------------------------"""
        fasta = Fasta()

        # choose the ORFs to write; an index outside the list selects nothing instead of raising
        if n is None:
            selected = self.orf
        elif -len(self.orf) <= n < len(self.orf):
            selected = [self.orf[n]]
        else:
            selected = []

        nwritten = 0
        for orf in selected:
            # the same Fasta object is reused for every record; all three fields are overwritten
            fasta.id = orf['id']
            fasta.doc = 'len={} strand={} frame={} begin={} end={}'.format(
                orf['length'], orf['direction'], orf['frame'], orf['begin'], orf['end'])
            fasta.seq = orf['sequence']
            fh.write(fasta.format(linelen=60))
            fh.write('\n')
            nwritten += 1

        return nwritten
コード例 #2
0
ファイル: windowmatch.py プロジェクト: gribskov/biocomputing
    # Self-test driver fragment: compares a sequence read from the file named by the first
    # command-line argument against a second sequence made from its own first 50 characters.
    import sys
    from sequence.fasta import Fasta
    from plotter import Plotter

    # show the freshly constructed matcher and its alphabet
    match = Windowmatch()
    print('done {}'.format(type(match)))
    print(match.alphabet)

    # match.readNCBI('table/NUC4.4.matrix')
    print(match.format())

    # sequence 1 comes from the fasta file given as the first command-line argument
    fasta1 = Fasta(filename=sys.argv[1])
    fasta1.read()

    # sequence 2 is the first 50 characters of sequence 1; it must be copied before
    # sequence 1 is truncated below
    fasta2 = Fasta()
    fasta2.id = 'seq2'
    fasta2.doc = ' bases 1:50'
    fasta2.seq = fasta1.seq[:50]

    # keep only the first 200 characters of sequence 1
    fasta1.seq = fasta1.seq[:200]

    match.s1 = fasta1
    match.s2 = fasta2
    # NOTE(review): seqToInt() is defined elsewhere; presumably returns the two encoded
    # sequence lengths - confirm
    l1, l2 = match.seqToInt()
    print(l1, l2)

    # score with a window of 10 and a threshold of 5, then report the settings and the result
    match.window = 10
    match.threshold = 5
    nmatch = match.windowScore()
    print('window: {}     threshold: {}     nmatch: {}'. \
          format(match.window, match.threshold, nmatch))
コード例 #3
0
ファイル: wordmatch.py プロジェクト: gribskov/biocomputing
                # the true end)
                filtered.append([pos, runlen])

            diagonal[d] = filtered

        return nmatch


# --------------------------------------------------------------------------------------------------
# Testing
# --------------------------------------------------------------------------------------------------
# Runs only when the file is executed as a script: small hand-checkable matching tests.
if __name__ == '__main__':
    # test 0: compare a 5-letter sequence with itself
    print('\ntest 0: identity matching')
    print('\texpect 7 matches\n')
    fasta = Fasta()
    fasta.id = 'test0'
    fasta.doc = '5 letter DNA test'
    fasta.seq = 'ACAGT'
    print('{}\n'.format(fasta.format()))

    # the same Fasta object is used as both sequences
    match = Match()
    match.s1 = fasta
    match.s2 = fasta
    # NOTE(review): identityPos() is defined elsewhere; presumably it counts pairs of positions
    # holding the same letter (ACAGT against itself has 7 such pairs) - confirm
    nmatch = match.identityPos()
    print('matches: {}'.format(nmatch))

    # test 1: a fresh matcher for two sequences of different length
    print('\ntest 1: identity matching, unequal length sequences')
    print('\texpect 11 matches\n')
    match = Match()

    fasta1 = Fasta()
コード例 #4
0
        # a feature of a requested type starts a new entry, stored under its own ID
        if info['feature'] in features:
            flist[info['ID']] = info
            nfeature += 1
        # otherwise, if its parent is already stored, copy over only the keys the parent
        # entry does not have yet (existing parent values are never overwritten)
        # NOTE(review): if info is a plain dict, a record without a 'Parent' key raises
        # KeyError here - confirm the parser always sets it
        elif info['Parent'] in flist:
            for k in info:
                if k not in flist[info['Parent']]:
                    flist[info['Parent']][k] = info[k]
        else:
            # neither a requested feature nor a child of a stored one: report and drop it
            # flist[info['ID']] = info
            sys.stderr.write('unknown feature {}\n'.format(info['feature']))

    # emit one fasta record on stdout for every stored feature
    for gene in flist:
        thisgene = flist[gene]
        f = Fasta()
        f.id = thisgene['ID']
        # documentation: each key listed in save that this feature has, as ' key:value'
        f.doc = ''.join(' {}:{}'.format(k, thisgene[k]) for k in save if k in thisgene)

        contig_seq = seq[thisgene['seqname']]
        gene_begin = thisgene['begin']
        gene_end = thisgene['end']
        if gene_end - gene_begin > 100000:
            # coordinates cross origin: take from end to the end of the sequence, then
            # from the start of the sequence up to begin
            f.seq = contig_seq[gene_end - 1:] + contig_seq[:gene_begin]
        else:
            # ordinary feature: 1-based inclusive begin..end
            f.seq = contig_seq[gene_begin - 1:gene_end]

        # minus-strand features are passed through complement() before writing
        if thisgene['strand'] == '-':
            f.seq = complement(f.seq)

        sys.stdout.write(f.format(linelen=100))
コード例 #5
0
ファイル: plotter.py プロジェクト: gribskov/biocomputing
        return True


# --------------------------------------------------------------------------------------------------
# Testing
# --------------------------------------------------------------------------------------------------
# Runs only when the file is executed as a script: builds a small match and hands it to Plotter.
if __name__ == '__main__':
    from wordmatch import Match  # for testing only
    from sequence.fasta import Fasta

    print('\ntest 1: identity matching, unequal length sequences')
    print('\texpect 11 matches\n')
    match = Match()

    # first sequence: 5 letters
    fasta1 = Fasta()
    fasta1.id = 'test0.1'
    fasta1.doc = '5 letter DNA test'
    fasta1.seq = 'ACAGT'
    match.s1 = fasta1

    # second sequence: 7 letters (the first sequence followed by 'AA')
    fasta2 = Fasta()
    fasta2.id = 'test0.2'
    fasta2.doc = '7 letter DNA test'
    fasta2.seq = 'ACAGTAA'
    match.s2 = fasta2

    # the returned count is stored but not printed in the lines shown here
    nmatch = match.identity()

    # give the populated match to the plotter and run its setup
    plot = Plotter()
    plot.match = match
    plot.setup()
コード例 #6
0
     "phams":["56154"],
     "Start":15822,
     "Stop":16230,
     "Length":408,
     "Name":"24",
     "translation":"MTNVFTLDAMREETRKKYQPVKIGLSEDVTVELKPLLKLGKKAREAVADAVKEIEALPDEIDEDDEDSDELMDEVAEKICESIAKVFKLIATSPRKLLAELDTEEEPQIRAELYGAVLRTWMRET QLGEAAPSPN",
     "Orientation":"F",
     "Notes":"b'tail assembly chaperone'"} ...

Michael Gribskov     10 April 2021
================================================================================================="""
import sys
import json
from sequence.fasta import Fasta

# --------------------------------------------------------------------------------------------------
# main program
# --------------------------------------------------------------------------------------------------
if __name__ == '__main__':
    # Convert the JSON file named by the first command-line argument to fasta on stdout.
    # The file is closed as soon as it has been parsed.
    with open(sys.argv[1], 'r') as fp:
        phage = json.load(fp)

    # one fasta record per entry in the 'results' list
    for gene in phage['results']:
        f = Fasta()
        f.id = gene['GeneID']
        f.seq = gene['translation']
        # Notes holds text such as "b'tail assembly chaperone'"; [2:-1] drops the first two
        # characters and the last one, leaving the text between the quotes
        f.doc = gene['Notes'][2:-1]
        print(f.format(linelen=100))

    # sys.exit rather than the interactive-only exit() helper installed by the site module
    sys.exit(0)
コード例 #7
0
ファイル: raw2fasta.py プロジェクト: gribskov/biocomputing
        # derive the output name from the input base name: drop '.seq', add '.fasta'
        base = base.replace('.seq', '')
        sys.stdout.write('\n\tExpanded file: {}\n\tbasename: {}\n'.format(
            infilename, base))
        outfilename = base + '.fasta'
        try:
            outfile = open(outfilename, 'w')
        except OSError:
            # only failures to open the file are reported here; other exceptions
            # (e.g. KeyboardInterrupt) propagate normally
            sys.stderr.write(
                'Unable to open output file ({})\n'.format(outfilename))
            sys.exit(2)

        # process all sequences in the file: each input line becomes one fasta record
        # with id <base>_<n>, numbered from 0
        n = 0
        try:
            with outfile:
                for seq in infile:
                    fasta = Fasta()
                    fasta.id = base + '_{}'.format(n)
                    # strip trailing whitespace/newline and force upper case
                    fasta.seq = seq.rstrip().upper()
                    fasta.doc = 'length={}'.format(fasta.length())
                    outfile.write(fasta.format(linelen=100))
                    n += 1
        finally:
            # both files are closed even if reading or writing fails part-way
            infile.close()

        sys.stdout.write('\t{} sequences written to {}\n'.format(
            n, outfilename))

    # end of loop over files

exit(0)