コード例 #1
0
# Configure the shared job template: the analyses to run, the result format,
# and the polling settings. Each submitted job below is a clone of it.
template.application_select(['Pfam', 'Panther', 'SignalP'])
template.output_select = 'json'
template.poll_time = 60  # presumably the wait between status polls — TODO confirm units
template.poll_max = 100  # presumably the maximum number of polls — TODO confirm

sequence_limit = 20  # stop once this many sequences have been accepted
batch_limit = 5  # batch size, compared against n_sequence after each submission
n_sequence = 0  # number of sequences accepted so far
nskip = 60  # number of extra fasta.next() calls made before processing starts
s = 0  # number of skip reads performed so far
while fasta.next():
    # Only on the first pass (s starts at 0) the reader is advanced nskip more
    # times, discarding the leading records; afterwards s == nskip and this
    # inner loop is a no-op.
    # NOTE(review): the return value of fasta.next() is ignored here, so
    # running out of input during the skip is not detected — confirm intended.
    while s < nskip:
        s += 1
        fasta.next()

    if fasta.length() < args.minlen: continue  # skip short sequences
    if n_sequence >= sequence_limit: break  # run up to sequence_limit queries
    n_sequence += 1

    # Build the job for this record from a copy of the template.
    ips = template.clone()
    # Translated sequence with trailing '*' characters removed.
    # NOTE(review): .format(linelen=60) is applied to the result of rstrip();
    # if that is a plain str this is str.format and does no line wrapping —
    # confirm what type .seq is.
    ips.sequence = fasta.translate().seq.rstrip('*').format(linelen=60)
    ips.jobname = fasta.id
    ips.title = 'ORF{}'.format(n_sequence)
    # joblist maps each job object to a status string.
    joblist[ips] = 'new'

    # Submit the job; the status only advances when run() returns a truthy
    # value, otherwise the job stays recorded as 'new'.
    if ips.run():
        joblist[ips] = 'submitted'

    if n_sequence % batch_limit and n_sequence < sequence_limit:
コード例 #2
0
# Sequence reader; its file handle is taken from cl.fasta_file
# (cl is presumably the parsed command-line options — verify against caller).
fasta = Fasta()
fasta.fh = cl.fasta_file

# Pattern passed to fasta.trimDocByRegex() for every record read below.
# NOTE(review): a compiled pattern object is always truthy, so the
# `if trimre` test inside the loop never skips the trim — confirm intended.
trimre = re.compile(trim)

# initialize counters
base_total = 0  # presumably the total over all output files — not updated in this excerpt
base_current = 0  # bases/amino acids counted for the current output file
n_out = 0  # presumably the number of output files — not updated in this excerpt
n_seq = 0  # zero until the first sequence is handled; forces the first file to be opened
n_current = 0  # sequences counted for the current output file

while fasta.next():

    if trimre: fasta.trimDocByRegex(trimre)
    if not n_seq or base_current + fasta.length() > maxbases:
        # Start a new output file when this is the first sequence, or when
        # adding this sequence would push the current file past maxbases:
        # close the current output file and report its statistics
        # (the remaining steps lie beyond this excerpt).

        try:
            # On the first pass no file has been opened, so `outfile` is
            # undefined; the resulting NameError is caught below and the
            # report is skipped.
            outfile.close()
            print('   ', base_current, 'bases/amino acids', end=' ')
            print('in', n_current, 'sequences', end=' ')
            print('written to', outfilename)

        except NameError:
            pass
コード例 #3
0
ファイル: raw2fasta.py プロジェクト: gribskov/biocomputing
        # Derive the output name from the input base name. Note that
        # str.replace removes every '.seq' occurrence, not only a suffix.
        base = base.replace('.seq', '')
        sys.stdout.write('\n\tExpanded file: {}\n\tbasename: {}\n'.format(
            infilename, base))
        outfilename = base + '.fasta'
        try:
            outfile = open(outfilename, 'w')
        except OSError:
            # Only I/O failures are reported here; KeyboardInterrupt and
            # SystemExit are no longer swallowed by a bare except.
            sys.stderr.write(
                'Unable to open output file ({})\n'.format(outfilename))
            sys.exit(2)

        # Process all sequences in the file: each input line becomes one
        # FASTA record named <base>_<index>, with its length in the doc field.
        n = 0
        try:
            for seq in infile:
                fasta = Fasta()
                fasta.id = base + '_{}'.format(n)
                fasta.seq = seq.rstrip().upper()
                fasta.doc = 'length={}'.format(fasta.length())
                outfile.write(fasta.format(linelen=100))
                n += 1
        finally:
            # Close both handles even if reading or writing fails part-way.
            infile.close()
            outfile.close()

        sys.stdout.write('\t{} sequences written to {}\n'.format(
            n, outfilename))

    # end of loop over files

# Terminate with a success status. sys.exit is used because the bare exit()
# helper is injected by the site module and is not guaranteed to exist.
sys.exit(0)