예제 #1
0
파일: head.py 프로젝트: JohnReid/biopsy
from optparse import OptionParser

#
# Parse the options
#
option_parser = OptionParser()
option_parser.add_option(
    "-n",
    "--num-seqs",
    dest="num_seqs",
    default=10,
    type='int',
    help="Number of sequences to output."
)
options, args = option_parser.parse_args()

#
# Check args: exactly one positional argument (the FASTA source) is required
#
if 1 != len(args):
    # sys.stderr.write is a plain call, so it runs on both Python 2 and 3
    # (the print-chevron statement only works on Python 2).
    sys.stderr.write('USAGE: %s <fasta file>\n' % __file__)
    sys.exit(-1)
fasta = args[0]

#
# Open the input: '-' selects standard input, anything else is a file path
#
if '-' == fasta:
    fasta_stream = sys.stdin
else:
    fasta_stream = open(fasta, 'r')

#
# Copy at most --num-seqs sequences to standard output
#
from itertools import islice

try:
    # islice stops after the requested number of items without building an
    # intermediate list (Python 2's zip() materialises every pair first).
    # A negative count is clamped to 0 so that, as with a negative range,
    # nothing is written instead of islice raising ValueError.
    num_to_copy = max(options.num_seqs, 0)
    sequences = F.iterseq(fasta_stream, corebio.seq.dna_alphabet)
    for seq in islice(sequences, num_to_copy):
        F.writeseq(sys.stdout, seq)
finally:
    # Only close what this script opened; stdin belongs to the caller.
    if fasta_stream is not sys.stdin:
        fasta_stream.close()
예제 #2
0
"""
Code that reads in 2 sets of sequences and outputs those in the first that are not in the second.
"""

import sys, corebio.seq_io.fasta_io as F, corebio.seq
from optparse import OptionParser

#
# Parse the options
#
option_parser = OptionParser()
options, args = option_parser.parse_args()

#
# Check args: two positional arguments (both FASTA file paths) are required
#
if 2 != len(args):
    # sys.stderr.write is a plain call, so it runs on both Python 2 and 3.
    sys.stderr.write('USAGE: %s <fasta file> <fasta file>\n' % __file__)
    sys.exit(-1)
fasta1, fasta2 = args


def _description_key(seq):
    """Return the key sequences are matched on.

    The key is the sequence's description with surrounding whitespace
    removed and lower-cased, so matching ignores case and padding.
    """
    return seq.description.strip().lower()


# read in second fasta: collect the keys of every sequence to be excluded
with open(fasta2, 'r') as subtract_file:
    to_subtract = set(
        _description_key(seq)
        for seq in F.iterseq(subtract_file, corebio.seq.dna_alphabet))

# read in first fasta: write out each sequence whose key was not collected
with open(fasta1, 'r') as source_file:
    for seq in F.iterseq(source_file, corebio.seq.dna_alphabet):
        if _description_key(seq) not in to_subtract:
            F.writeseq(sys.stdout, seq)
예제 #3
0
option_parser.add_option('-m',
                         '--max-sequences',
                         dest='max_seqs',
                         type='int',
                         default=-1,
                         help="Set a limit on the number of sequences output.")
options, args = option_parser.parse_args()

#
# Check args: a FASTA source and an integer maximum sequence length
#
usage = 'USAGE: %s <fasta file> <max sequence length>\n' % __file__
if 2 != len(args):
    # sys.stderr.write is a plain call, so it runs on both Python 2 and 3.
    sys.stderr.write(usage)
    sys.exit(-1)

fasta = args[0]
try:
    # NOTE(review): `length` is not used below; presumably shorten(), which
    # is defined outside this excerpt, reads it - confirm before renaming.
    length = int(args[1])
except ValueError:
    # A non-integer length is a usage error, not a reason for a traceback.
    sys.stderr.write(usage)
    sys.exit(-1)

# '-' selects standard input, anything else is a file path.
if '-' == fasta:
    input = sys.stdin
else:
    input = open(fasta, 'r')

#
# Read the sequences
#
alphabet = corebio.seq.reduced_nucleic_alphabet
try:
    for i, seq in enumerate(F.iterseq(input, alphabet)):
        # i starts at 0, so the default of -1 (or any negative value) never
        # matches and every sequence is written.
        if options.max_seqs == i:
            break
        F.writeseq(sys.stdout, shorten(seq))
finally:
    # Only close what this script opened; stdin belongs to the caller.
    if input is not sys.stdin:
        input.close()
예제 #4
0
    '--max-sequences',
    dest='max_seqs',
    type='int',
    default=-1,
    help="Set a limit on the number of sequences output."
)
options, args = option_parser.parse_args()

#
# Check args: a FASTA source and an integer maximum sequence length
#
usage = 'USAGE: %s <fasta file> <max sequence length>\n' % __file__
if 2 != len(args):
    # sys.stderr.write is a plain call, so it runs on both Python 2 and 3.
    sys.stderr.write(usage)
    sys.exit(-1)

fasta = args[0]
try:
    # NOTE(review): `length` is not used below; presumably shorten(), which
    # is defined outside this excerpt, reads it - confirm before renaming.
    length = int(args[1])
except ValueError:
    # A non-integer length is a usage error, not a reason for a traceback.
    sys.stderr.write(usage)
    sys.exit(-1)

# '-' selects standard input, anything else is a file path.
if '-' == fasta:
    input = sys.stdin
else:
    input = open(fasta, 'r')

#
# Read the sequences
#
alphabet = corebio.seq.reduced_nucleic_alphabet
try:
    for i, seq in enumerate(F.iterseq(input, alphabet)):
        # i starts at 0, so the default of -1 (or any negative value) never
        # matches and every sequence is written.
        if options.max_seqs == i:
            break
        F.writeseq(sys.stdout, shorten(seq))
finally:
    # Only close what this script opened; stdin belongs to the caller.
    if input is not sys.stdin:
        input.close()
예제 #5
0
    shuffled.name = '%s (shuffled)' % seq.name,
    shuffled.description = '%s (shuffled)' % seq.description,
    shuffled.alphabet = seq.alphabet
    return shuffled


#
# Parse the options
#
option_parser = OptionParser()
options, args = option_parser.parse_args()


#
# Check args: exactly one positional argument (the FASTA file path)
#
if 1 != len(args):
    # sys.stderr.write is a plain call, so it runs on both Python 2 and 3.
    sys.stderr.write('USAGE: %s <fasta file>\n' % __file__)
    sys.exit(-1)
fasta = args[0]


#
# Shuffle the sequences: write shuffle(seq) for every sequence in the file
#
# The with-block closes the file even if reading or writing fails part-way.
with open(fasta, 'r') as fasta_file:
    for seq in F.iterseq(fasta_file, corebio.seq.dna_alphabet):
        F.writeseq(sys.stdout, shuffle(seq))