from itertools import islice
from optparse import OptionParser

#
# Parse the options
#
option_parser = OptionParser()
option_parser.add_option(
    "-n",
    "--num-seqs",
    dest="num_seqs",
    default=10,
    type='int',
    help="Number of sequences to output."
)
options, args = option_parser.parse_args()

#
# Check args
#
if 1 != len(args):
    # Written directly to stderr (newline included) so the message is
    # emitted the same way under Python 2 and Python 3.
    sys.stderr.write('USAGE: %s <fasta file>\n' % __file__)
    sys.exit(-1)
fasta = args[0]

# A file name of '-' selects standard input instead of a file on disk.
if '-' == fasta:
    fasta_stream = sys.stdin
else:
    fasta_stream = open(fasta, 'r')

#
# Copy the leading sequences to standard output
#
# islice() hands the records over one at a time, so each sequence is written
# as soon as it is read.  A negative count is clamped to zero (islice rejects
# negative bounds), which means "output nothing".
num_to_write = max(options.num_seqs, 0)
try:
    sequences = F.iterseq(fasta_stream, corebio.seq.dna_alphabet)
    for seq in islice(sequences, num_to_write):
        F.writeseq(sys.stdout, seq)
finally:
    # Only close what this script opened; leave standard input alone.
    if fasta_stream is not sys.stdin:
        fasta_stream.close()
""" Code that reads in 2 sets of sequences and outputs those in the first that are not in the second. """ import sys, corebio.seq_io.fasta_io as F, corebio.seq from optparse import OptionParser # # Parse the options # option_parser = OptionParser() options, args = option_parser.parse_args() # # Check args # if 2 != len(args): print >> sys.stderr, 'USAGE: %s <fasta file> <fasta file>' % __file__ sys.exit(-1) fasta1, fasta2 = args # read in second fasta to_subtract = set( seq.description.strip().lower() for seq in F.iterseq(open(fasta2, 'r'), corebio.seq.dna_alphabet)) # read in first fasta for seq in F.iterseq(open(fasta1, 'r'), corebio.seq.dna_alphabet): if seq.description.strip().lower() not in to_subtract: F.writeseq(sys.stdout, seq)
option_parser.add_option(
    '-m',
    '--max-sequences',
    dest='max_seqs',
    type='int',
    default=-1,
    help="Set a limit on the number of sequences output."
)
options, args = option_parser.parse_args()

#
# Check args
#
if 2 != len(args):
    # Written directly to stderr (newline included) so the message is
    # emitted the same way under Python 2 and Python 3.
    sys.stderr.write(
        'USAGE: %s <fasta file> <max sequence length>\n' % __file__
    )
    sys.exit(-1)
fasta = args[0]
# NOTE(review): `length` is not used below; presumably shorten() reads it as
# a module-level global - confirm against its definition.
length = int(args[1])

# A file name of '-' selects standard input instead of a file on disk.
if '-' == fasta:
    input = sys.stdin
else:
    input = open(fasta, 'r')

#
# Read the sequences
#
alphabet = corebio.seq.reduced_nucleic_alphabet
try:
    for i, seq in enumerate(F.iterseq(input, alphabet)):
        # Stop once max_seqs sequences have been written.  The default of -1
        # never equals an index, so by default there is no limit.
        if options.max_seqs == i:
            break
        F.writeseq(sys.stdout, shorten(seq))
finally:
    # Only close what this script opened; leave standard input alone.
    if input is not sys.stdin:
        input.close()
'--max-sequences', dest='max_seqs', type='int', default=-1, help="Set a limit on the number of sequences output." ) options, args = option_parser.parse_args() # # Check args # if 2 != len(args): print >> sys.stderr, 'USAGE: %s <fasta file> <max sequence length>' % __file__ sys.exit(-1) fasta = args[0] length = int(args[1]) if '-' == fasta: input = sys.stdin else: input = open(fasta, 'r') # # Read the sequences # alphabet = corebio.seq.reduced_nucleic_alphabet for i, seq in enumerate(F.iterseq(input, alphabet)): if options.max_seqs == i: break F.writeseq(sys.stdout, shorten(seq))
shuffled.name = '%s (shuffled)' % seq.name, shuffled.description = '%s (shuffled)' % seq.description, shuffled.alphabet = seq.alphabet return shuffled # # Parse the options # option_parser = OptionParser() options, args = option_parser.parse_args() # # Check args # if 1 != len(args): print >> sys.stderr, 'USAGE: %s <fasta file>' % __file__ sys.exit(-1) fasta = args[0] # # Shuffle the sequences # for seq in F.iterseq( open(fasta, 'r'), corebio.seq.dna_alphabet ): F.writeseq(sys.stdout, shuffle(seq))