# Removes samples from a FASTA file:
#
#     ./me FASTA_FILE sample_1,sample_2,[...],sample_N
#
# Every read whose sample name appears in the comma-separated list is dropped;
# all other reads are written to FASTA_FILE-SAMPLES-REMOVED.fa.

import sys


def parse_sample_names(arg):
    """Return the set of sample names found in a comma-separated string.

    Surrounding whitespace is stripped from each name. Empty entries (from a
    trailing or doubled comma) are discarded: an empty name would otherwise
    match every read whose id contains no underscore.
    """
    stripped = (name.strip() for name in arg.split(','))
    return {name for name in stripped if name}


def sample_name_of(read_id):
    """Return the sample part of a read id: everything before the last '_'.

    An id that contains no underscore yields the empty string.
    """
    return '_'.join(read_id.split('_')[:-1])


def remove_samples(fasta_path, sample_names):
    """Copy `fasta_path` to a new FASTA file, skipping the given samples.

    `sample_names` is a collection of sample names to drop (a set keeps the
    per-read membership test cheap). Returns the path of the file written.
    """
    # Imported here rather than at module level so that the pure helpers
    # above stay importable on machines without Oligotyping installed.
    import Oligotyping.lib.fastalib as u
    from Oligotyping.utils.utils import pretty_print as pp

    output_path = fasta_path + '-SAMPLES-REMOVED.fa'

    fasta = u.SequenceSource(fasta_path)
    try:
        output = u.FastaOutput(output_path)
        try:
            while fasta.next():
                # Progress goes to stderr; '\r' rewrites the same line.
                if fasta.pos % 1000 == 0:
                    sys.stderr.write('\rreads processed so far: %s'
                                     % (pp(fasta.pos)))
                    sys.stderr.flush()

                if sample_name_of(fasta.id) in sample_names:
                    continue

                # NOTE(review): split=False presumably keeps each sequence
                # on one line -- confirm against fastalib.
                output.store(fasta, split=False)
        finally:
            output.close()
    finally:
        fasta.close()

    sys.stderr.write('\rNew FASTA file .............: %s\n' % (output_path))
    return output_path


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 3:
        sys.stderr.write(
            'Usage: %s FASTA_FILE sample_1,sample_2,[...],sample_N\n'
            % argv[0])
        return 1

    remove_samples(argv[1], parse_sample_names(argv[2]))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
# Lists the samples found in a FASTA file together with their read counts:
#
#     ./me FASTA_FILE
#
# The sample name of a read is its id with the last '_'-separated field
# removed. Written so that it runs unchanged on Python 2.7 and Python 3.

import sys
import operator
from collections import Counter


def sample_name_of(read_id):
    """Return the sample part of a read id: everything before the last '_'.

    An id that contains no underscore yields the empty string.
    """
    return '_'.join(read_id.split('_')[:-1])


def iter_read_ids(fasta):
    """Yield the id of every read in `fasta`, reporting progress on stderr."""
    while fasta.next():
        # '\r' rewrites the same terminal line instead of scrolling.
        if fasta.pos % 1000 == 0:
            sys.stderr.write('\rreads processed so far: %d' % (fasta.pos))
            sys.stderr.flush()
        yield fasta.id


def count_reads_per_sample(read_ids):
    """Return a mapping of sample name -> number of reads, from read ids."""
    return Counter(sample_name_of(read_id) for read_id in read_ids)


def sorted_by_read_count(samples):
    """Return (sample, read_count) pairs, largest read count first."""
    return sorted(samples.items(), key=operator.itemgetter(1), reverse=True)


def report_sample_counts(fasta_path):
    """Print per-sample read counts and totals for the FASTA at `fasta_path`."""
    # Imported here rather than at module level so that the pure helpers
    # above stay importable on machines without Oligotyping installed.
    import Oligotyping.lib.fastalib as u
    from Oligotyping.utils.utils import pretty_print as pp

    fasta = u.SequenceSource(fasta_path)
    try:
        samples = count_reads_per_sample(iter_read_ids(fasta))
        # Read the final position before the source is closed.
        total_reads = fasta.pos
    finally:
        fasta.close()

    sys.stderr.write('\rSamples and read counts found in the FASTA file:\n')

    # Each print() call takes one pre-formatted string, so the output is
    # the same whether print is a statement (Python 2) or a function.
    for sample, read_count in sorted_by_read_count(samples):
        print('%-30s %s' % (sample, pp(read_count)))

    print('')
    print('')
    print('Total number of samples:  %s' % pp(len(samples)))
    print('Total number of reads:  %s' % pp(total_reads))
    print('')


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 2:
        sys.stderr.write('Usage: %s FASTA_FILE\n' % argv[0])
        return 1

    report_sample_counts(argv[1])
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))