def test_take_sample():
    """Check that take_sample returns samples of the requested size.

    Two things are exercised:

    * a basic draw of 10 items out of a 100 item iterator, which must have
      the requested length and must not simply be the head of the stream;
    * several (population size, sample size) combinations, each repeated
      ten times, checking only the length of the returned sample.
    """
    # basic test
    items = iter(range(100))
    sample = list(take_sample(items, 10))
    assert len(sample) == 10
    # range(100) starts at 0, so the first ten items are 0..9. Comparing
    # against 1..10 would not notice a take_sample that just returned the
    # head of the iterator.
    assert sample != list(range(10))

    n_items = [1000, 100, 100000, 100000, 100000]
    sample_sizes = [990, 8, 100, 100, 200]
    for n_item, sample_size in zip(n_items, sample_sizes):
        # Repeat every combination ten times; a fresh iterator is needed
        # each time because the previous one has been consumed.
        for _ in range(10):
            iterator = iter(range(n_item))
            sampled = take_sample(iterator, sample_size)
            assert sample_size == len(list(sampled))
def seqs_in_file(seq_fhand, qual_fhand=None, format=None, sample_size=None,
                 double_encoding=False):
    """Return an iterator over the sequences found in the seq file.

    seq_fhand -- file handle with the sequences.
    qual_fhand -- optional file handle, forwarded to _seqs_in_file.
    format -- file format; when None it is obtained from
              guess_seq_file_format(seq_fhand).
    sample_size -- when None every sequence is returned, otherwise the
                   sequences are passed through take_sample.
    double_encoding -- forwarded unchanged to _seqs_in_file.
    """
    file_format = guess_seq_file_format(seq_fhand) if format is None else format

    records = _seqs_in_file(seq_fhand,
                            qual_fhand=qual_fhand,
                            file_format=file_format,
                            double_encoding=double_encoding)

    # Without a sample size there is nothing else to do.
    if sample_size is None:
        return records

    # take_sample is given the total number of sequences when it can be
    # computed for this format, and None otherwise.
    try:
        total_seqs = num_seqs_in_file(seq_fhand, file_format)
    except NotImplementedError:
        total_seqs = None

    return take_sample(records, sample_size, total_seqs)
def sample_bam(bam_fhand, out_bam_fhand, sample_size):
    """Take a sample of the reads in a BAM file and write it as a new BAM.

    The input BAM is converted to a temporary SAM file (header included).
    The header lines are copied verbatim, the remaining reads are passed
    through take_sample, and the result is converted back to BAM.

    bam_fhand -- input BAM file object; only its .name is used.
    out_bam_fhand -- output BAM file object; only its .name is used.
    sample_size -- forwarded to take_sample as sample_size.

    NOTE(review): the temporary files use NamedTemporaryFile's default mode
    while the header is read with a plain open() -- confirm this matches the
    Python version the project targets.
    """
    sam_fhand = NamedTemporaryFile(suffix='.sam')
    final_sam = NamedTemporaryFile(suffix='.sam')
    try:
        bam2sam(bam_fhand.name, sam_fhand.name, header=True)

        # First get header: it is the leading run of lines starting with
        # '@'. The file is reopened by name and closed as soon as the header
        # has been copied, so no handle is leaked.
        with open(sam_fhand.name) as header_fhand:
            for line in header_fhand:
                if line[0] != '@':
                    break
                final_sam.write(line)

        sam_body = take_sample(_reads_in_sam(sam_fhand),
                               sample_size=sample_size)
        for line in sam_body:
            final_sam.write(line)

        # sam2bam receives the file by name, so buffered data has to be on
        # disk before it is called.
        final_sam.flush()
        sam2bam(final_sam.name, out_bam_fhand.name)
    finally:
        # Closing a NamedTemporaryFile also removes it; do it explicitly so
        # the temporary files are released even when a step above fails.
        final_sam.close()
        sam_fhand.close()
def test_tee_sample(self):
    """It tests that tee and sample behave ok together.

    A 50 item sample is requested from a 1000 item iterator and the result
    is split in two with itertools.tee.
    """
    # NOTE(review): no assertion on sample1/sample2 is visible here; the
    # test may continue beyond this point or may only check that nothing
    # raises -- confirm.
    items = iter(range(1000))
    sample = take_sample(items, 50)
    sample1, sample2 = itertools.tee(sample)