def test_take_sample():
    'Checks that take_sample returns as many items as requested'
    # Smoke check: ten items drawn out of a hundred.
    population = iter(range(100))
    drawn = list(take_sample(population, 10))
    assert len(drawn) == 10
    assert drawn != [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    # (number of items, requested sample size) pairs; each pair is tried
    # ten times with a fresh iterator.
    cases = [(1000, 990), (100, 8), (100000, 100), (100000, 100),
             (100000, 200)]
    for n_item, sample_size in cases:
        for _ in range(10):
            picked = take_sample(iter(range(n_item)), sample_size)
            assert sample_size == len(list(picked))
Example #2
0
def seqs_in_file(seq_fhand, qual_fhand=None, format=None, sample_size=None,
                 double_encoding=False):
    '''It yields a seqrecord for each of the sequences found in the seq file.

    When format is None it is obtained from guess_seq_file_format.
    When sample_size is None the sequences are returned as they are;
    otherwise they are passed through take_sample together with the result
    of num_seqs_in_file (or None if that raises NotImplementedError).
    '''
    file_format = format
    if file_format is None:
        file_format = guess_seq_file_format(seq_fhand)

    records = _seqs_in_file(seq_fhand, qual_fhand=qual_fhand,
                            file_format=file_format,
                            double_encoding=double_encoding)

    if sample_size is not None:
        try:
            total = num_seqs_in_file(seq_fhand, file_format)
        except NotImplementedError:
            # The count is not available for this format; sample without it.
            total = None
        return take_sample(records, sample_size, total)

    return records
Example #3
0
def sample_bam(bam_fhand, out_bam_fhand, sample_size):
    '''It takes a sample from a bam

    The file named by bam_fhand.name is converted with bam2sam
    (header=True) into a temporary sam. The leading lines that start with
    '@' are copied as they are, the output of _reads_in_sam is passed
    through take_sample with the given sample_size, and the resulting sam
    is converted with sam2bam into the file named by out_bam_fhand.name.
    '''
    # The temporary files are context-managed so that they are closed (and
    # therefore removed) as soon as the work is done, even if a step raises.
    with NamedTemporaryFile(suffix='.sam') as sam_fhand:
        with NamedTemporaryFile(suffix='.sam') as final_sam:
            bam2sam(bam_fhand.name, sam_fhand.name, header=True)

            # First get header. A separate handle is used so that the
            # position of sam_fhand is left untouched for _reads_in_sam.
            with open(sam_fhand.name) as header_fhand:
                for line in header_fhand:
                    if not line.startswith('@'):
                        break
                    final_sam.write(line)

            sam_body = take_sample(_reads_in_sam(sam_fhand),
                                   sample_size=sample_size)
            for line in sam_body:
                final_sam.write(line)

            # sam2bam reads the file by name, so everything must be on disk.
            final_sam.flush()
            sam2bam(final_sam.name, out_bam_fhand.name)
 def test_tee_sample(self):
     'It tests that tee and sample behave ok together'
     items = iter(range(1000))
     sample = take_sample(items, 50)
     sample1, sample2 = itertools.tee(sample)