def generate_sample_reads():
    """Build every paired read of a small hard-coded sample genome.

    The genome string "TAATGCCATGGGATGTT" is converted with
    ``common.convert_genome_str_to_int`` (presumably one integer code per
    base -- confirm against ``common``). A window is then slid over it one
    position at a time; at each position a pair is emitted consisting of a
    read of length 3 and a second read of length 3 that starts one position
    (the pair gap) after the first read ends.

    Returns:
        A ``uint8`` array of shape ``(num_reads, 2, sample_read_length)``
        where ``[i, 0]`` is the first read of pair ``i`` and ``[i, 1]`` is
        its mate.
    """
    sample_genome = np.array(common.convert_genome_str_to_int("TAATGCCATGGGATGTT"))
    sample_read_length = 3
    sample_read_pair_gap = 1

    # Distance from the start of the first read to the start of its mate.
    mate_offset = sample_read_length + sample_read_pair_gap
    # A pair starting at i covers mate_offset + sample_read_length positions,
    # so this is the number of start positions that still fit in the genome.
    num_reads = len(sample_genome) - mate_offset - sample_read_length + 1

    sample_reads = np.zeros((num_reads, 2, sample_read_length), dtype=np.uint8)
    # range (not xrange) keeps this runnable on both Python 2 and Python 3.
    for start in range(num_reads):
        mate_start = start + mate_offset
        sample_reads[start, 0] = sample_genome[start : start + sample_read_length]
        sample_reads[start, 1] = sample_genome[mate_start : mate_start + sample_read_length]
    return sample_reads
def get_reads(dataset_name, read_length=50):
    """Load the paired reads of a dataset into one integer array.

    Reads ``dataset/<dataset_name>/reads.txt``. The first line is skipped
    (treated as a header). Every following non-blank line is split on commas
    and each field is converted with ``common.convert_genome_str_to_int``
    (presumably one integer code per character -- confirm against
    ``common``).

    Args:
        dataset_name: Name of the directory under ``dataset/`` that holds
            ``reads.txt``.
        read_length: Width used only when the file contains no data lines.
            Otherwise it is overridden by the length of the first read on
            the first data line.

    Returns:
        A ``uint8`` array of shape ``(num_pairs, 2, read_length)``. When the
        second read of a pair is shorter than ``read_length`` it is stored
        left-aligned and the remaining positions stay zero.
    """
    with open("dataset/%s/reads.txt" % dataset_name, "r") as f:
        lines = f.readlines()

    # Drop the header, strip trailing whitespace, and ignore blank lines
    # (e.g. a trailing empty line), which carry no read pair.
    records = [line.rstrip() for line in lines[1:]]
    records = [record for record in records if record]

    if not records:
        # Nothing to parse: return an empty array with the requested width
        # instead of failing on a missing first data line.
        return np.zeros((0, 2, read_length), dtype=np.uint8)

    # The actual read length comes from the data, not from the argument.
    read_length = len(records[0].split(',')[0])

    all_reads = np.zeros((len(records), 2, read_length), dtype=np.uint8)

    for pair_idx, record in enumerate(records):
        # A list (not map) so the pair can be indexed on Python 3 as well.
        int_read_pairs = [
            common.convert_genome_str_to_int(raw_read)
            for raw_read in record.split(',')
        ]
        second_read = int_read_pairs[1]
        if len(second_read) < read_length:
            # Short mate: copy what exists and leave the tail zero-padded.
            all_reads[pair_idx, 0] = int_read_pairs[0]
            all_reads[pair_idx, 1, :len(second_read)] = second_read
        else:
            all_reads[pair_idx] = int_read_pairs

    return all_reads