Esempio n. 1
0
def merge_generators(filename, batchsize, seqlen):
    """Yield sequence and chromatin inputs paired with their labels.

    Opens three ``iu.train_generator`` streams on ``filename + ".seq"``,
    ``filename + ".chromtracks"`` and ``filename + ".labels"`` (each with
    the "repeat" mode argument) and draws one item from each stream per
    iteration, in that order.

    Args:
        filename: Common path prefix shared by the three input files.
        batchsize: Forwarded unchanged to ``iu.train_generator``.
        seqlen: Forwarded unchanged to ``iu.train_generator``.

    Yields:
        A tuple ``([seq_item, chrom_item], label_item)``.
    """
    X = iu.train_generator(filename + ".seq", batchsize, seqlen, "seq", "repeat")
    # Merge the marks here, and then split them up in training.
    C = iu.train_generator(filename + ".chromtracks", batchsize, seqlen, "chrom", "repeat")
    y = iu.train_generator(filename + ".labels", batchsize, seqlen, "labels", "repeat")
    while True:
        # The builtin next() works on both Python 2.6+ and Python 3, unlike
        # the Python 2-only ``.next()`` method.
        yield [next(X), next(C)], next(y)
Esempio n. 2
0
def merge_generators(path, batchsize, seqlen, mode):
    """Yield sequence and combined chromatin features with their labels.

    One ``train_generator`` stream is opened for ``path['seq']``, one per
    entry of ``path['chromatin_tracks']`` and one for ``path['labels']``.
    On every iteration one item is drawn from each chromatin stream, the
    items are wrapped in DataFrames and concatenated column-wise, and the
    result is paired with the next sequence item and the next label item.

    Args:
        path: Mapping with the keys ``'seq'``, ``'chromatin_tracks'``
            (an iterable of track paths) and ``'labels'``.
        batchsize: Forwarded unchanged to ``train_generator``.
        seqlen: Forwarded unchanged to ``train_generator``.
        mode: Forwarded unchanged to ``train_generator``.

    Yields:
        A tuple ``([sequence_item, chromatin_array], label_item)``.
    """
    seq_stream = train_generator(path['seq'], batchsize, seqlen, 'seq', mode)
    track_streams = [
        train_generator(track_path, batchsize, seqlen, 'chrom', mode)
        for track_path in path['chromatin_tracks']
    ]
    label_stream = train_generator(
        path['labels'], batchsize, seqlen, 'labels', mode)

    while True:
        # Chromatin streams are advanced first, then sequence, then labels.
        track_frames = [pd.DataFrame(next(stream)) for stream in track_streams]
        chrom_batch = pd.concat(track_frames, axis=1).values
        seq_batch = next(seq_stream)
        label_batch = next(label_stream)
        yield [seq_batch, chrom_batch], label_batch
Esempio n. 3
0
def data_generator(path, batchsize, seqlen, bin_size):
    """Yield sequence and binned chromatin features with their labels.

    One ``train_generator`` stream (with the 'repeat' mode argument) is
    opened for ``path['seq']``, one per entry of
    ``path['chromatin_tracks']`` and one for ``path['labels']``. On every
    iteration one item is drawn from each chromatin stream, its ``shape``
    is checked against ``(batchsize, seqlen / bin_size)``, and the items
    are concatenated column-wise into a single array.

    Args:
        path: Mapping with the keys ``'seq'``, ``'chromatin_tracks'``
            (an iterable of track paths) and ``'labels'``.
        batchsize: Forwarded to ``train_generator``; also the expected
            first dimension of every chromatin item.
        seqlen: Forwarded to ``train_generator``; together with
            ``bin_size`` it gives the expected second dimension.
        bin_size: Bin width used to derive the expected second dimension.

    Yields:
        A tuple ``([sequence_item, chromatin_array], label_item)``.

    Raises:
        AssertionError: If a chromatin item's shape differs from
            ``(batchsize, seqlen / bin_size)``.
    """
    dat_seq = train_generator(path['seq'], batchsize, seqlen, 'seq', 'repeat')
    dat_chromatin = []
    for chromatin_track in path['chromatin_tracks']:
        dat_chromatin.append(
            train_generator(chromatin_track, batchsize, seqlen, 'chrom',
                            'repeat'))
    y = train_generator(path['labels'], batchsize, seqlen, 'labels', 'repeat')
    while True:
        combined_chrom_data = []
        for chromatin_track_generators in dat_chromatin:
            curr_chromatin_mark = next(chromatin_track_generators)
            mark_resolution = curr_chromatin_mark.shape
            # Report the bin size actually in use (the message previously
            # hard-coded 50) together with the expected and observed shapes.
            assert mark_resolution == (batchsize, seqlen / bin_size), (
                "Please check binning, specified bin size={}; "
                "expected shape {} but got {}".format(
                    bin_size, (batchsize, seqlen / bin_size),
                    mark_resolution))
            combined_chrom_data.append(pd.DataFrame(curr_chromatin_mark))
        # Place the per-track frames side by side and drop to a plain array.
        chromatin_features = pd.concat(combined_chrom_data, axis=1).values
        # Diagnostic output: shape of the combined chromatin array.
        print(chromatin_features.shape)
        sequence_features = next(dat_seq)
        labels = next(y)
        yield [sequence_features, chromatin_features], labels
Esempio n. 4
0
def data_generator(path, batchsize, seqlen):
    """Yield ``(sequence_item, label_item)`` pairs.

    Opens one ``train_generator`` stream on ``path['seq']`` and one on
    ``path['labels']`` (both with the 'repeat' mode argument) and draws
    one item from each per iteration, sequence first.

    Args:
        path: Mapping with the keys ``'seq'`` and ``'labels'``.
        batchsize: Forwarded unchanged to ``train_generator``.
        seqlen: Forwarded unchanged to ``train_generator``.

    Yields:
        A tuple ``(sequence_item, label_item)``.
    """
    seq_stream = train_generator(
        path['seq'], batchsize, seqlen, 'seq', 'repeat')
    label_stream = train_generator(
        path['labels'], batchsize, seqlen, 'labels', 'repeat')
    while True:
        seq_batch = next(seq_stream)
        label_batch = next(label_stream)
        yield seq_batch, label_batch
def val_generator(filename, batchsize, seqlen):
    """Yield ``(sequence_item, label_item)`` pairs for validation.

    Opens ``iu.train_generator`` streams on ``filename + ".seq"`` and
    ``filename + ".labels"`` (both with the "repeat" mode argument) and
    draws one item from each per iteration, sequence first.

    Args:
        filename: Common path prefix shared by the two input files.
        batchsize: Forwarded unchanged to ``iu.train_generator``.
        seqlen: Forwarded unchanged to ``iu.train_generator``.

    Yields:
        A tuple ``(sequence_item, label_item)``.
    """
    X = iu.train_generator(filename + ".seq", batchsize, seqlen, "seq", "repeat")
    y = iu.train_generator(filename + ".labels", batchsize, seqlen, "labels", "repeat")
    while True:
        # The builtin next() works on both Python 2.6+ and Python 3, unlike
        # the Python 2-only ``.next()`` method.
        yield next(X), next(y)