import pandas as pd  # needed by the generators below to concatenate chromatin tracks


def merge_generators(filename, batchsize, seqlen):
    X = iu.train_generator(filename + ".seq", batchsize, seqlen, "seq", "repeat")
    # Merge the marks here, and then split them up in training.
    C = iu.train_generator(filename + ".chromtracks", batchsize, seqlen, "chrom", "repeat")
    y = iu.train_generator(filename + ".labels", batchsize, seqlen, "labels", "repeat")
    while True:
        yield [next(X), next(C)], next(y)


def merge_generators(path, batchsize, seqlen, mode):
    # One generator for the sequence input, one per chromatin track, one for labels.
    dat_seq = train_generator(path['seq'], batchsize, seqlen, 'seq', mode)
    dat_chromatin = []
    for chromatin_track in path['chromatin_tracks']:
        dat_chromatin.append(
            train_generator(chromatin_track, batchsize, seqlen, 'chrom', mode))
    y = train_generator(path['labels'], batchsize, seqlen, 'labels', mode)
    while True:
        # Concatenate the per-track batches column-wise into one chromatin matrix.
        combined_chrom_data = []
        for chromatin_track_generator in dat_chromatin:
            x = next(chromatin_track_generator)
            combined_chrom_data.append(pd.DataFrame(x))
        chromatin_features = pd.concat(combined_chrom_data, axis=1).values
        sequence_features = next(dat_seq)
        labels = next(y)
        yield [sequence_features, chromatin_features], labels


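# A minimal usage sketch (not called anywhere in this module): how the merged
# generator could feed a compiled two-input Keras model. The `model` argument,
# the file paths, and the hyper-parameter values are assumptions for
# illustration, not part of this codebase.
def _example_fit_with_merged_generator(model):
    train_paths = {'seq': 'train.seq',
                   'chromatin_tracks': ['train.markA.chromtracks',
                                        'train.markB.chromtracks'],
                   'labels': 'train.labels'}
    train_gen = merge_generators(train_paths, batchsize=32, seqlen=500, mode='repeat')
    # Recent tf.keras versions accept Python generators in Model.fit directly;
    # older Keras releases use model.fit_generator instead.
    model.fit(train_gen, steps_per_epoch=1000, epochs=5)

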
def data_generator(path, batchsize, seqlen, bin_size):
    dat_seq = train_generator(path['seq'], batchsize, seqlen, 'seq', 'repeat')
    dat_chromatin = []
    for chromatin_track in path['chromatin_tracks']:
        dat_chromatin.append(
            train_generator(chromatin_track, batchsize, seqlen, 'chrom', 'repeat'))
    y = train_generator(path['labels'], batchsize, seqlen, 'labels', 'repeat')
    while True:
        combined_chrom_data = []
        for chromatin_track_generator in dat_chromatin:
            curr_chromatin_mark = next(chromatin_track_generator)
            # Each binned track must have one value per bin: (batchsize, seqlen // bin_size).
            mark_resolution = curr_chromatin_mark.shape
            assert mark_resolution == (batchsize, seqlen // bin_size), \
                "Please check binning, specified bin size={}".format(bin_size)
            combined_chrom_data.append(pd.DataFrame(curr_chromatin_mark))
        chromatin_features = pd.concat(combined_chrom_data, axis=1).values
        sequence_features = next(dat_seq)
        labels = next(y)
        yield [sequence_features, chromatin_features], labels


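# Illustrative shape check for the binned chromatin concatenation above; the
# numbers (batchsize=32, seqlen=500, bin_size=50, three tracks) are assumptions
# chosen only to make the arithmetic concrete.
def _example_binned_shapes(batchsize=32, seqlen=500, bin_size=50, n_tracks=3):
    import numpy as np
    # Each binned track yields a (batchsize, seqlen // bin_size) = (32, 10) batch here.
    tracks = [pd.DataFrame(np.random.rand(batchsize, seqlen // bin_size))
              for _ in range(n_tracks)]
    # Column-wise concatenation of three such tracks gives a (32, 30) matrix,
    # matching what data_generator passes to the model as chromatin_features.
    return pd.concat(tracks, axis=1).values.shape

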
def data_generator(path, batchsize, seqlen):
    X = train_generator(path['seq'], batchsize, seqlen, 'seq', 'repeat')
    y = train_generator(path['labels'], batchsize, seqlen, 'labels', 'repeat')
    while True:
        yield next(X), next(y)


def val_generator(filename, batchsize, seqlen):
    X = iu.train_generator(filename + ".seq", batchsize, seqlen, "seq", "repeat")
    y = iu.train_generator(filename + ".labels", batchsize, seqlen, "labels", "repeat")
    while True:
        yield next(X), next(y)


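# A hypothetical evaluation sketch (not called anywhere in this module): the
# file prefix, step count, and `model` (a single-input sequence model) are
# assumptions for illustration only.
def _example_validate(model):
    val_gen = val_generator('data/val', batchsize=32, seqlen=500)
    # Recent tf.keras versions can evaluate directly on a generator of
    # (inputs, labels) batches; older Keras releases use evaluate_generator.
    return model.evaluate(val_gen, steps=100)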