def _generator(self):
    """Yield padded ([X, mask], y) batches forever, reordering data each epoch."""
    batch = self.batch_size
    while True:
        if self.shuffle:
            # Apply one shared random permutation to every parallel array.
            n = len(self.data[1])
            perm = list(range(n))
            random.shuffle(perm)
            xs = [self.data[0][0][j] for j in perm]
            masks = [self.data[0][1][j] for j in perm]
            ys = [self.data[1][j] for j in perm]
            self.names = [self.names[j] for j in perm]
            self.ts = [self.ts[j] for j in perm]
            self.data = [[xs, masks], ys]
        else:
            # sort entirely
            xs, masks, ys = self.data[0][0], self.data[0][1], self.data[1]
            (xs, masks, ys, self.names, self.ts) = common_utils.sort_and_shuffle(
                [xs, masks, ys, self.names, self.ts], batch)
            self.data = [[xs, masks], ys]
        for lo in range(0, len(self.data[1]), batch):
            hi = lo + batch
            X = common_utils.pad_zeros(self.data[0][0][lo:hi])     # (B, T, D)
            mask = common_utils.pad_zeros(self.data[0][1][lo:hi])  # (B, T)
            y = common_utils.pad_zeros(self.data[1][lo:hi])
            y = np.expand_dims(y, axis=-1)                         # (B, T, 1)
            payload = ([X, mask], y)
            if self.return_names:
                yield {"data": payload,
                       "names": self.names[lo:hi],
                       "ts": self.ts[lo:hi]}
            else:
                yield payload
def _generator(self):
    """Yield (X, y[, y_true]) batches forever, reading the dataset in chunks.

    Each pass optionally shuffles the reader, pulls `chunk_size` examples at a
    time, discretizes/normalizes them, groups similar-length sequences via
    sort_and_shuffle, and emits zero-padded batches of `batch_size` examples.
    Fix: removed leftover debug `print` calls and commented-out debug code.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        # NOTE(review): deliberately reads ~15% more examples than n_examples,
        # presumably so Keras' steps-per-epoch accounting never starves the
        # generator — confirm against the steps computation in __init__.
        remaining = int(self.n_examples * 1.15)
        while remaining > 0:
            current_size = min(self.chunk_size, remaining)
            remaining -= current_size
            ret = common_utils.read_chunk(self.reader, current_size)
            Xs = ret["X"]
            ts = ret["t"]
            ys = ret["y"]
            names = ret["name"]
            Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
            (Xs, ys, ts, names) = common_utils.sort_and_shuffle([Xs, ys, ts, names], B)
            for i in range(0, current_size, B):
                X = common_utils.pad_zeros(Xs[i:i + B])
                y = ys[i:i + B]
                y_true = np.array(y)  # raw targets kept before any binning
                batch_names = names[i:i + B]
                batch_ts = ts[i:i + B]
                # Optionally bucket the regression target into 10 bins.
                if self.partition == 'log':
                    y = [metrics.get_bin_log(x, 10) for x in y]
                if self.partition == 'custom':
                    y = [metrics.get_bin_custom(x, 10) for x in y]
                y = np.array(y)
                if self.return_y_true:
                    batch_data = (X, y, y_true)
                else:
                    batch_data = (X, y)
                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data, "names": batch_names, "ts": batch_ts}
def _generator(self):
    """Endless batch generator; optionally pairs X with an elapsed-time input."""
    batch = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        left = self.n_examples
        while left > 0:
            size = min(self.chunk_size, left)
            left -= size
            chunk = common_utils.read_chunk(self.reader, size)
            xs, ts_list = chunk["X"], chunk["t"]
            ys, names = chunk["y"], chunk["name"]
            xs, time_feats = preprocess_chunk_time(
                xs, ts_list, self.discretizer, self.normalizer,
                self.max_seq_len, self.mask_value)
            (xs, time_feats, ys, ts_list, names) = common_utils.sort_and_shuffle(
                [xs, time_feats, ys, ts_list, names], batch)
            for lo in range(0, size, batch):
                hi = lo + batch
                X = common_utils.pad_zeros(xs[lo:hi])
                T = common_utils.pad_zeros(time_feats[lo:hi])
                y = np.array(ys[lo:hi])
                payload = ([X, T], y) if self.use_time else (X, y)
                if self.return_names:
                    yield {"data": payload,
                           "names": names[lo:hi],
                           "ts": ts_list[lo:hi]}
                else:
                    yield payload
def _generator(self):
    """Endless generator of padded LOS batches with optional target binning."""
    batch = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        left = self.n_examples
        while left > 0:
            size = min(self.chunk_size, left)
            left -= size
            chunk = common_utils.read_chunk(self.reader, size)
            xs, ts_list = chunk["X"], chunk["t"]
            ys, names = chunk["y"], chunk["name"]
            xs = preprocess_chunk(xs, ts_list, self.discretizer, self.normalizer,
                                  max_seq_len=1200, mask_value=0.)
            (xs, ys, ts_list, names) = common_utils.sort_and_shuffle(
                [xs, ys, ts_list, names], batch)
            for lo in range(0, size, batch):
                hi = lo + batch
                X = common_utils.pad_zeros(xs[lo:hi])
                raw = ys[lo:hi]
                y_true = np.array(raw)  # untouched targets, pre-binning
                batch_names = names[lo:hi]
                batch_ts = ts_list[lo:hi]
                # Optionally bucket the regression target into 10 bins.
                if self.partition == 'log':
                    raw = [metrics.get_bin_log(v, 10) for v in raw]
                if self.partition == 'custom':
                    raw = [metrics.get_bin_custom(v, 10) for v in raw]
                y = np.array(raw)
                payload = (X, y, y_true) if self.return_y_true else (X, y)
                if self.return_names:
                    yield {"data": payload,
                           "names": batch_names,
                           "ts": batch_ts}
                else:
                    yield payload
def _generator(self):
    """Endless phenotyping batch generator over the in-memory dataset."""
    batch = self.batch_size
    while True:
        if self.shuffle:
            # Permute every parallel array with the same random order.
            n = len(self.data[1])
            perm = list(range(n))
            random.shuffle(perm)
            xs = [self.data[0][j] for j in perm]
            ys = [self.data[1][j] for j in perm]
            self.names = [self.names[j] for j in perm]
            self.ts = [self.ts[j] for j in perm]
            self.data = [xs, ys]
        else:
            # sort entirely
            (xs, ys, self.names, self.ts) = common_utils.sort_and_shuffle(
                [self.data[0], self.data[1], self.names, self.ts], batch)
            self.data = [xs, ys]
        self.data[1] = np.array(self.data[1])  # this is important for Keras
        for lo in range(0, len(self.data[0]), batch):
            hi = lo + batch
            names = self.names[lo:hi]
            ts = self.ts[lo:hi]
            x = common_utils.pad_zeros(self.data[0][lo:hi])
            y = np.array(self.data[1][lo:hi])  # (B, 25)
            if self.target_repl:
                # Replicate the labels across every timestep: (B, T, 25).
                y_rep = np.expand_dims(y, axis=1).repeat(x.shape[1], axis=1)
                payload = (x, [y, y_rep])
            else:
                payload = (x, y)
            if self.return_names:
                yield {"data": payload, "names": names, "ts": ts}
            else:
                yield payload
def getitem(self, index, return_y_true=False):
    """Read and return the batch at position `index` from the reader.

    Reads `batch_size` examples starting at `index * batch_size`, preprocesses
    and zero-pads them, and returns (X, y) — or (X, y, y_true) when
    `return_y_true` — optionally wrapped in a dict with names/ts when
    `self.return_names` is set.
    Fix: removed leftover debug `print` calls, dead commented-out code, and the
    redundant `i = 0` slicing indirection.
    """
    B = self.batch_size
    ret = common_utils.read_chunk_index(self.reader, index * B, B)
    Xs = ret["X"]
    ts = ret["t"]
    ys = ret["y"]
    names = ret["name"]
    Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
    X = common_utils.pad_zeros(Xs[:B])
    y = ys[:B]
    y_true = np.array(y)  # raw targets kept before any binning
    batch_names = names[:B]
    batch_ts = ts[:B]
    # Optionally bucket the regression target into 10 bins.
    if self.partition == 'log':
        y = [metrics.get_bin_log(x, 10) for x in y]
    if self.partition == 'custom':
        y = [metrics.get_bin_custom(x, 10) for x in y]
    y = np.array(y)
    if return_y_true:
        batch_data = (X, y, y_true)
    else:
        batch_data = (X, y)
    if not self.return_names:
        return batch_data
    else:
        return {"data": batch_data, "names": batch_names, "ts": batch_ts}
def _generator(self):
    """Endless generator of (X, y, sample_weight) batches with class re-weighting.

    Labels are one-hot encoded when `num_classes != 1`; per-example weights are
    `class_0_weight` / `class_1_weight` keyed on the raw label being 0 or not.
    Fix: removed commented-out debug prints / dead alternative code paths and
    normalized spacing.
    """
    B = self.batch_size
    while True:
        if self.shuffle:
            self.reader.random_shuffle()
        remaining = self.n_examples
        while remaining > 0:
            current_size = min(self.chunk_size, remaining)
            remaining -= current_size
            ret = common_utils.read_chunk(self.reader, current_size)
            Xs = ret["X"]
            ts = ret["t"]
            ys = ret["y"]
            names = ret["name"]
            Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
            (Xs, ys, ts, names) = common_utils.sort_and_shuffle([Xs, ys, ts, names], B)
            for i in range(0, current_size, B):
                X = common_utils.pad_zeros(Xs[i:i + B])
                y_1d = np.array(ys[i:i + B])
                y = y_1d
                if self.num_classes != 1:
                    # One-hot encode — assumes y_1d holds integer class ids.
                    y = np.zeros((y_1d.size, self.num_classes))
                    y[np.arange(y_1d.size), y_1d] = 1
                batch_names = names[i:i + B]
                batch_ts = ts[i:i + B]
                # Per-sample Keras weights derived from the raw binary label.
                weight_list = [self.class_0_weight if x == 0 else self.class_1_weight
                               for x in np.nditer(y_1d)]
                sample_weight = np.asanyarray(weight_list, dtype=float)
                sample_weight = sample_weight.reshape(y_1d.shape)
                batch_data = (X, y, sample_weight)
                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data, "names": batch_names, "ts": batch_ts}
def _generator(self):
    """Endless deep-supervision generator yielding ([X, mask], y[, y_true])."""
    batch = self.batch_size
    while True:
        if self.shuffle:
            # One shared random permutation across all parallel arrays.
            n = len(self.data[1])
            perm = list(range(n))
            random.shuffle(perm)
            xs = [self.data[0][0][j] for j in perm]
            masks = [self.data[0][1][j] for j in perm]
            ys = [self.data[1][j] for j in perm]
            self.names = [self.names[j] for j in perm]
            self.ts = [self.ts[j] for j in perm]
            self.data = [[xs, masks], ys]
        else:
            # sort entirely
            xs, masks, ys = self.data[0][0], self.data[0][1], self.data[1]
            (xs, masks, ys, self.names, self.ts) = common_utils.sort_and_shuffle(
                [xs, masks, ys, self.names, self.ts], batch)
            self.data = [[xs, masks], ys]
        for lo in range(0, len(self.data[1]), batch):
            hi = lo + batch
            X = self.data[0][0][lo:hi]
            mask = self.data[0][1][lo:hi]
            y = self.data[1][lo:hi]
            names = self.names[lo:hi]
            ts = self.ts[lo:hi]
            # Keep raw per-timestep targets before any binning: (B, T, 1).
            y_true = common_utils.pad_zeros([np.array(seq) for seq in y])
            y_true = np.expand_dims(y_true, axis=-1)
            # Optionally bucket each timestep's target into 10 bins.
            if self.partition == 'log':
                y = [np.array([metrics.get_bin_log(v, 10) for v in seq]) for seq in y]
            if self.partition == 'custom':
                y = [np.array([metrics.get_bin_custom(v, 10) for v in seq]) for seq in y]
            X = common_utils.pad_zeros(X)        # (B, T, D)
            mask = common_utils.pad_zeros(mask)  # (B, T)
            y = np.expand_dims(common_utils.pad_zeros(y), axis=-1)
            if self.return_y_true:
                payload = ([X, mask], y, y_true)
            else:
                payload = ([X, mask], y)
            if self.return_names:
                yield {"data": payload, "names": names, "ts": ts}
            else:
                yield payload
def _generator(self):
    """Endless multitask (IHM + LOS) batch generator over self.data.

    self.data is a dict of parallel arrays keyed by 'X', 'ihm_M', 'ihm_y',
    'los_M', 'los_y', 'names', 'los_ts' — presumably all of equal length
    (TODO confirm against the loader). Yields (inputs, outputs[, los_y_true]),
    where inputs = [X, ihm_M, los_M], optionally wrapped with names/ts.
    """
    B = self.batch_size
    while True:
        # convert to right format for sort_and_shuffle
        # (sort_and_shuffle expects the sequence data 'X' as the first array,
        # so swap it to the front while remembering the original key order)
        kv_pairs = list(self.data.items())
        data_index = [pair[0] for pair in kv_pairs].index('X')
        if data_index > 0:
            kv_pairs[0], kv_pairs[data_index] = kv_pairs[
                data_index], kv_pairs[0]
        mas = [kv[1] for kv in kv_pairs]
        if self.shuffle:
            # Apply one shared random permutation to every parallel array,
            # then write the permuted arrays back into self.data by key.
            N = len(self.data['X'])
            order = list(range(N))
            random.shuffle(order)
            tmp = [None] * len(mas)
            for mas_idx in range(len(mas)):
                tmp[mas_idx] = [None] * len(mas[mas_idx])
                for i in range(N):
                    tmp[mas_idx][i] = mas[mas_idx][order[i]]
            for i in range(len(kv_pairs)):
                self.data[kv_pairs[i][0]] = tmp[i]
        else:
            # sort entirely
            mas = common_utils.sort_and_shuffle(mas, B)
            for i in range(len(kv_pairs)):
                self.data[kv_pairs[i][0]] = mas[i]
        for i in range(0, len(self.data['X']), B):
            outputs = []
            # X: pad to at least ihm_pos + 1 timesteps so the IHM readout
            # position always exists
            X = self.data['X'][i:i + B]
            X = common_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
            T = X.shape[1]
            # ihm
            ihm_M = np.array(self.data['ihm_M'][i:i + B])
            ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
            ihm_y = np.array(self.data['ihm_y'][i:i + B])
            ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
            outputs.append(ihm_y)
            if self.target_repl:
                # Replicate the IHM label across all timesteps.
                ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(
                    T, axis=1)  # (B, T, 1)
                outputs.append(ihm_seq)
            # los
            los_M = self.data['los_M'][i:i + B]
            los_M = common_utils.pad_zeros(los_M,
                                           min_length=self.ihm_pos + 1)
            los_y = self.data['los_y'][i:i + B]
            # Keep the unbinned per-timestep LOS targets for evaluation.
            los_y_true = common_utils.pad_zeros(los_y,
                                                min_length=self.ihm_pos + 1)
            # Optionally bucket each timestep's LOS target into 10 bins.
            if self.partition == 'log':
                los_y = [
                    np.array([metrics.get_bin_log(x, 10) for x in z])
                    for z in los_y
                ]
            if self.partition == 'custom':
                los_y = [
                    np.array([metrics.get_bin_custom(x, 10) for x in z])
                    for z in los_y
                ]
            los_y = common_utils.pad_zeros(los_y,
                                           min_length=self.ihm_pos + 1)
            los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
            outputs.append(los_y)
            inputs = [X, ihm_M, los_M]
            if self.return_y_true:
                batch_data = (inputs, outputs, los_y_true)
            else:
                batch_data = (inputs, outputs)
            if not self.return_names:
                yield batch_data
            else:
                yield {
                    'data': batch_data,
                    'names': self.data['names'][i:i + B],
                    'los_ts': self.data['los_ts'][i:i + B]
                }
def _generator(self):
    """Endless multitask (IHM + decomp + LOS + pheno) batch generator.

    self.data is a dict of parallel arrays keyed by 'X', 'T', 'ihm_M',
    'ihm_y', 'decomp_y', 'los_y', 'pheno_y', 'names', and the *_ts arrays —
    presumably all of equal length (TODO confirm against the loader).
    Yields (inputs, outputs) with inputs = [X, T, ihm_M], optionally wrapped
    with names and per-task timestamps.
    """
    B = self.batch_size
    while True:
        # convert to right format for sort_and_shuffle
        # (sort_and_shuffle expects the sequence data 'X' as the first array,
        # so swap it to the front while remembering the original key order)
        kv_pairs = list(self.data.items())
        data_index = [pair[0] for pair in kv_pairs].index('X')
        if data_index > 0:
            kv_pairs[0], kv_pairs[data_index] = kv_pairs[
                data_index], kv_pairs[0]
        mas = [kv[1] for kv in kv_pairs]
        if self.shuffle:
            # Apply one shared random permutation to every parallel array,
            # then write the permuted arrays back into self.data by key.
            N = len(self.data['X'])
            order = list(range(N))
            random.shuffle(order)
            tmp = [None] * len(mas)
            for mas_idx in range(len(mas)):
                tmp[mas_idx] = [None] * len(mas[mas_idx])
                for i in range(N):
                    tmp[mas_idx][i] = mas[mas_idx][order[i]]
            for i in range(len(kv_pairs)):
                self.data[kv_pairs[i][0]] = tmp[i]
        else:
            # sort entirely
            mas = common_utils.sort_and_shuffle(mas, B)
            for i in range(len(kv_pairs)):
                self.data[kv_pairs[i][0]] = mas[i]
        for i in range(0, len(self.data['X']), B):
            outputs = []
            # X and the parallel time input, both padded to at least
            # ihm_pos + 1 timesteps so the IHM readout position exists
            X = self.data['X'][i:i + B]
            T = self.data['T'][i:i + B]
            X = common_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
            T = common_utils.pad_zeros(T, min_length=self.ihm_pos + 1)
            t = X.shape[1]
            # ihm
            ihm_M = np.array(self.data['ihm_M'][i:i + B])
            ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
            ihm_y = np.array(self.data['ihm_y'][i:i + B])
            ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
            outputs.append(ihm_y)
            if self.target_repl:
                # Replicate the IHM label across all timesteps.
                ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(
                    t, axis=1)  # (B, t, 1)
                outputs.append(ihm_seq)
            # decomp
            decomp_y = self.data['decomp_y'][i:i + B]
            decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, 1)
            outputs.append(decomp_y)
            # los
            los_y = self.data['los_y'][i:i + B]
            los_y = np.expand_dims(los_y, axis=-1)  # (B, 1)
            outputs.append(los_y)
            # pheno
            pheno_y = np.array(self.data['pheno_y'][i:i + B])
            outputs.append(pheno_y)
            if self.target_repl:
                # Replicate the phenotype labels across all timesteps.
                pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(
                    t, axis=1)  # (B, t, 25)
                outputs.append(pheno_seq)
            inputs = [X, T, ihm_M]
            batch_data = (inputs, outputs)
            if not self.return_names:
                yield batch_data
            else:
                yield {
                    'data': batch_data,
                    'names': self.data['names'][i:i + B],
                    'decomp_ts': self.data['decomp_ts'][i:i + B],
                    'los_ts': self.data['los_ts'][i:i + B],
                    'pheno_ts': self.data['pheno_ts'][i:i + B]
                }