def train(self, train_adata, condition_key, le=None, n_epochs=1000, batch_size=256):
    """Trains the backend model on `train_adata`, using `condition_key` in `.obs` as labels."""
    train_adata = remove_sparsity(train_adata)  # densify X if it is sparse
    x_train = train_adata.X
    y_train, _ = label_encoder(train_adata, le, condition_key)
    y_train = np.reshape(y_train, (-1,))  # flatten labels to a 1-D vector
    train_loader = Loader(x_train, labels=y_train, shuffle=True)
    self.model_backend.train(train_loader, n_epochs, batch_size)
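# Hedged usage sketch for the wrapper above; the file name is a placeholder and
# `network` stands for whatever instance exposes this `train` method.
import scanpy as sc

train_adata = sc.read("train.h5ad")  # hypothetical file with a "condition" column in .obs
network.train(train_adata, condition_key="condition", n_epochs=1000, batch_size=256)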
def train(self, train_data, use_validation=False, valid_data=None, n_epochs=25, batch_size=32,
          early_stop_limit=20, threshold=0.0025, initial_run=True, shuffle=True):
    """
    Trains the network for `n_epochs` on `train_data` and, if `use_validation`
    is `True`, evaluates it on `valid_data` after every epoch. Early stopping
    is used to prevent overfitting.

    # Parameters
        train_data: AnnData
            annotated data matrix used for training.
        use_validation: bool
            if `True`, `valid_data` must be provided and is used to compute the
            validation loss that drives early stopping.
        valid_data: AnnData
            annotated data matrix used for validation.
        n_epochs: int
            number of epochs to iterate and optimize network weights.
        batch_size: int
            number of samples per mini-batch.
        early_stop_limit: int
            number of consecutive epochs in which the validation loss does not
            improve. After this limit, the network stops training.
        threshold: float
            minimum decrease between consecutive validation losses; if the
            decrease is smaller than this threshold, the epoch counts toward
            early stopping.
        initial_run: bool
            if `True`: the network starts training from scratch and logs some
            useful initial messages.
            if `False`: the network resumes training by restoring the last
            model saved at `self.model_to_use`.
        shuffle: bool
            whether to shuffle the training data before each epoch.

    # Returns
        Nothing will be returned.

    # Example
    ```python
    import scanpy as sc
    import scgen
    train_data = sc.read("train_kang.h5ad")
    validation_data = sc.read("valid_kang.h5ad")
    network = scgen.CVAE(train_data=train_data, use_validation=True,
                         validation_data=validation_data, model_path="./saved_models/",
                         conditions={"ctrl": "control", "stim": "stimulated"})
    network.train(n_epochs=20)
    ```
    """
    if initial_run:
        log.info("----Training----")
        assign_step_zero = tensorflow.assign(self.global_step, 0)
        _init_step = self.sess.run(assign_step_zero)
    if not initial_run:
        self.saver.restore(self.sess, self.model_to_use)
    train_labels, le = label_encoder(train_data)
    if use_validation and valid_data is None:
        raise Exception("valid_data is None but use_validation is True.")
    if use_validation:
        valid_labels, _ = label_encoder(valid_data)
    loss_hist = []
    patience = early_stop_limit
    min_delta = threshold
    patience_cnt = 0
    for it in range(n_epochs):
        increment_global_step_op = tensorflow.assign(self.global_step, self.global_step + 1)
        _step = self.sess.run(increment_global_step_op)
        current_step = self.sess.run(self.global_step)
        train_loss = 0
        for lower in range(0, train_data.shape[0], batch_size):
            upper = min(lower + batch_size, train_data.shape[0])
            if sparse.issparse(train_data.X):
                x_mb = train_data[lower:upper, :].X.A
            else:
                x_mb = train_data[lower:upper, :].X
            y_mb = train_labels[lower:upper]
            _, current_loss_train = self.sess.run(
                [self.solver, self.vae_loss],
                feed_dict={self.x: x_mb, self.encoder_labels: y_mb,
                           self.decoder_labels: y_mb, self.time_step: current_step,
                           self.size: len(x_mb), self.is_training: True})
            train_loss += current_loss_train
        # report the mean per-batch training loss (true division; `//` here was a typo)
        print(f"iteration {it}: {train_loss / (train_data.shape[0] // batch_size)}")
        if use_validation:
            valid_loss = 0
            for lower in range(0, valid_data.shape[0], batch_size):
                upper = min(lower + batch_size, valid_data.shape[0])
                if sparse.issparse(valid_data.X):
                    x_mb = valid_data[lower:upper, :].X.A
                else:
                    x_mb = valid_data[lower:upper, :].X
                y_mb = valid_labels[lower:upper]
                current_loss_valid = self.sess.run(
                    self.vae_loss,
                    feed_dict={self.x: x_mb, self.encoder_labels: y_mb,
                               self.decoder_labels: y_mb, self.time_step: current_step,
                               self.size: len(x_mb), self.is_training: False})
                valid_loss += current_loss_valid
            loss_hist.append(valid_loss / valid_data.shape[0])
            # early stopping: the counter resets only on a sufficiently large improvement
            if it > 0 and loss_hist[it - 1] - loss_hist[it] > min_delta:
                patience_cnt = 0
            else:
                patience_cnt += 1
            if patience_cnt > patience:
                os.makedirs(self.model_to_use, exist_ok=True)
                save_path = self.saver.save(self.sess, self.model_to_use)
                break
    else:
        # for/else: runs only when no early stop triggered a break
        os.makedirs(self.model_to_use, exist_ok=True)
        save_path = self.saver.save(self.sess, self.model_to_use)
        log.info(f"Model saved in file: {save_path}. Training finished")
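# A self-contained sketch (not part of the model code) of the early-stopping
# rule used in `train` above: the patience counter resets whenever the
# validation loss improves by more than `min_delta`, and training stops once
# more than `patience` non-improving epochs accumulate in a row.
def epochs_until_early_stop(loss_hist, patience, min_delta):
    patience_cnt = 0
    for it in range(1, len(loss_hist)):
        if loss_hist[it - 1] - loss_hist[it] > min_delta:
            patience_cnt = 0  # meaningful improvement: reset the counter
        else:
            patience_cnt += 1  # stagnant (or worse) epoch
        if patience_cnt > patience:
            return it  # epoch at which training would break
    return None  # training would run to completion

# The loss plateaus after epoch 3, so training halts three epochs later.
assert epochs_until_early_stop([1.0, 0.8, 0.6, 0.59, 0.59, 0.59, 0.59],
                               patience=2, min_delta=0.0025) == 6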
def train(self, train_adata, valid_adata=None, condition_encoder=None, condition_key='condition',
          n_epochs=25, batch_size=32, early_stop_limit=20, lr_reducer=10, threshold=0.0025,
          monitor='val_loss', shuffle=True, verbose=2, save=True):
    """
    Trains the network for `n_epochs` on `train_adata` and validates the model
    on `valid_adata` if it is provided. Early stopping and learning-rate
    reduction on plateau are used to prevent overfitting.

    # Parameters
        train_adata: AnnData
            annotated data matrix used for training.
        valid_adata: AnnData
            annotated data matrix used for validation.
        condition_encoder: dict
            mapping from condition names to integer labels; if `None`, one is
            created by `label_encoder`.
        condition_key: str
            name of the column in `adata.obs` that holds the conditions.
        n_epochs: int
            number of epochs to iterate and optimize network weights.
        batch_size: int
            number of samples per mini-batch.
        early_stop_limit: int
            patience (in epochs) for `EarlyStopping`; set to 0 to disable.
        lr_reducer: int
            patience (in epochs) for `ReduceLROnPlateau`; set to 0 to disable.
        threshold: float
            minimum decrease between consecutive values of the monitored
            quantity; smaller decreases count toward early stopping.
        monitor: str
            quantity monitored by early stopping and LR reduction.
        shuffle: bool
            whether to shuffle the training data before each epoch.
        verbose: int
            verbosity level passed to Keras; values above 2 switch to a
            compact per-epoch message.
        save: bool
            whether to save the trained model afterwards.

    # Returns
        Nothing will be returned.

    # Example
    ```python
    import scanpy as sc
    train_adata = sc.read("train_kang.h5ad")
    valid_adata = sc.read("valid_kang.h5ad")
    network = CVAE(x_dimension=train_adata.shape[1], z_dimension=100, alpha=0.1,
                   model_path="./models/cvae")
    network.train(train_adata, valid_adata=valid_adata, condition_key="condition", n_epochs=20)
    ```
    """
    train_labels_encoded, _ = label_encoder(train_adata, condition_encoder, condition_key)
    train_labels_onehot = to_categorical(train_labels_encoded, num_classes=self.n_conditions)

    callbacks = [
        History(),
        CSVLogger(filename="./csv_logger.log"),
    ]

    if early_stop_limit > 0:
        callbacks.append(EarlyStopping(patience=early_stop_limit, monitor=monitor, min_delta=threshold))

    if lr_reducer > 0:
        callbacks.append(ReduceLROnPlateau(monitor=monitor, patience=lr_reducer, verbose=verbose))

    if verbose > 2:
        callbacks.append(
            LambdaCallback(on_epoch_end=lambda epoch, logs: print_message(epoch, logs, n_epochs, verbose)))
        fit_verbose = 0
    else:
        fit_verbose = verbose

    if sparse.issparse(train_adata.X):
        train_adata.X = train_adata.X.A

    # the CVAE takes the expression matrix plus condition labels for both
    # encoder and decoder, and reconstructs the matrix while predicting labels
    x = [train_adata.X, train_labels_onehot, train_labels_onehot]
    y = [train_adata.X, train_labels_encoded]

    if valid_adata is not None:
        if sparse.issparse(valid_adata.X):
            valid_adata.X = valid_adata.X.A

        valid_labels_encoded, _ = label_encoder(valid_adata, condition_encoder, condition_key)
        valid_labels_onehot = to_categorical(valid_labels_encoded, num_classes=self.n_conditions)

        x_valid = [valid_adata.X, valid_labels_onehot, valid_labels_onehot]
        y_valid = [valid_adata.X, valid_labels_encoded]

        self.cvae_model.fit(x=x, y=y,
                            epochs=n_epochs,
                            batch_size=batch_size,
                            validation_data=(x_valid, y_valid),
                            shuffle=shuffle,
                            callbacks=callbacks,
                            verbose=fit_verbose)
    else:
        self.cvae_model.fit(x=x, y=y,
                            epochs=n_epochs,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            callbacks=callbacks,
                            verbose=fit_verbose)
    if save:
        self.save_model()
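# A minimal sketch (synthetic labels) of the encoding that feeds the `fit` call
# above: integer condition codes and their one-hot form. The import path
# assumes standalone Keras; under tf.keras it lives in tensorflow.keras.utils.
import numpy as np
from keras.utils import to_categorical

labels_encoded = np.array([0, 1, 1, 0])  # e.g. {"control": 0, "stimulated": 1}
labels_onehot = to_categorical(labels_encoded, num_classes=2)
print(labels_onehot)
# [[1. 0.]
#  [0. 1.]
#  [0. 1.]
#  [1. 0.]]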
# The start of this filter is truncated in the source; the `net_valid_adata =
# valid_adata[~(...)]` reconstruction below (excluding the held-out cell type
# under the target conditions, with `net_train_adata` filtered analogously
# earlier in the script) is an assumption.
net_valid_adata = valid_adata[~((valid_adata.obs[cell_type_key] == specific_cell_type) &
                                (valid_adata.obs[condition_key].isin(target_conditions)))]

z_dim = 100
network = CVAE(x_dimension=net_train_adata.X.shape[1],
               z_dimension=z_dim,
               alpha=0.1,
               model_path=f"../models/CVAE/{data_name}/{specific_cell_type}/cvae")

network.train(net_train_adata, use_validation=True, valid_data=net_valid_adata, n_epochs=120)

train_labels, _ = label_encoder(train_adata, le, 'condition')
cell_type_adata = train_adata[train_adata.obs[cell_type_key] == specific_cell_type]

# predict the target condition from the control cells of the held-out cell type
unperturbed_data = cell_type_adata[cell_type_adata.obs[condition_key] == control_condition]
target_labels = np.zeros((len(unperturbed_data), 1)) + le[target_condition]
source_labels = np.zeros((len(unperturbed_data), 1)) + le[control_condition]

pred_adata = network.predict(unperturbed_data, source_labels, target_labels)
pred_adata.obs[condition_key] = [f"{specific_cell_type}_pred_{target_condition}"] * len(target_labels)
pred_adata.obs['method'] = 'CVAE'
pred_adata.write(f"./data/reconstructed/{data_name}/CVAE-{specific_cell_type}.h5ad")
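# Downstream scripts can read the predictions back with scanpy; the path
# mirrors the `write` call above, and the 'method' column marks the model.
import scanpy as sc

pred_adata = sc.read(f"./data/reconstructed/{data_name}/CVAE-{specific_cell_type}.h5ad")
print(pred_adata.obs['method'].unique())  # expected: ['CVAE']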