def accuracy_fn(target, output):
    """Compute the accuracy of rounded predictions against the labels.

    :param target: label
    :param output: prediction; passed through ``round`` before comparison
    :return: accuracy, taken from ``Accuracy.result().numpy()``
    """
    metric = Accuracy()
    metric.update_state(y_true=target, y_pred=round(output))
    return metric.result().numpy()
def report(model, ds_test_images, ds_test_labels, threshold=0.5):
    """Build a table with overall accuracy and per-class precision/recall.

    The model scores are binarised with ``threshold`` (scores strictly below
    it become class 0, everything else class 1) and that single set of
    predicted classes is used for every metric in the table.

    :param model: object exposing ``predict``; its output is flattened to 1-D
    :param ds_test_images: inputs forwarded to ``model.predict``
    :param ds_test_labels: object exposing ``as_numpy_iterator``; the yielded
        label arrays are stacked into a single 1-D vector
    :param threshold: decision threshold applied to the flattened scores
    :return: ``pd.DataFrame`` with columns ``Accuracy``, ``Precision`` and
        ``Recall``; the first row (``All``) holds the overall accuracy, the
        following rows (``Class 0``, ``Class 1``, ...) hold the per-class
        precision and recall. Cells that do not apply contain ``" "``.
    """
    y_true = np.hstack(list(ds_test_labels.as_numpy_iterator()))
    y_score = model.predict(ds_test_images).flatten()

    # Binarise once so accuracy, precision and recall all honour `threshold`.
    # (Previously accuracy used np.around, which ignored `threshold` and also
    # disagreed with the thresholding rule for scores of exactly 0.5.)
    y_pred = np.where(y_score < threshold, 0, 1)

    class_labels = np.unique(y_true)
    depth = class_labels.shape[0]
    y_true_oh = tf.one_hot(y_true, depth=depth)
    y_pred_oh = tf.one_hot(y_pred, depth=depth)

    overall = Accuracy()
    overall.update_state(y_true, y_pred)

    # Row 0 is the overall accuracy; precision/recall do not apply there.
    results = {
        'Accuracy': [overall.result().numpy()],
        'Precision': [" "],
        'Recall': [" "],
    }

    # One row per class; accuracy does not apply on these rows.
    for class_id in class_labels:
        precision = Precision(class_id=class_id)
        recall = Recall(class_id=class_id)
        precision.update_state(y_true_oh, y_pred_oh)
        recall.update_state(y_true_oh, y_pred_oh)
        results['Accuracy'].append(" ")
        results['Precision'].append(precision.result().numpy())
        results['Recall'].append(recall.result().numpy())

    row_labels = [
        'All' if i == 0 else f'Class {i-1}' for i in range(depth + 1)
    ]
    return pd.DataFrame(data=results, index=row_labels)
def plot_confusion_matrix(y_true, y_pred):
    """Draw the confusion matrix of ``y_pred`` vs ``y_true`` as an image.

    The matrix is computed with ``normalize='true'`` (with scikit-learn's
    ``confusion_matrix`` this normalises over the true labels -- presumably
    that is the function in scope; verify against the imports) and drawn on
    the current matplotlib figure with the ``'Blues'`` colormap.

    :param y_true: ground-truth labels (y axis)
    :param y_pred: predicted labels (x axis)
    """
    plt.imshow(confusion_matrix(y_true, y_pred, normalize='true'),
               cmap='Blues')
    plt.xlabel('predictions')
    plt.ylabel('ground truth')


# predict: class index with the highest score, for predictions and targets
y_pred = np.argmax(dnn.predict(x=test_x, batch_size=256), axis=1)
y_true = np.argmax(test_y, axis=1)
accuracy = Accuracy()

# frame-by-frame at the state level
accuracy.update_state(y_true, y_pred)
print('Frame-by-frame accuracy at the state level: {:.2f}%'.format(
    accuracy.result().numpy() * 100))
plt.figure()
plot_confusion_matrix(y_true, y_pred)
plt.title('Frame-by-frame confusion matrix at the state level')

# frame-by-frame at the phoneme level: map state ids to phones first
y_pred_phones = states2phones(y_pred, phones, stateList)
y_true_phones = states2phones(y_true, phones, stateList)
# the same metric object is reused, so clear the state-level counts
accuracy.reset_states()
accuracy.update_state(y_true_phones, y_pred_phones)
print('Frame-by-frame accuracy at the phoneme level: {:.2f}%'.format(
    accuracy.result().numpy() * 100))
plt.figure()
plot_confusion_matrix(y_true_phones, y_pred_phones)
plt.title('Frame-by-frame confusion matrix at the phoneme level')
class ParallelLSTMTextClassifier(Model):
    """Two-class text classifier over word and character inputs.

    Words are looked up in a frozen pretrained embedding loaded from
    ``../data/embedding.npy``; characters are embedded and passed through a
    1-D CNN with max pooling. The concatenated features are reduced
    hierarchically by two bidirectional LSTM levels (10 x 10 x 10 steps),
    pooled by ``KernelAttentivePooling`` and projected to 2 logits.

    Remarks:
    - ``call`` hard-codes the 10 x 10 x 10 hierarchy, so every sample must
      contain exactly 1000 word positions (id 0 is treated as padding).
    - ``fit`` and ``evaluate`` are custom loops that replace the Keras
      methods of the same name.
    """

    EPOCHS = 1
    logger = logging.getLogger('tensorflow')
    logger.setLevel(logging.INFO)

    def __init__(self,
                 lstm_units=100,
                 char_vocab=26,
                 char_embed_size=100,
                 cnn_filters=300,
                 cnn_kernel_size=5,
                 dropout_rate=0.2,
                 max_char_len=10,
                 lr=1e-4):
        """Create the layers, optimizer, metric and learning-rate schedule.

        :param lstm_units: units per direction in each bidirectional LSTM
        :param char_vocab: number of characters (one extra id is reserved)
        :param char_embed_size: size of the character embedding
        :param cnn_filters: number of filters of the convolution layers
        :param cnn_kernel_size: kernel size of the convolution layers
        :param dropout_rate: rate shared by all dropout layers
        :param max_char_len: number of characters per word
        :param lr: initial learning rate (decayed exponentially in ``fit``)
        """
        super().__init__()
        self.char_embedding = Embedding(char_vocab + 1, char_embed_size)
        self.word_embedding = tf.Variable(
            np.load('../data/embedding.npy'),
            dtype=tf.float32,
            name='pretrained_embedding',
            trainable=False,
        )
        self.char_cnn = Conv1D(filters=cnn_filters,
                               kernel_size=cnn_kernel_size,
                               activation='elu',
                               padding='same')
        # NOTE(review): embed_drop, embed_fc, word_cnn and word_drop are
        # created here but never used by call().
        self.embed_drop = Dropout(dropout_rate)
        self.embed_fc = Dense(cnn_filters, 'elu', name='embed_fc')
        self.word_cnn = Conv1D(filters=cnn_filters,
                               kernel_size=cnn_kernel_size,
                               activation='elu',
                               padding='same')
        self.word_drop = Dropout(dropout_rate)
        self.max_char_len = max_char_len
        self.char_embed_size = char_embed_size
        self.cnn_filters = cnn_filters
        self.attentive_pooling = KernelAttentivePooling(dropout_rate)
        self.dropout_l1 = Dropout(dropout_rate)
        self.dropout_l2 = Dropout(dropout_rate)
        self.blstm_l1 = Bidirectional(LSTM(lstm_units, return_sequences=True))
        self.blstm_l2 = Bidirectional(LSTM(lstm_units, return_sequences=True))
        self.dropout_op = Dropout(dropout_rate)
        self.units = 2 * lstm_units
        self.fc = Dense(units=self.units, activation='elu')
        self.out_linear = Dense(2)
        self.optimizer = Adam(lr)
        self.accuracy = Accuracy()
        self.decay_lr = tf.optimizers.schedules.ExponentialDecay(
            lr, 1000, 0.95)
        self.logger = logging.getLogger('tensorflow')
        self.logger.setLevel(logging.INFO)

    def call(self, inputs, training=False):
        """Compute the class logits for a batch.

        :param inputs: pair ``(words, chars)``; ``words`` holds word ids of
            shape (batch, 1000) and ``chars`` the character ids of every word
        :param training: whether dropout is active
        :return: logits produced by the final ``Dense(2)`` layer
        """
        words, chars = inputs
        if words.dtype != tf.int32:
            words = tf.cast(words, tf.int32)
        masks = tf.sign(words)  # 1 for real words, 0 for padding id 0
        batch_size = tf.shape(words)[0]
        word_len = tf.shape(words)[1]

        # character features: embed -> CNN -> max over characters
        chars = self.char_embedding(chars)
        chars = tf.reshape(
            chars,
            (batch_size * word_len, self.max_char_len, self.char_embed_size))
        chars = self.char_cnn(chars)
        chars = tf.reduce_max(chars, 1)
        chars = tf.reshape(chars, (batch_size, word_len, self.cnn_filters))

        words = tf.nn.embedding_lookup(self.word_embedding, words)
        x = tf.concat((words, chars), axis=-1)

        # Width of the concatenated features. The original read the
        # non-existent attribute ``self.embedding`` here.
        feature_dim = self.word_embedding.shape[-1] + self.cnn_filters

        # level 1: 100 groups of 10 words per sample
        x = tf.reshape(x, (batch_size * 10 * 10, 10, feature_dim))
        x = self.dropout_l1(x, training=training)
        x = self.blstm_l1(x)
        x = tf.reduce_max(x, 1)

        # level 2: 10 groups of 10 level-1 summaries per sample
        x = tf.reshape(x, (batch_size * 10, 10, self.units))
        x = self.dropout_l2(x, training=training)
        x = self.blstm_l2(x)
        x = tf.reduce_max(x, 1)
        x = tf.reshape(x, (batch_size, 10, self.units))

        # One mask entry per level-2 summary: 1 if its 100-word chunk holds
        # at least one real word. The original summed over the whole sequence
        # (giving shape (batch,)) and then tried to reshape that to
        # (self.batch_size, 10), which cannot work and used an undefined
        # attribute.
        masks = tf.sign(
            tf.reduce_sum(tf.reshape(masks, (batch_size, 10, -1)), axis=2))

        x = self.attentive_pooling((x, masks), training=training)
        x = self.dropout_op(x, training=training)
        x = self.fc(x)
        x = self.out_linear(x)
        return x

    def fit(self, data, epochs=EPOCHS):
        """Train with a custom loop (replaces the Keras ``fit``).

        Uses label-smoothed (0.2) softmax cross-entropy, an exponentially
        decayed learning rate and global-norm gradient clipping at 5.
        Progress is logged every 100 steps.

        :param data: iterable yielding ``(texts, labels)`` batches, where
            ``texts`` is the ``(words, chars)`` pair expected by ``call`` and
            ``labels`` holds integer class ids
        :param epochs: number of passes over ``data``
        :return: ``True`` once training has finished
        """
        t0 = time.time()
        step = 0
        epoch = 1
        while epoch <= epochs:
            for texts, labels in data:
                with tf.GradientTape() as tape:
                    logits = self.call(texts, training=True)
                    # The sparse cross-entropy op has no label_smoothing
                    # argument, so one-hot encode the labels and use the
                    # Keras categorical loss, which supports smoothing.
                    one_hot_labels = tf.one_hot(labels,
                                                depth=logits.shape[-1])
                    loss = tf.reduce_mean(
                        tf.keras.losses.categorical_crossentropy(
                            one_hot_labels,
                            logits,
                            from_logits=True,
                            label_smoothing=.2,
                        ))
                self.optimizer.lr.assign(self.decay_lr(step))
                grads = tape.gradient(loss, self.trainable_variables)
                grads, _ = tf.clip_by_global_norm(grads, 5.)
                self.optimizer.apply_gradients(
                    zip(grads, self.trainable_variables))
                if step % 100 == 0:
                    self.logger.info(
                        "Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}"
                        .format(step,
                                loss.numpy().item(),
                                time.time() - t0,
                                self.optimizer.lr.numpy().item()))
                    t0 = time.time()
                step += 1
            epoch += 1
        return True

    def evaluate(self, data):
        """Log and return the accuracy over ``data`` (replaces Keras ``evaluate``).

        :param data: iterable yielding ``(texts, labels)`` batches
        :return: accuracy over all batches (the original returned ``None``)
        """
        self.accuracy.reset_states()
        for texts, labels in data:
            logits = self.call(texts, training=False)
            y_pred = tf.argmax(logits, axis=-1)
            self.accuracy.update_state(y_true=labels, y_pred=y_pred)
        accuracy = self.accuracy.result().numpy()
        self.logger.info("Evaluation Accuracy: {:.3f}".format(accuracy))
        self.logger.info("Accuracy: {:.3f}".format(accuracy))
        return accuracy
def cnn_1d_pipeline(train_data, train_labels, val_data, val_labels, test_data,
                    test_labels, classes, weights):
    """Train the 1D CNN and evaluate it on the test set.

    Parameters :
    ------------
    xxx_data : np.array
        Data of the train, validation and test datasets; indexed with four
        axes, the first three are flattened into one pixel axis
    xxx_labels : np.array
        Labels of the train, validation and test datasets; indexed with
        three axes, flattened into one pixel axis
    classes : np.array
        Classes of the dataset
    weights : np.array
        Weights applied for each class during training

    Returns :
    -----------
    Accuracy : float
    Precision : float
    Recall : float
    F1-Score : float
        Metrics evaluated on the test set
    """

    def _pixel_rows(cube):
        # Collapse the three leading axes, keep the last axis as features
        return cube.reshape(cube.shape[0] * cube.shape[1] * cube.shape[2],
                            cube.shape[3])

    def _pixel_vector(label_cube):
        # Collapse a 3-axis label array into a flat vector
        return label_cube.reshape(label_cube.shape[0] * label_cube.shape[1] *
                                  label_cube.shape[2])

    def _pixel_weights(flat_labels):
        # Weight 1 by default, overridden by the weight of the pixel's class
        per_pixel = np.ones((flat_labels.shape[0]))
        for idx in range(len(classes)):
            per_pixel[flat_labels == classes[idx]] = weights[idx]
        return per_pixel

    # Reshaping the data and labels
    train_data = _pixel_rows(train_data)
    train_labels = _pixel_vector(train_labels)
    val_data = _pixel_rows(val_data)
    val_labels = _pixel_vector(val_labels)
    test_shape = test_labels.shape  # kept to restore the image layout later
    test_data = _pixel_rows(test_data)
    test_labels = _pixel_vector(test_labels)

    # Calculating the weights of each pixel
    train_weights = _pixel_weights(train_labels)
    val_weights = _pixel_weights(val_labels)
    test_weights = _pixel_weights(test_labels)

    # One-Hot Encoding the labels (test labels stay as integer ids)
    train_labels = tf.one_hot(train_labels, depth=17, dtype=tf.int8).numpy()
    val_labels = tf.one_hot(val_labels, depth=17, dtype=tf.int8).numpy()

    # Creating a data generator class
    class DataGenerator(tf.keras.utils.Sequence):
        """Yields (data, labels, weights) batches, reshuffled every epoch."""

        def __init__(self,
                     data,
                     labels,
                     weights,
                     batch_size=32,
                     n_classes=10,
                     shuffle=True):
            """Store the arrays and build the first index permutation."""
            self.data = data
            self.labels = labels
            self.weights = weights
            self.batch_size = batch_size
            self.shuffle = shuffle
            self.on_epoch_end()

        def __len__(self):
            """Number of full batches per epoch."""
            return len(self.data) // self.batch_size

        def __getitem__(self, index):
            """Return the batch at position ``index``."""
            start = index * self.batch_size
            picked = self.indexes[start:start + self.batch_size]
            return self.data[picked], self.labels[picked], self.weights[picked]

        def on_epoch_end(self):
            """Rebuild (and optionally shuffle) the sample order."""
            self.indexes = np.arange(len(self.data))
            if self.shuffle:
                np.random.shuffle(self.indexes)

    ## -- Training the model --
    batch_size = 3000
    steps_per_epoch = 20

    train_generator = DataGenerator(train_data,
                                    train_labels,
                                    train_weights,
                                    batch_size,
                                    shuffle=True)
    val_generator = DataGenerator(val_data,
                                  val_labels,
                                  val_weights,
                                  batch_size,
                                  shuffle=True)

    checkpoint_filepath = "tmp/checkpoint_salinas_convnet"
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)
    early_stopping_callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy', patience=30)

    model = cnn_1D(shape=(train_data.shape[1], 1),
                   kernel_size=7,
                   nb_filters_0=64,
                   nb_dense_neurons=500,
                   output_channels=17,
                   kernel_reg=0.0009875,
                   dense_reg=0.0012559,
                   dropout=0)
    model.compile(loss="categorical_crossentropy",
                  optimizer=Adam(learning_rate=0.00074582),
                  metrics=["accuracy"])
    print(model.summary())

    # Training the model
    history = model.fit(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=500,
        validation_data=val_generator,
        callbacks=[model_checkpoint_callback, early_stopping_callback])

    # Making a prediction on the test data.
    # Output column 0 is left out of the argmax, hence the +1 offset.
    scores = model.predict(test_data, batch_size=batch_size)
    y_pred = np.argmax(scores[:, 1:], axis=1) + 1

    # Calculating accuracy
    accuracy = Accuracy()
    accuracy.update_state(test_labels, y_pred)
    pred_accuracy = accuracy.result()
    print(pred_accuracy)

    # Computing the confusion matrix
    conf = confusion_matrix(test_labels, y_pred)
    print(conf)

    # Pixels labelled 0 in the ground truth are forced to 0 in the prediction
    y_pred = y_pred.reshape(test_shape)
    y_pred[test_labels.reshape(test_shape) == 0] = 0
    y_pred = y_pred.reshape(-1)

    # Calculating metrics
    accuracy = Accuracy()
    accuracy.update_state(test_labels, y_pred, sample_weight=test_weights)

    # One-hot encode once and reuse for both recall and precision
    true_flat = tf.one_hot(test_labels, depth=17).numpy().flatten()
    pred_flat = tf.one_hot(y_pred, depth=17).numpy().flatten()

    recall = tf.keras.metrics.Recall()
    recall.update_state(true_flat, pred_flat)
    precision = tf.keras.metrics.Precision()
    precision.update_state(true_flat, pred_flat)

    accuracy_value = accuracy.result().numpy()
    precision_value = precision.result().numpy()
    recall_value = recall.result().numpy()
    f1_value = (2 * precision_value * recall_value /
                (precision_value + recall_value))
    return (accuracy_value, precision_value, recall_value, f1_value)
def price_direction_accuracy(targets_directs, predicts_directs):
    """Return the accuracy of predicted directions against the targets.

    :param targets_directs: target values, passed as ``y_true``
    :param predicts_directs: predicted values, passed as ``y_pred``
    :return: accuracy, taken from ``Accuracy.result().numpy()``
    """
    metric = Accuracy()
    metric.update_state(targets_directs, predicts_directs)
    direction_accuracy = metric.result().numpy()
    return direction_accuracy
def cnn_1d_pipeline(train_data, train_labels, val_data, val_labels, test_data,
                    test_labels, classes, weights):
    """This function is the main pipeline for the 1D CNN.

    Trains the model, saves the confusion matrix and prediction images
    (raw and median-filtered) to disk, prints the test metrics and returns
    them.

    Parameters :
    ------------
    xxx_data : np.array
        Data of the train, validation and test datasets; indexed with four
        axes, the first three are flattened into one pixel axis
    xxx_labels : np.array
        Labels of the train, validation and test datasets; indexed with
        three axes, flattened into one pixel axis
    classes : np.array
        Classes of the dataset
    weights : np.array
        Weights applied for each class during training

    Returns :
    -----------
    Accuracy : float
    Precision : float
    Recall : float
    F1-Score : float
        Metrics evaluated on the test set, computed on the median-filtered
        predictions. (Earlier versions documented these values but returned
        None.)
    """
    # Reshaping the data
    train_data = train_data.reshape(
        train_data.shape[0] * train_data.shape[1] * train_data.shape[2],
        train_data.shape[3])
    train_labels = train_labels.reshape(
        train_labels.shape[0] * train_labels.shape[1] * train_labels.shape[2])

    val_data = val_data.reshape(
        val_data.shape[0] * val_data.shape[1] * val_data.shape[2],
        val_data.shape[3])
    val_labels = val_labels.reshape(
        val_labels.shape[0] * val_labels.shape[1] * val_labels.shape[2])

    test_shape = test_labels.shape  # kept to restore the image layout later
    test_data = test_data.reshape(
        test_data.shape[0] * test_data.shape[1] * test_data.shape[2],
        test_data.shape[3])
    test_labels = test_labels.reshape(
        test_labels.shape[0] * test_labels.shape[1] * test_labels.shape[2])

    # Computing the weights of each pixel (1 unless its class has a weight)
    train_weights = np.ones((train_labels.shape[0]))
    val_weights = np.ones((val_labels.shape[0]))
    test_weights = np.ones((test_labels.shape[0]))
    for i in range(len(classes)):
        train_weights[train_labels == classes[i]] = weights[i]
        val_weights[val_labels == classes[i]] = weights[i]
        test_weights[test_labels == classes[i]] = weights[i]

    # One-Hot Encoding the labels (test labels stay as integer ids)
    train_labels = tf.one_hot(train_labels, depth=6, dtype=tf.int8).numpy()
    val_labels = tf.one_hot(val_labels, depth=6, dtype=tf.int8).numpy()

    # Creating a data generator class
    class DataGenerator(tf.keras.utils.Sequence):
        """Yields (data, labels, weights) batches, reshuffled every epoch."""

        def __init__(self, data, labels, weights, batch_size=32, n_classes=10,
                     shuffle=True):
            'Initialization'
            self.data = data
            self.labels = labels
            self.weights = weights
            self.batch_size = batch_size
            self.shuffle = shuffle
            self.on_epoch_end()

        def __len__(self):
            'Denotes the number of batches per epoch'
            return int(np.floor(len(self.data) / self.batch_size))

        def __getitem__(self, index):
            'Generate one batch of data'
            # Generate indexes of the batch
            indexes = self.indexes[index * self.batch_size:
                                   (index + 1) * self.batch_size]

            # Get data and labels
            data_yield = self.data[indexes]
            labels_yield = self.labels[indexes]
            weights_yield = self.weights[indexes]

            return data_yield, labels_yield, weights_yield

        def on_epoch_end(self):
            'Updates indexes after each epoch'
            self.indexes = np.arange(len(self.data))
            if self.shuffle:
                np.random.shuffle(self.indexes)

    ## -- Training the model --
    batch_size = 30000
    steps_per_epoch = 100

    train_generator = DataGenerator(train_data, train_labels, train_weights,
                                    batch_size, shuffle=True)
    val_generator = DataGenerator(val_data, val_labels, val_weights,
                                  batch_size, shuffle=True)

    checkpoint_filepath = "tmp/checkpoint_SPARCS_cnn_1D"
    # NOTE(review): this callback is created but not passed to model.fit()
    # below, so no checkpoint is written. Left as-is to avoid introducing
    # new file output; add it to `callbacks` if checkpoints are wanted.
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_loss',
        mode='min',
        save_best_only=True)

    model = cnn_1D(shape=(10, 1), kernel_size=3, nb_filters_0=128,
                   nb_dense_neurons=180, kernel_reg=0.0083, dense_reg=0.054,
                   output_channels=6, dropout=0.0, learning_rate=0.000322)
    print(model.summary())

    # Training the model
    history = model.fit(train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=500,
                        validation_data=val_generator,
                        validation_steps=5,
                        callbacks=[EarlyStopping(monitor='val_accuracy',
                                                 patience=40)])

    # Making a prediction
    y_pred = model.predict(test_data, batch_size=30000)
    y_pred = np.argmax(y_pred, axis=1)

    # Computing accuracy
    accuracy = Accuracy()
    accuracy.update_state(test_labels, y_pred, sample_weight=test_weights)
    pred_accuracy = accuracy.result()
    print(pred_accuracy)

    # Computing the confusion matrix
    print("Computing confusion matrix")
    conf = confusion_matrix(test_labels, y_pred)
    print(conf)
    np.save("logs/metrics/SPARCS/cnn_1D/confusion_matrix", conf)

    y_pred = y_pred.reshape(test_shape)
    print(y_pred.shape)

    # Plotting result images on the test dataset (first 30 images)
    cmap = plt.get_cmap('viridis', 6)
    gt_images = test_labels.reshape(test_shape)
    for c, image in enumerate(y_pred[:30]):
        plt.imshow(image + 1e-5, vmin=0, vmax=6, cmap=cmap)
        plt.colorbar()
        plt.savefig(f"images/SPARCS/cnn_1D/pred{c}_raw")
        plt.clf()
        plt.imshow(gt_images[c] + 1e-5, vmin=0, vmax=6, cmap=cmap)
        plt.colorbar()
        plt.savefig(f"images/SPARCS/cnn_1D/GT{c}")
        plt.clf()

    # Applying a median filter on the results (in place, image by image)
    for i in range(len(y_pred)):
        y_pred[i] = medfilt(y_pred[i], kernel_size=3)

    # Plotting the median filtered result images
    for c, image in enumerate(y_pred[:30]):
        plt.imshow(image + 1e-5, vmin=0, vmax=6, cmap=cmap)
        plt.colorbar()
        plt.savefig(f"images/SPARCS/cnn_1D/pred{c}_filtered")
        plt.clf()

    # Computing metrics on the test dataset
    y_pred = y_pred.reshape(test_shape[0] * test_shape[1] * test_shape[2])
    accuracy = Accuracy()
    accuracy.update_state(test_labels, y_pred, sample_weight=test_weights)
    test_labels = tf.one_hot(test_labels, depth=6).numpy().flatten()
    y_pred = tf.one_hot(y_pred, depth=6).numpy().flatten()
    recall = tf.keras.metrics.Recall()
    recall.update_state(test_labels, y_pred)
    precision = tf.keras.metrics.Precision()
    precision.update_state(test_labels, y_pred)

    # Printing metrics
    test_accuracy = accuracy.result().numpy()
    test_recall = recall.result().numpy()
    test_precision = precision.result().numpy()
    test_f1 = 2 / (1 / test_recall + 1 / test_precision)  # harmonic mean
    print("Test accuracy = ", test_accuracy)
    print("Test recall =", test_recall)
    print("Test precision=", test_precision)
    print("Test f1 =", test_f1)

    # Return the metrics promised by the docstring, in the same order as the
    # sibling pipeline: accuracy, precision, recall, F1.
    return (test_accuracy, test_precision, test_recall, test_f1)
class DualStudent(Model): """" Dual Student for Automatic Speech Recognition (ASR). How to train: 1) set the optimizer by means of compile(), 2) use train() How to test: use test() Remarks: - Do not use fit() by Keras, use train() - Do not use evaluate() by Keras, use test() - Compiled metrics and loss (i.e. set by means of compile()) are not used Original proposal for image classification: https://arxiv.org/abs/1909.01804 """ def __init__(self, n_classes, n_hidden_layers=3, n_units=96, consistency_loss='mse', consistency_scale=10, stabilization_scale=100, xi=0.6, padding_value=0., sigma=0.01, schedule='rampup', schedule_length=5, version='mono_directional'): """ Constructs a Dual Student model. :param n_classes: number of classes (i.e. number of units in the last layer of each student) :param n_hidden_layers: number of hidden layers in each student (i.e. LSTM layers) :param n_units: number of units for each hidden layer :param consistency_loss: one of 'mse', 'kl' :param consistency_scale: maximum value of weight for consistency constraint :param stabilization_scale: maximum value of weight for stabilization constraint :param xi: threshold for stable sample :param padding_value: value used to pad input sequences (used as mask_value for Masking layer) :param sigma: standard deviation for noisy augmentation :param schedule: type of schedule for lambdas, one of 'rampup', 'triangular_cycling', 'sinusoidal_cycling' :param schedule_length: :param version: one of: - 'mono_directional': both students have mono-directional LSTM layers - 'bidirectional: both students have bidirectional LSTM layers - 'imbalanced': one student has mono-directional LSTM layers, the other one bidirectional """ super(DualStudent, self).__init__() # store parameters self.n_classes = n_classes self.padding_value = padding_value self.n_units = n_units self.n_hidden_layers = n_hidden_layers self.xi = xi self.consistency_scale = consistency_scale self.stabilization_scale = stabilization_scale 
self.sigma = sigma self.version = version self.schedule = schedule self.schedule_length = schedule_length self._lambda1 = None self._lambda2 = None # schedule for lambdas if schedule == 'rampup': self.schedule_fn = sigmoid_rampup elif schedule == 'triangular_cycling': self.schedule_fn = triangular_cycling elif schedule == 'sinusoidal_cycling': self.schedule_fn = sinusoidal_cycling else: raise ValueError('Invalid schedule') # loss self._loss_cls = SparseCategoricalCrossentropy() # classification loss self._loss_sta = MeanSquaredError() # stabilization loss if consistency_loss == 'mse': self._loss_con = MeanSquaredError() # consistency loss elif consistency_loss == 'kl': self._loss_con = KLDivergence() else: raise ValueError('Invalid consistency metric') # metrics for training self._loss1 = Mean( name='loss1') # we want to average the loss for each batch self._loss2 = Mean(name='loss2') self._loss1_cls = Mean(name='loss1_cls') self._loss2_cls = Mean(name='loss2_cls') self._loss1_con = Mean(name='loss1_con') self._loss2_con = Mean(name='loss2_con') self._loss1_sta = Mean(name='loss1_sta') self._loss2_sta = Mean(name='loss2_sta') self._acc1 = SparseCategoricalAccuracy(name='acc1') self._acc2 = SparseCategoricalAccuracy(name='acc2') # metrics for testing self._test_loss1 = Mean(name='test_loss1') self._test_loss2 = Mean(name='test_loss2') self._test_acc1_train_phones = SparseCategoricalAccuracy( name='test_acc1_train_phones') self._test_acc2_train_phones = SparseCategoricalAccuracy( name='test_acc2_train_phones') self._test_acc1 = Accuracy(name='test_acc1') self._test_acc2 = Accuracy(name='test_acc2') self._test_per1 = PhoneErrorRate(name='test_per1') self._test_per2 = PhoneErrorRate(name='test_per2') # compose students if version == 'mono_directional': lstm_types = ['mono_directional', 'mono_directional'] elif version == 'bidirectional': lstm_types = ['bidirectional', 'bidirectional'] elif version == 'imbalanced': lstm_types = ['mono_directional', 'bidirectional'] 
else: raise ValueError('Invalid student version') self.student1 = self._get_student('student1', lstm_types[0]) self.student2 = self._get_student('student2', lstm_types[1]) # masking layer (just to use compute_mask and remove padding) self.mask = Masking(mask_value=self.padding_value) def _get_student(self, name, lstm_type): student = Sequential(name=name) student.add(Masking(mask_value=self.padding_value)) if lstm_type == 'mono_directional': for i in range(self.n_hidden_layers): student.add(LSTM(units=self.n_units, return_sequences=True)) elif lstm_type == 'bidirectional': for i in range(self.n_hidden_layers): student.add( Bidirectional( LSTM(units=self.n_units, return_sequences=True))) else: raise ValueError('Invalid LSTM version') student.add(Dense(units=self.n_classes, activation="softmax")) return student def _noisy_augment(self, x): return x + tf.random.normal(shape=x.shape, stddev=self.sigma) def call(self, inputs, training=False, student='student1', **kwargs): """ Feed-forwards inputs to one of the students. This function is called internally by __call__(). Do not use it directly, use the model as callable. You may prefer to use pad_and_predict() instead of this, because it pads the sequences and splits in batches. For a big dataset, it is strongly suggested that you use pad_and_predict(). 
:param inputs: tensor of shape (batch_size, n_frames, n_features) :param training: boolean, whether the call is in inference mode or training mode :param student: one of 'student1', 'student2' :return: tensor of shape (batch_size, n_frames, n_classes), softmax activations (probabilities) """ if student == 'student1': return self.student1(inputs, training=training) elif student != 'student1': return self.student2(inputs, training=training) else: raise ValueError('Invalid student') def build(self, input_shape): super(DualStudent, self).build(input_shape) self.student1.build(input_shape) self.student2.build(input_shape) def train(self, x_labeled, x_unlabeled, y_labeled, x_val=None, y_val=None, n_epochs=10, batch_size=32, shuffle=True, evaluation_mapping=None, logs_path=None, checkpoints_path=None, initial_epoch=0, seed=None): """ Trains the students with both labeled and unlabeled data (semi-supervised learning). :param x_labeled: numpy array of numpy arrays (n_frames, n_features), features corresponding to y_labeled. 'n_frames' can vary, padding is added to make x_labeled a tensor. :param x_unlabeled: numpy array of numpy arrays of shape (n_frames, n_features), features without labels. 'n_frames' can vary, padding is added to make x_unlabeled a tensor. :param y_labeled: numpy array of numpy arrays of shape (n_frames,), labels corresponding to x_labeled. 'n_frames' can vary, padding is added to make y_labeled a tensor. :param x_val: like x_labeled, but for validation set :param y_val: like y_labeled, but for validation set :param n_epochs: integer, number of training epochs :param batch_size: integer, batch size :param shuffle: boolean, whether to shuffle at each epoch or not :param evaluation_mapping: dictionary {training label -> test label}, the test phones should be a subset of the training phones :param logs_path: path where to save logs for TensorBoard :param checkpoints_path: path to a directory. 
If the directory contains checkpoints, the latest checkpoint is restored. :param initial_epoch: int, initial epoch from which to start the training. It can be used together with checkpoints_path to resume the training from a previous run. :param seed: seed for the random number generator """ # set seed if seed is not None: np.random.seed(seed) tf.random.set_seed(seed) # show summary self.build(input_shape=(None, ) + x_labeled[0].shape) self.student1.summary() self.student2.summary() # setup for logs train_summary_writer = None if logs_path is not None: train_summary_writer = tf.summary.create_file_writer(logs_path) # setup for checkpoints checkpoint = None if checkpoints_path is not None: checkpoint = tf.train.Checkpoint(optimizer=self.optimizer, model=self) checkpoint_path = tf.train.latest_checkpoint(checkpoints_path) if checkpoint_path is not None: checkpoint.restore(checkpoint_path) checkpoint_path = Path(checkpoints_path) / 'ckpt' checkpoint_path = str(checkpoint_path) # compute batch sizes labeled_batch_size = ceil( len(x_labeled) / (len(x_unlabeled) + len(x_labeled)) * batch_size) unlabeled_batch_size = batch_size - labeled_batch_size n_batches = min(ceil(len(x_unlabeled) / unlabeled_batch_size), ceil(len(x_labeled) / labeled_batch_size)) # training loop for epoch in trange(initial_epoch, n_epochs, desc='epochs'): # ramp up lambda1 and lambda2 self._lambda1 = self.consistency_scale * self.schedule_fn( epoch, self.schedule_length) self._lambda2 = self.stabilization_scale * self.schedule_fn( epoch, self.schedule_length) # shuffle training set if shuffle: indices = np.arange( len(x_labeled) ) # get indices to shuffle coherently features and labels np.random.shuffle(indices) x_labeled = x_labeled[indices] y_labeled = y_labeled[indices] np.random.shuffle(x_unlabeled) for i in trange(n_batches, desc='batches'): # select batch x_labeled_batch = select_batch(x_labeled, i, labeled_batch_size) x_unlabeled_batch = select_batch(x_unlabeled, i, unlabeled_batch_size) 
y_labeled_batch = select_batch(y_labeled, i, labeled_batch_size) # pad batch x_labeled_batch = pad_sequences(x_labeled_batch, padding='post', value=self.padding_value, dtype='float32') x_unlabeled_batch = pad_sequences(x_unlabeled_batch, padding='post', value=self.padding_value, dtype='float32') y_labeled_batch = pad_sequences(y_labeled_batch, padding='post', value=-1) # convert to tensors x_labeled_batch = tf.convert_to_tensor(x_labeled_batch) x_unlabeled_batch = tf.convert_to_tensor(x_unlabeled_batch) y_labeled_batch = tf.convert_to_tensor(y_labeled_batch) # train step self._train_step(x_labeled_batch, x_unlabeled_batch, y_labeled_batch) # put metrics in dictionary (easy management) train_metrics = { self._loss1.name: self._loss1.result(), self._loss2.name: self._loss2.result(), self._loss1_cls.name: self._loss1_cls.result(), self._loss2_cls.name: self._loss2_cls.result(), self._loss1_con.name: self._loss1_con.result(), self._loss2_con.name: self._loss2_con.result(), self._loss1_sta.name: self._loss1_sta.result(), self._loss2_sta.name: self._loss2_sta.result(), self._acc1.name: self._acc1.result(), self._acc2.name: self._acc2.result(), } metrics = {'train': train_metrics} # test on validation set if x_val is not None and y_val is not None: val_metrics = self.test(x_val, y_val, evaluation_mapping=evaluation_mapping) metrics['val'] = val_metrics # print metrics for dataset, metrics_ in metrics.items(): print(f'Epoch {epoch + 1} - ', dataset, ' - ', sep='', end='') for k, v in metrics_.items(): print(f'{k}: {v}, ', end='') print() # save logs if train_summary_writer is not None: with train_summary_writer.as_default(): for dataset, metrics_ in metrics.items(): for k, v in metrics_.items(): tf.summary.scalar(k, v, step=epoch) # save checkpoint if checkpoint is not None: checkpoint.save(file_prefix=checkpoint_path) # reset metrics self._loss1.reset_states() self._loss2.reset_states() self._loss1_cls.reset_states() self._loss2_cls.reset_states() 
# NOTE(review): the six calls below are the tail of a metric-reset routine whose
# beginning lies before this chunk; they are reproduced unchanged.
self._loss1_con.reset_states()
self._loss2_con.reset_states()
self._loss1_sta.reset_states()
self._loss2_sta.reset_states()
self._acc1.reset_states()
self._acc2.reset_states()

""" If you want to use graph execution, pad the whole dataset externally and uncomment the decorator below. If you uncomment the decorator without padding the dataset, the graph will be compiled for each batch, because train() pads at batch level and so the batches have different shapes. This would result in worse performance compared to eager execution. """

# @tf.function
def _train_step(self, x_labeled, x_unlabeled, y_labeled):
    """
    Runs one optimization step for both students on a labeled and an unlabeled batch.

    Each input batch is perturbed twice with self._noisy_augment(), both students predict on
    every perturbed copy, and each student's total loss is
    classification + self._lambda1 * consistency + self._lambda2 * stabilization.
    Gradients of L1 are applied to student1 and gradients of L2 to student2, both through
    self.optimizer; the training metrics are updated afterwards.

    :param x_labeled: batch of labeled features (presumably padded to a tensor by the caller,
        since padding is masked out here -- TODO confirm)
    :param x_unlabeled: batch of unlabeled features (same assumption as x_labeled)
    :param y_labeled: labels for x_labeled; indexed with the mask computed from x_labeled, so
        it is presumably padded to the same batch/frame layout -- TODO confirm
    :return: None, the results are accumulated in the metric objects
    """
    # noisy augmented batches (TODO: improvement with data augmentation instead of noise)
    B1_labeled = self._noisy_augment(x_labeled)
    B2_labeled = self._noisy_augment(x_labeled)
    B1_unlabeled = self._noisy_augment(x_unlabeled)
    B2_unlabeled = self._noisy_augment(x_unlabeled)

    # compute masks (to remove padding)
    mask_labeled = self.mask.compute_mask(x_labeled)
    mask_unlabeled = self.mask.compute_mask(x_unlabeled)
    y_labeled = y_labeled[mask_labeled]  # remove padding from labels

    # forward pass
    # the tape is persistent because two separate gradients (one per student) are taken from it
    with tf.GradientTape(persistent=True) as tape:
        # predict augmented labeled samples (for classification and consistency constraint)
        prob1_labeled_B1 = self.student1(B1_labeled, training=True)
        prob1_labeled_B2 = self.student1(B2_labeled, training=True)
        prob2_labeled_B1 = self.student2(B1_labeled, training=True)
        prob2_labeled_B2 = self.student2(B2_labeled, training=True)

        # predict augmented unlabeled samples (for consistency and stabilization constraints)
        prob1_unlabeled_B1 = self.student1(B1_unlabeled, training=True)
        prob1_unlabeled_B2 = self.student1(B2_unlabeled, training=True)
        prob2_unlabeled_B1 = self.student2(B1_unlabeled, training=True)
        prob2_unlabeled_B2 = self.student2(B2_unlabeled, training=True)

        # remove padding
        prob1_labeled_B1 = prob1_labeled_B1[mask_labeled]
        prob1_labeled_B2 = prob1_labeled_B2[mask_labeled]
        prob2_labeled_B1 = prob2_labeled_B1[mask_labeled]
        prob2_labeled_B2 = prob2_labeled_B2[mask_labeled]
        prob1_unlabeled_B1 = prob1_unlabeled_B1[mask_unlabeled]
        prob1_unlabeled_B2 = prob1_unlabeled_B2[mask_unlabeled]
        prob2_unlabeled_B1 = prob2_unlabeled_B1[mask_unlabeled]
        prob2_unlabeled_B2 = prob2_unlabeled_B2[mask_unlabeled]

        # compute classification losses
        # (student1 is scored on its B1 predictions, student2 on its B2 predictions)
        L1_cls = self._loss_cls(y_labeled, prob1_labeled_B1)
        L2_cls = self._loss_cls(y_labeled, prob2_labeled_B2)

        # concatenate labeled and unlabeled probability predictions (for consistency loss)
        prob1_labeled_unlabeled_B1 = tf.concat(
            [prob1_labeled_B1, prob1_unlabeled_B1], axis=0)
        prob1_labeled_unlabeled_B2 = tf.concat(
            [prob1_labeled_B2, prob1_unlabeled_B2], axis=0)
        prob2_labeled_unlabeled_B1 = tf.concat(
            [prob2_labeled_B1, prob2_unlabeled_B1], axis=0)
        prob2_labeled_unlabeled_B2 = tf.concat(
            [prob2_labeled_B2, prob2_unlabeled_B2], axis=0)

        # compute consistency losses
        # (each student is compared with itself across the two perturbed copies)
        L1_con = self._loss_con(prob1_labeled_unlabeled_B1,
                                prob1_labeled_unlabeled_B2)
        L2_con = self._loss_con(prob2_labeled_unlabeled_B1,
                                prob2_labeled_unlabeled_B2)

        # prediction
        P1_unlabeled_B1 = tf.argmax(prob1_unlabeled_B1, axis=-1)
        P1_unlabeled_B2 = tf.argmax(prob1_unlabeled_B2, axis=-1)
        P2_unlabeled_B1 = tf.argmax(prob2_unlabeled_B1, axis=-1)
        P2_unlabeled_B2 = tf.argmax(prob2_unlabeled_B2, axis=-1)

        # confidence (probability of predicted class)
        M1_unlabeled_B1 = tf.reduce_max(prob1_unlabeled_B1, axis=-1)
        M1_unlabeled_B2 = tf.reduce_max(prob1_unlabeled_B2, axis=-1)
        M2_unlabeled_B1 = tf.reduce_max(prob2_unlabeled_B1, axis=-1)
        M2_unlabeled_B2 = tf.reduce_max(prob2_unlabeled_B2, axis=-1)

        # stable samples (masks to index probabilities)
        # a sample is stable for a student when both perturbed copies get the same predicted
        # class and at least one of the two confidences exceeds self.xi
        R1 = tf.logical_and(
            P1_unlabeled_B1 == P1_unlabeled_B2,
            tf.logical_or(M1_unlabeled_B1 > self.xi,
                          M1_unlabeled_B2 > self.xi))
        R2 = tf.logical_and(
            P2_unlabeled_B1 == P2_unlabeled_B2,
            tf.logical_or(M2_unlabeled_B1 > self.xi,
                          M2_unlabeled_B2 > self.xi))
        R12 = tf.logical_and(R1, R2)  # stable for both students

        # stabilities
        # (MSE between the two perturbed predictions of the same student, on samples in R12)
        epsilon1 = MSE(prob1_unlabeled_B1[R12], prob1_unlabeled_B2[R12])
        epsilon2 = MSE(prob2_unlabeled_B1[R12], prob2_unlabeled_B2[R12])

        # compute stabilization losses
        # samples stable for both students: L1_sta takes those with epsilon1 > epsilon2,
        # L2_sta takes those with epsilon1 < epsilon2
        L1_sta = self._loss_sta(
            prob1_unlabeled_B1[R12][epsilon1 > epsilon2],
            prob2_unlabeled_B1[R12][epsilon1 > epsilon2])
        L2_sta = self._loss_sta(
            prob1_unlabeled_B2[R12][epsilon1 < epsilon2],
            prob2_unlabeled_B2[R12][epsilon1 < epsilon2])
        # samples stable for only one student: added to the loss of the other student
        L1_sta += self._loss_sta(
            prob1_unlabeled_B1[tf.logical_and(tf.logical_not(R1), R2)],
            prob2_unlabeled_B1[tf.logical_and(tf.logical_not(R1), R2)])
        L2_sta += self._loss_sta(
            prob1_unlabeled_B2[tf.logical_and(R1, tf.logical_not(R2))],
            prob2_unlabeled_B2[tf.logical_and(R1, tf.logical_not(R2))])

        # compute complete losses
        L1 = L1_cls + self._lambda1 * L1_con + self._lambda2 * L1_sta
        L2 = L2_cls + self._lambda1 * L2_con + self._lambda2 * L2_sta

    # backward pass
    gradients1 = tape.gradient(L1, self.student1.trainable_variables)
    gradients2 = tape.gradient(L2, self.student2.trainable_variables)
    self.optimizer.apply_gradients(
        zip(gradients1, self.student1.trainable_variables))
    self.optimizer.apply_gradients(
        zip(gradients2, self.student2.trainable_variables))
    del tape  # to release memory (persistent tape)

    # update metrics
    self._loss1.update_state(L1)
    self._loss2.update_state(L2)
    self._loss1_cls.update_state(L1_cls)
    self._loss2_cls.update_state(L2_cls)
    self._loss1_con.update_state(L1_con)
    self._loss2_con.update_state(L2_con)
    self._loss1_sta.update_state(L1_sta)
    self._loss2_sta.update_state(L2_sta)
    self._acc1.update_state(y_labeled, prob1_labeled_B1)
    self._acc2.update_state(y_labeled, prob2_labeled_B2)

def test(self, x, y, batch_size=32, evaluation_mapping=None):
    """
    Tests the model (both students).

    The data is processed batch by batch: every batch is post-padded (features with
    self.padding_value, labels with -1), converted to tensors and passed to _test_step().
    The accumulated test metrics are collected into a dictionary and then reset.

    :param x: numpy array of numpy arrays (n_frames, n_features), features corresponding to y.
        'n_frames' can vary, padding is added to make x a tensor.
    :param y: numpy array of numpy arrays of shape (n_frames,), labels corresponding to x.
        'n_frames' can vary, padding is added to make y a tensor.
    :param batch_size: integer, batch size
    :param evaluation_mapping: dictionary {training label -> test label}, the test phones should be a subset of the
        training phones; None to evaluate on the training labels directly
    :return: dictionary {metric_name -> value}, where each value is the metric's result()
    """
    # test batch by batch
    n_batches = ceil(len(x) / batch_size)
    for i in trange(n_batches, desc='test batches'):
        # select batch
        x_batch = select_batch(x, i, batch_size)
        y_batch = select_batch(y, i, batch_size)

        # pad batch
        x_batch = pad_sequences(x_batch, padding='post', value=self.padding_value, dtype='float32')
        y_batch = pad_sequences(y_batch, padding='post', value=-1)

        # convert to tensors
        x_batch = tf.convert_to_tensor(x_batch)
        y_batch = tf.convert_to_tensor(y_batch)

        # test step
        self._test_step(x_batch, y_batch, evaluation_mapping)

    # put metrics in dictionary (easy management)
    # results are read before the reset below, so the dictionary holds the values of this run
    test_metrics = {
        self._test_loss1.name: self._test_loss1.result(),
        self._test_loss2.name: self._test_loss2.result(),
        self._test_acc1_train_phones.name: self._test_acc1_train_phones.result(),
        self._test_acc2_train_phones.name: self._test_acc2_train_phones.result(),
        self._test_acc1.name: self._test_acc1.result(),
        self._test_acc2.name: self._test_acc2.result(),
        self._test_per1.name: self._test_per1.result(),
        self._test_per2.name: self._test_per2.result(),
    }

    # reset metrics
    self._test_loss1.reset_states()
    self._test_loss2.reset_states()
    self._test_acc1_train_phones.reset_states()
    self._test_acc2_train_phones.reset_states()
    self._test_acc1.reset_states()
    self._test_acc2.reset_states()
    self._test_per1.reset_states()
    self._test_per2.reset_states()

    return test_metrics

# @tf.function  # see note in _train_step()
def _test_step(self, x, y, evaluation_mapping):
    """
    Evaluates both students on one padded batch and updates the test metrics.

    The loss and the '*_train_phones' accuracies are computed on the original (training)
    labels; the phone error rates and the remaining accuracies are computed on the labels
    obtained through evaluation_mapping, or on the original labels when it is None.

    :param x: padded batch of features (tensor)
    :param y: padded batch of labels (tensor)
    :param evaluation_mapping: mapping handed to map_labels(), or None to skip the mapping
    :return: None, the results are accumulated in the metric objects
    """
    # compute mask (to remove padding)
    mask = self.mask.compute_mask(x)

    # forward pass
    y_prob1_train_phones = self.student1(x, training=False)
    y_prob2_train_phones = self.student2(x, training=False)
    y_pred1_train_phones = tf.argmax(y_prob1_train_phones, axis=-1)
    y_pred2_train_phones = tf.argmax(y_prob2_train_phones, axis=-1)
    y_train_phones = tf.identity(y)  # keep the unmapped labels, 'y' may be rebound below

    # map labels to set of test phones
    # NOTE(review): Tout is passed as a list, so tf.numpy_function presumably returns a list
    # of tensors here -- confirm that the 'y[mask]' / 'y_pred1[mask]' indexing below works on
    # this path. Also confirm that evaluation_mapping can be passed through 'inp'.
    if evaluation_mapping is not None:
        y = tf.numpy_function(map_labels,
                              [y_train_phones, evaluation_mapping],
                              [tf.float32])
        y_pred1 = tf.numpy_function(
            map_labels, [y_pred1_train_phones, evaluation_mapping],
            [tf.float32])
        y_pred2 = tf.numpy_function(
            map_labels, [y_pred2_train_phones, evaluation_mapping],
            [tf.float32])
    else:
        y = y_train_phones
        y_pred1 = y_pred1_train_phones
        y_pred2 = y_pred2_train_phones

    # update phone error rate
    # (done before the padding is removed: the metric receives the mask itself)
    self._test_per1.update_state(y, y_pred1, mask)
    self._test_per2.update_state(y, y_pred2, mask)

    # remove padding
    y_pred1 = y_pred1[mask]
    y_pred2 = y_pred2[mask]
    y_prob1_train_phones = y_prob1_train_phones[mask]
    y_prob2_train_phones = y_prob2_train_phones[mask]
    y_train_phones = y_train_phones[mask]
    y = y[mask]

    # compute loss
    loss1 = self._loss_cls(y_train_phones, y_prob1_train_phones)
    loss2 = self._loss_cls(y_train_phones, y_prob2_train_phones)

    # update loss
    self._test_loss1.update_state(loss1)
    self._test_loss2.update_state(loss2)

    # update accuracy using training phones
    self._test_acc1_train_phones.update_state(y_train_phones,
                                              y_prob1_train_phones)
    self._test_acc2_train_phones.update_state(y_train_phones,
                                              y_prob2_train_phones)

    # update accuracy using test phones
    self._test_acc1.update_state(y, y_pred1)
    self._test_acc2.update_state(y, y_pred2)
class ParallelLSTMTextClassifier(Model):
    """Three-level bidirectional-LSTM text classifier producing two logits.

    Token ids are embedded with a frozen, pre-trained embedding matrix and
    summarised by three BiLSTM levels. After every level the sequence axis is
    max-pooled and the pooled vectors are regrouped into sequences of ten for
    the next level, so ``call`` requires every example to hold exactly
    10 * 10 * 10 token ids.
    """

    EPOCHS = 1
    # Label-smoothing factor applied to the training targets in ``fit``.
    LABEL_SMOOTHING = .2
    logger = logging.getLogger('tensorflow')
    logger.setLevel(logging.INFO)

    def __init__(self, lstm_units=100, lr=1e-4, dropout_rate=0.2,
                 embedding_path='../data/embedding.npy'):
        """Build the layers, the optimizer and the accuracy metric.

        :param lstm_units: units per LSTM direction; every BiLSTM level
            therefore outputs ``2 * lstm_units`` features
        :param lr: initial learning rate of Adam and of the decay schedule
        :param dropout_rate: rate shared by all dropout layers
        :param embedding_path: ``.npy`` file holding the pre-trained
            embedding matrix (defaults to the previously hard-coded path)
        """
        super().__init__()
        self.embedding = tf.Variable(np.load(embedding_path),
                                     dtype=tf.float32,
                                     name='pretrained_embedding',
                                     trainable=False)
        self.dropout_l1 = Dropout(dropout_rate)
        self.dropout_l2 = Dropout(dropout_rate)
        self.dropout_l3 = Dropout(dropout_rate)
        self.blstm_l1 = Bidirectional(LSTM(lstm_units, return_sequences=True))
        self.blstm_l2 = Bidirectional(LSTM(lstm_units, return_sequences=True))
        self.blstm_l3 = Bidirectional(LSTM(lstm_units, return_sequences=True))
        self.dropout_op = Dropout(dropout_rate)
        self.units = 2 * lstm_units
        self.fc = Dense(units=self.units, activation='elu')
        self.out_linear = Dense(2)
        self.optimizer = Adam(lr)
        # The schedule is evaluated by hand in fit() and written into the
        # optimizer's learning rate before every update.
        self.decay_lr = ExponentialDecay(lr, 1000, 0.90)
        self.accuracy = Accuracy()
        self.logger = logging.getLogger('tensorflow')
        self.logger.setLevel(logging.INFO)

    def call(self, inputs, training=False):
        """Return the class logits for a batch of token ids.

        :param inputs: token ids, cast to int32 when necessary; every example
            must contain 10 * 10 * 10 ids (required by the reshapes below)
        :param training: enables the dropout layers when True
        :return: logits with one row per example and two columns
        """
        if inputs.dtype != tf.int32:
            inputs = tf.cast(inputs, tf.int32)
        batch_size = tf.shape(inputs)[0]
        x = tf.nn.embedding_lookup(self.embedding, inputs)

        # Level 1: sequences of 10 embedded tokens, max-pooled over time.
        x = tf.reshape(x, (batch_size * 10 * 10, 10, self.embedding.shape[-1]))
        x = self.dropout_l1(x, training=training)
        x = self.blstm_l1(x)
        x = tf.reduce_max(x, 1)

        # Level 2: sequences of 10 level-1 summaries.
        x = tf.reshape(x, (batch_size * 10, 10, self.units))
        x = self.dropout_l2(x, training=training)
        x = self.blstm_l2(x)
        x = tf.reduce_max(x, 1)

        # Level 3: sequences of 10 level-2 summaries, one per example.
        x = tf.reshape(x, (batch_size, 10, self.units))
        x = self.dropout_l3(x, training=training)
        x = self.blstm_l3(x)
        x = tf.reduce_max(x, 1)

        x = self.dropout_op(x, training=training)
        x = self.fc(x)
        x = self.out_linear(x)
        return x

    def _smoothed_cross_entropy(self, labels, logits):
        """Return the mean softmax cross-entropy against smoothed targets.

        ``tf.nn.sparse_softmax_cross_entropy_with_logits`` has no
        ``label_smoothing`` argument, so the integer labels are expanded to
        one-hot vectors and smoothed here before the dense variant is used.
        """
        num_classes = self.out_linear.units
        one_hot = tf.one_hot(tf.cast(labels, tf.int32),
                             depth=num_classes,
                             dtype=logits.dtype)
        smoothing = self.LABEL_SMOOTHING
        soft_targets = one_hot * (1.0 - smoothing) + smoothing / num_classes
        return tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=soft_targets,
                                                    logits=logits))

    def fit(self, data, epochs=EPOCHS):
        """Train the model with a hand-written loop.

        :param data: iterable yielding ``(texts, labels)`` batches; labels are
            integer class ids (presumably 0 or 1, matching the two logits)
        :param epochs: number of passes over ``data``
        :return: True once training has finished
        """
        t0 = time.time()
        step = 0
        for _ in range(epochs):
            for texts, labels in data:
                with tf.GradientTape() as tape:
                    logits = self.call(texts, training=True)
                    loss = self._smoothed_cross_entropy(labels, logits)
                self.optimizer.lr.assign(self.decay_lr(step))
                grads = tape.gradient(loss, self.trainable_variables)
                grads, _ = tf.clip_by_global_norm(grads, 5.)
                self.optimizer.apply_gradients(
                    zip(grads, self.trainable_variables))
                if step % 100 == 0:
                    self.logger.info(
                        "Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}"
                        .format(step,
                                loss.numpy().item(),
                                time.time() - t0,
                                self.optimizer.lr.numpy().item()))
                    # Restart the timer so "Spent" covers one logging interval.
                    t0 = time.time()
                step += 1
        return True

    def evaluate(self, data):
        """Compute, log and return the accuracy over ``data``.

        :param data: iterable yielding ``(texts, labels)`` batches
        :return: accuracy accumulated over all batches
        """
        self.accuracy.reset_states()
        for texts, labels in data:
            logits = self.call(texts, training=False)
            y_pred = tf.argmax(logits, axis=-1)
            self.accuracy.update_state(y_true=labels, y_pred=y_pred)
        accuracy = self.accuracy.result().numpy()
        self.logger.info("Evaluation Accuracy: {:.3f}".format(accuracy))
        return accuracy