def fmeasure(y_true, y_predicted):
    """
    Calculate F1-measure (F-beta score with beta=1)

    Args:
        y_true: true labels, converted to one-hot form with labels2onehot
        y_predicted: list of predictions. Each prediction is indexed as
            (predicted_labels, dictionary like {"label_i": probability_i});
            the keys of the first prediction's dictionary define the classes

    Returns:
        result of fbeta_score_K if the Keras tensor check and the Keras
        computation finish without ValueError, result of fbeta_score_np otherwise
    """
    first_probas = y_predicted[0][1]
    classes = np.array(list(first_probas.keys()))

    true_one_hot = labels2onehot(y_true, classes)
    predicted_labels = [prediction[0] for prediction in y_predicted]
    pred_one_hot = labels2onehot(predicted_labels, classes)

    try:
        # Only whether this call raises matters; its return value is not used.
        K.is_keras_tensor(pred_one_hot)
        return fbeta_score_K(true_one_hot, pred_one_hot, beta=1)
    except ValueError:
        # Fall back to the numpy implementation
        return fbeta_score_np(true_one_hot, pred_one_hot, beta=1)
def infer_on_batch(self, batch, labels=None):
    """
    Infer the model on the given batch

    Args:
        batch: input batch, passed to the model unchanged
        labels: optional labels (list or array); when given and non-empty,
            the model is evaluated instead of used for prediction

    Returns:
        result of self.model.test_on_batch if non-empty labels are given,
        result of self.model.predict otherwise
    """
    # Explicit check instead of `if labels:`: the truth value of a numpy array
    # with more than one element is ambiguous and raises ValueError.
    has_labels = labels is not None and len(labels) > 0
    if not has_labels:
        return self.model.predict(batch)

    # NOTE: class ids 1..19 are hard-coded in this function.
    onehot_labels = labels2onehot(labels, classes=np.arange(1, 20))
    return self.model.test_on_batch(batch, onehot_labels)
def fmeasure(y_true, y_predicted, average="macro"):
    """
    Compute the F1-measure of predicted labels against true labels

    Args:
        y_true: array of true binary labels
        y_predicted: list of predictions. Each prediction is a tuple of two elements
            (predicted_labels, dictionary like {"label_i": probability_i});
            the dictionary of the first prediction defines the set of classes
        average: type of averaging, forwarded to f1_score

    Returns:
        F1-measure
    """
    label_names = list(y_predicted[0][1].keys())
    classes = np.array(label_names)

    true_one_hot = labels2onehot(y_true, classes)
    predicted_labels = [prediction[0] for prediction in y_predicted]
    pred_one_hot = labels2onehot(predicted_labels, classes)

    return f1_score(true_one_hot, pred_one_hot, average=average)
def roc_auc_score(y_true, y_predicted):
    """
    Compute Area Under the Curve (AUC) from prediction scores

    Args:
        y_true: true labels, converted to one-hot form with labels2onehot
        y_predicted: list of predictions. Each prediction is indexed as
            (predicted_labels, dictionary like {"label_i": probability_i});
            the keys of the first prediction's dictionary define the classes

    Returns:
        AUC computed by the tensor branch if it finishes without ValueError
        (NaN replaced by 0.), result of roc_auc_score_np otherwise

    Raises:
        KeyError: if a prediction's dictionary lacks one of the classes
    """
    class_names = list(y_predicted[0][1].keys())
    classes = np.array(class_names)
    y_true_one_hot = labels2onehot(y_true, classes)

    # Read probabilities by class name so every row follows the column order of
    # `classes`, instead of relying on each dictionary's own insertion order.
    y_pred_probas = [
        [prediction[1][name] for name in class_names]
        for prediction in y_predicted
    ]

    try:
        # Only whether this call raises matters; its return value is not used.
        _ = K.is_keras_tensor(y_pred_probas)
        auc_score = auc(y_true_one_hot, y_pred_probas)
        auc_score = tf.where(tf.is_nan(auc_score), 0., auc_score)
    except ValueError:
        auc_score = roc_auc_score_np(y_true_one_hot, y_pred_probas)
    return auc_score
def train_on_batch(self, texts, labels):
    """
    Run one training step of the model on a batch of texts

    Args:
        texts - iterable of texts; converted to a list and passed to self.tokenizer
        labels - list of labels, converted to one-hot form over self.classes

    Returns:
        loss and metrics values on the given batch
    """
    tokenized = self.tokenizer(list(texts))
    vectors = self.texts2vec(tokenized)
    targets = labels2onehot(labels, classes=self.classes)
    return self.model.train_on_batch(vectors, targets)
def train_on_batch(self, batch):
    """
    Run one training step of the model on the given batch

    Args:
        batch - list of data where batch[0] is list of texts
                and batch[1] is list of labels

    Returns:
        loss and metrics values on the given batch
    """
    raw_texts = list(batch[0])
    tokenized = self.tokenizer.infer(instance=raw_texts)
    batch_labels = list(batch[1])

    vectors = self.texts2vec(tokenized)
    targets = labels2onehot(batch_labels, classes=self.classes)
    return self.model.train_on_batch(vectors, targets)
def train_on_batch(self, texts, labels):
    """
    Run one training step of the model on the given batch

    Args:
        texts - list of texts (or list of lists of text tokens)
        labels - list of labels

    Returns:
        loss and metrics values on the given batch
    """
    # Raw strings still need tokenizing; anything else is used as given.
    needs_tokenizing = isinstance(texts[0], str)
    if needs_tokenizing:
        texts = self.tokenizer(list(texts))

    vectors = self.texts2vec(texts)
    targets = labels2onehot(labels, classes=self.classes)
    return self.model.train_on_batch(vectors, targets)
def train_on_batch(self, xa, ya):
    """
    Run one training step on the batch augmented with noisy copies

    The feature vectors are stacked with 10 copies of themselves, each
    multiplied element-wise by Gaussian noise (mean 1, std 0.02); the one-hot
    labels are repeated to match.

    Args:
        xa - batch of feature vectors (converted with np.array)
        ya - batch of labels, converted to one-hot form over self.classes

    Returns:
        loss and metrics values returned by self.model.train_on_batch
    """
    n_noisy_copies = 10

    def noisy_copy(feats):
        # Multiplicative noise centered at 1, drawn fresh for every copy
        return feats * np.random.normal(1, 0.02, feats.shape)

    vectors = np.array(xa)
    onehot = labels2onehot(np.array(ya), classes=self.classes)

    # Original block first, then the noisy copies in draw order
    stacked_feats = np.vstack(
        [vectors] + [noisy_copy(vectors) for _ in range(n_noisy_copies)]
    )
    stacked_labels = np.vstack([onehot] * (n_noisy_copies + 1))

    return self.model.train_on_batch(stacked_feats, stacked_labels)
def roc_auc_score(y_true, y_predicted):
    """Compute Area Under the Curve (AUC) from prediction scores.

    Args:
        y_true: true binary labels
        y_predicted: list of predictions. Each prediction is a tuple of two elements
            (predicted_labels, dictionary like {"label_i": probability_i});
            the keys of the first prediction's dictionary define the classes

    Returns:
        Area Under the Curve (AUC) from prediction scores

    Raises:
        KeyError: if a prediction's dictionary lacks one of the classes
    """
    class_names = list(y_predicted[0][1].keys())
    classes = np.array(class_names)
    y_true_one_hot = labels2onehot(y_true, classes)

    # Read probabilities by class name so every row follows the column order of
    # `classes`, instead of relying on each dictionary's own insertion order.
    y_pred_probas = [
        [prediction[1][name] for name in class_names]
        for prediction in y_predicted
    ]

    return roc_auc_score_np(y_true_one_hot, y_pred_probas)
def infer_on_batch(self, batch, labels=None):
    """
    Infer the model on the given batch

    Args:
        batch - list of texts, passed to self.tokenizer
        labels - optional list (or array) of labels

    Returns:
        loss and metrics values on the given batch, if non-empty labels are given
        predictions, otherwise
    """
    texts = self.tokenizer(batch)
    # Both modes need the same features, so compute them once up front.
    features = self.texts2vec(texts)

    # Explicit check instead of `if labels:`: the truth value of a numpy array
    # with more than one element is ambiguous and raises ValueError.
    has_labels = labels is not None and len(labels) > 0
    if not has_labels:
        return self.model.predict(features)

    onehot_labels = labels2onehot(labels, classes=self.classes)
    return self.model.test_on_batch(features, onehot_labels)
def infer_on_batch(self, batch, labels=None):
    """
    Infer the model on the given batch

    Args:
        batch - list of texts, passed to self.tokenizer.infer
        labels - optional list (or array) of labels

    Returns:
        loss and metrics values on the given batch, if non-empty labels are given
        predictions, otherwise
    """
    texts = self.tokenizer.infer(instance=batch)
    # Both modes need the same features, so compute them once up front.
    features = self.texts2vec(texts)

    # Explicit check instead of `if labels:`: the truth value of a numpy array
    # with more than one element is ambiguous and raises ValueError.
    has_labels = labels is not None and len(labels) > 0
    if not has_labels:
        return self.model.predict(features)

    onehot_labels = labels2onehot(labels, classes=self.classes)
    return self.model.test_on_batch(features, onehot_labels)
def infer_on_batch(self, texts, labels=None):
    """
    Infer the model on the given batch

    Args:
        texts - list of texts (or list of lists of text tokens)
        labels - optional list (or array) of labels

    Returns:
        loss and metrics values on the given batch, if non-empty labels are given
        predictions, otherwise
    """
    # Raw strings still need tokenizing; anything else is used as given.
    if isinstance(texts[0], str):
        texts = self.tokenizer(list(texts))
    # Both modes need the same features, so compute them once up front.
    features = self.texts2vec(texts)

    # Explicit check instead of `if labels:`: the truth value of a numpy array
    # with more than one element is ambiguous and raises ValueError.
    has_labels = labels is not None and len(labels) > 0
    if not has_labels:
        return self.model.predict(features)

    onehot_labels = labels2onehot(labels, classes=self.classes)
    return self.model.test_on_batch(features, onehot_labels)
def train(self, dataset, *args, **kwargs):
    """
    Train the model using batches and validation

    Runs up to self.opt['epochs'] epochs over the 'train' data. Every
    self.opt['val_every_n_epochs'] epochs, if the dataset has a 'valid' split,
    the model is evaluated on it; training stops once the validation loss
    (first metric value) has increased self.opt['val_patience'] times in a row.
    The model is saved at the end, including after a KeyboardInterrupt.

    Args:
        dataset: instance of class Dataset; this method uses dataset.data,
            dataset.iter_all and dataset.batch_generator
        *args, **kwargs: accepted but not used

    Returns:
        None
    """
    updates = 0
    val_loss = 1e100
    val_increase = 0
    epochs_done = 0

    n_train_samples = len(dataset.data['train'])

    # Prepare the validation set only when the split exists: the training loop
    # below already treats validation as optional, so its preparation must not
    # be unconditional.
    has_valid = 'valid' in dataset.data
    valid_x = valid_y = None
    if has_valid:
        valid_texts = []
        valid_labels = []
        for valid_sample in dataset.iter_all(data_type='valid'):
            valid_texts.append(valid_sample[0])
            valid_labels.append(valid_sample[1])
        valid_x = self.texts2vec(valid_texts)
        valid_y = labels2onehot(valid_labels, classes=self.classes)

    print('\n____Training over {} samples____\n\n'.format(n_train_samples))

    try:
        while epochs_done < self.opt['epochs']:
            batch_gen = dataset.batch_generator(batch_size=self.opt['batch_size'],
                                                data_type='train')
            for step, batch in enumerate(batch_gen):
                metrics_values = self.train_on_batch(batch)
                updates += 1

                if self.opt['verbose'] and step % 50 == 0:
                    log_metrics(names=self.metrics_names,
                                values=metrics_values,
                                updates=updates,
                                mode='train')

            epochs_done += 1
            if has_valid and epochs_done % self.opt['val_every_n_epochs'] == 0:
                valid_metrics_values = self.model.test_on_batch(x=valid_x, y=valid_y)

                log_metrics(names=self.metrics_names,
                            values=valid_metrics_values,
                            mode='valid')

                # The first metric value is used as the validation loss.
                if valid_metrics_values[0] > val_loss:
                    val_increase += 1
                    print("__Validation impatience {} out of {}".format(
                        val_increase, self.opt['val_patience']))
                    if val_increase == self.opt['val_patience']:
                        print("___Stop training: validation is out of patience___")
                        break
                else:
                    val_increase = 0
                    val_loss = valid_metrics_values[0]
            print('epochs_done: {}'.format(epochs_done))
    except KeyboardInterrupt:
        # A manual stop still falls through to saving the model below.
        print('Interrupted', file=sys.stderr)

    self.save()