def fmeasure(y_true, y_predicted):
    """
    Calculate F-beta score with beta=1 for predicted labels
    Args:
        y_true: true labels, converted to one-hot form with `labels2onehot`
        y_predicted: sequence of predictions; element [0] of every prediction
                holds the predicted labels, element [1] is a dictionary whose
                keys are used as the set of classes

    Returns:
        result of `fbeta_score_K`, or result of `fbeta_score_np` when the
        Keras branch raises ValueError
    """
    first_probas = y_predicted[0][1]
    classes = np.array(list(first_probas.keys()))
    true_one_hot = labels2onehot(y_true, classes)
    predicted_labels = [prediction[0] for prediction in y_predicted]
    pred_one_hot = labels2onehot(predicted_labels, classes)

    try:
        # The result of the check is not needed: a ValueError raised here (or
        # inside the Keras scorer) switches to the NumPy implementation.
        K.is_keras_tensor(pred_one_hot)
        return fbeta_score_K(true_one_hot, pred_one_hot, beta=1)
    except ValueError:
        return fbeta_score_np(true_one_hot, pred_one_hot, beta=1)
 def infer_on_batch(self, batch, labels=None):
     """
     Evaluate the model on the batch if labels are given, predict otherwise
     Args:
         batch - model inputs, passed to the model unchanged
         labels - labels for the batch (any sized container, numpy arrays
             included); None or an empty container means "no labels"

     Returns:
         result of self.model.test_on_batch, if labels are given
         result of self.model.predict, otherwise
     """
     # Explicit emptiness check instead of `if labels:` - truth-testing a
     # numpy array with more than one element raises ValueError.
     if labels is None or len(labels) == 0:
         return self.model.predict(batch)

     # NOTE(review): the classes are hard-coded to np.arange(1, 20) here -
     # confirm that this matches the model output.
     onehot_labels = labels2onehot(labels, classes=np.arange(1, 20))
     return self.model.test_on_batch(batch, onehot_labels)
def fmeasure(y_true, y_predicted, average="macro"):
    """
    Compute F1-measure of predicted labels against true labels
    Args:
        y_true: true labels
        y_predicted: list of predictions.
                Each prediction is a tuple of two elements
                (predicted_labels, dictionary like {"label_i": probability_i} );
                the dictionary keys of the first prediction define the classes
        average: averaging mode, forwarded to `f1_score`

    Returns:
        F1-measure
    """
    first_probas = y_predicted[0][1]
    classes = np.array(list(first_probas.keys()))

    # Both the true and the predicted labels are one-hot encoded over the
    # same set of classes before scoring.
    true_one_hot = labels2onehot(y_true, classes)
    predicted_labels = [prediction[0] for prediction in y_predicted]
    pred_one_hot = labels2onehot(predicted_labels, classes)

    return f1_score(true_one_hot, pred_one_hot, average=average)
Exemple #4
0
def roc_auc_score(y_true, y_predicted):
    """
    Compute Area Under the Curve (AUC) from prediction scores
    Args:
        y_true: true labels, converted to one-hot form with `labels2onehot`
        y_predicted: sequence of predictions; element [1] of every prediction
                is a dictionary like {"label_i": probability_i}

    Returns:
        AUC computed by the Keras/TensorFlow branch (NaN replaced by 0), or by
        `roc_auc_score_np` when that branch raises ValueError
    """
    first_probas = y_predicted[0][1]
    classes = np.array(list(first_probas.keys()))
    true_one_hot = labels2onehot(y_true, classes)
    pred_probas = [list(prediction[1].values()) for prediction in y_predicted]

    try:
        # The result of the check is not needed: a ValueError raised anywhere
        # in this branch switches to the NumPy implementation.
        K.is_keras_tensor(pred_probas)
        tensor_score = auc(true_one_hot, pred_probas)
        return tf.where(tf.is_nan(tensor_score), 0., tensor_score)
    except ValueError:
        return roc_auc_score_np(true_one_hot, pred_probas)
Exemple #5
0
    def train_on_batch(self, texts, labels):
        """
        Train the model on the given batch
        Args:
            texts - iterable of texts, tokenized with self.tokenizer
            labels - labels of the texts, one-hot encoded over self.classes

        Returns:
            value returned by self.model.train_on_batch for this batch
        """
        tokenized = self.tokenizer(list(texts))
        vectors = self.texts2vec(tokenized)
        targets = labels2onehot(labels, classes=self.classes)
        return self.model.train_on_batch(vectors, targets)
Exemple #6
0
    def train_on_batch(self, batch):
        """
        Train the model on the given batch
        Args:
            batch - pair-like data: batch[0] holds the texts and batch[1]
                holds the labels

        Returns:
            value returned by self.model.train_on_batch for this batch
        """
        tokenized = self.tokenizer.infer(instance=list(batch[0]))
        batch_labels = list(batch[1])
        vectors = self.texts2vec(tokenized)
        targets = labels2onehot(batch_labels, classes=self.classes)
        return self.model.train_on_batch(vectors, targets)
Exemple #7
0
    def train_on_batch(self, texts, labels):
        """
        Train the model on the given batch
        Args:
            texts - list of texts (or list of lists of text tokens)
            labels - list of labels

        Returns:
            value returned by self.model.train_on_batch for this batch
        """
        # Only raw strings go through the tokenizer; anything else is passed
        # to texts2vec as it is.
        needs_tokenizing = isinstance(texts[0], str)
        tokenized = self.tokenizer(list(texts)) if needs_tokenizing else texts
        vectors = self.texts2vec(tokenized)
        targets = labels2onehot(labels, classes=self.classes)
        return self.model.train_on_batch(vectors, targets)
    def train_on_batch(self, xa, ya):
        """
        Train the model on the batch extended with noisy copies of itself
        Args:
            xa - feature vectors of the batch
            ya - labels of the batch, one-hot encoded over self.classes

        Returns:
            value returned by self.model.train_on_batch for the extended batch
        """
        def add_noise(feats, labels, num_noise):
            # The original features come first, followed by num_noise copies
            # multiplied element-wise by noise drawn from N(1, 0.02).
            feature_parts = [feats]
            label_parts = [labels]
            for _ in range(num_noise):
                factor = np.random.normal(1, 0.02, feats.shape)
                feature_parts.append(feats * factor)
                label_parts.append(labels)
            return np.vstack(feature_parts), np.vstack(label_parts)

        batch_vectors = np.array(xa)
        batch_targets = labels2onehot(np.array(ya), classes=self.classes)
        noisy_vectors, noisy_targets = add_noise(batch_vectors, batch_targets, 10)
        return self.model.train_on_batch(noisy_vectors, noisy_targets)
Exemple #9
0
def roc_auc_score(y_true, y_predicted):
    """Compute Area Under the Curve (AUC) from prediction scores.

    Args:
        y_true: true labels, converted to one-hot form with `labels2onehot`
        y_predicted: list of predictions.
                Each prediction is a tuple of two elements
                (predicted_labels, dictionary like {"label_i": probability_i} );
                the dictionary keys of the first prediction define the classes

    Returns:
        Area Under the Curve (AUC) as computed by `roc_auc_score_np`
    """
    first_probas = y_predicted[0][1]
    classes = np.array(list(first_probas.keys()))
    true_one_hot = labels2onehot(y_true, classes)
    pred_probas = [list(prediction[1].values()) for prediction in y_predicted]
    return roc_auc_score_np(true_one_hot, pred_probas)
Exemple #10
0
    def infer_on_batch(self, batch, labels=None):
        """
        Infer the model on the given batch
        Args:
            batch - list of texts
            labels - labels of the texts (any sized container, numpy arrays
                included); None or an empty container means "no labels"

        Returns:
            loss and metrics values on the given batch, if labels are given
            predictions, otherwise
        """
        texts = self.tokenizer(batch)
        features = self.texts2vec(texts)

        # Explicit emptiness check instead of `if labels:` - truth-testing a
        # numpy array with more than one element raises ValueError.
        if labels is None or len(labels) == 0:
            return self.model.predict(features)

        onehot_labels = labels2onehot(labels, classes=self.classes)
        return self.model.test_on_batch(features, onehot_labels)
Exemple #11
0
    def infer_on_batch(self, batch, labels=None):
        """
        Infer the model on the given batch
        Args:
            batch - list of texts, tokenized with self.tokenizer.infer
            labels - labels of the texts (any sized container, numpy arrays
                included); None or an empty container means "no labels"

        Returns:
            loss and metrics values on the given batch, if labels are given
            predictions, otherwise
        """
        texts = self.tokenizer.infer(instance=batch)
        features = self.texts2vec(texts)

        # Explicit emptiness check instead of `if labels:` - truth-testing a
        # numpy array with more than one element raises ValueError.
        if labels is None or len(labels) == 0:
            return self.model.predict(features)

        onehot_labels = labels2onehot(labels, classes=self.classes)
        return self.model.test_on_batch(features, onehot_labels)
Exemple #12
0
    def infer_on_batch(self, texts, labels=None):
        """
        Infer the model on the given batch
        Args:
            texts - list of texts (or list of lists of text tokens)
            labels - labels of the texts (any sized container, numpy arrays
                included); None or an empty container means "no labels"

        Returns:
            loss and metrics values on the given batch, if labels are given
            predictions, otherwise
        """
        # Only raw strings go through the tokenizer; anything else is passed
        # to texts2vec as it is.
        if isinstance(texts[0], str):
            texts = self.tokenizer(list(texts))
        features = self.texts2vec(texts)

        # Explicit emptiness check instead of `if labels:` - truth-testing a
        # numpy array with more than one element raises ValueError.
        if labels is None or len(labels) == 0:
            return self.model.predict(features)

        onehot_labels = labels2onehot(labels, classes=self.classes)
        return self.model.test_on_batch(features, onehot_labels)
Exemple #13
0
    def train(self, dataset, *args, **kwargs):
        """
        Train the model batch by batch with periodic validation and early stopping

        Training runs until self.opt['epochs'] epochs are done, validation runs
        out of patience, or the user interrupts it with Ctrl-C. The model is
        saved with self.save() afterwards in all three cases.
        Args:
            dataset: object providing `data` (mapping with a 'train' entry and,
                optionally, a 'valid' entry), `iter_all(data_type=...)` and
                `batch_generator(batch_size=..., data_type=...)`
            *args, **kwargs: accepted but not used by this method

        Returns:
            None
        """
        updates = 0
        # Lowest first validation metric seen so far; starts above any real value.
        val_loss = 1e100
        # Number of consecutive validations that did not improve on val_loss.
        val_increase = 0
        epochs_done = 0

        n_train_samples = len(dataset.data['train'])

        # The whole validation set is collected and vectorized once, up front.
        # NOTE(review): this happens before the "'valid' in dataset.data" check
        # below - confirm iter_all copes with a dataset without a 'valid' part.
        valid_iter_all = dataset.iter_all(data_type='valid')
        valid_x = []
        valid_y = []
        for valid_i, valid_sample in enumerate(valid_iter_all):
            valid_x.append(valid_sample[0])
            valid_y.append(valid_sample[1])

        valid_x = self.texts2vec(valid_x)
        valid_y = labels2onehot(valid_y, classes=self.classes)

        print('\n____Training over {} samples____\n\n'.format(n_train_samples))

        try:
            while epochs_done < self.opt['epochs']:
                # A fresh generator is created for every epoch.
                batch_gen = dataset.batch_generator(batch_size=self.opt['batch_size'],
                                                    data_type='train')
                for step, batch in enumerate(batch_gen):
                    metrics_values = self.train_on_batch(batch)
                    updates += 1

                    # Training metrics are logged every 50th step of an epoch.
                    if self.opt['verbose'] and step % 50 == 0:
                        log_metrics(names=self.metrics_names,
                                    values=metrics_values,
                                    updates=updates,
                                    mode='train')

                epochs_done += 1
                if epochs_done % self.opt['val_every_n_epochs'] == 0:
                    if 'valid' in dataset.data.keys():
                        valid_metrics_values = self.model.test_on_batch(x=valid_x, y=valid_y)

                        log_metrics(names=self.metrics_names,
                                    values=valid_metrics_values,
                                    mode='valid')
                        # Early stopping on the first validation metric: a
                        # value above the best one counts against patience.
                        if valid_metrics_values[0] > val_loss:
                            val_increase += 1
                            print("__Validation impatience {} out of {}".format(
                                val_increase, self.opt['val_patience']))
                            if val_increase == self.opt['val_patience']:
                                print("___Stop training: validation is out of patience___")
                                # Leaves the epoch loop; the model is still saved below.
                                break
                        else:
                            # Not worse than the best value: reset the counter
                            # and remember the new best value.
                            val_increase = 0
                            val_loss = valid_metrics_values[0]
                print('epochs_done: {}'.format(epochs_done))
        except KeyboardInterrupt:
            # Ctrl-C stops training but does not skip saving the model.
            print('Interrupted', file=sys.stderr)

        self.save()