Code example #1
def fit(self, x_seq, y_seq, validation_split=0., epochs=1, verbose=0):
    with self.graph.as_default():
        # Assumption: sequence_tagger(words) -> characters
        # Convert the tokenized inputs to padded index sequences and
        # one-hot encode the target tag sequences before training.
        x_train, lengths = self.lang.tokenized_to_sequence(x_seq)
        y_train = utils.one_hot_encode_sequence(y_seq, self.classes)
        history = self.model.fit(x_train, y_train, verbose=verbose)
        # Return the loss and accuracy recorded for the training run.
        return history.history['loss'][0], history.history['acc'][0]
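All of the examples on this page pass their labels through utils.one_hot_encode_sequence. The implementation of that helper is not shown here; the following is only a minimal sketch of what such a function might look like, assuming left-padded sequences and a fixed label vocabulary (the body is a guess for illustration, not the project's code):

import numpy as np

def one_hot_encode_sequence(y_seq, classes):
    # Hypothetical sketch: encode a batch of tag sequences as one-hot arrays.
    # y_seq   -- list of label sequences, e.g. [['B-PER', 'O'], ['O']]
    # classes -- ordered list of all valid labels
    class_to_idx = {c: i for i, c in enumerate(classes)}
    max_len = max(len(seq) for seq in y_seq)
    encoded = np.zeros((len(y_seq), max_len, len(classes)), dtype=np.float32)
    for i, seq in enumerate(y_seq):
        # Left-pad so labels align with the end of the sequence, matching
        # the prediction[-len(text):] slicing used in code example #3.
        offset = max_len - len(seq)
        for j, label in enumerate(seq):
            encoded[i, offset + j, class_to_idx[label]] = 1.0
    return encoded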
Code example #2
File: sequence_model.py  Project: czhu12/light_bulb
def fit(self, x_seq, y_seq, validation_split=0., epochs=1):
    with self.graph.as_default():
        # Assumption: sequence_tagger(words) -> characters
        # Raw texts are converted to index sequences and the labels are
        # one-hot encoded against the set of valid output tokens.
        x_train, lengths = self.lang.texts_to_sequence(x_seq)
        y_train = utils.one_hot_encode_sequence(
            y_seq, valid_tokens=self.valid_outputs)
        return self.model.fit(x_train, y_train)
Code example #3
    def _score_sequence(self, x_test, y_test):
        id2class = self.label_helper.score_classes
        self.logger.debug("Scoring items with model labeller.")
        y_test = self.label_helper.to_training(y_test)
        y_test = utils.one_hot_encode_sequence(
            y_test,
            id2class,
        )
        y_pred = self.model.score(x_test)
        # reshape as a classification problem, to set threshold
        threshold = Evaluator.threshold_for_precision(
            y_test.reshape((y_test.shape[0], -1)),
            y_pred.reshape((y_test.shape[0], -1)),
            TARGET_PRECISION,
        )
        # NOTE: the precision-based threshold is then overridden with a
        # fixed value.
        threshold = 0.45

        unlabelled_texts, ids = self.dataset.model_labelling_set()
        if len(unlabelled_texts) == 0:
            self.logger.info("Model labelling done!")
            return 0

        scores = self.model.score(unlabelled_texts)

        # Normalise raw scores into per-token probability distributions.
        dist = scores / scores.sum(axis=-1, keepdims=True)
        idxs = np.argmax(dist, -1)

        num_scored = 0
        self.logger.debug("set labelling threshold as: {}".format(threshold))
        for _id, (text, prediction) in zip(ids, zip(unlabelled_texts, dist)):
            # The prediction has padding so we only take the last len(text) scores.
            text_tag = []
            met_threshold = True
            for word, word_likelihood_dist in zip(text,
                                                  prediction[-len(text):]):
                idx = np.argmax(word_likelihood_dist)
                tag = id2class[idx]
                if np.max(word_likelihood_dist) < threshold:
                    met_threshold = False
                    break
                text_tag.append({'word': word, 'tag': tag})

            if met_threshold:
                self.dataset.add_label(
                    _id,
                    self.label_helper.decode(text_tag),
                    stage=Dataset.MODEL_LABELLED,
                    user=Dataset.USER_MODEL_LABELLER,
                    is_labelled=False,
                    save=True,
                )
                num_scored += 1
        return num_scored
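Code example #3 depends on Evaluator.threshold_for_precision to pick a score threshold that reaches TARGET_PRECISION on the held-out labels. That helper is not shown on this page; a minimal sketch of how such a function could be written with scikit-learn, assuming flattened 0/1 targets and matching score arrays, might be:

from sklearn.metrics import precision_recall_curve

def threshold_for_precision(y_true, y_scores, target_precision):
    # Hypothetical sketch: lowest threshold whose precision meets the target.
    precision, recall, thresholds = precision_recall_curve(
        y_true.ravel(), y_scores.ravel())
    # precision has one more entry than thresholds; drop the final entry.
    for p, t in zip(precision[:-1], thresholds):
        if p >= target_precision:
            return t
    # Fall back to the strictest threshold if the target is never reached.
    return thresholds[-1] if len(thresholds) else 1.0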
Code example #4
def evaluate(self, x_seq, y_seq):
    with self.graph.as_default():
        # Encode the held-out data the same way as the training data,
        # then return the model's evaluation metrics.
        x_eval, lengths = self.lang.tokenized_to_sequence(x_seq)
        y_eval = utils.one_hot_encode_sequence(y_seq, self.classes)
        return self.model.evaluate(x_eval, y_eval, verbose=0)
Code example #5
File: sequence_model.py  Project: czhu12/light_bulb
def evaluate(self, x_seq, y_seq):
    with self.graph.as_default():
        # Same encoding path as fit() above, applied to the evaluation split.
        x_eval, lengths = self.lang.texts_to_sequence(x_seq)
        y_eval = utils.one_hot_encode_sequence(
            y_seq, valid_tokens=self.valid_outputs)
        return self.model.evaluate(x_eval, y_eval)
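A hypothetical driver for the fit and evaluate methods above might look like the following (the SequenceModel class name and its constructor arguments are assumptions for illustration, not taken from the project; only the fit/evaluate calls mirror the signatures shown above):

x_train = [['John', 'lives', 'in', 'Paris'], ['Mary', 'works', 'remotely']]
y_train = [['B-PER', 'O', 'O', 'B-LOC'], ['B-PER', 'O', 'O']]

model = SequenceModel(valid_outputs=['O', 'B-PER', 'B-LOC'])  # assumed constructor
loss = model.fit(x_train, y_train)
metrics = model.evaluate(x_train, y_train)
print(loss, metrics)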