def fit(self, x_seq, y_seq, validation_split=0., epochs=1, verbose=0):
    with self.graph.as_default():
        # Assumption: sequence_tagger(words) -> characters
        x_train, lengths = self.lang.tokenized_to_sequence(x_seq)
        y_train = utils.one_hot_encode_sequence(y_seq, self.classes)
        history = self.model.fit(x_train, y_train, verbose=verbose)
        return history.history['loss'][0], history.history['acc'][0]
def fit(self, x_seq, y_seq, validation_split=0., epochs=1):
    with self.graph.as_default():
        # Assumption: sequence_tagger(words) -> characters
        x_train, lengths = self.lang.texts_to_sequence(x_seq)
        y_train = utils.one_hot_encode_sequence(
            y_seq, valid_tokens=self.valid_outputs)
        return self.model.fit(x_train, y_train)
def _score_sequence(self, x_test, y_test):
    id2class = self.label_helper.score_classes
    self.logger.debug("Scoring items with model labeller.")

    y_test = self.label_helper.to_training(y_test)
    y_test = utils.one_hot_encode_sequence(
        y_test,
        id2class,
    )
    y_pred = self.model.score(x_test)

    # reshape as a classification problem, to set threshold
    threshold = Evaluator.threshold_for_precision(
        y_test.reshape((y_test.shape[0], -1)),
        y_pred.reshape((y_test.shape[0], -1)),
        TARGET_PRECISION,
    )
    # NOTE: the precision-derived threshold above is overridden by a fixed value.
    threshold = 0.45

    unlabelled_texts, ids = self.dataset.model_labelling_set()
    if len(unlabelled_texts) == 0:
        self.logger.info("Model labelling done!")
        return 0

    scores = self.model.score(unlabelled_texts)
    dist = scores / scores.sum(axis=-1, keepdims=True)
    idxs = np.argmax(dist, -1)

    num_scored = 0
    self.logger.debug("set labelling threshold as: {}".format(threshold))
    for _id, (text, prediction) in zip(ids, zip(unlabelled_texts, dist)):
        # The prediction has padding so we only take the last len(text) scores.
        text_tag = []
        met_threshold = True
        for word, word_likelihood_dist in zip(text, prediction[-len(text):]):
            idx = np.argmax(word_likelihood_dist)
            tag = id2class[idx]
            if np.max(word_likelihood_dist) < threshold:
                met_threshold = False
                break
            text_tag.append({'word': word, 'tag': tag})

        if met_threshold:
            self.dataset.add_label(
                _id,
                self.label_helper.decode(text_tag),
                stage=Dataset.MODEL_LABELLED,
                user=Dataset.USER_MODEL_LABELLER,
                is_labelled=False,
                save=True,
            )
            num_scored += 1

    return num_scored
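# A minimal, self-contained sketch of the per-token threshold gate used in
# _score_sequence above, assuming the model emits a (seq_len, n_classes) score
# matrix per text. The names `accept_prediction` and `fake_scores` are
# illustrative only, not part of the codebase: a text is auto-labelled only when
# every token's top class probability clears the threshold.
import numpy as np

def accept_prediction(scores, id2class, threshold=0.45):
    # Normalise raw scores into per-token probability distributions.
    dist = scores / scores.sum(axis=-1, keepdims=True)
    tags = []
    for word_dist in dist:
        if word_dist.max() < threshold:
            return None  # one uncertain token rejects the whole sequence
        tags.append(id2class[int(word_dist.argmax())])
    return tags

fake_scores = np.array([[0.10, 0.80, 0.10],
                        [0.05, 0.05, 0.90]])
print(accept_prediction(fake_scores, {0: 'O', 1: 'B-PER', 2: 'I-PER'}))
# -> ['B-PER', 'I-PER']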
def evaluate(self, x_seq, y_seq):
    with self.graph.as_default():
        x_eval, lengths = self.lang.tokenized_to_sequence(x_seq)
        y_eval = utils.one_hot_encode_sequence(y_seq, self.classes)
        return self.model.evaluate(x_eval, y_eval, verbose=0)
def evaluate(self, x_seq, y_seq):
    with self.graph.as_default():
        x_eval, lengths = self.lang.texts_to_sequence(x_seq)
        y_eval = utils.one_hot_encode_sequence(
            y_seq, valid_tokens=self.valid_outputs)
        return self.model.evaluate(x_eval, y_eval)
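# Both fit and evaluate depend on utils.one_hot_encode_sequence, whose
# implementation is not shown in this section. The sketch below is an
# assumption of its behaviour: map each tag sequence to class indices, pad to a
# common length, and one-hot encode into a (batch, max_len, n_classes) array.
# The helper name `one_hot_encode_sequence_sketch` is hypothetical.
import numpy as np

def one_hot_encode_sequence_sketch(y_seq, classes):
    class_to_id = {c: i for i, c in enumerate(classes)}
    max_len = max(len(seq) for seq in y_seq)
    encoded = np.zeros((len(y_seq), max_len, len(classes)), dtype=np.float32)
    for i, seq in enumerate(y_seq):
        for j, tag in enumerate(seq):
            encoded[i, j, class_to_id[tag]] = 1.0
    return encoded

y = [['B-PER', 'I-PER', 'O'], ['O', 'O']]
print(one_hot_encode_sequence_sketch(y, ['O', 'B-PER', 'I-PER']).shape)  # (2, 3, 3)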