def setUp(self):
    """Load the participant fixture and build an unfitted CombinedModel.

    Stores on ``self``: the loaded data, the per-step contexts and actions
    derived from participant "1.ABC", the matching rows of the bag-of-words
    matrix, the model itself, and a sample utterance with an empty context.
    """
    fixture = os.path.join(os.path.dirname(__file__),
                           "test_participant.json")
    self.data = TrainData.load(fixture)

    # Per-trial index lists feed the context builder; the flat index list
    # selects the corresponding rows of the speech feature matrix.
    ids_by_trial = [trial.ids for trial in self.data.data["1.ABC"]]
    row_ids = list(self.data.data["1.ABC"].ids)
    self.train_cntxt, self.train_acts = self._format_cntxt_indices(
        ids_by_trial)

    features, vectorizer = get_bow_features(
        self.data, tfidf=False, n_grams=(1, 2), max_features=None)
    self.X_speech = features
    self.X_speech = self.X_speech[row_ids, :]

    speech_model_gen = SpeechModel.model_generator(
        SGDClassifier,
        loss='log',
        average=True,
        penalty='l2',
        alpha=0.0002)
    self.combined_model = CombinedModel(
        vectorizer, speech_model_gen, ALL_ACTIONS,
        speech_eps=0.15, context_eps=0.15)

    self.test_utter = "The green piece with two black stripes"
    self.test_cntxt = []
def plot_trial(trial, bag):
    """Save one SVG figure per ``TOPIC`` message found in ``bag``.

    Loads the "model_initial" model stored for ``args.participant`` and
    ``trial``. For every message whose topic equals ``TOPIC`` it calls
    ``model.predict(..., plot=True)`` with the actions seen so far and the
    message's utterance, then saves the current pyplot figure as
    ``figs/<participant>/<trial>/sample_<result>_<n>`` next to this file.

    Args:
        trial: Trial identifier; ``str(trial)`` is used as a directory name.
        bag: Object whose ``read_messages()`` yields items exposing ``topic``
            and ``message`` (with ``utter``, ``action`` and ``result``).

    Raises:
        OSError: If the figure directory cannot be created.
    """
    model_path = os.path.join(args.model_path, args.participant, str(trial),
                              "model_initial")
    fig_path = os.path.join(os.path.dirname(__file__), "figs",
                            args.participant, str(trial))
    # Create the directory without a check-then-create race: try first and
    # only tolerate the failure when the directory is already there.
    try:
        os.makedirs(fig_path)
    except OSError:
        if not os.path.isdir(fig_path):
            raise

    model = CombinedModel.load_from_path(
        model_path, ALL_ACTIONS,
        JointModel.model_generator(SGDClassifier, **SPEECH_MODEL_PARAMETERS),
        SPEECH_EPS, CONTEXT_EPS)

    cntxt = []
    i = 0  # counts only the messages on TOPIC, used in the file name
    for m in bag.read_messages():
        if m.topic != TOPIC:
            continue
        model.predict(cntxt, m.message.utter, plot=True)
        # The action is appended after predicting, so each prediction only
        # sees the actions of earlier messages.
        cntxt.append(m.message.action)
        i += 1
        plt.tight_layout()
        path = os.path.join(fig_path, "sample_{}_{}".format(
            m.message.result, i))
        plt.savefig(path, format="svg")
        plt.clf()
def _get_model(self, participant, trial):
    """Load the stored combined model to use for ``trial``.

    Trial 0 resolves to the "model_initial" folder inside directory "1".
    Any other trial resolves to the "model_final" folder inside the
    directory numbered ``(trial - 1) + 1``. Both are looked up under
    ``args.model_path/<participant>``.
    """
    is_first = trial == 0
    model_type = "model_initial" if is_first else "model_final"
    index = 0 if is_first else trial - 1
    model_path = os.path.join(
        args.model_path, participant, str(index + 1), model_type)
    speech_model_gen = JointModel.model_generator(
        SGDClassifier, **SPEECH_MODEL_PARAMETERS)
    return CombinedModel.load_from_path(
        model_path, ALL_ACTIONS, speech_model_gen, SPEECH_EPS, CONTEXT_EPS)
def train_combined_model(speech_eps, context_eps, fit_type="incremental",
                         tfidf=False, n_grams=(1, 2),
                         speech_model_class=JointModel,
                         speech_model_parameters=None,
                         init_new_speech_actions=False):
    """Build a CombinedModel and train it on the ``TRAIN_PARTICIPANTS`` data.

    Data is read from ``train.json`` under ``defaults.DATA_PATH``.

    Args:
        speech_eps: Passed to ``CombinedModel`` as ``speech_eps``.
        context_eps: Passed to ``CombinedModel`` as ``context_eps``.
        fit_type: If it contains "incremental" the model is trained with
            ``partial_fit``; otherwise, if it contains "offline", with
            ``fit``. Any other value leaves the model unfitted.
        tfidf: Forwarded to ``get_bow_features``.
        n_grams: Forwarded to ``get_bow_features``.
        speech_model_class: Class handed to ``JointModel.model_generator``.
        speech_model_parameters: Keyword arguments for the model generator;
            ``None`` (the default) means no extra arguments.
        init_new_speech_actions: If true, call
            ``update_speech_for_new_actions`` on the trained speech model.

    Returns:
        The ``CombinedModel`` instance.

    Raises:
        NotImplementedError: If ``init_new_speech_actions`` is requested
            with a ``fit_type`` that does not contain "incremental".
    """
    # Check the precondition first so an unsupported combination fails
    # before any data is loaded or any model is fitted.
    if init_new_speech_actions and "incremental" not in fit_type:
        raise NotImplementedError(
            "Can't add speech data on offline speech")
    # A None default avoids sharing one dict object across calls.
    if speech_model_parameters is None:
        speech_model_parameters = {}

    train_path = os.path.join(defaults.DATA_PATH, "train.json")
    print("PATH: ", train_path)
    data = TrainData.load(train_path)
    flat_train_ids = [i for p in TRAIN_PARTICIPANTS for i in data.data[p].ids]
    train_ids_by_trial = [
        trial.ids for p in TRAIN_PARTICIPANTS for trial in data.data[p]
    ]

    # Get features
    train_context, labels = format_cntxt_indices(data, train_ids_by_trial)
    X_speech, vectorizer = get_bow_features(data, tfidf=tfidf,
                                            n_grams=n_grams,
                                            max_features=None)
    X_speech = X_speech[flat_train_ids, :]

    model_gen = JointModel.model_generator(speech_model_class,
                                           **speech_model_parameters)
    combined_model = CombinedModel(vectorizer, model_gen, ALL_ACTIONS,
                                   speech_eps=speech_eps,
                                   context_eps=context_eps)
    if "incremental" in fit_type:
        combined_model.partial_fit(train_context, X_speech, labels)
    elif "offline" in fit_type:
        combined_model.fit(train_context, X_speech, labels)

    if init_new_speech_actions:
        # The weight is the average number of training samples per action.
        update_speech_for_new_actions(
            combined_model.speech_model,
            combined_model._vectorizer,
            weight=len(labels) * 1. / len(ALL_ACTIONS))
    return combined_model
class TestCombinedModel(TestCase):
    """Tests for ``CombinedModel`` using the ``test_participant.json`` data."""

    def setUp(self):
        """Load the fixture data and build an unfitted CombinedModel."""
        data_path = os.path.join(os.path.dirname(__file__),
                                 "test_participant.json")
        TFIDF = False
        N_GRAMS = (1, 2)
        self.data = TrainData.load(data_path)
        # Per-trial indices build the contexts; the flat indices select the
        # matching rows of the speech feature matrix.
        train_ids = [t.ids for t in self.data.data["1.ABC"]]
        flat_train_ids = list(self.data.data["1.ABC"].ids)
        self.train_cntxt, self.train_acts = self._format_cntxt_indices(
            train_ids)
        self.X_speech, vectorizer = get_bow_features(self.data, tfidf=TFIDF,
                                                     n_grams=N_GRAMS,
                                                     max_features=None)
        self.X_speech = self.X_speech[flat_train_ids, :]
        model_gen = SpeechModel.model_generator(SGDClassifier,
                                                loss='log',
                                                average=True,
                                                penalty='l2',
                                                alpha=0.0002)
        self.combined_model = CombinedModel(vectorizer, model_gen,
                                            ALL_ACTIONS,
                                            speech_eps=0.15,
                                            context_eps=0.15)
        self.test_utter = "The green piece with two black stripes"
        self.test_cntxt = []

    def test_n_children(self):
        """After fitting, the context model's root reports two children."""
        self.combined_model.fit(self.train_cntxt, self.X_speech,
                                self.train_acts)
        self.assertEqual(self.combined_model.context_model.root.n_children, 2)

    def test_predict(self):
        """The action predicted for the sample utterance contains "foot"."""
        self.combined_model.fit(self.train_cntxt, self.X_speech,
                                self.train_acts)
        act, _probs = self.combined_model.predict(self.test_cntxt,
                                                  self.test_utter)
        # assertIn replaces the deprecated assert_ alias and reports both
        # operands on failure.
        self.assertIn("foot", act)

    def test_prob_normilization(self):
        """Context and speech probabilities each sum to 1 (4 places)."""
        self.combined_model.fit(self.train_cntxt, self.X_speech,
                                self.train_acts)
        c_probs = self.combined_model.get_context_probs(self.test_cntxt)
        s_probs = self.combined_model.get_speech_probs(self.test_utter)
        self.assertAlmostEqual(sum(c_probs), 1.0, places=4)
        self.assertAlmostEqual(sum(s_probs), 1.0, places=4)

    def test_incremental_learning(self):
        """Two partial fits with different actions give the root two children."""
        # NOTE(review): raw utterance strings are passed here, whereas the
        # other tests pass rows of the bag-of-words matrix to fit() --
        # confirm partial_fit accepts this input.
        train_ctxt_1 = [[]]
        train_ctxt_2 = [[]]
        train_utter_1 = "blue piece with two white stripes"
        train_utter_2 = "I am feeling fat and sassy"
        act_1 = ["top_2"]
        act_2 = ["top_1"]
        self.combined_model.partial_fit(train_ctxt_1, train_utter_1, act_1)
        self.combined_model.partial_fit(train_ctxt_2, train_utter_2, act_2)
        self.assertEqual(self.combined_model.context_model.root.n_children, 2)

    def _format_cntxt_indices(self, indices):
        """Expand per-trial label indices into (contexts, actions) pairs.

        For every index in every trial, the emitted context is the list of
        labels already seen in that trial and the action is the label at
        that index. The context starts empty for each trial.

        Args:
            indices: Iterable of trials, each an iterable of positions into
                ``self.data.labels``.

        Returns:
            A ``(cntxts, actions)`` tuple of equal-length lists.
        """
        cntxts = []
        actions = []
        all_labels = list(self.data.labels)
        for t in indices:
            cntxt = []
            for i in t:
                label = all_labels[i]
                # Snapshot the context: cntxt keeps growing below.
                cntxts.append(list(cntxt))
                actions.append(label)
                cntxt.append(label)
        return cntxts, actions
def _load_model(self, model_path, speech_eps, context_eps):
    """Load a combined model from ``model_path`` into ``self.model``.

    The speech model generator is built from ``SGDClassifier`` and
    ``SPEECH_MODEL_PARAMETERS``; the two epsilon values are forwarded to
    ``CM.load_from_path`` unchanged.
    """
    speech_model_gen = JointModel.model_generator(
        SGDClassifier, **SPEECH_MODEL_PARAMETERS)
    loaded = CM.load_from_path(
        model_path, ALL_ACTIONS, speech_model_gen, speech_eps, context_eps)
    self.model = loaded