예제 #1
0
    def setUp(self):
        """Prepare the data, features and model shared by the test cases.

        Loads ``test_participant.json`` from the directory of this file,
        derives context/action indices and bag-of-words speech features for
        the ``"1.ABC"`` entry of the data, and instantiates a
        ``CombinedModel``. No fitting is performed here.
        """
        use_tfidf = False
        ngram_range = (1, 2)

        here = os.path.dirname(__file__)
        self.data = TrainData.load(os.path.join(here, "test_participant.json"))

        # Ids grouped per element of the entry, and the entry's flat id list.
        ids_by_trial = [trial.ids for trial in self.data.data["1.ABC"]]
        flat_ids = list(self.data.data["1.ABC"].ids)
        formatted = self._format_cntxt_indices(ids_by_trial)
        self.train_cntxt, self.train_acts = formatted

        # Features are computed on the whole data set, then restricted to the
        # rows selected by the flat id list.
        all_features, vectorizer = get_bow_features(
            self.data, tfidf=use_tfidf, n_grams=ngram_range,
            max_features=None)
        self.X_speech = all_features
        self.X_speech = self.X_speech[flat_ids, :]

        sgd_options = {
            'loss': 'log',
            'average': True,
            'penalty': 'l2',
            'alpha': 0.0002,
        }
        model_gen = JointModel.model_generator(SGDClassifier, **sgd_options)

        self.combined_model = CombinedModel(
            vectorizer, model_gen, ALL_ACTIONS,
            speech_eps=0.15, context_eps=0.15)

        # Sample inputs used by the individual tests.
        self.test_utter = "The green piece with two black stripes"
        self.test_cntxt = []
def plot_trial(trial, bag):
    """Save one prediction figure per matching message of a recorded trial.

    Loads the ``model_initial`` model stored under
    ``args.model_path/args.participant/<trial>`` and iterates over the
    messages of ``bag``. For every message whose topic equals ``TOPIC``,
    ``model.predict`` is called with ``plot=True`` on the utterance, using
    the actions of the previously matched messages as context, and the
    current figure is saved as SVG to
    ``figs/<participant>/<trial>/sample_<result>_<i>`` next to this file
    (``i`` counts matched messages starting at 1).

    Args:
        trial: Trial identifier; converted with ``str`` to build the paths.
        bag: Object exposing ``read_messages()``; each yielded item must
            provide ``topic`` and ``message`` (with ``utter``, ``action``
            and ``result`` attributes).
    """
    model_path = os.path.join(args.model_path, args.participant, str(trial),
                              "model_initial")
    fig_path = os.path.join(
        os.path.dirname(__file__), "figs", args.participant, str(trial))

    if not os.path.exists(fig_path):
        os.makedirs(fig_path)

    model = CombinedModel.load_from_path(model_path, ALL_ACTIONS,
                                         JointModel.model_generator(
                                             SGDClassifier,
                                             **SPEECH_MODEL_PARAMETERS),
                                         SPEECH_EPS, CONTEXT_EPS)

    cntxt = []
    i = 0

    for m in bag.read_messages():
        if m.topic != TOPIC:
            continue

        # Predict before appending: the context must only contain the
        # actions that precede the current utterance.
        model.predict(cntxt, m.message.utter, plot=True)

        cntxt.append(m.message.action)
        i += 1

        plt.tight_layout()

        path = os.path.join(fig_path, "sample_{}_{}".format(
            m.message.result, i))
        plt.savefig(path, format="svg")
        # Clear the figure so the next sample starts from an empty canvas.
        plt.clf()
예제 #3
0
def train_combined_model(speech_eps,
                         context_eps,
                         fit_type="incremental",
                         tfidf=False,
                         n_grams=(1, 2),
                         speech_model_class=JointModel,
                         speech_model_parameters=None,
                         init_new_speech_actions=False):
    """Train a ``CombinedModel`` on the data of ``TRAIN_PARTICIPANTS``.

    The training data is read from ``train.json`` inside
    ``defaults.DATA_PATH``.

    Args:
        speech_eps: Forwarded to ``CombinedModel`` as ``speech_eps``.
        context_eps: Forwarded to ``CombinedModel`` as ``context_eps``.
        fit_type: Selects the training method by substring:
            ``"incremental"`` uses ``partial_fit``, otherwise ``"offline"``
            uses ``fit``. If neither substring is present the model is
            returned without being fitted.
        tfidf: Forwarded to ``get_bow_features``.
        n_grams: Forwarded to ``get_bow_features``.
        speech_model_class: Class handed to ``JointModel.model_generator``.
            NOTE(review): the default is ``JointModel`` itself while other
            callers pass a classifier class -- confirm this is intended.
        speech_model_parameters: Keyword arguments for the speech model
            generator; ``None`` (the default) means no extra arguments.
        init_new_speech_actions: If true, call
            ``update_speech_for_new_actions`` on the trained speech model.
            Only supported together with incremental fitting.

    Returns:
        The ``CombinedModel`` instance.

    Raises:
        NotImplementedError: If ``init_new_speech_actions`` is requested
            while ``fit_type`` does not contain ``"incremental"``.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if speech_model_parameters is None:
        speech_model_parameters = {}

    # Reject the unsupported combination before loading data and training.
    incremental = "incremental" in fit_type
    if init_new_speech_actions and not incremental:
        raise NotImplementedError(
            "Can't add speech data on offline speech")

    train_file = os.path.join(defaults.DATA_PATH, "train.json")
    print("PATH: ", train_file)

    data = TrainData.load(train_file)

    flat_train_ids = [i for p in TRAIN_PARTICIPANTS for i in data.data[p].ids]
    train_ids_by_trial = [
        trial.ids for p in TRAIN_PARTICIPANTS for trial in data.data[p]
    ]

    # Get features
    train_context, labels = format_cntxt_indices(data, train_ids_by_trial)
    X_speech, vectorizer = get_bow_features(data,
                                            tfidf=tfidf,
                                            n_grams=n_grams,
                                            max_features=None)
    # Features are computed on all data; keep only the training rows.
    X_speech = X_speech[flat_train_ids, :]

    model_gen = JointModel.model_generator(speech_model_class,
                                           **speech_model_parameters)

    combined_model = CombinedModel(vectorizer,
                                   model_gen,
                                   ALL_ACTIONS,
                                   speech_eps=speech_eps,
                                   context_eps=context_eps)

    if incremental:
        combined_model.partial_fit(train_context, X_speech, labels)
    elif "offline" in fit_type:
        combined_model.fit(train_context, X_speech, labels)

    if init_new_speech_actions:
        # Weight is the average number of training samples per action.
        update_speech_for_new_actions(combined_model.speech_model,
                                      combined_model._vectorizer,
                                      weight=len(labels) * 1. /
                                      len(ALL_ACTIONS))

    return combined_model
예제 #4
0
 def _get_model(self, participant, trial):
     """Load the stored ``CombinedModel`` for ``participant`` and ``trial``.

     Trial 0 maps to the ``model_initial`` saved in directory ``1``; any
     other trial maps to the ``model_final`` saved in the directory
     numbered ``trial``.
     """
     is_first = trial == 0
     model_type = "model_initial" if is_first else "model_final"
     t = 0 if is_first else trial - 1

     model_path = os.path.join(args.model_path, participant, str(t + 1),
                               model_type)
     speech_model_gen = JointModel.model_generator(
         SGDClassifier, **SPEECH_MODEL_PARAMETERS)
     return CombinedModel.load_from_path(
         model_path, ALL_ACTIONS, speech_model_gen, SPEECH_EPS, CONTEXT_EPS)
#!/usr/bin/env python

from sklearn.linear_model import SGDClassifier
from matplotlib import pyplot as plt

from hrc_speech_prediction.models import (
    JointModel, get_path_from_cli_arguments)
from hrc_speech_prediction.evaluation import Evaluation
from hrc_speech_prediction.plots import plot_incremental_scores


# Text feature settings handed to Evaluation.
TFIDF = False
N_GRAMS = (1, 2)

working_path = get_path_from_cli_arguments()

speech_model_gen = JointModel.model_generator(
    SGDClassifier, loss='log', average=True, penalty='l2', alpha=.0002)

# One model variation per feature set, keyed by the feature-set name.
model_variations = {}
for feature_set in ('speech', 'context', 'both'):
    model_variations[feature_set] = {'features': feature_set}

ev = Evaluation(speech_model_gen,
                working_path,
                n_grams=N_GRAMS,
                tfidf=TFIDF,
                model_variations=model_variations)
ev.evaluate_all()

# Draw one incremental-learning curve per entry of the returned scores.
scores = ev.evaluate_incremental_learning(shuffle=False)
for m in scores:
    plot_incremental_scores(scores[m], label=m)

plt.legend()
plt.show()
예제 #6
0
 def _load_model(self, model_path, speech_eps, context_eps):
     """Load a combined model from ``model_path`` into ``self.model``.

     The speech model generator is built from ``SGDClassifier`` and
     ``SPEECH_MODEL_PARAMETERS``; ``speech_eps`` and ``context_eps`` are
     forwarded unchanged to ``CM.load_from_path``.
     """
     speech_model_gen = JointModel.model_generator(
         SGDClassifier, **SPEECH_MODEL_PARAMETERS)
     self.model = CM.load_from_path(
         model_path, ALL_ACTIONS, speech_model_gen, speech_eps, context_eps)
TFIDF = False
# Keyword arguments forwarded to SGDClassifier through the model generator.
MODEL_PARAMS = {
    'loss': 'log',
    'average': True,
    'penalty': 'l2',
    'alpha': .02,
    'max_iter': 100,
    'tol': 1.e-3,
}

working_path = get_path_from_cli_arguments()
# Make sure the 'figs' output directory exists inside the working path.
fig_path = os.path.join(working_path, 'figs')
if not os.path.isdir(fig_path):
    os.mkdir(fig_path)

speech_model_gen = JointModel.model_generator(SGDClassifier, **MODEL_PARAMS)

# Only the speech-features model variation is evaluated here.
ev = Evaluation(speech_model_gen, working_path, n_grams=N_GRAMS, tfidf=TFIDF,
                model_variations={'speech': {'features': 'speech'}})
ev.evaluate_all()
classes = list(set(ev.data.labels))
utterances = list(ev.data.utterances)
# `np.math` was only an accidental alias of the stdlib `math` module; it is
# deprecated since NumPy 1.25 and removed in NumPy 2.0, so use NumPy's own
# log10 instead.
digits = int(np.ceil(np.log10(len(utterances))))

plt.set_cmap('Blues')
fig = plt.figure()
for tst in TRAIN_PARTICIPANTS:
    train_idx = [i for p in TRAIN_PARTICIPANTS
                 for i in list(ev.data.data[p].ids)
                 if not p == tst]
    X_train = ev.get_Xs(train_idx)