def _get_all_data(self):
    '''
    Collects the recording paths and their labels from the local data dir.

    The data dir is walked as
    <data_path>/<emotion_dir>/<source_dir>/<recording>. The '1_surprised'
    emotion dir is skipped entirely, and recordings found under a source
    dir named 'theirs' are randomly subsampled.
    :return: type=tuple, shape=2, where:
             tuple[0] = X_all: type=list, shape=(m, ), recording paths
             tuple[1] = y_all: type=list, shape=(m, ), matching labels
    '''
    gc.enter_func()
    paths, labels = [], []
    for emotion_dir in os.listdir(self._data_path):
        if emotion_dir == '1_surprised':
            continue
        emotion_path = os.path.join(self._data_path, emotion_dir)
        emotion_label = const.DIR_LABEL_DICT[emotion_dir]
        for source_dir in os.listdir(emotion_path):
            source_path = os.path.join(emotion_path, source_dir)
            is_theirs = source_dir == 'theirs'
            for recording in os.listdir(source_path):
                # Subsampling of the 'theirs' recordings; drop this check
                # to use all of them.
                # NOTE(review): randint(0, 3) is inclusive on both ends, so
                # a recording is kept with probability 1/4, not the "third"
                # the previous comment mentioned - confirm the intended
                # rate.
                if is_theirs and random.randint(0, 3):
                    continue
                paths.append(os.path.join(source_path, recording))
                labels.append(emotion_label)
    return paths, labels
def _init_debugging_buttons(self):
    '''
    Creates and packs the 'Record something' button (bound to
    self._record), then sets up the play button.
    '''
    gc.enter_func()
    record_btn = tk.Button(self, text='Record something',
                           command=self._record)
    self._record_btn = record_btn
    record_btn.pack()
    self._init_play_button()
def _predict(self):
    '''
    Runs the predictor on the last recording and shows the predicted
    label's name in the prediction label widget.
    '''
    gc.enter_func()
    record_fn = self._last_record_fn
    gc.log(f'predicting {record_fn}')
    prediction = self._controller.logic.predict(record_fn)
    # Only the first entry of the prediction is displayed.
    label_name = const.LABEL_DIR_DICT[prediction[0]]
    gc.log(f'prediction: {prediction}')
    self._prediction_lbl.config(text=label_name)
def _init_predict_button(self):
    '''
    Creates the 'Predict' button (bound to self._predict) and the label
    that displays the prediction. The label is packed before the button.
    '''
    gc.enter_func()
    predict_button = tk.Button(self, text="Predict", command=self._predict)
    prediction_lbl = tk.Label(self)
    self._predict_button = predict_button
    self._prediction_lbl = prediction_lbl
    prediction_lbl.pack()
    predict_button.pack()
def _predict(self):
    '''
    Runs the predictor on the last recording, logs the predicted label's
    name and hands the predicted label to self._place_result.
    '''
    gc.enter_func()
    record_fn = self._last_record_fn
    gc.log(f'predicting {record_fn}')
    # Only the first entry of the returned prediction is used.
    label = self._controller.logic.predict(record_fn)[0]
    label_name = const.LABEL_DIR_DICT[label]
    gc.log(f'prediction: {label_name}')
    self._place_result(label)
def record(self, pre_prompt=DEF_PRE_PROMPT, post_prompt=DEF_POST_PROMPT,
           shell_verbose=True):
    '''
    Records from the microphone and saves the recording as a wav file.
    :param pre_prompt: forwarded to self._record_mic
    :param post_prompt: forwarded to self._record_mic
    :param shell_verbose: forwarded to self._record_mic
    :return: the filename the recording was saved under.
    '''
    gc.enter_func()
    target_fn = self._get_fn()
    recorded = self._record_mic(pre_prompt, post_prompt, shell_verbose)
    self._save_wav(recorded, target_fn)
    # One second pause before returning.
    # NOTE(review): presumably lets the file settle on disk - confirm.
    time.sleep(1)
    return target_fn
def __init__(self, parent, controller):
    '''
    Builds the page frame.
    :param parent: the tk parent widget of this frame
    :param controller: stored as self._controller for use by the page
    '''
    tk.Frame.__init__(self, parent)
    gc.enter_func()
    self._controller, self._last_record_fn = controller, ''
    # Widgets are created first, then the record widget is placed.
    self._init_widgets()
    self._place_record()
    self.configure(background='white')
def fit(self, X_train):
    '''
    Fits the preprocessor's scaler on the training data.
    :param X_train: the training data, passed as-is to self._scaler.fit
    '''
    gc.enter_func()
    scaler = self._scaler
    scaler.fit(X_train)
def _init_result_button(self):
    '''
    Creates the 'Result' button (bound to self._result) using the
    controller's button font, and colors it white-on-black. The button is
    not placed here.
    '''
    gc.enter_func()
    result_button = tk.Button(
        self,
        text='Result',
        height=6,
        width=13,
        font=self._controller.button_font,
        command=self._result,
    )
    self._result_button = result_button
    result_button.configure(background='black', foreground='white')
def __init__(self, model, prep, name):
    '''
    :param model: a callable taking no arguments; its result is stored as
                  self.model
    :param prep: preprocessor
    :param name: the classifier's name (logged and stored)
    '''
    gc.enter_func()
    self.prep = prep
    gc.log(name)
    instance = model()
    self.model = instance
    self.classifier_name = name
def predict(self, record_fns):
    '''
    Preprocesses the given recordings and predicts with the learner.
    :param record_fns: type=list/string, shape=(m, )
    :return: type=list, shape=(m, )
    '''
    gc.enter_func()
    features = self._preprocessor.preprocess_X(record_fns)
    return self._learner.predict(features)
def preprocess_X(self, filenames):
    '''
    Converts the files to an array and applies the data transformation.
    :param filenames: type=list, shape=(m, )
    :return: type=np.array, shape=(m, d)
    '''
    gc.enter_func()
    return self._transform_data(self._get_numpied(filenames))
def _get_numpied(self, listed):
    '''
    Converts one or more recording filenames to an array of features.
    :param listed: a list/tuple of filenames, or a single filename
    :return: type=np.array holding, per filename, the value returned by
             self._get_mfcc_from_fn
    '''
    gc.enter_func()
    # isinstance (rather than an exact type() comparison) also accepts list
    # subclasses and tuples; anything else is treated as a single filename.
    filenames = listed if isinstance(listed, (list, tuple)) else [listed]
    mfccs = [self._get_mfcc_from_fn(fn) for fn in tqdm(filenames)]
    return np.array(mfccs)
def predict(self, X):
    '''
    Predicts a label vector.
    :param X: type=df, shape=(n_tweets, )
    :return: type=np.array, shape=(n_tweets, )
    '''
    gc.enter_func()
    features = self.prep.get_tweets_features(X)
    labels = self.model.predict(features)
    return labels
def _normalize(self, X):
    '''
    Normalizes data to have mean=0 and std=1 along each column.
    :param X: type=np.array, shape=(m, d)
    :return: type=np.array, shape=(m, d)
    '''
    gc.enter_func()
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    # Small epsilon guards against division by zero on constant columns.
    # This used to be written as 1**-6, which evaluates to 1.0 and made
    # the result (X - mean) / (std + 1) instead of a unit-std scaling.
    return (X - mean) / (std + 1e-6)
def init(self, proprocessor, X_train, y_train):
    '''
    Stores the preprocessor, then initializes the data and the models.
    :param proprocessor: implementing 'preprocess' method.
    :param X_train: type=list>filename, shape=(m, )
    :param y_train: type=list>label, shape=(m, )
    '''
    gc.enter_func()
    self._preprocessor = proprocessor
    # Data has to be initialized before the models.
    self._init_data(X_train, y_train)
    self._init_models()
def choose_model(self):
    '''
    Chooses the best model for the learning task.

    Cross-validates the candidate models, picks the one with the highest
    score, creates a fresh instance of its class and fits it on the whole
    training set.
    :return: type=tuple, shape=2, where:
             tuple[0] = the fitted model instance
             tuple[1] = the chosen model's name
    '''
    gc.enter_func()
    self._cross_validate_models()
    scores = self._scores
    winner = max(scores, key=scores.get)
    model_cls = winner.get_class()
    fitted = model_cls()
    fitted.fit(self._X_train, self._y_train)
    return fitted, winner.get_name()
def fit(self, X_train, y_train):
    '''
    Extracts features from the training tweets and fits the model on them.
    :param X_train: type=df, shape=(n_tweets, )
    :param y_train: type=df, shape=(n_tweets, )
    '''
    gc.enter_func()
    features = self.prep.get_tweets_features(X_train)
    gc.log_shape(X_train=features)
    gc.log('Done preping')
    # The model is fitted on the raw values behind y_train.
    targets = y_train.values
    self.model.fit(features, targets)
def _init_learning_buttons(self):
    '''
    Creates a frame at the bottom of the page holding the 'Learn' and
    'Test' buttons, which trigger the controller logic's learn and test
    methods. The buttons are laid out side by side, left to right.
    '''
    gc.enter_func()
    frame = tk.Frame(self)
    self._learning_frame = frame
    frame.pack(side=tk.BOTTOM, padx=10, pady=10)
    self._learn_button = tk.Button(frame, text='Learn',
                                   command=self._controller.logic.learn)
    self._test_button = tk.Button(frame, text='Test',
                                  command=self._controller.logic.test)
    for button in (self._learn_button, self._test_button):
        button.pack(side=tk.LEFT, pady=10, padx=10)
def _cross_validate_models(self):
    '''
    Cross-validates every candidate model on the training data.

    For each model, the mean 5-fold accuracy (as a percentage) is stored in
    self._scores under the model's key, and the cross-validated predictions
    are passed on to self._report as a train-set report.
    '''
    gc.enter_func()
    for model_enum, model in self._models.items():
        fold_scores = cross_val_score(model, self._X_train, self._y_train,
                                      scoring='accuracy', cv=5)
        self._scores[model_enum] = fold_scores.mean() * 100
        # No cv argument is given here, so the library default is used.
        y_pred = cross_val_predict(model, self._X_train, self._y_train)
        self._report(model_enum, self._y_train, y_pred, is_test=False)
def get_train_test(self, test_ratio=const.TEST_RATIO):
    '''
    Splits all the data, shuffled, to train and test sets.
    :param test_ratio: passed to train_test_split as test_size
    :return: type=tuple, shape=4, where:
             tuple[0] = X_train: type=list, shape=(m_train, )
             tuple[1] = y_train: type=list, shape=(m_train, )
             tuple[2] = X_test: type=list, shape=(m_test, )
             tuple[3] = y_test: type=list, shape=(m_test, )
    '''
    gc.enter_func()
    split = train_test_split(self._X_all, self._y_all,
                             test_size=test_ratio, shuffle=True)
    # train_test_split orders its output X_train, X_test, y_train, y_test;
    # this method returns train first and test second.
    X_train, X_test, y_train, y_test = split
    return X_train, y_train, X_test, y_test
def _test_simple(self, X_train, y_train, X_test, y_test):
    '''
    Fits the learner on the training set, then reports its performance on
    the training set and on the test set.
    X: type=list, shape=(m, )
    y: type=list, shape=(m, )
    '''
    gc.enter_func()
    prep = self._preprocessor
    train_features = prep.fit_and_prep_X(X_train)
    train_targets = prep.preprocess_y(y_train)
    self._learner.fit(train_features, train_targets)

    # Train report: predictions are compared against the raw y_train.
    train_pred = self._learner.predict(train_features)
    self._report_train(y_train, train_pred)

    # Test report: self.predict does the preprocessing of X_test itself.
    test_pred = self.predict(X_test)
    self._report_test(y_test, test_pred)
def _init_pages(self):
    '''
    Inits all pages used in the gui and stores them in self._frames, keyed
    by the page's class name.
    :return:
    '''
    gc.enter_func()
    self._frames = {}
    for page_cls in (EmotioPage, LearnPage):
        page = page_cls(parent=self._container, controller=self)
        self._frames[page_cls.__name__] = page
        # All pages share the same grid cell; the one on top of the
        # stacking order is the one that is visible.
        page.grid(row=0, column=0, sticky="nsew")
def _report(self, y_true, y_pred, is_test):
    '''
    Plots and saves a confusion-matrix heatmap with the accuracy in its
    title.
    :param y_true: the true labels
    :param y_pred: the predicted labels
    :param is_test: True for a test-set report, False for a train-set one
    '''
    gc.enter_func()
    # The labels are passed explicitly so that the rows/columns of the
    # frame carry the actual label values; renaming a positional 0..k-1
    # index would mislabel classes whenever some label is absent.
    labels = sorted(set(y_true) | set(y_pred))
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    accu_score = f'{accuracy_score(y_true, y_pred):.3f}'
    cm_df = pd.DataFrame(cm, index=labels, columns=labels)
    cm_df.rename(columns=const.LABEL_DIR_DICT, index=const.LABEL_DIR_DICT,
                 inplace=True)
    title_suffix = 'Test set' if is_test else 'Train set'
    gc.init_plt(f'{self._learner_name}, {title_suffix}\nAccuracy: '
                f'{accu_score}')
    sns.heatmap(cm_df, annot=True)
    # confusion_matrix puts the true labels on the rows and the predicted
    # ones on the columns; the heatmap draws rows along the y axis.
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    fn_suffix = 'testset' if is_test else 'trainset'
    fn_learner_name = self._learner_name.replace(' ', '')
    gc.save_plt(f'{fn_learner_name}_{fn_suffix}', timed=True)
def report_predictor(predictor, pred_name, X, y, log='', is_test=False):
    '''
    Reports some data regarding the learner and its performance: plots and
    saves a confusion-matrix heatmap with the accuracy in its title.
    :param predictor: an object with a 'predict' method
    :param pred_name: the predictor's name, used in the title and filename
    :param X: type=df, shape=(n_samples, )
    :param y: type=np.array, shape=(n_samples, )
    :param log: text to be printed, if not empty
    :param is_test: True for a test-set report, False for a train-set one
    :return: None; the plot is saved through gc.save_plt.
    '''
    gc.enter_func()
    y_pred = predictor.predict(X)
    if log:
        print(log)
    cm = confusion_matrix(y, y_pred)
    accu_score = f'{accuracy_score(y, y_pred):.3f}'
    cm_df = pd.DataFrame(cm)
    title_suffix = 'Test set' if is_test else 'Train set'
    gc.init_plt(f'{pred_name}, {title_suffix}\nAccuracy: {accu_score}')
    sns.heatmap(cm_df, annot=True)
    # confusion_matrix puts the true labels on the rows and the predicted
    # ones on the columns; the heatmap draws rows along the y axis.
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    fn_suffix = 'testset' if is_test else 'trainset'
    fn_learner_name = pred_name.replace(' ', '')
    gc.save_plt(f'{fn_learner_name}_{fn_suffix}')
def fit_and_prep_X(self, X):
    '''
    Fits the scaler on the given data and returns the transformed data.
    :param X: passed to self._get_numpied
    :return: the result of self._transform_data on the converted data
    '''
    gc.enter_func()
    features = self._get_numpied(X)
    # The scaler is fitted on the same array that is then transformed.
    self._scaler.fit(features)
    transformed = self._transform_data(features)
    return transformed
def main():
    '''
    Entry point: creates a Manager and runs it.
    '''
    gc.enter_func()
    Manager().run()
def _show_frame(self, page_name):
    '''
    Show a frame for the given page name.
    :param page_name: a key of self._frames
    '''
    gc.enter_func()
    gc.log(page_name)
    # Raising the frame puts it on top of the stacking order.
    self._frames[page_name].tkraise()
def run(self):
    '''
    Prints the address the local hostname resolves to, then runs the gui.
    '''
    gc.enter_func()
    hostname = socket.gethostname()
    address = socket.gethostbyname(hostname)
    print(address)
    self._gui.run()
def __init__(self):
    '''
    Sets the initial recorder state, inits the emotion dirs and creates the
    speech recognizer.
    '''
    # The attributes are set before self._init_emotion_dirs is called.
    self._r, self._record_idx = None, 0
    self._init_emotion_dirs()
    gc.enter_func()
    recognizer = sr.Recognizer()
    self._r = recognizer