def _comp_seq_scores(self):
    """Compute per-sequence outlier and uncertainty scores for the pool.

    Writes the results into ``self.sequence_scores['outlier']`` and
    ``self.sequence_scores['uncertainty']``.
    """
    # Prepare audio/video 3D inputs for the current unlabelled pool.
    aud = prepare_X(self.x_pool, 'aud', self.sequence_length)
    vid = prepare_X(self.x_pool, 'vid', self.sequence_length)

    # --- outlier score ---
    # Score each predicted label by its Mahalanobis distance to the mean of
    # the label distribution, then average per sequence (axis 1).
    preds = self.model.predict([aud, vid])
    dists = self.cov_est_labels.mahalanobis(preds.reshape(-1, 1))
    self.sequence_scores['outlier'] = dists.reshape(preds.shape).mean(axis=1)

    # --- uncertainty score (MC dropout) ---
    # Collect t stochastic forward passes into an array of shape
    # (n_sequences, labels_per_sequence, t).
    n_seq = self.sequence_scores.shape[0]
    labels_per_seq = int(self.sequence_length // self.n_pool)
    mc_preds = np.zeros((n_seq, labels_per_seq, self.t))
    for run in range(self.t):
        out = self.model_dropout_test.predict([aud, vid])
        # trailing axis has length 1; drop it
        mc_preds[:, :, run] = out.squeeze(axis=-1)
    # Variance across dropout runs (axis 2), averaged per sequence (axis 1).
    self.sequence_scores['uncertainty'] = np.var(mc_preds, axis=2).mean(axis=1)
    return
def evaluate_on_test_set(self):
    """Evaluate the model on the test set and record metrics in history.

    Adds CCC, MAE and the cumulative number of queried sequences to
    ``self.history`` at row ``self.queries``.
    """
    # Ground-truth labels, one row per sequence.
    y_true = self.y_test.to_numpy().reshape((-1, self.sequence_length))
    # Predictions on the prepared audio/video test inputs.
    aud = prepare_X(self.x_test, 'aud', self.sequence_length)
    vid = prepare_X(self.x_test, 'vid', self.sequence_length)
    raw_preds = self.model.predict([aud, vid])
    # Upsample the (pooled) predictions back to the original label size.
    preds = self._grow_labels(raw_preds)
    # Record metrics for the current query step.
    row = self.queries
    self.history.loc[row, 'ccc'] = ccc(y_true, preds)
    self.history.loc[row, 'mae'] = self._mae(y_true, preds)
    self.history.loc[row, 'cum_queried_seqs'] = self.queried_seq_tot
def _train(self, x, y, epochs, batch_size):
    """Fit the model on x and y. Internal helper.

    Labels and features are reshaped into 3D sequence tensors before
    fitting; training output is silenced.
    """
    labels = prepare_labels(y, self.sequence_length, self.n_pool)
    inputs = [prepare_X(x, mod, self.sequence_length) for mod in ('aud', 'vid')]
    self.model.fit(inputs, [labels],
                   batch_size=batch_size,
                   epochs=epochs,
                   verbose=0)
def _comp_seq_scores(self):
    """Compute per-sequence outlier scores for the pool.

    Writes the result into ``self.sequence_scores['outlier']``.
    """
    # Predict labels for the current unlabelled pool.
    aud = prepare_X(self.x_pool, 'aud', self.sequence_length)
    vid = prepare_X(self.x_pool, 'vid', self.sequence_length)
    preds = self.model.predict([aud, vid])
    # Outlier score: Mahalanobis distance of each predicted label to the
    # label mean, averaged over each sequence (axis 1).
    dists = self.cov_est_labels.mahalanobis(preds.reshape(-1, 1))
    self.sequence_scores['outlier'] = dists.reshape(preds.shape).mean(axis=1)
    return
def _comp_seq_scores(self):
    """Compute per-sequence uncertainty scores via MC dropout.

    Writes the result into ``self.sequence_scores['uncertainty']`` and
    returns the raw dropout predictions, shape
    (n_sequences, labels_per_sequence, t).
    """
    # Prepare audio/video 3D inputs for the current unlabelled pool.
    aud = prepare_X(self.x_pool, 'aud', self.sequence_length)
    vid = prepare_X(self.x_pool, 'vid', self.sequence_length)

    # t stochastic forward passes with dropout active at test time.
    n_seq = self.sequence_scores.shape[0]
    labels_per_seq = int(self.sequence_length // self.n_pool)
    mc_preds = np.zeros((n_seq, labels_per_seq, self.t))
    for run in range(self.t):
        out = self.model_dropout_test.predict([aud, vid])
        # trailing axis has length 1; drop it
        mc_preds[:, :, run] = out.squeeze(axis=-1)
    # Variance across dropout runs (axis 2), averaged per sequence (axis 1).
    self.sequence_scores['uncertainty'] = np.var(mc_preds, axis=2).mean(axis=1)
    return mc_preds
def fitness(batch_size, dropout, rec_dropout, rec_l2, kernel_l2,
            n_neurons_hid_aud, n_neurons_hid_vid):
    """Objective for hyper-parameter tuning.

    Builds a model with the candidate hyper-parameters, runs 5-fold cross
    validation and returns the mean validation loss across the held-out
    folds (lower is better).
    """
    # Progress printout for the skopt search.
    global runs_skopt
    runs_skopt += 1
    print('Run skopt:{}'.format(runs_skopt))
    print('hyperparameters: ')

    # Prepare labels and modality inputs from the labelled data.
    label_dict = prepare_labels(y_labelled, SEQUENCE_LENGTH, pool_size)
    X_aud_3d = prepare_X(X_labelled, 'aud', SEQUENCE_LENGTH)
    X_vid_3d = prepare_X(X_labelled, 'vid', SEQUENCE_LENGTH)

    # Build the candidate model and snapshot its initial weights so every
    # CV fold starts from the same state.
    model = build_keras_model(SEQUENCE_LENGTH, N_FEATURES_AUD, N_FEATURES_VID,
                              dropout_rate=dropout,
                              rec_dropout_rate=rec_dropout,
                              rec_l2=rec_l2,
                              ker_l2=kernel_l2,
                              pool_size=pool_size,
                              n_neurons_hid_aud=n_neurons_hid_aud,
                              n_neurons_hid_vid=n_neurons_hid_vid)
    model.save_weights(PATH_START_WEIGHTS)

    # 5-fold cross validation.
    y_arousal = label_dict['y_arousal']
    y_valence = label_dict['y_valence']
    splitter = KFold(n_splits=5, random_state=42, shuffle=True)
    val_losses = []
    for idx_train, idx_test in splitter.split(X_aud_3d):
        # Split inputs and labels for this fold.
        aud_tr, aud_va = X_aud_3d[idx_train], X_aud_3d[idx_test]
        vid_tr, vid_va = X_vid_3d[idx_train], X_vid_3d[idx_test]
        arou_tr, arou_va = y_arousal[idx_train], y_arousal[idx_test]
        vale_tr, vale_va = y_valence[idx_train], y_valence[idx_test]

        # Reset to the initial weights on each new fold.
        model.load_weights(PATH_START_WEIGHTS)
        history = model.fit([aud_tr, vid_tr],
                            [arou_tr, vale_tr],
                            epochs=200,
                            batch_size=batch_size,
                            validation_data=([aud_va, vid_va],
                                             [arou_va, vale_va]),
                            callbacks=[early_stopper],
                            verbose=0)
        # Mean of the two heads' final validation losses (these metrics
        # exclude the l2 regularization terms).
        arou_loss = history.history["val_pred_reg_arou_loss"][-1]
        vale_loss = history.history["val_pred_reg_val_loss"][-1]
        val_losses.append((arou_loss + vale_loss) / 2)

    # Delete the model and clear the session so repeated calls don't keep
    # adding models to the same tf graph.
    del model
    K.clear_session()
    # Mean loss over the k held-out sets.
    return statistics.mean(val_losses)
res_gp.x # res_gp.func_vals # %% Model standalone: tensor_board = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_grads=False, write_images=False) # %% # %% # %% X_aud_3d = prepare_X(X_labelled, 'aud', SEQUENCE_LENGTH) X_vid_3d = prepare_X(X_labelled, 'vid', SEQUENCE_LENGTH) # %% X_vid_3d.shape # %% label_dict = prepare_labels(y_labelled, SEQUENCE_LENGTH, pool_size) X_3d = X_labelled.to_numpy().reshape(-1, SEQUENCE_LENGTH, X_labelled.shape[-1]) model = build_keras_model(SEQUENCE_LENGTH, N_FEATURES_AUD, N_FEATURES_VID, pool_size=pool_size, n_neurons_gru=64, n_neurons_hid_aud=44, n_neurons_hid_vid=100, dropout_rate=0.38, rec_dropout_rate=0.04,