def _test(df, args, params):
    '''
    Test-subject evaluation of the GRU encoder/decoder pipeline, run only
    with the best cross-validation parameters.

    Trains a GRUEncoder classifier on the train subjects, extracts its
    hidden-state trajectories, trains a GRUDecoder to reconstruct the input
    sequences from those trajectories, and compares the reconstruction to a
    PCA baseline.

    Parameters
    ----------
    df : DataFrame with 'Subject', 'y', 'timepoint' and 'feat*' columns.
    args : namespace with train_size, gru_model_path, k_dim, k_layers,
        num_epochs, batch_size, etc.; args.k_class is SET here as a side
        effect (callers rely on this).
    params : dict with best cross-val 'k_layers' and 'k_hidden'.

    Returns
    -------
    dict with classification accuracies ('train'/'test'), decoder
    reconstruction metrics ('*_mse'/'*_r2') and PCA baseline metrics.
    '''
    _info('test mode')
    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]
    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))
    # length of each clip: max timepoint per class + 1
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)
    '''
    init model
    '''
    # get train, test sequences
    X_train, train_len, y_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test = _get_seq(df, test_list, args)
    '''
    train encoder
    '''
    model_encoder = GRUEncoder(X_train, args.gru_model_path,
                               k_layers=params['k_layers'],
                               k_hidden=params['k_hidden'],
                               k_dim=args.k_dim,
                               k_class=args.k_class)
    model_encoder.fit(X_train, y_train,
                      epochs=args.num_epochs,
                      validation_split=0.2,
                      batch_size=args.batch_size,
                      verbose=1)
    '''
    encoder results
    '''
    results = {}
    # only mean accuracy is kept; per-time accuracy and confusion matrix
    # from _gru_test_acc are discarded here
    a, a_t, c_mtx = _gru_test_acc(model_encoder, X_train, y_train,
                                  clip_time, len(train_list))
    results['train'] = a
    a, a_t, c_mtx = _gru_test_acc(model_encoder, X_test, y_test,
                                  clip_time, len(test_list))
    results['test'] = a
    '''
    get encoder trajectories
    '''
    traj_train = _gruenc_test_traj(model_encoder, X_train)
    traj_test = _gruenc_test_traj(model_encoder, X_test)
    '''
    apply mask on trajectories
    '''
    # zero out trajectory timepoints where the (zero-padded) input is zero
    # NOTE(review): assumes feature 0 being 0.0 marks padding — confirm
    mask = X_train[:, :, 0] == 0.0
    traj_train[mask, :] = 0.0
    mask = X_test[:, :, 0] == 0.0
    traj_test[mask, :] = 0.0
    '''
    train decoder
    '''
    # NOTE(review): decoder uses args.k_layers[0] while the encoder uses
    # params['k_layers'] — looks intentional (separate hyperparameter) but
    # worth confirming
    model_decoder = GRUDecoder(traj_train, X_train,
                               k_layers=args.k_layers[0],
                               lr=0.001)
    model_decoder.fit(traj_train, X_train,
                      epochs=args.num_epochs,
                      validation_split=0.2,
                      batch_size=args.batch_size,
                      verbose=1)
    '''
    evaluate decoder
    '''
    # evaluate only on non-padded entries
    train_mask = X_train != 0
    test_mask = X_test != 0
    '''
    results on train data
    '''
    outputs = model_decoder.predict(traj_train)
    o = outputs[train_mask]
    y = X_train[train_mask]
    a = mean_squared_error(o, y)
    print('train_recon mse = %0.3f' % a)
    results['train_mse'] = a
    a = r2_score(o, y)
    results['train_r2'] = a
    print('train_recon r2 = %0.3f' % a)
    '''
    results on test data
    '''
    outputs = model_decoder.predict(traj_test)
    o = outputs[test_mask]
    y = X_test[test_mask]
    a = mean_squared_error(o, y)
    print('test_recon mse = %0.3f' % a)
    results['test_mse'] = a
    a = r2_score(o, y)
    results['test_r2'] = a
    print('test_recon r2 = %0.3f' % a)
    '''
    compare to pca reconstruction
    '''
    train_mse, test_mse, train_r2, test_r2, pca_var = _get_pca_recon(
        df, train_list, test_list, args)
    results['pca_var'] = pca_var
    '''
    results on train data
    '''
    results['train_pca_mse'] = train_mse
    results['train_pca_r2'] = train_r2
    print('t_pca_recon r2 = %0.3f' % train_r2)
    '''
    results on test data
    '''
    results['test_pca_mse'] = test_mse
    results['test_pca_r2'] = test_r2
    print('s_pca_recon r2 = %0.3f' % test_r2)
    return results
def _train(df, args, params):
    '''
    Cross-validation results for the TCN classifier (grid-search mode).

    Runs k-fold, between-subject cross-validation on the train subjects,
    fitting a TCNClassifier per fold and recording train/val accuracies,
    per-class temporal accuracies, and summed confusion matrices.

    Parameters
    ----------
    df : DataFrame with 'Subject', 'y', 'timepoint' and 'feat*' columns.
    args : namespace with train_size, k_fold, num_epochs, batch_size, etc.;
        args.k_class is SET here as a side effect.
    params : dict with grid-search 'k_hidden' and 'k_wind'.

    Returns
    -------
    dict of per-fold accuracies, temporal accuracies and confusion matrices.
    '''
    _info('train mode: Running grid search')
    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]
    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))
    # length of each clip: max timepoint per class + 1
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)
    # results dict init
    results = {}
    # mean accuracy across time
    results['train'] = np.zeros(args.k_fold)
    results['val'] = np.zeros(args.k_fold)
    # confusion matrices (summed across folds)
    results['train_conf_mtx'] = np.zeros((args.k_class, args.k_class))
    results['val_conf_mtx'] = np.zeros((args.k_class, args.k_class))
    # per class temporal accuracy
    results['t_train'] = {}
    results['t_val'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((args.k_fold, clip_time[ii]))
        results['t_val'][ii] = np.zeros((args.k_fold, clip_time[ii]))
    # BUGFIX: random_state without shuffle=True is ignored by KFold and
    # raises ValueError on scikit-learn >= 0.24; splits are deterministic
    # (ordered) either way, so dropping it preserves behavior.
    kf = KFold(n_splits=args.k_fold)
    for i_fold, (train, val) in enumerate(kf.split(train_list)):
        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))
        # ***between-subject train-val split
        train_subs = [train_list[ii] for ii in train]
        val_subs = [train_list[ii] for ii in val]
        # get train, val sequences
        X_train, train_len, y_train = _get_seq(df, train_subs, args)
        X_val, val_len, y_val = _get_seq(df, val_subs, args)
        '''
        train classifier
        '''
        then = time.time()
        model = TCNClassifier(X_train,
                              k_hidden=params['k_hidden'],
                              k_wind=params['k_wind'],
                              k_class=args.k_class)
        model.fit(X_train, y_train,
                  epochs=args.num_epochs,
                  validation_split=0.2,
                  batch_size=args.batch_size,
                  verbose=1)
        print('--- train time = %0.4f seconds ---' % (time.time() - then))
        # count trainable parameters; loop variable renamed so it no longer
        # shadows the `params` argument
        trainable = np.sum([
            tf.reshape(w, -1).shape[0]
            for w in model.trainable_variables
        ])
        print('Total trainable parameters: %i' % trainable)
        '''
        results on train data
        '''
        a, a_t, c_mtx = _ff_acc(model, X_train, y_train, clip_time)
        results['train'][i_fold] = a
        print('tacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_train'][ii][i_fold] = a_t[ii]
        results['train_conf_mtx'] += c_mtx
        '''
        results on val data
        '''
        a, a_t, c_mtx = _ff_acc(model, X_val, y_val, clip_time)
        results['val'][i_fold] = a
        print('vacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_val'][ii][i_fold] = a_t[ii]
        results['val_conf_mtx'] += c_mtx
    return results
def _test(df, args, params):
    '''
    Test-subject results for the TCN classifier, run only with the best
    cross-validation parameters.

    Fits one TCNClassifier on all train subjects and evaluates per-subject
    accuracy, per-class temporal accuracy, confusion matrices, and true-class
    temporal probabilities on both train and test subjects.

    Parameters
    ----------
    df : DataFrame with 'Subject', 'y', 'timepoint' and 'feat*' columns.
    args : namespace with train_size, num_epochs, batch_size, etc.;
        args.k_class is SET here as a side effect.
    params : dict with best cross-val 'k_hidden' and 'k_wind'.

    Returns
    -------
    (results, results_prob, model) tuple.
    '''
    _info('test mode')
    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]
    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))
    # length of each clip: max timepoint per class + 1
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)
    # results dict init
    # mean accuracy across time (placeholders, overwritten below)
    # BUGFIX: key was 'val' but this function only ever writes 'test';
    # renamed so the initialized keys match the keys actually populated
    results = {}
    results['train'] = np.zeros(len(test_list))
    results['test'] = np.zeros(len(test_list))
    # per class temporal accuracy
    results['t_train'] = {}
    results['t_test'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((len(test_list), clip_time[ii]))
        results['t_test'][ii] = np.zeros((len(test_list), clip_time[ii]))
    # temporal probability results: {train,test} x {acc, t_prob}
    results_prob = {}
    for method in 'train test'.split():
        results_prob[method] = {}
        for measure in 'acc t_prob'.split():
            results_prob[method][measure] = {}
    '''
    init model
    '''
    # get train, test sequences
    X_train, train_len, y_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test = _get_seq(df, test_list, args)
    '''
    train classifier
    '''
    then = time.time()
    model = TCNClassifier(X_train,
                          k_hidden=params['k_hidden'],
                          k_wind=params['k_wind'],
                          k_class=args.k_class)
    model.fit(X_train, y_train,
              epochs=args.num_epochs,
              validation_split=0.2,
              batch_size=args.batch_size,
              verbose=1)
    print('--- train time = %0.4f seconds ---' % (time.time() - then))
    '''
    results on train data
    '''
    a, a_t, c_mtx = _ff_test_acc(model, X_train, y_train,
                                 clip_time, len(train_list))
    results['train'] = a
    print('tacc = %0.3f' % np.mean(a))
    for ii in range(args.k_class):
        results['t_train'][ii] = a_t[ii]
    results['train_conf_mtx'] = c_mtx
    # train temporal probs
    results_prob['train']['acc'] = model.evaluate(X_train, y_train)[1]
    X_train_probs = model.predict(X_train)
    results_prob['train']['t_prob'] = _get_true_class_prob(
        y_train, X_train_probs, train_len)
    '''
    results on test data
    '''
    a, a_t, c_mtx = _ff_test_acc(model, X_test, y_test,
                                 clip_time, len(test_list))
    results['test'] = a
    print('sacc = %0.3f' % np.mean(a))
    for ii in range(args.k_class):
        results['t_test'][ii] = a_t[ii]
    results['test_conf_mtx'] = c_mtx
    # test temporal probs
    results_prob['test']['acc'] = model.evaluate(X_test, y_test)[1]
    X_test_probs = model.predict(X_test)
    results_prob['test']['t_prob'] = _get_true_class_prob(
        y_test, X_test_probs, test_len)
    return results, results_prob, model
def _train(df, bhv_df, args):
    '''
    Cross-validation results for the GRU behavior-score regressor.

    Runs k-fold, between-subject cross-validation. Folds are drawn
    per assigned class (stratified by bhv_df['y']) so train/val ratios are
    nearly identical across classes. Records train/val scores, per-clip
    temporal scores, and the true/predicted values per fold.

    Parameters
    ----------
    df : DataFrame with 'Subject', 'c' (clip), 'timepoint', 'feat*' columns.
    bhv_df : behavior DataFrame with 'Subject' and class label 'y'.
    args : namespace with train_size, k_fold, k_class, k_hidden, k_layers,
        l2, dropout, lr, num_epochs, batch_size.

    Returns
    -------
    dict of per-fold scores, temporal scores, and y/y_hat/c per fold.
    '''
    # get X-y from df
    feature = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(feature)
    print('number of features = %d' % (k_feat))
    k_clip = len(np.unique(df['c']))
    print('number of clip = %d' % (k_clip))
    subject_list = bhv_df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]
    # length of each clip: max timepoint per clip + 1
    clip_time = np.zeros(k_clip)
    for ii in range(k_clip):
        class_df = df[df['c'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)
    # init dict for all results
    results = {}
    # true and predicted scores and clip label
    results['y'] = {}
    results['y_hat'] = {}
    results['c'] = {}
    for score in SCORES:
        # mean scores across time
        results['train_%s' % score] = np.zeros(args.k_fold)
        results['val_%s' % score] = np.zeros(args.k_fold)
        # per clip temporal score
        results['t_train_%s' % score] = {}
        results['t_val_%s' % score] = {}
        for ii in range(k_clip):
            results['t_train_%s' % score][ii] = np.zeros(
                (args.k_fold, clip_time[ii]))
            results['t_val_%s' % score][ii] = np.zeros(
                (args.k_fold, clip_time[ii]))
    # BUGFIX: random_state without shuffle=True is ignored by KFold and
    # raises ValueError on scikit-learn >= 0.24; splits are deterministic
    # (ordered) either way, so dropping it preserves behavior.
    kf = KFold(n_splits=args.k_fold)
    # get participant lists for each assigned class
    # ensure they're only in train_list
    class_list = {}
    for ii in range(args.k_class):
        class_list[ii] = bhv_df[(bhv_df['Subject'].isin(train_list)) &
                                (bhv_df['y'] == ii)]['Subject'].values
        print('No. of participants in class {} = {}'.format(
            ii, len(class_list[ii])))
    '''
    split participants in each class with kf
    nearly identical ratio of train and val, in all classes
    '''
    split = {}
    for ii in range(args.k_class):
        split[ii] = kf.split(class_list[ii])
    for i_fold in range(args.k_fold):
        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))
        # ***between-subject train-val split
        train_subs, val_subs = [], []
        for ii in range(args.k_class):
            train, val = next(split[ii])
            for jj in train:
                train_subs.append(class_list[ii][jj])
            for jj in val:
                val_subs.append(class_list[ii][jj])
        '''
        model main
        '''
        X_train, train_len, y_train, c_train = _get_seq(df, train_subs, args)
        X_val, val_len, y_val, c_val = _get_seq(df, val_subs, args)
        max_length = tf.reduce_max(train_len)
        '''
        train regression model
        '''
        then = time.time()
        model = GRURegressor(X_train,
                             k_hidden=args.k_hidden,
                             k_layers=args.k_layers,
                             l2=args.l2,
                             dropout=args.dropout,
                             lr=args.lr)
        # target reshaped to (samples, time, 1) for sequence regression
        model.fit(X_train,
                  y_train.reshape(y_train.shape[0], y_train.shape[1], 1),
                  batch_size=args.batch_size,
                  epochs=args.num_epochs,
                  verbose=1,
                  validation_split=0.2)
        print('--- train time = %0.4f seconds ---' % (time.time() - then))
        '''
        results on train data
        '''
        # NOTE(review): model_type is not defined in this function —
        # presumably a module-level global; verify it is set before calling
        s, s_t, _, _, _ = dnn_score(model, X_train, y_train, c_train,
                                    train_len, max_length, clip_time,
                                    model_type=model_type)
        for score in SCORES:
            results['train_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['t_train_%s' % score][ii][i_fold] = s_t[ii][score]
        print('train p = %0.3f' % s['p'])
        '''
        results on val data
        '''
        s, s_t, y, y_hat, c = dnn_score(model, X_val, y_val, c_val,
                                        val_len, max_length, clip_time,
                                        model_type=model_type)
        for score in SCORES:
            results['val_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['t_val_%s' % score][ii][i_fold] = s_t[ii][score]
        print('val p = %0.3f' % s['p'])
        results['y'][i_fold] = y
        results['y_hat'][i_fold] = y_hat
        results['c'][i_fold] = c
    return results
def _test(df, bhv_df, args):
    '''
    Test-subject results for the GRU behavior-score regressor.

    Fits one GRURegressor on all train subjects, then scores both the train
    and test subjects, recording overall and per-clip temporal scores plus
    the true/predicted values for the test set.

    Parameters
    ----------
    df : DataFrame with 'Subject', 'c' (clip), 'timepoint', 'feat*' columns.
    bhv_df : behavior DataFrame with a 'Subject' column.
    args : namespace with train_size, k_hidden, k_layers, l2, dropout, lr,
        num_epochs, batch_size.

    Returns
    -------
    dict of train/test scores, temporal scores, and y/y_hat/c.
    '''
    _info('test mode')
    # get X-y from df
    feat_cols = [col for col in df.columns if 'feat' in col]
    k_feat = len(feat_cols)
    print('number of features = %d' % (k_feat))
    k_clip = len(np.unique(df['c']))
    print('number of clips = %d' % (k_clip))
    subject_list = bhv_df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]
    # length of each clip: one past the largest timepoint observed per clip
    clip_time = np.zeros(k_clip)
    for clip in range(k_clip):
        clip_df = df[df['c'] == clip]
        clip_time[clip] = np.max(np.unique(clip_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)
    # init dict for all results: per-clip temporal score placeholders
    results = {}
    for score in SCORES:
        results['t_train_%s' % score] = {
            clip: np.zeros(clip_time[clip]) for clip in range(k_clip)}
        results['t_test_%s' % score] = {
            clip: np.zeros(clip_time[clip]) for clip in range(k_clip)}
    '''
    model main
    '''
    # get train, test sequences
    X_train, train_len, y_train, c_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test, c_test = _get_seq(df, test_list, args)
    max_length = tf.reduce_max(train_len)
    '''
    test regression model
    '''
    t_start = time.time()
    model = GRURegressor(X_train,
                         k_hidden=args.k_hidden,
                         k_layers=args.k_layers,
                         l2=args.l2,
                         dropout=args.dropout,
                         lr=args.lr)
    # target reshaped to (samples, time, 1) for sequence regression
    y_train_3d = y_train.reshape(y_train.shape[0], y_train.shape[1], 1)
    model.fit(X_train, y_train_3d,
              batch_size=args.batch_size,
              epochs=args.num_epochs,
              verbose=1,
              validation_split=0.2)
    print('--- train time = %0.4f seconds ---' % (time.time() - t_start))
    '''
    results on train data
    '''
    sc, sc_t, _, _, _ = dnn_score(model, X_train, y_train, c_train,
                                  train_len, max_length, clip_time,
                                  model_type=model_type)
    for score in SCORES:
        results['train_%s' % score] = sc[score]
        for clip in range(k_clip):
            results['t_train_%s' % score][clip] = sc_t[clip][score]
    print('train p = %0.3f' % sc['p'])
    '''
    results on test data
    '''
    sc, sc_t, y, y_hat, c = dnn_score(model, X_test, y_test, c_test,
                                      test_len, max_length, clip_time,
                                      model_type=model_type)
    for score in SCORES:
        results['test_%s' % score] = sc[score]
        for clip in range(k_clip):
            results['t_test_%s' % score][clip] = sc_t[clip][score]
    print('test p = %0.3f' % sc['p'])
    results['y'] = y
    results['y_hat'] = y_hat
    results['c'] = c
    return results
def _test(df, args):
    '''
    Test-subject results for the logistic-regression baseline, run only
    with the best cross-val parameters.

    Projects the data onto principal components (via _get_pc), fits a LogReg
    model on the train subjects, and evaluates per-subject accuracy and
    per-class temporal accuracy on both train and test subjects.

    Parameters
    ----------
    df : DataFrame with 'Subject', 'y', 'timepoint' and 'feat*' columns.
    args : namespace with train_size, k_dim, num_epochs, batch_size, etc.;
        args.k_class is SET here as a side effect.

    Returns
    -------
    dict with 'train'/'test' accuracies and 't_train'/'t_test' temporal
    accuracies.
    '''
    _info('test mode')
    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]
    # PCA-reduced dataframe computed from train subjects, applied to all
    pc_df = _get_pc(df, train_list, test_list, args)
    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))
    # length of each clip: max timepoint per class + 1
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)
    # results dict init
    # mean accuracy across time (placeholders, overwritten below)
    # BUGFIX: key was 'val' but this function only ever writes 'test';
    # renamed so the initialized keys match the keys actually populated
    results = {}
    results['train'] = np.zeros(len(test_list))
    results['test'] = np.zeros(len(test_list))
    # per class temporal accuracy
    results['t_train'] = {}
    results['t_test'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((len(test_list), clip_time[ii]))
        results['t_test'][ii] = np.zeros((len(test_list), clip_time[ii]))
    '''
    init model
    '''
    # get train, test sequences (from the PC-projected dataframe)
    X_train, train_len, y_train = _get_seq(pc_df, train_list, args)
    X_test, test_len, y_test = _get_seq(pc_df, test_list, args)
    '''
    train classifier
    '''
    then = time.time()
    model = LogReg(k_dim=args.k_dim, k_class=args.k_class)
    model.fit(X_train, y_train,
              epochs=args.num_epochs,
              validation_split=0.2,
              batch_size=args.batch_size,
              verbose=1)
    print('--- train time = %0.4f seconds ---' % (time.time() - then))
    '''
    results on train data
    ff_test_acc works for logreg
    '''
    # confusion matrix from _ff_test_acc is unused here
    a, a_t, _ = _ff_test_acc(model, X_train, y_train,
                             clip_time, len(train_list))
    results['train'] = a
    print('tacc = %0.3f' % np.mean(a))
    for ii in range(args.k_class):
        results['t_train'][ii] = a_t[ii]
    '''
    results on test data
    '''
    a, a_t, _ = _ff_test_acc(model, X_test, y_test,
                             clip_time, len(test_list))
    results['test'] = a
    print('sacc = %0.3f' % np.mean(a))
    for ii in range(args.k_class):
        results['t_test'][ii] = a_t[ii]
    return results