def _train(df, args, params):
    '''
    cross-validation results

    Runs k-fold cross-validation over the first `args.train_size`
    subjects (in the order returned by df['Subject'].unique()). For
    every fold a fresh GRUClassifier is trained on the fold's training
    subjects and scored on both the training and validation subjects.

    df: dataframe with 'Subject', 'y', 'timepoint' columns and
        feature columns whose names contain 'feat'
    args: namespace; reads train_size, k_fold, num_epochs, batch_size.
        NOTE: args.k_class is (over)written here as a side effect.
    params: dict with 'k_layers' and 'k_hidden' for the classifier

    returns: results dict with keys
        'train', 'val'                     -> (k_fold,) accuracy per fold
        'train_conf_mtx', 'val_conf_mtx'   -> (k_class, k_class),
                                              summed over folds
        't_train', 't_val'                 -> {class: (k_fold, clip_len)}
                                              per class temporal accuracy
    '''
    _info('Running grid search')

    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]

    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # length of each clip (largest timepoint index + 1);
    # assumes class labels are the integers 0..k_class-1
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)

    # results dict init
    results = {}

    # mean accuracy across time
    results['train'] = np.zeros(args.k_fold)
    results['val'] = np.zeros(args.k_fold)

    # confusion matrices (accumulated over folds)
    results['train_conf_mtx'] = np.zeros((args.k_class, args.k_class))
    results['val_conf_mtx'] = np.zeros((args.k_class, args.k_class))

    # per class temporal accuracy
    results['t_train'] = {}
    results['t_val'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((args.k_fold, clip_time[ii]))
        results['t_val'][ii] = np.zeros((args.k_fold, clip_time[ii]))

    # No random_state here: the folds are contiguous and unshuffled, so
    # they are already deterministic. Passing a seed without
    # shuffle=True had no effect in old scikit-learn and raises
    # ValueError in scikit-learn >= 0.24.
    kf = KFold(n_splits=args.k_fold)
    for i_fold, (train, val) in enumerate(kf.split(train_list)):
        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))

        # ***between-subject train-val split
        train_subs = [train_list[ii] for ii in train]
        val_subs = [train_list[ii] for ii in val]

        # get train, val sequences (the returned lengths are not needed)
        X_train, _, y_train = _get_seq(df, train_subs, args, shuffle=True)
        X_val, _, y_val = _get_seq(df, val_subs, args)

        # train classifier
        then = time.time()
        model = GRUClassifier(X_train,
                              k_layers=params['k_layers'],
                              k_hidden=params['k_hidden'],
                              k_class=args.k_class)

        model.fit(X_train, y_train,
                  epochs=args.num_epochs,
                  validation_split=0.2,
                  batch_size=args.batch_size,
                  verbose=1)
        print('--- train time =  %0.4f seconds ---' % (time.time() - then))

        # loop variable deliberately NOT called `params`, so it can
        # never clobber the hyper-parameter dict used above
        trainable = np.sum([
            tf.reshape(variable, -1).shape[0]
            for variable in model.trainable_variables
        ])
        print('Total trainable parameters: %i' % trainable)

        # results on train data
        a, a_t, c_mtx = _gru_acc(model, X_train, y_train, clip_time)
        results['train'][i_fold] = a
        print('tacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_train'][ii][i_fold] = a_t[ii]
        results['train_conf_mtx'] += c_mtx

        # results on val data
        a, a_t, c_mtx = _gru_acc(model, X_val, y_val, clip_time)
        results['val'][i_fold] = a
        print('vacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_val'][ii][i_fold] = a_t[ii]
        results['val_conf_mtx'] += c_mtx

    return results
def _test(df, args, params):
    '''
    test subject results
    view only for best cross-val parameters

    Trains one GRUClassifier on the first `args.train_size` subjects
    (order of df['Subject'].unique()) and evaluates it on those
    subjects and on the remaining, held-out subjects.

    df: dataframe with 'Subject', 'y', 'timepoint' columns and
        feature columns whose names contain 'feat'
    args: namespace; reads train_size, num_epochs, batch_size.
        NOTE: args.k_class is (over)written here as a side effect.
    params: dict with 'k_layers' and 'k_hidden' for the classifier

    returns: (results, results_prob, model)
        results: dict with 'train'/'test' accuracies,
            't_train'/'t_test' per class temporal accuracy,
            'train_conf_mtx'/'test_conf_mtx', plus a zero-filled
            'val' entry that is never updated
        results_prob: {'train'|'test': {'acc': ..., 't_prob': ...}}
        model: the fitted classifier
    '''
    _info('test mode')

    # split subjects: leading block trains, the remainder is held out
    subject_list = df['Subject'].unique()
    n_train_subs = args.train_size
    train_list = subject_list[:n_train_subs]
    test_list = subject_list[n_train_subs:]

    print('number of subjects = %d' % (len(subject_list)))
    features = [col for col in df.columns if 'feat' in col]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # length of each clip (largest timepoint index + 1)
    clip_time = np.zeros(args.k_class)
    for label in range(args.k_class):
        timepoints = df.loc[df['y'] == label, 'timepoint']
        clip_time[label] = np.unique(timepoints).max() + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)

    # results dict init; most placeholders are replaced further down,
    # 'val' is returned as-is
    n_test_subs = len(test_list)
    results = {
        # mean accuracy across time
        'train': np.zeros(n_test_subs),
        'val': np.zeros(n_test_subs),
        # per class temporal accuracy
        't_train': {},
        't_test': {},
    }
    for label in range(args.k_class):
        placeholder_shape = (n_test_subs, clip_time[label])
        results['t_train'][label] = np.zeros(placeholder_shape)
        results['t_test'][label] = np.zeros(placeholder_shape)

    results_prob = {
        method: {measure: {} for measure in ('acc', 't_prob')}
        for method in ('train', 'test')
    }

    # get train, test sequences
    X_train, train_len, y_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test = _get_seq(df, test_list, args)

    # only the training sequences go through shuffle_ts;
    # the test sequences are left untouched
    X_train = shuffle_ts(X_train, train_len)

    # train classifier
    t_start = time.time()
    model = GRUClassifier(
        X_train,
        k_layers=params['k_layers'],
        k_hidden=params['k_hidden'],
        k_class=args.k_class,
    )
    model.fit(
        X_train,
        y_train,
        epochs=args.num_epochs,
        validation_split=0.2,
        batch_size=args.batch_size,
        verbose=1,
    )
    print('--- train time =  %0.4f seconds ---' % (time.time() - t_start))

    # results on train data
    acc, acc_t, conf_mtx = _gru_test_acc(
        model, X_train, y_train, clip_time, len(train_list))
    results['train'] = acc
    print('tacc = %0.3f' % np.mean(acc))
    for label in range(args.k_class):
        results['t_train'][label] = acc_t[label]
    results['train_conf_mtx'] = conf_mtx

    # train temporal probs
    results_prob['train']['acc'] = model.evaluate(X_train, y_train)[1]
    train_probs = model.predict(X_train)
    results_prob['train']['t_prob'] = _get_true_class_prob(
        y_train, train_probs, train_len)

    # results on test data
    acc, acc_t, conf_mtx = _gru_test_acc(
        model, X_test, y_test, clip_time, len(test_list))
    results['test'] = acc
    print('sacc = %0.3f' % np.mean(acc))
    for label in range(args.k_class):
        results['t_test'][label] = acc_t[label]
    results['test_conf_mtx'] = conf_mtx

    # test temporal probs
    results_prob['test']['acc'] = model.evaluate(X_test, y_test)[1]
    test_probs = model.predict(X_test)
    results_prob['test']['t_prob'] = _get_true_class_prob(
        y_test, test_probs, test_len)

    return results, results_prob, model