コード例 #1
0
ファイル: clip_gru_recon.py プロジェクト: LCE-UMD/GRU
def _test(df, args, params):
    '''
    test subject results
    view only for best cross-val parameters

    trains a GRU encoder on the train subjects, trains a GRU decoder
    that reconstructs the inputs from the (masked) encoder
    trajectories, and reports the reconstruction quality next to the
    PCA reconstruction

    df: dataframe with 'Subject', 'y', 'timepoint' and 'feat*' columns
    args: run settings; reads train_size, gru_model_path, k_dim,
        k_layers, num_epochs, batch_size and overwrites args.k_class
    params: dict with 'k_layers' and 'k_hidden' for the encoder

    returns: dict with keys
        'train', 'test': first value returned by _gru_test_acc
        'train_mse', 'train_r2', 'test_mse', 'test_r2':
            decoder reconstruction scores on non-zero inputs
        'pca_var', 'train_pca_mse', 'train_pca_r2',
        'test_pca_mse', 'test_pca_r2': values from _get_pca_recon
    '''
    _info('test mode')
    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # length of each clip
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)
    '''
    init model
    '''
    # get train, test sequences (sequence lengths are not needed here)
    X_train, _, y_train = _get_seq(df, train_list, args)
    X_test, _, y_test = _get_seq(df, test_list, args)
    '''
    train encoder
    '''
    model_encoder = GRUEncoder(X_train,
                               args.gru_model_path,
                               k_layers=params['k_layers'],
                               k_hidden=params['k_hidden'],
                               k_dim=args.k_dim,
                               k_class=args.k_class)

    model_encoder.fit(X_train,
                      y_train,
                      epochs=args.num_epochs,
                      validation_split=0.2,
                      batch_size=args.batch_size,
                      verbose=1)
    '''
    encoder results
    '''
    results = {}

    a, _, _ = _gru_test_acc(model_encoder, X_train, y_train, clip_time,
                            len(train_list))
    results['train'] = a
    a, _, _ = _gru_test_acc(model_encoder, X_test, y_test, clip_time,
                            len(test_list))
    results['test'] = a
    '''
    get encoder trajectories
    '''
    traj_train = _gruenc_test_traj(model_encoder, X_train)
    traj_test = _gruenc_test_traj(model_encoder, X_test)
    '''
    apply mask on trajectories
    '''
    # timesteps whose first feature is exactly 0 are zeroed in the
    # trajectories as well
    mask = X_train[:, :, 0] == 0.0
    traj_train[mask, :] = 0.0
    mask = X_test[:, :, 0] == 0.0
    traj_test[mask, :] = 0.0
    '''
    train decoder
    '''
    model_decoder = GRUDecoder(traj_train,
                               X_train,
                               k_layers=args.k_layers[0],
                               lr=0.001)

    model_decoder.fit(traj_train,
                      X_train,
                      epochs=args.num_epochs,
                      validation_split=0.2,
                      batch_size=args.batch_size,
                      verbose=1)
    '''
    evaluate decoder
    results on train data, then on test data
    '''
    for split, traj, X in (('train', traj_train, X_train),
                           ('test', traj_test, X_test)):
        # score only the non-zero input entries
        keep = X != 0
        y_true = X[keep]
        y_pred = model_decoder.predict(traj)[keep]

        a = mean_squared_error(y_true, y_pred)
        print('%s_recon mse = %0.3f' % (split, a))
        results['%s_mse' % split] = a
        # r2 is not symmetric: the data must be the first argument,
        # the reconstruction the second
        a = r2_score(y_true, y_pred)
        results['%s_r2' % split] = a
        print('%s_recon r2 = %0.3f' % (split, a))
    '''
    compare to pca reconstruction
    '''
    train_mse, test_mse, train_r2, test_r2, pca_var = _get_pca_recon(
        df, train_list, test_list, args)
    results['pca_var'] = pca_var
    '''
    results on train data
    '''
    results['train_pca_mse'] = train_mse
    results['train_pca_r2'] = train_r2
    print('t_pca_recon r2 = %0.3f' % train_r2)
    '''
    results on test data
    '''
    results['test_pca_mse'] = test_mse
    results['test_pca_r2'] = test_r2
    print('s_pca_recon r2 = %0.3f' % test_r2)

    return results
コード例 #2
0
def _train(df, args, params):
    '''
    cross-validation results

    k-fold, between-subject cross-validation of the TCN classifier
    on the first args.train_size subjects

    df: dataframe with 'Subject', 'y', 'timepoint' and 'feat*' columns
    args: run settings; reads train_size, k_fold, num_epochs,
        batch_size and overwrites args.k_class
    params: dict with 'k_hidden' and 'k_wind' for the classifier

    returns: dict with keys
        'train', 'val': one accuracy per fold (from _ff_acc)
        'train_conf_mtx', 'val_conf_mtx': confusion matrices
            summed over folds
        't_train', 't_val': per class arrays of shape
            (k_fold, clip length) with the temporal accuracy
    '''
    _info('train mode: Running grid search')
    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]

    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # length of each clip
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)

    # results dict init
    results = {}

    # mean accuracy across time
    results['train'] = np.zeros(args.k_fold)
    results['val'] = np.zeros(args.k_fold)

    # confusion matrices
    results['train_conf_mtx'] = np.zeros((args.k_class, args.k_class))
    results['val_conf_mtx'] = np.zeros((args.k_class, args.k_class))

    # per class temporal accuracy
    results['t_train'] = {}
    results['t_val'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((args.k_fold, clip_time[ii]))
        results['t_val'][ii] = np.zeros((args.k_fold, clip_time[ii]))

    # random_state is deliberately not passed: without shuffle=True it
    # has no effect and newer scikit-learn releases reject the
    # combination with a ValueError. folds stay contiguous and
    # deterministic, exactly as before.
    kf = KFold(n_splits=args.k_fold)

    for i_fold, (train, val) in enumerate(kf.split(train_list)):
        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))

        # ***between-subject train-val split
        train_subs = [train_list[ii] for ii in train]
        val_subs = [train_list[ii] for ii in val]

        # get train, val sequences
        X_train, _, y_train = _get_seq(df, train_subs, args)
        X_val, _, y_val = _get_seq(df, val_subs, args)
        '''
        train classifier
        '''
        then = time.time()
        model = TCNClassifier(X_train,
                              k_hidden=params['k_hidden'],
                              k_wind=params['k_wind'],
                              k_class=args.k_class)

        model.fit(X_train,
                  y_train,
                  epochs=args.num_epochs,
                  validation_split=0.2,
                  batch_size=args.batch_size,
                  verbose=1)
        print('--- train time =  %0.4f seconds ---' % (time.time() - then))

        # flatten every trainable variable to count its entries
        # (loop variable must not reuse the name of the params argument)
        trainable = np.sum([
            tf.reshape(var, -1).shape[0]
            for var in model.trainable_variables
        ])
        print('Total trainable parameters: %i' % trainable)
        '''
        results on train data
        '''
        a, a_t, c_mtx = _ff_acc(model, X_train, y_train, clip_time)
        results['train'][i_fold] = a
        print('tacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_train'][ii][i_fold] = a_t[ii]
        results['train_conf_mtx'] += c_mtx
        '''
        results on val data
        '''
        a, a_t, c_mtx = _ff_acc(model, X_val, y_val, clip_time)
        results['val'][i_fold] = a
        print('vacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_val'][ii][i_fold] = a_t[ii]
        results['val_conf_mtx'] += c_mtx

    return results
コード例 #3
0
def _test(df, args, params):
    '''
    held-out evaluation of the TCN classifier
    meant to be run with the best cross-val parameters only

    fits on the first args.train_size subjects and scores both the
    train subjects and the remaining (test) subjects
    side effect: args.k_class is set from df['y']

    returns: (results, results_prob, model)
    '''
    _info('test mode')

    # subjects -> train / test partition
    subjects = df['Subject'].unique()
    train_list = subjects[:args.train_size]
    test_list = subjects[args.train_size:]
    n_test = len(test_list)

    print('number of subjects = %d' % (len(subjects)))
    k_feat = len([col for col in df.columns if 'feat' in col])
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # clip length per class = largest timepoint + 1
    clip_time = np.array(
        [np.unique(df[df['y'] == label]['timepoint']).max() + 1
         for label in range(args.k_class)],
        dtype=float).astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)

    # accuracy placeholders and per class temporal accuracy
    results = {
        'train': np.zeros(n_test),
        'val': np.zeros(n_test),
        't_train': {
            label: np.zeros((n_test, clip_time[label]))
            for label in range(args.k_class)
        },
        't_test': {
            label: np.zeros((n_test, clip_time[label]))
            for label in range(args.k_class)
        },
    }

    results_prob = {
        method: {measure: {} for measure in ('acc', 't_prob')}
        for method in ('train', 'test')
    }

    # train / test sequences
    X_train, train_len, y_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test = _get_seq(df, test_list, args)

    # fit the classifier
    t_start = time.time()
    model = TCNClassifier(X_train,
                          k_hidden=params['k_hidden'],
                          k_wind=params['k_wind'],
                          k_class=args.k_class)
    fit_kwargs = dict(epochs=args.num_epochs,
                      validation_split=0.2,
                      batch_size=args.batch_size,
                      verbose=1)
    model.fit(X_train, y_train, **fit_kwargs)
    print('--- train time =  %0.4f seconds ---' % (time.time() - t_start))

    # same scoring for both partitions: train first, then test
    partitions = (
        ('train', 'tacc = %0.3f', X_train, y_train, train_len,
         len(train_list)),
        ('test', 'sacc = %0.3f', X_test, y_test, test_len, n_test),
    )
    for name, acc_fmt, X, y, seq_len, n_subj in partitions:
        acc, acc_t, conf_mtx = _ff_test_acc(model, X, y, clip_time, n_subj)
        results[name] = acc
        print(acc_fmt % np.mean(acc))
        for label in range(args.k_class):
            results['t_%s' % name][label] = acc_t[label]
        results['%s_conf_mtx' % name] = conf_mtx

        # temporal probs
        results_prob[name]['acc'] = model.evaluate(X, y)[1]
        probs = model.predict(X)
        results_prob[name]['t_prob'] = _get_true_class_prob(
            y, probs, seq_len)

    return results, results_prob, model
コード例 #4
0
def _train(df, bhv_df, args):
    '''
    cross-validation results for the GRU regression model

    participants of every class are split with the same k-fold
    scheme, so train and val keep a nearly identical class ratio

    df: dataframe with 'c', 'timepoint' and 'feat*' columns
    bhv_df: dataframe with 'Subject' and 'y' columns
    args: run settings; reads train_size, k_fold, k_class, k_hidden,
        k_layers, l2, dropout, lr, batch_size, num_epochs
        (args.k_class is read here, not computed)

    note: model_type is not defined in this function, it has to be
    available at module level

    returns: dict with keys
        'train_<score>', 'val_<score>': one value per fold
        't_train_<score>', 't_val_<score>': per clip arrays of shape
            (k_fold, clip length)
        'y', 'y_hat', 'c': per fold outputs of dnn_score on val data
    '''
    # get X-y from df
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    k_clip = len(np.unique(df['c']))
    print('number of clip = %d' % (k_clip))
    subject_list = bhv_df['Subject'].unique()
    train_list = subject_list[:args.train_size]

    # length of each clip
    clip_time = np.zeros(k_clip)
    for ii in range(k_clip):
        class_df = df[df['c'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)

    # init dict for all results
    results = {}

    # true and predicted scores and clip label
    results['y'] = {}
    results['y_hat'] = {}
    results['c'] = {}

    for score in SCORES:
        # mean scores across time
        results['train_%s' % score] = np.zeros(args.k_fold)
        results['val_%s' % score] = np.zeros(args.k_fold)

        # per clip temporal score
        results['t_train_%s' % score] = {}
        results['t_val_%s' % score] = {}

        for ii in range(k_clip):
            results['t_train_%s' % score][ii] = np.zeros(
                (args.k_fold, clip_time[ii]))
            results['t_val_%s' % score][ii] = np.zeros(
                (args.k_fold, clip_time[ii]))

    # random_state is deliberately not passed: without shuffle=True it
    # has no effect and newer scikit-learn releases reject the
    # combination with a ValueError. folds stay contiguous and
    # deterministic, exactly as before.
    kf = KFold(n_splits=args.k_fold)

    # get participant lists for each assigned class
    # ensure they're only in train_list
    class_list = {}
    for ii in range(args.k_class):
        class_list[ii] = bhv_df[(bhv_df['Subject'].isin(train_list))
                                & (bhv_df['y'] == ii)]['Subject'].values
        print('No. of participants in class {} = {}'.format(
            ii, len(class_list[ii])))
    '''
    split participants in each class with kf
    nearly identical ratio of train and val,
    in all classes
    '''
    # one split generator per class, advanced once per fold below
    split = {}
    for ii in range(args.k_class):
        split[ii] = kf.split(class_list[ii])

    for i_fold in range(args.k_fold):

        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))

        # ***between-subject train-val split
        train_subs, val_subs = [], []
        for ii in range(args.k_class):
            train, val = next(split[ii])
            train_subs.extend(class_list[ii][jj] for jj in train)
            val_subs.extend(class_list[ii][jj] for jj in val)
        '''
        model main
        '''

        X_train, train_len, y_train, c_train = _get_seq(df, train_subs, args)
        X_val, val_len, y_val, c_val = _get_seq(df, val_subs, args)

        max_length = tf.reduce_max(train_len)
        '''
        train regression model
        '''
        then = time.time()
        model = GRURegressor(X_train,
                             k_hidden=args.k_hidden,
                             k_layers=args.k_layers,
                             l2=args.l2,
                             dropout=args.dropout,
                             lr=args.lr)
        # targets get a trailing axis of size 1 for fitting
        model.fit(X_train,
                  y_train.reshape(y_train.shape[0], y_train.shape[1], 1),
                  batch_size=args.batch_size,
                  epochs=args.num_epochs,
                  verbose=1,
                  validation_split=0.2)

        print('--- train time =  %0.4f seconds ---' % (time.time() - then))
        '''
        results on train data
        '''
        s, s_t, _, _, _ = dnn_score(model,
                                    X_train,
                                    y_train,
                                    c_train,
                                    train_len,
                                    max_length,
                                    clip_time,
                                    model_type=model_type)
        for score in SCORES:
            results['train_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['t_train_%s' % score][ii][i_fold] = s_t[ii][score]
        print('train p = %0.3f' % s['p'])
        '''
        results on val data
        '''
        s, s_t, y, y_hat, c = dnn_score(model,
                                        X_val,
                                        y_val,
                                        c_val,
                                        val_len,
                                        max_length,
                                        clip_time,
                                        model_type=model_type)
        for score in SCORES:
            results['val_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['t_val_%s' % score][ii][i_fold] = s_t[ii][score]
        print('val p = %0.3f' % s['p'])

        results['y'][i_fold] = y
        results['y_hat'][i_fold] = y_hat
        results['c'][i_fold] = c

    return results
コード例 #5
0
def _test(df, bhv_df, args):
    '''
    held-out evaluation of the GRU regression model

    fits on the first args.train_size subjects of bhv_df and scores
    the train subjects and the remaining (test) subjects

    note: model_type is not defined in this function, it has to be
    available at module level

    returns: dict with '<split>_<score>' and per clip
    't_<split>_<score>' entries for split in (train, test), plus
    'y', 'y_hat', 'c' from dnn_score on the test data
    '''
    _info('test mode')

    # feature / clip counts
    feat_cols = [col for col in df.columns if 'feat' in col]
    print('number of features = %d' % (len(feat_cols)))
    k_clip = len(np.unique(df['c']))
    print('number of clips = %d' % (k_clip))

    # subjects -> train / test partition
    subjects = bhv_df['Subject'].unique()
    train_list = subjects[:args.train_size]
    test_list = subjects[args.train_size:]

    # clip length = largest timepoint + 1
    clip_time = np.array(
        [np.unique(df[df['c'] == clip]['timepoint']).max() + 1
         for clip in range(k_clip)],
        dtype=float).astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)

    # per clip temporal score placeholders
    results = {}
    for score in SCORES:
        for split in ('train', 'test'):
            results['t_%s_%s' % (split, score)] = {
                clip: np.zeros(clip_time[clip]) for clip in range(k_clip)
            }

    # train / test sequences
    X_train, train_len, y_train, c_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test, c_test = _get_seq(df, test_list, args)

    max_length = tf.reduce_max(train_len)

    # fit the regression model
    t_start = time.time()
    model = GRURegressor(X_train,
                         k_hidden=args.k_hidden,
                         k_layers=args.k_layers,
                         l2=args.l2,
                         dropout=args.dropout,
                         lr=args.lr)
    n_seq, n_time = y_train.shape[0], y_train.shape[1]
    model.fit(X_train,
              y_train.reshape(n_seq, n_time, 1),
              batch_size=args.batch_size,
              epochs=args.num_epochs,
              verbose=1,
              validation_split=0.2)
    print('--- train time =  %0.4f seconds ---' % (time.time() - t_start))

    # same scoring for both partitions: train first, then test
    partitions = (
        ('train', 'train p = %0.3f', X_train, y_train, c_train, train_len),
        ('test', 'test p = %0.3f', X_test, y_test, c_test, test_len),
    )
    for split, p_fmt, X, y_true, c_true, seq_len in partitions:
        s, s_t, y, y_hat, c = dnn_score(model,
                                        X,
                                        y_true,
                                        c_true,
                                        seq_len,
                                        max_length,
                                        clip_time,
                                        model_type=model_type)
        for score in SCORES:
            results['%s_%s' % (split, score)] = s[score]
            for clip in range(k_clip):
                results['t_%s_%s' % (split, score)][clip] = s_t[clip][score]
        print(p_fmt % s['p'])

    # y, y_hat, c now hold the outputs of the last (test) pass
    results['y'] = y
    results['y_hat'] = y_hat
    results['c'] = c

    return results
コード例 #6
0
def _test(df, args):
    '''
    held-out evaluation of the logistic regression model
    meant to be run with the best cross-val parameters only

    sequences are built from the output of _get_pc, the model is fit
    on the first args.train_size subjects and scored on both the
    train subjects and the remaining (test) subjects
    side effect: args.k_class is set from df['y']

    returns: dict with 'train', 'val', 't_train', 't_test', 'test'
    '''
    _info('test mode')

    # subjects -> train / test partition
    subjects = df['Subject'].unique()
    train_list = subjects[:args.train_size]
    test_list = subjects[args.train_size:]
    n_test = len(test_list)

    pc_df = _get_pc(df, train_list, test_list, args)

    print('number of subjects = %d' % (len(subjects)))
    k_feat = len([col for col in df.columns if 'feat' in col])
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # clip length per class = largest timepoint + 1
    clip_time = np.array(
        [np.unique(df[df['y'] == label]['timepoint']).max() + 1
         for label in range(args.k_class)],
        dtype=float).astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)

    # accuracy placeholders and per class temporal accuracy
    results = {
        'train': np.zeros(n_test),
        'val': np.zeros(n_test),
        't_train': {
            label: np.zeros((n_test, clip_time[label]))
            for label in range(args.k_class)
        },
        't_test': {
            label: np.zeros((n_test, clip_time[label]))
            for label in range(args.k_class)
        },
    }

    # train / test sequences
    X_train, train_len, y_train = _get_seq(pc_df, train_list, args)
    X_test, test_len, y_test = _get_seq(pc_df, test_list, args)

    # fit the classifier
    t_start = time.time()
    model = LogReg(k_dim=args.k_dim, k_class=args.k_class)
    fit_kwargs = dict(epochs=args.num_epochs,
                      validation_split=0.2,
                      batch_size=args.batch_size,
                      verbose=1)
    model.fit(X_train, y_train, **fit_kwargs)
    print('--- train time =  %0.4f seconds ---' % (time.time() - t_start))

    # ff_test_acc works for logreg
    # same scoring for both partitions: train first, then test
    partitions = (
        ('train', 'tacc = %0.3f', X_train, y_train, len(train_list)),
        ('test', 'sacc = %0.3f', X_test, y_test, n_test),
    )
    for name, acc_fmt, X, y, n_subj in partitions:
        acc, acc_t, _ = _ff_test_acc(model, X, y, clip_time, n_subj)
        results[name] = acc
        print(acc_fmt % np.mean(acc))
        for label in range(args.k_class):
            results['t_%s' % name][label] = acc_t[label]

    return results