예제 #1
0
def _train(df, args, params):
    '''
    cross-validation results

    Runs k-fold cross-validation over the training subjects: for each
    fold a fresh GRUClassifier is fit on the training subjects of that
    fold and scored on both the fold's training and validation subjects.

    df: DataFrame with 'Subject', 'y' and 'timepoint' columns; columns
        whose name contains 'feat' are counted as features
    args: options object; train_size, k_fold, num_epochs and batch_size
        are read here, and args.k_class is SET as a side effect
    params: dict providing 'k_layers' and 'k_hidden' for GRUClassifier

    returns: results dict with
        'train', 'val': arrays of length k_fold (one value per fold)
        'train_conf_mtx', 'val_conf_mtx': (k_class, k_class) arrays,
            summed over folds
        't_train', 't_val': dicts mapping class index to an array of
            shape (k_fold, clip_time[class])
    '''
    _info('Running grid search')
    # get X-y from df
    subject_list = df['Subject'].unique()
    # only the first train_size subjects take part in cross-validation
    train_list = subject_list[:args.train_size]

    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # length of each clip (largest timepoint of the class, plus one)
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)

    # results dict init
    results = {}

    # mean accuracy across time
    results['train'] = np.zeros(args.k_fold)
    results['val'] = np.zeros(args.k_fold)

    # confusion matrices
    results['train_conf_mtx'] = np.zeros((args.k_class, args.k_class))
    results['val_conf_mtx'] = np.zeros((args.k_class, args.k_class))

    # per class temporal accuracy
    results['t_train'] = {}
    results['t_val'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((args.k_fold, clip_time[ii]))
        results['t_val'][ii] = np.zeros((args.k_fold, clip_time[ii]))

    # random_state only has an effect together with shuffle=True, and
    # recent scikit-learn raises ValueError when it is set without
    # shuffling. The splits were never shuffled, so the splitter is
    # built without a seed; the folds stay deterministic.
    kf = KFold(n_splits=args.k_fold)

    for i_fold, (train_idx, val_idx) in enumerate(kf.split(train_list)):
        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))

        # ***between-subject train-val split
        train_subs = [train_list[ii] for ii in train_idx]
        val_subs = [train_list[ii] for ii in val_idx]

        # get train, val sequences (second return value is not needed here)
        X_train, _, y_train = _get_seq(df,
                                       train_subs,
                                       args,
                                       shuffle=True)
        X_val, _, y_val = _get_seq(df, val_subs, args)

        # train classifier
        then = time.time()
        model = GRUClassifier(X_train,
                              k_layers=params['k_layers'],
                              k_hidden=params['k_hidden'],
                              k_class=args.k_class)

        # NOTE(review): validation_split is passed in addition to the
        # explicit val subjects; presumably Keras-style fit semantics
        # apply - confirm.
        model.fit(X_train,
                  y_train,
                  epochs=args.num_epochs,
                  validation_split=0.2,
                  batch_size=args.batch_size,
                  verbose=1)
        print('--- train time =  %0.4f seconds ---' % (time.time() - then))

        # loop variable deliberately not called `params`, so it cannot
        # be confused with (or shadow) the function argument
        trainable = np.sum([
            tf.reshape(variable, -1).shape[0]
            for variable in model.trainable_variables
        ])
        print('Total trainable parameters: %i' % trainable)

        # results on train data
        a, a_t, c_mtx = _gru_acc(model, X_train, y_train, clip_time)
        results['train'][i_fold] = a
        print('tacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_train'][ii][i_fold] = a_t[ii]
        results['train_conf_mtx'] += c_mtx

        # results on val data
        a, a_t, c_mtx = _gru_acc(model, X_val, y_val, clip_time)
        results['val'][i_fold] = a
        print('vacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_val'][ii][i_fold] = a_t[ii]
        results['val_conf_mtx'] += c_mtx

    return results
예제 #2
0
def _test(df, args, params):
    '''
    test subject results
    view only for best cross-val parameters

    Fits one GRUClassifier on the first args.train_size subjects and
    scores it on those subjects and on the remaining (test) subjects.

    df: DataFrame with 'Subject', 'y' and 'timepoint' columns; columns
        whose name contains 'feat' are counted as features
    args: options object; train_size, num_epochs and batch_size are
        read here, and args.k_class is SET as a side effect
    params: dict providing 'k_layers' and 'k_hidden' for GRUClassifier

    returns: (results, results_prob, model)
        results: dict holding the _gru_test_acc outputs under 'train',
            'test', 't_train', 't_test', 'train_conf_mtx' and
            'test_conf_mtx'; the pre-allocated 'val' entry is never
            filled and stays all zeros
        results_prob: {'train'|'test': {'acc': ..., 't_prob': ...}}
        model: the fitted classifier
    '''
    _info('test mode')

    # split subjects: first train_size for training, the rest for test
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]
    n_test = len(test_list)

    print('number of subjects = %d' % (len(subject_list)))
    k_feat = sum('feat' in col for col in df.columns)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))
    class_ids = range(args.k_class)

    # length of each clip: largest timepoint of the class, plus one
    # (held as float first, then cast, because df saves float)
    clip_time = np.array(
        [np.max(np.unique(df[df['y'] == cls]['timepoint'])) + 1
         for cls in class_ids],
        dtype=float).astype(int)
    print('seq lengths = %s' % clip_time)

    # placeholders: mean accuracy across time and per class temporal
    # accuracy; all but 'val' are overwritten further down
    results = {
        'train': np.zeros(n_test),
        'val': np.zeros(n_test),
        't_train': {cls: np.zeros((n_test, clip_time[cls]))
                    for cls in class_ids},
        't_test': {cls: np.zeros((n_test, clip_time[cls]))
                   for cls in class_ids},
    }
    results_prob = {
        method: {measure: {} for measure in ('acc', 't_prob')}
        for method in ('train', 'test')
    }

    # get train, test sequences
    X_train, train_len, y_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test = _get_seq(df, test_list, args)
    # only the training sequences go through shuffle_ts; the test
    # sequences are used as returned by _get_seq
    X_train = shuffle_ts(X_train, train_len)

    # train classifier
    t_start = time.time()
    model = GRUClassifier(X_train,
                          k_layers=params['k_layers'],
                          k_hidden=params['k_hidden'],
                          k_class=args.k_class)
    fit_options = dict(epochs=args.num_epochs,
                       validation_split=0.2,
                       batch_size=args.batch_size,
                       verbose=1)
    model.fit(X_train, y_train, **fit_options)
    print('--- train time =  %0.4f seconds ---' % (time.time() - t_start))

    # results on train data
    acc, acc_t, conf_mtx = _gru_test_acc(
        model, X_train, y_train, clip_time, len(train_list))
    results['train'] = acc
    print('tacc = %0.3f' % np.mean(acc))
    results['t_train'].update((cls, acc_t[cls]) for cls in class_ids)
    results['train_conf_mtx'] = conf_mtx

    # train temporal probs
    train_eval = model.evaluate(X_train, y_train)
    results_prob['train']['acc'] = train_eval[1]
    train_probs = model.predict(X_train)
    results_prob['train']['t_prob'] = _get_true_class_prob(
        y_train, train_probs, train_len)

    # results on test data
    acc, acc_t, conf_mtx = _gru_test_acc(
        model, X_test, y_test, clip_time, n_test)
    results['test'] = acc
    print('sacc = %0.3f' % np.mean(acc))
    results['t_test'].update((cls, acc_t[cls]) for cls in class_ids)
    results['test_conf_mtx'] = conf_mtx

    # test temporal probs
    test_eval = model.evaluate(X_test, y_test)
    results_prob['test']['acc'] = test_eval[1]
    test_probs = model.predict(X_test)
    results_prob['test']['t_prob'] = _get_true_class_prob(
        y_test, test_probs, test_len)

    return results, results_prob, model