import numpy as np
from sklearn.ensemble import RandomForestClassifier

# HMMClassifier is this project's own wrapper around per-class HMMs; it is
# assumed to be importable from the project's codebase (the import path is not
# shown in the original snippets).


def bpic(nhmmstates, nhmmiter):

    # unpack hyperparameters: each arrives as a one-element sequence and the
    # iteration count is scaled by 10
    nhmmstates = nhmmstates[0]
    nhmmiter = nhmmiter[0] * 10
    nfolds = 5
    hmmcovtype = 'full'

    print(nhmmstates, nhmmiter)

    # Load the dataset
    static_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_static.npy'
    )
    dynamic_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_dynamic.npy'
    )
    static_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_static.npy'
    )
    dynamic_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_dynamic.npy'
    )
    labels_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_labels.npy'
    )
    labels_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy'
    )

    # Merge train and test
    static_all = np.concatenate((static_train, static_val), axis=0)
    dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0)
    labels_all = np.concatenate((labels_train, labels_val), axis=0)
    nsamples = static_all.shape[0]

    # k-fold cross validation to obtain accuracy
    val_idx_list = np.array_split(range(nsamples), nfolds)
    scores = []
    for fid, val_idx in enumerate(val_idx_list):
        train_idx = list(set(range(nsamples)) - set(val_idx))

        # extract predictions using HMM on dynamic
        hmmcl = HMMClassifier()
        model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype,
                                           dynamic_all[train_idx],
                                           labels_all[train_idx])
        scores.append(
            hmmcl.test(model_pos, model_neg, dynamic_all[val_idx],
                       labels_all[val_idx]))

    print('Result: %.4f' % np.mean(scores))
    return -np.mean(scores)
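

# Hedged sketch (added, not in the original script): bpic returns the negated
# mean CV score, so any minimizer can drive the hyperparameter search. A plain
# grid search over the two knobs would look like this; the default argument
# binds the two-argument bpic defined above.
def _grid_search_bpic(objective=bpic):
    best_params, best_loss = None, np.inf
    for states in (2, 3, 4, 6):
        for iters in (3, 5, 10):  # bpic multiplies this by 10
            loss = objective([states], [iters])
            if loss < best_loss:
                best_params, best_loss = (states, iters * 10), loss
    return best_params, -best_loss

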
def modelopt(nhmmstates, nhmmiter):

    # unpack hyperparameters: each arrives as a one-element sequence and the
    # iteration count is scaled by 10
    nhmmstates = nhmmstates[0]
    nhmmiter = nhmmiter[0] * 10
    nfolds = 5
    hmmcovtype = 'full'

    print(nhmmstates, nhmmiter)

    # Load the dataset
    #static_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/train_data.npy')
    dynamic_train = np.load(
        '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy')
    #static_test = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy')
    #dynamic_test = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy')
    labels_train = np.load(
        '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy')
    #labels_test = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy')
    nsamples = dynamic_train.shape[0]

    # k-fold cross validation to obtain accuracy
    val_idx_list = np.array_split(range(nsamples), nfolds)
    scores = []
    for fid, val_idx in enumerate(val_idx_list):
        train_idx = list(set(range(nsamples)) - set(val_idx))
        print "Current fold is %d / %d" % (fid + 1, nfolds)

        # extract predictions using HMM on dynamic
        hmmcl = HMMClassifier()
        model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype,
                                           dynamic_train[train_idx],
                                           labels_train[train_idx])
        scores.append(
            hmmcl.test(model_pos, model_neg, dynamic_train[val_idx],
                       labels_train[val_idx]))

    print('Result: %.4f' % np.mean(scores))
    return -np.mean(scores)
#
# Prepare combined datasets for the future experiments
#

# dataset to check how generative models perform if provided with static
# features along with dynamic ones
static_as_dynamic = np.zeros(
    (static_all.shape[0], static_all.shape[1], dynamic_all.shape[2]))
for i in range(static_all.shape[0]):
    static_as_dynamic[i, :, :] = np.tile(static_all[i, :],
                                         (dynamic_all.shape[2], 1)).T
dynamic_and_static_as_dynamic = np.concatenate(
    (dynamic_all,
     static_as_dynamic +
     np.random.uniform(-0.0001, 0.0001, static_as_dynamic.shape)),
    axis=1)
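
# Hedged toy illustration (added, not in the original script) of the trick
# above: a sample's static feature vector is tiled into a constant "sequence"
# of shape (n_features, n_timesteps), so a sequence model sees the static
# features at every time step; the tiny uniform jitter added above presumably
# keeps the full-covariance Gaussians from collapsing on these constant
# channels.
_toy_static = np.array([1.0, 2.0, 3.0])          # one sample, 3 static features
_toy_fake_seq = np.tile(_toy_static, (4, 1)).T   # 3 channels x 4 time steps
assert _toy_fake_seq.shape == (3, 4)
assert np.all(_toy_fake_seq[1, :] == 2.0)        # each channel is constant in time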


#
# k-fold CV for performance estimation
#
val_idx_list = np.array_split(range(nsamples), nfolds)
scores = {i: [] for i in range(1, 12)}  # slots for experiments 1-11; only (9) runs here
for fid, val_idx in enumerate(val_idx_list):
    print "Current fold is %d / %d" % (fid + 1, nfolds)
    train_idx = list(set(range(nsamples)) - set(val_idx))

    # HMM on dynamic and static (turned into fake sequences) (9)
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype,
                                       dynamic_and_static_as_dynamic[train_idx],
                                       labels_all[train_idx])
    acc, auc = hmmcl.test(model_pos, model_neg,
                          dynamic_and_static_as_dynamic[val_idx],
                          labels_all[val_idx])
    scores[9].append(acc)

print "===> (9) HMM on dynamic and static features: %.4f (+/- %.4f) %s" % (np.mean(scores[9]), np.std(scores[9]), scores[9])


def bpic(nhmmstates, nestimators, nhmmiter):

    # unpack hyperparameters: each arrives as a one-element sequence; the
    # estimator count is scaled by 100 and the iteration count by 10
    nhmmstates = nhmmstates[0]
    nestimators = nestimators[0] * 100
    nhmmiter = nhmmiter[0] * 10
    nfolds = 5
    hmmcovtype = 'full'

    print(nhmmstates, nestimators, nhmmiter)

    # Load the dataset
    static_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_static.npy'
    )
    dynamic_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_dynamic.npy'
    )
    static_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_static.npy'
    )
    dynamic_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_dynamic.npy'
    )
    labels_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_labels.npy'
    )
    labels_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy'
    )

    # Merge train and test
    static_all = np.concatenate((static_train, static_val), axis=0)
    dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0)
    labels_all = np.concatenate((labels_train, labels_val), axis=0)
    nsamples = static_all.shape[0]

    # prepare where to store the ratios
    ratios_all_hmm = np.empty(len(labels_all))

    # split indices into folds
    enrich_idx_list = np.array_split(range(nsamples), nfolds)

    # run CV
    for fid, enrich_idx in enumerate(enrich_idx_list):
        train_idx = list(set(range(nsamples)) - set(enrich_idx))

        # extract predictions using HMM on dynamic
        hmmcl = HMMClassifier()
        model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype,
                                           dynamic_all[train_idx],
                                           labels_all[train_idx])
        ratios_all_hmm[enrich_idx] = hmmcl.pos_neg_ratios(
            model_pos, model_neg, dynamic_all[enrich_idx])

    # dataset for hybrid learning: static features plus the out-of-fold HMM
    # ratio as one extra column
    enriched_by_hmm = np.concatenate(
        (static_all, ratios_all_hmm.reshape(-1, 1)), axis=1)
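
    # Hedged note (added): each entry of ratios_all_hmm was produced by HMMs
    # trained on the other folds, so the extra column is an out-of-fold score
    # and presumably does not leak a sample's own label into the forest's
    # cross-validation below. The enrichment adds exactly one column:
    assert enriched_by_hmm.shape[1] == static_all.shape[1] + 1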

    # (2.) k-fold cross validation to obtain accuracy
    val_idx_list = np.array_split(range(nsamples), nfolds)
    scores = []
    for fid, val_idx in enumerate(val_idx_list):
        train_idx = list(set(range(nsamples)) - set(val_idx))

        # Hybrid on features enriched by HMM (3)
        rf = RandomForestClassifier(n_estimators=nestimators)
        rf.fit(enriched_by_hmm[train_idx], labels_all[train_idx])
        scores.append(rf.score(enriched_by_hmm[val_idx], labels_all[val_idx]))

    print('Result: %.4f' % np.mean(scores))
    return -np.mean(scores)
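

# Usage sketch (added, assumption): each hyperparameter is passed as a
# one-element sequence; inside this bpic the estimator count is scaled by 100
# and the iteration count by 10, so the call below uses 4 HMM states, 300
# random-forest trees and 50 EM iterations, and prints the mean 5-fold CV
# accuracy recovered from the negated return value.
if __name__ == '__main__':
    neg_acc = bpic([4], [3], [5])
    print('5-fold CV accuracy: %.4f' % -neg_acc)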