def makeCV(kfolds,X,Labels,User,Meta,clf,opts):
    """Train and predict over user-wise folds, with optional bagging.

    NOTE(review): this copy of the function stops after the bagging loop.
    It accumulates ``allProb`` per fold but never scores or returns
    anything (implicit ``None``). A complete definition with the same name
    appears later in this file and overrides this one at import time.

    Args:
        kfolds: indexable; ``kfolds[0]`` is printed in the failure
            diagnostics and ``kfolds[1]`` yields ``(train_users,
            test_users)`` index pairs into ``np.unique(User)``.
        X: array indexed as ``X[mask, :, :]`` (one row per sample).
        Labels: per-sample labels, indexed with the same boolean mask.
        User: per-sample user id.
        Meta: per-sample metadata handed to ``updateMeta``.
        clf: estimator exposing ``fit`` and ``predict``.
        opts: dict of options; the key ``'bagging'`` enables bagging.
    """
    users = np.unique(User)
    toPredData=[]
    Gauc = []
    for train_users,test_users in kfolds[1]:
        allProb = 0
        # Build the membership set once instead of once per sample.
        test_user_set = set(users[test_users])
        test_index = np.array([u in test_user_set for u in User])

        if 'bagging' in opts:
            bagging = baggingIterator(opts,[users[i] for i in train_users])
        else:
            # Single bag [-1]: leaves nobody out as long as no user id is -1.
            bagging = [[-1]]

        for bag in bagging:
            bag_set = set(bag)
            bagUsers = np.array([u in bag_set for u in User])
            # Training rows = rows outside the test fold, minus the bag's users.
            # logical_not replaces np.negative, which current NumPy rejects
            # for boolean arrays.
            train_index = np.logical_xor(np.logical_not(test_index), bagUsers)

            try:
                # train
                updateMeta(clf,Meta[train_index])
                clf.fit(X[train_index,:,:],Labels[train_index])

                # predict, one test user at a time
                prob = []
                for ut in np.unique(users[test_users]):
                    updateMeta(clf,Meta[User==ut,...])
                    prob.extend(clf.predict(X[User==ut,...]))
                prob = np.array(prob)

                allProb += prob/len(bagging)
            except Exception as err:
                # Best effort: report the failing configuration and move on
                # to the next bag. KeyboardInterrupt/SystemExit now propagate.
                print('makeCV: bag failed with %r' % (err,))
                print(kfolds[0])
                print([users[i] for i in train_users])
                print(bag)
# Example #2
# 0
# Training metadata; the 'leak' option selects the alternate metadata file.
Meta = np.load('./preproc/meta_leak.npy') if 'leak' in opts else np.load(
    './preproc/meta.npy')

# Test-set epochs, identifiers and metadata.
X_test = np.load('./preproc/test_epochs.npy')
feedbackid, User_test = np.load('./preproc/test_infos.npy')
User_test = np.array(list(map(int, User_test)))
users_test = np.unique(User_test)
Meta_test = np.load('./preproc/test_meta_leak.npy'
                    ) if 'leak' in opts else np.load('./preproc/test_meta.npy')

### training
np.random.seed(5)

if 'bagging' in opts:
    bagging = baggingIterator(opts, users)
else:
    # Single bag [-1]; presumably a sentinel that matches no user --
    # confirm against BaggingFunc.
    bagging = [[-1]]

t = time()
# Bind the shared data once so each worker only receives its bag.
pBaggingFunc = partial(BaggingFunc,
                       Labels=Labels,
                       X=X,
                       Meta=Meta,
                       User=User,
                       X_test=X_test,
                       Meta_test=Meta_test,
                       User_test=User_test)
pool = Pool(processes=cores)
try:
    # One task per bag (chunksize=1).
    allProb = pool.map(pBaggingFunc, bagging, chunksize=1)
finally:
    # Always release the worker processes, even if a bag raised.
    pool.close()
    pool.join()
# Stack the per-bag results row-wise.
allProb = np.vstack(allProb)
def makeCV(kfolds, X, Labels, User, Meta, clf, opts):
    """Run one repetition of user-wise cross-validation and report AUCs.

    For every fold the classifier is fitted on the rows of the non-test
    users (once per bag when bagging is enabled), the per-bag predictions
    for the test users are averaged, and the fold is scored with
    ``roc_auc_score``. A summary line is printed at the end.

    Args:
        kfolds: indexable; ``kfolds[0]`` is the repetition number shown in
            the summary and ``kfolds[1]`` yields ``(train_users,
            test_users)`` index pairs into ``np.unique(User)``.
        X: array indexed as ``X[mask, :, :]`` (one row per sample).
        Labels: per-sample labels, indexed with the same boolean mask.
        User: per-sample user id.
        Meta: per-sample metadata handed to ``updateMeta``; its last column
            is used by the ``'leak'`` correction.
        clf: estimator exposing ``fit`` and ``predict``.
        opts: dict of options. ``'bagging'`` enables bagging through
            ``baggingIterator``; ``'leak'`` adds
            ``opts['leak']['coeff'] * (1 - Meta[test rows, -1])`` to the
            predictions.

    Returns:
        ``[Gauc, Sauc]``: the list of per-fold AUCs and the list of
        per-user AUCs computed over all folds together.
    """
    users = np.unique(User)
    toPredData = []
    Gauc = []
    for train_users, test_users in kfolds[1]:
        allProb = 0
        # Build the membership set once instead of once per sample.
        test_user_set = set(users[test_users])
        test_index = np.array([u in test_user_set for u in User])

        if 'bagging' in opts:
            bagging = baggingIterator(opts, [users[i] for i in train_users])
        else:
            # Single bag [-1]: leaves nobody out as long as no user id is -1.
            bagging = [[-1]]

        for bag in bagging:
            bag_set = set(bag)
            bagUsers = np.array([u in bag_set for u in User])
            # Training rows = rows outside the test fold, minus the bag's
            # users. logical_not replaces np.negative, which current NumPy
            # rejects for boolean arrays.
            train_index = np.logical_xor(np.logical_not(test_index), bagUsers)

            try:
                # train
                updateMeta(clf, Meta[train_index])
                clf.fit(X[train_index, :, :], Labels[train_index])

                # predict, one test user at a time
                # NOTE(review): predictions are concatenated in ascending
                # user order, while User[test_index] / Labels[test_index]
                # below are in row order -- these only line up if the rows
                # are already grouped by ascending user id. Confirm.
                prob = []
                for ut in np.unique(users[test_users]):
                    updateMeta(clf, Meta[User == ut, ...])
                    prob.extend(clf.predict(X[User == ut, ...]))
                prob = np.array(prob)

                # NOTE(review): a failed bag is skipped but still counted
                # in the divisor, so the average shrinks when bags fail.
                allProb += prob / len(bagging)
            except Exception as err:
                # Best effort: report the failing configuration and move on
                # to the next bag. KeyboardInterrupt/SystemExit now propagate.
                print('makeCV: bag failed with %r' % (err,))
                print(kfolds[0])
                print([users[i] for i in train_users])
                print(bag)

        # save & return
        predictions = OrderedDict()
        predictions['user'] = User[test_index]
        predictions['label'] = Labels[test_index]
        predictions['prediction'] = allProb
        if 'leak' in opts:
            predictions['prediction'] += opts['leak']['coeff'] * (
                1 - Meta[test_index, -1])
        predictions = pd.DataFrame(predictions)

        # Fold-level AUC over all test users of this fold.
        Gauc.append(roc_auc_score(predictions.label, predictions.prediction))
        toPredData.append(predictions)
    predData = pd.concat(toPredData)

    # Per-user AUC over the predictions of all folds.
    Sauc = []
    for i in np.unique(predData.user):
        user_rows = predData.loc[predData.user == i]
        Sauc.append(roc_auc_score(user_rows.label, user_rows.prediction))

    print('Rep %d: gAUC (mean of folds) %0.5f, sAUC %0.5f (%0.5f)' % (
        kfolds[0], np.mean(Gauc), np.mean(Sauc), np.std(Sauc)))

    return [Gauc, Sauc]
# Training labels, user ids and metadata; the 'leak' option selects the
# alternate metadata file.
Labels,User = np.load('./preproc/infos.npy')
users = np.unique(User)
Meta = np.load('./preproc/meta_leak.npy') if 'leak' in opts else np.load('./preproc/meta.npy')

# Test-set epochs, identifiers and metadata.
X_test = np.load('./preproc/test_epochs.npy')
feedbackid,User_test = np.load('./preproc/test_infos.npy')
# Materialise the map: on Python 3 np.array(map(...)) would wrap the
# iterator itself in a 0-d object array.
User_test = np.array(list(map(int, User_test)))
users_test = np.unique(User_test)
Meta_test = np.load('./preproc/test_meta_leak.npy') if 'leak' in opts else np.load('./preproc/test_meta.npy')

### training
np.random.seed(5)

if 'bagging' in opts:
    bagging = baggingIterator(opts,users)
else:
    # Single bag [-1]; presumably a sentinel that matches no user --
    # confirm against BaggingFunc.
    bagging = [[-1]]

t = time()
# Bind the shared data once so each worker only receives its bag.
pBaggingFunc = partial(BaggingFunc,Labels=Labels,X=X,Meta=Meta,User=User,X_test=X_test,Meta_test=Meta_test,User_test=User_test)
pool = Pool(processes = cores)
try:
    # One task per bag (chunksize=1).
    allProb = pool.map(pBaggingFunc,bagging,chunksize=1)
finally:
    # Always release the worker processes, even if a bag raised.
    pool.close()
    pool.join()
# Average the per-bag results.
allProb = np.vstack(allProb)
allProb = np.mean(allProb,axis=0)

if 'leak' in opts:
    # Leak correction based on the last test-metadata column.
    allProb += opts['leak']['coeff']*(1-Meta_test[:,-1])

print("Done in " + str(time()-t) + " second")