def makeCV(kfolds,X,Labels,User,Meta,clf,opts):
    """Fit and predict every user-wise fold of one cross-validation repetition.

    NOTE(review): this definition looks like a truncated copy of the longer
    ``makeCV`` further down the file. It stops after the bag loop, so the
    averaged fold predictions are computed but neither stored nor returned.

    Parameters:
        kfolds: ``kfolds[0]`` is printed when a bag fails; ``kfolds[1]`` is
            iterated as ``(train_users, test_users)`` pairs of indices into
            ``np.unique(User)``.
        X: feature array; training rows are selected as ``X[rows, :, :]``.
        Labels: per-row labels, indexed with the same row masks as ``X``.
        User: per-row user ids (array; compared with ``==`` to build masks).
        Meta: per-row meta data handed to ``updateMeta`` before fit/predict.
        clf: object providing ``fit(X, y)`` and ``predict(X)``.
        opts: dict; when it has a ``'bagging'`` key, the bags come from
            ``baggingIterator(opts, training_user_ids)``.

    Returns:
        None.
    """
    users = np.unique(User)
    for train_users,test_users in kfolds[1]:
        # Build the lookup set once per fold instead of once per row.
        test_set = set(users[test_users])
        test_index = np.array([u in test_set for u in User], dtype=bool)
        test_rows_user = User[test_index]
        fold_users = np.unique(users[test_users])

        if 'bagging' in opts:
            bagging = baggingIterator(opts,[users[i] for i in train_users])
        else:
            # Single pseudo-bag; presumably -1 matches no real user id.
            bagging = [[-1]]

        probSum = np.zeros(len(test_rows_user), dtype=float)
        nFitted = 0
        for bag in bagging:
            bag_set = set(bag)
            bagUsers = np.array([u in bag_set for u in User], dtype=bool)
            # Non-test rows XOR bag rows: assuming the bag is drawn from the
            # training users, this removes the bag's users from the fit.
            # logical_not replaces np.negative, which rejects boolean arrays.
            train_index = np.logical_xor(np.logical_not(test_index), bagUsers)

            try:
                # train
                updateMeta(clf,Meta[train_index])
                clf.fit(X[train_index,:,:],Labels[train_index])

                # predict: write each user's output into that user's row
                # positions so it stays aligned with Labels[test_index] even
                # when the rows are not grouped by user.
                prob = np.zeros(len(test_rows_user), dtype=float)
                for ut in fold_users:
                    rows = User == ut
                    updateMeta(clf,Meta[rows,...])
                    prob[test_rows_user == ut] = clf.predict(X[rows,...])
            except Exception as err:
                # Best effort: report the failing bag and move on, but let
                # KeyboardInterrupt / SystemExit propagate.
                print(kfolds[0])
                print([users[i] for i in train_users])
                print(bag)
                print(err)
                continue

            probSum += prob
            nFitted += 1

        # Mean over the bags that actually produced predictions.
        allProb = probSum / nFitted if nFitted else probSum
# Example #2
# 0
def BaggingFunc(bag, Labels, X, Meta, User, X_test, Meta_test, User_test):
    """Fit the classifier without the users in ``bag`` and predict the test set.

    Besides its parameters, this function reads three names that must exist
    at module level: ``clf`` (object with ``fit``/``predict``), ``users_test``
    (iterable of test user ids) and ``updateMeta``.

    Parameters:
        bag: user ids whose rows are left out of the training data.
        Labels: per-row training labels.
        X: training features; rows are selected as ``X[rows, :, :]``.
        Meta: per-row training meta data passed to ``updateMeta``.
        User: per-row training user ids.
        X_test: test features.
        Meta_test: per-row test meta data passed to ``updateMeta``.
        User_test: per-row test user ids (array; compared with ``==``).

    Returns:
        numpy array with the predictions of every user in ``users_test``,
        concatenated in the iteration order of ``users_test``.
    """
    # Build the lookup set once instead of once per row.
    bag_set = set(bag)
    bagUsers = np.array([u in bag_set for u in User], dtype=bool)
    # np.negative rejects boolean arrays on current NumPy; invert the mask
    # with logical_not instead.
    train_index = np.logical_not(bagUsers)
    updateMeta(clf, Meta[train_index])
    clf.fit(X[train_index, :, :], Labels[train_index])

    ### predicting
    prob = []
    for ut in users_test:
        rows = User_test == ut
        updateMeta(clf, Meta_test[rows, ...])
        prob.extend(clf.predict(X_test[rows, ...]))

    return np.array(prob)
def BaggingFunc(bag,Labels,X,Meta,User,X_test,Meta_test,User_test):
    """Train on every row whose user is not in ``bag``, then predict per test user.

    The classifier ``clf``, the list of test users ``users_test`` and the
    helper ``updateMeta`` are not parameters; they are looked up at module
    level when the function runs.

    Parameters:
        bag: user ids excluded from the training rows.
        Labels: labels of the training rows.
        X: training features, sliced as ``X[rows, :, :]``.
        Meta: training meta data, forwarded to ``updateMeta`` before fitting.
        User: user id of each training row.
        X_test: test features.
        Meta_test: test meta data, forwarded to ``updateMeta`` per test user.
        User_test: user id of each test row (array; compared with ``==``).

    Returns:
        numpy array of predictions, one test user after another in the order
        given by ``users_test``.
    """
    # One set for the whole mask rather than a new set for every row.
    excluded = set(bag)
    bagUsers = np.array([u in excluded for u in User], dtype=bool)
    # Boolean masks must be inverted with logical_not; np.negative raises
    # TypeError for bool arrays on current NumPy.
    train_index = np.logical_not(bagUsers)
    updateMeta(clf,Meta[train_index])
    clf.fit(X[train_index,:,:],Labels[train_index])

    ### predicting
    prob = []
    for ut in users_test:
        rows = User_test==ut
        updateMeta(clf,Meta_test[rows,...])
        prob.extend(clf.predict(X_test[rows,...]))

    return np.array(prob)
def makeCV(kfolds, X, Labels, User, Meta, clf, opts):
    """Run one repetition of user-wise cross-validation and report AUC scores.

    For each ``(train_users, test_users)`` split the classifier is fitted on
    the training rows (once per bag when bagging is enabled), every test
    user's rows are predicted, and the predictions are averaged over the
    bags that succeeded. A summary line is printed at the end.

    Parameters:
        kfolds: ``kfolds[0]`` is the repetition number used in the printed
            messages; ``kfolds[1]`` is iterated as ``(train_users,
            test_users)`` pairs of indices into ``np.unique(User)``.
        X: feature array; training rows are selected as ``X[rows, :, :]``.
        Labels: per-row labels.
        User: per-row user ids (array; compared with ``==`` to build masks).
        Meta: per-row meta data handed to ``updateMeta`` before fit/predict;
            its last column is used by the ``'leak'`` option.
        clf: object providing ``fit(X, y)`` and ``predict(X)``.
        opts: dict of options. ``'bagging'``: when present, bags come from
            ``baggingIterator(opts, training_user_ids)``. ``'leak'``: when
            present, ``opts['leak']['coeff'] * (1 - Meta[test rows, -1])``
            is added to the predictions.

    Returns:
        ``[Gauc, Sauc]``: ``Gauc`` holds one ROC AUC per fold, ``Sauc`` one
        ROC AUC per user over the predictions pooled from all folds.
    """
    users = np.unique(User)
    toPredData = []
    Gauc = []
    for train_users, test_users in kfolds[1]:
        # Build the lookup set once per fold instead of once per row.
        test_set = set(users[test_users])
        test_index = np.array([u in test_set for u in User], dtype=bool)
        test_rows_user = User[test_index]
        fold_users = np.unique(users[test_users])

        if 'bagging' in opts:
            bagging = baggingIterator(opts, [users[i] for i in train_users])
        else:
            # Single pseudo-bag; presumably -1 matches no real user id.
            bagging = [[-1]]

        probSum = np.zeros(len(test_rows_user), dtype=float)
        nFitted = 0
        for bag in bagging:
            bag_set = set(bag)
            bagUsers = np.array([u in bag_set for u in User], dtype=bool)
            # Non-test rows XOR bag rows: assuming the bag is drawn from the
            # training users, this removes the bag's users from the fit.
            # logical_not replaces np.negative, which rejects boolean arrays.
            train_index = np.logical_xor(np.logical_not(test_index), bagUsers)

            try:
                # train
                updateMeta(clf, Meta[train_index])
                clf.fit(X[train_index, :, :], Labels[train_index])

                # predict: write each user's output into that user's row
                # positions so it stays aligned with Labels[test_index] even
                # when the rows are not grouped by user.
                prob = np.zeros(len(test_rows_user), dtype=float)
                for ut in fold_users:
                    rows = User == ut
                    updateMeta(clf, Meta[rows, ...])
                    prob[test_rows_user == ut] = clf.predict(X[rows, ...])
            except Exception as err:
                # Best effort: report the failing bag and move on, but let
                # KeyboardInterrupt / SystemExit propagate.
                print(kfolds[0])
                print([users[i] for i in train_users])
                print(bag)
                print(err)
                continue

            probSum += prob
            nFitted += 1

        # Mean over the bags that actually produced predictions, so a failed
        # bag no longer shrinks the average.
        allProb = probSum / nFitted if nFitted else probSum

        # save & return
        predictions = OrderedDict()
        predictions['user'] = test_rows_user
        predictions['label'] = Labels[test_index]
        predictions['prediction'] = allProb
        if 'leak' in opts:
            predictions['prediction'] = allProb + opts['leak']['coeff'] * (
                1 - Meta[test_index, -1])
        predictions = pd.DataFrame(predictions)

        Gauc.append(roc_auc_score(predictions.label, predictions.prediction))
        toPredData.append(predictions)
    predData = pd.concat(toPredData)

    # Per-user AUC over the predictions pooled from all folds.
    Sauc = []
    for i in np.unique(predData.user):
        ofUser = predData.loc[predData.user == i]
        Sauc.append(roc_auc_score(ofUser.label, ofUser.prediction))

    print('Rep %d: gAUC (mean of folds) %0.5f, sAUC %0.5f (%0.5f)' % (
        kfolds[0], np.mean(Gauc), np.mean(Sauc), np.std(Sauc)))

    return [Gauc, Sauc]