def makeCV(kfolds, X, Labels, User, Meta, clf, opts):
    """Fit and predict over user-wise cross-validation folds (partial copy).

    NOTE(review): this copy of ``makeCV`` ends after the bagging loop: the
    averaged predictions are neither stored nor returned, so the function
    returns ``None``.  A complete ``makeCV`` is defined later in the file;
    if both live at module level the later one replaces this one -- confirm
    whether this copy can simply be deleted.

    Parameters
    ----------
    kfolds : sequence
        ``kfolds[0]`` is printed as a diagnostic when a bag fails;
        ``kfolds[1]`` is iterated as ``(train_users, test_users)`` pairs of
        indices into ``np.unique(User)``.
    X : array
        Indexed as ``X[rows, :, :]`` for training and ``X[rows, ...]`` for
        prediction.
    Labels : array
        Training targets, indexed by row mask.
    User : array
        User id of every row.
    Meta : array
        Per-row meta data handed to ``updateMeta``.
    clf : estimator
        Object exposing ``fit`` and ``predict``.
    opts : dict
        If it contains ``'bagging'``, bags come from ``baggingIterator``.
    """
    users = np.unique(User)
    # Unused in the visible part of this definition.
    toPredData = []
    Gauc = []

    for train_users, test_users in kfolds[1]:
        allProb = 0
        # Build the lookup set once instead of once per row.
        test_set = set(users[test_users])
        test_index = np.array([u in test_set for u in User], dtype=bool)

        if 'bagging' in opts:
            bagging = baggingIterator(opts, [users[i] for i in train_users])
        else:
            # Single placeholder bag; -1 is presumably not a real user id,
            # so no row is held out -- confirm.
            bagging = [[-1]]

        for bag in bagging:
            bag_set = set(bag)
            bagUsers = np.array([u in bag_set for u in User], dtype=bool)
            # logical_not instead of np.negative: the latter rejects boolean
            # arrays on NumPy >= 1.13.  XOR flips the training flag of every
            # bagged row, i.e. drops bagged training users from the fit.
            train_index = np.logical_xor(np.logical_not(test_index), bagUsers)
            try:
                # train
                updateMeta(clf, Meta[train_index])
                clf.fit(X[train_index, :, :], Labels[train_index])

                # predict, one test user at a time
                prob = []
                for ut in np.unique(users[test_users]):
                    user_rows = User == ut
                    updateMeta(clf, Meta[user_rows, ...])
                    prob.extend(clf.predict(X[user_rows, ...]))
                prob = np.array(prob)
                # float() keeps the average exact on Python 2 as well.
                allProb += prob / float(len(bagging))
            except Exception:
                # Best effort: report the failing fold/bag and move on, but
                # let KeyboardInterrupt / SystemExit propagate.
                print(kfolds[0])
                print([users[i] for i in train_users])
                print(bag)
                continue
def BaggingFunc(bag, Labels, X, Meta, User, X_test, Meta_test, User_test):
    """Fit the classifier without the bagged users and predict the test set.

    Rows of ``User`` whose id is in ``bag`` are left out of the fit; the
    classifier is then applied to the test rows one user at a time and the
    per-user outputs are concatenated in iteration order.

    ``clf`` and ``users_test`` are not parameters: both are looked up in the
    enclosing scope when the function runs.
    NOTE(review): ``users_test`` is presumably the unique ids of
    ``User_test`` -- confirm where it is defined.

    Parameters
    ----------
    bag : iterable
        User ids to exclude from training.
    Labels : array
        Training targets, indexed by row mask.
    X : array
        Training data, indexed as ``X[rows, :, :]``.
    Meta : array
        Per-row training meta data handed to ``updateMeta``.
    User : array
        User id of every training row.
    X_test, Meta_test, User_test : array
        Test-set counterparts of ``X``, ``Meta`` and ``User``.

    Returns
    -------
    numpy.ndarray
        Concatenated ``clf.predict`` outputs for every id in ``users_test``.
    """
    # Build the lookup set once instead of once per row.
    bag_set = set(bag)
    bagUsers = np.array([u in bag_set for u in User], dtype=bool)
    # logical_not instead of np.negative: the latter rejects boolean arrays
    # on NumPy >= 1.13.
    train_index = np.logical_not(bagUsers)

    updateMeta(clf, Meta[train_index])
    clf.fit(X[train_index, :, :], Labels[train_index])

    # predicting
    prob = []
    for ut in users_test:
        test_rows = User_test == ut
        updateMeta(clf, Meta_test[test_rows, ...])
        prob.extend(clf.predict(X_test[test_rows, ...]))
    return np.array(prob)
def BaggingFunc(bag, Labels, X, Meta, User, X_test, Meta_test, User_test):
    """Train on every user outside ``bag`` and predict the test rows.

    Training uses the rows of ``User`` whose id is not in ``bag``.  The
    fitted classifier is then run on the test rows of each user in turn and
    the outputs are appended in that order.

    ``clf`` and ``users_test`` are free names resolved from the enclosing
    scope at call time, not arguments of this function.
    NOTE(review): ``users_test`` presumably holds the unique ids found in
    ``User_test`` -- confirm against its definition.

    Parameters
    ----------
    bag : iterable
        User ids held out of the fit.
    Labels : array
        Training targets, indexed by row mask.
    X : array
        Training data, indexed as ``X[rows, :, :]``.
    Meta : array
        Per-row training meta data passed to ``updateMeta``.
    User : array
        User id of every training row.
    X_test, Meta_test, User_test : array
        Test-set counterparts of ``X``, ``Meta`` and ``User``.

    Returns
    -------
    numpy.ndarray
        ``clf.predict`` outputs for all ids in ``users_test``, concatenated.
    """
    # One set for all membership tests rather than a new set per row.
    held_out = set(bag)
    bagUsers = np.array([u in held_out for u in User], dtype=bool)
    # np.negative raises TypeError for boolean input on NumPy >= 1.13;
    # logical_not is the supported way to invert a mask.
    train_index = np.logical_not(bagUsers)

    updateMeta(clf, Meta[train_index])
    clf.fit(X[train_index, :, :], Labels[train_index])

    # predicting
    prob = []
    for ut in users_test:
        rows_of_user = User_test == ut
        updateMeta(clf, Meta_test[rows_of_user, ...])
        prob.extend(clf.predict(X_test[rows_of_user, ...]))
    return np.array(prob)
def makeCV(kfolds, X, Labels, User, Meta, clf, opts):
    """Run one repetition of user-wise cross-validation and report AUC.

    For every fold the classifier is fitted on the rows of the training
    users (minus the users of the current bag, when bagging is enabled),
    applied to the rows of each test user, and the outputs are averaged
    over the bags.  A summary line with the fold-wise and user-wise AUC is
    printed at the end.

    Parameters
    ----------
    kfolds : sequence
        ``kfolds[0]`` is the repetition number used in the printed report;
        ``kfolds[1]`` is iterated as ``(train_users, test_users)`` pairs of
        indices into ``np.unique(User)``.
    X : array
        Indexed as ``X[rows, :, :]`` for training and ``X[rows, ...]`` for
        prediction.
    Labels : array
        Targets, indexed by row mask.
    User : array
        User id of every row.
    Meta : array
        Per-row meta data handed to ``updateMeta``; its last column is read
        when ``opts`` contains ``'leak'``.
    clf : estimator
        Object exposing ``fit`` and ``predict``.
    opts : dict
        Optional keys: ``'bagging'`` (bags come from ``baggingIterator``)
        and ``'leak'`` (adds ``opts['leak']['coeff'] * (1 - Meta[:, -1])``
        to the test predictions).

    Returns
    -------
    list
        ``[Gauc, Sauc]``: the AUC of every fold and the AUC of every user
        over the concatenated fold predictions.
    """
    users = np.unique(User)
    toPredData = []
    Gauc = []

    for train_users, test_users in kfolds[1]:
        allProb = 0
        # Build the lookup set once instead of once per row.
        test_set = set(users[test_users])
        test_index = np.array([u in test_set for u in User], dtype=bool)

        if 'bagging' in opts:
            bagging = baggingIterator(opts, [users[i] for i in train_users])
        else:
            # Single placeholder bag; -1 is presumably not a real user id,
            # so no row is held out -- confirm.
            bagging = [[-1]]

        for bag in bagging:
            bag_set = set(bag)
            bagUsers = np.array([u in bag_set for u in User], dtype=bool)
            # logical_not instead of np.negative: the latter rejects boolean
            # arrays on NumPy >= 1.13.  XOR flips the training flag of every
            # bagged row, i.e. drops bagged training users from the fit.
            # NOTE(review): a bagged id belonging to the test fold would be
            # flipped *into* the training set; presumably baggingIterator
            # only yields training users -- confirm.
            train_index = np.logical_xor(np.logical_not(test_index), bagUsers)
            try:
                # train
                updateMeta(clf, Meta[train_index])
                clf.fit(X[train_index, :, :], Labels[train_index])

                # predict, one test user at a time
                prob = []
                row_ids = []
                for ut in np.unique(users[test_users]):
                    user_rows = User == ut
                    updateMeta(clf, Meta[user_rows, ...])
                    prob.extend(clf.predict(X[user_rows, ...]))
                    row_ids.extend(np.flatnonzero(user_rows))
                # The outputs were gathered user by user, but labels, users
                # and the leak term below are taken in row order
                # (``[test_index]``).  Re-order to row order so they stay
                # paired even when rows are not grouped by sorted user id.
                prob = np.array(prob)[np.argsort(row_ids)]
                # float() keeps the average exact on Python 2 as well.
                allProb += prob / float(len(bagging))
            except Exception:
                # Best effort: report the failing fold/bag and move on, but
                # let KeyboardInterrupt / SystemExit propagate.
                print(kfolds[0])
                print([users[i] for i in train_users])
                print(bag)
                continue

        # save & return
        predictions = OrderedDict()
        predictions['user'] = User[test_index]
        predictions['label'] = Labels[test_index]
        predictions['prediction'] = allProb
        if 'leak' in opts:
            predictions['prediction'] += opts['leak']['coeff'] * (
                1 - Meta[test_index, -1])
        predictions = pd.DataFrame(predictions)

        Gauc.append(roc_auc_score(predictions.label, predictions.prediction))
        toPredData.append(predictions)

    predData = pd.concat(toPredData)
    Sauc = []
    for i in np.unique(predData.user):
        # Select the user's rows once and reuse them for both columns.
        user_preds = predData.loc[predData.user == i]
        Sauc.append(roc_auc_score(user_preds.label, user_preds.prediction))

    print('Rep %d: gAUC (mean of folds) %0.5f, sAUC %0.5f (%0.5f)' % (
        kfolds[0], np.mean(Gauc), np.mean(Sauc), np.std(Sauc)))
    return [Gauc, Sauc]