# --- Cross-validation setup: shuffle-split variant ---------------------------
# Repeat the CV procedure 10 times to get more precise results.
# NOTE(review): `n` is not read anywhere in this excerpt (the splitter below
# uses `niter`) -- confirm whether it is still needed further down.
n = 10

# NOTE(review): despite its name, this holds X.shape[0] (the length of X's
# first axis); it is what ShuffleSplit receives as its first argument.
nFeatures = X.shape[0]
niter = 10
SEED = 42
rnd = np.random.RandomState(SEED)  # seeded generator handed to the splitter
cct = 0  # in this excerpt only the disabled zero-count check below touches it

# Disabled check that counted rows of the transformed test set whose minimum
# value is 0:
# xtrain = rf.getRFX(X)
# xtest = rf.getRFX_test(X_test)
# for j in range(xtest.shape[0]):
#     mini = np.min(xtest[j,:])
#     if mini == 0:
#         cct += 1

# Single-argument parenthesised print: same output as the Python 2 print
# statement, and still parses as a function call.
print("utils train test...")
util_train, util_test = utils.getTrainTest()

if CreateSub == 0:
    # print "total rows with 0s: ", cct
    cv = cross_validation.ShuffleSplit(
        nFeatures,
        n_iter=niter,
        test_size=0.3,
        random_state=rnd,
    )
    mean_auc = 0.0
    i = 0
    for train, test in cv:
        # Banner and 1-based loop counter joined by one space, exactly what
        # the two-item print statement emits.
        print("%s %s" % ("======================================= CROSS VALIDATION LOOP: ", i + 1))
        num_train = len(train)

        # Slice features and labels for this split using the index arrays.
        xtrain, ytrain = X.ix[train], y.values[train]
        xtest, ytest = X.ix[test], y.values[test]

        # Transform the training part first, then the held-out part; the
        # order of these two calls is kept as in the original.
        xtrain = rf.getRFX(xtrain)
        xtest = rf.getRFX_test(xtest)
        # xtrain = utils.get_numerical_features(xtrain, test=False)
        # NOTE(review): the loop body appears to continue beyond this excerpt.
# --- Cross-validation setup: stratified k-fold variant -----------------------
# Repeat the CV procedure 10 times to get more precise results.
# NOTE(review): `n` and `niter` are not read anywhere in this excerpt --
# confirm whether they are still needed further down.
n = 10

# NOTE(review): despite its name, this holds X.shape[0] (the length of X's
# first axis).
nFeatures = X.shape[0]
niter = 10
SEED = 42
rnd = np.random.RandomState(SEED)  # seeded random generator
cct = 0  # in this excerpt only the disabled zero-count check below touches it

# Disabled check that counted rows of the transformed test set whose minimum
# value is 0:
# xtrain = rf.getRFX(X)
# xtest = rf.getRFX_test(X_test)
# for j in range(xtest.shape[0]):
#     mini = np.min(xtest[j,:])
#     if mini == 0:
#         cct += 1

# Single-argument parenthesised print: same output as the Python 2 print
# statement, and still parses as a function call.
print("utils train test...")
(
    util_train,
    util_test,
    util_train_nsp,
    util_test_nsp,
) = utils.getTrainTest()

# Disabled buffer allocations for blending:
# blendTrain = np.empty((num_train, 3))
# blendTest = np.zeros((X_test.shape[0],5))
# blendTestTemp1 = np.empty((X_test.shape[0],10))
# blendTestTemp2 = np.empty((X_test.shape[0],10))
# blendTestTemp3 = np.empty((X_test.shape[0],10))
# blendTestTemp4 = np.empty((X_test.shape[0],10))
# blendTestTemp5 = np.empty((X_test.shape[0],10))
# blendTestTemp6 = np.empty((X_test.shape[0],10))

n_folds = 10
# NOTE(review): `verbose` and `shuffle` are only assigned here; where they are
# consumed is not visible in this excerpt.
verbose = True
shuffle = False

# Materialise the folds into a list rather than keeping the splitter object.
skf = list(StratifiedKFold(y, n_folds))