import math
import random

import numpy as np
from scipy import stats
from sklearn import cross_validation
from sklearn.externals import joblib
from sklearn.metrics import mean_squared_error, r2_score

import elm  # provides ELMRegressor


def runRegressionModelTest(featureSet, valueVector, model):
    # Fit the regressor selected by `model` and return the fitted estimator.
    clf = None
    if model == 1:
        print "\nLINEAR REGRESSION\n"
        clf = linear_regression_fit(featureSet, valueVector)
    elif model == 2:
        print "\nSVR\n"
        clf = SVR_fit(featureSet, valueVector)
    elif model == 4:
        print "\nSTOCHASTIC\n"
        clf = SGD_r_fit(featureSet, valueVector)
        joblib.dump(clf, 'sgd.pkl')  # persist the SGD fit for later reuse
    elif model == 5:
        print "\nNEIGHBOURS\n"
        clf = neighbours_fit(featureSet, valueVector)
    elif model == 6:
        print "\nLOGISTIC\n"
        clf = log_regression_fit(featureSet, valueVector)
    elif model == 7:
        print "\nBAYESIANRIDGE\n"
        clf = bayesian_ridge_fit(featureSet, valueVector)
    elif model == 8:
        print "\nRIDGE\n"
        clf = ridge_fit(featureSet, valueVector)
    elif model == 9:
        print "\nELASTIC NET\n"
        clf = elastic_fit(featureSet, valueVector)
    elif model == 10:
        print "\nLASSO\n"
        clf = lasso_fit(featureSet, valueVector)
    else:
        print 'Invalid choice\n'
    return clf
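# Usage sketch: since the model == 4 branch persists its fit with
# joblib.dump(clf, 'sgd.pkl'), a later run can reload that estimator instead
# of refitting. This assumes a previous run produced the pickle;
# `feature_rows` is a hypothetical 2-D array of inputs, not defined here.
def load_pickled_sgd(feature_rows):
    clf = joblib.load('sgd.pkl')
    return clf.predict(feature_rows)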
def runRegressionModelTest(featureSet, valueVector, X_test, y_test, model):
    # Fit the regressor selected by `model`, then report cross-validation,
    # mean-squared-error and R^2 scores against the held-out test data.
    output = ''
    clf = None
    if model == 1:
        output += "\nLINEAR REGRESSION\n"
        clf = linear_regression_fit(featureSet, valueVector)
    elif model == 2:
        output += "\nSVR\n"
        clf = SVR_fit(featureSet, valueVector)
    elif model == 3:
        output += "\nEXTREME LEARNING MACHINE\n"
        clf = elm.ELMRegressor()
        clf.fit(featureSet, valueVector)
        joblib.dump(clf, 'elm.pkl')
    elif model == 4:
        output += "\nSTOCHASTIC\n"
        clf = SGD_r_fit(featureSet, valueVector)
        joblib.dump(clf, 'sgd.pkl')
    elif model == 5:
        output += "\nNEIGHBOURS\n"
        clf = neighbours_fit(featureSet, valueVector)
    elif model == 6:
        output += "\nLOGISTIC\n"
        clf = log_regression_fit(featureSet, valueVector)
    elif model == 7:
        output += "\nBAYESIANRIDGE\n"
        clf = bayesian_ridge_fit(featureSet, valueVector)
    else:
        # Return early so we never call predict() on an unfitted clf
        output += 'Invalid choice\n'
        return output
    score = mean_squared_error(y_test, clf.predict(X_test))
    score2 = r2_score(y_test, clf.predict(X_test))
    cv = cross_validation.ShuffleSplit(featureSet.shape[0], n_iter=50,
                                       test_size=0.25, random_state=0)
    a = cross_validation.cross_val_score(clf, featureSet, valueVector, cv=cv)
    a = a[a > 0]  # drop negative fold scores before averaging
    output += 'Cross V score: ' + ' '.join("%10.3f" % x for x in a) + '\n'
    output += ('Mean Score: %.3f\n' % np.mean(a))
    output += ('Mean Squared Error: %.3f\n' % score)
    output += ('R^2: %.3f\n' % score2)
    return output
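# Usage sketch for the harness above, assuming the *_fit helpers are defined
# elsewhere in this module; the synthetic arrays and split ratio here are
# illustrative only, not the project's actual settings.
def example_regression_run():
    X_all = np.random.rand(200, 5)
    y_all = np.random.rand(200)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X_all, y_all, test_size=0.25, random_state=0)
    # model == 1 selects the linear-regression branch
    print runRegressionModelTest(X_train, y_train, X_test, y_test, 1)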
def determine_NDGC(X0, y0, K, cv, clf_min, typev):
    # Estimate NDCG@K (and Kendall's tau) over `cv` shuffled train/test
    # splits, ranking the test items by classifier score, timestamp or
    # at random depending on `typev`.
    dt = getDateTime('timeData.npy')
    NDGC = []
    tau = []
    print "K=", K
    print "CV=", cv
    for j in np.arange(0, cv):
        # Retrain until the regressor clears the minimum R^2 threshold
        score = 0
        while score < clf_min:
            shuffle_in_unison(X0, y0)
            X, Xt, y, yt = cross_validation.train_test_split(
                X0, y0, test_size=0.3, random_state=0)
            clf = SVR_fit(X, y)
            # clf = linear_regression_fit(X, y)
            score = r2_score(yt, clf.predict(Xt))
        predicted = []
        recorded = []
        for i, row in enumerate(Xt):
            predicted.append(clf.predict(row))
            recorded.append(yt[i])
        tau.append(stats.kendalltau(predicted, recorded)[0])
        # Bin the recorded values into five relevance grades
        recorded = binning_tuple(recorded, 5)
        commList = []
        for i, t in enumerate(recorded):
            commList.append((predicted[i], t, dt[i]))
        # Attach a 1-based rank from sorting by true (binned) relevance
        sorted_by_ratio = sorted(commList, key=lambda tup: tup[1])[::-1]
        rankedList = []
        for i, tup in enumerate(sorted_by_ratio):
            rankedList.append((i + 1, tup[0], tup[1], tup[2]))
        # Re-rank by the chosen criterion
        if typev == 1:
            print "Classifier"
            rankedList = sorted(rankedList, key=lambda tup: tup[1])[::-1]
        if typev == 2:
            print "TimeStamp"
            rankedList = sorted(rankedList, key=lambda tup: tup[3])[::-1]
        if typev == 3:
            print "Random"
            random.shuffle(rankedList)
        # DCG of the chosen ordering, truncated at K, using the 2^rel - 1
        # gain (the original computed this gain but never used it)
        DCG = 0
        for ind, tup in enumerate(rankedList[:K], start=1):
            gain = 2 ** tup[2] - 1
            DCG += gain / math.log(ind + 1, 2)
        # Ideal DCG: the same sum over the relevance-sorted ordering
        iDCG = 0
        rankedList = sorted(rankedList, key=lambda tup: tup[2])[::-1]
        for ind, tup in enumerate(rankedList[:K], start=1):
            gain = 2 ** tup[2] - 1
            iDCG += gain / math.log(ind + 1, 2)
        print 'Test', j, " - ", DCG / float(iDCG)
        NDGC.append(DCG / float(iDCG))
    return np.mean(NDGC), np.mean(tau)
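# For reference, a self-contained sketch of the NDCG@K computation performed
# per split above, using the standard 2^rel - 1 gain and log2(position + 1)
# discount; the toy relevance grades below are made up for illustration.
def ndcg_at_k(relevances, K):
    # `relevances` is a list of relevance grades in ranked order
    dcg = sum((2 ** rel - 1) / math.log(i + 2, 2)
              for i, rel in enumerate(relevances[:K]))
    ideal = sorted(relevances, reverse=True)
    idcg = sum((2 ** rel - 1) / math.log(i + 2, 2)
               for i, rel in enumerate(ideal[:K]))
    return dcg / idcg if idcg > 0 else 0.0

# A perfect ordering scores 1.0, e.g. ndcg_at_k([3, 2, 2, 1, 0], 5) == 1.0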