f, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8)) ax1.set_title(headers[col] + ' ' + scaled[0]) ax1.hist(X[:, col], bins=bincount) ax2.hist(X_scaled[:, col], bins=bincount) X[:, col] = X_binned ax3.hist(X[:, col], bins=bincount) ax3.set_title(headers[col] + ' ' + scaled[1]) f.show() if values: f, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8)) ax1.hist(y, bins=bincount) y_Binned = binScaling(y, 3) print "Binned the values" ax2.hist(preprocessing.scale(y), bins=bincount) ax3.hist(y_Binned, bins=bincount) f.suptitle('Y Values') f.show() if timeData: for i in range(len(headers)): dt = getDateTime('timeData.npy') plt.scatter(dt, preprocessing.scale(y), label='Values') plt.gcf().autofmt_xdate() plt.xlabel('Time') plt.grid() plt.show() i += 1 plt.show()
def _dcg_at_k(ranked, k):
    """Return the discounted cumulative gain of the first *k* entries.

    Each entry is a tuple whose index 2 holds the (binned) relevance.  The
    gain is the relevance itself, discounted by ``log2(position + 1)`` with
    1-based positions.  Accumulation stops after the entry at position *k*;
    if no position ever equals *k* (e.g. ``k < 1`` or ``k`` larger than the
    list) the whole list is summed.
    """
    total = 0
    for position, entry in enumerate(ranked, 1):
        total += entry[2] / math.log(position + 1, 2)
        if position == k:
            break
    return total


def determine_NDGC(X, y, K, cv, clf_min, typev):
    """Estimate ranking quality (NDCG@K) and Kendall's tau over *cv* rounds.

    For every round a regressor is fitted on a fresh shuffled 70/30 split
    until its r2 score on the held-out part is no longer below *clf_min*.
    The held-out rows are then ranked according to *typev* and the DCG of
    that ranking is divided by the DCG of the ideal ranking (held-out rows
    ordered by their binned recorded value, highest first).

    NOTE(review): the ``X`` and ``y`` arguments are never read.  The data
    comes from ``X0`` / ``y0`` resolved from an enclosing scope, and
    ``shuffle_in_unison`` is called on those objects each attempt.  Confirm
    whether the parameters were meant to be used instead.

    Args:
        X: unused (see note above).
        y: unused (see note above).
        K: cutoff position for the DCG sums.
        cv: number of evaluation rounds.
        clf_min: training is repeated while the held-out r2 score is below
            this value.
        typev: ranking under test -- 1: by predicted value (descending),
            2: by timestamp (descending), 3: random shuffle.  Any other
            value leaves the list in ideal order.

    Returns:
        Tuple ``(mean NDCG over rounds, mean Kendall tau over rounds)``.
        A round whose ideal DCG is zero contributes an NDCG of 0.0.
    """
    dt = getDateTime('timeData.npy')
    ndcg_scores = []
    tau = []
    print("K= %s" % (K,))
    print("CV= %s" % (cv,))

    for j in np.arange(0, cv):
        # Train the regressor.  Always fit at least once so that clf, Xt and
        # yt are bound even when clf_min <= 0; the exit test mirrors the
        # original `while score < clf_min` condition.
        while True:
            shuffle_in_unison(X0, y0)
            X_train, Xt, y_train, yt = cross_validation.train_test_split(
                X0, y0, test_size=0.3, random_state=0)
            clf = SVR_fit(X_train, y_train)
            score = r2_score(yt, clf.predict(Xt))
            if not score < clf_min:
                break

        # Per-row predictions paired with the recorded target values.
        predicted = [clf.predict(row) for row in Xt]
        recorded = [yt[idx] for idx in range(len(Xt))]
        tau.append(stats.kendalltau(predicted, recorded)[0])

        # Bin the recorded values; the bin is used as the relevance grade.
        recorded = binning_tuple(recorded, 5)

        # NOTE(review): dt is indexed by position within the test split, not
        # by the original row index -- verify that this alignment is intended.
        comm_list = [(predicted[idx], grade, dt[idx])
                     for idx, grade in enumerate(recorded)]

        # Ideal order: highest relevance first.  `[::-1]` (rather than
        # reverse=True) keeps the original ordering of tied entries.
        by_relevance = sorted(comm_list, key=lambda tup: tup[1])[::-1]

        # Entries are (ideal position, prediction, relevance, timestamp).
        ranked_list = [(pos + 1, tup[0], tup[1], tup[2])
                       for pos, tup in enumerate(by_relevance)]

        # Re-order according to the ranking strategy under evaluation.
        if typev == 1:
            print("Classifier")
            ranked_list = sorted(ranked_list, key=lambda tup: tup[1])[::-1]
        elif typev == 2:
            print("TimeStamp")
            ranked_list = sorted(ranked_list, key=lambda tup: tup[3])[::-1]
        elif typev == 3:
            print("Random")
            random.shuffle(ranked_list)

        dcg = _dcg_at_k(ranked_list, K)

        # Ideal DCG: same entries ordered by relevance, highest first.
        ideal_list = sorted(ranked_list, key=lambda tup: tup[2])[::-1]
        ideal_dcg = _dcg_at_k(ideal_list, K)

        # Guard against a zero ideal DCG (e.g. all top-K relevances are 0),
        # which previously raised ZeroDivisionError.
        ndcg = dcg / float(ideal_dcg) if ideal_dcg else 0.0

        print("Test %s  -  %s" % (j, ndcg))
        ndcg_scores.append(ndcg)

    return np.mean(ndcg_scores), np.mean(tau)
f, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8)) ax1.set_title(headers[col] + " " + scaled[0]) ax1.hist(X[:, col], bins=bincount) ax2.hist(X_scaled[:, col], bins=bincount) X[:, col] = X_binned ax3.hist(X[:, col], bins=bincount) ax3.set_title(headers[col] + " " + scaled[1]) f.show() if values: f, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8)) ax1.hist(y, bins=bincount) y_Binned = binScaling(y, 3) print "Binned the values" ax2.hist(preprocessing.scale(y), bins=bincount) ax3.hist(y_Binned, bins=bincount) f.suptitle("Y Values") f.show() if timeData: for i in range(len(headers)): dt = getDateTime("timeData.npy") plt.scatter(dt, preprocessing.scale(y), label="Values") plt.gcf().autofmt_xdate() plt.xlabel("Time") plt.grid() plt.show() i += 1 plt.show()