def heart(dataType):
    """Fit an AdaBoost classifier on the heart data and emit diagnostic plots.

    Produces a validation curve over ``n_estimators``, a learning curve,
    and a confusion matrix for the held-out test split.

    :param dataType: dataset key passed to :func:`data.createData`
    """
    package = data.createData(dataType)
    xTrain, yTrain = package.xTrain, package.yTrain
    xTest, yTest = package.xTest, package.yTest
    title = '{0} Ada Boost'.format(dataType)
    # Sweep the ensemble size to pick a reasonable number of weak learners.
    param = 'n_estimators'
    param_range = list(range(1, 160, 10))
    params = {'algorithm': 'SAMME.R'}
    clf = AdaBoostClassifier()
    clf.set_params(**params)
    plotter.plotValidationCurve(clf, xTrain, yTrain, param, param_range,
                                graphTitle=title)
    plotter.plotLearningCurve(clf, title=title, xTrain=xTrain, yTrain=yTrain)
    # Final fit and confusion matrix on the test split.
    title = 'Heart'
    clf.fit(xTrain, yTrain)
    plotter.plotConfusion(clf, title,
                          ['Diameter narrowing ', 'Diameter not narrowing'],
                          xTest, yTest)
def verifyMessage(self, msg):
    """Check *msg* against its expected content and record its arrival.

    The message must be one produced by the :func:`generateMessage`
    generator.

    :param msg: message to verify
    """
    header = msg[0]
    sender = header['sender']
    number = header['number']
    # Rebuild the payload we expect for this sender / sequence number.
    expected = data.createData(sender, self.size, number, self.house,
                               self.queue)
    # Lazily allocate the per-sender bookkeeping bit vectors.
    if sender not in self.received:
        self.received[sender] = (BitVector(size=self.count),  # received
                                 BitVector(size=self.count),  # duplicated
                                 BitVector(size=self.count))  # wrong content
    received, duplicated, wrong = self.received[sender]
    if received[number]:
        duplicated[number] = 1  # already seen -> duplicate
    else:
        received[number] = 1    # first arrival
    if expected != header:
        wrong[number] = 1       # content mismatch
def heart(dataType):
    """Tune and evaluate a decision tree on the heart data.

    Sweeps ``max_depth`` and ``min_samples_leaf`` with validation curves,
    plots learning curves for the chosen settings, then fits the final
    tree and plots its confusion matrix on the test split.

    :param dataType: dataset key passed to :func:`data.createData`
    """
    package = data.createData(dataType)
    xTrain, yTrain = package.xTrain, package.yTrain
    xTest, yTest = package.xTest, package.yTest
    title = '{0} Decision Tree'.format(dataType)
    # Hand-tuned hyper-parameters (see the validation curves below).
    params = {
        'class_weight': None,
        'criterion': 'entropy',
        'max_features': None,
        'min_samples_leaf': 10,
        'splitter': 'best'
    }
    # --- max_depth sweep ---
    param = 'max_depth'
    param_range = list(range(1, 20))
    clf_tree = DecisionTreeClassifier(random_state=util.randState)
    clf_tree.set_params(**params)
    plotter.plotValidationCurve(clf_tree, xTrain, yTrain, param, param_range,
                                graphTitle=title + ' Max Depth ')
    plotter.plotLearningCurve(clf_tree, title=title + ' Max Depth',
                              xTrain=xTrain, yTrain=yTrain)
    # --- min_samples_leaf sweep, depth fixed at the chosen value ---
    clf_tree = DecisionTreeClassifier(random_state=util.randState)
    clf_tree.set_params(**params)
    clf_tree.max_depth = 8
    param = 'min_samples_leaf'
    param_range = [10, 50, 75, 100]
    plotter.plotValidationCurve(clf_tree, xTrain, yTrain, param, param_range,
                                graphTitle=title + ' Min Samples Leaf ')
    clf_tree.min_samples_leaf = 10
    # Final model: learning curve, fit, and confusion matrix.
    title = 'Heart'
    plotter.plotLearningCurve(clf_tree, title=title, xTrain=xTrain,
                              yTrain=yTrain)
    clf_tree.fit(xTrain, yTrain)
    plotter.plotConfusion(clf_tree, title,
                          ['Diameter narrowing ', 'Diameter not narrowing'],
                          xTest, yTest)
def run():
    """Drive the clustering / dimensionality-reduction experiment suite.

    For both datasets, runs the clustering experiments on the raw data
    and then on PCA-, ICA-, RCA- and FAMD-reduced training sets, and
    finally the dimensionality-reduction analysis itself.
    """
    getClusterData('adult', 'PCA')
    # The two unprocess(...) assignments that used to sit here were dead:
    # both variables were immediately overwritten below.
    adultPackage = data.createData('adult')
    heartPackage = data.createData('heart')
    topRange = 5
    adultClasses = ['>50K', '<=50K']
    heartClasses = ['Diameter narrowing ', 'Diameter not narrowing']
    # Baseline: no dimensionality reduction.
    runAll(adultPackage, adultClasses, topRange, 'Adult', 'None')
    runAll(heartPackage, heartClasses, topRange, 'Heart', 'None')
    # PCA (reuses the baseline packages, replacing only xTrain).
    adultPackage.xTrain = dimRedu.getPCAData(adultPackage.xTrain, 'Adult')
    heartPackage.xTrain = dimRedu.getPCAData(heartPackage.xTrain, 'Heart')
    runAll(adultPackage, adultClasses, topRange, 'Adult', 'PCA')
    runAll(heartPackage, heartClasses, topRange, 'Heart', 'PCA')
    # ICA on fresh copies of the data.
    adultPackage = data.createData('adult')
    heartPackage = data.createData('heart')
    adultPackage.xTrain = dimRedu.getICAData(adultPackage.xTrain, 'Adult')
    heartPackage.xTrain = dimRedu.getICAData(heartPackage.xTrain, 'Heart')
    runAll(adultPackage, adultClasses, topRange, 'Adult', 'ICA')
    runAll(heartPackage, heartClasses, topRange, 'Heart', 'ICA')
    # RCA on fresh copies of the data.
    adultPackage = data.createData('adult')
    heartPackage = data.createData('heart')
    adultPackage.xTrain = dimRedu.getRCAData(adultPackage.xTrain, 'Adult')
    heartPackage.xTrain = dimRedu.getRCAData(heartPackage.xTrain, 'Heart')
    runAll(adultPackage, adultClasses, topRange, 'Adult', 'RCA')
    runAll(heartPackage, heartClasses, topRange, 'Heart', 'RCA')
    # FAMD works on the unprocessed (mixed-type) frames.
    # NOTE(review): the packages still carry RCA-reduced xTrain here;
    # presumably unprocess() restores the raw features -- confirm.
    adultPackage = unprocess(adultPackage)
    heartPackage = unprocess(heartPackage)
    adultPackage.xTrain = dimRedu.getFAMDData(adultPackage.xTrain, 'Adult')
    heartPackage.xTrain = dimRedu.getFAMDData(heartPackage.xTrain, 'Heart')
    runAll(adultPackage, adultClasses, topRange, 'Adult', 'FAMD')
    runAll(heartPackage, heartClasses, topRange, 'Heart', 'FAMD')
    dimRedu.run(heartPackage, 'Heart')
    dimRedu.run(adultPackage, 'Adult')
    print('done')
def getClusterData(dataType, clusterType):
    """Cluster a dimensionality-reduced training set with k-means.

    :param dataType: dataset key passed to :func:`data.createData`
    :param clusterType: one of 'PCA', 'ICA', 'RCA'; anything else
        selects FAMD
    :return: cluster label assigned to each training sample
    """
    package = data.createData(dataType)
    if clusterType == 'PCA':
        x = dimRedu.getPCAData(package.xTrain, dataType)
    elif clusterType == 'ICA':
        x = dimRedu.getICAData(package.xTrain, dataType)
    elif clusterType == 'RCA':
        x = dimRedu.getRCAData(package.xTrain, dataType)
    else:
        x = dimRedu.getFAMDData(package.xTrain, dataType)
    # fit_predict both fits the model and labels the training samples;
    # the extra km.predict(x) call (and the `a = 4` debug leftover)
    # were redundant and have been removed.
    km = KMeans(n_clusters=4)
    cluster_labels = km.fit_predict(x)
    return cluster_labels
def adult(dataType):
    """Train the tuned neural network on the adult data and plot results.

    Uses the architecture chosen during earlier tuning (a first hidden
    layer of roughly 70% of the input width, then 7 and 2 units, capped
    at 150 iterations), plots its learning curve, then fits it and plots
    the test-set confusion matrix.

    :param dataType: dataset key passed to :func:`data.createData`
    """
    package = data.createData(dataType)
    xTrain, yTrain = package.xTrain, package.yTrain
    xTest, yTest = package.xTest, package.yTest
    title = '{0} Neural Network'.format(dataType)
    # First hidden layer width: 70% of the number of input features.
    # (Renamed from `input`, which shadowed the builtin.)
    width = package.features.shape[1]
    width = int(.7 * width)
    clf = MLPClassifier(hidden_layer_sizes=(width, 7, 2))
    clf.max_iter = 150
    # NOTE(review): other modules in this project call
    # plotter.plotLearningCurve (singular); confirm plotter really
    # exposes plotLearningCurves as well.
    plotter.plotLearningCurves(clf, title=title, xTrain=xTrain, yTrain=yTrain)
    title = 'Adult'
    clf.fit(xTrain, yTrain)
    plotter.plotConfusion(clf, title, ['>50K', '<=50K'], xTest, yTest)
    return
def run(dataType):
    """Run the iteration-count experiments for a dataset.

    Currently only the heart experiments are active; the adult branch
    was disabled in the original source.

    :param dataType: dataset key passed to :func:`data.createData`
    """
    package = data.createData(dataType)
    if dataType == 'heart':
        # Sweep training iterations from 100 up to (not including) 4000
        # in steps of 20.  The two earlier `iterations` assignments were
        # dead (immediately overwritten) and have been removed.
        iterations = list(range(100, 4000, 20))
        heart(package, iterations)
        mlRoseHeart(package, iterations)
    return
def adultFit(dataType):
    """Measure MLP fit time as a function of the iteration cap.

    Trains the tuned network once per ``max_iter`` value and plots
    wall-clock fit time against the cap.

    :param dataType: dataset key passed to :func:`data.createData`
    """
    package = data.createData(dataType)
    # Only the training split is used here; the unused test split,
    # xLabel and ScoreList locals have been removed.
    xTrain, yTrain = package.xTrain, package.yTrain
    title = '{0} Neural Network Fit Times'.format(dataType)
    params = {
        'activation': 'relu',
        'learning_rate': 'invscaling',
        'solver': 'lbfgs'
    }
    # First hidden layer width: 70% of the number of input features.
    # (Renamed from `input`, which shadowed the builtin.)
    width = package.features.shape[1]
    width = int(.7 * width)
    param_range = [
        50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300, 400, 500,
        600, 700, 800
    ]
    times = []
    for param in param_range:
        clf = MLPClassifier(hidden_layer_sizes=(width, 7, 2))
        clf.set_params(**params)
        clf.max_iter = param
        start = time()
        clf.fit(xTrain, yTrain)
        times.append(time() - start)
    plotter.plot(param_range, times, 'max_iter', 'fit times', title)
    return
def heart(dataType):
    """Tune and evaluate a k-nearest-neighbours classifier on the heart data.

    Sweeps ``n_neighbors`` with a validation curve, plots the learning
    curve at the chosen k, then fits the final model and plots its
    confusion matrix on the test split.

    :param dataType: dataset key passed to :func:`data.createData`
    """
    package = data.createData(dataType)
    xTrain, yTrain = package.xTrain, package.yTrain
    xTest, yTest = package.xTest, package.yTest
    title = '{0} KNN'.format(dataType)
    # Hyper-parameters found by a previous grid search
    # (searcher.searchKNN); the earlier dead `params` dict was removed.
    params = {'algorithm': 'ball_tree', 'p': 1, 'weights': 'distance'}
    param = 'n_neighbors'
    param_range = list(range(1, 50))
    clf = KNeighborsClassifier()
    clf.set_params(**params)
    plotter.plotValidationCurve(clf, xTrain, yTrain, param, param_range,
                                graphTitle=title)
    # Learning curve at the chosen neighbourhood size.
    clf = KNeighborsClassifier()
    clf.set_params(**params)
    clf.n_neighbors = 12
    plotter.plotLearningCurve(clf, title=title, xTrain=xTrain, yTrain=yTrain)
    # Final fit and confusion matrix.
    title = 'Heart'
    clf.fit(xTrain, yTrain)
    plotter.plotConfusion(clf, title,
                          ['Diameter narrowing ', 'Diameter not narrowing'],
                          xTest, yTest)
def verifyMessage(self, msg):
    '''
    Verify the given message content and mark it as received.

    The message has to be one created by the :func:`generateMessage`
    generator.

    :param msg: message to verify
    '''
    # Identity of this message: who sent it and its sequence number.
    sender = msg[0]['sender']
    number = msg[0]['number']
    # Regenerate the payload we expect for this sender/number pair and
    # compare it with what actually arrived.
    expectedMsg = data.createData(sender, self.size, number, self.house,
                                  self.queue)
    equal = expectedMsg == msg[0]
    # Lazily create the per-sender bookkeeping: one bit per expected
    # message in each of three vectors.
    if sender not in self.received:
        self.received[sender] = (BitVector(size=self.count),  # received
                                 BitVector(size=self.count),  # duplicated
                                 BitVector(size=self.count))  # wrong content
    if self.received[sender][0][number]:
        # duplicate: the bit for this number is already set
        self.received[sender][1][number] = 1
    else:
        # first time received
        self.received[sender][0][number] = 1
    if not equal:
        # content did not match the regenerated payload
        self.received[sender][2][number] = 1
def adult(dataType):
    """Tune and evaluate SVM classifiers on the adult data.

    Sweeps the polynomial-kernel degree and the RBF-kernel C with
    validation curves, plots learning curves at the chosen settings,
    then fits the RBF model and plots its test-set confusion matrix.

    :param dataType: dataset key passed to :func:`data.createData`
    """
    package = data.createData(dataType)
    xTrain, yTrain = package.xTrain, package.yTrain
    xTest, yTest = package.xTest, package.yTest
    title = '{0} SVM'.format(dataType)
    # --- polynomial kernel: degree sweep ---
    # Parameters found by a previous grid search (searcher.searchSVMPoly).
    polyparams = {'C': 0.1, 'degree': 1, 'gamma': 50, 'kernel': 'poly'}
    param_range = list(range(1, 8))
    clf = createBaseSVC()
    clf.set_params(**polyparams)
    plotter.plotValidationCurve(clf, xTrain, yTrain, 'degree', param_range,
                                graphTitle=title + ' Poly degree ')
    clf.degree = 1
    plotter.plotLearningCurve(clf, title=title + ' Poly degree ',
                              xTrain=xTrain, yTrain=yTrain)
    # --- RBF kernel: C sweep ---
    # Parameters found by a previous grid search (searcher.searchSVMRBF).
    rbfParams = {'C': 1000, 'gamma': 0.01, 'kernel': 'rbf'}
    clf = createBaseSVC()
    clf.set_params(**rbfParams)
    param = 'C'
    param_range = [0.01, 0.05, 1, 10, 15]
    plotter.plotValidationCurve(clf, xTrain, yTrain, param, param_range,
                                graphTitle=title + ' RBF - C ')
    clf.C = 10
    plotter.plotLearningCurve(clf, title=title + ' RBF',
                              xTrain=xTrain, yTrain=yTrain)
    # Final fit and confusion matrix.
    title = 'Adult'
    clf.fit(xTrain, yTrain)
    plotter.plotConfusion(clf, title, ['>50K', '<=50K'], xTest, yTest)
def heart(dataType):
    """Tune and evaluate SVM classifiers on the heart data.

    Runs a validation curve over the polynomial-kernel degree, then over
    the RBF-kernel C, plotting learning curves for the chosen settings
    and finishing with a confusion matrix on the test split.

    :param dataType: dataset key passed to :func:`data.createData`
    """
    package = data.createData(dataType)
    xTrain, yTrain = package.xTrain, package.yTrain
    xTest, yTest = package.xTest, package.yTest
    title = '{0} SVM'.format(dataType)
    # Polynomial kernel: sweep the degree.
    polyparams = {'C': 0.01, 'degree': 3, 'gamma': 10, 'kernel': 'poly'}
    degrees = list(range(1, 8))
    clf = createBaseSVC()
    clf.set_params(**polyparams)
    plotter.plotValidationCurve(clf, xTrain, yTrain, 'degree', degrees,
                                graphTitle=title + ' Poly Degree ')
    clf.degree = 3
    plotter.plotLearningCurve(clf, title=title + ' Poly degree ',
                              xTrain=xTrain, yTrain=yTrain)
    # RBF kernel: sweep the regularisation strength C.
    rbfParams = {'C': 1, 'gamma': 1, 'kernel': 'rbf'}
    clf = createBaseSVC()
    clf.set_params(**rbfParams)
    cValues = [0.01, 0.05, 0.25, 0.5, 1]
    plotter.plotValidationCurve(clf, xTrain, yTrain, 'C', cValues,
                                graphTitle=title + ' RBF - C ')
    clf.C = 0.5
    plotter.plotLearningCurve(clf, title=title + ' RBF',
                              xTrain=xTrain, yTrain=yTrain)
    # Final fit and confusion matrix at the chosen settings.
    title = 'Heart'
    clf.fit(xTrain, yTrain)
    plotter.plotConfusion(clf, title,
                          ['Diameter narrowing ', 'Diameter not narrowing'],
                          xTest, yTest)
def run():
    """Run the neural-network experiments under each dimensionality
    reduction, for both datasets.

    Order matters throughout: each package is mutated in place, and a
    fresh one is created before each new reduction.
    """
    # ----- heart dataset -----
    dataType = 'heart'
    package = data.createData(dataType)
    iterations = range(599, 699)
    heart(package, iterations, 'Baseline')
    # PCA-reduced features (mutates the baseline package in place).
    package.xTrain = dimRedu.getPCAData(package.xTrain, 'Heart')
    package.xTest = dimRedu.getPCAData(package.xTest, 'Heart')
    heart(package, iterations, 'PCA')
    # ICA on a fresh copy of the data.
    package = data.createData(dataType)
    package.xTrain = dimRedu.getICAData(package.xTrain, 'Heart')
    package.xTest = dimRedu.getICAData(package.xTest, 'Heart')
    heart(package, iterations, 'ICA')
    # RCA on a fresh copy of the data.
    package = data.createData(dataType)
    package.xTrain = dimRedu.getRCAData(package.xTrain, 'Heart')
    package.xTest = dimRedu.getRCAData(package.xTest, 'Heart')
    heart(package, iterations, 'RCA')
    # FAMD uses the unprocessed (mixed-type) frames.
    package = data.createData(dataType)
    package = unprocess(package)
    package.xTrain = dimRedu.getFAMDData(package.Unprocessed_xTrain, 'Heart')
    package.xTest = dimRedu.getFAMDData(package.Unprocessed_xTest, 'Heart')
    heart(package, iterations, 'FAMD')
    # ----- adult dataset (same sequence, different iteration range) -----
    dataType = 'adult'
    package = data.createData(dataType)
    iterations = range(799, 899)
    adult(package, iterations, 'Baseline')
    package.xTrain = dimRedu.getPCAData(package.xTrain, 'Adult')
    package.xTest = dimRedu.getPCAData(package.xTest, 'Adult')
    adult(package, iterations, 'PCA')
    package = data.createData(dataType)
    package.xTrain = dimRedu.getICAData(package.xTrain, 'Adult')
    package.xTest = dimRedu.getICAData(package.xTest, 'Adult')
    adult(package, iterations, 'ICA')
    package = data.createData(dataType)
    package.xTrain = dimRedu.getRCAData(package.xTrain, 'Adult')
    package.xTest = dimRedu.getRCAData(package.xTest, 'Adult')
    adult(package, iterations, 'RCA')
    package = data.createData(dataType)
    package = unprocess(package)
    package.xTrain = dimRedu.getFAMDData(package.Unprocessed_xTrain, 'Adult')
    package.xTest = dimRedu.getFAMDData(package.Unprocessed_xTest, 'Adult')
    adult(package, iterations, 'FAMD')
    # # iterations = [599, 6000]
    # heart(package, iterations)
    # else:
    # iterations = range(1, 800)
    # iterations = [799, 800]
    # adult(package, iterations)
    return
# Silence library warnings so the experiment output stays readable.
import warnings
warnings.filterwarnings("ignore")
import data

if __name__ == '__main__':
    # Stages are imported lazily so each only loads when this script runs.
    import plotclusterscores
    plotclusterscores.run()
    import dimRedu
    # Run the dimensionality-reduction analysis for both datasets.
    datatypes = ['adult', 'heart']
    for d in datatypes:
        package = data.createData(d)
        dimRedu.run(package, d)
    import neuralDim
    neuralDim.run()
    import cluster
    cluster.run()
#build the dictionary of labels to do some compute labelsDict = {} for label in dataSet[:, -1]: if label not in labelsDict: labelsDict[label] = 0 labelsDict[label] += 1 print("labelDict:", labelsDict) #compute the entropy for key in labelsDict.keys(): prob = float(labelsDict[key]) / numOfSamples entropy += prob * math.log2(prob) return entropy * -1 samples, cluts = data.createData() print("entropy:", getEntropy(samples)) ''' def splitDataSet(dataSet, axis, value): retDataSet = [] for featVec in dataSet: if featVec[axis] == value: reducedFeatVec = featVec[:axis] #chop out axis used for splitting reducedFeatVec.extend(featVec[axis+1:]) retDataSet.append(reducedFeatVec) return retDataSet print(splitDataSet(samples,0,1)) '''
for coll in ln.collections: coll.remove() lineSet.clear() ln = plt.contour(X, Y, Z, [-0.5, 0, 0.5], colors=['r', 'black', 'b']) lineSet.append(ln) if label != None: plt.xlabel(label) # plt.show() plt.savefig("cache/data_mode" + str(self.config.mode) + "_%.8d.png" % self.figCount) self.figCount += 1 plt.pause(pause) if __name__ == "__main__": x, y, c = data.createData(4) xx = x[0:44] yy = y[0:44] cc = c[0:44] del x[0:44] del y[0:44] del c[0:44] Data = concate2D(x, y) Data_test = concate2D(xx, yy) lineSet = [] # --- Experiment for 3.1 --- # config = netConfig() # config.set(inputD=3, outputD=1, layer=1, nodes=[1], lr=0.01, mode=0, batch=1, # maxIter=100, active=noActive, activeDiff=noActiveDiff)
import json
import datetime
import dateutil.parser
import pandas as pd
import pytz

from data import createData

# importing sample data
data = createData()

# Accumulate activity counts per "day-of-week + hour" label in a
# dataframe indexed by that label.
data_for_df = {'time_of_day': [], "count": []}
timeList = pd.DataFrame(data_for_df)
timeList.set_index('time_of_day', inplace=True)

# Only consider activity from the last 90 days (UTC-aware comparison).
today = pytz.UTC.localize(datetime.datetime.now())
threeMonthsAgo = today - datetime.timedelta(days=90)

count = 1
curDate = dateutil.parser.parse(data[count]['start_date'])
# NOTE(review): neither `count` nor `curDate` is advanced in the visible
# portion of this loop -- presumably they are updated further down;
# confirm the loop terminates.
while (curDate > threeMonthsAgo and count < len(data)):
    dayStr = curDate.strftime("%a")  # getting day of week in 3 letters
    dayTime = curDate.strftime("%H")  # getting time of day
    dayStr = dayStr + " " + dayTime
    if dayStr in timeList.index.values:
        timeList['count'][dayStr] = timeList['count'][dayStr] + 1
    else:
        # New label: build a one-row frame and append it.
        # NOTE(review): DataFrame.append is deprecated/removed in recent
        # pandas -- consider pd.concat when this file is next touched.
        tempDf = pd.DataFrame({'time_of_day': [dayStr], "count": [1]})
        tempDf.set_index('time_of_day', inplace=True)
        timeList = timeList.append(tempDf)
#sigma:matrix sigma #v_t:matrix v_t #num:the shape of new sigma matrix we want to take def reshapeMatrix(u,sigma,v_t,num): sigma_matrix=np.zeros(shape=(num,num)) for i in range(num): sigma_matrix[i][i]=sigma[i] return u[:,:num],sigma_matrix,v_t[:num,:] def EuclidSimilarity(vecA,vecB): return 1.0/(1.0+np.linalg.norm(vecA-vecB)) def predict() dataSet1=data.createData() #print(dataSet1) #print(dataSet1.shape) U,sigma,V_T=np.linalg.svd(dataSet1) ''' print("U:\n",U) print(U.shape) print(type(U)) print("sigma:\n",sigma) print("V_T:\n",V_T) print("\n\n\n\n\n") ''' U_matrix,sigma_matrix,V_T_matrix=reshapeMatrix(U,sigma,V_T,3) ''' print("U:\n",U_matrix)