from copy import deepcopy

import matplotlib.pyplot as plt

# ProblemData and ANN are project-local classes (their imports live elsewhere in the project).


def __process(nrOfIterations, learningRate, hiddenNeuronsNumber, aConst):
    # Load the dataset and split it into train/test features and labels.
    dataset = ProblemData("resources/data.data")
    trainX, trainY, testX, testY = dataset.splitData()

    # Deep-copy the training data so the network cannot mutate the originals.
    neuralNetwork = ANN(deepcopy(trainX), deepcopy(trainY),
                        learningRate, hiddenNeuronsNumber, aConst)

    # Train: one forward pass and one backpropagation pass per iteration.
    iterations = []
    for i in range(nrOfIterations):
        neuralNetwork.feedForward()
        neuralNetwork.backProp()
        iterations.append(i)

    # Evaluate on the held-out test set.
    for i in range(len(testX)):
        predictedOut = neuralNetwork.getOutput(testX[i])
        print("Predicted output: {0}\nReal value: {1}".format(predictedOut, testY[i]))

    # Plot the loss recorded at each training iteration.
    plt.plot(iterations, neuralNetwork.getLoss(), label='loss value vs iteration')
    plt.xlabel('Iterations')
    plt.ylabel('Loss function')
    plt.legend()
    plt.show()
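# A minimal sketch of the ANN interface the driver above assumes: one hidden
# layer, sigmoid activations, mean-squared-error loss recorded per iteration,
# and aConst read as the sigmoid slope constant. Every internal detail below
# is an assumption for illustration; the project's real ANN class may differ.
import numpy as np


class ANN:
    def __init__(self, x, y, learningRate, hiddenNeuronsNumber, aConst):
        self.x = np.array(x, dtype=float)
        self.y = np.array(y, dtype=float).reshape(-1, 1)
        self.lr = learningRate
        self.a = aConst
        self.w1 = np.random.rand(self.x.shape[1], hiddenNeuronsNumber)
        self.w2 = np.random.rand(hiddenNeuronsNumber, 1)
        self.loss = []

    def _sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-self.a * z))

    def feedForward(self):
        # Propagate the whole training set through both layers and log the MSE.
        self.hidden = self._sigmoid(self.x @ self.w1)
        self.output = self._sigmoid(self.hidden @ self.w2)
        self.loss.append(float(np.mean((self.y - self.output) ** 2)))

    def backProp(self):
        # Gradient of the mean squared error through the two sigmoid layers.
        err = self.y - self.output
        dOut = err * self.a * self.output * (1 - self.output)
        dHid = (dOut @ self.w2.T) * self.a * self.hidden * (1 - self.hidden)
        self.w2 += self.lr * self.hidden.T @ dOut
        self.w1 += self.lr * self.x.T @ dHid

    def getOutput(self, sample):
        # Forward pass for a single test sample.
        hidden = self._sigmoid(np.array(sample, dtype=float) @ self.w1)
        return self._sigmoid(hidden @ self.w2)

    def getLoss(self):
        return self.loss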
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# ProblemData, DataPartition and RunClassifier are project-local classes; the
# configuration constants used below (inputFileName, sampleSize, ...) are
# assumed to be defined at module level.


def main():
    # Load the data as prblmData.
    prblmData = ProblemData(defaultSignalValue=defaultSignalValue, numNodes=numNodes)
    prblmData = prblmData.loadData(useStoredData=useStoredData,
                                   inputFileName=inputFileName,
                                   storeReadData=storeReadData,
                                   storeDataName=storeDataName,
                                   rowReadUntil=readDataUntilRow)

    # Partition the data into train and test sets with corresponding labels (dataPar).
    dataPar = DataPartition()
    dataPar = dataPar.makeTrainTest(prblmData=prblmData,
                                    readSampleSize=sampleSize,
                                    testPartitionSize=testPartitionSize,
                                    randomState=0,
                                    doNormalize=False,
                                    useSubSample=useSubSample,
                                    storeSubSample=True,
                                    subSamplePcklName=subSamplePcklName)

    # The same partition, but normalized (dataParNormal).
    dataParNormal = DataPartition()
    dataParNormal = dataParNormal.makeTrainTest(prblmData=prblmData,
                                                readSampleSize=sampleSize,
                                                testPartitionSize=testPartitionSize,
                                                randomState=0,
                                                doNormalize=True,
                                                useSubSample=useSubSample,
                                                storeSubSample=True,
                                                subSamplePcklName=subSamplePcklName)

    # Feature reduction of the normalized data by PCA (pcaNcomponent dimensions)
    # as dataParPca. PCA is fitted on the training set only and then applied to
    # the test set, so no information leaks from test to train.
    pcaNcomponent = 10
    pcaObj = PCA(n_components=pcaNcomponent)
    dataParPca = DataPartition()
    dataParPca.fVecTrain = pcaObj.fit_transform(dataParNormal.fVecTrain)
    dataParPca.fVecTest = pcaObj.transform(dataParNormal.fVecTest)
    dataParPca.labelTrain = dataParNormal.labelTrain
    dataParPca.labelTest = dataParNormal.labelTest
    dataParPca.isNormalized = True

    # Plot the data along the first and second principal components of the PCA.
    plt.plot(dataParPca.fVecTrain[:, 0], dataParPca.fVecTrain[:, 1], 'b.')
    plt.title('2D PCA')
    plt.show()

    # Three classifiers (random forest, KNN, SVM) are defined below; their
    # hyperparameters are tuned by cross-validated grid search.
    # cv is the number of folds used for the cross validation;
    # n_jobs=-1 runs the work on all available cores.

    # Random forest:
    # n_estimators: number of trees in the forest
    # criterion: measures the quality of a split; 'gini' for the Gini impurity,
    #            'entropy' for the information gain
    # max_features: number of features to consider when looking for the best split
    paramGrid_rf = {'n_estimators': [5, 10, 17, 30],
                    'criterion': ['gini', 'entropy'],
                    'max_features': ['auto', 0.01, 0.1, 0.9],
                    'n_jobs': [-1]}  # n_jobs => runs in parallel
    # paramGrid_rf = {'n_estimators': [30], 'criterion': ['gini'], 'n_jobs': [-1]}
    clf_rf = GridSearchCV(RandomForestClassifier(), paramGrid_rf, cv=3)
    # ,scoring='%s_macro' % score

    # KNN:
    # n_neighbors: number of neighbours to consider
    # weights: how to weight the neighbours' labels; 'uniform', or 'distance'
    #          (weight by the inverse of the neighbour's distance)
    # metric: how to measure the distance: 'minkowski', 'euclidean' or 'manhattan'
    paramGrid_knn = {'n_neighbors': [3, 5, 9, 15],
                     'weights': ['uniform', 'distance'],
                     'metric': ['minkowski', 'euclidean', 'manhattan']}
    # paramGrid_knn = {'n_neighbors': [5, 9], 'weights': ['distance'], 'metric': ['manhattan']}
    clf_knn = GridSearchCV(KNeighborsClassifier(algorithm='kd_tree', n_jobs=-1),
                           paramGrid_knn, cv=3)  # , verbose=10  # ,scoring='%s_macro' % score

    # SVM:
    # C: penalty parameter for misclassification
    # kernel: 'linear', 'poly' or 'rbf'; rbf is more time consuming but seems
    #         more concordant with this problem
    # gamma: kernel coefficient; how far the influence of a single training
    #        sample reaches (low: far / high: close)
    param_grid_svm = {'C': [0.1], 'kernel': ['rbf'], 'gamma': [0.01]}
    # param_grid_svm = {'C': [0.01, 0.1, 1, 10], 'kernel': ['linear', 'poly', 'rbf'],
    #                   'gamma': [0.001, 0.01, 0.1, 1]}  # rbf is time consuming compared to the others
    clf_svm = GridSearchCV(svm.SVC(), param_grid_svm, cv=3)
    # , verbose=10: print the result of each epoch of the cross validation

    # Run every classifier on every variant of the data and report the results.
    clfNames = ['random_forest', 'knn', 'svm']
    dataTypes = ['original', 'normalized', 'normalized_PCA']
    for idx, clf in enumerate([clf_rf, clf_knn, clf_svm]):
        for idx2, datap in enumerate([dataPar, dataParNormal, dataParPca]):
            runCl = RunClassifier()
            # Stash the tuned parameters on the grid-search object.
            prediction, accuracy, conf_matrix, clf.best_params = runCl.doClassification(
                clf, datap.fVecTrain, datap.fVecTest,
                datap.labelTrain, datap.labelTest,
                showPlot=True, savePickleModel=savePickleModel,
                clfName=clfNames[idx], dataType=dataTypes[idx2])
            print('\n+++++++++++++++++++\n')
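# A minimal sketch of the RunClassifier.doClassification interface the loop
# above assumes: fit the (grid-search) classifier on the training data,
# predict on the test data, and return the prediction, accuracy, confusion
# matrix and best parameters, in that order. Everything below is an assumption
# about the project-local class, not its actual implementation; the plotting
# and model-pickling behaviour behind showPlot and savePickleModel is elided.
from sklearn.metrics import accuracy_score, confusion_matrix


class RunClassifier:
    def doClassification(self, clf, fVecTrain, fVecTest, labelTrain, labelTest,
                         showPlot=False, savePickleModel=False,
                         clfName='', dataType=''):
        # For a GridSearchCV clf, fit() runs the full cross-validated search.
        clf.fit(fVecTrain, labelTrain)
        prediction = clf.predict(fVecTest)
        accuracy = accuracy_score(labelTest, prediction)
        conf_matrix = confusion_matrix(labelTest, prediction)
        print('{0} on {1} data: accuracy = {2:.4f}, best params = {3}'.format(
            clfName, dataType, accuracy, clf.best_params_))
        return prediction, accuracy, conf_matrix, clf.best_params_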