Example #1
File: UI.py Project: SabaAlex/AI
    # assumes module-level imports: from copy import deepcopy; import matplotlib.pyplot
    @staticmethod
    def __process(nrOfIterations, learningRate, hiddenNeuronsNumber, aConst):
        dataset = ProblemData("resources/data.data")
        trainX, trainY, testX, testY = dataset.splitData()

        neuralNetwork = ANN(deepcopy(trainX), deepcopy(trainY), learningRate,
                            hiddenNeuronsNumber, aConst)

        # train: one full forward and backward pass per iteration
        iterations = []
        for i in range(nrOfIterations):
            neuralNetwork.feedForward()
            neuralNetwork.backProp()
            iterations.append(i)

        # evaluate: compare the network's prediction with the real label for each test sample
        for i in range(len(testX)):
            predictedOut = neuralNetwork.getOutput(testX[i])
            print("Predicted output: {0}\nReal value: {1}".format(
                predictedOut, testY[i]))

        matplotlib.pyplot.plot(iterations,
                               neuralNetwork.getLoss(),
                               label='loss value vs iteration')
        matplotlib.pyplot.xlabel('Iterations')
        matplotlib.pyplot.ylabel('Loss function')
        matplotlib.pyplot.legend()
        matplotlib.pyplot.show()
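
ANN and ProblemData above are project classes. As a rough illustration of the interface the method relies on (feedForward, backProp, getOutput, getLoss), a minimal one-hidden-layer network with sigmoid units and MSE loss could look like the sketch below; the class name matches the call sites, but the weight shapes, activation, and loss are assumptions, not the project's actual implementation.

# Hypothetical sketch of the ANN interface used above (assumed: sigmoid units, MSE loss).
import numpy as np

class ANN:
    def __init__(self, trainX, trainY, learningRate, hiddenNeuronsNumber, aConst):
        self.x = np.asarray(trainX, dtype=float)
        self.y = np.asarray(trainY, dtype=float).reshape(-1, 1)  # assumes 1-D labels
        self.lr = learningRate
        self.a = aConst  # assumed: steepness constant of the sigmoid
        self.w1 = np.random.randn(self.x.shape[1], hiddenNeuronsNumber) * 0.1
        self.w2 = np.random.randn(hiddenNeuronsNumber, 1) * 0.1
        self.lossHistory = []

    def _sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-self.a * z))

    def feedForward(self):
        self.hidden = self._sigmoid(self.x @ self.w1)
        self.out = self._sigmoid(self.hidden @ self.w2)
        self.lossHistory.append(float(np.mean((self.y - self.out) ** 2)))

    def backProp(self):
        # gradient of the MSE loss through the sigmoid output and hidden layers
        dOut = (self.out - self.y) * self.a * self.out * (1 - self.out)
        dHidden = (dOut @ self.w2.T) * self.a * self.hidden * (1 - self.hidden)
        self.w2 -= self.lr * self.hidden.T @ dOut
        self.w1 -= self.lr * self.x.T @ dHidden

    def getOutput(self, sample):
        hidden = self._sigmoid(np.asarray(sample, dtype=float) @ self.w1)
        return self._sigmoid(hidden @ self.w2)

    def getLoss(self):
        # one loss value per feedForward call, so it lines up with the iterations list
        return self.lossHistory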
# assumes the surrounding script's imports (matplotlib.pyplot as plt, sklearn's PCA,
# GridSearchCV, RandomForestClassifier, KNeighborsClassifier, svm) and its config
# globals (inputFileName, sampleSize, testPartitionSize, etc.)
def main():

    # loading the data as prblmData
    prblmData = ProblemData(defaultSignalValue=defaultSignalValue,
                            numNodes=numNodes)
    prblmData = prblmData.loadData(useStoredData=useStoredData,
                                   inputFileName=inputFileName,
                                   storeReadData=storeReadData,
                                   storeDataName=storeDataName,
                                   rowReadUntil=readDataUntilRow)

    # partition the data into train and test sets with corresponding labels, as dataPar
    dataPar = DataPartition()
    dataPar = dataPar.makeTrainTest(prblmData=prblmData,
                                    readSampleSize=sampleSize,
                                    testPartitionSize=testPartitionSize,
                                    randomState=0,
                                    doNormalize=False,
                                    useSubSample=useSubSample,
                                    storeSubSample=True,
                                    subSamplePcklName=subSamplePcklName)

    # the same partition, but normalized, as dataParNormal
    dataParNormal = DataPartition()
    dataParNormal = dataParNormal.makeTrainTest(
        prblmData=prblmData,
        readSampleSize=sampleSize,
        testPartitionSize=testPartitionSize,
        randomState=0,
        doNormalize=True,
        useSubSample=useSubSample,
        storeSubSample=True,
        subSamplePcklName=subSamplePcklName)
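
    # For orientation: DataPartition.makeTrainTest is project code, but with
    # doNormalize=True it presumably behaves like the scikit-learn sketch below
    # (illustrative names only; the scaler is fitted on the training portion,
    # so no test-set statistics leak into the normalization). Never called here.
    def _sketchMakeTrainTest(features, labels, testSize, randomState=0):
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import MinMaxScaler
        fTrain, fTest, lTrain, lTest = train_test_split(
            features, labels, test_size=testSize, random_state=randomState)
        scaler = MinMaxScaler().fit(fTrain)  # fit on the training data only
        return scaler.transform(fTrain), scaler.transform(fTest), lTrain, lTest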

    # feature reduction of the normalized data by PCA (pcaNcomponent dimensions) as dataParPca
    pcaNcomponent = 10
    pcaObj = PCA(n_components=pcaNcomponent)
    dataParPca = DataPartition()
    # fit the PCA on the training set only, then apply the same projection to the test set
    dataParPca.fVecTrain = pcaObj.fit_transform(dataParNormal.fVecTrain)
    dataParPca.fVecTest = pcaObj.transform(dataParNormal.fVecTest)
    dataParPca.labelTrain = dataParNormal.labelTrain
    dataParPca.labelTest = dataParNormal.labelTest
    dataParPca.isNormalized = True

    # plotting the data by the first and second principal components of the applied PCA
    plt.plot(dataParPca.fVecTrain[:, 0], dataParPca.fVecTrain[:, 1], 'b.')
    plt.title('2D PCA')
    plt.show()
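
    # quick sanity check on pcaNcomponent: cumulative share of the variance
    # captured by the retained components (standard attribute of a fitted PCA)
    print(pcaObj.explained_variance_ratio_.cumsum())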

    # Three classifiers (random forest, KNN, SVM) with hyperparameters (tuned by cross validation) are defined below:
    # the cv parameter gives the number of folds for the hyperparameter-tuning cross validation
    # n_jobs = -1 runs the work on all available cores

    # Random Forest:
    # n_estimators: number of trees in the forest
    # criterion: measures the quality of a split. 'gini' for the Gini impurity, 'entropy' for the information gain
    # max_features: number of features to consider when looking for the best split
    paramGrid_rf = {
        'n_estimators': [5, 10, 17, 30],
        'criterion': ['gini', 'entropy'],
        'max_features': ['sqrt', 0.01, 0.1, 0.9],  # 'auto' was removed in newer scikit-learn; 'sqrt' is its classifier equivalent
        'n_jobs': [-1]
    }
    #paramGrid_rf = {'n_estimators': [30] ,'criterion': ['gini'] ,'n_jobs': [-1]} # n_jobs => runs in parallel
    clf_rf = GridSearchCV(RandomForestClassifier(), paramGrid_rf,
                          cv=3)  # ,scoring='%s_macro' % score

    # KNN:
    # n_neighbors: number of neighbours to consider
    # weights: how to weight the neighbors' labels: 'uniform', or 'distance' (weight by the inverse of the neighbor's distance)
    # metric: how to measure distance: 'minkowski', 'euclidean' or 'manhattan'
    paramGrid_knn = {
        'n_neighbors': [3, 5, 9, 15],
        'weights': ['uniform', 'distance'],
        'metric': ['minkowski', 'euclidean', 'manhattan']
    }
    #paramGrid_knn = {'n_neighbors': [5, 9], 'weights': ['distance'],'metric': ['manhattan']}
    clf_knn = GridSearchCV(KNeighborsClassifier(algorithm='kd_tree',
                                                n_jobs=-1),
                           paramGrid_knn,
                           cv=3)  #, verbose=10  # ,scoring='%s_macro' % score

    # SVM:
    # C: penalty parameter for misclassification
    # kernel: 'linear', 'poly' or 'rbf' // rbf is more time consuming but seems better suited to the problem
    # gamma: kernel coefficient; how far the influence of a single training example reaches [low: far / high: close]
    param_grid_svm = {'C': [0.1], 'kernel': ['rbf'], 'gamma': [0.01]}
    # param_grid_svm = {'C': [0.01, 0.1, 1, 10], 'kernel': ['linear', 'poly', 'rbf'], 'gamma': [0.001, 0.01, 0.1, 1]} # rbf is time consuming compared to the others
    clf_svm = GridSearchCV(
        svm.SVC(), param_grid_svm,
        cv=3)  # verbose=10 would log the result of each cv fold
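
    # Each GridSearchCV above is itself an estimator: calling .fit() runs the
    # 3-fold search over its grid and refits the best model, after which the
    # standard attributes best_params_, best_score_ and best_estimator_ are
    # available, e.g. (the loop below does this through RunClassifier):
    #   clf_svm.fit(dataParNormal.fVecTrain, dataParNormal.labelTrain)
    #   print(clf_svm.best_params_, clf_svm.best_score_)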

    clfNames = ['random_forest', 'knn', 'svm']
    dataTypes = ['original', 'normalized', 'normalized_PCA']

    for idx, clf in enumerate([clf_rf, clf_knn, clf_svm]):
        for idx2, datap in enumerate([dataPar, dataParNormal, dataParPca]):
            runCl = RunClassifier()
            prediction, accuracy, conf_matrix, bestParams = runCl.doClassification(
                clf,
                datap.fVecTrain,
                datap.fVecTest,
                datap.labelTrain,
                datap.labelTest,
                showPlot=True,
                savePickleModel=savePickleModel,
                clfName=clfNames[idx],
                dataType=dataTypes[idx2])
            print('\n+++++++++++++++++++\n')
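
RunClassifier.doClassification is project code; judging from the four values unpacked above, its core presumably resembles the sketch below (the name and signature are assumptions; accuracy_score and confusion_matrix are standard sklearn.metrics functions, and best_params_ is the standard GridSearchCV attribute).

# Hypothetical core of RunClassifier.doClassification (assumed, not the project's code).
from sklearn.metrics import accuracy_score, confusion_matrix

def doClassificationSketch(clf, fVecTrain, fVecTest, labelTrain, labelTest):
    clf.fit(fVecTrain, labelTrain)      # runs the grid search and refits the best model
    prediction = clf.predict(fVecTest)  # predicts with the refit best estimator
    accuracy = accuracy_score(labelTest, prediction)
    conf_matrix = confusion_matrix(labelTest, prediction)
    return prediction, accuracy, conf_matrix, clf.best_params_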