def solve(self):
    """Train a Gini decision tree on the repository data and print its accuracy."""
    # Hold out 30% of the samples for evaluation.
    x_train, x_test, y_train, y_test = train_test_split(
        self.inputs, self.outputs, test_size=0.3, random_state=1)

    # Separate numeric columns from string columns for both splits.
    numeric_train = self.getFeatures(x_train, self.repository.numericCols)
    string_train = self.getFeatures(x_train, self.repository.stringCols)
    numeric_test = self.getFeatures(x_test, self.repository.numericCols)
    string_test = self.getFeatures(x_test, self.repository.stringCols)

    # Scale the numeric features; encode the string features as floats.
    numeric_train, numeric_test = normalisation(numeric_train, numeric_test)
    string_train = self.vectorizeStringFeatures(string_train)
    string_test = self.vectorizeStringFeatures(string_test)

    # Re-assemble the full feature matrices from both feature groups.
    x_train = self.unionFeatures(numeric_train, string_train)
    x_test = self.unionFeatures(numeric_test, string_test)

    # Fit the classifier and report accuracy on the held-out split.
    classifier = tree.DecisionTreeClassifier(criterion='gini', random_state=1)
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    acc = accuracy(predictions, y_test)
    print("accuracy:", acc)
from utils import loadData, splitData, flatten, normalisation
import matplotlib.image as mpimg
from sklearn import neural_network
from sklearn.metrics import accuracy_score

if __name__ == "__main__":
    # Load the image data set and split it into train/validation parts.
    inputs, outputs = loadData("data.csv")
    trainInputs, trainOutputs, validationInputs, validationOutputs = splitData(inputs, outputs)

    # Flatten each image into a 1-D feature vector, then normalise
    # using statistics computed on the training split.
    flatTrain = [flatten(image) for image in trainInputs]
    flatValidation = [flatten(image) for image in validationInputs]
    normTrain, normValidation = normalisation(flatTrain, flatValidation)

    # A small single-hidden-layer MLP trained with plain SGD.
    classifier = neural_network.MLPClassifier(
        hidden_layer_sizes=(5,),
        activation='relu',
        max_iter=1000,
        solver='sgd',
        verbose=10,
        random_state=1,
        learning_rate_init=.1,
    )
    classifier.fit(normTrain, trainOutputs)

    # Predict on the validation split and report the accuracy.
    predictedLabels = classifier.predict(normValidation)
    print("Real labels: ", validationOutputs)
    print("Computed labels: ", predictedLabels)
    acc = accuracy_score(validationOutputs, predictedLabels)
    print("Accuracy: ", acc)
import variables
import variables_image_recognition
import lime
import lime.lime_tabular
import sklearn
import sklearn.metrics
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import fetch_20newsgroups
from lime.lime_text import LimeTextExplainer
from utils_lime import Draw_Lime_On_Logistic_Regression

# NOTE(review): this exit() makes everything below unreachable — presumably the
# script was intentionally disabled; confirm before re-enabling.
exit()

# NOTE(review): load_data, normalisation, NN, accuracy_score and plt are used
# below but not imported here — presumably supplied elsewhere; verify.
if variables.choice_of_work == 1:
    # Mode 1: train the custom NN on tabular data and explain one prediction
    # with LIME's tabular explainer.
    xtrain, ytrain, xtest, ytest = load_data(variables.file_name)
    xtrain, xtest = normalisation(xtrain, xtest)
    n = NN(variables.choice, variables.w)
    n.fit(xtrain, ytrain)
    ypred = n.pred(xtest)
    # Printed value is the error rate (1 - accuracy).
    print(1 - accuracy_score(ytest, ypred))
    explainer = lime.lime_tabular.LimeTabularExplainer(xtrain, mode='classification')
    # Explain the first test instance using the model's probability output.
    exp = explainer.explain_instance(xtest[0], n.pred_proba, labels=1, top_labels=1)
    exp.as_pyplot_figure()
    plt.show()
elif variables.choice_of_work == 2:
    # Mode 2: load and normalise the data (this branch appears to continue
    # beyond the visible chunk).
    xtrain, ytrain, xtest, ytest = load_data(variables.file_name)
    xtrain, xtest = normalisation(xtrain, xtest)
from utils import loadData, splitData, normalisation, transform, myNormalisation
from sklearn import linear_model
from sklearn.metrics import accuracy_score

if __name__ == "__main__":
    # Load the iris data set and split it into train/validation subsets.
    inputs, outputs = loadData("iris.data")
    trainInputs, trainOutputs, validationInputs, validationOutputs = splitData(
        inputs, outputs)

    # Scale the features using statistics computed on the training split.
    normalisedTrainData, normalisedTestData = normalisation(
        trainInputs, validationInputs)

    # Fit a logistic-regression classifier on the label-transformed outputs.
    classifier = linear_model.LogisticRegression()
    classifier.fit(normalisedTrainData, transform(trainOutputs))

    # Predict on the validation data and report the classification error.
    computedTestOutputs = classifier.predict(normalisedTestData)
    print("Computed outputs", computedTestOutputs)
    print("Real outputs", transform(validationOutputs))
    error = 1 - accuracy_score(transform(validationOutputs), computedTestOutputs)
    print("classification error (tool): ", error)
feature1 = [ex[0] for ex in inputs]   # Economy..GDP.per.Capita.
feature2 = [ex[1] for ex in inputs]   # Freedom

# Random 80/20 train/test split over the sample indexes.
import numpy as np
# NOTE(review): seed() with no argument re-seeds from OS entropy, so the split
# is NOT reproducible — pass a constant (e.g. np.random.seed(5)) if
# reproducibility was the intent.
np.random.seed()
indexes = [i for i in range(len(inputs))]
trainSample = np.random.choice(indexes, int(0.8 * len(inputs)), replace=False)
# Build a set once for O(1) membership tests instead of scanning the
# ndarray for every index (was O(n^2) overall).
trainIndexSet = set(trainSample)
testSample = [i for i in indexes if i not in trainIndexSet]

trainInputs = [inputs[i] for i in trainSample]
trainOutputs = [outputs[i] for i in trainSample]
testInputs = [inputs[i] for i in testSample]
testOutputs = [outputs[i] for i in testSample]

# Normalise inputs and outputs with statistics learnt on the training part.
trainInputs, testInputs = normalisation(trainInputs, testInputs)
trainOutputs, testOutputs = normalisation(trainOutputs, testOutputs)

# Per-feature views of the normalised splits (used for plotting/inspection).
feature1train = [ex[0] for ex in trainInputs]
feature2train = [ex[1] for ex in trainInputs]
feature1test = [ex[0] for ex in testInputs]
feature2test = [ex[1] for ex in testInputs]

# Fit a bivariate linear model with sklearn's SGD regressor.
from sklearn import linear_model
from sklearn.metrics import mean_squared_error

regressor = linear_model.SGDRegressor(alpha=0.01, max_iter=1000, average=True)
regressor.fit(trainInputs, trainOutputs)
# Learnt model: f(x1, x2) = w0 + w1*x1 + w2*x2
w0, w1, w2 = regressor.intercept_[0], regressor.coef_[0], regressor.coef_[1]
print('the learnt model: f(x1,x2) = ', w0, ' + ', w1, ' * x1', '+', w2, '*x2')