def evaluate(self, Classifier, *args):
    """
    Evaluate a classifier and describe its performance as text.

    args[0] is read as a flat list of option tokens:
      -t <file>      load the training data from <file>
      -T <file>      load a separate test set from <file>
      -p <fraction>  run one train/classify pass against the test set
                     loaded by an earlier -T and return at once

    When no -p option is present the classifier is trained and scored
    self.folds times on random splits of the training data.

    @param Classifier classifier instance exposing train() and classify()
    @param args       option list as described above (only args[0] is read)
    @return str(performance) for the -p pass; otherwise str(performance)
            followed by " +- " and self.stdDev(accuracies, average)
    """
    options = args[0]
    proportion = 0
    average = 0.0
    accuracies = []
    performance = Performance()
    trainingSet = DataSet()

    for num, option in enumerate(options):
        if option == "-t":
            trainingSet.load(options[num + 1])
        if option == "-T":
            testSet = DataSet()
            testSet.load(options[num + 1])
        if option == "-p":
            proportion = float(options[num + 1])
            # The count is fixed before the loop starts, so appending to the
            # same collection below cannot extend the iteration.
            # NOTE(review): this re-adds the leading examples to the training
            # set they came from - confirm that is the intended sampling.
            count = int(proportion *
                        len(trainingSet.getExamples().getExamplesList()))
            for items in range(0, count):
                trainingSet.getExamples().add(
                    trainingSet.getExamples().getExamplesList()[items])
            trainingSet.setAttributes(trainingSet.getAttributes())
            # isinstance also accepts subclasses of ID3, which the former
            # exact type comparison rejected.
            if isinstance(Classifier, ID3):
                Classifier.train(trainingSet)
            else:
                # Single-argument print() emits the same text on Python 2
                # and Python 3.
                print("Error in Evaluator:evaluate")
            # testSet must have been supplied by a -T option earlier in the
            # list; otherwise this name is unbound.
            performance = Classifier.classify(testSet)
            return str(performance)

    for num in range(0, self.folds):
        testSet = DataSet()
        trainSet = DataSet()
        for items in trainingSet.getExamples().getExamplesList():
            randomNum = random.randint(0, self.folds - 1)
            if randomNum != num:
                testSet.getExamples().add(items)
            else:
                # Goes to the per-fold training split. Writing to
                # trainingSet here (as before) mutated the collection being
                # iterated and left trainSet empty.
                trainSet.getExamples().add(items)
        testSet.setAttributes(trainingSet.getAttributes())
        trainSet.setAttributes(trainingSet.getAttributes())
        # NOTE(review): whenever attributes are defined the full training
        # set replaces the per-fold split, so the classifier is scored on
        # examples it was trained on. The "!=" above also sends most
        # examples to the test split. Confirm both are intended.
        if len(trainingSet.attributes.attributes) > 0:
            trainSet = trainingSet
        Classifier.train(trainSet)
        tempPerformance = Classifier.classify(testSet)
        accuracies.append(tempPerformance.accuracy)
        average += tempPerformance.accuracy
        performance += tempPerformance

    # "average" holds the running sum of the fold accuracies at this point.
    return str(performance) + " +- " + str(self.stdDev(accuracies, average))
def evaluate(self, Classifier, *args):
    """
    Evaluate a classifier and describe its performance as text.

    args[0] is read as a flat list of option tokens:
      -t <file>      load the training data from <file>
      -T <file>      load a separate test set from <file>
      -p <fraction>  run one train/classify pass against the test set
                     loaded by an earlier -T and return at once

    When no -p option is present the classifier is trained and scored
    self.folds times on random splits of the training data.

    @param Classifier classifier instance exposing train() and classify()
    @param args       option list as described above (only args[0] is read)
    @return str(performance) for the -p pass; otherwise str(performance)
            followed by " +- " and self.stdDev(accuracies, average)
    """
    options = args[0]
    proportion = 0
    average = 0.0
    accuracies = []
    performance = Performance()
    trainingSet = DataSet()

    for num, option in enumerate(options):
        if option == "-t":
            trainingSet.load(options[num + 1])
        if option == "-T":
            testSet = DataSet()
            testSet.load(options[num + 1])
        if option == "-p":
            proportion = float(options[num + 1])
            # The count is fixed before the loop starts, so appending to the
            # same collection below cannot extend the iteration.
            # NOTE(review): this re-adds the leading examples to the training
            # set they came from - confirm that is the intended sampling.
            count = int(proportion *
                        len(trainingSet.getExamples().getExamplesList()))
            for items in range(0, count):
                trainingSet.getExamples().add(
                    trainingSet.getExamples().getExamplesList()[items])
            trainingSet.setAttributes(trainingSet.getAttributes())
            # isinstance also accepts subclasses of ID3, which the former
            # exact type comparison rejected.
            if isinstance(Classifier, ID3):
                Classifier.train(trainingSet)
            else:
                # Single-argument print() emits the same text on Python 2
                # and Python 3.
                print("Error in Evaluator:evaluate")
            # testSet must have been supplied by a -T option earlier in the
            # list; otherwise this name is unbound.
            performance = Classifier.classify(testSet)
            return str(performance)

    for num in range(0, self.folds):
        testSet = DataSet()
        trainSet = DataSet()
        for items in trainingSet.getExamples().getExamplesList():
            randomNum = random.randint(0, self.folds - 1)
            if randomNum != num:
                testSet.getExamples().add(items)
            else:
                # Goes to the per-fold training split. Writing to
                # trainingSet here (as before) mutated the collection being
                # iterated and left trainSet empty.
                trainSet.getExamples().add(items)
        testSet.setAttributes(trainingSet.getAttributes())
        trainSet.setAttributes(trainingSet.getAttributes())
        # NOTE(review): whenever attributes are defined the full training
        # set replaces the per-fold split, so the classifier is scored on
        # examples it was trained on. The "!=" above also sends most
        # examples to the test split. Confirm both are intended.
        if len(trainingSet.attributes.attributes) > 0:
            trainSet = trainingSet
        Classifier.train(trainSet)
        tempPerformance = Classifier.classify(testSet)
        accuracies.append(tempPerformance.accuracy)
        average += tempPerformance.accuracy
        performance += tempPerformance

    # "average" holds the running sum of the fold accuracies at this point.
    return str(performance) + " +- " + str(self.stdDev(accuracies, average))
# NOTE(review): the two statements that open the next line ("print results",
# "return results") close a function whose header is outside this view.
# The remainder is the demo executed when the module is run directly:
#   * prints the current working directory;
#   * builds a DataSet from ..//..//data//ml//test_weather.gla and a
#     Perceptron over it (epochs=10), prints p.classify([0,0,1,1]) and calls
#     p.test() on the data set's examples;
#   * creates ten synthetic examples [index, random value, class]: class 0
#     draws its second attribute from 0..34, class 1 from 20..49, so the two
#     ranges overlap between 20 and 34;
#   * adds them to the data set through Factory().example() and collects the
#     first and second values of the class-0 examples into class0x / class0y.
# matplotlib and Axes3D are imported but not used in the visible portion -
# presumably the plotting code follows; confirm against the full file.
print results return results if __name__=="__main__": import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D import random import os print os.getcwd() ds = DataSet("..//..//data//ml//test_weather.gla") p = Perceptron(dataset=ds, epochs=10) print "Perceptron test:", p.classify([0,0,1,1]) p.test(ds.getExamples()) attribute1 = [n for n in xrange(10)] attribute2a = [random.sample(range(50)[:35],1)[0] for n in xrange(5)] attribute2b = [random.sample(range(50)[20:],1)[0] for n in xrange(5)] class0examples = [[attribute1[n], attribute2a[n], 0] for n in xrange(5)] class1examples = [[attribute1[n], attribute2b[n], 1] for n in xrange(5)] for exs in class1examples+class0examples: #print exs ds.addExample(Factory().example(exs)) class0 = [x.getValues() for x in ds.getExamplesByClass(0)] class0x = [x[0] for x in class0] class0y = [x[1] for x in class0]
# NOTE(review): the statement that opens the next line ("return results")
# closes a function whose header is outside this view.
# The remainder is the demo executed when the module is run directly:
#   * prints the current working directory;
#   * builds a DataSet from ..//..//data//ml//test_weather.gla and a
#     Perceptron over it (epochs=10), prints p.classify([0, 0, 1, 1]) and
#     calls p.test() on the data set's examples;
#   * creates ten synthetic examples [index, random value, class]: class 0
#     draws its second attribute from 0..34, class 1 from 20..49, so the two
#     ranges overlap between 20 and 34;
#   * adds them to the data set through Factory().example() and collects the
#     first and second values of the class-0 examples into class0x / class0y.
# matplotlib and Axes3D are imported but not used in the visible portion -
# presumably the plotting code follows; confirm against the full file.
return results if __name__ == "__main__": import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D import random import os print os.getcwd() ds = DataSet("..//..//data//ml//test_weather.gla") p = Perceptron(dataset=ds, epochs=10) print "Perceptron test:", p.classify([0, 0, 1, 1]) p.test(ds.getExamples()) attribute1 = [n for n in xrange(10)] attribute2a = [random.sample(range(50)[:35], 1)[0] for n in xrange(5)] attribute2b = [random.sample(range(50)[20:], 1)[0] for n in xrange(5)] class0examples = [[attribute1[n], attribute2a[n], 0] for n in xrange(5)] class1examples = [[attribute1[n], attribute2b[n], 1] for n in xrange(5)] for exs in class1examples + class0examples: #print exs ds.addExample(Factory().example(exs)) class0 = [x.getValues() for x in ds.getExamplesByClass(0)] class0x = [x[0] for x in class0] class0y = [x[1] for x in class0]
@return integer indicating the class of the unknown data. """ types = { "levenshtein": Distance().levenshtein, "l": Distance().levenshtein, 0: Distance().levenshtein , "hamming": Distance().hamming, "h": Distance().hamming, 1: Distance().hamming , "euclidean": Distance().euclidean, "e": Distance().euclidean, 2: Distance().euclidean , "manhattan": Distance().manhattan, "m": Distance().manhattan, 3: Distance().manhattan , "chebyshev": Distance().chebyshev, "c": Distance().chebyshev, 4: Distance().chebyshev } results = [types[distanceType](x.getValue(), data) for x in self.trainset] results = [(i,x) for i,x in enumerate(results)] kernels = sorted(results, key = lambda x:x[1])[:3] kernels = [self.trainset[i].getLabel() for i,x in kernels] kernels = [(n, kernels.count(n)) for n in set(kernels)] return sorted(kernels, key = lambda x:x[1], reverse = True)[0][0] if __name__ == "__main__": from DataSet import DataSet ds = DataSet("C:\\Users\\a5rjqzz\\Desktop\\Python\\pyClassifiers\\data\\IBk\\sample_set_lang.gla") bk = IBk() bk.train(ds.getExamples()) kn = ds.convert("y n n") cl = bk.classify(kn, 3) print cl print ds.getAttributes(1)[-1].getLabel(cl)  # NOTE(review): classify() above always votes among the 3 closest training examples (the [:3] slice) and returns their most frequent label; the 3 passed to classify() here presumably selects the distance type (key 3 maps to manhattan in the types table) rather than a neighbour count - confirm against the method header, which is outside this view
class knn(Classifier):
    """
    k-nearest-neighbour classifier.

    Distances are mismatch counts between attribute values (see distance()),
    and the predicted label is the most frequent label among the k stored
    examples closest to the observation.
    """

    def __init__(self, *args):
        """
        Create the classifier with k = 3 and an empty DataSet, then apply
        any options found in args (see setOptions()).

        @param args forwarded to the base class; args[0], when present, is
                    read as a list of option tokens
        """
        super(knn, self).__init__(*args)
        self.k = 3
        self.instances = DataSet()
        self.setOptions(args)

    def train(self, inDataSet):
        """
        Store the data set whose examples are searched by classify().

        @param inDataSet DataSet to keep as the reference instances
        """
        self.instances = inDataSet

    def classify(self, input):
        """
        Classify a single example or score a whole data set.

        @param input an Example to label, or a DataSet to evaluate
        @return for an Example, the label chosen by vote(); for a DataSet,
                a Performance filled through setPerf(correct, total);
                None for any other kind of input
        """
        if isinstance(input, Example):
            return self.vote(self._nearestNeighbors(input))

        if isinstance(input, DataSet):
            # Score the data set that was passed in. The earlier code
            # iterated self.instances here and never looked at the argument.
            examples = input.getExamples().getExamplesList()
            rightCount = 0
            for item in examples:
                if self.classify(item) == self._labelOf(input, item):
                    rightCount += 1
            performance = Performance()
            performance.setPerf(rightCount, len(examples))
            return performance

    def setOptions(self, arguments):
        """
        Apply command-line style options.

        Recognised tokens in arguments[0]:
          -k <int>   number of neighbours to consult
          -t <file>  load the reference instances from <file>

        @param arguments sequence whose first element is the token list;
                         an empty sequence leaves the defaults untouched
        """
        if not arguments:
            # Constructed without options (knn()): keep k and instances.
            return
        options = arguments[0]
        for num, option in enumerate(options):
            if option == "-k":
                self.k = int(options[num + 1])
            elif option == "-t":
                newDataSet = DataSet()
                newDataSet.load(options[num + 1])
                self.instances = newDataSet

    def distance(self, observation, example):
        """
        Count the positions at which the two examples hold different values.

        The last entry of the observation's attribute list is left out of
        the comparison (presumably the class attribute - confirm).

        @param observation example being classified
        @param example     stored example to compare against
        @return number of differing attribute values
        """
        compared = len(observation.attributes.getAttributesList()) - 1
        return sum(1 for num in range(compared)
                   if observation.values[num] != example.values[num])

    def vote(self, neighbors):
        """
        Return the label that occurs most often among the neighbours.

        @param neighbors neighbor objects exposing a classifier attribute
        @return the most frequent classifier value; raises ValueError when
                neighbors is empty
        """
        voteDict = {}
        for items in neighbors:
            voteDict[items.classifier] = voteDict.get(items.classifier, 0) + 1
        return max(voteDict, key=voteDict.get)

    def _nearestNeighbors(self, observation):
        """Collect up to self.k stored examples closest to observation."""
        neighbors = []
        for item in self.instances.getExamples().getExamplesList():
            dist = self.distance(observation, item)
            if len(neighbors) < self.k:
                neighbors.append(self._makeNeighbor(item, dist))
            elif neighbors:
                # Replace the farthest neighbour kept so far when this
                # example is strictly closer. The earlier pairwise scan only
                # remembered the result of its last comparison.
                worst = max(range(len(neighbors)),
                            key=lambda i: neighbors[i].distance)
                if dist < neighbors[worst].distance:
                    neighbors[worst] = self._makeNeighbor(item, dist)
        return neighbors

    def _makeNeighbor(self, example, dist):
        """Wrap a stored example's class label and distance in a neighbor."""
        entry = neighbor()
        entry.setNeighbor(self._labelOf(self.instances, example), dist)
        return entry

    def _labelOf(self, dataSet, example):
        """Look up example's class label in dataSet's class attribute domain."""
        attributes = dataSet.getAttributes()
        classIndex = attributes.getClassIndex()
        return attributes.getAttributesList()[classIndex].domain[
            example.values[classIndex]]