예제 #1
0
    def evaluate(self, Classifier, *args):
        """Score ``Classifier`` as directed by the option list in ``args[0]``.

        The options are scanned left to right; each flag reads the entry
        that follows it:

        * ``-t <source>``: load the training set.
        * ``-T <source>``: create and load a separate test set.
        * ``-p <fraction>``: add the leading ``fraction`` of the training
          examples to the training set again, train the classifier if its
          type is exactly ``ID3`` (otherwise print an error message),
          classify the test set and return at once.

        When the scan ends without a ``-p`` flag, ``self.folds`` rounds of
        random partitioning are run and their results accumulated.

        Returns:
            ``str`` of the classification result on the ``-p`` path;
            otherwise ``str(performance) + " +- " + str(self.stdDev(...))``,
            where ``stdDev`` receives the per-round accuracies and their sum.
        """
        portion = 0
        accuracy_sum = 0.0
        round_accuracies = []
        performance = Performance()
        training = DataSet()

        options = args[0]
        for position, flag in enumerate(options):
            if flag == "-t":
                training.load(options[position + 1])
            elif flag == "-T":
                held_out = DataSet()
                held_out.load(options[position + 1])
            elif flag == "-p":
                portion = float(options[position + 1])
                # The count is fixed before the loop, so growing the set
                # inside the loop does not extend the iteration.
                limit = int(
                    portion * len(training.getExamples().getExamplesList()))
                for offset in range(limit):
                    # NOTE(review): this re-adds examples to the set they
                    # came from; a separate subset may have been intended.
                    training.getExamples().add(
                        training.getExamples().getExamplesList()[offset])
                training.setAttributes(training.getAttributes())
                if type(Classifier) == ID3:
                    Classifier.train(training)
                else:
                    print("Error in Evaluator:evaluate")
                # held_out is only bound if a "-T" option came earlier.
                return str(Classifier.classify(held_out))

        for fold in range(self.folds):
            held_out = DataSet()
            fold_training = DataSet()
            for example in training.getExamples().getExamplesList():
                draw = random.randint(0, self.folds - 1)
                if draw == fold:
                    # NOTE(review): the example goes back into ``training``
                    # (the set being iterated), not into ``fold_training``
                    # -- confirm this is intended.
                    training.getExamples().add(example)
                else:
                    held_out.getExamples().add(example)
            held_out.setAttributes(training.getAttributes())
            fold_training.setAttributes(training.getAttributes())
            # Whenever the training set carries attributes, the complete
            # training set replaces the per-round set.
            if len(training.attributes.attributes) > 0:
                fold_training = training
            Classifier.train(fold_training)
            outcome = Classifier.classify(held_out)
            round_accuracies.append(outcome.accuracy)
            accuracy_sum += outcome.accuracy
            performance += outcome

        summary = str(performance)
        spread = str(self.stdDev(round_accuracies, accuracy_sum))
        return summary + " +- " + spread
예제 #2
0
    def evaluate(self, Classifier, *args):
        """Evaluate ``Classifier`` using the command-style options in ``args[0]``.

        Recognised flags (each one reads the entry right after it):

        * ``-t``: source handed to ``load`` on the training set.
        * ``-T``: source handed to ``load`` on a newly created test set.
        * ``-p``: a float fraction. The first ``fraction * size`` training
          examples are added to the training set once more, the classifier
          is trained only when its type is exactly ``ID3`` (an error line is
          printed otherwise), and the string form of classifying the test
          set is returned immediately.

        Without a ``-p`` flag the method performs ``self.folds`` rounds of
        random splitting, sums the results, and returns
        ``str(performance) + " +- " + str(self.stdDev(accuracies, total))``
        where ``total`` is the sum of the per-round accuracies.
        """
        fraction = 0
        total = 0.0
        accuracies = []
        performance = Performance()
        trainData = DataSet()

        optionList = args[0]
        for index, option in enumerate(optionList):
            if option == "-t":
                trainData.load(optionList[index + 1])
            elif option == "-T":
                testData = DataSet()
                testData.load(optionList[index + 1])
            elif option == "-p":
                fraction = float(optionList[index + 1])
                # Size is sampled once, before any example is re-added.
                howMany = int(
                    fraction * len(trainData.getExamples().getExamplesList()))
                for pos in range(howMany):
                    # NOTE(review): duplicates leading examples inside the
                    # same set instead of building a subset -- confirm.
                    trainData.getExamples().add(
                        trainData.getExamples().getExamplesList()[pos])
                trainData.setAttributes(trainData.getAttributes())
                if type(Classifier) == ID3:
                    Classifier.train(trainData)
                else:
                    print("Error in Evaluator:evaluate")
                # testData exists only when "-T" preceded "-p" in the list.
                return str(Classifier.classify(testData))

        for roundNo in range(self.folds):
            testData = DataSet()
            roundTrain = DataSet()
            for example in trainData.getExamples().getExamplesList():
                pick = random.randint(0, self.folds - 1)
                if pick == roundNo:
                    # NOTE(review): added to ``trainData`` (currently being
                    # iterated) rather than ``roundTrain`` -- confirm.
                    trainData.getExamples().add(example)
                else:
                    testData.getExamples().add(example)
            testData.setAttributes(trainData.getAttributes())
            roundTrain.setAttributes(trainData.getAttributes())
            # A training set with attributes replaces the per-round set.
            if len(trainData.attributes.attributes) > 0:
                roundTrain = trainData
            Classifier.train(roundTrain)
            roundResult = Classifier.classify(testData)
            accuracies.append(roundResult.accuracy)
            total += roundResult.accuracy
            performance += roundResult

        head = str(performance)
        tail = str(self.stdDev(accuracies, total))
        return head + " +- " + tail
		print results
		return results

if __name__ == "__main__":
	# Manual demo: load a data set, run a Perceptron on it, then append ten
	# synthetic examples and pull out the values of the class-0 examples.
	# Module-level names are kept as-is because later code may read them.
	import matplotlib.pyplot as plt
	from mpl_toolkits.mplot3d import Axes3D
	import random
	import os

	print(os.getcwd())
	ds = DataSet("..//..//data//ml//test_weather.gla")

	p = Perceptron(dataset=ds, epochs=10)

	print("Perceptron test: %s" % (p.classify([0,0,1,1]),))
	p.test(ds.getExamples())

	# First attribute: 0..9. Second attribute: one random draw per example,
	# from 0..34 for the first group and from 20..49 for the second.
	attribute1 = list(xrange(10))
	attribute2a = [random.sample(range(35), 1)[0] for n in xrange(5)]
	attribute2b = [random.sample(range(20, 50), 1)[0] for n in xrange(5)]

	# Third entry is 0 or 1 -- presumably the class label (see the
	# getExamplesByClass(0) call below).
	class0examples = []
	class1examples = []
	for n in xrange(5):
		class0examples.append([attribute1[n], attribute2a[n], 0])
		class1examples.append([attribute1[n], attribute2b[n], 1])

	for exs in class1examples + class0examples:
		ds.addExample(Factory().example(exs))

	# Split the class-0 value rows into their first and second components.
	class0 = []
	for x in ds.getExamplesByClass(0):
		class0.append(x.getValues())
	class0x = []
	class0y = []
	for x in class0:
		class0x.append(x[0])
		class0y.append(x[1])
        return results


if __name__ == "__main__":
    # Manual demo: exercise a Perceptron on a data set loaded from disk,
    # then extend that data set with ten generated examples and collect the
    # values of the examples returned for class 0.
    # Module-level names are left unchanged in case later code uses them.
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    import random
    import os

    print(os.getcwd())
    ds = DataSet("..//..//data//ml//test_weather.gla")

    p = Perceptron(dataset=ds, epochs=10)

    print("Perceptron test: %s" % (p.classify([0, 0, 1, 1]),))
    p.test(ds.getExamples())

    attribute1 = list(xrange(10))
    # One random value per example: 0..34 for group "a", 20..49 for "b".
    attribute2a = [random.sample(range(35), 1)[0] for n in xrange(5)]
    attribute2b = [random.sample(range(20, 50), 1)[0] for n in xrange(5)]

    # Rows are [attribute1, attribute2, 0-or-1]; the last entry is
    # presumably the class label.
    class0examples = []
    class1examples = []
    for n in xrange(5):
        class0examples.append([attribute1[n], attribute2a[n], 0])
        class1examples.append([attribute1[n], attribute2b[n], 1])

    for exs in class1examples + class0examples:
        ds.addExample(Factory().example(exs))

    # Value rows of the class-0 examples, then their first two columns.
    class0 = []
    for x in ds.getExamplesByClass(0):
        class0.append(x.getValues())
    class0x = []
    class0y = []
    for x in class0:
        class0x.append(x[0])
        class0y.append(x[1])
예제 #5
0
			@return integer indicating the class of the unknown data.
		"""
		types = {
					"levenshtein": 	Distance().levenshtein, 	"l": Distance().levenshtein,	0: Distance().levenshtein , 			
					"hamming": 		Distance().hamming, 		"h": Distance().hamming,		1: Distance().hamming , 		
					"euclidean": 	Distance().euclidean, 		"e": Distance().euclidean,		2: Distance().euclidean , 			
					"manhattan": 	Distance().manhattan, 		"m": Distance().manhattan,		3: Distance().manhattan , 			
					"chebyshev": 	Distance().chebyshev,		"c": Distance().chebyshev,		4: Distance().chebyshev
				}

		results = [types[distanceType](x.getValue(), data) for x in self.trainset]
		results = [(i,x) for i,x in enumerate(results)]
		kernels = sorted(results, key = lambda x:x[1])[:3]
		kernels = [self.trainset[i].getLabel() for i,x in kernels]
		kernels = [(n, kernels.count(n)) for n in set(kernels)]
		return sorted(kernels, key = lambda x:x[1], reverse = True)[0][0]


if __name__ == "__main__":
	# Manual smoke test: train IBk on a sample data set, classify one
	# converted observation, and print the raw result followed by the label
	# looked up on the last entry of ds.getAttributes(1).
	from DataSet import DataSet

	ds = DataSet("C:\\Users\\a5rjqzz\\Desktop\\Python\\pyClassifiers\\data\\IBk\\sample_set_lang.gla")
	bk = IBk()
	bk.train(ds.getExamples())

	# "y n n" is converted by the data set before classification; the
	# second argument (3) is passed straight through to classify().
	kn = ds.convert("y n n")
	cl = bk.classify(kn, 3)

	print(cl)
	print(ds.getAttributes(1)[-1].getLabel(cl))
예제 #6
0
class knn(Classifier):
    """k-nearest-neighbour classifier using a mismatch-count distance.

    Attributes:
        k: number of neighbours that take part in the vote (default 3,
            overridable with the ``-k`` option).
        instances: the ``DataSet`` whose examples act as stored instances
            (set by ``train`` or the ``-t`` option).
    """

    def __init__(self, *args):
        """Create the classifier and apply any options found in ``args``.

        ``args`` is forwarded unchanged to the base class; when present,
        ``args[0]`` is read as an option list by ``setOptions``.
        """
        super(knn, self).__init__(*args)
        self.k = 3
        self.instances = DataSet()
        self.setOptions(args)

    def train(self, inDataSet):
        """Keep ``inDataSet`` as the stored instances; nothing is fitted."""
        self.instances = inDataSet

    def classify(self, input):
        """Label a single example, or score the classifier on a data set.

        Args:
            input: an ``Example`` to classify, or a ``DataSet`` whose
                examples are classified one by one and compared with their
                own class values.

        Returns:
            For an ``Example``: the label chosen by ``vote``.
            For a ``DataSet``: a ``Performance`` filled through ``setPerf``
            with the number of correct predictions and the number of
            examples evaluated.
            ``None`` for any other input type.

        Raises:
            ValueError: from ``vote`` when no neighbour could be collected
                (no stored examples, or ``k`` not positive).
        """
        if isinstance(input, Example):
            return self.vote(self._nearest(input))
        if isinstance(input, DataSet):
            return self._score(input)
        return None

    def _nearest(self, observation):
        """Return up to ``self.k`` stored neighbours closest to ``observation``."""
        attributes = self.instances.getAttributes()
        classIndex = attributes.getClassIndex()
        labels = attributes.getAttributesList()[classIndex].domain

        neighbors = []
        for item in self.instances.getExamples().getExamplesList():
            dist = self.distance(observation, item)
            worst = None
            if len(neighbors) >= self.k:
                if not neighbors:
                    # k <= 0: nothing can be kept; vote() reports the
                    # empty neighbour list.
                    continue
                # Index of the current farthest neighbour. The earlier
                # pairwise scan only ended up with the larger of the last
                # two entries, so the true farthest one could survive.
                worst = max(range(len(neighbors)),
                            key=lambda i: neighbors[i].distance)
                if dist >= neighbors[worst].distance:
                    continue
            candidate = neighbor()
            candidate.setNeighbor(labels[item.values[classIndex]], dist)
            if worst is None:
                neighbors.append(candidate)
            else:
                neighbors[worst] = candidate
        return neighbors

    def _score(self, dataSet):
        """Classify every example of ``dataSet`` and count the hits.

        Class values are decoded with the stored instances' attributes,
        i.e. ``dataSet`` is assumed to share their encoding (``distance``
        already compares raw values across both sets).
        """
        labels = self.instances.getAttributes().getClassAttribute().domain
        classIndex = self.instances.getExamples().attributes.getClassIndex()
        examples = dataSet.getExamples().getExamplesList()

        rightCount = 0
        for example in examples:
            if self.classify(example) == labels[example.values[classIndex]]:
                rightCount += 1
        performance = Performance()
        performance.setPerf(rightCount, len(examples))
        return performance

    def setOptions(self, arguments):
        """Apply ``-k <int>`` and ``-t <source>`` from ``arguments[0]``.

        Args:
            arguments: a sequence whose first element is the option list;
                an empty sequence leaves the defaults untouched.

        Raises:
            IndexError: when a flag is the last entry and has no value.
        """
        if not arguments:
            return
        options = arguments[0]
        for num, flag in enumerate(options):
            if flag == "-k":
                self.k = int(options[num + 1])
            elif flag == "-t":
                newDataSet = DataSet()
                newDataSet.load(options[num + 1])
                self.instances = newDataSet

    def distance(self, observation, example):
        """Count the positions at which the two examples' values differ.

        The last attribute of ``observation`` is left out of the comparison
        (presumably the class attribute -- confirm against the data format).
        """
        width = len(observation.attributes.getAttributesList()) - 1
        return sum(1 for num in range(width)
                   if observation.values[num] != example.values[num])

    def vote(self, neighbors):
        """Return the ``classifier`` value that occurs most among ``neighbors``.

        Raises:
            ValueError: when ``neighbors`` is empty.
        """
        voteDict = {}
        for item in neighbors:
            voteDict[item.classifier] = voteDict.get(item.classifier, 0) + 1
        return max(voteDict, key=voteDict.get)