# dataclassifier.py
# Forked from Dhirajdgandhi/Artificial_Intelligence (156 lines, 116 loc, 5.1 KB).
import math
import time
import matplotlib.pyplot as plt
from perceptron import PerceptronClassifier
from samples import Samples
class DataClassifier:
    """Convert text-rendered images into flat feature vectors plus labels.

    An image is read as ``imgHeight`` consecutive text lines. Each line is cut
    into chunks of ``pixelGrid`` characters, and every chunk yields one
    feature: the number of its characters that occur in ``pixelChars``.
    """

    def __init__(self, imgHeight, imgWidth, LABELS, pixelChars):
        """Store the image geometry and the characters counted as pixels.

        Args:
            imgHeight: Number of text lines consumed per image.
            imgWidth: Number of characters per line; only used to compute
                ``FEATURES``.
            LABELS: Stored unchanged on ``self.LABELS`` (presumably the number
                of distinct classes -- confirm against the classifier).
            pixelChars: Collection of characters counted as set pixels.
                ``None`` selects ``['#', '+']``.
        """
        if pixelChars is None:
            pixelChars = ['#', '+']
        # Width, in characters, of the chunk that is folded into one feature.
        self.pixelGrid = 1
        self.imgHeight = imgHeight
        self.FEATURES = math.ceil((imgHeight * imgWidth) / self.pixelGrid)
        self.LABELS = LABELS
        self.pixelChars = pixelChars
        self.FileObject = None
        self.LabelFileObject = None

    def countPixels(self, line):
        """Return how many items of ``line`` are members of ``pixelChars``.

        Args:
            line: Any iterable of characters (a string or a list).
        """
        return sum(1 for char in line if char in self.pixelChars)

    def extractFeaturesPerLine(self, line, row):
        """Return one pixel count per ``pixelGrid``-wide chunk of ``line``.

        Args:
            line: The text line (or list of characters) to split into chunks.
            row: Index of the line inside its image. Currently unused; kept so
                existing callers keep working.
        """
        step = self.pixelGrid
        return [
            self.countPixels(line[start:start + step])
            for start in range(0, len(line), step)
        ]

    def extractFeatures(self, lines_itr, labelsLines_itr):
        """Read every image from ``lines_itr`` and pair it with its label.

        Lines without any pixel character are skipped before each image. The
        following ``imgHeight`` lines form the image; its feature vector starts
        with a constant ``1`` (presumably the bias input of the classifier)
        followed by the per-line features in reading order.

        Args:
            lines_itr: Iterator over the text lines of the image file.
            labelsLines_itr: Iterator over the label lines, one per image;
                each is converted with ``int``.

        Returns:
            A tuple ``(featureLists, labels)`` of two parallel lists. Reading
            stops silently when either iterator is exhausted; an image whose
            rows or label are incomplete is dropped.

        Raises:
            ValueError: If a label line cannot be parsed as an integer.
        """
        featureValueListForAllImages = []
        actualLabelList = []
        lastRow = self.imgHeight - 1
        try:
            # Inside the ``try`` so that an empty input yields empty results
            # instead of leaking StopIteration to the caller.
            imageLine = next(lines_itr)
            while imageLine:
                # Skipping the blank lines
                while imageLine and self.countPixels(imageLine) == 0:
                    imageLine = next(lines_itr)
                # Scanning image pixels
                featureValueListPerImage = [1]
                for row in range(self.imgHeight):
                    featureValueListPerImage.extend(
                        self.extractFeaturesPerLine(imageLine, row))
                    if row == lastRow:
                        # Only a look-ahead: the image is already complete, so
                        # running out of input here must not discard it.
                        imageLine = next(lines_itr, None)
                    else:
                        imageLine = next(lines_itr)
                actualLabel = next(labelsLines_itr)
                featureValueListForAllImages.append(featureValueListPerImage)
                actualLabelList.append(int(actualLabel))
        except StopIteration:
            # End of the image or label input: return what was collected.
            pass
        return featureValueListForAllImages, actualLabelList
# Names of the data sets the user can choose between.
FACE, DIGIT = "FACE", "DIGIT"
# Keys of the per-data-set settings dictionary.
DIR, HEIGHT, WIDTH = "DIR", "HEIGHT", "WIDTH"
LABEL, PIXELS = "LABEL", "PIXELS"
def main():
    """Train the perceptron on growing slices of the data and report errors.

    The training images are fed to the classifier in chunks of
    ``PERCENT_INCREMENT`` percent of the training set. After each chunk the
    model is evaluated on the full test set; the training time of the chunk
    and the resulting error rate are recorded, printed and finally plotted
    against the number of training images seen so far.
    """
    print("TRAINING OUR MODEL FIRST")
    PERCENT_INCREMENT = 10
    # Per-data-set settings; named so that the builtin ``map`` is not shadowed.
    datasetSettings = {
        FACE: {
            DIR: 'data/facedata', HEIGHT: 68, WIDTH: 61, LABEL: 2, PIXELS: None
        },
        DIGIT: {
            DIR: 'data/digitdata', HEIGHT: 20, WIDTH: 29, LABEL: 10, PIXELS: None
        },
    }
    inp = input("Type FACE or DIGIT").strip().upper()
    settings = datasetSettings.get(inp)
    if settings is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise SystemExit("Unknown data set {0!r}: expected FACE or DIGIT".format(inp))

    samples = Samples(settings[DIR])
    dataClassifier = DataClassifier(settings[HEIGHT], settings[WIDTH],
                                    settings[LABEL], settings[PIXELS])
    perceptronClassifier = PerceptronClassifier(dataClassifier.FEATURES, dataClassifier.LABELS)
    samples.readFiles()
    try:
        trainFeatures, trainLabels = dataClassifier.extractFeatures(
            samples.train_lines_itr, samples.train_labelsLines_itr)
        # The test set does not change between rounds, so parse it only once.
        testFeatures, testLabels = dataClassifier.extractFeatures(
            samples.test_lines_itr, samples.test_labelsLines_itr)
        # Presumably rewinds the test iterators; kept so they are left in the
        # same state as before this change -- confirm against Samples.
        samples.initTestIters()

        TOTALDATASET = len(trainFeatures)
        # At least one image per round, otherwise fewer than 10 training
        # images would give a step of 0 and the loop would never end.
        INCREMENTS = max(1, int(TOTALDATASET * PERCENT_INCREMENT / 100))
        # Maps "training images seen so far" -> (chunk training time, error rate).
        PERCEPTRON_TIME = {}
        dataset = 0
        while dataset < TOTALDATASET:
            # Clamp so the last round reports the real end of the data.
            chunkEnd = min(dataset + INCREMENTS, TOTALDATASET)
            startTimer = time.time()
            print("Training ON {0} to {1} data".format(dataset, chunkEnd))
            for featureValueListPerImage, actualLabel in zip(
                    trainFeatures[dataset:chunkEnd], trainLabels[dataset:chunkEnd]):
                perceptronClassifier.runModel(True, featureValueListPerImage, actualLabel)
            endTimer = time.time()

            print("TESTING our model that is TRAINED ON {0} to {1} data".format(0, chunkEnd))
            errorPrediction = 0
            total = 0
            for featureValueListPerImage, actualLabel in zip(testFeatures, testLabels):
                errorPrediction += perceptronClassifier.runModel(
                    False, featureValueListPerImage, actualLabel)
                total += 1
            print("Error is", errorPrediction, "out of Total of ", total)
            # Without test images the rate is undefined; avoid dividing by zero.
            errorRate = (errorPrediction * 100) / total if total else float("nan")
            print(errorRate, "%")

            dataset = chunkEnd
            PERCEPTRON_TIME[dataset] = ((endTimer - startTimer), errorRate)

        # Plot the measured error rates instead of placeholder values.
        trainedSizes = list(PERCEPTRON_TIME)
        plt.plot(trainedSizes, [PERCEPTRON_TIME[size][1] for size in trainedSizes])
        plt.ylabel('Error Rate')
        plt.xlabel('DataSet')
        plt.show()
        print(PERCEPTRON_TIME)
    finally:
        # Release the data files even when training or plotting fails.
        samples.closeFiles()


if __name__ == '__main__':
    main()