# PythonPainters.py
import math
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KNeighborsClassifier
import sklearn.metrics as metrics
import numpy as np
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, join
import Image
import DataModel
def processImage(x):
    """Return the min-max scaled histogram of one training image.

    ``x`` is a file name relative to ``DataModel.trainDataFolder``. A progress
    line is printed before the image is touched, then the histogram produced
    by ``Image.calcImageHistFast`` is cast to float and rescaled with
    ``preprocessing.minmax_scale`` (default feature range).
    """
    print("Processing file: {0}".format(x))
    imagePath = join(DataModel.trainDataFolder, x)
    # NOTE(review): 50 is presumably the number of histogram bins -- confirm
    # against Image.calcImageHistFast.
    rawHist = Image.calcImageHistFast(imagePath, 50)
    return preprocessing.minmax_scale(rawHist.astype(float))
# Experiment #1
# Check the correlation between an image's color histogram and its painter.

# Size of both random draws below: the labelled pairs used for the check and
# the images used to fit the classifier.
numSamples = 50

# Draw the evaluation pairs first and stack the histograms of the first and
# second image of every pair into one matrix each (one row per image).
# The order of the two .sample() calls is kept as-is on purpose: swapping
# them would change which rows each draw returns.
checkFrame = DataModel.trainFrame.sample(numSamples)
firstProc = np.vstack(checkFrame['FirstName'].map(processImage))
secondProc = np.vstack(checkFrame['SecondName'].map(processImage))

# 1. Extract image id and artist from csvExistingFiles
subFrame = DataModel.csvExistingFiles.sample(numSamples)
dataFrame = pd.DataFrame({'id': subFrame['id'], 'artist': subFrame['artist']})

# 2. Load every sampled image and transform it into a histogram
dataFrame['hist'] = subFrame['filename'].map(processImage)

# 3. Build the kNN database {histogram => artist}
knn = KNeighborsClassifier(n_neighbors=8, metric=Image.chiSquareDistance)
trainHists = np.vstack(dataFrame['hist'].values)
trainArtists = dataFrame['artist'].values
knn.fit(trainHists, trainArtists)

# 4. Check it against trainFrame: a pair is predicted as "same painter" when
#    the classifier assigns the same artist to both of its images.
firstArtists = knn.predict(firstProc)
secondArtists = knn.predict(secondProc)
xx = firstArtists == secondArtists

accuracy = metrics.accuracy_score(checkFrame['Same'], xx)
print(accuracy)
confusion = metrics.confusion_matrix(checkFrame['Same'], xx)
print(confusion)
report = metrics.classification_report(checkFrame['Same'], xx)
print("Classification report\n{0}".format(report))