/
preprocess_image.py
97 lines (84 loc) · 3.17 KB
/
preprocess_image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""
The preprocessing step uses VGG16 from the research paper
'Very Deep Convolutional Networks for Large-Scale Image Recognition' by
Karen Simonyan, Andrew Zisserman. Find the paper here: https://arxiv.org/abs/1409.1556
The calculation of pleasure, arousal and dominance values uses the formula from the
research paper 'Effects of Color on Emotions' by Patricia Valdez, Albert Mehrabian.
Find the paper here: https://pdfs.semanticscholar.org/4711/624c0f72d8c85ea6813b8ec5e8abeedfb616.pdf
"""
# for general purpose
import os, sys
import numpy as np
import cv2
from pathlib import Path as path
# for local binary pattern
from skimage.data import load
from skimage.feature import multiblock_lbp
from skimage.transform import resize
# for vgg16
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
# Expected height and width (in pixels) of the images handled by this module.
IMAGE_SIZE = 256
# Block size used for multi-block LBP: one third of the image side, rounded
# down; blocks are square, so width and height share the same value.
WIDTH = HEIGHT = IMAGE_SIZE // 3
""" Extracts texture features using local binary pattern.
Returns an integer value.
"""
def calculate_lbp(img_path):
    """Extract a texture feature with multi-block local binary pattern.

    Args:
        img_path: Path of the image to load (read in grayscale).

    Returns:
        The multi-block LBP code computed at the top-left corner of the
        image with blocks of WIDTH x HEIGHT pixels (an integer value).
    """
    # Imported locally so this fix does not depend on the file-level imports.
    from skimage.transform import integral_image

    # load img in grayscale
    # NOTE(review): `skimage.data.load` is deprecated/removed in newer
    # scikit-image releases -- confirm the pinned version, or migrate to
    # `skimage.io.imread`.
    img = load(img_path, as_gray=True)
    # multiblock_lbp is documented to take an *integral* image, not the raw
    # pixels; feeding it the raw image yields a code unrelated to the
    # block sums it is supposed to compare.
    return multiblock_lbp(integral_image(img), 0, 0, WIDTH, HEIGHT)
""" Returns mean value of RGB """
def mean(img_path):
    """Return the per-channel means of an image in RGB and in HSV.

    Args:
        img_path: Path of the image to load.

    Returns:
        A tuple ``(mean_rgb, mean_hsv)``; each element is the result of
        ``mean_helper`` on the loaded image and on its HSV conversion
        (``cv2.COLOR_RGB2HSV``) respectively.
    """
    rgb_pixels = load(img_path)
    # Average the channels of the image as loaded (treated as RGB).
    rgb_mean = mean_helper(rgb_pixels)
    # Convert to HSV and average the channels again.
    hsv_mean = mean_helper(cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2HSV))
    return rgb_mean, hsv_mean
""" Calculates mean value of a plane given a 3D matrix """
def mean_helper(org_mat):
    """Calculate the mean value of each channel plane of a 3D matrix.

    Args:
        org_mat: Array of shape (height, width, 3), e.g. an RGB or HSV
            image. Any height and width are accepted.

    Returns:
        A 1D array with three values: the mean of each channel, in
        channel order.

    Raises:
        ValueError: If ``org_mat`` does not have shape (height, width, 3).
    """
    if org_mat.ndim != 3 or org_mat.shape[2] != 3:
        raise ValueError(
            "expected a (height, width, 3) matrix, got shape %s"
            % (org_mat.shape,)
        )
    # Transpose so the channel axis leads, then collapse every channel plane
    # into one row. -1 lets numpy infer the pixel count from the input
    # instead of assuming a fixed image size.
    planes = org_mat.T.reshape(3, -1)
    return planes.mean(axis=1)
""" Returns a 1000 * 1 matrix with probabilities of possible objects in image """
def predict_object(img_path):
    """Run VGG16 on an image and return the model's predictions.

    Args:
        img_path: Path of the image to load.

    Returns:
        The output of ``VGG16().predict`` for a batch containing only this
        image (described by the original author as the probabilities of
        1000 possible objects).
    """
    # NOTE(review): a new VGG16 instance is built on every call; consider
    # reusing one model if this is called repeatedly -- confirm threading and
    # memory constraints first.
    classifier = VGG16()
    pixels = load(img_path)
    # Resize to the 224x224 input the model is fed with. The factor 255
    # presumably undoes resize() rescaling pixel values to [0, 1] -- confirm.
    pixels = resize(pixels, (224, 224)) * 255
    rows, cols, channels = pixels.shape[0], pixels.shape[1], pixels.shape[2]
    # Add a leading batch axis of length one.
    batch = pixels.reshape((1, rows, cols, channels))
    return classifier.predict(preprocess_input(batch))
""" Calculates pleasure, arousal and dominance values.
Note 'value' in hsv is used as a measure of brightness in this case
"""
def calculate_pad(hsv):
    """Compute pleasure, arousal and dominance (PAD) values.

    Args:
        hsv: Indexable of HSV components; ``hsv[1]`` is read as saturation
            and ``hsv[2]`` ('value') as brightness. ``hsv[0]`` is unused.

    Returns:
        A numpy array ordered as ``[pleasure, arousal, dominance]``.
    """
    sat, bright = hsv[1], hsv[2]
    return np.array([
        0.69 * bright + 0.22 * sat,   # pleasure
        -0.31 * bright + 0.6 * sat,   # arousal
        -0.76 * bright + 0.32 * sat,  # dominance
    ])
""" Returns all necessary features and average hue """
def preprocess(img_path):
    """Build the feature vector of one image.

    Args:
        img_path: Path of the image. A relative path is resolved against
            the current working directory; an absolute path is used as is.

    Returns:
        A tuple ``(features, hue)``. ``features`` is a 1D array that
        concatenates, in order: the LBP code, the mean RGB values, the
        pleasure/arousal/dominance values, the mean HSV values and the
        VGG16 predictions. ``hue`` is the mean hue (first HSV component).
    """
    # os.path.join keeps an already-absolute img_path intact (plain string
    # concatenation would prepend the working directory to it) and uses the
    # platform's path separator.
    absolute_path = os.path.join(os.getcwd(), img_path)
    # obtain necessary features as a row matrix
    lbp = np.array(calculate_lbp(absolute_path))
    obj = predict_object(absolute_path).flatten()
    rgb, hsv = mean(absolute_path)
    rgb = rgb.flatten()
    # calculate pleasure, arousal and dominance values
    pad = calculate_pad(hsv).flatten()
    hsv = hsv.flatten()
    # concatenate all matrices; axis=None flattens every input first
    features = np.concatenate((lbp, rgb, pad, hsv, obj), axis=None)
    return features, hsv[0]