/
main.py
287 lines (267 loc) · 10.3 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
'''
This script implements project number 27, salient object segmentation. The implementation follows the method described in article [1].
'''
# imports
import numpy as np
import cv2
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
# Global Variables
# Path of the input image that main() reads; it is a relative path, so it is
# resolved against the current working directory.
path_to_image = "MSO/img/COCO_COCO_train2014_000000151534.jpg"
# Switch matplotlib to interactive mode at import time.
plt.ion()
'''
Classes used for defining the structures and algorithms useful for the implementation
'''
class Bin:
    """
    Mutable cursor over a histogram: a position plus a "this is a peak" flag.

    Attributes :
    pos -- index of the bin inside the histogram (whatever the caller passes,
           typically a 3-tuple)
    is_peak -- True once the bin has been labelled as a local maximum
    """

    def __init__(self, pos, is_peak=False):
        # Both attributes are plain public fields that the hill-climbing code
        # overwrites in place while it walks the histogram.
        self.pos, self.is_peak = pos, is_peak
class Hill_climbing:
    """
    Hill-climbing search for the local maxima (peaks) of a 3-D histogram.

    Every non-empty bin is a potential starting point. From a starting point
    the search repeatedly moves to the highest neighbouring bin (the 3x3x3
    neighbourhood wraps around cyclically on each axis) until it reaches a
    bin that is labelled as a peak. The number of distinct peaks found is
    returned by find_peaks().

    Attributes :
    hist -- the histogram, indexed as hist[i, j, k]
    n_bins -- number of bins along each of the three axes
    not_visited -- list of (i, j, k) positions of non-empty bins that have not
                   been used as a starting point or stepped on yet
    peaks_pos -- list of positions of the peaks found so far
    """

    def __init__(self, hist, n_bins):
        self.hist = hist        # Image histogram
        self.n_bins = n_bins    # Number of bins for each dimension of the histogram
        self.peaks_pos = []     # positions of found peaks
        # Non-empty bins in row-major order; empty bins can never be peaks and
        # are never used as starting points.
        self.not_visited = [
            pos for pos in np.ndindex(n_bins, n_bins, n_bins) if hist[pos] != 0
        ]

    def cyclic_coord(self, x):
        """
        Returns the indices of the two neighbours of bin x along one axis,
        using a cyclic scheme (the first and last bins are adjacent).

        Outputs :
        (min_x, max_x) -- index of the previous and of the next bin
        """
        # Modular arithmetic gives the wrap-around at both ends and also stays
        # in range when the axis has a single bin (both neighbours are 0).
        return (x - 1) % self.n_bins, (x + 1) % self.n_bins

    def find_neighbors(self, pos):
        """
        Returns neighbor positions and values.

        Arguments :
        pos -- (x, y, z) position of the bin

        Outputs :
        neighbors_coord -- array of shape (3, 3); row a holds the
                           [previous, current, next] indices along axis a
        neighbors -- array of shape (3, 3, 3) with the histogram values of the
                     neighbourhood; entry [1, 1, 1] is the bin itself
        """
        x, y, z = pos
        per_axis = []
        for centre in (x, y, z):
            before, after = self.cyclic_coord(centre)
            per_axis.append([before, centre, after])
        neighbors_coord = np.array(per_axis)
        # np.ix_ builds the 3x3x3 cross product of the per-axis indices.
        neighbors = np.asarray(self.hist[np.ix_(*neighbors_coord)],
                               dtype=np.float64)
        return neighbors_coord, neighbors

    def find_peaks(self):
        """
        Returns number of peaks in the histogram.

        Starts from the first non-empty bin, climbs to a peak, then restarts
        from a randomly chosen bin that has not been visited yet, until every
        non-empty bin has been visited. Uses the Bin class defined in this
        file as the moving cursor. Returns 0 for a histogram with no non-empty
        bin.
        """
        if not self.not_visited:
            # Empty histogram: nothing to climb from.
            return 0
        curr_bin = Bin(self.not_visited.pop(0))
        print('not visited',len(self.not_visited))
        while True:
            # Move until finding a peak
            while not curr_bin.is_peak:
                curr_bin = self.uphill_move(curr_bin)
            # If peak not already found, add it to peaks
            if curr_bin.pos not in self.peaks_pos:
                self.peaks_pos.append(curr_bin.pos)
            # Stop only AFTER climbing from the current start; checking the
            # list before climbing would silently skip the last start bin.
            if not self.not_visited:
                break
            # Choose a not visited bin to start again
            new_ind = np.random.randint(0, len(self.not_visited), 1)[0]
            curr_bin.pos = self.not_visited.pop(new_ind)
            curr_bin.is_peak = False
        print('end')
        print(len(self.peaks_pos))
        return len(self.peaks_pos)

    def uphill_move(self, curr_bin):
        """
        Make one uphill move.

        curr_bin is updated in place and returned: it is either flagged as a
        peak, or its position is moved to a neighbouring bin (which is then
        removed from not_visited).
        """
        neigh_coord, neighbors = self.find_neighbors(curr_bin.pos)
        here = neighbors[1, 1, 1]
        # Flat index 13 is the centre of the 3x3x3 block, i.e. the bin itself.
        true_neigh = np.delete(neighbors, 13)
        if (here >= true_neigh).all() and (here > true_neigh).any():
            # No neighbour is higher and at least one is strictly lower.
            curr_bin.is_peak = True
        elif (here == neighbors).all():
            # All neighbors are equal to current bin
            curr_bin = self.resolve_draw(curr_bin, neigh_coord)
        else:
            # At least one neighbor has greater value than current bin:
            # step onto the highest one.
            loc = np.unravel_index(np.argmax(neighbors, axis=None),
                                   neighbors.shape)
            curr_bin.pos = tuple(
                int(neigh_coord[axis, idx]) for axis, idx in enumerate(loc)
            )
            if curr_bin.pos in self.not_visited:
                self.not_visited.remove(curr_bin.pos)
        return curr_bin

    def resolve_draw(self, curr_bin, neigh_coord):
        """
        Resolve conflict when all neighbors are equal to current bin.

        Moves to the first neighbour (in scan order) that has not been visited
        yet; if there is none, the current bin is labelled as a peak.
        """
        for i in neigh_coord[0]:
            for j in neigh_coord[1]:
                for k in neigh_coord[2]:
                    candidate = (int(i), int(j), int(k))
                    if candidate in self.not_visited:
                        # We move to a not visited bin
                        curr_bin.pos = candidate
                        self.not_visited.remove(candidate)
                        return curr_bin
        # All neighbors have already been visited so current bin is in a
        # plateau: we label it as peak.
        curr_bin.is_peak = True
        return curr_bin
def get_limits(i, j, n_H, n_W, W_R):
    """
    Clip the W_R-wide window centred on pixel (i, j) to an n_H x n_W image.

    The returned bounds are meant to index an integral image: rows
    [mini_x, maxi_x) and columns [mini_y, maxi_y) of the original image are
    covered by the window.

    Outputs :
    (mini_x, mini_y, maxi_x, maxi_y, N) -- all cast to int; N is the number of
    pixels of the ORIGINAL image that fall inside the clipped window
    """
    reach = W_R // 2  # how far the window extends on each side of the centre

    first_row = max(0, i - reach)
    first_col = max(0, j - reach)
    # The upper bounds are exclusive, hence the "+ 1".
    end_row = min(n_H, i + 1 + reach)
    end_col = min(n_W, j + 1 + reach)

    pixel_count = (end_row - first_row) * (end_col - first_col)
    return tuple(
        int(value)
        for value in (first_row, first_col, end_row, end_col, pixel_count)
    )
def extract_saliency(img, scale):
    """
    Computes saliency map for a specific scale.

    For every pixel, the saliency is the Euclidean distance (over channels)
    between the pixel value and the mean value of the square window of width
    `scale` centred on it. The window is clipped at the image borders, using
    the same rule as get_limits: rows [max(0, i - scale//2),
    min(n_H, i + 1 + scale//2)) and likewise for columns.

    Arguments :
    img -- numpy array, of shape (n_H, n_W, n_C)
    scale -- window width (int or float; only scale // 2 is used)

    Outputs :
    saliency_map -- saliency map for a specific scale, of shape (n_H, n_W)
    """
    n_H, n_W, n_C = img.shape
    half = int(scale // 2)

    # Integral image with a leading row/column of zeros, so that
    # integral[r, c] is the per-channel sum of img[:r, :c]. float64 keeps
    # integer sums exact and avoids 32-bit overflow on large images.
    integral_img = np.zeros((n_H + 1, n_W + 1, n_C), dtype=np.float64)
    integral_img[1:, 1:] = np.cumsum(
        np.cumsum(img, axis=0, dtype=np.float64), axis=1
    )

    # Clipped window bounds, one entry per row / per column.
    rows = np.arange(n_H)
    cols = np.arange(n_W)
    top = np.maximum(0, rows - half)
    bottom = np.minimum(n_H, rows + 1 + half)
    left = np.maximum(0, cols - half)
    right = np.minimum(n_W, cols + 1 + half)

    # Sum of window pixels from four corner look-ups, for all pixels at once.
    window_sum = (
        integral_img[np.ix_(bottom, right)]
        + integral_img[np.ix_(top, left)]
        - integral_img[np.ix_(top, right)]
        - integral_img[np.ix_(bottom, left)]
    )
    # Number of pixels in each clipped window.
    n_pixels = np.outer(bottom - top, right - left)
    window_mean = window_sum / n_pixels[:, :, np.newaxis]

    # Distance between the pixel and the mean of its surrounding window.
    return np.linalg.norm(img - window_mean, axis=2)
def mean_saliency(image):
    """
    Combine the saliency maps computed at three scales.

    The scales are 1/2, 1/4 and 1/8 of the shorter image side. The three maps
    returned by extract_saliency are added together (the result is their sum;
    it is not divided by the number of scales).

    Arguments :
    image -- numpy array, of shape (n_H, n_W, n_C)

    Outputs :
    M -- accumulated saliency map across the three scales, of shape (n_H, n_W)
    """
    height, width, _ = image.shape
    shortest_side = min(height, width)

    M = np.zeros((height, width))
    for divisor in (2, 4, 8):
        M += extract_saliency(image, shortest_side / divisor)
    return M
def apply_kmeans(image, K):
    """
    Cluster the pixels of the image into K classes with K-means.

    Each pixel is treated as one sample whose features are its channel values.

    Arguments :
    image -- numpy array, of shape (n_H, n_W, n_C)
    K -- number of classes

    Outputs :
    y_pred -- array containing the class label of each pixel, of shape (n_H, n_W)
    """
    rows, cols, channels = image.shape
    # One row per pixel, one column per channel.
    pixels = np.reshape(image, (rows * cols, channels))

    model = KMeans(n_clusters=K, init='k-means++', n_init=10)
    model.fit(pixels)
    labels = model.predict(pixels)

    # Back to the image layout.
    return np.reshape(labels, (rows, cols))
def saliency_means_per_region(image, K):
    """
    Compute mean saliency for each region detected by K means.

    The saliency map comes from mean_saliency(image) and the regions from
    apply_kmeans(image, K).

    Arguments:
    image -- numpy array, of shape (n_H, n_W, n_C)
    K -- number of classes

    Outputs :
    V -- mean saliency per region, of shape (K,); a region that contains no
         pixel gets 0.0
    region_idxes -- array containing class of each pixel, of shape (n_H, n_W)
    """
    M = mean_saliency(image)
    region_idxes = apply_kmeans(image, K)

    labels = region_idxes.ravel()
    # Per-label sum of saliency and per-label pixel count, in one pass each.
    # The [:K] slice ignores any label >= K.
    saliency_sums = np.bincount(labels, weights=M.ravel(), minlength=K)[:K]
    pixel_counts = np.bincount(labels, minlength=K)[:K]

    V = np.zeros((K,))
    # Divide only where the region is populated: 0/0 would give NaN, and a
    # single NaN makes any median/threshold computed from V NaN as well.
    populated = pixel_counts > 0
    V[populated] = saliency_sums[populated] / pixel_counts[populated]
    return V, region_idxes
def segmented_image(image, V, threshold, region_idxes):
    """
    Function discarding the regions with mean saliency inferior to a threshold.

    Pixels that belong to a region whose mean saliency is strictly greater
    than the threshold keep their value; all other pixels are set to 0.

    Arguments:
    image -- numpy array, of shape (n_H, n_W, n_C)
    V -- mean saliency per region, numpy array with one entry per region
    threshold -- value of the threshold
    region_idxes -- array containing class of each pixel, of shape (n_H, n_W)

    Outputs :
    segmented -- the segmented image, uint8 numpy array of shape (n_H, n_W, n_C)
    """
    n_H, n_W, n_C = image.shape  # also checks that the image is 3-D

    # Labels of the regions that are salient enough to be kept.
    chosen_classes = np.flatnonzero(V > threshold)
    # Boolean (n_H, n_W) mask of the pixels belonging to a kept region.
    keep = np.isin(region_idxes, chosen_classes)

    segmented = np.zeros(image.shape, dtype=np.uint8)
    segmented[keep] = image[keep]
    return segmented
def main():
    """
    Run the salient-object segmentation pipeline on `path_to_image` and
    display the original and the segmented image with matplotlib.

    Steps: read the image, convert it to Lab, estimate the number of regions K
    as the number of peaks of the 3-D Lab histogram (hill climbing), cluster
    the pixels with K-means, compute the mean saliency of each region and keep
    the regions whose mean saliency exceeds (median - 0.1).

    Raises :
    IOError -- if the image cannot be read
    """
    # reading image
    image = cv2.imread(path_to_image)
    if image is None:
        # cv2.imread does not raise on a missing/unreadable file, it returns
        # None; fail here with a clear message instead of deep inside cvtColor.
        raise IOError("Could not read image: %s" % path_to_image)

    # changing color space
    im = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    plt.figure(1)
    plt.title('Original image')
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # 3-D histogram of the Lab image. For 8-bit images OpenCV stores all three
    # Lab channels rescaled to 0..255, so each axis must span [0, 256);
    # narrower "mathematical" Lab ranges would drop most pixels.
    n_bins = 16
    hist = cv2.calcHist([im], [0, 1, 2], None,
                        [n_bins, n_bins, n_bins],
                        [0, 256, 0, 256, 0, 256])
    hill_climb = Hill_climbing(hist, n_bins)
    # applying hill climbing algorithm to find k
    k = hill_climb.find_peaks()
    # K-means needs at least one cluster; treat "no peak found" as a single
    # region.
    if k < 1:
        k = 1

    # computation of saliency maps
    V, region_idxes = saliency_means_per_region(im, k)
    threshold = np.median(V) - 0.1

    # segmenting the image (on the original BGR pixels)
    seg_image = segmented_image(image, V, threshold, region_idxes)
    # matplotlib expects RGB channel order
    seg_image = cv2.cvtColor(seg_image, cv2.COLOR_BGR2RGB)

    # displaying the image
    plt.figure(2)
    plt.title('Segmented image')
    plt.imshow(seg_image)
    plt.show()
# Run the pipeline as soon as this file is executed. There is no
# `if __name__ == "__main__":` guard, so importing the module runs it as well.
main()