-
Notifications
You must be signed in to change notification settings - Fork 4
/
picker.py
175 lines (150 loc) · 5.7 KB
/
picker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
Compute features on a bunch of images and display them as a hoverable scatterplot.
"""
import os
import sys
import tempfile
import pathlib
from io import BytesIO
import base64
import numpy as np
from PIL import Image
from matplotlib import cm
from scipy import ndimage as ndi
from skimage import io, filters, measure, morphology, img_as_ubyte
import pandas as pd
from sklearn import decomposition, manifold
from bokeh.models import (LassoSelectTool, PanTool,
ResizeTool, ResetTool,
HoverTool, WheelZoomTool)
# Tool classes attached to every figure built in bokeh_plot. Each class is
# instantiated once per figure there, and a HoverTool is appended separately.
TOOLS = [LassoSelectTool, PanTool, WheelZoomTool, ResizeTool, ResetTool]
from bokeh.models import ColumnDataSource
from bokeh import plotting as bplot
def to_png(arr):
    """Encode an array as a PNG image held in memory.

    Parameters
    ----------
    arr : array
        Image data in a form accepted by ``PIL.Image.fromarray``.

    Returns
    -------
    png : bytes
        The PNG-encoded image.
    """
    buffer = BytesIO()
    Image.fromarray(arr).save(buffer, format='png')
    return buffer.getvalue()
def extract_properties(image, closing_size=2):
    """Segment the bright objects in an image and measure each of them.

    The image is thresholded with Otsu's method, the resulting mask is
    closed with a disk of radius `closing_size`, connected regions are
    labelled, and a fixed list of region properties is read off every region.

    Parameters
    ----------
    image : array
        The intensity image to segment and measure.
    closing_size : int, optional
        Radius of the disk used for the binary closing.

    Returns
    -------
    propnames : list of str
        Names of the measured properties, in column order. The first
        column is ``'area'``.
    data_table : array, shape (n_regions, len(propnames))
        One row of measurements per region. When no region is found the
        array is still 2D, with shape ``(0, len(propnames))``.
    props : list
        The region property objects returned by ``measure.regionprops``,
        in the same order as the rows of `data_table`.
    """
    footprint = morphology.disk(radius=closing_size)
    thresholded = image > filters.threshold_otsu(image)
    closed = morphology.binary_closing(thresholded, footprint)
    regions = ndi.label(closed)[0]
    propnames = ['area', 'convex_area', 'eccentricity', 'euler_number',
                 'extent', 'min_intensity', 'mean_intensity', 'max_intensity',
                 'minor_axis_length', 'major_axis_length']
    props = measure.regionprops(regions, image)
    rows = [[getattr(obj, name) for name in propnames] for obj in props]
    # np.array([]) is 1D, which breaks column indexing in callers; pin the
    # shape so an image without any region yields an empty 2D table instead.
    data_table = np.array(rows).reshape(len(rows), len(propnames))
    return propnames, data_table, props
def extract_properties_multi_image(image_collection, closing_size=2,
                                   min_blob_size=9, max_blob_size=100):
    """Measure the blobs of every image and gather them in a single table.

    Each image is passed to `extract_properties`; regions whose area lies
    strictly between `min_blob_size` and `max_blob_size` are kept. The
    pooled measurements are then projected with `dimension_reductions`.

    Parameters
    ----------
    image_collection : image collection
        Iterable of images that also exposes a ``files`` sequence holding
        the filename of each image. The time point of an image is parsed
        from its filename: the text between the first and second ``-``,
        minus its last character, converted to ``int``.
    closing_size : int, optional
        Forwarded to `extract_properties`.
    min_blob_size, max_blob_size : number, optional
        Exclusive lower and upper bounds on the region area.

    Returns
    -------
    all_results : array, shape (n_blobs, n_properties)
        The measurements of every retained region.
    df : pandas.DataFrame
        One row per retained region with the columns ``'time'``, the
        property names, the projection names, ``'images'`` (the intensity
        image of the region) and ``'source_filenames'`` (the filename without
        its directory).
    pca_weights : array
        The third value returned by `dimension_reductions`.
    """
    kept_tables = []
    kept_objs = []
    times = []
    filenames = []
    for idx, image in enumerate(image_collection):
        print('processing image ', idx)
        filename = image_collection.files[idx]
        timepoint = int(filename.split('-')[1][:-1])
        names, proptable, objs = extract_properties(image, closing_size)
        # Column 0 is 'area' (first entry of the property names).
        area = proptable[:, 0]
        passed = np.flatnonzero((area > min_blob_size) &
                                (area < max_blob_size))
        n_passed = len(passed)
        kept_tables.append(proptable[passed])
        kept_objs += [objs[i] for i in passed]
        times += [timepoint] * n_passed
        filenames += [filename] * n_passed
    all_results = np.vstack(kept_tables)
    time_column = np.array(times)[:, np.newaxis]
    dec, dec_names, pca_weights = dimension_reductions(all_results)
    df = pd.DataFrame(np.hstack((time_column, all_results, dec)),
                      columns=['time'] + names + dec_names)
    df['images'] = [obj.intensity_image for obj in kept_objs]
    df['source_filenames'] = [os.path.split(path)[1] for path in filenames]
    return all_results, df, pca_weights
def dimension_reductions(data_table):
    """Perform various 2D projections of the data.

    Every feature is standardised (mean removed, divided by its standard
    deviation) before the projections are computed.

    Parameters
    ----------
    data_table : array of float, shape (n_samples, n_features)
        The input data.

    Returns
    -------
    vecs : array of float, shape (n_samples, 4)
        Two 2D projections of the data, stacked column-wise:
            - PCA
            - tSNE
    names : list of string
        The names of the returned columns.
    components : array of float, shape (2, n_features)
        The PCA vector loadings.
    """
    mean = np.mean(data_table, axis=0)
    std = np.std(data_table, axis=0)
    # A feature that is constant across samples has zero spread; dividing by
    # it would fill the column with NaN and make the fits below fail. Divide
    # by 1 instead so the column standardises to all zeros.
    safe_std = np.where(std == 0, 1, std)
    norm_data = (data_table - mean) / safe_std
    pca_obj = decomposition.PCA(n_components=2)
    pca = pca_obj.fit_transform(norm_data)
    tsne = manifold.TSNE().fit_transform(norm_data)
    names = ['PC1', 'PC2', 'tSNE-0', 'tSNE-1']
    return np.hstack((pca, tsne)), names, pca_obj.components_
def b64_image_files(images, colormap='magma'):
    """Turn images into base64-encoded PNG data URLs.

    Each image is passed through the named matplotlib colormap, converted
    to unsigned bytes, encoded as PNG and wrapped in a ``data:`` URL.

    Parameters
    ----------
    images : iterable of array
        The images to encode.
    colormap : str, optional
        Name of the matplotlib colormap applied to every image.

    Returns
    -------
    urls : list of str
        One ``data:image/png;base64,...`` URL per input image, in order.
    """
    cmap = cm.get_cmap(colormap)

    def as_data_url(im):
        png_bytes = to_png(img_as_ubyte(cmap(im)))
        encoded = base64.b64encode(png_bytes).decode('utf-8')
        return 'data:image/png;base64,' + encoded

    return [as_data_url(im) for im in images]
def bokeh_plot(df):
    """Show the PCA and tSNE projections as two linked bokeh scatterplots.

    Points are coloured by their ``'time'`` value, rescaled to the range of
    times present and mapped through the viridis colormap. Hovering a point
    shows the image of the object and its source filename. The output is
    directed to ``plot.html`` and displayed with ``bplot.show``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``'images'``, ``'time'``,
        ``'source_filenames'``, ``'PC1'``, ``'PC2'``, ``'tSNE-0'`` and
        ``'tSNE-1'``. It is modified in place: the columns ``'image_files'``
        and ``'color'`` are added.
    """
    # The markup below is passed to bokeh verbatim.
    tooltip = """
<div>
<div>
<img
src="@image_files" height="60" alt="image"
style="float: left; margin: 0px 15px 15px 0px; image-rendering: pixelated;"
border="2"
></img>
</div>
<div>
<span style="font-size: 17px;">@source_filenames</span>
</div>
</div>
"""
    df['image_files'] = b64_image_files(df['images'])

    # Rescale time to [0, 1] before looking the colours up.
    scaled_time = ((df['time'] - df['time'].min()) /
                   (df['time'].max() - df['time'].min()))
    rgba_bytes = cm.viridis(scaled_time, bytes=True)
    df['color'] = ['#%02x%02x%02x' % tuple(rgba[:3]) for rgba in rgba_bytes]

    source = ColumnDataSource(df)
    bplot.output_file('plot.html')

    # Every figure needs its own tool instances, hover tool included.
    hovers = [HoverTool(tooltips=tooltip) for _ in range(2)]
    toolsets = [[tool() for tool in TOOLS] + [hover] for hover in hovers]
    axes = [('PC1', 'PC2'), ('tSNE-0', 'tSNE-1')]

    figures = []
    for (x_name, y_name), tools in zip(axes, toolsets):
        fig = bplot.figure(tools=tools)
        # Both figures share one data source.
        fig.circle(x_name, y_name, color='color', source=source)
        figures.append(fig)

    bplot.show(bplot.gridplot([figures]))
def normalize_images(ims):
    """Rescale a set of images in place to the range [0, 1].

    The common scale is the median, over all images, of each image's 99.9th
    percentile. Every image is divided by that value and then clipped to
    [0, 1].

    Parameters
    ----------
    ims : iterable of array
        The images to normalise. They are modified in place, so the arrays
        must support in-place true division (a floating-point dtype).

    Returns
    -------
    ims : iterable of array
        The same object that was passed in.
    """
    max_val = np.median([np.percentile(im, 99.9) for im in ims])
    for im in ims:
        # A zero scale (e.g. all-dark images) would turn every zero pixel
        # into 0/0 = NaN, which clipping cannot repair; skip the division.
        if max_val != 0:
            im /= max_val
        np.clip(im, 0, 1, out=im)
    return ims
def main(argv):
    """Run the whole pipeline: read images, measure blobs, show the plots.

    Parameters
    ----------
    argv : list of str
        Command-line arguments. Everything after the first element is
        passed to ``io.imread_collection`` as the images to load.
    """
    print('reading images')
    image_files = argv[1:]
    # NOTE(review): conserve_memory=False presumably keeps every loaded image
    # cached so the in-place normalisation below is retained - confirm.
    collection = io.imread_collection(image_files,
                                      conserve_memory=False,
                                      plugin='tifffile')
    normalized = normalize_images(collection)
    print('extracting data')
    _table, df, _weights = extract_properties_multi_image(normalized)
    print('preparing plots')
    bokeh_plot(df)


if __name__ == '__main__':
    main(sys.argv)