-
Notifications
You must be signed in to change notification settings - Fork 0
/
dog-cluster.py
142 lines (118 loc) · 4.13 KB
/
dog-cluster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# rbm imports
import rbms, stats, updaters, trainers, updaters, monitors, units, parameters
from utils import generate_data, get_context
# theano imports
import theano
import theano.tensor as T
from theano import ProfileMode
# extra imports
import numpy as np
import time
import scipy.io as sio
import matplotlib.pyplot as plt
plt.ion()
from multiprocessing import Pool, freeze_support
import itertools, json, sys, getopt
# Theano compilation mode passed to compile_function; None selects Theano's
# default mode (ProfileMode is imported above but not activated here).
mode = None
# optimization parameters
learning_rate = 0.001  # step size applied to the CD parameter updates
weight_decay = 0.02  # NOTE(review): defined but never referenced in this file — confirm intent
epochs = 1  # number of full passes over the training data
def rbm(threadName, data):
    """Train one binary-binary RBM on `data` and record per-sample training cost.

    threadName : one of "Thread-1" .. "Thread-4"; selects which of the four
                 returned cost lists is filled (any other name falls back to
                 the fourth list, matching the original else-branch).
    data       : 2-D array, one sample per row; columns define n_visible.

    Returns a 4-tuple (costs_1, costs_2, costs_3, costs_4); only the list
    matching threadName is populated, the other three are empty. Each entry
    is the mean reconstruction MSE for one training sample in one epoch.
    """
    n_visible = data.shape[1]
    n_hidden = 4

    # The RBM model: binary visible and binary hidden units.
    # (Named `model` rather than `rbm` to avoid shadowing this function.)
    model = rbms.BinaryBinaryRBM(n_visible, n_hidden)
    initial_vmap = { model.v: T.matrix('v') }

    # Symbolic contrastive-divergence statistics with k=2 Gibbs steps (CD-2),
    # consumed by the CDUpdater below.
    s = stats.cd_stats(model, initial_vmap, visible_units=[model.v],
                       hidden_units=[model.h], k=2)

    # One gradient-step updater per parameter variable, scaled by learning_rate.
    umap = {}
    for var in model.variables:
        umap[var] = var + learning_rate * updaters.CDUpdater(model, var, s)

    # Minibatch trainer monitoring reconstruction MSE; mb_size=1 trains one
    # sample at a time so a cost is recorded per sample.
    t = trainers.MinibatchTrainer(model, umap)
    mse = monitors.reconstruction_mse(s, model.v)
    train = t.compile_function(initial_vmap, mb_size=1, monitors=[mse],
                               name='train', mode=mode)

    h, w = data.shape

    # One cost list per worker; only the list matching threadName gets filled.
    costs_by_thread = {
        "Thread-1": [],
        "Thread-2": [],
        "Thread-3": [],
        "Thread-4": [],
    }
    # Unknown thread names fall back to the fourth list (original else-branch).
    target = costs_by_thread.get(threadName, costs_by_thread["Thread-4"])

    for epoch in xrange(epochs):
        for i in xrange(h):
            # Train on one sample and collect the monitored cost(s).
            costs = [m for m in train({ model.v: data[i, :].reshape(1, n_visible) })]
            target.append(np.mean(costs))

    return (costs_by_thread["Thread-1"], costs_by_thread["Thread-2"],
            costs_by_thread["Thread-3"], costs_by_thread["Thread-4"])
def rbm_star(a_b):
    """Adapter for Pool.map, which passes a single argument: unpack the
    (threadName, data) pair and forward it to rbm()."""
    thread_name, samples = a_b
    return rbm(thread_name, samples)
def k_rbm(data, outfile=None):
    """Cluster samples by training four RBMs in parallel and assigning each
    sample to the RBM that reconstructs it most cheaply.

    data    : 2-D array of samples, handed in full to each of 4 worker
              processes.
    outfile : path for the JSON dump of cluster assignments; defaults to the
              historical hard-coded name "results1v4h.txt" so existing
              one-argument callers behave exactly as before.

    Side effect: writes {sample_index (1-based): cluster_id (1..4)} as JSON.
    """
    if outfile is None:
        outfile = "results1v4h.txt"

    pool = Pool(4)
    thread_names = ["Thread-1", "Thread-2", "Thread-3", "Thread-4"]
    try:
        # Each worker returns a 4-tuple in which only the list matching its
        # own thread name is populated, so pick that slot from each result.
        a, b, c, d = pool.map(rbm_star,
                              itertools.izip(thread_names, itertools.repeat(data)))
    finally:
        # Original code leaked the pool; release the worker processes.
        pool.close()
        pool.join()
    per_model_costs = [a[0], b[1], c[2], d[3]]

    # Assign each sample to the cheapest model. list.index(min(...)) returns
    # the first minimum, so ties go to the lowest-numbered model — the same
    # tie-breaking as the original if/elif chain.
    cost_dict = {}
    for i in xrange(len(per_model_costs[0])):
        sample_costs = [costs[i] for costs in per_model_costs]
        cost_dict[i + 1] = sample_costs.index(min(sample_costs)) + 1

    # Original passed an unclosed file handle to json.dump; use a context
    # manager so the file is flushed and closed deterministically.
    with open(outfile, 'w') as f:
        json.dump(cost_dict, f)
if __name__ == "__main__":
    # CLI entry point: parse -i/--in_file and -o/--out_file, then run k_rbm.
    # file input
    infile = ''
    outfile = ''
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:o:", ["in_file=", "out_file="])
    except getopt.GetoptError:
        # Unknown option / missing value: print usage and exit with the
        # conventional status 2 for bad command-line arguments.
        print 'python dog-cluster.py -i <inputfile> -o <outputfile>'
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print 'python dog-cluster.py -i <inputfile> -o <outputfile>'
            print 'Change number of hidden units in line 29 of code'
            sys.exit()
        elif opt in ("-i", "--in_file"):
            infile = arg
        elif opt in ("-o", "--out_file"):
            outfile = arg
    # run the k-rbm model
    # NOTE(review): k_rbm is invoked with two arguments here, and infile is a
    # path string while the workers read data.shape — confirm k_rbm's
    # signature accepts an output path and whether the input file should be
    # loaded (e.g. via utils.generate_data) before being passed in.
    k_rbm(infile, outfile)