-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_rbm_rep.py
103 lines (80 loc) · 2.93 KB
/
get_rbm_rep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import numpy as np
import os
import sys
import copy
from string import Template
import mlpython.datasets.store as dataset_store
from rbm import RBM
import pickle
# Experiment identifier; embedded in the names of the model and encoded-dataset
# pickle files read from / written to Models/RBM/.
experiment_number = 100
def train():
    """Train an RBM on the 'ocr_letters' dataset and pickle its parameters.

    The hyper-parameters are read from the command line, in this order:
    ``lr hidden_size n_epochs n_cdk seed``.  Each value is parsed as a Python
    literal and forwarded to the ``RBM`` constructor as the keyword arguments
    ``lr``, ``hidden_size``, ``n_epochs``, ``CDk`` and ``seed``.

    After training, the tuple ``(W, b, hidden_size)`` taken from the model is
    written to ``Models/RBM/model<experiment_number>.pkl``.

    Exits through ``sys.exit`` when the number of arguments is wrong or when
    an argument is not a valid Python literal.  Any exception raised by the
    ``RBM`` constructor is reported and then re-raised.
    """
    # Local import: only this function needs it.
    import ast

    # Work on a copy so that sys.argv itself is left untouched.
    args = sys.argv[1:]

    # Check if every option from the parent's script is here.
    if len(args) != 5:
        print("Usage: python get_rbm_rep.py lr hidden_size n_epochs n_cdk seed")
        print("")
        print("Ex.: python get_rbm_rep.py 0.01 50 10 10 1234")
        sys.exit()

    # Parse the raw strings as literals (0.01 -> float, 50 -> int) instead of
    # exec-ing a string built from them, which would run arbitrary code.
    param_names = ("lr", "hidden_size", "n_epochs", "CDk", "seed")
    try:
        hyper_params = dict(
            (name, ast.literal_eval(raw)) for name, raw in zip(param_names, args)
        )
    except (ValueError, SyntaxError) as err:
        print("Error while parsing hyper-parameters (expected Python literals)")
        print(err)
        sys.exit(1)

    # Set the constructor.
    try:
        rbm = RBM(**hyper_params)
    except Exception as inst:
        print("Error while instantiating RBM (required hyper-parameters are probably missing)")
        print(inst)
        # Without a model there is nothing to train: surface the real error.
        raise

    print("Loading dataset...")
    trainset, _, _ = dataset_store.get_classification_problem('ocr_letters')

    print("Training...")
    rbm.train(trainset)

    # Store the trained dictionary and the parameters to a file.
    with open("Models/RBM/model%d.pkl" % experiment_number, 'wb') as model_file:
        pickle.dump((rbm.W, rbm.b, rbm.hidden_size), model_file)
def _encode_and_save(model, dataset, filename):
    """Encode each input of `dataset` with `model` and pickle the array to `filename`."""
    # The dataset yields (input, target) pairs; only the input is encoded.
    encoded = [model.encode(sample) for sample, _ in dataset]
    with open(filename, 'wb') as out_file:
        pickle.dump(np.asarray(encoded), out_file)


def get_representation():
    """Encode the 'ocr_letters' splits with the saved RBM and pickle them.

    Loads ``(W, b, hidden_size)`` from
    ``Models/RBM/model<experiment_number>.pkl``, builds an ``RBM`` with that
    hidden size, initializes it with ``W`` and ``b``, then encodes the train,
    validation and test sets in turn.  Each encoded split is written as a
    NumPy array to ``Models/RBM/trainset<N>.pkl``, ``validset<N>.pkl`` and
    ``testset<N>.pkl`` respectively, where ``N`` is ``experiment_number``.
    """
    # Load the dictionary and corresponding args.
    with open("Models/RBM/model%d.pkl" % experiment_number, 'rb') as model_file:
        (W, b, hidden_size) = pickle.load(model_file)

    # Set the constructor.
    rbm = RBM(hidden_size=hidden_size)

    print("Loading dataset...")
    trainset, validset, testset = dataset_store.get_classification_problem('ocr_letters')

    print("Initializing...")
    rbm.initialize(W, b)

    print("Encoding the trainset...")
    _encode_and_save(rbm, trainset, "Models/RBM/trainset%d.pkl" % (experiment_number))

    print("Encoding the validset...")
    _encode_and_save(rbm, validset, "Models/RBM/validset%d.pkl" % (experiment_number))

    # Note: only need to do it for the best hyper-params at the end.
    print("Encoding the testset...")
    _encode_and_save(rbm, testset, "Models/RBM/testset%d.pkl" % (experiment_number))
# Run the whole pipeline. Order matters: train() writes the model pickle that
# get_representation() then loads to encode the datasets.
train()
get_representation()