-
Notifications
You must be signed in to change notification settings - Fork 1
/
main_d.py
314 lines (281 loc) · 15.7 KB
/
main_d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
import os, time, pickle
import numpy as np
import tensorflow as tf
from utils import discount_with_dones, Scheduler, make_path, find_trainable_variables
from models import model, model2, model3, load, save
import scipy.sparse as sp
from sl_buffer_d import slBuffer_allFile
from mct_d import MCT
from status import Status
"""
this function builds the model that is used by all three functions below
"""
def build_model(args, scope):
    """Assemble the network placeholders, outputs and training loss.

    Reads ``max_clause``, ``max_var``, ``nstack`` and ``l2_coeff`` from
    *args* and builds the network by calling ``model3`` under *scope*.

    Returns:
        The tuple ``(X, Y, Z, p, v, params, loss)``: the state placeholder,
        the policy-label placeholder, the value-label placeholder, the two
        outputs of ``model3``, the variables returned by
        ``find_trainable_variables(scope)``, and the scalar total loss.
    """
    planes = 2
    rows, cols = args.max_clause, args.max_var
    n_actions = planes * cols

    # Placeholders are created in the order X, Y, Z so that their
    # auto-generated graph names stay stable.
    X = tf.placeholder(tf.float32, (None, rows, cols, planes * args.nstack))
    Y = tf.placeholder(tf.float32, (None, n_actions))
    # NOTE(review): `(None)` is plain None (fully unknown shape), not the
    # 1-tuple `(None,)` -- confirm against the shape of `v` from model3.
    Z = tf.placeholder(tf.float32, (None))

    p, v = model3(X, n_actions, scope)
    params = find_trainable_variables(scope)

    with tf.name_scope("loss"):
        # p is fed as logits against the policy labels Y
        policy_term = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=p))
        # v is regressed onto the value labels Z
        value_term = tf.losses.mean_squared_error(labels=Z, predictions=v)
        # L2 penalty summed over every variable in the scope
        weight_penalty = tf.add_n([tf.nn.l2_loss(var) for var in params])
        loss = policy_term + value_term + args.l2_coeff * weight_penalty

    return X, Y, Z, p, v, params, loss
"""
c_act (exploration parameter of MCTS) and num_mcts (the full size of the MCTS tree) are determined in minisat.core.Const.h
NOTE: max_clause, max_var and nc are defined both here (in args for the model) and in minisat.core.Const.h (for writing states).
They need to BE the same.
nbatch is the degree of parallelism for the neural net
nstack is the number of history layers kept for a state
"""
def self_play(args, built_model, status_track):
    """Run one batched round of self-play and store the traces in the SL buffer.

    Args:
        args: parsed options; reads save_dir, train_path, n_train_files,
            nbatch, nrepeat, max_clause, max_var, dump_dir, dump_file and
            sl_buffer_size.
        built_model: the tuple returned by ``build_model``; only ``X``, ``p``,
            ``v`` and ``params`` are used here.
        status_track: tracker supplying the model directory to load
            (``get_model_dir``), a fresh model name (``generate_new_model``)
            and the file indices to play (``get_nbatch_index``).

    Side effects:
        May save the initial weights under ``args.save_dir`` and rewrites the
        pickled buffer at ``args.dump_dir/args.dump_file``.
    """
    # take out the parts that self_play needs from the model
    X, _, _, p, v, params, _ = built_model

    # within a tensorflow session, run MCT objects with model
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model_dir = status_track.get_model_dir()
        if (args.save_dir is not None) and (model_dir is not None):
            sess.run(load(params, os.path.join(args.save_dir, model_dir)))
            print("loaded model {} at dir {} for selfplay".format(args.save_dir, model_dir))
        else:
            # no stored model yet: these are the initial random parameters,
            # so persist them to disk as the first model
            ps = sess.run(params)
            model_dir = status_track.generate_new_model()
            save(ps, os.path.join(args.save_dir, model_dir))

        # one MCT per file index handed out by the tracker; tau is 1.0 for
        # the first 30 steps and 0.0001 afterwards
        MCTList = [
            MCT(args.train_path, i, args.max_clause, args.max_var, args.nrepeat,
                tau=lambda x: 1.0 if x <= 30 else 0.0001, resign=400)
            for i in status_track.get_nbatch_index(args.nbatch, args.n_train_files)
        ]
        pi_matrix = np.zeros((args.nbatch, 2 * args.max_var), dtype=np.float32)
        v_array = np.zeros((args.nbatch,), dtype=np.float32)
        # builtin bool instead of np.bool: the alias was deprecated in
        # NumPy 1.20 and removed in 1.24
        needMore = np.ones((args.nbatch,), dtype=bool)

        while True:
            states = []
            # Row k of pi_matrix / v_array belongs to the k-th tree that was
            # still active in the previous pass, hence the separate counter.
            pi_v_index = 0
            for i in range(args.nbatch):
                if not needMore[i]:
                    continue
                temp = MCTList[i].get_state(pi_matrix[pi_v_index], v_array[pi_v_index])
                pi_v_index += 1
                if temp is None:
                    # this tree has no further state to evaluate
                    needMore[i] = False
                else:
                    states.append(temp)
            if not np.any(needMore):
                break
            pi_matrix, v_array = sess.run(
                [p, v], feed_dict={X: np.asarray(states, dtype=np.float32)})

        print("loop finished and save Pi graph to slBuffer")

        # bring sl_buffer to memory
        os.makedirs(args.dump_dir, exist_ok=True)
        dump_trace = os.path.join(args.dump_dir, args.dump_file)
        if os.path.isfile(dump_trace):
            # NOTE: pickle.load can execute arbitrary code; only point
            # dump_dir/dump_file at files this program wrote itself.
            with open(dump_trace, 'rb') as sl_file:
                sl_Buffer = pickle.load(sl_file)
        else:
            sl_Buffer = slBuffer_allFile(args.sl_buffer_size, args.train_path, args.n_train_files)

        # write in sl_buffer
        for i in range(args.nbatch):
            MCTList[i].write_data_to_buffer(sl_Buffer)

        # write sl_buffer back to disk
        with open(dump_trace, 'wb') as sl_file:
            pickle.dump(sl_Buffer, sl_file, pickle.HIGHEST_PROTOCOL)
"""
this function does supervised training
"""
def super_train(args, built_model, status_track):
    """Run supervised training on the pickled self-play buffer.

    Starts from the model named by ``status_track.get_sl_starter()``, runs
    ``args.sl_num_steps + 1`` Adam steps on minibatches of size
    ``args.sl_nbatch`` sampled from the buffer, and saves a new model every
    ``args.sl_ncheckpoint`` steps (never at step 0).

    Raises:
        AssertionError: if ``args.save_dir`` or the starter model is None.
    """
    X, Y, Z, _, _, params, loss = built_model

    with tf.name_scope("train"):
        # the first cycle trains with a larger learning rate
        lr = 1e-2 if args.which_cycle == 0 else 1e-3
        train_step = tf.train.AdamOptimizer(lr).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        model_dir = status_track.get_sl_starter()
        assert not (args.save_dir is None or model_dir is None), "save_dir and model_dir needs to be specified for super_training"
        starter_path = os.path.join(args.save_dir, model_dir)
        sess.run(load(params, starter_path))
        print("loaded model {} at dir {} as super_training starter".format(args.save_dir, model_dir))

        # training data comes from the buffer dumped by self-play
        buffer_path = os.path.join(args.dump_dir, args.dump_file)
        with open(buffer_path, 'rb') as handle:
            sl_Buffer = pickle.load(handle)

        for step in range(args.sl_num_steps + 1):
            minibatch = sl_Buffer.sample(args.sl_nbatch)
            sess.run(train_step, {X: minibatch[0], Y: minibatch[1], Z: minibatch[2]})

            # checkpoint only on positive multiples of sl_ncheckpoint
            if step == 0 or step % args.sl_ncheckpoint != 0:
                continue
            new_model_dir = status_track.generate_new_model()
            print("checkpoint model {}".format(new_model_dir))
            weights = sess.run(params)
            save(weights, os.path.join(args.save_dir, new_model_dir))
"""
this function evaluates all unevaluated models, as indicated in the status_track object
"""
def model_ev(args, built_model, status_track, ev_testing = False):
    """Evaluate every model that ``status_track`` reports as unevaluated.

    Args:
        args: parsed options; reads save_dir, nbatch, max_clause, max_var and
            either (train_path, n_train_files) or (test_path, n_test_files).
        built_model: the tuple returned by ``build_model``; only ``X``, ``p``,
            ``v`` and ``params`` are used here.
        status_track: tracker that names the next model to evaluate
            (``which_model_to_evaluate``) and receives the per-file scores
            (``write_performance``).
        ev_testing: when True, evaluate on the testing files instead of the
            training files.

    Returns:
        None. Returns immediately when there is no model to evaluate.

    Raises:
        AssertionError: if ``args.nbatch`` exceeds the number of files.
    """
    # there may be several unevaluated models, and this function evaluates them all
    model_dir = status_track.which_model_to_evaluate()
    if model_dir is None:
        return

    # layer of indirection so the function fits both training and testing files
    if ev_testing:
        sat_path = args.test_path
        sat_num = args.n_test_files
    else:
        sat_path = args.train_path
        sat_num = args.n_train_files

    def new_tree(file_index):
        # tau is small for testing, and evaluation only solves a problem once
        return MCT(sat_path, file_index, args.max_clause, args.max_var, 1,
                   tau=lambda x: 0.001, resign=400)

    # take out the parts that evaluation needs from the model
    X, _, _, p, v, params, _ = built_model

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # may run multiple times because there may be multiple models to evaluate
        while model_dir is not None:
            sess.run(load(params, os.path.join(args.save_dir, model_dir)))
            print("loaded model {} at dir {} for evaluation".format(args.save_dir, model_dir))

            MCTList = [new_tree(i) for i in range(args.nbatch)]
            pi_matrix = np.zeros((args.nbatch, 2 * args.max_var), dtype=np.float32)
            v_array = np.zeros((args.nbatch,), dtype=np.float32)
            # builtin bool instead of np.bool: the alias was deprecated in
            # NumPy 1.20 and removed in 1.24
            needMore = np.ones((args.nbatch,), dtype=bool)

            # the first nbatch files are already assigned, one per slot
            next_file_index = args.nbatch
            assert (next_file_index <= sat_num), "this is a convention"
            all_files_done = next_file_index == sat_num
            performance = np.zeros(sat_num)

            while True:
                states = []
                # Row k of pi_matrix / v_array belongs to the k-th slot that
                # was still active in the previous pass.
                pi_v_index = 0
                for i in range(args.nbatch):
                    if not needMore[i]:
                        continue
                    temp = MCTList[i].get_state(pi_matrix[pi_v_index], v_array[pi_v_index])
                    pi_v_index += 1
                    while temp is None:
                        # this slot finished its file: record the score, then
                        # hand it the next unassigned file if any remain
                        idx, rep, scr = MCTList[i].report_performance()
                        performance[idx] = scr / rep
                        if all_files_done:
                            break
                        MCTList[i] = new_tree(next_file_index)
                        next_file_index += 1
                        if next_file_index >= sat_num:
                            all_files_done = True
                        # the pi and v are not used (for new MCT object)
                        temp = MCTList[i].get_state(pi_matrix[pi_v_index - 1], v_array[pi_v_index - 1])
                    if temp is None:
                        needMore[i] = False
                    else:
                        states.append(temp)
                if not np.any(needMore):
                    break
                pi_matrix, v_array = sess.run(
                    [p, v], feed_dict={X: np.asarray(states, dtype=np.float32)})

            # write performance to the status_track
            print(performance)
            status_track.write_performance(performance)
            model_dir = status_track.which_model_to_evaluate()
def ev_ss(args, built_model, status_track, file_no):
    """Run the current model on one training file and print its score.

    Intended for debugging one specific problem file.

    Args:
        args: parsed options; reads save_dir, train_path, max_clause and
            max_var.
        built_model: the tuple returned by ``build_model``; only ``X``, ``p``,
            ``v`` and ``params`` are used here.
        status_track: tracker whose ``get_model_dir()`` names the model to load.
        file_no: index of the file inside ``args.train_path`` to solve.

    Returns:
        None. Returns immediately when ``get_model_dir()`` yields None.
    """
    model_dir = status_track.get_model_dir()
    if model_dir is None:
        return

    X, _, _, p, v, params, _ = built_model
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(load(params, os.path.join(args.save_dir, model_dir)))
        print("load model {} at dir {}".format(args.save_dir, model_dir))

        # a single tree, solving the file once
        tree = MCT(args.train_path, file_no, args.max_clause, args.max_var, 1,
                   tau=lambda x: 0.9, resign=80)
        pi_matrix = np.zeros((1, 2 * args.max_var), dtype=np.float32)
        v_array = np.zeros((1,), dtype=np.float32)

        while True:
            state = tree.get_state(pi_matrix[0], v_array[0])
            if state is None:
                break
            # batch of exactly one state
            pi_matrix, v_array = sess.run(
                [p, v], feed_dict={X: np.asarray([state], dtype=np.float32)})

        _, rep, scr = tree.report_performance()
        print("performance is {}".format(scr / rep))
def main():
    """Parse the command line and run the self_play -> super_train -> model_ev cycles.

    With ``--show_only Yes`` the stored status and result trackers are printed
    and nothing is trained.
    """
    import argparse

    # (flag, type, help text, default) -- registered in this order
    option_table = (
        ('--save_dir', str, 'where is the model saved', "GPO5/mix20_120"),
        ('--best_model', int, "the index of the best model (-1 for unknown)", -1),
        ('--status_file', str, "which file keeps a record of the status", "status.pkl"),
        ('--result_file', str, "this file keeps the performance of models on testing files", "result.pkl"),
        ('--dump_dir', str, "where to save (state, Pi, num_step) for SL", "parameters/"),
        ('--dump_file', str, "what is the filename to save (state, Pi, num_step) for SL", "sl.pkl"),
        ('--train_path', str, 'where are training files', "satProb/mix20_train32/"),
        ('--test_path', str, 'where are test files', "satProb/mix20_test200/"),
        ('--max_clause', int, "what is the max_clause", 120),
        ('--max_var', int, "what is the max_var", 20),
        ('--sl_buffer_size', int, "max size of sl buffer", 1000000),
        ('--nbatch', int, "what is the batch size to use", 32),
        ('--nstack', int, "how many layers of states to use", 1),
        ('--nrepeat', int, "how many times to repeat a SAT problem", 100),
        ('--n_start', int, "which file index to start with (for running)", 0),
        ('--n_train_files', int, "total number of training files", 0),  # calculated later
        ('--n_test_files', int, "total number of testing files", 0),  # calculated later
        ('--l2_coeff', float, "the coefficient for l2 regularization", 0.0001),
        ('--sl_num_steps', int, "how many times to do supervised training", 64000),
        ('--sl_nbatch', int, "what is the batch size for supervised training", 32),
        ('--sl_ncheckpoint', int, "how often to checkpoint a supervised trained model", 32000),
        ('--n_cycles', int, "how many cycles of self_play -> super_train -> model_ev do we want to run", 2),
        ('--show_only', str, "if only show the result", "No"),
        ('--which_cycle', int, "which cycle are we in now", 0),
    )
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for flag, kind, text, fallback in option_table:
        parser.add_argument(flag, type=kind, help=text, default=fallback)
    args = parser.parse_args()

    def count_files(folder):
        # number of regular files directly inside `folder`
        return sum(1 for entry in os.listdir(folder)
                   if os.path.isfile(os.path.join(folder, entry)))

    # the file counts always come from disk, whatever was passed on the command line
    args.n_train_files = count_files(args.train_path)
    args.n_test_files = count_files(args.test_path)
    # everything belonging to this project lives in save_dir, so dump_dir is overridden
    args.dump_dir = args.save_dir
    os.makedirs(args.save_dir, exist_ok=True)

    # tracker for training-side status: resume from disk when a record exists
    status_path = os.path.join(args.save_dir, args.status_file)
    status_track = Status()
    if os.path.isfile(status_path):
        status_track.start_with(status_path)
    else:
        status_track.init_with(args.best_model, args.n_start, [], 0, status_path, args)
    status_track.show_itself()

    # tracker for the performance of models on the testing files
    result_path = os.path.join(args.save_dir, args.result_file)
    result_track = Status()
    if os.path.isfile(result_path):
        result_track.start_with(result_path)
    else:
        result_track.init_with(-1, 0, [], 0, result_path)

    if args.show_only == "Yes":
        for tracker in (status_track, result_track):
            tracker.show_itself()
            tracker.print_all_models_performance()
        return

    # one graph shared by self_play, super_train and model_ev
    built_model = build_model(args, scope="mcts")

    # For debugging, ev_ss(args, built_model, status_track, 0) or a single
    # model_ev(...) call can be run here in place of the loop below.

    for cycle in range(args.n_cycles):
        args.which_cycle = cycle
        self_play(args, built_model, status_track)
        status_track.show_itself()
        super_train(args, built_model, status_track)
        status_track.show_itself()
        model_ev(args, built_model, status_track)
        # keep the testing tracker in step with the training tracker
        result_track.set_same_length_hist(status_track)
        model_ev(args, built_model, result_track, ev_testing=True)
        status_track.show_itself()

    # print the performance of all models we have so far
    status_track.print_all_models_performance()
    result_track.print_all_models_performance()
import sys
# Script entry point: run the pipeline only when this file is executed directly.
if __name__ == '__main__':
    main()