def predict(data, config):
    """Restore the newest checkpoint and compute class probabilities for `data`.

    Args:
        data: iterable of example tuples; `(left, right, label, _)` when
            config['split'] is truthy, else `(x, label, _)`.
        config: dict of model settings; must contain 'train_dir'.

    Returns:
        Tuple (prob, predictions, y_true) where
        prob.shape = (num_examples, num_classes),
        predictions.shape = (11, num_examples, num_classes) — the scores
        binarized at thresholds 0.0, 0.1, ..., 1.0 for an 11-point PR curve —
        and y_true is the tuple of gold labels.

    Raises:
        IOError: if no checkpoint can be found in config['train_dir'].
    """
    with tf.Graph().as_default():
        with tf.variable_scope('cnn'):
            if config.has_key('split') and config['split']:
                import cnn_split
                m = cnn_split.Model(config, is_train=False)
            else:
                import cnn
                m = cnn.Model(config, is_train=False)
        saver = tf.train.Saver(tf.all_variables())

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(config['train_dir'])
            # Fix: guard against a missing checkpoint instead of crashing with
            # AttributeError on ckpt.model_checkpoint_path (consistent with the
            # other evaluation helpers in this file).
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise IOError("Loading checkpoint file failed!")

            if config.has_key('split') and config['split']:
                left_batch, right_batch, y_true, _ = zip(*data)
                feed = {m.left: np.array(left_batch),
                        m.right: np.array(right_batch)}
            else:
                x_batch, y_true, _ = zip(*data)
                feed = {m.inputs: np.array(x_batch)}
            prob = sess.run(m.scores, feed_dict=feed)

            # Fix: threshold in numpy instead of building 11 fresh tf.select
            # ops on the graph per call; the values are identical
            # (1.0 where prob > threshold, else 0.0).
            predictions = [np.where(prob > i,
                                    np.ones_like(prob),
                                    np.zeros_like(prob))
                           for i in np.linspace(0, 1, 11)]

    # prob.shape = (num_examples, num_classes)
    # predictions.shape = (11, num_examples, num_classes) <- for 11-point PR Curve
    return prob, np.array(predictions), y_true
def evaluate(eval_data, config): """ Build evaluation graph and run. """ with tf.Graph().as_default(): with tf.variable_scope('cnn'): if config.has_key('contextwise') and config['contextwise']: import cnn_context m = cnn_context.Model(config, is_train=False) else: import cnn m = cnn.Model(config, is_train=False) saver = tf.train.Saver(tf.all_variables()) with tf.Session() as sess: ckpt = tf.train.get_checkpoint_state(config['train_dir']) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise IOError("Loading checkpoint file failed!") #embeddings = sess.run(tf.all_variables())[0] print "\nStart evaluation\n" #losses = [] #precision = [] #recall = [] #batches = util.batch_iter(eval_data, batch_size=config['batch_size'], num_epochs=1, shuffle=False) #for batch in batches: if config.has_key('contextwise') and config['contextwise']: left_batch, middle_batch, right_batch, y_batch, _ = zip( *eval_data) feed = { m.left: np.array(left_batch), m.middle: np.array(middle_batch), m.right: np.array(right_batch), m.labels: np.array(y_batch) } else: x_batch, y_batch, _ = zip(*eval_data) feed = { m.inputs: np.array(x_batch), m.labels: np.array(y_batch) } loss, eval = sess.run([m.total_loss, m.eval_op], feed_dict=feed) #losses.append(loss) pre, rec = zip(*eval) #precision.append(pre) #recall.append(rec) avg_precision = np.mean(np.array(pre)) avg_recall = np.mean(np.array(rec)) auc = util.calc_auc_pr(pre, rec) f1 = (2.0 * pre[5] * rec[5]) / (pre[5] + rec[5]) print '%s: loss = %.6f, f1 = %.4f, auc = %.4f' % (datetime.now(), loss, f1, auc) return pre, rec
def label(eval_data, config): """ Build evaluation graph and run. """ with tf.Graph().as_default(): with tf.variable_scope('cnn'): if config.has_key('contextwise') and config['contextwise']: import cnn_context m = cnn_context.Model(config, is_train=False) else: import cnn m = cnn.Model(config, is_train=False) saver = tf.train.Saver(tf.global_variables()) tf_config = tf.ConfigProto() if config.get("gpu_percentage", 0) > 0: tf_config.gpu_options.per_process_gpu_memory_fraction = config.get( "gpu_percentage", 0) else: tf_config = tf.ConfigProto(device_count={'GPU': 0}) with tf.Session(config=tf_config) as sess: ckpt = tf.train.get_checkpoint_state(config['train_dir']) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise IOError("Loading checkpoint file failed!") print "\nStart evaluation\n" x_data = np.array(eval_data) data_size = x_data.shape[0] batch_size = 10 actual_output = [] start_i = 0 while start_i < data_size: end_i = min(start_i + batch_size, data_size) if config.has_key('contextwise') and config['contextwise']: raise NotImplementedError # left_batch, middle_batch, right_batch, y_batch, _ = zip(*eval_data) # feed = {m.left: np.array(left_batch), # m.middle: np.array(middle_batch), # m.right: np.array(right_batch), # m.labels: np.array(y_batch)} else: x_batch = eval_data[start_i:end_i] feed = {m.inputs: x_batch} current_actual_output, = sess.run([m.scores], feed_dict=feed) actual_output.append(current_actual_output) start_i = end_i actual_output = np.concatenate(actual_output, axis=0) return x_data, actual_output
def __init__(self, root):
    """Build the drawing canvas and the right-hand result/control panel."""
    self.root = root
    self.res = ""
    self.pre = [None, None]
    self.model = cnn.Model()
    self.bs = 8.5

    # Drawing surface on the left.
    self.c = Canvas(self.root,
                    bd=3,
                    relief="ridge",
                    width=300,
                    height=282,
                    bg='white')
    self.c.grid(row=0, columnspan=5)
    self.c.pack(side=LEFT)

    # Right-hand panel: headings, prediction label, scores box, clear button.
    side_panel = Frame(self.root, padx=5, pady=5)
    Label(side_panel,
          text="Real-Time Hand-Written Digits Recognition",
          fg="green",
          font=("", 15, "bold")).pack(pady=10)
    Label(side_panel,
          text="<<--Draw Your Digit on Canvas",
          fg="green",
          font=("", 15)).pack()
    self.pr = Label(side_panel,
                    text="Prediction: None",
                    fg="blue",
                    font=("", 20, "bold"))
    self.pr.pack(pady=20)
    self.predictionScores = Text(side_panel,
                                 height=10,
                                 width=25,
                                 padx=5,
                                 bg='white',
                                 borderwidth=4,
                                 highlightthickness=0,
                                 relief='ridge')
    self.predictionScores.pack(side=RIGHT)
    scores_caption = Label(side_panel,
                           text="Scores:",
                           fg="black",
                           font=("", 15, "bold"))
    scores_caption.place(relx=0.7, rely=0.45)
    Button(side_panel,
           font=("", 15),
           fg="white",
           bg="red",
           text="Clear",
           command=self.clear).pack(side=BOTTOM)
    side_panel.pack(side=RIGHT, fill=Y)

    # Mouse bindings: dot on click, draw while dragging, classify on release.
    self.c.bind("<Button-1>", self.putPoint)
    self.c.bind("<ButtonRelease-1>", self.result)
    self.c.bind("<B1-Motion>", self.paint)
def evaluate(config):
    """Run the model over the train or test split and report loss/accuracy.

    Loads 'train3.pkl' when FLAGS.train_data is set, otherwise 'test3.pkl',
    restores the newest checkpoint from FLAGS.train_dir, accumulates loss
    and correct-prediction counts over all batches, and prints the averages.

    Args:
        config: dict of model settings passed to cnn.Model.

    Returns:
        (avg_loss, accuracy, predictions) — mean batch loss, overall
        accuracy, and the per-example score rows in data order.

    Raises:
        IOError: if no checkpoint can be found in FLAGS.train_dir.
    """
    with tf.Graph().as_default():
        with tf.variable_scope('cnn'):
            m = cnn.Model(config, is_train=False)
        saver = tf.train.Saver(tf.global_variables())

        # read test files
        if FLAGS.train_data:
            loader = util.DataLoader(FLAGS.data_dir,
                                     'train3.pkl',
                                     batch_size=FLAGS.batch_size)
        else:
            loader = util.DataLoader(FLAGS.data_dir,
                                     'test3.pkl',
                                     batch_size=FLAGS.batch_size)

        print(
            'Start evaluation, %d batches needed, with %d examples per batch.'
            % (loader.num_batch, FLAGS.batch_size))

        true_count = 0
        avg_loss = 0
        predictions = []
        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise IOError("Loading checkpoint file failed!")

            for _ in range(loader.num_batch):
                x_batch, y_batch = loader.next_batch()
                scores, loss_value, true_count_value = sess.run(
                    [m.scores, m.total_loss, m.true_count_op],
                    feed_dict={
                        m.inputs: x_batch,
                        m.labels: y_batch
                    })
                predictions.extend(scores)
                true_count += true_count_value
                avg_loss += loss_value

            # NOTE(review): relies on the loader's private `_num_examples`
            # attribute — confirm DataLoader exposes no public count.
            accuracy = float(true_count) / loader._num_examples
            avg_loss = float(avg_loss) / loader.num_batch
            print('%s: test_loss = %.6f, test_accuracy = %.3f' %
                  (datetime.now(), avg_loss, accuracy))
    # Fix: the metrics and per-example scores were computed but discarded
    # (function returned None); returning them is backward compatible for
    # callers that ignore the return value.
    return avg_loss, accuracy, predictions
def evaluate(eval_data, config): """ Build evaluation graph and run. """ with tf.Graph().as_default(): with tf.variable_scope('cnn'): if config.has_key('contextwise') and config['contextwise']: import cnn_context m = cnn_context.Model(config, is_train=False) else: import cnn m = cnn.Model(config, is_train=False) saver = tf.train.Saver(tf.global_variables()) with tf.Session() as sess: ckpt = tf.train.get_checkpoint_state(config['train_dir']) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise IOError("Loading checkpoint file failed!") print "\nStart evaluation on test set ...\n" if config.has_key('contextwise') and config['contextwise']: left_batch, middle_batch, right_batch, y_batch, _ = zip( *eval_data) feed = { m.left: np.array(left_batch), m.middle: np.array(middle_batch), m.right: np.array(right_batch), m.labels: np.array(y_batch) } else: x_batch, y_batch, _ = zip(*eval_data) feed = { m.inputs: np.array(x_batch), m.labels: np.array(y_batch) } loss, eval = sess.run([m.total_loss, m.eval_op], feed_dict=feed) pre, rec = zip(*eval) auc = util.calc_auc_pr(pre, rec) f1 = (2.0 * pre[5] * rec[5]) / (pre[5] + rec[5]) print '%s: loss = %.6f, p = %.4f, r = %4.4f, f1 = %.4f, auc = %.4f' % ( datetime.now(), loss, pre[5], rec[5], f1, auc) return pre, rec
def _load_model(self, config_path, checkpoint_path):
    """Build the inference graph on GPU 0, restore the newest checkpoint,
    and return ``(session, inputs_tensor, scores_tensor)`` for feeding.

    Raises:
        IOError: if no checkpoint is found under `checkpoint_path`.
    """
    gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_MEM_FRAC)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts,
                                               allow_soft_placement=True))
    model_config = self._load_config(config_path)
    with tf.device("/gpu:0"):
        with tf.variable_scope('cnn'):
            net = cnn.Model(model_config, is_train=False)
    saver = tf.train.Saver(tf.global_variables())
    ckpt = tf.train.get_checkpoint_state(checkpoint_path)
    if not (ckpt and ckpt.model_checkpoint_path):
        raise IOError("Loading checkpoint file failed!")
    saver.restore(session, ckpt.model_checkpoint_path)
    return session, net.inputs, net.scores
def predict_part(x, config, raw_text=True):
    """Classify a single example (raw text or pre-encoded input).

    Args:
        x: raw text when `raw_text` is True, otherwise an already-encoded
            input array/batch fed directly to the model.
        config: dict with at least 'data_dir' and 'train_dir'.
        raw_text: whether `x` must first be converted to ids via the vocab.

    Returns:
        dict with 'prediction' (class name for raw text, argmax indices
        otherwise) and 'scores' (a {class_name: score} dict for raw text,
        the raw score array otherwise).

    Raises:
        IOError: if no checkpoint is found in config['train_dir'].
    """
    if raw_text:
        vocab = util.VocabLoader(config['data_dir'])
        x = vocab.text2id(x)
        class_names = vocab.class_names
        x_input = np.array([x])
    else:
        x_input = x

    with tf.Graph().as_default():
        with tf.variable_scope('cnn'):
            m = cnn.Model(config, is_train=False)
        saver = tf.train.Saver(tf.global_variables())

        with tf.Session() as sess:
            train_dir = config['train_dir']
            ckpt = tf.train.get_checkpoint_state(train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise IOError("Loading checkpoint file failed!")
            scores = sess.run(m.scores, feed_dict={m.inputs: x_input})

            if raw_text:
                # Fix: convert directly with float(); the old float(str(i))
                # round-trip did needless string formatting and lost
                # precision through str()'s truncated representation.
                scores = [float(i) for i in scores[0]]
                y_pred = class_names[int(np.argmax(scores))]
                scores = dict(zip(class_names, scores))
            else:
                y_pred = np.argmax(scores, axis=1)

            ret = dict()
            ret['prediction'] = y_pred
            ret['scores'] = scores
            return ret
def train(train_data, test_data):
    """Train the CNN and periodically evaluate on the dev set.

    Creates a timestamped run directory under FLAGS.train_dir, dumps the
    flag configuration there, builds a training model and a variable-sharing
    eval model in the same 'cnn' scope, then runs the mini-batch loop with
    periodic console logging, TensorBoard summaries (including embedding
    projector metadata), dev evaluation, plateau-based learning-rate decay
    and checkpointing.

    NOTE(review): uses private TF flag internals (FLAGS._parse_flags,
    FLAGS.__flags) and module-level helpers (util, cnn, _summary_for_scalar)
    defined elsewhere in this project.
    """
    # train_dir
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, timestamp))

    # save flags
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    FLAGS._parse_flags()
    config = dict(FLAGS.__flags.items())

    # Window_size must not be larger than the sent_len
    if config['sent_len'] < config['max_window']:
        config['max_window'] = config['sent_len']

    util.dump_to_file(os.path.join(out_dir, 'flags.cPickle'), config)
    print("Parameters:")
    for k, v in config.items():
        print('%20s %r' % (k, v))

    num_batches_per_epoch = int(
        np.ceil(float(len(train_data)) / FLAGS.batch_size))
    max_steps = num_batches_per_epoch * FLAGS.num_epochs

    with tf.Graph().as_default():
        # Two models share the 'cnn' variables: m trains, mtest evaluates.
        with tf.variable_scope('cnn', reuse=None):
            m = cnn.Model(config, is_train=True)
        with tf.variable_scope('cnn', reuse=True):
            mtest = cnn.Model(config, is_train=False)

        # checkpoint
        saver = tf.train.Saver(tf.global_variables())
        save_path = os.path.join(out_dir, 'model.ckpt')
        summary_op = tf.summary.merge_all()

        # session
        with tf.Session().as_default() as sess:
            # TensorBoard embedding-projector config for the word embeddings.
            proj_config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig(
            )
            embedding = proj_config.embeddings.add()
            embedding.tensor_name = m.W_emb.name
            embedding.metadata_path = os.path.join(FLAGS.data_dir, 'vocab.txt')

            train_summary_writer = tf.summary.FileWriter(os.path.join(
                out_dir, "train"),
                                                         graph=sess.graph)
            dev_summary_writer = tf.summary.FileWriter(os.path.join(
                out_dir, "dev"),
                                                       graph=sess.graph)
            tf.contrib.tensorboard.plugins.projector.visualize_embeddings(
                train_summary_writer, proj_config)
            tf.contrib.tensorboard.plugins.projector.visualize_embeddings(
                dev_summary_writer, proj_config)

            sess.run(tf.global_variables_initializer())

            # assign pretrained embeddings
            if FLAGS.use_pretrain:
                print("Initialize model with pretrained embeddings...")
                pretrained_embedding = np.load(
                    os.path.join(FLAGS.data_dir, 'emb.npy'))
                m.assign_embedding(sess, pretrained_embedding)

            # initialize parameters
            current_lr = FLAGS.init_lr
            lowest_loss_value = float("inf")
            decay_step_counter = 0
            global_step = 0

            # evaluate on dev set
            def dev_step(mtest, sess):
                """Return (mean loss, mean AUC, mean F1) over all dev batches."""
                dev_loss = []
                dev_auc = []
                dev_f1_score = []

                # create batch
                test_batches = util.batch_iter(test_data,
                                               batch_size=FLAGS.batch_size,
                                               num_epochs=1,
                                               shuffle=False)
                for batch in test_batches:
                    x_batch, y_batch, _ = zip(*batch)
                    loss_value, eval_value = sess.run(
                        [mtest.total_loss, mtest.eval_op],
                        feed_dict={
                            mtest.inputs: np.array(x_batch),
                            mtest.labels: np.array(y_batch)
                        })
                    dev_loss.append(loss_value)
                    pre, rec = zip(*eval_value)
                    # look at the 5th index, which corresponds to a threshold = 0.5
                    threshold = 5
                    dev_auc.append(util.calc_auc_pr(pre, rec, threshold))
                    dev_f1_score.append(
                        (2.0 * pre[threshold] * rec[threshold]) /
                        (pre[threshold] + rec[threshold]))
                return np.mean(dev_loss), np.mean(dev_auc), np.mean(
                    dev_f1_score)

            # train loop
            print("\nStart training (save checkpoints in %s)\n" % out_dir)
            train_loss = []
            train_auc = []
            train_f1_score = []
            train_batches = util.batch_iter(train_data,
                                            batch_size=FLAGS.batch_size,
                                            num_epochs=FLAGS.num_epochs)
            for batch in train_batches:
                batch_size = len(batch)
                m.assign_lr(sess, current_lr)
                global_step += 1

                x_batch, y_batch, a_batch = zip(*batch)
                feed = {
                    m.inputs: np.array(x_batch),
                    m.labels: np.array(y_batch)
                }
                if FLAGS.attention:
                    feed[m.attention] = np.array(a_batch)

                start_time = time.time()
                _, loss_value, eval_value = sess.run(
                    [m.train_op, m.total_loss, m.eval_op], feed_dict=feed)
                proc_duration = time.time() - start_time

                train_loss.append(loss_value)
                pre, rec = zip(*eval_value)
                # look at the 5th index, which corresponds to a threshold = 0.5
                threshold = 5
                auc = util.calc_auc_pr(pre, rec, threshold)
                f1 = (2.0 * pre[threshold] * rec[threshold]) / (
                    pre[threshold] + rec[threshold])
                train_auc.append(auc)
                train_f1_score.append(f1)
                assert not np.isnan(loss_value), "Model loss is NaN."

                # print log
                if global_step % FLAGS.log_step == 0:
                    examples_per_sec = batch_size / proc_duration
                    format_str = '%s: step %d/%d, f1 = %.4f, auc = %.4f, loss = %.4f ' + \
                                 '(%.1f examples/sec; %.3f sec/batch), lr: %.6f'
                    print(format_str %
                          (datetime.now(), global_step, max_steps, f1, auc,
                           loss_value, examples_per_sec, proc_duration,
                           current_lr))

                # write summary
                if global_step % FLAGS.summary_step == 0:
                    summary_str = sess.run(summary_op)
                    train_summary_writer.add_summary(summary_str, global_step)
                    dev_summary_writer.add_summary(summary_str, global_step)

                    # summary loss, f1
                    train_summary_writer.add_summary(_summary_for_scalar(
                        'loss', np.mean(train_loss)),
                                                     global_step=global_step)
                    train_summary_writer.add_summary(_summary_for_scalar(
                        'auc', np.mean(train_auc)),
                                                     global_step=global_step)
                    train_summary_writer.add_summary(_summary_for_scalar(
                        'f1', np.mean(train_f1_score)),
                                                     global_step=global_step)

                    dev_loss, dev_auc, dev_f1 = dev_step(mtest, sess)
                    dev_summary_writer.add_summary(_summary_for_scalar(
                        'loss', dev_loss),
                                                   global_step=global_step)
                    dev_summary_writer.add_summary(_summary_for_scalar(
                        'auc', dev_auc),
                                                   global_step=global_step)
                    dev_summary_writer.add_summary(_summary_for_scalar(
                        'f1', dev_f1),
                                                   global_step=global_step)

                    print("\n===== write summary =====")
                    print("%s: step %d/%d: train_loss = %.6f, train_auc = %.4f, train_f1 = %.4f" \
                          % (datetime.now(), global_step, max_steps,
                             np.mean(train_loss), np.mean(train_auc), np.mean(train_f1_score)))
                    print("%s: step %d/%d: dev_loss = %.6f, dev_auc = %.4f, dev_f1 = %.4f\n" \
                          % (datetime.now(), global_step, max_steps,
                             dev_loss, dev_auc, dev_f1))

                    # reset container
                    train_loss = []
                    train_auc = []
                    train_f1_score = []

                # decay learning rate if necessary
                # (decay when the loss has not improved for tolerance_step steps)
                if loss_value < lowest_loss_value:
                    lowest_loss_value = loss_value
                    decay_step_counter = 0
                else:
                    decay_step_counter += 1
                if decay_step_counter >= FLAGS.tolerance_step:
                    current_lr *= FLAGS.lr_decay
                    print('%s: step %d/%d, Learning rate decays to %.5f' % \
                          (datetime.now(), global_step, max_steps,
                           current_lr))
                    decay_step_counter = 0

                # stop learning if learning rate is too low
                if current_lr < 1e-5:
                    break

                # save checkpoint
                if global_step % FLAGS.checkpoint_step == 0:
                    saver.save(sess, save_path, global_step=global_step)
            # final checkpoint after the loop ends (or breaks early)
            saver.save(sess, save_path, global_step=global_step)
def train(train_data, test_data, FLAGS=tf.app.flags.FLAGS):
    """Train the CNN with dev-set evaluation, summaries and checkpointing.

    Creates a timestamped run directory under FLAGS.train_dir, dumps the
    flag configuration, builds a training model and a variable-sharing eval
    model, then runs the batch loop with periodic logging, summary writing,
    dev evaluation, plateau-based learning-rate decay and checkpoint saves.
    The try/except pairs fall back to pre-1.0 TensorFlow API names.

    NOTE(review): uses private TF flag internals (FLAGS._parse_flags,
    FLAGS.__flags) and helpers (util, cnn, _summary_for_scalar) defined
    elsewhere in this project.
    """
    # train_dir
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, timestamp))

    # save flags
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    FLAGS._parse_flags()
    config = dict(FLAGS.__flags.items())

    # Window_size must not be larger than the sent_len
    if config['sent_len'] < config['max_window']:
        config['max_window'] = config['sent_len']

    util.dump_to_file(os.path.join(out_dir, 'flags.cPickle'), config)

    num_batches_per_epoch = int(
        np.ceil(float(len(train_data)) / FLAGS.batch_size))
    max_steps = num_batches_per_epoch * FLAGS.num_epochs

    with tf.Graph().as_default():
        # Two models share the 'cnn' variables: m trains, mtest evaluates.
        with tf.variable_scope('cnn', reuse=None):
            m = cnn.Model(config, is_train=True)
        with tf.variable_scope('cnn', reuse=True):
            mtest = cnn.Model(config, is_train=False)

        # checkpoint
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
        save_path = os.path.join(out_dir, 'model.ckpt')
        try:
            summary_op = tf.summary.merge_all()
        except:
            summary_op = tf.merge_all_summaries()

        # session
        # NOTE(review): `config` (the flags dict, already consumed above) is
        # rebound here to a tf.ConfigProto — the dict is no longer reachable
        # below this point.
        config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)
        if FLAGS.gpu_percentage > 0:
            config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_percentage
        else:
            # Hide all GPUs and run on CPU.
            config = tf.ConfigProto(
                log_device_placement=FLAGS.log_device_placement,
                device_count={'GPU': 0})
        sess = tf.Session(config=config)

        with sess.as_default():
            train_summary_writer = tf.summary.FileWriter(os.path.join(
                out_dir, "train"),
                                                         graph=sess.graph)
            dev_summary_writer = tf.summary.FileWriter(os.path.join(
                out_dir, "dev"),
                                                       graph=sess.graph)
            try:
                sess.run(tf.global_variables_initializer())
            except:
                sess.run(tf.initialize_all_variables())

            # assign pretrained embeddings
            if FLAGS.use_pretrain:
                print "Initialize model with pretrained embeddings..."
                print(
                    "Please don't forget to change the vocab size to the corresponding on in the embedding."
                )
                pretrained_embedding = np.load(
                    os.path.join(FLAGS.data_dir, 'emb.npy'))
                m.assign_embedding(sess, pretrained_embedding)

            # initialize parameters
            current_lr = FLAGS.init_lr
            lowest_loss_value = float("inf")
            decay_step_counter = 0
            global_step = 0

            # evaluate on dev set
            def dev_step(mtest, sess):
                """Return (mean loss, mean AUC, mean F1) over all dev batches."""
                dev_loss = []
                dev_auc = []
                dev_f1_score = []

                # create batch
                test_batches = util.batch_iter(test_data,
                                               batch_size=FLAGS.batch_size,
                                               num_epochs=1,
                                               shuffle=False)
                for batch in test_batches:
                    x_batch, y_batch, _ = zip(*batch)
                    #a_batch = np.ones((len(batch), 1), dtype=np.float32) / len(batch)  # average
                    loss_value, eval_value = sess.run(
                        [mtest.total_loss, mtest.eval_op],
                        feed_dict={
                            mtest.inputs: np.array(x_batch),
                            mtest.labels: np.array(y_batch)
                        })
                    dev_loss.append(loss_value)
                    pre, rec = zip(*eval_value)
                    dev_auc.append(util.calc_auc_pr(pre, rec))
                    dev_f1_score.append((2.0 * pre[5] * rec[5]) /
                                        (pre[5] + rec[5]))  # threshold = 0.5
                return (np.mean(dev_loss), np.mean(dev_auc),
                        np.mean(dev_f1_score))

            # train loop
            print "\nStart training (save checkpoints in %s)\n" % out_dir
            train_loss = []
            train_auc = []
            train_f1_score = []
            train_batches = util.batch_iter(train_data,
                                            batch_size=FLAGS.batch_size,
                                            num_epochs=FLAGS.num_epochs)
            for batch in train_batches:
                batch_size = len(batch)
                m.assign_lr(sess, current_lr)
                global_step += 1

                x_batch, y_batch, a_batch = zip(*batch)
                feed = {
                    m.inputs: np.array(x_batch),
                    m.labels: np.array(y_batch)
                }
                if FLAGS.attention:
                    feed[m.attention] = np.array(a_batch)

                start_time = time.time()
                _, loss_value, eval_value = sess.run(
                    [m.train_op, m.total_loss, m.eval_op], feed_dict=feed)
                proc_duration = time.time() - start_time

                train_loss.append(loss_value)
                pre, rec = zip(*eval_value)
                auc = util.calc_auc_pr(pre, rec)
                f1 = (2.0 * pre[5] * rec[5]) / (pre[5] + rec[5]
                                                )  # threshold = 0.5
                train_auc.append(auc)
                train_f1_score.append(f1)
                assert not np.isnan(loss_value), "Model loss is NaN."

                # print log
                if global_step % FLAGS.log_step == 0:
                    examples_per_sec = batch_size / proc_duration
                    format_str = '%s: step %d/%d, f1 = %.4f, auc = %.4f, loss = %.4f ' + \
                                 '(%.1f examples/sec; %.3f sec/batch), lr: %.6f'
                    print format_str % (datetime.now(), global_step, max_steps,
                                        f1, auc, loss_value, examples_per_sec,
                                        proc_duration, current_lr)

                # write summary
                if global_step % FLAGS.summary_step == 0:
                    summary_str = sess.run(summary_op)
                    train_summary_writer.add_summary(summary_str, global_step)
                    dev_summary_writer.add_summary(summary_str, global_step)

                    # summary loss, f1
                    train_summary_writer.add_summary(_summary_for_scalar(
                        'loss', np.mean(train_loss)),
                                                     global_step=global_step)
                    train_summary_writer.add_summary(_summary_for_scalar(
                        'auc', np.mean(train_auc)),
                                                     global_step=global_step)
                    train_summary_writer.add_summary(_summary_for_scalar(
                        'f1', np.mean(train_f1_score)),
                                                     global_step=global_step)

                    dev_loss, dev_auc, dev_f1 = dev_step(mtest, sess)
                    dev_summary_writer.add_summary(_summary_for_scalar(
                        'loss', dev_loss),
                                                   global_step=global_step)
                    dev_summary_writer.add_summary(_summary_for_scalar(
                        'auc', dev_auc),
                                                   global_step=global_step)
                    dev_summary_writer.add_summary(_summary_for_scalar(
                        'f1', dev_f1),
                                                   global_step=global_step)

                    print "\n===== write summary ====="
                    print "%s: step %d/%d: train_loss = %.6f, train_auc = %.4f, train_f1 = %.4f" \
                          % (datetime.now(), global_step, max_steps,
                             np.mean(train_loss), np.mean(train_auc), np.mean(train_f1_score))
                    print "%s: step %d/%d: dev_loss = %.6f, dev_auc = %.4f, dev_f1 = %.4f\n" \
                          % (datetime.now(), global_step, max_steps,
                             dev_loss, dev_auc, dev_f1)

                    # reset container
                    train_loss = []
                    train_auc = []
                    train_f1_score = []

                # decay learning rate if necessary
                # (decay when the loss has not improved for tolerance_step steps)
                if loss_value < lowest_loss_value:
                    lowest_loss_value = loss_value
                    decay_step_counter = 0
                else:
                    decay_step_counter += 1
                if decay_step_counter >= FLAGS.tolerance_step:
                    current_lr *= FLAGS.lr_decay
                    print '%s: step %d/%d, Learning rate decays to %.5f' % \
                          (datetime.now(), global_step, max_steps,
                           current_lr)
                    decay_step_counter = 0

                # stop learning if learning rate is too low
                if current_lr < 1e-5:
                    break

                # save checkpoint
                if global_step % FLAGS.checkpoint_step == 0:
                    saver.save(sess, save_path, global_step=global_step)
            # final checkpoint after the loop ends (or breaks early)
            saver.save(sess, save_path, global_step=global_step)
""" from cnn import optimisers import cnn import mnist_dataloader # np.set_printoptions(linewidth=200) train_images, train_labels, test_images, test_labels = mnist_dataloader.get_data( ) train_images = train_images.reshape( (train_images.shape[0], 1, train_images.shape[1], train_images.shape[2])) test_images = test_images.reshape( (test_images.shape[0], 1, test_images.shape[1], test_images.shape[2])) print(train_images.shape, train_labels.shape) model = cnn.Model() # model.add_layer( # CNN.Conv_Layer(filt_shape=(5,5),num_filters=5,stride=2,pad_type='include',input_shape=(1,28,28)) # ) # model.add_layer( # CNN.Pool_Layer(filt_shape=(3,3),stride=1,pool_type='max',pad_type='include') # ) model.add_layer(cnn.layers.Flatten(input_shape=(1, 28, 28))) model.add_layer( cnn.layers.FC(128, activation='relu', initiation_method='kaiming_normal')) model.add_layer( cnn.layers.FC(128, activation='relu', initiation_method='kaiming_normal')) model.add_layer( cnn.layers.FC(10, activation='softmax', initiation_method='kaiming_normal'))
# Assemble the rest of the layer chain and train/evaluate a small model.
# NOTE(review): `conv_layer2`, `num_classes`, `train_inputs` and
# `train_outputs` are defined earlier in this script (outside this view).
relu_layer2 = cnn.ReLU(previous_layer=conv_layer2)
max_pooling_layer = cnn.MaxPooling2D(pool_size=2,
                                     previous_layer=relu_layer2,
                                     stride=2)
conv_layer3 = cnn.Conv2D(num_filters=1,
                         kernel_size=3,
                         previous_layer=max_pooling_layer,
                         activation_function=None)
relu_layer3 = cnn.ReLU(previous_layer=conv_layer3)
pooling_layer = cnn.AveragePooling2D(pool_size=2,
                                     previous_layer=relu_layer3,
                                     stride=2)
# Flatten the feature maps, then two dense layers ending in a softmax head.
flatten_layer = cnn.Flatten(previous_layer=pooling_layer)
dense_layer1 = cnn.Dense(num_neurons=100,
                         previous_layer=flatten_layer,
                         activation_function="relu")
dense_layer2 = cnn.Dense(num_neurons=num_classes,
                         previous_layer=dense_layer1,
                         activation_function="softmax")

# Smoke run: train and predict on the first 5 examples only.
model = cnn.Model(last_layer=dense_layer2, epochs=1, learning_rate=0.01)
model.summary()
model.train(train_inputs=train_inputs[:5, :],
            train_outputs=train_outputs[:5])
predictions = model.predict(data_inputs=train_inputs[:5, :])
print(predictions)
print("currentplayer : ", self.currentPlayer) print("current action : ", self.currentAction) print("board state : \n") self.board.drawCurrentBoard() print("nextActions : \n", self.nextActions) print("actionsLength : ", self.actionsLength) print("nstone : ", self.nstone) print("probability distribution : \n", self.prob_distrib) print("promisingActions : \n", self.promisingActions) print("score : ", self.score) if __name__ == "__main__": with tf.Session() as sess: m = cnn.Model(sess, "my_model") saver = tf.train.Saver() saver.restore( sess, str(os.getcwd()) + "\\training_9_20170811\\trained_model_at_epoch100.ckpt") b = Board(15, "EMPTY") b.putStoneOnBoard(7, 7, 'B') # b.putStoneOnBoard(7, 8, 'W') # b.putStoneOnBoard(6, 7, 'B') # b.putStoneOnBoard(8, 7, 'W') # b.putStoneOnBoard(8, 6, 'B') # b.putStoneOnBoard(9, 6, 'W') # b.putStoneOnBoard(6, 9, 'B')
def train(train_data, test_data, class_names=None, relations=None):
    """Train the CNN with active-learning batch selection.

    Creates a timestamped run directory, dumps the flag configuration, and
    builds train/eval models sharing the 'cnn' scope.  Each epoch shuffles
    the data; after the first plain batch, subsequent batches are chosen by
    util.most_informative over a candidate pool (strategy from
    FLAGS.strategy), with per-example query time tracked and reported.

    NOTE(review): slicing like `train_data[pool_idx]` with an index list
    implies `train_data` is a numpy array — confirm at call sites.
    """
    # train_dir
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, timestamp))

    # save flags
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    FLAGS._parse_flags()
    config = dict(FLAGS.__flags.items())

    # Window_size must not be larger than the sent_len
    if config['sent_len'] < config['max_window']:
        config['max_window'] = config['sent_len']

    # save flags
    config['train_dir'] = out_dir
    util.dump_to_file(os.path.join(out_dir, 'flags.cPickle'), config)

    # display parameter settings
    print 'Parameters:'
    for k, v in config.iteritems():
        print '\t' + k + '=' + str(v)

    num_batches_per_epoch = int(
        np.ceil(float(len(train_data)) / FLAGS.batch_size))
    max_steps = num_batches_per_epoch * FLAGS.num_epochs

    with tf.Graph().as_default():
        # Two models share the 'cnn' variables: m trains, mtest evaluates.
        with tf.variable_scope('cnn', reuse=None):
            m = cnn.Model(config, is_train=True)
        with tf.variable_scope('cnn', reuse=True):
            mtest = cnn.Model(config, is_train=False)

        # checkpoint
        saver = tf.train.Saver(tf.all_variables())
        save_path = os.path.join(out_dir, 'model.ckpt')
        summary_op = tf.merge_all_summaries()

        # session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        with sess.as_default():
            train_summary_writer = tf.train.SummaryWriter(os.path.join(
                out_dir, "train"),
                                                          graph=sess.graph)
            dev_summary_writer = tf.train.SummaryWriter(os.path.join(
                out_dir, "dev"),
                                                        graph=sess.graph)
            sess.run(tf.initialize_all_variables())

            # assign pretrained embeddings
            if FLAGS.use_pretrain:
                print "Initializing model with pretrained embeddings ..."
                pretrained_embedding = np.load(
                    os.path.join(FLAGS.data_dir, 'emb.npy'))
                m.assign_embedding(sess, pretrained_embedding)

            # initialize parameters
            current_lr = FLAGS.init_lr
            lowest_loss_value = float("inf")
            decay_step_counter = 0
            global_step = 0

            # evaluate on dev set
            def dev_step(mtest, sess):
                """Return (mean loss, mean AUC, mean F1) over all dev batches."""
                dev_loss = []
                dev_auc = []
                dev_f1_score = []

                # create batch
                test_batches = util.batch_iter(test_data,
                                               batch_size=FLAGS.batch_size,
                                               num_epochs=1,
                                               shuffle=False)
                for batch in test_batches:
                    x_batch, y_batch, n_batch = zip(*batch)
                    feed = {
                        mtest.inputs: np.array(x_batch),
                        mtest.labels: np.array(y_batch)
                    }
                    if FLAGS.negative:
                        feed[mtest.negative] = np.array(n_batch)
                    loss_value, eval_value = sess.run(
                        [mtest.total_loss, mtest.eval_op], feed_dict=feed)
                    dev_loss.append(loss_value)
                    pre, rec = zip(*eval_value)
                    dev_auc.append(util.calc_auc_pr(pre, rec))
                    dev_f1_score.append((2.0 * pre[5] * rec[5]) /
                                        (pre[5] + rec[5]))  # threshold = 0.5
                return (np.mean(dev_loss), np.mean(dev_auc),
                        np.mean(dev_f1_score))

            # train loop
            print "\nStart training (save checkpoints in %s)\n" % out_dir
            train_loss = []
            train_auc = []
            train_f1_score = []
            query_time = []
            for _ in range(FLAGS.num_epochs):
                train_size = len(train_data)
                indices = range(train_size)
                # shuffle data every epoch
                indices = np.random.permutation(indices)
                while len(indices) > 0:
                    # first batch
                    if len(indices) == train_size:
                        next_batch_idx = indices[:FLAGS.batch_size]
                        indices = indices[FLAGS.batch_size:]
                    # last batch
                    elif len(indices) < FLAGS.batch_size:
                        next_batch_idx = indices
                        indices = []
                    else:
                        # Active learning: pick the most informative examples
                        # from a candidate pool and time each query.
                        pool_idx = indices[:FLAGS.pool_size]
                        pool_data = train_data[pool_idx]
                        start_time = time.time()
                        next_pool_idx = util.most_informative(
                            pool_data,
                            config,
                            strategy=FLAGS.strategy,
                            class_names=class_names,
                            relations=relations)
                        query_time.append((time.time() - start_time) /
                                          float(len(next_pool_idx)))
                        next_batch_idx = [pool_idx[i] for i in next_pool_idx]
                        indices = [
                            i for i in indices if i not in next_batch_idx
                        ]
                    # check indices
                    #assert len(set(indices).intersection(next_batch_idx)) == 0

                    batch = train_data[next_batch_idx]
                    batch_size = len(batch)
                    m.assign_lr(sess, current_lr)
                    global_step += 1

                    x_batch, y_batch, n_batch = zip(*batch)
                    feed = {
                        m.inputs: np.array(x_batch),
                        m.labels: np.array(y_batch)
                    }
                    if FLAGS.negative:
                        feed[m.negative] = np.array(n_batch)

                    start_time = time.time()
                    _, loss_value, eval_value = sess.run(
                        [m.train_op, m.total_loss, m.eval_op], feed_dict=feed)
                    duration = time.time() - start_time

                    train_loss.append(loss_value)
                    pre, rec = zip(*eval_value)
                    auc = util.calc_auc_pr(pre, rec)
                    f1 = (2.0 * pre[5] * rec[5]) / (pre[5] + rec[5]
                                                    )  # threshold = 0.5
                    train_auc.append(auc)
                    train_f1_score.append(f1)
                    assert not np.isnan(loss_value), "Model loss is NaN."

                    # print log
                    if global_step % FLAGS.log_step == 0:
                        examples_per_sec = batch_size / duration
                        avg_query_time = np.mean(query_time)
                        format_str = '%s: step %d/%d, f1 = %.4f, auc = %.4f, loss = %.4f ' + \
                                     '(%.1f examples/sec; %.3f sec/batch; %.3f sec/query), lr: %.6f'
                        print format_str % (datetime.now(), global_step,
                                            max_steps, f1, auc, loss_value,
                                            examples_per_sec, duration,
                                            avg_query_time, current_lr)

                    # write summary
                    if global_step % FLAGS.summary_step == 0:
                        summary_str = sess.run(summary_op)
                        train_summary_writer.add_summary(
                            summary_str, global_step)
                        dev_summary_writer.add_summary(summary_str,
                                                       global_step)

                        # summary loss, f1
                        train_summary_writer.add_summary(
                            _summary_for_scalar('loss', np.mean(train_loss)),
                            global_step=global_step)
                        train_summary_writer.add_summary(
                            _summary_for_scalar('auc', np.mean(train_auc)),
                            global_step=global_step)
                        train_summary_writer.add_summary(
                            _summary_for_scalar('f1', np.mean(train_f1_score)),
                            global_step=global_step)

                        dev_loss, dev_auc, dev_f1 = dev_step(mtest, sess)
                        dev_summary_writer.add_summary(_summary_for_scalar(
                            'loss', dev_loss),
                                                       global_step=global_step)
                        dev_summary_writer.add_summary(_summary_for_scalar(
                            'auc', dev_auc),
                                                       global_step=global_step)
                        dev_summary_writer.add_summary(_summary_for_scalar(
                            'f1', dev_f1),
                                                       global_step=global_step)

                        print "\n===== write summary ====="
                        print "%s: step %d/%d: train_loss = %.6f, train_auc = %.4f, train_f1 = %.4f" \
                              % (datetime.now(), global_step, max_steps,
                                 np.mean(train_loss), np.mean(train_auc), np.mean(train_f1_score))
                        print "%s: step %d/%d: dev_loss = %.6f, dev_auc = %.4f, dev_f1 = %.4f\n" \
                              % (datetime.now(), global_step, max_steps,
                                 dev_loss, dev_auc, dev_f1)

                        # reset container
                        train_loss = []
                        train_auc = []
                        train_f1_score = []
                        #query_time = []

                    # decay learning rate if necessary
                    # (decay when the loss has not improved for tolerance_step steps)
                    if loss_value < lowest_loss_value:
                        lowest_loss_value = loss_value
                        decay_step_counter = 0
                    else:
                        decay_step_counter += 1
                    if decay_step_counter >= FLAGS.tolerance_step:
                        current_lr *= FLAGS.lr_decay
                        print '%s: step %d/%d, Learning rate decays to %.5f' % \
                              (datetime.now(), global_step, max_steps,
                               current_lr)
                        decay_step_counter = 0

                    # stop learning if learning rate is too low
                    if current_lr < 1e-5:
                        break

                    # save checkpoint
                    #if global_step % FLAGS.checkpoint_step == 0:
                    saver.save(sess, save_path, global_step=global_step)
            # final checkpoint and query-time report
            saver.save(sess, save_path, global_step=global_step)
            print 'avg. query time = %.4f [sec]' % (np.mean(query_time))
def train():
    """Train a CNN text classifier end-to-end.

    Creates a timestamped output directory under FLAGS.train_dir, dumps the
    flag configuration, loads train/test data, builds train and eval model
    graphs sharing one variable scope, then runs the epoch/batch loop with
    periodic logging, TensorBoard summaries, dev evaluation, loss-plateau
    learning-rate decay, and checkpointing.
    """
    # train_dir: one fresh, timestamped subdirectory per run
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, timestamp))

    # save flags so the run is reproducible from disk
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    FLAGS._parse_flags()  # force flag parsing so __flags is populated
    config = dict(FLAGS.__flags.items())
    util.dump_to_file(os.path.join(out_dir, 'flags.cPickle'), config)
    print "Parameters:"
    for k, v in config.iteritems():
        print '%20s %r' % (k, v)

    # load data
    print "Preparing train data ..."
    train_loader = util.DataLoader(FLAGS.data_dir, 'train.cPickle', batch_size=FLAGS.batch_size)
    print "Preparing test data ..."
    dev_loader = util.DataLoader(FLAGS.data_dir, 'test.cPickle', batch_size=FLAGS.batch_size)
    max_steps = train_loader.num_batch * FLAGS.num_epoch

    # class count and sentence length come from the data, not the flags
    config['num_classes'] = train_loader.num_classes
    config['sent_len'] = train_loader.sent_len

    with tf.Graph().as_default():
        # two graphs over the SAME variables: reuse=True makes mtest share
        # the weights trained through m
        with tf.variable_scope('cnn', reuse=None):
            m = cnn.Model(config, is_train=True)
        with tf.variable_scope('cnn', reuse=True):
            mtest = cnn.Model(config, is_train=False)

        # checkpoint
        saver = tf.train.Saver(tf.global_variables())
        save_path = os.path.join(out_dir, 'model.ckpt')
        summary_op = tf.summary.merge_all()

        # session
        sess = tf.Session()

        # summary writer + embedding projector config for TensorBoard
        proj_config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
        embedding = proj_config.embeddings.add()
        embedding.tensor_name = m.W_emb.name
        embedding.metadata_path = os.path.join(FLAGS.data_dir, 'metadata.tsv')
        summary_dir = os.path.join(out_dir, "summaries")
        summary_writer = tf.summary.FileWriter(summary_dir, graph=sess.graph)
        tf.contrib.tensorboard.plugins.projector.visualize_embeddings(summary_writer, proj_config)

        sess.run(tf.global_variables_initializer())

        # assign pretrained embeddings (overwrites the random init above)
        if FLAGS.use_pretrain:
            print "Use pretrained embeddings to initialize model ..."
            emb_file = os.path.join(FLAGS.data_dir, 'emb.txt')
            vocab_file = os.path.join(FLAGS.data_dir, 'vocab.txt')
            pretrained_embedding = util.load_embedding(emb_file, vocab_file, FLAGS.vocab_size)
            m.assign_embedding(sess, pretrained_embedding)

        # initialize parameters
        current_lr = FLAGS.init_lr
        lowest_loss_value = float("inf")
        decay_step_counter = 0
        global_step = 0

        # evaluate on dev set
        def dev_step(mtest, sess, data_loader):
            """Run one full pass over the dev loader; return (avg loss, accuracy)."""
            dev_loss = 0.0
            dev_accuracy = 0.0
            for _ in range(data_loader.num_batch):
                x_batch_dev, y_batch_dev = data_loader.next_batch()
                dev_loss_value, dev_true_count = sess.run(
                    [mtest.total_loss, mtest.true_count_op],
                    feed_dict={mtest.inputs: x_batch_dev, mtest.labels: y_batch_dev})
                dev_loss += dev_loss_value
                dev_accuracy += dev_true_count
            dev_loss /= data_loader.num_batch
            # accuracy = total correct / total examples seen
            dev_accuracy /= float(data_loader.num_batch * FLAGS.batch_size)
            data_loader.reset_pointer()
            return dev_loss, dev_accuracy

        # train loop
        print '\nStart training, %d batches needed, with %d examples per batch.' % (
            train_loader.num_batch, FLAGS.batch_size)
        for epoch in range(FLAGS.num_epoch):
            train_loss = []
            train_accuracy = []
            train_loader.reset_pointer()
            for _ in range(train_loader.num_batch):
                m.assign_lr(sess, current_lr)
                global_step += 1

                start_time = time.time()
                x_batch, y_batch = train_loader.next_batch()
                feed = {m.inputs: x_batch, m.labels: y_batch}
                # full trace metadata is collected EVERY step here but only
                # written on summary steps — NOTE(review): tracing every step
                # has a runtime cost; confirm this is intentional
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                _, loss_value, true_count = sess.run(
                    [m.train_op, m.total_loss, m.true_count_op],
                    feed_dict=feed, options=run_options, run_metadata=run_metadata)
                proc_duration = time.time() - start_time
                train_loss.append(loss_value)
                train_accuracy.append(true_count)
                assert not np.isnan(loss_value), "Model loss is NaN."

                # print log
                if global_step % FLAGS.log_step == 0:
                    examples_per_sec = FLAGS.batch_size / proc_duration
                    accuracy = float(true_count) / FLAGS.batch_size
                    format_str = '%s: step %d/%d (epoch %d/%d), acc = %.2f, loss = %.2f ' + \
                        '(%.1f examples/sec; %.3f sec/batch), lr: %.6f'
                    print format_str % (datetime.now(), global_step, max_steps,
                                        epoch + 1, FLAGS.num_epoch, accuracy,
                                        loss_value, examples_per_sec,
                                        proc_duration, current_lr)

                # write summary
                if global_step % FLAGS.summary_step == 0:
                    summary_str = sess.run(summary_op)
                    summary_writer.add_run_metadata(run_metadata, 'step%04d' % global_step)
                    summary_writer.add_summary(summary_str, global_step)

                    # summary loss/accuracy (means since the start of the epoch)
                    train_loss_mean = sum(train_loss) / float(len(train_loss))
                    train_accuracy_mean = sum(train_accuracy) / float(
                        len(train_accuracy) * FLAGS.batch_size)
                    summary_writer.add_summary(_summary('train/loss', train_loss_mean),
                                               global_step=global_step)
                    summary_writer.add_summary(_summary('train/accuracy', train_accuracy_mean),
                                               global_step=global_step)

                    test_loss, test_accuracy = dev_step(mtest, sess, dev_loader)
                    summary_writer.add_summary(_summary('dev/loss', test_loss),
                                               global_step=global_step)
                    summary_writer.add_summary(_summary('dev/accuracy', test_accuracy),
                                               global_step=global_step)

                    print "\nStep %d: train_loss = %.6f, train_accuracy = %.3f" % (
                        global_step, train_loss_mean, train_accuracy_mean)
                    print "Step %d: test_loss = %.6f, test_accuracy = %.3f\n" % (
                        global_step, test_loss, test_accuracy)

                # decay learning rate if necessary: decay after tolerance_step
                # consecutive batches without a new lowest loss
                if loss_value < lowest_loss_value:
                    lowest_loss_value = loss_value
                    decay_step_counter = 0
                else:
                    decay_step_counter += 1
                    if decay_step_counter >= FLAGS.tolerance_step:
                        current_lr *= FLAGS.lr_decay
                        print '%s: step %d/%d (epoch %d/%d), Learning rate decays to %.5f' % \
                            (datetime.now(), global_step, max_steps, epoch+1,
                             FLAGS.num_epoch, current_lr)
                        decay_step_counter = 0

                # stop learning if learning rate is too low
                # NOTE(review): this break exits only the inner batch loop —
                # the next epoch resumes training; if a full stop is intended,
                # the outer epoch loop also needs to terminate
                if current_lr < 1e-5:
                    break

                # save checkpoint
                if global_step % FLAGS.checkpoint_step == 0:
                    saver.save(sess, save_path, global_step=global_step)
        # final checkpoint after the last epoch
        saver.save(sess, save_path, global_step=global_step)
def emb(pool, config, class_names=None, relations=None):
    """ Compute gradients with respect to the embeddings layer.

    For each instance in `pool`, restores the checkpoint, runs a forward pass
    to get class scores, then — once per class with that class's label bit
    turned on — runs a train step and collects the loss. Each loss's gradient
    norm w.r.t. the embedding variables is scaled by the (min-max scaled)
    class probability and summed into one scalar per instance.

    Args:
        pool: iterable of instances; instance[0] (and instance[1] in 'split'
            mode) are id arrays reshaped to (1, sent_len).
        config: model configuration dict ('split', 'sent_len', 'num_classes',
            'optimizer', 'negative', 'hierarchical', 'train_dir', 'init_lr').
        class_names, relations: only used for pseudo negative sampling when
            config['negative'] is set.

    Returns:
        List with one scalar (summed scaled gradient norm) per instance.
    """
    config['dropout'] = 0.0  # no dropout
    with tf.Graph().as_default():
        with tf.variable_scope('cnn'):
            # model module depends on the 'split' mode
            if config.has_key('split') and config['split']:
                import cnn_split
                m = cnn_split.Model(config, is_train=True)
            else:
                import cnn
                m = cnn.Model(config, is_train=True)
        saver = tf.train.Saver(tf.all_variables())

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(config['train_dir'])

            # NOTE(review): `opt` is only used for compute_gradients below;
            # the parameter updates themselves go through m.train_op, which
            # has its own optimizer — confirm the duplication is intended.
            learning_rate = 1.0
            if config['optimizer'] == 'adadelta':
                opt = tf.train.AdadeltaOptimizer(learning_rate)
            elif config['optimizer'] == 'adagrad':
                opt = tf.train.AdagradOptimizer(learning_rate)
            elif config['optimizer'] == 'adam':
                opt = tf.train.AdamOptimizer(learning_rate)
            elif config['optimizer'] == 'sgd':
                opt = tf.train.GradientDescentOptimizer(learning_rate)
            else:
                raise ValueError("Optimizer not supported.")

            m.assign_lr(sess, config['init_lr'])

            gradients = []
            # feed only one instance at a time (batch_size = 1);
            # the checkpoint is re-restored per instance to undo the train
            # steps applied while scoring the previous instance
            for instance in pool:
                print '.',
                saver.restore(sess, ckpt.model_checkpoint_path)
                if config.has_key('split') and config['split']:
                    # get prediction
                    feed = {m.left: instance[0].reshape((1, config['sent_len'])),
                            m.right: instance[1].reshape((1, config['sent_len']))}
                    prob = sess.run(m.scores, feed_dict=feed)

                    # get total loss
                    # NOTE(review): label[i] is set to 1.0 without resetting
                    # the previous bit, so later iterations feed a multi-hot
                    # label (classes 0..i all on) — confirm one-hot was meant
                    label = [0.0] * config['num_classes']
                    losses = []
                    for i in range(config['num_classes']):
                        label[i] = 1.0
                        feed[m.labels] = np.array(label).reshape((1, config['num_classes']))
                        if config['negative'] and class_names and relations:
                            neg = util.pseudo_negative_sampling(
                                np.array(label), class_names, relations,
                                hierarchical=config['hierarchical'])
                            # NOTE(review): this branch calls neg.eval() while
                            # the non-split branch uses neg directly — verify
                            # pseudo_negative_sampling's return type
                            feed[m.negative] = np.array(neg.eval()).reshape((1, config['num_classes']))
                        _, loss = sess.run([m.train_op, m.total_loss], feed_dict=feed)
                        losses.append(loss)
                else:
                    # get probability
                    feed = {m.inputs: instance[0].reshape((1, config['sent_len']))}
                    prob = sess.run(m.scores, feed_dict=feed)

                    # get total loss (same multi-hot caveat as the split branch)
                    label = [0.0] * config['num_classes']
                    losses = []
                    for i in range(config['num_classes']):
                        label[i] = 1.0
                        feed[m.labels] = np.array(label).reshape((1, config['num_classes']))
                        if config['negative'] and class_names and relations:
                            neg = util.pseudo_negative_sampling(
                                np.array(label), class_names, relations,
                                hierarchical=config['hierarchical'])
                            feed[m.negative] = np.array(neg).reshape((1, config['num_classes']))
                        _, loss = sess.run([m.train_op, m.total_loss], feed_dict=feed)
                        losses.append(loss)

                # get variable by name
                emb = [var for var in tf.trainable_variables()
                       if var.op.name.startswith('cnn/embedding')]

                marginal = []
                scaled = util.minmax_scale(prob[0, :])
                assert len(scaled) == config['num_classes']
                for i, loss in enumerate(losses):
                    # compute gradients w.r.t. embeddings layer
                    # NOTE(review): `loss` here is a fetched Python float, so
                    # tf.cast(loss, ...) is a constant with no path to `emb`;
                    # also compute_gradients returns (grad, var) pairs, so
                    # g[1].eval() reads the VARIABLE, not the gradient —
                    # the norm below is of the embedding weights. Confirm.
                    grad = opt.compute_gradients(tf.cast(loss, dtype=tf.float32), emb)
                    # compute norm and scale by the probability
                    marginal.append(scaled[i] * np.linalg.norm(
                        np.array([g[1].eval() for g in grad])))
                gradients.append(np.sum(marginal))
    return gradients
def evaluate(eval_data, config): """ Build evaluation graph and run. """ with tf.Graph().as_default(): with tf.variable_scope('cnn'): if config.has_key('contextwise') and config['contextwise']: import cnn_context m = cnn_context.Model(config, is_train=False) else: import cnn m = cnn.Model(config, is_train=False) saver = tf.train.Saver(tf.global_variables()) with tf.Session() as sess: ckpt = tf.train.get_checkpoint_state(config['train_dir']) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise IOError("Loading checkpoint file failed!") #embeddings = sess.run(tf.global_variables())[0] print "\nStart evaluation\n" #losses = [] #precision = [] #recall = [] #batches = util.batch_iter(eval_data, batch_size=config['batch_size'], num_epochs=1, shuffle=False) #for batch in batches: if config.has_key('contextwise') and config['contextwise']: left_batch, middle_batch, right_batch, y_batch, _ = zip(*eval_data) feed = {m.left: np.array(left_batch), m.middle: np.array(middle_batch), m.right: np.array(right_batch), m.labels: np.array(y_batch)} else: x_batch, y_batch, _ = zip(*eval_data) feed = {m.inputs: np.array(x_batch), m.labels: np.array(y_batch)} loss, eval, actual_output, eval_per_class = sess.run([m.total_loss, m.eval_op, m.scores, m.eval_class_op], feed_dict=feed) #losses.append(loss) pre, rec = zip(*eval) #precision.append(pre) #recall.append(rec) avg_precision = np.mean(np.array(pre)) avg_recall = np.mean(np.array(rec)) auc = util.calc_auc_pr(pre, rec) f1 = (2.0 * pre[5] * rec[5]) / (pre[5] + rec[5]) print '%s: Overall\nloss = %.6f, f1 = %.4f, auc = %.4f' % (datetime.now(), loss, f1, auc) pre_per_class, rec_per_class = zip(*eval_per_class) num_class = len(pre_per_class) for class_i in range(num_class): current_pre = pre_per_class[class_i] current_rec = rec_per_class[class_i] current_auc = util.calc_auc_pr(current_pre, current_rec) current_f1 = (2.0 * current_pre[5] * current_rec[5]) / (current_pre[5] + current_rec[5]) print 
'Class "%s": precision = %.4f, recall = %.4f, f1 = %.4f, auc = %.4f' % (CLASS_NAMES[class_i], current_pre[5], current_rec[5], current_f1, current_auc) x_batch = np.array(x_batch) y_batch = np.array(y_batch) # Now calculate the true probability distribution using softmax. actual_output_exp = np.exp(actual_output) actual_output_softmax = actual_output_exp / np.sum(actual_output_exp, axis=1, keepdims=True) plot_precision_recall(y_batch,actual_output_softmax) return pre, rec, x_batch, y_batch, actual_output