def main(args):
    # Load the symbol dictionary and build the reverse (index -> symbol) lookup.
    worddicts = load_dict(args.path + '/data/dictionary.txt')
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    train, train_uid_list = dataIterator(args.path + '/data/offline-train.pkl',
                                         args.path + '/data/train_caption.txt',
                                         worddicts,
                                         batch_size=args.batch_size,
                                         batch_Imagesize=400000,
                                         maxlen=100,
                                         maxImagesize=400000)
    valid, valid_uid_list = dataIterator(args.path + '/data/offline-test.pkl',
                                         args.path + '/data/test_caption.txt',
                                         worddicts,
                                         batch_size=args.batch_size,
                                         batch_Imagesize=400000,
                                         maxlen=100,
                                         maxImagesize=400000)
    print('train length is ', len(train))

    # Placeholders for images, captions and their masks.
    x = tf.placeholder(tf.float32, shape=[None, None, None, 1])
    y = tf.placeholder(tf.int32, shape=[None, None])
    x_mask = tf.placeholder(tf.float32, shape=[None, None, None])
    y_mask = tf.placeholder(tf.float32, shape=[None, None])
    lr = tf.placeholder(tf.float32, shape=())
    if_trainning = tf.placeholder(tf.bool, shape=())

    watcher_train = Watcher_train(blocks=3,
                                  level=16,
                                  growth_rate=24,
                                  training=if_trainning)
    annotation, anno_mask = watcher_train.dense_net(x, x_mask)

    # Placeholders for initializing validation / step-by-step decoding.
    anno = tf.placeholder(tf.float32,
                          shape=[
                              None,
                              annotation.shape.as_list()[1],
                              annotation.shape.as_list()[2],
                              annotation.shape.as_list()[3]
                          ])
    infer_y = tf.placeholder(tf.int64, shape=(None, ))
    h_pre = tf.placeholder(tf.float32, shape=[None, 256])
    alpha_past = tf.placeholder(tf.float32,
                                shape=[
                                    None,
                                    annotation.shape.as_list()[1],
                                    annotation.shape.as_list()[2]
                                ])

    attender = Attender(annotation.shape.as_list()[3], 256, 512)
    parser = Parser(256, 256, attender, annotation.shape.as_list()[3])
    # The model instance is named `wap` so it is not shadowed by the word
    # tensor `w` returned from get_word() below.
    wap = WAP(watcher_train, attender, parser, 256, 256,
              annotation.shape.as_list()[3], 111, if_trainning)

    hidden_state_0 = tf.tanh(
        tf.tensordot(tf.reduce_mean(anno, axis=[1, 2]), wap.Wa2h, axes=1) +
        wap.ba2h)  # [batch, hidden_dim]

    cost = wap.get_cost(annotation, y, anno_mask, y_mask)
    # L2 regularization on all trainable variables except batch-norm parameters.
    vs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    for vv in vs:
        if not vv.name.startswith('batch_normalization'):
            cost += 1e-4 * tf.reduce_sum(tf.pow(vv, 2))

    p, w, h, alpha = wap.get_word(infer_y, h_pre, alpha_past, anno)

    optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        trainer = optimizer.minimize(cost)

    max_epoch = 200
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    init = tf.global_variables_initializer()

    uidx = 0
    cost_s = 0
    dispFreq = 100
    saveFreq = len(train)
    sampleFreq = len(train)
    validFreq = len(train)
    history_errs = []
    estop = False
    halfLrFlag = 0
    patience = 15
    lrate = 1.0
    log = open(args.path + '/log-bs-6.txt', 'w')

    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(max_epoch):
            n_samples = 0
            random.shuffle(train)
            for batch_x, batch_y in train:
                batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(
                    batch_x, batch_y)
                n_samples += len(batch_x)
                uidx += 1
                cost_i, _ = sess.run(
                    [cost, trainer],
                    feed_dict={
                        x: batch_x,
                        y: batch_y,
                        x_mask: batch_x_m,
                        y_mask: batch_y_m,
                        if_trainning: True,
                        lr: lrate
                    })
                cost_s += cost_i

                if np.isnan(cost_i) or np.isinf(cost_i):
                    print('invalid cost value detected')
                    sys.exit(0)

                if np.mod(uidx, dispFreq) == 0:
                    cost_s /= dispFreq
                    print('Epoch ', epoch, 'Update ', uidx, 'Cost ', cost_s,
                          'Lr ', lrate)
                    log.write('Epoch ' + str(epoch) + ' Update ' + str(uidx) +
                              ' Cost ' + str(cost_s) + ' Lr ' + str(lrate) +
                              '\n')
                    log.flush()
                    cost_s = 0

                # Periodically decode the validation set and write predictions.
                if np.mod(uidx, sampleFreq) == 0:
                    fpp_sample = open(
                        args.path + '/result/valid_decode_result-bs-6.txt', 'w')
                    valid_count_idx = 0
                    for batch_x, batch_y in valid:
                        for xx in batch_x:
                            xx = np.moveaxis(xx, 0, -1)
                            xx_pad = np.zeros(
                                (xx.shape[0], xx.shape[1], xx.shape[2]),
                                dtype='float32')
                            xx_pad[:, :, :] = xx / 255.
                            xx_pad = xx_pad[None, :, :, :]
                            annot = sess.run(annotation,
                                             feed_dict={
                                                 x: xx_pad,
                                                 if_trainning: False
                                             })
                            h_state = sess.run(hidden_state_0,
                                               feed_dict={anno: annot})
                            sample, score = wap.get_sample(p, w, h, alpha,
                                                           annot, h_state, 10,
                                                           100, False, sess,
                                                           training=False)
                            score = score / np.array([len(s) for s in sample])
                            ss = sample[score.argmin()]
                            fpp_sample.write(valid_uid_list[valid_count_idx])
                            valid_count_idx = valid_count_idx + 1
                            if np.mod(valid_count_idx, 100) == 0:
                                print('gen %d samples' % valid_count_idx)
                                log.write('gen %d samples' % valid_count_idx +
                                          '\n')
                                log.flush()
                            for vv in ss:
                                if vv == 0:  # <eol>
                                    break
                                fpp_sample.write(' ' + worddicts_r[vv])
                            fpp_sample.write('\n')
                    fpp_sample.close()
                    print('valid set decode done')
                    log.write('valid set decode done\n')
                    log.flush()

                # Periodically compute the validation cost and WER/ExpRate.
                if np.mod(uidx, validFreq) == 0:
                    probs = []
                    for batch_x, batch_y in valid:
                        batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(
                            batch_x, batch_y)
                        pprobs, annot = sess.run(
                            [cost, annotation],
                            feed_dict={
                                x: batch_x,
                                y: batch_y,
                                x_mask: batch_x_m,
                                y_mask: batch_y_m,
                                if_trainning: False
                            })
                        probs.append(pprobs)
                    valid_errs = np.array(probs)
                    valid_err_cost = valid_errs.mean()

                    os.system('python3.4 compute-wer.py ' + args.path +
                              '/result/valid_decode_result-bs-6.txt' + ' ' +
                              args.path + '/data/test_caption.txt' + ' ' +
                              args.path + '/result/valid-bs-6.wer')
                    fpp = open(args.path + '/result/valid-bs-6.wer')
                    stuff = fpp.readlines()
                    fpp.close()
                    m = re.search('WER (.*)\n', stuff[0])
                    valid_per = 100. * float(m.group(1))
                    m = re.search('ExpRate (.*)\n', stuff[1])
                    valid_sacc = 100. * float(m.group(1))
                    valid_err = valid_per
                    history_errs.append(valid_err)

                    if uidx / validFreq == 0 or valid_err <= np.array(
                            history_errs).min():
                        bad_counter = 0
                    if uidx / validFreq != 0 and valid_err > np.array(
                            history_errs).min():
                        bad_counter += 1
                        if bad_counter > patience:
                            if halfLrFlag == 2:
                                print('Early Stop!')
                                log.write('Early Stop!\n')
                                log.flush()
                                estop = True
                                break
                            else:
                                print('Lr decay and retrain!')
                                log.write('Lr decay and retrain!\n')
                                log.flush()
                                bad_counter = 0
                                lrate = lrate / 10
                                halfLrFlag += 1

                    print('Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f' %
                          (valid_per, valid_sacc, valid_err_cost))
                    log.write('Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f' %
                              (valid_per, valid_sacc, valid_err_cost) + '\n')
                    log.flush()

            if estop:
                break
def main(args):
    global anno, infer_y, h_pre, alpha_past, if_trainning, dictLen

    # Load the symbol dictionary and build the reverse (index -> symbol) lookup.
    worddicts = load_dict(args.dictPath)
    dictLen = len(worddicts)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    train, train_uid_list = dataIterator(
        args.trainPklPath,
        args.trainCaptionPath,
        worddicts,
        batch_size=args.batch_size,
        batch_Imagesize=500000,
        maxlen=150,
        maxImagesize=500000,
    )
    valid, valid_uid_list = dataIterator(
        args.validPklPath,
        args.validCaptionPath,
        worddicts,
        batch_size=args.batch_size,
        batch_Imagesize=500000,
        maxlen=150,
        maxImagesize=500000,
    )
    print("train length is ", len(train))
    print("valid length is ", len(valid))

    # Placeholders for images, captions and their masks.
    x = tf.placeholder(tf.float32, shape=[None, None, None, 1])
    y = tf.placeholder(tf.int32, shape=[None, None])
    x_mask = tf.placeholder(tf.float32, shape=[None, None, None])
    y_mask = tf.placeholder(tf.float32, shape=[None, None])
    lr = tf.placeholder(tf.float32, shape=())
    if_trainning = tf.placeholder(tf.bool, shape=())

    watcher_train = Watcher_train(blocks=3,
                                  level=16,
                                  growth_rate=24,
                                  training=if_trainning)
    annotation, anno_mask = watcher_train.dense_net(x, x_mask)

    # Placeholders for initializing validation / step-by-step decoding.
    anno = tf.placeholder(
        tf.float32,
        shape=[
            None,
            annotation.shape.as_list()[1],
            annotation.shape.as_list()[2],
            annotation.shape.as_list()[3],
        ],
    )
    infer_y = tf.placeholder(tf.int64, shape=(None, ))
    h_pre = tf.placeholder(tf.float32, shape=[None, 256])
    alpha_past = tf.placeholder(
        tf.float32,
        shape=[
            None,
            annotation.shape.as_list()[1],
            annotation.shape.as_list()[2]
        ],
    )

    attender = Attender(annotation.shape.as_list()[3], 256, 512)
    parser = Parser(256, 256, attender, annotation.shape.as_list()[3])
    wap = WAP(
        watcher_train,
        attender,
        parser,
        256,
        256,
        annotation.shape.as_list()[3],
        dictLen,
        if_trainning,
    )

    hidden_state_0 = tf.tanh(
        tf.tensordot(tf.reduce_mean(anno, axis=[1, 2]), wap.Wa2h, axes=1) +
        wap.ba2h)  # [batch, hidden_dim]

    cost = wap.get_cost(annotation, y, anno_mask, y_mask)
    # L2 regularization on all trainable variables except batch-norm parameters.
    vs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    for vv in vs:
        if not vv.name.startswith("batch_normalization"):
            cost += 1e-4 * tf.reduce_sum(tf.pow(vv, 2))

    p, w, h, alpha = wap.get_word(infer_y, h_pre, alpha_past, anno)

    optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        trainer = optimizer.minimize(cost)

    max_epoch = 200
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    init = tf.global_variables_initializer()

    uidx = 0
    cost_s = 0
    dispFreq = 100 if args.dispFreq is None else args.dispFreq
    saveFreq = (len(train) * args.epochDispRatio
                if args.saveFreq is None else args.saveFreq)
    sampleFreq = (len(train) * args.epochSampleRatio
                  if args.sampleFreq is None else args.sampleFreq)
    validFreq = (len(train) * args.epochValidRatio
                 if args.validFreq is None else args.validFreq)
    history_errs = []
    estop = False
    halfLrFlag = 0
    patience = 15 if args.patience is None else args.patience
    lrate = args.lr

    logPath = "./log.txt" if args.logPath is None else args.logPath
    log = open(logPath, "w")
    log.write(str(vars(args)) + "\n")
    log.write(str(patience) + "\n")
    log.write(str(lrate) + "\n")  # log the initial learning rate value

    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(max_epoch):
            n_samples = 0
            random.shuffle(train)
            for batch_x, batch_y in train:
                batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(
                    batch_x, batch_y)
                n_samples += len(batch_x)
                uidx += 1
                cost_i, _ = sess.run(
                    [cost, trainer],
                    feed_dict={
                        x: batch_x,
                        y: batch_y,
                        x_mask: batch_x_m,
                        y_mask: batch_y_m,
                        if_trainning: True,
                        lr: lrate,
                    },
                )
                cost_s += cost_i

                if np.isnan(cost_i) or np.isinf(cost_i):
                    print("invalid cost value detected")
                    sys.exit(0)

                if np.mod(uidx, dispFreq) == 0:
                    cost_s /= dispFreq
                    print("Epoch ", epoch, "Update ", uidx, "Cost ", cost_s,
                          "Lr ", lrate)
                    log.write("Epoch " + str(epoch) + " Update " + str(uidx) +
                              " Cost " + str(cost_s) + " Lr " + str(lrate) +
                              "\n")
                    log.flush()
                    cost_s = 0

                # Periodically decode the validation set and write predictions.
                if np.mod(uidx, sampleFreq) == 0:
                    print("Start sampling...")
                    _t = time.time()
                    fpp_sample = open(
                        os.path.join(args.resultPath,
                                     str(args.resultFileName) + ".txt"),
                        "w",
                    )
                    valid_count_idx = 0
                    for batch_x, batch_y in valid:
                        for xx in batch_x:
                            xx = np.moveaxis(xx, 0, -1)
                            xx_pad = np.zeros(
                                (xx.shape[0], xx.shape[1], xx.shape[2]),
                                dtype="float32")
                            xx_pad[:, :, :] = xx / 255.0
                            xx_pad = xx_pad[None, :, :, :]
                            annot = sess.run(annotation,
                                             feed_dict={
                                                 x: xx_pad,
                                                 if_trainning: False
                                             })
                            h_state = sess.run(hidden_state_0,
                                               feed_dict={anno: annot})
                            sample, score = wap.get_sample(
                                p,
                                w,
                                h,
                                alpha,
                                annot,
                                h_state,
                                10,
                                100,
                                False,
                                sess,
                                training=False,
                            )
                            score = score / np.array([len(s) for s in sample])
                            ss = sample[score.argmin()]
                            fpp_sample.write(valid_uid_list[valid_count_idx])
                            valid_count_idx = valid_count_idx + 1
                            if np.mod(valid_count_idx, 100) == 0:
                                print("gen %d samples" % valid_count_idx)
                                log.write("gen %d samples" % valid_count_idx +
                                          "\n")
                                log.flush()
                            for vv in ss:
                                if vv == 0:  # <eol>
                                    break
                                fpp_sample.write(" " + worddicts_r[vv])
                            fpp_sample.write("\n")
                    fpp_sample.close()
                    print("valid set decode done")
                    log.write("valid set decode done\n")
                    log.flush()
                    print("Done sampling, took " + str(time.time() - _t))

                # Periodically compute the validation cost and WER/ExpRate.
                if np.mod(uidx, validFreq) == 0:
                    print("Start validating...")
                    _t = time.time()
                    probs = []
                    for batch_x, batch_y in valid:
                        batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(
                            batch_x, batch_y)
                        pprobs, annot = sess.run(
                            [cost, annotation],
                            feed_dict={
                                x: batch_x,
                                y: batch_y,
                                x_mask: batch_x_m,
                                y_mask: batch_y_m,
                                if_trainning: False,
                            },
                        )
                        probs.append(pprobs)
                    valid_errs = np.array(probs)
                    valid_err_cost = valid_errs.mean()

                    wer_process(
                        os.path.join(args.resultPath,
                                     args.resultFileName + ".txt"),
                        args.validCaptionPath,
                        os.path.join(args.resultPath,
                                     args.resultFileName + ".wer"),
                    )
                    fpp = open(
                        os.path.join(args.resultPath,
                                     args.resultFileName + ".wer"))
                    stuff = fpp.readlines()
                    fpp.close()
                    m = re.search("WER (.*)\n", stuff[0])
                    valid_per = 100.0 * float(m.group(1))
                    m = re.search("ExpRate (.*)\n", stuff[1])
                    valid_sacc = 100.0 * float(m.group(1))
                    valid_err = valid_per
                    history_errs.append(valid_err)

                    if (uidx / validFreq == 0
                            or valid_err <= np.array(history_errs).min()):
                        bad_counter = 0
                    if (uidx / validFreq != 0
                            and valid_err > np.array(history_errs).min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            if halfLrFlag == 2:
                                print("Early Stop!")
                                log.write("Early Stop!\n")
                                log.flush()
                                estop = True
                                break
                            else:
                                print("Lr decay and retrain!")
                                log.write("Lr decay and retrain!\n")
                                log.flush()
                                bad_counter = 0
                                lrate = lrate / 10
                                halfLrFlag += 1
                    print("bad_counter " + str(bad_counter))

                    print("Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f" %
                          (valid_per, valid_sacc, valid_err_cost))
                    log.write("Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f" %
                              (valid_per, valid_sacc, valid_err_cost) + "\n")
                    log.flush()
                    print("Done validating, took " + str(time.time() - _t))

            if estop:
                break

        save_path = saver.save(sess,
                               os.path.join(args.savePath, args.saveName))
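# A minimal sketch of a command-line driver for the training entry point above.
# The flag names mirror the attributes read from `args` in main(); the default
# values are illustrative assumptions, not the original configuration.
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="Train the WAP model")
    cli.add_argument("--dictPath", required=True)
    cli.add_argument("--trainPklPath", required=True)
    cli.add_argument("--trainCaptionPath", required=True)
    cli.add_argument("--validPklPath", required=True)
    cli.add_argument("--validCaptionPath", required=True)
    cli.add_argument("--resultPath", required=True)
    cli.add_argument("--resultFileName", default="valid_decode_result")
    cli.add_argument("--savePath", required=True)
    cli.add_argument("--saveName", default="wap-model")
    cli.add_argument("--logPath", default=None)
    cli.add_argument("--batch_size", type=int, default=6)
    cli.add_argument("--lr", type=float, default=1.0)
    cli.add_argument("--patience", type=int, default=None)
    cli.add_argument("--dispFreq", type=int, default=None)
    cli.add_argument("--saveFreq", type=int, default=None)
    cli.add_argument("--sampleFreq", type=int, default=None)
    cli.add_argument("--validFreq", type=int, default=None)
    cli.add_argument("--epochDispRatio", type=int, default=1)
    cli.add_argument("--epochSampleRatio", type=int, default=1)
    cli.add_argument("--epochValidRatio", type=int, default=1)
    main(cli.parse_args())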
def main(args):
    global anno, infer_y, h_pre, alpha_past, if_trainning, dictLen

    # Load the symbol dictionary and build the reverse (index -> symbol) lookup.
    worddicts = load_dict(args.dictPath)
    dictLen = len(worddicts)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    test, test_uid_list = dataIterator(
        args.testPklPath,
        args.testCaptionPath,
        worddicts,
        batch_size=2,
        batch_Imagesize=400000,
        maxlen=100,
        maxImagesize=400000,
    )

    # Rebuild the same graph as in training so the checkpoint can be restored.
    x = tf.placeholder(tf.float32, shape=[None, None, None, 1])
    y = tf.placeholder(tf.int32, shape=[None, None])
    x_mask = tf.placeholder(tf.float32, shape=[None, None, None])
    y_mask = tf.placeholder(tf.float32, shape=[None, None])
    lr = tf.placeholder(tf.float32, shape=())
    if_trainning = tf.placeholder(tf.bool, shape=())

    watcher_train = Watcher_train(blocks=3,
                                  level=16,
                                  growth_rate=24,
                                  training=if_trainning)
    annotation, anno_mask = watcher_train.dense_net(x, x_mask)

    # Placeholders for initializing step-by-step decoding.
    anno = tf.placeholder(
        tf.float32,
        shape=[
            None,
            annotation.shape.as_list()[1],
            annotation.shape.as_list()[2],
            annotation.shape.as_list()[3],
        ],
    )
    infer_y = tf.placeholder(tf.int64, shape=(None, ))
    h_pre = tf.placeholder(tf.float32, shape=[None, 256])
    alpha_past = tf.placeholder(
        tf.float32,
        shape=[
            None,
            annotation.shape.as_list()[1],
            annotation.shape.as_list()[2]
        ],
    )

    attender = Attender(annotation.shape.as_list()[3], 256, 512)
    parser = Parser(256, 256, attender, annotation.shape.as_list()[3])
    wap = WAP(
        watcher_train,
        attender,
        parser,
        256,
        256,
        annotation.shape.as_list()[3],
        dictLen,
        if_trainning,
    )

    hidden_state_0 = tf.tanh(
        tf.tensordot(tf.reduce_mean(anno, axis=[1, 2]), wap.Wa2h, axes=1) +
        wap.ba2h)  # [batch, hidden_dim]

    cost = wap.get_cost(annotation, y, anno_mask, y_mask)
    # L2 regularization, as in training (only needed to report the test cost).
    vs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    for vv in vs:
        if not vv.name.startswith("batch_normalization"):
            cost += 1e-4 * tf.reduce_sum(tf.pow(vv, 2))

    p, w, h, alpha = wap.get_word(infer_y, h_pre, alpha_past, anno)

    optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        trainer = optimizer.minimize(cost)

    max_epoch = 200  # kept from the training script; unused here
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        sess.run(init)
        saver.restore(
            sess,
            os.path.join(args.modelPath, args.modelFileName) + ".ckpt")

        print("Start sampling...")
        _t = time.time()
        fpp_sample = open(
            os.path.join(args.resultPath, str(args.resultFileName) + ".txt"),
            "w",
        )
        test_count_idx = 0
        for batch_x, batch_y in test:
            for xx in batch_x:
                xx = np.moveaxis(xx, 0, -1)
                xx_pad = np.zeros((xx.shape[0], xx.shape[1], xx.shape[2]),
                                  dtype="float32")
                xx_pad[:, :, :] = xx / 255.0
                xx_pad = xx_pad[None, :, :, :]
                annot = sess.run(annotation,
                                 feed_dict={
                                     x: xx_pad,
                                     if_trainning: False
                                 })
                h_state = sess.run(hidden_state_0, feed_dict={anno: annot})
                sample, score = wap.get_sample(
                    p,
                    w,
                    h,
                    alpha,
                    annot,
                    h_state,
                    10,
                    100,
                    False,
                    sess,
                    training=False,
                )
                score = score / np.array([len(s) for s in sample])
                ss = sample[score.argmin()]
                fpp_sample.write(test_uid_list[test_count_idx])
                test_count_idx = test_count_idx + 1
                if np.mod(test_count_idx, 100) == 0:
                    print("gen %d samples" % test_count_idx)
                for vv in ss:
                    if vv == 0:  # <eol>
                        break
                    fpp_sample.write(" " + worddicts_r[vv])
                fpp_sample.write("\n")
        fpp_sample.close()
        print("test set decode done")
        print("Done sampling, took " + str(time.time() - _t))

        print("Start testing...")
        _t = time.time()
        probs = []
        for batch_x, batch_y in test:
            batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(
                batch_x, batch_y)
            pprobs, annot = sess.run(
                [cost, annotation],
                feed_dict={
                    x: batch_x,
                    y: batch_y,
                    x_mask: batch_x_m,
                    y_mask: batch_y_m,
                    if_trainning: False,
                },
            )
            probs.append(pprobs)
        test_errs = np.array(probs)
        test_err_cost = test_errs.mean()

        wer_process(
            os.path.join(args.resultPath, args.resultFileName + ".txt"),
            args.testCaptionPath,
            os.path.join(args.resultPath, args.resultFileName + ".wer"),
        )
        fpp = open(os.path.join(args.resultPath, f"{args.resultFileName}.wer"))
        stuff = fpp.readlines()
        fpp.close()
        m = re.search("WER (.*)\n", stuff[0])
        test_per = 100.0 * float(m.group(1))
        m = re.search("ExpRate (.*)\n", stuff[1])
        test_sacc = 100.0 * float(m.group(1))
        test_err = test_per

        print("Test WER: %.2f%%, ExpRate: %.2f%%, Cost: %f" %
              (test_per, test_sacc, test_err_cost))
        print(f"Done testing, took {time.time() - _t}.")
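# A minimal sketch of a command-line driver for the test entry point above,
# again using the attribute names read from `args` in main(); defaults are
# illustrative assumptions only. The checkpoint is expected at
# <modelPath>/<modelFileName>.ckpt, matching the saver.restore() call.
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="Decode and score the test set")
    cli.add_argument("--dictPath", required=True)
    cli.add_argument("--testPklPath", required=True)
    cli.add_argument("--testCaptionPath", required=True)
    cli.add_argument("--modelPath", required=True)
    cli.add_argument("--modelFileName", required=True)
    cli.add_argument("--resultPath", required=True)
    cli.add_argument("--resultFileName", default="test_decode_result")
    main(cli.parse_args())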