import os
import re
import sys
import time
import random

import numpy as np
import tensorflow as tf

# NOTE: the module paths for these project-local helpers are assumptions;
# adjust them to this repository's actual layout.
from data_iterator import dataIterator, load_dict, prepare_data
from model import WAP, Attender, Parser, Watcher_train
from wer import wer_process


# Training entry point with hard-coded data paths under args.path.
def main(args):
    worddicts = load_dict(args.path + '/data/dictionary.txt')
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    train, train_uid_list = dataIterator(args.path + '/data/offline-train.pkl',
                                         args.path + '/data/train_caption.txt',
                                         worddicts,
                                         batch_size=args.batch_size,
                                         batch_Imagesize=400000,
                                         maxlen=100,
                                         maxImagesize=400000)

    valid, valid_uid_list = dataIterator(args.path + '/data/offline-test.pkl',
                                         args.path + '/data/test_caption.txt',
                                         worddicts,
                                         batch_size=args.batch_size,
                                         batch_Imagesize=400000,
                                         maxlen=100,
                                         maxImagesize=400000)

    print('train length is ', len(train))

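    # Graph inputs: a greyscale image batch (NHWC, one channel), integer
    # caption tokens, their masks, a scalar learning rate, and a boolean
    # training-phase switch.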
    x = tf.placeholder(tf.float32, shape=[None, None, None, 1])

    y = tf.placeholder(tf.int32, shape=[None, None])

    x_mask = tf.placeholder(tf.float32, shape=[None, None, None])

    y_mask = tf.placeholder(tf.float32, shape=[None, None])

    lr = tf.placeholder(tf.float32, shape=())

    if_trainning = tf.placeholder(tf.bool, shape=())

    watcher_train = Watcher_train(blocks=3,
                                  level=16,
                                  growth_rate=24,
                                  training=if_trainning)

    annotation, anno_mask = watcher_train.dense_net(x, x_mask)

    # for initializing validation
    anno = tf.placeholder(tf.float32,
                          shape=[
                              None,
                              annotation.shape.as_list()[1],
                              annotation.shape.as_list()[2],
                              annotation.shape.as_list()[3]
                          ])
    infer_y = tf.placeholder(tf.int64, shape=(None, ))
    h_pre = tf.placeholder(tf.float32, shape=[None, 256])
    alpha_past = tf.placeholder(tf.float32,
                                shape=[
                                    None,
                                    annotation.shape.as_list()[1],
                                    annotation.shape.as_list()[2]
                                ])

    attender = Attender(annotation.shape.as_list()[3], 256, 512)

    parser = Parser(256, 256, attender, annotation.shape.as_list()[3])

    # Use `wap` for the model instance so it is not shadowed by the `w`
    # tensor returned from get_word() below.
    wap = WAP(watcher_train, attender, parser, 256, 256,
              annotation.shape.as_list()[3], 111, if_trainning)

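    # Initial decoder hidden state: mean-pool the annotation grid over its
    # spatial axes, then project it through Wa2h.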
    hidden_state_0 = tf.tanh(
        tf.tensordot(tf.reduce_mean(anno, axis=[1, 2]), wap.Wa2h, axes=1) +
        wap.ba2h)  # [batch, hidden_dim]

    cost = wap.get_cost(annotation, y, anno_mask, y_mask)

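    # L2 weight decay on all trainable variables except batch-norm parameters.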
    vs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

    for vv in vs:
        if not vv.name.startswith('batch_normalization'):
            cost += 1e-4 * tf.reduce_sum(tf.pow(vv, 2))

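    # One-step decoding op (token probabilities, predicted word, next state,
    # attention) consumed by get_sample during beam search.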
    p, w, h, alpha = wap.get_word(infer_y, h_pre, alpha_past, anno)

    optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr)

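    # Run batch-norm moving-average updates together with each training step.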
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        trainer = optimizer.minimize(cost)

    max_epoch = 200

    config = tf.ConfigProto()

    config.gpu_options.allow_growth = True

    init = tf.global_variables_initializer()

    uidx = 0
    cost_s = 0
    dispFreq = 100
    saveFreq = len(train)
    sampleFreq = len(train)
    validFreq = len(train)
    history_errs = []
    estop = False
    halfLrFlag = 0
    patience = 15
    lrate = 1.0
    log = open(args.path + '/log-bs-6.txt', 'w')

    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(max_epoch):
            n_samples = 0
            random.shuffle(train)
            for batch_x, batch_y in train:
                batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(
                    batch_x, batch_y)
                n_samples += len(batch_x)
                uidx += 1

                cost_i, _ = sess.run(
                    [cost, trainer],
                    feed_dict={
                        x: batch_x,
                        y: batch_y,
                        x_mask: batch_x_m,
                        y_mask: batch_y_m,
                        if_trainning: True,
                        lr: lrate
                    })

                cost_s += cost_i

                if np.isnan(cost_i) or np.isinf(cost_i):
                    print('invalid cost value detected')
                    sys.exit(0)

                if np.mod(uidx, dispFreq) == 0:
                    cost_s /= dispFreq
                    print('Epoch ', epoch, 'Update ', uidx, 'Cost ', cost_s,
                          'Lr ', lrate)
                    log.write('Epoch ' + str(epoch) + ' Update ' + str(uidx) +
                              ' Cost ' + str(cost_s) + ' Lr ' + str(lrate) +
                              '\n')
                    log.flush()
                    cost_s = 0

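                # Periodically decode the validation images with beam search
                # and dump the hypotheses for WER scoring.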
                if np.mod(uidx, sampleFreq) == 0:
                    fpp_sample = open(
                        args.path + '/result/valid_decode_result-bs-6.txt',
                        'w')
                    valid_count_idx = 0
                    for batch_x, batch_y in valid:
                        for xx in batch_x:
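                            # Channels-last, pixels scaled to [0, 1], plus a
                            # batch dimension of 1.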
                            xx = np.moveaxis(xx, 0, -1)
                            xx_pad = np.zeros(
                                (xx.shape[0], xx.shape[1], xx.shape[2]),
                                dtype='float32')
                            xx_pad[:, :, :] = xx / 255.
                            xx_pad = xx_pad[None, :, :, :]
                            annot = sess.run(annotation,
                                             feed_dict={
                                                 x: xx_pad,
                                                 if_trainning: False
                                             })
                            h_state = sess.run(hidden_state_0,
                                               feed_dict={anno: annot})
                            sample, score = wap.get_sample(p,
                                                           w,
                                                           h,
                                                           alpha,
                                                           annot,
                                                           h_state,
                                                           10,
                                                           100,
                                                           False,
                                                           sess,
                                                           training=False)
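                            # Length-normalize the beam scores and keep the
                            # lowest-cost hypothesis.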
                            score = score / np.array([len(s) for s in sample])
                            ss = sample[score.argmin()]
                            fpp_sample.write(valid_uid_list[valid_count_idx])
                            valid_count_idx = valid_count_idx + 1
                            if np.mod(valid_count_idx, 100) == 0:
                                print('gen %d samples' % valid_count_idx)
                                log.write('gen %d samples' % valid_count_idx +
                                          '\n')
                                log.flush()
                            for vv in ss:
                                if vv == 0:  # <eol>
                                    break
                                fpp_sample.write(' ' + worddicts_r[vv])
                            fpp_sample.write('\n')
                    fpp_sample.close()
                    print('valid set decode done')
                    log.write('valid set decode done\n')
                    log.flush()

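                # Periodically compute the validation cost and score the
                # decoded hypotheses with compute-wer.py.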
                if np.mod(uidx, validFreq) == 0:
                    probs = []
                    for batch_x, batch_y in valid:
                        batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(
                            batch_x, batch_y)
                        pprobs, annot = sess.run(
                            [cost, annotation],
                            feed_dict={
                                x: batch_x,
                                y: batch_y,
                                x_mask: batch_x_m,
                                y_mask: batch_y_m,
                                if_trainning: False
                            })
                        probs.append(pprobs)
                    valid_errs = np.array(probs)
                    valid_err_cost = valid_errs.mean()
                    os.system('python3.4 compute-wer.py ' + args.path +
                              '/result/valid_decode_result-bs-6.txt' + ' ' +
                              args.path + '/data/test_caption.txt' + ' ' +
                              args.path + '/result/valid-bs-6.wer')
                    fpp = open(args.path + '/result/valid-bs-6.wer')
                    stuff = fpp.readlines()
                    fpp.close()
                    m = re.search('WER (.*)\n', stuff[0])
                    valid_per = 100. * float(m.group(1))
                    m = re.search('ExpRate (.*)\n', stuff[1])
                    valid_sacc = 100. * float(m.group(1))
                    valid_err = valid_per

                    history_errs.append(valid_err)

                    # valid_err is already in history_errs, so it is either a
                    # new minimum (reset patience) or a regression (count it).
                    if valid_err <= np.array(history_errs).min():
                        bad_counter = 0
                    else:
                        bad_counter += 1
                        if bad_counter > patience:
                            if halfLrFlag == 2:
                                print('Early Stop!')
                                log.write('Early Stop!\n')
                                log.flush()
                                estop = True
                                break
                            else:
                                print('Lr decay and retrain!')
                                log.write('Lr decay and retrain!\n')
                                log.flush()
                                bad_counter = 0
                                lrate = lrate / 10
                                halfLrFlag += 1

                    print('Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f' %
                          (valid_per, valid_sacc, valid_err_cost))
                    log.write('Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f' %
                              (valid_per, valid_sacc, valid_err_cost) + '\n')
                    log.flush()
            if estop:
                break


# Training entry point with data paths and hyper-parameters supplied via args.
def main_train(args):
    global anno, infer_y, h_pre, alpha_past, if_trainning, dictLen

    worddicts = load_dict(args.dictPath)
    dictLen = len(worddicts)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    train, train_uid_list = dataIterator(
        args.trainPklPath,
        args.trainCaptionPath,
        worddicts,
        batch_size=args.batch_size,
        batch_Imagesize=500000,
        maxlen=150,
        maxImagesize=500000,
    )

    valid, valid_uid_list = dataIterator(
        args.validPklPath,
        args.validCaptionPath,
        worddicts,
        batch_size=args.batch_size,
        batch_Imagesize=500000,
        maxlen=150,
        maxImagesize=500000,
    )

    print("train lenth is ", len(train))
    print("valid lenth is ", len(valid))

    x = tf.placeholder(tf.float32, shape=[None, None, None, 1])

    y = tf.placeholder(tf.int32, shape=[None, None])

    x_mask = tf.placeholder(tf.float32, shape=[None, None, None])

    y_mask = tf.placeholder(tf.float32, shape=[None, None])

    lr = tf.placeholder(tf.float32, shape=())

    if_trainning = tf.placeholder(tf.bool, shape=())

    watcher_train = Watcher_train(blocks=3,
                                  level=16,
                                  growth_rate=24,
                                  training=if_trainning)

    annotation, anno_mask = watcher_train.dense_net(x, x_mask)

    # for initializing validation
    anno = tf.placeholder(
        tf.float32,
        shape=[
            None,
            annotation.shape.as_list()[1],
            annotation.shape.as_list()[2],
            annotation.shape.as_list()[3],
        ],
    )
    infer_y = tf.placeholder(tf.int64, shape=(None, ))
    h_pre = tf.placeholder(tf.float32, shape=[None, 256])
    alpha_past = tf.placeholder(
        tf.float32,
        shape=[
            None,
            annotation.shape.as_list()[1],
            annotation.shape.as_list()[2]
        ],
    )

    attender = Attender(annotation.shape.as_list()[3], 256, 512)

    parser = Parser(256, 256, attender, annotation.shape.as_list()[3])

    wap = WAP(
        watcher_train,
        attender,
        parser,
        256,
        256,
        annotation.shape.as_list()[3],
        dictLen,
        if_trainning,
    )

    hidden_state_0 = tf.tanh(
        tf.tensordot(tf.reduce_mean(anno, axis=[1, 2]), wap.Wa2h, axes=1) +
        wap.ba2h)  # [batch, hidden_dim]

    cost = wap.get_cost(annotation, y, anno_mask, y_mask)

    vs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

    for vv in vs:
        if not vv.name.startswith("batch_normalization"):
            cost += 1e-4 * tf.reduce_sum(tf.pow(vv, 2))

    p, w, h, alpha = wap.get_word(infer_y, h_pre, alpha_past, anno)

    optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        trainer = optimizer.minimize(cost)

    max_epoch = 200

    config = tf.ConfigProto()

    config.gpu_options.allow_growth = True

    init = tf.global_variables_initializer()

    uidx = 0
    cost_s = 0
    dispFreq = 100 if args.dispFreq is None else args.dispFreq
    saveFreq = (len(train) * args.epochDispRatio
                if args.saveFreq is None else args.saveFreq)
    sampleFreq = (len(train) * args.epochSampleRatio
                  if args.sampleFreq is None else args.sampleFreq)
    validFreq = (len(train) * args.epochValidRatio
                 if args.validFreq is None else args.validFreq)
    history_errs = []
    estop = False
    halfLrFlag = 0
    patience = 15 if args.patience is None else args.patience
    lrate = args.lr
    logPath = "./log.txt" if args.logPath is None else args.logPath
    log = open(logPath, "w")

    log.write(str(vars(args)) + "\n")
    log.write("patience: " + str(patience) + "\n")
    log.write("lrate: " + str(lrate) + "\n")

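    # Checkpoint saver; the model is written out once training finishes.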
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(max_epoch):
            n_samples = 0
            random.shuffle(train)
            for batch_x, batch_y in train:
                batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(
                    batch_x, batch_y)
                n_samples += len(batch_x)
                uidx += 1

                cost_i, _ = sess.run(
                    [cost, trainer],
                    feed_dict={
                        x: batch_x,
                        y: batch_y,
                        x_mask: batch_x_m,
                        y_mask: batch_y_m,
                        if_trainning: True,
                        lr: lrate,
                    },
                )

                cost_s += cost_i

                if np.isnan(cost_i) or np.isinf(cost_i):
                    print("invalid cost value detected")
                    sys.exit(0)

                if np.mod(uidx, dispFreq) == 0:
                    cost_s /= dispFreq
                    print("Epoch ", epoch, "Update ", uidx, "Cost ", cost_s,
                          "Lr ", lrate)
                    log.write("Epoch " + str(epoch) + " Update " + str(uidx) +
                              " Cost " + str(cost_s) + " Lr " + str(lrate) +
                              "\n")
                    log.flush()
                    cost_s = 0

                if np.mod(uidx, sampleFreq) == 0:
                    print("Start sampling...")
                    _t = time.time()
                    fpp_sample = open(
                        os.path.join(args.resultPath,
                                     str(args.resultFileName) + ".txt"),
                        "w",
                    )
                    valid_count_idx = 0
                    for batch_x, batch_y in valid:
                        for xx in batch_x:
                            xx = np.moveaxis(xx, 0, -1)
                            xx_pad = np.zeros(
                                (xx.shape[0], xx.shape[1], xx.shape[2]),
                                dtype="float32")
                            xx_pad[:, :, :] = xx / 255.0
                            xx_pad = xx_pad[None, :, :, :]
                            annot = sess.run(annotation,
                                             feed_dict={
                                                 x: xx_pad,
                                                 if_trainning: False
                                             })
                            h_state = sess.run(hidden_state_0,
                                               feed_dict={anno: annot})
                            sample, score = wap.get_sample(
                                p,
                                w,
                                h,
                                alpha,
                                annot,
                                h_state,
                                10,
                                100,
                                False,
                                sess,
                                training=False,
                            )
                            score = score / np.array([len(s) for s in sample])
                            ss = sample[score.argmin()]
                            fpp_sample.write(valid_uid_list[valid_count_idx])
                            valid_count_idx = valid_count_idx + 1
                            if np.mod(valid_count_idx, 100) == 0:
                                print("gen %d samples" % valid_count_idx)
                                log.write("gen %d samples" % valid_count_idx +
                                          "\n")
                                log.flush()
                            for vv in ss:
                                if vv == 0:  # <eol>
                                    break
                                fpp_sample.write(" " + worddicts_r[vv])
                            fpp_sample.write("\n")
                    fpp_sample.close()
                    print("valid set decode done")
                    log.write("valid set decode done\n")
                    log.flush()
                    print("Done sampling, took" + str(time.time() - _t))

                if np.mod(uidx, validFreq) == 0:
                    print("Start validating...")
                    _t = time.time()
                    probs = []
                    for batch_x, batch_y in valid:
                        batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(
                            batch_x, batch_y)
                        pprobs, annot = sess.run(
                            [cost, annotation],
                            feed_dict={
                                x: batch_x,
                                y: batch_y,
                                x_mask: batch_x_m,
                                y_mask: batch_y_m,
                                if_trainning: False,
                            },
                        )
                        probs.append(pprobs)
                    valid_errs = np.array(probs)
                    valid_err_cost = valid_errs.mean()
                    wer_process(
                        os.path.join(args.resultPath,
                                     args.resultFileName + ".txt"),
                        args.validCaptionPath,
                        os.path.join(args.resultPath,
                                     args.resultFileName + ".wer"),
                    )
                    fpp = open(
                        os.path.join(args.resultPath,
                                     args.resultFileName + ".wer"))
                    stuff = fpp.readlines()
                    fpp.close()
                    m = re.search("WER (.*)\n", stuff[0])
                    valid_per = 100.0 * float(m.group(1))
                    m = re.search("ExpRate (.*)\n", stuff[1])
                    valid_sacc = 100.0 * float(m.group(1))
                    valid_err = valid_per

                    history_errs.append(valid_err)

                    # valid_err is already in history_errs, so it is either a
                    # new minimum (reset patience) or a regression (count it).
                    if valid_err <= np.array(history_errs).min():
                        bad_counter = 0
                    else:
                        bad_counter += 1
                        if bad_counter > patience:
                            if halfLrFlag == 2:
                                print("Early Stop!")
                                log.write("Early Stop!\n")
                                log.flush()
                                estop = True
                                break
                            else:
                                print("Lr decay and retrain!")
                                log.write("Lr decay and retrain!\n")
                                log.flush()
                                bad_counter = 0
                                lrate = lrate / 10
                                halfLrFlag += 1
                    print("bad_counter" + str(bad_counter))
                    print("Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f" %
                          (valid_per, valid_sacc, valid_err_cost))
                    log.write("Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f" %
                              (valid_per, valid_sacc, valid_err_cost) + "\n")
                    log.flush()
                    print("Done validating, took" + str(time.time() - _t))
            if estop:
                break

        # Save the final model; join the path components separately and append
        # ".ckpt" so the restore path in the test entry point matches.
        save_path = saver.save(
            sess, os.path.join(args.savePath, args.saveName) + ".ckpt")


# Test entry point: restores a trained checkpoint and decodes the test set.
def main_test(args):
    global anno, infer_y, h_pre, alpha_past, if_trainning, dictLen

    worddicts = load_dict(args.dictPath)
    dictLen = len(worddicts)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    test, test_uid_list = dataIterator(
        args.testPklPath,
        args.testCaptionPath,
        worddicts,
        batch_size=2,
        batch_Imagesize=400000,
        maxlen=100,
        maxImagesize=400000,
    )

    x = tf.placeholder(tf.float32, shape=[None, None, None, 1])

    y = tf.placeholder(tf.int32, shape=[None, None])

    x_mask = tf.placeholder(tf.float32, shape=[None, None, None])

    y_mask = tf.placeholder(tf.float32, shape=[None, None])

    lr = tf.placeholder(tf.float32, shape=())

    if_trainning = tf.placeholder(tf.bool, shape=())

    watcher_train = Watcher_train(blocks=3,
                                  level=16,
                                  growth_rate=24,
                                  training=if_trainning)

    annotation, anno_mask = watcher_train.dense_net(x, x_mask)

    # for initializing validation
    anno = tf.placeholder(
        tf.float32,
        shape=[
            None,
            annotation.shape.as_list()[1],
            annotation.shape.as_list()[2],
            annotation.shape.as_list()[3],
        ],
    )
    infer_y = tf.placeholder(tf.int64, shape=(None, ))
    h_pre = tf.placeholder(tf.float32, shape=[None, 256])
    alpha_past = tf.placeholder(
        tf.float32,
        shape=[
            None,
            annotation.shape.as_list()[1],
            annotation.shape.as_list()[2]
        ],
    )

    attender = Attender(annotation.shape.as_list()[3], 256, 512)

    parser = Parser(256, 256, attender, annotation.shape.as_list()[3])

    wap = WAP(
        watcher_train,
        attender,
        parser,
        256,
        256,
        annotation.shape.as_list()[3],
        dictLen,
        if_trainning,
    )

    hidden_state_0 = tf.tanh(
        tf.tensordot(tf.reduce_mean(anno, axis=[1, 2]), wap.Wa2h, axes=1) +
        wap.ba2h)  # [batch, hidden_dim]

    cost = wap.get_cost(annotation, y, anno_mask, y_mask)

    vs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

    for vv in vs:
        if not vv.name.startswith("batch_normalization"):
            cost += 1e-4 * tf.reduce_sum(tf.pow(vv, 2))

    p, w, h, alpha = wap.get_word(infer_y, h_pre, alpha_past, anno)

    optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        trainer = optimizer.minimize(cost)

    max_epoch = 200

    config = tf.ConfigProto()

    config.gpu_options.allow_growth = True

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        sess.run(init)
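        # Overwrite the fresh initialization with the trained checkpoint.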
        saver.restore(
            sess,
            os.path.join(args.modelPath, args.modelFileName) + ".ckpt")

        print("Start sampling...")
        _t = time.time()
        fpp_sample = open(
            os.path.join(args.resultPath,
                         str(args.resultFileName) + ".txt"),
            "w",
        )
        test_count_idx = 0
        for batch_x, batch_y in test:
            for xx in batch_x:
                xx = np.moveaxis(xx, 0, -1)
                xx_pad = np.zeros((xx.shape[0], xx.shape[1], xx.shape[2]),
                                  dtype="float32")
                xx_pad[:, :, :] = xx / 255.0
                xx_pad = xx_pad[None, :, :, :]
                annot = sess.run(annotation,
                                 feed_dict={
                                     x: xx_pad,
                                     if_trainning: False
                                 })
                h_state = sess.run(hidden_state_0, feed_dict={anno: annot})
                sample, score = wap.get_sample(
                    p,
                    w,
                    h,
                    alpha,
                    annot,
                    h_state,
                    10,
                    100,
                    False,
                    sess,
                    training=False,
                )
                score = score / np.array([len(s) for s in sample])
                ss = sample[score.argmin()]
                fpp_sample.write(test_uid_list[test_count_idx])
                test_count_idx = test_count_idx + 1
                if np.mod(test_count_idx, 100) == 0:
                    print("gen %d samples" % test_count_idx)
                for vv in ss:
                    if vv == 0:  # <eol>
                        break
                    fpp_sample.write(" " + worddicts_r[vv])
                fpp_sample.write("\n")
        fpp_sample.close()
        print("valid set decode done")
        log.write("valid set decode done\n")
        log.flush()
        print("Done sampling, took" + str(time.time() - _t))

        print("Start validating...")
        _t = time.time()
        probs = []
        for batch_x, batch_y in test:
            batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(
                batch_x, batch_y)
            pprobs, annot = sess.run(
                [cost, annotation],
                feed_dict={
                    x: batch_x,
                    y: batch_y,
                    x_mask: batch_x_m,
                    y_mask: batch_y_m,
                    if_trainning: False,
                },
            )
            probs.append(pprobs)
        test_errs = np.array(probs)
        test_err_cost = test_errs.mean()
        wer_process(
            os.path.join(args.resultPath, args.resultFileName + ".txt"),
            args.testCaptionPath,
            os.path.join(args.resultPath, args.resultFileName + ".wer"),
        )
        fpp = open(os.path.join(args.resultPath, args.resultFileName + ".wer"))
        stuff = fpp.readlines()
        fpp.close()
        m = re.search("WER (.*)\n", stuff[0])
        test_per = 100.0 * float(m.group(1))
        m = re.search("ExpRate (.*)\n", stuff[1])
        test_sacc = 100.0 * float(m.group(1))
        test_err = test_per

        print("Test WER: %.2f%%, ExpRate: %.2f%%, Cost: %f" %
              (test_per, test_sacc, test_err_cost))
        print(f"Done validating, took {time.time() - _t}.")