def main():
    """Train a semi-supervised conditional VAE (CVAE) on MNIST (TF 1.x).

    Reads hyper-parameters from 'architecture.json', builds the graph
    (losses, encoder/decoder outputs, style-conversion thumbnail op), then
    runs the training loop with Gumbel-temperature (tau) annealing,
    per-epoch reconstruction snapshots, and a periodic test-set confusion
    matrix written to <logdir>/log.txt.

    Relies on module-level names not visible in this chunk: `args`,
    `N_TEST`, and helpers (MNISTLoader, CVAE, reshape, imshow, halflife,
    get_optimization_ops, make_thumbnail, save) -- confirm against the
    file header.
    """

    # Ensure the log directory exists (single level; assumes parent exists).
    if not os.path.isdir(args.logdir):
        os.mkdir(args.logdir)

    # All hyper-parameters / architecture settings come from a JSON spec.
    with open('architecture.json') as f:
        arch = json.load(f)

    # Split MNIST into labeled (x_s, y_s), unlabeled (x_u), and test
    # (x_t, y_t) subsets; x_1 holds one exemplar per class, used later
    # for the style-conversion snapshot.
    dataset = MNISTLoader(args.datadir)
    dataset.divide_semisupervised(N_u=arch['training']['num_unlabeled'])
    x_s, y_s = dataset.pick_supervised_samples(
        smp_per_class=arch['training']['smp_per_class'])
    x_u = dataset.x_u
    x_t, y_t = dataset.x_t, dataset.y_t
    x_1, _ = dataset.pick_supervised_samples(smp_per_class=1)

    # Save a thumbnail grid of the labeled samples for reference.
    x_l_show = reshape(x_s, 10)
    imshow([x_l_show], os.path.join(args.logdir, 'x_labeled.png'))

    batch_size = arch['training']['batch_size']
    N_EPOCH = arch['training']['epoch']
    N_ITER = x_u.shape[0] // batch_size  # iterations per epoch (remainder dropped)
    N_HALFLIFE = arch['training']['halflife']  # tau half-life, measured in epochs


    # ---- Build the TF graph ----
    h, w, c = arch['hwc']
    X_u = tf.placeholder(shape=[None, h, w, c], dtype=tf.float32)  # unlabeled batch
    X_l = tf.constant(x_s)  # labeled images are baked into the graph as a constant
    Y_l = tf.one_hot(y_s, arch['y_dim'])

    net = CVAE(arch)
    loss = net.loss(X_u, X_l, Y_l)  # dict of loss terms (keys used in sess.run below)

    # Encoder outputs: latent mean and soft label distribution for X_u.
    encodings = net.encode(X_u)
    Z_u = encodings['mu']
    Y_u = encodings['y']
    Xh = net.decode(Z_u, Y_u)  # reconstruction conditioned on the dense (soft) label

    # Reconstruction conditioned on the hardened (one-hot argmax) label instead.
    label_pred = tf.argmax(Y_u, 1)
    Y_pred = tf.one_hot(label_pred, arch['y_dim'])
    Xh2 = net.decode(Z_u, Y_pred)

    # Class-conditional generation grid used for the style-conversion snapshot.
    thumbnail = make_thumbnail(Y_u, Z_u, arch, net)

    opt = get_optimization_ops(loss, arch=arch)


    # ---- Session configuration (optional JSON override for GPU settings) ----
    if args.gpu_cfg:
        with open(args.gpu_cfg) as f:
            cfg = json.load(f)
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg['per_process_gpu_memory_fraction'])
        session_conf = tf.ConfigProto(
            allow_soft_placement=cfg['allow_soft_placement'],
            log_device_placement=cfg['log_device_placement'],
            inter_op_parallelism_threads=cfg['inter_op_parallelism_threads'],
            intra_op_parallelism_threads=cfg['intra_op_parallelism_threads'],
            gpu_options=gpu_options)
        sess = tf.Session(config=session_conf)
    else:
        # Default: grow GPU memory on demand rather than pre-allocating it all.
        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True))
        sess = tf.Session(config=sess_config)

    # sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)


    # writer = tf.train.SummaryWriter(args.logdir)  # TODO
    # writer.add_graph(tf.get_default_graph())  # TODO
    # summary_op = tf.merge_all_summaries()  # TODO
    saver = tf.train.Saver()

    # ===============================
    # [TODO] 
    #   1. batcher class
    #      1) for train and for test
    #      2) binarization
    #      3) shffule as arg
    #   5. TBoard (training tracker to monitor the convergence)
    # ===============================

    # Snapshot grids are laid out sqrt(batch_size) x sqrt(batch_size);
    # assumes batch_size is a perfect square -- TODO confirm in the JSON spec.
    sqrt_bz = int(np.sqrt(batch_size))

    logfile = os.path.join(args.logdir, 'log.txt')

    try:
        step = 0
        for ep in range(N_EPOCH):
            np.random.shuffle(x_u)  # shuffle

            for it in range(N_ITER):
                step = ep * N_ITER + it

                idx = range(it * batch_size, (it + 1) * batch_size)
                # Anneal the Gumbel-softmax temperature; halflife() apparently
                # returns an array (tau[0] is logged below) -- TODO confirm.
                tau = halflife(
                    step,
                    N0=arch['training']['largest_tau'],
                    T_half=N_ITER*N_HALFLIFE,
                    thresh=arch['training']['smallest_tau'])

                # Stochastic binarization: treat pixel intensities as Bernoulli
                # probabilities (assumes x_u values lie in [0, 1] -- TODO confirm).
                batch = np.random.binomial(1, x_u[idx])

                _, l_x, l_z, l_y, l_l = sess.run(
                    [opt['g'], loss['Dis'], loss['KL(z)'], loss['H(y)'], loss['Labeled']],
                    {X_u: batch,
                     net.tau: tau})

                msg = 'Ep [{:03d}/{:d}]-It[{:03d}/{:d}]: Lx: {:6.2f}, KL(z): {:4.2f}, L:{:.2e}: H(u): {:.2e}'.format(
                    ep, N_EPOCH, it, N_ITER, l_x, l_z, l_l, l_y)
                print(msg)

                # End of epoch: dump reconstruction snapshots and predicted labels.
                if it == (N_ITER -1):
                    # b, y, xh, xh2, summary = sess.run(    # TODO
                    #     [X_u, Y_u, Xh, Xh2, summary_op],  # TODO
                    b, y, xh, xh2 = sess.run(
                        [X_u, Y_u, Xh, Xh2],
                        {X_u: batch,
                         net.tau: tau})

                    b = reshape(b, sqrt_bz)
                    xh = reshape(xh, sqrt_bz)
                    xh2 = reshape(xh2, sqrt_bz)

                    # Arrange the batch's predicted labels into the same grid
                    # layout as the image thumbnails for side-by-side reading.
                    y = np.argmax(y, 1).astype(np.int32)
                    y = np.reshape(y, [sqrt_bz, sqrt_bz])

                    png = os.path.join(args.logdir, 'Ep-{:03d}-reconst.png'.format(ep))
                    with open(logfile, 'a') as f:
                        f.write(png + '  ')
                        f.write('Tau: {:.3f}\n'.format(tau[0]))
                        f.write(msg + '\n')
                        n, m = y.shape
                        for i in range(n):
                            for j in range(m):
                                f.write('{:d} '.format(y[i, j]))
                            f.write('\n')
                        f.write('\n\n')

                    imshow(
                        img_list=[b, xh, xh2],
                        filename=png,
                        titles=['Ground-truth',
                                'Reconstructed using dense label',
                                'Reconstructed using onehot label'])

                    # writer.add_summary(summary, step)  # TODO

                # Periodic evaluation
                # NOTE(review): `N_ITER - N_ITER` is always 0, so this fires on
                # the FIRST iteration of qualifying epochs -- looks like a
                # leftover edit; confirm intent.
                if it == (N_ITER - N_ITER) and ep % arch['training']['summary_freq'] == 0:
                    # ==== Classification ====
                    y_p = list()
                    bz = 100
                    # `N_TEST` is presumably a module-level constant equal to
                    # the test-set size -- TODO confirm at file top.
                    for i in range(N_TEST // bz):
                        # NOTE(review): b_t is a VIEW into x_t, so this
                        # thresholding mutates the test set in place; after the
                        # first pass x_t is already binary.
                        b_t = x_t[i * bz: (i + 1) * bz]
                        b_t[b_t > 0.5] = 1.0  # [MAKESHIFT] Binarization
                        b_t[b_t <= 0.5] = 0.0
                        p = sess.run(
                            label_pred,
                            {X_u: b_t,
                             net.tau: tau})
                        y_p.append(p)
                    y_p = np.concatenate(y_p, 0)

                    # ==== Style Conversion ====
                    # Feeds the intermediate tensor Y_u directly with an
                    # identity matrix: one generated row/column per class.
                    x_converted = sess.run(
                        thumbnail,
                        {X_u: x_1, Y_u: np.eye(arch['y_dim'])})

                    imshow(
                        img_list=[x_converted],
                        filename=os.path.join(
                            args.logdir,
                            'Ep-{:03d}-conv.png'.format(ep)))

                    # == Confusion Matrix ==
                    with open(logfile, 'a') as f:
                        cm = metrics.confusion_matrix(y_t, y_p)
                        n, m = cm.shape
                        for i in range(n):
                            for j in range(m):
                                f.write('{:4d} '.format(cm[i, j]))
                            f.write('\n')
                        acc = metrics.accuracy_score(y_t, y_p)
                        f.write('Accuracy: {:.4f}\n'.format(acc))
                        f.write('\n\n')
    except KeyboardInterrupt:
        print('Aborted')

    finally:
        save(saver, sess, args.logdir, step)
        # NOTE(review): everything below this line is PyTorch training code
        # (`dataloader`, `optimizer_cvae`, `cvae`, `classifier`, `dis`,
        # `opts`, `torch`, `nn`, `Variable`, `device`, `time` -- none defined
        # in this TensorFlow script). It appears to be mis-pasted from a
        # different project and would raise NameError if reached. It should
        # almost certainly be removed -- confirm with the author before doing so.
        e_classifier_en_loss = 0

        epoch_time = time()

        for i, data in enumerate(dataloader['train'], 0):
            optimizer_cvae.zero_grad()
            optimizer_classifier.zero_grad()
            optimizer_dis.zero_grad()

            x, y = data
            x = Variable(x).to(device)
            y = Variable(y).view(y.size(0), 1).to(device)

            rec, mean, log_var = cvae(x, y)
            z = cvae.reparameterization(mean, log_var)
            rec_loss, kl_loss = cvae.loss(rec, x, mean, log_var)
            en_de_coder_loss = rec_loss + opts.alpha * kl_loss

            loss = nn.BCELoss()
            classifierY = classifier(z)
            classifier_en_loss = loss(classifierY.type_as(x), y.type_as(x))
            en_de_coder_loss -= opts.beta * classifier_en_loss
            classifierY = classifier(z.detach())
            classifier_loss = loss(classifierY.type_as(x), y.type_as(x))

            dis_real = dis(x)
            dis_fake_rec = dis(rec.detach())
            randn_z = Variable(torch.randn(y.size(0),
                                           opts.latent_size)).to(device)
            dis_fake_randn = dis(cvae.decode(y.type_as(x), randn_z).detach())
            label_fake = Variable(torch.Tensor(