def main(args):
    mnasnet = models.mnasnet1_0(pretrained=True).to(device).eval()
    cvae = CVAE(1000, 128, args.n_class * 2, args.n_class).to(device)
    cvae.encoder.eval()
    regressor = Regressor().to(device)
    if Path(args.cvae_resume_model).exists():
        print("load cvae model:", args.cvae_resume_model)
        cvae.load_state_dict(torch.load(args.cvae_resume_model))

    if Path(args.regressor_resume_model).exists():
        print("load regressor model:", args.regressor_resume_model)
        regressor.load_state_dict(torch.load(args.regressor_resume_model))

    image_label = pandas.read_csv(
        Path(args.data_root, args.metadata_file_name.format(
            args.subset))).sample(frac=1, random_state=551)[:250]
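    # shift class labels from 1-based to 0-based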
    image_label["class"] = image_label["class"] - 1

    dataset = WBCDataset(args.n_class,
                         image_label.values,
                         args.data_root,
                         subset=args.subset,
                         train=True)
    data_loader = loader(dataset, args.batch_size, True)
    cvae_optimizer = RAdam(cvae.parameters(), weight_decay=1e-3)
    regressor_optimizer = RAdam(regressor.parameters(), weight_decay=1e-3)
    train(args, mnasnet, cvae, regressor, cvae_optimizer, regressor_optimizer,
          data_loader)
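
# NOTE: `loader(...)`, used throughout these snippets, is not defined in this
# excerpt. A minimal sketch, assuming it simply wraps torch.utils.data.DataLoader
# (hypothetical; the real helper may also configure workers or samplers):
from torch.utils.data import DataLoader

def loader(dataset, batch_size, shuffle):
    # Hypothetical stand-in for the helper assumed by these examples.
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
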
def main(args):
    mnasnet1_0 = models.mnasnet1_0(pretrained=True).to(device).eval()
    model = CVAE(1000, 128, 128, args.n_class, 128).to(device)

    image_label = pandas.read_csv(
        Path(args.data_root, 
             args.metadata_file_name.format(args.subset))
    ).sample(frac=1, random_state=551)[:250]
    image_label["class"] = image_label["class"] - 1
    dataset = WBCDataset(image_label.values, args.data_root, subset=args.subset)

    data_loader = loader(dataset, args.batch_size, True)
    optimizer = RAdam(model.parameters(), weight_decay=1e-3)
    train(args, mnasnet1_0, model, optimizer, data_loader)
def main(args):
    mnasnet = models.mnasnet1_0(pretrained=True).to(device).eval()
    model = CVAE(1000, 128, 128, args.n_class, 128).to(device).eval()
    if Path(args.resume_model).exists():
        print("load regressor model:", args.resume_model)
        model.load_state_dict(torch.load(args.resume_model))

    image_label = pandas.read_csv(
        Path(args.data_root, 
             args.metadata_file_name.format(args.subset))
    ).sample(frac=1, random_state=551) #[250:]
    image_label["class"] = image_label["class"] - 1
    dataset = WBCDataset(image_label.values, args.data_root, subset=args.subset)
    data_loader = loader(dataset, 1, False)
    test(args, mnasnet, model, data_loader)
def read(config, args, first_task=False):
    """
    Read experiment configuration, Generate dataset and model \n
    @param: \n
        config(configparser.ConfigParser): configuration object \n
        args(argparse.ArgumentParser): command line argument object \n
    @return: \n
        dataset(DatasetWoz3): dataset to use \n
        model(nn.Module) \n
    """
    print('Processing data...', file=sys.stderr)

    # Read settings from config.cfg
    model_type = config["MODEL"]["model_type"]
    decoder_type = config["MODEL"]["dec_type"]
    percentage = config.getfloat("MODEL", "train_percentage")
    data_split = config["DATA"]["data_split"]
    n_layer = config.getint("MODEL", "num_layer")
    hidden_size = config.getint("MODEL", "hidden_size")
    beam_size = config.getint("TESTING", "beam_size")
    experiment_prefix = config["EXPERIMENT"]["experiment_prefix"] + str(
        args.random_seed) + '/'
    experiment_type = config["EXPERIMENT"]["experiment"]

    # Get model settings for cvae
    if model_type == "cvae":
        latent_size = config.getint("MODEL", "latent_size")
        std = config.getfloat("MODEL", "std")

    if first_task:  # Pretrain the first task's model with fixed dropout and learning rate
        if model_type == 'lm':
            dropout = 0.25
            lr = 0.005
        else:  # cvae
            dropout = 0.25
            lr = 0.002

    else:
        # Read dropout and learning rate
        dropout = args.dropout if "dropout" in experiment_type else 0
        lr = args.lr

    # Add a suffix to the experiment type to indicate the hyperparameters used
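    # (e.g. an "ewc" experiment run with --ewc_importance 1000 becomes "ewc_1000")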
    if 'loss' in experiment_type:
        experiment_type = experiment_type + '_' + str(args.sv_len_weight)
    if 'distillation' in experiment_type:
        experiment_type = experiment_type + '_' + str(args._lambda)
    if 'ewc' in experiment_type:
        experiment_type = experiment_type + '_' + str(args.ewc_importance)
    if 'l2' in experiment_type:
        experiment_type = experiment_type + '_' + str(args.l2_weight)
    if 'dropout' in experiment_type:
        experiment_type = experiment_type + '_' + str(args.dropout)

    dataset = DatasetWoz3(config, data_split, percentage=percentage)

    # Get dataset parameter
    d_size = dataset.do_size + dataset.da_size + dataset.sv_size  # length of the one-hot feature vector
    do_size = dataset.do_size  # number of domains
    da_size = dataset.da_size  # number of dialogue acts
    sv_size = dataset.sv_size  # number of slot values
    vocab_size = len(dataset.word2index)  # vocabulary size

    # Construct model path to save/load the model
    model_path = construct_model_path(experiment_prefix, experiment_type,
                                      model_type)
    print(f"The model path is {model_path}", file=sys.stderr)
    print(f"The mode is {args.mode}", file=sys.stderr)

    # Initialize model
    if model_type == "lm":
        model = LM_deep(decoder_type,
                        args,
                        vocab_size,
                        vocab_size,
                        hidden_size,
                        d_size,
                        n_layer=n_layer,
                        dropout=dropout,
                        lr=lr)
    elif model_type == "cvae":
        model = CVAE(decoder_type,
                     args,
                     hidden_size,
                     vocab_size,
                     latent_size,
                     d_size,
                     do_size,
                     da_size,
                     sv_size,
                     std,
                     n_layer=n_layer,
                     dropout=dropout,
                     lr=lr)
    else:
        raise ValueError(f"Unsupported model_type: {model_type}")

    # Load model if recover/test mode
    if args.mode == "train":
        assert not os.path.isfile(model_path), f"model already exists at {model_path}"

    elif args.mode == "recover":
        # Load the model specified by the task suffix for recovering training
        task_suffix = args.recovered_tasks
        model_path = f"{model_path[: len(model_path) - 3]}_{task_suffix}.pt"
        print(f"Recovering from {model_path}", file=sys.stderr)

        state = torch.load(model_path)
        model.load_state_dict(state["model_state_dict"])
        model.solver.load_state_dict(state["optimizer_state_dict"])
        if USE_CUDA:
            model.to(torch.device("cuda"))

    else:
        # Load the model specified by the task suffix for testing
        task_suffix = args.recovered_tasks
        model_path = f"{model_path[: len(model_path) - 3]}_{task_suffix}.pt"
        print(f"Testing at {model_path}", file=sys.stderr)

        state = torch.load(model_path)
        model.load_state_dict(state["model_state_dict"])
        if args.mode != 'adapt':
            model.eval()

    # Print model info
    print('\n***** MODEL INFO *****')
    print('MODEL TYPE:', model_type)
    print('MODEL PATH:', model_path)
    print('SIZE OF HIDDEN:', hidden_size)
    print('# of LAYER:', n_layer)
    print('SAMPLE/BEAM SIZE:', beam_size)
    print('*************************\n')

    # Move models to GPU
    if USE_CUDA:
        model.cuda()

    return dataset, model
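
# `construct_model_path` is not defined in this excerpt. A hypothetical sketch:
# the recover/test branches above strip a trailing ".pt" via model_path[:-3],
# so the helper must return a path ending in ".pt". The real layout may differ.
import os

def construct_model_path(experiment_prefix, experiment_type, model_type):
    return os.path.join(experiment_prefix, f"{experiment_type}_{model_type}.pt")
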
def main():

    if not os.path.isdir(args.logdir):
        os.mkdir(args.logdir)

    with open('architecture.json') as f:
        arch = json.load(f)

    dataset = MNISTLoader(args.datadir)
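    # Split the training set into labeled and unlabeled pools (semi-supervised).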
    dataset.divide_semisupervised(N_u=arch['training']['num_unlabeled'])
    x_s, y_s = dataset.pick_supervised_samples(
        smp_per_class=arch['training']['smp_per_class'])
    x_u = dataset.x_u
    x_t, y_t = dataset.x_t, dataset.y_t
    x_1, _ = dataset.pick_supervised_samples(smp_per_class=1)

    x_l_show = reshape(x_s, 10)
    imshow([x_l_show], os.path.join(args.logdir, 'x_labeled.png'))

    batch_size = arch['training']['batch_size']
    N_EPOCH = arch['training']['epoch']
    N_ITER = x_u.shape[0] // batch_size
    N_HALFLIFE = arch['training']['halflife']


    h, w, c = arch['hwc']
    X_u = tf.placeholder(shape=[None, h, w, c], dtype=tf.float32)
    X_l = tf.constant(x_s)
    Y_l = tf.one_hot(y_s, arch['y_dim'])

    net = CVAE(arch)
    loss = net.loss(X_u, X_l, Y_l)

    encodings = net.encode(X_u)
    Z_u = encodings['mu']
    Y_u = encodings['y']
    Xh = net.decode(Z_u, Y_u)

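    # Harden the inferred label (argmax -> one-hot) and decode again, so the
    # soft-label and hard-label reconstructions can be compared.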
    label_pred = tf.argmax(Y_u, 1)
    Y_pred = tf.one_hot(label_pred, arch['y_dim'])
    Xh2 = net.decode(Z_u, Y_pred)

    thumbnail = make_thumbnail(Y_u, Z_u, arch, net)

    opt = get_optimization_ops(loss, arch=arch)


    if args.gpu_cfg:
        with open(args.gpu_cfg) as f:
            cfg = json.load(f)
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg['per_process_gpu_memory_fraction'])
        session_conf = tf.ConfigProto(
            allow_soft_placement=cfg['allow_soft_placement'],
            log_device_placement=cfg['log_device_placement'],
            inter_op_parallelism_threads=cfg['inter_op_parallelism_threads'],
            intra_op_parallelism_threads=cfg['intra_op_parallelism_threads'],
            gpu_options=gpu_options)
        sess = tf.Session(config=session_conf)
    else:
        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True))
        sess = tf.Session(config=sess_config)
            
    # sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)


    # writer = tf.train.SummaryWriter(args.logdir)  # TODO
    # writer.add_graph(tf.get_default_graph())  # TODO
    # summary_op = tf.merge_all_summaries()  # TODO
    saver = tf.train.Saver()

    # ===============================
    # [TODO] 
    #   1. batcher class
    #      1) for train and for test
    #      2) binarization
    #      3) shuffle as arg
    #   2. TensorBoard (training tracker to monitor convergence)
    # ===============================

    sqrt_bz = int(np.sqrt(batch_size))

    logfile = os.path.join(args.logdir, 'log.txt')

    try:
        step = 0
        for ep in range(N_EPOCH):
            np.random.shuffle(x_u)  # shuffle

            for it in range(N_ITER):
                step = ep * N_ITER + it

                idx = range(it * batch_size, (it + 1) * batch_size)
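                # Anneal the temperature tau from largest_tau toward smallest_tau
                # on an exponential half-life schedule.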
                tau = halflife(
                    step,
                    N0=arch['training']['largest_tau'],
                    T_half=N_ITER*N_HALFLIFE,
                    thresh=arch['training']['smallest_tau'])

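                # Dynamic binarization: sample each pixel from Bernoulli(intensity).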
                batch = np.random.binomial(1, x_u[idx])

                _, l_x, l_z, l_y, l_l = sess.run(
                    [opt['g'], loss['Dis'], loss['KL(z)'], loss['H(y)'], loss['Labeled']],
                    {X_u: batch,
                     net.tau: tau})

                msg = 'Ep [{:03d}/{:d}]-It[{:03d}/{:d}]: Lx: {:6.2f}, KL(z): {:4.2f}, L: {:.2e}, H(y): {:.2e}'.format(
                    ep, N_EPOCH, it, N_ITER, l_x, l_z, l_l, l_y)
                print(msg)

                if it == (N_ITER - 1):
                    # b, y, xh, xh2, summary = sess.run(    # TODO
                    #     [X_u, Y_u, Xh, Xh2, summary_op],  # TODO
                    b, y, xh, xh2 = sess.run(
                        [X_u, Y_u, Xh, Xh2],
                        {X_u: batch,
                         net.tau: tau})

                    b = reshape(b, sqrt_bz)
                    xh = reshape(xh, sqrt_bz)
                    xh2 = reshape(xh2, sqrt_bz)

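                    # Arrange the predicted labels in the same grid layout as the image tiles.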
                    y = np.argmax(y, 1).astype(np.int32)
                    y = np.reshape(y, [sqrt_bz, sqrt_bz])

                    png = os.path.join(args.logdir, 'Ep-{:03d}-reconst.png'.format(ep))
                    with open(logfile, 'a') as f:
                        f.write(png + '  ')
                        f.write('Tau: {:.3f}\n'.format(tau[0]))
                        f.write(msg + '\n')
                        n, m = y.shape
                        for i in range(n):
                            for j in range(m):
                                f.write('{:d} '.format(y[i, j]))
                            f.write('\n')
                        f.write('\n\n')

                    imshow(
                        img_list=[b, xh, xh2],
                        filename=png,
                        titles=['Ground-truth',
                                'Reconstructed using dense label',
                                'Reconstructed using onehot label'])

                    # writer.add_summary(summary, step)  # TODO

                # Periodic evaluation
                if it == 0 and ep % arch['training']['summary_freq'] == 0:
                    # ==== Classification ====
                    y_p = list()
                    bz = 100
                    for i in range(x_t.shape[0] // bz):  # full test set (N_TEST undefined in this excerpt)
                        # [MAKESHIFT] binarization; copy first so the test set is not mutated in place
                        b_t = (x_t[i * bz: (i + 1) * bz] > 0.5).astype(np.float32)
                        p = sess.run(
                            label_pred,
                            {X_u: b_t,
                             net.tau: tau})
                        y_p.append(p)
                    y_p = np.concatenate(y_p, 0)

                    # ==== Style Conversion ====
                    x_converted = sess.run(
                        thumbnail,
                        {X_u: x_1, Y_u: np.eye(arch['y_dim'])})

                    imshow(
                        img_list=[x_converted],
                        filename=os.path.join(
                            args.logdir,
                            'Ep-{:03d}-conv.png'.format(ep)))

                    # == Confusion Matrix ==
                    with open(logfile, 'a') as f:
                        cm = metrics.confusion_matrix(y_t, y_p)
                        n, m = cm.shape
                        for i in range(n):
                            for j in range(m):
                                f.write('{:4d} '.format(cm[i, j]))
                            f.write('\n')
                        acc = metrics.accuracy_score(y_t, y_p)
                        f.write('Accuracy: {:.4f}\n'.format(acc))
                        f.write('\n\n')
    except KeyboardInterrupt:
        print('Aborted')

    finally:
        save(saver, sess, args.logdir, step)
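
# `halflife(...)`, used in the training loop above, is not defined in this
# excerpt. A minimal sketch, assuming exponential decay from N0 with half-life
# T_half, floored at thresh and returned as a 1-element array (the logging
# code formats tau[0]):
import numpy as np

def halflife(step, N0=1.0, T_half=1.0, thresh=0.0):
    tau = N0 * 0.5 ** (step / T_half)
    return np.array([max(tau, thresh)])
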
Example #6
    model_config["exp_logs_dir"] = exp_logs
    model_config["exp_save_models_dir"] = exp_saved_models

    # file path for gradient checking and plotting
    #file_loc = "/afs/inf.ed.ac.uk/user/s18/s1890219/Thesis/CVAE/experiments/cvae/output_logs/analysis_1/"
    #model_config["file_loc"] = file_loc
    # the parameter names indicate what to evaluate on
    model_config["test_user_item_interaction_dict"] = val_user_item_interaction_dict
    model_config["train_user_item_interaction_dict"] = train_user_item_interaction_dict

    ##### define the model #####
    if args.model_type == "cvae":
        model = CVAE(config=model_config).to(device)
        print(model)
        #criterion = torch.nn.MSELoss()
        criterion = torch.nn.CrossEntropyLoss()
        # with size_average=False, the losses would be summed over each minibatch instead of averaged
        #criterion.size_average = False
        learning_rate = 1e-4
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=learning_rate,
                                     weight_decay=1e-5)

    # train the model
    if args.exp_type == "train":
        train.train_and_val(model, train_dataloader, val_dataloader,
                            criterion, optimizer, args, model_config)
    test_dataset = CelebA(label=opts.label,
                          path=opts.path,
                          train=False,
                          transform=transforms.ToTensor())
    dataloader = {
        'train':
        torch.utils.data.DataLoader(train_dataset,
                                    batch_size=opts.batch_size,
                                    shuffle=True),
        'test':
        torch.utils.data.DataLoader(test_dataset,
                                    batch_size=opts.batch_size,
                                    shuffle=False)
    }

    cvae = CVAE(opts.latent_size, device).to(device)
    dis = Discriminator().to(device)
    classifier = Classifier(opts.latent_size).to(device)
    classer = CLASSIFIERS().to(device)

    print(cvae)
    print(dis)
    print(classifier)

    optimizer_cvae = torch.optim.Adam(cvae.parameters(),
                                      lr=opts.lr,
                                      betas=(opts.b1, opts.b2),
                                      weight_decay=opts.weight_decay)
    optimizer_dis = torch.optim.Adam(dis.parameters(),
                                     lr=opts.lr,
                                     betas=(opts.b1, opts.b2),