Example #1
def train_for_n(model_name,
                XT_nd, XT_dg, XTest_dg,
                generator, discriminator, GAN,
                nb_epoch=5000, plt_frq=25, BATCH_SIZE=32,
                losses=None):

    # Avoid a shared mutable default: create a fresh loss history per call.
    if losses is None:
        losses = {"d": [], "g": []}

    start_string = model_name + '_' + time.strftime('%m_%d__%H_%M_%S', time.localtime())
    os.mkdir(os.path.join('output', start_string))
    for e in tqdm(range(nb_epoch)):  
        
        # Sample a batch of real images and a batch of disguised images,
        # then generate fakes from the disguised batch
        image_batch = XT_nd[np.random.randint(0, XT_nd.shape[0], size=BATCH_SIZE), :, :, :]
        disguised_batch = XT_dg[np.random.randint(0, XT_dg.shape[0], size=BATCH_SIZE), :, :, :]
        generated_images = generator.predict(disguised_batch)
        
        # Train discriminator on real vs. generated images
        # (one-hot labels: column 1 = real, column 0 = generated)
        X = np.concatenate((image_batch, generated_images))
        y = np.zeros([2 * BATCH_SIZE, 2])
        y[0:BATCH_SIZE, 1] = 1
        y[BATCH_SIZE:, 0] = 1

        make_trainable(discriminator, True)
        d_loss = discriminator.train_on_batch(X, y)
        losses["d"].append(d_loss)
    
        # Train the generator-discriminator stack: disguised images are
        # labeled as the non-generated ("real") class so the generator
        # learns to fool the frozen discriminator
        non_generated = XT_dg[np.random.randint(0, XT_dg.shape[0], size=BATCH_SIZE), :, :, :]
        y2 = np.zeros([BATCH_SIZE, 2])
        y2[:, 1] = 1

        make_trainable(discriminator, False)
        g_loss = GAN.train_on_batch(non_generated, y2)
        losses["g"].append(g_loss)
        
        # Update plots periodically
        if e % plt_frq == plt_frq - 1:
            plot_loss(losses, gen_figname(e, start_string, 'loss'))
            plot_gen(generator, XTest_dg, gen_figname(e, start_string, 'gen'))

    return losses
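
make_trainable is called above but not defined in the snippet; a minimal sketch, assuming a Keras-style model with a .layers attribute (hypothetical, not the project's actual helper):

# Minimal sketch of a make_trainable helper (hypothetical):
# toggle the trainable flag on a Keras model and each of its layers.
def make_trainable(net, val):
    net.trainable = val
    for layer in net.layers:
        layer.trainable = val
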
Example #2
File: train.py Project: ysterin/vta
def main():
    # parse arguments
    args = parse_args()

    # fix seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True

    # set logger
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(level=logging.INFO,
                        format=log_format,
                        stream=sys.stderr)

    # set size
    seq_size = args.seq_size
    init_size = args.init_size

    # set device as gpu
    device = torch.device('cuda', 0)

    # set writer
    exp_name = set_exp_name(args)
    writer = SummaryWriter(args.log_dir + exp_name)
    LOGGER.info('EXP NAME: ' + exp_name)

    # load dataset
    train_loader, test_loader = full_dataloader(seq_size, init_size,
                                                args.batch_size)
    LOGGER.info('Dataset loaded')

    # init models
    model = EnvModel(belief_size=args.belief_size,
                     state_size=args.state_size,
                     num_layers=args.num_layers,
                     max_seg_len=args.seg_len,
                     max_seg_num=args.seg_num).to(device)
    LOGGER.info('Model initialized')

    # init optimizer
    optimizer = Adam(params=model.parameters(),
                     lr=args.learn_rate,
                     amsgrad=True)

    # test data
    pre_test_full_data_list = next(iter(test_loader))
    pre_test_full_data_list = preprocess(pre_test_full_data_list.to(device),
                                         args.obs_bit)

    # for each iter
    b_idx = 0
    while b_idx <= args.max_iters:
        # for each batch
        for train_obs_list in train_loader:
            b_idx += 1
            # mask temp annealing
            if args.beta_anneal:
                model.state_model.mask_beta = (
                    args.max_beta - args.min_beta) * 0.999**(
                        b_idx / args.beta_anneal) + args.min_beta
            else:
                model.state_model.mask_beta = args.max_beta

            ##############
            # train time #
            ##############
            # get input data
            train_obs_list = preprocess(train_obs_list.to(device),
                                        args.obs_bit)

            # run model with train mode
            model.train()
            optimizer.zero_grad()
            results = model(train_obs_list, seq_size, init_size, args.obs_std)

            # get train loss and backward update
            train_total_loss = results['train_loss']
            train_total_loss.backward()
            if args.grad_clip > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
            optimizer.step()

            # log
            if b_idx % 10 == 0:
                log_str, log_data = log_train(results, writer, b_idx)
                LOGGER.info(log_str, *log_data)

            #############
            # test time #
            #############
            if b_idx % 100 == 0:
                # set data
                pre_test_init_data_list = pre_test_full_data_list[:, :init_size]
                post_test_init_data_list = postprocess(pre_test_init_data_list,
                                                       args.obs_bit)
                pre_test_input_data_list = pre_test_full_data_list[
                    :, init_size:(init_size + seq_size)]
                post_test_input_data_list = postprocess(
                    pre_test_input_data_list, args.obs_bit)

                with torch.no_grad():
                    ##################
                    # test data elbo #
                    ##################
                    model.eval()
                    results = model(pre_test_full_data_list, seq_size,
                                    init_size, args.obs_std)
                    post_test_rec_data_list = postprocess(
                        results['rec_data'], args.obs_bit)
                    output_img, output_mask = plot_rec(
                        post_test_init_data_list, post_test_input_data_list,
                        post_test_rec_data_list, results['mask_data'],
                        results['p_mask'], results['q_mask'])

                    # log
                    log_str, log_data = log_test(results, writer, b_idx)
                    LOGGER.info(log_str, *log_data)
                    writer.add_image('valid/rec_image',
                                     output_img.transpose([2, 0, 1]),
                                     global_step=b_idx)
                    writer.add_image('valid/mask_image',
                                     output_mask.transpose([2, 0, 1]),
                                     global_step=b_idx)

                    ###################
                    # full generation #
                    ###################
                    pre_test_gen_data_list, test_mask_data_list = model.full_generation(
                        pre_test_init_data_list, seq_size)
                    post_test_gen_data_list = postprocess(
                        pre_test_gen_data_list, args.obs_bit)

                    # log
                    output_img = plot_gen(post_test_init_data_list,
                                          post_test_gen_data_list,
                                          test_mask_data_list)
                    writer.add_image('valid/full_gen_image',
                                     output_img.transpose([2, 0, 1]), b_idx)
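
The mask temperature ("beta") used above decays exponentially from max_beta toward min_beta as b_idx grows. A standalone sketch of that schedule, using the same formula (the helper name is hypothetical):

# Same annealing formula as in the loop above, pulled out for inspection.
def anneal_mask_beta(b_idx, max_beta, min_beta, beta_anneal):
    if beta_anneal:
        return (max_beta - min_beta) * 0.999 ** (b_idx / beta_anneal) + min_beta
    return max_beta

# e.g. with max_beta=1.0, min_beta=0.1, beta_anneal=100:
# b_idx=0 gives 1.0, and the value approaches 0.1 as training proceeds.
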
Example #3
def main():
    # parse arguments
    args = parse_args()

    # fix seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True

    # set size
    seq_size = args.seq_size
    init_size = args.init_size

    # set device as gpu
    device = torch.device('cuda', 0)

    # set writer
    exp_name = set_exp_name(args)
    writer = SummaryWriter(args.log_dir + exp_name)

    # load dataset
    train_loader, test_loader = maze_dataloader(seq_size, init_size,
                                                args.batch_size)

    # init models
    hrssm_params = {
        'seq_size': args.seq_size,
        'init_size': args.init_size,
        'state_size': args.state_size,
        'belief_size': args.belief_size,
        'num_layers': args.num_layers,
        'max_seg_num': args.seg_num,
        'max_seg_len': args.seg_len
    }
    optimizer = optim.Adam
    optimizer_params = {'lr': args.learn_rate, 'amsgrad': True}
    model = HRSSM(optimizer=optimizer,
                  optimizer_params=optimizer_params,
                  clip_grad_norm=args.grad_clip,
                  hrssm_params=hrssm_params)

    # test data
    pre_test_full_data_list = next(iter(test_loader))
    pre_test_full_data_list = preprocess(pre_test_full_data_list.to(device))

    # for each iter
    b_idx = 0
    while b_idx <= args.max_iters:
        # for each batch
        for train_obs_list in train_loader:
            b_idx += 1
            # mask temp annealing
            if args.beta_anneal:
                model.mask_beta = (args.max_beta - args.min_beta) * 0.999**(
                    b_idx / args.beta_anneal) + args.min_beta
            else:
                model.mask_beta = args.max_beta

            # get input data
            train_obs_list = preprocess(train_obs_list.to(device))

            # train step and return the loss
            loss = model.train(train_obs_list)

            # log
            if b_idx % 1000 == 0:
                writer.add_scalar('train/total_loss', loss, b_idx)

            # test time
            if b_idx % 1000 == 0:
                # set data
                pre_test_init_data_list = pre_test_full_data_list[:, :init_size]
                post_test_init_data_list = post_process_maze(
                    pre_test_init_data_list)
                pre_test_input_data_list = pre_test_full_data_list[
                    :, init_size:(init_size + seq_size)]
                post_test_input_data_list = post_process_maze(
                    pre_test_input_data_list)

                with torch.no_grad():
                    # test data elbo
                    results = model.reconstruction(pre_test_full_data_list)
                    post_test_rec_data_list = post_process_maze(
                        results['rec_data'])
                    output_img, output_mask = plot_rec(
                        post_test_init_data_list, post_test_input_data_list,
                        post_test_rec_data_list, results['mask_data'],
                        results['p_mask'], results['q_mask'])

                    # log
                    loss = model.test(pre_test_full_data_list)
                    writer.add_scalar('valid/total_loss', loss, b_idx)
                    writer.add_image('valid/rec_image',
                                     output_img.transpose([2, 0, 1]),
                                     global_step=b_idx)
                    writer.add_image('valid/mask_image',
                                     output_mask.transpose([2, 0, 1]),
                                     global_step=b_idx)

                    # full generation
                    pre_test_gen_data_list, test_mask_data_list = model.full_generation(
                        pre_test_init_data_list, seq_size)
                    post_test_gen_data_list = post_process_maze(
                        pre_test_gen_data_list)

                    # log
                    output_img = plot_gen(post_test_init_data_list,
                                          post_test_gen_data_list,
                                          test_mask_data_list)
                    writer.add_image('valid/full_gen_image',
                                     output_img.transpose([2, 0, 1]), b_idx)

                    # jumpy imagination
                    pre_test_gen_data_list = model.jumpy_generation(
                        pre_test_init_data_list, seq_size)
                    post_test_gen_data_list = post_process_maze(
                        pre_test_gen_data_list)

                    # log
                    output_img = plot_gen(post_test_init_data_list,
                                          post_test_gen_data_list)
                    writer.add_image('valid/jumpy_gen_image',
                                     output_img.transpose([2, 0, 1]), b_idx)
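
Unlike Example #2, this version hides the optimization step inside the HRSSM wrapper (model.train / model.test / model.reconstruction). That wrapper is not shown; a rough, hypothetical sketch of what such a train step could look like, modeled on Example #2's explicit loop rather than on the project's actual class:

import torch.nn as nn

# Hypothetical wrapper, modeled on Example #2's explicit training step;
# not the project's HRSSM implementation.
class TrainStepWrapper:
    def __init__(self, model, optimizer, clip_grad_norm=0.0):
        self.model = model
        self.optimizer = optimizer
        self.clip_grad_norm = clip_grad_norm

    def train_step(self, obs_list, seq_size, init_size, obs_std):
        self.model.train()
        self.optimizer.zero_grad()
        results = self.model(obs_list, seq_size, init_size, obs_std)
        loss = results['train_loss']
        loss.backward()
        if self.clip_grad_norm > 0.0:
            nn.utils.clip_grad_norm_(self.model.parameters(), self.clip_grad_norm)
        self.optimizer.step()
        return loss.item()
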
Example #4
def train(model_name,
          data_collection,
          nets,
          nb_epoch=5000, plt_frq=25, BATCH_SIZE=32,
          losses=None):

    # Avoid a shared mutable default: create a fresh loss history per call.
    if losses is None:
        losses = {"fw_d_l": [], "fw_d_a": [],
                  "bw_d_l": [], "bw_d_a": [],
                  "g_fw_id": [], "g_fw_recon": [],
                  "g_bw_id": [], "g_bw_recon": [],
                  "g_loss": []}

    sprint('Preparing output path', level=1)
    #os.mkdir(build_model_path(model_name, 'output'))
    parent_path, start_string = build_model_path(model_name, 'output')
    setup_workspace(parent_path)

    XT_nd, XTest_nd, XT_dg, XTest_dg = data_collection
    gan, gen_fw, gen_bw, dis_fw, dis_bw = nets

    for e in tqdm(range(nb_epoch)):

        # Select batch
        nd_batch = XT_nd[
            np.random.randint(0, XT_nd.shape[0], size=BATCH_SIZE), :, :, :]
        dg_batch = XT_dg[
            np.random.randint(0, XT_dg.shape[0], size=BATCH_SIZE), :, :, :]

        # Generate images
        fw_generated = gen_fw.predict(dg_batch)
        bw_generated = gen_bw.predict(nd_batch)

        # Prepare discriminator training data and one-hot targets
        fw_X = np.concatenate((nd_batch, fw_generated))
        fw_y = np.zeros([2 * BATCH_SIZE, 2])
        fw_y[0:BATCH_SIZE, 1] = 1
        fw_y[BATCH_SIZE:, 0] = 1

        bw_X = np.concatenate((dg_batch, bw_generated))
        bw_y = np.zeros([2 * BATCH_SIZE, 2])
        bw_y[0:BATCH_SIZE, 1] = 1
        bw_y[BATCH_SIZE:, 0] = 1

        make_trainable(dis_fw, True)
        fw_d_loss = dis_fw.train_on_batch(fw_X, fw_y)
        #losses["fw_d_l"].append(fw_d_loss)
        losses["fw_d_l"].append(fw_d_loss[0])
        losses["fw_d_a"].append(fw_d_loss[1])

        make_trainable(dis_bw, True)
        bw_d_loss = dis_bw.train_on_batch(bw_X, bw_y)
        #losses["bw_d_l"].append(bw_d_loss)
        losses["bw_d_l"].append(bw_d_loss[0])
        losses["bw_d_a"].append(bw_d_loss[1])

        # train combined generators
        # Remember that CycleGAN model computes losses as follows
        # Combine Discriminator and Generator
        # gan = Model(inputs=[image_fw, image_bw], \
        #             outputs=[dis_result_fw, dis_result_bw, \
        #                      same_fw,       same_bw,       \
        #                      recovered_fw,  recovered_bw])
        # gan.compile(loss=['binary_crossentropy', 'binary_crossentropy',\
        #                   'mae',                 'mae',                \
        #                   'mae',                 'mae'],\
        #             loss_weights = [1,                1,\
        #                             identity_loss,    identity_loss,\
        #                             consistency_loss, consistency_loss],\
        #             optimizer=optimizer)
        nd_batch_2 = XT_nd[
            np.random.randint(0, XT_nd.shape[0], size=BATCH_SIZE), :, :, :]
        dg_batch_2 = XT_dg[
            np.random.randint(0, XT_dg.shape[0], size=BATCH_SIZE), :, :, :]
        y2 = np.zeros([BATCH_SIZE, 2])
        y2[:, 1] = 1

        #make_trainable(dis_fw, False)
        #make_trainable(dis_bw, False)

        g_loss = gan.train_on_batch([dg_batch_2, nd_batch_2],
                                    [y2, y2,
                                     dg_batch_2, nd_batch_2,
                                     dg_batch_2, nd_batch_2])
        losses["g_loss"].append(g_loss[0])
        losses["g_fw_id"].append(g_loss[3])
        losses["g_bw_id"].append(g_loss[4])
        losses["g_fw_recon"].append(g_loss[5])
        losses["g_bw_recon"].append(g_loss[6])

        # Update plots periodically
        if e % plt_frq == plt_frq - 1:
            #plot_loss(losses, gen_figname(e, start_string, 'loss', parent_path=parent_path))
            plot_training_stats(
                losses,
                gen_figname(e, start_string, 'loss', parent_path=parent_path))
            plot_gen(
                gen_fw, gen_bw, XTest_dg,
                gen_figname(e, start_string, 'gen_fw',
                            parent_path=parent_path))
            plot_gen(
                gen_bw, gen_fw, XTest_nd,
                gen_figname(e, start_string, 'gen_bw',
                            parent_path=parent_path))

    return losses
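
For reference, a hypothetical invocation of this function; the tuple orders must match the unpacking above, and all names are placeholders assumed to exist already:

# Hypothetical call; data arrays and compiled Keras networks are assumed to exist.
data_collection = (XT_nd, XTest_nd, XT_dg, XTest_dg)
nets = (gan, gen_fw, gen_bw, dis_fw, dis_bw)
losses = train('cyclegan_run', data_collection, nets,
               nb_epoch=2000, plt_frq=50, BATCH_SIZE=16)
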
Example #5
def main():
    # configs
    args = toml.load('config.toml')['model']
    
    seed = args['seed']
    batch_size = args['batch_size']
    seq_size = args['seq_size']
    init_size = args['init_size']
    state_size = args['state_size']
    belief_size = args['belief_size']
    num_layers = args['num_layers']
    obs_std = args['obs_std']
    obs_bit = args['obs_bit']
    learn_rate = args['learn_rate']
    grad_clip = args['grad_clip']
    max_iters = args['max_iters']
    seg_num = args['seg_num']
    seg_len = args['seg_len']
    max_beta = args['max_beta']
    min_beta = args['min_beta']
    beta_anneal = args['beta_anneal']
    log_dir = args['log_dir']
    test_times = args['test_times']
    gpu_ids = args['gpu_ids']
    data_path = args['data_path']
    check_path = args['check_path']
    
    # fix seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    # set logger
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format, stream=sys.stderr)

    # set writer
    exp_name = set_exp_name(args)
    writer = SummaryWriter(log_dir + exp_name)
    LOGGER.info('EXP NAME: ' + exp_name)

    # load dataset
    train_loader, test_loader, check_loader = full_dataloader(seq_size, init_size, batch_size, data_path, check_path)
    LOGGER.info('Dataset loaded')

    # init models
    model = EnvModel(belief_size=belief_size,
                     state_size=state_size,
                     num_layers=num_layers,
                     max_seg_len=seg_len,
                     max_seg_num=seg_num)

    if torch.cuda.is_available():
        device = torch.device(f'cuda:{gpu_ids[0]}')
        model.to(device)
        model = nn.DataParallel(model, device_ids=gpu_ids)
        # Re-binding to .module keeps attributes such as model.state_model
        # directly accessible (the DataParallel wrapper itself is dropped here).
        model = model.module
    else:
        device = torch.device('cpu')
        model.to(device)
        
    LOGGER.info('Model initialized')

    # init optimizer
    optimizer = Adam(params=model.parameters(),
                     lr=learn_rate, amsgrad=True)

    # test data
    pre_test_full_list = next(iter(test_loader))
    pre_test_full_data_list = pre_test_full_list['img']
    pre_test_full_point_list = pre_test_full_list['point']
    pre_test_full_data_list = preprocess(pre_test_full_data_list.to(device), obs_bit)
    
    # for each iter
    b_idx = 0
    while b_idx <= max_iters:
        # for each batch
        for train_list in train_loader:
            b_idx += 1
            # mask temp annealing
            if beta_anneal:
                model.state_model.mask_beta = (max_beta - min_beta) * 0.999 ** (b_idx / beta_anneal) + min_beta
            else:
                model.state_model.mask_beta = max_beta

            ##############
            # train time #
            ##############
            # get input data
            train_obs_list = train_list['img']
            train_points_list = train_list['point']
            train_obs_list = preprocess(train_obs_list.to(device), obs_bit)

            # run model with train mode
            model.train()
            optimizer.zero_grad()
            results = model(train_obs_list, train_points_list, seq_size, init_size, obs_std)

            # get train loss and backward update
            train_total_loss = results['train_loss']
            train_total_loss.backward()
            if grad_clip > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()

            # log
            if b_idx % 10 == 0:
                log_str, log_data = log_train(results, writer, b_idx)
                LOGGER.info(log_str, *log_data)

            #############
            # test time #
            #############
            if b_idx % test_times == 0:
                # set data
                pre_test_init_data_list = pre_test_full_data_list[:, :init_size]
                post_test_init_data_list = postprocess(pre_test_init_data_list, obs_bit)
                pre_test_input_data_list = pre_test_full_data_list[:, init_size:(init_size + seq_size)]
                post_test_input_data_list = postprocess(pre_test_input_data_list, obs_bit)

                with torch.no_grad():
                    ##################
                    # test data elbo #
                    ##################
                    model.eval()
                    results = model(pre_test_full_data_list, pre_test_full_point_list, seq_size, init_size, obs_std)
                    post_test_rec_data_list = postprocess(results['rec_data'], obs_bit)
                    output_img, output_mask = plot_rec(post_test_init_data_list,
                                                       post_test_input_data_list,
                                                       post_test_rec_data_list,
                                                       results['mask_data'],
                                                       results['p_mask'],
                                                       results['q_mask'])

                    # log
                    log_str, log_data = log_test(results, writer, b_idx)
                    LOGGER.info(log_str, *log_data)
                    writer.add_image('valid/rec_image', output_img.transpose([2, 0, 1]), global_step=b_idx)
                    writer.add_image('valid/mask_image', output_mask.transpose([2, 0, 1]), global_step=b_idx)

                    ###################
                    # full generation #
                    ###################
                    pre_test_gen_data_list, test_mask_data_list = model.full_generation(pre_test_init_data_list, seq_size)
                    post_test_gen_data_list = postprocess(pre_test_gen_data_list, obs_bit)

                    # log
                    output_img = plot_gen(post_test_init_data_list, post_test_gen_data_list, test_mask_data_list)
                    writer.add_image('valid/full_gen_image', output_img.transpose([2, 0, 1]), b_idx)
              
    
    with torch.no_grad():
        model.eval()
        acc = []
        precision = []
        recall = []
        f_value = []
        for check in check_loader:
            check_obs = check['img']
            check_point = check['point']
            check_obs = preprocess(check_obs.to(device), obs_bit)
            results = model(check_obs, check_point, seq_size, init_size, obs_std)
            metrics = calc_metrixs(results['mask_data_true'], results['mask_data'])
            acc.append(metrics['accuracy'])
            precision.append(metrics['precision'])
            recall.append(metrics['recall'])
            f_value.append(metrics['f_value'])
            
        acc = np.concatenate(acc)
        precision = np.concatenate(precision)
        recall = np.concatenate(recall)
        f_value = np.concatenate(f_value)
        
        print('shape: ', acc.shape)
        print('accuracy: ', acc.mean())
        print('precision: ', precision.mean())
        print('recall: ', recall.mean())
        print('f_value: ', f_value.mean())
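
calc_metrixs is project code that is not included here; a minimal, hypothetical stand-in that returns the same keys consumed above, computing per-sample accuracy/precision/recall/F1 over binary masks:

import numpy as np

# Hypothetical stand-in for calc_metrixs: per-sample metrics over binary masks.
def calc_mask_metrics(mask_true, mask_pred):
    t = np.asarray(mask_true, dtype=bool).reshape(len(mask_true), -1)
    p = np.asarray(mask_pred, dtype=bool).reshape(len(mask_pred), -1)
    tp = (t & p).sum(axis=1)
    fp = (~t & p).sum(axis=1)
    fn = (t & ~p).sum(axis=1)
    tn = (~t & ~p).sum(axis=1)
    eps = 1e-8
    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)
    return {
        'accuracy': (tp + tn) / (tp + tn + fp + fn + eps),
        'precision': precision,
        'recall': recall,
        'f_value': 2 * precision * recall / (precision + recall + eps),
    }
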