コード例 #1
0
ファイル: main.py プロジェクト: system123/pytorch_divcolor
def train_mdn(logger=None):
  """Train the MDN to predict VAE posterior statistics on the 'train' split.

  A frozen, pre-trained VAE provides per-image posterior (mu, sigma); the
  MDN is fit so its GMM parameters (computed from grey-level features)
  match them.  MDN weights are saved after every epoch.

  Args:
    logger: optional plotter with an ``update_plot(itr, values, plot_type)``
      method; skipped when None.
  """
  out_dir, listdir, featslistdir = get_dirpaths(args)
  batchsize = args.batchsize
  hiddensize = args.hiddensize
  nmix = args.nmix  # NOTE(review): unused here; kept for parity with divcolor()
  nepochs = args.epochs_mdn

  data = colordata(
      os.path.join(out_dir, 'images'),
      listdir=listdir,
      featslistdir=featslistdir,
      split='train')

  nbatches = np.int_(np.floor(data.img_num / batchsize))

  data_loader = DataLoader(dataset=data, num_workers=args.nthreads,
                           batch_size=batchsize, shuffle=True, drop_last=True)

  # VAE is inference-only here: load weights and freeze (eval mode).
  model_vae = VAE()
  model_vae.cuda()
  model_vae.load_state_dict(torch.load('%s/models/model_vae.pth' % (out_dir)))
  model_vae.train(False)

  model_mdn = MDN()
  model_mdn.cuda()
  model_mdn.train(True)

  optimizer = optim.Adam(model_mdn.parameters(), lr=1e-3)

  itr_idx = 0
  for epochs_mdn in range(nepochs):
    train_loss = 0.

    for batch_idx, (batch, batch_recon_const, batch_weights, _, batch_feats) in \
      tqdm(enumerate(data_loader), total=nbatches):

      input_color = Variable(batch).cuda()
      input_greylevel = Variable(batch_recon_const).cuda()
      input_feats = Variable(batch_feats).cuda()
      # BUGFIX: z must live on the GPU like every other input to the cuda
      # VAE; the original left it on the CPU (device mismatch at forward).
      z = Variable(torch.randn(batchsize, hiddensize)).cuda()

      optimizer.zero_grad()

      mu, logvar, _ = model_vae(input_color, input_greylevel, z)
      mdn_gmm_params = model_mdn(input_feats)

      # sqrt(exp(logvar)) is the std-dev of the VAE posterior.
      loss, loss_l2 = mdn_loss(mdn_gmm_params, mu,
                               torch.sqrt(torch.exp(logvar)), batchsize)
      loss.backward()

      optimizer.step()

      train_loss = train_loss + loss.data[0]

      if logger:
        logger.update_plot(itr_idx, [loss.data[0], loss_l2.data[0]], plot_type='mdn')
        itr_idx += 1

    train_loss = (train_loss * 1.) / (nbatches)
    print('[DEBUG] Training MDN, epoch %d has loss %f' % (epochs_mdn, train_loss))
    # Checkpoint after every epoch (overwrites the same file).
    torch.save(model_mdn.state_dict(), '%s/models/model_mdn.pth' % (out_dir))
コード例 #2
0
ファイル: main.py プロジェクト: system123/pytorch_divcolor
def divcolor():
  """Generate diverse colorizations for the 'test' split.

  For each test image the frozen MDN predicts a GMM over VAE latent codes
  from grey-level features; the mixture means are decoded by the frozen
  VAE into ``nmix`` candidate colorizations, which are saved ordered by
  mixture weight.
  """
  out_dir, listdir, featslistdir = get_dirpaths(args)
  batchsize = args.batchsize
  hiddensize = args.hiddensize
  nmix = args.nmix

  data = colordata(
      os.path.join(out_dir, 'images'),
      listdir=listdir,
      featslistdir=featslistdir,
      split='test')

  nbatches = np.int_(np.floor(data.img_num / batchsize))

  # NOTE(review): shuffle=True on the test split looks unusual — confirm.
  data_loader = DataLoader(dataset=data, num_workers=args.nthreads,
                           batch_size=batchsize, shuffle=True, drop_last=True)

  model_vae = VAE()
  model_vae.cuda()
  model_vae.load_state_dict(torch.load('%s/models/model_vae.pth' % (out_dir)))
  model_vae.train(False)

  model_mdn = MDN()
  model_mdn.cuda()
  model_mdn.load_state_dict(torch.load('%s/models/model_mdn.pth' % (out_dir)))
  model_mdn.train(False)

  for batch_idx, (batch, batch_recon_const, batch_weights,
    batch_recon_const_outres, batch_feats) in \
    tqdm(enumerate(data_loader), total=nbatches):

    input_feats = Variable(batch_feats).cuda()

    mdn_gmm_params = model_mdn(input_feats)
    gmm_mu, gmm_pi = get_gmm_coeffs(mdn_gmm_params)
    gmm_pi = gmm_pi.view(-1, 1)
    gmm_mu = gmm_mu.view(-1, hiddensize)

    for j in range(batchsize):
      # Tile the j-th sample across the batch dimension so that all mixture
      # means can be decoded in a single VAE forward pass.
      batch_j = np.tile(batch[j, ...].numpy(), (batchsize, 1, 1, 1))
      batch_recon_const_j = np.tile(batch_recon_const[j, ...].numpy(),
                                    (batchsize, 1, 1, 1))
      batch_recon_const_outres_j = np.tile(
          batch_recon_const_outres[j, ...].numpy(), (batchsize, 1, 1, 1))

      input_color = Variable(torch.from_numpy(batch_j)).cuda()
      input_greylevel = Variable(torch.from_numpy(batch_recon_const_j)).cuda()

      # Mixture means for image j, and their order by mixture weight.
      curr_mu = gmm_mu[j*nmix:(j+1)*nmix, :]
      orderid = np.argsort(
          gmm_pi[j*nmix:(j+1)*nmix, 0].cpu().data.numpy().reshape(-1))

      # BUGFIX: np.int was removed in NumPy 1.24; builtin floor division
      # computes the same repeat count for positive ints.
      z = curr_mu.repeat(batchsize // nmix, 1)

      _, _, color_out = model_vae(input_color, input_greylevel, z, is_train=False)

      data.saveoutput_gt(color_out.cpu().data.numpy()[orderid, ...],
                         batch_j[orderid, ...],
                         'divcolor_%05d_%05d' % (batch_idx, j),
                         nmix,
                         net_recon_const=batch_recon_const_outres_j[orderid, ...])
コード例 #3
0
def load_vaes(H, logprint):
    """Construct the training VAE and its EMA copy, optionally restoring both.

    Returns the DDP-wrapped training model and the frozen EMA model, both
    placed on the GPU given by ``H.local_rank``.
    """
    vae = VAE(H)
    if H.restore_path:
        logprint(f'Restoring vae from {H.restore_path}')
        restore_params(vae, H.restore_path, map_cpu=True,
                       local_rank=H.local_rank, mpi_size=H.mpi_size)

    ema_vae = VAE(H)
    if H.restore_ema_path:
        logprint(f'Restoring ema vae from {H.restore_ema_path}')
        restore_params(ema_vae, H.restore_ema_path, map_cpu=True,
                       local_rank=H.local_rank, mpi_size=H.mpi_size)
    else:
        # No EMA checkpoint: initialise the EMA weights from the live model.
        ema_vae.load_state_dict(vae.state_dict())
    ema_vae.requires_grad_(False)

    vae = vae.cuda(H.local_rank)
    ema_vae = ema_vae.cuda(H.local_rank)

    vae = DistributedDataParallel(vae, device_ids=[H.local_rank],
                                  output_device=H.local_rank)

    # Sanity check: every parameter should be reachable by name.
    named_params = list(vae.named_parameters())
    if len(named_params) != len(list(vae.parameters())):
        raise ValueError('Some params are not named. Please name all params.')
    total_params = sum(np.prod(p.shape) for _, p in named_params)
    logprint(total_params=total_params, readable=f'{total_params:,}')
    return vae, ema_vae
コード例 #4
0
class VAERNN(torch.nn.Module):
    """Pairs a VAE encoder with an RNN world-model for joint training."""

    def __init__(self):
        super(VAERNN, self).__init__()

        self.z_size = 32
        self.kl_tolerance = 0.5

        self.vae = VAE()
        self.rnn = RNN()
        self.vae.train()
        self.rnn.train()
        self.init_()

        self.is_cuda = False

    def load(self):
        """Restore VAE and RNN weights, mapping checkpoint tensors to CPU."""
        to_cpu = lambda storage, loc: storage
        self.vae.load_state_dict(torch.load(vae_model_path, map_location=to_cpu))
        self.rnn.load_state_dict(torch.load(rnn_model_path, map_location=to_cpu))

    def init_(self):
        """Reset the RNN hidden state."""
        self.h = self.rnn.init_()

    def forward(self, inputs):
        """Encode `inputs` to a latent code via the VAE."""
        return self.vae(inputs)

    def when_train(self, inputs, one, outputs):
        """One training step: encode, step the RNN on (z, action), record losses."""
        if self.is_cuda:
            self.vae.is_cuda = True
            self.vae.cuda()
            self.rnn.is_cuda = True
            self.rnn.cuda()

        latent = self.vae(inputs)
        latent = latent.unsqueeze(0)

        # Condition the RNN on the latent code concatenated with the action.
        self.rnn(torch.cat((latent, one), dim=2))

        # Encode the next observation; the VAE stashes its own losses.
        latent_next = self.vae(outputs)
        self.next_kl_loss = self.vae.kl_loss
        self.next_r_loss = self.vae.r_loss
        latent_next = latent_next.unsqueeze(0)

        self.pred_loss = self.rnn.prediction_loss_f(latent_next)
        self.mdn_loss = self.rnn.mdn_loss_f(latent_next)
コード例 #5
0
ファイル: testing.py プロジェクト: ShahRutav/vdvae
def load_vaes(H):
    """Load only the EMA VAE for testing.

    The training VAE is deliberately not constructed (returned as None);
    only the frozen EMA model is restored and moved to the GPU.
    """
    vae = None  # training model intentionally skipped in test mode

    ema_vae = VAE(H)
    if H.restore_ema_path:
        restore_params(ema_vae,
                       H.restore_ema_path,
                       map_cpu=True,
                       local_rank=None,
                       mpi_size=None)
    elif vae:
        # Unreachable while vae is None; kept for parity with the training
        # loader, which seeds the EMA weights from the live model.
        ema_vae.load_state_dict(vae.state_dict())
    ema_vae.requires_grad_(False)

    ema_vae = ema_vae.cuda(H.local_rank)

    return vae, ema_vae
コード例 #6
0
def objective(params):
    """
    Objective function to be minimized: loss with respect to our hyperparameters.

    Args:
        params: sequence of six values; the first three are encoder kernel
            sizes, the last three decoder kernel sizes.

    Returns:
        Sum over 100 training epochs of each epoch's final-batch loss.
    """
    enc_kernel1 = int(params[0])
    enc_kernel2 = int(params[1])
    enc_kernel3 = int(params[2])
    dec_kernel1 = int(params[3])
    dec_kernel2 = int(params[4])
    dec_kernel3 = int(params[5])

    # Contact matrices are 21x21
    input_dim = 441

    encoder = Encoder(input_size=input_dim,
                      latent_size=8,
                      kernel1=enc_kernel1,
                      kernel2=enc_kernel2,
                      kernel3=enc_kernel3)

    # BUGFIX: the original passed the undefined name `input_size` here
    # (NameError at call time); the intended value is `input_dim`.
    decoder = Decoder(latent_dim=8,
                      output_size=input_dim,
                      kernel1=dec_kernel1,
                      kernel2=dec_kernel2,
                      kernel3=dec_kernel3)

    vae = VAE(encoder, decoder)
    criterion = nn.MSELoss()

    use_cuda = args.use_cuda
    if use_cuda:
        encoder = encoder.cuda()
        # BUGFIX: was `deconder = decoder.cuda()` — the typo discarded the
        # cuda-placed decoder, leaving `decoder` on the CPU.
        decoder = decoder.cuda()
        vae = vae.cuda()
        criterion = criterion.cuda()

    optimizer = optim.Adam(vae.parameters(), lr=0.0001)

    epoch_loss = 0
    total_loss = 0
    for epoch in range(100):
        for i, data in enumerate(trainloader, 0):
            inputs = data['cont_matrix']
            inputs = inputs.resize_(args.batch_size, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)
            optimizer.zero_grad()
            dec = vae(inputs)
            # MSE reconstruction plus latent (KL-style) regularizer.
            ll = latent_loss(vae.z_mean, vae.z_sigma)
            loss = criterion(dec, inputs) + ll
            loss.backward()
            optimizer.step()
            # NOTE(review): `=` keeps only the last batch's loss for the
            # epoch; sibling scripts in this file accumulate with `+=` —
            # confirm which is intended.
            epoch_loss = loss.data[0]
        print(epoch, epoch_loss)
        total_loss += epoch_loss

    return total_loss
コード例 #7
0
def main():
    """
    Generate images from a saved model

    Loads a trained VAE checkpoint, runs every contact-matrix batch through
    it, and saves the stacked latent codes (posterior means) and the
    reconstructions as .npy files.
    """
    train_data = UnlabeledContact(
        data='/home/ygx/data/fspeptide/fs_peptide.npy')
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    #encoder = Encoder(input_size=args.input_size, latent_size=args.latent_size)
    #decoder = Decoder(latent_size=args.latent_size, output_size=args.input_size)
    #vae = VAE(encoder, decoder, use_cuda=args.use_cuda)

    vae = VAE()

    # Load saved model
    vae.load_state_dict(torch.load(args.model_path + args.model_name))

    if args.use_cuda:
        #encoder = encoder.cuda()
        #decoder = decoder.cuda()
        vae = vae.cuda()

    latent_arrys = []
    recon_arrys = []
    for batch_idx, data in enumerate(trainloader):
        # Contact matrices are reshaped to (batch, 1, 21, 21) images.
        inputs = data['cont_matrix']
        inputs = inputs.resize_(args.batch_size, 1, 21, 21)
        inputs = inputs.float()
        if args.use_cuda:
            inputs = inputs.cuda()
        inputs = Variable(inputs)

        #latent_array = encoder(inputs).data.cpu().numpy()
        #print('latent_array has shape {}'.format(latent_array.shape))
        #latent_arrys.append(latent_array)

        #reconstructed_array = vae(inputs).data.cpu().numpy()
        # VAE returns (reconstruction, mu, logvar); mu is kept as the latent.
        reconstructed_array, mu, _ = vae(inputs)
        reconstructed_array = reconstructed_array.data.cpu().numpy()
        latent_array = mu.data.cpu().numpy()
        recon_arrys.append(reconstructed_array)
        latent_arrys.append(latent_array)

        if batch_idx % 100 == 0:
            print('Saving progress: {:.3f}%'.format(batch_idx * 100. /
                                                    len(trainloader)))

    # Stack per-batch arrays into one (num_batches, ...) array each.
    print('\nNumber of images prepared: {}'.format(len(latent_arrys)))
    latent_stacked = np.stack(latent_arrys, axis=0)
    latent_filename = 'latent_imgs_fc'
    np.save(args.latent_save_path + latent_filename, latent_stacked)

    recon_stacked = np.stack(recon_arrys, axis=0)
    recon_filename = 'recon_imgs_fc'
    np.save(args.recon_save_path + recon_filename, recon_stacked)
コード例 #8
0
def main():
    """Train a half-precision (fp16) VAE on contact matrices, timing each step.

    Uses DataParallel-wrapped encoder/decoder/vae on GPU and logs the
    per-batch wall-clock time via an AverageMeter; the meter is saved at
    the end.
    """
    use_cuda = args.use_cuda

    train_data = UnlabeledContact(data=args.data_dir)
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441

    encoder = Encoder(input_size=input_size, latent_size=3)
    decoder = Decoder(latent_size=3, output_size=input_size)
    vae = VAE(encoder, decoder, use_cuda=use_cuda)
    criterion = nn.MSELoss()

    if use_cuda:
        # Everything is cast to half precision for the fp16 timing run.
        encoder = nn.DataParallel(encoder)
        decoder = nn.DataParallel(decoder)
        encoder = encoder.cuda().half()
        decoder = decoder.cuda().half()
        vae = nn.DataParallel(vae)
        vae = vae.cuda().half()
        criterion = criterion.cuda().half()

    optimizer = optim.SGD(vae.parameters(), lr=0.01)

    clock = AverageMeter(name='clock16', rank=0)
    # NOTE(review): epoch_loss is never reset per epoch, so it accumulates
    # across all 15 epochs; total_loss is initialised but never used.
    epoch_loss = 0
    total_loss = 0
    end = time.time()
    for epoch in range(15):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            #           inputs = inputs.resize_(args.batch_size, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda().half()
            inputs = Variable(inputs)
            optimizer.zero_grad()
            dec = vae(inputs)
            # MSE reconstruction plus latent (KL-style) regularizer.
            ll = latent_loss(vae.z_mean, vae.z_sigma)
            loss = criterion(dec, inputs) + ll
            loss.backward()
            optimizer.step()
            epoch_loss += loss.data[0]

            # Track wall-clock time per batch.
            clock.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.data[0]))

    clock.save(path='/home/ygx/libraries/mds/molecules/molecules/linear_vae')
コード例 #9
0
class VAERNN(torch.nn.Module):
    """VAE encoder plus RNN pair; this variant's training step only encodes
    the inputs and records the VAE's losses (no RNN step)."""

    def __init__(self):
        super(VAERNN, self).__init__()

        self.z_size = 32
        self.kl_tolerance = 0.5

        self.vae = VAE()
        self.rnn = RNN()
        self.vae.train()
        self.rnn.train()
        self.init_()

        self.is_cuda = False

    def load(self):
        """Restore VAE and RNN weights, mapping checkpoint tensors to CPU."""
        to_cpu = lambda storage, loc: storage
        self.vae.load_state_dict(torch.load(vae_model_path, map_location=to_cpu))
        self.rnn.load_state_dict(torch.load(rnn_model_path, map_location=to_cpu))

    def init_(self):
        """Reset the RNN hidden state."""
        self.h = self.rnn.init_()

    def forward(self, inputs):
        """Encode `inputs` to a latent code via the VAE."""
        return self.vae(inputs)

    def when_train(self, inputs, one, outputs):
        """Move sub-modules to the GPU if requested, encode, and stash losses."""
        if self.is_cuda:
            self.vae.is_cuda = True
            self.vae.cuda()
            self.rnn.is_cuda = True
            self.rnn.cuda()

        # The forward pass populates kl_loss / r_loss on the VAE.
        self.vae(inputs)
        self.next_kl_loss = self.vae.kl_loss
        self.next_r_loss = self.vae.r_loss
コード例 #10
0
class VAERNN(torch.nn.Module):
    """VAE encoder + RNN world-model; checkpoints are loaded eagerly in
    __init__ and the training step drives both the RNN prediction/MDN
    losses and a VAE reconstruction error on the RNN's prediction."""

    def __init__(self):
        super(VAERNN, self).__init__()

        self.z_size = 32
        self.kl_tolerance = 0.5

        self.vae = VAE()
        self.rnn = RNN()
        # Weights are restored immediately (CPU-mapped) — no separate load().
        self.vae.load_state_dict(
            torch.load(vae_model_path,
                       map_location=lambda storage, loc: storage))
        self.rnn.load_state_dict(
            torch.load(rnn_model_path,
                       map_location=lambda storage, loc: storage))
        self.vae.train()
        self.rnn.train()
        self.init_()

        self.is_cuda = False

    def init_(self):
        """Reset the RNN hidden state."""
        self.h = self.rnn.init_()

    def forward(self, inputs):
        """Encode `inputs`; returns the latent code and the RNN hidden state."""
        z = self.vae(inputs)
        # z = z.unsqueeze(0)
        # z = self.rnn(z)
        print('z', z.shape)
        print('h', self.h.shape)
        return z, self.h

    def when_train(self, inputs, one, outputs):
        """One training step: encode (no grad), step the RNN on (z, action),
        and record RNN prediction/MDN losses plus the VAE reconstruction
        error of the RNN's predicted next latent."""

        # Sub-modules are always pushed to the GPU here (unconditional,
        # unlike the is_cuda-gated variants elsewhere in this file).
        self.vae.is_cuda = True
        self.vae.cuda()
        self.rnn.is_cuda = True
        self.rnn.cuda()

        # print('inputs outputs')
        # print(inputs.shape)
        # print(outputs.shape)
        # Encoder is frozen for this step: no gradient through the VAE.
        with torch.no_grad():
            z = self.vae(inputs)
        # print(z.shape)
        z = z.unsqueeze(0)
        # print(z.shape)

        # Condition the RNN on the latent concatenated with the action.
        z_a = torch.cat((z, one), dim=2)
        self.rnn(z_a)
        z_next = self.vae(outputs)
        self.next_kl_loss = self.vae.kl_loss
        self.next_r_loss = self.vae.r_loss
        # print('z_next', z_next.shape)
        # print(next_kl_loss.shape)
        # print(next_r_loss.shape)
        # print('rnn now')
        # print(self.rnn.z_prediction.shape)
        z_next = z_next.unsqueeze(0)
        # print(z_next.shape)
        # input('hi')
        self.pred_loss = self.rnn.prediction_loss_f(z_next)
        self.mdn_loss = self.rnn.mdn_loss_f(z_next)
        # print(pred_loss.shape)
        # print(mdn_loss.shape)
        z_next_hat = self.rnn.z_prediction
        # print('making v m error')
        # print(z_next_hat.shape)
        # print(outputs.shape)
        z_next_hat = z_next_hat.squeeze(0)
        # NOTE(review): compares the predicted *next* latent against the
        # current `inputs`, not `outputs` — confirm this is intentional.
        self.pred_recon_loss = self.vae.reconstruction_error_f(
            z_next_hat, inputs)
        # print(pred_recon_loss.shape)
        '''
        w = self.rnn.logweight_mdn
        m = self.rnn.mean_mdn
        s = self.rnn.logstd_mdn
        print('w', w.shape)
        print(w[0, 0, 0])
        a = w[0, 0, 0]
        b = torch.exp(a)
        print(b)
        n = b.multinomial(num_samples=1).data
        print(n)
        weight = torch.exp(w)
        ns = weight.multinomial(num_samples=1).data
        print(ns.shape)
        c = weight[0, 0]
        d = c.multinomial(num_samples=1).data
        print(c.shape)
        print(d.shape)
        weight = weight.squeeze(0)
        print('ww', weight.shape)
        a = torch.reshape(weight, (-1, 5))
        print(a.shape)
        d = a.multinomial(num_samples=5).data
        print('d is ', d.shape)
        b = torch.reshape(d, (-1, 32, 5))
        print(b.shape)
        #c = (weight==b)
        #print(c.shape)
        #print(c[200,30,4])
        c = b[:,:,0:1]
        c = c.unsqueeze(0)
        print(c[0,250,20,0])
        print(c[0,c[0,250,20,0],20,0])
        print(c.shape)
        samples = c
        # z_a = z_a.unsqueeze(0)
        '''
        # print(z_a.shape)

    def make_prediction(self, action):
        """Build the (latent, one-hot action) input for a rollout step.

        NOTE(review): `z` is undefined in this scope (NameError if called)
        and the method returns nothing — looks unfinished; confirm.
        """
        one = one_hot(action)
        one = torch.from_numpy(one)
        one = one.unsqueeze(0)
        one = one.type(torch.float)
        z_a = torch.cat((z, one), dim=1)
        z_a = z_a.unsqueeze(0)
コード例 #11
0

if __name__ == '__main__':
    # Visualization entry point: load a VAE checkpoint and reconstruct a
    # small batch of images from the shots directory.
    # params for visualizations
    n_images = 10

    transformers = transforms.Compose([transforms.ToTensor()])

    img_path = CONFIG.shots.dir
    dataset = DataSet(img_path, transform=transformers)
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=n_images,
                                         shuffle=True)

    model = VAE()
    model.cuda()
    resume_path = CONFIG.model.dir + '/model.checkpoint.tar'
    if os.path.isfile(resume_path):
        print("=> loading checkpoint '{}'".format(resume_path))
        checkpoint = torch.load(resume_path)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            resume_path, checkpoint['epoch']))

        reconstruction(data_loader=loader, model=model, n_images=n_images)
        # NOTE(review): mutating batch_size on a built DataLoader has no
        # effect (and newer PyTorch raises); rebuild the loader instead if
        # save_encoding is ever re-enabled.
        loader.batch_size = 1
        # save_encoding(data_loader=loader, model=model)

    else:
        print("=> no checkpoint found at '{}'".format(resume_path))
コード例 #12
0
def main():
    """Train a half-precision (fp16) VAE on contact matrices.

    Every epoch < 10 (and every 10th epoch thereafter) the latent code and
    reconstruction of the final batch are saved to .npy files; model
    weights are checkpointed every 10 epochs.
    """
    use_cuda = args.use_cuda

    train_data = UnlabeledContact(data=args.data_dir)
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441

    encoder = Encoder(input_size=input_size, latent_size=3)
    decoder = Decoder(latent_size=3, output_size=input_size)
    vae = VAE(encoder, decoder, use_cuda=use_cuda)
    criterion = nn.MSELoss()

    if use_cuda:
        # Cast everything to half precision for the fp16 run.
        encoder = encoder.cuda().half()
        decoder = decoder.cuda().half()
        vae = vae.cuda().half()
        criterion = criterion.cuda().half()

    optimizer = optim.SGD(vae.parameters(), lr=0.01)

    # NOTE(review): epoch_loss is never reset per epoch (accumulates over
    # all 100 epochs) and total_loss is initialised but never used.
    epoch_loss = 0
    total_loss = 0
    for epoch in range(100):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            inputs = inputs.resize_(args.batch_size, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda().half()
            inputs = Variable(inputs)
            optimizer.zero_grad()
            dec = vae(inputs)
            # MSE reconstruction plus latent (KL-style) regularizer.
            ll = latent_loss(vae.z_mean, vae.z_sigma)
            loss = criterion(dec, inputs) + ll
            loss.backward()
            optimizer.step()
            epoch_loss += loss.data[0]

            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.data[0]))

        # `inputs` leaks out of the loop: these saves use the LAST batch.
        if epoch < 10:
            # Get latent encoding
            latent_array = encoder(inputs).data[0].cpu().float().numpy()
            filename = 'latent_epoch' + str(epoch)
            np.save('./latent_saves/' + filename, latent_array)

            # Get reconstructed image
            reconstructed_array = vae(
                inputs).data[0].cpu().float().numpy().reshape(21, 21)
            recon_filename = 'reconstructed_epoch' + str(epoch)
            np.save('./reconstruct_saves/' + recon_filename,
                    reconstructed_array)

        if epoch % 10 == 0:
            torch.save(vae.state_dict(), args.save_path + 'epoch' + str(epoch))

            latent_array = encoder(inputs).data[0].cpu().float().numpy()
            filename = 'latent_epoch' + str(epoch)
            np.save('./latent_saves/' + filename, latent_array)

            reconstructed_array = vae(
                inputs).data[0].cpu().float().numpy().reshape(21, 21)
            recon_filename = 'reconstructed_epoch' + str(epoch)
            np.save('./reconstruct_saves/' + recon_filename,
                    reconstructed_array)
コード例 #13
0
def train(model_dict):
    """A2C/PPO-family training loop with an optional auxiliary VAE fit on
    visited frames.

    model_dict: configuration dict (env name, algo choice, step/process
    counts, save/log intervals, and feature flags vid_/gif_/ls_/vae_).
    Runs `num_frames` environment frames, periodically saving params,
    videos, gifs, and plots.
    """
    def update_current_state(current_state, state, channels):
        """Slide the frame stack forward and append the newest frame."""
        # current_state: [processes, channels*stack, height, width]
        state = torch.from_numpy(
            state).float()  # (processes, channels, height, width)
        # if num_stack > 1:
        #first stack*channel-channel frames = last stack*channel-channel , so slide them forward
        current_state[:, :-channels] = current_state[:, channels:]
        current_state[:, -channels:] = state  #last frame is now the new one
        return current_state

    def update_rewards(reward, done, final_rewards, episode_rewards,
                       current_state):
        """Fold per-step rewards into episode totals and zero finished envs."""
        # Reward, Done: [P], [P]
        # final_rewards, episode_rewards: [P,1]. [P,1]
        # current_state: [P,C*S,H,W]
        reward = torch.from_numpy(np.expand_dims(np.stack(reward),
                                                 1)).float()  #[P,1]
        episode_rewards += reward  #keeps track of current episode cumulative reward
        masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                   for done_ in done])  #[P,1]
        final_rewards *= masks  #erase the ones that are done
        final_rewards += (
            1 -
            masks) * episode_rewards  #set it to the cumulative episode reward
        episode_rewards *= masks  #erase the done ones
        masks = masks.type(dtype)  #cuda
        if current_state.dim() == 4:  # if state is a frame/image
            current_state *= masks.unsqueeze(2).unsqueeze(2)  #[P,1,1,1]
        else:
            current_state *= masks  #restart the done ones, by setting the state to zero
        return reward, masks, final_rewards, episode_rewards, current_state

    num_frames = model_dict['num_frames']
    cuda = model_dict['cuda']
    which_gpu = model_dict['which_gpu']
    num_steps = model_dict['num_steps']
    num_processes = model_dict['num_processes']
    seed = model_dict['seed']
    env_name = model_dict['env']
    save_dir = model_dict['save_to']
    num_stack = model_dict['num_stack']
    algo = model_dict['algo']
    save_interval = model_dict['save_interval']
    log_interval = model_dict['log_interval']

    save_params = model_dict['save_params']
    vid_ = model_dict['vid_']
    gif_ = model_dict['gif_']
    ls_ = model_dict['ls_']
    vae_ = model_dict['vae_']

    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(which_gpu)

    if cuda:
        torch.cuda.manual_seed(seed)
        dtype = torch.cuda.FloatTensor
        model_dict['dtype'] = dtype
    else:
        torch.manual_seed(seed)
        dtype = torch.FloatTensor
        model_dict['dtype'] = dtype

    # Create environments
    print(num_processes, 'processes')
    monitor_rewards_dir = os.path.join(save_dir, 'monitor_rewards')
    if not os.path.exists(monitor_rewards_dir):
        os.makedirs(monitor_rewards_dir)
        print('Made dir', monitor_rewards_dir)
    envs = SubprocVecEnv([
        make_env(env_name, seed, i, monitor_rewards_dir)
        for i in range(num_processes)
    ])

    if vid_:
        print('env for video')
        envs_video = make_env_monitor(env_name, save_dir)

    if gif_:
        print('env for gif')
        envs_gif = make_env_basic(env_name)

    if ls_:
        print('env for ls')
        envs_ls = make_env_basic(env_name)

    if vae_:
        print('env for vae')
        envs_vae = make_env_basic(env_name)

    obs_shape = envs.observation_space.shape  # (channels, height, width)
    obs_shape = (obs_shape[0] * num_stack, *obs_shape[1:]
                 )  # (channels*stack, height, width)
    shape_dim0 = envs.observation_space.shape[0]  #channels

    model_dict['obs_shape'] = obs_shape
    model_dict['shape_dim0'] = shape_dim0

    next_state_pred_ = 0
    model_dict['next_state_pred_'] = next_state_pred_

    # Create agent
    if algo == 'a2c':
        agent = a2c(envs, model_dict)
        print('init a2c agent')
    elif algo == 'ppo':
        agent = ppo(envs, model_dict)
        print('init ppo agent')
    elif algo == 'a2c_minibatch':
        agent = a2c_minibatch(envs, model_dict)
        print('init a2c_minibatch agent')
    elif algo == 'a2c_list_rollout':
        agent = a2c_list_rollout(envs, model_dict)
        print('init a2c_list_rollout agent')
    elif algo == 'a2c_with_var':
        agent = a2c_with_var(envs, model_dict)
        print('init a2c_with_var agent')
    # elif algo == 'a2c_bin_mask':
    #     agent = a2c_with_var(envs, model_dict)
    #     print ('init a2c_with_var agent')
    # agent = model_dict['agent'](envs, model_dict)

    # #Load model
    # if args.load_path != '':
    #     # agent.actor_critic = torch.load(os.path.join(args.load_path))
    #     agent.actor_critic = torch.load(args.load_path).cuda()
    #     print ('loaded ', args.load_path)

    # see_reward_episode = 0
    # if 'Montez' in env_name and see_reward_episode:
    #     states_list = [[] for i in range(num_processes)]

    # view_reward_episode(model_dict=model_dict, frames=[])
    # dfasddsf

    # NOTE(review): `vae` exists only when vae_ is truthy, but it is used
    # unconditionally in the update loop below (vae.update / elbo logging)
    # — this raises NameError when vae_ is false; confirm flags always set.
    if vae_:
        vae = VAE()
        vae.cuda()

    # Init state
    state = envs.reset()  # (processes, channels, height, width)
    current_state = torch.zeros(
        num_processes,
        *obs_shape)  # (processes, channels*stack, height, width)
    current_state = update_current_state(
        current_state, state,
        shape_dim0).type(dtype)  #add the new frame, remove oldest
    agent.insert_first_state(
        current_state
    )  #storage has states: (num_steps + 1, num_processes, *obs_shape), set first step

    # These are used to compute average rewards for all processes.
    episode_rewards = torch.zeros(
        [num_processes, 1])  #keeps track of current episode cumulative reward
    final_rewards = torch.zeros([num_processes, 1])

    num_updates = int(num_frames) // num_steps // num_processes
    save_interval_num_updates = int(save_interval / num_processes / num_steps)

    # prev_action = Variable(torch.zeros([num_processes, 1]).type(torch.LongTensor)).cuda()

    #Begin training
    # count =0
    start = time.time()
    start2 = time.time()
    for j in range(num_updates):
        for step in range(num_steps):

            # Act, [P,1], [P], [P,1], [P]
            # value, action = agent.act(Variable(agent.rollouts.states[step], volatile=True))
            state_pytorch = Variable(agent.rollouts.states[step])

            value, action, action_log_probs, dist_entropy = agent.act(
                state_pytorch)  #, volatile=True))

            # if next_state_pred_:
            #     next_state_prediction = agent.actor_critic.predict_next_state2(state_pytorch, prev_action)
            # next_state_prediction = 0

            # print (action_log_probs.size())
            # print (dist_entropy.size())

            # prev_action = action

            # print (next_state_prediction.size()) # [P,1,84,84]
            # fasd

            cpu_actions = action.data.squeeze(1).cpu().numpy()  #[P]
            # cpu_actions = action.data.cpu().numpy() #[P]
            # print (actions.size())

            # Step, S:[P,C,H,W], R:[P], D:[P]
            state, reward, done, info = envs.step(cpu_actions)

            reward_numpy = reward

            # Record rewards and update state
            reward, masks, final_rewards, episode_rewards, current_state = update_rewards(
                reward, done, final_rewards, episode_rewards, current_state)
            current_state = update_current_state(current_state, state,
                                                 shape_dim0)

            # Agent record step
            # agent.insert_data(step, current_state, action.data, value.data, reward, masks, action_log_probs.data, dist_entropy.data)

            if next_state_pred_:

                agent.insert_data(step, current_state, action.data, value,
                                  reward, masks, action_log_probs,
                                  dist_entropy,
                                  next_state_prediction)  #, done)
                agent.rollouts.insert_state_pred(next_state_prediction)

            else:
                agent.insert_data(step, current_state, action.data, value,
                                  reward, masks, action_log_probs,
                                  dist_entropy, 0)  #, done)

            # if 'Montez' in env_name and see_reward_episode:

            #     for state_i in range(len(state)):
            #         if done[state_i]:
            #             states_list[state_i] = []
            #         else:
            #             states_list[state_i].append(np.squeeze(state[state_i]))

            #             # print (state[state_i].shape)
            #             # fasdf

            #         # print (reward)

            #         if reward_numpy[state_i] >0:
            #             #plot the states of state_i
            #             print (len(states_list[state_i]))
            #             # view_reward_episode(model_dict=model_dict, frames=states_list[state_i][len(states_list[state_i])-100:])
            #             # view_reward_episode(model_dict=model_dict, frames=states_list[state_i][len(states_list[state_i])-100:])
            #             view_reward_episode(model_dict=model_dict, frames=states_list[state_i])

            #             fadsa

            #      # and np.sum(agent.rollouts.rewards.cpu().numpy()) > 0

            #     # print (np.sum(agent.rollouts.rewards.cpu().numpy()))
            #     # print (j)

        #Optimize agent
        agent.update()  #agent.update(j,num_updates)

        batch = agent.rollouts.states

        # print (batch.size())   # [Steps+1,Processes,Stack,84,84]
        # remove first state since its repeated, its the last state of last episode
        # take the first state of the stack for each step
        #reshape to [P*S,84,84]
        batch = batch[1:]  # [Steps,Processes,Stack,84,84]
        batch = batch[:, :, 0]  # [Steps,Processes,84,84]
        batch = batch.contiguous().view(-1, 84, 84)  # [Steps*Processes,84,84]

        # print (batch.size())

        # fadsa
        # print (vae)
        # Fit the auxiliary VAE on the frames visited this update
        # (see NOTE above: requires vae_ to be truthy).
        elbo = vae.update(batch)

        agent.insert_first_state(agent.rollouts.states[-1])

        # print (agent.state_pred_error.data.cpu().numpy())

        # print ('save_interval_num_updates', save_interval_num_updates)
        # print ('num_updates', num_updates)
        # print ('j', j)
        total_num_steps = (j + 1) * num_processes * num_steps

        # if total_num_steps % save_interval == 0 and save_dir != "":
        if j % save_interval_num_updates == 0 and save_dir != "" and j != 0:

            #Save model
            if save_params:
                do_params(save_dir, agent, total_num_steps, model_dict)
            #make video
            if vid_:
                do_vid(envs_video, update_current_state, shape_dim0, dtype,
                       agent, model_dict, total_num_steps)
            #make gif
            if gif_:
                do_gifs(envs_gif, agent, model_dict, update_current_state,
                        update_rewards, total_num_steps)
            #make vae prob gif
            if vae_:
                do_prob_state(envs_vae, agent, model_dict, vae,
                              update_current_state, total_num_steps)

        #Print updates
        if j % log_interval == 0:  # and j!=0:
            end = time.time()

            to_print_info_string = "{}, {}, {:.1f}/{:.1f}/{:.1f}/{:.1f}, {}, {:.1f}, {:.2f}".format(
                j, total_num_steps,
                final_rewards.min(), final_rewards.median(),
                final_rewards.mean(), final_rewards.max(),
                int(total_num_steps / (end - start)), end - start,
                end - start2)

            elbo = "{:.2f}".format(elbo.data.cpu().numpy()[0])

            if next_state_pred_:
                state_pred_error_print = "{:.2f}".format(
                    agent.state_pred_error.data.cpu().numpy()[0])
                print(to_print_info_string + ' ' + state_pred_error_print +
                      ' ' + elbo)
                to_print_legend_string = "Upts, n_timesteps, min/med/mean/max, FPS, total_T, step_T, pred_error, elbo"

            else:
                print(to_print_info_string + ' ' + elbo)
                to_print_legend_string = "Upts, n_timesteps, min/med/mean/max, FPS, total_T, step_T, elbo"

            start2 = time.time()

            if j % (log_interval * 30) == 0:

                if ls_:
                    do_ls(envs_ls, agent, model_dict, total_num_steps,
                          update_current_state, update_rewards)
                # print("Upts, n_timesteps, min/med/mean/max, FPS, Time, Plot updated, LS updated")
                # print(to_print_info_string + ' LS recorded')#, agent.current_lr)
                # else:
                #update plots
                try:
                    if ls_:
                        update_ls_plot(model_dict)
                    make_plots(model_dict)
                    print(to_print_legend_string + " Plot updated")
                except:
                    raise  #pass
                    print(to_print_legend_string)

    try:
        make_plots(model_dict)
    except:
        print()
コード例 #14
0
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.multiprocessing as mp
import torch.optim as optim

from this_util import *
from vae import VAE

# Script: load a pre-trained VAE and run every saved observation file through it.
# NOTE(review): `vae_model_path`, `DATA_DIR`, `np` and `os` come from
# `from this_util import *` — confirm they are defined there.
is_cuda = True  # toggle GPU use; model and data are moved to CUDA below
vae_model = VAE()
vae_model.load_state_dict(torch.load(vae_model_path))
vae_model.eval()  # inference mode: freezes dropout / batch-norm behavior
if is_cuda:
    vae_model.cuda()
    vae_model.is_cuda = True  # custom flag, presumably read inside VAE — verify
filelist = os.listdir(DATA_DIR)
filelist.sort()  # deterministic processing order
N = len(filelist)

# NOTE(review): z_list / action_list are created but never appended to in the
# visible code — this snippet looks truncated; check the full original script.
z_list = []
action_list = []
for i in range(N):
    filename = filelist[i]
    # each file is an .npz archive with an 'obs' array of observations
    raw_data = np.load(os.path.join(DATA_DIR, filename))
    data = raw_data['obs']
    data = torch.from_numpy(data)
    if is_cuda:
        data = data.cuda()
    t = vae_model(data)  # forward pass; result unused here — presumably truncated
コード例 #15
0
# Script: inspect the (externally defined) `dataset`, flatten it into a list
# of states, and set up a VAE for training.
# NOTE(review): `dataset`, `ii`, `home` and `VAE` are defined outside this
# truncated snippet — confirm against the full file.
print(len(dataset))
print(len(dataset[ii][0]))  # single timepoint
print(dataset[ii][0][0].shape)  #action [1]           a_t+1
print(dataset[ii][0][1].shape)  #state [2,84,84]   s_t

# Flatten all episodes into one list of states for VAE training.
state_dataset = []
for i in range(len(dataset)):
    for t in range(len(dataset[i])):
        state_dataset.append(dataset[i][t][1])

print(len(state_dataset))

print('Init VAE')
vae = VAE()
vae.cuda()

# Script toggles: resume from checkpoint / train / visualize.
load_ = 1
train_ = 1
viz_ = 1

if load_:
    load_epoch = 50
    path_to_load_variables = home + '/Documents/tmp/breakout_2frames/vae_params' + str(
        load_epoch) + '.ckpt'
    vae.load_params(path_to_load_variables)

epochs = 100
# When resuming, save under the cumulative epoch count so checkpoints don't collide.
if load_:
    path_to_save_variables = home + '/Documents/tmp/breakout_2frames/vae_params' + str(
        epochs + load_epoch) + '.ckpt'
コード例 #16
0
    # --- Fragment: interior of a function whose header is outside this view ---
    # Model configuration for a VAE with a discretized-mixture output head.
    data_dim = dsize * dsize  # flattened image size; `dsize` defined by the enclosing scope — confirm
    nr_mix = 10
    # mean and scale for each components and weighting bt components (10+2*10)
    probs_size = (2 * nr_mix) + nr_mix
    dout = data_dim * probs_size  # decoder output: mixture parameters per pixel
    latent_size = 64

    encoder = Encoder(data_dim, latent_size)
    decoder = Decoder(latent_size, dout)
    vae = VAE(encoder, decoder, use_cuda)
    # square error is not the correct loss - for ordered input,
    # should use softmax for unordered input ( like mine )

    if use_cuda:
        print("using gpu")
        vae = vae.cuda()
        # NOTE(review): vae.cuda() should already move registered submodules;
        # the explicit moves below are presumably redundant — verify.
        vae.encoder = vae.encoder.cuda()
        vae.decoder = vae.decoder.cuda()
    opt = torch.optim.Adam(vae.parameters(), lr=1e-4)
    epoch = 0
    # Train/test loaders over image directories; FroggerDataset is project-local.
    data_train_loader = DataLoader(FroggerDataset(
        train_data_dir,
        transform=transforms.ToTensor(),
        limit=args.num_train_limit),
                                   batch_size=64,
                                   shuffle=True)
    data_test_loader = DataLoader(FroggerDataset(
        test_data_dir, transform=transforms.ToTensor()),
                                  batch_size=32,
                                  shuffle=True)
    test_data = data_test_loader  # alias used by the (unseen) evaluation code below
コード例 #17
0
ファイル: main.py プロジェクト: yngtodd/molecules-deprecated
def main():
    """Train a VAE on 21x21 contact matrices and periodically save checkpoints.

    Relies on module-level names defined elsewhere in this file: ``args``,
    ``UnlabeledContact``, ``Encoder``, ``Decoder``, ``VAE``, ``AverageMeter``
    and ``loss_function``. Saves model parameters and a sample reconstruction
    every 10 epochs.
    """
    use_cuda = args.use_cuda
    half_precision = args.half_precision
    print("Cuda set to {} | Cuda availability: {}".format(
        use_cuda, torch.cuda.is_available()))

    experiment = "vae_latent3"
    #logger = SummaryWriter(log_dir='./logs', comment=experiment)

    train_data = UnlabeledContact(
        data='/home/ygx/data/fspeptide/fs_peptide.npy')
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441

    encoder = Encoder(input_size=input_size, latent_size=3)
    decoder = Decoder(latent_size=3, output_size=input_size)
    vae = VAE(encoder,
              decoder,
              use_cuda=use_cuda,
              half_precision=half_precision)
    #criterion = nn.BCELoss()

    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        vae = vae.cuda()
        #criterion = criterion.cuda().half()
        if half_precision:
            encoder = encoder.half()
            decoder = decoder.half()
            vae = vae.half()

    optimizer = optim.SGD(vae.parameters(), lr=0.001)

    losses = AverageMeter()
    epoch_loss = 0  # NOTE: accumulated across all epochs, never reset (kept as-is)
    for epoch in range(100):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            # BUGFIX: use the actual batch size instead of args.batch_size.
            # The final batch of a DataLoader without drop_last can be smaller,
            # and resize_-ing it up to args.batch_size fills the extra rows
            # with uninitialized memory, silently corrupting training.
            inputs = inputs.resize_(inputs.size(0), 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda()
                if half_precision:
                    inputs = inputs.half()
            inputs = Variable(inputs)

            # Compute output
            optimizer.zero_grad()
            dec = vae(inputs)

            # Measure the loss
            #kl = kl_loss(vae.z_mean, vae.z_sigma)
            #loss = criterion(dec, inputs) #+ kl # Adding KL is caussing loss > 1
            loss = loss_function(dec, inputs, vae.z_mean, vae.z_sigma)
            losses.update(loss.data[0], inputs.size(0))

            # Compute the gradient
            loss.backward()
            optimizer.step()
            epoch_loss += loss.data[0]

            # Logging
            # Adding graph is a lot of overhead
            #logger.add_graph_onnx(vae)

            # log loss values every iteration
            #logger.add_scalar('data/(train)loss_val', losses.val, batch_idx + 1)
            #logger.add_scalar('data/(train)loss_avg', losses.avg, batch_idx + 1)

            # log the layers and layers gradient histogram and distributions
            #for tag, value in vae.named_parameters():
            #    tag = tag.replace('.', '/')
            #    logger.add_histogram('model/(train)' + tag, to_numpy(value), batch_idx + 1)
            #    logger.add_histogram('model/(train)' + tag + '/grad', to_numpy(value.grad), batch_idx + 1)

            # log the outputs of the autoencoder
            #logger.add_image('model/(train)output', make_grid(dec.data), batch_idx + 1)

            if batch_idx % args.log_interval == 0:
                # BUGFIX: progress used batch_idx * len(data), but `data` is a
                # dict here, so len(data) is the number of keys, not the batch
                # size. Use the true per-batch sample count instead.
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * inputs.size(0), len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.data[0]))

        #if epoch < 10:
        # Get latent encoding
        #latent_array = encoder(inputs).data[0].cpu().numpy()
        #filename = 'latent_epoch' + str(epoch)
        #np.save('./latent_saves/kl_bce_latent3/' + filename, latent_array)

        # Get reconstructed image
        #reconstructed_array = vae(inputs).data[0].cpu().numpy().reshape(21, 21)
        #recon_filename = 'reconstructed_epoch' + str(epoch)
        #np.save('./reconstruct_saves/kl_bce_latent3/' + recon_filename, reconstructed_array)

        if epoch % 10 == 0:
            # Checkpoint parameters and dump one reconstruction for inspection.
            torch.save(vae.state_dict(), args.save_path + 'epoch' + str(epoch))

            #latent_array = encoder(inputs).data[0].cpu().numpy()
            #filename = 'latent_epoch' + str(epoch)
            #np.save('./latent_saves/kl_bce_latent3/' + filename, latent_array)

            reconstructed_array = vae(
                inputs).data[0].cpu().float().numpy().reshape(21, 21)
            recon_filename = 'reconstructed_epoch' + str(epoch)
            np.save('./reconstruct_saves/kl_bce_latent3/' + recon_filename,
                    reconstructed_array)
コード例 #18
0
def train(model_dict):
    """Run A2C training with an optional VAE-driven "explore" agent.

    Two agents share the environment processes: ``agent_exploit`` maximizes
    the environment reward, and (when ``explore_`` is set) ``agent_explore``
    is rewarded with the negative, running-normalized VAE ELBO of observed
    frames, i.e. a novelty bonus. The split of processes between the two
    agents can grow over training via ``inc_exploiters_over``.

    Relies on many module-level names defined elsewhere in this file
    (``a2c``, ``VAE``, ``SubprocVecEnv``, ``make_env*``, ``do_*``,
    ``make_plots`` etc.) — confirm against the full module.
    """
    def update_current_state(current_state, state, channels):
        """Shift the frame stack left and append the newest frame(s)."""
        # current_state: [processes, channels*stack, height, width]
        state = torch.from_numpy(
            state).float()  # (processes, channels, height, width)
        # if num_stack > 1:
        #first stack*channel-channel frames = last stack*channel-channel , so slide them forward
        current_state[:, :-channels] = current_state[:, channels:]
        current_state[:, -channels:] = state  #last frame is now the new one

        return current_state

    def update_rewards(reward, done, final_rewards, episode_rewards,
                       current_state):
        """Accumulate episode rewards and zero state/reward for finished episodes."""
        # Reward, Done: [P], [P]
        # final_rewards, episode_rewards: [P,1]. [P,1]
        # current_state: [P,C*S,H,W]
        reward = torch.from_numpy(np.expand_dims(np.stack(reward),
                                                 1)).float()  #[P,1]
        episode_rewards += reward  #keeps track of current episode cumulative reward
        masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                   for done_ in done])  #[P,1]
        final_rewards *= masks  #erase the ones that are done
        final_rewards += (
            1 -
            masks) * episode_rewards  #set it to the cumulative episode reward
        episode_rewards *= masks  #erase the done ones
        masks = masks.type(dtype)  #cuda
        if current_state.dim() == 4:  # if state is a frame/image
            current_state *= masks.unsqueeze(2).unsqueeze(2)  #[P,1,1,1]
        else:
            current_state *= masks  #restart the done ones, by setting the state to zero
        return reward, masks, final_rewards, episode_rewards, current_state

    # ---- Unpack configuration ----
    num_frames = model_dict['num_frames']
    cuda = model_dict['cuda']
    which_gpu = model_dict['which_gpu']
    num_steps = model_dict['num_steps']
    num_processes = model_dict['num_processes']
    seed = model_dict['seed']
    env_name = model_dict['env']
    save_dir = model_dict['save_to']
    num_stack = model_dict['num_stack']
    algo = model_dict['algo']
    save_interval = model_dict['save_interval']
    log_interval = model_dict['log_interval']

    save_params = model_dict['save_params']
    vid_ = model_dict['vid_']
    gif_ = model_dict['gif_']
    ls_ = model_dict['ls_']
    vae_ = model_dict['vae_']
    explore_ = model_dict['explore_']

    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(which_gpu)

    # Seed and pick the tensor type once; dtype is also stored for helpers.
    if cuda:
        torch.cuda.manual_seed(seed)
        dtype = torch.cuda.FloatTensor
        model_dict['dtype'] = dtype
    else:
        torch.manual_seed(seed)
        dtype = torch.FloatTensor
        model_dict['dtype'] = dtype

    # Create environments
    print(num_processes, 'processes')
    monitor_rewards_dir = os.path.join(save_dir, 'monitor_rewards')
    if not os.path.exists(monitor_rewards_dir):
        os.makedirs(monitor_rewards_dir)
        print('Made dir', monitor_rewards_dir)
    envs = SubprocVecEnv([
        make_env(env_name, seed, i, monitor_rewards_dir)
        for i in range(num_processes)
    ])

    # Separate single environments for side artifacts (video/gif/ls/vae viz).
    if vid_:
        print('env for video')
        envs_video = make_env_monitor(env_name, save_dir)

    if gif_:
        print('env for gif')
        envs_gif = make_env_basic(env_name)

    if ls_:
        print('env for ls')
        envs_ls = make_env_basic(env_name)

    if vae_:
        print('env for vae')
        envs_vae = make_env_basic(env_name)

    obs_shape = envs.observation_space.shape  # (channels, height, width)
    obs_shape = (obs_shape[0] * num_stack, *obs_shape[1:]
                 )  # (channels*stack, height, width)
    shape_dim0 = envs.observation_space.shape[0]  #channels

    model_dict['obs_shape'] = obs_shape
    model_dict['shape_dim0'] = shape_dim0

    next_state_pred_ = 0
    model_dict['next_state_pred_'] = next_state_pred_

    # Create agent
    # if algo == 'a2c':

    # agent = a2c(envs, model_dict)

    # elif algo == 'ppo':
    #     agent = ppo(envs, model_dict)
    #     print ('init ppo agent')
    # elif algo == 'a2c_minibatch':
    #     agent = a2c_minibatch(envs, model_dict)
    #     print ('init a2c_minibatch agent')
    # elif algo == 'a2c_list_rollout':
    #     agent = a2c_list_rollout(envs, model_dict)
    #     print ('init a2c_list_rollout agent')
    # elif algo == 'a2c_with_var':
    #     agent = a2c_with_var(envs, model_dict)
    #     print ('init a2c_with_var agent')
    # elif algo == 'a2c_bin_mask':
    #     agent = a2c_with_var(envs, model_dict)
    #     print ('init a2c_with_var agent')
    # agent = model_dict['agent'](envs, model_dict)

    # #Load model
    # if args.load_path != '':
    #     # agent.actor_critic = torch.load(os.path.join(args.load_path))
    #     agent.actor_critic = torch.load(args.load_path).cuda()
    #     print ('loaded ', args.load_path)

    # see_reward_episode = 0
    # if 'Montez' in env_name and see_reward_episode:
    #     states_list = [[] for i in range(num_processes)]

    # view_reward_episode(model_dict=model_dict, frames=[])
    # dfasddsf

    # if vae_:
    #     vae = VAE()
    #     vae.cuda()

    print('init exploit a2c agent')
    agent_exploit = a2c(envs, model_dict)

    if explore_:
        print('init explore a2c agent')
        agent_explore = a2c(envs, model_dict)
        print('init vae')
        vae = VAE()
        vae.cuda()

    # Init state
    state = envs.reset()  # (processes, channels, height, width)
    current_state = torch.zeros(
        num_processes,
        *obs_shape)  # (processes, channels*stack, height, width)
    current_state = update_current_state(
        current_state, state,
        shape_dim0).type(dtype)  #add the new frame, remove oldest

    agent_exploit.insert_first_state(
        current_state
    )  #storage has states: (num_steps + 1, num_processes, *obs_shape), set first step
    if explore_:
        agent_explore.insert_first_state(
            current_state
        )  #storage has states: (num_steps + 1, num_processes, *obs_shape), set first step

    # These are used to compute average rewards for all processes.
    episode_rewards = torch.zeros(
        [num_processes, 1])  #keeps track of current episode cumulative reward
    final_rewards = torch.zeros([num_processes, 1])

    num_updates = int(num_frames) // num_steps // num_processes
    save_interval_num_updates = int(save_interval / num_processes / num_steps)

    # prev_action = Variable(torch.zeros([num_processes, 1]).type(torch.LongTensor)).cuda()

    # For normalizing the logprobs
    # B is the EMA decay; m/v are running mean / second moment of the ELBO,
    # seeded with large magnitudes so early updates stay bounded.
    B = .99
    m = torch.FloatTensor([-100.]).cuda()
    v = torch.FloatTensor([10000.]).cuda()

    # prev_reward = torch.ones(num_processes,1).cuda()
    # -1 means "all processes exploit"; otherwise start with the configured
    # count and grow it over training (see inc_exploiters_over below).
    if model_dict['init_exploit_processes'] == -1:
        init_exploit_processes = num_processes
    else:
        init_exploit_processes = model_dict['init_exploit_processes']
    exploit_processes = init_exploit_processes
    # explore_processes = 16

    all_frames = []

    start = time.time()
    start2 = time.time()
    for j in range(num_updates):

        start3 = time.time()
        for step in range(num_steps):

            # start3 = time.time()
            state_pytorch = Variable(agent_exploit.rollouts.states[step]
                                     )  #, volatile=True) # [P,S,84,84]

            # exploit_state = state_pytorch[:exploit_processes]
            # explore_state = state_pytorch[exploit_processes:]

            # Both agents act on the same states; actions are mixed below.
            u_value, u_action, u_action_log_probs, u_dist_entropy = agent_exploit.act(
                state_pytorch)
            if explore_:
                r_value, r_action, r_action_log_probs, r_dist_entropy = agent_explore.act(
                    state_pytorch)

            u_cpu_actions = u_action.data.squeeze(1).cpu().numpy()  #[P]
            if explore_:
                r_cpu_actions = r_action.data.squeeze(1).cpu().numpy()  #[P]

            #Choose how many you want from each
            # NOTE(review): this line requires r_cpu_actions, so explore_
            # must be truthy for this code path — confirm intended usage.
            cpu_actions = np.concatenate((u_cpu_actions[:exploit_processes],
                                          r_cpu_actions[exploit_processes:]),
                                         0)  #[P]
            # cpu_actions = u_cpu_actions

            # before_step_time = time.time() - start3

            # Step, S:[P,C,H,W], R:[P], D:[P]
            # start3 = time.time()
            state, reward, done, info = envs.step(cpu_actions)
            # step_time = time.time() - start3
            # reward_numpy = reward
            # print (reward)

            # # for trainign vae.
            # for p in range(len(state)):
            #     # print (state[p].shape) #[1,84,84]
            #     # fasad
            #     all_frames.append(state[p])
            #     print (len(all_frames))
            #     if len(all_frames) == 10000:
            #         pickle.dump( all_frames, open(home + '/Documents/tmp/montezum_frames.pkl' , "wb" ) )
            #         print ('saved pkl')
            #         fafaadsfs

            # start3 = time.time()
            # Record rewards and update state
            reward, masks, final_rewards, episode_rewards, current_state = update_rewards(
                reward, done, final_rewards, episode_rewards, current_state)
            current_state = update_current_state(current_state, state,
                                                 shape_dim0)
            # current_state_u = current_state[:exploit_processes]
            # current_state_r = current_state[exploit_processes:]

            #Insert data for exploit agent
            agent_exploit.insert_data(step, current_state, u_action.data,
                                      u_value, reward, masks,
                                      u_action_log_probs, u_dist_entropy,
                                      0)  #, done)

            if explore_:
                # Insert log prob for explore agent
                # The explore agent's reward is the negative normalized ELBO
                # of the newest frame: low ELBO = novel frame = high reward.
                batch = state_pytorch[:, -1]  #last of stack
                batch = batch.contiguous()  # [P,84,84]
                elbo = vae.forward2(batch, k=10)  #[P]
                elbo = elbo.view(-1, 1).data  #[P,1]
                elbo = (elbo - m) / torch.sqrt(v)
                elbo = torch.clamp(elbo, max=.01)
                agent_explore.insert_data(step, current_state, r_action.data,
                                          r_value, -elbo, masks,
                                          r_action_log_probs, r_dist_entropy,
                                          0)  #, done)

                #update m and v
                m = B * m + (1. - B) * elbo.mean()
                v = B * v + (1. - B) * elbo.pow(2).mean()

                # Debug guard: an absurdly negative ELBO dumps diagnostics and
                # then crashes on purpose via the undefined name below.
                if elbo.mean() < -9000.:
                    print(elbo)
                    print(reward)
                    print(elbo.mean())
                    print(elbo.pow(2).mean())
                    fadsads

            # after_step_time = time.time() - start3

            # if 'Montez' in env_name and see_reward_episode:

            #     for state_i in range(len(state)):
            #         if done[state_i]:
            #             states_list[state_i] = []
            #         else:
            #             states_list[state_i].append(np.squeeze(state[state_i]))

            #             # print (state[state_i].shape)
            #             # fasdf

            #         # print (reward)

            #         if reward_numpy[state_i] >0:
            #             #plot the states of state_i
            #             print (len(states_list[state_i]))
            #             # view_reward_episode(model_dict=model_dict, frames=states_list[state_i][len(states_list[state_i])-100:])
            #             # view_reward_episode(model_dict=model_dict, frames=states_list[state_i][len(states_list[state_i])-100:])
            #             view_reward_episode(model_dict=model_dict, frames=states_list[state_i])

            #             fadsa

            #      # and np.sum(agent.rollouts.rewards.cpu().numpy()) > 0

            #     # print (np.sum(agent.rollouts.rewards.cpu().numpy()))
            #     # print (j)

        steps_time = time.time() - start3
        start3 = time.time()

        #Optimize agents
        agent_exploit.update()  #agent.update(j,num_updates)
        if explore_:
            agent_explore.update()  #agent.update(j,num_updates)

            #Optimize vae
            # Train the VAE on the frames the exploit agent just collected.
            batch = agent_exploit.rollouts.states
            batch = batch[1:]  # [Steps,Processes,Stack,84,84]
            batch = batch[:, :, 0]  # [Steps,Processes,84,84]
            batch = batch.contiguous().view(-1, 84,
                                            84)  # [Steps*Processes,84,84]
            elbo = vae.update(batch)

        #Insert state
        agent_exploit.insert_first_state(agent_exploit.rollouts.states[-1])
        if explore_:
            agent_explore.insert_first_state(agent_explore.rollouts.states[-1])

        total_num_steps = (j + 1) * num_processes * num_steps

        #Change number of explore vs exploit
        # Linearly ramp exploit_processes from its initial value up to
        # num_processes over the first inc_exploiters_over environment steps.
        if model_dict['init_exploit_processes'] != -1 and model_dict[
                'inc_exploiters_over'] != -1:
            frac_step = np.minimum((total_num_steps + 1.) /
                                   float(model_dict['inc_exploiters_over']),
                                   1.)  #fraction of steps
            aaa = int((num_processes - init_exploit_processes) * frac_step)
            exploit_processes = np.minimum(init_exploit_processes + aaa + 1,
                                           num_processes)

        update_time = time.time() - start3

        # agent_exploit.rollouts.reset_lists()
        # agent_explore.rollouts.reset_lists()

        # print ('init ', init_exploit_processes)
        # print ('cur ', exploit_processes)
        # print ('frac_step', frac_step)
        # print ('aaa', aaa)

        # print (agent.state_pred_error.data.cpu().numpy())

        # print ('save_interval_num_updates', save_interval_num_updates)
        # print ('num_updates', num_updates)
        # print ('j', j)

        # if total_num_steps % save_interval == 0 and save_dir != "":
        if j % save_interval_num_updates == 0 and save_dir != "" and j != 0:

            #Save model
            # NOTE(review): do_params/do_vid reference `agent`, which is not
            # defined in this function (only agent_exploit/agent_explore are)
            # — these branches would raise NameError; confirm upstream.
            if save_params:
                do_params(save_dir, agent, total_num_steps, model_dict)
            #make video
            if vid_:
                do_vid(envs_video, update_current_state, shape_dim0, dtype,
                       agent, model_dict, total_num_steps)
            #make gif
            if gif_:
                do_gifs(envs_gif, agent, model_dict, update_current_state,
                        update_rewards, total_num_steps)
            # #make vae prob gif
            if vae_:
                # do_prob_state(envs_vae, agent, model_dict, vae, update_current_state, total_num_steps)
                # do_gifs2(envs_vae, agent_exploit, vae, model_dict, update_current_state, update_rewards, total_num_steps)
                do_gifs3(envs_vae, agent_exploit, vae, model_dict,
                         update_current_state, update_rewards, total_num_steps)

        #Print updates
        if j % log_interval == 0:  # and j!=0:
            end = time.time()

            to_print_info_string = "{}, {}, {:.1f}/{:.1f}/{:.1f}/{:.1f}, {}, {:.1f}, {:.2f}".format(
                j, total_num_steps,
                final_rewards.min(), final_rewards.median(),
                final_rewards.mean(), final_rewards.max(),
                int(total_num_steps / (end - start)), end - start,
                end - start2)

            # NOTE: rebinds `elbo` (a tensor above) to its formatted string.
            elbo = "{:.2f}".format(elbo.data.cpu().numpy()[0])
            # elbo =  "1"

            steps_time = "{:.3f}".format(steps_time)
            update_time = "{:.3f}".format(update_time)

            # if next_state_pred_:
            #     state_pred_error_print =  "{:.2f}".format(agent.state_pred_error.data.cpu().numpy()[0])
            #     print(to_print_info_string+' '+state_pred_error_print+' '+elbo)
            #     to_print_legend_string = "Upts, n_timesteps, min/med/mean/max, FPS, total_T, step_T, pred_error, elbo"

            # else:
            # print(to_print_info_string+' '+elbo)
            # print(to_print_info_string+' '+elbo+' '+str(exploit_processes)+' '+str(before_step_time)+' '+str(step_time)+' '+str(after_step_time))#, value[0].data.cpu().numpy(), m.cpu().numpy(), v.cpu().numpy())
            print(
                to_print_info_string + ' ' + elbo + ' ' +
                str(exploit_processes)
            )  #+' '+steps_time+' '+update_time)#, value[0].data.cpu().numpy(), m.cpu().numpy(), v.cpu().numpy())

            # print (value[0].data.cpu().numpy(), m.cpu().numpy(), v.cpu().numpy())
            to_print_legend_string = "Upts, n_timesteps, min/med/mean/max, FPS, total_T, step_T, elbo, Exploit_Procs"

            start2 = time.time()

            if j % (log_interval * 30) == 0:

                if ls_:
                    # do_ls(envs_ls, agent, model_dict, total_num_steps, update_current_state, update_rewards)
                    do_ls_2(envs_ls, agent_explore, model_dict,
                            total_num_steps, update_current_state,
                            update_rewards, vae)

                    # update_ls_plot(model_dict)
                    update_ls_plot_2(model_dict)
                    print('updated ls')

                # print("Upts, n_timesteps, min/med/mean/max, FPS, Time, Plot updated, LS updated")
                # print(to_print_info_string + ' LS recorded')#, agent.current_lr)
                # else:
                #update plots

            # if ls_:

                try:

                    start3 = time.time()

                    make_plots(model_dict)
                    print(to_print_legend_string +
                          " Plot updated ")  #+str(time.time() - start3))
                except:
                    raise  #pass
                    print(to_print_legend_string)

    # Final plot attempt; failures are swallowed deliberately.
    try:
        make_plots(model_dict)
    except:
        print()
コード例 #19
0
ファイル: main.py プロジェクト: system123/pytorch_divcolor
def train_vae(logger=None):
  """Train the colorization VAE and checkpoint it after every epoch.

  Mirrors the sibling ``train_mdn``. Relies on module-level names defined
  elsewhere in this file: ``args``, ``get_dirpaths``, ``colordata``, ``VAE``,
  ``vae_loss`` and ``test_vae``.

  Args:
    logger: optional logger with ``update_plot`` / ``update_test_plot``.
  """
  out_dir, listdir, featslistdir = get_dirpaths(args)
  batchsize = args.batchsize
  hiddensize = args.hiddensize
  nmix = args.nmix  # unused here; kept for parity with train_mdn's preamble
  nepochs = args.epochs

  data = colordata(\
    os.path.join(out_dir, 'images'), \
    listdir=listdir,\
    featslistdir=featslistdir,
    split='train')

  nbatches = np.int_(np.floor(data.img_num/batchsize))

  data_loader = DataLoader(dataset=data, num_workers=args.nthreads,\
    batch_size=batchsize, shuffle=True, drop_last=True)

  model = VAE()
  model.cuda()
  model.train(True)

  optimizer = optim.Adam(model.parameters(), lr=5e-5)

  itr_idx = 0
  for epochs in range(nepochs):
    train_loss = 0.

    for batch_idx, (batch, batch_recon_const, batch_weights, batch_recon_const_outres, _) in \
      tqdm(enumerate(data_loader), total=nbatches):

      input_color = Variable(batch).cuda()
      lossweights = Variable(batch_weights).cuda()
      lossweights = lossweights.view(batchsize, -1)
      input_greylevel = Variable(batch_recon_const).cuda()
      # NOTE(review): z stays on CPU here (same as train_mdn); presumably the
      # model moves or re-samples it internally — confirm before changing.
      z = Variable(torch.randn(batchsize, hiddensize))

      optimizer.zero_grad()
      mu, logvar, color_out = model(input_color, input_greylevel, z)
      kl_loss, recon_loss, recon_loss_l2 = \
        vae_loss(mu, logvar, color_out, input_color, lossweights, batchsize)
      loss = kl_loss.mul(1e-2)+recon_loss
      # BUGFIX: detach() is not in-place — the original called it and discarded
      # the result, leaving recon_loss_l2 attached to the graph. Rebind so the
      # monitoring value really is detached from autograd.
      recon_loss_l2 = recon_loss_l2.detach()
      loss.backward()
      optimizer.step()

      train_loss = train_loss + recon_loss_l2.data[0]

      if(logger): 
        logger.update_plot(itr_idx, \
          [kl_loss.data[0], recon_loss.data[0], recon_loss_l2.data[0]], \
          plot_type='vae')
        itr_idx += 1

      # Periodically dump ground-truth vs. reconstruction images.
      if(batch_idx % args.logstep == 0):
        data.saveoutput_gt(color_out.cpu().data.numpy(), \
          batch.numpy(), \
          'train_%05d_%05d' % (epochs, batch_idx), \
          batchsize, \
          net_recon_const=batch_recon_const_outres.numpy())

    train_loss = (train_loss*1.)/(nbatches)
    print('[DEBUG] VAE Train Loss, epoch %d has loss %f' % (epochs, train_loss)) 

    test_loss = test_vae(model) 
    if(logger):
      logger.update_test_plot(epochs, test_loss)
    print('[DEBUG] VAE Test Loss, epoch %d has loss %f' % (epochs, test_loss)) 

    # Checkpoint every epoch (overwrites the previous file).
    torch.save(model.state_dict(), '%s/models/model_vae.pth' % (out_dir))
コード例 #20
0
ファイル: train_model.py プロジェクト: chriscremer/Other_Code
# Script (truncated): inspect `dataset`, flatten it into states, and set up a
# VAE. NOTE(review): `dataset`, `ii`, `home` and `VAE` are defined outside this
# snippet, and the final `else:` body was cut off by the extraction.
print (dataset[ii][0][0].shape)  #action [1]           a_t+1
print (dataset[ii][0][1].shape)     #state [2,84,84]   s_t


# Flatten all episodes into one list of states for VAE training.
state_dataset = []
for i in range(len(dataset)):
    for t in range(len(dataset[i])):
        state_dataset.append(dataset[i][t][1])

print (len(state_dataset))



print('Init VAE')
vae = VAE()
vae.cuda()

# Script toggles: resume from checkpoint / train / visualize.
load_ = 1
train_ = 1
viz_ = 1


if load_:
    load_epoch = 50
    path_to_load_variables = home+'/Documents/tmp/breakout_2frames/vae_params'+str(load_epoch)+'.ckpt'
    vae.load_params(path_to_load_variables)

epochs = 100
# When resuming, save under the cumulative epoch count to avoid collisions.
if load_:
    path_to_save_variables = home+'/Documents/tmp/breakout_2frames/vae_params'+str(epochs+load_epoch)+'.ckpt'
else:
コード例 #21
0
ファイル: test.py プロジェクト: jadeleiyu/DomainSumm
def main6():
    """Smoke-test a sentence-level VAE on one hard-coded Document.

    Builds a toy Document, loads a vocabulary pickle (path depends on
    ``args.data``), constructs the VAE from a Config, and runs a single
    optimization step over the document's sentences.

    NOTE(review): may be truncated at the end of this excerpt — confirm no
    trailing statements exist in the full file. Relies on external names:
    ``Document``, ``get_args``, ``Config``, ``VAE``, ``prepare_data``,
    ``loss_function``, ``pickle``.
    """
    # vae test
    doc = Document(content=[[
        'to', 'the', 'editor', 're', 'for', 'women', 'worried', 'about',
        'fertility', 'egg', 'bank', 'is', 'a', 'new', 'option', 'sept', '00',
        'imagine', 'my', 'joy', 'in', 'reading', 'the', 'morning',
        'newspapers', 'on', 'the', 'day', 'of', 'my', '00th', 'birthday',
        'and', 'finding', 'not', 'one', 'but', 'two', 'articles', 'on', 'how',
        'women', 's', 'fertility', 'drops', 'off', 'precipitously', 'after',
        'age', '00'
    ], [
        'one', 'in', 'the', 'times', 'and', 'one', 'in', 'another', 'newspaper'
    ], ['i', 'sense', 'a', 'conspiracy', 'here'],
                            [
                                'have', 'you', 'been', 'talking', 'to', 'my',
                                'mother', 'in', 'law'
                            ], ['laura', 'heymann', 'washington']],
                   summary=[[
                       'laura', 'heymann', 'letter', 'on', 'sept', '00',
                       'article', 'about', 'using', 'egg', 'bank', 'to',
                       'prolong', 'fertility', 'expresses', 'ironic', 'humor',
                       'about', 'her', 'age', 'and', 'chances', 'of',
                       'becoming', 'pregnant'
                   ]],
                   label=[0.01] * 100,
                   label_idx=[0.01] * 100)
    torch.manual_seed(233)
    torch.cuda.set_device(0)
    args = get_args()
    # Pick the vocabulary pickle by dataset; both are 100-d embedding vocabs.
    if args.data == "nyt":
        vocab_file = "/home/ml/lyu40/PycharmProjects/data/nyt/lda_domains/preprocessed/vocab_100d.p"
        with open(vocab_file, "rb") as f:
            vocab = pickle.load(f, encoding='latin1')
    else:
        vocab_file = '/home/ml/ydong26/data/CNNDM/CNN_DM_pickle_data/vocab_100d.p'
        with open(vocab_file, "rb") as f:
            vocab = pickle.load(f, encoding='latin1')
    config = Config(
        vocab_size=vocab.embedding.shape[0],
        embedding_dim=vocab.embedding.shape[1],
        category_size=args.category_size,
        category_dim=50,
        word_input_size=100,
        sent_input_size=2 * args.hidden,
        word_GRU_hidden_units=args.hidden,
        sent_GRU_hidden_units=args.hidden,
        pretrained_embedding=vocab.embedding,
        word2id=vocab.w2i,
        id2word=vocab.i2w,
    )
    model = VAE(config)

    if torch.cuda.is_available():
        model.cuda()
    train_loss = 0
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    x = prepare_data(
        doc, vocab.w2i
    )  # list of tokens ex.x=[[1,2,1],[1,1]] x = Variable(torch.from_numpy(x)).cuda()
    sents = Variable(torch.from_numpy(x)).cuda()
    optimizer.zero_grad()
    loss = 0
    # Sum the per-sentence VAE losses, then take one optimizer step.
    for sent in sents:
        recon_batch, mu, logvar = model(sent.float())
        loss += loss_function(recon_batch, sent, mu, logvar)
    loss.backward()
    train_loss += loss.data[0]
    optimizer.step()