def predict(self, tags, data, show_org=False):
    """Colorize: run C on the luminance channel of images drawn from `data`,
    conditioned on the given (hair, eyes) tag pairs.

    Args:
        tags: sequence of (hair, eyes) integer label pairs, one per image.
        data: iterable of (image_batch, label) pairs used as luminance sources.
        show_org: unused; kept for interface compatibility.

    Returns:
        (outputs, originals): outputs is an (N, H, W, C) numpy array of
        generated images; originals is the stacked source batch as numpy.
    """
    self.C.train()  # NOTE(review): train mode during inference — confirm intended
    output, imgs = [], []
    l = 0
    # Collect at least len(tags) source images from the loader.
    while l < len(tags):
        for x, y in data:
            imgs.append(x)
            l += x.size(0)
            if l >= len(tags):
                break
    c = torch.cat(imgs, 0) if len(imgs) > 1 else imgs[0]
    # Luminance: weighted sum of the channels-last RGB planes.
    Y = sum(c[..., i] * self.Y_coff[i] for i in range(3))
    Y = Y.unsqueeze(1)
    # FIX: removed dead store `I = Y`; loop variable renamed so it no longer
    # shadows the batch luminance tensor `Y`.
    for tag, y_lum in zip(tags, Y):
        tag = torch.LongTensor(tag).unsqueeze(0)
        y_lum = y_lum.unsqueeze(0)
        y_lum = createVariable(y_lum, self.use_cuda, True)
        hair = createVariable(tag[:, 0], self.use_cuda, True)
        eyes = createVariable(tag[:, 1], self.use_cuda, True)
        x = self.C(hair, eyes, y_lum)
        output.append(x.data.cpu().numpy()[0])
    return np.array(output).transpose(0, 2, 3, 1), c.numpy()
def predict(self, tags, data):
    """Generate one colorized image per tag, conditioning G on the b/w
    luminance of source images drawn from `data`.

    Returns a list of 1-tuples, each holding one generated image as a
    numpy array (C, H, W).
    """
    # FIXME:
    self.G.train()  # NOTE(review): train mode during inference — confirm intended
    bar = tqdm(tags, smoothing=0)
    r = []
    l = 0
    c = []
    # Pull source images from the loader until we have at least len(tags).
    while l < len(tags):
        for x, y in data:
            c.append(x)
            l += x.size(0)
            if l >= len(tags):
                break
    c = torch.cat(c, 0) if len(c) > 1 else c[0]
    # Standard luminance weights over the channel dimension.
    bw = c[:, 0] * 0.299 + c[:, 1] * 0.587 + c[:, 2] * 0.114
    bw = bw.unsqueeze(1)
    # NOTE(review): the loop variable `bw` shadows the batch tensor above;
    # zip() captures the batch tensor before the first rebind, so this works.
    for (i, tag), bw in zip(enumerate(bar), bw):
        tag = torch.LongTensor(tag).unsqueeze(0)
        bw = bw.unsqueeze(0)
        # Training Generator
        bw = createVariable(bw, self.use_cuda, True)
        hair = createVariable(tag[:, 0], self.use_cuda, True)
        eyes = createVariable(tag[:, 1], self.use_cuda, True)
        x = self.G(hair, eyes, bw)
        # x = torch.clamp(x, 0, 1)
        r.append((x.data.cpu().numpy()[0], ))
    bar.close()
    return r
def fit(self, dataset):
    """One epoch of generator training: MSE reconstruction of the color image
    from its b/w luminance plus hair/eye labels.

    Returns a one-element list holding the running generator-loss tracker.
    """
    bar = tqdm(dataset, smoothing=0)
    avgGLoss = Average('GL', num=4)
    for i, (x, y) in enumerate(bar):
        self.step += 1
        batchSZ = y.size(0)
        x, y = [createVariable(z, self.use_cuda) for z in [x, y]]
        # NOTE(review): `true`/`false` label tensors are built but never used here.
        true = createVariable(torch.ones(batchSZ).float(), self.use_cuda)
        false = createVariable(torch.zeros(batchSZ).float(), self.use_cuda)
        # b/w
        # coff = torch.rand(3)
        # coff /= coff.sum()
        coff = [0.299, 0.587, 0.114]
        # sign = torch.rand(3)
        # bw = sum(x[:, i] * coff[i] if sign[i] > 0.5 else (1.0 - x[:, i]) * coff[i] for i in range(3))
        bw = sum(x[:, i] * coff[i] for i in range(3))
        bw = bw.unsqueeze(1)
        c = x  # keep the color original; `x` is reused for G's output below
        # lr decay
        if self.step % 10000 == 0:
            for param_group in self.optimG.param_groups:
                param_group['lr'] = param_group['lr'] * 0.5
        self.optimG.zero_grad()
        self.G.train()
        gloss = 0
        # l1
        x = self.G(y[:, 0], y[:, 1], bw)
        # Periodically dump a side-by-side (generated | original | b/w) snapshot.
        if self.step % 15 == 0:
            imb = bw.data[0].repeat(3, 1, 1)
            img = c.data[0]
            img = torch.cat([x.data[0], img, imb], 1)
            img = img.cpu().numpy()
            img, org = toImage(img)
            img.save(
                os.path.join('output', 'training', 'cnorm',
                             '%d-0.jpg' % (self.step)))
            org.save(
                os.path.join('output', 'training', 'corig',
                             '%d-0.jpg' % (self.step)))
        loss = F.mse_loss(x, c)
        gloss += loss.data.cpu().numpy().tolist()[0]
        loss.backward()
        avgGLoss.append(gloss)
        torch.nn.utils.clip_grad_norm(self.G.parameters(), 1)
        self.optimG.step()
        logs = logging((avgGLoss, ))
        bar.desc = logs
    bar.close()
    return [
        avgGLoss,
    ]
def forward(self, hair, eyes, img):
    """One-hot the hair/eyes labels, project them into a spatial map, and
    fuse the map with the image features before the final conv stack."""
    hair = hair.unsqueeze(1)
    eyes = eyes.unsqueeze(1)
    # One-hot encodings: 12 hair classes, 11 eye classes.
    hair_1h = createVariable(torch.zeros(hair.size(0), 12), hair.is_cuda)
    hair_1h.data.scatter_(1, hair.data, 1)
    eyes_1h = createVariable(torch.zeros(eyes.size(0), 11), eyes.is_cuda)
    eyes_1h.data.scatter_(1, eyes.data, 1)
    labels = torch.cat([hair_1h, eyes_1h], 1)
    # Label vector -> 32-channel latent_dim x latent_dim feature map.
    label_map = self.inp(labels).view(labels.size(0), 32, latent_dim, latent_dim)
    # Concatenate label map, extracted image features, and the raw image.
    fused = torch.cat([label_map, self.inpconv(img), img], 1)
    return self.conv(fused)
def fit(self, dataset):
    """One epoch of classifier training with cross-entropy loss.

    Tracks a running loss and training accuracy and returns both trackers.
    """
    self.model.train()
    bar = tqdm(dataset, smoothing=0)
    avgLoss = Average('Loss', num=20)
    acc = Average('TAcc')
    for i, (x, y) in enumerate(bar):
        x, y = [createVariable(z, self.use_cuda) for z in [x, y]]
        prob = self.model(x)
        loss = F.cross_entropy(prob, y)
        avgLoss.append(toList(loss)[0])
        pred = torch.max(prob.data, 1)[1]
        corr = (pred == y.data).sum()
        total = y.size(0)
        # BUGFIX: force float division — `corr` is an integer count (or 0-dim
        # tensor) and int/int truncates the accuracy to 0 under Python 2.
        acc.append(float(corr) / total)
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm(self.model.parameters(), 10)
        self.optimizer.step()
        logs = logging((avgLoss, acc))
        bar.desc = logs
    bar.close()
    return [avgLoss, acc]
def fit(self, dataset):
    """One epoch of colorizer (C) training: MSE between C's output and the
    channels-first color image, conditioned on the luminance channel.

    Returns a one-element list holding the running loss tracker.
    """
    Loss = History.Average('CL')
    for i, (x, y) in enumerate(dataset):
        self.step += 1
        print(self.step, Loss, end='\r')
        batch_size = y.size(0)
        x, y = [createVariable(z, self.use_cuda) for z in [x, y]]
        # Luminance from channels-last input (weights over the last dim).
        Y = sum(x[..., i] * self.Y_coff[i] for i in range(3))
        Y = Y.unsqueeze(1)
        tmp = x.permute(0, 3, 1, 2)  # reconstruction target: channels-first x
        # lr decay
        if self.step % 10000 == 0:
            for param_group in self.optimC.param_groups:
                param_group['lr'] = param_group['lr'] * 0.5
        self.optimC.zero_grad()
        self.C.train()
        closs = 0
        x = self.C(y[:, 0], y[:, 1], Y)
        #print ('mse', x.shape, tmp.shape)
        loss = F.mse_loss(x, tmp)
        closs += loss.data.cpu().numpy().tolist()[0]
        loss.backward()
        Loss.append(closs)
        torch.nn.utils.clip_grad_norm(self.C.parameters(), 1)
        self.optimC.step()
    return [
        Loss,
    ]
def forward(self, hair, eyes, bw):
    """Embed one-hot hair/eye labels into a spatial map, fuse it with the
    b/w image features, and decode through the conv stack."""
    hair = hair.unsqueeze(1)
    eyes = eyes.unsqueeze(1)
    # One-hot encodings: 12 hair classes, 11 eye classes.
    hair_1h = createVariable(torch.zeros(hair.size(0), 12), hair.is_cuda)
    hair_1h.data.scatter_(1, hair.data, 1)
    eyes_1h = createVariable(torch.zeros(eyes.size(0), 11), eyes.is_cuda)
    eyes_1h.data.scatter_(1, eyes.data, 1)
    labels = torch.cat([hair_1h, eyes_1h], 1)
    # Label vector -> 32-channel hdim x hdim feature map.
    label_map = self.inp(labels).view(labels.size(0), 32, hdim, hdim)
    fused = torch.cat([label_map, self.inpconv(bw), bw], 1)
    return self.conv(fused)
def eval_db_agent(env, params):
    """Evaluate a saved VAE agent on `env` for params['num_episodes'] episodes,
    printing per-episode reward statistics. (Python 2: print statement, xrange.)
    """
    if params['use_preproc']:
        preprocessor = Preprocessor(params['state_dim'], params['history'],
                                    params['use_luminance'], params['resize_shape'])
        params['state_dim'] = preprocessor.state_shape
    else:
        preprocessor = None
    agent = VAE(params['state_dim'], params['action_dim'])
    # Load checkpoint; map to CPU when CUDA is off.
    if params['use_cuda']:
        agent = agent.cuda()
        agent.load_state_dict(torch.load('./agents/{0}_{1}'.format(params['arch'], params['env_name'])))
    else:
        agent.load_state_dict(
            torch.load('./agents/{0}_{1}'.format(params['arch'], params['env_name']), map_location='cpu'))
    agent.eval()
    agent_steps = 0
    episode_rewards = []
    start = time.time()
    for episode in xrange(1, params['num_episodes'] + 1):
        env_state = env.reset()
        episode_reward = 0.0
        for t in xrange(1, params['max_steps'] + 1):
            if params['env_render']:
                env.render()
            if preprocessor:
                state = preprocessor.process_state(env_state)
            else:
                state = env_state
            var_state = createVariable(state, use_cuda=params['use_cuda'])
            action, state_val = agent.sample_action_eval(var_state)
            # Single env.step wrapped in a 1-iteration loop — presumably a
            # frame-skip scaffold; verify before changing.
            reward = 0.0
            for _ in range(1):
                env_state, r, terminal, _ = env.step(action)
                reward += r
                if terminal:
                    break
            episode_reward += reward
            if terminal:
                break
        episode_rewards.append(episode_reward)
        agent_steps += t
        if preprocessor:
            preprocessor.reset()
        print 'Episode {0} | Total Steps {1} | Total Reward {2} | Mean Reward {3} | Total Time {4}' \
            .format(episode, agent_steps, episode_reward, sum(episode_rewards[-100:]) / 100,
                    timeSince(start, episode / params['num_episodes']))
def predict(self, illums):
    """Generate one image per requested illumination value and report the
    discriminator's realness score and illumination estimate for each."""
    # FIXME:
    self.G.train()  # generator deliberately left in train mode here
    self.D.eval()
    progress = tqdm(illums, smoothing=0)
    results = []
    for idx, target in enumerate(progress):
        cond = torch.FloatTensor([target])
        z = createVariable(torch.randn(1, noiseDim), self.use_cuda, True)
        cond = createVariable(cond, self.use_cuda, True)
        fake = self.G(z, cond)
        # x = torch.clamp(x, 0, 1)
        real_score, illum_est = self.D(fake)
        results.append(
            (fake.data.cpu().numpy()[0], toList(real_score)[0], toList(illum_est)[0]))
    progress.close()
    return results
def deep_barley(params):
    """Distill cached A2C episode data into a VAE agent via supervised
    training on (state, policy) pairs. (Python 2: print statement, xrange.)"""
    agent = VAE(params['state_dim'], params['action_dim'])
    agent.train()
    if params['use_cuda']:
        agent = agent.cuda()
    dataset = EpisodeDataset('./out/A2C_{0}_episode.pkl'.format(params['env_name']))
    trainloader = DataLoader(dataset, batch_size=params['batch_size'], shuffle=True, num_workers=4)
    optimizer = torch.optim.Adam(agent.parameters(), lr=params['learning_rate'])
    # optimizer = torch.optim.RMSprop(agent.parameters(), lr=params['learning_rate'])
    for epoch in xrange(1, params['num_epochs'] + 1):
        total_loss = 0.0
        for batch_id, batch in enumerate(trainloader):
            optimizer.zero_grad()
            batch_states, batch_pols = batch['state'], batch['policy']
            if params['use_cuda']:
                batch_pols = batch_pols.cuda()
            # Two latent variants: concrete (discrete codes) vs Gaussian.
            if agent.use_concrete:
                pi_phi, _, phi = agent.forward(createVariable(batch_states, use_cuda=params['use_cuda']))
                phi, _ = phi
                loss, r_loss, p_loss = loss_concrete(batch_pols, pi_phi, phi, params)
            else:
                pi_phi, _, rets = agent.forward(createVariable(batch_states, use_cuda=params['use_cuda']))
                mus, logvars = rets
                loss, r_loss, p_loss = loss_gauss(batch_pols, pi_phi, mus, logvars, params)
            loss.backward()
            total_loss += loss.data
            optimizer.step()
            if (batch_id + 1) % params['print_every'] == 0:
                print '\tBatch {} | Total Loss: {:.6f} | R-Loss {:.6f} | P-Loss {:.6f} | \t[{}/{} ({:.0f}%)]' \
                    .format(batch_id + 1, loss.data, r_loss.data, p_loss.data,
                            batch_id * len(batch_states), len(trainloader.dataset),
                            100. * batch_id / len(trainloader))
        print 'Epoch {} | Total Loss {:.6f}'.format(epoch + 1, total_loss)
        if (epoch + 1) % params['save_every'] == 0 or (epoch + 1) == params['num_epochs']:
            torch.save(agent.state_dict(), './agents/{0}_{1}'.format(params['arch'], params['env_name']))
def predict(self, dataset):
    """Run the classifier over `dataset`; return a list of
    (input, prediction, label, confidence) tuples, one per sample."""
    self.model.eval()
    progress = tqdm(dataset, smoothing=0)
    results = []
    for step, (x, y) in enumerate(progress):
        x, y = [createVariable(v, self.use_cuda) for v in [x, y]]
        logits = self.model(x)
        probs = F.softmax(logits)
        confidence, predicted = torch.max(probs, 1)
        batch_rows = zip(x.data.cpu().numpy(), toList(predicted),
                         toList(y), toList(confidence))
        results.extend(batch_rows)
    progress.close()
    return results
def score(self, dataset):
    """Evaluate classification accuracy over `dataset`.

    Returns a one-element list holding the running accuracy tracker.
    """
    self.model.eval()
    bar = tqdm(dataset, smoothing=0)
    acc = Average('Acc')
    for i, (x, y) in enumerate(bar):
        x, y = [createVariable(z, self.use_cuda) for z in [x, y]]
        prob = self.model(x)
        pred = torch.max(prob.data, 1)[1]
        corr = (pred == y.data).sum()
        total = y.size(0)
        # BUGFIX: force float division — `corr` is an integer count (or 0-dim
        # tensor) and int/int truncates the accuracy to 0 under Python 2.
        acc.append(float(corr) / total)
        logs = logging((acc, ))
        bar.desc = logs
    bar.close()
    return [acc]
def eval_agent_parallel(envs, params):
    """Evaluate an agent across several environments in lockstep, printing
    reward statistics every params['print_every'] episodes. (Python 2 code.)"""
    preprocessors = []
    for _ in range(params['num_envs']):
        if params['use_preproc']:
            preprocessor = Preprocessor(params['state_dim'], params['history'],
                                        params['use_luminance'], params['resize_shape'])
            params['state_dim'] = preprocessor.state_shape
        else:
            preprocessor = None
        preprocessors.append(preprocessor)
    agent = agent_lookup(params)
    restore_model(agent, params['restore'], params['use_cuda'])
    if params['use_cuda']:
        agent.cuda()
    agent.eval()
    episode_rewards = []
    start = time.time()
    for episode in xrange(1, params['num_episodes'] + 1):
        env_states = [env.reset() for env in envs]
        states = [
            preprocessors[i].process_state(env_states[i])
            if preprocessors[i] else env_states[i] for i in range(len(envs))
        ]
        env_status = [False for _ in envs]
        episode_reward = [0.0 for _ in envs]
        for t in xrange(1, params['max_steps'] + 1):
            # Stop early once every environment has terminated.
            if reduce(lambda x, y: x and y, env_status):
                break
            for i, env in enumerate(envs):
                if params['env_render']:
                    env.render()
                if env_status[i]:
                    continue
                var_state = createVariable(states[i], use_cuda=params['use_cuda'])
                action, state_val = agent.sample_action_eval(var_state)
                reward = 0.0
                for _ in range(1):
                    env_states[i], r, terminal, _ = env.step(action)
                    reward += r
                    if terminal:
                        env_status[i] = True
                        break
                # NOTE(review): reward accumulation is commented out, so the
                # printed per-env rewards stay 0.0 — confirm this is intended.
                # episode_reward[i] += reward
                states[i] = preprocessors[i].process_state(
                    env_states[i]) if preprocessors[i] else env_states[i]
        # NOTE(review): raises AttributeError if any preprocessor is None.
        for p in preprocessors:
            p.reset()
        episode_rewards.extend(episode_reward)
        if episode % params['print_every'] == 0:
            print 'Episode {0} | Total Reward {1} | Mean Reward {2} | Total Time {3} ' \
                .format(episode, episode_reward, sum(episode_rewards[-100:]) / 100,
                        timeSince(start, episode / params['num_episodes']))
def cache_eval_episode(env, params):
    """Run one evaluation episode and pickle every visited
    (state, action-distribution) pair to ./out/ for later distillation.
    (Python 2: print statement, xrange.)"""
    cache_states, cache_distros = [], []
    if params['use_preproc']:
        preprocessor = Preprocessor(params['state_dim'], params['history'],
                                    params['use_luminance'], params['resize_shape'])
        params['state_dim'] = preprocessor.state_shape
    else:
        preprocessor = None
    agent = agent_lookup(params)
    # Load checkpoint; map to CPU when CUDA is off.
    if params['use_cuda']:
        agent = agent.cuda()
        agent.load_state_dict(
            torch.load('./agents/{0}_{1}'.format(params['arch'], params['env_name'])))
    else:
        agent.load_state_dict(
            torch.load('./agents/{0}_{1}'.format(params['arch'], params['env_name']), map_location='cpu'))
    agent_steps = 0
    episode_rewards = []
    start = time.time()
    for episode in xrange(1):  # exactly one episode is cached
        env_state = env.reset()
        episode_reward = 0.0
        for t in xrange(1, params['max_steps'] + 1):
            if params['env_render']:
                env.render()
            if preprocessor:
                state = preprocessor.process_state(env_state)
            else:
                state = env_state
            var_state = createVariable(state, use_cuda=params['use_cuda'])
            action, state_val, distro = agent.sample_action_distro(var_state)
            # Record the visited state and the policy's action distribution.
            cache_states.append(state)
            cache_distros.append(distro.cpu().numpy())
            reward = 0.0
            for _ in range(1):
                env_state, r, terminal, _ = env.step(action)
                reward += r
                if terminal:
                    break
            episode_reward += reward
            if terminal:
                break
        episode_rewards.append(episode_reward)
        agent_steps += t
        if preprocessor:
            preprocessor.reset()
        if episode % params['print_every'] == 0:
            print 'Episode {0} | Total Steps {1} | Total Reward {2} | Mean Reward {3}' \
                .format(episode, agent_steps, episode_reward, sum(episode_rewards[-100:]) / 100)
    cache_states, cache_distros = np.array(cache_states), np.array(
        cache_distros)
    # Highest pickle protocol (-1) for compact output.
    pickle.dump((cache_states, cache_distros), open(
        './out/{0}_{1}_episode.pkl'.format(params['arch'], params['env_name']),
        'wb'), -1)
def train_agent(env, params):
    """Train an actor-critic agent on a single environment.

    Runs params['num_episodes'] episodes; performs a train_step every
    params['update_freq'] steps and once more at episode end, then prints
    per-episode statistics. (Python 2: print statement, xrange.)
    """
    if params['use_preproc']:
        preprocessor = Preprocessor(params['state_dim'], params['history'],
                                    params['use_luminance'], params['resize_shape'])
        params['state_dim'] = preprocessor.state_shape
    else:
        preprocessor = None
    agent = agent_lookup(params)
    agent.train()
    if params['optim'] == 'rms':
        optimizer = torch.optim.RMSprop(agent.parameters(), lr=params['learning_rate'])
    elif params['optim'] == 'adam':
        optimizer = torch.optim.Adam(agent.parameters(), lr=params['learning_rate'])
    else:
        print 'Unknown optimizer specified!'
        sys.exit(0)
    if params['use_cuda']:
        agent = agent.cuda()
    agent_steps = 0
    episode_rewards = []
    start = time.time()
    for episode in xrange(1, params['num_episodes'] + 1):
        env_state = env.reset()
        episode_reward = 0.0
        policy_loss, value_loss = 0.0, 0.0
        num_updates = 0
        for t in xrange(1, params['max_steps'] + 1):
            if params['env_render']:
                env.render()
            if preprocessor:
                state = preprocessor.process_state(env_state)
            else:
                state = env_state
            var_state = createVariable(state, use_cuda=params['use_cuda'])
            action, state_val = agent.sample_action(var_state)
            reward = 0.0
            for _ in range(1):
                env_state, r, terminal, _ = env.step(action)
                reward += r
                if terminal:
                    break
            agent.rewards.append(reward)
            episode_reward += reward
            if terminal:
                # Terminal state bootstraps with zero value.
                agent.final_state_val = 0.0
                break
            if t % params['update_freq'] == 0:
                # Bootstrap from the critic's current state-value estimate.
                agent.final_state_val = state_val[0]
                pl, vl = train_step(agent, optimizer, params)
                policy_loss += pl
                value_loss += vl
                num_updates += 1
        episode_rewards.append(episode_reward)
        # Flush any remaining transitions with one final update.
        agent.final_state_val = 0.0
        pl, vl = train_step(agent, optimizer, params)
        policy_loss += pl
        value_loss += vl
        num_updates += 1
        agent_steps += t
        if preprocessor:
            preprocessor.reset()
        if params['arch'] in ['VQ-A2C']:
            # Report then reset the set of visited latent codes.
            visit = len(agent.visited), agent.visited
            agent.visited = set([])
        else:
            visit = 0
        if episode % params['print_every'] == 0:
            print 'Episode {0} | Total Steps {1} | Total Reward {2} | Mean Reward {3} | Policy Loss {4} | Value Loss {6} | Total Time {5} | S_A {7}' \
                .format(episode, agent_steps, episode_reward, sum(episode_rewards[-100:]) / 100,
                        policy_loss / num_updates, timeSince(start, episode / params['num_episodes']),
                        value_loss / num_updates, visit)
def fit(self, dataset):
    """One epoch of conditional-GAN training.

    Alternates discriminator updates (real batch + gradient penalty + fresh
    fakes + replayed fakes from `self.memory`) with generator updates
    (adversarial + illumination-regression loss). Returns the running loss
    and accuracy trackers.
    """
    bar = tqdm(dataset, smoothing=0)
    avgDLoss = Average('DL', num=4)
    realRealAcc = Average('DR', num=4)
    avgGLoss = Average('GL', num=4)
    fakeRealAcc = Average('GR', num=4)
    realIlluAcc = Average('TI', num=4)
    fakeIlluAcc = Average('GI', num=4)
    for i, (x, y) in enumerate(bar):
        self.step += 1
        batchSZ = y.size(0)
        x, y = [createVariable(z, self.use_cuda) for z in [x, y]]
        true = createVariable(torch.ones(batchSZ).float(), self.use_cuda)
        false = createVariable(torch.zeros(batchSZ).float(), self.use_cuda)
        # lr decay
        if self.step % 50000 == 0:
            for param_group in self.optimD.param_groups:
                param_group['lr'] = param_group['lr'] * 0.5
            for param_group in self.optimG.param_groups:
                param_group['lr'] = param_group['lr'] * 0.5
        # (dead commented-out tagger-pretrain / tag gradient-penalty code
        #  removed during review)
        lambdaAdvMax = 1
        # lambdaAdv = min(1, self.step / 4000) ** 2
        # lambdaAdv = lambdaAdv * 0.8 + 0.2
        # lambdaAdv = lambdaAdv * lambdaAdvMax
        lambdaAdv = lambdaAdvMax
        # Randomly skip D updates when D is far ahead of G (accuracy gap).
        skipD = False
        if lambdaAdv >= lambdaAdvMax - 1e-10:
            # gap skip
            gap = max(realRealAcc.value() - fakeRealAcc.value(), 0)
            gap = min(1, gap * 2)
            r = random.random()
            if r > 1 - gap * 0.9:
                skipD = True
        if not skipD:
            for _ in range(1):
                # Training Discriminator
                self.G.eval()
                self.D.train()
                self.optimD.zero_grad()
                self.optimG.zero_grad()
                dloss = 0
                # Real data
                isReal, illum = self.D(x)
                lossRealLabel = F.binary_cross_entropy_with_logits(
                    isReal, true)
                realRealAcc.append(toList(F.sigmoid(isReal).mean())[0])
                lossIllu = F.mse_loss(illum, y)
                realIlluAcc.append(toList(lossIllu)[0])
                loss = lossRealLabel * lambdaAdv + lossIllu
                dloss += loss.data.cpu().numpy().tolist()[0]
                loss.backward()
                # Gradient penalty on noise-perturbed real samples
                # (DRAGAN-like — verify against the reference before tuning).
                alpha = createVariable(torch.rand(batchSZ, 1, 1, 1),
                                       self.use_cuda)
                beta = createVariable(torch.randn(x.size()), self.use_cuda)
                gradientPenalty = 0
                x = alpha * x + (1 - alpha) * (x + 0.5 * x.std() * beta)
                x = x.detach()
                x.requires_grad = True
                isReal, illum = self.D(x)
                realGrad = grad(isReal, x, true, create_graph=True,
                                retain_graph=True,
                                only_inputs=True)[0].view(batchSZ, -1)
                gradientPenalty += ((realGrad.norm(p=2, dim=1) - 1)**2).mean()
                gradientPenalty *= 0.5
                dloss += gradientPenalty.data.cpu().numpy().tolist()[0]
                gradientPenalty.backward()
                # Fake data (fresh samples, down-weighted vs replay below)
                noise = createVariable(torch.randn(batchSZ, noiseDim),
                                       self.use_cuda)
                illum = createVariable(
                    torch.FloatTensor(batchSZ).uniform_(0.3, 1),
                    self.use_cuda)
                x = self.G(noise, illum)
                x = x.detach()
                isReal, illum = self.D(x)
                lossRealLabel = F.binary_cross_entropy_with_logits(
                    isReal, false)
                loss = lossRealLabel * lambdaAdv
                loss = loss * 0.1
                dloss += loss.data.cpu().numpy().tolist()[0]
                loss.backward()
                # Fake data history (experience replay of older fakes)
                if len(self.memory) > batchSZ:
                    x = random.sample(self.memory, batchSZ)
                    x = createVariable(torch.stack(x, 0), self.use_cuda)
                    isReal, illum = self.D(x)
                    lossRealLabel = F.binary_cross_entropy_with_logits(
                        isReal, false)
                    loss = lossRealLabel * lambdaAdv
                    loss = loss * 0.9
                    dloss += loss.data.cpu().numpy().tolist()[0]
                    loss.backward()
                avgDLoss.append(dloss)
                torch.nn.utils.clip_grad_norm(self.D.parameters(), 1)
                self.optimD.step()
        # Training Generator
        for g_iter in range(1):  # renamed from `i`: no longer shadows the batch index
            self.optimD.zero_grad()
            self.optimG.zero_grad()
            self.D.eval()
            self.G.train()
            noise = createVariable(torch.randn(batchSZ, noiseDim),
                                   self.use_cuda)
            illum = createVariable(
                torch.FloatTensor(batchSZ).uniform_(0.3, 1), self.use_cuda)
            gloss = 0
            x = self.G(noise, illum)
            isReal, _illum = self.D(x)
            self.memory.append(x[0].data.cpu())
            if len(self.memory) > 1e6:
                # BUGFIX: was self.memory[-1e6:] — a float slice index
                # raises TypeError; use an integer bound.
                self.memory = self.memory[-1000000:]
            if self.step % 15 == 0 and g_iter == 0:
                img = x.data[0].cpu().numpy()
                img, org = toImage(img)
                try:
                    img.save(
                        os.path.join('output', 'training', 'norm',
                                     '%d-0.jpg' % (self.step)))
                    org.save(
                        os.path.join('output', 'training', 'orig',
                                     '%d-0.jpg' % (self.step)))
                except Exception:
                    # Best-effort snapshot; training continues on I/O failure.
                    pass
            lossRealLabel = F.binary_cross_entropy_with_logits(isReal, true)
            fakeRealAcc.append(toList(F.sigmoid(isReal).mean())[0])
            lossIllu = F.mse_loss(_illum, illum)
            fakeIlluAcc.append(toList(lossIllu)[0])
            loss = lossRealLabel * lambdaAdv + lossIllu
            gloss += loss.data.cpu().numpy().tolist()[0]
            loss.backward()
            avgGLoss.append(gloss)
            torch.nn.utils.clip_grad_norm(self.G.parameters(), 1)
            self.optimG.step()
        logs = logging((avgDLoss, avgGLoss, realRealAcc, fakeRealAcc,
                        realIlluAcc, fakeIlluAcc))
        bar.desc = logs
    bar.close()
    return [
        avgDLoss, avgGLoss, realRealAcc, fakeRealAcc, realIlluAcc, fakeIlluAcc
    ]
def cache_abstraction(env, params):
    """Run one episode and save each frame's preprocessed image under
    ./out/<env>/<code>/, grouped by the agent's latent abstraction code.
    (Python 2: print statement, xrange.)"""
    # Start from a clean output directory for this environment.
    if os.path.exists('./out/{0}'.format(params['env_name'])):
        shutil.rmtree('./out/{0}'.format(params['env_name']))
    if params['use_preproc']:
        preprocessor = Preprocessor(params['state_dim'], params['history'],
                                    params['use_luminance'], params['resize_shape'])
        params['state_dim'] = preprocessor.state_shape
    else:
        preprocessor = None
    agent = VAE(params['state_dim'], params['action_dim'])
    # Load checkpoint; map to CPU when CUDA is off.
    if params['use_cuda']:
        agent = agent.cuda()
        agent.load_state_dict(torch.load('./agents/{0}_{1}'.format(params['arch'], params['env_name'])))
    else:
        agent.load_state_dict(
            torch.load('./agents/{0}_{1}'.format(params['arch'], params['env_name']), map_location='cpu'))
    agent.eval()
    agent_steps = 0
    episode_rewards = []
    start = time.time()
    for episode in xrange(1):
        env_state = env.reset()
        episode_reward = 0.0
        for t in xrange(1, params['max_steps'] + 1):
            if params['env_render']:
                env.render()
            if preprocessor:
                state = preprocessor.process_state(env_state)
            else:
                state = env_state
            var_state = createVariable(state, use_cuda=params['use_cuda'])
            # action, state_val = agent.sample_action_eval(var_state)
            action, state_val, code = agent.sample_action_eval_code(var_state)
            if not os.path.exists('./out/{0}/{1}'.format(params['env_name'], code)):
                os.makedirs('./out/{0}/{1}'.format(params['env_name'], code))
            # NOTE(review): assumes preprocessor is not None — crashes otherwise.
            preprocessor.get_img_state().save('./out/{0}/{1}/{2}.png'.format(params['env_name'], code, t))
            reward = 0.0
            for _ in range(1):
                env_state, r, terminal, _ = env.step(action)
                reward += r
                if terminal:
                    break
            episode_reward += reward
            if terminal:
                break
        episode_rewards.append(episode_reward)
        agent_steps += t
        if preprocessor:
            preprocessor.reset()
        print 'Episode {0} | Total Steps {1} | Total Reward {2} | Mean Reward {3}' \
            .format(episode, agent_steps, episode_reward, sum(episode_rewards[-100:]) / 100)
def train_agent_parallel(envs, params):
    """Train an actor-critic agent across several environments in lockstep.

    Each episode steps every non-terminated env once per time step, runs a
    parallel train_step every params['update_freq'] steps and once at episode
    end, and periodically prints statistics and saves the model.
    (Python 2: print statement, xrange, dict.iteritems.)
    """
    preprocessors = []
    for _ in range(params['num_envs']):
        if params['use_preproc']:
            preprocessor = Preprocessor(params['state_dim'], params['history'],
                                        params['use_luminance'], params['resize_shape'])
            params['state_dim'] = preprocessor.state_shape
        else:
            preprocessor = None
        preprocessors.append(preprocessor)
    agent = agent_lookup(params)
    if params['optim'] == 'rms':
        optimizer = torch.optim.RMSprop(agent.parameters(), lr=params['learning_rate'])
    elif params['optim'] == 'adam':
        optimizer = torch.optim.Adam(agent.parameters(), lr=params['learning_rate'])
    else:
        print 'Unknown optimizer specified!'
        sys.exit(0)
    if params['restore'] is not None:
        restore_model(agent, params['restore'], params['use_cuda'])
    if params['use_cuda']:
        agent = agent.cuda()
    agent.train()
    if params['arch'] == 'DBAgentAE':
        agent.eval()
    episode_rewards = []
    start = time.time()
    total_steps = 0
    for episode in xrange(1, params['num_episodes'] + 1):
        env_states = [env.reset() for env in envs]
        states = [
            preprocessors[i].process_state(env_states[i])
            if preprocessors[i] else env_states[i] for i in range(len(envs))
        ]
        env_status = [False for _ in envs]
        episode_reward = [0.0 for _ in envs]
        loss_dict = defaultdict(float)
        num_updates = 0
        for t in xrange(1, params['max_steps'] + 1):
            # Stop early once every environment has terminated.
            if reduce(lambda x, y: x and y, env_status):
                break
            for i, env in enumerate(envs):
                if params['env_render']:
                    env.render()
                if env_status[i]:
                    continue
                var_state = createVariable(states[i], use_cuda=params['use_cuda'])
                action, state_val = agent.sample_action(var_state, i=i)
                reward = 0.0
                for _ in range(1):
                    env_states[i], r, terminal, _ = env.step(action)
                    reward += r
                    if terminal:
                        env_status[i] = True
                        break
                episode_reward[i] += reward
                states[i] = preprocessors[i].process_state(
                    env_states[i]) if preprocessors[i] else env_states[i]
            if t % params['update_freq'] == 0:
                l_dict = train_step_parallel(agent, optimizer, params)
                loss_dict = merge_loss_dicts(loss_dict, l_dict)
                num_updates += 1
        # Pad every env's reward stream, then do one final update.
        for i, env in enumerate(envs):
            agent.rewards[i].append(0.0)
        l_dict = train_step_parallel(agent, optimizer, params)
        loss_dict = merge_loss_dicts(loss_dict, l_dict)
        num_updates += 1
        # NOTE(review): raises AttributeError if any preprocessor is None.
        for p in preprocessors:
            p.reset()
        episode_rewards.extend(episode_reward)
        # Might need this later
        visit = 0
        total_steps += t
        if episode % params['print_every'] == 0:
            print 'Episode {0} | Total Reward {1} | Total Steps {6} | Mean Reward {2} | Losses {3} | Total Time {4} | SA {5} ' \
                .format(episode, episode_reward, sum(episode_rewards[-100:]) / 100,
                        {k: v / num_updates for k, v in loss_dict.iteritems()},
                        timeSince(start, episode / params['num_episodes']), visit, total_steps)
        if episode % params['save_every'] == 0:
            torch.save(
                agent.state_dict(),
                './agents/{0}_{1}_{2}_{3}'.format(params['arch'], params['env_name'],
                                                  int(params['beta']), params['seed']))