Example #1
    def predict(self, tags, data, show_org=False):
        self.C.train()
        output, imgs = [], []
        l = 0

        while l < len(tags):
            for x, y in data:
                imgs.append(x)
                l += x.size(0)
                if l >= len(tags):
                    break

        c = torch.cat(imgs, 0) if len(imgs) > 1 else imgs[0]
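        # Collapse the RGB batch (channels-last here) to a single luminance channel using self.Y_coff.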
        Y = sum(c[..., i] * self.Y_coff[i] for i in range(3))
        Y = Y.unsqueeze(1)

        for tag, y in zip(tags, Y):
            tag = torch.LongTensor(tag).unsqueeze(0)
            y = y.unsqueeze(0)

            # Colorize one luminance image conditioned on its hair/eye tags
            y = createVariable(y, self.use_cuda, True)
            hair = createVariable(tag[:, 0], self.use_cuda, True)
            eyes = createVariable(tag[:, 1], self.use_cuda, True)
            x = self.C(hair, eyes, y)
            output.append(x.data.cpu().numpy()[0])

        return np.array(output).transpose(0, 2, 3, 1), c.numpy()
Example #2
    def predict(self, tags, data):
        # FIXME:
        self.G.train()
        bar = tqdm(tags, smoothing=0)
        r = []
        l = 0
        c = []
        while l < len(tags):
            for x, y in data:
                c.append(x)
                l += x.size(0)
                if l >= len(tags):
                    break
        c = torch.cat(c, 0) if len(c) > 1 else c[0]
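        # ITU-R BT.601 luma weights convert the RGB batch to a single grayscale channel.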
        bw = c[:, 0] * 0.299 + c[:, 1] * 0.587 + c[:, 2] * 0.114
        bw = bw.unsqueeze(1)
        for (i, tag), b in zip(enumerate(bar), bw):
            tag = torch.LongTensor(tag).unsqueeze(0)
            b = b.unsqueeze(0)

            # Colorize one grayscale image conditioned on its hair/eye tags
            b = createVariable(b, self.use_cuda, True)
            hair = createVariable(tag[:, 0], self.use_cuda, True)
            eyes = createVariable(tag[:, 1], self.use_cuda, True)

            x = self.G(hair, eyes, b)
            #  x = torch.clamp(x, 0, 1)
            r.append((x.data.cpu().numpy()[0], ))

        bar.close()
        return r
Example #3
    def fit(self, dataset):
        bar = tqdm(dataset, smoothing=0)
        avgGLoss = Average('GL', num=4)
        for i, (x, y) in enumerate(bar):
            self.step += 1
            batchSZ = y.size(0)
            x, y = [createVariable(z, self.use_cuda) for z in [x, y]]
            true = createVariable(torch.ones(batchSZ).float(), self.use_cuda)
            false = createVariable(torch.zeros(batchSZ).float(), self.use_cuda)

            # b/w
            #  coff = torch.rand(3)
            #  coff /= coff.sum()
            coff = [0.299, 0.587, 0.114]
            #  sign = torch.rand(3)
            #  bw = sum(x[:, i] * coff[i] if sign[i] > 0.5 else (1.0 - x[:, i]) * coff[i] for i in range(3))
            bw = sum(x[:, i] * coff[i] for i in range(3))
            bw = bw.unsqueeze(1)
            c = x

            # lr decay
            if self.step % 10000 == 0:
                for param_group in self.optimG.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.5

            self.optimG.zero_grad()
            self.G.train()
            gloss = 0

            # l1
            x = self.G(y[:, 0], y[:, 1], bw)

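            # Every 15 steps, save a side-by-side image of (generated, target, grayscale input) for inspection.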
            if self.step % 15 == 0:
                imb = bw.data[0].repeat(3, 1, 1)
                img = c.data[0]
                img = torch.cat([x.data[0], img, imb], 1)
                img = img.cpu().numpy()
                img, org = toImage(img)
                img.save(
                    os.path.join('output', 'training', 'cnorm',
                                 '%d-0.jpg' % (self.step)))
                org.save(
                    os.path.join('output', 'training', 'corig',
                                 '%d-0.jpg' % (self.step)))

            loss = F.mse_loss(x, c)

            gloss += loss.data.cpu().numpy().tolist()[0]
            loss.backward()

            avgGLoss.append(gloss)
            torch.nn.utils.clip_grad_norm(self.G.parameters(), 1)
            self.optimG.step()
            logs = logging((avgGLoss, ))
            bar.desc = logs

        bar.close()
        return [
            avgGLoss,
        ]
Example #4
    def forward(self, hair, eyes, img):
        hair, eyes = hair.unsqueeze(1), eyes.unsqueeze(1)
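        # One-hot encode the hair (12 classes) and eye (11 classes) labels via scatter_.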
        _hair = createVariable(torch.zeros(hair.size(0), 12), hair.is_cuda)
        _hair.data.scatter_(1, hair.data, 1)
        _eyes = createVariable(torch.zeros(eyes.size(0), 11), eyes.is_cuda)
        _eyes.data.scatter_(1, eyes.data, 1)

        tag = torch.cat([_hair, _eyes], 1)
        emb = self.inp(tag)
        emb = emb.view(tag.size(0), 32, latent_dim, latent_dim)
        feature = self.inpconv(img)
        #print (emb.size(), feature.size(), img.size())
        x = torch.cat([emb, feature, img], 1)
        y = self.conv(x)
        return y
Example #5
    def fit(self, dataset):
        self.model.train()
        bar = tqdm(dataset, smoothing=0)
        avgLoss = Average('Loss', num=20)
        acc = Average('TAcc')
        for i, (x, y) in enumerate(bar):
            x, y = [createVariable(z, self.use_cuda) for z in [x, y]]

            prob = self.model(x)

            loss = F.cross_entropy(prob, y)
            avgLoss.append(toList(loss)[0])

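            # Track running training accuracy from the argmax predictions.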
            pred = torch.max(prob.data, 1)[1]
            corr = (pred == y.data).sum()
            total = y.size(0)
            acc.append(corr / total)

            self.optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm(self.model.parameters(), 10)
            self.optimizer.step()

            logs = logging((avgLoss, acc))
            bar.desc = logs

        bar.close()
        return [avgLoss, acc]
Example #6
    def fit(self, dataset):

        Loss = History.Average('CL')
        for i, (x, y) in enumerate(dataset):
            self.step += 1
            print(self.step, Loss, end='\r')

            batch_size = y.size(0)
            x, y = [createVariable(z, self.use_cuda) for z in [x, y]]
            Y = sum(x[..., i] * self.Y_coff[i] for i in range(3))
            Y = Y.unsqueeze(1)
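            # Input arrives channels-last; permute to NCHW to serve as the reconstruction target.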
            tmp = x.permute(0, 3, 1, 2)

            # lr decay
            if self.step % 10000 == 0:
                for param_group in self.optimC.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.5

            self.optimC.zero_grad()
            self.C.train()
            closs = 0

            x = self.C(y[:, 0], y[:, 1], Y)
            #print ('mse', x.shape, tmp.shape)
            loss = F.mse_loss(x, tmp)
            closs += loss.data.cpu().numpy().tolist()[0]
            loss.backward()

            Loss.append(closs)
            torch.nn.utils.clip_grad_norm(self.C.parameters(), 1)
            self.optimC.step()

        return [
            Loss,
        ]
Example #7
    def forward(self, hair, eyes, bw):
        hair, eyes = hair.unsqueeze(1), eyes.unsqueeze(1)
        ohair = createVariable(torch.zeros(hair.size(0), 12), hair.is_cuda)
        ohair.data.scatter_(1, hair.data, 1) #* 0.8 + 0.2
        oeyes = createVariable(torch.zeros(eyes.size(0), 11), eyes.is_cuda)
        oeyes.data.scatter_(1, eyes.data, 1) #* 0.8 + 0.2
        x = torch.cat([ohair, oeyes], 1)
        x = self.inp(x).view(x.size(0), 32, hdim, hdim)
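        # Encode the grayscale input and concatenate it with the tag embedding and the raw input.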
        b = self.inpconv(bw)
        x = torch.cat([x, b, bw], 1)

        #  noise = self.noise(noise).view(noise.size(0), 48, hdim, hdim)
        #  hair = self.hair(hair).view(hair.size(0), 8, hdim, hdim)
        #  eyes = self.eyes(eyes).view(eyes.size(0), 8, hdim, hdim)
        #  x = torch.cat([noise, hair, eyes], 1)
        y = self.conv(x)
        return y
Example #8
def eval_db_agent(env, params):
    if params['use_preproc']:
        preprocessor = Preprocessor(params['state_dim'], params['history'], params['use_luminance'],
                                    params['resize_shape'])
        params['state_dim'] = preprocessor.state_shape
    else:
        preprocessor = None

    agent = VAE(params['state_dim'], params['action_dim'])
    if params['use_cuda']:
        agent = agent.cuda()
        agent.load_state_dict(torch.load('./agents/{0}_{1}'.format(params['arch'], params['env_name'])))
    else:
        agent.load_state_dict(
            torch.load('./agents/{0}_{1}'.format(params['arch'], params['env_name']), map_location='cpu'))
    agent.eval()

    agent_steps = 0
    episode_rewards = []
    start = time.time()
    for episode in xrange(1, params['num_episodes'] + 1):
        env_state = env.reset()
        episode_reward = 0.0
        for t in xrange(1, params['max_steps'] + 1):
            if params['env_render']:
                env.render()

            if preprocessor:
                state = preprocessor.process_state(env_state)
            else:
                state = env_state

            var_state = createVariable(state, use_cuda=params['use_cuda'])
            action, state_val = agent.sample_action_eval(var_state)

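            # Step the environment once with the sampled action and accumulate the reward.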
            reward = 0.0
            for _ in range(1):
                env_state, r, terminal, _ = env.step(action)
                reward += r
                if terminal:
                    break

            episode_reward += reward

            if terminal:
                break

        episode_rewards.append(episode_reward)
        agent_steps += t

        if preprocessor:
            preprocessor.reset()

        print 'Episode {0} | Total Steps {1} | Total Reward {2} | Mean Reward {3} | Total Time {4}' \
            .format(episode, agent_steps, episode_reward, sum(episode_rewards[-100:]) / 100,
                    timeSince(start, episode / params['num_episodes']))
Example #9
    def predict(self, illums):
        # FIXME:
        self.G.train()
        self.D.eval()
        bar = tqdm(illums, smoothing=0)
        r = []
        for i, illum in enumerate(bar):
            illum = torch.FloatTensor([illum])

            # Sample a generated image at the requested illumination level
            noise = createVariable(torch.randn(1, noiseDim), self.use_cuda,
                                   True)
            illum = createVariable(illum, self.use_cuda, True)

            x = self.G(noise, illum)
            #  x = torch.clamp(x, 0, 1)
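            # Score the sample with the discriminator: realism logit and predicted illumination.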
            isReal, illum = self.D(x)
            r.append(
                (x.data.cpu().numpy()[0], toList(isReal)[0], toList(illum)[0]))

        bar.close()
        return r
Example #10
def deep_barley(params):
    agent = VAE(params['state_dim'], params['action_dim'])
    agent.train()
    if params['use_cuda']:
        agent = agent.cuda()

    dataset = EpisodeDataset('./out/A2C_{0}_episode.pkl'.format(params['env_name']))
    trainloader = DataLoader(dataset, batch_size=params['batch_size'], shuffle=True, num_workers=4)
    optimizer = torch.optim.Adam(agent.parameters(), lr=params['learning_rate'])
    # optimizer = torch.optim.RMSprop(agent.parameters(), lr=params['learning_rate'])
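    # Fit the VAE agent to the cached A2C policies by minimizing its reconstruction and policy-matching losses.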
    for epoch in xrange(1, params['num_epochs'] + 1):
        total_loss = 0.0
        for batch_id, batch in enumerate(trainloader):
            optimizer.zero_grad()
            batch_states, batch_pols = batch['state'], batch['policy']
            if params['use_cuda']:
                batch_pols = batch_pols.cuda()
            if agent.use_concrete:
                pi_phi, _, phi = agent.forward(createVariable(batch_states, use_cuda=params['use_cuda']))
                phi, _ = phi
                loss, r_loss, p_loss = loss_concrete(batch_pols, pi_phi, phi, params)
            else:
                pi_phi, _, rets = agent.forward(createVariable(batch_states, use_cuda=params['use_cuda']))
                mus, logvars = rets
                loss, r_loss, p_loss = loss_gauss(batch_pols, pi_phi, mus, logvars, params)
            loss.backward()
            total_loss += loss.data
            optimizer.step()

            if (batch_id + 1) % params['print_every'] == 0:
                print '\tBatch {} | Total Loss: {:.6f} | R-Loss {:.6f} | P-Loss {:.6f} | \t[{}/{} ({:.0f}%)]' \
                    .format(batch_id + 1, loss.data, r_loss.data, p_loss.data, batch_id * len(batch_states),
                            len(trainloader.dataset), 100. * batch_id / len(trainloader))
        print 'Epoch {} | Total Loss {:.6f}'.format(epoch, total_loss)
        if epoch % params['save_every'] == 0 or epoch == params['num_epochs']:
            torch.save(agent.state_dict(), './agents/{0}_{1}'.format(params['arch'], params['env_name']))
Example #11
    def predict(self, dataset):
        self.model.eval()
        bar = tqdm(dataset, smoothing=0)
        r = []
        for i, (x, y) in enumerate(bar):
            x, y = [createVariable(z, self.use_cuda) for z in [x, y]]

            prob = self.model(x)

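            # Softmax the logits and keep the top class with its confidence for each sample.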
            prob = F.softmax(prob)
            conf, pred = torch.max(prob, 1)
            r.extend(
                zip(x.data.cpu().numpy(), toList(pred), toList(y),
                    toList(conf)))
        bar.close()
        return r
Example #12
    def score(self, dataset):
        self.model.eval()
        bar = tqdm(dataset, smoothing=0)
        acc = Average('Acc')
        for i, (x, y) in enumerate(bar):
            x, y = [createVariable(z, self.use_cuda) for z in [x, y]]

            prob = self.model(x)

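            # Evaluation only: accumulate batch accuracy, no parameter updates.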
            pred = torch.max(prob.data, 1)[1]
            corr = (pred == y.data).sum()
            total = y.size(0)
            acc.append(corr / total)

            logs = logging((acc, ))
            bar.desc = logs

        bar.close()
        return [acc]
Example #13
def eval_agent_parallel(envs, params):
    preprocessors = []
    for _ in range(params['num_envs']):
        if params['use_preproc']:
            preprocessor = Preprocessor(params['state_dim'], params['history'],
                                        params['use_luminance'],
                                        params['resize_shape'])
            params['state_dim'] = preprocessor.state_shape
        else:
            preprocessor = None
        preprocessors.append(preprocessor)

    agent = agent_lookup(params)

    restore_model(agent, params['restore'], params['use_cuda'])
    if params['use_cuda']:
        agent.cuda()

    agent.eval()

    episode_rewards = []
    start = time.time()
    for episode in xrange(1, params['num_episodes'] + 1):
        env_states = [env.reset() for env in envs]
        states = [
            preprocessors[i].process_state(env_states[i])
            if preprocessors[i] else env_states[i] for i in range(len(envs))
        ]
        env_status = [False for _ in envs]
        episode_reward = [0.0 for _ in envs]
        for t in xrange(1, params['max_steps'] + 1):

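            # Stop early once every environment in the batch has terminated.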
            if reduce(lambda x, y: x and y, env_status):
                break

            for i, env in enumerate(envs):

                if params['env_render']:
                    env.render()

                if env_status[i]:
                    continue

                var_state = createVariable(states[i],
                                           use_cuda=params['use_cuda'])
                action, state_val = agent.sample_action_eval(var_state)

                reward = 0.0
                for _ in range(1):
                    env_states[i], r, terminal, _ = env.step(action)
                    reward += r
                    if terminal:
                        env_status[i] = True
                        break
                episode_reward[i] += reward
                states[i] = preprocessors[i].process_state(
                    env_states[i]) if preprocessors[i] else env_states[i]

        for p in preprocessors:
            p.reset()

        episode_rewards.extend(episode_reward)

        if episode % params['print_every'] == 0:
            print 'Episode {0} | Total Reward {1} | Mean Reward {2} | Total Time {3} ' \
                .format(episode, episode_reward, sum(episode_rewards[-100:]) / 100,
                        timeSince(start, episode / params['num_episodes']))
Example #14
def cache_eval_episode(env, params):
    cache_states, cache_distros = [], []

    if params['use_preproc']:
        preprocessor = Preprocessor(params['state_dim'], params['history'],
                                    params['use_luminance'],
                                    params['resize_shape'])
        params['state_dim'] = preprocessor.state_shape
    else:
        preprocessor = None

    agent = agent_lookup(params)

    if params['use_cuda']:
        agent = agent.cuda()
        agent.load_state_dict(
            torch.load('./agents/{0}_{1}'.format(params['arch'],
                                                 params['env_name'])))
    else:
        agent.load_state_dict(
            torch.load('./agents/{0}_{1}'.format(params['arch'],
                                                 params['env_name']),
                       map_location='cpu'))

    agent_steps = 0
    episode_rewards = []
    start = time.time()
    for episode in xrange(1):
        env_state = env.reset()
        episode_reward = 0.0
        for t in xrange(1, params['max_steps'] + 1):
            if params['env_render']:
                env.render()

            if preprocessor:
                state = preprocessor.process_state(env_state)
            else:
                state = env_state

            var_state = createVariable(state, use_cuda=params['use_cuda'])
            action, state_val, distro = agent.sample_action_distro(var_state)
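            # Record the preprocessed state and the agent's full action distribution for this step.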
            cache_states.append(state)
            cache_distros.append(distro.cpu().numpy())

            reward = 0.0
            for _ in range(1):
                env_state, r, terminal, _ = env.step(action)
                reward += r
                if terminal:
                    break

            episode_reward += reward

            if terminal:
                break

        episode_rewards.append(episode_reward)
        agent_steps += t

        if preprocessor:
            preprocessor.reset()

        if episode % params['print_every'] == 0:
            print 'Episode {0} | Total Steps {1} | Total Reward {2} | Mean Reward {3}' \
                .format(episode, agent_steps, episode_reward, sum(episode_rewards[-100:]) / 100)

    cache_states, cache_distros = np.array(cache_states), np.array(
        cache_distros)
    pickle.dump((cache_states, cache_distros),
                open(
                    './out/{0}_{1}_episode.pkl'.format(params['arch'],
                                                       params['env_name']),
                    'wb'), -1)
Example #15
def train_agent(env, params):
    if params['use_preproc']:
        preprocessor = Preprocessor(params['state_dim'], params['history'],
                                    params['use_luminance'],
                                    params['resize_shape'])
        params['state_dim'] = preprocessor.state_shape
    else:
        preprocessor = None

    agent = agent_lookup(params)
    agent.train()

    if params['optim'] == 'rms':
        optimizer = torch.optim.RMSprop(agent.parameters(),
                                        lr=params['learning_rate'])
    elif params['optim'] == 'adam':
        optimizer = torch.optim.Adam(agent.parameters(),
                                     lr=params['learning_rate'])
    else:
        print 'Unknown optimizer specified!'
        sys.exit(0)

    if params['use_cuda']:
        agent = agent.cuda()

    agent_steps = 0
    episode_rewards = []
    start = time.time()
    for episode in xrange(1, params['num_episodes'] + 1):
        env_state = env.reset()
        episode_reward = 0.0
        policy_loss, value_loss = 0.0, 0.0
        num_updates = 0
        for t in xrange(1, params['max_steps'] + 1):
            if params['env_render']:
                env.render()

            if preprocessor:
                state = preprocessor.process_state(env_state)
            else:
                state = env_state

            var_state = createVariable(state, use_cuda=params['use_cuda'])
            action, state_val = agent.sample_action(var_state)

            reward = 0.0
            for _ in range(1):
                env_state, r, terminal, _ = env.step(action)
                reward += r
                if terminal:
                    break

            agent.rewards.append(reward)
            episode_reward += reward

            if terminal:
                agent.final_state_val = 0.0
                break

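            # Bootstrap from the current value estimate and run a training update every update_freq steps.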
            if t % params['update_freq'] == 0:
                agent.final_state_val = state_val[0]
                pl, vl = train_step(agent, optimizer, params)
                policy_loss += pl
                value_loss += vl
                num_updates += 1

        episode_rewards.append(episode_reward)
        agent.final_state_val = 0.0
        pl, vl = train_step(agent, optimizer, params)
        policy_loss += pl
        value_loss += vl
        num_updates += 1
        agent_steps += t

        if preprocessor:
            preprocessor.reset()

        if params['arch'] in ['VQ-A2C']:
            visit = len(agent.visited), agent.visited
            agent.visited = set([])
        else:
            visit = 0

        if episode % params['print_every'] == 0:
            print 'Episode {0} | Total Steps {1} | Total Reward {2} | Mean Reward {3} | Policy Loss {4} | Value Loss {6} | Total Time {5} | S_A {7}' \
                .format(episode, agent_steps, episode_reward, sum(episode_rewards[-100:]) / 100,
                        policy_loss / num_updates,
                        timeSince(start, episode / params['num_episodes']), value_loss / num_updates, visit)
Example #16
    def fit(self, dataset):
        bar = tqdm(dataset, smoothing=0)
        avgDLoss = Average('DL', num=4)
        realRealAcc = Average('DR', num=4)
        avgGLoss = Average('GL', num=4)
        fakeRealAcc = Average('GR', num=4)
        realIlluAcc = Average('TI', num=4)
        fakeIlluAcc = Average('GI', num=4)
        for i, (x, y) in enumerate(bar):
            self.step += 1
            batchSZ = y.size(0)
            x, y = [createVariable(z, self.use_cuda) for z in [x, y]]
            true = createVariable(torch.ones(batchSZ).float(), self.use_cuda)
            false = createVariable(torch.zeros(batchSZ).float(), self.use_cuda)

            # lr decay
            if self.step % 50000 == 0:
                for param_group in self.optimD.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.5
                for param_group in self.optimG.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.5

            #  tagger pretrain
            #  if self.step < 4000:
            #  self.G.eval()
            #  self.D.train()
            #  self.optimD.zero_grad()
            #  dloss = 0

            #  # Real data
            #  isReal, tags = self.D(x)
            #  lossHair = F.cross_entropy(tags[:, 0, :], y[:, 0])
            #  lossEyes = F.cross_entropy(tags[:, 1, :], y[:, 1])
            #  realHairAcc.append(toList((torch.max(tags[:, 0, :], 1)[1] == y[:, 0]).sum())[0] / batchSZ)
            #  realEyesAcc.append(toList((torch.max(tags[:, 1, :], 1)[1] == y[:, 1]).sum())[0] / batchSZ)
            #  lossRealTags = lossHair * 0.6 + lossEyes
            #  loss = lossRealTags
            #  dloss += loss.data.cpu().numpy().tolist()[0]
            #  loss.backward()

            #  # Gradient penalty
            #  alpha = createVariable(torch.rand(batchSZ, 1, 1, 1), self.use_cuda)
            #  beta = createVariable(torch.randn(x.size()), self.use_cuda)
            #  gradientPenalty = 0

            #  x = alpha * x + (1 - alpha) * (x + 0.5 * x.std() * beta)
            #  x = x.detach()
            #  x.requires_grad = True
            #  isReal, tags = self.D(x)
            #  hair = tags[:,0,:12]
            #  eyes = tags[:,1,:11]

            #  hairGrad = createVariable(torch.ones(batchSZ, 12).float(), self.use_cuda)
            #  hairGrad = grad(hair, x, hairGrad, create_graph=True,
            #  retain_graph=True, only_inputs=True)[0].view(batchSZ, -1)
            #  gradientPenalty += ((hairGrad.norm(p=2, dim=1) - 1)**2).mean()

            #  eyesGrad = createVariable(torch.ones(batchSZ, 11).float(), self.use_cuda)
            #  eyesGrad = grad(eyes, x, eyesGrad, create_graph=True,
            #  retain_graph=True, only_inputs=True)[0].view(batchSZ, -1)
            #  gradientPenalty += ((eyesGrad.norm(p=2, dim=1) - 1)**2).mean()

            #  gradientPenalty *= 0.5
            #  dloss += gradientPenalty.data.cpu().numpy().tolist()[0]
            #  gradientPenalty.backward()

            #  avgDLoss.append(dloss)
            #  torch.nn.utils.clip_grad_norm(self.D.parameters(), 1)
            #  self.optimD.step()
            #  logs = logging((avgDLoss, avgGLoss, realRealAcc, fakeRealAcc, realHairAcc, fakeHairAcc, realEyesAcc, fakeEyesAcc))
            #  bar.desc = logs
            #  continue

            lambdaAdvMax = 1
            #  lambdaAdv = min(1, self.step / 4000) ** 2
            #  lambdaAdv = lambdaAdv * 0.8 + 0.2
            #  lambdaAdv = lambdaAdv * lambdaAdvMax
            lambdaAdv = lambdaAdvMax

            skipD = False

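            # Randomly skip the discriminator update when it is well ahead of the generator.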
            if lambdaAdv >= lambdaAdvMax - 1e-10:
                # gap skip
                gap = max(realRealAcc.value() - fakeRealAcc.value(), 0)
                gap = min(1, gap * 2)
                r = random.random()
                if r > 1 - gap * 0.9:
                    skipD = True

            if not skipD:
                for _ in range(1):
                    # Training Discriminator
                    self.G.eval()
                    self.D.train()
                    self.optimD.zero_grad()
                    self.optimG.zero_grad()
                    dloss = 0

                    # Real data
                    isReal, illum = self.D(x)
                    lossRealLabel = F.binary_cross_entropy_with_logits(
                        isReal, true)
                    realRealAcc.append(toList(F.sigmoid(isReal).mean())[0])
                    lossIllu = F.mse_loss(illum, y)
                    realIlluAcc.append(toList(lossIllu)[0])
                    loss = lossRealLabel * lambdaAdv + lossIllu
                    dloss += loss.data.cpu().numpy().tolist()[0]
                    loss.backward()

                    # Gradient penalty
                    alpha = createVariable(torch.rand(batchSZ, 1, 1, 1),
                                           self.use_cuda)
                    beta = createVariable(torch.randn(x.size()), self.use_cuda)
                    gradientPenalty = 0

                    x = alpha * x + (1 - alpha) * (x + 0.5 * x.std() * beta)
                    x = x.detach()
                    x.requires_grad = True
                    isReal, illum = self.D(x)
                    #  isReal = F.sigmoid(isReal)

                    realGrad = grad(isReal,
                                    x,
                                    true,
                                    create_graph=True,
                                    retain_graph=True,
                                    only_inputs=True)[0].view(batchSZ, -1)
                    gradientPenalty += ((realGrad.norm(p=2, dim=1) -
                                         1)**2).mean()

                    gradientPenalty *= 0.5
                    dloss += gradientPenalty.data.cpu().numpy().tolist()[0]
                    gradientPenalty.backward()

                    # Fake data
                    noise = createVariable(torch.randn(batchSZ, noiseDim),
                                           self.use_cuda)
                    illum = createVariable(
                        torch.FloatTensor(batchSZ).uniform_(0.3, 1),
                        self.use_cuda)

                    x = self.G(noise, illum)
                    #  x = torch.clamp(x, 0, 1)
                    x = x.detach()

                    isReal, illum = self.D(x)
                    lossRealLabel = F.binary_cross_entropy_with_logits(
                        isReal, false)

                    loss = lossRealLabel * lambdaAdv
                    loss = loss * 0.1
                    dloss += loss.data.cpu().numpy().tolist()[0]
                    loss.backward()

                    # Fake data history
                    if len(self.memory) > batchSZ:
                        x = random.sample(self.memory, batchSZ)
                        x = createVariable(torch.stack(x, 0), self.use_cuda)

                        isReal, illum = self.D(x)
                        lossRealLabel = F.binary_cross_entropy_with_logits(
                            isReal, false)

                        loss = lossRealLabel * lambdaAdv
                        loss = loss * 0.9
                        dloss += loss.data.cpu().numpy().tolist()[0]
                        loss.backward()

                    avgDLoss.append(dloss)
                    torch.nn.utils.clip_grad_norm(self.D.parameters(), 1)
                    self.optimD.step()

            # Training Generator
            for i in range(1):
                self.optimD.zero_grad()
                self.optimG.zero_grad()
                self.D.eval()
                self.G.train()
                noise = createVariable(torch.randn(batchSZ, noiseDim),
                                       self.use_cuda)
                illum = createVariable(
                    torch.FloatTensor(batchSZ).uniform_(0.3, 1), self.use_cuda)
                gloss = 0

                x = self.G(noise, illum)
                isReal, _illum = self.D(x)

                self.memory.append(x[0].data.cpu())
                if len(self.memory) > 1000000:
                    self.memory = self.memory[-1000000:]

                if self.step % 15 == 0 and i == 0:
                    img = x.data[0].cpu().numpy()
                    img, org = toImage(img)
                    try:
                        img.save(
                            os.path.join('output', 'training', 'norm',
                                         '%d-0.jpg' % (self.step)))
                        org.save(
                            os.path.join('output', 'training', 'orig',
                                         '%d-0.jpg' % (self.step)))
                    except:
                        pass

                lossRealLabel = F.binary_cross_entropy_with_logits(
                    isReal, true)
                fakeRealAcc.append(toList(F.sigmoid(isReal).mean())[0])

                lossIllu = F.mse_loss(_illum, illum)
                fakeIlluAcc.append(toList(lossIllu)[0])
                loss = lossRealLabel * lambdaAdv + lossIllu
                gloss += loss.data.cpu().numpy().tolist()[0]
                loss.backward()

                avgGLoss.append(gloss)
                torch.nn.utils.clip_grad_norm(self.G.parameters(), 1)
                self.optimG.step()

            logs = logging((avgDLoss, avgGLoss, realRealAcc, fakeRealAcc,
                            realIlluAcc, fakeIlluAcc))
            bar.desc = logs

        bar.close()
        return [
            avgDLoss, avgGLoss, realRealAcc, fakeRealAcc, realIlluAcc,
            fakeIlluAcc
        ]
Example #17
def cache_abstraction(env, params):
    if os.path.exists('./out/{0}'.format(params['env_name'])):
        shutil.rmtree('./out/{0}'.format(params['env_name']))

    if params['use_preproc']:
        preprocessor = Preprocessor(params['state_dim'], params['history'], params['use_luminance'],
                                    params['resize_shape'])
        params['state_dim'] = preprocessor.state_shape
    else:
        preprocessor = None

    agent = VAE(params['state_dim'], params['action_dim'])
    if params['use_cuda']:
        agent = agent.cuda()
        agent.load_state_dict(torch.load('./agents/{0}_{1}'.format(params['arch'], params['env_name'])))
    else:
        agent.load_state_dict(
            torch.load('./agents/{0}_{1}'.format(params['arch'], params['env_name']), map_location='cpu'))
    agent.eval()

    agent_steps = 0
    episode_rewards = []
    start = time.time()
    for episode in xrange(1):
        env_state = env.reset()
        episode_reward = 0.0
        for t in xrange(1, params['max_steps'] + 1):
            if params['env_render']:
                env.render()

            if preprocessor:
                state = preprocessor.process_state(env_state)
            else:
                state = env_state

            var_state = createVariable(state, use_cuda=params['use_cuda'])
            # action, state_val = agent.sample_action_eval(var_state)
            action, state_val, code = agent.sample_action_eval_code(var_state)

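            # Save the current frame into a folder keyed by the discrete abstraction code.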
            if not os.path.exists('./out/{0}/{1}'.format(params['env_name'], code)):
                os.makedirs('./out/{0}/{1}'.format(params['env_name'], code))
            preprocessor.get_img_state().save('./out/{0}/{1}/{2}.png'.format(params['env_name'], code, t))

            reward = 0.0
            for _ in range(1):
                env_state, r, terminal, _ = env.step(action)
                reward += r
                if terminal:
                    break

            episode_reward += reward

            if terminal:
                break

        episode_rewards.append(episode_reward)
        agent_steps += t

        if preprocessor:
            preprocessor.reset()

        print 'Episode {0} | Total Steps {1} | Total Reward {2} | Mean Reward {3}' \
            .format(episode, agent_steps, episode_reward, sum(episode_rewards[-100:]) / 100)
Example #18
def train_agent_parallel(envs, params):
    preprocessors = []
    for _ in range(params['num_envs']):
        if params['use_preproc']:
            preprocessor = Preprocessor(params['state_dim'], params['history'],
                                        params['use_luminance'],
                                        params['resize_shape'])
            params['state_dim'] = preprocessor.state_shape
        else:
            preprocessor = None
        preprocessors.append(preprocessor)

    agent = agent_lookup(params)

    if params['optim'] == 'rms':
        optimizer = torch.optim.RMSprop(agent.parameters(),
                                        lr=params['learning_rate'])
    elif params['optim'] == 'adam':
        optimizer = torch.optim.Adam(agent.parameters(),
                                     lr=params['learning_rate'])
    else:
        print 'Unknown optimizer specified!'
        sys.exit(0)

    if params['restore'] is not None:
        restore_model(agent, params['restore'], params['use_cuda'])

    if params['use_cuda']:
        agent = agent.cuda()

    agent.train()
    if params['arch'] == 'DBAgentAE':
        agent.eval()

    episode_rewards = []
    start = time.time()
    total_steps = 0
    for episode in xrange(1, params['num_episodes'] + 1):
        env_states = [env.reset() for env in envs]
        states = [
            preprocessors[i].process_state(env_states[i])
            if preprocessors[i] else env_states[i] for i in range(len(envs))
        ]
        env_status = [False for _ in envs]
        episode_reward = [0.0 for _ in envs]
        loss_dict = defaultdict(float)
        num_updates = 0
        for t in xrange(1, params['max_steps'] + 1):

            if reduce(lambda x, y: x and y, env_status):
                break

            for i, env in enumerate(envs):

                if params['env_render']:
                    env.render()

                if env_status[i]:
                    continue

                var_state = createVariable(states[i],
                                           use_cuda=params['use_cuda'])
                action, state_val = agent.sample_action(var_state, i=i)

                reward = 0.0
                for _ in range(1):
                    env_states[i], r, terminal, _ = env.step(action)
                    reward += r
                    if terminal:
                        env_status[i] = True
                        break

                episode_reward[i] += reward
                states[i] = preprocessors[i].process_state(
                    env_states[i]) if preprocessors[i] else env_states[i]

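            # Run a training update across all environments every update_freq steps.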
            if t % params['update_freq'] == 0:
                l_dict = train_step_parallel(agent, optimizer, params)
                loss_dict = merge_loss_dicts(loss_dict, l_dict)
                num_updates += 1

        for i, env in enumerate(envs):
            agent.rewards[i].append(0.0)

        l_dict = train_step_parallel(agent, optimizer, params)
        loss_dict = merge_loss_dicts(loss_dict, l_dict)
        num_updates += 1

        for p in preprocessors:
            p.reset()

        episode_rewards.extend(episode_reward)

        # Might need this later
        visit = 0

        total_steps += t
        if episode % params['print_every'] == 0:
            print 'Episode {0} | Total Reward {1} | Total Steps {6} | Mean Reward {2} | Losses {3} | Total Time {4} | SA {5} ' \
                .format(episode, episode_reward, sum(episode_rewards[-100:]) / 100,
                        {k: v / num_updates for k, v in loss_dict.iteritems()},
                        timeSince(start, episode / params['num_episodes']), visit, total_steps)

        if episode % params['save_every'] == 0:
            torch.save(
                agent.state_dict(),
                './agents/{0}_{1}_{2}_{3}'.format(params['arch'],
                                                  params['env_name'],
                                                  int(params['beta']),
                                                  params['seed']))