Example 1
def main(args):
    test_x, test_y = load_image(args.image_path)

    test_inp = to_tensor(test_x.astype(np.float32))
    test_target = to_tensor(test_y.astype(np.float32))

    generator = Generator().to("cuda")

    start_t = time.time()
    pretrain_model = flow.load(args.model_path)
    generator.load_state_dict(pretrain_model)
    end_t = time.time()
    print("load params time : {}".format(end_t - start_t))

    start_t = time.time()
    generator.eval()
    with flow.no_grad():
        gout = to_numpy(generator(test_inp), False)
    end_t = time.time()
    print("infer time : {}".format(end_t - start_t))

    # save images
    save_images(
        gout,
        test_inp.numpy(),
        test_target.numpy(),
        path="./testimage.png",
        plot_size=1,
    )
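
The to_tensor and to_numpy helpers used above are not defined in the snippet. A minimal sketch of what they might look like for this OneFlow example (signatures and behavior are assumptions, not the repository's actual helpers):

import numpy as np
import oneflow as flow

def to_tensor(data, grad=True, dtype=flow.float32):
    # assumed helper: wrap a NumPy array as a OneFlow tensor;
    # device placement and gradient handling are omitted from this sketch
    return flow.tensor(np.asarray(data), dtype=dtype)

def to_numpy(data, mean=True):
    # assumed helper: bring a tensor back to NumPy; the second positional
    # argument (False in the call above) would skip the reduction
    if mean:
        data = data.mean()
    return data.detach().cpu().numpy()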
Example 2
    def forward_actor(self, state, goal=None):
        if not isinstance(state, torch.Tensor):
            state = to_tensor(state)
        if goal is not None:
            if not isinstance(goal, torch.Tensor):
                goal = to_tensor(goal)
            x = torch.cat((state, goal), -1)
        else:
            x = state
        return self.actor(x)
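
Examples 2, 4, 6, 8 and 12 convert NumPy inputs on the fly with a torch-based to_tensor helper that is not shown. A minimal sketch of such a helper, followed by a hypothetical call (shapes and the agent object are assumptions):

import numpy as np
import torch

def to_tensor(data, dtype=torch.float32):
    # assumed helper: NumPy array (or list) in, float32 torch tensor out
    return torch.as_tensor(np.asarray(data), dtype=dtype)

# hypothetical usage of forward_actor above:
# state = np.random.randn(1, 8).astype(np.float32)   # assumed state size
# goal = np.random.randn(1, 3).astype(np.float32)    # assumed goal size
# action = agent.forward_actor(state, goal)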
Example 3
    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        mask_path = self.masks_files[idx]
        img = cv2.imread(image_path)
        mask = cv2.imread(mask_path)

        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']

        return to_tensor(img), to_tensor(mask)
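
A minimal usage sketch for this __getitem__, assuming it belongs to a torch.utils.data.Dataset subclass wrapping Albumentations-style transforms (the class and variable names below are assumptions):

from torch.utils.data import DataLoader

# dataset = SegmentationDataset(image_files, masks_files, transforms=augmentations)
# loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=2)
# for img_batch, mask_batch in loader:
#     pass  # each batch stacks the tensors returned by __getitem__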
Example 4
    def forward_critic(self, state, action, goal=None):
        if not isinstance(state, torch.Tensor):
            state = to_tensor(state)
        if not isinstance(action, torch.Tensor):
            action = to_tensor(action)
        if goal is not None:
            if not isinstance(goal, torch.Tensor):
                goal = to_tensor(goal)
            x = torch.cat((state, action, goal), 1)
        else:
            x = torch.cat((state, action), 1)
        return self.critic(x)
Example 5
def behavior():
    '''
    Obsolete.
    Draw the action probability of the agent at different observations.
    '''
    actor = Actor(3, 5, args).to(device)
    critic = Critic(3, args).to(device)
    saved_ckpt_path = os.path.join(os.getcwd(), 'save_model',
                                   str(args.load_model))
    ckpt = torch.load(saved_ckpt_path)
    actor.load_state_dict(ckpt['actor'])
    critic.load_state_dict(ckpt['critic'])
    actionstr = {0: 'left', 1: 'right', 2: 'down', 3: 'up'}

    for enemyBaseHealth in [100, 50, 1]:
        allinput = []
        for posX in range(51):
            for posY in range(51):
                allinput.append([posX, posY, enemyBaseHealth])
        allinput = np.array(allinput)
        normalized = []
        for i in range(0, 2592, 10):
            normalized.append(running_state(allinput[i:i + 10, :]))
        ending = running_state(allinput[2591:2601, :])
        ending2d = np.empty((1, 3))
        ending2d[0, :] = ending[-1, :]
        normalized.append(ending2d)
        allNormalized = np.concatenate(normalized, axis=0)
        with torch.no_grad():
            mu = actor(to_tensor(allNormalized))
        mu = torch.cat([to_tensor(allNormalized), mu], dim=1)
        for action in range(4):
            fig, ax = plt.subplots(figsize=(7, 7))
            value = np.empty((51, 51))
            for row in range(51):
                for col in range(51):
                    # (x, y) = (col, 50 - row)
                    value[row, col] = mu[51 * col + 50 - row, 3 + action].item()
            ax.set_xlabel('X')
            ax.set_ylabel('Y')
            plt.imshow(value,
                       cmap='Greens',
                       interpolation='spline36',
                       vmin=0.05,
                       vmax=0.95)
            plt.colorbar()
            ax.plot(30, 30, '*r', markersize=10)
            plt.title('Health %d Action-%s' %
                      (enemyBaseHealth, actionstr[action]))
            plt.tight_layout()
            plt.savefig('Health%dAction%dat.png' % (enemyBaseHealth, action))
            plt.close()
Example 6
    def forward(self, x, goal):
        if not isinstance(x, torch.Tensor):
            x = to_tensor(x)
        if not isinstance(goal, torch.Tensor):
            goal = to_tensor(goal)
        x = torch.cat((x, goal), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.action_out(x)
        actions = self.max_action * torch.tanh(x)

        return (actions, x)
Example 7
def process_memory(net, batch, args):
    states = to_tensor(batch.state, False)
    actions = to_tensor(batch.action, False)
    rewards = to_tensor(batch.reward, False)
    masks = to_tensor(batch.mask, False)

    netOutput = net(states)  # (value, action, moveX, moveY, target)
    values = netOutput[0]

    old_policy = log_density(actions, netOutput)
    old_values = values.clone()
    returns, advants = getGA(rewards, masks, values, args)

    return states, actions, returns, advants, old_policy, old_values
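
Examples 7 and 9 both rely on a log_density helper for the log-probability of actions under a Gaussian policy (Example 7 passes the whole network output, Example 9 passes mu, std and logstd explicitly). A plausible sketch following the latter signature, assuming a diagonal Gaussian:

import math
import torch

def log_density(x, mu, std, logstd):
    # log N(x | mu, std^2) per action dimension, summed over the action dimension
    var = std.pow(2)
    logp = -(x - mu).pow(2) / (2 * var) - 0.5 * math.log(2 * math.pi) - logstd
    return logp.sum(dim=1, keepdim=True)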
Example 8
    def forward(self, x, actions, goals):
        if not isinstance(x, torch.Tensor):
            x = to_tensor(x)
        if not isinstance(actions, torch.Tensor):
            actions = to_tensor(actions)
        if not isinstance(goals, torch.Tensor):
            goals = to_tensor(goals)
        x = torch.cat([x, goals, actions / self.max_action], dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        q_value = self.q_out(x)

        return q_value
Example 9
def process_memory(actor, critic, batch, args):
    states = to_tensor(batch.state)
    actions = to_tensor(batch.action)
    rewards = to_tensor(batch.reward)
    masks = to_tensor(batch.mask)
    values = critic(states)

    # ----------------------------
    # step 1: get returns and GAEs and log probability of old policy
    returns, advants = get_gae(rewards, masks, values, args)
    mu, std, logstd = actor(states)
    old_policy = log_density(actions, mu, std, logstd)
    old_values = values.clone()

    return states, actions, returns, advants, old_policy, old_values
Example 10
    def fit(self,
            features,
            adj,
            labels,
            idx_train,
            idx_val=None,
            train_iters=200,
            initialize=True,
            verbose=False,
            normalize=True,
            patience=500,
            load_path=None):
        '''Train the GCN model. When idx_val is not None, pick the best model
        according to the validation loss.
        '''
        self.device = self.layers[0].weight.device
        if initialize:
            self.initialize()

        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(features,
                                                    adj,
                                                    labels,
                                                    device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)

        if normalize:
            if utils.is_sparse_tensor(adj):
                adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                adj_norm = utils.normalize_adj_tensor(adj)
        else:
            adj_norm = adj

        self.adj_norm = adj_norm
        self.features = features
        self.labels = labels

        self._train_with_val(labels, idx_train, idx_val, train_iters, verbose,
                             load_path)
Example 11
    def predict(self, features=None, adj=None):
        '''By default, inputs are unnormalized data'''

        self.eval()
        if features is None and adj is None:
            return self.forward(self.features, self.adj_norm)
        else:
            if type(adj) is not torch.Tensor:
                features, adj = utils.to_tensor(features,
                                                adj,
                                                device=self.device)

            self.features = features
            if utils.is_sparse_tensor(adj):
                self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
            else:
                self.adj_norm = utils.normalize_adj_tensor(adj)
            return self.forward(self.features, self.adj_norm)
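
A minimal usage sketch for the fit/predict methods above, assuming a GCN-style model with NumPy/SciPy inputs (constructor arguments and names are assumptions):

# model = GCN(nfeat=features.shape[1], nhid=16, nclass=int(labels.max()) + 1, device='cuda')
# model.fit(features, adj, labels, idx_train, idx_val, train_iters=200, verbose=True)
# preds = model.predict().argmax(dim=1)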
Example 12
    def forward(self, state, action=None):

        if not isinstance(state, torch.Tensor):
            state = to_tensor(state)

        value = self.critic(state)
        mu = self.actor(state)
        std = self.log_std
        dist = torch.distributions.Normal(mu, F.softplus(std))
        if action is None:
            action = dist.sample()
        log_prob = dist.log_prob(action).sum(-1).unsqueeze(-1)
        entropy = dist.entropy().sum(-1).unsqueeze(-1)
        return {
            'actions': action.unsqueeze(0),
            'log_prob': log_prob,
            'entropy': entropy,
            'mean': mu,
            'values': value
        }
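
A hypothetical call to the actor-critic forward above, assuming the surrounding network is already constructed (shapes and names are assumptions):

# obs = np.random.randn(1, obs_dim).astype(np.float32)
# out = model(obs)                      # the state is converted by to_tensor inside forward
# action = out['actions'].squeeze(0)    # sampled action
# logp = out['log_prob']                # log-probability under the Gaussian policy
# value = out['values']                 # critic value estimate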
Example 13
    def _update(self):
        experiences = self.replay_buffer.sample(self.config.batch_size)
        states, goals = self._preproc_og(experiences['obs'], experiences['g'])
        next_states, next_goals = self._preproc_og(experiences['next_obs'],
                                                   experiences['g'])
        actions = experiences['actions']
        rewards = experiences['r']

        states = self.o_norm.normalize(states)
        goals = self.g_norm.normalize(goals)
        next_states = self.o_norm.normalize(next_states)
        next_goals = self.g_norm.normalize(next_goals)

        with torch.no_grad():
            next_actions = self.target_actor(next_states, next_goals)
            target_value = self.target_critic(next_states, next_actions[0],
                                              next_goals)
            expected_value = (to_tensor(rewards) +
                              self.config.discount * target_value).detach()

            clip_return = 1 / (1 - self.config.discount)
            expected_value = torch.clamp(expected_value, -clip_return, 0)

        #====== Value loss ========
        value_criterion = nn.MSELoss()
        value = self.critic(states, actions, goals)
        value_loss = value_criterion(expected_value, value)
        #====== Policy loss =======
        actions_ = self.actor(states, goals)
        policy_loss = -(self.critic(states, actions_[0], goals)).mean()
        policy_loss += self.config.action_l2 * (actions_[0]).pow(2).mean()
        #====== Policy update =======
        self.actor_optimizer.zero_grad()
        policy_loss.backward()
        self.actor_optimizer.step()
        #====== Value update ========
        self.critic_optimizer.zero_grad()
        value_loss.backward()
        self.critic_optimizer.step()
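
DDPG-style agents such as the one in Example 13 usually pair this update with a Polyak (soft) update of the target networks. That step is not shown in the example; a minimal sketch, where tau and the attribute names are assumptions:

    def _soft_update(self, tau=0.05):
        # assumed companion step: blend online parameters into the target networks
        for target_net, online_net in ((self.target_actor, self.actor),
                                       (self.target_critic, self.critic)):
            for t_param, param in zip(target_net.parameters(), online_net.parameters()):
                t_param.data.copy_((1.0 - tau) * t_param.data + tau * param.data)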
Example 14
    def train(self):
        # init dataset
        x, y = load_facades()
        # work around a flow.Tensor() issue by making the arrays contiguous
        x, y = np.ascontiguousarray(x), np.ascontiguousarray(y)
        self.fixed_inp = to_tensor(x[:self.batch_size].astype(np.float32))
        self.fixed_target = to_tensor(y[:self.batch_size].astype(np.float32))

        batch_num = len(x) // self.batch_size
        label1 = to_tensor(np.ones((self.batch_size, 1, 30, 30)),
                           dtype=flow.float32)
        label0 = to_tensor(np.zeros((self.batch_size, 1, 30, 30)),
                           dtype=flow.float32)

        for epoch_idx in range(self.n_epochs):
            self.netG.train()
            self.netD.train()
            start = time.time()

            # run every epoch to shuffle
            for batch_idx in range(batch_num):
                inp = to_tensor(x[batch_idx * self.batch_size:(batch_idx + 1) *
                                  self.batch_size].astype(np.float32))
                target = to_tensor(
                    y[batch_idx * self.batch_size:(batch_idx + 1) *
                      self.batch_size].astype(np.float32))

                # update D
                d_fake_loss, d_real_loss, d_loss = self.train_discriminator(
                    inp, target, label0, label1)

                # update G
                g_gan_loss, g_image_loss, g_total_loss, g_out = self.train_generator(
                    inp, target, label1)

                self.G_GAN_loss.append(g_gan_loss)
                self.G_image_loss.append(g_image_loss)
                self.G_total_loss.append(g_total_loss)
                self.D_loss.append(d_loss)
                if (batch_idx + 1) % self.eval_interval == 0:
                    self.logger.info(
                        "{}th epoch, {}th batch, d_fakeloss:{:>8.4f}, d_realloss:{:>8.4f},  ggan_loss:{:>8.4f}, gl1_loss:{:>8.4f}"
                        .format(
                            epoch_idx + 1,
                            batch_idx + 1,
                            d_fake_loss,
                            d_real_loss,
                            g_gan_loss,
                            g_image_loss,
                        ))

            self.logger.info("Time for epoch {} is {} sec.".format(
                epoch_idx + 1,
                time.time() - start))

            if (epoch_idx + 1) % (2 * self.eval_interval) == 0:
                # save .train() images
                # save .eval() images
                self._eval_generator_and_save_images(epoch_idx)

        if self.save:
            flow.save(
                self.netG.state_dict(),
                os.path.join(self.checkpoint_path,
                             "pix2pix_g_{}".format(epoch_idx + 1)),
            )

            flow.save(
                self.netD.state_dict(),
                os.path.join(self.checkpoint_path,
                             "pix2pix_d_{}".format(epoch_idx + 1)),
            )

            # save train loss and val error to plot
            np.save(
                os.path.join(self.path,
                             "G_image_loss_{}.npy".format(self.n_epochs)),
                self.G_image_loss,
            )
            np.save(
                os.path.join(self.path,
                             "G_GAN_loss_{}.npy".format(self.n_epochs)),
                self.G_GAN_loss,
            )
            np.save(
                os.path.join(self.path,
                             "G_total_loss_{}.npy".format(self.n_epochs)),
                self.G_total_loss,
            )
            np.save(
                os.path.join(self.path, "D_loss_{}.npy".format(self.n_epochs)),
                self.D_loss,
            )
            self.logger.info("*************** Train done ***************** ")
Example 15
def train():
    numAgent = 10  # multiple agents are running synchronously.
    # each agent has a different type with different properties.
    # Only one network is created, different agent gets their
    # own behavior according to the embedding input.
    numGame = 20  # multiple games running simultaneously.
    print('agent count:', numAgent)
    print('Env num:', numGame)

    env = {}
    for game in range(numGame):
        env[game] = miniDotaEnv(args, numAgent)

    # initialize the neural networks.
    # use a single network to share the knowledge.
    net = ac(args)
    if not args.cpuSimulation:
        net = net.to(device)

    if args.load_model is not None:
        saved_ckpt_path = os.path.join(os.getcwd(), 'save_model',
                                       str(args.load_model))
        ckpt = torch.load(saved_ckpt_path)
        net.load_state_dict(ckpt['net'])

    observations, lastDone = {}, {}
    for game in range(numGame):
        observations[game] = env[game].reset(0)['observations']  # get initial state.
        lastDone[game] = [False] * 10  # records whether each agent was done at the previous step.

    optimizer = optim.Adam(net.parameters(), lr=args.lr)

    for iteration in range(args.max_iter):  # playing-training iteration.
        start = time.time()
        print()
        print('Start iteration %d ..' % iteration)
        if args.cpuSimulation:
            net = net.cpu()
        net.eval()  # switch to evaluation mode.
        memory = []
        for i in range(numGame):
            memory.append([Memory() for j in range(numAgent)])
            # memory is cleared at every iter so only the current iteration's samples are used in training.
            # separating memory by game is necessary because the samples
            # need to be processed separately for each game.

        steps = 0
        teamscore = 0  # only for game 0.
        record = []  # record the states for visualization.
        gameEnd = np.zeros(numGame).astype(bool)

        while steps <= args.time_horizon:  # loop for one game.
            if np.all(gameEnd):
                break
            steps += 1
            stateList = []
            for game in range(numGame):
                for agent in range(numAgent):
                    stateList.append(
                        np.expand_dims(observations[game][agent], axis=0))
            stateCombined = np.concatenate(stateList, axis=0)
            # concatenate the states of all games and process them by the network together.
            with torch.no_grad():
                actionDistr = net(to_tensor(stateCombined, args.cpuSimulation))
            actions = get_action(actionDistr)

            for game in range(numGame):
                if not gameEnd[game]:
                    # the commented-out random-action fallback below cannot work: random actions
                    # have too small a probability density value, leading to strange bugs.
                    #                    sample = random.random()
                    #                    if sample > args.randomActionRatio * (1 - min(1, iteration/1000) ):
                    #                        thisGameAction = actions[10*game:10*(game+1), :] # contain actions from all agents.
                    #                        check(thisGameAction)
                    #                    else:
                    #                        actionmove = np.random.randint(0, 3, size=(10,3))
                    #                        target = np.random.randint(0, 12, size=(10,1))
                    #                        thisGameAction = np.concatenate([actionmove, target], axis=1)
                    # select the actions from all agents of this env.
                    thisGameAction = actions[10 * game:10 * (game + 1), :]
                    # environment runs one step given the action.
                    envInfo = env[game].step(thisGameAction)
                    nextObs = envInfo['observations']  # get the next state.
                    if game == 0:
                        record.append(
                            np.concatenate([
                                env[game].getState(),
                                actions[0:10, :].reshape(-1)
                            ]))
                    rewards = envInfo['rewards']
                    dones = envInfo['local_done']
                    #                    masks = list(~dones) # cut the return calculation at the done point.
                    # no need to mask out the last state-action pair,
                    # because the last reward is useful to us.
                    masks = [True] * numAgent

                    for i in range(numAgent):
                        if not lastDone[game][i]:
                            memory[game][i].push(observations[game][i],
                                                 thisGameAction[i], rewards[i],
                                                 masks[i])
                    lastDone[game] = dones
                    if game == 0:
                        teamscore += sum(
                            [rewards[x] for x in env[game].getTeam0()])
                    observations[game] = nextObs

                    gameEnd[game] = np.all(dones)
                    if gameEnd[game]:
                        if game == 0:
                            print('Game 0 score: %f' % teamscore)


                            # recordMat = np.stack(record)  # stack will expand the dimension before concatenate.
                            # draw(recordMat, iteration, env[game].getUnitRange(), 10)
                        observations[game] = env[game].reset(iteration + 1)['observations']
                        lastDone[game] = [False] * 10

        simEnd = time.time()
        print('Simulation time: %.f' % (simEnd - start))

        net.train()  # switch to training mode.
        net = net.cuda()

        sts, ats, returns, advants, old_policy, old_value = [], [], [], [], [], []

        for game in range(numGame):
            for i in range(numAgent):
                batch = memory[game][i].sample()
                st, at, rt, adv, old_p, old_v = process_memory(
                    net, batch, args)
                sts.append(st)
                ats.append(at)
                returns.append(rt)
                advants.append(adv)
                old_policy.append(old_p)
                old_value.append(old_v)

        sts = torch.cat(sts)
        ats = torch.cat(ats)
        returns = torch.cat(returns)
        advants = torch.cat(advants)
        old_policy = torch.cat(old_policy)
        old_value = torch.cat(old_value)

        train_model(net, optimizer, sts, ats, returns, advants, old_policy,
                    old_value, args)
        # training is based on the state-action pairs from all games of the current iteration.

        trainEnd = time.time()
        print('Training time: %.f' % (trainEnd - simEnd))

        if iteration % 10 == 0:
            model_path = os.path.join(os.getcwd(), 'save_model')
            if not os.path.isdir(model_path):
                os.makedirs(model_path)

            ckpt_path = os.path.join(model_path,
                                     'ckpt_%.3f.pth.tar' % teamscore)

            save_checkpoint(
                {
                    'net': net.state_dict(),
                    'args': args,
                    'score': teamscore
                },
                filename=ckpt_path)
Example 16
                              weight_decay=args.l2_rate)

    scores = []
    score_avg = 0

    for iter in range(args.max_iter):
        actor.eval(), critic.eval()
        memory = [Memory() for _ in range(num_agent)]

        steps = 0
        score = 0

        while steps < args.time_horizon:
            steps += 1

            mu, std, _ = actor(to_tensor(states))
            actions = get_action(mu, std)
            env_info = env.step(actions)[default_brain]

            next_states = running_state(env_info.vector_observations)
            rewards = env_info.rewards
            dones = env_info.local_done
            masks = list(~(np.array(dones)))

            for i in range(num_agent):
                memory[i].push(states[i], actions[i], rewards[i], masks[i])

            score += rewards[0]
            states = next_states

            if dones[0]:
Example 17
def test(interval, runs):
    print('Testing..')
    numAgent = 10
    numGame = 1
    assert numGame == 1  # needed.
    env = {0: miniDotaEnv(args, numAgent)}
    net = ac(args)
    if not args.cpuSimulation:
        net = net.to(device)
    saved_ckpt_path = os.path.join(os.getcwd(), 'save_model',
                                   str(args.load_model))
    ckpt = torch.load(saved_ckpt_path)
    net.load_state_dict(ckpt['net'])
    net.eval()
    observations = {0: env[0].reset(0)['observations']}

    for iteration in range(runs):
        start = time.time()
        print()
        print('Start iteration %d ..' % iteration)
        if args.cpuSimulation:
            net = net.cpu()
        steps = 0
        teamscore = 0
        gameEnd = np.zeros(numGame).astype(bool)
        record = []
        teamLabel = env[0].getState().reshape((12, 4))[:10, 0]

        while steps <= args.time_horizon:  # loop for one round of games.
            if np.all(gameEnd):
                break
            steps += 1
            stateList = []
            for game in range(numGame):
                for agent in range(numAgent):
                    stateList.append(
                        np.expand_dims(observations[game][agent], axis=0))
            stateCombined = np.concatenate(stateList, axis=0)
            with torch.no_grad():
                actionDistr = net(to_tensor(
                    stateCombined,
                    args.cpuSimulation))  # calculate all envs together.
            actions = get_action(actionDistr)

            for game in range(numGame):
                if not gameEnd[game]:
                    # contain actions from all agents.
                    thisGameAction = actions[10 * game:10 * (game + 1), :]
                    #                    for player in range(10):
                    #                        if teamLabel[player] == 0 and steps < 100:
                    #                            thisGameAction[player] = [0, 1, 1, 0] # ablation test.
                    # environment runs one step given the action.
                    envInfo = env[game].step(thisGameAction)
                    nextObs = envInfo['observations']  # get the next state.
                    allAction = np.concatenate(
                        [actionDistr[x] for x in range(1, 5)], axis=1)
                    record.append(
                        np.concatenate([
                            env[0].getState(), actions[0:10, :].reshape(-1),
                            allAction.reshape(-1)
                        ]))
                    rewards = envInfo['rewards']
                    dones = envInfo['local_done']
                    teamscore += sum([rewards[x] for x in env[0].getTeam0()])
                    observations[game] = nextObs

                    gameEnd[game] = np.all(dones)
                    if gameEnd[game]:
                        print('Team 0 score: %f' % teamscore)
                        simEnd = time.time()
                        print('Simulation time: %.f' % (simEnd - start))
                        # stack will expand the dimension before concatenate.
                        recordMat = np.stack(record)
                        draw(recordMat, iteration, env[game].getUnitRange(),
                             interval)
                        observations[game] = env[game].reset(iteration + 1)['observations']

        drawEnd = time.time()
        print('Drawing time: %.f' % (drawEnd - simEnd))