Example #1
class MGAIL(object):
    def __init__(self, environment):

        self.env = environment

        # Create placeholders for all the inputs
        self.states_ = tf.placeholder(
            "float", shape=(None, ) + self.env.state_size,
            name='states_')  # Batch x State, previous state
        self.states = tf.placeholder(
            "float", shape=(None, ) + self.env.state_size,
            name='states')  # Batch x State, current_state
        self.actions = tf.placeholder("float",
                                      shape=(None, self.env.action_size),
                                      name='action')  # Batch x Action
        self.label = tf.placeholder("float", shape=(None, 1), name='label')
        self.gamma = tf.placeholder("float", shape=(), name='gamma')
        self.temp = tf.placeholder("float", shape=(), name='temperature')
        self.noise = tf.placeholder("float", shape=(), name='noise_flag')
        self.do_keep_prob = tf.placeholder("float",
                                           shape=(),
                                           name='do_keep_prob')
        if self.env.use_airl:
            self.done_ph = tf.placeholder(name="dones",
                                          shape=(None, ),
                                          dtype=tf.float32)

        # Create MGAIL blocks
        self.forward_model = ForwardModel(
            state_size=self.env.state_size[0]
            if self.env.obs_mode == 'state' else self.env.encoder_feat_size,
            action_size=self.env.action_size,
            encoding_size=self.env.fm_size,
            lr=self.env.fm_lr,
            forward_model_type=self.env.forward_model_type,
            obs_mode=self.env.obs_mode,
            use_scale_dot_product=self.env.use_scale_dot_product,
            use_skip_connection=self.env.use_skip_connection,
            use_dropout=self.env.use_dropout)

        if self.env.obs_mode == 'pixel':
            if self.env.state_only:
                feat_in_dim = 1024  # self.env.encoder_feat_size[0]
                policy_input_feat = 1024
            else:
                feat_in_dim = 1024 + self.env.action_size  # self.env.encoder_feat_size[0]
                policy_input_feat = 1024
        else:
            if self.env.state_only:
                feat_in_dim = self.env.state_size[0]
                policy_input_feat = self.env.state_size[0]
            else:
                feat_in_dim = self.env.state_size[0] + self.env.action_size
                policy_input_feat = self.env.state_size[0]

        self.discriminator = Discriminator(
            in_dim=feat_in_dim,
            out_dim=self.env.disc_out_dim,
            size=self.env.d_size,
            lr=self.env.d_lr,
            do_keep_prob=self.do_keep_prob,
            weight_decay=self.env.weight_decay,
            use_airl=self.env.use_airl,
            phi_hidden_size=self.env.phi_size,
            state_only=self.env.state_only,
        )

        self.policy = Policy(in_dim=policy_input_feat,
                             out_dim=self.env.action_size,
                             size=self.env.p_size,
                             lr=self.env.p_lr,
                             do_keep_prob=self.do_keep_prob,
                             n_accum_steps=self.env.policy_accum_steps,
                             weight_decay=self.env.weight_decay)

        # Create experience buffers
        self.er_agent = ER(
            memory_size=self.env.er_agent_size,
            state_dim=self.env.state_size,
            action_dim=self.env.action_size,
            reward_dim=1,  # stub connection
            qpos_dim=self.env.qpos_size,
            qvel_dim=self.env.qvel_size,
            batch_size=self.env.batch_size,
            history_length=1)

        self.er_expert = common.load_er(fname=os.path.join(
            self.env.run_dir, self.env.expert_data),
                                        batch_size=self.env.batch_size,
                                        history_length=1,
                                        traj_length=2)

        self.env.sigma = self.er_expert.actions_std / self.env.noise_intensity

        if self.env.obs_mode == 'pixel':
            current_states = ops.preprocess(self.states, bits=8)
            current_states_feat = ops.encoder(current_states,
                                              reuse=tf.AUTO_REUSE)
            prev_states = ops.preprocess(self.states_, bits=8)
            prev_states_feat = ops.encoder(prev_states, reuse=tf.AUTO_REUSE)
        else:
            # Normalize the inputs
            prev_states = common.normalize(self.states_,
                                           self.er_expert.states_mean,
                                           self.er_expert.states_std)
            current_states = common.normalize(self.states,
                                              self.er_expert.states_mean,
                                              self.er_expert.states_std)
            prev_states_feat = prev_states
            current_states_feat = current_states

        if self.env.continuous_actions:
            actions = common.normalize(self.actions,
                                       self.er_expert.actions_mean,
                                       self.er_expert.actions_std)
        else:
            actions = self.actions

        # 1. Forward Model
        initial_gru_state = np.ones((1, self.forward_model.encoding_size))
        forward_model_prediction, _, divergence_loss = self.forward_model.forward(
            [prev_states_feat, actions, initial_gru_state])
        if self.env.obs_mode == 'pixel':
            forward_model_prediction = ops.decoder(
                forward_model_prediction,
                data_shape=self.env.state_size,
                reuse=tf.AUTO_REUSE)
            self.forward_model_prediction = ops.postprocess(
                forward_model_prediction, bits=8, dtype=tf.uint8)
        else:
            self.forward_model_prediction = forward_model_prediction
        forward_model_loss = tf.reduce_mean(
            tf.square(current_states - forward_model_prediction)
        ) + self.env.forward_model_lambda * tf.reduce_mean(divergence_loss)
        self.forward_model.train(objective=forward_model_loss)

        if self.env.use_airl:
            # 1.1 action log prob
            logits = self.policy.forward(current_states_feat)
            if self.env.continuous_actions:
                mean, logstd = logits, tf.log(tf.ones_like(logits))
                std = tf.exp(logstd)

                n_elts = tf.cast(tf.reduce_prod(mean.shape[1:]),
                                 tf.float32)  # first dimension is batch size
                log_normalizer = n_elts / 2. * (np.log(2 * np.pi).astype(
                    np.float32)) + 1 / 2 * tf.reduce_sum(logstd, axis=1)
                # Diagonal Gaussian action probability, for every action
                action_logprob = -tf.reduce_sum(tf.square(actions - mean) /
                                                (2 * std),
                                                axis=1) - log_normalizer
            else:
                # Override since the implementation of tfp.RelaxedOneHotCategorical
                # yields positive values.
                if actions.shape[1:] != logits.shape[1:]:
                    actions = tf.cast(actions, tf.int8)
                    values = tf.one_hot(actions,
                                        logits.shape.as_list()[-1],
                                        dtype=tf.float32)
                    assert values.shape == logits.shape, (values.shape,
                                                          logits.shape)
                else:
                    values = actions

                # [0]'s implementation (see line below) seems to be an approximation
                # to the actual Gumbel Softmax density.
                # TODO: to confirm 'action' or 'value'
                action_logprob = -tf.reduce_sum(
                    -values * tf.nn.log_softmax(logits, axis=-1), axis=-1)
                # prob = logit[np.arange(self.action_test.shape[0]), self.action_test]
                # action_logprob = tf.log(prob)
            # 2. Discriminator
            self.discriminator.airl_entropy_weight = self.env.airl_entropy_weight
            # labels = tf.concat([1 - self.label, self.label], 1)
            # labels = 1 - self.label  # 0 for expert, 1 for policy
            labels = self.label  # 1 for expert, 0 for policy
            d, self.disc_shaped_reward_output, self.disc_reward = self.discriminator.forward(
                state=current_states_feat,
                action=actions,
                prev_state=prev_states_feat,
                done_inp=self.done_ph,
                log_policy_act_prob=action_logprob,
            )

            # 2.1 0-1 accuracy
            correct_predictions = tf.equal(tf.argmax(d, 1),
                                           tf.argmax(labels, 1))
            self.discriminator.acc = tf.reduce_mean(
                tf.cast(correct_predictions, "float"))
            # 2.2 prediction
            d_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=labels,
                logits=d,
                name="disc_loss",
            )
            # Construct generator reward:
            # \[\hat{r}(s,a) = \log(D_{\theta}(s,a)) - \log(1 - D_{\theta}(s,a)).\]
            # This simplifies to:
            # \[\hat{r}(s,a) = f_{\theta}(s,a) - \log \pi(a \mid s).\]
            # This is just an entropy-regularized objective
            # ent_bonus = -self.env.airl_entropy_weight * self.discriminator.log_policy_act_prob_ph
            # policy_train_reward = self.discriminator.reward_net.reward_output_train + ent_bonus
        else:
            # 2. Discriminator
            labels = tf.concat([1 - self.label, self.label], 1)
            d, _, _ = self.discriminator.forward(state=current_states_feat,
                                                 action=actions)

            # 2.1 0-1 accuracy
            correct_predictions = tf.equal(tf.argmax(d, 1),
                                           tf.argmax(labels, 1))
            self.discriminator.acc = tf.reduce_mean(
                tf.cast(correct_predictions, "float"))
            # 2.2 prediction
            d_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=d, labels=labels)
        # cost sensitive weighting (weight true=expert, predict=agent mistakes)
        d_loss_weighted = self.env.cost_sensitive_weight * tf.multiply(tf.to_float(tf.equal(tf.squeeze(self.label), 1.)), d_cross_entropy) +\
                                                           tf.multiply(tf.to_float(tf.equal(tf.squeeze(self.label), 0.)), d_cross_entropy)
        discriminator_loss = tf.reduce_mean(d_loss_weighted)
        self.discriminator.train(objective=discriminator_loss)

        # 3. Collect experience
        mu = self.policy.forward(current_states_feat)
        if self.env.continuous_actions:
            a = common.denormalize(mu, self.er_expert.actions_mean,
                                   self.er_expert.actions_std)
            eta = tf.random_normal(shape=tf.shape(a), stddev=self.env.sigma)
            self.action_test = tf.squeeze(a + self.noise * eta)
        else:
            a = common.gumbel_softmax(logits=mu, temperature=self.temp)
            self.action_test = tf.argmax(a, dimension=1)

        # 4.3 AL
        def policy_loop(current_state_policy_update, t, total_cost,
                        total_trans_err, env_term_sig, prev_state):
            if self.env.obs_mode == 'pixel':
                current_state_feat_policy_update = ops.encoder(
                    current_state_policy_update, reuse=True)
                prev_state_feat_policy_update = ops.encoder(prev_state,
                                                            reuse=True)
            else:
                current_state_feat_policy_update = current_state_policy_update
                prev_state_feat_policy_update = prev_state
            mu = self.policy.forward(current_state_feat_policy_update,
                                     reuse=True)

            if self.env.continuous_actions:
                eta = self.env.sigma * tf.random_normal(shape=tf.shape(mu))
                action = mu + eta

                if self.env.use_airl:
                    mean, logstd = mu, tf.log(
                        tf.ones_like(mu) * self.env.sigma)
                    std = tf.exp(logstd)

                    n_elts = tf.cast(
                        tf.reduce_prod(mean.shape[1:]),
                        tf.float32)  # first dimension is batch size
                    log_normalizer = n_elts / 2. * (np.log(2 * np.pi).astype(
                        np.float32)) + 1 / 2 * tf.reduce_sum(logstd, axis=1)
                    # Diagonal Gaussian action probability, for every action
                    action_logprob = -tf.reduce_sum(tf.square(action - mean) /
                                                    (2 * std),
                                                    axis=1) - log_normalizer
            else:
                action = common.gumbel_softmax_sample(logits=mu,
                                                      temperature=self.temp)

                if self.env.use_airl:
                    # Override since the implementation of tfp.RelaxedOneHotCategorical
                    # yields positive values.
                    if action.shape[1:] != logits.shape[1:]:
                        actions = tf.cast(action, tf.int8)
                        values = tf.one_hot(actions,
                                            logits.shape.as_list()[-1],
                                            dtype=tf.float32)
                        assert values.shape == logits.shape, (values.shape,
                                                              logits.shape)
                    else:
                        values = action

                    # [0]'s implementation (see line below) seems to be an approximation
                    # to the actual Gumbel Softmax density.
                    # TODO: to confirm 'action' or 'value'
                    action_logprob = -tf.reduce_sum(
                        -values * tf.nn.log_softmax(logits, axis=-1), axis=-1)

            # minimize the gap between agent logit (d[:,0]) and expert logit (d[:,1])
            if self.env.use_airl:
                d, shaped_reward_output, reward = self.discriminator.forward(
                    state=current_state_feat_policy_update,
                    action=action,
                    prev_state=prev_state_feat_policy_update,
                    done_inp=tf.cast(env_term_sig, tf.float32),
                    log_policy_act_prob=action_logprob,
                    reuse=True)
                if self.env.alg in ['mairlTransfer', 'mairlImit4Transfer']:
                    reward_for_updating_policy = reward
                else:  # 'mairlImit'
                    reward_for_updating_policy = shaped_reward_output
                if self.env.train_mode and not self.env.alg in [
                        'mairlTransfer', 'mairlImit4Transfer'
                ]:
                    ent_bonus = -self.env.airl_entropy_weight * tf.stop_gradient(
                        action_logprob)
                    policy_reward = reward_for_updating_policy + ent_bonus
                else:
                    policy_reward = reward_for_updating_policy
                cost = tf.reduce_mean(-policy_reward) * self.env.policy_al_w
            else:
                d, _, _ = self.discriminator.forward(
                    state=current_state_feat_policy_update,
                    action=action,
                    reuse=True)
                cost = self.al_loss(d)

            # add step cost
            total_cost += tf.multiply(tf.pow(self.gamma, t), cost)

            # get action
            if self.env.continuous_actions:
                a_sim = common.denormalize(action, self.er_expert.actions_mean,
                                           self.er_expert.actions_std)
            else:
                a_sim = tf.argmax(action, dimension=1)

            # get next state
            state_env, _, env_term_sig, = self.env.step(a_sim,
                                                        mode='tensorflow')[:3]
            state_e = common.normalize(state_env, self.er_expert.states_mean,
                                       self.er_expert.states_std)
            state_e = tf.stop_gradient(state_e)

            state_a, _, divergence_loss_a = self.forward_model.forward(
                [current_state_feat_policy_update, action, initial_gru_state],
                reuse=True)
            if self.env.obs_mode == 'pixel':
                state_a = ops.decoder(state_a,
                                      data_shape=self.env.state_size,
                                      reuse=True)
            if True:  # self.env.alg in ['mgail']:
                state, nu = common.re_parametrization(state_e=state_e,
                                                      state_a=state_a)
            else:
                _, nu = common.re_parametrization(state_e=state_e,
                                                  state_a=state_a)
                state = state_a

            total_trans_err += tf.reduce_mean(abs(nu))
            t += 1

            if self.env.obs_mode == 'pixel':
                state = tf.slice(state, [0, 0, 0, 0], [1, -1, -1, -1])
            return state, t, total_cost, total_trans_err, env_term_sig, current_state_policy_update

        def policy_stop_condition(current_state_policy_update, t, cost,
                                  trans_err, env_term_sig, prev_state):
            cond = tf.logical_not(
                env_term_sig)  # not done: env_term_sig = False
            cond = tf.logical_and(cond, t < self.env.n_steps_train)
            cond = tf.logical_and(cond,
                                  trans_err < self.env.total_trans_err_allowed)
            return cond

        if self.env.obs_mode == 'pixel':
            state_0 = tf.slice(current_states, [0, 0, 0, 0], [1, -1, -1, -1])
        else:
            state_0 = tf.slice(current_states, [0, 0], [1, -1])
        # prev_state_0 = tf.slice(states_, [0, 0], [1, -1])
        loop_outputs = tf.while_loop(policy_stop_condition, policy_loop,
                                     [state_0, 0., 0., 0., False, state_0])
        self.policy.train(objective=loop_outputs[2])

    def al_loss(self, d):
        logit_agent, logit_expert = tf.split(axis=1,
                                             num_or_size_splits=2,
                                             value=d)

        # Cross entropy loss
        labels = tf.concat(
            [tf.zeros_like(logit_agent),
             tf.ones_like(logit_expert)], 1)
        d_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=d, labels=labels)
        loss = tf.reduce_mean(d_cross_entropy)

        return loss * self.env.policy_al_w
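A minimal usage sketch for the class above (not part of the original source), assuming a hypothetical make_env() factory that returns an environment/configuration object exposing the attributes read in __init__ (state_size, action_size, obs_mode, use_airl, the buffer and model hyper-parameters, and a TensorFlow-compatible step()):

# Hypothetical usage sketch; names such as make_env are assumptions.
import numpy as np
import tensorflow as tf

env = make_env()                # user-provided environment wrapper (assumed)
mgail = MGAIL(environment=env)  # builds forward model, discriminator and policy graphs

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    dummy_states = np.zeros((4,) + env.state_size, dtype=np.float32)
    action = sess.run(mgail.action_test,
                      feed_dict={mgail.states: dummy_states,
                                 mgail.do_keep_prob: 1.0,
                                 mgail.noise: 0.0,
                                 mgail.temp: 1.0})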
Example #2
class MainModel(nn.Module):
    def __init__(self,
                 marker_num,
                 neighbor_num,
                 embed_dim,
                 d_model,
                 d_inner,
                 d_q,
                 d_k,
                 d_v,
                 n_head,
                 candi_size,
                 max_time,
                 beta,
                 cuda_id,
                 K,
                 discount,
                 regular,
                 dropout=0.1):

        super(MainModel, self).__init__()
        self.generator = Generator(marker_num,
                                   neighbor_num,
                                   embed_dim,
                                   d_model,
                                   d_inner,
                                   d_q,
                                   d_k,
                                   d_v,
                                   n_head,
                                   candi_size,
                                   max_time,
                                   beta,
                                   cuda_id,
                                   dropout=dropout)
        self.discriminator = Discriminator(marker_num,
                                           embed_dim,
                                           d_model,
                                           d_inner,
                                           d_q,
                                           d_k,
                                           d_v,
                                           n_head,
                                           beta,
                                           cuda_id,
                                           K,
                                           dropout=dropout)

        self.marker_embeddings = nn.Parameter(torch.ones(marker_num, d_model))
        self.d_loss_func = D_Loss(K)
        self.g_loss_func = PolicyGradient(discount, regular, K, cuda_id)
        self.discount = discount
        self.regular = regular
        self.marker_num = marker_num
        self.K = K

    def forward(self, marker_data, time_data, mask_data):
        gen_markers, gen_times, gen_masks, gen_p_neighbor, gen_p_sample = [], [], [], [], []

        for i in range(self.K):
            new_markers, new_times, new_masks, new_p_neighbor, new_p_sample = \
                self.generator.forward(marker_data, time_data, mask_data, self.marker_embeddings)
            gen_markers.append(new_markers)
            gen_times.append(new_times.detach())
            gen_masks.append(new_masks)
            gen_p_neighbor.append(new_p_neighbor)
            gen_p_sample.append(new_p_sample)

        true_reward, true_masks, bogus_reward, bogus_masks = \
            self.discriminator.forward(marker_data, time_data, mask_data,
                                       gen_markers, gen_times, gen_masks, self.marker_embeddings)

        d_loss = self.d_loss_func.forward(true_reward, true_masks,
                                          bogus_reward, bogus_masks)
        g_loss = self.g_loss_func.forward(gen_p_neighbor, gen_p_sample,
                                          bogus_reward, bogus_masks)

        return d_loss, g_loss
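A hypothetical usage sketch for the module above; all hyper-parameter values are illustrative assumptions, and the input batches come from the project's own data loader, whose tensor shapes are not shown here:

# Hypothetical usage sketch; hyper-parameter values and batch tensors are assumptions.
import torch

model = MainModel(marker_num=50, neighbor_num=10, embed_dim=32, d_model=64,
                  d_inner=128, d_q=16, d_k=16, d_v=16, n_head=4,
                  candi_size=20, max_time=100.0, beta=1.0, cuda_id=0,
                  K=3, discount=0.99, regular=0.01)

# marker_data / time_data / mask_data are batches from the project's data
# loader; their exact shapes depend on Generator and Discriminator, which
# are not shown in this snippet.
d_loss, g_loss = model(marker_data, time_data, mask_data)

# Typically the two losses would be minimized by separate optimizers, e.g.
# torch.optim.Adam over model.discriminator.parameters() and
# model.generator.parameters() respectively.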
Example #3
class DCGAN:
    def __init__(self, img_shape, epochs=50000,
                 lr_gen=0.0002, lr_dc=0.0002, z_shape=100, batch_size=100,
                 beta1=0.5, epochs_for_sample=50):

        self.rows, self.cols, self.channels = img_shape
        self.batch_size = batch_size
        self.epochs = epochs
        self.z_shape = z_shape
        self.epochs_for_sample = epochs_for_sample
        self.generator = Generator(img_shape)
        self.discriminator = Discriminator(img_shape)

        mnist = tf.keras.datasets.mnist

        (x_train, _), (x_test, _) = mnist.load_data()

        X = np.concatenate([x_train, x_test])
        X = np.reshape(X, (-1, 28, 28, 1))
        X = tf.image.resize_images(X, [64, 64])
        self.X = (X / 127.5) - 1  # Scale between -1 and 1

        self.phX = tf.placeholder(dtype=tf.float32, shape=[None, self.rows, self.cols, self.channels])
        self.phZ = tf.placeholder(dtype=tf.float32, shape=[None, 1, 1, self.z_shape])
        self.loss_plot = tf.placeholder(dtype=tf.float32, shape=[])

        self.gen_out = self.generator.forward(self.phZ)

        disc_logits_fake = self.discriminator.forward(self.gen_out)
        disc_logits_real = self.discriminator.forward(self.phX)

        disc_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_logits_fake, labels=tf.zeros_like(disc_logits_fake)))
        disc_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_logits_real, labels=tf.ones_like(disc_logits_real)))

        self.disc_loss = tf.add(disc_loss_fake, disc_loss_real)

        self.gen_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_logits_fake,
                                                                               labels=tf.ones_like(disc_logits_fake)))

        self.disc_train = tf.train.AdamOptimizer(lr_dc, beta1=beta1).minimize(self.disc_loss,
                                                                              var_list=self.discriminator.variables)
        self.gen_train = tf.train.AdamOptimizer(lr_gen, beta1=beta1).minimize(self.gen_loss,
                                                                              var_list=self.generator.variables)

    def train(self):
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

        train_writer = tf.summary.FileWriter('./logs')
        train_writer.add_graph(tf.get_default_graph())

        dc_plot = tf.summary.scalar('Discriminator', self.loss_plot)
        gen_plot = tf.summary.scalar('Generator', self.loss_plot)

        cnt = 0

        for i in range(self.epochs):
            X_numpy = self.sess.run(self.X)
            idx = np.random.randint(0, len(X_numpy), self.batch_size)
            batch_X = X_numpy[idx]

            batch_Z = np.random.uniform(-1, 1, (self.batch_size, 1, 1, self.z_shape))
            _, d_loss = self.sess.run([self.disc_train, self.disc_loss],
                                      feed_dict={self.phX: batch_X, self.phZ: batch_Z})

            batch_Z = np.random.uniform(-1, 1, (self.batch_size, 1, 1, self.z_shape))
            _, g_loss = self.sess.run([self.gen_train, self.gen_loss], feed_dict={self.phZ: batch_Z})

            if i % self.epochs_for_sample == 0:
                self.generate_sample(i)
                print("Epoch: " + str(i) + " Discriminator loss: " + str(d_loss) + " Generator loss: " + str(g_loss))
                train_writer.add_summary(self.sess.run(dc_plot, feed_dict={self.loss_plot: d_loss}),
                                         i / self.epochs_for_sample)
                train_writer.add_summary(self.sess.run(gen_plot, feed_dict={self.loss_plot: g_loss}),
                                         i / self.epochs_for_sample)

    def generate_sample(self, epoch):
        c = 5
        r = 5
        # Sample a noise batch for the grid; the original referenced a module-level
        # `fixed_z`, assumed to have shape (c * r, 1, 1, z_shape)
        z = np.random.uniform(-1, 1, (c * r, 1, 1, self.z_shape))
        imgs = self.sess.run(self.gen_out, feed_dict={self.phZ: z})
        imgs = imgs * 0.5 + 0.5  # scale between 0, 1
        fig, axs = plt.subplots(c, r)
        cnt = 0
        for i in range(c):
            for j in range(r):
                axs[i, j].imshow(imgs[cnt, :, :, 0], cmap="gray")
                axs[i, j].axis('off')
                cnt += 1
        # Save the grid once per call instead of once per subplot
        fig.savefig("samples/%05d.png" % epoch)
        plt.close()
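A possible entry point for the class above, assuming the surrounding script provides Generator, Discriminator, the usual imports (tensorflow as tf, numpy as np, matplotlib.pyplot as plt) and a samples/ directory for the image grids:

# Hypothetical usage sketch; the 64x64 single-channel shape matches the
# tf.image.resize_images call in __init__.
if __name__ == "__main__":
    dcgan = DCGAN(img_shape=(64, 64, 1), epochs=50000, batch_size=100)
    dcgan.train()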
Example #4
class GAN_CLS(object):
    def __init__(self, args, data_loader, SUPERVISED=True):
        """
		args : Arguments
		data_loader = An instance of class DataLoader for loading our dataset in batches
		"""

        self.data_loader = data_loader
        self.num_epochs = args.num_epochs
        self.batch_size = args.batch_size

        self.log_step = args.log_step
        self.sample_step = args.sample_step

        self.log_dir = args.log_dir
        self.checkpoint_dir = args.checkpoint_dir
        self.sample_dir = args.sample_dir
        self.final_model = args.final_model
        self.model_save_step = args.model_save_step

        #self.dataset = args.dataset
        #self.model_name = args.model_name

        self.img_size = args.img_size
        self.z_dim = args.z_dim
        self.text_embed_dim = args.text_embed_dim
        self.text_reduced_dim = args.text_reduced_dim
        self.learning_rate = args.learning_rate
        self.beta1 = args.beta1
        self.beta2 = args.beta2
        self.l1_coeff = args.l1_coeff
        self.resume_epoch = args.resume_epoch
        self.resume_idx = args.resume_idx
        self.SUPERVISED = SUPERVISED

        # Logger setting
        log_name = datetime.datetime.now().strftime('%Y-%m-%d') + '.log'
        self.logger = logging.getLogger('__name__')
        self.logger.setLevel(logging.INFO)
        self.formatter = logging.Formatter(
            '%(asctime)s:%(levelname)s:%(message)s')
        self.file_handler = logging.FileHandler(
            os.path.join(self.log_dir, log_name))
        self.file_handler.setFormatter(self.formatter)
        self.logger.addHandler(self.file_handler)

        self.build_model()

    def smooth_label(self, tensor, offset):
        return tensor + offset

    def dump_imgs(self, images_array, name):
        # `dump` is assumed to be pickle.dump (e.g. `from pickle import dump`)
        with open('{}.pickle'.format(name), 'wb') as file:
            dump(images_array, file)

    def build_model(self):
        """ A function of defining following instances :

		-----  Generator
		-----  Discriminator
		-----  Optimizer for Generator
		-----  Optimizer for Discriminator
		-----  Defining Loss functions

		"""

        # ---------------------------------------------------------------------#
        #						1. Network Initialization					   #
        # ---------------------------------------------------------------------#
        self.gen = Generator(batch_size=self.batch_size,
                             img_size=self.img_size,
                             z_dim=self.z_dim,
                             text_embed_dim=self.text_embed_dim,
                             text_reduced_dim=self.text_reduced_dim)

        self.disc = Discriminator(batch_size=self.batch_size,
                                  img_size=self.img_size,
                                  text_embed_dim=self.text_embed_dim,
                                  text_reduced_dim=self.text_reduced_dim)

        self.gen_optim = optim.Adam(self.gen.parameters(),
                                    lr=self.learning_rate,
                                    betas=(self.beta1, self.beta2))

        self.disc_optim = optim.Adam(self.disc.parameters(),
                                     lr=self.learning_rate,
                                     betas=(self.beta1, self.beta2))

        self.cls_gan_optim = optim.Adam(itertools.chain(
            self.gen.parameters(), self.disc.parameters()),
                                        lr=self.learning_rate,
                                        betas=(self.beta1, self.beta2))

        print('-------------  Generator Model Info  ---------------')
        self.print_network(self.gen, 'G')
        print('------------------------------------------------')

        print('-------------  Discriminator Model Info  ---------------')
        self.print_network(self.disc, 'D')
        print('------------------------------------------------')

        self.criterion = nn.BCELoss().cuda()
        # self.CE_loss = nn.CrossEntropyLoss().cuda()
        # self.MSE_loss = nn.MSELoss().cuda()
        self.gen.train()
        self.disc.train()

    def print_network(self, model, name):
        """ A function for printing total number of model parameters """
        num_params = 0
        for p in model.parameters():
            num_params += p.numel()

        print(model)
        print(name)
        print("Total number of parameters: {}".format(num_params))

    def load_checkpoints(self, resume_epoch, idx):
        """Restore the trained generator and discriminator."""
        print('Loading the trained models from epoch {} and iteration {}...'.
              format(resume_epoch, idx))
        G_path = os.path.join(self.checkpoint_dir,
                              '{}-{}-G.ckpt'.format(resume_epoch, idx))
        D_path = os.path.join(self.checkpoint_dir,
                              '{}-{}-D.ckpt'.format(resume_epoch, idx))
        self.gen.load_state_dict(
            torch.load(G_path, map_location=lambda storage, loc: storage))
        self.disc.load_state_dict(
            torch.load(D_path, map_location=lambda storage, loc: storage))

    def train_model(self):

        data_loader = self.data_loader

        start_epoch = 0
        if self.resume_epoch >= 0:
            start_epoch = self.resume_epoch
            self.load_checkpoints(self.resume_epoch, self.resume_idx)

        print('---------------  Model Training Started  ---------------')
        start_time = time.time()

        for epoch in range(start_epoch, self.num_epochs):
            print("Epoch: {}".format(epoch + 1))
            for idx, batch in enumerate(data_loader):
                print("Index: {}".format(idx + 1), end="\t")
                true_imgs = batch['true_imgs']
                true_embed = batch['true_embds']
                false_imgs = batch['false_imgs']

                real_labels = torch.ones(true_imgs.size(0))
                fake_labels = torch.zeros(true_imgs.size(0))

                smooth_real_labels = torch.FloatTensor(
                    self.smooth_label(real_labels.numpy(), -0.1))

                true_imgs = Variable(true_imgs.float()).cuda()
                true_embed = Variable(true_embed.float()).cuda()
                false_imgs = Variable(false_imgs.float()).cuda()

                real_labels = Variable(real_labels).cuda()
                smooth_real_labels = Variable(smooth_real_labels).cuda()
                fake_labels = Variable(fake_labels).cuda()

                # ---------------------------------------------------------------#
                # 					  2. Training the generator                  #
                # ---------------------------------------------------------------#
                self.gen.zero_grad()
                z = Variable(torch.randn(true_imgs.size(0), self.z_dim)).cuda()
                fake_imgs = self.gen.forward(true_embed, z)
                fake_out, fake_logit = self.disc.forward(fake_imgs, true_embed)
                fake_out = Variable(fake_out.data, requires_grad=True).cuda()

                true_out, true_logit = self.disc.forward(true_imgs, true_embed)
                true_out = Variable(true_out.data, requires_grad=True).cuda()

                g_sf = self.criterion(fake_out, real_labels)
                #g_img = self.l1_coeff * nn.L1Loss()(fake_imgs, true_imgs)
                gen_loss = g_sf

                gen_loss.backward()
                self.gen_optim.step()

                # ---------------------------------------------------------------#
                # 					3. Training the discriminator				 #
                # ---------------------------------------------------------------#
                self.disc.zero_grad()
                false_out, false_logit = self.disc.forward(
                    false_imgs, true_embed)
                false_out = Variable(false_out.data, requires_grad=True)

                sr = self.criterion(true_out, smooth_real_labels)
                sw = self.criterion(true_out, fake_labels)
                sf = self.criterion(false_out, smooth_real_labels)

                disc_loss = torch.log(sr) + (torch.log(1 - sw) +
                                             torch.log(1 - sf)) / 2

                disc_loss.backward()
                self.disc_optim.step()

                self.cls_gan_optim.step()

                # Logging
                loss = {}
                loss['G_loss'] = gen_loss.item()
                loss['D_loss'] = disc_loss.item()

                # ---------------------------------------------------------------#
                # 					4. Logging INFO into log_dir				 #
                # ---------------------------------------------------------------#
                log = ""
                if (idx + 1) % self.log_step == 0:
                    end_time = time.time() - start_time
                    end_time = datetime.timedelta(seconds=end_time)
                    log = "Elapsed [{}], Epoch [{}/{}], Idx [{}]".format(
                        end_time, epoch + 1, self.num_epochs, idx)

                for net, loss_value in loss.items():
                    log += "{}: {:.4f}".format(net, loss_value)
                    self.logger.info(log)
                    print(log)
                """
				# ---------------------------------------------------------------#
				# 					5. Saving generated images					 #
				# ---------------------------------------------------------------#
				if (idx + 1) % self.sample_step == 0:
					concat_imgs = torch.cat((true_imgs, fake_imgs), 0)  # ??????????
					concat_imgs = (concat_imgs + 1) / 2
					# out.clamp_(0, 1)
					 
					save_path = os.path.join(self.sample_dir, '{}-{}-images.jpg'.format(epoch, idx + 1))
					# concat_imgs.cpu().detach().numpy()
					self.dump_imgs(concat_imgs.cpu().numpy(), save_path)
					
					#save_image(concat_imgs.data.cpu(), self.sample_dir, nrow=1, padding=0)
					print ('Saved real and fake images into {}...'.format(self.sample_dir))
				"""

                # ---------------------------------------------------------------#
                # 				6. Saving the checkpoints & final model			 #
                # ---------------------------------------------------------------#
                if (idx + 1) % self.model_save_step == 0:
                    G_path = os.path.join(
                        self.checkpoint_dir,
                        '{}-{}-G.ckpt'.format(epoch, idx + 1))
                    D_path = os.path.join(
                        self.checkpoint_dir,
                        '{}-{}-D.ckpt'.format(epoch, idx + 1))
                    torch.save(self.gen.state_dict(), G_path)
                    torch.save(self.disc.state_dict(), D_path)
                    print('Saved model checkpoints into {}...\n'.format(
                        self.checkpoint_dir))

        print('---------------  Model Training Completed  ---------------')
        # Saving final model into final_model directory
        G_path = os.path.join(self.final_model, '{}-G.pth'.format('final'))
        D_path = os.path.join(self.final_model, '{}-D.pth'.format('final'))
        torch.save(self.gen.state_dict(), G_path)
        torch.save(self.disc.state_dict(), D_path)
        print('Saved final model into {}...'.format(self.final_model))
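A hypothetical driver for the trainer above; the argparse defaults merely mirror the attributes read in __init__ and are illustrative, and data_loader stands for the project's own DataLoader instance:

# Hypothetical usage sketch (argument values are illustrative assumptions).
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--num_epochs', type=int, default=200)
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--log_step', type=int, default=10)
parser.add_argument('--sample_step', type=int, default=100)
parser.add_argument('--log_dir', default='logs')
parser.add_argument('--checkpoint_dir', default='checkpoints')
parser.add_argument('--sample_dir', default='samples')
parser.add_argument('--final_model', default='final_model')
parser.add_argument('--model_save_step', type=int, default=500)
parser.add_argument('--img_size', type=int, default=64)
parser.add_argument('--z_dim', type=int, default=100)
parser.add_argument('--text_embed_dim', type=int, default=1024)
parser.add_argument('--text_reduced_dim', type=int, default=256)
parser.add_argument('--learning_rate', type=float, default=0.0002)
parser.add_argument('--beta1', type=float, default=0.5)
parser.add_argument('--beta2', type=float, default=0.999)
parser.add_argument('--l1_coeff', type=float, default=50.0)
parser.add_argument('--resume_epoch', type=int, default=-1)
parser.add_argument('--resume_idx', type=int, default=0)
args = parser.parse_args()

# data_loader is the project's batching DataLoader instance (assumed to exist).
trainer = GAN_CLS(args, data_loader)
trainer.train_model()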
Example #5
class DCGAN:
    def __init__(self, img_shape, epochs=50000, lr_gen=0.0001, lr_disc=0.0001,
                 z_shape=100, num_classes=256, batch_size=100, beta1=0.5,
                 epochs_for_sample=500):

        self.rows, self.cols, self.channels = img_shape
        self.batch_size = batch_size
        self.epochs = epochs
        self.z_shape = z_shape
        self.num_classes = num_classes
        self.epochs_for_sample = epochs_for_sample
        self.generator = Generator(self.z_shape,self.num_classes, img_shape, self.batch_size)
        self.discriminator = Discriminator(self.channels, self.num_classes, img_shape)
        self.samples = []
        self.losses = []

        self.SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))

        # Default paths.
        self.DEFAULT_LABEL_FILE = os.path.join(self.SCRIPT_PATH, './labels/256-common-hangul.txt')
        self.DEFAULT_TFRECORDS_DIR = os.path.join(self.SCRIPT_PATH, 'tfrecords-output')


        """Perform graph definition and model training.

        Here we will first create our input pipeline for reading in TFRecords
        files and producing random batches of images and labels.
        """

        labels = io.open(self.DEFAULT_LABEL_FILE, 'r', encoding='utf-8').read().splitlines()
        num_classes = len(labels)

        print('Processing data...')

        tf_record_pattern = os.path.join(self.DEFAULT_TFRECORDS_DIR, '%s-*' % 'train')
        self.train_data_files = tf.gfile.Glob(tf_record_pattern)

        """
        label, image = get_image(self.train_data_files, num_classes)

        # Associate objects with a randomly selected batch of labels and images.
        self.image_batch, self.label_batch = tf.train.shuffle_batch(
            [image, label], batch_size=self.batch_size,
            capacity=2000,
            min_after_dequeue=1000)
        """

        # Make tf.data.Dataset
        # If you want to use one more parameter for decode, use 'lambda' for data.map
        dataset = tf.data.TFRecordDataset(self.train_data_files)
        dataset = dataset.map(lambda x: get_image(x, self.num_classes))
        dataset = dataset.repeat()  # repeat indefinitely (the original referenced an undefined self.train_epoch)
        dataset = dataset.shuffle(buffer_size=3 * self.batch_size)  # for getting data in each buffer size data part
        dataset = dataset.batch(self.batch_size)  # set batch size
        dataset = dataset.prefetch(buffer_size=1)  # reduce GPU starvation

        # Make iterator for dataset
        self.iterator = dataset.make_initializable_iterator()
        self.next_element = self.iterator.get_next()

        self.phX = tf.placeholder(tf.float32, [None, self.rows, self.cols, self.channels])
        self.phZ = tf.placeholder(tf.float32, [None, self.z_shape])
        self.phY_g = tf.placeholder(tf.float32, [None, self.num_classes])
        self.phY_d = tf.placeholder(tf.float32, shape=(None,  self.rows, self.cols, self.num_classes))
    
        self.gen_out = self.generator.forward(self.phZ, self.phY_g) #output shape of this z is (?, 28, 28, 1)

        disc_logits_fake = self.discriminator.forward(self.gen_out, self.phY_d ) #out put shape of this logit is (?, 1)
        disc_logits_real = self.discriminator.forward(self.phX, self.phY_d ) # out put shape of this logit is (?, 1)
        
        disc_fake_loss = cost(tf.zeros_like(disc_logits_fake), disc_logits_fake)
        disc_real_loss = cost(tf.ones_like(disc_logits_real), disc_logits_real)

        self.disc_loss = tf.add(disc_fake_loss, disc_real_loss)
        self.gen_loss = cost(tf.ones_like(disc_logits_fake), disc_logits_fake)

        train_vars = tf.trainable_variables()

        self.disc_vars = [var for var in train_vars if 'd' in var.name]
        self.gen_vars = [var for var in train_vars if 'g' in var.name]

        self.disc_train = tf.train.AdamOptimizer(lr_disc,beta1=beta1).minimize(self.disc_loss, var_list=self.disc_vars)
        self.gen_train = tf.train.AdamOptimizer(lr_gen, beta1=beta1).minimize(self.gen_loss, var_list=self.gen_vars)
        


    def train(self):
        init = [tf.global_variables_initializer(), self.iterator.initializer]
        config = tf.ConfigProto()
        config.gpu_options.allow_growth=True
        self.sess = tf.Session(config=config)
        self.sess.run(init)

        # Initialize the queue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=self.sess, coord=coord)

        epoch_start_time = time.time()
        for i in range(self.epochs):
            # Get a random batch of images and labels.
            train_labels, train_images = self.sess.run(self.next_element)

            # Real image input for Real Discriminator,
            # Get images, reshape and rescale to pass to D
            batch_X = train_images.reshape((self.batch_size, self.rows, self.cols, self.channels))
            batch_X = batch_X * 2 - 1

            # Z noise for Generator
            batch_Z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape)) # Shape is [?, 100]

            # Label input for Generator
            batch_Y_g = train_labels
            batch_Y_g = batch_Y_g.reshape([self.batch_size, self.num_classes])

            # Label input for Discriminator
            batch_Y_d = train_labels    
            batch_Y_d = batch_Y_d.reshape([self.batch_size,1,1,self.num_classes])
            batch_Y_d = batch_Y_d * np.ones([self.batch_size, self.rows, self.cols, self.num_classes])

            _, d_loss = self.sess.run([self.disc_train, self.disc_loss], feed_dict={self.phX:batch_X, self.phZ:batch_Z, self.phY_g:batch_Y_g, self.phY_d:batch_Y_d})
            batch_Z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))
            _, g_loss = self.sess.run([self.gen_train, self.gen_loss], feed_dict={self.phX:batch_X, self.phZ:batch_Z, self.phY_g:batch_Y_g, self.phY_d:batch_Y_d})
            
            if i % self.epochs_for_sample == 0:
                epoch_end_time = time.time()
                per_epoch_ptime = epoch_end_time - epoch_start_time

                print(f"Epoch: {i}. Discriminator loss: {d_loss}. Generator loss: {g_loss}")
                # Save losses to view after training
                self.losses.append((d_loss, g_loss))

        # Save training generator samples
        with open('train_samples.pkl', 'wb') as f:
            pkl.dump(self.samples, f)

        # Generate random sample after training
        self.generate_random_sample()
        
        # Stop queue threads and close session.
        coord.request_stop()
        coord.join(threads)
        self.sess.close() 



    def generate_random_sample(self):
        init = tf.global_variables_initializer()
        config = tf.ConfigProto()
        config.gpu_options.allow_growth=True
        self.sess = tf.Session(config=config)
        self.sess.run(init)
        # Only save generator variables
        saver = tf.train.Saver(var_list=self.gen_vars)
        c = 7
        r = 7
        # data_len = Get_dataset_length(self.train_data_files)
        # data_len_y = np.ndarray(data_len, dtype=np.uint8)

        # z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))
        # idx = np.random.randint(0, data_len, self.batch_size)
        # print('length of images are ', data_len)
        # print('Batch size is ', self.batch_size)
        # print('idx shape is is ', idx.shape)
        # print('Y shape is ', data_len_y.shape)
        
        # # Label input for Generator
        # batch_Y_g = np.eye(self.num_classes)[data_len_y]
        # batch_Y_g = batch_Y_g[idx]
        # batch_Y_g = batch_Y_g.reshape([self.batch_size, self.num_classes])
        n_sample = 100
        z = np.random.uniform(-1, 1, (n_sample, self.z_shape))

        # Create conditional one-hot vectors with index 5 set to 1
        batch_Y_g = np.zeros(shape=[n_sample, self.num_classes])
        batch_Y_g[:, 5] = 1
        saver.restore(self.sess, tf.train.latest_checkpoint('checkpoints'))
        samples = self.sess.run(self.gen_out, feed_dict={self.phZ:z, self.phY_g:batch_Y_g})

        # scale between 0, 1
        fig, axs = plt.subplots(c, r)
        cnt = 0
        for i in range(c):
            for j in range(r):
                axs[i, j].imshow(samples[cnt, :, :, 0], cmap="gray")
                axs[i, j].axis('off')
                cnt += 1
        fig.savefig("generated/generated_test_1.png")
        plt.close()
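A hypothetical entry point for the conditional DCGAN above; the 64x64 grayscale shape is an assumption about the Hangul TFRecords, which are expected under tfrecords-output together with the label file configured in __init__:

# Hypothetical usage sketch; the image shape is an assumption about the dataset.
if __name__ == "__main__":
    gan = DCGAN(img_shape=(64, 64, 1), epochs=50000, num_classes=256, batch_size=100)
    gan.train()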
Example #6
class DCGAN:
    def __init__(self,
                 img_shape,
                 sample_folder_name,
                 iterations=15000,
                 lr_gen=0.0001,
                 lr_dc=0.00005,
                 z_shape=100,
                 batch_size=64,
                 beta1=0.7,
                 sample_interval=1000):

        #Create sample folder
        if not os.path.exists(f"{sample_folder_name}/"):
            os.makedirs(f"{sample_folder_name}/")

        self.SAMPLE_FOLDER_NAME = sample_folder_name
        #Unpack Image shape
        self.rows, self.cols, self.channels = img_shape
        self.batch_size = batch_size
        self.iterations = iterations
        self.z_shape = z_shape
        self.sample_interval = sample_interval
        self.generator = Generator()
        self.discriminator = Discriminator(img_shape)

        #Load CelebA dataset
        dir_data = "./data/celebA/"
        Ntrain = 200000
        Ntest = 100
        nm_imgs = np.sort(os.listdir(dir_data))
        ## name of the jpg files for training set
        nm_imgs_train = nm_imgs[:Ntrain]
        ## name of the jpg files for the testing data
        nm_imgs_test = nm_imgs[Ntrain:Ntrain + Ntest]
        img_shape = (28, 28, 3)

        X_train = []
        for i, myid in enumerate(nm_imgs_train):
            im = image.load_img(dir_data + "/" + myid,
                                target_size=img_shape[:2])
            im = image.img_to_array(im)
            X_train.append(im)
        X = np.array(X_train)

        #Values 0~255
        #Scale -1~1
        self.X = X / 127.5 - 1

        #Create placeholders for input
        self.phX = tf.placeholder(tf.float32,
                                  [None, self.rows, self.cols, self.channels])
        self.phZ = tf.placeholder(tf.float32, [None, self.z_shape])

        #Generate forward pass
        self.gen_out = self.generator.forward(self.phZ)

        #Discriminator predictions
        #Fake IMG
        dc_logits_fake = self.discriminator.forward(self.gen_out)
        #Real IMG
        dc_logits_real = self.discriminator.forward(self.phX)

        #cost functions
        #fake -- 0; real -- 1
        dc_fake_loss = cost(tf.zeros_like(dc_logits_fake), dc_logits_fake)
        dc_real_loss = cost(tf.ones_like(dc_logits_real), dc_logits_real)

        self.dc_loss = tf.add(dc_fake_loss, dc_real_loss)
        #Generator tries to fool D so that it outputs 1 for fake IMGs
        self.gen_loss = cost(tf.ones_like(dc_logits_fake), dc_logits_fake)

        #Collect trainable variables
        train_vars = tf.trainable_variables()

        #Differentiate G and D variables
        dc_vars = [var for var in train_vars if 'd' in var.name]
        gen_vars = [var for var in train_vars if 'g' in var.name]

        #Create training variables
        self.dc_train = tf.train.AdamOptimizer(lr_dc, beta1=beta1).minimize(
            self.dc_loss, var_list=dc_vars)
        self.gen_train = tf.train.AdamOptimizer(lr_gen, beta1=beta1).minimize(
            self.gen_loss, var_list=gen_vars)

    def train(self):
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        #Init all vars
        self.sess.run(init)

        #Start training loop
        for i in range(self.iterations):
            #rand batch and indices
            idx = np.random.randint(0, len(self.X), self.batch_size)
            batch_X = self.X[idx]
            batch_Z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))

            #Train D and store dc loss
            batch_X = batch_X.reshape([-1, 28, 28, self.channels])
            _, d_loss = self.sess.run([self.dc_train, self.dc_loss],
                                      feed_dict={
                                          self.phX: batch_X,
                                          self.phZ: batch_Z
                                      })

            #Create new batch for G
            batch_Z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))

            #Train G and store G loss
            _, g_loss = self.sess.run([self.gen_train, self.gen_loss],
                                      feed_dict={self.phZ: batch_Z})

            #Generate samples and print loss
            if i % self.sample_interval == 0:
                self.generate_sample(i)
                print(
                    f"Epoch:{i}. Discriminator loss: {d_loss}. Generator loss {g_loss}"
                )

    def generate_sample(self, iteration):
        # 5 samples per IMG
        c, r = 5, 5

        # New input for sample, 5*5 = 25 IMGs
        z = np.random.uniform(-1, 1, (25, self.z_shape))
        imgs = self.sess.run(self.gen_out, feed_dict={self.phZ: z})

        #Scale back to values (0,1), currently (-1,1)
        imgs = imgs * 0.5 + 0.5

        #Reorder channels BGR -> RGB across the whole batch
        #(cv2.cvtColor only accepts a single image, not a 4D batch)
        imgs = imgs[..., ::-1]

        fig, axs = plt.subplots(c, r)
        count = 0
        for i in range(c):
            for j in range(r):
                axs[i, j].imshow(imgs[count])
                axs[i, j].axis('off')
                count += 1

        # save image
        fig.savefig(f"{self.SAMPLE_FOLDER_NAME}/{iteration}.png")
        plt.close()
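A hypothetical entry point for the CelebA variant above; the 28x28x3 shape matches the target_size used when loading the images in __init__, and data/celebA/ is expected to contain the JPEG files:

# Hypothetical usage sketch.
if __name__ == "__main__":
    dcgan = DCGAN(img_shape=(28, 28, 3),
                  sample_folder_name="celeba_samples",
                  iterations=15000,
                  batch_size=64)
    dcgan.train()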
Example #7
class MGAIL(object):
    def __init__(self, environment, use_irl=False):
        self.use_irl = use_irl
        self.env = environment

        # Create placeholders for all the inputs
        self.states_ = tf.compat.v1.placeholder("float", shape=(None, self.env.state_size), name='states_')  # Batch x State
        self.states = tf.compat.v1.placeholder("float", shape=(None, self.env.state_size), name='states')  # Batch x State
        self.actions = tf.compat.v1.placeholder("float", shape=(None, self.env.action_size), name='action')  # Batch x Action
        self.label = tf.compat.v1.placeholder("float", shape=(None, 1), name='label')
        self.gamma = tf.compat.v1.placeholder("float", shape=(), name='gamma')
        self.temp = tf.compat.v1.placeholder("float", shape=(), name='temperature')
        self.noise = tf.compat.v1.placeholder("float", shape=(), name='noise_flag')
        self.do_keep_prob = tf.compat.v1.placeholder("float", shape=(), name='do_keep_prob')
        self.lprobs = tf.compat.v1.placeholder('float', shape=(None, 1), name='log_probs')

        # Create MGAIL blocks
        self.forward_model = ForwardModel(state_size=self.env.state_size,
                                          action_size=self.env.action_size,
                                          encoding_size=self.env.fm_size,
                                          lr=self.env.fm_lr)
        
        # MODIFYING THE NEW DISCRIMINATOR:
        if self.use_irl:
            self.discriminator = DiscriminatorIRL(in_dim=self.env.state_size + self.env.action_size,
                                            out_dim=1,
                                            size=self.env.d_size,
                                            lr=self.env.d_lr,
                                            do_keep_prob=self.do_keep_prob,
                                            weight_decay=self.env.weight_decay,
                                            state_only=True,
                                            gamma=self.gamma,
                                            state_size = self.env.state_size,
                                            action_size = self.env.action_size)
        # END MODIFYING THE NEW DISCRIMINATOR
        else:
            self.discriminator = Discriminator(in_dim=self.env.state_size + self.env.action_size,
                                            out_dim=2,
                                            size=self.env.d_size,
                                            lr=self.env.d_lr,
                                            do_keep_prob=self.do_keep_prob,
                                            weight_decay=self.env.weight_decay)

        self.policy = Policy(in_dim=self.env.state_size,
                              out_dim=self.env.action_size,
                              size=self.env.p_size,
                              lr=self.env.p_lr,
                              do_keep_prob=self.do_keep_prob,
                              n_accum_steps=self.env.policy_accum_steps,
                              weight_decay=self.env.weight_decay)

        # Create experience buffers
        self.er_agent = ER(memory_size=self.env.er_agent_size,
                           state_dim=self.env.state_size,
                           action_dim=self.env.action_size,
                           batch_size=self.env.batch_size,
                           history_length=1)

        self.er_expert = common.load_d4rl_er(h5path=os.path.join(self.env.run_dir, self.env.expert_data),
                                        batch_size=self.env.batch_size,
                                        history_length=1,
                                        traj_length=2)

        self.env.sigma = self.er_expert.actions_std / self.env.noise_intensity

        # Normalize the inputs
        states_ = common.normalize(self.states_, self.er_expert.states_mean, self.er_expert.states_std)
        states = common.normalize(self.states, self.er_expert.states_mean, self.er_expert.states_std)
        if self.env.continuous_actions:
            actions = common.normalize(self.actions, self.er_expert.actions_mean, self.er_expert.actions_std)
        else:
            actions = self.actions

        # 1. Forward Model
        initial_gru_state = np.ones((1, self.forward_model.encoding_size))
        forward_model_prediction, _ = self.forward_model.forward([states_, actions, initial_gru_state])
        forward_model_loss = tf.reduce_mean(tf.square(states-forward_model_prediction))
        self.forward_model.train(objective=forward_model_loss)

        # 2. Discriminator
        labels = tf.concat([1 - self.label, self.label], 1)
        lprobs = self.lprobs
        
        # MODIFIED DISCRIMINATOR SECTION
        if self.use_irl:
            self.discrim_output, log_p_tau, log_q_tau, log_pq = self.discriminator.forward(states_, actions, states, lprobs)


            correct_predictions = tf.equal(tf.cast(tf.round(self.discrim_output), tf.int64), tf.argmax(labels, 1))
            self.discriminator.acc = tf.reduce_mean(tf.cast(correct_predictions, "float"))

            d_cross_entropy = self.label*(log_p_tau-log_pq) + (1-self.label)*(log_q_tau-log_pq)

            d_loss_weighted = self.env.cost_sensitive_weight * tf.multiply(tf.compat.v1.to_float(tf.equal(tf.squeeze(self.label), 1.)), d_cross_entropy) +\
                                                            tf.multiply(tf.compat.v1.to_float(tf.equal(tf.squeeze(self.label), 0.)), d_cross_entropy)
            
            discriminator_loss = -tf.reduce_mean(d_loss_weighted)
            self.discriminator.train(objective=discriminator_loss)
        # END MODIFIED DISCRIMINATOR SECTION


        else:
            d = self.discriminator.forward(states, actions)
            # 2.1 0-1 accuracy
            correct_predictions = tf.equal(tf.argmax(d, 1), tf.argmax(labels, 1))
            self.discriminator.acc = tf.reduce_mean(tf.cast(correct_predictions, "float"))
            # 2.2 prediction
            d_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=d, labels=labels)
            # cost sensitive weighting (weight true=expert, predict=agent mistakes)
            d_loss_weighted = self.env.cost_sensitive_weight * tf.multiply(tf.compat.v1.to_float(tf.equal(tf.squeeze(self.label), 1.)), d_cross_entropy) +\
                                                            tf.multiply(tf.compat.v1.to_float(tf.equal(tf.squeeze(self.label), 0.)), d_cross_entropy)
        
            discriminator_loss = tf.reduce_mean(d_loss_weighted)
            self.discriminator.train(objective=discriminator_loss)

        # 3. Collect experience
        mu = self.policy.forward(states)
        if self.env.continuous_actions:
            a = common.denormalize(mu, self.er_expert.actions_mean, self.er_expert.actions_std)
            eta = tf.random.normal(shape=tf.shape(a), stddev=self.env.sigma)
            self.action_test = a + self.noise * eta
            # self.action_means = mu
            N = tf.shape(self.action_test)[0]
            expanded_sigma = tf.repeat(tf.expand_dims(tf.cast(self.env.sigma, dtype=tf.float32), 0), N, axis=0)
            self.action_probs_test = common.compute_action_probs_tf(self.action_test, mu, expanded_sigma)
        else:
            a = common.gumbel_softmax(logits=mu, temperature=self.temp)
            self.action_test = tf.compat.v1.argmax(a, axis=1)
            self.action_means = tf.squeeze(mu)

        # 4.3 AL
        def policy_loop(state_, t, total_cost, total_trans_err, _):
            mu = self.policy.forward(state_, reuse=True)

            if self.env.continuous_actions:
                eta = self.env.sigma * tf.random.normal(shape=tf.shape(mu))
                action = mu + eta
                N = tf.shape(action)[0]
                expanded_sigma = tf.repeat(tf.expand_dims(tf.cast(self.env.sigma, dtype=tf.float32), 0), N, axis=0)
                a_prob = common.compute_action_probs_tf(action, mu, expanded_sigma)
            else:
                action = common.gumbel_softmax_sample(logits=mu, temperature=self.temp)
                a_prob = 0.5

            # get action
            if self.env.continuous_actions:
                a_sim = common.denormalize(action, self.er_expert.actions_mean, self.er_expert.actions_std)
            else:
                a_sim = tf.compat.v1.argmax(action, axis=1)

            # get next state
            state_env, _, env_term_sig = self.env.step(a_sim, mode='tensorflow')[:3]
            state_e = common.normalize(state_env, self.er_expert.states_mean, self.er_expert.states_std)
            state_e = tf.stop_gradient(state_e)

            state_a, _ = self.forward_model.forward([state_, action, initial_gru_state], reuse=True)

            state, nu = common.re_parametrization(state_e=state_e, state_a=state_a)
            total_trans_err += tf.reduce_mean(abs(nu))
            t += 1

            # minimize the gap between agent logit (d[:,0]) and expert logit (d[:,1])

            # MODIFIED DISCRIMINATOR SECTION:
            if self.use_irl:
                self.discrim_output, log_p_tau, log_q_tau, log_pq = self.discriminator.forward(state_, action, state, a_prob, reuse=True)
                cost = self.al_loss(log_p=log_p_tau, log_q=log_q_tau, log_pq=log_pq)
            else:
                d = self.discriminator.forward(state_, action, reuse=True)
                cost = self.al_loss(d=d)

            # END MODIFIED DISCRIMINATOR SECTION

            # add step cost
            total_cost += tf.multiply(tf.pow(self.gamma, t), cost)

            return state, t, total_cost, total_trans_err, env_term_sig

        def policy_stop_condition(state_, t, cost, trans_err, env_term_sig):
            cond = tf.logical_not(env_term_sig)
            cond = tf.logical_and(cond, t < self.env.n_steps_train)
            cond = tf.logical_and(cond, trans_err < self.env.total_trans_err_allowed)
            return cond

        state_0 = tf.slice(states, [0, 0], [1, -1])
        loop_outputs = tf.while_loop(policy_stop_condition, policy_loop, [state_0, 0., 0., 0., False])
        self.policy.train(objective=loop_outputs[2])

    def al_loss(self, d=None, log_p=None, log_q=None, log_pq=None):
        if not self.use_irl:
            logit_agent, logit_expert = tf.split(axis=1, num_or_size_splits=2, value=d)
            labels = tf.concat([tf.zeros_like(logit_agent), tf.ones_like(logit_expert)], 1)
            d_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=d, labels=labels)
        
        else: # USING IRL
            d_cross_entropy = - (log_p - log_pq) + (log_q - log_pq)

        loss = tf.reduce_mean(d_cross_entropy)
        return loss*self.env.policy_al_w
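The continuous-action branches above call common.compute_action_probs_tf to evaluate the policy's Gaussian density at the sampled action, but the helper itself is not shown here. A minimal sketch of what it might look like, assuming a diagonal Gaussian with per-dimension standard deviation (an illustration, not the original implementation):

import numpy as np
import tensorflow as tf

def compute_action_probs_tf(actions, mu, sigma):
    # Density of `actions` under a diagonal Gaussian N(mu, sigma^2),
    # multiplied over action dimensions -> shape (batch, 1).
    var = tf.square(sigma)
    log_prob = -0.5 * tf.square(actions - mu) / var - 0.5 * tf.math.log(2.0 * np.pi * var)
    return tf.exp(tf.reduce_sum(log_prob, axis=1, keepdims=True))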
Example #8
0
class DCGAN:
    # initialise network with learning rate, layer shape etc
    def __init__(self,
                 img_shape,
                 epochs=50000,
                 lr_gen=0.0001,
                 lr_disc=0.0001,
                 z_shape=100,
                 batch_size=64,
                 beta1=0.5,
                 epochs_for_sample=500):

        # initialise architecture vars
        self.rows, self.cols, self.channels = img_shape
        self.batch_size = batch_size
        self.epochs = epochs
        self.z_shape = z_shape
        self.epochs_for_sample = epochs_for_sample
        # initialise underlying networks
        self.generator = Generator(img_shape, self.batch_size)
        self.discriminator = Discriminator(img_shape)

        mnist = tf.keras.datasets.mnist
        (x_train, _), (x_test, _) = mnist.load_data()

        X = np.concatenate([x_train, x_test])
        # Samples are drawn during and after generator training. The generator uses tanh
        # at its output, so the MNIST pixel values must be rescaled from [0, 255] to [-1, 1].
        self.X = X / 127.5 - 1  # Scale between -1 and 1
        self.phX = tf.placeholder(tf.float32, [None, self.rows, self.cols])
        self.phZ = tf.placeholder(tf.float32, [None, self.z_shape])

        self.gen_out = self.generator.forward(self.phZ)

        disc_logits_fake = self.discriminator.forward(self.gen_out)
        disc_logits_real = self.discriminator.forward(self.phX)

        # compute cost functions - sigmoid cross entropy (sigmoid as real or fake)
        disc_fake_loss = cost(tf.zeros_like(disc_logits_fake),
                              disc_logits_fake)
        disc_real_loss = cost(tf.ones_like(disc_logits_real), disc_logits_real)

        self.disc_loss = tf.add(disc_fake_loss, disc_real_loss)
        self.gen_loss = cost(tf.ones_like(disc_logits_fake), disc_logits_fake)

        train_vars = tf.trainable_variables()

        disc_vars = [var for var in train_vars if 'd' in var.name]
        gen_vars = [var for var in train_vars if 'g' in var.name]

        self.disc_train = tf.train.AdamOptimizer(
            lr_disc, beta1=beta1).minimize(self.disc_loss, var_list=disc_vars)
        self.gen_train = tf.train.AdamOptimizer(lr_gen, beta1=beta1).minimize(
            self.gen_loss, var_list=gen_vars)

    def train(self):
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

        for i in range(self.epochs):
            idx = np.random.randint(0, len(self.X), self.batch_size)
            batch_X = self.X[idx]
            batch_Z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))

            _, d_loss = self.sess.run([self.disc_train, self.disc_loss],
                                      feed_dict={
                                          self.phX: batch_X,
                                          self.phZ: batch_Z
                                      })
            batch_Z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))
            _, g_loss = self.sess.run([self.gen_train, self.gen_loss],
                                      feed_dict={self.phZ: batch_Z})
            if i % self.epochs_for_sample == 0:
                self.generate_sample(i)
                print(
                    f"Epoch: {i}. Discriminator loss: {d_loss}. Generator loss: {g_loss}"
                )

    def generate_sample(self, epoch):
        c = 7
        r = 7
        z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))
        imgs = self.sess.run(self.gen_out, feed_dict={self.phZ: z})
        imgs = imgs * 0.5 + 0.5
        # scale between 0, 1
        fig, axs = plt.subplots(c, r)
        cnt = 0
        for i in range(c):
            for j in range(r):
                axs[i, j].imshow(imgs[cnt, :, :, 0], cmap="gray")
                axs[i, j].axis('off')
                cnt += 1
        fig.savefig("samples/%d.png" % epoch)
        plt.close()
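The cost helper used by this DCGAN is not included in the snippet. Given the comment about sigmoid cross entropy, a plausible minimal definition (an assumption, not the author's code) is:

import tensorflow as tf

def cost(labels, logits):
    # Mean sigmoid cross entropy between raw discriminator logits and 0/1 targets,
    # matching calls such as cost(tf.ones_like(logits), logits).
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))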
Example #9
0
def main(args):
    transform = transforms.ToTensor()

    args.dataset_root.mkdir(parents=True, exist_ok=True)

    train_dataset = torchvision.datasets.MNIST(
        args.dataset_root, train=True, download=True, transform=transform
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        shuffle=True,
        batch_size=args.batch_size,
        pin_memory=True,
        num_workers=8,
    )

    log_dir = get_summary_writer_log_dir(args)

    summary_writer = SummaryWriter(
            str(log_dir),
            flush_secs=5
    )

    noise_vector_size = 100
    # Use float labels so the torch.full calls below create float tensors (BCELoss expects float targets)
    real_label = 1.0
    fake_label = 0.0

    fixed_noise = torch.randn(16,100,1,1).to(DEVICE)
    fixed_noise_generator_output = dict()

    generator = Generator(noise_vector_size,28,28,1).to(DEVICE)

    discriminator = Discriminator(28,28,1).to(DEVICE)

    criterion = nn.BCELoss()

    generator_optimiser = optim.Adam(generator.parameters(), 0.0002,(0.5,0.999))
    discriminator_optimiser = optim.Adam(discriminator.parameters(), 0.0002, (0.5,0.999))

    try:
        os.mkdir("generated_digits")
    except FileExistsError:
        pass

    step = 0

    for i in range(args.epochs):
        for j, (batch,labels) in enumerate(train_loader):
            batch = batch.to(DEVICE)
            labels = labels.to(DEVICE)

            discriminator.zero_grad()
            d_real_output = discriminator.forward(batch).view(-1)
            Dx = d_real_output.mean().item()

            real_labels = torch.full(labels.shape, real_label, device=DEVICE)
            d_real_error = criterion(d_real_output, real_labels)
            d_real_error.backward()

            noise = torch.randn(len(labels), noise_vector_size, 1, 1).to(DEVICE)

            fake_data = generator.forward(noise)
            fake_labels = torch.full(labels.shape, fake_label, device=DEVICE)

            d_fake_output = discriminator.forward(fake_data.detach()).view(-1)
            d_fake_error = criterion(d_fake_output, fake_labels)
            d_fake_error.backward()
            DGz = d_fake_output.mean().item()

            d_error = d_fake_error + d_real_error

            discriminator_optimiser.step()

            generator.zero_grad()

            g_real_labels = torch.full(labels.shape, real_label, device=DEVICE)

            d_output = discriminator.forward(fake_data).view(-1)

            g_error = criterion(d_output, g_real_labels)

            g_error.backward()

            generator_optimiser.step()

            print(f"epoch: {i}, step: {j+1}/{len(train_loader)}, Dx: {Dx:.5f}, DGz: {DGz:.5f}, D loss: {d_error:.5f}, G loss: {g_error:.5f}")

            summary_writer.add_scalars(
                "loss",
                {"D": d_error, "G": g_error},
                step
            )

            step += 1

        with torch.no_grad():
            fixed_output = generator.forward(fixed_noise).detach().cpu()
            fixed_noise_generator_output[i] = vutils.make_grid(fixed_output, padding=2, normalize=True)
            plt.imshow(np.transpose(fixed_noise_generator_output[i], (1,2,0)))
            plt.axis("off")
            plt.tight_layout()
            plt.savefig(f"{str(log_dir)}/{i}.png",dpi=400,bbox_inches=0)
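This training script relies on a module-level DEVICE constant and a get_summary_writer_log_dir helper that are defined elsewhere. A minimal sketch of plausible definitions follows; args.log_dir is a hypothetical argument introduced only for this illustration:

import torch
from pathlib import Path

# Hypothetical definitions; the original module may differ.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def get_summary_writer_log_dir(args):
    # Pick the first run directory that does not exist yet, so TensorBoard
    # runs are kept separate (args.log_dir is assumed here).
    i = 0
    while True:
        log_dir = Path(args.log_dir) / f"GAN_bs={args.batch_size}_run_{i}"
        if not log_dir.exists():
            return str(log_dir)
        i += 1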
Example #10
0
class DCGAN:
    def __init__(self, img_shape, epochs=50000, lr_gen=0.0001, lr_disc=0.0001, z_shape=11, batch_size=64, beta1=0.5, epochs_for_sample=10000):
        
       
        self.rows, self.cols, self.channels = img_shape
        self.batch_size = batch_size
        self.epochs = epochs
        self.z_shape = z_shape
        self.epochs_for_sample = epochs_for_sample
        self.generator = Generator(img_shape, self.batch_size, self.z_shape)
        self.discriminator = Discriminator(img_shape)
        self.matching = 0

        mnist = tf.keras.datasets.mnist 
        (x_train, y_train), (x_test, y_test) = mnist.load_data()

        X = np.concatenate([x_train, x_test])

        # encode the digit label into the last column and last row of each input image
        Y = np.concatenate([y_train, y_test])
        Y_onehot = indices_to_one_hot(Y, self.rows)
        self.X = X / 127.5 - 1 # Scale between -1 and 1
        self.X[:, :, -1] = Y_onehot
        self.X[:, -1, :] = Y_onehot
        # FIXME: only works with square images, because the same Y_onehot vector is used for both the last column & the last row

        self.phX = tf.placeholder(tf.float32, [None, self.rows, self.cols])
        self.phZ = tf.placeholder(tf.float32, [None, self.z_shape])
    
        self.gen_out = self.generator.forward(self.phZ)

        disc_logits_fake = self.discriminator.forward(self.gen_out)
        disc_logits_real = self.discriminator.forward(self.phX)

        disc_fake_loss = cost(tf.ones_like(disc_logits_fake), disc_logits_fake)
        disc_real_loss = cost(tf.zeros_like(disc_logits_real), disc_logits_real)

        self.disc_loss = tf.add(disc_fake_loss, disc_real_loss)
        self.gen_loss = cost(tf.zeros_like(disc_logits_fake), disc_logits_fake)

        train_vars = tf.trainable_variables()

        disc_vars = [var for var in train_vars if 'd' in var.name]
        gen_vars = [var for var in train_vars if 'g' in var.name]

        self.disc_train = tf.train.AdamOptimizer(lr_disc,beta1=beta1).minimize(self.disc_loss, var_list=disc_vars)
        self.gen_train = tf.train.AdamOptimizer(lr_gen, beta1=beta1).minimize(self.gen_loss, var_list=gen_vars)
        


    def train(self):
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

        for i in range(self.epochs):
            # print(i)
            idx = np.random.randint(0, len(self.X), self.batch_size)
            batch_X = self.X[idx]
            batch_Z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))
            # add digit information in the input
            batch_Z[:, :10] = 0.
            # if i % self.epochs_for_sample == 0:
            #     self.generate_sample(i)
            #     print(i)
            np.put_along_axis(batch_Z, np.random.randint(10, size=self.batch_size)[..., np.newaxis], 1, axis=1)
            _, d_loss = self.sess.run([self.disc_train, self.disc_loss], feed_dict={self.phX:batch_X, self.phZ:batch_Z})
            batch_Z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))
            batch_Z[:, :10] = 0.
            np.put_along_axis(batch_Z, np.random.randint(10, size=self.batch_size)[..., np.newaxis], 1, axis=1)
            _, g_loss = self.sess.run([self.gen_train, self.gen_loss], feed_dict={self.phZ: batch_Z})
            if i % self.epochs_for_sample == 0:
                self.generate_sample(i)
                print(f"Epoch: {i}. Discriminator loss: {d_loss}. Generator loss: {g_loss}. Matching digit indicators: {self.matching}")


    def generate_sample(self, epoch):
        c = 7
        r = 7
        z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))
        z[:, :10] = 0.
        y = np.random.randint(10, size=self.batch_size)
        np.put_along_axis(z, y[..., np.newaxis], 1, axis=1)
        imgs = self.sess.run(self.gen_out, feed_dict={self.phZ:z})
        imgs = imgs*0.5 + 0.5
        result = np.argmax(imgs[:, :, -1, 0], axis=1)
        self.matching = np.sum(y == result)

        # scale between 0, 1
        fig, axs = plt.subplots(c, r)
        fig.suptitle(f"Matching indices: {self.matching}")
        cnt = 0
        for i in range(c):
            for j in range(r):
                axs[i, j].imshow(imgs[cnt, :, :, 0], cmap="gray")
                axs[i, j].axis('off')
                # if discs[cnt]:
                #     col = 'g'
                # else:
                #     col = 'r'
                axs[i, j].set_title(str(y[cnt]), size=7, pad=0.5) #, color = col)
                axs[i, j].text(30, 13.5, str(result[cnt]), size=7,
                               verticalalignment='center')
                cnt += 1
        fig.savefig("samples/targets_swapped_" + str(epoch).zfill(len(str(self.epochs))) + ".png")
        plt.close()
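indices_to_one_hot is not defined in this snippet. Since it is called as indices_to_one_hot(Y, self.rows) and its result is written into a full row/column of each image, a minimal sketch (an assumption about the helper) is:

import numpy as np

def indices_to_one_hot(targets, width):
    # One-hot encode integer labels into vectors of length `width`
    # (the image width, so digits 0-9 occupy the first ten slots).
    return np.eye(width)[np.asarray(targets).reshape(-1)]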
Example #11
0
            NEGATIVE_FILE1 =  NEGATIVE_FILE + '\\' + str(total_batch) + '\\gene'
            samples_lenth = generate_samples(generator, BATCH_SIZE, GENERATED_NUM, NEGATIVE_FILE1,x_info,x_ids,start_id_list,end_id_list,bank_dict)                                        
            NEGATIVE_FILEtxt = NEGATIVE_FILE + '\\' + str(total_batch) + '\\gene.txt'
            dis_data_iter = DisDataIter(real_data_id1, NEGATIVE_FILEtxt, BATCH_SIZE)
            for q in range(2):                        
                total_loss = 0.
                total_words = 0.                        
                n = 0 
                for (data, target) in dis_data_iter:
                    n+=1
                    data = Variable(data)
                    target = Variable(target)
                    if opt.cuda:
                        data, target = data.cuda(), target.cuda()
                    target = target.contiguous().view(-1) 
                    pred = discriminator.forward(data) 
                    loss = dis_criterion(pred, target) # negative log likelihood loss                            
                    total_loss += loss.item()
                    total_words += data.size(0) * data.size(1)       
                    
                    dis_optimizer.zero_grad() 
                    loss.backward()     
                    dis_optimizer.step()      
                
                dis_data_iter.reset() 
                f_loss = math.exp(total_loss/ total_words) 

                d_save_path = os.path.join(m_save_path, c_cat)
                if not os.path.exists(d_save_path):
                    os.mkdir(d_save_path)
                d_save_path = os.path.join(d_save_path, 'discriminator'+str(total_batch)+'.pkl')                        
Example #12
0
        for epoch in range(num_epochs):
            x_batch = torch.from_numpy(next(x))
            y_batch = torch.from_numpy(next(y))
            model.train(x_batch.type(torch.FloatTensor),
                        y_batch.type(torch.FloatTensor), args.loss_type)
        #torch.save(model.state_dict(), os.path.join(directory, 'best_params_'+str(i)+'.pt'))
        x_dist = samplers.distribution1(0, 10000)
        y_dist = samplers.distribution1(i, 10000)
        x_dist_batch = torch.from_numpy(next(x_dist))
        y_dist_batch = torch.from_numpy(next(y_dist))
        x_value = x_dist_batch.type(torch.FloatTensor)
        y_value = y_dist_batch.type(torch.FloatTensor)
        if args.loss_type == "JSD":
            print("JSD")
            jsd = model.loss_JSD(
                model.forward(x_dist_batch.type(torch.FloatTensor)),
                model.forward(y_dist_batch.type(torch.FloatTensor)))
            values.append(-jsd)
        elif args.loss_type == "WD":
            wd = torch.mean(model.forward(x_value) - model.forward(y_value))
            values.append(wd)

    plt.plot(phi, values, 'o-')
    if args.loss_type == "JSD":
        plt.ylabel("JSD")
        plt.xlabel("phi")
        plt.title("JSD vs phi")
        plt.savefig(directory + '_JSD_phi.png', bbox_inches='tight')
    elif args.loss_type == "WD":
        plt.ylabel("Wasserstein Distance")
        plt.xlabel("Phi")
Example #13
0
class DCGAN:
    def __init__(self,
                 img_shape,
                 epochs=50000,
                 lr_gen=0.0001,
                 lr_dc=0.0001,
                 z_shape=100,
                 batch_size=64,
                 beta1=0.5,
                 epochs_for_sample=500):

        # lr_gen = Learning rate for Generator
        # lr_dc = Learning rate for Discriminator
        # z_shape = Shape for generator input
        # batch_size can be changed --> bigger = slower per epoch, smaller = faster per epoch (but needs more epochs)
        # epochs_for_sample --> interval (in epochs) for generating sample images

        # Unpack image Shape
        self.rows, self.cols, self.channels = img_shape
        self.batch_size = batch_size
        self.epochs = epochs
        self.z_shape = z_shape
        self.epochs_for_sample = epochs_for_sample
        self.generator = Generator(img_shape, self.batch_size)
        self.discriminator = Discriminator(img_shape)

        # Load MNIST dataset
        mnist = tf.keras.datasets.mnist
        (x_train, _), (x_test, _) = mnist.load_data()

        # Labels not needed
        # Differentiation between x_train and x_test not needed --> Concat x_train and x_test
        X = np.concatenate([x_train, x_test])
        # Values between 0 and 255
        # Scale between -1 and 1
        self.X = X / 127.5 - 1

        # Create placeholders for input
        self.phX = tf.placeholder(tf.float32, [None, self.rows, self.cols])
        self.phZ = tf.placeholder(tf.float32, [None, self.z_shape])

        # Generator forward pass
        self.gen_out = self.generator.forward(self.phZ)

        # Discriminator prediction
        dc_logits_fake = self.discriminator.forward(self.gen_out)

        # Real images
        dc_logits_real = self.discriminator.forward(self.phX)

        # Cost functions
        # Discriminator should predict that fake images are 0 and real images are 1

        dc_fake_loss = cost(tf.zeros_like(dc_logits_fake), dc_logits_fake)
        dc_real_loss = cost(tf.ones_like(dc_logits_real), dc_logits_real)

        self.dc_loss = tf.add(dc_fake_loss, dc_real_loss)

        # Generator tries to fool discriminator so that the discriminator outputs 1 for fake images
        self.gen_loss = cost(tf.ones_like(dc_logits_fake), dc_logits_fake)

        train_vars = tf.trainable_variables()

        # Differentiating between generator and discriminator variables
        dc_vars = [var for var in train_vars if 'd' in var.name]
        gen_vars = [var for var in train_vars if 'g' in var.name]

        # Create training variables
        self.dc_train = tf.train.AdamOptimizer(lr_dc, beta1=beta1).minimize(
            self.dc_loss, var_list=dc_vars)
        self.gen_train = tf.train.AdamOptimizer(lr_gen, beta1=beta1).minimize(
            self.gen_loss, var_list=gen_vars)

    def train(self):
        init = tf.global_variables_initializer()
        self.sess = tf.Session()

        # Initialize all variables
        self.sess.run(init)

        # Start training loop
        for i in range(self.epochs):
            # Create random batch for training
            # Create random indices (minimum: 0, maximum: size of X, size: batch_size)
            idx = np.random.randint(0, len(self.X), self.batch_size)
            batch_X = self.X[idx]
            batch_Z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))

            # Train discriminator and store dc loss
            # batch_X = batch_X.reshape([-1, 28, 28, 1])
            _, d_loss = self.sess.run([self.dc_train, self.dc_loss],
                                      feed_dict={
                                          self.phX: batch_X,
                                          self.phZ: batch_Z
                                      })

            # Create new batch for generator training
            batch_Z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))

            # Train generator and store generator loss
            _, g_loss = self.sess.run([self.gen_train, self.gen_loss],
                                      feed_dict={self.phZ: batch_Z})

            # Generate samples and print loss
            if i % self.epochs_for_sample == 0:
                self.generate_sample(i)
                print(
                    f"Epoch: {i}. Discriminator loss: {d_loss}. Generator loss: {g_loss}"
                )

    def generate_sample(self, epoch):
        # 7 x 7 samples per figure
        c = 7
        r = 7

        # New input for sample
        # 7x7 = 49 image samples
        z = np.random.uniform(-1, 1, (self.batch_size, self.z_shape))
        imgs = self.sess.run(self.gen_out, feed_dict={self.phZ: z})

        # Scale back to values between 0 and 1 (currently between -1 and 1)
        imgs = imgs * 0.5 + 0.5

        # Create subplots
        fig, axs = plt.subplots(c, r)
        count = 0
        for i in range(c):
            for j in range(r):
                axs[i, j].imshow(imgs[count, :, :, 0], cmap="gray")
                axs[i, j].axis("off")
                count += 1

        # Save images
        fig.savefig("DCGAN 01/samples/%d.png" % epoch)
        plt.close()
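A typical way to run this class, assuming Generator, Discriminator, and cost are defined as in the surrounding examples (a usage sketch, not part of the original code):

if __name__ == "__main__":
    # MNIST images are 28x28 with a single channel.
    # (the samples output directory referenced in generate_sample must already exist)
    dcgan = DCGAN(img_shape=(28, 28, 1))
    dcgan.train()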
Example #14
0
        # Batch size is 1 by default.
        # It's not necessary to use a data loader
        # as I'm loading images one by one.
        for index in tqdm(range(len(inp_list))):
            img = mpimg.imread(config.data_dir + inp_list[index])
            img = utils.image2tensor(img)
            label_img = mpimg.imread(config.label_dir + label_list[index])
            label_img = utils.image2tensor(label_img)

            if torch.cuda.is_available():
                img = img.cuda()
                label_img = label_img.cuda()

            # Train Discriminator
            optim_d.zero_grad()
            mask_r, D_real = model_discriminator.forward(label_img)
            masks, f1, f2, x = model_gen.forward(img)
            mask_f, D_fake = model_discriminator.forward(x)

            # Eq9
            # L_map is the loss between the features extracted from
            # interior layers of the discriminator and the final attention map
            map_loss = d_map_loss(masks[-1], mask_f, mask_r)

            # -log(D(R))
            D_loss_real = BCE_loss(D_real, label_real)
            # -log(1-D(O)) where O = G(z)
            D_loss_fake = BCE_loss(D_fake, label_fake)
            # Eq8. Gamma default to 0.05
            D_loss = D_loss_real + D_loss_fake + config.gamma * map_loss
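d_map_loss is not included in this snippet. The comment about Eq. 9 suggests an MSE-style attention-map loss, as used in attentive-GAN de-raining models; one plausible sketch (an assumption, not the author's implementation) is:

import torch
import torch.nn.functional as F

def d_map_loss(attention_map, mask_fake, mask_real):
    # Sketch of an Eq. 9-style map loss: the discriminator's interior mask for a
    # generated image should match the generator's final attention map, while its
    # mask for a real (clean) image should be close to an all-zero map.
    zero_map = torch.zeros_like(mask_real)
    return F.mse_loss(mask_fake, attention_map) + F.mse_loss(mask_real, zero_map)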
Example #15
0
class MGAIL(object):
    def __init__(self, environment, reweight, ensemble):

        self.env = environment
        self.reweight = reweight
        self.ensemble = ensemble

        # Create placeholders for all the inputs
        self.states_ = tf.placeholder("float", shape=(None, self.env.state_size), name='states_')  # Batch x State
        self.states = tf.placeholder("float", shape=(None, self.env.state_size), name='states')  # Batch x State
        self.actions = tf.placeholder("float", shape=(None, self.env.action_size), name='action')  # Batch x Action
        self.label = tf.placeholder("float", shape=(None, 1), name='label')
        self.gamma = tf.placeholder("float", shape=(), name='gamma')
        self.temp = tf.placeholder("float", shape=(), name='temperature')
        self.noise = tf.placeholder("float", shape=(), name='noise_flag')
        self.do_keep_prob = tf.placeholder("float", shape=(), name='do_keep_prob')

        self.states_e_ = tf.placeholder("float", shape=(None, self.env.state_size), name='states_e_')
        self.states_e = tf.placeholder("float", shape=(None, self.env.state_size), name='states_e')
        self.actions_e = tf.placeholder("float", shape=(None, self.env.action_size), name='action_e')
        self.ex_wts_ = tf.placeholder("float", shape=(self.ensemble, None), name='ex_wts')

        # Create MGAIL blocks
        self.forward_model = ForwardModel(state_size=self.env.state_size,
                                          action_size=self.env.action_size,
                                          encoding_size=self.env.fm_size,
                                          lr=self.env.fm_lr,
                                          ensemble=self.ensemble)

        self.discriminator = Discriminator(in_dim=self.env.state_size + self.env.action_size,
                                           out_dim=2,
                                           size=self.env.d_size,
                                           lr=self.env.d_lr,
                                           do_keep_prob=self.do_keep_prob,
                                           weight_decay=self.env.weight_decay)

        self.policy = Policy(in_dim=self.env.state_size,
                              out_dim=self.env.action_size,
                              size=self.env.p_size,
                              lr=self.env.p_lr,
                              do_keep_prob=self.do_keep_prob,
                              n_accum_steps=self.env.policy_accum_steps,
                              weight_decay=self.env.weight_decay)

        # Create experience buffers
        self.er_agent = ER(memory_size=self.env.er_agent_size,
                           state_dim=self.env.state_size,
                           action_dim=self.env.action_size,
                           reward_dim=1,  # stub connection
                           qpos_dim=self.env.qpos_size,
                           qvel_dim=self.env.qvel_size,
                           batch_size=self.env.batch_size,
                           history_length=1)

        self.er_expert = common.load_er(fname=os.path.join(self.env.run_dir, self.env.expert_data),
                                        batch_size=self.env.batch_size,
                                        history_length=1,
                                        traj_length=2)

        self.env.sigma = self.er_expert.actions_std / self.env.noise_intensity

        # Normalize the inputs
        states_ = common.normalize(self.states_, self.er_expert.states_mean, self.er_expert.states_std)
        states = common.normalize(self.states, self.er_expert.states_mean, self.er_expert.states_std)
        if self.env.continuous_actions:
            actions = common.normalize(self.actions, self.er_expert.actions_mean, self.er_expert.actions_std)
        else:
            actions = self.actions

        states_e_ = common.normalize(self.states_e_, self.er_expert.states_mean, self.er_expert.states_std)
        states_e = common.normalize(self.states_e, self.er_expert.states_mean, self.er_expert.states_std)
        if self.env.continuous_actions:
            actions_e = common.normalize(self.actions_e, self.er_expert.actions_mean, self.er_expert.actions_std)
        else:
            actions_e = self.actions_e

        # 1. Forward Model
        if self.reweight:
            initial_gru_state = np.ones((1, self.forward_model.encoding_size))
            self.forward_model.train(x_=[states_, actions, initial_gru_state], y_=states, ex_wts=self.ex_wts_)

            initial_gru_state_rw = np.ones((1, self.forward_model.encoding_size))
            initial_gru_state_val = np.ones((1, self.forward_model.encoding_size))
            self.forward_model.reweight(x_=[states_, actions, initial_gru_state_rw], y_=states,
                                        x_val_=[states_e_, actions_e, initial_gru_state_val], y_val_=states_e,
                                        bsize_a=self.env.batch_size, bsize_b=self.env.batch_size)
        else:
            initial_gru_state = np.ones((1, self.forward_model.encoding_size))
            self.forward_model.train(x_=[states_, actions, initial_gru_state], y_=states, ex_wts=None)

        # 1.1 prediction (for development)
        # self.forward_model.predict(x_=[states_, actions, initial_gru_state], y_=states)

        # 2. Discriminator
        labels = tf.concat([1 - self.label, self.label], 1)
        d = self.discriminator.forward(states, actions)

        # 2.1 0-1 accuracy
        correct_predictions = tf.equal(tf.argmax(d, 1), tf.argmax(labels, 1))
        self.discriminator.acc = tf.reduce_mean(tf.cast(correct_predictions, "float"))
        # 2.2 prediction
        d_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=d, labels=labels)
        # cost sensitive weighting (weight true=expert, predict=agent mistakes)
        d_loss_weighted = self.env.cost_sensitive_weight * tf.multiply(tf.to_float(tf.equal(tf.squeeze(self.label), 1.)), d_cross_entropy) +\
                                                           tf.multiply(tf.to_float(tf.equal(tf.squeeze(self.label), 0.)), d_cross_entropy)
        discriminator_loss = tf.reduce_mean(d_loss_weighted)
        self.discriminator.train(objective=discriminator_loss)

        # 3. Collect experience
        mu = self.policy.forward(states)
        if self.env.continuous_actions:
            a = common.denormalize(mu, self.er_expert.actions_mean, self.er_expert.actions_std)
            eta = tf.random_normal(shape=tf.shape(a), stddev=self.env.sigma)
            self.action_test = tf.squeeze(a + self.noise * eta)
        else:
            a = common.gumbel_softmax(logits=mu, temperature=self.temp)
            self.action_test = tf.argmax(a, dimension=1)

        # 4.3 AL
        def policy_loop(state_, t, total_cost, total_trans_err, _):
            mu = self.policy.forward(state_, reuse=True)

            if self.env.continuous_actions:
                eta = self.env.sigma * tf.random_normal(shape=tf.shape(mu))
                action = mu + eta
            else:
                action = common.gumbel_softmax_sample(logits=mu, temperature=self.temp)

            # minimize the gap between agent logit (d[:,0]) and expert logit (d[:,1])
            d = self.discriminator.forward(state_, action, reuse=True)
            cost = self.al_loss(d)

            # add step cost
            total_cost += tf.multiply(tf.pow(self.gamma, t), cost)

            # get action
            if self.env.continuous_actions:
                a_sim = common.denormalize(action, self.er_expert.actions_mean, self.er_expert.actions_std)
            else:
                a_sim = tf.argmax(action, dimension=1)

            # get next state
            state_env, _, env_term_sig = self.env.step(a_sim, mode='tensorflow')[:3]
            state_e = common.normalize(state_env, self.er_expert.states_mean, self.er_expert.states_std)
            state_e = tf.stop_gradient(state_e)

            # state_a, _ = self.forward_model.forward([state_, action, initial_gru_state], reuse=True)
            state_a, _ = self.forward_model.forward(inputs=[state_, action, initial_gru_state],
                                                    is_training=False, dtype=tf.float32,
                                                    w_dict=None, ex_wts=None, reuse=True)

            state, nu = common.re_parametrization(state_e=state_e, state_a=state_a)
            total_trans_err += tf.reduce_mean(abs(nu))
            t += 1

            return state, t, total_cost, total_trans_err, env_term_sig

        def policy_stop_condition(state_, t, cost, trans_err, env_term_sig):
            cond = tf.logical_not(env_term_sig)
            cond = tf.logical_and(cond, t < self.env.n_steps_train)
            cond = tf.logical_and(cond, trans_err < self.env.total_trans_err_allowed)
            return cond

        state_0 = tf.slice(states, [0, 0], [1, -1])
        loop_outputs = tf.while_loop(policy_stop_condition, policy_loop, [state_0, 0., 0., 0., False])
        self.policy.train(objective=loop_outputs[2])

    def al_loss(self, d):
        logit_agent, logit_expert = tf.split(axis=1, num_or_size_splits=2, value=d)

        # Cross entropy loss
        labels = tf.concat([tf.zeros_like(logit_agent), tf.ones_like(logit_expert)], 1)
        d_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=d, labels=labels)
        loss = tf.reduce_mean(d_cross_entropy)

        return loss*self.env.policy_al_w
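Both MGAIL examples call common.re_parametrization to splice the real environment transition into the differentiable rollout. Based on how its return values (state, nu) are used above, a minimal sketch of the usual re-parametrization trick is (an assumption about the helper, not its verified source):

import tensorflow as tf

def re_parametrization(state_e, state_a):
    # Forward pass reproduces the environment's state (state_a + nu == state_e),
    # while gradients flow only through the forward-model prediction state_a.
    nu = tf.stop_gradient(state_e - state_a)
    return state_a + nu, nu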