Example #1
    def pretrain(self, sess, generator, train_discriminator):
        # samples = generate_samples(sess, BATCH_SIZE, generated_num)
        self.gen_loader.create_batches(self.positive_samples)
        results = OrderedDict({'exp_name': self.PREFIX})

        #  pre-train generator
        print('Start pre-training...')
        start = time.time()
        for epoch in tqdm(range(self.PRE_EPOCH_NUM)):
            print(' gen pre-train')
            loss = self.pre_train_epoch(sess, generator, self.gen_loader)
            # periodically sample the generator and log training metrics
            if epoch == 10 or epoch % 40 == 0:
                samples = self.generate_samples(sess, generator,
                                                self.BATCH_SIZE,
                                                self.SAMPLE_NUM)
                self.gen_loader.create_batches(samples)
                print('\t train_loss {}'.format(loss))
                mm.compute_results(samples, self.train_samples, self.ord_dict,
                                   results)

        samples = self.generate_samples(sess, generator, self.BATCH_SIZE,
                                        self.SAMPLE_NUM)
        self.gen_loader.create_batches(samples)

        print('Start training discriminator...')
        for i in tqdm(range(self.dis_alter_epoch)):
            print(' discriminator pre-train')
            d_loss, acc = train_discriminator()
        end = time.time()
        print('Total time was {:.4f}s'.format(end - start))
        return
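
The loop above relies on a `pre_train_epoch` helper that is not part of the snippet. A minimal sketch of a maximum-likelihood pre-training pass over the `gen_loader` batches is given below; the `pretrain_step` method and the loader's `reset_pointer`/`num_batch`/`next_batch` interface are assumptions modelled on common SeqGAN-style implementations, not taken from the example.

    def pre_train_epoch(self, sess, generator, data_loader):
        # One supervised (MLE) pass over every batch in the loader.
        # Assumes `numpy as np` is imported at module level, as in the examples.
        supervised_losses = []
        data_loader.reset_pointer()
        for _ in range(data_loader.num_batch):
            batch = data_loader.next_batch()
            _, loss = generator.pretrain_step(sess, batch)
            supervised_losses.append(loss)
        return np.mean(supervised_losses)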
Example #2
    def train(self, ckpt_dir='checkpoints/'):
        """Trains the model. If necessary, also includes pretraining."""

        if not self.PRETRAINED and not self.SESS_LOADED:

            self.sess.run(tf.global_variables_initializer())
            self.pretrain()

            if not os.path.exists(ckpt_dir):
                os.makedirs(ckpt_dir)
            ckpt_file = os.path.join(ckpt_dir,
                                     '{}_pretrain_ckpt'.format(self.PREFIX))
            saver = tf.train.Saver()
            path = saver.save(self.sess, ckpt_file)
            if self.verbose:
                print('Pretrain saved at {}'.format(path))

        # build the rollout policy used for Monte Carlo reward estimation
        if not hasattr(self, 'rollout'):
            self.rollout = Rollout(self.generator, 0.8, self.PAD_NUM)

        if self.verbose:
            print('\nSTARTING TRAINING')
            print('============================\n')

        results_rows = []
        for nbatch in tqdm(range(self.TOTAL_BATCH)):

            results = OrderedDict({'exp_name': self.PREFIX})

            metric = self.EDUCATION[nbatch]

            if metric in self.AV_METRICS.keys():
                reward_func = self.AV_METRICS[metric]
            else:
                raise ValueError('Metric {} not found!'.format(metric))

            if self.kwargs[metric] is not None:

                def batch_reward(samples):
                    decoded = [
                        mm.decode(sample, self.ord_dict) for sample in samples
                    ]
                    pct_unique = len(list(set(decoded))) / float(len(decoded))
                    rewards = reward_func(decoded, self.train_samples,
                                          **self.kwargs[metric])
                    # down-weight repeated sequences: a sample generated k
                    # times keeps only pct_unique / k of its reward
                    weights = np.array([
                        pct_unique / float(decoded.count(sample))
                        for sample in decoded
                    ])

                    return rewards * weights

            else:

                def batch_reward(samples):
                    decoded = [
                        mm.decode(sample, self.ord_dict) for sample in samples
                    ]
                    pct_unique = len(list(set(decoded))) / float(len(decoded))
                    rewards = reward_func(decoded, self.train_samples)
                    weights = np.array([
                        pct_unique / float(decoded.count(sample))
                        for sample in decoded
                    ])

                    return rewards * weights

            # every 10th batch, draw a larger sample for more stable metrics
            if nbatch % 10 == 0:
                gen_samples = self.generate_samples(self.BIG_SAMPLE_NUM)
            else:
                gen_samples = self.generate_samples(self.SAMPLE_NUM)
            self.gen_loader.create_batches(gen_samples)
            results['Batch'] = nbatch
            print('Batch n. {}'.format(nbatch))
            print('============================\n')

            # results
            mm.compute_results(gen_samples, self.train_samples, self.ord_dict,
                               results)

            for it in range(self.GEN_ITERATIONS):
                samples = self.generator.generate(self.sess)
                rewards = self.rollout.get_reward(self.sess, samples, 16,
                                                  self.discriminator,
                                                  batch_reward, self.LAMBDA)
                nll = self.generator.generator_step(self.sess, samples,
                                                    rewards)

                print('Rewards')
                print('~~~~~~~~~~~~~~~~~~~~~~~~\n')
                np.set_printoptions(precision=3, suppress=True)
                mean_r, std_r = np.mean(rewards), np.std(rewards)
                min_r, max_r = np.min(rewards), np.max(rewards)
                print('Mean:                {:.3f}'.format(mean_r))
                print('               +/-   {:.3f}'.format(std_r))
                print('Min:                 {:.3f}'.format(min_r))
                print('Max:                 {:.3f}'.format(max_r))
                np.set_printoptions(precision=8, suppress=False)
                results['neg-loglike'] = nll
            self.rollout.update_params()

            # generate for discriminator
            # train the discriminator only when it contributes to the reward
            if self.LAMBDA != 0:
                print('\nDISCRIMINATOR TRAINING')
                print('============================\n')
                for i in range(self.DIS_EPOCHS):
                    print('Discriminator epoch {}...'.format(i + 1))

                    negative_samples = self.generate_samples(self.POSITIVE_NUM)
                    dis_x_train, dis_y_train = self.dis_loader.load_train_data(
                        self.positive_samples, negative_samples)
                    dis_batches = self.dis_loader.batch_iter(
                        zip(dis_x_train, dis_y_train), self.DIS_BATCH_SIZE,
                        self.DIS_EPOCHS)

                    for batch in dis_batches:
                        x_batch, y_batch = zip(*batch)
                        feed = {
                            self.discriminator.input_x: x_batch,
                            self.discriminator.input_y: y_batch,
                            self.discriminator.dropout_keep_prob:
                            self.DIS_DROPOUT
                        }
                        _, step, d_loss, accuracy = self.sess.run([
                            self.dis_train_op, self.dis_global_step,
                            self.discriminator.loss,
                            self.discriminator.accuracy
                        ], feed)

                    results['D_loss_{}'.format(i)] = d_loss
                    results['Accuracy_{}'.format(i)] = accuracy
                print('\nDiscriminator trained.')
            results_rows.append(results)
            if nbatch % self.EPOCH_SAVES == 0 or \
               nbatch == self.TOTAL_BATCH - 1:

                if results_rows is not None:
                    df = pd.DataFrame(results_rows)
                    df.to_csv('{}_results.csv'.format(self.folder),
                              index=False)
                if nbatch is None:
                    label = 'final'
                else:
                    label = str(nbatch)

                # save models
                model_saver = tf.train.Saver()
                ckpt_dir = os.path.join(self.CHK_PATH, self.folder)
                if not os.path.exists(ckpt_dir):
                    os.makedirs(ckpt_dir)
                ckpt_file = os.path.join(
                    ckpt_dir, '{}_{}.ckpt'.format(self.PREFIX, label))
                path = model_saver.save(self.sess, ckpt_file)
                print('\nModel saved at {}'.format(path))

        print('\n######### FINISHED #########')
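
Both `batch_reward` closures in this example scale the raw metric reward by a uniqueness factor, so sequences repeated within a batch share their credit. The standalone sketch below reproduces just that weighting step; the function name and the toy inputs are illustrative and not part of the original code.

import numpy as np

def uniqueness_weighted(decoded, raw_rewards):
    # Fraction of distinct samples in the batch.
    pct_unique = len(set(decoded)) / float(len(decoded))
    # A sample that appears k times keeps only pct_unique / k of its reward.
    weights = np.array([pct_unique / decoded.count(s) for s in decoded])
    return np.asarray(raw_rewards) * weights

# Example: a batch of three decoded samples, one of them duplicated.
print(uniqueness_weighted(['CCO', 'CCO', 'c1ccccc1'], [1.0, 1.0, 0.5]))
# -> roughly [0.33 0.33 0.33]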
Example #3
    def train(self):

        print(
            '#########################################################################'
        )
        print('Start Reinforcement Training Generator...')
        results_rows = []
        for nbatch in tqdm(range(self.TOTAL_BATCH)):
            results = OrderedDict({'exp_name': self.PREFIX})
            batch_reward = self.make_reward(self.train_samples, nbatch)
            if nbatch % 1 == 0 or nbatch == self.TOTAL_BATCH - 1:
                print('* Making samples')
                if nbatch % 10 == 0:
                    gen_samples = self.generate_samples(
                        self.sess, self.generator, self.BATCH_SIZE,
                        self.BIG_SAMPLE_NUM)
                else:
                    gen_samples = self.generate_samples(
                        self.sess, self.generator, self.BATCH_SIZE,
                        self.SAMPLE_NUM)
                self.gen_loader.create_batches(gen_samples)
                print('batch_num: {}'.format(nbatch))
                results['Batch'] = nbatch

                # results
                mm.compute_results(gen_samples, self.train_samples,
                                   self.ord_dict, results)

            print(
                '#########################################################################'
            )
            print('-> Training generator with RL.')
            print('G Epoch {}'.format(nbatch))

            for it in range(self.TRAIN_ITER):
                samples = self.generator.generate(self.sess)
                rewards = self.rollout.get_reward(self.sess, samples, 16,
                                                  self.discriminator,
                                                  batch_reward, self.D_WEIGHT)
                nll = self.generator.generator_step(self.sess, samples,
                                                    rewards)
                # results
                self.print_rewards(rewards)
                print('neg-loglike: {}'.format(nll))
                results['neg-loglike'] = nll
            self.rollout.update_params()

            # generate for discriminator
            print('-> Training Discriminator')
            for i in range(self.D):
                print('D_Epoch {}'.format(i))
                d_loss, accuracy = self.train_discriminator()
                results['D_loss_{}'.format(i)] = d_loss
                results['Accuracy_{}'.format(i)] = accuracy
            print('results')
            results_rows.append(results)
            if nbatch % self.params["EPOCH_SAVES"] == 0:
                self.save_results(self.sess, self.PREFIX,
                                  self.PREFIX + '_model', results_rows, nbatch)

        # write results
        self.save_results(self.sess, self.PREFIX, self.PREFIX + '_model',
                          results_rows)

        print('\n*** FINISHED ***')
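
The `save_results` helper called above is not shown in this example. Judging from the saving block in Example #2, it most likely writes the accumulated result rows to CSV and checkpoints the TensorFlow session; the sketch below is a guess along those lines, keeping only the signature implied by the call sites and assuming `os`, `pandas as pd` and `tensorflow as tf` (1.x) are imported at module level as in Example #2.

    def save_results(self, sess, folder, name, results_rows=None, nbatch=None):
        # Write the metric history to CSV, mirroring the pattern in Example #2.
        if results_rows is not None:
            df = pd.DataFrame(results_rows)
            df.to_csv('{}_results.csv'.format(folder), index=False)
        # Label the checkpoint with the batch number, or 'final' at the end.
        label = 'final' if nbatch is None else str(nbatch)
        if not os.path.exists(folder):
            os.makedirs(folder)
        ckpt_file = os.path.join(folder, '{}_{}.ckpt'.format(name, label))
        path = tf.train.Saver().save(sess, ckpt_file)
        print('Model saved at {}'.format(path))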