Example #1
    def __init__(self, config, session):
        self.config = config
        self.session = session

        if self.config.method == 'svgd':
            self.filepath = '%s_%s_%s_%s_%d' % (
                config.method, config.dataset, config.kernel,
                repr(config.temperature), config.seed)
        else:
            self.filepath = '%s_%s' % (config.method, config.dataset)

        self.res_dir = './results/%s/' % self.filepath
        self.fig_dir = './results/%s/figures' % self.filepath
        self.res_gmm_dir = './results/%s/gmm' % self.filepath
        self.res_pretrain_dir = './results/%s_%s_pretrain' % (
            self.config.method, self.config.dataset)

        #for folder in [self.train_dir, self.fig_dir]:
        import glob
        for folder in [
                self.res_dir, self.fig_dir, self.res_gmm_dir,
                self.res_pretrain_dir
        ]:
            if not os.path.exists(folder):
                os.makedirs(folder)
            ### clean
            ###if self.config.clean:
            #files = glob.glob(folder + '/events.*') + glob.glob(folder + '/*.png')
            #for f in files: os.remove(f)

        #log.infov("Train Dir: %s, Figure Dir: %s", self.train_dir, self.fig_dir)
        if self.config.method == 'svgd':
            from model_svgd import SVGD
            self.model = SVGD(config)
        else:
            raise NotImplementedError

        # --- optimizer ---
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.learning_rate = config.learning_rate

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        #self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.summary_writer = tf.summary.FileWriter(self.res_dir)
        self.checkpoint_secs = 300  # 5 min

        ## pretraining op
        self.pre_train_op = self.optimize_adam(
            self.model.net_train_vars,
            loss=self.model.loss_recons_noisy,
            lr=self.config.learning_rate)

        if self.config.method == 'svgd':
            self.depict_op = self.optimize_adam(
                self.model.train_vars,
                loss=self.model.depict_loss,
                lr=self.learning_rate)
            self.svgd_op = self.optimize_adam(
                self.model.gmm_train_vars,
                train_grads=self.model.gmm_train_grads,
                lr=self.config.learning_rate)

        tf.global_variables_initializer().run()
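A minimal driver sketch for this constructor, assuming the __init__ above belongs to a Trainer class (as in the later examples) and that config exposes the fields it reads; the flag names below are hypothetical stand-ins, not part of the original code:

import argparse
import tensorflow as tf

# Hypothetical flags; the names mirror the fields read in __init__ above.
parser = argparse.ArgumentParser()
parser.add_argument('--method', default='svgd')
parser.add_argument('--dataset', default='mnist')
parser.add_argument('--kernel', default='rbf')
parser.add_argument('--temperature', type=float, default=1.0)
parser.add_argument('--seed', type=int, default=123)
parser.add_argument('--learning_rate', type=float, default=1e-3)
config = parser.parse_args()

# tf.global_variables_initializer().run() inside __init__ needs a default
# session, so the Trainer must be constructed inside the `with` block.
with tf.Session() as session:
    trainer = Trainer(config, session)
    trainer.train()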
Example #2
    def __init__(self, config, dataset, session):
        self.config = config
        self.session = session
        self.dataset = dataset

        self.filepath = '%s-%s' % (
            config.method,
            config.dataset,
        )

        self.train_dir = './train_dir/%s' % self.filepath
        #self.fig_dir = './figures/%s' % self.filepath

        #for folder in [self.train_dir, self.fig_dir]:
        for folder in [self.train_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)
            # clean train folder
            if self.config.clean:
                files = glob.glob(folder + '/*')
                for f in files:
                    os.remove(f)

        #log.infov("Train Dir: %s, Figure Dir: %s", self.train_dir, self.fig_dir)

        # --- create model ---
        self.model = SVGD(config)

        # --- optimizer ---
        #self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)
        self.global_step = tf.Variable(0, name="global_step", trainable=False)

        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            decay_step = int(0.1 * self.config.n_epoches *
                             self.config.n_train // self.config.batch_size)
            self.learning_rate = tf.train.exponential_decay(
                self.learning_rate,
                global_step=self.global_step,
                decay_steps=decay_step,
                decay_rate=0.8,
                staircase=True,
                name='decaying_learning_rate')

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.checkpoint_secs = 300  # 5 min

        #self.train_op = self.optimize_adam( self.model.kl_loss, lr=self.learning_rate)
        if self.config.method == 'svgd':
            self.train_op = self.optimize_adagrad(
                self.model.train_vars,
                train_grads=self.model.svgd_grads,
                lr=self.learning_rate)
        elif self.config.method in ['svgd_kfac', 'map_kfac', 'mixture_kfac']:
            self.inc_op = self.model.inc_ops
            self.inv_op = self.model.inv_ops
            if self.config.method == 'svgd_kfac':
                self.train_op = self.optimize_adagrad(
                    self.model.train_vars,
                    train_grads=self.model.svgd_kfac_grads,
                    lr=self.learning_rate)
            elif self.config.method == 'map_kfac':
                self.train_op = self.optimize_adagrad(
                    self.model.train_vars,
                    train_grads=self.model.map_kfac_grads,
                    lr=self.learning_rate)
            elif self.config.method == 'mixture_kfac':
                self.train_op = self.optimize_adagrad(
                    self.model.train_vars,
                    train_grads=self.model.mixture_kfac_grads,
                    lr=self.learning_rate)
        elif self.config.method in ['SGLD', 'pSGLD']:
            self.train_op = self.optimize_sgd(
                self.model.train_vars,
                train_grads=self.model.psgld_grads,
                lr=1.0)

        tf.global_variables_initializer().run()
        if config.checkpoint is not None:
            self.ckpt_path = tf.train.latest_checkpoint(self.config.checkpoint)
            if self.ckpt_path is not None:
                log.info("Checkpoint path: %s", self.ckpt_path)
                self.saver.restore(self.session, self.ckpt_path)
                log.info("Loaded the pretrained parameters from the provided checkpoint path")
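For reference, the staircase schedule built above multiplies the base rate by 0.8 once every decay_step updates. A NumPy sketch of the same rule (the decay_steps value below is fabricated; the snippet derives it from int(0.1 * n_epoches * n_train // batch_size)):

import numpy as np

def staircase_lr(base_lr, step, decay_steps, decay_rate=0.8):
    # Mirrors tf.train.exponential_decay(..., staircase=True):
    #   lr = base_lr * decay_rate ** floor(step / decay_steps)
    return base_lr * decay_rate ** np.floor(step / decay_steps)

for step in [0, 999, 1000, 5000]:
    print(step, staircase_lr(1e-3, step, decay_steps=1000))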
Example #3
class Trainer(object):
    def optimize_sgd(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=lr)  # plain SGD
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(tf.reduce_mean(loss),
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def optimize_adagrad(self,
                         train_vars,
                         loss=None,
                         train_grads=None,
                         lr=1e-2):
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=lr, decay=0.95)  # RMSProp; the method name is historical
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(tf.reduce_mean(loss),
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def optimize_adam(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        assert (loss is not None) or (train_grads is not None), \
            'either loss or train_grads must be provided'
        optimizer = tf.train.AdamOptimizer(lr)
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(loss,
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def __init__(self, config, dataset, session):
        self.config = config
        self.session = session
        self.dataset = dataset

        self.filepath = '%s-%s' % (
            config.method,
            config.dataset,
        )

        self.train_dir = './train_dir/%s' % self.filepath
        #self.fig_dir = './figures/%s' % self.filepath

        #for folder in [self.train_dir, self.fig_dir]:
        for folder in [self.train_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)
            # clean train folder
            if self.config.clean:
                files = glob.glob(folder + '/*')
                for f in files:
                    os.remove(f)

        #log.infov("Train Dir: %s, Figure Dir: %s", self.train_dir, self.fig_dir)

        # --- create model ---
        self.model = SVGD(config)

        # --- optimizer ---
        #self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)
        self.global_step = tf.Variable(0, name="global_step", trainable=False)

        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            decay_step = int(0.1 * self.config.n_epoches *
                             self.config.n_train // self.config.batch_size)
            self.learning_rate = tf.train.exponential_decay(
                self.learning_rate,
                global_step=self.global_step,
                decay_steps=decay_step,
                decay_rate=0.8,
                staircase=True,
                name='decaying_learning_rate')

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.checkpoint_secs = 300  # 5 min

        #self.train_op = self.optimize_adam( self.model.kl_loss, lr=self.learning_rate)
        if self.config.method == 'svgd':
            self.train_op = self.optimize_adagrad(
                self.model.train_vars,
                train_grads=self.model.svgd_grads,
                lr=self.learning_rate)
        elif self.config.method in ['svgd_kfac', 'map_kfac', 'mixture_kfac']:
            self.inc_op = self.model.inc_ops
            self.inv_op = self.model.inv_ops
            if self.config.method == 'svgd_kfac':
                self.train_op = self.optimize_adagrad(
                    self.model.train_vars,
                    train_grads=self.model.svgd_kfac_grads,
                    lr=self.learning_rate)
            elif self.config.method == 'map_kfac':
                self.train_op = self.optimize_adagrad(
                    self.model.train_vars,
                    train_grads=self.model.map_kfac_grads,
                    lr=self.learning_rate)
            elif self.config.method == 'mixture_kfac':
                self.train_op = self.optimize_adagrad(
                    self.model.train_vars,
                    train_grads=self.model.mixture_kfac_grads,
                    lr=self.learning_rate)
        elif self.config.method in ['SGLD', 'pSGLD']:
            self.train_op = self.optimize_sgd(
                self.model.train_vars,
                train_grads=self.model.psgld_grads,
                lr=1.0)

        tf.global_variables_initializer().run()
        if config.checkpoint is not None:
            self.ckpt_path = tf.train.latest_checkpoint(self.config.checkpoint)
            if self.ckpt_path is not None:
                log.info("Checkpoint path: %s", self.ckpt_path)
                self.saver.restore(self.session, self.ckpt_path)
                log.info("Loaded the pretrained parameters from the provided checkpoint path")

    def evaluate(self):

        dev_set = {
            'X': self.dataset.x_train[:1000],
            'y': self.dataset.y_train[:1000],
        }
        test_set = {
            'X': self.dataset.x_test,
            'y': self.dataset.y_test,
        }

        pred_y_dev = self.session.run(self.model.y_pred,
                                      self.model.get_feed_dict(dev_set))
        pred_y_dev = pred_y_dev * self.dataset.std_y_train + self.dataset.mean_y_train
        y_dev = dev_set['y'] * self.dataset.std_y_train + self.dataset.mean_y_train
        # precision of the predictive Gaussian = 1 / dev-set MSE
        neg_log_var = -np.log(np.mean((pred_y_dev - y_dev)**2))

        y_test = test_set['y']
        pred_y_test = self.session.run(self.model.y_pred,
                                       self.model.get_feed_dict(test_set))
        pred_y_test = pred_y_test * self.dataset.std_y_train + self.dataset.mean_y_train
        # per-particle Gaussian density N(y_test; pred, var = exp(-neg_log_var))
        prob = np.sqrt(np.exp(neg_log_var) / (2 * np.pi)) * np.exp(
            -0.5 * (pred_y_test - np.expand_dims(y_test, 0))**2 *
            np.exp(neg_log_var))

        rmse = np.sqrt(np.mean((y_test - np.mean(pred_y_test, 0))**2))
        ll = np.mean(np.log(np.mean(prob, axis=0)))
        return rmse, ll, neg_log_var

    def train(self):
        log.infov("Training Starts!")
        output_save_step = 1000
        buffer_save_step = 100
        self.session.run(self.global_step.assign(0))  # reset global step
        n_updates = 1

        x_train, y_train = self.dataset.x_train, self.dataset.y_train
        for ep in xrange(1, 1 + self.config.n_epoches):
            x_train, y_train = shuffle(x_train, y_train)
            #x_train, y_train = self.dataset.x_train, self.dataset.y_train
            max_batches = self.config.n_train // self.config.batch_size

            #if self.config.n_train % self.config.batch_size != 0: max_batches += 1
            for bi in xrange(max_batches):
                start = bi * self.config.batch_size
                end = min((bi + 1) * self.config.batch_size,
                          self.config.n_train)

                batch_chunk = {
                    'X': x_train[start:end],
                    'y': y_train[start:end]
                }

                step, summary, log_prob, step_time = \
                        self.run_single_step(n_updates, batch_chunk)

                if np.any(np.isnan(log_prob)): sys.exit(1)

                self.summary_writer.add_summary(summary, global_step=step)
                #if n_updates % 500 == 0:
                #    self.log_step_message(n_updates, log_prob, step_time)

                #if n_updates % 50 == 0:
                #    rmse, ll, _ = self.evaluate()
                #    print(n_updates, rmse, ll)

                n_updates += 1

            if ep % (self.config.n_epoches // 10 + 1) == 0:
                rmse, ll, neg_log_var = self.evaluate()
                print(ep, neg_log_var, rmse, ll)

        test_rmse, test_ll, _ = self.evaluate()
        write_time = time.strftime("%m-%d-%H:%M:%S")

        pardir = "%s_%s/" % (self.config.method, repr(
            self.config.learning_rate))
        if not os.path.exists(self.config.savepath + pardir):
            os.makedirs(self.config.savepath + pardir)

        if self.config.trial == 1:
            fm = 'w'
        else:
            fm = 'a'
        with open(
                self.config.savepath + pardir + self.config.dataset +
                "_test_ll_rmse_%s.txt" % (self.filepath), fm) as f:
            f.write(
                repr(self.config.trial) + ',' + write_time + ',' +
                repr(self.config.n_epoches) + ',' + repr(test_rmse) + ',' +
                repr(test_ll) + '\n')

        #if self.config.save:
        #    # save model at the end
        #    self.saver.save(self.session,
        #        os.path.join(self.train_dir, 'model'),
        #        global_step=step)

    def run_single_step(self, step, batch_chunk):
        _start_time = time.time()
        fetch = [self.global_step, self.summary_op, self.model.log_prob]
        if self.config.method in ['svgd_kfac', 'map_kfac', 'mixture_kfac']:
            fetch += [self.inc_op]
            if step % self.config.inverse_update_freq == 0:
                fetch += [self.inv_op]

        if self.config.method == 'pSGLD':
            fetch += [self.model.moment_op]

        fetch += [self.train_op]

        fetch_values = self.session.run(fetch,
                                        feed_dict=self.model.get_feed_dict(
                                            batch_chunk, step))

        [step, summary, log_prob] = fetch_values[:3]
        _end_time = time.time()
        return step, summary, np.sum(log_prob), (_end_time - _start_time)

    def log_step_message(self, step, log_prob, step_time, is_train=True):
        if step_time == 0:
            step_time = 0.001
        log_fn = (is_train and log.info or log.infov)
        log_fn((
            " [{split_mode:5s} step {step:4d}] " +
            #"loss: {loss:.4f} " +
            "log_prob: {log_prob:.4f} " +
            "({sec_per_batch:.3f} sec/batch)").format(
                split_mode=(is_train and 'train' or 'val'),
                step=step,
                log_prob=log_prob,
                sec_per_batch=step_time,
            ))
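The evaluate() method above scores test points under a Gaussian whose precision exp(neg_log_var) is the inverse dev-set MSE, averaging the density over particles before taking the log. A self-contained NumPy sketch of that computation on fabricated predictions:

import numpy as np

rng = np.random.RandomState(0)
pred_y = rng.randn(20, 100)  # 20 particles x 100 test points (fabricated)
y = rng.randn(100)

# Dev-set step: precision of the predictive Gaussian = 1 / MSE
precision = 1.0 / np.mean((pred_y - y[None, :]) ** 2)

# Per-particle Gaussian density, as in evaluate() above
prob = np.sqrt(precision / (2 * np.pi)) * np.exp(
    -0.5 * (pred_y - y[None, :]) ** 2 * precision)

rmse = np.sqrt(np.mean((y - pred_y.mean(axis=0)) ** 2))
ll = np.mean(np.log(np.mean(prob, axis=0)))  # average over particles, then log
print(rmse, ll)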
Example #4
class Trainer(object):
    def optimize_adagrad(self,
                         train_vars,
                         loss=None,
                         train_grads=None,
                         lr=1e-2):
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=lr, decay=0.9)  # RMSProp; the method name is historical
        if train_grads is not None:
            clip_grads = [(tf.clip_by_norm(grad, 20), var)
                          for grad, var in zip(train_grads, train_vars)]
            #train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
            train_op = optimizer.apply_gradients(clip_grads)
        else:
            train_op = optimizer.minimize(tf.reduce_mean(loss),
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def optimize_adam(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        assert (loss is not None) or (train_grads is not None), \
            'either loss or train_grads must be provided'
        optimizer = tf.train.AdamOptimizer(lr)
        if train_grads is not None:
            clip_grads = [(tf.clip_by_norm(grad, 20), var)
                          for grad, var in zip(train_grads, train_vars)]
            #train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
            train_op = optimizer.apply_gradients(clip_grads)
        else:
            train_op = optimizer.minimize(loss,
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def __init__(self, config, session):
        self.config = config
        self.session = session

        if self.config.method == 'svgd':
            self.filepath = '%s_%s_%s_%s_%d' % (
                config.method, config.dataset, config.kernel,
                repr(config.temperature), config.seed)
        else:
            self.filepath = '%s_%s' % (config.method, config.dataset)

        self.res_dir = './results/%s/' % self.filepath
        self.fig_dir = './results/%s/figures' % self.filepath
        self.res_gmm_dir = './results/%s/gmm' % self.filepath
        self.res_pretrain_dir = './results/%s_%s_pretrain' % (
            self.config.method, self.config.dataset)

        #for folder in [self.train_dir, self.fig_dir]:
        import glob
        for folder in [
                self.res_dir, self.fig_dir, self.res_gmm_dir,
                self.res_pretrain_dir
        ]:
            if not os.path.exists(folder):
                os.makedirs(folder)
            ### clean
            ###if self.config.clean:
            #files = glob.glob(folder + '/events.*') + glob.glob(folder + '/*.png')
            #for f in files: os.remove(f)

        #log.infov("Train Dir: %s, Figure Dir: %s", self.train_dir, self.fig_dir)
        if self.config.method == 'svgd':
            from model_svgd import SVGD
            self.model = SVGD(config)
        else:
            raise NotImplementedError

        # --- optimizer ---
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.learning_rate = config.learning_rate

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        #self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.summary_writer = tf.summary.FileWriter(self.res_dir)
        self.checkpoint_secs = 300  # 5 min

        ## pretraining op
        self.pre_train_op = self.optimize_adam(
            self.model.net_train_vars,
            loss=self.model.loss_recons_noisy,
            lr=self.config.learning_rate)

        if self.config.method == 'svgd':
            self.depict_op = self.optimize_adam(
                self.model.train_vars,
                loss=self.model.depict_loss,
                lr=self.learning_rate)
            self.svgd_op = self.optimize_adam(
                self.model.gmm_train_vars,
                train_grads=self.model.gmm_train_grads,
                lr=self.config.learning_rate)

        tf.global_variables_initializer().run()

    def iterate_minibatches(self, inputs, targets, batchsize, shuffle=False):
        if shuffle:
            indices = np.arange(len(inputs))
            np.random.shuffle(indices)
        max_batches = len(inputs) // batchsize
        if len(inputs) % batchsize != 0: max_batches += 1
        for i in range(max_batches):
            start_idx = i * batchsize
            end_idx = min(len(inputs), (i + 1) * batchsize)
            if shuffle:
                excerpt = indices[start_idx:end_idx]
            else:
                excerpt = slice(start_idx, end_idx)
            yield inputs[excerpt], targets[excerpt], excerpt

    def try_load_checkpoint(self, model_path):

        ckpt_path = tf.train.latest_checkpoint(model_path)
        assert ckpt_path is not None, '%s is empty' % model_path
        log.info("Checkpoint path: %s", ckpt_path)
        self.saver.restore(self.session, ckpt_path)
        log.info(
            "Loaded the pretrain parameters from the provided checkpoint path")

    def save_curr_model(self, model_path):

        step = self.session.run(self.global_step)
        self.saver.save(self.session, model_path, global_step=step)

    def get_latent_rep_and_pred(self, inputs, targets, batch_size=100):
        y_pred, latent_z = [], []
        for batch in self.iterate_minibatches(inputs,
                                              targets,
                                              batch_size,
                                              shuffle=False):
            x_batch, _, _ = batch
            pred, z = self.session.run(
                [self.model.pred_clean, self.model.z],
                feed_dict=self.model.get_feed_dict(x_batch))
            y_pred.append(pred)
            latent_z.append(z)
        y_pred = np.concatenate(y_pred, axis=0)
        latent_z = np.concatenate(latent_z, axis=0)

        return latent_z, np.argmax(y_pred, 1)

    # --- pre-training autoencoder ---

    def pre_train_enc_dec(self,
                          inputs,
                          targets,
                          num_epochs=1000,
                          batch_size=100):
        for epoch in range(1, num_epochs + 1):
            train_err = 0
            for batch in self.iterate_minibatches(inputs,
                                                  targets,
                                                  batch_size,
                                                  shuffle=True):
                x_batch, _, _ = batch
                err, _ = self.session.run(
                    [self.model.loss_recons_clean, self.pre_train_op],
                    feed_dict=self.model.get_feed_dict(x_batch))
                train_err += err
            log.info(
                ("pre-training autoencoder epoch: {:d}, loss:{:4f}").format(
                    epoch, train_err))

            if epoch % max(num_epochs // 10, 1) == 0:
                self.save_curr_model(
                    os.path.join(self.res_pretrain_dir, 'model'))  #save model
                latent_z, _ = self.get_latent_rep_and_pred(inputs, targets)
                y_pred, _ = clustering(latent_z, self.config.num_clusters)
                metrics(targets, y_pred)

    def train_svgd(self, inputs, targets, batch_size=100, num_epochs=4000):
        def normalize(y_prob):
            cluster_frequency = np.sum(y_prob, axis=0)
            y_prob = y_prob**2 / cluster_frequency
            y_prob = np.transpose(y_prob.T / np.sum(y_prob, axis=1))
            y_pred = np.argmax(y_prob, axis=1)
            return y_prob, y_pred

        n_train = len(inputs)
        y_prob = np.zeros((n_train, self.config.num_clusters))
        y_prob_prev = np.zeros((n_train, self.config.num_clusters))

        for batch in self.iterate_minibatches(inputs,
                                              targets,
                                              batch_size,
                                              shuffle=False):
            x_batch, _, idx_batch = batch  #TODO
            minibatch_prob = self.session.run(
                self.model.pred_clean,
                feed_dict=self.model.get_feed_dict(x_batch))
            y_prob[idx_batch] = minibatch_prob

        y_prob, y_pred = normalize(y_prob)

        n_updates = 0
        for epoch in range(1, num_epochs + 1):

            recon_loss_iter, clus_loss_iter, loss_iter, energy_iter = 0., 0., 0., 0.
            for batch in self.iterate_minibatches(inputs,
                                                  targets,
                                                  batch_size,
                                                  shuffle=True):
                x_batch, _, idx_batch = batch

                fetch_values = [
                    self.model.loss, self.model.loss_recons_noisy,
                    self.model.loss_clus, self.model.energy_noisy,
                    self.summary_op, self.depict_op
                ]
                if epoch > 20: fetch_values.append(self.svgd_op)

                ret = self.session.run(
                    fetch_values,
                    feed_dict=self.model.get_feed_dict(x_batch,
                                                       y_prob[idx_batch]))

                loss, loss_recons, loss_clus, energy, summary = ret[:5]

                minibatch_prob = self.session.run(
                    self.model.pred_clean,
                    feed_dict=self.model.get_feed_dict(x_batch))
                y_prob[idx_batch] = minibatch_prob

                loss_iter += loss
                recon_loss_iter += loss_recons
                clus_loss_iter += loss_clus
                energy_iter += energy

                self.summary_writer.add_summary(summary, global_step=n_updates)
                n_updates += 1

            print(epoch, 'recon_loss', recon_loss_iter, 'clus_loss',
                  clus_loss_iter, 'loss', loss_iter, 'energy', energy_iter)
            print(epoch, metrics(targets, y_pred))

            y_prob, y_pred = normalize(y_prob)
            if np.sum((y_pred - np.argmax(y_prob_prev, axis=1))**2) < 1e-6:
                break
            y_prob_prev = np.copy(y_prob)

            if epoch % 10 == 0:
                latent_z, y_pred = self.get_latent_rep_and_pred(
                    inputs, targets)
                plot_latent_z_space(latent_z, y_pred,
                                    '%s/step-%d.png' % (self.res_dir, epoch))

        print(epoch, metrics(targets, y_pred))

    def train(self):

        log.infov("Training Starts!")
        output_save_step = 1000
        self.session.run(self.global_step.assign(0))  # reset global step

        if self.config.dataset == 'mnist':
            from load import load_mnist
            inputs, targets = load_mnist()
        else:
            raise NotImplementedError

        if self.config.method == 'kmeans':
            y_pred, _ = clustering(np.reshape(inputs, (len(inputs), -1)),
                                   self.config.num_clusters)
            metrics(targets, y_pred)
            return
        # --- pre-training ---
        if not self.config.skip_pretrain:
            self.pre_train_enc_dec(inputs,
                                   targets,
                                   batch_size=self.config.batch_size,
                                   num_epochs=1000)
            # save model
            self.save_curr_model(os.path.join(self.res_pretrain_dir, 'model'))
        else:
            self.try_load_checkpoint(self.res_pretrain_dir)

        # plot
        latent_z, _ = self.get_latent_rep_and_pred(inputs, targets)
        y_pred, centroids = clustering(latent_z, self.config.num_clusters)
        plot_latent_z_space(latent_z, y_pred,
                            '%s/pre_train_z' % self.res_dir, with_legend=True)
        #sys.exit(0)

        if self.config.method == 'svgd':
            if not self.config.skip_svgd:
                self.session.run(self.model.mu.assign(centroids))
                #scale = np.zeros((self.config.num_clusters,  self.config.z_dim*(self.config.z_dim+1)//2))
                scale = np.zeros((self.config.num_clusters, self.config.z_dim))
                for c in range(self.config.num_clusters):
                    z_c = latent_z[np.where(y_pred == c)[0]]
                    s0 = np.std(z_c, axis=0)
                    scale[c] = s0
                self.session.run(self.model.scale_diag.assign(scale))

                self.train_svgd(inputs,
                                targets,
                                num_epochs=400,
                                batch_size=self.config.batch_size)
                self.save_curr_model(os.path.join(self.res_dir, 'model'))
            else:
                self.try_load_checkpoint(self.res_dir)

        # plot
        latent_z, y_pred = self.get_latent_rep_and_pred(inputs, targets)
        #y_pred, centroids = clustering(latent_z, self.config.num_clusters)
        plot_latent_z_space(latent_z, y_pred,
                            '%s/%s_z' % (self.res_dir, self.config.method),
                            with_legend=True)
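A quick usage sketch for iterate_minibatches; the generator is copied out of the class so the snippet runs standalone, and the toy data is fabricated. The third yielded value is the index set, which train_svgd uses to scatter per-example cluster probabilities back into y_prob:

import numpy as np

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # Standalone copy of the method above, for illustration only.
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    max_batches = len(inputs) // batchsize
    if len(inputs) % batchsize != 0:
        max_batches += 1
    for i in range(max_batches):
        start, end = i * batchsize, min(len(inputs), (i + 1) * batchsize)
        excerpt = indices[start:end] if shuffle else slice(start, end)
        yield inputs[excerpt], targets[excerpt], excerpt

X = np.arange(10, dtype=np.float32).reshape(10, 1)
y = np.arange(10)
for x_b, y_b, idx in iterate_minibatches(X, y, batchsize=4, shuffle=True):
    print(x_b.ravel(), y_b, idx)  # idx lets the caller scatter per-example updates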
Example #5
class Trainer(object):

    def optimize_sgd(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)  # plain SGD
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(tf.reduce_mean(loss), var_list=train_vars, global_step=self.global_step)
        return train_op

    def optimize_adagrad(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        optimizer = tf.train.RMSPropOptimizer(learning_rate=lr, decay=0.9)  # RMSProp; the method name is historical
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(tf.reduce_mean(loss), var_list=train_vars, global_step=self.global_step)
        return train_op


    def optimize_adam(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        assert (loss is not None) or (train_grads is not None), 'either loss or train_grads must be provided'
        optimizer = tf.train.AdamOptimizer(lr)
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(loss, var_list=train_vars, global_step=self.global_step)
        return train_op


    def __init__(self, config, dataset, session):
        self.config = config
        self.session = session
        self.dataset = dataset

        self.filepath = '%s' % config.method

        self.train_dir = './train_dir/%s' % self.filepath
        #self.fig_dir = './figures/%s' % self.filepath

        #for folder in [self.train_dir, self.fig_dir]:
        for folder in [self.train_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)
            # clean train folder
            if self.config.clean:
                files = glob.glob(folder + '/*')
                for f in files: os.remove(f)

        #log.infov("Train Dir: %s, Figure Dir: %s", self.train_dir, self.fig_dir)

        # --- create model ---
        self.model = SVGD(config)

        # --- optimizer ---
        #self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)
        self.global_step = tf.Variable(0, name="global_step", trainable=False)

        self.learning_rate = config.learning_rate
        #self.learning_rate = tf.train.exponential_decay(
        #        self.learning_rate,
        #        global_step=self.global_step,
        #        decay_steps=500,
        #        decay_rate=0.5,
        #        staircase=True,
        #        name='decaying_learning_rate'
        #)

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.checkpoint_secs = 300  # 5 min

        ##self.train_op = self.optimize_adam( self.model.kl_loss, lr=self.learning_rate)
        if self.config.method == 'svgd':
            self.train_op = self.optimize_adagrad( self.model.train_vars, train_grads=self.model.svgd_grads, lr=self.learning_rate)
        elif self.config.method == 'svgd_kfac':
            self.train_op = self.optimize_adagrad( self.model.train_vars, train_grads=self.model.kfac_grads, lr=self.learning_rate)
        elif self.config.method == 'mixture_kfac':
            self.train_op = self.optimize_adagrad( self.model.train_vars, train_grads=self.model.mixture_grads, lr=self.learning_rate)
        elif self.config.method in ['SGLD', 'pSGLD']:
            self.train_op = self.optimize_sgd( self.model.train_vars, train_grads=self.model.psgld_grads, lr=1.0)

        tf.global_variables_initializer().run()
        if config.checkpoint is not None:
            self.ckpt_path = tf.train.latest_checkpoint(self.config.checkpoint)
            if self.ckpt_path is not None:
                log.info("Checkpoint path: %s", self.ckpt_path)
                self.saver.restore(self.session, self.ckpt_path)
                log.info("Loaded the pretrain parameters from the provided checkpoint path")


    def evaluate(self, step):
        
        def get_lik_and_acc(X, y):
            n = len(X)
            ll, acc = [], []
            batch_size = 2000
            # ceil division: avoids an empty trailing batch when n % batch_size == 0
            for i in range((n + batch_size - 1) // batch_size):
                start = i * batch_size
                end = min((i + 1) * batch_size, n)
                batch = {
                    'X': X[start:end],
                    'y': y[start:end],
                }
                ll_i, acc_i = self.session.run(
                    [self.model.ll, self.model.accuracy],
                    feed_dict=self.model.get_feed_dict(batch, step))

                ll.append(ll_i)
                acc.append(acc_i)
            return np.mean(ll), np.mean(acc)

        train_ll, train_acc = get_lik_and_acc(self.dataset.x_train, self.dataset.y_train)
        valid_ll, valid_acc = get_lik_and_acc(self.dataset.x_valid, self.dataset.y_valid)
        test_ll, test_acc = get_lik_and_acc(self.dataset.x_test, self.dataset.y_test)

        return train_ll, train_acc, valid_ll, valid_acc, test_ll, test_acc



    def train(self):
        log.infov("Training Starts!")
        output_save_step = 1000
        buffer_save_step = 100
        self.session.run(self.global_step.assign(0)) # reset global step
        n_updates = 1

        for ep in xrange(1, 1+self.config.n_epoches):
            x_train, y_train = shuffle(self.dataset.x_train, self.dataset.y_train)
            max_batches = self.config.n_train // self.config.batch_size 

            #if self.config.n_train % self.config.batch_size != 0: max_batches += 1
            for bi in xrange(max_batches):
                start = bi * self.config.batch_size
                end = min((bi+1) * self.config.batch_size, self.config.n_train)

                batch_chunk = {
                    'X': x_train[start:end],
                    'y': y_train[start:end]
                }

                step, summary, log_prob, step_time = \
                        self.run_single_step(n_updates, batch_chunk)

                #if np.any(np.isnan(log_prob)): sys.exit(1)

                self.summary_writer.add_summary(summary, global_step=step)
                #if n_updates % 100 == 0:
                #    self.log_step_message(n_updates, log_prob, step_time)

                if n_updates % 50 == 0:
                    print(n_updates, self.evaluate(n_updates))

                n_updates += 1


            #if ep % (self.config.n_epoches//10 + 1) == 0:
            #    rmse, ll = self.evaluate()
            #    print(ep, rmse, ll)

        #test_rmse, test_ll = self.evaluate()
        #write_time = time.strftime("%m-%d-%H:%M:%S")
        #with open(self.config.savepath + self.config.dataset + "_test_ll_rmse_%s.txt" % (self.filepath), 'a') as f:
        #    f.write(repr(self.config.trial) + ',' + write_time + ',' + repr(self.config.n_epoches) + ',' + repr(test_rmse) + ',' + repr(test_ll) + '\n')

        #if self.config.save:
        #    # save model at the end
        #    self.saver.save(self.session,
        #        os.path.join(self.train_dir, 'model'),
        #        global_step=step)


    def run_single_step(self, step, batch_chunk):
        _start_time = time.time()
        fetch = [self.global_step, self.summary_op, self.model.log_prob]
        if self.config.method in ['mixture_kfac', 'svgd_kfac']:
            fetch += [self.model.cov_update_step]

        if self.config.method == 'pSGLD':
            fetch += [self.model.moment_op]

        fetch += [self.train_op]

        fetch_values = self.session.run(
            fetch, feed_dict = self.model.get_feed_dict(batch_chunk, step)
        )

        [step, summary, log_prob] = fetch_values[:3]
        _end_time = time.time()
        return step, summary, log_prob, (_end_time - _start_time)


    def log_step_message(self, step, log_prob, step_time, is_train=True):
        if step_time == 0:
            step_time = 0.001
        log_fn = (is_train and log.info or log.infov)
        log_fn((" [{split_mode:5s} step {step:4d}] " +
                #"loss: {loss:.4f} " +
                "log_prob: {log_prob:.4f} " +
                "({sec_per_batch:.3f} sec/batch)"
                ).format(split_mode=(is_train and 'train' or 'val'),
                         step=step, log_prob=log_prob,
                         sec_per_batch=step_time,
                         )
               )
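The optimize_* helpers above support two call modes: pass a scalar loss and let the optimizer differentiate it, or pass precomputed gradients (e.g. SVGD transport directions) for apply_gradients. A standalone sketch of both paths, with the global_step bookkeeping omitted:

import tensorflow as tf

def optimize_adam(train_vars, loss=None, train_grads=None, lr=1e-2):
    # Standalone copy of the helper above, minus global_step bookkeeping.
    assert (loss is not None) or (train_grads is not None)
    optimizer = tf.train.AdamOptimizer(lr)
    if train_grads is not None:
        return optimizer.apply_gradients(zip(train_grads, train_vars))
    return optimizer.minimize(loss, var_list=train_vars)

w = tf.Variable([1.0, 2.0])
loss = tf.reduce_sum(tf.square(w))

op_a = optimize_adam([w], loss=loss)                            # optimizer differentiates
op_b = optimize_adam([w], train_grads=tf.gradients(loss, [w]))  # custom gradients

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run([op_a, op_b])
    print(sess.run(w))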
Example #6
    def __init__(self, config, session):
        self.config = config
        self.session = session

        self.filepath = '%s_%d_%s_%s_%d' % (
            config.method,
            config.n_components,
            config.kernel,
            repr(config.temperature),
            config.seed,
        )
        self.res_dir = './results/%s' % self.filepath

        #for folder in [self.train_dir, self.fig_dir]:
        import glob
        for folder in [self.res_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)
            ## clean
            if self.config.clean:
                files = glob.glob(folder + '/*')
                for f in files:
                    os.remove(f)

        #log.infov("Train Dir: %s, Figure Dir: %s", self.train_dir, self.fig_dir)
        if self.config.method == 'svgd':
            from model_svgd import SVGD
            self.model = SVGD(config)
        else:
            raise NotImplementedError

        # --- optimizer ---
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            self.learning_rate = tf.train.exponential_decay(
                self.learning_rate,
                global_step=self.global_step,
                decay_steps=4000,
                decay_rate=0.8,
                staircase=True,
                name='decaying_learning_rate')

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        #self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.summary_writer = tf.summary.FileWriter(self.res_dir)
        self.checkpoint_secs = 300  # 5 min

        #self.train_op = self.optimize_adam(self.model.train_grads, self.model.train_vars, lr=self.learning_rate)
        self.train_op = self.optimize_adagrad(self.model.train_grads,
                                              self.model.train_vars,
                                              lr=self.learning_rate)

        tf.global_variables_initializer().run()
        if config.checkpoint is not None:
            self.ckpt_path = tf.train.latest_checkpoint(self.config.checkpoint)
            if self.ckpt_path is not None:
                log.info("Checkpoint path: %s", self.ckpt_path)
                self.saver.restore(self.session, self.ckpt_path)
                log.info("Loaded the pretrained parameters from the provided checkpoint path")
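The optimize_adagrad called above is defined in the next example; it clips each gradient to a maximum L2 norm of 5 before applying it. A NumPy sketch of what tf.clip_by_norm does (values fabricated):

import numpy as np

def clip_by_norm(g, clip_norm):
    # NumPy equivalent of tf.clip_by_norm: rescale only when the L2 norm exceeds the cap.
    norm = np.linalg.norm(g)
    return g if norm <= clip_norm else g * (clip_norm / norm)

g = np.array([3.0, 4.0])            # L2 norm 5
print(clip_by_norm(g, 5.0))         # unchanged: [3. 4.]
print(clip_by_norm(g * 10.0, 5.0))  # rescaled back down to norm 5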
Example #7
class Trainer(object):
    def optimize_adagrad(self, train_grads, train_vars, lr=1e-2):
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=lr, decay=0.9)  # RMSProp; the method name is historical
        clip_grads = [(tf.clip_by_norm(grad, 5), var)
                      for grad, var in zip(train_grads, train_vars)]
        train_op = optimizer.apply_gradients(clip_grads)
        return train_op

    def optimize_adam(self, train_grads, train_vars, lr=1e-2):
        optimizer = tf.train.AdamOptimizer(lr)
        #grads = optimizer.compute_gradients(loss)
        clip_grads = [(tf.clip_by_norm(grad, 5), var)
                      for grad, var in zip(train_grads, train_vars)]
        train_op = optimizer.apply_gradients(clip_grads)

        return train_op

    def __init__(self, config, session):
        self.config = config
        self.session = session

        self.filepath = '%s_%d_%s_%s_%d' % (
            config.method,
            config.n_components,
            config.kernel,
            repr(config.temperature),
            config.seed,
        )
        self.res_dir = './results/%s' % self.filepath

        #for folder in [self.train_dir, self.fig_dir]:
        import glob
        for folder in [self.res_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)
            ## clean
            if self.config.clean:
                files = glob.glob(folder + '/*')
                for f in files:
                    os.remove(f)

        #log.infov("Train Dir: %s, Figure Dir: %s", self.train_dir, self.fig_dir)
        if self.config.method == 'svgd':
            from model_svgd import SVGD
            self.model = SVGD(config)
        else:
            raise NotImplementedError

        # --- optimizer ---
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            self.learning_rate = tf.train.exponential_decay(
                self.learning_rate,
                global_step=self.global_step,
                decay_steps=4000,
                decay_rate=0.8,
                staircase=True,
                name='decaying_learning_rate')

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        #self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.summary_writer = tf.summary.FileWriter(self.res_dir)
        self.checkpoint_secs = 300  # 5 min

        #self.train_op = self.optimize_adam(self.model.train_grads, self.model.train_vars, lr=self.learning_rate)
        self.train_op = self.optimize_adagrad(self.model.train_grads,
                                              self.model.train_vars,
                                              lr=self.learning_rate)

        tf.global_variables_initializer().run()
        if config.checkpoint is not None:
            self.ckpt_path = tf.train.latest_checkpoint(self.config.checkpoint)
            if self.ckpt_path is not None:
                log.info("Checkpoint path: %s", self.ckpt_path)
                self.saver.restore(self.session, self.ckpt_path)
                log.info("Loaded the pretrained parameters from the provided checkpoint path")

    def evaluate(self, x_train, y_train, x_test, y_test):
        def _compute_energy(X):
            energy = []
            n_x = len(X)
            max_batches = n_x // self.config.batch_size
            if n_x % self.config.batch_size != 0: max_batches += 1
            for x_batch in tqdm(iter_data(X, size=self.config.batch_size),
                                total=max_batches):
                #z = self.session.run(self.model.z, feed_dict=self.model.get_feed_dict(x_batch))
                #energy.append( self.session.run(self.model.compute_energy(z, phi, mu, scale)) )
                energy.append(
                    self.session.run(
                        self.model.energy,
                        feed_dict=self.model.get_feed_dict(x_batch)))
            return np.concatenate(energy)

        eng_train = _compute_energy(x_train)
        eng_test = _compute_energy(x_test)
        assert len(eng_train) == len(x_train) and len(eng_test) == len(x_test), \
            'energy count must match sample count'

        combined_energy = np.concatenate((eng_train, eng_test))
        thresh = np.percentile(combined_energy, 80)  # flag the top 20% of energies

        pred = (eng_test > thresh).astype(int)
        gt = y_test.astype(int)

        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average='binary')

        print(
            "Seed : {:3d}, Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f}"
            .format(self.config.seed, accuracy, precision, recall, f_score))
        return accuracy, precision, recall, f_score

    def train(self):
        log.infov("Training Starts!")
        output_save_step = 1000
        self.session.run(self.global_step.assign(0))  # reset global step

        from data_loader import load_kdd99
        x_train, x_test, y_train, y_test = load_kdd99('kdd_cup.npz',
                                                      self.config.seed)

        n_updates = 0
        with open(self.res_dir + "/step.txt", 'w') as f:
            for e in range(1, 1 + self.config.n_epochs):
                x_train, y_train = shuffle(x_train, y_train)
                n_train = len(x_train)
                max_batches = n_train // self.config.batch_size
                #if n_train % self.config.batch_size != 0: max_batches+=1

                for x_batch, y_batch in tqdm(iter_data(
                        x_train, y_train, size=self.config.batch_size),
                                             total=max_batches):
                    step, summary, loss, step_time = self.run_single_step(
                        x_batch)
                    self.summary_writer.add_summary(summary,
                                                    global_step=n_updates)

                    n_updates += 1
                    #if n_updates % 100 == 0:
                    #    eng, eng_chk = self.session.run([self.model.energy, self.model.energy_check], feed_dict=self.model.get_feed_dict(x_batch))
                    #    print(np.mean(eng), np.mean(eng_chk))

                if e % 10 == 0:
                    accuracy, precision, recall, f_score = self.evaluate(
                        x_train, y_train, x_test, y_test)
                    f.write(self.filepath + ',' + repr(e) + ',' +
                            repr(accuracy) + ',' + repr(precision) + ',' +
                            repr(recall) + ',' + repr(f_score) + '\n')
                    f.flush()

                    # save model at the end
                    self.saver.save(self.session,
                                    os.path.join(self.res_dir, 'model'),
                                    global_step=step)

    def run_single_step(self, x_batch):
        _start_time = time.time()

        fetch = [
            self.global_step, self.summary_op, self.model.loss, self.train_op
        ]
        fetch_values = self.session.run(
            fetch, feed_dict=self.model.get_feed_dict(x_batch))
        [step, summary, loss] = fetch_values[:3]

        _end_time = time.time()

        return step, summary, loss, (_end_time - _start_time)

    def log_step_message(self, step, loss, step_time, is_train=True):
        if step_time == 0:
            step_time = 0.001
        log_fn = (is_train and log.info or log.infov)
        log_fn((" [{split_mode:5s} step {step:4d}] " + "loss: {loss:.4f} " +
                "({sec_per_batch:.3f} sec/batch)").format(
                    split_mode=(is_train and 'train' or 'val'),
                    step=step,
                    loss=loss,
                    sec_per_batch=step_time,
                ))
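The evaluation above flags the top 20% highest-energy points as anomalies via a percentile threshold over the combined train and test energies. A self-contained NumPy sketch of that thresholding on fabricated energies (prf in the snippet presumably aliases sklearn's precision_recall_fscore_support):

import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

rng = np.random.RandomState(0)
eng_train = rng.randn(1000)
eng_test = np.concatenate([rng.randn(800), rng.randn(200) + 3.0])  # 20% shifted "anomalies"
y_test = np.concatenate([np.zeros(800), np.ones(200)]).astype(int)

thresh = np.percentile(np.concatenate([eng_train, eng_test]), 80)  # top 20% flagged
pred = (eng_test > thresh).astype(int)

precision, recall, f_score, _ = precision_recall_fscore_support(
    y_test, pred, average='binary')
print(accuracy_score(y_test, pred), precision, recall, f_score)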