    def test_adam_optimizer_numpy(self):
        """Test Adam optimizer."""
        lr = 1e-3
        beta1 = 0.9
        beta2 = 0.999
        epsilon = 1e-8

        with tf.Graph().as_default(), tf.Session() as sess, tf.device(
                "/cpu:0"):
            self._test_numpy_optimizer_equal(
                sess,
                tf.train.AdamOptimizer(lr,
                                       beta1=beta1,
                                       beta2=beta2,
                                       epsilon=epsilon),
                AdamNumpyOptimizer(lr,
                                   beta1=beta1,
                                   beta2=beta2,
                                   epsilon=epsilon))

        with tf.Graph().as_default(), tf.Session() as sess, tf.device(
                "/cpu:0"):
            self._test_optimizer_equal(
                sess,
                tf.train.AdamOptimizer(lr,
                                       beta1=beta1,
                                       beta2=beta2,
                                       epsilon=epsilon),
                AdamOptimizer(lr,
                              beta1=beta1,
                              beta2=beta2,
                              epsilon=epsilon,
                              dtype=self._dtype))
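
For reference, a NumPy Adam implementation like the one tested above would typically apply the standard bias-corrected update below. This is a minimal sketch; adam_step and its argument names are illustrative, not the test's actual API.

import numpy as np

def adam_step(param, grad, m, v, t, lr=1e-3, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # One bias-corrected Adam update (Kingma & Ba, 2015); t counts from 1.
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    param = param - lr * m_hat / (np.sqrt(v_hat) + epsilon)
    return param, m, v
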
    def _build_optimizer(self):
        from optimizers import AdamOptimizer
        from optimizers.lr_schedulers import InverseSquareRootSchedule
        parameters = list(self.model.module.parameters())
        args = self.args['train']['pg']
        self.optimizer = AdamOptimizer(args, parameters)
        self.lr_scheduler = InverseSquareRootSchedule(args, self.optimizer)
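
An inverse square root schedule of this kind typically ramps the learning rate up linearly over a fixed number of warmup updates and then decays it proportionally to 1/sqrt(step). A minimal sketch under that assumption (the function name and defaults are illustrative, not the imported class's API):

import math

def inverse_sqrt_lr(step, peak_lr=5e-4, warmup_updates=4000, warmup_init_lr=1e-7):
    # Linear warmup to peak_lr, then decay proportional to 1 / sqrt(step).
    if step < warmup_updates:
        return warmup_init_lr + (peak_lr - warmup_init_lr) * step / warmup_updates
    return peak_lr * math.sqrt(warmup_updates) / math.sqrt(step)
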
Example 3
    def define_agent(self, width, height, num_actions):
        return NStepDQNAgent(
            config=Config(num_actions=num_actions,
                          encoder=OneHotEncoder(width, height),
                          optimizer=AdamOptimizer(0.01),
                          network=MLP(),
                          policy=EpsilonGreedyPolicy(1, 0.01, 1000),
                          discount=0.95,
                          n_step=8))
Example 4
    def define_agent(self, width, height, num_actions):
        return DQNAgent(config=Config(num_actions=num_actions,
                                      encoder=OneHotEncoder(width, height),
                                      optimizer=AdamOptimizer(0.01),
                                      network=MLP(),
                                      policy=EpsilonGreedyPolicy(1, 0.01, 500),
                                      discount=0.95,
                                      capacity=100,
                                      batch_size=16))
Example 5
    def define_agent(self, width, height, num_actions):
        return NStepDQNAgent(config=Config(
            num_actions=num_actions,
            encoder=LayerEncoder(width, height, treasure_position=True),
            optimizer=AdamOptimizer(0.001),
            network=CNN(hidden_units=[128]),
            policy=EpsilonGreedyPolicy(1, 0.01, 100000),
            discount=0.95,
            n_step=16))
Example 6
    def define_agent(self, width, height, num_actions):
        return DQNAgent(
            config=Config(
                num_actions=num_actions,
                encoder=LayerEncoder(width, height, treasure_position=True),
                optimizer=AdamOptimizer(0.001),
                network=CNN(hidden_units=[128]),
                policy=EpsilonGreedyPolicy(1, 0.01, 50000),
                discount=0.95,
                capacity=10000,
                batch_size=8,
                target_sync=100,
                double_q=True))
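
Across these agent configurations, EpsilonGreedyPolicy(1, 0.01, N) presumably anneals the exploration rate from 1.0 down to 0.01 over N steps. A minimal sketch of such a linear schedule (illustrative only, not the library's implementation):

def epsilon_at(step, eps_start=1.0, eps_end=0.01, anneal_steps=1000):
    # Linearly anneal epsilon from eps_start to eps_end, then hold at eps_end.
    fraction = min(step / anneal_steps, 1.0)
    return eps_start + fraction * (eps_end - eps_start)
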
Example 7
    def run(self):
        pyramid_level_count = len(self.pyramid_factors)
        transform_count = len(self.initial_transforms)

        for t_it in range(transform_count):
            init_transform = self.initial_transforms[t_it]
            param_scaling = self.transforms_param_scaling[t_it]

            self.value_history.append([])

            for lvl_it in range(pyramid_level_count):
                if self.opt_name == 'adam':
                    opt = AdamOptimizer(self.distances[lvl_it],
                                        init_transform.copy())
                elif self.opt_name == 'sgd':
                    opt = GradientDescentOptimizer(self.distances[lvl_it],
                                                   init_transform.copy())
                else:
                    raise ValueError(
                        'Optimizer name must be \'adam\' or \'sgd\'')

                if self.step_lengths.ndim == 1:
                    opt.set_step_length(self.step_lengths[0],
                                        self.step_lengths[1])
                else:
                    opt.set_step_length(self.step_lengths[lvl_it, 0],
                                        self.step_lengths[lvl_it, 1])
                opt.set_scalings(param_scaling)
                opt.set_gradient_magnitude_threshold(
                    self.gradient_magnitude_threshold)
                opt.set_report_freq(self.report_freq)
                if type(self.report_func) is list or type(
                        self.report_func) is tuple:
                    opt.set_report_callback(self.report_func[t_it])
                else:
                    opt.set_report_callback(self.report_func)

                if isinstance(self.iterations, int):
                    itercount = self.iterations
                else:
                    assert (len(self.iterations) == pyramid_level_count)
                    itercount = self.iterations[lvl_it]

                opt.optimize(itercount)

                if lvl_it + 1 == pyramid_level_count:
                    self.output_transforms.append(opt.get_transform())
                    self.values.append(opt.get_value())
                    self.initial_transforms[t_it] = opt.get_transform()
                else:
                    init_transform = opt.get_transform()

                self.value_history[-1].append(opt.get_value_history())
    def __init__(self,
                 config,
                 x,
                 y,
                 optimizer='momentum',
                 dtype=tf.float32,
                 training=True):
        self._config = config
        self._dtype = dtype
        h = self.build_inference(x)
        xent = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=h, labels=y))
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        log.info(('Regularization', reg_losses))
        if len(reg_losses) > 0:
            wd_cost = tf.add_n(reg_losses)
            cost = xent + wd_cost
            print(reg_losses)
            log.fatal('')
        else:
            wd_cost = tf.constant(0.0)
            cost = xent
        self._cost = cost

        correct = tf.reduce_sum(tf.to_float(tf.equal(tf.argmax(h, 1), y)))
        acc = tf.realdiv(correct, tf.to_float(tf.shape(x)[0]))

        self._acc = acc
        self._correct = correct
        var_list = tf.trainable_variables()
        # print('Variables')
        # [print(vv.name) for vv in var_list]
        self._var_list = var_list
        self._x = x
        self._y = y

        if training:
            global_step = tf.get_variable('global_step', [],
                                          dtype=tf.int64,
                                          initializer=tf.constant_initializer(
                                              0, dtype=tf.int64),
                                          trainable=False)
            if optimizer == 'momentum':
                opt = MomentumOptimizer(
                    config.init_lr,
                    config.momentum,
                    effective_lr=config.effective_lr,
                    negative_momentum=config.negative_momentum,
                    dtype=dtype)
            elif optimizer == 'momentum_inv_decay':
                opt = MomentumInvDecayOptimizer(
                    config.init_lr,
                    config.momentum,
                    config.lr_decay,
                    effective_lr=config.effective_lr,
                    negative_momentum=config.negative_momentum,
                    dtype=dtype)
            elif optimizer == 'gradient_descent':
                opt = GradientDescentOptimizer(config.init_lr, dtype=dtype)
            elif optimizer == 'adam':
                opt = AdamOptimizer(config.init_lr, dtype=dtype)
            else:
                raise ValueError('Unknown Optimizer')
            train_op = opt.minimize(cost, global_step=global_step)
            self._train_op = train_op
            self._optimizer = opt
            self._global_step = global_step
        else:
            self._optimizer = None
Example 9
    def transfer_multiscale(self,
                            content_images,
                            style_images,
                            initial_image,
                            aux_image,
                            callback=None,
                            **kwargs):
        """Performs style transfer from style_image to content_image at the given sizes."""
        output_image = None
        output_raw = None
        print_('Starting %d worker process(es).' % len(ARGS.devices))
        self.pool = TileWorkerPool(self.model, ARGS.devices, ARGS.caffe_path)

        size = ARGS.size
        sizes = [ARGS.size]
        while True:
            size = round(size / np.sqrt(2))
            if size < ARGS.min_size:
                break
            sizes.append(size)

        steps = 0
        for i in range(len(sizes)):
            steps += ARGS.iterations[min(i, len(ARGS.iterations) - 1)]
        callback.set_steps(steps)

        for i, size in enumerate(reversed(sizes)):
            content_scaled = []
            for image in content_images:
                if image.size != content_images[0].size:
                    raise ValueError(
                        'All of the content images must be the same size')
                content_scaled.append(resize_to_fit(image, size,
                                                    scale_up=True))
                w, h = content_scaled[0].size
            print_('\nScale %d, image size %dx%d.\n' % (i + 1, w, h))
            style_scaled = []
            for image in style_images:
                if ARGS.style_scale >= 32:
                    style_scaled.append(
                        resize_to_fit(image, ARGS.style_scale, scale_up=True))
                else:
                    style_scaled.append(
                        resize_to_fit(image,
                                      round(size * ARGS.style_scale),
                                      scale_up=ARGS.style_scale_up))
            if aux_image:
                aux_scaled = aux_image.resize(content_scaled[0].size,
                                              Image.LANCZOS)
                self.aux_image = self.model.pil_to_image(aux_scaled)
            if output_image:  # this is not the first scale
                self.model.img = output_raw
                self.model.resize_image(content_scaled[0].size)
                params = self.model.img
                self.optimizer.set_params(params)
            else:  # this is the first scale
                biased_g1 = True
                if initial_image:  # and the user supplied an initial image
                    initial_image = initial_image.resize(
                        content_scaled[0].size, Image.LANCZOS)
                    self.model.set_image(initial_image)
                else:  # and the user did not supply an initial image
                    w, h = content_scaled[0].size
                    self.model.set_image(
                        np.random.uniform(0, 255, size=(h, w, 3)))
                    biased_g1 = False
                # make sure the optimizer's params array shares memory with self.model.img
                # after preprocess_image is called later
                if ARGS.optimizer == 'adam':
                    self.optimizer = AdamOptimizer(self.model.img,
                                                   step_size=ARGS.step_size,
                                                   bp1=1 -
                                                   (1 / ARGS.avg_window),
                                                   decay=ARGS.step_decay[0],
                                                   power=ARGS.step_decay[1],
                                                   biased_g1=biased_g1)
                elif ARGS.optimizer == 'lbfgs':
                    self.optimizer = LBFGSOptimizer(self.model.img)
                else:
                    raise ValueError("ARGS.optimizer must be 'adam' or 'lbfgs'")

            params = self.model.img
            iters_i = ARGS.iterations[min(i, len(ARGS.iterations) - 1)]
            output_image = self.transfer(iters_i, params, content_scaled,
                                         style_scaled, callback, **kwargs)
            output_raw = self.current_raw

        return output_image
Example 10
    def max_ent_irl(self,
                    base_dir,
                    n_iter=200,
                    tol=1e-10,
                    verbose=True,
                    optimizer="adam",
                    lr=0.1,
                    lr_order=1,
                    emp_p_in=None,
                    beta=0,
                    beta_op=0,
                    softQ_lr=0.5,
                    n_steps=150,
                    two_players=False,
                    alpha=0.9,
                    reg_opp=False,
                    no_one_hot=True,
                    fix_horizon=False):
        if optimizer == "gd":
            opt = GDOptimizer(lr, lr_order)
        elif optimizer == "adam":
            opt = AdamOptimizer(self.solver.env.features_dim, lr)

        policies = []
        player_policies = []
        adv_policies = []
        rewards = []
        err = []

        if not reg_opp:
            mu_learner, max_ent_policy, opponent_policy = self.solver.mu_w(
                self.w,
                emp_p_in,
                two_players,
                alpha,
                no_one_hot=no_one_hot,
                fix_horizon=fix_horizon)
            policy = alpha * max_ent_policy + (1 - alpha) * opponent_policy
        else:
            max_ent_policy, opponent_policy = self.solver.two_players_soft_Q(
                alpha=alpha,
                beta=beta,
                beta_op=beta_op,
                n_episodes=1000,
                lr=softQ_lr,
                reuseQ=False)
            policy = alpha * max_ent_policy + (1 - alpha) * opponent_policy
            if fix_horizon:
                mu_learner = self.solver.mu_policy_fixed_horizon(
                    policy,
                    stochastic=True,
                    emp_p_in=emp_p_in,
                    no_one_hot=no_one_hot)
            else:
                mu_learner = self.solver.mu_policy(policy,
                                                   stochastic=True,
                                                   emp_p_in=emp_p_in,
                                                   no_one_hot=no_one_hot)

        rewards.append(np.copy(self.w))
        player_policies.append(max_ent_policy)
        adv_policies.append(opponent_policy)
        policies.append(policy)
        while opt.step < n_iter:
            # Update on w
            grad = self.mu_teacher - mu_learner
            self.w += opt.update(grad)
            print("Weights")
            print(self.w)
            # Update features expectation

            if not reg_opp:
                mu_learner, max_ent_policy, opponent_policy = self.solver.mu_w(
                    self.w,
                    emp_p_in,
                    two_players,
                    alpha,
                    no_one_hot=no_one_hot,
                    fix_horizon=fix_horizon)
                policy = alpha * max_ent_policy + (1 - alpha) * opponent_policy
            else:
                max_ent_policy, opponent_policy = self.solver.two_players_soft_Q(
                    alpha=alpha,
                    beta=beta,
                    beta_op=beta_op,
                    n_episodes=1000,
                    lr=softQ_lr,
                    reuseQ=False)
                policy = alpha * max_ent_policy + (1 - alpha) * opponent_policy
                if fix_horizon:
                    mu_learner = self.solver.mu_policy_fixed_horizon(
                        policy,
                        stochastic=True,
                        emp_p_in=emp_p_in,
                        no_one_hot=no_one_hot)
                else:
                    mu_learner = self.solver.mu_policy(policy,
                                                       stochastic=True,
                                                       emp_p_in=emp_p_in,
                                                       no_one_hot=no_one_hot)

            # Error
            err_t = np.linalg.norm(self.mu_teacher - mu_learner)

            err.append(err_t)
            rewards.append(np.copy(self.w))
            player_policies.append(max_ent_policy)
            adv_policies.append(opponent_policy)
            policies.append(policy)

            if verbose:
                print("Step", opt.step, ", error : ", err_t)

            if np.linalg.norm(grad) < tol:
                break

            if ((opt.step + 1) % 5):
                with open(base_dir + '/policy_' + str(lr),
                          "wb") as fp:  #Pickling
                    pickle.dump(policies, fp)
                with open(base_dir + '/player_' + str(lr),
                          "wb") as fp:  #Pickling
                    pickle.dump(player_policies, fp)
                with open(base_dir + '/adv_' + str(lr), "wb") as fp:  #Pickling
                    pickle.dump(adv_policies, fp)
                with open(base_dir + '/reward_' + str(lr),
                          "wb") as fp:  #Pickling
                    pickle.dump(rewards, fp)
                with open(base_dir + '/err_' + str(lr), "wb") as fp:  #Pickling
                    pickle.dump(err, fp)

        return policies, player_policies, adv_policies, rewards, err, self.solver.v
Example 11
from pldiffer import Operations as op
from utils import mnist_util as mnist
from utils import plot_loss
from nnet import learn
from optimizers import SGDOptimizer, MomentumOptimizer, RmspropOptimizer, AdamOptimizer
from layers import DenseLayer, BatchNorm

mnist_in_data, mnist_out_data, mnist_test_in_data, mnist_test_out_data = mnist.load_data_set()

num_hidden_neuron = 1600

optimizer = AdamOptimizer(learning_rate=0.001)

l1 = DenseLayer((784, num_hidden_neuron), optimizer)
bn1 = BatchNorm((num_hidden_neuron,), optimizer)
l2 = DenseLayer((num_hidden_neuron, num_hidden_neuron), optimizer)
l3 = DenseLayer((num_hidden_neuron, 10), optimizer)


def calc_model(x, y=None, train_mode=False):
    a1 = op.relu(bn1.compute(l1.compute(x), train_mode=train_mode))
    a2 = op.relu(l2.compute(a1))
    z3 = l3.compute(a2)
    if not train_mode:
        return op.softmax(z3)
    else:
        return op.softmax_cross_entropy(z3, y)


def loss_function(y, m, train_mode=False):
    if train_mode:
Example 12
    def _initialize_optimizer(self, t_it, lvl_it, init_transform):
        """Instantiate and initialize optimizer depending on which one has been selected.
        
        Args:
            t_it: index of the initial transform being optimized (from 0 up)
            lvl_it: index of the current pyramid level (affects optimizer parameters)
            init_transform: the transform the optimizer should start from
        Returns:
            optimizer
        """
        assert (self.opt_name in available_opts), 'Optimizer has not been set'

        if self.opt_name == 'adam':
            opt = AdamOptimizer(self.distances[lvl_it], init_transform.copy())
        elif self.opt_name == 'gd':
            opt = GradientDescentOptimizer(self.distances[lvl_it],
                                           init_transform.copy())
#            if self.opt_opts['step_length'].ndim == 1:
#                step_length = self.opt_opts['step_length']
#            else:
#                step_length=self.opt_opts['step_length'][lvl_it, :]
        elif self.opt_name == 'scipy':
            opt = SciPyOptimizer(self.distances[lvl_it], init_transform.copy())
            self.optimizer_opts['method'] = 'L-BFGS-B'
            minim_opts = {'gtol': self.optimizer_opts.get('gradient_magnitude_threshold', 1e-9), \
                          'eps': self.optimizer_opts.get('epsilon', 0.1)}
            self.optimizer_opts['minimizer_opts'] = minim_opts
        elif self.opt_name == 'gridsearch':
            opt = GridSearchOptimizer(self.distances[lvl_it],
                                      init_transform.copy())
        else:
            raise NotImplementedError(
                f'Sorry, optimizer {self.opt_name} has not been implemented')

        self.optimizer_opts['param_scaling'] = self.transforms_param_scaling[
            t_it]
        opt.set_report_freq(self.report_freq)

        if type(self.report_func) is list or type(self.report_func) is tuple:
            opt.set_report_callback(self.report_func[t_it])
        else:
            opt.set_report_callback(self.report_func)

        return opt
Example 13
    def fit(self, X, y):
        self.label_binarizer.fit(y)
        y = self.label_binarizer.transform(y)
        if self.label_binarizer.y_type_ == 'multiclass':
            self.out_activation = 'softmax'
        else:
            self.out_activation = 'logistic'

        n_samples, n_features = X.shape
        if y.ndim == 1:
            y = y.reshape((-1, 1))
        self.n_outputs = y.shape[1]
        layer_units = ([n_features] + self.hidden_layer_sizes +
                       [self.n_outputs])

        self.weights = []
        self.biases = []
        for i in range(self.n_layers - 1):
            if self.solver == 'ga':
                init_bound = 1.0
            else:
                if self.activation == 'logistic':
                    factor = 2
                else:
                    factor = 6
                init_bound = np.sqrt(factor /
                                     (layer_units[i] + layer_units[i + 1]))
            self.weights.append(
                np.random.uniform(-init_bound, init_bound,
                                  (layer_units[i], layer_units[i + 1])))
            self.biases.append(
                np.random.uniform(-init_bound, init_bound, layer_units[i + 1]))

        activations = [X]
        for _ in range(len(layer_units) - 1):
            activations.append(None)
        deltas = [None] * (len(activations) - 1)
        weight_grads = [
            np.empty((n_fan_in, n_fan_out))
            for n_fan_in, n_fan_out in zip(layer_units[:-1], layer_units[1:])
        ]
        bias_grads = [np.empty(n_fan_out) for n_fan_out in layer_units[1:]]
        params = self.weights + self.biases
        if self.solver == 'sgd':
            self.optimizer = SGDOptimizer(params, self.learning_rate_init,
                                          self.learning_rate, self.momentum,
                                          self.nesterovs_momentum,
                                          self.power_t)
        elif self.solver == 'adam':
            self.optimizer = AdamOptimizer(params, self.learning_rate_init,
                                           self.beta_1, self.beta_2,
                                           self.epsilon)
        else:
            self.optimizer = GAOptimizer(
                model=self,
                X=X,
                y=self.label_binarizer.inverse_transform(y),
                params=params,
                pop_size=self.pop_size,
                crossover_rate=self.crossover_rate,
                mutation_rate=self.mutation_rate)

        if self.early_stopping:
            stratify = y if self.n_outputs == 1 else None
            X, X_val, y, y_val = train_test_split(
                X,
                y,
                random_state=self.random_state,
                test_size=self.validation_fraction,
                stratify=stratify)
            y_val = self.label_binarizer.inverse_transform(y_val)
        else:
            X_val = None
            y_val = None

        n_samples = X.shape[0]
        sample_idx = np.arange(n_samples, dtype=int)
        time_step = 0
        for it in range(self.max_iter):
            if self.solver == 'ga':
                self.optimizer.update_params()
                loss = 0.0
            else:
                accumulated_loss = 0.0
                if self.shuffle:
                    np.random.shuffle(sample_idx)
                batch_slices = [
                    sample_idx[start:start + self.batch_size]
                    for start in range(0, n_samples, self.batch_size)
                ]
                if n_samples % self.batch_size != 0:
                    batch_slices.append(
                        sample_idx[(n_samples - n_samples % self.batch_size):])
                for batch_slice in batch_slices:
                    X_batch = X[batch_slice]
                    y_batch = y[batch_slice]
                    activations[0] = X_batch
                    batch_loss, weight_grads, bias_grads = self.backprop(
                        X_batch, y_batch, activations, deltas, weight_grads,
                        bias_grads)
                    accumulated_loss += batch_loss * (len(batch_slice))
                    grads = weight_grads + bias_grads
                    self.optimizer.update_params(grads)
                loss = accumulated_loss / n_samples

            time_step += n_samples
            accuracy = accuracy_score(y.flatten(), self.predict(X))
            self.loss_curve.append(accuracy)
            # print(f"Iteration {it + 1}, accuracy = {accuracy}")

            self.update_no_improvement_count(X_val, y_val)
            self.optimizer.iteration_ends(time_step)
            if self.no_improvement_count > self.n_iter_no_change:
                is_stopping = self.optimizer.trigger_stopping()
                if is_stopping:
                    break
                else:
                    self.no_improvement_count = 0
        if self.early_stopping:
            self.weights = self.best_weights
            self.biases = self.best_biases
        return self
Example 14
from pldiffer import Operations as op
from utils import mnist_util as mnist
from utils import plot_loss
from nnet import learn
from optimizers import SGDOptimizer, MomentumOptimizer, RmspropOptimizer, AdamOptimizer
from layers import DenseLayer, dropout

mnist_in_data, mnist_out_data, mnist_test_in_data, mnist_test_out_data = mnist.load_data_set()

num_hidden_neuron = 1600

optimizer = AdamOptimizer(learning_rate=0.001, bias_correction=False)

l1 = DenseLayer((784, num_hidden_neuron), optimizer)
l2 = DenseLayer((num_hidden_neuron, num_hidden_neuron), optimizer)
l3 = DenseLayer((num_hidden_neuron, 10), optimizer)


def calc_model(x, y=None, train_mode=False):
    a1 = dropout(op.relu(l1.compute(x)), train_mode=train_mode)
    a2 = dropout(op.relu(l2.compute(a1)), train_mode=train_mode)
    z3 = l3.compute(a2)
    if not train_mode:
        return op.softmax(z3)
    else:
        return op.softmax_cross_entropy(z3, y)


def loss_function(y, m, train_mode=False):
    if train_mode: