Code example #1
def line_search_general(objective_func,
                        var_list=None,
                        equalities=None,
                        inequalities=None,
                        methods=None,
                        maxiter=DEFAULT_LINE_SEARCH_BFGS_MAX_ITER):
    """
    Line-search optimizer for non-convex optimization problems:
    L-BFGS-B by default.
    """
    if methods == 'backtracking':
        # TODO
        raise NotImplementedError
    elif methods is not None:
        return ScipyOptimizerInterface(objective_func,
                                       var_list,
                                       equalities,
                                       inequalities,
                                       method=methods,
                                       options={'maxiter': maxiter})
    else:
        return ScipyOptimizerInterface(objective_func,
                                       var_list,
                                       equalities,
                                       inequalities,
                                       options={'maxiter': maxiter})
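
For context, here is a minimal, self-contained driver for ScipyOptimizerInterface on a toy quadratic (a hypothetical sketch, not from the project above; assumes TF 1.x, where the class lives in tensorflow.contrib.opt):

import tensorflow as tf
from tensorflow.contrib.opt import ScipyOptimizerInterface

x = tf.Variable(3.0)
loss = tf.square(x - 2.0)  # minimum at x = 2
# The default method is L-BFGS-B, matching the docstring above.
optimizer = ScipyOptimizerInterface(loss, options={'maxiter': 100})

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    optimizer.minimize(sess)  # wraps scipy.optimize.minimize
    print(sess.run(x))        # ~2.0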
Code example #2
    def _build_net(self):

        with tf.Graph().as_default() as g:

            self._init_variables()

            self._init_placeholders()

            self.U_hat = self.__NN(self.X)
            self.loss_U = self._get_loss(self.U, self.U_hat)
            self.loss_dU = self._get_loss_du()

            self.loss = self.loss_U + self.regularization_param * self.loss_dU

            self.optimizer_BFGS = ScipyOptimizerInterface(
                self.loss,
                method='L-BFGS-B',
                options={'maxiter': 50000,
                         'maxfun': 50000,
                         'maxcor': 50,
                         'maxls': 50,
                         'ftol': 1.0 * np.finfo(float).eps,
                         'gtol': 1.0 * np.finfo(float).eps})

            init = tf.global_variables_initializer()

            self.sess = tf.Session(graph=g)

        self.sess.run(init)
        self.sess.graph.finalize()
Code example #3
 def __init__(self,
              loss,
              var_list=None,
              lr=1e-3,
              clip_val=10.0,
              iteration=500,
              **optimizer_kwargs):
     super(ScipyADAMOptimizerInterface,
           self).__init__(loss, var_list, lr, clip_val, iteration,
                          **optimizer_kwargs)
     self.scipy_optimizer = ScipyOptimizerInterface(loss, var_list)
Code example #4
    def fit(self,
            sess,
            inputs,
            outputs,
            var_list=None,
            spo_config=None,
            feed_dict=None,
            **kwargs):
        '''
        Fit a given model state via MAP estimation.
        '''
        assert inputs.ndim == outputs.ndim == 2, 'Tensor rank should be 2'
        if (feed_dict is None): feed_dict = dict()

        # Which <tf.Variable> should we optimize?
        if (var_list is None):
            filter_fn = lambda v: isinstance(v, tf.Variable)
            var_list = filter(filter_fn, self.state.values())
        var_list = tuple(var_list)

        # Build/retrieve negative log likelihood
        input_dim, output_dim = inputs.shape[-1], outputs.shape[-1]
        inputs_ref = self.get_or_create_ref('inputs/old', [None, input_dim])
        outputs_ref = self.get_or_create_ref('outputs/old', [None, output_dim])
        nll = self.get_or_create_node(
            group='log_likelihood',
            fn=self.log_likelihood,
            args=(inputs_ref, outputs_ref),
            kwargs={**kwargs, 'state_id': self.active_state, 'as_negative': True},
        )

        # Get (updated) copy of configuration for Scipy Optimize
        spo_config = self.update_dict(self.spo_config,
                                      spo_config,
                                      as_copy=True)

        # For active parameters, replace <string> keys with <tf.Variable>
        var_to_bounds = dict()
        for key, bounds in spo_config.get('var_to_bounds', {}).items():
            for var in var_list:
                if key in var.name:  #[!] too permissive, improve me...
                    var_to_bounds[var] = bounds
                    break
        spo_config['var_to_bounds'] = var_to_bounds

        # Initialize/run optimizer
        feed_dict = {**feed_dict, inputs_ref: inputs, outputs_ref: outputs}
        optimizer = ScipyOptimizerInterface(nll, var_list, **spo_config)
        optimizer.minimize(sess, feed_dict)
        return var_list
Code example #5
 def X_optimizer(self):
     """
     Get the optimizer that has the method 'minimize' to do the optimization.
     :return:
         An ExternalOptimizerInterface
     """
     if self.loss == 'none' and self.prev_layer.activation == 'none':
         return XFINInterface(self)
     if self.prev_layer.activation == 'relu':
         return ScipyOptimizerInterface(
             self.X_loss,
             var_list=[self.X],
             var_to_bounds={self.X: (0, np.inf)})
     else:
         return ScipyOptimizerInterface(self.X_loss, var_list=[self.X])
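
A minimal sketch of the var_to_bounds mechanism used above, on a hypothetical toy problem: the bound is handed to L-BFGS-B, so the solution lands on the boundary when the unconstrained minimizer is infeasible.

import numpy as np
import tensorflow as tf
from tensorflow.contrib.opt import ScipyOptimizerInterface

x = tf.Variable(5.0)
loss = tf.square(x + 1.0)  # unconstrained minimum at x = -1
optimizer = ScipyOptimizerInterface(loss,
                                    var_list=[x],
                                    var_to_bounds={x: (0, np.inf)})

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    optimizer.minimize(sess)
    print(sess.run(x))  # ~0.0, pinned to the lower bound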
Code example #6
File: hmf.py  Project: meinternational/HMF
 def __init__(self, settings):
     self.settings = settings
     self.placeholder = self.build_placeholder()
     self.model = self.build_model()
     self.optimizer = ScipyOptimizerInterface(
         self.model['cost'],
         method='L-BFGS-B',
         options={'maxiter': self.settings['max_iter'],
                  'disp': True})
     init = tf.global_variables_initializer()
     self.sess = tf.Session(config=tf.ConfigProto(
         log_device_placement=False))
     self.sess.run(init)
Code example #7
 def W_optimizer(self):
     """
     Get the optimizer that has the method 'minimize' to do the optimization.
     :return:
         An ExternalOptimizerInterface
     """
     alpha = self.rhod if self.rhod > 0.0 else self.rho
     if self.is_last_layer and self.loss == 'cross_entropy':
         return ScipyOptimizerInterface(self.Wb_loss, var_list=[self.W])
         #return RidgeInterface(self.W, self.b, self.X, self.X_next, W_offset=self.W_0, alpha=alpha, normalize=False)
     elif self.is_last_layer and self.loss == 'none':
         return RidgeInterface(self.W,
                               self.b,
                               self.X,
                               self.X_next,
                               W_offset=self.W_0,
                               alpha=alpha,
                               normalize=False)
     else:
         # optimize both W and b
         #return ScipyOptimizerInterface(self.Wb_loss, var_list=[self.W, self.b])
         return RidgeInterface(self.W,
                               self.b,
                               self.X,
                               self.X_next,
                               W_offset=self.W_0,
                               alpha=alpha / self.lmbda,
                               normalize=False)
Code example #8
 def W_optimizer(self):
     """
     Get the optimizer that has the method 'minimize' to do the optimization.
     :return:
         An ExternalOptimizerInterface
     """
     return ScipyOptimizerInterface(self.Wb_loss, var_list=[self.W, self.b])
Code example #9
 def X_optimizer(self):
     """
     Get the optimizer that has the method 'minimize' to do the optimization.
     :return:
         An ExternalOptimizerInterface
     """
     if self.prev_layer.activation == 'relu':
         return ScipyOptimizerInterface(
             self.X_loss,
             var_list=[self.X],
             var_to_bounds={
                 self.X: (0, np.inf)
             })  #, options={'ftol': 2e-15, 'gtol': 1e-11, 'maxls': 100})
     else:
         return ScipyOptimizerInterface(self.X_loss, var_list=[self.X])
Code example #10
File: trpo_class.py  Project: rvtsukanov/AI
    def build_trpo_SOI(self):
        # The _k suffix marks quantities fixed at the k-th iteration in the graph
        # Fixed probs on k-th iteration
        self.soft_out_k = tf.placeholder('float32', name="SOFTOUT_K")

        # Fixed advantage on k-th iteration
        self.A_k = tf.placeholder('float32', name="A_K")

        # Number of steps to estimate expectation
        self.N = tf.placeholder('float32', name="number")

        # Advantage function = empirical_return - baseline
        self.A = self.q_return - self.q_out

        # Select the chosen actions "actions" and multiply by A_k
        # (an earlier bug used A here instead of A_k)
        trpo_obj = -tf.reduce_mean(self.A_k * tf.gather(tf.exp(self.soft_out - self.soft_out_k), self.actions))

        # KL(soft_out_k, soft_out) should be less than KL_delta
        constraints = [(-self.kl(self.soft_out_k, self.soft_out) + self.KL_delta)]

        # Use ScipyOptimizerInterface (SOI) to solve the optimization task with constraints
        self.trpo_opt = SOI(trpo_obj,
                            method='SLSQP',
                            inequalities=constraints,
                            options={'maxiter': 3})
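
A minimal sketch of the inequalities argument on a hypothetical toy problem: each listed tensor is constrained to stay non-negative, which requires a constrained method such as SLSQP.

import tensorflow as tf
from tensorflow.contrib.opt import ScipyOptimizerInterface

x = tf.Variable(0.0)
y = tf.Variable(0.0)
loss = tf.square(x - 1.0) + tf.square(y - 1.0)
# Encode x + y <= 1 as the non-negativity constraint (1 - x - y) >= 0.
optimizer = ScipyOptimizerInterface(loss,
                                    inequalities=[1.0 - x - y],
                                    method='SLSQP')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    optimizer.minimize(sess)
    print(sess.run([x, y]))  # both ~0.5, on the constraint boundary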
Code example #11
class ScipyADAMOptimizerInterface(ADAMOptimizerInterface):
    def __init__(self,
                 loss,
                 var_list=None,
                 lr=1e-3,
                 clip_val=10.0,
                 iteration=500,
                 **optimizer_kwargs):
        super(ScipyADAMOptimizerInterface,
              self).__init__(loss, var_list, lr, clip_val, iteration,
                             **optimizer_kwargs)
        self.scipy_optimizer = ScipyOptimizerInterface(loss, var_list)

    def minimize(self, sess, feed_dict):
        super(ScipyADAMOptimizerInterface, self).minimize(sess, feed_dict)
        self.scipy_optimizer.minimize(sess, feed_dict)
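
The class above chains a stochastic Adam phase with a deterministic scipy polish. A stripped-down sketch of the same two-phase pattern on a hypothetical toy problem (without the ADAMOptimizerInterface base class):

import tensorflow as tf
from tensorflow.contrib.opt import ScipyOptimizerInterface

x = tf.Variable(5.0)
loss = tf.square(x - 2.0)
adam_step = tf.train.AdamOptimizer(1e-1).minimize(loss)
polish = ScipyOptimizerInterface(loss, options={'maxiter': 100})

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(200):   # coarse first-order phase
        sess.run(adam_step)
    polish.minimize(sess)  # L-BFGS-B refinement from Adam's iterate
    print(sess.run(x))     # ~2.0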
Code example #12
 def W_optimizer(self):
     """
     Get the optimizer that has the method 'minimize' to do the optimization.
     :return:
         An ExternalOptimizerInterface
     """
     return ScipyOptimizerInterface(
         self.Wb_loss, var_list=[self.W, self.b]
     )  #, options={'ftol': 2e-15, 'gtol': 1e-15, 'maxls': 100, 'eps': 1e-12})
Code example #13
def _optimize_zinb(mu, dropout, theta=None):
    pred, a, b, t = _tf_zinb_zero(mu, theta)
    #loss = tf.reduce_mean(tf.abs(tf_logit(pred) - tf_logit(dropout)))
    loss = tf.losses.log_loss(labels=dropout.astype('float32'),
                              predictions=pred)

    optimizer = ScipyOptimizerInterface(loss, options={'maxiter': 100})

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        optimizer.minimize(sess)
        ret_a = sess.run(a)
        ret_b = sess.run(b)
        if theta is None:
            ret_t = sess.run(t)
        else:
            ret_t = t

    return ret_a, ret_b, ret_t
Code example #14
    def get_optimizer(loss, n_opt_steps, var_to_bounds, inequalities):
        options = {'maxiter': n_opt_steps, 'disp': True, 'ftol': 1e-15}

        with tf.name_scope('optimizer'):
            optimizer = ScipyOptimizerInterface(loss,
                                                options=options,
                                                method='SLSQP',
                                                var_to_bounds=var_to_bounds,
                                                inequalities=inequalities)

        return optimizer
Code example #15
    def fit(self, sess, data, feed_dict, maxiter):
        pred = self.get_pred(data)
        loss, pred_normed, labels_normed = self.get_loss(pred, data['labels'])
        optimizer = ScipyOptimizerInterface(loss, options={'maxiter': maxiter})
        self.losses = []

        def append_loss(loss):
            self.losses.append(loss)

        optimizer.minimize(sess,
                           feed_dict=feed_dict,
                           loss_callback=append_loss,
                           fetches=[loss])
        for name, var in self.vars.items():
            self.vars_evals[name] = sess.run(var)

        self.eval_pred, self.eval_pred_normed, self.eval_label, self.eval_label_normed = sess.run(
            [pred, pred_normed, data['labels'], labels_normed],
            feed_dict=feed_dict)
        self.r2 = stats.linregress(self.eval_pred_normed.flatten(),
                                   self.eval_label_normed.flatten())[2]**2
        self.final_loss = sess.run(loss, feed_dict=feed_dict)
Code example #16
    def __init__(self, linear_system_solver_gen=None, tolerance=None, name='ImplicitHG'):
        super(ImplicitHG, self).__init__(name)
        if linear_system_solver_gen is None:
            linear_system_solver_gen = lambda _obj, var_list, _tolerance: ScipyOptimizerInterface(
                _obj, var_list=var_list, options={'maxiter': 100}, method='cg', tol=_tolerance)
        self.linear_system_solver = linear_system_solver_gen

        if tolerance is None:
            tolerance = lambda _k: 0.1 * (0.9 ** _k)
        self.tolerance = tolerance

        self._lin_sys = []
        self._qs = []
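
Here method='cg' turns the interface into an approximate linear-system solver: minimizing the quadratic 0.5 x^T A x - b^T x by conjugate gradients solves A x = b. A toy sketch of that reduction, with a hypothetical symmetric positive-definite A and right-hand side b:

import tensorflow as tf
from tensorflow.contrib.opt import ScipyOptimizerInterface

A = tf.constant([[3.0, 1.0], [1.0, 2.0]])
b = tf.constant([[1.0], [1.0]])
x = tf.Variable(tf.zeros((2, 1)))
quad = (0.5 * tf.matmul(x, tf.matmul(A, x), transpose_a=True)
        - tf.matmul(b, x, transpose_a=True))
solver = ScipyOptimizerInterface(quad[0, 0], var_list=[x],
                                 method='cg', tol=1e-8,
                                 options={'maxiter': 100})

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    solver.minimize(sess)
    print(sess.run(x))  # ~[[0.2], [0.4]], the solution of A x = b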
Code example #17
def reconmodel(config, data, sigma=0.01**0.5, maxiter=100):

    bs, nc = config['boxsize'], config['nc']
    kmesh = sum(kk**2 for kk in config['kvec'])**0.5
    priorwt = config['ipklin'](kmesh) * bs**-3

    g = tf.Graph()

    with g.as_default():

        initlin = tf.placeholder(tf.float32, data.shape, name='initlin')
        linear = tf.get_variable('linmesh',
                                 shape=(nc, nc, nc),
                                 initializer=tf.random_normal_initializer(),
                                 trainable=True)
        initlin_op = linear.assign(initlin, name='initlin_op')
        #PM
        icstate = tfpm.lptinit(linear, config, name='icstate')
        fnstate = tfpm.nbody(icstate, config, verbose=False, name='fnstate')
        final = tf.zeros_like(linear)
        final = tfpf.cic_paint(final, fnstate[0], boxsize=bs, name='final')
        #
        #Prior
        lineark = tfpf.r2c3d(linear, norm=nc**3)
        priormesh = tf.square(tf.cast(tf.abs(lineark), tf.float32))
        prior = tf.reduce_sum(tf.multiply(priormesh, 1 / priorwt))
        prior = tf.multiply(prior, 1 / nc**3, name='prior')

        data2d = data.sum(axis=0)
        final2d = tf.reduce_sum(final, axis=0)
        residual = tf.subtract(final2d, data2d)

        residual = tf.multiply(residual, 1 / sigma)

        chisq = tf.multiply(residual, residual)
        chisq = tf.reduce_sum(chisq)
        chisq = tf.multiply(chisq, 1 / nc**2, name='chisq')

        loss = tf.add(chisq, prior, name='loss')

        optimizer = ScipyOptimizerInterface(loss,
                                            var_list=[linear],
                                            method='L-BFGS-B',
                                            options={'maxiter': maxiter})

        tf.add_to_collection('utils', [initlin_op, initlin])
        tf.add_to_collection('opt', optimizer)
        tf.add_to_collection('diagnostics', [prior, chisq, loss])
        tf.add_to_collection('reconpm', [linear, final, fnstate, final2d])
        tf.add_to_collection('data', [data, data2d])
    return g
Code example #18
File: base_v1.py  Project: roman-amici/PINN_Base
    def _init_optimizers(self):
        '''
        Initialize optimizers.
        By default, L-BFGS-B and Adam are initialized.
        '''

        self.optimizer_BFGS = ScipyOptimizerInterface(
            self.loss,
            method='L-BFGS-B',
            options={
                'maxiter': 50000,
                'maxfun': 50000,
                'maxcor': 50,
                'maxls': 50,
                'gtol': 1.0 * np.finfo(float).eps,
                'ftol': 1.0 * np.finfo(float).eps
            })

        if self.learning_rate is not None:
            self.optimizer_Adam = tf.train.AdamOptimizer(
                self.learning_rate).minimize(self.loss)
        else:
            self.optimizer_Adam = tf.train.AdamOptimizer(
                **self.optimizer_kwargs).minimize(self.loss)
Code example #19
    def __init__(
        self,
        inner_method="Trad",
        linear_system_solver_gen=None,
        name="BMLOuterGradImplicit",
    ):
        super(BOMLOuterGradImplicit, self).__init__(name)
        self._inner_method = inner_method
        if linear_system_solver_gen is None:
            linear_system_solver_gen = lambda _obj, var_list, _tolerance: ScipyOptimizerInterface(
                _obj,
                var_list=var_list,
                options={"maxiter": 5},
                method="cg",
                tol=_tolerance,
            )
        self.linear_system_solver = linear_system_solver_gen

        self.tolerance = lambda _k: 0.1 * (0.9 ** _k)

        self._lin_sys = []
        self._qs = []
Code example #20
def minimize_task(config,
                  sess,
                  task,
                  num_starts=None,
                  num_options=None,
                  scope='minimize_task',
                  reuse=None,
                  spo_config=None,
                  dtype=None,
                  rng=None):
    '''
    Estimate task minimum using multi-start L-BFGS-B.
    '''
    if (rng is None): rng = npr.RandomState(config.seed)
    if (dtype is None): dtype = task.dtype
    if (spo_config is None): spo_config = dict()
    if (num_starts is None): num_starts = config.num_starts_fmin
    if (num_options is None): num_options = config.num_options_fmin
    with tf.variable_scope(scope, reuse=reuse) as vs:
        # Build minimization target
        shape = [num_starts, task.input_dim]
        inputs_var = tf.get_variable(
            'inputs',
            shape=shape,
            dtype=dtype,
            initializer=tf.random_uniform_initializer())

        task_op = task.tensorflow(inputs_var, noisy=False, stop_gradient=False)
        loss_op = tf.reduce_mean(task_op)

        # Find starting positions via initial random sweep
        counter, x_mins, f_mins = 0, None, None
        while counter + num_starts <= num_options:
            inputs = rng.rand(*shape)
            outputs = np.squeeze(sess.run(task_op, {inputs_var: inputs}))
            if (counter == 0):
                x_mins, f_mins = inputs, outputs
            else:
                inputs = np.vstack([x_mins, inputs])
                outputs = np.hstack([f_mins, outputs])
                argmins = np.argpartition(outputs, num_starts - 1)[:num_starts]
                x_mins, f_mins = inputs[argmins], outputs[argmins]
            counter += num_starts
        _ = sess.run(tf.assign(inputs_var, x_mins))

        # Initialize task optimizer
        spo_config = {
            'method': 'L-BFGS-B',
            'var_list': [inputs_var],
            'var_to_bounds': {inputs_var: (0, 1)},
            'options': {'maxiter': 1024},
            **spo_config,  # user-specified settings take precedence
        }
        optimizer = ScipyOptimizerInterface(loss_op, **spo_config)

        # Run task optimizer
        _ = sess.run(tf.variables_initializer([inputs_var]))
        _ = optimizer.minimize(sess)

        # Evaluate task at optimized input locations
        inputs, outputs = sess.run([inputs_var, task_op])
        argmin = np.argmin(outputs)
        x_min = inputs[argmin]
        f_min = outputs[argmin, 0]
        return x_min, f_min
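
The spo_config construction above relies on dict-unpacking order, so user-specified settings override the defaults. A minimal illustration with hypothetical values (note that nested dicts like 'options' are replaced wholesale, not deep-merged):

defaults = {'method': 'L-BFGS-B', 'options': {'maxiter': 1024}}
user = {'options': {'maxiter': 64}}
merged = {**defaults, **user}  # later keys win
print(merged)  # {'method': 'L-BFGS-B', 'options': {'maxiter': 64}}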
Code example #21
    def _set_up_graph(self):
        """Initialize TF objects (needed before fitting or restoring)."""

        # Input values.
        if self.is_sparse_:
            self._x_inds = tf.placeholder(tf.int64, [None, 2], "x_inds")
            self._x_vals = tf.placeholder(tf.float32, [None], "x_vals")
            self._x_shape = tf.placeholder(tf.int64, [2], "x_shape")
            self._x = tf.sparse_reorder(
                tf.SparseTensor(self._x_inds, self._x_vals, self._x_shape))
            x2 = tf.sparse_reorder(
                tf.SparseTensor(self._x_inds, self._x_vals * self._x_vals,
                                self._x_shape))
            matmul = tf.sparse_tensor_dense_matmul
        else:
            self._x = tf.placeholder(tf.float32, [None, self.n_dims_], "x")
            x2 = self._x * self._x
            matmul = tf.matmul

        if self._output_size == 1:
            self._y = tf.placeholder(tf.float32, [None], "y")
        else:
            self._y = tf.placeholder(tf.int32, [None], "y")

        with tf.variable_scope("fm"):
            self._v = tf.get_variable(
                "v", [self.rank, self.n_dims_, self._output_size])
            self._beta = tf.get_variable("beta",
                                         [self.n_dims_, self._output_size])
            self._beta0 = tf.get_variable("beta0", [self._output_size])

        vx = tf.stack(
            [matmul(self._x, self._v[i, :, :]) for i in range(self.rank)],
            axis=-1)
        v2 = self._v * self._v
        v2x2 = tf.stack([matmul(x2, v2[i, :, :]) for i in range(self.rank)],
                        axis=-1)
        int_term = 0.5 * tf.reduce_sum(tf.square(vx) - v2x2, axis=-1)
        self._logit_y_proba \
            = self._beta0 + matmul(self._x, self._beta) + int_term

        if self._output_size == 1:
            self._logit_y_proba = tf.squeeze(self._logit_y_proba)
            self._obj_func = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self._logit_y_proba, labels=self._y))
            self._y_proba = tf.sigmoid(self._logit_y_proba)
        else:
            self._obj_func = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self._logit_y_proba, labels=self._y))
            self._y_proba = tf.nn.softmax(self._logit_y_proba)

        if self.lambda_v > 0:
            self._obj_func \
                += self.lambda_v * tf.reduce_sum(tf.square(self._v))

        if self.lambda_beta > 0:
            self._obj_func \
                += self.lambda_beta * tf.reduce_sum(tf.square(self._beta))

        if isinstance(self.solver, str):
            from tensorflow.contrib.opt import ScipyOptimizerInterface

            self._train_step = ScipyOptimizerInterface(
                self._obj_func,
                method=self.solver,
                options=self.solver_kwargs if self.solver_kwargs else {})
        else:
            self._train_step = self.solver(
                **self.solver_kwargs if self.solver_kwargs else {}).minimize(
                    self._obj_func)
Code example #22
# (header inferred from the call below; the original snippet started mid-function
# and the return_params branch is not shown)
def linear(x, output_features, return_params=False):
    x_flat = tf.reshape(x, [-1])
    input_features = int(x_flat.get_shape()[0])
    W = tf.Variable(tf.random_uniform(shape=[input_features, output_features]), name="weight")
    b = tf.Variable(tf.random_uniform(shape=[output_features]), name="bias")
    pred = tf.add(tf.multiply(x, W), b)
    return pred, W, b

#pred = linear(X, 1)
pred, W, b = linear(X, 1, return_params=True)


# Mean squared error
cost = tf.reduce_mean(tf.pow(pred-Y, 2))/(2*n_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate)

# The scipy interface below rebinds `optimizer`, replacing the SGD optimizer above
optimizer = ScipyOptimizerInterface(cost, options={'maxiter': 100}, method='BFGS')

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
# Start training
with tf.Session() as sess:
    sess.run(init)

    # Fit all training data
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            optimizer.minimize(sess, feed_dict={X: x, Y: y})
            # cost depends on the X and Y placeholders, so feed them here too
            sess.run(cost, feed_dict={X: x, Y: y})
            # sess.run(optimizer.minimize(cost), feed_dict={X: x, Y: y})
        #Display logs per epoch step
Code example #23
def tsne(X,
         perplexity=50,
         dim=2,
         theta=0.5,
         knn_method='knnparallel',
         pca_dim=50,
         exag=12.,
         exag_iter=250,
         max_iter=1000,
         verbose=False,
         print_iter=50,
         lr=200.,
         init_momentum=0.5,
         final_momentum=0.8,
         save_snapshots=False,
         optimizer='momentum',
         tf_optimizer='AdamOptimizer',
         seed=42):

    X -= X.mean(axis=0)
    N = X.shape[0]
    result = {}

    assert optimizer in (
        'momentum', 'tensorflow',
        'bfgs'), 'Available options: momentum, tensorflow and bfgs'

    if pca_dim is not None:
        result['PCA'] = PCA(n_components=pca_dim)
        X = result['PCA'].fit_transform(X)

    P = x2p(X, perplexity=perplexity, method=knn_method, verbose=verbose)
    result['P'] = P
    result['exag_iter'] = exag_iter
    result['print_iter'] = print_iter
    result['loss'] = []
    if save_snapshots:
        result['snapshots'] = []

    tf.reset_default_graph()
    tf.set_random_seed(seed)

    with tf.Session() as sess:
        step = 1

        def step_callback(Y_var):
            nonlocal step
            if step % print_iter == 0:
                print('Step: %d, error: %.16f' % (step, result['loss'][-1]))
                if save_snapshots:
                    result['snapshots'].append(Y_var.reshape((N, dim)).copy())
            if step == exag_iter:
                sess.run(tf.assign(exag_var, 1.))

            #zero mean
            sess.run(tf.assign(Y, Y - tf.reduce_mean(Y, axis=0)))
            step += 1

        def loss_callback(err):
            result['loss'].append(err)

        stddev = 1. if optimizer == 'bfgs' else 0.01
        Y = tf.Variable(
            tf.random_normal((N, dim), stddev=stddev, dtype=X.dtype))
        exag_var = tf.Variable(exag, dtype=P.dtype)

        if isinstance(P, sp.sparse.csr_matrix):
            loss = tsne_op((P.indptr, P.indices, P.data * exag_var), Y)
        else:
            loss = tsne_op(P * exag_var, Y)

        if optimizer == 'bfgs':
            opt = ScipyOptimizerInterface(loss,
                                          var_list=[Y],
                                          method='L-BFGS-B',
                                          options={
                                              'eps': 1.,
                                              'gtol': 0.,
                                              'ftol': 0.,
                                              'disp': False,
                                              'maxiter': max_iter,
                                              'maxls': 100
                                          })
            tf.global_variables_initializer().run()
            opt.minimize(sess,
                         fetches=[loss],
                         loss_callback=loss_callback,
                         step_callback=step_callback)
            Y_final = Y.eval()

        else:
            zero_mean = tf.assign(Y, Y - tf.reduce_mean(Y, axis=0))

            if optimizer == 'tensorflow':
                opt = getattr(tf.train, tf_optimizer)(learning_rate=lr)
                update = opt.minimize(loss, var_list=[Y])
            else:
                mom_var = tf.Variable(init_momentum, dtype=X.dtype)
                uY = tf.Variable(tf.zeros((N, dim), dtype=X.dtype))
                gains = tf.Variable(tf.ones((N, dim), dtype=X.dtype))
                dY = tf.gradients(loss, [Y])[0]

                gains = tf.assign(
                    gains,
                    tf.where(tf.equal(tf.sign(dY), tf.sign(uY)), gains * .8,
                             gains + .2))

                gains = tf.assign(gains, tf.maximum(gains, 0.01))
                uY = tf.assign(uY, mom_var * uY - lr * gains * dY)

                update = tf.assign_add(Y, uY)

            tf.global_variables_initializer().run()

            t = time.time()
            for i in range(1, max_iter + 1):
                if i == exag_iter:
                    if optimizer == 'momentum':
                        sess.run(tf.assign(mom_var, final_momentum))
                    sess.run(tf.assign(exag_var, 1.))

                sess.run(update)
                sess.run(zero_mean)

                if i % print_iter == 0:
                    kl = loss.eval()
                    result['loss'].append(kl)
                    if verbose:
                        print('Step: %d, error: %f (in %f sec.)' %
                              (i, kl, (time.time() - t)))
                        t = time.time()
                    if save_snapshots:
                        result['snapshots'].append(Y.eval())
            Y_final = Y.eval()

    result['Y'] = Y_final
    return result
Code example #24
class FMClassifier(TFPicklingBase, ClassifierMixin, BaseEstimator):
    """Factorization machine classifier.

    Parameters
    ----------
    rank : int, optional
        Rank of the underlying low-rank representation.
    batch_size : int, optional
        The batch size for learning and prediction. If there are fewer
        examples than the batch size during fitting, then the number of
        examples will be used instead.
    n_epochs : int, optional
        The number of epochs (iterations through the training data) when
        fitting. These are counted for the positive training examples, not
        the unlabeled data.
    random_state : int, RandomState instance or None, optional
        If int, the random number generator seed. If RandomState instance,
        the random number generator itself. If None, then `np.random` will be
        used.
    lambda_v : float, optional
        L2 regularization strength for the low-rank embedding.
    lambda_beta : float, optional
        L2 regularization strength for the linear coefficients.
    init_scale : float, optional
        Standard deviation of random normal initialization.
    solver : a subclass of `tf.train.Optimizer` or str, optional
        Solver to use. If a string is passed, then the corresponding solver
        from `scipy.optimize.minimize` is used.
    solver_kwargs : dict, optional
        Additional keyword arguments to pass to `solver` upon construction.
        See the TensorFlow documentation for possible options. Typically,
        one would want to set the `learning_rate`.

    Attributes
    ----------
    n_dims_ : int
        Number of input dimensions.
    classes_ : array
        Classes from the data.
    n_classes_ : int
        Number of classes.
    is_sparse_ : bool
        Whether a model taking sparse input was fit.
    """
    def __init__(self,
                 rank=8,
                 batch_size=64,
                 n_epochs=5,
                 random_state=None,
                 lambda_v=0.0,
                 lambda_beta=0.0,
                 solver=tf.train.AdadeltaOptimizer,
                 init_scale=0.1,
                 solver_kwargs=None):
        self.rank = rank
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.random_state = random_state
        self.lambda_v = lambda_v
        self.lambda_beta = lambda_beta
        self.solver = solver
        self.init_scale = init_scale
        self.solver_kwargs = solver_kwargs

    def _set_up_graph(self):
        """Initialize TF objects (needed before fitting or restoring)."""

        # Input values.
        if self.is_sparse_:
            self._x_inds = tf.placeholder(tf.int64, [None, 2], "x_inds")
            self._x_vals = tf.placeholder(tf.float32, [None], "x_vals")
            self._x_shape = tf.placeholder(tf.int64, [2], "x_shape")
            self._x = tf.sparse_reorder(
                tf.SparseTensor(self._x_inds, self._x_vals, self._x_shape))
            x2 = tf.sparse_reorder(
                tf.SparseTensor(self._x_inds, self._x_vals * self._x_vals,
                                self._x_shape))
            matmul = tf.sparse_tensor_dense_matmul
        else:
            self._x = tf.placeholder(tf.float32, [None, self.n_dims_], "x")
            x2 = self._x * self._x
            matmul = tf.matmul

        if self._output_size == 1:
            self._y = tf.placeholder(tf.float32, [None], "y")
        else:
            self._y = tf.placeholder(tf.int32, [None], "y")

        with tf.variable_scope("fm"):
            self._v = tf.get_variable(
                "v", [self.rank, self.n_dims_, self._output_size])
            self._beta = tf.get_variable("beta",
                                         [self.n_dims_, self._output_size])
            self._beta0 = tf.get_variable("beta0", [self._output_size])

        vx = tf.stack(
            [matmul(self._x, self._v[i, :, :]) for i in range(self.rank)],
            axis=-1)
        v2 = self._v * self._v
        v2x2 = tf.stack([matmul(x2, v2[i, :, :]) for i in range(self.rank)],
                        axis=-1)
        int_term = 0.5 * tf.reduce_sum(tf.square(vx) - v2x2, axis=-1)
        self._logit_y_proba \
            = self._beta0 + matmul(self._x, self._beta) + int_term

        if self._output_size == 1:
            self._logit_y_proba = tf.squeeze(self._logit_y_proba)
            self._obj_func = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self._logit_y_proba, labels=self._y))
            self._y_proba = tf.sigmoid(self._logit_y_proba)
        else:
            self._obj_func = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self._logit_y_proba, labels=self._y))
            self._y_proba = tf.nn.softmax(self._logit_y_proba)

        if self.lambda_v > 0:
            self._obj_func \
                += self.lambda_v * tf.reduce_sum(tf.square(self._v))

        if self.lambda_beta > 0:
            self._obj_func \
                += self.lambda_beta * tf.reduce_sum(tf.square(self._beta))

        if isinstance(self.solver, str):
            from tensorflow.contrib.opt import ScipyOptimizerInterface

            self._train_step = ScipyOptimizerInterface(
                self._obj_func,
                method=self.solver,
                options=self.solver_kwargs if self.solver_kwargs else {})
        else:
            self._train_step = self.solver(
                **self.solver_kwargs if self.solver_kwargs else {}).minimize(
                    self._obj_func)

    def _make_feed_dict(self, X, y):
        # Make the dictionary mapping tensor placeholders to input data.
        if self.is_sparse_:
            x_inds = np.vstack(X.nonzero())
            x_srt = np.lexsort(x_inds[::-1, :])
            x_inds = x_inds[:, x_srt].T.astype(np.int64)
            x_vals = np.squeeze(np.array(X[x_inds[:, 0],
                                           x_inds[:, 1]])).astype(np.float32)
            x_shape = np.array(X.shape).astype(np.int64)
            feed_dict = {
                self._x_inds: x_inds,
                self._x_vals: x_vals,
                self._x_shape: x_shape
            }
        else:
            feed_dict = {self._x: X.astype(np.float32)}

        if self._output_size == 1:
            feed_dict[self._y] = y.astype(np.float32)
        else:
            feed_dict[self._y] = y.astype(np.int32)

        return feed_dict

    def _check_data(self, X):
        """check input data

        Raises an error if number of features doesn't match.
        If the estimator has not yet been fitted, then do nothing.
        """

        if self._is_fitted:
            if X.shape[1] != self.n_dims_:
                raise ValueError("Number of features in the input data does "
                                 "not match the number assumed by the "
                                 "estimator!")

    def __getstate__(self):
        # Handles TF persistence
        state = super(FMClassifier, self).__getstate__()

        # Add attributes of this estimator
        state.update(
            dict(rank=self.rank,
                 batch_size=self.batch_size,
                 n_epochs=self.n_epochs,
                 random_state=self.random_state,
                 lambda_v=self.lambda_v,
                 lambda_beta=self.lambda_beta,
                 solver=self.solver,
                 init_scale=self.init_scale,
                 solver_kwargs=self.solver_kwargs))

        # Add fitted attributes if the model has been fitted.
        if self._is_fitted:
            state['n_dims_'] = self.n_dims_
            state['_random_state'] = self._random_state
            state['_enc'] = self._enc
            state['classes_'] = self.classes_
            state['n_classes_'] = self.n_classes_
            state['_output_size'] = self._output_size
            state['is_sparse_'] = self.is_sparse_

        return state

    def fit(self, X, y):
        """Fit the classifier.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Training data.
        y : numpy array [n_samples]
            Targets.

        Returns
        -------
        self : returns an instance of self.
        """
        _LOGGER.info("Fitting %s", re.sub(r"\s+", r" ", repr(self)))

        # Mark the model as not fitted (i.e., not fully initialized based on
        # the data).
        self._is_fitted = False

        # Call partial fit, which will initialize and then train the model.
        return self.partial_fit(X, y)

    def partial_fit(self, X, y, classes=None, monitor=None):
        """Fit the classifier.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Training data.
        y : numpy array [n_samples]
            Targets.
        classes : array, shape (n_classes,)
            Classes to be used across calls to partial_fit.  If not set in the
            first call, it will be inferred from the given targets. If
            subsequent calls include additional classes, they will fail.
        monitor : callable, optional
            The monitor is called after each iteration with the current
            iteration, a reference to the estimator, and a dictionary with
            {'loss': loss_value} representing the loss calculated by the
            objective function at this iteration.
            If the callable returns True the fitting procedure is stopped.
            The monitor can be used for various things such as computing
            held-out estimates, early stopping, model introspection,
            and snapshotting.

        Returns
        -------
        self : returns an instance of self.
        """

        X, y = check_X_y(X, y, accept_sparse='csr')

        # check target type
        target_type = type_of_target(y)
        if target_type not in ['binary', 'multiclass']:
            # Raise an error, as in
            # sklearn.utils.multiclass.check_classification_targets.
            raise ValueError("Unknown label type: %r" % y)

        # Initialize the model if it hasn't been already by a previous call.
        if not self._is_fitted:
            self._random_state = check_random_state(self.random_state)
            assert self.batch_size > 0, "batch_size <= 0"

            self.n_dims_ = X.shape[1]

            if classes is not None:
                self._enc = LabelEncoder().fit(classes)
            else:
                self._enc = LabelEncoder().fit(y)

            self.classes_ = self._enc.classes_
            self.n_classes_ = len(self.classes_)

            if self.n_classes_ <= 2:
                self._output_size = 1
            else:
                self._output_size = self.n_classes_

            if sp.issparse(X):
                self.is_sparse_ = True
            else:
                self.is_sparse_ = False

            # Instantiate the graph.  TensorFlow seems easier to use by just
            # adding to the default graph, and as_default lets you temporarily
            # set a graph to be treated as the default graph.
            self.graph_ = tf.Graph()
            with self.graph_.as_default():
                tf.set_random_seed(self._random_state.randint(0, 10000000))

                tf.get_variable_scope().set_initializer(
                    tf.random_normal_initializer(stddev=self.init_scale))

                self._build_tf_graph()

                # Train model parameters.
                self._session.run(tf.global_variables_initializer())

            # Set an attribute to mark this as at least partially fitted.
            self._is_fitted = True

        # Check input data against internal data.
        # Raises an error on failure.
        self._check_data(X)

        # transform targets
        if sp.issparse(y):
            y = y.toarray()
        y = self._enc.transform(y)

        # Train the model with the given data.
        with self.graph_.as_default():
            if not isinstance(self.solver, str):
                n_examples = X.shape[0]
                indices = np.arange(n_examples)

                for epoch in range(self.n_epochs):
                    self._random_state.shuffle(indices)
                    for start_idx in range(0, n_examples, self.batch_size):
                        max_ind = min(start_idx + self.batch_size, n_examples)
                        batch_ind = indices[start_idx:max_ind]
                        feed_dict = self._make_feed_dict(
                            X[batch_ind], y[batch_ind])
                        obj_val, _ = self._session.run(
                            [self._obj_func, self._train_step],
                            feed_dict=feed_dict)
                        _LOGGER.debug("objective: %.4f, epoch: %d, idx: %d",
                                      obj_val, epoch, start_idx)

                    _LOGGER.info("objective: %.4f, epoch: %d, idx: %d",
                                 obj_val, epoch, start_idx)

                    if monitor:
                        stop_early = monitor(epoch, self, {'loss': obj_val})
                        if stop_early:
                            _LOGGER.info(
                                "stopping early due to monitor function.")
                            return self
            else:
                feed_dict = self._make_feed_dict(X, y)
                self._train_step.minimize(self._session, feed_dict=feed_dict)

        return self

    def predict_log_proba(self, X):
        """Compute log p(y=1).

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Data.

        Returns
        -------
        numpy array [n_samples, n_classes]
            Log probabilities.
        """
        if not self._is_fitted:
            raise NotFittedError("Call fit before predict_log_proba!")
        return np.log(self.predict_proba(X))

    def predict_proba(self, X):
        """Compute p(y=1).

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Data.

        Returns
        -------
        numpy array [n_samples, n_classes]
            Probabilities.
        """

        if not self._is_fitted:
            raise NotFittedError("Call fit before predict_proba!")

        X = check_array(X, accept_sparse='csr')

        # Check input data against internal data.
        # Raises an error on failure.
        self._check_data(X)

        # Compute weights in batches.
        probs = []
        start_idx = 0
        n_examples = X.shape[0]
        with self.graph_.as_default():
            while start_idx < n_examples:
                X_batch = \
                    X[start_idx:min(start_idx + self.batch_size, n_examples)]
                feed_dict = self._make_feed_dict(X_batch,
                                                 np.zeros(self.n_dims_))
                start_idx += self.batch_size
                probs.append(
                    self._y_proba.eval(session=self._session,
                                       feed_dict=feed_dict))

        probs = np.concatenate(probs, axis=0)
        if probs.ndim == 1:
            return np.column_stack([1.0 - probs, probs])
        else:
            return probs

    def predict(self, X):
        """Compute the predicted class.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Data.

        Returns
        -------
        numpy array [n_samples]
            Predicted class.
        """
        if not self._is_fitted:
            raise NotFittedError("Call fit before predict!")
        return self.classes_[self.predict_proba(X).argmax(axis=1)]
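
A hypothetical usage sketch of the string-solver path in FMClassifier: passing a scipy.optimize method name makes fit() delegate the whole optimization to ScipyOptimizerInterface in a single full-batch solve instead of mini-batch training (toy data; assumes the class's base-class dependencies are importable):

import numpy as np

X = np.random.rand(200, 8)
y = (X.sum(axis=1) > 4).astype(int)

clf = FMClassifier(rank=4, solver='L-BFGS-B',
                   solver_kwargs={'maxiter': 200})
clf.fit(X, y)  # one full-batch scipy solve rather than n_epochs of batches
print(clf.predict_proba(X[:3]))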
Code example #25
# Advantage function = empirical_return - baseline
A = q_return - q_out

cumulative_trpo_obj = 0

# Select the chosen actions "actions" and multiply by A_k
trpo_obj = (tf.gather(tf.squeeze(soft_out), actions) /
            tf.gather(tf.squeeze(soft_out_k), actions) * A_k)
cumulative_trpo_obj += trpo_obj

# KL(soft_out_k, soft_out) should be less than KL_delta
constraints = [(-kl(soft_out_k, soft_out) + KL_delta)]

# Use ScipyOptimizerInterface (SOI) to solve the optimization task with constraints
trpo_opt = SOI(-1. / N * cumulative_trpo_obj,
               inequalities=constraints,
               method='SLSQP',
               options={'maxiter': 1})  #it is not enough!
'''
=======================
HyperParams
=======================
'''
num_episodes = 10000
num_steps = 200
trpo_steps = 5
gamma = 0.9

successes = []
success_episodes = []
slice = 10
success_counter = 0
Code example #26
def match(query_full,
          d_query,
          query_2d_full,
          scene,
          intr,
          gap,
          tr_ground,
          scale,
          thresh_log_conf=7.5,
          w_3d=0.01,
          fps=3,
          step_samples=100):
    with_y = False  # optimize for y as well
    np.set_printoptions(suppress=True, linewidth=220)

    pjoin = os.path.join

    len_gap = gap[1] - gap[0] + 1
    query, q_v = get_partial_scenelet(query_full,
                                      start=gap[0],
                                      end=gap[1] + 1,
                                      fps=1)
    q_v_sum = np.sum(q_v)
    q_v_sum_inv = np.float32(1. / q_v_sum)
    # lg.debug("q_v_sum: %s/%s" % (q_v_sum, q_v.size))
    # scene_min_y = scene.skeleton.get_min_y(tr_ground)
    # lg.debug("scene_min_y: %s" % repr(scene_min_y))

    mid_frames = range(len_gap * fps,
                       scene.skeleton.poses.shape[0] - len_gap * fps,
                       step_samples)
    if not len(mid_frames):
        return []

    scenelets, sc_v = (np.array(e) for e in zip(*[
        get_partial_scenelet(
            scene, mid_frame_id=mid_frame_id, n_frames=len_gap, fps=fps)
        for mid_frame_id in mid_frames
    ]))
    # for i, (scenelet, sc_v_) in enumerate(zip(scenelets, sc_v)):
    #     mn = np.min(scenelet[sc_v_.astype('b1'), 1, :])
    #     scenelets[i, :, 1, :] -= mn
    # mn = np.min(scenelets[i, sc_v_.astype('b1'), 1, :])
    # scenelets = np.array(scenelets, dtype=np.float32)
    # sc_v = np.array(sc_v, dtype=np.int32)
    # print("sc_v: %s" % sc_v)
    # print("q_v: %s" % q_v)

    lg.debug("have %d/%d 3D poses in scenelet, and %d/%d in query" %
             (np.sum(sc_v), sc_v.shape[0], np.sum(q_v), q_v.shape[0]))

    query_2d = np.zeros((len_gap, 2, 16), dtype=np.float32)
    conf_2d = np.zeros((len_gap, 1, 16), dtype=np.float32)
    for lin_id, frame_id in enumerate(range(gap[0], gap[1] + 1)):

        if query_2d_full.has_pose(frame_id):
            query_2d[lin_id, :, :] = query_2d_full.get_pose(frame_id)[:2, :]
        # else:
        #     lg.warning("Query2d_full does not have pose at %d?" % frame_id)

        # im = im_.copy()
        if query_2d_full.has_confidence(frame_id):
            # print("showing %s" % frame_id)
            for joint, conf in query_2d_full._confidence[frame_id].items():
                log_conf = abs(np.log(conf)) if conf >= 0. else 0.
                # print("conf: %g, log_conf: %g" % (conf, log_conf))
                # if log_conf <= thresh_log_conf:
                #     p2d = scale * query_2d_full.get_joint_3d(joint,
                #                                              frame_id=frame_id)
                #     p2d = (int(round(p2d[0])), int(round(p2d[1])))
                #     cv2.circle(im, center=p2d,
                #                radius=int(round(3)),
                #                color=(1., 1., 1., 0.5), thickness=1)
                conf_2d[lin_id, 0, joint] = max(
                    0., (thresh_log_conf - log_conf) / thresh_log_conf)

            # cv2.imshow('im', im)
            # cv2.waitKey(100)
    # while cv2.waitKey() != 27: pass
    conf_2d /= np.max(conf_2d)

    # scale from Denis' scale to current image size
    query_2d *= scale

    # move to normalized camera coordinates
    query_2d -= intr[:2, 2:3]
    query_2d[:, 0, :] /= intr[0, 0]
    query_2d[:, 1, :] /= intr[1, 1]

    #
    # initialize translation
    #

    # centroid of query poses
    c3d = np.mean(query[q_v.astype('b1'), :, :], axis=(0, 2))
    # estimate scenelet centroids
    sclt_means = np.array([
        np.mean(scenelets[i, sc_v[i, ...].astype('b1'), ...], axis=(0, 2))
        for i in range(scenelets.shape[0])
    ],
                          dtype=np.float32)
    # don't change height
    sclt_means[:, 1] = 0
    scenelets -= sclt_means[:, None, :, None]
    lg.debug("means: %s" % repr(sclt_means.shape))
    if with_y:
        np_translation = np.array([c3d for i in range(scenelets.shape[0])],
                                  dtype=np.float32)
    else:
        np_translation = np.array(
            [c3d[[0, 2]] for i in range(scenelets.shape[0])], dtype=np.float32)
    np_rotation = np.array(
        [np.pi * (i % 2) for i in range(scenelets.shape[0])],
        dtype=np.float32)[:, None]
    n_cands = np_translation.shape[0]
    graph = tf.Graph()
    with graph.as_default(), tf.device('/gpu:0'):
        # 3D translation
        translation_ = tf.Variable(initial_value=np_translation,
                                   name='translation',
                                   dtype=tf.float32)
        t_y = tf.fill(dims=(n_cands, ),
                      value=(tr_ground[1, 3]).astype(np.float32))
        # t_y = tf.fill(dims=(n_cands,), value=np.float32(0.))
        lg.debug("t_y: %s" % t_y)
        if with_y:
            translation = translation_
        else:
            translation = tf.concat(
                (translation_[:, 0:1], t_y[:, None], translation_[:, 1:2]),
                axis=1)

        lg.debug("translation: %s" % translation)
        # 3D rotation (Euler XYZ)
        rotation = tf.Variable(np_rotation, name='rotation', dtype=tf.float32)
        # lg.debug("rotation: %s" % rotation)

        w = tf.Variable(conf_2d, trainable=False, name='w', dtype=tf.float32)

        pos_3d_in = tf.Variable(query,
                                trainable=False,
                                name='pos_3d_in',
                                dtype=tf.float32)
        # pos_3d_in = tf.constant(query, name='pos_3d_in', dtype=tf.float32)

        pos_2d_in = tf.Variable(query_2d,
                                trainable=False,
                                name='pos_2d_in',
                                dtype=tf.float32)
        # pos_2d_in = tf.constant(query_2d, name='pos_2d_in',
        #                         dtype=tf.float32)

        pos_3d_sclt = tf.Variable(scenelets,
                                  trainable=False,
                                  name='pos_3d_sclt',
                                  dtype=tf.float32)
        # print("pos_3d_sclt: %s" % pos_3d_sclt)

        # rotation around y
        my_zeros = tf.zeros((n_cands, 1), dtype=tf.float32, name='my_zeros')
        # tf.add_to_collection('to_init', my_zeros)
        my_ones = tf.ones((n_cands, 1))
        # tf.add_to_collection('to_init', my_ones)
        c = tf.cos(rotation, 'cos')
        # tf.add_to_collection('to_init', c)
        s = tf.sin(rotation, 'sin')
        # t0 = tf.concat([c, my_zeros, -s], axis=1)
        # t1 = tf.concat([my_zeros, my_ones, my_zeros], axis=1)
        # t2 = tf.concat([s, my_zeros, c], axis=1)
        # transform = tf.stack([t0, t1, t2], axis=2, name="transform")
        # print("t: %s" % transform)
        transform = tf.concat(
            [c, my_zeros, -s, my_zeros, my_ones, my_zeros, s, my_zeros, c],
            axis=1)
        transform = tf.reshape(transform, ((-1, 3, 3)), name='transform')
        print("t2: %s" % transform)
        # lg.debug("transform: %s" % transform)

        # transform to 3d
        # pos_3d = tf.matmul(transform, pos_3d_sclt) \
        #          + tf.tile(tf.expand_dims(translation, 2),
        #                    [1, 1, int(pos_3d_in.shape[2])])
        # pos_3d = tf.einsum("bjk,bcjd->bcjd", transform, pos_3d_sclt)
        shp = pos_3d_sclt.get_shape().as_list()
        transform_tiled = tf.tile(transform[:, None, :, :, None],
                                  (1, shp[1], 1, 1, shp[3]))
        # print("transform_tiled: %s" % transform_tiled)
        pos_3d = tf.einsum("abijd,abjd->abid", transform_tiled, pos_3d_sclt)
        # print("pos_3d: %s" % pos_3d)
        pos_3d += translation[:, None, :, None]
        #pos_3d = pos_3d_sclt
        # print("pos_3d: %s" % pos_3d)

        # perspective divide
        # pos_2d = tf.divide(
        #     tf.slice(pos_3d, [0, 0, 0], [n_cands, 2, -1]),
        #     tf.slice(pos_3d, [0, 2, 0], [n_cands, 1, -1]))
        pos_2d = tf.divide(pos_3d[:, :, :2, :], pos_3d[:, :, 2:3, :])

        # print("pos_2d: %s" % pos_2d)

        diff = pos_2d - pos_2d_in
        # mask loss by 2d key-point visibility
        # print("w: %s" % w)
        # w_sum = tf.reduce_sum()
        masked = tf.multiply(diff, w)
        # print(masked)
        # loss_reproj = tf.nn.l2_loss(masked)
        # loss_reproj = tf.reduce_sum(tf.square(masked[:, :, 0, :])
        #                             + tf.square(masked[:, :, 1, :]),
        #                             axis=[1, 2])
        masked_sqr = tf.square(masked[:, :, 0, :]) \
                     + tf.square(masked[:, :, 1, :])
        loss_reproj = tf.reduce_sum(masked_sqr, axis=[1, 2])
        # lg.debug("loss_reproj: %s" % loss_reproj)

        # distance from existing 3D skeletons
        d_3d = q_v_sum_inv * tf.multiply(pos_3d - query[None, ...],
                                         q_v[None, :, None, None],
                                         name='diff_3d')
        # print(d_3d)

        loss_3d = w_3d * tf.reduce_sum(tf.square(d_3d[:, :, 0, :]) + tf.square(
            d_3d[:, :, 1, :]) + tf.square(d_3d[:, :, 2, :]),
                                       axis=[1, 2],
                                       name='loss_3d_each')
        # print(loss_3d)

        loss = tf.reduce_sum(loss_reproj) + tf.reduce_sum(loss_3d)

        # optimize
        optimizer = ScipyOptimizerInterface(loss,
                                            var_list=[translation_, rotation],
                                            options={'gtol': 1e-12})

    with Timer('solve', verbose=True) as t:
        with tf.Session(graph=graph) as session:
            session.run(tf.global_variables_initializer())
            optimizer.minimize(session)
            o_pos_3d, o_pos_2d, o_masked, o_t, o_r, o_w, o_d_3d, \
                o_loss_reproj, o_loss_3d, o_transform, o_translation = \
                session.run([
                    pos_3d, pos_2d, masked, translation, rotation, w,
                    d_3d, loss_reproj, loss_3d, transform, translation])
            o_masked_sqr = session.run(masked_sqr)
        # o_t, o_r = session.run([translation, rotation])
    # print("pos_3d: %s" % o_pos_3d)
    # print("pos_2d: %s" % o_pos_2d)
    # print("o_loss_reproj: %s, o_loss_3d: %s" % (o_loss_reproj, o_loss_3d))
    # print("t: %s" % o_t)
    # print("r: %s" % o_r)
    chosen = sorted((i for i in range(o_loss_reproj.shape[0])),
                    key=lambda i2: o_loss_reproj[i2] + o_loss_3d[i2])
    lg.info("Best candidate is %d with error %g + %g" %
            (chosen[0], o_loss_reproj[chosen[0]], o_loss_3d[chosen[0]]))
    # print("masked: %s" % o_masked)
    # opp = np.zeros_like(o_pos_3d)
    # for i in range(o_pos_3d.shape[0]):
    #     for j in range(o_pos_3d.shape[1]):
    #         for k in range(16):
    #             opp[i, j, :2, k] = o_pos_3d[i, j, :2, k] / o_pos_3d[i, j, 2:3, k]
    #             # opp[i, j, 0, k] *= intr[0, 0]
    #             # opp[i, j, 1, k] *= intr[1, 1]
    #             # opp[i, j, :2, k] *= intr[1, 1]
    #             a = o_pos_2d[i, j, :, k]
    #             b = opp[i, j, :2, k]
    #             if not np.allclose(a, b):
    #                 print("diff: %s, %s" % (a, b))

    o_pos_2d[:, :, 0, :] *= intr[0, 0]
    o_pos_2d[:, :, 1, :] *= intr[1, 1]
    o_pos_2d += intr[:2, 2:3]

    # debug visualization of the best candidates (disabled; set to True,
    # or iterate with `for cand_id in range(o_pos_2d.shape[0])`)
    if False:
        # bring the query keypoints into pixel coordinates as well
        query_2d[:, 0, :] *= intr[0, 0]
        query_2d[:, 1, :] *= intr[1, 1]
        query_2d += intr[:2, 2:3]

        ims = {}
        for cand_id in chosen[:5]:
            lg.debug("starting %s" % cand_id)
            pos_ = o_pos_2d[cand_id, ...]
            for lin_id in range(pos_.shape[0]):
                frame_id = gap[0] + lin_id
                try:
                    im = ims[frame_id].copy()
                except KeyError:
                    p_im = pjoin(d_query, 'origjpg',
                                 "color_%05d.jpg" % frame_id)
                    ims[frame_id] = cv2.imread(p_im)
                    im = ims[frame_id].copy()
                for jid in range(pos_.shape[-1]):
                    # query keypoint in green (BGR)
                    xy2 = int(round(query_2d[lin_id, 0, jid])), \
                          int(round(query_2d[lin_id, 1, jid]))
                    cv2.circle(im,
                               center=xy2,
                               radius=5,
                               color=(10., 200., 10.),
                               thickness=-1)

                    if o_masked[cand_id, lin_id, 0, jid] > 0 \
                       or o_w[lin_id, 0, jid] > 0:
                        # estimated keypoint in blue (BGR)
                        xy = int(round(pos_[lin_id, 0, jid])), \
                             int(round(pos_[lin_id, 1, jid]))
                        cv2.circle(im,
                                   center=xy,
                                   radius=3,
                                   color=(200., 10., 10.),
                                   thickness=-1)
                        cv2.putText(im,
                                    "d2d: %g" %
                                    o_masked_sqr[cand_id, lin_id, jid],
                                    org=((xy2[0] - xy[0]) // 2 + xy[0],
                                         (xy2[1] - xy[1]) // 2 + xy[1]),
                                    fontFace=1,
                                    fontScale=1,
                                    color=(0., 0., 0.))
                        cv2.line(im, xy, xy2, color=(0., 0., 0.))
                        d3d = o_d_3d[cand_id, lin_id, :, jid]
                        d3d_norm = np.linalg.norm(d3d)
                        if d3d_norm > 0.:
                            cv2.putText(
                                im,
                                "%g" % d3d_norm,
                                org=((xy2[0] - xy[0]) // 2 + xy[0] + 10,
                                     (xy2[1] - xy[1]) // 2 + xy[1]),
                                fontFace=1,
                                fontScale=1,
                                color=(0., 0., 255.))

                cv2.putText(im,
                            text="%d::%02d" % (cand_id, lin_id),
                            org=(40, 80),
                            fontFace=1,
                            fontScale=2,
                            color=(255., 255., 255.))

                cv2.imshow('im', im)
                cv2.waitKey()
            break

        while cv2.waitKey() != 27:
            pass

    out_scenelets = []
    for cand_id in chosen[:1]:
        lg.debug("score of %d is %g + %g = %g" %
                 (cand_id, o_loss_reproj[cand_id], o_loss_3d[cand_id],
                  o_loss_reproj[cand_id] + o_loss_3d[cand_id]))
        scenelet = Scenelet()
        rate = query_full.skeleton.get_rate()
        prev_time = None
        for lin_id, frame_id in enumerate(range(gap[0], gap[1] + 1)):
            time_ = query_full.get_time(frame_id)
            if lin_id and rate is None:
                rate = time_ - prev_time
            if time_ == frame_id:
                # get_time returned the frame id itself; extrapolate a
                # timestamp from the previous one and the frame rate
                time_ = prev_time + rate
            scenelet.skeleton.set_pose(frame_id=frame_id,
                                       pose=o_pos_3d[cand_id, lin_id, :, :],
                                       time=time_)
            prev_time = time_
        # assemble the 4x4 rigid transform from the optimized rotation
        # and translation
        tr = np.concatenate(
            (np.concatenate((o_transform[cand_id, ...],
                             o_translation[cand_id, None, :].T), axis=1),
             [[0., 0., 0., 1.]]), axis=0)
        # undo the scenelet mean-centering before applying the transform
        tr_m = np.concatenate(
            (np.concatenate((np.identity(3), -sclt_means[cand_id, None, :].T),
                            axis=1), [[0., 0., 0., 1.]]), axis=0)
        tr = np.matmul(tr, tr_m)
        for oid, ob in scene.objects.items():
            if ob.label in ('wall', 'floor'):
                continue
            ob2 = copy.deepcopy(ob)
            ob2.apply_transform(tr)
            scenelet.add_object(obj_id=oid, scene_obj=ob2, clone=False)
        scenelet.name_scene = scene.name_scene
        out_scenelets.append((o_loss_reproj[cand_id], scenelet))
    return out_scenelets
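The pattern above (one graph, per-candidate losses summed into a single scalar objective, the best candidate chosen after the solve) can be reproduced in isolation. A minimal sketch on toy data; `offsets`, `candidate_loss`, and the random targets are illustrative stand-ins, not part of the original code:

import numpy as np
import tensorflow as tf
from tensorflow.contrib.opt import ScipyOptimizerInterface

n_cands, n_pts = 4, 16
target = np.random.rand(n_pts).astype(np.float32)

graph = tf.Graph()
with graph.as_default():
    # one free offset per candidate, all optimized jointly
    offsets = tf.get_variable('offsets', shape=(n_cands, 1),
                              initializer=tf.zeros_initializer())
    preds = offsets + tf.constant(np.random.rand(n_cands, n_pts),
                                  dtype=tf.float32)
    # per-candidate losses; the optimizer needs a single scalar
    candidate_loss = tf.reduce_sum(tf.square(preds - target[None, :]), axis=1)
    loss = tf.reduce_sum(candidate_loss)
    optimizer = ScipyOptimizerInterface(loss, var_list=[offsets],
                                        options={'gtol': 1e-12})

with tf.Session(graph=graph) as session:
    session.run(tf.global_variables_initializer())
    optimizer.minimize(session)
    per_cand = session.run(candidate_loss)
best = int(np.argmin(per_cand))  # the candidate with the smallest residual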
Code example #27
0
File: gals.py Project: modichirag/galmodel
def reconmodel(config,
               data,
               sigma=0.01**0.5,
               maxiter=100,
               gtol=1e-5,
               anneal=True):

    bs, nc = config['boxsize'], config['nc']
    kmesh = sum(kk**2 for kk in config['kvec'])**0.5
    priorwt = config['ipklin'](kmesh) * bs**-3

    g = tf.Graph()

    with g.as_default():

        module = hub.Module(modpath)
        initlin = tf.placeholder(tf.float32, (nc, nc, nc), name='initlin')
        linear = tf.get_variable('linmesh',
                                 shape=(nc, nc, nc),
                                 initializer=tf.random_normal_initializer(
                                     mean=1.0, stddev=0.5),
                                 trainable=True)
        initlin_op = linear.assign(initlin, name='initlin_op')
        # particle-mesh (PM) dynamics: LPT initialization, N-body evolution,
        # then CIC-paint the particles onto a density mesh
        icstate = tfpm.lptinit(linear, config, name='icstate')
        fnstate = tfpm.nbody(icstate, config, verbose=False, name='fnstate')
        final = tf.zeros_like(linear)
        final = tfpf.cic_paint(final, fnstate[0], boxsize=bs, name='final')
        # pad the box periodically along each axis for the convolutional module
        xx = tf.concat((final[-pad:, :, :], final, final[:pad, :, :]), axis=0)
        xx = tf.concat((xx[:, -pad:, :], xx, xx[:, :pad, :]), axis=1)
        xx = tf.concat((xx[:, :, -pad:], xx, xx[:, :, :pad]), axis=2)
        xx = tf.expand_dims(tf.expand_dims(xx, 0), -1)
        # halo data as labels
        yy = tf.expand_dims(data, 0)

        print('xx, yy shape :', xx.shape, yy.shape)
        likelihood = module(dict(features=tf.cast(xx, tf.float32),
                                 labels=tf.cast(yy, tf.float32)),
                            as_dict=True)['loglikelihood']
        print(likelihood.shape)

        # Annealing: optionally smooth the likelihood on a scale Rsm
        Rsm = tf.placeholder(tf.float32, name='smoothing')
        if anneal:
            Rsm = tf.multiply(Rsm, bs / nc)
            Rsmsq = tf.multiply(Rsm, Rsm)
            smwts = tf.exp(tf.multiply(-kmesh**2, Rsmsq))
            likelihood = tf.squeeze(likelihood)
            print(likelihood.shape)
            likelihoodk = tfpf.r2c3d(likelihood, norm=nc**3)
            likelihoodk = tf.multiply(likelihoodk,
                                      tf.cast(smwts, tf.complex64))
            likelihood = tfpf.c2r3d(likelihoodk, norm=nc**3)

        residual = -tf.reduce_sum(likelihood)

        # Gaussian prior on the linear modes, weighted by the linear power spectrum
        lineark = tfpf.r2c3d(linear, norm=nc**3)
        priormesh = tf.square(tf.cast(tf.abs(lineark), tf.float32))
        prior = tf.reduce_sum(tf.multiply(priormesh, 1 / priorwt))
        prior = tf.multiply(prior, 1 / nc**3, name='prior')

        chisq = tf.multiply(residual, 1 / nc**0, name='chisq')

        loss = tf.add(chisq, prior, name='loss')

        optimizer = ScipyOptimizerInterface(loss,
                                            var_list=[linear],
                                            method='L-BFGS-B',
                                            options={
                                                'maxiter': maxiter,
                                                'gtol': gtol
                                            })

        tf.add_to_collection('inits', [initlin_op, initlin])
        tf.add_to_collection('opt', optimizer)
        tf.add_to_collection('diagnostics', [prior, chisq, loss])
        tf.add_to_collection('reconpm', [linear, final, fnstate])
        tf.add_to_collection('data', data)
    return g
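A sketch of how a graph built by reconmodel might be driven; `config`, `data`, and `linear_guess` are assumed to be prepared elsewhere, and the annealing schedule here is illustrative, while the 'inits'/'opt' collections and the 'smoothing' placeholder come from the function above:

import tensorflow as tf

g = reconmodel(config, data, maxiter=100, anneal=True)
initlin_op, initlin = g.get_collection('inits')[0]
optimizer = g.get_collection('opt')[0]

with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    # start the linear mesh from an initial guess
    sess.run(initlin_op, feed_dict={initlin: linear_guess})
    # anneal the smoothing scale from coarse to fine
    for R in [4.0, 2.0, 1.0, 0.5]:
        optimizer.minimize(sess, feed_dict={'smoothing:0': R})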
Code example #28
0
    def __init__(self):
        self.__OptimizerWrap = \
            lambda loss, max_steps: ScipyOptimizerInterface(loss,
                                                            options={'maxiter': max_steps,
                                                                     'disp': 50})
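Presumably the wrapper is then used along these lines inside the owning class (the leading double underscore makes the attribute name-mangled); `self.loss` and the step budget are illustrative:

# no method is given, so ScipyOptimizerInterface defaults to L-BFGS-B
optimizer = self.__OptimizerWrap(self.loss, max_steps=500)
optimizer.minimize(self.sess)  # 'disp' asks SciPy to print progress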
Code example #29
0
    def run(self):
        content = self._load_image(self._content_image)
        h, w = content.shape[1], content.shape[2]
        style = self._load_image(self._style_image, size=(h, w))

        print("Content shape: ", content.shape)
        print("Style shape: ", style.shape)

        image = tf.Variable(style,
                            dtype=tf.float32,
                            validate_shape=False,
                            name='image')
        self._output_shape = content.shape
        self._build_vgg19(image)
        self._add_gramians()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            # Calculate loss function
            sess.run(tf.global_variables_initializer())
            with tf.name_scope('losses'):
                style_losses = self._setup_style_losses(sess, image, style)
                content_losses = self._setup_content_losses(
                    sess, image, content)

                losses = content_losses + style_losses

                if self._hist_weight > 0:
                    with tf.name_scope('histogram'), tf.device('/cpu:0'):
                        hist_loss = self._setup_histogram_loss(
                            image, style, sess)
                    losses += hist_loss

                image.set_shape(
                    content.shape)  # tv loss expects explicit shape
                if self._tv_weight:
                    tv_loss = tf.image.total_variation(image[0])
                    tv_loss_weighted = tf.multiply(tv_loss,
                                                   self._tv_weight,
                                                   name='tv_loss')
                    losses += tv_loss_weighted
                loss = tf.foldl(add, losses, name='loss')

            # Set up the optimizer
            if self._optimizer == 'Adam':
                opt = tf.train.AdamOptimizer(10).minimize(loss)

                sess.run(tf.global_variables_initializer())
                self._set_initial_image(sess, image, content)

                self._step_callback(sess.run(image))

                for it in range(self._num_iterations):
                    _, ll, out = sess.run([opt, loss, image])
                    self._step_callback(out)
                    print("Iteration: {:3d}\tLoss = {:.6f}".format(it, ll))

            elif self._optimizer == 'L-BFGS':
                sess.run(tf.global_variables_initializer())
                self._set_initial_image(sess, image, content)
                self._step_callback(sess.run(image))

                opt = ScipyOptimizerInterface(loss,
                                              options={
                                                  'maxiter':
                                                  self._num_iterations,
                                                  'disp': self._print_iter
                                              },
                                              method='L-BFGS-B')
                opt.minimize(sess, step_callback=self._step_callback)
            else:
                raise ValueError("Unknown optimization method")

            self._save_image(self._output_image, sess.run(image))
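Note that ScipyOptimizerInterface.minimize invokes step_callback with the current values of all optimized variables flattened into a single vector, so a callback that previews the image has to reshape it. A minimal sketch, assuming `image` is the only optimized variable and `self._output_shape` holds its shape:

import numpy as np

def _step_callback(self, x):
    # x is a flat float vector of every optimized variable; with `image`
    # as the only variable it can be reshaped back into an image batch
    img = np.clip(np.reshape(x, self._output_shape), 0, 255)
    self._last_image = img  # keep the latest iterate for previewing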
Code example #30
0
File: linear_regression.py Project: nbren12/gnl
# In[75]:


tf.reset_default_graph()

X = tf.placeholder("float")
Y = tf.placeholder("float")

M = tf.get_variable("beta", [n, 1], "float", initializer=tf.zeros_initializer)



y_pred = tf.matmul(X, M )
loss = tf.reduce_sum(tf.pow(Y-y_pred, 2))

optim = ScipyOptimizerInterface(loss, [M])


# In[76]:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    optim.minimize(sess, feed_dict={X: x, Y: y[:, None]})
    M_st = sess.run(M)


# In[77]:

print(M_st.T)
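Since this is ordinary least squares, the L-BFGS-B solution can be checked against the closed form. A self-contained sketch with synthetic data (x, y, and n are generated here rather than taken from the notebook):

import numpy as np

n = 5
x = np.random.randn(100, n).astype(np.float32)
true_beta = np.arange(1, n + 1, dtype=np.float32)[:, None]
y = (x @ true_beta)[:, 0] + 0.01 * np.random.randn(100)

# closed-form OLS for comparison with M_st from the TF solve above
beta_ls, *_ = np.linalg.lstsq(x, y[:, None], rcond=None)
print(beta_ls.T)  # should closely match print(M_st.T)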
Code example #31
0
File: trpo_class.py Project: rvtsukanov/AI
class TRPO:
    def __init__(self, env, gamma=1, lr=0.01, num_episodes=1000,
                 num_steps=200, KL_delta=1e-4):

        self.env = env
        self.gamma = gamma
        self.lr = lr
        self.num_episodes = num_episodes
        self.num_steps = num_steps
        self.KL_delta = KL_delta
        self.success_counter = 0
        self.build_graph()
        self.obs_len = len(self.env.reset())
        self.trajectory = []
        self.total_rew = []
        self.disc = 0
        self.log_file = open('logs_' + str(time.time()) + '.txt', 'w+')

        self.sess = tf.Session()

    def build_graph(self):
        tf.reset_default_graph()
        self.state = tf.placeholder('float32', shape=[None, len(self.env.reset())], name="STATE")
        self.actions = tf.squeeze(tf.placeholder('int32', name="ACTIONS"))
        self.q_estimation = tf.placeholder('float32', name="Q-EST")
        self.build_actor()
        self.build_critic()
        self.build_trpo_tf()
        #self.build_trpo_SOI()


    '''
    ============================
    Actor = Policy Approximation
    ============================
    '''
    def build_actor(self):
        self.inp = tf.layers.dense(
            self.state,
            10,
            name="ACTOR_INPUT",
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.initializers.constant(0)
        )

        self.out = tf.layers.dense(
            self.inp,
            self.env.action_space.n,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.initializers.constant(0)
        )

        self.soft_out = tf.nn.softmax(self.out)
        # policy-gradient loss: cross-entropy of the taken actions,
        # weighted by the critic's Q estimates
        nl = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.out, labels=self.actions)
        wnl = tf.multiply(nl, self.q_estimation)
        self.loss = tf.reduce_mean(wnl)
        self.opt = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.loss)

    '''
    ======================================
    Critic = Approximation of Q-function
    ======================================
    '''
    def build_critic(self):
        self.q_return = tf.placeholder('float32', name="Q-Return")  # sum of rewards on rest of traj
        # concatenate the action onto the state as an extra input feature
        # (tf.concat expects a list of tensors and an explicit axis)
        q_input = tf.concat(
            [self.state,
             tf.cast(tf.reshape(self.actions, [-1, 1]), tf.float32)],
            axis=1)
        self.q_inp = tf.layers.dense(
            q_input,
            10,
            name="Q-input",
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.initializers.constant(0)
        )

        self.q_out = tf.layers.dense(
            self.q_inp,
            1,
            name="Q-output",
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.initializers.constant(0)
        )

        self.q_loss = tf.losses.mean_squared_error(labels=self.q_return,
                                                   predictions=self.q_out)
        self.q_opt = tf.train.AdamOptimizer(0.01).minimize(self.q_loss)

    def build_trpo_SOI(self):
        # The `_k` suffix marks quantities held fixed at iteration k.
        # Fixed action probabilities at the k-th iteration
        self.soft_out_k = tf.placeholder('float32', name="SOFTOUT_K")

        # Fixed advantage on k-th iteration
        self.A_k = tf.placeholder('float32', name="A_K")

        # Number of steps to estimate expectation
        self.N = tf.placeholder('float32', name="number")

        # Advantage function = empirical return - baseline
        self.A = self.q_return - self.q_out

        # Pick the taken actions' probability ratios and weight them by A_k
        # (an earlier revision mistakenly used A here instead of A_k)
        trpo_obj = -tf.reduce_mean(self.A_k * tf.gather(tf.exp(self.soft_out - self.soft_out_k), self.actions))

        # KL(soft_out_k, soft_out) should be less than KL_delta
        constraints = [(-self.kl(self.soft_out_k, self.soft_out) + self.KL_delta)]

        # Use ScipyOptimizerInterface (SOI) to solve the constrained problem
        # with SLSQP (see the standalone sketch after this class)
        self.trpo_opt = SOI(trpo_obj,
                            method='SLSQP',
                            inequalities=constraints,
                            options={'maxiter': 3})

    def apply_trpo_SOI(self, s, a, q_app, soft, r, adv):

        # Use a trajectory (s, a, r) to optimize the policy within the trust region
        feed_dict = [[self.state, [s]],
                     [self.soft_out_k, [soft]],
                     [self.actions, [a]],
                     [self.q_return, [r]],
                     [self.q_out, q_app],
                     [self.A_k, adv]]

        self.trpo_opt.minimize(self.sess, feed_dict=feed_dict)



    def build_trpo_tf(self):

        self.beta = tf.placeholder('float32')
        self.eta = tf.placeholder('float32')
        self.learn_rate = tf.placeholder('float32')
        self.learn_rate_value = 0.001

        self.soft_out_k = tf.placeholder('float32', name="SOFTOUT_K")
        # Fixed advantage on k-th iteration
        self.A_k = tf.placeholder('float32', name="A_K")
        self.A = self.q_return - self.q_out  # advantage = return - baseline
        self.D_KL = self.kl(self.soft_out, self.soft_out_k)

        trpo_loss_1 = -tf.reduce_mean(self.A_k * tf.exp(self.soft_out - self.soft_out_k))
        trpo_loss_2 = self.beta * self.D_KL
        trpo_loss_3 = self.eta * tf.square(tf.maximum(0.0, self.KL_delta - 2 * self.D_KL))

        trpo_total_loss = trpo_loss_1 + trpo_loss_2 + trpo_loss_3
        self.trpo_opt = tf.train.AdamOptimizer(self.learn_rate).minimize(trpo_total_loss)



    def apply_trpo_tf(self, old_policy, advantage, state, actions, num_steps):
        beta = 0.5
        eta = 0.5
        DKL = 0.01
        for i in range(num_steps):
            if DKL > 2 * self.KL_delta:
                beta *= 1.5
                if beta > 30:
                    self.learn_rate_value /= 1.5
            elif DKL < 0.5 * self.KL_delta:
                beta /= 1.05
                if beta < 1./30:
                    self.learn_rate_value *= 1.5

            _, DKL = self.sess.run([self.trpo_opt, self.D_KL], feed_dict={self.A_k: advantage,
                                                          self.soft_out_k: old_policy,
                                                          self.actions: actions,
                                                          self.state: [state],
                                                          self.beta: beta,
                                                          self.eta: eta,
                                                          self.learn_rate: self.learn_rate_value})


    def roll_trajectory(self, episode):
        s = self.env.reset()
        self.trajectory = []
        self.total_rew = []

        for step in range(self.num_steps):
            output = self.sess.run([self.soft_out], feed_dict={self.state: [s]})
            probs = output[0][0]
            if not self.learn_flag:
                # explore uniformly until the first successful episode
                a = np.random.choice(self.env.action_space.n,
                                     p=[1. / self.env.action_space.n
                                        for _ in range(self.env.action_space.n)])
            else:
                a = np.random.choice(self.env.action_space.n, p=probs)
            self.log_file.write('probs: ' + str(probs) + '\n')
            new_state, reward, done, _ = self.env.step(a)
            self.total_rew.append(reward)
            self.trajectory.append((s, a, reward))

            if done:
                if reward != 0:
                    # a nonzero terminal reward counts as a success and
                    # switches from uniform exploration to the learned policy
                    self.env.render()
                    self.learn_flag = True
                    print(reward)
                    self.success_counter += 1
                return
            s = new_state

        print('====================== end of episode {} ======================'.format(episode))

    def learn(self):
        self.learn_flag = False
        with self.sess:
            self.sess.run(tf.global_variables_initializer())

            # Calculate metrics
            self.successes = []
            self.success_episodes = []
            self.slice = 10
            self.success_counter = 0

            for episode in range(self.num_episodes):
                self.roll_trajectory(episode)
                disc = self.discount_and_norm_rewards(self.total_rew, self.gamma)
                for n, st in enumerate(self.trajectory):
                    traj_state = st[0]
                    traj_action = int(st[1])
                    traj_reward = disc[n]

                    #Learning Critic
                    q_approximated, _ = self.sess.run([self.q_out, self.q_opt],
                                                      feed_dict={self.state: [traj_state],
                                                                 self.actions: [traj_action],
                                                                 self.q_return: traj_reward}
                                                      )

                    #Learning Actor
                    _, soft, adv = self.sess.run([self.opt, self.soft_out, self.A],
                                            feed_dict={self.state: [traj_state],
                                                       self.actions: [traj_action],
                                                       self.q_estimation: q_approximated,
                                                       self.q_return: traj_reward}
                                            )

                    #Optimization Actor-Parameters
                    #self.apply_trpo_SOI(traj_state, traj_action, q_approximated, soft, traj_reward, adv)
                    if episode > 500:
                        self.apply_trpo_tf(soft, adv, traj_state, traj_action, 20)

                if episode % self.slice == 0:
                    print("Episode: ", episode)
                    print("Successes to all: ", self.success_counter / self.slice)
                    self.success_episodes.append(episode)
                    self.successes.append(self.success_counter / self.slice)
                    self.success_counter = 0

            plt.plot(self.success_episodes, self.successes)
            plt.show()
            self.log_file.close()
            print(self.successes)

    @staticmethod
    def kl(p, q):
        return tf.reduce_sum(tf.multiply(p, tf.log(p / q)))

    @staticmethod
    def kl_num(p, q):
        return np.sum(np.multiply(p, np.log(p/q)))

    @staticmethod
    def to_cat(a, n):
        return np.array([1 if a == i else 0 for i in range(n)])

    @staticmethod
    def discount_and_norm_rewards(episode_rewards, gamma):
        # discounted returns; despite the name, no normalization is applied
        discounted_episode_rewards = np.zeros_like(episode_rewards)
        cumulative = 0
        for t in reversed(range(len(episode_rewards))):
            cumulative = cumulative * gamma + episode_rewards[t]
            discounted_episode_rewards[t] = cumulative
        return discounted_episode_rewards
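The build_trpo_SOI method above leans on ScipyOptimizerInterface's support for inequality constraints with SLSQP (constraint tensors are held nonnegative). The same pattern in isolation, on a toy problem of my own construction:

import tensorflow as tf
from tensorflow.contrib.opt import ScipyOptimizerInterface

x = tf.Variable([2.0, 2.0])
objective = tf.reduce_sum(tf.square(x))  # minimize ||x||^2
# feasible region: x[0] + x[1] - 1 >= 0
inequalities = [x[0] + x[1] - 1.0]

opt = ScipyOptimizerInterface(objective,
                              method='SLSQP',
                              inequalities=inequalities,
                              options={'maxiter': 50})

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    opt.minimize(sess)
    print(sess.run(x))  # approaches [0.5, 0.5]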