Example #1
    def __call__(self,
                 cost_func,
                 variables=None,
                 subgraph=None,
                 warning=False):
        """
        Arguments:
            cost_func (Op): The cost function to optimize
            variables (list of variables): List of variables to optimize
            subgraph (SubGraph): A subgraph instance containing all variables to optimize
            warning (bool): If True, displays a warning message when any of the
                            specified variables do not participate in the batch
                            cost computation

        .. Note::
            If subgraph is provided, the variables to optimize will be taken from it.
            Otherwise, they can be provided explicitly by passing a list as `variables`.
            If neither `subgraph` nor `variables` is provided, the variables to optimize will be
            all trainable variables on which `cost` depends.
        """

        all_updates = []
        batch_cost = ng.sum(cost_func, out_axes=())
        if cost_func.axes.batch_axis() is None:
            batch_size = 1
        else:
            batch_size = cost_func.axes.batch_axis().length

        # determine variables to optimize
        if subgraph is not None:
            if variables is not None:
                raise ValueError(
                    "variables and subgraph cannot both be specified.")
            variables = list(subgraph.variables.values())

        if variables is None:
            variables = batch_cost.variables()
        elif warning:
            all_variables = batch_cost.variables()
            selected_variables = all_variables & set(variables)
            if len(selected_variables) < len(variables):
                logger.warning(
                    "not all selected variables participate in cost computation"
                )

        # gradients
        grads = [ng.deriv(batch_cost, v) / batch_size for v in variables]
        scale_factor = clip_gradient_norm(grads, self.gradient_clip_norm)

        # updates
        for variable, grad in zip(variables, grads):
            updates = self.variable_update(variable, grad, scale_factor)
            all_updates.append(updates)
        updates = ng.doall(all_updates)
        grads = ng.doall(grads)
        clips = ng.doall([
            ng.assign(variable,
                      clip_weight_value(variable, self.weight_clip_value))
            for variable in variables
        ])
        return ng.sequential([grads, updates, clips, 0])
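
A minimal usage sketch for the optimizer above (all names here, such as opt, cost, model_subgraph, and the placeholders, are assumptions rather than part of the example); it binds the returned op into a computation the same way Example #13 does:

    import ngraph as ng

    # `opt` is an optimizer instance exposing the __call__ above; `cost` is a
    # scalar-producing ngraph Op built from the placeholders `inputs`/`targets`.
    updates = opt(cost, subgraph=model_subgraph)    # or: opt(cost, variables=[...])
    train_output = ng.sequential([updates, cost])   # apply updates, then report cost
    transformer = ng.transformers.make_transformer()
    train_fn = transformer.add_computation(
        ng.computation(train_output, inputs, targets))
    batch_cost = train_fn(x_batch, y_batch)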
Example #2
    def get_restore_op(self):
        """
        Get variable restoring ngraph op from TF model checkpoint

        Returns:
            A `ng.doall` op that restores the stored weights in TF model
            checkpoint
        """
        if self._graph is None:
            raise ValueError("self._graph is None, import meta_graph first.")
        if self._checkpoint_path is None:
            raise ValueError("self._checkpoint_path is None, please specify"
                             "checkpoint_path while importing meta_graph.")
        with self._graph.as_default():
            tf_variables = tf.all_variables()
            ng_variables = self.get_op_handle(tf_variables)
            ng_restore_ops = []
            with tf.Session() as sess:
                self.saver.restore(sess, self._checkpoint_path)
                for tf_variable, ng_variable in zip(tf_variables,
                                                    ng_variables):
                    val = sess.run(tf_variable)
                    with ng.Op.saved_user_deps():
                        restore_op = ng.assign(ng_variable, val)
                        ng_restore_ops.append(restore_op)
            with ng.Op.saved_user_deps():
                ng_restore_ops = ng.doall(ng_restore_ops)
            return ng_restore_ops
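
A hedged sketch of how the restore op would be consumed; the importer class name and the import_meta_graph signature are assumptions inferred from the error messages above:

    import ngraph as ng

    importer = TFImporter()   # hypothetical importer exposing get_restore_op
    importer.import_meta_graph(meta_path, checkpoint_path=ckpt_path)  # assumed signature
    restore_op = importer.get_restore_op()

    transformer = ng.transformers.make_transformer()
    restore_fn = transformer.add_computation(ng.computation(restore_op))
    restore_fn()   # copies the checkpointed TF weights into the ngraph variables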
Example #3
    def __call__(self, cost_func):
        with ng.Op.saved_user_deps():
            velocity_updates, param_updates = [], []
            batch_cost = ng.sum(cost_func, out_axes=())
            batch_size = cost_func.axes.batch_axes()[0].length

            grads = [ng.deriv(batch_cost, v) / batch_size for v in batch_cost.variables()]
            scale_factor = clip_gradient_norm(grads, batch_size, self.gradient_clip_norm)

            for variable, grad in zip(batch_cost.variables(), grads):
                grad = clip_gradient_value(grad, self.gradient_clip_value)

                velocity = ng.persistent_tensor(axes=variable.axes,
                                                initial_value=0.).named(variable.name + '_vel')
                velocity_updates.append(
                    ng.assign(velocity,
                              velocity * self.momentum_coef - self.learning_rate * (
                                  scale_factor * grad + self.wdecay * variable)))

                param_updates.append(ng.assign(variable, variable + velocity))

            lr_update = [ng.assign(self.learning_rate,
                                   self.schedule.get_learning_rate(self.learning_rate,
                                                                   self.iteration_index))]

            updates = ng.doall(velocity_updates + param_updates + lr_update)
            self.iteration_index += 1

        return updates
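
The velocity and parameter assigns built above are the classical momentum recurrence. A plain-NumPy restatement on a toy quadratic (illustration only; every name is local to this sketch):

    import numpy as np

    momentum_coef, learning_rate, wdecay = 0.9, 0.1, 1e-4
    w = np.array([5.0])
    v = np.zeros_like(w)
    for _ in range(200):
        grad = 2 * w                 # d/dw of w**2
        v = momentum_coef * v - learning_rate * (grad + wdecay * w)
        w = w + v                    # mirrors ng.assign(variable, variable + velocity)
    print(w)                         # approaches the minimizer at 0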
Example #4
    def __call__(self, cost_func):
        all_updates = []
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axes()[0].length

        grads = [
            ng.deriv(batch_cost, v) / batch_size
            for v in batch_cost.variables()
        ]
        scale_factor = clip_gradient_norm(grads, batch_size,
                                          self.gradient_clip_norm)

        epsilon, decay = (self.epsilon, self.decay_rate)
        for variable, grad in zip(batch_cost.variables(), grads):
            grad = clip_gradient_value(grad, self.gradient_clip_value)
            state = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
            all_updates.append(
                ng.sequential([
                    ng.assign(state,
                              decay * state + (1.0 - decay) * ng.square(grad)),
                    ng.assign(
                        variable,
                        variable - ((scale_factor * grad * self.lrate) /
                                    (ng.sqrt(state + epsilon) + epsilon)))
                ]))

        return ng.doall(all_updates)
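
Each per-variable ng.sequential pair above is one step of standard RMSProp. The same recurrence in plain NumPy on a toy quadratic (illustration only; names are local to this sketch, and the epsilon placement mirrors the code above):

    import numpy as np

    decay, epsilon, lrate = 0.95, 1e-6, 0.01
    w = np.array([5.0])
    state = np.zeros_like(w)
    for _ in range(2000):
        grad = 2 * w                 # d/dw of w**2
        state = decay * state + (1.0 - decay) * grad ** 2
        w = w - lrate * grad / (np.sqrt(state + epsilon) + epsilon)
    print(w)                         # approaches 0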
Example #5
    def __call__(self, cost_func):
        all_updates = []
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axes()[0].length
        grads = [
            ng.deriv(batch_cost, v) / batch_size
            for v in batch_cost.variables()
        ]
        scale_factor = clip_gradient_norm(grads, batch_size,
                                          self.gradient_clip_norm)
        for variable, grad in zip(batch_cost.variables(), grads):
            updates = []
            velocity = ng.persistent_tensor(
                axes=variable.axes,
                initial_value=0.).named(variable.name + '_vel')
            clip_grad = clip_gradient_value(grad, self.gradient_clip_value)
            lr = -self.lrate * (scale_factor * clip_grad +
                                self.wdecay * variable)
            updates.append(
                ng.assign(velocity, velocity * self.momentum_coef + lr))
            if self.nesterov:
                delta = (self.momentum_coef * velocity + lr)
            else:
                delta = velocity
            updates.append(ng.assign(variable, variable + delta))
            all_updates.append(ng.sequential(updates))
        return ng.doall(all_updates)
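
The nesterov branch applies the look-ahead form of momentum: the delta added to the variable peeks one velocity update ahead instead of using the current velocity. A NumPy illustration (names local to this sketch; clipping and weight decay omitted):

    import numpy as np

    momentum_coef, lrate = 0.9, 0.1
    w, v = np.array([5.0]), np.array([0.0])
    for _ in range(200):
        grad = 2 * w
        step = -lrate * grad               # the `lr` term above
        v = momentum_coef * v + step
        delta = momentum_coef * v + step   # Nesterov look-ahead; classical: delta = v
        w = w + delta
    print(w)                               # approaches 0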
Example #6
    def __call__(self, cost_func, variable_scope=None):
        all_updates = []
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axis().length

        selected_variables = batch_cost.variables()
        if variable_scope is not None:
            selected_variables = [op for op in selected_variables if op.scope == variable_scope]
        grads = [ng.deriv(batch_cost, v) / batch_size for v in selected_variables]
        scale_factor = clip_gradient_norm(grads, batch_size, self.gradient_clip_norm)

        for variable, grad in zip(selected_variables, grads):
            updates = self.variable_update(variable, grad, scale_factor)
            all_updates.append(updates)
        updates = ng.doall(all_updates)
        grads = ng.doall(grads)
        return ng.sequential([grads, updates, 0])
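
The variable_scope argument is the fine-tuning hook here: only variables created under the given scope receive updates, while everything else the cost depends on stays frozen. A one-line usage sketch (the scope name is illustrative):

    # update only the variables created under the "classifier" scope
    updates = optimizer(cost, variable_scope="classifier")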
Example #7
    def NoOp(self, tf_node, inputs):
        """
        Does nothing itself; implements a TensorFlow no-op as an ng.doall over
        its input dependencies.

        Arguments:
            tf_node: NodeDef object, the tensorflow node to convert.
            inputs: List of ngraph Ops as inputs to this node.

        Returns:
            An ngraph Op corresponding to the tensorflow node.
        """

        if tf_node.name == "init":
            # TODO remove hardcoded name by passing in names for op
            return ng.doall(all=inputs)
        else:
            raise NotImplementedError
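
For context, the hardcoded check relies on the TensorFlow convention that the grouped variable-initialization no-op is named "init". A hedged TF1-side sketch of producing such a node:

    import tensorflow as tf

    # grouping the initializers under the name "init" yields the NodeDef
    # that the NoOp handler above matches on
    init_op = tf.variables_initializer(tf.global_variables(), name="init")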
Example #8
    def __call__(self, cost_func):
        with ng.Op.saved_user_deps():
            state_updates, param_updates = [], []
            batch_cost = ng.sum(cost_func, out_axes=())
            batch_size = cost_func.axes.batch_axes()[0].length

            grads = [
                ng.deriv(batch_cost, v) / batch_size
                for v in batch_cost.variables()
            ]
            scale_factor = clip_gradient_norm(
                grads) if self.gradient_clip_norm else 1

            epsilon, decay = (self.epsilon, self.decay_rate)
            for i, (variable,
                    grad) in enumerate(zip(batch_cost.variables(), grads)):
                grad = clip_gradient_value(grad, self.gradient_clip_value)

                state = ng.persistent_tensor(axes=variable.axes,
                                             initial_value=0.)
                state_updates.append(
                    ng.assign(lvalue=state,
                              rvalue=decay * state +
                              (1.0 - decay) * ng.square(grad)).named(
                                  'state_u_%s' % i))

                param_updates.append(
                    ng.assign(
                        lvalue=variable,
                        rvalue=variable -
                        ((scale_factor * grad * self.learning_rate) /
                         (ng.sqrt(state + epsilon) + epsilon)),
                    ).named('var_u_%s' % i))

            lr_update = [
                ng.assign(
                    self.learning_rate,
                    self.schedule.get_learning_rate(self.learning_rate,
                                                    self.iteration_index))
            ]

            updates = ng.doall(state_updates + param_updates + lr_update)
            self.iteration_index += 1

        return updates
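
The lr_update op folds the schedule's decayed rate back into the persistent learning_rate tensor once per call, so the next batch sees the new rate. A plain-Python analogue of such a schedule (the get_learning_rate interface is an assumption inferred from the call above):

    class StepSchedule(object):
        """Hypothetical schedule exposing the get_learning_rate hook used above."""

        def __init__(self, gamma=0.5, step=1000):
            self.gamma, self.step = gamma, step

        def get_learning_rate(self, learning_rate, iteration_index):
            # halve the rate every `step` iterations, otherwise pass it through
            if iteration_index and iteration_index % self.step == 0:
                return learning_rate * self.gamma
            return learning_rate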
Example #9
File: utils.py  Project: rsumner31/ngraph
    def minimize(self, cost, variables):
        """
        Minimize cost by returning update Ops.

        Arguments:
            cost: The cost Op to be minimized
            variables: List of variables to be updated

        Returns:
            A doall op containing setitems to variable ops.
        """

        assert cost is not None
        assert variables is not None

        return ng.doall([ng.assign(variable,
                                   variable - self.compute_lr_op * ng.deriv(cost, variable))
                         for variable in variables])
Example #10
    def minimize(self, cost):
        """
        Minimize cost by returning update Ops.

        Arguments:
            cost: The cost Op to be minimized

        Returns:
            A doall op containing setitems to variable ops.
        """
        variables = list(cost.variables())
        grads = [ng.deriv(cost, variable) for variable in variables]
        with ng.Op.saved_user_deps():
            param_updates = [
                ng.assign(variable, variable - self.lrate * grad)
                for variable, grad in zip(variables, grads)
            ]
            updates = ng.doall(param_updates)
        return updates
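
An end-to-end sketch built from ops shown on this page (the axis length, the optimizer construction, and all names are illustrative): fit a single weight w so that w * x approximates y.

    import ngraph as ng

    F = ng.make_axis(name='F', length=1)
    x = ng.placeholder([F])
    y = ng.placeholder([F])
    w = ng.variable(axes=[F], initial_value=0.)
    cost = ng.sum(ng.squared_L2(w * x - y), out_axes=())

    sgd = Optimizer(lrate=0.05)     # hypothetical name for the class above
    step = ng.sequential([sgd.minimize(cost), cost])
    transformer = ng.transformers.make_transformer()
    train = transformer.add_computation(ng.computation(step, x, y))
    for _ in range(100):
        train([2.0], [4.0])         # w approaches 2.0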
Example #11
    def __call__(self, in_obj, init_state=None):
        """
        Sets shape based parameters of this layer given an input tuple or int
        or input layer.

        Arguments:
            in_obj (int, tuple, Layer or Tensor): object that provides shape
                                                 information for layer
            init_state (tuple of Tensor): object that provides initial state, and in LSTM,
                                          it includes hidden state, and cell states

        Returns:
            rnn_out (Tensor): output

        """
        # try to understand the axes from the input
        if init_state is not None:
            assert len(init_state) == 2 and init_state[0].axes == init_state[1].axes
            self.interpret_axes(in_obj, init_state[0])
        else:
            self.interpret_axes(in_obj, init_state)

        # initialize the hidden states
        if init_state is not None:
            self.h_init = init_state[0]
            self.c_init = init_state[1]
        else:
            if self.reset_cells:
                self.h_init = ng.temporary(initial_value=0,
                                           axes=self.out_axes).named('h_init')
                self.c_init = ng.temporary(initial_value=0,
                                           axes=self.out_axes).named('c_init')
            else:
                self.h_init = ng.variable(initial_value=0,
                                          axes=self.out_axes).named('h_init')
                self.c_init = ng.variable(initial_value=0,
                                          axes=self.out_axes).named('c_init')

        # params are dictionary for i, f, o, g
        self.W_input = {k: ng.variable(axes=self.w_in_axes,
                                       initial_value=self.init,
                                       scope=self.scope).
                        named("W_in_{}".format(k)) for k in self.metadata['gates']}

        self.W_recur = {k: ng.variable(axes=self.w_re_axes,
                                       initial_value=self.init_inner,
                                       scope=self.scope).
                        named("W_re_{}".format(k)) for k in self.metadata['gates']}

        self.b = {k: ng.variable(axes=self.out_feature_axes,
                                 initial_value=0,
                                 scope=self.scope).
                  named("bias_{}".format(k)) for k in self.metadata['gates']}

        h = self.h_init
        c = self.c_init

        h_list = []
        c_list = []

        # Compute feed forward weighted inputs
        # Batch norm is computed only on the weighted inputs
        # as in https://arxiv.org/abs/1510.01378
        h_ff = dict()
        for k in self.metadata["gates"]:
            h_ff[k] = ng.dot(self.W_input[k], in_obj)
            if self.batch_norm is not None:
                h_ff[k] = self.batch_norm[k](h_ff[k])

        # slice the weighted inputs into time slices
        h_ff = get_steps(h_ff, self.recurrent_axis, self.backward)

        # recurrent computation
        for i in range(self.recurrent_axis.length):
            with ng.metadata(recurrent_step=str(i)):
                [h, c] = self._step(h_ff[i], [h, c])
                h_list.append(h)
                c_list.append(c)

        if self.return_sequence is True:
            if self.backward:
                h_list = h_list[::-1]
                c_list = c_list[::-1]
            lstm_out = ng.stack(h_list, self.recurrent_axis, pos=self.recurrent_axis_idx)
        else:
            lstm_out = h_list[-1]

        if self.reset_cells is True:
            return lstm_out
        else:
            return ng.sequential([
                ng.doall([
                    ng.assign(self.h_init, h_list[-1]),
                    ng.assign(self.c_init, c_list[-1])
                ]),
                lstm_out
            ])
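
A hedged usage sketch of the stateful branch (layer construction elided; lstm is assumed to be an instance of the class above with reset_cells=False). Because the state assigns are sequenced before the output, simply evaluating the output also carries state across calls:

    out = lstm(input_tensor)      # ng.sequential([doall(state assigns), lstm_out])
    fn = transformer.add_computation(ng.computation(out, input_placeholder))

    seq_out_1 = fn(first_chunk)   # also writes the last h/c into h_init / c_init
    seq_out_2 = fn(next_chunk)    # this unroll continues from the stored state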
Example #12
def unroll_with_attention(cell,
                          num_steps,
                          H_pr,
                          H_hy,
                          init_states=None,
                          reset_cells=True,
                          return_sequence=True,
                          reverse_mode=False,
                          input_data=None):
    """
    Unroll the cell with attention for num_steps steps.

    Arguments:
    ----------
    cell : provide the cell that has to be unrolled (Eg: MatchLSTMCell_withAttention)
    num_steps: the number of steps needed to unroll
    H_pr : the encoding for the question
    H_hy : the encoding for the passage
    init_states: Either None or a dictionary containing states
    reset_cell: argument which determine if cell has to be reset or not
    reverse_mode: Set to True if unrolling in the opposite direction is desired
    input_data: the ArrayIterator object for training data
                (contains information of length of each sentence)

    """
    recurrent_axis = H_hy.axes.recurrent_axis()

    if init_states is not None:
        states = {
            k: ng.cast_role(v, out_axes)
            for (k, v) in init_states.items()
        }
    else:
        states = init_states

    stepped_inputs = get_steps(H_hy, recurrent_axis, backward=reverse_mode)
    stepped_outputs = []

    for t in range(num_steps):
        with ng.metadata(step=str(t)):
            if t == 0:
                output, states = cell(H_pr,
                                      stepped_inputs[t],
                                      states,
                                      output=None,
                                      input_data=input_data)
            else:
                output, states = cell(H_pr,
                                      stepped_inputs[t],
                                      states,
                                      output=output,
                                      input_data=input_data)

            stepped_outputs.append(output)

    if reverse_mode:
        if return_sequence:
            stepped_outputs.reverse()

    if return_sequence:
        outputs = ng.stack(stepped_outputs, recurrent_axis, pos=1)
    else:
        outputs = stepped_outputs[-1]

    if not reset_cells:
        # write the final unrolled states back into the initial-state tensors
        update_inits = ng.doall([
            ng.assign(initial, states[name])
            for (name, initial) in init_states.items()
        ])
        outputs = ng.sequential([update_inits, outputs])

    return outputs
Example #13
    def __init__(self,
                 state_axes,
                 action_size,
                 batch_size,
                 model,
                 learning_rate=0.0001):
        """
        for now, model must be a function which takes action_axes, and
        returns a neon container
        """
        super(ModelWrapper, self).__init__()

        self.axes = Namespace()
        self.axes.state = make_axes(state_axes, name='state')
        self.axes.action = ng.make_axis(name='action', length=action_size)
        self.axes.n = ng.make_axis(name='N', length=batch_size)
        self.axes.n1 = ng.make_axis(name='N', length=1)

        # placeholders
        self.state = ng.placeholder(self.axes.state + [self.axes.n])
        self.state_single = ng.placeholder(self.axes.state + [self.axes.n1])
        self.target = ng.placeholder([self.axes.action, self.axes.n])

        # these q functions have the same structure but different variables
        self.q_function = model(self.axes.action)
        self.q_function_target = model(self.axes.action)

        # construct inference computation
        with neon.Layer.inference_mode_on():
            inference = self.q_function(self.state)
        inference_computation = ng.computation(inference, self.state)

        # construct inference target computation
        with neon.Layer.inference_mode_on():
            inference_target = self.q_function_target(self.state)
        inference_target_computation = ng.computation(inference_target,
                                                      self.state)

        # construct inference computation for evaluating a single observation
        with neon.Layer.inference_mode_on():
            inference_single = self.q_function(self.state_single)
        inference_computation_single = ng.computation(inference_single,
                                                      self.state_single)

        # update q function target weights with values from q function
        # assumes that the variables in each are in the same order
        update_computation = ng.computation(
            ng.doall([
                ng.assign(target_variable,
                          ng.cast_axes(variable, target_variable.axes))
                for target_variable, variable in zip(
                    self.q_function_target.variables.values(),
                    self.q_function.variables.values())
            ]))

        # construct training computation
        loss = ng.squared_L2(self.q_function(self.state) - self.target)

        optimizer = neon.RMSProp(
            learning_rate=learning_rate,
            gradient_clip_value=1,
        )

        train_output = ng.sequential([
            optimizer(loss),
            loss,
        ])

        train_computation = ng.computation(train_output, self.state,
                                           self.target)

        # now bind computations we are interested in
        self.transformer = ng.transformers.make_transformer()
        self.inference_function = self.transformer.add_computation(
            inference_computation)
        self.inference_target_function = self.transformer.add_computation(
            inference_target_computation)
        self.inference_function_single = self.transformer.add_computation(
            inference_computation_single)
        self.train_function = self.transformer.add_computation(
            train_computation)
        self.update_function = self.transformer.add_computation(
            update_computation)

        # run a single update to ensure that both q functions have the same
        # initial weights
        self.update()
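
A hedged sketch of the intended training loop (the data, the model factory, and the sync interval are assumptions): the ng.doall weight copy above is the standard DQN target-network sync, typically run only every few steps so the bootstrapped targets stay stable:

    wrapper = ModelWrapper(state_axes=(4,), action_size=2, batch_size=32,
                           model=make_model)     # make_model is hypothetical
    for step in range(num_steps):
        wrapper.train_function(states, targets)  # one gradient step on the online net
        if step % sync_every == 0:
            wrapper.update()                     # copy online weights into the target net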
Example #14
File: layer.py  Project: kkasravi/ngraph
    def train_outputs(self, in_obj, init_state=None):
        """
        Sets shape based parameters of this layer given an input tuple or int
        or input layer.

        Arguments:
            in_obj (int, tuple, Layer or Tensor): object that provides shape
                                                 information for layer
            init_state (tuple of Tensor): object that provides initial state, and in LSTM,
                                          it includes hidden state, and cell states

        Returns:
            rnn_out (Tensor): output

        """
        # try to understand the axes from the input
        if init_state is not None:
            assert len(
                init_state) == 2 and init_state[0].axes == init_state[1].axes
            self.interpret_axes(in_obj, init_state[0])
        else:
            self.interpret_axes(in_obj, init_state)

        # initialize the hidden states
        if init_state is not None:
            self.h_init = init_state[0]
            self.c_init = init_state[1]
        else:
            if self.reset_cells:
                self.h_init = ng.temporary(
                    initial_value=0,
                    axes=self.hidden_state_axes).named('h_init')
                self.c_init = ng.temporary(
                    initial_value=0,
                    axes=self.hidden_state_axes).named('c_init')
            else:
                self.h_init = ng.variable(
                    initial_value=0,
                    axes=self.hidden_state_axes).named('h_init')
                self.c_init = ng.variable(
                    initial_value=0,
                    axes=self.hidden_state_axes).named('c_init')

        # params are dictionary for i, f, o, g
        self.W_input = {
            k: ng.variable(axes=self.w_in_axes,
                           initial_value=self.init).named("W_in_{}".format(k))
            for k in self.metadata['gates']
        }

        self.W_recur = {
            k: ng.variable(axes=self.w_re_axes,
                           initial_value=self.init_inner).named(
                               "W_re_{}".format(k))
            for k in self.metadata['gates']
        }

        self.b = {
            k: ng.variable(axes=self.hidden_axes,
                           initial_value=0).named("bias_{}".format(k))
            for k in self.metadata['gates']
        }

        h = self.h_init
        c = self.c_init

        h_list = []
        c_list = []

        # feedforward computation
        in_s = get_steps(in_obj, self.recurrent_axis, self.backward)

        # recurrent computation
        for i in range(self.recurrent_axis.length):
            with ng.metadata(recurrent_step=str(i)):
                [h, c] = self._step(in_s[i], [h, c])
                h_list.append(h)
                c_list.append(c)

        if self.return_sequence is True:
            if self.backward:
                h_list = h_list[::-1]
                c_list = c_list[::-1]
            lstm_out = ng.stack(h_list,
                                self.recurrent_axis,
                                pos=self.recurrent_axis_idx)
        else:
            lstm_out = h_list[-1]

        if self.reset_cells is True:
            return lstm_out
        else:
            return ng.sequential([
                ng.doall([
                    ng.assign(self.h_init, h_list[-1]),
                    ng.assign(self.c_init, c_list[-1])
                ]), lstm_out
            ])