Example #1
  def insert_quant_op_for_weights(self, w_bit_dict):
    """Insert quantization operation for weights

    Args:
    * w_bit_dict: A dict with (key: matmul_op_name, value: quant_bits)
    """

    for op in self.matmul_ops:
      w = op.inputs[1]
      prefix = prefix_filter(op.name)
      qw = self.__uniform_quantize(w, w_bit_dict[op.name], 'weight', prefix)
      weight_fn = {'MatMul': tf.matmul,
                   'Conv2D': tf.nn.conv2d,
                   'DepthwiseConv2dNative': tf.nn.depthwise_conv2d}
      is_conv_fn = lambda x: 'Conv' in x.type
      try:
        if is_conv_fn(op):
          strides = op.get_attr('strides')
          padding = op.get_attr('padding')
          qw_op = weight_fn[op.type](op.inputs[0], qw, strides, padding).op
        else:
          # fc layers
          qw_op = weight_fn[op.type](op.inputs[0], qw).op
        self.quantized_matmul_ops.append(qw_op)
      except KeyError:
        raise NotImplementedError("Unrecognized MatMul op, \
            try to add it into matmul_types for quantization")

    # replace input
    for wop, qwop in zip(self.matmul_ops, self.quantized_matmul_ops):
      old_sgv = ge.sgv(wop)
      new_sgv = ge.sgv(qwop)
      ge.reroute_inputs(new_sgv, old_sgv)
Example #2
  def insert_quant_op_for_activations(self, act_bit_dict):
    """ Insert quantization operation for activation

    Args:
    * act_bit_dict: A dict with (key: act_op_name, value: act_bits)
    """
    activation_fn = {'Relu': tf.nn.relu,
                     'Tanh': tf.nn.tanh,
                     'Softplus': tf.nn.softplus,
                     'Sigmoid': tf.nn.sigmoid,
                     'Relu6': tf.nn.relu6}

    for op in self.activation_ops:
      old_sgv = ge.sgv(op)
      input_ = old_sgv.inputs[0]

      if op.type in self.support_act_types:
        try:
          tmp_input_ = activation_fn[op.type](input_)
        except KeyError:
          raise NotImplementedError("The activation_fn needs to include %s manually" % op.type)

        prefix = prefix_filter(op.name)
        qa = self.__uniform_quantize(tmp_input_, act_bit_dict[op.name], 'activation', prefix)
        new_sgv = ge.sgv(qa.op)
        ge.reroute_outputs(new_sgv, old_sgv)
        self.quantized_activation_ops.append(qa.op)
      else:
        raise ValueError("Unknown activation mode, you may add it manually here")
Example #3
def convert_consts_to_var(graph_def):
    graph = get_graph_from(graph_def)
    all_const_ops = set(i.name for i in graph.get_operations() if i.type == "Const")
    const_names_list = list(all_const_ops - set(get_white_list(graph)))
    const_var_names_pairs = []
    ops_to_delete = []
    with graph.as_default():
        preexisting_vars = [
            tf.get_variable(i.name, i.outputs[0].shape)
            for i in graph.get_operations()
            if i.type == "VariableV2" or i.type == "Variable"
        ]

        var_list = []
        for name in const_names_list:
            tensor = graph.get_operation_by_name(name).outputs[0]
            with tf.compat.v1.Session() as sess:
                t_value = sess.run(tensor)
            t_name = "{}_mpc_const_var".format(name)
            var = tf.compat.v1.Variable(t_value, name=t_name)
            var_read_op_name = var.to_proto().snapshot_name[:-2]
            const_var_names_pairs.append((name, var_read_op_name))
            var_list.append(var)

        for const_name, var_read_name in const_var_names_pairs:
            const_op = graph.get_operation_by_name(const_name)
            var_op = graph.get_operation_by_name(var_read_name)
            ge.swap_outputs(ge.sgv(const_op), ge.sgv(var_op))
            ops_to_delete.append(const_op)

        tf.compat.v1.variables_initializer(
            var_list + preexisting_vars, "init_constvars"
        )
    return delete_nodes(graph.as_graph_def(), ops_to_delete)
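A minimal sketch of the ge.swap_outputs step above on a toy graph, assuming TensorFlow 1.x (where tf.contrib.graph_editor is available); the names c, v and out are illustrative, not from the original code:

import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

graph = tf.Graph()
with graph.as_default():
    c = tf.constant([1.0, 2.0], name="c")
    out = tf.square(c, name="out")              # `out` currently reads the Const
    v = tf.Variable([1.0, 2.0], name="c_var")   # variable holding the same value

    # Swap the output connections of the Const op and the variable's read op:
    # `out` now reads the variable instead of the constant.
    ge.swap_outputs(ge.sgv(c.op), ge.sgv(v.value().op))
    init = tf.variables_initializer([v], name="init_constvars")

with tf.Session(graph=graph) as sess:
    sess.run(init)
    print(sess.run(out))                        # -> [1. 4.], computed from the variable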
Example #4
def replace_read_ops(loss_or_losses, var_list):
    """
    Replaces read ops of each variable in `var_list` with new read ops obtained
    from `read_value()`, thus forcing to read the most up-to-date values of
    the variables (which might incur copies across devices).
    The graph is seeded from the tensor(s) `loss_or_losses`.
    """
    # ops between var ops and the loss
    ops = set(ge.get_walks_intersection_ops([var.op for var in var_list], loss_or_losses))
    if not ops:  # loss_or_losses doesn't depend on any var in var_list, so there is nothing to replace
        return

    # filter out variables that are not involved in computing the loss
    var_list = [var for var in var_list if var.op in ops]

    # assume that for each variable, the only op required to compute the loss
    # is a read op, and there is exactly one per variable
    read_ops = []
    for var in var_list:
        output, = var.op.outputs
        read_op, = set(output.consumers()) & ops
        read_ops.append(read_op)

    for var, read_op in zip(var_list, read_ops):
        with tf.name_scope('/'.join(read_op.name.split('/')[:-1])):
            with tf.device(read_op.device):
                read_t, = read_op.outputs
                consumer_ops = set(read_t.consumers()) & ops
                # consumer_sgv might have multiple inputs, but we only care
                # about replacing the input that is read_t
                consumer_sgv = ge.sgv(consumer_ops)
                consumer_sgv = consumer_sgv.remap_inputs([list(consumer_sgv.inputs).index(read_t)])
                ge.connect(ge.sgv(var.read_value().op), consumer_sgv)
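A hedged usage sketch for replace_read_ops, assuming TensorFlow 1.x and that the function above is in scope; w and loss are toy names:

import tensorflow as tf

with tf.Graph().as_default():
    w = tf.Variable([2.0], name="w")
    loss = tf.reduce_sum(w * tf.constant([3.0]), name="loss")

    # Rewire the multiply inside `loss` to consume a fresh w.read_value(),
    # so it always sees the latest assignment to w.
    replace_read_ops(loss, [w])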
Example #5
def convert_consts_to_var(graph, const_names_list):
    const_var_names_pairs = []
    ops_to_delete = []
    with graph.as_default():
        preexisting_vars = [
            tf.get_variable(i.name, i.outputs[0].shape)
            for i in graph.get_operations()
            if i.type == "VariableV2" or i.type == "Variable"
        ]

        var_list = []
        for name in const_names_list:
            tensor = graph.get_operation_by_name(name).outputs[0]
            with tf.Session() as sess:
                t_value = sess.run(tensor)
            t_name = '{}_mpc_const_var'.format(name)
            var = tf.Variable(t_value, name=t_name)
            const_var_names_pairs.append((name, t_name))
            var_list.append(var)

        for const_name, var_name in const_var_names_pairs:
            const_op = graph.get_operation_by_name(const_name)
            var_op = graph.get_operation_by_name('{}/read'.format(var_name))
            ge.swap_outputs(ge.sgv(const_op), ge.sgv(var_op))
            ops_to_delete.append(const_op)

        tf.compat.v1.variables_initializer(var_list + preexisting_vars,
                                           'init_constvars')
    return delete_nodes(graph, ops_to_delete)
Example #6
 def test_multiswap(self):
   with self.graph.as_default():
     a3 = constant_op.constant(3.0, shape=[2], name="a3")
   ge.swap_ios(ge.sgv(a3.op).remap_outputs([0, 0]),
               ge.sgv(self.a0.op, self.a1.op))
   self.assertTrue(match.OpMatcher("c0").input_ops("a3", "b0")(self.c0.op))
   self.assertTrue(match.OpMatcher("c1").input_ops("a3", "b1")(self.c1.op))
Example #7
def replace_nodes_with_identity(graph, nop_splits):
    with graph.as_default():
        for split in nop_splits:
            inp_var = split.inputs[1]
            identity = tf.identity(inp_var).op
            ge.swap_outputs(ge.sgv(split), ge.sgv(identity))
    return graph
Example #8
def tensor_swapin_and_out(g, origin_op, swapin_op):
    global added_control
    all_ops = g.get_operations()
    #find the origin_op's output tensor name
    origin_op_name = origin_op.values()[0].name

    added_control = False
    # search for the gradient ops that consume origin_op's output and reroute them to swapin_op
    for op in all_ops:
        for i in range(len(op.inputs)):
            if ((op.inputs[i].name == origin_op_name) and
               ("_grad" in op.name)):
                print("gradient op.name:", op.name)
                """
                ('op.name:', u'layer1/L1_SwapOut')
                ('op.name:', u'layer2/MatMul')
                ('op.name:', u'optimizer/gradients/layer1/Sigmoid_grad/SigmoidGrad')
                """
                #Use connect and remap function to reconnect
                ge.connect(ge.sgv(swapin_op), ge.sgv(op).remap_inputs([i]))
                # FIXME:
                # obviously we cannot add more than 1 control dependency for swap_in op
                if added_control is False:
                    print("Control Dependency==> swapin_op:", swapin_op, "op:", op)
                    add_control_dependency(all_ops, swapin_op, op)
Example #9
    def _connect_ops(self,
                     src_op,
                     dest_op,
                     remap_inputs=False,
                     remap_outputs=False,
                     idx=None,
                     disconnect_first=False):
        """A wrapper of `tensorflow.contrib.graph_editor.connect`.

        This method does an in-place modification to the graph.

        Args:
          src_op: a `tf.Operation`.
          dest_op: a `tf.Operation`.
          remap_inputs: remap the input of `dest_op` or not.
          remap_outputs: remap the output of `src_op` or not.
          idx: index of input or output tensor.
          disconnect_first: True means the current outputs of sgv0 are
            disconnected.
        """
        src_sgv = ge.sgv(src_op, graph=self._graph)
        dest_sgv = ge.sgv(dest_op, graph=self._graph)
        if remap_outputs:
            src_sgv = src_sgv.remap_outputs([idx])
        if remap_inputs:
            dest_sgv = dest_sgv.remap_inputs([idx])

        ge.connect(src_sgv, dest_sgv, disconnect_first)
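A minimal sketch of what this wrapper boils down to, assuming TensorFlow 1.x (toy names): ge.connect combined with remap_inputs reroutes a single input of the destination op to the source op's output.

import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

graph = tf.Graph()
with graph.as_default():
    a = tf.constant([1.0, 2.0], name="a")
    b = tf.constant([3.0, 4.0], name="b")
    c = tf.add(a, b, name="c")       # c currently reads (a, b)
    d = tf.identity(b, name="d")     # alternative producer for the second input

# Reroute only input index 1 of `c` (the tensor b) so it reads from `d` instead.
ge.connect(ge.sgv(d.op), ge.sgv(c.op).remap_inputs([1]))

with tf.Session(graph=graph) as sess:
    print(sess.run(c))               # still [4. 6.]; c now consumes d's output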
Example #10
 def test_multiswap(self):
     with self.graph.as_default():
         a3 = tf.constant(3.0, shape=[2], name="a3")
     ge.reroute.swap(
         ge.sgv(a3.op).remap_outputs([0, 0]),
         ge.sgv(self.a0.op, self.a1.op))
     self.assertTrue(ge.matcher("c0").input_ops("a3", "b0")(self.c0.op))
     self.assertTrue(ge.matcher("c1").input_ops("a3", "b1")(self.c1.op))
Example #11
def build_pb_fact(pb_location, main_location, breath, quant, lvl):
    graph = load_graph(pb_location)

    W1 = graph.get_tensor_by_name('prefix/w_in:0')
    matmul = graph.get_tensor_by_name('prefix/MatMul:0')
    bias = graph.get_tensor_by_name('prefix/b_in:0')
    add = graph.get_tensor_by_name('prefix/add:0')
    reshape = graph.get_tensor_by_name('prefix/Reshape:0')

    # remove all connections from matmul
    ge.detach(ge.sgv(matmul.op))

    with tf.Session(graph=graph) as sess:
        # os.system("mkdir " + main_location + breath + "/" + quant + "/fact_" + str(lvl))

        # for op in sess.graph.get_operations():
        #     print(op.name)

        W = W1.eval()
        u, s, v, ss = svd_compress_gs(W, lvl)
        logEntry("structural_similarity == > " + str(ss))
        u1 = tf.matmul(reshape, u, name="prefix/u1")
        s1 = tf.matmul(u1, s, name="prefix/s1")
        v1 = tf.matmul(s1, v, name="prefix/v1")
        ge.connect(ge.sgv(v1.op), ge.sgv(add.op).remap_inputs([0]))

        sess.run(tf.variables_initializer([tf.Variable(5, name="dummy" + str(lvl))]))
        saver = tf.train.Saver()

        # save log for tensorboard
        LOGDIR = main_location + '/LOG'
        train_writer = tf.summary.FileWriter(LOGDIR)
        train_writer.add_graph(sess.graph)
        train_writer.close()

        # save the frozen model
        os.system("mkdir " + main_location + "pb")
        tf.train.write_graph(sess.graph_def, main_location + 'pb/', "RNN_" + breath + "_" + quant + "_fact_" + str(lvl) + ".pbtxt")
        saver.save(sess, save_path=main_location + "model.ckpt")

        input_graph_path = main_location + '/pb/' + "RNN_" + breath + "_" + quant + "_fact_" + str(lvl) + ".pbtxt"

        checkpoint_path = main_location + "model.ckpt"
        restore_op_name = "save/restore_all"
        filename_tensor_name = "save/Const:0"
        output_frozen_graph_name = main_location + 'pb/' + "RNN_" + breath + "_" + quant + "_fact_" + str(lvl) + ".pb"

        logEntry("Start Freezing the graph")

        freeze_graph.freeze_graph(input_graph_path, input_saver="",
                                  input_binary=False, input_checkpoint=checkpoint_path,
                                  output_node_names="prefix/y_", restore_op_name="save/restore_all",
                                  filename_tensor_name="save/Const:0",
                                  output_graph=output_frozen_graph_name, clear_devices=True, initializer_nodes="")

        logEntry("End Freezing the graph")

        sess.close()
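A minimal sketch of the ge.detach call used above, assuming TensorFlow 1.x (toy names): detaching a subgraph view severs its connections and leaves graph-editor placeholder ops (named geph__*) where the inputs used to be.

import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

graph = tf.Graph()
with graph.as_default():
    a = tf.constant([1.0], name="a")
    b = tf.constant([2.0], name="b")
    c = tf.add(a, b, name="c")

# Cut `c` off from the rest of the graph; its inputs become placeholders.
ge.detach(ge.sgv(c.op))
print([t.op.name for t in c.op.inputs])   # e.g. ['geph__a_0', 'geph__b_0']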
Example #12
def _connect_ops(src_op, dest_op, remap_inputs=False, remap_outputs=True):
    src_sgv = ge.sgv(src_op, graph=tf.get_default_graph())
    dest_sgv = ge.sgv(dest_op, graph=tf.get_default_graph())

    if remap_outputs:
        src_sgv = src_sgv.remap_outputs([0])
    if remap_inputs:
        dest_sgv = dest_sgv.remap_inputs([0])

    ge.connect(src_sgv, dest_sgv)
Example #13
def replace_node_with_const(node):
    print("Trying to execute node {}".format(node.name))
    graph = node.graph
    with graph.as_default():
        const_lists = []
        with tf.Session() as sess:
            for out_t in node.outputs:
                const_val = sess.run(out_t)
                const_op = tf.constant(const_val).op
                const_lists.append(const_op)
            ge.swap_outputs(ge.sgv(node), ge.sgv(const_lists))
Example #14
  def test_connect(self):
    """Test for ge.connect."""
    with self.graph.as_default():
      x = tf.constant([1., 1.], shape=[2], name="x")
      y = tf.constant([2., 2.], shape=[2], name="y")
      z = tf.add(x, y, name="z")

    sgv = ge.sgv(x.op, y.op, z.op)
    ge.connect(sgv, ge.sgv(self.e.op).remap_inputs([0]))
    self.assertTrue(ge.matcher("^foo/bar/e$").input_ops("^z$", "foo/d$")
                    (self.e.op))
Example #15
    def test_subgraph(self):
        sgv = ge.sgv(self.graph)
        self.assertEqual(list(sgv.outputs), [self.e, self.h])
        self.assertEqual(list(sgv.inputs), [])
        self.assertEqual(len(sgv.ops), 8)

        sgv = ge.sgv(self.f.op, self.g.op)
        self.assertEqual(list(sgv.outputs), [self.f, self.g])
        self.assertEqual(list(sgv.inputs), [self.c, self.d, self.a])

        sgv = ge.sgv_scope("foo/bar", graph=self.graph)
        self.assertEqual(list(sgv.ops),
                         [self.e.op, self.f.op, self.g.op, self.h.op])
Example #16
  def test_subgraph(self):
    sgv = ge.sgv(self.graph)
    self.assertEqual(list(sgv.outputs), [self.e, self.h])
    self.assertEqual(list(sgv.inputs), [])
    self.assertEqual(len(sgv.ops), 8)

    sgv = ge.sgv(self.f.op, self.g.op)
    self.assertEqual(list(sgv.outputs), [self.f, self.g])
    self.assertEqual(list(sgv.inputs), [self.c, self.d, self.a])

    sgv = ge.sgv_scope("foo/bar", graph=self.graph)
    self.assertEqual(
        list(sgv.ops), [self.e.op, self.f.op, self.g.op, self.h.op])
Example #17
 def test_detach(self):
   """Test for ge.detach."""
   sgv = ge.sgv(self.c.op, self.a.op)
   control_outputs = ge.util.ControlOutputs(self.graph)
   ge.detach(sgv, control_inputs=control_outputs)
   # make sure the detached graph is as expected.
   self.assertTrue(ge.matcher("^foo/c$")
                   .input_ops("geph__a_0", "geph__b_0")(self.c.op))
Example #18
    def test_remove_unused_ops(self):
        sgv = ge.sgv(self.graph)
        self.assertEqual(list(sgv.outputs), [self.e, self.h])
        self.assertEqual(len(sgv.ops), 8)

        sgv = sgv.remap_outputs(new_output_indices=[1]).remove_unused_ops()
        self.assertEqual(list(sgv.outputs), [self.h])
        self.assertEqual(len(sgv.ops), 7)
Example #19
  def test_remove_unused_ops(self):
    sgv = ge.sgv(self.graph)
    self.assertEqual(list(sgv.outputs), [self.e, self.h])
    self.assertEqual(len(sgv.ops), 8)

    sgv = sgv.remap_outputs(new_output_indices=[1]).remove_unused_ops()
    self.assertEqual(list(sgv.outputs), [self.h])
    self.assertEqual(len(sgv.ops), 7)
Example #20
def create_op_pruning_no_update(
    op: tf_compat.Operation,
    op_input: tf_compat.Tensor,
    ks_group: str,
    leave_enabled: bool = True,
    is_after_end_step: tf_compat.Tensor = None,
) -> PruningOpVars:
    """
    Creates the necessary variables and operators to gradually
    apply sparsity to an operator's variable without returning a
    PruningOpVars.update value.

    :param op: the operation to prune to the given sparsity
    :param op_input: the parameter within the op to create a mask for
    :param ks_group: the group identifier the scope should be created under
    :param leave_enabled: True to continue masking the weights after end_epoch,
        False to stop masking
    :param is_after_end_step: only should be provided if leave_enabled is False;
        tensor that is true if the current global step is after end_epoch
    :return: a named tuple containing the assignment op, mask variable,
        threshold tensor, and masked tensor
    """
    if tf_contrib_err:
        raise tf_contrib_err

    op_sgv = graph_editor.sgv(op)

    # create the necessary variables first
    with tf_compat.variable_scope(PruningScope.model(op, ks_group),
                                  reuse=tf_compat.AUTO_REUSE):
        mask = tf_compat.get_variable(
            PruningScope.VAR_MASK,
            op_input.get_shape(),
            initializer=tf_compat.ones_initializer(),
            trainable=False,
            dtype=op_input.dtype,
        )
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.VAR_MASK), mask)

    # create the masked operation and assign as the new input to the op
    with tf_compat.name_scope(
            PruningScope.model(op, ks_group, trailing_slash=True)):
        masked = tf_compat.multiply(mask, op_input, PruningScope.OP_MASKED_VAR)
        op_inp_tens = (masked if leave_enabled else tf_compat.cond(
            is_after_end_step, lambda: op_input, lambda: masked))
        op_swapped_inputs = [
            inp if inp != op_input else op_inp_tens for inp in op_sgv.inputs
        ]
        graph_editor.swap_inputs(op, op_swapped_inputs)
    tf_compat.add_to_collection(
        PruningScope.collection_name(ks_group, PruningScope.OP_MASKED_VAR),
        masked)
    return PruningOpVars(op, op_input, None, mask, masked)
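A hedged, minimal version of the splice performed above, assuming TensorFlow 1.x (x, w, y and mask are illustrative names): a mask variable is multiplied with the weight, and ge.swap_inputs routes the product into the MatMul in place of the original weight input.

import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

graph = tf.Graph()
with graph.as_default():
    x = tf.placeholder(tf.float32, [None, 4], name="x")
    w = tf.Variable(tf.ones([4, 2]), name="w")
    y = tf.matmul(x, w, name="y")                       # inputs: (x, w/read)

    mask = tf.Variable(tf.ones([4, 2]), trainable=False, name="mask")
    masked_w = tf.multiply(mask, w, name="masked_w")

    # Keep x, substitute the masked weight for the original weight input.
    op_sgv = ge.sgv(y.op)
    w_read = op_sgv.inputs[1]
    new_inputs = [masked_w if inp == w_read else inp for inp in op_sgv.inputs]
    ge.swap_inputs(y.op, new_inputs)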
Example #21
    def test_subgraph_remap(self):
        sgv = ge.sgv(self.c.op)
        self.assertEqual(list(sgv.outputs), [self.c])
        self.assertEqual(list(sgv.inputs), [self.a, self.b])

        sgv = ge.sgv(self.c.op).remap([self.a], [0, self.c])
        self.assertEqual(list(sgv.outputs), [self.c, self.c])
        self.assertEqual(list(sgv.inputs), [self.a])

        sgv = sgv.remap_outputs_to_consumers()
        self.assertEqual(list(sgv.outputs), [self.c, self.c, self.c])
        sgv = sgv.remap_outputs_make_unique()
        self.assertEqual(list(sgv.outputs), [self.c])

        sgv = sgv.remap(new_input_indices=[], new_output_indices=[])
        self.assertEqual(len(sgv.inputs), 0)
        self.assertEqual(len(sgv.outputs), 0)
        sgv = sgv.remap_default()
        self.assertEqual(list(sgv.outputs), [self.c])
        self.assertEqual(list(sgv.inputs), [self.a, self.b])
Example #22
  def test_subgraph_remap(self):
    sgv = ge.sgv(self.c.op)
    self.assertEqual(list(sgv.outputs), [self.c])
    self.assertEqual(list(sgv.inputs), [self.a, self.b])

    sgv = ge.sgv(self.c.op).remap([self.a], [0, self.c])
    self.assertEqual(list(sgv.outputs), [self.c, self.c])
    self.assertEqual(list(sgv.inputs), [self.a])

    sgv = sgv.remap_outputs_to_consumers()
    self.assertEqual(list(sgv.outputs), [self.c, self.c, self.c])
    sgv = sgv.remap_outputs_make_unique()
    self.assertEqual(list(sgv.outputs), [self.c])

    sgv = sgv.remap(new_input_indices=[], new_output_indices=[])
    self.assertEqual(len(sgv.inputs), 0)
    self.assertEqual(len(sgv.outputs), 0)
    sgv = sgv.remap_default()
    self.assertEqual(list(sgv.outputs), [self.c])
    self.assertEqual(list(sgv.inputs), [self.a, self.b])
Example #23
  def test_reroute_can_modify(self):
    graph = tf.Graph()
    # create a special graph where "a" is an ambiguous tensor. That is
    # it is both an input and an output of the ops in sgv0.
    with graph.as_default():
      a = tf.constant(1.0, shape=[2], name="a")
      b = tf.constant(2.0, shape=[2], name="b")
      c = tf.add(a, b, name="c")
      d = tf.add(a, c, name="d")

      e = tf.constant(1.0, shape=[2], name="e")
      f = tf.constant(2.0, shape=[2], name="f")
      g = tf.add(e, f, name="g")

    sgv0 = ge.sgv(a.op, b.op, c.op)
    sgv1 = ge.sgv(e.op, f.op)

    ge.reroute.swap_outputs(sgv0, sgv1)
    self.assertTrue(ge.matcher("g").input_ops("a", ge.matcher("c")
                                              .input_ops("a", "b"))(g.op))
    self.assertTrue(ge.matcher("d").input_ops("e", "f")(d.op))
Example #24
    def _fuse_swapin_ops(self, src_op, swapout_op, bw_frontier_ops, ts0):
        """Fuse all swapin ops that swap in the same tensor.

        This method does an in-place modification to the graph.

        Args:
          src_op: a `tf.Operation`.
          swapout_op: a `tf.Operation`.
          bw_frontier_ops: a set of `tf.Operation`.
          ts0: a `tf.Tensor`.

        Return:
          A set of `tf.Operation` that cannot be fused.
        """
        fuse_bw_frontier_ops = {
            op for op in bw_frontier_ops
            if self._topo_sort.get_order(op) > 0}
        if len(fuse_bw_frontier_ops) >= 2:
            with tf.device(self._cpu_device):
                swap_in = tf.identity(ts0, name="lms/swapin")

            # Connect: swap_out -> swap_in
            self._connect_ops(swapout_op, swap_in.op)
            self._excl_ops.add(swap_in.op)

            # reuse swap_in tensors
            for op in fuse_bw_frontier_ops:
                # Connect: swap_in -> dest
                input_idx = ge.sgv(
                    op, graph=self._graph).input_index(ts0)
                self._connect_ops(swap_in.op, op, remap_inputs=True,
                                  idx=input_idx)

                self._log_info(
                    "{} (order {}) reuses tensor {}".format(
                        op.name,
                        self._topo_sort.get_order(op),
                        ts0.name),
                    1)

            # control dependency -> swap_in
            min_order = self._topo_sort.size + 1
            earliest_op = None
            for op in fuse_bw_frontier_ops:
                order = self._topo_sort.get_order(op)
                if order < min_order:
                    min_order = order
                    earliest_op = op
            if earliest_op:
                self._add_control_dependency(src_op, earliest_op, swap_in.op)
            bw_frontier_ops -= fuse_bw_frontier_ops
        return bw_frontier_ops
Example #25
def convert_consts_to_var(graph, const_names_list):
    const_var_names_pairs = []
    ops_to_delete = []
    with graph.as_default():
        var_list = []
        for name in const_names_list:
            #tensor = graph.get_tensor_by_name('{}:0'.format(name))
            tensor = graph.get_operation_by_name(name).outputs[0]
            with tf.Session() as sess:
                t_value = sess.run(tensor)
            t_name = '{}_const_var'.format(name)
            var = tf.Variable(t_value, name=t_name)
            const_var_names_pairs.append((name, t_name))
            var_list.append(var)

        for const_name, var_name in const_var_names_pairs:
            const_op = graph.get_operation_by_name(const_name)
            var_op = graph.get_operation_by_name('{}/read'.format(var_name))
            ge.swap_outputs(ge.sgv(const_op), ge.sgv(var_op))
            ops_to_delete.append(const_op)
        tf.compat.v1.variables_initializer(var_list, 'init_constvars')
    return delete_nodes(graph, ops_to_delete)
Example #26
    def test_reroute_can_modify(self):
        graph = tf.Graph()
        # create a special graph where "a" is an ambiguous tensor. That is
        # it is both an input and an output of the ops in sgv0.
        with graph.as_default():
            a = tf.constant(1.0, shape=[2], name="a")
            b = tf.constant(2.0, shape=[2], name="b")
            c = tf.add(a, b, name="c")
            d = tf.add(a, c, name="d")

            e = tf.constant(1.0, shape=[2], name="e")
            f = tf.constant(2.0, shape=[2], name="f")
            g = tf.add(e, f, name="g")

        sgv0 = ge.sgv(a.op, b.op, c.op)
        sgv1 = ge.sgv(e.op, f.op)

        ge.reroute.swap_outputs(sgv0, sgv1)
        self.assertTrue(
            ge.matcher("g").input_ops("a",
                                      ge.matcher("c").input_ops("a",
                                                                "b"))(g.op))
        self.assertTrue(ge.matcher("d").input_ops("e", "f")(d.op))
Example #27
def recompute_tensor(target, known_values, preceding_op=None,
                     copy_known_values=False):
  """Computes target tensor from known_values. If preceding_op is not None,
  adds necessary control dependencies such that newly created computation takes
  place after preceding_op. 

  If copy_known_values is set, also copies known_values (for nicer graph
  visualization)
  """

  assert is_computable(target, known_values)
  
  # position of target in parent op
  target_pos = list(target.op.outputs).index(target)

  if copy_known_values:
    computation = ge.get_backward_walk_ops(target)
  else:
    computation = ge.get_backward_walk_ops(target, stop_at_ts=known_values)
    
  # create copy of computation
  copied_sgv, info = ge.copy_with_input_replacements(ge.sgv(computation), {})

  # find our target tensor in the new computation
  new_target_op = info._transformed_ops[target.op]
  new_target = new_target_op.outputs[target_pos]
  new_computation = list(info._transformed_ops.values())

  # restrict computation to run after given op
  SAVE_ON_CONTROL_EDGES = True

  if SAVE_ON_CONTROL_EDGES:
    # only add "run_after" control dependencies to root of computation,
    # the rest automatically runs after because of data dependencies
    # TODO: more efficient implementation by walking back from new_target
    # instead of whole graph
    computation_graph = linearize_lib.get_graph(restrict_to=new_computation)

    # note, toposort order is reversed from networkx/mine convention
    computation_root = list(toposort.toposort(computation_graph))[-1]
    for op in computation_root:
      run_after(op, preceding_op)
  else:
    if preceding_op is not None:
      for op in info._transformed_ops.values():
        run_after(op, preceding_op)
  return new_target
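A hedged sketch of the two graph-editor primitives this function relies on, assuming TensorFlow 1.x (toy names): walk backward from a tensor, copy the resulting ops, and look the copied tensor up via the returned info object.

import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

graph = tf.Graph()
with graph.as_default():
    a = tf.constant([1.0], name="a")
    b = tf.square(a, name="b")
    target = tf.exp(b, name="target")

# All ops needed to compute `target` (here: a, b and target themselves).
ops = ge.get_backward_walk_ops(target.op, inclusive=True)

# Duplicate those ops inside the same graph; `info` maps originals to copies.
copied_sgv, info = ge.copy_with_input_replacements(ge.sgv(ops), {})
new_target = info._transformed_ops[target.op].outputs[0]
print(new_target.name)   # the copy gets a fresh, unique name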
Example #28
    def _add_swapin(self, swapout_op, dest_op, ts0):
        """Add a swapin operation to the graph. The swapin ops reads
        the output tensor of `swapout_op` and passes it to `dest_op`,
        replacing the input tensor `ts0` of `dest_op`.

        This method does an in-place modification to the graph.

        Example: the graph before and after this method invoked.
        ```
        Before
          |ts0| -> (swapout_op)
          |ts0| -> (dest_op)

        After:
          |ts0| -> (swapout_op) -> (swapin_op) -> (dest_op)
        ```

        Args:
          swapout_op: a `tf.Operation` that swapped out the tensor `ts0`.
          dest_op: a `tf.Operation` that will consume the output tensor of `swapout_op`.
          ts0: a `tf.Tensor` being the original input tensor of `dest_op`.

        Return:
          A `tf.Operation` newly added to the graph.
        """
        with tf.device(self._cpu_device):
            swap_in = tf.identity(ts0, name="lms/swapin")

        # Connect: swap_out -> swap_in
        self._connect_ops(swapout_op, swap_in.op)

        # Connect: swap_in -> dest
        dest_svg = ge.sgv(dest_op, graph=self._graph)
        input_idx = dest_svg.input_index(ts0)
        self._connect_ops(swap_in.op,
                          dest_op,
                          remap_inputs=True,
                          idx=input_idx)
        self._excl_ops.add(swap_in.op)

        self._log_info(
            "Consuming op {} (order {}) swaps in {}".format(
                dest_op.name, self._topo_sort.get_order(dest_op), ts0.name), 1)

        return swap_in.op
Example #29
def get_op_input_var(
    operation: tf_compat.Operation,
    var_index: Union[str, int] = VAR_INDEX_FROM_TRAINABLE,
) -> tf_compat.Tensor:
    """
    Get the input variable for an operation.
    Ex: the weight for a conv operator.
    See @get_op_var_index for proper values for var_index.

    :param operation: the operation to get the input variable for
    :param var_index: the index to guide which input to grab from the operation
    :return: the tensor input that represents the variable input for the operation
    """
    if tf_contrib_err:
        raise tf_contrib_err

    op_sgv = graph_editor.sgv(operation)
    var_index = get_op_var_index(var_index, op_sgv.inputs)

    return op_sgv.inputs[var_index]
Example #30
    def _add_swapout(self, src_op, ts0):
        """Add a swapout operation to the graph to swap out the output tensor `ts0`
        of the operation `src_op`.

        This method does an in-place modification to the graph.

        Example: the graph before and after this method invoked.
        ```
        Before
          (src_op) -> |ts0| -> (dest_op)

        After:
          (src_op) -> |ts0| -> (swapout_op)
          |ts0| -> (dest_op)
        ```

        Args:
          src_op: a `tf.Operation` that produces the tensor `ts0`.
          ts0: an output `tf.Tensor` of `src_op` being swapped out.

        Return:
          A `tf.Operation` newly added to the graph.
        """
        with tf.device(self._cpu_device):
            swap_out = tf.identity(ts0, name="lms/swapout")

        # Connect: src-node -> swap-out
        src_svg = ge.sgv(src_op, graph=self._graph)
        src_out_idx = src_svg.output_index(ts0)
        self._connect_ops(src_op,
                          swap_out.op,
                          remap_outputs=True,
                          idx=src_out_idx)
        self._excl_ops.add(swap_out.op)
        self._log_info(
            "Tensor {} will be placed on {}".format(ts0.name,
                                                    self._cpu_device), 1)

        return swap_out.op
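A hedged end-to-end sketch of the swap-out/swap-in idea implemented by _add_swapout and _add_swapin, assuming TensorFlow 1.x (toy names; the topological ordering and control dependencies handled by the real implementation are omitted here):

import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

graph = tf.Graph()
with graph.as_default():
    a = tf.constant([[1.0, 2.0]], name="a")
    ts0 = tf.nn.relu(a, name="ts0")          # tensor to be swapped out
    dest = tf.square(ts0, name="dest")       # later consumer of ts0

    with tf.device("/cpu:0"):
        swap_out = tf.identity(ts0, name="swapout")   # copy ts0 to the CPU
        swap_in = tf.identity(swap_out, name="swapin")

    # Make `dest` read the swapped-in copy instead of ts0 directly.
    idx = ge.sgv(dest.op, graph=graph).input_index(ts0)
    ge.connect(ge.sgv(swap_in.op), ge.sgv(dest.op).remap_inputs([idx]))

with tf.Session(graph=graph) as sess:
    print(sess.run(dest))                    # [[1. 4.]], routed through the CPU copy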
Example #31
def gradients(ys, xs, grad_ys=None, checkpoints='collection', **kwargs):
    '''
    Authors: Tim Salimans & Yaroslav Bulatov

    memory efficient gradient implementation inspired by "Training Deep Nets with Sublinear Memory Cost"
    by Chen et al. 2016 (https://arxiv.org/abs/1604.06174)

    ys,xs,grad_ys,kwargs are the arguments to standard tensorflow tf.gradients
    (https://www.tensorflow.org/versions/r0.12/api_docs/python/train.html#gradients)

    'checkpoints' can either be
        - a list consisting of tensors from the forward pass of the neural net
          that we should re-use when calculating the gradients in the backward pass
          all other tensors that do not appear in this list will be re-computed
        - a string specifying how this list should be determined. currently we support
            - 'speed':  checkpoint all outputs of convolutions and matmuls. these ops are usually the most expensive,
                        so checkpointing them maximizes the running speed
                        (this is a good option if nonlinearities, concats, batchnorms, etc are taking up a lot of memory)
            - 'memory': try to minimize the memory usage
                        (currently using a very simple strategy that identifies a number of bottleneck tensors in the graph to checkpoint)
            - 'collection': look for a tensorflow collection named 'checkpoints', which holds the tensors to checkpoint
    '''

    #    print("Calling memsaving gradients with", checkpoints)
    if not isinstance(ys, list):
        ys = [ys]
    if not isinstance(xs, list):
        xs = [xs]

    bwd_ops = ge.get_backward_walk_ops([y.op for y in ys], inclusive=True)

    debug_print("bwd_ops: %s", bwd_ops)

    # forward ops are all ops that are candidates for recomputation
    fwd_ops = ge.get_forward_walk_ops([x.op for x in xs],
                                      inclusive=True,
                                      within_ops=bwd_ops)
    debug_print("fwd_ops: %s", fwd_ops)

    # exclude ops with no inputs
    fwd_ops = [op for op in fwd_ops if op.inputs]

    # don't recompute xs, remove variables
    xs_ops = _to_ops(xs)
    fwd_ops = [op for op in fwd_ops if not op in xs_ops]
    fwd_ops = [op for op in fwd_ops if not '/assign' in op.name]
    fwd_ops = [op for op in fwd_ops if not '/Assign' in op.name]
    fwd_ops = [op for op in fwd_ops if not '/read' in op.name]
    ts_all = ge.filter_ts(fwd_ops, True)  # get the tensors
    ts_all = [t for t in ts_all if '/read' not in t.name]
    ts_all = set(ts_all) - set(xs) - set(ys)

    # construct list of tensors to checkpoint during forward pass, if not
    # given as input
    if type(checkpoints) is not list:
        if checkpoints == 'collection':
            checkpoints = tf.get_collection('checkpoints')

        elif checkpoints == 'speed':
            # checkpoint all expensive ops to maximize running speed
            checkpoints = ge.filter_ts_from_regex(fwd_ops,
                                                  'conv2d|Conv|MatMul')

        elif checkpoints == 'memory':

            # remove very small tensors and some weird ops
            def fixdims(
                t
            ):  # tf.Dimension values are not compatible with int, convert manually
                try:
                    return [int(e if e.value is not None else 64) for e in t]
                except:
                    return [0]  # unknown shape

            ts_all = [
                t for t in ts_all
                if np.prod(fixdims(t.shape)) > MIN_CHECKPOINT_NODE_SIZE
            ]
            ts_all = [t for t in ts_all if 'L2Loss' not in t.name]
            ts_all = [t for t in ts_all if 'entropy' not in t.name]
            ts_all = [t for t in ts_all if 'FusedBatchNorm' not in t.name]
            ts_all = [t for t in ts_all if 'Switch' not in t.name]
            ts_all = [t for t in ts_all if 'dropout' not in t.name]
            # DV: FP16_FIX - need to add 'Cast' layer here to make it work for FP16
            ts_all = [t for t in ts_all if 'Cast' not in t.name]

            # filter out all tensors that are inputs of the backward graph
            with util.capture_ops() as bwd_ops:
                tf_gradients(ys, xs, grad_ys, **kwargs)

            bwd_inputs = [t for op in bwd_ops for t in op.inputs]
            # list of tensors in forward graph that is in input to bwd graph
            ts_filtered = list(set(bwd_inputs).intersection(ts_all))
            debug_print("Using tensors %s", ts_filtered)

            # try two slightly different ways of getting bottlenecks tensors
            # to checkpoint
            for ts in [ts_filtered, ts_all]:

                # get all bottlenecks in the graph
                bottleneck_ts = []
                for t in ts:
                    b = set(
                        ge.get_backward_walk_ops(t.op,
                                                 inclusive=True,
                                                 within_ops=fwd_ops))
                    f = set(
                        ge.get_forward_walk_ops(t.op,
                                                inclusive=False,
                                                within_ops=fwd_ops))
                    # check that there are not shortcuts
                    b_inp = set([inp for op in b
                                 for inp in op.inputs]).intersection(ts_all)
                    f_inp = set([inp for op in f
                                 for inp in op.inputs]).intersection(ts_all)
                    if not set(b_inp).intersection(
                            f_inp) and len(b_inp) + len(f_inp) >= len(ts_all):
                        bottleneck_ts.append(t)  # we have a bottleneck!
                    else:
                        debug_print(
                            "Rejected bottleneck candidate and ops %s",
                            [t] + list(set(ts_all) - set(b_inp) - set(f_inp)))

                # success? or try again without filtering?
                if len(bottleneck_ts) >= np.sqrt(
                        len(ts_filtered)):  # yes, enough bottlenecks found!
                    break

            if not bottleneck_ts:
                raise Exception(
                    'unable to find bottleneck tensors! please provide checkpoint nodes manually, or use checkpoints="speed".'
                )

            # sort the bottlenecks
            bottlenecks_sorted_lists = tf_toposort(bottleneck_ts,
                                                   within_ops=fwd_ops)
            sorted_bottlenecks = [
                t for ts in bottlenecks_sorted_lists for t in ts
            ]

            # save an approximately optimal number ~ sqrt(N)
            N = len(ts_filtered)
            if len(bottleneck_ts) <= np.ceil(np.sqrt(N)):
                checkpoints = sorted_bottlenecks
            else:
                step = int(np.ceil(len(bottleneck_ts) / np.sqrt(N)))
                checkpoints = sorted_bottlenecks[step::step]

        else:
            raise Exception('%s is unsupported input for "checkpoints"' %
                            (checkpoints, ))

    checkpoints = list(set(checkpoints).intersection(ts_all))

    # at this point automatic selection happened and checkpoints is list of nodes
    assert isinstance(checkpoints, list)

    debug_print("Checkpoint nodes used: %s", checkpoints)
    # better error handling of special cases
    # xs are already handled as checkpoint nodes, so no need to include them
    xs_intersect_checkpoints = set(xs).intersection(set(checkpoints))
    if xs_intersect_checkpoints:
        debug_print("Warning, some input nodes are also checkpoint nodes: %s",
                    xs_intersect_checkpoints)
    ys_intersect_checkpoints = set(ys).intersection(set(checkpoints))
    debug_print("ys: %s, checkpoints: %s, intersect: %s", ys, checkpoints,
                ys_intersect_checkpoints)
    # saving an output node (ys) gives no benefit in memory while creating
    # new edge cases, exclude them
    if ys_intersect_checkpoints:
        debug_print(
            "Warning, some output nodes are also checkpoints nodes: %s",
            format_ops(ys_intersect_checkpoints))

    # remove initial and terminal nodes from checkpoints list if present
    checkpoints = list(set(checkpoints) - set(ys) - set(xs))

    # check that we have some nodes to checkpoint
    # if not checkpoints:
    #     raise Exception('no checkpoints nodes found or given as input! ')

    # disconnect dependencies between checkpointed tensors
    checkpoints_disconnected = {}
    for x in checkpoints:
        if x.op and x.op.name is not None:
            grad_node = tf.stop_gradient(x, name=x.op.name + "_sg")
        else:
            grad_node = tf.stop_gradient(x)
        checkpoints_disconnected[x] = grad_node

    # partial derivatives to the checkpointed tensors and xs
    ops_to_copy = fast_backward_ops(seed_ops=[y.op for y in ys],
                                    stop_at_ts=checkpoints,
                                    within_ops=fwd_ops)
    debug_print("Found %s ops to copy within fwd_ops %s, seed %s, stop_at %s",
                len(ops_to_copy), fwd_ops, [r.op for r in ys], checkpoints)
    debug_print("ops_to_copy = %s", ops_to_copy)
    debug_print("Processing list %s", ys)
    copied_sgv, info = ge.copy_with_input_replacements(ge.sgv(ops_to_copy), {})
    for origin_op, op in info._transformed_ops.items():
        op._set_device(origin_op.node_def.device)
    copied_ops = info._transformed_ops.values()
    debug_print("Copied %s to %s", ops_to_copy, copied_ops)
    ge.reroute_ts(checkpoints_disconnected.values(),
                  checkpoints_disconnected.keys(),
                  can_modify=copied_ops)
    debug_print("Rewired %s in place of %s restricted to %s",
                checkpoints_disconnected.values(),
                checkpoints_disconnected.keys(), copied_ops)

    # get gradients with respect to current boundary + original x's
    copied_ys = [info._transformed_ops[y.op]._outputs[0] for y in ys]
    boundary = list(checkpoints_disconnected.values())
    dv = tf_gradients(ys=copied_ys,
                      xs=boundary + xs,
                      grad_ys=grad_ys,
                      **kwargs)
    debug_print("Got gradients %s", dv)
    debug_print("for %s", copied_ys)
    debug_print("with respect to %s", boundary + xs)

    inputs_to_do_before = [y.op for y in ys]
    if grad_ys is not None:
        inputs_to_do_before += grad_ys
    wait_to_do_ops = list(copied_ops) + [g.op for g in dv if g is not None]
    my_add_control_inputs(wait_to_do_ops, inputs_to_do_before)

    # partial derivatives to the checkpointed nodes
    # dictionary of "node: backprop" for nodes in the boundary
    d_checkpoints = {
        r: dr
        for r, dr in zip(checkpoints_disconnected.keys(),
                         dv[:len(checkpoints_disconnected)])
    }
    # partial derivatives to xs (usually the params of the neural net)
    d_xs = dv[len(checkpoints_disconnected):]

    # incorporate derivatives flowing through the checkpointed nodes
    checkpoints_sorted_lists = tf_toposort(checkpoints, within_ops=fwd_ops)
    for ts in checkpoints_sorted_lists[::-1]:
        debug_print("Processing list %s", ts)
        checkpoints_other = [r for r in checkpoints if r not in ts]
        checkpoints_disconnected_other = [
            checkpoints_disconnected[r] for r in checkpoints_other
        ]

        # copy part of the graph below current checkpoint node, stopping at
        # other checkpoints nodes
        ops_to_copy = fast_backward_ops(within_ops=fwd_ops,
                                        seed_ops=[r.op for r in ts],
                                        stop_at_ts=checkpoints_other)
        debug_print("Found %s ops to copy within %s, seed %s, stop_at %s",
                    len(ops_to_copy), fwd_ops, [r.op for r in ts],
                    checkpoints_other)
        debug_print("ops_to_copy = %s", ops_to_copy)
        if not ops_to_copy:  # we're done!
            break
        copied_sgv, info = ge.copy_with_input_replacements(
            ge.sgv(ops_to_copy), {})
        for origin_op, op in info._transformed_ops.items():
            op._set_device(origin_op.node_def.device)
        copied_ops = info._transformed_ops.values()
        debug_print("Copied %s to %s", ops_to_copy, copied_ops)
        ge.reroute_ts(checkpoints_disconnected_other,
                      checkpoints_other,
                      can_modify=copied_ops)
        debug_print("Rewired %s in place of %s restricted to %s",
                    checkpoints_disconnected_other, checkpoints_other,
                    copied_ops)

        # gradient flowing through the checkpointed node
        boundary = [info._transformed_ops[r.op]._outputs[0] for r in ts]
        substitute_backprops = [d_checkpoints[r] for r in ts]
        dv = tf_gradients(boundary,
                          checkpoints_disconnected_other + xs,
                          grad_ys=substitute_backprops,
                          **kwargs)
        debug_print("Got gradients %s", dv)
        debug_print("for %s", boundary)
        debug_print("with respect to %s", checkpoints_disconnected_other + xs)
        debug_print("with boundary backprop substitutions %s",
                    substitute_backprops)

        inputs_to_do_before = [d_checkpoints[r].op for r in ts]
        wait_to_do_ops = list(copied_ops) + [g.op for g in dv if g is not None]
        my_add_control_inputs(wait_to_do_ops, inputs_to_do_before)

        # partial derivatives to the checkpointed nodes
        for r, dr in zip(checkpoints_other, dv[:len(checkpoints_other)]):
            if dr is not None:
                if d_checkpoints[r] is None:
                    d_checkpoints[r] = dr
                else:
                    d_checkpoints[r] += dr

        def _unsparsify(x):
            if not isinstance(x, tf.IndexedSlices):
                return x
            assert x.dense_shape is not None, "memory_saving_gradients encountered sparse gradients of unknown shape"
            indices = x.indices
            while indices.shape.ndims < x.values.shape.ndims:
                indices = tf.expand_dims(indices, -1)
            return tf.scatter_nd(indices, x.values, x.dense_shape)

        # partial derivatives to xs (usually the params of the neural net)
        d_xs_new = dv[len(checkpoints_other):]
        for j in range(len(xs)):
            if d_xs_new[j] is not None:
                if d_xs[j] is None:
                    d_xs[j] = _unsparsify(d_xs_new[j])
                else:
                    d_xs[j] += _unsparsify(d_xs_new[j])

    return d_xs
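A hedged usage sketch for the memory-saving gradients function above, assuming TensorFlow 1.x and that the function plus its module-level helpers are in scope; the model below is a toy:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 128])
h = tf.layers.dense(x, 256, activation=tf.nn.relu)
tf.add_to_collection('checkpoints', h)      # consumed by checkpoints='collection'
y = tf.layers.dense(h, 10)
loss = tf.reduce_mean(tf.square(y))

params = tf.trainable_variables()
# Non-checkpointed activations are recomputed during the backward pass.
grads = gradients(loss, params, checkpoints='collection')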
Example #32
    # Select the correct op from the forward walk to connect to
    for fw_op in fw_ops:
        if fw_op.type not in not_types:
            next_op = fw_op
            break

    if next_op is None:
        raise ValueError('No suitable next op found to connect to. Try looking at the graph or full list of forward ops')

    # Add placeholder and variable
    add_op = tf.add(var, delta_placeholder)  # TODO - might be neater if these were created in the same scope as the variable; also might solve issue with connecting add ops within while loop

    # Connect add_op output to next op input
    # Create subgraph 1 (outputs)
    sgv0 = ge.sgv(add_op.op)

    # Create subgraph 2 (inputs)
    sgv1 = ge.sgv(next_op).remap_inputs([1])

    # Connect
    ge.connect(sgv0, sgv1)  # TODO - sort out error with tf.while loops; may not be possible: try the assign_add method first

    # Define parameter update ops
    update = 0.01*delta_placeholder
    new_update_op = tf.assign_sub(var, update)

    if gamma_update_op is not None:
        gamma_update_op = tf.group(new_update_op, gamma_update_op)
    else:
        gamma_update_op = new_update_op
Example #33
def gradients(ys, xs,   # pylint: disable: too-many-statements, too-many-branches
              grad_ys=None, checkpoints='collection', **kwargs):
    '''
    Authors: Tim Salimans & Yaroslav Bulatov

    memory efficient gradient implementation inspired by "Training Deep Nets with Sublinear Memory
    Cost" by Chen et al. 2016 (https://arxiv.org/abs/1604.06174)

    ys,xs,grad_ys,kwargs are the arguments to standard tensorflow tf.gradients
    (https://www.tensorflow.org/versions/r0.12/api_docs/python/train.html#gradients)

    'checkpoints' can either be
        - a list consisting of tensors from the forward pass of the neural net
          that we should re-use when calculating the gradients in the backward pass
          all other tensors that do not appear in this list will be re-computed
        - a string specifying how this list should be determined. currently we support
            - 'speed':  checkpoint all outputs of convolutions and matmuls. these ops are usually
                        the most expensive, so checkpointing them maximizes the running speed
                        (this is a good option if nonlinearities, concats, batchnorms, etc are
                        taking up a lot of memory)
            - 'memory': try to minimize the memory usage
                        (currently using a very simple strategy that identifies a number of
                        bottleneck tensors in the graph to checkpoint)
            - 'collection': look for a tensorflow collection named 'checkpoints', which holds the
                            tensors to checkpoint
    '''

    #    print("Calling memsaving gradients with", checkpoints)
    if not isinstance(ys, list):
        ys = [ys]
    if not isinstance(xs, list):
        xs = [xs]

    bwd_ops = ge.get_backward_walk_ops([y.op for y in ys],
                                       inclusive=True)

    debug_print("bwd_ops: {}".format(bwd_ops))

    # forward ops are all ops that are candidates for recomputation
    fwd_ops = ge.get_forward_walk_ops([x.op for x in xs],
                                      inclusive=True,
                                      within_ops=bwd_ops)
    debug_print("fwd_ops: {}".format(fwd_ops))

    # exclude ops with no inputs
    fwd_ops = [op for op in fwd_ops if op.inputs]

    # don't recompute xs, remove variables
    xs_ops = _to_ops(xs)
    fwd_ops = [op for op in fwd_ops if op not in xs_ops]
    fwd_ops = [op for op in fwd_ops if '/assign' not in op.name]
    fwd_ops = [op for op in fwd_ops if '/Assign' not in op.name]
    fwd_ops = [op for op in fwd_ops if '/read' not in op.name]
    ts_all = ge.filter_ts(fwd_ops, True)  # get the tensors
    ts_all = [t for t in ts_all if '/read' not in t.name]
    ts_all = set(ts_all) - set(xs) - set(ys)

    # construct list of tensors to checkpoint during forward pass, if not
    # given as input
    if type(checkpoints) is not list:
        if checkpoints == 'collection':
            checkpoints = tf.get_collection('checkpoints')

        elif checkpoints == 'speed':
            # checkpoint all expensive ops to maximize running speed
            checkpoints = ge.filter_ts_from_regex(fwd_ops, 'conv2d|Conv|MatMul')

        elif checkpoints == 'memory':

            # remove very small tensors and some weird ops
            def fixdims(t):  # tf.Dimension values are not compatible with int, convert manually
                try:
                    return [int(e if e.value is not None else 64) for e in t]
                except:
                    return [0]  # unknown shape
            ts_all = [t for t in ts_all if np.prod(fixdims(t.shape)) > MIN_CHECKPOINT_NODE_SIZE]
            ts_all = [t for t in ts_all if 'L2Loss' not in t.name]
            ts_all = [t for t in ts_all if 'entropy' not in t.name]
            ts_all = [t for t in ts_all if 'FusedBatchNorm' not in t.name]
            ts_all = [t for t in ts_all if 'Switch' not in t.name]
            ts_all = [t for t in ts_all if 'dropout' not in t.name]
            # DV: FP16_FIX - need to add 'Cast' layer here to make it work for FP16
            ts_all = [t for t in ts_all if 'Cast' not in t.name]

            # filter out all tensors that are inputs of the backward graph
            with util.capture_ops() as bwd_ops:
                tf_gradients(ys, xs, grad_ys, **kwargs)

            bwd_inputs = [t for op in bwd_ops for t in op.inputs]
            # list of tensors in forward graph that is in input to bwd graph
            ts_filtered = list(set(bwd_inputs).intersection(ts_all))
            debug_print("Using tensors {}".format(ts_filtered))

            # try two slightly different ways of getting bottlenecks tensors
            # to checkpoint
            for ts in [ts_filtered, ts_all]:

                # get all bottlenecks in the graph
                bottleneck_ts = []
                for t in ts:
                    b = set(ge.get_backward_walk_ops(t.op, inclusive=True, within_ops=fwd_ops))
                    f = set(ge.get_forward_walk_ops(t.op, inclusive=False, within_ops=fwd_ops))
                    # check that there are not shortcuts
                    b_inp = set([inp for op in b for inp in op.inputs]).intersection(ts_all)
                    f_inp = set([inp for op in f for inp in op.inputs]).intersection(ts_all)
                    if not set(b_inp).intersection(f_inp) and len(b_inp)+len(f_inp) >= len(ts_all):
                        bottleneck_ts.append(t)  # we have a bottleneck!
                    else:
                        debug_print("Rejected bottleneck candidate and ops {}".format(
                            [t] + list(set(ts_all) - set(b_inp) - set(f_inp))))

                # success? or try again without filtering?
                if len(bottleneck_ts) >= np.sqrt(len(ts_filtered)):  # enough bottlenecks found!
                    break

            if not bottleneck_ts:
                raise Exception('unable to find bottleneck tensors! please provide checkpoint '
                                'nodes manually, or use checkpoints="speed".')

            # sort the bottlenecks
            bottlenecks_sorted_lists = tf_toposort(bottleneck_ts, within_ops=fwd_ops)
            sorted_bottlenecks = [t for ts in bottlenecks_sorted_lists for t in ts]

            # save an approximately optimal number ~ sqrt(N)
            N = len(ts_filtered)
            if len(bottleneck_ts) <= np.ceil(np.sqrt(N)):
                checkpoints = sorted_bottlenecks
            else:
                step = int(np.ceil(len(bottleneck_ts) / np.sqrt(N)))
                checkpoints = sorted_bottlenecks[step::step]

        else:
            raise Exception('%s is unsupported input for "checkpoints"' % (checkpoints,))

    checkpoints = list(set(checkpoints).intersection(ts_all))

    # at this point automatic selection happened and checkpoints is list of nodes
    assert isinstance(checkpoints, list)

    debug_print("Checkpoint nodes used: {}".format(checkpoints))
    # better error handling of special cases
    # xs are already handled as checkpoint nodes, so no need to include them
    xs_intersect_checkpoints = set(xs).intersection(set(checkpoints))
    if xs_intersect_checkpoints:
        debug_print("Warning, some input nodes are also checkpoint nodes: {}".format(
            xs_intersect_checkpoints))
    ys_intersect_checkpoints = set(ys).intersection(set(checkpoints))
    debug_print("ys: {}, checkpoints:{}, intersect: {}".format(
        ys, checkpoints, ys_intersect_checkpoints))
    # saving an output node (ys) gives no benefit in memory while creating
    # new edge cases, exclude them
    if ys_intersect_checkpoints:
        debug_print("Warning, some output nodes are also checkpoints nodes: {}".format(
            format_ops(ys_intersect_checkpoints)))

    # remove initial and terminal nodes from checkpoints list if present
    checkpoints = list(set(checkpoints) - set(ys) - set(xs))

    # check that we have some nodes to checkpoint
    if not checkpoints:
        raise Exception('no checkpoints nodes found or given as input! ')

    # disconnect dependencies between checkpointed tensors
    checkpoints_disconnected = {}
    for x in checkpoints:
        if x.op and x.op.name is not None:
            grad_node = tf.stop_gradient(x, name=x.op.name+"_sg")
        else:
            grad_node = tf.stop_gradient(x)
        checkpoints_disconnected[x] = grad_node

    # partial derivatives to the checkpointed tensors and xs
    ops_to_copy = fast_backward_ops(seed_ops=[y.op for y in ys],
                                    stop_at_ts=checkpoints, within_ops=fwd_ops)
    debug_print("Found {} ops to copy within fwd_ops {}, seed {}, stop_at {}".format(
        len(ops_to_copy), fwd_ops, [r.op for r in ys], checkpoints))
    debug_print("ops_to_copy = {}".format(ops_to_copy))
    debug_print("Processing list {}".format(ys))
    _, info = ge.copy_with_input_replacements(ge.sgv(ops_to_copy), {})
    for origin_op, op in info._transformed_ops.items():
        op._set_device(origin_op.node_def.device)
    copied_ops = info._transformed_ops.values()
    debug_print("Copied {} to {}".format(ops_to_copy, copied_ops))
    ge.reroute_ts(checkpoints_disconnected.values(),
                  checkpoints_disconnected.keys(),
                  can_modify=copied_ops)
    debug_print("Rewired {} in place of {} restricted to {}".format(
        checkpoints_disconnected.values(), checkpoints_disconnected.keys(), copied_ops))

    # get gradients with respect to current boundary + original x's
    copied_ys = [info._transformed_ops[y.op]._outputs[0] for y in ys]
    boundary = list(checkpoints_disconnected.values())
    dv = tf_gradients(ys=copied_ys, xs=boundary+xs, grad_ys=grad_ys, **kwargs)
    debug_print("Got gradients {}".format(dv))
    debug_print("for {}".format(copied_ys))
    debug_print("with respect to {}".format(boundary+xs))

    inputs_to_do_before = [y.op for y in ys]
    if grad_ys is not None:
        inputs_to_do_before += grad_ys
    wait_to_do_ops = list(copied_ops) + [g.op for g in dv if g is not None]
    my_add_control_inputs(wait_to_do_ops, inputs_to_do_before)

    # partial derivatives to the checkpointed nodes
    # dictionary of "node: backprop" for nodes in the boundary
    d_checkpoints = {r: dr for r, dr in zip(checkpoints_disconnected.keys(),
                                            dv[:len(checkpoints_disconnected)])}
    # partial derivatives to xs (usually the params of the neural net)
    d_xs = dv[len(checkpoints_disconnected):]

    # incorporate derivatives flowing through the checkpointed nodes
    checkpoints_sorted_lists = tf_toposort(checkpoints, within_ops=fwd_ops)
    for ts in checkpoints_sorted_lists[::-1]:
        debug_print("Processing list {}".format(ts))
        checkpoints_other = [r for r in checkpoints if r not in ts]
        checkpoints_disconnected_other = [checkpoints_disconnected[r] for r in checkpoints_other]

        # copy the part of the graph below the current checkpoint node, stopping at
        # the other checkpoint nodes
        ops_to_copy = fast_backward_ops(within_ops=fwd_ops,
                                        seed_ops=[r.op for r in ts],
                                        stop_at_ts=checkpoints_other)
        debug_print("Found {} ops to copy within {}, seed {}, stop_at {}".format(
            len(ops_to_copy), fwd_ops, [r.op for r in ts], checkpoints_other))
        debug_print("ops_to_copy = {}".format(ops_to_copy))
        if not ops_to_copy:  # we're done!
            break
        _, info = ge.copy_with_input_replacements(ge.sgv(ops_to_copy), {})
        for origin_op, op in info._transformed_ops.items():
            op._set_device(origin_op.node_def.device)
        copied_ops = info._transformed_ops.values()
        debug_print("Copied {} to {}".format(ops_to_copy, copied_ops))
        ge.reroute_ts(checkpoints_disconnected_other, checkpoints_other, can_modify=copied_ops)
        debug_print("Rewired %s in place of %s restricted to %s",
                    checkpoints_disconnected_other, checkpoints_other, copied_ops)

        # gradient flowing through the checkpointed node
        boundary = [info._transformed_ops[r.op]._outputs[0] for r in ts]
        substitute_backprops = [d_checkpoints[r] for r in ts]
        dv = tf_gradients(boundary,
                          checkpoints_disconnected_other+xs,
                          grad_ys=substitute_backprops, **kwargs)
        debug_print("Got gradients {}".format(dv))
        debug_print("for {}".format(boundary))
        debug_print("with respect to {}".format(checkpoints_disconnected_other+xs))
        debug_print("with boundary backprop substitutions {}".format(substitute_backprops))

        inputs_to_do_before = [d_checkpoints[r].op for r in ts]
        wait_to_do_ops = list(copied_ops) + [g.op for g in dv if g is not None]
        my_add_control_inputs(wait_to_do_ops, inputs_to_do_before)

        # partial derivatives to the checkpointed nodes
        for r, dr in zip(checkpoints_other, dv[:len(checkpoints_other)]):
            if dr is not None:
                if d_checkpoints[r] is None:
                    d_checkpoints[r] = dr
                else:
                    d_checkpoints[r] += dr

        def _unsparsify(var_x):
            if not isinstance(var_x, tf.IndexedSlices):
                return var_x
            assert var_x.dense_shape is not None, \
                "memory_saving_gradients encountered sparse gradients of unknown shape"
            indices = var_x.indices
            while indices.shape.ndims < var_x.values.shape.ndims:
                indices = tf.expand_dims(indices, -1)
            return tf.scatter_nd(indices, var_x.values, var_x.dense_shape)

        # partial derivatives to xs (usually the params of the neural net)
        d_xs_new = dv[len(checkpoints_other):]
        for j in range(len(xs)):
            if d_xs_new[j] is not None:
                if d_xs[j] is None:
                    d_xs[j] = _unsparsify(d_xs_new[j])
                else:
                    d_xs[j] += _unsparsify(d_xs_new[j])

    return d_xs
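A minimal usage sketch of the memory-saving gradients routine above, assuming the enclosing function is exposed as gradients(ys, xs, grad_ys=None, checkpoints=..., **kwargs) in a memory_saving_gradients module; the entry point itself is not shown in this excerpt, so the import path and the 'memory' keyword below are assumptions:

import tensorflow as tf

# hypothetical import; the gradients() wrapper is not part of the excerpt above
from memory_saving_gradients import gradients as checkpointed_gradients

x = tf.placeholder(tf.float32, [None, 64])
h = x
for i in range(4):  # a small feed-forward stack whose activations can be recomputed
    h = tf.layers.dense(h, 64, activation=tf.nn.relu, name="fc%d" % i)
loss = tf.reduce_mean(tf.square(h))

params = tf.trainable_variables()
# recompute intermediate activations between automatically selected checkpoints
grads = checkpointed_gradients(ys=[loss], xs=params, checkpoints='memory')
train_op = tf.train.GradientDescentOptimizer(0.1).apply_gradients(zip(grads, params))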
Example No. 34
0
  def apply(self, new_inputs, update_colocation_groups=True):
    assert len(new_inputs) == len(self.inputs)
    g = tf.get_default_graph()  # todo: make that member variable

    new_inputs2 = []
    # replace variable inputs with their read endpoint
    for input in new_inputs:
      if isinstance(input, tf.Variable):
        new_inputs2.append(input.read_value())
      else:
        new_inputs2.append(input)
    new_inputs = new_inputs2
    
    replacements = OrderedDict()
    for old_input_ts, new_input_ts in zip(self.inputs, new_inputs):
      # shape/dtype checks
      if isinstance(old_input_ts, (list, tuple)):
        reference_ts = old_input_ts[0]
      else:
        reference_ts = old_input_ts
      assert reference_ts.get_shape() == new_input_ts.get_shape()
      assert reference_ts.dtype == new_input_ts.dtype

      # Variable with multiple read endpoints, replace all of them with
      # new input tensor
      if isinstance(old_input_ts, (list, tuple)):
        for sub_input in old_input_ts:
          replacements[sub_input] = new_input_ts
      # regular Tensor
      else:
        replacements[old_input_ts] = new_input_ts


    # sanity checks
    # copying Variables is confusing because they don't get added to the
    # GLOBAL_VARIABLES collection and hence escape global initialization,
    # so forbid it
    for op in self.ops:
      if op.type.startswith('Variable'): # 'VariableV2' or 'Variable'
        assert False, "Can't copy variables"


    # TODO: remove this debugging helper
    def summarize_ts(ts):
      from collections import Counter
      ops = set([tensor.op for tensor in ts])
      print(Counter([op.type for op in ops]).most_common(10))

    sgv = ge.sgv(self.ops)
    #    import pdb; pdb.set_trace()
    copied_sgv, info = ge.copy_with_input_replacements(sgv,
                                                       replacements)


    # converting between Python bytes and unicode
    def to_bytes(s): return s.encode('ascii')
    def from_bytes(s): return s.decode('ascii')

    # fix colocation constraints to point to copied ops
    # see https://github.com/tensorflow/tensorflow/issues/9925
    if update_colocation_groups:
      new_ops = [info._transformed_ops[op] for op in self.ops]
      for new_op in new_ops:
        assert len(new_op.colocation_groups()) == 1
        colocation_group = new_op.colocation_groups()[0]
        assert colocation_group.startswith(b'loc:@')
        colocated_with_name = from_bytes(colocation_group[len(b'loc:@'):])

        # if there were no colocation constraints, the op gets colocated with
        # itself (default colocation group), ignore that constraint
        if colocated_with_name == new_op.name:
          continue

        colocation_op = g.get_operation_by_name(colocated_with_name)
        if colocation_op in info._transformed_ops:
          new_colocation_op = info._transformed_ops[colocation_op]
        else:
          assert colocation_op in self.input_ops
          colocation_op_idx = self.input_ops.index(colocation_op)
          new_colocation_op = new_inputs[colocation_op_idx].op

        # overwrite existing _class attribute with new colocation constraints
        new_colocation_groups = [b'loc:@'+to_bytes(new_colocation_op.name)]
        new_op.node_def.attr["_class"].CopyFrom(attr_value_pb2.AttrValue(
          list=attr_value_pb2.AttrValue.ListValue(s=new_colocation_groups)))
    
    # place new ops on device from current device context
    device = get_current_device()
    if device:
      for op in info._transformed_ops.values():
        op._set_device(device)
      
    new_outputs = []
    for old_output_ts in self.outputs:
      new_output_op = info._transformed_ops[old_output_ts.op]
      new_output_ts = new_output_op.outputs[0]
      new_outputs.append(new_output_ts)
      
    return new_outputs
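For context, a minimal standalone sketch of the copy-with-input-replacement pattern that apply() builds on; the tensors a, b, c and new_a below are illustrative, not taken from the example above:

import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

a = tf.placeholder(tf.float32, [None, 4], name="a")
b = tf.square(a, name="b")
c = tf.reduce_sum(b, name="c")

# clone the b -> c chain, feeding new_a wherever the original subgraph consumed a
new_a = tf.placeholder(tf.float32, [None, 4], name="new_a")
copied_sgv, info = ge.copy_with_input_replacements(ge.sgv(b.op, c.op), {a: new_a})
new_c = info.transformed(c)  # the reduce_sum tensor inside the copied subgraph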
Example No. 35
0
def get_ops_and_inputs_by_name_or_regex(
    var_names: List[str],
    graph: tf_compat.Graph = None,
) -> List[Tuple[tf_compat.Operation, tf_compat.Tensor]]:
    """
    Get (operation, input tensor) tuples for operations whose variable names match
    a full name or regex pattern in var_names.

    :param var_names: List of full names or regex patterns to match variable names by.
    :param graph: the graph to get the prunable operations from.
        If not supplied, then will use the default graph
    :return: a list of (operation, parameter) pairs for parameters that match a
        regex pattern in var_names.  If the wildcards '.' or '.*' are provided as regex
        patterns, then will match on all prunable layers and return variables using
        get_op_input_var
    """
    if tf_contrib_err:
        raise tf_contrib_err

    if not graph:
        graph = tf_compat.get_default_graph()

    prunable_ops_and_inputs = []
    if "re:.*" in var_names or "re:." in var_names:  # wildcard cases
        ops = get_prunable_ops(graph)
        for _, op in ops:
            prunable_ops_and_inputs.append((op, get_op_input_var(op)))
    else:
        for var in tf_compat.global_variables():
            if any_str_or_regex_matches_tensor_name(var.name, var_names):
                var_tens = graph.get_tensor_by_name(var.name)
                # get all the read ops for the var
                read_ops = [
                    read_op
                    for read_op in graph_editor.get_consuming_ops(var_tens)
                    if "/read" == read_op.name[-5:]
                ]  # filter for /read ops
                read_tensors = {
                    read_tensor
                    for read_op in read_ops
                    for read_tensor in graph_editor.sgv(read_op).outputs
                }
                # get ops that consume read_tensors and filter out any ops
                # that were created by the _nm_ks masking utilities
                consuming_ops_with_input = [
                    (consuming_op, read_tensor)
                    for read_tensor in read_tensors
                    for consuming_op in graph_editor.get_consuming_ops(read_tensor)
                ]
                for op, inp in consuming_ops_with_input:
                    if "_nm_ks" not in op.name:
                        prunable_ops_and_inputs.append((op, inp))
                    else:
                        nm_ks_consuming_ops_with_input = [
                            (consuming_op, inp)
                            for output_tens in graph_editor.sgv(op).outputs
                            for consuming_op in graph_editor.get_consuming_ops(
                                output_tens
                            )
                            if "_nm_ks" not in consuming_op.name
                        ]
                        prunable_ops_and_inputs += nm_ks_consuming_ops_with_input
    # Check that all var_names values have a match
    _validate_all_params_found(var_names, prunable_ops_and_inputs)
    return prunable_ops_and_inputs
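A hedged usage sketch of the helper above; the "re:" prefix convention and the wildcard branch are taken from the code, while the specific patterns are illustrative:

# wildcard: pair every prunable op with its weight input
ops_and_inputs = get_ops_and_inputs_by_name_or_regex(["re:.*"])
for op, inp in ops_and_inputs:
    print(op.name, inp.get_shape())

# or select specific weights by a regex on their variable names
conv_pairs = get_ops_and_inputs_by_name_or_regex(["re:.*conv1.*weight.*"])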
Example No. 36
0
    fact_levels = [32, 30, 25, 20, 15, 10, 5]

    for lvl in fact_levels:
        logEntry(lvl)
        # load the original graph
        graph = load_graph("../model/pb_files/rnn-deep-250-32-2.pb")

        W1 = graph.get_tensor_by_name('prefix/w_in:0')
        matmul = graph.get_tensor_by_name('prefix/MatMul:0')
        bias = graph.get_tensor_by_name('prefix/b_in:0')
        add = graph.get_tensor_by_name('prefix/add:0')
        reshape = graph.get_tensor_by_name('prefix/Reshape:0')

        # remove all connections from matmul
        ge.detach(ge.sgv(matmul.op))

        with tf.Session(graph=graph) as sess:
            os.system("mkdir /flush1/raj034/RNN/model/test_cases/" + breath + "/" + quant + "/fact_" + str(lvl))

            # for op in sess.graph.get_operations():
            #     print(op.name)

            W = W1.eval()
            u, s, v, ss = svd_compress_gs(W, lvl)
            logEntry("structural_similarity == > " + str(ss))
            u1 = tf.matmul(reshape, u, name="prefix/u1")
            s1 = tf.matmul(u1, s, name="prefix/s1")
            v1 = tf.matmul(s1, v, name="prefix/v1")
            ge.connect(ge.sgv(v1.op), ge.sgv(add.op).remap_inputs([0]))
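svd_compress_gs is not shown in this example; a plausible NumPy sketch of such a rank-truncation helper follows (the structural-similarity score ss below is only a rough stand-in for whatever metric the original returns):

import numpy as np

def svd_compress_gs(W, rank):
    """Factor W ~= u @ s @ v, keeping only the `rank` largest singular values."""
    u_full, s_full, vT_full = np.linalg.svd(W, full_matrices=False)
    u = u_full[:, :rank]
    s = np.diag(s_full[:rank])
    v = vT_full[:rank, :]
    # crude quality score: 1 - relative reconstruction error
    approx = u.dot(s).dot(v)
    ss = 1.0 - np.linalg.norm(W - approx) / np.linalg.norm(W)
    return u.astype(W.dtype), s.astype(W.dtype), v.astype(W.dtype), ss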
Example No. 37
0
    def minimize(self, loss, var_list=None, global_step=None):
        orig_graph_view = None
        trainable_vars = var_list if var_list is not None else tf.trainable_variables()
        if self.inputs is not None:
            seed_ops = [t.op for t in self.inputs]
            result = list(seed_ops)
            wave = set(seed_ops)
            while wave:  # stolen from graph_editor.select
                new_wave = set()
                for op in wave:
                    for new_t in op.outputs:
                        if new_t == loss:
                            continue
                        for new_op in new_t.consumers():
                            #if new_op not in result and is_within(new_op):
                            if new_op not in result:
                                new_wave.add(new_op)
                for op in new_wave:
                    if op not in result:
                        result.append(op)
                wave = new_wave
            orig_graph_view = ge.sgv(result)
        else:
            orig_graph_view = ge.sgv(self.work_graph)

        self.global_step_tensor = tf.Variable(
            0, name='global_step',
            trainable=False) if global_step is None else global_step

        # Perturbations
        deltas = {}
        n_perturbations = {}
        p_perturbations = {}
        with tf.name_scope("Perturbator"):
            self.c_t = tf.div(
                self.c,
                tf.pow(
                    tf.add(tf.cast(self.global_step_tensor, tf.float32),
                           tf.constant(1, dtype=tf.float32)), self.gamma),
                name="SPSA_ct")
            # self.c_t = 0.00 #MOD
            for var in trainable_vars:
                self.num_params += self._mul_dims(var.get_shape())
                var_name = var.name.split(':')[0]
                random = Bernoulli(tf.fill(var.get_shape(), 0.5),
                                   dtype=tf.float32)
                deltas[var] = tf.subtract(tf.constant(1, dtype=tf.float32),
                                          tf.scalar_mul(
                                              tf.constant(2, dtype=tf.float32),
                                              random.sample(1)[0]),
                                          name="SPSA_delta")
                c_t_delta = tf.scalar_mul(tf.reshape(self.c_t, []),
                                          deltas[var])
                n_perturbations[var_name + '/read:0'] = tf.subtract(
                    var, c_t_delta, name="perturb_n")
                p_perturbations[var_name + '/read:0'] = tf.add(
                    var, c_t_delta, name="perturb_p")
        # print("{} parameters".format(self.num_params))

        # Evaluator
        with tf.name_scope("Evaluator"):
            _, self.ninfo = self._clone_model(orig_graph_view, n_perturbations,
                                              'N_Eval')
            _, self.pinfo = self._clone_model(orig_graph_view, p_perturbations,
                                              'P_Eval')

        # Weight Updater
        optimizer_ops = []
        with tf.control_dependencies([loss]):
            with tf.name_scope('Updater'):
                a_t = self.a / (tf.pow(
                    tf.add(tf.cast(self.global_step_tensor, tf.float32),
                           tf.constant(1, dtype=tf.float32)), self.alpha))
                # a_t = 0.00 #MOD
                for var in trainable_vars:
                    l_pos = self.pinfo.transformed(loss)
                    l_neg = self.ninfo.transformed(loss)
                    # print( "l_pos: ", l_pos)
                    # print( "l_neg: ", l_neg)
                    ghat = (l_pos - l_neg) / (tf.constant(2, dtype=tf.float32)
                                              * self.c_t * deltas[var])
                    optimizer_ops.append(tf.assign_sub(var, a_t * ghat))
        grp = control_flow_ops.group(*optimizer_ops)
        with tf.control_dependencies([grp]):
            tf.assign_add(self.global_step_tensor,
                          tf.constant(1, dtype=self.global_step_tensor.dtype))

        return grp
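The ghat expression above is the standard simultaneous-perturbation (SPSA) estimate; below is a minimal NumPy sketch of the same two-evaluation gradient approximation for a single parameter vector, purely illustrative and separate from the optimizer above:

import numpy as np

def spsa_gradient(loss_fn, theta, c_t, rng=np.random):
    # Rademacher perturbation: each coordinate is +1 or -1 with equal probability
    delta = 1.0 - 2.0 * rng.binomial(1, 0.5, size=theta.shape)
    l_pos = loss_fn(theta + c_t * delta)
    l_neg = loss_fn(theta - c_t * delta)
    # two loss evaluations approximate the gradient in every coordinate at once
    return (l_pos - l_neg) / (2.0 * c_t * delta)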
Example No. 38
0
 def test_bypass(self):
   """Test for ge.bypass."""
   ge.bypass(ge.sgv(self.f.op).remap_inputs([0]))
   self.assertTrue(ge.matcher("^foo/bar/h$").input_ops("^foo/c$", "foo/bar/g$")
                   (self.h.op))
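For context, ge.bypass connects a subgraph's inputs directly to its consumers, cutting the subgraph out of the data path; a minimal sketch with illustrative tensor names:

import tensorflow as tf
from tensorflow.contrib import graph_editor as ge

a = tf.placeholder(tf.float32, [None, 3], name="a")
b = tf.nn.relu(a, name="b")
c = tf.identity(b, name="c")

# after this call, c consumes a directly and the relu is left dangling
ge.bypass(ge.sgv(b.op))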