コード例 #1
0
 def testFprint(self):
   with self._single_threaded_test_session():
     in_data = constant_op.constant(['abc', 'very looooooong string', 'def'])
     out_data = gen_sdca_ops.sdca_fprint(in_data)
     self.assertAllEqual([[4143508125394299908, -6879828354153669051],
                          [5849691694103072671, -4874542629849009556],
                          [603227410218889250, 8762207001949257490]],
                         out_data.eval())
コード例 #2
0
ファイル: sdca_ops_test.py プロジェクト: AnishShah/tensorflow
 def testFprint(self):
   with self._single_threaded_test_session():
     in_data = constant_op.constant(['abc', 'very looooooong string', 'def'])
     out_data = gen_sdca_ops.sdca_fprint(in_data)
     self.assertAllEqual([[4143508125394299908, -6879828354153669051],
                          [5849691694103072671, -4874542629849009556],
                          [603227410218889250, 8762207001949257490]],
                         out_data.eval())
コード例 #3
0
    def minimize(self, global_step=None, name=None):
        """Add operations to train a linear model by minimizing the loss function.

    Args:
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.

    Returns:
      An Operation that updates the variables passed in the constructor.
    """
        # Technically, the op depends on a lot more than the variables,
        # but we'll keep the list short.
        with name_scope(name, 'sdca/minimize'):
            sparse_example_indices = []
            sparse_feature_indices = []
            sparse_features_values = []
            for sf in self._examples['sparse_features']:
                sparse_example_indices.append(sf.example_indices)
                sparse_feature_indices.append(sf.feature_indices)
                # If feature values are missing, sdca assumes a value of 1.0f.
                if sf.feature_values is not None:
                    sparse_features_values.append(sf.feature_values)

            # pylint: disable=protected-access
            example_ids_hashed = gen_sdca_ops.sdca_fprint(
                internal_convert_to_tensor(self._examples['example_ids']))
            # pylint: enable=protected-access
            example_state_data = self._hashtable.lookup(example_ids_hashed)
            # Solver returns example_state_update, new delta sparse_feature_weights
            # and delta dense_feature_weights.

            sparse_weights = []
            sparse_indices = []
            # If we have partitioned variables, keep a few dictionaries of Tensors
            # around that we need for the assign_add after the op call to
            # gen_sdca_ops.sdca_optimizer().  These are keyed because we may have a
            # mix of partitioned and un-partitioned variables.
            num_partitions_by_var = {}
            p_assignments_by_var = {}
            gather_ids_by_var = {}
            for v_num, (w, i) in enumerate(
                    zip(self._slots['unshrinked_sparse_features_weights'],
                        sparse_feature_indices)):
                # Append the sparse_indices (in full-variable space).
                sparse_idx = math_ops.cast(
                    array_ops.unique(math_ops.cast(i, dtypes.int32))[0],
                    dtypes.int64)
                sparse_indices.append(sparse_idx)
                if isinstance(w, list) or isinstance(
                        w, var_ops.PartitionedVariable):
                    num_partitions = len(w)
                    flat_ids = array_ops.reshape(sparse_idx, [-1])
                    # We use div partitioning, which is easiest to support downstream.
                    # Compute num_total_ids as the sum of dim-0 of w, then assign
                    # to partitions based on a constant number of ids per partition.
                    # Optimize if we already know the full shape statically.
                    dim_0_size = self._get_first_dimension_size_statically(
                        w, num_partitions)

                    if tensor_shape.dimension_value(dim_0_size):
                        num_total_ids = constant_op.constant(
                            tensor_shape.dimension_value(dim_0_size),
                            flat_ids.dtype)
                    else:
                        dim_0_sizes = []
                        for p in range(num_partitions):
                            if tensor_shape.dimension_value(
                                    w[p].shape[0]) is not None:
                                dim_0_sizes.append(
                                    tensor_shape.dimension_value(
                                        w[p].shape[0]))
                            else:
                                with ops.colocate_with(w[p]):
                                    dim_0_sizes.append(
                                        array_ops.shape(w[p])[0])
                        num_total_ids = math_ops.reduce_sum(
                            math_ops.cast(array_ops.stack(dim_0_sizes),
                                          flat_ids.dtype))
                    ids_per_partition = num_total_ids // num_partitions
                    extras = num_total_ids % num_partitions

                    p_assignments = math_ops.maximum(
                        flat_ids // (ids_per_partition + 1),
                        (flat_ids - extras) // ids_per_partition)

                    # Emulate a conditional using a boolean indicator tensor
                    new_ids = array_ops.where(
                        p_assignments < extras,
                        flat_ids % (ids_per_partition + 1),
                        (flat_ids - extras) % ids_per_partition)

                    # Cast partition assignments to int32 for use in dynamic_partition.
                    # There really should not be more than 2^32 partitions.
                    p_assignments = math_ops.cast(p_assignments, dtypes.int32)
                    # Partition list of ids based on assignments into num_partitions
                    # separate lists.
                    gather_ids = data_flow_ops.dynamic_partition(
                        new_ids, p_assignments, num_partitions)
                    # Add these into the dictionaries for use in the later update.
                    num_partitions_by_var[v_num] = num_partitions
                    p_assignments_by_var[v_num] = p_assignments
                    gather_ids_by_var[v_num] = gather_ids

                    # Gather the weights from each partition.
                    partition_gathered_weights = []
                    for p in range(num_partitions):
                        with ops.colocate_with(w[p]):
                            partition_gathered_weights.append(
                                array_ops.gather(w[p], gather_ids[p]))

                    # Stitch the weights back together in the same order they were before
                    # we dynamic_partitioned them.
                    condition_indices = data_flow_ops.dynamic_partition(
                        math_ops.range(array_ops.shape(new_ids)[0]),
                        p_assignments, num_partitions)
                    batch_gathered_weights = data_flow_ops.dynamic_stitch(
                        condition_indices, partition_gathered_weights)
                else:
                    w_as_tensor = internal_convert_to_tensor(w)
                    with ops.device(w_as_tensor.device):
                        batch_gathered_weights = array_ops.gather(
                            w_as_tensor, sparse_idx)
                sparse_weights.append(batch_gathered_weights)

            # pylint: disable=protected-access
            if compat.forward_compatible(year=2018, month=10, day=30):
                esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2(
                    sparse_example_indices,
                    sparse_feature_indices,
                    sparse_features_values,
                    self._convert_n_to_tensor(
                        self._examples['dense_features']),
                    internal_convert_to_tensor(
                        self._examples['example_weights']),
                    internal_convert_to_tensor(
                        self._examples['example_labels']),
                    sparse_indices,
                    sparse_weights,
                    self._convert_n_to_tensor(
                        self._slots['unshrinked_dense_features_weights']),
                    example_state_data,
                    loss_type=self._options['loss_type'],
                    l1=self._options['symmetric_l1_regularization'],
                    l2=self._symmetric_l2_regularization(),
                    num_loss_partitions=self._num_loss_partitions(),
                    num_inner_iterations=1,
                    adaptive=self._adaptive())
            else:
                esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
                    sparse_example_indices,
                    sparse_feature_indices,
                    sparse_features_values,
                    self._convert_n_to_tensor(
                        self._examples['dense_features']),
                    internal_convert_to_tensor(
                        self._examples['example_weights']),
                    internal_convert_to_tensor(
                        self._examples['example_labels']),
                    sparse_indices,
                    sparse_weights,
                    self._convert_n_to_tensor(
                        self._slots['unshrinked_dense_features_weights']),
                    example_state_data,
                    loss_type=self._options['loss_type'],
                    l1=self._options['symmetric_l1_regularization'],
                    l2=self._symmetric_l2_regularization(),
                    num_loss_partitions=self._num_loss_partitions(),
                    num_inner_iterations=1,
                    adaptative=self._adaptive())
            # pylint: enable=protected-access

            with ops.control_dependencies([esu]):
                update_ops = [self._hashtable.insert(example_ids_hashed, esu)]
                # Update the weights before the proximal step.
                for v_num, (w, i, u) in enumerate(
                        zip(self._slots['unshrinked_sparse_features_weights'],
                            sparse_indices, sfw)):
                    if (isinstance(w, var_ops.PartitionedVariable)
                            or isinstance(w, list)):
                        update_ops += self._get_partitioned_update_ops(
                            v_num, num_partitions_by_var, p_assignments_by_var,
                            gather_ids_by_var, w, u, p_assignments,
                            num_partitions)
                    else:
                        update_ops.append(state_ops.scatter_add(w, i, u))
                for w, u in zip(
                        self._slots['unshrinked_dense_features_weights'], dfw):
                    if (isinstance(w, var_ops.PartitionedVariable)
                            or isinstance(w, list)):
                        split_updates = array_ops.split(
                            u,
                            num_or_size_splits=[
                                v.shape.as_list()[0] for v in w
                            ])
                        for v, split_update in zip(w, split_updates):
                            update_ops.append(
                                state_ops.assign_add(v, split_update))
                    else:
                        update_ops.append(state_ops.assign_add(w, u))
            if not global_step:
                return control_flow_ops.group(*update_ops)
            with ops.control_dependencies(update_ops):
                return state_ops.assign_add(global_step, 1, name=name).op
コード例 #4
0
ファイル: sdca_ops.py プロジェクト: Ajaycs99/tensorflow
  def minimize(self, global_step=None, name=None):
    """Add operations to train a linear model by minimizing the loss function.

    Args:
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.

    Returns:
      An Operation that updates the variables passed in the constructor.
    """
    # Technically, the op depends on a lot more than the variables,
    # but we'll keep the list short.
    with name_scope(name, 'sdca/minimize'):
      sparse_example_indices = []
      sparse_feature_indices = []
      sparse_features_values = []
      for sf in self._examples['sparse_features']:
        sparse_example_indices.append(sf.example_indices)
        sparse_feature_indices.append(sf.feature_indices)
        # If feature values are missing, sdca assumes a value of 1.0f.
        if sf.feature_values is not None:
          sparse_features_values.append(sf.feature_values)

      # pylint: disable=protected-access
      example_ids_hashed = gen_sdca_ops.sdca_fprint(
          internal_convert_to_tensor(self._examples['example_ids']))
      # pylint: enable=protected-access
      example_state_data = self._hashtable.lookup(example_ids_hashed)
      # Solver returns example_state_update, new delta sparse_feature_weights
      # and delta dense_feature_weights.

      sparse_weights = []
      sparse_indices = []
      # If we have partitioned variables, keep a few dictionaries of Tensors
      # around that we need for the assign_add after the op call to
      # gen_sdca_ops.sdca_optimizer().  These are keyed because we may have a
      # mix of partitioned and un-partitioned variables.
      num_partitions_by_var = {}
      p_assignments_by_var = {}
      gather_ids_by_var = {}
      for v_num, (w, i) in enumerate(
          zip(self._slots['unshrinked_sparse_features_weights'],
              sparse_feature_indices)):
        # Append the sparse_indices (in full-variable space).
        sparse_idx = math_ops.cast(
            array_ops.unique(math_ops.cast(i, dtypes.int32))[0],
            dtypes.int64)
        sparse_indices.append(sparse_idx)
        if isinstance(w, list) or isinstance(w, var_ops.PartitionedVariable):
          num_partitions = len(w)
          flat_ids = array_ops.reshape(sparse_idx, [-1])
          # We use div partitioning, which is easiest to support downstream.
          # Compute num_total_ids as the sum of dim-0 of w, then assign
          # to partitions based on a constant number of ids per partition.
          # Optimize if we already know the full shape statically.
          dim_0_size = self._get_first_dimension_size_statically(
              w, num_partitions)

          if tensor_shape.dimension_value(dim_0_size):
            num_total_ids = constant_op.constant(
                tensor_shape.dimension_value(dim_0_size),
                flat_ids.dtype)
          else:
            dim_0_sizes = []
            for p in range(num_partitions):
              if tensor_shape.dimension_value(w[p].shape[0]) is not None:
                dim_0_sizes.append(tensor_shape.dimension_value(w[p].shape[0]))
              else:
                with ops.colocate_with(w[p]):
                  dim_0_sizes.append(array_ops.shape(w[p])[0])
            num_total_ids = math_ops.reduce_sum(
                math_ops.cast(array_ops.stack(dim_0_sizes), flat_ids.dtype))
          ids_per_partition = num_total_ids // num_partitions
          extras = num_total_ids % num_partitions

          p_assignments = math_ops.maximum(
              flat_ids // (ids_per_partition + 1),
              (flat_ids - extras) // ids_per_partition)

          # Emulate a conditional using a boolean indicator tensor
          new_ids = array_ops.where(p_assignments < extras,
                                    flat_ids % (ids_per_partition + 1),
                                    (flat_ids - extras) % ids_per_partition)

          # Cast partition assignments to int32 for use in dynamic_partition.
          # There really should not be more than 2^32 partitions.
          p_assignments = math_ops.cast(p_assignments, dtypes.int32)
          # Partition list of ids based on assignments into num_partitions
          # separate lists.
          gather_ids = data_flow_ops.dynamic_partition(new_ids,
                                                       p_assignments,
                                                       num_partitions)
          # Add these into the dictionaries for use in the later update.
          num_partitions_by_var[v_num] = num_partitions
          p_assignments_by_var[v_num] = p_assignments
          gather_ids_by_var[v_num] = gather_ids

          # Gather the weights from each partition.
          partition_gathered_weights = []
          for p in range(num_partitions):
            with ops.colocate_with(w[p]):
              partition_gathered_weights.append(
                  array_ops.gather(w[p], gather_ids[p]))

          # Stitch the weights back together in the same order they were before
          # we dynamic_partitioned them.
          condition_indices = data_flow_ops.dynamic_partition(
              math_ops.range(array_ops.shape(new_ids)[0]),
              p_assignments, num_partitions)
          batch_gathered_weights = data_flow_ops.dynamic_stitch(
              condition_indices, partition_gathered_weights)
        else:
          w_as_tensor = internal_convert_to_tensor(w)
          with ops.device(w_as_tensor.device):
            batch_gathered_weights = array_ops.gather(
                w_as_tensor, sparse_idx)
        sparse_weights.append(batch_gathered_weights)

      # pylint: disable=protected-access
      if compat.forward_compatible(year=2018, month=10, day=30):
        esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2(
            sparse_example_indices,
            sparse_feature_indices,
            sparse_features_values,
            self._convert_n_to_tensor(self._examples['dense_features']),
            internal_convert_to_tensor(self._examples['example_weights']),
            internal_convert_to_tensor(self._examples['example_labels']),
            sparse_indices,
            sparse_weights,
            self._convert_n_to_tensor(self._slots[
                'unshrinked_dense_features_weights']),
            example_state_data,
            loss_type=self._options['loss_type'],
            l1=self._options['symmetric_l1_regularization'],
            l2=self._symmetric_l2_regularization(),
            num_loss_partitions=self._num_loss_partitions(),
            num_inner_iterations=1,
            adaptive=self._adaptive())
      else:
        esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
            sparse_example_indices,
            sparse_feature_indices,
            sparse_features_values,
            self._convert_n_to_tensor(self._examples['dense_features']),
            internal_convert_to_tensor(self._examples['example_weights']),
            internal_convert_to_tensor(self._examples['example_labels']),
            sparse_indices,
            sparse_weights,
            self._convert_n_to_tensor(self._slots[
                'unshrinked_dense_features_weights']),
            example_state_data,
            loss_type=self._options['loss_type'],
            l1=self._options['symmetric_l1_regularization'],
            l2=self._symmetric_l2_regularization(),
            num_loss_partitions=self._num_loss_partitions(),
            num_inner_iterations=1,
            adaptative=self._adaptive())
      # pylint: enable=protected-access

      with ops.control_dependencies([esu]):
        update_ops = [self._hashtable.insert(example_ids_hashed, esu)]
        # Update the weights before the proximal step.
        for v_num, (w, i, u) in enumerate(
            zip(self._slots['unshrinked_sparse_features_weights'],
                sparse_indices, sfw)):
          if (isinstance(w, var_ops.PartitionedVariable) or
              isinstance(w, list)):
            update_ops += self._get_partitioned_update_ops(
                v_num, num_partitions_by_var, p_assignments_by_var,
                gather_ids_by_var, w, u, p_assignments, num_partitions)
          else:
            update_ops.append(state_ops.scatter_add(w, i, u))
        for w, u in zip(self._slots['unshrinked_dense_features_weights'], dfw):
          if (isinstance(w, var_ops.PartitionedVariable) or
              isinstance(w, list)):
            split_updates = array_ops.split(
                u, num_or_size_splits=[v.shape.as_list()[0] for v in w])
            for v, split_update in zip(w, split_updates):
              update_ops.append(state_ops.assign_add(v, split_update))
          else:
            update_ops.append(state_ops.assign_add(w, u))
      if not global_step:
        return control_flow_ops.group(*update_ops)
      with ops.control_dependencies(update_ops):
        return state_ops.assign_add(global_step, 1, name=name).op
コード例 #5
0
    def minimize(self, global_step=None, name=None):
        """Add operations to train a linear model by minimizing the loss function.

    Args:
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.

    Returns:
      An Operation that updates the variables passed in the constructor.
    """
        # Technically, the op depends on a lot more than the variables,
        # but we'll keep the list short.
        with name_scope(name, 'sdca/minimize'):
            sparse_example_indices = []
            sparse_feature_indices = []
            sparse_features_values = []
            for sf in self._examples['sparse_features']:
                sparse_example_indices.append(sf.example_indices)
                sparse_feature_indices.append(sf.feature_indices)
                # If feature values are missing, sdca assumes a value of 1.0f.
                if sf.feature_values is not None:
                    sparse_features_values.append(sf.feature_values)

            # pylint: disable=protected-access
            example_ids_hashed = gen_sdca_ops.sdca_fprint(
                internal_convert_to_tensor(self._examples['example_ids']))
            # pylint: enable=protected-access
            example_state_data = self._hashtable.lookup(example_ids_hashed)
            # Solver returns example_state_update, new delta sparse_feature_weights
            # and delta dense_feature_weights.

            weights_tensor = self._convert_n_to_tensor(
                self._slots['unshrinked_sparse_features_weights'])
            sparse_weights = []
            sparse_indices = []
            for w, i in zip(weights_tensor, sparse_feature_indices):
                # Find the feature ids to lookup in the variables.
                with ops.device(w.device):
                    sparse_indices.append(
                        math_ops.cast(
                            array_ops.unique(math_ops.cast(i,
                                                           dtypes.int32))[0],
                            dtypes.int64))
                    sparse_weights.append(
                        array_ops.gather(w, sparse_indices[-1]))

            # pylint: disable=protected-access
            esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
                sparse_example_indices,
                sparse_feature_indices,
                sparse_features_values,
                self._convert_n_to_tensor(self._examples['dense_features']),
                internal_convert_to_tensor(self._examples['example_weights']),
                internal_convert_to_tensor(self._examples['example_labels']),
                sparse_indices,
                sparse_weights,
                self._convert_n_to_tensor(
                    self._slots['unshrinked_dense_features_weights']),
                example_state_data,
                loss_type=self._options['loss_type'],
                l1=self._options['symmetric_l1_regularization'],
                l2=self._symmetric_l2_regularization(),
                num_loss_partitions=self._num_loss_partitions(),
                num_inner_iterations=1)
            # pylint: enable=protected-access

            with ops.control_dependencies([esu]):
                update_ops = [self._hashtable.insert(example_ids_hashed, esu)]
                # Update the weights before the proximal step.
                for w, i, u in zip(
                        self._slots['unshrinked_sparse_features_weights'],
                        sparse_indices, sfw):
                    update_ops.append(state_ops.scatter_add(w, i, u))
                for w, u in zip(
                        self._slots['unshrinked_dense_features_weights'], dfw):
                    update_ops.append(w.assign_add(u))

            if not global_step:
                return control_flow_ops.group(*update_ops)
            with ops.control_dependencies(update_ops):
                return state_ops.assign_add(global_step, 1, name=name).op
コード例 #6
0
ファイル: sdca_ops.py プロジェクト: SylChan/tensorflow
  def minimize(self, global_step=None, name=None):
    """Add operations to train a linear model by minimizing the loss function.

    Args:
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.

    Returns:
      An Operation that updates the variables passed in the constructor.
    """
    # Technically, the op depends on a lot more than the variables,
    # but we'll keep the list short.
    with name_scope(name, 'sdca/minimize'):
      sparse_example_indices = []
      sparse_feature_indices = []
      sparse_features_values = []
      for sf in self._examples['sparse_features']:
        sparse_example_indices.append(sf.example_indices)
        sparse_feature_indices.append(sf.feature_indices)
        # If feature values are missing, sdca assumes a value of 1.0f.
        if sf.feature_values is not None:
          sparse_features_values.append(sf.feature_values)

      # pylint: disable=protected-access
      example_ids_hashed = gen_sdca_ops.sdca_fprint(
          internal_convert_to_tensor(self._examples['example_ids']))
      # pylint: enable=protected-access
      example_state_data = self._hashtable.lookup(example_ids_hashed)
      # Solver returns example_state_update, new delta sparse_feature_weights
      # and delta dense_feature_weights.

      weights_tensor = self._convert_n_to_tensor(self._slots[
          'unshrinked_sparse_features_weights'])
      sparse_weights = []
      sparse_indices = []
      for w, i in zip(weights_tensor, sparse_feature_indices):
        # Find the feature ids to lookup in the variables.
        with ops.device(w.device):
          sparse_indices.append(
              math_ops.cast(
                  array_ops.unique(math_ops.cast(i, dtypes.int32))[0],
                  dtypes.int64))
          sparse_weights.append(array_ops.gather(w, sparse_indices[-1]))

      # pylint: disable=protected-access
      esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
          sparse_example_indices,
          sparse_feature_indices,
          sparse_features_values,
          self._convert_n_to_tensor(self._examples['dense_features']),
          internal_convert_to_tensor(self._examples['example_weights']),
          internal_convert_to_tensor(self._examples['example_labels']),
          sparse_indices,
          sparse_weights,
          self._convert_n_to_tensor(self._slots[
              'unshrinked_dense_features_weights']),
          example_state_data,
          loss_type=self._options['loss_type'],
          l1=self._options['symmetric_l1_regularization'],
          l2=self._symmetric_l2_regularization(),
          num_loss_partitions=self._num_loss_partitions(),
          num_inner_iterations=1)
      # pylint: enable=protected-access

      with ops.control_dependencies([esu]):
        update_ops = [self._hashtable.insert(example_ids_hashed, esu)]
        # Update the weights before the proximal step.
        for w, i, u in zip(self._slots['unshrinked_sparse_features_weights'],
                           sparse_indices, sfw):
          update_ops.append(state_ops.scatter_add(w, i, u))
        for w, u in zip(self._slots['unshrinked_dense_features_weights'], dfw):
          update_ops.append(w.assign_add(u))

      if not global_step:
        return control_flow_ops.group(*update_ops)
      with ops.control_dependencies(update_ops):
        return state_ops.assign_add(global_step, 1, name=name).op