Example #1
0
  def compute(self, values, accumulator=None):
    """Fold one batch of values into `accumulator` and return it.

    Token occurrences are tallied into `accumulator.count_dict`; when
    `self._compute_idf` is set, per-document counts are also tracked so
    that each token is counted at most once per document.
    """
    values = base_preprocessing_layer.convert_to_list(
        values, sparse_default_value=self._mask_value)

    if accumulator is None:
      accumulator = self._create_accumulator()

    # TODO(momernick): Benchmark improvements to this algorithm.
    # Normalize a bare scalar into a one-document batch.
    documents = values if isinstance(values, list) else [values]
    for doc in documents:
      # Normalize a bare token into a one-token document.
      tokens = doc if isinstance(doc, list) else [doc]
      if self._compute_idf:
        # Each document gets a fresh id so per-doc counts dedupe correctly.
        doc_id = accumulator.data["next_doc_id"]
        accumulator.data["next_doc_id"] += 1
      for token in tokens:
        accumulator.count_dict[token] += 1
        if self._compute_idf:
          per_doc = accumulator.per_doc_count_dict[token]
          # Only bump the document frequency once per document.
          if per_doc["last_doc_id"] != doc_id:
            per_doc["count"] += 1
            per_doc["last_doc_id"] = doc_id

    return accumulator
Example #2
0
    def compute(self, values, accumulator=None):
        """Fold one batch of values into `accumulator` and return it.

        Tracks the running maximum value when `self._max_tokens` is None,
        and per-document counts when `self._compute_idf` is set.
        """
        values = base_preprocessing_layer.convert_to_list(values)

        if accumulator is None:
            accumulator = self._create_accumulator()

        # TODO(momernick): Benchmark improvements to this algorithm.
        for element in values:
            # Treat a bare scalar as a single-item document.
            items = element if isinstance(element, list) else [element]
            doc_id = accumulator.data[self.DOC_ID_IDX]
            for item in items:
                if self._max_tokens is None:
                    # Vocabulary size is open-ended; track the max value seen.
                    if item > accumulator.data[self.MAX_VALUE_IDX]:
                        accumulator.data[self.MAX_VALUE_IDX] = item
                if self._compute_idf:
                    stats = accumulator.per_doc_count_dict[item]
                    # Count each value at most once per document.
                    if stats["last_doc_id"] != doc_id:
                        stats["count"] += 1
                        stats["last_doc_id"] = doc_id
            # Advance to the next document id after processing this element.
            accumulator.data[self.DOC_ID_IDX] += 1

        return accumulator
Example #3
0
    def compute(self, values, accumulator=None):
        """Add the token counts from `values` into `accumulator` and return it."""
        values = base_preprocessing_layer.convert_to_list(values)

        if accumulator is None:
            accumulator = self._create_accumulator()

        # TODO(momernick): Benchmark improvements to this algorithm.
        # Tally every token occurrence across all documents.
        counts = accumulator.count_dict
        for doc in values:
            for tok in doc:
                counts[tok] += 1

        return accumulator
Example #4
0
    def compute(self, values, accumulator=None):
        """Tally token occurrences from `values` into `accumulator` and return it.

        Accepts a bare scalar token, a flat list of tokens, or a list of
        token lists (documents); all are counted into
        `accumulator.count_dict`.
        """
        values = base_preprocessing_layer.convert_to_list(
            values, sparse_default_value=self._mask_value)

        if accumulator is None:
            accumulator = self._create_accumulator()

        # TODO(momernick): Benchmark improvements to this algorithm.
        counts = accumulator.count_dict
        if isinstance(values, (str, bytes, np.int64)):
            # A single scalar token counts as one occurrence.
            counts[values] += 1
        else:
            for doc in values:
                if isinstance(doc, list):
                    for tok in doc:
                        counts[tok] += 1
                else:
                    # Flat list of tokens: each element is itself a token.
                    counts[doc] += 1

        return accumulator
 def test_conversion(self, inputs, expected):
   """Verify convert_to_list turns `inputs()` into `expected`."""
   converted = base_preprocessing_layer.convert_to_list(inputs())
   self.assertAllEqual(expected, converted)