def compute(self, values, accumulator=None):
    """Fold one batch of inputs into a vocabulary-count accumulator.

    Counts every token occurrence, and — when IDF computation is enabled —
    also records per-token document frequencies keyed by a running doc id.

    Args:
        values: Raw input data; normalized to a list of documents.
        accumulator: Optional running accumulator. A fresh one is created
            when omitted.

    Returns:
        The updated accumulator.
    """
    values = base_preprocessing_layer.convert_to_list(
        values, sparse_default_value=self._mask_value)
    if accumulator is None:
        accumulator = self._create_accumulator()
    # TODO(momernick): Benchmark improvements to this algorithm.
    track_idf = self._compute_idf  # loop-invariant; look it up once
    documents = values if isinstance(values, list) else [values]
    for doc in documents:
        tokens = doc if isinstance(doc, list) else [doc]
        if track_idf:
            # Assign this document a unique id so each token's doc
            # frequency is bumped at most once per document.
            doc_id = accumulator.data["next_doc_id"]
            accumulator.data["next_doc_id"] = doc_id + 1
        for tok in tokens:
            accumulator.count_dict[tok] += 1
            if not track_idf:
                continue
            per_doc = accumulator.per_doc_count_dict[tok]
            if per_doc["last_doc_id"] != doc_id:
                per_doc["count"] += 1
                per_doc["last_doc_id"] = doc_id
    return accumulator
def compute(self, values, accumulator=None):
    """Fold one batch of inputs into the running accumulator.

    Tracks the maximum value seen (only when no explicit token limit was
    set) and — when IDF computation is enabled — per-value document
    frequencies keyed by a running document id.

    Args:
        values: Raw input data; normalized to a list of elements.
        accumulator: Optional running accumulator. A fresh one is created
            when omitted.

    Returns:
        The updated accumulator.
    """
    values = base_preprocessing_layer.convert_to_list(values)
    if accumulator is None:
        accumulator = self._create_accumulator()
    # TODO(momernick): Benchmark improvements to this algorithm.
    # Both flags are loop-invariant, so resolve them once up front.
    track_max = self._max_tokens is None
    track_idf = self._compute_idf
    for element in values:
        batch = element if isinstance(element, list) else [element]
        doc_id = accumulator.data[self.DOC_ID_IDX]
        for value in batch:
            if track_max and value > accumulator.data[self.MAX_VALUE_IDX]:
                accumulator.data[self.MAX_VALUE_IDX] = value
            if track_idf:
                per_doc = accumulator.per_doc_count_dict[value]
                # Bump the doc frequency at most once per document.
                if per_doc["last_doc_id"] != doc_id:
                    per_doc["count"] += 1
                    per_doc["last_doc_id"] = doc_id
        accumulator.data[self.DOC_ID_IDX] += 1
    return accumulator
def compute(self, values, accumulator=None):
    """Fold one batch of documents into a token-count accumulator.

    Args:
        values: Raw input data; normalized to a list of documents.
        accumulator: Optional running accumulator. A fresh one is created
            when omitted.

    Returns:
        The updated accumulator.
    """
    values = base_preprocessing_layer.convert_to_list(values)
    if accumulator is None:
        accumulator = self._create_accumulator()
    # TODO(momernick): Benchmark improvements to this algorithm.
    # Flatten lazily and count every token occurrence.
    flattened = (tok for doc in values for tok in doc)
    for tok in flattened:
        accumulator.count_dict[tok] += 1
    return accumulator
def compute(self, values, accumulator=None):
    """Fold one batch of inputs into a token-count accumulator.

    Accepts a bare scalar, a flat list of tokens, or a list of
    token lists; all shapes contribute to the same counts.

    Args:
        values: Raw input data; normalized before counting.
        accumulator: Optional running accumulator. A fresh one is created
            when omitted.

    Returns:
        The updated accumulator.
    """
    values = base_preprocessing_layer.convert_to_list(
        values, sparse_default_value=self._mask_value)
    if accumulator is None:
        accumulator = self._create_accumulator()
    # TODO(momernick): Benchmark improvements to this algorithm.
    counts = accumulator.count_dict
    # A bare scalar is treated as a single token.
    if isinstance(values, (str, bytes, np.int64)):
        counts[values] += 1
        return accumulator
    for document in values:
        if isinstance(document, list):
            for token in document:
                counts[token] += 1
        else:
            # A non-list entry is itself a single token.
            counts[document] += 1
    return accumulator
def test_conversion(self, inputs, expected):
    """Verify `convert_to_list` normalizes `inputs()` to the expected list."""
    actual = base_preprocessing_layer.convert_to_list(inputs())
    self.assertAllEqual(expected, actual)