Example #1
0
    def _add_numeric_column_ranks(self, column_ids, row_ids,
                                  table,
                                  features):
        """Computes per-token rank features for the numeric columns of a table.

        For every column that yields numeric values, the distinct sort keys of
        those values are ordered ascending. Each token index reported by
        `_get_cell_token_indexes` for a cell then receives:
          * a rank, starting at 1 for the smallest key, and
          * an inverse rank, starting at 1 for the largest key.
        Cells whose values map to the same sort key share the same rank.
        Token positions that are never visited keep the value 0.

        Columns are skipped when `_get_column_values` returns nothing or when
        `number_annotation_utils.get_numeric_sort_key_fn` raises ValueError.

        Args:
          column_ids: Per-token column ids; its length fixes the output length.
          row_ids: Per-token row ids, forwarded to `_get_cell_token_indexes`.
          table: Table to rank; a falsy table leaves every rank at 0.
          features: Mapping updated in place with the 'column_ranks' and
            'inv_column_ranks' entries.
        """
        num_tokens = len(column_ids)
        ranks = [0] * num_tokens
        inv_ranks = [0] * num_tokens

        # A falsy table contributes no columns, so both outputs stay all-zero.
        num_columns = len(table.columns) if table else 0

        for col_index in range(num_columns):
            column_values = self._get_column_values(table, col_index)
            if not column_values:
                continue

            try:
                key_fn = number_annotation_utils.get_numeric_sort_key_fn(
                    column_values.values())
            except ValueError:
                continue

            # Group the rows of this column by the sort key of their value so
            # that ties end up with the same rank.
            rows_by_key = collections.defaultdict(list)
            for row_index, value in column_values.items():
                rows_by_key[key_fn(value)].append(row_index)

            ordered_keys = sorted(rows_by_key)
            num_unique = len(ordered_keys)

            for rank, key in enumerate(ordered_keys, start=1):
                inv_rank = num_unique - rank + 1
                for row_index in rows_by_key[key]:
                    token_indexes = _get_cell_token_indexes(
                        column_ids, row_ids, col_index, row_index)
                    for index in token_indexes:
                        ranks[index] = rank
                        inv_ranks[index] = inv_rank

        features['column_ranks'] = create_int_feature(ranks)
        features['inv_column_ranks'] = create_int_feature(inv_ranks)
Example #2
0
    def _get_numeric_sort_key_fn(self, table_numeric_values, value):
        """Returns the sort key function for comparing value to table values.

        The function returned will be a suitable input for the key param of the
        sort(). See number_annotation_utils._get_numeric_sort_key_fn for details.

        Args:
          table_numeric_values: Numeric values of a column
          value: Numeric value in the question.

        Returns:
          A function key function to compare column and question values.

        """
        if not table_numeric_values:
            return None
        all_values = list(table_numeric_values.values())
        all_values.append(value)
        try:
            return number_annotation_utils.get_numeric_sort_key_fn(all_values)
        except ValueError:
            return None