Ejemplo n.º 1
0
 def test_number_comparison_works(self):
     # Strings are normalized by TableQuestionContext according to some rules; this test makes
     # sure the original numerical values of number cells are still processed correctly.
     question_tokens = SpacyTokenizer().tokenize("when was the attendance the highest?")
     table_path = self.FIXTURES_ROOT / "data" / "corenlp_processed_tables" / "TEST-2.table"
     world = self._get_world_with_question_tokens_and_table_file(question_tokens, table_path)
     logical_form = "(select_date (argmax all_rows number_column:attendance) date_column:date)"
     assert world.execute(logical_form) == Date(-1, 11, 10)
    def select_date(self, rows: List[Row], column: DateColumn) -> Date:
        """
        Returns the first ``Date`` value found under ``column`` in ``rows``; cells holding anything
        else are skipped. ``Date(-1, -1, -1)`` is returned when no row has a date in that column.
        """
        column_cells = (row.values[column.name] for row in rows)
        found_dates: List[Date] = [cell for cell in column_cells if isinstance(cell, Date)]
        if not found_dates:
            return Date(-1, -1, -1)  # type: ignore
        return found_dates[0]
 def min_date(self, rows: List[Row], column: DateColumn) -> Date:
     """
     Takes a list of rows and a column and returns the min of the values under that column in
     those rows.

     Cells whose value is ``None`` are skipped. If no non-``None`` value is left,
     ``Date(-1, -1, -1)`` is returned. An ``ExecutionError`` is raised when any remaining value
     is not a ``Date``.
     """
     # Look every cell up once, rather than once for the filter and once more for the value.
     column_cells = (row.values[column.name] for row in rows)
     cell_values = [value for value in column_cells if value is not None]
     if not cell_values:
         return Date(-1, -1, -1)
     # A generator lets ``all`` stop at the first offending value without building a list.
     if not all(isinstance(value, Date) for value in cell_values):
         raise ExecutionError(f"Invalid values for date selection function: {cell_values}")
     return min(cell_values)  # type: ignore
 def mode_date(self, rows: List[Row], column: DateColumn) -> Date:
     """
     Returns the first of the most frequent values found under ``column`` in ``rows``, as computed
     by ``_get_most_frequent_values``. ``Date(-1, -1, -1)`` is returned when there is no such
     value, and an ``ExecutionError`` is raised when the value is not a ``Date``.
     """
     candidates = self._get_most_frequent_values(rows, column)
     if candidates:
         top_value = candidates[0]
         if isinstance(top_value, Date):
             return top_value
         raise ExecutionError(f"Invalid values for mode_date: {top_value}")
     return Date(-1, -1, -1)
 def test_table_data(self):
     """Check the ``table_data`` read from the sample table against the expected cell values."""
     question = "what was the attendance when usl a league played?"
     question_tokens = self.tokenizer.tokenize(question)
     test_file = f"{self.FIXTURES_ROOT}/data/wikitables/sample_table.tagged"
     context = TableQuestionContext.read_from_file(test_file, question_tokens)

     row_2001 = {
         "date_column:year": Date(2001, -1, -1),
         "number_column:year": 2001.0,
         "string_column:year": "2001",
         "number_column:division": 2.0,
         "string_column:division": "2",
         "string_column:league": "usl_a_league",
         "string_column:regular_season": "4th_western",
         "number_column:regular_season": 4.0,
         "string_column:playoffs": "quarterfinals",
         "string_column:open_cup": "did_not_qualify",
         "number_column:open_cup": None,
         "number_column:avg_attendance": 7169.0,
         "string_column:avg_attendance": "7_169",
     }
     row_2005 = {
         "date_column:year": Date(2005, -1, -1),
         "number_column:year": 2005.0,
         "string_column:year": "2005",
         "number_column:division": 2.0,
         "string_column:division": "2",
         "string_column:league": "usl_first_division",
         "string_column:regular_season": "5th",
         "number_column:regular_season": 5.0,
         "string_column:playoffs": "quarterfinals",
         "string_column:open_cup": "4th_round",
         "number_column:open_cup": 4.0,
         "number_column:avg_attendance": 6028.0,
         "string_column:avg_attendance": "6_028",
     }
     assert context.table_data == [row_2001, row_2005]
 def _make_date(cell_string: str) -> Date:
     """
     Builds a ``Date`` from an underscore-separated cell string.

     Each part is inspected in turn: a decimal number that is four characters long is taken as
     the year, any other decimal number as the day, and a part found in ``MONTH_NUMBERS`` as the
     month. Parts matching none of these are ignored, and a field that is never set stays ``-1``.
     When several parts qualify for the same field, the last one wins.
     """
     year = month = day = -1
     for part in cell_string.split("_"):
         # ``str.isdigit`` also accepts characters such as superscript digits ("²"), which
         # ``int`` rejects with a ``ValueError``. ``str.isdecimal`` only accepts decimal digits,
         # so every part that passes the check can safely be converted.
         if part.isdecimal():
             number = int(part)
             if len(part) == 4:
                 year = number
             else:
                 day = number
         elif part in MONTH_NUMBERS:
             month = MONTH_NUMBERS[part]
     return Date(year, month, day)
Ejemplo n.º 7
0
 def test_table_data(self):
     """Compare the ``table_data`` parsed from the sample table with the expected rows."""
     question_tokens = self.tokenizer.tokenize(
         "what was the attendance when usl a league played?")
     test_file = f'{self.FIXTURES_ROOT}/data/wikitables/sample_table.tagged'
     table_question_context = TableQuestionContext.read_from_file(
         test_file, question_tokens)
     expected_table_data = [
         {
             'date_column:year': Date(2001, -1, -1),
             'number_column:year': 2001.0,
             'string_column:year': '2001',
             'number_column:division': 2.0,
             'string_column:division': '2',
             'string_column:league': 'usl_a_league',
             'string_column:regular_season': '4th_western',
             'number_column:regular_season': 4.0,
             'string_column:playoffs': 'quarterfinals',
             'string_column:open_cup': 'did_not_qualify',
             'number_column:open_cup': None,
             'number_column:avg_attendance': 7169.0,
             'string_column:avg_attendance': '7_169',
         },
         {
             'date_column:year': Date(2005, -1, -1),
             'number_column:year': 2005.0,
             'string_column:year': '2005',
             'number_column:division': 2.0,
             'string_column:division': '2',
             'string_column:league': 'usl_first_division',
             'string_column:regular_season': '5th',
             'number_column:regular_season': 5.0,
             'string_column:playoffs': 'quarterfinals',
             'string_column:open_cup': '4th_round',
             'number_column:open_cup': 4.0,
             'number_column:avg_attendance': 6028.0,
             'string_column:avg_attendance': '6_028',
         },
     ]
     assert table_question_context.table_data == expected_table_data
Ejemplo n.º 8
0
 def test_execute_works_with_argmin(self):
     """Execute an ``argmin``-based ``select_date`` logical form and check the resulting date."""
     selected_date = self.language.execute(
         "(select_date (argmin all_rows number_column:avg_attendance) date_column:year)"
     )
     assert selected_date == Date(2005, 3, -1)
Ejemplo n.º 9
0
 def test_date_comparison_works(self):
     """Exercise the comparison operators of ``Date``, including dates with ``-1`` fields."""
     # pylint: disable=singleton-comparison
     reference = Date(2013, 12, 31)
     assert reference > Date(2013, 12, 30)
     assert reference == Date(2013, 12, -1)
     assert Date(2013, -1, -1) >= reference
     assert (Date(2013, 12, -1) > reference) == False

     # Comparing a Date with a non-Date must raise, whichever operator is used.
     non_date_comparisons = (
         lambda date: date > 2013,
         lambda date: date >= 2013,
         lambda date: date != 2013,
     )
     for compare in non_date_comparisons:
         with pytest.raises(ExecutionError,
                            match='only compare Dates with Dates'):
             compare(Date(2013, 12, 31))

     year_unknown = Date(-1, 2, 1)
     assert (Date(2018, 1, 1) >= year_unknown) == False
     assert (Date(2018, 1, 1) < year_unknown) == False
     # When the year is unknown on both sides, months and days can still be compared.
     assert year_unknown < Date(-1, 2, 3)
     # When the month is not known on either side, the comparison is undefined and both < and >=
     # return False. This holds whether the year is unknown as well (-1) or known (2018).
     for year in (-1, 2018):
         first, third = Date(year, -1, 1), Date(year, -1, 3)
         assert (first < third) == False
         assert (first >= third) == False
 def date(self, year: Number, month: Number, day: Number) -> Date:
     """
     Builds a ``Date`` object out of three numbers, which are passed on as its year, month, and
     day in that order.
     """
     built_date = Date(year, month, day)  # type: ignore
     return built_date