def test_number_comparison_works(self):
    """
    Execute an ``argmax`` over the attendance number column and check the date selected
    from the resulting row.
    """
    # TableQuestionContext normalizes all strings according to some rules. We want to ensure
    # that the original numerical values of number cells are being correctly processed here.
    logical_form = "(select_date (argmax all_rows number_column:attendance) date_column:date)"
    table_path = self.FIXTURES_ROOT / "data" / "corenlp_processed_tables" / "TEST-2.table"
    question_tokens = SpacyTokenizer().tokenize("when was the attendance the highest?")
    world = self._get_world_with_question_tokens_and_table_file(question_tokens, table_path)
    answer = world.execute(logical_form)
    assert answer == Date(-1, 11, 10)
def select_date(self, rows: List[Row], column: DateColumn) -> Date:
    """
    Looks up the value stored under ``column`` in every given row and returns the first one
    that is a ``Date``. If no row holds a ``Date`` in that column, ``Date(-1, -1, -1)`` is
    returned instead.
    """
    # Every row is read (not just up to the first match) before filtering.
    column_values = [row.values[column.name] for row in rows]
    found_dates: List[Date] = [value for value in column_values if isinstance(value, Date)]
    if not found_dates:
        return Date(-1, -1, -1)
    return found_dates[0]  # type: ignore
def min_date(self, rows: List[Row], column: DateColumn) -> Date:
    """
    Returns the smallest value found under ``column`` in the given rows.

    Rows whose value in that column is ``None`` are ignored. If no values remain,
    ``Date(-1, -1, -1)`` is returned. An ``ExecutionError`` is raised if any remaining value
    is not a ``Date``.
    """
    cell_values = []
    for row in rows:
        value = row.values[column.name]
        if value is not None:
            cell_values.append(value)
    if not cell_values:
        return Date(-1, -1, -1)
    if any(not isinstance(value, Date) for value in cell_values):
        raise ExecutionError(f"Invalid values for date selection function: {cell_values}")
    return min(cell_values)  # type: ignore
def mode_date(self, rows: List[Row], column: DateColumn) -> Date:
    """
    Returns the first entry of the list produced by ``_get_most_frequent_values`` for the
    given rows and column.

    If that list is empty, ``Date(-1, -1, -1)`` is returned. An ``ExecutionError`` is raised
    if the first entry is not a ``Date``.
    """
    candidates = self._get_most_frequent_values(rows, column)
    if candidates:
        top_value = candidates[0]
        if isinstance(top_value, Date):
            return top_value
        raise ExecutionError(f"Invalid values for mode_date: {top_value}")
    return Date(-1, -1, -1)
def test_table_data(self):
    """
    Read the sample table fixture and compare the resulting ``table_data`` against the
    expected typed cell values of its two rows.
    """
    first_row = {
        "date_column:year": Date(2001, -1, -1),
        "number_column:year": 2001.0,
        "string_column:year": "2001",
        "number_column:division": 2.0,
        "string_column:division": "2",
        "string_column:league": "usl_a_league",
        "string_column:regular_season": "4th_western",
        "number_column:regular_season": 4.0,
        "string_column:playoffs": "quarterfinals",
        "string_column:open_cup": "did_not_qualify",
        "number_column:open_cup": None,
        "number_column:avg_attendance": 7169.0,
        "string_column:avg_attendance": "7_169",
    }
    second_row = {
        "date_column:year": Date(2005, -1, -1),
        "number_column:year": 2005.0,
        "string_column:year": "2005",
        "number_column:division": 2.0,
        "string_column:division": "2",
        "string_column:league": "usl_first_division",
        "string_column:regular_season": "5th",
        "number_column:regular_season": 5.0,
        "string_column:playoffs": "quarterfinals",
        "string_column:open_cup": "4th_round",
        "number_column:open_cup": 4.0,
        "number_column:avg_attendance": 6028.0,
        "string_column:avg_attendance": "6_028",
    }

    tokens = self.tokenizer.tokenize("what was the attendance when usl a league played?")
    table_path = f"{self.FIXTURES_ROOT}/data/wikitables/sample_table.tagged"
    context = TableQuestionContext.read_from_file(table_path, tokens)
    assert context.table_data == [first_row, second_row]
def _make_date(cell_string: str) -> Date:
    """
    Builds a ``Date`` from an underscore-separated cell string.

    Each underscore-delimited piece is inspected in order: an all-digit piece of length four
    sets the year, any other all-digit piece sets the day, and a piece found in
    ``MONTH_NUMBERS`` sets the month. Fields never set stay at ``-1``; later pieces overwrite
    earlier ones for the same field.
    """
    year = month = day = -1
    for token in cell_string.split("_"):
        if token.isdigit():
            number = int(token)
            if len(token) == 4:
                year = number
            else:
                day = number
            continue
        if token in MONTH_NUMBERS:
            month = MONTH_NUMBERS[token]
    return Date(year, month, day)
def test_table_data(self):
    """
    Load the sample table fixture and verify that ``table_data`` holds the expected typed
    cell values for both rows.
    """
    expected_table = [
        {
            'date_column:year': Date(2001, -1, -1),
            'number_column:year': 2001.0,
            'string_column:year': '2001',
            'number_column:division': 2.0,
            'string_column:division': '2',
            'string_column:league': 'usl_a_league',
            'string_column:regular_season': '4th_western',
            'number_column:regular_season': 4.0,
            'string_column:playoffs': 'quarterfinals',
            'string_column:open_cup': 'did_not_qualify',
            'number_column:open_cup': None,
            'number_column:avg_attendance': 7169.0,
            'string_column:avg_attendance': '7_169',
        },
        {
            'date_column:year': Date(2005, -1, -1),
            'number_column:year': 2005.0,
            'string_column:year': '2005',
            'number_column:division': 2.0,
            'string_column:division': '2',
            'string_column:league': 'usl_first_division',
            'string_column:regular_season': '5th',
            'number_column:regular_season': 5.0,
            'string_column:playoffs': 'quarterfinals',
            'string_column:open_cup': '4th_round',
            'number_column:open_cup': 4.0,
            'number_column:avg_attendance': 6028.0,
            'string_column:avg_attendance': '6_028',
        },
    ]

    tokenized_question = self.tokenizer.tokenize(
        "what was the attendance when usl a league played?"
    )
    fixture_path = f'{self.FIXTURES_ROOT}/data/wikitables/sample_table.tagged'
    loaded_context = TableQuestionContext.read_from_file(fixture_path, tokenized_question)
    assert loaded_context.table_data == expected_table
def test_execute_works_with_argmin(self):
    """
    Execute ``select_date`` over the ``argmin`` of the average-attendance column and check
    the returned date.
    """
    expected_date = Date(2005, 3, -1)
    selected = self.language.execute(
        "(select_date (argmin all_rows number_column:avg_attendance) date_column:year)"
    )
    assert selected == expected_date
def test_date_comparison_works(self):
    """
    Checks the comparison operators of ``Date``: ordering and equality between dates,
    comparisons involving fields set to ``-1``, and the ``ExecutionError`` expected when a
    ``Date`` is compared with a non-``Date`` value.
    """
    assert Date(2013, 12, 31) > Date(2013, 12, 30)
    assert Date(2013, 12, 31) == Date(2013, 12, -1)
    assert Date(2013, -1, -1) >= Date(2013, 12, 31)
    # The explicit ``== False`` comparisons below are intentional, hence the pylint override.
    # pylint: disable=singleton-comparison
    assert (Date(2013, 12, -1) > Date(2013, 12, 31)) == False
    # Comparing a Date with a plain number is expected to raise rather than return a value.
    with pytest.raises(ExecutionError, match='only compare Dates with Dates'):
        assert (Date(2013, 12, 31) > 2013) == False
    with pytest.raises(ExecutionError, match='only compare Dates with Dates'):
        assert (Date(2013, 12, 31) >= 2013) == False
    with pytest.raises(ExecutionError, match='only compare Dates with Dates'):
        assert Date(2013, 12, 31) != 2013
    # Year known on one side only: both >= and < are expected to be False.
    assert (Date(2018, 1, 1) >= Date(-1, 2, 1)) == False
    assert (Date(2018, 1, 1) < Date(-1, 2, 1)) == False
    # When year is unknown in both cases, we can compare months and days.
    assert Date(-1, 2, 1) < Date(-1, 2, 3)
    # If both year and month are not known in both cases, the comparison is undefined, and both
    # < and >= return False.
    assert (Date(-1, -1, 1) < Date(-1, -1, 3)) == False
    assert (Date(-1, -1, 1) >= Date(-1, -1, 3)) == False
    # Same when year is known, but months are not.
    assert (Date(2018, -1, 1) < Date(2018, -1, 3)) == False
    assert (Date(2018, -1, 1) >= Date(2018, -1, 3)) == False
def date(self, year: Number, month: Number, day: Number) -> Date:
    """
    Builds a ``Date`` from the given ``year``, ``month`` and ``day``, passed to the ``Date``
    constructor in that order, and returns it.
    """
    constructed = Date(year, month, day)  # type: ignore
    return constructed