def test_table_question_answering_model_from_pretrained(self):
    """Resolve a TAPAS checkpoint through the TF auto classes and check the types.

    Only the slice ``[5:6]`` of ``TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST`` is
    iterated, so at most one checkpoint is loaded.
    """
    checkpoints = TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST[5:6]
    for checkpoint in checkpoints:
        config = AutoConfig.from_pretrained(checkpoint)
        self.assertIsNotNone(config)
        self.assertIsInstance(config, TapasConfig)

        # The checkpoint is loaded twice: once with the plain call, once asking
        # for the loading report. Only the model returned by the second call
        # is inspected by the assertions below.
        model = TFAutoModelForTableQuestionAnswering.from_pretrained(checkpoint)
        model, _loading_info = TFAutoModelForTableQuestionAnswering.from_pretrained(
            checkpoint,
            output_loading_info=True,
        )
        self.assertIsNotNone(model)
        self.assertIsInstance(model, TFTapasForQuestionAnswering)
def test_integration_wtq_tf(self):
    """Run the ``google/tapas-base-finetuned-wtq`` checkpoint through the pipeline.

    Five questions are asked about a small table of repositories and the
    returned answers, cell coordinates, cells and aggregators are compared
    against pinned values.
    """
    checkpoint = "google/tapas-base-finetuned-wtq"
    tqa = pipeline(
        "table-question-answering",
        model=TFAutoModelForTableQuestionAnswering.from_pretrained(checkpoint),
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
    )

    table = {
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
        "Contributors": ["651", "77", "34"],
        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    }
    questions = [
        "What repository has the largest number of stars?",
        "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
        "What is the number of repositories?",
        "What is the average number of stars?",
        "What is the total amount of stars?",
    ]

    answers = tqa(table, questions)

    # Building blocks for the expected output: the first two questions select
    # a single cell, the remaining three select a whole column and aggregate.
    single_cell = {
        "answer": "Transformers",
        "coordinates": [(0, 0)],
        "cells": ["Transformers"],
        "aggregator": "NONE",
    }
    repository_coords = [(0, 0), (1, 0), (2, 0)]
    repository_cells = ["Transformers", "Datasets", "Tokenizers"]
    stars_coords = [(0, 1), (1, 1), (2, 1)]
    stars_cells = ["36542", "4512", "3934"]

    expected = [
        dict(single_cell),
        dict(single_cell),
        {
            "answer": "COUNT > Transformers, Datasets, Tokenizers",
            "coordinates": repository_coords,
            "cells": repository_cells,
            "aggregator": "COUNT",
        },
        {
            "answer": "AVERAGE > 36542, 4512, 3934",
            "coordinates": list(stars_coords),
            "cells": list(stars_cells),
            "aggregator": "AVERAGE",
        },
        {
            "answer": "SUM > 36542, 4512, 3934",
            "coordinates": list(stars_coords),
            "cells": list(stars_cells),
            "aggregator": "SUM",
        },
    ]
    self.assertListEqual(answers, expected)
def test_integration_sqa_tf(self):
    """Run the ``google/tapas-base-finetuned-sqa`` checkpoint with ``sequential=True``.

    Three follow-up questions are asked about a table of actors; each expected
    answer is a single cell, given together with its coordinates.
    """
    checkpoint = "google/tapas-base-finetuned-sqa"
    tqa = pipeline(
        "table-question-answering",
        model=TFAutoModelForTableQuestionAnswering.from_pretrained(checkpoint),
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
    )

    table = {
        "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
        "Age": ["56", "45", "59"],
        "Number of movies": ["87", "53", "69"],
        "Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
    }
    questions = [
        "How many movies has George Clooney played in?",
        "How old is he?",
        "What's his date of birth?",
    ]

    answers = tqa(table, questions, sequential=True)

    # (cell text, cell coordinate) for each question, in question order.
    expected_cells = (
        ("69", (2, 2)),
        ("59", (2, 1)),
        ("28 november 1967", (2, 3)),
    )
    expected = [
        {"answer": text, "coordinates": [coordinate], "cells": [text]}
        for text, coordinate in expected_cells
    ]
    self.assertListEqual(answers, expected)
def test_slow_tokenizer_sqa_tf(self):
    """Exercise ``TableQuestionAnsweringPipeline`` with ``lysandre/tiny-tapas-random-sqa``.

    The checkpoint is loaded with ``from_pt=True``. The test covers, in order:

    * sequential vs. batched inference on the same three questions,
    * a single-string query and list queries against pinned outputs,
    * invalid ``table`` / ``query`` arguments, each expected to raise
      ``ValueError``.

    NOTE(review): the pinned answers are regression values, not semantically
    correct answers (presumably because the checkpoint is random, judging by
    its name) -- confirm before changing them.
    """

    def actors_table():
        # Fresh dict per call so every pipeline invocation gets its own object.
        return {
            "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
            "age": ["56", "45", "59"],
            "number of movies": ["87", "53", "69"],
            "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
        }

    def repositories_table():
        return {
            "Repository": ["Transformers", "Datasets", "Tokenizers"],
            "Stars": ["36542", "4512", "3934"],
            "Contributors": ["651", "77", "34"],
            "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
        }

    def birth_date_answer():
        return {
            "answer": "7 february 1967",
            "coordinates": [(0, 3)],
            "cells": ["7 february 1967"],
        }

    def python_answer():
        return {
            "answer": "Python, Python",
            "coordinates": [(0, 3), (1, 3)],
            "cells": ["Python", "Python"],
        }

    actor_questions = (
        "how many movies has george clooney played in?",
        "how old is he?",
        "what's his date of birth?",
    )
    repository_questions = (
        "What repository has the largest number of stars?",
        "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
        "What is the number of repositories?",
        "What is the average number of stars?",
        "What is the total amount of stars?",
    )

    checkpoint = "lysandre/tiny-tapas-random-sqa"
    model = TFAutoModelForTableQuestionAnswering.from_pretrained(checkpoint, from_pt=True)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    # Sequential vs. batched inference on one shared input.
    conversation = {"table": actors_table(), "query": list(actor_questions)}
    sequential_outputs = table_querier(**conversation, sequential=True)
    batch_outputs = table_querier(**conversation, sequential=False)

    self.assertEqual(len(sequential_outputs), 3)
    self.assertEqual(len(batch_outputs), 3)
    self.assertEqual(sequential_outputs[0], batch_outputs[0])
    self.assertNotEqual(sequential_outputs[1], batch_outputs[1])
    # The analogous assertNotEqual on index 2 is disabled in this test; the
    # reason is not recorded here.

    # A new pipeline instance is built for the remaining checks.
    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    # Single string query -> single dict.
    outputs = table_querier(
        table=actors_table(),
        query="how many movies has george clooney played in?",
    )
    self.assertEqual(outputs, birth_date_answer())

    # List of queries -> list of dicts, one per query.
    outputs = table_querier(table=actors_table(), query=list(actor_questions))
    self.assertEqual(outputs, [birth_date_answer() for _ in range(3)])

    outputs = table_querier(table=repositories_table(), query=list(repository_questions))
    self.assertEqual(outputs, [python_answer() for _ in range(5)])

    # Each of these keyword sets must be rejected with ValueError. They are
    # tried in order; the fourth one omits ``query`` entirely.
    empty_context_question = "What does it do with empty context ?"
    rejected_calls = [
        {"query": empty_context_question, "table": None},
        {"query": empty_context_question, "table": ""},
        {"query": empty_context_question, "table": {}},
        {"table": repositories_table()},
        {"query": "", "table": repositories_table()},
        {"query": None, "table": repositories_table()},
    ]
    for bad_kwargs in rejected_calls:
        with self.assertRaises(ValueError):
            table_querier(**bad_kwargs)
def test_small_model_tf(self):
    """Exercise ``TableQuestionAnsweringPipeline`` with ``lysandre/tiny-tapas-random-wtq``.

    The checkpoint is loaded with ``from_pt=True``. The test first checks the
    types of two aggregation-related config attributes, then compares pipeline
    outputs for a single query and for lists of queries against pinned values,
    and finally checks that invalid ``table`` / ``query`` arguments raise
    ``ValueError``.

    NOTE(review): every pinned answer is an empty ``AVERAGE`` aggregation;
    these look like regression values for a random checkpoint (judging by its
    name) -- confirm before changing them.
    """

    def actors_table():
        # Fresh dict per call so every pipeline invocation gets its own object.
        return {
            "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
            "age": ["56", "45", "59"],
            "number of movies": ["87", "53", "69"],
            "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
        }

    def repositories_table():
        return {
            "Repository": ["Transformers", "Datasets", "Tokenizers"],
            "Stars": ["36542", "4512", "3934"],
            "Contributors": ["651", "77", "34"],
            "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
        }

    def empty_average():
        return {
            "answer": "AVERAGE > ",
            "coordinates": [],
            "cells": [],
            "aggregator": "AVERAGE",
        }

    checkpoint = "lysandre/tiny-tapas-random-wtq"
    model = TFAutoModelForTableQuestionAnswering.from_pretrained(checkpoint, from_pt=True)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    self.assertIsInstance(model.config.aggregation_labels, dict)
    self.assertIsInstance(model.config.no_aggregation_label_index, int)

    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    # Single string query -> single dict.
    outputs = table_querier(
        table=actors_table(),
        query="how many movies has george clooney played in?",
    )
    self.assertEqual(outputs, empty_average())

    # List of queries -> list of dicts, one per query.
    outputs = table_querier(
        table=actors_table(),
        query=[
            "how many movies has george clooney played in?",
            "how old is he?",
            "what's his date of birth?",
        ],
    )
    self.assertEqual(outputs, [empty_average() for _ in range(3)])

    outputs = table_querier(
        table=repositories_table(),
        query=[
            "What repository has the largest number of stars?",
            "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
            "What is the number of repositories?",
            "What is the average number of stars?",
            "What is the total amount of stars?",
        ],
    )
    self.assertEqual(outputs, [empty_average() for _ in range(5)])

    # Each of these keyword sets must be rejected with ValueError. They are
    # tried in order; the fourth one omits ``query`` entirely.
    empty_context_question = "What does it do with empty context ?"
    rejected_calls = [
        {"query": empty_context_question, "table": None},
        {"query": empty_context_question, "table": ""},
        {"query": empty_context_question, "table": {}},
        {"table": repositories_table()},
        {"query": "", "table": repositories_table()},
        {"query": None, "table": repositories_table()},
    ]
    for bad_kwargs in rejected_calls:
        with self.assertRaises(ValueError):
            table_querier(**bad_kwargs)