def test_table_question_answering_model_from_pretrained(self):
        """Check that the TF auto classes resolve a TAPAS checkpoint to TAPAS types.

        At most one checkpoint is exercised: the one selected by the ``[5:6]``
        slice of ``TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST``.
        """
        checkpoints = TF_TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST[5:6]
        for checkpoint in checkpoints:
            tapas_config = AutoConfig.from_pretrained(checkpoint)
            self.assertIsNotNone(tapas_config)
            self.assertIsInstance(tapas_config, TapasConfig)

            # Load once with default arguments, then again asking for the
            # loading report; only the second model is inspected below.
            qa_model = TFAutoModelForTableQuestionAnswering.from_pretrained(
                checkpoint)
            qa_model, _loading_info = (
                TFAutoModelForTableQuestionAnswering.from_pretrained(
                    checkpoint, output_loading_info=True))
            self.assertIsNotNone(qa_model)
            self.assertIsInstance(qa_model, TFTapasForQuestionAnswering)
Exemple #2
0
    def test_integration_wtq_tf(self):
        """Run the WTQ-finetuned TAPAS checkpoint through the TF pipeline.

        Five questions are asked about a small repository table, and the
        pipeline output is compared with the exact expected answers, cell
        coordinates, cells and aggregators.
        """
        checkpoint = "google/tapas-base-finetuned-wtq"
        tqa_pipeline = pipeline(
            "table-question-answering",
            model=TFAutoModelForTableQuestionAnswering.from_pretrained(
                checkpoint),
            tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        )

        repository_table = {
            "Repository": ["Transformers", "Datasets", "Tokenizers"],
            "Stars": ["36542", "4512", "3934"],
            "Contributors": ["651", "77", "34"],
            "Programming language": [
                "Python",
                "Python",
                "Rust, Python and NodeJS",
            ],
        }
        questions = [
            "What repository has the largest number of stars?",
            "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
            "What is the number of repositories?",
            "What is the average number of stars?",
            "What is the total amount of stars?",
        ]

        answers = tqa_pipeline(repository_table, questions)

        # The first two questions are both expected to select the single
        # cell at row 0, column 0 without any aggregation.
        top_repository = {
            "answer": "Transformers",
            "coordinates": [(0, 0)],
            "cells": ["Transformers"],
            "aggregator": "NONE",
        }
        # The AVERAGE and SUM questions are both expected to select the
        # whole "Stars" column (column index 1).
        star_coordinates = [(0, 1), (1, 1), (2, 1)]
        star_cells = ["36542", "4512", "3934"]
        expected_answers = [
            dict(top_repository),
            dict(top_repository),
            {
                "answer": "COUNT > Transformers, Datasets, Tokenizers",
                "coordinates": [(0, 0), (1, 0), (2, 0)],
                "cells": ["Transformers", "Datasets", "Tokenizers"],
                "aggregator": "COUNT",
            },
            {
                "answer": "AVERAGE > 36542, 4512, 3934",
                "coordinates": star_coordinates,
                "cells": star_cells,
                "aggregator": "AVERAGE",
            },
            {
                "answer": "SUM > 36542, 4512, 3934",
                "coordinates": star_coordinates,
                "cells": star_cells,
                "aggregator": "SUM",
            },
        ]
        self.assertListEqual(answers, expected_answers)
Exemple #3
0
    def test_integration_sqa_tf(self):
        """Run the SQA-finetuned TAPAS checkpoint through the TF pipeline.

        Three questions are asked with ``sequential=True``. The follow-up
        questions only say "he"/"his", and every expected answer lies in
        row 2 of the table (the George Clooney row).
        """
        checkpoint = "google/tapas-base-finetuned-sqa"
        tqa_pipeline = pipeline(
            "table-question-answering",
            model=TFAutoModelForTableQuestionAnswering.from_pretrained(
                checkpoint),
            tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        )

        actor_table = {
            "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
            "Age": ["56", "45", "59"],
            "Number of movies": ["87", "53", "69"],
            "Date of birth": [
                "7 february 1967",
                "10 june 1996",
                "28 november 1967",
            ],
        }
        questions = [
            "How many movies has George Clooney played in?",
            "How old is he?",
            "What's his date of birth?",
        ]
        answers = tqa_pipeline(actor_table, questions, sequential=True)

        # Each expected answer is a single cell of row 2; only the column
        # index changes from question to question.
        expected_cells = (("69", 2), ("59", 1), ("28 november 1967", 3))
        expected_answers = [
            {"answer": cell, "coordinates": [(2, column)], "cells": [cell]}
            for cell, column in expected_cells
        ]
        self.assertListEqual(answers, expected_answers)
Exemple #4
0
    def test_slow_tokenizer_sqa_tf(self):
        """Exercise the table-QA pipeline with the tiny SQA checkpoint on TF.

        Covers, in order: sequential versus batched querying, a single
        query, lists of queries on two different tables, and the
        ``ValueError`` expected for an unusable table or query.
        """

        def actors_table():
            # A fresh dict per call, so each pipeline call gets its own table.
            return {
                "actors": [
                    "brad pitt",
                    "leonardo di caprio",
                    "george clooney",
                ],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": [
                    "7 february 1967",
                    "10 june 1996",
                    "28 november 1967",
                ],
            }

        def repositories_table():
            # A fresh dict per call, so each pipeline call gets its own table.
            return {
                "Repository": ["Transformers", "Datasets", "Tokenizers"],
                "Stars": ["36542", "4512", "3934"],
                "Contributors": ["651", "77", "34"],
                "Programming language": [
                    "Python",
                    "Python",
                    "Rust, Python and NodeJS",
                ],
            }

        actor_questions = (
            "how many movies has george clooney played in?",
            "how old is he?",
            "what's his date of birth?",
        )
        repository_questions = (
            "What repository has the largest number of stars?",
            "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
            "What is the number of repositories?",
            "What is the average number of stars?",
            "What is the total amount of stars?",
        )

        checkpoint = "lysandre/tiny-tapas-random-sqa"
        tf_model = TFAutoModelForTableQuestionAnswering.from_pretrained(
            checkpoint, from_pt=True)
        tapas_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        tqa = TableQuestionAnsweringPipeline(model=tf_model,
                                             tokenizer=tapas_tokenizer)

        # The same table and query objects are used for both modes.
        shared_table = actors_table()
        shared_questions = list(actor_questions)
        sequential_answers = tqa(table=shared_table,
                                 query=shared_questions,
                                 sequential=True)
        batched_answers = tqa(table=shared_table,
                              query=shared_questions,
                              sequential=False)

        for answers in (sequential_answers, batched_answers):
            self.assertEqual(len(answers), 3)
        self.assertEqual(sequential_answers[0], batched_answers[0])
        self.assertNotEqual(sequential_answers[1], batched_answers[1])
        # No assertion is made about the third pair of answers.

        # The remaining checks run against a newly built pipeline.
        tqa = TableQuestionAnsweringPipeline(model=tf_model,
                                             tokenizer=tapas_tokenizer)

        # Every actor question is expected to yield this same single cell.
        first_birth_date = {
            "answer": "7 february 1967",
            "coordinates": [(0, 3)],
            "cells": ["7 february 1967"],
        }
        single_answer = tqa(table=actors_table(), query=actor_questions[0])
        self.assertEqual(single_answer, first_birth_date)

        actor_answers = tqa(table=actors_table(), query=list(actor_questions))
        self.assertEqual(actor_answers, [first_birth_date] * 3)

        # Every repository question is expected to yield these two cells.
        both_python = {
            "answer": "Python, Python",
            "coordinates": [(0, 3), (1, 3)],
            "cells": ["Python", "Python"],
        }
        repository_answers = tqa(table=repositories_table(),
                                 query=list(repository_questions))
        self.assertEqual(repository_answers, [both_python] * 5)

        # A None, empty-string or empty-dict table must be rejected.
        for unusable_table in (None, "", {}):
            with self.assertRaises(ValueError):
                tqa(query="What does it do with empty context ?",
                    table=unusable_table)

        # A missing, empty or None query must be rejected as well.
        for query_kwargs in ({}, {"query": ""}, {"query": None}):
            with self.assertRaises(ValueError):
                tqa(**query_kwargs, table=repositories_table())
Exemple #5
0
    def test_small_model_tf(self):
        """Exercise the table-QA pipeline with the tiny WTQ checkpoint on TF.

        Checks the aggregation-related config attribute types, then the
        pipeline output for a single query and for lists of queries on two
        tables, and finally the ``ValueError`` expected for an unusable
        table or query.
        """

        def actors_table():
            # A fresh dict per call, so each pipeline call gets its own table.
            return {
                "actors": [
                    "brad pitt",
                    "leonardo di caprio",
                    "george clooney",
                ],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": [
                    "7 february 1967",
                    "10 june 1996",
                    "28 november 1967",
                ],
            }

        def repositories_table():
            # A fresh dict per call, so each pipeline call gets its own table.
            return {
                "Repository": ["Transformers", "Datasets", "Tokenizers"],
                "Stars": ["36542", "4512", "3934"],
                "Contributors": ["651", "77", "34"],
                "Programming language": [
                    "Python",
                    "Python",
                    "Rust, Python and NodeJS",
                ],
            }

        actor_questions = (
            "how many movies has george clooney played in?",
            "how old is he?",
            "what's his date of birth?",
        )
        repository_questions = (
            "What repository has the largest number of stars?",
            "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
            "What is the number of repositories?",
            "What is the average number of stars?",
            "What is the total amount of stars?",
        )

        checkpoint = "lysandre/tiny-tapas-random-wtq"
        tf_model = TFAutoModelForTableQuestionAnswering.from_pretrained(
            checkpoint, from_pt=True)
        tapas_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

        model_config = tf_model.config
        self.assertIsInstance(model_config.aggregation_labels, dict)
        self.assertIsInstance(model_config.no_aggregation_label_index, int)

        tqa = TableQuestionAnsweringPipeline(model=tf_model,
                                             tokenizer=tapas_tokenizer)

        # Every question below is expected to yield this same result:
        # an AVERAGE aggregator with no cells selected.
        empty_average = {
            "answer": "AVERAGE > ",
            "coordinates": [],
            "cells": [],
            "aggregator": "AVERAGE",
        }

        single_answer = tqa(table=actors_table(), query=actor_questions[0])
        self.assertEqual(single_answer, empty_average)

        actor_answers = tqa(table=actors_table(), query=list(actor_questions))
        self.assertEqual(actor_answers, [empty_average] * 3)

        repository_answers = tqa(table=repositories_table(),
                                 query=list(repository_questions))
        self.assertEqual(repository_answers, [empty_average] * 5)

        # A None, empty-string or empty-dict table must be rejected.
        for unusable_table in (None, "", {}):
            with self.assertRaises(ValueError):
                tqa(query="What does it do with empty context ?",
                    table=unusable_table)

        # A missing, empty or None query must be rejected as well.
        for query_kwargs in ({}, {"query": ""}, {"query": None}):
            with self.assertRaises(ValueError):
                tqa(**query_kwargs, table=repositories_table())