Example #1
def get_data():
    if request.method == 'POST':
        # Prefer a JSON body; fall back to form data. silent=True avoids an
        # exception when the request body is not valid JSON.
        data = request.get_json(silent=True) or request.form
        context = data['context']
        question = data['question']

        to_predict = [{
            'context': context,
            'qas': [{
                'question': question,
                'id': '0'
            }]
        }]

        # Load the fine-tuned ELECTRA checkpoint saved under models/electra/
        # (base model: google/electra-base-discriminator).
        model_type = "electra"
        model = QuestionAnsweringModel(model_type=model_type,
                                       model_name=f"models/{model_type}/",
                                       use_cuda=False)

        preds, _ = model.predict(to_predict)

        # predict() returns the n-best answers per question; take the top one.
        answer = preds[0]['answer'][0]
        print(answer)
        result = answer if answer else "No answer found"

        return jsonify({'output': result})
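The handler assumes a surrounding Flask app. A minimal sketch of that wiring, where the route name and port are assumptions rather than part of the original:

# Hypothetical wiring for the get_data handler defined above.
from flask import Flask, request, jsonify
from simpletransformers.question_answering import QuestionAnsweringModel

app = Flask(__name__)
# Route name is an assumption for illustration.
app.add_url_rule('/predict', view_func=get_data, methods=['POST'])

if __name__ == '__main__':
    app.run(port=5000)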
Example #2
def main():
    # load data
    train_data = load_standard_dataset(standard_train)

    # ======================= instantiate model ====================
    # models are from https://huggingface.co/models?pipeline_tag=question-answering
    # model = QuestionAnsweringModel('roberta', 'csarron/roberta-large-squad-v1', args=train_args)
    # model = QuestionAnsweringModel('electra', 'mrm8488/electra-large-finetuned-squadv1', args=train_args)
    # model = QuestionAnsweringModel('albert', 'Wikidepia/albert-bahasa-cased-squad', args=train_args)
    # model = QuestionAnsweringModel('bert', 'bert-base-cased', args=train_args)
    model = QuestionAnsweringModel('distilbert',
                                   'distilbert-base-uncased-distilled-squad',
                                   args=train_args)
    # model = QuestionAnsweringModel('bert', 'trained_models/bert', args=train_args)  # run eval from pre-trained model

    # =========================== train model ======================
    model.train_model(train_data)

    # ========================= do evaluation ======================
    dev_data = load_standard_dataset(standard_dev)
    result, texts = model.eval_model(dev_data,
                                     f1=f1_multiclass,
                                     acc=accuracy_score)
    print(f'Result: {result}')

    # ========================= do predictions =====================
    answers, probabilities = model.predict(dev_data, n_best_size=1)
    preds = {pred['id']: pred['answer'] for pred in answers}

    with open('results/squad_predictions.json', 'w') as f:
        for qid, answer in preds.items():
            # json.dumps handles quoting/escaping inside answers.
            f.write(json.dumps({'qid': qid, 'answer': answer}) + '\n')
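load_standard_dataset, standard_train, and standard_dev are defined outside this snippet; given how the result feeds train_model and predict, it presumably returns the library's standard list-of-dicts layout, roughly:

# Assumed shape of load_standard_dataset's output (values are illustrative).
train_data = [{
    'context': 'Some passage of text...',
    'qas': [{
        'id': '0',
        'question': 'What does the passage contain?',
        'is_impossible': False,
        'answers': [{'text': 'passage', 'answer_start': 5}],
    }],
}]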
Example #3
class GetCases:
    def __init__(self):
        logging.basicConfig(level=logging.INFO)
        transformers_logger = logging.getLogger("transformers")
        transformers_logger.setLevel(logging.WARNING)
        self.model = QuestionAnsweringModel('distilbert',
                                            'outputs/',
                                            args={
                                                'reprocess_input_data': True,
                                                'overwrite_output_dir': True,
                                                'fp16': False
                                            },
                                            use_cuda=False)

    def train_model(self):
        train_data = []
        with open('C:/Users/NathanGrant/Downloads/rona/rona/training_data.json'
                  ) as f:
            train_data = json.load(f)
        self.model.train_model(train_data)

    def predict(self, news, county):
        to_predict = []
        # Normalize "Name, ST" to "name county" for the question text.
        county = re.sub(", [A-Z]+", " county", county).lower()
        temp = {
            'context': news,
            'qas': [{
                'question': 'Total deaths in ' + county,
                'id': '0'
            }]
        }
        to_predict.append(temp)
        pre = self.model.predict(to_predict)
        cases = [prediction['answer'] for prediction in pre]
        print(cases)
        if len(cases) > 0:
            for i in range(len(cases)):
                try:
                    cases[i] = int(cases[i])
                except ValueError:
                    # Fall back to spelled-out numbers, e.g. "seven" -> 7.
                    cases[i] = w2n.word_to_num(cases[i])
        else:
            return 0
        return cases

    def evaluate_model(self, data):
        result, text = self.model.eval_model(data)
        print(text)
        print(result)
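A hypothetical call, with made-up news text and the "Name, ST" county format that predict's regex expects:

# Hypothetical usage; the text and county values are invented.
getter = GetCases()
deaths = getter.predict('Officials reported seven deaths in Example County.',
                        'Example, CA')
print(deaths)  # e.g. [7]; "seven" is parsed by w2n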
Example #4
def predict(model_name):
    path = os.environ['MODEL_REPO'] + "/" + model_name  # get the path to BERT
    model = QuestionAnsweringModel(model_name, path + "/",
                                   use_cuda=False)  # Load the model
    question = request.get_json()  # get the question and context from input

    result = model.predict(question)  # predict the result

    # Format the top three answers and their probabilities as a table.
    answers = pd.DataFrame(result[0][0]['answer'][0:3], columns=["Answers"])
    answers["probabilities"] = result[1][0]['probability'][0:3]
    df_no_indices = answers.to_string(index=False)

    # df_no_indices is already a plain string, so sort_keys/indent would be
    # no-ops; return it JSON-encoded.
    return json.dumps(df_no_indices), 200  # return the result
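The endpoint forwards the request body straight to model.predict, so the client must already send simpletransformers' to_predict layout; an illustrative payload:

# Illustrative request body for the endpoint above (values are made up).
payload = [{
    'context': 'The Eiffel Tower was completed in 1889.',
    'qas': [{'question': 'When was the Eiffel Tower completed?', 'id': '0'}],
}]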
Example #5
def answer_questions(questions, answer_text, model_path=None, use_cuda=False):
    """
    Takes a `questions` dict list and an `answer_text` string (which contains
    the answers), and identifies the words within `answer_text` that answer
    each question.

    Parameters
    ----------
    questions : list
    ex.)
        questions = [
        {
            'id': 1,
            'question': 'What are the three sources of European Union law?'
        },
        ...
    ]

    answer_text : str
    model_path : str
    use_cuda: bool

    Returns
    -------
    preds : list
    ex.)
        [
            {
                'id': '1',
                'answer': 'primary law, secondary law and supplementary law'
            },
            ...
        ]
    """
    if model_path is None:
        model_path = os.path.dirname(os.path.abspath(__file__)) + '/outputs'

    # model = QuestionAnsweringModel('bert', './outputs/final', use_cuda=use_cuda)
    model = QuestionAnsweringModel('bert', model_path, use_cuda=use_cuda)
    data_to_predict = [{'context': answer_text, 'qas': questions}]

    preds = model.predict(data_to_predict)
    return preds
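A plausible call reusing the docstring's example data; it assumes a fine-tuned BERT QA checkpoint sits in ./outputs:

# Hypothetical call; assumes ./outputs holds a fine-tuned BERT QA model.
questions = [{
    'id': '1',
    'question': 'What are the three sources of European Union law?'
}]
answer_text = ('European Union law comprises primary law, secondary law '
               'and supplementary law.')
print(answer_questions(questions, answer_text))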
Example #6
"""
model = pickle.load(open('model.pkl', 'rb'))
print(model.predict([[4, 300, 500]]))
"""

import pickle

import torch
from simpletransformers.question_answering import QuestionAnsweringModel

model = QuestionAnsweringModel('distilbert',
                               'distilbert-base-uncased-distilled-squad',
                               args={
                                   'reprocess_input_data': True,
                                   'overwrite_output_dir': True
                               },
                               use_cuda=False)
to_predict = [{
    'context': 'This is the context used for demonstrating predictions.',
    'qas': [{
        'question': 'What is this context?',
        'id': '0'
    }]
}]

print(model.predict(to_predict))
pickle.dump(model, open('model.pkl', 'wb'))

model = pickle.load(open('model.pkl', 'rb'))
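Pickling the whole model object is fragile across library and torch versions; reloading from a checkpoint directory, as other examples here do, is usually sturdier. A sketch, with an assumed directory name:

# Alternative sketch: reload from a checkpoint directory instead of a pickle.
# 'outputs/' is an assumption; use whatever directory the model was saved to.
model = QuestionAnsweringModel('distilbert', 'outputs/', use_cuda=False)
print(model.predict(to_predict))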
Example #7
def answer_question(item: Item):
    print(item)

    train_data = [
        {
            "context":
            "Super Bowl 50 was an American football game to determine the champion of the National Football League (NFL) for the 2015 season. The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title. The game was played on February 7, 2016, at Levis Stadium in the San Francisco Bay Area at Santa Clara, California. As this was the 50th Super Bowl, the league emphasized the golden anniversary with various gold-themed initiatives, as well as temporarily suspending the tradition of naming each Super Bowl game with Roman numerals (under which the game would have been known as Super Bowl L), so that the logo could prominently feature the Arabic numerals 50.",
            "qas": [
                {
                    "id": "00001",
                    "is_impossible": False,
                    "question": "Who is the new champion?",
                    "answers": [{
                        "text": "Denver Broncos",
                        "answer_start": 178,
                    }],
                },
                {
                    "id": "00002",
                    "is_impossible": False,
                    "question": "What year was Super Bowl 50?",
                    "answers": [{
                        "text": "2016",
                        "answer_start": 347,
                    }],
                },
            ],
        },
    ]

    # The evaluation data in this example is identical to the training data.
    eval_data = train_data

    # define model args
    model_args = QuestionAnsweringArgs()
    model_args.num_train_epochs = 5
    model_args.reprocess_input_data = True
    model_args.overwrite_output_dir = True
    # model_args.use_early_stopping = True
    # model_args.early_stopping_delta = 0.01
    # model_args.early_stopping_metric = "mcc"
    # model_args.early_stopping_metric_minimize = False
    # model_args.early_stopping_patience = 5
    # model_args.evaluate_during_training_steps = 1000

    # Create the QuestionAnsweringModel
    model = QuestionAnsweringModel('distilbert',
                                   'distilbert-base-uncased-distilled-squad',
                                   args=model_args,
                                   use_cuda=False)

    # model.train_model(train_data, eval_data=eval_data)

    # Making predictions using the model.
    to_predict = [{
        'context': item.context,
        'qas': [{
            'question': item.question,
            'id': '0'
        }]
    }]

    results = model.predict(to_predict)

    return results
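The item: Item signature suggests a FastAPI endpoint with a pydantic request model; a minimal sketch of the assumed surroundings (all names here are assumptions):

# Hypothetical FastAPI wiring; field names match how answer_question uses item.
from fastapi import FastAPI
from pydantic import BaseModel

class Item(BaseModel):
    context: str
    question: str

app = FastAPI()

@app.post('/answer')
def answer(item: Item):
    return answer_question(item)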
Example #8
def test_question_answering(model_type, model_name):
    # Create dummy data to use for training.
    train_data = [
        {
            "context": "This is the first context",
            "qas": [
                {
                    "id": "00001",
                    "is_impossible": False,
                    "question": "Which context is this?",
                    "answers": [{"text": "the first", "answer_start": 8}],
                }
            ],
        },
        {
            "context": "Other legislation followed, including the Migratory Bird Conservation Act of 1929, a 1937 treaty prohibiting the hunting of right and gray whales,\
                and the Bald Eagle Protection Act of 1940. These later laws had a low cost to society—the species were relatively rare—and little opposition was raised",
            "qas": [
                {
                    "id": "00002",
                    "is_impossible": False,
                    "question": "What was the cost to society?",
                    "answers": [{"text": "low cost", "answer_start": 225}],
                },
                {
                    "id": "00003",
                    "is_impossible": False,
                    "question": "What was the name of the 1937 treaty?",
                    "answers": [{"text": "Bald Eagle Protection Act", "answer_start": 167}],
                },
                {"id": "00004", "is_impossible": True, "question": "How did Alexandar Hamilton die?", "answers": [],},
            ],
        },
    ]  # noqa

    # Duplicate the data four times (16x copies) so there is enough to train on.
    for _ in range(4):
        train_data.extend(train_data)

    # Save as a JSON file
    os.makedirs("data", exist_ok=True)
    with open("data/train.json", "w") as f:
        json.dump(train_data, f)

    logging.basicConfig(level=logging.WARNING)
    transformers_logger = logging.getLogger("transformers")
    transformers_logger.setLevel(logging.ERROR)

    # Create the QuestionAnsweringModel
    model = QuestionAnsweringModel(
        model_type,
        model_name,
        args={"no_save": True, "reprocess_input_data": True, "overwrite_output_dir": True},
        use_cuda=False,
    )

    # Train the model
    model.train_model("data/train.json")

    # Evaluate the model. (Being lazy and evaluating on the train data itself)
    result, text = model.eval_model("data/train.json")

    # Making predictions using the model.
    to_predict = [
        {
            "context": "This is the context used for demonstrating predictions.",
            "qas": [{"question": "What is this context?", "id": "0"}],
        }
    ]

    model.predict(to_predict)
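test_question_answering takes model_type and model_name arguments, which suggests a pytest-parametrized harness; a plausible sketch (the parameter values are assumptions):

# Hypothetical pytest parametrization for the test above.
import pytest

@pytest.mark.parametrize(
    'model_type,model_name',
    [('distilbert', 'distilbert-base-uncased-distilled-squad')],
)
def test_question_answering_models(model_type, model_name):
    test_question_answering(model_type, model_name)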
Example #9
def test_question_answering():
    # Create dummy data to use for training.
    train_data = [
        {
            "context":
            "This is the first context",
            "qas": [{
                "id": "00001",
                "is_impossible": False,
                "question": "Which context is this?",
                "answers": [{
                    "text": "the first",
                    "answer_start": 8
                }],
            }],
        },
        {
            "context":
            "Other legislation followed, including the Migratory Bird"
            " Conservation Act of 1929, a 1937 treaty prohibiting the hunting of"
            " right and gray whales, and the Bald Eagle Protection Act of 1940."
            " These later laws had a low cost to society—the species were"
            " relatively rare—and little opposition was raised",
            "qas": [
                {
                    "id": "00002",
                    "is_impossible": False,
                    "question": "What was the cost to society?",
                    "answers": [{
                        "text": "low cost",
                        "answer_start": 225
                    }],
                },
                {
                    "id":
                    "00003",
                    "is_impossible":
                    False,
                    "question":
                    "What was the name of the 1937 treaty?",
                    "answers": [{
                        "text": "Bald Eagle Protection Act",
                        "answer_start": 167
                    }],
                },
            ],
        },
    ]

    # Save as a JSON file
    os.makedirs("data", exist_ok=True)
    with open("data/train.json", "w") as f:
        json.dump(train_data, f)

    # Create the QuestionAnsweringModel
    model = QuestionAnsweringModel(
        "distilbert",
        "distilbert-base-uncased-distilled-squad",
        args={
            "no_save": True,
            "reprocess_input_data": True,
            "overwrite_output_dir": True
        },
        use_cuda=False,
    )

    # Train the model
    model.train_model("data/train.json")

    # Evaluate the model. (Being lazy and evaluating on the train data itself)
    result, text = model.eval_model("data/train.json")

    # Making predictions using the model.
    to_predict = [{
        "context": "This is the context used for demonstrating predictions.",
        "qas": [{
            "question": "What is this context?",
            "id": "0"
        }],
    }]

    model.predict(to_predict)
Example #10
    "manual_seed": 4,
    "max_seq_length": 512,
    "no_save": True,
    "n_best_size": 10,
    "lazy_loading": True,
    # "use_multiprocessing": False,
}

# Create the QuestionAnsweringModel
model = QuestionAnsweringModel("bert",
                               "bert-base-cased",
                               args=train_args,
                               use_cuda=True,
                               cuda_device=0)

# Train from a JSON Lines file (as lazy_loading expects); evaluate on JSON.
model.train_model("data/train.jsonl", eval_data="data/train.json")

# Making predictions using the model.
to_predict = [{
    "context":
    "Other legislation followed, including the Migratory Bird Conservation Act of 1929, a 1937 treaty prohibiting the hunting of right and gray whales,\
            and the Bald Eagle Protection Act of 1940. These later laws had a low cost to society—the species were relatively rare—and little opposition was raised",
    "qas": [{
        "question": "What was the name of the 1937 treaty?",
        "id": "0"
    }],
}]

print(model.predict(to_predict, n_best_size=2))
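lazy_loading streams the training set from disk, which is why train_model is given a .jsonl file; presumably each line holds one {'context', 'qas'} item. A sketch converting a JSON list to that layout (file names are assumptions):

# Sketch: convert a JSON list of QA items to JSON Lines for lazy loading.
import json

with open('data/train.json') as f:
    items = json.load(f)

with open('data/train.jsonl', 'w') as f:
    for item in items:
        f.write(json.dumps(item) + '\n')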
Example #11
    'num_train_epochs': 1,
    'max_seq_length': 384,
    'doc_stride': 128,
    'overwrite_output_dir': True,
    'reprocess_input_data': False,
    'train_batch_size': 2,
    'gradient_accumulation_steps': 8,
    'save_model_every_epoch': False
}

model = QuestionAnsweringModel('bert',
                               'bert-base-cased',
                               use_cuda=False,
                               args=train_args)
model.train_model(train_data, output_dir=None)

# Prediction
with open('dev-v2.0.json', 'r') as f:
    dev_data = json.load(f)

# Flatten the SQuAD layout into a list of {'context', 'qas'} paragraphs.
dev_data = [item for topic in dev_data['data'] for item in topic['paragraphs']]

preds = model.predict(dev_data)

os.makedirs('results', exist_ok=True)

submission = {pred['id']: pred['answer'] for pred in preds}

with open('results/submission.json', 'w') as f:
    json.dump(submission, f)
Example #12
        qas.append({'question': question, 'id': qid, 'is_impossible': False, 'answers': answers})
        output.append({'context': context.lower(), 'qas': qas})
    return output
qa_test = do_qa_test(test)
with open('/content/gdrive/My Drive/data/test.json', 'w') as outfile:
    json.dump(qa_test, outfile)
MODEL_PATH = '/content/gdrive/My Drive/model_deeplearning/'
#MODEL_PATH = 'https://drive.google.com/drive/folders/1CkjjRb6GJENfPQqfDJgVnzwipShmy4RE?usp=sharing'
model = QuestionAnsweringModel('distilbert', 
                               MODEL_PATH, 
                               args={'reprocess_input_data': True,
                                     'overwrite_output_dir': True,
                                     'learning_rate': 5e-5,
                                     'num_train_epochs': 3,
                                     'max_seq_length': 192,
                                     'doc_stride': 64,
                                     'fp16': False,
                                    },
                               use_cuda=True)
model.train_model('/content/gdrive/My Drive/data/train.json')
predictions = model.predict(qa_test)
predictions_df = pd.DataFrame.from_dict(predictions)

sub_df['selected_text'] = predictions_df['answer']

sub_df.to_csv('/content/gdrive/My Drive/sample_submission.csv', index=False)

print("File submitted successfully.")
#test_df.head()
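The snippet above opens mid-function; a plausible reconstruction of do_qa_test, assuming each row of test is (textID, text, ..., sentiment) as the Tweet-Sentiment-Extraction-style submission code suggests:

# Hypothetical reconstruction of do_qa_test; the row layout is an assumption.
def do_qa_test(test):
    output = []
    for line in test:
        qid, context, question = line[0], line[1], line[-1]
        qas = [{'question': question, 'id': qid,
                'is_impossible': False, 'answers': []}]
        output.append({'context': context.lower(), 'qas': qas})
    return output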

Example #13
qa_test

# !pip install seqeval
# !pip install transformers

%%time

from simpletransformers.question_answering import QuestionAnsweringModel

model = QuestionAnsweringModel('distilbert', 
                               '/kaggle/input/transformers-pretrained-distilbert/distilbert-base-uncased-distilled-squad/', 
                               args={'reprocess_input_data': True,
                                     'overwrite_output_dir': True,
                                     'learning_rate': 5e-5,
                                     'num_train_epochs': 4,
                                     'max_seq_length': 200,
                                     'doc_stride': 64,
                                     'fp16': False,
                                    },
                               use_cuda=True)
model.train_model(qa_train)

%%time

preds = model.predict(qa_test)
predic_df = pd.DataFrame.from_dict(preds)
sub_df['selected_text'] = predic_df['answer']
sub_df.to_csv("submission.csv", sep=',', index=False)

sub_df.head()
Example #14
class TransformerReader(AbstractReader):
    def __init__(self):
        super().__init__()
        self.model = None

    def load(self, path):
        use_cuda = torch.cuda.is_available()

        if not use_cuda:
            # Warn (requires `import warnings`) and fall back to CPU, since
            # use_cuda is still passed through to the model below.
            warnings.warn('CUDA not available.')

        try:
            # Enable fp16 only when NVIDIA apex is installed.
            import apex
            fp16 = True
        except ImportError:
            fp16 = False

        self.model = QuestionAnsweringModel(model_type='albert',
                                            model_name=path,
                                            use_cuda=use_cuda,
                                            args={
                                                'fp16': fp16,
                                                'n_best_size': 1,
                                            })

    def extract_answer(self, question, paragraphs):
        to_predict = []
        contexts = {}
        for idx, entry in enumerate(paragraphs):
            if not entry:
                continue
            context = self._get_context(entry['paragraphs'])
            contexts[idx] = context
            to_predict.append({
                'context': context,
                'qas': [{
                    'question': question,
                    'id': idx
                }]
            })

        if not to_predict:
            return [{
                'answer': '',
                'context': '',
                'confidence': 0,
                'supporting_facts': []
            }]

        predictions, probabilities = self.model.predict(to_predict)

        results = []
        for prediction, probability in zip(predictions, probabilities):
            prediction_id = prediction['id']
            context = contexts[prediction_id]

            answer = prediction['answer'][0]
            confidence = probability['probability'][0]

            confidence += paragraphs[prediction_id]['score']
            if not answer:
                confidence = 0

            results.append({
                'answer': answer,
                'confidence': confidence,
                'context': context if answer else '',
                'supporting_facts': [],
            })

        return results

    def _get_context(self, paragraphs):
        return ' '.join(p['text'] for p in paragraphs)
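A hypothetical use of the reader, assuming AbstractReader takes no constructor arguments, a CUDA-capable machine, and a fine-tuned ALBERT checkpoint path:

# Hypothetical usage; the path and paragraph payload are invented.
reader = TransformerReader()
reader.load('models/albert-squad2/')
results = reader.extract_answer(
    'Who painted the ceiling?',
    [{'paragraphs': [{'text': 'Michelangelo painted the ceiling.'}],
      'score': 0.5}])
print(results[0]['answer'], results[0]['confidence'])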
Example #15
class Bot:
    """ Bot that provides answers to questions given context data containing the answer """
    def __init__(self):
        self.transformer_loggers = []
        for name in logging.root.manager.loggerDict:
            if len(name) >= 11 and name[:11] in ['transformer', 'simpletrans']:
                self.transformer_loggers.append(logging.getLogger(name))
                self.transformer_loggers[-1].setLevel(logging.ERROR)

        url_str = 'https://totalgood.org/midata/models/bert/cased_simpletransformers.zip'
        model_dir = os.path.join(DATA_DIR, 'simple-transformer')
        if not os.path.isdir(model_dir):
            os.mkdir(model_dir)

        # Download and unpack the model only if any expected file is missing.
        expected_files = [
            'config.json', 'pytorch_model.bin', 'special_tokens_map.json',
            'tokenizer_config.json', 'training_args.bin', 'vocab.txt'
        ]
        if not all(
                os.path.exists(os.path.join(model_dir, name))
                for name in expected_files):
            zip_local_path = os.path.join(model_dir,
                                          'cased_simpletransformers.zip')
            zip_local_path = os.path.join(model_dir,
                                          'cased_simpletransformers.zip')
            with DownloadProgressBar(unit='B',
                                     unit_scale=True,
                                     miniters=1,
                                     desc=url_str.split('/')[-1]) as t:
                urllib.request.urlretrieve(url_str,
                                           filename=zip_local_path,
                                           reporthook=t.update_to)
            with zipfile.ZipFile(zip_local_path, 'r') as zip_file:
                zip_file.extractall(model_dir)
            os.remove(zip_local_path)

        process_count = cpu_count() - 2 if cpu_count() > 2 else 1
        args = {
            'process_count': process_count,
            'output_dir': model_dir,
            'cache_dir': model_dir,
            'no_cache': True,
            'use_cached_eval_features': False,
            'overwrite_output_dir': False,
            'silent': True
        }

        self.model = QuestionAnsweringModel('bert',
                                            model_dir,
                                            args=args,
                                            use_cuda=USE_CUDA)

    def encode_input(self, statement, context):
        """
        Packs statement and context strings into expected input format for the model

        >>> bot = Bot()
        >>> encoded = bot.encode_input('statement', 'context')
        >>> assert encoded[0]['qas'][0]['question'] == 'statement'
        >>> assert encoded[0]['context'] == 'context'
        """
        encoded = [{
            'qas': [{
                'id': str(uuid.uuid1()),
                'question': statement
            }],
            'context': context
        }]
        return encoded

    def decode_output(self, output):
        """
        Extracts reply string from the model's prediction output

        >>> bot = Bot()
        >>> bot.decode_output([{'id': 'unique_id', 'answer': 'response'}])
        'response'
        """
        return output[0]['answer']

    def reply(self, statement):
        responses = []
        docs = scrape_wikipedia.scrape_article_texts()
        for context in docs:
            encoded_input = self.encode_input(statement, context)
            encoded_output = self.model.predict(encoded_input)
            decoded_output = self.decode_output(encoded_output)
            if len(decoded_output) > 0:
                responses.append((1, decoded_output))
        return responses
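Bot depends on a DownloadProgressBar helper that is not shown; the name and the update_to reporthook match the well-known tqdm/urlretrieve recipe, so a plausible definition is:

# Plausible DownloadProgressBar (the standard tqdm recipe); assumed here
# because the class is not shown in the original example.
from tqdm import tqdm

class DownloadProgressBar(tqdm):
    def update_to(self, b=1, bsize=1, tsize=None):
        if tsize is not None:
            self.total = tsize  # total size becomes known mid-download
        self.update(b * bsize - self.n)  # advance to the absolute position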
Example #16
to_predict.append([{"context": "Michael Kmit (Ukrainian: Михайло Кміт) (25 July 1910 in Stryi, Lviv – 22 May 1981 in Sydney, Australia) was a Ukrainian painter who spent twenty-five years in Australia. He is notable for introducing a neo-Byzantine style of painting to Australia, and winning a number of major Australian art prizes including the Blake Prize (1952) and the Sulman Prize (in both 1957 and 1970). In 1969 the Australian artist and art critic James Gleeson described Kmit as 'one of the most sumptuous colourists of our time'.",'qas': [{'question': "Who won the Blake Prize in 1952?", 'id': '40'}]}])

to_predict.append([{"context": "Michael Kmit (Ukrainian: Михайло Кміт) (25 July 1910 in Stryi, Lviv – 22 May 1981 in Sydney, Australia) was a Ukrainian painter who spent twenty-five years in Australia. He is notable for introducing a neo-Byzantine style of painting to Australia, and winning a number of major Australian art prizes including the Blake Prize (1952) and the Sulman Prize (in both 1957 and 1970). In 1969 the Australian artist and art critic James Gleeson described Kmit as 'one of the most sumptuous colourists of our time'.",'qas': [{'question': "When did Michael Kmit win the Sulman Prize?", 'id': '41'}]}])

to_predict.append([{"context": "Michael Kmit (Ukrainian: Михайло Кміт) (25 July 1910 in Stryi, Lviv – 22 May 1981 in Sydney, Australia) was a Ukrainian painter who spent twenty-five years in Australia. He is notable for introducing a neo-Byzantine style of painting to Australia, and winning a number of major Australian art prizes including the Blake Prize (1952) and the Sulman Prize (in both 1957 and 1970). In 1969 the Australian artist and art critic James Gleeson described Kmit as 'one of the most sumptuous colourists of our time'.",'qas': [{'question': "What did Kmit bring to Australia?", 'id': '42'}]}])

to_predict.append([{"context": "Lindsay Ann Czarniak (born November 7, 1977), is an American sports anchor and reporter. She currently works for Fox Sports as a studio host for NASCAR coverage and a sideline reporter for NFL games. After spending six years with WRC-TV, the NBC owned-and-operated station in Washington, D.C., Czarniak joined ESPN as a SportsCenter anchor in August 2011 and left ESPN in 2017. Czarniak served as a host and sportsdesk reporter for NBC Sports coverage of the 2008 Summer Olympics in Beijing, China.",'qas': [{'question': "Is Lindsey Czarniak English?", 'id': '43'}]}])

to_predict.append([{"context": "Lindsay Ann Czarniak (born November 7, 1977), is an American sports anchor and reporter. She currently works for Fox Sports as a studio host for NASCAR coverage and a sideline reporter for NFL games. After spending six years with WRC-TV, the NBC owned-and-operated station in Washington, D.C., Czarniak joined ESPN as a SportsCenter anchor in August 2011 and left ESPN in 2017. Czarniak served as a host and sportsdesk reporter for NBC Sports coverage of the 2008 Summer Olympics in Beijing, China.",'qas': [{'question': "When did Lindsey Czarniak work at ESPN?", 'id': '44'}]}])

to_predict.append([{"context": "Doris Lo (Chinese: 盧素娟, Pinyin: Lú Sùjuān; 20 July 1952 – 22 July 2006) was a Hong Kong voice actor who was best known for voicing the character Nobita Nobi for the Hong Kong version of the anime along with Lam Pou-chuen who voices the character Doraemon. Lo died at the age of 54 from colorectal cancer at Shatin Hospital in Hong Kong.",'qas': [{'question': "Who voiced Doraemon?", 'id': '45'}]}])

to_predict.append([{"context": "Doris Lo (Chinese: 盧素娟, Pinyin: Lú Sùjuān; 20 July 1952 – 22 July 2006) was a Hong Kong voice actor who was best known for voicing the character Nobita Nobi for the Hong Kong version of the anime along with Lam Pou-chuen who voices the character Doraemon. Lo died at the age of 54 from colorectal cancer at Shatin Hospital in Hong Kong.",'qas': [{'question': "What character did Doris Lo voice?", 'id': '46'}]}])

to_predict.append([{"context": "Czech Republic has four main daily newspapers: Lidové noviny (former dissident publication); Mladá fronta DNES (with a centre-right orientation); Právo (with a centre-left political position) and Blesk, all based in Prague. Both Lidové noviny and Mladá fronta DNES are a part of the MAFRA publishing group, owned by Andrej Babiš, the current Prime Minister of the Czech Republic. As of 2018, the MAFRA group is a part of a trust fund along with other Babiš's companies.",'qas': [{'question': "What Czech Republic newspaper has a centre-left political position?", 'id': '47'}]}])

to_predict.append([{"context": "Czech Republic has four main daily newspapers: Lidové noviny (former dissident publication); Mladá fronta DNES (with a centre-right orientation); Právo (with a centre-left political position) and Blesk, all based in Prague. Both Lidové noviny and Mladá fronta DNES are a part of the MAFRA publishing group, owned by Andrej Babiš, the current Prime Minister of the Czech Republic. As of 2018, the MAFRA group is a part of a trust fund along with other Babiš's companies.",'qas': [{'question': "Where is Lidové noviny based?", 'id': '48'}]}])

iteratorInt = 0
tempPredictionList = []

for x in to_predict:
    tempPredictionList.append(model.predict(x))
    answer = tempPredictionList[iteratorInt][0]['answer']
    print("\n Result " + str(iteratorInt) + ": " + answer + "\n")
    # Append the result to the CSV document.
    with open("QA-Results.csv", "a+") as f:
        f.write("\"" + str(iteratorInt) + "\", \"" + answer + "\";\n")
    iteratorInt += 1

# print(model.predict(to_predict))
Example #17
                for line in lines:
                    words = line['words']
                    for word in words:
                        text = word.get('text')
                        outputString += text + " "
            prevNum = pageNum

        question = input("question:")
        to_predict = [{
            'context': outputString,
            'qas': [{
                'question': question,
                'id': '0'
            }]
        }]

        model = QuestionAnsweringModel('albert',
                                       'ahotrod/albert_xxlargev1_squad2_512',
                                       args={
                                           'max_seq_length': 512,
                                           "eval_batch_size": 3,
                                           "version_2_with_negative": True,
                                           'reprocess_input_data': True,
                                           'overwrite_output_dir': True,
                                           'silent': True,
                                           "n_best_size": 10
                                       })
        res = model.predict(to_predict)
        print(res[0]['answer'])
Example #18
model = QuestionAnsweringModel(arch, m, args=train_args, use_cuda=True)

# Train the model with JSON file
# model.train_model()

# model.train_model(train_data)

# The list can also be used directly
# model.train_model(train_data)

# Evaluate the model. (Being lazy and evaluating on the train data itself)
# result, text = model.eval_model('data/train.json')

# print(result)
# print(text)

print('-------------------')

# to_predict = [{'context': "Method of Recharging a Transportation Card. ",
#                'qas': [{'question': 'Huawei PAY is not supported for transportation card recharge?', 'id': '0'}]
#                },
#               {'context': 'What can I do if I cannot see the entrance for adding Huawei Pay traffic cards? ',
#                'qas': [{'question': "I can't get a transit card?", 'id': '1'}]
#                }
#               ]

import pandas as pd

pred, prob = model.predict(test_data, n_best_size=1)
out_df = pd.DataFrame(pred)
out_df.to_csv('submission.csv')
Example #19
    'weight_decay': 0,
    'learning_rate': 4e-5,
    'adam_epsilon': 1e-8,
    'warmup_ratio': 0.06,
    'warmup_steps': 0,
    'max_grad_norm': 1.0,
}

# load the trained model
model = QuestionAnsweringModel(model_type=model_type,
                               model_name=f"../models/{model_type}",
                               args=train_args,
                               use_cuda=True)
print("===Loaded fine-tuned model for predictions===")

# make predictions on dev data
preds, _ = model.predict(dev_data)
print("===Predicted on dev data successfully===")

predictions = {pred['id']: pred['answer'][0] for pred in preds}
print("prediction size = ", len(predictions))

# write predictions to file
os.makedirs(f"../output/{model_type}", exist_ok=True)
with open(f"../output/{model_type}/predictions.json", 'w') as f:
    json.dump(predictions, f)
print("Wrote predictions to output directory")
print(
    f"run \"python check.py data/dev-v2.0.json output/{model_type}/predictions.json\""
    " in the project directory for a validation F1 score."
)