def main():
    """Run the social-truism masked-word experiment with bert-base-cased and save the results.

    Seeds the ``random`` module, configures INFO-level logging, generates 100
    pairs of random names, loads the social sentences and their config from
    JSON, passes everything to ``run_pipeline``, and writes the returned
    table to a CSV file whose name embeds the number of entity trials.
    """
    # Fixed seed for the random module.
    random.seed(1012)
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    logger = logging.getLogger(__name__)
    number_of_entity_trials = 10

    names = proc.generate_pairs_of_random_names(number_of_pairs=100, name_dir="../data/other/filtered_names.csv")

    bert_base_cased = HappyBERT("bert-base-cased")

    # Decode the JSON inputs as UTF-8 explicitly rather than relying on the
    # platform's locale encoding.
    with open("../data/truism_data/social_data_sentences_2.json", "r", encoding="utf-8") as f:
        social_sents = json.load(f)

    with open("../data/truism_data/social_data_2.json", "r", encoding="utf-8") as f:
        social_config = json.load(f)

    logger.info("finished reading in social data")

    output_df = run_pipeline(model=bert_base_cased,
                             fictitious_entities=names,
                             sentences=social_sents,
                             config=social_config,
                             number_of_entity_trials=number_of_entity_trials,
                             logger=logger)

    output_df.to_csv("../data/masked_word_result_data/bert_w_names/bert_social_perf_{}.csv".format(number_of_entity_trials),
                     index=False)

    logger.info("finished saving social results")
Ejemplo n.º 2
0
"""
Tests for the "answers_to_question" method that can be accessed through a HappyBERT object
"""

from happytransformer import HappyBERT

# Module-level HappyBERT instance used by the test below; it is created once, when this module is imported.
happy_bert = HappyBERT('bert-large-uncased-whole-word-masking-finetuned-squad')

# Context passage handed to the question-answering calls.
# NOTE(review): "eight oldest" looks like a typo for "eighth"; it is left
# unchanged because this text is model input. The trailing space is
# preserved as well.
PARAGRAPH = (
    'McGill is a university located in Montreal. It was founded in 1821, '
    'making it the eight oldest university in Canada. It is currently '
    'ranked 31st worldwide according to the QS Global World Ranking '
)

# (question, expected answer) pairs checked against PARAGRAPH.
QA_PAIRS = [
    ('When was McGill founded?', '1821'),
    ('Where is McGill located?', 'Montreal'),
    ("What is McGill's worldwide ranking?", '31st'),
]

def test_qa_multi():
    """Check the top-k and single-answer QA methods against every pair in QA_PAIRS."""
    top_k = 10
    for query, gold in QA_PAIRS:
        ranked = happy_bert.answers_to_question(query, PARAGRAPH, k=top_k)
        single = happy_bert.answer_question(query, PARAGRAPH)
        gold_lowered = gold.lower()

        # the requested number of answers comes back
        assert len(ranked) == top_k

        # the best-ranked answer and the single answer both match, ignoring case
        best = ranked[0]
        assert best["text"].lower() == gold_lowered
        assert single.lower() == gold_lowered
Ejemplo n.º 3
0
from flask import Flask, jsonify, request, make_response
from flask_cors import CORS
from autocomplete.autocomplete import get_next_word
from question.question import get_answer
from happytransformer import HappyBERT
from autocomplete.learn import train_custom
from complete_my_song.autocomplete_generator import generate_main
from lyrics_generation.generator import get_song
import glob

# Module-level HappyBERT instance built with the constructor's default arguments.
bert = HappyBERT()

# Flask application object; CORS(app) applies flask_cors (cross-origin request support) to it.
app = Flask(__name__)
CORS(app)


@app.route('/complete-markov-artists', methods=['GET'])
def complete_markov_artists():
    """Return a sorted JSON list of display names, one per JSON file in ./autocomplete/jsons/.

    Each name is the file's base name with dashes turned into spaces and
    title-cased.
    """
    artists = []
    for path in glob.glob("./autocomplete/jsons/*.json"):
        # [21:-5] drops the 21-character "./autocomplete/jsons/" prefix and
        # the 5-character ".json" suffix, leaving the bare file name.
        slug = path[21:-5]
        artists.append(slug.replace('-', ' ').title())
    artists.sort()
    return jsonify(artists)


@app.route('/complete-gru-artists', methods=['GET'])
def complete_gru_artists():
    """Respond with an empty JSON list; the file-based listing below is disabled."""
    # Disabled implementation, kept for reference:
    # json_files = glob.glob("./autocomplete/jsons/*.json")
    # return jsonify(sorted(list(map(lambda file: file[21:-5].replace('-', ' ').title(), json_files))))
    no_artists = []
    return jsonify(no_artists)
def eq_ish(x, y, epsilon):
    '''
    approximate equality: True when x and y differ by strictly
    less than epsilon
    '''
    gap = abs(y - x)
    return gap < epsilon


def errors(func):
    '''
    determines whether function errors

    Calls func with no arguments. Returns True if the call raises an
    Exception (or subclass), False if it returns normally.
    KeyboardInterrupt, SystemExit and other non-Exception BaseExceptions
    are not treated as "errors" and propagate to the caller.
    '''
    try:
        func()
    except Exception:
        # Exception instead of a bare except, so Ctrl-C and interpreter
        # exit requests are not silently swallowed.
        return True
    return False


# Module-level HappyBERT instance (default constructor arguments) used by the test below.
happy = HappyBERT()
# Each entry is [first sentence, second sentence, bool].
# NOTE(review): the flag presumably marks whether the second sentence is a
# plausible continuation of the first — confirm against the test that reads it.
SENTENCE_PAIRS = [["How old are you?", "The Eiffel Tower is in Paris", False],
                  ["How old are you?", "I am 40 years old", True]]


def test_argument_errors():
    '''
    checks that predict_next_sentence raises when either of its
    two arguments contains more than one sentence
    '''
    double = "This is the first sentence. This is the second sentence"
    single = "This is one sentence."
    # multi-sentence text as the first argument, then as the second
    for first, second in ((double, single), (single, double)):
        assert errors(lambda: happy.predict_next_sentence(first, second))
Ejemplo n.º 5
0
#--------------------------------------#

# Number words "one" through "ten", plus "no" and "zero".
num_list = [
    "one",
    "two",
    "three",
    "four",
    "five",
    "six",
    "seven",
    "eight",
    "nine",
    "ten",
    "no",
    "zero",
]

# Script entry point: read three positional command-line arguments and build
# the model selected by the first one.
if __name__ == "__main__":
    model_str = sys.argv[1]
    input_filename = sys.argv[2]
    output_filename = sys.argv[3]
    # NOTE(review): `cuda` is not read anywhere in the visible part of this
    # script — confirm it is used further down.
    cuda = True
    # Stays None when no branch below matches the model spec.
    model = None
    if model_str.startswith("reload_"):
        # "reload_<family>:<rest>" specs: the "reload_<family>:" prefix is
        # removed and the remainder is passed to the wrapper's constructor.
        if model_str.startswith("reload_bert"):
            model = HappyBERT(model_str.replace("reload_bert:", ""))
        elif model_str.startswith("reload_roberta"):
            model = HappyROBERTA(model_str.replace("reload_roberta:", ""))
    else:
        if model_str.startswith("bert"):
            # bert-base, bert-large
            model = HappyBERT(model_str + "-uncased")
        elif model_str.startswith("roberta"):
            # roberta-base, roberta-large
            model = HappyROBERTA(model_str)
        elif model_str.startswith("xlnet"):
            # ignore
            model = HappyXLNET(model_str + "-cased")
        elif model_str.startswith("gpt"):
            # Any spec starting with "gpt" loads the fixed 'gpt2' checkpoint
            # and its tokenizer, whatever the rest of the spec says.
            model = GPT2LMHeadModel.from_pretrained('gpt2')
            tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
Ejemplo n.º 6
0
#     print("Masked:", " ".join(tokenized_text))

#     print("Predicted token:", predicted_token)
#     prediction = predicted_token
#     print("Other options:")
#     # just curious about what the next few options look like.
#     for i in range(10):
#         predictions[0, masked_index, predicted_index] = -11100000
#         predicted_index = torch.argmax(predictions[0, masked_index]).item()
#         predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])
#         print(predicted_token)



#     response = jsonify(prediction)
#     # response.headers.add("Access-Control-Allow-Origin", "*")
#     return response

# Script entry point: build the three language-model wrappers, then call
# app.run() with its default arguments.
# NOTE(review): `app` is defined outside this excerpt — presumably the Flask
# application; confirm.
if __name__ == '__main__':
    # modelfile = 'models/final_prediction.pickle'
    # model = p.load(open(modelfile, 'rb'))
    # modelpath = "bert-base-uncased"
    # tokenizer = BertTokenizer.from_pretrained(modelpath)
    # model = BertForMaskedLM.from_pretrained(modelpath)
    # app.run(debug=True, host='127.0.0.1')
    # initialize the language models
    xlnet = HappyXLNET("xlnet-base-cased")
    roberta = HappyROBERTA("roberta-base")
    bert = HappyBERT("bert-base-uncased")
    app.run()
Ejemplo n.º 7
0
import json

# Number words "one" through "ten", plus "no" and "zero".
num_list = [
    "one",
    "two",
    "three",
    "four",
    "five",
    "six",
    "seven",
    "eight",
    "nine",
    "ten",
    "no",
    "zero",
]

# Script entry point: read three positional command-line arguments and build
# the model selected by the first one.
if __name__ == "__main__":
    model_str = sys.argv[1]
    input_filename = sys.argv[2]
    output_filename = sys.argv[3]
    model = None
    if model_str.startswith("reload_"):
        # "reload_<family>:<rest>" specs: the prefix is removed, the remainder
        # replaces model_str and is passed to the wrapper's constructor.
        if model_str.startswith("reload_bert"):
            model_str = model_str.replace("reload_bert:", "")
            model = HappyBERT(model_str)
        elif model_str.startswith("reload_roberta"):
            model_str = model_str.replace("reload_roberta:", "")
            model = HappyROBERTA(model_str)
    else:
        if model_str.startswith("bert"):
            # bert-base, bert-large
            model = HappyBERT(model_str + "-uncased")
        elif model_str.startswith("roberta"):
            # roberta-base, roberta-large
            model = HappyROBERTA(model_str)
        elif model_str.startswith("xlnet"):
            # ignore
            model = HappyXLNET(model_str + "-cased")
    if model is None:
        # Explicit check instead of `assert`: assertions are stripped under
        # `python -O`, and this error names the offending argument.
        raise ValueError(
            "unsupported model specification: {!r}".format(model_str))
Ejemplo n.º 8
0
# This test is here to see if we can
# minimize logging
from happytransformer import HappyBERT

# Build a HappyBERT with default arguments and run one masked-word prediction
# (num_results=20) so the amount of log output can be inspected.
transformer = HappyBERT()
predictions = transformer.predict_mask("Dogs make me [MASK] to eat",
                                       num_results=20)
# when running this, logs should be minimal