Example no. 1
def qa():
	if request.method == 'GET':
		return render_template('QA.html', question="What is your question?")
	
	elif request.method == 'POST':
		my_context = request.form['my_context']
		my_question = request.form['my_question']

		my_bert_layer = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1", trainable=False)
		vocab_file = my_bert_layer.resolved_object.vocab_file.asset_path.numpy()
		do_lower_case = my_bert_layer.resolved_object.do_lower_case.numpy()
		tokenizer = FullTokenizer(vocab_file, do_lower_case)

		my_input_dict, my_context_words, context_tok_to_word_id, question_tok_len = bqa.create_input_dict(my_question, my_context, tokenizer)
		start_logits, end_logits = bert_squad(my_input_dict, training=False)
		start_logits_context = start_logits.numpy()[0, question_tok_len+1:]
		end_logits_context = end_logits.numpy()[0, question_tok_len+1:]
		# Independent argmax of the start/end logits (refined by the joint pair score below).
		start_word_id = context_tok_to_word_id[np.argmax(start_logits_context)]
		end_word_id = context_tok_to_word_id[np.argmax(end_logits_context)]
		# Score every (start, end) pair with start <= end and keep the best one.
		pair_scores = np.ones((len(start_logits_context), len(end_logits_context))) * (-1E10)
		for i in range(len(start_logits_context)):
			for j in range(i, len(end_logits_context)):
				pair_scores[i, j] = start_logits_context[i] + end_logits_context[j]
		pair_scores_argmax = np.argmax(pair_scores)
		# Unravel the flat argmax: row index = start position, column index = end position.
		start_word_id = context_tok_to_word_id[pair_scores_argmax // len(end_logits_context)]
		end_word_id = context_tok_to_word_id[pair_scores_argmax % len(end_logits_context)]
		predicted_answer = ' '.join(my_context_words[start_word_id:end_word_id+1])

		return render_template('QA.html', answer=predicted_answer, question=my_question, user_context=my_context)
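
# Minimal wiring sketch (an assumption; the original example only shows the view
# body). `bert_squad`, `bqa`, `np`, `hub` and `FullTokenizer` are expected to be
# imported/defined elsewhere in the original application.
from flask import Flask, request, render_template

app = Flask(__name__)
app.add_url_rule('/qa', 'qa', qa, methods=['GET', 'POST'])

if __name__ == '__main__':
    app.run(debug=True)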
Example no. 2
optimizer = optimization.create_optimizer(
    init_lr=INIT_LR,
    num_train_steps=NB_BATCHES_TRAIN,
    num_warmup_steps=WARMUP_STEPS)
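
# `squad_loss_fn` used in compile() below is not shown in this example. A minimal
# sketch of what a SQuAD-style loss could look like (an assumption, not the
# original definition): sparse cross-entropy averaged over start and end positions.
import tensorflow as tf

def squad_loss_fn(labels, model_outputs):
    start_positions, end_positions = labels          # assumed label layout
    start_logits, end_logits = model_outputs
    scce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    start_loss = scce(start_positions, start_logits)
    end_loss = scce(end_positions, end_logits)
    return (start_loss + end_loss) / 2.0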

bert_squad = load_model(DIR_PATH + '/bert_squad_model', compile=False)
bert_squad.compile(optimizer, squad_loss_fn)

### Prediction Utils ###
my_bert_layer = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3",
    trainable=False)
vocab_file = my_bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = my_bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = FullTokenizer(vocab_file, do_lower_case)
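
# Illustrative check (not in the original snippet): FullTokenizer provides
# tokenize() and convert_tokens_to_ids() for WordPiece encoding.
example_tokens = tokenizer.tokenize("What is the capital of France?")
example_ids = tokenizer.convert_tokens_to_ids(example_tokens)
print(example_tokens, example_ids)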

def is_whitespace(c):
    '''
    Return True if the character is a whitespace character
    (space, tab, CR, LF or narrow no-break space).
    '''
    if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
        return True
    return False

def whitespace_split(text):
    '''
    Split a text into a list of "words" on whitespace, as defined by
    is_whitespace().
    '''
    doc_tokens = []
    prev_is_whitespace = True
    for c in text:
        if is_whitespace(c):
            prev_is_whitespace = True
        else:
            if prev_is_whitespace:
                doc_tokens.append(c)   # start a new word
            else:
                doc_tokens[-1] += c    # extend the current word
            prev_is_whitespace = False
    return doc_tokens
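
# Illustrative usage of the helpers above (not part of the original snippet).
print(whitespace_split("Who wrote  Hamlet?\n"))  # -> ['Who', 'wrote', 'Hamlet?']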
Example no. 3
import tensorflow_hub as hub
import tensorflow as tf
from reading_datasets import read_dataset
# from main import convert_two_sentences_to_features
from official.nlp.bert.tokenization import FullTokenizer
import numpy as np
tf.enable_eager_execution()  # TF 1.x only; TF 2.x runs eagerly by default
model = hub.load("https://tfhub.dev/prvi/tf2nq/1")
# Hard-coded path to the vocab file inside the local TF Hub module cache.
vocab_file = b'C:\\Users\\LUISAL~1\\AppData\\Local\\Temp\\tfhub_modules\\88ac13afec2955fd14396e4582c251841b67429a\\assets\\vocab.txt'
tokenizer = FullTokenizer(vocab_file, do_lower_case=True)
frase_de_kaggle=[101,2040,2003,1996,2148,3060,2152,5849,1999,2414, 102, 998
, 991, 987, 988,2152,3222,1997,2148,3088,1999,2414, 967, 966
, 987, 988,3295, 967, 986,19817,10354,2389,6843,2675,1010,2414
, 965, 966, 987, 988,4769, 967, 986,19817,10354,2389,6843,2675
,1010,2414,1010,15868,2475,2078,1019,18927, 965, 966, 987, 988
,12093, 967, 986,4868,1080,2382,1531,2382,1005,1005,1050,1014
,1080,5718,1531,4261,1005,1005,1059,1013,4868,1012,2753,2620
,2475,1080,1050,1014,1012,14010,2683,1080,1059,1013,4868,1012
,2753,2620,2475,1025,1011,1014,1012,14010,2683,12093,1024,4868
,1080,2382,1531,2382,1005,1005,1050,1014,1080,5718,1531,4261
,1005,1005,1059,1013,4868,1012,2753,2620,2475,1080,1050,1014
,1012,14010,2683,1080,1059,1013,4868,1012,2753,2620,2475,1025
,1011,1014,1012,14010,2683, 965, 966, 987, 988,2152,5849, 967
, 986,10030, 965, 966, 970,11673,1997,2148,3088,2160, 985,1996
,2152,3222,1997,2148,3088,1999,2414,2003,1996,8041,3260,2013
,2148,3088,2000,1996,2142,2983,1012,2009,2003,2284,2012,2148
,3088,2160,1010,1037,2311,2006,19817,10354,2389,6843,2675,1010
,2414,1012,2004,2092,2004,4820,1996,4822,1997,1996,2152,5849
,1010,1996,2311,2036,6184,1996,2148,3060,19972,1012,2009,2038
,2042,1037,3694,2462,1008,3205,2311,2144,3196,1012, 964, 982
,8417, 961, 994, 992,1015,2381, 971, 992,1016,2156,2036, 971
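
# Illustrative only (the id list above is truncated in this example): the same
# FullTokenizer can map input ids back to WordPiece tokens for inspection.
# tokens = tokenizer.convert_ids_to_tokens(frase_de_kaggle)
# print(tokens[:20])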
Example no. 4
# GPU memory-growth setup; the opening lines are missing from this example, so
# this is the standard TF pattern reconstructed from the comment below.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

# url_uncased="https://tfhub.dev/google/albert_base/3"
# url_uncased= "https://tfhub.dev/tensorflow/albert_en_base/1"
url_uncased = "https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/1"
bert_layer = hub.KerasLayer(url_uncased, trainable=False)
#
# vocab_file = bert_layer.resolved_object.sp_model_file.asset_path.numpy()
# tokenizer = FullSentencePieceTokenizer(vocab_file)
# print(tokenizer.convert_tokens_to_ids([102,1205,367]))
#
vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
# Pass do_lower_case explicitly; the multi_cased model must not be lowercased.
tokenizer = FullTokenizer(vocab_file, do_lower_case)

del bert_layer


# del vocab_file
# del do_lower_case
#
# vocab_file=b'C:\\Users\\LUISAL~1\\AppData\\Local\\Temp\\tfhub_modules\\88ac13afec2955fd14396e4582c251841b67429a\\assets\\vocab.txt'
# tokenizer = FullTokenizer(vocab_file)
def statistics(y_start, y_end, y_true):
    import matplotlib.pyplot as plt
    n = y_start.shape[0]  # number of examples
    estadisticas_start = {}
    estadisticas_end = {}
    estadisticas_start_true = {}