def home():
    K.clear_session()
    if request.method == 'POST':
        # Check if the POST request has the file part.
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('No file selected for uploading')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            filepath = os.path.join(UPLOAD_FOLDER, filename)
            file.save(filepath)
            flash('File successfully uploaded')
            # Build the model, run it on the uploaded image, and render
            # the prediction (hienthi_kq returns the result text).
            load_keras_model()
            kq = hienthi_kq(filepath)
            print(kq)
            response = {
                'path': 'static/images/' + filename,
                'text': kq,
            }
            return render_template('index.html', response=response)
        else:
            flash('Allowed file types are txt, pdf, png, jpg, jpeg, gif')
            return redirect(request.url)
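
# The view above relies on an `allowed_file` helper that is not shown in
# this excerpt. A minimal sketch, following the standard Flask upload
# pattern; the ALLOWED_EXTENSIONS set is an assumption inferred from the
# flash message above.
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'}

def allowed_file(filename):
    # Accept only filenames whose extension is on the whitelist.
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS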
def __init__(self, model_filename, tokenizer, mle_model, bigramer,
             max_len_questions, max_len_answers, strategy):
    self._tokenizer = tokenizer
    self._tokenizer_index_to_word = {
        index: word for (word, index) in self._tokenizer.word_index.items()
    }
    self._mle_model = mle_model
    self._bigramer = bigramer
    self._max_len_questions = max_len_questions
    self._max_len_answers = max_len_answers
    model_data = utils.load_keras_model(model_filename)
    _, self._encoder_inputs, self._encoder_states, self._decoder_inputs, \
        self._decoder_embedding, self._decoder_lstm, \
        self._decoder_dense = model_data
    self._enc_model, self._dec_model = utils.make_inference_models(
        self._encoder_inputs, self._encoder_states, self._decoder_inputs,
        self._decoder_embedding, self._decoder_lstm, self._decoder_dense)
    self._strategy = strategy
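
# `utils.make_inference_models` is not shown in this excerpt; it is assumed
# to follow the standard Keras seq2seq inference setup, reusing the trained
# layers to build a stand-alone encoder and a one-step decoder. A minimal
# sketch under that assumption (latent_dim must match the LSTM size used at
# training time; 200 here is a guess):
from tensorflow.keras import Model
from tensorflow.keras.layers import Input

def make_inference_models(encoder_inputs, encoder_states, decoder_inputs,
                          decoder_embedding, decoder_lstm, decoder_dense,
                          latent_dim=200):
    # Encoder: maps a tokenized question to its final LSTM states.
    encoder_model = Model(encoder_inputs, encoder_states)

    # Decoder: consumes one token plus the previous states and returns the
    # next-token distribution together with the updated states.
    decoder_state_input_h = Input(shape=(latent_dim,))
    decoder_state_input_c = Input(shape=(latent_dim,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_embedding, initial_state=decoder_states_inputs)
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                          [decoder_outputs, state_h, state_c])
    return encoder_model, decoder_model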
@classmethod
def load_from_params(cls):
    """
    Creates a chatbot from params.py. Good for development, not for production.

    :return: Chatbot initialized from params.py.
    """
    import params

    # load data
    questions, answers = load_data(params.data_file_directory, params.files,
                                   params.encoding)
    bigramer = Bigramer(params.bigramer)

    # prepare data manipulators
    VOCAB_SIZE = params.vocab_size
    tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE,
                                 params.unknown_token)
    tokenized_questions, tokenized_answers = tokenize_q_a(
        tokenizer, questions, answers)

    # prepare data
    prepared_data = prepare_data(tokenized_questions, tokenized_answers)
    max_len_questions, max_len_answers, *_ = prepared_data

    # mle_model
    reversed_tokenizer_word_dict = {
        index: word for (word, index) in tokenizer.word_index.items()
    }
    mle_model = utils.fit_mle_model(tokenized_answers,
                                    reversed_tokenizer_word_dict)

    # load model (the unpacked layers are unused here, since __init__
    # reloads the model from the filename anyway)
    model_data = utils.load_keras_model(params.model)
    _, encoder_inputs, encoder_states, decoder_inputs, decoder_embedding, \
        decoder_lstm, decoder_dense = model_data

    return cls(params.model, tokenizer, mle_model, bigramer,
               max_len_questions, max_len_answers, params.strategy)
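
# Hypothetical usage of the factory above; `Chatbot` stands in for the
# enclosing class, whose actual name does not appear in this excerpt.
# chatbot = Chatbot.load_from_params()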
# This will be replaced with the test data when grading the assignment
data_path = '../data/data.npz'
x, y = load_data(data_path)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=0)

############################################################################
# EDITABLE SECTION OF THE SCRIPT: if you need to edit the script, do it here
############################################################################

# Load the Linear Regression model
linear_regressor = load_sklearn_model("./Linear_Regression.pickle")

# Load the Neural Network model
regressor = load_keras_model("./Neural_Network.pickle")  # Model from Task 1

# Number of data points
n = len(x)

# Build the extra feature x3 = sin(x1) * x2 for every data point
# (vectorized form of the original np.append loop; note sin must be
# namespaced, e.g. np.sin)
x3 = np.sin(x[:, 0]) * x[:, 1]

# Build the X matrix by inserting x3 as a third column
X = np.insert(x, 2, x3, axis=1)

# Make the predictions of the model
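
# The excerpt stops before the predictions are actually made. A minimal
# continuation sketch: the linear model is assumed to take the expanded
# matrix X = [x1, x2, sin(x1)*x2] and the network the raw features; the
# MSE comparison is an assumption about the intended metric.
from sklearn.metrics import mean_squared_error

y_pred_lr = linear_regressor.predict(X)   # linear model, expanded features
y_pred_nn = regressor.predict(x)          # neural network, raw features
print('Linear regression MSE:', mean_squared_error(y, y_pred_lr))
print('Neural network MSE:  ', mean_squared_error(y, y_pred_nn))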
# Load the test CIFAR-10 data
(x_train, y_train), (x_test, y_test) = utils.load_cifar10()

# Pre-processing
# Normalize each pixel of each channel so that the range is [0, 1];
# each pixel is represented by an integer value in the 0-255 range.
x_train, x_test = x_train / 255., x_test / 255.

# Create one-hot encoding of the labels;
# pre-process targets in order to perform multi-class classification.
n_classes = 3
y_train = tf_utils.to_categorical(y_train, n_classes)
y_test = tf_utils.to_categorical(y_test, n_classes)

# Load the trained models
model_task1 = utils.load_keras_model('../deliverable/nn_task1.h5')
model_task2 = utils.load_keras_model('../deliverable/nn_task2.h5')

# Predict on the given samples
y_pred_task1 = model_task1.predict(x_test)
y_pred_task2 = model_task2.predict(x_test)

# Prepare data for the F1 score: decode one-hot / probability vectors
# back to integer class labels.
y_test_f1 = np.argmax(y_test, axis=1).astype(int)
y_pred_task1_f1 = np.argmax(y_pred_task1, axis=1).astype(int)
y_pred_task2_f1 = np.argmax(y_pred_task2, axis=1).astype(int)

"""
Accuracy comparison between model T1 and model T2
"""
# Sanity check on shapes before comparing
assert y_test.shape == y_pred_task1.shape
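
# The excerpt prepares argmax-decoded labels "for f1 score" but stops at
# the shape check. A minimal sketch of the comparison it sets up, using
# sklearn; the macro averaging choice is an assumption.
from sklearn.metrics import accuracy_score, f1_score

for name, y_pred_f1 in [('T1', y_pred_task1_f1), ('T2', y_pred_task2_f1)]:
    acc = accuracy_score(y_test_f1, y_pred_f1)
    f1 = f1_score(y_test_f1, y_pred_f1, average='macro')
    print(f'Model {name}: accuracy={acc:.4f}, macro-F1={f1:.4f}')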
def analyze_checkpoints():
    questions, answers = load_data(params.data_file_directory, params.files,
                                   None)
    VOCAB_SIZE = 15001
    tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE, 'UNK')
    # tokenizer = utils.load_and_unpickle("test_models/tokenizer")
    tokenized_questions, tokenized_answers = tokenize_q_a(
        tokenizer, questions, answers)
    reversed_tokenizer_word_dict = {
        index: text for text, index in tokenizer.word_index.items()
    }
    mle_model = utils.fit_mle_model(tokenized_answers,
                                    reversed_tokenizer_word_dict)
    max_len_questions, max_len_answers, encoder_input_data, \
        decoder_input_data, decoder_output_data = \
        prepare_data(tokenized_questions, tokenized_answers)

    checkpoints = [
        params.dir_name + file for file in os.listdir(params.dir_name)
        if file.endswith("hdf5")
    ]
    print(f"{len(checkpoints)} checkpoints")

    results = defaultdict(list)
    model_score = []

    # model evaluations section
    questions, answers = load_data(params.data_file_directory,
                                   params.test_files)
    enc_in_data, dec_in_data, dec_out_data = generate_test_values(
        questions[:1000], answers[:1000], tokenizer)

    # generating answer and perplexity section
    texts = questions[:5]
    for checkpoint in checkpoints:
        net_model, encoder_inputs, encoder_states, decoder_inputs, \
            decoder_embedding, decoder_lstm, decoder_dense = \
            utils.load_keras_model(checkpoint)
        enc_model, dec_model = conversation.make_inference_models(
            encoder_inputs, encoder_states, decoder_inputs,
            decoder_embedding, decoder_lstm, decoder_dense)

        score = net_model.evaluate([enc_in_data, dec_in_data], dec_out_data)
        model_score.append(score)
        print(score)

        for text in texts:
            print(text)
            states_values = enc_model.predict(
                conversation.str_to_tokens(tokenizer, text,
                                           max_len_questions))
            empty_target_seq = np.zeros((1, 1))
            empty_target_seq[0, 0] = tokenizer.word_index['start']
            end_index = tokenizer.word_index['end']
            predictions, _ = beam_search(states_values, empty_target_seq,
                                         dec_model, end_index)

            decoded_texts = []
            for prediction in predictions:
                decoded_text = ['start']
                for word_index in prediction[1:]:
                    decoded_text.append(
                        reversed_tokenizer_word_dict.get(word_index, 'UNK'))
                decoded_texts.append(decoded_text)

            result = choose_best_fit(decoded_texts, mle_model)
            results[text].append(result)

    utils.pickle_and_save(results, params.perplexity_file)
    utils.pickle_and_save(model_score, params.model_summary_file)
def test():
    questions, answers = load_data("prepare_data/output_files",
                                   "preprocessed_cornell", None)
    VOCAB_SIZE = 15001
    tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE, 'UNK')
    tokenized_questions, tokenized_answers = tokenize_q_a(
        tokenizer, questions, answers)
    reversed_tokenizer_word_dict = {
        index: text for text, index in tokenizer.word_index.items()
    }
    mle_model = utils.fit_mle_model(tokenized_answers,
                                    reversed_tokenizer_word_dict)
    max_len_questions, max_len_answers, encoder_input_data, \
        decoder_input_data, decoder_output_data = \
        prepare_data(tokenized_questions, tokenized_answers)

    _, encoder_inputs, encoder_states, decoder_inputs, \
        decoder_embedding, decoder_lstm, decoder_dense = \
        utils.load_keras_model('cornell.hdf5')
    enc_model, dec_model = conversation.make_inference_models(
        encoder_inputs, encoder_states, decoder_inputs, decoder_embedding,
        decoder_lstm, decoder_dense)

    texts = [
        'stop talking shit', 'it is peanut butter jelly time',
        'Are we going to pass this lecture', 'Where are you from',
        'do you like me', 'carrot', 'tell me your biggest secret',
        'How are you', 'do you know me', 'what does fox say', 'i am happy',
        'this is america', 'kill me', 'do not forget to brush your teeth'
    ]
    for text in texts:
        print(text)
        states_values = enc_model.predict(
            conversation.str_to_tokens(tokenizer, text, max_len_questions))
        empty_target_seq = np.zeros((1, 1))
        empty_target_seq[0, 0] = tokenizer.word_index['start']
        end_index = tokenizer.word_index['end']
        predictions, _ = beam_search(states_values, empty_target_seq,
                                     dec_model, end_index)

        decoded_texts = []
        for prediction in predictions:
            decoded_text = ['start']
            for word_index in prediction[1:]:
                decoded_text.append(
                    reversed_tokenizer_word_dict.get(word_index, 'UNK'))
            decoded_texts.append(decoded_text)

        print(utils.choose_best(decoded_texts, mle_model))
        # for prediction in predictions:
        #     decoded_translation = ''
        #     for sampled_word_index in prediction[1:]:
        #         decoded_translation += ' {}'.format(
        #             reversed_tokenizer_word_dict[sampled_word_index])
        #     print(decoded_translation)
        # print(predictions)
        print()
if __name__ == '__main__':
    questions, answers = load_data(params.data_file_directory, params.files,
                                   params.encoding)
    VOCAB_SIZE = params.vocab_size
    tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE,
                                 params.unknown_token)
    tokenized_questions, tokenized_answers = tokenize_q_a(
        tokenizer, questions, answers)
    prepared_data = prepare_data(tokenized_questions, tokenized_answers)
    max_len_questions, max_len_answers, encoder_input_data, \
        decoder_input_data, decoder_output_data = prepared_data

    model_data = utils.load_keras_model(params.model)
    _, encoder_inputs, encoder_states, decoder_inputs, decoder_embedding, \
        decoder_lstm, decoder_dense = model_data
    enc_model, dec_model = make_inference_models(
        encoder_inputs, encoder_states, decoder_inputs, decoder_embedding,
        decoder_lstm, decoder_dense)
    end_index = tokenizer.word_index['end']

    for _ in range(10):
        states_values = enc_model.predict(
            str_to_tokens(tokenizer, input('Enter question : '),
                          max_len_questions))
        empty_target_seq = np.zeros((1, 1))
        empty_target_seq[0, 0] = tokenizer.word_index['start']
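        # The excerpt stops after seeding the decoder with the 'start'
        # token. A minimal sketch of the usual greedy decoding loop that
        # follows in this pattern (variable names below are assumptions):
        index_to_word = {i: w for w, i in tokenizer.word_index.items()}
        decoded_words = []
        while True:
            dec_outputs, h, c = dec_model.predict(
                [empty_target_seq] + states_values)
            sampled_word_index = int(np.argmax(dec_outputs[0, -1, :]))
            if (sampled_word_index == end_index
                    or len(decoded_words) > max_len_answers):
                break
            decoded_words.append(index_to_word.get(sampled_word_index,
                                                   'UNK'))
            # Feed the sampled token and the updated LSTM states back in.
            empty_target_seq = np.zeros((1, 1))
            empty_target_seq[0, 0] = sampled_word_index
            states_values = [h, c]
        print(' '.join(decoded_words))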
def predict(data, labels):
    """Load the trained model and print the first few predictions
    alongside the expected labels. (The original referenced undefined
    module-level X and y; the function's own arguments are used instead.)"""
    model = load_keras_model()
    # Note: predict_classes exists only on Sequential models in older
    # Keras versions; newer code would argmax over model.predict(data).
    predictions = model.predict_classes(data)
    for i in range(5):
        print('%s => %d (expected %d)'
              % (data[i].tolist(), predictions[i], labels[i]))
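
# Hypothetical smoke test for `predict` above: random features and labels
# just to exercise the printout (the feature count of 8 and binary labels
# are assumptions); real callers would pass their held-out test split.
import numpy as np

demo_x = np.random.rand(5, 8)
demo_y = np.random.randint(0, 2, 5)
predict(demo_x, demo_y)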
import params
import utils
from data import load_data, create_tokenizer, tokenize_q_a, prepare_data
from keras.callbacks import ModelCheckpoint

if __name__ == '__main__':
    questions, answers = load_data(params.data_file_directory, params.files)
    VOCAB_SIZE = params.vocab_size
    tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE,
                                 params.unknown_token)
    tokenized_questions, tokenized_answers = tokenize_q_a(
        tokenizer, questions, answers)
    max_len_questions, max_len_answers, encoder_input_data, \
        decoder_input_data, decoder_output_data = \
        prepare_data(tokenized_questions, tokenized_answers)

    # new_model = load_model('model_test.h5')
    new_model = utils.load_keras_model('checkpoints/train2/cp-0004.hdf5')[0]

    # WARNING: this may overwrite the previous checkpoints!
    # Training will not resume from checkpoint + 1; it starts again
    # from epoch 1.
    checkpoint_path = params.checkpoints_save_path
    checkpoint = ModelCheckpoint(
        checkpoint_path,
        verbose=1,
        save_weights_only=params.checkpoints_save_weights_only,
        period=params.checkpoints_frequency)
    callbacks_list = [checkpoint]

    # fit the model (the remaining fit() arguments are truncated in this
    # excerpt)
    new_model.fit([encoder_input_data, decoder_input_data],
                  decoder_output_data,
                  callbacks=callbacks_list)
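
    # One way to address the overwrite warning above: pass initial_epoch
    # to fit() so epoch numbering (and epoch-templated checkpoint
    # filenames) resume from the restored checkpoint instead of restarting
    # at 1. The value 4 matches the cp-0004.hdf5 file loaded above;
    # `params.epochs` is an assumed setting.
    new_model.fit([encoder_input_data, decoder_input_data],
                  decoder_output_data,
                  callbacks=callbacks_list,
                  initial_epoch=4,
                  epochs=params.epochs)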