Example 1
def home():
    K.clear_session()
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('No file selected for uploading')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            filepath = os.path.join(UPLOAD_FOLDER, filename)
            file.save(filepath)
            flash('File successfully uploaded')
            load_keras_model()
            kq = hienthi_kq(filepath)
            print(kq)

            response = {'path': 'static/images/' + filename,
                        'text': kq}
            return render_template('index.html', response=response)

        else:
            flash('Allowed file types are txt, pdf, png, jpg, jpeg, gif')
            return redirect(request.url)
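This excerpt omits a few things it depends on: the Flask route decorator on home(), the UPLOAD_FOLDER constant (which, given the response path, presumably points at static/images/), the Keras backend import behind K, and the allowed_file helper. A minimal sketch of that helper, following the standard Flask upload recipe (the extension whitelist mirrors the flash message above):

ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'}

def allowed_file(filename):
    # Accept only filenames that carry a whitelisted extension.
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS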
Example 2
    def __init__(self, model_filename, tokenizer, mle_model, bigramer,
                 max_len_questions, max_len_answers, strategy):
        self._tokenizer = tokenizer
        # Reverse mapping (token index -> word), used when decoding predictions.
        self._tokenizer_index_to_word = {
            index: word
            for (word, index) in self._tokenizer.word_index.items()
        }

        self._mle_model = mle_model
        self._bigramer = bigramer
        self._max_len_questions = max_len_questions
        self._max_len_answers = max_len_answers

        model_data = utils.load_keras_model(model_filename)
        (_, self._encoder_inputs, self._encoder_states, self._decoder_inputs,
         self._decoder_embedding, self._decoder_lstm,
         self._decoder_dense) = model_data

        self._enc_model, self._dec_model = utils.make_inference_models(
            self._encoder_inputs, self._encoder_states, self._decoder_inputs,
            self._decoder_embedding, self._decoder_lstm, self._decoder_dense)
        self._strategy = strategy
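Every example on this page unpacks the same seven-element tuple from utils.load_keras_model. The helper itself is not shown here; a speculative sketch of what it plausibly does, assuming a standard encoder-decoder LSTM model (the layer indices are guesses and depend entirely on how the model was built):

from tensorflow.keras.models import load_model

def load_keras_model(filename):
    # Load the trained seq2seq model and pull out the tensors/layers the
    # callers need for inference. Layer positions below are assumptions.
    model = load_model(filename)
    encoder_inputs, decoder_inputs = model.inputs
    _, state_h, state_c = model.layers[4].output      # assumed encoder LSTM
    encoder_states = [state_h, state_c]
    decoder_embedding = model.layers[3].output        # assumed decoder embedding
    decoder_lstm = model.layers[5]                    # assumed decoder LSTM
    decoder_dense = model.layers[6]                   # assumed output Dense
    return (model, encoder_inputs, encoder_states, decoder_inputs,
            decoder_embedding, decoder_lstm, decoder_dense)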
Example 3
    @classmethod
    def load_from_params(cls):
        """
        Creates a chatbot from params.py.
        Good for development, not for production.

        :return: Chatbot initialized from params.py.
        """

        import params
        # load data
        questions, answers = load_data(params.data_file_directory,
                                       params.files, params.encoding)
        bigramer = Bigramer(params.bigramer)

        # prepare data manipulators
        VOCAB_SIZE = params.vocab_size
        tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE,
                                     params.unknown_token)
        tokenized_questions, tokenized_answers = tokenize_q_a(
            tokenizer, questions, answers)

        # prepare data
        prepared_data = prepare_data(tokenized_questions, tokenized_answers)
        max_len_questions, max_len_answers, *_ = prepared_data

        # mle_model
        reversed_tokenizer_word_dict = {
            index: word
            for (word, index) in tokenizer.word_index.items()
        }
        mle_model = utils.fit_mle_model(tokenized_answers,
                                        reversed_tokenizer_word_dict)

        # load model (note: the constructor will reload it from params.model,
        # so the layers unpacked below go unused here)
        model_data = utils.load_keras_model(params.model)
        (_, encoder_inputs, encoder_states, decoder_inputs, decoder_embedding,
         decoder_lstm, decoder_dense) = model_data

        return cls(params.model, tokenizer, mle_model, bigramer,
                   max_len_questions, max_len_answers, params.strategy)
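Example 2's constructor (and Examples 6 and 8 below) pass the unpacked layers to make_inference_models. That function is also not shown on this page; the standard Keras seq2seq inference recipe it presumably follows looks like this, with the LSTM state size of 200 as an assumption:

from tensorflow.keras import layers, models

def make_inference_models(encoder_inputs, encoder_states, decoder_inputs,
                          decoder_embedding, decoder_lstm, decoder_dense):
    # Encoder model: padded question in, LSTM states [h, c] out.
    encoder_model = models.Model(encoder_inputs, encoder_states)

    # Decoder model: decode one step at a time from the previous states.
    state_input_h = layers.Input(shape=(200,))   # assumed state size
    state_input_c = layers.Input(shape=(200,))
    states_inputs = [state_input_h, state_input_c]

    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_embedding, initial_state=states_inputs)
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = models.Model([decoder_inputs] + states_inputs,
                                 [decoder_outputs, state_h, state_c])

    return encoder_model, decoder_model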
Example 4
    # This will be replaced with the test data when grading the assignment
    data_path = '../data/data.npz'
    x, y = load_data(data_path)
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    ############################################################################
    # EDITABLE SECTION OF THE SCRIPT: if you need to edit the script, do it here
    ############################################################################

    # Load the Linear Regression model
    linear_regressor = load_sklearn_model("./Linear_Regression.pickle")
    # Load the Neural Network model
    regressor = load_keras_model("./Neural_Network.pickle")

    # Model from Task 1

    # Number of data points
    n = len(x)
    # Engineered feature for the Task 1 model: x3 = sin(x1) * x2, one value
    # per sample (np.sin replaces the bare sin of the original, which would
    # need its own import)
    x3 = np.array([np.sin(x1) * x2 for x1, x2 in x])
    # Build the X matrix by inserting x3 as a third column
    X = np.insert(x, 2, x3, axis=1)

    # Make the predictions of the model
    # Load the test CIFAR-10 data
    (x_train, y_train), (x_test, y_test) = utils.load_cifar10()

    # Pre-processing
    # Normalize each pixel of each channel so that the range is [0, 1];
    # each pixel is represented by an integer value in the 0-255 range.
    x_train, x_test = x_train / 255., x_test / 255.

    # Create one-hot encoding of the labels;
    # pre-process targets in order to perform multi-class classification.
    n_classes = 3
    y_train = tf_utils.to_categorical(y_train, n_classes)
    y_test = tf_utils.to_categorical(y_test, n_classes)

    # Load the trained models
    model_task2 = utils.load_keras_model('../deliverable/nn_task2.h5')
    model_task1 = utils.load_keras_model('../deliverable/nn_task1.h5')

    # Predict on the given samples
    y_pred_task1 = model_task1.predict(x_test)
    y_pred_task2 = model_task2.predict(x_test)

    # prepare data for f1 score:
    y_test_f1 = np.argmax(y_test, axis=1).astype(int)
    y_pred_task1_f1 = np.argmax(y_pred_task1, axis=1).astype(int)
    y_pred_task2_f1 = np.argmax(y_pred_task2, axis=1).astype(int)

    """ Accuracy comparison between model T1 and model T2 """

    # data check
    assert y_test.shape == y_pred_task1.shape
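The excerpt stops at the shape check. A sketch of how the comparison it announces could be computed (assuming scikit-learn is available; the original script's metric code is not shown on this page):

    from sklearn.metrics import accuracy_score, f1_score

    # Accuracy and macro-averaged F1 for both models on the same test split.
    for name, y_pred_f1 in [('Task 1', y_pred_task1_f1),
                            ('Task 2', y_pred_task2_f1)]:
        acc = accuracy_score(y_test_f1, y_pred_f1)
        f1 = f1_score(y_test_f1, y_pred_f1, average='macro')
        print('%s: accuracy=%.4f, macro-F1=%.4f' % (name, acc, f1))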
Example 6
def analyze_checkpoints():
    questions, answers = load_data(params.data_file_directory, params.files,
                                   None)
    VOCAB_SIZE = 15001

    tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE, 'UNK')
    # tokenizer = utils.load_and_unpickle("test_models/tokenizer")

    tokenized_questions, tokenized_answers = tokenize_q_a(
        tokenizer, questions, answers)

    reversed_tokenizer_word_dict = {
        index: text
        for text, index in tokenizer.word_index.items()
    }
    mle_model = utils.fit_mle_model(tokenized_answers,
                                    reversed_tokenizer_word_dict)

    max_len_questions, max_len_answers, encoder_input_data, decoder_input_data, decoder_output_data = \
        prepare_data(tokenized_questions, tokenized_answers)

    checkpoints = [
        params.dir_name + file for file in os.listdir(params.dir_name)
        if file.endswith("hdf5")
    ]
    print(f"{len(checkpoints)} checkpoints")

    results = defaultdict(list)
    model_score = []

    # model evaluations section
    questions, answers = load_data(params.data_file_directory,
                                   params.test_files)
    enc_in_data, dec_in_data, dec_out_data = generate_test_values(
        questions[:1000], answers[:1000], tokenizer)

    # generating answer and perplexity section
    texts = questions[:5]

    for checkpoint in checkpoints:
        (net_model, encoder_inputs, encoder_states, decoder_inputs,
         decoder_embedding, decoder_lstm,
         decoder_dense) = utils.load_keras_model(checkpoint)

        enc_model, dec_model = conversation.make_inference_models(
            encoder_inputs, encoder_states, decoder_inputs, decoder_embedding,
            decoder_lstm, decoder_dense)

        score = net_model.evaluate([enc_in_data, dec_in_data], dec_out_data)
        model_score.append(score)
        print(score)
        for text in texts:
            print(text)
            states_values = enc_model.predict(
                conversation.str_to_tokens(tokenizer, text, max_len_questions))
            empty_target_seq = np.zeros((1, 1))
            empty_target_seq[0, 0] = tokenizer.word_index['start']
            end_index = tokenizer.word_index['end']

            predictions, _ = beam_search(states_values, empty_target_seq,
                                         dec_model, end_index)

            decoded_texts = []
            for prediction in predictions:
                decoded_text = ['start']
                for word_index in prediction[1:]:
                    decoded_text.append(
                        reversed_tokenizer_word_dict.get(word_index, 'UNK'))
                decoded_texts.append(decoded_text)
            result = choose_best_fit(decoded_texts, mle_model)
            results[text].append(result)

    utils.pickle_and_save(results, params.perplexity_file)
    utils.pickle_and_save(model_score, params.model_summary_file)


def test():
    questions, answers = load_data("prepare_data/output_files",
                                   "preprocessed_cornell", None)
    VOCAB_SIZE = 15001

    tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE, 'UNK')
    tokenized_questions, tokenized_answers = tokenize_q_a(
        tokenizer, questions, answers)

    reversed_tokenizer_word_dict = {
        index: text
        for text, index in tokenizer.word_index.items()
    }
    mle_model = utils.fit_mle_model(tokenized_answers,
                                    reversed_tokenizer_word_dict)

    max_len_questions, max_len_answers, encoder_input_data, decoder_input_data, decoder_output_data = \
        prepare_data(tokenized_questions, tokenized_answers)

    _, encoder_inputs, encoder_states, decoder_inputs, \
        decoder_embedding, decoder_lstm, decoder_dense = utils.load_keras_model('cornell.hdf5')

    enc_model, dec_model = conversation.make_inference_models(
        encoder_inputs, encoder_states, decoder_inputs, decoder_embedding,
        decoder_lstm, decoder_dense)

    texts = [
        'stop talking shit', 'it is peanut butter jelly time',
        'Are we going to pass this lecture', 'Where are you from',
        'do you like me', 'carrot', 'tell me your biggest secret',
        'How are you', 'do you know me', 'what does fox say', 'i am happy',
        'this is america', 'kill me', 'do not forget to brush your teeth'
    ]
    for text in texts:
        print(text)
        states_values = enc_model.predict(
            conversation.str_to_tokens(tokenizer, text, max_len_questions))
        empty_target_seq = np.zeros((1, 1))
        empty_target_seq[0, 0] = tokenizer.word_index['start']
        end_index = tokenizer.word_index['end']

        predictions, _ = beam_search(states_values, empty_target_seq,
                                     dec_model, end_index)

        decoded_texts = []
        for prediction in predictions:
            decoded_text = ['start']
            for word_index in prediction[1:]:
                decoded_text.append(
                    reversed_tokenizer_word_dict.get(word_index, 'UNK'))
            decoded_texts.append(decoded_text)
        print(utils.choose_best(decoded_texts, mle_model))

        # for prediction in predictions:
        #     decoded_translation = ''
        #     for sampled_word_index in prediction[1:]:
        #         decoded_translation += ' {}'.format(reversed_tokenizer_word_dict[sampled_word_index])
        #     print(decoded_translation)

        # print(predictions)
        print()
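Both functions above tokenize the input question with conversation.str_to_tokens before calling the encoder. A sketch consistent with how it is used here (map words to token ids with the 'UNK' fallback the tokenizer was built with, then pad to the encoder's input length):

from tensorflow.keras.preprocessing.sequence import pad_sequences

def str_to_tokens(tokenizer, sentence, max_len_questions):
    # Words outside the vocabulary fall back to the UNK token id.
    tokens = [tokenizer.word_index.get(word, tokenizer.word_index['UNK'])
              for word in sentence.lower().split()]
    return pad_sequences([tokens], maxlen=max_len_questions, padding='post')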
Example 8


if __name__ == '__main__':
    questions, answers = load_data(params.data_file_directory, params.files,
                                   params.encoding)
    VOCAB_SIZE = params.vocab_size
    tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE,
                                 params.unknown_token)
    tokenized_questions, tokenized_answers = tokenize_q_a(
        tokenizer, questions, answers)

    prepared_data = prepare_data(tokenized_questions, tokenized_answers)
    (max_len_questions, max_len_answers, encoder_input_data,
     decoder_input_data, decoder_output_data) = prepared_data

    model_data = utils.load_keras_model(params.model)
    (_, encoder_inputs, encoder_states, decoder_inputs, decoder_embedding,
     decoder_lstm, decoder_dense) = model_data

    enc_model, dec_model = make_inference_models(encoder_inputs,
                                                 encoder_states,
                                                 decoder_inputs,
                                                 decoder_embedding,
                                                 decoder_lstm, decoder_dense)

    end_index = tokenizer.word_index['end']
    for _ in range(10):
        states_values = enc_model.predict(
            str_to_tokens(tokenizer, input('Enter question : '),
                          max_len_questions))
        empty_target_seq = np.zeros((1, 1))
        empty_target_seq[0, 0] = tokenizer.word_index['start']
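The excerpt is cut off mid-loop. The usual continuation of this greedy decoding loop, following the standard Keras seq2seq recipe (a sketch, not necessarily this project's exact code):

        stop_condition = False
        decoded_translation = ''
        while not stop_condition:
            dec_outputs, h, c = dec_model.predict([empty_target_seq] +
                                                  states_values)
            sampled_word_index = np.argmax(dec_outputs[0, -1, :])
            if sampled_word_index == end_index or \
                    len(decoded_translation.split()) > max_len_answers:
                stop_condition = True
            else:
                # Append the sampled word, then feed it back as the next input.
                for word, index in tokenizer.word_index.items():
                    if sampled_word_index == index:
                        decoded_translation += ' {}'.format(word)
                        break
                empty_target_seq = np.zeros((1, 1))
                empty_target_seq[0, 0] = sampled_word_index
                states_values = [h, c]
        print(decoded_translation)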
Example 9
def predict(data):
    model = load_keras_model()
    # predict_classes exists only on Sequential models in older Keras
    # (see the note below for newer versions).
    predictions = model.predict_classes(data)
    # X and y are assumed module-level globals: the raw inputs and the
    # expected labels for the samples being printed.
    for i in range(5):
        print('%s => %d (expected %d)' % (X[i].tolist(), predictions[i], y[i]))
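Note that Sequential.predict_classes was removed in TensorFlow 2.6; on newer versions the equivalent is:

predictions = np.argmax(model.predict(data), axis=-1)            # softmax output
# predictions = (model.predict(data) > 0.5).astype('int32')      # single sigmoid output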
Example 10
import params  # assumed import; the script references params.* throughout
import utils
from data import load_data, create_tokenizer, tokenize_q_a, prepare_data
# Assumed import for the callback below (period= suggests an older Keras):
from keras.callbacks import ModelCheckpoint

if __name__ == '__main__':
    questions, answers = load_data(params.data_file_directory, params.files)
    VOCAB_SIZE = params.vocab_size
    tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE,
                                 params.unknown_token)
    tokenized_questions, tokenized_answers = tokenize_q_a(
        tokenizer, questions, answers)

    max_len_questions, max_len_answers, encoder_input_data, decoder_input_data, decoder_output_data = \
        prepare_data(tokenized_questions, tokenized_answers)

    # new_model = load_model('model_test.h5')
    new_model = utils.load_keras_model('checkpoints/train2/cp-0004.hdf5')[0]
    """
        Może nadpisać poprzednie checkpointy!!!!!!!! nie zacznie od checkpoint + 1 tylko od 1
    """
    checkpoint_path = params.checkpoints_save_path
    checkpoint = ModelCheckpoint(
        checkpoint_path,
        verbose=1,
        save_weights_only=params.checkpoints_save_weights_only,
        period=params.checkpoints_frequency)
    callbacks_list = [checkpoint]

    # fit the model (the excerpt is truncated here; batch_size/epochs below
    # are assumed argument names, consistent with how params is used elsewhere)
    new_model.fit([encoder_input_data, decoder_input_data],
                  decoder_output_data,
                  callbacks=callbacks_list,
                  batch_size=params.batch_size,
                  epochs=params.epochs)