Example 1
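    # Assumed imports (not shown in the original fragment; LogWriter,
    # Training_Text_Generator_RNN, get_extremes and the dataset helper
    # objects are project-local modules and are omitted here):
    # import matplotlib.pyplot as plt
    # from keras.models import Sequential
    # from keras.layers import Dense, LSTM, Bidirectional
    # from keras.utils import plot_model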
    log_writer = LogWriter(log_file_desc=test_name,
                           result_desc="NeuralTopicModel")

    log_writer.write_any('model', autoencoder.to_json(), 'w+', True)
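    # Plot training vs. validation loss from the Keras History object.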
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(loss) + 1)
    plt.plot(epochs, loss, 'g', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss {}'.format(
        dataset_helper.get_dataset_name()))
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(
        log_writer.get_plot_path(dataset_helper.get_dataset_name(), "loss"))
    plt.clf()
    """topic_words_in = [sorted(topic_words,key=lambda x: x[1],reverse=True) for topic_words in topic_words_in]
    topic_words_out = [sorted(topic_words,key=lambda x: x[1],reverse=True) for topic_words in topic_words_out]
    log_writer = LogWriter(log_file_desc='LDATestsRegularize{}'.format(regularization))
    log_writer.write_2D_list('topic_words_in', topic_words_in)
    log_writer.write_2D_list('topic_words_out', topic_words_out)"""
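    # Extract the highest- and lowest-weighted words per topic from the
    # encoder weight matrix and log them.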

    topic_words_in_max = get_extremes(weight_in, num_of_topics,
                                      num_of_important_words, reverse_word_map,
                                      True, 'topic_words_in_max', log_writer,
                                      dataset_helper.get_dataset_name())
    topic_words_in_min = get_extremes(weight_in, num_of_topics,
                                      num_of_important_words, reverse_word_map,
                                      False, 'topic_words_in_min', log_writer,
                                      dataset_helper.get_dataset_name())
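    # A rough sketch of what get_extremes presumably does (the real helper is
    # project code; the weight-matrix shape (num_of_words, num_of_topics) is
    # our assumption):
    # def get_extremes(weights, num_of_topics, num_of_important_words,
    #                  reverse_word_map, maximal, name, log_writer, dataset_name):
    #     topic_words = []
    #     for topic in range(num_of_topics):
    #         column = weights[:, topic]
    #         order = column.argsort()
    #         picked = order[-num_of_important_words:][::-1] if maximal else order[:num_of_important_words]
    #         topic_words.append([(reverse_word_map[i], column[i]) for i in picked])
    #     log_writer.write_2D_list(name, topic_words)
    #     return topic_words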
    result.append(datasets_helper.get_dataset_name())
    model.summary(print_fn=result.append)
    results.append(result)
    results_saver.add_log("Done. Finishing this dataset.")
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(loss) + 1)
    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss {}'.format(
        datasets_helper.get_dataset_name()))
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(
        results_saver.get_plot_path(datasets_helper.get_dataset_name(),
                                    "loss"))

    plt.clf()
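    # Plot training vs. validation accuracy for the same run.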
    acc = history.history['acc']
    val_acc = history.history['val_acc']
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy {}'.format(
        datasets_helper.get_dataset_name()))
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig(
        results_saver.get_plot_path(datasets_helper.get_dataset_name(), "acc"))
    plt.clf()
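    # Classifier: two stacked bidirectional LSTM layers feeding a softmax
    # over the dataset's topic classes.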
    model.add(
        Bidirectional(LSTM(enhanced_num_of_topics, return_sequences=True)))
    model.add(Bidirectional(LSTM(enhanced_num_of_topics)))
    # Disabled alternative layer configurations:
    # model.add(Bidirectional(LSTM(enhanced_num_of_topics, activation='relu')))  # dropout=0.1, recurrent_dropout=0.5, input_shape=(num_of_words,), return_sequences=True
    # model.add(LSTM(40, activation='relu'))
    # model.add(Dense(enhanced_num_of_topics, activation='relu', input_shape=(num_of_words,)))
    # model.add(Dense(enhanced_num_of_topics, activation='relu'))
    model.add(Dense(datasets_helper.get_num_of_topics(), activation='softmax'))

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()  # summary() prints directly; print(model.summary()) would also print "None"
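    # Save a diagram of the architecture; Keras' plot_model requires pydot
    # and graphviz to be installed.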
    plot_model(model,
               results_saver.get_plot_path("", "model-graph"),
               show_shapes=True)
    results_saver.add_log("Done. Now lets get training.")
    batch_size = 512
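    # Training_Text_Generator_RNN presumably streams the ';'-separated train
    # file in batches of (sequences, one-hot topic labels), so the whole
    # corpus never has to sit in memory.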
    history = model.fit(
        x=Training_Text_Generator_RNN(datasets_helper.get_train_file_path(),
                                      batch_size,
                                      datasets_helper.get_num_of_train_texts(),
                                      num_of_words, tokenizer, ";",
                                      datasets_helper.get_num_of_topics()),
        epochs=5,
        validation_data=Training_Text_Generator_RNN(
            datasets_helper.get_train_file_path(),
            batch_size,
            validation_count,
            num_of_words,