import json
import logging

import nltk
import tqdm

from graph import io

logging.basicConfig(level=logging.DEBUG)

if __name__ == "__main__":

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.ERROR)

    data_folder = "../data/"

    relations_data, _ = io.load_relation_graphs_from_file(
        data_folder +
        "wikipedia-wikidata/enwiki-20160501/semantic-graphs-filtered-training.02_06.json",
        load_vertices=False)
    logging.debug('Loaded, size: {}'.format(len(relations_data)))

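    # Caseless Stanford NER and POS taggers, accessed through the NLTK
    # wrappers; the paths point to a local Stanford CoreNLP installation.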
    ne_tagger = nltk.tag.stanford.StanfordNERTagger(
        "../resources/stanfordcorenlp/models-3.7.0/edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz",
        path_to_jar=
        "../resources/stanfordcorenlp/stanford-ner-2015-12-09/stanford-ner-3.6.0.jar"
    )
    pos_tagger = nltk.tag.stanford.StanfordPOSTagger(
        "../resources/stanfordcorenlp/models-3.7.0/edu/stanford/nlp/models/pos-tagger/english-caseless-left3words-distsim.tagger",
        path_to_jar=
        "../resources/stanfordcorenlp/stanford-postagger-full-2015-12-09/stanford-postagger-3.6.0.jar"
    )
    webquestions_utterances_tokens = [
        q_obj['tokens'] for q_obj in relations_data
    ]

Example #2

import argparse
import ast
import json

import embeddings
from graph import io

if __name__ == "__main__":

    # Command-line arguments used in this example.
    parser = argparse.ArgumentParser()
    parser.add_argument('model_name')
    parser.add_argument('mode', choices=['train', 'test', 'train-plus-test'])
    parser.add_argument('train_set')
    parser.add_argument('val_set')
    parser.add_argument('--model_params')
    parser.add_argument('--word_embeddings')
    parser.add_argument('--property_index')
    parser.add_argument('--models_folder', default="../trainedmodels/")
    parser.add_argument(
        '-s',
        action='store_true',
        help="Use only a portion of the training and validation sets.")

    args = parser.parse_args()

    model_name = args.model_name
    mode = args.mode

    with open(args.model_params) as f:
        model_params = json.load(f)

    # Use a distinct name so the `embeddings` module is not shadowed.
    embedding_matrix, word2idx = embeddings.load(args.word_embeddings)
    print("Loaded embeddings:", embedding_matrix.shape)

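    # Load the training and validation graphs, including the entity vertices.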
    training_data, _ = io.load_relation_graphs_from_file(args.train_set,
                                                         load_vertices=True)

    val_data, _ = io.load_relation_graphs_from_file(args.val_set,
                                                    load_vertices=True)

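    # With -s, keep only the first third of each set for quick experiments.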
    if args.s:
        training_data = training_data[:len(training_data) // 3]
        print("Training data size set to: {}".format(len(training_data)))
        val_data = val_data[:len(val_data) // 3]
        print("Validation data size set to: {}".format(len(val_data)))

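    # property2idx maps relation properties to class indices; in test mode it
    # is restored from the file saved next to the trained model.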
    if mode in ['test', 'train-plus-test']:
        print("Reading the property index")
        with open(args.models_folder + model_name + ".property2idx") as f:
            property2idx = ast.literal_eval(f.read())
    elif args.property_index:
        print("Reading the property index from: {}".format(args.property_index))
        with open(args.property_index) as f:
            property2idx = ast.literal_eval(f.read())
Example #3

import argparse

import embeddings
import keras_models
from graph import io

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('model_name')
    parser.add_argument('test_set')
    parser.add_argument('--models_folder', default="../trainedmodels/")

    args = parser.parse_args()

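    # Rebuild the model inputs exactly as at training time: the same word
    # embeddings, maximum sentence length and number of output classes.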
    embedding_matrix, word2idx = embeddings.load(
        keras_models.model_params['wordembeddings'])
    print("Loaded embeddings:", embedding_matrix.shape)
    max_sent_len = keras_models.model_params['max_sent_len']
    n_out = len(keras_models.property2idx)

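    # Look up the model-building function by name and restore its trained weights.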
    model = getattr(keras_models,
                    args.model_name)(keras_models.model_params,
                                     embedding_matrix, max_sent_len, n_out)
    model.load_weights(args.models_folder + args.model_name + ".kerasmodel")
    print("Loaded the model")
    test_data, _ = io.load_relation_graphs_from_file(args.test_set,
                                                     load_vertices=True)
    print("Loaded the test set")

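    # Pick the graph-to-indices conversion that matches the model family.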
    graphs_to_indices = keras_models.to_indices
    if "Context" in args.model_name:
        to_one_hot = embeddings.timedistributed_to_one_hot
        graphs_to_indices = keras_models.to_indices_with_extracted_entities
    elif "CNN" in args.model_name:
        graphs_to_indices = keras_models.to_indices_with_relative_positions

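    # Convert the test graphs to padded index matrices; the last array of the
    # tuple holds the gold relation labels.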
    test_as_indices = list(graphs_to_indices(test_data, word2idx))
    test_data = None  # release the raw graphs; only the index arrays are needed now

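    # `evaluate` is assumed to be defined earlier in the original script; it
    # returns the predicted classes and the raw prediction scores.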
    print("Results on the test set")
    predictions_classes, predictions = evaluate(model, test_as_indices[:-1],
                                                test_as_indices[-1])