Example #1
0
# NOTE(review): truncated fragment — the `elif 'semafor'` branch on the last
# line has no body in this view; the loop continues past what is shown.
# Accumulator for extracted RDF-style triples across all documents.
triples = list()
# Root XML node collecting one entry per frame instance (lxml.objectify).
root = objectify.Element("frameinstances")
for filename in documents:
    # Per-document triple accumulator, reset for each input file.
    frame_instance_triples = list()

    # Read the whole document into memory as a single string.
    log.info("opening file {0}".format(filename))
    with open(filename, 'r') as f:
        text = f.read()

    # Semantic parsing: backend is chosen via the 'semantics'/'module'
    # config option ('boxer' here; 'semafor' branch is cut off below).
    if config.get('semantics', 'module') == 'boxer':
        # Tokenize unless the input is already tokenized (CLI flag).
        if not options.tokenized:
            log.info("Tokenization with t")
            tokens = candc.tokenize(text)
            # Tokenizer failure is non-fatal for the batch: log and
            # skip this document, keep processing the rest.
            if not tokens:
                log.error("error during tokenization of file '{0}', exiting".format(filename))
                continue
            # One sentence per line, tokens space-separated.
            # NOTE(review): tokens[:-1] drops the last sentence — presumably a
            # trailing empty element from the tokenizer; verify against candc.tokenize.
            tokenized = "\n".join([' '.join(sentence) for sentence in tokens[:-1]])
        else:
            tokenized = text


        # Run the Boxer semantic parser on the tokenized text.
        log.info("Parsing with Boxer")
        semantics = candc.get_all(tokenized)
        # Parser failure: log and skip this document.
        if not semantics:
            log.error("error during the execution of Boxer on file '{0}', exiting".format(filename))
            continue

    elif config.get('semantics', 'module') == 'semafor':
Example #2
0
# NOTE(review): truncated fragment — this `else:` belongs to an `if` that is
# outside the visible span (it appears to default `output_format` from the
# CLI options); code below is kept byte-identical.
else:
    output_format = options.format

# Accumulators for extracted triples (global and per-document) and the
# XML root collecting frame instances (lxml.objectify).
triples = list()
frame_instance_triples = list()
root = objectify.Element("frameinstances")
for filename in documents:
    # Read the whole document into memory as a single string.
    log.info("opening file {0}".format(filename))
    with open(filename) as f:
        text = f.read()

    # Tokenize unless the input is already tokenized (CLI flag).
    if not options.tokenized:
        log.info("Tokenization")
        tokens = tokenize(text)
        # Tokenizer failure is non-fatal for the batch: log and skip
        # this document, keep processing the rest.
        if not tokens:
            log.error(
                "error during tokenization of file '{0}', exiting".format(
                    filename))
            continue
        # One sentence per line, tokens space-separated.
        # NOTE(review): tokens[:-1] drops the last sentence — presumably a
        # trailing empty element from the tokenizer; confirm against tokenize().
        tokenized = "\n".join([' '.join(sentence) for sentence in tokens[:-1]])
    else:
        tokenized = text

    # Run the (Boxer) semantic parser; skip the document on failure.
    log.info("Parsing")
    drs = get_all(tokenized)
    if not drs:
        log.error("error during the execution of Boxer on file '{0}', exiting".
                  format(filename))
        continue
    # NOTE(review): re-assigning output_format inside the loop looks
    # redundant with the `else:` branch above — verify intent.
    output_format = options.format


# NOTE(review): truncated fragment — the loop body continues past the last
# visible line (after the disambiguation call); code kept byte-identical.
# Accumulators for extracted triples (global and per-document) and the
# XML root collecting frame instances (lxml.objectify).
triples = list()
frame_instance_triples = list()
root = objectify.Element("frameinstances")
for filename in documents:
    # Read the whole document into memory as a single string.
    log.info("opening file {0}".format(filename))
    with open(filename) as f:
        text = f.read()

    # Tokenize unless the input is already tokenized (CLI flag).
    if not options.tokenized:
        log.info("Tokenization")
        tokens = tokenize(text)
        # Tokenizer failure is non-fatal for the batch: log and skip
        # this document, keep processing the rest.
        if not tokens:
            log.error("error during tokenization of file '{0}', exiting".format(filename))
            continue
        # One sentence per line, tokens space-separated.
        # NOTE(review): tokens[:-1] drops the last sentence — presumably a
        # trailing empty element from the tokenizer; confirm against tokenize().
        tokenized = "\n".join([' '.join(sentence) for sentence in tokens[:-1]])
    else:
        tokenized = text


    # Run the (Boxer) semantic parser; skip the document on failure.
    log.info("Parsing")
    drs = get_all(tokenized)
    if not drs:
        log.error("error during the execution of Boxer on file '{0}', exiting".format(filename))
        continue
    # Word-sense disambiguation and entity linking over the parsed DRS;
    # returns synsets and linked entities for this document.
    log.info("Word sense disambiguation and entity linking")
    synsets, entities = disambiguation(tokenized, drs)