Example #1
0
    # semantic parsing
    if config.get('semantics', 'module') == 'boxer':
        # tokenization
        if not options.tokenized:
            log.info("Tokenization with t")
            tokens = candc.tokenize(text)
            if not tokens:
                log.error("error during tokenization of file '{0}', exiting".format(filename))
                continue
            tokenized = "\n".join([' '.join(sentence) for sentence in tokens[:-1]])
        else:
            tokenized = text


        log.info("Parsing with Boxer")
        semantics = candc.get_all(tokenized)
        if not semantics:
            log.error("error during the execution of Boxer on file '{0}', exiting".format(filename))
            continue

    elif config.get('semantics', 'module') == 'semafor':
        log.info("Parsing with Semafor")
        semantics, tokenized = semafor.parse(text)

        if not semantics:
            log.error("error during the execution of Semafor on file '{0}', exiting".format(filename))
            continue

    log.info("Word sense disambiguation and entity linking")
    synsets, entities = disambiguation(tokenized, semantics)
    if synsets==None or entities==None:
Example #2
0
    # NOTE(review): fragment of a per-file processing loop (bare `continue`
    # statements; the enclosing `for` is outside this excerpt).
    # tokenization
    if not options.tokenized:
        log.info("Tokenization")
        tokens = tokenize(text)
        if not tokens:
            log.error(
                "error during tokenization of file '{0}', exiting".format(
                    filename))
            continue
        # One sentence per line, tokens space-separated; tokens[:-1] drops the
        # last element — presumably a trailing empty sentence from the
        # tokenizer output. TODO confirm against tokenize().
        tokenized = "\n".join([' '.join(sentence) for sentence in tokens[:-1]])
    else:
        # Input is already tokenized; use it verbatim.
        tokenized = text

    log.info("Parsing")
    # `drs` — presumably Discourse Representation Structures produced by Boxer
    # (the error message below names Boxer); verify against get_all().
    drs = get_all(tokenized)
    if not drs:
        log.error("error during the execution of Boxer on file '{0}', exiting".
                  format(filename))
        continue
    log.info("Word sense disambiguation and entity linking")
    synsets, entities = disambiguation(tokenized, drs)
    if synsets == None or entities == None:
        log.error(
            "error during the disambiguation of file '{0}', exiting".format(
                filename))
        continue

    # extracting co-mentions
    if options.comentions:
        # Deduplicate by the 'entity' field of each linked-entity record.
        dbpedia_entities = set(map(lambda x: x['entity'], entities))
        # NOTE(review): this `f.read()` looks out of place — no `f` is opened
        # in the visible excerpt; possibly two snippets were fused together
        # during extraction. Verify against the original source.
        text = f.read()

    # tokenization
    # NOTE(review): this repeats the tokenization/parsing block above — likely
    # the next iteration of the enclosing loop, inlined by the excerpt.
    if not options.tokenized:
        log.info("Tokenization")
        tokens = tokenize(text)
        if not tokens:
            log.error("error during tokenization of file '{0}', exiting".format(filename))
            continue
        tokenized = "\n".join([' '.join(sentence) for sentence in tokens[:-1]])
    else:
        tokenized = text


    log.info("Parsing")
    drs = get_all(tokenized)
    if not drs:
        log.error("error during the execution of Boxer on file '{0}', exiting".format(filename))
        continue
    log.info("Word sense disambiguation and entity linking")
    synsets, entities = disambiguation(tokenized, drs)
    if synsets==None or entities==None:
		log.error("error during the disambiguation of file '{0}', exiting".format(filename))
		continue

    # extracting co-mentions
    # Co-mentions: unordered pairs of distinct DBpedia entities linked within
    # the same document.
    if options.comentions:
        # Deduplicate by the 'entity' field of each linked-entity record.
        dbpedia_entities = set(map(lambda x: x['entity'], entities))
        # Iterate over all unordered pairs of distinct entities.
        for entity1, entity2 in combinations(dbpedia_entities, 2):
            # Skip pairs involving 'null' — presumably the placeholder for an
            # unlinked mention; confirm against the entity-linking output.
            # NOTE(review): excerpt ends mid-statement — the body of this `if`
            # is not shown here.
            if (entity1 != 'null' and
                entity2 != 'null'):