triples = list() root = objectify.Element("frameinstances") for filename in documents: frame_instance_triples = list() # read file log.info("opening file {0}".format(filename)) with open(filename, 'r') as f: text = f.read() # semantic parsing if config.get('semantics', 'module') == 'boxer': # tokenization if not options.tokenized: log.info("Tokenization with t") tokens = candc.tokenize(text) if not tokens: log.error("error during tokenization of file '{0}', exiting".format(filename)) continue tokenized = "\n".join([' '.join(sentence) for sentence in tokens[:-1]]) else: tokenized = text log.info("Parsing with Boxer") semantics = candc.get_all(tokenized) if not semantics: log.error("error during the execution of Boxer on file '{0}', exiting".format(filename)) continue elif config.get('semantics', 'module') == 'semafor':
else: output_format = options.format triples = list() frame_instance_triples = list() root = objectify.Element("frameinstances") for filename in documents: # read file log.info("opening file {0}".format(filename)) with open(filename) as f: text = f.read() # tokenization if not options.tokenized: log.info("Tokenization") tokens = tokenize(text) if not tokens: log.error( "error during tokenization of file '{0}', exiting".format( filename)) continue tokenized = "\n".join([' '.join(sentence) for sentence in tokens[:-1]]) else: tokenized = text log.info("Parsing") drs = get_all(tokenized) if not drs: log.error("error during the execution of Boxer on file '{0}', exiting". format(filename)) continue
# Boxer parse followed by word-sense disambiguation and entity linking,
# applied to every input document.
output_format = options.format
triples = []
frame_instance_triples = []
root = objectify.Element("frameinstances")
for filename in documents:
    # Load the raw document text.
    log.info("opening file {0}".format(filename))
    with open(filename) as f:
        text = f.read()

    # Sentence-split/tokenize unless the input is already tokenized.
    if not options.tokenized:
        log.info("Tokenization")
        tokens = tokenize(text)
        if not tokens:
            # Tokenizer failed: skip this document and move on.
            log.error("error during tokenization of file '{0}', exiting".format(filename))
            continue
        # Join all but the last element of tokens (presumably a trailing
        # empty sentence from the tokenizer — TODO confirm).
        tokenized = "\n".join(' '.join(s) for s in tokens[:-1])
    else:
        tokenized = text

    # Produce the discourse representation structure for the document.
    log.info("Parsing")
    drs = get_all(tokenized)
    if not drs:
        # Boxer failed: skip this document and move on.
        log.error("error during the execution of Boxer on file '{0}', exiting".format(filename))
        continue

    # Disambiguate word senses and link entities against the parsed DRS.
    log.info("Word sense disambiguation and entity linking")
    synsets, entities = disambiguation(tokenized, drs)