def main(argv):
    args = parse_args(argv, _add_logger_args)
    config_logger(args)
    logger.info('Starting')
    from srl_nlp.analysers.boxer import BoxerLocalAPI
    boxer = BoxerLocalAPI()
    kb_fr_path = path.join(args.kb_path, args.kb_fr)
    kb_fe_path = path.join(args.kb_path, args.kb_fe)
    anno = PrologBoxerAnnotator(boxer, kb_fr_path, kb_fe_path)

    print 'LF: %s' % boxer.sentence2LF(args.sentence)

    if args.frame_matching:
        print 'Frame Matching:'
        out, err = anno.frameMatching(args.sentence, out_error=True,
                                      lf_file_name=args.tmp_lf_file)
        logger.debug(err)
        print '\'%s\n\'' % '\n'.join(map(str, out))

    if args.frame_element_matching:
        print '\nFrame Element Matching:'
        out, err = anno.frameElementMatching(args.sentence, out_error=True,
                                             lf_file_name=args.tmp_lf_file)
        logger.debug(err)
        print '\'%s\n\'' % '\n'.join(map(str, out))

    if args.matching:
        print '\nMatching:'
        out, err = anno.matching(args.sentence, lf_file_name=args.tmp_lf_file)
        logger.debug(err)
        print '\'%s\n\'' % '\n'.join(map(str, out))
        # semeval_parser = SemEval07XMLAdapter()
        # for f_name, annoset in anno.sem_annotations(args.sentence, out).items():
        #     print "Frame:{f_name}\n\t{anno}".format(f_name=f_name,
        #                                             anno=semeval_parser._anno_set2XML(annoset))

    logger.info('Done')
def main(argv):
    """Builds a base of examples from a parsed corpus and writes train/test splits."""
    args = parse_args(argv, _add_logger_args)
    config_logger(args)
    logger.info('Initialization')

    # Select the appropriate document parser for the input
    input_parser = PARSERS_AVAILABLE[args.input_format]()

    logger.info('Parsing {file}'.format(file=args.data_base_path))
    # Parse corpus
    with open(args.data_base_path) as db_file:
        docs = input_parser.parseXML(db_file)
    logger.info('Done parsing')

    logger.info('Creating base')
    examples = []
    for d_id, doc in enumerate(docs):
        boxer = BoxerLocalAPI()
        for example in examples_from_doc(boxer, d_id=d_id, doc=doc):
            examples.append(example)

    # Shuffle examples deterministically, given a fixed seed
    random.Random(args.seed).shuffle(examples)

    if args.train_ratio:
        # Integer division yields a train_ratio:1 train/test split
        len_train = (args.train_ratio * len(examples) / (args.train_ratio + 1))
        train, test = examples[:len_train], examples[len_train:]
        write_to_file(args.root, train, 'train')
        write_to_file(args.root, test, 'test')
    else:
        write_to_file(args.root, examples, 'train')
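# NOTE: write_to_file and examples_from_doc are defined elsewhere in the repo.
# Below is a minimal sketch of the write_to_file(root, examples, name) behavior
# the function above relies on; the file-naming scheme and str() serialization
# are assumptions for illustration, not the actual implementation.
def write_to_file_sketch(root, examples, name):
    out_path = '%s_%s.f' % (root, name)  # hypothetical naming scheme
    with open(out_path, 'w') as f:
        for example in examples:
            f.write(str(example))
            f.write('\n')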
def main(argv): """ Process the arguments in order to return the theory. The theory is the union of the set of the deep role rules and the set of the frame matching rules. """ args = parse_args(argv, _add_logger_args) config_logger(args) logger.info(args) logger.info('Starting FrameNet') parser = NetXMLParser() fn = parser.parse('framenet/fndata-1.7') if args.frame_related: logger.info('Initialization of frame matching rule inference') frame_matching_rules = make_frame_matching_rules(get_lus2frames(fn)) if args.out_file: logger.info('Saving to %s' % args.out_file) stream = open(args.out_file, 'a') else: logger.info('Writing to stdout') stream = stdout with stream as f: f.write("%% Frame Matching Rules\n") for rule in frame_matching_rules: f.write(rule) f.write('\n') ######################## # # # Frame Element Rules # # # ######################## if args.frame_element: examples = get_all_examples(fn, args.example_file) logger.info('Starting Boxer') boxer = BoxerLocalAPI(expand_predicates=True) logger.info('Boxer started') if args.limit: sliced_examples = examples[:int(args.limit)] else: sliced_examples = examples logger.info('There are %d examples being considered' % len(sliced_examples)) theory = make_theory(RuleGenerator(boxer), sliced_examples) if args.out_file: logger.info('Saving to %s' % args.out_file) stream = open(args.out_file, 'a') else: stream = stdout with stream as f: f.write("\n%% Deep Role Rules\n") for rule in theory: f.write(rule) f.write('\n')
def main(argv): """ Runs the pipeline in a predefined format of documents """ args = parse_args(argv, add_logger_args) config_logger(args) logger.info('Initializing Boxer') # boxer = BoxerWebAPI() boxer = BoxerLocalAPI(TokenizerLocalAPI(), CandCLocalAPI(), expand_predicates=args.expand_boxer_predicates) logger.info('Initializing Dependence Tree') depTree = DependencyTreeLocalAPI() base_dir = args.dir_path file_paths = listdir(base_dir) if args.random: shuffle(file_paths) if args.max_docs != None: # limit the number of documents read length = min(int(args.max_docs), len(file_paths)) else: length = len(file_paths) if args.output_format == 'pl': # define the whay the info whil be stored and dumped output = Processor2PL() elif args.output_format == 'problog': output = Processor2ProbLog() else: output = Processor2JSON() out_count = 0 # iterate trhough all files in the specified dir # args.out_file logger.info('Reading files') for count, file_path in enumerate(file_paths[args.skip:( args.skip + length)]): # enumerate is used only to set count with open(path.join(base_dir, file_path), 'r') as raw_text: output.add_doc(raw_text, count, boxer, depTree, replace=args.replace_entities) if args.break_output and (count + 1) % args.break_output == 0: # save to files output.save_output(args.out_file, out_count) output.erase() out_count += 1 logger.info("%6.2f%% Read", 100 * float(count + 1) / length) # logs progress # ensure the last file recives its dump if args.break_output: if length % args.break_output != 0: output.save_output(args.out_file, out_count) # logs progress else: output.save_output(args.out_file)
def main(argv):
    """Collects Aleph results from a directory tree and dumps them."""
    args = parse_args(argv, add_logger_args)
    config_logger(args)
    out = {}
    logger.info('Starting at %s', args.dir_path)
    run_tree(args.dir_path, _runAleph_out_parser, args.file_prefix, logger, d=out)
    if args.output_file:
        with open(args.output_file, 'w') as out_stream:
            dump(out, out_stream)
    else:
        print out
    logger.info('Done')
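# run_tree is defined elsewhere; as used here and in the sibling script further
# below, it appears to walk a directory tree and apply the given callback wherever
# files matching file_prefix are found, forwarding any extra arguments. A minimal
# sketch under that assumption (the callback signature is a guess):
import os

def run_tree_sketch(dir_path, func, file_prefix, *args, **kwargs):
    for dir_name, _, file_names in os.walk(dir_path):
        if any(name.startswith(file_prefix) for name in file_names):
            func(dir_name, file_prefix, *args, **kwargs)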
def main(argv):
    """Annotates a corpus with the Prolog rule base and writes the result as XML."""
    args = parse_args(argv, _add_logger_args)
    config_logger(args)
    logger.info('Initialization')
    boxer = BoxerLocalAPI()

    # Select the appropriate document parsers for the input and output
    input_parser = PARSERS_AVAILABLE[args.input_format]()
    output_parser = PARSERS_AVAILABLE[args.output_format]()

    # Get the paths of the rules
    kb_fr_path = path.join(args.kb_path, args.kb_fr)
    kb_fe_path = path.join(args.kb_path, args.kb_fe)
    annotator = PrologBoxerAnnotator(boxer, kb_fr_path, kb_fe_path)

    logger.info('{anno} is parsing {file}'.format(anno=annotator,
                                                  file=args.data_base_path))
    # Parse corpus
    with open(args.data_base_path) as db_file:
        docs = input_parser.parseXML(db_file)
    logger.info('Done parsing')

    out_docs = eval_corpus(annotator, boxer, docs,
                           skip_frame_matching=args.skip_frame_matching)

    if args.annotation_output_path:
        logger.info("Storing results at {f_name}".format(f_name=args.annotation_output_path))
        with open(args.annotation_output_path, 'w') as f_out:
            output_parser.doc2XML(out_docs[0], f_out)
    else:
        logger.info("Printing results to stdout")
        output_parser.doc2XML(out_docs[0])
    logger.info('Done')
def main(argv):
    """Sets up an experiment directory for the chosen ILP engine."""
    args = parse_args(argv, add_logger_args)
    config_logger(args)

    # Read the configuration file into conf
    if not args.configuration_file:
        args.configuration_file = 'tests.conf'
    with open(args.configuration_file, 'r') as f:
        conf = json.load(f)

    if args.engine == 'aleph':
        engine = Aleph()
    else:
        engine = ProbLog()

    # Create the experiment directory and the engine subdirectory if missing
    ensure_dir(args.dir_name)
    engine_path = path.join(args.dir_name, args.engine)
    ensure_dir(engine_path)

    write_exp(conf, args.kb, args.base, args.facts, args.negatives,
              engine_path, engine, args.copy_kb)
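# ensure_dir is defined elsewhere in the repo; the behavior the code above relies
# on is simply "create the directory if it does not already exist". A minimal
# sketch of that behavior:
import os

def ensure_dir(dir_path):
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)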
def main(argv):
    """Parses an XML corpus into documents, optionally checking and serializing them."""
    args = parse_args(argv, _add_logger_args)
    config_logger(args)

    logger.info('Loading XML file')
    with open(args.input_file, 'r') as f:
        tree = XMLTree.parse(f)
    logger.info('XML tree ready')
    root = tree.getroot()

    adapter = PARSERS_AVAILABLE.get(args.parser, SemEval07XMLAdapter)()

    logger.info('Parsing XML tree')
    try:
        docs = adapter.parseXML(root)
    except KeyError:
        raise KeyError('Consider using another parser type by using the option --parser')
    logger.info('Done parsing XML tree')

    if args.check_examples:
        # Check that every sentence survives the conversion round trip
        for doc in docs:
            for sentence in doc:
                converted = sentence.get_fn_example().str_no_annotation()
                print converted
                print sentence.get_fn_example()
                # raw_input()
                if converted != sentence.text:
                    logger.critical("{sent} was not properly processed".format(sent=sentence))

    if args.output_file is not None:
        logger.info('Writing pickle file')
        with open(args.output_file, 'wb') as f:
            pickle.dump(docs, f)

    if args.output_xml_file is not None:
        logger.info('Writing XML file')
        with open(args.output_xml_file, 'w') as f:
            for doc in docs:
                adapter.doc2XML(doc, xml_file=f)
def main(argv):
    args = parse_args(argv, _add_logger_args)
    config_logger(args)
    logger.info('Starting at %s', args.dir_path)
    run_tree(args.dir_path, _runAleph, args.file_prefix)
    logger.info('Done')