import sys
from optparse import OptionParser

import dirs  # project-local module defining data paths


def main(args=None):
    usage = "%prog dataset [CoreNLP_dir]"
    parser = OptionParser(usage=usage)
    parser.add_option('-s', dest='skip_corenlp', action="store_true", default=False,
                      help='Skip processing by CoreNLP and just parse the xml output; default=%default')
    parser.add_option('-x', dest='extension', default='.xml',
                      help='Extension for CoreNLP files; default=%default')
    parser.add_option('-o', dest='overwrite', action="store_true", default=False,
                      help='Redo parsing for all files; default=%default')
    parser.add_option('-n', dest='nice', action="store_true", default=False,
                      help='Call CoreNLP with "nice"; default=%default')
    # Forward args so main() can be called programmatically; parse_args falls
    # back to sys.argv[1:] when args is None.
    (options, args) = parser.parse_args(args)

    skip_corenlp = options.skip_corenlp
    overwrite = options.overwrite
    extension = options.extension
    nice = options.nice

    if len(args) < 1:
        sys.exit("Please specify a project / dataset")
    project_name = args[0]
    if not skip_corenlp:
        if len(args) < 2:
            sys.exit("Please specify the CoreNLP directory")
        corenlp_dir = args[1]
    else:
        corenlp_dir = ''

    dirs.make_base_dir(project_name)
    run_pipeline(skip_corenlp, corenlp_dir, overwrite, extension, nice)
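# A minimal, self-contained sketch of the two invocation modes the usage
# string above implies. OptionParser accepts an explicit argv list, which
# makes the contract easy to test; the dataset name and directory below are
# hypothetical.
from optparse import OptionParser

demo_parser = OptionParser(usage="%prog dataset [CoreNLP_dir]")
demo_parser.add_option('-s', dest='skip_corenlp', action="store_true", default=False)

# Full pipeline: dataset plus CoreNLP directory as positional arguments.
opts, positional = demo_parser.parse_args(['my_dataset', '/path/to/corenlp'])
assert not opts.skip_corenlp and positional == ['my_dataset', '/path/to/corenlp']

# Parse-only mode: with -s the CoreNLP directory may be omitted.
opts, positional = demo_parser.parse_args(['-s', 'my_dataset'])
assert opts.skip_corenlp and positional == ['my_dataset']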
def main(): usage = "%prog dataset" parser = OptionParser(usage=usage) parser.add_option('-n', dest='n_files', default=None, help='Only process the first n files: default=%default') parser.add_option('--lemmas', action="store_false", dest="use_lemmas", default=True, help='If false, use word instead of lemmas: default=%default') #parser.add_option('--word2vec', dest='word2vec', default=None, # help='word2vec.bin file: default=%default') parser.add_option('--min_wf', dest='min_wf', default=4, help='Exclude target words that occur less than this many times: default=%default') parser.add_option('--min_tuples', dest='min_tuples', default=3, help='Exclude entities with less than this many tuples: default=%default') (options, args) = parser.parse_args() use_lemmas = options.use_lemmas n_files = options.n_files word2vec_file = None min_wf = int(options.min_wf) min_tuples = int(options.min_tuples) if len(args) < 1: #sys.exit("Please specify a project / dataset") project = 'mfc_v3' else: project = args[0] dirs.make_base_dir(project) trees, clustered_indices = find_entities(n_files, use_lemmas) filtered_indices, bamman_entities = get_bamman_entities(trees, clustered_indices, word2vec_file=word2vec_file, min_role_vocab=min_wf, min_tuples=min_tuples)
def main(): # Handle input options and arguments usage = "%prog project" parser = OptionParser(usage=usage) (options, args) = parser.parse_args() project = args[0] dirs.make_base_dir(project) preprocess_for_easysrl()
def main(): # Handle input options and arguments usage = "%prog project semafor_dir" parser = OptionParser(usage=usage) (options, args) = parser.parse_args() project = args[0] semafor_dir = args[1] dirs.make_base_dir(project) sentences = parse_tokens(os.path.join(semafor_dir, 'tokenized')) parse_semafor_output(os.path.join(semafor_dir, 'fes'), sentences)
def main(): # Handle input options and arguments usage = "%prog project" parser = OptionParser(usage=usage) (options, args) = parser.parse_args() project = args[0] dirs.make_base_dir(project) input_filename = dirs.data_raw_text_file write_sentences(input_filename)
def main(): usage = "%prog dataset" parser = OptionParser(usage=usage) #parser.add_option('--keyword', dest='key', default=None, # help='Keyword argument: default=%default') #parser.add_option('--boolarg', action="store_true", dest="boolarg", default=False, # help='Keyword argument: default=%default') (options, args) = parser.parse_args() if len(args) < 1: sys.exit("Please specify a project / dataset") project = args[0] dirs.make_base_dir(project) extract_story_elements()
def main(): usage = "%prog dataset output_filename" parser = OptionParser(usage=usage) #parser.add_option('--keyword', dest='key', default=None, # help='Keyword argument: default=%default') #parser.add_option('--boolarg', action="store_true", dest="boolarg", default=False, # help='Keyword argument: default=%default') (options, args) = parser.parse_args() if len(args) < 2: sys.exit("Please specify a project / dataset and output filename") project = args[0] output_filename = args[1] dirs.make_base_dir(project) identify_rnn_targets(output_filename)
def main(): usage = "%prog project" parser = OptionParser(usage=usage) #parser.add_option('--keyword', dest='key', default=None, # help='Keyword argument: default=%default') #parser.add_option('--boolarg', action="store_true", dest="boolarg", default=False, # help='Keyword argument: default=%default') (options, args) = parser.parse_args() if len(args) < 1: sys.exit("Please specify a project name") project_name = args[0] dirs.make_base_dir(project_name) entities_file = os.path.join(dirs.data_stanford_dir, 'all_groups.json') cluster_entities(entities_file)
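# Each main() above presumably lives in its own standalone script; a minimal
# sketch of the standard entry-point guard each would need to be runnable from
# the command line (the script name in the example invocation is hypothetical,
# e.g. `python parse_corenlp.py mfc_v3`):
if __name__ == '__main__':
    main()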