Example #1
from optparse import OptionParser

import dirs  # project-local module with directory helpers; run_pipeline is assumed to be defined elsewhere in the same file
# (the later snippets assume the same imports plus their own project-local helpers)


def main(args=None):
    usage = "%prog dataset [CoreNLP_dir]"
    parser = OptionParser(usage=usage)
    parser.add_option('-s', dest='skip_corenlp', action="store_true", default=False,
                      help='Skip processing by CoreNLP and just parse the xml output; default=%default')
    parser.add_option('-x', dest='extension', default='.xml',
                      help='Extension for CoreNLP files; default=%default')
    parser.add_option('-o', dest='overwrite', action="store_true", default=False,
                      help='Redo parsing for all files; default=%default')
    parser.add_option('-n', dest='nice', action="store_true", default=False,
                      help='Call CoreNLP with "nice"; default=%default')

    (options, args) = parser.parse_args()
    skip_corenlp = options.skip_corenlp
    overwrite = options.overwrite
    extension = options.extension
    nice = options.nice
    print(skip_corenlp)  # debug output: echo the -s flag

    project_name = args[0]
    if not skip_corenlp:
        corenlp_dir = args[1]  # the CoreNLP_dir argument is required unless -s was given
    else:
        corenlp_dir = ''

    global verbosity  # no-op here; presumably set elsewhere in the original script

    dirs.make_base_dir(project_name)

    run_pipeline(skip_corenlp, corenlp_dir, overwrite, extension, nice)
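
A minimal standalone sketch (not from the original repo) of how these optparse flags resolve; note that "%default" inside a help string is interpolated by optparse with the option's default value.

from optparse import OptionParser

parser = OptionParser(usage="%prog dataset [CoreNLP_dir]")
parser.add_option('-s', dest='skip_corenlp', action="store_true", default=False,
                  help='Skip CoreNLP; default=%default')  # shown as "default=False" in --help

# parse_args also accepts an explicit argument list, which is handy for testing;
# this call is equivalent to running: script.py -s my_dataset
(options, args) = parser.parse_args(['-s', 'my_dataset'])
print(options.skip_corenlp)  # True
print(args)                  # ['my_dataset']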
Example #2
def main():
    usage = "%prog dataset"
    parser = OptionParser(usage=usage)
    parser.add_option('-n', dest='n_files', default=None,
                      help='Only process the first n files: default=%default')
    # note: store_false, so passing --lemmas actually turns lemma use OFF
    parser.add_option('--lemmas', action="store_false", dest="use_lemmas", default=True,
                      help='Use words instead of lemmas: default=%default')
    #parser.add_option('--word2vec', dest='word2vec', default=None,
    #                  help='word2vec.bin file: default=%default')
    parser.add_option('--min_wf', dest='min_wf', default=4,
                      help='Exclude target words that occur less than this many times: default=%default')
    parser.add_option('--min_tuples', dest='min_tuples', default=3,
                      help='Exclude entities with less than this many tuples: default=%default')

    (options, args) = parser.parse_args()
    use_lemmas = options.use_lemmas
    # optparse returns values as strings, so cast -n when it was given
    n_files = int(options.n_files) if options.n_files is not None else None
    word2vec_file = None
    min_wf = int(options.min_wf)
    min_tuples = int(options.min_tuples)

    if len(args) < 1:
        #sys.exit("Please specify a project / dataset")
        project = 'mfc_v3'
    else:
        project = args[0]
    dirs.make_base_dir(project)

    trees, clustered_indices = find_entities(n_files, use_lemmas)
    filtered_indices, bamman_entities = get_bamman_entities(
        trees, clustered_indices, word2vec_file=word2vec_file,
        min_role_vocab=min_wf, min_tuples=min_tuples)
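
Without a type= argument, optparse hands option values back as strings, which is why the snippet above casts min_wf and min_tuples to int. A small sketch, reusing the option names from above, of the two ways to end up with an int:

from optparse import OptionParser

parser = OptionParser()
parser.add_option('--min_wf', dest='min_wf', default=4)           # value arrives as a str
parser.add_option('--min_tuples', dest='min_tuples', type="int",  # optparse casts for you
                  default=3)
(options, _) = parser.parse_args(['--min_wf', '7', '--min_tuples', '5'])
print(int(options.min_wf))   # 7  (manual cast still needed)
print(options.min_tuples)    # 5  (already an int)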
Example #3
def main():
    # Handle input options and arguments
    usage = "%prog project"
    parser = OptionParser(usage=usage)
    (options, args) = parser.parse_args()

    project = args[0]

    dirs.make_base_dir(project)

    preprocess_for_easysrl()
Example #4
def main():
    # Handle input options and arguments
    usage = "%prog project semafor_dir"
    parser = OptionParser(usage=usage)
    (options, args) = parser.parse_args()

    project = args[0]
    semafor_dir = args[1]

    dirs.make_base_dir(project)

    sentences = parse_tokens(os.path.join(semafor_dir, 'tokenized'))
    parse_semafor_output(os.path.join(semafor_dir, 'fes'), sentences)
Example #5
def main():
    # Handle input options and arguments
    usage = "%prog project"
    parser = OptionParser(usage=usage)
    (options, args) = parser.parse_args()

    project = args[0]

    dirs.make_base_dir(project)

    input_filename = dirs.data_raw_text_file

    write_sentences(input_filename)
Example #6
def main():
    usage = "%prog dataset"
    parser = OptionParser(usage=usage)
    #parser.add_option('--keyword', dest='key', default=None,
    #                  help='Keyword argument: default=%default')
    #parser.add_option('--boolarg', action="store_true", dest="boolarg", default=False,
    #                  help='Keyword argument: default=%default')

    (options, args) = parser.parse_args()

    if len(args) < 1:
        sys.exit("Please specify a project / dataset")

    project = args[0]
    dirs.make_base_dir(project)

    extract_story_elements()
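
Passing a string to sys.exit(), as above, writes the message to stderr and exits with status 1, so these guards double as usage errors. A sketch of a small helper (not in the original) that factors out the repeated check:

import sys

def require_args(args, n, message):
    # sys.exit with a string prints it to stderr and exits with code 1
    if len(args) < n:
        sys.exit(message)

# e.g. require_args(args, 1, "Please specify a project / dataset")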
Example #7
def main():
    usage = "%prog dataset output_filename"
    parser = OptionParser(usage=usage)
    #parser.add_option('--keyword', dest='key', default=None,
    #                  help='Keyword argument: default=%default')
    #parser.add_option('--boolarg', action="store_true", dest="boolarg", default=False,
    #                  help='Keyword argument: default=%default')

    (options, args) = parser.parse_args()

    if len(args) < 2:
        sys.exit("Please specify a project / dataset and output filename")

    project = args[0]
    output_filename = args[1]
    dirs.make_base_dir(project)

    identify_rnn_targets(output_filename)
Example #8
def main():
    usage = "%prog project"
    parser = OptionParser(usage=usage)
    #parser.add_option('--keyword', dest='key', default=None,
    #                  help='Keyword argument: default=%default')
    #parser.add_option('--boolarg', action="store_true", dest="boolarg", default=False,
    #                  help='Keyword argument: default=%default')

    (options, args) = parser.parse_args()
    if len(args) < 1:
        sys.exit("Please specify a project name")
    project_name = args[0]

    dirs.make_base_dir(project_name)

    entities_file = os.path.join(dirs.data_stanford_dir, 'all_groups.json')

    cluster_entities(entities_file)
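
cluster_entities() consumes the all_groups.json path built above. A hypothetical peek at that input, assuming only that the file is ordinary JSON (as the extension suggests); the placeholder path stands in for dirs.data_stanford_dir:

import json
import os

data_stanford_dir = 'data/stanford'  # stand-in for dirs.data_stanford_dir
entities_file = os.path.join(data_stanford_dir, 'all_groups.json')

with open(entities_file) as f:
    groups = json.load(f)  # top-level structure is whatever the upstream step wrote
print(type(groups))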