def run():
    """Preprocess the test, development and training sets and dump each as JSON.

    Creates a ``StanfordCoreNLP`` wrapper, obtains the three corpus splits from
    ``load.run()``, passes each split through ``preprocess`` (in the order
    test, dev, train) and writes the results to ``WRITE_TEST_PATH``,
    ``WRITE_DEV_PATH`` and ``WRITE_TRAIN_PATH`` respectively.

    The CoreNLP wrapper is closed in a ``finally`` clause so it is released
    even when loading or preprocessing raises, and every output file is
    written inside a ``with`` block so it is flushed and closed right away.
    """
    corenlp = StanfordCoreNLP(
        r'/home/tcastrof/workspace/stanford/stanford-corenlp-full-2018-02-27',
        memory='8g')
    try:
        logging.info('Load corpus')
        trainset, devset, testset = load.run()

        logging.info('Preparing test set...')
        testset = preprocess(testset, corenlp=corenlp, set_='test')
        with open(WRITE_TEST_PATH, 'w') as f:
            json.dump(testset, f)

        logging.info('Preparing development set...')
        devset = preprocess(devset, corenlp=corenlp, set_='dev')
        with open(WRITE_DEV_PATH, 'w') as f:
            json.dump(devset, f)

        logging.info('Preparing trainset...')
        trainset = preprocess(trainset, corenlp=corenlp, set_='train')
        with open(WRITE_TRAIN_PATH, 'w') as f:
            json.dump(trainset, f)
    finally:
        # Always release the wrapper, including on failure part-way through.
        corenlp.close()
def run():
    """Prepare the development and training sets and dump each as JSON.

    Creates a ``StanfordCoreNLP`` wrapper, obtains the two corpus splits from
    ``load.run()``, passes each through ``utils.prepare_corpus`` together with
    the annotation ``props`` (dev first, then train) and writes the results to
    ``WRITE_DEV_PATH`` and ``WRITE_TRAIN_PATH``.

    The CoreNLP wrapper is closed in a ``finally`` clause so it is released
    even when loading or preparation raises, and every output file is written
    inside a ``with`` block so it is flushed and closed right away.
    """
    # Annotation settings forwarded to utils.prepare_corpus for every split.
    props = {
        'annotators': 'tokenize,ssplit,pos,lemma,parse',
        'pipelineLanguage': 'en',
        'outputFormat': 'json',
    }

    corenlp = StanfordCoreNLP(
        r'/home/tcastrof/workspace/stanford/stanford-corenlp-full-2018-02-27')
    try:
        logging.info('Load corpus', extra=d)
        trainset, devset = load.run()

        logging.info('Preparing development set...', extra=d)
        devset = utils.prepare_corpus(devset, corenlp=corenlp, props=props)
        with open(WRITE_DEV_PATH, 'w') as f:
            json.dump(devset, f)

        logging.info('Preparing trainset...', extra=d)
        trainset = utils.prepare_corpus(trainset, corenlp=corenlp, props=props)
        with open(WRITE_TRAIN_PATH, 'w') as f:
            json.dump(trainset, f)
    finally:
        # Always release the wrapper, including on failure part-way through.
        corenlp.close()
def do_load(self, line):
    """
    Execute load command.

    Available loaders:

        {load}

    For more information about a particular loader type the following (in
    this case we get help for the `tsv` loader):

        > help load tsv

    Examples:

        > load data = tsv("examples/data/data.csv", delim=',')
        > sol
        Solution
        ========
        data/3
        ======
        data(2,"cow","boy") = true.

        data/4
        ======
        data(0,"a","b","3.0") = true.
        data(1,"c","d","4.0") = true.

    """
    # Pass self.interp and the raw command text to load.run; whatever it
    # returns is forwarded to self._changed on success.
    try:
        changed = load.run(self.interp, line)
    except:
        # NOTE(review): the bare ``except`` also catches KeyboardInterrupt and
        # SystemExit -- presumably so a failed or interrupted load never
        # terminates the command loop; confirm before narrowing it.
        show_traceback()
        # On failure the readline history is written to self.hist.
        readline.write_history_file(self.hist)
    else:
        # Only reached when load.run returned without raising.
        self._changed(changed)
q2 = re.sub(r'[^\w\s]', ' ', rel_comment['text'].lower()).strip() q2 = [ w for w in nltk.word_tokenize(q2.lower()) if w not in stop ] q2 = ' '.join(q2) trainset.append({'source': q1, 'target': q2}) trainset.append({'source': q2, 'target': q1}) return trainset def save(trainset): if not os.path.exists(TRANSLATION_PATH): os.mkdir(TRANSLATION_PATH) with open(os.path.join(TRANSLATION_PATH, 'semeval.de'), 'w') as f: f.write('\n'.join(map(lambda x: x['source'], trainset))) with open(os.path.join(TRANSLATION_PATH, 'semeval.en'), 'w') as f: f.write('\n'.join(map(lambda x: x['target'], trainset))) if __name__ == '__main__': print('Load corpus') trainset, devset = load.run() print('Preparing training questions for alignment') trainset = prepare_questions(trainset) print('Saving Parallel data') save(trainset)
def main():
    """Command-line entry point for the Dyna interpreter (Python 2 code).

    Parses the command line, then, depending on the arguments:

    * ``--version``: prints the text of ``dynahome / "dist/VERSION"`` (or a
      failure message on ``IOError``) and exits with status 0.
    * one or more ``source`` paths: several paths are first concatenated into
      ``interp.compiler.tmp / "tmp.dyna"``; the resulting file is compiled
      with ``interp.dynac``, the plan is loaded and the agenda is run. With
      ``--debug`` the ``debug`` module is run on the source instead and the
      process exits with status 1.
    * ``--load`` / ``--post-process``: each command is passed to ``load.run``
      / ``post.run`` together with the interpreter.
    * charts are dumped to ``args.output`` when any of load, post-process or
      source was given.
    * ``-i`` or no source at all: a ``REPL`` is started.
    """
    parser = argparse.ArgumentParser(description="The dyna interpreter!")
    parser.add_argument("--version", action="store_true",
                        help="Print version information.")
    parser.add_argument("source", nargs="*", type=path,
                        help="Path to Dyna source file.")
    parser.add_argument("-i", dest="interactive", action="store_true",
                        help="Fire-up REPL after runing solver..")
    parser.add_argument("-o", "--output", dest="output",
                        type=argparse.FileType("wb"),
                        help="Write solution to file.")
    parser.add_argument("--post-process", nargs="*",
                        help="run post-processor.")
    parser.add_argument("--load", nargs="*",
                        help="run loaders.")
    parser.add_argument("--debug", action="store_true",
                        help="Debug planner, normalizer and parser.")

    args = parser.parse_args()

    if args.version:
        try:
            # Python 2 print statement: prints the result of ``.text()``.
            print (dynahome / "dist/VERSION").text()  # XREF:VERSION
        except IOError:
            print "failed to obtain version info."
        exit(0)

    interp = Interpreter()

    crash_handler()

    if args.source:

        if len(args.source) > 1:
            # concatenate files
            with file(interp.compiler.tmp / "tmp.dyna", "wb") as g:
                for f in args.source:
                    if not f.exists():
                        print "File `%s` does not exist." % f
                        return
                    # ``f`` is rebound from the path to the opened file here.
                    with file(f) as f:
                        # Banner: blank line, a row of 80 '%', then the file
                        # name on a line starting with '%% '.
                        g.write("\n")
                        g.write("%" * 80)
                        g.write("\n")
                        g.write("%% ")
                        g.write(f.name)
                        g.write("\n")
                        g.write(f.read())
            # NOTE(review): nesting of this assignment and of the existence
            # check below was reconstructed from collapsed text -- confirm.
            args.source = g.name
        else:
            # Exactly one source: unpack the single-element list.
            [args.source] = args.source

        if not args.source.exists():
            print "File `%s` does not exist." % args.source
            return

        if args.debug:
            import debug
            debug.main(args.source, browser=True)
            exit(1)

        try:
            plan = interp.dynac(args.source)
        except DynaCompilerError as e:
            # Compilation failed: show the compiler error and stop.
            print e
            exit(1)

        interp.load_plan(plan)
        interp.run_agenda()

    if args.load:
        for cmd in args.load:
            load.run(interp, cmd)

    if args.post_process:
        for cmd in args.post_process:
            post.run(interp, cmd)

    if args.load or args.post_process or args.source:
        interp.dump_charts(args.output)  # should be a post-processor

    if args.interactive or not args.source:
        repl = REPL(interp)

        def repl_crash():
            # all files the interpreter generated
            # Writes every entry of ``repl.lines`` to crash-repl.log, one per line.
            with file(dotdynadir / "crash-repl.log", "wb") as f:
                for line in repl.lines:
                    print >> f, line

        # Register the dump above with the crash handler's hooks.
        crash_handler.hooks.append(repl_crash)

        repl.cmdloop()
def main():
    """Command-line entry point for the Dyna interpreter (Python 2 code).

    Parses the command line, then, depending on the arguments:

    * ``--version``: prints the text of ``dynahome / 'dist/VERSION'`` (or a
      failure message on ``IOError``) and exits with status 0.
    * one or more ``source`` paths: several paths are first concatenated into
      ``interp.compiler.tmp / 'tmp.dyna'``; the resulting file is compiled
      with ``interp.dynac``, the plan is loaded and the agenda is run. With
      ``--debug`` the ``debug`` module is run on the source instead and the
      process exits with status 1.
    * ``--load`` / ``--post-process``: each command is passed to ``load.run``
      / ``post.run`` together with the interpreter.
    * charts are dumped to ``args.output`` when any of load, post-process or
      source was given.
    * ``-i`` or no source at all: a ``REPL`` is started.
    """
    parser = argparse.ArgumentParser(description="The dyna interpreter!")
    parser.add_argument('--version', action='store_true',
                        help='Print version information.')
    parser.add_argument('source', nargs='*', type=path,
                        help='Path to Dyna source file.')
    parser.add_argument('-i', dest='interactive', action='store_true',
                        help='Fire-up REPL after runing solver..')
    parser.add_argument('-o', '--output', dest='output',
                        type=argparse.FileType('wb'),
                        help='Write solution to file.')
    parser.add_argument('--post-process', nargs='*',
                        help='run post-processor.')
    parser.add_argument('--load', nargs='*',
                        help='run loaders.')
    parser.add_argument('--debug', action='store_true',
                        help='Debug planner, normalizer and parser.')

    args = parser.parse_args()

    if args.version:
        try:
            # Python 2 print statement: this prints the result of ``.text()``;
            # the parentheses only group the path expression.
            print(dynahome / 'dist/VERSION').text()  # XREF:VERSION
        except IOError:
            print 'failed to obtain version info.'
        exit(0)

    interp = Interpreter()

    crash_handler()

    if args.source:

        if len(args.source) > 1:
            # concatenate files
            with file(interp.compiler.tmp / 'tmp.dyna', 'wb') as g:
                for f in args.source:
                    if not f.exists():
                        print 'File `%s` does not exist.' % f
                        return
                    # ``f`` is rebound from the path to the opened file here.
                    with file(f) as f:
                        # Banner: blank line, a row of 80 '%', then the file
                        # name on a line starting with '%% '.
                        g.write('\n')
                        g.write('%' * 80)
                        g.write('\n')
                        g.write('%% ')
                        g.write(f.name)
                        g.write('\n')
                        g.write(f.read())
            # NOTE(review): nesting of this assignment and of the existence
            # check below was reconstructed from collapsed text -- confirm.
            args.source = g.name
        else:
            # Exactly one source: unpack the single-element list.
            [args.source] = args.source

        if not args.source.exists():
            print 'File `%s` does not exist.' % args.source
            return

        if args.debug:
            import debug
            debug.main(args.source, browser=True)
            exit(1)

        try:
            plan = interp.dynac(args.source)
        except DynaCompilerError as e:
            # Compilation failed: show the compiler error and stop.
            print e
            exit(1)

        interp.load_plan(plan)
        interp.run_agenda()

    if args.load:
        for cmd in args.load:
            load.run(interp, cmd)

    if args.post_process:
        for cmd in args.post_process:
            post.run(interp, cmd)

    if args.load or args.post_process or args.source:
        interp.dump_charts(args.output)  # should be a post-processor

    if args.interactive or not args.source:
        repl = REPL(interp)

        def repl_crash():
            # all files the interpreter generated
            # Writes every entry of ``repl.lines`` to crash-repl.log, one per line.
            with file(dotdynadir / 'crash-repl.log', 'wb') as f:
                for line in repl.lines:
                    print >> f, line

        # Register the dump above with the crash handler's hooks.
        crash_handler.hooks.append(repl_crash)

        repl.cmdloop()