예제 #1
0
def run():
    """Preprocess the test, development and training sets and dump them as JSON.

    Starts a Stanford CoreNLP wrapper, obtains the three corpus splits from
    ``load.run()``, passes each split through ``preprocess`` and writes the
    results to ``WRITE_TEST_PATH``, ``WRITE_DEV_PATH`` and
    ``WRITE_TRAIN_PATH`` respectively.

    The CoreNLP wrapper is closed even when one of the steps raises, and each
    output file is closed as soon as it has been written.
    """
    corenlp = StanfordCoreNLP(
        r'/home/tcastrof/workspace/stanford/stanford-corenlp-full-2018-02-27',
        memory='8g')

    # try/finally guarantees corenlp.close() runs on every exit path, so a
    # failure half-way through does not skip the cleanup.
    try:
        logging.info('Load corpus')
        trainset, devset, testset = load.run()

        logging.info('Preparing test set...')
        testset = preprocess(testset, corenlp=corenlp, set_='test')
        with open(WRITE_TEST_PATH, 'w') as f:
            json.dump(testset, f)

        logging.info('Preparing development set...')
        devset = preprocess(devset, corenlp=corenlp, set_='dev')
        with open(WRITE_DEV_PATH, 'w') as f:
            json.dump(devset, f)

        logging.info('Preparing trainset...')
        trainset = preprocess(trainset, corenlp=corenlp, set_='train')
        with open(WRITE_TRAIN_PATH, 'w') as f:
            json.dump(trainset, f)
    finally:
        corenlp.close()
예제 #2
0
def run():
    """Prepare the development and training sets and dump them as JSON.

    Starts a Stanford CoreNLP wrapper, obtains the two corpus splits from
    ``load.run()``, passes each split through ``utils.prepare_corpus`` with
    the annotator properties below and writes the results to
    ``WRITE_DEV_PATH`` and ``WRITE_TRAIN_PATH`` respectively.

    The CoreNLP wrapper is closed even when one of the steps raises, and each
    output file is closed as soon as it has been written.
    """
    # Properties handed to utils.prepare_corpus for every split.
    props = {
        'annotators': 'tokenize,ssplit,pos,lemma,parse',
        'pipelineLanguage': 'en',
        'outputFormat': 'json'
    }
    corenlp = StanfordCoreNLP(
        r'/home/tcastrof/workspace/stanford/stanford-corenlp-full-2018-02-27')

    # try/finally guarantees corenlp.close() runs on every exit path, so a
    # failure half-way through does not skip the cleanup.
    try:
        logging.info('Load corpus', extra=d)
        trainset, devset = load.run()

        logging.info('Preparing development set...', extra=d)
        devset = utils.prepare_corpus(devset, corenlp=corenlp, props=props)
        with open(WRITE_DEV_PATH, 'w') as f:
            json.dump(devset, f)

        logging.info('Preparing trainset...', extra=d)
        trainset = utils.prepare_corpus(trainset, corenlp=corenlp, props=props)
        with open(WRITE_TRAIN_PATH, 'w') as f:
            json.dump(trainset, f)
    finally:
        corenlp.close()
예제 #3
0
    def do_load(self, line):
        """
        Execute load command.

        Available loaders:

            {load}

        For more information about a particular loader type the following (in
        this case we get help for the `tsv` loader):

            > help load tsv

        Examples:

        > load data = tsv("examples/data/data.csv", delim=',')
        > sol
        Solution
        ========
        data/3
        ======
        data(2,"cow","boy") = true.

        data/4
        ======
        data(0,"a","b","3.0") = true.
        data(1,"c","d","4.0") = true.

        """
        # Hand the raw command text to the loader together with the
        # interpreter this REPL drives.
        try:
            changed = load.run(self.interp, line)
        except:
            # Any failure in the loader is reported, the history file is
            # written, and the command ends without calling _changed.
            show_traceback()
            readline.write_history_file(self.hist)
            return
        # Success path: pass whatever the loader returned to _changed.
        self._changed(changed)
예제 #4
0
파일: repl.py 프로젝트: nwf/dyna
    def do_load(self, line):
        """
        Execute load command.

        Available loaders:

            {load}

        For more information about a particular loader type the following (in
        this case we get help for the `tsv` loader):

            > help load tsv

        Examples:

        > load data = tsv("examples/data/data.csv", delim=',')
        > sol
        Solution
        ========
        data/3
        ======
        data(2,"cow","boy") = true.

        data/4
        ======
        data(0,"a","b","3.0") = true.
        data(1,"c","d","4.0") = true.

        """
        # Hand the raw command text to the loader together with the
        # interpreter this REPL drives.
        try:
            changed = load.run(self.interp, line)
        except:
            # Any failure in the loader is reported, the history file is
            # written, and the command ends without calling _changed.
            show_traceback()
            readline.write_history_file(self.hist)
            return
        # Success path: pass whatever the loader returned to _changed.
        self._changed(changed)
예제 #5
0
                        q2 = re.sub(r'[^\w\s]', ' ',
                                    rel_comment['text'].lower()).strip()
                        q2 = [
                            w for w in nltk.word_tokenize(q2.lower())
                            if w not in stop
                        ]
                        q2 = ' '.join(q2)

                        trainset.append({'source': q1, 'target': q2})
                        trainset.append({'source': q2, 'target': q1})
    return trainset


def save(trainset):
    """Write the question pairs as two line-aligned parallel text files.

    Each element of ``trainset`` is a mapping with ``'source'`` and
    ``'target'`` entries. The ``'source'`` values are written one per line to
    ``semeval.de`` and the ``'target'`` values to ``semeval.en``, both inside
    ``TRANSLATION_PATH``; line *i* of one file corresponds to line *i* of the
    other. No trailing newline is written after the last line.

    Args:
        trainset: sequence of ``{'source': str, 'target': str}`` mappings.
    """
    # exist_ok avoids the check-then-create race and also creates any missing
    # parent directories, which os.mkdir would refuse to do.
    os.makedirs(TRANSLATION_PATH, exist_ok=True)

    # Both sides are written the same way; only file name and key differ.
    for filename, field in (('semeval.de', 'source'), ('semeval.en', 'target')):
        # An explicit encoding keeps the output independent of the locale.
        with open(os.path.join(TRANSLATION_PATH, filename), 'w',
                  encoding='utf-8') as f:
            f.write('\n'.join(pair[field] for pair in trainset))


if __name__ == '__main__':
    # Script entry point: load the corpus, run the training split through
    # prepare_questions and hand the result to save().
    print('Load corpus')
    # load.run() yields two splits; devset is bound here but not used below.
    trainset, devset = load.run()
    print('Preparing training questions for alignment')
    trainset = prepare_questions(trainset)
    print('Saving Parallel data')
    save(trainset)
예제 #6
0
파일: main.py 프로젝트: nwf/dyna
def main():
    """
    Command-line entry point for the Dyna interpreter.

    Parses the command line, optionally compiles and runs the given source
    file(s), applies any ``--load`` and ``--post-process`` commands, dumps
    the charts, and finally starts the REPL when ``-i`` was given or no
    source file was supplied.

    NOTE: this function is written in Python 2 syntax (``print`` statements,
    the ``file`` builtin, ``print >> f``).
    """
    parser = argparse.ArgumentParser(description="The dyna interpreter!")

    parser.add_argument("--version", action="store_true", help="Print version information.")
    parser.add_argument("source", nargs="*", type=path, help="Path to Dyna source file.")
    parser.add_argument("-i", dest="interactive", action="store_true", help="Fire-up REPL after runing solver..")
    parser.add_argument("-o", "--output", dest="output", type=argparse.FileType("wb"), help="Write solution to file.")
    parser.add_argument("--post-process", nargs="*", help="run post-processor.")
    parser.add_argument("--load", nargs="*", help="run loaders.")
    parser.add_argument("--debug", action="store_true", help="Debug planner, normalizer and parser.")

    args = parser.parse_args()

    # --version: print the contents of dist/VERSION under dynahome and exit
    # with status 0, whether or not the file could be read.
    if args.version:
        try:
            print (dynahome / "dist/VERSION").text()  # XREF:VERSION
        except IOError:
            print "failed to obtain version info."
        exit(0)

    interp = Interpreter()

    crash_handler()

    if args.source:

        if len(args.source) > 1:
            # concatenate files
            # Several sources are merged into one tmp.dyna file under the
            # compiler's tmp directory; each input is preceded by a line of
            # 80 '%' characters and a "%% <name>" line.
            with file(interp.compiler.tmp / "tmp.dyna", "wb") as g:
                for f in args.source:
                    if not f.exists():
                        print "File `%s` does not exist." % f
                        return
                    # NOTE: `f` is rebound from the source path to the open
                    # file object for the duration of this inner block.
                    with file(f) as f:
                        g.write("\n")
                        g.write("%" * 80)
                        g.write("\n")
                        g.write("%% ")
                        g.write(f.name)
                        g.write("\n")
                        g.write(f.read())
            # From here on args.source is a single value (the merged file's
            # name) rather than a list.
            args.source = g.name
        else:
            # Exactly one source: unpack the one-element list in place.
            [args.source] = args.source

        if not args.source.exists():
            print "File `%s` does not exist." % args.source
            return

        # --debug: hand the source to debug.main and exit with status 1
        # without compiling or running it here.
        if args.debug:
            import debug

            debug.main(args.source, browser=True)
            exit(1)

        # Compile the source; a compiler error is printed and ends the
        # process with status 1.
        try:
            plan = interp.dynac(args.source)
        except DynaCompilerError as e:
            print e
            exit(1)

        interp.load_plan(plan)
        interp.run_agenda()

    # Loader commands run in the order given on the command line.
    if args.load:
        for cmd in args.load:
            load.run(interp, cmd)

    # Post-processor commands run after all loaders.
    if args.post_process:
        for cmd in args.post_process:
            post.run(interp, cmd)

    # Charts are dumped only when something was actually loaded, processed
    # or run; args.output is passed through as parsed (None when -o is absent).
    if args.load or args.post_process or args.source:
        interp.dump_charts(args.output)  # should be a post-processor

    # Start the REPL when explicitly requested or when no source was given.
    if args.interactive or not args.source:
        repl = REPL(interp)

        def repl_crash():
            # all files the interpreter generated
            # Writes every entry of repl.lines to crash-repl.log under
            # dotdynadir, one per line.
            with file(dotdynadir / "crash-repl.log", "wb") as f:
                for line in repl.lines:
                    print >> f, line

        # Register the dump above as a hook on crash_handler.
        crash_handler.hooks.append(repl_crash)

        repl.cmdloop()
예제 #7
0
파일: main.py 프로젝트: chirag2796/dyna
def main():
    """
    Command-line entry point for the Dyna interpreter.

    Parses the command line, optionally compiles and runs the given source
    file(s), applies any ``--load`` and ``--post-process`` commands, dumps
    the charts, and finally starts the REPL when ``-i`` was given or no
    source file was supplied.

    NOTE: this function is written in Python 2 syntax (``print`` statements,
    the ``file`` builtin, ``print >> f``).
    """
    parser = argparse.ArgumentParser(description="The dyna interpreter!")

    parser.add_argument('--version',
                        action='store_true',
                        help='Print version information.')
    parser.add_argument('source',
                        nargs='*',
                        type=path,
                        help='Path to Dyna source file.')
    parser.add_argument('-i',
                        dest='interactive',
                        action='store_true',
                        help='Fire-up REPL after runing solver..')
    parser.add_argument('-o',
                        '--output',
                        dest='output',
                        type=argparse.FileType('wb'),
                        help='Write solution to file.')
    parser.add_argument('--post-process',
                        nargs='*',
                        help='run post-processor.')
    parser.add_argument('--load', nargs='*', help='run loaders.')
    parser.add_argument('--debug',
                        action='store_true',
                        help='Debug planner, normalizer and parser.')

    args = parser.parse_args()

    # --version: print the contents of dist/VERSION under dynahome and exit
    # with status 0, whether or not the file could be read.
    if args.version:
        try:
            # NOTE: despite the call-like spelling this is a Python 2 print
            # statement; the printed expression is `(...).text()`.
            print(dynahome / 'dist/VERSION').text()  # XREF:VERSION
        except IOError:
            print 'failed to obtain version info.'
        exit(0)

    interp = Interpreter()

    crash_handler()

    if args.source:

        if len(args.source) > 1:
            # concatenate files
            # Several sources are merged into one tmp.dyna file under the
            # compiler's tmp directory; each input is preceded by a line of
            # 80 '%' characters and a "%% <name>" line.
            with file(interp.compiler.tmp / 'tmp.dyna', 'wb') as g:
                for f in args.source:
                    if not f.exists():
                        print 'File `%s` does not exist.' % f
                        return
                    # NOTE: `f` is rebound from the source path to the open
                    # file object for the duration of this inner block.
                    with file(f) as f:
                        g.write('\n')
                        g.write('%' * 80)
                        g.write('\n')
                        g.write('%% ')
                        g.write(f.name)
                        g.write('\n')
                        g.write(f.read())
            # From here on args.source is a single value (the merged file's
            # name) rather than a list.
            args.source = g.name
        else:
            # Exactly one source: unpack the one-element list in place.
            [args.source] = args.source

        if not args.source.exists():
            print 'File `%s` does not exist.' % args.source
            return

        # --debug: hand the source to debug.main and exit with status 1
        # without compiling or running it here.
        if args.debug:
            import debug
            debug.main(args.source, browser=True)
            exit(1)

        # Compile the source; a compiler error is printed and ends the
        # process with status 1.
        try:
            plan = interp.dynac(args.source)
        except DynaCompilerError as e:
            print e
            exit(1)

        interp.load_plan(plan)
        interp.run_agenda()

    # Loader commands run in the order given on the command line.
    if args.load:
        for cmd in args.load:
            load.run(interp, cmd)

    # Post-processor commands run after all loaders.
    if args.post_process:
        for cmd in args.post_process:
            post.run(interp, cmd)

    # Charts are dumped only when something was actually loaded, processed
    # or run; args.output is passed through as parsed (None when -o is absent).
    if args.load or args.post_process or args.source:
        interp.dump_charts(args.output)  # should be a post-processor

    # Start the REPL when explicitly requested or when no source was given.
    if args.interactive or not args.source:
        repl = REPL(interp)

        def repl_crash():
            # all files the interpreter generated
            # Writes every entry of repl.lines to crash-repl.log under
            # dotdynadir, one per line.
            with file(dotdynadir / 'crash-repl.log', 'wb') as f:
                for line in repl.lines:
                    print >> f, line

        # Register the dump above as a hook on crash_handler.
        crash_handler.hooks.append(repl_crash)

        repl.cmdloop()