Exemple #1
0
    def _process_text(parser, session, text, all_names, xform):
        """ Low-level utility function to parse text and return the result of
            a transformation function (xform) for each sentence.
            Set all_names = True to get a comprehensive name register.
            Set all_names = False to get a simple name register.
            Set all_names = None to get no name register. """
        t0 = time.time()
        # Demarcate paragraphs in the input
        text = mark_paragraphs(text)
        # Tokenize the result
        token_stream = tokenize(text)
        toklist = list(
            recognize_entities(token_stream, enclosing_session=session))
        t1 = time.time()
        pgs, stats = TreeUtility._process_toklist(parser, session, toklist,
                                                  xform)

        if all_names is None:
            register = None
        else:
            from queries.builtin import create_name_register

            register = create_name_register(toklist,
                                            session,
                                            all_names=all_names)

        t2 = time.time()
        stats["tok_time"] = t1 - t0
        stats["parse_time"] = t2 - t1
        stats["total_time"] = t2 - t0
        return (pgs, stats, register)
Exemple #2
0
    def _process_text(parser, session, text, all_names, xform):
        """ Low-level utility function to parse text and return the result of
            a transformation function (xform) for each sentence.
            Set all_names = True to get a comprehensive name register.
            Set all_names = False to get a simple name register.
            Set all_names = None to get no name register. """
        t0 = time.time()
        # Demarcate paragraphs in the input
        text = mark_paragraphs(text)
        # Tokenize the result
        token_stream = tokenize(text)
        toklist = list(recognize_entities(token_stream, enclosing_session=session))
        t1 = time.time()
        pgs, stats = TreeUtility._process_toklist(parser, session, toklist, xform)

        if all_names is None:
            register = None
        else:
            from query import create_name_register
            register = create_name_register(toklist, session, all_names=all_names)

        t2 = time.time()
        stats["tok_time"] = t1 - t0
        stats["parse_time"] = t2 - t1
        stats["total_time"] = t2 - t0
        return (pgs, stats, register)