Example #1
def _process_text(parser, session, text, all_names, xform):
    """ Low-level utility function to parse text and return the result of
        a transformation function (xform) for each sentence.
        Set all_names = True to get a comprehensive name register.
        Set all_names = False to get a simple name register.
        Set all_names = None to get no name register. """
    t0 = time.time()
    # Demarcate paragraphs in the input
    text = Fetcher.mark_paragraphs(text)
    # Tokenize the result
    toklist = list(tokenize_and_recognize(text, enclosing_session=session))
    t1 = time.time()
    pgs, stats = TreeUtility._process_toklist(parser, session, toklist, xform)
    if all_names is None:
        register = None
    else:
        from query import create_name_register
        register = create_name_register(toklist, session, all_names=all_names)
    t2 = time.time()
    stats["tok_time"] = t1 - t0
    stats["parse_time"] = t2 - t1
    stats["total_time"] = t2 - t0
    return (pgs, stats, register)
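
A hedged usage sketch (not part of the source listing): _process_text looks like a TreeUtility helper that expects an open parser, a database session and a per-sentence transformation callback. The describe_sentence callback below is illustrative, and its two-argument signature is only a guess based on how Article._dump_tokens(sent.tokens, sent.tree, ...) is called in example #2; SessionContext and Fast_Parser are borrowed from examples #2 and #3.

# Hypothetical caller; assumes _process_text is reachable as
# TreeUtility._process_text and that the xform callback receives the
# sentence tokens and parse tree (a guess, not confirmed by the excerpt).
def describe_sentence(tokens, tree):
    # Illustrative transformation: pass the token list through unchanged
    return tokens

with SessionContext(commit=True) as session:
    with Fast_Parser(verbose=False) as parser:
        pgs, stats, register = TreeUtility._process_text(
            parser, session, "Some plain text to analyze.",
            all_names=True,  # comprehensive name register
            xform=describe_sentence)
        print(stats["total_time"], register)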
Example #2
File: article.py  Project: busla/Reynir
    def tag_text(session, text):
        """ Parse plain text and return the parsed paragraphs as lists of sentences
            where each sentence is a list of tagged tokens """

        t0 = time.time()
        # Demarcate paragraphs in the input
        text = Fetcher.mark_paragraphs(text)
        # Tokenize the result
        toklist = list(tokenize(text, enclosing_session=session))
        # Paragraph list, containing sentences, containing tokens
        pgs = []
        t1 = time.time()

        with Fast_Parser(verbose=False) as bp:  # Don't emit diagnostic messages

            ip = IncrementalParser(bp, toklist, verbose=True)

            for p in ip.paragraphs():
                pgs.append([])
                for sent in p.sentences():
                    if sent.parse():
                        # Parsed successfully
                        pgs[-1].append(
                            Article._dump_tokens(sent.tokens, sent.tree, None))
                    else:
                        # Error in parse
                        pgs[-1].append(
                            Article._dump_tokens(sent.tokens, None, None,
                                                 sent.err_index))

            t2 = time.time()
            stats = dict(num_tokens=ip.num_tokens,
                         num_sentences=ip.num_sentences,
                         num_parsed=ip.num_parsed,
                         ambiguity=ip.ambiguity,
                         tok_time=t1 - t0,
                         parse_time=t2 - t1,
                         total_time=t2 - t0)

        # Add a name register to the result
        register = create_name_register(toklist, session)

        return (pgs, stats, register)
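
A hedged calling sketch for the method above, assuming tag_text is a static method of the Article class named in the file header and that SessionContext supplies the database session as in example #3; the input string is made up.

# Hypothetical usage of Article.tag_text; only the call signature shown
# above is relied on, the text is an arbitrary sample.
with SessionContext(commit=True) as session:
    pgs, stats, register = Article.tag_text(session, "Some plain text to tag.")
    print(stats["num_sentences"], "sentences,", stats["num_parsed"], "parsed")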
Example #3
def analyze():
    """ Analyze text manually entered by the user, i.e. not coming from an article """

    text = request.form.get("text", "").strip()[0:_MAX_TEXT_LENGTH]

    with SessionContext(commit=True) as session:

        # Demarcate paragraphs in the input
        text = Fetcher.mark_paragraphs(text)
        # Tokenize the result
        toklist = list(tokenize(text, enclosing_session=session))
        # Paragraph list, containing sentences, containing tokens
        pgs = []

        with Fast_Parser(verbose=False) as bp:  # Don't emit diagnostic messages

            ip = IncrementalParser(bp, toklist, verbose=True)

            for p in ip.paragraphs():
                pgs.append([])
                for sent in p.sentences():
                    if sent.parse():
                        # Parsed successfully
                        pgs[-1].append(
                            ArticleProxy._dump_tokens(sent.tokens, sent.tree,
                                                      None))
                    else:
                        # Error in parse
                        pgs[-1].append(
                            ArticleProxy._dump_tokens(sent.tokens, None, None,
                                                      sent.err_index))

            stats = dict(num_tokens=ip.num_tokens,
                         num_sentences=ip.num_sentences,
                         num_parsed=ip.num_parsed,
                         ambiguity=ip.ambiguity)
            # Add a name register to the result
            register = create_name_register(toklist, session)

    # Return the tokens as a JSON structure to the client
    return jsonify(result=pgs, stats=stats, register=register)
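
Since analyze() reads request.form and returns jsonify(...), a client would POST the text and get back the result/stats/register structure as JSON. The sketch below uses Flask's test client; the "/analyze" URL and the app object are assumptions, as the route decorator is not shown in the excerpt.

# Hypothetical request; the route path "/analyze" and the Flask app
# instance "app" are assumptions not shown in the excerpt above.
with app.test_client() as client:
    resp = client.post("/analyze", data={"text": "Some plain text to analyze."})
    payload = resp.get_json()
    print(payload["stats"], payload["register"])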