Python text_lines_from_local_file 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: invenio.bibclassify_text_extractor

메소드/함수: text_lines_from_local_file

hotexamples.com에서의 예제들: 4

Python text_lines_from_local_file - 4개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 invenio.bibclassify_text_extractor.text_lines_from_local_file에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

파일: bibclassify_engine.py 프로젝트: aw-bib/tind-invenio

def output_keywords_for_sources(input_sources, taxonomy_name, output_mode="text",
    output_limit=bconfig.CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER, spires=False,
    match_mode="full", no_cache=False, with_author_keywords=False,
    rebuild_cache=False, only_core_tags=False, extract_acronyms=False,
    **kwargs):
    """Outputs the keywords for each source in sources."""


    # Inner function which does the job and it would be too much work to
    # refactor the call (and it must be outside the loop, before it did
    # not process multiple files)
    def process_lines():
        if output_mode == "text":
            print "Input file: %s" % source

        output = get_keywords_from_text(text_lines,
                                          taxonomy_name,
                                          output_mode=output_mode,
                                          output_limit=output_limit,
                                          spires=spires,
                                          match_mode=match_mode,
                                          no_cache=no_cache,
                                          with_author_keywords=with_author_keywords,
                                          rebuild_cache=rebuild_cache,
                                          only_core_tags=only_core_tags,
                                          extract_acronyms=extract_acronyms
                                          )
        print output

    # Get the fulltext for each source.
    for entry in input_sources:
        log.info("Trying to read input file %s." % entry)
        text_lines = None
        source = ""
        if os.path.isdir(entry):
            for filename in os.listdir(entry):
                if filename.startswith('.'):
                    continue
                filename = os.path.join(entry, filename)
                if os.path.isfile(filename):
                    text_lines = extractor.text_lines_from_local_file(filename)
                    if text_lines:
                        source = filename
                        process_lines()
        elif os.path.isfile(entry):
            text_lines = extractor.text_lines_from_local_file(entry)
            if text_lines:
                source = os.path.basename(entry)
                process_lines()
        else:
            # Treat as a URL.
            text_lines = extractor.text_lines_from_url(entry,
                user_agent=make_user_agent_string("BibClassify"))
            if text_lines:
                source = entry.split("/")[-1]
                process_lines()

예제 #2

파일 보기

def output_keywords_for_sources(input_sources, taxonomy_name, output_mode="text",
    output_limit=bconfig.CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER, spires=False,
    match_mode="full", no_cache=False, with_author_keywords=False,
    rebuild_cache=False, only_core_tags=False, extract_acronyms=False,
    **kwargs):
    """Outputs the keywords for each source in sources."""


    # Inner function which does the job and it would be too much work to
    # refactor the call (and it must be outside the loop, before it did
    # not process multiple files)
    def process_lines():
        if output_mode == "text":
            print "Input file: %s" % source

        output = get_keywords_from_text(text_lines,
                                          taxonomy_name,
                                          output_mode=output_mode,
                                          output_limit=output_limit,
                                          spires=spires,
                                          match_mode=match_mode,
                                          no_cache=no_cache,
                                          with_author_keywords=with_author_keywords,
                                          rebuild_cache=rebuild_cache,
                                          only_core_tags=only_core_tags,
                                          extract_acronyms=extract_acronyms
                                          )
        print output

    # Get the fulltext for each source.
    for entry in input_sources:
        log.info("Trying to read input file %s." % entry)
        text_lines = None
        source = ""
        if os.path.isdir(entry):
            for filename in os.listdir(entry):
                filename = os.path.join(entry, filename)
                if os.path.isfile(filename):
                    text_lines = extractor.text_lines_from_local_file(filename)
                    if text_lines:
                        source = filename
                        process_lines()
        elif os.path.isfile(entry):
            text_lines = extractor.text_lines_from_local_file(entry)
            if text_lines:
                source = os.path.basename(entry)
                process_lines()
        else:
            # Treat as a URL.
            text_lines = extractor.text_lines_from_url(entry,
                user_agent=make_user_agent_string("BibClassify"))
            if text_lines:
                source = entry.split("/")[-1]
                process_lines()

예제 #3

파일 보기

파일: bibclassify_engine.py 프로젝트: aw-bib/tind-invenio

def get_keywords_from_local_file(local_file, taxonomy_name, output_mode="text",
    output_limit=bconfig.CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER, spires=False,
    match_mode="full", no_cache=False, with_author_keywords=False,
    rebuild_cache=False, only_core_tags=False, extract_acronyms=False,
    **kwargs ):
    """Outputs keywords reading a local file. Arguments and output are the same
    as for @see: get_keywords_from_text() """

    log.info("Analyzing keywords for local file %s." % local_file)
    text_lines = extractor.text_lines_from_local_file(local_file)

    return get_keywords_from_text(text_lines,
                                  taxonomy_name,
                                  output_mode=output_mode,
                                  output_limit=output_limit,
                                  spires=spires,
                                  match_mode=match_mode,
                                  no_cache=no_cache,
                                  with_author_keywords=with_author_keywords,
                                  rebuild_cache=rebuild_cache,
                                  only_core_tags=only_core_tags,
                                  extract_acronyms=extract_acronyms)

예제 #4

파일 보기

def get_keywords_from_local_file(local_file, taxonomy_name, output_mode="text",
    output_limit=bconfig.CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER, spires=False,
    match_mode="full", no_cache=False, with_author_keywords=False,
    rebuild_cache=False, only_core_tags=False, extract_acronyms=False,
    **kwargs ):
    """Outputs keywords reading a local file. Arguments and output are the same
    as for @see: get_keywords_from_text() """

    log.info("Analyzing keywords for local file %s." % local_file)
    text_lines = extractor.text_lines_from_local_file(local_file)

    return get_keywords_from_text(text_lines,
                                  taxonomy_name,
                                  output_mode=output_mode,
                                  output_limit=output_limit,
                                  spires=spires,
                                  match_mode=match_mode,
                                  no_cache=no_cache,
                                  with_author_keywords=with_author_keywords,
                                  rebuild_cache=rebuild_cache,
                                  only_core_tags=only_core_tags,
                                  extract_acronyms=extract_acronyms)