예제 #1
0
def get_keywords_from_text(text_lines, taxonomy=None, output_mode="text",
                           output_limit=CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER,
                           spires=False, match_mode="full", no_cache=False,
                           with_author_keywords=False, rebuild_cache=False,
                           only_core_tags=False):
    """Extract the keywords of a single document and return them formatted.

    The reference section is cut from the text, the remaining lines are
    joined and normalized, and the single and composite keyword matchers
    are run on the result. The matches are then handed to
    _get_keywords_output() together with the output options.

    @var text_lines: lines of the document
    @var taxonomy: ontology used to build the matchers when the
        module-level cache (_SKWS / _CKWS) is still empty
    @var output_mode: forwarded to _get_keywords_output()
    @var output_limit: forwarded to _get_keywords_output()
    @var spires: forwarded to _get_keywords_output()
    @var match_mode: when "partial", matching runs on the text returned
        by _get_partial_text() instead of the full text
    @var no_cache: forwarded to get_regular_expressions()
    @var with_author_keywords: when true, author keywords are extracted
        from the normalized full text and passed on to the output
    @var rebuild_cache: forwarded to get_regular_expressions() as rebuild
    @var only_core_tags: forwarded to _get_keywords_output()
    @return: whatever _get_keywords_output() produces
    """
    global _SKWS, _CKWS

    # The matchers are loaded once and then reused by later calls.
    if not _SKWS:
        if taxonomy is None:
            # Only reported; processing continues with the empty cache.
            write_message("ERROR: Please specify an ontology in order to "
                "extract keywords.", stream=sys.stderr, verbose=1)
        else:
            _SKWS, _CKWS = get_regular_expressions(
                taxonomy, rebuild=rebuild_cache, no_cache=no_cache)

    body_lines = cut_references(text_lines)
    fulltext = normalize_fulltext("\n".join(body_lines))

    # Author keywords are looked up before the text is possibly reduced.
    author_keywords = (get_author_keywords(_SKWS, _CKWS, fulltext)
                       if with_author_keywords else None)

    if match_mode == "partial":
        fulltext = _get_partial_text(fulltext)

    single_matches = get_single_keywords(_SKWS, fulltext)
    composite_matches = get_composite_keywords(_CKWS, fulltext,
                                               single_matches)

    return _get_keywords_output(single_matches, composite_matches, taxonomy,
                                author_keywords, output_mode, output_limit,
                                spires, only_core_tags)
예제 #2
0
def get_keywords_from_local_file(local_file, taxonomy, rebuild_cache=False,
                                 match_mode="full", no_cache=False,
                                 with_author_keywords=False):
    """Extract the single and composite keywords of a local file.

    The file is read with text_lines_from_local_file(), its reference
    section is cut, and the normalized text is run through the single
    and composite keyword matchers.

    @var local_file: file handed to text_lines_from_local_file()
    @var taxonomy: ontology used to build the matchers when the
        module-level cache (_SKWS / _CKWS) is still empty
    @var rebuild_cache: forwarded to get_regular_expressions() as rebuild
    @var match_mode: when "partial", matching runs on the text returned
        by _get_partial_text() instead of the full text
    @var no_cache: forwarded to get_regular_expressions()
    @var with_author_keywords: when true, get_author_keywords() is run
        on the normalized text; its result is not part of the return value
    @return: tuple (single_keywords, composite_keywords)
    """
    global _SKWS, _CKWS

    # The file is read first, before the taxonomy is touched.
    text_lines = text_lines_from_local_file(local_file)

    # The matchers are loaded once and then reused by later calls.
    if not _SKWS:
        if taxonomy is None:
            # Only reported; processing continues with the empty cache.
            write_message("ERROR: Please specify an ontology in order to "
                "extract keywords.", stream=sys.stderr, verbose=1)
        else:
            _SKWS, _CKWS = get_regular_expressions(
                taxonomy, rebuild=rebuild_cache, no_cache=no_cache)

    body_lines = cut_references(text_lines)
    fulltext = normalize_fulltext("\n".join(body_lines))

    # Computed for parity with get_keywords_from_text(); unused afterwards.
    author_keywords = (get_author_keywords(_SKWS, _CKWS, fulltext)
                       if with_author_keywords else None)

    if match_mode == "partial":
        fulltext = _get_partial_text(fulltext)

    single_matches = get_single_keywords(_SKWS, fulltext)
    composite_matches = get_composite_keywords(_CKWS, fulltext,
                                               single_matches)

    return (single_matches, composite_matches)
def get_keywords_from_text(text_lines, taxonomy=None, output_mode="text",
                           output_limit=CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER,
                           spires=False, match_mode="full", no_cache=False,
                           with_author_keywords=False, rebuild_cache=False,
                           only_core_tags=False):
    """Extract the keywords of a single document and return them formatted.

    The reference section is cut from the text, the remaining lines are
    joined and normalized, and the single and composite keyword matchers
    are run on the result. The matches are then handed to
    _get_keywords_output() together with the output options.

    @var text_lines: lines of the document
    @var taxonomy: ontology used to build the matchers when the
        module-level cache (_SKWS / _CKWS) is still empty
    @var output_mode: forwarded to _get_keywords_output()
    @var output_limit: forwarded to _get_keywords_output()
    @var spires: forwarded to _get_keywords_output()
    @var match_mode: when "partial", matching runs on the text returned
        by _get_partial_text() instead of the full text
    @var no_cache: forwarded to get_regular_expressions()
    @var with_author_keywords: when true, author keywords are extracted
        from the normalized full text and passed on to the output
    @var rebuild_cache: forwarded to get_regular_expressions() as rebuild
    @var only_core_tags: forwarded to _get_keywords_output()
    @return: whatever _get_keywords_output() produces
    """
    global _SKWS, _CKWS

    cache_is_empty = not _SKWS
    if cache_is_empty:
        if taxonomy is None:
            # Only reported; processing continues with the empty cache.
            write_message("ERROR: Please specify an ontology in order to "
                "extract keywords.", stream=sys.stderr, verbose=1)
        else:
            # Load the matchers once; later calls reuse the globals.
            _SKWS, _CKWS = get_regular_expressions(
                taxonomy, rebuild=rebuild_cache, no_cache=no_cache)

    without_references = cut_references(text_lines)
    fulltext = normalize_fulltext("\n".join(without_references))

    # Author keywords are looked up before the text is possibly reduced.
    if with_author_keywords:
        author_keywords = get_author_keywords(_SKWS, _CKWS, fulltext)
    else:
        author_keywords = None

    if match_mode == "partial":
        fulltext = _get_partial_text(fulltext)

    singles = get_single_keywords(_SKWS, fulltext)
    composites = get_composite_keywords(_CKWS, fulltext, singles)

    return _get_keywords_output(singles, composites, taxonomy,
                                author_keywords, output_mode, output_limit,
                                spires, only_core_tags)
def get_keywords_from_local_file(local_file, taxonomy, rebuild_cache=False,
                                 match_mode="full", no_cache=False,
                                 with_author_keywords=False):
    """Extract the single and composite keywords of a local file.

    The file is read with text_lines_from_local_file(), its reference
    section is cut, and the normalized text is run through the single
    and composite keyword matchers.

    @var local_file: file handed to text_lines_from_local_file()
    @var taxonomy: ontology used to build the matchers when the
        module-level cache (_SKWS / _CKWS) is still empty
    @var rebuild_cache: forwarded to get_regular_expressions() as rebuild
    @var match_mode: when "partial", matching runs on the text returned
        by _get_partial_text() instead of the full text
    @var no_cache: forwarded to get_regular_expressions()
    @var with_author_keywords: when true, get_author_keywords() is run
        on the normalized text; its result is not part of the return value
    @return: tuple (single_keywords, composite_keywords)
    """
    global _SKWS, _CKWS

    # The file is read first, before the taxonomy is touched.
    text_lines = text_lines_from_local_file(local_file)

    cache_is_empty = not _SKWS
    if cache_is_empty:
        if taxonomy is None:
            # Only reported; processing continues with the empty cache.
            write_message("ERROR: Please specify an ontology in order to "
                "extract keywords.", stream=sys.stderr, verbose=1)
        else:
            # Load the matchers once; later calls reuse the globals.
            _SKWS, _CKWS = get_regular_expressions(
                taxonomy, rebuild=rebuild_cache, no_cache=no_cache)

    without_references = cut_references(text_lines)
    fulltext = normalize_fulltext("\n".join(without_references))

    # Computed for parity with get_keywords_from_text(); unused afterwards.
    if with_author_keywords:
        author_keywords = get_author_keywords(_SKWS, _CKWS, fulltext)
    else:
        author_keywords = None

    if match_mode == "partial":
        fulltext = _get_partial_text(fulltext)

    singles = get_single_keywords(_SKWS, fulltext)
    composites = get_composite_keywords(_CKWS, fulltext, singles)

    return (singles, composites)
예제 #5
0
def extract_author_keywords(skw_db, ckw_db, fulltext):
    """Find the human-defined (author) keywords in a text string.

    Delegates to keyworder.get_author_keywords(), which is documented as
    searching for the string "Keywords:" and its declinations and
    matching the words that follow. Every key of its result is wrapped
    in a reader.KeywordToken of type 'author-kw'.

    @var skw_db: list of single kw objects
    @var ckw_db: list of composite kw objects
    @var fulltext: utf-8 string
    @return: dictionary of matches in the format {
          <keyword object>: [matched skw or ckw object, ....]
          }
          or empty {}
    """
    make_token = reader.KeywordToken
    found = keyworder.get_author_keywords(skw_db, ckw_db, fulltext)
    return dict((make_token(name, type='author-kw'), matches)
                for name, matches in found.items())
예제 #6
0
def extract_author_keywords(skw_db, ckw_db, fulltext):
    """Find the human-defined (author) keywords in a text string.

    Delegates to keyworder.get_author_keywords(), which is documented as
    searching for the string "Keywords:" and its declinations and
    matching the words that follow. Every key of its result is wrapped
    in a reader.KeywordToken of type 'author-kw'.

    @var skw_db: list of single kw objects
    @var ckw_db: list of composite kw objects
    @var fulltext: utf-8 string
    @return: dictionary of matches in the format {
          <keyword object>: [matched skw or ckw object, ....]
          }
          or empty {}
    """
    token_cls = reader.KeywordToken
    raw_matches = keyworder.get_author_keywords(skw_db, ckw_db, fulltext)
    # Re-key the raw result with author-keyword tokens.
    wrapped_pairs = ((token_cls(label, type='author-kw'), hits)
                     for label, hits in raw_matches.items())
    return dict(wrapped_pairs)