def get_keywords_from_text(text_lines, taxonomy=None, output_mode="text",
                           output_limit=CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER,
                           spires=False, match_mode="full", no_cache=False,
                           with_author_keywords=False, rebuild_cache=False,
                           only_core_tags=False):
    """Extract the keywords of a single document and return them formatted.

    @var text_lines: lines of the document; references are cut from them
        before they are joined with newlines and normalized
    @var taxonomy: ontology handed to get_regular_expressions() and to
        _get_keywords_output(); it is only loaded while the module-level
        _SKWS cache is empty
    @var output_mode, output_limit, spires, only_core_tags: forwarded
        unchanged to _get_keywords_output()
    @var match_mode: "partial" restricts matching to the text returned by
        _get_partial_text(); any other value searches the whole text
    @var no_cache, rebuild_cache: forwarded to get_regular_expressions()
    @var with_author_keywords: when true, author keywords are extracted
        from the complete (not the partial) text
    @return: the result of _get_keywords_output()
    """
    global _SKWS, _CKWS

    # The single/composite keyword data is kept in the module globals
    # _SKWS/_CKWS, so the taxonomy is only consulted while _SKWS is empty.
    if not _SKWS:
        if taxonomy is None:
            # Only a message is emitted here; the extraction below still
            # runs with whatever the globals currently hold.
            write_message("ERROR: Please specify an ontology in order to "
                          "extract keywords.", stream=sys.stderr, verbose=1)
        else:
            _SKWS, _CKWS = get_regular_expressions(
                taxonomy, rebuild=rebuild_cache, no_cache=no_cache)

    body = normalize_fulltext("\n".join(cut_references(text_lines)))

    # Author keywords are looked up before any "partial" trimming of the text.
    authors = (get_author_keywords(_SKWS, _CKWS, body)
               if with_author_keywords else None)

    if match_mode == "partial":
        body = _get_partial_text(body)

    singles = get_single_keywords(_SKWS, body)
    # Composite matching receives the single-keyword matches found above.
    composites = get_composite_keywords(_CKWS, body, singles)

    return _get_keywords_output(singles, composites, taxonomy, authors,
                                output_mode, output_limit, spires,
                                only_core_tags)
def get_keywords_from_local_file(local_file, taxonomy, rebuild_cache=False,
                                 match_mode="full", no_cache=False,
                                 with_author_keywords=False):
    """Extract single and composite keywords from a local file.

    @var local_file: passed to text_lines_from_local_file() to obtain the
        lines of the document
    @var taxonomy: ontology handed to get_regular_expressions(); it is only
        loaded while the module-level _SKWS cache is empty
    @var rebuild_cache, no_cache: forwarded to get_regular_expressions()
    @var match_mode: "partial" restricts matching to the text returned by
        _get_partial_text(); any other value searches the whole text
    @var with_author_keywords: when true, get_author_keywords() is called
        on the complete text (its result is not returned)
    @return: tuple (single_keywords, composite_keywords)
    """
    global _SKWS, _CKWS

    # The file is read first, before the keyword cache is touched.
    lines = text_lines_from_local_file(local_file)

    # The single/composite keyword data is kept in the module globals
    # _SKWS/_CKWS, so the taxonomy is only consulted while _SKWS is empty.
    if not _SKWS:
        if taxonomy is None:
            # Only a message is emitted here; the extraction below still
            # runs with whatever the globals currently hold.
            write_message("ERROR: Please specify an ontology in order to "
                          "extract keywords.", stream=sys.stderr, verbose=1)
        else:
            _SKWS, _CKWS = get_regular_expressions(
                taxonomy, rebuild=rebuild_cache, no_cache=no_cache)

    body = normalize_fulltext("\n".join(cut_references(lines)))

    if with_author_keywords:
        # The extracted author keywords are not part of the returned tuple;
        # the call is still made, on the untrimmed text, when requested.
        get_author_keywords(_SKWS, _CKWS, body)

    if match_mode == "partial":
        body = _get_partial_text(body)

    singles = get_single_keywords(_SKWS, body)
    # Composite matching receives the single-keyword matches found above.
    composites = get_composite_keywords(_CKWS, body, singles)

    return (singles, composites)
def get_keywords_from_text(text_lines, taxonomy=None, output_mode="text",
                           output_limit=CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER,
                           spires=False, match_mode="full", no_cache=False,
                           with_author_keywords=False, rebuild_cache=False,
                           only_core_tags=False):
    """Extract the keywords of a single document and return them formatted.

    @var text_lines: lines of the document; references are cut from them
        before they are joined with newlines and normalized
    @var taxonomy: ontology handed to get_regular_expressions() and to
        _get_keywords_output(); it is only loaded while the module-level
        _SKWS cache is empty
    @var output_mode, output_limit, spires, only_core_tags: forwarded
        unchanged to _get_keywords_output()
    @var match_mode: "partial" restricts matching to the text returned by
        _get_partial_text(); any other value searches the whole text
    @var no_cache, rebuild_cache: forwarded to get_regular_expressions()
    @var with_author_keywords: when true, author keywords are extracted
        from the complete (not the partial) text
    @return: the result of _get_keywords_output()
    """
    global _SKWS, _CKWS

    # The single/composite keyword data is kept in the module globals
    # _SKWS/_CKWS, so the taxonomy is only consulted while _SKWS is empty.
    if not _SKWS:
        if taxonomy is None:
            # Only a message is emitted here; the extraction below still
            # runs with whatever the globals currently hold.
            write_message("ERROR: Please specify an ontology in order to "
                          "extract keywords.", stream=sys.stderr, verbose=1)
        else:
            _SKWS, _CKWS = get_regular_expressions(
                taxonomy, rebuild=rebuild_cache, no_cache=no_cache)

    body = normalize_fulltext("\n".join(cut_references(text_lines)))

    # Author keywords are looked up before any "partial" trimming of the text.
    authors = (get_author_keywords(_SKWS, _CKWS, body)
               if with_author_keywords else None)

    if match_mode == "partial":
        body = _get_partial_text(body)

    singles = get_single_keywords(_SKWS, body)
    # Composite matching receives the single-keyword matches found above.
    composites = get_composite_keywords(_CKWS, body, singles)

    return _get_keywords_output(singles, composites, taxonomy, authors,
                                output_mode, output_limit, spires,
                                only_core_tags)
def get_keywords_from_local_file(local_file, taxonomy, rebuild_cache=False,
                                 match_mode="full", no_cache=False,
                                 with_author_keywords=False):
    """Extract single and composite keywords from a local file.

    @var local_file: passed to text_lines_from_local_file() to obtain the
        lines of the document
    @var taxonomy: ontology handed to get_regular_expressions(); it is only
        loaded while the module-level _SKWS cache is empty
    @var rebuild_cache, no_cache: forwarded to get_regular_expressions()
    @var match_mode: "partial" restricts matching to the text returned by
        _get_partial_text(); any other value searches the whole text
    @var with_author_keywords: when true, get_author_keywords() is called
        on the complete text (its result is not returned)
    @return: tuple (single_keywords, composite_keywords)
    """
    global _SKWS, _CKWS

    # The file is read first, before the keyword cache is touched.
    lines = text_lines_from_local_file(local_file)

    # The single/composite keyword data is kept in the module globals
    # _SKWS/_CKWS, so the taxonomy is only consulted while _SKWS is empty.
    if not _SKWS:
        if taxonomy is None:
            # Only a message is emitted here; the extraction below still
            # runs with whatever the globals currently hold.
            write_message("ERROR: Please specify an ontology in order to "
                          "extract keywords.", stream=sys.stderr, verbose=1)
        else:
            _SKWS, _CKWS = get_regular_expressions(
                taxonomy, rebuild=rebuild_cache, no_cache=no_cache)

    body = normalize_fulltext("\n".join(cut_references(lines)))

    if with_author_keywords:
        # The extracted author keywords are not part of the returned tuple;
        # the call is still made, on the untrimmed text, when requested.
        get_author_keywords(_SKWS, _CKWS, body)

    if match_mode == "partial":
        body = _get_partial_text(body)

    singles = get_single_keywords(_SKWS, body)
    # Composite matching receives the single-keyword matches found above.
    composites = get_composite_keywords(_CKWS, body, singles)

    return (singles, composites)
def extract_composite_keywords(ckw_db, fulltext, skw_spans):
    """Find the composite keywords occurring in a text.

    Delegates to keyworder.get_composite_keywords() and normalizes an empty
    (falsy) result to an empty dictionary.

    @var ckw_db: list of keyword token objects (they are supposed to be
        composite ones)
    @var fulltext: string to search in
    @var skw_spans: dictionary of already identified single keywords
    @return: dictionary of matches in a format {
            <keyword object>, [[position, position...], [info_about_matches] ],
            ..
            }
            or empty {}
    """
    matches = keyworder.get_composite_keywords(ckw_db, fulltext, skw_spans)
    if not matches:
        # Callers always get a dictionary back, never None or another
        # falsy value.
        return {}
    return matches
def extract_composite_keywords(ckw_db, fulltext, skw_spans):
    """Find the composite keywords occurring in a text.

    Delegates to keyworder.get_composite_keywords() and normalizes an empty
    (falsy) result to an empty dictionary.

    @var ckw_db: list of keyword token objects (they are supposed to be
        composite ones)
    @var fulltext: string to search in
    @var skw_spans: dictionary of already identified single keywords
    @return: dictionary of matches in a format {
            <keyword object>, [[position, position...], [info_about_matches] ],
            ..
            }
            or empty {}
    """
    matches = keyworder.get_composite_keywords(ckw_db, fulltext, skw_spans)
    if not matches:
        # Callers always get a dictionary back, never None or another
        # falsy value.
        return {}
    return matches