Esempio n. 1
0
 def test_remove_keywords_using_list(self):
     """Check list-based keyword removal against every stored test case.

     For each entry in ``self.test_cases`` a fresh KeywordProcessor is
     loaded from the case's ``keyword_dict``. Every list of keywords held
     in the case's ``remove_keyword_dict`` is then removed, and the
     keywords extracted from the case's ``sentence`` must equal the
     expected ``keywords``.
     """
     for case_index, case in enumerate(self.test_cases):
         processor = KeywordProcessor()
         processor.add_keywords_from_dict(case['keyword_dict'])
         # Only the value lists are needed; the dict keys are not used.
         for doomed_keywords in case['remove_keyword_dict'].values():
             processor.remove_keywords_from_list(doomed_keywords)
         found = processor.extract_keywords(case['sentence'])
         failure_message = (
             "keywords_extracted don't match the expected results for test case: {}"
             .format(case_index)
         )
         self.assertEqual(found, case['keywords'], failure_message)
Esempio n. 2
0
def get_gbif_keyprocessor(source_path: str) -> KeywordProcessor:
    """
    Build a KeywordProcessor from the names found in a GBIF CSV file.

    The CSV at ``source_path`` is read and every row containing a missing
    value is dropped. The unique values of the ``canonicalName``,
    ``family`` and ``genus`` columns are then registered as keywords.
    Finally, every entry of ``list_stopwords`` (defined outside this
    function) is removed from the processor before it is returned.
    """
    table = pd.read_csv(source_path).dropna()

    name_columns = ("canonicalName", "family", "genus")
    # A set collapses names that appear in more than one column.
    distinct_names = {
        name
        for column in name_columns
        for name in table[column].unique().tolist()
    }

    processor = KeywordProcessor()
    for name in distinct_names:
        processor.add_keyword(name)

    processor.remove_keywords_from_list(list(list_stopwords))
    return processor
Esempio n. 3
0
 def test_remove_keyword_from_list(self):
     """Passing a plain string instead of a list must raise AttributeError."""
     processor = KeywordProcessor()
     not_a_list = "java"
     with pytest.raises(AttributeError):
         processor.remove_keywords_from_list(not_a_list)
    "product management": ["PM", "product manager"]
}
# NOTE(review): `keyword_processor` and `keyword_dict` are expected to be
# defined before this point.
keyword_processor.add_keywords_from_dict(keyword_dict)
print(
    keyword_processor.extract_keywords(
        'I am a product manager for a java_2e platform'))
# output ['product management', 'java']

# Remove a single keyword; it is no longer matched afterwards.
keyword_processor.remove_keyword('java_2e')
print(
    keyword_processor.extract_keywords(
        'I am a product manager for a java_2e platform'))
# output ['product management']

# You can also remove keywords in bulk from a list or a dictionary.
keyword_processor.remove_keywords_from_dict({"product management": ["PM"]})
keyword_processor.remove_keywords_from_list(["java programing"])
# Print the result (it was previously computed and discarded) so the output
# noted below is actually displayed, like the two extractions above.
print(
    keyword_processor.extract_keywords(
        'I am a product manager for a java_2e platform'))
# output ['product management']

# Query how many keywords have been added
keyword_processor = KeywordProcessor()
# Keywords in dict form: each key is the name reported for a match, but the
# key itself is not included in the set of terms that are searched for.
keyword_dict = {
    "java": [
        "java_2e",
        "java programing",
    ],
    "product management": [
        "PM",
        "product manager",
    ],
}
keyword_processor.add_keywords_from_dict(keyword_dict)
print(len(keyword_processor))
# output 4
# Remove keywords
keyword_processor = KeywordProcessor()
keyword_dict = {
    "java": ["java_2e", "java programing"],
    "product management": ["PM", "product manager"]
}
keyword_processor.add_keywords_from_dict(keyword_dict)
print(keyword_processor.extract_keywords('I am a product manager for a java_2e platform'))
# output ['product management', 'java']

# Remove a single keyword; it is no longer matched afterwards.
keyword_processor.remove_keyword('java_2e')
print(keyword_processor.extract_keywords('I am a product manager for a java_2e platform'))
# output ['product management']

# You can also remove keywords in bulk from a list or a dictionary.
keyword_processor.remove_keywords_from_dict({"product management": ["PM"]})
keyword_processor.remove_keywords_from_list(["java programing"])
# Only "PM" and "java programing" were removed here, so "product manager"
# still matches. Print the result (it was previously computed and discarded)
# so the output noted below is actually displayed.
print(keyword_processor.extract_keywords('I am a product manager for a java_2e platform'))
# output ['product management']

# Query how many keywords have been added
keyword_processor = KeywordProcessor()
# Keywords in dict form: each key is the name reported for a match, but the
# key itself is not included in the set of terms that are searched for.
keyword_dict = {
    "java": [
        "java_2e",
        "java programing",
    ],
    "product management": [
        "PM",
        "product manager",
    ],
}
keyword_processor.add_keywords_from_dict(keyword_dict)
print(len(keyword_processor))
# output 4

# Check whether a keyword has already been added