from mrakun import RakunDetector

def word_detector(blob_text):
    # Assumes a `hyperparameters` dict is defined at module scope
    # (see the next example for a typical configuration).
    keyword_detector = RakunDetector(hyperparameters)
    keyword_detector.verbose = False  # silence logging before detection runs
    keywords = keyword_detector.find_keywords(blob_text, input_type="text")
    # find_keywords returns (keyword, score) pairs; keep only the keywords.
    words = [word for word, _score in keywords]
    # keyword_detector.visualize_network()  # optional: plot the keyword graph
    return words
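
A minimal usage sketch, assuming the `hyperparameters` dict from the example below is already in scope; the sample sentence is illustrative, not from the original:

sample_text = ("Graph-based keyword extraction represents a document as a "
               "network of tokens and ranks candidate terms by centrality.")
print(word_detector(sample_text))  # a plain list of the top-ranked keywords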
Example 2
from mrakun import RakunDetector
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

hyperparameters = {"edit_distance_threshold": 3,   # merge near-duplicate words within this edit distance
                   "num_keywords": 10,             # how many keywords to return
                   "pair_diff_length": 2,          # max length difference for edit-distance comparison
                   "stopwords": stopwords.words('english'),
                   "bigram_count_threshold": 2,    # min frequency for two-word keyword candidates
                   "lemmatizer": WordNetLemmatizer(),
                   "num_tokens": [1]}              # keyword lengths in tokens (here: unigrams only)

keyword_detector = RakunDetector(hyperparameters)

# Here find_keywords receives a file path, so the document is read from disk.
example_data = "../datasets/wiki20/docsutf8/7183.txt"
keywords = keyword_detector.find_keywords(example_data)
print(keywords)
keyword_detector.visualize_network()  # plot the token graph used for ranking
keyword_detector.verbose = False      # silence logging during corpus validation
keyword_detector.validate_on_corpus("../datasets/Schutz2008")
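
Because each entry in `keywords` is a (keyword, score) pair, the output can be filtered directly; a minimal sketch, where the 0.1 cutoff is an arbitrary illustration:

strong = [word for word, score in keywords if score > 0.1]
print("Keywords above the cutoff:", strong)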