from mrakun import RakunDetector
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

hyperparameters = {"edit_distance_threshold": 3,
                   "num_keywords": 10,
                   "pair_diff_length": 2,
                   "stopwords": stopwords.words('english'),
                   "bigram_count_threshold": 2,
                   "lemmatizer": WordNetLemmatizer(),
                   "num_tokens": [1]}

def word_detector(blob_text):
    """Return the top keywords RaKUn identifies in a raw text string."""
    keyword_detector = RakunDetector(hyperparameters)
    keyword_detector.verbose = False  # silence logging before detection runs
    # find_keywords yields (keyword, score) pairs; keep only the keyword strings
    keywords = keyword_detector.find_keywords(blob_text, input_type="text")
    words = [word for word, score in keywords]
    return words

# Stand-alone example: extract keywords from a document on disk
keyword_detector = RakunDetector(hyperparameters)
example_data = "../datasets/wiki20/docsutf8/7183.txt"
keywords = keyword_detector.find_keywords(example_data, input_type="file")
print(keywords)
keyword_detector.visualize_network()

# Quiet the logger before running corpus-level validation
keyword_detector.verbose = False
keyword_detector.validate_on_corpus("../datasets/Schutz2008")
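For example, word_detector can be called on any raw string; a minimal usage sketch follows (the sample sentence and the sample_text name are invented purely for illustration):

# Hypothetical input text, used only to demonstrate the helper above
sample_text = ("Graph-based keyword extraction methods such as RaKUn rank "
               "candidate terms by their position in a token graph.")
top_words = word_detector(sample_text)
print(top_words)  # a list of up to num_keywords (10) keyword strings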