def analyse(tab):
    keys = [
        'Наименование продукта/технологии', 'Уникальные характеристики',
        'Задачи, которые решает продукт', 'Технические характеристики',
        'Ожидаемые эффекты'
    ]
    res = []
    r = Rake()
    r.language = "russian"
    # Extraction given the text.
    for i in tab:
        text = "\n".join(list(map(lambda x: i[x], keys)))
        r.extract_keywords_from_text(text)
        ranked = r.get_ranked_phrases_with_scores()
        res.append(ranked)
    return res
import pandas as pd

with open('testBase.txt', 'r', encoding='utf-8') as f:
    texts = f.read().replace('\t', '\n\n').split('===')

# normTexts = list(map(lambda x: stopWfilter(x).split(), texts))

normTexts = list(map(stopWfilter, texts))

# tfidf_vectorizer = TfidfVectorizer()
# values = tfidf_vectorizer.fit_transform(normTexts[0])
#
# # Show the Model as a pandas DataFrame
# feature_names = tfidf_vectorizer.get_feature_names()
# pdf = pd.DataFrame(values.toarray(), columns = feature_names)
# print(pdf)

from rake_nltk import Rake
r = Rake()
r.language = "russian"
# Extraction given the text.
for te in texts:
    r.extract_keywords_from_text(te)
    ranked = r.get_ranked_phrases_with_scores()
    print(ranked[:5])
# # To get keyword phrases ranked highest to lowest.
# r.get_ranked_phrases()
#
# # To get keyword phrases ranked highest to lowest with scores.
# r.get_ranked_phrases_with_scores()