Example #1
def rake():
    from rake_nltk import Rake, Metric
    #r = Rake()
    r = Rake(ranking_metric=Metric.WORD_FREQUENCY)
    words = ""
    for disc in discs:
        words = words + ". " + disc
    r.extract_keywords_from_text(words)
    print(r.get_ranked_phrases_with_scores())
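Note: rake() reads a module-level discs list that the snippet does not define. A minimal, hypothetical driver (the discs values below are invented):

# Hypothetical usage; `discs` stands in for whatever list of strings the original module defines.
discs = [
    "A concept album about artificial intelligence.",
    "Live recordings from a world tour.",
]
rake()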
Example #2
def sheet(text):
    # the site's address; we append the page path (results go to an xls later)
    doclink = "http://www.scholarpedia.org" + text
    print(doclink)
    wb = Workbook()
    sheet1 = wb.add_sheet('Sheet 1')
    headers1 = {
        'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    }
    document = Document()
    link1 = requests.get(doclink, headers=headers1).text
    soup1 = BeautifulSoup(link1, 'lxml')
    # extract info from the p and h1 tags
    selectall2 = soup1.find_all("p")
    selecttitle = str(*soup1.find_all("h1"))  # assumes the page has exactly one h1
    print(selecttitle)
    #print(*selectall,sep='\n')
    stri2 = ""
    i = 0
    for lin in selectall2:
        stri2 += str(lin)
        i = i + 1
    # strip html tags from the text
    clean = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});')
    stri2 = re.sub(clean, '', stri2)
    sheet1.write(0, 0, 'Cuvinte Cheie')
    sheet1.write(0, 1, 'Rake Rank')
    # run the RAKE algorithm
    r = Rake()
    r.extract_keywords_from_text(stri2)
    print("\n".join(r.get_ranked_phrases()))
    print(*r.get_ranked_phrases_with_scores(), sep='\n')
    #rank,key=r.get_ranked_phrases_with_scores()
    rank = r.get_ranked_phrases_with_scores()
    print(rank)
    word = [0 for x in range(len(rank))]
    ranked = [0 for x in range(len(rank))]
    j = 0
    for khh in rank:
        ranked[j], word[j] = khh
        j = j + 1
    # write the results to the xls sheet
    for g in range(len(ranked)):
        sheet1.write(g + 1, 0, word[g])
        sheet1.write(g + 1, 1, ranked[g])
    if path.exists("key.xls"):
        os.remove("key.xls")
    wb.save('key.xls')
Example #3
def summarize(text):
    # sent_tokenize breaks target text into sentences.
    blocks = [nltk.sent_tokenize(paragraph) for paragraph in text.split("\n")]
    sentence_list = []
    for block in blocks:
        sentence_list.extend(block)

    r = Rake(min_length=2)
    r.extract_keywords_from_text(text)
    #temp_keywords = r.get_ranked_phrases_with_scores()[:10]
    #keywords = [x[1] for x in temp_keywords]
    keyphrases = [list(x) for x in r.get_ranked_phrases_with_scores()[:20]]

    for phrase in keyphrases:
        print(phrase)

    print()

    important_sentences = [sentence_list[0]]

    #prev_sentence = ""
    for i in sentence_list:
        is_important = False
        for j in keyphrases:
            for word in j[1:]:
                if (word in i.lower()):
                    is_important = True
                    break
        #if is_important and prev!= None and i not in important_sentences:
        #    important_sentences.append(prev + "\n" + i)
        #prev = i
        if is_important and not (i in important_sentences):
            important_sentences.append(i)

    return "\n".join(important_sentences)
Example #4
class TextAnalyser(object):
    def __init__(self):
        self.threshold = 0.99
        self.__rake = Rake()
        self.__stemmer = LancasterStemmer()
        self.__lemma = WordNetLemmatizer()
        self.__stopwords = ['alt']

    def extract(self, text):
        self.__rake.extract_keywords_from_text(text.strip())
        scores = self.__rake.get_ranked_phrases_with_scores()
        keywords = self.unpack_keywords(scores)
        words = filter(lambda x: x[1] not in self.__stopwords and x[1].isalnum(), keywords)

        filtered_words = map(lambda x: x[1], filter(lambda x: x[0] > self.threshold, words))

        lemms = map(lambda x: self.__lemma.lemmatize(x), filtered_words)
        stems = map(lambda x: self.__stemmer.stem(x), lemms)

        return stems

    @staticmethod
    def unpack_keywords(keywords):
        words = []

        for k in keywords:
            for p in k[1].split(' '):
                words.append((k[0], p))

        return words
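One Python 3 caveat: filter and map are lazy, so extract() hands back a map iterator rather than a list. A small sketch of consuming it:

analyser = TextAnalyser()
# materialize the lazy map/filter chain before printing
stems = list(analyser.extract("Compatibility of systems of linear constraints"))
print(stems)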
Example #5
class Extractor:
    def __init__(self, topic, MIN_LENGTH=1, MAX_LENGTH=4):
        self.proc = Processor(topic)
        self.raker = Rake(min_length=MIN_LENGTH, max_length=MAX_LENGTH)
        self.topic = topic

    def extract_keywords(self):
        self.proc.processor()
        with open(self.proc.btoken_file_path, 'r') as file:
            text = file.read()
        self.raker.extract_keywords_from_text(text)

    def extract_keywords_with_scores(self):
        text_with_scores = self.raker.get_ranked_phrases_with_scores()

        return text_with_scores

    def load_to_data(self):
        pair = list()
        text_score = self.extract_keywords_with_scores()
        for c in text_score:
            temp = [("score", c[0]), ("text", c[1])]
            pair.append(dict(temp))
        # use a context manager so the JSON file is actually flushed and closed
        with open("../DataFile/RAKE/" + self.topic + ".json", 'w+') as RAKE_file:
            json.dump(pair, RAKE_file, indent=4)
Example #6
def GetSignificant(mytext):
    r = Rake()
    #Internal Testing
    r.extract_keywords_from_text(mytext)

    listSig = {}  # dict mapping significant phrases to their scores
    #Get extracted scores and phrases
    extractedScores = r.get_ranked_phrases_with_scores()

    #calculate Average
    scoreAvg = 0
    for score, phrase in extractedScores:
        scoreAvg += score
    scoreAvg = scoreAvg / len(extractedScores)
    #adding this causes problems with all equal scores
    #scoreAvg += scoreAvg/2
    for score, phrase in extractedScores:
        if phrase.count(' ') > 0 and score >= scoreAvg:
            for word in phrase.split():
                syns = wn.synsets(word)
                tmp = syns[0].pos() if syns else None  # guard: some words have no synsets
                #print (word, ":", tmp)
            listSig.update({phrase: score})

        elif score >= scoreAvg:
            syns = wn.synsets(phrase)
            tmp = syns[0].pos() if syns else None  # guard: the phrase may have no synsets
            #print (phrase, ":", tmp)
            listSig.update({phrase: score})
            #swntmp = phrase + "." + tmp + ".01"
            #print (swn.senti_synset(swntmp))
    s = " "
    return s.join(list(listSig))
Example #7
    def test_extract_keywords_from_text(self):
        r = Rake()

        text = '''Compatibility of systems of linear constraints over the set of
        natural numbers. Criteria of compatibility of a system of linear
        Diophantine equations, strict inequations, and nonstrict inequations are
        considered. Upper bounds for components of a minimal set of solutions
        and algorithms of construction of minimal generating sets of solutions
        for all types of systems are given. These criteria and the corresponding
        algorithms for constructing a minimal supporting set of solutions can be
        used in solving all the considered types of systems and systems of mixed
        types.'''

        r.extract_keywords_from_text(text)

        ranked_phrases = [
            'minimal generating sets', 'linear diophantine equations',
            'minimal supporting set', 'minimal set', 'linear constraints',
            'upper bounds', 'strict inequations', 'nonstrict inequations',
            'natural numbers', 'mixed types', 'corresponding algorithms',
            'considered types', 'set', 'types', 'considered', 'algorithms',
            'used', 'systems', 'system', 'solving', 'solutions', 'given',
            'criteria', 'construction', 'constructing', 'components',
            'compatibility'
        ]
        self.assertEqual(r.get_ranked_phrases(), ranked_phrases)
        self.assertEqual(
            [phrase for _, phrase in r.get_ranked_phrases_with_scores()],
            ranked_phrases)
Example #8
def keyphrases(text, mu=2, sig=1.5):
    """
    determines and ranks keyphrases from `text`. the keyphrases are
    weighted such that short keyphrases (2-3 words) are preferred. 
    moveover, keyphrases not adhering the rules defined in
    `filter_by_pos` are abandoned. 
    """
    assert type(text) is list, "the text is not a list"
    r = Rake(punctuations=". , ? ! - : ; \" \' ( )".split(),
             language='english')
    try:
        text[0].index(" ")
    except ValueError:
        pass
    else:
        raise ValueError("expecting a list of strings, not a single string")

    text = " ".join(text)
    r.extract_keywords_from_text(text)

    # the scores are weighted by their length (# tokens)
    # using a normal distribution
    n = scipy.stats.norm(mu, sig)
    scores = r.get_ranked_phrases_with_scores()
    scores = [(s * n.pdf(len(f.split())), f) for s, f in scores]

    scores = sorted(scores, key=lambda x: -x[0])
    return filter_by_pos(scores)
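As a sanity check of the length weighting, the N(mu=2, sig=1.5) density used above peaks at two-word phrases and falls off quickly for longer ones:

import scipy.stats

n = scipy.stats.norm(2, 1.5)
for length in range(1, 6):
    print(length, round(n.pdf(length), 3))
# 1 0.213, 2 0.266, 3 0.213, 4 0.109, 5 0.036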
Example #9
def get_key_ngrams(document,
                   max_grams=3,
                   include_numbers=False,
                   single_letters=False):
    # Uses stopwords for english from NLTK, and all punctuation characters.
    r = Rake()
    # r = Rake(<language>)  # to use a specific language supported by nltk
    # r = Rake(<list of stopwords>, <string of punctuations to ignore>)  # to
    # provide your own stop words and punctuation
    if not include_numbers:
        document = remove_punc_and_nums(document)  # numbers are replaced by NN

    document = document.lower()

    # lemmatize
    document = lemmatize(document)
    r.extract_keywords_from_text(document)

    # To get keyword phrases ranked highest to lowest.
    phrases = r.get_ranked_phrases_with_scores()

    data = {'{}grams'.format(x + 1): [] for x in range(max_grams)}

    for score, phrase in phrases:
        if 'NN' in phrase.upper():
            continue
        splitted = phrase.split()
        if any(map(lambda x: len(x) < 3, splitted)):
            continue
        score = round(score, 2)
        length = len(splitted)
        if length <= max_grams:
            data['{}grams'.format(length)].append((phrase, score))
    return data
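The return value groups scored phrases by token count. A hedged sketch of the shape (the input is invented; remove_punc_and_nums and lemmatize must come from the original module):

result = get_key_ngrams("Deep learning models need large labeled datasets.")
# roughly: {'1grams': [('datasets', 1.0), ...],
#           '2grams': [('deep learning', 4.0), ...],
#           '3grams': []}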
Example #10
    def update_facet_dict(self, sentence):
        """
        Args:
        sentence

        Returns:
        Nothing
        """
        non_facet_noun = self._non_facet_noun
        r = Rake()
        r.extract_keywords_from_text(sentence)
        dat = r.get_ranked_phrases_with_scores()  # extract the (score, key phrase) pairs
        idx = self.calc_num_notes() - 1  # index of the added sentence, aka current sentence
        tmp = []
        for pair in dat:
            score = pair[0]
            phrase = nlp(pair[1])
            tmp.extend([
                (token.lemma_, (idx, score)) for token in phrase
                if token.pos_ == "NOUN" and token.lemma_ not in non_facet_noun
            ])

        # update the dict with {lemma_word: (idx, score)}; score could be used in an advanced version
        self._facet_dict.update(dict(tmp))
Example #11
def get_topics(meeting_id):
    """ generates topics for the meeting with the given id """
    meeting = Meeting.objects.with_id(meeting_id)
    string = ""
    for transcript in meeting.transcript:
        string += transcript.transcription + " "
    print(string)
    r = Rake()  # initializes Rake with English (all punc) as default lang
    r.extract_keywords_from_text(string)

    topic_data = r.get_ranked_phrases_with_scores()
    count = 0
    data = []
    for topic in topic_data:
        if topic[0] < 5 or count == 10:
            break
        else:
            data.append(str(topic[1]))
            count = count + 1

    return_data = " ".join(data).split(" ")
    no_reps = []
    for d in return_data:
        if d not in no_reps:
            no_reps.append(d)
    meeting.topics = no_reps
    meeting.save()
    return redirect(url_for('meetings.edit_meeting', id=meeting_id))
Example #12
def keywordRake(fullText):
    # Rake expects a collection of stop words, not a file name
    with open("stopList.txt") as f:
        stop_words = f.read().split()
    r = Rake(stopwords=stop_words)
    r.extract_keywords_from_text(fullText)
    b = r.get_ranked_phrases()
    c = r.get_ranked_phrases_with_scores()
    print(b)
    print(c)
Example #13
def get_rake_keyphrases_from_text(text, stopwords=None, printset = string.printable):
    if stopwords is None: 
        stopwords = get_word_list_from_file("Stopwords.txt")
    rake_object = Rake(stopwords = stopwords)
    rake_object.extract_keywords_from_text(text)
    rake_keywords = rake_object.get_ranked_phrases_with_scores()
    return rake_keywords
Example #14
def gen_keywords(body, max_keywords=-1):
    r = Rake(min_length=1, max_length=1)
    r.extract_keywords_from_text(body)
    # Returns a rank and a phrase as a list of tuples
    # (rank <float>, phrase <string>)
    phrases = r.get_ranked_phrases_with_scores()
    return [phrase for (rank, phrase) in phrases if rank > 0.8]
Example #15
    def parsed_observation(input_text):
        '''Method to parse Observation from Observation sheet or Nature of check from reference checklist sheet'''
        from rake_nltk import Rake
        import re
        import string
        # Uses stopwords for english from NLTK, and all punctuation characters by
        # default
        r = Rake()
        text = str(input_text)
        #pattern = re.compile(r'\b(' + r'|'.join(stopwords.words('english')) + r')\b\s*')
        #text = pattern.sub('', text)
        #input_str = text.lower()
        result1 = re.sub(r'\d+', '', text)
        result2 = result1.translate(str.maketrans("", "", string.punctuation))
        r.extract_keywords_from_text(result2)
        # Extraction given the list of strings where each string is a sentence.
        #r.extract_keywords_from_sentences(<list of sentences>)
        # To get keyword phrases ranked highest to lowest.
        r.get_ranked_phrases()
        # To get keyword phrases ranked highest to lowest with scores.
        keywords_ranked_phrases = r.get_ranked_phrases_with_scores()
        my_list = []
        my_list_final = []
        for (i, j) in keywords_ranked_phrases:
            my_list.append(j)
        pattern = "[0-9,).(!?]*"
        my_list_new = [re.sub(pattern, '', i) for i in my_list]
        while "" in my_list_new:
            my_list_new.remove("")
        for word in my_list_new:
            if len(word) > 1:
                my_list_final.append(word)
            else:
                continue
        return sorted(list(set(my_list_final)), key=len, reverse=True)
Example #16
    def process_text(self):
        # Remove new lines and turn to lower case
        text = re.sub('\n', ' ', self.text).lower()

        # Extract keyphrases using Rake
        # TODO also possible to extract keywords from sentence
        rake = Rake()
        if self.text_type == 'article':
            rake.extract_keywords_from_text(text)
        elif self.text_type == 'social':
            # NOTE: extract_keywords_from_sentences expects a list of sentences,
            # so `text` should be sentence-tokenized before this call
            rake.extract_keywords_from_sentences(text)
        self.all_phrases = rake.get_ranked_phrases_with_scores()
        # word_freq_dist = rake.get_word_frequency_distribution()

        # Tokenize text
        self.article_text_tokenized = word_tokenize(text)

        # Tokenize phrases
        self.all_phrases_tokenized = self.tokenize_phrases()

        # Tag all phrases and remove all but noun words
        self.all_phrases_tagged = pos_tag_phrase_pairs(self.all_phrases)
        self.all_phrases_tagged_nouns = filter_pos(self.all_phrases_tagged,
                                                   "nouns")

        # Convert list of tagged nouns back to a string phrase
        self.string_phrases_nouns = self.tuple_list_to_string_list()
Example #17
def conext(request):
    #import pdb;pdb.set_trace()
    extracted_list = list()
    string_json = {}  # initialized up front so the final JsonResponse never hits a NameError
    csrfContext = RequestContext(request)
    if request.method == 'POST':

        try:
            json_data = json.loads(request.body)
            text = json_data.get('sentence')

        except:
            text = []
        if text:
            r = Rake()
            r.extract_keywords_from_text(text)
            extracted_list = r.get_ranked_phrases_with_scores()
            dataBounding = DataBounding(text, extracted_list)
            response_string = ''
            for item in extracted_list:
                # str(...) is enough in Python 3; encode('utf8') would print a bytes literal
                value = ('<i class="fa fa-dot-circle-o" aria-hidden="true"></i> '
                         + 'Keyword: ' + str(item[1]) + ' | Score: ' + str(item[0]) + '<br>')
                response_string += value
            string_json = {}
            string_json['keywordswithscores'] = response_string

        else:
            pass
            #extracted_list.update({'Error':'Error occured at service response.'})
    return JsonResponse(string_json, safe=False)
Example #18
def get_all_keywords(text, word_boolean):

    if word_boolean:  # word - i.e. length should be exactly 1
        r = Rake(ranking_metric=Metric.DEGREE_TO_FREQUENCY_RATIO, max_length=1)
    else:  # phrase, i.e. made up of multiple words
        r = Rake(ranking_metric=Metric.DEGREE_TO_FREQUENCY_RATIO, min_length=2)

    total_phrases = []

    for local_text in text:
        r.extract_keywords_from_text(
            local_text)  # use the Rake instance to find the keywords
        local_phrases = r.get_ranked_phrases_with_scores(
        )  # apply these keywords to a local variable
        total_phrases.append(local_phrases)

    all_keywords = []

    # get_ranked_phrases_with_scores gives a list of tuples - tuple[0] is score, 1 is word
    for phrase_list in total_phrases:
        for pair in phrase_list:
            all_keywords.append(pair[1])

    # clean some of the keywords - don't want a single number or small/uninformative words
    # amount goes from 200 keywords to 186 - len(df.columns) = 186, 1 column per keyword
    # (build a new list: calling remove() while iterating a list skips elements)
    all_keywords = [
        word for word in all_keywords
        if not word.isdigit() and len(word) >= 5
    ]

    print(all_keywords)
    return all_keywords
Example #19
def extract_keywords(dump_path, packages):
    print("{0} Gathering descriptions and computing keywords...".format(
        datetime.datetime.now()))
    rake = Rake()
    keywords = {}
    for doc in get_descriptions(packages):
        try:
            if "translatedDescriptionHtml" in doc:
                html_description = unicode(
                    doc.get("translatedDescriptionHtml"))
            else:
                html_description = unicode(doc.get("descriptionHtml"))
            # remove html elements
            description = re.sub(r'<.*?>', '', html_description)
            # substitute non-ascii chars with stop words (e.g. dot)
            description = re.sub(r'[^\x00-\x7F]+', ' . ', description)
            rake.extract_keywords_from_text(description)
            ranking = rake.get_ranked_phrases_with_scores()
            for pair in ranking:
                keywords[pair[1]] = keywords.get(pair[1], 0) + pair[0]
        except AttributeError:
            continue
    sorted_keywords = sorted(keywords.items(), key=itemgetter(1))
    with open(dump_path, 'w') as outfile:
        json.dump(sorted_keywords, outfile, indent=2)
Example #20
    def _get_keyphrases(self):
        # Extract keywords and phrases from the current document so we know
        # what to search for in ES.
        r = Rake()
        r.extract_keywords_from_text(self.instance.content)
        key_phrases = [
            keyphrase[1] for keyphrase in r.get_ranked_phrases_with_scores()
            if keyphrase[0] >= self.min_rank_score
        ]
        if not key_phrases:
            key_phrases = [
                keyphrase[1] for keyphrase in
                r.get_ranked_phrases_with_scores()[:self.default_list_length]
            ]

        return key_phrases
Example #21
def extractKeywords(description, query):
    try:
        r = Rake()

        print("Extracting keywords from joined sequence with query: " + query)

        r.extract_keywords_from_text(description)

        print("Keywords extracted from text with query: " + query)

        keywords = r.get_ranked_phrases_with_scores()

        print("Extraction finished for query: " + query + " writing to file")

        df = pd.DataFrame(columns = ['rank', 'keyword_set'])

        for pair in keywords:
            num = (len(df) + 1)
            df.loc[num] = pair

        dirtitle = query + '.csv'
        # totalKeywordDirectory is assumed to be a module-level constant
        if not os.path.exists(totalKeywordDirectory):
            os.mkdir(totalKeywordDirectory)

        filenamelocation = os.path.join(totalKeywordDirectory, dirtitle)

        df.to_csv(filenamelocation, encoding='utf-8')

        print("File created for query: " + query)

    except Exception as e:
        print(e)
Example #22
def Method4(n=5):
    # input_text and removePunctuation are assumed to be defined elsewhere in the module
    M4_input = input_text
    r = Rake()
    r.extract_keywords_from_text(M4_input)
    for key in r.get_ranked_phrases_with_scores()[:n]:
        print(removePunctuation(key[1].strip()))
Example #23
def simple_rake(text):
    rakeExtract = Rake(min_length=1, max_length=4)
    rakeExtract.extract_keywords_from_text(text)
    result = rakeExtract.get_ranked_phrases_with_scores()
    top_words = []
    for (score, phrase) in result:
        top_words.append(phrase)
    return top_words  # return the collected phrases; `result` still holds the scores
Example #24
def keywords_rake(text):
    r = Rake(stopwords=stopwords.words('english'),
             ranking_metric=Metric.DEGREE_TO_FREQUENCY_RATIO,
             max_length=1)

    r.extract_keywords_from_text(text)
    keywords = r.get_ranked_phrases_with_scores()
    print(keywords)
Example #25
def memorize():
    r = Rake()
    filename = 'testocr.png'
    img = np.array(Image.open(filename))
    text = pytesseract.image_to_string(img)
    print(text)
    r.extract_keywords_from_text(text)  # returns None, so call it before printing results
    print(r.get_ranked_phrases(), r.get_ranked_phrases_with_scores())
Example #26
def return_best_sentences_rake_nltk(text, threshold):
    r = Rake(ranking_metric=Metric.DEGREE_TO_FREQUENCY_RATIO)
    r.extract_keywords_from_text(text)
    ranked_words = r.get_ranked_phrases_with_scores()
    for i in range(len(ranked_words)):
        if ranked_words[i][0] < threshold:
            return ranked_words[:i]
    return ranked_words
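Despite its name, the function returns scored phrases rather than sentences, cut off at the first score below threshold. A short sketch:

text = "Compatibility of systems of linear constraints over the set of natural numbers."
print(return_best_sentences_rake_nltk(text, threshold=4.0))
# keeps only the leading (score, phrase) pairs whose score is >= 4.0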
Example #27
def extract_phrases(text):
    """
    Calls the RAKE API to extract the relevant phrases of the given text
    :param text: String, the text to be analyzed
    :return: Array containing the phrases and their scores
    """
    extractor = Rake()
    extractor.extract_keywords_from_text(text)
    return extractor.get_ranked_phrases_with_scores()
Example #28
def keyword(strg):
    '''Keyword and Concept extraction'''
    if strg:
        strg = linkrem(strg)
        r = Rake()
        r.extract_keywords_from_text(strg)

        return r.get_ranked_phrases()[:10], r.get_ranked_phrases_with_scores()[:10]
Example #29
    def extractKeywords(self, textDescription):
        """Get the keyword phrases from the descriptions using NLP"""
        r = Rake()
        r.extract_keywords_from_text(textDescription)
        results = r.get_ranked_phrases()  # all keyword phrases ranked highest to lowest
        result_scores = r.get_ranked_phrases_with_scores()
        print(results)
        return results
Example #30
class ArticleScarping(Scraping):
    def __init__(self, url: str, authorHtmlTag: dict, dateHtmlTag: dict,
                 articleHtmlTag: dict):
        Scraping.__init__(self, url)
        # find html sections with the given tags
        self.authorHtmlTag = authorHtmlTag
        self.dateHtmlTag = dateHtmlTag
        self.articleHtmlTag = articleHtmlTag
        # init keywords extractor

    def parseArticle(self):

        return self.parseHtmlTag(self.parsedHtml,
                                 self.articleHtmlTag).get_text()

    def parseAuthor(self):
        return self.parseHtmlTag(self.parsedHtml,
                                 self.authorHtmlTag).get_text()

    def parseDate(self):
        try:
            return str(self.parsedHtml.time['datetime'])
        except:
            return self.parseHtmlTag(self.parsedHtml,
                                     self.dateHtmlTag).get_text()

    def parseText(self):
        return self.parseHtmlTag(self.parsedHtml,
                                 self.articleHtmlTag).get_text()

    def getKeyWords(self):
        '''
        Extract key phrases of up to 30 words each.
        :return: the 10 highest-ranked phrases
        '''
        self.rake = Rake(max_length=30)
        self.rake.extract_keywords_from_text(self.parseText())
        result = self.rake.get_ranked_phrases_with_scores()
        resultlist = [elem[1] for elem in result[:10]]
        return resultlist

    def parseTitle(self):
        title = "nan"
        if (self.parsedHtml.title is not None):
            title = self.parsedHtml.title.string
        return title

    def getArticleJson(self):
        json = {
            "title": self.parseTitle(),
            "date": self.parseDate(),
            "author": self.parseAuthor(),
            "article": self.parseText(),
            "keywords": self.getKeyWords(),
            "url": self.url
        }
        return json
Example #31
#!/usr/bin/python3
# coding: utf-8
# pip install rake-nltk
from rake_nltk import Rake
from nltk import tokenize
r = Rake()  # Uses stopwords for english from NLTK, and all punctuation characters by default
##################################################################
## Extraction given the text.
mytext = '''Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered.
            Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given.
            These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.'''
r.extract_keywords_from_text(mytext)
print(r.get_ranked_phrases())  # To get keyword phrases ranked highest to lowest.
# ['linear diophantine equations', 'minimal generating sets', 'minimal supporting set', 'minimal set', 'upper bounds', 'strict inequations', 'nonstrict inequations', 'mixed types', 'corresponding algorithms', 'considered types', 'types', 'considered', 'algorithms', 'used', 'systems', 'system', 'solving', 'solutions', 'given', 'criteria', 'construction', 'constructing', 'components', 'compatibility']
print(r.get_ranked_phrases_with_scores())  # To get keyword phrases ranked highest to lowest with scores.
# [(9.0, 'linear diophantine equations'), (8.666666666666666, 'minimal generating sets'), (8.166666666666666, 'minimal supporting set'), (5.166666666666666, 'minimal set'), (4.0, 'upper bounds'), (4.0, 'strict inequations'), (4.0, 'nonstrict inequations'), (3.666666666666667, 'mixed types'), (3.5, 'corresponding algorithms'), (3.166666666666667, 'considered types'), (1.6666666666666667, 'types'), (1.5, 'considered'), (1.5, 'algorithms'), (1.0, 'used'), (1.0, 'systems'), (1.0, 'system'), (1.0, 'solving'), (1.0, 'solutions'), (1.0, 'given'), (1.0, 'criteria'), (1.0, 'construction'), (1.0, 'constructing'), (1.0, 'components'), (1.0, 'compatibility')]
##################################################################
## Extraction given the list of strings where each string is a sentence.
r.extract_keywords_from_sentences(tokenize.sent_tokenize(mytext))
print(r.get_ranked_phrases())
# ['linear diophantine equations', 'minimal generating sets', 'minimal supporting set', 'minimal set', 'upper bounds', 'strict inequations', 'nonstrict inequations', 'mixed types', 'corresponding algorithms', 'considered types', 'types', 'considered', 'algorithms', 'used', 'systems', 'system', 'solving', 'solutions', 'given', 'criteria', 'construction', 'constructing', 'components', 'compatibility']
print(r.get_ranked_phrases_with_scores())
# [(9.0, 'linear diophantine equations'), (8.666666666666666, 'minimal generating sets'), (8.166666666666666, 'minimal supporting set'), (5.166666666666666, 'minimal set'), (4.0, 'upper bounds'), (4.0, 'strict inequations'), (4.0, 'nonstrict inequations'), (3.666666666666667, 'mixed types'), (3.5, 'corresponding algorithms'), (3.166666666666667, 'considered types'), (1.6666666666666667, 'types'), (1.5, 'considered'), (1.5, 'algorithms'), (1.0, 'used'), (1.0, 'systems'), (1.0, 'system'), (1.0, 'solving'), (1.0, 'solutions'), (1.0, 'given'), (1.0, 'criteria'), (1.0, 'construction'), (1.0, 'constructing'), (1.0, 'components'), (1.0, 'compatibility')]