def summarizeText(self, text, lines):
    # Split the text into two halves so each half can be ranked separately.
    length = len(text)
    text_div1 = text[0:int(length / 2)]
    text_div2 = text[int(length / 2):]
    # Summarize each half down to 10 sentences with TextRank.
    textrank_div1 = TextRank(text_div1)
    textrank_div2 = TextRank(text_div2)
    textresult_div1 = textrank_div1.summarize(10)
    textresult_div2 = textrank_div2.summarize(10)
    # Combine the two partial summaries and rank them again to get the final summary.
    textresult = textresult_div1 + textresult_div2
    resultrank = TextRank(textresult)
    return resultrank.summarize(lines)
def summarize(self, text, n_sents=3):
    """Summarize a given text and return the top sentences."""
    try:
        prediction = dict()
        if text:
            if self.lang_code in self.valid_langs:
                if Utility.get_doc_length(text) > self.n_words:
                    # generate sentences and normalized sentences from the text
                    sents, norm_sents = self.p.text_preprocessing(text)
                    # generate doc-term matrix and term-doc matrix
                    dt_matrix = self.generate_doc_term_matrix(norm_sents)
                    td_matrix = self.generate_term_doc_matrix(dt_matrix)
                    if self.method == "LSA":
                        lsa = LSA(self.k, td_matrix)
                        term_topic_matrix, singular_values, topic_doc_matrix = lsa.u, lsa.s, lsa.vt
                        # remove singular values below the given threshold
                        singular_values = lsa.filter_singular_values(
                            singular_values, self.sv_threshold)
                        # get salience scores from top singular values & topic-document matrix
                        salience_scores = lsa.get_salience_scores(
                            singular_values, topic_doc_matrix)
                        # get the top sentence indices for summarization
                        top_sentence_indices = lsa.get_top_sent_indices(
                            salience_scores, n_sents)
                        summary = self.generate_summary(
                            sents, top_sentence_indices)
                    elif self.method == "TEXT_RANK":
                        tr = TextRank(dt_matrix, td_matrix)
                        # build the similarity graph
                        similarity_matrix = tr.similiarity_matrix
                        similarity_graph = tr.get_similarity_graph(
                            similarity_matrix)
                        # compute PageRank scores for all sentences
                        ranked_sents = tr.rank_sentences(similarity_graph)
                        # get the top sentence indices for summarization
                        top_sentence_indices = tr.get_top_sentence_indices(
                            ranked_sents, n_sents)
                        summary = self.generate_summary(
                            sents, top_sentence_indices)
                    else:
                        return "no method found"
                    # apply cleaning for readability
                    summary = Utility.remove_multiple_whitespaces(summary)
                    summary = Utility.remove_trailing_whitespaces(summary)
                    prediction["summary"] = summary
                    prediction["message"] = "successful"
                else:
                    return "requires at least {} words".format(self.n_words)
            else:
                return "language not supported"
        else:
            return "textual content required"
        return prediction
    except Exception:
        logging.error("exception occurred", exc_info=True)
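# --- A minimal, self-contained sketch of the TEXT_RANK branch above. ---
# Assumptions: scikit-learn and networkx are installed; the function name
# text_rank_sketch and its parameters are illustrative and not part of the
# original module. It builds a tf-idf doc-term matrix, derives a sentence
# similarity graph, and ranks sentences with PageRank.
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer

def text_rank_sketch(sentences, n_sents=3):
    # doc-term matrix with l2-normalized rows, so the dot product is cosine similarity
    dt_matrix = TfidfVectorizer().fit_transform(sentences)
    similarity_matrix = (dt_matrix @ dt_matrix.T).toarray()
    similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(similarity_graph)
    # keep the n_sents highest-scoring sentences, restored to document order
    top = sorted(sorted(scores, key=scores.get, reverse=True)[:n_sents])
    return " ".join(sentences[i] for i in top)

# Example:
# print(text_rank_sketch(["Cats sleep a lot.", "Dogs bark loudly.",
#                         "Cats and dogs are pets.", "The sun is a star."], 2))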
def summarizeTextList(self, textList, lines):
    # Split the sentence list into two halves so each half can be ranked separately.
    length = len(textList)
    textList1 = textList[0:int(length / 2)]
    textList2 = textList[int(length / 2):]
    # Join each half back into a plain-text block.
    text1 = ''
    text2 = ''
    for sentence in textList1:
        text1 += sentence + ' '
    for sentence in textList2:
        text2 += sentence + ' '
    # Summarize each half down to 10 sentences with TextRank.
    textrank1 = TextRank(text1)
    textrank2 = TextRank(text2)
    textresult1 = textrank1.summarize(10)
    textresult2 = textrank2.summarize(10)
    # Combine the two partial summaries and rank them again to get the final summary.
    textresult = textresult1 + textresult2
    resultrank = TextRank(textresult)
    return resultrank.summarize(lines)
def select_algorithm(algo, text, num):
    if algo == 'Wordfreq':
        obj = WordFrequency(text, num)
    elif algo == 'TextRank':
        obj = TextRank(text, num)
    elif algo == 'TF_IDF':
        obj = TF_IDF(text, num)
    else:
        raise ValueError("unknown algorithm: {}".format(algo))
    return obj.summarize_text()


#def download_file():
#    filename = 'summary.pdf'
#    file_dir = 'files'
#    return send_from_directory(file_dir, filename=filename)
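# --- A minimal, self-contained sketch of the 'Wordfreq' branch above. ---
# Assumptions: the WordFrequency class comes from the original project but its
# internals are not shown; word_frequency_sketch below is an illustrative
# stand-in that scores each sentence by the summed frequency of its words and
# returns the num highest-scoring sentences in document order.
import re
from collections import Counter

def word_frequency_sketch(text, num=3):
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    freqs = Counter(re.findall(r'\w+', text.lower()))
    scores = [sum(freqs[w] for w in re.findall(r'\w+', s.lower())) for s in sentences]
    top = sorted(sorted(range(len(sentences)), key=lambda i: scores[i], reverse=True)[:num])
    return ' '.join(sentences[i] for i in top)

# Example:
# print(word_frequency_sketch("Rain fell all day. The match was cancelled. "
#                             "Rain is common here in spring.", 2))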
def top_topics_from_lm():
    print("top_topics_from_lm")
    LM = load_pickled_data("LM_classifier_10.pickle")
    voca = load_voca()
    corpus_info = CorpusInfo()
    articles = load_articles()
    ids, docs = zip(*articles)

    print("Predicting")
    labels = LM.predict_parallel(docs)
    cont_ids, _ = zip(*filter(lambda x: x[1], zip(ids, labels)))
    cont_ids = set(cont_ids)
    print("{:.1%} are controversial".format(len(cont_ids) / len(docs)))
    save_pickle_data(cont_ids, "cont_ids")

    print("Initialize TextRank")
    text_rank = TextRank(docs, voca)
    scorer = LM.token_odd
    payloads = []
    for id, doc in articles:
        if id in cont_ids:
            param = (doc, scorer, corpus_info, text_rank, 4)
            payloads.append(param)

    n_thread = 30
    p = Pool(n_thread)
    g_phrase_score = Counter()
    print("Mapping")
    for phrase_score in p.map(top_phrase_by_scorer, payloads):
        for phrase, score in phrase_score:
            g_phrase_score[phrase] += score

    textrize = get_textrizer_plain(voca)
    result = []
    for phrase, score in g_phrase_score.most_common(300):
        plain_phrase = textrize(str2arr(phrase))
        print("{}\t{}".format(plain_phrase, score))
        result.append((plain_phrase, score))
    save_pickle_data(result, "cont_topics_lm.pickle")
text = re.sub("\u3000", "", text)
text_list.append(text)

sample_text = text_list[1]

# set up the TextRank parameters
allowPOS = ["n"]
stopwords = ["为了"]
span = 3

# This part implements the TextRank algorithm
## initialization
tr_keyword = TextRank(allowPOS, stopwords, span)
## run TextRank to extract the top 10 keywords
tr_keyword.text_rank(sample_text, 10)

# Text relationship study
## cut the text into a list of words
word_pair = tr_keyword._cut(sample_text)
## create the co-occurrence matrix
co_graph = tr_keyword.co_occurance_matrix(word_pair)
df = tr_keyword.co_occur_graph_to_matrix(co_graph, normalization=True)
## visualize the text graph
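# --- A minimal, self-contained sketch of the co-occurrence step above. ---
# Assumptions: the helper name co_occurrence_sketch and the dict-of-Counters
# representation are illustrative; the original co_occurance_matrix method is
# not shown here. Words that fall inside the same sliding window of size
# `span` are counted as co-occurring, mirroring the span parameter set above.
from collections import Counter, defaultdict

def co_occurrence_sketch(words, span=3):
    graph = defaultdict(Counter)
    for i, w in enumerate(words):
        # look ahead within the window and count the pair in both directions
        for j in range(i + 1, min(i + span, len(words))):
            graph[w][words[j]] += 1
            graph[words[j]][w] += 1
    return graph

# Example:
# print(co_occurrence_sketch(["text", "rank", "builds", "a", "graph", "of", "words"], 3))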
def setUp(self):
    self.text_rank = TextRank()
import os
import logging

from flask import Flask
from flask_migrate import Migrate
from sqlalchemy import create_engine

from settings import Settings
from models import database, Feedback, Account
from account_service import AccountService
from feedback_service import FeedbackService
from text_rank import TextRank

debug = os.environ.get("DEBUG", "false").lower() == "true"
ENV = os.environ.get("ENV", "dev")
DD_API_URL = "https://api.datadoghq.com/api/v1/"

log = logging.getLogger("summarizer_server")

app = Flask(__name__)

textrank = TextRank()
textrank.setup()

# db setup
settings = Settings()
app.config.from_object(settings)
database.init_app(app)
engine = create_engine(settings.SQLALCHEMY_DATABASE_URI)
database.metadata.create_all(engine)

accountservice = AccountService()
feedbackservice = FeedbackService()


@app.route("/v1/")
def load_text_rank(self):
    tr = TextRank()
    tr.generate_ranks()
    self.node_weights = tr.node_weights
def test_text_rank(sentences):
    ranker = TextRank(sentences)
    return ranker.rank()
def text_rank_extract_abstract(sentences, k=5):
    # Rank all sentences and keep the k highest-scoring ones.
    ranker = TextRank(sentences)
    rank_list = ranker.rank()[:k]
    # Restore original document order before joining into the abstract.
    rank_list = sorted(rank_list, key=lambda item: item['index'])
    return '\n'.join([''.join(item['sentence']) for item in rank_list])