def pyteasSummarize(filename, title, num_sents=1):
    """
    TextTeaser/PyTeaser scores sentences on four basic summarization
    features and builds the summary from them:
    1. Title feature: scores a sentence with regard to the title,
       calculated as the count of words the sentence has in common
       with the document title.
    2. Sentence length: scored by how many words are in the sentence.
       TextTeaser defines a constant "ideal" (value 20) representing
       the ideal sentence length in words; the score is a normalized
       distance from this value.
    3. Sentence position: where the sentence is located in the text.
       Introductions and conclusions score higher on this feature.
    4. Keyword frequency: the frequency of the sentence's words across
       the whole text under a bag-of-words model (after removing
       stop words).
    """
    from pyteaser import Summarize
    with open(filename, "r") as fh:
        text = fh.read()
    summary = Summarize(title, text)
    sents = " ".join([str(sentence) for sentence in summary[:num_sents]])
    print("")
    print("####### From PyTeaser #######")
    print(sents)
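# The first two features described in the docstring above can be sketched
# directly. This is a minimal, illustrative reimplementation, not PyTeaser's
# actual code; the tokenization and the stop-word set are assumptions
# supplied by the caller.
def title_score(title_words, sentence_words, stopwords):
    # Count of sentence words that appear in the stop-word-filtered title,
    # normalized by the number of remaining title words.
    title_words = [w for w in title_words if w not in stopwords]
    if not title_words:
        return 0.0
    common = [w for w in sentence_words if w in title_words]
    return len(common) / float(len(title_words))

def length_score(sentence_words, ideal=20):
    # Normalized distance from the "ideal" sentence length of 20 words.
    return 1.0 - abs(ideal - len(sentence_words)) / float(ideal)

# For example, title_score(['cloud', 'computing'], ['cloud', 'resources'],
# set()) returns 0.5, and length_score(['a'] * 10) returns 0.5.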
def TeaserSum(input_file, output_file):
    senlist = ReadText(input_file)
    text = open(input_file).read()
    title = tpinfo.topic_sen
    print title
    res_sum = Summarize(title, text)
    out_pid = codecs.open(output_file, 'w')
    """
    cnt = 0
    for sen in res_sum:
        out_pid.write(sen + '\n')
        cnt += 1
        if cnt >= 2:
            break
    """
    # Keep whole sentences until the 100-word budget runs out, then pad
    # with a prefix of the next sentence.
    N_word = 100
    cnt = 0
    outsum = []
    while cnt < len(res_sum):
        if N_word > len(res_sum[cnt].split()):
            outsum.append(res_sum[cnt])
            N_word -= len(res_sum[cnt].split())
            cnt += 1
        else:
            break
    if N_word >= 0 and cnt < len(res_sum):
        subset = ' '.join(res_sum[cnt].split()[:N_word])
        outsum.append(subset)
    for sen in outsum:
        out_pid.write(sen + '\n')
    out_pid.close()
    return
def summarize_bill(title, text):
    """
    Cleans up bill text and summarizes it using TextTeaser (PyTeaser).
    Uses Python 2.7, as TextTeaser/PyTeaser does not support Python 3.
    """
    text = remove_page_artifacts(title, text)
    text = remove_special_characters(text)
    text = remove_uppercase(text)
    text = remove_legal_phrases(text)
    if ' To ' in text:
        text = text.split(' To ', 1)[1]
    text = replace_formal_words(text)
    text = remove_filler_phrases(text)
    text = fix_capitalization(text)
    # Ensure just a single space between each word
    text = re.sub(r'\s+', ' ', text)
    text = fix_punctuation_errors(text)
    # print '\n\n'.join(nltk.sent_tokenize(text))
    # Get the top 5 sentences from the TextTeaser summarization algorithm
    summary = Summarize(title, text)
    # Remove leading whitespace
    summary = [sentence.lstrip() for sentence in summary]
    return summary
def IW():
    IW_URL = 'http://www.indiewire.com/t/reviews/'
    IWSoup = BeautifulSoup(get(IW_URL).text, 'html.parser')
    iw_dict = {}
    for entry in IWSoup.find_all('header', class_='entry-header'):
        r_dict = {}
        name_str = entry.text
        name = name_str[2:name_str.find('Review') - 2]
        r_dict['name'] = name
        link = entry.a['href']
        r_dict['link'] = link
        r_soup = BeautifulSoup(get(link).text, 'html.parser')
        r_critic = r_soup.find_all('meta', attrs={'name': 'author'})[0]['content']
        r_text = r_soup.find_all('meta', attrs={'name': 'body'})[0]['content']
        r_sentiment = TextBlob(r_text).sentiment
        r_polarity = r_sentiment[0]
        r_subjectivity = r_sentiment[1]
        summary = Summarize(name, r_text)
        r_blurb = ' '.join(summary)
        r_thumbsup = 'Positive' if r_polarity > 0 else 'Negative'
        r_dict['critic'] = r_critic
        r_dict['text'] = r_text
        r_dict['blurb'] = r_blurb
        r_dict['review_subjectivity'] = r_subjectivity
        r_dict['review_polarity'] = r_polarity
        r_dict['sentiment'] = r_sentiment
        r_dict['thumbsup'] = r_thumbsup
        iw_dict[name] = r_dict
    return iw_dict
def catchContent():
    number_to = t.count(
        '(//div[@class="col"]/div[contains(@class, "today")]/ul/li[contains(@class, "col-md-12")])'
    )
    df_to = pd.DataFrame(index=range(0, number_to),
                         columns=['Sno', 'Title', 'URL', 'Summary', 'Img_URL'])
    t.hover('//div[@class="container footer-main"]')
    t.wait(2)
    # XPath positions are 1-based, so run n through number_to inclusive;
    # the original range(1, number_to) dropped the last item
    for n in range(1, number_to + 1):
        title = t.read(
            '//div[@class="col"]/div[contains(@class, "today")]/ul/li[contains(@class, "col-md-12")][{}]//div[contains(@class, "article-listing_content")]//h2'
            .format(n))
        URL_o = t.read(
            '//div[@class="col"]/div[contains(@class, "today")]/ul/li[contains(@class, "col-md-12")][{}]//@href'
            .format(n))
        URL = "https://www.todayonline.com" + str(URL_o)
        Img_link = t.read(
            '//div[@class="col"]/div[contains(@class, "today")]/ul/li[contains(@class, "col-md-12")][{}]//img/@src'
            .format(n))
        df_to.iloc[n - 1, 0] = n
        df_to.iloc[n - 1, 1] = title.decode('utf-8')
        df_to.iloc[n - 1, 2] = URL
        df_to.iloc[n - 1, 4] = Img_link
    for i in range(0, df_to.shape[0]):
        if df_to['Img_URL'][i] == "":
            df_to['Img_URL'][i] = np.nan
    df_to.dropna(subset=['Img_URL'], inplace=True, how='any')
    df_to = df_to.reset_index(drop=True)
    df_to['Sno'] = df_to.index
    df_to = util.fixImgLink(
        df_to,
        "https://cf-templates-fghyux9ggb7t-ap-southeast-1.s3-ap-southeast-1.amazonaws.com/todayOnline.png"
    )
    for n in range(0, df_to.shape[0]):
        t.url(df_to.URL[n])
        t.wait(4)
        t.hover('//div[@class="article-detail_subscription"]')
        t.wait(2)
        number_p = t.count('//div/p[not(@class)]')
        Content = ""
        for i in range(1, number_p - 2):
            cont = t.read('//div/p[not(@class)][{}]'.format(i))
            Content = Content + " " + cont  # separate paragraphs with a space
        summaries = Summarize(df_to.Title[n], unicode(str(Content), "utf-8"))
        # n is already 0-based here; the original wrote to row n - 1,
        # which placed the first summary on the last row
        df_to.iloc[n, 3] = summaries[0]
    return df_to
def ChicagoScraper():
    # CHI_URL and CHISoup are module-level globals defined elsewhere
    chi = {}
    for x in CHISoup.find_all('div', class_='trb_brk_gc_i'):
        title = x.find_all('a')[0].find_all('figure')[0].find_all('img')[0]['title']
        if 'review:' in title:
            r_dict = {}
            r_url = CHI_URL + x.find_all('a')[0]['href']
            title = str(title[:title.find('review')]).replace("'", "").strip()
            r_soup = BeautifulSoup(get(r_url).text, 'html.parser')
            r_critic = r_soup.find_all('meta', attrs={'name': 'author'})[0]['content']
            r_text = ''
            for piece in r_soup.find_all('div', class_='trb_ar_page')[0].find_all('p'):
                r_text += piece.text
            r_sentiment = TextBlob(r_text).sentiment
            r_polarity = r_sentiment[0]
            r_subjectivity = r_sentiment[1]
            summary = Summarize(title, r_text)
            r_blurb = ' '.join(summary)
            r_thumbsup = 'Positive' if r_polarity > 0 else 'Negative'
            print title + ": " + str(r_polarity)
            r_dict['critic'] = r_critic
            r_dict['text'] = r_text
            r_dict['blurb'] = r_blurb
            r_dict['review_subjectivity'] = r_subjectivity
            r_dict['review_polarity'] = r_polarity
            r_dict['sentiment'] = r_sentiment
            r_dict['thumbsup'] = r_thumbsup
            r_dict['name'] = title
            r_dict['link'] = r_url
            chi[title] = r_dict
    return chi
def scrapeHWR():
    HWR_URL = 'https://www.hollywoodreporter.com/topic/movie-reviews'
    hwr_soup = BeautifulSoup(get(HWR_URL).text, 'html.parser')
    hwr_dict = {}
    for noodle in hwr_soup.find_all('a', class_='topic-card__link'):
        a = smart_str(noodle['title']).replace("'", '')
        a = a[:a.find(': Film Review')]
        hwr_dict[a] = noodle['href']
    for key in hwr_dict:
        if Movie.objects.filter(name=key).exists():
            print '%s already exists' % (key)
        else:
            print '%s ...adding this movie' % (key)
            newMovie = Movie(name=key, director=defaultDir)
            newMovie.save()
    for key in hwr_dict:
        review_soup = BeautifulSoup(get(hwr_dict[key]).text, 'html.parser')
        r_author = review_soup.find_all('meta', attrs={"name": "sailthru.author"})[0]['content']
        if Critic.objects.filter(name=r_author).exists():
            print '%s already exists!' % (r_author)
        else:
            newCritic = Critic(name=r_author, organization=HWRorg)
            print 'adding %s' % (newCritic)
            newCritic.save()
        review_text = ""
        for item in review_soup.find_all('p', style='margin-bottom: 0in;'):
            review_text += item.text
        if len(review_text) == 0:
            for item in review_soup.find_all('p'):
                review_text += item.text
        review_sentiment = TextBlob(review_text).sentiment
        summary = Summarize(key, review_text)
        r_blurb = ' '.join(summary)
        #print r_blurb
        rtext_list.append(review_text)
        r_subjectivity = review_sentiment[1]
        r_polarity = review_sentiment[0]
        r_thumbsup = 'Positive' if r_polarity > 0 else 'Negative'
        r_url = hwr_dict[key]
        newReviews = Reviews(movie=Movie.objects.get(name=key),
                             review_url=r_url,
                             review_polarity=r_polarity,
                             review_subjectivity=r_subjectivity,
                             thumbsup=r_thumbsup,
                             blurb=r_blurb,
                             text=review_text,
                             critic=Critic.objects.get(name=r_author),
                             organization=HWRorg)
        if Reviews.objects.filter(movie=Movie.objects.get(name=key),
                                  organization=HWRorg).exists():
            print 'This review of %s already exists' % (key)
        else:
            print 'adding review'
            reviews_toadd.append(newReviews)
            newReviews.save()
def wiki_summary(name):
    try:
        page = wikipedia.page(name)
        summary = wikipedia.summary(name, sentences=20)
        url = page.url
        output = Summarize(page.title, summary)
        return name, url, ' '.join(output)
    except wikipedia.PageError:
        logging.warning("No wiki entry for '{0}'.".format(name))
        return None
def single_txt(txtname):
    code = 0
    message = "success"
    title = ""
    time = ""
    text = ""
    f2 = open(txtname)
    i = 0
    while 1:
        line = f2.readline()
        if not line:
            break
        if i == 0:
            title = line
        if i == 1:
            time = line
        if i >= 2:
            text = text + line
        i = i + 1
    f2.close()
    if i < 2:
        code = 1
        message = "wrong format"
    key2 = keywords5(text)
    # pprint(key2)
    summaries = Summarize(title, text)
    # pprint(summaries)
    abstract = ''
    for summary in summaries:
        abstract = abstract + summary + " "
    sentimentstr = client.Sentiment({'text': text})
    sentiment = sentimentstr['polarity_confidence']
    positive = sentimentstr['polarity']
    # Rescale the API's polarity confidence: positive/negative map onto
    # +/-[0.2, 1.0]; neutral gets a magnitude in [0, 0.2] whose sign is
    # chosen by text-length parity as an arbitrary tie-break.
    # e.g. a positive confidence of 0.9 becomes 0.4 * 2 * 0.8 + 0.2 = 0.84.
    if positive == 'positive':
        sentiment = abs(sentiment - 0.5) * 2 * 0.8 + 0.2
    if positive == 'negative':
        sentiment = -(abs(sentiment - 0.5) * 2 * 0.8 + 0.2)
    if positive == 'neutral':
        if len(text) % 2 == 1:
            sentiment = (0.2 - abs(sentiment - 0.5) * 2 * 0.2)
        else:
            sentiment = -(0.2 - abs(sentiment - 0.5) * 2 * 0.2)
    data = {
        'title': title.strip('\n'),
        'time': time.strip('\n'),
        'abstract': abstract,
        'keywords': key2,
        'sentiment': sentiment
    }
    return code, message, data
async def shorten(message, url='wesring.com'):  # default URL is the error page
    await client.send_message(message.channel, "Im reading, give me a second")
    # TODO: Write own html scraper
    logInfo("Parsing: " + url)
    article = Article(url)
    article.download()
    article.parse()
    # TODO: Write own summary function
    summary = "".join(Summarize(article.title, article.text))
    await client.send_message(message.channel, "\n\nSummary:\n " + summary)
    logInfo("done")
def testText(self):
    article_title = u'Framework for Partitioning and Execution of Data Stream Applications in Mobile Cloud Computing'
    article_text = u'The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm. Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources. In this paper, we focus on the third approach in supporting mobile data stream applica- tions. More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data. To the best of our knowledge, it is the first work to study the partitioning problem for mobile data stream applica- tions, where the optimization is placed on achieving high throughput of processing the streaming data rather than minimizing the makespan of executions as in other appli- cations. We first propose a framework to provide runtime support for the dynamic computation partitioning and exe- cution of the application. Different from existing works, the framework not only allows the dynamic partitioning for a single user but also supports the sharing of computation in- stances among multiple users in the cloud to achieve efficient utilization of the underlying cloud resources. Meanwhile, the framework has better scalability because it is designed on the elastic cloud fabrics. Based on the framework, we design a genetic algorithm for optimal computation parti- tion. Both numerical evaluation and real world experiment have been performed, and the results show that the par- titioned application can achieve at least two times better performance in terms of throughput than the application without partitioning.'
    summarised_article_text = [
        u'The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.',
        u'Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources.',
        u'In this paper, we focus on the third approach in supporting mobile data stream applica- tions.',
        u'More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data.',
        u'We first propose a framework to provide runtime support for the dynamic computation partitioning and exe- cution of the application.'
    ]
    self.assertEqual(Summarize(article_title, article_text),
                     summarised_article_text)
def summarize_file(filename):
    with open(filename, 'r') as f:
        title = f.readline().decode('cp1252')
        content = ""
        for line in f:
            if line:
                content += line.decode('cp1252')
    print filename + " / " + title
    summary = Summarize(title, content)
    for bullet_point in summary:
        print "[+] " + bullet_point
def SummarizeUrl(url):
    summaries = []
    try:
        article = grab_link(url)
    except IOError:
        print 'IOError'
        return None
    #print ">>> " + str(high) + " - " + item['Source'] + " >>> " + highsen
    if (article is None or article.cleaned_text is None
            or article.title is None):
        return None
    text = str(article.cleaned_text.encode('utf-8', 'ignore'))
    title = str(article.title.encode('utf-8', 'ignore'))
    summaries = Summarize(title, text)
    return summaries
def Slate():
    slate_dict = {}
    for item in SlateSoup.find_all('div', class_="tile long-hed stacked"):
        if 'reviewed' in item.find_all('a')[0]['href']:
            try:
                r_url = item.find_all('a')[0]['href']
                r_soup = BeautifulSoup(get(r_url).text, 'html.parser')
                r_title = r_soup.find_all('h1', class_='article__hed')[0].findAll('em')[0].text
                r_title_slug = r_title.lower().replace(' ', '-').replace('(', '').replace(')', '')
                if r_title_slug in r_url:
                    print 'scraping Slate review for %s' % (r_title)
                    r_dict = {}
                    r_text = ''
                    # distinct name so the outer loop variable is not shadowed
                    for para in r_soup.find_all('p', class_='slate-paragraph'):
                        r_text += para.text
                    r_sentiment = TextBlob(r_text).sentiment
                    r_polarity = r_sentiment[0]
                    r_subjectivity = r_sentiment[1]
                    summary = Summarize(r_title, r_text)
                    r_blurb = ' '.join(summary)
                    r_thumbsup = 'Positive' if r_polarity > 0 else 'Negative'
                    r_critic = r_soup.find_all('meta', attrs={'name': 'author'})[0]['content']
                    r_dict['critic'] = r_critic
                    r_dict['text'] = r_text
                    r_dict['name'] = r_title
                    r_dict['link'] = r_url
                    r_dict['blurb'] = r_blurb
                    r_dict['review_subjectivity'] = r_subjectivity
                    r_dict['review_polarity'] = r_polarity
                    r_dict['sentiment'] = r_sentiment
                    r_dict['thumbsup'] = r_thumbsup
                    slate_dict[r_title] = r_dict
            except:
                print 'movie not found. skipping...'
    return slate_dict
def scrapeNYTMovie():
    review_dict = {}
    url = 'https://www.nytimes.com/reviews/movies'
    html_soup = BeautifulSoup(get(url).text, 'html.parser')
    movie_length = len(html_soup.find_all('a', class_='story-link'))
    print movie_length
    for i in range(movie_length):
        souplet = html_soup.find_all('a', class_='story-link')[i]
        r_url = 'https://www.nytimes.com/' + souplet['href']
        r_name = souplet.h2.text.strip()
        r_get = get(r_url)
        r_soup = BeautifulSoup(r_get.text, 'html.parser')
        review_text = ""
        chunk_count = len(r_soup.find_all('p', class_='story-body-text story-content'))
        author = r_soup.find_all('meta', attrs={"name": "author"})[0]['content']
        # separate index so the outer loop variable i is not shadowed
        for j in range(chunk_count):
            review_text += r_soup.find_all('p', class_='story-body-text story-content')[j].text
        review_sentiment = TextBlob(review_text).sentiment
        review_polarity = review_sentiment[0]
        review_subjectivity = review_sentiment[1]
        r_thumbsup = 'Positive' if review_polarity > 0 else 'Negative'
        summary = Summarize(r_name, review_text)
        r_blurb = ' '.join(summary)
        review_dict[souplet.h2.text.strip()] = {
            'thumbsup': r_thumbsup,
            'blurb': r_blurb,
            'name': r_name,
            'link': r_url,
            'text': review_text,
            'review_subjectivity': review_subjectivity,
            'critic': author,
            'organization': 'NYT',
            'review_polarity': review_polarity
        }
    return review_dict
def pyteaser_summary(article_title, article_text, SENTENCES_COUNT):
    summary = Summarize(article_title, article_text)
    return summary[:SENTENCES_COUNT]
'''
pip install pyteaser  ==>  then run this script directly.
You can also assign your own title and text and call Summarize(title, text).
'''
from pyteaser import Summarize, SummarizeUrl

article_title = u'Framework for Partitioning and Execution of Data Stream Applications in Mobile Cloud Computing'
article_text = u'The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm. Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources. In this paper, we focus on the third approach in supporting mobile data stream applica- tions. More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data. To the best of our knowledge, it is the first work to study the partitioning problem for mobile data stream applica- tions, where the optimization is placed on achieving high throughput of processing the streaming data rather than minimizing the makespan of executions as in other appli- cations. We first propose a framework to provide runtime support for the dynamic computation partitioning and exe- cution of the application. Different from existing works, the framework not only allows the dynamic partitioning for a single user but also supports the sharing of computation in- stances among multiple users in the cloud to achieve efficient utilization of the underlying cloud resources. Meanwhile, the framework has better scalability because it is designed on the elastic cloud fabrics. Based on the framework, we design a genetic algorithm for optimal computation parti- tion. Both numerical evaluation and real world experiment have been performed, and the results show that the par- titioned application can achieve at least two times better performance in terms of throughput than the application without partitioning.'

ranks = Summarize(article_title, article_text)

print "*" * 20, "ONE SENTENCE SUMMARY", "*" * 20
print ranks[0]
print ''  # NB: a bare print() under Python 2 would print an empty tuple, "()"
print "*" * 20, "TWO SENTENCE SUMMARY", "*" * 20
print ranks[:2]

'''
OUTPUT:

******************** ONE SENTENCE SUMMARY ********************
The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.

******************** TWO SENTENCE SUMMARY ********************
[u'The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.', u'Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources.']
'''
def test(self):
    self.assertEqual(Summarize(self.article_title, self.article_text),
                     self.summarised_article_text)
else:
    newCritic = Critic(name=r_author, organization=VarietyOrg)
    print newCritic
    newCritic.save()
    print 'added %s from %s to critic database' % (r_author, 'variety')
r_dict['critic'] = r_author
for beefchunk in r_soup.find_all('meta', attrs={'class': 'swiftype'})[0].find_all('meta', attrs={'class': 'swiftype'}):
    if len(beefchunk['content']) > 140:
        r_text = beefchunk['content']
r_dict['text'] = r_text
r_sentiment = TextBlob(r_text).sentiment
r_polarity = r_sentiment[0]
r_subjectivity = r_sentiment[1]
summary = Summarize(name, r_text)
r_blurb = ' '.join(summary)
r_dict['review_subjectivity'] = r_subjectivity
r_dict['review_polarity'] = r_polarity
r_thumbsup = 'Positive' if r_polarity > 0 else 'Negative'
r_dict['thumbsup'] = r_thumbsup
r_dict['blurb'] = r_blurb
if Reviews.objects.filter(
        movie=Movie.objects.get(name=name),
        critic=Critic.objects.get(name=r_author,
                                  organization=VarietyOrg)).exists():
    print 'Review of %s from %s already exists!' % (name, r_author)
else:
    print 'adding review'
    mymov = Movie.objects.get(name=name)
    mycritic = Critic.objects.get(name=r_author,
def wholeArticleFromQuery(query):
    # get_zci returns either abstract text or a URL; the presence of "("
    # is used as a crude text-vs-URL heuristic. Note these Summarize /
    # SummarizeUrl variants take an extra length argument, unlike stock
    # PyTeaser's Summarize(title, text).
    g = d.get_zci(query, True)
    if "(" in g:
        return Summarize(query, g, 1500)
    else:
        return SummarizeUrl(g, 1500)
def longSummaryFromQuery(query):
    g = d.get_zci(query, True)
    if "(" in g:
        return Summarize(query, g, 15)
    else:
        return SummarizeUrl(g, 15)
from pyteaser import Summarize
import sys

text_file = sys.argv[1]
title_file = sys.argv[2]

text = open(text_file, 'rb').read()
title = open(title_file, 'rb').read()

# Summarize expects (title, text), in that order; the original call
# passed them swapped
summaries = Summarize(title, text)
print summaries
    'w')

clean_reviews_with_removal = open(clean_reviews_with_removal_path, 'r')
clean_reviews_without_removal = open(clean_reviews_without_removal_path, 'r')
texts_with_removal = clean_reviews_with_removal.readlines()
texts_without_removal = clean_reviews_without_removal.readlines()

# this is just for printing progress
progress_count = []
for i in range(0, 100, 7):
    progress_count.append(int(i / 100 * len(texts_with_removal)))

print("Summarizing with removal using pyteaser ... ", end="", flush=True)
for i, text in enumerate(texts_with_removal):
    summary = text
    if text.count(".") > 1:
        # Summarize returns a list of sentences; join it so the str.replace
        # below does not fail on a list
        summary = ' '.join(Summarize('', text))
    summary = summary.replace('\n', ' ')
    pyteaser_summary_with_removal.write("{}\n".format(summary))
    # Prints the progress count
    if i in progress_count:
        print("{}%..".format(int(i / len(texts_with_removal) * 100)),
              end="", flush=True)
print("100%")

print("Summarizing without removal using pyteaser ... ", end="", flush=True)
for i, text in enumerate(texts_without_removal):
    summary = text
    if text.count(".") > 1:
        summary = ' '.join(Summarize('', text))
    summary = summary.replace('\n', ' ')
def summary():
    title = request.get_json()['title']
    text = request.get_json()['text']
    summaries = Summarize(title, text)
    summary = " ".join(summaries)
    return jsonify(text=summary)
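# A minimal client sketch for exercising the Flask view above. The route
# path '/summary' and the host/port are hypothetical; the snippet does not
# show the @app.route decorator, so adjust both to match wherever the view
# is actually registered.
import requests

resp = requests.post('http://localhost:5000/summary',  # hypothetical URL
                     json={'title': 'Mobile Cloud Computing',
                           'text': 'The contribution of cloud computing ...'})
print(resp.json()['text'])  # the space-joined PyTeaser summary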
# Fragment: the opening of the content1 assignment is truncated above.
# content1 holds a Chinese news story about the public open day at the
# Institute of Computing Technology, Chinese Academy of Sciences: 30+
# interactive science-outreach activities, a kids' visual-programming demo,
# a live network-attack demo around the WannaCry worm featuring security
# researcher "tombkeeper" ("TK教主"), exhibit captions ("From sand to chip",
# etc.), and a parent's account of the "password box" scavenger hunt and a
# first visit to a data room.
为了让公众更好了解信息技术相关领域科技知识,计算所精心策划了一系列互动科普活动,包括“解密开宝箱”、“科学探索印章之旅”、“网络攻防现场演示”、“编程竟如此简单”、“超级计算机管理模拟体验”、参观“计算的脚步”展厅、科学体验互动活动等30余项丰富多彩的互动科普项目。生动、形象地向公众展示应用信息技术的魅力,激发青少年对信息技术的兴趣和热爱,通过寓教于乐的科普形式,让青少年们在游戏中学到知识、引起兴趣、培养科学精神。

说起编程,不少人的第一反应就是两个字:枯燥。针对这点,中科院计算所的科研人员们开发了一个有趣的应用,专门为小朋友量身定制的,采用图形化的编程界面,小朋友们很快就明白了编程的方法,编写出了一个个小程序,控制这小车在场地里跑来跑去。

小朋友编程控制工程车模型

新型“蠕虫”病毒WannaCry全球范围内爆发,让全世界的人们在计算机病毒的嚣张面前结结实实地“蓝瘦、香菇”了一把。WannaCry到底是何方神圣,网络攻防现场演示活动中为大家进行了解答。计算所还为公众请来了包括国内网络安全界被尊称为“TK教主”的“tombkeeper”等大咖,为公众演示了包括:扫描条码如何导致系统被入侵;发射携带攻击信息的激光束如何做到让条码阅读器在系统上执行任意命令;如何在移动端窃取通讯录、短信内容等隐私信息。这一系列网络攻防的现场演示让公众大开眼界。

“TK教主”带领观众体验网络入侵

从沙子到芯片 互动展区

观众参观

观众体验互动展示

小朋友成功解密打开宝箱

有妈妈在带孩子参加完计算所的公众科学日活动后,在朋友圈里分享了感受,她写到“第一次参加公众开放日活动,这里简直是孩子们的游乐场!带上一本通关护照,根据线索打开一个一个的密码箱,最终通关大奖也在密码箱里!小朋友看到了第一台103计算机模型,体验了虚拟现实,还开动脑筋翻译了摩尔斯码,当然最开心的还是穿梭于各层寻找密码箱!必须一提的是,小朋友第一次站在数据机房里,没有感叹眼前不断闪烁的指示灯,没有抱怨轰隆隆的噪声,而是惊讶的问了一句:‘妈妈,这是什么味道呀!’。原来这是我们家小猴子对数据机房的第一印象是高精尖的计算技术。以直观有趣的方式为孩子们启蒙和科普,这是计算所开放日最赞的地方!”
""".encode('utf-8')

# NB: the title passed below means "UK London terror attack", which is
# unrelated to the article text above.
summaries = Summarize('英国伦敦恐袭事件', content1)
print summaries[0].decode('utf-8')
print("Summarizing...", url) summary_sentences = SummarizeUrl(url) if summary_sentences: total_summaries.append(" ".join(summary_sentences)) print("Done processing one file") print("Finished first pass through all records") print("Recombining and summarizing...") while len(total_summaries) > 15: summaries_to_join = int(len(total_summaries) / 15) if summaries_to_join == 1: break if summaries_to_join > 20: summaries_to_join = 20 combined_summaries = [ " ".join(total_summaries[i:i + summaries_to_join]) for i in range(0, len(total_summaries), summaries_to_join) ] total_summaries = [ " ".join(Summarize("Hurricane Florence", summary).split("\n")) for summary in combined_summaries ] print( "Finished pass through recombined summaries... Number of summaries left = %d" % len(total_summaries)) print("Final summary:") for summary in total_summaries: print(summary) print("\n")