def pyteasSummarize(filename, title, num_sents=1):
    """
    TextTeaser/PyTeaser scores sentences on four basic summarization
    features and builds the summary from them:
    1. Title feature: scores a sentence with regard to the title,
       calculated as the count of words the sentence has in common
       with the document title.
    2. Sentence length: scored by how many words are in the sentence.
       TextTeaser defines a constant "ideal" (value 20) representing
       the ideal sentence length in words; the score is a normalized
       distance from this value.
    3. Sentence position: where the sentence is located in the text.
       Introductions and conclusions score higher on this feature.
    4. Keyword frequency: the frequency of the sentence's words across
       the whole text under a bag-of-words model (after removing
       stop words).
    """
    from pyteaser import Summarize
    with open(filename, "r") as fh:
        text = fh.read()
    summary = Summarize(title, text)
    sents = " ".join([str(sentence) for sentence in summary[:num_sents]])
    print("")
    print("####### From PyTeaser #######")
    print(sents)
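# The first two features described in the docstring above can be sketched
# directly. This is a minimal, illustrative reimplementation, not PyTeaser's
# actual code; the tokenization and the stop-word set are assumptions
# supplied by the caller.
def title_score(title_words, sentence_words, stopwords):
    # Count of sentence words that appear in the stop-word-filtered title,
    # normalized by the number of remaining title words.
    title_words = [w for w in title_words if w not in stopwords]
    if not title_words:
        return 0.0
    common = [w for w in sentence_words if w in title_words]
    return len(common) / float(len(title_words))

def length_score(sentence_words, ideal=20):
    # Normalized distance from the "ideal" sentence length of 20 words.
    return 1.0 - abs(ideal - len(sentence_words)) / float(ideal)

# For example, title_score(['cloud', 'computing'], ['cloud', 'resources'],
# set()) returns 0.5, and length_score(['a'] * 10) returns 0.5.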
def TeaserSum(input_file, output_file):
    senlist = ReadText(input_file)
    text = open(input_file).read()
    title = tpinfo.topic_sen
    print title
    res_sum = Summarize(title, text)
    out_pid = codecs.open(output_file, 'w')
    """
    cnt = 0
    for sen in res_sum:
        out_pid.write(sen + '\n')
        cnt += 1
        if cnt >= 2:
            break
    """
    # Keep whole sentences until the 100-word budget runs out, then pad
    # with a prefix of the next sentence.
    N_word = 100
    cnt = 0
    outsum = []
    while cnt < len(res_sum):
        if N_word > len(res_sum[cnt].split()):
            outsum.append(res_sum[cnt])
            N_word -= len(res_sum[cnt].split())
            cnt += 1
        else:
            break
    if N_word >= 0 and cnt < len(res_sum):
        subset = ' '.join(res_sum[cnt].split()[:N_word])
        outsum.append(subset)
    for sen in outsum:
        out_pid.write(sen + '\n')
    out_pid.close()
    return
def summarize_bill(title, text):
    """
    Cleans up bill text and summarizes it using TextTeaser (PyTeaser).
    Uses Python 2.7, as TextTeaser/PyTeaser does not support Python 3.
    """
    text = remove_page_artifacts(title, text)
    text = remove_special_characters(text)
    text = remove_uppercase(text)
    text = remove_legal_phrases(text)
    if ' To ' in text:
        text = text.split(' To ', 1)[1]
    text = replace_formal_words(text)
    text = remove_filler_phrases(text)
    text = fix_capitalization(text)
    # Ensure just a single space between each word
    text = re.sub(r'\s+', ' ', text)
    text = fix_punctuation_errors(text)
    # print '\n\n'.join(nltk.sent_tokenize(text))
    # Get the top 5 sentences from the TextTeaser summarization algorithm
    summary = Summarize(title, text)
    # Remove leading whitespace
    summary = [sentence.lstrip() for sentence in summary]
    return summary
def IW():
    IW_URL = 'http://www.indiewire.com/t/reviews/'
    IWSoup = BeautifulSoup(get(IW_URL).text, 'html.parser')
    iw_dict = {}
    for entry in IWSoup.find_all('header', class_='entry-header'):
        r_dict = {}
        name_str = entry.text
        name = name_str[2:name_str.find('Review') - 2]
        r_dict['name'] = name
        link = entry.a['href']
        r_dict['link'] = link
        r_soup = BeautifulSoup(get(link).text, 'html.parser')
        r_critic = r_soup.find_all('meta', attrs={'name': 'author'})[0]['content']
        r_text = r_soup.find_all('meta', attrs={'name': 'body'})[0]['content']
        r_sentiment = TextBlob(r_text).sentiment
        r_polarity = r_sentiment[0]
        r_subjectivity = r_sentiment[1]
        summary = Summarize(name, r_text)
        r_blurb = ' '.join(summary)
        r_thumbsup = 'Positive' if r_polarity > 0 else 'Negative'
        r_dict['critic'] = r_critic
        r_dict['text'] = r_text
        r_dict['blurb'] = r_blurb
        r_dict['review_subjectivity'] = r_subjectivity
        r_dict['review_polarity'] = r_polarity
        r_dict['sentiment'] = r_sentiment
        r_dict['thumbsup'] = r_thumbsup
        iw_dict[name] = r_dict
    return iw_dict
def catchContent():
    number_to = t.count(
        '(//div[@class="col"]/div[contains(@class, "today")]/ul/li[contains(@class, "col-md-12")])'
    )
    df_to = pd.DataFrame(index=range(0, number_to),
                         columns=['Sno', 'Title', 'URL', 'Summary', 'Img_URL'])
    t.hover('//div[@class="container footer-main"]')
    t.wait(2)
    # XPath positions are 1-based, so run n through number_to inclusive;
    # the original range(1, number_to) dropped the last item
    for n in range(1, number_to + 1):
        title = t.read(
            '//div[@class="col"]/div[contains(@class, "today")]/ul/li[contains(@class, "col-md-12")][{}]//div[contains(@class, "article-listing_content")]//h2'
            .format(n))
        URL_o = t.read(
            '//div[@class="col"]/div[contains(@class, "today")]/ul/li[contains(@class, "col-md-12")][{}]//@href'
            .format(n))
        URL = "https://www.todayonline.com" + str(URL_o)
        Img_link = t.read(
            '//div[@class="col"]/div[contains(@class, "today")]/ul/li[contains(@class, "col-md-12")][{}]//img/@src'
            .format(n))
        df_to.iloc[n - 1, 0] = n
        df_to.iloc[n - 1, 1] = title.decode('utf-8')
        df_to.iloc[n - 1, 2] = URL
        df_to.iloc[n - 1, 4] = Img_link
    for i in range(0, df_to.shape[0]):
        if df_to['Img_URL'][i] == "":
            df_to['Img_URL'][i] = np.nan
    df_to.dropna(subset=['Img_URL'], inplace=True, how='any')
    df_to = df_to.reset_index(drop=True)
    df_to['Sno'] = df_to.index
    df_to = util.fixImgLink(
        df_to,
        "https://cf-templates-fghyux9ggb7t-ap-southeast-1.s3-ap-southeast-1.amazonaws.com/todayOnline.png"
    )
    for n in range(0, df_to.shape[0]):
        t.url(df_to.URL[n])
        t.wait(4)
        t.hover('//div[@class="article-detail_subscription"]')
        t.wait(2)
        number_p = t.count('//div/p[not(@class)]')
        Content = ""
        for i in range(1, number_p - 2):
            cont = t.read('//div/p[not(@class)][{}]'.format(i))
            Content = Content + " " + cont  # separate paragraphs with a space
        summaries = Summarize(df_to.Title[n], unicode(str(Content), "utf-8"))
        # n is already 0-based here; the original wrote to row n - 1,
        # which placed the first summary on the last row
        df_to.iloc[n, 3] = summaries[0]
    return df_to
def ChicagoScraper():
    # CHI_URL and CHISoup are module-level globals defined elsewhere
    chi = {}
    for x in CHISoup.find_all('div', class_='trb_brk_gc_i'):
        title = x.find_all('a')[0].find_all('figure')[0].find_all('img')[0]['title']
        if 'review:' in title:
            r_dict = {}
            r_url = CHI_URL + x.find_all('a')[0]['href']
            title = str(title[:title.find('review')]).replace("'", "").strip()
            r_soup = BeautifulSoup(get(r_url).text, 'html.parser')
            r_critic = r_soup.find_all('meta', attrs={'name': 'author'})[0]['content']
            r_text = ''
            for piece in r_soup.find_all('div', class_='trb_ar_page')[0].find_all('p'):
                r_text += piece.text
            r_sentiment = TextBlob(r_text).sentiment
            r_polarity = r_sentiment[0]
            r_subjectivity = r_sentiment[1]
            summary = Summarize(title, r_text)
            r_blurb = ' '.join(summary)
            r_thumbsup = 'Positive' if r_polarity > 0 else 'Negative'
            print title + ": " + str(r_polarity)
            r_dict['critic'] = r_critic
            r_dict['text'] = r_text
            r_dict['blurb'] = r_blurb
            r_dict['review_subjectivity'] = r_subjectivity
            r_dict['review_polarity'] = r_polarity
            r_dict['sentiment'] = r_sentiment
            r_dict['thumbsup'] = r_thumbsup
            r_dict['name'] = title
            r_dict['link'] = r_url
            chi[title] = r_dict
    return chi
def scrapeHWR():
    HWR_URL = 'https://www.hollywoodreporter.com/topic/movie-reviews'
    hwr_soup = BeautifulSoup(get(HWR_URL).text, 'html.parser')
    hwr_dict = {}
    for noodle in hwr_soup.find_all('a', class_='topic-card__link'):
        a = smart_str(noodle['title']).replace("'", '')
        a = a[:a.find(': Film Review')]
        hwr_dict[a] = noodle['href']
    for key in hwr_dict:
        if Movie.objects.filter(name=key).exists():
            print '%s already exists' % (key)
        else:
            print '%s ...adding this movie' % (key)
            newMovie = Movie(name=key, director=defaultDir)
            newMovie.save()
    for key in hwr_dict:
        review_soup = BeautifulSoup(get(hwr_dict[key]).text, 'html.parser')
        r_author = review_soup.find_all('meta', attrs={"name": "sailthru.author"})[0]['content']
        if Critic.objects.filter(name=r_author).exists():
            print '%s already exists!' % (r_author)
        else:
            newCritic = Critic(name=r_author, organization=HWRorg)
            print 'adding %s' % (newCritic)
            newCritic.save()
        review_text = ""
        for item in review_soup.find_all('p', style='margin-bottom: 0in;'):
            review_text += item.text
        if len(review_text) == 0:
            for item in review_soup.find_all('p'):
                review_text += item.text
        review_sentiment = TextBlob(review_text).sentiment
        summary = Summarize(key, review_text)
        r_blurb = ' '.join(summary)
        #print r_blurb
        rtext_list.append(review_text)
        r_subjectivity = review_sentiment[1]
        r_polarity = review_sentiment[0]
        r_thumbsup = 'Positive' if r_polarity > 0 else 'Negative'
        r_url = hwr_dict[key]
        newReviews = Reviews(movie=Movie.objects.get(name=key),
                             review_url=r_url,
                             review_polarity=r_polarity,
                             review_subjectivity=r_subjectivity,
                             thumbsup=r_thumbsup,
                             blurb=r_blurb,
                             text=review_text,
                             critic=Critic.objects.get(name=r_author),
                             organization=HWRorg)
        if Reviews.objects.filter(movie=Movie.objects.get(name=key),
                                  organization=HWRorg).exists():
            print 'This review of %s already exists' % (key)
        else:
            print 'adding review'
            reviews_toadd.append(newReviews)
            newReviews.save()
def wiki_summary(name):
    try:
        page = wikipedia.page(name)
        summary = wikipedia.summary(name, sentences=20)
        url = page.url
        output = Summarize(page.title, summary)
        return name, url, ' '.join(output)
    except wikipedia.PageError:
        logging.warning("No wiki entry for '{0}'.".format(name))
        return None
def single_txt(txtname):
    code = 0
    message = "success"
    title = ""
    time = ""
    text = ""
    f2 = open(txtname)
    i = 0
    while 1:
        line = f2.readline()
        if not line:
            break
        if i == 0:
            title = line
        if i == 1:
            time = line
        if i >= 2:
            text = text + line
        i = i + 1
    f2.close()
    if i < 2:
        code = 1
        message = "wrong format"
    key2 = keywords5(text)
    # pprint(key2)
    summaries = Summarize(title, text)
    # pprint(summaries)
    abstract = ''
    for summary in summaries:
        abstract = abstract + summary + " "
    sentimentstr = client.Sentiment({'text': text})
    sentiment = sentimentstr['polarity_confidence']
    positive = sentimentstr['polarity']
    # Rescale the API's polarity confidence: positive/negative map onto
    # +/-[0.2, 1.0]; neutral gets a magnitude in [0, 0.2] whose sign is
    # chosen by text-length parity as an arbitrary tie-break.
    # e.g. a positive confidence of 0.9 becomes 0.4 * 2 * 0.8 + 0.2 = 0.84.
    if positive == 'positive':
        sentiment = abs(sentiment - 0.5) * 2 * 0.8 + 0.2
    if positive == 'negative':
        sentiment = -(abs(sentiment - 0.5) * 2 * 0.8 + 0.2)
    if positive == 'neutral':
        if len(text) % 2 == 1:
            sentiment = (0.2 - abs(sentiment - 0.5) * 2 * 0.2)
        else:
            sentiment = -(0.2 - abs(sentiment - 0.5) * 2 * 0.2)
    data = {
        'title': title.strip('\n'),
        'time': time.strip('\n'),
        'abstract': abstract,
        'keywords': key2,
        'sentiment': sentiment
    }
    return code, message, data
async def shorten(message, url='wesring.com'):  # default URL is the error page
    await client.send_message(message.channel, "Im reading, give me a second")
    # TODO: Write own html scraper
    logInfo("Parsing: " + url)
    article = Article(url)
    article.download()
    article.parse()
    # TODO: Write own summary function
    summary = "".join(Summarize(article.title, article.text))
    await client.send_message(message.channel, "\n\nSummary:\n " + summary)
    logInfo("done")
def testText(self):
    article_title = u'Framework for Partitioning and Execution of Data Stream Applications in Mobile Cloud Computing'
    article_text = u'The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm. Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources. In this paper, we focus on the third approach in supporting mobile data stream applica- tions. More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data. To the best of our knowledge, it is the first work to study the partitioning problem for mobile data stream applica- tions, where the optimization is placed on achieving high throughput of processing the streaming data rather than minimizing the makespan of executions as in other appli- cations. We first propose a framework to provide runtime support for the dynamic computation partitioning and exe- cution of the application. Different from existing works, the framework not only allows the dynamic partitioning for a single user but also supports the sharing of computation in- stances among multiple users in the cloud to achieve efficient utilization of the underlying cloud resources. Meanwhile, the framework has better scalability because it is designed on the elastic cloud fabrics. Based on the framework, we design a genetic algorithm for optimal computation parti- tion. Both numerical evaluation and real world experiment have been performed, and the results show that the par- titioned application can achieve at least two times better performance in terms of throughput than the application without partitioning.'
    summarised_article_text = [
        u'The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.',
        u'Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources.',
        u'In this paper, we focus on the third approach in supporting mobile data stream applica- tions.',
        u'More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data.',
        u'We first propose a framework to provide runtime support for the dynamic computation partitioning and exe- cution of the application.'
    ]
    self.assertEqual(Summarize(article_title, article_text),
                     summarised_article_text)
def summarize_file(filename):
    with open(filename, 'r') as f:
        title = f.readline().decode('cp1252')
        content = ""
        for line in f:
            if line:
                content += line.decode('cp1252')
    print filename + " / " + title
    summary = Summarize(title, content)
    for bullet_point in summary:
        print "[+] " + bullet_point
def SummarizeUrl(url):
    summaries = []
    try:
        article = grab_link(url)
    except IOError:
        print 'IOError'
        return None
    #print ">>> " + str(high) + " - " + item['Source'] + " >>> " + highsen
    if (article is None or article.cleaned_text is None
            or article.title is None):
        return None
    text = str(article.cleaned_text.encode('utf-8', 'ignore'))
    title = str(article.title.encode('utf-8', 'ignore'))
    summaries = Summarize(title, text)
    return summaries
def Slate():
    slate_dict = {}
    for item in SlateSoup.find_all('div', class_="tile long-hed stacked"):
        if 'reviewed' in item.find_all('a')[0]['href']:
            try:
                r_url = item.find_all('a')[0]['href']
                r_soup = BeautifulSoup(get(r_url).text, 'html.parser')
                r_title = r_soup.find_all('h1', class_='article__hed')[0].findAll('em')[0].text
                r_title_slug = r_title.lower().replace(' ', '-').replace('(', '').replace(')', '')
                if r_title_slug in r_url:
                    print 'scraping Slate review for %s' % (r_title)
                    r_dict = {}
                    r_text = ''
                    # distinct name so the outer loop variable is not shadowed
                    for para in r_soup.find_all('p', class_='slate-paragraph'):
                        r_text += para.text
                    r_sentiment = TextBlob(r_text).sentiment
                    r_polarity = r_sentiment[0]
                    r_subjectivity = r_sentiment[1]
                    summary = Summarize(r_title, r_text)
                    r_blurb = ' '.join(summary)
                    r_thumbsup = 'Positive' if r_polarity > 0 else 'Negative'
                    r_critic = r_soup.find_all('meta', attrs={'name': 'author'})[0]['content']
                    r_dict['critic'] = r_critic
                    r_dict['text'] = r_text
                    r_dict['name'] = r_title
                    r_dict['link'] = r_url
                    r_dict['blurb'] = r_blurb
                    r_dict['review_subjectivity'] = r_subjectivity
                    r_dict['review_polarity'] = r_polarity
                    r_dict['sentiment'] = r_sentiment
                    r_dict['thumbsup'] = r_thumbsup
                    slate_dict[r_title] = r_dict
            except:
                print 'movie not found. skipping...'
    return slate_dict
def scrapeNYTMovie():
    review_dict = {}
    url = 'https://www.nytimes.com/reviews/movies'
    html_soup = BeautifulSoup(get(url).text, 'html.parser')
    movie_length = len(html_soup.find_all('a', class_='story-link'))
    print movie_length
    for i in range(movie_length):
        souplet = html_soup.find_all('a', class_='story-link')[i]
        r_url = 'https://www.nytimes.com/' + souplet['href']
        r_name = souplet.h2.text.strip()
        r_get = get(r_url)
        r_soup = BeautifulSoup(r_get.text, 'html.parser')
        review_text = ""
        chunk_count = len(r_soup.find_all('p', class_='story-body-text story-content'))
        author = r_soup.find_all('meta', attrs={"name": "author"})[0]['content']
        # separate index so the outer loop variable i is not shadowed
        for j in range(chunk_count):
            review_text += r_soup.find_all('p', class_='story-body-text story-content')[j].text
        review_sentiment = TextBlob(review_text).sentiment
        review_polarity = review_sentiment[0]
        review_subjectivity = review_sentiment[1]
        r_thumbsup = 'Positive' if review_polarity > 0 else 'Negative'
        summary = Summarize(r_name, review_text)
        r_blurb = ' '.join(summary)
        review_dict[souplet.h2.text.strip()] = {
            'thumbsup': r_thumbsup,
            'blurb': r_blurb,
            'name': r_name,
            'link': r_url,
            'text': review_text,
            'review_subjectivity': review_subjectivity,
            'critic': author,
            'organization': 'NYT',
            'review_polarity': review_polarity
        }
    return review_dict
def pyteaser_summary(article_title, article_text, SENTENCES_COUNT):
    summary = Summarize(article_title, article_text)
    return summary[:SENTENCES_COUNT]
'''
pip install pyteaser  ==>  then run this script directly.
You can also assign your own title and text and call Summarize(title, text).
'''
from pyteaser import Summarize, SummarizeUrl

article_title = u'Framework for Partitioning and Execution of Data Stream Applications in Mobile Cloud Computing'
article_text = u'The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm. Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources. In this paper, we focus on the third approach in supporting mobile data stream applica- tions. More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data. To the best of our knowledge, it is the first work to study the partitioning problem for mobile data stream applica- tions, where the optimization is placed on achieving high throughput of processing the streaming data rather than minimizing the makespan of executions as in other appli- cations. We first propose a framework to provide runtime support for the dynamic computation partitioning and exe- cution of the application. Different from existing works, the framework not only allows the dynamic partitioning for a single user but also supports the sharing of computation in- stances among multiple users in the cloud to achieve efficient utilization of the underlying cloud resources. Meanwhile, the framework has better scalability because it is designed on the elastic cloud fabrics. Based on the framework, we design a genetic algorithm for optimal computation parti- tion. Both numerical evaluation and real world experiment have been performed, and the results show that the par- titioned application can achieve at least two times better performance in terms of throughput than the application without partitioning.'

ranks = Summarize(article_title, article_text)

print "*" * 20, "ONE SENTENCE SUMMARY", "*" * 20
print ranks[0]
print ''  # NB: a bare print() under Python 2 would print an empty tuple, "()"
print "*" * 20, "TWO SENTENCE SUMMARY", "*" * 20
print ranks[:2]

'''
OUTPUT:

******************** ONE SENTENCE SUMMARY ********************
The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.

******************** TWO SENTENCE SUMMARY ********************
[u'The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.', u'Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources.']
'''
def test(self):
    self.assertEqual(Summarize(self.article_title, self.article_text),
                     self.summarised_article_text)
else:
    newCritic = Critic(name=r_author, organization=VarietyOrg)
    print newCritic
    newCritic.save()
    print 'added %s from %s to critic database' % (r_author, 'variety')
r_dict['critic'] = r_author
for beefchunk in r_soup.find_all('meta', attrs={'class': 'swiftype'})[0].find_all('meta', attrs={'class': 'swiftype'}):
    if len(beefchunk['content']) > 140:
        r_text = beefchunk['content']
r_dict['text'] = r_text
r_sentiment = TextBlob(r_text).sentiment
r_polarity = r_sentiment[0]
r_subjectivity = r_sentiment[1]
summary = Summarize(name, r_text)
r_blurb = ' '.join(summary)
r_dict['review_subjectivity'] = r_subjectivity
r_dict['review_polarity'] = r_polarity
r_thumbsup = 'Positive' if r_polarity > 0 else 'Negative'
r_dict['thumbsup'] = r_thumbsup
r_dict['blurb'] = r_blurb
if Reviews.objects.filter(
        movie=Movie.objects.get(name=name),
        critic=Critic.objects.get(name=r_author,
                                  organization=VarietyOrg)).exists():
    print 'Review of %s from %s already exists!' % (name, r_author)
else:
    print 'adding review'
    mymov = Movie.objects.get(name=name)
    mycritic = Critic.objects.get(name=r_author,
def wholeArticleFromQuery(query):
    # get_zci returns either abstract text or a URL; the presence of "("
    # is used as a crude text-vs-URL heuristic. Note these Summarize /
    # SummarizeUrl variants take an extra length argument, unlike stock
    # PyTeaser's Summarize(title, text).
    g = d.get_zci(query, True)
    if "(" in g:
        return Summarize(query, g, 1500)
    else:
        return SummarizeUrl(g, 1500)
def longSummaryFromQuery(query):
    g = d.get_zci(query, True)
    if "(" in g:
        return Summarize(query, g, 15)
    else:
        return SummarizeUrl(g, 15)
from pyteaser import Summarize
import sys

text_file = sys.argv[1]
title_file = sys.argv[2]

text = open(text_file, 'rb').read()
title = open(title_file, 'rb').read()

# Summarize expects (title, text), in that order; the original call
# passed them swapped
summaries = Summarize(title, text)
print summaries
    'w')

clean_reviews_with_removal = open(clean_reviews_with_removal_path, 'r')
clean_reviews_without_removal = open(clean_reviews_without_removal_path, 'r')
texts_with_removal = clean_reviews_with_removal.readlines()
texts_without_removal = clean_reviews_without_removal.readlines()

# this is just for printing progress
progress_count = []
for i in range(0, 100, 7):
    progress_count.append(int(i / 100 * len(texts_with_removal)))

print("Summarizing with removal using pyteaser ... ", end="", flush=True)
for i, text in enumerate(texts_with_removal):
    summary = text
    if text.count(".") > 1:
        # Summarize returns a list of sentences; join it so the str.replace
        # below does not fail on a list
        summary = ' '.join(Summarize('', text))
    summary = summary.replace('\n', ' ')
    pyteaser_summary_with_removal.write("{}\n".format(summary))
    # Prints the progress count
    if i in progress_count:
        print("{}%..".format(int(i / len(texts_with_removal) * 100)),
              end="", flush=True)
print("100%")

print("Summarizing without removal using pyteaser ... ", end="", flush=True)
for i, text in enumerate(texts_without_removal):
    summary = text
    if text.count(".") > 1:
        summary = ' '.join(Summarize('', text))
    summary = summary.replace('\n', ' ')
def summary():
    title = request.get_json()['title']
    text = request.get_json()['text']
    summaries = Summarize(title, text)
    summary = " ".join(summaries)
    return jsonify(text=summary)
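# A minimal client sketch for exercising the Flask view above. The route
# path '/summary' and the host/port are hypothetical; the snippet does not
# show the @app.route decorator, so adjust both to match wherever the view
# is actually registered.
import requests

resp = requests.post('http://localhost:5000/summary',  # hypothetical URL
                     json={'title': 'Mobile Cloud Computing',
                           'text': 'The contribution of cloud computing ...'})
print(resp.json()['text'])  # the space-joined PyTeaser summary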
# Fragment: the opening of the content1 assignment is truncated above.
# content1 holds a Chinese news story about the public open day at the
# Institute of Computing Technology, Chinese Academy of Sciences: 30+
# interactive science-outreach activities, a kids' visual-programming demo,
# a live network-attack demo around the WannaCry worm featuring security
# researcher "tombkeeper" ("TK教主"), exhibit captions ("From sand to chip",
# etc.), and a parent's account of the "password box" scavenger hunt and a
# first visit to a data room.
为了让公众更好了解信息技术相关领域科技知识,计算所精心策划了一系列互动科普活动,包括“解密开宝箱”、“科学探索印章之旅”、“网络攻防现场演示”、“编程竟如此简单”、“超级计算机管理模拟体验”、参观“计算的脚步”展厅、科学体验互动活动等30余项丰富多彩的互动科普项目。生动、形象地向公众展示应用信息技术的魅力,激发青少年对信息技术的兴趣和热爱,通过寓教于乐的科普形式,让青少年们在游戏中学到知识、引起兴趣、培养科学精神。

说起编程,不少人的第一反应就是两个字:枯燥。针对这点,中科院计算所的科研人员们开发了一个有趣的应用,专门为小朋友量身定制的,采用图形化的编程界面,小朋友们很快就明白了编程的方法,编写出了一个个小程序,控制这小车在场地里跑来跑去。

小朋友编程控制工程车模型

新型“蠕虫”病毒WannaCry全球范围内爆发,让全世界的人们在计算机病毒的嚣张面前结结实实地“蓝瘦、香菇”了一把。WannaCry到底是何方神圣,网络攻防现场演示活动中为大家进行了解答。计算所还为公众请来了包括国内网络安全界被尊称为“TK教主”的“tombkeeper”等大咖,为公众演示了包括:扫描条码如何导致系统被入侵;发射携带攻击信息的激光束如何做到让条码阅读器在系统上执行任意命令;如何在移动端窃取通讯录、短信内容等隐私信息。这一系列网络攻防的现场演示让公众大开眼界。

“TK教主”带领观众体验网络入侵

从沙子到芯片 互动展区

观众参观

观众体验互动展示

小朋友成功解密打开宝箱

有妈妈在带孩子参加完计算所的公众科学日活动后,在朋友圈里分享了感受,她写到“第一次参加公众开放日活动,这里简直是孩子们的游乐场!带上一本通关护照,根据线索打开一个一个的密码箱,最终通关大奖也在密码箱里!小朋友看到了第一台103计算机模型,体验了虚拟现实,还开动脑筋翻译了摩尔斯码,当然最开心的还是穿梭于各层寻找密码箱!必须一提的是,小朋友第一次站在数据机房里,没有感叹眼前不断闪烁的指示灯,没有抱怨轰隆隆的噪声,而是惊讶的问了一句:‘妈妈,这是什么味道呀!’。原来这是我们家小猴子对数据机房的第一印象是高精尖的计算技术。以直观有趣的方式为孩子们启蒙和科普,这是计算所开放日最赞的地方!”
""".encode('utf-8')

# NB: the title passed below means "UK London terror attack", which is
# unrelated to the article text above.
summaries = Summarize('英国伦敦恐袭事件', content1)
print summaries[0].decode('utf-8')
print("Summarizing...", url) summary_sentences = SummarizeUrl(url) if summary_sentences: total_summaries.append(" ".join(summary_sentences)) print("Done processing one file") print("Finished first pass through all records") print("Recombining and summarizing...") while len(total_summaries) > 15: summaries_to_join = int(len(total_summaries) / 15) if summaries_to_join == 1: break if summaries_to_join > 20: summaries_to_join = 20 combined_summaries = [ " ".join(total_summaries[i:i + summaries_to_join]) for i in range(0, len(total_summaries), summaries_to_join) ] total_summaries = [ " ".join(Summarize("Hurricane Florence", summary).split("\n")) for summary in combined_summaries ] print( "Finished pass through recombined summaries... Number of summaries left = %d" % len(total_summaries)) print("Final summary:") for summary in total_summaries: print(summary) print("\n")