Example #1
0
def summarize():
    """Render ``summarize.html`` for a posted title/text pair.

    Reads ``title`` and ``text`` from the submitted form, collapses all
    whitespace runs in the text to single spaces, and hands the TextTeaser
    result to the template as ``summary``.  The literal ``5`` is passed as
    the third argument to ``TextTeaser.summarize`` (presumably the number of
    sentences to keep -- confirm against the TextTeaser version in use).
    """
    form = request.form
    title = form['title']
    # Newlines are turned into spaces first; split()/join then squeezes every
    # remaining run of whitespace down to one space.
    words = form['text'].replace("\n", " ").split()
    text = " ".join(words)

    sentences = TextTeaser().summarize(title, text, 5)

    context = {'title': title, 'text': text, 'summary': sentences}
    return render_template('summarize.html', **context)
Example #2
0
def summarize():
    """Return a JSON summary for the posted ``title`` and ``text``.

    Both form fields are read with ``.get`` and are therefore ``None`` when
    absent.  Every sentence produced by TextTeaser is echoed to stdout and
    returned to the client as ``{"sentences": [...]}``.
    """
    title = request.form.get('title')
    text = request.form.get('text')

    sentences = list(TextTeaser().summarize(title, text))
    for sentence in sentences:
        # Call form of print: valid on Python 2 and Python 3 alike.
        print(sentence)

    return jsonify({"sentences": sentences})
Example #3
0
def summarize(user,
              room_name,
              msg_limit=None,
              days_limit=None,
              hours_limit=None,
              min_limit=None,
              title=None):
    """Summarize the messages of ``room_name`` on behalf of ``user``.

    Args:
        user: Passed through to ``compile_messages`` and
            ``utils.get_emails_with_users``.
        room_name: Room whose messages are compiled; also used to build the
            default title.
        msg_limit, days_limit, hours_limit, min_limit: Forwarded unchanged to
            ``compile_messages`` (presumably bounds on how many / how recent
            messages to include -- confirm against that helper).
        title: Title given to TextTeaser.  Defaults to
            ``'<room_name> Summary'`` when ``None``.

    Returns:
        The result of ``indent_tagged`` applied to the TextTeaser summary and
        the values of the mapping returned by
        ``utils.get_emails_with_users(user, room_name)``.
    """
    # Identity check: None is a singleton, and `==` could be hijacked by a
    # custom __eq__ on the title object.
    if title is None:
        title = '%s Summary' % room_name
    text = compile_messages(user, room_name, msg_limit, days_limit,
                            hours_limit, min_limit)
    tt = TextTeaser()
    return indent_tagged(tt.summarize(title, text),
                         utils.get_emails_with_users(user, room_name).values())
Example #4
0
def summary(event, context):
    """Load ``data.json`` from the working directory and pretty-print it.

    ``event`` and ``context`` are accepted but not used by the code shown
    here (the signature looks like a serverless handler -- confirm with the
    deployment).  A ``TextTeaser`` instance is created but not yet used.
    """
    tt = TextTeaser()

    # stackAPI_return:
    # concept
    # code
    # title
    # is_code

    # Context manager so the file handle is closed even if parsing fails.
    with open('data.json') as handle:
        data = json.load(handle)
    pprint(data)
Example #5
0
def hello():
    """Download the article at ``?url=...`` and return its summary as JSON.

    The URL (empty string when the query parameter is missing), the parsed
    title, the parsed body text and every summary sentence are echoed to
    stdout along the way.
    """
    target = request.args.get('url', '')
    print(target)

    page = Article(target)
    page.download()
    page.parse()

    headline = page.title
    print(headline)
    body = page.text
    print(body)

    summarizer = TextTeaser()
    picked = summarizer.summarize(headline, body)

    for line in picked:
        print(line)

    return jsonify(picked)
Example #6
0
def tags():
    """Generate tags and a short summary for a posted document.

    POST: reads a JSON body with the keys ``waveid``, ``data.text`` and
    ``data.name``, builds a tag list with ``mytagger(text, 10)`` and a
    summary made of the first four TextTeaser sentences, forwards both (as
    JSON strings) to ``post2swellRT`` and returns the JSON literal ``true``.

    Any other method: returns the JSON string ``"Hello from Teem Tag"``.

    The module-level ``session`` is created lazily through
    ``authfromSwellRT()`` on the first POST that finds it falsy.
    """
    global session

    def _plain(value):
        # Fallback serializer for objects json cannot encode: stringify and
        # trim surrounding quote characters.
        return str(value).strip('"\'')

    if request.method != 'POST':
        # json.dumps accepts its options by keyword only on Python 3; the
        # stray positional argument that used to be passed here raised
        # TypeError there.
        return json.dumps("Hello from Teem Tag", default=_plain)

    if not session:
        session = authfromSwellRT()

    data = request.get_json()
    app.logger.info(data)

    # Initialisation for context
    wave_id = data['waveid']
    description = data['data']['text']
    name = data['data']['name']

    # Generating tags
    tags_json = json.dumps(mytagger(description, 10), default=_plain)

    # Generating summary of 4 lines
    sentences = TextTeaser().summarize(name, description)
    summary = json.dumps(sentences[:4])

    # For logs
    app.logger.info(tags_json)
    app.logger.info(summary)

    post2swellRT(session, wave_id, tags_json, summary)

    return json.dumps(True)
Example #7
0
def summarize_url(url, arc90=False):
    """Fetch ``url`` and summarize the text of its ``<p>`` elements.

    Args:
        url: Page to fetch.  A falsy value short-circuits and yields ``None``.
        arc90: When true, the page is first run through ``Document`` and its
            ``title()`` / ``summary()`` are used, so that only the main
            article content is kept (the original author notes this arc90
            approach is what Safari's Reader view, Flipboard and Treesaver
            use; see
            https://stackoverflow.com/questions/4672060/web-scraping-how-to-identify-main-content-on-a-webpage).
            Otherwise the raw HTML and its ``<title>`` are used.

    Returns:
        ``None`` for a falsy ``url``; otherwise a dict with the keys
        ``title``, ``url``, ``length`` (character count of the extracted
        text), ``summary`` and ``minutes`` (space-separated word count
        floor-divided by 200 -- presumably a reading-time estimate).
    """
    # Pages with at least this many characters are not summarized, to avoid
    # bogging the summarizer down.
    CHAR_LIMIT = 100000
    # TODO: save results so that we avoid querying the same thing again
    # URL's can be PKs

    if not url:
        return None

    response = requests.get(url)
    tt = TextTeaser()

    if arc90:
        readable = Document(response.text)
        title = readable.title()
        soup = BeautifulSoup(readable.summary(), "html.parser")
    else:
        soup = BeautifulSoup(response.text, "html.parser")
        title = soup.title.text

    text = ' '.join(paragraph.text for paragraph in soup.find_all('p'))
    length = len(text)

    if length >= CHAR_LIMIT:
        summary = 'Text exceeds the ' + str(CHAR_LIMIT) + ' character limit.'
    else:
        summary = ' '.join(tt.summarize(title, text))

    word_count = len(text.split(' '))
    return {
        'title': title,
        'url': url,
        'length': length,
        'summary': summary,
        'minutes': word_count // 200
    }
Example #8
0
import requests
from bs4 import BeautifulSoup as bs
import urllib3
from PyPDF2 import PdfFileWriter, PdfFileReader
import json
import pymysql.cursors
from gensim.summarization import keywords
from textteaser import TextTeaser
import re

# Single TextTeaser instance, created once when this module is imported.
tt = TextTeaser()
# Module-level MySQL connection to the 'compliance' database on localhost,
# created at import time.  Cursors from this connection use DictCursor.
# NOTE(review): the user is masked and the password is empty in this source --
# presumably placeholders; confirm real credentials are injected securely.
connection = pymysql.connect(host='localhost',
                             user='******',
                             password='',
                             db='compliance',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)


def ocr_space_file(filename, overlay=False, api_key='api', language='eng'):

    payload = {
        'isOverlayRequired': overlay,
        'apikey': api_key,
        'language': language,
    }
    with open(filename, 'rb') as f:
        r = requests.post(
            'https://api.ocr.space/parse/image',
            files={filename: f},
            data=payload,
Example #9
0
def _print_summary(label, summarizer_cls, document, stemmer):
    """Print ``label``, then each sentence ``summarizer_cls`` picks, then a gap."""
    summarizer = summarizer_cls(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    print(label)
    for sentence in summarizer(document, SENTENCES_COUNT):
        print(sentence)
    print("\n")


def textteaser_test():
    """Append the output of several summarizers to ``summary_list.txt``.

    ``input_sample.txt`` is summarized with SumBasic, LSA, TextRank and
    LexRank.  Then a "Featured-LexRank" pass runs: TextTeaser summarizes the
    same file (its first line is used as the title), the result is written
    to ``tt.txt``, and LexRank is run over that file.

    Everything printed while the function runs is redirected into
    ``summary_list.txt``; ``sys.stdout`` is restored and all files are closed
    on exit, including when an exception is raised.
    """
    from contextlib import redirect_stdout

    with open("summary_list.txt", "a", encoding='utf-8-sig') as summary, \
            redirect_stdout(summary):
        # Obtain the input article from a plain text file.  (To read from a
        # URL instead, build the parser with HtmlParser.from_url.)
        parser = PlaintextParser.from_file("input_sample.txt",
                                           Tokenizer(LANGUAGE))

        # Stemmer for the configured language.
        stemmer = Stemmer(LANGUAGE)

        algorithms = (
            ("SumBasic:", SumBasicSummarizer),
            ("Latent Semantic Analysis:", LsaSummarizer),
            ("TextRank:", TextRankSummarizer),
            ("LexRank:", LexRankSummarizer),
        )
        for label, summarizer_cls in algorithms:
            _print_summary(label, summarizer_cls, parser.document, stemmer)

        # Featured-LexRank: TextTeaser first, LexRank over its output.
        with open('input_sample.txt', 'r', encoding='utf-8-sig') as f:
            text = f.read()
        # The title is the first line including its trailing newline, which
        # is exactly what readline() would have returned.
        first_line, newline, _ = text.partition('\n')
        title = first_line + newline

        sentences = TextTeaser().summarize(title, text)
        with open("tt.txt", "w", encoding='utf-8-sig') as tt_file:
            tt_file.writelines("%s\n" % sentence for sentence in sentences)

        parser = PlaintextParser.from_file("tt.txt", Tokenizer(LANGUAGE))
        _print_summary("Featured-LexRank:", LexRankSummarizer,
                       parser.document, stemmer)
Example #10
0
# article source: https://blogs.dropbox.com/developers/2015/03/limitations-of-the-get-method-in-http/

#text = "We spend a lot of time thinking about web API design, and we learn a lot from other APIs and discussion with their authors. In the hopes that it helps others, we want to share some thoughts of our own. In this post, we’ll discuss the limitations of the HTTP GET method and what we decided to do about it in our own API.  As a rule, HTTP GET requests should not modify server state. This rule is useful because it lets intermediaries infer something about the request just by looking at the HTTP method.  For example, a browser doesn’t know exactly what a particular HTML form does, but if the form is submitted via HTTP GET, the browser knows it’s safe to automatically retry the submission if there’s a network error. For forms that use HTTP POST, it may not be safe to retry so the browser asks the user for confirmation first.  HTTP-based APIs take advantage of this by using GET for API calls that don’t modify server state. So if an app makes an API call using GET and the network request fails, the app’s HTTP client library might decide to retry the request. The library doesn’t need to understand the specifics of the API call.  The Dropbox API tries to use GET for calls that don’t modify server state, but unfortunately this isn’t always possible. GET requests don’t have a request body, so all parameters must appear in the URL or in a header. While the HTTP standard doesn’t define a limit for how long URLs or headers can be, most HTTP clients and servers have a practical limit somewhere between 2 kB and 8 kB.  This is rarely a problem, but we ran up against this constraint when creating the /delta API call. Though it doesn’t modify server state, its parameters are sometimes too long to fit in the URL or an HTTP header. The problem is that, in HTTP, the property of modifying server state is coupled with the property of having a request body.  
We could have somehow contorted /delta to mesh better with the HTTP worldview, but there are other things to consider when designing an API, like performance, simplicity, and developer ergonomics. In the end, we decided the benefits of making /delta more HTTP-like weren’t worth the costs and just switched it to HTTP POST.  HTTP was developed for a specific hierarchical document storage and retrieval use case, so it’s no surprise that it doesn’t fit every API perfectly. Maybe we shouldn’t let HTTP’s restrictions influence our API design too much.  For example, independent of HTTP, we can have each API function define whether it modifies server state. Then, our server can accept GET requests for API functions that don’t modify server state and don’t have large parameters, but still accept POST requests to handle the general case. This way, we’re opportunistically taking advantage of HTTP without tying ourselves to it."
text = xlrd.open_workbook('./generation_text/4.xls')
table = text.sheets()[0]  # first worksheet of the workbook

rows = table.nrows
text1 = []

with open('./textteaser/trainer/0.txt', 'r') as f:
    data = f.readlines()
    print(data)
# Number of input lines; half of it is requested as the summary size below.
cout = len(data)
print(cout)
tt = TextTeaser()
print(cout)
# NOTE(review): the list of lines is passed as the first positional argument
# and the size as ``count`` -- confirm the TextTeaser variant in use accepts
# this call shape.
sentences = tt.summarize(data, count=cout // 2)

# Context manager so the output file is flushed and closed even on error.
with open('./textteaser/trainer/1.txt', 'w') as fo:
    for sentence in sentences:
        # Newlines are stripped, so sentences are written back to back.
        fo.write(sentence.strip('\n'))
        print(sentence)
Example #11
0
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import os
sys.path.append("textteaser")
sys.path.insert(1, "..")
from textteaser import TextTeaser
from textteaser import TextTeaser

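# NOTE: the title/text below are a Chinese-language news article; the URL kept here appears to be left over from the English sample this script was adapted from: https://blogs.dropbox.com/developers/2015/03/limitations-of-the-get-method-in-http/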
title = "山东济南楼市新规:优先满足刚需购房 不得要求全款买房"
text = '''来自山东齐鲁晚报下属的齐鲁壹点网站的消息,4月26日,山东省济南市城乡建设委员会发布《关于进一步规范商品房销售行为的通知》(以下简称“《通知》”),该通知主要针对各房地产开发企业、房产中介及承销机构,意在规范相关方的商品房销售行为。  《通知》要求,销售商品房时,应优先满足无住房记录的刚性购房者需求。不得要求购房人一次性付款或一次性付款优先选房,不得拒绝购房人正常使用住房公积金或商业个人贷款购房,不得要求住宅销售捆绑车位及地下室。  住宅项目申请商品房预售许可证时,应提交销售方案。销售方案包括:房源信息、销售方式、付款方式、意向购房者组成(30%首付、60%首付、全款客户占比情况)。销售方案审批通过后,向社会公示。  商品住宅项目形象进度满足预售要求的,应当一次性申请预售。  在取得《商品房预售许可证》后,应本着公开、公平、公正的原则对外销售。一次性公开全部准售房源,公示销售进度控制表,在销售现场醒目位置明码标价,并告知所有购房者。  对于违反规定的相关房地产开发企业,将依法责令立即整改,拒不整改的,依法予以行政处罚,记入房地产开发企业信用档案,向社会公示。整改完成前,暂停项目合同网签及后续预售审批项目的办理。  《通知》发布的背景则为,近期,济南市城乡建设委员会接到多份来自“12345”市民热线转办及市民群众来电来信,反映济南市部分热点区域住宅项目存在全款购房、全款优先选房、拒绝使用商业贷款或个人公积金贷款等歧视刚性需求购房者,以及住宅销售捆绑车位、地下室销售等行为,这些行为严重扰乱了房地产市场秩序,造成了极其恶劣的社会影响。  此前中国山东网曾报道,被国家明令叫停的设置购房门槛的情况又在济南出现。为此,济南市住建委,住建委的工作人员向中国山东网明确表示,选择全款购买还是贷款购买是购房人的基本权利,开发商不得刻意设置购房门槛限制购买,更不允许以捆绑地下室或者捆绑车位的形式进行销售,此类行为一经查处,济南市住建委将对该楼盘进行包括吊销预售证,拉入诚信黑名单等一系列处罚,维护济南房地产市场的平稳。'''

# Stop-word list shipped next to this script; the path is resolved against
# the script's own location rather than the current working directory.
stopWordsPath = (os.path.dirname(os.path.abspath(__file__))
                 + '/textteaser/trainer/stopWords.txt')

# This TextTeaser variant is constructed with the stop-word path and the text.
tt = TextTeaser(stopWordsPath, text)
sentences = tt.summarize(title, text)

# Emit the summary, one sentence per line.
for sentence in iter(sentences):
    print(sentence)
def make_short_summary2(title, text):
    """Return the TextTeaser summary of ``text`` for the given ``title``.

    The value is passed through unchanged from ``TextTeaser.summarize``.
    TextTeaser is a third-party summarizer; the original author notes that
    it uses nltk and makes decent summaries.
    """
    return TextTeaser().summarize(title, text)
Example #13
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

from textteaser import TextTeaser

# article source: https://blogs.dropbox.com/developers/2015/03/limitations-of-the-get-method-in-http/
title = "Limitations of the GET method in HTTP"
text = "We spend a lot of time thinking about web API design, and we learn a lot from other APIs and discussion with their authors. In the hopes that it helps others, we want to share some thoughts of our own. In this post, we’ll discuss the limitations of the HTTP GET method and what we decided to do about it in our own API.  As a rule, HTTP GET requests should not modify server state. This rule is useful because it lets intermediaries infer something about the request just by looking at the HTTP method.  For example, a browser doesn’t know exactly what a particular HTML form does, but if the form is submitted via HTTP GET, the browser knows it’s safe to automatically retry the submission if there’s a network error. For forms that use HTTP POST, it may not be safe to retry so the browser asks the user for confirmation first.  HTTP-based APIs take advantage of this by using GET for API calls that don’t modify server state. So if an app makes an API call using GET and the network request fails, the app’s HTTP client library might decide to retry the request. The library doesn’t need to understand the specifics of the API call.  The Dropbox API tries to use GET for calls that don’t modify server state, but unfortunately this isn’t always possible. GET requests don’t have a request body, so all parameters must appear in the URL or in a header. While the HTTP standard doesn’t define a limit for how long URLs or headers can be, most HTTP clients and servers have a practical limit somewhere between 2 kB and 8 kB.  This is rarely a problem, but we ran up against this constraint when creating the /delta API call. Though it doesn’t modify server state, its parameters are sometimes too long to fit in the URL or an HTTP header. The problem is that, in HTTP, the property of modifying server state is coupled with the property of having a request body.  
We could have somehow contorted /delta to mesh better with the HTTP worldview, but there are other things to consider when designing an API, like performance, simplicity, and developer ergonomics. In the end, we decided the benefits of making /delta more HTTP-like weren’t worth the costs and just switched it to HTTP POST.  HTTP was developed for a specific hierarchical document storage and retrieval use case, so it’s no surprise that it doesn’t fit every API perfectly. Maybe we shouldn’t let HTTP’s restrictions influence our API design too much.  For example, independent of HTTP, we can have each API function define whether it modifies server state. Then, our server can accept GET requests for API functions that don’t modify server state and don’t have large parameters, but still accept POST requests to handle the general case. This way, we’re opportunistically taking advantage of HTTP without tying ourselves to it."

# Summarize the English sample article defined above and print the result,
# one sentence per line.
tt = TextTeaser()
sentences = tt.summarize(title, text)
for sentence in iter(sentences):
    print(sentence)

title = "Será que a Bel Pesce aprendeu mesmo a lição?"
text = """
Quando decidi escrever e buscar ser uma referência sobre empreendedorismo, conversei com um amigo e ele foi duro comigo — agradeço — dizendo que eu não deveria falar de algo que eu nunca tive grandes êxitos. Por questões de autoridade, eu não deveria querer ser uma referência, antes de ser uma, mesmo tendo uma faculdade de administração e um MBA em gestão estratégica de empresas, alguns negócios testados, tendo passado e contribuído em mais de 250 empresas, eu não tinha um respaldo para solidificar minha fala. Foi ali que eu mudei o discurso de “faça isso”, para “eu tento fazer isso.” E isso não garante nada, falar de empreendedorismo e inovação sem ter nada (ainda) grandioso para mostrar a respeito é frágil demais.
E aqui entra o marketing.
Bel Pesce fez seu nome por ter estudado no MIT, trabalhado no Google e Microsoft e ter ajudado a construir uma empresa, a Lemon no Vale do Silício. Brilhante né?! Seria, se o trabalho no Google e na Microsoft não fossem um estágio de 3 meses de verão, se ela fosse co-founder da empresa citada ou alguma coisa mais efetiva por lá.
Eu conheço muita gente nessa vida, meu DataEu é bem sofisticado, gente de todo canto, de diversas áreas, rico, pobre, gente que passou por variadas situações e tem muita divergência de opinião e visão de mundo. Tem gente que trabalha (trabalha mesmo) no Google, pra Amazon, startups brasileiras incríveis, empreendedores que são reis no Vale do Silício, que estudam ou estudaram no MIT, Harvard, Stanford, Oxford, Erasmus de Rotterdam (considerada a melhor escola de empreendedorismo do mundo), gente que representa o governo francês na União Européia, diretor de multinacional, vice-presidente de multinacional, milionário, multimilionário — infelizmente não conheço nenhum bilionário -, etc. O que quero dizer com isso?
Conheço no mínimo umas 50 pessoas 10 vezes mais importante e com história que realmente vale a pena ser explorada, mas não são, ou por opção, ou por falta de oportunidade.
Quando eu conheci a Bel Pesce, eu realmente fiquei empolgada para ouvir o que ela tinha para falar. Eu amo gente foda, gente que fez coisas que nunca fiz, que consegue cativar e ser reconhecida, enfim, eu gosto de gente brilhante.
Li o livro dela, achei bacana, bem escrito, nada glorioso, primoroso ou fora de série, mas atende bem a proposta. Comecei a ver os vídeos, a segui-la no twitter, a acompanhar no Periscope e foi ali, bem no meio daquela vontade de consumir um mundo do qual não tive a oportunidade de conhecer, que tive uma frustração bem grande.
O conhecimento que ela passava era tão profundo quanto um discurso da Dilma, mais raso que o nível que chegou a Cantareira. Algo como: “Essa empresa é top, é show, o que eles fazem é muito 10!”, “Empreender só depende de você”, “vá atrás dos seus sonhos”, “faça meta do dia”, sobre o negócio dela: “um negócio disruptivo, inovador, disuptamente novo”. Foi ali que fui atrás para entender quem era e porque ela tinha se tornado quem era. Essa conta não fechava. Deixei pra lá, não sou obrigada a consumir o que não quero e quem quiser que consuma. Ponto final!
Segui a vida… até que no início desse ano fiz um post questionando os “empreendedores motivacionais”, porque de repente eles se multiplicaram na internet. Eu não aguentava mais meta do dia, frases motivacionais, usei a expressão “essa geração Bel Pesce é legal mas a gente precisa mais no nosso dia a dia”. A crítica não era a ela, mas ao modelo replicado exaustivamente por diversas outras pessoas que se inspiraram nela.


Foi uma muvuca só. Aparentemente as pessoas tinham muito para falar sobre isso.
Conheci muita gente incrível por causa disso, inclusive uma das pessoas que me ajudaram a estruturar o atual projeto que estou trabalhando. Até brinquei que se desse certo, eu faria um post “Como a Bel Pesce me ajudou a ganhar meus primeiros milhões”.
Eu fui chamada de invejosa, diversas vezes, e coisa pior. Vieram dizer que a conheciam, que ela é um doce, e que era muito feio falar publicamente de uma pessoa. — ? Mesmo essa pessoa sendo uma pessoa pública?! Ué?! — O Murilo Gun, outra pessoa questionável, me chamou de “Bruna alguma coisa” em seu podcast e assim por diante.
Foi nesse momento que percebi que a Menina do Vale tinha virado, graças a ela mesma e sua constante autopromoção, um mito. E como todo famoso fruto da internet, de suas legiões incontáveis de fanáticos seguidores, é praticamente impossível questionar sem que os fãs da pessoa venham argumentar que você esteja criticando porque está com inveja.
Example #14
0
from textteaser import TextTeaser
import json
import sys

# The document is parsed from the first command-line argument.  json.loads is
# used because argv entries are strings; json.load needs a file object.
# NOTE(review): assumes argv[1] carries the JSON text itself -- if callers
# pass a file path instead, open that file and use json.load.
data = json.loads(sys.argv[1])
tt = TextTeaser()

# Replace every concept text in place with its space-joined summary.
concepts = data["concept"]
for index, concept in enumerate(concepts):
    concepts[index] = " ".join(tt.summarize(data["Title"], concept))

# json.dumps returns the encoded string; json.dump writes to a file object
# and returns None, so printing its result would only ever print "None".
print(json.dumps(data, ensure_ascii=False))
sys.stdout.flush()
Example #15
0
from textteaser import TextTeaser
import sys
if sys.version_info[0] < 3:
    # Python 2 only: switch the implicit str/unicode codec to UTF-8.
    # Neither the reload() builtin nor sys.setdefaultencoding exists on
    # Python 3, so this must not run there.
    reload(sys)
    sys.setdefaultencoding('utf8')

tt = TextTeaser()

# The with-block closes the file on exit; no explicit close() is needed.
with open("page18499Leftwingpolitics.txt", "r") as fl:
    data = fl.read()
sentences = tt.summarize("Politics", data)
for sen in sentences:
    # Call form of print: valid on Python 2 and Python 3 alike.
    print(sen)

print("end")
Example #16
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

from textteaser import TextTeaser

# article source: https://blogs.dropbox.com/developers/2015/03/limitations-of-the-get-method-in-http/
title = "Limitations of the GET method in HTTP"
text = "We spend a lot of time thinking about web API design, and we learn a lot from other APIs and discussion with their authors. In the hopes that it helps others, we want to share some thoughts of our own. In this post, we’ll discuss the limitations of the HTTP GET method and what we decided to do about it in our own API.  As a rule, HTTP GET requests should not modify server state. This rule is useful because it lets intermediaries infer something about the request just by looking at the HTTP method.  For example, a browser doesn’t know exactly what a particular HTML form does, but if the form is submitted via HTTP GET, the browser knows it’s safe to automatically retry the submission if there’s a network error. For forms that use HTTP POST, it may not be safe to retry so the browser asks the user for confirmation first.  HTTP-based APIs take advantage of this by using GET for API calls that don’t modify server state. So if an app makes an API call using GET and the network request fails, the app’s HTTP client library might decide to retry the request. The library doesn’t need to understand the specifics of the API call.  The Dropbox API tries to use GET for calls that don’t modify server state, but unfortunately this isn’t always possible. GET requests don’t have a request body, so all parameters must appear in the URL or in a header. While the HTTP standard doesn’t define a limit for how long URLs or headers can be, most HTTP clients and servers have a practical limit somewhere between 2 kB and 8 kB.  This is rarely a problem, but we ran up against this constraint when creating the /delta API call. Though it doesn’t modify server state, its parameters are sometimes too long to fit in the URL or an HTTP header. The problem is that, in HTTP, the property of modifying server state is coupled with the property of having a request body.  
We could have somehow contorted /delta to mesh better with the HTTP worldview, but there are other things to consider when designing an API, like performance, simplicity, and developer ergonomics. In the end, we decided the benefits of making /delta more HTTP-like weren’t worth the costs and just switched it to HTTP POST.  HTTP was developed for a specific hierarchical document storage and retrieval use case, so it’s no surprise that it doesn’t fit every API perfectly. Maybe we shouldn’t let HTTP’s restrictions influence our API design too much.  For example, independent of HTTP, we can have each API function define whether it modifies server state. Then, our server can accept GET requests for API functions that don’t modify server state and don’t have large parameters, but still accept POST requests to handle the general case. This way, we’re opportunistically taking advantage of HTTP without tying ourselves to it."

# Summarize the sample article defined above and print one sentence per line.
tt = TextTeaser()

sentences = tt.summarize(title, text)

for sentence in sentences:
    # Call form of print: valid on Python 2 and Python 3 alike, whereas the
    # statement form is a syntax error on Python 3.
    print(sentence)
Example #17
0
# article source: https://blogs.dropbox.com/developers/2015/03/limitations-of-the-get-method-in-http/
# title = "Limitations of the GET method in HTTP"
# text = "We spend a lot of time thinking about web API design, and we learn a lot from other APIs and discussion with their authors. In the hopes that it helps others, we want to share some thoughts of our own. In this post, we’ll discuss the limitations of the HTTP GET method and what we decided to do about it in our own API.  As a rule, HTTP GET requests should not modify server state. This rule is useful because it lets intermediaries infer something about the request just by looking at the HTTP method.  For example, a browser doesn’t know exactly what a particular HTML form does, but if the form is submitted via HTTP GET, the browser knows it’s safe to automatically retry the submission if there’s a network error. For forms that use HTTP POST, it may not be safe to retry so the browser asks the user for confirmation first.  HTTP-based APIs take advantage of this by using GET for API calls that don’t modify server state. So if an app makes an API call using GET and the network request fails, the app’s HTTP client library might decide to retry the request. The library doesn’t need to understand the specifics of the API call.  The Dropbox API tries to use GET for calls that don’t modify server state, but unfortunately this isn’t always possible. GET requests don’t have a request body, so all parameters must appear in the URL or in a header. While the HTTP standard doesn’t define a limit for how long URLs or headers can be, most HTTP clients and servers have a practical limit somewhere between 2 kB and 8 kB.  This is rarely a problem, but we ran up against this constraint when creating the /delta API call. Though it doesn’t modify server state, its parameters are sometimes too long to fit in the URL or an HTTP header. The problem is that, in HTTP, the property of modifying server state is coupled with the property of having a request body.  
We could have somehow contorted /delta to mesh better with the HTTP worldview, but there are other things to consider when designing an API, like performance, simplicity, and developer ergonomics. In the end, we decided the benefits of making /delta more HTTP-like weren’t worth the costs and just switched it to HTTP POST.  HTTP was developed for a specific hierarchical document storage and retrieval use case, so it’s no surprise that it doesn’t fit every API perfectly. Maybe we shouldn’t let HTTP’s restrictions influence our API design too much.  For example, independent of HTTP, we can have each API function define whether it modifies server state. Then, our server can accept GET requests for API functions that don’t modify server state and don’t have large parameters, but still accept POST requests to handle the general case. This way, we’re opportunistically taking advantage of HTTP without tying ourselves to it."

# article source: http://www.xinhuanet.com/world/2018-08/07/c_129928410.htm
title = "新闻分析:土美关系急转直下能否峰回路转"
text='''近期,作为北约盟友的美国和土耳其相互制裁,近乎撕破脸的架势。有报道称,土耳其将派出代表团赴华盛顿就双边关系紧张局势进行沟通。
  分析人士指出,土美关系不断恶化的现实,暴露了两个重要的北约盟友在一系列国际和地区事务上的深刻分歧,双方持续对抗将加剧北约内部分化,让业已混乱的中东局势更加复杂。
  争议焦点
  7月举行北约峰会上,土耳其总统埃尔多安和美国总统特朗普实现会面,此后双边关系有所回暖。美土关系再度急转直下的一个争议焦点,是美籍牧师布伦森案。
  土耳其2016年7月发生未遂军事政变。政变平息后,土耳其方面逮捕布伦森,指控他从事反政府间谍活动。美方就释放布伦森一事与土耳其方面多次交涉。北约峰会特朗普同埃尔多安会面之后,外界原本对此案有所期待,没承想口水仗不断升级,甚至演变为相互制裁。
  美国财政部8月1日宣布对土内政部长与司法部长实行制裁。这是美国政府首次对北约成员国实施经济制裁,实属罕见。埃尔多安政府之后随即宣布对等报复措施,冻结美司法部长和内政部长在土资产。对此,美国助理国家安全顾问朱莉安·史密斯说,这意味着“美土关系已经处于危机之中”。
  四重矛盾
  实际上,此轮争端只是2016年以来土美关系持续走低的一个缩影。自土耳其发生未遂军事政变后,土美关系进入新的波动周期,双方在核心利益上的分歧越来越深,突出表现在四个方面。
  首先,引渡宗教人士居伦问题。土政府认为,侨居美国的宗教人士居伦是2016年政变的幕后主谋,并将其领导的“居伦运动”列为恐怖组织。土方多次要求将其引渡回国接受调查,但美国至今没有同意。此外,围绕“居伦运动”的调查中,有数十名土美双重国籍公民被捕。
  其二,叙利亚库尔德武装的合法性问题。库尔德武装问题始终是土耳其关于领土安全的重大关切,土耳其军队在土东南部和叙利亚、伊拉克境内多次打击库尔德人武装,而美国却在叙利亚战场扶持“叙利亚民主军”等库尔德武装,引发土耳其担忧。这一问题也暴露出土美在叙利亚战场的利益分歧。
  第三,军购问题。土耳其的常备军数量位居北约第二。近期,土耳其欲购买俄罗斯S-400防空导弹系统一事持续发酵,引发美国和其他北约盟友担忧。美国威胁向土停售F-35战机,土方则威胁收回美空军基地。美国最新通过的“2019财年国防授权法案”中还要求暂停向土耳其交付F-35战机。
  第四,制裁伊朗问题。美国宣布于7日对伊朗部分非能源领域重启制裁。土方此前表示,不会响应美国的制裁要求。数据显示,今年前4个月土耳其从伊朗进口原油超过300万吨,占土耳其全部原油进口的55%。分析人士认为,土耳其作为缺油国,对伊朗石油和天然气供应有依赖,短期内不会加入对伊朗的制裁。
  渐行渐远
  土耳其是北约中唯一的中东国家,美国需要土耳其在中东地区发挥作用,更不愿看到土耳其倒向俄罗斯。美国国务卿蓬佩奥最近在东南亚访问时仍不忘强调,“土耳其是北约盟友,美国非常愿意和它继续开展合作”。
  然而,土耳其对自身的定位是地区大国,在外交和安全防务上有着很强的自主意识,特别是近年来其在地区事务中发挥更大影响力的意愿不断增强,这使得它与美国等传统西方盟友间在战略方向上的矛盾日益凸显。
  美国国务院前中东事务高级官员韦恩·怀特指出,美国和土耳其已经有太多矛盾,两国早已渐行渐远,布伦森案不过是最新导火索。尤其是美国在叙利亚支持库尔德武装的做法可能让土耳其和俄罗斯、伊朗进一步走近,这将让土耳其和西方的裂痕进一步拉大。
  在美国智库布鲁金斯学会高级研究员达雷尔·韦斯特看来,总体而言,土耳其将在对外政策上更加疏远美国,这将让美国的中东政策面临更多未来挑战。
  不过,美国华盛顿近东政策研究所资深研究员戴维·波洛克认为,美土关系虽面临很多问题,但鉴于美国仍表明愿就分歧同土耳其继续对话,这说明双方并不想立即撕破脸,依旧可能在包括叙利亚库尔德问题、美对土耳其军售等问题上寻求妥协并合作。'''

tt = TextTeaser()

# In this TextTeaser variant each summary entry unpacks into a
# (sentence, score) pair.
sentences_score_list = tt.summarize(title, text)

for entry in sentences_score_list:
    sentence, score = entry
    # The score is appended directly after the sentence, with no separator.
    print(sentence + str(score))
Example #18
0
from textteaser import TextTeaser

tt = TextTeaser()
sentences = tt.summarize(
    "CERB",
    "I can’t believe people are dropping their CERB cheque’s on new phones and designer shit like you guys get $2000 and don’t know how to act?? can’t wait for tax season to bite y’all in the ass. Let's be clear, people on Permanent Disability are expected to survive on half of what CERB is monthly. The billionaire class sure wants all those minimum wage earners back on the job fast, you know, the ones making the billionaires all that money... and especially before those workers get used to making more money collecting CERB than they do in their minimum wage (essential) job. Right on schedule. Right-wing suggests without evidence that people won't work because of CERB. Wealthy shareholders opposed to a $15 min wage think workers should be working to make them more dividends, even if some of you will contract Covid19 & die in the service of greed. The $2000/month CERB highlights the cruelty of our provincial social assistance and disability systems, which expect recipients to live on far less on a permanent basis, and jump through far greater hoops to receive the funds. The level of brain worms to think that CERB is 'getting paid to not work' instead of 'receiving money to not die'. Y’all are sooo stupid for buying designer with your CERB cheques. CERB provides the perfect template for a guaranteed minimum wage, something that more and more people are warming up to thanks to CERB, and that is having the Financial Post collectively lose their shit apparently. When are you, all MPPs, medical officers, and government staff going to give up your full salary and go on CERB ($1200/month) like the rest of us peasants in Ontario. All of you should earn $1200 a month until you open up the province. Enjoy the hurt that we do. After weeks of confusion for vulnerable people seeking support, the province has decided to deduct CERB income from ODSP & OW recipients. The scheme lets the province make money off the backs of recipients during the pandemic. 
Just make the CERB universal. As the NDP has been saying for over a month now. Emergency Student Benefit gets a failing grade - just make the CERB universal. The free meals for healthcare workers is such a kind sentiment, but if the infrastructure plus capacity is there, it would be amazing for these efforts to be directed to folks who aren’t generating an income during the pandemic, with children/family to feed/look after, all on CERB. Why are students not receiving the same amount on CERB? As a longtime post-secondary educator, many students often had families, bills & rent! Students need real help now & should not be treated as less deserving of the same assistance.The system needs to shift to support equity! A lot of vulnerable people are waiting to get helped such as pregnant women who are not qualified for cerb or ei and were working got lay offs from jobs. I know a lot of ppl who are spending their cerb money in non essential shoppings and on one side ppl r struggling to live. I received an email about a thriving business whose workers quit to go on CERB - it’s fraud, but it’s happening, and more must be done to ensure benefits go to those in need, not to those who just want to take the summer off. Living with Covid 19 has been incredibly difficult because the federal government has failed to deliver the CERB to eligible Canadians. Evidently they are completely uninterested in fixing the problem. Canadians deserve a government that treats us fairly, not this trainwreck. Foreign students cannot apply for CESB. Full stop. Like all qualifying workers, international students who earned > $5,000 over 12 months can apply for the CERB if they lost their jobs because of COVID-19. The CERB is not a student benefit"
)

# Print each entry returned by tt.summarize(...) above on its own line.
for sentence in sentences:
    print(sentence)
Example #19
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

from textteaser import TextTeaser

# article source: https://blogs.dropbox.com/developers/2015/03/limitations-of-the-get-method-in-http/
title = "Limitations of the GET method in HTTP"
# text = "We spend a lot of time thinking about web API design, and we learn a lot from other APIs and discussion with their authors. In the hopes that it helps others, we want to share some thoughts of our own. In this post, we’ll discuss the limitations of the HTTP GET method and what we decided to do about it in our own API.  As a rule, HTTP GET requests should not modify server state. This rule is useful because it lets intermediaries infer something about the request just by looking at the HTTP method.  For example, a browser doesn’t know exactly what a particular HTML form does, but if the form is submitted via HTTP GET, the browser knows it’s safe to automatically retry the submission if there’s a network error. For forms that use HTTP POST, it may not be safe to retry so the browser asks the user for confirmation first.  HTTP-based APIs take advantage of this by using GET for API calls that don’t modify server state. So if an app makes an API call using GET and the network request fails, the app’s HTTP client library might decide to retry the request. The library doesn’t need to understand the specifics of the API call.  The Dropbox API tries to use GET for calls that don’t modify server state, but unfortunately this isn’t always possible. GET requests don’t have a request body, so all parameters must appear in the URL or in a header. While the HTTP standard doesn’t define a limit for how long URLs or headers can be, most HTTP clients and servers have a practical limit somewhere between 2 kB and 8 kB.  This is rarely a problem, but we ran up against this constraint when creating the /delta API call. Though it doesn’t modify server state, its parameters are sometimes too long to fit in the URL or an HTTP header. The problem is that, in HTTP, the property of modifying server state is coupled with the property of having a request body.  
We could have somehow contorted /delta to mesh better with the HTTP worldview, but there are other things to consider when designing an API, like performance, simplicity, and developer ergonomics. In the end, we decided the benefits of making /delta more HTTP-like weren’t worth the costs and just switched it to HTTP POST.  HTTP was developed for a specific hierarchical document storage and retrieval use case, so it’s no surprise that it doesn’t fit every API perfectly. Maybe we shouldn’t let HTTP’s restrictions influence our API design too much.  For example, independent of HTTP, we can have each API function define whether it modifies server state. Then, our server can accept GET requests for API functions that don’t modify server state and don’t have large parameters, but still accept POST requests to handle the general case. This way, we’re opportunistically taking advantage of HTTP without tying ourselves to it."
# text = "Every day I interview dozens of engineers for open positions in my own or in partner organisations. Most of these people have a job or just left their current position. This is the case because the existing market allows even beginners to easily find a job. The universal truth is that good engineers always have stable work. You may ask yourself why people seek a change and guess what? Almost every engineer I interview says they are experiencing a toxic political environment in their current organisation, doesn’t feel ownership in the organisation or is a victim of bad management or not transparent communication. This is not a surprise for me. Being internal and external in many organisations throughout the world, whether I was an engineer, lead or onboarding one of my teams, I experienced a lot of political tension. I have personally left stable workplaces or refused to work with a customer because of extreme political issues that produced a toxic environment, and usually because of the personal ambitions of several executive members in those companies. Lately, I have been participating in a lot of discussions regarding matrixed organisations and how agile transformation can increase productivity and ROI, and I absolutely agree that the agile, lean approach is definitely a good direction to go. But, there is one big, bold BUT. No method or approach can save an organisation from people acting only for their own profit and creating sophisticated, political rules of the game that serve their personal goals. What does it mean for a business owner? The reality is that businesses who experience digital transformation still depend vastly on the people who are coming from mostly conservative management backgrounds with a standard conservative education where University degree and the brands of consultancies you worked in mostly decide your position in the organisation and not necessarily skills or experience. 
I do not want to name classical consultancies which produce tons of C-level executives or business consultants, most of them get the desired positions in the companies without understanding digital business and the people who work there. They are trained to be politicians as well as the standard way of organisation is the only way they follow but that does not mean it’s the most beneficial way for company or business owners. Politics and processes do not grow great teams and products, culture does. Culture in the organisation, especially in digital business, is the most important aspect. Only great teams create great businesses, and that’s what makes the IT ecosystem so successful and IT companies outperform standard industries in revenue and growth. And yet culture is often misinterpreted. Some managers, especially HR managers, are still coming from the same conservative companies I have mentioned above. They try to create dogmatic values for the company, or even rename HR to something like the “People and Culture Department”, but believe me, that does not help either. No matter how many team events you organise or how many values you create in the company, engineers are a totally different kind of people. Being an engineer and switching to the business line I understand perfectly, we the IT people are very rational and look at the bottom of things, things, like having a list of top 5 company values and repeating them every morning, does not make the company better when you notice political motivation behind it. On the contrary, transparent, clear business goals and good communication makes the company better and keeps the team motivated. One of the cases I experienced during my consulting career is a very common example. A company and a team works toward a common goal, and a lot of time and effort has been spent on their product. Then, suddenly a new C-level executive joins the company. 
He comes from a traditional consultancy without any IT background and changes the whole strategy of the organisation. You would ask yourself why would that person do that? Simple answer, they need to show a change, justify salary, bonus or even shares in the company. Sometimes they could even use political ambition to get more power or eliminate the competitors in the company and neglect common sense and close down projects out of his control in a night without proper communication or transparent reasonable answer and fires people responsible for them. Now as a business owner imagine the feelings of your best engineers who believed in something? Does not matter what you do for them or how many values you create you just erased part of their life just because one of your C-level reps acted for his own political benefit. Do you think the team will continue work for you with the same pace or they will suddenly believe in your new values after you decided to destroy the business for the sake of the new approach of a person whom you hired because of his name or University degree? The answer is one big bold NO, this is the perfect timing when organisations start losing employees, basically what happens is employees start playing the same politics. They pretend they believe in values but use every second wisely to change a job or find a better place, even with same conditions but without politics, organisations that will value them and move to the common goal without buzzwords like “meritocracy” or “high values” that have one purpose make the politics in the organisation even more successful. 
I understood this lesson very good and now managing more than 200 Engineers on a daily basis I have understood one thing, I should never play politics, all people in the organisation should have one goal and this goal should be measurable, organisation should have clear revenue goals and everyone should know them, all team members should be in constant communication with me and not through surveys but through one to ones and be free to express their opinions. People should exactly know what is the goal of the company, how to achieve it, the direction the company moves to and what is their benefit when the company gets there. You can not trick engineers with “we work for the stability”, finding a job for an engineer is a no-brainer so stable salary is not an incentive anymore, people should enjoy their work and should be motivated to outperform. Company policies or values should not block people, they should push them towards achieving greater results for themselves and if the company is structured wisely, achieving a greater value for an employee means achieving a greater value for the business and increasing ROI. The best feedback I have heard in my life that there is no politics in our company, personal tensions are not actual because people have a common goal and their goal is going to the same direction as company goal both culturally and financially. Acting as a leader and business owner I will always do my best to avoid hiring people who will make the company environment full of politics. Also every leader should on a constant basis identify those people who act only in their own political benefit because from my experience people who lack self-confidence or skills usually behave over protective and move the reality into political background to have a chance to survive and grow in the company and as a business owner I try to do my best to avoid these people in my organisation and would advise other business owners to do the same. Article published by A.I. 
Evangelist, Startup Advisor and Entrepreneur Albert Cyberhulk."
text = "Big news today, you didn’t — you said you didn’t tape James Comey. Do you want to explain that? Why did you want him to believe that you possibly did that? Well, I didn't tape him. You never know what's happening when you see that the Obama administration and perhaps longer than that was doing all of this unmasking and surveillance. And you read all about it and I've been reading about it the last couple of months, the seriousness of the — and horrible situation with surveillance all over the place. And you have been hearing the word “unmasking,” a word you probably never heard before. So you never know what's out there, but I didn't tape and I don't have any tape, and I didn't tape. But when he found out that I, you know, that there may be tapes out there, whether it's governmental tapes or anything else, and who knows, I think his story may have changed. I mean, you will have to take a look at that because then he has to tell what actually took place at the events. And my story didn't change. My story was always a straight story. My story was always the truth. But you'll have to determine for yourself whether or not his story changed. But I did not tape. It was a smart way to make sure he stayed honest in those hearings. Well, it wasn't very stupid, I can tell you that. He was — he did admit that what I said was right. And if you look further back, before he heard about that, I think maybe he wasn't admitting that, so you'll have to do a little investigative reporting to determine that. But I don't think it will be that hard. Robert Mueller do you think he should recuse himself? He is friends with James Comey. He has hired attorneys that were part of Hillary Clinton's foundation and given money to both President Obama and Hillary Clinton's campaign. Should he recuse himself? He is very, very good friends with Comey, which is very bothersome. Uh, but he is also — we are going to have to see. 
We are going to have to see in terms — look, there has been no obstruction. There has been no collusion. There has been leaking by Comey. But there’s been no collusion and no obstruction, and virtually everybody agrees to that. So we’ll have to see. I can say that the people that have been hired are all Hillary Clinton supporters. Some of them worked for Hillary Clinton. I mean, the whole thing is ridiculous if you want to know the truth from that standpoint. But Robert Mueller is an honorable man, and hopefully he will come up with an honorable solution."


tt = TextTeaser()

# Summarize whichever `text` assignment above is active, under `title`.
sentences = tt.summarize(title, text)

# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the former `print sentence` statement is a SyntaxError on Python 3.
for sentence in sentences:
    print(sentence)
Example #20
0
 def generate_summary_textteaser(self, input_text):
     """Summarize ``input_text`` with a freshly constructed TextTeaser.

     A new TextTeaser is built on every call with the fixed argument
     'TextTeaserApiTest'; its ``summarize`` is invoked with the title
     'Test' and ``url=None``, and that call's result is returned unchanged.
     """
     summarizer = TextTeaser('TextTeaserApiTest')
     summary = summarizer.summarize(text=input_text, title='Test', url=None)
     return summary
Example #21
0
import os
import sys
import json

# textteaser
from textteaser import TextTeaser
tt = TextTeaser()
# gensim
from gensim.summarization.summarizer import summarize
# sumy
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.reduction import ReductionSummarizer
from sumy.summarizers.kl import KLSummarizer
# Shared sumy settings: LANGUAGE selects the stemmer and tokenizer built below.
LANGUAGE = "chinese"
# NOTE(review): presumably the number of sentences requested per summary;
# it is not used in the visible part of the file — confirm.
SENTENCES_COUNT = 3
stemmer = Stemmer(LANGUAGE)
tokenizer = Tokenizer(LANGUAGE)
# bert-extractive-summarizer
from summarizer import SingleModel
model = SingleModel(model='bert-base-chinese', vector_size=768)
def overload(body, minl=10, maxl=600):
    """Split ``body`` into a list of pieces at every newline character.

    ``minl`` and ``maxl`` are accepted but never read; presumably they are
    kept so the signature matches the hook this function replaces.
    """
    pieces = body.split('\n')
    return pieces
# Rebind the hook on this one instance so `overload` does the splitting.
# Being set on the instance (not the class), it is called without `self`,
# so `overload` receives the caller's arguments directly.
# NOTE(review): presumably SingleModel calls this to split text into
# sentences — confirm against the library.
model.process_content_sentences = overload


import jieba
Example #22
0
def summarize(user,
              room_name,
              msg_limit=None,
              days_limit=None,
              hours_limit=None,
              min_limit=None,
              title=None):
    """Summarize a room's compiled messages with TextTeaser.

    The text to summarize comes from ``compile_messages``, which receives
    ``user``, ``room_name`` and the four limit arguments unchanged.
    NOTE(review): the limits presumably bound the messages by count and
    age — confirm against ``compile_messages``.

    Args:
        user: Passed through to ``compile_messages`` and
            ``utils.get_emails_with_users``.
        room_name: Room to summarize; also used to build the default title.
        msg_limit: Forwarded to ``compile_messages``.
        days_limit: Forwarded to ``compile_messages``.
        hours_limit: Forwarded to ``compile_messages``.
        min_limit: Forwarded to ``compile_messages``.
        title: Title handed to TextTeaser; defaults to
            ``'<room_name> Summary'`` when None.

    Returns:
        The result of ``indent_tagged`` applied to the TextTeaser summary
        and the values of ``utils.get_emails_with_users(user, room_name)``.
    """
    # Identity check: `== None` can be fooled by objects overriding __eq__.
    if title is None:
        title = '%s Summary' % room_name
    text = compile_messages(user, room_name, msg_limit, days_limit,
                            hours_limit, min_limit)
    tt = TextTeaser()
    return indent_tagged(tt.summarize(title, text),
                         utils.get_emails_with_users(user, room_name).values())