def TechCrunch(latest):
    logging.info("TECH CRUNCH CALLED")
    url = "http://feeds.feedburner.com/TechCrunch/"
    feed = feedparser.parse(url)
    for item in feed["items"]:
        dt = standardize("%a, %d %b %Y %H:%M:%S +0000", item["published"])
        ts = makeTimestamp(dt)
        if int(ts) > int(latest):  # there are new articles
            sum_article = SummarizeUrl(item["links"][0]["href"])
            full_article = grab_link(item["links"][0]["href"]).cleaned_text
            # Only store the article if the summary has at least 3 sentences.
            if len(sum_article) >= 3:
                ArticleModel(title=item["title"],
                             author=item["author"],
                             published=str(dt),
                             published_timestamp=int(ts),
                             image_url=str(item["media_content"][0]["url"]),
                             publication="TechCrunch",
                             summarized_article=sum_article,
                             full_article=full_article,
                             upvoters=[],
                             upvotes=0).put()
        else:
            # There are no new articles; the feed is newest-first, so stop here.
            return
def TheVerge(latest):
    url = "http://www.theverge.com/rss/index.xml"
    feed = feedparser.parse(url)
    dt = 0
    for item in feed["items"]:
        # The Verge's UTC offset is sometimes zero-padded ("-04:00") and
        # sometimes not ("-4:00"), so try both format variants.
        try:
            dt = standardize("%Y-%m-%d %H:%M:%S-04:00", item["published"].replace("T", " "))
        except ValueError:
            dt = standardize("%Y-%m-%d %H:%M:%S-4:00", item["published"].replace("T", " "))
        ts = makeTimestamp(dt)
        if int(ts) > int(latest):  # there are new articles
            sum_article = SummarizeUrl(item["links"][0]["href"])
            full_article = grab_link(item["links"][0]["href"]).cleaned_text
            if len(sum_article) >= 3:
                # Take the third whitespace-separated token of the embedded
                # HTML (the src="..." attribute) and strip 'src=' and quotes.
                image_url = str(item["content"][0]["value"].split()[2][4:].replace('"', ""))
                ArticleModel(title=item["title"],
                             author=item["author"],
                             published=str(dt),
                             published_timestamp=int(ts),
                             image_url=image_url,
                             publication="TheVerge",
                             summarized_article=sum_article,
                             full_article=full_article,
                             upvoters=[],
                             upvotes=0).put()
                logging.info(item["title"] + " from The Verge has been stored")
        else:
            # there are no new articles
            return
def VentureBeat(latest):
    logging.info("VENTURE BEAT CALLED")
    url = "http://feeds.venturebeat.com/VentureBeat"
    feed = feedparser.parse(url)
    for item in feed["items"]:
        dt = standardize("%a, %d %b %Y %H:%M:%S GMT", item["published"])
        ts = makeTimestamp(dt)
        if int(ts) > int(latest):  # there are new articles
            sum_article = SummarizeUrl(item["links"][0]["href"])
            full_article = grab_link(item["links"][0]["href"]).cleaned_text
            logging.info(full_article)
            if len(sum_article) >= 3:
                ArticleModel(title=item["title"],
                             author=item["author"],
                             published=str(dt),
                             published_timestamp=int(ts),
                             image_url=str(item["links"][1]["href"].replace("resize", "")),
                             publication="VentureBeat",
                             summarized_article=sum_article,
                             full_article=full_article,
                             upvoters=[],
                             upvotes=0).put()
                logging.info(item["title"] + " from VentureBeat has been stored")
        else:
            # there are no new articles
            return
def FastCompany(latest):
    logging.info("FAST COMPANY CALLED")
    url = "http://feeds.feedburner.com/fastcompany/headlines"
    feed = feedparser.parse(url)
    for item in feed["items"]:
        dt = standardize("%a, %d %b %Y %H:%M:%S GMT", item["published"])
        ts = makeTimestamp(dt)
        if int(ts) > int(latest):  # there are new articles
            sum_article = SummarizeUrl(item["links"][0]["href"])
            full_article = grab_link(item["links"][0]["href"]).cleaned_text
            logging.info(full_article)
            if len(sum_article) >= 3:
                ArticleModel(title=item["title"],
                             author=item["author"],
                             published=str(dt),
                             published_timestamp=int(ts),
                             image_url=str(item["media_content"][0]["url"]),
                             publication="FastCompany",
                             summarized_article=sum_article,
                             full_article=full_article,
                             upvoters=[],
                             upvotes=0).put()
                logging.info(item["title"] + " from FastCompany has been stored")
        else:
            # there are no new articles
            return
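# The four fetchers above lean on two date helpers, `standardize` and
# `makeTimestamp`, that are not shown in this section. A minimal sketch of what
# they might look like, assuming `standardize` wraps strptime with the caller's
# format and `makeTimestamp` returns a Unix epoch integer (the names match the
# calls above; the bodies are assumptions, not confirmed by this code):

import calendar
from datetime import datetime

def standardize(fmt, published):
    # strptime matches offset text like "-04:00" literally, which is why
    # TheVerge above has to try two format variants.
    return datetime.strptime(published, fmt)

def makeTimestamp(dt):
    # Seconds since the Unix epoch, treating dt as UTC.
    return calendar.timegm(dt.timetuple())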
def tease():
    if request.form is None or 'url' not in request.form or request.form['url'] == '':
        return jsonify({'status': 'error', 'message': 'Please enter a valid URL.'}), 200
    article = pyteaser.grab_link(request.form['url'])
    if article is None or article.cleaned_text == "":
        return jsonify({'status': 'error', 'message': "Sorry, I can't summarize that website. :("}), 200
    entry = {}
    entry['slug'] = os.urandom(3).encode('hex')  # short random slug (Python 2 hex codec)
    entry['title'] = str(article.title.encode('utf-8', 'ignore'))
    text = str(article.cleaned_text.encode('utf-8', 'ignore'))
    entry['summary'] = pyteaser.Summarize(entry['title'], text)
    db.article.insert(entry)
    return jsonify({'status': 'redirect', 'message': url_for('article', slug=entry['slug'])}), 200
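# `tease` reads request.form, so it is presumably registered as a POST form
# route on a Flask app. A hypothetical client call using requests (the route
# path and port are assumptions, not from the original):

import requests

def call_tease(article_url):
    # POST the target URL as form data, exactly as tease() expects.
    resp = requests.post('http://localhost:5000/tease', data={'url': article_url})
    return resp.json()  # {'status': ..., 'message': ...}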
def on_get(self, req, resp):
    """Handles GET requests"""
    try:
        article = grab_link(req.get_param('url'))
    except IOError:
        print 'IOError'
        return None
    if not (article and article.cleaned_text and article.title):
        return None
    summaries = Summarize(unicode(article.title), unicode(article.cleaned_text))
    body = " ".join(summaries)
    resp.status = falcon.HTTP_200  # This is the default status
    resp.body = json.dumps({'title': article.title, 'body': body}, encoding='utf-8')
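# The responder above is written for Falcon, so it presumably sits on a
# resource class routed through a falcon.API instance. A minimal wiring sketch,
# with the class name and route path assumed (not from the original):

import falcon

class SummarizeResource(object):
    pass  # on_get(self, req, resp), as defined above, goes here

api = falcon.API()
api.add_route('/summarize', SummarizeResource())
# A client would then call: GET /summarize?url=http://example.com/story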
def streaming_get_article_text(input_dict, widget, stream=None):
    import pyteaser
    # grab_link returns an Article object; expose only its cleaned text.
    article = pyteaser.grab_link(input_dict['url'])
    output_dict = {}
    output_dict['text'] = article.cleaned_text
    return output_dict
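# A quick local check of the widget function above (the URL is an example;
# `widget` is unused by the function, so None is fine):

if __name__ == '__main__':
    out = streaming_get_article_text({'url': 'http://example.com/story'}, None)
    print out['text'][:200]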