Example #1
def post(self, id):
    # Load the Source entity from its urlsafe key.
    source = ndb.Key(urlsafe=id).get()
    not_set = object()  # unique sentinel; only overwrite fields that were actually submitted
    for field in source_fields:
        val = not_set
        content = self.request.get(field['name'])
        if content:
            t = field.get('type')
            if t == 'text':
                val = content
                if field.get('split'):
                    val = [v.strip() for v in val.split(field['split'])]
            elif t == 'number':
                val = float(content)
            elif t == 'file_url':
                f = util.get_uploaded_file(self.request, field['name'])
                if f:
                    name, mime, data = f
                    if field.get('image') and 'max_size' in field:
                        val = store_resized_image(data, field['max_size'])
                    else:
                        val = file_storage.upload_file_and_get_url(data, mime)
        if val is not not_set:
            setattr(source, field['name'], val)
    source.direct_fetch_data = None  # invalidate any cached fetch data
    source.put()
    source_search.add_source_to_index(source)
    self.redirect('')
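
The handler above is written for a webapp2-style RequestHandler on Google App Engine's ndb; source_fields, util, store_resized_image, file_storage, and source_search are module-level helpers not shown in the example. A minimal sketch of how such a handler might be mounted, assuming webapp2 routing (the class name and URL pattern are illustrative assumptions, not taken from the original project):

import webapp2

class EditSourceHandler(webapp2.RequestHandler):
    def post(self, id):
        pass  # body from Example #1 goes here; `id` comes from the route's capture group

app = webapp2.WSGIApplication([
    # Hypothetical route; the real URL layout is not shown in the example.
    (r'/source/([^/]+)/edit', EditSourceHandler),
])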
Example #2
def source_fetch(source):
    debug("SF: Doing fetch for source: {0}".format(source.url))
    result = _source_fetch(source)
    debug("SF: Done with source fetch for {0}; result type: {1}".format(source.url, (result.method if result else None)))
    added_any = False
    now = datetime.datetime.now()
    to_put = []
    tasks_to_enqueue = []
    if result:
        if result.feed_title:
            source.title = result.feed_title
        if result.brand:
            source.brand = result.brand
        
        titles = [entry['title'] for entry in result.entries if entry['title']]
        source.shared_title_suffix = shared_suffix(titles)
        
        entries = result.entries[:25]  # slicing already clamps, so cap at the 25 newest entries
        entry_ids = [Article.id_for_article(entry['url'], source.url) for entry in entries]
        print "ENTRY IDs:", entry_ids
        print "ENtry id lens: ", str(map(len, entry_ids))
        article_futures = [Article.get_or_insert_async(id) for id in entry_ids]
        articles = [future.get_result() for future in article_futures]
        print "ARTICLE_OBJECTS:", articles
        
        for i, (entry, article) in enumerate(zip(entries, articles)):
            if not article.url:
                added_any = True
                article.added_date = now
                article.added_order = i
                article.source = source.key
                article.url = canonical_url(entry.get('url'))
                article.submission_url = canonical_url(entry.get('submission_url'))
                # Fall back to the fetch time when the feed omits a date.
                article.published = entry['published'] or now
                if not article.title:
                    article.title = entry['title']
                to_put.append(article)
                delay = (i + 1) * 4  # stagger fetch tasks 4 seconds apart
                tasks_to_enqueue.append(article.create_fetch_task(delay=delay))
    debug("SF: About to put {0} items".format(len(to_put)))
    if to_put:
        ndb.put_multi(to_put)
    debug("SF: About to enqueue")
    if tasks_to_enqueue:
        taskqueue.Queue('articles').add_async(tasks_to_enqueue)
    debug("SF: done enqueuing")
    if added_any:
        source.most_recent_article_added_date = now
    source_search.add_source_to_index(source)
    source.last_fetched = now
    source.put()
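
Example #2 calls a shared_suffix helper that is not shown; from its use (computing source.shared_title_suffix from entry titles) it presumably returns the longest suffix common to all titles, e.g. " - Example Blog". A minimal sketch under that assumption (not the original implementation):

import os.path

def shared_suffix(strings):
    # Longest common suffix == reversed longest common prefix of the
    # reversed strings; os.path.commonprefix accepts arbitrary strings.
    if not strings:
        return ''
    return os.path.commonprefix([s[::-1] for s in strings])[::-1]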