def bootstrap_feedpages():
    print "Mongo DB feed_pages: %s" % MFeedPage.objects().count()
    # db.feed_pages.drop()
    print "Dropped! Mongo DB feed_pages: %s" % MFeedPage.objects().count()
    print "FeedPages: %s" % FeedPage.objects.count()
    pprint(db.feed_pages.index_information())

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    feed_count = feeds.count()
    i = 0
    for feed in feeds:
        i += 1
        print "%s/%s: %s" % (i, feed_count, feed,)
        sys.stdout.flush()

        if not MFeedPage.objects(feed_id=feed.pk):
            feed_page = FeedPage.objects.filter(feed=feed).values()
            if feed_page:
                del feed_page[0]['id']
                feed_page[0]['feed_id'] = feed.pk
                try:
                    MFeedPage(**feed_page[0]).save()
                except:
                    print '\n\n!\n\n'
                    continue

    print "\nMongo DB feed_pages: %s" % MFeedPage.objects().count()
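For context, here is a minimal sketch of the two models this migration moves between, assuming the usual split: FeedPage as the relational (Django ORM) table and MFeedPage as its MongoDB (mongoengine) counterpart. Field names and options are illustrative, not copied from the real models.

import mongoengine as mongo
from django.db import models

class FeedPage(models.Model):
    # Relational source table: one stored page blob per feed (illustrative).
    feed = models.OneToOneField('Feed', related_name='feed_page', on_delete=models.CASCADE)
    page_data = models.TextField(blank=True, null=True)

class MFeedPage(mongo.Document):
    # MongoDB destination document, keyed by the feed's primary key (illustrative).
    feed_id = mongo.IntField(primary_key=True)
    page_data = mongo.BinaryField()

    meta = {'collection': 'feed_pages', 'allow_inheritance': False}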
def fetch_image_from_page_data(self):
    image = None
    image_file = None
    if self.page_data:
        content = self.page_data
    elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
        key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
        compressed_content = key.get_contents_as_string()
        stream = StringIO(compressed_content)
        gz = gzip.GzipFile(fileobj=stream)
        try:
            content = gz.read()
        except IOError:
            content = None
    else:
        content = MFeedPage.get_data(feed_id=self.feed.pk)
    url = self._url_from_html(content)
    if not url:
        try:
            content = requests.get(self.feed.feed_link).content
            url = self._url_from_html(content)
        except (AttributeError, SocketError, requests.ConnectionError,
                requests.models.MissingSchema, requests.sessions.InvalidSchema,
                requests.sessions.TooManyRedirects,
                requests.models.InvalidURL,
                requests.models.ChunkedEncodingError,
                requests.models.ContentDecodingError,
                LocationParseError, OpenSSLError, PyAsn1Error), e:
            logging.debug(" ---> ~SN~FRFailed~FY to fetch ~FGfeed icon~FY: %s" % e)
    if url:
        image, image_file = self.get_image_from_url(url)
    return image, image_file, url
def fetch_image_from_page_data(self):
    image = None
    image_file = None
    if self.page_data:
        content = self.page_data
    elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
        key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
        compressed_content = key.get_contents_as_string()
        stream = StringIO(compressed_content)
        gz = gzip.GzipFile(fileobj=stream)
        try:
            content = gz.read()
        except IOError:
            content = None
    else:
        content = MFeedPage.get_data(feed_id=self.feed.pk)
    url = self._url_from_html(content)
    if not url:
        try:
            content = requests.get(self.cleaned_feed_link).content
            url = self._url_from_html(content)
        except (AttributeError, SocketError, requests.ConnectionError,
                requests.models.MissingSchema, requests.sessions.InvalidSchema,
                requests.sessions.TooManyRedirects,
                requests.models.InvalidURL,
                requests.models.ChunkedEncodingError,
                requests.models.ContentDecodingError,
                httplib.IncompleteRead,
                LocationParseError, OpenSSLError, PyAsn1Error), e:
            logging.debug(" ---> ~SN~FRFailed~FY to fetch ~FGfeed icon~FY: %s" % e)
    if url:
        image, image_file = self.get_image_from_url(url)
    return image, image_file, url
def fetch_image_from_page_data(self):
    image = None
    image_file = None
    if self.page_data:
        content = self.page_data
    # Deleted By Xinyan Lu : No S3 storage
    # elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
    #     key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
    #     compressed_content = key.get_contents_as_string()
    #     stream = StringIO(compressed_content)
    #     gz = gzip.GzipFile(fileobj=stream)
    #     try:
    #         content = gz.read()
    #     except IOError:
    #         content = None
    else:
        content = MFeedPage.get_data(feed_id=self.feed.pk)
    # Modified By Xinyan Lu : content may be None
    if content:
        url = self._url_from_html(content)
    else:
        url = None
    if url:
        image, image_file = self.get_image_from_url(url)
    return image, image_file, url
def fetch_image_from_page_data(self):
    image = None
    image_file = None
    content = MFeedPage.get_data(feed_id=self.feed.pk)
    url = self._url_from_html(content)
    if url:
        image, image_file = self.get_image_from_url(url)
    return image, image_file, url
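The variants of fetch_image_from_page_data above all lean on MFeedPage.get_data, which is not shown in this section. A hedged sketch of what it plausibly does, written as a standalone function and assuming page_data is stored zlib-compressed (that assumption is not confirmed by the code here):

import zlib

def get_data(feed_id):
    # Equivalent of MFeedPage.get_data: look up the stored page for this feed and
    # decompress it; return None when nothing has been fetched yet so callers can
    # fall back to fetching the page or showing a placeholder.
    feed_page = MFeedPage.objects(feed_id=feed_id).first()
    if not feed_page or not feed_page.page_data:
        return None
    return zlib.decompress(feed_page.page_data)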
def load_feed_page(request, feed_id):
    if not feed_id:
        raise Http404

    data = MFeedPage.get_data(feed_id=feed_id)

    if not data:
        data = "Fetching feed..."

    return HttpResponse(data, mimetype='text/html')
def load_feed_page(request):
    feed_id = int(request.GET.get('feed_id', 0))
    if feed_id == 0:
        raise Http404

    data = MFeedPage.get_data(feed_id=feed_id)

    if not data:
        data = "Fetching feed..."

    return HttpResponse(data, mimetype='text/html')
def fetch_image_from_page_data(self):
    image = None
    image_file = None
    content = None
    if self.page_data:
        content = self.page_data
    elif settings.BACKED_BY_AWS.get('pages_on_node'):
        domain = Site.objects.get_current().domain
        url = "https://%s/original_page/%s" % (domain, self.feed.pk,)
        try:
            page_response = requests.get(url)
            if page_response.status_code == 200:
                content = page_response.content
        except requests.ConnectionError:
            pass
    elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
        key = settings.S3_CONN.Bucket(settings.S3_PAGES_BUCKET_NAME).Object(key=self.feed.s3_pages_key)
        compressed_content = key.get()["Body"].read()
        stream = BytesIO(compressed_content)
        gz = gzip.GzipFile(fileobj=stream)
        try:
            content = gz.read()
        except IOError:
            pass
    else:
        content = MFeedPage.get_data(feed_id=self.feed.pk)
    url = self._url_from_html(content)
    if not url:
        try:
            content = requests.get(self.cleaned_feed_link, timeout=10).content
            url = self._url_from_html(content)
        except (AttributeError, SocketError, requests.ConnectionError,
                requests.models.MissingSchema, requests.sessions.InvalidSchema,
                requests.sessions.TooManyRedirects,
                requests.models.InvalidURL,
                requests.models.ChunkedEncodingError,
                requests.models.ContentDecodingError,
                http.client.IncompleteRead,
                requests.adapters.ReadTimeout,
                LocationParseError, OpenSSLError, PyAsn1Error, ValueError) as e:
            logging.debug(" ---> ~SN~FRFailed~FY to fetch ~FGfeed icon~FY: %s" % e)
    if url:
        image, image_file = self.get_image_from_url(url)
    return image, image_file, url
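The _url_from_html helper is called by every version of this method but is not shown here. A plausible shape for it, written as a standalone function for clarity, assuming lxml is available and that relative hrefs are resolved against the feed's link; the real helper is a method on the importer and may differ.

from urllib.parse import urljoin
from lxml import etree

def _url_from_html(content, base_url):
    # Pull the first <link rel="icon"> / <link rel="shortcut icon"> href out of
    # the page HTML and resolve it against the page's base URL.
    if not content:
        return None
    try:
        doc = etree.HTML(content)
    except (etree.ParserError, ValueError):
        return None
    if doc is None:
        return None
    hrefs = doc.xpath('//link[@rel="icon" or @rel="shortcut icon"]/@href')
    if not hrefs:
        return None
    return urljoin(base_url, hrefs[0])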
def load_feed_page(request):
    feed_id = None
    try:
        feed_id = int(request.REQUEST.get('feed_id', 0))
    except ValueError:
        feed_id_matches = re.search(r'(\d+)', request.REQUEST['feed_id'])
        if feed_id_matches:
            feed_id = int(feed_id_matches.group(1))

    if not feed_id:
        raise Http404

    data = MFeedPage.get_data(feed_id=feed_id)

    if not data:
        data = "Fetching feed..."

    return HttpResponse(data, mimetype='text/html')
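This view reads feed_id from the query string rather than the URL, so a urlconf entry for it could look like the sketch below. The route, name, and urlconf style are illustrative assumptions chosen to match the older Django idioms (mimetype=, request.REQUEST) used above.

from django.conf.urls import url

urlpatterns = [
    # Illustrative route; the real project's URL prefix and name may differ.
    url(r'^feed/page/?$', load_feed_page, name='load-feed-page'),
]

# A client would then request e.g. /feed/page/?feed_id=42; the except ValueError
# branch in the view also tolerates non-numeric values by pulling out the first
# run of digits.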
def fetch_image_from_page_data(self):
    image = None
    image_file = None
    if self.page_data:
        content = self.page_data
    elif settings.BACKED_BY_AWS.get('pages_on_s3') and self.feed.s3_page:
        key = settings.S3_PAGES_BUCKET.get_key(self.feed.s3_pages_key)
        compressed_content = key.get_contents_as_string()
        stream = StringIO(compressed_content)
        gz = gzip.GzipFile(fileobj=stream)
        content = gz.read()
    else:
        content = MFeedPage.get_data(feed_id=self.feed.pk)
    url = self._url_from_html(content)
    if url:
        image, image_file = self.get_image_from_url(url)
    return image, image_file, url
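get_image_from_url is the other helper these methods depend on but which is not shown here. A hedged sketch of its likely shape, written as a standalone function and assuming requests plus Pillow; any resizing or richer error handling in the real method is omitted.

import requests
from io import BytesIO
from PIL import Image

def get_image_from_url(url):
    # Download the candidate icon and open it with Pillow; return (None, None)
    # if the request fails or the bytes are not a decodable image.
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return None, None
    if response.status_code != 200 or not response.content:
        return None, None
    image_file = BytesIO(response.content)
    try:
        image = Image.open(image_file)
    except IOError:
        return None, None
    return image, image_file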