def add_missing_feeds(self):
    """Reconcile the feeds listed in the folders with the user's subscriptions.

    Feed ids found in ``self.flat()`` that have no ``UserSubscription`` get one
    created (or get flagged for an unread recalculation if it already exists).
    Subscribed feed ids absent from ``self.flat()`` are added to the folder
    named ``""`` and the folders are re-encoded and saved.
    """
    folder_feed_ids = set(self.flat())
    subscribed_ids = set(
        sub.feed_id
        for sub in UserSubscription.objects.filter(user=self.user).only('feed')
    )

    # Feeds in a folder that the user is not actually subscribed to.
    unsubscribed = folder_feed_ids - subscribed_ids
    if unsubscribed:
        logging.debug(" ---> %s is missing %s subs. Adding %s..." % (
            self.user, len(unsubscribed), unsubscribed))
        for feed_id in unsubscribed:
            feed = Feed.get_by_id(feed_id)
            if not feed:
                continue
            usersub, _ = UserSubscription.objects.get_or_create(
                user=self.user,
                feed=feed,
                defaults={'needs_unread_recalc': True},
            )
            if usersub.needs_unread_recalc:
                continue
            usersub.needs_unread_recalc = True
            usersub.save()

    # Subscriptions that do not appear in any folder.
    unfiled = subscribed_ids - folder_feed_ids
    if unfiled:
        folders = json.decode(self.folders)
        logging.debug(" ---> %s is missing %s folder feeds. Adding %s..." % (
            self.user, len(unfiled), unfiled))
        for feed_id in unfiled:
            feed = Feed.get_by_id(feed_id)
            # Only file the feed when the lookup resolves to this exact id.
            if feed and feed.pk == feed_id:
                folders = add_object_to_folder(feed_id, "", folders)
        self.folders = json.encode(folders)
        self.save()
def exception_retry(request):
    """Clear a feed's exception/inactive flags, force an update and rescore.

    Reads ``feed_id`` and the JSON-encoded ``reset_fetch`` POST value, raises
    ``Http404`` when the feed cannot be found, and returns ``{'code': -1}``
    when the user has no subscription to either the updated or the original
    feed. Otherwise returns ``{'code': 1, 'feeds': {...}}``.
    """
    user = get_user(request)
    feed_id = get_argument_or_404(request, 'feed_id')
    reset_fetch = json.decode(request.POST['reset_fetch'])

    feed = Feed.get_by_id(feed_id)
    if not feed:
        raise Http404
    original_feed = feed

    feed.schedule_feed_fetch_immediately()

    # Reset only the flags that are currently set, and save only if any changed.
    changed = False
    for flag in ('has_page_exception', 'has_feed_exception'):
        if getattr(feed, flag):
            changed = True
            setattr(feed, flag, False)
    if not feed.active:
        changed = True
        feed.active = True
    if changed:
        feed.save(update_fields=['has_page_exception', 'has_feed_exception', 'active'])

    previous_fetched_once = feed.fetched_once
    if reset_fetch:
        logging.user(request, "~FRRefreshing exception feed: ~SB%s" % feed)
    else:
        logging.user(request, "~FRForcing refreshing feed: ~SB%s" % feed)
    feed.fetched_once = not reset_fetch
    if feed.fetched_once != previous_fetched_once:
        feed.save(update_fields=['fetched_once'])

    # update() returns a feed object; re-read it by primary key afterwards.
    feed = feed.update(force=True, compute_scores=False, verbose=True)
    feed = Feed.get_by_id(feed.pk)

    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        fallback_subs = UserSubscription.objects.filter(user=user, feed=original_feed)
        if not fallback_subs:
            return {'code': -1}
        usersub = fallback_subs[0]
        usersub.switch_feed(feed, original_feed)

    usersub.calculate_feed_scores(silent=False)

    feeds = {
        feed.pk: usersub and usersub.canonical(full=True),
        feed_id: usersub.canonical(full=True),
    }
    return {'code': 1, 'feeds': feeds}
def exception_retry(request):
    """Retry a feed that is in an exception state.

    Schedules an immediate fetch, clears the page/feed exception flags,
    reactivates the feed, sets ``fetched_once`` from the ``reset_fetch`` POST
    value, forces an update and recalculates the user's scores. Raises
    ``Http404`` for an unknown feed; returns ``{'code': -1}`` when the user has
    no matching subscription.
    """
    user = get_user(request)
    feed_id = get_argument_or_404(request, 'feed_id')
    reset_fetch = json.decode(request.POST['reset_fetch'])
    original_feed = feed = Feed.get_by_id(feed_id)
    if not feed:
        raise Http404

    feed.schedule_feed_fetch_immediately()

    needs_save = False
    if feed.has_page_exception:
        feed.has_page_exception = False
        needs_save = True
    if feed.has_feed_exception:
        feed.has_feed_exception = False
        needs_save = True
    if not feed.active:
        feed.active = True
        needs_save = True
    if needs_save:
        flag_fields = ['has_page_exception', 'has_feed_exception', 'active']
        feed.save(update_fields=flag_fields)

    fetched_once_before = feed.fetched_once
    if reset_fetch:
        message = "~FRRefreshing exception feed: ~SB%s" % feed
        fetched_once_after = False
    else:
        message = "~FRForcing refreshing feed: ~SB%s" % feed
        fetched_once_after = True
    logging.user(request, message)
    feed.fetched_once = fetched_once_after
    # Avoid a write when the value did not actually change.
    if feed.fetched_once != fetched_once_before:
        feed.save(update_fields=['fetched_once'])

    updated = feed.update(force=True, compute_scores=False, verbose=True)
    feed = Feed.get_by_id(updated.pk)

    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        # Fall back to the subscription on the feed as originally requested.
        usersubs = UserSubscription.objects.filter(user=user, feed=original_feed)
        if not usersubs:
            return {'code': -1}
        usersub = usersubs[0]
        usersub.switch_feed(feed, original_feed)

    usersub.calculate_feed_scores(silent=False)

    feeds = {
        feed.pk: usersub and usersub.canonical(full=True),
        feed_id: usersub.canonical(full=True),
    }
    return {'code': 1, 'feeds': feeds}
def __unicode__(self):
    """Render as ``user - feed_id/social_user_id: (score) target``.

    ``target`` is the feed when ``feed_id`` is set, otherwise the user
    identified by ``social_user_id``.
    """
    owner = User.objects.get(pk=self.user_id)
    if not self.feed_id:
        target = User.objects.get(pk=self.social_user_id)
    else:
        target = Feed.get_by_id(self.feed_id)
    fields = (owner, self.feed_id, self.social_user_id, self.score, target)
    return "%s - %s/%s: (%s) %s" % fields
def forwards(self, orm):
    """Backfill ``story_hash`` on stories that lack one, feed by feed.

    Walks every candidate feed pk up to and including the latest one, and for
    each existing feed writes ``story_hash`` onto stories missing it through
    the raw pymongo collection. Per-feed failures are printed and skipped so
    one bad feed does not abort the migration.
    """
    from apps.rss_feeds.models import MStory, Feed
    import time

    db = pymongo.Connection(settings.MONGODB_HOST)
    batch = 0  # manual resume knob: pks below batch*100000 are skipped
    first_pk = 0
    latest_pk = Feed.objects.latest('pk').pk

    # xrange is half-open, so +1 is needed to include the newest feed.
    for f in xrange(first_pk, latest_pk + 1):
        if f < batch*100000:
            continue
        started = time.time()
        try:
            try:
                feed = Feed.get_by_id(f)
            except Feed.DoesNotExist:
                continue
            if not feed:
                continue
            cp1 = time.time() - started
            # if feed.active_premium_subscribers < 1: continue
            stories = MStory.objects.filter(story_feed_id=feed.pk, story_hash__exists=False)\
                                    .only('_id', 'story_feed_id', 'story_permalink')
            cp2 = time.time() - started
            for story in stories:
                db.newsblur.stories.update({"_id": story.id}, {"$set": {
                    "story_hash": story.story_hash
                }})
            cp3 = time.time() - started
            print("%3s stories: %s (%s/%s/%s)" % (
                stories.count(), feed, round(cp1, 2), round(cp2, 2), round(cp3, 2)))
        except Exception as e:
            print(" ***> (%s) %s" % (f, e))
def forwards(self, orm):
    """Re-save every story that has no ``story_hash``, feed by feed.

    Iterates candidate feed pks up to and including the latest one; pks that
    do not resolve to a feed are skipped.
    """
    latest_pk = Feed.objects.latest('pk').pk
    # xrange is half-open, so +1 is needed to include the newest feed.
    for f in xrange(latest_pk + 1):
        feed = Feed.get_by_id(f)
        if not feed:
            continue
        stories = MStory.objects.filter(story_feed_id=feed.pk, story_hash__exists=False)
        print("%3s stories: %s " % (stories.count(), feed))
        for story in stories:
            story.save()
def run(self, feed_pks, **kwargs):
    """Update the given feeds, logging (and optionally mailing) any failure.

    ``feed_pks`` may be a single pk or a list. A pk that resolves to no feed,
    or to a feed with a different pk, is removed from the ``tasked_feeds``
    sorted set. Any exception is logged with its traceback and, when
    ``settings.SEND_ERROR_MAILS`` is set, mailed to the admins.
    """
    try:
        from apps.rss_feeds.models import Feed
        #from apps.statistics.models import MStatistics
        r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)
        #mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
        #compute_scores = bool(mongodb_replication_lag < 10)

        options = {
            # 'quick': float(MStatistics.get('quick_fetch', 0)),
            # 'compute_scores': compute_scores,
            # 'mongodb_replication_lag': mongodb_replication_lag,
        }

        pks = feed_pks if isinstance(feed_pks, list) else [feed_pks]
        for feed_pk in pks:
            feed = Feed.get_by_id(feed_pk)
            if not feed or feed.pk != int(feed_pk):
                logging.info(" ---> ~FRRemoving feed_id %s from tasked_feeds queue, points to %s..." % (feed_pk, feed and feed.pk))
                r.zrem('tasked_feeds', feed_pk)
            if not feed:
                continue
            feed.update(**options)
    except Exception as e:
        logging.error(str(e) +
                      traceback.format_exc() + '\n' +
                      'error from: UpdateFeeds\n')
        if settings.SEND_ERROR_MAILS:
            mail_admins("Error in UpdateFeeds", str(e) + '\n' + traceback.format_exc())
def run(self, feed_pks, **kwargs):
    """Update the given feeds with options read from ``MStatistics``.

    ``feed_pks`` may be a single pk or a list. Score computation is enabled
    only while the recorded replication lag is below 10. When the DB profiler
    activates, the Mongo and Redis dump middlewares are attached to it and it
    is finished after all feeds have been processed.
    """
    from apps.rss_feeds.models import Feed
    from apps.statistics.models import MStatistics

    r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
    mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
    compute_scores = mongodb_replication_lag < 10

    profiler = DBProfilerMiddleware()
    profiler_activated = profiler.process_celery()
    if profiler_activated:
        MongoDumpMiddleware().process_celery(profiler)
        RedisDumpMiddleware().process_celery(profiler)

    quick = float(MStatistics.get('quick_fetch', 0))
    updates_off = MStatistics.get('updates_off', False)
    options = {
        'quick': quick,
        'updates_off': updates_off,
        'compute_scores': compute_scores,
        'mongodb_replication_lag': mongodb_replication_lag,
    }

    pks = feed_pks if isinstance(feed_pks, list) else [feed_pks]
    for feed_pk in pks:
        feed = Feed.get_by_id(feed_pk)
        # Drop queue entries that point nowhere or at a different feed.
        if not feed or feed.pk != int(feed_pk):
            logging.info(" ---> ~FRRemoving feed_id %s from tasked_feeds queue, points to %s..." % (feed_pk, feed and feed.pk))
            r.zrem('tasked_feeds', feed_pk)
        if not feed:
            continue
        feed.update(**options)

    if profiler_activated:
        profiler.process_celery_finished()
def run(self, feed_pks, **kwargs):
    """Update the given feeds with options read from ``MStatistics``.

    ``feed_pks`` may be a single pk or a list. A pk that resolves to no feed,
    or to a feed with a different pk, is removed from ``tasked_feeds``.
    """
    from apps.rss_feeds.models import Feed
    from apps.statistics.models import MStatistics

    r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)
    mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
    compute_scores = mongodb_replication_lag < 10

    quick = float(MStatistics.get('quick_fetch', 0))
    updates_off = MStatistics.get('updates_off', False)
    options = {
        'quick': quick,
        'updates_off': updates_off,
        'compute_scores': compute_scores,
        'mongodb_replication_lag': mongodb_replication_lag,
    }

    pks = feed_pks if isinstance(feed_pks, list) else [feed_pks]
    for feed_pk in pks:
        feed = Feed.get_by_id(feed_pk)
        if not feed or feed.pk != int(feed_pk):
            logging.info(" ---> ~FRRemoving feed_id %s from tasked_feeds queue, points to %s..." % (feed_pk, feed and feed.pk))
            r.zrem('tasked_feeds', feed_pk)
        if not feed:
            continue
        feed.update(**options)
def run(self, feed_pks, **kwargs):
    """Update the given feeds, logging any that do not exist.

    ``feed_pks`` may be a single pk or a list. A falsy lookup result is
    converted into ``Feed.DoesNotExist`` so it is logged the same way as a
    ``Feed.DoesNotExist`` raised during the update itself.
    """
    from apps.rss_feeds.models import Feed
    from apps.statistics.models import MStatistics

    mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
    compute_scores = mongodb_replication_lag < 10

    fake = bool(MStatistics.get('fake_fetch'))
    quick = float(MStatistics.get('quick_fetch', 0))
    options = {
        'fake': fake,
        'quick': quick,
        'compute_scores': compute_scores,
        'mongodb_replication_lag': mongodb_replication_lag,
    }

    pks = feed_pks if isinstance(feed_pks, list) else [feed_pks]
    for feed_pk in pks:
        try:
            feed = Feed.get_by_id(feed_pk)
            if feed:
                feed.update(**options)
            else:
                raise Feed.DoesNotExist
        except Feed.DoesNotExist:
            logging.info(" ---> Feed doesn't exist: [%s]" % feed_pk)
def run(self, feed_pks, **kwargs):
    """Update the given feeds with options read from ``MStatistics``.

    ``feed_pks`` may be a single pk or a list. Queue entries in
    ``tasked_feeds`` that resolve to no feed, or to a feed with a different
    pk, are removed.
    """
    from apps.rss_feeds.models import Feed
    from apps.statistics.models import MStatistics

    r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)
    mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
    compute_scores = mongodb_replication_lag < 10

    quick = float(MStatistics.get('quick_fetch', 0))
    options = {
        'quick': quick,
        'compute_scores': compute_scores,
        'mongodb_replication_lag': mongodb_replication_lag,
    }

    if isinstance(feed_pks, list):
        pks = feed_pks
    else:
        pks = [feed_pks]

    for feed_pk in pks:
        feed = Feed.get_by_id(feed_pk)
        mismatched = not feed or feed.pk != int(feed_pk)
        if mismatched:
            logging.info(
                " ---> ~FRRemoving feed_id %s from tasked_feeds queue, points to %s..."
                % (feed_pk, feed and feed.pk))
            r.zrem('tasked_feeds', feed_pk)
        if feed:
            feed.update(**options)
def TaskBrokenFeeds():
    """Find broken, dropped and stale feeds and task them for an update.

    Three groups are handled:

    * up to 100 random active, never-fetched feeds with subscribers;
    * entries in the ``tasked_feeds`` sorted set scored older than ten
      minutes ago, which are removed, counted in ``error_feeds`` and
      rescheduled;
    * up to 500 random feeds whose next scheduled update is over a day old.
    """
    from apps.rss_feeds.models import Feed

    settings.LOG_TO_STREAM = True
    now = datetime.datetime.utcnow()
    start = time.time()
    r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)

    logging.debug(" ---> ~SN~FBQueuing broken feeds...")

    # Force refresh feeds
    refresh_feeds = Feed.objects.filter(
        active=True,
        fetched_once=False,
        active_subscribers__gte=1).order_by('?')[:100]
    refresh_count = refresh_feeds.count()
    cp1 = time.time()

    logging.debug(" ---> ~SN~FBFound %s active, unfetched broken feeds" % refresh_count)

    # Mistakenly inactive feeds
    # NOTE(review): strftime('%s') is platform-specific and formats a naive
    # datetime; confirm the resulting epoch matches the scores in tasked_feeds.
    cutoff = (now - datetime.timedelta(minutes=10)).strftime('%s')
    old_tasked_feeds = r.zrangebyscore('tasked_feeds', 0, cutoff)
    inactive_count = len(old_tasked_feeds)
    if inactive_count:
        r.zremrangebyscore('tasked_feeds', 0, cutoff)
        # r.sadd('queued_feeds', *old_tasked_feeds)
        for feed_id in old_tasked_feeds:
            r.zincrby('error_feeds', 1, feed_id)
            feed = Feed.get_by_id(feed_id)
            # A queued id may no longer resolve to a feed; skip it rather
            # than abort the whole task with an AttributeError.
            if not feed:
                continue
            feed.set_next_scheduled_update()
    logging.debug(
        " ---> ~SN~FBRe-queuing ~SB%s~SN dropped/broken feeds (~SB%s/%s~SN queued/tasked)"
        % (inactive_count, r.scard('queued_feeds'), r.zcard('tasked_feeds')))
    cp2 = time.time()

    old = now - datetime.timedelta(days=1)
    old_feeds = Feed.objects.filter(
        next_scheduled_update__lte=old,
        active_subscribers__gte=1).order_by('?')[:500]
    old_count = old_feeds.count()
    cp3 = time.time()

    logging.debug(
        " ---> ~SN~FBTasking ~SBrefresh:~FC%s~FB inactive:~FC%s~FB old:~FC%s~SN~FB broken feeds... (%.4s/%.4s/%.4s)"
        % (
            refresh_count,
            inactive_count,
            old_count,
            cp1 - start,
            cp2 - cp1,
            cp3 - cp2,
        ))

    Feed.task_feeds(refresh_feeds, verbose=False)
    Feed.task_feeds(old_feeds, verbose=False)

    logging.debug(
        " ---> ~SN~FBTasking broken feeds took ~SB%s~SN seconds (~SB%s~SN/~FG%s~FB~SN/%s tasked/queued/scheduled)"
        % (int((time.time() - start)), r.zcard('tasked_feeds'),
           r.scard('queued_feeds'), r.zcard('scheduled_updates')))
def handle(self, *args, **options):
    """Force a single-threaded, verbose update of one feed.

    The feed is looked up by a case-insensitive title match when
    ``options['title']`` is set, otherwise by ``options['feed']``. The process
    is daemonized first when ``options['daemonize']`` is set.
    """
    if options['daemonize']:
        daemonize()

    title = options['title']
    feed = (
        Feed.objects.get(feed_title__icontains=title)
        if title
        else Feed.get_by_id(options['feed'])
    )
    feed.update(force=options['force'], single_threaded=True, verbose=2)
def run(self, feed_pks, **kwargs):
    """Update each feed identified by ``feed_pks``.

    ``feed_pks`` may be a single pk or a list. A pk that ``Feed.get_by_id``
    does not resolve to a feed is skipped instead of failing the whole batch
    with an ``AttributeError``.
    """
    from apps.rss_feeds.models import Feed

    if not isinstance(feed_pks, list):
        feed_pks = [feed_pks]

    options = {}
    for feed_pk in feed_pks:
        feed = Feed.get_by_id(feed_pk)
        if not feed:
            continue
        feed.update(options=options)
def force_push(request):
    """Force push notifications for a feed and report how many were sent.

    Reads ``feed_id`` (required) and ``count`` (default 1) from
    ``request.REQUEST``.
    """
    user = get_user(request)
    params = request.REQUEST
    feed_id = params['feed_id']
    count = int(params.get('count', 1))

    feed = Feed.get_by_id(feed_id)
    logging.user(user, "~BM~FWForce pushing %s stories: ~SB%s" % (count, feed))

    sent_count, user_count = MUserFeedNotification.push_feed_notifications(
        feed_id, new_stories=count, force=True)

    message = "Pushed %s notifications to %s users" % (sent_count, user_count)
    return {"message": message}
def feed_autocomplete(request):
    """Return feeds matching the ``term``/``query`` GET parameter.

    A query containing a dot is treated as a possible URL and reduced to its
    hostname when one can be parsed. Results are sorted by subscriber count,
    descending, and decorated with favicon data when an icon exists.

    Returns the bare list of feeds for ``v`` <= 1, ``{"feeds", "term"}`` for
    newer versions, or an error dict with ``code=-1`` when no query is given.
    """
    query = request.GET.get("term") or request.GET.get("query")
    version = int(request.GET.get("v", 1))
    result_format = request.GET.get("format", "autocomplete")

    # user = get_user(request)
    # if True or not user.profile.is_premium:
    #     return dict(code=-1, message="Overloaded, no autocomplete results.", feeds=[], term=query)

    if not query:
        return dict(code=-1, message="Specify a search 'term'.", feeds=[], term=query)

    if "." in query:
        try:
            parts = urlparse(query)
            if not parts.hostname and not query.startswith("http"):
                parts = urlparse("http://%s" % query)
            if parts.hostname:
                query = parts.hostname
        except Exception:
            # Best effort: keep the raw query when it cannot be parsed.
            logging.user(request, "~FGAdd search, could not parse url in ~FR%s" % query)

    feed_ids = Feed.autocomplete(query)
    # Drop ids that no longer resolve to a feed for BOTH output formats;
    # previously only the "autocomplete" format filtered them, so the other
    # format crashed on a missing feed.
    feeds = [feed for feed in (Feed.get_by_id(feed_id) for feed_id in feed_ids) if feed]

    if result_format == "autocomplete":
        feeds = [
            {
                "id": feed.pk,
                "value": feed.feed_address,
                "label": feed.feed_title,
                "tagline": feed.data and feed.data.feed_tagline,
                "num_subscribers": feed.num_subscribers,
            }
            for feed in feeds
        ]
    else:
        feeds = [feed.canonical(full=True) for feed in feeds]
    feeds = sorted(feeds, key=lambda f: -1 * f["num_subscribers"])

    feed_ids = [f["id"] for f in feeds]
    feed_icons = dict((icon.feed_id, icon) for icon in MFeedIcon.objects.filter(feed_id__in=feed_ids))

    for feed in feeds:
        feed_icon = feed_icons.get(feed["id"])
        if feed_icon is not None and feed_icon.data:
            feed["favicon_color"] = feed_icon.color
            feed["favicon"] = feed_icon.data

    logging.user(request, "~FGAdd Search: ~SB%s ~SN(%s matches)" % (query, len(feeds)))

    if version > 1:
        return {"feeds": feeds, "term": query}
    else:
        return feeds
def feed_autocomplete(request):
    """Return de-duplicated, non-branched feeds matching the search term.

    A query containing a dot is treated as a possible URL and reduced to its
    hostname when one can be parsed. Feeds that do not resolve, or that are
    branched from another feed, are excluded. Results are sorted by
    subscriber count, descending, and decorated with favicon data.

    Returns the bare list of feeds for ``v`` <= 1, ``{'feeds', 'term'}`` for
    newer versions, or an error dict with ``code=-1`` when no query is given.
    """
    query = request.GET.get('term') or request.GET.get('query')
    version = int(request.GET.get('v', 1))
    result_format = request.GET.get('format', 'autocomplete')

    # user = get_user(request)
    # if True or not user.profile.is_premium:
    #     return dict(code=-1, message="Overloaded, no autocomplete results.", feeds=[], term=query)

    if not query:
        return dict(code=-1, message="Specify a search 'term'.", feeds=[], term=query)

    if '.' in query:
        try:
            parts = urlparse(query)
            if not parts.hostname and not query.startswith('http'):
                parts = urlparse('http://%s' % query)
            if parts.hostname:
                query = parts.hostname
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
            # are no longer swallowed; parse failures still fall back to the
            # raw query.
            logging.user(request, "~FGAdd search, could not parse url in ~FR%s" % query)

    feed_ids = Feed.autocomplete(query)
    unique_feeds = set(Feed.get_by_id(feed_id) for feed_id in feed_ids)
    feeds = [feed for feed in unique_feeds if feed and not feed.branch_from_feed]

    if result_format == 'autocomplete':
        feeds = [{
            'id': feed.pk,
            'value': feed.feed_address,
            'label': feed.feed_title,
            'tagline': feed.data and feed.data.feed_tagline,
            'num_subscribers': feed.num_subscribers,
        } for feed in feeds]
    else:
        feeds = [feed.canonical(full=True) for feed in feeds]
    feeds = sorted(feeds, key=lambda f: -1 * f['num_subscribers'])

    feed_ids = [f['id'] for f in feeds]
    feed_icons = dict((icon.feed_id, icon) for icon in MFeedIcon.objects.filter(feed_id__in=feed_ids))

    for feed in feeds:
        feed_icon = feed_icons.get(feed['id'])
        if feed_icon is not None and feed_icon.data:
            feed['favicon_color'] = feed_icon.color
            feed['favicon'] = feed_icon.data

    logging.user(request, "~FGAdd Search: ~SB%s ~SN(%s matches)" % (query, len(feeds),))

    if version > 1:
        return {
            'feeds': feeds,
            'term': query,
        }
    else:
        return feeds
def push_feed_notifications(cls, feed_id, new_stories, force=False):
    """Push the feed's latest stories to every user with notifications on.

    Args:
        feed_id: id of the feed whose stories are pushed.
        new_stories: how many of the most recent story hashes to load.
        force: when true, also push stories dated at or before the user's
            last notification date.

    Returns:
        ``(total_sent_count, number_of_notification_records)``; ``(0, 0)``
        when the feed cannot be found.

    At most three stories are sent per user per call.
    """
    feed = Feed.get_by_id(feed_id)
    if not feed:
        # Nothing to push for an unknown feed; callers unpack a 2-tuple.
        return 0, 0

    notifications = MUserFeedNotification.users_for_feed(feed.pk)
    logging.debug(
        " ---> [%-30s] ~FCPushing out notifications to ~SB%s users~SN for ~FB~SB%s stories"
        % (feed, len(notifications), new_stories))

    r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
    latest_story_hashes = r.zrange("zF:%s" % feed.pk, -1 * new_stories, -1)
    mstories = MStory.objects.filter(
        story_hash__in=latest_story_hashes).order_by('-story_date')
    stories = Feed.format_stories(mstories)

    # The story dicts are shared across all users, so remember which ones
    # have already been unescaped; unescaping is not idempotent
    # ("&amp;lt;" -> "&lt;" -> "<") and must happen once per story.
    unescaped = set()
    total_sent_count = 0

    for user_feed_notification in notifications:
        sent_count = 0
        last_notification_date = user_feed_notification.last_notification_date
        try:
            usersub = UserSubscription.objects.get(
                user=user_feed_notification.user_id,
                feed=user_feed_notification.feed_id)
        except UserSubscription.DoesNotExist:
            continue

        classifiers = user_feed_notification.classifiers(usersub)
        if classifiers is None:
            if settings.DEBUG:
                logging.debug("Has no usersubs")
            continue

        for index, story in enumerate(stories):
            if sent_count >= 3:
                if settings.DEBUG:
                    logging.debug("Sent too many, ignoring...")
                # Every remaining story would be skipped as well.
                break
            if story['story_date'] <= last_notification_date and not force:
                if settings.DEBUG:
                    logging.debug(
                        "Story date older than last notification date: %s <= %s"
                        % (story['story_date'], last_notification_date))
                continue

            if story['story_date'] > user_feed_notification.last_notification_date:
                user_feed_notification.last_notification_date = story['story_date']
                user_feed_notification.save()

            if index not in unescaped:
                story['story_content'] = html.unescape(story['story_content'])
                unescaped.add(index)

            sent = user_feed_notification.push_story_notification(
                story, classifiers, usersub)
            if sent:
                sent_count += 1
                total_sent_count += 1

    return total_sent_count, len(notifications)
def NewFeeds(feed_pks):
    """Update each feed in ``feed_pks`` (a pk or list of pks), skipping misses."""
    from apps.rss_feeds.models import Feed

    pks = feed_pks if isinstance(feed_pks, list) else [feed_pks]
    options = {}
    # The generator is lazy, so each lookup still happens right before its update.
    for feed in (Feed.get_by_id(pk) for pk in pks):
        if feed:
            feed.update(options=options)
def index_feeds_for_search(cls, feed_ids, user_id):
    """Index the stories of each resolvable feed in ``feed_ids`` for search.

    The request is logged against the user identified by ``user_id``.
    """
    from apps.rss_feeds.models import Feed

    user = User.objects.get(pk=user_id)
    logging.user(user, "~SB~FCIndexing %s~FC by request..." % feed_ids)

    for feed in (Feed.get_by_id(feed_id) for feed_id in feed_ids):
        if feed:
            feed.index_stories_for_search()
def run(self, feed_pks, **kwargs):
    """Force-update each feed identified by ``feed_pks``.

    ``feed_pks`` may be a single pk or a list. A pk that ``Feed.get_by_id``
    does not resolve to a feed is skipped instead of failing the whole batch
    with an ``AttributeError``.
    """
    from apps.rss_feeds.models import Feed

    if not isinstance(feed_pks, list):
        feed_pks = [feed_pks]

    options = {
        'force': True,
    }
    for feed_pk in feed_pks:
        feed = Feed.get_by_id(feed_pk)
        if not feed:
            continue
        feed.update(options=options)
def add_missing_feeds(self):
    """Create subscriptions for folder feeds the user is not subscribed to."""
    folder_feed_ids = set(self.flat())
    subscribed_ids = set(
        sub.feed_id
        for sub in UserSubscription.objects.filter(user=self.user).only("feed")
    )

    missing_feeds = folder_feed_ids - subscribed_ids
    if not missing_feeds:
        return

    logging.debug(" ---> %s is missing %s feeds. Adding %s..." % (self.user, len(missing_feeds), missing_feeds))
    for feed_id in missing_feeds:
        feed = Feed.get_by_id(feed_id)
        if not feed:
            continue
        UserSubscription.objects.get_or_create(user=self.user, feed=feed)
def __unicode__(self):
    """Render as ``username/feed: enabled,types -> last_notification_date``."""
    channels = (
        ('email', self.is_email),
        ('web', self.is_web),
        ('ios', self.is_ios),
        ('android', self.is_android),
    )
    notification_types = [label for label, enabled in channels if enabled]

    username = User.objects.get(pk=self.user_id).username
    feed = Feed.get_by_id(self.feed_id)
    return "%s/%s: %s -> %s" % (
        username,
        feed,
        ','.join(notification_types),
        self.last_notification_date,
    )
def __str__(self):
    """Render as ``username/feed: enabled,types -> last_notification_date``."""
    channels = (
        ('email', self.is_email),
        ('web', self.is_web),
        ('ios', self.is_ios),
        ('android', self.is_android),
    )
    notification_types = [label for label, enabled in channels if enabled]

    username = User.objects.get(pk=self.user_id).username
    feed = Feed.get_by_id(self.feed_id)
    return "%s/%s: %s -> %s" % (
        username,
        feed,
        ','.join(notification_types),
        self.last_notification_date,
    )
def count_feed(cls, feed_id):
    """Print how often each folder title holds the feed, most common first.

    Folder titles are lower-cased before counting; subscribers for whom
    ``feed_folder_parent`` returns a falsy title are not counted.
    """
    feed = Feed.get_by_id(feed_id)
    print(feed)

    subscriber_rows = UserSubscription.objects.filter(feed=feed).values('user_id')
    user_ids = [row['user_id'] for row in subscriber_rows]

    folder_counts = defaultdict(int)
    for folders_record in UserSubscriptionFolders.objects.filter(user_id__in=user_ids):
        decoded = json.decode(folders_record.folders)
        folder_title = cls.feed_folder_parent(decoded, feed.pk)
        if folder_title:
            folder_counts[folder_title.lower()] += 1
            # print "%-20s - %s" % (folder_title if folder_title != '' else '[Top]', sub.user_id)

    print(sorted(folder_counts.items(), key=lambda f: f[1], reverse=True))
def exception_retry(request):
    """Reset a feed's exception state, force an update and rescore it.

    Raises ``Http404`` for an unknown feed; returns ``{"code": -1}`` when the
    user has no subscription to either the updated or the original feed.
    """
    user = get_user(request)
    feed_id = get_argument_or_404(request, "feed_id")
    reset_fetch = json.decode(request.POST["reset_fetch"])

    feed = Feed.get_by_id(feed_id)
    if not feed:
        raise Http404
    original_feed = feed

    feed.next_scheduled_update = datetime.datetime.utcnow()
    feed.has_page_exception = False
    feed.has_feed_exception = False
    feed.active = True

    if reset_fetch:
        log_template = "~FRRefreshing exception feed: ~SB%s"
    else:
        log_template = "~FRForcing refreshing feed: ~SB%s"
    logging.user(request, log_template % feed)
    feed.fetched_once = not reset_fetch
    feed.save()

    # update() returns a feed object; re-read it by primary key afterwards.
    feed = feed.update(force=True, compute_scores=False, verbose=True)
    feed = Feed.get_by_id(feed.pk)

    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        fallback_subs = UserSubscription.objects.filter(user=user, feed=original_feed)
        if not fallback_subs:
            return {"code": -1}
        usersub = fallback_subs[0]
        usersub.switch_feed(feed, original_feed)

    usersub.calculate_feed_scores(silent=False)

    feeds = {
        feed.pk: usersub and usersub.canonical(full=True),
        feed_id: usersub.canonical(full=True),
    }
    return {"code": 1, "feeds": feeds}
def run(self, feed_id, xml, **kwargs):
    """Update one feed from the supplied ``xml`` payload.

    Score computation is enabled only while the recorded replication lag is
    below 60. A ``feed_id`` that does not resolve to a feed is ignored rather
    than raising ``AttributeError``.
    """
    from apps.rss_feeds.models import Feed
    from apps.statistics.models import MStatistics

    mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
    compute_scores = bool(mongodb_replication_lag < 60)

    options = {
        'feed_xml': xml,
        'compute_scores': compute_scores,
        'mongodb_replication_lag': mongodb_replication_lag,
    }
    feed = Feed.get_by_id(feed_id)
    if not feed:
        return
    feed.update(options=options)
def index_subscriptions_chunk_for_search(self, feed_ids):
    """Index a chunk of feeds for search, then publish a completion message.

    The message ``search_index_complete:feeds:<comma-separated ids>`` is
    published on the channel named after the user's username and lists every
    id in ``feed_ids``, including any that did not resolve to a feed.
    """
    from apps.rss_feeds.models import Feed

    r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
    user = User.objects.get(pk=self.user_id)

    logging.user(user, "~FCIndexing %s feeds..." % len(feed_ids))

    for feed in (Feed.get_by_id(feed_id) for feed_id in feed_ids):
        if feed:
            feed.index_stories_for_search()

    joined_ids = ','.join([str(f) for f in feed_ids])
    r.publish(user.username, 'search_index_complete:feeds:%s' % joined_ids)
def audit(cls):
    """Prune feed ids that no longer resolve to a feed from every category.

    Each category's ``feed_ids`` is replaced with the pks of the feeds that
    could still be looked up, and the category is saved.
    """
    for category in cls.objects.all():
        logging.info(
            f" ---> Auditing category: {category} {category.feed_ids}")
        surviving_ids = []
        for feed_id in category.feed_ids:
            feed = Feed.get_by_id(feed_id)
            if not feed:
                logging.info(f" \t***> Skipping missing feed: {feed_id}")
                continue
            logging.info(f" \t---> Keeping feed: {feed_id} {feed}")
            surviving_ids.append(feed.pk)
        category.feed_ids = surviving_ids
        category.save()
def run(self, feed_id, xml, **kwargs):
    """Update one feed from the supplied ``xml`` payload.

    Score computation is enabled only while the recorded replication lag is
    below 60. A ``feed_id`` that does not resolve to a feed is ignored rather
    than raising ``AttributeError``.
    """
    from apps.rss_feeds.models import Feed
    from apps.statistics.models import MStatistics

    mongodb_replication_lag = int(
        MStatistics.get('mongodb_replication_lag', 0))
    compute_scores = bool(mongodb_replication_lag < 60)

    options = {
        'feed_xml': xml,
        'compute_scores': compute_scores,
        'mongodb_replication_lag': mongodb_replication_lag,
    }
    feed = Feed.get_by_id(feed_id)
    if not feed:
        return
    feed.update(options=options)
def add_missing_feeds(self):
    """Subscribe the user to folder feeds that lack a subscription.

    New subscriptions are created with ``needs_unread_recalc`` set; an
    existing one found without the flag gets it set and is saved.
    """
    folder_feed_ids = set(self.flat())
    subscribed_ids = set(
        sub.feed_id
        for sub in UserSubscription.objects.filter(user=self.user).only("feed")
    )

    missing_feeds = folder_feed_ids - subscribed_ids
    if not missing_feeds:
        return

    logging.debug(" ---> %s is missing %s feeds. Adding %s..." % (self.user, len(missing_feeds), missing_feeds))
    for feed_id in missing_feeds:
        feed = Feed.get_by_id(feed_id)
        if not feed:
            continue
        usersub, _ = UserSubscription.objects.get_or_create(
            user=self.user,
            feed=feed,
            defaults={"needs_unread_recalc": True},
        )
        if usersub.needs_unread_recalc:
            continue
        usersub.needs_unread_recalc = True
        usersub.save()
def push_feed_notifications(cls, feed_id, new_stories, force=False):
    """Push the feed's latest stories to every user with notifications on.

    Args:
        feed_id: id of the feed whose stories are pushed.
        new_stories: how many of the most recent story hashes to load.
        force: when true, also push stories dated at or before the user's
            last notification date.

    Returns:
        ``(total_sent_count, number_of_notification_records)``; ``(0, 0)``
        when the feed cannot be found.

    At most three stories are sent per user per call.
    """
    feed = Feed.get_by_id(feed_id)
    if not feed:
        # Nothing to push for an unknown feed; callers unpack a 2-tuple.
        return 0, 0

    notifications = MUserFeedNotification.users_for_feed(feed.pk)
    logging.debug(" ---> [%-30s] ~FCPushing out notifications to ~SB%s users~SN for ~FB~SB%s stories" % (
        feed, len(notifications), new_stories))

    r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
    latest_story_hashes = r.zrange("zF:%s" % feed.pk, -1 * new_stories, -1)
    mstories = MStory.objects.filter(story_hash__in=latest_story_hashes).order_by('-story_date')
    stories = Feed.format_stories(mstories)

    # The story dicts are shared across all users, so remember which ones
    # have already been unescaped; unescaping is not idempotent
    # ("&amp;lt;" -> "&lt;" -> "<") and must happen once per story.
    unescaped = set()
    total_sent_count = 0

    for user_feed_notification in notifications:
        sent_count = 0
        last_notification_date = user_feed_notification.last_notification_date
        try:
            usersub = UserSubscription.objects.get(user=user_feed_notification.user_id,
                                                   feed=user_feed_notification.feed_id)
        except UserSubscription.DoesNotExist:
            continue

        classifiers = user_feed_notification.classifiers(usersub)
        if classifiers is None:
            logging.debug("Has no usersubs")
            continue

        for index, story in enumerate(stories):
            if sent_count >= 3:
                logging.debug("Sent too many, ignoring...")
                # Every remaining story would be skipped as well.
                break
            if story['story_date'] <= last_notification_date and not force:
                logging.debug("Story date older than last notification date: %s <= %s" % (story['story_date'], last_notification_date))
                continue

            if story['story_date'] > user_feed_notification.last_notification_date:
                user_feed_notification.last_notification_date = story['story_date']
                user_feed_notification.save()

            if index not in unescaped:
                story['story_content'] = HTMLParser().unescape(story['story_content'])
                unescaped.add(index)

            sent = user_feed_notification.push_story_notification(story, classifiers, usersub)
            if sent:
                sent_count += 1
                total_sent_count += 1

    return total_sent_count, len(notifications)
def add_missing_feeds(self):
    """Subscribe the user to folder feeds that lack a subscription.

    New subscriptions are created with ``needs_unread_recalc`` set; an
    existing one found without the flag gets it set and is saved.
    """
    in_folders = set(self.flat())
    subscribed = set(
        us.feed_id
        for us in UserSubscription.objects.filter(user=self.user).only('feed')
    )

    missing_feeds = in_folders - subscribed
    if missing_feeds:
        logging.debug(" ---> %s is missing %s feeds. Adding %s..." % (
            self.user, len(missing_feeds), missing_feeds))
        for feed_id in missing_feeds:
            feed = Feed.get_by_id(feed_id)
            if not feed:
                continue
            subscription, _ = UserSubscription.objects.get_or_create(
                user=self.user,
                feed=feed,
                defaults={'needs_unread_recalc': True},
            )
            if not subscription.needs_unread_recalc:
                subscription.needs_unread_recalc = True
                subscription.save()
def force_push(request):
    """
    Intended to force a push notification for a feed for testing. Handier than the console.

    Reads ``feed_id`` (required) and ``count`` (default 1) from the query string.
    """
    user = get_user(request)
    feed_id = request.GET['feed_id']
    count = int(request.GET.get('count', 1))

    feed = Feed.get_by_id(feed_id)
    logging.user(user, "~BM~FWForce pushing %s stories: ~SB%s" % (count, feed))

    sent_count, user_count = MUserFeedNotification.push_feed_notifications(
        feed_id, new_stories=count, force=True)

    message = "Pushed %s notifications to %s users" % (sent_count, user_count)
    return {"message": message}
def UpdateFeeds(feed_pks):
    """Update the given feeds, recording a timeout instead of aborting.

    ``feed_pks`` may be a single pk or a list. Queue entries in
    ``tasked_feeds`` that resolve to no feed, or to a different feed, are
    removed. A ``SoftTimeLimitExceeded`` raised by one feed's update is saved
    to that feed's history (code 505) and processing moves on to the next.
    """
    from apps.rss_feeds.models import Feed
    from apps.statistics.models import MStatistics

    r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
    mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
    compute_scores = mongodb_replication_lag < 10

    profiler = DBProfilerMiddleware()
    profiler_activated = profiler.process_celery()
    if profiler_activated:
        MongoDumpMiddleware().process_celery(profiler)
        RedisDumpMiddleware().process_celery(profiler)

    quick = float(MStatistics.get('quick_fetch', 0))
    updates_off = MStatistics.get('updates_off', False)
    options = {
        'quick': quick,
        'updates_off': updates_off,
        'compute_scores': compute_scores,
        'mongodb_replication_lag': mongodb_replication_lag,
    }

    pks = feed_pks if isinstance(feed_pks, list) else [feed_pks]
    for feed_pk in pks:
        feed = Feed.get_by_id(feed_pk)
        if not feed or feed.pk != int(feed_pk):
            logging.info(
                " ---> ~FRRemoving feed_id %s from tasked_feeds queue, points to %s..."
                % (feed_pk, feed and feed.pk))
            r.zrem('tasked_feeds', feed_pk)
        if feed:
            try:
                feed.update(**options)
            except SoftTimeLimitExceeded as e:
                feed.save_feed_history(505, 'Timeout', e)
                logging.info(
                    " ---> [%-30s] ~BR~FWTime limit hit!~SB~FR Moving on to next feed..."
                    % feed)

    if profiler_activated:
        profiler.process_celery_finished()
def run(self, feed_pks, **kwargs):
    """Update the given feeds, logging any exception with its traceback.

    ``feed_pks`` may be a single pk or a list.
    """
    try:
        from apps.rss_feeds.models import Feed

        r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)
        options = {}

        pks = feed_pks if isinstance(feed_pks, list) else [feed_pks]
        for feed_pk in pks:
            feed = Feed.get_by_id(feed_pk)
            # if feed is null or feed.pk != feed_pk, then delete the feed_pk from tasked_feeds
            # otherwise delete the feed_pk after the fetch is over.
            if not feed or feed.pk != int(feed_pk):
                logging.info(" ---> ~FRRemoving feed_id %s from tasked_feeds queue, points to %s..." % (feed_pk, feed and feed.pk))
                r.zrem('tasked_feeds', feed_pk)
            if not feed:
                continue
            feed.update(**options)
    except Exception as e:
        logging.error(str(e) + traceback.format_exc() + '\n' + 'error from: UpdateFeeds\n')
def forwards(self, orm):
    """Re-save stories lacking ``story_hash`` for feeds with premium subscribers.

    Walks every candidate feed pk up to and including the latest one. Feeds
    that do not resolve, or that have fewer than one active premium
    subscriber, are skipped. A failure while saving a feed's stories is
    printed and the migration continues.
    """
    from apps.rss_feeds.models import MStory, Feed
    import time

    batch = 0  # manual resume knob: pks below batch*100000 are skipped
    latest_pk = Feed.objects.latest('pk').pk

    # xrange is half-open, so +1 is needed to include the newest feed.
    for f in xrange(latest_pk + 1):
        if f < batch*100000:
            continue
        start = time.time()
        try:
            feed = Feed.get_by_id(f)
        except Feed.DoesNotExist:
            continue
        if not feed:
            continue
        cp1 = time.time() - start
        if feed.active_premium_subscribers < 1:
            continue
        stories = MStory.objects.filter(story_feed_id=feed.pk, story_hash__exists=False)
        cp2 = time.time() - start
        try:
            for story in stories:
                story.save()
        except Exception as e:
            print(" ***> (%s) %s" % (f, e))
        cp3 = time.time() - start
        print("%3s stories: %s (%s/%s/%s)" % (
            stories.count(), feed, round(cp1, 2), round(cp2, 2), round(cp3, 2)))
def run(self, feed_pks, **kwargs):
    """Update the given feeds, logging any that do not exist.

    ``feed_pks`` may be a single pk or a list. ``Feed.get_by_id`` can hand
    back a falsy value instead of raising, so that case is converted into
    ``Feed.DoesNotExist`` explicitly; otherwise the handler below never fires
    and the task dies with ``AttributeError`` on the first missing feed.
    """
    from apps.rss_feeds.models import Feed
    from apps.statistics.models import MStatistics

    mongodb_replication_lag = int(MStatistics.get("mongodb_replication_lag", 0))
    compute_scores = bool(mongodb_replication_lag < 10)

    options = {
        "fake": bool(MStatistics.get("fake_fetch")),
        "quick": float(MStatistics.get("quick_fetch", 0)),
        "compute_scores": compute_scores,
        "mongodb_replication_lag": mongodb_replication_lag,
    }

    if not isinstance(feed_pks, list):
        feed_pks = [feed_pks]

    for feed_pk in feed_pks:
        try:
            feed = Feed.get_by_id(feed_pk)
            if not feed:
                raise Feed.DoesNotExist
            feed.update(**options)
        except Feed.DoesNotExist:
            logging.info(" ---> Feed doesn't exist: [%s]" % feed_pk)
def run(self, feed_pks, **kwargs):
    """Update the given feeds with options read from ``MStatistics``.

    ``feed_pks`` may be a single pk or a list. When the DB profiler
    activates, the Mongo and Redis dump middlewares are attached to it and it
    is finished after all feeds have been processed.
    """
    from apps.rss_feeds.models import Feed
    from apps.statistics.models import MStatistics

    r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
    mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
    compute_scores = mongodb_replication_lag < 10

    profiler = DBProfilerMiddleware()
    profiler_activated = profiler.process_celery()
    if profiler_activated:
        MongoDumpMiddleware().process_celery(profiler)
        RedisDumpMiddleware().process_celery(profiler)

    quick = float(MStatistics.get('quick_fetch', 0))
    updates_off = MStatistics.get('updates_off', False)
    options = {
        'quick': quick,
        'updates_off': updates_off,
        'compute_scores': compute_scores,
        'mongodb_replication_lag': mongodb_replication_lag,
    }

    if isinstance(feed_pks, list):
        pks = feed_pks
    else:
        pks = [feed_pks]

    for feed_pk in pks:
        feed = Feed.get_by_id(feed_pk)
        mismatched = not feed or feed.pk != int(feed_pk)
        if mismatched:
            logging.info(
                " ---> ~FRRemoving feed_id %s from tasked_feeds queue, points to %s..."
                % (feed_pk, feed and feed.pk))
            r.zrem('tasked_feeds', feed_pk)
        if feed:
            feed.update(**options)

    if profiler_activated:
        profiler.process_celery_finished()
def run(self, feed_pks, **kwargs):
    """Update the given feeds, logging any that do not exist.

    ``feed_pks`` may be a single pk or a list. ``Feed.get_by_id`` can hand
    back a falsy value instead of raising, so that case is converted into
    ``Feed.DoesNotExist`` explicitly; otherwise the handler below never fires
    and the task dies with ``AttributeError`` on the first missing feed.
    """
    from apps.rss_feeds.models import Feed
    from apps.statistics.models import MStatistics

    mongodb_replication_lag = int(
        MStatistics.get('mongodb_replication_lag', 0))
    compute_scores = bool(mongodb_replication_lag < 10)

    options = {
        'fake': bool(MStatistics.get('fake_fetch')),
        'quick': float(MStatistics.get('quick_fetch', 0)),
        'compute_scores': compute_scores,
        'mongodb_replication_lag': mongodb_replication_lag,
    }

    if not isinstance(feed_pks, list):
        feed_pks = [feed_pks]

    for feed_pk in feed_pks:
        try:
            feed = Feed.get_by_id(feed_pk)
            if not feed:
                raise Feed.DoesNotExist
            feed.update(**options)
        except Feed.DoesNotExist:
            logging.info(" ---> Feed doesn't exist: [%s]" % feed_pk)
def subscribe(cls, user_id, category_title):
    """Subscribe a user to all feeds of a category and file them in a folder.

    Args:
        user_id: Id of the user to subscribe.
        category_title: Title used to look up the category; also used as the
            folder name.

    A UserSubscription is ensured for every feed id in the category. Then a
    folder named after the category is added to the user's folders, and each
    feed that ``Feed.get_by_id`` can resolve is placed in it.
    """
    category = cls.objects.get(title=category_title)

    # Pass 1: make sure a subscription row exists for each feed id.
    for feed_id in category.feed_ids:
        UserSubscription.objects.get_or_create(
            feed_id=feed_id,
            user_id=user_id,
            defaults={
                'needs_unread_recalc': True,
                'active': True,
            })

    usf = UserSubscriptionFolders.objects.get_or_create(
        user_id=user_id, defaults={'folders': '[]'})[0]
    usf.add_folder('', category.title)

    # Pass 2: place every resolvable feed into the category's folder.
    folders = json.decode(usf.folders)
    for feed_id in category.feed_ids:
        feed = Feed.get_by_id(feed_id)
        if feed:
            folders = add_object_to_folder(feed.pk, category.title, folders)

    usf.folders = json.encode(folders)
    usf.save()
def exception_change_feed_link(request):
    """Change a feed's site link, repairing a broken feed or branching a good one.

    Reads ``feed_id`` and ``feed_link`` from ``request.POST``.

    * If the feed currently has a page or feed exception, a feed address is
      looked up from the new link and, when found, applied to the feed.
    * Otherwise a feed with the same address and the new link is fetched or
      created and marked as branched from the original.

    The resulting feed is updated and the requesting user's subscription is
    moved onto it when necessary.

    Returns:
        A dict with ``code`` (1 on success, -1 on failure) and the feed's
        fetch/page/push history. On success it also carries ``feeds`` and
        ``new_feed_id``.
    """
    feed_id = request.POST['feed_id']
    feed = get_object_or_404(Feed, pk=feed_id)
    original_feed = feed
    feed_link = request.POST['feed_link']
    timezone = request.user.profile.timezone
    code = -1

    def history_fields():
        # The three history entries shared by both return payloads.
        fetch_history = MFetchHistory.feed(feed_id, timezone=timezone)
        return {
            'feed_fetch_history': fetch_history['feed_fetch_history'],
            'page_fetch_history': fetch_history['page_fetch_history'],
            'push_history': fetch_history['push_history'],
        }

    has_exception = feed.has_page_exception or feed.has_feed_exception
    if not has_exception:
        # Branch good feed
        logging.user(request, "~FRBranching feed by link: ~SB%s~SN to ~SB%s" % (feed.feed_link, feed_link))
        feed = Feed.objects.get_or_create(feed_address=feed.feed_address,
                                          feed_link=feed_link)[0]
        code = 1
        if feed.pk != original_feed.pk:
            try:
                feed.branch_from_feed = original_feed.branch_from_feed or original_feed
            except Feed.DoesNotExist:
                feed.branch_from_feed = original_feed
            feed.feed_link_locked = True
            feed.save()
    else:
        # Fix broken feed
        logging.user(request, "~FRFixing feed exception by link: ~SB%s~SN to ~SB%s" % (feed.feed_link, feed_link))
        feed_address = feedfinder.feed(feed_link)
        if feed_address:
            code = 1
            feed.has_page_exception = False
            feed.active = True
            feed.fetched_once = False
            feed.feed_link = feed_link
            feed.feed_address = feed_address
            duplicate_feed = feed.schedule_feed_fetch_immediately()
            if duplicate_feed:
                # Scheduling reported a duplicate: continue with that feed.
                feed = Feed.objects.get(pk=duplicate_feed.pk)
                feed.schedule_feed_fetch_immediately()
                feed.has_page_exception = False
                feed.active = True
                feed.save()

    # Re-fetch after updating, since the update may hand back another feed.
    feed = Feed.get_by_id(feed.update().pk)

    try:
        usersub = UserSubscription.objects.get(user=request.user, feed=feed)
    except UserSubscription.DoesNotExist:
        usersubs = UserSubscription.objects.filter(user=request.user, feed=original_feed)
        if not usersubs:
            failure = {'code': -1}
            failure.update(history_fields())
            return failure
        usersub = usersubs[0]
        usersub.switch_feed(feed, original_feed)

    usersub.calculate_feed_scores(silent=False)

    feed.update_all_statistics()
    classifiers = get_classifiers_for_user(usersub.user, feed_id=usersub.feed_id)

    if feed and feed.has_feed_exception:
        code = -1

    feeds = {
        original_feed.pk: usersub.canonical(full=True, classifiers=classifiers),
    }
    history = history_fields()
    result = {
        'code': code,
        'feeds': feeds,
        'new_feed_id': usersub.feed_id,
    }
    result.update(history)
    return result
def __init__(self, feed_id, options):
    """Store ``options`` and load the feed for ``feed_id``.

    ``self.fpf`` starts out as ``None``.
    """
    self.options = options
    self.fpf = None
    self.feed = Feed.get_by_id(feed_id)
def refresh_feed(self):
    """Reload ``self.feed`` and follow it if its primary key has changed.

    When the feed returned for ``self.feed_id`` has a different pk, the change
    is logged and ``self.feed_id`` is set to the new pk.
    """
    previous_id = self.feed_id
    self.feed = Feed.get_by_id(previous_id)

    current_id = self.feed.pk
    if previous_id == current_id:
        return

    logging.debug(" ***> Feed has changed: from %s to %s" % (previous_id, current_id))
    self.feed_id = current_id
def save_story(request, token=None):
    """Save (star) a story submitted from an external site.

    The caller is identified either by the logged-in ``request.user`` or, when
    not logged in, by a profile whose ``secret_token`` equals ``token``.

    POST fields read: ``story_url``, ``title`` (both required), ``user_tags``
    / ``user_tags[]``, ``add_user_tag`` (comma-separated), ``content``,
    ``rss_url``, ``user_notes``, ``feed_id``.

    Returns:
        A text/plain HttpResponse containing an encoded dict with ``code``,
        ``message`` and ``story``. When no profile can be resolved the
        response has ``code`` -1 and ``story`` None.
    """
    code = 0
    story_url = request.POST['story_url']
    user_tags = request.POST.getlist('user_tags') or request.POST.getlist('user_tags[]') or []
    add_user_tag = request.POST.get('add_user_tag', None)
    title = request.POST['title']
    content = request.POST.get('content', None)
    rss_url = request.POST.get('rss_url', None)
    user_notes = request.POST.get('user_notes', None)
    feed_id = request.POST.get('feed_id', None) or 0
    feed = None
    message = None
    profile = None

    if request.user.is_authenticated():
        profile = request.user.profile
    else:
        try:
            profile = Profile.objects.get(secret_token=token)
        except Profile.DoesNotExist:
            code = -1
            if token:
                message = "Not authenticated, couldn't find user by token."
            else:
                message = "Not authenticated, no token supplied and not authenticated."

    if not profile:
        return HttpResponse(json.encode({
            'code': code,
            'message': message,
            'story': None,
        }), content_type='text/plain')

    if feed_id:
        feed = Feed.get_by_id(feed_id)
    else:
        # No feed id supplied: try the RSS url first, then the story url.
        if rss_url:
            logging.user(request.user, "~FBFinding feed (save_story): %s" % rss_url)
            feed = Feed.get_feed_from_url(rss_url, create=True, fetch=True)
        if not feed:
            logging.user(request.user, "~FBFinding feed (save_story): %s" % story_url)
            feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
        if feed:
            feed_id = feed.pk

    if content:
        # Resolve relative links in the supplied HTML against the story url.
        content = lxml.html.fromstring(content)
        content.make_links_absolute(story_url)
        content = lxml.html.tostring(content)
    else:
        # No content supplied: fetch the document from the story url.
        importer = TextImporter(story=None, story_url=story_url, request=request, debug=settings.DEBUG)
        document = importer.fetch(skip_save=True, return_document=True)
        content = document['content']
        if not title:
            title = document['title']

    if add_user_tag:
        user_tags = user_tags + add_user_tag.split(',')

    starred_story = MStarredStory.objects.filter(user_id=profile.user.pk,
                                                 story_feed_id=feed_id,
                                                 story_guid=story_url).limit(1).first()
    if not starred_story:
        story_db = {
            "story_guid": story_url,
            "story_permalink": story_url,
            "story_title": title,
            "story_feed_id": feed_id,
            "story_content": content,
            "story_date": datetime.datetime.now(),
            "starred_date": datetime.datetime.now(),
            "user_id": profile.user.pk,
            "user_tags": user_tags,
            "user_notes": user_notes,
        }
        starred_story = MStarredStory.objects.create(**story_db)
        logging.user(profile.user, "~BM~FCStarring story from site: ~SB%s: %s" % (story_url, user_tags))
        message = "Saving story from site: %s: %s" % (story_url, user_tags)
    else:
        starred_story.story_content = content
        starred_story.story_title = title
        starred_story.user_tags = user_tags
        starred_story.story_permalink = story_url
        starred_story.story_guid = story_url
        starred_story.story_feed_id = feed_id
        starred_story.user_notes = user_notes
        starred_story.save()
        logging.user(profile.user, "~BM~FC~SBUpdating~SN starred story from site: ~SB%s: %s" % (story_url, user_tags))
        message = "Updating saved story from site: %s: %s" % (story_url, user_tags)

    # Schedule the recount for the user the story was saved for. With token
    # authentication request.user is not that user, so use profile.user, as
    # every other statement in this function does.
    MStarredStoryCounts.schedule_count_tags_for_user(profile.user.pk)

    response = HttpResponse(json.encode({
        'code': code,
        'message': message,
        'story': starred_story,
    }), content_type='text/plain')
    response['Access-Control-Allow-Origin'] = '*'
    response['Access-Control-Allow-Methods'] = 'POST'

    return response
def share_story(request, token=None):
    """Share a story submitted from an external site on the user's blurblog.

    The caller is identified either by the logged-in ``request.user`` or, when
    not logged in, by a profile whose ``secret_token`` equals ``token``.

    POST fields read: ``story_url``, ``comments``, ``title`` (all required),
    ``content``, ``rss_url``, ``feed_id``.

    Returns:
        A text/plain HttpResponse containing an encoded dict with ``code``,
        ``message`` and ``story``. When no profile can be resolved the
        response has ``code`` -1 and ``story`` None.
    """
    code = 0
    post = request.POST
    story_url = post['story_url']
    comments = post['comments']
    title = post['title']
    content = post.get('content', None)
    rss_url = post.get('rss_url', None)
    feed_id = post.get('feed_id', None) or 0
    feed = None
    message = None
    profile = None

    if request.user.is_authenticated:
        profile = request.user.profile
    else:
        try:
            profile = Profile.objects.get(secret_token=token)
        except Profile.DoesNotExist:
            code = -1
            message = ("Not authenticated, couldn't find user by token."
                       if token else
                       "Not authenticated, no token supplied and not authenticated.")

    if not profile:
        return HttpResponse(json.encode({
            'code': code,
            'message': message,
            'story': None,
        }), content_type='text/plain')

    if not feed_id:
        # No feed id supplied: try the RSS url first, then the story url.
        if rss_url:
            logging.user(request.user, "~FBFinding feed (share_story): %s" % rss_url)
            feed = Feed.get_feed_from_url(rss_url, create=True, fetch=True)
        if not feed:
            logging.user(request.user, "~FBFinding feed (share_story): %s" % story_url)
            feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
        if feed:
            feed_id = feed.pk
    else:
        feed = Feed.get_by_id(feed_id)

    if not content:
        # No content supplied: fetch the document from the story url.
        importer = TextImporter(story=None, story_url=story_url, request=request, debug=settings.DEBUG)
        document = importer.fetch(skip_save=True, return_document=True)
        content = document['content']
        if not title:
            title = document['title']
    else:
        # Resolve relative links in the supplied HTML against the story url.
        html_tree = lxml.html.fromstring(content)
        html_tree.make_links_absolute(story_url)
        content = lxml.html.tostring(html_tree)

    sharer_id = profile.user.pk
    has_comments = bool(comments)

    shared_story = MSharedStory.objects.filter(user_id=sharer_id,
                                               story_feed_id=feed_id,
                                               story_guid=story_url).limit(1).first()
    if shared_story:
        # Already shared by this user: refresh the stored copy.
        shared_story.story_content = content
        shared_story.story_title = title
        shared_story.comments = comments
        shared_story.story_permalink = story_url
        shared_story.story_guid = story_url
        shared_story.has_comments = has_comments
        shared_story.story_feed_id = feed_id
        shared_story.save()
        logging.user(profile.user, "~BM~FY~SBUpdating~SN shared story from site: ~SB%s: %s" % (story_url, comments))
        message = "Updating shared story from site: %s: %s" % (story_url, comments)
    else:
        shared_story = MSharedStory.objects.create(
            story_guid=story_url,
            story_permalink=story_url,
            story_title=title,
            story_feed_id=feed_id,
            story_content=content,
            story_date=datetime.datetime.now(),
            user_id=sharer_id,
            comments=comments,
            has_comments=has_comments,
        )
        # Flag every social subscription to this user for an unread recount.
        for follower_sub in MSocialSubscription.objects.filter(subscription_user_id=sharer_id):
            follower_sub.needs_unread_recalc = True
            follower_sub.save()
        logging.user(profile.user, "~BM~FYSharing story from site: ~SB%s: %s" % (story_url, comments))
        message = "Sharing story from site: %s: %s" % (story_url, comments)

    # Mark the story read for the sharer, through their subscription to
    # themselves when one exists.
    try:
        own_sub = MSocialSubscription.objects.get(user_id=sharer_id,
                                                  subscription_user_id=sharer_id)
    except MSocialSubscription.DoesNotExist:
        own_sub = None

    if own_sub:
        own_sub.mark_story_ids_as_read([shared_story.story_hash],
                                       shared_story.story_feed_id,
                                       request=request)
    else:
        RUserStory.mark_read(sharer_id, shared_story.story_feed_id, shared_story.story_hash)

    shared_story.publish_update_to_subscribers()

    response = HttpResponse(json.encode({
        'code': code,
        'message': message,
        'story': shared_story,
    }), content_type='text/plain')
    response['Access-Control-Allow-Origin'] = '*'
    response['Access-Control-Allow-Methods'] = 'POST'

    return response
def __unicode__(self):
    """Return ``"<feed> - <folder> (<count>)"`` for this object."""
    fields = (Feed.get_by_id(self.feed_id), self.folder, self.count)
    return "%s - %s (%s)" % fields
def refresh_feed(self):
    """Reload ``self.feed`` by looking up ``self.feed_id`` with ``Feed.get_by_id``."""
    reloaded = Feed.get_by_id(self.feed_id)
    self.feed = reloaded