Example #1
def mark_story_as_read(request):
    story_ids = request.REQUEST.getlist("story_id")
    feed_id = int(request.REQUEST["feed_id"])

    try:
        usersub = UserSubscription.objects.select_related("feed").get(user=request.user, feed=feed_id)
    except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
        duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
        if duplicate_feed:
            try:
                usersub = UserSubscription.objects.get(user=request.user, feed=duplicate_feed[0].feed)
            except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
                return dict(code=-1)
        else:
            # Without this fallback, `usersub` would be unbound below.
            return dict(code=-1)

    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()

    data = dict(code=0, payload=story_ids)

    if len(story_ids) > 1:
        logging.info(" ---> [%s] ~FYRead %s stories in feed: %s" % (request.user, len(story_ids), usersub.feed))
    else:
        logging.info(" ---> [%s] ~FYRead story in feed: %s" % (request.user, usersub.feed))

    for story_id in story_ids:
        story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
        now = datetime.datetime.utcnow()
        m = MUserStory(story=story, user_id=request.user.pk, feed_id=feed_id, read_date=now)
        try:
            m.save()
        except OperationError:
            logging.info(" ---> [%s] ~BRMarked story as read: Duplicate Story -> %s" % (request.user, story_id))

    return data
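These examples assume MongoEngine documents roughly shaped like the following. This is a minimal sketch with field names inferred from the calls above (user_id, feed_id, story, story_id, read_date), not the actual NewsBlur schema; the unique compound index is an assumption that would explain the OperationError caught on duplicate saves.

import datetime
import mongoengine as mongo

class MStory(mongo.Document):
    # Fields inferred from MStory.objects(story_feed_id=..., story_guid=...)
    story_feed_id = mongo.IntField()
    story_guid    = mongo.StringField()
    story_date    = mongo.DateTimeField()
    meta = {'collection': 'stories'}

class MUserStory(mongo.Document):
    # One read-story record per (user, feed, story).
    user_id   = mongo.IntField()
    feed_id   = mongo.IntField()
    story_id  = mongo.StringField()   # the story's guid (see Example #20)
    read_date = mongo.DateTimeField(default=datetime.datetime.utcnow)
    story     = mongo.ReferenceField(MStory)
    meta = {
        'collection': 'userstories',
        # Assumed: a unique index makes a second save() of the same
        # (user, feed, story) raise OperationError, as Example #1 catches.
        'indexes': [{'fields': ['user_id', 'feed_id', 'story_id'], 'unique': True}],
    }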
Example #2
def bootstrap_userstories():
    print "Mongo DB userstories: %s" % MUserStory.objects().count()
    # db.userstories.drop()
    print "Dropped! Mongo DB userstories: %s" % MUserStory.objects().count()

    print "UserStories: %s" % UserStory.objects.all().count()
    pprint(db.userstories.index_information())

    userstories = UserStory.objects.all().values()
    for userstory in userstories:
        try:
            story = Story.objects.get(pk=userstory['story_id'])
        except Story.DoesNotExist:
            continue
        try:
            userstory['story'] = MStory.objects(story_feed_id=story.story_feed.pk, story_guid=story.story_guid)[0]
        except:
            print '!',
            continue
        print '.',
        del userstory['id']
        del userstory['opinion']
        del userstory['story_id']
        try:
            MUserStory(**userstory).save()
        except:
            print '\n\n!\n\n'
            continue

    print "\nMongo DB userstories: %s" % MUserStory.objects().count()
Example #3
def mark_story_as_read(request):
    story_ids = request.REQUEST.getlist('story_id')
    feed_id = int(request.REQUEST['feed_id'])
    
    usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()
        
    data = dict(code=0, payload=story_ids)
    
    if len(story_ids) > 1:
        logging.debug(" ---> [%s] Read %s stories in feed: %s" % (request.user, len(story_ids), usersub.feed))
    else:
        logging.debug(" ---> [%s] Read story in feed: %s" % (request.user, usersub.feed))
        
    for story_id in story_ids:
        story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
        now = datetime.datetime.utcnow()
        m = MUserStory(story=story, user_id=request.user.pk, feed_id=feed_id, read_date=now)
        try:
            m.save()
        except OperationError:
            logging.info(' ---> [%s] *** Marked story as read: Duplicate Story -> %s' % (request.user, story_id))
    
    return data
Example #4
def bootstrap_userstories():
    print "Mongo DB userstories: %s" % MUserStory.objects().count()
    # db.userstories.drop()
    print "Dropped! Mongo DB userstories: %s" % MUserStory.objects().count()

    print "UserStories: %s" % UserStory.objects.all().count()
    pprint(db.userstories.index_information())

    userstories = UserStory.objects.all().values()
    for userstory in userstories:
        try:
            story = Story.objects.get(pk=userstory['story_id'])
        except Story.DoesNotExist:
            continue
        try:
            userstory['story'] = MStory.objects(
                story_feed_id=story.story_feed.pk,
                story_guid=story.story_guid)[0]
        except:
            print '!',
            continue
        print '.',
        del userstory['id']
        del userstory['opinion']
        del userstory['story_id']
        try:
            MUserStory(**userstory).save()
        except:
            print '\n\n!\n\n'
            continue

    print "\nMongo DB userstories: %s" % MUserStory.objects().count()
Example #5
def delete_feed(request):
    feed_id = int(request.POST['feed_id'])
    user_sub = get_object_or_404(UserSubscription, user=request.user, feed=feed_id)
    user_sub.delete()
    
    MUserStory.objects(user_id=request.user.pk, feed_id=feed_id).delete()
    
    def _find_feed_in_folders(old_folders):
        new_folders = []
        
        for k, folder in enumerate(old_folders):
            if isinstance(folder, int):
                if folder == feed_id:
                    logging.info(" ---> [%s] Delete folder: %s'th item: %s folders/feeds" % (
                        request.user, k, len(old_folders)
                    ))
                    # folders.remove(folder)
                else:
                    new_folders.append(folder)
            elif isinstance(folder, dict):
                for f_k, f_v in folder.items():
                    new_folders.append({f_k: _find_feed_in_folders(f_v)})

        return new_folders
        
    user_sub_folders_object = UserSubscriptionFolders.objects.get(user=request.user)
    user_sub_folders = json.decode(user_sub_folders_object.folders)
    user_sub_folders = _find_feed_in_folders(user_sub_folders)
    user_sub_folders_object.folders = json.encode(user_sub_folders)
    user_sub_folders_object.save()
    
    return dict(code=1)
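To illustrate what _find_feed_in_folders does: the folders payload is nested JSON in which ints are feed ids and dicts are named folders. With hypothetical data and feed_id = 42:

# Hypothetical folders payload; 42 is the feed being deleted.
old_folders = [1, 42, {'Tech': [42, 7, {'Linux': [42, 9]}]}]
# The recursive rewrite drops every occurrence of 42 but keeps structure:
# [1, {'Tech': [7, {'Linux': [9]}]}]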
Example #6
    def process_feed_wrapper(self, feed_queue):
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        if current_process._identity:
            identity = current_process._identity[0]

        for feed_id in feed_queue:
            ret_entries = {
                ENTRY_NEW: 0,
                ENTRY_UPDATED: 0,
                ENTRY_SAME: 0,
                ENTRY_ERR: 0
            }
            start_time = datetime.datetime.utcnow()

            try:
                feed = self.refresh_feed(feed_id)

                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()

                if ((fetched_feed and ret_feed == FEED_OK)
                        or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()

                    feed = self.refresh_feed(feed_id)

                    if ret_entries.get(ENTRY_NEW) or self.options[
                            'force'] or not feed.fetched_once:
                        if not feed.fetched_once:
                            feed.fetched_once = True
                            feed.save()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug(
                                '   ---> [%-30s] Unread count took too long...'
                                % (unicode(feed)[:30], ))
                    cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
                    # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
                    #     feed.get_stories(force=True)
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] Feed is now gone...' %
                              (unicode(feed_id)[:30]))
                continue
Example #7
    def process_feed_wrapper(self, feed_queue):
        """ wrapper for ProcessFeed
        """
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        if current_process._identity:
            identity = current_process._identity[0]
        for feed_id in feed_queue:
            ret_entries = {ENTRY_NEW: 0, ENTRY_UPDATED: 0, ENTRY_SAME: 0, ENTRY_ERR: 0}
            start_time = datetime.datetime.utcnow()

            feed = self.refresh_feed(feed_id)

            try:
                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()

                if (fetched_feed and ret_feed == FEED_OK) or self.options["force"]:
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()

                    feed = self.refresh_feed(feed_id)

                    if ret_entries.get(ENTRY_NEW) or self.options["force"] or not feed.fetched_once:
                        if not feed.fetched_once:
                            feed.fetched_once = True
                            feed.save()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        user_subs = UserSubscription.objects.filter(feed=feed)
                        logging.debug(
                            u"   ---> [%-30s] Computing scores for all feed subscribers: %s subscribers"
                            % (unicode(feed)[:30], user_subs.count())
                        )
                        stories_db = MStory.objects(story_feed_id=feed.pk, story_date__gte=UNREAD_CUTOFF)
                        for sub in user_subs:
                            cache.delete("usersub:%s" % sub.user_id)
                            silent = False if self.options["verbose"] >= 2 else True
                            sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
                    cache.delete("feed_stories:%s-%s-%s" % (feed.id, 0, 25))
                    # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
                    #     feed.get_stories(force=True)
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug("   ---> [%-30s] Feed is now gone..." % (unicode(feed)[:30]))
                # Continue on to the next feed; returning here would abort
                # the rest of the queue (cf. Examples #6 and #8).
                continue
Example #8
    def process_feed_wrapper(self, feed_queue):
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        if current_process._identity:
            identity = current_process._identity[0]
            
        for feed_id in feed_queue:
            ret_entries = {
                ENTRY_NEW: 0,
                ENTRY_UPDATED: 0,
                ENTRY_SAME: 0,
                ENTRY_ERR: 0
            }
            start_time = time.time()
            ret_feed = FEED_ERREXC
            try:
                feed = self.refresh_feed(feed_id)

                if self.options.get('fake'):
                    logging.debug('   ---> [%-30s] ~BGFaking fetch, skipping...' % (unicode(feed)[:30],))
                    continue
                
                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()
                
                if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()
                    
                    feed = self.refresh_feed(feed_id)
                    
                    if ret_entries.get(ENTRY_NEW) or self.options['force']:
                        if not feed.known_good:
                            feed.known_good = True
                            feed.save()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug('   ---> [%-30s] Unread count took too long...' % (unicode(feed)[:30],))
                    cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
                    # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
                    #     feed.get_stories(force=True)
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] Feed is now gone...' % (unicode(feed_id)[:30]))
                continue
Example #9
def mark_feed_as_read(request):
    feed_id = int(request.REQUEST['feed_id'])
    feed = Feed.objects.get(id=feed_id)
    code = 0
    
    us = UserSubscription.objects.get(feed=feed, user=request.user)
    try:
        us.mark_feed_read()
    except IntegrityError:
        code = -1
    else:
        code = 1
        
    logging.info(" ---> [%s] Marking feed as read: %s" % (request.user, feed,))
    MUserStory.objects(user_id=request.user.pk, feed_id=feed_id).delete()
    return dict(code=code)
Example #10
    def trim_feed(self):
        from apps.reader.models import MUserStory

        trim_cutoff = 500
        if self.active_subscribers <= 1:
            trim_cutoff = 50
        elif self.active_subscribers <= 3:
            trim_cutoff = 100
        elif self.active_subscribers <= 5:
            trim_cutoff = 150
        elif self.active_subscribers <= 10:
            trim_cutoff = 250
        elif self.active_subscribers <= 25:
            trim_cutoff = 350
        stories = MStory.objects(story_feed_id=self.pk).order_by("-story_date")
        if stories.count() > trim_cutoff:
            # print 'Found %s stories in %s. Trimming...' % (stories.count(), self),
            story_trim_date = stories[trim_cutoff].story_date
            extra_stories = MStory.objects(story_feed_id=self.pk, story_date__lte=story_trim_date)
            extra_stories.delete()
            # print "Deleted stories, %s left." % MStory.objects(story_feed_id=self.pk).count()
            userstories = MUserStory.objects(feed_id=self.pk, read_date__lte=story_trim_date)
            if userstories.count():
                # print "Found %s user stories. Deleting..." % userstories.count()
                userstories.delete()
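The if/elif ladder above scales retention with audience size. A table-driven equivalent, with the thresholds and cutoffs copied verbatim from the code:

def story_trim_cutoff(active_subscribers):
    # (max subscribers, stories to keep) pairs from trim_feed above.
    for threshold, cutoff in ((1, 50), (3, 100), (5, 150), (10, 250), (25, 350)):
        if active_subscribers <= threshold:
            return cutoff
    return 500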
Example #11
def mark_story_as_unread(request):
    story_id = request.POST['story_id']
    feed_id = int(request.POST['feed_id'])

    try:
        usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
    except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
        duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
        if duplicate_feed:
            try:
                usersub = UserSubscription.objects.get(user=request.user, 
                                                       feed=duplicate_feed[0].feed)
            except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
                return dict(code=-1)
        else:
            # Without this fallback, `usersub` would be unbound below.
            return dict(code=-1)
                
    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()
        
    data = dict(code=0, payload=dict(story_id=story_id))
    logging.user(request, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed))
        
    story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
    m = MUserStory.objects(story=story, user_id=request.user.pk, feed_id=feed_id)
    m.delete()
    
    return data
Example #12
def mark_story_as_unread(request):
    story_id = request.POST['story_id']
    feed_id = int(request.POST['feed_id'])

    try:
        usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
    except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
        duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
        if duplicate_feed:
            try:
                usersub = UserSubscription.objects.get(user=request.user, 
                                                       feed=duplicate_feed[0].feed)
            except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
                return dict(code=-1)
        else:
            # Without this fallback, `usersub` would be unbound below.
            return dict(code=-1)
                
    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()
        
    data = dict(code=0, payload=dict(story_id=story_id))
    logging.user(request.user, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed))
        
    story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
    m = MUserStory.objects(story=story, user_id=request.user.pk, feed_id=feed_id)
    m.delete()
    
    return data
Example #13
    def trim_feed(self, verbose=False):
        from apps.reader.models import MUserStory
        trim_cutoff = 500
        if self.active_subscribers <= 1:
            trim_cutoff = 50
        elif self.active_subscribers <= 3:
            trim_cutoff = 100
        elif self.active_subscribers <= 5:
            trim_cutoff = 150
        elif self.active_subscribers <= 10:
            trim_cutoff = 250
        elif self.active_subscribers <= 25:
            trim_cutoff = 350
        stories = MStory.objects(story_feed_id=self.pk).order_by('-story_date')
        if stories.count() > trim_cutoff:
            if verbose:
                print 'Found %s stories in %s. Trimming to %s...' % (
                    stories.count(), self, trim_cutoff)
            story_trim_date = stories[trim_cutoff].story_date
            extra_stories = MStory.objects(story_feed_id=self.pk,
                                           story_date__lte=story_trim_date)
            extra_stories.delete()
            # print "Deleted stories, %s left." % MStory.objects(story_feed_id=self.pk).count()
            userstories = MUserStory.objects(feed_id=self.pk,
                                             read_date__lte=story_trim_date)
            if userstories.count():
                # print "Found %s user stories. Deleting..." % userstories.count()
                userstories.delete()
Example #14
def mark_story_as_unread(request):
    story_id = request.POST['story_id']
    feed_id = int(request.POST['feed_id'])

    usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
                
    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()
        
    data = dict(code=0, payload=dict(story_id=story_id))
    logging.user(request, "~FY~SBUnread~SN story in feed: %s" % (usersub.feed))
    
    story = MStory.objects(story_feed_id=feed_id, story_guid=story_id)[0]
    
    if story.story_date < usersub.mark_read_date:
        # Story is outside the mark as read range, so invert all stories before.
        newer_stories = MStory.objects(story_feed_id=story.story_feed_id,
                                       story_date__gte=story.story_date,
                                       story_date__lte=usersub.mark_read_date
                                       ).only('story_guid')
        newer_stories = [s.story_guid for s in newer_stories]
        usersub.mark_read_date = story.story_date - datetime.timedelta(minutes=1)
        usersub.needs_unread_recalc = True
        usersub.save()
        
        # Mark stories as read only after the mark_read_date has been moved, otherwise
        # these would be ignored.
        data = usersub.mark_story_ids_as_read(newer_stories, request=request)
        
    m = MUserStory.objects(story_id=story_id, user_id=request.user.pk, feed_id=feed_id)
    m.delete()
    
    return data
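A timeline makes the inversion branch concrete (hypothetical stories and dates):

# Stories S1..S4, oldest first; mark_read_date currently sits at S4.
# Marking S2 unread moves mark_read_date to one minute before S2, which
# would implicitly unread S2..S4; mark_story_ids_as_read() then re-marks
# that whole range, and the final MUserStory delete removes only S2's
# read record, leaving just S2 unread.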
Example #16
def calculate_metrics():
    from apps.rss_feeds.models import MStory
    from apps.reader.models import MUserStory

    return {
        'stories': MStory.objects().count(),
        'read_stories': MUserStory.objects().count(),
    }
Example #17
    def calculate_metrics(self):
        from apps.rss_feeds.models import MStory
        from apps.reader.models import MUserStory

        return {
            'stories': MStory.objects().count(),
            'read_stories': MUserStory.objects().count(),
        }
Example #18
def mark_story_as_read(request):
    story_ids = request.REQUEST.getlist('story_id')
    feed_id = int(request.REQUEST['feed_id'])

    try:
        usersub = UserSubscription.objects.select_related('feed').get(
            user=request.user, feed=feed_id)
    except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
        duplicate_feed = DuplicateFeed.objects.filter(
            duplicate_feed_id=feed_id)
        if duplicate_feed:
            try:
                usersub = UserSubscription.objects.get(
                    user=request.user, feed=duplicate_feed[0].feed)
            except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
                return dict(code=-1)
        else:
            return dict(code=-1)

    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()

    data = dict(code=0, payload=story_ids)

    if len(story_ids) > 1:
        logging.user(
            request.user,
            "~FYRead %s stories in feed: %s" % (len(story_ids), usersub.feed))
    else:
        logging.user(request.user,
                     "~FYRead story in feed: %s" % (usersub.feed))

    for story_id in story_ids:
        try:
            story = MStory.objects(story_feed_id=feed_id,
                                   story_guid=story_id)[0]
        except IndexError:
            # Story has been deleted, probably by feed_fetcher.
            continue
        now = datetime.datetime.utcnow()
        date = now if now > story.story_date else story.story_date  # For handling future stories
        m = MUserStory(story=story,
                       user_id=request.user.pk,
                       feed_id=feed_id,
                       read_date=date)
        try:
            m.save()
        except OperationError:
            logging.user(
                request.user,
                "~BRMarked story as read: Duplicate Story -> %s" % (story_id))
            m = MUserStory.objects.get(story=story,
                                       user_id=request.user.pk,
                                       feed_id=feed_id)
            m.read_date = date
            m.save()

    return data
Example #19
def mark_feed_as_read(request):
    feed_ids = [int(f) for f in request.REQUEST.getlist('feed_id') if f]
    code = 0
    for feed_id in feed_ids:
        try:
            feed = Feed.objects.get(id=feed_id)
        except Feed.DoesNotExist:
            continue
        code = 0
    
        us = UserSubscription.objects.get(feed=feed, user=request.user)
        try:
            us.mark_feed_read()
        except IntegrityError:
            code = -1
        else:
            code = 1
        
        logging.user(request.user, "~FMMarking feed as read: ~SB%s" % (feed,))
        MUserStory.objects(user_id=request.user.pk, feed_id=feed_id).delete()
    return dict(code=code)
Example #20
def mark_story_as_read(request):
    story_ids = request.REQUEST.getlist('story_id')
    feed_id = int(request.REQUEST['feed_id'])

    try:
        usersub = UserSubscription.objects.select_related('feed').get(user=request.user, feed=feed_id)
    except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
        duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
        if duplicate_feed:
            try:
                usersub = UserSubscription.objects.get(user=request.user, 
                                                       feed=duplicate_feed[0].feed)
            except (UserSubscription.DoesNotExist, Feed.DoesNotExist):
                return dict(code=-1)
        else:
            return dict(code=-1)
                
    if not usersub.needs_unread_recalc:
        usersub.needs_unread_recalc = True
        usersub.save()
        
    data = dict(code=0, payload=story_ids)
    
    if len(story_ids) > 1:
        logging.user(request, "~FYRead %s stories in feed: %s" % (len(story_ids), usersub.feed))
    else:
        logging.user(request, "~FYRead story in feed: %s" % (usersub.feed))
        
    for story_id in story_ids:
        try:
            story = MStory.objects.get(story_feed_id=feed_id, story_guid=story_id)
        except MStory.DoesNotExist:
            # Story has been deleted, probably by feed_fetcher.
            continue
        now = datetime.datetime.utcnow()
        date = now if now > story.story_date else story.story_date # For handling future stories
        m = MUserStory(story=story, user_id=request.user.pk, feed_id=feed_id, read_date=date, story_id=story_id)
        try:
            m.save()
        except OperationError:
            logging.user(request, "~BRMarked story as read: Duplicate Story -> %s" % (story_id))
            logging.user(request, "~BRRead now date: %s, story_date: %s, story_id: %s." % (m.read_date, story.story_date, story.story_guid))
            logging.user(request, "~BRSubscription mark_read_date: %s, oldest_unread_story_date: %s" % (
                usersub.mark_read_date, usersub.oldest_unread_story_date))
            m = MUserStory.objects.get(story=story, user_id=request.user.pk, feed_id=feed_id)
            logging.user(request, "~BROriginal read date: %s, story id: %s, story.id: %s" % (m.read_date, m.story_id, m.story.id))
            m.story_id = story_id
            m.read_date = date
            m.save()
    
    return data
Example #21
def load_river_stories(request):
    user = get_user(request)
    feed_ids = [int(feed_id) for feed_id in request.POST.getlist("feeds")]
    offset = int(request.REQUEST.get("offset", 0))
    limit = int(request.REQUEST.get("limit", 25))
    page = int(request.REQUEST.get("page", 0)) + 1
    read_stories = int(request.REQUEST.get("read_stories", 0))
    # if page: offset = limit * page
    if page:
        limit = limit * page - read_stories

    def feed_qvalues(feed_id):
        feed = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        return Q(story_feed_id=feed_id) & Q(story_date__gte=feed.mark_read_date)

    feed_last_reads = map(feed_qvalues, feed_ids)
    qs = reduce(lambda q1, q2: q1 | q2, feed_last_reads)

    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only("story")
    read_stories = [rs.story.id for rs in read_stories]
    mstories = MStory.objects(Q(id__nin=read_stories) & qs)[offset : offset + limit]
    stories = Feed.format_stories(mstories)

    starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id__in=feed_ids).only(
        "story_guid", "starred_date"
    )
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

    for story in stories:
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date)
        story["long_parsed_date"] = format_story_link_date__long(story_date)
        story["read_status"] = 0
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date)
        story["intelligence"] = {"feed": 0, "author": 0, "tags": 0, "title": 0}

    logging.info(
        " ---> [%s] ~FCLoading river stories: ~SB%s stories ~SN(%s feeds)" % (request.user, len(stories), len(feed_ids))
    )

    return dict(stories=stories)
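The feed_qvalues/reduce pair above builds one OR-of-ANDs query. Spelled out for two hypothetical feeds, the reduced Q object is equivalent to:

# Equivalent of reduce(lambda q1, q2: q1 | q2, feed_last_reads) for
# hypothetical feeds 1 and 2 with their own mark_read_date values:
# (Q(story_feed_id=1) & Q(story_date__gte=read_date_1)) |
# (Q(story_feed_id=2) & Q(story_date__gte=read_date_2))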
Example #22
        # Switch to original feed for the user subscription
        logging.info("      ===> %s " % user_sub.user)
        user_sub.feed = original_feed
        user_sub.needs_unread_recalc = True
        try:
            user_sub.save()
            folders = json.decode(user_sub_folders.folders)
            folders = rewrite_folders(folders, original_feed, duplicate_feed)
            user_sub_folders.folders = json.encode(folders)
            user_sub_folders.save()
        except (IntegrityError, OperationError):
            logging.info("      !!!!> %s already subscribed" % user_sub.user)
            user_sub.delete()

    # Switch read stories
    user_stories = MUserStory.objects(feed_id=duplicate_feed.pk)
    logging.info(" ---> %s read stories" % user_stories.count())
    for user_story in user_stories:
        user_story.feed_id = original_feed.pk
        duplicate_story = user_story.story
        story_guid = duplicate_story.story_guid if hasattr(duplicate_story, "story_guid") else duplicate_story.id
        original_story = MStory.objects(story_feed_id=original_feed.pk, story_guid=story_guid)

        if original_story:
            user_story.story = original_story[0]
            try:
                user_story.save()
            except OperationError:
                # User read the story in the original feed, too. Ugh, just ignore it.
                pass
        else:
            # No matching story in the original feed; nothing to re-point.
            pass
Example #23
#!/usr/bin/env python

from utils.munin.base import MuninGraph
from apps.rss_feeds.models import MStory
from apps.reader.models import MUserStory

graph_config = {
    'graph_category': 'NewsBlur',
    'graph_title': 'NewsBlur Stories',
    'graph_vlabel': 'Stories',
    'stories.label': 'stories',
    'tags.label': 'tags',
    'authors.label': 'authors',
    'read_stories.label': 'read_stories',
}

metrics = {
    'stories': MStory.objects().count(),
    'read_stories': MUserStory.objects().count(),
}

if __name__ == '__main__':
    MuninGraph(graph_config, metrics).run()
Example #24
                
                if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()
                    feed = pfeed.feed
                    feed_process_duration = time.time() - start_duration
                    
                    if ret_entries['new'] or self.options['force']:
                        start = time.time()
                        if not feed.known_good or not feed.fetched_once:
                            feed.known_good = True
                            feed.fetched_once = True
                            feed = feed.save()
                        if random.random() <= 0.02:
                            feed.sync_redis()
                            MUserStory.delete_old_stories(feed_id=feed.pk)
                            MUserStory.sync_all_redis(feed_id=feed.pk)
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug('   ---> [%-30s] Unread count took too long...' % (feed.title[:30],))
                        if self.options['verbose']:
                            logging.debug(u'   ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss' % (
                                          feed.title[:30], time.time() - start))
                    cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
                    # if ret_entries['new'] or ret_entries['updated'] or self.options['force']:
                    #     feed.get_stories(force=True)
Example #25
def load_single_feed(request, feed_id):
    start        = time.time()
    user         = get_user(request)
    offset       = int(request.REQUEST.get('offset', 0))
    limit        = int(request.REQUEST.get('limit', 12))
    page         = int(request.REQUEST.get('page', 1))
    dupe_feed_id = None
    userstories_db = None
    
    if page: offset = limit * (page-1)
    if not feed_id: raise Http404
        
    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
            dupe_feed_id = feed_id
        else:
            raise Http404
        
    stories = feed.get_stories(offset, limit) 
        
    # Get intelligence classifier for user
    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk, feed_id=feed_id))
    
    checkpoint1 = time.time()
    
    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        # `usersub` is checked for truthiness below, so a missing
        # subscription must yield None rather than raise.
        usersub = None
    userstories = []
    if usersub and stories:
        story_ids = [story['id'] for story in stories]
        userstories_db = MUserStory.objects(user_id=user.pk,
                                            feed_id=feed.pk,
                                            story_id__in=story_ids).only('story_id')
        starred_stories = MStarredStory.objects(user_id=user.pk, 
                                                story_feed_id=feed_id, 
                                                story_guid__in=story_ids).only('story_guid', 'starred_date')
        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
        userstories = set(us.story_id for us in userstories_db)
            
    checkpoint2 = time.time()
    
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        if usersub:
            if story['id'] in userstories:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
                story['read_status'] = 0
            if story['id'] in starred_stories:
                story['starred'] = True
                starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
                story['starred_date'] = format_story_link_date__long(starred_date, now)
        else:
            story['read_status'] = 1
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }

    checkpoint3 = time.time()
    
    # Intelligence
    feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else []
    feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, 
                                           classifier_authors, classifier_titles, classifier_tags)
    
    if usersub:
        usersub.feed_opens += 1
        usersub.save()
    diff1 = checkpoint1-start
    diff2 = checkpoint2-start
    diff3 = checkpoint3-start
    timediff = time.time()-start
    last_update = relative_timesince(feed.last_update)
    logging.user(request, "~FYLoading feed: ~SB%s%s ~SN(%.4s seconds, ~SB%.4s/%.4s(%s)/%.4s~SN)" % (
        feed.feed_title[:32], ('~SN/p%s' % page) if page > 1 else '', timediff,
        diff1, diff2, userstories_db and userstories_db.count() or '~SN0~SB', diff3))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)
    
    data = dict(stories=stories, 
                feed_tags=feed_tags, 
                feed_authors=feed_authors, 
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)
    
    if dupe_feed_id: data['dupe_feed_id'] = dupe_feed_id
    if not usersub:
        data.update(feed.canonical())
        
    return data
Example #26
def load_river_stories(request):
    limit              = 18
    offset             = 0
    start              = datetime.datetime.utcnow()
    user               = get_user(request)
    feed_ids           = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids  = list(feed_ids)
    page               = int(request.REQUEST.get('page', 1))
    read_stories_count = int(request.REQUEST.get('read_stories_count', 0))
    new_flag           = request.REQUEST.get('new_flag', False)
    bottom_delta       = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    if not feed_ids: 
        logging.user(request, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])
    
    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization, works best when all 25 stories before have been read.
    limit = limit * page - read_stories_count
    
    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id')
    read_stories = [rs.story_id for rs in read_stories]
    
    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    # max_feed_count     = 0
    feed_counts     = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub: continue
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 + 
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        # if feed_counts[feed_id] > max_feed_count:
        #     max_feed_count = feed_counts[feed_id]
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50]
    feed_ids = [f[0] for f in feed_counts]
    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids
                            if feed_id in feed_last_reads])
    feed_counts = dict(feed_counts)

    # After excluding read stories, all that's left are stories 
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        story_guid__nin=read_stories,
        story_feed_id__in=feed_ids,
        # story_date__gte=start - bottom_delta
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
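    # With output='inline', map_reduce returns result objects whose .value is
    # the emitted story document; `scope` exposes feed_last_reads to the
    # Javascript functions above.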
    mstories = [story.value for story in mstories if story and story.value]

    mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta)))

    # story_feed_counts = defaultdict(int)
    # mstories_pruned = []
    # for story in mstories:
    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
    #     mstories_pruned.append(story)
    #     story_feed_counts[story['story_feed_id']] += 1
    stories = []
    for i, story in enumerate(mstories):
        if i < offset: continue
        if i >= offset + limit: break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    
    # Find starred stories
    starred_stories = MStarredStory.objects(
        user_id=user.pk,
        story_feed_id__in=found_feed_ids
    ).only('story_guid', 'starred_date')
    starred_stories = dict([(story.story_guid, story.starred_date) 
                            for story in starred_stories])
    
    # Intelligence classifiers for all feeds involved
    def sort_by_feed(classifiers):
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
        return feed_classifiers
    classifier_feeds   = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_titles  = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_tags    = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    
    classifiers = {}
    for feed_id in found_feed_ids:
        classifiers[feed_id] = get_classifiers_for_user(user, feed_id, classifier_feeds[feed_id], 
                                                        classifier_authors[feed_id],
                                                        classifier_titles[feed_id],
                                                        classifier_tags[feed_id])
    
    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date']  = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed':   apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags':   apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title':  apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }
    
    diff = datetime.datetime.utcnow() - start
    timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000)))
    logging.user(request, "~FCLoading river stories: page %s - ~SB%s/%s "
                               "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" % 
                               (page, len(stories), len(mstories), len(found_feed_ids), 
                               len(feed_ids), len(original_feed_ids), timediff))
    
    if new_flag:
        return dict(stories=stories, classifiers=classifiers)
    else:
        logging.user(request, "~BR~FCNo new flag on river")
        return dict(stories=stories)
Example #27
    def process_feed_wrapper(self, feed_queue):
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        feed = None

        if current_process._identity:
            identity = current_process._identity[0]

        for feed_id in feed_queue:
            start_duration = time.time()
            feed_fetch_duration = None
            feed_process_duration = None
            page_duration = None
            icon_duration = None
            feed_code = None
            ret_entries = None
            start_time = time.time()
            ret_feed = FEED_ERREXC
            try:
                feed = self.refresh_feed(feed_id)

                skip = False
                if self.options.get('fake'):
                    skip = True
                    weight = "-"
                    quick = "-"
                    rand = "-"
                elif (self.options.get('quick') and not self.options['force'] and 
                      feed.known_good and feed.fetched_once and not feed.is_push):
                    weight = feed.stories_last_month * feed.num_subscribers
                    random_weight = random.randint(1, max(weight, 1))
                    quick = float(self.options.get('quick', 0))
                    rand = random.random()
                    if random_weight < 100 and rand < quick:
                        skip = True
                if skip:
                    logging.debug('   ---> [%-30s] ~BGFaking fetch, skipping (%s/month, %s subs, %s < %s)...' % (
                        feed.title[:30],
                        weight,
                        feed.num_subscribers,
                        rand, quick))
                    continue

                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()
                feed_fetch_duration = time.time() - start_duration

                if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()
                    feed = pfeed.feed
                    feed_process_duration = time.time() - start_duration

                    if ret_entries['new'] or self.options['force']:
                        start = time.time()
                        if not feed.known_good or not feed.fetched_once:
                            feed.known_good = True
                            feed.fetched_once = True
                            feed = feed.save()
                        if random.random() <= 0.02:
                            feed.sync_redis()
                            MUserStory.delete_old_stories(feed_id=feed.pk)
                            MUserStory.sync_all_redis(feed_id=feed.pk)
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug('   ---> [%-30s] Unread count took too long...' % (feed.title[:30],))
                        if self.options['verbose']:
                            logging.debug(u'   ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss' % (
                                          feed.title[:30], time.time() - start))
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                logging.debug('   ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s' % (unicode(feed_id)[:30], e.fp.read()))
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] ~FRFeed is now gone...' % (unicode(feed_id)[:30]))
                continue
Example #28
    def process_feed_wrapper(self, feed_queue):
        delta = None
        current_process = multiprocessing.current_process()
        identity = "X"
        feed = None

        if current_process._identity:
            identity = current_process._identity[0]

        for feed_id in feed_queue:
            start_duration = time.time()
            feed_fetch_duration = None
            feed_process_duration = None
            page_duration = None
            icon_duration = None
            feed_code = None
            ret_entries = None
            start_time = time.time()
            ret_feed = FEED_ERREXC
            try:
                feed = self.refresh_feed(feed_id)

                skip = False
                if self.options.get('fake'):
                    skip = True
                    weight = "-"
                    quick = "-"
                    rand = "-"
                elif (self.options.get('quick') and not self.options['force']
                      and feed.known_good and feed.fetched_once
                      and not feed.is_push):
                    weight = feed.stories_last_month * feed.num_subscribers
                    random_weight = random.randint(1, max(weight, 1))
                    quick = float(self.options.get('quick', 0))
                    rand = random.random()
                    if random_weight < 100 and rand < quick:
                        skip = True
                if skip:
                    logging.debug(
                        '   ---> [%-30s] ~BGFaking fetch, skipping (%s/month, %s subs, %s < %s)...'
                        % (feed.title[:30], weight, feed.num_subscribers, rand,
                           quick))
                    continue

                ffeed = FetchFeed(feed_id, self.options)
                ret_feed, fetched_feed = ffeed.fetch()
                feed_fetch_duration = time.time() - start_duration

                if ((fetched_feed and ret_feed == FEED_OK)
                        or self.options['force']):
                    pfeed = ProcessFeed(feed_id, fetched_feed, self.options)
                    ret_feed, ret_entries = pfeed.process()
                    feed = pfeed.feed
                    feed_process_duration = time.time() - start_duration

                    if ret_entries['new'] or self.options['force']:
                        start = time.time()
                        if not feed.known_good or not feed.fetched_once:
                            feed.known_good = True
                            feed.fetched_once = True
                            feed = feed.save()
                        if random.random() <= 0.02:
                            feed.sync_redis()
                            MUserStory.delete_old_stories(feed_id=feed.pk)
                            MUserStory.sync_all_redis(feed_id=feed.pk)
                        try:
                            self.count_unreads_for_subscribers(feed)
                        except TimeoutError:
                            logging.debug(
                                '   ---> [%-30s] Unread count took too long...'
                                % (feed.title[:30], ))
                        if self.options['verbose']:
                            logging.debug(
                                u'   ---> [%-30s] ~FBTIME: unread count in ~FM%.4ss'
                                % (feed.title[:30], time.time() - start))
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                logging.debug(
                    '   ---> [%-30s] ~FRFeed throws HTTP error: ~SB%s' %
                    (unicode(feed_id)[:30], e.fp.read()))
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Feed.DoesNotExist, e:
                logging.debug('   ---> [%-30s] ~FRFeed is now gone...' %
                              (unicode(feed_id)[:30]))
                continue
Example #29
        # Switch to original feed for the user subscription
        logging.info("      ===> %s " % user_sub.user)
        user_sub.feed = original_feed
        user_sub.needs_unread_recalc = True
        try:
            user_sub.save()
            folders = json.decode(user_sub_folders.folders)
            folders = rewrite_folders(folders, original_feed, duplicate_feed)
            user_sub_folders.folders = json.encode(folders)
            user_sub_folders.save()
        except (IntegrityError, OperationError):
            logging.info("      !!!!> %s already subscribed" % user_sub.user)
            user_sub.delete()

    # Switch read stories
    user_stories = MUserStory.objects(feed_id=duplicate_feed.pk)
    logging.info(" ---> %s read stories" % user_stories.count())
    for user_story in user_stories:
        user_story.feed_id = original_feed.pk
        duplicate_story = user_story.story
        story_guid = duplicate_story.story_guid if hasattr(
            duplicate_story, 'story_guid') else duplicate_story.id
        original_story = MStory.objects(story_feed_id=original_feed.pk,
                                        story_guid=story_guid)

        if original_story:
            user_story.story = original_story[0]
            try:
                user_story.save()
            except OperationError:
                # User read the story in the original feed, too. Ugh, just ignore it.
                pass
Example #30
def load_single_feed(request):
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 30))
    page = int(request.REQUEST.get('page', 0))
    if page:
        offset = limit * page
    feed_id = int(request.REQUEST['feed_id'])
    feed = Feed.objects.get(id=feed_id)
    force_update = request.GET.get('force_update', False)
    
    now = datetime.datetime.now()
    stories = feed.get_stories(offset, limit) 
        
    if force_update:
        feed.update(force_update)
    
    # Get intelligence classifier for user
    classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)
    
    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        # FIXME: Why is this happening for `conesus` when logged into another account?!
        logging.info(" ***> [%s] UserSub DNE, creating: %s" % (user, feed))
        usersub = UserSubscription.objects.create(user=user, feed=feed)

    userstories = MUserStory.objects(user_id=user.pk, 
                                     feed_id=feed.pk,
                                     read_date__gte=usersub.mark_read_date)
    userstories = [us.story.id for us in userstories]
    for story in stories:
        classifier_feeds.rewind()
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        if story.get('id') in userstories:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
            story['read_status'] = 0
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }
    
    # Intelligence
    feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else []
    feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, 
                                           classifier_authors, classifier_titles, classifier_tags)
    
    usersub.feed_opens += 1
    usersub.save()
    
    diff = datetime.datetime.now()-now
    logging.info(" ---> [%s] Loading feed: %s (%s.%s seconds)" % (request.user, feed, 
                                                                  diff.seconds, 
                                                                  diff.microseconds / 1000))
    
    data = dict(stories=stories, 
                feed_tags=feed_tags, 
                feed_authors=feed_authors, 
                classifiers=classifiers)
    return data
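A side note on the `.rewind()` calls in the story loop above: a MongoDB cursor is exhausted after one pass, so re-iterating the classifier querysets without rewinding would hand every story after the first an empty classifier list. A self-contained stand-in showing the behavior (the class is invented for this sketch):

class OnePassCursor(object):
    """Stand-in for a MongoDB cursor: one pass, then exhausted."""
    def __init__(self, items):
        self.items, self.pos = items, 0
    def __iter__(self):
        return self
    def next(self):
        if self.pos >= len(self.items):
            raise StopIteration
        self.pos += 1
        return self.items[self.pos - 1]
    def rewind(self):
        self.pos = 0

cursor = OnePassCursor(['tech', 'python'])
print list(cursor)   # ['tech', 'python']
print list(cursor)   # [] -- already exhausted
cursor.rewind()
print list(cursor)   # ['tech', 'python'] again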
Example #31
0
def load_river_stories(request):
    limit              = 18
    offset             = 0
    start              = datetime.datetime.utcnow()
    user               = get_user(request)
    feed_ids           = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids  = list(feed_ids)
    page               = int(request.REQUEST.get('page', 0))+1
    read_stories_count = int(request.REQUEST.get('read_stories_count', 0))
    bottom_delta       = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    if not feed_ids: 
        logging.user(request.user, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])
    
    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization that works best when all 25 preceding stories have been read.
    limit = limit * page - read_stories_count
    
    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story')
    read_stories = [rs.story.id for rs in read_stories]
    
    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    # max_feed_count     = 0
    feed_counts     = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub: continue
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 + 
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        # if feed_counts[feed_id] > max_feed_count:
        #     max_feed_count = feed_counts[feed_id]
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50]
    feed_ids = [f[0] for f in feed_counts]
    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids])
    feed_counts = dict(feed_counts)
    
    # After excluding read stories, all that's left are stories 
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        id__nin=read_stories,
        story_feed_id__in=feed_ids,
        story_date__gte=start - bottom_delta
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    mstories = [story.value for story in mstories]

    mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta)))

    # story_feed_counts = defaultdict(int)
    # mstories_pruned = []
    # for story in mstories:
    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
    #     mstories_pruned.append(story)
    #     story_feed_counts[story['story_feed_id']] += 1
    stories = []
    for i, story in enumerate(mstories):
        if i < offset: continue
        if i >= offset + limit: break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    
    # Find starred stories
    starred_stories = MStarredStory.objects(
        user_id=user.pk,
        story_feed_id__in=found_feed_ids
    ).only('story_guid', 'starred_date')
    starred_stories = dict([(story.story_guid, story.starred_date) 
                            for story in starred_stories])
    
    # Intelligence classifiers for all feeds involved
    def sort_by_feed(classifiers):
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
        return feed_classifiers
    classifier_feeds   = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_titles  = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_tags    = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    
    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date']  = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed':   apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags':   apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title':  apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }
    
    diff = datetime.datetime.utcnow() - start
    timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000)))
    logging.user(request.user, "~FCLoading river stories: page %s - ~SB%s/%s "
                               "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" % 
                               (page, len(stories), len(mstories), len(found_feed_ids), 
                               len(feed_ids), len(original_feed_ids), timediff))
    
    return dict(stories=stories)
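One step above worth isolating: the river caps the query at the 50 feeds with the lowest weighted unread counts (negatives x1, neutrals x10, positives x20, sorted ascending), which bounds the map/reduce. A minimal sketch of that step, assuming `usersubs` is any iterable of subscription objects carrying the three counters:

from operator import itemgetter

def prioritize_feeds(usersubs, max_feeds=50):
    # Weight each feed by its unread counts, then keep only the
    # lowest-weighted feeds so the story query stays bounded.
    weights = {}
    for sub in usersubs:
        weights[sub.feed_id] = (sub.unread_count_negative * 1 +
                                sub.unread_count_neutral * 10 +
                                sub.unread_count_positive * 20)
    lightest = sorted(weights.items(), key=itemgetter(1))[:max_feeds]
    return [feed_id for feed_id, _ in lightest]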
Example #32
0
def load_social_stories(request, user_id, username=None):
    start          = time.time()
    user           = get_user(request)
    social_user_id = int(user_id)
    social_user    = get_object_or_404(User, pk=social_user_id)
    offset         = int(request.REQUEST.get('offset', 0))
    limit          = int(request.REQUEST.get('limit', 6))
    page           = request.REQUEST.get('page')
    order          = request.REQUEST.get('order', 'newest')
    read_filter    = request.REQUEST.get('read_filter', 'all')
    stories        = []
    
    if page: offset = limit * (int(page) - 1)
    now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    social_profile = MSocialProfile.get_user(social_user.pk)
    try:
        socialsub = MSocialSubscription.objects.get(user_id=user.pk, subscription_user_id=social_user_id)
    except MSocialSubscription.DoesNotExist:
        socialsub = None
    mstories = MSharedStory.objects(user_id=social_user.pk).order_by('-shared_date')[offset:offset+limit]
    stories = Feed.format_stories(mstories)
    
    if socialsub and (read_filter == 'unread' or order == 'oldest'):
        story_ids = socialsub.get_stories(order=order, read_filter=read_filter, offset=offset, limit=limit)
        story_date_order = "%sshared_date" % ('' if order == 'oldest' else '-')
        if story_ids:
            mstories = MSharedStory.objects(user_id=social_user.pk,
                                            story_db_id__in=story_ids).order_by(story_date_order)
            stories = Feed.format_stories(mstories)

    if not stories:
        return dict(stories=[])
    
    checkpoint1 = time.time()
    
    stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, user.pk, check_all=True)

    story_feed_ids = list(set(s['story_feed_id'] for s in stories))
    usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
    usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
    unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys())))
    unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
    unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]
    date_delta = UNREAD_CUTOFF
    if socialsub and date_delta < socialsub.mark_read_date:
        date_delta = socialsub.mark_read_date
    
    # Get intelligence classifier for user
    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk, social_user_id=social_user_id))
    # Merge with feed specific classifiers
    classifier_feeds   = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_titles  = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_tags    = classifier_tags + list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))

    checkpoint2 = time.time()
    
    story_ids = [story['id'] for story in stories]
    userstories_db = MUserStory.objects(user_id=user.pk,
                                        feed_id__in=story_feed_ids,
                                        story_id__in=story_ids).only('story_id')
    userstories = set(us.story_id for us in userstories_db)

    starred_stories = MStarredStory.objects(user_id=user.pk, 
                                            story_feed_id__in=story_feed_ids, 
                                            story_guid__in=story_ids).only('story_guid', 'starred_date')
    shared_stories = MSharedStory.objects(user_id=user.pk, 
                                          story_feed_id__in=story_feed_ids, 
                                          story_guid__in=story_ids)\
                                 .only('story_guid', 'shared_date', 'comments')
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
    shared_stories = dict([(story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
                           for story in shared_stories])
    
    for story in stories:
        story['social_user_id'] = social_user_id
        story_feed_id = story['story_feed_id']
        # story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        shared_date = localtime_for_timezone(story['shared_date'], user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(shared_date, now)
        story['long_parsed_date'] = format_story_link_date__long(shared_date, now)
        
        if not socialsub:
            story['read_status'] = 1
        elif story['id'] in userstories:
            story['read_status'] = 1
        elif story['shared_date'] < date_delta:
            story['read_status'] = 1
        elif not usersubs_map.get(story_feed_id):
            story['read_status'] = 0
        elif not story.get('read_status') and story['story_date'] < usersubs_map[story_feed_id].mark_read_date:
            story['read_status'] = 1
        elif not story.get('read_status') and story['shared_date'] < date_delta:
            story['read_status'] = 1
        # elif not story.get('read_status') and socialsub and story['shared_date'] > socialsub.last_read_date:
        #     story['read_status'] = 0
        else:
            story['read_status'] = 0

        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        if story['id'] in shared_stories:
            story['shared'] = True
            shared_date = localtime_for_timezone(shared_stories[story['id']]['shared_date'],
                                                 user.profile.timezone)
            story['shared_date'] = format_story_link_date__long(shared_date, now)
            story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])

        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, story['story_feed_id'],
                                           social_user_id=social_user_id),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }
    
    
    classifiers = sort_classifiers_by_feed(user=user, feed_ids=story_feed_ids,
                                           classifier_feeds=classifier_feeds,
                                           classifier_authors=classifier_authors,
                                           classifier_titles=classifier_titles,
                                           classifier_tags=classifier_tags)
                                           
    if socialsub:
        socialsub.feed_opens += 1
        socialsub.save()
    
    diff1 = checkpoint1-start
    diff2 = checkpoint2-start
    logging.user(request, "~FYLoading ~FMshared stories~FY: ~SB%s%s ~SN(~SB%.4ss/%.4ss~SN)" % (
        social_profile.title[:22], ('~SN/p%s' % page) if page and int(page) > 1 else '', diff1, diff2))

    return {
        "stories": stories, 
        "user_profiles": user_profiles, 
        "feeds": unsub_feeds, 
        "classifiers": classifiers,
    }
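The read-status chain in the story loop above is easier to audit as one function. This is a hedged reconstruction of the same elif cascade (the second `shared_date < date_delta` branch in the original can never fire, since the first already catches it, so it is collapsed here):

def social_read_status(story, socialsub, read_story_ids, usersubs_map, date_delta):
    if not socialsub:
        return 1                      # not following this blurblog: show as read
    if story['id'] in read_story_ids:
        return 1                      # explicitly marked read
    if story['shared_date'] < date_delta:
        return 1                      # shared before the unread cutoff
    usersub = usersubs_map.get(story['story_feed_id'])
    if not usersub:
        return 0                      # not subscribed to the source feed
    if not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
        return 1                      # older than the feed's mark-read date
    return 0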
Example #33
0
def load_single_feed(request, feed_id):
    start        = time.time()
    user         = get_user(request)
    offset       = int(request.REQUEST.get('offset', 0))
    limit        = int(request.REQUEST.get('limit', 12))
    page         = int(request.REQUEST.get('page', 1))
    dupe_feed_id = None
    userstories_db = None
    
    if page: offset = limit * (page-1)
    if not feed_id: raise Http404
        
    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
            dupe_feed_id = feed_id
        else:
            raise Http404
        
    stories = feed.get_stories(offset, limit) 
        
    # Get intelligence classifier for user
    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk, feed_id=feed_id))
    
    checkpoint1 = time.time()
    
    usersub = UserSubscription.objects.get(user=user, feed=feed)
    userstories = []
    if usersub:
        userstories_db = MUserStory.objects(user_id=user.pk,
                                            feed_id=feed.pk,
                                            read_date__gte=usersub.mark_read_date)
        starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id).only('story_guid', 'starred_date')
        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

        for us in userstories_db:
            if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
                userstories.append(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid
            
    checkpoint2 = time.time()
    
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        if usersub:
            if story['id'] in userstories:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
                story['read_status'] = 0
            if story['id'] in starred_stories:
                story['starred'] = True
                starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
                story['starred_date'] = format_story_link_date__long(starred_date, now)
        else:
            story['read_status'] = 1
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }

    checkpoint3 = time.time()
    
    # Intelligence
    feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else []
    feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, 
                                           classifier_authors, classifier_titles, classifier_tags)
    
    if usersub:
        usersub.feed_opens += 1
        usersub.save()
    timediff = time.time()-start
    last_update = relative_timesince(feed.last_update)
    logging.user(request.user, "~FYLoading feed: ~SB%s%s ~SN(%.4s seconds)" % (
        feed, ('~SN/p%s' % page) if page > 1 else '', timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)
    
    if timediff >= 1:
        diff1 = checkpoint1-start
        diff2 = checkpoint2-start
        diff3 = checkpoint3-start
        logging.user(request.user, "~FYSlow feed load: ~SB%.4s/%.4s(%s)/%.4s" % (
            diff1, diff2, userstories_db and userstories_db.count(), diff3))
    
    data = dict(stories=stories, 
                feed_tags=feed_tags, 
                feed_authors=feed_authors, 
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)
    
    if dupe_feed_id: data['dupe_feed_id'] = dupe_feed_id
    if not usersub:
        data.update(feed.canonical())
        
    return data
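Example #33 threads three time.time() checkpoints through the load and only logs the breakdown when the whole request takes a second or more. The same pattern in isolation, with sleeps standing in for the query phases:

import time

def slow_load_demo(threshold=1.0):
    start = time.time()
    time.sleep(0.4); checkpoint1 = time.time()   # e.g. classifier queries
    time.sleep(0.4); checkpoint2 = time.time()   # e.g. read-story queries
    time.sleep(0.4); checkpoint3 = time.time()   # e.g. story formatting
    if time.time() - start >= threshold:
        print "Slow load: %.4s/%.4s/%.4s" % (
            checkpoint1 - start, checkpoint2 - start, checkpoint3 - start)

slow_load_demo()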
Example #34
0
def load_single_feed(request):
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 30))
    page = int(request.REQUEST.get('page', 0))
    if page:
        offset = limit * page
    feed_id = int(request.REQUEST.get('feed_id', 0))
    if feed_id == 0:
        raise Http404
        
    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
        else:
            raise Http404
        
    force_update = request.GET.get('force_update', False)
    
    now = datetime.datetime.utcnow()
    stories = feed.get_stories(offset, limit) 
        
    if force_update:
        feed.update(force_update)
    
    # Get intelligence classifier for user
    classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)
    
    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        # FIXME: Why is this happening for `conesus` when logged into another account?!
        logging.info(" ***> [%s] UserSub DNE, creating: %s" % (user, feed))
        usersub = UserSubscription.objects.create(user=user, feed=feed)
            
    userstories = []
    userstories_db = MUserStory.objects(user_id=user.pk, 
                                        feed_id=feed.pk,
                                        read_date__gte=usersub.mark_read_date)
    for us in userstories_db:
        if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
            userstories.append(us.story.story_guid)
        elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
            userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid
            
    for story in stories:
        classifier_feeds.rewind()
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        if story['id'] in userstories:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
            story['read_status'] = 0
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }
    
    # Intelligence
    feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else []
    feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, 
                                           classifier_authors, classifier_titles, classifier_tags)
    
    usersub.feed_opens += 1
    usersub.save()
    
    diff = datetime.datetime.utcnow()-now
    timediff = float("%s.%s" % (diff.seconds, (diff.microseconds / 1000)))
    logging.info(" ---> [%s] Loading feed: %s (%s seconds)" % (request.user, feed, timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)
    
    last_update = relative_timesince(feed.last_update)
    data = dict(stories=stories, 
                feed_tags=feed_tags, 
                feed_authors=feed_authors, 
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)
    return data
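These examples mix two pagination conventions: #30 and #34 treat `page` as zero-based (`offset = limit * page`), while #33 and #38 treat it as one-based (`offset = limit * (page - 1)`). A small helper that makes the choice explicit; the `one_based` flag is invented for this sketch:

def page_to_offset(page, limit, one_based=True):
    page = int(page or 0)
    if one_based:
        return limit * max(page - 1, 0)
    return limit * max(page, 0)

print page_to_offset(2, 30, one_based=False)  # 60, as in Example #34
print page_to_offset(2, 12)                   # 12, as in Example #33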
Example #35
0
    def process_feed_wrapper(self, feed_queue):
        """ wrapper for ProcessFeed
        """
        if not self.options['single_threaded']:
            # Close the DB so the connection can be re-opened on a per-process basis
            from django.db import connection
            connection.close()
        delta = None
        
        MONGO_DB = settings.MONGO_DB
        db = mongoengine.connection.connect(db=MONGO_DB['NAME'], host=MONGO_DB['HOST'], port=MONGO_DB['PORT'])
        
        current_process = multiprocessing.current_process()
        
        identity = "X"
        if current_process._identity:
            identity = current_process._identity[0]
        for feed in feed_queue:
            ret_entries = {
                ENTRY_NEW: 0,
                ENTRY_UPDATED: 0,
                ENTRY_SAME: 0,
                ENTRY_ERR: 0
            }
            start_time = datetime.datetime.now()
                    
            ### Uncomment to test feed fetcher
            # from random import randint
            # if randint(0,10) < 10:
            #     continue
            
            try:
                ffeed = FetchFeed(feed, self.options)
                ret_feed, fetched_feed = ffeed.fetch()
                
                if ((fetched_feed and ret_feed == FEED_OK) or self.options['force']):
                    pfeed = ProcessFeed(feed, fetched_feed, db, self.options)
                    ret_feed, ret_entries = pfeed.process()

                    if ret_entries.get(ENTRY_NEW) or self.options['force'] or not feed.fetched_once:
                        if not feed.fetched_once:
                            feed.fetched_once = True
                            feed.save()
                        MUserStory.delete_old_stories(feed_id=feed.pk)
                        user_subs = UserSubscription.objects.filter(feed=feed)
                        logging.debug(u'   ---> [%-30s] Computing scores for all feed subscribers: %s subscribers' % (unicode(feed)[:30], user_subs.count()))
                        stories_db = MStory.objects(story_feed_id=feed.pk,
                                                    story_date__gte=UNREAD_CUTOFF)
                        for sub in user_subs:
                            cache.delete('usersub:%s' % sub.user_id)
                            silent = False if self.options['verbose'] >= 2 else True
                            sub.calculate_feed_scores(silent=silent, stories_db=stories_db)
                    cache.delete('feed_stories:%s-%s-%s' % (feed.id, 0, 25))
                    # if ret_entries.get(ENTRY_NEW) or ret_entries.get(ENTRY_UPDATED) or self.options['force']:
                    #     feed.get_stories(force=True)
            except KeyboardInterrupt:
                break
            except urllib2.HTTPError, e:
                feed.save_feed_history(e.code, e.msg, e.fp.read())
                fetched_feed = None
            except Exception, e:
                logging.debug('[%d] ! -------------------------' % (feed.id,))
                tb = traceback.format_exc()
                logging.debug(tb)
                logging.debug('[%d] ! -------------------------' % (feed.id,))
                ret_feed = FEED_ERREXC 
                feed.save_feed_history(500, "Error", tb)
                fetched_feed = None
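Two details in Example #35 are easy to miss: the Django connection is closed up front so each forked worker re-opens its own socket instead of sharing the parent's, and the log identity comes from multiprocessing.current_process()._identity, which is an empty tuple in the main process (hence the "X" fallback). The identity half in isolation:

import multiprocessing

def worker():
    # _identity is a tuple like (1,) in a child process and () in the parent.
    identity = multiprocessing.current_process()._identity
    print identity[0] if identity else "X"

if __name__ == '__main__':
    worker()                                    # prints X
    p = multiprocessing.Process(target=worker)  # prints 1
    p.start()
    p.join()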
Example #36
0
def load_river_blurblog(request):
    limit = 10
    start = time.time()
    user = get_user(request)
    social_user_ids = [int(uid) for uid in request.REQUEST.getlist("social_user_ids") if uid]
    original_user_ids = list(social_user_ids)
    page = int(request.REQUEST.get("page", 1))
    order = request.REQUEST.get("order", "newest")
    read_filter = request.REQUEST.get("read_filter", "unread")
    relative_user_id = request.REQUEST.get("relative_user_id", None)
    now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if not relative_user_id:
        relative_user_id = get_user(request).pk

    if not social_user_ids:
        socialsubs = MSocialSubscription.objects.filter(user_id=user.pk)
        social_user_ids = [s.subscription_user_id for s in socialsubs]

    offset = (page - 1) * limit
    limit = page * limit - 1

    story_ids, story_dates = MSocialSubscription.feed_stories(
        user.pk, social_user_ids, offset=offset, limit=limit, order=order, read_filter=read_filter
    )
    mstories = MStory.objects(id__in=story_ids)
    story_id_to_dates = dict(zip(story_ids, story_dates))

    def sort_stories_by_id(a, b):
        return int(story_id_to_dates[str(b.id)]) - int(story_id_to_dates[str(a.id)])

    sorted_mstories = sorted(mstories, cmp=sort_stories_by_id)
    stories = Feed.format_stories(sorted_mstories)
    for s, story in enumerate(stories):
        story["story_date"] = datetime.datetime.fromtimestamp(story_dates[s])
    stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, relative_user_id, check_all=True)

    story_feed_ids = list(set(s["story_feed_id"] for s in stories))
    usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
    usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
    unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys())))
    unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
    unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]

    # Find starred stories
    if story_feed_ids:
        story_ids = [story["id"] for story in stories]
        starred_stories = MStarredStory.objects(user_id=user.pk, story_guid__in=story_ids).only(
            "story_guid", "starred_date"
        )
        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
        shared_stories = MSharedStory.objects(user_id=user.pk, story_guid__in=story_ids).only(
            "story_guid", "shared_date", "comments"
        )
        shared_stories = dict(
            [
                (story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
                for story in shared_stories
            ]
        )

        userstories_db = MUserStory.objects(user_id=user.pk, feed_id__in=story_feed_ids, story_id__in=story_ids).only(
            "story_id"
        )
        userstories = set(us.story_id for us in userstories_db)

    else:
        starred_stories = {}
        shared_stories = {}
        userstories = []

    # Intelligence classifiers for all feeds involved
    if story_feed_ids:
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    else:
        classifier_feeds = []
        classifier_authors = []
        classifier_titles = []
        classifier_tags = []
    classifiers = sort_classifiers_by_feed(
        user=user,
        feed_ids=story_feed_ids,
        classifier_feeds=classifier_feeds,
        classifier_authors=classifier_authors,
        classifier_titles=classifier_titles,
        classifier_tags=classifier_tags,
    )

    # Just need to format stories
    for story in stories:
        if story["id"] in userstories:
            story["read_status"] = 1
        elif story["story_date"] < UNREAD_CUTOFF:
            story["read_status"] = 1
        else:
            story["read_status"] = 0
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date, now)
        story["long_parsed_date"] = format_story_link_date__long(story_date, now)
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date, now)
        story["intelligence"] = {
            "feed": apply_classifier_feeds(classifier_feeds, story["story_feed_id"]),
            "author": apply_classifier_authors(classifier_authors, story),
            "tags": apply_classifier_tags(classifier_tags, story),
            "title": apply_classifier_titles(classifier_titles, story),
        }
        if story["id"] in shared_stories:
            story["shared"] = True
            shared_date = localtime_for_timezone(shared_stories[story["id"]]["shared_date"], user.profile.timezone)
            story["shared_date"] = format_story_link_date__long(shared_date, now)
            story["shared_comments"] = strip_tags(shared_stories[story["id"]]["comments"])

    diff = time.time() - start
    timediff = round(float(diff), 2)
    logging.user(
        request,
        "~FYLoading ~FCriver blurblogs stories~FY: ~SBp%s~SN (%s/%s "
        "stories, ~SN%s/%s/%s feeds)"
        % (page, len(stories), len(mstories), len(story_feed_ids), len(social_user_ids), len(original_user_ids)),
    )

    return {
        "stories": stories,
        "user_profiles": user_profiles,
        "feeds": unsub_feeds,
        "classifiers": classifiers,
        "elapsed_time": timediff,
    }
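The sort in Example #36 is worth a second look: story order comes from a parallel list of timestamps rather than a model field, so the comparator reads from an id-to-date map. The same idea with plain data (FakeStory is invented for this sketch):

story_ids   = ['a1', 'b2', 'c3']
story_dates = [300, 100, 200]
id_to_date  = dict(zip(story_ids, story_dates))

class FakeStory(object):
    def __init__(self, id_):
        self.id = id_

mstories = [FakeStory('b2'), FakeStory('a1'), FakeStory('c3')]
# Newest first, mirroring sort_stories_by_id's b-minus-a comparison.
ordered = sorted(mstories,
                 cmp=lambda a, b: int(id_to_date[str(b.id)]) - int(id_to_date[str(a.id)]))
print [s.id for s in ordered]   # ['a1', 'c3', 'b2']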
Example #37
0
def load_river_stories(request):
    limit              = 18
    offset             = 0
    start              = time.time()
    user               = get_user(request)
    feed_ids           = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids  = list(feed_ids)
    page               = int(request.REQUEST.get('page', 1))
    read_stories_count = int(request.REQUEST.get('read_stories_count', 0))
    bottom_delta       = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    if not feed_ids: 
        logging.user(request, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])
    
    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization that works best when all 25 preceding stories have been read.
    limit = limit * page - read_stories_count
    
    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id')
    read_stories = [rs.story_id for rs in read_stories]
    
    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    # max_feed_count     = 0
    feed_counts     = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub: continue
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 + 
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        # if feed_counts[feed_id] > max_feed_count:
        #     max_feed_count = feed_counts[feed_id]
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:40]
    feed_ids = [f[0] for f in feed_counts]
    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids
                            if feed_id in feed_last_reads])
    feed_counts = dict(feed_counts)

    # After excluding read stories, all that's left are stories 
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        story_guid__nin=read_stories,
        story_feed_id__in=feed_ids,
        # story_date__gte=start - bottom_delta
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    try:
        mstories = [story.value for story in mstories if story and story.value]
    except OperationFailure, e:
        raise e
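A worked instance of the cumulative-fetch arithmetic shared by Examples #31 and #37: at 18 stories per page, loading page 2 when 18 stories are already read only needs one page's worth of unread stories.

limit, page, read_stories_count = 18, 2, 18
print limit * page - read_stories_count   # 18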
Example #38
0
def load_single_feed(request, feed_id):
    start = datetime.datetime.utcnow()
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 12))
    page = int(request.REQUEST.get('page', 1))
    if page:
        offset = limit * (page-1)
    dupe_feed_id = None
    if not feed_id:
        raise Http404
        
    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
            dupe_feed_id = feed_id
        else:
            raise Http404
        
    stories = feed.get_stories(offset, limit) 
        
    # Get intelligence classifier for user
    classifier_feeds   = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles  = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags    = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)
    
    usersub = UserSubscription.objects.get(user=user, feed=feed)
    userstories = []
    if usersub:
        userstories_db = MUserStory.objects(user_id=user.pk,
                                            feed_id=feed.pk,
                                            read_date__gte=usersub.mark_read_date)
        starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id).only('story_guid', 'starred_date')
        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

        for us in userstories_db:
            if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
                userstories.append(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                userstories.append(us.story.id) # TODO: Remove me after migration from story.id->guid
            
    for story in stories:
        [x.rewind() for x in [classifier_feeds, classifier_authors, classifier_tags, classifier_titles]]
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        if usersub:
            if story['id'] in userstories:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
                story['read_status'] = 0
            if story['id'] in starred_stories:
                story['starred'] = True
                starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
                story['starred_date'] = format_story_link_date__long(starred_date, now)
        else:
            story['read_status'] = 1
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }
    
    # Intelligence
    feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else []
    feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds, 
                                           classifier_authors, classifier_titles, classifier_tags)
    
    if usersub:
        usersub.feed_opens += 1
        usersub.save()
    
    diff = datetime.datetime.utcnow()-start
    timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000)))
    last_update = relative_timesince(feed.last_update)
    logging.user(request.user, "~FYLoading feed: ~SB%s%s ~SN(%s seconds)" % (
        feed, ('~SN/p%s' % page) if page > 1 else '', timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)
    
    data = dict(stories=stories, 
                feed_tags=feed_tags, 
                feed_authors=feed_authors, 
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)
    
    if dupe_feed_id: data['dupe_feed_id'] = dupe_feed_id
    if not usersub:
        data.update(feed.canonical())
        
    return data
Example #39
0
def load_river_stories(request):
    limit                = 18
    offset               = int(request.REQUEST.get('offset', 0))
    start                = time.time()
    user                 = get_user(request)
    feed_ids             = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids    = list(feed_ids)
    page                 = int(request.REQUEST.get('page', 1))
    read_stories_count   = int(request.REQUEST.get('read_stories_count', 0))
    days_to_keep_unreads = datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    if not feed_ids: 
        logging.user(request, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])
    
    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization that works best when all 25 preceding stories have been read.
    offset = (page-1) * limit - read_stories_count
    limit = page * limit - read_stories_count
    
    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id')
    read_stories = [rs.story_id for rs in read_stories]
    
    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    feed_counts     = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub: continue
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 + 
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))

    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:40]
    feed_ids = [f[0] for f in feed_counts]
    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids
                            if feed_id in feed_last_reads])
    feed_counts = dict(feed_counts)

    # After excluding read stories, all that's left are stories 
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        story_guid__nin=read_stories,
        story_feed_id__in=feed_ids,
        # story_date__gte=start - days_to_keep_unreads
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    try:
        mstories = [story.value for story in mstories if story and story.value]
    except OperationFailure, e:
        raise e
Example #40
0
#!/usr/bin/env python 

from utils.munin.base import MuninGraph
from apps.rss_feeds.models import MStory
from apps.reader.models import MUserStory

graph_config = {
    'graph_category' : 'NewsBlur',
    'graph_title' : 'NewsBlur Stories',
    'graph_vlabel' : 'Stories',
    'stories.label': 'stories',
    'tags.label': 'tags',
    'authors.label': 'authors',
    'read_stories.label': 'read_stories',
}

metrics = {
    'stories': MStory.objects().count(),
    'read_stories': MUserStory.objects().count(),
}

if __name__ == '__main__':
    MuninGraph(graph_config, metrics).run()
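For context on Example #40: a munin plugin conventionally prints "field.value N" lines when invoked bare and its graph definition when invoked with "config". MuninGraph's internals aren't shown here, but under that standard protocol its run() plausibly reduces to a sketch like this:

import sys

def run(graph_config, metrics):
    if len(sys.argv) > 1 and sys.argv[1] == 'config':
        for key, value in graph_config.items():
            print "%s %s" % (key, value)
    else:
        for field, value in metrics.items():
            print "%s.value %s" % (field, value)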
Example #41
0
def load_single_feed(request):
    user = get_user(request)
    offset = int(request.REQUEST.get("offset", 0))
    limit = int(request.REQUEST.get("limit", 30))
    page = int(request.REQUEST.get("page", 0))
    if page:
        offset = limit * page
    feed_id = int(request.REQUEST.get("feed_id", 0))
    if feed_id == 0:
        raise Http404

    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        feed_address = request.REQUEST.get("feed_address")
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
        else:
            raise Http404

    force_update = request.GET.get("force_update", False)

    now = datetime.datetime.utcnow()
    stories = feed.get_stories(offset, limit)

    if force_update:
        feed.update(force_update)

    # Get intelligence classifier for user
    classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)

    usersub = UserSubscription.objects.get(user=user, feed=feed)
    userstories = []
    userstories_db = MUserStory.objects(user_id=user.pk, feed_id=feed.pk, read_date__gte=usersub.mark_read_date)
    starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id=feed_id).only("story_guid", "starred_date")
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

    for us in userstories_db:
        if hasattr(us.story, "story_guid") and isinstance(us.story.story_guid, unicode):
            userstories.append(us.story.story_guid)
        elif hasattr(us.story, "id") and isinstance(us.story.id, unicode):
            userstories.append(us.story.id)  # TODO: Remove me after migration from story.id->guid

    for story in stories:
        classifier_feeds.rewind()
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date)
        story["long_parsed_date"] = format_story_link_date__long(story_date)
        if story["id"] in userstories:
            story["read_status"] = 1
        elif not story.get("read_status") and story["story_date"] < usersub.mark_read_date:
            story["read_status"] = 1
        elif not story.get("read_status") and story["story_date"] > usersub.last_read_date:
            story["read_status"] = 0
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date)
        story["intelligence"] = {
            "feed": apply_classifier_feeds(classifier_feeds, feed),
            "author": apply_classifier_authors(classifier_authors, story),
            "tags": apply_classifier_tags(classifier_tags, story),
            "title": apply_classifier_titles(classifier_titles, story),
        }

    # Intelligence
    feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else []
    feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else []
    classifiers = get_classifiers_for_user(
        user, feed_id, classifier_feeds, classifier_authors, classifier_titles, classifier_tags
    )

    usersub.feed_opens += 1
    usersub.save()

    diff = datetime.datetime.utcnow() - now
    timediff = float("%s.%s" % (diff.seconds, (diff.microseconds / 1000)))
    last_update = relative_timesince(feed.last_update)
    logging.info(" ---> [%s] ~FYLoading feed: ~SB%s ~SN(%s seconds)" % (request.user, feed, timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)

    data = dict(
        stories=stories,
        feed_tags=feed_tags,
        feed_authors=feed_authors,
        classifiers=classifiers,
        last_update=last_update,
        feed_id=feed.pk,
    )
    return data
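A closing note on the timing idiom in Examples #34 and #41: splicing seconds and milliseconds into a string drops leading zeros in the fractional part, so 1.002 seconds logs as 1.2 seconds. timedelta.total_seconds() (Python 2.7+) avoids the problem:

import datetime

diff = datetime.timedelta(seconds=1, microseconds=2345)
print float("%s.%s" % (diff.seconds, diff.microseconds / 1000))  # 1.2 -- wrong
print diff.total_seconds()                                       # 1.002345 -- right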