Example #1
 def run(self, feed_ids, **kwargs):
     from apps.rss_feeds.models import Feed
     
     if not isinstance(feed_ids, list):
         feed_ids = [feed_ids]
     
     Feed.setup_feeds_for_premium_subscribers(feed_ids)
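The isinstance() guard above is the idiom these tasks use to accept either a single id or a list of ids. A standalone sketch of that normalization (hypothetical helper name, no NewsBlur imports required):

def normalize_ids(feed_ids):
    # Wrap a bare id in a list so callers can pass either form
    if not isinstance(feed_ids, list):
        feed_ids = [feed_ids]
    return feed_ids

assert normalize_ids(42) == [42]
assert normalize_ids([1, 2]) == [1, 2]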
Example #2
 def run(self, feed_ids, **kwargs):
     from apps.rss_feeds.models import Feed
     
     if not isinstance(feed_ids, list):
         feed_ids = [feed_ids]
     
     Feed.schedule_feed_fetches_immediately(feed_ids)
Example #3
    def add_missing_feeds(self):
        all_feeds = self.flat()
        subs = [us.feed_id for us in
                UserSubscription.objects.filter(user=self.user).only('feed')]
        
        missing_subs = set(all_feeds) - set(subs)
        if missing_subs:
            logging.debug(" ---> %s is missing %s subs. Adding %s..." % (
                          self.user, len(missing_subs), missing_subs))
            for feed_id in missing_subs:
                feed = Feed.get_by_id(feed_id)
                if feed:
                    us, _ = UserSubscription.objects.get_or_create(user=self.user, feed=feed, defaults={
                        'needs_unread_recalc': True
                    })
                    if not us.needs_unread_recalc:
                        us.needs_unread_recalc = True
                        us.save()

        missing_folder_feeds = set(subs) - set(all_feeds)
        if missing_folder_feeds:
            user_sub_folders = json.decode(self.folders)
            logging.debug(" ---> %s is missing %s folder feeds. Adding %s..." % (
                          self.user, len(missing_folder_feeds), missing_folder_feeds))
            for feed_id in missing_folder_feeds:
                feed = Feed.get_by_id(feed_id)
                if feed and feed.pk == feed_id:
                    user_sub_folders = add_object_to_folder(feed_id, "", user_sub_folders)
            self.folders = json.encode(user_sub_folders)
            self.save()
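The reconciliation above runs on plain set differences: folder entries without subscriptions in one direction, subscriptions missing from the folder structure in the other. A minimal standalone sketch of that arithmetic:

# Feed ids found in the user's folder structure vs. actual subscriptions
folder_feed_ids = {1, 2, 3, 4}
subscribed_feed_ids = {2, 3, 5}

missing_subs = folder_feed_ids - subscribed_feed_ids          # need subscriptions
missing_folder_feeds = subscribed_feed_ids - folder_feed_ids  # need folder entries

assert missing_subs == {1, 4}
assert missing_folder_feeds == {5}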
Example #4
 def process_starred_items(self, stories):
     for story in stories:
         try:
             original_feed = Feed.get_feed_from_url(story["origin"]["streamId"], create=False, fetch=False)
             if not original_feed:
                 original_feed = Feed.get_feed_from_url(story["origin"]["htmlUrl"], create=False, fetch=False)
             content = story.get("content") or story.get("summary")
             story_db = {
                 "user_id": self.user.pk,
                 "starred_date": datetime.datetime.fromtimestamp(story["updated"]),
                 "story_date": datetime.datetime.fromtimestamp(story["published"]),
                 "story_title": story.get("title", story.get("origin", {}).get("title", "[Untitled]")),
                 "story_permalink": story["alternate"][0]["href"],
                 "story_guid": story["id"],
                 "story_content": content.get("content"),
                 "story_author_name": story.get("author"),
                 "story_feed_id": original_feed and original_feed.pk,
                 "story_tags": [tag for tag in story.get("categories", []) if "user/" not in tag],
             }
             logging.user(
                 self.user,
                 "~FCStarring: ~SB%s~SN in ~SB%s" % (story_db["story_title"][:50], original_feed and original_feed),
             )
             MStarredStory.objects.create(**story_db)
         except OperationError:
             logging.user(self.user, "~FCAlready starred: ~SB%s" % (story_db["story_title"][:50]))
         except Exception as e:
             logging.user(self.user, "~FC~BRFailed to star: ~SB%s / %s" % (story, e))
Example #5
 def process_starred_items(self, stories):
     for story in stories:
         try:
             original_feed = Feed.get_feed_from_url(story['origin']['streamId'], create=False, fetch=False)
             if not original_feed:
                 original_feed = Feed.get_feed_from_url(story['origin']['htmlUrl'], create=False, fetch=False)
             content = story.get('content') or story.get('summary')
             story_db = {
                 "user_id": self.user.pk,
                 "starred_date": datetime.datetime.fromtimestamp(story['updated']),
                 "story_date": datetime.datetime.fromtimestamp(story['published']),
                 "story_title": story.get('title', story.get('origin', {}).get('title', '[Untitled]')),
                 "story_permalink": story['alternate'][0]['href'],
                 "story_guid": story['id'],
                 "story_content": content.get('content'),
                 "story_author_name": story.get('author'),
                 "story_feed_id": original_feed and original_feed.pk,
                 "story_tags": [tag for tag in story.get('categories', []) if 'user/' not in tag]
             }
             logging.user(self.user, "~FCStarring: ~SB%s~SN in ~SB%s" % (story_db['story_title'][:50], original_feed and original_feed))
             MStarredStory.objects.create(**story_db)
         except OperationError:
             logging.user(self.user, "~FCAlready starred: ~SB%s" % (story_db['story_title'][:50]))
         except Exception as e:
             logging.user(self.user, "~FC~BRFailed to star: ~SB%s / %s" % (story, e))
             
Example #6
def feed_autocomplete(request):
    query = request.GET.get('term') or request.GET.get('query')
    version = int(request.GET.get('v', 1))
    format = request.GET.get('format', 'autocomplete')
    
    # user = get_user(request)
    # if True or not user.profile.is_premium:
    #     return dict(code=-1, message="Overloaded, no autocomplete results.", feeds=[], term=query)
    
    if not query:
        return dict(code=-1, message="Specify a search 'term'.", feeds=[], term=query)
    
    if '.' in query:
        try:
            parts = urlparse(query)
            if not parts.hostname and not query.startswith('http'):
                parts = urlparse('http://%s' % query)
            if parts.hostname:
                query = parts.hostname
        except Exception:
            logging.user(request, "~FGAdd search, could not parse url in ~FR%s" % query)
        
    feed_ids = Feed.autocomplete(query)
    feeds = list(set([Feed.get_by_id(feed_id) for feed_id in feed_ids]))
    feeds = [feed for feed in feeds if feed and not feed.branch_from_feed]
    if format == 'autocomplete':
        feeds = [{
            'id': feed.pk,
            'value': feed.feed_address,
            'label': feed.feed_title,
            'tagline': feed.data and feed.data.feed_tagline,
            'num_subscribers': feed.num_subscribers,
        } for feed in feeds]
    else:
        feeds = [feed.canonical(full=True) for feed in feeds]
    feeds = sorted(feeds, key=lambda f: -1 * f['num_subscribers'])
    
    feed_ids = [f['id'] for f in feeds]
    feed_icons = dict((icon.feed_id, icon) for icon in MFeedIcon.objects.filter(feed_id__in=feed_ids))
    
    for feed in feeds:
        if feed['id'] in feed_icons:
            feed_icon = feed_icons[feed['id']]
            if feed_icon.data:
                feed['favicon_color'] = feed_icon.color
                feed['favicon'] = feed_icon.data

    logging.user(request, "~FGAdd Search: ~SB%s ~SN(%s matches)" % (query, len(feeds),))
    
    if version > 1:
        return {
            'feeds': feeds,
            'term': query,
        }
    else:
        return feeds
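The '.' branch above normalizes a bare domain query down to its hostname before searching. A standalone sketch of that extraction, assuming Python 3's urllib.parse (the view imports urlparse from wherever the project provides it):

from urllib.parse import urlparse

def hostname_for_query(query):
    # Bare domains parse with an empty hostname, so retry with a scheme
    parts = urlparse(query)
    if not parts.hostname and not query.startswith('http'):
        parts = urlparse('http://%s' % query)
    return parts.hostname or query

assert hostname_for_query('example.com/feed') == 'example.com'
assert hostname_for_query('https://example.com/rss') == 'example.com'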
Example #7
def feed_autocomplete(request):
    query = request.GET.get("term") or request.GET.get("query")
    version = int(request.GET.get("v", 1))
    format = request.GET.get("format", "autocomplete")

    # user = get_user(request)
    # if True or not user.profile.is_premium:
    #     return dict(code=-1, message="Overloaded, no autocomplete results.", feeds=[], term=query)

    if not query:
        return dict(code=-1, message="Specify a search 'term'.", feeds=[], term=query)

    if "." in query:
        try:
            parts = urlparse(query)
            if not parts.hostname and not query.startswith("http"):
                parts = urlparse("http://%s" % query)
            if parts.hostname:
                query = parts.hostname
        except Exception:
            logging.user(request, "~FGAdd search, could not parse url in ~FR%s" % query)

    feed_ids = Feed.autocomplete(query)
    feeds = [Feed.get_by_id(feed_id) for feed_id in feed_ids]
    if format == "autocomplete":
        feeds = [
            {
                "id": feed.pk,
                "value": feed.feed_address,
                "label": feed.feed_title,
                "tagline": feed.data and feed.data.feed_tagline,
                "num_subscribers": feed.num_subscribers,
            }
            for feed in feeds
            if feed
        ]
    else:
        feeds = [feed.canonical(full=True) for feed in feeds if feed]
    feeds = sorted(feeds, key=lambda f: -1 * f["num_subscribers"])

    feed_ids = [f["id"] for f in feeds]
    feed_icons = dict((icon.feed_id, icon) for icon in MFeedIcon.objects.filter(feed_id__in=feed_ids))

    for feed in feeds:
        if feed["id"] in feed_icons:
            feed_icon = feed_icons[feed["id"]]
            if feed_icon.data:
                feed["favicon_color"] = feed_icon.color
                feed["favicon"] = feed_icon.data

    logging.user(request, "~FGAdd Search: ~SB%s ~SN(%s matches)" % (query, len(feeds)))

    if version > 1:
        return {"feeds": feeds, "term": query}
    else:
        return feeds
Example #8
def exception_retry(request):
    user = get_user(request)
    feed_id = get_argument_or_404(request, 'feed_id')
    reset_fetch = json.decode(request.POST['reset_fetch'])
    feed = Feed.get_by_id(feed_id)
    original_feed = feed
    
    if not feed:
        raise Http404
    
    feed.schedule_feed_fetch_immediately()
    changed = False
    if feed.has_page_exception:
        changed = True
        feed.has_page_exception = False
    if feed.has_feed_exception:
        changed = True
        feed.has_feed_exception = False
    if not feed.active:
        changed = True
        feed.active = True
    if changed:
        feed.save(update_fields=['has_page_exception', 'has_feed_exception', 'active'])
    
    original_fetched_once = feed.fetched_once
    if reset_fetch:
        logging.user(request, "~FRRefreshing exception feed: ~SB%s" % (feed))
        feed.fetched_once = False
    else:
        logging.user(request, "~FRForcing refreshing feed: ~SB%s" % (feed))
        
        feed.fetched_once = True
    if feed.fetched_once != original_fetched_once:
        feed.save(update_fields=['fetched_once'])

    feed = feed.update(force=True, compute_scores=False, verbose=True)
    feed = Feed.get_by_id(feed.pk)

    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        usersubs = UserSubscription.objects.filter(user=user, feed=original_feed)
        if usersubs:
            usersub = usersubs[0]
            usersub.switch_feed(feed, original_feed)
        else:
            return {'code': -1}
    usersub.calculate_feed_scores(silent=False)
    
    feeds = {feed.pk: usersub and usersub.canonical(full=True), feed_id: usersub.canonical(full=True)}
    return {'code': 1, 'feeds': feeds}
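The view above batches its flag changes and issues a single save() limited to the touched columns. A sketch of that pattern, assuming a Django-style model instance with the same three boolean fields:

def clear_feed_exceptions(feed):
    # Flip each exception flag, tracking whether anything actually changed
    changed = False
    if feed.has_page_exception:
        feed.has_page_exception = False
        changed = True
    if feed.has_feed_exception:
        feed.has_feed_exception = False
        changed = True
    if not feed.active:
        feed.active = True
        changed = True
    if changed:
        # update_fields keeps the UPDATE statement to just these columns
        feed.save(update_fields=['has_page_exception', 'has_feed_exception', 'active'])
    return changed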
Example #9
    def count_unreads_for_subscribers(self, feed):
        user_subs = UserSubscription.objects.filter(
            feed=feed, active=True, user__profile__last_seen_on__gte=feed.unread_cutoff
        ).order_by("-last_read_date")

        if not user_subs.count():
            return

        for sub in user_subs:
            if not sub.needs_unread_recalc:
                sub.needs_unread_recalc = True
                sub.save()

        if self.options["compute_scores"]:
            r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
            stories = MStory.objects(story_feed_id=feed.pk, story_date__gte=feed.unread_cutoff)
            stories = Feed.format_stories(stories, feed.pk)
            story_hashes = r.zrangebyscore(
                "zF:%s" % feed.pk, int(feed.unread_cutoff.strftime("%s")), int(time.time() + 60 * 60 * 24)
            )
            missing_story_hashes = set(story_hashes) - set([s["story_hash"] for s in stories])
            if missing_story_hashes:
                missing_stories = MStory.objects(
                    story_feed_id=feed.pk, story_hash__in=missing_story_hashes
                ).read_preference(pymongo.ReadPreference.PRIMARY)
                missing_stories = Feed.format_stories(missing_stories, feed.pk)
                stories = missing_stories + stories
                logging.debug(
                    u"   ---> [%-30s] ~FYFound ~SB~FC%s(of %s)/%s~FY~SN un-secondaried stories while computing scores"
                    % (feed.title[:30], len(missing_stories), len(missing_story_hashes), len(stories))
                )
            cache.set("S:%s" % feed.pk, stories, 60)
            logging.debug(
                u"   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)"
                % (
                    feed.title[:30],
                    len(stories),
                    user_subs.count(),
                    feed.num_subscribers,
                    feed.active_subscribers,
                    feed.premium_subscribers,
                )
            )
            self.calculate_feed_scores_with_stories(user_subs, stories)
        elif self.options.get("mongodb_replication_lag"):
            logging.debug(
                u"   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag"
                % (feed.title[:30], self.options.get("mongodb_replication_lag"))
            )
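The story hashes above come out of a Redis sorted set scored by timestamp, so zrangebyscore can select a time window directly. A standalone sketch (requires a running Redis; the key name mirrors the zF:<feed_id> convention above):

import time

import redis

r = redis.Redis()
now = time.time()
r.delete('zF:demo')
r.zadd('zF:demo', {'story:old': now - 86400 * 30, 'story:new': now})

# Everything newer than the cutoff, with a day of slack on the upper bound
cutoff = now - 86400 * 14
recent = r.zrangebyscore('zF:demo', cutoff, now + 86400)
assert recent == [b'story:new']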
Example #10
    def run(self, feed_pks, **kwargs):
        from apps.rss_feeds.models import Feed
        from apps.statistics.models import MStatistics
        r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)

        mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
        compute_scores = bool(mongodb_replication_lag < 10)
        
        profiler = DBProfilerMiddleware()
        profiler_activated = profiler.process_celery()
        if profiler_activated:
            mongo_middleware = MongoDumpMiddleware()
            mongo_middleware.process_celery(profiler)
            redis_middleware = RedisDumpMiddleware()
            redis_middleware.process_celery(profiler)
        
        options = {
            'quick': float(MStatistics.get('quick_fetch', 0)),
            'updates_off': MStatistics.get('updates_off', False),
            'compute_scores': compute_scores,
            'mongodb_replication_lag': mongodb_replication_lag,
        }
        
        if not isinstance(feed_pks, list):
            feed_pks = [feed_pks]
            
        for feed_pk in feed_pks:
            feed = Feed.get_by_id(feed_pk)
            if not feed or feed.pk != int(feed_pk):
                logging.info(" ---> ~FRRemoving feed_id %s from tasked_feeds queue, points to %s..." % (feed_pk, feed and feed.pk))
                r.zrem('tasked_feeds', feed_pk)
            if feed:
                feed.update(**options)
                if profiler_activated: profiler.process_celery_finished()
Example #11
 def forwards(self, orm):
     from apps.rss_feeds.models import MStory, Feed
     import time
     
     db = pymongo.Connection(settings.MONGODB_HOST)
     batch = 0
     start = 0
     for f in xrange(start, Feed.objects.latest('pk').pk):
         if f < batch*100000: continue
         start_time = time.time()
         try:
             try:
                 feed = Feed.get_by_id(f)
             except Feed.DoesNotExist:
                 continue
             if not feed: continue
             cp1 = time.time() - start_time
             # if feed.active_premium_subscribers < 1: continue
             stories = MStory.objects.filter(story_feed_id=feed.pk, story_hash__exists=False)\
                                     .only('_id', 'story_feed_id', 'story_permalink')
             cp2 = time.time() - start_time
             for story in stories:
                 db.newsblur.stories.update({"_id": story.id}, {"$set": {
                     "story_hash": story.story_hash
                 }})
             cp3 = time.time() - start_time
             print("%3s stories: %s (%s/%s/%s)" % (stories.count(), feed, round(cp1, 2), round(cp2, 2), round(cp3, 2)))
         except Exception as e:
             print(" ***> (%s) %s" % (f, e))
Example #12
def original_text(request):
    story_id = request.REQUEST.get('story_id')
    feed_id = request.REQUEST.get('feed_id')
    story_hash = request.REQUEST.get('story_hash', None)
    force = request.REQUEST.get('force', False)
    debug = request.REQUEST.get('debug', False)

    if story_hash:
        story, _ = MStory.find_story(story_hash=story_hash)
    else:
        story, _ = MStory.find_story(story_id=story_id, story_feed_id=feed_id)

    if not story:
        logging.user(request, "~FYFetching ~FGoriginal~FY story text: ~FRstory not found")
        return {'code': -1, 'message': 'Story not found.', 'original_text': None, 'failed': True}
    
    original_text = story.fetch_original_text(force=force, request=request, debug=debug)

    return {
        'feed_id': story.story_feed_id,
        'story_hash': story.story_hash,
        'story_id': story.story_guid,
        'image_urls': story.image_urls,
        'secure_image_urls': Feed.secure_image_urls(story.image_urls),
        'original_text': original_text,
        'failed': not original_text or len(original_text) < 100,
    }
Example #13
    def run(self, feed_pks, **kwargs):
        from apps.rss_feeds.models import Feed
        from apps.statistics.models import MStatistics
        r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)

        mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
        compute_scores = bool(mongodb_replication_lag < 10)
        
        options = {
            'quick': float(MStatistics.get('quick_fetch', 0)),
            'updates_off': MStatistics.get('updates_off', False),
            'compute_scores': compute_scores,
            'mongodb_replication_lag': mongodb_replication_lag,
        }
        
        if not isinstance(feed_pks, list):
            feed_pks = [feed_pks]
            
        for feed_pk in feed_pks:
            feed = Feed.get_by_id(feed_pk)
            if not feed or feed.pk != int(feed_pk):
                logging.info(" ---> ~FRRemoving feed_id %s from tasked_feeds queue, points to %s..." % (feed_pk, feed and feed.pk))
                r.zrem('tasked_feeds', feed_pk)
            if feed:
                feed.update(**options)
Example #14
def load_starred_stories(request):
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 10))
    page = int(request.REQUEST.get('page', 0))
    if page: offset = limit * (page - 1)
        
    mstories = MStarredStory.objects(user_id=user.pk).order_by('-starred_date')[offset:offset+limit]
    stories = Feed.format_stories(mstories)
    
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        starred_date = localtime_for_timezone(story['starred_date'], user.profile.timezone)
        story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['read_status'] = 1
        story['starred'] = True
        story['intelligence'] = {
            'feed': 0,
            'author': 0,
            'tags': 0,
            'title': 0,
        }
    
    logging.user(request, "~FCLoading starred stories: ~SB%s stories" % (len(stories)))
    
    return dict(stories=stories)
Example #15
 def run(self, feed_pks, **kwargs):
     from apps.rss_feeds.models import Feed
     from apps.statistics.models import MStatistics
     
     mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
     compute_scores = bool(mongodb_replication_lag < 10)
     
     options = {
         'fake': bool(MStatistics.get('fake_fetch')),
         'quick': float(MStatistics.get('quick_fetch', 0)),
         'compute_scores': compute_scores,
         'mongodb_replication_lag': mongodb_replication_lag,
     }
     
     if not isinstance(feed_pks, list):
         feed_pks = [feed_pks]
         
     for feed_pk in feed_pks:
         try:
             feed = Feed.get_by_id(feed_pk)
             if not feed:
                 raise Feed.DoesNotExist
             feed.update(**options)
         except Feed.DoesNotExist:
             logging.info(" ---> Feed doesn't exist: [%s]" % feed_pk)
Example #16
def api_share_new_story(request):
    user = request.user
    body = request.body_json
    fields = body.get('actionFields')
    story_url = urlnorm.normalize(fields['story_url'])
    content = fields.get('story_content', "")
    story_title = fields.get('story_title', "[Untitled]")
    story_author = fields.get('story_author', "")
    comments = fields.get('comments', None)

    feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
    
    if content:
        content = lxml.html.fromstring(content)
        content.make_links_absolute(story_url)
        content = lxml.html.tostring(content)
    
    shared_story = MSharedStory.objects.filter(user_id=user.pk,
                                               story_feed_id=feed and feed.pk or 0,
                                               story_guid=story_url).limit(1).first()
    if not shared_story:
        story_db = {
            "story_guid": story_url,
            "story_permalink": story_url,
            "story_title": story_title,
            "story_feed_id": feed and feed.pk or 0,
            "story_content": content,
            "story_author": story_author,
            "story_date": datetime.datetime.now(),
            "user_id": user.pk,
            "comments": comments,
            "has_comments": bool(comments),
        }
        shared_story = MSharedStory.objects.create(**story_db)
        socialsubs = MSocialSubscription.objects.filter(subscription_user_id=user.pk)
        for socialsub in socialsubs:
            socialsub.needs_unread_recalc = True
            socialsub.save()
        logging.user(request, "~BM~FYSharing story from ~SB~FCIFTTT~FY: ~SB%s: %s" % (story_url, comments))
    else:
        logging.user(request, "~BM~FY~SBAlready~SN shared story from ~SB~FCIFTTT~FY: ~SB%s: %s" % (story_url, comments))
    
    try:
        socialsub = MSocialSubscription.objects.get(user_id=user.pk, 
                                                    subscription_user_id=user.pk)
    except MSocialSubscription.DoesNotExist:
        socialsub = None
    
    if socialsub:
        socialsub.mark_story_ids_as_read([shared_story.story_hash], 
                                          shared_story.story_feed_id, 
                                          request=request)
    else:
        RUserStory.mark_read(user.pk, shared_story.story_feed_id, shared_story.story_hash)

    shared_story.publish_update_to_subscribers()
    
    return {"data": [{
        "id": shared_story and shared_story.story_guid,
        "url": shared_story and shared_story.blurblog_permalink()
    }]}
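The share endpoint above runs the submitted content through lxml to absolutize relative links against the story URL. A standalone sketch of that step (requires lxml):

import lxml.html

content = lxml.html.fromstring('<p><a href="/about">About</a></p>')
content.make_links_absolute('https://example.com/post/1')
html = lxml.html.tostring(content)
# b'<p><a href="https://example.com/about">About</a></p>'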
Example #17
    def run(self, feed_pks, **kwargs):
        try:
            from apps.rss_feeds.models import Feed
            #from apps.statistics.models import MStatistics
            r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)
            #mongodb_replication_lag = int(MStatistics.get('mongodb_replication_lag', 0))
            #compute_scores = bool(mongodb_replication_lag < 10)

            options = {
            #    'quick': float(MStatistics.get('quick_fetch', 0)),
            #    'compute_scores': compute_scores,
            #    'mongodb_replication_lag': mongodb_replication_lag,
            }

            if not isinstance(feed_pks, list):
                feed_pks = [feed_pks]

            for feed_pk in feed_pks:
                feed = Feed.get_by_id(feed_pk)
                if not feed or feed.pk != int(feed_pk):
                    logging.info(" ---> ~FRRemoving feed_id %s from tasked_feeds queue, points to %s..." % (feed_pk, feed and feed.pk))
                    r.zrem('tasked_feeds', feed_pk)
                if feed:
                    feed.update(**options)
        except Exception as e:
            logging.error(str(e) + traceback.format_exc() + '\n' +
                          'error from: UpdateFeeds\n')
            if settings.SEND_ERROR_MAILS:
                mail_admins("Error in UpdateFeeds", str(e) + '\n' + traceback.format_exc())
Example #18
 def forwards(self, orm):
     for f in xrange(Feed.objects.latest('pk').pk):
         feed = Feed.get_by_id(f)
         if not feed: continue
         stories = MStory.objects.filter(story_feed_id=feed.pk, story_hash__exists=False)
         print "%3s stories: %s " % (stories.count(), feed)
         for story in stories: story.save()
Example #19
    def count_unreads_for_subscribers(self, feed):
        UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        user_subs = UserSubscription.objects.filter(feed=feed, 
                                                    active=True,
                                                    user__profile__last_seen_on__gte=UNREAD_CUTOFF)\
                                            .order_by('-last_read_date')
        
        if not user_subs.count():
            return
            
        for sub in user_subs:
            if not sub.needs_unread_recalc:
                sub.needs_unread_recalc = True
                sub.save()

        if self.options['compute_scores']:
            stories = MStory.objects(story_feed_id=feed.pk,
                                     story_date__gte=UNREAD_CUTOFF)\
                            .read_preference(pymongo.ReadPreference.PRIMARY)
            stories = Feed.format_stories(stories, feed.pk)
            logging.debug(u'   ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)' % (
                          feed.title[:30], len(stories), user_subs.count(),
                          feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))        
            self.calculate_feed_scores_with_stories(user_subs, stories)
        elif self.options.get('mongodb_replication_lag'):
            logging.debug(u'   ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag' % (
              feed.title[:30], self.options.get('mongodb_replication_lag')))
Example #20
 def __unicode__(self):
     user = User.objects.get(pk=self.user_id)
     if self.feed_id:
         feed = Feed.get_by_id(self.feed_id)
     else:
         feed = User.objects.get(pk=self.social_user_id)
     return "%s - %s/%s: (%s) %s" % (user, self.feed_id, self.social_user_id, self.score, feed)
Example #21
def story_public_comments(request):
    format           = request.REQUEST.get('format', 'json')
    relative_user_id = request.REQUEST.get('user_id', None)
    feed_id          = int(request.REQUEST['feed_id'])
    story_id         = request.REQUEST['story_id']
  
    if not relative_user_id:
        relative_user_id = get_user(request).pk
    
    stories = MSharedStory.objects.filter(story_feed_id=feed_id, story_guid=story_id).limit(1)
    stories = Feed.format_stories(stories)
    stories, profiles = MSharedStory.stories_with_comments_and_profiles(stories, relative_user_id, 
                                                                        check_all=True,
                                                                        public=True)

    if format == 'html':
        stories = MSharedStory.attach_users_to_stories(stories, profiles)
        return render_to_response('social/story_comments.xhtml', {
            'story': stories[0],
        }, context_instance=RequestContext(request))
    else:
        return json.json_response(request, {
            'comments': stories[0]['public_comments'], 
            'user_profiles': profiles,
        })
Example #22
    def push_feed_notifications(cls, feed_id, new_stories, force=False):
        feed = Feed.get_by_id(feed_id)
        notifications = MUserFeedNotification.users_for_feed(feed.pk)
        logging.debug("   ---> [%-30s] ~FCPushing out notifications to ~SB%s users~SN for ~FB~SB%s stories" % (
                      feed, len(notifications), new_stories))
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        
        latest_story_hashes = r.zrange("zF:%s" % feed.pk, -1 * new_stories, -1)
        mstories = MStory.objects.filter(story_hash__in=latest_story_hashes).order_by('-story_date')
        stories = Feed.format_stories(mstories)
        total_sent_count = 0
        
        for user_feed_notification in notifications:
            sent_count = 0
            last_notification_date = user_feed_notification.last_notification_date
            try:
                usersub = UserSubscription.objects.get(user=user_feed_notification.user_id,
                                                       feed=user_feed_notification.feed_id)
            except UserSubscription.DoesNotExist:
                continue
            classifiers = user_feed_notification.classifiers(usersub)

            if classifiers is None:
                logging.debug("Has no usersubs")
                continue

            for story in stories:
                if sent_count >= 3:
                    logging.debug("Sent too many, ignoring...")
                    continue                    
                if story['story_date'] <= last_notification_date and not force:
                    logging.debug("Story date older than last notification date: %s <= %s" % (story['story_date'], last_notification_date))
                    continue
                
                if story['story_date'] > user_feed_notification.last_notification_date:
                    user_feed_notification.last_notification_date = story['story_date']
                    user_feed_notification.save()
                
                story['story_content'] = HTMLParser().unescape(story['story_content'])
                
                sent = user_feed_notification.push_story_notification(story, classifiers, usersub)
                if sent: 
                    sent_count += 1
                    total_sent_count += 1
        return total_sent_count, len(notifications)
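The zrange call above uses negative indices to take the N most recently scored hashes from the tail of the sorted set. A standalone sketch (requires a running Redis; key and members are illustrative):

import redis

r = redis.Redis()
r.delete('zF:demo')
r.zadd('zF:demo', {'h1': 1, 'h2': 2, 'h3': 3})

new_stories = 2
latest = r.zrange('zF:demo', -1 * new_stories, -1)
assert latest == [b'h2', b'h3']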
Example #23
 def run(self, feed_pks, **kwargs):
     from apps.rss_feeds.models import Feed
     if not isinstance(feed_pks, list):
         feed_pks = [feed_pks]
     
     options = {}
     for feed_pk in feed_pks:
         feed = Feed.get_by_id(feed_pk)
         if feed:
             feed.update(options=options)
Example #24
def fetch_address_from_page(url, existing_feed=None):
    from apps.rss_feeds.models import Feed
    feed_finder_url = feedfinder.feed(url)
    if feed_finder_url:
        if existing_feed:
            if Feed.objects.filter(feed_address=feed_finder_url):
                return None
            existing_feed.feed_address = feed_finder_url
            existing_feed.save()
            feed = existing_feed
        else:
            try:
                feed = Feed.objects.get(feed_address=feed_finder_url)
            except Feed.DoesNotExist:
                feed = Feed(feed_address=feed_finder_url)
                feed.save()
                feed.update()
        return feed
Example #25
 def handle(self, *args, **options):
     if options['daemonize']:
         daemonize()
     
     if options['title']:
         feed = Feed.objects.get(feed_title__icontains=options['title'])
     else:
         feed = Feed.get_by_id(options['feed'])
     feed.update(force=options['force'], single_threaded=True, verbose=2)
Example #26
def search_feed(request):
    address = request.REQUEST['address']
    offset = int(request.REQUEST.get('offset', 0))
    feed = Feed.get_feed_from_url(address, create=False, aggressive=True, offset=offset)
    
    if feed:
        return feed.canonical()
    else:
        return dict(code=-1, message="No feed found matching that XML or website address.")
Example #27
def force_push(request):
    user = get_user(request)
    feed_id = request.REQUEST['feed_id']
    count = int(request.REQUEST.get('count', 1))
    
    logging.user(user, "~BM~FWForce pushing %s stories: ~SB%s" % (count, Feed.get_by_id(feed_id)))
    sent_count, user_count = MUserFeedNotification.push_feed_notifications(feed_id, new_stories=count, force=True)
    
    return {"message": "Pushed %s notifications to %s users" % (sent_count, user_count)}
Example #28
    def process_item(self, item, folders):
        feed_title = item.xpath('./string[@name="title"]') and item.xpath('./string[@name="title"]')[0].text
        feed_address = item.xpath('./string[@name="id"]') and item.xpath('./string[@name="id"]')[0].text.replace(
            "feed/", ""
        )
        feed_link = item.xpath('./string[@name="htmlUrl"]') and item.xpath('./string[@name="htmlUrl"]')[0].text
        category = (
            item.xpath('./list[@name="categories"]/object/string[@name="label"]')
            and item.xpath('./list[@name="categories"]/object/string[@name="label"]')[0].text
        )

        if not feed_address:
            feed_address = feed_link

        try:
            feed_link = urlnorm.normalize(feed_link)
            feed_address = urlnorm.normalize(feed_address)

            if len(feed_address) > Feed._meta.get_field("feed_address").max_length:
                return folders

            # See if it exists as a duplicate first
            duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
            if duplicate_feed:
                feed_db = duplicate_feed[0].feed
            else:
                feed_data = dict(feed_title=feed_title)
                feed_data["active_subscribers"] = 1
                feed_data["num_subscribers"] = 1
                feed_db, _ = Feed.find_or_create(
                    feed_address=feed_address, feed_link=feed_link, defaults=dict(**feed_data)
                )

            us, _ = UserSubscription.objects.get_or_create(
                feed=feed_db,
                user=self.user,
                defaults={
                    "needs_unread_recalc": True,
                    "mark_read_date": datetime.datetime.utcnow() - datetime.timedelta(days=1),
                    "active": self.user.profile.is_premium or self.auto_active,
                },
            )
            if not us.needs_unread_recalc:
                us.needs_unread_recalc = True
                us.save()
            if not category:
                category = ""

            if category:
                obj = {category: []}
                folders = add_object_to_folder(obj, "", folders)
            folders = add_object_to_folder(feed_db.pk, category, folders)
            # if feed_db.pk not in folders[category]:
            #     folders[category].append(feed_db.pk)
        except Exception as e:
            logging.info(" *** -> Exception: %s: %s" % (e, item))
Example #29
def search_feed(request):
    address = request.REQUEST.get("address")
    offset = int(request.REQUEST.get("offset", 0))
    if not address:
        return dict(code=-1, message="Please provide a URL/address.")

    feed = Feed.get_feed_from_url(address, create=False, aggressive=True, offset=offset)
    if feed:
        return feed.canonical()
    else:
        return dict(code=-1, message="No feed found matching that XML or website address.")
Example #30
    def add_subscription(cls, user, feed_address, folder=None, bookmarklet=False, auto_active=True,
                         skip_fetch=False):
        feed = None
        us = None
    
        logging.user(user, "~FRAdding URL: ~SB%s (in %s) %s" % (feed_address, folder, 
                                                                "~FCAUTO-ADD" if not auto_active else ""))
    
        feed = Feed.get_feed_from_url(feed_address)

        if not feed:    
            code = -1
            if bookmarklet:
                message = "This site does not have an RSS feed. Nothing is linked to from this page."
            else:
                message = "This address does not point to an RSS feed or a website with an RSS feed."
        else:
            us, subscription_created = cls.objects.get_or_create(
                feed=feed, 
                user=user,
                defaults={
                    'needs_unread_recalc': True,
                    'active': auto_active,
                }
            )
            code = 1
            message = ""
    
        if us:
            user_sub_folders_object, created = UserSubscriptionFolders.objects.get_or_create(
                user=user,
                defaults={'folders': '[]'}
            )
            if created:
                user_sub_folders = []
            else:
                user_sub_folders = json.decode(user_sub_folders_object.folders)
            user_sub_folders = add_object_to_folder(feed.pk, folder, user_sub_folders)
            user_sub_folders_object.folders = json.encode(user_sub_folders)
            user_sub_folders_object.save()
            
            if auto_active or user.profile.is_premium:
                us.active = True
                us.save()
        
            if not skip_fetch and feed.last_update < datetime.datetime.utcnow() - datetime.timedelta(days=1):
                feed = feed.update()
            
            from apps.social.models import MActivity
            MActivity.new_feed_subscription(user_id=user.pk, feed_id=feed.pk, feed_title=feed.title)
                
            feed.setup_feed_for_premium_subscribers()
        
        return code, message, us
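Folders are stored as a JSON string, so every mutation above is a decode, modify, encode round-trip. A standalone sketch using the stdlib json module in place of the project's json helpers (decode/encode correspond to loads/dumps):

import json

folders_json = '[]'
folders = json.loads(folders_json)
folders.append(42)  # stand-in for add_object_to_folder placing a feed id at the root
folders_json = json.dumps(folders)
assert folders_json == '[42]'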
Example #31
def api_saved_story(request):
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    story_tag = fields['story_tag']
    entries = []

    if story_tag == "all":
        story_tag = ""

    params = dict(user_id=user.pk)
    if story_tag:
        params.update(dict(user_tags__contains=story_tag))
    mstories = MStarredStory.objects(
        **params).order_by('-starred_date')[:limit]
    stories = Feed.format_stories(mstories)

    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    for story in stories:
        if before and int(story['story_date'].strftime("%s")) > before:
            continue
        if after and int(story['story_date'].strftime("%s")) < after: continue
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "SavedAt": story['starred_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "Tags": ', '.join(story['user_tags']),
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "ifttt": {
                "id": story['story_hash'],
                "timestamp": int(story['starred_date'].strftime("%s"))
            },
        })

    if after:
        entries = sorted(entries, key=lambda s: s['ifttt']['timestamp'])

    logging.user(
        request,
        "~FCChecking saved stories from ~SBIFTTT~SB: ~SB%s~SN - ~SB%s~SN stories"
        % (story_tag if story_tag else "[All stories]", len(entries)))

    return {"data": entries}
Example #32
def share_story(request, token=None):
    code = 0
    story_url = request.POST['story_url']
    comments = request.POST['comments']
    title = request.POST['title']
    content = request.POST.get('content', None)
    rss_url = request.POST.get('rss_url', None)
    feed_id = request.POST.get('feed_id', None) or 0
    feed = None
    message = None
    profile = None

    if request.user.is_authenticated:
        profile = request.user.profile
    else:
        try:
            profile = Profile.objects.get(secret_token=token)
        except Profile.DoesNotExist:
            code = -1
            if token:
                message = "Not authenticated, couldn't find user by token."
            else:
                message = "Not authenticated, no token supplied and not authenticated."

    if not profile:
        return HttpResponse(json.encode({
            'code': code,
            'message': message,
            'story': None,
        }), content_type='text/plain')

    if feed_id:
        feed = Feed.get_by_id(feed_id)
    else:
        if rss_url:
            logging.user(request.user,
                         "~FBFinding feed (share_story): %s" % rss_url)
            feed = Feed.get_feed_from_url(rss_url, create=True, fetch=True)
        if not feed:
            logging.user(request.user,
                         "~FBFinding feed (share_story): %s" % story_url)
            feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
        if feed:
            feed_id = feed.pk

    if content:
        content = lxml.html.fromstring(content)
        content.make_links_absolute(story_url)
        content = lxml.html.tostring(content)
    else:
        importer = TextImporter(story=None,
                                story_url=story_url,
                                request=request,
                                debug=settings.DEBUG)
        document = importer.fetch(skip_save=True, return_document=True)
        content = document['content']
        if not title:
            title = document['title']

    shared_story = MSharedStory.objects.filter(
        user_id=profile.user.pk, story_feed_id=feed_id,
        story_guid=story_url).limit(1).first()
    if not shared_story:
        story_db = {
            "story_guid": story_url,
            "story_permalink": story_url,
            "story_title": title,
            "story_feed_id": feed_id,
            "story_content": content,
            "story_date": datetime.datetime.now(),
            "user_id": profile.user.pk,
            "comments": comments,
            "has_comments": bool(comments),
        }
        shared_story = MSharedStory.objects.create(**story_db)
        socialsubs = MSocialSubscription.objects.filter(
            subscription_user_id=profile.user.pk)
        for socialsub in socialsubs:
            socialsub.needs_unread_recalc = True
            socialsub.save()
        logging.user(
            profile.user,
            "~BM~FYSharing story from site: ~SB%s: %s" % (story_url, comments))
        message = "Sharing story from site: %s: %s" % (story_url, comments)
    else:
        shared_story.story_content = content
        shared_story.story_title = title
        shared_story.comments = comments
        shared_story.story_permalink = story_url
        shared_story.story_guid = story_url
        shared_story.has_comments = bool(comments)
        shared_story.story_feed_id = feed_id
        shared_story.save()
        logging.user(
            profile.user,
            "~BM~FY~SBUpdating~SN shared story from site: ~SB%s: %s" %
            (story_url, comments))
        message = "Updating shared story from site: %s: %s" % (story_url,
                                                               comments)
    try:
        socialsub = MSocialSubscription.objects.get(
            user_id=profile.user.pk, subscription_user_id=profile.user.pk)
    except MSocialSubscription.DoesNotExist:
        socialsub = None

    if socialsub:
        socialsub.mark_story_ids_as_read([shared_story.story_hash],
                                         shared_story.story_feed_id,
                                         request=request)
    else:
        RUserStory.mark_read(profile.user.pk, shared_story.story_feed_id,
                             shared_story.story_hash)

    shared_story.publish_update_to_subscribers()

    response = HttpResponse(json.encode({
        'code': code,
        'message': message,
        'story': shared_story,
    }), content_type='text/plain')
    response['Access-Control-Allow-Origin'] = '*'
    response['Access-Control-Allow-Methods'] = 'POST'

    return response
Example #33
def api_share_new_story(request):
    user = request.user
    body = request.body_json
    fields = body.get('actionFields')
    story_url = urlnorm.normalize(fields['story_url'])
    story_content = fields.get('story_content', "")
    story_title = fields.get('story_title', "")
    story_author = fields.get('story_author', "")
    comments = fields.get('comments', None)

    original_feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)

    if not story_content or not story_title:
        ti = TextImporter(feed=original_feed,
                          story_url=story_url,
                          request=request)
        original_story = ti.fetch(return_document=True)
        if original_story:
            story_url = original_story['url']
            if not story_content:
                story_content = original_story['content']
            if not story_title:
                story_title = original_story['title']

    story_content = lxml.html.fromstring(story_content)
    story_content.make_links_absolute(story_url)
    story_content = lxml.html.tostring(story_content)

    shared_story = MSharedStory.objects.filter(
        user_id=user.pk,
        story_feed_id=original_feed and original_feed.pk or 0,
        story_guid=story_url).limit(1).first()
    if not shared_story:
        story_db = {
            "story_guid": story_url,
            "story_permalink": story_url,
            "story_title": story_title or "[Untitled]",
            "story_feed_id": original_feed and original_feed.pk or 0,
            "story_content": story_content,
            "story_author": story_author,
            "story_date": datetime.datetime.now(),
            "user_id": user.pk,
            "comments": comments,
            "has_comments": bool(comments),
        }
        shared_story = MSharedStory.objects.create(**story_db)
        socialsubs = MSocialSubscription.objects.filter(
            subscription_user_id=user.pk)
        for socialsub in socialsubs:
            socialsub.needs_unread_recalc = True
            socialsub.save()
        logging.user(
            request, "~BM~FYSharing story from ~SB~FCIFTTT~FY: ~SB%s: %s" %
            (story_url, comments))
    else:
        logging.user(
            request,
            "~BM~FY~SBAlready~SN shared story from ~SB~FCIFTTT~FY: ~SB%s: %s" %
            (story_url, comments))

    try:
        socialsub = MSocialSubscription.objects.get(
            user_id=user.pk, subscription_user_id=user.pk)
    except MSocialSubscription.DoesNotExist:
        socialsub = None

    if socialsub:
        socialsub.mark_story_ids_as_read([shared_story.story_hash],
                                         shared_story.story_feed_id,
                                         request=request)
    else:
        RUserStory.mark_read(user.pk, shared_story.story_feed_id,
                             shared_story.story_hash)

    shared_story.publish_update_to_subscribers()

    return {
        "data": [{
            "id": shared_story and shared_story.story_guid,
            "url": shared_story and shared_story.blurblog_permalink()
        }]
    }
Example #34
def check_share_on_site(request, token):
    code = 0
    story_url = request.GET['story_url']
    rss_url = request.GET.get('rss_url')
    callback = request.GET['callback']
    other_stories = None
    same_stories = None
    usersub = None
    message = None
    user = None

    if not story_url:
        code = -1
    else:
        try:
            user_profile = Profile.objects.get(secret_token=token)
            user = user_profile.user
        except Profile.DoesNotExist:
            code = -1

    feed = Feed.get_feed_from_url(rss_url, create=False, fetch=False)
    if not feed:
        feed = Feed.get_feed_from_url(story_url, create=False, fetch=False)
    if not feed:
        parsed_url = urlparse.urlparse(story_url)
        base_url = "%s://%s%s" % (parsed_url.scheme, parsed_url.hostname,
                                  parsed_url.path)
        feed = Feed.get_feed_from_url(base_url, create=False, fetch=False)
    if not feed:
        feed = Feed.get_feed_from_url(base_url + '/',
                                      create=False,
                                      fetch=False)

    if feed and user:
        try:
            usersub = UserSubscription.objects.get(user=user, feed=feed)
        except UserSubscription.DoesNotExist:
            usersub = None
    feed_id = feed and feed.pk
    your_story, same_stories, other_stories = MSharedStory.get_shared_stories_from_site(
        feed_id, user_id=user_profile.user.pk, story_url=story_url)
    previous_stories = MSharedStory.objects.filter(
        user_id=user_profile.user.pk).order_by('-shared_date').limit(3)
    previous_stories = [{
        "user_id": story.user_id,
        "story_title": story.story_title,
        "comments": story.comments,
        "shared_date": story.shared_date,
        "relative_date": relative_timesince(story.shared_date),
        "blurblog_permalink": story.blurblog_permalink(),
    } for story in previous_stories]

    user_ids = set([user_profile.user.pk])
    for story in same_stories:
        user_ids.add(story['user_id'])
    for story in other_stories:
        user_ids.add(story['user_id'])

    users = {}
    profiles = MSocialProfile.profiles(user_ids)
    for profile in profiles:
        users[profile.user_id] = {
            "username": profile.username,
            "photo_url": profile.photo_url,
        }

    logging.user(user_profile.user,
                 "~BM~FCChecking share from site: ~SB%s" % (story_url),
                 request=request)

    response = HttpResponse(callback + '(' +
                            json.encode({
                                'code': code,
                                'message': message,
                                'feed': feed,
                                'subscribed': bool(usersub),
                                'your_story': your_story,
                                'same_stories': same_stories,
                                'other_stories': other_stories,
                                'previous_stories': previous_stories,
                                'users': users,
                            }) + ')',
                            content_type='text/plain')
    response['Access-Control-Allow-Origin'] = '*'
    response['Access-Control-Allow-Methods'] = 'GET'

    return response
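The response above is JSONP: the payload is serialized and wrapped in the caller-supplied callback name. A standalone sketch with the stdlib json module (the project's json.encode plays the same role):

import json

callback = 'handleShare'
payload = {'code': 1, 'message': None}
body = callback + '(' + json.dumps(payload) + ')'
assert body == 'handleShare({"code": 1, "message": null})'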
Example #35
 def refresh_feed(self, feed_id):
     """Update feed, since it may have changed"""
     return Feed.get_by_id(feed_id)
Example #36
    def calculate_feed_scores(self, silent=False, stories_db=None):
        now = datetime.datetime.utcnow()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        if self.user.profile.last_seen_on < UNREAD_CUTOFF:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return
        
        if not self.feed.fetched_once:
            if not silent:
                logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
            self.needs_unread_recalc = False
            self.save()
            return

        if not silent:
            logging.info(' ---> [%s] Computing scores: %s' % (self.user, self.feed))
        feed_scores = dict(negative=0, neutral=0, positive=0)
        
        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta
            
        read_stories = MUserStory.objects(user_id=self.user.pk,
                                          feed_id=self.feed.pk,
                                          read_date__gte=self.mark_read_date)
        # if not silent:
        #     logging.info(' ---> [%s]    Read stories: %s' % (self.user, datetime.datetime.now() - now))
        read_stories_ids = []
        for us in read_stories:
            if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
                read_stories_ids.append(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                read_stories_ids.append(us.story.id) # TODO: Remove me after migration from story.id->guid
        stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
                                                  story_date__gte=date_delta)
        # if not silent:
        #     logging.info(' ---> [%s]    MStory: %s' % (self.user, datetime.datetime.now() - now))
        oldest_unread_story_date = now
        unread_stories_db = []
        for story in stories_db:
            if story.story_date < date_delta:
                continue
            if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
                unread_stories_db.append(story)
                if story.story_date < oldest_unread_story_date:
                    oldest_unread_story_date = story.story_date
        stories = Feed.format_stories(unread_stories_db, self.feed.pk)
        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))
        
        classifier_feeds   = list(MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_titles  = list(MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk))
        classifier_tags    = list(MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))
            
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }
        
        for story in stories:
            scores.update({
                'author' : apply_classifier_authors(classifier_authors, story),
                'tags'   : apply_classifier_tags(classifier_tags, story),
                'title'  : apply_classifier_titles(classifier_titles, story),
            })
            
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1
                
        
        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))
            
        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False
        
        self.save()

        # if (self.unread_count_positive == 0 and 
        #     self.unread_count_neutral == 0):
        #     self.mark_feed_read()
        
        cache.delete('usersub:%s' % self.user.id)
        
        return
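A minimal sketch of the bucketing rule used above (illustrative names, not from the source): a story counts as positive if any of its author/tag/title classifier scores is positive, negative if any is negative, and otherwise falls back to the feed-level score.

def bucket_story(author_score, tags_score, title_score, feed_score):
    """Mirror the max/min logic above: story-level classifiers win,
    and the feed-level classifier only breaks ties."""
    max_score = max(author_score, tags_score, title_score)
    min_score = min(author_score, tags_score, title_score)
    if max_score > 0:
        return 'positive'
    if min_score < 0:
        return 'negative'
    if feed_score > 0:
        return 'positive'
    if feed_score < 0:
        return 'negative'
    return 'neutral'

assert bucket_story(1, 0, 0, -1) == 'positive'  # story classifier beats feed
assert bucket_story(0, -1, 0, 1) == 'negative'
assert bucket_story(0, 0, 0, 0) == 'neutral'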
Example #37
0
def api_unread_story(request, trigger_slug=None):
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    feed_or_folder = fields['feed_or_folder']
    entries = []

    if isinstance(feed_or_folder, int) or feed_or_folder.isdigit():
        feed_id = int(feed_or_folder)
        try:
            usersub = UserSubscription.objects.get(user=user, feed_id=feed_id)
        except UserSubscription.DoesNotExist:
            return dict(data=[])
        found_feed_ids = [feed_id]
        found_trained_feed_ids = [feed_id] if usersub.is_trained else []
        stories = usersub.get_stories(order="newest", read_filter="unread", 
                                      offset=0, limit=limit,
                                      default_cutoff_date=user.profile.unread_cutoff)
    else:
        folder_title = feed_or_folder
        if folder_title == "Top Level":
            folder_title = " "
        usf = UserSubscriptionFolders.objects.get(user=user)
        flat_folders = usf.flatten_folders()
        feed_ids = None
        if folder_title != "all":
            feed_ids = flat_folders.get(folder_title)
        usersubs = UserSubscription.subs_for_feeds(user.pk, feed_ids=feed_ids,
                                                   read_filter="unread")
        feed_ids = [sub.feed_id for sub in usersubs]
        params = {
            "user_id": user.pk, 
            "feed_ids": feed_ids,
            "offset": 0,
            "limit": limit,
            "order": "newest",
            "read_filter": "unread",
            "usersubs": usersubs,
            "cutoff_date": user.profile.unread_cutoff,
        }
        story_hashes, unread_feed_story_hashes = UserSubscription.feed_stories(**params)
        mstories = MStory.objects(story_hash__in=story_hashes).order_by('-story_date')
        stories = Feed.format_stories(mstories)
        found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
        trained_feed_ids = [sub.feed_id for sub in usersubs if sub.is_trained]
        found_trained_feed_ids = list(set(trained_feed_ids) & set(found_feed_ids))
    
    if found_trained_feed_ids:
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk,
                                                        feed_id__in=found_trained_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, 
                                                            feed_id__in=found_trained_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, 
                                                          feed_id__in=found_trained_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk, 
                                                      feed_id__in=found_trained_feed_ids))
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    for story in stories:
        if before and int(story['story_date'].strftime("%s")) > before: continue
        if after and int(story['story_date'].strftime("%s")) < after: continue
        score = 0
        if found_trained_feed_ids and story['story_feed_id'] in found_trained_feed_ids:
            score = compute_story_score(story, classifier_titles=classifier_titles, 
                                        classifier_authors=classifier_authors, 
                                        classifier_tags=classifier_tags,
                                        classifier_feeds=classifier_feeds)
            if score < 0: continue
            if trigger_slug == "new-unread-focus-story" and score < 1: continue
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "meta": {
                "id": story['story_hash'],
                "timestamp": int(story['story_date'].strftime("%s"))
            },
        })
    
    if after:
        entries = sorted(entries, key=lambda s: s['meta']['timestamp'])
        
    logging.user(request, "~FYChecking unread%s stories with ~SB~FCIFTTT~SN~FY: ~SB%s~SN - ~SB%s~SN stories" % (" ~SBfocus~SN" if trigger_slug == "new-unread-focus-story" else "", feed_or_folder, len(entries)))
    
    return {"data": entries[:limit]}
Example #38
0
    def get_stories(self, offset=0, limit=6, order='newest', read_filter='all', withscores=False, hashes_only=False):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        ignore_user_stories = False
        
        stories_key         = 'F:%s' % (self.feed_id)
        read_stories_key    = 'RS:%s:%s' % (self.user_id, self.feed_id)
        unread_stories_key  = 'U:%s:%s' % (self.user_id, self.feed_id)

        unread_ranked_stories_key  = 'z%sU:%s:%s' % ('h' if hashes_only else '', 
                                                     self.user_id, self.feed_id)
        if offset and not withscores and r.exists(unread_ranked_stories_key):
            pass
        else:
            r.delete(unread_ranked_stories_key)
            if not r.exists(stories_key):
                print " ---> No stories on feed: %s" % self
                return []
            elif read_filter != 'unread' or not r.exists(read_stories_key):
                ignore_user_stories = True
                unread_stories_key = stories_key
            else:
                r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
            sorted_stories_key          = 'zF:%s' % (self.feed_id)
            r.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])
        
        current_time    = int(time.time() + 60*60*24)
        if order == 'oldest':
            byscorefunc = r.zrangebyscore
            if read_filter == 'unread':
                min_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
            else:
                now = datetime.datetime.now()
                two_weeks_ago = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
                min_score = int(time.mktime(two_weeks_ago.timetuple()))-1000
            max_score = current_time
        else:
            byscorefunc = r.zrevrangebyscore
            min_score = current_time
            if read_filter == 'unread':
                # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
                max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
            else:
                max_score = 0

        if settings.DEBUG:
            debug_stories = r.zrevrange(unread_ranked_stories_key, 0, -1, withscores=True)
            print " ---> Unread all stories (%s - %s) %s stories: %s" % (
                min_score,
                max_score,
                len(debug_stories),
                debug_stories)
        story_ids = byscorefunc(unread_ranked_stories_key, min_score, 
                                  max_score, start=offset, num=500,
                                  withscores=withscores)[:limit]
        r.expire(unread_ranked_stories_key, 24*60*60)
        if not ignore_user_stories:
            r.delete(unread_stories_key)
        
        if withscores or hashes_only:
            return story_ids
        elif story_ids:
            story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
            mstories = MStory.objects(story_hash__in=story_ids).order_by(story_date_order)
            stories = Feed.format_stories(mstories)
            return stories
        else:
            return []
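A compact sketch of the Redis key scheme this method leans on, assuming a local Redis and a modern redis-py (dict-style zadd): F: holds a feed's story hashes, RS: the hashes a user has read, and zF: the hashes scored by story date.

import redis

r = redis.Redis()  # assumes a local Redis instance

feed_id, user_id = 1, 42
r.sadd('F:%s' % feed_id, 'h1', 'h2', 'h3')                    # all story hashes
r.sadd('RS:%s:%s' % (user_id, feed_id), 'h2')                 # hashes already read
r.zadd('zF:%s' % feed_id, {'h1': 100, 'h2': 200, 'h3': 300})  # date-scored hashes

# Unread = all stories minus read stories, then ranked by story date.
r.sdiffstore('U:%s:%s' % (user_id, feed_id),
             'F:%s' % feed_id, 'RS:%s:%s' % (user_id, feed_id))
r.zinterstore('zU:%s:%s' % (user_id, feed_id),
              ['zF:%s' % feed_id, 'U:%s:%s' % (user_id, feed_id)])
print(r.zrevrange('zU:%s:%s' % (user_id, feed_id), 0, -1))    # [b'h3', b'h1']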
Example #39
0
 def __unicode__(self):
     feed = Feed.get_by_id(self.feed_id)
     return "%s: %s" % (self.category_title, feed)
Example #40
0
 def refresh_feed(self):
     self.feed = Feed.get_by_id(self.feed_id)
     if self.feed_id != self.feed.pk:
         logging.debug(" ***> Feed has changed: from %s to %s" %
                       (self.feed_id, self.feed.pk))
         self.feed_id = self.feed.pk
Example #41
0
 def __init__(self, feed_id, options):
     self.feed = Feed.get_by_id(feed_id)
     self.options = options
     self.fpf = None
Example #42
0
                logging.debug('   ---> [%-30s] ~FRFeed is now gone...' %
                              (unicode(feed_id)[:30]))
                continue
            except TimeoutError, e:
                logging.debug('   ---> [%-30s] ~FRFeed fetch timed out...' %
                              (feed.title[:30]))
                feed.save_feed_history(505, 'Timeout', e)
                feed_code = 505
                fetched_feed = None
            except Exception, e:
                logging.debug('[%d] ! -------------------------' % (feed_id, ))
                tb = traceback.format_exc()
                logging.error(tb)
                logging.debug('[%d] ! -------------------------' % (feed_id, ))
                ret_feed = FEED_ERREXC
                feed = Feed.get_by_id(getattr(feed, 'pk', feed_id))
                if not feed: continue
                feed.save_feed_history(500, "Error", tb)
                feed_code = 500
                fetched_feed = None
                # mail_feed_error_to_admin(feed, e, local_vars=locals())
                if (not settings.DEBUG and hasattr(settings, 'RAVEN_CLIENT')
                        and settings.RAVEN_CLIENT):
                    settings.RAVEN_CLIENT.captureException()

            if not feed_code:
                if ret_feed == FEED_OK:
                    feed_code = 200
                elif ret_feed == FEED_SAME:
                    feed_code = 304
                elif ret_feed == FEED_ERRHTTP:
Example #43
0
def api_share_new_story(request):
    user = request.user
    body = request.body_json
    fields = body.get('actionFields')
    story_url = urlnorm.normalize(fields['story_url'])
    story_content = fields.get('story_content', "")
    story_title = fields.get('story_title', "")
    story_author = fields.get('story_author', "")
    comments = fields.get('comments', None)
        
    logging.user(request.user, "~FBFinding feed (api_share_new_story): %s" % story_url)
    original_feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
    story_hash = MStory.guid_hash_unsaved(story_url)
    feed_id = (original_feed and original_feed.pk or 0)
    if not user.profile.is_premium and MSharedStory.feed_quota(user.pk, story_hash, feed_id=feed_id):
        return {"errors": [{
            'message': 'Only premium users can share multiple stories per day from the same site.'
        }]}
    
    quota = 3
    if MSharedStory.feed_quota(user.pk, story_hash, quota=quota):
        logging.user(request, "~BM~FRNOT ~FYSharing story from ~SB~FCIFTTT~FY, over quota: ~SB%s: %s" % (story_url, comments))        
        return {"errors": [{
            'message': 'You can only share %s stories per day.' % quota
        }]}
        
    if not story_content or not story_title:
        ti = TextImporter(feed=original_feed, story_url=story_url, request=request)
        original_story = ti.fetch(return_document=True)
        if original_story:
            story_url = original_story['url']
            if not story_content:
                story_content = original_story['content']
            if not story_title:
                story_title = original_story['title']
    
    if story_content:
        story_content = lxml.html.fromstring(story_content)
        story_content.make_links_absolute(story_url)
        story_content = lxml.html.tostring(story_content)
    
    shared_story = MSharedStory.objects.filter(user_id=user.pk,
                                               story_feed_id=original_feed and original_feed.pk or 0,
                                               story_guid=story_url).limit(1).first()
    if not shared_story:
        title_max = MSharedStory._fields['story_title'].max_length
        story_db = {
            "story_guid": story_url,
            "story_permalink": story_url,
            "story_title": story_title and story_title[:title_max] or "[Untitled]",
            "story_feed_id": original_feed and original_feed.pk or 0,
            "story_content": story_content,
            "story_author_name": story_author,
            "story_date": datetime.datetime.now(),
            "user_id": user.pk,
            "comments": comments,
            "has_comments": bool(comments),
        }
        try:
            shared_story = MSharedStory.objects.create(**story_db)
            socialsubs = MSocialSubscription.objects.filter(subscription_user_id=user.pk)
            for socialsub in socialsubs:
                socialsub.needs_unread_recalc = True
                socialsub.save()
            logging.user(request, "~BM~FYSharing story from ~SB~FCIFTTT~FY: ~SB%s: %s" % (story_url, comments))
        except NotUniqueError:
            logging.user(request, "~BM~FY~SBAlready~SN shared story from ~SB~FCIFTTT~FY: ~SB%s: %s" % (story_url, comments))
    else:
        logging.user(request, "~BM~FY~SBAlready~SN shared story from ~SB~FCIFTTT~FY: ~SB%s: %s" % (story_url, comments))
    
    try:
        socialsub = MSocialSubscription.objects.get(user_id=user.pk, 
                                                    subscription_user_id=user.pk)
    except MSocialSubscription.DoesNotExist:
        socialsub = None
    
    if socialsub and shared_story:
        socialsub.mark_story_ids_as_read([shared_story.story_hash], 
                                          shared_story.story_feed_id, 
                                          request=request)
    elif shared_story:
        RUserStory.mark_read(user.pk, shared_story.story_feed_id, shared_story.story_hash)
    
    if shared_story:
        shared_story.publish_update_to_subscribers()
    
    return {"data": [{
        "id": shared_story and shared_story.story_guid,
        "url": shared_story and shared_story.blurblog_permalink()
    }]}
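The link-absolutizing step above, in isolation; a small sketch assuming lxml is installed:

import lxml.html

story_url = 'http://example.com/posts/1'
fragment = '<p>See <a href="/about">about</a> and <img src="logo.png"></p>'

doc = lxml.html.fromstring(fragment)
doc.make_links_absolute(story_url)   # rewrites relative href/src in place
print(lxml.html.tostring(doc))
# b'<p>See <a href="http://example.com/about">about</a> and
#   <img src="http://example.com/posts/logo.png"></p>'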
Example #44
0
def api_shared_story(request):
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    blurblog_user = fields['blurblog_user']
    entries = []
    
    if isinstance(blurblog_user, int) or blurblog_user.isdigit():
        social_user_ids = [int(blurblog_user)]
    elif blurblog_user == "all":
        socialsubs = MSocialSubscription.objects.filter(user_id=user.pk)
        social_user_ids = [ss.subscription_user_id for ss in socialsubs]
    else:
        social_user_ids = []  # Unrecognized trigger field; prevents a NameError below

    mstories = MSharedStory.objects(
        user_id__in=social_user_ids
    ).order_by('-shared_date')[:limit]        
    stories = Feed.format_stories(mstories)
    
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    share_user_ids = list(set([story['user_id'] for story in stories]))
    users = dict([(u.pk, u.username) 
                 for u in User.objects.filter(pk__in=share_user_ids).only('pk', 'username')])
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])
    
    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk, 
                                                      social_user_id__in=social_user_ids))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk,
                                                        social_user_id__in=social_user_ids))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk,
                                                       social_user_id__in=social_user_ids))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk, 
                                                     social_user_id__in=social_user_ids))
    # Merge with feed specific classifiers
    classifier_feeds   = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk,
                                                                         feed_id__in=found_feed_ids))
    classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk,
                                                                             feed_id__in=found_feed_ids))
    classifier_titles  = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk,
                                                                           feed_id__in=found_feed_ids))
    classifier_tags    = classifier_tags + list(MClassifierTag.objects(user_id=user.pk,
                                                                       feed_id__in=found_feed_ids))
        
    for story in stories:
        if before and int(story['shared_date'].strftime("%s")) > before: continue
        if after and int(story['shared_date'].strftime("%s")) < after: continue
        score = compute_story_score(story, classifier_titles=classifier_titles, 
                                    classifier_authors=classifier_authors, 
                                    classifier_tags=classifier_tags,
                                    classifier_feeds=classifier_feeds)
        if score < 0: continue
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Comments": story['comments'],
            "Username": users.get(story['user_id']),
            "SharedAt": story['shared_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "meta": {
                "id": story['story_hash'],
                "timestamp": int(story['shared_date'].strftime("%s"))
            },
        })

    if after:
        entries = sorted(entries, key=lambda s: s['meta']['timestamp'])
        
    logging.user(request, "~FMChecking shared stories from ~SB~FCIFTTT~SN~FM: ~SB~FM%s~FM~SN - ~SB%s~SN stories" % (blurblog_user, len(entries)))

    return {"data": entries}
Example #45
0
def feed_autocomplete(request):
    query = request.GET.get('term') or request.GET.get('query')
    version = int(request.GET.get('v', 1))
    format = request.GET.get('format', 'autocomplete')

    # user = get_user(request)
    # if True or not user.profile.is_premium:
    #     return dict(code=-1, message="Overloaded, no autocomplete results.", feeds=[], term=query)

    if not query:
        return dict(code=-1,
                    message="Specify a search 'term'.",
                    feeds=[],
                    term=query)

    if '.' in query:
        try:
            parts = urlparse(query)
            if not parts.hostname and not query.startswith('http'):
                parts = urlparse('http://%s' % query)
            if parts.hostname:
                query = [parts.hostname]
                query.extend([p for p in parts.path.split('/') if p])
                query = ' '.join(query)
        except:
            logging.user(request,
                         "~FGAdd search, could not parse url in ~FR%s" % query)

    query_params = query.split(' ')
    tries_left = 5
    while len(query_params) and tries_left:
        tries_left -= 1
        feed_ids = Feed.autocomplete(' '.join(query_params))
        if feed_ids:
            break
        else:
            query_params = query_params[:-1]

    feeds = list(set([Feed.get_by_id(feed_id) for feed_id in feed_ids]))
    feeds = [feed for feed in feeds if feed and not feed.branch_from_feed]
    feeds = [
        feed for feed in feeds
        if all([x not in feed.feed_address for x in IGNORE_AUTOCOMPLETE])
    ]

    if format == 'autocomplete':
        feeds = [{
            'id': feed.pk,
            'value': feed.feed_address,
            'label': feed.feed_title,
            'tagline': feed.data and feed.data.feed_tagline,
            'num_subscribers': feed.num_subscribers,
        } for feed in feeds]
    else:
        feeds = [feed.canonical(full=True) for feed in feeds]
    feeds = sorted(feeds, key=lambda f: -1 * f['num_subscribers'])

    feed_ids = [f['id'] for f in feeds]
    feed_icons = dict(
        (icon.feed_id, icon)
        for icon in MFeedIcon.objects.filter(feed_id__in=feed_ids))

    for feed in feeds:
        if feed['id'] in feed_icons:
            feed_icon = feed_icons[feed['id']]
            if feed_icon.data:
                feed['favicon_color'] = feed_icon.color
                feed['favicon'] = feed_icon.data

    logging.user(
        request, "~FGAdd Search: ~SB%s ~SN(%s matches)" % (
            query,
            len(feeds),
        ))

    if version > 1:
        return {
            'feeds': feeds,
            'term': query,
        }
    else:
        return feeds
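The retry loop above relaxes the query one trailing token at a time. A standalone sketch, with a stand-in for Feed.autocomplete:

def relax_search(query, search_fn, max_tries=5):
    """Drop trailing tokens until search_fn returns results or tries run out."""
    tokens = query.split(' ')
    while tokens and max_tries:
        max_tries -= 1
        results = search_fn(' '.join(tokens))
        if results:
            return results
        tokens = tokens[:-1]
    return []

# Hypothetical index: only the bare hostname matches anything.
index = {'example.com': [101]}
assert relax_search('example.com blog posts', index.get) == [101]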
Example #46
0
def feed_autocomplete(request):
    query = request.GET.get('term') or request.GET.get('query')
    version = int(request.GET.get('v', 1))
    format = request.GET.get('format', 'autocomplete')

    # user = get_user(request)
    # if True or not user.profile.is_premium:
    #     return dict(code=-1, message="Overloaded, no autocomplete results.", feeds=[], term=query)

    if not query:
        return dict(code=-1,
                    message="Specify a search 'term'.",
                    feeds=[],
                    term=query)

    if '.' in query:
        try:
            parts = urlparse(query)
            if not parts.hostname and not query.startswith('http'):
                parts = urlparse('http://%s' % query)
            if parts.hostname:
                query = parts.hostname
        except:
            logging.user(request,
                         "~FGAdd search, could not parse url in ~FR%s" % query)

    feed_ids = Feed.autocomplete(query)
    feeds = [Feed.get_by_id(feed_id) for feed_id in feed_ids]
    if format == 'autocomplete':
        feeds = [{
            'id': feed.pk,
            'value': feed.feed_address,
            'label': feed.feed_title,
            'tagline': feed.data and feed.data.feed_tagline,
            'num_subscribers': feed.num_subscribers,
        } for feed in feeds if feed]
    else:
        feeds = [feed.canonical(full=True) for feed in feeds]
    feeds = sorted(feeds, key=lambda f: -1 * f['num_subscribers'])

    feed_ids = [f['id'] for f in feeds]
    feed_icons = dict(
        (icon.feed_id, icon)
        for icon in MFeedIcon.objects.filter(feed_id__in=feed_ids))

    for feed in feeds:
        if feed['id'] in feed_icons:
            feed_icon = feed_icons[feed['id']]
            if feed_icon.data:
                feed['favicon_color'] = feed_icon.color
                feed['favicon'] = feed_icon.data

    logging.user(
        request, "~FGAdd Search: ~SB%s ~SN(%s matches)" % (
            query,
            len(feeds),
        ))

    if version > 1:
        return {
            'feeds': feeds,
            'term': query,
        }
    else:
        return feeds
Example #47
0
def exception_change_feed_link(request):
    feed_id = request.POST['feed_id']
    feed = get_object_or_404(Feed, pk=feed_id)
    original_feed = feed
    feed_link = request.POST['feed_link']
    timezone = request.user.profile.timezone
    code = -1

    if False and (feed.has_page_exception or feed.has_feed_exception):
        # Fix broken feed
        logging.user(
            request, "~FRFixing feed exception by link: ~SB%s~SN to ~SB%s" %
            (feed.feed_link, feed_link))
        found_feed_urls = feedfinder.find_feeds(feed_link)
        if len(found_feed_urls):
            code = 1
            feed.has_page_exception = False
            feed.active = True
            feed.fetched_once = False
            feed.feed_link = feed_link
            feed.feed_address = found_feed_urls[0]
            duplicate_feed = feed.schedule_feed_fetch_immediately()
            if duplicate_feed:
                new_feed = Feed.objects.get(pk=duplicate_feed.pk)
                feed = new_feed
                new_feed.schedule_feed_fetch_immediately()
                new_feed.has_page_exception = False
                new_feed.active = True
                new_feed.save()
    else:
        # Branch good feed
        logging.user(
            request, "~FRBranching feed by link: ~SB%s~SN to ~SB%s" %
            (feed.feed_link, feed_link))
        try:
            feed = Feed.objects.get(
                hash_address_and_link=Feed.generate_hash_address_and_link(
                    feed.feed_address, feed_link))
        except Feed.DoesNotExist:
            feed = Feed.objects.create(feed_address=feed.feed_address,
                                       feed_link=feed_link)
        code = 1
        if feed.pk != original_feed.pk:
            try:
                feed.branch_from_feed = original_feed.branch_from_feed or original_feed
            except Feed.DoesNotExist:
                feed.branch_from_feed = original_feed
            feed.feed_link_locked = True
            feed.save()

    feed = feed.update()
    feed = Feed.get_by_id(feed.pk)

    try:
        usersub = UserSubscription.objects.get(user=request.user, feed=feed)
    except UserSubscription.DoesNotExist:
        usersubs = UserSubscription.objects.filter(user=request.user,
                                                   feed=original_feed)
        if usersubs:
            usersub = usersubs[0]
            usersub.switch_feed(feed, original_feed)
        else:
            fetch_history = MFetchHistory.feed(feed_id, timezone=timezone)
            return {
                'code': -1,
                'feed_fetch_history': fetch_history['feed_fetch_history'],
                'page_fetch_history': fetch_history['page_fetch_history'],
                'push_history': fetch_history['push_history'],
            }

    usersub.calculate_feed_scores(silent=False)

    feed.update_all_statistics()
    classifiers = get_classifiers_for_user(usersub.user,
                                           feed_id=usersub.feed_id)

    if feed and feed.has_feed_exception:
        code = -1

    feeds = {
        original_feed.pk: usersub.canonical(full=True,
                                            classifiers=classifiers),
    }
    fetch_history = MFetchHistory.feed(feed_id, timezone=timezone)
    return {
        'code': code,
        'feeds': feeds,
        'new_feed_id': usersub.feed_id,
        'feed_fetch_history': fetch_history['feed_fetch_history'],
        'page_fetch_history': fetch_history['page_fetch_history'],
        'push_history': fetch_history['push_history'],
    }
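The branch lookup above keys feeds on a combined hash of feed_address and feed_link. A plausible sketch of such a helper; the real Feed.generate_hash_address_and_link may differ:

import hashlib

def generate_hash_address_and_link(feed_address, feed_link):
    # Illustrative only: one stable digest over both URLs.
    combined = (feed_address or '') + (feed_link or '')
    return hashlib.sha1(combined.encode('utf-8')).hexdigest()

print(generate_hash_address_and_link('http://example.com/rss',
                                     'http://example.com/'))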
Example #48
0
    # Prune the river to only include a set number of stories per feed
    # story_feed_counts = defaultdict(int)
    # mstories_pruned = []
    # for story in mstories:
    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
    #     mstories_pruned.append(story)
    #     story_feed_counts[story['story_feed_id']] += 1

    stories = []
    for i, story in enumerate(mstories):
        if i < offset: continue
        if i >= limit: break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))

    # Find starred stories
    try:
        starred_stories = MStarredStory.objects(
            user_id=user.pk,
            story_feed_id__in=found_feed_ids).only('story_guid',
                                                   'starred_date')
        starred_stories = dict([(story.story_guid, story.starred_date)
                                for story in starred_stories])
    except OperationFailure:
        logging.info(" ***> Starred stories failure")
        starred_stories = {}

    # Intelligence classifiers for all feeds involved
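The commented-out pruning block at the top of this example, restored as a working sketch that caps a river at three stories per feed:

from collections import defaultdict

def prune_river(mstories, per_feed_cap=3):
    story_feed_counts = defaultdict(int)
    mstories_pruned = []
    for story in mstories:
        if story_feed_counts[story['story_feed_id']] >= per_feed_cap:
            continue
        mstories_pruned.append(story)
        story_feed_counts[story['story_feed_id']] += 1
    return mstories_pruned

stories = [{'story_feed_id': 1}] * 5 + [{'story_feed_id': 2}]
assert len(prune_river(stories)) == 4  # 3 from feed 1, 1 from feed 2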
Example #49
0
 def __unicode__(self):
     feed = Feed.get_by_id(self.feed_id)
     return "%s - %s (%s)" % (feed, self.folder, self.count)
Example #50
0
    def push_feed_notifications(cls, feed_id, new_stories, force=False):
        feed = Feed.get_by_id(feed_id)
        notifications = MUserFeedNotification.users_for_feed(feed.pk)
        logging.debug(
            "   ---> [%-30s] ~FCPushing out notifications to ~SB%s users~SN for ~FB~SB%s stories"
            % (feed, len(notifications), new_stories)
        )
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)

        latest_story_hashes = r.zrange("zF:%s" % feed.pk, -1 * new_stories, -1)
        mstories = MStory.objects.filter(story_hash__in=latest_story_hashes).order_by('-story_date')
        stories = Feed.format_stories(mstories)
        total_sent_count = 0

        for user_feed_notification in notifications:
            sent_count = 0
            try:
                user = User.objects.get(pk=user_feed_notification.user_id)
            except User.DoesNotExist:
                continue
            months_ago = datetime.datetime.now() - datetime.timedelta(days=90)
            if user.profile.last_seen_on < months_ago:
                logging.user(user, f"~FBSkipping notifications, last seen: ~SB{user.profile.last_seen_on}")
                continue
            last_notification_date = user_feed_notification.last_notification_date
            try:
                usersub = UserSubscription.objects.get(
                    user=user_feed_notification.user_id, feed=user_feed_notification.feed_id
                )
            except UserSubscription.DoesNotExist:
                continue
            classifiers = user_feed_notification.classifiers(usersub)

            if classifiers is None:
                if settings.DEBUG:
                    logging.debug("Has no usersubs")
                continue

            for story in stories:
                if sent_count >= 3:
                    if settings.DEBUG:
                        logging.debug("Sent too many, ignoring...")
                    continue
                if story['story_date'] <= last_notification_date and not force:
                    if settings.DEBUG:
                        logging.debug(
                            "Story date older than last notification date: %s <= %s"
                            % (story['story_date'], last_notification_date)
                        )
                    continue

                if story['story_date'] > user_feed_notification.last_notification_date:
                    user_feed_notification.last_notification_date = story['story_date']
                    user_feed_notification.save()

                story['story_content'] = html.unescape(story['story_content'])

                sent = user_feed_notification.push_story_notification(story, classifiers, usersub)
                if sent:
                    sent_count += 1
                    total_sent_count += 1
        return total_sent_count, len(notifications)
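The fan-out above pulls the newest N story hashes with negative zrange indices. A sketch, assuming a local Redis and a modern redis-py:

import redis

r = redis.Redis()  # assumes a local Redis instance
r.zadd('zF:1', {'h1': 100, 'h2': 200, 'h3': 300})

new_stories = 2
# Indices -new_stories .. -1 select the N highest-scored (newest) members.
print(r.zrange('zF:1', -1 * new_stories, -1))  # [b'h2', b'h3']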
Example #51
0
    def calculate_feed_scores(self, silent=False, stories=None, force=False):
        # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
        now = datetime.datetime.now()
        UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
        
        if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
            # if not silent:
            #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
            return
        
        # if not self.feed.fetched_once:
        #     if not silent:
        #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        #     self.needs_unread_recalc = False
        #     self.save()
        #     return
            
        feed_scores = dict(negative=0, neutral=0, positive=0)
        
        # Two weeks in age. If mark_read_date is older, mark old stories as read.
        date_delta = UNREAD_CUTOFF
        if date_delta < self.mark_read_date:
            date_delta = self.mark_read_date
        else:
            self.mark_read_date = date_delta
        
        if not stories:
            stories = cache.get('S:%s' % self.feed_id)
            
        unread_story_hashes = self.get_stories(read_filter='unread', limit=500, hashes_only=True)
        
        if not stories:
            stories_db = MStory.objects(story_hash__in=unread_story_hashes)
            stories = Feed.format_stories(stories_db, self.feed_id)
        
        oldest_unread_story_date = now
        unread_stories = []
        for story in stories:
            if story['story_date'] < date_delta:
                continue
            if story['story_hash'] in unread_story_hashes:
                unread_stories.append(story)
                if story['story_date'] < oldest_unread_story_date:
                    oldest_unread_story_date = story['story_date']

        # if not silent:
        #     logging.info(' ---> [%s]    Format stories: %s' % (self.user, datetime.datetime.now() - now))
        
        classifier_feeds   = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_titles  = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_tags    = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        # if not silent:
        #     logging.info(' ---> [%s]    Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))
            
        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }
        
        for story in unread_stories:
            scores.update({
                'author' : apply_classifier_authors(classifier_authors, story),
                'tags'   : apply_classifier_tags(classifier_tags, story),
                'title'  : apply_classifier_titles(classifier_titles, story),
            })
            
            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1
                
        
        # if not silent:
        #     logging.info(' ---> [%s]    End classifiers: %s' % (self.user, datetime.datetime.now() - now))

        self.unread_count_positive = feed_scores['positive']
        self.unread_count_neutral = feed_scores['neutral']
        self.unread_count_negative = feed_scores['negative']
        self.unread_count_updated = datetime.datetime.now()
        self.oldest_unread_story_date = oldest_unread_story_date
        self.needs_unread_recalc = False
        
        self.save()

        if (self.unread_count_positive == 0 and 
            self.unread_count_neutral == 0):
            self.mark_feed_read()
        
        if not silent:
            logging.user(self.user, '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' % (self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))
            
        return self
Example #52
0
    def subscribe(self,
                  topic,
                  feed,
                  hub=None,
                  callback=None,
                  lease_seconds=None,
                  force_retry=False):
        if hub is None:
            hub = self._get_hub(topic)

        if hub is None:
            raise TypeError(
                'hub cannot be None if the feed does not provide it')

        if lease_seconds is None:
            lease_seconds = getattr(settings, 'PUBSUBHUBBUB_LEASE_SECONDS',
                                    DEFAULT_LEASE_SECONDS)

        feed = Feed.get_by_id(feed.pk)
        subscription, created = self.get_or_create(feed=feed)
        signals.pre_subscribe.send(sender=subscription, created=created)
        subscription.set_expiration(lease_seconds)
        if len(topic) < 200:
            subscription.topic = topic
        else:
            subscription.topic = feed.feed_link[:200]
        subscription.hub = hub
        subscription.save()

        if callback is None:
            # try:
            #     callback_path = reverse('push-callback', args=(subscription.pk,))
            # except Resolver404:
            #     raise TypeError('callback cannot be None if there is not a reverable URL')
            # else:
            #     # callback = 'http://' + Site.objects.get_current() + callback_path
            callback = "http://push.pytune.com/push/%s" % subscription.pk  # + callback_path

        try:
            response = self._send_request(
                hub, {
                    'hub.mode': 'subscribe',
                    'hub.callback': callback,
                    'hub.topic': topic,
                    'hub.verify': ['async', 'sync'],
                    'hub.verify_token':
                    subscription.generate_token('subscribe'),
                    'hub.lease_seconds': lease_seconds,
                })
        except requests.ConnectionError:
            response = None

        if response and response.status_code == 204:
            subscription.verified = True
        elif response and response.status_code == 202:  # async verification
            subscription.verified = False
        else:
            error = response and response.text or ""
            if not force_retry and 'You may only subscribe to' in error:
                extracted_topic = re.search("You may only subscribe to (.*?) ",
                                            error)
                if extracted_topic:
                    subscription = self.subscribe(extracted_topic.group(1),
                                                  feed=feed,
                                                  hub=hub,
                                                  force_retry=True)
            else:
                logging.debug(
                    u'   ---> [%-30s] ~FR~BKFeed failed to subscribe to push: %s (code: %s)'
                    % (unicode(subscription.feed)[:30], error[:100], response
                       and response.status_code))

        subscription.save()
        feed.setup_push()
        if subscription.verified:
            signals.verified.send(sender=subscription)
        return subscription
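The hub handshake itself reduces to a single form-encoded POST; a hedged sketch of what _send_request presumably issues, with placeholder values and the verify token omitted:

import requests

def send_subscribe(hub, topic, callback, lease_seconds=2592000):
    # PubSubHubbub 0.3-style subscription request.
    response = requests.post(hub, data={
        'hub.mode': 'subscribe',
        'hub.callback': callback,
        'hub.topic': topic,
        'hub.verify': ['async', 'sync'],
        'hub.lease_seconds': lease_seconds,
    })
    # 204 = verified synchronously, 202 = verification still pending.
    return response.status_code in (202, 204)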
Example #53
0
    def run(self, **kwargs):
        from apps.rss_feeds.models import Feed        
        settings.LOG_TO_STREAM = True
        now = datetime.datetime.utcnow()
        start = time.time()
        r = redis.Redis(connection_pool=settings.REDIS_FEED_POOL)
        tasked_feeds_size = r.zcard('tasked_feeds')
        
        hour_ago = now - datetime.timedelta(hours=1)
        r.zremrangebyscore('fetched_feeds_last_hour', 0, int(hour_ago.strftime('%s')))
        
        now_timestamp = int(now.strftime("%s"))
        queued_feeds = r.zrangebyscore('scheduled_updates', 0, now_timestamp)
        r.zremrangebyscore('scheduled_updates', 0, now_timestamp)
        r.sadd('queued_feeds', *queued_feeds)
        logging.debug(" ---> ~SN~FBQueuing ~SB%s~SN stale feeds (~SB%s~SN/~FG%s~FB~SN/%s tasked/queued/scheduled)" % (
                        len(queued_feeds),
                        r.zcard('tasked_feeds'),
                        r.scard('queued_feeds'),
                        r.zcard('scheduled_updates')))
        
        # Regular feeds
        if tasked_feeds_size < 10000:
            feeds = r.srandmember('queued_feeds', 10000)
            Feed.task_feeds(feeds, verbose=True)
            active_count = len(feeds)
        else:
            logging.debug(" ---> ~SN~FBToo many tasked feeds. ~SB%s~SN tasked." % tasked_feeds_size)
            active_count = 0
        cp1 = time.time()
        
        # Force refresh feeds
        refresh_feeds = Feed.objects.filter(
            active=True,
            fetched_once=False,
            active_subscribers__gte=1
        ).order_by('?')[:100]
        refresh_count = refresh_feeds.count()
        cp2 = time.time()
        
        # Mistakenly inactive feeds
        hours_ago = (now - datetime.timedelta(minutes=10)).strftime('%s')
        old_tasked_feeds = r.zrangebyscore('tasked_feeds', 0, hours_ago)
        inactive_count = len(old_tasked_feeds)
        if inactive_count:
            r.zremrangebyscore('tasked_feeds', 0, hours_ago)
            # r.sadd('queued_feeds', *old_tasked_feeds)
            for feed_id in old_tasked_feeds:
                r.zincrby('error_feeds', feed_id, 1)
                feed = Feed.get_by_id(feed_id)
                feed.set_next_scheduled_update()
            logging.debug(" ---> ~SN~FBRe-queuing ~SB%s~SN dropped feeds (~SB%s/%s~SN queued/tasked)" % (
                            inactive_count,
                            r.scard('queued_feeds'),
                            r.zcard('tasked_feeds')))
        cp3 = time.time()
        
        old = now - datetime.timedelta(days=1)
        old_feeds = Feed.objects.filter(
            next_scheduled_update__lte=old, 
            active_subscribers__gte=1
        ).order_by('?')[:500]
        old_count = old_feeds.count()
        cp4 = time.time()
        
        logging.debug(" ---> ~FBTasking ~SB~FC%s~SN~FB/~FC%s~FB (~FC%s~FB/~FC%s~SN~FB) feeds... (%.4s/%.4s/%.4s/%.4s)" % (
            active_count,
            refresh_count,
            inactive_count,
            old_count,
            cp1 - start,
            cp2 - cp1,
            cp3 - cp2,
            cp4 - cp3
        ))
        
        Feed.task_feeds(refresh_feeds, verbose=False)
        Feed.task_feeds(old_feeds, verbose=False)

        logging.debug(" ---> ~SN~FBTasking took ~SB%s~SN seconds (~SB%s~SN/~FG%s~FB~SN/%s tasked/queued/scheduled)" % (
                        int((time.time() - start)),
                        r.zcard('tasked_feeds'),
                        r.scard('queued_feeds'),
                        r.zcard('scheduled_updates')))
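The scheduling pattern at the heart of this task: pop everything due out of a time-scored zset into a plain queue set. A sketch, assuming a local Redis and a modern redis-py; note the guard against an empty batch, which sadd rejects:

import redis
import time

r = redis.Redis()  # assumes a local Redis instance

r.zadd('scheduled_updates', {'feed:1': time.time() - 60,     # overdue
                             'feed:2': time.time() + 3600})  # not yet due

now_timestamp = int(time.time())
due = r.zrangebyscore('scheduled_updates', 0, now_timestamp)
if due:
    r.zremrangebyscore('scheduled_updates', 0, now_timestamp)
    r.sadd('queued_feeds', *due)
print(r.smembers('queued_feeds'))  # {b'feed:1'}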
Example #54
0
def exception_change_feed_link(request):
    feed_id = request.POST['feed_id']
    feed = get_object_or_404(Feed, pk=feed_id)
    original_feed = feed
    feed_link = request.POST['feed_link']
    code = -1

    if feed.has_page_exception or feed.has_feed_exception:
        # Fix broken feed
        logging.user(
            request, "~FRFixing feed exception by link: ~SB%s~SN to ~SB%s" %
            (feed.feed_link, feed_link))
        feed_address = feedfinder.feed(feed_link)
        if feed_address:
            code = 1
            feed.has_page_exception = False
            feed.active = True
            feed.fetched_once = False
            feed.feed_link = feed_link
            feed.feed_address = feed_address
            feed.next_scheduled_update = datetime.datetime.utcnow()
            duplicate_feed = feed.save()
            if duplicate_feed:
                new_feed = Feed.objects.get(pk=duplicate_feed.pk)
                feed = new_feed
                new_feed.next_scheduled_update = datetime.datetime.utcnow()
                new_feed.has_page_exception = False
                new_feed.active = True
                new_feed.save()
    else:
        # Branch good feed
        logging.user(
            request, "~FRBranching feed by link: ~SB%s~SN to ~SB%s" %
            (feed.feed_link, feed_link))
        feed, _ = Feed.objects.get_or_create(feed_address=feed.feed_address,
                                             feed_link=feed_link)
        if feed.pk != original_feed.pk:
            try:
                feed.branch_from_feed = original_feed.branch_from_feed or original_feed
            except Feed.DoesNotExist:
                feed.branch_from_feed = original_feed
            feed.feed_link_locked = True
            feed.save()
            code = 1

    feed = feed.update()
    feed = Feed.get_by_id(feed.pk)

    try:
        usersub = UserSubscription.objects.get(user=request.user, feed=feed)
    except UserSubscription.DoesNotExist:
        usersubs = UserSubscription.objects.filter(user=request.user,
                                                   feed=original_feed)
        if usersubs:
            usersub = usersubs[0]
            usersub.switch_feed(feed, original_feed)
        else:
            return {'code': -1}

    usersub.calculate_feed_scores(silent=False)

    feed.update_all_statistics()
    classifiers = get_classifiers_for_user(usersub.user,
                                           feed_id=usersub.feed_id)

    if feed and feed.has_feed_exception:
        code = -1

    feeds = {
        original_feed.pk: usersub.canonical(full=True,
                                            classifiers=classifiers),
    }
    return {
        'code': code,
        'feeds': feeds,
        'new_feed_id': usersub.feed_id,
    }
Example #55
0
def newsletter_story(request, story_hash):
    story = MStory.objects.get(story_hash=story_hash)
    story = Feed.format_story(story)
    return HttpResponse(story['story_content'])
Example #56
0
    def run(self, **kwargs):
        from apps.rss_feeds.models import Feed
        settings.LOG_TO_STREAM = True
        now = datetime.datetime.utcnow()
        start = time.time()
        r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)

        logging.debug(" ---> ~SN~FBQueuing broken feeds...")

        # Force refresh feeds
        refresh_feeds = Feed.objects.filter(
            active=True, fetched_once=False,
            active_subscribers__gte=1).order_by('?')[:100]
        refresh_count = refresh_feeds.count()
        cp1 = time.time()

        logging.debug(" ---> ~SN~FBFound %s active, unfetched broken feeds" %
                      refresh_count)

        # Mistakenly inactive feeds
        hours_ago = (now - datetime.timedelta(minutes=10)).strftime('%s')
        old_tasked_feeds = r.zrangebyscore('tasked_feeds', 0, hours_ago)
        inactive_count = len(old_tasked_feeds)
        if inactive_count:
            r.zremrangebyscore('tasked_feeds', 0, hours_ago)
            # r.sadd('queued_feeds', *old_tasked_feeds)
            for feed_id in old_tasked_feeds:
                r.zincrby('error_feeds', feed_id, 1)
                feed = Feed.get_by_id(feed_id)
                feed.set_next_scheduled_update()
        logging.debug(
            " ---> ~SN~FBRe-queuing ~SB%s~SN dropped/broken feeds (~SB%s/%s~SN queued/tasked)"
            %
            (inactive_count, r.scard('queued_feeds'), r.zcard('tasked_feeds')))
        cp2 = time.time()

        old = now - datetime.timedelta(days=1)
        old_feeds = Feed.objects.filter(
            next_scheduled_update__lte=old,
            active_subscribers__gte=1).order_by('?')[:500]
        old_count = old_feeds.count()
        cp3 = time.time()

        logging.debug(
            " ---> ~SN~FBTasking ~SBrefresh:~FC%s~FB inactive:~FC%s~FB old:~FC%s~SN~FB broken feeds... (%.4s/%.4s/%.4s)"
            % (
                refresh_count,
                inactive_count,
                old_count,
                cp1 - start,
                cp2 - cp1,
                cp3 - cp2,
            ))

        Feed.task_feeds(refresh_feeds, verbose=False)
        Feed.task_feeds(old_feeds, verbose=False)

        logging.debug(
            " ---> ~SN~FBTasking broken feeds took ~SB%s~SN seconds (~SB%s~SN/~FG%s~FB~SN/%s tasked/queued/scheduled)"
            % (int((time.time() - start)), r.zcard('tasked_feeds'),
               r.scard('queued_feeds'), r.zcard('scheduled_updates')))
Example #57
0
    def process_outline(self, outline, folders, in_folder=''):
        for item in outline:
            if (not hasattr(item, 'xmlUrl')
                    and (hasattr(item, 'text') or hasattr(item, 'title'))):
                folder = item
                title = getattr(item, 'text', None) or getattr(
                    item, 'title', None)
                # if hasattr(folder, 'text'):
                #     logging.info(' ---> [%s] ~FRNew Folder: %s' % (self.user, folder.text))
                obj = {title: []}
                folders = add_object_to_folder(obj, in_folder, folders)
                folders = self.process_outline(folder, folders, title)
            elif hasattr(item, 'xmlUrl'):
                feed = item
                if not hasattr(feed, 'htmlUrl'):
                    setattr(feed, 'htmlUrl', None)
                # If feed title matches what's in the DB, don't override it on subscription.
                feed_title = getattr(feed, 'title', None) or getattr(
                    feed, 'text', None)
                if not feed_title:
                    setattr(feed, 'title', feed.htmlUrl or feed.xmlUrl)
                    user_feed_title = None
                else:
                    setattr(feed, 'title', feed_title)
                    user_feed_title = feed.title

                feed_address = urlnorm.normalize(feed.xmlUrl)
                feed_link = urlnorm.normalize(feed.htmlUrl)
                if len(feed_address) > Feed._meta.get_field(
                        'feed_address').max_length:
                    continue
                if feed_link and len(feed_link) > Feed._meta.get_field(
                        'feed_link').max_length:
                    continue
                # logging.info(' ---> \t~FR%s - %s - %s' % (feed.title, feed_link, feed_address,))
                feed_data = dict(feed_address=feed_address,
                                 feed_link=feed_link,
                                 feed_title=feed.title)
                # feeds.append(feed_data)

                # See if it exists as a duplicate first
                duplicate_feed = DuplicateFeed.objects.filter(
                    duplicate_address=feed_address)
                if duplicate_feed:
                    feed_db = duplicate_feed[0].feed
                else:
                    feed_data['active_subscribers'] = 1
                    feed_data['num_subscribers'] = 1
                    feed_db, _ = Feed.find_or_create(
                        feed_address=feed_address,
                        feed_link=feed_link,
                        defaults=dict(**feed_data))

                if user_feed_title == feed_db.feed_title:
                    user_feed_title = None

                try:
                    us = UserSubscription.objects.get(feed=feed_db,
                                                      user=self.user)
                except UserSubscription.DoesNotExist:
                    us = UserSubscription(
                        feed=feed_db,
                        user=self.user,
                        needs_unread_recalc=True,
                        mark_read_date=datetime.datetime.utcnow() -
                        datetime.timedelta(days=1),
                        active=self.user.profile.is_premium,
                        user_title=user_feed_title)
                    us.save()

                if self.user.profile.is_premium and not us.active:
                    us.active = True
                    us.save()
                if not us.needs_unread_recalc:
                    us.needs_unread_recalc = True
                    us.save()

                folders = add_object_to_folder(feed_db.pk, in_folder, folders)

        return folders
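A stripped-down sketch of the recursive outline walk, using plain dicts in place of feedparser outline objects and a direct append where the real code routes through add_object_to_folder:

def process_outline(outline, folders, in_folder=''):
    """Flatten a parsed OPML outline into [feed_id, {folder: [...]}] entries."""
    for item in outline:
        if 'xmlUrl' not in item and 'title' in item:
            sub = []                          # a folder: recurse into children
            folders.append({item['title']: sub})
            process_outline(item.get('children', []), sub, item['title'])
        elif 'xmlUrl' in item:
            folders.append(item['xmlUrl'])    # stand-in for feed_db.pk
    return folders

opml = [{'title': 'Tech', 'children': [{'xmlUrl': 'http://example.com/rss'}]},
        {'xmlUrl': 'http://example.org/feed'}]
print(process_outline(opml, []))
# [{'Tech': ['http://example.com/rss']}, 'http://example.org/feed']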
Example #58
0
def save_story(request, token=None):
    code = 0
    story_url = request.POST['story_url']
    user_tags = request.POST.getlist('user_tags') or request.POST.getlist(
        'user_tags[]') or []
    add_user_tag = request.POST.get('add_user_tag', None)
    title = request.POST['title']
    content = request.POST.get('content', None)
    rss_url = request.POST.get('rss_url', None)
    user_notes = request.POST.get('user_notes', None)
    feed_id = request.POST.get('feed_id', None) or 0
    feed = None
    message = None
    profile = None

    if request.user.is_authenticated():
        profile = request.user.profile
    else:
        try:
            profile = Profile.objects.get(secret_token=token)
        except Profile.DoesNotExist:
            code = -1
            if token:
                message = "Not authenticated, couldn't find user by token."
            else:
                message = "Not authenticated, no token supplied and not authenticated."

    if not profile:
        return HttpResponse(json.encode({
            'code': code,
            'message': message,
            'story': None,
        }),
                            content_type='text/plain')

    if feed_id:
        feed = Feed.get_by_id(feed_id)
    else:
        if rss_url:
            logging.user(request.user,
                         "~FBFinding feed (save_story): %s" % rss_url)
            feed = Feed.get_feed_from_url(rss_url, create=True, fetch=True)
        if not feed:
            logging.user(request.user,
                         "~FBFinding feed (save_story): %s" % story_url)
            feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
        if feed:
            feed_id = feed.pk

    if content:
        content = lxml.html.fromstring(content)
        content.make_links_absolute(story_url)
        content = lxml.html.tostring(content)
    else:
        importer = TextImporter(story=None,
                                story_url=story_url,
                                request=request,
                                debug=settings.DEBUG)
        document = importer.fetch(skip_save=True, return_document=True)
        content = document['content']
        if not title:
            title = document['title']

    if add_user_tag:
        user_tags = user_tags + [tag for tag in add_user_tag.split(',')]

    starred_story = MStarredStory.objects.filter(
        user_id=profile.user.pk, story_feed_id=feed_id,
        story_guid=story_url).limit(1).first()
    if not starred_story:
        story_db = {
            "story_guid": story_url,
            "story_permalink": story_url,
            "story_title": title,
            "story_feed_id": feed_id,
            "story_content": content,
            "story_date": datetime.datetime.now(),
            "starred_date": datetime.datetime.now(),
            "user_id": profile.user.pk,
            "user_tags": user_tags,
            "user_notes": user_notes,
        }
        starred_story = MStarredStory.objects.create(**story_db)
        logging.user(
            profile.user, "~BM~FCStarring story from site: ~SB%s: %s" %
            (story_url, user_tags))
        message = "Saving story from site: %s: %s" % (story_url, user_tags)
    else:
        starred_story.story_content = content
        starred_story.story_title = title
        starred_story.user_tags = user_tags
        starred_story.story_permalink = story_url
        starred_story.story_guid = story_url
        starred_story.story_feed_id = feed_id
        starred_story.user_notes = user_notes
        starred_story.save()
        logging.user(
            profile.user,
            "~BM~FC~SBUpdating~SN starred story from site: ~SB%s: %s" %
            (story_url, user_tags))
        message = "Updating saved story from site: %s: %s" % (story_url,
                                                              user_tags)

    MStarredStoryCounts.schedule_count_tags_for_user(request.user.pk)

    response = HttpResponse(json.encode({
        'code': code,
        'message': message,
        'story': starred_story,
    }),
                            content_type='text/plain')
    response['Access-Control-Allow-Origin'] = '*'
    response['Access-Control-Allow-Methods'] = 'POST'

    return response
Example #59
0
def share_story(request, token):
    code = 0
    story_url = request.POST['story_url']
    comments = request.POST['comments']
    title = request.POST['title']
    content = request.POST['content']
    rss_url = request.POST.get('rss_url')
    feed_id = request.POST.get('feed_id') or 0
    feed = None
    message = None

    if not story_url:
        code = -1
        message = "Missing story_url."
    else:
        try:
            profile = Profile.objects.get(secret_token=token)
        except Profile.DoesNotExist:
            code = -1
            message = "Invalid token."

    if code < 0:
        # Bail out early: without a profile, the lookups below would fail.
        return HttpResponse(json.encode({
            'code': code,
            'message': message,
            'story': None,
        }), content_type='text/plain')

    if feed_id:
        feed = Feed.get_by_id(feed_id)
    else:
        if rss_url:
            feed = Feed.get_feed_from_url(rss_url, create=True, fetch=True)
        if not feed:
            feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
        if feed:
            feed_id = feed.pk

    content = lxml.html.fromstring(content)
    content.make_links_absolute(story_url)
    content = lxml.html.tostring(content)

    shared_story = MSharedStory.objects.filter(
        user_id=profile.user.pk, story_feed_id=feed_id,
        story_guid=story_url).limit(1).first()
    if not shared_story:
        story_db = {
            "story_guid": story_url,
            "story_permalink": story_url,
            "story_title": title,
            "story_feed_id": feed_id,
            "story_content": content,
            "story_date": datetime.datetime.now(),
            "user_id": profile.user.pk,
            "comments": comments,
            "has_comments": bool(comments),
        }
        shared_story = MSharedStory.objects.create(**story_db)
        socialsubs = MSocialSubscription.objects.filter(
            subscription_user_id=profile.user.pk)
        for socialsub in socialsubs:
            socialsub.needs_unread_recalc = True
            socialsub.save()
        logging.user(
            profile.user,
            "~BM~FYSharing story from site: ~SB%s: %s" % (story_url, comments))
    else:
        shared_story.story_content = content
        shared_story.story_title = title
        shared_story.comments = comments
        shared_story.story_permalink = story_url
        shared_story.story_guid = story_url
        shared_story.has_comments = bool(comments)
        shared_story.story_feed_id = feed_id
        shared_story.save()
        logging.user(
            profile.user,
            "~BM~FY~SBUpdating~SN shared story from site: ~SB%s: %s" %
            (story_url, comments))

    socialsub = MSocialSubscription.objects.get(
        user_id=profile.user.pk, subscription_user_id=profile.user.pk)
    socialsub.mark_story_ids_as_read([shared_story.story_hash],
                                     shared_story.story_feed_id,
                                     request=request)

    shared_story.publish_update_to_subscribers()

    response = HttpResponse(json.encode({
        'code': code,
        'message': message,
        'story': None,
    }),
                            content_type='text/plain')
    response['Access-Control-Allow-Origin'] = '*'
    response['Access-Control-Allow-Methods'] = 'POST'

    return response
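
Both save_story and share_story normalize submitted HTML the same way before storing it. A self-contained sketch of that lxml round-trip, for reference:

import lxml.html

def absolutize_links(html, base_url):
    # Parse the fragment, resolve relative hrefs/srcs against the
    # story's own URL, and serialize back to markup.
    doc = lxml.html.fromstring(html)
    doc.make_links_absolute(base_url)
    return lxml.html.tostring(doc)

# absolutize_links('<a href="/about">About</a>', 'http://example.com/post')
# returns '<a href="http://example.com/about">About</a>'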
Example #60
0
class ProcessFeed:
    def __init__(self, feed_id, fpf, options):
        self.feed_id = feed_id
        self.options = options
        self.fpf = fpf
    
    def refresh_feed(self):
        self.feed = Feed.get_by_id(self.feed_id)
        if self.feed_id != self.feed.pk:
            logging.debug(" ***> Feed has changed: from %s to %s" % (self.feed_id, self.feed.pk))
            self.feed_id = self.feed.pk
    
    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()
        
        ret_values = dict(new=0, updated=0, same=0, error=0)

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)' % (
                                  self.feed.title[:30],
                                  self.fpf.bozo_exception,
                                  len(self.fpf.entries)))
                    
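            # 304: Not modified: nothing new to parse, just record the check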
            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values
            
            # 302: Temporary redirect: ignore
            # 301: Permanent redirect: save it (after 10 tries)
            if self.fpf.status == 301:
                if self.fpf.href.endswith('feedburner.com/atom.xml'):
                    return FEED_ERRHTTP, ret_values
                redirects, non_redirects = self.feed.count_redirects_in_history('feed')
                self.feed.save_feed_history(self.fpf.status, "HTTP Redirect (%d to go)" % (10-len(redirects)))
                if len(redirects) >= 10 or len(non_redirects) == 0:
                    address = self.fpf.href
                    if self.options['force'] and address:
                        address = qurl(address, remove=['_'])
                    self.feed.feed_address = address
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug("   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..." % (self.feed.title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status, "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug("   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..." % (self.feed.title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values
        
        if not self.fpf:
            logging.debug("   ---> [%-30s] ~SB~FRFeed is Non-XML. No feedparser feed either!" % (self.feed.title[:30]))
            self.feed.save_feed_history(551, "Broken feed")
            return FEED_ERRHTTP, ret_values
            
        if self.fpf and not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
                logging.debug("   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed', self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug("   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'SAX Exception', self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
                
        # the feed has changed (or it is the first time we parse it)
        # saving the etag and last_modified fields
        original_etag = self.feed.etag
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # Sometimes this is None (it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''
        if self.feed.etag != original_etag:
            self.feed.save(update_fields=['etag'])
            
        original_last_modified = self.feed.last_modified
        if hasattr(self.fpf, 'modified') and self.fpf.modified:
            try:
                self.feed.last_modified = datetime.datetime.strptime(self.fpf.modified, '%a, %d %b %Y %H:%M:%S %Z')
            except Exception, e:
                self.feed.last_modified = None
                logging.debug("Broken mtime %s: %s" % (self.fpf.modified, e))
        if self.feed.last_modified != original_last_modified:
            self.feed.save(update_fields=['last_modified'])
        
        self.fpf.entries = self.fpf.entries[:100]
        
        original_title = self.feed.feed_title
        if self.fpf.feed.get('title'):
            self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))
        if self.feed.feed_title != original_title:
            self.feed.save(update_fields=['feed_title'])
        
        tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        if tagline:
            original_tagline = self.feed.data.feed_tagline
            self.feed.data.feed_tagline = smart_unicode(tagline)
            if self.feed.data.feed_tagline != original_tagline:
                self.feed.data.save(update_fields=['feed_tagline'])

        if not self.feed.feed_link_locked:
            new_feed_link = self.fpf.feed.get('link') or self.fpf.feed.get('id') or self.feed.feed_link
            if self.options['force'] and new_feed_link:
                new_feed_link = qurl(new_feed_link, remove=['_'])
            if new_feed_link != self.feed.feed_link:
                logging.debug("   ---> [%-30s] ~SB~FRFeed's page is different: %s to %s" % (self.feed.title[:30], self.feed.feed_link, new_feed_link))               
                redirects, non_redirects = self.feed.count_redirects_in_history('page')
                self.feed.save_page_history(301, "HTTP Redirect (%s to go)" % (10-len(redirects)))
                if len(redirects) >= 10 or len(non_redirects) == 0:
                    self.feed.feed_link = new_feed_link
                    self.feed.save(update_fields=['feed_link'])
        
        # Determine whether story guids/permalinks are broken and need replacing
        guids_seen = set()
        permalinks_seen = set()
        for entry in self.fpf.entries:
            guids_seen.add(entry.get('guid'))
            permalinks_seen.add(Feed.get_permalink(entry))
        guid_difference = len(guids_seen) != len(self.fpf.entries)
        single_guid = len(guids_seen) == 1
        replace_guids = single_guid and guid_difference
        permalink_difference = len(permalinks_seen) != len(self.fpf.entries)
        single_permalink = len(permalinks_seen) == 1
        replace_permalinks = single_permalink and permalink_difference
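        # Heuristic: if every entry shares a single guid, the guid can't
        # identify stories, so fall back to permalinks; if permalinks also
        # collapse to one value, fall back to published timestamps (below).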
        
        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        story_hashes = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry, self.fpf.encoding)
            if story.get('published') < start_date:
                start_date = story.get('published')
            if replace_guids:
                if replace_permalinks:
                    new_story_guid = unicode(story.get('published'))
                    if self.options['verbose']:
                        logging.debug(u'   ---> [%-30s] ~FBReplacing guid (%s) with timestamp: %s' % (
                                      self.feed.title[:30],
                                      story.get('guid'), new_story_guid))
                    story['guid'] = new_story_guid
                else:
                    new_story_guid = Feed.get_permalink(story)
                    if self.options['verbose']:
                        logging.debug(u'   ---> [%-30s] ~FBReplacing guid (%s) with permalink: %s' % (
                                      self.feed.title[:30],
                                      story.get('guid'), new_story_guid))
                    story['guid'] = new_story_guid
            story['story_hash'] = MStory.feed_guid_hash_unsaved(self.feed.pk, story.get('guid'))
            stories.append(story)
            story_hashes.append(story.get('story_hash'))

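        # Fetch any previously-saved copies of these stories in one query,
        # keyed by story_hash, so add_update_stories can diff new vs. known.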
        existing_stories = dict((s.story_hash, s) for s in MStory.objects(
            story_hash__in=story_hashes,
            # story_date__gte=start_date,
            # story_feed_id=self.feed.pk
        ))

        ret_values = self.feed.add_update_stories(stories, existing_stories,
                                                  verbose=self.options['verbose'],
                                                  updates_off=self.options['updates_off'])

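        # PuSH/WebSub: find the hub and self links advertised by the feed,
        # then (re)subscribe when there are active subscribers and the
        # existing lease is missing, expired, or a fetch was forced.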
        if (hasattr(self.fpf, 'feed') and 
            hasattr(self.fpf.feed, 'links') and self.fpf.feed.links):
            hub_url = None
            self_url = self.feed.feed_address
            for link in self.fpf.feed.links:
                if link['rel'] == 'hub' and not hub_url:
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']
            push_expired = False
            if self.feed.is_push:
                try:
                    push_expired = self.feed.push.lease_expires < datetime.datetime.now()
                except PushSubscription.DoesNotExist:
                    self.feed.is_push = False
            if (hub_url and self_url and not settings.DEBUG and
                self.feed.active_subscribers > 0 and
                (push_expired or not self.feed.is_push or self.options.get('force'))):
                logging.debug(u'   ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' % (
                              self.feed.title[:30],
                              "~SKRe-~SN" if push_expired else "", hub_url))
                try:
                    PushSubscription.objects.subscribe(self_url, feed=self.feed, hub=hub_url)
                except TimeoutError:
                    logging.debug(u'   ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s' % (
                                  self.feed.title[:30], hub_url))                    
            elif (self.feed.is_push and 
                  (self.feed.active_subscribers <= 0 or not hub_url)):
                logging.debug(u'   ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' % (
                              self.feed.title[:30]))
                self.feed.is_push = False
                self.feed = self.feed.save()

        logging.debug(u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s' % (
                      self.feed.title[:30], 
                      '~FG~SB' if ret_values['new'] else '', ret_values['new'],
                      '~FY~SB' if ret_values['updated'] else '', ret_values['updated'],
                      '~SB' if ret_values['same'] else '', ret_values['same'],
                      '~FR~SB' if ret_values['error'] else '', ret_values['error'],
                      len(self.fpf.entries)))
        self.feed.update_all_statistics(has_new_stories=bool(ret_values['new']), force=self.options['force'])
        if ret_values['new']:
            self.feed.trim_feed()
            self.feed.expire_redis()
        self.feed.save_feed_history(200, "OK")

        if self.options['verbose']:
            logging.debug(u'   ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' % (
                          self.feed.title[:30], time.time() - start))
        
        return FEED_OK, ret_values
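
The broken-guid heuristic in process() is easier to see in isolation. A standalone sketch, with an illustrative function name and plain entry dicts that are not part of the codebase:

def repair_guids(entries):
    # Mirrors the replace_guids/replace_permalinks logic above: a guid
    # shared by every entry cannot identify stories.
    guids = set(e.get('guid') for e in entries)
    permalinks = set(e.get('permalink') for e in entries)
    replace_guids = len(guids) == 1 and len(guids) != len(entries)
    replace_permalinks = len(permalinks) == 1 and len(permalinks) != len(entries)
    for e in entries:
        if not replace_guids:
            continue
        if replace_permalinks:
            # Permalinks are just as broken; published timestamps are
            # the last resort for telling stories apart.
            e['guid'] = unicode(e.get('published'))
        else:
            e['guid'] = e.get('permalink')
    return entries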