Example #1
    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()
        
        ret_values = dict(new=0, updated=0, same=0, error=0)

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)' % (
                                  self.feed.title[:30],
                                  self.fpf.bozo_exception,
                                  len(self.fpf.entries)))
                    
            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values
            
            # 302: Temporary redirect: ignore
            # 301: Permanent redirect: save it (after 20 tries)
            if self.fpf.status == 301:
                if self.fpf.href.endswith('feedburner.com/atom.xml'):
                    return FEED_ERRHTTP, ret_values
                redirects, non_redirects = self.feed.count_redirects_in_history('feed')
                self.feed.save_feed_history(self.fpf.status, "HTTP Redirect (%d to go)" % (20-len(redirects)))
                if len(redirects) >= 20 or len(non_redirects) == 0:
                    self.feed.feed_address = self.fpf.href
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug("   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..." % (self.feed.title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status, "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug("   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..." % (self.feed.title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        if not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
                logging.debug("   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed', self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug("   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'SAX Exception', self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
                
        # The feed has changed (or this is the first time we've parsed it),
        # so save the etag and last_modified fields.
        original_etag = self.feed.etag
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''
        if self.feed.etag != original_etag:
            self.feed.save(update_fields=['etag'])
            
        original_last_modified = self.feed.last_modified
        try:
            self.feed.last_modified = mtime(self.fpf.modified)
        except Exception:
            self.feed.last_modified = None
        if self.feed.last_modified != original_last_modified:
            self.feed.save(update_fields=['last_modified'])
        
        self.fpf.entries = self.fpf.entries[:100]
        
        original_title = self.feed.feed_title
        if self.fpf.feed.get('title'):
            self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))
        if self.feed.feed_title != original_title:
            self.feed.save(update_fields=['feed_title'])
        
        tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        if tagline:
            original_tagline = self.feed.data.feed_tagline
            self.feed.data.feed_tagline = smart_unicode(tagline)
            if self.feed.data.feed_tagline != original_tagline:
                self.feed.data.save(update_fields=['feed_tagline'])

        if not self.feed.feed_link_locked:
            new_feed_link = self.fpf.feed.get('link') or self.fpf.feed.get('id') or self.feed.feed_link
            if new_feed_link != self.feed.feed_link:
                logging.debug("   ---> [%-30s] ~SB~FRFeed's page is different: %s to %s" % (self.feed.title[:30], self.feed.feed_link, new_feed_link))               
                redirects, non_redirects = self.feed.count_redirects_in_history('page')
                self.feed.save_page_history(301, "HTTP Redirect (%s to go)" % (20-len(redirects)))
                if len(redirects) >= 20 or len(non_redirects) == 0:
                    self.feed.feed_link = new_feed_link
                    self.feed.save(update_fields=['feed_link'])
        
        # Detect broken guids and decide how to replace them
        guids_seen = set()
        permalinks_seen = set()
        for entry in self.fpf.entries:
            guids_seen.add(entry.get('guid'))
            permalinks_seen.add(Feed.get_permalink(entry))
        guid_difference = len(guids_seen) != len(self.fpf.entries)
        single_guid = len(guids_seen) == 1
        replace_guids = single_guid and guid_difference
        permalink_difference = len(permalinks_seen) != len(self.fpf.entries)
        single_permalink = len(permalinks_seen) == 1
        replace_permalinks = single_permalink and permalink_difference
        
        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        story_hashes = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry)
            if story.get('published') < start_date:
                start_date = story.get('published')
            if replace_guids:
                if replace_permalinks:
                    new_story_guid = unicode(story.get('published'))
                    if self.options['verbose']:
                        logging.debug(u'   ---> [%-30s] ~FBReplacing guid (%s) with timestamp: %s' % (
                                      self.feed.title[:30],
                                      story.get('guid'), new_story_guid))
                    story['guid'] = new_story_guid
                else:
                    new_story_guid = Feed.get_permalink(story)
                    if self.options['verbose']:
                        logging.debug(u'   ---> [%-30s] ~FBReplacing guid (%s) with permalink: %s' % (
                                      self.feed.title[:30],
                                      story.get('guid'), new_story_guid))
                    story['guid'] = new_story_guid
            story['story_hash'] = MStory.feed_guid_hash_unsaved(self.feed.pk, story.get('guid'))
            stories.append(story)
            story_hashes.append(story.get('story_hash'))

        existing_stories = dict((s.story_hash, s) for s in MStory.objects(
            story_hash__in=story_hashes,
            # story_date__gte=start_date,
            # story_feed_id=self.feed.pk
        ))

        ret_values = self.feed.add_update_stories(stories, existing_stories,
                                                  verbose=self.options['verbose'],
                                                  updates_off=self.options['updates_off'])

        if (hasattr(self.fpf, 'feed') and 
            hasattr(self.fpf.feed, 'links') and self.fpf.feed.links):
            hub_url = None
            self_url = self.feed.feed_address
            for link in self.fpf.feed.links:
                if link['rel'] == 'hub' and not hub_url:
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']
            push_expired = False
            if self.feed.is_push:
                try:
                    push_expired = self.feed.push.lease_expires < datetime.datetime.now()
                except PushSubscription.DoesNotExist:
                    self.feed.is_push = False
            if (hub_url and self_url and not settings.DEBUG and
                self.feed.active_subscribers > 0 and
                (push_expired or not self.feed.is_push or self.options.get('force'))):
                logging.debug(u'   ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' % (
                              self.feed.title[:30],
                              "~SKRe-~SN" if push_expired else "", hub_url))
                try:
                    PushSubscription.objects.subscribe(self_url, feed=self.feed, hub=hub_url)
                except TimeoutError:
                    logging.debug(u'   ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s' % (
                                  self.feed.title[:30], hub_url))                    
            elif (self.feed.is_push and 
                  (self.feed.active_subscribers <= 0 or not hub_url)):
                logging.debug(u'   ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' % (
                              self.feed.title[:30]))
                self.feed.is_push = False
                self.feed = self.feed.save()

        logging.debug(u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s' % (
                      self.feed.title[:30], 
                      '~FG~SB' if ret_values['new'] else '', ret_values['new'],
                      '~FY~SB' if ret_values['updated'] else '', ret_values['updated'],
                      '~SB' if ret_values['same'] else '', ret_values['same'],
                      '~FR~SB' if ret_values['error'] else '', ret_values['error'],
                      len(self.fpf.entries)))
        self.feed.update_all_statistics(has_new_stories=bool(ret_values['new']), force=self.options['force'])
        if ret_values['new']:
            self.feed.trim_feed()
            self.feed.expire_redis()
        self.feed.save_feed_history(200, "OK")

        if self.options['verbose']:
            logging.debug(u'   ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' % (
                          self.feed.title[:30], time.time() - start))
        
        return FEED_OK, ret_values
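
The guid-repair heuristic above (and in the examples that follow) is compact enough to miss: when every entry reports the same guid, the guid carries no identity, so the fetcher falls back to the permalink, and when the permalinks are equally degenerate, to the published timestamp. A standalone sketch of that decision in plain Python (hypothetical entry dicts; permalink extraction is simplified from Feed.get_permalink):

    def pick_guid_strategy(entries):
        """Mirror the replace_guids/replace_permalinks heuristic above."""
        guids = set(e.get('guid') for e in entries)
        permalinks = set(e.get('link') for e in entries)
        replace_guids = len(guids) == 1 and len(guids) != len(entries)
        replace_permalinks = len(permalinks) == 1 and len(permalinks) != len(entries)
        if not replace_guids:
            return 'keep'
        # A broken guid is replaced by the permalink, unless that is broken too.
        return 'published' if replace_permalinks else 'permalink'

    entries = [{'guid': 'dup', 'link': 'http://example.com/a'},
               {'guid': 'dup', 'link': 'http://example.com/b'}]
    print(pick_guid_strategy(entries))  # -> 'permalink'
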
Example #2
    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()
        
        ret_values = dict(new=0, updated=0, same=0, error=0)

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)' % (
                                  self.feed.title[:30],
                                  self.fpf.bozo_exception,
                                  len(self.fpf.entries)))
                    
            if self.fpf.status == 304:  # 304: resource not modified
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values
            
            # 302: Temporary redirect: ignore
            # 301: Permanent redirect: save it
            if self.fpf.status == 301:
                if not self.fpf.href.endswith('feedburner.com/atom.xml'):
                    self.feed.feed_address = self.fpf.href
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug("   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..." % (self.feed.title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status, "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug("   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..." % (self.feed.title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        if not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
                logging.debug("   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed', self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug("   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'SAX Exception', self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
                
        # The feed has changed (or this is the first time we've parsed it),
        # so save the etag and last_modified fields.
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''

        try:
            self.feed.last_modified = mtime(self.fpf.modified)
        except Exception:
            self.feed.last_modified = None
        
        self.fpf.entries = self.fpf.entries[:100]
        
        if self.fpf.feed.get('title'):
            self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))

        self.feed.feed_link = self.fpf.feed.get('link') or self.fpf.feed.get('id') or self.feed.feed_link
        
        self.feed = self.feed.save()

        # Detect broken guids and decide how to replace them:
        # if a single guid is shared among many entries:
        #   if a single permalink is also shared among many entries:
        #       replace the guid with the published timestamp
        #   else:
        #       replace the guid with the permalink
        guids_seen = set()
        permalinks_seen = set()
        for entry in self.fpf.entries:
            guids_seen.add(entry.get('guid'))
            permalinks_seen.add(Feed.get_permalink(entry))
        guid_difference = len(guids_seen) != len(self.fpf.entries)  # at least one guid is duplicated
        single_guid = len(guids_seen) == 1
        replace_guids = single_guid and guid_difference  # one guid shared across several entries is broken
        permalink_difference = len(permalinks_seen) != len(self.fpf.entries)
        single_permalink = len(permalinks_seen) == 1
        replace_permalinks = single_permalink and permalink_difference
        
        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        story_hashes = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry)
            if story.get('published') < start_date:
                start_date = story.get('published')
            if replace_guids:
                if replace_permalinks:
                    new_story_guid = unicode(story.get('published'))
                    if self.options['verbose']:
                        logging.debug(u'   ---> [%-30s] ~FBReplacing guid (%s) with timestamp: %s' % (
                                      self.feed.title[:30],
                                      story.get('guid'), new_story_guid))
                    story['guid'] = new_story_guid
                else:
                    new_story_guid = Feed.get_permalink(story)
                    if self.options['verbose']:
                        logging.debug(u'   ---> [%-30s] ~FBReplacing guid (%s) with permalink: %s' % (
                                      self.feed.title[:30],
                                      story.get('guid'), new_story_guid))
                    story['guid'] = new_story_guid
            story['story_hash'] = MStory.feed_guid_hash_unsaved(self.feed.pk, story.get('guid'))
            stories.append(story)
            story_hashes.append(story.get('story_hash'))

        # Find existing stories whose story_hash matches an incoming hash.
        existing_stories = dict((s.story_hash, s) for s in MStory.objects(
            story_hash__in=story_hashes,
            # story_date__gte=start_date,
            # story_feed_id=self.feed.pk
        ))
        
        ret_values = self.feed.add_update_stories(stories, existing_stories,
                                                  verbose=self.options['verbose'],)
        
        logging.debug(u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s' % (
                      self.feed.title[:30], 
                      '~FG~SB' if ret_values['new'] else '', ret_values['new'],
                      '~FY~SB' if ret_values['updated'] else '', ret_values['updated'],
                      '~SB' if ret_values['same'] else '', ret_values['same'],
                      '~FR~SB' if ret_values['error'] else '', ret_values['error'],
                      len(self.fpf.entries)))

        # If there are new stories, update all statistics
        self.feed.update_all_statistics(full=bool(ret_values['new']))

        self.feed.save_feed_history(200, "OK")
        
        if self.options['verbose']:
            logging.debug(u'   ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' % (
                          self.feed.title[:30], time.time() - start))
        
        return FEED_OK, ret_values
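
The etag/last_modified bookkeeping pays off on the next poll: both validators can be replayed as a conditional GET, and an unchanged feed answers HTTP 304, which is exactly the early-exit branch at the top of process(). A minimal round trip with feedparser alone (hypothetical URL):

    import feedparser

    url = 'http://example.com/feed.xml'  # hypothetical feed
    first = feedparser.parse(url)

    # Replay the stored validators; an unchanged feed comes back as 304
    # with nothing new to parse.
    second = feedparser.parse(url,
                              etag=first.get('etag'),
                              modified=first.get('modified'))
    if getattr(second, 'status', None) == 304:
        print('Feed unchanged since last fetch')
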
Example #3
    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()

        ret_values = dict(new=0, updated=0, same=0, error=0)

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(
                        u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)'
                        % (self.feed.title[:30], self.fpf.bozo_exception,
                           len(self.fpf.entries)))

            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values

            # 302: Temporary redirect: ignore
            # 301: Permanent redirect: save it
            if self.fpf.status == 301:
                if not self.fpf.href.endswith('feedburner.com/atom.xml'):
                    self.feed.feed_address = self.fpf.href
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug(
                        "   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..."
                        % (self.feed.title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status,
                                                "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug(
                    "   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..."
                    % (self.feed.title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        if not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                            feedparser.NonXMLContentType):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..."
                    % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(
                    self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug(
                    "   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..."
                    % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'SAX Exception',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values

        # The feed has changed (or this is the first time we've parsed it),
        # so save the etag and last_modified fields.
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''

        try:
            self.feed.last_modified = mtime(self.fpf.modified)
        except Exception:
            self.feed.last_modified = None

        self.fpf.entries = self.fpf.entries[:100]

        if self.fpf.feed.get('title'):
            self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))
        tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        if tagline:
            self.feed.data.feed_tagline = utf8encode(tagline)
            self.feed.data.save()
        if not self.feed.feed_link_locked:
            self.feed.feed_link = self.fpf.feed.get(
                'link') or self.fpf.feed.get('id') or self.feed.feed_link

        self.feed = self.feed.save()

        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        story_guids = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry)
            if story.get('published') < start_date:
                start_date = story.get('published')
            stories.append(story)
            story_guids.append(story.get('guid'))

        existing_stories = dict((s.story_guid, s) for s in MStory.objects(
            # story_guid__in=story_guids,
            story_date__gte=start_date,
            story_feed_id=self.feed.pk).limit(
                max(int(len(story_guids) * 1.5), 10)))

        ret_values = self.feed.add_update_stories(
            stories, existing_stories, verbose=self.options['verbose'])

        if (hasattr(self.fpf, 'feed') and hasattr(self.fpf.feed, 'links')
                and self.fpf.feed.links):
            hub_url = None
            self_url = self.feed.feed_address
            for link in self.fpf.feed.links:
                if link['rel'] == 'hub' and not hub_url:
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']
            push_expired = False
            if self.feed.is_push:
                try:
                    push_expired = self.feed.push.lease_expires < datetime.datetime.now()
                except PushSubscription.DoesNotExist:
                    self.feed.is_push = False
            if (hub_url and self_url and not settings.DEBUG
                    and self.feed.active_subscribers > 0
                    and (push_expired or not self.feed.is_push
                         or self.options.get('force'))):
                logging.debug(
                    u'   ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' %
                    (self.feed.title[:30], "~SKRe-~SN" if push_expired else "",
                     hub_url))
                try:
                    PushSubscription.objects.subscribe(self_url,
                                                       feed=self.feed,
                                                       hub=hub_url)
                except TimeoutError:
                    logging.debug(
                        u'   ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s'
                        % (self.feed.title[:30], hub_url))
            elif (self.feed.is_push
                  and (self.feed.active_subscribers <= 0 or not hub_url)):
                logging.debug(
                    u'   ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' %
                    (self.feed.title[:30]))
                self.feed.is_push = False
                self.feed = self.feed.save()

        logging.debug(
            u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s'
            % (self.feed.title[:30], '~FG~SB' if ret_values['new'] else '',
               ret_values['new'], '~FY~SB' if ret_values['updated'] else '',
               ret_values['updated'], '~SB' if ret_values['same'] else '',
               ret_values['same'], '~FR~SB' if ret_values['error'] else '',
               ret_values['error'], len(self.fpf.entries)))
        self.feed.update_all_statistics(full=bool(ret_values['new']),
                                        force=self.options['force'])
        if ret_values['new']:
            self.feed.trim_feed()
            self.feed.expire_redis()
        self.feed.save_feed_history(200, "OK")

        if self.options['verbose']:
            logging.debug(u'   ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' %
                          (self.feed.title[:30], time.time() - start))

        return FEED_OK, ret_values
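
The PuSH block implements PubSubHubbub discovery: scan the feed's link elements for rel='hub' (where to subscribe) and rel='self' (the canonical topic URL), keeping the first hub found. The same scan as a standalone helper over a feedparser result (a sketch; assumes the parsed object exposes .feed.links as above):

    def discover_push_endpoints(parsed, default_self=None):
        """Return (hub_url, self_url) from a feedparser result, or Nones."""
        hub_url, self_url = None, default_self
        for link in getattr(parsed.feed, 'links', []):
            if link.get('rel') == 'hub' and not hub_url:
                hub_url = link.get('href')
            elif link.get('rel') == 'self':
                self_url = link.get('href')
        return hub_url, self_url
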
Example #4
def load_social_stories(request, user_id, username=None):
    start          = time.time()
    user           = get_user(request)
    social_user_id = int(user_id)
    social_user    = get_object_or_404(User, pk=social_user_id)
    offset         = int(request.REQUEST.get('offset', 0))
    limit          = int(request.REQUEST.get('limit', 6))
    page           = request.REQUEST.get('page')
    order          = request.REQUEST.get('order', 'newest')
    read_filter    = request.REQUEST.get('read_filter', 'all')
    stories        = []
    
    if page: offset = limit * (int(page) - 1)
    now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    
    social_profile = MSocialProfile.get_user(social_user.pk)
    try:
        socialsub = MSocialSubscription.objects.get(user_id=user.pk, subscription_user_id=social_user_id)
    except MSocialSubscription.DoesNotExist:
        socialsub = None
    mstories = MSharedStory.objects(user_id=social_user.pk).order_by('-shared_date')[offset:offset+limit]
    stories = Feed.format_stories(mstories)
    
    if socialsub and (read_filter == 'unread' or order == 'oldest'):
        story_ids = socialsub.get_stories(order=order, read_filter=read_filter, offset=offset, limit=limit)
        story_date_order = "%sshared_date" % ('' if order == 'oldest' else '-')
        if story_ids:
            mstories = MSharedStory.objects(user_id=social_user.pk,
                                            story_db_id__in=story_ids).order_by(story_date_order)
            stories = Feed.format_stories(mstories)
    else:
        mstories = MSharedStory.objects(user_id=social_user.pk).order_by('-shared_date')[offset:offset+limit]
        stories = Feed.format_stories(mstories)

    if not stories:
        return dict(stories=[])
    
    checkpoint1 = time.time()
    
    stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, user.pk, check_all=True)

    story_feed_ids = list(set(s['story_feed_id'] for s in stories))
    usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
    usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
    unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys())))
    unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
    unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]
    date_delta = UNREAD_CUTOFF
    if socialsub and date_delta < socialsub.mark_read_date:
        date_delta = socialsub.mark_read_date
    
    # Get intelligence classifier for user
    classifier_feeds   = list(MClassifierFeed.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_titles  = list(MClassifierTitle.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_tags    = list(MClassifierTag.objects(user_id=user.pk, social_user_id=social_user_id))
    # Merge with feed specific classifiers
    classifier_feeds   = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_titles  = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_tags    = classifier_tags + list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))

    checkpoint2 = time.time()
    
    story_ids = [story['id'] for story in stories]
    userstories_db = MUserStory.objects(user_id=user.pk,
                                        feed_id__in=story_feed_ids,
                                        story_id__in=story_ids).only('story_id')
    userstories = set(us.story_id for us in userstories_db)

    starred_stories = MStarredStory.objects(user_id=user.pk, 
                                            story_feed_id__in=story_feed_ids, 
                                            story_guid__in=story_ids).only('story_guid', 'starred_date')
    shared_stories = MSharedStory.objects(user_id=user.pk, 
                                          story_feed_id__in=story_feed_ids, 
                                          story_guid__in=story_ids)\
                                 .only('story_guid', 'shared_date', 'comments')
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
    shared_stories = dict([(story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
                           for story in shared_stories])
    
    for story in stories:
        story['social_user_id'] = social_user_id
        story_feed_id = story['story_feed_id']
        # story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        shared_date = localtime_for_timezone(story['shared_date'], user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(shared_date, now)
        story['long_parsed_date'] = format_story_link_date__long(shared_date, now)
        
        if not socialsub:
            story['read_status'] = 1
        elif story['id'] in userstories:
            story['read_status'] = 1
        elif story['shared_date'] < date_delta:
            story['read_status'] = 1
        elif not usersubs_map.get(story_feed_id):
            story['read_status'] = 0
        elif not story.get('read_status') and story['story_date'] < usersubs_map[story_feed_id].mark_read_date:
            story['read_status'] = 1
        elif not story.get('read_status') and story['shared_date'] < date_delta:
            story['read_status'] = 1
        # elif not story.get('read_status') and socialsub and story['shared_date'] > socialsub.last_read_date:
        #     story['read_status'] = 0
        else:
            story['read_status'] = 0

        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        if story['id'] in shared_stories:
            story['shared'] = True
            shared_date = localtime_for_timezone(shared_stories[story['id']]['shared_date'],
                                                 user.profile.timezone)
            story['shared_date'] = format_story_link_date__long(shared_date, now)
            story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])

        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, story['story_feed_id'],
                                           social_user_id=social_user_id),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }
    
    
    classifiers = sort_classifiers_by_feed(user=user, feed_ids=story_feed_ids,
                                           classifier_feeds=classifier_feeds,
                                           classifier_authors=classifier_authors,
                                           classifier_titles=classifier_titles,
                                           classifier_tags=classifier_tags)
                                           
    if socialsub:
        socialsub.feed_opens += 1
        socialsub.save()
    
    diff1 = checkpoint1-start
    diff2 = checkpoint2-start
    logging.user(request, "~FYLoading ~FMshared stories~FY: ~SB%s%s ~SN(~SB%.4ss/%.4ss~SN)" % (
                 social_profile.title[:22], ('~SN/p%s' % page) if page and int(page) > 1 else '',
                 diff1, diff2))

    return {
        "stories": stories, 
        "user_profiles": user_profiles, 
        "feeds": unsub_feeds, 
        "classifiers": classifiers,
    }
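
localtime_for_timezone shows up in every one of these views to render dates in the reader's zone; its implementation isn't shown here. A plausible minimal version with pytz, assuming dates are stored as naive UTC (an assumption, not the project's actual helper):

    import datetime
    import pytz

    def localtime_for_timezone(dt, timezone):
        # Assumes dt is a naive UTC datetime, as Mongo-stored dates usually are.
        if isinstance(timezone, str):
            timezone = pytz.timezone(timezone)
        return pytz.utc.localize(dt).astimezone(timezone)

    print(localtime_for_timezone(datetime.datetime.utcnow(), 'Europe/Berlin'))
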
Example #5
    def process(self):
        """ Downloads and parses a feed.
        """
        start = time.time()
        self.refresh_feed()
        
        ret_values = dict(new=0, updated=0, same=0, error=0)

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(u'   ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)' % (
                                  self.feed.title[:30],
                                  self.fpf.bozo_exception,
                                  len(self.fpf.entries)))
                    
            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values
            
            # 302: Temporary redirect: ignore
            # 301: Permanent redirect: save it
            if self.fpf.status == 301:
                if not self.fpf.href.endswith('feedburner.com/atom.xml'):
                    self.feed.feed_address = self.fpf.href
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug("   ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..." % (self.feed.title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status, "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug("   ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..." % (self.feed.title[:30], self.fpf.status))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        if not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
                logging.debug("   ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed', self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug("   ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'SAX Exception', self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
                
        # The feed has changed (or this is the first time we've parsed it),
        # so save the etag and last_modified fields.
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            self.feed.etag = self.feed.etag[:255]
        # sometimes this is None (it never should be) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''

        try:
            self.feed.last_modified = mtime(self.fpf.modified)
        except Exception:
            self.feed.last_modified = None
        
        self.fpf.entries = self.fpf.entries[:100]
        
        if self.fpf.feed.get('title'):
            self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))
        tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        if tagline:
            self.feed.data.feed_tagline = utf8encode(tagline)
            self.feed.data.save()
        if not self.feed.feed_link_locked:
            self.feed.feed_link = self.fpf.feed.get('link') or self.fpf.feed.get('id') or self.feed.feed_link
        
        self.feed = self.feed.save()

        # Compare new stories to existing stories, adding and updating
        start_date = datetime.datetime.utcnow()
        story_guids = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry)
            if story.get('published') < start_date:
                start_date = story.get('published')
            stories.append(story)
            story_guids.append(story.get('guid'))

        existing_stories = dict((s.story_guid, s) for s in MStory.objects(
            # story_guid__in=story_guids,
            story_date__gte=start_date,
            story_feed_id=self.feed.pk
        ).limit(max(int(len(story_guids)*1.5), 10)))
        
        ret_values = self.feed.add_update_stories(stories, existing_stories,
                                                  verbose=self.options['verbose'])

        if (hasattr(self.fpf, 'feed') and 
            hasattr(self.fpf.feed, 'links') and self.fpf.feed.links):
            hub_url = None
            self_url = self.feed.feed_address
            for link in self.fpf.feed.links:
                if link['rel'] == 'hub' and not hub_url:
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']
            push_expired = False
            if self.feed.is_push:
                try:
                    push_expired = self.feed.push.lease_expires < datetime.datetime.now()
                except PushSubscription.DoesNotExist:
                    self.feed.is_push = False
            if (hub_url and self_url and not settings.DEBUG and
                self.feed.active_subscribers > 0 and
                (push_expired or not self.feed.is_push or self.options.get('force'))):
                logging.debug(u'   ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' % (
                              self.feed.title[:30],
                              "~SKRe-~SN" if push_expired else "", hub_url))
                try:
                    PushSubscription.objects.subscribe(self_url, feed=self.feed, hub=hub_url)
                except TimeoutError:
                    logging.debug(u'   ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s' % (
                                  self.feed.title[:30], hub_url))                    
            elif (self.feed.is_push and 
                  (self.feed.active_subscribers <= 0 or not hub_url)):
                logging.debug(u'   ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' % (
                              self.feed.title[:30]))
                self.feed.is_push = False
                self.feed = self.feed.save()
        
        logging.debug(u'   ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s' % (
                      self.feed.title[:30], 
                      '~FG~SB' if ret_values['new'] else '', ret_values['new'],
                      '~FY~SB' if ret_values['updated'] else '', ret_values['updated'],
                      '~SB' if ret_values['same'] else '', ret_values['same'],
                      '~FR~SB' if ret_values['error'] else '', ret_values['error'],
                      len(self.fpf.entries)))
        self.feed.update_all_statistics(full=bool(ret_values['new']), force=self.options['force'])
        if ret_values['new']:
            self.feed.trim_feed()
            self.feed.expire_redis()
        self.feed.save_feed_history(200, "OK")
        
        if self.options['verbose']:
            logging.debug(u'   ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' % (
                          self.feed.title[:30], time.time() - start))
        
        return FEED_OK, ret_values
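
Unlike Example #1's exact story_hash lookup, this revision matches by guid against a date-bounded window: every stored story newer than the oldest incoming published date, capped at 1.5x the batch size. The matching step itself reduces to a dict keyed by guid (toy data, no Mongo):

    incoming = [{'guid': 'a', 'title': 'A, edited'}, {'guid': 'c', 'title': 'C'}]
    window   = [{'guid': 'a', 'title': 'A'}, {'guid': 'b', 'title': 'B'}]

    existing_by_guid = dict((s['guid'], s) for s in window)
    for story in incoming:
        print(story['guid'], 'update' if story['guid'] in existing_by_guid else 'new')
    # a update
    # c new
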
Example #6
def mark_story_as_shared(request):
    code     = 1
    feed_id  = int(request.POST['feed_id'])
    story_id = request.POST['story_id']
    comments = request.POST.get('comments', '')
    source_user_id = request.POST.get('source_user_id')
    post_to_services = request.POST.getlist('post_to_services')
    format = request.REQUEST.get('format', 'json')
    
    MSocialProfile.get_user(request.user.pk)
    
    story, original_story_found = MStory.find_story(feed_id, story_id)

    if not story:
        return json.json_response(request, {
            'code': -1, 
            'message': 'Could not find the original story and no copies could be found.'
        })
    
    shared_story = MSharedStory.objects.filter(user_id=request.user.pk, 
                                               story_feed_id=feed_id, 
                                               story_guid=story_id).limit(1).first()
    if not shared_story:
        story_db = dict([(k, v) for k, v in story._data.items() 
                                if k is not None and v is not None])
        story_values = dict(user_id=request.user.pk, comments=comments, 
                            has_comments=bool(comments), story_db_id=story.id)
        story_db.update(story_values)
        shared_story = MSharedStory.objects.create(**story_db)
        if source_user_id:
            shared_story.set_source_user_id(int(source_user_id))
        socialsubs = MSocialSubscription.objects.filter(subscription_user_id=request.user.pk)
        for socialsub in socialsubs:
            socialsub.needs_unread_recalc = True
            socialsub.save()
        logging.user(request, "~FCSharing ~FM%s: ~SB~FB%s" % (story.story_title[:20], comments[:30]))
    else:
        shared_story.comments = comments
        shared_story.has_comments = bool(comments)
        shared_story.save()
        logging.user(request, "~FCUpdating shared story ~FM%s: ~SB~FB%s" % (
                     story.story_title[:20], comments[:30]))
    
    if original_story_found:
        story.count_comments()
    shared_story.publish_update_to_subscribers()
    
    story = Feed.format_story(story)
    check_all = not original_story_found
    stories, profiles = MSharedStory.stories_with_comments_and_profiles([story], request.user.pk,
                                                                        check_all=check_all)
    story = stories[0]
    story['shared_comments'] = strip_tags(shared_story['comments'] or "")
    
    if post_to_services:
        for service in post_to_services:
            if service not in shared_story.posted_to_services:
                PostToService.delay(shared_story_id=shared_story.id, service=service)
    
    if shared_story.source_user_id and shared_story.comments:
        EmailStoryReshares.apply_async(kwargs=dict(shared_story_id=shared_story.id), countdown=60)
    
    if format == 'html':
        stories = MSharedStory.attach_users_to_stories(stories, profiles)
        return render_to_response('social/story_share.xhtml', {
            'story': story,
        }, context_instance=RequestContext(request))
    else:
        return json.json_response(request, {
            'code': code, 
            'story': story, 
            'user_profiles': profiles,
        })
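
Note the deliberate delay on the reshare email: apply_async(..., countdown=60) asks Celery to run the task a minute from now rather than immediately, presumably so quick follow-up edits to the comment land before the mail goes out. The scheduling pattern in isolation (hypothetical task and broker):

    from celery import Celery

    app = Celery('tasks', broker='redis://localhost:6379/0')  # hypothetical broker

    @app.task
    def email_story_reshares(shared_story_id):
        print('emailing reshare of story %s' % shared_story_id)

    # Run 60 seconds from now instead of immediately.
    email_story_reshares.apply_async(kwargs={'shared_story_id': 123}, countdown=60)
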
Example #7
def mark_story_as_shared(request):
    code = 1
    feed_id = int(request.POST["feed_id"])
    story_id = request.POST["story_id"]
    comments = request.POST.get("comments", "")
    source_user_id = request.POST.get("source_user_id")
    relative_user_id = request.POST.get("relative_user_id") or request.user.pk
    post_to_services = request.POST.getlist("post_to_services")
    format = request.REQUEST.get("format", "json")

    MSocialProfile.get_user(request.user.pk)

    story, original_story_found = MStory.find_story(feed_id, story_id)

    if not story:
        return json.json_response(
            request, {"code": -1, "message": "Could not find the original story and no copies could be found."}
        )

    shared_story = (
        MSharedStory.objects.filter(user_id=request.user.pk, story_feed_id=feed_id, story_guid=story_id)
        .limit(1)
        .first()
    )
    if not shared_story:
        story_db = dict([(k, v) for k, v in story._data.items() if k is not None and v is not None])
        story_values = dict(
            user_id=request.user.pk, comments=comments, has_comments=bool(comments), story_db_id=story.id
        )
        story_db.update(story_values)
        shared_story = MSharedStory.objects.create(**story_db)
        if source_user_id:
            shared_story.set_source_user_id(int(source_user_id))
        socialsubs = MSocialSubscription.objects.filter(subscription_user_id=request.user.pk)
        for socialsub in socialsubs:
            socialsub.needs_unread_recalc = True
            socialsub.save()
        logging.user(request, "~FCSharing ~FM%s: ~SB~FB%s" % (story.story_title[:20], comments[:30]))
    else:
        shared_story.comments = comments
        shared_story.has_comments = bool(comments)
        shared_story.save()
        logging.user(request, "~FCUpdating shared story ~FM%s: ~SB~FB%s" % (story.story_title[:20], comments[:30]))

    if original_story_found:
        story.count_comments()
    shared_story.publish_update_to_subscribers()

    story = Feed.format_story(story)
    check_all = not original_story_found
    stories, profiles = MSharedStory.stories_with_comments_and_profiles([story], relative_user_id, check_all=check_all)
    story = stories[0]
    story["shared_comments"] = strip_tags(shared_story["comments"] or "")
    story["shared_by_user"] = True

    if post_to_services:
        for service in post_to_services:
            if service not in shared_story.posted_to_services:
                PostToService.delay(shared_story_id=shared_story.id, service=service)

    if shared_story.source_user_id and shared_story.comments:
        EmailStoryReshares.apply_async(
            kwargs=dict(shared_story_id=shared_story.id), countdown=settings.SECONDS_TO_DELAY_CELERY_EMAILS
        )

    if format == "html":
        stories = MSharedStory.attach_users_to_stories(stories, profiles)
        return render_to_response(
            "social/social_story.xhtml", {"story": story}, context_instance=RequestContext(request)
        )
    else:
        return json.json_response(request, {"code": code, "story": story, "user_profiles": profiles})
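
Both versions of mark_story_as_shared clone the found MStory into an MSharedStory by filtering the document's internal _data dict and overlaying the share-specific fields. The copy-and-override pattern in isolation (plain dicts standing in for the Mongo documents; field names illustrative):

    story_data = {'story_guid': 'g1', 'story_title': 'Title', 'story_content': None}

    # Drop unset fields, then overlay what makes this copy a *shared* story.
    story_db = dict((k, v) for k, v in story_data.items()
                    if k is not None and v is not None)
    story_db.update({'user_id': 42, 'comments': 'worth a read', 'has_comments': True})
    print(story_db)
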
Example #8
def load_river_blurblog(request):
    limit = 10
    start = time.time()
    user = get_user(request)
    social_user_ids = [int(uid) for uid in request.REQUEST.getlist("social_user_ids") if uid]
    original_user_ids = list(social_user_ids)
    page = int(request.REQUEST.get("page", 1))
    order = request.REQUEST.get("order", "newest")
    read_filter = request.REQUEST.get("read_filter", "unread")
    relative_user_id = request.REQUEST.get("relative_user_id", None)
    now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if not relative_user_id:
        relative_user_id = get_user(request).pk

    if not social_user_ids:
        socialsubs = MSocialSubscription.objects.filter(user_id=user.pk)
        social_user_ids = [s.subscription_user_id for s in socialsubs]

    offset = (page - 1) * limit
    limit = page * limit - 1  # note: limit becomes an end index (inclusive), not a count

    story_ids, story_dates = MSocialSubscription.feed_stories(
        user.pk, social_user_ids, offset=offset, limit=limit, order=order, read_filter=read_filter
    )
    mstories = MStory.objects(id__in=story_ids)
    story_id_to_dates = dict(zip(story_ids, story_dates))

    def sort_stories_by_id(a, b):
        return int(story_id_to_dates[str(b.id)]) - int(story_id_to_dates[str(a.id)])

    sorted_mstories = sorted(mstories, cmp=sort_stories_by_id)
    stories = Feed.format_stories(sorted_mstories)
    for s, story in enumerate(stories):
        story["story_date"] = datetime.datetime.fromtimestamp(story_dates[s])
    stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, relative_user_id, check_all=True)

    story_feed_ids = list(set(s["story_feed_id"] for s in stories))
    usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
    usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
    unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys())))
    unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
    unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]

    # Find starred stories
    if story_feed_ids:
        story_ids = [story["id"] for story in stories]
        starred_stories = MStarredStory.objects(user_id=user.pk, story_guid__in=story_ids).only(
            "story_guid", "starred_date"
        )
        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
        shared_stories = MSharedStory.objects(user_id=user.pk, story_guid__in=story_ids).only(
            "story_guid", "shared_date", "comments"
        )
        shared_stories = dict(
            [
                (story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
                for story in shared_stories
            ]
        )

        userstories_db = MUserStory.objects(user_id=user.pk, feed_id__in=story_feed_ids, story_id__in=story_ids).only(
            "story_id"
        )
        userstories = set(us.story_id for us in userstories_db)

    else:
        starred_stories = {}
        shared_stories = {}
        userstories = []

    # Intelligence classifiers for all feeds involved
    if story_feed_ids:
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    else:
        classifier_feeds = []
        classifier_authors = []
        classifier_titles = []
        classifier_tags = []
    classifiers = sort_classifiers_by_feed(
        user=user,
        feed_ids=story_feed_ids,
        classifier_feeds=classifier_feeds,
        classifier_authors=classifier_authors,
        classifier_titles=classifier_titles,
        classifier_tags=classifier_tags,
    )

    # Just need to format stories
    for story in stories:
        if story["id"] in userstories:
            story["read_status"] = 1
        elif story["story_date"] < UNREAD_CUTOFF:
            story["read_status"] = 1
        else:
            story["read_status"] = 0
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date, now)
        story["long_parsed_date"] = format_story_link_date__long(story_date, now)
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date, now)
        story["intelligence"] = {
            "feed": apply_classifier_feeds(classifier_feeds, story["story_feed_id"]),
            "author": apply_classifier_authors(classifier_authors, story),
            "tags": apply_classifier_tags(classifier_tags, story),
            "title": apply_classifier_titles(classifier_titles, story),
        }
        if story["id"] in shared_stories:
            story["shared"] = True
            shared_date = localtime_for_timezone(shared_stories[story["id"]]["shared_date"], user.profile.timezone)
            story["shared_date"] = format_story_link_date__long(shared_date, now)
            story["shared_comments"] = strip_tags(shared_stories[story["id"]]["comments"])

    timediff = round(time.time() - start, 2)
    logging.user(
        request,
        "~FYLoading ~FCriver blurblogs stories~FY: ~SBp%s~SN (%s/%s "
        "stories, ~SN%s/%s/%s feeds)"
        % (page, len(stories), len(mstories), len(story_feed_ids), len(social_user_ids), len(original_user_ids)),
    )

    return {
        "stories": stories,
        "user_profiles": user_profiles,
        "feeds": unsub_feeds,
        "classifiers": classifiers,
        "elapsed_time": timediff,
    }
Example #9
def share_story(request, token):
    code = 0
    story_url = request.POST['story_url']
    comments = request.POST['comments']
    title = request.POST['title']
    content = request.POST['content']
    rss_url = request.POST.get('rss_url')
    feed_id = request.POST.get('feed_id') or 0
    feed = None
    message = None

    if not story_url:
        code = -1
        message = "Missing story_url"
    else:
        try:
            profile = Profile.objects.get(secret_token=token)
        except Profile.DoesNotExist:
            code = -1
            message = "Invalid secret token"

    if code < 0:
        # Bail out now: on this path `profile` was never bound, and the sharing
        # code below would raise a NameError instead of reporting the error code.
        response = HttpResponse(json.encode({'code': code, 'message': message, 'story': None}),
                                mimetype='text/plain')
        response['Access-Control-Allow-Origin'] = '*'
        response['Access-Control-Allow-Methods'] = 'POST'
        return response

    if feed_id:
        feed = Feed.objects.get(pk=feed_id)
    else:
        if rss_url:
            feed = Feed.get_feed_from_url(rss_url, create=True, fetch=True)
        if not feed:
            feed = Feed.get_feed_from_url(story_url, create=True, fetch=True)
        if feed:
            feed_id = feed.pk

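    # Rebuild a canonical base URL and sanitize the submitted HTML and title.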
    parsed_url = urlparse.urlparse(story_url)
    base_url = "%s://%s%s" % (parsed_url.scheme, parsed_url.hostname,
                              parsed_url.path)
    scrubber = SelectiveScriptScrubber(base_url=base_url)
    content = scrubber.scrub(content)
    title = strip_tags(title)

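    # Update the existing share of this story if there is one; otherwise create it.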
    shared_story = MSharedStory.objects.filter(
        user_id=profile.user.pk, story_feed_id=feed_id,
        story_guid=story_url).limit(1).first()
    if not shared_story:
        story_db = {
            "story_guid": story_url,
            "story_permalink": story_url,
            "story_title": title,
            "story_feed_id": feed_id,
            "story_content": content,
            "story_date": datetime.datetime.now(),
            "user_id": profile.user.pk,
            "comments": comments,
            "has_comments": bool(comments),
        }
        shared_story = MSharedStory.objects.create(**story_db)
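        # Followers' unread counts are now stale, so flag each social
        # subscription for recalculation.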
        socialsubs = MSocialSubscription.objects.filter(
            subscription_user_id=profile.user.pk)
        for socialsub in socialsubs:
            socialsub.needs_unread_recalc = True
            socialsub.save()
        logging.user(
            profile.user,
            "~BM~FYSharing story from site: ~SB%s: %s" % (story_url, comments))
    else:
        shared_story.story_content = content
        shared_story.story_title = title
        shared_story.comments = comments
        shared_story.story_permalink = story_url
        shared_story.story_guid = story_url
        shared_story.has_comments = bool(comments)
        shared_story.story_feed_id = feed_id
        shared_story.save()
        logging.user(
            profile.user,
            "~BM~FY~SBUpdating~SN shared story from site: ~SB%s: %s" %
            (story_url, comments))

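    # Notify the user's subscribers that the shared story was created or updated.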
    shared_story.publish_update_to_subscribers()

    response = HttpResponse(json.encode({
        'code': code,
        'message': message,
        'story': None,
    }),
                            mimetype='text/plain')
    response['Access-Control-Allow-Origin'] = '*'
    response['Access-Control-Allow-Methods'] = 'POST'

    return response
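
This view authenticates solely via the secret token in the URL and reads all
story fields from request.POST, so it can be driven from a bookmarklet or any
HTTP client. A minimal client sketch, assuming the requests library and a
hypothetical route of /api/share_story/<token>/ (the real pattern lives in the
project's urls.py):

import requests

token = "USER_SECRET_TOKEN"  # hypothetical placeholder for the profile's secret token
url = "https://newsblur.example.com/api/share_story/%s/" % token  # hypothetical route
resp = requests.post(url, data={
    "story_url": "https://blog.example.com/post/42",
    "title": "Post title",
    "content": "<p>Story body; scripts are scrubbed server-side</p>",
    "comments": "Worth a read.",
    # "rss_url" and "feed_id" are optional; without them the view falls back
    # to Feed.get_feed_from_url() discovery.
})
print(resp.text)  # e.g. {"code": 0, "message": null, "story": null}

Note that the view always answers 200 with CORS headers and signals failure
only through a negative "code" in the JSON body.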