Example #1
0
    def process_facebook_feed(self, feed, full_update=False):
        """Refresh a Facebook feed's metadata and store its posts as Updates.

        The feed's ``interest``, ``picture`` and ``is_personal`` attributes
        are refreshed first. Posts are then fetched page by page and written
        to ``Update`` rows keyed on ``(feed, origin_id)``. Finally the feed's
        ``update_error_count`` is reset, ``last_update`` is set to the
        current time and the feed is saved.

        Args:
            feed: Feed object to refresh. ``origin_id`` is used to build the
                request URLs.
            full_update: When true, request 100 posts per page, re-save posts
                that are already stored and walk through every page. When
                false, start with 20 posts per page, skip posts that are
                already stored and stop after the first page that contained
                one of them.

        Raises:
            AssertionError: If a post does not originate from this feed, or
                a 'photo' post lacks a link or a picture.
            Exception: If a 'video' post has no usable link or the post type
                is unknown.
        """
        self.logger.info('Processing feed %s' % unicode(feed))

        # First update the feed itself
        feed_info = self._fb_get(feed.origin_id)
        feed.interest = feed_info.get('likes', None)
        if not feed.picture:
            self.logger.info('Fetching picture info')
            picture_info = self._fb_get("%s?fields=picture" % feed.origin_id)
            feed.picture = picture_info.get('picture', {}).get('data', {}).get('url', None)

        # Feeds whose info carries no 'category' are treated as personal.
        # Limit downloading of personal feeds to last two months.
        if 'category' not in feed_info:
            feed.is_personal = True
            since = datetime.datetime.now() - datetime.timedelta(weeks=2*4)
            since = int(time.mktime(since.timetuple()))
            filter_args = "&since=%d" % since
            self.logger.debug('%s is a personal feed' % unicode(feed))
        else:
            feed.is_personal = False
            filter_args = ""

        count = 100 if full_update else 20
        new_count = 0
        url = '%s/posts?limit=%d%s' % (feed.origin_id, count, filter_args)
        while True:
            self.logger.info('Fetching %s' % url)
            g = self._fb_get(url)
            # Set when at least one post of this page is already stored.
            found = False
            for post in g['data']:
                # Sanity check. Raised explicitly (instead of a bare assert)
                # so that it is not stripped when running with -O.
                if post['from']['id'] != feed.origin_id:
                    raise AssertionError("%s: post is from %s, expected %s" %
                                         (post['id'], post['from']['id'], feed.origin_id))
                if post['type'] in ('question', 'swf', 'music', 'offer'):
                    # We skip these updates for now.
                    continue
                if post['type'] == 'status' and 'message' not in post:
                    # We're not interested in status updates with no content.
                    continue
                try:
                    upd = Update.objects.get(feed=feed, origin_id=post['id'])
                    found = True
                    if not full_update:
                        continue
                except Update.DoesNotExist:
                    upd = Update(feed=feed, origin_id=post['id'])
                    new_count += 1

                utc = dateutil.parser.parse(post['created_time'])
                upd.created_time = utc.astimezone(dateutil.tz.tzlocal())
                self._set_field_with_len(upd, 'text', post.get('message', None))
                upd.share_link = post.get('link', None)
                upd.picture = post.get('picture', None)
                self._set_field_with_len(upd, 'share_title', post.get('name', None))
                self._set_field_with_len(upd, 'share_caption', post.get('caption', None))
                self._set_field_with_len(upd, 'share_description', post.get('description', None))
                # Links that do not fit into their field are dropped.
                if upd.picture and len(upd.picture) > self._get_field_max_len(upd, 'picture'):
                    self.logger.warning("%s: Removing too long (%d) picture link" % (upd.origin_id, len(upd.picture)))
                    upd.picture = None
                if upd.share_link and len(upd.share_link) > self._get_field_max_len(upd, 'share_link'):
                    self.logger.warning("%s: Removing too long (%d) link" % (upd.origin_id, len(upd.share_link)))
                    upd.share_link = None
                # A missing or empty status_type is stored as None.
                upd.sub_type = post.get('status_type', None) or None
                upd.interest = post.get('likes', {}).get('count', None)
                if post['type'] == 'link':
                    upd.type = 'link'
                    if not upd.share_link:
                        self.logger.warning("FB %s: No link given for 'link' update" % post['id'])
                elif post['type'] == 'photo':
                    upd.type = 'photo'
                    if not upd.share_link:
                        raise AssertionError("%s: No link for 'photo' update" % post['id'])
                    if not upd.picture:
                        raise AssertionError("%s: No picture for 'photo' update" % post['id'])
                elif post['type'] == 'status':
                    upd.type = 'status'
                elif post['type'] == 'video':
                    upd.type = 'video'
                    if not upd.share_link:
                        # Fall back to the 'source' attribute
                        upd.share_link = post.get('source', None)
                        if not upd.share_link:
                            pprint.pprint(post)
                            raise Exception("%s: No link for 'video 'update" % post['id'])
                        if len(upd.share_link) > self._get_field_max_len(upd, 'share_link'):
                            self.logger.warning("%s: Removing too long link" % upd.origin_id)
                            upd.share_link = None
                else:
                    pprint.pprint(post)
                    raise Exception("Unknown FB update type: %s" % post['type'])
                upd.save()

            # Without a 'next' link there is no further page to fetch.
            if 'paging' not in g or 'next' not in g['paging']:
                break
            next_args = urlparse.parse_qs(urlparse.urlparse(g['paging']['next']).query)
            until = int(next_args['until'][0])
            # If we didn't have any of the updates, get a bigger batch next
            # time.
            if not found:
                count = 100
            elif not full_update:
                # If at least some of the updates were in our DB already,
                # the feed is up-to-date.
                break
            url = "%s/posts?limit=%d&until=%d%s" % (feed.origin_id, count, until, filter_args)
        self.logger.info("%s: %d new updates" % (feed.account_name, new_count))
        feed.update_error_count = 0
        feed.last_update = datetime.datetime.now()
        feed.save()
Example #2
0
    def process_facebook_feed(self, feed, full_update=False):
        """Refresh a Facebook feed's metadata and store its posts as Updates.

        The feed's ``picture``, ``interest`` and ``is_personal`` attributes
        are refreshed first. Posts are then fetched page by page and written
        to ``Update`` rows keyed on ``(feed, origin_id)``. Finally the feed's
        ``update_error_count`` is reset, ``last_update`` is set to the
        current time and the feed is saved.

        Args:
            feed: Feed object to refresh. ``origin_id`` is used to build the
                request URLs.
            full_update: When true, request 100 posts per page, re-save posts
                that are already stored and walk through every page. When
                false, start with 20 posts per page, skip posts that are
                already stored and stop after the first page that contained
                one of them.

        Raises:
            AssertionError: If a post does not originate from this feed, or
                a 'photo' post lacks a link or a picture.
            Exception: If a 'video' post has no usable link or the post type
                is unknown.
        """
        self.logger.info('Processing feed %s' % unicode(feed))

        # First update the feed itself
        # NOTE(review): only picture, likes and about are requested here, yet
        # 'category' is looked up in this same response below. If the API
        # returns nothing but the requested fields, every feed ends up
        # classified as personal -- confirm and add 'category' if needed.
        url = '%s?fields=picture,likes,about' % feed.origin_id
        feed_info = self._fb_get(url)
        feed.picture = feed_info.get('picture', {}).get('data',
                                                        {}).get('url', None)
        feed.interest = feed_info.get('likes', None)
        # Feeds whose info carries no 'category' are treated as personal.
        # Limit downloading of personal feeds to last two months.
        if 'category' not in feed_info:
            feed.is_personal = True
            since = datetime.datetime.now() - datetime.timedelta(weeks=2 * 4)
            since = int(time.mktime(since.timetuple()))
            filter_args = "&since=%d" % since
            self.logger.debug('%s is a personal feed' % unicode(feed))
        else:
            feed.is_personal = False
            filter_args = ""

        count = 100 if full_update else 20
        new_count = 0
        url = '%s/posts?limit=%d%s' % (feed.origin_id, count, filter_args)
        while True:
            self.logger.info('Fetching %s' % url)
            g = self._fb_get(url)
            # Set when at least one post of this page is already stored.
            found = False
            for post in g['data']:
                # Sanity check. Raised explicitly (instead of a bare assert)
                # so that it is not stripped when running with -O.
                if post['from']['id'] != feed.origin_id:
                    raise AssertionError(
                        "%s: post is from %s, expected %s" %
                        (post['id'], post['from']['id'], feed.origin_id))
                if post['type'] in ('question', 'swf', 'music', 'offer'):
                    # We skip these updates for now.
                    continue
                if post['type'] == 'status' and 'message' not in post:
                    # We're not interested in status updates with no content.
                    continue
                try:
                    upd = Update.objects.get(feed=feed, origin_id=post['id'])
                    found = True
                    if not full_update:
                        continue
                except Update.DoesNotExist:
                    upd = Update(feed=feed, origin_id=post['id'])
                    new_count += 1

                utc = dateutil.parser.parse(post['created_time'])
                upd.created_time = utc.astimezone(dateutil.tz.tzlocal())
                self._set_field_with_len(upd, 'text',
                                         post.get('message', None))
                upd.share_link = post.get('link', None)
                upd.picture = post.get('picture', None)
                self._set_field_with_len(upd, 'share_title',
                                         post.get('name', None))
                self._set_field_with_len(upd, 'share_caption',
                                         post.get('caption', None))
                self._set_field_with_len(upd, 'share_description',
                                         post.get('description', None))
                # Links that do not fit into their field are dropped.
                if upd.picture and len(upd.picture) > self._get_field_max_len(
                        upd, 'picture'):
                    self.logger.warning(
                        "%s: Removing too long (%d) picture link" %
                        (upd.origin_id, len(upd.picture)))
                    upd.picture = None
                if upd.share_link and len(
                        upd.share_link) > self._get_field_max_len(
                            upd, 'share_link'):
                    self.logger.warning("%s: Removing too long (%d) link" %
                                        (upd.origin_id, len(upd.share_link)))
                    upd.share_link = None
                # A missing or empty status_type is stored as None.
                upd.sub_type = post.get('status_type', None) or None
                upd.interest = post.get('likes', {}).get('count', None)
                if post['type'] == 'link':
                    upd.type = 'link'
                    if not upd.share_link:
                        self.logger.warning(
                            "FB %s: No link given for 'link' update" %
                            post['id'])
                elif post['type'] == 'photo':
                    upd.type = 'photo'
                    if not upd.share_link:
                        raise AssertionError(
                            "%s: No link for 'photo' update" % post['id'])
                    if not upd.picture:
                        raise AssertionError(
                            "%s: No picture for 'photo' update" % post['id'])
                elif post['type'] == 'status':
                    upd.type = 'status'
                elif post['type'] == 'video':
                    upd.type = 'video'
                    if not upd.share_link:
                        # Fall back to the 'source' attribute
                        upd.share_link = post.get('source', None)
                        if not upd.share_link:
                            pprint.pprint(post)
                            raise Exception("%s: No link for 'video 'update" %
                                            post['id'])
                        if len(upd.share_link) > self._get_field_max_len(
                                upd, 'share_link'):
                            self.logger.warning("%s: Removing too long link" %
                                                upd.origin_id)
                            upd.share_link = None
                else:
                    pprint.pprint(post)
                    raise Exception("Unknown FB update type: %s" %
                                    post['type'])
                upd.save()

            # Without a 'next' link there is no further page to fetch.
            if 'paging' not in g or 'next' not in g['paging']:
                break
            next_args = urlparse.parse_qs(
                urlparse.urlparse(g['paging']['next']).query)
            until = int(next_args['until'][0])
            # If we didn't have any of the updates, get a bigger batch next
            # time.
            if not found:
                count = 100
            elif not full_update:
                # If at least some of the updates were in our DB already,
                # the feed is up-to-date.
                break
            url = "%s/posts?limit=%d&until=%d%s" % (feed.origin_id, count,
                                                    until, filter_args)
        self.logger.info("%s: %d new updates" % (feed.account_name, new_count))
        feed.update_error_count = 0
        feed.last_update = datetime.datetime.now()
        feed.save()