Example no. 1
    def process(self):
        for ent in self.json['entries']:
            id = ent['id'][2:]
            uuid = '%s-%s-%s-%s-%s' % (id[0:8], id[8:12], id[12:16], id[16:20],
                                       id[20:])
            guid = 'tag:friendfeed.com,2007:%s' % uuid
            if self.verbose:
                print("ID: %s" % guid)

            t = datetime.datetime.strptime(ent['date'], '%Y-%m-%dT%H:%M:%SZ')
            try:
                e = Entry.objects.get(service=self.service, guid=guid)
                if not self.force_overwrite and \
                   e.date_updated and mtime(t.timetuple()) <= e.date_updated:
                    continue
                if e.protected:
                    continue
            except Entry.DoesNotExist:
                e = Entry(service=self.service, guid=guid)

            e.guid = guid
            e.title = truncate.smart(strip_entities(strip_tags(ent['body'])),
                                     max_length=40)
            e.link = ent['url']
            image_url = 'http://friendfeed-api.com/v2/picture/%s' % ent[
                'from']['id']
            e.link_image = media.save_image(image_url, direct_image=False)

            e.date_published = t
            e.date_updated = t
            e.author_name = ent['from']['name']

            content = ent['body']
            if 'thumbnails' in ent:
                content += '<p class="thumbnails">'
                for t in ent['thumbnails']:
                    if self.service.public:
                        t['url'] = media.save_image(t['url'])
                    if 'width' in t and 'height' in t:
                        iwh = ' width="%d" height="%d"' % (t['width'],
                                                           t['height'])
                    else:
                        iwh = ''

                    if 'friendfeed.com/e/' in t['link'] and \
                       ('youtube.com' in t['url'] or 'ytimg.com' in t['url']):
                        m = re.search(r'/vi/([\-\w]+)/', t['url'])
                        yid = m.groups()[0] if m else None
                        if yid:
                            t['link'] = 'http://www.youtube.com/watch?v=%s' % yid

                    content += '<a href="%s" rel="nofollow"><img src="%s"%s alt="thumbnail" /></a> ' % (
                        t['link'], t['url'], iwh)
                content += '</p>'

            if 'files' in ent:
                content += '<ul class="files">\n'
                for f in ent['files']:
                    if 'friendfeed-media' in f['url']:
                        content += '  <li><a href="%s" rel="nofollow">%s</a>' % (
                            f['url'], f['name'])
                        if 'size' in f:
                            content += ' <span class="size">%s</span>' % bytes_to_human(
                                f['size'])
                        content += '</li>\n'
                content += '</ul>\n'

            e.content = content

            try:
                e.save()
                media.extract_and_register(e)
            except Exception:
                # skip this entry if saving or media registration fails
                pass
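
A note on the guid construction in this example: the entry id from the FriendFeed v2 API apparently carries a two-character type prefix (the code also checks for "friendfeed.com/e/" links), and the remaining 32 hex characters are re-dashed into the canonical 8-4-4-4-12 UUID layout before the tag URI is built. A minimal standalone sketch of the same transformation (the sample id below is made up for illustration):

    raw_id = 'e/0123456789abcdef0123456789abcdef'   # hypothetical entry id
    hexid = raw_id[2:]                               # strip the two-character prefix
    uuid = '%s-%s-%s-%s-%s' % (hexid[0:8], hexid[8:12], hexid[12:16],
                               hexid[16:20], hexid[20:])
    guid = 'tag:friendfeed.com,2007:%s' % uuid
    print(guid)  # tag:friendfeed.com,2007:01234567-89ab-cdef-0123-456789abcdef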
Example no. 2
    def process(self):
        for ent in self.json:
            guid = 'tag:twitter.com,2007:http://twitter.com/%s/statuses/%s' % \
                (ent['user']['screen_name'], ent['id'])
            if self.verbose:
                print("ID: %s" % guid)

            t = datetime.datetime.strptime(ent['created_at'],
                                           '%a %b %d %H:%M:%S +0000 %Y')
            try:
                e = Entry.objects.get(service=self.service, guid=guid)
                if not self.force_overwrite and \
                   e.date_updated and mtime(t.timetuple()) <= e.date_updated:
                    continue
                if e.protected:
                    continue
            except Entry.DoesNotExist:
                e = Entry(service=self.service, guid=guid)

            e.guid = guid
            e.title = 'Tweet: %s' % truncate.smart(
                strip_entities(strip_tags(ent['text'])), max_length=40)
            e.title = e.title.replace('#', '').replace('@', '')

            e.link = 'https://twitter.com/%s/status/%s' % \
                (ent['user']['screen_name'], ent['id'])
            image_url = ent['user']['profile_image_url_https']
            e.link_image = media.save_image(image_url, direct_image=False)

            e.date_published = t
            e.date_updated = t
            e.author_name = ent['user']['name']

            # expand twice: resolve short URLs first, then run the general expander
            e.content = 'Tweet: %s' % expand.all(expand.shorturls(ent['text']))

            if 'entities' in ent and 'media' in ent['entities']:
                content = ' <p class="thumbnails">'
                for t in ent['entities']['media']:
                    if t['type'] == 'photo':
                        tsize = 'thumb'
                        if 'media_url_https' in t:
                            image_url = '%s:%s' % (t['media_url_https'], tsize)
                            large_url = '%s:large' % t['media_url_https']
                        else:
                            image_url = '%s:%s' % (t['media_url'], tsize)
                            large_url = t['media_url']
                        link = t['expanded_url']
                        if self.service.public:
                            image_url = media.save_image(image_url)
                        if 'sizes' in t and tsize in t['sizes']:
                            sizes = t['sizes'][tsize]
                            iwh = ' width="%d" height="%d"' % (sizes['w'],
                                                               sizes['h'])
                        else:
                            iwh = ''
                        content += '<a href="%s" rel="nofollow" data-imgurl="%s"><img src="%s"%s alt="thumbnail" /></a> ' % (
                            link, large_url, image_url, iwh)
                content += '</p>'
                e.content += content

            try:
                e.save()
                media.extract_and_register(e)
            except Exception:
                # skip this entry if saving or media registration fails
                pass
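
The created_at parsing above assumes the classic REST-style timestamp format ("Wed Aug 27 13:08:45 +0000 2008"); the hard-coded "+0000" in the pattern means the result is a naive datetime implicitly in UTC. A quick standalone check of that strptime pattern (the sample value is made up):

    import datetime

    created_at = 'Wed Aug 27 13:08:45 +0000 2008'    # hypothetical value
    t = datetime.datetime.strptime(created_at, '%a %b %d %H:%M:%S +0000 %Y')
    print(t.isoformat())                              # 2008-08-27T13:08:45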
Example no. 3
    def process(self):
        for ent in self.fp.entries:
            guid = ent.id if 'id' in ent else ent.link
            if self.verbose:
                print('ID: %s' % guid)
            try:
                e = Entry.objects.get(service=self.service, guid=guid)
                if not self.force_overwrite and 'updated_parsed' in ent:
                    if e.date_updated and \
                       mtime(ent.updated_parsed) <= e.date_updated:
                        continue
                if e.protected:
                    continue
            except Entry.DoesNotExist:
                e = Entry(service=self.service, guid=guid)

            e.title = ent.title
            e.link = ent.get('feedburner_origlink', ent.get('link', ''))

            if 'author_detail' in ent:
                e.author_name = ent.author_detail.get('name', '')
                e.author_email = ent.author_detail.get('email', '')
                e.author_uri = ent.author_detail.get('href', '')
            else:
                e.author_name = ent.get('author', ent.get('creator', ''))
                if not e.author_name and 'author_detail' in self.fp.feed:
                    e.author_name = self.fp.feed.author_detail.get('name', '')
                    e.author_email = self.fp.feed.author_detail.get(
                        'email', '')
                    e.author_uri = self.fp.feed.author_detail.get('href', '')

            try:
                e.content = ent.content[0].value
            except Exception:
                # no full content element; fall back to the summary/description
                e.content = ent.get('summary', ent.get('description', ''))

            if 'published_parsed' in ent:
                e.date_published = mtime(ent.published_parsed)
            elif 'updated_parsed' in ent:
                e.date_published = mtime(ent.updated_parsed)

            if 'updated_parsed' in ent:
                e.date_updated = mtime(ent.updated_parsed)

            if 'geo_lat' in ent and 'geo_long' in ent:
                e.geolat = ent.geo_lat
                e.geolng = ent.geo_long
            elif 'georss_point' in ent:
                geo = ent['georss_point'].split(' ')
                e.geolat = geo[0]
                e.geolng = geo[1]

            if 'image' in self.fp.feed:
                e.link_image = media.save_image(self.fp.feed.image.url)
            else:
                for link in ent.links:
                    if link.rel == 'image' or link.rel == 'photo':
                        e.link_image = media.save_image(link.href)

            if hasattr(self, 'custom_process'):
                self.custom_process(e, ent)

            if hasattr(e, 'custom_mblob'):
                e.mblob = e.custom_mblob
            else:
                e.mblob = None

            mblob = media.mrss_init(e.mblob)
            if 'media_content' in ent:
                mblob['content'].append(ent.media_content)
            e.mblob = media.mrss_gen_json(mblob)

            e.content = strip_script(e.content)

            try:
                e.save()
                media.extract_and_register(e)
            except Exception:
                # skip this entry if saving or media registration fails
                pass
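
This variant works on a parsed feed object: self.fp exposes entries, updated_parsed, author_detail and links, which matches the structures produced by the feedparser package. A minimal sketch of obtaining such an object, assuming feedparser is indeed the parser in use (the URL is a placeholder):

    import feedparser   # third-party: pip install feedparser

    fp = feedparser.parse('https://example.com/feed.atom')   # hypothetical URL
    for ent in fp.entries:
        guid = ent.id if 'id' in ent else ent.link
        print(guid, ent.get('updated', ''))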
Example no. 4
    def process(self):
        for ent in self.stream['data']:
            guid = 'tag:facebook.com,2004:post/%s' % ent['id']
            if self.verbose:
                print("ID: %s" % guid)

            if 'updated_time' in ent:
                t = from_rfc3339(ent['updated_time'])
            else:
                t = from_rfc3339(ent['created_time'])

            try:
                e = Entry.objects.get(service=self.service, guid=guid)
                if not self.force_overwrite and \
                   e.date_updated and mtime(t.timetuple()) <= e.date_updated:
                    continue
                if e.protected:
                    continue
            except Entry.DoesNotExist:
                e = Entry(service=self.service, guid=guid)

            e.guid = guid
            e.link = ent['actions'][0]['link']

            if 'from' in ent:
                frm = ent['from']
                image_url = 'http://graph.facebook.com/%s/picture' % frm['id']
                e.link_image = media.save_image(image_url, direct_image=False)
                e.author_name = frm['name']

            e.date_published = from_rfc3339(ent['created_time'])
            e.date_updated = t

            content = ''
            if 'message' in ent:
                content = expand.shorts(ent['message'])
                content = '<p>' + urlizetrunc(content, 45) + '</p>'

            name = ''
            if 'name' in ent:
                name = ent['name']
                content += ' <p>' + ent['name'] + '</p>'

            if 'picture' in ent and 'link' in ent:
                content += '<p class="thumbnails">'
                content += '<a href="%s" rel="nofollow">' \
                    '<img src="%s" alt="thumbnail" /></a> ' \
                    % (ent['link'], media.save_image(ent['picture'],
                                                     downscale=True))

                if 'description' in ent:
                    content += '<div class="fb-description">%s</div>' % \
                        ent['description']
                elif 'caption' in ent and name != ent['caption']:
                    content += '<div class="fb-caption">%s</div>' % \
                        ent['caption']

                content += '</p>'
            else:
                if 'description' in ent:
                    content += '<div class="fb-description">%s</div>' % \
                        ent['description']
                elif 'caption' in ent and name != ent['caption']:
                    content += '<div class="fb-caption">%s</div>' % \
                        ent['caption']

            e.content = content
            if 'message' in ent:
                e.title = truncate.smart(strip_tags(ent['message']),
                                         max_length=48)
            if e.title == '':
                e.title = strip_entities(strip_tags(content))[0:128]

            try:
                e.save()
                media.extract_and_register(e)
            except Exception:
                # skip this entry if saving or media registration fails
                pass
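
from_rfc3339 is a helper defined elsewhere in the project and not shown here; the Graph API returns timestamps in a form like "2012-03-01T12:34:56+0000". A minimal sketch of what an equivalent parser could look like (illustrative only, not the project's implementation):

    import datetime

    def parse_rfc3339(value):
        # e.g. '2012-03-01T12:34:56+0000' -> naive datetime in UTC
        dt = datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S%z')
        return dt.astimezone(datetime.timezone.utc).replace(tzinfo=None)

    print(parse_rfc3339('2012-03-01T12:34:56+0000'))   # 2012-03-01 12:34:56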
Example no. 5
    def process(self):
        for key, group in groupby(self.fp.entries, lambda x: x.updated[0:19]):
            mblob = media.mrss_init()
            lgroup = 0
            content = '<p class="thumbnails">\n'
            first = True
            for ent in group:
                lgroup += 1
                if first:
                    firstent = ent
                    first = False
                if self.verbose:
                    print("ID: %s" % ent.id)

                if 'media_thumbnail' in ent:
                    tn = ent.media_thumbnail[0]
                    if self.service.public:
                        tn['url'] = media.save_image(tn['url'])
                    content += """  <a href="%s" rel="nofollow"><img src="%s" width="%s" height="%s" alt="thumbnail" /></a>\n""" % (
                        ent.link, tn['url'], tn['width'], tn['height'])

                if 'media_content' in ent:
                    mblob['content'].append(ent.media_content)

            ent = firstent
            content += '</p>'
            guid = 'tag:flickr.com,2004:/photo/%s' % ent.id

            try:
                e = Entry.objects.get(service=self.service, guid=guid)
                if not self.force_overwrite and 'updated_parsed' in ent:
                    if e.date_updated and \
                       mtime(ent.updated_parsed) <= e.date_updated:
                        continue
                if e.protected:
                    continue
            except Entry.DoesNotExist:
                e = Entry(service=self.service, guid=guid)

            e.mblob = media.mrss_gen_json(mblob)
            if lgroup > 1:
                e.idata = 'grouped'

            e.link = self.service.link
            e.title = 'Posted Photos'
            e.content = content

            if 'published_parsed' in ent:
                e.date_published = mtime(ent.published_parsed)
            elif 'updated_parsed' in ent:
                e.date_published = mtime(ent.updated_parsed)
            if 'updated_parsed' in ent:
                e.date_updated = mtime(ent.updated_parsed)

            if 'image' in self.fp.feed:
                e.link_image = media.save_image(self.fp.feed.image.href)
            else:
                for link in ent.links:
                    if link.rel == 'image':
                        e.link_image = media.save_image(link.href)
            try:
                e.save()
            except Exception:
                # skip this entry if saving fails
                pass
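
The grouping at the top of this example keys groupby (presumably itertools.groupby) on the first 19 characters of each entry's updated value (YYYY-MM-DDTHH:MM:SS), so photos posted in the same second collapse into a single "Posted Photos" entry. groupby only merges consecutive items, which works here because feed entries are already ordered by date. A toy illustration with made-up timestamps:

    from itertools import groupby

    updates = ['2012-05-01T10:00:00Z', '2012-05-01T10:00:00Z',
               '2012-05-01T09:30:00Z']                      # hypothetical values
    for key, group in groupby(updates, lambda x: x[0:19]):
        print(key, len(list(group)))   # -> '...10:00:00 2' then '...09:30:00 1'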