Ejemplo n.º 1
0
    def parse_url(cls, url):
        """Translate a url (or an all-digit string) into a fetch target.

        Returns:
            * ``url`` itself when it consists only of digits;
            * the ``post_id`` group when POST_REGEXP matches;
            * a ``hoordu.Dynamic`` holding the ``vanity`` group when
              CREATOR_REGEXP matches;
            * ``None`` when nothing matches.
        """
        # an all-digit string is handed back untouched
        if url.isdigit():
            return url

        post_match = POST_REGEXP.match(url)
        if post_match:
            return post_match.group('post_id')

        # creator urls are only tried once the post pattern has failed
        creator_match = CREATOR_REGEXP.match(url)
        if not creator_match:
            return None

        return hoordu.Dynamic({'vanity': creator_match.group('vanity')})
Ejemplo n.º 2
0
    def parse_url(cls, url):
        """Resolve a url (or an all-digit string) to a post id or a search.

        An all-digit ``url`` is returned as-is. Otherwise POST_REGEXP is
        tried first and yields its ``post_id`` group; FANCLUB_REGEXP is tried
        second and yields a ``hoordu.Dynamic`` whose ``creator_id`` key holds
        the ``fanclub_id`` group. ``None`` is returned when neither matches.
        """
        if url.isdigit():
            return url

        found = POST_REGEXP.match(url)
        if found:
            return found.group('post_id')

        found = FANCLUB_REGEXP.match(url)
        if not found:
            return None

        options = {'creator_id': found.group('fanclub_id')}
        return hoordu.Dynamic(options)
Ejemplo n.º 3
0
    def parse_url(cls, url):
        """Resolve a url (or an all-digit string) to a post id or a search.

        POST_REGEXP and CREATOR_REGEXP are iterated as collections of
        patterns; within each collection the first pattern that matches wins.

        Returns ``url`` itself when it is all digits, the ``post_id`` group of
        the first matching post pattern, a ``hoordu.Dynamic`` with a
        ``creator`` key for the first matching creator pattern, or ``None``.
        """
        if url.isdigit():
            return url

        # both searches are lazy: patterns after the first hit are not tried
        post_hit = next(
            filter(None, (pattern.match(url) for pattern in POST_REGEXP)),
            None)
        if post_hit:
            return post_hit.group('post_id')

        creator_hit = next(
            filter(None, (pattern.match(url) for pattern in CREATOR_REGEXP)),
            None)
        if creator_hit:
            return hoordu.Dynamic({'creator': creator_hit.group('creator')})

        return None
Ejemplo n.º 4
0
 def parse_url(cls, url):
     """Resolve a url (or an all-digit string) to a tweet id or a timeline.

     Returns ``url`` itself when it is all digits, the ``tweet_id`` group of
     the first matching pattern in TWEET_REGEXP, a ``hoordu.Dynamic`` with
     ``user`` and ``method`` keys when TIMELINE_REGEXP matches, or ``None``.
     ``method`` is ``'likes'`` only when the ``type`` group is exactly
     ``'likes'``; every other value becomes ``'tweets'``.
     """
     if url.isdigit():
         return url

     # lazy search: stops at the first tweet pattern that matches
     tweet_hit = next(
         filter(None, (pattern.match(url) for pattern in TWEET_REGEXP)),
         None)
     if tweet_hit:
         return tweet_hit.group('tweet_id')

     timeline_hit = TIMELINE_REGEXP.match(url)
     if not timeline_hit:
         return None

     user = timeline_hit.group('user')
     timeline_type = timeline_hit.group('type')

     return hoordu.Dynamic({
         'user': user,
         'method': 'likes' if timeline_type == 'likes' else 'tweets'
     })
Ejemplo n.º 5
0
    def _to_remote_post(self, post, remote_post=None, preview=False):
        """Create or refresh the RemotePost that mirrors ``post``.

        The post is looked up by ``post.id`` (unless ``remote_post`` is
        given) and created when it does not exist yet. Tags, related urls,
        files and the comment are then updated according to ``post.type``
        ('image', 'file', 'article' or 'text').

        Args:
            post: post object as returned by the remote API.
            remote_post: existing RemotePost to update; looked up through
                ``self._get_post`` when omitted.
            preview: when true, original files are not downloaded; missing
                thumbnails still are.

        Returns:
            The created or updated RemotePost.

        Raises:
            NotImplementedError: for an unknown post type or an unknown
                embed service provider.
        """
        main_id = post.id
        creator_id = post.user.userId
        creator_slug = post.creatorId
        creator_name = post.user.name
        # possible timezone issues?
        post_time = dateutil.parser.parse(post.publishedDatetime).astimezone(
            timezone.utc)

        if remote_post is None:
            remote_post = self._get_post(main_id)

        if remote_post is None:
            metadata = hoordu.Dynamic()
            if post.feeRequired != 0:
                metadata.price = post.feeRequired

            remote_post = RemotePost(
                source=self.source,
                original_id=main_id,
                url=POST_FORMAT.format(creator=creator_slug, post_id=main_id),
                title=post.title,
                type=PostType.collection,
                post_time=post_time,
                metadata_=metadata.to_json())

            self.session.add(remote_post)
            # flush so that remote_post.id is available below
            self.session.flush()

        self.log.info(f'downloading post: {remote_post.original_id}')
        self.log.info(f'local id: {remote_post.id}')

        if post.isLiked is True:
            remote_post.favorite = True

        # creators are identified by their pixiv id because their name and creatorId can change
        creator_tag = self._get_tag(TagCategory.artist, creator_id)
        remote_post.add_tag(creator_tag)

        # the tuple makes sure both updates run before any() looks at them
        if any((creator_tag.update_metadata('name', creator_name),
                creator_tag.update_metadata('slug', creator_slug))):
            self.session.add(creator_tag)

        for tag in post.tags:
            remote_tag = self._get_tag(TagCategory.general, tag)
            remote_post.add_tag(remote_tag)

        if post.hasAdultContent is True:
            nsfw_tag = self._get_tag(TagCategory.meta, 'nsfw')
            remote_post.add_tag(nsfw_tag)

        # existing files are matched through the key stored in metadata_
        # ('i-<image id>' or 'f-<file id>')
        current_files = {file.metadata_: file for file in remote_post.files}
        known_urls = {related.url for related in remote_post.related}

        def attach_related_url(url):
            # urls added during this call are remembered as well, so a url
            # that shows up twice in the same post is only attached once
            if url not in known_urls:
                remote_post.add_related_url(url)
                known_urls.add(url)

        def sync_file(file_key, order, **new_fields):
            # fetch the File stored under file_key, creating it if needed,
            # and make sure its remote_order reflects the current position;
            # new_fields are only applied to newly created files
            file = current_files.get(file_key)

            if file is None:
                file = File(remote=remote_post,
                            remote_order=order,
                            metadata_=file_key,
                            **new_fields)
                self.session.add(file)
                self.session.flush()

            else:
                file.remote_order = order
                self.session.add(file)

            return file

        def import_missing(file, orig_url, thumb_url=None):
            # download and import whatever part of the file is still missing;
            # originals are skipped in preview mode and the thumbnail is
            # skipped when there is no thumbnail url
            need_orig = not file.present and not preview
            need_thumb = thumb_url is not None and not file.thumb_present

            if not (need_thumb or need_orig):
                return

            self.log.info(f'downloading file: {file.remote_order}')

            orig = self._download_file(orig_url) if need_orig else None
            thumb = self._download_file(thumb_url) if need_thumb else None

            self.session.import_file(file, orig=orig, thumb=thumb, move=True)

        if post.type == 'image':
            for order, image in enumerate(post.body.images, 1):
                file = sync_file('i-{}'.format(image.id), order)
                import_missing(file, image.originalUrl, image.thumbnailUrl)

            remote_post.comment = post.body.text
            self.session.add(remote_post)

        elif post.type == 'file':
            for order, rfile in enumerate(post.body.files, 1):
                file = sync_file('f-{}'.format(rfile.id),
                                 order,
                                 filename='{0.name}.{0.extension}'.format(rfile))
                # plain files have no thumbnail of their own
                import_missing(file, rfile.url)

            remote_post.comment = post.body.text
            self.session.add(remote_post)

        elif post.type == 'article':
            imagemap = post.body.get('imageMap')
            filemap = post.body.get('fileMap')
            embedmap = post.body.get('embedMap')

            # image and file blocks share a single ordering
            order = 1

            blog = []
            for block in post.body.blocks:
                if block.type in ('p', 'header'):
                    links = block.get('links')
                    if links is not None:
                        for link in links:
                            attach_related_url(link.url)

                    blog.append({'type': 'text', 'content': block.text + '\n'})

                elif block.type == 'image':
                    file_key = 'i-{}'.format(block.imageId)
                    file = sync_file(file_key, order)

                    image = imagemap[block.imageId]
                    import_missing(file, image.originalUrl, image.thumbnailUrl)

                    blog.append({'type': 'file', 'metadata': file_key})

                    order += 1

                elif block.type == 'file':
                    file_key = 'f-{}'.format(block.fileId)
                    # sync_file also refreshes remote_order of an existing
                    # file, same as for image blocks
                    file = sync_file(file_key, order)

                    # the post cover doubles as the thumbnail, when present
                    import_missing(file, filemap[block.fileId].url,
                                   post.coverImageUrl)

                    blog.append({'type': 'file', 'metadata': file_key})

                    order += 1

                elif block.type == 'embed':
                    embed = embedmap[block.embedId]

                    if embed.serviceProvider == 'fanbox':
                        related_post_id = embed.contentId.split('/')[-1]
                        # NOTE(review): POST_FORMAT is formatted with a
                        # `creator` keyword when the post url is built above
                        # but not here; if the template contains {creator}
                        # this call raises KeyError - confirm which creator
                        # value an embedded post should use
                        url = POST_FORMAT.format(post_id=related_post_id)

                    elif embed.serviceProvider == 'google_forms':
                        url = 'https://docs.google.com/forms/d/e/{}/viewform'.format(
                            embed.contentId)

                    elif embed.serviceProvider == 'twitter':
                        url = 'https://twitter.com/i/web/status/{}'.format(
                            embed.contentId)

                    else:
                        raise NotImplementedError(
                            'unknown embed service provider: {}'.format(
                                embed.serviceProvider))

                    attach_related_url(url)

                    blog.append({'type': 'text', 'content': url + '\n'})

                else:
                    self.log.warning('unknown blog block: %s', str(block.type))

            # articles are stored as a json list of text and file entries
            remote_post.comment = hoordu.Dynamic({'comment': blog}).to_json()
            remote_post.type = PostType.blog
            self.session.add(remote_post)

        elif post.type == 'text':
            remote_post.comment = post.body.text
            remote_post.type = PostType.set
            self.session.add(remote_post)

        else:
            raise NotImplementedError('unknown post type: {}'.format(
                post.type))

        return remote_post
Ejemplo n.º 6
0
 def tweet_to_remote_post(self, tweet, remote_post=None, preview=False):
     """Create or refresh the RemotePost that mirrors ``tweet``.

     A retweet is replaced by the tweet it points to before anything else
     happens. The post is looked up by the tweet id (unless ``remote_post``
     is given) and created when missing; its comment, favorite flag, tags,
     related urls and media files are then updated.

     Args:
         tweet: tweet object as returned by the API wrapper.
         remote_post: existing RemotePost to update; looked up through
             ``self._get_post`` when omitted.
         preview: when true, original media is not downloaded; missing
             thumbnails still are.

     Returns:
         The created or updated RemotePost.

     Raises:
         APIError: when one of the tweet urls matches SUPPORT_URL_REGEXP.
     """
     retweeted = tweet.retweeted_status
     if retweeted is not None:
         tweet = retweeted

     original_id = tweet.id_str
     screen_name = tweet.user.screen_name
     user_id = tweet.user.id_str
     text = tweet.full_text
     post_time = datetime.utcfromtimestamp(tweet.created_at_in_seconds)

     if remote_post is None:
         remote_post = self._get_post(original_id)

     if remote_post is None:
         remote_post = RemotePost(
             source=self.source,
             original_id=original_id,
             url=TWEET_FORMAT.format(user=screen_name, tweet_id=original_id),
             comment=text,
             type=PostType.set,
             post_time=post_time,
             metadata_=hoordu.Dynamic({'user': screen_name}).to_json()
         )

         self.session.add(remote_post)
         # flush so that remote_post.id is available for the log line below
         self.session.flush()

     self.log.info(f'downloading post: {remote_post.original_id}')
     self.log.info(f'local id: {remote_post.id}')

     remote_post.comment = text
     remote_post.favorite = tweet.favorited is True

     # the artist tag is keyed by the user id; the screen name is metadata
     user_tag = self._get_tag(TagCategory.artist, user_id)
     remote_post.add_tag(user_tag)

     if user_tag.update_metadata('user', screen_name):
         self.session.add(user_tag)

     if tweet.possibly_sensitive:
         remote_post.add_tag(self._get_tag(TagCategory.meta, 'nsfw'))

     hashtags = tweet.hashtags
     if hashtags is not None:
         for hashtag in hashtags:
             hashtag_tag = self._get_tag(TagCategory.general, hashtag.text)
             remote_post.add_tag(hashtag_tag)

     if tweet.in_reply_to_status_id is not None:
         parent_url = TWEET_FORMAT.format(
             user=tweet.in_reply_to_screen_name,
             tweet_id=tweet.in_reply_to_status_id)
         remote_post.add_related_url(parent_url)

     links = tweet.urls
     if links is not None:
         for link in links:
             if SUPPORT_URL_REGEXP.match(link.url):
                 raise APIError(text)

             # the unwound section is a premium feature
             self.log.info(f'unwinding: {link.url}')
             remote_post.add_related_url(self._unwind_url(link.url))

     self.session.add(remote_post)

     media_list = tweet.media
     if media_list is None:
         return remote_post

     # create a File for every media index that has none yet
     wanted_orders = set(range(len(media_list)))
     known_orders = set(file.remote_order for file in remote_post.files)

     for order in wanted_orders - known_orders:
         self.session.add(File(remote=remote_post, remote_order=order))
         self.session.flush()

     for file in remote_post.files:
         need_thumb = not file.thumb_present
         need_file = not file.present and not preview

         if not (need_thumb or need_file):
             continue

         self.log.info(f'downloading file: {file.remote_order}')

         media = media_list[file.remote_order]

         base_url, ext = media.media_url_https.rsplit('.', 1)
         filename = '{}.{}'.format(base_url.rsplit('/', 1)[-1], ext)

         is_photo = media.type == 'photo'
         if not is_photo and media.type not in ('video', 'animated_gif'):
             # other media types are left untouched
             continue

         # the thumbnail always comes from the media url, for every type
         thumb = None
         if need_thumb:
             thumb = self._download_media_file(base_url, ext, THUMB_SIZE, filename)

         orig = None
         if need_file:
             if is_photo:
                 orig = self._download_media_file(base_url, ext, ORIG_SIZE, filename)
             else:
                 orig = self._download_video(media)

         self.session.import_file(file, orig=orig, thumb=thumb, move=True)

         # only photos take their extension from the media url
         if is_photo:
             file.ext = ext
         file.thumb_ext = ext
         self.session.add(file)

     return remote_post
Ejemplo n.º 7
0
def parse_args(hrd):
    """Parse ``sys.argv`` into a ``hoordu.Dynamic`` of options.

    Recognized global options are ``-h/--help``, ``-p/--plugin <id>`` and
    ``-d/--disabled``. Before a sub-command is seen, every other argument is
    either a sub-command name or a url (parsed with ``parse_url`` using the
    plugin id known at that point). After a sub-command, the remaining
    arguments are interpreted according to that sub-command.

    Errors are reported through ``fail``.
    NOTE(review): the code below does not rely on ``fail`` aborting, but the
    original flow suggests that it does - confirm.

    Args:
        hrd: object forwarded untouched to ``parse_url``.

    Returns:
        A ``hoordu.Dynamic`` with ``plugin_id``, ``command``, ``urls``,
        ``subscription``, ``num_posts`` and ``disabled`` set.
    """
    commands = ('setup', 'list', 'enable', 'disable', 'update', 'fetch',
                'rfetch', 'related')

    # parse arguments
    args = hoordu.Dynamic()
    args.plugin_id = None
    args.command = None
    args.urls = []
    args.subscription = None
    args.num_posts = None
    args.disabled = False

    # computed locally so the loop does not depend on a module-level global
    argc = len(sys.argv)
    argi = 1
    sargi = 0  # sub argument count
    while argi < argc:
        arg = sys.argv[argi]
        argi += 1

        # global commands
        if arg in ('-h', '--help'):
            usage()
            sys.exit(0)

        elif arg in ('-p', '--plugin'):
            # the option needs a value; report it instead of an IndexError
            if argi < argc:
                args.plugin_id = sys.argv[argi]
                argi += 1

            else:
                fail(f'{arg} requires a plugin id')

        elif arg in ('-d', '--disabled'):
            args.disabled = True

        elif args.command is None:
            # pick command, or append to list of urls
            if arg in commands:
                args.command = arg
                sargi = 0

            else:
                args.urls.append(parse_url(hrd, arg, args.plugin_id))

        else:
            # sub-command arguments
            if args.command in ('enable', 'disable', 'update') and sargi < 1:
                parse_sub_name(arg, args)
                sargi += 1

            elif args.command in ('fetch', 'rfetch') and sargi < 2:
                if sargi == 0:
                    parse_sub_name(arg, args)

                else:
                    try:
                        args.num_posts = int(arg)
                    except ValueError:
                        fail(f'invalid number of posts: {arg}')

                sargi += 1

            # plain equality: `in ('related')` was a substring test on a str
            elif args.command == 'related' and sargi < 2:
                args.urls.append(parse_url(hrd, arg, args.plugin_id))
                sargi += 1

            else:
                fail(f'unknown argument: {arg}')

    # verify arguments
    urlc = len(args.urls)
    if urlc >= 2:
        # every parsed url unpacks into a pair; a hoordu.Dynamic in second
        # position marks a search url
        for _, options in args.urls:
            if isinstance(options, hoordu.Dynamic):
                fail('can only process one search url at a time')

    if args.command == 'related' and urlc <= 1:
        fail('the related sub-command requires at least 2 urls')

    if args.command in ('list', 'setup') and args.plugin_id is None:
        fail(f'{args.command} sub-command requires a plugin to be specified')

    if args.command in ('enable', 'disable', 'fetch',
                        'rfetch') and args.subscription is None:
        fail(
            f'{args.command} sub-command requires a subscription to be specified'
        )

    if args.command == 'update' and args.subscription is None and args.plugin_id is None:
        fail(
            'update sub-command requires a plugin or a subscription to be specified'
        )

    return args
Ejemplo n.º 8
0
    def _content_to_post(self, post, content, remote_post=None, preview=False):
        """Create or refresh the RemotePost that mirrors one post content.

        Every content of a post becomes its own RemotePost, identified by
        ``'<post id>-<content id>'``. The post is looked up by that id (unless
        ``remote_post`` is given) and created when missing; tags and files
        are then updated according to ``content.category`` ('file',
        'photo_gallery', 'text' or 'blog').

        Args:
            post: post object as returned by the remote API.
            content: the content of ``post`` to convert.
            remote_post: existing RemotePost to update; looked up through
                ``self._get_post`` when omitted.
            preview: when true, original files are not downloaded; missing
                thumbnails still are.

        Returns:
            The created or updated RemotePost.

        Raises:
            NotImplementedError: for an unknown content category.
        """
        content_id = '{post_id}-{content_id}'.format(post_id=post.id,
                                                     content_id=content.id)
        creator_id = str(post.fanclub.id)
        creator_name = post.fanclub.user.name
        # possible timezone issues?
        post_time = dateutil.parser.parse(post.posted_at).astimezone(
            timezone.utc)

        if remote_post is None:
            remote_post = self._get_post(content_id)

        if remote_post is None:
            metadata = hoordu.Dynamic()
            if content.plan is not None:
                metadata.price = content.plan.price

            remote_post = RemotePost(source=self.source,
                                     original_id=content_id,
                                     url=POST_FORMAT.format(post_id=post.id),
                                     title=content.title,
                                     comment=content.comment,
                                     type=PostType.collection,
                                     post_time=post_time,
                                     metadata_=metadata.to_json())

            self.session.add(remote_post)
            # flush so that remote_post.id is available below
            self.session.flush()

        self.log.info(f'downloading post: {remote_post.original_id}')
        self.log.info(f'local id: {remote_post.id}')

        if post.liked is True:
            remote_post.favorite = True

        # creators are identified by their id because their name can change
        creator_tag = self._get_tag(TagCategory.artist, creator_id)
        remote_post.add_tag(creator_tag)

        if creator_tag.update_metadata('name', creator_name):
            self.session.add(creator_tag)

        for tag in post.tags:
            remote_tag = self._get_tag(TagCategory.general, tag.name)
            remote_post.add_tag(remote_tag)

        if post.rating == 'adult':
            nsfw_tag = self._get_tag(TagCategory.meta, 'nsfw')
            remote_post.add_tag(nsfw_tag)

        if content.category == 'file':
            # a file content holds exactly one file
            if not remote_post.files:
                file = File(remote=remote_post,
                            remote_order=0,
                            filename=content.filename)

                self.session.add(file)
                self.session.flush()

            else:
                file = remote_post.files[0]

            need_orig = not file.present and not preview

            if need_orig:
                self.log.info(f'downloading file: {content.filename}')

                orig_url = FILE_DOWNLOAD_URL.format(
                    download_uri=content.download_uri)
                orig = self._download_file(orig_url, filename=content.filename)

                self.session.import_file(file, orig=orig, move=True)

        elif content.category == 'photo_gallery':
            # existing files are matched through the photo id in metadata_
            current_files = {
                file.metadata_: file
                for file in remote_post.files
            }

            for order, photo in enumerate(content.post_content_photos):
                photo_id = str(photo.id)
                file = current_files.get(photo_id)

                if file is None:
                    file = File(remote=remote_post,
                                metadata_=photo_id,
                                remote_order=order)
                    self.session.add(file)
                    self.session.flush()

                elif file.remote_order != order:
                    file.remote_order = order
                    self.session.add(file)

                need_orig = not file.present and not preview
                need_thumb = not file.thumb_present

                if need_thumb or need_orig:
                    self.log.info(f'downloading file: {file.remote_order}')

                    orig = self._download_file(
                        photo.url.original) if need_orig else None
                    thumb = self._download_file(
                        photo.url.medium) if need_thumb else None

                    self.session.import_file(file,
                                             orig=orig,
                                             thumb=thumb,
                                             move=True)

        elif content.category == 'text':
            # there are no files to save
            remote_post.type = PostType.set
            self.session.add(remote_post)

        elif content.category == 'blog':
            # keyed by metadata_ (the photo id), which is what the lookup
            # below uses; keying by remote_order never matched and created a
            # duplicate File every time the post was processed again
            current_files = {
                file.metadata_: file
                for file in remote_post.files
            }

            sections = hoordu.Dynamic.from_json(content.comment).ops
            blog = []
            # only image inserts take part in the file ordering
            order = 0
            for section in sections:
                insert = section.insert
                if isinstance(insert, str):
                    blog.append({'type': 'text', 'content': insert})

                elif isinstance(insert, hoordu.Dynamic):
                    fantiaImage = insert.get('fantiaImage')
                    if fantiaImage is not None:
                        photo_id = str(fantiaImage.id)
                        file = current_files.get(photo_id)

                        if file is None:
                            file = File(remote=remote_post,
                                        metadata_=photo_id,
                                        remote_order=order)
                            self.session.add(file)
                            self.session.flush()

                        elif file.remote_order != order:
                            # keep the order current, as in the gallery branch
                            file.remote_order = order
                            self.session.add(file)

                        orig_url = FILE_DOWNLOAD_URL.format(
                            download_uri=fantiaImage.original_url)
                        thumb_url = fantiaImage.url

                        need_orig = not file.present and not preview
                        need_thumb = not file.thumb_present

                        if need_thumb or need_orig:
                            self.log.info(
                                f'downloading file: {file.remote_order}')

                            orig = self._download_file(
                                orig_url) if need_orig else None
                            thumb = self._download_file(
                                thumb_url) if need_thumb else None

                            self.session.import_file(file,
                                                     orig=orig,
                                                     thumb=thumb,
                                                     move=True)

                        blog.append({'type': 'file', 'metadata': photo_id})

                        order += 1

                    else:
                        self.log.warning(f'unknown blog insert: {str(insert)}')

            # blogs are stored as a json list of text and file entries
            remote_post.comment = hoordu.Dynamic({'comment': blog}).to_json()
            remote_post.type = PostType.blog
            self.session.add(remote_post)

        else:
            raise NotImplementedError('unknown content category: {}'.format(
                content.category))

        return remote_post