Example #1
0
def generate_image_src(image):
    """Return a dict mapping size keys ('originals', '250x', '500x') to image URLs.

    If the compressed variants are missing but the original exists, attempt to
    generate them on the fly. Falls back to otapick.IMAGE_NOT_FOUND_URL for
    every size when no usable image is available.
    """
    keys = ['originals', '250x', '500x']
    if image is not None and bool(image.picture):
        if not (bool(image.picture_250x) and bool(image.picture_500x)):
            # The original exists but has not been compressed yet — do it now.
            otapick.compress_blog_image(image)
        if bool(image.picture_250x) and bool(image.picture_500x):
            urls = [image.picture.url, image.picture_250x.url, image.picture_500x.url]
            return dict(zip(keys, urls))
    return dict(zip(keys, [otapick.IMAGE_NOT_FOUND_URL] * len(keys)))
Example #2
0
def generate_thumbnail_url(blog):
    """Return a dict mapping size keys ('originals', '250x', '500x') to the
    thumbnail URLs of *blog*'s first (order=0) image.

    Falls back to otapick.IMAGE_NOT_FOUND_URL for every size when the blog has
    no order-0 image or the compressed variants cannot be produced.
    """
    keys = ['originals', '250x', '500x']
    # .first() replaces the exists()/get() pair: one query instead of two.
    thumbnail = Image.objects.filter(publisher=blog, order=0).first()
    # BUG FIX: also guard on thumbnail.picture, mirroring generate_image_src —
    # accessing .url on an empty FileField raises ValueError.
    if thumbnail is not None and bool(thumbnail.picture):
        if not (bool(thumbnail.picture_250x) and bool(thumbnail.picture_500x)):
            # Original exists but compressed variants are missing — create them.
            otapick.compress_blog_image(thumbnail)
        if bool(thumbnail.picture_250x) and bool(thumbnail.picture_500x):
            return dict(zip(keys, [thumbnail.picture.url,
                                   thumbnail.picture_250x.url,
                                   thumbnail.picture_500x.url]))
    return dict(zip(keys, [otapick.IMAGE_NOT_FOUND_URL for _ in keys]))
Example #3
0
def download_blog_images(group_id, group_key, blog_ct, writer_ct):
    """
    Download blog images asynchronously (formerly imgScraper.update()).
    :param group_id: numeric id of the publishing group
    :param group_key: crawler key identifying the group on the official site
    :param blog_ct: content id of the blog
    :param writer_ct: content id of the writing member
    :return: None
    """
    try:
        blog = Blog.objects.get(publishing_group__group_id=group_id,
                                blog_ct=blog_ct)
        img_urls = otapick.BlogImageCrawler().crawl(group_key=group_key,
                                                    blog_ct=blog_ct)

        # crawl error
        if img_urls is None:
            pass
        # image not found
        elif len(img_urls) == 0:
            pass
        else:
            # clear halfway remaining images
            Image.objects.filter(publisher=blog).delete()
            order = 0
            for i, img_url in enumerate(img_urls):
                media = otapick.BlogImageDownloader().download(
                    img_url, group_id, blog_ct, writer_ct)
                if media == 'not_image':  # exclude gif
                    pass
                elif media is not None:
                    # NOTE(review): the duplicate check filters on the
                    # enumerate index `i`, but new rows are saved with `order`,
                    # which lags behind `i` whenever a gif or failed download
                    # is skipped — confirm the two counters are meant to differ.
                    if not Image.objects.filter(order=i,
                                                publisher=blog).exists():
                        image = Image(
                            order=order,
                            picture=media,
                            publisher=blog,
                        )
                        image.save()
                        otapick.compress_blog_image(image)
                        order += 1
                # image download failed
                else:
                    import traceback
                    traceback.print_exc()

    # handling after the task's soft time limit (60s) is exceeded:
    # give up silently; a later run can resume from the partial state.
    except SoftTimeLimitExceeded:
        pass
Example #4
0
    def handle(self, *args, **options):
        """Backfill missing compressed variants (250x/500x) for all images.

        Selects every Image lacking picture_250x or picture_500x, regenerates
        the compressed files via otapick.compress_blog_image, and persists the
        updated file fields with a single bulk_update.
        """
        add_images = []

        images = Image.objects.filter(
            Q(picture_250x=None) | Q(picture_500x=None))
        bar = tqdm(total=images.count())

        for image in images:
            # is_bulk=True: compress without an immediate per-row save; the
            # instance is collected and persisted below in one bulk_update.
            image = otapick.compress_blog_image(image, is_bulk=True)
            add_images.append(image)
            bar.update(1)

        # BUG FIX: the original only called bulk_update when more than 100
        # images were collected, silently discarding smaller batches — persist
        # any non-empty batch instead.
        if add_images:
            Image.objects.bulk_update(add_images,
                                      fields=['picture_250x', 'picture_500x'],
                                      batch_size=10000)
Example #5
0
    def handle(self, *args, **options):
        """Repair the image library: rename dot-files, then re-download and
        re-compress any image whose files are missing or corrupt.
        """
        images = Image.objects.all()
        bar = tqdm(total=images.count())
        blogImageCrawler = otapick.BlogImageCrawler()
        blogImageDownloader = otapick.BlogImageDownloader()

        for image in images:
            # Give a filename to dot-files that have none (.jpg => _.jpg).
            if os.path.basename(str(image.picture)).startswith('.'):
                dir_name = os.path.dirname(str(
                    image.picture))  # blog_images/1_07/9244
                # blog_images/1_07/9244/_.jpg
                file_path = os.path.join(
                    dir_name, '_' + os.path.basename(str(image.picture)))

                # /www/var/otapick/media/blog_images/1_07/9244
                full_dir_name = os.path.dirname(str(image.picture.path))
                # /www/var/otapick/media/blog_images/1_07/9244/_.jpg
                full_file_path = os.path.join(
                    full_dir_name, '_' + os.path.basename(str(image.picture)))

                shutil.move(image.picture.path, full_file_path)
                image.picture = file_path
                image.save()
                print(
                    str(image.publisher.title) + '/' + str(image.order),
                    'resolve .file!!')

            # Exclude gif files
            # [Deprecated: gifs were excluded to filter out decorated emoji,
            #  but the official site started serving gifs as blog images.]
            # if os.path.splitext(str(image.picture))[1] == '.gif':
            #     otapick.delete_image(image)
            #     print(str(image.publisher.title) + '/' +
            #           str(image.order), 'resole gif file!!')

            # For incomplete image files, re-download the blog's images and
            # re-compress them as well.
            try:
                # If either the 250x or the 500x variant is defective/missing
                if not (bool(image.picture_250x) and bool(image.picture_500x)):
                    raise Exception("250x・500x error")

                # Check that the original can be opened by PIL
                pil_image = PilImage.open(image.picture.path)
                pil_image.verify()
                bar.update(1)
            # NOTE(review): bare except — any failure above (including
            # KeyboardInterrupt) triggers the repair path; consider narrowing.
            except:
                group_id = image.publisher.publishing_group.group_id
                group_key = image.publisher.publishing_group.key
                blog_ct = image.publisher.blog_ct
                writer_ct = image.publisher.writer.ct

                image_url_list = blogImageCrawler.crawl(group_key=group_key,
                                                        blog_ct=blog_ct)
                if image_url_list is None:
                    print(
                        str(image.publisher.title) + '/' + str(image.order),
                        'は不完全でしたが、公式の掲載ブログにアクセスできません。')
                    bar.update(1)
                    continue

                # NOTE(review): indexing by image.order can raise IndexError if
                # the official blog now has fewer images — TODO confirm.
                image_url = image_url_list[image.order]
                media = blogImageDownloader.download(image_url, group_id,
                                                     blog_ct, writer_ct)
                if media == 'not_image':  # exclude gif
                    print(
                        str(image.publisher.title) + '/' + str(image.order),
                        'は不完全でしたが、公式の画像にアクセスできません。')
                    bar.update(1)
                    continue
                elif media is not None:
                    # set width & height
                    w, h = otapick.get_image_w_h(image)
                    image.width = w
                    image.height = h
                    image.save()
                else:
                    # Download failed; NOTE(review): execution still falls
                    # through to compress_blog_image below — confirm intended.
                    bar.update(1)
                    import traceback
                    traceback.print_exc()

                otapick.compress_blog_image(image)
                print(
                    str(image.publisher.title) + '/' + str(image.order),
                    'resolve incomplete file!!')
                bar.update(1)
0
def exe_registration(blog_info_list, post_date, group_id, all_check, tweet, console):
    """
    Blog registration process.
    Args:
        blog_info_list (list): list of blog info dicts; all blogs in the list
            are assumed to share the same post_date.
        post_date (date): the common post_date
        group_id (int): group id
        all_check (bool): keep processing even when an already-saved blog is found
        tweet (bool): whether to tweet an update notification
        console (bool): whether to print log output
    Returns:
        True (registration finished), False (registration continues)
    """
    download_count = 0
    blog_objects = []
    image_objects = []

    for i, blog_info in enumerate(blog_info_list):
        # new blog
        if not Blog.objects.filter(blog_ct=blog_info['blog_ct'], publishing_group__group_id=group_id).exists():
            blog = Blog(
                blog_ct=blog_info['blog_ct'],
                title=blog_info['title'],
                post_date=post_date,
                order_for_simul=i,
                writer=blog_info['member'],
                publishing_group=Group.objects.filter(
                    group_id=group_id).first(),
            )
            blog_objects.append(blog)
            download_count += 1
        # already saved
        else:
            blog = Blog.objects.get(
                blog_ct=blog_info['blog_ct'], publishing_group__group_id=group_id)

        if len(blog_info['image_urls']) > 0:
            order = 0
            for image_url in blog_info['image_urls']:
                # NOTE(review): this checks for ANY image of the blog, not one
                # with this specific order — new images collected below are not
                # saved yet, so for a brand-new blog this stays False for every
                # url; confirm that is the intent.
                if not Image.objects.filter(publisher=blog).exists():
                    media = otapick.BlogImageDownloader().download(
                        image_url, group_id, blog.blog_ct, blog.writer.ct)
                    if media == 'not_image':  # exclude gif
                        pass
                    elif media is not None:
                        image = Image(
                            order=order,
                            picture=media,
                            publisher=blog,
                        )

                        # set width & height
                        w, h = otapick.get_image_w_h(image)
                        image.width = w
                        image.height = h

                        image_objects.append(image)
                        order += 1
                    else:
                        import traceback
                        traceback.print_exc()

        # change the order_for_simul of already saved blog with the same post_date
        # NOTE(review): this block runs once per iteration of the outer loop,
        # so saved same-date blogs are shifted len(blog_info_list) times, each
        # time by the current download_count — confirm this is intentional.
        if Blog.objects.filter(post_date=post_date).exists():
            for saved_simultime_blog in Blog.objects.filter(post_date=post_date):
                saved_simultime_blog.order_for_simul += download_count
                saved_simultime_blog.save()

    # save new blog
    for blog_object in blog_objects:
        blog_object.save()
        if console:
            otapick.print_console(
                'register 「' + blog_object.title + '」 written by ' + blog_object.writer.full_kanji)

    # save new image
    for image_object in image_objects:
        image_object.save()
        otapick.compress_blog_image(image_object)

    # tweet update info
    if tweet:
        updateBot = otapick.UpdateBot()
        for blog_object in blog_objects:
            updateBot.tweet(
                group_id=blog_object.publishing_group.group_id, blog_ct=blog_object.blog_ct)

    # When there is at least one already saved blog in blog_list and all_check is False
    if download_count != len(blog_info_list) and not all_check:
        return True

    # When all blog in blog_list are new or when all_check is True
    else:
        return False