def generate_image_src(image):
    """Build a dict of image URLs keyed by size ('originals', '250x', '500x').

    If the compressed variants are missing but the original exists, compress
    on the fly and retry once. Falls back to the not-found placeholder URL
    for every key when the image (or its original file) is absent.
    """
    keys = ['originals', '250x', '500x']
    if image is not None and bool(image.picture):
        # Original exists but at least one compressed variant is missing:
        # compress now, then re-check below.
        if not (bool(image.picture_250x) and bool(image.picture_500x)):
            otapick.compress_blog_image(image)
        if bool(image.picture_250x) and bool(image.picture_500x):
            urls = [image.picture.url,
                    image.picture_250x.url,
                    image.picture_500x.url]
            return dict(zip(keys, urls))
    # No usable image (or compression failed): placeholder for every size.
    return {key: otapick.IMAGE_NOT_FOUND_URL for key in keys}
def generate_thumbnail_url(blog):
    """Return thumbnail URLs for *blog* keyed by size ('originals', '250x', '500x').

    The thumbnail is the Image with order=0 for the blog. Missing compressed
    variants are generated on the fly; if no thumbnail exists (or compression
    fails), every key maps to the not-found placeholder URL.
    """
    keys = ['originals', '250x', '500x']
    not_found = {key: otapick.IMAGE_NOT_FOUND_URL for key in keys}
    if not Image.objects.filter(publisher=blog, order=0).exists():
        return not_found
    thumbnail = Image.objects.get(publisher=blog, order=0)
    # Compress once if either variant is missing, then re-check.
    if not (bool(thumbnail.picture_250x) and bool(thumbnail.picture_500x)):
        otapick.compress_blog_image(thumbnail)
    if bool(thumbnail.picture_250x) and bool(thumbnail.picture_500x):
        urls = [thumbnail.picture.url,
                thumbnail.picture_250x.url,
                thumbnail.picture_500x.url]
        return dict(zip(keys, urls))
    return not_found
def download_blog_images(group_id, group_key, blog_ct, writer_ct):
    """
    Download blog images asynchronously (replaces the old imgScraper.update()).
    Crawls the official blog page for image URLs, wipes any half-saved Image
    rows for the blog, then downloads, saves, and compresses each image.
    :param group_id: numeric group id used to look up the Blog row
    :param group_key: group key passed to the crawler
    :param blog_ct: blog content id (identifies the blog within the group)
    :param writer_ct: writer content id passed to the downloader
    :return: None
    """
    try:
        blog = Blog.objects.get(publishing_group__group_id=group_id, blog_ct=blog_ct)
        img_urls = otapick.BlogImageCrawler().crawl(group_key=group_key, blog_ct=blog_ct)
        # crawl error: crawler returned None — best-effort task, silently skip
        if img_urls is None:
            pass
        # image not found on the page — nothing to do
        elif len(img_urls) == 0:
            pass
        else:
            # clear halfway remaining images from a previous partial run
            Image.objects.filter(publisher=blog).delete()
            order = 0
            for i, img_url in enumerate(img_urls):
                media = otapick.BlogImageDownloader().download(
                    img_url, group_id, blog_ct, writer_ct)
                if media == 'not_image':
                    # exclude gif (downloader signals non-image content)
                    pass
                elif media is not None:
                    # NOTE(review): this existence check filters on order=i but the
                    # new row is saved with order=order; after the delete() above it
                    # should always pass — confirm the i/order mismatch is intended.
                    if not Image.objects.filter(order=i, publisher=blog).exists():
                        image = Image(
                            order=order,
                            picture=media,
                            publisher=blog,
                        )
                        image.save()
                        otapick.compress_blog_image(image)
                        order += 1
                # image download failed: log the traceback but keep going
                else:
                    import traceback
                    traceback.print_exc()
    # handling after the soft time limit (60s): give up quietly
    except SoftTimeLimitExceeded:
        pass
def handle(self, *args, **options):
    """Re-compress every Image missing either compressed variant.

    Selects Image rows where picture_250x or picture_500x is NULL,
    regenerates the variants via compress_blog_image(is_bulk=True) (which
    returns the modified instance without saving), and persists them in
    batches with bulk_update.
    """
    add_images = []
    images = Image.objects.filter(
        Q(picture_250x=None) | Q(picture_500x=None))
    bar = tqdm(total=images.count())
    for image in images:
        image = otapick.compress_blog_image(image, is_bulk=True)
        add_images.append(image)
        bar.update(1)
        # Flush periodically to bound memory usage.
        if len(add_images) > 100:
            Image.objects.bulk_update(
                add_images,
                fields=['picture_250x', 'picture_500x'],
                batch_size=10000)
            add_images = []
    # BUG FIX: flush the final partial batch — previously any trailing
    # <=100 images were compressed but never written back to the DB.
    if add_images:
        Image.objects.bulk_update(
            add_images,
            fields=['picture_250x', 'picture_500x'],
            batch_size=10000)
def handle(self, *args, **options):
    """Repair command: scan every Image, fix dot-files with no stem
    (.jpg -> _.jpg), and re-download/re-compress incomplete image files."""
    images = Image.objects.all()
    bar = tqdm(total=images.count())
    blogImageCrawler = otapick.BlogImageCrawler()
    blogImageDownloader = otapick.BlogImageDownloader()
    for image in images:
        # Give a file name to dot-files that have none. (.jpg => _.jpg)
        if os.path.basename(str(image.picture)).startswith('.'):
            # e.g. blog_images/1_07/9244
            dir_name = os.path.dirname(str(image.picture))
            # e.g. blog_images/1_07/9244/_.jpg
            file_path = os.path.join(
                dir_name, '_' + os.path.basename(str(image.picture)))
            # e.g. /www/var/otapick/media/blog_images/1_07/9244
            full_dir_name = os.path.dirname(str(image.picture.path))
            # e.g. /www/var/otapick/media/blog_images/1_07/9244/_.jpg
            full_file_path = os.path.join(
                full_dir_name, '_' + os.path.basename(str(image.picture)))
            # Move the file on disk first, then point the field at the new path.
            shutil.move(image.picture.path, full_file_path)
            image.picture = file_path
            image.save()
            print(
                str(image.publisher.title) + '/' + str(image.order),
                'resolve .file!!')
        # Exclude gif files
        # [Retired: was used to drop decorated emoji, but the official site now
        #  serves gif files as regular blog images]
        # if os.path.splitext(str(image.picture))[1] == '.gif':
        #     otapick.delete_image(image)
        #     print(str(image.publisher.title) + '/' +
        #           str(image.order), 'resole gif file!!')
        # For incomplete image files, re-download the blog's image and
        # re-compress it as well.
        try:
            # If either the 250x or 500x variant is defective/missing
            if not (bool(image.picture_250x) and bool(image.picture_500x)):
                raise Exception("250x・500x error")
            # Check that the file can be opened and verified by Pillow
            pil_image = PilImage.open(image.picture.path)
            pil_image.verify()
            bar.update(1)
        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit —
        # consider narrowing to `except Exception:`.
        except:
            group_id = image.publisher.publishing_group.group_id
            group_key = image.publisher.publishing_group.key
            blog_ct = image.publisher.blog_ct
            writer_ct = image.publisher.writer.ct
            image_url_list = blogImageCrawler.crawl(group_key=group_key,
                                                    blog_ct=blog_ct)
            if image_url_list is None:
                print(
                    str(image.publisher.title) + '/' + str(image.order),
                    'は不完全でしたが、公式の掲載ブログにアクセスできません。')
                bar.update(1)
                continue
            # assumes image.order still matches the position on the official
            # page — TODO confirm; an IndexError here is uncaught.
            image_url = image_url_list[image.order]
            media = blogImageDownloader.download(image_url, group_id,
                                                 blog_ct, writer_ct)
            if media == 'not_image':  # exclude gif
                print(
                    str(image.publisher.title) + '/' + str(image.order),
                    'は不完全でしたが、公式の画像にアクセスできません。')
                bar.update(1)
                continue
            elif media is not None:
                # set width & height
                w, h = otapick.get_image_w_h(image)
                image.width = w
                image.height = h
                image.save()
            else:
                # Download failed: log and fall through.
                # NOTE(review): no `continue` here, so the compress/print below
                # still run and bar.update fires twice — confirm intended.
                bar.update(1)
                import traceback
                traceback.print_exc()
            otapick.compress_blog_image(image)
            print(
                str(image.publisher.title) + '/' + str(image.order),
                'resolve incomplete file!!')
            bar.update(1)
def exe_registration(blog_info_list, post_date, group_id, all_check, tweet, console):
    """
    Blog registration step.

    Args:
        blog_info_list (list): List of blog info dicts. Precondition: every
            blog in the list shares the same post_date.
        post_date (date): The common post_date.
        group_id (int): Group ID.
        all_check (bool): Keep processing even when an already-saved blog is found.
        tweet (bool): Whether to tweet an update notification.
        console (bool): Whether to print log output.

    Returns:
        True (registration finished), False (registration continues)
    """
    download_count = 0
    blog_objects = []
    image_objects = []
    for i, blog_info in enumerate(blog_info_list):
        # new blog: build (but don't save yet) a Blog row
        if not Blog.objects.filter(blog_ct=blog_info['blog_ct'],
                                   publishing_group__group_id=group_id).exists():
            blog = Blog(
                blog_ct=blog_info['blog_ct'],
                title=blog_info['title'],
                post_date=post_date,
                order_for_simul=i,
                writer=blog_info['member'],
                publishing_group=Group.objects.filter(
                    group_id=group_id).first(),
            )
            blog_objects.append(blog)
            download_count += 1
        # already saved: reuse the existing row
        else:
            blog = Blog.objects.get(
                blog_ct=blog_info['blog_ct'],
                publishing_group__group_id=group_id)
        if len(blog_info['image_urls']) > 0:
            order = 0
            for image_url in blog_info['image_urls']:
                # Only download when the blog has no images saved at all —
                # NOTE(review): this check is per-blog, not per-image, so a blog
                # with any existing image skips every URL; confirm intended.
                if not Image.objects.filter(publisher=blog).exists():
                    media = otapick.BlogImageDownloader().download(
                        image_url, group_id, blog.blog_ct, blog.writer.ct)
                    if media == 'not_image':  # exclude gif
                        pass
                    elif media is not None:
                        image = Image(
                            order=order,
                            picture=media,
                            publisher=blog,
                        )
                        # set width & height
                        w, h = otapick.get_image_w_h(image)
                        image.width = w
                        image.height = h
                        image_objects.append(image)
                        order += 1
                    # download failed: log the traceback, keep going
                    else:
                        import traceback
                        traceback.print_exc()
    # change the order_for_simul of already saved blogs with the same post_date
    # (shift them down so the new blogs occupy the first slots)
    if Blog.objects.filter(post_date=post_date).exists():
        for saved_simultime_blog in Blog.objects.filter(post_date=post_date):
            saved_simultime_blog.order_for_simul += download_count
            saved_simultime_blog.save()
    # save new blogs
    for blog_object in blog_objects:
        blog_object.save()
        if console:
            otapick.print_console(
                'register 「' + blog_object.title + '」 written by '
                + blog_object.writer.full_kanji)
    # save new images (compress after each save)
    for image_object in image_objects:
        image_object.save()
        otapick.compress_blog_image(image_object)
    # tweet update info
    if tweet:
        updateBot = otapick.UpdateBot()
        for blog_object in blog_objects:
            updateBot.tweet(
                group_id=blog_object.publishing_group.group_id,
                blog_ct=blog_object.blog_ct)
    # When there is at least one already saved blog in blog_list and all_check is False
    if download_count != len(blog_info_list) and not all_check:
        return True
    # When all blogs in blog_list are new, or when all_check is True
    else:
        return False