def get_one_suite_and_download(self):
    resp = self.get_suite_pages_and_start_threads()
    # resp is False when the suite is a duplicate (already fully downloaded); otherwise None
    if resp is False:
        return
    time.sleep(3)
    # Open the corresponding folder
    # os.system("start explorer {}".format(settings.IMAGE_FOLDER.replace('/', '\\')))
    # todo: the flow could be changed to dispatch tasks with gevent right after each page is
    #       parsed, with a bool flag controlling whether to download, so no separate download
    #       step is needed
    # todo: gevent is not used for now because it causes stack overflows
    threads = []
    for _ in range(self.max_download_worker):
        thread = threading.Thread(target=self.download_images_to_local)
        thread.start()
        threads.append(thread)
    for t in threads:
        t.join()
    # After the download finishes, mark the suite as complete
    suite_obj = DownloadedSuite.objects.filter(url=self.suite_url,
                                               is_complete=False).first()
    if suite_obj is None:
        return
    if suite_obj.max_page == get_local_suite_count(
            suite_obj.get_suite_folder_path()):
        suite_obj.is_complete = True
        suite_obj.save()
    return
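# --- Hedged sketch (not part of the original project code) ---
# The block above starts max_download_worker identical worker threads and joins
# them by hand. An alternative way to express the same pattern is
# concurrent.futures; the helper name run_download_workers is an assumption
# made for illustration, not an existing function in this project.
from concurrent.futures import ThreadPoolExecutor


def run_download_workers(worker_fn, worker_count):
    """Run `worker_fn` once per worker thread and wait for all of them."""
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        futures = [pool.submit(worker_fn) for _ in range(worker_count)]
        for future in futures:
            future.result()  # surfaces any exception raised inside a worker

# Possible usage inside get_one_suite_and_download:
#     run_download_workers(self.download_images_to_local,
#                          self.max_download_worker)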
def check_and_mark_suite_complete(self):
    """Verify whether the suite has been fully downloaded and mark it accordingly."""
    assert self.folder is not None
    local_count = get_local_suite_count(self.folder)
    self.suite_obj.is_complete = (local_count == self.suite_obj.max_page)
    self.suite_obj.save()
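# --- Hedged sketch (not part of the original project code) ---
# get_local_suite_count is used throughout this section but not defined here.
# Assuming it simply counts the files already saved in a suite folder, a
# minimal version could look like this:
import os


def get_local_suite_count(folder_path):
    """Return the number of files in `folder_path`, or 0 if it does not exist."""
    if not folder_path or not os.path.isdir(folder_path):
        return 0
    return sum(
        1 for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name)))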
async def callback_downloadedsuite(self, suite_id):
    suite = DownloadedSuite.objects.filter(id=suite_id).first()
    last_local_files_count = 0
    while True:
        local_files_count = get_local_suite_count(
            suite.get_suite_folder_path())
        # print(suite.max_page, suite.name, local_files_count)
        print(last_local_files_count, local_files_count)
        if last_local_files_count != local_files_count:
            # Push an update only when the count changed between two checks
            serializer = MzituDownloadedSuiteSerializer(suite)
            message = JSONRenderer().render(serializer.data)
            await self.write_message(message)
        await gen.sleep(1)
        last_local_files_count = local_files_count
        if suite.max_page == local_files_count:
            break
    self.close(reason='download finished')  # close this websocket connection
    return
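# --- Hedged sketch (not part of the original project code) ---
# callback_downloadedsuite reads like a Tornado WebSocket push loop
# (write_message / close / gen.sleep). The surrounding handler class and URL
# routing are not shown in this section; one assumed wiring is to spawn the
# coroutine from open() with the suite id taken from the route. The handler
# name and route below are illustrative assumptions.
from tornado import ioloop, websocket


class DownloadProgressHandler(websocket.WebSocketHandler):
    # callback_downloadedsuite (defined above) is assumed to be a method of
    # this class in the real project.

    def open(self, suite_id):
        # Start pushing progress updates without blocking the IOLoop.
        ioloop.IOLoop.current().spawn_callback(
            self.callback_downloadedsuite, int(suite_id))

# Assumed route registration:
#     (r"/ws/suite-progress/(\d+)", DownloadProgressHandler)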
def get_locals_count(self, obj) -> int:
    path = obj.get_suite_folder_path()
    if not path:
        return 0
    return get_local_suite_count(path)
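# --- Hedged sketch (not part of the original project code) ---
# get_locals_count follows DRF's SerializerMethodField naming convention
# (get_<field_name>), so the serializer that owns it presumably declares a
# locals_count field roughly like this. The exact Meta field list is an
# assumption.
from rest_framework import serializers


class DownloadedSuiteSerializerSketch(serializers.ModelSerializer):
    locals_count = serializers.SerializerMethodField()

    class Meta:
        model = DownloadedSuite
        fields = ('id', 'name', 'url', 'max_page', 'is_complete',
                  'locals_count')

    def get_locals_count(self, obj) -> int:
        path = obj.get_suite_folder_path()
        return get_local_suite_count(path) if path else 0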
def get_suite_pages_and_start_threads(self):
    """Check whether the suite is already complete, collect the URL of every
    page, and start threads to parse each page."""
    page_content = proxy_request(self.suite_url)
    max_page_num = self.parse_max_page_num_of_suite(page_content)
    title = self.parse_suite_title(page_content)
    logger.debug(title)

    suite_folder = settings.IMAGE_FOLDER_MZITU
    suite_folder = os.path.join(suite_folder, title)
    if not os.path.isdir(suite_folder):
        # Create the suite folder
        os.makedirs(suite_folder, exist_ok=True)

    suite_instance, is_created = DownloadedSuite.objects.get_or_create(
        name=title,
        defaults={
            'url': self.suite_url,
            'max_page': max_page_num
        })

    # Fetch the suite's tags
    if not suite_instance.tags.all():
        tags_href_and_name = self.parse_tags_of_suite(page_content)
        tag_instances = []
        for href, name in tags_href_and_name:
            tag_instance, _ = Tag.objects.update_or_create(
                name=name, defaults={'url': href})
            tag_instances.append(tag_instance)
        suite_instance.tags.set(tag_instances)

    if is_created is False:
        # The suite already exists in the DB; check whether the local files are complete
        print("Suite already in the DB, checking whether the local files exist and are complete...")
        max_page_num = suite_instance.max_page
        img_local_file_count = get_local_suite_count(
            suite_instance.get_suite_folder_path())
        img_obj_count = SuiteImageMap.objects.filter(
            suite__id=suite_instance.id).count()
        if (img_local_file_count >= max_page_num
                and img_obj_count == suite_instance.max_page):
            # Local file count matches and the image records in the DB are complete
            print("Already fully downloaded, skipping")
            return False
        else:
            print("Suite is incomplete, downloading it again")

    # Parse each page
    # todo: could be merged with the threads section of download_one_suite into a single
    #       thread launcher
    threads = []
    for i in range(1, max_page_num + 1):
        thread = threading.Thread(target=self.get_one_pic_url,
                                  args=(suite_folder, i))
        thread.start()
        threads.append(thread)
    for t in threads:
        t.join()
    return
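# --- Hedged sketch (not part of the original project code) ---
# The todo above suggests merging this start/join pattern with the one in
# get_one_suite_and_download into a single thread launcher. A hypothetical
# helper (the name run_in_threads and its signature are assumptions) could
# look like this:
import threading


def run_in_threads(target, args_list):
    """Start one thread per argument tuple and wait for all of them."""
    threads = []
    for args in args_list:
        thread = threading.Thread(target=target, args=args)
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()

# Possible usage for the per-page parsing above:
#     run_in_threads(self.get_one_pic_url,
#                    [(suite_folder, i) for i in range(1, max_page_num + 1)])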