예제 #1
0
    def save_image(self,
                   images: list,
                   image_type=IMAGE_TYPE_DESC.COVER,
                   headers=None) -> list:
        """Download and persist the image URLs selected by ``check_image``.

        ``self.check_image(images)`` decides which URLs are processed. Each
        one is fetched with ``self.do_request``; a missing or empty download
        contributes ``None`` to the result and the remaining URLs are still
        processed, so successful downloads are kept even if others fail.

        Args:
            images: Image URLs to save.
            image_type: Value written to ``img_type`` of stored images.
            headers: Optional HTTP headers forwarded to ``self.do_request``.

        Returns:
            One entry per URL returned by ``self.check_image``, in order:
            the ``Image`` object, or ``None`` when the download failed.
        """
        img_list = self.check_image(images)
        logging.info("需要保存的 {} 图片共有 {} 条".format(image_type, len(img_list)))
        imgs = []
        for img_url in img_list:
            response = self.do_request(img_url, headers=headers)
            # Guard against a missing response object so that one failed
            # download does not abort the whole batch.
            content = response.content if response is not None else None
            if not content:
                imgs.append(None)
                continue

            # Serialise get_or_create so concurrent workers do not race on
            # the same ``origin_addr``.
            with lock:
                img, created = Image.normal.get_or_create(origin_addr=img_url)

            if created or not img.key:
                photo_info = photo_lib.save_binary_photo(content)
                key = photo_info["id"]
                name = photo_info["name"]

                img.img_type = image_type
                img.active = True
                img.key = key
                img.name = name
                # The lock must be released after saving; holding it forever
                # would block every later acquire.
                with lock:
                    img.save()

            imgs.append(img)

        return imgs
예제 #2
0
 def book_insert(self, url):
     """Insert one book (with content) unless its URL was already handled.

     The URL is recorded in ``self.url_done`` and ``self.total_done`` is
     incremented under the lock, then the book is inserted with a
     ``BookInsertClient``. While the insert runs, the URL is listed in
     ``self.current_run_threading``.

     Args:
         url: Address of the book to insert. Falsy or already-seen URLs are
             ignored.
     """
     with lock:
         # Returning inside ``with`` releases the lock on the skip path too.
         if not url or url in self.url_done:
             return
         self.url_done.append(url)
         self.total_done += 1
         self.current_run_threading.append(url)

     try:
         parser_cls = parser_selector.get_parser(url)
         bic = BookInsertClient(url, parser_cls.book_type, 'with_content', True,
                                True)
         bic.run()
     finally:
         # Always drop the URL from the running list, even when the insert
         # raises, so the bookkeeping does not report phantom workers.
         with lock:
             self.current_run_threading.remove(url)
             logging.info('当前还有线程 共 {} 条等待执行结束'.format(
                 len(self.current_run_threading)))
예제 #3
0
    def handler(self):
        """Fetch the book page, store its info and covers, then save the book.

        Returns:
            ``(self.book, response)`` on success, or ``(None, None)`` when
            the response is falsy or its status code is not 200.
        """
        response = self.do_request(self.url, self.headers)
        if not (response and response.status_code == 200):
            return None, None

        self.book_info = self.parser(response)
        self.save_book_info_to_db(self.book_info)

        covers = self.save_image(self.book_info['cover'],
                                 headers=self.headers)
        logging.info(covers)
        # Attach covers only when every one of them was stored successfully.
        if covers and all(covers):
            self.book.cover.add(*covers)

        with lock:
            self.book.save()
        return self.book, response
예제 #4
0
def asyncio_task():
    """Run one waiting task and record its outcome on the task row.

    Active tasks whose status is ``WAIT`` are queried. The first one is
    marked ``RUNNING``, a client is built according to ``task_type`` and its
    ``run()`` is called. The task ends as ``FINISH`` on success or ``FAILD``
    (with the reason in ``markup``) on an unknown type or any exception.

    NOTE(review): every path through the loop body returns, so at most one
    task is handled per call even though the whole list is counted - confirm
    this is intended.
    """
    logging.info("任务开始执行!")
    waiting = Task.normal.filter(
        active=True, task_status=TASK_STATUS_DESC.WAIT)
    logging.info("获取任务列表成功:共{}条".format(waiting.count()))
    for task in waiting:
        # Mark the task as started and clear leftovers from earlier runs.
        task.task_status = TASK_STATUS_DESC.RUNNING
        task.markup = ""
        task.progress = 0
        task.save()

        try:
            # NOTE(review): eval() executes whatever is stored in
            # task.content; only safe if that column is fully trusted.
            content = eval(task.content)
            kind = task.task_type

            if kind == TASK_TYPE_DESC.NOVEL_INSERT:
                # New novel.
                runner = BookInsertClient(url=content['url'], book_type=BOOK_TYPE_DESC.Novel)
            elif kind == TASK_TYPE_DESC.COMIC_INSERT:
                # New comic.
                runner = BookInsertClient(url=content['url'], book_type=BOOK_TYPE_DESC.Comic)
            elif kind in (TASK_TYPE_DESC.NOVEL_UPDATE, TASK_TYPE_DESC.COMIC_UPDATE):
                # Full update of a book.
                runner = BookUpdateClient(book_id=content['book_id'])
            elif kind in (TASK_TYPE_DESC.NOVEL_CHAPTER_UPDATE, TASK_TYPE_DESC.COMIC_CHAPTER_UPDATE):
                # Update of a single chapter.
                runner = BookUpdateClient(chapter_id=content['chapter_id'])
            elif kind in (TASK_TYPE_DESC.NOVEL_MAKE_BOOK, TASK_TYPE_DESC.COMIC_MAKE_BOOK):
                runner = MakeMyWord(book_id=content['book_id'])
            elif kind == TASK_TYPE_DESC.SEND_TO_KINDLE:
                runner = SendKindleEmail(book_id=content['book_id'])
            else:
                task.task_status = TASK_STATUS_DESC.FAILD
                task.markup = "任务未执行, {}不存在".format(task.task_type)
                task.save()
                return

            runner.run()

        except Exception as exc:
            failure = "执行任务失败: {}".format(exc)
            logging.info(failure)
            task.markup = failure
            task.task_status = TASK_STATUS_DESC.FAILD
            task.save()
            return
        else:
            task.task_status = TASK_STATUS_DESC.FINISH
            task.save()
            logging.info("执行任务结束")
            return
예제 #5
0
    def handler_threading(self, urls):
        """Run ``self.book_insert`` for every URL in daemon threads.

        URLs are popped from the end of ``urls`` (the list is emptied in
        place) and processed in batches of 20 threads. Every thread of a
        batch is started, then each is joined for at most 5 seconds before
        the next batch begins, so slow workers may still be running when
        this method returns.

        Args:
            urls: Mutable list of book URLs; consumed by this call.
        """
        logging.info("自动新增书籍开始执行,共有{}条".format(len(urls)))
        batch_size = 20
        join_timeout = 5
        while urls:
            batch = []
            while urls and len(batch) < batch_size:
                worker = threading.Thread(target=self.book_insert,
                                          args=(urls.pop(), ))
                # ``Thread.setDaemon`` is deprecated; set the attribute.
                worker.daemon = True
                batch.append(worker)
            for worker in batch:
                worker.start()
            for worker in batch:
                worker.join(join_timeout)
        logging.info('当前还有处理 {} 的线程 共 {} 条等待执行结束'.format(
            self.current_run_threading, len(self.current_run_threading)))
예제 #6
0
    async def handler_all(self):
        """Request the text of every inactive chapter of ``self.book``.

        Requests are awaited in groups of 30 with ``asyncio.gather`` and
        each group of results is handed to ``self.call_handler_content``.
        A final call handles the remainder (possibly an empty list).

        NOTE(review): the complete chapter list is passed with every batch
        of results - confirm ``call_handler_content`` pairs them correctly.
        """
        pending = Chapter.normal.filter(
            book_id=self.book.id, active=False,
            book_type=self.book.book_type).values('id', 'origin_addr')
        logging.info('<<{}>>: 所有章节正文 : 共{}条'.format(self.book,
                                                    len(pending)))
        batch = []
        for chapter in pending:
            batch.append(
                self.async_do_request(chapter['origin_addr'],
                                      'text',
                                      self.headers,
                                      encoding=self.encoding))
            if len(batch) < 30:
                continue
            results = await asyncio.gather(*batch)
            await self.call_handler_content(pending, results)
            batch = []

        # Flush whatever is left after the last full batch.
        results = await asyncio.gather(*batch)
        await self.call_handler_content(pending, results)
예제 #7
0
    def _update_chapter_content_db(self, comic_id):
        """Download and store page images for chapters that have none yet.

        For every chapter of the book, if no ``ChapterImage`` row exists,
        the chapter page is parsed with ``self.get_chapter_content``, each
        image is saved with ``self._save_image_disk`` and linked to the
        chapter in order (starting at 1).

        Args:
            comic_id: Primary key of the book whose chapters are processed.
        """
        logger.info('_update_chapter_content_db')

        chapters = Chapter.normal.filter(book__pk=comic_id).values(
            "id", "origin_addr")

        for chapter in chapters:
            # ``chapter`` is a dict produced by ``values()``, so the lookup
            # must use its primary key rather than the dict itself.
            has_images = ChapterImage.normal.filter(
                book__pk=comic_id, chapter_id=chapter['id']).exists()
            if has_images or 'origin_addr' not in chapter:
                continue

            image_list = self.get_chapter_content(chapter['origin_addr'])
            for order, image_url in enumerate(image_list.values(), 1):
                photo_info = self._save_image_disk(image_url)
                image, _created = Image.normal.get_or_create(
                    img_type=IMAGE_TYPE_DESC.CHAPER_CONTENT,
                    order=order,
                    key=photo_info['id'],
                    name=photo_info['name'])
                # NOTE(review): rows are looked up via ``book__pk`` above
                # but created with ``comic_id`` here - confirm the field
                # name on ChapterImage.
                ChapterImage(comic_id=comic_id,
                             chapter_id=chapter['id'],
                             image_id=image.id,
                             order=order).save()
예제 #8
0
    def save_chapter_list_to_db(self, chapter_dick_list):
        """Create ``Chapter`` rows for the chapters that are new.

        Args:
            chapter_dick_list: List of dicts; the first key of each dict is
                used as the chapter title and the first value as its link.
                The position in the list becomes both ``order`` and
                ``number``.

        Chapters whose link is reported by ``self.check_chapters`` are
        handed to ``self.bulk_create_chapter`` in groups of 200, followed by
        a final call with the remainder (possibly empty).
        """
        new_urls = self.check_chapters(chapter_dick_list)
        logging.info("即将保存《{}》的{}条新章节到数据库".format(self.book, len(new_urls)))
        pending = []
        for position, entry in enumerate(chapter_dick_list):
            title = list(entry.keys())[0]
            link = list(entry.values())[0]
            if link not in new_urls:
                continue

            pending.append(
                Chapter(title=title,
                        origin_addr=link,
                        order=position,
                        book_type=self.book.book_type,
                        book_id=self.book.id,
                        number=position))
            # The batch can only reach the limit right after an append.
            if len(pending) >= 200:
                self.bulk_create_chapter(pending)
                pending = []

        self.bulk_create_chapter(pending)
예제 #9
0
    def handler_all_book(self, book_info_list):
        """Bulk-insert the books whose URL is not stored yet.

        Args:
            book_info_list: List of dicts with at least ``url``, ``title``
                and ``label`` keys and an optional ``author`` key.

        Entries whose URL already exists in ``Book`` or appears earlier in
        the list are skipped. New books are created off-shelf as novels and
        written with ``bulk_create`` in groups of 500. Books without an
        author are assigned the placeholder author "未知".
        """
        logging.info("自动插入书本信息,即将处理{}条数据".format(len(book_info_list)))
        default_author, _ = Author.normal.get_or_create(name="未知")
        # Sets give O(1) membership tests for the de-duplication below.
        existing_urls = set(
            Book.normal.filter(
                origin_addr__in=[info["url"]
                                 for info in book_info_list]).values_list(
                                     "origin_addr", flat=True))
        seen_urls = set()
        need_url = []
        for info in book_info_list:
            url = info["url"]
            if url in existing_urls or url in seen_urls:
                continue
            seen_urls.add(url)
            need_url.append(info)

        books = []
        for idx, info in enumerate(tqdm(need_url), 1):
            logging.info("新自动插入书{}/{}条: {}  {}".format(idx, len(need_url),
                                                       info["title"],
                                                       info["url"]))
            # Start from the placeholder on every iteration so a book
            # without an author never inherits the previous book's author.
            author = default_author
            if info.get("author"):
                author, _ = Author.normal.get_or_create(name=info["author"])

            books.append(
                Book(
                    on_shelf=False,
                    author=author,
                    book_type=BOOK_TYPE_DESC.Novel,
                    title=info["title"][:60],
                    markup=info["label"][:100],
                    origin_addr=info["url"],
                ))

            if len(books) >= 500:
                Book.normal.bulk_create(books)
                books = []
        if books:
            Book.normal.bulk_create(books)
예제 #10
0
def send_book_to_kindle():
    """Create make-book and send-email tasks for every ready subscription.

    For each book with ready subscriptions, the chapters from the first
    subscription's current chapter up to the book's latest chapter must all
    be active; otherwise the book is skipped and counted as failed. For the
    rest, a make-book task and an email task are created and queued with
    ``model_task.delay``, and every subscription is advanced to the latest
    chapter and marked not ready. A summary is logged at the end.
    """
    logging.info("推送订阅书本至kindle任务开始")
    start = time.time()
    total = 0
    fail = 0
    look = 0
    book_ids = (SubscribeBook.normal.filter(ready=True).values_list(
        "book_id", flat=True).distinct())
    # NOTE(review): tasks are always created for user id 1 - presumably a
    # system account; confirm.
    user_id = 1
    for book_id in book_ids:
        # Count every attempted book so the summary can report successes.
        total += 1
        subs = SubscribeBook.normal.filter(ready=True, book_id=book_id)
        first_sub = subs[0]

        start_chapter = first_sub.chapter
        end_chapter = first_sub.book.latest_chapter()
        first_number = start_chapter.number if start_chapter else 0
        # Check that every chapter to be pushed is available.
        # ``flat=True`` is only accepted by ``values_list``.
        chapter_states = Chapter.normal.filter(
            book_id=book_id,
            number__in=list(range(first_number, end_chapter.number + 1)),
        ).values_list("active", flat=True)
        if not all(chapter_states):
            fail += 1
            look += 1
            logging.info("{}部分章节不可用,不予推送至kindle".format(first_sub.book.title))
            continue

        to_email = [sub.user.email for sub in subs]
        try:
            # Run the bookkeeping in one transaction.
            # NOTE(review): the task is queued before the transaction
            # commits - confirm the worker cannot observe uncommitted rows.
            with transaction.atomic():
                task_makebook = Task.create_task_for_make_book(
                    user_id,
                    book_id,
                    start_chapter.id if start_chapter else 0,
                    end_chapter.id,
                )
                task_email = Task.create_task_for_send_email(
                    user_id, book_id, list(set(to_email)))
                model_task.delay([task_makebook.id, task_email.id])
                for sub in subs:
                    # Advance to the chapter that bounds the generated book,
                    # not to a chapter published in the meantime.
                    sub.chapter_id = end_chapter.id
                    sub.ready = False
                    sub.count = sub.count + 1
                    sub.save()
        except Exception as e:
            fail += 1
            look += len(to_email)
            logging.error(f"推送订阅书本至kindle任务book_id: {book_id}, 失败。原因: {e}")
            continue

    stop = time.time()
    logging.info("推送订阅书本至kindle任务创建结束,共推送{}本, 失败{}本, 受影响用户{}位, 共耗时{}秒".format(
        total - fail if total > fail else 0, fail, look, stop - start))
예제 #11
0
    def makeComicWord(self):
        """Export the comic's chapter images into one or more .docx parts.

        Chapters of ``self.book`` are written in order, each with a level-1
        heading followed by its images (split by ``split_photo_fit_kindle``
        into a per-image temporary folder). Once the accumulated image size
        plus 5 MiB of headroom reaches 20 MiB, the current document is saved
        as ``<title>__<part>.docx`` under ``settings.UPLOAD_SAVE_PATH`` and
        a new part is started. Any remaining images are saved as the final
        part. The temporary folder is removed and the book is marked as
        downloaded.
        """
        # Temporary folder for the split images.
        comic_temp_path = os.path.join(settings.UPLOAD_SAVE_PATH, self.title)

        part_size = 1024 * 1024 * 20
        headroom = 1024 * 1024 * 5

        def save_part(document, number):
            # Write one finished part, replacing a stale file if present.
            filename = os.path.join(
                settings.UPLOAD_SAVE_PATH,
                '{}__{}.docx'.format(self.book.title, number))
            if os.path.exists(filename):
                os.remove(filename)
            document.save(filename)
            logging.info("WORD part-{} 已经完成".format(number))

        part = 0
        current_size = 0
        doc = None
        for chapter in Chapter.normal.filter(book=self.book):
            if doc is None:
                # Start a new part.
                part += 1
                doc = Document()
                logging.info("WORD part-{} 已经初始化".format(part))
            # Every chapter gets its own heading, not only the first one of
            # a part.
            doc.add_heading(chapter.title, level=1)

            chapter_imgs = ChapterImage.normal.filter(chapter=chapter,
                                                      book=self.book)
            for img in chapter_imgs:
                img_path = img.image.get_path('title')
                current_size += os.path.getsize(img_path)

                # Temporary folder for the pieces of a large image.
                temp_path = os.path.join(
                    comic_temp_path,
                    os.path.split(img_path)[-1].split('.')[0])

                # Large images are split before being added.
                for small_img in split_photo_fit_kindle(img_path, temp_path):
                    doc.add_picture(small_img)

            if current_size + headroom >= part_size:
                save_part(doc, part)
                doc = None
                current_size = 0

        # Save the trailing part that never reached the size threshold;
        # without this its images would be lost.
        if doc is not None and current_size > 0:
            save_part(doc, part)

        # Remove temporary files; the folder is absent when nothing was
        # split.
        if os.path.isdir(comic_temp_path):
            shutil.rmtree(comic_temp_path)

        logging.info("word 完成")
        self.book.is_download = True
        self.book.save()
예제 #12
0
def send_book_to_kindle():
    """Build and email every book that has ready subscriptions.

    For each book with ready subscriptions, the chapters from the first
    subscription's current chapter up to the book's latest chapter must all
    be active; otherwise the book is skipped and counted as failed. For the
    rest, ``MakeMyWord`` and ``SendKindleEmail`` are run and every
    subscription is advanced to the latest chapter and marked not ready.
    A summary is logged at the end.
    """
    logging.info('推送订阅书本至kindle任务开始')
    start = time.time()
    total = 0
    fail = 0
    look = 0
    book_ids = SubscribeBook.normal.filter(ready=True).values('book_id').distinct()
    for book_dict in book_ids:
        # Count every attempted book so the summary can report successes.
        total += 1
        book_id = book_dict['book_id']
        subs = SubscribeBook.normal.filter(ready=True, book_id=book_id)
        first_sub = subs[0]

        start_chapter = first_sub.chapter
        end_chapter = first_sub.book.latest_chapter()
        first_number = start_chapter.number if start_chapter else 0
        # Check that every chapter to be pushed is available.
        send_chapters = Chapter.normal.filter(
            book_id=book_id,
            number__in=list(range(first_number, end_chapter.number + 1)),
        ).values('active')
        if not all(row['active'] for row in send_chapters):
            fail += 1
            look += 1
            logging.info("{}部分章节不可用,不予推送至kindle".format(first_sub.book.title))
            continue

        to_email = [sub.user.email for sub in subs]
        try:
            # Run the bookkeeping in one transaction.
            # NOTE(review): an email that was already sent is not undone
            # when the transaction rolls back.
            with transaction.atomic():
                MakeMyWord(book_id, start_chapter.id if start_chapter else 0, end_chapter.id).run()
                SendKindleEmail(book_id, list(set(to_email))).run()
                for sub in subs:
                    # Advance to the chapter that bounds the generated book,
                    # not to a chapter published in the meantime.
                    sub.chapter_id = end_chapter.id
                    sub.ready = False
                    sub.count = sub.count + 1
                    sub.save()
        except Exception as e:
            fail += 1
            look += len(to_email)
            logging.error('推送订阅书本至kindle任务book_id:{}, 失败。原因:{}'.format(book_id, e))
            continue

    stop = time.time()
    logging.info('推送订阅书本至kindle任务结束,共推送{}本, 失败{}本, 受影响用户{}位, 共耗时{}秒'.format(total-fail if total>fail else 0, fail, look, stop-start))
예제 #13
0
    def _save_comic_db(self, info):
        """Create or update the comic ``Book`` row and attach its covers.

        Args:
            info: Parsed comic info. ``name`` and ``cover`` are required;
                ``desc`` and ``markeup`` are optional. ``cover`` is either
                one URL or a list of URLs.

        Returns:
            The saved ``Book`` instance.
        """
        logger.info('_save_comic_db')

        comic = Book.normal.filter(title=info['name'],
                                   book_type=BOOK_TYPE_DESC.Comic).first()
        if not comic:
            comic = Book()
        comic.book_type = BOOK_TYPE_DESC.Comic
        comic.title = info.get('name')
        comic.author_id = self._save_or_get_author_db(info)
        comic.desc = info.get('desc')
        # The model attribute is ``markup``; assigning to the misspelled
        # ``markeup`` never reached the database. Only overwrite when the
        # parser supplied a value.
        markup = info.get('markeup')
        if markup is not None:
            comic.markup = markup
        comic.origin_addr = self.url
        comic.save()

        # Normalise to a list so both shapes share one code path; ``info``
        # itself is no longer rebound while its covers are processed.
        covers = info['cover']
        if isinstance(covers, list):
            logger.info('_save_comic_db run loop')
            log_template = '_save_comic_db run loop,{}==={}==={}'
        else:
            covers = [covers]
            log_template = '_save_comic_db run singal,{}==={}==={}'

        for cover_url in covers:
            photo_info = self._save_image_disk(cover_url)
            img, _created = Image.normal.get_or_create(
                img_type=IMAGE_TYPE_DESC.COVER,
                key=photo_info['id'],
                name=photo_info['name'])
            logger.info(log_template.format(comic, photo_info, img))
            comic.cover.add(img)

        comic.save()
        return comic
예제 #14
0
    def run(self):
        """Prepare the email, attach the generated file(s) and send it.

        ``self.attach_file`` may be a list of paths or a single path
        string; any other value results in no attachment.
        """
        # Initialise the mailbox.
        self.getEmail()
        logging.info("初始化邮箱完成")

        # Resolve the attachment(s).
        self.getAttachFile()
        logging.info("获取附件完成: {}".format(self.attach_file))

        # Normalise to a list of paths before attaching.
        attachments = self.attach_file
        if isinstance(attachments, str):
            attachments = [attachments]
        elif not isinstance(attachments, list):
            attachments = []
        for path in attachments:
            self.email.attach_file(path)

        # Send.
        self.email.send()
        logging.info("邮件发送完成")
예제 #15
0
 def get_chapter_content_only(self, ret_data):
     """Parse already-downloaded chapter data into its image list.

     Args:
         ret_data: Data passed unchanged to ``self.parser.parse_image_list``.

     Returns:
         Whatever ``parse_image_list`` returns.
     """
     logger.info('get_chapter_content for comic: only start')
     images = self.parser.parse_image_list(ret_data)
     done_message = 'get_chapter_content for comic: {} comlpleted'.format(
         images)
     logger.info(done_message)
     return images
예제 #16
0
 def _save_image_disk(self, url):
     """Download ``url`` and store the bytes via ``photo_lib``.

     Args:
         url: Address of the image; fetched with a 5 second timeout.

     Returns:
         The value returned by ``photo_lib.save_binary_photo``.
     """
     logger.info('_save_image_disk for comic: {}'.format(url))
     response = self.session.get(url, timeout=5)
     return photo_lib.save_binary_photo(response.content)
예제 #17
0
 def run(self):
     """Log the parser class in use and run the chapter update."""
     parser_name = type(self.parser).__name__
     logger.info('Using parser %s ..', parser_name)
     self.run_update_chapter()
     logger.info('comlpleted for comic')
예제 #18
0
 def get_chapter_content(self, url):
     """Fetch a comic chapter page and parse its image list.

     Args:
         url: Chapter address; fetched with a 5 second timeout.

     Returns:
         The result of ``self.parser.parse_image_list`` on the page text.
     """
     logger.info('get_chapter_content for comic: {} start'.format(url))
     response = self.session.get(url, timeout=5)
     images = self.parser.parse_image_list(response.text)
     logger.info('get_chapter_content for comic: {} comlpleted'.format(url))
     return images
예제 #19
0
def auto_insert_books():
    """Run ``BookAutoInsertClient`` once and log the elapsed time."""
    logging.info('自动新增书本开始')
    started_at = time.time()
    BookAutoInsertClient().run()
    elapsed = time.time() - started_at
    logging.info('自动新增书本任务结束, 共耗时{}秒'.format(elapsed))
예제 #20
0
def handle_worker_tasks(self):
    """Run ``asyncio_task`` and log how long it took.

    Args:
        self: Not used by the body.
    """
    started_at = time.time()
    asyncio_task()
    elapsed = time.time() - started_at
    logging.info('任务结束, 共耗时{}秒'.format(elapsed))
예제 #21
0
 def get_chapter_content(self, url):
     """Fetch a chapter page and parse its content.

     Args:
         url: Chapter address; fetched with a 5 second timeout.

     Returns:
         The result of ``self.parser.parse_chapter_content`` on the
         response object.
     """
     logger.info('get_chapter_content: {} start'.format(url))
     response = self.session.get(url, timeout=5)
     parsed = self.parser.parse_chapter_content(response)
     logger.info('get_chapter_content: {} comlpleted'.format(url))
     return parsed
예제 #22
0
def slow_auto_insert_books():
    """Run ``AutoInsertBookClient("with_content")`` and log the elapsed time."""
    logging.info('全站新增书本及其内容任务开始')
    started_at = time.time()
    AutoInsertBookClient("with_content").run()
    elapsed = time.time() - started_at
    logging.info('全站新增书本及其内容任务结束, 共耗时{}秒'.format(elapsed))
예제 #23
0
 def get_chapter_list(self):
     """Fetch ``self.url`` and parse the chapter list from the response.

     Returns:
         The result of ``self.parser.parse_chapter`` on the response
         object (fetched with a 5 second timeout).
     """
     logger.info('get_chapter_list start')
     response = self.session.get(self.url, timeout=5)
     chapters = self.parser.parse_chapter(response)
     logger.info('get_chapter_list comlpleted: {}'.format(chapters))
     return chapters
예제 #24
0
 def get_book_info(self):
     """Fetch ``self.url`` and parse the book info from the response.

     Returns:
         The result of ``self.parser.parse_info`` on the response object
         (fetched with a 5 second timeout).
     """
     logger.info('get_book_info start')
     response = self.session.get(self.url, timeout=5)
     parsed_info = self.parser.parse_info(response)
     logger.info('get_book_info comlpleted')
     return parsed_info
예제 #25
0
def once_auto_insert_books():
    """Run ``AutoInsertBookClient`` with default arguments and log the time."""
    logging.info('全站新增书本任务开始')
    started_at = time.time()
    AutoInsertBookClient().run()
    elapsed = time.time() - started_at
    logging.info('全站新增书本任务结束, 共耗时{}秒'.format(elapsed))
예제 #26
0
def insert_books_all_site_without_chapters():
    """Run ``BookInsertAllSiteClient`` once and log the elapsed time."""
    logging.info("全站新增书本任务开始")
    started_at = time.time()
    BookInsertAllSiteClient().run()
    elapsed = time.time() - started_at
    logging.info("全站新增书本任务结束, 共耗时{}秒".format(elapsed))
예제 #27
0
def cache_proxy_ip():
    """Fetch proxy IPs and store them in the cache under ``proxy_ips``.

    ``parser_utils.get_proxy_ip`` is called with 100, and the result is
    cached with a third argument of 1800 (presumably a timeout in seconds -
    confirm against the cache backend).
    """
    logging.info("获取代理ip任务开始")
    proxy_ips = parser_utils.get_proxy_ip(100)
    ttl = 60 * 30
    cache.set("proxy_ips", proxy_ips, ttl)
    logging.info("获取代理ip任务结束,共找到{}条可用数据".format(len(proxy_ips)))