def retrieve(self, request, *args, **kwargs):
    """Export a novel as one UTF-8 text file and return it as a download.

    Reads ``novel_id`` from the query string and looks up the active
    ``NovelEntry``. Unless an export already exists on disk, every active
    ``SectionContent`` of the novel (ordered by ``section__order``) is
    written to ``BASE_DIR/novel/<novel name>.txt``.

    Returns:
        Whatever ``self.big_file_download`` returns for the exported file,
        or ``SuccessHR("不存在该小说")`` when no active novel matches.
    """
    novel_id = request.query_params.get("novel_id")
    try:
        novel = NovelEntry.objects.get(is_active=True, id=novel_id)
    except NovelEntry.DoesNotExist:
        return SuccessHR("不存在该小说")

    file_name = novel.name + ".txt"
    # NOTE(review): the novel name is used in the path unsanitised; confirm
    # that names can never contain path separators.
    file_path = BASE_DIR + "/novel/" + file_name

    # An existing file is treated as a finished export and reused as-is.
    if not os.path.exists(file_path):
        # select_related avoids one extra query per chapter for section.name.
        contents = (
            SectionContent.objects
            .filter(is_active=True, novel=novel)
            .select_related("section")
            .order_by("section__order")
        )
        # Write to a scratch file and move it into place only when complete,
        # so an interrupted export is never mistaken for a finished one.
        tmp_path = file_path + ".part"
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                for item in contents:
                    title = item.section.name
                    log_common.info(msg=f"写入{title}")
                    f.write('\n' + title + '\n')
                    f.write(item.content)
            os.replace(tmp_path, file_path)
        finally:
            # Only present if the export failed before the replace.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        log_common.info(msg="写入完成")

    return self.big_file_download(file_name=file_name, file_path=file_path)
def list(self, request, *args, **kwargs):
    """Return the filtered queryset, paginated when pagination applies.

    When ``paginate_queryset`` yields a page, the serialized page is returned
    through ``get_paginated_response``; otherwise the whole serialized
    queryset is wrapped in ``SuccessHR``.
    """
    records = self.filter_queryset(self.get_queryset())
    current_page = self.paginate_queryset(records)

    if current_page is None:
        # No page was produced: serialize everything.
        full = self.get_serializer(records, many=True)
        return SuccessHR(full.data)

    paged = self.get_serializer(current_page, many=True)
    return self.get_paginated_response(paged.data)
def list(self, request, *args, **kwargs):
    """Scrape a novel's table of contents and store it as ``NovelSection`` rows.

    Query parameters:
        url: page to scrape (required).
        host: optional; narrows the ``GraspRule`` lookup via ``host__contains``.
        book_name: passed to ``self.get_novel_entry`` to find the book.

    Nodes matched by the rule's ``list_rule`` are walked in order. A node
    whose ``class`` attribute equals ``section_rule_p`` starts a new parent
    section (created through ``self.create_section``) and resets the order
    counter; every other node is treated as a chapter entry whose first
    ``section_rule`` match supplies the chapter name and link.

    Returns:
        ``SuccessHR("创建成功")`` on success, or an ``ErrorHR`` when ``url``
        is missing, the book is unknown, or no grasp rule matches.
    """
    params = request.query_params
    host = params.get("host")
    url = params.get("url")
    book_name = params.get("book_name")

    if not url:
        return ErrorHR("参数url缺失")
    if host:
        self.query_sql &= Q(host__contains=host)

    book = self.get_novel_entry(book_name=book_name)
    if not book:
        return ErrorHR("不存在该书")

    # Scraping rule for the chapter list; .first() yields None when nothing
    # matches, which must not reach the attribute accesses below.
    rule = GraspRule.objects.filter(self.query_sql).first()
    if rule is None:
        return ErrorHR("该网站不存在抓取规则")

    parent_class = rule.section_rule_p
    section_rule = rule.section_rule

    page = html_to_etree(requests_get(url=url, decode=rule.decode))

    parent = None   # most recently created parent section, if any
    pending = []    # chapters collected for the current parent
    order = 0
    for node in page.xpath(rule.list_rule):
        is_parent = (
            parent_class is not None
            and dict(node.attrib).get("class") == parent_class
        )
        if is_parent:
            order = 0
            # Flush chapters gathered under the previous parent first.
            if pending:
                NovelSection.objects.bulk_create(pending)
                pending.clear()
            parent = self.create_section(novel=book, name=node.text)
            continue

        order += 1
        links = node.xpath(section_rule)
        if not links:
            continue
        link = links[0]
        hrefs = link.xpath("./@href")
        if not hrefs:
            # An anchor without href cannot be scraped later; skip it
            # instead of failing the whole import.
            continue
        pending.append(
            NovelSection(novel=book, name=link.text, url=hrefs[0],
                         parent=parent, order=order))

    # Flush whatever remains after the last parent.
    if pending:
        NovelSection.objects.bulk_create(pending)
    return SuccessHR("创建成功")
def list(self, request, *args, **kwargs):
    """Return two fixed text excerpts, each with a title and truncated content."""
    first_text = "我怀着饥饿感寻找家, 不清楚家和饥饿感两者 究竟谁是谁的代名词。 我想我即将和父亲对饮 杯中的浓茶,一如往常, 茶水浓腻的涡旋让我 分不清所处的时光,五岁 或者二十五岁,父亲或许 尚未苍老,我并未长大。 父亲不善言辞,惯于沉默, 戒烟前香烟代表他的情愫。 餐桌上我会揶揄他的厨艺, 他始终笨拙地学不会翻炒, 而我也尝不惯杯中的浓茶。 茶水的苦味在我年轻的时岁 被舌尖放大,仿佛生活的网。 而我已沉默多年,并未想清楚 如何在父亲身上原谅我,或者 如何从我身上理解我的父亲。"
    second_text = "走在路上我也是一个生动的人 我的头发茂盛像青草 像来自遥远,野外的处女地 胳膊有力地摆动。嘴角 含笑 一点点微微的倔强 "

    # (title, full text, number of leading characters to expose)
    entries = (
        ("记", first_text, 40),
        ("随手", second_text, 45),
    )
    payload = [
        {"title": title, "content": text[:limit]}
        for title, text, limit in entries
    ]
    return SuccessHR(payload)
def post(self, request, *args, **kwargs):
    """Register a novel from its URL and fetch its chapter list if needed.

    Reads ``url`` (the novel URL) and ``host`` from the request body. The
    grasp rule is looked up by ``host``; when none is found an ``ErrorHR``
    is returned. Otherwise the book entity is obtained and, if its sections
    are not yet complete, the chapter list is fetched.
    """
    payload = request.data
    url = payload.get("url")
    host = payload.get("host")

    rule = self._search_rule(url=host)
    if not rule:
        return ErrorHR("该网站不存在抓取规则")

    # Obtain the book entity.
    book = self._get_book(url=url, rule=rule.book_name, decode=rule.decode)

    if not book.section_complete:
        # Fetch the chapter list.
        self._get_sections(
            url=url,
            list_rule=rule.list_rule,
            section_rule=rule.section_rule,
            book=book,
            decode=rule.decode,
        )

    if not book.content_complete:
        # Placeholder: nothing is done for incomplete content here.
        pass

    return SuccessHR("success")
def get_paginated_response(self, data):
    """Wrap page data in the ``pageNo`` / ``pageSize`` / ``total`` / ``list`` envelope.

    When ``self.page`` exists its number, per-page size and total count are
    reported together with ``data``. When it does not, a placeholder
    envelope with an empty list is returned.
    """
    if not hasattr(self, 'page'):
        # TODO: self.page does not exist here; the values below are
        # placeholders and need to be obtained some other way.
        empty = OrderedDict()
        empty['pageNo'] = 0
        empty['pageSize'] = self.page_size
        empty['total'] = 0
        empty['list'] = []
        return SuccessHR(empty)

    current = self.page
    envelope = OrderedDict()
    envelope['pageNo'] = current.number
    envelope['pageSize'] = current.paginator.per_page
    envelope['total'] = current.paginator.count
    envelope['list'] = data
    return SuccessHR(envelope)
def retrieve(self, request, *args, **kwargs):
    """Pass the ``code`` query parameter to ``get_code2session``.

    The value is printed, handed to ``get_code2session`` (whose result is
    not used), and a fixed success payload is returned.
    """
    js_code = request.query_params.get("code")
    print(js_code)
    get_code2session(jscode=js_code)

    body = {"name": "success"}
    return SuccessHR(body)
def list(self, request, *args, **kwargs):
    """Return an empty list wrapped in ``SuccessHR``."""
    nothing = []
    return SuccessHR(nothing)
def list(self, request, *args, **kwargs):
    """Crawl and store the content of every chapter that has none yet.

    Reads ``novel_id`` from the query string, loads the active novel and the
    ``GraspRule`` matching its host and url, then fetches the chapters that
    have no active ``SectionContent`` in batches of 10 through
    ``get_content``. Each batch is saved with ``bulk_create``. Once every
    batch (including the final, possibly smaller one) has been processed,
    the novel is marked ``content_complete``.

    Returns:
        ``ErrorHR`` when ``novel_id`` is missing or no grasp rule exists;
        ``SuccessHR`` with a status message otherwise. Unexpected errors
        are logged and reported as an interruption that can be retried.
    """
    # TODO: work out which chapters need their content fetched again and
    # re-fetch them.
    novel_id = request.query_params.get("novel_id")
    if not novel_id:
        return ErrorHR("请选择小说")

    batch_size = 10
    try:
        novel = NovelEntry.objects.get(is_active=True, id=novel_id)
        host = novel.host

        # Scraping rule for this novel's site.
        try:
            g_rule = GraspRule.objects.get(
                is_active=True, host=host, service=novel.url)
        except GraspRule.DoesNotExist:
            return ErrorHR("不存在该小说的爬取规则配置")

        # Chapters that already have content are excluded from the crawl.
        crawled_ids = SectionContent.objects.filter(
            is_active=True, novel=novel).values_list("section_id", flat=True)
        missing = NovelSection.objects.filter(
            is_active=True, novel=novel).exclude(id__in=list(crawled_ids))
        pending = list(GetNovelSectionsSerializer(missing, many=True).data)

        # Step through the pending chapters in fixed-size slices so the
        # trailing partial batch is crawled too.
        for start in range(0, len(pending), batch_size):
            # get_content has always been given a deque; keep that type.
            section_deque = deque(
                pending[start:start + batch_size], maxlen=batch_size)
            result = get_content(sections=section_deque,
                                 host=host,
                                 content_rule=g_rule.content_rule,
                                 decode=g_rule.decode)
            new_rows = [
                SectionContent(novel=novel,
                               section_id=item.get("id"),
                               content=item.get("content"))
                for item in result
            ]
            if new_rows:
                SectionContent.objects.bulk_create(new_rows)
            log_common.info(msg="===清空队列===")

        novel.content_complete = True
        novel.save()
        return SuccessHR("爬取成功")
    except NovelEntry.DoesNotExist:
        return SuccessHR("不存在该小说")
    except Exception as ex:
        # Best effort: completed batches stay saved, so the crawl can simply
        # be started again.
        log_common.error(ex)
        return SuccessHR("中断,可再次开启~")
def create(self, request, *args, **kwargs):
    """Validate the request body, create the object, and return its data.

    Validation failures raise, because ``is_valid`` is called with
    ``raise_exception=True``.
    """
    incoming = self.get_serializer(data=request.data)
    incoming.is_valid(raise_exception=True)
    self.perform_create(incoming)

    created = incoming.data
    return SuccessHR(created)
def retrieve(self, request, *args, **kwargs):
    """Return the serialized object resolved by ``get_object``."""
    target = self.get_object()
    serialized = self.get_serializer(target).data
    return SuccessHR(serialized)