def crawler_author_poetry(author_id=None):
    """Crawl records for stored authors, one page of authors at a time.

    When ``author_id`` is None the author query filters on ``id > 1229``;
    otherwise it filters on ``id = author_id``. Pages of 100 authors are
    requested until ``find_authors`` returns an empty result. An exception
    raised while handling one author is logged with its traceback and the
    loop moves on to the next author.
    """
    page_size = 100
    repo = Author()
    page_no = 1
    while True:
        # The condition is rebuilt for every request, exactly one fresh dict
        # per call to find_authors.
        if author_id is not None:
            cond = {'id': {'=': author_id}}
        else:
            cond = {"id": {">": 1229}}
        batch = repo.find_authors(cond, page_no, page_size)
        LOGGER.info("type: %s, len: %s", type(batch), len(batch))
        if not batch:
            return
        for entry in batch:
            try:
                LOGGER.info("start crawler author: %s", entry['name'])
                crawler_author_record(entry)
                LOGGER.info(entry)
            except Exception as err:
                # Best effort: one failing author must not abort the batch.
                LOGGER.error("author: %s, ex: %s", entry['name'], err,
                             exc_info=True)
        page_no += 1
def get_famous(page, count):
    """Return one page of shipped sentences with author name and dynasty.

    Each sentence returned by ``find_sentence_by_cond`` is converted with
    ``ship_sentence``. The referenced authors are then fetched in a single
    ``find_authors`` call, and every shipped sentence has its ``author_id``
    entry replaced by ``author`` and ``dynasty`` (empty strings when no
    matching author was found).
    """
    sentences = Sentence().find_sentence_by_cond({}, page, count)

    shipped = []
    wanted_ids = []
    # Single pass: collect the author ids and the shipped payloads together.
    for row in sentences:
        wanted_ids.append(row['author_id'])
        shipped.append(ship_sentence(row))

    found = Author().find_authors({"id": wanted_ids}, 1, count)
    by_id = {record['id']: record for record in found}

    for item in shipped:
        key = item.pop("author_id", '')
        item['author'] = item['dynasty'] = ''
        info = by_id.get(key)
        if not info:
            continue
        item['author'] = info['name']
        item['dynasty'] = info['dynasty']
    return shipped
def save_crawled_author(author_data):
    """Save a crawled author unless a lookup by name already finds one.

    ``author_data`` is expanded as keyword arguments to ``Author``. Returns
    the value of ``save_author()`` for a newly saved author, or None when
    ``find_author_by_name()`` returns a truthy record.
    """
    candidate = Author(**author_data)
    if candidate.find_author_by_name():
        return None
    return candidate.save_author()
def search_example_author(count):
    """Return the "author" example group holding up to ``count`` authors.

    The result is a dict with a fixed ``id`` and ``name`` plus a ``data``
    list of ``author_name`` / ``author_id`` / ``total`` entries, built from
    the first page of an unfiltered ``find_authors`` query.
    """
    # NOTE: a randomized lower bound on ``total`` was once used as the query
    # condition; it is disabled and the query is unfiltered.
    rows = Author().find_authors({}, 1, count)
    entries = [
        {
            'author_name': row['name'],
            'author_id': row['id'],
            'total': row['total'],
        }
        for row in rows
    ]
    return {'id': 1, 'name': u'作者', 'data': entries}
def check():
    """Walk all authors and re-crawl those with no poetry on record.

    Authors are read in pages of 100 until an empty page is returned. For
    each author a one-item poetry query is made; when it comes back empty,
    ``crawler_author_poetry`` is invoked for that author id.
    """
    page_size = 100
    repo = Author()
    page_no = 1
    while True:
        batch = repo.find_authors({}, page_no, page_size)
        LOGGER.info("type: %s, len: %s", type(batch), len(batch))
        if not batch:
            return
        for entry in batch:
            author_id = entry['id']
            # Asking for a single item is enough to know whether any exist.
            poems = Poetry(author_id=author_id).find_poetry_by_author_id(1, 1)
            if len(poems) > 0:
                continue
            crawler_author_poetry(author_id)
        page_no += 1
def get_authors(page, count):
    """Return one page of authors as plain dicts.

    Each entry carries ``desc``, ``author_id``, ``headimg``, ``total``,
    ``dynasty`` and ``name``, copied from the corresponding fields of the
    records returned by an unfiltered ``find_authors`` query.
    """
    rows = Author().find_authors({}, page, count)
    return [
        {
            'desc': row['description'],
            'author_id': row['id'],
            'headimg': row['headimg'],
            'total': row['total'],
            'dynasty': row['dynasty'],
            'name': row['name'],
        }
        for row in rows
    ]
def check_save_author(author_data):
    """Return the stored record for an author, saving it first if needed.

    ``author_data`` is expanded as keyword arguments to ``Author``. When
    ``find_author_by_name()`` finds nothing, the author is saved and the
    lookup is repeated; the result of the (last) lookup is returned.
    """
    candidate = Author(**author_data)
    existing = candidate.find_author_by_name()
    if existing:
        return existing
    candidate.save_author()
    return candidate.find_author_by_name()
def get_author_data(session_data, author_id, page, count):
    """Return an author's profile together with one page of their poetry.

    ``session_data`` is accepted but not used here. When no author is found
    for ``author_id`` an error is logged and an empty dict is returned.
    Otherwise the result has two keys: ``poetry_list`` (each poetry record
    passed through ``ship_poetry_list``) and ``author_info``.
    """
    record = Author(id=author_id).find_author_by_id()
    if not record:
        LOG.error("not found author, id: %s", author_id)
        return {}

    poetry_repo = Poetry(author_id=author_id)
    # Profile fields are read before the poetry query is issued.
    headimg = record['headimg']
    name = record['name']
    description = record['description']
    poem_total = record['total']

    poems = [
        ship_poetry_list(item)
        for item in poetry_repo.find_poetry_by_author_id(page, count)
    ]
    return {
        'poetry_list': poems,
        'author_info': {
            'author_name': name,
            'author_headimg': headimg,
            'author_desc': description,
            'author_total': poem_total,
            'author_id': author_id,
        },
    }
def query_poetry(query_dict):
    """Build the widget response for a poetry search.

    The values of ``query_dict`` are joined with commas into the search
    keyword. The first of the keys ``name``, ``content`` and ``author``
    present in ``query_dict`` selects the query:

    * ``name``: widget search sorted by likes;
    * ``content``: widget search without an explicit sort;
    * ``author``: the author's poetry when the author is found by name,
      otherwise a widget search sorted by likes;
    * none of them: widget search without an explicit sort.

    Returns a dict with ``errcode``, ``errmsg``, ``jump_url``, ``title``,
    ``desc``, ``item_list`` and ``more_description``. ``title`` and ``desc``
    come from the most-liked poetry (empty strings when nothing matched).
    """
    keyword = u','.join(query_dict.values())
    poetry_obj = Poetry(content=keyword)
    by_author = 'author' in query_dict
    # Fallback id for the author jump URL when no author record is found.
    author_id = 1
    fields = [
        'id', 'title', 'author', 'likes', 'author_id', 'content', 'dynasty'
    ]

    if 'name' in query_dict:
        poetry_list = poetry_obj.search_widget(1, 5, fields,
                                               sort={'likes': -1})
    elif 'content' in query_dict:
        poetry_list = poetry_obj.search_widget(1, 5, fields)
    elif by_author:
        author_data = Author(name=keyword).find_author_by_name()
        if author_data:
            # Read the id only after confirming a record exists. Reading it
            # before this check raised for an unknown author, which made the
            # search fallback below unreachable.
            author_id = author_data['id']
            poetry_obj.author_id = author_id
            poetry_list = poetry_obj.find_poetry_by_author_id(1, 3)
        else:
            poetry_list = poetry_obj.search_widget(1, 5, fields,
                                                   sort={'likes': -1})
    else:
        poetry_list = poetry_obj.search_widget(1, 5, fields)

    title = ''
    desc = ''
    item_list = []
    LOG.info(len(poetry_list))
    # Order by likes, highest first, whichever query produced the list.
    poetry_list = list(poetry_list)
    poetry_list.sort(key=lambda x: x['likes'], reverse=True)
    for poetry in poetry_list:
        author = poetry['author']
        if not title:
            # Headline fields are taken from the first (most liked) entry.
            title = poetry['title']
            desc = poetry['dynasty'] + "·" + author
        pages = "/pages/detail/detail?id={}".format(poetry['id'])
        # Decode before slicing so the 50-item cut counts characters rather
        # than bytes, then encode back to UTF-8.
        content = unicode(poetry['content'], 'utf-8')
        content = content[:50].encode("utf-8")
        tmp = {'jump_url': pages, "content": content}
        if by_author:
            tmp['title'] = poetry['title']
        item_list.append(tmp)

    data = {"errcode": 0, 'errmsg': "ok"}
    if by_author:
        LOG.info(author_id)
        data['jump_url'] = u"/pages/authorPoetry/authorPoetry?id={}".format(
            author_id).encode("utf-8")
    else:
        data['jump_url'] = u"/pages/search/search?keyword={}".format(
            keyword).encode("utf-8")
    data['title'] = title
    data['desc'] = desc
    data['item_list'] = item_list
    data['more_description'] = u"点击查看更多优美诗词".encode("utf-8")
    return data