Example #1
0
def crawler_author_poetry(author_id=None):
    """Crawl records for stored authors, one page of authors at a time.

    When ``author_id`` is None the query selects authors whose id is
    greater than 1229; otherwise it selects the author with exactly that
    id. Pages of 100 authors are requested until an empty page comes back.

    An exception raised while handling one author is logged together with
    its traceback and the loop moves on to the next author.
    """
    author_obj = Author()
    count = 100
    page = 1
    while True:
        # The condition is rebuilt for every page so each query gets a
        # fresh dict.
        if author_id is not None:
            cond = {'id': {'=': author_id}}
        else:
            cond = {"id": {">": 1229}}
        authors = author_obj.find_authors(cond, page, count)
        LOGGER.info("type: %s, len: %s", type(authors), len(authors))
        if not authors:
            return
        for author in authors:
            try:
                LOGGER.info("start crawler author: %s", author['name'])
                crawler_author_record(author)
                LOGGER.info(author)
            except Exception as ex:
                # Best effort: one failing author must not abort the run.
                LOGGER.error(
                    "author: %s, ex: %s", author['name'], ex, exc_info=True)
        page += 1
Example #2
0
def get_famous(page, count):
    """Return one page of shipped sentences with author name and dynasty.

    Each sentence found for ``page``/``count`` is passed through
    ``ship_sentence``. The authors referenced by those sentences are then
    fetched in a single query, and every shipped item has its
    ``author_id`` key replaced by ``author`` and ``dynasty`` entries.
    Both are empty strings when no matching author was returned.
    """
    sentences = Sentence().find_sentence_by_cond({}, page, count)
    author_ids = []
    shipped = []
    for sentence in sentences:
        author_ids.append(sentence['author_id'])
        shipped.append(ship_sentence(sentence))

    # One lookup for all referenced authors, indexed by id.
    found_authors = Author().find_authors({"id": author_ids}, 1, count)
    authors_by_id = {author['id']: author for author in found_authors}

    for item in shipped:
        key = item.pop("author_id", '')
        # Defaults first; overwritten below when the author is known.
        item['author'] = ''
        item['dynasty'] = ''
        matched = authors_by_id.get(key)
        if not matched:
            continue
        item['author'] = matched['name']
        item['dynasty'] = matched['dynasty']
    return shipped
Example #3
0
def save_crawled_author(author_data):
    """Save a crawled author unless a record is already found by name.

    ``author_data`` is expanded into keyword arguments for ``Author``.
    Returns None when ``find_author_by_name`` yields a record, otherwise
    whatever ``save_author`` returns.
    """
    author_obj = Author(**author_data)
    if author_obj.find_author_by_name():
        return None
    return author_obj.save_author()
Example #4
0
def search_example_author(count):
    """Return example authors taken from the first page of all authors.

    The first page of ``count`` authors is requested with an empty
    condition. The result is a dict with fixed ``id`` and ``name`` fields
    and a ``data`` list holding ``author_name``, ``author_id`` and
    ``total`` for each author returned.
    """
    # NOTE: a randomised ``total >= N`` filter used to be sketched here in
    # commented-out code; the query currently applies no condition.
    authors = Author().find_authors({}, 1, count)
    return {
        'id': 1,
        'name': u'作者',
        'data': [
            {
                'author_name': author['name'],
                'author_id': author['id'],
                'total': author['total'],
            }
            for author in authors
        ],
    }
Example #5
0
def check():
    """Crawl poetry for every stored author that has none on record.

    Authors are read in pages of 100 until an empty page is returned. For
    each author a single poetry row is requested; when that lookup comes
    back empty, ``crawler_author_poetry`` is called for the author's id.
    """
    author_obj = Author()
    page, count = 1, 100
    while True:
        authors = author_obj.find_authors({}, page, count)
        LOGGER.info("type: %s, len: %s", type(authors), len(authors))
        if not authors:
            return
        for author in authors:
            author_id = author['id']
            # Asking for one row is enough to tell whether any poetry exists.
            found = Poetry(author_id=author_id).find_poetry_by_author_id(1, 1)
            if len(found) == 0:
                crawler_author_poetry(author_id)
        page += 1
Example #6
0
def get_authors(page, count):
    """Return one page of authors as a list of plain dicts.

    Each entry carries ``desc`` (from the author's ``description``),
    ``author_id`` (from ``id``), ``headimg``, ``total``, ``dynasty`` and
    ``name``.
    """
    authors = Author().find_authors({}, page, count)
    return [
        {
            'desc': author['description'],
            'author_id': author['id'],
            'headimg': author['headimg'],
            'total': author['total'],
            'dynasty': author['dynasty'],
            'name': author['name'],
        }
        for author in authors
    ]
Example #7
0
def check_save_author(author_data):
    """Return the author record found by name, saving the author first if needed.

    ``author_data`` is expanded into keyword arguments for ``Author``.
    When no record is found by name, the author is saved and the lookup
    is repeated; the result of that second lookup is returned.
    """
    author_obj = Author(**author_data)
    existing = author_obj.find_author_by_name()
    if existing:
        return existing
    author_obj.save_author()
    # Look the author up again so the caller gets the record as stored.
    return author_obj.find_author_by_name()
Example #8
0
def get_author_data(session_data, author_id, page, count):
    """Return an author's summary together with one page of their poetry.

    ``session_data`` is accepted but not used here. When no author is
    found for ``author_id`` an error is logged and an empty dict is
    returned. Otherwise the result has two keys: ``poetry_list`` (each
    poetry row passed through ``ship_poetry_list``) and ``author_info``.
    """
    author = Author(id=author_id).find_author_by_id()
    if not author:
        LOG.error("not found author, id: %s", author_id)
        return {}

    author_info = {
        'author_name': author['name'],
        'author_headimg': author['headimg'],
        'author_desc': author['description'],
        'author_total': author['total'],
        'author_id': author_id,
    }
    poetrys = Poetry(author_id=author_id).find_poetry_by_author_id(page, count)
    return {
        'poetry_list': [ship_poetry_list(poetry) for poetry in poetrys],
        'author_info': author_info,
    }
Example #9
0
def query_poetry(query_dict):
    """Build the widget payload for a poetry search.

    ``query_dict`` maps a query kind to its keyword. The kinds ``name``,
    ``content`` and ``author`` are checked in that order and the first
    one present selects how poems are fetched; any other input falls back
    to a plain search. All values of ``query_dict`` are joined with commas
    to form the search keyword.

    For an ``author`` query the author is looked up by name. When found,
    that author's poems are listed; when not found, the function falls
    back to a search sorted by likes instead of failing.

    Returns a dict with ``errcode``, ``errmsg``, ``jump_url``, ``title``,
    ``desc``, ``item_list`` and ``more_description``. ``title`` and
    ``desc`` come from the first poem (by likes, descending) that has a
    non-empty title. Each item holds ``jump_url`` and ``content`` (the
    first 50 characters), plus ``title`` for author queries.
    """
    keyword = u','.join(query_dict.values())
    poetry_obj = Poetry(content=keyword)
    is_author_query = 'author' in query_dict
    # NOTE(review): 1 stays in effect when an author query finds no
    # author, so the author jump_url then points at id 1 -- confirm that
    # this default is intended.
    author_id = 1
    fields = [
        'id', 'title', 'author', 'likes', 'author_id', 'content', 'dynasty'
    ]
    if 'name' in query_dict:
        poetry_list = poetry_obj.search_widget(1,
                                               5,
                                               fields,
                                               sort={'likes': -1})
    elif 'content' in query_dict:
        poetry_list = poetry_obj.search_widget(1, 5, fields)
    elif is_author_query:
        author_data = Author(name=keyword).find_author_by_name()
        if author_data:
            # Read the id only after confirming a record exists; doing it
            # before the check raised on a missing author and made the
            # fallback below unreachable.
            author_id = author_data['id']
            poetry_obj.author_id = author_id
            poetry_list = poetry_obj.find_poetry_by_author_id(1, 3)
        else:
            poetry_list = poetry_obj.search_widget(1,
                                                   5,
                                                   fields,
                                                   sort={'likes': -1})
    else:
        poetry_list = poetry_obj.search_widget(1, 5, fields)

    # Materialise first so the length is taken from a real list.
    poetry_list = list(poetry_list)
    LOG.info(len(poetry_list))
    poetry_list.sort(key=lambda x: x['likes'], reverse=True)

    title = ''
    desc = ''
    item_list = []
    for poetry in poetry_list:
        if not title:
            title = poetry['title']
            desc = poetry['dynasty'] + "·" + poetry['author']
        pages = "/pages/detail/detail?id={}".format(poetry['id'])
        # Decode before slicing so the cut falls on a character boundary,
        # then hand back UTF-8 bytes.
        content = unicode(poetry['content'], 'utf-8')
        content = content[:50].encode("utf-8")
        tmp = {'jump_url': pages, "content": content}
        if is_author_query:
            tmp['title'] = poetry['title']
        item_list.append(tmp)

    data = {"errcode": 0, 'errmsg': "ok"}
    if is_author_query:
        LOG.info(author_id)
        data['jump_url'] = u"/pages/authorPoetry/authorPoetry?id="\
            u"{}".format(author_id).encode("utf-8")
    else:
        data['jump_url'] = u"/pages/search/search?keyword="\
            u"{}".format(keyword).encode("utf-8")
    data['title'] = title
    data['desc'] = desc
    data['item_list'] = item_list
    data['more_description'] = u"点击查看更多优美诗词".encode("utf-8")
    return data