Beispiel #1
0
def topic_test(topic_url):
    """Smoke-test a ``Topic``: print its id, name, question and follower counts.

    :param topic_url: URL handed to the ``Topic`` constructor.
    """
    Logging.info(u"topic_test:")
    topic = Topic(topic_url)
    # Invoke the accessors one at a time, in this order, printing each result
    # before the next accessor is called.
    for accessor_name in ("get_topic_id", "get_topic_name",
                          "get_question_num", "get_follower_num"):
        print(getattr(topic, accessor_name)())
    # Not consumed anywhere in the visible part of this function.
    questions = topic.get_questions()
Beispiel #2
0
        #time.sleep(0.1)


def user_spider(user):
    """Persist *user*, each of its followers, and the follow relation.

    :param user: object exposing ``get_followers()``; it and every follower
        it yields are handed to ``DataBase``.
    """
    DataBase.put_user_in_db(user)
    followers = user.get_followers()
    for fan in followers:
        # Store the follower itself first, then the (user, follower) link.
        DataBase.put_user_in_db(fan)
        DataBase.put_follow_user_in_db(user, fan)


if __name__ == '__main__':
    # Script entry point: seed the crawl with one fixed topic and push its
    # questions through a pool of workers running ``spider``.
    import sys
    # NOTE(review): very high limit; presumably some code outside this block
    # recurses deeply -- confirm it is still required.
    sys.setrecursionlimit(1000000)
    THREADS = 10  # size of the worker pool created below
    p = mp.Pool(processes=THREADS)
    topic = Topic("http://www.zhihu.com/topic/19554927")
    # Register the seed topic in the bloom filter only if it is not there yet,
    # and record the updated filter.
    if not topicBloom.is_element_exist(topic.get_topic_id()):
        topicBloom.insert_element(topic.get_topic_id())
        Worm_status.record_status("topicBloom", topicBloom)
    DataBase.put_topic_in_db(topic)
    # assumes get_questions() returns a one-shot iterator, so each islice()
    # call continues where the previous one stopped -- TODO confirm
    go = topic.get_questions()
    num = topic.get_question_num()
    # Hand the questions to the pool 20 at a time; ``num`` only bounds the
    # number of batches.
    while num >= 0:
        try:
            p.map(spider, itertools.islice(go, 20))
        except AttributeError as exc:
            # Still best-effort (the batch is skipped), but no longer silent:
            # leave a trace so failed batches can be noticed.
            sys.stderr.write("spider batch failed: %r\n" % (exc,))
        finally:
            num -= 20
    # Orderly shutdown: stop accepting work and wait for the workers to exit
    # instead of abandoning the pool at interpreter exit.
    p.close()
    p.join()

Beispiel #3
0
        Logging.debug("Exist Question")
        #time.sleep(0.1)


def user_spider(user):
    """Write *user* and its followers to the database.

    Every follower returned by ``user.get_followers()`` is stored as a user
    and then linked to *user* through ``DataBase.put_follow_user_in_db``.

    :param user: the user whose followers are walked.
    """
    store_user = DataBase.put_user_in_db
    store_user(user)
    for one_follower in user.get_followers():
        store_user(one_follower)
        DataBase.put_follow_user_in_db(user, one_follower)


if __name__ == '__main__':
    # Script entry point: crawl the questions of one hard-coded topic using a
    # pool of workers that each run ``spider``.
    import sys
    # NOTE(review): the limit is raised far above the default; presumably
    # needed by deep recursion elsewhere in the crawler -- confirm.
    sys.setrecursionlimit(1000000)
    THREADS = 10  # number of workers in the pool
    p = mp.Pool(processes=THREADS)
    topic = Topic("http://www.zhihu.com/topic/19554927")
    # Add the topic id to the bloom filter (and record the filter) only when
    # the filter does not already contain it.
    if not topicBloom.is_element_exist(topic.get_topic_id()):
        topicBloom.insert_element(topic.get_topic_id())
        Worm_status.record_status("topicBloom", topicBloom)
    DataBase.put_topic_in_db(topic)
    # presumably a lazy iterator: successive islice() calls below are
    # expected to yield successive batches -- verify against Topic
    go = topic.get_questions()
    num = topic.get_question_num()
    # One batch of up to 20 questions per iteration; ``num`` is decremented
    # by 20 whether or not the batch succeeded.
    while num >= 0:
        try:
            p.map(spider, itertools.islice(go, 20))
        except AttributeError as exc:
            # Keep going (best-effort), but report the skipped batch instead
            # of swallowing the error without a trace.
            sys.stderr.write("spider batch skipped: %r\n" % (exc,))
        finally:
            num -= 20
    # All batches were submitted: close the pool and wait for the workers so
    # it is released explicitly rather than left to interpreter teardown.
    p.close()
    p.join()
Beispiel #4
0
def test_topic(topic_url):
    """Print the name of every child of the topic at *topic_url*.

    :param topic_url: URL handed to the ``Topic`` constructor.
    """
    Logging.info(u"topic_test:")
    parent = Topic(topic_url)
    # The return value is discarded; only the call is made.
    parent.get_question_num()
    for child in parent.get_child():
        print(child.get_topic_name())
Beispiel #5
0
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from zhihu_api.Topic import Topic
from database_operation.DataBase import DataBase


def store(topic):
    """Recursively save *topic* and all of its descendants to the database.

    Children are stored before their parent. Saving is best-effort: a failure
    while saving or printing one topic is logged and the walk continues.
    Errors raised while enumerating children are not caught.

    :param topic: object exposing ``get_child()`` and ``get_topic_name()``,
        or ``None`` (ignored).
    """
    # Function-scope import: the file does not import ``logging`` at the top.
    import logging

    if topic is None:
        return
    for item in topic.get_child():
        store(item)
    try:
        DataBase.put_topic_in_db(topic)
        print(topic.get_topic_name())
    except Exception:
        # Previously a bare ``except: pass``, which also swallowed
        # KeyboardInterrupt/SystemExit and hid every failure. Ordinary errors
        # are still tolerated, but they now leave a traceback in the log.
        logging.exception("failed to store topic %r", topic)


if __name__ == "__main__":
    # Entry point: walk the topic tree from a fixed root and persist it.
    import sys

    # store() calls itself for every child topic, so allow deep recursion.
    sys.setrecursionlimit(10 ** 6)
    root_url = "http://www.zhihu.com/topic/19776749"
    rootTopic = Topic(root_url)
    store(rootTopic)