class SpiderTopicData(object):
    """
    Crawl Zhihu topic data.

    Topics are visited breadth-first starting from the ids placed in
    ``waitting_list``. For every topic the crawler records its name in
    ``record_topic_data`` (topic id -> name) and, for each child, the list of
    parent ids in ``record_topic_link_data`` (child id -> [parent id, ...]).
    """

    def __init__(self):
        """
        Log in and initialise the crawl state.
        """
        self.login = Login()
        # relogin() performs the login and caches headers, form data and the
        # session on this instance.
        self.relogin()
        # data link list
        self.havefinished_list = []  # topic ids that have been processed
        self.waitting_list = []  # topic ids still waiting to be processed
        self.record_topic_data = dict()
        self.record_topic_link_data = dict()

    def setRootTopic(self, root_topic_id='19778317'):
        """
        Queue the topic the crawl should start from.

        :param root_topic_id: id of the topic to append to the waiting list
        :return: None
        """
        self.waitting_list.append(root_topic_id)

    def relogin(self):
        """
        Log in (again) and refresh the cached headers, form data and session.

        :return: None
        """
        self.login.login()
        self.headers = self.login.getHeaders()
        self.data = self.login.getData()
        self.session = self.login.getSession()

    def _postWithDelay(self, link_url):
        """
        POST to ``link_url`` with the cached form data and headers, then sleep
        for a random 4-12 seconds before returning the response.
        """
        res = self.session.post(link_url,
                                data=self.data,
                                headers=self.headers)
        time.sleep(random.randint(4, 12))
        return res

    def getLinkTopic(
        self,
        link_url='https://www.zhihu.com/topic/19778317/organize/entire?parent=19778317'
    ):
        """
        Fetch one page of the topic tree and extract the topic and its children.

        If the first request raises, the crawler logs in again and retries
        once; an error on the retry propagates to the caller.

        :param link_url: URL of the topic "organize" page to request
        :return: dict with keys ``parent_topic_id``, ``parent_topic_name``,
            ``sub_topics_name`` and ``sub_topics_id`` (the two lists are
            parallel)
        """
        try:
            res = self._postWithDelay(link_url)
        except Exception:
            # Only ordinary errors trigger the re-login; KeyboardInterrupt and
            # SystemExit must still be able to stop the crawl.
            self.relogin()
            res = self._postWithDelay(link_url)
        topic = json.loads(res.text)
        cur_topic = topic['msg'][0]
        sub_topics = topic['msg'][1]
        # Names are stored UTF-8 encoded; recordData compares them against a
        # plain string literal.
        return {
            "parent_topic_id": cur_topic[2],
            "parent_topic_name": cur_topic[1].encode('utf8'),
            "sub_topics_name": [sub[0][1].encode('utf8')
                                for sub in sub_topics],
            "sub_topics_id": [sub[0][2] for sub in sub_topics],
        }

    def recordData(self, result):
        """
        Record the topics and parent links contained in one crawled page.

        An entry named "加载更多" is not a topic: it marks that more children
        exist, and its id is returned so the caller can request the next page.

        :param result: dict as returned by getLinkTopic
        :return: tuple ``(existed, parent_topic_id, child_topic_id)`` where
            ``existed`` tells whether a "加载更多" entry was present and
            ``child_topic_id`` is that entry's id ('' when absent)
        """
        parent_topic_id = result['parent_topic_id']
        existed = False
        child_topic_id = ''
        self.record_topic_data.setdefault(parent_topic_id,
                                          result['parent_topic_name'])
        for sub_id, sub_name in zip(result['sub_topics_id'],
                                    result['sub_topics_name']):
            if sub_name == str("加载更多"):
                existed = True
                child_topic_id = sub_id
                continue
            self.record_topic_data.setdefault(sub_id, sub_name)
            parents = self.record_topic_link_data.setdefault(sub_id, [])
            # Seeing the same parent/child pair again must not duplicate the
            # link.
            if parent_topic_id not in parents:
                parents.append(parent_topic_id)
        return existed, parent_topic_id, child_topic_id

    def getSubTopic(self, parent_topic_id='19778317', child_topic_id=''):
        """
        Crawl all direct children of one topic (grandchildren are ignored).

        Pages are requested repeatedly for as long as recordData reports a
        "加载更多" entry.

        :param parent_topic_id: id of the topic whose children are crawled
        :param child_topic_id: child id to continue from when the listing was
            truncated; empty for the first page
        :return: list of the ids found on all requested pages
        """
        sub_topics_id = []
        while True:
            url_link = 'https://www.zhihu.com/topic/{0}/organize/entire'.format(
                parent_topic_id)
            if child_topic_id:
                url_link += "?child={}&parent={}".format(
                    child_topic_id, parent_topic_id)
            result = self.getLinkTopic(url_link)
            sub_topics_id.extend(result['sub_topics_id'])
            has_more, parent_topic_id, child_topic_id = self.recordData(result)
            if not has_more:
                return sub_topics_id

    def getAllTopic(self):
        """
        Crawl every queued topic and, transitively, all of their sub-topics.

        After each topic the progress is printed and both result files are
        rewritten.

        :return: None
        """
        # Sets give O(1) membership tests; the public lists are kept as the
        # source of truth and stay in sync inside this loop.
        finished = set(self.havefinished_list)
        seen = set(self.havefinished_list)
        seen.update(self.waitting_list)
        while self.waitting_list:
            topic_id = self.waitting_list.pop(0)
            if topic_id in finished:
                continue
            for sub_id in self.getSubTopic(parent_topic_id=topic_id):
                # Queue each topic once, so the waiting list does not fill up
                # with ids that are already queued or done.
                if sub_id not in seen:
                    seen.add(sub_id)
                    self.waitting_list.append(sub_id)
            self.havefinished_list.append(topic_id)
            finished.add(topic_id)
            print("当前一获取topic number:{}\t 当前已遍历 topic number: {}".format(
                len(self.havefinished_list) + len(self.waitting_list),
                len(self.havefinished_list)))
            self.writeResulttoFile()

    def writeResulttoFile(self, topic_file=None, topic_link_file=None):
        """
        Dump the recorded topics and topic links as JSON.

        :param topic_file: output path for ``record_topic_data``; defaults to
            ``config.TopicFilePath + "zhihu_topic.json"``
        :param topic_link_file: output path for ``record_topic_link_data``;
            defaults to ``config.TopicFilePath + 'zhihu_topic_link.json'``
        :return: None
        """
        # Defaults are resolved at call time so that defining the class does
        # not need ``config`` to be loaded yet.
        if topic_file is None:
            topic_file = config.TopicFilePath + "zhihu_topic.json"
        if topic_link_file is None:
            topic_link_file = config.TopicFilePath + 'zhihu_topic_link.json'

        outputs = ((topic_file, self.record_topic_data),
                   (topic_link_file, self.record_topic_link_data))
        for path, payload in outputs:
            with codecs.open(path, 'w', encoding='utf8') as fp:
                # NOTE: the ``encoding`` keyword exists in Python 2's
                # json.dump only.
                json.dump(payload,
                          fp,
                          ensure_ascii=False,
                          encoding='utf8')