def load_collect(self, page):
    """Run ``load_collect_once`` for ``page`` indices and dump ``self.collect``.

    Steps:
      1. Read the first line of ``<data_dir>cookie_collect`` and pass it to
         ``changeCookie``; then call ``changeHtmlTimeout(30)``.
      2. Call ``self.load_collect_once(index)`` for every index in
         ``range(page)``, each in its own thread, at most ten threads at a
         time (each batch is joined before the next one starts).
      3. Flatten the values of ``self.collect`` in ascending key order and
         write them, newline separated, to ``<data_dir>collect_wyy``.

    Args:
        page: number of indices to process (0 .. page - 1).

    Returns:
        None. When the cookie file does not exist a message is printed and
        the method returns without doing anything else.
    """
    version = begin_time()
    cookie_path = '%scookie_collect' % data_dir
    if not os.path.exists(cookie_path):
        print('TB cookie not exist!!!')
        return
    with codecs.open(cookie_path, 'r', encoding='utf-8') as f:
        cookie = f.readline()
    # Strip only the line terminator: slicing off the last character would
    # truncate a cookie that was saved without a trailing newline.
    changeCookie(cookie.rstrip('\r\n'))
    changeHtmlTimeout(30)

    batch_size = 10
    for begin in range(0, page, batch_size):
        end = min(page, begin + batch_size)
        workers = [
            threading.Thread(target=self.load_collect_once, args=(index,))
            for index in range(begin, end)
        ]
        for work in workers:
            work.start()
        # Wait for the whole batch so no more than `batch_size` threads
        # are alive at once.
        for work in workers:
            work.join()

    # NOTE(review): self.collect is presumably filled by load_collect_once
    # with {index: [str, ...]} -- confirm. Flattening with a comprehension
    # is linear, unlike sum(lists, []).
    lines = [
        line for key in sorted(self.collect) for line in self.collect[key]
    ]
    with codecs.open('%scollect_wyy' % data_dir, 'w', encoding='utf-8') as f:
        f.write("\n".join(lines))
    end_time(version)
def load_goods(self):
    """Run ``load_goods_once`` for every request id and dump ``self.goods``.

    Steps:
      1. Read the first line of ``<data_dir>cookie`` and pass it to
         ``changeCookie``.
      2. Start one thread per entry of ``self.request_list``, each calling
         ``self.load_goods_once(index, tid)``, and wait for all of them.
      3. Flatten the values of ``self.goods`` in ascending key order and
         write them, newline separated, to ``<data_dir>goods``.

    Returns:
        None. When the cookie file does not exist a message is printed and
        the method returns without doing anything else.
    """
    version = begin_time()
    cookie_path = '%scookie' % data_dir
    if not os.path.exists(cookie_path):
        print('Youdao Note cookie not exist!!!')
        return
    with codecs.open(cookie_path, 'r', encoding='utf-8') as f:
        cookie = f.readline()
    # Strip only the line terminator: slicing off the last character would
    # truncate a cookie that was saved without a trailing newline.
    changeCookie(cookie.rstrip('\r\n'))

    workers = [
        threading.Thread(target=self.load_goods_once, args=(index, tid))
        for index, tid in enumerate(self.request_list)
    ]
    for work in workers:
        work.start()
    for work in workers:
        work.join()

    # NOTE(review): self.goods is presumably filled by load_goods_once with
    # {index: [str, ...]} -- confirm. Flattening with a comprehension is
    # linear, unlike sum(lists, []).
    lines = [line for key in sorted(self.goods) for line in self.goods[key]]
    with codecs.open('%sgoods' % data_dir, 'w', encoding='utf-8') as f:
        f.write("\n".join(lines))
    end_time(version)
def getZhihuView(self):
    """Fetch per-article read counts from the Zhihu creator statistics API.

    The first line of ``<data_dir>cookie`` (or an empty string when the file
    is missing) is passed to ``changeCookie``. Page 1 of the statistics
    endpoint is requested through ``self.get_request``; its ``count`` field
    decides how many further pages are requested. For every article in a
    page's ``data`` list the slug is looked up by title in
    ``self.title2slug`` and, failing that, by id in ``self.zhihu_id_map``;
    on a match ``self.zhihu_id[slug]`` and ``self.zhihu_views[slug]`` are
    updated, otherwise the article title is printed.

    Returns:
        None. Returns early when the first response is falsy or has no
        ``data`` key (the response is printed if it contains ``code``).
    """
    cookie_path = '%scookie' % data_dir
    if os.path.exists(cookie_path):
        with codecs.open(cookie_path, 'r', encoding='utf-8') as f:
            # Strip only the line terminator: slicing off the last
            # character would truncate a cookie saved without a newline.
            cookie = f.readline().rstrip('\r\n')
    else:
        cookie = ''
    changeCookie(cookie)

    url = ''.join([
        'https://www.zhihu.com/api/v4/creator/content_statistics/',
        'articles?order_field=object_created&order_sort=descend&begin_date=2018-09-01&end_date=',
        datetime.datetime.now().strftime("%Y-%m-%d"),
        '&page_no=',
    ])

    def record(articles):
        """Store id and read count for each article that maps to a slug."""
        for article in articles:
            title = article['title']
            zhihu_id = int(article['url_token'])
            read_count = int(article['read_count'])
            if title in self.title2slug:
                slug = self.title2slug[title]
            elif zhihu_id in self.zhihu_id_map:
                slug = self.zhihu_id_map[zhihu_id]
            else:
                # Unknown article: report it and leave the maps untouched.
                print(title)
                continue
            self.zhihu_id[slug] = zhihu_id
            self.zhihu_views[slug] = read_count

    first_page = self.get_request(url + '1', 1)
    if not first_page:
        return
    if 'data' not in first_page:
        if 'code' in first_page:
            print(first_page)
        return
    record(first_page['data'])

    # A response without 'count' means there is nothing more to page through.
    extra_pages = first_page.get('count', 0) // 10
    for index in range(extra_pages):
        print('zhihu', index)
        page = self.get_request(url + str(index + 2), 1)
        # Skip failed requests and payloads without 'data' (e.g. an error
        # body) instead of raising KeyError halfway through the run.
        if not page or 'data' not in page:
            continue
        record(page['data'])
def getZhihuView(self):
    """Fetch per-article read counts from the Zhihu creator statistics API.

    The content returned by ``read_file('<data_dir>cookie')`` is joined and
    passed to ``changeCookie``. Page 1 of the statistics endpoint (built
    from ``self.ZHIHU_URL``) is requested through ``self.get_request``; its
    ``count`` field decides how many further pages are requested. For every
    article in a page's ``data`` list the slug is looked up by title in
    ``self.title2slug`` and, failing that, by id in ``self.zhihu_id_map``;
    on a match ``self.zhihu_id[slug]`` and ``self.zhihu_views[slug]`` are
    updated, otherwise the title is reported through ``echo``.

    Returns:
        None. Returns early when the first response is falsy or has no
        ``data`` key (the response is echoed as a warning if it contains
        ``code``).
    """
    cookie = ''.join(read_file('{}cookie'.format(data_dir)))
    changeCookie(cookie)

    url = ''.join([
        self.ZHIHU_URL,
        'articles?order_field=object_created&order_sort=descend&begin_date=2018-09-01&end_date=',
        datetime.datetime.now().strftime("%Y-%m-%d"),
        '&page_no=',
    ])

    def record(articles):
        """Store id and read count for each article that maps to a slug."""
        for article in articles:
            title = article['title']
            zhihu_id = int(article['url_token'])
            read_count = int(article['read_count'])
            if title in self.title2slug:
                slug = self.title2slug[title]
            elif zhihu_id in self.zhihu_id_map:
                slug = self.zhihu_id_map[zhihu_id]
            else:
                # Unknown article: report it and leave the maps untouched.
                echo('0|debug', title)
                continue
            self.zhihu_id[slug] = zhihu_id
            self.zhihu_views[slug] = read_count

    first_page = self.get_request('{}{}'.format(url, 1), 1, lambda i: not i)
    if not first_page:
        return
    if 'data' not in first_page:
        if 'code' in first_page:
            echo('0|warning', first_page)
        return
    echo(3, 'zhihu', first_page)
    record(first_page['data'])

    # NOTE(review): assumes the API returns 10 articles per page, as the
    # `// 10` arithmetic implies -- confirm. Ceiling division makes sure the
    # last, partially filled page is requested too (e.g. count=25 -> 3 pages).
    total_pages = (first_page.get('count', 0) + 9) // 10
    for page_no in range(2, total_pages + 1):
        echo(1, 'zhihu', page_no - 1)
        page = self.get_request(
            '{}{}'.format(url, page_no), 1, lambda i: not i)
        echo(2, 'zhihu', page)
        # Skip failed requests and payloads without 'data' (e.g. an error
        # body) instead of raising KeyError halfway through the run.
        if not page or 'data' not in page:
            continue
        record(page['data'])