def get_ext_by_category(self, category):
    """Fetch the Web Store item listing for *category* and record popular items.

    The listing is requested through ``self.get_ext_item_reps``; the
    response has its leading ``)]}'`` / newline characters stripped before
    being parsed as JSON, and the item list is read from ``[1][1]`` of the
    parsed payload.  Each item is converted with ``self._list2info`` and,
    when its user count is at least ``conf['more_then_user_num']``, it is
    appended to ``self.json_path`` via ``dict2file``.

    Returns ``False`` as soon as an item id is already present in
    ``self.json_path`` (per ``check_in_file``); otherwise falls through
    and returns ``None``.
    """
    url = "https://chrome.google.com/webstore/ajax/item?pv=20161108&count=209&category={}".format(category)
    raw_response = self.get_ext_item_reps(url)

    # The body is prefixed with characters from the set ")]}'\n" that must
    # be removed before it is valid JSON.
    payload = json.loads(raw_response.lstrip(")]}'\n"))
    items = payload[1][1]
    if not items:
        return

    for item in items:
        id_str, users, info = self._list2info(item)

        # NOTE(review): processing stops entirely at the first known id,
        # whereas `run` merely skips it -- presumably intentional; confirm.
        if check_in_file(id_str, self.json_path):
            return False

        if users >= conf['more_then_user_num']:
            print('[*] id : %s' % id_str)
            dict2file(info, path=self.json_path)
def update():
    """Re-process every recorded extension that has not been handled yet.

    Reads ``conf['data_file']`` line by line, parsing each non-blank line
    as a JSON object.  Entries whose ``id`` is already present in
    ``./data/data2_1000.json`` (per ``check_in_file``) are skipped; every
    other entry is queued on a ``ThreadPool`` of ``conf['threadnum']``
    workers as ``ext_info_add_list(extinfo=info)``.  After
    ``pool.destroy()`` the pool's output queue is drained and each truthy
    result is written with ``dict2file`` to
    ``conf['etx_info_weblist_file']``.
    """
    import io
    import json

    from lib.threadManager import ThreadPool
    from lib.common import check_in_file, dict2file
    from core.googleExtDownloader import ext_info_add_list

    print('[*] -- update start ---')
    pool = ThreadPool(conf['threadnum'])

    with io.open(conf['data_file'], 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # A blank line would make json.loads raise and abort the whole
            # update, so skip it instead.
            if not line:
                continue
            info = json.loads(line)
            if check_in_file(info.get('id'), './data/data2_1000.json'):
                continue
            pool.add_task(ext_info_add_list, extinfo=info)

    # presumably destroy() waits for queued tasks to finish before the
    # results are collected -- confirm against lib.threadManager.
    pool.destroy()

    while not pool.out_queue.empty():
        result = pool.get_task()
        if result:
            dict2file(result, conf['etx_info_weblist_file'])
def run(self):
    """Page through every category in ``self.category_list`` and record items.

    For each category, pages are requested starting at ``self.start`` and
    advancing by ``self.limit`` until ``self._res_to_info_list`` yields an
    empty/falsy result.  Every item on a page is converted with
    ``self._list2info``; items already present in ``self.json_path`` (per
    ``check_in_file``) are skipped, and the remaining ones are appended to
    ``self.json_path`` via ``dict2file`` when their user count is at least
    ``conf['more_then_user_num']``.
    """
    for category in self.category_list:
        print('[*] category : ' + str(category))
        offset = self.start

        while True:
            print('[*] start : %s !!!!' % str(offset))
            page_url = self.ext_item_url.format(limit=self.limit, start=offset, category=category)
            response = self.get_ext_item_reps(page_url)
            entries = self._res_to_info_list(response)

            # An empty page means this category is exhausted.
            if not entries:
                break

            for entry in entries:
                id_str, users, info = self._list2info(entry)
                if check_in_file(id_str, self.json_path):
                    continue
                if users >= conf['more_then_user_num']:
                    print('[*] id : %s' % id_str)
                    dict2file(info, path=self.json_path)

            offset = offset + self.limit