def start_requests(self):
    """Yield one Request per (stored proxy, target URL) pair.

    Proxy ids are read first from this spider's own table (``self.name``)
    and then from ``config.httpbin_table``. Each proxy that can be loaded
    is used to fetch every URL in ``self.urls``. The request carries the
    proxy record and a start timestamp in ``meta``; ``self.success_parse``
    is the callback and ``self.error_parse`` the errback.
    """
    # Walk each table's id list directly rather than deriving a position
    # from separately queried row counts: when a count and its id list
    # disagree, the offset arithmetic (``i - len(ids)``) either raises
    # IndexError or silently wraps around to the wrong id.
    sources = (
        (self.name, utils.get_table_ids(self.sql, self.name)),
        (
            config.httpbin_table,
            utils.get_table_ids(self.sql, config.httpbin_table),
        ),
    )

    for table, proxy_ids in sources:
        # ``or ()`` tolerates a helper that returns None for an empty table.
        for proxy_id in proxy_ids or ():
            proxy = utils.get_proxy_info(self.sql, table, proxy_id)
            if proxy is None:
                # The row could not be loaded; nothing to validate.
                continue

            for url in self.urls:
                # Timestamp taken per request and passed along in meta.
                cur_time = time.time()
                yield Request(
                    url=url,
                    headers=self.headers,
                    meta={
                        'cur_time': cur_time,
                        'download_timeout': self.timeout,
                        'proxy_info': proxy,
                        'table': table,
                        'id': proxy.get('id'),
                        'proxy': 'http://%s:%s' % (proxy.get('ip'), proxy.get('port')),
                        'vali_count': proxy.get('vali_count', 0),
                    },
                    dont_filter=True,
                    callback=self.success_parse,
                    errback=self.error_parse,
                )
def get_unity_version(self, response):
    """Read the Kharma version from ``response`` and probe every proxy.

    The response body is parsed as JSON and its ``kharma_version`` value
    (empty string when absent) is sent as the ``X-Kharma-Version`` header.
    One Request for the asset-store overview JSON is then yielded per
    stored proxy, taken first from this spider's own table (``self.name``)
    and then from ``config.httpbin_table``. ``self.success_parse`` is the
    callback and ``self.error_parse`` the errback.
    """
    content = json.loads(response.body)
    utils.log('unity content:%s' % response.body)

    unity_version = content.get('kharma_version', '')
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Connection': 'keep-alive',
        'Host': 'www.assetstore.unity3d.com',
        'Referer': 'https://www.assetstore.unity3d.com/en/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:50.0) Gecko/20100101 Firefox/50.0',
        'X-Kharma-Version': unity_version,
        'X-Requested-With': 'UnityAssetStore',
        # NOTE(review): hard-coded session token -- confirm it is still
        # accepted by the server or should come from configuration.
        'X-Unity-Session': '26c4202eb475d02864b40827dfff11a14657aa41',
    }

    # The probe URL never changes, so build it once outside the loops.
    content_id = '368'
    url = 'https://www.assetstore.unity3d.com/api/en-US/content/overview/' + content_id + '.json'

    # Walk each table's id list directly rather than deriving a position
    # from separately queried row counts: when a count and its id list
    # disagree, the offset arithmetic (``i - len(ids)``) either raises
    # IndexError or silently wraps around to the wrong id.
    sources = (
        (self.name, utils.get_table_ids(self.sql, self.name)),
        (
            config.httpbin_table,
            utils.get_table_ids(self.sql, config.httpbin_table),
        ),
    )

    for table, proxy_ids in sources:
        # ``or ()`` tolerates a helper that returns None for an empty table.
        for proxy_id in proxy_ids or ():
            proxy = utils.get_proxy_info(self.sql, table, proxy_id)
            if proxy is None:
                # The row could not be loaded; nothing to validate.
                continue

            # Timestamp taken per request and passed along in meta.
            cur_time = time.time()
            yield Request(
                url=url,
                headers=headers,
                meta={
                    'cur_time': cur_time,
                    'download_timeout': self.timeout,
                    'proxy_info': proxy,
                    'table': table,
                    'id': proxy.get('id'),
                    'proxy': 'http://%s:%s' % (proxy.get('ip'), proxy.get('port')),
                },
                dont_filter=True,
                callback=self.success_parse,
                errback=self.error_parse,
            )
def start_requests(self):
    """Yield one Request per stored proxy against a randomly chosen URL.

    Proxy ids are read first from this spider's own table (``self.name``)
    and then from ``config.httpbin_table``. For each proxy that can be
    loaded, a URL is picked at random from ``self.urls`` and the first run
    of digits in it is passed along as ``product_id`` in ``meta``.
    ``self.get_comment_count`` is the callback and ``self.error_parse``
    the errback.
    """
    # Raw string keeps the backslash literal without an invalid-escape
    # warning; compiled once because the pattern is loop-invariant.
    product_id_pattern = re.compile(r'\d+', re.S)

    # Walk each table's id list directly rather than deriving a position
    # from separately queried row counts: when a count and its id list
    # disagree, the offset arithmetic (``i - len(ids)``) either raises
    # IndexError or silently wraps around to the wrong id.
    sources = (
        (self.name, utils.get_table_ids(self.sql, self.name)),
        (
            config.httpbin_table,
            utils.get_table_ids(self.sql, config.httpbin_table),
        ),
    )

    for table, proxy_ids in sources:
        # ``or ()`` tolerates a helper that returns None for an empty table.
        for proxy_id in proxy_ids or ():
            proxy = utils.get_proxy_info(self.sql, table, proxy_id)
            if proxy is None:
                # The row could not be loaded; nothing to validate.
                continue

            url = random.choice(self.urls)
            id_match = product_id_pattern.search(url)
            if id_match is None:
                # Calling .group() on a failed search would raise and end
                # the whole generator; skip just this request instead.
                self.log('start_request no product id in url:%s' % url)
                continue
            product_id = id_match.group()

            cur_time = time.time()
            self.log('start_request cur_time:%s' % cur_time)
            yield Request(
                url=url,
                headers=self.headers,
                meta={
                    'cur_time': cur_time,
                    'download_timeout': self.timeout,
                    'proxy_info': proxy,
                    'table': table,
                    'id': proxy.get('id'),
                    'proxy': 'http://%s:%s' % (proxy.get('ip'), proxy.get('port')),
                    'vali_count': proxy.get('vali_count', 0),
                    'product_id': product_id,
                },
                dont_filter=True,
                callback=self.get_comment_count,
                errback=self.error_parse,
            )
def start_requests(self):
    """Yield one POST FormRequest per (stored proxy, target URL) pair.

    Proxy ids are read first from this spider's own table (``self.name``)
    and then from ``config.httpbin_table``. Each proxy that can be loaded
    is used to POST a fixed search form, with a fixed cookie set, to every
    URL in ``self.urls``. ``self.success_parse`` is the callback and
    ``self.error_parse`` the errback.
    """
    # Fixed cookie and form payloads; each request receives its own copy
    # so no two requests share a mutable dict.
    cookies = {
        'Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6': '1488937030',
        '_ga': 'GA1.2.40497390.1488937014',
        'TG-TRACK-CODE': 'search_code',
        'index_location_city': '%E5%8C%97%E4%BA%AC',
        'LGRID': '20170308093710-bf6755eb-039f-11e7-8025-525400f775ce',
        'Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6': '1488881288,1488936799,1488936947,1488937014',
        'JSESSIONID': 'BDCBB6167F960CE43AF54B75A651F586',
        'LGSID': '20170308093653-b59316f0-039f-11e7-9229-5254005c3644',
        'LGUID': '20170308093653-b593185f-039f-11e7-9229-5254005c3644',
        'user_trace_token': '20170308093654-723efcfac8fb4c28a670d073d5113e02',
        'SEARCH_ID': '4db4dc3dea1c46b49018ae5421b53ffa',
    }
    formdata = {
        'first': 'true',
        'kd': 'ios',
        'pn': '1',
    }

    # Walk each table's id list directly rather than deriving a position
    # from separately queried row counts: when a count and its id list
    # disagree, the offset arithmetic (``i - len(ids)``) either raises
    # IndexError or silently wraps around to the wrong id.
    sources = (
        (self.name, utils.get_table_ids(self.sql, self.name)),
        (
            config.httpbin_table,
            utils.get_table_ids(self.sql, config.httpbin_table),
        ),
    )

    for table, proxy_ids in sources:
        # ``or ()`` tolerates a helper that returns None for an empty table.
        for proxy_id in proxy_ids or ():
            proxy = utils.get_proxy_info(self.sql, table, proxy_id)
            if proxy is None:
                # The row could not be loaded; nothing to validate.
                continue

            for url in self.urls:
                # Timestamp taken per request and passed along in meta.
                cur_time = time.time()
                yield FormRequest(
                    url=url,
                    headers=self.headers,
                    method='POST',
                    meta={
                        'cur_time': cur_time,
                        'download_timeout': self.timeout,
                        'proxy_info': proxy,
                        'table': table,
                        'id': proxy.get('id'),
                        'proxy': 'http://%s:%s' % (proxy.get('ip'), proxy.get('port')),
                        'vali_count': proxy.get('vali_count', 0),
                    },
                    cookies=dict(cookies),
                    formdata=dict(formdata),
                    dont_filter=True,
                    callback=self.success_parse,
                    errback=self.error_parse,
                )