def amazon_robot_check(self, data, country):
    """Request the Amazon domain for ``country`` and test the page for a robot check.

    ``data`` is a mapping that must provide the ``'session'`` and
    ``'proxies'`` entries; both are forwarded to ``self.request``.

    The session returned by ``self.request`` is written back into
    ``data['session']`` before the page is inspected, so ``data`` is
    modified even when the check fails.

    Returns ``data`` when ``is_robot`` reports no robot check on the parsed
    response, otherwise ``None``.
    """
    print('正在进行amazon机器人验证')

    # Work on a copy of the shared header template so it is never mutated.
    headers = amazon_headers.copy()
    headers['user-agent'] = UserAgent().random

    refreshed_session, page = self.request(
        data['session'],
        getAmazonDomain(country),
        headers=headers,
        proxies=data['proxies'],
    )
    data['session'] = refreshed_session

    if is_robot(etree.HTML(page)):
        return None
    return data
def amazon_robot_check(country):
    """Request the Amazon domain for ``country`` and report whether it is usable.

    A fresh ``requests`` session and a copy of ``amazon_headers`` (with the
    user agent replaced by ``UserAgent().random``) are used for the request.

    Returns ``True`` when ``is_robot`` finds no robot check in the parsed
    response and ``False`` otherwise; a status line is printed either way.
    """
    print('正在进行amazon机器人验证')

    headers = amazon_headers.copy()
    headers['user-agent'] = UserAgent().random

    # Only the response body is needed; the returned session is discarded.
    _, page = request(
        requests.session(),
        getAmazonDomain(country),
        headers=headers,
    )

    if is_robot(etree.HTML(page)):
        print('机器人验证')
        return False

    print('没有机器人验证')
    return True
def find_wolves():
    """Count and flag high-activity bidders ("wolves") in ``shared.bidders``.

    A bidder is treated as a wolf when it took part in more than 100
    auctions.  Wolves are split into those ``utils.is_robot`` recognises and
    those ``utils.is_human`` recognises; the latter have their outcome set
    to ``1.0``.  A one-line summary is printed at the end.

    Returns nothing; the effects are the printed report and the
    ``set_outcome`` calls on human wolves.
    """
    print('Wolves:')

    human_wolves = 0
    robot_wolves = 0
    robots = 0  # every bidder utils.is_robot recognises, wolf or not

    for bidder in shared.bidders.values():
        # The result was never used by this function.  The call is retained
        # in case features() has side effects (e.g. caching) -- TODO confirm
        # and drop it if it is pure.
        bidder.features()

        robot = utils.is_robot(bidder)
        if robot:
            # The summary reports "robots in general", so count them here
            # regardless of how many auctions they took part in.
            robots += 1

        if len(bidder.auctions) > 100:
            if robot:
                robot_wolves += 1
            elif utils.is_human(bidder):
                human_wolves += 1
                # NOTE(review): nesting reconstructed from a flattened
                # source line; assumed to apply to human wolves only --
                # confirm against the original file.
                bidder.set_outcome(1.0)

    # Report against the collection that was actually iterated.
    print('among %s bidders, found %s wolves-humans and %s robots (among %s robots in general)' % (
        len(shared.bidders), human_wolves, robot_wolves, robots))
def start_download(self):
    """Fetch one page of Amazon data, parse it, store it, then move on.

    Steps, in order:

    1. Request the current page through ``self.requests``.  A truthy
       numeric result is treated as an error code: a message is shown for
       the known codes, the start button is re-enabled and the method stops.
    2. Wrap the payload in ``AmazonDispose`` and stop (re-enabling the
       button) when ``is_robot`` flags the page.
    3. When the parser reports a language mismatch, set ``self.is_lang``,
       wait, and start over by calling this method again.
    4. Write the parsed rows to the CSV (or report that there are none).
    5. If there is a next page, wait, advance and call this method again;
       otherwise close the CSV and re-enable the start button.
    """
    # Parse the data and store it.
    # Check whether the ASIN exists.
    amazon_data = self.requests.getAmaoznData(self.is_lang)
    self.write_msg('正在获取第{}页'.format(self.requests.getPage()))

    if amazon_data and is_number(amazon_data):
        # A numeric payload is an error code; report the ones we know.
        known_errors = (
            (404, 'asin不存在,请查看是否输入有误'),
            (2, '请求失败'),
            (400, '服务器无法理解此请求'),
        )
        for code, message in known_errors:
            if amazon_data == code:
                self.write_msg(message)
        self.startButton.config(state=NORMAL)
        return

    self.write_msg('正在解析数据')
    dispose = AmazonDispose(
        amazon_data,
        self.siteBox.get(),
        self.asinEntry.get(),
    )

    if is_robot(dispose.get_selector()):
        self.write_msg('机器人验证')
        self.startButton.config(state=NORMAL)
        return

    if dispose.is_lang():
        # Wrong page language: remember that and request the page again.
        self.is_lang = True
        self.write_msg('语言不符合, 重新请求')
        self.wait('重新请求')
        self.start_download()
        return

    dic_data = dispose.dispose()
    print(dic_data)
    # self.write_msg(str(dic_data))
    if dic_data:
        self.write_msg('写入数据')
        self.csv.writerCsv(dic_data)
    else:
        self.write_msg('没有数据可以写入')

    if not dispose.isNextPage():
        # Last page: finish the CSV and hand control back to the user.
        self.csv.closeCsv()
        self.write_msg('评论获取完毕')
        self.startButton.config(state=NORMAL)
        return

    self.wait('请求下一页')
    self.requests.nextPage()
    self.start_download()
def read_bidders(filename):
    """Load bidders from the CSV file ``filename`` into the ``shared`` indexes.

    The first row is skipped.  Every remaining row becomes a ``Bidder``
    built from its first three columns plus the fourth column when present
    (``None`` otherwise).  Each bidder is registered through ``add_bidder``
    and its id is added to the address, payment-type and payment-account
    indexes in ``shared``.

    After loading, prints how many bidders in ``shared.bidders`` are robots,
    humans, or have no outcome.
    """
    with open(filename, 'rb') as bids_file:
        rows = csv.reader(bids_file, delimiter=',', quotechar='|')
        skip(rows, 1)
        for row in rows:
            # The fourth column is optional.
            fourth = row[3] if len(row) > 3 else None
            bidder = Bidder(row[0], row[1], row[2], fourth)
            # Disabled debug override, kept for reference:
            # if bidder.bidder_id in ['f5b2bbad20d1d7ded3ed960393bec0f40u6hn', 'e90e4701234b13b7a233a86967436806wqqw4']:
            #     bidder.outcome = 1
            add_bidder(bidder)

            bidder_id = bidder.bidder_id
            shared.addr_1[bidder.get_addr_1()].add(bidder_id)
            shared.addr_2[bidder.get_addr_2()].add(bidder_id)
            shared.pmt_type[bidder.get_payment_type()].add(bidder_id)
            shared.pmt_accnt[bidder.get_payment_acct()].add(bidder_id)

    print("%s data done" % filename)

    robot_count = sum(1 for known in shared.bidders.values() if utils.is_robot(known))
    human_count = sum(1 for known in shared.bidders.values() if utils.is_human(known))
    unknown_count = sum(1 for known in shared.bidders.values() if known.outcome is None)
    print("Found %s robots, %s humans, %s unknown" % (robot_count, human_count, unknown_count))
def is_robot(self):
    """Return whatever ``utils.is_robot`` reports for this object."""
    verdict = utils.is_robot(self)
    return verdict