コード例 #1
0
 def amazon_robot_check(self, data, country):
     """Probe the Amazon site for ``country`` and report whether a robot check came back.

     ``data`` is indexed with the keys ``'session'`` and ``'proxies'``; its
     ``'session'`` entry is overwritten with the session returned by
     ``self.request``.

     Returns ``data`` when ``is_robot`` does not flag the parsed response,
     otherwise ``None``.
     """
     print('正在进行amazon机器人验证')

     # Start from the shared header template, with a random user agent per probe.
     headers = amazon_headers.copy()
     headers['user-agent'] = UserAgent().random

     session, page = self.request(
         data['session'],
         getAmazonDomain(country),
         headers=headers,
         proxies=data['proxies'],
     )
     # Keep the session handed back by the request for later calls.
     data['session'] = session

     if is_robot(etree.HTML(page)):
         return None
     return data
コード例 #2
0
def amazon_robot_check(country):
    """Probe the Amazon site for ``country`` using a fresh session.

    Prints a progress line and the outcome, then returns ``True`` when
    ``is_robot`` does not flag the parsed response and ``False`` when it does.
    """
    print('正在进行amazon机器人验证')

    # Start from the shared header template, with a random user agent per probe.
    headers = amazon_headers.copy()
    headers['user-agent'] = UserAgent().random

    # The session returned by request() is not needed after this single probe.
    _, page = request(requests.session(),
                      getAmazonDomain(country),
                      headers=headers)

    challenged = is_robot(etree.HTML(page))
    print('机器人验证' if challenged else '没有机器人验证')
    return not challenged
コード例 #3
0
def find_wolves(min_auctions=100):
    """Count and label "wolves": bidders seen in more than ``min_auctions`` auctions.

    Iterates over ``shared.bidders``. Every bidder whose ``auctions``
    collection has more than ``min_auctions`` entries is counted as a robot
    wolf or a human wolf (per ``utils.is_robot`` / ``utils.is_human``; bidders
    matching neither are not counted) and gets ``set_outcome(1.0)`` applied
    regardless of which of those it was.

    Prints a summary line with the total number of bidders, the human and
    robot wolf counts, and the number of robots among all bidders.

    Args:
        min_auctions: Auction-count threshold a bidder must exceed to be
            treated as a wolf. Defaults to 100, the previously hard-coded
            value.
    """
    print('Wolves:')
    human_wolves = 0
    robot_wolves = 0
    robots = 0
    for bidder in shared.bidders.values():
        # NOTE(review): the result of features() was never used here. The
        # call is kept in case it has side effects (e.g. caching) -- confirm
        # and drop it if it is pure.
        bidder.features()

        # Classify before set_outcome() below can alter this bidder's label,
        # so the overall robot count reflects the labels as loaded.
        bidder_is_robot = utils.is_robot(bidder)
        if bidder_is_robot:
            robots += 1

        if len(bidder.auctions) <= min_auctions:
            continue

        if bidder_is_robot:
            robot_wolves += 1
        elif utils.is_human(bidder):
            human_wolves += 1

        bidder.set_outcome(1.0)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('among %s bidders, found %s wolves-humans and %s robots (among %s robots in general)' % (
        len(shared.bidders), human_wolves, robot_wolves, robots))
コード例 #4
0
 def start_download(self):
     """Fetch the current review page, store its rows, then move on to the next page.

     The method stops (re-enabling the start button) when the fetch returns
     a numeric status instead of page data, or when ``is_robot`` flags the
     parsed page. When the parser reports a language mismatch it sets
     ``self.is_lang`` and re-requests the same page; when there is a next
     page it advances and calls itself again. After the last page the CSV
     is closed and the start button is re-enabled.
     """
     # Fetch the current page; a truthy numeric result is a status code, not content.
     page_data = self.requests.getAmaoznData(self.is_lang)
     self.write_msg('正在获取第{}页'.format(self.requests.getPage()))
     if page_data and is_number(page_data):
         # Known status codes and the message reported for each.
         status_messages = (
             (404, 'asin不存在,请查看是否输入有误'),
             (2, '请求失败'),
             (400, '服务器无法理解此请求'),
         )
         for status, message in status_messages:
             if page_data == status:
                 self.write_msg(message)
         self.startButton.config(state=NORMAL)
         return

     self.write_msg('正在解析数据')
     parser = AmazonDispose(page_data, self.siteBox.get(),
                            self.asinEntry.get())

     # A robot-check page cannot be parsed; give control back to the user.
     if is_robot(parser.get_selector()):
         self.write_msg('机器人验证')
         self.startButton.config(state=NORMAL)
         return

     # Language mismatch: remember it and request the same page again.
     if parser.is_lang():
         self.is_lang = True
         self.write_msg('语言不符合, 重新请求')
         self.wait('重新请求')
         self.start_download()
         return

     rows = parser.dispose()
     print(rows)
     if not rows:
         self.write_msg('没有数据可以写入')
     else:
         self.write_msg('写入数据')
         self.csv.writerCsv(rows)

     # Last page: finish the CSV and re-enable the start button.
     if not parser.isNextPage():
         self.csv.closeCsv()
         self.write_msg('评论获取完毕')
         self.startButton.config(state=NORMAL)
         return

     self.wait('请求下一页')
     self.requests.nextPage()
     self.start_download()
コード例 #5
0
def read_bidders(filename):
    """Load bidders from a CSV file and index them in the ``shared`` lookups.

    Each row after the skipped prefix becomes a ``Bidder`` built from its
    first three columns plus the fourth column when present (``None``
    otherwise). The bidder is registered via ``add_bidder`` and its id is
    added to the ``shared`` address and payment indexes. Finally prints how
    many bidders in ``shared.bidders`` are robots, humans, or have no
    outcome.
    """
    with open(filename, 'rb') as handle:
        rows = csv.reader(handle, delimiter=',', quotechar='|')
        skip(rows, 1)  # presumably drops the header row -- verify against skip()
        for row in rows:
            fourth = row[3] if len(row) > 3 else None
            bidder = Bidder(row[0], row[1], row[2], fourth)

            add_bidder(bidder)

            # Each shared index maps a bidder attribute to the ids sharing it.
            indexes = (
                (shared.addr_1, bidder.get_addr_1),
                (shared.addr_2, bidder.get_addr_2),
                (shared.pmt_type, bidder.get_payment_type),
                (shared.pmt_accnt, bidder.get_payment_acct),
            )
            for index, key_of in indexes:
                index[key_of()].add(bidder.bidder_id)

        print("%s data done" % filename)
        robot_count = sum(
            1 for each in shared.bidders.values() if utils.is_robot(each))
        human_count = sum(
            1 for each in shared.bidders.values() if utils.is_human(each))
        unknown_count = sum(
            1 for each in shared.bidders.values() if each.outcome is None)
        print("Found %s robots, %s humans, %s unknown" % (
            robot_count, human_count, unknown_count))
コード例 #6
0
 def is_robot(self):
     """Return whatever ``utils.is_robot`` reports for this object."""
     verdict = utils.is_robot(self)
     return verdict