def __init__(self, url, ip=None):
    """Initialize the robot for *url* and fetch the host's /robots.txt.

    Args:
        url: Host URL this robot applies to (forwarded to the base class).
        ip: Optional IP address hint (forwarded to the base class).

    Side effects:
        Performs a blocking HTTP GET of ``/robots.txt`` on port 80 against
        the first resolved address in ``self.ip_list`` and stores the body
        in ``self.robots_txt``, then parses it via ``self.__process()``.
    """
    super(Robot, self).__init__(url, ip)
    # Imported function-locally, presumably to avoid a circular import
    # with crawler.connection — TODO confirm.
    from crawler.connection import USER_AGENT, send_request
    request = HttpRequestGet(host=self.url, path='/robots.txt')
    request.set("User-Agent", USER_AGENT)
    request.set("Accept", "text/plain")
    response = send_request(request.writable(), self.ip_list[0], 80)
    if response:
        self.robots_txt = response.content
        self.__process()
    else:
        # BUG FIX: previously the attribute was never assigned on a failed
        # fetch, so later reads of self.robots_txt raised AttributeError.
        # Default to an empty ruleset instead. (NOTE(review): assumes the
        # base class does not already set robots_txt — confirm.)
        self.robots_txt = ''
def isknown(self, url):
    """Ask the index service whether *url* is already known.

    Args:
        url: The URL to look up.

    Returns:
        The raw response object from ``send_request`` (whatever the
        ``/url/isknown`` endpoint at ``self.ip:self.port`` answers;
        falsy when the request fails).
    """
    # BUG FIX: USER_AGENT and send_request were referenced without being
    # in scope here. Import them function-locally, matching the pattern
    # used in __init__ (which avoids importing crawler.connection at
    # module level — presumably a circular-import workaround).
    from crawler.connection import USER_AGENT, send_request
    request = HttpRequestGet(host=self.host, path='/url/isknown')
    request.set("User-Agent", USER_AGENT)
    request.set("Accept", "application/xml;text/xml")
    request.set("Accept-Charset", "utf-8")
    request.query("url", url)
    request.query("consumer_key", "none")
    return send_request(request.writable(), self.ip, self.port)
def choose(self, limit=10):
    """Fetch a batch of candidate URLs from the ``/url/choose`` endpoint.

    Args:
        limit: Maximum number of URLs requested from the service
            (default 10).

    Returns:
        The result of ``self.__process(response)`` applied to the raw
        service response.
    """
    # BUG FIX: USER_AGENT and send_request were referenced without being
    # in scope here; import them function-locally as __init__ does.
    from crawler.connection import USER_AGENT, send_request
    request = HttpRequestGet(host=self.host, path='/url/choose')
    request.set("User-Agent", USER_AGENT)
    request.set("Accept", "application/xml;text/xml")
    request.set("Accept-Charset", "utf-8")
    request.query("consumer_key", "none")
    # BUG FIX: `limit` was accepted but never transmitted, so callers'
    # limits were silently ignored. Send it with the request.
    # TODO(review): confirm the service accepts a "limit" query parameter.
    request.query("limit", str(limit))
    response = send_request(request.writable(), self.ip, self.port)
    return self.__process(response)