Beispiel #1
0
class Kuai(object):
    """Scrape ``ip:port`` proxy strings from every page listed in ``URL``."""

    def __init__(self):
        # Logger tagged with this class's name.
        self.log = Log(__class__.__name__)

    @staticmethod
    @check
    def get_response(url):
        """Fetch *url* with the shared ``HEADER`` and return the response.

        The response encoding is set to the encoding detected from the
        body before the response is returned.

        Raises:
            requests.HTTPError: for a 4xx/5xx status, before ``check`` gets
                to see the result.

        NOTE(review): ``ips`` treats a ``None`` return as "request failed",
        so ``check`` presumably converts exceptions into ``None`` — confirm.
        """
        resp = requests.get(url=url, headers=HEADER)
        resp.encoding = resp.apparent_encoding
        # This must be *called*: the bare attribute reference used before
        # was a no-op, so error pages were handed to the parser as if valid.
        resp.raise_for_status()
        return resp

    @property
    def ips(self):
        """Yield one ``"ip:port"`` string per row matched by ``XPATH``.

        Pages whose response has no ``text`` attribute (e.g. ``None``) are
        logged and skipped.
        """
        for url in URL:
            r = self.get_response(url)
            try:
                html = etree.HTML(r.text)
            except AttributeError as e:
                self.log.error("Response is None,ErrorType:%s" % e)
                continue
            for row in html.xpath(XPATH):
                # The first two cell texts of a row are joined as ip:port.
                proxy = ':'.join(row.xpath("./td/text()")[:2])
                self.log.info("Find:%s" % proxy)
                yield proxy
Beispiel #2
0
class Mongo(object):
    """Thin convenience wrapper around a single MongoDB collection."""

    def __init__(self, dbname="test", collection="test", host=None, port=None):
        """Connect with ``MongoClient(host, port)`` and select the collection.

        Args:
            dbname: Name of the database to use.
            collection: Name of the collection inside that database.
            host: Passed through to ``MongoClient``.
            port: Passed through to ``MongoClient``.
        """
        self.dbn = dbname
        self.coln = collection
        self.client = MongoClient(host, port)
        self.db = self.client[dbname]
        self.collection = self.db[collection]
        self.log = Log(__class__.__name__)

    def insert(self, dict_data):
        """Insert *dict_data* unless an identical document already exists.

        Raises:
            TypeError: if *dict_data* is not a ``dict``.
        """
        if not isinstance(dict_data, dict):
            # Log the repr of the rejected value. The previous code passed
            # it to eval(), which executes arbitrary input.
            self.log.error("Error: %s,dict is :%s" %
                           (TypeError, repr(dict_data)))
            raise TypeError(
                "dict_data must be a dict, got %s" % type(dict_data).__name__)
        # Check existence before inserting to avoid duplicates.
        if not self.is_exist(dict_data):
            self.collection.insert_one(dict_data)
            self.log.info("Insert dict is :%s" % (str(dict_data)))

    def delete(self, key, value):
        """Delete every document whose field *key* equals *value*.

        Logs a "Not exist" message when nothing matched.

        Raises:
            AttributeError: if both *key* and *value* are falsy.
        """
        if not (value or key):
            self.log.error("MongoDB delete Error: %s,key is :%s,value is :%s" %
                           (AttributeError, key, value))
            raise AttributeError("delete() needs a key or a value")
        # Collection has no ``delete`` method; ``delete_many`` removes all
        # matches and reports how many documents were removed.
        result = self.collection.delete_many({key: value})
        if not result.deleted_count:
            self.log.info(
                "MongoDB delete Error: %s,key is :%s,value is :%s" %
                ("Not exist", key, value))

    def find_all(self):
        """Yield every document in the collection."""
        yield from self.collection.find()

    def find_one(self):
        """Return one randomly chosen document from the collection.

        Raises:
            IndexError: if the collection is empty.
        """
        return random.choice(list(self.find_all()))

    def is_exist(self, dict_data):
        """Return ``True`` (and log a warning) if a matching document exists."""
        if self.collection.find_one(dict_data):
            self.log.warning("Dict is exist :%s" % (str(dict_data)))
            return True
        return False

    def delete_all(self):
        """Remove every document from the collection."""
        # An empty filter matches all documents.
        self.collection.delete_many({})
        self.log.warning("MongoDB delete all remarks.")

    # drop_database is a method of MongoClient, unlike in the mongo shell.
    def drop_db(self):
        """Drop the whole database this wrapper was created for."""
        self.client.drop_database(self.dbn)
        self.log.warning("MongoDB delete db %s." % self.dbn)

    def drop_collection(self):
        """Drop the wrapped collection."""
        self.collection.drop()
        self.log.warning("MongoDB delete collection %s." % self.coln)
Beispiel #3
0
class You(object):
    """Scrape ``ip:port`` proxies from a paginated article found via ``start_url``."""

    # Dotted-quad address followed by a port. Ports may have up to five
    # digits (max 65535); a four-digit limit would truncate e.g. ``:53281``.
    _PROXY_PATTERN = r"(?:\d{1,3}\.){3}\d{1,3}:\d{1,5}"

    def __init__(self):
        # Logger tagged with this class's name.
        self.log = Log(__class__.__name__)

    @staticmethod
    def get_response(url):
        """Fetch *url* with the shared ``HEADER`` and return the response.

        The response encoding is set to the encoding detected from the
        body before the response is returned.

        Raises:
            requests.HTTPError: for a 4xx/5xx status.
        """
        resp = requests.get(url=url, headers=HEADER)
        resp.encoding = resp.apparent_encoding
        # This must be *called*: the bare attribute reference used before
        # was a no-op, so error pages were scraped as if they were valid.
        resp.raise_for_status()
        return resp

    @staticmethod
    def _extract_proxies(text):
        """Return every ``ip:port`` substring of *text*, in order."""
        return re.findall(You._PROXY_PATTERN, text)

    @property
    def ips(self):
        """Yield ``"ip:port"`` strings from the linked article and its pages.

        The first link matched by ``XPATH`` on the start page is the first
        article page; pages 2..``PAGE`` are derived from it by inserting
        ``_<index>`` after the number in that URL. Proxies are yielded page
        by page, so results already found survive a later page failing.
        """
        start_resp = self.get_response(start_url)
        try:
            html = etree.HTML(start_resp.text)
        except AttributeError as e:
            self.log.error("Response is None,ErrorType:%s" % e)
            return

        # etree.HTML can return None for an empty document, and the XPath
        # can match nothing; neither should end in an uncaught exception.
        links = html.xpath(XPATH) if html is not None else []
        if not links:
            self.log.error("No article link found on start page")
            return
        parse_url = links[0]

        urls = [parse_url]
        num = re.search(r"\d+", parse_url)
        if num is not None:
            # Every digit run in the URL is replaced, as before.
            link = re.sub(r"\d+", num.group(0) + "_{index}", parse_url)
            urls.extend(link.format(index=ind) for ind in range(2, PAGE + 1))

        for url in urls:
            r = self.get_response(url)
            try:
                text = r.text
            except AttributeError as e:
                self.log.error("Response is None,ErrorType:%s" % e)
                return
            for proxy in self._extract_proxies(text):
                self.log.info("Find:%s" % proxy)
                yield proxy