Beispiel #1
0
def fetch_kuaidaili() -> list:
    '''
    Scrape free proxies from kuaidaili.com (listing pages 1 and 2).

    Each page is fetched through ``get_html`` and queried with XPath for
    the IP and PORT table cells; the two result lists are paired
    positionally into ``"ip:port"`` strings. The loop sleeps 3 seconds
    after every page.

    :return: list of ``"ip:port"`` strings; an empty list if any page
        raised (results gathered before the error are discarded).
    '''
    page_template = 'https://www.kuaidaili.com/free/inha/{}/'
    ip_query = '//div[@id="list"]/table/tbody/tr/td[@data-title="IP"]/text()'
    port_query = '//div[@id="list"]/table/tbody/tr/td[@data-title="PORT"]/text()'

    proxies = []
    try:
        for page_no in (1, 2):
            tree = get_html(page_template.format(page_no))
            hosts = tree.xpath(ip_query)
            ports = tree.xpath(port_query)
            proxies.extend(
                '{}:{}'.format(host, port) for host, port in zip(hosts, ports))
            # Pause between page requests.
            sleep(3)
    except Exception:
        logger.exception('error')
        traceback.print_exc()
        # A failure on any page throws away everything collected so far.
        proxies = []

    if not proxies:
        logger.warning(' fail to fetch kuaidaili')
    return proxies
Beispiel #2
0
def fetch_kxdaili() -> list:
    '''
    Scrape proxies from kxdaili.com (listing pages 1 to 3).

    The text of every ``<td>`` under ``//tbody/tr`` is read; only cells
    that consist of digits, optionally separated by dots, are kept. The
    surviving cells are assumed to alternate IP, port, IP, port, ... and
    are paired in that order.

    :return: list of ``"ip:port"`` strings; an empty list if any page
        raised (results gathered before the error are discarded).
    '''
    page_template = 'http://www.kxdaili.com/dailiip/1/{}.html#ip'

    proxies = []
    try:
        for page_no in (1, 2, 3):
            tree = get_html(page_template.format(page_no))
            numeric_cells = []
            for cell in tree.xpath('//tbody/tr/td'):
                text = cell.text
                # Skip cells with no text (None or empty string).
                if not text:
                    continue
                if text.isdigit() or text.replace('.', '').isdigit():
                    numeric_cells.append(text)
            # Even positions hold IPs, odd positions hold ports.
            pairs = zip(numeric_cells[0::2], numeric_cells[1::2])
            proxies.extend('{}:{}'.format(host, port) for host, port in pairs)
    except Exception:
        logger.exception('error')
        traceback.print_exc()
        proxies = []

    if not proxies:
        logger.warning(' fail to fetch kxdaili')
    return proxies
Beispiel #3
0
def fetch_3366ip() -> list:
    '''
    Scrape proxies from ip3366.net (listing pages 1 to 3).

    The text of every ``<td>`` under ``//tbody/tr`` is read; only cells
    that consist of digits, optionally separated by dots, are kept. The
    surviving cells are assumed to alternate IP, port, IP, port, ... and
    are paired in that order.

    :return: list of ``"ip:port"`` strings; an empty list if any page
        raised (results gathered before the error are discarded).
    '''
    base_url = 'http://www.ip3366.net/free/?stype=1&page={}'
    proxies = []
    try:
        for page in range(1, 4):
            html = get_html(base_url.format(page))
            cells = [td.text for td in html.xpath('//tbody/tr/td')]
            # A cell without text yields None; guard before calling str
            # methods so one empty cell does not abort the whole scrape.
            numeric = [
                text for text in cells
                if text and (text.isdigit()
                             or ''.join(text.split('.')).isdigit())
            ]
            # Even positions hold IPs, odd positions hold ports.
            for ip, port in zip(numeric[0::2], numeric[1::2]):
                proxies.append('{}:{}'.format(ip, port))
    except Exception:
        logger.exception('error')
        traceback.print_exc()
        proxies = []
    if not proxies:
        logger.warning(' fail to fetch 3366ip')
    return proxies
Beispiel #4
0
 def add_one(self, item: str):
     '''
     Insert one proxy into the table with ``valid`` set to 1.

     Errors are logged and the transaction is rolled back; nothing is
     raised to the caller.

     :param item: proxy string stored verbatim in the ``IP_PORT`` column.
     '''
     try:
         # Only the table name is interpolated (identifiers cannot be
         # bound). The value is passed as a bound parameter so quotes in
         # ``item`` can neither break the statement nor inject SQL.
         sql = "INSERT INTO {} (IP_PORT, valid) VALUES (?, 1)".format(
             self.db_name)
         self.cur.execute(sql, (item,))
         self.conn.commit()
     except Exception:
         self.conn.rollback()
         logger.exception('add fail')
         traceback.print_exc()
Beispiel #5
0
 def set_false(self, item: str):
     '''
     Mark the given proxy as invalid (``valid = 0``).

     The row is not deleted; only its ``valid`` flag is cleared. Errors
     are logged and the transaction is rolled back; nothing is raised to
     the caller.

     :param item: proxy string to match against the ``IP_PORT`` column.
     '''
     try:
         # Only the table name is interpolated (identifiers cannot be
         # bound). Binding ``item`` keeps a crafted value such as
         # "x' OR '1'='1" from matching every row.
         sql = "UPDATE {} SET valid=0 WHERE IP_PORT=?".format(self.db_name)
         self.cur.execute(sql, (item,))
         self.conn.commit()
     except Exception:
         self.conn.rollback()
         logger.exception('set false fail')
         traceback.print_exc()
Beispiel #6
0
 def get_all(self) -> list:
     '''
     Return the ``IP_PORT`` value of every row in the table, regardless
     of the ``valid`` flag.

     :return: list of proxy strings; an empty list if the query fails
         (the error is logged, not raised).
     '''
     # Bound before the query so the failure path returns an empty list
     # instead of hitting an unbound local at the return statement.
     ports = []
     try:
         sql = 'SELECT IP_PORT FROM %s'
         self.cur.execute(sql % self.db_name)
         ports = [row[0] for row in self.cur]
     except Exception:
         logger.exception('get all fail')
         traceback.print_exc()
     return ports
Beispiel #7
0
 def get_invalid_items(self) -> list:
     '''
     Return the ``IP_PORT`` value of every row whose ``valid`` flag is 0.

     :return: list of proxy strings; an empty list if the query fails
         (the error is logged, not raised).
     '''
     # Bound before the query so the failure path returns an empty list
     # instead of hitting an unbound local at the return statement.
     ports = []
     try:
         sql = 'SELECT IP_PORT FROM %s where valid=0'
         self.cur.execute(sql % self.db_name)
         ports = [row[0] for row in self.cur]
     except Exception:
         logger.exception('get invalid items fail')
         traceback.print_exc()
     return ports
Beispiel #8
0
 def get_valid_count(self):
     '''
     Count the rows whose ``valid`` flag is 1.

     :return: number of valid proxies; 0 if the query fails (the error
         is logged and the transaction rolled back, nothing is raised).
     '''
     # Default used when the query fails; without it the return statement
     # would reference an unbound local.
     count = 0
     try:
         sql = "SELECT COUNT(*) FROM %s where valid=1"
         self.cur.execute(sql % self.db_name)
         count = self.cur.fetchone()[0]
     except Exception:
         self.conn.rollback()
         logger.exception('get_count fail')
         traceback.print_exc()
     return count
Beispiel #9
0
 def get_one(self) -> str:
     '''
     Pick one valid proxy at random.

     All rows with ``valid = 1`` are loaded and one is chosen with
     ``random.choice``.

     :return: proxy string as stored in ``IP_PORT``
         (e.g. ``127.0.0.1:8080``).
     :raises IndexError: if no valid proxy is available, either because
         the table holds none or because the query failed (the query
         error itself is logged and rolled back).
     '''
     # Bound before the query so a failed query is reported the same way
     # as an empty pool instead of as an unbound local.
     ports = []
     try:
         sql = "SELECT IP_PORT FROM %s where valid=1"
         self.cur.execute(sql % self.db_name)
         ports = [row[0] for row in self.cur]
     except Exception:
         self.conn.rollback()
         logger.exception('get_one fail')
         traceback.print_exc()
     if not ports:
         # Same exception type random.choice raises on an empty sequence,
         # with a message that names the actual problem.
         raise IndexError('no valid proxy available')
     return random.choice(ports)
Beispiel #10
0
 def __init__(self):
     '''
     Open the SQLite database ``proxy.db`` and make sure the proxy table
     exists.

     The table (named by ``self.db_name``) has three columns: ``id``,
     ``IP_PORT`` and ``valid``. A failure while creating it is logged and
     rolled back; nothing is raised to the caller.
     '''
     self.conn = sqlite3.connect('proxy.db')
     self.cur = self.conn.cursor()
     self.db_name = 'proxies'
     try:
         # The key must be spelled INTEGER PRIMARY KEY to alias SQLite's
         # rowid and be auto-assigned; with "int PRIMARY KEY" an insert
         # that omits id stores NULL. Tables that already exist are left
         # untouched by IF NOT EXISTS.
         sql = "CREATE TABLE IF NOT EXISTS {} (id INTEGER PRIMARY KEY, IP_PORT varchar(20) NOT NULL, valid int(1) NOT NULL)"
         self.cur.execute(sql.format(self.db_name))
     except Exception:
         self.conn.rollback()
         logger.exception('init fail')
         traceback.print_exc()
Beispiel #11
0
def fetch_66ip() -> list:
    '''
    Scrape proxies from the 66ip.cn export endpoint.

    The text content of ``<body>`` is split on ``'\\r\\n\\t\\t'``; the
    first chunk and the last two chunks are dropped and the remaining
    chunks are returned unchanged.

    :return: list of strings taken from the page body; an empty list if
        fetching or parsing raised.
    '''
    url = 'http://www.66ip.cn/nmtq.php?getnum=512&isp=0&anonymoustype=0&start=&ports=&export=&ipaddress=&area=0&proxytype=2&api=66ip'
    try:
        tree = get_html(url)
        body_text = tree.xpath('string(//body)')
        # Slicing already yields a fresh list, so no copy loop is needed.
        proxies = body_text.split('\r\n\t\t')[1:-2]
    except Exception:
        logger.exception('error')
        traceback.print_exc()
        proxies = []

    if not proxies:
        logger.warning(' fail to fetch 66ip')
    return proxies
Beispiel #12
0
def fetch_mimvp() -> list:
    '''
    Scrape proxies from proxy.mimvp.com.

    Only the IP cells are read from the page; every address is paired
    with the fixed port 80.

    :return: list of ``"ip:80"`` strings; an empty list if fetching or
        parsing raised.
    '''
    url = 'https://proxy.mimvp.com/free.php?proxy=in_hp'
    ip_query = '//div[@class="free-list"]/table/tbody/td[@class="tbl-proxy-ip"]/text()'
    try:
        tree = get_html(url)
        hosts = tree.xpath(ip_query)
        # The port is not scraped; 80 is used for every address.
        proxies = ['{}:{}'.format(host, 80) for host in hosts]
    except Exception:
        logger.exception('error')
        traceback.print_exc()
        proxies = []

    if not proxies:
        logger.warning(' fail to fetch mimvp')
    return proxies
Beispiel #13
0
def fetch_xici() -> list:
    '''
    Scrape proxies from xicidaili.com (listing pages 1 and 2).

    For rows with ``class="odd"``, the text of the second cell is taken
    as the IP and the third as the port; the two lists are paired
    positionally.

    :return: list of ``"ip:port"`` strings; an empty list if any page
        raised (results gathered before the error are discarded).
    '''
    page_template = 'http://www.xicidaili.com/nn/{}'

    proxies = []
    try:
        for page_no in (1, 2):
            tree = get_html(page_template.format(page_no))
            hosts = tree.xpath('//tr[@class="odd"]/td[2]/text()')
            ports = tree.xpath('//tr[@class="odd"]/td[3]/text()')
            proxies += [
                '{}:{}'.format(host, port) for host, port in zip(hosts, ports)
            ]
    except Exception:
        logger.exception('error')
        traceback.print_exc()
        proxies = []

    if not proxies:
        logger.warning(' fail to fetch xici')
    return proxies
Beispiel #14
0
def fetch_ip181() -> list:
    '''
    Scrape proxies from the ip181.com front page.

    The text of every ``<td>`` under ``//tbody/tr`` is read; only cells
    that consist of digits, optionally separated by dots, are kept. The
    surviving cells are assumed to alternate IP, port, IP, port, ... and
    are paired in that order.

    :return: list of ``"ip:port"`` strings; an empty list if fetching or
        parsing raised.
    '''
    url = 'http://www.ip181.com/'
    try:
        tree = get_html(url)
        cell_texts = [td.text for td in tree.xpath('//tbody/tr/td')]
        numeric = [
            text for text in cell_texts
            if text and (text.isdigit() or text.replace('.', '').isdigit())
        ]
        # Drawing twice from one iterator yields consecutive
        # (IP, port) pairs; a trailing unpaired cell is dropped.
        stream = iter(numeric)
        proxies = ['{}:{}'.format(host, port) for host, port in zip(stream, stream)]
    except Exception:
        logger.exception('error')
        traceback.print_exc()
        proxies = []

    if not proxies:
        logger.warning(' fail to fetch ip181')
    return proxies
Beispiel #15
0
def fetch_data5u() -> list:
    '''
    Scrape proxies from the data5u.com free list.

    The text of every ``<li>`` matched by ``//ul[@class="l2"]/span/li``
    is read; only entries that consist of digits, optionally separated
    by dots, are kept. The surviving entries are assumed to alternate
    IP, port, IP, port, ... and are paired in that order.

    :return: list of ``"ip:port"`` strings; an empty list if fetching or
        parsing raised.
    '''
    url = 'http://www.data5u.com/free/gngn/index.shtml'

    def looks_numeric(text):
        # True for non-empty text made of digits, optionally with dots.
        if not text:
            return False
        return text.isdigit() or ''.join(text.split('.')).isdigit()

    try:
        tree = get_html(url)
        entries = [li.text for li in tree.xpath('//ul[@class="l2"]/span/li')]
        numeric = [text for text in entries if looks_numeric(text)]
        hosts = numeric[0::2]
        ports = numeric[1::2]
        proxies = ['{}:{}'.format(host, port) for host, port in zip(hosts, ports)]
    except Exception:
        logger.exception('error')
        traceback.print_exc()
        proxies = []

    if not proxies:
        logger.warning(' fail to fetch data5u')
    return proxies