class ProxyValidSchedule(ProxyManager):
    """Scheduler that keeps re-validating the proxies in the useful queue."""

    def __init__(self):
        ProxyManager.__init__(self)
        # Use a logger name dedicated to this scheduler.
        self.log = LogHandler('valid_schedule')

    def __validProxy(self):
        """
        Re-check every proxy of the useful queue, over and over.

        A proxy that validates gets its counter raised by one, a proxy that
        fails gets it lowered by one; as soon as the stored counter reads
        below -5 the proxy is deleted.

        :return: never returns normally (the loop has no exit)
        """
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for candidate in self.db.getAll():
                # Entries may come back as bytes; work with text throughout.
                proxy = (candidate.decode('utf-8')
                         if isinstance(candidate, bytes) else candidate)

                if validUsefulProxy(proxy):
                    # Success: counter + 1.
                    self.db.inckey(proxy, 1)
                    message = 'validProxy_b: {} validation pass'.format(proxy)
                    self.log.debug(message)
                else:
                    # Failure: counter - 1.
                    self.db.inckey(proxy, -1)
                    message = 'validProxy_b: {} validation fail'.format(proxy)
                    self.log.info(message)

                score = self.db.getvalue(proxy)
                if not score:
                    continue
                if int(score) < -5:
                    # Too many failures: drop the proxy.
                    self.db.delete(proxy)
        # NOTE(review): unreachable, the loop above never terminates.
        self.log.info('validProxy_a running normal')

    def main(self):
        """Entry point: start the endless validation loop."""
        self.__validProxy()
class ProxyManager(object):
    """Thin layer over the proxy store with a raw and a useful proxy table."""

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.log = LogHandler('proxy_manager')
        # Names of the two tables the store is switched between.
        self.raw_proxy_queue = 'raw_proxy'
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Run every configured getter and store what it yields in the raw table.

        Each getter name listed in ``config.proxy_getter_functions`` is looked
        up on ``GeteFreeProxy`` and called; blank results are skipped and the
        rest are stripped and de-duplicated per getter before being stored.
        """
        for getter_name in self.config.proxy_getter_functions:
            fetch = getattr(GeteFreeProxy, getter_name.strip())
            fetched = set()
            for candidate in fetch():
                cleaned = candidate.strip()
                if not cleaned:
                    continue
                # The log line shows the value exactly as the getter gave it.
                self.log.info('{func}: fetch proxy {proxy}'.format(
                    func=getter_name, proxy=candidate))
                fetched.add(cleaned)

            self.db.changeTable(self.raw_proxy_queue)
            for cleaned in fetched:
                self.db.put(cleaned)

    def get(self):
        """Return what the store's ``get()`` yields for the useful table."""
        store = self.db
        store.changeTable(self.useful_proxy_queue)
        return store.get()

    def delete(self, proxy):
        """Delete ``proxy`` from the useful table."""
        store = self.db
        store.changeTable(self.useful_proxy_queue)
        store.delete(proxy)

    def getAll(self):
        """Return what the store's ``getAll()`` yields for the useful table."""
        store = self.db
        store.changeTable(self.useful_proxy_queue)
        return store.getAll()

    def get_status(self):
        """
        Report the store status of both tables.

        :return: dict with the keys 'raw_proxy' and 'useful_proxy'
        """
        tables = (
            ('raw_proxy', self.raw_proxy_queue),
            ('useful_proxy', self.useful_proxy_queue),
        )
        status = {}
        for label, table in tables:
            self.db.changeTable(table)
            status[label] = self.db.get_status()
        return status
Exemple #3
0
class ProxyRefreshSchedule(ProxyManager):
    """
    Periodic proxy refresh.
    """
    def __init__(self):
        ProxyManager.__init__(self)
        # Use a logger name dedicated to this scheduler.
        self.log = LogHandler('refresh_schedule')

    def validProxy(self):
        """
        Validate the proxies in raw_proxy_queue and put the usable ones
        into useful_proxy_queue.

        Proxies are popped from the raw table until it is empty. A proxy is
        stored in the useful table only if it validates and was not already
        in the useful table when this run started.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        raw_proxy = self.db.pop()
        self.log.info('%s start validProxy_a' % time.ctime())
        # Snapshot the proxies that are already accepted. The useful table
        # has to be selected first: reading while the raw table is active
        # returns the raw queue itself, which never contains the proxy that
        # was just popped, so the duplicate check below would always pass.
        self.db.changeTable(self.useful_proxy_queue)
        exist_proxy = self.db.getAll()
        while raw_proxy:
            if validUsefulProxy(raw_proxy) and (raw_proxy not in exist_proxy):
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(raw_proxy)
                self.log.info('validProxy_a: %s validation pass' % raw_proxy)
            else:
                self.log.debug('validProxy_a: %s validation fail' % raw_proxy)
            # Go back to the raw table to fetch the next candidate.
            self.db.changeTable(self.raw_proxy_queue)
            raw_proxy = self.db.pop()
        self.log.info('%s validProxy_a complete' % time.ctime())
Exemple #4
0
def test_log_handler():
    """Smoke test: log once, then rename the logger twice, logging after each rename."""
    log = LogHandler("Tlog")
    log.info("test log")

    renames = (
        ("test1", 'this is a log from test1'),
        ('test2', 'this is a log from test2'),
    )
    for new_name, message in renames:
        log.resetName(new_name)
        log.info(message)
Exemple #5
0
class DBConfig(object):
    """Read and write settings in the INI file located at ``self.sql_path``."""

    # Written to the log in place of the real password.
    _MASK = "******"

    def __init__(self, ):
        self.config = ConfigParser()
        self.name = "config.ini"
        self.sql_path = os.path.join(ROOT_PATH, self.name)
        self.log = LogHandler("db")

    def _save(self):
        """Write the in-memory configuration back to ``self.sql_path``."""
        with open(self.sql_path, "w", encoding="utf8") as f:
            self.config.write(f)

    def add_db_config(self, dbtype, host, port, user, password, database,
                      charset):
        """
        Add or modify a database configuration section.

        :param dbtype: database type, used as the section name
        :param host: host
        :param port: port (an int is accepted and stored as text)
        :param user: user name
        :param password: password
        :param database: database name
        :param charset: character set
        :return: True once the file has been written
        """
        self.config.read(self.sql_path, encoding="utf-8")

        # Adding and modifying differ only in whether the section exists yet.
        if dbtype not in self.config:
            self.config.add_section(dbtype)

        settings = (
            ("host", host),
            # ConfigParser.set() rejects non-string values, while
            # get_db_config() hands the port back as int; accept both.
            ("port", str(port)),
            ("user", user),
            ("password", password),
            ("database", database),
            ("charset", charset),
        )
        for key, value in settings:
            self.config.set(dbtype, key, value)

        self._save()
        # The password is masked so that it does not end up in the log.
        self.log.info(
            "Amend the success , Modifying the data %s" %
            [dbtype, host, port, user, self._MASK, database, charset])
        return True

    def get_db_config(self, dbtyep):
        """
        Return the configuration stored for a database type.

        :param dbtyep: database type (section name)
        :return: dict of the section's options with ``port`` converted to
                 int, or None when the section does not exist
        """
        self.config.read(self.sql_path, encoding="utf-8")
        if dbtyep not in self.config:
            self.log.error("Parameter error %s" % dbtyep)
            return None

        option = dict(self.config.items(dbtyep))
        if "port" in option:
            option["port"] = int(option["port"])

        # Log a copy with the password masked; the caller gets the real one.
        shown = dict(option)
        if "password" in shown:
            shown["password"] = self._MASK
        self.log.info("success %s" % shown)
        return option

    def update_config(self, section, option, value):
        """
        Change one existing option of an existing section.

        :param section: section name
        :param option:  key to modify
        :param value:   new value
        :return: True when the file was written, None when the section or
                 the option does not exist
        """
        self.config.read(self.sql_path, encoding="utf-8")
        if section not in self.config.sections():
            self.log.error("Parameter error %s" % section)
            return None
        if option not in self.config.options(section):
            self.log.error("Parameter error %s" % option)
            return None

        self.config.set(section, option, value)
        self.log.info("Need to be modified")
        self._save()
        self.log.info("Amend the success")
        return True

    def add_config(self, section, option, value):
        """
        Set an option, creating the section first when it is missing.

        :param section: section name
        :param option:  key to set
        :param value:   value to store
        :return: True once the file has been written
        """
        self.config.read(self.sql_path, encoding="utf-8")
        if section not in self.config.sections():
            self.config.add_section(section)
        self.config.set(section, option, value)
        self._save()
        self.log.info("Amend the success")
        return True
Exemple #6
0
# SMTP server address handed to sendMail() as ``smtpadd`` by the script entry point.
_smtpadd = 'smtp.ruifucredit.com'


def sendMail(sender, reciver, subject, content, passwd, smtpadd):
    """
    Send an HTML e-mail through an SMTP server that requires a login.

    :param sender: From address, also used as the SMTP login name
    :param reciver: recipient address(es); several are separated by commas
    :param subject: subject line
    :param content: HTML body
    :param passwd: SMTP password of ``sender``
    :param smtpadd: address of the SMTP server
    :return: None; smtplib errors are propagated to the caller
    """
    log.info('Start to initialize the mail message.')
    msg = MIMEMultipart('related')
    msg['Subject'] = subject
    # HTML body
    msg.attach(MIMEText(content, 'html', 'utf-8'))
    msg['From'] = sender
    msg['To'] = reciver

    # Send the message.
    smtp = smtplib.SMTP()
    try:
        smtp.connect(smtpadd)
        smtp.login(sender, passwd)
        smtp.sendmail(sender, reciver.split(','), msg.as_string())
        smtp.quit()
    finally:
        # Release the socket when connect/login/sendmail raised; after a
        # successful quit() the connection is already closed and this does
        # nothing.
        smtp.close()


# Script entry point: build the HTML body from the results of get_info() and
# get_table_count() (helpers defined outside this view) and mail it.
if __name__ == "__main__":
    result = get_info()
    tableinfo = get_table_count()
    # create_html gets the 'nameinfo' and 'datainfo' entries of result plus tableinfo.
    html = create_html(result['nameinfo'], result['datainfo'], tableinfo)
    sendMail(_sender_address, _reciver_address, _subject, html, _passwd, _smtpadd)
    log.info('Send mail successfully.')
Exemple #7
0
class IpSpider(object):
    """Scrape free proxy listings from pcdaili.com and store the working ones."""

    def __init__(self, urltype):
        """
        Pick the listing URL and set up the helpers.

        :param urltype: 0 - domestic high-anonymity proxy IPs;
                        1 - domestic transparent proxy IPs;
                        2 - domestic HTTPS proxy IPs;
                        3 - foreign high-anonymity proxy IPs.
                        For any other value ``self.url`` is left unset.
        """
        url_list = {
            0: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=1',
            1: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=2',
            2: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=3',
            3: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=4',
        }
        if urltype in [0, 1, 2, 3]:
            self.url = url_list.get(urltype)
        self.ua = UserAgent()
        self.sqlite = SqliteClient()
        self.sqlite.create_table_sqlite()
        self.log = LogHandler("db")

    def run_spider(self, page, timeout=10):
        """
        Fetch listing pages 1..page and extract the table cells of each.

        A page whose request fails is logged and skipped, so it contributes
        nothing to the result.

        :param page: number of pages to fetch
        :param timeout: seconds to wait for each HTTP request
        :return: list with one entry per fetched page; each entry is the
                 list of row tuples produced by ``group_list``
        """
        iplist = []
        for page_no in range(1, page + 1):
            headers = {'Host': 'www.pcdaili.com', "user-agent": self.ua.chrome}
            sp_url = self.url + "&page=%d" % page_no
            try:
                r = requests.get(sp_url, headers=headers, timeout=timeout)
            except Exception as e:
                # No response to parse: touching ``r`` here would hit an
                # unbound name on the first page or re-read the previous
                # page's response on later ones.
                self.log.error(e)
            else:
                html = etree.HTML(r.text)
                res = html.xpath(
                    '/html/body/div/div/div[2]/table/tbody/tr/td/text()')
                iplist.append(self.group_list(res, 7))
                self.log.info("spider html ok")
            # Pause between requests, whether or not this one succeeded.
            time.sleep(1)
        return iplist

    def group_list(self, grouped, length):
        """
        Split a flat list into consecutive tuples of ``length`` items.

        :param grouped: flat list
        :param length: group size
        :return: at most the first 13 groups; the last group may be shorter
                 than ``length``
        """
        d = [
            tuple(grouped[i:i + length])
            for i in range(0, len(grouped), length)
        ]

        return d[:13]

    def ip_insert_sql(self, ip_list):
        """
        Validate the scraped proxies and insert the working ones.

        :param ip_list: result of ``run_spider``: a list of pages, each a
                        list of row tuples with the address at index 0, the
                        port at index 1 and the type at index 3
        :return: True
        """
        for page_rows in ip_list:
            for row in page_rows:
                if len(row) < 4:
                    # group_list may return a short trailing group that lacks
                    # the fields read below.
                    continue
                ip_addr, ip_port, proxy_type = row[0], row[1], row[3]
                ip_proxy = proxy_type + "://" + ip_addr + ":" + ip_port
                if self.validate_ip(type=proxy_type, ip_proxy=ip_proxy):
                    self.sqlite.insert_table_sqlite(
                        ip_addr=ip_addr,
                        ip_port=ip_port,
                        type=proxy_type,
                        ip_proxy=ip_proxy)

        return True

    def validate_ip(self, type, ip_proxy, timeout=10):
        """
        Check whether https://weibo.com/ can be requested with the proxy.

        :param type: proxy type, used as the key of the ``proxies`` mapping
        :param ip_proxy: proxy URL
        :param timeout: seconds to wait before giving up on the request
        :return: True when the request raised no exception, else False
        """
        test_url = "https://weibo.com/"
        # NOTE(review): requests selects the proxy by the scheme of the
        # requested URL, so a proxy stored under a key other than "https" is
        # presumably not used for this https test URL - confirm.
        proxies = {type: ip_proxy}

        try:
            requests.get(test_url, proxies=proxies, timeout=timeout)
        except Exception as e:
            self.log.error(e)
            return False
        else:
            self.log.info(ip_proxy + " is ok !test url is " + test_url)
            return True
Exemple #8
0
class SqliteClient(object):
    """Small wrapper around a sqlite3 connection holding the ``ipdaili`` table."""

    def __init__(self, dbtype='sqlit'):
        """
        Open the SQLite database described by the configuration.

        The database file is ``<parent dir of CURRENT_PATH>/<path>/<dbname>``,
        with ``path`` and ``dbname`` read from the ``dbtype`` config section.

        :param dbtype: name of the configuration section to use
        """
        self.log = LogHandler("db")
        db_config = DBConfig().get_db_config(dbtype)
        base_dir = os.path.dirname(os.path.abspath(CURRENT_PATH))
        db_dir = os.path.join(base_dir, db_config.get('path'))
        db_path = os.path.join(db_dir, db_config.get("dbname"))
        print(db_path)
        self.conn = sqlite3.connect(db_path)

        self.c = self.conn.cursor()

    def create_table_sqlite(self):
        """
        Create the ``ipdaili`` table when it does not exist yet.

        :return: True on success, False when sqlite raised (error is logged)
        """
        sql = "create table if not exists ipdaili(ip_addr TEXT, ip_port TEXT, type TEXT,ip_proxy TEXT, Downloadtime TEXT)"
        try:
            self.c.execute(sql)
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        else:
            self.log.info("create success")
            return True

    def insert_table_sqlite(self, ip_addr, ip_port, type, ip_proxy):
        """
        Insert one proxy row stamped with the current local time.

        :param ip_addr: IP address
        :param ip_port: port
        :param type:    proxy type
        :param ip_proxy: proxy URL
        :return: True on success, False when sqlite raised (error is logged)
        """
        downloadtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            self.c.execute("INSERT INTO ipdaili (ip_addr,ip_port,type,ip_proxy,Downloadtime) VALUES (?,?,?,?,?)",
                           (ip_addr, ip_port, type, ip_proxy, downloadtime))
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        else:
            self.log.info("insert success")
            return True

    def search_table_sqlite(self, sql="select * from ipdaili"):
        """
        Execute a statement and return all rows it produced.

        :param sql: SQL statement to execute
        :return: list of row tuples, or False when sqlite raised
        """
        try:
            res = self.c.execute(sql)
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        else:
            self.log.info("search success")
            return res.fetchall()

    def __del__(self):
        """
        Close the connection.

        :return: None
        """
        # __init__ may have failed before the connection was opened; in that
        # case there is nothing to close.
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()
Exemple #9
0
class MysqlCline(object):
    """Small wrapper around a pymysql connection holding the ``ipdaili`` table."""

    def __init__(self, dbtype):
        """
        Connect to the database described by the configuration.

        :param dbtype: database type / config section name; a connection is
                       opened only when this is "mysql"
        """
        self.log = LogHandler("db")
        # Defined up front so the other methods and __del__ can rely on the
        # attribute even when no connection is opened below.
        self.connection = None

        dbconfig = DBConfig().get_db_config(dbtype)

        if dbtype == "mysql":
            self.connection = pymysql.connect(**dbconfig, )

    def create_table_mysql(self):
        """
        Create the ``ipdaili`` table when it does not exist yet.

        :return: True on success, False when an error occurred (it is logged)
        """
        sql = """CREATE TABLE IF NOT EXISTS ipdaili (
          ip_addr varchar(30) DEFAULT NULL,
          ip_port varchar(11) DEFAULT NULL,
          type varchar(10) DEFAULT NULL,
          Downloadtime varchar(30) DEFAULT NULL
            )"""
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(sql)
            finally:
                cursor.close()
            self.connection.commit()
        except Exception as e:
            self.log.error(e)
            return False
        # No ``finally: return`` here: it would replace the False above and
        # report success for a failed statement.
        self.log.info("create success")
        return True

    def insert_table_mysql(self, ip_addr, ip_port, type):
        """
        Insert one proxy row stamped with the current local time.

        :param ip_addr: IP address
        :param ip_port: port
        :param type:    proxy type
        :return: True on success, False when an error occurred (it is logged)
        """
        downloadtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Values are passed separately from the statement so the driver
        # escapes them; scraped text must never be spliced into the SQL.
        sql = "INSERT INTO ipdaili VALUES (%s, %s, %s, %s)"
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(sql, (ip_addr, ip_port, type, downloadtime))
            finally:
                cursor.close()
            self.connection.commit()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("insert success")
        return True

    def search_table_mysql(self, sql="select * from ipdaili"):
        """
        Execute a query and return all rows it produced.

        :param sql: query to execute
        :return: the fetched rows, or False when an error occurred
        """
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(sql)
                res = cursor.fetchall()
            finally:
                cursor.close()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("search success")
        return res

    def __del__(self):
        """
        Close the database connection.

        :return: None
        """
        # The attribute is missing or None when __init__ failed early or
        # dbtype was not "mysql".
        connection = getattr(self, "connection", None)
        if connection is not None:
            connection.close()