Esempio n. 1
0
class DBConfig(object):
    """Read and write sections of the project's ``config.ini`` file.

    Every public method re-reads the file from ``self.sql_path`` before it
    acts, and every mutating method writes the whole configuration back.
    """

    # Option names written by add_db_config, in the order of its parameters.
    _DB_OPTIONS = ("host", "port", "user", "password", "database", "charset")
    # Placeholder used instead of real passwords in log output.
    _MASK = "******"

    def __init__(self, ):
        self.config = ConfigParser()
        self.name = "config.ini"
        self.sql_path = os.path.join(ROOT_PATH, self.name)
        self.log = LogHandler("db")

    def _save(self):
        """Write the in-memory configuration back to ``self.sql_path``."""
        with open(self.sql_path, "w", encoding="utf-8") as f:
            self.config.write(f)

    def add_db_config(self, dbtype, host, port, user, password, database,
                      charset):
        """
        Add or update a database configuration section.

        :param dbtype: database type, used as the section name
        :param host: host
        :param port: port
        :param user: user name
        :param password: password
        :param database: database name
        :param charset: character set
        :return: True once the section has been written
        """
        self.config.read(self.sql_path, encoding="utf-8")

        # Create the section on first use; otherwise overwrite its options.
        if dbtype not in self.config:
            self.config.add_section(dbtype)

        values = (host, port, user, password, database, charset)
        for option, value in zip(self._DB_OPTIONS, values):
            # ConfigParser.set() only accepts strings, so an int port (which
            # get_db_config hands back) would otherwise raise TypeError.
            self.config.set(dbtype, option, str(value))

        self._save()
        # The password is masked so credentials do not leak into the logs.
        self.log.info(
            "Amend the success , Modifying the data %s" %
            [dbtype, host, port, user, self._MASK, database, charset])
        return True

    def get_db_config(self, dbtyep):
        """
        Return the options stored for one database section.

        :param dbtyep: database type (section name)
        :return: dict of options with ``port`` converted to int, or None
                 when the section does not exist
        """
        self.config.read(self.sql_path, encoding="utf-8")
        if dbtyep not in self.config:
            self.log.error("Parameter error %s" % dbtyep)
            return None

        option = dict(self.config.items(dbtyep))
        if "port" in option:
            option["port"] = int(option["port"])

        # Log a copy with the password masked; the caller gets the real one.
        loggable = dict(option)
        if "password" in loggable:
            loggable["password"] = self._MASK
        self.log.info("success %s" % loggable)
        return option

    def update_config(self, section, option, value):
        """
        Change the value of an option that already exists.

        :param section: section name
        :param option:  option key to modify
        :param value:   new value (stored as a string)
        :return: True when the option was updated, None when the section or
                 the option does not exist
        """
        self.config.read(self.sql_path, encoding="utf-8")
        if section not in self.config.sections():
            self.log.error("Parameter error %s" % section)
            return None
        if option not in self.config.options(section):
            self.log.error("Parameter error %s" % option)
            return None

        self.config.set(section, option, str(value))
        self.log.info("Need to be modified")
        self._save()
        self.log.info("Amend the success")
        return True

    def add_config(self, section, option, value):
        """
        Set an option, creating the section first when it is missing.

        :param section: section name
        :param option:  option key to set
        :param value:   value to store (stored as a string)
        :return: True once the option has been written
        """
        self.config.read(self.sql_path, encoding="utf-8")
        if section not in self.config.sections():
            self.config.add_section(section)
        self.config.set(section, option, str(value))
        self._save()
        self.log.info("Amend the success")
        return True
Esempio n. 2
0
class NetEase(object):
    """Small client for the NetEase Cloud Music ``weapi`` endpoints.

    Each public ``songs_*`` method builds a payload and sends it through
    :meth:`request`, which encrypts it with the ``netease`` helper module
    before posting it.
    """

    # Sentinel that distinguishes "no default given" from an explicit None.
    _UNSET = object()

    def __init__(self):
        """
        Build the default request headers, the HTTP session and the logger.
        """
        self.header = {
            "Accept": "*/*",
            "Accept-Encoding": "gzip,deflate,sdch",
            "Accept-Language": "zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "music.163.com",
            "Referer": "http://music.163.com",
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
        }
        self.session = requests.session()
        self.log = LogHandler('NeteaseApi')

    def _raw_request(self, method, url, data=None):
        """
        Actually send the HTTP request.

        :param method: "POST" or "GET"
        :param url: full url
        :param data: query parameters (GET) or form data (POST)
        :return: the response object returned by the session
        :raises ValueError: when ``method`` is neither "GET" nor "POST"
        """
        if method == "GET":
            return self.session.get(url,
                                    params=data,
                                    headers=self.header,
                                    timeout=DEFAULT_TIMEOUT)
        if method == "POST":
            return self.session.post(url,
                                     data=data,
                                     headers=self.header,
                                     timeout=DEFAULT_TIMEOUT)
        # Previously this fell through to an UnboundLocalError.
        raise ValueError("unsupported HTTP method: {!r}".format(method))

    def _get_form_data(self, encrypt_data):
        """
        Build the encrypted form fields for a request.

        :param encrypt_data: the payload to encrypt
        :return: {"params": ..., "encSecKey": ...}
        """
        key = netease.create_key(16)
        # The payload is AES-encrypted twice: first with the fixed nonce,
        # then with the freshly created key.
        inner = netease.aes(encrypt_data, netease.NONCE)
        return {
            "params": netease.aes(inner, key),
            "encSecKey": netease.rsa(key, netease.PUBKEY, netease.MODULUS)
        }

    def request(self, method, path, data=None, default=_UNSET):
        """
        Common entry point used by every API method.

        :param method: "POST" or "GET"
        :param path: path appended to ``BASE_URL``
        :param data: unencrypted payload; it is copied, never modified
        :param default: value returned when the request or the JSON decoding
                        fails; ``{"code": -1}`` when omitted
        :return: the decoded JSON response, or ``default`` on failure
        """
        url = "{}{}".format(BASE_URL, path)
        # A fresh dict per call, so callers can never share or mutate it.
        fallback = {"code": -1} if default is self._UNSET else default

        # Work on a copy so the caller's dict is left untouched.
        payload = dict(data) if data else {}
        payload["csrf_token"] = ""
        params = self._get_form_data(json.dumps(payload).encode('utf-8'))

        self.log.debug(
            '[Netease api] url: {};\trequest  data: {};\tparams: {}'.format(
                url, payload, params))
        try:
            raw = self._raw_request(method, url, params)
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an unsupported method; it is logged and the
            # fallback returned, as callers never saw an exception before.
            self.log.error('[Netease api] request error: {}'.format(e))
            return fallback

        try:
            result = raw.json()
        except ValueError:
            self.log.error(
                "[Netease api] request error; Path: {}, response: {}".format(
                    path, raw.text[:200]))
            # Return the fallback, not the undecodable response object.
            return fallback

        self.log.debug('[Netease api] url: {};\tresponse data: {}'.format(
            url, result))
        return result

    def songs_url(self, song_id):
        """
        Get the playable (external) url of a song.
            {ids: "[514235010]", level: "standard", encodeType: "aac", csrf_token: ""}
        :param song_id: song id
        :return: decoded JSON response containing the url
        """
        path = "/weapi/song/enhance/player/url/v1?csrf_token="
        params = {
            'ids': '[' + str(song_id) + ']',
            'level': 'standard',
            'encodeType': 'aac',
            'csrf_token': ''
        }
        return self.request(POST, path, params)

    def songs_lyric(self, song_id):
        """
        Get the lyrics of a song.
            {id: "186453", lv: -1, tv: -1, csrf_token: ""}
        :param song_id: song id
        :return: decoded JSON response
        """
        path = "/weapi/song/lyric?csrf_token="
        params = {'id': str(song_id), 'lv': -1, 'tv': -1, 'csrf_token': ''}
        return self.request(POST, path, params)

    def songs_search(self, keyword, offset=0, limit=30):
        """
        Search songs by keyword; this is the usual search endpoint.
            {hlpretag: "<span class="s-fc7">", hlposttag: "</span>", s: "春夏秋冬 张国荣", type: "1", offset: "0", …}
        :param keyword: search text
        :param offset: index of the first result
        :param limit: maximum number of results
        :return: decoded JSON response
        """
        path = '/weapi/cloudsearch/get/web?csrf_token='
        params = {
            'csrf_token': '',
            'hlposttag': '</span>',
            'hlpretag': '<span class="s-fc7">',
            'limit': str(limit),
            'offset': str(offset),
            's': str(keyword),
            'total': 'true',
            'type': '1'
        }
        return self.request(POST, path, params)

    def songs_search_(self, song):
        """
        Search songs through the search-box suggestion endpoint (rarely used).
            {s: "春夏秋冬", limit: "8", csrf_token: ""}
        :param song: search text
        :return: decoded JSON response
        """
        path = "/weapi/search/suggest/web?csrf_token="
        params = {'s': str(song), 'limit': 8, 'csrf_token': ''}
        return self.request(POST, path, params)

    def songs_detail(self, song_id):
        """
        Get the details of a song.
            {id: "186453", c: "[{"id":"186453"}]", csrf_token: ""}
        :param song_id: required, song id
        :return: decoded JSON response
        """
        path = "/weapi/v3/song/detail?csrf_token="
        params = {
            'id': str(song_id),
            # json.dumps yields the double-quoted JSON shown in the sample
            # payload above; the old hand-built string used single quotes.
            'c': json.dumps([{'id': str(song_id)}]),
            'csrf_token': ''
        }
        return self.request(POST, path, params)
Esempio n. 3
0
class SqliteClient(object):
    """Thin wrapper around one sqlite3 connection holding the ``ipdaili`` table."""

    def __init__(self, dbtype='sqlit'):
        """
        Open the sqlite database described by a configuration section.

        The section must provide ``path`` and ``dbname``.

        :param dbtype: configuration section to read
        :raises ValueError: when the configuration section does not exist
        """
        self.log = LogHandler("db")
        db_config = DBConfig().get_db_config(dbtype)
        if db_config is None:
            # Fail with a clear message instead of "'NoneType' has no get".
            raise ValueError(
                "no database configuration section %r" % dbtype)

        base_dir = os.path.join(
            os.path.dirname(os.path.abspath(CURRENT_PATH)),
            db_config.get('path'))
        db_path = os.path.join(base_dir, db_config.get("dbname"))
        self.log.info("sqlite database path: %s" % db_path)

        self.conn = sqlite3.connect(db_path)
        self.c = self.conn.cursor()

    def create_table_sqlite(self):
        """
        Create the ``ipdaili`` table if it does not exist yet.

        :return: True on success, False when the statement failed
        """
        sql = "create table if not exists ipdaili(ip_addr TEXT, ip_port TEXT, type TEXT,ip_proxy TEXT, Downloadtime TEXT)"
        try:
            self.c.execute(sql)
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("create success")
        return True

    def insert_table_sqlite(self, ip_addr, ip_port, type, ip_proxy):
        """
        Insert one proxy row, stamped with the current local time.

        :param ip_addr: ip address
        :param ip_port: port
        :param type:    proxy type
        :param ip_proxy: proxy url
        :return: True on success, False when the statement failed
        """
        downloadtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            self.c.execute(
                "INSERT INTO ipdaili (ip_addr,ip_port,type,ip_proxy,Downloadtime) VALUES (?,?,?,?,?)",
                (ip_addr, ip_port, type, ip_proxy, downloadtime))
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("insert success")
        return True

    def search_table_sqlite(self, sql="select * from ipdaili"):
        """
        Run a statement and return every resulting row.

        :param sql: SQL statement to execute
        :return: list of rows, or False when the statement failed
        """
        try:
            res = self.c.execute(sql)
            # Kept so a caller passing a modifying statement still gets it
            # committed, as before.
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("search success")
        return res.fetchall()

    def __del__(self):
        """
        Close the connection when the object is destroyed.
        """
        # __init__ may have failed before the connection was opened.
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()
Esempio n. 4
0
class IpSpider(object):
    """Scrape free proxy lists from pcdaili.com and store the working ones."""

    def __init__(self, urltype):
        """
        Pick the list to scrape and prepare the sqlite storage.

        :param urltype: 0 - domestic high-anonymity proxies; 1 - domestic
                        transparent proxies; 2 - domestic HTTPS proxies;
                        3 - foreign high-anonymity proxies
        """
        url_list = {
            0: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=1',
            1: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=2',
            2: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=3',
            3: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=4',
        }
        self.log = LogHandler("db")
        # None for an unknown urltype, so the attribute always exists and
        # run_spider can report the problem clearly.
        self.url = url_list.get(urltype)
        if self.url is None:
            self.log.error("Parameter error %s" % urltype)
        self.ua = UserAgent()
        self.sqlite = SqliteClient()
        self.sqlite.create_table_sqlite()

    def run_spider(self, page, timeout=10):
        """
        Download and parse the first ``page`` pages of the proxy list.

        A page that cannot be downloaded or parsed is logged and skipped.

        :param page: number of pages to fetch
        :param timeout: seconds to wait for each HTTP response
        :return: list with one entry per parsed page; each entry is the list
                 of row tuples produced by :meth:`group_list`
        :raises ValueError: when the spider was built with an unknown urltype
        """
        if not getattr(self, "url", None):
            raise ValueError("no list url configured for this spider")

        iplist = []
        for page_no in range(1, page + 1):
            headers = {'Host': 'www.pcdaili.com', "user-agent": self.ua.chrome}
            sp_url = self.url + "&page=%d" % page_no
            try:
                r = requests.get(sp_url, headers=headers, timeout=timeout)
            except Exception as e:
                # Skip this page; parsing here would reuse a stale or
                # undefined response.
                self.log.error(e)
            else:
                html = etree.HTML(r.text)
                if html is None:
                    self.log.error("empty html for %s" % sp_url)
                else:
                    cells = html.xpath(
                        '/html/body/div/div/div[2]/table/tbody/tr/td/text()')
                    # Cells are grouped seven at a time, one group per row.
                    iplist.append(self.group_list(cells, 7))
                    self.log.info("spider html ok")
            # Pause between pages whether or not the download succeeded.
            time.sleep(1)
        return iplist

    def group_list(self, grouped, length, max_groups=13):
        """
        Split a flat list into consecutive tuples of ``length`` items.

        The last tuple is shorter when the list does not divide evenly.

        :param grouped: flat list
        :param length: size of each group
        :param max_groups: maximum number of groups returned
        :return: [(), ()]
        """
        groups = [
            tuple(grouped[i:i + length])
            for i in range(0, len(grouped), length)
        ]
        return groups[:max_groups]

    def ip_insert_sql(self, ip_list):
        """
        Validate every scraped proxy and store the working ones.

        :param ip_list: list of pages as returned by :meth:`run_spider`
        :return: True
        """
        for page_rows in ip_list:
            for row in page_rows:
                # A trailing partial group has too few fields to use.
                if len(row) < 4:
                    self.log.error("skip malformed proxy row %s" % (row,))
                    continue
                ip_addr, ip_port, proxy_type = row[0], row[1], row[3]
                ip_proxy = proxy_type + "://" + ip_addr + ":" + ip_port
                if self.validate_ip(type=proxy_type, ip_proxy=ip_proxy):
                    self.sqlite.insert_table_sqlite(ip_addr=ip_addr,
                                                    ip_port=ip_port,
                                                    type=proxy_type,
                                                    ip_proxy=ip_proxy)
        return True

    def validate_ip(self, type, ip_proxy, timeout=10):
        """
        Check whether https://weibo.com/ can be fetched with the proxy set.

        :param type: proxy type, used as the scheme key of the proxies mapping
        :param ip_proxy: proxy url
        :param timeout: seconds to wait for the response
        :return: True when the request succeeded, False otherwise
        """
        test_url = "https://weibo.com/"
        # requests matches proxy keys against the lowercase url scheme.
        # NOTE(review): an "http" key is presumably not applied to this https
        # test url, so such proxies may be checked over a direct connection -
        # confirm.
        proxies = {type.lower(): ip_proxy}

        try:
            requests.get(test_url, proxies=proxies, timeout=timeout)
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info(ip_proxy + " is ok !test url is " + test_url)
        return True
Esempio n. 5
0
class MysqlCline(object):
    """Thin wrapper around one pymysql connection holding the ``ipdaili`` table."""

    def __init__(self, dbtype):
        """
        Connect to the database described by a configuration section.

        A connection is only opened when ``dbtype`` is "mysql"; for any other
        value ``self.connection`` stays None.

        :param dbtype: database type (configuration section name)
        :raises ValueError: when ``dbtype`` is "mysql" but no such
                            configuration section exists
        """
        self.log = LogHandler("db")
        # Always defined, so the other methods and __del__ can rely on it.
        self.connection = None

        dbconfig = DBConfig().get_db_config(dbtype)

        if dbtype == "mysql":
            if dbconfig is None:
                raise ValueError(
                    "no database configuration section %r" % dbtype)
            self.connection = pymysql.connect(**dbconfig)

    def create_table_mysql(self):
        """
        Create the ``ipdaili`` table if it does not exist yet.

        :return: True on success, False when the statement failed
        """
        sql = """CREATE TABLE IF NOT EXISTS ipdaili (
          ip_addr varchar(30) DEFAULT NULL,
          ip_port varchar(11) DEFAULT NULL,
          type varchar(10) DEFAULT NULL,
          Downloadtime varchar(30) DEFAULT NULL
            )"""
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(sql)
            self.connection.commit()
        except Exception as e:
            self.log.error(e)
            return False
        finally:
            # Cleanup only: a return here would override the False above.
            if cursor is not None:
                cursor.close()
        self.log.info("create success")
        return True

    def insert_table_mysql(self, ip_addr, ip_port, type):
        """
        Insert one proxy row, stamped with the current local time.

        :param ip_addr: ip address
        :param ip_port: port
        :param type:    proxy type
        :return: True on success, False when the statement failed
        """
        downloadtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Values are passed as parameters so the driver escapes them; scraped
        # text is never spliced into the SQL string.
        sql = "INSERT INTO ipdaili VALUES (%s, %s, %s, %s)"
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(sql, (ip_addr, ip_port, type, downloadtime))
            self.connection.commit()
        except Exception as e:
            self.log.error(e)
            return False
        finally:
            if cursor is not None:
                cursor.close()
        self.log.info("inserter sql success")
        return True

    def search_table_mysql(self, sql="select * from ipdaili"):
        """
        Run a query and return every resulting row.

        :param sql: query to execute
        :return: the fetched rows, or False when the query failed
        """
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(sql)
            res = cursor.fetchall()
        except Exception as e:
            self.log.error(e)
            return False
        finally:
            if cursor is not None:
                cursor.close()
        self.log.info("search success")
        return res

    def __del__(self):
        """
        Close the database connection when the object is destroyed.
        """
        # The connection is absent when __init__ failed or dbtype != "mysql".
        connection = getattr(self, "connection", None)
        if connection is None:
            return
        try:
            connection.close()
        except Exception:
            # Best-effort cleanup: the connection may already be closed, and
            # raising inside __del__ only produces an ignored warning.
            pass