Exemple #1
0
class mysqlPipeLine(object):
    conn = None
    cursor = None

    def open_spider(self, spider):
        print('开始爬虫并连接数据库')
        self.conn = Connect(host='127.0.0.1',
                            port=3306,
                            user='******',
                            password='******',
                            db='my_django_project_2',
                            charset='utf8')
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        img_src = item['img_src']
        img_path = img_src.split('/')[-1]
        sql = 'insert into pic_4k(img_name,img_size,img_path,img_cls) values("%s","%s","%s","%s")' % (
            item['img_name'], item['img_size'], img_path, '4k动物')
        try:
            self.cursor.execute(sql)
            self.conn.commit()
        except Exception as e:
            print('向数据库中插入数据异常:', e)
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        print('结束爬虫并关闭数据库连接')
        self.cursor.close()
        self.conn.close()
Exemple #2
0
class QcwyMysqlPipeline(object):
    def __init__(self, host, database, port, user, password):
        self.host = host
        self.database = database
        self.table_name = '51job'
        self.port = port
        self.user = user
        self.password = password

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            host=crawler.settings.get('MYSQL_HOST'),
            database=crawler.settings.get('MYSQL_DATABASE'),
            port=crawler.settings.get('MYSQL_PORT'),
            user=crawler.settings.get('MYSQL_USER'),
            password=crawler.settings.get('MYSQL_PASSWORD'),
        )

    def open_spider(self, spider):
        # print(self.table_name)
        self.db = Connect(
            host=self.host,
            database=self.database,
            port=self.port,
            user=self.user,
            password=self.password,
            charset='utf8',
        )
        self.cursor = self.db.cursor()

    def process_item(self, item, spider):
        data = dict(item)
        keys = ','.join(data.keys())
        values = ','.join(['\"%s\"' % i for i in data.values()])
        sql = 'insert into %s(%s) values (%s)' % (self.table_name, keys,
                                                  values)
        try:
            self.cursor.execute(sql)
            self.db.commit()
        except Exception as e:
            self.db.rollback()
            print('异常错误是:', e)

    def close_spider(self, spider):
        self.cursor.close()
        self.db.close()
Exemple #3
0
class ConnMysql(object):
    # ===================数据库连接===================
    def __init__(self,
                 host='localhost',
                 port=3306,
                 user='******',
                 password='******',
                 database="allip",
                 charset='utf8'):
        self.lock = Lock()
        try:
            self.conn = Connect(host=host,
                                port=port,
                                user=user,
                                password=password,
                                database=database,
                                charset=charset)
            self.cursor = self.conn.cursor()
        except Exception as e:
            print("数据库连接异常:{}".format(e))
            print("请检查您是否使用了默认的数据库参数...")
        else:
            pass
        finally:
            pass

    # ===================保存数据===================
    def exe(self, sql_language):
        try:
            # 数据库插入数据
            self.lock.acquire()  # 获取锁,多线程应用.
            self.cursor.execute(sql_language)
            self.conn.commit()
        except Exception as e:
            self.conn.rollback()
            print("执行sql语句失败...{}".format(e))
        else:
            pass
        finally:
            self.lock.release()  # 释放锁

    def close(self):
        self.cursor.close()
        self.conn.close()
Exemple #4
0
class Resource(SQLResource): # MySQL resource

    @typecheck
    def __init__(self, name: str, *,
                 decimal_precision: (int, int),
                 server_address: (str, int),
                 connect_timeout: float,
                 database: str,
                 username: str,
                 password: str,
                 sql_mode: optional(str),
                 charset: optional(str)):

        SQLResource.__init__(self, name, decimal_precision = decimal_precision)

        self._host, self._port = server_address
        self._connect_timeout = connect_timeout
        self._database = database
        self._username = username
        self._password = password
        self._sql_mode = sql_mode
        self._charset = charset or "utf8"

    ###################################

    def connect(self):
        SQLResource.connect(self)
        self._connection = Connect(
                host = self._host, port = self._port, connect_timeout = self._connect_timeout,
                db = self._database, user = self._username, passwd = self._password,
                sql_mode = self._sql_mode, charset = self._charset)
        try:
            self._connection.autocommit(False)
        except:
            self._connection.close()
            raise

    ###################################

    def _execute_sql(self, sql, params):

        try:
            param_list = ", ".join("@{0:s} = {1:s}".format(n, v) for n, v in params.items())
            at_params = { n: "@{0:s}".format(n) for n in params.keys() }
            sql = sql.format(**at_params)
        except:
            ResourceError.rethrow(recoverable = True, terminal = False)

        cursor = self._connection.cursor()
        try:

            for n, v in params.items():
                cursor.execute("SET @{0:s}={1:s}".format(n, v))

            pmnc.log.info(">> {0:s}".format(sql))
            if param_list:
                if pmnc.log.debug:
                    pmnc.log.debug("-- {0:s} -- ({1:s})".format(sql, param_list))

            records = []
            try:

                cursor.execute(sql)
                rowcount = cursor.rowcount

                if rowcount >= 0:
                    pmnc.log.info("<< OK, {0:d} record(s)".format(rowcount))
                    if rowcount > 0 and cursor.description:
                        column_names = [ t[0] for t in cursor.description ]
                        for record in cursor.fetchall():
                            records.append(dict(zip(column_names, record)))
                else:
                    pmnc.log.info("<< OK")

            except MySQL_Error as e:
                code, message = e.args[0].args
                pmnc.log.warning("<< {0:s}{1:s} !! MySQL_Error(\"[{2:d}] {3:s}\") in {4:s}".\
                                 format(sql, " -- ({0:s})".format(param_list)
                                        if param_list else "", code, message, trace_string()))
                SQLResourceError.rethrow(recoverable = True,
                        code = code, description = message) # note that there is no state
            except Exception:
                pmnc.log.warning("<< {0:s}{1:s} !! {2:s}".\
                                 format(sql, " -- ({0:s})".format(param_list)
                                        if param_list else "", exc_string()))
                ResourceError.rethrow(recoverable = True)
            else:
                return records

        finally:
            cursor.close()

    ###################################

    def commit(self):
        self._connection.commit()

    ###################################

    def rollback(self):
        self._connection.rollback()

    ###################################

    def disconnect(self):
        try:
            self._connection.close()
        except:
            pmnc.log.error(exc_string()) # log and ignore
        finally:
            SQLResource.disconnect(self)

    ###################################

    _supported_types = SQLResource._supported_types | { float, set, date, time }

    def _py_to_sql_NoneType(self, v):
        return self._connection.escape(SQLResource._py_to_sql_NoneType(self, v))

    def _py_to_sql_int(self, v):
        return self._connection.escape(SQLResource._py_to_sql_int(self, v))

    def _py_to_sql_Decimal(self, v):
        return self._connection.escape(SQLResource._py_to_sql_Decimal(self, v))

    def _py_to_sql_bool(self, v):
        return self._connection.escape(SQLResource._py_to_sql_bool(self, v))

    def _py_to_sql_datetime(self, v):
        return self._connection.escape(SQLResource._py_to_sql_datetime(self, v))

    def _py_to_sql_str(self, v):
        return self._connection.escape(SQLResource._py_to_sql_str(self, v))

    def _py_to_sql_bytes(self, v):
        return "X'{0:s}'".format(b2a_hex(v).decode("ascii"))

    def _py_to_sql_float(self, v):
        return self._connection.escape(v)

    def _py_to_sql_set(self, v):
        return "'{0:s}'".format(",".join(self._connection.escape(s)[1:-1]
                                         for s in v if isinstance(s, str)))

    def _py_to_sql_date(self, v):
        return self._connection.escape(v)

    def _py_to_sql_time(self, v):
        return self._connection.escape(v)

    ###################################

    def _sql_to_py_float(self, v):
        return v

    def _sql_to_py_set(self, v):
        return v

    def _sql_to_py_date(self, v):
        return v

    def _sql_to_py_time(self, v):
        return v

    def _sql_to_py_timedelta(self, v): # PyMySQL represents TIME as timedelta
        return time(hour = v.seconds // 3600,
                    minute = (v.seconds % 3600) // 60,
                    second = v.seconds % 60)