class mysqlPipeLine(object): conn = None cursor = None def open_spider(self, spider): print('开始爬虫并连接数据库') self.conn = Connect(host='127.0.0.1', port=3306, user='******', password='******', db='my_django_project_2', charset='utf8') self.cursor = self.conn.cursor() def process_item(self, item, spider): img_src = item['img_src'] img_path = img_src.split('/')[-1] sql = 'insert into pic_4k(img_name,img_size,img_path,img_cls) values("%s","%s","%s","%s")' % ( item['img_name'], item['img_size'], img_path, '4k动物') try: self.cursor.execute(sql) self.conn.commit() except Exception as e: print('向数据库中插入数据异常:', e) self.conn.rollback() return item def close_spider(self, spider): print('结束爬虫并关闭数据库连接') self.cursor.close() self.conn.close()
class QcwyMysqlPipeline(object): def __init__(self, host, database, port, user, password): self.host = host self.database = database self.table_name = '51job' self.port = port self.user = user self.password = password @classmethod def from_crawler(cls, crawler): return cls( host=crawler.settings.get('MYSQL_HOST'), database=crawler.settings.get('MYSQL_DATABASE'), port=crawler.settings.get('MYSQL_PORT'), user=crawler.settings.get('MYSQL_USER'), password=crawler.settings.get('MYSQL_PASSWORD'), ) def open_spider(self, spider): # print(self.table_name) self.db = Connect( host=self.host, database=self.database, port=self.port, user=self.user, password=self.password, charset='utf8', ) self.cursor = self.db.cursor() def process_item(self, item, spider): data = dict(item) keys = ','.join(data.keys()) values = ','.join(['\"%s\"' % i for i in data.values()]) sql = 'insert into %s(%s) values (%s)' % (self.table_name, keys, values) try: self.cursor.execute(sql) self.db.commit() except Exception as e: self.db.rollback() print('异常错误是:', e) def close_spider(self, spider): self.cursor.close() self.db.close()
class ConnMysql(object): # ===================数据库连接=================== def __init__(self, host='localhost', port=3306, user='******', password='******', database="allip", charset='utf8'): self.lock = Lock() try: self.conn = Connect(host=host, port=port, user=user, password=password, database=database, charset=charset) self.cursor = self.conn.cursor() except Exception as e: print("数据库连接异常:{}".format(e)) print("请检查您是否使用了默认的数据库参数...") else: pass finally: pass # ===================保存数据=================== def exe(self, sql_language): try: # 数据库插入数据 self.lock.acquire() # 获取锁,多线程应用. self.cursor.execute(sql_language) self.conn.commit() except Exception as e: self.conn.rollback() print("执行sql语句失败...{}".format(e)) else: pass finally: self.lock.release() # 释放锁 def close(self): self.cursor.close() self.conn.close()
class Resource(SQLResource): # MySQL resource @typecheck def __init__(self, name: str, *, decimal_precision: (int, int), server_address: (str, int), connect_timeout: float, database: str, username: str, password: str, sql_mode: optional(str), charset: optional(str)): SQLResource.__init__(self, name, decimal_precision = decimal_precision) self._host, self._port = server_address self._connect_timeout = connect_timeout self._database = database self._username = username self._password = password self._sql_mode = sql_mode self._charset = charset or "utf8" ################################### def connect(self): SQLResource.connect(self) self._connection = Connect( host = self._host, port = self._port, connect_timeout = self._connect_timeout, db = self._database, user = self._username, passwd = self._password, sql_mode = self._sql_mode, charset = self._charset) try: self._connection.autocommit(False) except: self._connection.close() raise ################################### def _execute_sql(self, sql, params): try: param_list = ", ".join("@{0:s} = {1:s}".format(n, v) for n, v in params.items()) at_params = { n: "@{0:s}".format(n) for n in params.keys() } sql = sql.format(**at_params) except: ResourceError.rethrow(recoverable = True, terminal = False) cursor = self._connection.cursor() try: for n, v in params.items(): cursor.execute("SET @{0:s}={1:s}".format(n, v)) pmnc.log.info(">> {0:s}".format(sql)) if param_list: if pmnc.log.debug: pmnc.log.debug("-- {0:s} -- ({1:s})".format(sql, param_list)) records = [] try: cursor.execute(sql) rowcount = cursor.rowcount if rowcount >= 0: pmnc.log.info("<< OK, {0:d} record(s)".format(rowcount)) if rowcount > 0 and cursor.description: column_names = [ t[0] for t in cursor.description ] for record in cursor.fetchall(): records.append(dict(zip(column_names, record))) else: pmnc.log.info("<< OK") except MySQL_Error as e: code, message = e.args[0].args pmnc.log.warning("<< {0:s}{1:s} !! MySQL_Error(\"[{2:d}] {3:s}\") in {4:s}".\ format(sql, " -- ({0:s})".format(param_list) if param_list else "", code, message, trace_string())) SQLResourceError.rethrow(recoverable = True, code = code, description = message) # note that there is no state except Exception: pmnc.log.warning("<< {0:s}{1:s} !! {2:s}".\ format(sql, " -- ({0:s})".format(param_list) if param_list else "", exc_string())) ResourceError.rethrow(recoverable = True) else: return records finally: cursor.close() ################################### def commit(self): self._connection.commit() ################################### def rollback(self): self._connection.rollback() ################################### def disconnect(self): try: self._connection.close() except: pmnc.log.error(exc_string()) # log and ignore finally: SQLResource.disconnect(self) ################################### _supported_types = SQLResource._supported_types | { float, set, date, time } def _py_to_sql_NoneType(self, v): return self._connection.escape(SQLResource._py_to_sql_NoneType(self, v)) def _py_to_sql_int(self, v): return self._connection.escape(SQLResource._py_to_sql_int(self, v)) def _py_to_sql_Decimal(self, v): return self._connection.escape(SQLResource._py_to_sql_Decimal(self, v)) def _py_to_sql_bool(self, v): return self._connection.escape(SQLResource._py_to_sql_bool(self, v)) def _py_to_sql_datetime(self, v): return self._connection.escape(SQLResource._py_to_sql_datetime(self, v)) def _py_to_sql_str(self, v): return self._connection.escape(SQLResource._py_to_sql_str(self, v)) def _py_to_sql_bytes(self, v): return "X'{0:s}'".format(b2a_hex(v).decode("ascii")) def _py_to_sql_float(self, v): return self._connection.escape(v) def _py_to_sql_set(self, v): return "'{0:s}'".format(",".join(self._connection.escape(s)[1:-1] for s in v if isinstance(s, str))) def _py_to_sql_date(self, v): return self._connection.escape(v) def _py_to_sql_time(self, v): return self._connection.escape(v) ################################### def _sql_to_py_float(self, v): return v def _sql_to_py_set(self, v): return v def _sql_to_py_date(self, v): return v def _sql_to_py_time(self, v): return v def _sql_to_py_timedelta(self, v): # PyMySQL represents TIME as timedelta return time(hour = v.seconds // 3600, minute = (v.seconds % 3600) // 60, second = v.seconds % 60)