def _as_console_text(message):
    """Return *message* as text, decoding UTF-8 byte strings.

    On Python 2 the string literals in this module are byte strings and are
    decoded before printing (as the original code did); on Python 3 they are
    already text and are returned unchanged.
    """
    if isinstance(message, bytes):
        return message.decode('utf-8')
    return message


def _db_init(dbFile):
    """Open the crawler's SQLite database, creating its table on first use.

    NOTE(review): in the file as received, the second half of this function
    (table creation and the ``return``) appears after ``_initDB``; it is
    folded back in here so the function is complete in one place.

    Args:
        dbFile: Path of the SQLite database file.

    Returns:
        A ``(connection, cursor)`` tuple for ``dbFile``.

    Raises:
        SystemExit: If the database cannot be opened. The failure is printed
            and logged at CRITICAL level first.
    """
    # Record whether the file was present *before* connecting, because
    # connecting may create it.  os.path.exists also handles paths that are
    # not in the current working directory.
    exist = os.path.exists(dbFile)
    try:
        # check_same_thread=False is kept from the original code.
        db = sqlite3.connect(dbFile, check_same_thread=False)
        c = db.cursor()
    except sqlite3.Error as e:
        print("%s %s" % (_as_console_text("连接sqlite3数据库失败"), e.args[0]))
        logging.critical("连接sqlite3数据库失败")
        # Non-zero status: this is a failure exit, and SystemExit does not
        # depend on the `site`-provided exit() helper being available.
        raise SystemExit(1)
    if not exist:
        try:
            print(_as_console_text('创建表格中...'))
            c.execute(
                "create table if not exists table_crawler("
                "id integer primary key autoincrement,"
                "title text,"
                "key text,"
                "url text,"
                "content text)"
            )
            db.commit()
        except sqlite3.OperationalError:
            # Best effort, as in the original: log and still hand back the
            # connection so the caller decides what to do.
            logging.critical(dbFile + ' 创建表格错误')
    return db, c
def _initDB(dbFile):
    """Open the ``spider`` SQLite database, creating its table if new.

    Args:
        dbFile: Path of the SQLite database file.

    Returns:
        A ``(connection, cursor)`` tuple for ``dbFile``.

    Raises:
        sqlite3.Error: If the database cannot be opened (not caught here).
    """
    # Record whether the file was present *before* connecting, because
    # connecting may create it.  os.path.exists also works for paths outside
    # the current working directory, where the old os.listdir('.') check
    # always answered "missing" and re-ran the CREATE TABLE.
    exist = os.path.exists(dbFile)
    # check_same_thread=False is kept from the original code.
    db = sqlite3.connect(dbFile, check_same_thread=False)
    c = db.cursor()
    if not exist:
        try:
            c.execute(
                'create table spider(id integer primary key, '
                'url text,key text,content text)'
            )
            db.commit()
        except sqlite3.OperationalError:
            # Best effort: log the failure and still return the connection.
            logging.critical(dbFile + ' 创建表格错误')
    return db, c
except sqlite3.Error, e: print ("连接sqlite3数据库失败").decode('utf-8'), e.args[0] logging.cratical("连接sqlite3数据库失败") exit() if not exist: try: print('创建表格中...').decode('utf-8') c.execute("create table if not exists table_crawler(\ id integer primary key autoincrement,\ title text,\ key text,\ url text,\ content text)") db.commit() except sqlite3.OperationalError: logging.cratical(dbFile + ' 创建表格错误') return db, c def _insert(title, key, url, content): db.text_factory = str # 防止插入中文数据时出现错误 try: c.execute('''INSERT INTO table_crawler(title,key,url,content) values(?,?,?,?)''', (title, key, url, content)) db.commit() keyurls.add(url) except Exception, e: failed.add(url) try: logging.critical('插入 ' + str(url) + ' 数据错误' + 'Error' + str(e)) except Exception, e: # 有些链接中含有logging无法解码的字符