def parse_sql_to_excel(sql_name, table_name, excel_name): """把SQL转化成EXCEL""" try: # 创建数据库连接 conn = spider_db.db_conn(sql_name) # 查询需要转化的数据表 sql = "select * from {}".format(table_name) cur = spider_db.dbExecu(sql, conn) except Exception as e: print "数据库连接异常" else: # 创建Excel workbook = xlwt.Workbook(encoding='ascii') worksheet = workbook.add_sheet(table_name) # 往单元格内写数据 sql_data = cur.fetchall() for index_outer, data in enumerate(sql_data): for index_inner, da in enumerate(data): worksheet.write(index_outer, index_inner, label=str(da)) # 保存excel workbook.save(excel_name) # 关闭数据库连接 spider_db.db_close(conn)
def parse_excel_to_sql(sql_name, table_name, excel_name): """把EXCEL转化成SQL""" try: # 打开excel读取数据 data = xlrd.open_workbook(excel_name) # 创建数据库连接 conn = spider_db.db_conn(sql_name) except Exception as e: print "参数异常" else: # 获取工作表 table_list = data.sheets() for table in table_list: # 获取行列的值 rows = table.nrows cols = table.ncols # 获取整行和整列的值(返回list) # table.row_values(i) # table.col_values(i) # 遍历数据 for row in xrange(rows): for col in xrange(cols): val = table.cell(row, col).value sql = "insert into {}() values() {}".format(table_name) cur = spider_db.dbExecu(sql, conn)
def _details_page(self, a, b): """根据给定的参数返回正确的页面""" conn = spider_db.db_conn(db="gaoji_drug_data") sql = """select drug_url_app, drug_params, img_small, img_details from alihealth_app where img_details='{}' limit %s, %s;""" % ( a, b) cur = spider_db.dbExecu(sql, conn) return cur
def _save_app_data(self, drug_data): """持久化数据""" conn = spider_db.db_conn(db=self.db_name, user=self.user, passwd=self.passwd) sql = """update alihealth_app set img_details='{}' where drug_url_app='{}';""" \ .format(MySQLdb.escape_string(drug_data['img_details']), MySQLdb.escape_string(drug_data['url'])) spider_db.dbExecu(sql, conn) spider_db.db_close(conn)
def _save_data(self, drug_data): """持久化数据""" conn = spider_db.db_conn(db=self.db_name, user=self.user, passwd=self.passwd) sql = """insert into alihealth_drug values(0, "{}", "{}", "{}", "{}", "{}");""".\ format(MySQLdb.escape_string(drug_data['drug_url']), MySQLdb.escape_string(drug_data['drug_name']), MySQLdb.escape_string(drug_data['drug_params']), MySQLdb.escape_string(drug_data['img_small']), MySQLdb.escape_string(drug_data['img_details'])) spider_db.dbExecu(sql, conn) spider_db.db_close(conn)
def _duplicate_removal(self, url_list): """URL去重""" conn = spider_db.db_conn(db=self.db_name, user=self.user, passwd=self.passwd) duplicate_list = list(url_list) print "获取到的链接为{}个".format(len(url_list)) for url in duplicate_list: sql = """select drug_url from alihealth_drug where drug_url='{}';""".format( url) cur = spider_db.dbExecu(sql, conn) if cur.fetchone() is not None: print "{} was duplicate".format(url) url_list.remove(url) spider_db.db_close(conn) print "去重后的链接为{}个".format(len(url_list)) return url_list