コード例 #1
0
def parse_sql_to_excel(sql_name, table_name, excel_name):
    """把SQL转化成EXCEL"""

    try:
        # 创建数据库连接
        conn = spider_db.db_conn(sql_name)
        # 查询需要转化的数据表
        sql = "select * from {}".format(table_name)
        cur = spider_db.dbExecu(sql, conn)
    except Exception as e:
        print "数据库连接异常"
    else:
        # 创建Excel
        workbook = xlwt.Workbook(encoding='ascii')
        worksheet = workbook.add_sheet(table_name)

        # 往单元格内写数据
        sql_data = cur.fetchall()
        for index_outer, data in enumerate(sql_data):
            for index_inner, da in enumerate(data):
                worksheet.write(index_outer, index_inner, label=str(da))

        # 保存excel
        workbook.save(excel_name)
        # 关闭数据库连接
        spider_db.db_close(conn)
コード例 #2
0
def parse_excel_to_sql(sql_name, table_name, excel_name):
    """把EXCEL转化成SQL"""

    try:
        # 打开excel读取数据
        data = xlrd.open_workbook(excel_name)
        # 创建数据库连接
        conn = spider_db.db_conn(sql_name)
    except Exception as e:
        print "参数异常"
    else:
        # 获取工作表
        table_list = data.sheets()

        for table in table_list:
            # 获取行列的值
            rows = table.nrows
            cols = table.ncols
            # 获取整行和整列的值(返回list)
            # table.row_values(i)
            # table.col_values(i)
            # 遍历数据
            for row in xrange(rows):
                for col in xrange(cols):
                    val = table.cell(row, col).value
                    
                    sql = "insert into {}() values() {}".format(table_name)
                    cur = spider_db.dbExecu(sql, conn)
コード例 #3
0
ファイル: alihealth.py プロジェクト: BruceCobb/SpiderAlibaba
    def _details_page(self, a, b):
        """根据给定的参数返回正确的页面"""
        conn = spider_db.db_conn(db="gaoji_drug_data")
        sql = """select drug_url_app, drug_params, img_small, img_details from alihealth_app where img_details='{}' limit %s, %s;""" % (
            a, b)

        cur = spider_db.dbExecu(sql, conn)
        return cur
コード例 #4
0
ファイル: alihealth.py プロジェクト: BruceCobb/SpiderAlibaba
    def _save_app_data(self, drug_data):
        """持久化数据"""
        conn = spider_db.db_conn(db=self.db_name,
                                 user=self.user,
                                 passwd=self.passwd)
        sql = """update alihealth_app set img_details='{}' where drug_url_app='{}';""" \
            .format(MySQLdb.escape_string(drug_data['img_details']),
                    MySQLdb.escape_string(drug_data['url']))

        spider_db.dbExecu(sql, conn)
        spider_db.db_close(conn)
コード例 #5
0
ファイル: alihealth.py プロジェクト: BruceCobb/SpiderAlibaba
    def _save_data(self, drug_data):
        """持久化数据"""
        conn = spider_db.db_conn(db=self.db_name,
                                 user=self.user,
                                 passwd=self.passwd)
        sql = """insert into alihealth_drug values(0, "{}", "{}", "{}", "{}", "{}");""".\
            format(MySQLdb.escape_string(drug_data['drug_url']),
                   MySQLdb.escape_string(drug_data['drug_name']),
                   MySQLdb.escape_string(drug_data['drug_params']),
                   MySQLdb.escape_string(drug_data['img_small']),
                   MySQLdb.escape_string(drug_data['img_details']))

        spider_db.dbExecu(sql, conn)
        spider_db.db_close(conn)
コード例 #6
0
ファイル: alihealth.py プロジェクト: BruceCobb/SpiderAlibaba
    def _duplicate_removal(self, url_list):
        """URL去重"""
        conn = spider_db.db_conn(db=self.db_name,
                                 user=self.user,
                                 passwd=self.passwd)

        duplicate_list = list(url_list)
        print "获取到的链接为{}个".format(len(url_list))

        for url in duplicate_list:
            sql = """select drug_url from alihealth_drug where drug_url='{}';""".format(
                url)
            cur = spider_db.dbExecu(sql, conn)
            if cur.fetchone() is not None:
                print "{} was duplicate".format(url)
                url_list.remove(url)

        spider_db.db_close(conn)
        print "去重后的链接为{}个".format(len(url_list))

        return url_list