Esempio n. 1
0
class PageDataConfDao:
    """Data-access helper for the ``ec_spider.t_page_data_conf`` table.

    NOTE(review): the select/delete statements splice their arguments into
    the SQL text with ``str.format``; only pass trusted numeric ids.
    """

    def __init__(self):
        """Create the ``DataBase`` instance every statement is issued through."""
        self.db = DataBase()

    def insert(self, page_data_id, p_type, p_key, p_value, p_description):
        """Insert one row, commit, and return what ``self.db.insert`` returned.

        ``created`` and ``updated`` are each filled from their own
        ``get_current_timestamp()`` call.
        """
        # One "%s" placeholder per column: the literal one plus six appended.
        sql = "insert into ec_spider.t_page_data_conf (page_data_id, p_type, p_key, p_value, p_description, created, updated) values(%s{})".format(", %s"*6)
        params = (page_data_id, p_type, p_key, p_value, p_description, get_current_timestamp(), get_current_timestamp())
        key = self.db.insert(sql, params)
        self.db.commit()
        return key

    def query_by_page_data_id(self, page_data_id):
        """Return the ``self.db.query`` result for rows with this ``page_data_id``."""
        data = self.db.query("select id, page_data_id, p_type, p_key, p_value, p_description, created, updated from ec_spider.t_page_data_conf where page_data_id = {}".format(page_data_id))
        return data

    def delete(self, id):
        """Delete the row with the given ``id`` and commit.

        The commit mirrors ``delete_by_page_data_id`` and ``insert``, which
        both commit explicitly, so the deletion is not left uncommitted.
        """
        self.db.delete("delete from ec_spider.t_page_data_conf where id = {}".format(id))
        self.db.commit()

    def delete_by_page_data_id(self, page_data_id):
        """Delete every row belonging to ``page_data_id`` and commit."""
        self.db.delete("delete from ec_spider.t_page_data_conf where page_data_id = {}".format(page_data_id))
        self.db.commit()
Esempio n. 2
0
class DataTabColumnDao:
    """Data-access helper for the ``ec_spider.t_data_tab_column`` table.

    NOTE(review): the select/delete statements splice their arguments into
    the SQL text with ``str.format``; only pass trusted numeric ids.
    """

    def __init__(self):
        """Create the ``DataBase`` instance every statement is issued through."""
        self.db = DataBase()

    def insert(self, data_tab_id, col_name, col_type, col_type_length, col_description, check_col_name, is_file_column, is_primary_key, is_data_maintenance_pk):
        """Insert one column definition, commit, and return what ``self.db.insert`` returned.

        ``created`` and ``updated`` are each filled from their own
        ``get_current_timestamp()`` call.
        """
        # One "%s" placeholder per column: the literal one plus ten appended.
        sql = "insert into ec_spider.t_data_tab_column (data_tab_id, col_name, col_type, col_type_length, col_description, check_col_name, is_file_column, is_primary_key, is_data_maintenance_pk, created, updated) values(%s{})".format(", %s"*10)
        params = (data_tab_id, col_name, col_type, col_type_length, col_description, check_col_name, is_file_column, is_primary_key, is_data_maintenance_pk, get_current_timestamp(), get_current_timestamp())
        key = self.db.insert(sql, params)
        self.db.commit()
        return key

    def query_by_tab_id(self, tab_id):
        """Return the ``self.db.query`` result for rows whose ``data_tab_id`` equals ``tab_id``."""
        data = self.db.query("select id, data_tab_id, col_name, col_type, col_type_length, col_description, check_col_name, is_file_column, is_primary_key, is_data_maintenance_pk, created, updated from ec_spider.t_data_tab_column where data_tab_id = {}".format(tab_id))
        return data

    def delete(self, id):
        """Delete the row with the given ``id`` and commit.

        The commit mirrors ``delete_by_data_tab_id`` and ``insert``, which
        both commit explicitly, so the deletion is not left uncommitted.
        """
        self.db.delete("delete from ec_spider.t_data_tab_column where id = {}".format(id))
        self.db.commit()

    def delete_by_data_tab_id(self, data_tab_id):
        """Delete every column row belonging to ``data_tab_id`` and commit."""
        self.db.delete("delete from ec_spider.t_data_tab_column where data_tab_id = {}".format(data_tab_id))
        self.db.commit()
Esempio n. 3
0
class PageDao:
    """Data-access helper for the ``ec_spider.t_page`` table."""

    def __init__(self):
        """Create the ``DataBase`` instance every statement is issued through."""
        self.db = DataBase()

    def insert(self, website, name, menu_level_first, url, menu_level_second=None, menu_level_third=None):
        """Insert one page row, commit, and return what ``self.db.insert`` returned."""
        # Eight columns -> the literal "%s" plus seven appended placeholders.
        statement = "insert into ec_spider.t_page (website, name, menu_level_first, menu_level_second, menu_level_third, url, created, updated) values(%s{})".format(", %s" * 7)
        row = (
            website,
            name,
            menu_level_first,
            menu_level_second,
            menu_level_third,
            url,
            get_current_timestamp(),
            get_current_timestamp(),
        )
        new_key = self.db.insert(statement, row)
        self.db.commit()
        return new_key

    def query(self, id):
        """Return the ``self.db.query`` result for the page with this ``id``."""
        # NOTE(review): ``id`` is spliced into the SQL text; pass trusted values only.
        statement = "select id, website, name, menu_level_first, menu_level_second, menu_level_third, url, created, updated from ec_spider.t_page where id = {}".format(id)
        return self.db.query(statement)

    def delete(self, id):
        """Delete the page with this ``id`` and commit."""
        statement = "delete from ec_spider.t_page where id = {}".format(id)
        self.db.delete(statement)
        self.db.commit()
Esempio n. 4
0
class DataTabDao:
    """Data-access helper for the ``ec_spider.t_data_tab`` table."""

    def __init__(self):
        """Create the ``DataBase`` instance every statement is issued through."""
        self.db = DataBase()

    def insert(self, name, page_data_id, check_name_rule, business_columns, pre_cnt=1):
        """Insert one data-table row, commit, and return what ``self.db.insert`` returned."""
        # Seven columns -> the literal "%s" plus six appended placeholders.
        statement = "insert into ec_spider.t_data_tab (name, page_data_id, check_name_rule, business_columns, pre_cnt, created, updated) values(%s{})".format(", %s" * 6)
        row = (
            name,
            page_data_id,
            check_name_rule,
            business_columns,
            pre_cnt,
            get_current_timestamp(),
            get_current_timestamp(),
        )
        new_key = self.db.insert(statement, row)
        self.db.commit()
        return new_key

    def query_by_page_data_id(self, page_data_id):
        """Return the ``self.db.query`` result for rows with this ``page_data_id``."""
        statement = "select id, name, page_data_id, check_name_rule, business_columns, pre_cnt, created, updated from ec_spider.t_data_tab where page_data_id = {}".format(page_data_id)
        return self.db.query(statement)

    def query(self, id):
        """Return the ``self.db.query`` result for the row with this ``id``."""
        statement = "select id, name, page_data_id, check_name_rule, business_columns, pre_cnt, created, updated from ec_spider.t_data_tab where id = {}".format(id)
        return self.db.query(statement)

    def delete(self, id):
        """Delete the row with this ``id`` and commit."""
        statement = "delete from ec_spider.t_data_tab where id = {}".format(id)
        self.db.delete(statement)
        self.db.commit()
Esempio n. 5
0
class PageDataDao:
    """Data-access helper for the ``ec_spider.t_page_data`` table."""

    def __init__(self):
        """Create the ``DataBase`` instance every statement is issued through."""
        self.db = DataBase()

    def insert(self, page_id, name, status, data_source_type, data_update_freq, data_update_time, rule_read_file_prefix, rule_save_path_suffix):
        """Insert one page-data row, commit, and return what ``self.db.insert`` returned."""
        # Ten columns -> the literal "%s" plus nine appended placeholders.
        statement = "insert into ec_spider.t_page_data (page_id, name, status, data_source_type, data_update_freq, data_update_time, rule_read_file_prefix, rule_save_path_suffix, created, updated) values(%s{})".format(", %s" * 9)
        row = (
            page_id,
            name,
            status,
            data_source_type,
            data_update_freq,
            data_update_time,
            rule_read_file_prefix,
            rule_save_path_suffix,
            get_current_timestamp(),
            get_current_timestamp(),
        )
        new_key = self.db.insert(statement, row)
        self.db.commit()
        return new_key

    def query(self, id):
        """Return the ``self.db.query`` result for the row with this ``id``."""
        statement = "select id, page_id, name, status, data_source_type, data_update_freq, data_update_time, rule_read_file_prefix, rule_save_path_suffix, created, updated from ec_spider.t_page_data where id = {}".format(id)
        return self.db.query(statement)

    def query_by_page_id(self, page_id):
        """Return the ``self.db.query`` result for rows with this ``page_id``."""
        statement = "select id, page_id, name, status, data_source_type, data_update_freq, data_update_time, rule_read_file_prefix, rule_save_path_suffix, created, updated from ec_spider.t_page_data where page_id = {}".format(page_id)
        return self.db.query(statement)

    def delete(self, id):
        """Delete the row with this ``id`` and commit."""
        statement = "delete from ec_spider.t_page_data where id = {}".format(id)
        self.db.delete(statement)
        self.db.commit()
Esempio n. 6
0
 def __init__(self, store_id, page_data_id, port):
     """
     Initialise the information a spider task needs.
     1. Instantiate objects: Store, PageData, Table
     2. Initialise the environment (working directories)
     3. Confirm the web_driver connection
     4. Confirm the store LOGIN through web_driver; login_flag becomes True once
        browsing works and the store is logged in (presumably set by
        check_store_login - confirm there)

     Any exception raised during set-up is logged and recorded as
     ``self.error = ErrorEnum.ERROR_1000`` instead of propagating.

     :param store_id: store id, used to fetch the store object
     :param page_data_id: id of the page data block to crawl, used to fetch the page data block object
     :param port: port of the already-running browser service
     """
     self.error = None
     self.login_flag = False
     try:
         self.store = StoreService().get_store(store_id)
         self.page_data = PageDataService().get_page_data(page_data_id)
         self.page = self.page_data.page
         self.db = DataBase()
         self.port = port
         self.FILE_PART_PATH = self.store.name + '/' + self.page_data.name + '/' + self.page_data.data_update_freq
         self.FILE_DOWNLOAD_PATH = setting.FILE_DOWNLOAD_PATH_PREFIX + '/' + self.store.name
         self.FILE_PROCESS_PATH = setting.FILE_PROCESS_PATH_PREFIX + '/' + self.FILE_PART_PATH
         self.FILE_BACKUP_PATH = setting.FILE_BACKUP_PATH_PREFIX + '/' + self.FILE_PART_PATH
         # exist_ok=True replaces the exists()-then-makedirs() pair, which could
         # raise if another task created the directory between the two calls.
         for directory in (self.FILE_DOWNLOAD_PATH, self.FILE_PROCESS_PATH, self.FILE_BACKUP_PATH):
             os.makedirs(directory, exist_ok=True)
         # Clean up the download directory
         self.clear_download_path()
         # Initialise the webdriver and determine whether the store is logged in
         self.driver = None
         self.init_web_driver()
         self.check_store_login()
         # Data dimension dictionary
         self.data_dimension_dict = {}
         # Needed when reading data from downloaded files
         self.file_names = []
         # Single file, single data table storage, e.g. [DataFrame]
         # Multiple files/sheets, single data table storage, e.g. [DataFrame, DataFrame, DataFrame] # TODO none yet, ignored
         # Multiple files/sheets, multiple data table storage: condition is page_data.is_multiple_tab()
         # e.g. [{'tab.name', [DataFrame]}, {'tab.name', [DataFrame, DataFrame]}]
         self.source_data_list = []
         self.data_list = []
     except Exception as e:
         # Set-up failures are reported through self.error rather than raised.
         Logging.error(e)
         self.error = ErrorEnum.ERROR_1000
Esempio n. 7
0
class StoreDao:
    """Data-access helper for the ``ec_spider.t_store`` table."""

    def __init__(self):
        """Create the ``DataBase`` instance every statement is issued through."""
        self.db = DataBase()

    def insert(self,
               name,
               plt_name,
               plt_store_id,
               login_username=None,
               url=None,
               status=1):
        """Insert one store row, commit, and return what ``self.db.insert`` returned."""
        # Eight columns -> the literal "%s" plus seven appended placeholders.
        statement = "insert into ec_spider.t_store (name, plt_name, plt_store_id, login_username, url, status, created, updated) values(%s{})".format(', %s' * 7)
        row = (
            name,
            plt_name,
            plt_store_id,
            login_username,
            url,
            status,
            get_current_timestamp(),
            get_current_timestamp(),
        )
        new_key = self.db.insert(statement, row)
        self.db.commit()
        return new_key

    def query(self, id):
        """Return the ``self.db.query`` result for the store with this ``id``."""
        statement = "select id, name, plt_name, plt_store_id, login_username, url, status, created, updated from ec_spider.t_store where id = {}".format(id)
        return self.db.query(statement)

    def query_by_name(self, store_name):
        """Return the ``self.db.query`` result for stores named ``store_name``."""
        # NOTE(review): the name is spliced between single quotes, so a name
        # containing a quote changes the statement; pass trusted values only.
        statement = "select id, name, plt_name, plt_store_id, login_username, url, status, created, updated from ec_spider.t_store where name = '{}'".format(store_name)
        return self.db.query(statement)

    def delete(self, id):
        """Delete the store with this ``id``, commit, and return ``True``."""
        statement = "delete from ec_spider.t_store where id = {}".format(id)
        self.db.delete(statement)
        self.db.commit()
        return True
Esempio n. 8
0
 def __init__(self):
     """Start with no recorded error and create a ``DataBase`` instance."""
     # No error recorded yet.
     self.error = None
     self.db = DataBase()
Esempio n. 9
0
 def __init__(self):
     """Create the ``DataBase`` instance stored on ``self.db``."""
     self.db = DataBase()
Esempio n. 10
0
class StorePropertyDao:
    """Data-access helper for the ``ec_spider.t_store_property`` table."""

    def __init__(self):
        """Create the ``DataBase`` instance every statement is issued through."""
        self.db = DataBase()

    def insert(self, store_id, p_type, p_key, p_value, p_description):
        """Insert one property row, commit, and return what ``self.db.insert`` returned."""
        # Seven columns -> the literal "%s" plus six appended placeholders.
        statement = "insert into ec_spider.t_store_property (store_id, p_type, p_key, p_value, p_description, created, updated) values(%s{})".format(', %s' * 6)
        row = (
            store_id,
            p_type,
            p_key,
            p_value,
            p_description,
            get_current_timestamp(),
            get_current_timestamp(),
        )
        new_key = self.db.insert(statement, row)
        self.db.commit()
        return new_key

    def query_by_store_id(self, store_id):
        """Return the ``self.db.query`` result for rows with this ``store_id``."""
        statement = "select id, store_id, p_type, p_key, p_value, p_description, created, updated from ec_spider.t_store_property where store_id = {}".format(store_id)
        return self.db.query(statement)

    def delete(self, id):
        """Delete the row with this ``id`` and commit."""
        statement = "delete from ec_spider.t_store_property where id = {}".format(id)
        self.db.delete(statement)
        self.db.commit()

    def delete_by_store_id(self, store_id):
        """Delete every property row of ``store_id`` and commit."""
        statement = "delete from ec_spider.t_store_property where store_id = {}".format(store_id)
        self.db.delete(statement)
        self.db.commit()
Esempio n. 11
0
from common.db import DataBase

if __name__ == '__main__':
    # Manual check: build two DataBase objects, dispose only the second, then
    # print both objects' db_conn attributes and the class-level pool.
    db1 = DataBase()
    # data1 = db1.query('select * from temp_test')
    # db1.dispose()
    db2 = DataBase()
    db2.dispose()
    # db1 has not been disposed at this point; db2 has.
    print(db1.db_conn, db2.db_conn)
    # NOTE(review): `_DB__pool` is the name-mangled form of a `__pool` attribute
    # declared in a class named `DB`; confirm DataBase exposes it under this name.
    print(DataBase._DB__pool)