def insert_to_db(db, csv_dir, db_table_name, col_name_series, replace=False):
    """Bulk-load a CSV file into a MySQL table with LOAD DATA.

    Parameters
    ----------
    db : open MySQLdb connection
    csv_dir : str, path of the CSV file to load (first line is a header and
        is skipped via IGNORE 1 LINES)
    db_table_name : str, target table name
    col_name_series : pandas.Series of column names matching the CSV header
    replace : bool, use REPLACE (overwrite duplicates) instead of IGNORE

    Returns
    -------
    int : number of affected rows as reported by cursor.execute
    """
    global insert_method, insert_methods
    # Back-tick-quote every column name and join with commas: `a`,`b`,`c`
    # (Series string concat + .sum() folds the whole series into one string).
    col_name_string = ("`" + col_name_series + "`,").sum().strip(",")
    db_cursor = db.cursor()
    method = "REPLACE" if replace else "IGNORE"

    def _build_sql(load_mode):
        # Single place to format the statement — the original duplicated this
        # literal verbatim in both the try and the except branch.
        return """
            LOAD DATA {} '{}' {} INTO TABLE {}
            CHARACTER SET utf8
            FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\"'
            LINES TERMINATED by '{}'
            IGNORE 1 LINES
            ({});""".format(load_mode, csv_dir, method, db_table_name,
                            CR_STR, col_name_string)

    try:
        n_row_insert = db_cursor.execute(_build_sql(insert_method))
    except MySQLdb.OperationalError as e:
        # LOCAL INFILE availability differs between servers; switch to the
        # alternative load mode once and remember it for future calls.
        Logger.log_warn(
            "WARNING:",
            "current method applied to insert into the database caused an error, change the method from {} to {}. {}."
            .format(insert_method, insert_methods[1], e),
        )
        insert_method = insert_methods[1]
        n_row_insert = db_cursor.execute(_build_sql(insert_method))
    db.commit()
    return n_row_insert
def generate_links(self, station_no_ori, beg_arr=None, end_arr=None):
    """Build the download URL and request params for one station and period.

    Parameters
    ----------
    station_no_ori : label/index into station_info_ori
    beg_arr, end_arr : either datetime.datetime objects, or array-like
        [yyyy, mm, dd, ...] component sequences passed to self.creat_dt;
        when omitted, the user is prompted interactively until the input
        parses.

    Returns
    -------
    dict with the encoded "link", the query "params", the station number and
    the resolved "beg_dt"/"end_dt" datetimes.
    """
    # FIX: no mutable default arguments — None stands in for "not supplied".
    if beg_arr is None:
        beg_arr = []
    if end_arr is None:
        end_arr = []
    # FIX: isinstance instead of type(...) == ..., so datetime subclasses
    # (e.g. pandas.Timestamp) are accepted here instead of crashing below.
    if isinstance(beg_arr, datetime.datetime) and isinstance(end_arr, datetime.datetime):
        beg_dt = beg_arr
        end_dt = end_arr
    else:
        if len(beg_arr) == 0:
            # Interactive fallback: keep prompting until both stamps parse.
            while True:
                try:
                    beg_arr = np.array(
                        input(
                            "Please enter start time of data (yyyy, mm, dd):"
                        ).split(),
                        dtype=np.int16,
                    )
                    beg_dt = self.creat_dt(beg_arr)
                    end_arr = np.array(
                        input(
                            "Please enter end time of data (yyyy, mm, dd):"
                        ).split(),
                        dtype=np.int16,
                    )
                    end_dt = self.creat_dt(end_arr)
                    break
                except ValueError as e:
                    Logger.log_fail(
                        "FAILURE:",
                        "wrong format of input, please retry. {}.".format(e),
                    )
                    continue
        else:
            beg_dt = self.creat_dt(beg_arr)
            end_dt = self.creat_dt(end_arr)
    params = {
        "station_id": station_info_ori.loc[station_no_ori, "station_id"],
        "start_time": beg_dt.strftime("%Y-%m-%d %H:%M:%S"),
        "stop_time": end_dt.strftime("%Y-%m-%d %H:%M:%S"),
        "station_name": station_info_ori.loc[station_no_ori, "station_name1"],
    }
    return {
        "link": "http://ycmets.com/PC/download.asp?" + parse.urlencode(params),
        "params": params,
        "station_no_ori": station_no_ori,
        "beg_dt": beg_dt,
        "end_dt": end_dt,
    }
def init_db():
    """Bootstrap the station database from scratch.

    Order matters: create the database, load the station / column metadata
    tables, create one data table per station, then the auth tables and the
    MySQL users (users need the tables to exist for their GRANTs).
    Relies on module-level helpers and the station_info / col_info frames.
    """
    db = connect_db(True)
    db_cursor = db.cursor()
    # NOTE(review): two statements in one execute() — assumes the connection
    # allows multi-statements; presumably connect_db(True) sets that up.
    db_cursor.execute("""CREATE DATABASE IF NOT EXISTS station_db; USE station_db;""")
    # --- station_info: dump the DataFrame to a temp CSV, then bulk-load it.
    delete_table(db, "station_info")
    s_info_tmp_dir = TEMP_DIR + "/s_info.csv"
    station_info.to_csv(s_info_tmp_dir, encoding="utf-8", index=False)
    s_num = update_table(
        db,
        "station_info",
        s_info_tmp_dir,
        station_info,
        "station_no",
        "INT PRIMARY KEY",
    )
    Logger.log_normal(
        "UPDATE:", "{} lines of imformation of stations updated.".format(s_num))
    # --- col_info: drop the date/time label rows (they are merged into a
    # single datetime column elsewhere), renumber rows as `id`, and map the
    # English names to their database-safe form before loading.
    delete_table(db, "col_info")
    c_info = col_info.loc[
        ~col_info["data_label"].isin(["日期", "时间"]), :].reset_index(drop=True)
    c_info = c_info.reset_index(drop=False)
    c_info.rename(columns={"index": "id"}, inplace=True)
    c_info["db_name"] = c_info["en_name"].map(EN2CLEAN_DICT)
    c_info_tmp_dir = TEMP_DIR + "/c_info.csv"
    c_info.to_csv(c_info_tmp_dir, encoding="utf-8", index=False)
    c_num = update_table(db, "col_info", c_info_tmp_dir, c_info, "id", "int primary key")
    Logger.log_normal(
        "UPDATE:", "{} lines of imformation of columns updated.".format(c_num))
    # --- one data table per station, typed from that station's columns.
    for ii in range(STATION_NUM):
        tb_cols = aval_col_types(ii)
        tb_name = station_info.loc[ii, "db_table_name"]
        creat_table(db_cursor, tb_name, tb_cols["en_name"], tb_cols["type"])
    init_authority_tables(db)
    create_user(db)
    close_db(db)
    rm_tmp()  # clean up the temp CSVs written above
def save(self, filename, verbose=False):
    """Write self.df to a CSV under TEMP_DIR and return its path.

    A non-empty filename keeps only the part before its first dot; an empty
    one derives a name from the station's table name and the data period.
    """
    if filename != "":
        base_name = filename.split(".")[0]
    else:
        first_info = self.info[0]
        table_name = station_info_ori.loc[
            first_info["station_no_ori"], "db_table_name"]
        base_name = "{}_{}_{}".format(
            table_name,
            first_info["beg_dt"].strftime("%Y%m%d-%H%M%S"),
            first_info["end_dt"].strftime("%Y%m%d-%H%M%S"),
        )
    self.csv_dir = "{}/{}.csv".format(TEMP_DIR, base_name)
    self.df.to_csv(self.csv_dir, index=False, encoding="utf-8")
    if verbose:
        Logger.log_high("SAVE:", "data saved, location:{}.".format(self.csv_dir))
    return self.csv_dir
def insert_to_db(self, db):
    """Load this object's saved CSV into its station's database table.

    No-op (with a warning) when the last download produced no data; on
    success the temporary CSV is removed.
    """
    if self.is_empty:
        # Nothing was fetched — refuse to bulk-load an empty file.
        Logger.log_warn(
            "WARNING:", "fail to insert into database, for data is empty."
        )
        return
    table_name = station_info_ori.loc[
        self.info[0]["station_no_ori"], "db_table_name"]
    inserted = insert_to_db(
        db,
        self.csv_dir,
        table_name,
        self.df.columns.to_series(),
    )
    Logger.log_high(
        "INSERT:",
        "insert into database, number of lines:{}.".format(inserted),
    )
    rm_tmp_file(self.csv_dir)
def create_user(db):
    """Create the non-root MySQL accounts listed in db_config and grant them
    the privileges the web front-end needs.

    Existing accounts are skipped. When the server rejects the current
    CREATE USER identification syntax (it differs between MySQL versions),
    the alternative syntax from id_methods is tried once and remembered.
    """
    global id_methods, id_method
    db_cursor = db.cursor()
    # List existing accounts formatted as 'user'@'host' so they can be
    # compared with user_name_full below.
    db_cursor.execute(
        "SELECT DISTINCT CONCAT('''',user,'''@''',host,'''') AS query FROM mysql.user;"
    )
    users = [i[0] for i in db_cursor.fetchall()]
    user_names = [i for i in db_config.keys() if i != "root"]
    for user_name in user_names:
        user_name_full = "'{}'@'{}'".format(user_name, "localhost")
        if user_name_full in users:
            Logger.log_warn(
                "WARNING:",
                "user {} already exists.".format(user_name_full),
            )
            continue
        try:
            db_cursor.execute("CREATE USER {} {} '{}';".format(
                user_name_full, id_method, db_config[user_name]["passwd"]))
        except MySQLdb.ProgrammingError as e:
            Logger.log_warn(
                "WARNING:",
                "current method applied to create a user caused an error, change the method from {} to {}. {}."
                .format(id_method, id_methods[1], e),
            )
            id_method = id_methods[1]
            db_cursor.execute("CREATE USER {} {} '{}';".format(
                user_name_full, id_method, db_config[user_name]["passwd"]))
        # FIX: the original line was mangled to `"USER:"******"add user..."`
        # (secret-scrubbing artifact, invalid Python) — restore the normal
        # two-argument Logger call used everywhere else in this file.
        Logger.log_normal("USER:", "add user {}.".format(user_name_full))
    # Read-only access for the web user on all data + metadata tables;
    # the login assistant additionally manages the member table.
    webUser_table = list(
        station_info["db_table_name"]) + ["col_info", "station_info"]
    loginAssistant_table = ["member", "question", "auth"]
    privilege_args = [(i, "webUser") for i in webUser_table] + [
        (i, "loginAssistant") for i in loginAssistant_table]
    for table_name, grantee in privilege_args:
        db_cursor.execute(
            "GRANT SELECT ON station_db.{} TO '{}'@'localhost';".format(
                table_name, grantee))
    db_cursor.execute(
        "GRANT UPDATE, INSERT, DELETE ON `station_db`.`member` TO 'loginAssistant'@'localhost';"
    )
    db_cursor.execute("FLUSH PRIVILEGES;")
    Logger.log_normal("USER:", "privileges flushed.")
def download(self, filename="", verbose=False):
    """Fetch all links in self.info from the web, merge the tables on
    'datetime', and persist the result via self.save().

    Parameters
    ----------
    filename : str, forwarded to self.save (empty means auto-named)
    verbose : bool, log fetch/save progress

    Returns
    -------
    the CSV path from self.save() on success; np.nan when the network
    failed or every link returned an empty table (self.is_empty is set
    accordingly in all cases).
    """
    count = 0  # how many links actually returned rows
    try:
        for no, info in enumerate(self.info):
            online_data = pd.read_excel(info["link"])
            if len(online_data) != 0:
                count += 1
                station_no_ori = info["station_no_ori"]
                # Columns this station provides according to col_info_ori;
                # the first label is replaced by the date/time column pair.
                # ("日期"/"时间" are the source's own date/time headers —
                # runtime strings, must match the remote sheet exactly.)
                required_labels = col_info_ori.loc[
                    col_info_ori["station" + str(station_no_ori)] == 1,
                    "label" + str(station_no_ori),
                ]
                required_labels = ["日期", "时间"] + list(required_labels)[1:]
                online_data = self.data_pre_process(online_data[required_labels])
                if count == 1:
                    self.df = online_data
                else:
                    # Outer-merge so links covering different periods keep
                    # every timestamp.
                    self.df = self.df.merge(online_data, on="datetime", how="outer")
    except urllib.error.URLError as e:
        Logger.log_fail("FAILURE:", "error in network connection. {}".format(e))
        self.is_empty = True
        return np.nan
    if count == 0:
        # No link produced data in the requested period.
        Logger.log_warn("WARNING:", "no more recent data available in the period.")
        self.is_empty = True
        return np.nan
    else:
        self.is_empty = False
        self.mem_size = self.size_unify(np.sum(self.df.memory_usage()))
        if verbose:
            Logger.log_high(
                "FETCH:", "data fetched, size:{}.".format(self.mem_size)
            )
        return self.save(filename, verbose)
def auto_download(
    db,
    datetime_beg=datetime.datetime(2020, 1, 1),
    int_min=1,
    max_data_int=datetime.timedelta(days=7),
    verbose=False,
):
    """Keep every station table in sync with the remote source, forever.

    Each pass over the stations: back-fill any gap between datetime_beg and
    the earliest stored record, then download from the latest stored record
    up to now. Sleeps int_min minutes between passes. Never returns.

    Parameters
    ----------
    db : open database connection
    datetime_beg : earliest timestamp the tables should reach back to
    int_min : minutes to sleep between synchronisation passes
    max_data_int : maximum period per download request; also the tolerance
        used when deciding whether early data is "missing"
    verbose : forwarded to auto_download_period
    """
    Logger.log_high(
        "LOOP:",
        "updating database automatically, interval:{} minute(s).".format(int_min),
    )
    while True:
        for station_no in range(STATION_NUM):
            db_table_name = station_info.loc[station_no, "db_table_name"]
            datetime_firstlast = check_station(db, db_table_name)
            # Empty table -> non-list sentinel: nothing to sync yet.
            # (FIX: isinstance instead of the type(...) != list comparison.)
            if not isinstance(datetime_firstlast, list):
                continue
            datetime_first, datetime_last = datetime_firstlast
            # Back-fill: stored history starts later than requested.
            if datetime_first > datetime_beg + max_data_int:
                Logger.log_warn(
                    "WARNING:",
                    "previous data missing ({}->{}), re-downloading...".format(
                        datetime_beg.strftime("%Y/%m/%d-%H:%M:%S"),
                        datetime_first.strftime("%Y/%m/%d-%H:%M:%S"),
                    ),
                )
                auto_download_period(
                    db,
                    station_no,
                    datetime_beg,
                    datetime_first,
                    max_data_int,
                    verbose=verbose,
                )
            # Catch-up: stored history ends before the current time.
            datetime_now = datetime.datetime.now()
            if datetime_now > datetime_last:
                auto_download_period(
                    db,
                    station_no,
                    datetime_last,
                    datetime_now,
                    max_data_int,
                    verbose=verbose,
                )
        datetime_now = datetime.datetime.now()
        print(
            "Synchronization step finished ({}->{}), sleeps for {} minute(s).".format(
                datetime_beg.strftime("%Y/%m/%d-%H:%M:%S"),
                datetime_now.strftime("%Y/%m/%d-%H:%M:%S"),
                int_min,
            )
        )
        # Sleep in 5-second slices (12 per minute) rather than one long
        # sleep, so the process stays responsive to interruption.
        for _ in range(int_min * 12):
            time.sleep(5)
def init_authority_tables(db):
    """Create the web-auth tables (member, question, auth) with their seed
    rows, skipping any table that already exists in the current database.

    NOTE(review): each branch runs a CREATE plus an INSERT in one execute()
    call — this requires multi-statement support on the connection; confirm
    that connect_db enables it. The Chinese seed strings below are runtime
    data shown to users and must not be altered.
    """
    db_cursor = db.cursor()
    db_cursor.execute("SHOW TABLES; ")
    tables = [i[0] for i in db_cursor.fetchall()]
    # --- member: login accounts; seeds admin / visitor / frozen-test
    # accounts with authority levels 3 / 1 / 0.
    if "member" in tables:
        Logger.log_warn(
            "WARNING:",
            "Table 'member' already exists.",
        )
    else:
        db_cursor.execute("""
            CREATE TABLE IF NOT EXISTS `member`(
            `id` INT(11) UNSIGNED PRIMARY KEY auto_increment,
            `username` VARCHAR(50) NOT NULL,
            `password` VARCHAR(50) NOT NULL,
            `question_id` TINYINT(1) UNSIGNED NOT NULL,
            `answer` VARCHAR(50) NOT NULL,
            `truename` VARCHAR(50) DEFAULT NULL,
            `address` VARCHAR(50) DEFAULT NULL,
            `email` VARCHAR(50) NOT NULL,
            `authority` TINYINT(1) UNSIGNED DEFAULT 1
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
            INSERT INTO `member`
            (`username`, `password`, `question_id`, `answer`, `truename`, `address`, `email`, `authority`)
            VALUES
            ('admin', '123123', '1', 'answer', '管理员', '上海交通大学农业与生物学院', 'admin@surfes', '3'),
            ('visitor', 'visitor', '1', 'answer', '访客', '访客', 'visitor@surfes', '1'),
            ('freeze', 'freeze', '1', 'answer', '冻结测试账户', '冻结测试账户', 'freeze@surfes', '0');"""
        )
    # --- question: the security-question catalogue for password recovery.
    if "question" in tables:
        Logger.log_warn(
            "WARNING:",
            "Table 'question' already exists.",
        )
    else:
        db_cursor.execute("""
            CREATE TABLE IF NOT EXISTS `question` (
            `id` TINYINT(1) UNSIGNED PRIMARY KEY auto_increment,
            `question` VARCHAR(50) NOT NULL
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
            INSERT INTO `question` (`question`)
            VALUES
            ("您祖母叫什么名字?"), ("您祖父叫什么名字?"),
            ("您的生日是什么时候?(例如:1980/01/01)"), ("您母亲的名字?"),
            ("您父亲的名字?"), ("您宠物的名字叫什么?"), ("您的车号是什么?"),
            ("您的家乡是哪里?"), ("您的小学叫什么名字?"), ("您最喜欢的颜色?"),
            ("您女儿/儿子的小名叫什么?"), ("谁是您儿时最好的伙伴?"), (
                "您最尊敬的老师的名字?");""")
    # --- auth: maps page/content IDs to the minimum authority level needed.
    if "auth" in tables:
        Logger.log_warn(
            "WARNING:",
            "Table 'auth' already exists.",
        )
    else:
        db_cursor.execute("""
            CREATE TABLE IF NOT EXISTS `auth` (
            `id` INT(1) UNSIGNED PRIMARY KEY auto_increment,
            `contentID` VARCHAR(50) NOT NULL,
            `authLevel` TINYINT(1) UNSIGNED NOT NULL
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
            INSERT INTO `auth` (`contentID`, `authLevel`)
            VALUES
            ('about', 1), ('account', 1), ('footingInfo', 1), ('live', 1),
            ('historyTable', 2), ('historyGraph', 2), ('download', 2),
            ('setting', 1), ('afterLoad', 1);""")