Beispiel #1
0
def _build_load_data_sql(load_method, csv_dir, dup_mode, db_table_name,
                         col_name_string):
    """Render the ``LOAD DATA`` statement that imports one CSV file.

    All arguments are interpolated into the SQL text as-is (no escaping), so
    they must come from trusted code, not from user input.
    """
    return """
            LOAD DATA {} '{}'
                {} INTO TABLE {}
                CHARACTER SET utf8
                FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ESCAPED BY '\"' LINES TERMINATED by '{}'
                IGNORE 1 LINES
                ({});""".format(load_method, csv_dir, dup_mode, db_table_name,
                                CR_STR, col_name_string)


def insert_to_db(db, csv_dir, db_table_name, col_name_series, replace=False):
    """Bulk-load a CSV file into ``db_table_name`` and commit.

    The statement is first built with the module-level ``insert_method``.
    If that raises ``MySQLdb.OperationalError`` a warning is logged, the
    module-level ``insert_method`` is switched to ``insert_methods[1]`` (the
    change persists for later calls) and the load is retried once.

    Args:
        db: Open connection exposing ``cursor()`` and ``commit()``.
        csv_dir: Path of the CSV file to load; its first line is skipped
            (``IGNORE 1 LINES``).
        db_table_name: Target table name.
        col_name_series: Iterable of column names in CSV column order (the
            caller in this file passes ``df.columns.to_series()``).
        replace: When true, rows with duplicate keys are loaded with
            ``REPLACE``; otherwise with ``IGNORE``.

    Returns:
        Whatever ``cursor.execute`` returns for the load statement
        (presumably the number of affected rows - confirm against the driver).

    Raises:
        MySQLdb.OperationalError: If the retry with the fallback method also
            fails.
    """
    global insert_method, insert_methods
    # Join instead of the pandas string-sum trick: works for any iterable and
    # yields "" (not the int 0) when there are no columns.
    col_name_string = ",".join(
        "`{}`".format(col_name) for col_name in col_name_series)
    db_cursor = db.cursor()
    method = "REPLACE" if replace else "IGNORE"
    try:
        n_row_insert = db_cursor.execute(
            _build_load_data_sql(insert_method, csv_dir, method,
                                 db_table_name, col_name_string))
    except MySQLdb.OperationalError as e:
        Logger.log_warn(
            "WARNING:",
            "current method applied to insert into the database caused an error, change the method from {} to {}. {}."
            .format(insert_method, insert_methods[1], e),
        )
        # Remember the fallback globally so later calls use it right away.
        insert_method = insert_methods[1]
        n_row_insert = db_cursor.execute(
            _build_load_data_sql(insert_method, csv_dir, method,
                                 db_table_name, col_name_string))
    db.commit()
    return n_row_insert
Beispiel #2
0
    def generate_links(self, station_no_ori, beg_arr=None, end_arr=None):
        """Build the download URL and query parameters for one station.

        Args:
            station_no_ori: Row label into ``station_info_ori`` selecting the
                station.
            beg_arr: Start of the period. Either a ``datetime.datetime`` (or
                subclass) used as-is, or a sequence passed to
                ``self.creat_dt``. When omitted or empty, both bounds are
                read interactively from standard input.
            end_arr: End of the period, same accepted forms as ``beg_arr``.

        Returns:
            dict with keys ``link`` (full URL), ``params`` (the query
            parameters), ``station_no_ori``, ``beg_dt`` and ``end_dt``.
        """
        # ``None`` stands in for the former mutable ``[]`` defaults.
        beg_arr = [] if beg_arr is None else beg_arr
        end_arr = [] if end_arr is None else end_arr

        # isinstance (not ``type(...) ==``) so datetime subclasses are accepted.
        beg_is_dt = isinstance(beg_arr, datetime.datetime)
        if not beg_is_dt and len(beg_arr) == 0:
            # No start given: keep asking until both inputs parse.
            # Each answer is split on whitespace and converted to int16.
            while True:
                try:
                    beg_arr = np.array(
                        input(
                            "Please enter start time of data (yyyy, mm, dd):"
                        ).split(),
                        dtype=np.int16,
                    )
                    beg_dt = self.creat_dt(beg_arr)
                    end_arr = np.array(
                        input(
                            "Please enter end time of data (yyyy, mm, dd):"
                        ).split(),
                        dtype=np.int16,
                    )
                    end_dt = self.creat_dt(end_arr)
                    break
                except ValueError as e:
                    Logger.log_fail(
                        "FAILURE:",
                        "wrong format of input, please retry. {}.".format(e),
                    )
        else:
            # Convert each bound on its own so a datetime and an array can be
            # mixed in one call.
            beg_dt = beg_arr if beg_is_dt else self.creat_dt(beg_arr)
            end_dt = (
                end_arr
                if isinstance(end_arr, datetime.datetime)
                else self.creat_dt(end_arr)
            )

        params = {
            "station_id": station_info_ori.loc[station_no_ori, "station_id"],
            "start_time": beg_dt.strftime("%Y-%m-%d %H:%M:%S"),
            "stop_time": end_dt.strftime("%Y-%m-%d %H:%M:%S"),
            "station_name": station_info_ori.loc[station_no_ori, "station_name1"],
        }
        return {
            "link": "http://ycmets.com/PC/download.asp?" + parse.urlencode(params),
            "params": params,
            "station_no_ori": station_no_ori,
            "beg_dt": beg_dt,
            "end_dt": end_dt,
        }
Beispiel #3
0
def init_db():
    """Create ``station_db`` and populate its metadata, data and auth tables.

    Steps, in order: create/select the database, rebuild ``station_info`` and
    ``col_info`` from the module-level frames (via temporary CSV files under
    ``TEMP_DIR``), create one data table per station, set up the authority
    tables and users, then close the connection and clear temporary files.
    """
    db = connect_db(True)
    cursor = db.cursor()

    cursor.execute("""CREATE DATABASE IF NOT EXISTS station_db;
           USE station_db;""")

    # Station metadata: drop, dump the frame to CSV, reload from that CSV.
    delete_table(db, "station_info")
    station_csv = "{}/s_info.csv".format(TEMP_DIR)
    station_info.to_csv(station_csv, index=False, encoding="utf-8")
    n_stations = update_table(
        db, "station_info", station_csv, station_info, "station_no",
        "INT PRIMARY KEY")
    Logger.log_normal(
        "UPDATE:",
        "{} lines of imformation of stations updated.".format(n_stations))

    # Column metadata: the date/time label rows are excluded, and the fresh
    # 0..n-1 index becomes the ``id`` column.
    delete_table(db, "col_info")
    keep_rows = ~col_info["data_label"].isin(["日期", "时间"])
    c_info = (
        col_info.loc[keep_rows, :]
        .reset_index(drop=True)
        .reset_index(drop=False)
        .rename(columns={"index": "id"})
    )
    c_info["db_name"] = c_info["en_name"].map(EN2CLEAN_DICT)
    column_csv = "{}/c_info.csv".format(TEMP_DIR)
    c_info.to_csv(column_csv, index=False, encoding="utf-8")
    n_columns = update_table(
        db, "col_info", column_csv, c_info, "id", "int primary key")
    Logger.log_normal(
        "UPDATE:",
        "{} lines of imformation of columns updated.".format(n_columns))

    # One data table per station.
    for station_idx in range(STATION_NUM):
        columns = aval_col_types(station_idx)
        table_name = station_info.loc[station_idx, "db_table_name"]
        creat_table(cursor, table_name, columns["en_name"], columns["type"])

    init_authority_tables(db)
    create_user(db)

    close_db(db)
    rm_tmp()
Beispiel #4
0
 def save(self, filename, verbose=False):
     """Write ``self.df`` as UTF-8 CSV under ``TEMP_DIR`` and return the path.

     Args:
         filename: Base name for the file. Its final extension, if any, is
             replaced by ``.csv``. An empty string selects an automatic
             name of the form ``<db_table_name>_<begin>_<end>`` taken from
             ``self.info[0]``.
         verbose: When true, log the saved location.

     Returns:
         The path written, which is also stored in ``self.csv_dir``.
     """
     # Local import: keeps this method independent of the module's imports.
     import os

     if filename != "":
         # Strip only the last extension. Splitting on the first "." would
         # turn "a.b.csv" into "a" and "./x.csv" into an empty name.
         filename = os.path.splitext(filename)[0]
     else:
         filename = (
             station_info_ori.loc[self.info[0]["station_no_ori"], "db_table_name"]
             + "_"
             + self.info[0]["beg_dt"].strftime("%Y%m%d-%H%M%S")
             + "_"
             + self.info[0]["end_dt"].strftime("%Y%m%d-%H%M%S")
         )
     self.csv_dir = "{}/{}.csv".format(TEMP_DIR, filename)
     self.df.to_csv(self.csv_dir, index=False, encoding="utf-8")
     if verbose:
         Logger.log_high("SAVE:", "data saved, location:{}.".format(self.csv_dir))
     return self.csv_dir
Beispiel #5
0
 def insert_to_db(self, db):
     """Load the saved CSV (``self.csv_dir``) into this station's table.

     Does nothing except log a warning when ``self.is_empty`` is set.
     Otherwise delegates to the module-level ``insert_to_db`` function, logs
     the value it returns and removes the temporary CSV file.
     """
     if self.is_empty:
         Logger.log_warn(
             "WARNING:", "fail to insert into database, for data is empty."
         )
         return

     table_name = station_info_ori.loc[
         self.info[0]["station_no_ori"], "db_table_name"
     ]
     inserted = insert_to_db(
         db, self.csv_dir, table_name, self.df.columns.to_series()
     )
     Logger.log_high(
         "INSERT:",
         "insert into database, number of lines:{}.".format(inserted),
     )
     rm_tmp_file(self.csv_dir)
Beispiel #6
0
def create_user(db):
    """Create the non-root users from ``db_config`` and grant their privileges.

    For every key of ``db_config`` other than ``"root"``, a
    ``'<name>'@'localhost'`` account is created unless it is already listed
    in ``mysql.user``. Creation first uses the module-level ``id_method``
    clause; on ``MySQLdb.ProgrammingError`` it switches ``id_method`` to
    ``id_methods[1]`` (persistently) and retries once.

    Afterwards ``webUser`` is granted SELECT on every station table plus
    ``col_info`` and ``station_info``; ``loginAssistant`` is granted SELECT
    on ``member``, ``question`` and ``auth`` and UPDATE/INSERT/DELETE on
    ``member``; finally privileges are flushed.

    Args:
        db: Open connection exposing ``cursor()``.

    Note:
        Names and passwords are interpolated into the SQL text unescaped;
        ``db_config`` must therefore be trusted configuration.
    """
    global id_methods, id_method
    db_cursor = db.cursor()
    db_cursor.execute(
        "SELECT DISTINCT CONCAT('''',user,'''@''',host,'''') AS query FROM mysql.user;"
    )
    existing_users = {row[0] for row in db_cursor.fetchall()}

    create_stmt = "CREATE USER {} {} '{}';"
    for user_name in db_config:
        if user_name == "root":
            continue
        user_name_full = "'{}'@'{}'".format(user_name, "localhost")
        if user_name_full in existing_users:
            Logger.log_warn(
                "WARNING:",
                "user {} already exists.".format(user_name_full),
            )
            continue

        passwd = db_config[user_name]["passwd"]
        try:
            db_cursor.execute(
                create_stmt.format(user_name_full, id_method, passwd))
        except MySQLdb.ProgrammingError as e:
            Logger.log_warn(
                "WARNING:",
                "current method applied to create a user caused an error, change the method from {} to {}. {}."
                .format(id_method, id_methods[1], e),
            )
            # Keep the fallback for the remaining users and later calls.
            id_method = id_methods[1]
            db_cursor.execute(
                create_stmt.format(user_name_full, id_method, passwd))
        Logger.log_normal("USER:", "add user {}.".format(user_name_full))

    web_user_tables = list(
        station_info["db_table_name"]) + ["col_info", "station_info"]
    login_assistant_tables = ["member", "question", "auth"]
    select_grants = [(table, "webUser") for table in web_user_tables]
    select_grants += [(table, "loginAssistant")
                      for table in login_assistant_tables]
    for table, grantee in select_grants:
        db_cursor.execute(
            "GRANT SELECT ON station_db.{} TO '{}'@'localhost';".format(
                table, grantee))
    db_cursor.execute(
        "GRANT UPDATE, INSERT, DELETE ON `station_db`.`member` TO 'loginAssistant'@'localhost';"
    )
    db_cursor.execute("FLUSH PRIVILEGES;")
    Logger.log_normal("USER:", "privileges flushed.")
Beispiel #7
0
 def download(self, filename="", verbose=False):
     """Fetch every link in ``self.info``, merge the results and save as CSV.

     Each non-empty sheet is reduced to the date/time columns plus the
     labels enabled for its station in ``col_info_ori``, pre-processed, and
     outer-merged on ``datetime`` into ``self.df``.

     Args:
         filename: Forwarded to ``self.save``.
         verbose: When true, log the in-memory size; also forwarded to
             ``self.save``.

     Returns:
         The path returned by ``self.save``, or ``np.nan`` when a
         ``urllib.error.URLError`` occurs or no link yields any rows (in
         both cases ``self.is_empty`` is set to True).
     """
     fetched = 0
     try:
         for info in self.info:
             sheet = pd.read_excel(info["link"])
             if len(sheet) == 0:
                 continue
             fetched += 1
             station_tag = str(info["station_no_ori"])
             enabled = col_info_ori["station" + station_tag] == 1
             station_labels = col_info_ori.loc[enabled, "label" + station_tag]
             # The first enabled label is dropped and replaced by the
             # explicit date and time columns.
             wanted = ["日期", "时间"] + list(station_labels)[1:]
             sheet = self.data_pre_process(sheet[wanted])
             if fetched == 1:
                 self.df = sheet
             else:
                 self.df = self.df.merge(sheet, on="datetime", how="outer")
     except urllib.error.URLError as e:
         Logger.log_fail("FAILURE:", "error in network connection. {}".format(e))
         self.is_empty = True
         return np.nan

     if fetched == 0:
         Logger.log_warn("WARNING:", "no more recent data available in the period.")
         self.is_empty = True
         return np.nan

     self.is_empty = False
     self.mem_size = self.size_unify(np.sum(self.df.memory_usage()))
     if verbose:
         Logger.log_high(
             "FETCH:", "data fetched, size:{}.".format(self.mem_size)
         )
     return self.save(filename, verbose)
Beispiel #8
0
def auto_download(
    db,
    datetime_beg=datetime.datetime(2020, 1, 1),
    int_min=1,
    max_data_int=datetime.timedelta(days=7),
    verbose=False,
):
    """Keep every station table up to date; loops forever.

    On each pass, for every station whose ``check_station`` result is a
    ``[first, last]`` list:

    * if the first stored timestamp is later than
      ``datetime_beg + max_data_int``, the span ``datetime_beg -> first`` is
      downloaded again;
    * if the last stored timestamp is older than now, the span
      ``last -> now`` is downloaded.

    Stations for which ``check_station`` returns anything other than a list
    are skipped (presumably an empty table - confirm in ``check_station``).

    Args:
        db: Open database connection passed through to the helpers.
        datetime_beg: Earliest timestamp the tables are expected to cover.
        int_min: Pause between passes, in minutes. May be fractional.
        max_data_int: Passed to ``auto_download_period``; also the tolerance
            used when deciding whether early data is missing.
        verbose: Forwarded to ``auto_download_period``.

    This function never returns normally.
    """
    Logger.log_high(
        "LOOP:",
        "updating database automatically, interval:{} minute(s).".format(int_min),
    )
    while True:
        for station_no in range(STATION_NUM):
            db_table_name = station_info.loc[station_no, "db_table_name"]
            datetime_firstlast = check_station(db, db_table_name)
            # Anything that is not a [first, last] list means "nothing to do".
            if not isinstance(datetime_firstlast, list):
                continue
            datetime_first, datetime_last = datetime_firstlast

            # Back-fill when the oldest stored row starts too late.
            if datetime_first > datetime_beg + max_data_int:
                Logger.log_warn(
                    "WARNING:",
                    "previous data missing ({}->{}), re-downloading...".format(
                        datetime_beg.strftime("%Y/%m/%d-%H:%M:%S"),
                        datetime_first.strftime("%Y/%m/%d-%H:%M:%S"),
                    ),
                )
                auto_download_period(
                    db,
                    station_no,
                    datetime_beg,
                    datetime_first,
                    max_data_int,
                    verbose=verbose,
                )

            # Catch up from the newest stored row to the present.
            datetime_now = datetime.datetime.now()
            if datetime_now > datetime_last:
                auto_download_period(
                    db,
                    station_no,
                    datetime_last,
                    datetime_now,
                    max_data_int,
                    verbose=verbose,
                )

        datetime_now = datetime.datetime.now()
        print(
            "Synchronization step finished ({}->{}), sleeps for {} minute(s).".format(
                datetime_beg.strftime("%Y/%m/%d-%H:%M:%S"),
                datetime_now.strftime("%Y/%m/%d-%H:%M:%S"),
                int_min,
            )
        )

        # Sleep in chunks of at most 5 s, as before, instead of one long
        # sleep. Counting seconds rather than iterating
        # ``range(int_min * 12)`` also accepts fractional ``int_min``.
        remaining_s = int_min * 60
        while remaining_s > 0:
            nap_s = min(5, remaining_s)
            time.sleep(nap_s)
            remaining_s -= nap_s
Beispiel #9
0
def init_authority_tables(db):
    """Create and seed the ``member``, ``question`` and ``auth`` tables.

    The current table list is read once with ``SHOW TABLES``. For each of
    the three tables, in that order, a warning is logged if the name is
    already present; otherwise its CREATE + INSERT script is executed in a
    single ``cursor.execute`` call.

    Args:
        db: Open connection exposing ``cursor()``.
    """
    cursor = db.cursor()
    cursor.execute("SHOW TABLES; ")
    existing_tables = [row[0] for row in cursor.fetchall()]

    member_script = """
CREATE TABLE IF NOT EXISTS `member`(
  `id` INT(11) UNSIGNED PRIMARY KEY auto_increment,
  `username` VARCHAR(50) NOT NULL,
  `password` VARCHAR(50) NOT NULL,
  `question_id` TINYINT(1) UNSIGNED NOT NULL,
  `answer` VARCHAR(50) NOT NULL,
  `truename` VARCHAR(50) DEFAULT NULL,
  `address` VARCHAR(50) DEFAULT NULL,
  `email` VARCHAR(50) NOT NULL,
  `authority` TINYINT(1) UNSIGNED DEFAULT 1
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

INSERT INTO `member` (`username`, `password`, `question_id`, `answer`, `truename`, `address`, `email`, `authority`) VALUES
    ('admin', '123123', '1', 'answer', '管理员', '上海交通大学农业与生物学院', 'admin@surfes', '3'),
    ('visitor', 'visitor', '1', 'answer', '访客', '访客', 'visitor@surfes', '1'),
    ('freeze', 'freeze', '1', 'answer', '冻结测试账户', '冻结测试账户',  'freeze@surfes', '0');"""

    question_script = """
CREATE TABLE IF NOT EXISTS `question` (
  `id` TINYINT(1) UNSIGNED PRIMARY KEY auto_increment,
  `question` VARCHAR(50) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

INSERT INTO `question` (`question`) VALUES
    ("您祖母叫什么名字?"), ("您祖父叫什么名字?"), ("您的生日是什么时候?(例如:1980/01/01)"),
    ("您母亲的名字?"), ("您父亲的名字?"), ("您宠物的名字叫什么?"), ("您的车号是什么?"),
    ("您的家乡是哪里?"), ("您的小学叫什么名字?"), ("您最喜欢的颜色?"),
    ("您女儿/儿子的小名叫什么?"), ("谁是您儿时最好的伙伴?"), ( "您最尊敬的老师的名字?");"""

    auth_script = """
CREATE TABLE IF NOT EXISTS `auth` (
  `id` INT(1) UNSIGNED PRIMARY KEY auto_increment,
  `contentID` VARCHAR(50) NOT NULL,
	`authLevel` TINYINT(1) UNSIGNED NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

INSERT INTO `auth` (`contentID`, `authLevel`) VALUES
	('about', 1),
	('account', 1),
	('footingInfo', 1),
	('live', 1),
	('historyTable', 2),
	('historyGraph', 2),
	('download', 2),
	('setting', 1),
	('afterLoad', 1);"""

    # Same check-then-create step for each table, in the original order.
    setup_plan = (
        ("member", member_script),
        ("question", question_script),
        ("auth", auth_script),
    )
    for table_name, script in setup_plan:
        if table_name in existing_tables:
            Logger.log_warn(
                "WARNING:",
                "Table '{}' already exists.".format(table_name),
            )
            continue
        cursor.execute(script)