Esempio n. 1
0
 def get_median(self, obj, stations, bundary):
     db = connect_db()
     db_cursor = db.cursor()
     median = []
     for sta_i in range(len(stations)):
         q = """SELECT
                         d.dNum AS median
                 FROM
                     (
                     SELECT
                         @rowIndex := @rowIndex + 1 AS rowIndex,
                         {} AS dNum
                     FROM
                         {},
                         ( SELECT @rowIndex := - 1 ) AS tmpI
                 WHERE {} > {} AND {} < {}
                     ORDER BY
                         {}
                     ) AS d
                 WHERE
                     rowIndex IN (
                     FLOOR( @rowindex / 2 ),
                     CEIL( @rowindex / 2 ));""".format(
             obj, stations[sta_i], obj, bundary[0], obj, bundary[1], obj
         )
         db_cursor.execute(q)
         m = db_cursor.fetchall()[0]
         median.append(np.mean(m))
     close_db(db)
     return median
Esempio n. 2
0
def ori_extreme(plt_i, sta_i):
    db = connect_db()
    db_cursor = db.cursor()
    obj = pollutants[plt_i]

    q = "SELECT {} FROM {};".format(obj, stations[sta_i])
    db_cursor.execute(q)
    ori_data = np.array([i[0] for i in db_cursor.fetchall()])
    close_db(db)

    box_stats = cbook.boxplot_stats(ori_data)

    fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
    _ = axs[0].bxp(box_stats, showfliers=True)
    _ = axs[1].bxp(box_stats, showfliers=False)

    axs[0].set_xticks([])
    axs[1].set_xticks([])
    axs[0].set_ylabel(pollutant_labels[plt_i])
    # axs[0].set_ylabel("$\\rm PM_{10} (\\mu g \\cdot cm^{-3})$")

    fig.savefig(
        "{}_{}_box.png".format(stations[sta_i], pollutants[plt_i]),
        dpi=300,
        bbox_inches="tight",
    )
    print(
        "{}站点{}数据:共{}条,<0有{}条,>999有{}条。".format(
            stations[sta_i],
            pollutants[plt_i],
            len(ori_data),
            len(ori_data[ori_data < 0]),
            len(ori_data[ori_data > 999]),
        )
    )
Esempio n. 3
0
 def get_p_data(self, obj, stations, percentile, bundary):
     db = connect_db()
     db_cursor = db.cursor()
     p_data = []
     for sta_i in range(len(stations)):
         sta_p_list = []
         for p in percentile:
             q = """SELECT
                             d.dNum AS median
                     FROM
                         (
                         SELECT
                             @rowIndex := @rowIndex + 1 AS rowIndex,
                             {} AS dNum
                         FROM
                             {},
                             ( SELECT @rowIndex := - 1 ) AS tmpI
                     WHERE {} > {} AND {} < {}
                         ORDER BY
                             {}
                         ) AS d
                     WHERE
                         rowIndex IN (
                         FLOOR( @rowindex * {} ),
                         CEIL( @rowindex * {} ));""".format(
                 obj, stations[sta_i], obj, bundary[0], obj, bundary[1], obj, p, p
             )
             db_cursor.execute(q)
             m = db_cursor.fetchall()[0]
             sta_p_list.append(np.mean(m))
         p_data.append(sta_p_list)
     close_db(db)
     return p_data
Esempio n. 4
0
def mysql_q(query="SELECT 'PF';", series=False):
    db = connect_db()
    db_cursor = db.cursor()
    db_cursor.execute(query)
    result = db_cursor.fetchall()
    close_db(db)
    if series:
        result = pd.Series(result).apply(lambda x: x[0])
    return result
Esempio n. 5
0
 def get_std(self, obj, stations, bundary):
     db = connect_db()
     db_cursor = db.cursor()
     std = []
     for sta_i in range(len(stations)):
         q = "SELECT STDDEV(LOG({})) FROM {} WHERE {} > {} AND {} < {};".format(
             obj, stations[sta_i], obj, bundary[0], obj, bundary[1]
         )
         db_cursor.execute(q)
         std.append(db_cursor.fetchall()[0][0])
     close_db(db)
     return std
Esempio n. 6
0
def get_data_dict(plt_i, pollutants_thd, method="trim"):
    db = connect_db()
    db_cursor = db.cursor()

    data_dict = {}

    for sta_i in range(len(stations)):
        data_dict[stations[sta_i]] = {}
        for ssn_i in range(len(season_bundaries)):
            data_dict[stations[sta_i]][ssn_i] = {}
            for year_i in range(len(years)):
                time_span = [
                    str(years[year_i]) + "-" + season_bundaries[ssn_i],
                    str(years[year_i] + math.floor((ssn_i + 1) / len(season_bundaries)))
                    + "-"
                    + season_bundaries[(ssn_i + 1) % len(season_bundaries)],
                ]

                q = 'SELECT {} FROM {} WHERE datetime >= "{}" AND datetime < "{}" {};'.format(
                    pollutants[plt_i],
                    stations[sta_i],
                    time_span[0],
                    time_span[1],
                    lmt[1],
                )
                # print(q)
                if db_cursor.execute(q):
                    data = db_cursor.fetchall()
                    data_dict[stations[sta_i]][ssn_i][years[year_i]] = []
                    for d in data:
                        if method == "trim":
                            if (
                                d[0] > pollutants_thd[sta_i][0]
                                and d[0] < pollutants_thd[sta_i][1]
                            ):
                                data_dict[stations[sta_i]][ssn_i][years[year_i]].append(
                                    d[0]
                                )
                        elif method == "clip":
                            data_dict[stations[sta_i]][ssn_i][years[year_i]].append(
                                np.clip(
                                    d[0],
                                    pollutants_thd[sta_i][0],
                                    pollutants_thd[sta_i][1],
                                )
                            )

    close_db(db)
    del data

    with open("{}_data_dict.pickle".format(pollutants[plt_i]), "wb") as f:
        pickle.dump(data_dict, f)
Esempio n. 7
0
def way3_trim(plt_i):
    db = connect_db()
    db_cursor = db.cursor()

    obj = pollutants[plt_i]
    for obj, obj_label, obj_log_label in zip(
        pollutants, pollutant_labels, pollutant_log_labels
    ):
        t1 = MAD(obj=obj)
        t2 = sigma3(obj=obj)
        t3 = percentile(obj=obj)
        bundary_list = [t1.MAD["span"], t2.sigma3["span"], t3.p_data]
        method_names = ["MAD", "sigma3", "percentile005"]

        for method_name, bundarys in zip(method_names, bundary_list):
            t_data = []
            for sta_i in range(len(stations)):
                q = "SELECT {} FROM {} WHERE {} > {} AND {} < {};".format(
                    obj,
                    stations[sta_i],
                    obj,
                    bundarys[sta_i][0],
                    obj,
                    bundarys[sta_i][1],
                )
                db_cursor.execute(q)
                t_data.append([i[0] for i in db_cursor.fetchall()])

                tt_data = np.array(t_data[sta_i])
                tt_data = tt_data[tt_data > 0.0001]

                fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
                _ = axs[0].hist(x=tt_data, bins=50, density=True)
                _ = axs[1].hist(x=np.log(tt_data), bins=100, density=True)
                axs[0].set_xlabel(obj_label)
                axs[1].set_xlabel(obj_log_label)
                axs[0].set_ylabel("频率")
                filename = "{}_{}_freq_trim_{}.png".format(
                    stations[sta_i], obj, method_name
                )
                fig.savefig(filename, dpi=300, bbox_inches="tight")
    close_db(db)
Esempio n. 8
0
 def get_MAD(self, obj, stations, ratio, bundary):
     db = connect_db()
     db_cursor = db.cursor()
     median = self.get_median(obj, stations, bundary)
     abs_dev_median = []
     for sta_i in range(len(stations)):
         q = """SELECT
                         d.absDev AS absDevMed
                 FROM
                         (
                         SELECT
                             @rowIndex := @rowIndex + 1 AS rowIndex,
                             absDev
                         FROM
                         (
                         SELECT
                                 ABS({}-{}) AS absDev
                             FROM
                             {}
                         WHERE {} > {} AND {} < {}
                         ) AS tmpAbsDevT,
                             ( SELECT @rowIndex := - 1 ) AS tmpI
                         ORDER BY
                             absDev
                         ) AS d
                 WHERE
                     rowIndex IN (
                     FLOOR( @rowindex / 2 ),
                     CEIL( @rowindex / 2 ));""".format(
             obj, median[sta_i], stations[sta_i], obj, bundary[0], obj, bundary[1]
         )
         db_cursor.execute(q)
         m = db_cursor.fetchall()[0]
         abs_dev_median.append(np.mean(m))
     close_db(db)
     MAD_span = [
         [m - adm * ratio, m + adm * ratio] for m, adm in zip(median, abs_dev_median)
     ]
     return {"median": median, "adm": abs_dev_median, "span": MAD_span}
Esempio n. 9
0
def log_sigma3_trim():
    db = connect_db()
    db_cursor = db.cursor()

    bundarys_list = {}
    for obj, obj_label, obj_log_label in zip(
        pollutants, pollutant_labels, pollutant_log_labels
    ):
        trim_conf = log_sigma3(obj=obj)
        bundarys = trim_conf.sigma3["span"]

        bundarys_list[obj] = bundarys

        t_data = []
        for sta_i in range(1):
            q = "SELECT {} FROM {} WHERE {} > {} AND {} < {};".format(
                obj, stations[sta_i], obj, bundarys[sta_i][0], obj, bundarys[sta_i][1]
            )
            db_cursor.execute(q)
            t_data.append([i[0] for i in db_cursor.fetchall()])

        tt_data = np.array(t_data[0])
        tt_data = tt_data[tt_data > 0.0001]

        fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
        _ = axs[0].hist(x=tt_data, bins=50, density=True)
        _ = axs[1].hist(x=np.log(tt_data), bins=100, density=True)
        axs[0].set_xlabel(obj_label)
        axs[1].set_xlabel(obj_log_label)
        axs[0].set_ylabel("频率")
        filename = "cm_{}_freq_trim2_{}.png".format(obj, "logsigma3")
        fig.savefig(filename, dpi=300, bbox_inches="tight")
        print(filename)
    close_db(db)
    with open("log_sigma3_trim_bundarys_list.json", "w") as f:
        json.dump(bundarys_list, f)
    return bundarys_list
Esempio n. 10
0
    )

    args = argparser.parse_args()

    db = connect_db()

    if args.mode == "init":
        init_db()
        test_download(db, args.beg, args.end)
        datetime_beg = datetime.datetime(2017, 1, 1)
        auto_download(
            db=db,
            datetime_beg=datetime_beg,
            int_min=args.int,
            max_data_int=datetime.timedelta(days=args.max),
            verbose=False,
        )
    elif args.mode == "loop":
        datetime_beg = datetime.datetime(2020, 1, 1)
        auto_download(
            db=db,
            datetime_beg=datetime_beg,
            int_min=args.int,
            max_data_int=datetime.timedelta(days=args.max),
            verbose=False,
        )
    elif args.mode == "once":
        test_download(db, args.beg, args.end)

    close_db(db)