Esempio n. 1
0
    def run(self, id, only=False):
        """
        Load one task record and execute it.

        A record whose ``is_delete`` flag is not ``0`` is treated as
        soft-deleted: a DingTalk notice is sent and nothing is executed.
        Otherwise the task is started through ``self.start`` and, unless
        ``only`` is set, its follow-up task lists are processed:

        * ``syn_task`` is handed to ``check_appendix_task`` whenever it is
          non-empty after ``string_clean``;
        * ``sub_task`` is handed over only when the run returned status ``1``.

        :param id: task id or task file name, forwarded to ``load_df``
        :param only: when true, skip the ``syn_task`` / ``sub_task`` follow-ups
        :return: None
        """
        print(
            f"----****----****----****开始执行任务id:{id};****----****----****----")

        task = self.load_df(id)

        # Soft-deleted record: notify and stop before anything is executed.
        if task["is_delete"][0] != 0:
            dingdingrobot(content=task["file_basename"][0] + "\n已经假删除;",
                          subject="test")
            return

        run_status = self.start(
            task["model"][0],
            task["file_basename"][0],
            task["crontab"][0],
            task["guandata_uuid"][0],
            task["next_run_time"][0],
        )

        if only:
            return

        syn_tasks = self.string_clean(task["syn_task"][0])
        if syn_tasks:
            self.check_appendix_task(syn_tasks)

        # The sub-task list is cleaned regardless of the outcome, but only
        # followed up when the main task reported status 1.
        sub_tasks = self.string_clean(task["sub_task"][0])
        if run_status == 1 and sub_tasks:
            self.check_appendix_task(sub_tasks)
Esempio n. 2
0
    def __get_df(self,
                 sql,
                 conn=None,
                 index=0,
                 toprint=None,
                 connect_once=True):
        """
        Run a query on the instance connection and return it as a DataFrame.

        The statement is first normalised with ``sql_clean``.  The connection
        held in ``self.conn`` is reused when it reports itself open; otherwise
        a new one is obtained from ``getconn``.  Before the query runs,
        ``group_concat_max_len`` is raised for the session.

        :param sql: SQL text to execute
        :param conn: unused; kept so existing callers keep working
        :param index: forwarded to ``to_print``
        :param toprint: forwarded to ``to_print``
        :param connect_once: when false, the connection is closed after a
            successful query
        :return: the query result as a ``pandas.DataFrame``
        :raises Exception: whatever the query raised.  The connection is
            closed first, and a DingTalk alert is sent when
            ``StringHelper.sql_error_check`` classifies it as a syntax error.
        """
        sql = self.sql_clean(sql)

        # Reconnect when the connection is closed, or when probing it fails
        # (for example because no connection has been created yet).
        try:
            if not self.conn.open:
                self.conn = self.getconn()
        except Exception:
            self.conn = self.getconn()

        try:
            self.cursor = self.conn.cursor()
            self.cursor.execute("SET SESSION group_concat_max_len = 102400;")
            df = pd.read_sql(sql, self.conn)
            self.to_print(df, index=index, toprint=toprint)
            if not connect_once:
                self.close(self.conn, self.cursor)
        except Exception as e:
            print("连接异常")
            self.close(self.conn, self.cursor)
            content = repr(e)
            error_content = StringHelper.error(content)

            # Only syntax errors are worth an alert here.
            if StringHelper.sql_error_check(content):
                dingdingrobot(
                    content=f"sql语法不正确\n{error_content}\n路径try_rerun",
                    subject="test")
            # Always propagate the real error.  Falling through used to hit
            # `return df` with `df` unbound, replacing the database error
            # with an UnboundLocalError.
            raise
        return df
    def __get_df(self, sql, index=0, toprint=None, connect_once=True):
        """
        Run a query on the instance connection and return it as a DataFrame.

        The statement is first normalised with ``sql_clean``.  The connection
        held in ``self.conn`` is reused when it reports itself open; otherwise
        a new one is obtained from ``getconn``.

        :param sql: SQL text to execute
        :param index: forwarded to ``to_print``
        :param toprint: forwarded to ``to_print``
        :param connect_once: when false, the connection is closed after a
            successful query
        :return: the query result as a ``pandas.DataFrame``
        :raises Exception: whatever the query raised.  The connection is
            closed first, and a DingTalk alert is sent when
            ``StringHelper.sql_error_check`` classifies it as a syntax error.
        """
        sql = self.sql_clean(sql)

        # Reconnect when the connection is closed, or when probing it fails
        # (for example because no connection has been created yet).  A cursor
        # is only created on the second path, as before.
        try:
            if not self.conn.open:
                self.conn = self.getconn()
        except Exception:
            self.conn = self.getconn()
            self.cursor = self.conn.cursor()

        try:
            df = pd.read_sql(sql, self.conn)
            self.to_print(df, index=index, toprint=toprint)
            if not connect_once:
                self.close(self.conn)
        except Exception as e:
            print("(presto)连接异常")
            self.close(self.conn)
            content = repr(e)
            error_content = StringHelper.error(content)

            # Only syntax errors are worth an alert here.
            if StringHelper.sql_error_check(content):
                dingdingrobot(
                    content=f"(presto)sql语法不正确\n{error_content}\n路径try_rerun",
                    subject="test")
            # Always propagate the real error.  Falling through used to hit
            # `return df` with `df` unbound, replacing the query error with
            # an UnboundLocalError.
            raise

        return df
Esempio n. 4
0
def kill_main():
    """
    Fetch the process table from ``get_pids`` and report it to DingTalk.

    The frame is printed, and when its ``ps_pids`` column is non-empty it is
    rendered as a plain-text table and sent as a notification.

    NOTE(review): only the reporting part is visible in this snippet; confirm
    whether further steps follow it.
    """
    df = get_pids()
    # An empty tuple (no rows) means there is nothing to report.
    kill_pids = tuple(df['ps_pids'])
    print(df)
    if kill_pids:
        # Render the whole frame in tabulate's "simple" text layout.
        kill_data = tabulate(df.values, headers=df.columns, tablefmt="simple")
        dingdingrobot(subject="test", title='进程监测', content=kill_data)
        """
Esempio n. 5
0
        def wrapper(*args, **kwargs):
            """
            Call the wrapped function only if the dependency table has rows.

            A one-row probe query is built for the configured engine
            ("mysql" or "presto") and executed.  If building or running the
            probe fails, a DingTalk alert is sent and a generic exception is
            raised.  If the probe returns no rows, an exception is raised so
            the caller can retry later.  Otherwise the wrapped function is
            called and its result returned; its exceptions are printed and
            re-raised unchanged.
            """
            global count_times

            try:
                if engine == "mysql":
                    from src.utils.mysqlhelper import MysqlHelper
                    sql = """
                     select {1} from {0} where {1} >= date_sub(CURDATE(),interval {2} day) {3} limit 1;
                     """.format(tb_name, col, days, conditions)
                    sqlinstance = MysqlHelper(**db)
                if engine == "presto":
                    sql = """
                    select {1} from {0} where {1} >= date_format(date_add('day',{2},current_date),'{3}') {4} limit 1 """.format(
                        tb_name, col, days, format, conditions)
                    from src.utils.prostohelper import prestohelper

                    sqlinstance = prestohelper(**config.hive_prosto)
                print(f"********检查表:{tb_name}是否已经更新********")
                print(sql)
                df = sqlinstance.get_df(sql)

            except Exception as e:
                print("depend_on_check error execute:")
                print(StringHelper.error(repr(e)))

                dingdingrobot(content=StringHelper.error(
                    f"表{tb_name},depend_on_check表达式不正确"),
                              subject=subject)

                raise Exception("depend_on_check表达式不正确")

            # No fresh rows: report (at most once) and fail so the caller
            # can retry.
            if df.shape[0] <= 0:
                print(f"depend_on_check第{count_times + 1}次")

                # NOTE(review): the counter only advances together with the
                # first notification, so the attempt number printed above
                # never goes past 2 - confirm whether that is intended.
                if dingding and count_times == 0:
                    dingdingrobot(
                        content=StringHelper.error(f"依赖表{tb_name},当日无数据"),
                        subject=subject)
                    count_times += 1

                raise Exception("依赖表未存在,来自depend_on_check")

            try:
                return call_func(*args, **kwargs)
            except Exception as e:
                print('depend_on_check error execute:')
                print(StringHelper.error(repr(e)))

                raise
Esempio n. 6
0
    def start(self, model, file_basename, crontab, guandata_uuid_str,
              next_run_time):
        """
        Execute one task script and record how the run went.

        The script ``<base>/<model>/<file_basename>.py`` is run in-process as
        ``__main__``.  On success, the comma-separated ids in
        ``guandata_uuid_str`` are appended to ``self.guandata_uuid_list``.
        On failure, the error is printed and sent to DingTalk.  Either way
        the timing and status are passed to ``maintain_job_check``.

        :param model: module (sub-directory) the task belongs to
        :param file_basename: script file name without the ``.py`` suffix
        :param crontab: cron expression; when set, ``self.next_run_time`` is
            refreshed from ``get_next_time``
        :param guandata_uuid_str: comma-separated Guandata ids
        :param next_run_time: next scheduled time, passed through to
            ``maintain_job_check``
        :return: ``1`` when the script ran without raising, else ``0``
        """
        import runpy

        began = time.time()
        try:
            script = (file_path.replace("utils", "") + "/" + model + "/" +
                      file_basename + ".py")
            runpy.run_path(script, run_name="__main__")

            uuid_text = self.string_clean(guandata_uuid_str)
            if uuid_text:
                self.guandata_uuid_list.extend(uuid_text.split(","))
            run_status = 1

        except Exception as e:
            print('timerhelper error execute:')
            error_content = StringHelper.str_cut(repr(e))
            print(error_content)

            dingdingrobot(content=f'{file_basename} \n运行失败:{error_content}',
                          subject="test")
            run_status = 0

        finished = time.time()
        elapsed = round((finished - began), 2)

        stamp = "%Y-%m-%d %H:%M:%S"
        began_text = time.strftime(stamp, time.localtime(began))
        finished_text = time.strftime(stamp, time.localtime(finished))

        if crontab:
            self.next_run_time = get_next_time(crontab)

        # NOTE(review): the `next_run_time` argument is recorded here, not
        # the value just stored on `self.next_run_time` - confirm intent.
        self.maintain_job_check(crontab, file_basename, began_text,
                                finished_text, elapsed, run_status,
                                next_run_time)

        return run_status
Esempio n. 7
0
    def model_evaluate(self, y, pred_y, des="train"):
        """
        Report classification quality for one data split.

        Prints (and plots through ``plot_suit``) the confusion matrix, the
        classification report and the ROC curve.  Results are also pushed to
        DingTalk when ``self.dingding`` is set, and logged as JSON when
        ``self.log`` is set.

        :param y: true labels; the class ratio is taken from
            ``y.value_counts()`` when it yields exactly two counts
        :param pred_y: predicted labels
        :param des: label for the split, used in titles and messages
        :return: None
        """
        # The class ratio is informational only.  When it cannot be computed
        # (for example `y` does not have exactly two classes) the ratio line
        # is left out.  The message below used to read `n`/`p` anyway and
        # raised NameError in that case.
        # NOTE(review): value_counts orders by frequency by default, so `p`
        # is the larger count rather than necessarily the positive class.
        ratio_line = ""
        try:
            p, n = y.value_counts()
            print("{0}正负样本比:{1}:{2}".format(des, n, p))
            ratio_line = "正负样本比:{0}:{1}\n".format(n, p)
        except Exception as e:
            print("{0} class ratio unavailable: {1!r}".format(des, e))

        # Confusion matrix
        conf_m = confusion_matrix(y, pred_y)
        conf_m_df = pd.DataFrame(conf_m,
                                 columns=["pred_0", "pred_1"],
                                 index=["true_0", "true_1"]).reset_index()
        plot_suit.plot_matrix(conf_m_df.set_index(["index"]),
                              title="{0}_confusion_matrix".format(des))
        confusion_matrix_table = self.pretty_tabel(conf_m_df)
        print('{0}-confusion matrix'.format(des))
        print(confusion_matrix_table)
        if self.dingding:
            dingdingrobot(des + "\n" + ratio_line +
                          str(confusion_matrix_table))

        # Text classification report.  The "avg / total" label is collapsed
        # so it survives the whitespace split below.
        rpt = classification_report(y, pred_y).replace("avg / total",
                                                       "avg/total")
        rpt_df = pd.read_csv(io.StringIO(rpt), sep=r"\s+").round({
            "precision": 2,
            "recall": 2,
            "f1-score": 2,
            "support": 2
        })
        plot_suit.plot_matrix(rpt_df, title="{0}_report".format(des))
        print('{0}-classification_report'.format(des))
        print(rpt)
        if self.dingding:
            dingdingrobot(des + "\n" + rpt)

        fpr, tpr, _ = roc_curve(y, pred_y)
        roc_auc = auc(fpr, tpr)
        plot_suit.plot_roc_curve(fpr,
                                 tpr,
                                 roc_auc,
                                 title="{0}_roc".format(des))

        # Structured log entry
        if self.log:
            data = {
                "update": self.cur_time,
                "confusion_matrix": conf_m_df.to_dict(orient='split'),
                "report": rpt_df.to_dict(orient='split'),
            }
            hflog.info(json.dumps(data))
Esempio n. 8
0
def get_next_time(crontab):
    """
    Ask the bejson cron service for the next run time of a cron expression.

    :param crontab: cron expression, posted as the ``crontxt`` form field
    :return: the first ``<br>``-separated entry of the reply's ``obj`` field,
        or ``None`` when the service answers with code ``-1``.  In that case
        a DingTalk notice naming the running script is sent, unless the
        message is the service's generic parse-failure text.
    """
    # NOTE(review): the request has no timeout, so an unresponsive service
    # blocks the caller indefinitely.
    response = requests.post(
        url="http://api.bejson.com/btools/othertools/cron/",
        data={"crontxt": crontab},
    )
    reply = response.json()

    if reply["code"] != -1:
        return reply['obj'].split("<br>")[0]

    if reply["message"] != "解析失败,请联系管理员":
        import sys
        import os
        # Bare script name: strip the directory part and the ".py" suffix.
        script = sys.argv[0]
        filename = script[script.rfind(os.sep) + 1:].split('.py')[0].split('/')[-1]
        dingdingrobot(title='crontab设置失败:', content=f"{filename}:{crontab}", subject="test")
    return None
        def wrapper(*args, **kwargs):
            """
            Run the wrapped function, first clearing rows it already wrote.

            The target table is probed for rows whose ``update_time`` falls
            inside the configured day window.  If the probe fails, a DingTalk
            alert is sent and a generic exception is raised.  If rows exist,
            they are deleted (with an optional DingTalk notice) before the
            wrapped function runs.  The wrapped function's result is returned
            in both cases.
            """
            try:
                from src.utils.mysqlhelper import MysqlHelper
                from src.utils.ding_robot import dingdingrobot
                sql = """
                select update_time from {0} where update_time > date_sub(CURDATE(),interval {1} day) limit 1;
                """.format(tb_name, days)
                mysqlinstance = MysqlHelper(**db)
                print(f"********检查表:{tb_name}是否已经更新********")
                print(sql)
                df = mysqlinstance.get_df(sql)

            except Exception as e:
                print("check_update_time error execute:")
                print(StringHelper.error(repr(e)))
                dingdingrobot(content="check_update_time表达式不正确",
                              subject=subject)
                raise Exception("check_update_time表达式不正确;")

            # Rows for the window already exist: announce and purge them so
            # the rerun starts from a clean table.
            if df.shape[0] != 0:
                if dingding:
                    dingdingrobot(content=StringHelper.error(
                        f"表名{tb_name},当日数据已经存在,将自动删除当日数据,重新执行"),
                                  subject=subject)

                sql = """
                delete from {0} where update_time > date_sub(CURDATE(),interval {1} day);
                """.format(tb_name, days)
                mysqlinstance.execute(sql)

            return call_func(*args, **kwargs)
Esempio n. 10
0
    def model_train(self, X_train, y_train, model, method=None):
        """
        Fit a model, persist it, and evaluate it on the training data.

        :param X_train: training features
        :param y_train: training labels
        :param model: estimator exposing ``fit`` and ``predict``; the text of
            ``str(model)`` before the first "(" is used as its name
        :param method: search configuration forwarded to ``model_gscv``;
            ``None`` or ``{}`` (the default) fits the model directly
        :return: the fitted model (the one returned by ``model_gscv`` when a
            search configuration is given)
        """
        model_name = str(model).split("(")[0]

        # `None` replaces the former mutable `{}` default.  An explicit `{}`
        # still means "no search".
        if method is not None and method != {}:
            model = self.model_gscv(X_train, y_train, model, method)
        else:
            model.fit(X_train, y_train)
        self.model_save(model, filename=model_name + "_" + self.cur_time)

        # Whitespace-free description shared by the notification and the log.
        model_desc = re.sub(r'\s+', "", str(model))

        if self.dingding:
            dingdingrobot(content="当前时间" + self.cur_time)
            dingdingrobot(model_name + "\n" + model_desc)
        if self.log:
            data = {"update": self.cur_time, "model": model_desc}
            hflog.info(json.dumps(data))

        pred_y = model.predict(X_train)
        self.model_evaluate(y_train, pred_y, des=model_name + "_" + "train")

        return model
Esempio n. 11
0
        def wrapper(*args, **kwargs):
            """
            Call the wrapped function, retrying when it raises.

            Up to ``n`` attempts are made, pausing ``sleep_time`` seconds
            between them.  Once the local hour is past 7, the pause drops to
            5 seconds and retrying stops after the fifth failed attempt.
            A failure that ``StringHelper.sql_error_check`` classifies as a
            SQL syntax error is never retried.

            :return: the wrapped function's result
            :raises Exception: on a SQL syntax error, or once the attempts
                are exhausted (a DingTalk alert is sent first when enabled).
                The original exception is attached as the cause.
            """
            count_times = n
            sleep = sleep_time
            for i in range(count_times):
                try:
                    return call_func(*args, **kwargs)
                except Exception as e:
                    error_n = i + 1
                    give_up = i == count_times - 1

                    # Daytime runs: short pauses and an early cut-off.
                    # NOTE(review): the original comment speaks of at most 3
                    # reruns, but the `i > 3` condition lets five attempts
                    # through - confirm which one is intended.
                    if time.localtime().tm_hour > 7:
                        sleep = 5
                        if i > 3:
                            give_up = True

                    fun = call_func.__name__

                    print("try_rerun error execute:")
                    content = repr(e)
                    error_content = StringHelper.error(content)
                    # Report the real attempt number.  It used to be derived
                    # from `i` after the cut-off had overwritten it.
                    print(
                        f"连续{error_n}次出现异常\n函数名:{fun}\n{error_content}\n路径try_rerun;"
                    )

                    # A syntax error cannot be cured by retrying.
                    if StringHelper.sql_error_check(content):
                        raise Exception("sql语法不正确,路径try_rerun;") from e

                    if give_up:
                        if dingding:
                            dingdingrobot(
                                content=
                                f"连续{error_n}次出现异常\n函数:{fun}\n{error_content}\n路径try_rerun",
                                subject=subject)
                        raise Exception(
                            "连续{}次出现异常,路径try_rerun;".format(error_n)) from e

                    # Pause only when another attempt will follow.
                    time.sleep(sleep)
Esempio n. 12
0
    def insertmany_bydf(self, df, tb, if_exists="append", n=6):
        """
        Bulk-insert a DataFrame into a table, retrying on failure.

        Missing values and the literal strings ``"nan"`` / ``"NaN"`` are sent
        as SQL NULL; every other value is sent as its string form.  Each
        attempt opens its own connection and always closes it again.

        :param df: frame whose column names match the target table's columns
        :param tb: target table name
        :param if_exists: ``"replace"`` deletes all rows first,
            ``"replace-truncate"`` truncates first, anything else appends
        :param n: maximum number of attempts; 10 seconds pass between them
        :return: None
        :raises Exception: when every attempt failed.  The last error is
            printed and sent to DingTalk, and attached as the cause.
        """
        df = df.where(pd.notnull(df),
                      "None").replace("nan", "None").replace("NaN", "None")
        df = df.astype("str")
        sql = "insert into {0} ({1}) values ({2});".format(
            tb, ",".join(df.columns), ",".join(["%s"] * len(df.columns)))

        # The rows are the same for every attempt, so build them once.  The
        # "None" placeholder is turned back into a real None (SQL NULL).
        para = [
            tuple(None if value == "None" else value for value in row)
            for row in df.values
        ]

        for attempt in range(1, n + 1):
            # Reset per attempt so the cleanup below knows what was opened.
            # A failing getconn() used to leave these names unbound.
            conn = None
            cursor = None
            failure = None
            try:
                conn = self.getconn()
                cursor = conn.cursor()
                if if_exists == "replace":
                    cursor.execute("delete from {0}".format(tb))
                elif if_exists == "replace-truncate":
                    cursor.execute("truncate {0}".format(tb))
                insert_rows = cursor.executemany(sql, para)
                conn.commit()

                print("insert数据行数:{0}".format(insert_rows))
                print("(mysql)数据库insert成功")
                return

            except Exception as e:
                failure = e
                if conn is not None:
                    # Best effort: a failed rollback must not stop retrying.
                    try:
                        conn.rollback()
                    except Exception as rollback_error:
                        print("rollback failed: {0!r}".format(rollback_error))

            finally:
                # Close exactly once per attempt, and before any pause.
                if conn is not None:
                    if cursor is not None:
                        self.close(conn, cursor)
                    else:
                        # Assumes a DB-API style connection - TODO confirm.
                        conn.close()

            # Only reached when this attempt failed.
            if attempt >= n:
                print("end分割线----------------------------分割线end")
                print('insertmany_bydf error execute:')
                error_content = StringHelper.error(repr(failure))
                print(error_content)
                dingdingrobot(content=error_content, subject='test')
                print("start分割线----------------------------分割线start")
                raise Exception("数据插入失败") from failure

            time.sleep(10)