Esempio n. 1
0
def get_executions_data(flow_name):
    """Build the query payload for the most recent execution of a flow.

    Looks up the newest ``merce_flow_execution`` row whose flow_name starts
    with *flow_name* and wraps its id into the request body used by the
    executions-query API.

    :param flow_name: prefix of the flow name to search for
    :return: payload dict, or None when the lookup fails or returns no rows
    """
    ms = MYSQL(MySQL_CONFIG["HOST"], MySQL_CONFIG["USER"],
               MySQL_CONFIG["PASSWORD"], MySQL_CONFIG["DB"])
    try:
        # NOTE(review): string-built SQL -- safe only while flow_name comes
        # from trusted test fixtures; prefer a parameterized query if
        # ExecuQuery supports one. '%%%%' collapses to a literal '%%'
        # (LIKE prefix wildcard) after %-formatting.
        sql = "select id from merce_flow_execution where flow_name like '%s%%%%' order by create_time desc limit 1" % flow_name
        flow_info = ms.ExecuQuery(sql.encode('utf-8'))
        print(sql)
        print('execution_Id:', flow_info[0]["id"])
    except Exception:  # fixed: bare except also swallowed SystemExit/KeyboardInterrupt
        return None
    return {
        "fieldList": [{
            "fieldName": "executionId",
            "fieldValue": flow_info[0]["id"],
            "comparatorOperator": "EQUAL",
            "logicalOperator": "AND"
        }],
        "sortObject": {
            "field": "lastModifiedTime",
            "orderDirection": "DESC"
        },
        "offset": 0,
        "limit": 8
    }
Esempio n. 2
0
class GetLog(object):
    """Resolve a log URL for a flow execution.

    Looks up the execution's flow/job ids in MySQL, then returns either the
    YARN application page (when the execution produced an appId) or the
    in-app log-detail page URL.
    """

    def __init__(self, execution_id, host):
        # Connection settings come from the shared MySQL_CONFIG mapping.
        self.ms = MYSQL(MySQL_CONFIG["HOST"], MySQL_CONFIG["USER"], MySQL_CONFIG["PASSWORD"], MySQL_CONFIG["DB"])
        self.execution_id = execution_id
        self.host = host

    def get_job_id(self):
        """Return ``(flow_id, job_id)`` for this execution, or None on any failure."""
        sql = "select flow_id ,job_id from merce_flow_execution where id = '%s' " % self.execution_id
        try:
            flow_job_id = self.ms.ExecuQuery(sql)
            return flow_job_id[0]["flow_id"], flow_job_id[0]["job_id"]
        except Exception:
            return None

    def get_log_url(self):
        """
        Query the task log via flow id and job id.
        1. If the task produced an application ID, go straight to YARN for the log.
        2. Otherwise, check the system detail page for generated log output.
        :return: URL string, or None when the execution cannot be resolved
        """
        # fixed: get_job_id() was called twice (two SQL round-trips) and its
        # possibly-None result was subscripted, raising TypeError on failure.
        ids = self.get_job_id()
        if ids is None:
            return None
        flow_id, job_id = ids
        detail_url = "%s/api/executions/query" % self.host
        data = {"fieldList": [{"fieldName": "flowId", "fieldValue": flow_id, "comparatorOperator":"EQUAL","logicalOperator":"AND"},{"fieldName":"jobId","fieldValue":job_id,"comparatorOperator":"EQUAL"}],"sortObject":{"field":"lastModifiedTime","orderDirection":"DESC"},"offset":0,"limit":8}
        response = requests.post(url=detail_url, headers=get_headers(self.host), json=data)
        try:
            content_info = dict_res(response.text)["content"][0]
            # Prefer the YARN log page whenever the detail info carries an appId.
            if 'appId' in content_info:
                yarn_url = "http://info2:8088/cluster/app/%s" % content_info["appId"]
                return yarn_url
            # No application id was generated: link to the in-app log page instead.
            print('没有生成applicationId,需要进入详情页查看日志信息')
            loginfo_url = "%s/#/design/executingDetail/%s/exection/%s/logInfo" % (self.host, flow_id, job_id)
            return loginfo_url
        except Exception:
            return None




#
# g = GetLog('6bac071d-5cc7-42bc-b64e-12b368ff7099', 'http://192.168.1.76:8515').get_log_url()
# print(g)
Esempio n. 3
0
def get_dataflow_data(flow_name):
    ms = MYSQL(MySQL_CONFIG["HOST"], MySQL_CONFIG["USER"],
               MySQL_CONFIG["PASSWORD"], MySQL_CONFIG["DB"])
    try:
        sql = "select id, name,flow_type from merce_flow where name like '%s%%%%' order by create_time desc limit 1" % flow_name
        flow_info = ms.ExecuQuery(sql.encode('utf-8'))
        print("flow_id,flow_name,flow_type", flow_info[0]["id"],
              flow_info[0]["name"], flow_info[0]["flow_type"])
    except:
        return

    if "auto_api_test" in flow_name:
        data = {
            "configurations": {
                "arguments": [],
                "properties": [{
                    "name": "all.debug",
                    "value": "false",
                    "input": "false"
                }, {
                    "name": "all.dataset-nullable",
                    "value": "false",
                    "input": "false"
                }, {
                    "name": "all.optimized.enable",
                    "value": "true",
                    "input": "true"
                }, {
                    "name": "all.lineage.enable",
                    "value": "false",
                    "input": "false"
                }, {
                    "name": "all.debug-rows",
                    "value": "20",
                    "input": "20"
                }, {
                    "name": "all.runtime.cluster-id",
                    "value": "cluster1",
                    "input": ["cluster1"]
                }, {
                    "name": "dataflow.master",
                    "value": "yarn",
                    "input": "yarn"
                }, {
                    "name": "dataflow.deploy-mode",
                    "value": "client",
                    "input": ["client", "cluster"]
                }, {
                    "name": "dataflow.queue",
                    "value": "default",
                    "input": ["default"]
                }, {
                    "name": "dataflow.num-executors",
                    "value": "2",
                    "input": "2"
                }, {
                    "name": "dataflow.driver-memory",
                    "value": "512M",
                    "input": "512M"
                }, {
                    "name": "dataflow.executor-memory",
                    "value": "1G",
                    "input": "1G"
                }, {
                    "name": "dataflow.executor-cores",
                    "value": "2",
                    "input": "2"
                }, {
                    "name": "dataflow.verbose",
                    "value": "true",
                    "input": "true"
                }, {
                    "name": "dataflow.local-dirs",
                    "value": "",
                    "input": ""
                }, {
                    "name": "dataflow.sink.concat-files",
                    "value": "true",
                    "input": "true"
                }, {
                    "name": "dataflow.tempDirectory",
                    "value": "/tmp/dataflow/spark",
                    "input": "/tmp/dataflow/spark"
                }],
                "dependencies":
                "",
                "extraConfigurations": {},
                "startTime":
                1593964800000
            },
            "flowId": flow_info[0]["id"],
            "flowName": flow_info[0]["name"],
            "flowType": flow_info[0]["flow_type"],
            "name": "auto_api_test_随机数",
            "schedulerId": "once",
            "source": "rhinos"
        }
        return data
    elif "ES-upsert" in flow_name:
        data = {
            "configurations": {
                "startTime":
                1600617600000,
                "arguments": [],
                "dependencies": [],
                "extraConfigurations": {},
                "properties": [{
                    "name": "all.debug",
                    "value": "false",
                    "input": "false"
                }, {
                    "name": "all.dataset-nullable",
                    "value": "false",
                    "input": "false"
                }, {
                    "name": "all.optimized.enable",
                    "value": "true",
                    "input": "true"
                }, {
                    "name": "all.lineage.enable",
                    "value": "false",
                    "input": "false"
                }, {
                    "name": "all.debug-rows",
                    "value": "20",
                    "input": "20"
                }, {
                    "name": "all.runtime.cluster-id",
                    "value": "cluster1",
                    "input": ["cluster1"]
                }, {
                    "name": "dataflow.master",
                    "value": "yarn",
                    "input": "yarn"
                }, {
                    "name": "dataflow.deploy-mode",
                    "value": "client",
                    "input": ["client", "cluster"]
                }, {
                    "name": "dataflow.queue",
                    "value": "default",
                    "input": ["default"]
                }, {
                    "name": "dataflow.num-executors",
                    "value": "2",
                    "input": "2"
                }, {
                    "name": "dataflow.driver-memory",
                    "value": "512M",
                    "input": "512M"
                }, {
                    "name": "dataflow.executor-memory",
                    "value": "1G",
                    "input": "1G"
                }, {
                    "name": "dataflow.executor-cores",
                    "value": "2",
                    "input": "2"
                }, {
                    "name": "dataflow.verbose",
                    "value": "true",
                    "input": "true"
                }, {
                    "name": "dataflow.local-dirs",
                    "value": "",
                    "input": ""
                }, {
                    "name": "dataflow.sink.concat-files",
                    "value": "true",
                    "input": "true"
                }, {
                    "name": "dataflow.tempDirectory",
                    "value": "/tmp/dataflow/spark",
                    "input": "/tmp/dataflow/spark"
                }],
                "retry": {
                    "enable": "false",
                    "limit": 1,
                    "timeInterval": 1,
                    "intervalUnit": "MINUTES"
                }
            },
            "schedulerId": "once",
            "ource": "rhinos",
            "version": 1,
            "flowId": flow_info[0]["id"],
            "flowType": flow_info[0]["flow_type"],
            "name": "ES-upsert_随机数",
            "creator": "admin",
            "oldName": flow_info[0]["name"]
        }
        return data
    else:
        return
Esempio n. 4
0
from util.encrypt import parameter_ungzip
from basic_info.setting import MySQL_CONFIG
from util.Open_DB import MYSQL

# Configure the database connection.
ms = MYSQL(MySQL_CONFIG["HOST"], MySQL_CONFIG["USER"], MySQL_CONFIG["PASSWORD"], MySQL_CONFIG["DB"])

# Module-level check of the parameter-decompression helper: fetch the stored
# "links" blob for one known flow and print it raw and decompressed.
# NOTE(review): this runs on import and hits the database -- presumably a
# debugging aid left in; confirm it is safe to execute at import time.
sql = "select links from merce_flow where  name = 'minus_0628_3_no_element_test'"
result = ms.ExecuQuery(sql)
end_result = result[0]["links"]
print(result[0]["links"])
print(parameter_ungzip(end_result))
class GetCheckoutDataSet(object):
    """
    批量提交execution任务
    任务执行结束后根据任务执行结果获取到对应的sink数据集输出结果
    并对结果进行比对是否正确
    """

    def __init__(self):
        """Initialize the database connection and result-workbook settings."""
        self.ms = MYSQL(MySQL_CONFIG["HOST"], MySQL_CONFIG["USER"], MySQL_CONFIG["PASSWORD"], MySQL_CONFIG["DB"])
        # NOTE(review): `host` is read from module/global scope, not a
        # parameter -- confirm it is defined before this class is instantiated.
        self.host = host
        self.table = "flow_dataset_info.xlsx"  # workbook holding the flow/dataset fixtures
        self.table_sheet = 'flow_info'  # sheet with one row per flow

    def get_flow_id(self):
        """
        Return the distinct flow ids from the workbook as a list.
        :return: flow_id_list (deduplicated; order not preserved, set() is used)
        """
        # Delegate to the non-distinct reader and deduplicate, instead of
        # maintaining a second byte-identical copy of the workbook-scan loop.
        return list(set(self.get_flow_id_not_distinct()))

    def get_flow_id_not_distinct(self):
        """
        Read every flow id from the workbook, duplicates preserved.
        :return: flow_id_list
        """
        print("------读取flow id list------")
        workbook = load_workbook(abs_dir(self.table))
        sheet = workbook.get_sheet_by_name(self.table_sheet)
        ids = []
        for row in range(2, sheet.max_row + 1):
            cell_value = sheet.cell(row=row, column=2).value
            # Keep only cells that plausibly hold a real id (non-empty, >10 chars).
            if cell_value and len(cell_value) > 10:
                ids.append(cell_value)
        return ids


    def data_for_create_scheduler(self):
        """
        1. Look up flow_name and related info by flow_id.
        2. Assemble the data payload needed to create a scheduler from the
           flow info found.
        :return: data_list
        """
        print("------组装创建任务所需要的data------\n")
        data_list = []
        flow_id_list = self.get_flow_id()
        for flow_id in flow_id_list:
            try:
                sql = 'select name, flow_type, parameters from merce_flow where id = "%s"' % flow_id
                flow_info = self.ms.ExecuQuery(sql)
                print('flow_info:', flow_info)
            except Exception as e:
                # NOTE(review): returns the exception object itself (neither
                # raising nor returning None); callers iterate the result, so
                # confirm this is intended.
                return e
            else:
                try:
                    flow_name = flow_info[0]["name"]
                    flow_type = flow_info[0]["flow_type"]
                    flow_parameters = flow_info[0]["parameters"]
                    arguments_list = []
                    arguments = {}
                    if flow_parameters:  # only when flow parameters exist
                        print('parameters没有解压缩时:', flow_parameters)
                        parameters_use = parameter_ungzip(flow_parameters)  # decrypt/decompress the stored parameters
                        print('parameters解压缩后:', parameters_use)
                        flow_parameters_list = dict_res(parameters_use)
                        if len(flow_parameters_list) > 0:
                            # Only the first stored parameter is carried over
                            # into the scheduler arguments.
                            arguments["name"] = flow_parameters_list[0]["name"]
                            arguments["category"] = flow_parameters_list[0]["category"]
                            arguments["value"] = flow_parameters_list[0]["defaultVal"]
                            arguments["refs"] = flow_parameters_list[0]["refs"]
                            arguments["description"] = flow_parameters_list[0]["description"]
                            arguments_list.append(arguments)
                except KeyError as e:
                    raise e
                except IndexError as T:
                    raise T
            data = {
                "configurations": {
                    "arguments": arguments_list,
                    "properties": [
                        {
                            "name": "all.debug",
                            "value": "false"
                        },
                        {
                            "name": "all.dataset-nullable",
                            "value": "false"
                        },
                        {
                            "name": "all.lineage.enable",
                            "value": "true"
                        },
                        {
                            "name": "all.notify-output",
                            "value": "false"
                        },
                        {
                            "name": "all.debug-rows",
                            "value": "20"
                        },
                        {
                            "name": "dataflow.master",
                            "value": "yarn"
                        },
                        {
                            "name": "dataflow.deploy-mode",
                            "value": "client"
                        },
                        {
                            "name": "dataflow.queue",
                            "value": "merce.normal"
                        },
                        {
                            "name": "dataflow.num-executors",
                            "value": "2"
                        },
                        {
                            "name": "dataflow.driver-memory",
                            "value": "512M"
                        },
                        {
                            "name": "dataflow.executor-memory",
                            "value": "1G"
                        },
                        {
                            "name": "dataflow.executor-cores",
                            "value": "2"
                        },
                        {
                            "name": "dataflow.verbose",
                            "value": "true"
                        },
                        {
                            "name": "dataflow.local-dirs",
                            "value": ""
                        },
                        {
                            "name": "dataflow.sink.concat-files",
                            "value": "true"
                        }
                    ],
                    "startTime": get_time()
                },
                "flowId": flow_id,
                "flowName": flow_name,
                "flowType": flow_type,
                # Random suffix keeps scheduler names unique across runs.
                "name": flow_name + 'scheduler' + str(random.randint(0, 9999))+str(random.randint(0, 9999)),
                "schedulerId": "once",
                "source": "rhinos"
            }
            data_list.append(data)
        print("------返回创建任务的data------")
        return data_list

    def create_new_scheduler(self):
        """
        Create schedulers in bulk and collect their ids.
        The returned scheduler_id_list is consumed by get_execution_info(self).
        :return: scheduler_id_list
        """
        print("------开始创建任务------")
        from basic_info.url_info import create_scheduler_url
        scheduler_id_list = []
        scheduler_number = 1
        for data in self.data_for_create_scheduler():
            res = requests.post(url=create_scheduler_url, headers=get_headers(host), json=data)
            print('第%d 个scheduler' % scheduler_number)
            scheduler_number += 1
            time.sleep(2)  # pace the API calls
            if res.status_code == 201 and res.text:
                scheduler_id_format = dict_res(res.text)
                try:
                    scheduler_id = scheduler_id_format["id"]
                except KeyError as e:
                    print("scheduler_id_format中存在异常%s" % e)
                else:
                    scheduler_id_list.append(scheduler_id)
            else:
                # fixed: key was "flowid", which raised KeyError on the failure
                # path -- the payload built upstream uses "flowId"
                print("flow: %s scheduler创建失败" % data["flowId"])
        print("------create_new_scheduler(self)执行结束, 返回scheduler_id_list------\n")
        print('scheduler_id_list', scheduler_id_list)
        return scheduler_id_list

    def get_execution_info(self):
        """
        Query execution id, name, status and flow_id for each created
        scheduler; the info for multiple tasks is collected into a list.
        Executions appear with a delay after scheduler creation, so waits
        are inserted before querying.
        :return: e_info_list, or None when no schedulers were created
        """
        print("------查询execution信息------")
        scheduler_id_list = self.create_new_scheduler()
        if scheduler_id_list:
            e_info_list = []
            count = 1
            for scheduler_id in scheduler_id_list:
                print('开始第%d 次查询,查询scheduler id 为 %s 的execution info' % (count, scheduler_id))
                # Wait before querying (20s here; get_e_finial_status sleeps
                # another 10s, so roughly 30s total before the SELECT runs).
                time.sleep(20)
                # If no execution id was found, the status helper re-queries.
                e_info = self.get_e_finial_status(scheduler_id)
                e_info_list.append(e_info)
                print('e_info_list:', e_info_list)
                count += 1
            print("------查询 execution信息结束------\n")
            return e_info_list
        else:
            print("返回的scheduler_id_list为空", scheduler_id_list)
            return None

    def get_e_finial_status(self, scheduler_id):
        """
        Look up the execution (id, status, flow id, scheduler id) created by
        *scheduler_id*.
        :return: e_info dict, or None when the id is empty or nothing matches
        """
        print("------查询execution最终执行状态------")
        if not scheduler_id:
            return
        execution_sql = 'select id, status_type, flow_id , flow_scheduler_id from merce_flow_execution where flow_scheduler_id = "%s" ' % scheduler_id
        time.sleep(10)
        select_result = self.ms.ExecuQuery(execution_sql)
        if not select_result:
            return
        try:
            row = select_result[0]
            e_info = {
                "e_id": row["id"],
                "flow_id": row["flow_id"],
                "flow_scheduler_id": row["flow_scheduler_id"],
                "e_final_status": row["status_type"],
            }
        except IndexError as e:
            print("取值时报错 %s" % e)
            raise e
        print("------查询execution最终执行状态结束,返回execution id和status------\n")
        return e_info

    def get_execution_out_put(self):
        """
        Finally return the dataset ids produced by executions.
        Polls each execution until it leaves READY/RUNNING, then reads the
        sink dataset id(s) from merce_flow_execution_output for successes.
        :return: list of per-execution dicts, or None when data is missing
        """
        print("------开始查询execution的输出------")
        e_info_list = self.get_execution_info()
        print("返回的e_info: %s " % e_info_list)
        # Continue only when no data is missing.
        # NOTE(review): len() raises TypeError if get_execution_info()
        # returned None -- confirm that path cannot reach here.
        if len(e_info_list) == len(self.get_flow_id()):  # same number of flow ids
            sink_dataset_list = []
            for i in range(len(e_info_list)):
                sink_dataset_dict = {}
                try:
                    e_id = e_info_list[i]["e_id"]
                    e_final_status = e_info_list[i]["e_final_status"]
                    e_scheduler_id = e_info_list[i]["flow_scheduler_id"]
                    e_flow_id = e_info_list[i]["flow_id"]
                except Exception as e:
                    # NOTE(review): only prints; e_id etc. keep their values
                    # from the previous iteration (or are unbound on the
                    # first) -- confirm this best-effort behavior is intended.
                    print('请确认该flow的e_info_list:')
                else:
                    sink_dataset_dict["flow_id"] = e_flow_id
                    sink_dataset_dict["execution_id"] = e_id
                    sink_dataset_dict["flow_scheduler_id"] = e_scheduler_id
                    if e_id:
                        print("------开始对%s 进行状态的判断------\n" % e_id)
                        while e_final_status in ("READY", "RUNNING"):
                            print("------进入while循环------\n")
                            # While status is READY or RUNNING, poll again.
                            print("------查询前等待5S------\n")
                            time.sleep(5)
                            # Re-query via get_e_finial_status(e_scheduler_id).
                            e_info = self.get_e_finial_status(e_scheduler_id)
                            # Rebind e_final_status from the fresh query.
                            e_final_status = e_info["e_final_status"]
                            print("------再次查询后的e_final_status: %s------\n" % e_final_status)
                        if e_final_status in ("FAILED", "KILLED"):
                            print("execution %s 执行失败" % e_id)
                            sink_dataset_dict["e_final_status"] = e_final_status
                            sink_dataset_dict["o_dataset"] = ""
                            sink_dataset_list.append(sink_dataset_dict)
                        elif e_final_status == "SUCCEEDED":
                            # On success, read the sink output dataset id from
                            # flow_execution_output; the id is later fed to the
                            # dataset-preview API.
                            sink_dataset_dict["e_final_status"] = e_final_status
                            data_json_sql = 'select b.dataset_json from merce_flow_execution as a  LEFT JOIN merce_flow_execution_output as b on a.id = b.execution_id where a.id ="%s"' % e_id
                            print(data_json_sql)
                            data_json = self.ms.ExecuQuery(data_json_sql)
                            print("打印data_json:", data_json)
                            for n in range(len(data_json)):
                                sink_dataset = data_json[n]["dataset_json"]  # one row per sink output
                                print('-----sink_dataset-----', sink_dataset)
                                if sink_dataset:
                                    sink_dataset_id = dict_res(sink_dataset)["id"]  # dataset id out of the JSON blob
                                    sink_dataset_dict["dataset_id"] = sink_dataset_id
                                    # JSON round-trip snapshots the dict, since
                                    # sink_dataset_dict is reused across rows.
                                    d = json.loads(json.dumps(sink_dataset_dict))
                                    sink_dataset_list.append(d)
                                else:
                                    continue
                        else:
                            print("返回的execution 执行状态不正确")
                            return
                    else:
                        print("execution不存在")
                        return
            return sink_dataset_list
        else:
            print("返回的scheduler_id_list值缺失")
            return

    def get_json(self):
        from util.count_items import count_items
        """
        1.通过dataset预览接口取得数据的预览json串 result.text
        2.判断dataset id是否存在,存在则通过数据集预览接口取回预览结果并找到表中相等的dataset id,写入预览结果
        3.Nokia环境代码使用的是旧版的预览接口,0.8.11及以后高版本代码使用的是新预览接口
        4.flow id 相等时,将execution id ,yarn上的log URL 和执行状态写入表中
        5.对比结果
        备注:对于sink multiouput flow特殊处理,表中把flow id全部写入,但是dataset是任务执行后再写入
        :return:
        """
        global dataset_id
        print("------开始执行get_json()------")
        sink_output_info = self.get_execution_out_put()
        flow_table = load_workbook(abs_dir(self.table))
        flow_sheet = flow_table.get_sheet_by_name(self.table_sheet)
        sheet_rows = flow_sheet.max_row  # 获取行数
        print(sink_output_info)
        flow_id_list = self.get_flow_id_not_distinct()  # 获取flow表中未去重的所有flow id
        sink_multi_flow_id = count_items(flow_id_list)[0]  # 重复的flow id
        sink_multi_flow_sink_num = count_items(flow_id_list)[1]  # 重复的flow id 所重复的次数
        # 处理multi_sink
        if sink_multi_flow_id:
            multi_flow_id = sink_multi_flow_id[0]
            multi_flow_sink_num = sink_multi_flow_sink_num[0]
            for j in range(2, sheet_rows + 1):
                    if sink_multi_flow_id == flow_sheet.cell(row=j, column=2).value:
                        for i in range(0, len(sink_output_info)):
                            log_url = GetLog(sink_output_info[i]["execution_id"], self.host).get_log_url()
                            dataset_id = sink_output_info[i]["dataset_id"]
                            flow_id = sink_output_info[i]["flow_id"]
                            if flow_id == sink_multi_flow_id:
                                if '57' in self.host:
                                    priview_url = "%s/api/datasets/%s/preview?rows=5000&tenant=%s&rows=50" % (
                                        self.host, dataset_id, get_tenant(self.host))
                                    res = requests.get(url=priview_url, headers=get_headers(self.host))
                                    result = res.text
                                else:
                                    statementID = statementId_flow_use(self.host, dataset_id, tenant_id_83)
                                    result = preview_result_flow_use(self.host, dataset_id, get_tenant(self.host),statementID)
                                flow_sheet.cell(row=j+i, column=4, value=dataset_id)
                                flow_sheet.cell(row=j+i, column=8, value=str(result))


        for i in range(0, len(sink_output_info)):
            
            ##print('sink_output_info:',sink_output_info)
            
            if "dataset_id" in sink_output_info[i].keys():#
                dataset_id = sink_output_info[i]["dataset_id"]
            
            flow_id = sink_output_info[i]["flow_id"]
            if '57' in self.host:
                priview_url = "%s/api/datasets/%s/preview?rows=5000&tenant=%s&rows=50" % (
                    self.host, dataset_id, get_tenant(self.host))
                res = requests.get(url=priview_url, headers=get_headers(self.host))
                result = res.text
            else:
                if "dataset_id" in sink_output_info[i].keys():#
                    statementID = statementId_flow_use(self.host, dataset_id, tenant_id_83)
                    result = preview_result_flow_use(self.host, dataset_id, get_tenant(self.host), statementID)
            
            for j in range(2, sheet_rows + 1):  # 按照行数进行循环
                    log_url = GetLog(sink_output_info[i]["execution_id"], self.host).get_log_url()
                    # 如果 dataset id相等就写入实际结果,不相等就向下找
                    if "dataset_id" in sink_output_info[i].keys():#
                        if dataset_id == flow_sheet.cell(row=j, column=4).value:#向左前进1格
                            flow_sheet.cell(row=j, column=8, value=str(result))  # dataset id 相等,实际结果写入表格  #向左前进1格
                    # flow id 相等时,将execution id ,yarn上的log URL 和执行状态写入
                    if flow_id == flow_sheet.cell(row=j, column=2).value:
                        # print(sink_dataset[i]["flow_id"])
                        flow_sheet.cell(row=j, column=5, value=sink_output_info[i]["execution_id"])
                        flow_sheet.cell(row=j, column=12, value=log_url)
                        flow_sheet.cell(row=j, column=6, value=sink_output_info[i]["e_final_status"])
                    else:
                        for t in range(j, 2, -1):
                            if flow_id == flow_sheet.cell(row=t - 1, column=2).value:
                                flow_sheet.cell(row=j, column=5, value=sink_output_info[i]["execution_id"])
                                flow_sheet.cell(row=j, column=12, value=log_url)
                                flow_sheet.cell(row=j, column=6, value=sink_output_info[i]["e_final_status"])
                                 # flow_sheet.cell(row=j, column=8, value=result.text)  # 实际结果写入表格
                                break
        flow_table.save(abs_dir(self.table))

        table = load_workbook(abs_dir(self.table))
        table_sheet = table.get_sheet_by_name(self.table_sheet)
        c_rows = table_sheet.max_row
        print('-----开始对比结果----')
        for i in range(2, c_rows+1):
            table_sheet.cell(row=i, column=1, value=i-1)
            # 对sample step涉及的flow单独进行结果判断
            if table_sheet.cell(row=i, column=2).value == '0822a0a2-ce58-42cb-82de-f2ec434b5d94':  #
                if table_sheet.cell(row=i, column=6).value == 'SUCCEEDED' and table_sheet.cell(row=i, column=8):
                    new_result = []
                    expect_result = list(eval(table_sheet.cell(row=i, column=7).value))
                    actual_result = list(eval(table_sheet.cell(row=i, column=8).value))
                    for item in actual_result:
                        if item in expect_result:
                            new_result.append(item)
                    if new_result == actual_result:
                        table_sheet.cell(row=i, column=9, value="pass")
                        # print('test_result:', table_sheet.cell(row=i, column=9).value)
                        table_sheet.cell(row=i, column=10, value="")
                    else:
                        table_sheet.cell(row=i, column=9, value="fail")
                        table_sheet.cell(row=i, column=10, value="execution: %s 预期结果实际结果不一致 " %
                                                                 (table_sheet.cell(row=i, column=5).value))
                elif table_sheet.cell(row=i, column=6).value == 'SUCCEEDED' and table_sheet.cell(row=i, column=8) == "":
                    table_sheet.cell(row=i, column=9, value="fail")
                    table_sheet.cell(row=i, column=10, value="execution: %s 预期结果实际结果不一致,实际结果为空 " %
                                                             (table_sheet.cell(row=i, column=5).value))
                elif table_sheet.cell(row=i, column=6).value == 'FAILED':
                    table_sheet.cell(row=i, column=9, value="fail")
                    table_sheet.cell(row=i, column=10, value="execution: %s 执行状态为 %s" % (
                        table_sheet.cell(row=i, column=5).value, table_sheet.cell(row=i, column=6).value))
                else:
                    print('请确认flow_id: %s的执行状态' % table_sheet.cell(row=i, column=2).value)

            else:
                # 判断mode为overwrite
                if table_sheet.cell(row=i, column=11).value == 'overwrite':  # 判断mode
                    # 实际结果存在并且执行结果为succeeded
                    if table_sheet.cell(row=i, column=8).value and table_sheet.cell(row=i, column=6).value == "SUCCEEDED":
                        # va7为预期结果,va8为实际结果,将二者先排序后对比是否相等
                        va7 = list(eval(table_sheet.cell(row=i, column=7).value))
                        #print('va7:',va7)
                        #if len(list(eval(table_sheet.cell(row=i, column=8).value))) != 0:
                        
                        va8 = list(eval(table_sheet.cell(row=i, column=8).value))   # 注意:是不是需要放在if 语句后面????
                        #print('va8',va8)
                        if va7 != [] and eval(table_sheet.cell(row=i, column=8).value).__class__ == [].__class__ :
                            va7_k = va7[0].keys()
                            va7_key = list(va7_k)
                            # 安排不同的key进行排序,只要有其中一个key排序后相等,就认为两个结果相等
                            result = []
                            for t in range(len(va7_key)):
                                S_va7 = sorted(va7, key=lambda item: item[va7_key[t]], reverse=True)  # 没有 id时候的排序
                                S_va8 = sorted(va8, key=lambda item: item[va7_key[t]], reverse=True)
                                result.append(S_va7 == S_va8)
                            print('-----确认结果------')
                            if True in result:
                                table_sheet.cell(row=i, column=9, value="pass")
                                print('test_result:', table_sheet.cell(row=i, column=9).value)
                                table_sheet.cell(row=i, column=10, value="")
                            else:
                                table_sheet.cell(row=i, column=9, value="fail")
                                table_sheet.cell(row=i, column=10, value="flowname: %s --->预期结果实际结果不一致 \n" %
                                                                         (table_sheet.cell(row=i, column=3).value))
                        elif va7 == [] and va8 == []:
                            table_sheet.cell(row=i, column=9, value="pass")
                            # print('test_result:', table_sheet.cell(row=i, column=9).value)
                            table_sheet.cell(row=i, column=10, value="")
                        else:
                            table_sheet.cell(row=i, column=9, value="")
                            table_sheet.cell(row=i, column=10, value="请确认预期结果和实际结果")
                    elif table_sheet.cell(row=i, column=6).value == "FAILED":
                        table_sheet.cell(row=i, column=9, value="fail")
                        table_sheet.cell(row=i, column=10, value="flowname: %s --->执行状态为 %s\n" % (table_sheet.cell(row=i, column=3).value, table_sheet.cell(row=i, column=6).value))
                    else:
                        table_sheet.cell(row=i, column=9, value="fail")
                        table_sheet.cell(row=i, column=10, value="用例参数或datasetID填写错误")
                # 判断mode为append
                elif table_sheet.cell(row=i, column=11).value == 'append':
                    if table_sheet.cell(row=i, column=8).value and table_sheet.cell(row=i, column=6).value == "SUCCEEDED":  # 实际结果存在
                        expect_result_list = list(eval(table_sheet.cell(row=i, column=7).value))
                        expect_len = len(expect_result_list)
                        actual_result_list = list(eval(table_sheet.cell(row=i, column=8).value))

                        if expect_result_list == actual_result_list[-expect_len:]:  # 实际结果切片和预期结果长度一致的数据结果是否相等
                            table_sheet.cell(row=i, column=9, value="pass")
                            table_sheet.cell(row=i, column=10, value="")
                        else:
                            table_sheet.cell(row=i, column=9, value="fail")
                            table_sheet.cell(row=i, column=10,
                                                   value="execution: %s 预期结果实际结果不一致 \n预期结果: %s\n实际结果: %s" % (
                                                   table_sheet.cell(row=i, column=5).value,
                                                   table_sheet.cell(row=i, column=7).value,
                                                   table_sheet.cell(row=i, column=8).value))
                    elif table_sheet.cell(row=i, column=6).value == "FAILED":  # execution执行失败
                        table_sheet.cell(row=i, column=9, value="fail")
                        table_sheet.cell(row=i, column=10,
                                               value="execution: %s 执行状态为 %s" % (
                                               table_sheet.cell(row=i, column=5).value, table_sheet.cell(row=i, column=6).value))
                    else:
                        table_sheet.cell(row=i, column=9, value="fail")
                        table_sheet.cell(row=i, column=10, value="用例参数或datasetID填写错误")

                else:
                    table_sheet.cell(row=i, column=9, value="fail")
                    table_sheet.cell(row=i, column=10, value="请确认flow的mode")
        table.save(abs_dir("flow_dataset_info.xlsx"))
        print('结果保存结束')
class GetExecutionStatus(object):
    """
    Resolve the executions spawned by batch-created schedulers.

    For every scheduler id read from ``scheduler_file_path``, poll the
    ``merce_flow_execution`` table until the execution reaches a terminal
    status (SUCCEEDED / FAILED / KILLED), then write one row per execution
    to a result CSV file.
    """

    def __init__(self):
        """Initialise the database connection."""
        self.ms = MYSQL(mysql_config["HOST"], mysql_config["USER"], mysql_config["PASSWORD"], mysql_config["DB"])

    def exec_sql(self, scheduler_id):
        """
        Look up the execution created by scheduler *scheduler_id*.

        Execution rows appear with a delay after the scheduler is created,
        so a short wait precedes the query.

        :param scheduler_id: value of ``flow_scheduler_id`` to match.
        :return: the first matching row (dict with ``id``, ``status_type``,
                 ``flow_name``, ``flow_id``) or ``None`` when no row exists yet.
        """
        time.sleep(3)
        execution_sql = "select id, status_type, flow_name, flow_id from merce_flow_execution where flow_scheduler_id = '%s'" % scheduler_id
        try:
            result = self.ms.ExecuQuery(execution_sql)
            return result[0]
        except Exception:
            # No row yet (or a transient DB error): report "not found" so the
            # caller retries instead of crashing.
            return None

    def get_execution_status(self):
        """Poll each scheduler's execution to a final status and persist all rows."""
        # CSV header: scheduler_id, execution_id, status, flow_name, flow_id, log_url
        header = ('scheduler_id', 'execution_id', 'status', 'flow_name', 'flow_id', 'log_url')
        print("------开始执行get_execution_status(self)------\n")
        with open(scheduler_file_path, "r") as f:
            readers = csv.reader(f)
            next(readers)  # skip the header row of the scheduler-id file
            executions = [header]
            for scheduler_id in readers:
                e_info = self.exec_sql(scheduler_id[0])
                print(e_info)
                # Retry while the row is missing OR still in a transient state.
                # (Checking "not e_info" inside the same loop also protects
                # against exec_sql returning None mid-poll, which previously
                # raised TypeError on e_info["status_type"].)
                while not e_info or e_info["status_type"] in ("PREPARE", "READY", "RUNNING"):
                    print("------查询前等待5S------\n")
                    time.sleep(5)
                    e_info = self.exec_sql(scheduler_id[0])
                    print(e_info)
                if e_info["status_type"] in ("SUCCEEDED", "FAILED", "KILLED"):
                    # BUG FIX: GetLog.__init__ requires (execution_id, host);
                    # the host argument was previously missing -> TypeError.
                    e_log_url = GetLog(e_info["id"], HOST).get_log_url()
                    tp_execution = (scheduler_id[0], e_info["id"], e_info["status_type"],
                                    e_info["flow_name"], e_info["flow_id"], e_log_url)
                    executions.append(tp_execution)
                    print("executions", executions)

            # Write every collected row to a fresh result CSV.
            file = get_result_file_name(file_path, HOST)
            self.write_execution(file, executions)
            print('write over')

    @staticmethod
    def write_execution(file, executions):
        """Append every row of *executions* to the CSV file *file*."""
        with open(file, 'a', newline='') as f:
            new_file = csv.writer(f, dialect='excel')
            for row in executions:
                print(row)
                new_file.writerow(row)
# Esempio n. 7
# 0
def get_dataflow_data(flow_name):
    """
    Build the request payload used to launch a dataflow execution.

    Looks up the flow's id and type in ``merce_flow`` by exact name, then
    assembles the execution-creation body (Spark/yarn configuration plus
    flow identity) expected by the executions API.

    :param flow_name: exact flow name to look up.
    :return: dict payload ready to POST to the executions endpoint.
    :raises IndexError: when no flow with *flow_name* exists.
    :raises Exception: any database error from ``ExecuQuery`` propagates.
    """
    print("开始执行get_dataflow_data(flow_name)")
    ms = MYSQL(MySQL_CONFIG["HOST"], MySQL_CONFIG["USER"],
               MySQL_CONFIG["PASSWORD"], MySQL_CONFIG["DB"])
    # NOTE(review): SQL built by string interpolation — a flow name containing
    # a double quote breaks the query. Parameterize if the MYSQL wrapper
    # supports placeholder arguments.
    sql = 'select id, flow_type from merce_flow where name = "%s"' % flow_name
    # The original wrapped the query and the field access in try/except blocks
    # that only re-raised the caught exception; letting errors propagate
    # directly is equivalent and keeps the traceback intact.
    flow_info = ms.ExecuQuery(sql)
    print(sql)
    print('flow_info:', flow_info)
    if not flow_info:
        # Same exception type an empty result produced before, but with an
        # actionable message instead of a bare "list index out of range".
        raise IndexError("no flow named %r found in merce_flow" % flow_name)
    flow_id = flow_info[0]["id"]
    flow_type = flow_info[0]["flow_type"]

    data = {
        "configurations": {
            "arguments": [],
            "properties": [{
                "name": "all.debug",
                "value": "false"
            }, {
                "name": "all.dataset-nullable",
                "value": "false"
            }, {
                "name": "all.lineage.enable",
                "value": "true"
            }, {
                "name": "all.notify-output",
                "value": "false"
            }, {
                "name": "all.debug-rows",
                "value": "20"
            }, {
                "name": "dataflow.master",
                "value": "yarn"
            }, {
                "name": "dataflow.deploy-mode",
                "value": "client"
            }, {
                "name": "dataflow.queue",
                "value": "merce.normal"
            }, {
                "name": "dataflow.num-executors",
                "value": "2"
            }, {
                "name": "dataflow.driver-memory",
                "value": "512M"
            }, {
                "name": "dataflow.executor-memory",
                "value": "1G"
            }, {
                "name": "dataflow.executor-cores",
                "value": "2"
            }, {
                "name": "dataflow.verbose",
                "value": "true"
            }, {
                "name": "dataflow.local-dirs",
                "value": ""
            }, {
                "name": "dataflow.sink.concat-files",
                "value": "true"
            }],
            "startTime":
            get_time()
        },
        "flowId": flow_id,
        "flowName": flow_name,
        "flowType": flow_type,
        # Random suffix keeps execution names unique across repeated launches.
        "name": flow_name + str(random.randint(0, 99999)),
        "schedulerId": "once",
        "source": "rhinos"
    }
    return data