def statementId_flow_output_use(host, datasetId):
    """Call the preview-init endpoint for *datasetId* and return its statementId.

    :param host: base URL of the service, e.g. ``http://ip:port``
    :param datasetId: id of the dataset to preview
    :return: the ``statementId`` from the response, or 0 when the response
        cannot be parsed or carries no ``statementId`` key (0 is kept so
        existing callers can keep treating it as "no statement").
    """
    url = '%s/api/datasets/%s/previewinit?rows=50' % (host, datasetId)
    res = requests.get(url=url, headers=get_headers(host))
    try:
        res_statementId = dict_res(res.text)
        statementId = res_statementId['statementId']
        print('%s数据集获取的statementID:%s' % (datasetId, statementId))
    except (KeyError, TypeError, ValueError):
        # Was a bare `except:`; narrowed to parse/lookup failures so real
        # programming errors are no longer silently swallowed.
        print('数据集%s的statementID返回空' % datasetId)
        return 0
    else:
        return statementId
def get_woven_qaoutput_dataset_path():
    """Return every dataset under woven/qaoutput as ``woven/qaoutput/<name>``
    with ``/`` replaced by ``%252F`` (the double-URL-encoded form the API expects).

    Fix: the original computed ``content_path.replace(...)`` once and discarded
    the result (strings are immutable, so that statement was a no-op) before
    appending a second, identical ``replace`` — the dead statement is removed.
    """
    url = '%s/api/datasets/query' % host
    data = {"fieldList":[{"fieldName":"parentId","fieldValue":"4f4d687c-12b3-4e09-9ba9-bcf881249ea0","comparatorOperator":"EQUAL","logicalOperator":"AND"},{"fieldName":"owner","fieldValue":"2059750c-a300-4b64-84a6-e8b086dbfd42","comparatorOperator":"EQUAL","logicalOperator":"AND"}],"sortObject":{"field":"lastModifiedTime","orderDirection":"DESC"},"offset":0,"limit":8}
    response = requests.post(url=url, headers=get_headers(), json=data)
    contents = dict_res(response.text)["content"]
    # TODO(review): parse.quote() would be the proper way to URL-encode —
    # kept the hand-rolled %252F substitution to preserve behavior.
    return [('woven/qaoutput/' + content["name"]).replace('/', '%252F')
            for content in contents]
def get_auth_token_customer(HOST):
    """Authenticate as the dsp customer account and return an auth token.

    Hosts whose address contains '57' return the raw ``Authorization``
    response header; all other hosts return ``'Bearer ' + access_token``
    extracted from the JSON body.
    """
    login = MY_LOGIN_INFO_dsp_customer
    # Both branches of the original issued this identical POST, so it is
    # hoisted above the host check.
    res = requests.post(url=login["URL"], headers=login["HEADERS"], data=login["DATA"])
    if '57' in HOST:
        return dict(res.headers)['Authorization']
    body = dict_res(res.text)
    return 'Bearer ' + body['content']["access_token"]
def statementId_flow_use(host, datasetId, tenant):
    """Call preview-init (tenant variant) and return the statementId.

    NOTE(review): the ``tenant`` parameter is never used — the URL is built
    with ``get_tenant(host)`` instead; confirm which one callers expect.

    :return: the ``statementId``, or 1 on failure (1 is kept for backward
        compatibility, although callers that check ``isinstance(x, int)``
        cannot distinguish it from a real statement id).
    """
    url = '%s/api/datasets/%s/previewinit?tenant=%s&rows=50' % (host, datasetId, get_tenant(host))
    res = requests.get(url=url, headers=get_headers(host))
    print(res.status_code, res.text)
    try:
        res_statementId = dict_res(res.text)
        print('%s数据集获取的statementID信息:%s' % (datasetId, res_statementId))
        statementId = res_statementId['statementId']
    except (KeyError, TypeError, ValueError):
        # Was a bare `except:`; narrowed to parse/lookup failures.
        print('数据集%s的statementID返回空' % datasetId)
        return 1
    else:
        return statementId
def upload_jar_file_filter():
    """Upload the jar at ``dir1`` to the 'filter class' endpoint.

    :return: the server-assigned ``fileName``, or None when the upload or
        the response parsing fails.
    """
    url = "%s/api/processconfigs/uploadjar/filter class" % host
    headers = get_headers(host)
    # Drop Content-Type so requests can set the multipart boundary itself.
    headers.pop('Content-Type')
    # Fix: the file handle was opened and never closed (fd leak) — use a
    # context manager so it is released even on failure.
    with open(dir1, 'rb') as jar_file:
        try:
            response = requests.post(url, files={"file": jar_file}, headers=headers)
            filter_fileName = dict_res(response.text)["fileName"]
        except (KeyError, TypeError, ValueError):
            # Was a bare `except:` returning None implicitly; narrowed.
            return
        else:
            return filter_fileName
def get_auth_token(HOST):
    """Log in to the service appropriate for HOST and return an auth token.

    Hosts containing '57' use the dam login and return the raw
    ``X-AUTH-TOKEN`` header; other hosts use MY_LOGIN_INFO2 and return
    ``'Bearer ' + access_token`` from the JSON body.

    :return: the token string, or None when login fails (best-effort kept,
        but the failure cause is now printed instead of silently dropped).
    """
    try:
        if '57' in HOST:
            res = requests.post(url=MY_LOGIN_INFO_dam["URL"], headers=MY_LOGIN_INFO_dam["HEADERS"], data=MY_LOGIN_INFO_dam["DATA"])
            return dict(res.headers)['X-AUTH-TOKEN']
        res = requests.post(url=MY_LOGIN_INFO2["URL"], headers=MY_LOGIN_INFO2["HEADERS"], data=MY_LOGIN_INFO2["DATA"])
        return 'Bearer ' + dict_res(res.text)['content']["access_token"]
    except Exception as e:
        # Was a bare `except: return` — keep the None fallback but surface
        # the reason so auth problems are debuggable.
        print('get_auth_token failed: %s' % e)
        return
def get_auth_token_compass(HOST):
    """Authenticate against compass and return an auth token.

    '57' hosts post form data and return the raw ``Authorization`` header;
    other hosts post JSON and return the bare ``access_token`` from the
    body (note: no 'Bearer ' prefix on this branch).
    """
    login = MY_LOGIN_INFO_compass
    if '57' in HOST:
        # Form-encoded login; token lives in a response header.
        res = requests.post(url=login["URL"], headers=login["HEADERS"], data=login["DATA"])
        return dict(res.headers)['Authorization']
    # JSON login; token lives in the response body.
    res = requests.post(url=login["URL"], headers=login["HEADERS"], json=login["DATA"])
    body = dict_res(res.text)
    return body['content']["access_token"]
def upload_jar_file_dataflow():
    """Upload the jar at ``dir1`` to the 'dataflow selector' endpoint.

    Fixes: the file handle was never closed (fd leak) and the local
    ``unquote_url = parse.unquote(url)`` was computed and never used.

    :return: the server-assigned ``fileName``, or None on failure.
    """
    url = "%s/api/processconfigs/uploadjar/dataflow selector" % host
    headers = get_headers(host)
    # Drop Content-Type so requests can set the multipart boundary itself.
    headers.pop('Content-Type')
    with open(dir1, 'rb') as jar_file:
        try:
            response = requests.post(url, files={"file": jar_file}, headers=headers)
            data_fileName = dict_res(response.text)["fileName"]
            print(data_fileName)
        except (KeyError, TypeError, ValueError):
            # Was a bare `except:`; narrowed to parse/lookup failures.
            return
        else:
            return data_fileName
def test_get_dataset_id(self):
    """Return the sink dataset ids produced by the latest execution.

    Looks up every ``dataset_json`` row written for the execution returned
    by ``test_get_execution_info`` and extracts each dataset's ``id``.

    :return: list of dataset ids, or None when no execution info exists.
    """
    e_info = self.test_get_execution_info()
    if not e_info:
        return None
    data_json_sql = 'select b.dataset_json from merce_flow_execution as a LEFT JOIN merce_flow_execution_output as b on a.id = b.execution_id where a.id ="%s"' % e_info[0]["id"]
    rows = self.ms.ExecuQuery(data_json_sql)
    # Each row's dataset_json is a JSON string; pull the embedded id out.
    sink_dataset_list = [dict_res(row["dataset_json"])["id"] for row in rows]
    print('----------sink_dataset_list----------', sink_dataset_list)
    return sink_dataset_list
def get_job_tasks_id(job_id):
    """Return the ids of every task belonging to collector job *job_id*.

    :return: list of task ids, or None when the response has no usable
        ``content`` (the error is printed).
    """
    url = '%s/api/woven/collectors/%s/tasks' % (host, job_id)
    payload = {"fieldList": [], "sortObject": {"field": "lastModifiedTime", "orderDirection": "DESC"}, "offset": 0, "limit": 8}
    response = requests.post(url=url, headers=get_headers(host), json=payload)
    try:
        all_task_id = [task['id'] for task in dict_res(response.text)['content']]
    except Exception as e:
        print(e)
        return None
    return all_task_id
def preview_result_flow_use(host, datasetId, statementID):
    """Poll previewresult for *statementID* until it leaves waiting/running.

    NOTE(review): a function with this same name but a different signature
    is defined later in this module and shadows this one at import time.
    NOTE(review): the poll loop has no sleep or retry limit — it spins on
    the endpoint until the answer changes (original behavior kept).

    :return: the response's ``content``, 0 when the final response has no
        content, or None (after printing) when statementID is not an int.
    """
    if isinstance(statementID, int):
        url = "%s/api/datasets/%s/previewresult?statementId=%d&clusterId=cluster1" % (host, datasetId, statementID)
        res = requests.get(url=url, headers=get_headers(host))
        print(res.url)
        print('%s数据集preview_result:%s' % (datasetId, res.text))
        while 'waiting' in res.text or 'running' in res.text:
            res = requests.get(url=url, headers=get_headers(host))
        try:
            dataset_result = dict_res(res.text)['content']
        except (KeyError, TypeError, ValueError):
            # Was a bare `except:`; narrowed to parse/lookup failures.
            return 0
        else:
            print('%s数据集dataset_result: %s ' % (datasetId, dataset_result))
            return dataset_result
    else:
        # Fix: the original '%s...' had no % argument and printed a literal %s.
        print('%s数据集返回的statementID为空' % datasetId)
def preview_result_flow_use(host, datasetId, tenant, statementID):
    """Poll previewresult (tenant variant) until it leaves waiting/running.

    NOTE(review): shadows the earlier 3-argument function of the same name.
    Fixes: removed the unused local ``count_num``; the final print was
    missing its % argument and printed a literal '%s'.

    :return: the response's ``content``, None on a missing key, or None
        (after printing) when statementID is not an int.
    """
    if isinstance(statementID, int):
        url = "%s/api/datasets/%s/previewresult?tenant=%s&statementId=%d" % (
            host, datasetId, tenant, statementID)
        res = requests.get(url=url, headers=get_headers(host))
        print(res.url)
        print('%s数据集preview_result:%s' % (datasetId, res.text))
        # NOTE(review): no sleep/limit in this loop — it spins on the endpoint.
        while 'waiting' in res.text or 'running' in res.text:
            res = requests.get(url=url, headers=get_headers(host))
        try:
            dataset_result = dict_res(res.text)['content']
        except KeyError:
            return
        else:
            print('%s数据集dataset_result: %s ' % (datasetId, dataset_result))
            return dataset_result
    else:
        print('%s数据集返回的statementID为空' % datasetId)
def get_exe_result():
    """Collect flow/status/output-dataset info for every flink execution.

    BUG FIX: ``sink_result_dict`` was created once OUTSIDE the loop, so
    every element appended to ``sink_result_list`` was the same dict and
    all entries ended up holding the last execution's values. The dict is
    now created fresh per iteration.

    :return: list of per-execution dicts, or None when any lookup fails.
    """
    sink_result_list = []
    for i in create_flink_exe():
        sink_result_dict = {}  # fresh dict per execution (see BUG FIX above)
        try:
            sql = "select flow_id,flow_type,status_type from merce_flow_execution where id='%s'" % i
            result = ms.ExecuQuery(sql)
            sink_result_dict["flow_id"] = result[0]["flow_id"]
            sink_result_dict["status_type"] = result[0]["status_type"]
            # Wait out the execution and record its final status.
            e = get_e_finial_status(i, sink_result_dict["status_type"])
            sink_result_dict["status_type"] = e
            g_result = requests.get(url=host_for_url + "/api/executions/rtc/" + i + "/output", headers=get_headers(host_for_url))
            sink_result_dict["dataset_id"] = dict_res(g_result.text)[0]["id"]
            sink_result_list.append(sink_result_dict)
        except Exception:
            # Was a bare `except:`; kept the abort-and-return-None behavior.
            return
    return sink_result_list
def get_log_url(self):
    """Return a URL where the job's log can be viewed.

    1. If the execution produced an application ID, return the yarn app page.
    2. Otherwise return the system's executing-detail log page.

    Fix: ``self.get_job_id()`` was called twice (two identical lookups);
    it is now called once and unpacked.

    :return: the yarn or log-page URL, or None when the query fails.
    """
    job_info = self.get_job_id()  # single call instead of two round-trips
    flow_id = job_info[0]
    job_id = job_info[1]
    detail_url = "%s/api/executions/query" % self.host
    data = {"fieldList": [{"fieldName": "flowId", "fieldValue": flow_id, "comparatorOperator":"EQUAL","logicalOperator":"AND"},{"fieldName":"jobId","fieldValue":job_id,"comparatorOperator":"EQUAL"}],"sortObject":{"field":"lastModifiedTime","orderDirection":"DESC"},"offset":0,"limit":8}
    response = requests.post(url=detail_url, headers=get_headers(self.host), json=data)
    try:
        content_info = dict_res(response.text)["content"][0]
        content_info_list = list(content_info.keys())
        # Prefer the yarn log page whenever an appId was produced.
        if 'appId' in content_info_list:
            appId = content_info["appId"]
            yarn_url = "http://info2:8088/cluster/app/%s" % appId
            return yarn_url
        else:
            # No application id: fall back to the in-system log page.
            print('没有生成applicationId,需要进入详情页查看日志信息')
            loginfo_url = "%s/#/design/executingDetail/%s/exection/%s/logInfo" % (self.host, flow_id, job_id)
            return loginfo_url
    except Exception:
        return
def compare_text_result(self):
    """Assert each case row's response text against its expected text.

    Rows whose status_code comparison already failed are marked 'fail'
    outright; rows that passed it are asserted via ``assert_deal``. The
    workbook is saved once all rows have been processed.
    """
    for row in range(2, all_rows + 1):
        actual_text = case_table_sheet.cell(row=row, column=12).value    # response.text from the API call
        actual_dict = dict_res(actual_text)
        expected_text = case_table_sheet.cell(row=row, column=10).value  # expected result
        keyword = case_table_sheet.cell(row=row, column=3).value         # interface keyword
        code_verdict = case_table_sheet.cell(row=row, column=9).value    # status_code comparison result
        relation = case_table_sheet.cell(row=row, column=11).value       # expected-vs-actual relation
        if code_verdict == 'pass':
            # Only compare bodies when the status code already matched.
            if keyword in ('create', 'query', 'update', 'delete'):
                self.assert_deal(keyword, relation, expected_text, actual_text, actual_dict, row, 13)
            else:
                print('请确认第%d行的key_word' % row)
        elif code_verdict == 'fail':
            # A failed status_code check fails the whole case.
            case_table_sheet.cell(row=row, column=14, value='fail')
            case_table_sheet.cell(row=row, column=15, value='status_code对比结果为%s' % code_verdict)
        else:
            print('请确认第 %d 行 status_code对比结果' % row)
    case_table.save(table_dir('api_cases.xlsx'))
def create_new_user(data):
    """POST *data* to the woven users endpoint and return the new user's id."""
    endpoint = '%s/api/woven/users' % host
    response = requests.post(url=endpoint, headers=get_headers(host), json=data)
    new_user_id = dict_res(response.text)["id"]
    print(new_user_id)
    return new_user_id
def create_schedulers():
    """Create a run-once scheduler for the first flow and return its id.

    Fix: ``get_flows()`` was called twice (two identical lookups) to read
    ``name`` and ``flow_type``; it is now called once.

    NOTE(review): ``flow_id`` is not defined in this function — it appears
    to rely on a module-level global; confirm it is set before this runs.

    :return: the scheduler id, or None when creation fails or the response
        lacks an ``id``.
    """
    from basic_info.url_info import create_scheduler_url
    first_flow = get_flows()[0]  # single fetch instead of two
    flow_name = first_flow["name"]
    flow_type = first_flow["flow_type"]
    data = {
        "configurations": {
            "arguments": [],
            "properties": [{ "name": "all.debug", "value": "false" }, { "name": "all.dataset-nullable", "value": "false" }, { "name": "all.lineage.enable", "value": "true" }, { "name": "all.notify-output", "value": "false" }, { "name": "all.debug-rows", "value": "20" }, { "name": "dataflow.master", "value": "yarn" }, { "name": "dataflow.deploy-mode", "value": "client" }, { "name": "dataflow.queue", "value": "a1" }, { "name": "dataflow.num-executors", "value": "2" }, { "name": "dataflow.driver-memory", "value": "512M" }, { "name": "dataflow.executor-memory", "value": "1G" }, { "name": "dataflow.executor-cores", "value": "2" }, { "name": "dataflow.verbose", "value": "true" }, { "name": "dataflow.local-dirs", "value": "" }, { "name": "dataflow.sink.concat-files", "value": "true" }],
            "startTime": get_time()
        },
        "flowId": flow_id,
        "flowName": flow_name,
        "flowType": flow_type,
        "name": "students_flow" + str(random.randint(0, 99999)),
        "schedulerId": "once",
        "source": "rhinos"
    }
    res = requests.post(url=create_scheduler_url, headers=get_headers(), data=json.dumps(data))
    if res.status_code == 201 and res.text:
        scheduler_id_format = dict_res(res.text)
        try:
            scheduler_id = scheduler_id_format["id"]
        except KeyError as e:
            print("scheduler_id_format中存在异常%s" % e)
        else:
            return scheduler_id
    else:
        return None
def data_for_create_scheduler(self):
    """
    Build the request payload for creating a scheduler per flow.

    1. Look up name / flow_type / parameters for each flow id.
    2. Assemble the scheduler-creation ``data`` dict from that flow info.

    :return: data_list — one payload dict per flow id; on a query failure
        the exception object itself is returned instead.
    """
    print("------组装创建任务所需要的data------\n")
    data_list = []
    flow_id_list = self.get_flow_id()
    for flow_id in flow_id_list:
        try:
            sql = 'select name, flow_type, parameters from merce_flow where id = "%s"' % flow_id
            flow_info = self.ms.ExecuQuery(sql)
            print('flow_info:', flow_info)
        except Exception as e:
            # NOTE(review): returns the exception object (not raises) on a
            # DB failure — callers must check the return type; confirm intended.
            return e
        else:
            try:
                flow_name = flow_info[0]["name"]
                flow_type = flow_info[0]["flow_type"]
                flow_parameters = flow_info[0]["parameters"]
                arguments_list = []
                arguments = {}
                if flow_parameters:  # only when the flow declares parameters
                    print('parameters没有解压缩时:', flow_parameters)
                    # Decode and decompress the stored (gzipped) parameter blob.
                    parameters_use = parameter_ungzip(flow_parameters)
                    print('parameters解压缩后:', parameters_use)
                    flow_parameters_list = dict_res(parameters_use)
                    if len(flow_parameters_list) > 0:
                        # Only the first declared parameter is carried over.
                        arguments["name"] = flow_parameters_list[0]["name"]
                        arguments["category"] = flow_parameters_list[0]["category"]
                        arguments["value"] = flow_parameters_list[0]["defaultVal"]
                        arguments["refs"] = flow_parameters_list[0]["refs"]
                        arguments["description"] = flow_parameters_list[0]["description"]
                        arguments_list.append(arguments)
            except KeyError as e:
                raise e
            except IndexError as T:
                raise T
            # Scheduler-creation payload; property values mirror the UI defaults.
            data = {
                "configurations": {
                    "arguments": arguments_list,
                    "properties": [
                        { "name": "all.debug", "value": "false" },
                        { "name": "all.dataset-nullable", "value": "false" },
                        { "name": "all.lineage.enable", "value": "true" },
                        { "name": "all.notify-output", "value": "false" },
                        { "name": "all.debug-rows", "value": "20" },
                        { "name": "dataflow.master", "value": "yarn" },
                        { "name": "dataflow.deploy-mode", "value": "client" },
                        { "name": "dataflow.queue", "value": "merce.normal" },
                        { "name": "dataflow.num-executors", "value": "2" },
                        { "name": "dataflow.driver-memory", "value": "512M" },
                        { "name": "dataflow.executor-memory", "value": "1G" },
                        { "name": "dataflow.executor-cores", "value": "2" },
                        { "name": "dataflow.verbose", "value": "true" },
                        { "name": "dataflow.local-dirs", "value": "" },
                        { "name": "dataflow.sink.concat-files", "value": "true" }
                    ],
                    "startTime": get_time()
                },
                "flowId": flow_id,
                "flowName": flow_name,
                "flowType": flow_type,
                # Randomized suffix keeps scheduler names unique across runs.
                "name": flow_name + 'scheduler' + str(random.randint(0, 9999))+str(random.randint(0, 9999)),
                "schedulerId": "once",
                "source": "rhinos"
            }
            data_list.append(data)
    print("------返回创建任务的data------")
    # print(data_list)
    return data_list
def get_execution_out_put(self):
    """
    Wait for each execution to finish and collect its sink dataset ids.

    Polls every execution's status until it leaves READY/RUNNING, then —
    for SUCCEEDED executions — reads the sink dataset ids from
    merce_flow_execution_output so callers can preview the output data.

    :return: list of per-execution dicts (flow_id, execution_id,
        flow_scheduler_id, e_final_status, and dataset_id / o_dataset),
        or None when the execution info is incomplete or inconsistent.
    """
    print("------开始查询execution的输出------")
    e_info_list = self.get_execution_info()
    print("返回的e_info: %s " % e_info_list)
    # Proceed only when every flow produced an execution-info entry.
    if len(e_info_list) == len(self.get_flow_id()):
        sink_dataset_list = []
        for i in range(len(e_info_list)):
            sink_dataset_dict = {}
            try:
                e_id = e_info_list[i]["e_id"]
                e_final_status = e_info_list[i]["e_final_status"]
                e_scheduler_id = e_info_list[i]["flow_scheduler_id"]
                e_flow_id = e_info_list[i]["flow_id"]
            except Exception as e:
                # NOTE(review): a malformed entry is only logged; the loop
                # then falls through to the else-branch skipped — confirm
                # whether it should `continue` explicitly.
                print('请确认该flow的e_info_list:')
            else:
                sink_dataset_dict["flow_id"] = e_flow_id
                sink_dataset_dict["execution_id"] = e_id
                sink_dataset_dict["flow_scheduler_id"] = e_scheduler_id
                if e_id:
                    print("------开始对%s 进行状态的判断------\n" % e_id)
                    # Poll until the execution leaves READY/RUNNING.
                    while e_final_status in ("READY", "RUNNING"):
                        print("------进入while循环------\n")
                        print("------查询前等待5S------\n")
                        time.sleep(5)  # back off between status queries
                        # Re-query the status via the scheduler id.
                        e_info = self.get_e_finial_status(e_scheduler_id)
                        e_final_status = e_info["e_final_status"]
                        print("------再次查询后的e_final_status: %s------\n" % e_final_status)
                        # time.sleep(50)
                    if e_final_status in ("FAILED", "KILLED"):
                        # Record the failure with an empty output marker.
                        print("execution %s 执行失败" % e_id)
                        sink_dataset_dict["e_final_status"] = e_final_status
                        sink_dataset_dict["o_dataset"] = ""
                        sink_dataset_list.append(sink_dataset_dict)
                        # continue
                    elif e_final_status == "SUCCEEDED":
                        # On success, read the sink datasets written for this
                        # execution; their ids feed the preview endpoints later.
                        sink_dataset_dict["e_final_status"] = e_final_status
                        data_json_sql = 'select b.dataset_json from merce_flow_execution as a LEFT JOIN merce_flow_execution_output as b on a.id = b.execution_id where a.id ="%s"' % e_id
                        print(data_json_sql)
                        data_json = self.ms.ExecuQuery(data_json_sql)
                        print("打印data_json:", data_json)
                        for n in range(len(data_json)):
                            sink_dataset = data_json[n]["dataset_json"]  # row value is a JSON string
                            print('-----sink_dataset-----', sink_dataset)
                            if sink_dataset:
                                sink_dataset_id = dict_res(sink_dataset)["id"]  # dataset id embedded in the JSON
                                sink_dataset_dict["dataset_id"] = sink_dataset_id
                                # Deep-copy the dict so each appended entry is
                                # independent of later mutations.
                                d = json.loads(json.dumps(sink_dataset_dict))
                                sink_dataset_list.append(d)
                            else:
                                continue
                    else:
                        # Unknown terminal status: abort the whole collection.
                        print("返回的execution 执行状态不正确")
                        return
                else:
                    # Missing execution id: abort the whole collection.
                    print("execution不存在")
                    return
        return sink_dataset_list
    else:
        print("返回的scheduler_id_list值缺失")
        return