def get_all(test='', last='', file=''):
    if test != '':
        domains = Query().get_sub_domains(domains=[test])
        domains = domains[test]
    if file:
        domains = get_domains_from_file(file=file)
        test = file
    results = []
    for domain in domains:
        domain = domain.replace('\n', '')
        print(domain)
        title = get_title(domain)
        ips = get_ip(domain)
        if ips == 'None':
            continue
        cname = get_cname(domain)
        one = {
            "domain": domain,
            "title": title,
            "ips": ips,
            "cname": cname,
        }
        one = DataUtil.format_by_ip(one)
        results = DataUtil.append(results, one)
    print(results)
    # write_to_excel(results, test + last)
def __init__(self, train_args, data_args):
    self.batch_size = train_args.batch_size
    self.feature_dim = train_args.feature_dim
    self.feature_max_length = train_args.feature_max_length
    self.mode = train_args.mode
    self.data_length = train_args.data_length
    self.shuffle = train_args.shuffle
    self.data_path = Const.SpeechDataPath
    self.thchs30 = data_args.thchs30
    self.aishell = data_args.aishell
    self.stcmd = data_args.stcmd
    self.aidatatang = data_args.aidatatang
    self.aidatatang_1505 = data_args.aidatatang_1505
    self.prime = data_args.prime
    self.noise = data_args.noise
    self.pinyin_dict = data_args.pinyin_dict
    self.hanzi_dict = data_args.hanzi_dict
    self.lfr_m = data_args.lfr_m
    self.lfr_n = data_args.lfr_n
    self.acoustic_vocab_size, self.pinyin2index, self.inde2pinyin = self.get_acoustic_vocab_list()
    self.language_vocab_size, self.word2index, self.index2word = self.get_language_vocab_list()
    self.data = DataUtil(data_args, train_args.batch_size, train_args.mode,
                         train_args.data_length, train_args.shuffle)
    self.path_lst = self.data.path_lst
    self.pny_lst = self.data.pny_lst
    self.han_lst = self.data.han_lst
class DatasetUtil(Dataset):
    def __init__(self, conf):
        self.data_util = DataUtil(conf)
        self.input_list, self.target_list, self.intent_list = self.data_util.get_train_data()

    def __getitem__(self, index):
        return (np.array(self.input_list[index]),
                np.array(self.target_list[index]),
                np.array(self.intent_list[index]))

    def __len__(self):
        return len(self.input_list)
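# Illustrative usage sketch (not from the original source): DatasetUtil implements the
# torch Dataset protocol, so it can be wrapped in a DataLoader for batched iteration.
# The `conf` dict and its keys are hypothetical; the real config comes from read_config().
from torch.utils.data import DataLoader

if __name__ == "__main__":
    conf = {"batch_size": 32}                     # hypothetical config for the sketch
    dataset = DatasetUtil(conf)
    loader = DataLoader(dataset, batch_size=conf["batch_size"], shuffle=True)
    for inputs, targets, intents in loader:
        # the default collate_fn stacks the numpy arrays returned by __getitem__ into tensors
        print(inputs.shape, targets.shape, intents.shape)
        break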
def insert_country_currency(self, data: List[CountryCurrency]):
    """
    Transform the data so that it is SQL query compatible
    REPLACE the data in COUNTRY CURRENCY table
    :param data: List[CountryCurrency]
    :return: None
    """
    cursor = self.database_connection.cursor()
    query = 'REPLACE INTO {}.{} (country, currency, currency_code) VALUES {}'.format(
        DBConfig.DATABASE.value,
        DBConfig.TABLE_COUNTRY_CURRENCY.value,
        DataUtil.list_to_string_country_currency(data=data))
    cursor.execute(query)
    self.database_connection.commit()
def __init__(self, config, device):
    super(JointNLU, self).__init__()
    self.embedding_size = config["embedding_size"]
    self.hidden_size = config["hidden_size"]
    self.batch_size = config["batch_size"]
    self.seq_length = config["seq_length"]
    self.dropout_p = 0.5

    data_util = DataUtil(config)
    input_vocab, target_vocab, intent_vocab = data_util.get_vocab()
    input_size = len(input_vocab)
    target_size = len(target_vocab)
    intent_size = len(intent_vocab)
    self.input_vocab = input_vocab

    self.en_embedding = nn.Embedding(input_size, self.embedding_size)
    # batch_first=True: the input and output tensors are provided as (batch, seq, feature)
    self.en_lstm = nn.LSTM(self.embedding_size, self.hidden_size, batch_first=True)
    self.de_embedding = nn.Embedding(target_size, self.embedding_size)
    self.de_lstm = nn.LSTM(self.embedding_size, self.hidden_size, batch_first=True)
    self.de_start = torch.LongTensor(
        [[input_vocab.index(data_util.pad_token)]] * self.batch_size).to(device)
    self.de_slot_output = nn.Linear(self.hidden_size, target_size)
    self.de_intent_output = nn.Linear(self.hidden_size, intent_size)
    self.attn = nn.Linear(self.hidden_size * 2, self.seq_length)
    self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
    self.attn_slot = nn.Linear(self.hidden_size * 2, self.seq_length)
    self.attn_slot_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
    self.dropout = nn.Dropout(self.dropout_p)
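# Standalone sketch of the batch_first convention noted in the comment above: with
# batch_first=True the LSTM consumes and emits (batch, seq, feature) tensors. The sizes
# here are illustrative only, not taken from the JointNLU config.
import torch
import torch.nn as nn

embedding_size, hidden_size = 64, 128
lstm = nn.LSTM(embedding_size, hidden_size, batch_first=True)
x = torch.randn(8, 20, embedding_size)   # (batch, seq, feature)
output, (h_n, c_n) = lstm(x)
print(output.shape)  # torch.Size([8, 20, 128]) -> hidden state for every time step
print(h_n.shape)     # torch.Size([1, 8, 128])  -> final hidden state per layer/direction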
def insert(self, data: List[Currency]):
    """
    Transform the data so that it is SQL query compatible
    REPLACE the data in CURRENCY table
    INSERT the data in CURRENCY_HISTORY table
    :param data: List[Currency]
    :return: None
    """
    cursor = self.database_connection.cursor()
    modes: dict = {
        'REPLACE': DBConfig.TABLE_CURRENCY.value,
        'INSERT': DBConfig.TABLE_CURRENCY_HISTORY.value
    }
    for mode, table in modes.items():
        query = '{} INTO {}.{} (currency_code, value) VALUES {}'.format(
            mode, DBConfig.DATABASE.value, table,
            DataUtil.list_to_string_currency(data=data))
        cursor.execute(query)
    self.database_connection.commit()
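# Design note (sketch, not the original implementation): building the VALUES clause by
# string concatenation relies on DataUtil.list_to_string_currency() escaping values
# correctly. Assuming a MySQL-style driver with the %s paramstyle and that Currency
# exposes currency_code and value attributes (both assumptions), an equivalent
# parameterized variant using executemany() avoids manual escaping:
def insert_parameterized(self, data):
    cursor = self.database_connection.cursor()
    rows = [(c.currency_code, c.value) for c in data]   # one tuple per Currency row
    for mode, table in {'REPLACE': DBConfig.TABLE_CURRENCY.value,
                        'INSERT': DBConfig.TABLE_CURRENCY_HISTORY.value}.items():
        query = '{} INTO {}.{} (currency_code, value) VALUES (%s, %s)'.format(
            mode, DBConfig.DATABASE.value, table)
        cursor.executemany(query, rows)
    self.database_connection.commit()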
def train(config):
    hidden_size = config["hidden_size"]
    save_dir = config["save_dir"]
    learning_rate = config["learning_rate"]
    batch_size = config["batch_size"]
    epoch_size = config["epoch_size"]

    dataset = DataUtil(config)
    input_vocab, slot_vocab, intent_vocab = dataset.get_vocab()
    dataloader = DataLoader(dataset, batch_size, shuffle=True)

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    encoder = Encoder(len(input_vocab), config)
    decoder = Decoder(len(slot_vocab), len(intent_vocab), hidden_size * 2)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    encoder.init_weights()
    decoder.init_weights()

    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=learning_rate)

    for epoch in range(1, epoch_size + 1):
        losses = []
        for i, batch in enumerate(dataloader):
            input_batch, slot_batch, intent_batch = batch
            input_batch = input_batch.long()
            slot_batch = slot_batch.long()
            if USE_CUDA:
                input_batch = input_batch.cuda()
                slot_batch = slot_batch.cuda()
                intent_batch = intent_batch.cuda()
            # Older variant kept for reference (torch.ByteTensor masks are deprecated):
            # input_mask = torch.cat([torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))).cuda()
            #                         if USE_CUDA else torch.ByteTensor(tuple(map(lambda s: s == 0, t.data)))
            #                         for t in input_batch]).view(batch_size, -1)
            input_mask = torch.cat([
                torch.BoolTensor(tuple(map(lambda s: s == 0, t.data))).cuda()
                if USE_CUDA else torch.BoolTensor(tuple(map(lambda s: s == 0, t.data)))
                for t in input_batch
            ]).view(batch_size, -1)

            encoder.zero_grad()
            decoder.zero_grad()
            output, hidden_c = encoder(input_batch, input_mask)
            start_decode = torch.LongTensor(
                [[input_vocab.index('PAD')] * batch_size]).transpose(1, 0)
            if USE_CUDA:
                start_decode = start_decode.cuda()
            tag_score, intent_score = decoder(start_decode, hidden_c, output, input_mask)

            loss_1 = loss_function_1(tag_score, slot_batch.view(-1))
            loss_2 = loss_function_2(intent_score, intent_batch)
            loss = loss_1 + loss_2
            losses.append(loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy())
            loss.backward()
            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)
            enc_optim.step()
            dec_optim.step()

            if i % 10 == 0:
                print(f"Epoch {epoch}: {np.mean(losses)}")
                losses = []

        if epoch % 100 == 0:
            torch.save(encoder, os.path.join(save_dir, f'encoder-{epoch}.pt'))
            torch.save(decoder, os.path.join(save_dir, f'decoder-{epoch}.pt'))
            print(f"Epoch: {epoch} save model...")
    print("Training Complete!")
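# Sketch of an equivalent, simpler padding mask (assumes the PAD id is 0, as in the loop
# above): comparing the whole batch against 0 yields the same (batch, seq) boolean mask
# without per-row tensor construction.
import torch

def build_pad_mask(input_batch: torch.Tensor) -> torch.Tensor:
    # True wherever the token id equals the padding id 0
    return input_batch == 0

# Example:
#   input_batch = torch.LongTensor([[5, 3, 0, 0], [7, 0, 0, 0]])
#   build_pad_mask(input_batch)
#   -> tensor([[False, False,  True,  True],
#              [False,  True,  True,  True]])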
def train_model(data_args, am_hp):
    """
    Train the end-to-end acoustic model.
    :param data_args: data set arguments
    :param am_hp: model hyper-parameters
    :return:
    """
    epochs = am_hp.epochs
    batch_size = am_hp.am_batch_size
    data_util_train = DataUtil(data_args, batch_size=batch_size, mode='train',
                               data_length=None, shuffle=True)
    data_util_dev = DataUtil(data_args, batch_size=batch_size, mode='dev',
                             data_length=None, shuffle=True)
    train_dataloader = DataLoader(data_util_train, data_args, am_hp)
    dev_dataloader = DataLoader(data_util_dev, data_args, am_hp)
    print(len(train_dataloader.path_lst))

    with tf.Graph().as_default():
        acoustic_model = CNNCTCModel(am_hp,
                                     train_dataloader.acoustic_vocab_size,
                                     train_dataloader.language_vocab_size)
        saver = tf.train.Saver(max_to_keep=5)
        # Input pipeline
        dataset = tf.data.Dataset.from_generator(
            train_dataloader.end2end_generator,
            output_types=(tf.float32, tf.int32, tf.int32, tf.int32, tf.int32, tf.int32))
        dataset = dataset.map(lambda x, y, z, w, m, n: (x, y, z, w, m, n),
                              num_parallel_calls=64).prefetch(buffer_size=10000)

        with tf.Session() as sess:
            latest = tf.train.latest_checkpoint(Const.AmModelFolder)
            if latest is not None:
                print('load acoustic model...')
                saver.restore(sess, latest)
            else:
                sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter(Const.End2EndTensorboard, tf.get_default_graph())
            batch_nums = len(train_dataloader)
            old_wer = 1
            for epoch in range(epochs):
                total_loss = 0
                iterator_train = dataset.make_one_shot_iterator().get_next()
                for train_step in range(batch_nums):
                    input_x_batch, input_length_batch, pinyin_target, pinyin_length, word_target, word_length = \
                        sess.run(iterator_train)
                    feed = {
                        acoustic_model.wav_input: input_x_batch,
                        acoustic_model.wav_length: input_length_batch,
                        acoustic_model.target_py: pinyin_target,
                        acoustic_model.target_py_length: pinyin_length,
                        acoustic_model.target_hanzi: word_target,
                        acoustic_model.target_hanzi_length: word_length
                    }
                    mean_loss, label_err, han_wer, summary, _ = sess.run(
                        [acoustic_model.lm_mean_loss, acoustic_model.label_err,
                         acoustic_model.han_wer, acoustic_model.summary,
                         acoustic_model.train_op],
                        feed_dict=feed)
                    total_loss += mean_loss
                    if (train_step + 1) % 2 == 0:
                        print('epoch: {0:d} step:{1:d}/{2:d} average loss:{3:.4f} label_err:{4:.4f} acc:{5:.4f}'
                              .format(epoch + 1, train_step + 1, batch_nums,
                                      total_loss / (train_step + 1), label_err, han_wer))
                    writer.add_summary(summary)

                # Evaluation on the dev set
                total_wer = 0
                total_acc = 0
                total_loss = 0
                eval_steps = len(dev_dataloader)
                for feature_input, logits_length, pinyin_target, pinyin_length, word_target, word_length \
                        in dev_dataloader.end2end_generator():
                    feed = {
                        acoustic_model.wav_input: feature_input,
                        acoustic_model.wav_length: logits_length,
                        acoustic_model.target_py: pinyin_target,
                        acoustic_model.target_py_length: pinyin_length,
                        acoustic_model.target_hanzi: word_target,
                        acoustic_model.target_hanzi_length: word_length
                    }
                    mean_loss, label_err, acc = sess.run(
                        [acoustic_model.lm_mean_loss, acoustic_model.label_err,
                         acoustic_model.han_wer],
                        feed_dict=feed)
                    total_wer += label_err
                    total_loss += mean_loss
                    total_acc += acc
                wer = total_wer / eval_steps
                acc = total_acc / eval_steps
                mean_loss = total_loss / eval_steps
                print('epoch:%d loss:%.4f wer:%.4f acc:%.4f' % (epoch + 1, mean_loss, wer, acc))
                save_ckpt = 'model_%d-%.2f-%.2f.ckpt'
                saver.save(sess, os.path.join(home_dir, Const.End2EndModelFolder,
                                              save_ckpt % (epoch, mean_loss, acc)))
                if wer < old_wer:
                    saver.save(sess, os.path.join(home_dir, Const.End2EndModelFolder,
                                                  'final_model.ckpt'))
                    old_wer = wer
def train_acoustic_model(data_args, am_hp):
    """
    Train the acoustic model.
    :param data_args: data set arguments
    :param am_hp: acoustic model hyper-parameters
    :return:
    """
    epochs = am_hp.epochs
    batch_size = am_hp.am_batch_size
    data_util_train = DataUtil(data_args, batch_size=batch_size, mode='train',
                               data_length=None, shuffle=True)
    data_util_dev = DataUtil(data_args, batch_size=batch_size, mode='dev',
                             data_length=None, shuffle=True)
    train_dataloader = DataLoader(data_util_train, data_args, am_hp)
    dev_dataloader = DataLoader(data_util_dev, data_args, am_hp)

    with tf.Graph().as_default():
        acoustic_model = CNNCTCModel(am_hp,
                                     train_dataloader.acoustic_vocab_size,
                                     train_dataloader.language_vocab_size)
        saver = tf.train.Saver(max_to_keep=5)
        # Input pipeline
        dataset = tf.data.Dataset.from_generator(
            train_dataloader.am_generator,
            output_types=(tf.float32, tf.int32, tf.int32, tf.int32, tf.int32, tf.int32))
        dataset = dataset.map(lambda x, y, z, w, m, n: (x, y, z, w, m, n),
                              num_parallel_calls=64).prefetch(buffer_size=10000)

        with tf.Session() as sess:
            print('Start training')
            latest = tf.train.latest_checkpoint(Const.AmModelFolder)
            if latest is not None:
                print('load acoustic model...')
                saver.restore(sess, latest)
            else:
                sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter(Const.AmModelTensorboard, tf.get_default_graph())
            old_wer = 1
            batch_nums = len(train_dataloader)
            for epoch in range(epochs):
                total_loss = 0
                iterator_train = dataset.make_one_shot_iterator().get_next()
                for train_step in range(batch_nums):
                    input_x_batch, input_length_batch, _, _, target_y_batch, seq_length_batch = \
                        sess.run(iterator_train)
                    feed = {
                        acoustic_model.wav_input: input_x_batch,
                        acoustic_model.wav_length: input_length_batch,
                        acoustic_model.target_hanzi: target_y_batch,
                        acoustic_model.target_hanzi_length: seq_length_batch
                    }
                    loss, mean_loss, lr, summary, label_err, _ = sess.run(
                        [acoustic_model.loss, acoustic_model.mean_loss,
                         acoustic_model.current_learning, acoustic_model.summary,
                         acoustic_model.label_err, acoustic_model.train_op],
                        feed_dict=feed)
                    total_loss += mean_loss
                    if (train_step + 1) % 2 == 0:
                        print('epoch: %d step: %d/%d mean_loss: %.4f total_loss: %.4f lr: %.6f label_err: %.4f'
                              % (epoch + 1, train_step + 1, batch_nums, mean_loss,
                                 total_loss / (train_step + 1), lr, label_err))
                        print(loss)
                    writer.add_summary(summary, epoch)

                # Evaluation on the dev set
                total_err = 0
                total_loss = 0
                eval_steps = len(dev_dataloader)
                for feature_input, logits_length, _, _, target_y, target_length in dev_dataloader.am_generator():
                    feed = {
                        acoustic_model.wav_input: feature_input,
                        acoustic_model.wav_length: logits_length,
                        acoustic_model.target_hanzi: target_y,
                        acoustic_model.target_hanzi_length: target_length
                    }
                    mean_loss, label_err = sess.run(
                        [acoustic_model.mean_loss, acoustic_model.label_err],
                        feed_dict=feed)
                    total_loss += mean_loss
                    total_err += label_err
                wer = total_err / eval_steps
                mean_loss = total_loss / eval_steps
                save_ckpt = 'epoch_%d_loss_%.2f_wer_%.2f.ckpt'
                saver.save(sess, os.path.join(Const.AmModelFolder,
                                              save_ckpt % (epoch, mean_loss, wer)))
                print('epoch: ', epoch + 1, ': average loss = ', mean_loss)
                if wer < old_wer:
                    saver.save(sess, os.path.join(Const.AmModelFolder, 'final_model.ckpt'))
                    old_wer = wer
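# Design note (sketch, TF 1.x API): calling dataset.make_one_shot_iterator() inside the
# epoch loop above adds a new iterator op to the graph on every epoch. An initializable
# iterator created once and re-initialized per epoch avoids that graph growth. The toy
# data below stands in for the from_generator() pipeline used in the training code.
import tensorflow as tf

toy_dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
iterator = toy_dataset.make_initializable_iterator()
next_element = iterator.get_next()
with tf.Session() as sess:
    for epoch in range(2):
        sess.run(iterator.initializer)       # rewind the dataset at the start of each epoch
        while True:
            try:
                sess.run(next_element)       # one element per step
            except tf.errors.OutOfRangeError:
                break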
def train_language_model(data_args, am_hp):
    """
    Train the language model.
    :param data_args: data set arguments
    :param am_hp: model hyper-parameters
    :return:
    """
    epochs = am_hp.epochs
    batch_size = am_hp.lm_batch_size
    data_util_train = DataUtil(data_args, batch_size=batch_size, mode='train',
                               data_length=None, shuffle=True)
    data_util_eval = DataUtil(data_args, batch_size=batch_size, mode='dev',
                              data_length=None, shuffle=True)
    dataloader = DataLoader(data_util_train, data_args, am_hp)
    dataloader_eval = DataLoader(data_util_eval, data_args, am_hp)

    lm_model = Language_Model(am_hp, dataloader.acoustic_vocab_size, dataloader.language_vocab_size)
    batch_num = len(data_util_train.path_lst) // batch_size
    eval_batch_num = len(data_util_eval.path_lst) // batch_size

    with lm_model.graph.as_default():
        saver = tf.train.Saver(max_to_keep=5)
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.85  # use at most 85% of GPU memory
        with tf.Session(graph=lm_model.graph, config=config) as sess:
            merged = tf.summary.merge_all()
            sess.run(tf.global_variables_initializer())
            add_num = 0
            if os.path.exists(Const.LmModelFolder):
                latest = tf.train.latest_checkpoint(Const.LmModelFolder)
                if latest is not None:
                    print('loading language model...')
                    saver.restore(sess, latest)
                    # add_num = int(latest.split('_')[-1])
            writer = tf.summary.FileWriter(Const.LmModelTensorboard, tf.get_default_graph())
            old_acc = 0
            for epoch in range(epochs):
                total_loss = 0
                batch = dataloader.get_lm_batch()
                for i in range(batch_num):
                    input_batch, _, label_batch = next(batch)
                    feed = {lm_model.x: input_batch, lm_model.y: label_batch}
                    cost, cur_lr, _ = sess.run(
                        [lm_model.mean_loss, lm_model.current_learning, lm_model.train_op],
                        feed_dict=feed)
                    total_loss += cost
                    if i % 10 == 0:
                        print("epoch: %d step: %d/%d lr:%.6f train loss=%.6f"
                              % (epoch + 1, i, batch_num, cur_lr, cost))
                summary = sess.run(merged, feed_dict=feed)
                writer.add_summary(summary, epoch)
                print('epochs', epoch + 1, ': average loss = ', total_loss / batch_num)
                saver.save(sess, Const.LmModelFolder + 'model_%d_%.3f.ckpt'
                           % (epoch + 1, total_loss / batch_num))

                # Evaluate accuracy on the dev set
                total_acc = 0
                total_loss = 0
                batch = dataloader_eval.get_lm_batch()
                for j in range(eval_batch_num):
                    input_batch, _, label_batch = next(batch)
                    feed = {lm_model.x: input_batch, lm_model.y: label_batch}
                    loss, acc = sess.run([lm_model.mean_loss, lm_model.acc], feed_dict=feed)
                    total_loss += loss
                    total_acc += acc
                acc = total_acc / eval_batch_num
                loss = total_loss / eval_batch_num
                print("epoch: %d test acc:%.4f test loss=%.6f" % (epoch + 1, acc, loss))
                if acc > old_acc:
                    saver.save(sess, os.path.join(Const.LmModelFolder,
                                                  'final_model_%d.ckpt' % (epoch + 1)))
                    old_acc = acc
            writer.close()
      ' set 汉字 word accuracy ratio: ', (1 - han_error_num / han_num) * 100, '%')


if __name__ == '__main__':
    # Test length
    # 1. Prepare the test data; it does not have to match the training data.
    #    The data source is selected via data_args.data_type.
    lm_data_params = LmDataHparams().args

    # 2. Acoustic model -----------------------------------
    hparams = AmLmHparams()
    parser = hparams.parser
    am_hp = parser.parse_args()

    test_data_util = DataUtil(lm_data_params, am_hp.am_batch_size, mode='test',
                              data_length=None, shuffle=False)
    dataloader = DataLoader(test_data_util, lm_data_params, am_hp)
    lm_model = Language_Model(am_hp, dataloader.acoustic_vocab_size, dataloader.language_vocab_size)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(graph=lm_model.graph, config=tf.ConfigProto(gpu_options=gpu_options))
    with lm_model.graph.as_default():
        print('loading language model...')
        saver = tf.train.Saver()
        latest = tf.train.latest_checkpoint(Const.LmModelFolder)
        saver.restore(sess, latest)

    test_count = 500
    speech_test(lm_model, test_count, sess)
# Fetch sub-domain data
test = vt_client.get_subdomains(tests)

# Collect CNAME, A, title and port information for the sub-domains
for domain in test:
    # tests = test[domain]
    tests = ['www.baidu.com']
    append_tasks(query, tasks=tasks, iters=tests, query_type='CNAME', _name=domain)
    append_tasks(query, tasks=tasks, iters=tests, query_type='A', _name=domain)
    append_tasks(HttpUtil.asnyc_get_title, tasks=tasks, iters=tests, results=all_results)

loop.run_until_complete(asyncio.wait(tasks))
print('-----------------')
print(all_results)
print('数据爬取完毕,正在保存中 ...')
for x in all_results:
    all_results[x] = DataUtil.format_by_ip(all_results[x])
write_to_excel(all_results, 'test')
loop.close()
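# Minimal sketch of the pattern used above (illustrative only, not the original helpers):
# each lookup becomes an asyncio task on the event loop, and asyncio.wait() drives them all
# to completion before the results are collected. fake_query is a hypothetical stand-in for
# the real DNS/HTTP lookups scheduled by append_tasks.
import asyncio

async def fake_query(domain, query_type):
    await asyncio.sleep(0)            # placeholder for the real network call
    return domain, query_type

loop = asyncio.get_event_loop()
tasks = [loop.create_task(fake_query(d, 'A')) for d in ['www.baidu.com']]
loop.run_until_complete(asyncio.wait(tasks))
print([t.result() for t in tasks])
loop.close()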
import torch

from util.conf_util import read_config
from util.data_util import DataUtil

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"device: {device}")

config = read_config()
save_dir = config["save_dir"]
batch_size = config["batch_size"]
seq_length = config["seq_length"]
data_util = DataUtil(config)

# load model
model = torch.load(f"{save_dir}/model.pt", map_location=torch.device("cpu"))
model.eval()


def predict(example):
    input_vocab, target_vocab, intent_vocab = data_util.get_vocab()
    target_vocab = input_vocab
    input_list = []
    for _ in range(batch_size):
        id_list = data_util.word2id(input_vocab, [c for c in example], seq_length)
        input_list.append(id_list)
    input_batch = torch.LongTensor(input_list).to(device)
    slot_scores, intent_score = model(input_batch)
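    # Illustrative continuation (assumption, not in the source): decode the scores back to
    # labels with argmax. The output shapes are assumed here to be (batch*seq, num_slots)
    # for slot_scores and (batch, num_intents) for intent_score, and the vocab lookups use
    # the lists returned by data_util.get_vocab().
    slot_ids = slot_scores.view(batch_size, seq_length, -1).argmax(dim=-1)[0].tolist()
    intent_id = intent_score.argmax(dim=-1)[0].item()
    return [target_vocab[i] for i in slot_ids], intent_vocab[intent_id]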
class RunCase(object):
    # Constants mapping each column of the test-case sheet
    CASE_ID = 1
    MODULE_NAME = 2
    CASE_NAME = 3
    RUN_FLAG = 4
    URL = 5
    REQUEST_METHOD = 6
    HEADERS = 7
    COOKIES = 8
    REQUEST_PARAM = 9
    EXP_RESULT = 10
    STATUS_CODE = 11
    RESPONSE_TEXT = 12
    ASSET_TYPE = 13
    ASSET_PATTERN = 14
    EXEC_RESLT = 15

    def __init__(self, file_name, sheet_name=None, sheet_index=0):
        self.requestUtil = RequestUtil()
        self.excelUtil = ExcelUtil(file_name, sheet_name, sheet_index)
        self.dataUtil = DataUtil()
        self.assetUtil = AssertUtil()
        self.logger = Logger(self.__class__.__name__).get_logger_with_level()
        self.cookie_dict = {}

    def run_case_by_data(self, data):
        """Run a single case from row data, e.g. {"1": [test_001, order, place order, www.baidu.com, xx, xx, ...]}"""
        row_no = 2
        for key in data:
            row_no = key
            break
        row_data = data.get(row_no)
        self.logger.info("执行用例:%s-%s-%s" % (row_data[RunCase.CASE_ID - 1],
                                                row_data[RunCase.MODULE_NAME - 1],
                                                row_data[RunCase.CASE_NAME - 1]))
        # Prepare the case data
        case_id = row_data[self.CASE_ID - 1]
        # module_name = row_data[self.MODULE_NAME - 1]
        run_flag = row_data[self.RUN_FLAG - 1]
        if run_flag == '否':
            # Case is marked "do not run"
            return
        elif run_flag == '是':
            url = row_data[self.URL - 1]
            request_method = row_data[self.REQUEST_METHOD - 1]
            # Request headers
            headers = row_data[self.HEADERS - 1]
            if headers is None:
                headers = {}
            else:
                headers = self.dataUtil.str_to_json(headers)
            # Cookies: resolve a possible cookie dependency
            cookies = row_data[self.COOKIES - 1]
            if cookies:
                depend_cookie = self.cookie_depend(cookies)
                if depend_cookie is not None:
                    if type(depend_cookie) == RequestsCookieJar:
                        cookies = depend_cookie
                    elif depend_cookie == '':
                        cookies = {}
                    else:
                        cookies = self.dataUtil.str_to_json(depend_cookie)
            request_param = row_data[self.REQUEST_PARAM - 1]
            if request_param is not None:
                request_param = self.data_depend(request_param)
            exp_result = row_data[self.EXP_RESULT - 1]
            asset_type = row_data[self.ASSET_TYPE - 1]
            asset_pattern = row_data[self.ASSET_PATTERN - 1]

            # Send the request and record the result
            self.logger.info("请求URL:%s" % url)
            self.logger.info("请求参数:%s" % request_param)
            self.logger.info("请求头:%s" % headers)
            self.logger.info("请求cookie:%s" % cookies)
            response = None
            if request_method == 'get':
                response = self.requestUtil.do_get(url, request_param, headers, cookies)
            elif request_method == 'post':
                # Convert the parameter string to a JSON object
                json_param = self.dataUtil.str_to_json(request_param)
                response = self.requestUtil.do_post(url, json_param, '', headers, cookies)
            response_text = response.text.strip()
            if case_id in self.cookie_dict:
                self.cookie_dict[case_id] = response.cookies
            self.logger.info("请求结果:%s\n" % response_text)
            self.excelUtil.set_data_by_row_col_no(row_no, self.STATUS_CODE, response.status_code)
            self.excelUtil.set_data_by_row_col_no(row_no, self.RESPONSE_TEXT, response_text)

            # Assert and record the final result
            result = self.asset_handle(exp_result, response_text, asset_type, asset_pattern)
            if result:
                self.excelUtil.set_data_by_row_col_no(row_no, self.EXEC_RESLT, 'pass')
            else:
                self.excelUtil.set_data_by_row_col_no(row_no, self.EXEC_RESLT, 'fail')
            return result

    def data_depend(self, request_param):
        """Resolve data dependencies of the form ${test_03.data.orderId},
        i.e. a reference to a field of another case's response."""
        request_param_final = None
        # Placeholders that refer to response attributes
        match_results = re.findall(r'\$\{.+?\..+?\}', request_param)
        if match_results is None or match_results == []:
            return request_param
        else:
            for var_pattern in match_results:
                # Only a single match per placeholder is considered
                start_index = var_pattern.index("{")
                end_index = var_pattern.rindex("}")
                # Value inside ${...}
                pattern = var_pattern[start_index + 1:end_index]
                spilit_index = pattern.index(".")
                # Split into the depended-on case_id and the attribute path
                case_id = pattern[:spilit_index]
                proper_pattern = pattern[spilit_index + 1:]
                row_no = self.excelUtil.get_row_no_by_cell_value(case_id, self.CASE_ID)
                response = self.excelUtil.get_data_by_row_col_no(row_no, self.RESPONSE_TEXT)
                result = self.dataUtil.json_data_analysis(proper_pattern, response)
                # Replace the placeholder; str(result) guards against integer results
                request_param_final = request_param.replace(var_pattern, str(result), 1)
            return request_param_final

    def cookie_depend(self, request_param):
        """Resolve cookie dependencies:
        1. ${test_01} depends on the cookies returned by another case
        2. ${test_03.data.orderId} depends on a field of another case's response
        """
        cookie_final = None
        # Dependency on the cookies returned by another case
        match_results = re.match(r'^\$\{(.[^\.]+)\}$', request_param)
        if match_results:
            depend_cookie = self.cookie_dict[match_results.group(1)]
            return depend_cookie
        else:
            # Not a cookie dependency; fall back to data dependency handling
            cookie_final = self.data_depend(request_param)
            return cookie_final

    def asset_handle(self, exp_result, response_text, asset_type, asset_pattern):
        """Assert according to the configured assertion type."""
        asset_flag = None
        if asset_type == '相等':
            if asset_pattern is None or asset_pattern == '':
                asset_flag = self.assetUtil.equals(exp_result, response_text)
            else:
                exp_value = self.dataUtil.json_data_analysis(asset_pattern, exp_result)
                response_value = self.dataUtil.json_data_analysis(asset_pattern, response_text)
                asset_flag = self.assetUtil.equals(exp_value, response_value)
        elif asset_type == '包含':
            asset_flag = self.assetUtil.contains(response_text, asset_pattern)
        elif asset_type == '正则':
            asset_flag = self.assetUtil.re_matches(response_text, asset_pattern)
        return asset_flag
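# Standalone sketch of the placeholder parsing described in data_depend() above.
# The request_param value is illustrative; the real case data lives in the Excel sheet.
import re

request_param = '{"orderId": "${test_03.data.orderId}"}'
for var_pattern in re.findall(r'\$\{.+?\..+?\}', request_param):
    pattern = var_pattern[var_pattern.index("{") + 1:var_pattern.rindex("}")]
    case_id, proper_pattern = pattern.split(".", 1)
    print(case_id, proper_pattern)   # -> test_03 data.orderId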