def __init__(self, file_name, sheet_name=None, sheet_index=0):
    self.requestUtil = RequestUtil()
    self.excelUtil = ExcelUtil(file_name, sheet_name, sheet_index)
    self.dataUtil = DataUtil()
    self.assetUtil = AssertUtil()
    self.logger = Logger(self.__class__.__name__).get_logger_with_level()
    self.cookie_dict = {}
def get_all(test='', last='', file=''):
    domains = []
    if test != '':
        domains = Query().get_sub_domains(domains=[test])
        domains = domains[test]
    if file:
        domains = get_domains_from_file(file=file)
        test = file
    results = []
    for domain in domains:
        domain = domain.replace('\n', '')
        print(domain)
        title = get_title(domain)
        ips = get_ip(domain)
        if ips == 'None':
            continue
        cname = get_cname(domain)
        one = {
            "domain": domain,
            "title": title,
            "ips": ips,
            "cname": cname,
        }
        one = DataUtil.format_by_ip(one)
        results = DataUtil.append(results, one)
    print(results)

    write_to_excel(results, test + last)
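
The write_to_excel helper is not shown in this excerpt. Below is a minimal sketch of what it might look like, assuming results is the list of dicts built above; the openpyxl-based body and the output file name are assumptions, not the original implementation.

from openpyxl import Workbook

def write_to_excel(results, name):
    # Hypothetical helper: the real implementation is not part of this example.
    wb = Workbook()
    ws = wb.active
    ws.append(["domain", "title", "ips", "cname"])
    for row in results:
        ws.append([row.get("domain"), row.get("title"),
                   str(row.get("ips")), str(row.get("cname"))])
    wb.save("%s.xlsx" % name)
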
Example #3
    def __init__(self, train_args, data_args):
        self.batch_size = train_args.batch_size
        self.feature_dim = train_args.feature_dim
        self.feature_max_length = train_args.feature_max_length
        self.mode = train_args.mode
        self.data_length = train_args.data_length
        self.shuffle = train_args.shuffle

        self.data_path = Const.SpeechDataPath
        self.thchs30 = data_args.thchs30
        self.aishell = data_args.aishell
        self.stcmd = data_args.stcmd
        self.aidatatang = data_args.aidatatang
        self.aidatatang_1505 = data_args.aidatatang_1505
        self.prime = data_args.prime
        self.noise = data_args.noise
        self.pinyin_dict = data_args.pinyin_dict
        self.hanzi_dict = data_args.hanzi_dict

        self.lfr_m = data_args.lfr_m
        self.lfr_n = data_args.lfr_n

        self.acoustic_vocab_size, self.pinyin2index, self.inde2pinyin = self.get_acoustic_vocab_list()
        self.language_vocab_size, self.word2index, self.index2word = self.get_language_vocab_list()

        self.data = DataUtil(data_args, train_args.batch_size, train_args.mode, train_args.data_length, train_args.shuffle)

        self.path_lst = self.data.path_lst
        self.pny_lst = self.data.pny_lst
        self.han_lst = self.data.han_lst
Example #4
class DatasetUtil(Dataset):
    def __init__(self, conf):
        self.data_util = DataUtil(conf)
        self.input_list, self.target_list, self.intent_list = self.data_util.get_train_data(
        )

    def __getitem__(self, index):
        return np.array(self.input_list[index]), np.array(
            self.target_list[index]), np.array(self.intent_list[index])

    def __len__(self):
        return len(self.input_list)
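
A minimal usage sketch for DatasetUtil, assuming conf is the same config dict that DataUtil expects (read_config is the helper used in Example #14 below); the batch size is a placeholder.

from torch.utils.data import DataLoader
from util.conf_util import read_config

conf = read_config()
dataset = DatasetUtil(conf)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
for inputs, targets, intents in loader:
    # The default collate function stacks the numpy arrays returned by __getitem__.
    print(inputs.shape, targets.shape, intents.shape)
    break
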
    def insert_country_currency(self, data: List[CountryCurrency]):
        """
        Transform the data so that it is SQL query compatible
        REPLACE the data in COUNTRY CURRENCY table

        :param data: List[CountryCurrency]
        :return: None
        """
        cursor = self.database_connection.cursor()
        query = 'REPLACE INTO {}.{} (country, currency, currency_code) VALUES {}'.format(
            DBConfig.DATABASE.value, DBConfig.TABLE_COUNTRY_CURRENCY.value,
            DataUtil.list_to_string_country_currency(data=data))
        cursor.execute(query)

        self.database_connection.commit()
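
The query above relies on DataUtil.list_to_string_country_currency to render the VALUES clause. A hypothetical sketch of such a helper, assuming CountryCurrency exposes country, currency and currency_code attributes; the real DataUtil implementation is not shown here.

from typing import List

def list_to_string_country_currency(data: List["CountryCurrency"]) -> str:
    # Hypothetical: would render e.g. ('France', 'Euro', 'EUR'), ('Japan', 'Yen', 'JPY')
    return ', '.join(
        "('{0}', '{1}', '{2}')".format(c.country, c.currency, c.currency_code)
        for c in data
    )
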
Example #6
    def __init__(self, config, device):
        super(JointNLU, self).__init__()
        self.embedding_size = config["embedding_size"]
        self.hidden_size = config["hidden_size"]
        self.batch_size = config["batch_size"]
        self.seq_length = config["seq_length"]
        self.dropout_p = 0.5

        data_util = DataUtil(config)
        input_vocab, target_vocab, intent_vocab = data_util.get_vocab()
        input_size = len(input_vocab)
        target_size = len(target_vocab)
        intent_size = len(intent_vocab)
        self.input_vocab = input_vocab
        self.en_embedding = nn.Embedding(input_size, self.embedding_size)
        # batch_first=True: the input and output tensors are provided as (batch, seq, feature)
        self.en_lstm = nn.LSTM(self.embedding_size,
                               self.hidden_size,
                               batch_first=True)

        self.de_embedding = nn.Embedding(target_size, self.embedding_size)
        self.de_lstm = nn.LSTM(self.embedding_size,
                               self.hidden_size,
                               batch_first=True)
        self.de_start = torch.LongTensor(
            [[input_vocab.index(data_util.pad_token)]] *
            self.batch_size).to(device)
        self.de_slot_output = nn.Linear(self.hidden_size, target_size)
        self.de_intent_output = nn.Linear(self.hidden_size, intent_size)

        self.attn = nn.Linear(self.hidden_size * 2, self.seq_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.attn_slot = nn.Linear(self.hidden_size * 2, self.seq_length)
        self.attn_slot_combine = nn.Linear(self.hidden_size * 2,
                                           self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
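
A minimal instantiation sketch for JointNLU, assuming the config dict carries the keys read above plus whatever DataUtil(config) needs to build its vocabularies; read_config is the helper used in Example #14 below.

import torch
from util.conf_util import read_config

config = read_config()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = JointNLU(config, device).to(device)
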
    def insert(self, data: List[Currency]):
        """
        Transform the data so that it is SQL query compatible
        REPLACE the data in CURRENCY table
        INSERT the data in CURRENCY_HISTORY table

        :param data: List[Currency]
        :return: None
        """
        cursor = self.database_connection.cursor()
        modes: dict = {
            'REPLACE': DBConfig.TABLE_CURRENCY.value,
            'INSERT': DBConfig.TABLE_CURRENCY_HISTORY.value
        }
        for mode, table in modes.items():
            query = '{} INTO {}.{} (currency_code, value) VALUES {}'.format(
                mode, DBConfig.DATABASE.value, table,
                DataUtil.list_to_string_currency(data=data))
            cursor.execute(query)

        self.database_connection.commit()
def train(config):
    hidden_size = config["hidden_size"]
    save_dir = config["save_dir"]
    learning_rate = config["learning_rate"]
    batch_size = config["batch_size"]
    epoch_size = config["epoch_size"]

    dataset = DataUtil(config)
    input_vocab, slot_vocab, intent_vocab = dataset.get_vocab()
    dataloader = DataLoader(dataset, batch_size, shuffle=True)

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    encoder = Encoder(len(input_vocab), config)
    decoder = Decoder(len(slot_vocab), len(intent_vocab), hidden_size * 2)

    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    encoder.init_weights()
    decoder.init_weights()

    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=learning_rate)

    for epoch in range(1, epoch_size + 1):
        losses = []
        for i, batch in enumerate(dataloader):
            input_batch, slot_batch, intent_batch = batch
            input_batch = input_batch.long()
            slot_batch = slot_batch.long()
            if USE_CUDA:
                input_batch = input_batch.cuda()
                slot_batch = slot_batch.cuda()
                intent_batch = intent_batch.cuda()

            input_mask = torch.cat([
                torch.BoolTensor(tuple(map(lambda s: s == 0, t.data))).cuda()
                if USE_CUDA else torch.BoolTensor(
                    tuple(map(lambda s: s == 0, t.data))) for t in input_batch
            ]).view(batch_size, -1)
            encoder.zero_grad()
            decoder.zero_grad()

            output, hidden_c = encoder(input_batch, input_mask)
            start_decode = torch.LongTensor([[input_vocab.index('PAD')] *
                                             batch_size]).transpose(1, 0)
            if USE_CUDA:
                start_decode = start_decode.cuda()

            tag_score, intent_score = decoder(start_decode, hidden_c, output,
                                              input_mask)

            loss_1 = loss_function_1(tag_score, slot_batch.view(-1))
            loss_2 = loss_function_2(intent_score, intent_batch)

            loss = loss_1 + loss_2
            losses.append(
                loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy())
            loss.backward()

            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)

            enc_optim.step()
            dec_optim.step()

            if i % 10 == 0:
                print(f"Epoch {epoch}: {np.mean(losses)}")
                losses = []

        if epoch % 100 == 0:
            torch.save(encoder, os.path.join(save_dir, f'encoder-{epoch}.pt'))
            torch.save(decoder, os.path.join(save_dir, f'decoder-{epoch}.pt'))
            print(f"Epoch: {epoch} save model...")

    print("Training Complete!")
Example #9
def train_model(data_args, am_hp):
    """
    声学模型
    :param train_data: 训练数据集合
    :param dev_data: 验证数据集合
    :return:
    """
    epochs = am_hp.epochs
    batch_size = am_hp.am_batch_size
    data_util_train = DataUtil(data_args,
                               batch_size=batch_size,
                               mode='train',
                               data_length=None,
                               shuffle=True)
    data_util_dev = DataUtil(data_args,
                             batch_size=batch_size,
                             mode='dev',
                             data_length=None,
                             shuffle=True)

    train_dataloader = DataLoader(data_util_train, data_args, am_hp)
    dev_dataloader = DataLoader(data_util_dev, data_args, am_hp)
    print(len(train_dataloader.path_lst))

    with tf.Graph().as_default():
        acoustic_model = CNNCTCModel(am_hp,
                                     train_dataloader.acoustic_vocab_size,
                                     train_dataloader.language_vocab_size)
        saver = tf.train.Saver(max_to_keep=5)
        # Data reading and preprocessing
        dataset = tf.data.Dataset.from_generator(
            train_dataloader.end2end_generator,
            output_types=(tf.float32, tf.int32, tf.int32, tf.int32, tf.int32,
                          tf.int32))
        dataset = dataset.map(
            lambda x, y, z, w, m, n: (x, y, z, w, m, n),
            num_parallel_calls=64).prefetch(buffer_size=10000)

        with tf.Session() as sess:
            latest = tf.train.latest_checkpoint(Const.AmModelFolder)
            if latest is not None:
                print('load acoustic model...')
                saver.restore(sess, latest)
            else:
                sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter(Const.End2EndTensorboard,
                                           tf.get_default_graph())
            batch_nums = len(train_dataloader)
            old_wer = 1
            for epoch in range(epochs):
                total_loss = 0
                iterator_train = dataset.make_one_shot_iterator().get_next()
                for train_step in range(batch_nums):
                    input_x_batch, input_length_batch, pinyin_target, pinyin_length, word_target, word_length = \
                        sess.run(iterator_train)
                    feed = {
                        acoustic_model.wav_input: input_x_batch,
                        acoustic_model.wav_length: input_length_batch,
                        acoustic_model.target_py: pinyin_target,
                        acoustic_model.target_py_length: pinyin_length,
                        acoustic_model.target_hanzi: word_target,
                        acoustic_model.target_hanzi_length: word_length
                    }
                    mean_loss, label_err, han_wer, summary, _ = sess.run(
                        [
                            acoustic_model.lm_mean_loss,
                            acoustic_model.label_err, acoustic_model.han_wer,
                            acoustic_model.summary, acoustic_model.train_op
                        ],
                        feed_dict=feed)
                    total_loss += mean_loss
                    if (train_step + 1) % 2 == 0:
                        print(
                            'epoch: {0:d}   step:{1:d}/{2:d}   average loss:{3:.4f}   label_err:{4:.4f}   acc:{5:.4f}'
                            .format(epoch + 1, train_step + 1, batch_nums,
                                    total_loss / (train_step + 1), label_err,
                                    han_wer))
                writer.add_summary(summary)

                # Evaluate on the dev set
                total_wer = 0
                total_acc = 0
                total_loss = 0
                eval_steps = len(dev_dataloader)
                for feature_input, logits_length, target_y, target_length in dev_dataloader:
                    feed = {
                        acoustic_model.wav_input: feature_input,
                        acoustic_model.wav_length: logits_length,
                        acoustic_model.target_py: target_y,
                        acoustic_model.target_py_length: target_length,
                        acoustic_model.target_hanzi: word_target,
                        acoustic_model.target_hanzi_length: word_length
                    }
                    mean_loss, label_err, acc = sess.run([
                        acoustic_model.lm_mean_loss, acoustic_model.label_err,
                        acoustic_model.han_wer
                    ],
                                                         feed_dict=feed)
                    total_wer += label_err
                    total_loss += mean_loss
                    total_acc += acc
                wer = total_wer / eval_steps
                acc = total_acc / eval_steps
                mean_loss = total_loss / eval_steps
                print('epoch:%d   loss:%.4f   wer:%.4f   acc:%.4f' %
                      (epoch + 1, mean_loss, wer, acc))
                save_ckpt = "model_{epoch_d}-{val_loss_.2f}-{acc_.2f}.ckpt"
                saver.save(
                    sess,
                    os.path.join(home_dir, Const.End2EndModelFolder,
                                 save_ckpt % (epoch, mean_loss, acc)))
                if wer < old_wer:
                    saver.save(
                        sess,
                        os.path.join(home_dir, Const.End2EndModelFolder,
                                     'final_model.ckpt'))
                    old_wer = wer
def train_acoustic_model(data_args, am_hp):
    """
    声学模型
    :param train_data: 训练数据集合
    :param dev_data: 验证数据集合
    :return:
    """
    epochs = am_hp.epochs
    batch_size = am_hp.am_batch_size
    data_util_train = DataUtil(data_args,
                               batch_size=batch_size,
                               mode='train',
                               data_length=None,
                               shuffle=True)
    data_util_dev = DataUtil(data_args,
                             batch_size=batch_size,
                             mode='dev',
                             data_length=None,
                             shuffle=True)

    train_dataloader = DataLoader(data_util_train, data_args, am_hp)
    dev_dataloader = DataLoader(data_util_dev, data_args, am_hp)

    with tf.Graph().as_default():
        acoustic_model = CNNCTCModel(am_hp,
                                     train_dataloader.acoustic_vocab_size,
                                     train_dataloader.language_vocab_size)
        saver = tf.train.Saver(max_to_keep=5)
        # Data reading and preprocessing
        dataset = tf.data.Dataset.from_generator(
            train_dataloader.am_generator,
            output_types=(tf.float32, tf.int32, tf.int32, tf.int32, tf.int32,
                          tf.int32))
        dataset = dataset.map(
            lambda x, y, z, w, m, n: (x, y, z, w, m, n),
            num_parallel_calls=64).prefetch(buffer_size=10000)
        with tf.Session() as sess:
            print('Start training')
            latest = tf.train.latest_checkpoint(Const.AmModelFolder)
            if latest is not None:
                print('load acoustic model...')
                saver.restore(sess, latest)
            else:
                sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter(Const.AmModelTensorboard,
                                           tf.get_default_graph())
            old_wer = 1
            batch_nums = len(train_dataloader)
            for epoch in range(epochs):
                total_loss = 0
                iterator_train = dataset.make_one_shot_iterator().get_next()
                for train_step in range(batch_nums):
                    input_x_batch, input_length_batch, _, _, target_y_batch, seq_length_batch = sess.run(
                        iterator_train)
                    feed = {
                        acoustic_model.wav_input: input_x_batch,
                        acoustic_model.wav_length: input_length_batch,
                        acoustic_model.target_hanzi: target_y_batch,
                        acoustic_model.target_hanzi_length: seq_length_batch
                    }
                    loss, mean_loss, lr, summary, label_err, _ = sess.run(
                        [
                            acoustic_model.loss, acoustic_model.mean_loss,
                            acoustic_model.current_learning,
                            acoustic_model.summary, acoustic_model.label_err,
                            acoustic_model.train_op
                        ],
                        feed_dict=feed)
                    total_loss += mean_loss
                    if (train_step + 1) % 2 == 0:
                        print(
                            'epoch: %d    step: %d/%d  mean_loss: %.4f    total_loss: %.4f  lr: %.6f   label_err: %.4f'
                            % (epoch + 1, train_step + 1, batch_nums,
                               mean_loss, total_loss /
                               (train_step + 1), lr, label_err))
                        print(loss)
                writer.add_summary(summary, epoch)

                # Evaluate on the dev set
                total_err = 0
                total_loss = 0
                eval_steps = len(dev_dataloader)
                for feature_input, logits_length, _, _, target_y, target_length in dev_dataloader.am_generator(
                ):
                    feed = {
                        acoustic_model.wav_input: feature_input,
                        acoustic_model.wav_length: logits_length,
                        acoustic_model.target_hanzi: target_y,
                        acoustic_model.target_hanzi_length: target_length
                    }
                    mean_loss, label_err = sess.run(
                        [acoustic_model.mean_loss, acoustic_model.label_err],
                        feed_dict=feed)
                    total_loss += mean_loss
                    total_err += label_err
                wer = total_err / eval_steps
                mean_loss = total_loss / eval_steps
                save_ckpt = 'epoch_%d_loss_%.2f_wer_%.2f.ckpt'
                saver.save(
                    sess,
                    os.path.join(Const.AmModelFolder,
                                 save_ckpt % (epoch, mean_loss, wer)))
                print('epoch: ', epoch + 1, ': average loss = ', mean_loss)
                if wer < old_wer:
                    saver.save(
                        sess,
                        os.path.join(Const.AmModelFolder, 'final_model.ckpt'))
                    old_wer = wer
def train_language_model(data_args, am_hp):
    """
    语言模型
    :param train_data: 训练数据
    :return:
    """
    epochs = am_hp.epochs
    batch_size = am_hp.lm_batch_size
    data_util_train = DataUtil(data_args,
                               batch_size=batch_size,
                               mode='train',
                               data_length=None,
                               shuffle=True)
    data_util_eval = DataUtil(data_args,
                              batch_size=batch_size,
                              mode='dev',
                              data_length=None,
                              shuffle=True)
    dataloader = DataLoader(data_util_train, data_args, am_hp)
    dataloader_eval = DataLoader(data_util_eval, data_args, am_hp)
    lm_model = Language_Model(am_hp, dataloader.acoustic_vocab_size,
                              dataloader.language_vocab_size)
    batch_num = len(data_util_train.path_lst) // batch_size
    eval_batch_num = len(data_util_eval.path_lst) // batch_size

    with lm_model.graph.as_default():
        saver = tf.train.Saver(max_to_keep=5)
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.85  # use at most 85% of GPU memory

    with tf.Session(graph=lm_model.graph, config=config) as sess:
        merged = tf.summary.merge_all()
        sess.run(tf.global_variables_initializer())
        add_num = 0
        if os.path.exists(Const.LmModelFolder):
            latest = tf.train.latest_checkpoint(Const.LmModelFolder)
            if latest is not None:
                print('loading language model...')
                saver.restore(sess, latest)
                # add_num = int(latest.split('_')[-1])
        writer = tf.summary.FileWriter(Const.LmModelTensorboard,
                                       tf.get_default_graph())
        old_acc = 0
        for epoch in range(epochs):
            total_loss = 0
            batch = dataloader.get_lm_batch()
            for i in range(batch_num):
                input_batch, _, label_batch = next(batch)
                feed = {lm_model.x: input_batch, lm_model.y: label_batch}
                cost, cur_lr, _ = sess.run([
                    lm_model.mean_loss, lm_model.current_learning,
                    lm_model.train_op
                ],
                                           feed_dict=feed)
                total_loss += cost
                if i % 10 == 0:
                    print("epoch: %d    step: %d/%d lr:%.6f train loss=%.6f" %
                          (epoch + 1, i, batch_num, cur_lr, cost))
            summary = sess.run(merged, feed_dict=feed)
            writer.add_summary(summary, epoch)
            print('epochs', epoch + 1, ': average loss = ',
                  total_loss / batch_num)
            saver.save(
                sess, Const.LmModelFolder + 'model_%d_%.3f.ckpt' %
                (epoch + 1, total_loss / batch_num))
            ### test acc
            total_acc = 0
            total_loss = 0
            batch = dataloader_eval.get_lm_batch()
            for j in range(eval_batch_num):
                input_batch, _, label_batch = next(batch)
                feed = {lm_model.x: input_batch, lm_model.y: label_batch}
                loss, acc = sess.run([lm_model.mean_loss, lm_model.acc],
                                     feed_dict=feed)
                total_loss += loss
                total_acc += acc
            acc = total_acc / eval_batch_num
            loss = total_loss / eval_batch_num
            print("epoch: %d test acc:%.4f  test loss=%.6f" %
                  (epoch + 1, acc, loss))
            if acc > old_acc:
                saver.save(
                    sess,
                    os.path.join(Const.LmModelFolder,
                                 'final_model_%d.ckpt' % (epoch + 1)))
                old_acc = acc
        writer.close()
Example #12
        ' set 汉字 word accuracy ratio: ', (1 - han_error_num / han_num) * 100,
        '%')


if __name__ == '__main__':
    # Test length
    # 1. Prepare the data needed for testing; it does not have to match the
    #    training data and can be selected via data_args.data_type
    lm_data_params = LmDataHparams().args

    # 2. Language model -----------------------------------
    hparams = AmLmHparams()
    parser = hparams.parser
    am_hp = parser.parse_args()
    test_data_util = DataUtil(lm_data_params,
                              am_hp.am_batch_size,
                              mode='test',
                              data_length=None,
                              shuffle=False)
    dataloader = DataLoader(test_data_util, lm_data_params, am_hp)
    lm_model = Language_Model(am_hp, dataloader.acoustic_vocab_size,
                              dataloader.language_vocab_size)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(graph=lm_model.graph,
                      config=tf.ConfigProto(gpu_options=gpu_options))
    with lm_model.graph.as_default():
        print('loading language model...')
        saver = tf.train.Saver()
        latest = tf.train.latest_checkpoint(Const.LmModelFolder)
        saver.restore(sess, latest)
    test_count = 500
    speech_test(lm_model, test_count, sess)
Example #13
    # Fetch subdomain data
    test = vt_client.get_subdomains(tests)

    # Fetch CNAME, A record, title and PORT info for each subdomain
    for domain in test:
        # tests = test[domain]
        tests = ['www.baidu.com']
        append_tasks(query,
                     tasks=tasks,
                     iters=tests,
                     query_type='CNAME',
                     _name=domain)
        append_tasks(query,
                     tasks=tasks,
                     iters=tests,
                     query_type='A',
                     _name=domain)
        append_tasks(HttpUtil.asnyc_get_title,
                     tasks=tasks,
                     iters=tests,
                     results=all_results)
    loop.run_until_complete(asyncio.wait(tasks))
    print('-----------------')
    print(all_results)
    print('数据爬取完毕,正在保存中 ...')

    for x in all_results:
        all_results[x] = DataUtil.format_by_ip(all_results[x])
    write_to_excel(all_results, 'test')
    loop.close()
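
append_tasks is not defined in this excerpt. Below is a hypothetical reconstruction that is consistent with the calls above: it schedules one coroutine per item in iters and collects the futures into tasks.

import asyncio

def append_tasks(coro_func, tasks, iters, **kwargs):
    # Hypothetical helper: the real implementation is not part of this example.
    for item in iters:
        tasks.append(asyncio.ensure_future(coro_func(item, **kwargs)))
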
Example #14
import torch

from util.conf_util import read_config
from util.data_util import DataUtil

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"device: {device}")

config = read_config()
save_dir = config["save_dir"]
batch_size = config["batch_size"]
seq_length = config["seq_length"]
data_util = DataUtil(config)

# load model
model = torch.load(f"{save_dir}/model.pt", map_location=torch.device("cpu"))
model.eval()


def predict(example):
    input_vocab, target_vocab, intent_vocab = data_util.get_vocab()
    target_vocab = input_vocab

    input_list = []
    for _ in range(batch_size):
        id_list = data_util.word2id(input_vocab, [c for c in example],
                                    seq_length)
        input_list.append(id_list)
    input_batch = torch.LongTensor(input_list).to(device)
    slot_scores, intent_score = model(input_batch)
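
The excerpt stops at the forward pass. A minimal decoding sketch follows, under the assumption that slot_scores has shape (batch, seq_length, n_slots), intent_score has shape (batch, n_intents), and the vocabularies are plain lists.

def decode(slot_scores, intent_score, target_vocab, intent_vocab):
    # Assumed shapes; the real model's output layout is not shown in this excerpt.
    slot_ids = slot_scores[0].argmax(dim=-1).tolist()
    intent_id = int(intent_score[0].argmax())
    return [target_vocab[i] for i in slot_ids], intent_vocab[intent_id]
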
class RunCase(object):

    CASE_ID = 1
    MODULE_NAME = 2
    CASE_NAME = 3
    RUN_FLAG = 4
    URL = 5
    REQUEST_METHOD = 6
    HEADERS = 7
    COOKIES = 8
    REQUEST_PARAM = 9
    EXP_RESULT = 10
    STATUS_CODE = 11
    RESPONSE_TEXT = 12
    ASSET_TYPE = 13
    ASSET_PATTERN = 14
    EXEC_RESLT = 15
    """定义常量,指定表格每一列"""
    def __init__(self, file_name, sheet_name=None, sheet_index=0):
        self.requestUtil = RequestUtil()
        self.excelUtil = ExcelUtil(file_name, sheet_name, sheet_index)
        self.dataUtil = DataUtil()
        self.assetUtil = AssertUtil()
        self.logger = Logger(self.__class__.__name__).get_logger_with_level()
        self.cookie_dict = {}

    def run_case_by_data(self, data):
        """根据数据执行单个用例,格式:{"1":[test_001,订单,下单,www.baidu.com,xx,xx,]}"""
        row_no = 2
        for key in data:
            row_no = key
            break
        row_data = data.get(row_no)
        self.logger.info(
            "执行用例:%s-%s-%s" %
            (row_data[RunCase.CASE_ID - 1], row_data[RunCase.MODULE_NAME - 1],
             row_data[RunCase.CASE_NAME - 1]))

        # Prepare the data
        case_id = row_data[self.CASE_ID - 1]
        # module_name = row_data[self.MODULE_NAME-1]
        run_flag = row_data[self.RUN_FLAG - 1]
        if run_flag == '否':
            # Skip this case
            return
        elif run_flag == '是':
            url = row_data[self.URL - 1]
            request_method = row_data[self.REQUEST_METHOD - 1]
            # Handle request headers
            headers = row_data[self.HEADERS - 1]
            if headers is None:
                headers = {}
            else:
                headers = self.dataUtil.str_to_json(headers)
            # Handle cookies
            cookies = row_data[self.COOKIES - 1]
            if cookies:
                # Parse the cookie and check for a cookie dependency
                depend_cookie = self.cookie_depend(cookies)
                if depend_cookie is not None:
                    if isinstance(depend_cookie, RequestsCookieJar):
                        cookies = depend_cookie
                    elif depend_cookie == '':
                        cookies = {}
                    else:
                        cookies = self.dataUtil.str_to_json(depend_cookie)
            request_param = row_data[self.REQUEST_PARAM - 1]
            if request_param is not None:
                request_param = self.data_depend(request_param)
            exp_result = row_data[self.EXP_RESULT - 1]
            asset_type = row_data[self.ASSET_TYPE - 1]
            asset_pattern = row_data[self.ASSET_PATTERN - 1]

            # Execute the request and record the result
            self.logger.info("请求URL:%s" % url)
            self.logger.info("请求参数:%s" % request_param)
            self.logger.info("请求头:%s" % headers)
            self.logger.info("请求cookie:%s" % cookies)
            response = None
            if request_method == 'get':
                response = self.requestUtil.do_get(url, request_param, headers,
                                                   cookies)
            elif request_method == 'post':
                # Convert the string into a JSON object
                json_param = self.dataUtil.str_to_json(request_param)
                response = self.requestUtil.do_post(url, json_param, '',
                                                    headers, cookies)
            response_text = response.text.strip()
            # Store the returned cookies so that later cases can depend on them
            self.cookie_dict[case_id] = response.cookies
            self.logger.info("请求结果:%s\n" % response_text)
            self.excelUtil.set_data_by_row_col_no(row_no, self.STATUS_CODE,
                                                  response.status_code)
            self.excelUtil.set_data_by_row_col_no(row_no, self.RESPONSE_TEXT,
                                                  response_text)

            # Assert and record the final result
            result = self.asset_handle(exp_result, response_text, asset_type,
                                       asset_pattern)
            if result:
                self.excelUtil.set_data_by_row_col_no(row_no, self.EXEC_RESLT,
                                                      'pass')
            else:
                self.excelUtil.set_data_by_row_col_no(row_no, self.EXEC_RESLT,
                                                      'fail')
            return result

    def data_depend(self, request_param):
        """处理数据依赖
            ${test_03.data.orderId}   表示对返回结果的部分属性存在依赖
        """
        request_param_final = None
        # Handle a dependency on a previous response attribute
        match_results = re.findall(r'\$\{.+?\..+?\}', request_param)
        if match_results is None or match_results == []:
            return request_param
        else:
            for var_pattern in match_results:
                # Only the single-match case is considered
                start_index = var_pattern.index("{")
                end_index = var_pattern.rindex("}")
                # Extract the value inside ${...}
                pattern = var_pattern[start_index + 1:end_index]
                split_index = pattern.index(".")
                # Get the dependent case_id and attribute path
                case_id = pattern[:split_index]
                proper_pattern = pattern[split_index + 1:]
                row_no = self.excelUtil.get_row_no_by_cell_value(
                    case_id, self.CASE_ID)
                response = self.excelUtil.get_data_by_row_col_no(
                    row_no, self.RESPONSE_TEXT)
                result = self.dataUtil.json_data_analysis(
                    proper_pattern, response)
                # Replace the placeholder; str(result) guards against non-string values such as integers
                request_param_final = request_param.replace(
                    var_pattern, str(result), 1)
            return request_param_final

    def cookie_depend(self, request_param):
        """处理数据依赖
			1、${test_01}                表示对返回cookie存在依赖
            2、${test_03.data.orderId}   表示对返回结果的部分属性存在依赖
        """
        cookie_final = None
        # Handle a dependency on previously returned cookies
        match_results = re.match(r'^\$\{(.[^\.]+)\}$', request_param)
        if match_results:
            # Depends on cookies returned by a previous case
            depend_cookie = self.cookie_dict[match_results.group(1)]
            return depend_cookie
        else:
            # Not a cookie dependency; fall back to data dependency handling
            cookie_final = self.data_depend(request_param)
            return cookie_final

    def asset_handle(self, exp_result, response_text, asset_type,
                     asset_pattern):
        """根据断言方式进行断言判断"""
        asset_flag = None
        if asset_type == '相等':
            if asset_pattern is None or asset_pattern == '':
                asset_flag = self.assetUtil.equals(exp_result, response_text)
            else:
                exp_value = self.dataUtil.json_data_analysis(
                    asset_pattern, exp_result)
                response_value = self.dataUtil.json_data_analysis(
                    asset_pattern, response_text)
                asset_flag = self.assetUtil.equals(exp_value, response_value)
        elif asset_type == '包含':
            asset_flag = self.assetUtil.contains(response_text, asset_pattern)
        elif asset_type == '正则':
            asset_flag = self.assetUtil.re_matches(response_text,
                                                   asset_pattern)
        return asset_flag
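
A hypothetical driver for RunCase, assuming the row layout follows the column constants defined at the top of the class; the file name, URL and row values are placeholders.

if __name__ == '__main__':
    runner = RunCase('api_cases.xlsx', sheet_index=0)
    # Positions follow CASE_ID .. EXEC_RESLT; the result columns start out empty.
    row = ['test_001', '订单', '下单', '是', 'http://httpbin.org/get', 'get',
           None, None, None, None, None, None, '包含', 'url', None]
    result = runner.run_case_by_data({2: row})
    print('pass' if result else 'fail')
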
Example #16
def __init__(self, conf):
    self.data_util = DataUtil(conf)
    self.input_list, self.target_list, self.intent_list = \
        self.data_util.get_train_data()