Example #1
def find_example():
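    # Loads a trained GRU for the MR dataset together with its saved
    # partitioner and the extracted DFA transition function, then replays
    # test inputs through show_runexamples.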
    _data_type = DateSet.MR
    _model_type = ModelType.GRU
    _k = 2
    _total_symbols = get_total_symbols(_data_type)
    print(_data_type.upper(), _model_type.upper())
    _device = "cpu"
    _pt_type = PartitionType.KM
    _data_source = "train"
    _use_clean = is_use_clean(_data_type)
    alpha = 64
    input_dim = get_input_dim(_data_type)
    model_file = get_model_file(_data_type, _model_type)
    _model_path = TrainedModel.NO_STOPW.format(_data_type, _model_type,
                                               model_file)

    model = load_model(_model_type,
                       _data_type,
                       device="cpu",
                       load_model_path=_model_path)
    data = load_pickle(
        get_path(getattr(DataPath, _data_type.upper()).PROCESSED_DATA))
    wv_matrix = load_pickle(
        get_path(getattr(DataPath, _data_type.upper()).WV_MATRIX))

    pt_path = AbstractData.Level1.NO_STOPW.format(
        _data_type, _model_type, _k, _data_source + "_partition.pkl")
    _dfa_file_path = AbstractData.Level2.NO_STOPW.format(
        _data_type, _model_type, _k, alpha)
    _dfa_file_path = os.path.join(
        _dfa_file_path,
        "{}_{}_transfunc.pkl".format(_data_source, _total_symbols))
    partitioner = load_pickle(pt_path)
    dfa = load_pickle(get_path(_dfa_file_path))
    show_runexamples(data["test_x"],
                     data["test_y"],
                     model,
                     partitioner,
                     dfa,
                     data["word_to_idx"],
                     wv_matrix,
                     use_clean=_use_clean,
                     input_dim=input_dim,
                     device=_device,
                     pt_type=_pt_type)
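
`find_example` resolves every path and hyperparameter internally, so a usage sketch is just the call itself (assuming this module's imports are in scope):

if __name__ == "__main__":
    find_example()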
Example #2
time_out = 1200  # was commented out in the original, but overall() below requires it
_alpha = 64      # assumption: never defined in this snippet; Example #1 uses alpha = 64
_device = "cpu"  # assumption: never defined in this snippet, but passed to final_output()
for _data_type in [DateSet.Tomita1, DateSet.Tomita2, DateSet.Tomita3,
                   DateSet.Tomita4, DateSet.Tomita5, DateSet.Tomita6,
                   DateSet.Tomita7, DateSet.BP, DateSet.MR, DateSet.IMDB]:
    # for _data_type in [DateSet.MR, DateSet.IMDB]:
    for _model_type in [ModelType.LSTM, ModelType.GRU]:
        print("=============={}==============={}==============".format(_data_type.upper(),
                                                                       _model_type.upper()))
        _total_symbols = get_total_symbols(_data_type)
        input_dim = get_input_dim(_data_type)
        use_clean = is_use_clean(_data_type)
        _pt_type = PartitionType.KM
        _data_source = "train"
        model_file = get_model_file(_data_type, _model_type)
        _model_path = TrainedModel.NO_STOPW.format(_data_type, _model_type, model_file)
        _ori_data_path = OriTrace.NO_STOPW.format(_data_type, _model_type)
        _ori_traces = load_pickle(_ori_data_path)
        ###############
        # overall learn
        ###############
        _output_path = "./tmp/{}/{}/".format(_data_type, _model_type)
        _gamma_a = 0.99
        pfa, total_states, tmp_prims_data, partitioner, k = overall(_output_path, _alpha, _ori_traces,
                                                                    _gamma_a, time_out)
        ########
        # test
        ########
        final_output(k, pfa, total_states, tmp_prims_data, partitioner, pt_type=_pt_type,
                     use_clean=use_clean, input_dim=input_dim, device=_device)
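
Each pass of the inner loop learns one PFA (`overall`) and evaluates it (`final_output`). To see how long learning takes per dataset/model pair, the call inside the loop could be wrapped with a timer; a sketch, where the timing code is an addition and not part of the original:

        import time
        start = time.time()
        pfa, total_states, tmp_prims_data, partitioner, k = overall(_output_path, _alpha, _ori_traces,
                                                                    _gamma_a, time_out)
        print("PFA learning for {}/{} took {:.1f}s".format(_data_type, _model_type,
                                                           time.time() - start))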
Example #3
def valid_pre_y(ori_traces_path):  # assumed signature, inferred from the commented-out call in the main block below
    data = load_pickle(get_path(DataPath.MR.PROCESSED_DATA))
    ori_traces = load_pickle(ori_traces_path)
    predicts = ori_traces["test_pre_y"]
    labels = data["test_y"]
    cnt = 0
    for pre_y, y in zip(predicts, labels):
        if y == pre_y:
            cnt += 1
    print(cnt / len(labels))
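    # Note: the counting loop above is a plain accuracy computation; an
    # equivalent one-liner (a refactoring sketch, not in the original):
    #   print(sum(p == y for p, y in zip(predicts, labels)) / len(labels))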


if __name__ == '__main__':
    data_type = sys.argv[1]
    model_type = sys.argv[2]
    device_id = int(sys.argv[3])

    use_clean = is_use_clean(data_type)
    _device = "cuda:{}".format(device_id) if device_id >= 0 else "cpu"
    model_file = get_model_file(data_type, model_type)
    save_path = OriTrace.NO_STOPW.format(data_type, model_type)
    model_path = get_path(
        TrainedModel.NO_STOPW.format(data_type, model_type, model_file))
    make_ori_trace(model_type,
                   data_type,
                   _device,
                   use_clean=use_clean,
                   path_mode=save_path,
                   model_path=model_path)
    # valid_pre_y(save_path)
    # get_path(OriTrace.LSTM.MR)
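
The main block expects three positional arguments: the dataset name, the model type, and a GPU id (negative values fall back to the CPU). A typical invocation, with a hypothetical script name:

    python make_ori_trace.py mr gru 0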
Example #4
    dataset = sys.argv[1]  # assumption: missing from the original fragment, though dataset is used below
    model_type = sys.argv[2]
    gpu = int(sys.argv[3])
    aug_scale = 0.5
    _device = "cuda:{}".format(gpu) if gpu >= 0 else "cpu"
    use_clean = is_use_clean(dataset)
    assert dataset.startswith("tomita")
    gram_id = int(dataset[-1])
    data = load_pickle(
        get_path(getattr(DataPath,
                         "TOMITA").PROCESSED_DATA).format(gram_id, gram_id))
    wv_matrix = load_pickle(
        get_path(getattr(DataPath,
                         "TOMITA").WV_MATRIX).format(gram_id, gram_id))
    _data = data_augmentation(data, aug_scale)
    model_file = get_model_file(dataset, model_type)
    model_path = get_path(
        TrainedModel.NO_STOPW.format(dataset, model_type, model_file))
    _model = load_model(model_type,
                        "tomita",
                        device=_device,
                        load_model_path=model_path)

    #######################
    # load training params
    #######################
    params = getattr(train_args, "args_{}_{}".format(model_type, "tomita"))()
    train_args.add_data_info(data, params)
    params["WV_MATRIX"] = wv_matrix
    params["device"] = _device
    params["rnn_type"] = model_type