Example #1
0
    # parameters
    # Load run configuration from an INI file (Configurable is a project
    # class -- presumably parses 'default.ini'; confirm against its def).
    config_file = 'default.ini'
    config = Configurable(config_file)

    # model
    model = CompanyPredict()
    # load data
    # read_pkl is a project helper; it appears to unpickle the file at the
    # given path -- confirm.
    train_data = read_pkl(config.train_pkl)
    dev_data = None
    # Dev set is optional: only loaded when a dev file is configured.
    if config.para_dev_file:
        dev_data = read_pkl(config.dev_pkl)
    test_data = read_pkl(config.test_pkl)

    # Vocabularies: word features plus paragraph-level (p) and
    # sentence-level (s) label sets, saved as a pair in one pickle.
    word_list = read_pkl(config.load_feature_voc)
    p_label_list, s_label_list = read_pkl(config.load_label_voc)
    word_voc = VocabSrc(word_list)
    p_label_voc = VocabTgt(p_label_list)
    s_label_voc = VocabTgt(s_label_list)

    # Pre-trained embeddings are optional; only loaded when the pickle exists.
    embedding = None
    if os.path.isfile(config.embedding_pkl):
        embedding = read_pkl(config.embedding_pkl)

    # begin evaluation/scoring (continues past this view)
    corrects, size = 0, 0
    start_time = time.time()
    # initialize macro-averaging counters: per-label true-positive,
    # false-negative and false-positive counts, keyed by label index
    # (p_label_voc.i2w is presumably the index-to-word mapping -- confirm).
    macro_averaging = {}
    for i in p_label_voc.i2w:
        macro_averaging[i] = {'tp': 0, 'fn': 0, 'fp': 0}
Example #2
0
    # CLI flags: CPU thread count, CUDA toggle, and which saved model to load
    # ('parse' is an ArgumentParser created earlier, outside this view).
    parse.add_argument('--thread', type=int, default=1)
    parse.add_argument('--use_cuda', action='store_true', default=False)
    parse.add_argument('--model', type=str, default='model.742')
    args, extra_args = parse.parse_known_args()

    # Merge the config file with any extra command-line overrides.
    config = Configurable(args.config_file, extra_args)
    torch.set_num_threads(args.thread)
    # CUDA is enabled only when both a GPU is present and the user asked for
    # it. NOTE(review): 'gpu' is defined above this view -- presumably
    # torch.cuda.is_available(); confirm.
    config.use_cuda = False
    if gpu and args.use_cuda:
        config.use_cuda = True
    print("\nGPU using status: ", config.use_cuda)

    # load vocab and model
    # read_pkl unpickles the saved vocab lists (project helper).
    feature_list = read_pkl(config.load_feature_voc)
    label_list = read_pkl(config.load_label_voc)
    feature_vec = VocabSrc(feature_list)
    label_vec = VocabTgt(label_list)

    # model
    # Dispatch on the configured architecture; all three constructors take
    # (config, vocab_size, embed_dim, padding_id, label_size).
    if config.which_model == 'Vanilla':
        model = Vanilla(config, feature_vec.size, config.embed_dim, PAD,
                        label_vec.size)
    elif config.which_model == 'Contextualized':
        model = Contextualized(config, feature_vec.size, config.embed_dim, PAD,
                               label_vec.size)
    elif config.which_model == 'ContextualizedGates':
        model = ContextualizedGates(config, feature_vec.size, config.embed_dim,
                                    PAD, label_vec.size)
    else:
        # Unknown model name: report and abort the process.
        print('please choose right model')
        exit()
Example #3
0
    # Merge the config file with any extra command-line overrides.
    config = Configurable(args.config_file, extra_args)
    torch.set_num_threads(args.thread)
    # CUDA is enabled only when both a GPU is present and the user asked for
    # it. NOTE(review): 'gpu' is defined above this view -- presumably
    # torch.cuda.is_available(); confirm.
    config.use_cuda = False
    if gpu and args.use_cuda:
        config.use_cuda = True
    print("\nGPU using status: ", config.use_cuda)

    # load data
    # read_pkl is a project helper; it appears to unpickle the given path.
    train_data = read_pkl(config.train_pkl)
    dev_data = None
    # Dev set is optional: only loaded when a dev file is configured.
    if config.dev_file:
        dev_data = read_pkl(config.dev_pkl)
    test_data = read_pkl(config.test_pkl)

    feature_list = read_pkl(config.feature_voc)
    feature_voc = VocabSrc(feature_list)

    label_list = read_pkl(config.label_voc)
    label_voc = VocabTgt(label_list)

    # Pre-trained embeddings are optional; only loaded when the pickle exists.
    embedding = None
    if os.path.isfile(config.embedding_pkl):
        embedding = read_pkl(config.embedding_pkl)

    # model
    # NOTE(review): usage implies 'embedding' is a (weights, dim) pair --
    # [1] the embedding dimension, [0] the pretrained weight matrix;
    # confirm against whatever wrote embedding_pkl.
    model = LSTM(config, feature_voc.size,
                 embedding[1] if embedding else config.embed_dim,
                 PAD, label_voc.size,
                 embedding[0] if embedding else None)

    # Move the model to GPU when enabled (the body continues past this view).
    if config.use_cuda:
Example #4
0
        analysis(dev_data, dev_sentence_len)
    print('\n')
    test_data, test_sentence_len = read_doc(config.para_test_file,
                                            config.sen_test_file,
                                            config.max_length)
    analysis(test_data, test_sentence_len)

    if not os.path.isdir(config.save_dir):
        os.mkdir(config.save_dir)
    pickle.dump(train_data, open(config.train_pkl, 'wb'))
    if config.para_dev_file:
        pickle.dump(dev_data, open(config.dev_pkl, 'wb'))
    pickle.dump(test_data, open(config.test_pkl, 'wb'))

    # vocab
    feature_list = [k for k, v in word_dict.most_common(config.vocab_size)]
    para_label_list = [k for k in para_label_dict.keys()]
    s_label_list = [k for k in s_label_dict.keys()]
    pickle.dump(feature_list, open(config.save_feature_voc, 'wb'))
    pickle.dump((para_label_list, s_label_list),
                open(config.save_label_voc, 'wb'))

    feature_voc = VocabSrc(feature_list)
    p_label_voc = VocabTgt(para_label_dict)
    s_label_voc = VocabTgt(s_label_dict)

    # embedding
    if config.embedding_file:
        embedding = feature_voc.create_vocab_embs(config.embedding_file)
        pickle.dump(embedding, open(config.embedding_pkl, 'wb'))
Example #5
0
    torch.set_num_threads(args.thread)
    # CUDA is enabled only when both a GPU is present and the user asked for
    # it. NOTE(review): 'gpu' is defined above this view -- presumably
    # torch.cuda.is_available(); confirm.
    config.use_cuda = False
    if gpu and args.use_cuda:
        config.use_cuda = True
    print("\nGPU using status: ", config.use_cuda)

    # load data
    # read_pkl is a project helper; it appears to unpickle the given path.
    train_data = read_pkl(config.train_pkl)
    dev_data = None
    # Dev set is optional: only loaded when a dev file is configured.
    if config.para_dev_file:
        dev_data = read_pkl(config.dev_pkl)
    test_data = read_pkl(config.test_pkl)

    # Vocabularies: word features plus paragraph-level (p) and
    # sentence-level (s) label sets, saved as a pair in one pickle.
    word_list = read_pkl(config.load_feature_voc)
    p_label_list, s_label_list = read_pkl(config.load_label_voc)
    # NOTE(review): VocabSrc receives the label list as a second argument
    # here, unlike the single-argument calls elsewhere in this file --
    # confirm this matches VocabSrc's signature and is intentional.
    word_voc = VocabSrc(word_list, p_label_list)
    p_label_voc = VocabTgt(p_label_list)
    s_label_voc = VocabTgt(s_label_list)

    # Pre-trained embeddings are optional; only loaded when the pickle exists.
    embedding = None
    if os.path.isfile(config.embedding_pkl):
        embedding = read_pkl(config.embedding_pkl)

    # model
    # NOTE(review): usage implies 'embedding' is a (weights, dim) pair --
    # [1] the embedding dimension, [0] the pretrained weight matrix; confirm.
    model = None
    if config.which_model == 'HierarchicalTarget':
        model = HierarchicalTarget(
            config, word_voc.size,
            embedding[1] if embedding else config.embedding_dim, PAD,
            p_label_voc.size, embedding[0] if embedding else None)
    else: