Example #1
    if status == 'train':
        print("Model saved to:", save_model_dir)
    # flush buffered stdout output immediately
    sys.stdout.flush()

    if status == 'train':
        data = Data()
        data.model_name = model_name
        data.HP_gpu = gpu
        data.use_bichar = conf_dict['use_bichar']
        data.HP_batch_size = conf_dict['HP_batch_size']  # 1
        data.HP_iteration = conf_dict['HP_iteration']  # 100
        data.HP_lr = conf_dict['HP_lr']  # 0.015
        data.HP_lr_decay = conf_dict['HP_lr_decay']  # 0.5
        data.HP_hidden_dim = conf_dict['HP_hidden_dim']
        data.MAX_SENTENCE_LENGTH = conf_dict['MAX_SENTENCE_LENGTH']
        data.HP_lstm_layer = conf_dict['HP_lstm_layer']
        data_initialization(data, gaz_file, train_file, dev_file, test_file)

        if data.model_name in ['CNN_model', 'LSTM_model']:
            data.generate_instance_with_gaz_2(train_file, 'train')
            data.generate_instance_with_gaz_2(dev_file, 'dev')
            data.generate_instance_with_gaz_2(test_file, 'test')
        elif data.model_name in ['WC-LSTM_model']:
            data.generate_instance_with_gaz_3(train_file, 'train')
            data.generate_instance_with_gaz_3(dev_file, 'dev')
            data.generate_instance_with_gaz_3(test_file, 'test')
        else:
            print("model_name is not set!")
            sys.exit(1)
        data.build_char_pretrain_emb(char_emb)
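
The hyperparameters in Example #1 are read from a conf_dict built elsewhere in the script. A minimal sketch of such a dictionary, using the values hinted at by the inline comments (batch size 1, 100 iterations, learning rate 0.015 with 0.5 decay); the remaining entries are placeholder assumptions, not values taken from the snippet:

# Hypothetical conf_dict covering the keys Example #1 reads.
conf_dict = {
    'use_bichar': False,          # assumption: bichar features off by default
    'HP_batch_size': 1,           # from the inline comment
    'HP_iteration': 100,          # from the inline comment
    'HP_lr': 0.015,               # from the inline comment
    'HP_lr_decay': 0.5,           # from the inline comment
    'HP_hidden_dim': 200,         # assumption, not given in the snippet
    'MAX_SENTENCE_LENGTH': 250,   # assumption, not given in the snippet
    'HP_lstm_layer': 1,           # assumption, not given in the snippet
}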
Example #2
    print("Gaz file:", gaz_file)
    if status == 'train':
        print("Model saved to:", save_model_dir)
    sys.stdout.flush()

    if status == 'train':
        data = Data()
        data.HP_gpu = gpu
        data.HP_lr = float(lr)
        data.HP_use_char = False
        data.HP_batch_size = 1
        data.use_bigram = bichar_emb is not None
        data.gaz_dropout = 0.5
        data.norm_gaz_emb = False
        data.HP_fix_gaz_emb = False
        data.MAX_SENTENCE_LENGTH = maxlen
        data_initialization(data, gaz_file, train_file, dev_file, test_file,
                            word_sense_map_file)
        data_build_gold(data, train_gold, dev_gold, test_gold)
        data.generate_instance_with_gaz(train_file, 'train')
        data.generate_instance_with_gaz(dev_file, 'dev')
        data.generate_instance_with_gaz(test_file, 'test')
        data.build_word_pretrain_emb(char_emb)
        data.build_biword_pretrain_emb(bichar_emb)
        data.build_gaz_pretrain_emb(gaz_file)
        train(data, save_model_dir, dataset, seg)
    elif status == 'test':
        data = load_data_setting(dset_dir)
        data.build_word_sense_map(word_sense_map_file)
        data.MAX_SENTENCE_LENGTH = maxlen
        data_build_gold(data, train_gold, dev_gold, test_gold)
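
Example #2's test branch restores the preprocessed Data object with load_data_setting(dset_dir), which is not shown in the snippet. A minimal sketch, assuming it is a plain pickle round-trip paired with a matching save_data_setting called after training (the real project may serialize Data differently):

import pickle

# Hypothetical helpers for persisting the Data object between runs.
def save_data_setting(data, dset_dir):
    # Persist the fully built Data object (alphabets, embeddings, settings)
    # so a later 'test' run can reuse them without re-reading the corpora.
    with open(dset_dir, 'wb') as f:
        pickle.dump(data, f)

def load_data_setting(dset_dir):
    # Restore the Data object saved after training.
    with open(dset_dir, 'rb') as f:
        return pickle.load(f)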