        print('Char Embedding: ', char_emb_file)

        name = 'BaseLSTM'  # catnlp
        config = Config()
        config.layers = 2
        config.optim = 'Adam'
        config.char_features = 'CNN'
        config.lr = 0.015
        config.hidden_dim = 200
        config.bid_flag = True
        config.number_normalized = True
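        # Build the word/char/label alphabets from the train/dev/test files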
        data_initialization(config, train_file, dev_file, test_file)
        config.gpu = gpu
        config.word_features = name
        print('Word features: ', config.word_features)
        config.generate_instance(train_file, 'train')
        config.generate_instance(dev_file, 'dev')
        config.generate_instance(test_file, 'test')
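        # Optionally load pretrained word/char embeddings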
        if emb_file:
            print('load word emb file...norm: ', config.norm_word_emb)
            config.build_word_pretain_emb(emb_file)
        if char_emb_file != 'none':
            print('load char emb file...norm: ', config.norm_char_emb)
            config.build_char_pretrain_emb(char_emb_file)

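        # Train the model; data settings are saved to dset_dir and model checkpoints to save_model_dir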
        name = 'joint-disease' # catnlp
        train(config, name, dset_dir, save_model_dir, seg, ignore=True)
    elif status == 'test':
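        # Test mode: reload the saved data settings and decode the dev split with a trained model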
        data = load_data_setting(dset_dir)
        data.generate_instance(dev_file, 'dev')
        load_model_decode(model_dir, data, 'dev', gpu, seg)
Example #2
        count = 0
        with open(args.dataset, 'r') as f:
            for line in f:
                line = line.strip()
                count += 1
                train_file = os.path.join(line, args.train)
                devel_file = os.path.join(line, args.devel)
                test_file = os.path.join(line, args.test)

                print('Train file: ', train_file)
                print('Dev file: ', devel_file)
                print('Test file: ', test_file)
                data_initialization(config, train_file, devel_file, test_file)
            # config.fix_alphabet()
        config.num_corpus = count

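        # Second pass over the corpus list: turn every split of every corpus into instances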
        with open(args.dataset, 'r') as f:
            for line in f:
                line = line.strip()
                train_file = os.path.join(line, args.train)
                devel_file = os.path.join(line, args.devel)
                test_file = os.path.join(line, args.test)
                config.generate_instance(train_file, 'train')
                config.generate_instance(devel_file, 'dev')
                config.generate_instance(test_file, 'test')

        if emb_file:
            print('load word emb file...norm: ', config.norm_word_emb)
            config.build_word_pretain_emb(emb_file)
        if char_emb_file != 'none':
            print('load char emb file...norm: ', config.norm_char_emb)
            config.build_char_pretrain_emb(char_emb_file)

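        # Show the label alphabet gathered across all corpora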
        for label in config.label_alphabet.instances:
            print(label)
        name = 'MultiMetaLSTM'
        train(config, name, dset_dir, save_model_dir, seg)
    # elif status == 'test':
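
For context, the snippets above read args.dataset, args.train, args.devel and args.test. A minimal argparse sketch that would provide those attributes is shown below; the flag names, defaults, and help strings are assumptions for illustration, not taken from the project:

import argparse

# Hypothetical CLI scaffolding matching the args.* attributes used in the snippets above.
parser = argparse.ArgumentParser(description='Train/test an LSTM sequence tagger')
parser.add_argument('--dataset', help='file listing one corpus directory per line')
parser.add_argument('--train', default='train.txt', help='train file name inside each corpus directory')
parser.add_argument('--devel', default='dev.txt', help='dev file name inside each corpus directory')
parser.add_argument('--test', default='test.txt', help='test file name inside each corpus directory')
args = parser.parse_args()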