info = sys.argv[7]

# read data
train_file = './data/intermediate/' + dataset + '/rm/train.data'
dev_file = './data/intermediate/' + dataset + '/rm/dev.data'
test_file = './data/intermediate/' + dataset + '/rm/test.data'
feature_file = './data/intermediate/' + dataset + '/rm/feature.txt'
type_file = './data/intermediate/' + dataset + '/rm/type.txt'
type_file_test = './data/intermediate/' + dataset + '/rm/type_test.txt'

none_ind = utils.get_none_id(type_file)
print("None id:", none_ind)
label_distribution = utils.get_distribution(type_file)
label_distribution_test = utils.get_distribution(type_file_test)

word_size, pos_embedding_tensor = utils.initialize_embedding(
    feature_file, embLen)
_, type_size, _, _, _ = utils.load_corpus(train_file)

# initialize model and load the dumped checkpoint
nocluster = noCluster.noCluster(embLen, word_size, type_size, drop_prob,
                                label_distribution, label_distribution_test)
nocluster.load_state_dict(
    torch.load('./dumped_models/ffnn_dump_' + '_'.join(sys.argv[1:7]) + '.pth'))
torch.cuda.set_device(0)
nocluster.cuda()
if_cuda = True

packer = pack.repack(repack_ratio, 20, if_cuda)

print('in the order of: train, dev, test...\n')
print('Using Random Seed: ' + str(SEED))
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# read data
data_dir = os.path.join(opt['data_dir'], opt['dataset'], 'rm')
train_file = os.path.join(data_dir, 'train.data')
dev_file = os.path.join(data_dir, 'dev.data')
test_file = os.path.join(data_dir, 'test.data')
feature_file = os.path.join(data_dir, 'feature.txt')
type_file = os.path.join(data_dir, 'type.txt')
type_file_test = os.path.join(data_dir, 'type_test.txt')

none_ind = utils.get_none_id(type_file)
word_size, pos_embedding_tensor = utils.initialize_embedding(
    feature_file, opt['emb_len'])
doc_size, type_size, feature_list, label_list, type_list = utils.load_corpus(
    train_file)
doc_size_test, _, feature_list_test, label_list_test, type_list_test = utils.load_corpus(
    test_file)
doc_size_dev, _, feature_list_dev, label_list_dev, type_list_dev = utils.load_corpus(
    dev_file)

# set up configs
opt['none_ind'] = none_ind
opt['label_distribution'] = utils.get_distribution(type_file)
opt['word_size'], opt['type_size'] = word_size, type_size
opt['if_average'] = False
bat_size = opt['batch_size']

# initialize model