def create_vocab(path):
    """Build a word vocabulary from the dataset stored at ``path``.

    The dataset is read with ``load_dataset(path)``; the ``words`` of every
    instance are tallied into a single ``Counter`` which is then wrapped in
    a ``WordVocab``.

    Args:
        path: Location of the dataset, passed unchanged to ``load_dataset``.

    Returns:
        A ``WordVocab`` constructed from the accumulated word counts.
    """
    # NOTE: only word frequencies are gathered; tag counting is disabled.
    # Assumes each instance exposes ``words`` as an iterable of tokens
    # -- TODO confirm against load_dataset.
    word_freqs = Counter()
    for instance in load_dataset(path):
        word_freqs.update(instance.words)
    return WordVocab(word_freqs)
# Seed PyTorch's random number generators. Note that the CUDA-specific
# seeds (1344) differ from the value given to torch.manual_seed (1234).
torch.manual_seed(1234)
torch.cuda.manual_seed(1344)
torch.cuda.manual_seed_all(1344)

# Report the compute environment.
print('cuda available:', torch.cuda.is_available())
print('cuDnn available:', torch.backends.cudnn.enabled)
print('GPU numbers:', torch.cuda.device_count())

# Resolve dataset / pretrained-embedding locations from the JSON config.
data_path = get_data_path("./conf/datapath.json")
data_files = data_path['data']

# Character and bi-character vocabularies are built from the training set.
char_vocab, bichar_vocab = create_vocab(data_files['train_data'])

# Embedding weights for both vocabularies, taken from the pretrained files.
pretrained_files = data_path['pretrained']
char_embed_weights = char_vocab.get_embedding_weights(
    pretrained_files['char_embedding'])
bichar_embed_weights = bichar_vocab.get_embedding_weights(
    pretrained_files['bichar_embedding'])

# Load each split (all with the character vocabulary) and report its size.
train_data = load_dataset(data_files['train_data'], char_vocab)
print('train data size:', len(train_data))

dev_data = load_dataset(data_files['dev_data'], char_vocab)
print('dev data size:', len(dev_data))

test_data = load_dataset(data_files['test_data'], char_vocab)
print('test data size:', len(test_data))

# Copy vocabulary-derived sizes onto the argument namespace before the
# model is constructed from it.
args = args_config()
args.char_vocab_size = char_vocab.vocab_size
args.bichar_vocab_size = bichar_vocab.vocab_size
args.tag_size = char_vocab.tag_size
args.rel_size = char_vocab.rel_size

parser_model = ParserModel(args, char_embed_weights, bichar_embed_weights)

# NOTE(review): args.device is assigned here only when CUDA is usable and
# args.cuda is non-negative; no CPU fallback is visible in this section --
# confirm that args_config() provides a default device.
if torch.cuda.is_available() and args.cuda >= 0:
    args.device = torch.device('cuda', args.cuda)
from classifier import Classifier

if __name__ == '__main__':
    # Set the random seeds (fixes the random values).
    np.random.seed(666)
    torch.manual_seed(6666)
    torch.cuda.manual_seed(1234)  # seed for the current GPU
    # torch.cuda.manual_seed_all(4321)  # seed for all GPUs (if there are several)

    print('GPU available: ', torch.cuda.is_available())
    print('CuDNN available: ', torch.backends.cudnn.enabled)
    print('GPU number: ', torch.cuda.device_count())

    # Load the data (training set - learning, development set - tuning,
    # test set - evaluation).
    data_opts = config.data_path_parse('./config/data_path.json')
    data_files = data_opts['data']
    train_data = load_dataset(data_files['train_data'])
    dev_data = load_dataset(data_files['dev_data'])
    test_data = load_dataset(data_files['test_data'])
    split_sizes = (len(train_data), len(dev_data), len(test_data))
    print('train_size=%d dev_size=%d test_size=%d' % split_sizes)

    # Set up the arguments (data arguments + model arguments).
    args = config.arg_parse()
    # Use the requested GPU only when CUDA is both enabled in the arguments
    # and actually available; otherwise run on the CPU.
    use_gpu = args.enable_cuda and torch.cuda.is_available()
    args.device = torch.device('cuda', args.cuda) if use_gpu else torch.device('cpu')
    print(args.device)

    # Build the vocabulary from the training data.
    vocab = create_vocab(data_files['train_data'])