import torch
import torch.nn as nn
import torch.optim as optim

# DataLoader, RNNClassifier, CNNClassifier, and Trainer are project-local
# classes; their import paths are not shown in this file.


def main(config):
    loaders = DataLoader(
        train_fn=config.train_fn,
        batch_size=config.batch_size,
        min_freq=config.min_vocab_freq,
        max_vocab=config.max_vocab_size,
        device=config.gpu_id,
    )

    # This tells us the number of sentences in each split.
    print(
        '|train| =', len(loaders.train_loader.dataset),
        '|valid| =', len(loaders.valid_loader.dataset),
    )

    # Vocabulary of the text field inside loaders (built from the train set).
    vocab_size = len(loaders.text.vocab)
    n_classes = len(loaders.label.vocab)
    print('|vocab| =', vocab_size, '|classes| =', n_classes)

    if config.rnn is False and config.cnn is False:
        raise Exception('You need to specify an architecture to train. (--rnn or --cnn)')

    if config.rnn:
        # Declare model and loss.
        model = RNNClassifier(
            input_size=vocab_size,
            word_vec_size=config.word_vec_size,
            hidden_size=config.hidden_size,
            n_classes=n_classes,
            n_layers=config.n_layers,
            dropout_p=config.dropout,
        )
        optimizer = optim.Adam(model.parameters())
        crit = nn.NLLLoss()
        print(model)

        if config.gpu_id >= 0:
            model.cuda(config.gpu_id)
            crit.cuda(config.gpu_id)

        rnn_trainer = Trainer(config)
        rnn_model = rnn_trainer.train(
            model,
            crit,
            optimizer,
            loaders.train_loader,
            loaders.valid_loader,
        )

    if config.cnn:
        # Declare model and loss.
        model = CNNClassifier(
            input_size=vocab_size,
            word_vec_size=config.word_vec_size,
            n_classes=n_classes,
            use_batch_norm=config.use_batch_norm,
            dropout_p=config.dropout,
            window_sizes=config.window_sizes,
            n_filters=config.n_filters,
        )
        optimizer = optim.Adam(model.parameters())
        crit = nn.NLLLoss()
        print(model)

        if config.gpu_id >= 0:
            model.cuda(config.gpu_id)
            crit.cuda(config.gpu_id)

        cnn_trainer = Trainer(config)
        cnn_model = cnn_trainer.train(
            model,
            crit,
            optimizer,
            loaders.train_loader,
            loaders.valid_loader,
        )

    torch.save({
        'rnn': rnn_model.state_dict() if config.rnn else None,
        'cnn': cnn_model.state_dict() if config.cnn else None,
        'config': config,
        'vocab': loaders.text.vocab,
        'classes': loaders.label.vocab,
    }, config.model_fn)
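# A minimal sketch (not part of the original script): inspecting the checkpoint
# that the torch.save call above produces. The key names come directly from
# that call; 'model.pth' is a hypothetical file name.
def inspect_checkpoint(model_fn='model.pth'):
    saved = torch.load(model_fn, map_location='cpu')
    # Expected keys: 'rnn', 'cnn', 'config', 'vocab', 'classes'.
    for key, value in saved.items():
        print(key, type(value))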
import sys

import torch

# RNNClassifier, CNNClassifier, define_field, and read_text are project-local;
# their import paths are not shown in this file.


def main(config):
    saved_data = torch.load(
        config.model_fn,
        map_location='cpu' if config.gpu_id < 0 else 'cuda:%d' % config.gpu_id,
    )

    train_config = saved_data['config']
    rnn_best = saved_data['rnn']
    cnn_best = saved_data['cnn']
    vocab = saved_data['vocab']
    classes = saved_data['classes']

    vocab_size = len(vocab)
    n_classes = len(classes)

    text_field, label_field = define_field()
    text_field.vocab = vocab
    label_field.vocab = classes

    lines = read_text(max_length=config.max_length)

    with torch.no_grad():
        ensemble = []
        if rnn_best is not None and not config.drop_rnn:
            # Declare model and load pre-trained weights.
            model = RNNClassifier(
                input_size=vocab_size,
                word_vec_size=train_config.word_vec_size,
                hidden_size=train_config.hidden_size,
                n_classes=n_classes,
                n_layers=train_config.n_layers,
                dropout_p=train_config.dropout,
            )
            model.load_state_dict(rnn_best)
            ensemble += [model]
        if cnn_best is not None and not config.drop_cnn:
            # Declare model and load pre-trained weights.
            model = CNNClassifier(
                input_size=vocab_size,
                word_vec_size=train_config.word_vec_size,
                n_classes=n_classes,
                use_batch_norm=train_config.use_batch_norm,
                dropout_p=train_config.dropout,
                window_sizes=train_config.window_sizes,
                n_filters=train_config.n_filters,
            )
            model.load_state_dict(cnn_best)
            ensemble += [model]

        y_hats = []
        # Get a prediction from each model in the ensemble.
        for model in ensemble:
            if config.gpu_id >= 0:
                model.cuda(config.gpu_id)
            # Don't forget to turn on evaluation mode.
            model.eval()

            y_hat = []
            for idx in range(0, len(lines), config.batch_size):
                # Convert strings to lists of indices.
                x = text_field.numericalize(
                    text_field.pad(lines[idx:idx + config.batch_size]),
                    device='cuda:%d' % config.gpu_id if config.gpu_id >= 0 else 'cpu',
                )
                y_hat += [model(x).cpu()]
            # Concatenate the mini-batch-wise results.
            y_hat = torch.cat(y_hat, dim=0)
            # |y_hat| = (len(lines), n_classes)

            y_hats += [y_hat]
            model.cpu()

        # Merge into one tensor for the ensemble result and turn log-probs
        # into probabilities.
        y_hats = torch.stack(y_hats).exp()
        # |y_hats| = (len(ensemble), len(lines), n_classes)
        y_hats = y_hats.sum(dim=0) / len(ensemble)  # Get the average.
        # |y_hats| = (len(lines), n_classes)

        probs, indice = y_hats.topk(config.top_k)

        for i in range(len(lines)):
            sys.stdout.write('%s\t%s\n' % (
                ' '.join([classes.itos[indice[i][j]] for j in range(config.top_k)]),
                ' '.join(lines[i]),
            ))
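# A minimal self-contained sketch (not part of the original script) of the
# ensemble-averaging step above: each classifier outputs log-probabilities
# (the models are trained with nn.NLLLoss), so the scores are exponentiated
# back into probabilities before averaging across models.
def ensemble_average(log_probs_per_model):
    # log_probs_per_model: list of (n_lines, n_classes) log-prob tensors,
    # one per model in the ensemble.
    stacked = torch.stack(log_probs_per_model).exp()
    # |stacked| = (n_models, n_lines, n_classes)
    return stacked.sum(dim=0) / len(log_probs_per_model)
    # returned shape: (n_lines, n_classes)

# Usage: ensemble_average([lp1, lp2]) where each lp could be, e.g.,
# torch.rand(5, 7).softmax(dim=-1).log()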
import sys
from collections import Counter

import torch

# Project-local imports are omitted here as well (RNNClassifier, CNNClassifier,
# define_field, read_text, define_argparser).


def main(config):
    saved_data = torch.load(
        config.model_fn,
        map_location='cpu' if config.gpu_id < 0 else 'cuda:%d' % config.gpu_id,
    )

    train_config = saved_data['config']
    rnn_best = saved_data['rnn']
    cnn_best = saved_data['cnn']
    vocab = saved_data['vocab']
    classes = saved_data['classes']

    vocab_size = len(vocab)
    n_classes = len(classes)

    text_field, label_field = define_field()
    text_field.vocab = vocab
    label_field.vocab = classes

    lines, label = read_text(max_length=config.max_length)

    with torch.no_grad():
        ensemble = []
        if rnn_best is not None and not config.drop_rnn:
            # Declare model and load pre-trained weights.
            model = RNNClassifier(
                input_size=vocab_size,
                word_vec_size=train_config.word_vec_size,
                hidden_size=train_config.hidden_size,
                n_classes=n_classes,
                n_layers=train_config.n_layers,
                dropout_p=train_config.dropout,
            )
            model.load_state_dict(rnn_best)
            ensemble += [model]
        if cnn_best is not None and not config.drop_cnn:
            # Declare model and load pre-trained weights.
            model = CNNClassifier(
                input_size=vocab_size,
                word_vec_size=train_config.word_vec_size,
                n_classes=n_classes,
                use_batch_norm=train_config.use_batch_norm,
                dropout_p=train_config.dropout,
                window_sizes=train_config.window_sizes,
                n_filters=train_config.n_filters,
            )
            model.load_state_dict(cnn_best)
            ensemble += [model]

        y_hats = []
        # Get a prediction from each model in the ensemble.
        for model in ensemble:
            if config.gpu_id >= 0:
                model.cuda(config.gpu_id)
            # Don't forget to turn on evaluation mode.
            model.eval()

            y_hat = []
            for idx in range(0, len(lines), config.batch_size):
                # Convert strings to lists of indices.
                x = text_field.numericalize(
                    text_field.pad(lines[idx:idx + config.batch_size]),
                    device='cuda:%d' % config.gpu_id if config.gpu_id >= 0 else 'cpu',
                )
                y_hat += [model(x).cpu()]
            # Concatenate the mini-batch-wise results.
            y_hat = torch.cat(y_hat, dim=0)
            # |y_hat| = (len(lines), n_classes)

            y_hats += [y_hat]
            model.cpu()

        # Merge into one tensor for the ensemble result and turn log-probs
        # into probabilities.
        y_hats = torch.stack(y_hats).exp()
        # |y_hats| = (len(ensemble), len(lines), n_classes)
        y_hats = y_hats.sum(dim=0) / len(ensemble)  # Get the average.
        # |y_hats| = (len(lines), n_classes)

        probs, indice = y_hats.topk(config.top_k)

        '''
        for i in range(30):
            sys.stdout.write('%s\t%s\n' % (
                ' '.join([classes.itos[indice[i][j]] for j in range(config.top_k)]),
                ' '.join(lines[i]),
            ))
        '''
        # print([indice[i][j] for j in range(config.top_k)],
        #       [classes.itos[indice[i][j]] for j in range(config.top_k)],
        #       label[i])
        '''
        correct = 0
        total = 0
        for i in range(len(lines)):
            if label[i] in [classes.itos[indice[i][j]] for j in range(config.top_k)]:
                correct += 1
        print('Accuracy: %d %%' % (100 * correct / len(lines)))
        '''

        count1 = 0
        count2 = 0
        count3 = 0
        count4 = 0
        cnt = Counter()
        for i in range(len(lines)):
            # Count near-ties: the gold label is among the top two predictions
            # and the top-two probabilities differ by at most 0.1.
            if label[i] in (classes.itos[indice[i][0]], classes.itos[indice[i][1]]):
                if abs(probs[i][0] - probs[i][1]) <= 0.1:
                    twoclasses = [classes.itos[indice[i][0]], classes.itos[indice[i][1]]]
                    diff = float(abs(probs[i][0] - probs[i][1]))
                    for two in twoclasses:
                        cnt[two] += 1
        print(cnt)

        # Confusable pairs of interest, e.g. [가설 설정, 이론/모형], [가설 설정, 데이터처리], ...
        '''
        if twoclasses == ['가설 설정', '이론/모형']:
            count1 += 1
        if twoclasses == ['가설 설정', '데이터처리']:
            count2 += 1
        if twoclasses == ['가설 설정', '대상 데이터']:
            count3 += 1
        if twoclasses == ['가설 설정', '제안 방법']:
            count4 += 1

        print('가설 설정, 이론/모형 : ', count1)
        print('가설 설정, 데이터처리 : ', count2)
        print('가설 설정, 대상 데이터 : ', count3)
        print('가설 설정, 제안 방법 : ', count4)
        '''


if __name__ == '__main__':
    config = define_argparser()
    main(config)
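# A hedged sketch (not in the original script) of the accuracy check that the
# commented-out block above attempts: a prediction counts as correct when the
# gold label appears among the top-k predicted classes. 'indice' and 'classes'
# follow the shapes used in main(); the function name is illustrative.
def topk_accuracy(labels, indice, classes, top_k=1):
    correct = 0
    for i, gold in enumerate(labels):
        predicted = [classes.itos[indice[i][j]] for j in range(top_k)]
        if gold in predicted:
            correct += 1
    return 100.0 * correct / len(labels)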