                              subchar=args.subchar)

# model (restore)
checkpoint_manager = CheckpointManager(model_dir)
checkpoint = checkpoint_manager.load_checkpoint('best_snu_{}.tar'.format(args.pretrained_config))
config = BertConfig(ptr_config.config)
model = SentenceClassifier(config, num_classes=model_config.num_classes, vocab=preprocessor.vocab)
model.load_state_dict(checkpoint['model_state_dict'])

# evaluation
filepath = getattr(data_config, args.dataset)
ds = Corpus(filepath, preprocessor.preprocess)
dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

summary_manager = SummaryManager(model_dir)
summary = evaluate(model, dl, {'loss': nn.CrossEntropyLoss(), 'acc': acc}, device)
summary_manager.load('summary_snu_{}.json'.format(args.pretrained_config))
summary_manager.update({'{}'.format(args.dataset): summary})
summary_manager.save('summary_snu_{}.json'.format(args.pretrained_config))
def train(cfgpath):
    # parsing json
    with open(os.path.join(os.getcwd(), cfgpath)) as io:
        params = json.loads(io.read())

    # creating preprocessor
    tokenizer = JamoTokenizer()
    padder = PadSequence(300)

    # creating model
    model = CharCNN(num_classes=params['model'].get('num_classes'),
                    embedding_dim=params['model'].get('embedding_dim'),
                    dic=tokenizer.token2idx)

    # creating dataset, dataloader
    tr_filepath = os.path.join(os.getcwd(), params['filepath'].get('tr'))
    val_filepath = os.path.join(os.getcwd(), params['filepath'].get('val'))
    batch_size = params['training'].get('batch_size')
    tr_ds = Corpus(tr_filepath, tokenizer, padder)
    tr_dl = DataLoader(tr_ds, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
    val_ds = Corpus(val_filepath, tokenizer, padder)
    val_dl = DataLoader(val_ds, batch_size=batch_size, num_workers=4)

    # training
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(params=model.parameters(), lr=params['training'].get('learning_rate'))
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    epochs = params['training'].get('epochs')
    for epoch in tqdm(range(epochs), desc='epochs'):
        avg_tr_loss = 0
        avg_val_loss = 0
        tr_step = 0
        val_step = 0

        model.train()
        for x_mb, y_mb in tqdm(tr_dl, desc='iters'):
            x_mb = x_mb.to(device)
            y_mb = y_mb.to(device)
            score = model(x_mb)
            opt.zero_grad()
            tr_loss = loss_fn(score, y_mb)
            tr_loss.backward()
            opt.step()
            avg_tr_loss += tr_loss.item()
            tr_step += 1
        else:
            avg_tr_loss /= tr_step

        model.eval()
        for x_mb, y_mb in tqdm(val_dl):
            x_mb = x_mb.to(device)
            y_mb = y_mb.to(device)
            with torch.no_grad():
                score = model(x_mb)
                val_loss = loss_fn(score, y_mb)
                avg_val_loss += val_loss.item()
            val_step += 1
        else:
            avg_val_loss /= val_step

        tqdm.write('epoch : {}, tr_loss : {:.3f}, val_loss : {:.3f}'.format(epoch + 1, avg_tr_loss, avg_val_loss))

    ckpt = {'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'opt_state_dict': opt.state_dict()}
    savepath = os.path.join(os.getcwd(), params['filepath'].get('ckpt'))
    torch.save(ckpt, savepath)
def get_data_loaders(dataset_config, tokenizer, batch_size):
    tr_ds = Corpus(dataset_config.train, tokenizer.split_and_transform)
    tr_dl = DataLoader(tr_ds, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
    val_ds = Corpus(dataset_config.validation, tokenizer.split_and_transform)
    val_dl = DataLoader(val_ds, batch_size=batch_size, num_workers=4)
    return tr_dl, val_dl
def main():
    train_path = Path.cwd() / 'data_in' / 'train.txt'
    val_path = Path.cwd() / 'data_in' / 'val.txt'
    vocab_path = Path.cwd() / 'data_in' / 'vocab.pkl'

    with open(vocab_path, mode='rb') as io:
        vocab = pickle.load(io)

    tokenizer = MeCab()
    padder = PadSequence(length=70, pad_val=vocab.token_to_idx['<pad>'])

    tr_ds = Corpus(train_path, vocab, tokenizer, padder)
    tr_dl = DataLoader(tr_ds, batch_size=1024, shuffle=True, num_workers=1, drop_last=True)
    val_ds = Corpus(val_path, vocab, tokenizer, padder)
    val_dl = DataLoader(val_ds, batch_size=1024)

    model = Net(vocab_len=len(vocab))
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    # training
    for epoch in range(1):
        model.train()
        index = 0
        acc = 0
        for label, sen1, sen2 in tqdm(tr_dl, disable=True):
            optimizer.zero_grad()
            pre_label = model(sen1, sen2)
            loss = loss_fn(pre_label, label)
            loss.backward()
            optimizer.step()

            pred_cls = pre_label.data.max(1)[1]
            acc += pred_cls.eq(label.data).cpu().sum()
            print("epoch: {}, index: {}, loss: {}".format(epoch + 1, index, loss.item()))
            index += len(label)
        print('Accuracy : %d %%' % (100 * acc / index))

    # validation: evaluation mode, no gradient updates
    model.eval()
    index = 0
    acc = 0
    with torch.no_grad():
        for label, sen1, sen2 in tqdm(val_dl, disable=True):
            pre_label = model(sen1, sen2)
            loss = loss_fn(pre_label, label)

            pred_cls = pre_label.data.max(1)[1]
            acc += pred_cls.eq(label.data).cpu().sum()
            print("index: {}, loss: {}".format(index, loss.item()))
            index += len(label)
    print('Accuracy : %d %%' % (100 * acc / index))
# model (restore)
checkpoint_manager = CheckpointManager(model_dir)
checkpoint = checkpoint_manager.load_checkpoint(args.restore_file + '.tar')
model = ConvRec(num_classes=model_config.num_classes, embedding_dim=model_config.embedding_dim,
                hidden_dim=model_config.hidden_dim, vocab=tokenizer.vocab)
model.load_state_dict(checkpoint['model_state_dict'])

# evaluation
model.eval()
summary_manager = SummaryManager(model_dir)
filepath = getattr(data_config, args.data_name)
ds = Corpus(filepath, tokenizer.split_and_transform, min_length=model_config.min_length,
            pad_val=tokenizer.vocab.to_indices(' '))
dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4, collate_fn=batchify)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
summary = evaluate(model, dl, {'loss': nn.CrossEntropyLoss(), 'acc': acc}, device)
# model (restore)
checkpoint_manager = CheckpointManager(model_dir)
checkpoint = checkpoint_manager.load_checkpoint("best.tar")
model = SAN(
    num_classes=model_config.num_classes,
    lstm_hidden_dim=model_config.lstm_hidden_dim,
    hidden_dim=model_config.hidden_dim,
    da=model_config.da,
    r=model_config.r,
    vocab=tokenizer.vocab,
)
model.load_state_dict(checkpoint["model_state_dict"])

# evaluation
filepath = getattr(data_config, args.dataset)
ds = Corpus(filepath, tokenizer.split_and_transform)
dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4, collate_fn=batchify)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

summary_manager = SummaryManager(model_dir)
summary = evaluate(model, dl, {"loss": nn.CrossEntropyLoss(), "acc": acc}, device)
summary_manager.load("summary.json")
summary_manager.update({"{}".format(args.dataset): summary})
summary_manager.save("summary.json")
print("loss: {:.3f}, acc: {:.2%}".format(summary["loss"], summary["acc"]))
def validation_dataloader(self):
    val_ds = Corpus(self.hparams.validation, self.tokenizer.split_and_transform)
    return DataLoader(val_ds, batch_size=self.hparams.batch_size, num_workers=4)
def main():
    train_path = Path.cwd() / '..' / 'data_in' / 'train.txt'
    val_path = Path.cwd() / '..' / 'data_in' / 'val.txt'
    vocab_path = Path.cwd() / '..' / 'data_in' / 'vocab.pkl'

    length = 70
    dim = 300
    batch_size = 1024
    learning_rate = 0.01
    epochs = 10
    hidden = 50

    with open(vocab_path, mode='rb') as io:
        vocab = pickle.load(io)

    train = tf.data.TextLineDataset(str(train_path)).shuffle(buffer_size=batch_size).batch(
        batch_size=batch_size, drop_remainder=True)
    eval = tf.data.TextLineDataset(str(val_path)).batch(batch_size=batch_size, drop_remainder=True)

    tokenizer = MeCab()
    corpus = Corpus(vocab, tokenizer)

    malstm = MaLSTM(length, dim, len(vocab))
    opt = tf.optimizers.Adam(learning_rate=learning_rate)
    loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

    # loss, accuracy
    train_loss_metric = tf.keras.metrics.Mean(name='train_loss')
    train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    val_loss_metric = tf.keras.metrics.Mean(name='val_loss')
    val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    for epoch in range(epochs):
        train_loss_metric.reset_states()
        train_acc_metric.reset_states()
        val_loss_metric.reset_states()
        val_acc_metric.reset_states()
        tf.keras.backend.set_learning_phase(1)

        for step, val in tqdm(enumerate(train)):
            sen1, sen2, label = corpus.token2idx(val)
            with tf.GradientTape() as tape:
                logits = malstm(sen1, sen2)
                train_loss = loss_fn(label, logits)
            grads = tape.gradient(target=train_loss, sources=malstm.trainable_variables)
            opt.apply_gradients(grads_and_vars=zip(grads, malstm.trainable_variables))

            train_loss_metric.update_state(train_loss)
            train_acc_metric.update_state(label, logits)

        tr_loss = train_loss_metric.result()
        tqdm.write('epoch : {}, tr_acc : {:.3f}%, tr_loss : {:.3f}'.format(
            epoch + 1, train_acc_metric.result() * 100, tr_loss))
tokenizer = Tokenizer(vocab=vocab, split_fn=ptr_tokenizer.tokenize, pad_fn=pad_sequence)

# model (restore)
checkpoint_manager = CheckpointManager(model_dir)
checkpoint = checkpoint_manager.load_checkpoint(args.restore_file + '.tar')
config = BertConfig('pretrained/bert_config.json')
model = BertClassifier(config, num_labels=model_config.num_classes, vocab=tokenizer.vocab)
model.load_state_dict(checkpoint['model_state_dict'])

# evaluation
filepath = getattr(data_config, args.data_name)
ds = Corpus(filepath, tokenizer.preprocess)
dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

summary_manager = SummaryManager(model_dir)
summary = evaluate(model, dl, {'loss': nn.CrossEntropyLoss(), 'acc': acc}, device)
summary_manager.load('summary.json')
summary_manager.update({'{}'.format(args.data_name): summary})
summary_manager.save('summary.json')
def main(argv):
    train_data = Path.cwd() / '..' / 'data_in' / 'train.txt'
    val_data = Path.cwd() / '..' / 'data_in' / 'val.txt'
    test_data = Path.cwd() / '..' / 'data_in' / 'test.txt'
    dev_data = Path.cwd() / '..' / 'data_in' / 'dev.txt'

    # init params
    classes = FLAGS.classes
    max_length = FLAGS.length
    epochs = FLAGS.epochs
    learning_rate = FLAGS.learning_rate
    dim = FLAGS.embedding_dim
    global_step = 1000
    batch_size = FLAGS.batch_size

    with open(Path.cwd() / '..' / 'data_in' / 'vocab.pkl', mode='rb') as io:
        vocab = pickle.load(io)

    train = tf.data.TextLineDataset(str(train_data)).shuffle(buffer_size=batch_size).batch(batch_size=batch_size)
    eval = tf.data.TextLineDataset(str(val_data)).batch(batch_size=batch_size)
    test = tf.data.TextLineDataset(str(test_data)).batch(batch_size=batch_size)
    dev = tf.data.TextLineDataset(str(dev_data)).batch(batch_size=batch_size)

    padder = PadSequence(max_length, pad_val=vocab.to_indices(vocab.padding_token))
    processing = Corpus(vocab=vocab, split_fn=Split(), pad_fn=padder)

    # create model
    char_cnn = CharCNN(vocab=vocab, classes=classes, dim=dim)

    # create optimizer & loss_fn
    opt = tf.optimizers.Adam(learning_rate=learning_rate)
    loss_fn = tf.losses.SparseCategoricalCrossentropy()

    train_loss_metric = tf.keras.metrics.Mean(name='train_loss')
    train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    val_loss_metric = tf.keras.metrics.Mean(name='val_loss')
    val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    # train_summary_writer = tf.summary.create_file_writer('./data_out/summaries/train')
    # eval_summary_writer = tf.summary.create_file_writer('./data_out/summaries/eval')

    # ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=opt, net=char_cnn)
    # manager = tf.train.CheckpointManager(ckpt, './data_out/tf_ckpts', max_to_keep=3)
    # ckpt.restore(manager.latest_checkpoint)
    #
    # if manager.latest_checkpoint:
    #     print("Restored from {}".format(manager.latest_checkpoint))
    # else:
    #     print("Initializing from scratch.")

    # training
    for epoch in tqdm(range(epochs), desc='epochs'):
        train_loss_metric.reset_states()
        train_acc_metric.reset_states()
        val_loss_metric.reset_states()
        val_acc_metric.reset_states()
        tf.keras.backend.set_learning_phase(1)

        # with train_summary_writer.as_default():
        for step, val in tqdm(enumerate(train), desc='steps'):
            data, label = processing.token2idex(val)
            with tf.GradientTape() as tape:
                logits = char_cnn(data)
                train_loss = loss_fn(label, logits)
            # ckpt.step.assign_add(1)
            grads = tape.gradient(target=train_loss, sources=char_cnn.trainable_variables)
            opt.apply_gradients(grads_and_vars=zip(grads, char_cnn.trainable_variables))

            train_loss_metric.update_state(train_loss)
            train_acc_metric.update_state(label, logits)

            # if tf.equal(opt.iterations % global_step, 0):
            #     tf.summary.scalar('loss', train_loss_metric.result(), step=opt.iterations)

        tr_loss = train_loss_metric.result()
        # save_path = manager.save()
        # print(save_path)
        tqdm.write('epoch : {}, tr_acc : {:.3f}%, tr_loss : {:.3f}'.format(
            epoch + 1, train_acc_metric.result() * 100, tr_loss))
    label_vocab = pickle.load(io)

token_tokenizer = Tokenizer(token_vocab, split_to_self)
label_tokenizer = Tokenizer(label_vocab, split_to_self)

# model (restore)
checkpoint_manager = CheckpointManager(model_dir)
checkpoint = checkpoint_manager.load_checkpoint(args.restore_file + ".tar")
model = BilstmCRF(label_vocab, token_vocab, model_config.lstm_hidden_dim)
model.load_state_dict(checkpoint["model_state_dict"])

# evaluation
summary_manager = SummaryManager(model_dir)
filepath = getattr(data_config, args.data_name)
ds = Corpus(
    filepath,
    token_tokenizer.split_and_transform,
    label_tokenizer.split_and_transform,
)
dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4, collate_fn=batchify)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

f1_score = get_f1_score(model, dl, device)
summary_manager.load("summary.json")
summary_manager._summary[args.data_name].update({"f1": f1_score})
summary_manager.save("summary.json")
# loading trained model
save_path = params['filepath'].get('ckpt')
# save_path = 'tokenize.pth'
ckpt = torch.load(save_path)
config = BertConfig('bert/bert_config.json')
model = BertTagger(config=config, num_labels=len(label_vocab.token_to_idx), vocab=token_vocab)
model.load_state_dict(ckpt['model_state_dict'])

# loading datasets
batch_size = params['training'].get('batch_size')
train_path = params['filepath'].get('train')
val_path = params['filepath'].get('val')
test_path = params['filepath'].get('test')

train_data = Corpus(train_path, token_vocab.to_indices, label_vocab.to_indices)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=False, num_workers=16,
                          drop_last=True, collate_fn=batchify)
val_data = Corpus(val_path, token_vocab.to_indices, label_vocab.to_indices)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=16,
                        drop_last=True, collate_fn=batchify)
test_data = Corpus(test_path, token_vocab.to_indices, label_vocab.to_indices)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=16,
                         drop_last=True, collate_fn=batchify)

# using gpu
device = torch.device('cuda:1') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
pad_sequence = PadSequence(length=model_config.length, pad_val=vocab.to_indices(vocab.padding_token))
preprocessor = PreProcessor(vocab=vocab, split_fn=ptr_tokenizer.tokenize, pad_fn=pad_sequence)

# model
config = BertConfig('pretrained/bert_config.json')
model = PairwiseClassifier(config, num_classes=model_config.num_classes, vocab=preprocessor.vocab)
bert_pretrained = torch.load('pretrained/pytorch_model.bin')
model.load_state_dict(bert_pretrained, strict=False)

# training
tr_ds = Corpus(data_config.tr, preprocessor.preprocess)
tr_dl = DataLoader(tr_ds, batch_size=model_config.batch_size, shuffle=True, num_workers=4, drop_last=True)
val_ds = Corpus(data_config.val, preprocessor.preprocess)
val_dl = DataLoader(val_ds, batch_size=model_config.batch_size)

loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam([
    {
        "params": model.bert.parameters(),
        "lr": model_config.learning_rate / 100
    },
    {
model_config = Config(json_path=model_dir / 'config.json')

# tokenizer
with open(data_config.vocab, mode='rb') as io:
    vocab = pickle.load(io)
tokenizer = Tokenizer(vocab=vocab, split_fn=split_to_jamo)

# model
model = ConvRec(num_classes=model_config.num_classes, embedding_dim=model_config.embedding_dim,
                hidden_dim=model_config.hidden_dim, vocab=tokenizer.vocab)

# training
tr_ds = Corpus(data_config.train, tokenizer.split_and_transform,
               min_length=model_config.min_length, pad_val=tokenizer.vocab.to_indices(' '))
tr_dl = DataLoader(tr_ds, batch_size=model_config.batch_size, shuffle=True, num_workers=4,
                   collate_fn=batchify, drop_last=True)
val_ds = Corpus(data_config.validation, tokenizer.split_and_transform,
                min_length=model_config.min_length, pad_val=tokenizer.vocab.to_indices(' '))
val_dl = DataLoader(val_ds, batch_size=model_config.batch_size, collate_fn=batchify)
def main(json_path):
    cwd = Path.cwd()
    with open(cwd / json_path) as io:
        params = json.loads(io.read())

    # tokenizer
    vocab_path = params['filepath'].get('vocab')
    with open(cwd / vocab_path, mode='rb') as io:
        vocab = pickle.load(io)
    tokenizer = Tokenizer(vocab=vocab, split_fn=MeCab().morphs)

    # model
    num_classes = params['model'].get('num_classes')
    lstm_hidden_dim = params['model'].get('lstm_hidden_dim')
    hidden_dim = params['model'].get('hidden_dim')
    da = params['model'].get('da')
    r = params['model'].get('r')
    model = SAN(num_classes=num_classes, lstm_hidden_dim=lstm_hidden_dim, hidden_dim=hidden_dim,
                da=da, r=r, vocab=tokenizer.vocab)

    # training
    epochs = params['training'].get('epochs')
    batch_size = params['training'].get('batch_size')
    learning_rate = params['training'].get('learning_rate')
    global_step = params['training'].get('global_step')

    tr_path = cwd / params['filepath'].get('tr')
    val_path = cwd / params['filepath'].get('val')
    tr_ds = Corpus(tr_path, tokenizer.split_and_transform)
    tr_dl = DataLoader(tr_ds, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True,
                       collate_fn=batchify)
    val_ds = Corpus(val_path, tokenizer.split_and_transform)
    val_dl = DataLoader(val_ds, batch_size=batch_size, num_workers=4, collate_fn=batchify)

    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(params=model.parameters(), lr=learning_rate)
    scheduler = ReduceLROnPlateau(opt, patience=5)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    writer = SummaryWriter('./runs/{}'.format(params['version']))
    for epoch in tqdm(range(epochs), desc='epochs'):
        tr_loss = 0

        model.train()
        for step, mb in tqdm(enumerate(tr_dl), desc='steps', total=len(tr_dl)):
            queries_a_mb, queries_b_mb, y_mb = map(lambda elm: elm.to(device), mb)
            queries_mb = (queries_a_mb, queries_b_mb)

            opt.zero_grad()
            score, queries_a_attn_mat, queries_b_attn_mat = model(queries_mb)
            a_reg = regularize(queries_a_attn_mat, r, device)
            b_reg = regularize(queries_b_attn_mat, r, device)
            mb_loss = loss_fn(score, y_mb)
            mb_loss.add_(a_reg)
            mb_loss.add_(b_reg)
            mb_loss.backward()
            opt.step()

            tr_loss += mb_loss.item()

            if (epoch * len(tr_dl) + step) % global_step == 0:
                val_loss = evaluate(model, val_dl, loss_fn, device)
                writer.add_scalars('loss', {'train': tr_loss / (step + 1), 'validation': val_loss},
                                   epoch * len(tr_dl) + step)
                model.train()
        else:
            tr_loss /= (step + 1)

        val_loss = evaluate(model, val_dl, loss_fn, device)
        scheduler.step(val_loss)
        tqdm.write('epoch : {}, tr_loss : {:.3f}, val_loss : {:.3f}'.format(epoch + 1, tr_loss, val_loss))

    ckpt = {'model_state_dict': model.state_dict(), 'opt_state_dict': opt.state_dict()}
    save_path = cwd / params['filepath'].get('ckpt')
    torch.save(ckpt, save_path)
preprocessor = PreProcessor(ptr_tokenizer, model_config.max_len)

# Load Model
config_filepath = ptr_dir / "{}-config.json".format(args.type)
config = BertConfig.from_pretrained(config_filepath, output_hidden_states=False)
model = BIIN(config, vocab, model_config.hidden_size, enc_num_layers=len(model_config.hidden_size))

# Data Loader
tr_ds = Corpus(data_config.tr_path, preprocessor.preprocess, sep='\t', doc_col='question1',
               label_col='is_duplicate', is_pair=True, doc_col_second='question2')
val_ds = Corpus(data_config.dev_path, preprocessor.preprocess, sep='\t', doc_col='question1',
                label_col='is_duplicate', is_pair=True, doc_col_second='question2')
tr_dl = DataLoader(tr_ds, batch_size=model_config.batch_size, shuffle=True, num_workers=4, drop_last=True)
                           pad_val=vocab.to_indices(vocab.padding_token))
preprocessor = PreProcessor(vocab=vocab, split_fn=ptr_tokenizer, pad_fn=pad_sequence)

# model
config = BertConfig(ptr_config.config)
model = PairwiseClassifier(config, num_classes=model_config.num_classes, vocab=preprocessor.vocab)
# bert_pretrained = torch.load(ptr_config.bert)
# model.load_state_dict(bert_pretrained, strict=False)

# training
tr_ds = Corpus(data_config.train, preprocessor.preprocess)
tr_dl = DataLoader(tr_ds, batch_size=model_config.batch_size, shuffle=True, num_workers=4, drop_last=True)
val_ds = Corpus(data_config.validation, preprocessor.preprocess)
val_dl = DataLoader(val_ds, batch_size=model_config.batch_size, num_workers=4)

loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam([
    {
        "params": model.bert.parameters(),
        "lr": model_config.learning_rate / 100
def main(json_path):
    cwd = Path.cwd()
    with open(cwd / json_path) as io:
        params = json.loads(io.read())

    # tokenizer
    vocab_path = params['filepath'].get('vocab')
    with open(cwd / vocab_path, mode='rb') as io:
        vocab = pickle.load(io)
    length = params['padder'].get('length')
    padder = PadSequence(length=length, pad_val=vocab.to_indices(vocab.padding_token))
    tokenizer = Tokenizer(vocab=vocab, split_fn=MeCab().morphs, pad_fn=padder)

    # model
    num_classes = params['model'].get('num_classes')
    model = SenCNN(num_classes=num_classes, vocab=tokenizer.vocab)

    # training
    epochs = params['training'].get('epochs')
    batch_size = params['training'].get('batch_size')
    learning_rate = params['training'].get('learning_rate')
    global_step = params['training'].get('global_step')

    tr_path = cwd / params['filepath'].get('tr')
    val_path = cwd / params['filepath'].get('val')
    tr_ds = Corpus(tr_path, tokenizer.split_and_transform)
    tr_dl = DataLoader(tr_ds, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
    val_ds = Corpus(val_path, tokenizer.split_and_transform)
    val_dl = DataLoader(val_ds, batch_size=batch_size)

    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(params=model.parameters(), lr=learning_rate)
    scheduler = ReduceLROnPlateau(opt, patience=5)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    writer = SummaryWriter('./runs/{}'.format(params['version']))
    for epoch in tqdm(range(epochs), desc='epochs'):
        tr_loss = 0

        model.train()
        for step, mb in tqdm(enumerate(tr_dl), desc='steps', total=len(tr_dl)):
            x_mb, y_mb = map(lambda elm: elm.to(device), mb)

            opt.zero_grad()
            mb_loss = loss_fn(model(x_mb), y_mb)
            mb_loss.backward()
            clip_grad_norm_(model._fc.weight, 5)
            opt.step()

            tr_loss += mb_loss.item()

            if (epoch * len(tr_dl) + step) % global_step == 0:
                val_loss = evaluate(model, val_dl, loss_fn, device)
                writer.add_scalars('loss', {'train': tr_loss / (step + 1), 'val': val_loss},
                                   epoch * len(tr_dl) + step)
                model.train()
        else:
            tr_loss /= (step + 1)

        val_loss = evaluate(model, val_dl, loss_fn, device)
        scheduler.step(val_loss)
        tqdm.write('epoch : {}, tr_loss : {:.3f}, val_loss : {:.3f}'.format(epoch + 1, tr_loss, val_loss))

    ckpt = {'model_state_dict': model.state_dict(), 'opt_state_dict': opt.state_dict()}
    save_path = cwd / params['filepath'].get('ckpt')
    torch.save(ckpt, save_path)
# tokenizer
with open(data_config.vocab, mode='rb') as io:
    vocab = pickle.load(io)
pad_sequence = PadSequence(length=model_config.length, pad_val=vocab.to_indices(vocab.padding_token))
tokenizer = Tokenizer(vocab=vocab, split_fn=split_to_jamo, pad_fn=pad_sequence)

# model
model = CharCNN(num_classes=model_config.num_classes, embedding_dim=model_config.embedding_dim,
                vocab=tokenizer.vocab)

# training
tr_ds = Corpus(data_config.train, tokenizer.split_and_transform)
tr_dl = DataLoader(tr_ds, batch_size=model_config.batch_size, shuffle=True, num_workers=4, drop_last=True)
val_ds = Corpus(data_config.validation, tokenizer.split_and_transform)
val_dl = DataLoader(val_ds, batch_size=model_config.batch_size, num_workers=4)

loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(params=model.parameters(), lr=model_config.learning_rate)
scheduler = ReduceLROnPlateau(opt, patience=5)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def main(argv):
    train_data = Path.cwd() / 'data_in' / 'train.txt'
    val_data = Path.cwd() / 'data_in' / 'val.txt'
    with open(Path.cwd() / 'data_in' / 'vocab.pkl', mode='rb') as io:
        vocab = pickle.load(io)

    train = tf.data.TextLineDataset(str(train_data)).shuffle(buffer_size=1000).batch(
        batch_size=FLAGS.batch_size, drop_remainder=True)
    eval = tf.data.TextLineDataset(str(val_data)).batch(batch_size=FLAGS.batch_size, drop_remainder=True)

    tokenized = MeCab()
    processing = Corpus(vocab=vocab, tokenizer=tokenized)

    # init params
    classes = FLAGS.classes
    max_length = FLAGS.length
    epochs = FLAGS.epochs
    learning_rate = FLAGS.learning_rate
    global_step = 1000

    # create model
    sen_cnn = SenCNN(vocab=vocab, classes=classes)

    # create optimizer & loss_fn
    opt = tf.optimizers.Adam(learning_rate=learning_rate)
    loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

    train_loss_metric = tf.keras.metrics.Mean(name='train_loss')
    train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    val_loss_metric = tf.keras.metrics.Mean(name='val_loss')
    val_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    train_summary_writer = tf.summary.create_file_writer('./data_out/summaries/train')
    eval_summary_writer = tf.summary.create_file_writer('./data_out/summaries/eval')

    ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=opt, net=sen_cnn)
    manager = tf.train.CheckpointManager(ckpt, './data_out/tf_ckpts', max_to_keep=3)
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    # training
    for epoch in tqdm(range(epochs), desc='epochs'):
        train_loss_metric.reset_states()
        train_acc_metric.reset_states()
        val_loss_metric.reset_states()
        val_acc_metric.reset_states()

        tf.keras.backend.set_learning_phase(1)
        tr_loss = 0
        with train_summary_writer.as_default():
            for step, val in tqdm(enumerate(train), desc='steps'):
                data, label = processing.token2idex(val)
                with tf.GradientTape() as tape:
                    logits = sen_cnn(data)
                    train_loss = loss_fn(label, logits)
                ckpt.step.assign_add(1)
                grads = tape.gradient(target=train_loss, sources=sen_cnn.trainable_variables)
                opt.apply_gradients(grads_and_vars=zip(grads, sen_cnn.trainable_variables))

                # tr_loss += pred_loss.numpy()
                train_loss_metric.update_state(train_loss)
                train_acc_metric.update_state(label, logits)

                if tf.equal(opt.iterations % global_step, 0):
                    tf.summary.scalar('loss', train_loss_metric.result(), step=opt.iterations)
            # else:
            #     tr_loss /= (step + 1)
            #     print("t_loss {}".format(tr_loss))

        tr_loss = train_loss_metric.result()
        save_path = manager.save()
        print(save_path)

        tf.keras.backend.set_learning_phase(0)
        val_loss = 0
        with eval_summary_writer.as_default():
            for step, val in tqdm(enumerate(eval), desc='steps'):
                data, label = processing.token2idex(val)
                logits = sen_cnn(data)
                val_loss = loss_fn(label, logits)

                # val_loss += mb_loss.numpy()
                val_loss_metric.update_state(val_loss)
                val_acc_metric.update_state(label, logits)
                tf.summary.scalar('loss', val_loss_metric.result(), step=step)

        val_loss = val_loss_metric.result()
        tqdm.write('epoch : {}, tr_acc : {:.3f}%, tr_loss : {:.3f}, val_acc : {:.3f}%, val_loss : {:.3f}'.format(
            epoch + 1, train_acc_metric.result() * 100, tr_loss, val_acc_metric.result() * 100, val_loss))