def train(model, tokenizer, train_data, valid_data, args, eos=False):
    """Step-based training loop for a text-correction seq2seq model.

    Trains for ``args.max_steps`` optimizer steps, logging every
    ``args.log_interval`` steps and evaluating (loss, EM, GLEU) every
    ``args.eval_interval`` steps.  The best checkpoint by validation GLEU
    is saved via ``nsml.save("best")``.

    Args:
        model: the model to train (already on ``args.device``).
        tokenizer: tokenizer passed through to ``collate_fn`` / ``correct``.
        train_data / valid_data: lists of dicts with 'noisy' and 'clean' keys.
        args: namespace with batch sizes, lr, warmup, intervals, max_steps, etc.
        eos: whether to append an EOS token during collation.
    """
    model.train()
    train_dataset = TextDataset(train_data)
    train_dataloader = DataLoader(train_dataset, sampler=RandomSampler(train_dataset),
                                  batch_size=args.train_batch_size, num_workers=args.num_workers,
                                  collate_fn=lambda x: collate_fn(x, tokenizer, args.max_seq_length,
                                                                  eos=eos, tokenizer_type=args.tokenizer))
    valid_dataset = TextDataset(valid_data)
    valid_dataloader = DataLoader(valid_dataset, sampler=SequentialSampler(valid_dataset),
                                  batch_size=args.eval_batch_size, num_workers=args.num_workers,
                                  collate_fn=lambda x: collate_fn(x, tokenizer, args.max_seq_length,
                                                                  eos=eos, tokenizer_type=args.tokenizer))
    valid_noisy = [x['noisy'] for x in valid_data]
    valid_clean = [x['clean'] for x in valid_data]
    # Number of epochs needed to reach max_steps (ceiling division).
    epochs = (args.max_steps - 1) // len(train_dataloader) + 1
    # NOTE(review): eval() on a CLI string — fine for trusted args, unsafe for untrusted input.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=eval(args.adam_betas),
                                 eps=args.eps, weight_decay=args.weight_decay)
    # Linear warmup to step num_warmup_steps, then inverse-sqrt decay.
    lr_lambda = lambda x: x / args.num_warmup_steps if x <= args.num_warmup_steps \
        else (x / args.num_warmup_steps) ** -0.5
    scheduler = LambdaLR(optimizer, lr_lambda)
    step = 0
    best_val_gleu = -float("inf")
    meter = Meter()
    for epoch in range(1, epochs + 1):
        print("===EPOCH: ", epoch)
        for batch in train_dataloader:
            step += 1
            batch = tuple(t.to(args.device) for t in batch)
            loss, items = calc_loss(model, batch)
            meter.add(*items)
            loss.backward()
            if args.max_grad_norm > 0:
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
            optimizer.step()
            model.zero_grad()
            scheduler.step()
            if step % args.log_interval == 0:
                # NOTE(review): scheduler.get_lr() is deprecated in newer PyTorch
                # in favor of get_last_lr() — confirm against the pinned version.
                lr = scheduler.get_lr()[0]
                loss_sent, loss_token = meter.average()
                logger.info(f' [{step:5d}] lr {lr:.6f} | {meter.print_str(True)}')
                nsml.report(step=step, scope=locals(), summary=True, train__lr=lr,
                            train__loss_sent=loss_sent, train__token_ppl=math.exp(loss_token))
                meter.init()
            if step % args.eval_interval == 0:
                start_eval = time.time()
                (val_loss, val_loss_token), valid_str = evaluate(model, valid_dataloader, args)
                prediction = correct(model, tokenizer, valid_noisy, args, eos=eos, length_limit=0.1)
                val_em = em(prediction, valid_clean)
                cnt = 0
                # Print a small sample of (noisy, prediction, clean) triples.
                # (Original comment said "print only 10", but the loop stops at 20.)
                for noisy, pred, clean in zip(valid_noisy, prediction, valid_clean):
                    print(f'[{noisy}], [{pred}], [{clean}]')
                    cnt += 1
                    if cnt == 20:
                        break
                val_gleu = gleu(prediction, valid_clean)
                logger.info('-' * 89)
                logger.info(f' [{step:6d}] valid | {valid_str} | em {val_em:5.2f} | gleu {val_gleu:5.2f}')
                logger.info('-' * 89)
                nsml.report(step=step, scope=locals(), summary=True, valid__loss_sent=val_loss,
                            valid__token_ppl=math.exp(val_loss_token), valid__em=val_em,
                            valid__gleu=val_gleu)
                if val_gleu > best_val_gleu:
                    best_val_gleu = val_gleu
                    nsml.save("best")
                # Exclude evaluation wall-time from the meter's throughput clock.
                meter.start += time.time() - start_eval
            if step >= args.max_steps:
                break
        #nsml.save(epoch)
        if step >= args.max_steps:
            break
def main():
    """Entry point: self-supervised pretraining of an image encoder.

    Parses CLI options, builds a ResNet / Residual-Attention backbone with a
    2-layer MLP projection head, then (in 'train' mode) runs a contrastive
    pretext task on unlabeled data for 200 epochs, reporting the loss to NSML
    and checkpointing every other epoch.
    """
    global opts
    opts = parser.parse_args()
    opts.cuda = 0
    # Set GPU
    seed = opts.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = opts.gpu_ids
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        opts.cuda = 1
        print("Number of device: {}".format(torch.cuda.device_count()))
        print("Currently using GPU {}".format(opts.gpu_ids))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")
    # Backbone selection; per-model default batch sizes are tuned for memory
    # (larger when opts.half enables fp16).
    # NOTE(review): an unknown opts.model leaves `model` undefined below — confirm
    # argparse restricts the choices.
    if opts.model == 'resnet18':
        model = resnet18(pretrained=False)
        if opts.batchsize == -1:
            opts.batchsize = 1024 if opts.half else 512
    elif opts.model == 'resnet50':
        model = resnet50(pretrained=False)
        if opts.batchsize == -1:
            opts.batchsize = 512 if opts.half else 256
    elif opts.model == 'resnet101':
        model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet101', pretrained=False)
        if opts.batchsize == -1:
            opts.batchsize = 360 if opts.half else 180
    elif opts.model == 'resnet152':
        model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet152', pretrained=False)
        if opts.batchsize == -1:
            opts.batchsize = 256 if opts.half else 128
    elif opts.model == 'ran56':
        model = ResidualAttentionModel_56()
        if opts.batchsize == -1:
            opts.batchsize = 140
    elif opts.model == 'ran92':
        model = ResidualAttentionModel_92()
        if opts.batchsize == -1:
            opts.batchsize = 80
    # Replace the classifier with a same-width MLP projection head.
    ch = model.fc.in_features
    model.fc = nn.Sequential(nn.Linear(ch, ch), nn.ReLU(), nn.Linear(ch, ch))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # model = torch.nn.DataParallel(model)
    model.eval()
    if opts.half:
        # fp16 model, but keep BatchNorm in fp32 for numerical stability.
        model.half()
        for layer in model.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.float()
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    n_parameters = sum([p.data.nelement() for p in model.parameters()])
    print(' + Number of params: {}'.format(n_parameters))
    ### GPU Setup ###
    if use_gpu:
        model.cuda()
    ### DO NOT MODIFY THIS BLOCK ###
    if IS_ON_NSML:
        bind_nsml(model)
        if opts.pause:
            nsml.paused(scope=locals())
    ################################
    bind_nsml(model)
    if opts.mode == 'train':
        model.train()
        # Set dataloader
        train_ids, val_ids, unl_ids = split_ids(os.path.join(DATASET_PATH, 'train/train_label'), 0.2)
        print('found {} train, {} validation and {} unlabeled images'.format(
            len(train_ids), len(val_ids), len(unl_ids)))
        label_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH, 'train', train_ids,
                              transform=transforms.Compose([
                                  transforms.Resize(opts.imResize),
                                  transforms.RandomResizedCrop(opts.imsize),
                                  # transforms.Resize((opts.imsize, opts.imsize)),
                                  transforms.RandomHorizontalFlip(),
                                  # transforms.ColorJitter(0.5, 0.5, 0.5, 0.5),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                       std=[0.229, 0.224, 0.225]),])),
            batch_size=opts.batchsize * 2, shuffle=True, num_workers=4,
            pin_memory=True, drop_last=True)
        print('train_loader done')
        unlabel_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH, 'unlabel', unl_ids,
                              transform=transforms.Compose([
                                  transforms.Resize(opts.imResize),
                                  transforms.RandomResizedCrop(opts.imsize),
                                  transforms.ColorJitter(0.5, 0.5, 0.5, 0.5),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                       std=[0.229, 0.224, 0.225]),])),
            batch_size=opts.batchsize, shuffle=True, num_workers=0,
            pin_memory=True, drop_last=True)
        print('unlabel_loader done')
        validation_loader = torch.utils.data.DataLoader(
            SimpleImageLoader(DATASET_PATH, 'val', val_ids,
                              transform=transforms.Compose([
                                  transforms.Resize(opts.imResize),
                                  transforms.CenterCrop(opts.imsize),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                       std=[0.229, 0.224, 0.225]),])),
            batch_size=opts.batchsize, shuffle=False, num_workers=0,
            pin_memory=True, drop_last=False)
        print('validation_loader done')
        model = torch.nn.DataParallel(model)
        model.to(device)
        bind_nsml(model)
        #Set optimizer
        if opts.optimizer == 'SGD':
            if opts.adaptive_lr:
                # Linear-scaling rule: lr proportional to batch size.
                base_optimizer = optim.SGD(model.parameters(), lr=0.3*opts.batchsize/256)
            else:
                if opts.lr == -1:
                    base_optimizer = optim.SGD(model.parameters(), lr=0.001)
                else:
                    base_optimizer = optim.SGD(model.parameters(), lr=opts.lr)
        elif opts.optimizer == 'Adam':
            if opts.adaptive_lr:
                base_optimizer = optim.Adam(model.parameters(), lr=0.3*opts.batchsize/256)
            else:
                if opts.lr == -1:
                    base_optimizer = optim.Adam(model.parameters(), lr=0.001)
                else:
                    base_optimizer = optim.Adam(model.parameters(), lr=opts.lr)
        # Optionally wrap in LARS for large-batch training.
        if opts.LARS:
            optimizer = torchlars.LARS(optimizer=base_optimizer, eps=1e-8, trust_coef=0.001)
        else:
            optimizer = base_optimizer
        # INSTANTIATE LOSS CLASS
        unlabel_criterion = nt_cross_entropy
        # INSTANTIATE STEP LEARNING SCHEDULER CLASS
        if opts.scheduler == 'linear':
            scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 150], gamma=0.1)
        elif opts.scheduler == 'exp':
            scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=1e-6)
        model.train()
        print('==================================')
        print(opts)
        print('==================================')
        print('starting pretask')
        total_iter = 0
        for epoch in range(1, 201):
            for it, data in enumerate(unlabel_loader):
                total_iter += 1
                # assumes data is (B, 2, C, H, W): two augmented views per sample,
                # flattened to (2B, C, H, W) — TODO confirm against SimpleImageLoader.
                d = data.size()
                if opts.half:
                    x = data.view(d[0]*2, d[2], d[3], d[4]).half().to(device)
                else:
                    x = data.view(d[0]*2, d[2], d[3], d[4]).to(device)
                optimizer.zero_grad()
                p = model(x)
                if opts.half:
                    # Compute the contrastive loss in fp32 even when the model is fp16.
                    loss = unlabel_criterion(p.float())
                else:
                    loss = unlabel_criterion(p)
                loss.backward()
                if opts.half:
                    # Step the optimizer with fp32 weights, then flip back to fp16.
                    model.float()
                optimizer.step()
                if opts.half:
                    model.half()
                    for layer in model.modules():
                        if isinstance(layer, nn.BatchNorm2d):
                            layer.float()
                print("epoch: ", epoch, "loss: ", loss.item())
                nsml.report(summary=True, loss=loss.item(), step=total_iter)
            scheduler.step()
            print("epoch: ", epoch, "loss: ", loss.item())
            if (epoch) % 2 == 0:
                if IS_ON_NSML:
                    nsml.save(opts.name + '_pre{}'.format(epoch))
                else:
                    torch.save(model.state_dict(),
                               os.path.join('runs', opts.name + '_pre{}'.format(epoch)))
# Keras training fragment: one-hot label preparation and an explicit
# epoch-by-epoch fit loop so NSML can report/checkpoint after each epoch.
# NOTE(review): label_list, x_train, model, nb_epoch, batch_size and
# num_classes are defined outside this fragment.
labels = np.asarray(label_list)
# Labels duplicated — presumably x_train contains two copies/augmentations
# of each sample; verify against the x_train construction.
labels = np.concatenate((labels, labels), axis=0)
y_train = keras.utils.to_categorical(labels, num_classes=num_classes)
x_train = x_train.astype('float32')
x_train /= 255  # scale pixel values to [0, 1]
print(len(labels), 'train samples')
""" Callback """
monitor = 'get_categorical_accuracy_keras'
reduce_lr = ReduceLROnPlateau(monitor=monitor, patience=3)
""" Training loop """
for epoch in range(nb_epoch):
    # initial_epoch/epochs bracket a single epoch so state can be saved each time.
    res = model.fit(x_train, y_train, batch_size=batch_size, initial_epoch=epoch,
                    epochs=epoch + 1, callbacks=[reduce_lr], verbose=1, shuffle=True)
    print(res.history)
    train_loss, train_acc = res.history['loss'][0], res.history[
        'get_categorical_accuracy_keras'][0]
    nsml.report(summary=True, epoch=epoch, epoch_total=nb_epoch,
                loss=train_loss, acc=train_acc)
    nsml.save(epoch)
def main():
    """Entry point for the speech-recognition hackathon trainer.

    Parses CLI options, builds either an RNN (GRU seq2seq) or a Transformer
    encoder/decoder depending on ``--use_rnn``, then runs the epoch loop:
    train -> evaluate -> report to NSML -> checkpoint best loss / best CER.
    """
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token
    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    parser.add_argument('--hidden_size', type=int, default=512,
                        help='hidden size of model (default: 256)')
    parser.add_argument('--layer_size', type=int, default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--bidirectional', action='store_true',
                        help='use bidirectional RNN for encoder (default: False)')
    parser.add_argument('--use_attention', action='store_true',
                        help='use attention between encoder-decoder (default: False)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch size in training (default: 32)')
    parser.add_argument('--workers', type=int, default=4,
                        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=10,
                        help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing', type=float, default=0.5,
                        help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len', type=int, default=80,
                        help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model',
                        help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument("--pause", type=int, default=0)
    parser.add_argument("--visdom", type=bool, default=False)
    parser.add_argument("--use_stft", type=bool, default=False,
                        help="use stft or log mel + specaugmentation")
    parser.add_argument("--mels", type=int, default=128)
    parser.add_argument("--use_rnn", type=bool, default=False)
    # Low Frame Rate (stacking and skipping frames)
    parser.add_argument('--LFR_m', default=4, type=int,
                        help='Low Frame Rate: number of frames to stack')
    parser.add_argument('--LFR_n', default=3, type=int,
                        help='Low Frame Rate: number of frames to skip')
    # EncoderTrans
    parser.add_argument('--n_layers_enc', default=2, type=int, help='Number of encoder stacks')
    parser.add_argument('--n_head', default=4, type=int, help='Number of Multi Head Attention (MHA)')
    parser.add_argument('--d_k', default=64, type=int, help='Dimension of key')
    parser.add_argument('--d_v', default=64, type=int, help='Dimension of value')
    parser.add_argument('--d_model', default=512, type=int, help='Dimension of model')
    parser.add_argument('--d_inner', default=512, type=int, help='Dimension of inner')
    parser.add_argument('--dropout', default=0.1, type=float, help='Dropout rate')
    parser.add_argument('--pe_maxlen', default=5000, type=int, help='Positional Encoding max len')
    # Decoder Trans
    parser.add_argument('--d_word_vec', default=512, type=int, help='Dim of decoder embedding')
    parser.add_argument('--n_layers_dec', default=2, type=int, help='Number of decoder stacks')
    parser.add_argument('--tgt_emb_prj_weight_sharing', default=1, type=int,
                        help='share decoder embedding with decoder projection')
    # TransLoss
    parser.add_argument('--label_smoothing', default=0.1, type=float, help='label smoothing')
    # Optimizer
    parser.add_argument('--k', default=1.0, type=float,
                        help='tunable scalar multiply to learning rate')
    parser.add_argument('--warmup_steps', default=4000, type=int, help='warmup steps')
    args = parser.parse_args()
    char2index, index2char = label_loader.load_label('./data/hackathon.labels')
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']
    # Setting seed
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    # Setting device
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')
    # Feature extractor: STFT yields N_FFT/2+1 bins, otherwise mel spectrogram.
    if args.use_stft:
        feature_size = N_FFT / 2 + 1
    else:
        feature_size = args.mels
    # Actual model
    if args.use_rnn:
        # RNN structure
        # Define model
        enc = EncoderRNN(feature_size, args.hidden_size, input_dropout_p=args.dropout,
                         dropout_p=args.dropout, n_layers=args.layer_size,
                         bidirectional=args.bidirectional, rnn_cell='gru',
                         variable_lengths=False)
        dec = DecoderRNN(len(char2index), args.max_len,
                         args.hidden_size * (2 if args.bidirectional else 1),
                         SOS_token, EOS_token, n_layers=args.layer_size, rnn_cell='gru',
                         bidirectional=args.bidirectional, input_dropout_p=args.dropout,
                         dropout_p=args.dropout, use_attention=args.use_attention)
        model = Seq2seq(enc, dec)
        model.flatten_parameters()
        # Parameters initialization
        for param in model.parameters():
            param.data.uniform_(-0.08, 0.08)
        model = nn.DataParallel(model).to(device)
        optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
        criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=PAD_token).to(device)
        bind_model(args, model, optimizer)
        if args.pause == 1:
            nsml.paused(scope=locals())
        if args.mode != "train":
            return
        data_list = os.path.join(DATASET_PATH, 'train_data', 'data_list.csv')
        wav_paths = list()
        script_paths = list()
        with open(data_list, 'r') as f:
            for line in f:
                # line: "aaa.wav,aaa.label"
                wav_path, script_path = line.strip().split(',')
                wav_paths.append(
                    os.path.join(DATASET_PATH, 'train_data', wav_path))
                script_paths.append(
                    os.path.join(DATASET_PATH, 'train_data', script_path))
        best_loss = 1e10
        best_cer = 1e10
        begin_epoch = 0
        # load all target scripts for reducing disk i/o
        target_path = os.path.join(DATASET_PATH, 'train_label')
        load_targets(target_path)
        train_batch_num, train_dataset_list, valid_dataset = split_dataset(
            args, wav_paths, script_paths, valid_ratio=0.05)
        logger.info('start')
        if args.visdom:
            train_visual = Visual(train_batch_num)
            eval_visual = Visual(1)
        train_begin = time.time()
        for epoch in range(begin_epoch, args.max_epochs):
            train_queue = queue.Queue(args.workers * 2)
            train_loader = MultiLoader(train_dataset_list, train_queue,
                                       args.batch_size, args.workers)
            train_loader.start()
            if args.visdom:
                train_loss, train_cer = trainRNN(model, train_batch_num, train_queue,
                                                 criterion, optimizer, device, train_begin,
                                                 args.workers, 10, args.teacher_forcing,
                                                 train_visual)
            else:
                train_loss, train_cer = trainRNN(model, train_batch_num, train_queue,
                                                 criterion, optimizer, device, train_begin,
                                                 args.workers, 10, args.teacher_forcing)
            logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f'
                        % (epoch, train_loss, train_cer))
            train_loader.join()
            valid_queue = queue.Queue(args.workers * 2)
            valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
            valid_loader.start()
            if args.visdom:
                eval_loss, eval_cer = evaluateRNN(model, valid_loader, valid_queue,
                                                  criterion, device, eval_visual)
            else:
                eval_loss, eval_cer = evaluateRNN(model, valid_loader, valid_queue,
                                                  criterion, device)
            logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f'
                        % (epoch, eval_loss, eval_cer))
            valid_loader.join()
            nsml.report(False, step=epoch, train_epoch__loss=train_loss,
                        train_epoch__cer=train_cer, eval__loss=eval_loss,
                        eval__cer=eval_cer)
            best_loss_model = (eval_loss < best_loss)
            best_cer_model = (eval_cer < best_cer)
            nsml.save(args.save_name)
            if best_loss_model:
                nsml.save('best_loss')
                best_loss = eval_loss
            if best_cer_model:
                nsml.save('best_cer')
                best_cer = eval_cer
    else:
        # Transformer structure
        # Define model
        enc = EncoderTrans(feature_size, args.n_layers_enc, args.n_head, args.d_k,
                           args.d_v, args.d_model, args.d_inner,
                           dropout=args.dropout, pe_maxlen=args.pe_maxlen)
        dec = DecoderTrans(
            SOS_token, EOS_token, len(char2index), args.d_word_vec, args.n_layers_dec,
            args.n_head, args.d_k, args.d_v, args.d_model, args.d_inner,
            dropout=args.dropout,
            tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
            pe_maxlen=args.pe_maxlen)
        model = Transformer(enc, dec)
        # Parameter initialization
        for param in model.parameters():
            param.data.uniform_(-0.08, 0.08)
        model = nn.DataParallel(model).to(device)
        optimizer = TransformerOptimizer(
            torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
            args.k, args.d_model, args.warmup_steps)
        criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=PAD_token).to(device)
        bind_model(args, model, optimizer)
        if args.pause == 1:
            nsml.paused(scope=locals())
        if args.mode != "train":
            return
        data_list = os.path.join(DATASET_PATH, 'train_data', 'data_list.csv')
        wav_paths = list()
        script_paths = list()
        with open(data_list, 'r') as f:
            for line in f:
                # line: "aaa.wav,aaa.label"
                wav_path, script_path = line.strip().split(',')
                wav_paths.append(
                    os.path.join(DATASET_PATH, 'train_data', wav_path))
                script_paths.append(
                    os.path.join(DATASET_PATH, 'train_data', script_path))
        best_loss = 1e10
        best_cer = 1e10
        begin_epoch = 0
        # load all target scripts for reducing disk i/o
        target_path = os.path.join(DATASET_PATH, 'train_label')
        load_targets(target_path)
        train_batch_num, train_dataset_list, valid_dataset = split_dataset(
            args, wav_paths, script_paths, valid_ratio=0.05)
        logger.info('start')
        train_begin = time.time()
        for epoch in range(begin_epoch, args.max_epochs):
            train_queue = queue.Queue(args.workers * 2)
            train_loader = MultiLoader(train_dataset_list, train_queue,
                                       args.batch_size, args.workers)
            train_loader.start()
            # NOTE(review): train_visual / eval_visual are only created in the RNN
            # branch; --visdom with the Transformer path would raise NameError.
            if args.visdom:
                train_loss, train_cer = trainTrans(model, train_batch_num, train_queue,
                                                   criterion, optimizer, device,
                                                   train_begin, args.workers, 10,
                                                   args.teacher_forcing, train_visual)
            else:
                train_loss, train_cer = trainTrans(
                    model, train_batch_num, train_queue, criterion, optimizer, device,
                    train_begin, args.workers, 10, args.teacher_forcing,
                    label_smoothing=args.label_smoothing)
            logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f'
                        % (epoch, train_loss, train_cer))
            train_loader.join()
            valid_queue = queue.Queue(args.workers * 2)
            valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
            valid_loader.start()
            if args.visdom:
                eval_loss, eval_cer = evaluateTrans(model, valid_loader, valid_queue,
                                                    criterion, device, eval_visual)
            else:
                eval_loss, eval_cer = evaluateTrans(
                    model, valid_loader, valid_queue, criterion, device,
                    label_smoothing=args.label_smoothing)
            logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f'
                        % (epoch, eval_loss, eval_cer))
            valid_loader.join()
            nsml.report(False, step=epoch, train_epoch__loss=train_loss,
                        train_epoch__cer=train_cer, eval__loss=eval_loss,
                        eval__cer=eval_cer)
            best_loss_model = (eval_loss < best_loss)
            best_cer_model = (eval_cer < best_cer)
            nsml.save(args.save_name)
            if best_loss_model:
                nsml.save('best_loss')
                best_loss = eval_loss
            if best_cer_model:
                nsml.save('best_cer')
                best_cer = eval_cer
def train_fine(opts, train_loader, model, criterion, optimizer, ema_optimizer, epoch,
               use_gpu, scheduler, is_mixsim):
    """Run one fine-tuning epoch (``opts.steps_per_epoch`` steps) with AMP.

    Draws labeled batches (re-creating the iterator when exhausted), trains
    with mixed precision via GradScaler, steps the EMA optimizer and LR
    scheduler each iteration, and tracks top-1/top-5 accuracy.

    Args:
        opts: options namespace (gpu_ids, temperature, log_interval, steps_per_epoch).
        train_loader: labeled DataLoader yielding (inputs, integer targets).
        model: network returning (features, logits).
        criterion: loss taking (logits, one-hot targets, temperature).
        optimizer / ema_optimizer / scheduler: optimization objects stepped per batch.
        epoch: current epoch index (unused here, kept for interface compatibility).
        use_gpu: move batches to CUDA when True.
        is_mixsim: pick the first GPU of opts.gpu_ids as the input device.

    Returns:
        (avg_loss, avg_top1, avg_top5) over the epoch.
    """
    global global_step
    scaler = torch.cuda.amp.GradScaler()
    losses = AverageMeter()
    losses_curr = AverageMeter()
    acc_top1 = AverageMeter()
    acc_top5 = AverageMeter()
    model.train()
    out = False
    local_step = 0
    while not out:
        labeled_train_iter = iter(train_loader)
        for batch_idx in range(len(train_loader)):
            # BUGFIX: iterators expose __next__ in Python 3 — use the builtin
            # next() instead of the removed .next() method, and only catch
            # StopIteration (a bare except hid real errors).
            try:
                inputs_x, targets_x = next(labeled_train_iter)
            except StopIteration:
                labeled_train_iter = iter(train_loader)
                inputs_x, targets_x = next(labeled_train_iter)
            batch_size = inputs_x.size(0)
            # Transform label to one-hot
            classno = NUM_CLASSES
            targets_org = targets_x
            targets_x = torch.zeros(batch_size, classno).scatter_(1, targets_x.view(-1, 1), 1)
            if use_gpu:
                # Send input value to device 0, where first parameters of MixSim_Model is.
                if is_mixsim:
                    dev0 = 'cuda:{}'.format(opts.gpu_ids.split(',')[0])
                else:
                    dev0 = 'cuda'
                inputs_x, targets_x = inputs_x.to(dev0), targets_x.to(dev0)
            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                _, logits = model(inputs_x)
                loss = criterion(logits, targets_x, opts.temperature)
            losses.update(loss.item(), inputs_x.size(0))
            losses_curr.update(loss.item(), inputs_x.size(0))
            # compute gradient and do SGD step (scaled for AMP)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            ema_optimizer.step()
            scaler.update()
            scheduler.step()
            with torch.no_grad():
                # forward pass again for accuracy bookkeeping
                _, pred_x1 = model(inputs_x)
            if IS_ON_NSML and global_step % opts.log_interval == 0:
                nsml.report(step=global_step, loss=losses_curr.avg)
                losses_curr.reset()
            acc_top1b = top_n_accuracy_score(targets_org.data.cpu().numpy(),
                                             pred_x1.data.cpu().numpy(), n=1) * 100
            acc_top5b = top_n_accuracy_score(targets_org.data.cpu().numpy(),
                                             pred_x1.data.cpu().numpy(), n=5) * 100
            acc_top1.update(torch.as_tensor(acc_top1b), inputs_x.size(0))
            acc_top5.update(torch.as_tensor(acc_top5b), inputs_x.size(0))
            local_step += 1
            global_step += 1
            if local_step >= opts.steps_per_epoch:
                out = True
                break
    return losses.avg, acc_top1.avg, acc_top5.avg
# TF1 session training fragment: per-epoch minibatch optimization followed by
# a metrics fetch on the LAST minibatch of the epoch (data/labels retain their
# final loop values), then NSML reporting and periodic checkpointing.
for epoch in range(config.epochs):
    for i, (data, labels) in enumerate(_batch_loader(dataset, config.batch)):
        _, c = sess.run([train, cost], feed_dict={x: data, y: labels})
    w_v, b_v, h, c, p, a = sess.run(
        [w, b, hypothesis, cost, prediction, accuracy],
        feed_dict={
            x: data,
            y: labels
        })
    print('epoch:', epoch, "accuracy:", a)
    # NOTE(review): `accuracy=accuracy` passes the graph tensor, not the
    # evaluated value — `a` was probably intended; confirm before changing.
    nsml.report(summary=True, scope=locals(), epoch=epoch, accuracy=accuracy,
                step=epoch)
    # save session
    if IS_ON_NSML:
        # on nsml
        nsml.save(epoch)
    else:
        # on local
        if epoch % 1000 == 0:
            print(b_v)
            save_path = saver.save(
                sess, os.path.join('save2', str(epoch) + '.ckpt'))
            print("Model saved in file: %s" % save_path)
# local test debug mode
def on_epoch_end(self, epoch, logs=None):
    """Keras callback hook: report epoch losses to NSML and checkpoint.

    Args:
        epoch: zero-based epoch index supplied by Keras.
        logs: metrics dict from Keras (may contain 'loss' / 'val_loss').

    BUGFIX: the original used a mutable default argument (``logs={}``),
    a shared-state pitfall; ``None`` plus normalization is equivalent and safe.
    """
    logs = logs or {}
    nsml.report(summary=True, epoch=epoch, loss=logs.get('loss'),
                val_loss=logs.get('val_loss'))
    # Checkpoint name encodes the run prefix, seed and epoch.
    nsml.save(self.prefix + '_' + str(self.seed) + '_' + str(epoch))
def main():
    """Entry point: GRU seq2seq speech-recognition trainer (RNN-only variant).

    Parses CLI options, builds an EncoderRNN/DecoderRNN Seq2seq model, then
    runs the epoch loop of train -> evaluate -> NSML report, saving the
    checkpoint with the best validation loss as 'best'.
    """
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token
    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    parser.add_argument('--hidden_size', type=int, default=512,
                        help='hidden size of model (default: 256)')
    parser.add_argument('--layer_size', type=int, default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--dropout', type=float, default=0.2,
                        help='dropout rate in training (default: 0.2)')
    parser.add_argument(
        '--bidirectional', action='store_true',
        help='use bidirectional RNN for encoder (default: False)')
    parser.add_argument(
        '--use_attention', action='store_true',
        help='use attention between encoder-decoder (default: False)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch size in training (default: 32)')
    parser.add_argument(
        '--workers', type=int, default=4,
        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=50,
                        help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing', type=float, default=0.5,
                        help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len', type=int, default=80,
                        help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model',
                        help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument("--pause", type=int, default=0)
    args = parser.parse_args()
    char2index, index2char = label_loader.load_label('./hackathon.labels')
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')
    # N_FFT: defined in loader.py
    #feature_size = N_FFT / 2 + 1
    #feature_size = 40
    # Translated from Korean: "probably needs a different value if the feature
    # is a mel function?" — TODO confirm against the loader's feature extractor.
    feature_size = 45
    enc = EncoderRNN(feature_size, args.hidden_size, input_dropout_p=args.dropout,
                     dropout_p=args.dropout, n_layers=args.layer_size,
                     bidirectional=args.bidirectional, rnn_cell='gru',
                     variable_lengths=False)
    dec = DecoderRNN(len(char2index), args.max_len,
                     args.hidden_size * (2 if args.bidirectional else 1),
                     SOS_token, EOS_token, n_layers=args.layer_size, rnn_cell='gru',
                     bidirectional=args.bidirectional, input_dropout_p=args.dropout,
                     dropout_p=args.dropout, use_attention=args.use_attention)
    model = Seq2seq(enc, dec)
    model.flatten_parameters()
    # Uniform weight initialization in [-0.08, 0.08].
    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)
    model = nn.DataParallel(model).to(device)
    optimizer = optim.Adam(model.module.parameters(), lr=args.lr, weight_decay=1e-06)
    criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=PAD_token).to(device)
    bind_model(model, optimizer)
    if args.pause == 1:
        nsml.paused(scope=locals())
    if args.mode != "train":
        return
    '''
    nsml.load(checkpoint='best', session='team228/sr-hack-2019-dataset/414')
    nsml.save('best')
    '''
    data_list = os.path.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()
    with open(data_list, 'r') as f:
        for line in f:
            # line: "aaa.wav,aaa.label"
            wav_path, script_path = line.strip().split(',')
            wav_paths.append(os.path.join(DATASET_PATH, 'train_data', wav_path))
            script_paths.append(
                os.path.join(DATASET_PATH, 'train_data', script_path))
    best_loss = 1e10
    begin_epoch = 0
    # load all target scripts for reducing disk i/o
    target_path = os.path.join(DATASET_PATH, 'train_label')
    load_targets(target_path)
    train_batch_num, train_dataset_list, valid_dataset = split_dataset(
        args, wav_paths, script_paths, valid_ratio=0.05)
    logger.info('start')
    train_begin = time.time()
    for epoch in range(begin_epoch, args.max_epochs):
        train_queue = queue.Queue(args.workers * 2)
        train_loader = MultiLoader(train_dataset_list, train_queue,
                                   args.batch_size, args.workers)
        train_loader.start()
        train_loss, train_cer = train(model, train_batch_num, train_queue, criterion,
                                      optimizer, device, train_begin, args.workers,
                                      10, args.teacher_forcing)
        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f'
                    % (epoch, train_loss, train_cer))
        train_loader.join()
        valid_queue = queue.Queue(args.workers * 2)
        valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
        valid_loader.start()
        eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f'
                    % (epoch, eval_loss, eval_cer))
        valid_loader.join()
        nsml.report(False, step=epoch, train_epoch__loss=train_loss,
                    train_epoch__cer=train_cer, eval__loss=eval_loss,
                    eval__cer=eval_cer)
        best_model = (eval_loss < best_loss)
        nsml.save(args.save_name)
        if best_model:
            nsml.save('best')
            best_loss = eval_loss
# Keras training fragment: explicit per-epoch fit_generator loop with wall-time
# measurement, so NSML can report metrics and checkpoint after each epoch.
# NOTE(review): model, train_generator, val_generator, STEP_SIZE_TRAIN,
# reduce_lr, nb_epoch and batch_size are defined outside this fragment.
t0 = time.time()
for epoch in range(nb_epoch):
    t1 = time.time()
    # initial_epoch/epochs bracket a single epoch per call.
    res = model.fit_generator(generator=train_generator,
                              steps_per_epoch=STEP_SIZE_TRAIN,
                              initial_epoch=epoch,
                              validation_data=val_generator,
                              epochs=epoch + 1,
                              callbacks=[reduce_lr],
                              verbose=1,
                              shuffle=True,
                              validation_steps=val_generator.n // batch_size)
    t2 = time.time()
    print(res.history)
    print('Training time for one epoch : %.1f' % ((t2 - t1)))
    train_loss, train_acc = res.history['loss'][0], res.history['acc'][0]
    val_loss, val_acc = res.history['val_loss'][0], res.history['val_acc'][0]
    nsml.report(summary=True, step=epoch, epoch=epoch, epoch_total=nb_epoch,
                loss=train_loss, acc=train_acc, val_loss=val_loss, val_acc=val_acc)
    nsml.save(epoch)
print('Total training time : %.1f' % (time.time() - t0))
# Interior of a nested training loop (the enclosing `for epoch ...` /
# `for i ...` headers are outside this fragment): per-batch backprop with
# optional gradient clipping, then per-epoch LR decay and checkpointing.
# NOTE(review): loss.data[0] and nn.utils.clip_grad_norm are legacy
# (PyTorch <=0.3) idioms — loss.item() / clip_grad_norm_ in modern PyTorch.
        loss = criterion(predictions, label_vars)
        optimizer.zero_grad()
        loss.backward()
        if grad_clip:
            grad_norm = nn.utils.clip_grad_norm(model.parameters(), 1)
            grad_norm = grad_norm
        else:
            grad_norm = 0
        optimizer.step()
        avg_loss += loss.data[0]
        # Update the EMA copy of the model weights.
        accumulate(model_run, model)
        print('batch', i, 'loss', loss.data[0], 'norm', grad_norm)
        nsml.report(summary=True, scope=locals(), epoch=epoch,
                    epoch_total=config.epochs, train__loss=float(loss.data[0]),
                    step=epoch * total_batch + i)
        #print('batch:', i, 'loss:', loss.data[0])
    print('epoch:', epoch, ' train_loss:', float(avg_loss/total_batch))
    # Manual step decay: divide lr by 10 after epochs 4 and 6.
    if epoch in [4, 6]:
        optimizer.param_groups[0]['lr'] /= 10
        print('reduce learning rate')
    # Translated from Korean: reports values shown by `nsml ps` or the
    # web TensorBoard.
    #nsml.report(summary=True, scope=locals(), epoch=epoch, epoch_total=config.epochs,
    #            train__loss=float(avg_loss/total_batch), step=epoch)
    # DONOTCHANGE (You can decide how often you want to save the model)
    nsml.save(epoch)
def train(*, env_id, num_env, hps, num_timesteps, seed):
    """Train a PPO agent (with RND-style intrinsic reward options) on Atari.

    Builds a frame-stacked vectorized env, constructs the agent from the
    ``hps`` dict (every hyperparameter is consumed with ``pop`` so leftovers
    are detected), then steps until ``num_timesteps`` env steps are reached,
    logging updates and mirroring numeric stats to NSML.

    Args:
        env_id: gym Atari environment id.
        num_env: number of parallel environments per MPI rank.
        hps: dict of hyperparameters; fully consumed by this function.
        num_timesteps: total environment step budget.
        seed: RNG seed for env creation.
    """
    venv = VecFrameStack(
        make_atari_env(env_id, num_env, seed, wrapper_kwargs=dict(),
                       start_index=num_env * MPI.COMM_WORLD.Get_rank(),
                       max_episode_steps=hps.pop('max_episode_steps')),
        hps.pop('frame_stack'))
    venv.score_multiple = 1
    venv.record_obs = False
    ob_space = venv.observation_space
    ac_space = venv.action_space
    gamma = hps.pop('gamma')
    # Policy architecture chosen by hyperparameter: recurrent or feedforward CNN.
    policy = {'rnn': CnnGruPolicy, 'cnn': CnnPolicy}[hps.pop('policy')]
    agent = PpoAgent(
        scope='ppo',
        ob_space=ob_space,
        ac_space=ac_space,
        stochpol_fn=functools.partial(
            policy, scope='pol', ob_space=ob_space, ac_space=ac_space,
            update_ob_stats_independently_per_gpu=hps.pop(
                'update_ob_stats_independently_per_gpu'),
            proportion_of_exp_used_for_predictor_update=hps.pop(
                'proportion_of_exp_used_for_predictor_update'),
            exploration_type=hps.pop("exploration_type"),
            beta=hps.pop("beta"),
        ),
        gamma=gamma,
        gamma_ext=hps.pop('gamma_ext'),
        lam=hps.pop('lam'),
        nepochs=hps.pop('nepochs'),
        nminibatches=hps.pop('nminibatches'),
        lr=hps.pop('lr'),
        cliprange=0.1,
        nsteps=128,
        ent_coef=0.001,
        max_grad_norm=hps.pop('max_grad_norm'),
        use_news=hps.pop("use_news"),
        comm=MPI.COMM_WORLD if MPI.COMM_WORLD.Get_size() > 1 else None,
        update_ob_stats_every_step=hps.pop('update_ob_stats_every_step'),
        int_coeff=hps.pop('int_coeff'),
        ext_coeff=hps.pop('ext_coeff'),
        noise_type=hps.pop('noise_type'),
        noise_p=hps.pop('noise_p'),
        use_sched=hps.pop('use_sched'),
        num_env=num_env,
        exp_name=hps.pop('exp_name'),
    )
    agent.start_interaction([venv])
    if hps.pop('update_ob_stats_from_random_agent'):
        # Warm up observation normalization statistics with a random policy.
        agent.collect_random_statistics(num_timesteps=128 * 50)
    # Every hyperparameter must have been consumed — catches typos in hps.
    assert len(hps) == 0, "Unused hyperparameters: %s" % list(hps.keys())
    counter = 0
    while True:
        info = agent.step()
        n_updates = 0
        if info['update']:
            logger.logkvs(info['update'])
            logger.dumpkvs()
            if NSML:
                n_updates = int(info['update']['n_updates'])
                # Only numeric update stats are forwarded to NSML.
                nsml_dict = {
                    k: np.float64(v)
                    for k, v in info['update'].items()
                    if isinstance(v, Number)
                }
                nsml.report(step=n_updates, **nsml_dict)
        counter += 1
        #if n_updates >= 40*1000: # 40K updates
        #    break
        if agent.I.stats['tcount'] > num_timesteps:
            break
    agent.stop_interaction()
def main():
    """Entry point for the speech-recognition baseline (queue-loader variant).

    Parses CLI args, builds the seq2seq (or transformer-encoder) model, binds it
    to NSML, then runs the train/evaluate epoch loop with early stopping.
    """
    # Label mappings and special-token ids are module-level globals used by
    # the loader/metric helpers elsewhere in the file.
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    parser.add_argument('--hidden_size', type=int, default=512, help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size', type=int, default=3, help='number of layers of model (default: 3)')
    parser.add_argument('--dropout', type=float, default=0.2, help='dropout rate in training (default: 0.2)')
    parser.add_argument('--bidirectional', action='store_true', help='use bidirectional RNN for encoder (default: False)')
    parser.add_argument('--use_attention', action='store_true', help='use attention between encoder-decoder (default: False)')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size in training (default: 32)')
    parser.add_argument('--workers', type=int, default=4, help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=10, help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-04, help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing', type=float, default=0.5, help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len', type=int, default=80, help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda', action='store_true', help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model', help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument('--pause', type=int, default=0)
    parser.add_argument('--log_dir', help='directory for logging, valid in local only')
    parser.add_argument('--patience', type=int, help='patience before early stopping (default to None)')
    parser.add_argument('--weight_decay', type=float, default=0, help='weight for L2 regularization')
    parser.add_argument('--save_from', type=int, default=0, help='starting epoch to save models')
    parser.add_argument('--load_ckpt', nargs=2, help='session and checkpoint to load')
    parser.add_argument('--transformer_encoder', action='store_true')
    parser.add_argument('--share_params', action='store_true')
    args = parser.parse_args()

    # Echo the full configuration for reproducibility.
    for name, value in args.__dict__.items():
        print('{}:\t{}'.format(name, value))
    print()

    # Local file logging is disabled when running inside NSML.
    if nsml.IS_ON_NSML:
        args.log_dir = None
    if args.log_dir is not None:
        if not osp.exists(args.log_dir):
            os.makedirs(args.log_dir)
        # Persist the arguments alongside the logs.
        with open(osp.join(args.log_dir, 'args.txt'), 'w') as f:
            for name, value in args.__dict__.items():
                f.write('{}\t{}\n'.format(name, value))

    char2index, index2char = label_loader.load_label('./hackathon.labels')
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']

    # Seed all RNG sources for reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT: defined in loader.py
    feature_size = N_FFT / 2 + 1

    if args.transformer_encoder:
        # NOTE(review): d_word_vec/d_model=257 appear tied to feature_size
        # (N_FFT/2 + 1) — confirm if N_FFT changes.
        enc = Encoder(len_max_seq=1248, d_word_vec=257, n_layers=6, n_head=8,
                      d_k=64, d_v=64, d_model=257, d_inner=2048, dropout=0.1,
                      share_params=args.share_params)
    else:
        enc = EncoderRNN(feature_size, args.hidden_size,
                         input_dropout_p=args.dropout, dropout_p=args.dropout,
                         n_layers=args.layer_size,
                         bidirectional=args.bidirectional, rnn_cell='gru',
                         variable_lengths=False)
    # Decoder hidden size doubles when the encoder is bidirectional.
    dec = DecoderRNN(len(char2index), args.max_len,
                     args.hidden_size * (2 if args.bidirectional else 1),
                     SOS_token, EOS_token, n_layers=args.layer_size,
                     rnn_cell='gru', bidirectional=args.bidirectional,
                     input_dropout_p=args.dropout, dropout_p=args.dropout,
                     use_attention=args.use_attention)
    if args.transformer_encoder:
        model = Seq2SeqTransformerEncoder(enc, dec)
    else:
        model = Seq2seq(enc, dec)
    model.flatten_parameters()

    # Uniform init in [-0.08, 0.08] for all parameters.
    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    model = nn.DataParallel(model).to(device)
    optimizer = optim.Adam(model.module.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # Sum-reduced CE; padding positions are excluded from the loss.
    criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=PAD_token).to(device)

    # Must bind before nsml.load/paused can (de)serialize the model.
    bind_model(model, optimizer)

    if args.load_ckpt is not None:
        nsml.load(session=args.load_ckpt[0], checkpoint=args.load_ckpt[1])

    if args.pause == 1:
        nsml.paused(scope=locals())

    if args.mode != "train":
        return

    data_list = osp.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()

    with open(data_list, 'r') as f:
        for line in f:
            # line: "aaa.wav,aaa.label"
            wav_path, script_path = line.strip().split(',')
            wav_paths.append(osp.join(DATASET_PATH, 'train_data', wav_path))
            script_paths.append(osp.join(DATASET_PATH, 'train_data', script_path))

    cnt_converged = 0
    best_loss = 1e10
    begin_epoch = 0

    # load all target scripts for reducing disk i/o
    target_path = osp.join(DATASET_PATH, 'train_label')
    load_targets(target_path)

    train_batch_num, train_dataset_list, valid_dataset = split_dataset(args, wav_paths, script_paths, valid_ratio=0.05)

    logger.info('start')
    train_begin = time.time()

    if args.log_dir is not None:
        train_writer = SummaryWriter(logdir=osp.join(args.log_dir, 'train'))
        valid_writer = SummaryWriter(logdir=osp.join(args.log_dir, 'valid'))
    else:
        train_writer, valid_writer = None, None

    for epoch in range(begin_epoch, args.max_epochs):
        # NOTE(review): this sanity-check evaluation runs on EVERY epoch when
        # --load_ckpt is set, not just once after loading — confirm intent.
        if args.load_ckpt is not None:
            valid_queue = queue.Queue(args.workers * 2)
            valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
            valid_loader.start()
            eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device)
            logger.info('Eval right after model loading (just for checking)')
            logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' % (epoch, eval_loss, eval_cer))

        # Training pass: producer threads fill train_queue; train() consumes it.
        train_queue = queue.Queue(args.workers * 2)
        train_loader = MultiLoader(train_dataset_list, train_queue, args.batch_size, args.workers)
        train_loader.start()
        train_loss, train_cer = train(model, train_batch_num, train_queue, criterion, optimizer, device, train_begin, args.workers, 100, args.teacher_forcing)
        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' % (epoch, train_loss, train_cer))
        if args.log_dir is not None:
            train_writer.add_scalar('epoch/loss', train_loss, epoch)
            train_writer.add_scalar('epoch/CER', train_cer, epoch)
        train_loader.join()

        # Validation pass on a single-worker loader.
        valid_queue = queue.Queue(args.workers * 2)
        valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
        valid_loader.start()
        eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' % (epoch, eval_loss, eval_cer))
        if args.log_dir is not None:
            valid_writer.add_scalar('epoch/loss', eval_loss, epoch)
            valid_writer.add_scalar('epoch/CER', eval_cer, epoch)
            with open(osp.join(args.log_dir, 'loss.txt'), 'a') as f:
                f.write('epoch: {}, train: {:.6f}, valid: {:.6f}\n'.format(epoch, train_loss, eval_loss))
            with open(osp.join(args.log_dir, 'CER.txt'), 'a') as f:
                f.write('epoch: {}, train: {:.6f}, valid: {:.6f}\n'.format(epoch, train_cer, eval_cer))
        valid_loader.join()

        nsml.report(False, step=epoch, train_epoch__loss=train_loss, train_epoch__cer=train_cer, eval__loss=eval_loss, eval__cer=eval_cer)

        if epoch > args.save_from:
            nsml.save(args.save_name + '_e{}'.format(epoch))
        best_model = (eval_loss < best_loss)
        if best_model:
            nsml.save('best')
            best_loss = eval_loss

        # Early stopping: count consecutive epochs worse than the best so far.
        if eval_loss > best_loss:
            cnt_converged += 1
            if args.patience is not None and cnt_converged > args.patience:
                break
        else:
            cnt_converged = 0
def main():
    """Entry point for the speech-recognition baseline (DataLoader variant).

    Parses CLI args, selects the acoustic feature (spec/melspec/mfcc), builds
    the seq2seq model, warm-starts from a fixed NSML checkpoint, and runs the
    train/evaluate epoch loop.
    """
    # Label mappings and special-token ids are module-level globals used by
    # the loader/metric helpers elsewhere in the file.
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    # NOTE(review): help text says 256 but the default is 512.
    parser.add_argument('--hidden_size', type=int, default=512, help='hidden size of model (default: 256)')
    parser.add_argument('--layer_size', type=int, default=3, help='number of layers of model (default: 3)')
    parser.add_argument('--dropout', type=float, default=0.2, help='dropout rate in training (default: 0.2)')
    parser.add_argument(
        '--bidirectional', action='store_true',
        help='use bidirectional RNN for encoder (default: False)')
    parser.add_argument(
        '--use_attention', action='store_true',
        help='use attention between encoder-decoder (default: False)')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size in training (default: 32)')
    parser.add_argument(
        '--workers', type=int, default=4,
        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=10, help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-04, help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing', type=float, default=0.5, help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len', type=int, default=80, help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda', action='store_true', default=False, help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model', help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument("--pause", type=int, default=0)
    parser.add_argument('--rnn_cell', type=str, default='gru')
    parser.add_argument("--iteration", type=int, default=0)
    parser.add_argument('--feature', type=str, default='spec')
    parser.add_argument('--save_dir', type=str, default='')
    args = parser.parse_args()

    char2index, index2char = label_loader.load_label('./hackathon.labels')
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']

    # Seed all RNG sources for reproducibility.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    logger.info('Using %s as feature' % args.feature)
    if args.save_dir:
        logger.info('Save directory: %s' % args.save_dir)
        os.makedirs(args.save_dir, exist_ok=True)

    # N_FFT: defined in loader.py
    # Feature dimension depends on which front-end feature was selected.
    if args.feature == 'mfcc':
        feature_size = N_MFCC * 3  # concat of mfcc, mfcc' mfcc''
    elif args.feature == 'melspec':
        feature_size = N_MELS
    elif args.feature == 'spec':
        feature_size = N_FFT / 2 + 1
    else:
        raise ValueError('Unsupported feature %s' % args.feature)

    enc = EncoderRNN(feature_size, args.hidden_size,
                     input_dropout_p=args.dropout, dropout_p=args.dropout,
                     n_layers=args.layer_size,
                     bidirectional=args.bidirectional, rnn_cell=args.rnn_cell,
                     variable_lengths=False)
    # Decoder hidden size doubles when the encoder is bidirectional.
    dec = DecoderRNN(len(char2index), args.max_len,
                     args.hidden_size * (2 if args.bidirectional else 1),
                     SOS_token, EOS_token, n_layers=args.layer_size,
                     rnn_cell=args.rnn_cell, bidirectional=args.bidirectional,
                     input_dropout_p=args.dropout, dropout_p=args.dropout,
                     use_attention=args.use_attention)
    model = Seq2seq(enc, dec)
    model.flatten_parameters()

    # Uniform init in [-0.08, 0.08] for all parameters.
    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    model = nn.DataParallel(model).to(device)
    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
    # Sum-reduced CE; padding positions are excluded from the loss.
    criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=PAD_token).to(device)

    # Must bind before nsml.load/paused can (de)serialize the model.
    bind_model(model, optimizer, args.feature)

    if args.pause != 1:
        # Warm-start from a hard-coded pretraining session/checkpoint, then
        # immediately re-save it as this session's 'init' checkpoint.
        nsml.load(checkpoint='10', session='team236/sr-hack-2019-dataset/122')
        nsml.save('init')
        logger.info('Saved!')

    if args.pause == 1:
        nsml.paused(scope=locals())

    if args.mode != "train":
        return

    data_list = os.path.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()

    # load all target scripts for reducing disk i/o
    target_path = os.path.join(DATASET_PATH, 'train_label')
    target_dict = load_targets(target_path)

    with open(data_list, 'r') as f:
        for line in f:
            # line: "aaa.wav,aaa.label"
            wav_path, script_path = line.strip().split(',')
            wav_paths.append(os.path.join(DATASET_PATH, 'train_data', wav_path))
            script_paths.append(
                os.path.join(DATASET_PATH, 'train_data', script_path))

    best_loss = 1e10
    begin_epoch = 0

    train_dataset, valid_dataset = split_dataset(args, wav_paths, script_paths, target_dict, args.feature, valid_ratio=0.05)

    train_begin = time.time()
    for epoch in range(begin_epoch, args.max_epochs):
        # Fresh loaders each epoch; training data is reshuffled.
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, collate_fn=collate_fn)
        train_loss, train_cer = train(model, train_loader, criterion, optimizer, device, train_begin, 10, args.teacher_forcing)
        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' % (epoch, train_loss, train_cer))

        valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=4, shuffle=False, num_workers=args.workers, collate_fn=collate_fn)
        eval_loss, eval_cer = evaluate(model, valid_loader, criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' % (epoch, eval_loss, eval_cer))

        nsml.report(False, step=epoch, train_epoch__loss=train_loss, train_epoch__cer=train_cer, eval__loss=eval_loss, eval__cer=eval_cer)

        best_model = (eval_loss < best_loss)
        # Checkpoint every epoch under both the run name and the epoch number.
        nsml.save(args.save_name)
        nsml.save(str(epoch))
        if args.save_dir:
            # NOTE(review): eval_cer is a float but formatted with %d, so the
            # filename truncates the CER — confirm this is intended.
            save_model(
                model, optimizer,
                os.path.join(args.save_dir, './epoch-%d-cer-%d.pt' % (epoch, eval_cer)))
        if best_model:
            nsml.save('best')
            best_loss = eval_loss
def train(model, data_loader, criterion, optimizer, device, train_begin, print_batch=5, teacher_forcing_ratio=1):
    """Run one training epoch over ``data_loader``.

    Args:
        model: ``nn.DataParallel``-wrapped seq2seq model (uses ``model.module``).
        data_loader: yields (feats, scripts, feat_lengths, script_lengths).
        criterion: sum-reduced loss over flattened logits vs. targets.
        optimizer: optimizer stepped once per batch.
        device: torch device the batch tensors are moved to.
        train_begin: wall-clock start time of the whole run (for elapsed logs).
        print_batch: log every ``print_batch`` batches.
        teacher_forcing_ratio: forwarded to the model's decoder.

    Returns:
        (average loss, character error rate) over the epoch.
    """
    total_loss = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sent_num = 0
    batch = 0  # NOTE(review): immediately shadowed by the loop variable below.

    model.train()
    total_batch_size = len(data_loader)
    logger.info('train() start')

    begin = epoch_begin = time.time()

    for batch_index, batch in enumerate(data_loader):
        feats, scripts, feat_lengths, script_lengths = batch
        optimizer.zero_grad()

        feats = feats.to(device)
        scripts = scripts.to(device)
        src_len = scripts.size(1)
        # Targets drop the leading SOS token.
        target = scripts[:, 1:]

        model.module.flatten_parameters()
        logit = model(feats, feat_lengths, scripts, teacher_forcing_ratio=teacher_forcing_ratio)
        # Model returns a list of per-step logits; stack to (batch, time, vocab).
        logit = torch.stack(logit, dim=1).to(device)
        y_hat = logit.max(-1)[1]

        loss = criterion(logit.contiguous().view(-1, logit.size(-1)), target.contiguous().view(-1))
        total_loss += loss.item()
        # NOTE(review): loss is normalized by total input-frame count, not by
        # target-token count — confirm this is the intended metric.
        total_num += sum(feat_lengths)

        # Occasionally (1% of batches) print decoded examples for inspection.
        display = random.randrange(0, 100) == 0
        dist, length = get_distance(target, y_hat, display=display)
        total_dist += dist
        total_length += length
        total_sent_num += target.size(0)

        loss.backward()
        optimizer.step()

        if batch_index % print_batch == 0:
            current = time.time()
            elapsed = current - begin
            epoch_elapsed = (current - epoch_begin) / 60.0
            train_elapsed = (current - train_begin) / 3600.0
            logger.info(
                'batch: {:4d}/{:4d}, loss: {:.4f}, cer: {:.2f}, elapsed: {:.2f}s {:.2f}m {:.2f}h'
                .format(batch_index, total_batch_size, total_loss / total_num,
                        total_dist / total_length, elapsed, epoch_elapsed, train_elapsed))
            begin = time.time()

            nsml.report(False, step=train.cumulative_batch_count, train_step__loss=total_loss / total_num, train_step__cer=total_dist / total_length)
        # Function attribute: global batch counter shared across epochs.
        train.cumulative_batch_count += 1
    logger.info('train() completed')
    return total_loss / total_num, total_dist / total_length
def train(self):
    """Run the full TF1 training loop for the source-separation model.

    Builds the optimizer(s), opens a session, optionally restores a
    checkpoint, then iterates ``config.iter + 1`` steps. Periodically it logs
    loss/alpha, dumps spectrogram figures, decays the learning rate, runs
    separation on held-out test/simulation audio (writing wav files, figures,
    and an SNR spreadsheet), and saves checkpoints.

    Side effects only (files under ./Results, NSML reports); returns None.
    """
    with tf.control_dependencies(self.update_ops):
        step = tf.Variable(0, trainable=False)
        lr_ = config.lr  # python-side LR value, fed through the placeholder
        lr = tf.placeholder(tf.float32)
        optimizer = tf.train.AdamOptimizer(lr, beta1=config.Beta1, beta2=config.Beta2)
        # Hybrid two-stage loss: a pre-training objective is optimized first,
        # then the main loss (see the iteration loop below).
        if (config.loss_seq in ('two', 'first', 'second')) & (config.hybrid == True):
            self.trainer_pre = optimizer.minimize(
                self.loss_pre, var_list=tf.trainable_variables(), global_step=step)
        self.trainer = optimizer.minimize(
            self.loss, var_list=tf.trainable_variables(), global_step=step)

    NUM_THREADS = 3
    configuration = tf.ConfigProto(inter_op_parallelism_threads=NUM_THREADS,
                                   intra_op_parallelism_threads=NUM_THREADS,
                                   allow_soft_placement=True,
                                   device_count={'CPU': 3},
                                   )

    with tf.Session(config=configuration) as sess:
        # Output directory layout under ./Results.
        path = os.path.join(os.getcwd(), 'Results', 'Spectrograms')
        os.makedirs(path, exist_ok=True)
        path = os.path.join(os.getcwd(), 'Results', 'Test')
        os.makedirs(path, exist_ok=True)
        path = os.path.join(os.getcwd(), 'Results', 'Simulation')
        os.makedirs(path, exist_ok=True)
        path = os.path.join(os.getcwd(), 'Results', 'CheckPoint')
        os.makedirs(path, exist_ok=True)
        path = os.path.join(os.getcwd(), 'Results', 'Logs_Training')
        os.makedirs(path, exist_ok=True)

        tf.global_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        saver = tf.train.Saver(max_to_keep=10)
        writer = tf.summary.FileWriter('./Results/Logs_Training', sess.graph)

        """
        Restore
        Restore checkpoint only when the user specify the 'config.restore' to be True
        """
        if config.restore:
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir="./Results/CheckPoint")
            try:
                if ckpt and ckpt.model_checkpoint_path:
                    print("check point path : ", ckpt.model_checkpoint_path)
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    print('Restored!')
            except AttributeError:
                print("No checkpoint")
        else:
            print("No model restoration")

        # Windowed accumulators, reset every 10000 iterations when logged.
        training_loss = 0
        training_alpha_sum = 0
        training_alpha_list = []

        f = open(os.path.join(os.getcwd(), 'Results', 'Logs_Training', 'training_loss.txt'), 'a+')
        f.write("Variable Size : {}\n".format(self.variable_size))
        f.close()

        for iter in range(config.iter + 1):
            if coord.should_stop():
                print("break data pipeline")
                break

            # Hybrid schedule: pre-loss until loss_thereshold, then main loss.
            if (config.loss_seq in ('two', 'first', 'second')) & (config.hybrid == True):
                if iter <= config.loss_thereshold:
                    _, loss_, alpha_ = sess.run([self.trainer_pre, self.loss_pre, self.graph.alpha],
                                                feed_dict={lr: lr_})
                else:
                    _, loss_, alpha_ = sess.run([self.trainer, self.loss, self.graph.alpha],
                                                feed_dict={lr: lr_})
            else:
                _, loss_, alpha_ = sess.run([self.trainer, self.loss, self.graph.alpha],
                                            feed_dict={lr: lr_})

            training_loss += loss_
            training_alpha_sum += np.mean(alpha_)
            training_alpha_list.append(np.squeeze(alpha_))

            if iter % 10 == 0:
                print("iter : {} (loss : {:.3f}, alpha : {:.3f}/ {:.3f})".format(iter, loss_, np.mean(alpha_), np.std(alpha_)))
                summary_ = sess.run(self.summary)
                writer.add_summary(summary_, iter)

            # print spectrograms
            if iter % 5000 == 0:
                spec_mix_, spec_tgt_, spec_est_ = sess.run(
                    [self.graph.input_spec, self.graph.source_spec, self.graph.masked_spec])
                # Magnitudes from the (real, imag) channels of each spectrogram.
                mag_mix_ = np.sqrt(np.square(spec_mix_[:, :, :, 0]) + np.square(spec_mix_[:, :, :, 1]))
                mag_tgt_ = np.sqrt(np.square(spec_tgt_[:, :, :, 0]) + np.square(spec_tgt_[:, :, :, 1]))
                mag_est_ = np.sqrt(np.square(spec_est_[:, :, :, 0]) + np.square(spec_est_[:, :, :, 1]))
                # Three examples side by side, log scale with a small floor.
                mag_image1 = np.log10(np.concatenate((mag_mix_[0], mag_mix_[1], mag_mix_[2]), axis=1) + 1e-02)
                mag_image2 = np.log10(np.concatenate((mag_tgt_[0], mag_tgt_[1], mag_tgt_[2]), axis=1) + 1e-02)
                mag_image3 = np.log10(np.concatenate((mag_est_[0], mag_est_[1], mag_est_[2]), axis=1) + 1e-02)
                mag_image4 = np.log10(
                    np.concatenate((mag_mix_[-3], mag_mix_[-2], mag_mix_[-1]), axis=1) + 1e-02)
                mag_image5 = np.log10(
                    np.concatenate((mag_tgt_[-3], mag_tgt_[-2], mag_tgt_[-1]), axis=1) + 1e-02)
                mag_image6 = np.log10(
                    np.concatenate((mag_est_[-3], mag_est_[-2], mag_est_[-1]), axis=1) + 1e-02)

                fig = plt.figure(figsize=(15, 15))
                fig.suptitle('Spectrograms', fontsize=20, family='serif')
                font = {'family': 'serif',
                        'color': 'darkred',
                        'weight': 'normal',
                        'size': 15,
                        }
                ax1 = fig.add_subplot(1, 3, 1)
                ax1.set_title('Mixture', fontdict=font)
                ax1.imshow(mag_image1, interpolation='nearest', aspect='auto', cmap='jet')
                ax1.xaxis.set_tick_params(labelsize=15)
                ax1.yaxis.set_tick_params(labelsize=15)
                plt.gca().invert_yaxis()
                ax2 = fig.add_subplot(1, 3, 2)
                ax2.set_title('True Vocal', fontdict=font)
                ax2.imshow(mag_image2, interpolation='nearest', aspect='auto', cmap='jet')
                ax2.xaxis.set_tick_params(labelsize=15)
                ax2.yaxis.set_tick_params(labelsize=15)
                plt.gca().invert_yaxis()
                ax3 = fig.add_subplot(1, 3, 3)
                ax3.set_title('Estimated Vocal', fontdict=font)
                ax3.imshow(mag_image3, interpolation='nearest', aspect='auto', cmap='jet')
                ax3.xaxis.set_tick_params(labelsize=15)
                ax3.yaxis.set_tick_params(labelsize=15)
                plt.gca().invert_yaxis()
                plt.tight_layout(pad=0.4, w_pad=1, h_pad=1.0)
                plt.subplots_adjust(top=0.85)
                fig.savefig("./Results/Spectrograms/Spectrogram{}.png".format(iter))
                plt.close(fig)

                fig = plt.figure(figsize=(15, 15))
                fig.suptitle('NoiseSpectrograms', fontsize=20, family='serif')
                font = {'family': 'serif',
                        'color': 'darkred',
                        'weight': 'normal',
                        'size': 15,
                        }
                ax1 = fig.add_subplot(1, 3, 1)
                ax1.set_title('Noise', fontdict=font)
                ax1.imshow(mag_image4, interpolation='nearest', aspect='auto', cmap='jet')
                ax1.xaxis.set_tick_params(labelsize=15)
                ax1.yaxis.set_tick_params(labelsize=15)
                plt.gca().invert_yaxis()
                ax2 = fig.add_subplot(1, 3, 2)
                ax2.set_title('True Zeros', fontdict=font)
                ax2.imshow(mag_image5, interpolation='nearest', aspect='auto', cmap='jet')
                ax2.xaxis.set_tick_params(labelsize=15)
                ax2.yaxis.set_tick_params(labelsize=15)
                plt.gca().invert_yaxis()
                ax3 = fig.add_subplot(1, 3, 3)
                ax3.set_title('Estimated Zeros', fontdict=font)
                ax3.imshow(mag_image6, interpolation='nearest', aspect='auto', cmap='jet')
                ax3.xaxis.set_tick_params(labelsize=15)
                ax3.yaxis.set_tick_params(labelsize=15)
                plt.gca().invert_yaxis()
                plt.tight_layout(pad=0.4, w_pad=1, h_pad=1.0)
                plt.subplots_adjust(top=0.85)
                fig.savefig(
                    "./Results/Spectrograms/Noise_input_Spectrogram{}.png".format(iter))
                plt.close(fig)

            # NOTE(review): this also fires at iter == 0, so the first entry
            # averages a single step over 10000 — confirm whether intended.
            if (iter) % 10000 == 0:
                training_alpha_std = np.std(np.array(training_alpha_list))
                f = open(os.path.join(os.getcwd(), 'Results', 'Logs_Training', 'training_loss.txt'), 'a+')
                f.write("average {} loss : {:.4f} (iter {} ~ {})\n".format(config.loss, training_loss / 10000,
                                                                           iter - 9999, iter + 1))
                f.close()
                f = open(os.path.join(os.getcwd(), 'Results', 'Logs_Training', 'training_alpha.txt'), 'a+')
                f.write("average alpha : {:.4f} / std : {:.4f} (iter {} ~ {})\n".format(training_alpha_sum / 10000,
                                                                                        training_alpha_std,
                                                                                        iter - 9999, iter + 1))
                f.close()
                nsml.report(summary=True,
                            iter=iter,
                            train__loss=float(training_loss / 10000),
                            alpha=float(training_alpha_sum / 10000),
                            alpha_std=float(training_alpha_std),
                            lr=float(lr_)*10000
                            )
                training_loss = 0
                training_alpha_sum = 0
                training_alpha_list = []

            if (iter+1) % config.lr_decay_iter == 0:
                if config.lr_decay:
                    lr_ = lr_/config.lr_decay_rate
                    print("learning rate is decayed : {}".format(lr_))

            # test songs
            if (iter + 1) % config.model_save_iter == 0:
                moving_rate = config.moving_rate
                hop = int(config.audio_size / moving_rate)  # This has to be an interger without any rounding
                # Overlap-add synthesis window, normalized by the overlap factor.
                mask = (signal.get_window('hanning', config.audio_size)) / (moving_rate / 2)
                # mask = (1 / moving_rate)
                for folder, root in [('Test', config.test_sample_root), ('Simulation', config.simul_sample_root)]:
                    test_sample_path = glob(os.path.join(root, "noisy/*.wav"))
                    print("{} audios for test".format(len(test_sample_path)))
                    SNR_list = []
                    infer_time = []
                    wb = openpyxl.Workbook()
                    ws = wb.active
                    for k, song_path in enumerate(sorted(test_sample_path)):
                        print("\n %d th song" % k)
                        # Load song
                        name = os.path.basename(song_path)
                        print("Loading {}...".format(name))
                        noisy, _ = librosa.core.load(song_path, sr=16000)
                        # Clean reference shares the first 8 chars of the file name.
                        clean, _ = librosa.core.load(os.path.join(root, "spk", name[:8]+".wav"), sr=16000)
                        noise = noisy - clean
                        print("song shape : ", noisy.shape)
                        song_length = noisy.shape[0]
                        num_frame = int(song_length / config.audio_size) + 1

                        # pad zeros to song to make the length of it be the multiple of config.audio_size
                        noisy_pad = np.lib.pad(noisy, (
                            config.audio_size, num_frame * config.audio_size - song_length + config.audio_size),
                            'constant')
                        clean_pad = np.lib.pad(clean, (
                            config.audio_size, num_frame * config.audio_size - song_length + config.audio_size),
                            'constant')
                        padded_song_shape = noisy_pad.shape[0]

                        # Slice mixture and source
                        noisy_slice_list = []
                        clean_slice_list = []
                        hop = int(
                            config.audio_size / config.moving_rate)  # This has to be an interger without any rounding
                        num_to_move = int((padded_song_shape - config.audio_size) / hop) + 1
                        for i in range(num_to_move):
                            start_point = int(i * hop)
                            end_point = int(start_point + config.audio_size)
                            noisy_slice = noisy_pad[start_point:end_point]  # ex: (1,16384)
                            noisy_slice_list.append(noisy_slice)
                            clean_slice = clean_pad[start_point:end_point]
                            clean_slice_list.append(clean_slice)
                        num_slice = len(noisy_slice_list)
                        noisy_stacked = np.array(noisy_slice_list)
                        clean_stacked = np.array(clean_slice_list)

                        # Separation: run the test graph one slice at a time.
                        segments = []
                        spec_mix_list = []
                        spec_tgt_list = []
                        spec_est_list = []
                        spec_est1_list = []
                        spec_est2_list = []
                        alpha_list = []
                        for i in range(num_slice):
                            start_time = time.time()
                            print("processing {}: {}/{}".format(name, i, num_slice))
                            estimated_sample, spec_mix_, spec_tgt_, spec_est_, spec_est1_, spec_est2_, alpha_ \
                                = sess.run([self.graph_test.estimated,
                                            self.graph_test.input_spec,
                                            self.graph_test.source_spec,
                                            self.graph_test.masked_spec,
                                            self.graph_test.masked_spec1,
                                            self.graph_test.masked_spec2,
                                            self.graph_test.alpha],
                                           feed_dict={self.mix_test: noisy_stacked[i:i + 1],
                                                      self.source_test: clean_stacked[i:i + 1]})
                            infer_time.append(time.time() - start_time)
                            # Window each estimate for the overlap-add below.
                            masked_sample = np.expand_dims(mask, axis=0) * estimated_sample
                            segments.append(masked_sample)
                            alpha_list.append(np.mean(alpha_))
                            # Keep every 4th slice's spectrograms for the figure.
                            if (i+1) % 4 == 0:
                                spec_mix_list.append(spec_mix_)
                                spec_tgt_list.append(spec_tgt_)
                                spec_est_list.append(spec_est_)
                                spec_est1_list.append(spec_est1_)
                                spec_est2_list.append(spec_est2_)

                        spec_mix_ = np.concatenate(spec_mix_list, axis=0)
                        spec_tgt_ = np.concatenate(spec_tgt_list, axis=0)
                        spec_est_ = np.concatenate(spec_est_list, axis=0)
                        spec_est1_ = np.concatenate(spec_est1_list, axis=0)
                        spec_est2_ = np.concatenate(spec_est2_list, axis=0)

                        # spectrograms
                        mag_mix_ = np.sqrt(np.square(spec_mix_[:, :, :, 0]) + np.square(spec_mix_[:, :, :, 1]))
                        mag_tgt_ = np.sqrt(np.square(spec_tgt_[:, :, :, 0]) + np.square(spec_tgt_[:, :, :, 1]))
                        mag_est_ = np.sqrt(np.square(spec_est_[:, :, :, 0]) + np.square(spec_est_[:, :, :, 1]))
                        mag_est1_ = np.sqrt(np.square(spec_est1_[:, :, :, 0]) + np.square(spec_est1_[:, :, :, 1]))
                        mag_est2_ = np.sqrt(np.square(spec_est2_[:, :, :, 0]) + np.square(spec_est2_[:, :, :, 1]))
                        mag_image1 = np.log10(np.concatenate((mag_mix_[0], mag_mix_[1], mag_mix_[2]), axis=1) + 1e-02)
                        mag_image2 = np.log10(np.concatenate((mag_tgt_[0], mag_tgt_[1], mag_tgt_[2]), axis=1) + 1e-02)
                        mag_image3 = np.log10(np.concatenate((mag_est_[0], mag_est_[1], mag_est_[2]), axis=1) + 1e-02)
                        mag_image4 = np.log10(np.concatenate((mag_est1_[0], mag_est1_[1], mag_est1_[2]), axis=1) + 1e-02)
                        # BUGFIX: middle panel previously used mag_est1_[1],
                        # mixing estimator-1 into the estimator-2 image.
                        mag_image5 = np.log10(np.concatenate((mag_est2_[0], mag_est2_[1], mag_est2_[2]), axis=1) + 1e-02)

                        fig = plt.figure(figsize=(25, 15))
                        fig.suptitle('Spectrograms', fontsize=20, family='serif')
                        font = {'family': 'serif',
                                'color': 'darkred',
                                'weight': 'normal',
                                'size': 15,
                                }
                        ax1 = fig.add_subplot(1, 5, 1)
                        ax1.set_title('Mixture', fontdict=font)
                        ax1.imshow(mag_image1, interpolation='nearest', aspect='auto', cmap='jet')
                        ax1.xaxis.set_tick_params(labelsize=15)
                        ax1.yaxis.set_tick_params(labelsize=15)
                        plt.gca().invert_yaxis()
                        ax2 = fig.add_subplot(1, 5, 2)
                        ax2.set_title('True Vocal', fontdict=font)
                        ax2.imshow(mag_image2, interpolation='nearest', aspect='auto', cmap='jet')
                        ax2.xaxis.set_tick_params(labelsize=15)
                        ax2.yaxis.set_tick_params(labelsize=15)
                        plt.gca().invert_yaxis()
                        ax3 = fig.add_subplot(1, 5, 3)
                        ax3.set_title('Estimated Vocal {:.3f}'.format(np.mean(alpha_)), fontdict=font)
                        ax3.imshow(mag_image3, interpolation='nearest', aspect='auto', cmap='jet')
                        ax3.xaxis.set_tick_params(labelsize=15)
                        ax3.yaxis.set_tick_params(labelsize=15)
                        plt.gca().invert_yaxis()
                        ax3 = fig.add_subplot(1, 5, 4)
                        ax3.set_title('Estimated-1 Vocal', fontdict=font)
                        ax3.imshow(mag_image4, interpolation='nearest', aspect='auto', cmap='jet')
                        ax3.xaxis.set_tick_params(labelsize=15)
                        ax3.yaxis.set_tick_params(labelsize=15)
                        plt.gca().invert_yaxis()
                        ax3 = fig.add_subplot(1, 5, 5)
                        ax3.set_title('Estimated-2 Vocal', fontdict=font)
                        ax3.imshow(mag_image5, interpolation='nearest', aspect='auto', cmap='jet')
                        ax3.xaxis.set_tick_params(labelsize=15)
                        ax3.yaxis.set_tick_params(labelsize=15)
                        plt.gca().invert_yaxis()
                        plt.tight_layout(pad=0.4, w_pad=1, h_pad=1.0)
                        plt.subplots_adjust(top=0.85)
                        fig.savefig("./Results/{}/{}_{}.png".format(folder, iter, os.path.splitext(name)[0]))
                        plt.close(fig)

                        # Post-processing(triangle mask)
                        # num_to_pad = int((config.audio_size/2)*(num_slice-1))
                        # temp = np.zeros(shape=(config.audio_size + num_to_pad))
                        # Overlap-add the windowed segments back into one signal.
                        temp = np.zeros(shape=(padded_song_shape))
                        for i in range(len(segments)):
                            start_point = int(i * (config.audio_size / config.moving_rate))
                            end_point = int(start_point + config.audio_size)
                            temp[start_point:end_point] = temp[start_point:end_point] + segments[i]

                        # Match the original song length
                        estimation = np.squeeze(temp[config.audio_size:config.audio_size + song_length])
                        # print("estimation shape: {}".format(estimation.shape))

                        # save separated source as audio
                        OUTPUT_FILENAME = "./Results/{}/{}_{}_{}_{}.wav".format(folder, iter, os.path.splitext(name)[0],
                                                                                config.network, config.network2)
                        librosa.output.write_wav(OUTPUT_FILENAME, estimation, 16000)

                        # SNR against the projection of the estimate onto clean.
                        s_target = clean * np.sum(estimation * clean) / np.sum(clean ** 2)
                        error = estimation - s_target
                        snr = 10 * np.log10(sum(s_target ** 2) / sum(error ** 2))
                        SNR_list.append(snr)
                        # File names encode "<id>_<??>_<noise type>_<snr>".
                        noise_type = os.path.splitext(name)[0].split("_")[2]
                        noise_snr = os.path.splitext(name)[0].split("_")[3]
                        ws.cell(row=3, column=k % 3 + 2).value = noise_snr
                        ws.cell(row=k//3 + 4, column=k % 3 + 2).value = snr
                        ws.cell(row=k//3 + 4, column=1).value = noise_type
                        ws.cell(row=3, column=k % 3 + 6).value = noise_snr
                        ws.cell(row=k//3 + 4, column=k % 3 + 6).value = np.mean(np.array(alpha_list))

                    snr_mean = np.mean(np.array(SNR_list))
                    snr_std = np.std(np.array(SNR_list))
                    print("snr mean : {}, std : {}".format(snr_mean, snr_std))
                    ws.cell(row=1, column=2).value = "mean"
                    ws.cell(row=1, column=3).value = "std"
                    ws.cell(row=1, column=4).value = "infer_time per frame"
                    ws.cell(row=2, column=2).value = snr_mean
                    ws.cell(row=2, column=3).value = snr_std
                    ws.cell(row=2, column=4).value = np.mean(np.array(infer_time))
                    ws.cell(row=2, column=6).value = "alpha"
                    wb.save('./Results/{}/snr_{}_{}_{}.xlsx'.format(folder, iter, config.network, config.network2))
                    wb.close()

                print("Save model for epoch {}".format(iter))
                saver.save(sess, "./Results/CheckPoint/model.ckpt", global_step=(iter+1))
                print("-------------------Model saved-------------------")

        coord.request_stop()
        coord.join(threads)
def train(model, total_batch_size, queue, criterion, optimizer, device, train_begin, train_loader_count, print_batch=5, teacher_forcing_ratio=1):
    """Run one training pass, consuming feature/script batches from `queue`.

    Batches are produced by background loader threads.  A batch whose feature
    tensor is empty (``feats.shape[0] == 0``) is the sentinel meaning one
    loader finished; training stops once every loader has sent its sentinel.

    Args:
        model: seq2seq model; assumed wrapped (e.g. DataParallel) since
            ``model.module`` is accessed directly below.
        total_batch_size: total number of batches, used only for logging.
        queue: queue yielding (feats, scripts, feat_lengths, script_lengths).
        criterion: loss applied to flattened logits vs. flattened targets.
        optimizer: optimizer stepped once per batch.
        device: torch device the batch tensors are moved to.
        train_begin: wall-clock time training started (for elapsed logging).
        train_loader_count: number of loader threads feeding `queue`.
        print_batch: log progress every `print_batch` batches.
        teacher_forcing_ratio: forwarded to the model's decoder.

    Returns:
        Tuple of (total_loss / total_num, total_dist / total_length) —
        average loss per accumulated frame count and character error rate.
    """
    total_loss = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sent_num = 0
    batch = 0

    model.train()
    logger.info('train() start')
    begin = epoch_begin = time.time()
    while True:
        if queue.empty():
            logger.debug('queue is empty')
        # Blocks until a loader thread provides the next batch.
        feats, scripts, feat_lengths, script_lengths = queue.get()
        if feats.shape[0] == 0:
            # empty feats means closing one loader
            train_loader_count -= 1
            logger.debug('left train_loader: %d' % (train_loader_count))
            if train_loader_count == 0:
                break
            else:
                continue
        optimizer.zero_grad()
        feats = feats.to(device)
        scripts = scripts.to(device)
        src_len = scripts.size(1)
        # Targets drop the leading token (presumably SOS — TODO confirm).
        target = scripts[:, 1:]
        model.module.flatten_parameters()
        logit = model(feats, feat_lengths, scripts, teacher_forcing_ratio=teacher_forcing_ratio)
        # Model returns per-step logits; stack along the time dimension.
        logit = torch.stack(logit, dim=1).to(device)
        y_hat = logit.max(-1)[1]
        loss = criterion(logit.contiguous().view(-1, logit.size(-1)), target.contiguous().view(-1))
        total_loss += loss.item()
        total_num += sum(feat_lengths)
        # Roughly 1-in-100 batches also prints decoded samples.
        display = random.randrange(0, 100) == 0
        dist, length = get_distance(target, y_hat, display=display)
        total_dist += dist
        total_length += length
        total_sent_num += target.size(0)
        loss.backward()
        optimizer.step()
        if batch % print_batch == 0:
            current = time.time()
            elapsed = current - begin
            epoch_elapsed = (current - epoch_begin) / 60.0
            train_elapsed = (current - train_begin) / 3600.0
            logger.info(
                'batch: {:4d}/{:4d}, loss: {:.4f}, cer: {:.2f}, elapsed: {:.2f}s {:.2f}m {:.2f}h'
                .format(
                    batch,  # len(dataloader),
                    total_batch_size,
                    total_loss / total_num,
                    total_dist / total_length,
                    elapsed, epoch_elapsed, train_elapsed))
            begin = time.time()
            nsml.report(False,
                        step=train.cumulative_batch_count,
                        train_step__loss=total_loss / total_num,
                        train_step__cer=total_dist / total_length)
        batch += 1
        # Function attribute used as a persistent step counter across calls.
        train.cumulative_batch_count += 1
    logger.info('train() completed')
    return total_loss / total_num, total_dist / total_length
def on_epoch_end(self, epoch, logs=None):
    """Report metrics to NSML and checkpoint the model after each epoch.

    Args:
        epoch: index of the epoch that just finished (used as the report step).
        logs: optional dict of metrics supplied by the Keras callback API.
    """
    # Fix: the original default `logs={}` is a mutable default argument,
    # shared across all calls (and all instances) — use None and rebind.
    if logs is None:
        logs = {}
    nsml.report(summary=True, scope=locals(), epoch=epoch,
                epoch_total=config.epochs, step=epoch)
    nsml.save(epoch)
', Accuracy:', round(accuracy, 2), ', time: %.2f' % (time.time() - t0)) t0 = time.time() if (i+1) >= total_eval_batch: break if total_eval_batch > 0: eval_accuracy = float(avg_accuracy / total_eval_batch) eval_loss = float(avg_loss/total_eval_batch) print('\t eval_loss: %.3f' % eval_loss, 'accuracy: %.2f' % eval_accuracy, ', time: %.1fs' % (time.time() - t1)) else: eval_accuracy = 0 eval_loss = 0 # nsml ps, 혹은 웹 상의 텐서보드에 나타나는 값을 리포트하는 함수입니다. nsml.report(summary=True, scope=locals(), epoch=epoch, epoch_total=config.epochs, train__loss=train_loss, train_accuracy=train_accuracy, eval__loss=eval_loss, eval_accuracy=eval_accuracy, step=epoch) # DONOTCHANGE (You can decide how often you want to save the model) nsml.save(epoch) # 로컬 테스트 모드일때 사용합니다 # 결과가 아래와 같이 나온다면, nsml submit을 통해서 제출할 수 있습니다. # [(0.0, 9.045), (0.0, 5.91), ... ] elif config.mode == 'test_local': with open(os.path.join(DATASET_PATH, 'train/train_data'), 'rt', encoding='utf-8') as f: reviews = f.readlines()[:config.max_dataset] res = nsml.infer(reviews) print(res)
def train(model, train_loader, args):
    """Train the steganography model on (cover, secret) image pairs.

    Total loss per batch:
        criterion(container, covers)
        + args.beta * criterion(revealed, secrets)
        + args.gamma * criterion(norm(container), norm(covers))  # if gamma > 0

    Args:
        model: network mapping (covers, secrets) -> (prepped, container, revealed).
        train_loader: DataLoader yielding (covers, secrets) batches.
        args: parsed CLI namespace (cuda, local, loss, learning_rate, gamma,
            beta, noise_level, epochs, verbose, draft, viz_count,
            max_dataset_size, batch_size, ...).

    Raises:
        NotImplementedError: if args.loss is neither 'L1' nor 'MSE'.
    """
    # Set mode to 'train' (for batch norm.)
    model.train()

    # Enable CUDA if specified
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = model.to(device)

    # Initialize visdom object (only when reporting remotely)
    if not args.local:
        viz = Visdom(visdom=visdom)

    # Specify loss function & optimizer
    if args.loss == 'L1':
        criterion = nn.L1Loss()
    elif args.loss == 'MSE':
        criterion = nn.MSELoss()
    else:
        raise NotImplementedError("Only L1Loss(cmd arg:'L1') and MSELoss(cmd arg:'MSE') are supported.")
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params=parameters, lr=args.learning_rate)

    # Compute the number of iterations in an epoch
    num_iters = len(train_loader)
    if args.max_dataset_size > 0:
        num_iters = args.max_dataset_size // args.batch_size

    # Best (lowest) epoch loss so far; was a magic constant 10000000.
    min_loss = float('inf')

    # For NSML saving function
    bind_model(model=model, optimizer=optimizer)

    # Add perceptual loss term (use upto 'relu2-2' layer of VGG-16)
    if args.gamma > 0:
        vgg16_model = models.vgg16_bn(pretrained=True).to(device)
        layers_list = list(vgg16_model.features.children())[:14]
        perceptual_model = torch.nn.Sequential(*layers_list).to(device)
        for param in perceptual_model.parameters():
            param.requires_grad = False
        perceptual_criterion = nn.MSELoss()
        # NOTE(review): perceptual_model/perceptual_criterion are built but the
        # gamma term below compares the *normalized images* with `criterion`
        # directly — confirm whether VGG features were meant to be used here.

    # Conduct training
    for epoch in range(args.epochs):
        total_loss = 0
        if args.verbose:
            print('=============== Epoch {} ==============='.format(epoch + 1))
        for it, (covers, secrets) in enumerate(train_loader):
            print('\tIter: {} / {}'.format(it + 1, num_iters))

            # Enable CUDA if specified
            covers, secrets = covers.to(device), secrets.to(device)

            # Forward run — call the model, not .forward(), so module hooks fire.
            prepped, container, revealed = model(covers, secrets, device=device, noise_level=args.noise_level)

            # Compute loss
            loss = criterion(container, covers)
            loss += args.beta * criterion(revealed, secrets)
            if args.gamma > 0:
                # Normalize each image in a minibatch of {covers, container}
                # with ImageNet channel statistics.
                means = torch.tensor([0.485, 0.456, 0.406])
                stds = torch.tensor([0.229, 0.224, 0.225])
                means, stds = [item.view(3, 1, 1).expand(container.size()).to(device)
                               for item in [means, stds]]
                # Fix: (x - mean) / std replaces the deprecated/removed
                # positional-alpha overload torch.add(x, -1, means).
                normalized = [((minibatch - means) / stds).to(device)
                              for minibatch in [container, covers]]
                loss += args.gamma * criterion(*normalized)

            # Do back-propagation
            model.zero_grad()
            loss.backward()
            optimizer.step()

            # Collect statistics
            total_loss += loss.item()
            if args.verbose:
                print("\t\tLoss sum at iter {}: {}".format(it + 1, loss.item()))
            if args.max_dataset_size > 0 and (it + 1) * args.batch_size >= args.max_dataset_size:
                break

        if args.verbose:
            print('Loss at epoch {}: {}'.format(epoch + 1, total_loss / num_iters))

        # Report statistics to NSML (if not draft)
        if not args.local:
            nsml.report(summary=True,
                        step=(epoch + 1) * len(train_loader),
                        epoch_total=args.epochs,
                        train__loss=total_loss / num_iters)
            # Visualize input & output images (last batch of the epoch)
            for idx in range(args.viz_count):
                images = [images.detach()[idx]
                          for images in [covers, secrets, container, revealed]]  # select example images
                visualize(viz, images, epoch,
                          c_mode=args.c_mode, s_mode=args.s_mode,
                          c_draw_rgb=args.c_draw_rgb, s_draw_rgb=args.s_draw_rgb,
                          use_cuda=use_cuda)

        # Save session if minimum loss is renewed
        if not args.draft and not args.local and total_loss < min_loss:
            nsml.save(epoch)
            min_loss = total_loss

        if args.draft:
            break
def train_distill(opts, train_loader, unlabel_loader, model, criterion, optimizer, ema_optimizer, epoch, use_gpu, scheduler, is_mixsim):
    """Run one epoch of MixMatch-style semi-supervised training under AMP.

    Soft labels for unlabeled samples are guessed from two augmented views,
    sharpened with temperature ``opts.T``, then labeled and unlabeled batches
    are combined with mixup and optimized with a ramped consistency loss.

    Args:
        opts: options namespace (T, alpha, epochs, gpu_ids, log_interval,
            steps_per_epoch).
        train_loader: loader of labeled (inputs, targets) batches.
        unlabel_loader: loader of unlabeled (view1, view2) batch pairs.
        model: network returning (features, logits).
        criterion: semi-supervised loss returning
            (labeled loss, unlabeled loss, unlabeled weight).
        optimizer: optimizer for the model parameters.
        ema_optimizer: EMA updater stepped after each optimizer step.
        epoch: current epoch index (used to ramp the unlabeled weight).
        use_gpu: move batches to CUDA when True.
        scheduler: LR scheduler stepped once per batch.
        is_mixsim: select the first device from opts.gpu_ids instead of 'cuda'.

    Returns:
        (avg loss, avg labeled loss, avg unlabeled loss,
         avg top-1 accuracy, avg top-5 accuracy) over the epoch.
    """
    global global_step
    scaler = torch.cuda.amp.GradScaler()

    losses = AverageMeter()
    losses_x = AverageMeter()
    losses_un = AverageMeter()
    losses_curr = AverageMeter()
    losses_x_curr = AverageMeter()
    losses_un_curr = AverageMeter()
    weight_scale = AverageMeter()
    acc_top1 = AverageMeter()
    acc_top5 = AverageMeter()

    model.train()

    out = False
    local_step = 0
    while not out:
        labeled_train_iter = iter(train_loader)
        unlabeled_train_iter = iter(unlabel_loader)
        for batch_idx in range(len(train_loader)):
            # Fix: use the builtin next() — the Python-2-style iterator.next()
            # raises AttributeError on Python 3 iterators — and catch only
            # StopIteration instead of a bare `except` that would also hide
            # real errors (CUDA faults, KeyboardInterrupt, ...).
            try:
                inputs_x, targets_x = next(labeled_train_iter)
            except StopIteration:
                labeled_train_iter = iter(train_loader)
                inputs_x, targets_x = next(labeled_train_iter)
            try:
                inputs_u1, inputs_u2 = next(unlabeled_train_iter)
            except StopIteration:
                unlabeled_train_iter = iter(unlabel_loader)
                inputs_u1, inputs_u2 = next(unlabeled_train_iter)

            batch_size = inputs_x.size(0)

            # Transform label to one-hot
            classno = NUM_CLASSES
            targets_org = targets_x
            targets_x = torch.zeros(batch_size, classno).scatter_(1, targets_x.view(-1, 1), 1)

            if use_gpu:
                # Send input value to device 0, where first parameters of MixSim_Model is.
                if is_mixsim:
                    dev0 = 'cuda:{}'.format(opts.gpu_ids.split(',')[0])
                else:
                    dev0 = 'cuda'
                inputs_x, targets_x = inputs_x.to(dev0), targets_x.to(dev0)
                inputs_u1, inputs_u2 = inputs_u1.to(dev0), inputs_u2.to(dev0)

            with torch.no_grad():
                # compute guessed labels of unlabel samples
                _, pred_u1 = model(inputs_u1)
                _, pred_u2 = model(inputs_u2)
                pred_u_all = (torch.softmax(pred_u1, dim=1) + torch.softmax(pred_u2, dim=1)) / 2
                # Sharpen the averaged prediction with temperature opts.T.
                pt = pred_u_all ** (1 / opts.T)
                targets_u = pt / pt.sum(dim=1, keepdim=True)
                targets_u = targets_u.detach()

            # mixup
            all_inputs = torch.cat([inputs_x, inputs_u1, inputs_u2], dim=0)
            all_targets = torch.cat([targets_x, targets_u, targets_u], dim=0)

            lamda = np.random.beta(opts.alpha, opts.alpha)
            # Keep lamda >= 0.5 so mixed samples stay closer to input_a.
            lamda = max(lamda, 1 - lamda)
            newidx = torch.randperm(all_inputs.size(0))
            input_a, input_b = all_inputs, all_inputs[newidx]
            target_a, target_b = all_targets, all_targets[newidx]

            mixed_input = lamda * input_a + (1 - lamda) * input_b
            mixed_target = lamda * target_a + (1 - lamda) * target_b

            # interleave labeled and unlabed samples between batches to get correct batchnorm calculation
            mixed_input = list(torch.split(mixed_input, batch_size))
            mixed_input = interleave(mixed_input, batch_size)

            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                _, logits_temp = model(mixed_input[0])
                logits = [logits_temp]
                for newinput in mixed_input[1:]:
                    _, logits_temp = model(newinput)
                    logits.append(logits_temp)

                # put interleaved samples back
                logits = interleave(logits, batch_size)
                logits_x = logits[0]
                logits_u = torch.cat(logits[1:], dim=0)

                loss_x, loss_un, weigts_mixing = criterion(
                    logits_x, mixed_target[:batch_size],
                    logits_u, mixed_target[batch_size:],
                    epoch + batch_idx / len(train_loader), opts.epochs)
                loss = loss_x + weigts_mixing * loss_un

            losses.update(loss.item(), inputs_x.size(0))
            losses_x.update(loss_x.item(), inputs_x.size(0))
            losses_un.update(loss_un.item(), inputs_x.size(0))
            weight_scale.update(weigts_mixing, inputs_x.size(0))
            losses_curr.update(loss.item(), inputs_x.size(0))
            losses_x_curr.update(loss_x.item(), inputs_x.size(0))
            losses_un_curr.update(loss_un.item(), inputs_x.size(0))

            # compute gradient and do SGD step using amp
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            ema_optimizer.step()
            scaler.update()
            scheduler.step()

            with torch.no_grad():
                # re-run the labeled batch for accuracy tracking
                _, pred_x1 = model(inputs_x)

            if IS_ON_NSML and global_step % opts.log_interval == 0:
                nsml.report(step=global_step,
                            loss=losses_curr.avg,
                            loss_x=losses_x_curr.avg,
                            loss_un=losses_un_curr.avg)
                losses_curr.reset()
                losses_x_curr.reset()
                losses_un_curr.reset()

            acc_top1b = top_n_accuracy_score(targets_org.data.cpu().numpy(), pred_x1.data.cpu().numpy(), n=1) * 100
            acc_top5b = top_n_accuracy_score(targets_org.data.cpu().numpy(), pred_x1.data.cpu().numpy(), n=5) * 100
            acc_top1.update(torch.as_tensor(acc_top1b), inputs_x.size(0))
            acc_top5.update(torch.as_tensor(acc_top5b), inputs_x.size(0))

            local_step += 1
            global_step += 1
            if local_step >= opts.steps_per_epoch:
                out = True
                break

    return losses.avg, losses_x.avg, losses_un.avg, acc_top1.avg, acc_top5.avg
train_loss = float(avg_loss / total_batch) val_loss = sess.run(textRNN.loss_val, feed_dict={ textRNN.input_y: y_test, textRNN.input_x: x_test, textRNN.dropout_keep_prob: 1 }) if val_loss < min_val_loss: min_val_loss = val_loss print('epoch:', epoch, ' train_loss:', train_loss, ' val_loss:', val_loss) nsml.report(summary=True, scope=locals(), epoch=epoch, epoch_total=config.epochs, train__loss=float(avg_loss / total_batch), step=epoch) # DONOTCHANGE (You can decide how often you want to save the model) nsml.save(epoch) # 로컬 테스트 모드일때 사용합니다 # 결과가 아래와 같이 나온다면, nsml submit을 통해서 제출할 수 있습니다. # [(0.3, 0), (0.7, 1), ... ] elif config.mode == 'test_local': with open(os.path.join(DATASET_PATH, 'train/train_data'), 'rt', encoding='utf-8') as f: reviews = f.readlines() res = nsml.infer(reviews)
# Load the mammography images/labels and preprocess them into model inputs.
# NOTE(review): assumes data_loader returns (images, labels) and
# preprocessing returns model-ready arrays — confirm against their definitions.
data, y = data_loader(img_path, label_path)
X = preprocessing(data)

# Data loader
batch_loader = DataLoader(
    dataset=MammoDataset(X, y),  ## uses the PyTorch data loader
    batch_size=batch_size,
    shuffle=True)

# Train the model
total_step = len(batch_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(batch_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Report the last batch's loss for this epoch and checkpoint via NSML.
    nsml.report(summary=True, step=epoch, epoch_total=num_epochs,
                loss=loss.item())  # , acc=train_acc)
    nsml.save(epoch)
if step % 150 == 0 or (config.debug and step % 1 == 0): infer_result = local_infer(queries, references, queries_img, reference_img, batch_size) mAP, mean_recall_at_K, min_first_1_at_K = evaluate_rank(infer_result) if best_min_first_K >= min_first_1_at_K: best_min_first_K = min_first_1_at_K best_min_first_K_step = step print("----> First_K @ 1 recall : %d / %d" % (min_first_1_at_K, len(mean_recall_at_K))) do_save = True if best_mAP <= mAP : best_mAP = mAP print("----> Best mAP : best-mAP {:g}".format(best_mAP)) do_save = True if epoch - prev_epoch == 1: print("----> Epoch changed saving") do_save = True if do_save: # save model nsml.report(summary=True, epoch=str(step), epoch_total=nb_epoch) nsml.save(step) print("Model saved : %d step" % step) print("=============================================================================================================") except tf.errors.OutOfRangeError: print("finish train!") break
def trainTrans(model, total_batch_size, queue, criterion, optimizer, device, train_begin, train_loader_count, print_batch=5, teacher_forcing_ratio=1, visual=None, label_smoothing=0.1):
    """Transformer counterpart of train(): consume batches from `queue`.

    A batch whose feature tensor is empty (``feats.shape[0] == 0``) is the
    sentinel meaning one loader thread finished; the loop ends once all
    ``train_loader_count`` loaders have sent their sentinel.

    Args:
        model: transformer model mapping (feats, feat_lengths, scripts)
            to (pred, gold).
        total_batch_size: total number of batches, used only for logging.
        queue: queue yielding (feats, scripts, feat_lengths, script_lengths).
        criterion: unused here; loss comes from cal_performance (kept for
            signature parity with train()).
        optimizer: optimizer stepped once per batch.
        device: torch device the batch tensors are moved to.
        train_begin: wall-clock time training started (for elapsed logging).
        train_loader_count: number of loader threads feeding `queue`.
        print_batch: log progress every `print_batch` batches.
        teacher_forcing_ratio: unused in this variant (signature parity).
        visual: optional logger with a .log(dict) method for live curves.
        label_smoothing: smoothing factor forwarded to cal_performance.

    Returns:
        Tuple of (total_loss / total_num, total_dist / total_length).
    """
    total_loss = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sent_num = 0
    batch = 0

    model.train()
    logger.info('train() start')
    begin = epoch_begin = time.time()
    while True:
        if queue.empty():
            logger.debug('queue is empty')
        # Blocks until a loader thread provides the next batch.
        feats, scripts, feat_lengths, script_lengths = queue.get()
        if feats.shape[0] == 0:
            # empty feats means closing one loader
            train_loader_count -= 1
            logger.debug('left train_loader: %d' % (train_loader_count))
            if train_loader_count == 0:
                break
            else:
                continue
        optimizer.zero_grad()
        feats = feats.to(device)
        scripts = scripts.to(device)
        src_len = scripts.size(1)
        # Targets drop the leading token (presumably SOS — TODO confirm).
        target = scripts[:, 1:]
        pred, gold = model(feats, feat_lengths, scripts)
        loss, n_correct = cal_performance(pred, gold, smoothing=label_smoothing)
        y_hat = pred.max(-1)[1]
        total_loss += loss.item()
        total_num += sum(feat_lengths)
        # Roughly 1-in-100 batches also prints decoded samples.
        display = random.randrange(0, 100) == 0
        dist, length = get_distance(target, y_hat, display=display)
        total_dist += dist
        total_length += length
        total_sent_num += target.size(0)
        loss.backward()
        optimizer.step()
        if visual:
            vis_log = {
                'Train Loss': total_loss / total_num,
                'Train CER': total_dist / total_length
            }
            visual.log(vis_log)
        if batch % print_batch == 0:
            current = time.time()
            elapsed = current - begin
            epoch_elapsed = (current - epoch_begin) / 60.0
            train_elapsed = (current - train_begin) / 3600.0
            logger.info(
                'batch: {:4d}/{:4d}, loss: {:.4f}, cer: {:.2f}, elapsed: {:.2f}s {:.2f}m {:.2f}h'
                .format(
                    batch,  # len(dataloader),
                    total_batch_size,
                    total_loss / total_num,
                    total_dist / total_length,
                    elapsed, epoch_elapsed, train_elapsed))
            begin = time.time()
            nsml.report(False,
                        step=trainTrans.cumulative_batch_count,
                        train_step__loss=total_loss / total_num,
                        train_step__cer=total_dist / total_length)
        batch += 1
        # Function attribute used as a persistent step counter across calls.
        trainTrans.cumulative_batch_count += 1
    logger.info('train() completed')
    return total_loss / total_num, total_dist / total_length
def run(self):
    """Collect one rollout of ``self.nsteps`` steps across all environments.

    Mixes learned-policy environments with scripted ones: the first
    ``self.nscripts`` envs are overridden by a scripted TSP-based controller.
    (Large blocks of commented-out sub-action bookkeeping from an earlier
    multi-head version were removed for readability.)

    Returns:
        Flattened minibatch arrays for the trainer:
        (mb_obs, mb_states, mb_td_targets, mb_masks,
         mb_base_actions, mb_xy0, mb_xy1, mb_values).
    """
    # Per-step rollout buffers; one entry appended per environment step.
    mb_obs, mb_td_targets, mb_base_actions, mb_xy0, mb_xy1, mb_values, mb_dones = [], [], [], [], [], [], []
    mb_states = self.states
    for n in range(self.nsteps):
        # pi, pi2, x1, y1, x2, y2, v0
        pi1, pi_xy0, pi_xy1, values, states = self.model.step(
            self.obs, self.states, self.dones)
        # Small uniform noise added on top of the masked base-action logits.
        pi1_noise = np.random.random_sample((self.nenv, 2)) * 0.3

        base_actions = np.argmax(pi1 * self.base_act_mask + pi1_noise, axis=1)
        # Decode flat 32x32 screen indices into (x, y) coordinates.
        xy0 = np.argmax(pi_xy0, axis=1)
        x0 = (xy0 % 32).astype(int)
        y0 = (xy0 / 32).astype(int)
        xy1 = np.argmax(pi_xy1, axis=1)
        x1 = (xy1 % 32).astype(int)
        y1 = (xy1 / 32).astype(int)

        # Scripted Agent Hacking: override the first nscripts envs.
        for env_num in range(self.nenv):
            if (env_num >= self.nscripts):  # only for scripted agents
                continue
            ob = self.obs[env_num, :, :, :]
            # Last channel carries player-relative info — TODO confirm layout.
            player_relative = ob[:, :, -1]
            self.group_list[env_num] = common.update_group_list2(
                self.control_groups[env_num])
            if (len(self.action_queue[env_num]) == 0):
                # Queue empty: plan a fresh action sequence with the TSP solver
                # and emit a no-op for this step.
                self.action_queue[env_num], self.group_id[env_num], self.dest_per_marine[env_num], self.xy_per_marine[env_num] = \
                    common.solve_tsp(player_relative,
                                     self.selected[env_num][0],
                                     self.group_list[env_num],
                                     self.group_id[env_num],
                                     self.dest_per_marine[env_num],
                                     self.xy_per_marine[env_num])
                base_actions[env_num] = 0
                x0[env_num] = 0
                y0[env_num] = 0
                x1[env_num] = 0
                y1[env_num] = 0
            if (len(self.action_queue[env_num]) > 0):
                # Pop the next scripted action and overwrite the policy output.
                action = self.action_queue[env_num].pop(0)
                base_actions[env_num] = action.get("base_action", 0)
                x0[env_num] = action.get("x0", 0)
                y0[env_num] = action.get("y0", 0)
                xy0[env_num] = y0[env_num] * 32 + x0[env_num]
                x1[env_num] = action.get("x1", 0)
                y1[env_num] = action.get("y1", 0)
                xy1[env_num] = y1[env_num] * 32 + x1[env_num]

        base_actions = self.valid_base_action(base_actions)
        new_base_actions = self.trans_base_actions(base_actions)
        base_action_spec = self.env.action_spec(new_base_actions)
        actions = self.construct_action(base_actions, base_action_spec,
                                        x0, y0, x1, y1)

        mb_obs.append(np.copy(self.obs))
        mb_base_actions.append(base_actions)
        mb_xy0.append(xy0)
        mb_xy1.append(xy1)
        mb_values.append(values)
        mb_dones.append(self.dones)

        obs, rewards, dones, available_actions, army_counts, control_groups, selected, xy_per_marine = self.env.step(
            actions=actions)
        self.army_counts = army_counts
        self.control_groups = control_groups
        self.selected = selected
        for env_num, data in enumerate(xy_per_marine):
            self.xy_per_marine[env_num] = data
        self.update_available(available_actions)
        self.states = states
        self.dones = dones
        # NOTE(review): this inner `n` shadows the step counter `n` above.
        for n, done in enumerate(dones):
            self.total_reward[n] += float(rewards[n])
            if done:
                # Episode finished: zero the observation, log episode stats
                # to NSML (separately for scripted vs. a2c envs), and reset.
                self.obs[n] = self.obs[n] * 0
                self.episodes += 1
                num_episodes = self.episodes  # NOTE(review): unused local
                self.episode_rewards.append(self.total_reward[n])
                mean_100ep_reward = round(
                    np.mean(self.episode_rewards[-101:-1]), 1)
                if (n < self.nscripts):  # scripted agents
                    self.episode_rewards_script.append(
                        self.total_reward[n])
                    mean_100ep_reward_script = round(
                        np.mean(self.episode_rewards_script[-101:-1]), 1)
                    nsml.report(
                        reward_script=self.total_reward[n],
                        mean_reward_script=mean_100ep_reward_script,
                        reward=self.total_reward[n],
                        mean_100ep_reward=mean_100ep_reward,
                        episodes=self.episodes,
                        step=self.episodes,
                        scope=locals())
                else:
                    self.episode_rewards_a2c.append(self.total_reward[n])
                    mean_100ep_reward_a2c = round(
                        np.mean(self.episode_rewards_a2c[-101:-1]), 1)
                    nsml.report(reward_a2c=self.total_reward[n],
                                mean_reward_a2c=mean_100ep_reward_a2c,
                                reward=self.total_reward[n],
                                mean_100ep_reward=mean_100ep_reward,
                                episodes=self.episodes,
                                step=self.episodes,
                                scope=locals())
                self.total_reward[n] = 0
                self.group_list[n] = []
                model = self.model
                if self.callback is not None:
                    self.callback(locals(), globals())
        self.update_obs(obs)
        mb_td_targets.append(rewards)
        mb_dones.append(self.dones)

    # batch of steps to batch of rollouts
    mb_obs = np.asarray(mb_obs, dtype=np.uint8).swapaxes(1, 0).reshape(
        self.batch_ob_shape)
    mb_td_targets = np.asarray(mb_td_targets, dtype=np.float32).swapaxes(1, 0)
    mb_base_actions = np.asarray(mb_base_actions, dtype=np.int32).swapaxes(1, 0)
    mb_xy0 = np.asarray(mb_xy0, dtype=np.int32).swapaxes(1, 0)
    mb_xy1 = np.asarray(mb_xy1, dtype=np.int32).swapaxes(1, 0)
    mb_values = np.asarray(mb_values, dtype=np.float32).swapaxes(1, 0)
    # NOTE(review): np.bool is a deprecated alias removed in NumPy 1.24;
    # the builtin `bool` is the drop-in replacement.
    mb_dones = np.asarray(mb_dones, dtype=np.bool).swapaxes(1, 0)
    mb_masks = mb_dones[:, :-1]
    mb_dones = mb_dones[:, 1:]
    last_values = self.model.value(self.obs, self.states,
                                   self.dones).tolist()
    # discount/bootstrap off value fn
    for n, (rewards, dones, value) in enumerate(
            zip(mb_td_targets, mb_dones, last_values)):
        rewards = rewards.tolist()
        dones = dones.tolist()
        if dones[-1] == 0:
            # Episode still running at rollout end: bootstrap with the value.
            rewards = discount_with_dones(rewards + [value], dones + [0],
                                          self.gamma)[:-1]
        else:
            rewards = discount_with_dones(rewards, dones, self.gamma)
        mb_td_targets[n] = rewards
    mb_td_targets = mb_td_targets.flatten()
    mb_base_actions = mb_base_actions.flatten()
    mb_xy0 = mb_xy0.flatten()
    mb_xy1 = mb_xy1.flatten()
    mb_values = mb_values.flatten()
    mb_masks = mb_masks.flatten()
    return mb_obs, mb_states, mb_td_targets, mb_masks, \
        mb_base_actions, mb_xy0, mb_xy1, mb_values
for iter_, (image, image_id, label) in enumerate(train_loader): image = image.cuda() label = label.cuda() pred = model(image) loss = criterion(pred, label) total_loss += loss.item() * image.size(0) num_images += image.size(0) optimizer.zero_grad() loss.backward() optimizer.step() loss_average = total_loss / float(num_images) scheduler.step(metrics=loss_average, epoch=epoch) if IS_ON_NSML: nsml.save(str(epoch + 1)) gt_label = os.path.join(DATASET_PATH, 'train/train_data/val_label') acc = local_eval(model, val_loader, gt_label) print(f'[{epoch + 1}/{config.num_epochs}] ' f'Validation performance: {acc:.3f}') nsml.report(step=epoch, val_acc=acc) nsml.report(step=epoch, train_loss=loss_average, learning_rate=optimizer.param_groups[0]['lr']) if IS_ON_NSML and acc > best_accuracy: nsml.save(str(epoch + 1)) best_accuracy = acc
def learn(policy, env, seed, total_timesteps=int(40e6), gamma=0.99, log_interval=1, nprocs=24, nscripts=12, nsteps=20, nstack=4, ent_coef=0.01, vf_coef=0.5, vf_fisher_coef=1.0, lr=0.25, max_grad_norm=0.01, kfac_clip=0.001, save_interval=None, lrschedule='linear', callback=None):
    """A2C/ACKTR-style training loop over rollouts collected by Runner.

    Args:
        policy: policy network constructor passed through to Model.
        env: vectorized environment (stepped inside Runner; closed at the end).
        seed: global random seed.
        total_timesteps: total environment steps to train for.
        gamma: discount factor for the rollout returns.
        log_interval: report metrics every `log_interval` updates.
        nprocs: number of parallel environments (also the batch width).
        nscripts: how many of those envs are scripted agents.
        nsteps: rollout length per update.
        nstack: number of stacked frames.
        ent_coef, vf_coef, vf_fisher_coef: loss weighting coefficients.
        lr: learning rate.
        max_grad_norm: gradient clipping norm.
        kfac_clip: KFAC update clipping.
        save_interval: checkpoint every `save_interval` updates (None = never).
        lrschedule: learning-rate schedule name.
        callback: optional per-step callback forwarded to the Runner.
    """
    tf.reset_default_graph()
    set_global_seeds(seed)

    nenvs = nprocs
    # Hard-coded observation/action spaces for the 32x32 minimap task.
    ob_space = (32, 32, 3)  # env.observation_space
    ac_space = (32, 32)
    make_model = lambda: Model(policy, ob_space, ac_space, nenvs,
                               total_timesteps,
                               nprocs=nprocs,
                               nscripts=nscripts,
                               nsteps=nsteps,
                               nstack=nstack,
                               ent_coef=ent_coef,
                               vf_coef=vf_coef,
                               vf_fisher_coef=vf_fisher_coef,
                               lr=lr,
                               max_grad_norm=max_grad_norm,
                               kfac_clip=kfac_clip,
                               lrschedule=lrschedule)
    if save_interval and logger.get_dir():
        import cloudpickle
        # Persist the model factory so checkpoints can be rebuilt later.
        with open(osp.join(logger.get_dir(), 'make_model.pkl'), 'wb') as fh:
            fh.write(cloudpickle.dumps(make_model))
    model = make_model()
    print("make_model complete!")
    runner = Runner(env, model, nsteps=nsteps, nscripts=nscripts,
                    nstack=nstack, gamma=gamma, callback=callback)
    nbatch = nenvs * nsteps
    tstart = time.time()
    for update in range(1, total_timesteps // nbatch + 1):
        # One rollout -> one training update.
        obs, states, td_targets, masks, actions, xy0, xy1, values = runner.run()
        policy_loss, value_loss, policy_entropy, \
            policy_loss_xy0, policy_entropy_xy0, \
            policy_loss_xy1, policy_entropy_xy1, \
            = model.train(obs, states, td_targets, masks, actions,
                          xy0, xy1, values)
        model.old_obs = obs
        nseconds = time.time() - tstart
        fps = int((update * nbatch) / nseconds)
        if update % log_interval == 0 or update == 1:
            ev = explained_variance(values, td_targets)
            # Metrics reported to NSML (replaces the old logger.record_tabular
            # dump that is kept disabled here).
            nsml.report(nupdates=update,
                        total_timesteps=update * nbatch,
                        fps=fps,
                        policy_entropy=float(policy_entropy),
                        policy_loss=float(policy_loss),
                        policy_loss_xy0=float(policy_loss_xy0),
                        policy_entropy_xy0=float(policy_entropy_xy0),
                        policy_loss_xy1=float(policy_loss_xy1),
                        policy_entropy_xy1=float(policy_entropy_xy1),
                        value_loss=float(value_loss),
                        explained_variance=float(ev),
                        batch_size=nbatch,
                        step=update * nbatch,
                        scope=locals())
        if save_interval and (update % save_interval == 0 or update == 1) and logger.get_dir():
            savepath = osp.join(logger.get_dir(), 'checkpoint%.5i' % update)
            print('Saving to', savepath)
            model.save(savepath)
    env.close()
def main():
    """Entry point: build a CTC mel-to-jamo network plus a seq2seq correction
    network, then alternate full training and evaluation epochs.

    Fixes vs. original:
      * During evaluation, seq2seq losses were appended to the *training*
        loss lists (`seq2seq_loss_list_train*`) while the eval lists were
        created but never used; they now go to `seq2seq_loss_list_eval*`.
      * The eval loop's empty-batch message said "Training Batch is None".
      * `args.load != None` -> `args.load is not None` (PEP 8).
    """
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon lilililill model')
    parser.add_argument('--max_epochs', type=int, default=1000,
                        help='number of max epochs in training (default: 1000)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--save_name', type=str, default='model',
                        help='the name of model in nsml or local')
    # parser.add_argument('--dropout', type=float, default=0.2, help='dropout rate in training (default: 0.2)')
    parser.add_argument('--lr_1', type=float, default=1e-04,
                        help='learning rate 1 (default: 0.0001)')
    parser.add_argument('--lr_2', type=float, default=1e-04,
                        help='learning rate 2 (default: 0.0001)')
    parser.add_argument('--num_mels', type=int, default=160,
                        help='number of the mel bands (default: 160)')
    parser.add_argument('--num_layers', type=int, default=1,
                        help='number of layers (default: 1)')
    parser.add_argument('--batch_size', type=int, default=128,
                        help='batch size in training (default: 128)')
    parser.add_argument("--num_thread", type=int, default=4,
                        help='number of the loading thread (default: 4)')
    parser.add_argument('--num_hidden_enc', type=int, default=1024,
                        help='hidden size of model (default: 1024)')
    parser.add_argument('--num_hidden_dec', type=int, default=512,
                        help='hidden size of model decoder (default: 512)')
    parser.add_argument('--num_hidden_seq', type=int, default=1024,
                        help='hidden size of model seq2seq (default: 1024)')
    parser.add_argument('--nsc_in_ms', type=int, default=40,
                        help='Number of sample size per time segment in ms (default: 40)')
    parser.add_argument('--ref_repeat', type=int, default=1,
                        help='Number of repetition of reference seq2seq (default: 1)')
    parser.add_argument('--loss_lim', type=float, default=0.05,
                        help='Minimum loss threshold (default: 0.05)')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument("--pause", type=int, default=0)
    parser.add_argument('--memo', type=str, default='',
                        help='Comment you wish to leave')
    parser.add_argument('--train_ratio', type=float, default=0.5,
                        help='Training ratio (default: 0.5)')
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--debug', type=str, default='False', help='debug mode')
    args = parser.parse_args()

    batch_size = args.batch_size
    num_thread = args.num_thread
    num_mels = args.num_mels

    char2index, index2char = load_label('./hackathon.labels')
    SOS_token = char2index['<s>']  # '<sos>' or '<s>'
    EOS_token = char2index['</s>']  # '<eos>' or '</s>'
    PAD_token = char2index['_']  # '-' or '_'

    unicode_jamo_list = My_Unicode_Jamo_v2()
    tokenizer = Tokenizer(unicode_jamo_list)
    jamo_tokens = tokenizer.word2num(unicode_jamo_list)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # Net A: mel spectrogram -> jamo sequence, trained with CTC loss.
    net = Mel2SeqNet_General(num_mels, args.num_hidden_enc,
                             args.num_hidden_dec, len(unicode_jamo_list),
                             args.num_layers, device)
    net_optimizer = optim.Adam(net.parameters(), lr=args.lr_1)
    ctc_loss = nn.CTCLoss().to(device)

    # Net B: seq2seq "spelling corrector" from jamo tokens to characters.
    net_B = Seq2SeqNet_v2(args.num_hidden_seq, jamo_tokens, char2index, device)
    net_B_optimizer = optim.Adam(net_B.parameters(), lr=args.lr_2)
    net_B_criterion = nn.NLLLoss(reduction='none').to(device)

    bind_model(net, net_B, net_optimizer, net_B_optimizer, index2char, tokenizer)

    if args.pause == 1:
        nsml.paused(scope=locals())
    if args.mode != "train":
        return

    if args.load is not None:
        # Warm-start from a previous NSML session, then re-apply the
        # requested learning rates (the checkpoint carries its own).
        nsml.load(checkpoint='model',
                  session='team47/sr-hack-2019-50000/' + args.load)
        nsml.save('saved')
        for g in net_optimizer.param_groups:
            g['lr'] = args.lr_1
            logger.info('Learning rate of the net: {}'.format(g['lr']))
        for g in net_B_optimizer.param_groups:
            g['lr'] = args.lr_2
            logger.info('Learning rate of the net B: {}'.format(g['lr']))

    print(net)
    print(net_B)

    wav_paths, script_paths, korean_script_paths = get_paths(DATASET_PATH)
    logger.info('wav_paths len: {}'.format(len(wav_paths)))
    logger.info('script_paths len: {}'.format(len(script_paths)))
    logger.info('korean_script_paths len: {}'.format(len(korean_script_paths)))

    korean_script_list, jamo_script_list = get_korean_and_jamo_list_v2(korean_script_paths)
    logger.info('Korean script 0: {}'.format(korean_script_list[0]))
    logger.info('Korean script 0 length: {}'.format(len(korean_script_list[0])))
    logger.info('Jamo script 0: {}'.format(jamo_script_list[0]))
    logger.info('Jamo script 0 length: {}'.format(len(jamo_script_list[0])))

    script_path_list = get_script_list(script_paths, SOS_token, EOS_token)

    # Token targets: jamo sequence wrapped with <s> ... </s>.
    ground_truth_list = [
        tokenizer.word2num(['<s>'] + list(jamo) + ['</s>'])
        for jamo in jamo_script_list
    ]

    logger.info('Train Ratio: {}'.format(args.train_ratio))
    split_index = int(args.train_ratio * len(wav_paths))
    # split_index = int(0.9 * len(wav_paths))

    wav_path_list_train = wav_paths[:split_index]
    ground_truth_list_train = ground_truth_list[:split_index]
    korean_script_list_train = korean_script_list[:split_index]
    script_path_list_train = script_path_list[:split_index]

    wav_path_list_eval = wav_paths[split_index:]
    ground_truth_list_eval = ground_truth_list[split_index:]
    korean_script_list_eval = korean_script_list[split_index:]
    script_path_list_eval = script_path_list[split_index:]

    logger.info('Total:Train:Eval = {}:{}:{}'.format(
        len(wav_paths), len(wav_path_list_train), len(wav_path_list_eval)))

    preloader_train = Threading_Batched_Preloader_v2(
        wav_path_list_train, ground_truth_list_train, script_path_list_train,
        korean_script_list_train, batch_size, num_mels, args.nsc_in_ms,
        is_train=True)
    preloader_eval = Threading_Batched_Preloader_v2(
        wav_path_list_eval, ground_truth_list_eval, script_path_list_eval,
        korean_script_list_eval, batch_size, num_mels, args.nsc_in_ms,
        is_train=False)

    best_loss = 1e10
    best_eval_cer = 1e10

    # load all target scripts for reducing disk i/o
    target_path = os.path.join(DATASET_PATH, 'train_label')
    load_targets(target_path)

    logger.info('start')
    train_begin = time.time()

    for epoch in range(args.max_epochs):
        logger.info((datetime.now().strftime('%m-%d %H:%M:%S')))
        net.train()
        net_B.train()

        preloader_train.initialize_batch(num_thread)
        loss_list_train = list()
        seq2seq_loss_list_train = list()
        seq2seq_loss_list_train_ref = list()
        logger.info("Initialized Training Preloader")
        count = 0
        # Start lengths at 1 to avoid division by zero when no batch
        # contributes (CER denominators below).
        total_dist = 0
        total_length = 1
        total_dist_ref = 0
        total_length_ref = 1

        while not preloader_train.end_flag:
            batch = preloader_train.get_batch()
            if batch is not None:
                tensor_input, ground_truth, loss_mask, length_list, \
                    batched_num_script, batched_num_script_loss_mask = batch
                pred_tensor, loss = train(net, net_optimizer, ctc_loss,
                                          tensor_input.to(device),
                                          ground_truth.to(device),
                                          length_list.to(device), device)
                loss_list_train.append(loss)
                jamo_result = Decode_Prediction_No_Filtering(pred_tensor, tokenizer)
                true_string_list = Decode_Num_Script(
                    batched_num_script.detach().cpu().numpy(), index2char)

                # Train net B on the *reference* jamo input (teacher input).
                # NOTE(review): if --ref_repeat were 0, dist_ref/length_ref
                # below would be unbound; default is 1.
                for i in range(args.ref_repeat):
                    lev_input_ref = ground_truth
                    lev_pred_ref, attentions_ref, seq2seq_loss_ref = net_B.net_train(
                        lev_input_ref.to(device),
                        batched_num_script.to(device),
                        batched_num_script_loss_mask.to(device),
                        net_B_optimizer, net_B_criterion)
                    pred_string_list_ref = Decode_Lev_Prediction(lev_pred_ref, index2char)
                    seq2seq_loss_list_train_ref.append(seq2seq_loss_ref)
                    dist_ref, length_ref = char_distance_list(
                        true_string_list, pred_string_list_ref)

                pred_string_list = [None]
                dist = 0
                length = 0
                # Only feed net A's own prediction into net B once the CTC
                # loss is small enough to be a usable input.
                if loss < args.loss_lim:
                    lev_input = Decode_CTC_Prediction_And_Batch(pred_tensor)
                    lev_pred, attentions, seq2seq_loss = net_B.net_train(
                        lev_input.to(device),
                        batched_num_script.to(device),
                        batched_num_script_loss_mask.to(device),
                        net_B_optimizer, net_B_criterion)
                    pred_string_list = Decode_Lev_Prediction(lev_pred, index2char)
                    seq2seq_loss_list_train.append(seq2seq_loss)
                    dist, length = char_distance_list(true_string_list, pred_string_list)

                total_dist_ref += dist_ref
                total_length_ref += length_ref
                total_dist += dist
                total_length += length
                count += 1
                if count % 25 == 0:
                    logger.info("Train: Count {} | {} => {}".format(
                        count, true_string_list[0], pred_string_list_ref[0]))
                    logger.info("Train: Count {} | {} => {} => {}".format(
                        count, true_string_list[0], jamo_result[0], pred_string_list[0]))
            else:
                logger.info("Training Batch is None")

        train_loss = np.mean(np.asarray(loss_list_train))
        train_cer = total_dist / total_length
        train_cer_ref = total_dist_ref / total_length_ref
        logger.info("Mean Train Loss: {}".format(train_loss))
        logger.info("Total Train CER: {}".format(train_cer))
        logger.info("Total Train Reference CER: {}".format(train_cer_ref))

        preloader_eval.initialize_batch(num_thread)
        loss_list_eval = list()
        seq2seq_loss_list_eval = list()
        seq2seq_loss_list_eval_ref = list()
        logger.info("Initialized Evaluation Preloader")
        count = 0
        total_dist = 0
        total_length = 1
        total_dist_ref = 0
        total_length_ref = 1

        net.eval()
        net_B.eval()
        while not preloader_eval.end_flag:
            batch = preloader_eval.get_batch()
            if batch is not None:
                tensor_input, ground_truth, loss_mask, length_list, \
                    batched_num_script, batched_num_script_loss_mask = batch
                pred_tensor, loss = evaluate(net, ctc_loss,
                                             tensor_input.to(device),
                                             ground_truth.to(device),
                                             length_list.to(device), device)
                loss_list_eval.append(loss)
                jamo_result = Decode_Prediction_No_Filtering(pred_tensor, tokenizer)
                true_string_list = Decode_Num_Script(
                    batched_num_script.detach().cpu().numpy(), index2char)

                lev_input_ref = ground_truth
                lev_pred_ref, attentions_ref, seq2seq_loss_ref = net_B.net_eval(
                    lev_input_ref.to(device),
                    batched_num_script.to(device),
                    batched_num_script_loss_mask.to(device),
                    net_B_criterion)
                pred_string_list_ref = Decode_Lev_Prediction(lev_pred_ref, index2char)
                # BUGFIX: was seq2seq_loss_list_train_ref (polluted train stats).
                seq2seq_loss_list_eval_ref.append(seq2seq_loss_ref)
                dist_ref, length_ref = char_distance_list(
                    true_string_list, pred_string_list_ref)

                pred_string_list = [None]
                dist = 0
                length = 0
                if loss < args.loss_lim:
                    lev_input = Decode_CTC_Prediction_And_Batch(pred_tensor)
                    lev_pred, attentions, seq2seq_loss = net_B.net_eval(
                        lev_input.to(device),
                        batched_num_script.to(device),
                        batched_num_script_loss_mask.to(device),
                        net_B_criterion)
                    pred_string_list = Decode_Lev_Prediction(lev_pred, index2char)
                    # BUGFIX: was seq2seq_loss_list_train.
                    seq2seq_loss_list_eval.append(seq2seq_loss)
                    dist, length = char_distance_list(true_string_list, pred_string_list)

                total_dist_ref += dist_ref
                total_length_ref += length_ref
                total_dist += dist
                total_length += length
                count += 1
                if count % 10 == 0:
                    logger.info("Eval: Count {} | {} => {}".format(
                        count, true_string_list[0], pred_string_list_ref[0]))
                    logger.info("Eval: Count {} | {} => {} => {}".format(
                        count, true_string_list[0], jamo_result[0], pred_string_list[0]))
            else:
                # BUGFIX: message said "Training Batch is None" in the eval loop.
                logger.info("Evaluation Batch is None")

        eval_cer = total_dist / total_length
        eval_cer_ref = total_dist_ref / total_length_ref
        eval_loss = np.mean(np.asarray(loss_list_eval))
        logger.info("Mean Evaluation Loss: {}".format(eval_loss))
        logger.info("Total Evaluation CER: {}".format(eval_cer))
        logger.info("Total Evaluation Reference CER: {}".format(eval_cer_ref))

        nsml.report(False,
                    step=epoch,
                    train_epoch__loss=train_loss,
                    train_epoch__cer=train_cer,
                    train_epoch__cer_ref=train_cer_ref,
                    eval__loss=eval_loss,
                    eval__cer=eval_cer,
                    eval__cer_ref=eval_cer_ref)
        nsml.save(args.save_name)

        best_model = (eval_cer < best_eval_cer)
        if best_model:
            nsml.save('best')
            best_eval_cer = eval_cer
def run(self):
    """Roll out `self.nsteps` environment steps and return a flattened batch.

    For each step: query the model for action logits/values, override the
    first `self.nscripts` environments with scripted TSP-solver actions,
    step the vectorized env, and accumulate per-step buffers. After the
    rollout, bootstrap discounted returns from the value function.

    Returns:
        (mb_obs, mb_states, mb_td_targets, mb_masks,
         mb_base_actions, mb_xy0, mb_xy1, mb_values) — all flattened to
        (nenv * nsteps, ...) except mb_states (initial RNN states).

    Fixes vs. original:
      * `dtype=np.bool` -> `dtype=bool` (the `np.bool` alias was removed
        in NumPy 1.24).
      * the inner per-env loop no longer shadows the outer step index `n`.
    """
    mb_obs, mb_td_targets, mb_base_actions, \
        mb_xy0, mb_xy1, \
        mb_values, mb_dones \
        = [], [], [], [], [], [], []
    mb_states = self.states
    for n in range(self.nsteps):
        # pi, pi2, x1, y1, x2, y2, v0
        pi1, pi_xy0, pi_xy1, values, states = self.model.step(
            self.obs, self.states, self.dones)
        # Small uniform noise encourages exploration in the argmax pick.
        pi1_noise = np.random.random_sample((self.nenv, 3)) * 0.3
        base_actions = np.argmax(pi1 * self.base_act_mask + pi1_noise, axis=1)
        xy0 = np.argmax(pi_xy0, axis=1)
        x0 = (xy0 % 32).astype(int)   # flat index -> (x, y) on a 32-wide grid
        y0 = (xy0 / 32).astype(int)
        xy1 = np.argmax(pi_xy1, axis=1)
        x1 = (xy1 % 32).astype(int)
        y1 = (xy1 / 32).astype(int)

        # Scripted Agent Hacking: the first `nscripts` envs are driven by a
        # scripted TSP solver instead of the learned policy.
        for env_num in range(self.nenv):
            if env_num >= self.nscripts:  # only for scripted agents
                continue
            ob = self.obs[env_num, :, :, :]
            # assumes the last channel is the player_relative feature layer
            # — TODO confirm against update_obs().
            player_relative = ob[:, :, -1]
            self.group_list[env_num] = common.update_group_list2(
                self.control_groups[env_num])
            if len(self.action_queue[env_num]) == 0:
                (self.action_queue[env_num], self.group_id[env_num],
                 self.dest_per_marine[env_num], self.xy_per_marine[env_num]) = \
                    common.solve_tsp(player_relative,
                                     self.selected[env_num][0],
                                     self.group_list[env_num],
                                     self.group_id[env_num],
                                     self.dest_per_marine[env_num],
                                     self.xy_per_marine[env_num])
            # Default to a no-op, then overwrite from the queue if available.
            base_actions[env_num] = 0
            x0[env_num] = 0
            y0[env_num] = 0
            x1[env_num] = 0
            y1[env_num] = 0
            if len(self.action_queue[env_num]) > 0:
                action = self.action_queue[env_num].pop(0)
                base_actions[env_num] = action.get("base_action", 0)
                x0[env_num] = action.get("x0", 0)
                y0[env_num] = action.get("y0", 0)
                xy0[env_num] = y0[env_num] * 32 + x0[env_num]
                x1[env_num] = action.get("x1", 0)
                y1[env_num] = action.get("y1", 0)
                xy1[env_num] = y1[env_num] * 32 + x1[env_num]

        base_actions = self.valid_base_action(base_actions)
        new_base_actions = self.trans_base_actions(base_actions)
        base_action_spec = self.env.action_spec(new_base_actions)
        # print("base_actions:", base_actions)
        actions = self.construct_action(base_actions, base_action_spec,
                                        x0, y0, x1, y1)

        mb_obs.append(np.copy(self.obs))
        mb_base_actions.append(base_actions)
        mb_xy0.append(xy0)
        mb_xy1.append(xy1)
        mb_values.append(values)
        mb_dones.append(self.dones)
        # print("final acitons : ", actions)

        obs, rewards, dones, available_actions, army_counts, \
            control_groups, selected, xy_per_marine = self.env.step(
                actions=actions)
        self.army_counts = army_counts
        self.control_groups = control_groups
        self.selected = selected
        for env_num, data in enumerate(xy_per_marine):
            self.xy_per_marine[env_num] = data
        self.update_available(available_actions)
        self.states = states
        self.dones = dones

        mean_100ep_reward_a2c = 0
        for env_i, done in enumerate(dones):
            self.total_reward[env_i] += float(rewards[env_i])
            if done:
                self.obs[env_i] = self.obs[env_i] * 0  # blank obs on episode end
                self.episodes += 1
                # Kept as locals for nsml.report(scope=locals()) / callback.
                num_episodes = self.episodes
                self.episode_rewards.append(self.total_reward[env_i])
                model = self.model
                mean_100ep_reward = round(
                    np.mean(self.episode_rewards[-101:]), 1)
                if env_i < self.nscripts:  # scripted agents
                    self.episode_rewards_script.append(self.total_reward[env_i])
                    mean_100ep_reward_script = round(
                        np.mean(self.episode_rewards_script[-101:]), 1)
                    nsml.report(
                        reward_script=self.total_reward[env_i],
                        mean_reward_script=mean_100ep_reward_script,
                        reward=self.total_reward[env_i],
                        mean_100ep_reward=mean_100ep_reward,
                        episodes=self.episodes,
                        step=self.episodes,
                        scope=locals())
                else:
                    self.episode_rewards_a2c.append(self.total_reward[env_i])
                    mean_100ep_reward_a2c = round(
                        np.mean(self.episode_rewards_a2c[-101:]), 1)
                    nsml.report(
                        reward_a2c=self.total_reward[env_i],
                        mean_reward_a2c=mean_100ep_reward_a2c,
                        reward=self.total_reward[env_i],
                        mean_100ep_reward=mean_100ep_reward,
                        episodes=self.episodes,
                        step=self.episodes,
                        scope=locals())
                    print("mean_100ep_reward_a2c", mean_100ep_reward_a2c)
                if self.callback is not None:
                    self.callback(locals(), globals())
                self.total_reward[env_i] = 0
                self.group_list[env_i] = []

        self.update_obs(obs)
        mb_td_targets.append(rewards)
    mb_dones.append(self.dones)

    # batch of steps to batch of rollouts: (nsteps, nenv, ...) -> (nenv, nsteps, ...)
    mb_obs = np.asarray(mb_obs, dtype=np.uint8).swapaxes(1, 0).reshape(
        self.batch_ob_shape)
    mb_td_targets = np.asarray(mb_td_targets, dtype=np.float32).swapaxes(1, 0)
    mb_base_actions = np.asarray(mb_base_actions, dtype=np.int32).swapaxes(1, 0)
    mb_xy0 = np.asarray(mb_xy0, dtype=np.int32).swapaxes(1, 0)
    mb_xy1 = np.asarray(mb_xy1, dtype=np.int32).swapaxes(1, 0)
    mb_values = np.asarray(mb_values, dtype=np.float32).swapaxes(1, 0)
    # `bool`, not the removed `np.bool` alias.
    mb_dones = np.asarray(mb_dones, dtype=bool).swapaxes(1, 0)
    mb_masks = mb_dones[:, :-1]   # dones *before* each step (RNN reset mask)
    mb_dones = mb_dones[:, 1:]    # dones *after* each step
    last_values = self.model.value(self.obs, self.states, self.dones).tolist()

    # discount/bootstrap off value fn
    for n, (rewards, dones, value) in enumerate(
            zip(mb_td_targets, mb_dones, last_values)):
        rewards = rewards.tolist()
        dones = dones.tolist()
        if dones[-1] == 0:
            # Episode still running: bootstrap the tail with V(s_T).
            rewards = discount_with_dones(rewards + [value], dones + [0],
                                          self.gamma)[:-1]
        else:
            rewards = discount_with_dones(rewards, dones, self.gamma)
        mb_td_targets[n] = rewards
    mb_td_targets = mb_td_targets.flatten()
    mb_base_actions = mb_base_actions.flatten()
    mb_xy0 = mb_xy0.flatten()
    mb_xy1 = mb_xy1.flatten()
    mb_values = mb_values.flatten()
    mb_masks = mb_masks.flatten()
    return mb_obs, mb_states, mb_td_targets, mb_masks, \
        mb_base_actions, mb_xy0, mb_xy1, mb_values
def main():
    """Entry point for the fundus-image classifier: infer (NSML pause) or
    train with optional local checkpointing and CV-fold data loading.

    NOTE(review): `config` (and, under IS_LOCAL, `yml`) are used below but
    their local definitions are commented out — presumably they exist as
    module-level globals; verify, otherwise the IS_LOCAL path raises
    NameError on `yml`.
    """
    seed_everything()
    # yml = 'configs/base.yml'
    # config = utils.config.load(yml)
    # pprint.pprint(config, indent=2)
    model = get_model(config).cuda()
    bind_model(model)
    args = get_args()
    if args.pause:  ## when in test (inference) mode
        print('Inferring Start...')
        nsml.paused(scope=locals())
    if args.mode == 'train':  ### when in training mode
        print('Training Start...')
        # no bias decay: split params so biases/norm weights skip weight decay
        if config.OPTIMIZER.NO_BIAS_DECAY:
            group_decay, group_no_decay = group_weight(model)
            params = [{'params': group_decay},
                      {'params': group_no_decay, 'weight_decay': 0.0}]
        else:
            params = model.parameters()
        optimizer = get_optimizer(config, params)
        # `initial_lr` is required by schedulers resumed with last_epoch != -1.
        optimizer.param_groups[0]['initial_lr'] = config.OPTIMIZER.LR
        if config.OPTIMIZER.NO_BIAS_DECAY:
            optimizer.param_groups[1]['initial_lr'] = config.OPTIMIZER.LR
        ###############################################################################################
        if IS_LOCAL:
            prepare_train_directories(config)
            # NOTE(review): `yml` is commented out above — confirm it is
            # defined globally before this call.
            utils.config.save_config(yml, config.LOCAL_TRAIN_DIR)
            checkpoint = utils.checkpoint.get_initial_checkpoint(config)
            if checkpoint is not None:
                last_epoch, score, loss = utils.checkpoint.load_checkpoint(
                    config, model, checkpoint)
            else:
                print('[*] no checkpoint found')
                last_epoch, score, loss = -1, -1, float('inf')
            print('last epoch:{} score:{:.4f} loss:{:.4f}'.format(
                last_epoch, score, loss))
        else:
            last_epoch = -1
        ###############################################################################################
        scheduler = get_scheduler(config, optimizer, last_epoch=last_epoch)
        ###############################################################################################
        if IS_LOCAL:
            # When resuming a multi_step schedule, replay the LR decays that
            # already happened before `last_epoch`.
            if config.SCHEDULER.NAME == 'multi_step':
                if config.SCHEDULER.WARMUP:
                    scheduler_dict = scheduler.state_dict()['after_scheduler'].state_dict()
                else:
                    scheduler_dict = scheduler.state_dict()
                milestones = scheduler_dict['milestones']
                step_count = len([i for i in milestones if i < last_epoch])
                optimizer.param_groups[0]['lr'] *= scheduler_dict['gamma'] ** step_count
                if config.OPTIMIZER.NO_BIAS_DECAY:
                    # NOTE(review): this scales 'initial_lr' while group 0
                    # scales 'lr' — possibly intentional, verify.
                    optimizer.param_groups[1]['initial_lr'] *= scheduler_dict['gamma'] ** step_count
            if last_epoch != -1:
                scheduler.step()
        ###############################################################################################
        # for dirname, _, filenames in os.walk(DATASET_PATH):
        #     for filename in filenames:
        #         print(os.path.join(dirname, filename))
        # if preprocessing possible
        preprocess_type = config.DATA.PREPROCESS
        cv2_size = (config.DATA.IMG_W, config.DATA.IMG_H)
        if not IS_LOCAL:
            # Preprocess each diagnosis class folder into `preprocess_type/`.
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'NOR'),
                       os.path.join(preprocess_type, 'NOR'), preprocess_type, cv2_size)
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'AMD'),
                       os.path.join(preprocess_type, 'AMD'), preprocess_type, cv2_size)
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'RVO'),
                       os.path.join(preprocess_type, 'RVO'), preprocess_type, cv2_size)
            preprocess(os.path.join(DATASET_PATH, 'train', 'train_data', 'DMR'),
                       os.path.join(preprocess_type, 'DMR'), preprocess_type, cv2_size)
            data_dir = preprocess_type
            # data_dir = os.path.join(DATASET_PATH, 'train/train_data')
        else:  # IS_LOCAL
            data_dir = os.path.join(DATASET_PATH, preprocess_type)
        # eda
        # train_std(data_dir, preprocess_type, cv2_size)
        fold_df = split_cv(data_dir, n_splits=config.NUM_FOLDS)
        val_fold_idx = config.IDX_FOLD
        ###############################################################################################
        train_loader = get_dataloader(config, data_dir, fold_df, val_fold_idx,
                                      'train', transform=Albu())
        val_loader = get_dataloader(config, data_dir, fold_df, val_fold_idx, 'val')
        postfix = dict()
        num_epochs = config.TRAIN.NUM_EPOCHS
        val_acc_list = []
        for epoch in range(last_epoch+1, num_epochs):
            # Switch to the fine-tune loss after FINETUNE_EPOCH.
            if epoch >= config.LOSS.FINETUNE_EPOCH:
                criterion = get_loss(config.LOSS.FINETUNE_LOSS)
            else:
                criterion = get_loss(config.LOSS.NAME)
            train_values = train_single_epoch(config, model, train_loader,
                                              criterion, optimizer, scheduler, epoch)
            val_values = evaluate_single_epoch(config, model, val_loader,
                                               criterion, epoch)
            val_acc_list.append((epoch, val_values[2]))
            # NOTE(review): 'one_cyle_lr' looks like a typo of 'one_cycle_lr';
            # left as-is since config names elsewhere may use this spelling.
            if config.SCHEDULER.NAME != 'one_cyle_lr':
                scheduler.step()
            if IS_LOCAL:
                utils.checkpoint.save_checkpoint(config, model, epoch,
                                                 val_values[1], val_values[0])
            else:
                # val/train_values layout (by index): 0=loss, 1=res(score),
                # 2=acc, 3=sens, 4=spec — inferred from the keys below.
                postfix['train_loss'] = train_values[0]
                postfix['train_res'] = train_values[1]
                postfix['train_acc'] = train_values[2]
                postfix['train_sens'] = train_values[3]
                postfix['train_spec'] = train_values[4]
                postfix['val_loss'] = val_values[0]
                postfix['val_res'] = val_values[1]
                postfix['val_acc'] = val_values[2]
                postfix['val_sens'] = val_values[3]
                postfix['val_spec'] = val_values[4]
                nsml.report(**postfix, summary=True, step=epoch)
                # Encode the score into a dotted fixed-width string for the name.
                val_res = '%.10f' % val_values[1]
                val_res = val_res.replace(".", "")
                val_res = val_res[:4] + '.' + val_res[4:8] + '.' + val_res[8:]
                save_name = 'epoch_%02d_score%s_loss%.4f.pth' % (
                    epoch, val_res, val_values[0])
                # nsml.save(save_name)
                nsml.save(epoch)
        for e, val_acc in val_acc_list:
            print('%02d %s' % (e, val_acc))