def predict(args):
    """Predict on the test set with test-time augmentation and write a submission.

    Runs ``args.tta_num`` passes over the test loader, averages the class
    predictions and confidence scores across passes, and hands the result to
    ``create_submission`` together with the last loader's dataframe.

    Requires ``args.dets_file_name`` to be set (detections file for the loader).
    """
    assert args.dets_file_name is not None
    model, _ = create_model(args)
    if torch.cuda.device_count() > 1:
        model_name = model.name
        model = DataParallel(model)
        model.name = model_name  # DataParallel hides attrs of the wrapped module
    model = model.cuda()

    preds, scores = [], []
    for i in range(args.tta_num):
        # A fresh loader per pass so TTA augmentation differs between passes.
        test_loader = get_test_loader(args.dets_file_name,
                                      batch_size=args.val_batch_size,
                                      dev_mode=args.dev_mode)
        pred, score = pred_model_output(model, test_loader, labeled=False)
        preds.append(pred)
        # BUG FIX: `scores` was never filled, so np.mean(scores, 0) averaged an
        # empty list and tta_score was NaN.
        scores.append(score)

    tta_pred = np.mean(preds, 0).astype(np.int32)
    tta_score = np.mean(scores, 0)
    print(tta_pred.shape)
    print(tta_pred[:2])
    create_submission(args, test_loader.df, tta_pred, tta_score)
def tta_validate(args):
    """Validate with test-time augmentation, saving per-pass predictions.

    For each TTA round a differently-augmented validation loader is built
    (``val_tta=i``); predictions are written to
    ``output/val/val_tta_pred_<i>.npy`` and the label array (identical across
    rounds) is written once at the end, after which ``calc_val_score``
    aggregates the saved files into a score.
    """
    model, _ = create_model(args)
    if torch.cuda.device_count() > 1:
        net_name = model.name
        model = DataParallel(model)
        model.name = net_name  # keep .name accessible through the wrapper
    model = model.cuda()

    labels = None
    for tta_idx in range(args.tta_num):
        _, val_loader = get_train_val_loaders(
            val_batch_size=args.val_batch_size,
            val_num=args.val_num,
            dev_mode=args.dev_mode,
            val_tta=tta_idx)
        pred, labels = pred_model_output(model, val_loader)
        np.save('output/val/val_tta_pred_{}.npy'.format(tta_idx), pred)
    np.save('output/val/val_labels.npy', labels)

    print('computing score...')
    calc_val_score(args.tta_num)
def create_feature_model(args):
    """Build a ``FeatureNetV1`` feature extractor around the classification model.

    Forces ``args.predict = True`` before creating the backbone classifier,
    wraps in ``DataParallel`` on multi-GPU hosts, moves to GPU and switches to
    eval mode. Returns the ready-to-use feature model.
    """
    args.predict = True
    cls_model, _ = create_model(args)
    feature_model = FeatureNetV1(args.backbone, cls_model=cls_model)
    if torch.cuda.device_count() > 1:
        saved_name = feature_model.name
        feature_model = DataParallel(feature_model)
        feature_model.name = saved_name  # re-expose .name on the wrapper
    feature_model = feature_model.cuda()
    feature_model.eval()
    return feature_model
def create_model(args):
    """Instantiate ``FeatureNetV2``, restore its checkpoint if one exists, and
    move it to GPU(s).

    Returns ``(model, model_file)`` where ``model_file`` is the checkpoint path
    used both for loading here and for saving by the training loops.
    """
    model = FeatureNetV2(args.backbone,
                         num_classes=args.num_classes,
                         cls_model=None,
                         suffix_name=args.suffix_name)
    model_file = os.path.join(MODEL_DIR, model.name, args.ckp_name)

    # Make sure the checkpoint directory exists before anything writes to it.
    ckp_dir = os.path.dirname(model_file)
    if not os.path.exists(ckp_dir):
        os.makedirs(ckp_dir)

    # Resume from an existing checkpoint when available.
    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))

    if torch.cuda.device_count() > 1:
        saved_name = model.name
        model = DataParallel(model)
        model.name = saved_name  # keep .name visible through the wrapper
    model = model.cuda()
    return model, model_file
def train(args):
    """Train the classification model (variant 1): single-image input,
    top1/top10 accuracy metrics, checkpoint saved on best top-1.

    Validation runs every ``args.iter_val`` training iterations; the scheduler
    is stepped on each validation (plateau mode steps on top-1 accuracy).
    """
    print('start training...')
    model, model_file = create_model(args)
    #model = model.cuda()
    # NOTE(review): create_model already wraps in DataParallel and calls
    # .cuda(); this re-wraps on multi-GPU hosts — verify whether the double
    # DataParallel wrap is intended.
    if torch.cuda.device_count() > 1:
        model_name = model.name
        model = DataParallel(model)
        model.name = model_name
    model = model.cuda()
    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=0.0001)
    if args.lrs == 'plateau':
        # mode='max': monitored quantity is an accuracy (higher is better).
        lr_scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)
        #ExponentialLR(optimizer, 0.9, last_epoch=-1)
        #CosineAnnealingLR(optimizer, 15, 1e-7)
    _, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_num=args.val_num)
    best_top1_acc = 0.
    print(
        'epoch | lr | % | loss | avg | loss | top1 | top10 | best | time | save |'
    )
    # Optional baseline validation before any training.
    if not args.no_first_val:
        top10_acc, best_top1_acc, total_loss = validate(
            args, model, val_loader)
        print(
            'val | | | | | {:.4f} | {:.4f} | {:.4f} | {:.4f} | | |'
            .format(total_loss, best_top1_acc, top10_acc, best_top1_acc))
    if args.val:
        # Validation-only mode: stop after the baseline pass.
        return
    model.train()
    if args.lrs == 'plateau':
        lr_scheduler.step(best_top1_acc)
    else:
        lr_scheduler.step()
    train_iter = 0
    for epoch in range(args.start_epoch, args.epochs):
        # Loaders are rebuilt each epoch (fresh shuffling/augmentation).
        train_loader, val_loader = get_train_val_loaders(
            batch_size=args.batch_size, dev_mode=args.dev_mode,
            val_num=args.val_num)
        train_loss = 0
        current_lr = get_lrs(
            optimizer)  #optimizer.state_dict()['param_groups'][2]['lr']
        bg = time.time()
        for batch_idx, data in enumerate(train_loader):
            train_iter += 1
            img, target = data
            img, target = img.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(img)
            loss = criterion(args, output, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            # Single-line progress: epoch, lr, samples seen, batch loss, avg loss.
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]),
                args.batch_size * (batch_idx + 1), train_loader.num,
                loss.item(), train_loss / (batch_idx + 1)), end='')
            # Periodic validation + checkpointing every iter_val iterations.
            if train_iter > 0 and train_iter % args.iter_val == 0:
                top10_acc, top1_acc, total_loss = validate(
                    args, model, val_loader)
                _save_ckp = ''
                if args.always_save or top1_acc > best_top1_acc:
                    best_top1_acc = top1_acc
                    # Save the unwrapped module so the checkpoint loads
                    # without a DataParallel prefix.
                    if isinstance(model, DataParallel):
                        torch.save(model.module.state_dict(), model_file)
                    else:
                        torch.save(model.state_dict(), model_file)
                    _save_ckp = '*'
                print(' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} | {:4s} |'.
                      format(total_loss, top1_acc, top10_acc, best_top1_acc,
                             (time.time() - bg) / 60, _save_ckp))
                model.train()  # validate() may have switched to eval mode
                if args.lrs == 'plateau':
                    lr_scheduler.step(top1_acc)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
def train(args):
    """Train the rgb/audio model (variant 2): frame-level loader, loss is the
    model-selection metric (lower is better), F2 at several thresholds reported.
    """
    print('start training...')
    model, model_file = create_model(args)
    train_loader, val_loader = get_train_val_loaders(
        batch_size=args.batch_size, val_batch_size=args.val_batch_size)
    # NOTE(review): this overwrites the train_loader obtained just above —
    # only the frame loader is actually trained on; confirm intent.
    train_loader = get_frame_train_loader(batch_size=args.batch_size)
    #model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)
    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    elif args.optim == 'RAdam':
        optimizer = RAdam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=0.0001)
    if args.lrs == 'plateau':
        # mode='min': the monitored quantity here is the validation loss.
        lr_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)
    model = model.cuda()
    if torch.cuda.device_count() > 1:
        model_name = model.name
        model = DataParallel(model)
        model.name = model_name
    #model=model.train()
    # Loss is minimized, so "best" starts high.
    best_f2 = 99999.
    best_key = 'loss'
    print(
        'epoch | lr | % | loss | avg | loss | 0.01 | 0.20 | 0.50 | best | time | save |'
    )
    if not args.no_first_val:
        val_metrics = validate(args, model, val_loader)
        print(
            'val | | | | | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | | |'
            .format(val_metrics['loss'], val_metrics['f2_th_0.01'],
                    val_metrics['f2_th_0.20'], val_metrics['f2_th_0.50'],
                    val_metrics[best_key]))
        best_f2 = val_metrics[best_key]
    if args.val:
        # Validation-only mode.
        return
    model.train()
    if args.lrs == 'plateau':
        lr_scheduler.step(best_f2)
    else:
        lr_scheduler.step()
    train_iter = 0
    for epoch in range(args.start_epoch, args.num_epochs):
        #train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, val_num=args.val_num)
        train_loss = 0
        current_lr = get_lrs(optimizer)
        bg = time.time()
        for batch_idx, data in enumerate(train_loader):
            train_iter += 1
            # Segment batches are (rgb, audio, labels); frame batches pack the
            # tensors at indices 0/2/4 — presumably with lengths/masks between;
            # TODO confirm against the loader implementation.
            if train_loader.seg:
                rgb, audio, labels = [x.cuda() for x in data]
            else:
                rgb, audio, labels = data[0].cuda(), data[2].cuda(
                ), data[4].cuda()
            output = model(rgb, audio)
            loss = criterion(output, labels)
            batch_size = rgb.size(0)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            #with amp.scale_loss(loss, optimizer) as scaled_loss:
            #    scaled_loss.backward()
            train_loss += loss.item()
            print('\r {:4d} | {:.7f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]),
                args.batch_size * (batch_idx + 1), train_loader.num,
                loss.item(), train_loss / (batch_idx + 1)), end='')
            # Periodic validation: always refresh the '_latest' checkpoint,
            # promote to the main checkpoint only on improvement (lower loss).
            if train_iter > 0 and train_iter % args.iter_val == 0:
                if isinstance(model, DataParallel):
                    torch.save(model.module.state_dict(), model_file + '_latest')
                else:
                    torch.save(model.state_dict(), model_file + '_latest')
                val_metrics = validate(args, model, val_loader)
                _save_ckp = ''
                if args.always_save or val_metrics[best_key] < best_f2:
                    best_f2 = val_metrics[best_key]
                    if isinstance(model, DataParallel):
                        torch.save(model.module.state_dict(), model_file)
                    else:
                        torch.save(model.state_dict(), model_file)
                    _save_ckp = '*'
                print(
                    ' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} | {:4s} |'
                    .format(val_metrics['loss'], val_metrics['f2_th_0.01'],
                            val_metrics['f2_th_0.20'],
                            val_metrics['f2_th_0.50'], best_f2,
                            (time.time() - bg) / 60, _save_ckp))
                model.train()  # validate() may have switched to eval mode
                if args.lrs == 'plateau':
                    lr_scheduler.step(best_f2)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
def train(args):
    """Train the (img, label1) -> target model (variant 3): optional NVIDIA
    apex mixed precision, accuracy as the model-selection metric.
    """
    print('start training...')
    model, model_file = create_model(args)
    #model = model.cuda()
    model = model.cuda()
    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr)  #, weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=0.0001)
    if args.lrs == 'plateau':
        # mode='max': accuracy is monitored (higher is better).
        lr_scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)
        #ExponentialLR(optimizer, 0.9, last_epoch=-1)
        #CosineAnnealingLR(optimizer, 15, 1e-7)
    # amp.initialize must happen after the model is on GPU and before
    # DataParallel wrapping (per apex usage requirements).
    if args.apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
    if torch.cuda.device_count() > 1:
        model_name = model.name
        model = DataParallel(model)
        model.name = model_name
    val_loader = get_val_loader(batch_size=args.val_batch_size, val_num=args.val_num, dev_mode=args.dev_mode)
    train_loader = get_train_loader(batch_size=args.batch_size, dev_mode=args.dev_mode)
    best_metrics = 0.
    best_key = 'acc'
    print(
        'epoch | lr | % | loss | avg | loss | acc | best | time | save |'
    )
    if not args.no_first_val:
        val_metrics = validate(args, model, val_loader)
        print(
            'val | | | | | {:.4f} | {:.4f} | {:.4f} | | |'
            .format(val_metrics['valid_loss'], val_metrics['acc'],
                    val_metrics[best_key]))
        best_metrics = val_metrics[best_key]
    if args.val:
        # Validation-only mode.
        return
    model.train()
    if args.lrs == 'plateau':
        lr_scheduler.step(best_metrics)
    else:
        lr_scheduler.step()
    train_iter = 0
    for epoch in range(args.start_epoch, args.epochs):
        train_loss = 0
        current_lr = get_lrs(
            optimizer)  #optimizer.state_dict()['param_groups'][2]['lr']
        bg = time.time()
        for batch_idx, data in enumerate(train_loader):
            train_iter += 1
            img, label1, target = data
            img, label1, target = img.cuda(), label1.cuda(), target.cuda()
            output = model(img, label1)
            loss = criterion(output, target)
            batch_size = img.size(0)
            #(batch_size * loss).backward()
            # apex path scales the loss for mixed-precision gradients.
            if args.apex:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]),
                args.batch_size * (batch_idx + 1), train_loader.num,
                loss.item(), train_loss / (batch_idx + 1)), end='')
            # Periodic validation: always refresh '_latest'; promote to the
            # main checkpoint only on improved accuracy.
            if train_iter > 0 and train_iter % args.iter_val == 0:
                if isinstance(model, DataParallel):
                    torch.save(model.module.state_dict(), model_file + '_latest')
                else:
                    torch.save(model.state_dict(), model_file + '_latest')
                val_metrics = validate(args, model, val_loader)
                _save_ckp = ''
                if args.always_save or val_metrics[best_key] > best_metrics:
                    best_metrics = val_metrics[best_key]
                    if isinstance(model, DataParallel):
                        torch.save(model.module.state_dict(), model_file)
                    else:
                        torch.save(model.state_dict(), model_file)
                    _save_ckp = '*'
                print(' {:.4f} | {:.4f} | {:.4f} | {:.2f} | {:4s} |'.format(
                    val_metrics['valid_loss'], val_metrics['acc'],
                    best_metrics, (time.time() - bg) / 60, _save_ckp))
                model.train()  # validate() may have switched to eval mode
                if args.lrs == 'plateau':
                    lr_scheduler.step(best_metrics)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
def train(args):
    """Train the rgb/audio model (variant 4) by alternating between two data
    sources: 200 batches from the frame-level loader, then 100 batches from
    the segment-level loader, per outer step. Top-1 accuracy selects the best
    checkpoint; '_latest' is refreshed on every periodic validation.
    """
    print('start training...')
    model, model_file = create_model(args)
    train_loader, val_loader = get_train_val_loaders(
        batch_size=args.train_batch_size, val_batch_size=args.val_batch_size)
    frame_loader, _ = get_frame_train_loader(batch_size=args.frame_batch_size)

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    elif args.optim == 'RAdam':
        optimizer = RAdam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                              weight_decay=0.0001)

    if args.lrs == 'plateau':
        # mode='max': top-1 accuracy is monitored (higher is better).
        lr_scheduler = ReduceLROnPlateau(optimizer, mode='max',
                                         factor=args.factor,
                                         patience=args.patience,
                                         min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer, args.t_max,
                                         eta_min=args.min_lr)

    model = model.cuda()
    if torch.cuda.device_count() > 1:
        model_name = model.name
        model = DataParallel(model)
        model.name = model_name  # re-expose .name on the wrapper

    best_f2 = 0.
    best_key = 'top1'
    print('epoch | lr | % | loss | avg | loss | top1 | top10 | best | time | save |')
    if not args.no_first_val:
        val_metrics = validate(args, model, val_loader)
        print('val | | | | | {:.4f} | {:.4f} | {:.4f} | {:.4f} | | |'.format(
            val_metrics['valid_loss'], val_metrics['top1'],
            val_metrics['top10'], val_metrics[best_key]))
        best_f2 = val_metrics[best_key]
    if args.val:
        # Validation-only mode.
        return

    model.train()
    if args.lrs == 'plateau':
        lr_scheduler.step(best_f2)
    else:
        lr_scheduler.step()

    def get_batch(loader, iterator=None, epoch=0, batch_idx=0):
        """Fetch the next batch, restarting the loader when it is exhausted.

        Returns (batch, iterator, epoch, batch_idx).

        BUG FIX: the original incremented a local on StopIteration, then
        immediately zeroed it, and finally returned the *incoming* `epoch` —
        so the caller's epoch counter never advanced and the outer
        `while frame_epoch <= args.num_epochs` loop could never terminate.
        Now the epoch is bumped on rollover and the batch index restarts at 0
        so the progress display is per-epoch.
        """
        ret_epoch = epoch
        ret_batch_idx = batch_idx + 1
        if iterator is None:
            iterator = iter(loader)
        try:
            b = next(iterator)
        except StopIteration:
            # Loader exhausted: start a new epoch with a fresh iterator.
            iterator = iter(loader)
            b = next(iterator)
            ret_epoch += 1
            ret_batch_idx = 0
        return b, iterator, ret_epoch, ret_batch_idx

    frame_epoch = args.start_epoch
    train_epoch = 0
    frame_iter = iter(frame_loader)
    train_iter = iter(train_loader)
    train_step = 0
    frame_batch_idx = -1
    train_batch_idx = -1

    while frame_epoch <= args.num_epochs:
        frame_loss = 0.
        train_loss = 0.
        current_lr = get_lrs(optimizer)
        bg = time.time()

        def train_batch(rgb, audio, labels):
            # One optimization step; returns the scalar loss value.
            output = model(rgb, audio)
            loss = criterion(output, labels)
            batch_size = rgb.size(0)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            return loss.item()

        # Phase 1: 200 batches from the frame-level loader. Frame batches
        # pack the tensors at indices 0/2/4 — presumably with lengths/masks
        # between; TODO confirm against the loader implementation.
        for i in range(200):
            batch, frame_iter, frame_epoch, frame_batch_idx = get_batch(
                frame_loader, frame_iter, frame_epoch, frame_batch_idx)
            rgb, audio, labels = batch[0].cuda(), batch[2].cuda(), batch[4].cuda()
            loss_val = train_batch(rgb, audio, labels)
            frame_loss += loss_val
            print('\r F{:4d} | {:.7f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                frame_epoch, float(current_lr[0]),
                args.frame_batch_size*(frame_batch_idx+1), frame_loader.num,
                loss_val, frame_loss/(i+1)), end='')
        print('')

        # Phase 2: 100 batches from the segment-level loader
        # (plain (rgb, audio, labels) tuples).
        for i in range(100):
            batch, train_iter, train_epoch, train_batch_idx = get_batch(
                train_loader, train_iter, train_epoch, train_batch_idx)
            rgb, audio, labels = [x.cuda() for x in batch]
            loss_val = train_batch(rgb, audio, labels)
            train_loss += loss_val
            print('\r T{:4d} | {:.7f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                train_epoch, float(current_lr[0]),
                args.train_batch_size*(train_batch_idx+1), train_loader.num,
                loss_val, train_loss/(i+1)), end='')

        # Periodic validation + checkpointing every iter_val outer steps.
        if train_step > 0 and train_step % args.iter_val == 0:
            if isinstance(model, DataParallel):
                torch.save(model.module.state_dict(), model_file+'_latest')
            else:
                torch.save(model.state_dict(), model_file+'_latest')
            val_metrics = validate(args, model, val_loader)
            _save_ckp = ''
            if args.always_save or val_metrics[best_key] > best_f2:
                best_f2 = val_metrics[best_key]
                if isinstance(model, DataParallel):
                    torch.save(model.module.state_dict(), model_file)
                else:
                    torch.save(model.state_dict(), model_file)
                _save_ckp = '*'
            print(' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} | {:4s} |'.format(
                val_metrics['valid_loss'], val_metrics['top1'],
                val_metrics['top10'], best_f2,
                (time.time() - bg) / 60, _save_ckp))
            model.train()  # validate() may have switched to eval mode
            if args.lrs == 'plateau':
                lr_scheduler.step(best_f2)
            else:
                lr_scheduler.step()
            current_lr = get_lrs(optimizer)
        train_step += 1