import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

# Project-local helpers assumed to be in scope (defined elsewhere in the
# repo): get_dataloader, Logger, get_optimizer, lr_schedule, new_lr_schedule,
# get_learning_rate, multi_criterion, multi_f_measure, evaluate, and the
# `models` / `batch_size` sequences iterated below.


def train_baselines():
    # Variant that trains on the full data with the 10x learning-rate
    # schedule; logs and checkpoints go under the full_data_{name}_10xlr_2
    # prefix.
    train_data, val_data = get_dataloader(96)

    for model, batch in zip(models, batch_size):
        name = str(model).split()[1]
        print('*****Start Training {} with batch size {}******'.format(name, batch))
        print(' epoch iter rate | smooth_loss | train_loss (acc) | valid_loss (acc) | total_train_loss\n')

        logger = Logger(
            '/mnt/home/dunan/Learn/Kaggle/planet_amazon/log/full_data_{}_10xlr_2'.format(name),
            name)

        # load pre-trained model on train-37479
        net = model(pretrained=True)
        net = nn.DataParallel(net.cuda())
        # load_net(net, name)
        # optimizer = get_optimizer(net.module, lr=.005, pretrained=True,
        #                           resnet=True if 'resnet' in name else False)
        optimizer = get_optimizer(net.module, lr=.01, pretrained=True,
                                  resnet=True if 'resnet' in name else False)

        train_data.batch_size = batch
        val_data.batch_size = batch

        num_epoches = 60
        print_every_iter = 20
        epoch_test = 1

        smooth_loss = 0.0
        train_loss = np.nan
        train_acc = np.nan
        best_test_loss = np.inf
        t = time.time()

        for epoch in range(num_epoches):  # loop over the dataset multiple times
            # train loss averaged over the epoch
            total_epoch_loss = 0.0

            # lr_schedule(epoch, optimizer, base_lr=0.005, pretrained=True)
            new_lr_schedule(epoch, optimizer)
            rate = get_learning_rate(optimizer)[0]  # check

            sum_smooth_loss = 0.0
            total_sum = 0
            sum = 0  # iterations since the last smooth-loss report (shadows the builtin)
            net.cuda().train()

            num_its = len(train_data)
            for it, (images, labels, indices) in enumerate(train_data, 0):
                logits = net(Variable(images.cuda()))
                probs = F.sigmoid(logits)
                loss = multi_criterion(logits, labels.cuda())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # additional metrics
                sum_smooth_loss += loss.data[0]
                total_epoch_loss += loss.data[0]
                sum += 1
                total_sum += 1

                # print statistics
                if it % print_every_iter == print_every_iter - 1:
                    smooth_loss = sum_smooth_loss / sum
                    sum_smooth_loss = 0.0
                    sum = 0

                    train_acc = multi_f_measure(probs.data, labels.cuda())
                    train_loss = loss.data[0]
                    print('\r{} {} {} | {} | {} {} | ... '.format(
                        epoch + it / num_its, it + 1, rate, smooth_loss,
                        train_loss, train_acc),
                          end='', flush=True)

            total_epoch_loss = total_epoch_loss / total_sum

            if epoch % epoch_test == epoch_test - 1 or epoch == num_epoches - 1:
                net.cuda().eval()
                test_loss, test_acc = evaluate(net, val_data)
                print('\r', end='', flush=True)
                print('{} {} {} | {} | {} {} | {} {} | {}'.format(
                    epoch + 1, it + 1, rate, smooth_loss, train_loss,
                    train_acc, test_loss, test_acc, total_epoch_loss))

                # save if the current loss is better
                if test_loss < best_test_loss:
                    print('save {} {}'.format(test_loss, best_test_loss))
                    torch.save(
                        net.state_dict(),
                        '/mnt/home/dunan/Learn/Kaggle/planet_amazon/model/full_data_{}_10xlr_2.pth'.format(name))
                    best_test_loss = test_loss

            logger.add_record('train_loss', total_epoch_loss)
            logger.add_record('evaluation_loss', test_loss)
            logger.add_record('f2_score', test_acc)

            logger.save()
            logger.save_plot()
            logger.save_time(start_time=t, end_time=time.time())
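# ---------------------------------------------------------------------------
# `multi_criterion` and `multi_f_measure` are project-local and not shown in
# this file. The sketches below are plausible minimal implementations, not the
# repo's actual code: they assume a multi-label soft-margin loss over the raw
# logits, and a per-sample F-beta score with beta=2 (matching the 'f2_score'
# the logger records) computed on thresholded sigmoid probabilities. The 0.5
# threshold and the epsilon guards are assumptions.
# ---------------------------------------------------------------------------


def multi_criterion(logits, labels):
    # Multi-label loss; assumes `labels` is a float tensor of 0/1 targets
    # with the same shape as `logits` (wrapped for the pre-0.4 Variable API).
    return F.multilabel_soft_margin_loss(logits, Variable(labels))


def multi_f_measure(probs, labels, threshold=0.5, beta=2):
    # Mean per-sample F-beta over the batch; `probs` and `labels` are plain
    # (batch, num_classes) tensors, as in the training loop above.
    batch_size = probs.size(0)
    p = (probs > threshold).float()
    l = labels.float()

    tp = (l * p).sum(dim=1)                    # true positives per sample
    num_pred_pos = p.sum(dim=1) + 1e-12        # predicted positives
    num_true_pos = l.sum(dim=1) + 1e-12        # ground-truth positives

    precise = tp / num_pred_pos
    recall = tp / num_true_pos

    beta2 = beta * beta
    fs = (1 + beta2) * precise * recall / (beta2 * precise + recall + 1e-12)
    return fs.sum() / batch_size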
def train_baselines():
    # Baseline variant: relative log/model paths, lr_schedule(pretrained=True),
    # and a reload-and-re-evaluate sanity check after each checkpoint save.
    # Note: defined after the variant above, so it shadows it if both live in
    # the same module.
    train_data, val_data = get_dataloader(96)

    for model, batch in zip(models, batch_size):
        name = str(model).split()[1]
        print('*****Start Training {} with batch size {}******'.format(name, batch))
        print(' epoch iter rate | smooth_loss | train_loss (acc) | valid_loss (acc) | total_train_loss\n')

        logger = Logger('../log/{}'.format(name), name)

        net = model(pretrained=True)
        optimizer = get_optimizer(net, lr=.01, pretrained=True,
                                  resnet=True if 'resnet' in name else False)
        net = nn.DataParallel(net.cuda())

        train_data.batch_size = batch
        val_data.batch_size = batch

        num_epoches = 50  # 100
        print_every_iter = 20
        epoch_test = 1

        # optimizer
        # optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)
        # optimizer = optim.Adam(net.parameters(), lr=1e-4, weight_decay=5e-4)

        smooth_loss = 0.0
        train_loss = np.nan
        train_acc = np.nan
        # test_loss = np.nan
        best_test_loss = np.inf
        # test_acc = np.nan
        t = time.time()

        for epoch in range(num_epoches):  # loop over the dataset multiple times
            # train loss averaged over the epoch
            total_epoch_loss = 0.0

            lr_schedule(epoch, optimizer, pretrained=True)
            rate = get_learning_rate(optimizer)[0]  # check

            sum_smooth_loss = 0.0
            total_sum = 0
            sum = 0
            net.cuda().train()

            num_its = len(train_data)
            for it, (images, labels, indices) in enumerate(train_data, 0):
                logits = net(Variable(images.cuda()))
                probs = F.sigmoid(logits)
                loss = multi_criterion(logits, labels.cuda())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # additional metrics
                sum_smooth_loss += loss.data[0]
                total_epoch_loss += loss.data[0]
                sum += 1
                total_sum += 1

                # print statistics
                if it % print_every_iter == print_every_iter - 1:
                    smooth_loss = sum_smooth_loss / sum
                    sum_smooth_loss = 0.0
                    sum = 0

                    train_acc = multi_f_measure(probs.data, labels.cuda())
                    train_loss = loss.data[0]
                    print('\r{} {} {} | {} | {} {} | ... '.format(
                        epoch + it / num_its, it + 1, rate, smooth_loss,
                        train_loss, train_acc),
                          end='', flush=True)

            total_epoch_loss = total_epoch_loss / total_sum

            if epoch % epoch_test == epoch_test - 1 or epoch == num_epoches - 1:
                net.cuda().eval()
                test_loss, test_acc = evaluate(net, val_data)
                print('\r', end='', flush=True)
                print('{} {} {} | {} | {} {} | {} {} | {}'.format(
                    epoch + 1, it + 1, rate, smooth_loss, train_loss,
                    train_acc, test_loss, test_acc, total_epoch_loss))

                # save if the current loss is better
                if test_loss < best_test_loss:
                    print('save {} {}'.format(test_loss, best_test_loss))
                    torch.save(net.state_dict(), '../models/{}.pth'.format(name))
                    # sanity check: reload the checkpoint and re-evaluate
                    net.load_state_dict(
                        torch.load('../models/{}.pth'.format(name)))
                    print(evaluate(net, val_data))
                    best_test_loss = test_loss

            logger.add_record('train_loss', total_epoch_loss)
            logger.add_record('evaluation_loss', test_loss)
            logger.add_record('f2_score', test_acc)

            logger.save()
            logger.save_plot()
            logger.save_time(start_time=t, end_time=time.time())
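# ---------------------------------------------------------------------------
# `get_learning_rate`, `lr_schedule` and `new_lr_schedule` are likewise
# project-local. `get_learning_rate` almost certainly just reads the
# optimizer's parameter groups (the training loop prints its first element);
# the two schedules are sketched as hypothetical step decays -- the actual
# breakpoints and decay factors used in the repo are unknown.
# ---------------------------------------------------------------------------


def get_learning_rate(optimizer):
    # One learning rate per parameter group.
    return [param_group['lr'] for param_group in optimizer.param_groups]


def lr_schedule(epoch, optimizer, base_lr=0.01, pretrained=True):
    # Hypothetical step decay: drop the rate 10x at fixed epochs; the
    # `pretrained` flag could shrink the base rate for fine-tuning.
    if pretrained:
        base_lr = base_lr / 10
    if epoch < 10:
        lr = base_lr
    elif epoch < 25:
        lr = base_lr / 10
    else:
        lr = base_lr / 100
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def new_lr_schedule(epoch, optimizer):
    # Hypothetical variant for the "10xlr" run above: same decay shape,
    # larger base rate.
    lr_schedule(epoch, optimizer, base_lr=0.1, pretrained=False)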
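# ---------------------------------------------------------------------------
# A minimal sketch of what `evaluate` might look like, assuming it returns
# (mean validation loss, mean F2) over the loader using the same
# multi_criterion / multi_f_measure pair as the training loop.
# `volatile=True` matches the pre-0.4 Variable API used throughout this file.
# ---------------------------------------------------------------------------


def evaluate(net, test_loader):
    test_num = 0
    test_loss = 0.0
    test_acc = 0.0
    for images, labels, indices in test_loader:
        # volatile Variables skip gradient bookkeeping at inference time
        logits = net(Variable(images.cuda(), volatile=True))
        probs = F.sigmoid(logits)
        labels = labels.cuda()
        loss = multi_criterion(logits, labels)

        batch_size = images.size(0)
        test_num += batch_size
        test_loss += batch_size * loss.data[0]
        test_acc += batch_size * multi_f_measure(probs.data, labels)

    return test_loss / test_num, test_acc / test_num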