# Training baselines for the Planet Amazon multi-label task. The helpers used
# below (get_dataloader, get_optimizer, Logger, lr_schedule, new_lr_schedule,
# get_learning_rate, multi_criterion, multi_f_measure, evaluate) and the
# module-level `models` / `batch_size` lists are defined elsewhere in this
# repository.
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


def train_baselines_full_data():
    train_data, val_data = get_dataloader(96)

    for model, batch in zip(models, batch_size):
        name = str(model).split()[1]  # "<function resnet34 at 0x...>" -> "resnet34"
        print('***** Start training {} with batch size {} *****'.format(
            name, batch))
        print(
            ' epoch   iter   rate  |  smooth_loss   |  train_loss  (acc)  |  valid_loss  (acc)  | total_train_loss\n'
        )
        logger = Logger(
            '/mnt/home/dunan/Learn/Kaggle/planet_amazon/log/full_data_{}_10xlr_2'
            .format(name), name)

        # load pre-trained model on train-37479
        net = model(pretrained=True)
        net = nn.DataParallel(net.cuda())
        # load_net(net, name)
        # optimizer = get_optimizer(net.module, lr=.005, pretrained=True, resnet=True if 'resnet' in name else False)
        optimizer = get_optimizer(net.module,
                                  lr=.01,
                                  pretrained=True,
                                  resnet='resnet' in name)
        train_data.batch_size = batch
        val_data.batch_size = batch

        num_epochs = 60
        print_every_iter = 20
        epoch_test = 1

        smooth_loss = 0.0
        train_loss = np.nan
        train_acc = np.nan
        test_loss = np.nan  # initialised so the logging below is safe if epoch_test > 1
        test_acc = np.nan
        best_test_loss = np.inf
        t = time.time()

        for epoch in range(num_epochs):  # loop over the dataset multiple times

            # train loss averaged every epoch
            total_epoch_loss = 0.0

            # lr_schedule(epoch, optimizer, base_lr=0.005, pretrained=True)
            new_lr_schedule(epoch, optimizer)

            rate = get_learning_rate(optimizer)[0]  # current learning rate, for logging

            sum_smooth_loss = 0.0  # loss accumulated since the last print
            total_sum = 0  # iterations seen this epoch
            count = 0  # iterations since the last print
            net.cuda().train()

            num_its = len(train_data)
            for it, (images, labels, indices) in enumerate(train_data, 0):

                # forward pass: one logit per label; sigmoid gives per-label probabilities
                logits = net(Variable(images.cuda()))
                probs = F.sigmoid(logits)
                loss = multi_criterion(logits, labels.cuda())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # additional metrics
                sum_smooth_loss += loss.data[0]
                total_epoch_loss += loss.data[0]
                count += 1
                total_sum += 1

                # print statistics
                if it % print_every_iter == print_every_iter - 1:
                    smooth_loss = sum_smooth_loss / count
                    sum_smooth_loss = 0.0
                    count = 0

                    train_acc = multi_f_measure(probs.data, labels.cuda())
                    train_loss = loss.data[0]
                    print('\r{}   {}    {}   |  {}  | {}  {} | ... '.format(
                        epoch + it / num_its, it + 1, rate, smooth_loss,
                        train_loss, train_acc),
                          end='',
                          flush=True)

            total_epoch_loss = total_epoch_loss / total_sum
            if epoch % epoch_test == epoch_test - 1 or epoch == num_epochs - 1:
                net.cuda().eval()
                test_loss, test_acc = evaluate(net, val_data)
                print('\r', end='', flush=True)
                print('{}   {}    {}   |  {}  | {}  {} | {}  {} | {}'.format(
                    epoch + 1, it + 1, rate, smooth_loss, train_loss,
                    train_acc, test_loss, test_acc, total_epoch_loss))

                # save if the current loss is better
                if test_loss < best_test_loss:
                    print('save {} {}'.format(test_loss, best_test_loss))
                    torch.save(
                        net.state_dict(),
                        '/mnt/home/dunan/Learn/Kaggle/planet_amazon/model/full_data_{}_10xlr_2.pth'
                        .format(name))
                    best_test_loss = test_loss

            logger.add_record('train_loss', total_epoch_loss)
            logger.add_record('evaluation_loss', test_loss)
            logger.add_record('f2_score', test_acc)

            logger.save()
            logger.save_plot()
            logger.save_time(start_time=t, end_time=time.time())
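

# `new_lr_schedule`, `lr_schedule` and `get_learning_rate` are the repo's own
# helpers. As a rough sketch only: a step scheduler of this kind typically
# rewrites the rate on each optimizer parameter group at fixed epoch
# boundaries. The boundaries and decay factors below are illustrative
# assumptions, not the repo's actual schedule.
def _example_step_lr_schedule(epoch, optimizer, base_lr=0.01):
    # Hypothetical decay points: 10x drops at epochs 30 and 45.
    if epoch < 30:
        lr = base_lr
    elif epoch < 45:
        lr = base_lr * 0.1
    else:
        lr = base_lr * 0.01
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def _example_get_learning_rate(optimizer):
    # Mirrors how get_learning_rate(optimizer)[0] is used above:
    # one current rate per parameter group.
    return [param_group['lr'] for param_group in optimizer.param_groups]
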
def train_baselines():

    train_data, val_data = get_dataloader(96)

    for model, batch in zip(models, batch_size):
        name = str(model).split()[1]  # "<function resnet34 at 0x...>" -> "resnet34"
        print('***** Start training {} with batch size {} *****'.format(
            name, batch))
        print(
            ' epoch   iter   rate  |  smooth_loss   |  train_loss  (acc)  |  valid_loss  (acc)  | total_train_loss\n'
        )
        logger = Logger('../log/{}'.format(name), name)

        net = model(pretrained=True)
        optimizer = get_optimizer(net,
                                  lr=.01,
                                  pretrained=True,
                                  resnet='resnet' in name)
        net = nn.DataParallel(net.cuda())

        train_data.batch_size = batch
        val_data.batch_size = batch

        num_epochs = 50  # 100
        print_every_iter = 20
        epoch_test = 1

        # optimizer
        # optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)
        # optimizer = optim.Adam(net.parameters(), lr=1e-4, weight_decay=5e-4)

        smooth_loss = 0.0
        train_loss = np.nan
        train_acc = np.nan
        test_loss = np.nan  # initialised so the logging below is safe if epoch_test > 1
        test_acc = np.nan
        best_test_loss = np.inf
        t = time.time()

        for epoch in range(num_epochs):  # loop over the dataset multiple times

            # train loss averaged every epoch
            total_epoch_loss = 0.0

            lr_schedule(epoch, optimizer, pretrained=True)

            rate = get_learning_rate(optimizer)[0]  # current learning rate, for logging

            sum_smooth_loss = 0.0  # loss accumulated since the last print
            total_sum = 0  # iterations seen this epoch
            count = 0  # iterations since the last print
            net.cuda().train()

            num_its = len(train_data)
            for it, (images, labels, indices) in enumerate(train_data, 0):

                # forward pass: one logit per label; sigmoid gives per-label probabilities
                logits = net(Variable(images.cuda()))
                probs = F.sigmoid(logits)
                loss = multi_criterion(logits, labels.cuda())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # additional metrics
                sum_smooth_loss += loss.data[0]
                total_epoch_loss += loss.data[0]
                count += 1
                total_sum += 1

                # print statistics
                if it % print_every_iter == print_every_iter - 1:
                    smooth_loss = sum_smooth_loss / count
                    sum_smooth_loss = 0.0
                    count = 0

                    train_acc = multi_f_measure(probs.data, labels.cuda())
                    train_loss = loss.data[0]
                    print('\r{}   {}    {}   |  {}  | {}  {} | ... '.format(
                        epoch + it / num_its, it + 1, rate, smooth_loss,
                        train_loss, train_acc),
                          end='',
                          flush=True)

            total_epoch_loss = total_epoch_loss / total_sum
            if epoch % epoch_test == epoch_test - 1 or epoch == num_epochs - 1:
                net.cuda().eval()
                test_loss, test_acc = evaluate(net, val_data)
                print('\r', end='', flush=True)
                print('{}   {}    {}   |  {}  | {}  {} | {}  {} | {}'.format(
                    epoch + 1, it + 1, rate, smooth_loss, train_loss,
                    train_acc, test_loss, test_acc, total_epoch_loss))

                # save if the current loss is better
                if test_loss < best_test_loss:
                    print('save {} {}'.format(test_loss, best_test_loss))
                    torch.save(net.state_dict(),
                               '../models/{}.pth'.format(name))
                    # reload the saved weights and re-evaluate as a sanity check
                    net.load_state_dict(
                        torch.load('../models/{}.pth'.format(name)))
                    print(evaluate(net, val_data))

                    best_test_loss = test_loss

            logger.add_record('train_loss', total_epoch_loss)
            logger.add_record('evaluation_loss', test_loss)
            logger.add_record('f2_score', test_acc)

            logger.save()
            logger.save_plot()
            logger.save_time(start_time=t, end_time=time.time())
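

# `multi_criterion` and `multi_f_measure` are also defined elsewhere in the
# repo. For this multi-label task (scored by F2, logged above as 'f2_score'),
# plausible sketches look like the following; treat them as assumptions about
# the helpers' behaviour, not the repo's exact implementations.
def _example_multi_criterion(logits, labels):
    # Multi-label binary cross-entropy on sigmoid probabilities. The call
    # sites above pass logits as a Variable and labels as a plain tensor.
    return F.binary_cross_entropy(F.sigmoid(logits), Variable(labels.float()))


def _example_multi_f_measure(probs, labels, threshold=0.5, beta=2):
    # Batch-averaged F-beta; beta=2 weights recall over precision.
    eps = 1e-12
    preds = (probs > threshold).float()
    labels = labels.float()
    tp = (preds * labels).sum(1)
    precision = tp / (preds.sum(1) + eps)
    recall = tp / (labels.sum(1) + eps)
    fbeta = (1 + beta ** 2) * precision * recall / (
        beta ** 2 * precision + recall + eps)
    return fbeta.mean()


if __name__ == '__main__':
    train_baselines()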