def escalate(init_size=32, sizes=(256, 512, 1024, 2048)):
    """Run the model multiple times with increasing resolution."""
    first = True
    for s in sizes:
        if first:
            ops.clean_filenames(path.rawC, rename='pic_%s')
            ops.crop_square_resize(path.rawC, path.C, init_size, init_size, s,
                                   s)
            first = False
        else:
            # copy output to input
            ops.output_as_input(path.test, path.tempC)
            ops.clean_filenames(path.tempC, rename='pic_%s')
            ops.blur_resize(path.tempC, path.C, s, s, blur=0)

        ops.combine(path.C, path.C, path.val, s)
        # run model on input
        ops.test(path.model, path.val, path.test, s)
elif args.cmd == 'setup_remote':
    # clone the project repository onto the GPU instance
    os.system('ssh %s "git clone %s %s"' %
              (vm.GPU_INSTANCE, path.GIT_REPO_URL, path.GIT_REPO_NAME))
    # install packages
    os.system(
        'ssh %s "sudo apt-get install -y ffmpeg python-imaging python3-pil"' %
        (vm.GPU_INSTANCE))
elif args.cmd == 'train_remote':
    """Run on GPU_INSTANCE via ssh and tmux.
    To keep a process running I use:
        tmux new-session -d 'python script.py'
    Manually running tmux first works too. Detaching is done with [ctrl]-[b], [d].
    And a running tmux session can be reattached with: tmux attach
    """
    os.system('ssh %s "cd git; git pull"' % (vm.GPU_INSTANCE))
    os.system(
        'ssh %s "python git/pix2pix-tensorflow/project_enhance.py extract"' %
        (vm.GPU_INSTANCE))
    os.system(
        'ssh %s "python git/pix2pix-tensorflow/project_enhance.py prep"' %
        (vm.GPU_INSTANCE))
    vm.call_remote_cmd_in_tmux(
        vm.GPU_INSTANCE,
        "python git/pix2pix-tensorflow/project_enhance.py train")
elif args.cmd == 'test':
    ops.test(path.model, path.val, path.test, args.size)
elif args.cmd == 'push':
    ops.push(PROJECT)
elif args.cmd == 'pull':
    ops.pull(PROJECT)
elif args.cmd == 'pull_from_relay':
    ops.pull_from_relay(PROJECT)
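
Note on escalate() at the top of this example: each pass's output becomes the next pass's input at a larger size. A minimal sketch of that feed-back-and-upscale loop, with hypothetical run_model and upscale helpers standing in for ops.test and ops.blur_resize:

def escalate_sketch(run_model, upscale, image, sizes=(256, 512, 1024, 2048)):
    """Repeatedly run the model, upscaling the previous output each time."""
    for s in sizes:
        image = upscale(image, s)  # bring the last output up to the next size
        image = run_model(image)   # enhance at this resolution
    return image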
Example No. 3
def main():
    # Settings
    parser = argparse.ArgumentParser(description='PyTorch Clothing1M')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--test_batch_size',
                        type=int,
                        default=256,
                        help='input batch size for testing (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--gpu_id',
                        type=int,
                        default=0,
                        help='index of gpu to use (default: 0)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='init learning rate (default: 0.001)')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='random seed (default: 0)')
    parser.add_argument('--save',
                        action='store_true',
                        default=False,
                        help='For saving softmax_out_avg')
    parser.add_argument('--SEAL',
                        type=int,
                        default=0,
                        help='Phase of self-evolution')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device(
        'cuda:' + str(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    # Datasets
    root = './data/Clothing1M'
    num_classes = 14
    kwargs = {
        'num_workers': 32,
        'pin_memory': True
    } if torch.cuda.is_available() else {}
    transform_train = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    transform_test = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    train_dataset = Clothing1M(root, mode='train', transform=transform_train)
    val_dataset = Clothing1M(root, mode='val', transform=transform_test)
    test_dataset = Clothing1M(root, mode='test', transform=transform_test)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    softmax_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.test_batch_size,
        shuffle=False,
        **kwargs)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.test_batch_size,
                                             shuffle=False,
                                             **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              **kwargs)

    def learning_rate(lr_init, epoch):
        optim_factor = 0
        if (epoch > 5):
            optim_factor = 1
        return lr_init * math.pow(0.1, optim_factor)

    def load_pretrain(num_classes, device):
        model_pre = resnet50(
            num_classes=1000,
            pretrained=True)  # ImageNet pretrained, num_classes=1000
        if num_classes == 1000:
            return model_pre.to(device)

        else:
            model = resnet50(num_classes=num_classes, pretrained=False)
            params_pre = model_pre.state_dict().copy()
            params = model.state_dict()
            for i in params_pre:
                if not i.startswith('fc'):
                    params[i] = params_pre[i]
            model.load_state_dict(params)
            return model.to(device)

    # results
    results_root = os.path.join('results', 'clothing')
    if not os.path.isdir(results_root):
        os.makedirs(results_root)
    """ Test model """
    if args.SEAL == -1:
        model = resnet50().to(device)
        model.load_state_dict(
            torch.load(os.path.join(results_root, 'seed0_clothing_normal.pt')))
        test(args, model, device, test_loader)
    """ Get softmax_out_avg - normal training on noisy labels """
    if args.SEAL == 0:
        print(
            'The DMI model is trained using the official PyTorch implementation of L_DMI <https://github.com/Newbeeer/L_DMI>.\n'
        )
    """ Self Evolution - training on softmax_out_avg from DMI model """
    if args.SEAL == 1:
        # Loading softmax_out_avg of last phase
        softmax_root = os.path.join(results_root, 'softmax_out_dmi.npy')
        softmax_out_avg = np.load(softmax_root).reshape(
            [-1, len(train_dataset), num_classes])
        softmax_out_avg = softmax_out_avg[:5].mean(
            axis=0
        )  # We found that the DMI model may not have converged in the last 5 epochs.
        print('softmax_out_avg loaded from', softmax_root, ', shape: ',
              softmax_out_avg.shape)

        # Dataset with soft targets
        train_dataset_soft = Clothing1M_soft(root,
                                             targets_soft=torch.Tensor(
                                                 softmax_out_avg.copy()),
                                             mode='train',
                                             transform=transform_train)
        train_loader_soft = torch.utils.data.DataLoader(
            train_dataset_soft,
            batch_size=args.batch_size,
            shuffle=True,
            **kwargs)

        # Building model
        model = load_pretrain(num_classes, device)
        model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])
        model.load_state_dict(
            torch.load(os.path.join(results_root, 'clothing_dmi.pt')))
        print('Initialize the model using DMI model.')

        # Training
        best_val_acc = 0
        save_path = os.path.join(
            results_root, 'seed' + str(args.seed) + '_clothing_dmi_SEAL1.pt')
        softmax_out = []
        for epoch in range(1, args.epochs + 1):
            optimizer = optim.SGD(model.parameters(),
                                  lr=learning_rate(args.lr, epoch),
                                  momentum=0.9,
                                  weight_decay=1e-3)
            train_soft(args, model, device, train_loader_soft, optimizer,
                       epoch)
            best_val_acc = val_test(args, model, device, val_loader,
                                    test_loader, best_val_acc, save_path)
            softmax_out.append(get_softmax_out(model, softmax_loader, device))

        if args.save:
            softmax_root = os.path.join(
                results_root,
                'seed' + str(args.seed) + '_softmax_out_dmi_SEAL1.npy')
            softmax_out = np.concatenate(softmax_out)
            np.save(softmax_root, softmax_out)
            print('new softmax_out saved to', softmax_root, ', shape: ',
                  softmax_out.shape)

    if args.SEAL >= 2:
        # Loading softmax_out_avg of last phase
        softmax_root = os.path.join(
            results_root, 'seed' + str(args.seed) + '_softmax_out_dmi_SEAL' +
            str(args.SEAL - 1) + '.npy')
        softmax_out_avg = np.load(softmax_root).reshape(
            [-1, len(train_dataset), num_classes])
        softmax_out_avg = softmax_out_avg.mean(axis=0)
        print('softmax_out_avg loaded from', softmax_root, ', shape: ',
              softmax_out_avg.shape)

        # Dataset with soft targets
        train_dataset_soft = Clothing1M_soft(root,
                                             targets_soft=torch.Tensor(
                                                 softmax_out_avg.copy()),
                                             mode='train',
                                             transform=transform_train)
        train_loader_soft = torch.utils.data.DataLoader(
            train_dataset_soft,
            batch_size=args.batch_size,
            shuffle=True,
            **kwargs)

        # Building model
        model = load_pretrain(num_classes, device)
        model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])
        model_path = os.path.join(
            results_root, 'seed' + str(args.seed) + '_clothing_dmi_SEAL' +
            str(args.SEAL - 1) + '.pt')
        model.load_state_dict(torch.load(model_path))
        print('Initialize the model using {}.'.format(model_path))

        # Training
        best_val_acc = 0
        save_path = os.path.join(
            results_root, 'seed' + str(args.seed) + '_clothing_dmi_SEAL' +
            str(args.SEAL) + '.pt')
        softmax_out = []
        for epoch in range(1, args.epochs + 1):
            optimizer = optim.SGD(model.parameters(),
                                  lr=learning_rate(args.lr, epoch),
                                  momentum=0.9,
                                  weight_decay=1e-3)
            train_soft(args, model, device, train_loader_soft, optimizer,
                       epoch)
            best_val_acc = val_test(args, model, device, val_loader,
                                    test_loader, best_val_acc, save_path)
            softmax_out.append(get_softmax_out(model, softmax_loader, device))

        if args.save:
            softmax_root = os.path.join(
                results_root, 'seed' + str(args.seed) +
                '_softmax_out_dmi_SEAL' + str(args.SEAL) + '.npy')
            softmax_out = np.concatenate(softmax_out)
            np.save(softmax_root, softmax_out)
            print('new softmax_out saved to', softmax_root, ', shape: ',
                  softmax_out.shape)
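
For reference: the SEAL phases above assume get_softmax_out returns one [len(train_dataset), num_classes] array per epoch, so the concatenated save can be split back into an epoch axis and averaged. A small NumPy sketch of that round trip (sizes are illustrative):

import numpy as np

epochs, n, c = 10, 1000, 14                   # illustrative sizes
per_epoch = [np.random.rand(n, c) for _ in range(epochs)]

flat = np.concatenate(per_epoch)              # saved as one [epochs*n, c] array
avg = flat.reshape(-1, n, c).mean(axis=0)     # recover epoch axis, then average
assert avg.shape == (n, c)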
Example No. 4
def main():
    # Settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST')
    parser.add_argument('--batch_size', type=int, default=64, help='input batch size for training (default: 64)')
    parser.add_argument('--test_batch_size', type=int, default=1000, help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=50, help='number of epochs to train (default: 50)')
    parser.add_argument('--gpu_id', type=int, default=0, help='index of gpu to use (default: 0)')
    parser.add_argument('--lr', type=float, default=0.01, help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, help='SGD momentum (default: 0.5)')
    parser.add_argument('--seed', type=int, default=0, help='random seed (default: 0)')
    parser.add_argument('--noise_pattern', type=str, default='dependent', help='Noise pattern (default: dependent)')
    parser.add_argument('--noise_rate', type=float, default=0.0, help='Noise rate (default: 0.0)')
    parser.add_argument('--save', action='store_true', default=False, help='For saving softmax_out_avg')
    parser.add_argument('--SEAL', type=int, default=0, help='Phase of self-evolution')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device('cuda:'+str(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    # Datasets
    root = './data'
    kwargs = {'num_workers': 4, 'pin_memory': True} if torch.cuda.is_available() else {}
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])  # 0.1307 and 0.3081 are the mean and std of MNIST
    train_dataset = datasets.MNIST(root, train=True, download=True, transform=transform)
    train_dataset_noisy = datasets.MNIST(root, train=True, transform=transform)
    test_dataset = datasets.MNIST(root, train=False, transform=transform)

    # class indices must be integer (long) tensors for NLL/cross-entropy losses
    targets_noisy = torch.LongTensor(pd.read_csv(os.path.join('./data/MNIST/label_noisy', args.noise_pattern+str(args.noise_rate)+'.csv'))['label_noisy'].values.astype(int))
    train_dataset_noisy.targets = targets_noisy
    
    train_loader = torch.utils.data.DataLoader(train_dataset_noisy, batch_size=args.batch_size, shuffle=True, **kwargs)
    softmax_loader = torch.utils.data.DataLoader(train_dataset_noisy, batch_size=args.test_batch_size, shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.test_batch_size, shuffle=False, **kwargs)

    # results
    results_root = os.path.join('results', 'mnist_'+args.noise_pattern+str(args.noise_rate))
    if not os.path.isdir(results_root):
        os.makedirs(results_root)

    """ Get softmax_out_avg - normal training on noisy labels """
    if args.SEAL==0:
        # Building model
        model = MNIST_CNN().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

        # Training
        softmax_out_avg = np.zeros([len(train_dataset_noisy), 10])
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader)
            softmax_out_avg += get_softmax_out(model, softmax_loader, device)

        softmax_out_avg /= args.epochs
        if args.save:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_avg_'+args.noise_pattern+str(args.noise_rate)+'_normal.npy')
            np.save(softmax_root, softmax_out_avg)
            print('new softmax_out_avg saved to', softmax_root, ', shape: ', softmax_out_avg.shape)

    """ Self Evolution - training on softmax_out_avg """
    if args.SEAL>=1:
        # Loading softmax_out_avg of last phase
        if args.SEAL==1:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_avg_'+args.noise_pattern+str(args.noise_rate)+'_normal.npy')
        else:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_avg_'+args.noise_pattern+str(args.noise_rate)+'_SEAL'+str(args.SEAL-1)+'.npy')
        softmax_out_avg = np.load(softmax_root)
        print('softmax_out_avg loaded from', softmax_root, ', shape: ', softmax_out_avg.shape)
       
        # Dataset with soft targets
        train_dataset_soft = MNIST_soft(root, targets_soft=torch.Tensor(softmax_out_avg.copy()), train=True, transform=transform)
        train_dataset_soft.targets = targets_noisy
        train_loader_soft = torch.utils.data.DataLoader(train_dataset_soft, batch_size=args.batch_size, shuffle=True, **kwargs)

        # Building model
        model = MNIST_CNN().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

        # Training 
        softmax_out_avg = np.zeros([len(train_dataset_noisy), 10])
        for epoch in range(1, args.epochs + 1):
            train_soft(args, model, device, train_loader_soft, optimizer, epoch)
            test(args, model, device, test_loader)
            softmax_out_avg += get_softmax_out(model, softmax_loader, device)

        softmax_out_avg /= args.epochs
        if args.save:
            softmax_root = os.path.join(results_root, 'seed'+str(args.seed)+'_softmax_out_avg_'+args.noise_pattern+str(args.noise_rate)+'_SEAL'+str(args.SEAL)+'.npy')
            np.save(softmax_root, softmax_out_avg)
            print('new softmax_out_avg saved to', softmax_root, ', shape: ', softmax_out_avg.shape)
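
train_soft is not shown in these listings; a soft-target phase of this kind ordinarily minimizes cross-entropy against the averaged softmax instead of the hard label. A hedged sketch of such a loss (an assumption, not necessarily the repository's exact code):

import torch.nn.functional as F

def soft_cross_entropy(logits, targets_soft):
    # targets_soft: [batch, num_classes] averaged softmax from the last phase
    log_probs = F.log_softmax(logits, dim=1)
    return -(targets_soft * log_probs).sum(dim=1).mean()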
Example No. 5
def main():
    # Settings
    parser = argparse.ArgumentParser(description='PyTorch cifar10')
    parser.add_argument('--batch_size',
                        type=int,
                        default=128,
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test_batch_size',
                        type=int,
                        default=1000,
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=150,
                        help='number of epochs to train (default: 150)')
    parser.add_argument('--gpu_id',
                        type=int,
                        default=0,
                        help='index of gpu to use (default: 0)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        help='init learning rate (default: 0.1)')
    parser.add_argument('--dp',
                        type=float,
                        default=0.0,
                        help='dropout rate (default: 0.0)')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='random seed (default: 0)')
    parser.add_argument('--noise_pattern',
                        type=str,
                        default='dependent',
                        help='Noise pattern (default: dependent)')
    parser.add_argument('--noise_rate',
                        type=float,
                        default=0.0,
                        help='Noise rate (default: 0.0)')
    parser.add_argument('--save',
                        action='store_true',
                        default=False,
                        help='For saving softmax_out_avg')
    parser.add_argument('--SEAL',
                        type=int,
                        default=0,
                        help='Phase of self-evolution')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device(
        'cuda:' + str(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    # Datasets
    root = './data/CIFAR10'
    num_classes = 10
    kwargs = {
        'num_workers': 4,
        'pin_memory': True
    } if torch.cuda.is_available() else {}
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010))
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010))
    ])

    train_dataset = datasets.CIFAR10(root,
                                     train=True,
                                     download=True,
                                     transform=transform_train)
    train_dataset_noisy = datasets.CIFAR10(root,
                                           train=True,
                                           transform=transform_train)
    test_dataset = datasets.CIFAR10(root,
                                    train=False,
                                    transform=transform_test)

    targets_noisy = list(
        pd.read_csv(
            os.path.join('./data/CIFAR10/label_noisy',
                         args.noise_pattern + str(args.noise_rate) +
                         '.csv'))['label_noisy'].values.astype(int))
    train_dataset_noisy.targets = targets_noisy

    train_loader = torch.utils.data.DataLoader(train_dataset_noisy,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    softmax_loader = torch.utils.data.DataLoader(
        train_dataset_noisy,
        batch_size=args.test_batch_size,
        shuffle=False,
        **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              **kwargs)

    def learning_rate(lr_init, epoch):
        optim_factor = 0
        if (epoch > 120):
            optim_factor = 2
        elif (epoch > 60):
            optim_factor = 1
        return lr_init * math.pow(0.2, optim_factor)

    # results
    results_root = os.path.join(
        'results', 'cifar10_' + args.noise_pattern + str(args.noise_rate))
    if not os.path.isdir(results_root):
        os.makedirs(results_root)
    """ Get softmax_out_avg - normal training on noisy labels """
    if args.SEAL == 0:
        # Building model
        model = Wide_ResNet(depth=28,
                            widen_factor=10,
                            dropout_rate=args.dp,
                            num_classes=num_classes).to(device)

        # Training
        softmax_out_avg = np.zeros([len(train_dataset_noisy), num_classes])
        for epoch in range(1, args.epochs + 1):
            optimizer = optim.SGD(model.parameters(),
                                  lr=learning_rate(args.lr, epoch),
                                  momentum=0.9,
                                  weight_decay=5e-4)
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader)
            softmax_out_avg += get_softmax_out(model, softmax_loader, device)

        softmax_out_avg /= args.epochs
        if args.save:
            softmax_root = os.path.join(
                results_root, 'seed' + str(args.seed) + '_softmax_out_avg_' +
                args.noise_pattern + str(args.noise_rate) + '_normal.npy')
            np.save(softmax_root, softmax_out_avg)
            print('new softmax_out_avg saved to', softmax_root, ', shape: ',
                  softmax_out_avg.shape)
    """ Self Evolution - training on softmax_out_avg """
    if args.SEAL >= 1:
        # Loading softmax_out_avg of last phase
        if args.SEAL == 1:
            softmax_root = os.path.join(
                results_root, 'seed' + str(args.seed) + '_softmax_out_avg_' +
                args.noise_pattern + str(args.noise_rate) + '_normal.npy')
        else:
            softmax_root = os.path.join(
                results_root, 'seed' + str(args.seed) + '_softmax_out_avg_' +
                args.noise_pattern + str(args.noise_rate) + '_SEAL' +
                str(args.SEAL - 1) + '.npy')
        softmax_out_avg = np.load(softmax_root)
        print('softmax_out_avg loaded from', softmax_root, ', shape: ',
              softmax_out_avg.shape)

        # Dataset with soft targets
        train_dataset_soft = CIFAR10_soft(root,
                                          targets_soft=torch.Tensor(
                                              softmax_out_avg.copy()),
                                          train=True,
                                          transform=transform_train)
        train_dataset_soft.targets = targets_noisy
        train_loader_soft = torch.utils.data.DataLoader(
            train_dataset_soft,
            batch_size=args.batch_size,
            shuffle=True,
            **kwargs)

        # Building model
        model = Wide_ResNet(depth=28,
                            widen_factor=10,
                            dropout_rate=args.dp,
                            num_classes=num_classes).to(device)

        # Training
        softmax_out_avg = np.zeros([len(train_dataset_noisy), num_classes])
        for epoch in range(1, args.epochs + 1):
            optimizer = optim.SGD(model.parameters(),
                                  lr=learning_rate(args.lr, epoch),
                                  momentum=0.9,
                                  weight_decay=5e-4)
            train_soft(args, model, device, train_loader_soft, optimizer,
                       epoch)
            test(args, model, device, test_loader)
            softmax_out_avg += get_softmax_out(model, softmax_loader, device)

        softmax_out_avg /= args.epochs
        if args.save:
            softmax_root = os.path.join(
                results_root, 'seed' + str(args.seed) + '_softmax_out_avg_' +
                args.noise_pattern + str(args.noise_rate) + '_SEAL' +
                str(args.SEAL) + '.npy')
            np.save(softmax_root, softmax_out_avg)
            print('new softmax_out_avg saved to', softmax_root, ', shape: ',
                  softmax_out_avg.shape)
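
The learning_rate schedule in this example is a plain step decay: lr_init through epoch 60, 0.2 * lr_init through epoch 120, and 0.04 * lr_init after that. With the default --lr 0.1:

>>> [round(learning_rate(0.1, e), 4) for e in (30, 90, 140)]
[0.1, 0.02, 0.004]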
Example No. 6
import ops
import sys
import convert_to_midi
import os

if __name__ == "__main__":
    ops.test(sys.argv[1])
    a = convert_to_midi.transcribe(sys.argv[1])
    command = "open /Applications/MuseScore\ 3.app " + a
    os.system(command)
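
A note on the last two lines: interpolating a path into os.system breaks if it contains spaces or other shell metacharacters. A sketch of the same launch via subprocess, which passes each argument verbatim:

import subprocess

def open_in_musescore(score_path):
    # each list element is passed as one argument, so spaces in paths are safe
    subprocess.run(['open', '-a', 'MuseScore 3', score_path], check=True)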
Example No. 7
def main():
    # Settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test_batch_size',
                        type=int,
                        default=1000,
                        help='input batch size for testing (default: 1000)')
    parser.add_argument(
        '--epochs',
        type=int,
        default=20,
        help='number of epochs to train (default: 20)'
    )  # On clean data, 20 is sufficiently large to achieve 100% training accuracy.
    parser.add_argument('--gpu_id',
                        type=int,
                        default=0,
                        help='index of gpu to use (default: 0)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='random seed (default: 0)')
    parser.add_argument('--noise_rate',
                        type=float,
                        default=0.0,
                        help='Noise rate (default: 0.0)')
    parser.add_argument('--load',
                        action='store_true',
                        default=False,
                        help='Load existing averaged softmax')
    parser.add_argument('--gen',
                        action='store_true',
                        default=False,
                        help='Generate noisy labels')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device(
        'cuda:' + str(args.gpu_id) if torch.cuda.is_available() else 'cpu')

    # Datasets
    root = './data'
    kwargs = {
        'num_workers': 4,
        'pin_memory': True
    } if torch.cuda.is_available() else {}
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])  # 0.1307 and 0.3081 are the mean and std of MNIST
    train_dataset = datasets.MNIST(root,
                                   train=True,
                                   download=True,
                                   transform=transform)
    test_dataset = datasets.MNIST(root, train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.test_batch_size,
                                              shuffle=False,
                                              **kwargs)

    if args.load:
        softmax_out_avg = np.load('data/MNIST/label_noisy/softmax_out_avg.npy')
        print('softmax_out_avg loaded, shape: ', softmax_out_avg.shape)

    else:
        # Building model
        model = MNIST_CNN().to(device)
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum)

        # Training
        softmax_out_avg = np.zeros([len(train_dataset), 10])
        softmax_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.test_batch_size,
            shuffle=False,
            **kwargs)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader)
            softmax_out_avg += get_softmax_out(model, softmax_loader, device)

        softmax_out_avg /= args.epochs
        os.makedirs('data/MNIST/label_noisy', exist_ok=True)  # ensure dir exists
        np.save('data/MNIST/label_noisy/softmax_out_avg.npy', softmax_out_avg)

    if args.gen:
        print('Generating noisy labels according to softmax_out_avg...')
        label = np.array(train_dataset.targets)
        label_noisy_cand, label_noisy_prob = [], []
        for i in range(len(label)):
            pred = softmax_out_avg[i, :].copy()
            pred[label[i]] = -1
            label_noisy_cand.append(np.argmax(pred))
            label_noisy_prob.append(np.max(pred))

        label_noisy = label.copy()
        num_noisy = int(args.noise_rate * len(label))
        if num_noisy > 0:  # guard: a [-0:] slice would select every index
            index = np.argsort(label_noisy_prob)[-num_noisy:]
            label_noisy[index] = np.array(label_noisy_cand)[index]

        save_pth = os.path.join('./data/MNIST/label_noisy',
                                'dependent' + str(args.noise_rate) + '.csv')
        pd.DataFrame.from_dict({
            'label': label,
            'label_noisy': label_noisy
        }).to_csv(save_pth, index=False)
        print('Noisy label data saved to ', save_pth)
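
The generation step above picks, for each image, the most confident wrong class as the flip candidate, then flips only the noise_rate fraction of images where that wrong class scored highest. A tiny NumPy illustration with toy numbers:

import numpy as np

softmax_out_avg = np.array([[0.7, 0.2, 0.1],   # true label 0
                            [0.1, 0.5, 0.4],   # true label 1
                            [0.3, 0.3, 0.4]])  # true label 2
label = np.array([0, 1, 2])

pred = softmax_out_avg.copy()
pred[np.arange(3), label] = -1       # mask out the true class
cand = pred.argmax(axis=1)           # most confident wrong class per image
prob = pred.max(axis=1)              # its confidence

flip = np.argsort(prob)[-1:]         # flip the single most confident image
label_noisy = label.copy()
label_noisy[flip] = cand[flip]       # -> [0, 2, 2]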