Example #1
def run():
    print("CUDA is available: {}".format(torch.cuda.is_available()))
    data_transform = transforms.Compose(
        [Rescale(250), CenterCrop(224),
         Normalize(), ToTensor()])

    # the loader splits the dataset into batches with the size defined by batch_size
    train_loader = initialize_train_loader(data_transform)
    test_loader = initialize_test_loader(data_transform)

    model_id = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime())
    # instantiate the neural network
    net = Net()
    net.to(device=device)
    summary(net, (1, 224, 224))
    # define the loss function using SmoothL1Loss
    criterion = nn.SmoothL1Loss()
    # define the params updating function using Adam
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    loss_logger = []

    for i in range(1, epochs + 1):
        model_name = 'model-{}-epoch-{}.pt'.format(model_id, i)

        # train all data for one epoch
        train(net, criterion, optimizer, i, train_loader, model_id,
              loss_logger)

        # evaluate the accuracy after each epoch
        evaluate(net, criterion, i, test_loader)

        # save model after every 5 epochs
        # https://discuss.pytorch.org/t/loading-a-saved-model-for-continue-training/17244/3
        # https://github.com/pytorch/pytorch/issues/2830
        # https://pytorch.org/tutorials/beginner/saving_loading_models.html
        if i % 5 == 0:
            torch.save(
                {
                    'epoch': i,
                    'model': net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'loss_logger': loss_logger,
                }, model_dir + model_name)

    print("Finished training!")
    model_name = 'model-{}-final.pt'.format(model_id)
    torch.save(
        {
            'epoch': epochs,
            'model': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            'loss_logger': loss_logger,
        }, model_dir + model_name)
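The two loader helpers used above are defined elsewhere. A minimal sketch of what they might look like, assuming the FacialKeypointsDataset class, a DataLoader import, and a module-level batch_size (all outside this snippet; the CSV paths are borrowed from Example #9):

def initialize_train_loader(data_transform):
    # hypothetical helper: wrap the training set in a batched, shuffled loader
    train_dataset = FacialKeypointsDataset(
        csv_file='data/training_frames_keypoints.csv',
        root_dir='data/training/',
        transform=data_transform)
    return DataLoader(train_dataset, batch_size=batch_size,
                      shuffle=True, num_workers=4)


def initialize_test_loader(data_transform):
    # hypothetical helper: same wrapping for the held-out set, no shuffling
    test_dataset = FacialKeypointsDataset(
        csv_file='data/test_frames_keypoints.csv',
        root_dir='data/test/',
        transform=data_transform)
    return DataLoader(test_dataset, batch_size=batch_size,
                      shuffle=False, num_workers=4)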
Example #2
def main(args):
    train_loader, val_loader = create_dataloaders(args.batch_size)

    model = Net().to(device)
    optim = torch.optim.Adam(model.parameters())
    lr_schedule = torch.optim.lr_scheduler.ReduceLROnPlateau(optim,
                                                             patience=1,
                                                             verbose=True)
    criterion = torch.nn.CrossEntropyLoss()

    best_accuracy = 0
    for epoch in range(1, args.epochs + 1):
        model.train()
        train_loss, train_accuracy = do_epoch(model,
                                              train_loader,
                                              criterion,
                                              optim=optim)

        model.eval()
        with torch.no_grad():
            val_loss, val_accuracy = do_epoch(model,
                                              val_loader,
                                              criterion,
                                              optim=None)

        tqdm.write(
            f'EPOCH {epoch:03d}: train_loss={train_loss:.4f}, train_accuracy={train_accuracy:.4f} '
            f'val_loss={val_loss:.4f}, val_accuracy={val_accuracy:.4f}')

        if val_accuracy > best_accuracy:
            print('Saving model...')
            best_accuracy = val_accuracy
            torch.save(model.state_dict(), 'trained_models/source.pt')

        lr_schedule.step(val_loss)
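main() leans on a do_epoch helper that serves both the training and the evaluation pass. A minimal sketch, assuming the global device and top-1 classification accuracy:

def do_epoch(model, dataloader, criterion, optim=None):
    # one full pass over the data; weights are updated only when an
    # optimizer is supplied, so the same helper covers train and eval
    total_loss = 0
    total_accuracy = 0
    for x, y_true in dataloader:
        x, y_true = x.to(device), y_true.to(device)
        y_pred = model(x)
        loss = criterion(y_pred, y_true)
        if optim is not None:
            optim.zero_grad()
            loss.backward()
            optim.step()
        total_loss += loss.item()
        total_accuracy += (y_pred.max(1)[1] == y_true).float().mean().item()
    return total_loss / len(dataloader), total_accuracy / len(dataloader)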
Example #3
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch Vec2Color Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=1500, metavar='N',
                        help='number of epochs to train (default: 1500)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')

    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    file_names = ('capitalize', 'lower', 'upper', 'title')
    x_df = pd.concat([pd.read_csv('doc2color/data/{}.csv'.format(file_name)) for file_name in file_names])
    y_df = pd.concat([pd.read_csv('doc2color/data/rgb.csv')] * len(file_names))

    tensor_x = torch.stack([torch.from_numpy(np.array(i)) for i in x_df.values.astype(np.float32)])
    tensor_y = torch.stack([torch.from_numpy(np.array(i)) for i in y_df.values.astype(np.float32) / 255.0])

    x_train, x_test, y_train, y_test = train_test_split(
        tensor_x, tensor_y, test_size=0.01, random_state=args.seed)

    train_dataset = torch.utils.data.TensorDataset(x_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_dataset,
        batch_size=args.batch_size, shuffle=True, **kwargs)

    test_dataset = torch.utils.data.TensorDataset(x_test, y_test)
    test_loader = torch.utils.data.DataLoader(test_dataset,
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(), "doc2color/pt_objects/vec2color.pt")
Example #4
def main():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=1,
        metavar="N",
        help="input batch size for training (default: 1)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1,
        metavar="N",
        help="input batch size for testing (default: 1)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=3,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=1.0,
        metavar="LR",
        help="learning rate (default: 1.0)",
    )
    parser.add_argument(
        "--gamma",
        type=float,
        default=0.7,
        metavar="M",
        help="Learning rate step gamma (default: 0.7)",
    )
    parser.add_argument("--no-cuda",
                        action="store_true",
                        default=False,
                        help="disables CUDA training")
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False,
        help="quickly check a single pass",
    )
    parser.add_argument("--seed",
                        type=int,
                        default=1,
                        metavar="S",
                        help="random seed (default: 1)")
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        metavar="N",
        help="how many batches to wait before logging training status",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="For Saving the current Model",
    )
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {"batch_size": args.batch_size}
    if use_cuda:
        kwargs.update({
            "num_workers": 1,
            "pin_memory": True,
            "shuffle": True
        }, )

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    dataset1 = datasets.MNIST("../data",
                              train=True,
                              download=True,
                              transform=transform)
    dataset2 = datasets.MNIST("../data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, epoch)
        test(model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--datapath",
                        type=str,
                        default="data/3droad.mat",
                        help="dataset file")
    parser.add_argument('--epochs',
                        type=int,
                        default=30,
                        help='number of epochs to train')
    parser.add_argument('--batch_size',
                        type=int,
                        default=1024,
                        help='input batch size for training')
    parser.add_argument('--val_batch_size',
                        type=int,
                        default=1024,
                        help='input batch size for validation')
    # Training Settings
    parser.add_argument('--arch_file',
                        type=str,
                        default=None,
                        help='name of file which defines the architecture')
    parser.add_argument('--arch_name',
                        type=str,
                        default='LeNet5',
                        help='name of the architecture')
    parser.add_argument('--arch_args',
                        type=json.loads,
                        default=None,
                        help='[JSON] arguments for the architecture')
    parser.add_argument('--optim_name',
                        type=str,
                        default=VIOptimizer.__name__,
                        help='name of the optimizer')
    parser.add_argument('--optim_args',
                        type=json.loads,
                        default=None,
                        help='[JSON] arguments for the optimizer')
    parser.add_argument('--curv_args',
                        type=json.loads,
                        default=dict(),
                        help='[JSON] arguments for the curvature')
    parser.add_argument('--fisher_args',
                        type=json.loads,
                        default=dict(),
                        help='[JSON] arguments for the fisher')
    parser.add_argument('--scheduler_name',
                        type=str,
                        default=None,
                        help='name of the learning rate scheduler')
    parser.add_argument('--scheduler_args',
                        type=json.loads,
                        default=None,
                        help='[JSON] arguments for the scheduler')
    # Options
    parser.add_argument('--no_cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, help='random seed')
    parser.add_argument(
        '--log_interval',
        type=int,
        default=20,
        help='how many batches to wait before logging training status')
    parser.add_argument(
        '--checkpoint_interval',
        type=int,
        default=50,
        help='how many epochs to wait before saving a checkpoint')
    parser.add_argument('--out',
                        type=str,
                        default='results/regression/',
                        help='dir to save output files')
    parser.add_argument('--config', default=None, help='config file path')
    parser.add_argument("--log_name",
                        default=None,
                        required=True,
                        type=str,
                        help="log name")
    args = parser.parse_args()
    dict_args = vars(args)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    if args.config is not None:
        with open(args.config) as f:
            config = json.load(f)

        dict_args.update(config)

    if not os.path.isfile(args.datapath):
        print('Downloading \'3droad\' UCI dataset...')
        urllib.request.urlretrieve(
            'https://www.dropbox.com/s/f6ow1i59oqx05pl/3droad.mat?dl=1',
            args.datapath)
    data = loadmat(args.datapath)['data']
    X = data[:, :-1]
    X = X - X.min(0)
    X = 2 * (X / X.max(0)) - 1
    y = data[:, -1]

    train_val_n = int(floor(0.8 * len(X)))
    X_train_val = X[:train_val_n]
    y_train_val = y[:train_val_n]
    X_test = X[train_val_n:, :]
    y_test = y[train_val_n:]

    X_train, X_val, y_train, y_val = train_test_split(X_train_val,
                                                      y_train_val,
                                                      test_size=0.2)
    dtype = torch.float32
    X_train = torch.tensor(X_train, dtype=dtype)
    y_train = torch.tensor(y_train, dtype=dtype)
    X_val = torch.tensor(X_val, dtype=dtype)
    y_val = torch.tensor(y_val, dtype=dtype)
    X_test = torch.tensor(X_test, dtype=dtype)
    y_test = torch.tensor(y_test, dtype=dtype)

    train_dataset = TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    val_dataset = TensorDataset(X_val, y_val)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.val_batch_size,
                            shuffle=True)
    test_dataset = TensorDataset(X_test, y_test)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.val_batch_size,
                             shuffle=True)

    data_dim = X_train.size(-1)
    model = Net(data_dim).to(device).float()

    optim_kwargs = {} if args.optim_args is None else args.optim_args

    if args.optim_name == SecondOrderOptimizer.__name__:
        optimizer = SecondOrderOptimizer(model,
                                         **optim_kwargs,
                                         curv_kwargs=args.curv_args)
    elif args.optim_name == VIOptimizer.__name__:
        optimizer = VIOptimizer(model,
                                dataset_size=len(train_loader.dataset),
                                seed=args.seed,
                                **optim_kwargs,
                                curv_kwargs=args.curv_args)
    else:
        modules = import_module("optimizers")
        optim_class = getattr(modules, args.optim_name)
        optimizer = optim_class(model.parameters())

    if args.scheduler_name is None:
        scheduler = None
    else:
        scheduler_class = getattr(torchsso.optim.lr_scheduler,
                                  args.scheduler_name, None)
        if scheduler_class is None:
            scheduler_class = getattr(torch.optim.lr_scheduler,
                                      args.scheduler_name)
        scheduler_kwargs = config["scheduler_args"]
        scheduler = scheduler_class(optimizer, **scheduler_kwargs)

    log_file_name = "log_" + args.log_name
    logger = Logger(args.out, log_file_name)
    logger.start()
    # train
    model.train()
    print("=========== Start ===========")
    for i in range(args.epochs):
        losses = 0
        for minibatch_i, (x_batch, y_batch) in enumerate(train_loader):

            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            """
            def closure():
                optimizer.zero_grad()
                output = model(x_batch)
                loss = F.mse_loss(output, y_batch, reduction="sum").float()
                loss.backward()

                return loss, output
            """
            optimizer.zero_grad()
            output = model(x_batch)
            loss = F.mse_loss(output, y_batch, reduction="sum").float()
            loss.backward()
            optimizer.step()
            losses += loss.item()
            if (minibatch_i + 1) % args.log_interval == 0:
                print(
                    "Train Epoch: {} batch idx: {} elapsed time:  {:.1f}s MSE: {}"
                    .format(i + 1, minibatch_i + 1, logger.elapsed_time,
                            loss.item() / args.batch_size))
        losses = losses / len(train_loader.dataset)

        val_mse = validate(model, device, val_loader, optimizer, "Eval")

        iteration = (i + 1) * len(train_loader)
        log = {
            "epoch": i + 1,
            "iteration": iteration,
            "mse": losses,
            "val_mse": val_mse,
            "lr": optimizer.param_groups[0]["lr"],
            "momentum": optimizer.param_groups[0].get("momentum", 0)
        }
        logger.write(log)

        if i % args.checkpoint_interval == 0 or i + 1 == args.epochs:
            path = os.path.join(args.out,
                                "epoch{}_{}.ckpt".format(i + 1, args.log_name))
            data = {
                "model": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": i + 1
            }
            torch.save(data, path)

    print("=========== Test ===========")
    test_mse = validate(model, device, test_loader, optimizer, "Test")
    log = {"test_mse": test_mse}
    logger.write(log)
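validate() is referenced but not shown. A minimal sketch matching its call sites (model, device, loader, optimizer, tag) and the MSE objective of the training loop; the optimizer argument being accepted but unused here is an assumption:

def validate(model, device, data_loader, optimizer, tag):
    model.eval()
    total = 0.0
    with torch.no_grad():
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            total += F.mse_loss(model(x), y, reduction='sum').item()
    model.train()
    mse = total / len(data_loader.dataset)
    print('{} MSE: {:.4f}'.format(tag, mse))
    return mse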
Example #6
class TrainBigramNN(tune.Trainable):
    def _setup(self, config):
        print("Loading word vectors...")
        word2index, word_vecs = process_word_vecs(FAST_TEXT)
        # Note that the word embeddings are normalized.
        self.wv = WV(F.normalize(word_vecs), word2index)
        # wv = WV(word_vecs, word2index)
        print("Done.")
        self.corpus_size = config["corpus_size"]
        bigram_fn_name = "diff"
        out_bigram_dim = 300
        dist_fn_name = "cos_dist"
        loss_fn_name = "mrl"
        margin = config["margin"]
        self.lr = config["lr"]
        self.num_epochs = config["num_epochs"]
        self.batch_size = config["batch_size"]
        self.test_model = True
        self.test_freq = config["test_freq"]
        with open(PROCESSED / "train.{}.pkl".format(str(self.corpus_size)), "rb") as f:
            wiki_train = pickle.load(f)
        with open(PROCESSED / "valid.pkl", "rb") as f:
            wiki_valid = pickle.load(f)
        wiki_combined = wiki_train + wiki_valid
        self.corpus = Corpus("wiki", wiki_combined, self.wv, filter_stopwords=True)
        self.model = Net(
            self.wv.vecs.size(1), BigramEncoder(bigram_fn_name), out_bigram_dim
        )
        self.model.to(device)
        self.dist_fn = DistanceFunction(dist_fn_name)
        self.loss_fn = LossFunction(loss_fn_name, margin=margin)
        self.device = device
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        torch.manual_seed(config["seed"])
        print("Traninig on Wikipedia corpus of size {}".format(self.corpus_size))

    def _train(self):
        result = train(
            self.wv,
            self.corpus.ix_sents[: self.corpus_size],
            self.corpus.sent_lengths[: self.corpus_size],
            self.corpus.ix_sents[self.corpus_size :],
            self.corpus.sent_lengths[self.corpus_size :],
            self.model,
            self.wv.vecs,
            self.dist_fn,
            self.loss_fn,
            self.optimizer,
            self.lr,
            self.num_epochs,
            self.batch_size,
            self._iteration,
            self.test_model,
            self.test_freq,
            self.device,
        )
        return result

    def _save(self, tmp_checkpoint_dir):
        checkpoint_path = str(Path(tmp_checkpoint_dir) / "model.pth")
        torch.save(self.model.state_dict(), checkpoint_path)
        return checkpoint_path

    def _restore(self, tmp_checkpoint_dir):
        checkpoint_path = str(Path(tmp_checkpoint_dir) / "model.pth")
        self.model.load_state_dict(torch.load(checkpoint_path))
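This Trainable uses the pre-1.0 Ray Tune class API (_setup/_train/_save/_restore). A sketch of how it might be launched; every config value below is a placeholder, not taken from the source:

from ray import tune

tune.run(
    TrainBigramNN,
    config={
        "corpus_size": 100000,
        "margin": 0.5,
        "lr": tune.grid_search([1e-4, 1e-3]),
        "num_epochs": 10,
        "batch_size": 256,
        "test_freq": 1,
        "seed": 42,
    },
    stop={"training_iteration": 10},
    checkpoint_freq=1,
)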
Example #7
            output_pts = net(images)

            loss = criterion(output_pts, key_pts)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        with torch.no_grad():
            for data in test_loader:
                images = data["image"]
                key_pts = data["keypoints"]
                key_pts = key_pts.view(key_pts.size(0), -1)
                key_pts = key_pts.type(dtype)
                images = images.type(dtype)
                output_pts = net(images)
                loss = criterion(output_pts, key_pts)
                test_loss += loss.item()
            print('Epoch: {}, trainloss: {}, testloss {}'.format(
                epoch + 1, train_loss / len(train_loader),
                test_loss / len(test_loader)))

    print('Finished Training')


train_net(42)

torch.save(net.state_dict(), "v2_model.pt")
Example #8
def test(epoch):
    model.eval()  # switch to evaluation mode; this only affects Dropout and BatchNorm modules
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += F.nll_loss(output, target).item()
            # get the index of the max log-probability
            pred = output.max(1)[1]
            correct += pred.eq(target).cpu().sum().item()

    # loss function already averages over batch size
    test_loss /= len(test_loader)
    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))


for epoch in range(1, args.epochs + 1):
    train(epoch)
    test(epoch)
    if ((epoch + 1) % args.save_interval) == 0:
        path_checkpoint = '{0}/{1}_state_epoch{2}.pth'.format(
            'checkpoints', 'mnist', epoch + 1)
        utils.save_checkpoint(model.state_dict(), path_checkpoint)
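utils.save_checkpoint is project-specific. A minimal sketch of what it plausibly does, assuming it only needs to create the target directory and serialize the state dict:

import os
import torch

def save_checkpoint(state_dict, path):
    # make sure the target directory exists, then serialize the weights
    os.makedirs(os.path.dirname(path), exist_ok=True)
    torch.save(state_dict, path)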
Example #9
def main():
    # Define network
    net = Net()
    print(net)

    data_transform = transforms.Compose(
        [Rescale(250), Normalize(), RandomCrop(224)])

    train_dataset = FacialKeypointsDataset(
        csv_file='data/training_frames_keypoints.csv',
        root_dir='data/training/',
        transform=data_transform)

    # iterate through the transformed dataset and print some stats about the first few samples
    for i in range(4):
        sample = train_dataset[i]
        print(i, sample['image'].size, sample['keypoints'].size)

    train_loader = DataLoader(train_dataset,
                              batch_size=10,
                              shuffle=True,
                              num_workers=4)

    test_dataset = FacialKeypointsDataset(
        csv_file='data/test_frames_keypoints.csv',
        root_dir='data/test/',
        transform=data_transform)

    test_loader = DataLoader(test_dataset,
                             batch_size=1,
                             shuffle=True,
                             num_workers=4)

    test_images, test_outputs, gt_pts = net_sample_output(net, test_loader)

    # print out the dimensions of the data to see if they make sense
    print(test_images.data.size())
    print(test_outputs.data.size())
    print(gt_pts.size())

    # call it
    visualize_output(test_images, test_outputs, gt_pts, 1)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(net.parameters(),
                           lr=0.001,
                           betas=(0.9, 0.999),
                           eps=1e-8)

    n_epochs = 2

    train_net(net, criterion, optimizer, train_loader, n_epochs)

    # get a sample of test data again
    test_images, test_outputs, gt_pts = net_sample_output(net, test_loader)

    print(test_images.data.size())
    print(test_outputs.data.size())
    print(gt_pts.size())

    model_dir = 'saved_models/'
    model_name = 'keypoints_model_1.pt'

    # after training, save your model parameters in the dir 'saved_models'
    torch.save(net.state_dict(), model_dir + model_name)

    weights1 = net.conv1.weight.data

    w = weights1.numpy()

    filter_index = 0

    print(w[filter_index][0])
    print(w[filter_index][0].shape)

    # display the filter weights
    plt.imshow(w[filter_index][0], cmap='gray')
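net_sample_output() and visualize_output() come from the surrounding project. A sketch of the former, which only needs to push one test batch through the network and hand back inputs, predictions, and ground truth (an assumed reconstruction):

def net_sample_output(net, test_loader):
    # grab a single batch, run the forward pass, and return everything
    # needed for visual inspection
    for sample in test_loader:
        images = sample['image'].type(torch.FloatTensor)
        key_pts = sample['keypoints']
        output_pts = net(images)
        return images, output_pts, key_pts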
Example #10
# Make sure the relevant folders have been created -------------

if not os.path.exists('disturbance-fig'):
    os.makedirs('disturbance-fig')

if not os.path.exists('weights'):
    os.makedirs('weights')

# -----------------------------------------------------------------

# name to be used for saving the network weights and perturbation data
out_filename = 'CE_with_sd_coef_of_' + str(args.sd_coef)

if not args.resume:
    fout_weights = os.path.join('weights', out_filename + '.pt')
    torch.save(model.state_dict(), fout_weights)
    print('Written to: {}'.format(fout_weights))

model.eval()
model.cuda()
foolbox_model = foolbox.models.PyTorchModel(model=model,
                                            num_classes=2,
                                            bounds=(-1, 1))
criterion = foolbox.criteria.Misclassification()
attack = foolbox.attacks.PGD(foolbox_model,
                             criterion,
                             distance=foolbox.distances.Linfinity)


def get_adv_disturbance_mag(attack, dataset, true_label_for_dataset, order):
    list_of_mag = []
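    # The example is truncated here. A hedged sketch of a possible
    # continuation, assuming the foolbox 2.x convention that calling an
    # attack with unpack=False returns an Adversarial object whose
    # .distance.value holds the perturbation size (the `order` argument
    # goes unused in this guess; none of this is the author's code):
    for x in dataset:
        adv = attack(x.numpy(), true_label_for_dataset, unpack=False)
        if adv.perturbed is not None:  # attack found an adversarial example
            list_of_mag.append(adv.distance.value)
    return list_of_mag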
Example #11

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

dloaders = {'train': train_loader, 'valid': valid_loader}

best_model_wts = model.state_dict()
best_acc = 0.0
dataset_sizes = {
    'train': len(dloaders['train'].dataset),
    'valid': len(dloaders['valid'].dataset)
}
for epoch in range(num_epochs):
    print('Epoch', epoch)
    for phase in ['train', 'valid']:
        if phase == 'train':
            scheduler.step()
            model.train(True)
        else:
            model.train(False)

        running_loss = 0.0
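        # The snippet is cut off here. The standard PyTorch transfer-learning
        # phase loop continues roughly as below; this is a sketch of that
        # common pattern (assuming `criterion` from above, tensors already on
        # the right device, and `import copy`), not the author's exact code.
        running_corrects = 0
        for inputs, labels in dloaders[phase]:
            optimizer.zero_grad()
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data).item()
        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects / dataset_sizes[phase]
        print('{} loss: {:.4f} acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
        if phase == 'valid' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())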
Example #12
def main(args):
    clf_model = Net().to(device)
    clf_model.load_state_dict(torch.load(args.MODEL_FILE))

    feature_extractor = clf_model.feature_extractor
    discriminator = clf_model.classifier

    critic = nn.Sequential(nn.Linear(320, 50), nn.ReLU(), nn.Linear(50, 20),
                           nn.ReLU(), nn.Linear(20, 1)).to(device)

    half_batch = args.batch_size // 2
    if args.adapt_setting == 'mnist2mnistm':
        source_dataset = MNIST(config.DATA_DIR / 'mnist',
                               train=True,
                               download=True,
                               transform=Compose(
                                   [GrayscaleToRgb(),
                                    ToTensor()]))
        target_dataset = MNISTM(train=False)
    elif args.adapt_setting == 'svhn2mnist':
        source_dataset = ImageClassdata(txt_file=args.src_list,
                                        root_dir=args.src_root,
                                        img_type=args.img_type,
                                        transform=transforms.Compose([
                                            transforms.Resize(28),
                                            transforms.ToTensor(),
                                        ]))
        target_dataset = ImageClassdata(txt_file=args.tar_list,
                                        root_dir=args.tar_root,
                                        img_type=args.img_type,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    elif args.adapt_setting == 'mnist2usps':
        source_dataset = ImageClassdata(txt_file=args.src_list,
                                        root_dir=args.src_root,
                                        img_type=args.img_type,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
        target_dataset = ImageClassdata(txt_file=args.tar_list,
                                        root_dir=args.tar_root,
                                        img_type=args.img_type,
                                        transform=transforms.Compose([
                                            transforms.Resize(28),
                                            transforms.ToTensor(),
                                        ]))
    else:
        raise NotImplementedError
    source_loader = DataLoader(source_dataset,
                               batch_size=half_batch,
                               shuffle=True,
                               num_workers=1,
                               pin_memory=True,
                               drop_last=True)
    target_loader = DataLoader(target_dataset,
                               batch_size=half_batch,
                               shuffle=True,
                               num_workers=1,
                               pin_memory=True,
                               drop_last=True)

    critic_optim = torch.optim.Adam(critic.parameters(), lr=1e-4)
    clf_optim = torch.optim.Adam(clf_model.parameters(), lr=1e-4)
    clf_criterion = nn.CrossEntropyLoss()

    os.makedirs('logs', exist_ok=True)
    os.makedirs('trained_models', exist_ok=True)  # the per-epoch saves below write here
    f = open(f'logs/{args.adapt_setting}_{args.name}.txt', 'w+')

    for epoch in range(1, args.epochs + 1):
        batch_iterator = zip(loop_iterable(source_loader),
                             loop_iterable(target_loader))

        total_loss = 0
        total_accuracy = 0
        target_label_accuracy = 0
        for _ in trange(args.iterations, leave=False):
            (source_x, source_y), (target_x, target_y) = next(batch_iterator)
            # Train critic
            set_requires_grad(feature_extractor, requires_grad=False)
            set_requires_grad(critic, requires_grad=True)

            source_x, target_x = source_x.to(device), target_x.to(device)
            source_y = source_y.to(device)

            with torch.no_grad():
                h_s = feature_extractor(source_x).data.view(
                    source_x.shape[0], -1)
                h_t = feature_extractor(target_x).data.view(
                    target_x.shape[0], -1)
            for _ in range(args.k_critic):
                gp = gradient_penalty(critic, h_s, h_t)

                critic_s = critic(h_s)
                critic_t = critic(h_t)
                wasserstein_distance = critic_s.mean() - critic_t.mean()

                critic_cost = -wasserstein_distance + args.gamma * gp

                critic_optim.zero_grad()
                critic_cost.backward()
                critic_optim.step()

                total_loss += critic_cost.item()

            # Train classifier
            set_requires_grad(feature_extractor, requires_grad=True)
            set_requires_grad(critic, requires_grad=False)
            for _ in range(args.k_clf):
                source_features = feature_extractor(source_x).view(
                    source_x.shape[0], -1)
                target_features = feature_extractor(target_x).view(
                    target_x.shape[0], -1)

                source_preds = discriminator(source_features)
                clf_loss = clf_criterion(source_preds, source_y)
                wasserstein_distance = critic(source_features).mean() - critic(
                    target_features).mean()

                loss = clf_loss + args.wd_clf * wasserstein_distance
                clf_optim.zero_grad()
                loss.backward()
                clf_optim.step()

                target_preds = discriminator(target_features)
                target_label_accuracy += (target_preds.cpu().max(1)[1] ==
                                          target_y).float().mean().item()

        mean_loss = total_loss / (args.iterations * args.k_critic)
        # mean_accuracy = total_accuracy / (args.iterations * args.k_disc)
        target_mean_accuracy = target_label_accuracy / (args.iterations *
                                                        args.k_clf)
        tqdm.write(
            f'EPOCH {epoch:03d}: critic_loss={mean_loss:.4f}, target_accuracy={target_mean_accuracy:.4f}'
        )
        f.write(
            f'EPOCH {epoch:03d}: critic_loss={mean_loss:.4f}, target_accuracy={target_mean_accuracy:.4f}\n'
        )
        torch.save(
            clf_model.state_dict(),
            f'trained_models/{args.adapt_setting}_{args.name}_ep{epoch}.pt')
    f.close()
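This example (like Example #18 below) relies on three helpers: loop_iterable, set_requires_grad, and gradient_penalty. A sketch of common formulations; the penalty here is the standard WGAN-GP term, which matches how it is consumed above but is still an assumption:

def loop_iterable(iterable):
    # cycle over a DataLoader forever
    while True:
        yield from iterable


def set_requires_grad(model, requires_grad=True):
    # freeze or unfreeze all parameters of a module
    for param in model.parameters():
        param.requires_grad = requires_grad


def gradient_penalty(critic, h_s, h_t):
    # WGAN-GP: penalize the critic's gradient norm on random
    # interpolations between source and target features
    alpha = torch.rand(h_s.size(0), 1).to(h_s.device)
    interpolates = (h_s + alpha * (h_t - h_s)).requires_grad_()
    preds = critic(interpolates)
    gradients = torch.autograd.grad(preds, interpolates,
                                    grad_outputs=torch.ones_like(preds),
                                    create_graph=True, retain_graph=True)[0]
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean()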
Example #13

if __name__ == '__main__':
    if MODE == "train":
        print('Size of training dataset: ', trainset_length)
        if not os.path.exists(os.path.join(os.getcwd(), exp_name, 'weights')):
            os.makedirs(os.path.join(os.getcwd(), exp_name, 'weights'))

        print('Training Started!')
        for e in range(NUM_EPOCHS):
            train(e)
            r_scheduler.step(e)
            d_scheduler.step(e)
            if e % 2 == 0:
                torch.save(
                    refine_net.state_dict(),
                    os.path.join(os.getcwd(), exp_name, 'weights',
                                 "weights_net_epoch_{}.pth".format(e)))
                torch.save(
                    d_net.state_dict(),
                    os.path.join(os.getcwd(), exp_name, 'weights',
                                 "weights_net_epoch_disc_{}.pth".format(e)))
                if e == 0:
                    cmd = f"gsutil -m cp -r {exp_name}/ {save_path}/"
                    os.system(cmd)
                else:
                    try:
                        cmd = f"gsutil -m cp -r {exp_name}/weights/weights_net_epoch_{e}.pth {save_path}/{exp_name}/weights/"
                        os.system(cmd)
                        cmd = f"gsutil -m cp -r {exp_name}/weights/weights_net_epoch_disc_{e}.pth {save_path}/{exp_name}/weights/"
                        os.system(cmd)
                    except Exception as exc:  # the snippet is truncated here; a minimal handler keeps the block valid
                        print('gsutil copy failed:', exc)
Example #14
            loss = criterion(output_pts, key_pts)

            # zero the parameter (weight) gradients
            optimizer.zero_grad()

            # backward pass to calculate the weight gradients
            loss.backward()

            # update the weights
            optimizer.step()

            # print loss statistics
            running_loss += loss.item()
            if batch_i % 10 == 9:  # print every 10 batches
                print('Epoch: {}, Batch: {}, Avg. Loss: {}'.format(epoch + 1, batch_i + 1, running_loss / 10))
                running_loss = 0.0

    print('Finished Training')

    # train your network


n_epochs = 100  # start small, and increase when you've decided on your model structure and hyperparams

# this is a Workspaces-specific context manager to keep the connection
# alive while training your model, not part of pytorch
#with active_session():
train_net(n_epochs)
PATH = r'C:\Users\Semanti Basu\Documents\OneDrive_2020-02-19\3D Ceaser dataset\Image and point generation\Image and point generation\frontaltrainedmodel_10epoch.pth'
torch.save(net.state_dict(), PATH)
Example #15
def train(options):
    exp_name = options['exp_name']
    batch_size = options['batch_size']
    use_pca = options['use_pca']
    model_type = options['model_type']
    loss_fn = options['loss_fn']
    optim = options['optim']
    use_scheduler = options['use_scheduler']
    lr = options['lr']
    epochs = options['epochs']
    pca_var_hold = options['pca_var_hold']
    debug_mode = options['debug_mode']
    
    if os.path.exists(exp_name):
        shutil.rmtree(exp_name)

    time.sleep(1)
    writer = SummaryWriter(exp_name,flush_secs=1)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    X = os.listdir('hilbert_data')
    X_train = X[:int(0.8*len(X))]
    X_test = X[int(0.8*len(X)):]
    # X = np.load('bined_x.npy')
    # y = np.load('bined_y.npy')
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # if use_pca and 'Raw' in exp_name:
    #     scaler = PCA(pca_var_hold)
    #     scaler.fit(X_train)
    #     X_train = scaler.transform(X_train)
    #     X_test = scaler.transform(X_test)

    # needed_dim = X_train.shape[1]

    dataset_train = HIL_MOOD(X_train, model_type=model_type,data_type='train',debug_mode=debug_mode)
    train_loader = DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True)
    
    dataset_val = HIL_MOOD(X_test, model_type=model_type,data_type='val')
    valid_loader = DataLoader(dataset=dataset_val, batch_size=batch_size, shuffle=False)
    
    model = Net()
    model.to(device)
    if optim is None:
        print('you need to specify an optimizer')
        exit()
    elif optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    elif optim == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    if use_scheduler:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', verbose=True, threshold=0.0001, patience=10)
    if loss_fn is None:
        print('you need to specify a loss function')
        exit()
    elif loss_fn == 'mse':
        loss_fn = torch.nn.MSELoss()
    elif loss_fn == 'cross_entropy':
        loss_fn = torch.nn.CrossEntropyLoss()
    
    
    
    mean_train_losses = []
    mean_valid_losses = []
    valid_acc_list = []
    best = float('inf')  # tracking validation loss, so start high (use 0 if tracking accuracy)
    
    for epoch in range(epochs):
        model.train()
        train_losses = []
        valid_losses = []
        for i, (images, labels) in enumerate(train_loader):
            if images.shape[0] != batch_size:
                continue
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()

            # print(images.shape)
            outputs = model(images)
            # print(images.shape)
            # print(outputs.shape)
            # print(labels.shape)
            # print(i)
            loss = loss_fn(outputs, labels)
            # print('loss: ',loss.item())
            writer.add_scalar('Loss/train', loss.item(), len(train_loader)*epoch+i)

            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
            del outputs
            # if (i * batch_size) % (batch_size * 100) == 0:
            #     print(f'{i * batch_size} / 50000')
                
        model.eval()
        total = 0
        accsat = [0.5, 0.05, 0.005]
        accs = np.zeros(len(accsat))
        correct_array = np.zeros(len(accsat))
        with torch.no_grad():
            for i, (images, labels) in enumerate(valid_loader):
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                loss =  loss_fn(outputs, labels)

                
                for j in range(len(accsat)):
                    correct_array[j] += accat(outputs, labels, thresh=accsat[j])

                # total_loss += loss.item()
                total += labels.size(0)
                
                
                valid_losses.append(loss.item())


                
        mean_train_losses.append(np.mean(train_losses))
        mean_valid_losses.append(np.mean(valid_losses))
        # scheduler.step(np.mean(valid_losses))
        for i in range(len(accsat)):
            accs[i] = 100*correct_array[i]/total
            writer.add_scalar('Acc/val_@'+str(accsat[i]), accs[i], epoch)
        
        if np.mean(valid_losses) < best:
            best = np.mean(valid_losses)
            torch.save(model.state_dict(),os.path.join(os.getcwd(),'models','meh.pth'))
        
        writer.add_scalar('Loss/val', np.mean(valid_losses), epoch)
        # valid_acc_list.append(accuracy)
        if epoch == epochs - 1:
            print('epoch : {}, train loss : {:.4f}, valid loss : {:.4f}, acc@0.05 : {:.4f}'
                  .format(epoch + 1, np.mean(train_losses), np.mean(valid_losses), accs[1]))
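accat() measures "accuracy at a threshold". A minimal sketch consistent with how it is called above (it returns a count of correct samples in the batch, later divided by the running total); this reconstruction is an assumption:

def accat(outputs, labels, thresh=0.05):
    # a sample counts as correct when every predicted value is within
    # `thresh` of its label
    close = (torch.abs(outputs - labels) < thresh).view(outputs.size(0), -1)
    return close.all(dim=1).float().sum().item()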
Example #16
def main():

    check_dir = '../LPSfiles/' + name

    if not os.path.exists(check_dir):
        os.mkdir(check_dir)

    # data
    val_loader = torch.utils.data.DataLoader(PriorFolder(opt.val_dir,
                                                         opt.prior_dir,
                                                         size=256,
                                                         mean=mean,
                                                         std=std),
                                             batch_size=opt.b * 3,
                                             shuffle=False,
                                             num_workers=4,
                                             pin_memory=True)
    train_loader = torch.utils.data.DataLoader(Folder(opt.train_dir,
                                                      scales=[64] * 3 +
                                                      [128, 256],
                                                      crop=0.9,
                                                      flip=True,
                                                      rotate=None,
                                                      mean=mean,
                                                      std=std),
                                               collate_fn=collate_more,
                                               batch_size=opt.b * 6,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)
    # models
    p = 5
    net = Net(base=opt.base)
    fcn = FCN(net)
    net = nn.DataParallel(net).cuda()
    net.train()
    """
    # fcn = nn.DataParallel(fcn).cuda()
    # sdict =torch.load('/home/crow/LPSfiles/Train2_vgg16/fcn-iter13800.pth')
    # fcn.load_state_dict(sdict)
    fcn = nn.DataParallel(fcn).cuda()
    fcn.train()
    optimizer = torch.optim.Adam([
        {'params': fcn.parameters(), 'lr': 1e-4},
    ])
    logs = {'best_it':0, 'best': 0}
    sal_data_iter = iter(train_loader)
    i_sal_data = 0
    for it in tqdm(range(opt.max)):
    # for it in tqdm(range(1)):
        # if it > 1000 and it % 100 == 0:
        #     optimizer.param_groups[0]['lr'] *= 0.5
        if i_sal_data >= len(train_loader):
            sal_data_iter = iter(train_loader)
            i_sal_data = 0
        data, lbls, _ = next(sal_data_iter)
        i_sal_data += 1
        data = data.cuda()
        lbls = [lbl.unsqueeze(1).cuda() for lbl in lbls]
        msks = fcn(data)
        loss = sum([F.binary_cross_entropy_with_logits(msk, lbl) for msk, lbl in zip(msks, lbls)])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if it % 10 == 0:
            writer.add_scalar('loss', loss.item(), it)
            image = make_image_grid(data[:6], mean, std)
            writer.add_image('Image', torchvision.utils.make_grid(image), it)
            big_msk = torch.sigmoid(msks[-1]).expand(-1, 3, -1, -1)
            writer.add_image('msk', torchvision.utils.make_grid(big_msk.data[:6]), it)
            big_msk = lbls[-1].expand(-1, 3, -1, -1)
            writer.add_image('gt', torchvision.utils.make_grid(big_msk.data[:6]), it)
        # if it % 100 == 0:
        if it != 0 and it % 100 == 0:
            fm, mae = validate(val_loader, fcn, os.path.join(check_dir, 'results'),
                               os.path.join(opt.val_dir, 'masks'))
            print('Loss: %.4f' % (loss.item()))
            print('Best FM: iteration %d -> %.4f, current FM: %.4f' % (logs['best_it'], logs['best'], fm))
            logs[it] = {'FM': fm}
            if fm > logs['best']:
                logs['best'] = fm
                logs['best_it'] = it
                torch.save(fcn.state_dict(), '%s/fcn-best.pth' % (check_dir))
            with open(os.path.join(check_dir, 'logs.json'), 'w') as outfile:
                json.dump(logs, outfile)
            torch.save(fcn.state_dict(), '%s/fcn-iter%d.pth' % (check_dir, it))
            """
    ###################################################################################################
    val_loader = torch.utils.data.DataLoader(PriorFolder(opt.val_dir,
                                                         opt.prior_dir,
                                                         size=256,
                                                         mean=mean,
                                                         std=std),
                                             batch_size=opt.b,
                                             shuffle=False,
                                             num_workers=4,
                                             pin_memory=True)
    train_loader = torch.utils.data.DataLoader(Folder(opt.train_dir,
                                                      scales=[256],
                                                      crop=0.9,
                                                      flip=True,
                                                      rotate=None,
                                                      mean=mean,
                                                      std=std),
                                               collate_fn=collate_more,
                                               batch_size=opt.b,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)
    optimizer = torch.optim.Adam([
        {
            'params': net.parameters(),
            'lr': 1e-4
        },
    ])
    logs = {'best_it': 0, 'best': 0}
    sal_data_iter = iter(train_loader)
    i_sal_data = 0
    for it in tqdm(range(opt.max)):
        # if it > 1000 and it % 100 == 0:
        #     optimizer.param_groups[0]['lr'] *= 0.5
        if i_sal_data >= len(train_loader):
            sal_data_iter = iter(train_loader)
            i_sal_data = 0
        data, lbl, _ = next(sal_data_iter)
        i_sal_data += 1
        data = data.cuda()
        lbl = lbl[0].unsqueeze(1)
        noisy_label = (lbl.numpy() +
                       np.random.binomial(1,
                                          float(p) / 100.0, (256, 256))) % 2
        noisy_label = torch.Tensor(noisy_label).cuda()
        lbl = lbl.cuda()
        msk = net(data, noisy_label)
        loss = F.binary_cross_entropy_with_logits(msk, lbl)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if it % 10 == 0:
            writer.add_scalar('loss', loss.item(), it)
            image = make_image_grid(data[:6], mean, std)
            writer.add_image('Image', torchvision.utils.make_grid(image), it)
            big_msk = torch.sigmoid(msk).expand(-1, 3, -1, -1)
            writer.add_image('msk',
                             torchvision.utils.make_grid(big_msk.data[:6]), it)
            big_msk = lbl.expand(-1, 3, -1, -1)
            writer.add_image('gt',
                             torchvision.utils.make_grid(big_msk.data[:6]), it)
        # if it % 200 == 0:
        if it != 0 and it % 100 == 0:
            fm, mae = validate(val_loader, net,
                               os.path.join(check_dir, 'results'),
                               os.path.join(opt.val_dir, 'masks'))
            print('Loss: %.4f' % (loss.item()))
            print('Best FM: iteration %d -> %.4f, current FM: %.4f' %
                  (logs['best_it'], logs['best'], fm))
            logs[it] = {'FM': fm}
            if fm > logs['best']:
                logs['best'] = fm
                logs['best_it'] = it
                torch.save(net.state_dict(), '%s/net-best.pth' % (check_dir))
            with open(os.path.join(check_dir, 'logs.json'), 'w') as outfile:
                json.dump(logs, outfile)
            torch.save(net.state_dict(), '%s/net-iter%d.pth' % (check_dir, it))
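make_image_grid() un-normalizes a batch for TensorBoard. A sketch, assuming mean and std are per-channel sequences (the real helper lives elsewhere in this project):

def make_image_grid(batch, mean, std):
    # undo channel-wise normalization so the images render correctly
    mean = torch.tensor(mean).view(1, -1, 1, 1).to(batch.device)
    std = torch.tensor(std).view(1, -1, 1, 1).to(batch.device)
    return (batch * std + mean).clamp(0, 1)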
Example #17
class Agent(object):
    def __init__(self,
                 n_s,
                 n_a,
                 hiddens=(128, 64),
                 epsilon=1.0,
                 epsilon_min=0.005,
                 epsilon_decay=0.05,
                 gamma=0.99,
                 batch_size=64,
                 memory_capacity=100000,
                 lr=0.001,
                 is_dueling=False,
                 is_prioritize=True,
                 replace_iter=100,
                 is_soft=False,
                 tau=0.01,
                 e=0.01,
                 a=0.6,
                 b=0.4):
        self.n_s = n_s
        self.n_a = n_a
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.replace_iter = replace_iter
        self.lr = lr
        self.gamma = gamma
        self.batch_size = batch_size
        self.memory_capacity = memory_capacity
        self.is_soft = is_soft
        self.is_prioritize = is_prioritize
        self.tau = tau
        if use_gpu:
            self.eval_net = Net(n_s,
                                n_a,
                                hiddens=hiddens,
                                is_dueling=is_dueling).cuda()
            self.target_net = Net(n_s,
                                  n_a,
                                  hiddens=hiddens,
                                  is_dueling=is_dueling).cuda()
        else:
            self.eval_net = Net(n_s,
                                n_a,
                                hiddens=hiddens,
                                is_dueling=is_dueling)
            self.target_net = Net(n_s,
                                  n_a,
                                  hiddens=hiddens,
                                  is_dueling=is_dueling)
        if is_prioritize:
            self.memory = Memory(memory_capacity, e, a, b)
        else:
            self.memory = np.zeros((memory_capacity, self.n_s * 2 + 2))
        self.memory_count = 0
        self.learn_count = 0

        self.loss_func = nn.MSELoss()
        self.optimizer = optim.Adam(self.eval_net.parameters(), lr=self.lr)

    def act(self, s):
        if np.random.random() <= self.epsilon:
            # random
            return np.random.randint(self.n_a)
        else:
            # max
            s = FloatTensor(s)
            action_value = self.eval_net(s)
            a = torch.max(action_value, 1)[1].data.cpu().numpy()[0]
            return a

    def step(self, s, a, r, s_, done):
        if self.is_prioritize:
            # experience = s, a, r, s_, done
            experience = np.hstack((s, [a, r], s_))
            self.memory.store(experience)
            self.memory_count += 1
            if np.count_nonzero(self.memory.tree.tree) > self.batch_size:
                tree_idx, batch, ISWeights_mb = self.memory.sample(
                    self.batch_size)
                self.learn(batch, tree_idx, ISWeights_mb)
        else:
            transition = np.hstack((s, [a, r], s_))
            # replace the old memory with new memory
            index = self.memory_count % self.memory_capacity
            self.memory[index, :] = transition
            self.memory_count += 1
            if self.memory_count < self.memory_capacity:
                return
            # sample batch transitions
            sample_index = np.random.choice(self.memory_capacity,
                                            self.batch_size)
            batch = self.memory[sample_index, :]
            self.learn(batch)

    def learn(self, batch, tree_idx=None, ISWeights_mb=None):
        b_s = torch.squeeze(FloatTensor(batch[:, :self.n_s]), 0)
        b_a = torch.squeeze(LongTensor(batch[:, self.n_s:self.n_s + 1]), 0)
        b_r = torch.squeeze(FloatTensor(batch[:, self.n_s + 1:self.n_s + 2]),
                            0)
        b_s_ = torch.squeeze(FloatTensor(batch[:, -self.n_s:]), 0)
        temp = self.eval_net(b_s)
        eval_q = torch.gather(temp, 1, b_a)
        next_max_from_eval = self.eval_net(b_s_)
        next_max_from_eval_index = next_max_from_eval.max(1)[1].unsqueeze(1)
        next_actions = self.target_net(b_s_).detach()
        next_max = next_actions.gather(1, next_max_from_eval_index)
        target_q = b_r + self.gamma * next_max  # * (1 - b_done)
        abs_errors = torch.sum(torch.abs(target_q - eval_q), dim=1).detach().cpu().numpy()
        loss = self.loss_func(eval_q, target_q)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        if self.is_prioritize:
            self.memory.batch_update(tree_idx=tree_idx, abs_errors=abs_errors)
        self.update()
        self.learn_count += 1

    def update(self):
        next_epsilon = self.epsilon * self.epsilon_decay
        if next_epsilon <= self.epsilon_min:
            self.epsilon = self.epsilon_min
        else:
            self.epsilon = next_epsilon
        if self.is_soft:
            for target_param, local_param in zip(self.target_net.parameters(),
                                                 self.eval_net.parameters()):
                target_param.data.copy_(self.tau * local_param.data +
                                        (1.0 - self.tau) * target_param.data)
        else:
            if self.learn_count % self.replace_iter == 0:
                self.target_net.load_state_dict(self.eval_net.state_dict())

    # save all net
    def save(self, name):
        torch.save(self.eval_net, name)

    # load all net
    def load(self, name):
        return torch.load(name)
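A sketch of how this Agent might be driven by a Gym-style environment (pre-0.26 step API); the environment name and episode budget are placeholders, and note that act() expects a batched state:

import gym
import numpy as np

env = gym.make('CartPole-v1')
agent = Agent(n_s=env.observation_space.shape[0], n_a=env.action_space.n)

for episode in range(500):
    s = env.reset()
    done = False
    while not done:
        a = agent.act(np.array(s)[None, :])  # act() expects shape (1, n_s)
        s_, r, done, _ = env.step(a)
        agent.step(s, a, r, s_, done)  # step() expects the flat state
        s = s_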
Example #18
def main(args):
    clf_model = Net().to(device)
    clf_model.load_state_dict(torch.load(args.MODEL_FILE))

    feature_extractor = clf_model.feature_extractor
    discriminator = clf_model.classifier

    critic = nn.Sequential(nn.Linear(320, 50), nn.ReLU(), nn.Linear(50, 20),
                           nn.ReLU(), nn.Linear(20, 1)).to(device)

    half_batch = args.batch_size // 2
    source_dataset = MNIST(config.DATA_DIR / 'mnist',
                           train=True,
                           download=True,
                           transform=Compose([GrayscaleToRgb(),
                                              ToTensor()]))
    source_loader = DataLoader(source_dataset,
                               batch_size=half_batch,
                               drop_last=True,
                               shuffle=True,
                               num_workers=0,
                               pin_memory=True)

    target_dataset = MNISTM(train=False)
    target_loader = DataLoader(target_dataset,
                               batch_size=half_batch,
                               drop_last=True,
                               shuffle=True,
                               num_workers=0,
                               pin_memory=True)

    critic_optim = torch.optim.Adam(critic.parameters(), lr=1e-4)
    clf_optim = torch.optim.Adam(clf_model.parameters(), lr=1e-4)
    clf_criterion = nn.CrossEntropyLoss()

    for epoch in range(1, args.epochs + 1):
        batch_iterator = zip(loop_iterable(source_loader),
                             loop_iterable(target_loader))

        total_loss = 0
        for _ in trange(args.iterations, leave=False):
            (source_x, source_y), (target_x, _) = next(batch_iterator)
            # Train critic
            set_requires_grad(feature_extractor, requires_grad=False)
            set_requires_grad(critic, requires_grad=True)

            source_x, target_x = source_x.to(device), target_x.to(device)
            source_y = source_y.to(device)

            with torch.no_grad():
                h_s = feature_extractor(source_x).view(source_x.shape[0], -1)
                h_t = feature_extractor(target_x).view(target_x.shape[0], -1)
            for _ in range(args.k_critic):
                gp = gradient_penalty(critic, h_s, h_t)

                critic_s = critic(h_s)
                critic_t = critic(h_t)
                wasserstein_distance = critic_s.mean() - critic_t.mean()

                critic_cost = -wasserstein_distance + args.gamma * gp

                critic_optim.zero_grad()
                critic_cost.backward()
                critic_optim.step()

                total_loss += critic_cost.item()

            # Train classifier
            set_requires_grad(feature_extractor, requires_grad=True)
            set_requires_grad(critic, requires_grad=False)
            for _ in range(args.k_clf):
                source_features = feature_extractor(source_x).view(
                    source_x.shape[0], -1)
                target_features = feature_extractor(target_x).view(
                    target_x.shape[0], -1)

                source_preds = discriminator(source_features)
                clf_loss = clf_criterion(source_preds, source_y)
                wasserstein_distance = critic(source_features).mean() - critic(
                    target_features).mean()

                loss = clf_loss + args.wd_clf * wasserstein_distance
                clf_optim.zero_grad()
                loss.backward()
                clf_optim.step()

        mean_loss = total_loss / (args.iterations * args.k_critic)
        tqdm.write(f'EPOCH {epoch:03d}: critic_loss={mean_loss:.4f}')
        torch.save(clf_model.state_dict(), 'trained_models/wdgrl.pt')
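# The helpers used above -- loop_iterable, set_requires_grad, and
# gradient_penalty -- are imported from elsewhere and not shown. Plausible
# minimal versions; the WGAN-GP-style penalty is an assumption based on how
# gradient_penalty is called on the two feature batches:
import torch
from torch import autograd

def loop_iterable(iterable):
    # cycle through a finite DataLoader forever
    while True:
        yield from iterable

def set_requires_grad(model, requires_grad=True):
    for param in model.parameters():
        param.requires_grad = requires_grad

def gradient_penalty(critic, h_s, h_t):
    # penalize the critic's gradient norm on random interpolates between
    # source and target features
    alpha = torch.rand(h_s.size(0), 1, device=h_s.device)
    interpolates = (alpha * h_s + (1 - alpha) * h_t).requires_grad_()
    preds = critic(interpolates)
    gradients = autograd.grad(preds, interpolates,
                              grad_outputs=torch.ones_like(preds),
                              create_graph=True, retain_graph=True)[0]
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean()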
Example #19
0
class Trainer(object):
    def __init__(self, train_loader, test_loader, config):
        self.config = config
        self.device = config.device

        self.train_loader = train_loader
        self.test_loader = test_loader

        self.n_epoch = config.n_epoch
        self.lr = config.lr
        self.gamma = config.gamma

        # self.start_epoch = 1
        self.start_itr = 1

        n_classes = len(self.train_loader.dataset.classes)
        self.model = Net(n_classes=n_classes).to(self.device)
        print(self.model)
        print('Initialized model...\n')

        self.optim = torch.optim.Adadelta(self.model.parameters(), self.lr)
        self.scheduler = StepLR(self.optim, step_size=1, gamma=self.gamma)

        # if not self.config.model_state_path == '':
        #     self._load_models(self.config.model_state_path)

        self.writer = SummaryWriter(log_dir=self.config.log_dir)

    def train(self):
        self.model.train()

        n_itr = self.start_itr
        print('Start training...!')
        for epoch in range(1, self.n_epoch + 1):
            with tqdm(total=len(self.train_loader)) as pbar:
                for idx, (img, label) in enumerate(self.train_loader):
                    pbar.set_description(
                        f'Epoch[{epoch}/{self.n_epoch}], iteration[{idx}/{len(self.train_loader)}]'
                    )

                    img, label = img.to(self.device), label.to(self.device)

                    self.optim.zero_grad()
                    output = self.model(img)
                    loss = F.nll_loss(output, label)
                    loss.backward()
                    self.optim.step()

                    if n_itr % self.config.log_interval == 0:
                        pbar.set_postfix(OrderedDict(loss=loss.item()))
                        tqdm.write(
                            f'Epoch[{epoch}], iteration[{idx}/{len(self.train_loader)}], loss: {loss.item()}'
                        )
                        self.writer.add_scalar('loss/loss', loss.item(), n_itr)

                    if n_itr % self.config.checkpoint_interval == 0:
                        self._save_models(epoch, n_itr)

                    n_itr += 1
                    pbar.update()
            self.scheduler.step()
            self.test(n_itr)

        self.writer.close()

    def test(self, n_itr):
        self.model.eval()
        test_loss = 0
        correct = 0
        print('Start testing...!')
        with torch.no_grad():
            for _, (img, label) in enumerate(self.test_loader):
                img, label = img.to(self.device), label.to(self.device)
                output = self.model(img)
                test_loss += F.nll_loss(output, label, reduction='sum').item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(label.view_as(pred)).sum().item()

        test_loss /= len(self.test_loader.dataset)
        accuracy = correct / len(self.test_loader.dataset)
        self.writer.add_scalar('accuracy/test_accuracy', accuracy, n_itr)
        tqdm.write(
            f'Test: Average loss: {test_loss:.4f}, Accuracy: {accuracy * 100.0:.2f}%')
        self.model.train()

    def _save_models(self, epoch, n_itr):
        checkpoint_name = f'{self.config.dataset_name}_model_ckpt_{n_itr}.pt'
        checkpoint_path = os.path.join(self.config.checkpoint_dir,
                                       checkpoint_name)
        torch.save(
            {
                # 'epoch': epoch,
                'n_itr': n_itr,
                'model': self.model.state_dict(),
                'optim': self.optim.state_dict(),
            },
            checkpoint_path)
        tqdm.write(f'Saved models state_dict: n_itr_{n_itr}')

    def _load_models(self, model_state_path):
        checkpoint = torch.load(model_state_path)
        # self.start_epoch = checkpoint['epoch']
        self.start_itr = checkpoint['n_itr'] + 1
        self.model.load_state_dict(checkpoint['model'])
        self.optim.load_state_dict(checkpoint['optim'])
        print(f'start_itr: {self.start_itr}')
        print('Loaded pretrained models...\n')
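# A minimal sketch of wiring the Trainer up. The config fields mirror the
# attributes the class reads; the dataset choice and hyperparameter values
# below are illustrative assumptions (Net and Trainer come from this module):
from types import SimpleNamespace

import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

config = SimpleNamespace(
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    n_epoch=10, lr=1.0, gamma=0.7,
    log_dir='logs', log_interval=100,
    checkpoint_interval=1000, checkpoint_dir='checkpoints',
    dataset_name='mnist',
)

transform = transforms.ToTensor()
train_loader = DataLoader(datasets.MNIST('data', train=True, download=True,
                                         transform=transform),
                          batch_size=64, shuffle=True)
test_loader = DataLoader(datasets.MNIST('data', train=False,
                                        transform=transform),
                         batch_size=1000)

trainer = Trainer(train_loader, test_loader, config)
trainer.train()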
Example #20
0
class dueling_agent:
    def __init__(self, env, args):
        # define some important parameters
        self.env = env
        self.args = args
        # define the network and its target copy
        self.net = Net(self.env.action_space.n)
        self.target_net = Net(self.env.action_space.n)
        # make sure the target net has the same weights as the network
        self.target_net.load_state_dict(self.net.state_dict())
        if self.args.cuda:
            self.net.cuda()
            self.target_net.cuda()
        # define the optimizer
        self.optimizer = torch.optim.Adam(self.net.parameters(),
                                          lr=self.args.lr)
        # define the replay memory
        self.buffer = replay_memory(self.args.buffer_size)
        # define the linear schedule of the exploration
        self.exploration_schedule = linear_schedule(int(self.args.total_timesteps * self.args.exploration_fraction), \
                                                    self.args.final_ratio, self.args.init_ratio)
        # create the folder to save the models
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)
        # set the environment folder
        self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)

    # start to do the training
    def learn(self):
        episode_reward = [0.0]
        obs = np.array(self.env.reset())
        td_loss = 0
        for timestep in range(self.args.total_timesteps):
            explore_eps = self.exploration_schedule.get_value(timestep)
            with torch.no_grad():
                obs_tensor = self._get_tensors(obs)
                action_value = self.net(obs_tensor)
            # select actions
            action = select_actions(action_value, explore_eps)
            # execute the action in the environment
            obs_, reward, done, _ = self.env.step(action)
            obs_ = np.array(obs_)
            # append the transition to the replay buffer
            self.buffer.add(obs, action, reward, obs_, float(done))
            obs = obs_
            # add the rewards
            episode_reward[-1] += reward
            if done:
                obs = np.array(self.env.reset())
                episode_reward.append(0.0)
            if timestep > self.args.learning_starts and timestep % self.args.train_freq == 0:
                # start to sample the samples from the replay buffer
                batch_samples = self.buffer.sample(self.args.batch_size)
                td_loss = self._update_network(batch_samples)
            if timestep > self.args.learning_starts and timestep % self.args.target_network_update_freq == 0:
                # update the target network
                self.target_net.load_state_dict(self.net.state_dict())
            if len(episode_reward[-101:-1]) == 0:
                mean_reward_per_100 = 0
            else:
                mean_reward_per_100 = np.mean(episode_reward[-101:-1])
            num_episode = len(episode_reward) - 1
            if done and num_episode % self.args.display_interval == 0:
                print('[{}] Frames: {}, Episode: {}, Mean: {:.3f}, Loss: {:.3f}'.format(datetime.now(), timestep, num_episode, \
                    mean_reward_per_100, td_loss))
                torch.save(self.net.state_dict(),
                           self.model_path + '/model.pt')

    # update the network
    def _update_network(self, samples):
        obses, actions, rewards, obses_next, dones = samples
        # convert the data to tensor
        obses = self._get_tensors(obses)
        actions = torch.tensor(actions, dtype=torch.int64).unsqueeze(-1)
        rewards = torch.tensor(rewards, dtype=torch.float32).unsqueeze(-1)
        obses_next = self._get_tensors(obses_next)
        dones = torch.tensor(1 - dones, dtype=torch.float32).unsqueeze(-1)
        # convert into gpu
        if self.args.cuda:
            actions = actions.cuda()
            rewards = rewards.cuda()
            dones = dones.cuda()
        # calculate the target value
        with torch.no_grad():
            q_value_temp = self.net(obses_next)
            action_max_idx = torch.argmax(q_value_temp, dim=1, keepdim=True)
            target_action_value = self.target_net(obses_next)
            target_action_max_value = target_action_value.gather(
                1, action_max_idx)
            target_action_max_value = target_action_max_value.detach()
        # target
        expected_value = rewards + self.args.gamma * target_action_max_value * dones
        # get the real q value
        action_value = self.net(obses)
        real_value = action_value.gather(1, actions)
        loss = (expected_value - real_value).pow(2).mean()
        # start to update
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()

    # get tensors
    def _get_tensors(self, obs):
        if obs.ndim == 3:
            obs = np.transpose(obs, (2, 0, 1))
            obs = np.expand_dims(obs, 0)
        elif obs.ndim == 4:
            obs = np.transpose(obs, (0, 3, 1, 2))
        obs = torch.tensor(obs, dtype=torch.float32)
        if self.args.cuda:
            obs = obs.cuda()
        return obs
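# The Net used by this agent is defined elsewhere; given the class name, it
# presumably ends in a dueling head that combines a state-value stream and
# an advantage stream as Q(s, a) = V(s) + A(s, a) - mean_a A(s, a). A sketch
# of such a head (layer sizes are illustrative):
import torch.nn as nn

class DuelingHead(nn.Module):
    def __init__(self, in_features, n_actions):
        super().__init__()
        self.value = nn.Linear(in_features, 1)              # V(s)
        self.advantage = nn.Linear(in_features, n_actions)  # A(s, a)

    def forward(self, features):
        v = self.value(features)
        a = self.advantage(features)
        # subtract the mean advantage so V and A are identifiable
        return v + a - a.mean(dim=1, keepdim=True)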
Example #21
0
import torch
import torch.nn as nn
import torch.optim as optim
from models import Net
from model_train import net_sample_output, train_net
from model_evaluate import visualize_output

use_cuda = torch.cuda.is_available()

device = torch.device("cuda" if use_cuda else "cpu")

net = Net().to(device)

# define loss and optimisation functions
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# train the network
n_epochs = 100
train_net(net, device, criterion, optimizer, n_epochs)

# get a sample of test data
test_images, test_outputs, gt_pts = net_sample_output(net, device)

visualize_output(test_images, test_outputs, gt_pts)

# after training, save the model parameters in the dir 'saved_models'
torch.save(net.state_dict(), 'saved_models/keypoints_model.pt')
Example #22
0
def train_net(n_epochs):
    # the wrapper function, batch loop, and forward pass here are an assumed
    # reconstruction -- the original excerpt began at optimizer.zero_grad();
    # net, criterion, optimizer, and train_loader are module-level globals
    net.train()
    for epoch in range(n_epochs):
        running_loss = 0.0
        for batch_i, (images, key_pts) in enumerate(train_loader):
            # forward pass and loss
            output_pts = net(images)
            loss = criterion(output_pts, key_pts)

            # zero the parameter (weight) gradients
            optimizer.zero_grad()

            # backward pass to calculate the weight gradients
            loss.backward()

            # update the weights
            optimizer.step()

            # print loss statistics
            running_loss += loss.item()
            if batch_i % 10 == 9:  # print every 10 batches
                print('Epoch: {}, Batch: {}, Avg. Loss: {}'.format(
                    epoch + 1, batch_i + 1, running_loss / 10))
                running_loss = 0.0

    print('Finished Training')


# train your network
n_epochs = args.epochs  # start small, and increase when you've decided on your model structure and hyperparams
train_net(n_epochs)

# TODO: change the name to something unique for each new model
model_dir = 'saved_models/'
model_name = args.experiment_name + '.pt'

# after training, save your model parameters in the dir 'saved_models'
torch.save(net.state_dict(), model_dir + model_name)
Example #23
0
class NNet():
    """
    Wrapper to manage neural net.
    """
    def __init__(self, args):
        self.args = args
        self.num_channels = NUM_CHANNELS

        if args.netType == 1:
            self.net = Net(self.num_channels, args)
        elif args.netType == 2:
            self.net = Net2(self.num_channels, args)

        if args.cuda:
            self.net = self.net.cuda()

        self.load_dataset_from_folder()
        self.writer = SummaryWriter()
        self.unique_tok = str(time.time())
        self.init_weights()

    def init_weights(self):
        """
        Initialize by Xavier weights
        """
        self.net.apply(init_weights)

    def load_dataset_from_folder(self):
        """
        Load complete dataset
        """
        all_data_path = self.args.all_data_path
        validation_split_size = self.args.validation_split_size
        batch_size = self.args.batch_size
        num_workers = self.args.num_workers
        shuffle = self.args.shuffle

        all_data = ImageFolder(root=all_data_path, transform=TRANSFORM)

        classes = all_data.classes
        self.classes = classes

        validation_size = int(validation_split_size * len(all_data))
        test_size = int(validation_split_size * len(all_data))
        train_size = len(all_data) - validation_size - test_size
        train_dataset, val_dataset, test_dataset = random_split(
            all_data, [train_size, validation_size, test_size])

        training_data_loader = DataLoader(train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          shuffle=shuffle)

        validation_dataset_loader = DataLoader(val_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=shuffle)

        test_dataset_loader = DataLoader(test_dataset,
                                         batch_size=batch_size,
                                         num_workers=num_workers,
                                         shuffle=shuffle)

        self.train_loader = training_data_loader
        self.val_loader = validation_dataset_loader
        self.test_loader = test_dataset_loader

    def train(self):
        """
        Train Neural Net
        """

        if self.args.optim == 'RMSprop':
            optimizer = optim.RMSprop(self.net.parameters(),
                                      lr=self.args.lr,
                                      momentum=self.args.momentum,
                                      weight_decay=self.args.l2_regularization)
        elif self.args.optim == 'SGD':
            optimizer = optim.SGD(self.net.parameters(),
                                  lr=self.args.lr,
                                  momentum=self.args.momentum)
        elif self.args.optim == 'Adam':
            optimizer = optim.Adam(self.net.parameters(), lr=self.args.lr)
        else:
            raise ValueError(f'Unknown optimizer: {self.args.optim}')

        criterion = nn.CrossEntropyLoss()

        # scheduler = optim.lr_scheduler.StepLR(
        #     optimizer, step_size=self.args.scheduler_step_size, gamma=self.args.scheduler_gamma)

        self.net.train()

        for epoch in range(self.args.epoch):
            start_time = time.time()

            running_loss_t = 0.0
            num_batches = 0

            y_true = []
            y_pred = []
            # print('Epoch: {} , LR: {}'.format(epoch+1, scheduler.get_lr()))

            for data in tqdm(self.train_loader):
                inputs, labels = data
                labels_cp = labels.clone()

                # imshow(torchvision.utils.make_grid(inputs[:,:3,:,:]))

                if len(inputs) < 2:
                    continue

                if self.args.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                outputs = self.net(inputs)

                loss = criterion(outputs, labels)

                _, predicted = torch.max(outputs, 1)
                predicted = predicted.cpu()
                for i, pred in enumerate(predicted):
                    y_pred.append(pred)
                    y_true.append(labels_cp[i])

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                running_loss_t += loss.item()
                num_batches += 1

            end_time = time.time()

            train_f1 = f1_score(y_true, y_pred, average='weighted')

            # scheduler.step()

            self.save(epoch + 1)
            self.writer.add_scalar('Loss/train', running_loss_t / num_batches,
                                   epoch + 1)
            self.writer.add_scalar('F1/train', train_f1, epoch + 1)

            loss_v, val_f1 = self.get_validation_loss(criterion)

            self.writer.add_scalar('Loss/val', loss_v, epoch + 1)
            self.writer.add_scalar('F1/val', val_f1, epoch + 1)

            print(
                "Epoch {} Time {:.2f}s Train-Loss {:.3f} Val-Loss {:.3f} Train-F1 {:.3f} Val-F1 {:.3f}"
                .format(epoch + 1, end_time - start_time,
                        running_loss_t / num_batches, loss_v, train_f1,
                        val_f1))

    def get_validation_loss(self, criterion):
        """
        Check validation loss
        """
        running_loss = 0.0
        num_batches = 0

        self.net.eval()
        y_true = []
        y_pred = []

        with torch.no_grad():
            for data in tqdm(self.val_loader):
                images, labels = data
                labels_cp = labels.clone()

                if self.args.cuda:
                    images = images.cuda()
                    labels = labels.cuda()

                outputs = self.net(images)

                _, predicted = torch.max(outputs, 1)
                predicted = predicted.cpu()
                for i, pred in enumerate(predicted):
                    y_pred.append(pred)
                    y_true.append(labels_cp[i])

                loss = criterion(outputs, labels)
                running_loss += loss.item()
                num_batches += 1

        self.net.train()
        val_f1 = f1_score(y_true, y_pred, average='weighted')

        return running_loss / num_batches, val_f1

    def get_test_accuracy(self):
        """
        Check overall accuracy of model
        """
        self.net.eval()
        n_classes = len(self.classes)
        y_true = []
        y_pred = []
        class_correct = [0.0] * n_classes
        class_total = [0.0] * n_classes

        with torch.no_grad():
            for data in tqdm(self.test_loader):
                images, labels = data
                labels_cp = labels.clone()
                if self.args.cuda:
                    images = images.cuda()
                    labels = labels.cuda()
                outputs = self.net(images)
                _, predicted = torch.max(outputs, 1)
                predicted = predicted.cpu()
                for i, pred in enumerate(predicted):
                    y_pred.append(pred)
                    y_true.append(labels_cp[i])
                c = (predicted == labels_cp).squeeze()

                for i in range(min(self.args.batch_size, len(labels_cp))):
                    label = labels_cp[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

        print("Test F1: ", f1_score(y_true, y_pred, average='weighted'))

    def save(self, epochs, folder_path="../models/"):
        """
        Save Model
        """
        dict_save = {'params': self.net.state_dict(), 'classes': self.classes}
        name = folder_path + self.unique_tok + '_' + str(epochs) + '.model'
        torch.save(dict_save, name)
        print('Model saved at {}'.format(name))
        return name

    def load(self, path):
        """
        Load a saved model
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        dict_load = torch.load(path, map_location=device)
        self.net.load_state_dict(dict_load['params'])
        return dict_load['classes']

    def predict(self, inp):
        """
        Predict using net
        """

        if self.args.cuda:
            inp = inp.cuda()

        self.net.eval()
        with torch.no_grad():
            vals = self.net(inp)
            print(vals)
            _, predicted = torch.max(vals, 1)
            predicted = predicted.cpu()
            result_class = self.classes[predicted]

        return result_class
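# init_weights, applied via self.net.apply(init_weights) above, is defined
# elsewhere; given the docstring, a plausible Xavier initializer looks like:
import torch.nn as nn

def init_weights(m):
    # Xavier-initialize linear and conv layers; leave other modules untouched
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)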
Example #24
0
def main(args):
    model = Net().to(device)
    model.load_state_dict(torch.load(args.MODEL_FILE))
    feature_extractor = model.feature_extractor
    clf = model.classifier

    discriminator = nn.Sequential(GradientReversal(), nn.Linear(320, 50),
                                  nn.ReLU(), nn.Linear(50, 20), nn.ReLU(),
                                  nn.Linear(20, 1)).to(device)

    half_batch = args.batch_size // 2
    source_dataset = MNIST(config.DATA_DIR / 'mnist',
                           train=True,
                           download=True,
                           transform=Compose([GrayscaleToRgb(),
                                              ToTensor()]))
    source_loader = DataLoader(source_dataset,
                               batch_size=half_batch,
                               shuffle=True,
                               num_workers=1,
                               pin_memory=True)

    target_dataset = MNISTM(train=False)
    target_loader = DataLoader(target_dataset,
                               batch_size=half_batch,
                               shuffle=True,
                               num_workers=1,
                               pin_memory=True)

    optim = torch.optim.Adam(
        list(discriminator.parameters()) + list(model.parameters()))

    for epoch in range(1, args.epochs + 1):
        batches = zip(source_loader, target_loader)
        n_batches = min(len(source_loader), len(target_loader))

        total_domain_loss = total_label_accuracy = 0
        target_label_accuracy = 0
        for (source_x,
             source_labels), (target_x,
                              target_labels) in tqdm(batches,
                                                     leave=False,
                                                     total=n_batches):
            x = torch.cat([source_x, target_x])
            x = x.to(device)
            domain_y = torch.cat([
                torch.ones(source_x.shape[0]),
                torch.zeros(target_x.shape[0])
            ])
            domain_y = domain_y.to(device)
            label_y = source_labels.to(device)

            features = feature_extractor(x).view(x.shape[0], -1)
            domain_preds = discriminator(features).squeeze()
            label_preds = clf(features[:source_x.shape[0]])

            domain_loss = F.binary_cross_entropy_with_logits(
                domain_preds, domain_y)
            label_loss = F.cross_entropy(label_preds, label_y)
            loss = domain_loss + label_loss

            optim.zero_grad()
            loss.backward()
            optim.step()

            total_domain_loss += domain_loss.item()
            total_label_accuracy += (
                label_preds.max(1)[1] == label_y).float().mean().item()

            target_label_preds = clf(features[source_x.shape[0]:])
            target_label_accuracy += (target_label_preds.cpu().max(1)[1] ==
                                      target_labels).float().mean().item()

        mean_loss = total_domain_loss / n_batches
        mean_accuracy = total_label_accuracy / n_batches
        target_mean_accuracy = target_label_accuracy / n_batches
        tqdm.write(
            f'EPOCH {epoch:03d}: domain_loss={mean_loss:.4f}, '
            f'source_accuracy={mean_accuracy:.4f}, target_accuracy={target_mean_accuracy:.4f}'
        )

        torch.save(model.state_dict(), 'trained_models/revgrad.pt')
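# GradientReversal is imported from elsewhere. The standard construction --
# identity on the forward pass, negated (optionally scaled) gradient on the
# backward pass -- as a sketch; the lambda_ scaling factor is an assumption:
import torch
from torch import nn
from torch.autograd import Function

class GradientReversalFunction(Function):
    @staticmethod
    def forward(ctx, x, lambda_):
        ctx.lambda_ = lambda_
        return x.clone()

    @staticmethod
    def backward(ctx, grad_output):
        # flip (and scale) the gradient flowing back into the features
        return -ctx.lambda_ * grad_output, None

class GradientReversal(nn.Module):
    def __init__(self, lambda_=1.0):
        super().__init__()
        self.lambda_ = lambda_

    def forward(self, x):
        return GradientReversalFunction.apply(x, self.lambda_)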
Example #25
0
def main(args):
    # TODO: add DTN model
    model = Net().to(device)
    model.load_state_dict(torch.load(args.MODEL_FILE))
    feature_extractor = model.feature_extractor
    clf = model.classifier

    discriminator = nn.Sequential(GradientReversal(), nn.Linear(320, 50),
                                  nn.ReLU(), nn.Linear(50, 20), nn.ReLU(),
                                  nn.Linear(20, 1)).to(device)

    half_batch = args.batch_size // 2
    if args.adapt_setting == 'mnist2mnistm':
        source_dataset = MNIST(config.DATA_DIR / 'mnist',
                               train=True,
                               download=True,
                               transform=Compose(
                                   [GrayscaleToRgb(),
                                    ToTensor()]))
        target_dataset = MNISTM(train=False)
    elif args.adapt_setting == 'svhn2mnist':
        source_dataset = ImageClassdata(txt_file=args.src_list,
                                        root_dir=args.src_root,
                                        img_type=args.img_type,
                                        transform=transforms.Compose([
                                            transforms.Resize(28),
                                            transforms.ToTensor(),
                                        ]))
        target_dataset = ImageClassdata(txt_file=args.tar_list,
                                        root_dir=args.tar_root,
                                        img_type=args.img_type,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    elif args.adapt_setting == 'mnist2usps':
        source_dataset = ImageClassdata(txt_file=args.src_list,
                                        root_dir=args.src_root,
                                        img_type=args.img_type,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
        target_dataset = ImageClassdata(txt_file=args.tar_list,
                                        root_dir=args.tar_root,
                                        img_type=args.img_type,
                                        transform=transforms.Compose([
                                            transforms.Resize(28),
                                            transforms.ToTensor(),
                                        ]))
    else:
        raise NotImplementedError
    source_loader = DataLoader(source_dataset,
                               batch_size=half_batch,
                               shuffle=True,
                               num_workers=1,
                               pin_memory=True,
                               drop_last=True)
    target_loader = DataLoader(target_dataset,
                               batch_size=half_batch,
                               shuffle=True,
                               num_workers=1,
                               pin_memory=True,
                               drop_last=True)

    optim = torch.optim.Adam(
        list(discriminator.parameters()) + list(model.parameters()))
    os.makedirs('logs', exist_ok=True)
    f = open(f'logs/{args.adapt_setting}_{args.name}.txt', 'w+')

    for epoch in range(1, args.epochs + 1):
        batches = zip(source_loader, target_loader)
        n_batches = min(len(source_loader), len(target_loader))

        total_domain_loss = total_label_accuracy = 0
        target_label_accuracy = 0
        for (source_x,
             source_labels), (target_x,
                              target_labels) in tqdm(batches,
                                                     leave=False,
                                                     total=n_batches):
            x = torch.cat([source_x, target_x])
            x = x.to(device)
            domain_y = torch.cat([
                torch.ones(source_x.shape[0]),
                torch.zeros(target_x.shape[0])
            ])
            domain_y = domain_y.to(device)
            label_y = source_labels.to(device)

            features = feature_extractor(x).view(x.shape[0], -1)
            domain_preds = discriminator(features).squeeze()
            label_preds = clf(features[:source_x.shape[0]])

            domain_loss = F.binary_cross_entropy_with_logits(
                domain_preds, domain_y)
            label_loss = F.cross_entropy(label_preds, label_y)
            loss = domain_loss + label_loss

            optim.zero_grad()
            loss.backward()
            optim.step()

            total_domain_loss += domain_loss.item()
            total_label_accuracy += (
                label_preds.max(1)[1] == label_y).float().mean().item()

            target_label_preds = clf(features[source_x.shape[0]:])
            target_label_accuracy += (target_label_preds.cpu().max(1)[1] ==
                                      target_labels).float().mean().item()

        mean_loss = total_domain_loss / n_batches
        mean_accuracy = total_label_accuracy / n_batches
        target_mean_accuracy = target_label_accuracy / n_batches
        tqdm.write(
            f'EPOCH {epoch:03d}: domain_loss={mean_loss:.4f}, '
            f'source_accuracy={mean_accuracy:.4f}, target_accuracy={target_mean_accuracy:.4f}'
        )
        f.write(
            f'EPOCH {epoch:03d}: domain_loss={mean_loss:.4f}, '
            f'source_accuracy={mean_accuracy:.4f}, target_accuracy={target_mean_accuracy:.4f}\n'
        )

        torch.save(
            model.state_dict(),
            f'trained_models/{args.adapt_setting}_{args.name}_ep{epoch}.pt')
    f.close()
Example #26
0
class a2c_agent:
    def __init__(self, envs, args):
        self.envs = envs
        self.args = args
        # define the network
        self.net = Net(self.envs.action_space.n)
        if self.args.cuda:
            self.net.cuda()
        # define the optimizer
        self.optimizer = torch.optim.RMSprop(self.net.parameters(),
                                             lr=self.args.lr,
                                             eps=self.args.eps,
                                             alpha=self.args.alpha)
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)
        # check the saved path for envs..
        self.model_path = self.args.save_dir + self.args.env_name + '/'
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        # get the obs..
        self.batch_ob_shape = (self.args.num_processes * self.args.nsteps,
                               ) + self.envs.observation_space.shape
        self.obs = np.zeros(
            (self.args.num_processes, ) + self.envs.observation_space.shape,
            dtype=self.envs.observation_space.dtype.name)
        self.obs[:] = self.envs.reset()
        self.dones = [False for _ in range(self.args.num_processes)]

    # train the network..
    def learn(self):
        if not self.args.no_sil:
            sil_model = sil_module(self.net, self.args, self.optimizer)
        num_updates = self.args.total_frames // (self.args.num_processes *
                                                 self.args.nsteps)
        # get the reward to calculate other information
        episode_rewards = torch.zeros([self.args.num_processes, 1])
        final_rewards = torch.zeros([self.args.num_processes, 1])
        # start to update
        for update in range(num_updates):
            mb_obs, mb_rewards, mb_actions, mb_dones = [], [], [], []
            for step in range(self.args.nsteps):
                with torch.no_grad():
                    input_tensor = self._get_tensors(self.obs)
                    _, pi = self.net(input_tensor)
                # select actions
                actions = select_actions(pi)
                cpu_actions = actions.squeeze(1).cpu().numpy()
                # start to store the information
                mb_obs.append(np.copy(self.obs))
                mb_actions.append(cpu_actions)
                mb_dones.append(self.dones)
                # step
                obs, rewards, dones, _ = self.envs.step(cpu_actions)
                # process rewards...
                raw_rewards = copy.deepcopy(rewards)
                rewards = np.sign(rewards)
                # start to store the rewards
                self.dones = dones
                if not self.args.no_sil:
                    sil_model.step(input_tensor.detach().cpu().numpy(),
                                   cpu_actions, raw_rewards, dones)
                mb_rewards.append(rewards)
                for n, done in enumerate(dones):
                    if done:
                        self.obs[n] = self.obs[n] * 0
                self.obs = obs
                raw_rewards = torch.from_numpy(
                    np.expand_dims(np.stack(raw_rewards), 1)).float()
                episode_rewards += raw_rewards
                # get the masks
                masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                           for done_ in dones])
                final_rewards *= masks
                final_rewards += (1 - masks) * episode_rewards
                episode_rewards *= masks
            mb_dones.append(self.dones)
            # process the rollouts
            mb_obs = np.asarray(mb_obs, dtype=np.uint8).swapaxes(1, 0).reshape(
                self.batch_ob_shape)
            mb_rewards = np.asarray(mb_rewards,
                                    dtype=np.float32).swapaxes(1, 0)
            mb_actions = np.asarray(mb_actions, dtype=np.int32).swapaxes(1, 0)
            mb_dones = np.asarray(mb_dones, dtype=bool).swapaxes(1, 0)
            mb_masks = mb_dones[:, :-1]
            mb_dones = mb_dones[:, 1:]
            with torch.no_grad():
                input_tensor = self._get_tensors(self.obs)
                last_values, _ = self.net(input_tensor)
            # compute returns
            for n, (rewards, dones, value) in enumerate(
                    zip(mb_rewards, mb_dones,
                        last_values.detach().cpu().numpy().squeeze())):
                rewards = rewards.tolist()
                dones = dones.tolist()
                if dones[-1] == 0:
                    rewards = discount_with_dones(rewards + [value],
                                                  dones + [0],
                                                  self.args.gamma)[:-1]
                else:
                    rewards = discount_with_dones(rewards, dones,
                                                  self.args.gamma)
                mb_rewards[n] = rewards
            mb_rewards = mb_rewards.flatten()
            mb_actions = mb_actions.flatten()
            # start to update network
            vl, al, ent = self._update_network(mb_obs, mb_rewards, mb_actions)
            # start to update the sil_module
            if not self.args.no_sil:
                mean_adv, num_samples = sil_model.train_sil_model()
            if update % self.args.log_interval == 0:
                if not self.args.no_sil:
                    print('[{}] Update: {}/{}, Frames: {}, Rewards: {:.2f}, VL: {:.3f}, PL: {:.3f}, ' \
                            'Ent: {:.2f}, Min: {}, Max: {}, BR: {}, E: {}, VS: {}, S: {}'.format(\
                            datetime.now(), update, num_updates, (update+1)*(self.args.num_processes * self.args.nsteps),\
                            final_rewards.mean(), vl, al, ent, final_rewards.min(), final_rewards.max(), sil_model.get_best_reward(), \
                            sil_model.num_episodes(), num_samples, sil_model.num_steps()))
                else:
                    print('[{}] Update: {}/{}, Frames: {}, Rewards: {:.2f}, VL: {:.3f}, PL: {:.3f}, ' \
                            'Ent: {:.2f}, Min: {}, Max: {}'.format(\
                            datetime.now(), update, num_updates, (update+1)*(self.args.num_processes * self.args.nsteps),\
                            final_rewards.mean(), vl, al, ent, final_rewards.min(), final_rewards.max()))
                torch.save(self.net.state_dict(), self.model_path + 'model.pt')

    # update_network
    def _update_network(self, obs, returns, actions):
        # evaluate the actions
        input_tensor = self._get_tensors(obs)
        values, pi = self.net(input_tensor)
        # define the tensor of actions, returns
        returns = torch.tensor(returns, dtype=torch.float32).unsqueeze(1)
        actions = torch.tensor(actions, dtype=torch.int64).unsqueeze(1)
        if self.args.cuda:
            returns = returns.cuda()
            actions = actions.cuda()
        # evaluate actions
        action_log_probs, dist_entropy = evaluate_actions(pi, actions)
        # calculate advantages...
        advantages = returns - values
        # get the value loss
        value_loss = advantages.pow(2).mean()
        # get the action loss
        action_loss = -(advantages.detach() * action_log_probs).mean()
        # total loss
        total_loss = action_loss + self.args.value_loss_coef * value_loss - self.args.entropy_coef * dist_entropy
        # start to update
        self.optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.net.parameters(),
                                       self.args.max_grad_norm)
        self.optimizer.step()

        return value_loss.item(), action_loss.item(), dist_entropy.item()

    # get the tensors...
    def _get_tensors(self, obs):
        input_tensor = torch.tensor(np.transpose(obs, (0, 3, 1, 2)),
                                    dtype=torch.float32)
        if self.args.cuda:
            input_tensor = input_tensor.cuda()
        return input_tensor
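# discount_with_dones, used in the return computation above, follows the
# OpenAI baselines convention; a minimal sketch:
def discount_with_dones(rewards, dones, gamma):
    # discounted returns that reset at episode boundaries
    discounted, r = [], 0.0
    for reward, done in zip(reversed(rewards), reversed(dones)):
        r = reward + gamma * r * (1.0 - done)
        discounted.append(r)
    return discounted[::-1]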
Example #27
0
def train():
    # load dataset
    train_loader, test_loader = load_data()
    channels = 1
    class_num = 10
    # params
    lr = 0.01
    momentum = 0.5
    model = Net(channels, class_num).to(device)
    #optimizer = optim.Adam(model.parameters(), lr=lr)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    epochs = 30

    # train data
    model.train()
    all_train_correct = []
    all_test_correct = []
    all_train_loss = []
    all_test_loss = []
    for epoch in range(epochs):
        train_correct = 0
        train_loss = 0
        print(f"{len(train_loader)} batches per epoch")

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()  # zero the parameter gradients
            output = model(data)
            loss = F.cross_entropy(output, target)  # compute the loss
            loss.backward()
            optimizer.step()
            pred = output.argmax(dim=1, keepdim=True)
            train_correct += pred.eq(target.view_as(pred)).sum().item()
            train_loss += loss.item()
            if batch_idx % 50 == 0:
                print("train Epochs %d %d/%d loss %.6f" %
                      (epoch, batch_idx, len(train_loader), loss.item()))
        # evaluate on the test set
        model.eval()
        test_loss = 0
        test_correct = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(test_loader):
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += F.cross_entropy(output, target).item()
                pred = output.argmax(dim=1, keepdim=True)
                test_correct += pred.eq(target.view_as(pred)).sum().item()
        model.train()
        print("Test average loss %.6f test correct %.6f train correct %.6f" %
              (test_loss / len(test_loader), test_correct * 1.0 /
               (len(test_loader) * test_batch_size), train_correct * 1.0 /
               (len(train_loader) * batch_size)))
        torch.save(
            model.state_dict(), "model/model_cnn_epoch_" + str(epoch) + '_' +
            str(test_correct * 1.0 /
                (len(test_loader) * test_batch_size)) + ".pt")
        all_train_correct.append(train_correct * 1.0 /
                                 (len(train_loader) * batch_size))
        all_test_correct.append(test_correct * 1.0 /
                                (len(test_loader) * test_batch_size))
        all_train_loss.append(train_loss)
        all_test_loss.append(test_loss)
        print('all_train_correct', all_train_correct)
        print('all_test_correct', all_test_correct)
        print('all_train_loss', all_train_loss)
        print('all_test_loss', all_test_loss)
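# The globals assumed by train() above -- device, batch_size,
# test_batch_size -- and the load_data helper are defined elsewhere. A
# plausible minimal version for MNIST-style data (values are illustrative):
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 64
test_batch_size = 1000

def load_data():
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    train_loader = DataLoader(
        datasets.MNIST('data', train=True, download=True,
                       transform=transform),
        batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(
        datasets.MNIST('data', train=False, transform=transform),
        batch_size=test_batch_size)
    return train_loader, test_loader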