def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--kinematics-pose-csv",
        type=str,
        default="./dataset/train/kinematics_pose.csv",
    )
    parser.add_argument("--joint-states-csv",
                        type=str,
                        default="./dataset/train/joint_states.csv")
    parser.add_argument("--train-val-ratio", type=float, default=0.8)
    parser.add_argument("--batch-size", type=int, default=10000)
    parser.add_argument("--epochs", type=int, default=100)
    parser.add_argument("--lr", type=float, default=0.01)
    parser.add_argument("--save-model", action="store_true", default=False)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = IKNet()
    model.to(device)
    train_loader, val_loader = get_data_loaders(args)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    trigger = ppe.training.triggers.EarlyStoppingTrigger(
        check_trigger=(3, "epoch"), monitor="val/loss")
    my_extensions = [
        extensions.LogReport(),
        extensions.ProgressBar(),
        extensions.observe_lr(optimizer=optimizer),
        extensions.ParameterStatistics(model, prefix="model"),
        extensions.VariableStatisticsPlot(model),
        extensions.Evaluator(
            val_loader,
            model,
            eval_func=lambda data, target: validate(args, model, device, data,
                                                    target),
            progress_bar=True,
        ),
        extensions.PlotReport(["train/loss", "val/loss"],
                              "epoch",
                              filename="loss.png"),
        extensions.PrintReport([
            "epoch",
            "iteration",
            "train/loss",
            "lr",
            "val/loss",
        ]),
    ]
    manager = ppe.training.ExtensionsManager(
        model,
        optimizer,
        args.epochs,
        extensions=my_extensions,
        iters_per_epoch=len(train_loader),
        stop_trigger=trigger,
    )
    train(manager, args, model, device, train_loader)

    if args.save_model:
        torch.save(model.state_dict(), "iknet.pt")
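The train and validate helpers these examples pass around are not shown. A minimal sketch of what they could look like, assuming an MSE objective for the IKNet regression task (the loss choice and everything beyond the call signatures are assumptions); manager.run_iteration(step_optimizers=['main']) takes care of zeroing gradients, stepping the registered optimizer, and firing extensions:

import torch.nn.functional as F
import pytorch_pfn_extras as ppe

def train(manager, args, model, device, train_loader):
    # Loop until the manager's stop trigger (e.g. early stopping) fires.
    while not manager.stop_trigger:
        model.train()
        for data, target in train_loader:
            with manager.run_iteration(step_optimizers=['main']):
                data, target = data.to(device), target.to(device)
                output = model(data)
                loss = F.mse_loss(output, target)
                ppe.reporting.report({'train/loss': loss.item()})
                loss.backward()

def validate(args, model, device, data, target):
    # Called per batch by the Evaluator extension, which averages
    # the reported values over the validation set.
    data, target = data.to(device), target.to(device)
    output = model(data)
    loss = F.mse_loss(output, target)
    ppe.reporting.report({'val/loss': loss.item()})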
Example #2
    def set_ppe_manager(self):
        @self.trainer.on(Events.ITERATION_COMPLETED(every=self.eval_interval))
        def report_loss(engine):
            ppe.reporting.report({'train/loss': engine.state.output})

        # manager.extend(...) also works
        my_extensions = [
            extensions.VariableStatisticsPlot(self.model),
            extensions.ParameterStatistics(self.model, prefix='model'),

            # observe_value
            extensions.observe_lr(optimizer=self.optimizer),
            extensions.PrintReport([
                'epoch', 'elapsed_time', 'lr', 'train/loss', 'val/loss',
                'val/accuracy'
            ]),
            extensions.LogReport(trigger=(self.log_interval, 'epoch')),
            # 'iteration', 'model/fc2.bias/grad/min'
            extensions.PlotReport(['train/loss', 'val/loss'],
                                  'epoch',
                                  filename='loss.png'),
            extensions.PlotReport(['val/accuracy'],
                                  'epoch',
                                  filename='accuracy.png'),
            extensions.ProgressBar(),
            extensions.snapshot(n_retains=self.retain_num),

            # (Not Implemented)ExponentialShift
            # (Not Implemented)InverseShift
            # (Not Implemented)LinearShift
            # (Not Implemented)MultistepShift
            # (Not Implemented)PolynomialShift
            # (Not Implemented)StepShift
            # (Not Implemented)WarmupShift

            # extensions.MicroAverage('loss', 'lr', 'mav'),

            # (Not Implemented)FailOnNonNumber

            # (Not Supported)DumpGraph
            # (Not Supported)unchain_variables
        ]

        my_extensions += [
            extensions.IgniteEvaluator(self.evaluator,
                                       self.valid_loader,
                                       self.model,
                                       progress_bar=True)
        ]

        models = {'main': self.model}
        optimizers = {'main': self.optimizer}
        self.ppe_manager = ppe.training.IgniteExtensionsManager(
            self.trainer,
            models,
            optimizers,
            self.max_epochs,
            extensions=my_extensions,
            out_dir=self.out)
Example #3
def objective(trial):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = IKNet(trial)
    model.to(device)
    train_loader, val_loader = get_data_loaders(args)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    trigger = ppe.training.triggers.EarlyStoppingTrigger(
        check_trigger=(3, "epoch"), monitor="val/loss")
    my_extensions = [
        extensions.LogReport(),
        extensions.ProgressBar(),
        extensions.observe_lr(optimizer=optimizer),
        extensions.ParameterStatistics(model, prefix="model"),
        extensions.VariableStatisticsPlot(model),
        extensions.Evaluator(
            val_loader,
            model,
            eval_func=lambda data, target: validate(args, model, device, data,
                                                    target),
            progress_bar=True,
        ),
        extensions.PlotReport(["train/loss", "val/loss"],
                              "epoch",
                              filename="loss.png"),
        extensions.PrintReport([
            "epoch",
            "iteration",
            "train/loss",
            "lr",
            "val/loss",
        ]),
    ]
    manager = ppe.training.ExtensionsManager(
        model,
        optimizer,
        args.epochs,
        extensions=my_extensions,
        iters_per_epoch=len(train_loader),
        stop_trigger=trigger,
    )
    return train(manager, args, model, device, train_loader)
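objective returns the result of train, so for Optuna that return value has to be the quantity to minimize (presumably the final validation loss), and args is presumably a module-level variable shared with main. A hypothetical driver (study settings are illustrative):

import optuna

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)
print('Best hyperparameters:', study.best_params)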
Example #4
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda',
                        dest='cuda',
                        action='store_false',
                        default=True,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    parser.add_argument('--snapshot',
                        type=str,
                        default=None,
                        help='path to snapshot file')
    parser.add_argument('--no-lazy',
                        dest='lazy',
                        action='store_false',
                        default=True,
                        help='do not use lazy modules')
    args = parser.parse_args()
    use_cuda = args.cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    model = Net(args.lazy)
    model.to(device)
    if args.lazy:
        # Lazy modules need a dummy forward pass to initialize their
        # parameters before the parameter list is handed to the optimizer.
        # The dummy input is taken from the loader's first sample
        # (batch size 1 keeps this cheap).
        dummy_input = train_loader.dataset[0][0].unsqueeze(0).to(device)
        model(dummy_input)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

    # manager.extend(...) also works
    my_extensions = [
        extensions.LogReport(),
        extensions.ProgressBar(),
        extensions.observe_lr(optimizer=optimizer),
        extensions.ParameterStatistics(model, prefix='model'),
        extensions.VariableStatisticsPlot(model),
        extensions.Evaluator(test_loader,
                             model,
                             eval_func=lambda data, target: test(
                                 args, model, device, data, target),
                             progress_bar=True),
        extensions.PlotReport(['train/loss', 'val/loss'],
                              'epoch',
                              filename='loss.png'),
        extensions.PrintReport([
            'epoch', 'iteration', 'train/loss', 'lr',
            'model/fc2.bias/grad/min', 'val/loss', 'val/acc'
        ]),
        extensions.snapshot(),
    ]
    # Custom stop triggers can be added to the manager and
    # their status accessed through `manager.stop_trigger`
    trigger = None
    # trigger = ppe.training.triggers.EarlyStoppingTrigger(
    #     check_trigger=(1, 'epoch'), monitor='val/loss')
    manager = ppe.training.ExtensionsManager(model,
                                             optimizer,
                                             args.epochs,
                                             extensions=my_extensions,
                                             iters_per_epoch=len(train_loader),
                                             stop_trigger=trigger)
    # Let's load the snapshot
    if args.snapshot is not None:
        state = torch.load(args.snapshot)
        manager.load_state_dict(state)
    train(manager, args, model, device, train_loader)
    # Test function is called from the evaluator extension
    # to get access to the reporter and other facilities
    # test(args, model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
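The test function used as eval_func above is not shown. A minimal sketch that reports the val/loss and val/acc entries listed in PrintReport (the body is an assumption reconstructed from those keys):

import torch.nn.functional as F
import pytorch_pfn_extras as ppe

def test(args, model, device, data, target):
    # Called per batch by the Evaluator extension; the reported values
    # are averaged over the validation set.
    data, target = data.to(device), target.to(device)
    output = model(data)
    ppe.reporting.report(
        {'val/loss': F.nll_loss(output, target, reduction='mean').item()})
    pred = output.argmax(dim=1, keepdim=True)
    correct = pred.eq(target.view_as(pred)).sum().item()
    ppe.reporting.report({'val/acc': correct / len(data)})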
Example #5
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', dest='cuda',
                        action='store_false', default=True,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--snapshot', type=str, default=None,
                        help='path to snapshot file')
    args = parser.parse_args()
    use_cuda = args.cuda and torch.cuda.is_available()

    # torch.backends.cudnn.benchmark = False
    # torch.backends.cudnn.deterministic = True
    torch.manual_seed(args.seed)

    comm_world_size, comm_rank, comm_local_rank, device = init_distributed(
        use_cuda)
    if comm_rank == 0:
        print("World size = {}".format(comm_world_size))
    print("Rank = {}, Local Rank = {}".format(comm_rank, comm_local_rank))
    print("Device = {}".format(device))

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    dataset_root = '../data'
    if comm_local_rank == 0:
        # download mnist
        datasets.MNIST(dataset_root, download=True)
    torch.distributed.barrier()

    train_dataset = datasets.MNIST(
        dataset_root,
        train=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]))
    test_dataset = datasets.MNIST(
        dataset_root,
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]))

    train_sampler = torch.utils.data.DistributedSampler[int](
        train_dataset, num_replicas=comm_world_size, rank=comm_rank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, sampler=train_sampler,
        **kwargs)  # type: ignore[arg-type]

    test_dataset_indices = list(range(len(test_dataset)))
    local_test_dataset_indices = test_dataset_indices[
        comm_rank:len(test_dataset_indices):comm_world_size]
    local_test_dataset = torch.utils.data.Subset(
        test_dataset, local_test_dataset_indices)
    test_loader = torch.utils.data.DataLoader(
        local_test_dataset, batch_size=args.test_batch_size, shuffle=True,
        **kwargs)  # type: ignore[arg-type]

    model = ppe.nn.parallel.DistributedDataParallel(Net().to(device))

    optimizer = optim.SGD(
        model.parameters(), lr=args.lr, momentum=args.momentum)

    # manager.extend(...) also works
    if comm_local_rank == 0:
        my_extensions = [
            extensions.LogReport(),
            extensions.ProgressBar(),
            extensions.observe_lr(optimizer=optimizer),
            extensions.ParameterStatistics(model, prefix='model'),
            extensions.VariableStatisticsPlot(model),
            extensions.Evaluator(
                test_loader, model,
                eval_func=lambda data, target:
                    test(args, model, device, data, target),
                progress_bar=True),
            extensions.PlotReport(
                ['train/loss', 'val/loss'], 'epoch', filename='loss.png'),
            extensions.PrintReport(['epoch', 'iteration',
                                    'train/loss', 'lr',
                                    'model/fc2.bias/grad/min',
                                    'val/loss', 'val/acc']),
            extensions.snapshot(),
        ]
    else:
        my_extensions = []

    # Custom stop triggers can be added to the manager and
    # their status accessed through `manager.stop_trigger`
    trigger = None
    # trigger = ppe.training.triggers.EarlyStoppingTrigger(
    #     check_trigger=(1, 'epoch'), monitor='val/loss')
    manager = ppe.training.ExtensionsManager(
        model, optimizer, args.epochs,
        extensions=my_extensions,
        iters_per_epoch=len(train_loader),
        stop_trigger=trigger)
    # Let's load the snapshot
    if args.snapshot is not None:
        state = torch.load(args.snapshot)
        manager.load_state_dict(state)
    train(manager, args, model, device, train_loader)
    # Test function is called from the evaluator extension
    # to get access to the reporter and other facilities
    # test(args, model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")

    # Wait for all processes to finish to complete successfully
    torch.distributed.barrier()
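init_distributed is not shown. A minimal sketch, assuming env:// initialization with the environment variables set by torchrun (the backend choice and LOCAL_RANK handling are assumptions):

import os
import torch

def init_distributed(use_cuda):
    # Initialize the default process group and derive ranks from the
    # launcher-provided environment.
    torch.distributed.init_process_group(
        backend='nccl' if use_cuda else 'gloo', init_method='env://')
    world_size = torch.distributed.get_world_size()
    rank = torch.distributed.get_rank()
    local_rank = int(os.environ.get('LOCAL_RANK', '0'))
    if use_cuda:
        torch.cuda.set_device(local_rank)
        device = torch.device(f'cuda:{local_rank}')
    else:
        device = torch.device('cpu')
    return world_size, rank, local_rank, device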
Example #6
def run(train_batch_size, val_batch_size, epochs, lr, momentum, log_interval):
    train_loader, val_loader = get_data_loaders(train_batch_size,
                                                val_batch_size)
    model = Net()
    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda:0'

    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        F.nll_loss,
                                        device=device)
    evaluator = create_supervised_evaluator(model,
                                            metrics={
                                                'acc': Accuracy(),
                                                'loss': Loss(F.nll_loss)
                                            },
                                            device=device)

    # manager.extend(...) also works
    my_extensions = [
        extensions.LogReport(),
        extensions.ProgressBar(),
        extensions.observe_lr(optimizer=optimizer),
        extensions.ParameterStatistics(model, prefix='model'),
        extensions.VariableStatisticsPlot(model),
        extensions.snapshot(),
        extensions.IgniteEvaluator(evaluator,
                                   val_loader,
                                   model,
                                   progress_bar=True),
        extensions.PlotReport(['train/loss'], 'epoch', filename='loss.png'),
        extensions.PrintReport([
            'epoch',
            'iteration',
            'train/loss',
            'lr',
            'model/fc2.bias/grad/min',
            'val/loss',
            'val/acc',
        ]),
    ]
    models = {'main': model}
    optimizers = {'main': optimizer}
    manager = ppe.training.IgniteExtensionsManager(trainer,
                                                   models,
                                                   optimizers,
                                                   epochs,
                                                   extensions=my_extensions)

    # Let's load the snapshot
    # (`args` here is assumed to be the module-level parsed arguments.)
    if args.snapshot is not None:
        state = torch.load(args.snapshot)
        manager.load_state_dict(state)

    @trainer.on(Events.ITERATION_COMPLETED)
    def report_loss(engine):
        ppe.reporting.report({'train/loss': engine.state.output})

    trainer.run(train_loader, max_epochs=epochs)
Example #7
def main():
    args = argument_paser()

    # Fix seed
    torch.manual_seed(77)

    # Config gpu
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Prepare data
    dataset = MovingMnistDataset()
    train_index, valid_index = train_test_split(range(len(dataset)),
                                                test_size=0.3)
    train_loader = DataLoader(Subset(dataset, train_index),
                              batch_size=args.batch_size,
                              shuffle=True,
                              **kwargs)
    valid_loader = DataLoader(Subset(dataset, valid_index),
                              batch_size=args.test_batch_size,
                              shuffle=False,
                              **kwargs)

    # Prepare model
    net = ConvLSTMEncoderPredictor(image_size=(64, 64)).to(device)
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=args.lr,
                                 betas=(0.9, 0.999))
    criterion = nn.MSELoss()

    # manager.extend(...) also works
    my_extensions = [
        extensions.LogReport(),
        extensions.ProgressBar(),
        extensions.observe_lr(optimizer=optimizer),
        extensions.ParameterStatistics(net, prefix='model'),
        extensions.VariableStatisticsPlot(net),
        extensions.Evaluator(valid_loader,
                             net,
                             eval_func=lambda data, target: eval_net(
                                 net, criterion, data, target, device),
                             progress_bar=True),
        extensions.PlotReport(['train/loss', 'val/loss'],
                              'epoch',
                              filename='loss.png'),
        extensions.PrintReport(
            ['epoch', 'iteration', 'train/loss', 'val/loss', 'lr']),
        extensions.snapshot(),
    ]

    # Custom stop triggers can be added to the manager and
    # their status accessed through `manager.stop_trigger`
    trigger = None
    # trigger = ppe.training.triggers.EarlyStoppingTrigger(
    #     check_trigger=(1, 'epoch'), monitor='val/loss')

    # Define manager
    manager = ppe.training.ExtensionsManager(net,
                                             optimizer,
                                             args.epochs,
                                             extensions=my_extensions,
                                             iters_per_epoch=len(train_loader),
                                             stop_trigger=trigger)

    # Let's load the snapshot
    if args.snapshot is not None:
        state = torch.load(args.snapshot)
        manager.load_state_dict(state)

    # Execute train
    train(manager, net, criterion, train_loader, device)
    # Test function is called from the evaluator extension
    # to get access to the reporter and other facilities
    # test(args, model, device, test_loader)

    if args.save_model:
        torch.save(net.state_dict(), "mnist_cnn.pt")
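eval_net is not shown. A minimal sketch that evaluates one batch with the given criterion and reports val/loss for the extensions above (everything beyond the call signature is an assumption):

import pytorch_pfn_extras as ppe

def eval_net(net, criterion, data, target, device):
    # Called per batch by the Evaluator extension; the reported values
    # are averaged over the validation set.
    data, target = data.to(device), target.to(device)
    output = net(data)
    loss = criterion(output, target)
    ppe.reporting.report({'val/loss': loss.item()})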
Example #8
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--device',
                        type=str,
                        default='cuda',
                        help='PyTorch device specifier')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--deterministic',
                        action='store_true',
                        default=False,
                        help='make the behavior deterministic')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    parser.add_argument('--snapshot',
                        type=str,
                        default=None,
                        help='path to snapshot file')
    parser.add_argument('--compare-dump',
                        type=str,
                        default=None,
                        help='directory to save comparer dump to')
    parser.add_argument('--compare-with',
                        type=str,
                        default=None,
                        help='directory to load comparer dump from')
    parser.add_argument('--profiler',
                        type=str,
                        default=None,
                        help='output mode for profiler results')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    numpy.random.seed(args.seed)
    torch.use_deterministic_algorithms(args.deterministic)

    use_cuda = args.device.startswith('cuda')

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data',
                       train=True,
                       download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307, ), (0.3081, ))
                       ])),
        batch_size=args.batch_size,
        shuffle=True,
        collate_fn=ppe.dataloaders.utils.CollateAsDict(
            names=['data', 'target']),
        **kwargs)  # type: ignore[arg-type]
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data',
                       train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307, ), (0.3081, ))
                       ])),
        batch_size=args.test_batch_size,
        shuffle=True,
        collate_fn=ppe.dataloaders.utils.CollateAsDict(
            names=['data', 'target']),
        **kwargs)  # type: ignore[arg-type]

    model = Net()

    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

    my_extensions = [
        extensions.LogReport(),
        extensions.ProgressBar(),
        extensions.observe_lr(optimizer=optimizer),
        extensions.ParameterStatistics(model, prefix='model'),
        extensions.VariableStatisticsPlot(model),
        extensions.PlotReport(['train/loss', 'val/loss'],
                              'epoch',
                              filename='loss.png'),
        extensions.PrintReport([
            'epoch', 'iteration', 'train/loss', 'lr',
            'model/fc2.bias/grad/min', 'val/loss', 'val/accuracy'
        ]),
        extensions.snapshot(),
    ]

    # Custom stop triggers can be added to the manager and
    # their status accessed through `manager.stop_trigger`
    trigger = None
    # trigger = ppe.training.triggers.EarlyStoppingTrigger(
    #     check_trigger=(1, 'epoch'), monitor='val/loss')

    profile = None
    if args.profiler is not None:
        if args.profiler == 'tensorboard':

            def callback(prof):
                # The returned handler must be applied to the profiler
                # instance, otherwise no trace is written.
                torch.profiler.tensorboard_trace_handler('./prof')(prof)
        elif args.profiler == 'export_chrome_trace':

            def callback(prof):
                prof.export_chrome_trace('./prof')
        elif args.profiler == 'export_stacks':

            def callback(prof):
                prof.export_stacks('./prof')
        elif args.profiler == 'to_pickle':

            def callback(prof):
                import pandas as pd
                df = pd.DataFrame([e.__dict__ for e in prof.events()])
                df.to_pickle(f"{trainer.epoch}.pkl")
        elif args.profiler == 'print':

            def callback(prof):
                table = prof.key_averages().table(
                    sort_by="self_cuda_time_total", row_limit=-1)
                print(table)
        else:
            raise ValueError(f'unknown profiler mode: {args.profiler}')
        profile = torch.profiler.profile(
            activities=[
                torch.profiler.ProfilerActivity.CPU,
                torch.profiler.ProfilerActivity.CUDA,
            ],
            schedule=torch.profiler.schedule(
                wait=0, warmup=0, active=len(train_loader)),
            on_trace_ready=callback,
        )

    model_with_loss = ModelWithLoss(model)
    trainer = ppe.engine.create_trainer(
        model_with_loss,
        optimizer,
        args.epochs,
        device=args.device,
        extensions=my_extensions,
        stop_trigger=trigger,
        evaluator=ppe.engine.create_evaluator(
            model_with_loss,
            device=args.device,
            progress_bar=True,
            metrics=[ppe.training.metrics.AccuracyMetric('target', 'output')],
            options={'eval_report_keys': ['loss', 'accuracy']},
        ),
        options={'train_report_keys': ['loss']},
        profile=profile,
    )

    ppe.to(model_with_loss, args.device)

    # Let's load the snapshot
    if args.snapshot is not None:
        state = torch.load(args.snapshot)
        trainer.load_state_dict(state)

    # Run comparison between devices when requested.
    if args.compare_dump is not None or args.compare_with is not None:
        comp = ppe.utils.comparer.Comparer(
            compare_fn=ppe.utils.comparer.get_default_comparer(rtol=1e-2),
            outputs=['loss'],
        )
        if args.compare_dump is None:
            # Compare the engine with an existing dump directory.
            comp.add_dump('baseline', args.compare_with)
            comp.add_engine(args.device, trainer, train_loader, test_loader)
            comp.compare()
        else:
            # Create a dump for comparison.
            assert args.compare_with is None
            comp.dump(trainer, args.compare_dump, train_loader, test_loader)
        return

    trainer.run(train_loader, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
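The --compare-dump and --compare-with flags are meant to be used across two separate runs: the first run records a baseline dump of the reported 'loss' values, and the second run compares its own outputs against that dump within rtol=1e-2. A hypothetical pair of invocations (the script name is assumed):

    python train_mnist.py --device cpu --compare-dump ./dump
    python train_mnist.py --device cuda --compare-with ./dump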
Example #9
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--device', type=str, default='cuda',
                        help='PyTorch device specifier')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--snapshot', type=str, default=None,
                        help='path to snapshot file')
    parser.add_argument('--no-lazy', dest='lazy',
                        action='store_false', default=True,
                        help='do not use lazy modules')
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    use_cuda = args.device.startswith('cuda')

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True,
        collate_fn=ppe.dataloaders.utils.CollateAsDict(
            names=['data', 'target']), **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=args.test_batch_size, shuffle=True,
        collate_fn=ppe.dataloaders.utils.CollateAsDict(
            names=['data', 'target']), **kwargs)

    model = Net(args.lazy)
    if args.lazy:
        # Lazy modules need a dummy forward pass to initialize their
        # parameters before the parameter list is handed to the optimizer.
        dummy_input = train_loader.dataset[0][0].unsqueeze(0)
        model(dummy_input)

    optimizer = optim.SGD(
        model.parameters(), lr=args.lr, momentum=args.momentum)

    my_extensions = [
        extensions.LogReport(),
        extensions.ProgressBar(),
        extensions.observe_lr(optimizer=optimizer),
        extensions.ParameterStatistics(model, prefix='model'),
        extensions.VariableStatisticsPlot(model),
        extensions.PlotReport(
            ['train/loss', 'val/loss'], 'epoch', filename='loss.png'),
        extensions.PrintReport(['epoch', 'iteration',
                                'train/loss', 'lr', 'model/fc2.bias/grad/min',
                                'val/loss', 'val/accuracy']),
        extensions.snapshot(),
    ]

    # Custom stop triggers can be added to the manager and
    # their status accessed through `manager.stop_trigger`
    trigger = None
    # trigger = ppe.training.triggers.EarlyStoppingTrigger(
    #     check_trigger=(1, 'epoch'), monitor='val/loss')

    model_with_loss = ModelWithLoss(model)
    trainer = ppe.engine.create_trainer(
        model_with_loss,
        optimizer,
        args.epochs,
        device=args.device,
        extensions=my_extensions,
        stop_trigger=trigger,
        evaluator=ppe.engine.create_evaluator(
            model_with_loss,
            device=args.device,
            progress_bar=True,
            metrics=[ppe.training.metrics.AccuracyMetric('target', 'output')],
            options={'eval_report_keys': ['loss', 'accuracy']}),
        options={'train_report_keys': ['loss']}
    )

    if use_cuda:
        ppe.to(model_with_loss, args.device)

    # Let's load the snapshot
    if args.snapshot is not None:
        state = torch.load(args.snapshot)
        trainer.load_state_dict(state)

    trainer.run(train_loader, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
Example #10
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--device', type=str, default='cuda',
                        help='PyTorch device specifier')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--snapshot', type=str, default=None,
                        help='path to snapshot file')
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    use_cuda = args.device.startswith('cuda')

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True,
        collate_fn=ppe.dataloaders.utils.CollateAsDict(
            names=['data', 'target']), **kwargs)  # type: ignore[arg-type]
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=args.test_batch_size, shuffle=True,
        collate_fn=ppe.dataloaders.utils.CollateAsDict(
            names=['data', 'target']), **kwargs)  # type: ignore[arg-type]

    model = Net()

    optimizer = optim.SGD(
        model.parameters(), lr=args.lr, momentum=args.momentum)

    my_extensions = [
        extensions.LogReport(),
        extensions.ProgressBar(),
        extensions.observe_lr(optimizer=optimizer),
        extensions.ParameterStatistics(model, prefix='model'),
        extensions.VariableStatisticsPlot(model),
        extensions.PlotReport(
            ['train/loss', 'val/loss'], 'epoch', filename='loss.png'),
        extensions.PrintReport(['epoch', 'iteration',
                                'train/loss', 'lr', 'model/fc2.bias/grad/min',
                                'val/loss', 'val/accuracy']),
        extensions.snapshot(),
    ]

    # Custom stop triggers can be added to the manager and
    # their status accessed through `manager.stop_trigger`
    trigger = None
    # trigger = ppe.training.triggers.EarlyStoppingTrigger(
    #     check_trigger=(1, 'epoch'), monitor='val/loss')

    class ModelWithLoss(torch.nn.Module):
        def __init__(self, model):
            super().__init__()
            self.model = model

        def forward(self, data, target):
            output = self.model(data)

            if self.model.training:
                loss = F.nll_loss(output, target)
                ppe.reporting.report({'train/loss': loss.item()})
                return {'loss': loss}

            # The final result is the average of per-batch averages,
            # which is exact when all batches have the same size.
            test_loss = F.nll_loss(output, target, reduction='mean').item()
            pred = output.argmax(dim=1, keepdim=True)
            return {'loss': test_loss, 'output': pred}

    model_with_loss = ModelWithLoss(model)
    trainer = ppe.engine.create_trainer(
        model_with_loss,
        optimizer,
        args.epochs,
        device=args.device,
        extensions=my_extensions,
        stop_trigger=trigger,
        evaluator=ppe.engine.create_evaluator(
            model_with_loss,
            device=args.device,
            progress_bar=True,
            metrics=[ppe.training.metrics.AccuracyMetric('target', 'output')],
            options={'eval_report_keys': ['loss', 'accuracy']}),
        options={'train_report_keys': ['loss']},
        logic=CustomLogic(3),
    )

    if use_cuda:
        ppe.to(model_with_loss, args.device)

    # Let's load the snapshot
    if args.snapshot is not None:
        state = torch.load(args.snapshot)
        trainer.load_state_dict(state)

    trainer.run(train_loader, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
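CustomLogic, passed as logic= above, is defined elsewhere in this example; given the constructor argument, it plausibly customizes the update rule. A sketch of one such customization, gradient accumulation, assuming the hook name and signature train_step_optimizers(models, optimizers, batch_idx) of recent pytorch-pfn-extras releases:

import pytorch_pfn_extras as ppe

class CustomLogic(ppe.handler.Logic):
    # Hypothetical: step the optimizers only every n-th iteration so
    # that gradients accumulate across n batches.
    def __init__(self, n, model_name='main', options=None):
        super().__init__(model_name=model_name, options=options)
        self.n = n

    def train_step_optimizers(self, models, optimizers, batch_idx):
        # Hook signature assumed from recent ppe versions.
        if batch_idx % self.n == 0:
            super().train_step_optimizers(models, optimizers, batch_idx)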
Example #11
def main():
    # Default arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-type", type=str, default="synth")
    parser.add_argument("--joint-coord",
                        type=str,
                        default="../data/train/synth/")
    parser.add_argument("--joint-states",
                        type=str,
                        default="../data/train/synth/")
    parser.add_argument("--robot-path",
                        type=str,
                        default="../data/urdf/mh5l.urdf")
    parser.add_argument("--robot", type=str, default="mh5l")
    parser.add_argument("--train-val-ratio", type=float, default=0.8)
    parser.add_argument("--batch-size", type=int, default=1000)
    parser.add_argument("--epochs", type=int, default=20)
    parser.add_argument("--lr", type=float, default=0.01)
    parser.add_argument("--save-model", action="store_true", default=True)
    args, unknown = parser.parse_known_args()

    # Define torch device based upon GPU availability
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = KineNet()
    # Move the model to the device
    model.to(device)
    train_loader, val_loader = get_data_loaders(args)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    trigger = ppe.training.triggers.EarlyStoppingTrigger(
        check_trigger=(3, "epoch"), monitor="val/loss")

    # Collect per-joint validation results reported by validate()
    results = []

    # Configure extensions
    my_extensions = [
        extensions.LogReport(),
        extensions.ProgressBar(),
        extensions.observe_lr(optimizer=optimizer),
        extensions.ParameterStatistics(model, prefix="model"),
        extensions.VariableStatisticsPlot(model),
        extensions.Evaluator(val_loader,
                             model,
                             eval_func=lambda data, target: validate(
                                 args, model, device, data, target, results),
                             progress_bar=True),
        extensions.PlotReport(["train/loss", "val/loss"],
                              "epoch",
                              filename="loss.png"),
        extensions.PrintReport([
            "epoch",
            "iteration",
            "train/loss",
            "lr",
            "val/loss",
        ]),
    ]

    # Set up the ppe ExtensionsManager
    manager = ppe.training.ExtensionsManager(
        model,
        optimizer,
        args.epochs,
        extensions=my_extensions,
        iters_per_epoch=len(train_loader),
        stop_trigger=trigger,
    )

    train(manager, args, model, device, train_loader, results)
    plot_loss(pd.DataFrame(results, columns=['S', 'L', 'U', 'B', 'R']))

    if args.save_model:
        torch.save(model.state_dict(), "../model/mh5l_kinenet.pt")
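plot_loss is not shown. A minimal sketch, assuming results holds one row of per-joint errors per evaluation and that S, L, U, B, R name the robot's joint axes (Yaskawa-style axis naming for the MH5L):

import matplotlib.pyplot as plt
import pandas as pd

def plot_loss(df: pd.DataFrame):
    # Hypothetical: one curve per joint axis, indexed by evaluation step.
    df.plot(figsize=(8, 5))
    plt.xlabel('evaluation step')
    plt.ylabel('per-joint error')
    plt.savefig('joint_loss.png')
    plt.close()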