Example #1
0
    def test_bigdl_pytorch_estimator_shard(self):
        """Fit/evaluate a bigdl-backend torch Estimator on XShards, then reload and resume.

        A first estimator is trained with ``model_dir`` pointing at a temporary
        directory and an ``EveryEpoch`` checkpoint trigger. A second estimator
        then calls ``load`` on that same directory and runs the same
        fit/evaluate sequence with a larger epoch count.
        """
        class SimpleModel(nn.Module):
            """One linear layer (2 -> 2) followed by log-softmax over dim 1."""

            def __init__(self):
                super(SimpleModel, self).__init__()
                self.fc = nn.Linear(2, 2)

            def forward(self, x):
                return F.log_softmax(self.fc(x), dim=1)

        def loss_func(input, target):
            # The target is flattened and cast to long before the loss is computed.
            labels = target.flatten().long()
            return nn.CrossEntropyLoss().forward(input, labels)

        def transform(df):
            # Map a pandas shard to the {"x": ..., "y": ...} dict layout.
            features = [df['user'].to_numpy(), df['item'].to_numpy()]
            return {"x": features, "y": df['label'].to_numpy()}

        net = SimpleModel()

        OrcaContext.pandas_read_backend = "pandas"
        csv_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        shards = read_csv(csv_path).transform_shard(transform)

        def fit_then_evaluate(est, num_epochs):
            # Both estimators below go through the same fit/evaluate sequence;
            # only the epoch count differs.
            est.fit(data=shards,
                    epochs=num_epochs,
                    batch_size=2,
                    validation_data=shards,
                    validation_methods=[Accuracy()],
                    checkpoint_trigger=EveryEpoch())
            est.evaluate(shards,
                         validation_methods=[Accuracy()],
                         batch_size=2)

        with tempfile.TemporaryDirectory() as ckpt_dir:
            first_est = Estimator.from_torch(model=net,
                                             loss=loss_func,
                                             optimizer=SGD(),
                                             model_dir=ckpt_dir,
                                             backend="bigdl")
            fit_then_evaluate(first_est, 4)

            # Second estimator: no optimizer given, state comes from load().
            resumed_est = Estimator.from_torch(model=net,
                                               loss=loss_func,
                                               optimizer=None,
                                               backend="bigdl")
            resumed_est.load(ckpt_dir, loss=loss_func)
            fit_then_evaluate(resumed_est, 8)
Example #2
0
    def test_bigdl_pytorch_estimator_shard(self):
        """Exercise fit, evaluate, load and predict of a torch Estimator on XShards.

        The test trains an estimator with an ``EveryEpoch`` checkpoint trigger,
        loads a second estimator from the same model directory and resumes
        training. It then checks that ``predict`` returns a ``SparkXShards``
        and that predicting on shards with and without the ``"y"`` entry
        gives the same predictions for the first collected shard.
        """
        class SimpleModel(nn.Module):
            """One linear layer (2 -> 2) followed by log-softmax over dim 1."""

            def __init__(self):
                super(SimpleModel, self).__init__()
                self.fc = nn.Linear(2, 2)

            def forward(self, x):
                x = self.fc(x)
                return F.log_softmax(x, dim=1)

        model = SimpleModel()

        def loss_func(input, target):
            # The target is flattened and cast to long before the loss is computed.
            return nn.CrossEntropyLoss().forward(input, target.flatten().long())

        def transform(df):
            # Stack the user/item columns side by side into one 2-column array.
            result = {
                "x": np.stack([df['user'].to_numpy(), df['item'].to_numpy()], axis=1),
                "y": df['label'].to_numpy()
            }
            return result

        def transform_del_y(d):
            # Keep only the features so predict() runs on label-free shards.
            result = {"x": d["x"]}
            return result

        OrcaContext.pandas_read_backend = "pandas"
        file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        data_shard = read_csv(file_path)
        data_shard = data_shard.transform_shard(transform)

        with tempfile.TemporaryDirectory() as temp_dir_name:
            estimator = Estimator.from_torch(model=model, loss=loss_func,
                                             metrics=[Accuracy()],
                                             optimizer=SGD(learningrate_schedule=Default()),
                                             model_dir=temp_dir_name)
            estimator.fit(data=data_shard, epochs=4, batch_size=2, validation_data=data_shard,
                          checkpoint_trigger=EveryEpoch())
            estimator.evaluate(data_shard, batch_size=2)
            est2 = Estimator.from_torch(model=model, loss=loss_func,
                                        metrics=[Accuracy()],
                                        optimizer=None)
            est2.load(temp_dir_name, loss=loss_func)
            est2.fit(data=data_shard, epochs=8, batch_size=2, validation_data=data_shard,
                     checkpoint_trigger=EveryEpoch())
            est2.evaluate(data_shard, batch_size=2)
            pred_result = est2.predict(data_shard)
            pred_c = pred_result.collect()
            # `assert (a, b)` tests a non-empty tuple and can never fail;
            # isinstance() performs the intended type check.
            assert isinstance(pred_result, SparkXShards)
            pred_shard = data_shard.transform_shard(transform_del_y)
            pred_result2 = est2.predict(pred_shard)
            pred_c_2 = pred_result2.collect()
            assert (pred_c[0]["prediction"] == pred_c_2[0]["prediction"]).all()
Example #3
0
    def test_bigdl_pytorch_estimator_dataloader_creator(self):
        """Fit and evaluate a torch Estimator fed by a zero-argument DataLoader factory.

        After training, ``get_model`` must return an ``nn.Module``.
        """
        class SimpleModel(nn.Module):
            """Linear(2, 4) -> BatchNorm1d(4) -> Linear(4, 1) with a sigmoid output."""

            def __init__(self):
                super(SimpleModel, self).__init__()
                self.dense1 = nn.Linear(2, 4)
                self.bn1 = torch.nn.BatchNorm1d(4)
                self.dense2 = nn.Linear(4, 1)

            def forward(self, x):
                hidden = self.bn1(self.dense1(x))
                return torch.sigmoid(self.dense2(hidden))

        estimator = Estimator.from_torch(model=SimpleModel(),
                                         loss=nn.BCELoss(),
                                         optimizer=Adam())

        def get_dataloader():
            # Six fixed 2-feature samples with 0/1 targets, served in batches of 2.
            features = torch.Tensor([[1, 2], [1, 3], [3, 2], [5, 6], [8, 9], [1, 9]])
            labels = torch.Tensor([[0], [0], [0], [1], [1], [1]])
            dataset = TensorDataset(features, labels)
            return torch.utils.data.DataLoader(dataset, batch_size=2)

        estimator.fit(data=get_dataloader,
                      epochs=2,
                      validation_data=get_dataloader,
                      validation_metrics=[Accuracy()],
                      checkpoint_trigger=EveryEpoch())
        estimator.evaluate(data=get_dataloader,
                           validation_metrics=[Accuracy()])

        trained_model = estimator.get_model()
        assert isinstance(trained_model, nn.Module)
Example #4
0
    def test_nnEstimator_fit_with_train_val_summary(self):
        """Check that a BigDL Estimator records train and validation summaries.

        The estimator is pointed at a TensorBoard log directory, trained for
        5 epochs with validation every epoch, and then queried for its "Loss"
        train summary and "MAE" validation summary. The test asserts on the
        length of both summaries and on the type name of the prediction result.
        """
        from zoo.orca.learn.metrics import MAE

        model = Sequential().add(Linear(2, 2))
        criterion = MSECriterion()
        df, val_df = self.get_estimator_df()
        est = Estimator.from_bigdl(model=model,
                                   loss=criterion,
                                   optimizer=Adam(),
                                   metrics=[MAE()],
                                   feature_preprocessing=SeqToTensor([2]),
                                   label_preprocessing=SeqToTensor([2]))
        # TemporaryDirectory removes the TensorBoard logs when the block exits;
        # a bare mkdtemp() would leave the directory behind after every run.
        with tempfile.TemporaryDirectory() as tmp_dir:
            est.set_tensorboard(log_dir=tmp_dir, app_name="estTest")

            est.fit(df,
                    epochs=5,
                    batch_size=4,
                    validation_data=val_df,
                    validation_trigger=EveryEpoch(),
                    checkpoint_trigger=SeveralIteration(1))

            res = est.predict(df)
            # Summaries are read while the log directory still exists.
            loss_result = est.get_train_summary("Loss")
            mae_result = est.get_validation_summary("MAE")
            assert type(res).__name__ == 'DataFrame'
            assert len(loss_result) == 5
            assert len(mae_result) == 4
Example #5
0
def main():
    """Train and evaluate LeNet on MNIST through an Orca torch Estimator.

    Command-line options select the data folder, batch sizes, epoch count,
    learning rate, random seed and Spark cluster mode ("local" or "yarn").
    The evaluation results are printed one per line before the Orca context
    is stopped.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    option_specs = [
        ('--dir', dict(default='/tmp/data', metavar='N',
                       help='the folder store mnist data')),
        ('--batch-size', dict(type=int, default=256, metavar='N',
                              help='input batch size for training per executor(default: 256)')),
        ('--test-batch-size', dict(type=int, default=1000, metavar='N',
                                   help='input batch size for testing per executor(default: 1000)')),
        ('--epochs', dict(type=int, default=2, metavar='N',
                          help='number of epochs to train (default: 2)')),
        ('--lr', dict(type=float, default=0.001, metavar='LR',
                      help='learning rate (default: 0.001)')),
        ('--seed', dict(type=int, default=1, metavar='S',
                        help='random seed (default: 1)')),
        ('--save-model', dict(action='store_true', default=False,
                              help='For Saving the current Model')),
        ('--cluster_mode', dict(type=str, default="local",
                                help='The mode for the Spark cluster. local or yarn.')),
    ]
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    def mnist_transform():
        # A fresh ToTensor + Normalize pipeline for each dataset.
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])

    train_set = datasets.MNIST(args.dir, train=True, download=True,
                               transform=mnist_transform())
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=args.batch_size, shuffle=True)

    test_set = datasets.MNIST(args.dir, train=False,
                              transform=mnist_transform())
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.test_batch_size, shuffle=False)

    mode = args.cluster_mode
    if mode == "local":
        init_orca_context(cores=1, memory="2g")
    elif mode == "yarn":
        spark_conf = {
            "spark.rpc.message.maxSize": "1024",
            "spark.task.maxFailures": "1",
            "spark.driver.extraJavaOptions": "-Dbigdl.failure.retryTimes=1",
        }
        init_orca_context(
            cluster_mode="yarn-client", cores=4, num_nodes=2, memory="2g",
            driver_memory="10g", driver_cores=1,
            conf=spark_conf)

    net = LeNet()
    net.train()
    nll_loss = nn.NLLLoss()
    adam = torch.optim.Adam(net.parameters(), args.lr)

    est = Estimator.from_torch(model=net, optimizer=adam, loss=nll_loss)
    est.fit(data=train_loader,
            epochs=args.epochs,
            validation_data=test_loader,
            validation_metrics=[Accuracy()],
            checkpoint_trigger=EveryEpoch())

    eval_results = est.evaluate(data=test_loader,
                                validation_metrics=[Accuracy()])
    for entry in eval_results:
        print(str(entry))
    stop_orca_context()
    def test_bigdl_pytorch_estimator_pandas_dataframe(self):
        """Fit, evaluate and predict on shards of pandas DataFrames using column names.

        Features and labels are selected through ``feature_cols`` /
        ``label_cols``. A second estimator loads the Orca checkpoint from the
        model directory and runs ``predict``.
        """
        class SimpleModel(nn.Module):
            """Adds a dimension at dim 1, then Linear(1, 10) and log-softmax."""

            def __init__(self):
                super(SimpleModel, self).__init__()
                self.fc = nn.Linear(1, 10)

            def forward(self, x):
                expanded = torch.unsqueeze(x, dim=1)
                return F.log_softmax(self.fc(expanded), dim=1)

        def loss_func(input, target):
            # The target is flattened and cast to long before the loss is computed.
            labels = target.flatten().long()
            return nn.CrossEntropyLoss().forward(input, labels)

        net = SimpleModel()

        OrcaContext.pandas_read_backend = "pandas"
        csv_path = os.path.join(resource_path,
                                "orca/learn/simple_feature_label.csv")
        shards = read_csv(csv_path)

        with tempfile.TemporaryDirectory() as ckpt_dir:
            trainer = Estimator.from_torch(
                model=net,
                loss=loss_func,
                metrics=[Accuracy()],
                optimizer=SGD(learningrate_schedule=Default()),
                model_dir=ckpt_dir)
            trainer.fit(data=shards,
                        epochs=1,
                        batch_size=4,
                        feature_cols=['feature'],
                        label_cols=['label'],
                        validation_data=shards,
                        checkpoint_trigger=EveryEpoch())
            trainer.evaluate(shards,
                             batch_size=4,
                             feature_cols=['feature'],
                             label_cols=['label'])

            # Second estimator restores from the checkpoint directory and only predicts.
            restored = Estimator.from_torch(model=net,
                                            loss=loss_func,
                                            metrics=[Accuracy()],
                                            optimizer=None)
            restored.load_orca_checkpoint(ckpt_dir)
            restored.predict(shards, batch_size=4, feature_cols=['feature'])
Example #7
0
    def test_bigdl_pytorch_estimator_dataloader_creator(self):
        """Fit and evaluate a torch Estimator built from creator functions.

        The model, optimizer and DataLoader are all supplied as creator
        callables that receive a ``config`` dict. After training,
        ``get_model`` must return an ``nn.Module``.
        """
        class SimpleModel(nn.Module):
            """Linear(2, 4) -> BatchNorm1d(4) -> Linear(4, 1) with a sigmoid output."""

            def __init__(self):
                super(SimpleModel, self).__init__()
                self.dense1 = nn.Linear(2, 4)
                self.bn1 = torch.nn.BatchNorm1d(4)
                self.dense2 = nn.Linear(4, 1)

            def forward(self, x):
                hidden = self.bn1(self.dense1(x))
                return torch.sigmoid(self.dense2(hidden))

        def model_creator(config):
            return SimpleModel()

        def optim_creator(model, config):
            # Learning rate comes from the config, defaulting to 0.01.
            return optim.Adam(model.parameters(), lr=config.get("lr", 0.01))

        def get_dataloader(config, batch_size):
            # Six fixed 2-feature samples with 0/1 targets; the worker count
            # is read from config["threads"] (default 1).
            features = torch.Tensor([[1, 2], [1, 3], [3, 2], [5, 6], [8, 9],
                                     [1, 9]])
            labels = torch.Tensor([[0], [0], [0], [1], [1], [1]])
            return torch.utils.data.DataLoader(
                TensorDataset(features, labels),
                batch_size=batch_size,
                num_workers=config.get("threads", 1))

        estimator = Estimator.from_torch(model=model_creator,
                                         loss=nn.BCELoss(),
                                         metrics=[Accuracy()],
                                         optimizer=optim_creator,
                                         config={"lr": 0.001})

        estimator.fit(data=get_dataloader,
                      epochs=2,
                      batch_size=2,
                      validation_data=get_dataloader,
                      checkpoint_trigger=EveryEpoch())
        estimator.evaluate(data=get_dataloader, batch_size=2)

        trained_model = estimator.get_model()
        assert isinstance(trained_model, nn.Module)
Example #8
0
    def test_xshards_spark_estimator_multi_inputs(self):
        """Run predict, fit, save and evaluate on a two-input BigDL model fed by XShards.

        The user and item columns are passed as two separate inputs that the
        model concatenates before a Dense(2) layer.
        """
        resources_dir = os.path.join(
            os.path.dirname(__file__), "../../../resources")

        def transform(df):
            def as_column(name):
                # expand_dims(axis=1) appends a trailing axis to the column values.
                return np.expand_dims(df[name].to_numpy(), axis=1)

            return {
                "x": [as_column('user'), as_column('item')],
                "y": df['label'].to_numpy(),
            }

        csv_path = os.path.join(resources_dir, "orca/learn/ncf2.csv")
        shards = read_csv(csv_path).transform_shard(transform)

        # Two single-feature inputs merged by concatenation, then Dense(2).
        user_in = ZLayer.Input(shape=(1, ))
        item_in = ZLayer.Input(shape=(1, ))
        merged = ZLayer.merge([user_in, item_in], mode="concat")
        output = ZLayer.Dense(2)(merged)
        net = ZModel([user_in, item_in], output)

        sgd = SGD(learningrate=0.01)
        with tempfile.TemporaryDirectory() as work_dir:
            estimator = Estimator.from_bigdl(model=net,
                                             optimizer=sgd,
                                             loss=ClassNLLCriterion(),
                                             metrics=[Accuracy()],
                                             model_dir=work_dir)
            estimator.set_constant_gradient_clipping(0.1, 1.2)

            # Predict once before any training.
            initial_pred = estimator.predict(data=shards)
            initial_rows = initial_pred.collect()

            estimator.set_tensorboard(log_dir=work_dir, app_name="test")
            estimator.fit(data=shards,
                          epochs=5,
                          batch_size=8,
                          validation_data=shards,
                          checkpoint_trigger=EveryEpoch())
            loss_summary = estimator.get_train_summary(tag="Loss")

            saved_model_path = os.path.join(work_dir, "save_model")
            estimator.save(saved_model_path)
            metrics = estimator.evaluate(data=shards, batch_size=8)
    def test_bigdl_pytorch_estimator_dataframe_fit_evaluate(self):
        """Fit and evaluate a torch Estimator on a Spark DataFrame.

        The DataFrame has a 5-element "feature" column and a single-element
        random 0/1 "label" column; ``evaluate`` must return a dict.
        """
        class SimpleModel(nn.Module):
            """One linear layer (5 -> 5) followed by log-softmax over dim 1."""

            def __init__(self):
                super(SimpleModel, self).__init__()
                self.fc = nn.Linear(5, 5)

            def forward(self, x):
                return F.log_softmax(self.fc(x), dim=1)

        net = SimpleModel()

        def loss_func(input, target):
            # The target is flattened and cast to long before the loss is computed.
            labels = target.flatten().long()
            return nn.CrossEntropyLoss().forward(input, labels)

        def to_row(x):
            # Five copies of the value as features, plus one random 0/1 label.
            return ([float(x)] * 5,
                    [int(np.random.randint(0, 2, size=()))])

        numbers = self.sc.range(0, 100)
        df = numbers.map(to_row).toDF(["feature", "label"])

        with tempfile.TemporaryDirectory() as ckpt_dir:
            estimator = Estimator.from_torch(
                model=net,
                loss=loss_func,
                metrics=[Accuracy()],
                optimizer=SGD(learningrate_schedule=Default()),
                model_dir=ckpt_dir)
            estimator.fit(data=df,
                          epochs=4,
                          batch_size=2,
                          validation_data=df,
                          checkpoint_trigger=EveryEpoch(),
                          feature_cols=["feature"],
                          label_cols=["label"])
            metrics = estimator.evaluate(df,
                                         batch_size=2,
                                         feature_cols=["feature"],
                                         label_cols=["label"])
            assert isinstance(metrics, dict)
Example #10
0

# Build the network, loss and optimizer handed to the Orca estimator.
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

net.train()
orca_estimator = Estimator.from_torch(model=net,
                                      optimizer=optimizer,
                                      loss=criterion,
                                      backend="bigdl")
orca_estimator.fit(data=trainloader,
                   epochs=2,
                   validation_data=testloader,
                   validation_metrics=[Accuracy()],
                   checkpoint_trigger=EveryEpoch())
print('Finished Training')
dataiter = iter(testloader)
# Use the built-in next(): Python 3 iterators implement __next__, and a
# .next() method is not guaranteed to exist on the DataLoader iterator.
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

# evaluate() returns a sequence; only its first entry is reported here.
res = orca_estimator.evaluate(data=testloader,
                              validation_metrics=[Accuracy()])[0]
total_num = res.total_num
result = res.result
print("Accuracy of the network on the %s test images: %s" %
      (total_num, result))
stop_orca_context()
    def test_xshards_spark_estimator(self):
        """End-to-end check of a BigDL Estimator on XShards and on a DataFrame.

        Covers predict before training, fit with TensorBoard summaries, save,
        and evaluate. Predictions on XShards are compared element-wise with
        predictions on a DataFrame. The model is then reloaded twice, once from
        the checkpoint directory and once from the saved model path, and both
        reloads must give matching predictions.
        """
        resources_dir = os.path.join(
            os.path.dirname(__file__), "../../../resources")

        def transform(df):
            # Map a pandas shard to the {"x": ..., "y": ...} dict layout.
            features = [df['user'].to_numpy(), df['item'].to_numpy()]
            return {"x": features, "y": df['label'].to_numpy()}

        csv_path = os.path.join(resources_dir, "orca/learn/ncf2.csv")
        shards = read_csv(csv_path).transform_shard(transform)

        net = Sequential()
        net.add(Linear(2, 2))
        net.add(LogSoftMax())
        sgd = SGD(learningrate=0.01)

        with tempfile.TemporaryDirectory() as work_dir:
            estimator = Estimator.from_bigdl(
                model=net,
                optimizer=sgd,
                loss=ClassNLLCriterion(),
                model_dir=work_dir,
                feature_preprocessing=SeqToTensor([2]),
                label_preprocessing=SeqToTensor([1]))
            estimator.set_constant_gradient_clipping(0.1, 1.2)

            # Predict once before any training.
            untrained_pred = estimator.predict(data=shards)
            untrained_rows = untrained_pred.collect()

            estimator.set_tensorboard(log_dir=work_dir, app_name="test")
            estimator.fit(data=shards,
                          epochs=5,
                          batch_size=8,
                          validation_data=shards,
                          validation_metrics=[Accuracy()],
                          checkpoint_trigger=EveryEpoch())
            loss_summary = estimator.get_train_summary(tag="Loss")

            saved_model_path = os.path.join(work_dir, "save_model")
            estimator.save(saved_model_path)
            estimator.evaluate(data=shards,
                               validation_metrics=[Accuracy()],
                               batch_size=8)

            shard_pred = estimator.predict(data=shards)
            assert type(shard_pred).__name__ == 'SparkXShards'
            shard_rows = shard_pred.collect()

            frame = self.get_estimator_df2()
            frame_pred = estimator.predict(frame)
            frame_rows = frame_pred.collect()
            assert type(frame_pred).__name__ == 'DataFrame'

            # DataFrame predictions must equal the first shard's predictions,
            # row by row, for both output components.
            for idx, row in enumerate(frame_rows):
                for component in (0, 1):
                    assert abs(row["prediction"][component] -
                               shard_rows[0]["prediction"][idx][component]) == 0

            estimator.fit(data=frame,
                          epochs=6,
                          batch_size=8,
                          validation_data=frame,
                          validation_metrics=[Accuracy()],
                          validation_trigger=EveryEpoch())
            full_summary = estimator.get_train_summary()

            # test load from checkpoint
            from_ckpt = Estimator.from_bigdl(model=Sequential(),
                                             optimizer=None,
                                             loss=None,
                                             model_dir=None)
            from_ckpt.load(work_dir,
                           loss=ClassNLLCriterion(),
                           is_checkpoint=True)
            ckpt_rows = from_ckpt.predict(data=shards).collect()
            assert (shard_rows[0]["prediction"] ==
                    ckpt_rows[0]["prediction"]).all()

            # resume training
            from_ckpt.fit(data=shards,
                          epochs=10,
                          batch_size=8,
                          validation_data=shards,
                          validation_metrics=[Accuracy()],
                          checkpoint_trigger=EveryEpoch())
            from_ckpt.evaluate(data=shards,
                               validation_metrics=[Accuracy()],
                               batch_size=8)

            # test load from saved model
            from_saved = Estimator.from_bigdl(model=Sequential(),
                                              optimizer=None,
                                              loss=None,
                                              model_dir=None)
            from_saved.load(saved_model_path,
                            optimizer=sgd,
                            loss=ClassNLLCriterion())
            saved_rows = from_saved.predict(data=shards).collect()
            assert (saved_rows[0]["prediction"] ==
                    ckpt_rows[0]["prediction"]).all()
Example #12
0
def main():
    """Run the Fashion-MNIST TensorBoard example with the selected Estimator backend.

    Command-line options:
        --cluster_mode: "local" or "yarn" start an Orca context; other values
            are accepted by the parser but start no context here.
        --backend: "bigdl" or "torch_distributed".

    Raises:
        NotImplementedError: if ``--backend`` is neither "bigdl" nor
            "torch_distributed".
    """
    parser = argparse.ArgumentParser(description='PyTorch Tensorboard Example')
    parser.add_argument('--cluster_mode',
                        type=str,
                        default="local",
                        help='The cluster mode, such as local, yarn or k8s.')
    parser.add_argument('--backend',
                        type=str,
                        default="bigdl",
                        help='The backend of PyTorch Estimator; '
                        'bigdl and torch_distributed are supported.')
    args = parser.parse_args()

    if args.cluster_mode == "local":
        init_orca_context()
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode=args.cluster_mode, cores=4, num_nodes=2)

    tensorboard_dir = "runs"
    writer = SummaryWriter(tensorboard_dir + '/fashion_mnist_experiment_1')
    # constant for classes
    classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
               'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

    # plot some random training images
    dataiter = iter(train_data_creator(config={}, batch_size=4))
    # Use the built-in next(): Python 3 iterators implement __next__, and a
    # .next() method is not guaranteed to exist on the DataLoader iterator.
    images, labels = next(dataiter)

    # create grid of images
    img_grid = torchvision.utils.make_grid(images)

    # show images
    matplotlib_imshow(img_grid, one_channel=True)

    # write to tensorboard
    writer.add_image('four_fashion_mnist_images', img_grid)

    # inspect the model using tensorboard
    writer.add_graph(model_creator(config={}), images)
    writer.close()

    # training loss vs. epochs
    criterion = nn.CrossEntropyLoss()
    batch_size = 4
    epochs = 5
    if args.backend == "bigdl":
        train_loader = train_data_creator(config={}, batch_size=batch_size)
        test_loader = validation_data_creator(config={}, batch_size=batch_size)

        net = model_creator(config={})
        optimizer = optimizer_creator(model=net, config={"lr": 0.001})
        orca_estimator = Estimator.from_torch(model=net,
                                              optimizer=optimizer,
                                              loss=criterion,
                                              metrics=[Accuracy()],
                                              backend="bigdl")

        orca_estimator.set_tensorboard(tensorboard_dir, "bigdl")

        orca_estimator.fit(data=train_loader,
                           epochs=epochs,
                           validation_data=test_loader,
                           checkpoint_trigger=EveryEpoch())

        res = orca_estimator.evaluate(data=test_loader)
        print("Accuracy of the network on the test images: %s" % res)
    elif args.backend == "torch_distributed":
        orca_estimator = Estimator.from_torch(model=model_creator,
                                              optimizer=optimizer_creator,
                                              loss=criterion,
                                              metrics=[Accuracy()],
                                              backend="torch_distributed")
        stats = orca_estimator.fit(train_data_creator,
                                   epochs=epochs,
                                   batch_size=batch_size)

        for stat in stats:
            writer.add_scalar("training_loss", stat['train_loss'],
                              stat['epoch'])
        # The writer was closed above, before these scalars were added;
        # close it again so that this branch does not leave it open.
        writer.close()
        print("Train stats: {}".format(stats))
        val_stats = orca_estimator.evaluate(validation_data_creator,
                                            batch_size=batch_size)
        print("Validation stats: {}".format(val_stats))
        orca_estimator.shutdown()
    else:
        raise NotImplementedError(
            "Only bigdl and torch_distributed are supported "
            "as the backend, but got {}".format(args.backend))

    stop_orca_context()
    def test_bigdl_pytorch_estimator_save_and_load(self):
        """Verify that ``load`` restores the parameters written by ``save``.

        The model is trained, saved and trained further. After loading the
        saved model, the parameters must differ from the further-trained ones
        and be equal to the ones captured at save time.

        Raises:
            Exception: if loading left every parameter unchanged, or if a
                reloaded parameter differs from its value at save time.
        """
        class Network(nn.Module):
            """Two fully connected layers over flattened 28x28 inputs."""

            def __init__(self):
                super(Network, self).__init__()

                self.fc1 = nn.Linear(28 * 28, 500)
                self.fc2 = nn.Linear(500, 10)

            def forward(self, x):
                x = x.view(-1, 28 * 28)
                x = F.relu(self.fc1(x))
                x = self.fc2(x)
                return F.log_softmax(x, dim=1)

        model = Network()
        model.train()
        criterion = nn.NLLLoss()
        adam = torch.optim.Adam(model.parameters(), 0.001)

        # Named data_dir rather than dir so the builtin dir() is not shadowed.
        data_dir = "./dataset"
        batch_size = 320
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(
                data_dir,
                train=True,
                download=True,
                transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307, ), (0.3081, ))
                ])),
            batch_size=batch_size,
            shuffle=True)

        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(
                data_dir,
                train=False,
                transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307, ), (0.3081, ))
                ])),
            batch_size=batch_size,
            shuffle=False)

        # epoch 1
        est = Estimator.from_torch(model=model,
                                   optimizer=adam,
                                   loss=criterion,
                                   metrics=[Accuracy()])

        est.fit(data=train_loader,
                epochs=1,
                validation_data=test_loader,
                batch_size=batch_size,
                checkpoint_trigger=EveryEpoch())
        paras1 = list(est.get_model().named_parameters())
        est.save("model_epoch_1")

        # epoch 2
        est.fit(data=train_loader,
                epochs=2,
                validation_data=test_loader,
                batch_size=batch_size,
                checkpoint_trigger=EveryEpoch())
        paras2 = list(est.get_model().named_parameters())
        est.load("model_epoch_1")
        paras3 = list(est.get_model().named_parameters())

        # Loading must change at least one parameter relative to the
        # further-trained model.
        changed_by_load = any(
            not torch.all(torch.eq(para2, para3))
            for (_, para2), (_, para3) in zip(paras2, paras3))
        if not changed_by_load:
            raise Exception(
                "Load failed. Parameters did not change after loading.")

        # Every reloaded parameter must equal its value at save time.
        for (name1, para1), (_, para3) in zip(paras1, paras3):
            if not torch.all(torch.eq(para1, para3)):
                raise Exception("After reloading the model, " + name1 +
                                " does not match.")
        print("pass")
Example #14
0
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Build the network, loss and optimizer handed to the Orca estimator.
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

net.train()
orca_estimator = Estimator.from_torch(model=net, optimizer=optimizer, loss=criterion,
                                      backend="bigdl")
orca_estimator.fit(data=trainloader, epochs=2, validation_data=testloader,
                   validation_methods=[Accuracy()], checkpoint_trigger=EveryEpoch())
print('Finished Training')
dataiter = iter(testloader)
# Use the built-in next(): Python 3 iterators implement __next__, and a
# .next() method is not guaranteed to exist on the DataLoader iterator.
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

# evaluate() returns a sequence; only its first entry is reported here.
res = orca_estimator.evaluate(data=testloader, validation_methods=[Accuracy()])[0]
total_num = res.total_num
result = res.result
print("Accuracy of the network on the %s test images: %s" % (total_num, result))
stop_orca_context()