Example #1
    def test_bigdl_pytorch_estimator_shard(self):
        class SimpleModel(nn.Module):
            def __init__(self):
                super(SimpleModel, self).__init__()
                self.fc = nn.Linear(2, 2)

            def forward(self, x):
                x = self.fc(x)
                return F.log_softmax(x, dim=1)

        model = SimpleModel()

        def loss_func(input, target):
            return nn.CrossEntropyLoss().forward(input, target.flatten().long())

        def transform(df):
            result = {
                "x": [df['user'].to_numpy(), df['item'].to_numpy()],
                "y": df['label'].to_numpy()
            }
            return result

        OrcaContext.pandas_read_backend = "pandas"
        file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        data_shard = read_csv(file_path)
        data_shard = data_shard.transform_shard(transform)

        estimator = Estimator.from_torch(model=model, loss=loss_func,
                                         optimizer=SGD(), backend="bigdl")
        estimator.fit(data=data_shard, epochs=4, batch_size=2, validation_data=data_shard,
                      validation_methods=[Accuracy()], checkpoint_trigger=EveryEpoch())
        estimator.evaluate(data_shard, validation_methods=[Accuracy()], batch_size=2)
Example #2
    def test_bigdl_pytorch_estimator_dataloader_creator(self):
        class SimpleModel(nn.Module):
            def __init__(self):
                super(SimpleModel, self).__init__()
                self.dense1 = nn.Linear(2, 4)
                self.bn1 = torch.nn.BatchNorm1d(4)
                self.dense2 = nn.Linear(4, 1)

            def forward(self, x):
                x = self.dense1(x)
                x = self.bn1(x)
                x = torch.sigmoid(self.dense2(x))
                return x

        model = SimpleModel()

        estimator = Estimator.from_torch(model=model, loss=nn.BCELoss(),
                                         optimizer=Adam())

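        # data loader creator: returns a DataLoader over a small in-memory binary-classification dataset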
        def get_dataloader():
            inputs = torch.Tensor([[1, 2], [1, 3], [3, 2], [5, 6], [8, 9], [1, 9]])
            targets = torch.Tensor([[0], [0], [0], [1], [1], [1]])
            return torch.utils.data.DataLoader(TensorDataset(inputs, targets), batch_size=2)

        estimator.fit(data=get_dataloader, epochs=2, validation_data=get_dataloader,
                      validation_metrics=[Accuracy()], checkpoint_trigger=EveryEpoch())
        estimator.evaluate(data=get_dataloader, validation_metrics=[Accuracy()])
        model = estimator.get_model()
        assert isinstance(model, nn.Module)
Example #3
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--dir', default='/tmp/data', metavar='N',
                        help='the folder to store the MNIST data')
    parser.add_argument('--batch-size', type=int, default=256, metavar='N',
                        help='input batch size for training per executor (default: 256)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing per executor (default: 1000)')
    parser.add_argument('--epochs', type=int, default=2, metavar='N',
                        help='number of epochs to train (default: 2)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--cluster_mode', type=str, default="local",
                        help='The mode for the Spark cluster. local or yarn.')
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(args.dir, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(args.dir, train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size, shuffle=False)

    if args.cluster_mode == "local":
        init_orca_context(cores=1, memory="2g")
    elif args.cluster_mode == "yarn":
        init_orca_context(
            cluster_mode="yarn-client", cores=4, num_nodes=2, memory="2g",
            driver_memory="10g", driver_cores=1,
            conf={"spark.rpc.message.maxSize": "1024",
                  "spark.task.maxFailures": "1",
                  "spark.driver.extraJavaOptions": "-Dbigdl.failure.retryTimes=1"})

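    # build the model, loss and optimizer, then train and evaluate with the Orca PyTorch Estimator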
    model = LeNet()
    model.train()
    criterion = nn.NLLLoss()

    adam = torch.optim.Adam(model.parameters(), args.lr)
    est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion)
    est.fit(data=train_loader, epochs=args.epochs, validation_data=test_loader,
            validation_metrics=[Accuracy()], checkpoint_trigger=EveryEpoch())
    result = est.evaluate(data=test_loader, validation_metrics=[Accuracy()])
    for r in result:
        print(str(r))
    stop_orca_context()
Example #4
    def test_bigdl_pytorch_estimator_shard(self):
        class SimpleModel(nn.Module):
            def __init__(self):
                super(SimpleModel, self).__init__()
                self.fc = nn.Linear(2, 2)

            def forward(self, x):
                x = self.fc(x)
                return F.log_softmax(x, dim=1)

        model = SimpleModel()

        def loss_func(input, target):
            return nn.CrossEntropyLoss().forward(input, target.flatten().long())

        def transform(df):
            result = {
                "x": np.stack([df['user'].to_numpy(), df['item'].to_numpy()], axis=1),
                "y": df['label'].to_numpy()
            }
            return result

        def transform_del_y(d):
            result = {"x": d["x"]}
            return result

        OrcaContext.pandas_read_backend = "pandas"
        file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        data_shard = read_csv(file_path)
        data_shard = data_shard.transform_shard(transform)

        with tempfile.TemporaryDirectory() as temp_dir_name:
            estimator = Estimator.from_torch(model=model, loss=loss_func,
                                             metrics=[Accuracy()],
                                             optimizer=SGD(learningrate_schedule=Default()),
                                             model_dir=temp_dir_name)
            estimator.fit(data=data_shard, epochs=4, batch_size=2, validation_data=data_shard,
                          checkpoint_trigger=EveryEpoch())
            estimator.evaluate(data_shard, batch_size=2)
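            # create a fresh estimator and resume training from the checkpoint written to model_dir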
            est2 = Estimator.from_torch(model=model, loss=loss_func,
                                        metrics=[Accuracy()],
                                        optimizer=None)
            est2.load(temp_dir_name, loss=loss_func)
            est2.fit(data=data_shard, epochs=8, batch_size=2, validation_data=data_shard,
                     checkpoint_trigger=EveryEpoch())
            est2.evaluate(data_shard, batch_size=2)
            pred_result = est2.predict(data_shard)
            pred_c = pred_result.collect()
            assert isinstance(pred_result, SparkXShards)
            pred_shard = data_shard.transform_shard(transform_del_y)
            pred_result2 = est2.predict(pred_shard)
            pred_c_2 = pred_result2.collect()
            assert (pred_c[0]["prediction"] == pred_c_2[0]["prediction"]).all()
Example #5
    def test_bigdl_pytorch_estimator_pandas_dataframe(self):
        class SimpleModel(nn.Module):
            def __init__(self):
                super(SimpleModel, self).__init__()
                self.fc = nn.Linear(1, 10)

            def forward(self, x):
                x = torch.unsqueeze(x, dim=1)
                x = self.fc(x)
                return F.log_softmax(x, dim=1)

        def loss_func(input, target):
            return nn.CrossEntropyLoss().forward(input,
                                                 target.flatten().long())

        model = SimpleModel()

        OrcaContext.pandas_read_backend = "pandas"
        file_path = os.path.join(resource_path,
                                 "orca/learn/simple_feature_label.csv")
        data_shard = read_csv(file_path)

        with tempfile.TemporaryDirectory() as temp_dir_name:
            estimator = Estimator.from_torch(
                model=model,
                loss=loss_func,
                metrics=[Accuracy()],
                optimizer=SGD(learningrate_schedule=Default()),
                model_dir=temp_dir_name)
            estimator.fit(data=data_shard,
                          epochs=1,
                          batch_size=4,
                          feature_cols=['feature'],
                          label_cols=['label'],
                          validation_data=data_shard,
                          checkpoint_trigger=EveryEpoch())
            estimator.evaluate(data_shard,
                               batch_size=4,
                               feature_cols=['feature'],
                               label_cols=['label'])
            est2 = Estimator.from_torch(model=model,
                                        loss=loss_func,
                                        metrics=[Accuracy()],
                                        optimizer=None)
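            # load the latest Orca checkpoint and predict using only the feature column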
            est2.load_orca_checkpoint(temp_dir_name)
            est2.predict(data_shard, batch_size=4, feature_cols=['feature'])
Example #6
def get_estimator(workers_per_node=1, model_fn=get_model):
    estimator = Estimator.from_torch(model=model_fn,
                                     optimizer=get_optimizer,
                                     loss=nn.BCELoss(),
                                     metrics=Accuracy(),
                                     config={"lr": 1e-2},
                                     workers_per_node=workers_per_node,
                                     backend="torch_distributed")
    return estimator
Example #7
    def test_bigdl_pytorch_estimator_dataframe_fit_evaluate(self):
        class SimpleModel(nn.Module):
            def __init__(self):
                super(SimpleModel, self).__init__()
                self.fc = nn.Linear(5, 5)

            def forward(self, x):
                x = self.fc(x)
                return F.log_softmax(x, dim=1)

        model = SimpleModel()

        def loss_func(input, target):
            return nn.CrossEntropyLoss().forward(input,
                                                 target.flatten().long())

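        # build a Spark DataFrame with a 5-dimensional feature column and a binary label column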
        rdd = self.sc.range(0, 100)
        df = rdd.map(lambda x: ([float(x)] * 5,
                                [int(np.random.randint(0, 2, size=()))])).toDF(
                                    ["feature", "label"])

        with tempfile.TemporaryDirectory() as temp_dir_name:
            estimator = Estimator.from_torch(
                model=model,
                loss=loss_func,
                optimizer=SGD(learningrate_schedule=Default()),
                model_dir=temp_dir_name)
            estimator.fit(data=df,
                          epochs=4,
                          batch_size=2,
                          validation_data=df,
                          validation_metrics=[Accuracy()],
                          checkpoint_trigger=EveryEpoch(),
                          feature_cols=["feature"],
                          label_cols=["label"])
            estimator.evaluate(df,
                               validation_metrics=[Accuracy()],
                               batch_size=2,
                               feature_cols=["feature"],
                               label_cols=["label"])
Example #8
    def test_bigdl_pytorch_estimator_dataloader_creator(self):
        class SimpleModel(nn.Module):
            def __init__(self):
                super(SimpleModel, self).__init__()
                self.dense1 = nn.Linear(2, 4)
                self.bn1 = torch.nn.BatchNorm1d(4)
                self.dense2 = nn.Linear(4, 1)

            def forward(self, x):
                x = self.dense1(x)
                x = self.bn1(x)
                x = torch.sigmoid(self.dense2(x))
                return x

        def model_creator(config):
            model = SimpleModel()
            return model

        def optim_creator(model, config):
            return optim.Adam(model.parameters(), lr=config.get("lr", 0.01))

        estimator = Estimator.from_torch(model=model_creator,
                                         loss=nn.BCELoss(),
                                         metrics=[Accuracy()],
                                         optimizer=optim_creator,
                                         config={"lr": 0.001})

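        # data creator: builds a DataLoader from the config and the batch size passed in by the estimator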
        def get_dataloader(config, batch_size):
            inputs = torch.Tensor([[1, 2], [1, 3], [3, 2], [5, 6], [8, 9],
                                   [1, 9]])
            targets = torch.Tensor([[0], [0], [0], [1], [1], [1]])
            data_loader = torch.utils.data.DataLoader(
                TensorDataset(inputs, targets),
                batch_size=batch_size,
                num_workers=config.get("threads", 1))
            return data_loader

        estimator.fit(data=get_dataloader,
                      epochs=2,
                      batch_size=2,
                      validation_data=get_dataloader,
                      checkpoint_trigger=EveryEpoch())
        estimator.evaluate(data=get_dataloader, batch_size=2)
        model = estimator.get_model()
        assert isinstance(model, nn.Module)
Example #9
    def test_xshards_spark_estimator_multi_inputs(self):
        resource_path = os.path.join(
            os.path.split(__file__)[0], "../../../resources")

        def transform(df):
            result = {
                "x": [
                    np.expand_dims(df['user'].to_numpy(), axis=1),
                    np.expand_dims(df['item'].to_numpy(), axis=1)
                ],
                "y":
                df['label'].to_numpy()
            }
            return result

        file_path = os.path.join(resource_path, "orca/learn/ncf2.csv")
        data_shard = read_csv(file_path)
        data_shard = data_shard.transform_shard(transform)
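        # build a two-input Analytics Zoo Keras model: the user and item inputs are concatenated and fed to a Dense layer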
        zx1 = ZLayer.Input(shape=(1, ))
        zx2 = ZLayer.Input(shape=(1, ))
        zz = ZLayer.merge([zx1, zx2], mode="concat")
        zy = ZLayer.Dense(2)(zz)
        model = ZModel([zx1, zx2], zy)

        optim_method = SGD(learningrate=0.01)
        with tempfile.TemporaryDirectory() as temp_dir_name:
            estimator = Estimator.from_bigdl(model=model,
                                             optimizer=optim_method,
                                             loss=ClassNLLCriterion(),
                                             metrics=[Accuracy()],
                                             model_dir=temp_dir_name)
            estimator.set_constant_gradient_clipping(0.1, 1.2)
            r1 = estimator.predict(data=data_shard)
            r_c = r1.collect()
            estimator.set_tensorboard(log_dir=temp_dir_name, app_name="test")
            estimator.fit(data=data_shard,
                          epochs=5,
                          batch_size=8,
                          validation_data=data_shard,
                          checkpoint_trigger=EveryEpoch())
            summary = estimator.get_train_summary(tag="Loss")
            temp_path = os.path.join(temp_dir_name, "save_model")
            estimator.save(temp_path)
            eval_result = estimator.evaluate(data=data_shard, batch_size=8)
Example #10
    def test_nnEstimator_evaluation(self):
        df = self.get_estimator_df2()
        linear_model = Sequential().add(Linear(2, 2)).add(LogSoftMax())

        est = Estimator.from_bigdl(model=linear_model,
                                   loss=ClassNLLCriterion(),
                                   optimizer=Adam(),
                                   feature_preprocessing=SeqToTensor([2]),
                                   label_preprocessing=SeqToTensor([1]),
                                   metrics=Accuracy())
        est.fit(data=df, epochs=10, batch_size=8)
        result = est.evaluate(df, batch_size=8)

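        # compare the accuracy computed from predict() against the Top1Accuracy reported by evaluate()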
        shift = udf(lambda p: float(p.index(max(p))), DoubleType())
        pred = est.predict(df).withColumn("prediction",
                                          shift(col('prediction'))).cache()

        correct = pred.filter("label=prediction").count()
        overall = pred.count()
        accuracy = correct * 1.0 / overall
        assert accuracy == round(result['Top1Accuracy'], 2)
Example #11
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

net.train()
orca_estimator = Estimator.from_torch(model=net,
                                      optimizer=optimizer,
                                      loss=criterion,
                                      metrics=[Accuracy()],
                                      backend="bigdl")
orca_estimator.fit(data=trainloader,
                   epochs=2,
                   validation_data=testloader,
                   checkpoint_trigger=EveryEpoch())
print('Finished Training')
dataiter = iter(testloader)
images, labels = next(dataiter)  # DataLoader iterators no longer expose .next(); use the built-in next()

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

res = orca_estimator.evaluate(data=testloader)
print("Accuracy of the network on the test images: %s" % res)
Example #12
def bigdl_estimator():
    from zoo.orca.learn.bigdl.estimator import Estimator
    from tensorflow.python.keras.datasets import imdb
    from tensorflow.python.keras.preprocessing import sequence
    from zoo.pipeline.api.keras.models import Model
    from zoo.pipeline.api.keras.objectives import SparseCategoricalCrossEntropy
    from zoo.orca.data import XShards
    from zoo.orca.learn.metrics import Accuracy
    import numpy as np
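    # assumed imports for the layers and optimizer used below (not shown in this snippet):
    # from zoo.pipeline.api.keras.layers import Input, Dense, Dropout, GlobalAveragePooling1D, SelectTable, TransformerLayer
    # from bigdl.optim.optimizer import Adam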

    # conf = {"spark.executor.extraJavaOptions": "-Xss512m", "spark.driver.extraJavaOptions": "-Xss512m"}

    # init_orca_context(cluster_mode="local", cores=8, memory="16g")
    init_orca_context(cluster_mode="local", cores=4, memory="16g")
    max_features = 200
    max_len = 20

    print("running bigdl estimator")

    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
    
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    x_test = x_test[-1000:]
    y_test = y_test[-1000:]
    
    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')

    print('Pad sequences (samples x time)')
    x_train = sequence.pad_sequences(x_train, maxlen=max_len)
    x_test = sequence.pad_sequences(x_test, maxlen=max_len)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    train_pos = np.zeros((len(x_train), max_len), dtype=np.int32)
    val_pos = np.zeros((len(x_test), max_len), dtype=np.int32)
    for i in range(0, len(x_train)):
        train_pos[i, :] = np.arange(max_len)
        val_pos[i, :] = np.arange(max_len)

    train_dataset = XShards.partition({"x": (x_train, train_pos), "y": np.array(y_train)})
    val_dataset = XShards.partition({"x": (x_test, val_pos), "y": np.array(y_test)})

    token_shape = (max_len,)
    position_shape = (max_len,)
    token_input = Input(shape=token_shape)
    position_input = Input(shape=position_shape)
    O_seq = TransformerLayer.init(vocab=max_features, hidden_size=128, n_head=8, seq_len=max_len)([token_input, position_input])
    # Select the first output of the Transformer. The second is the pooled output.
    O_seq = SelectTable(0)(O_seq)
    O_seq = GlobalAveragePooling1D()(O_seq)
    O_seq = Dropout(0.2)(O_seq)
    outputs = Dense(2, activation='softmax')(O_seq)

    model = Model([token_input, position_input], outputs)
    model.summary()
    batch_size = 64
    print("Train started")
    est = Estimator.from_bigdl(model=model, loss=SparseCategoricalCrossEntropy(), optimizer=Adam(), metrics=[Accuracy()])
    est.set_constant_gradient_clipping(0.1, 0.2)
    est.fit(data=train_dataset, batch_size=batch_size, epochs=1)
    result = est.evaluate(val_dataset)
    print(result)
    est.clear_gradient_clipping()
    est.set_l2_norm_gradient_clipping(0.5)
    est.fit(data=train_dataset, batch_size=batch_size, epochs=1)
    print("Train finished") 
    
    print("Evaluating started")
    result = est.evaluate(val_dataset)
    print(result)
    print("Evaluating finished")
    est.save('work/saved_model')
    # est.load('work/saved_model')
    print("load and save API finished")

    est.get_train_summary(tag='Loss')
    est.get_validation_summary(tag='Top1Accuracy')
    print("get summary API finished")


    stop_orca_context()
Example #13
def main():
    parser = argparse.ArgumentParser(description='PyTorch Tensorboard Example')
    parser.add_argument('--cluster_mode',
                        type=str,
                        default="local",
                        help='The cluster mode, such as local, yarn or k8s.')
    parser.add_argument('--backend',
                        type=str,
                        default="bigdl",
                        help='The backend of PyTorch Estimator; '
                        'bigdl and torch_distributed are supported.')
    args = parser.parse_args()

    if args.cluster_mode == "local":
        init_orca_context()
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode=args.cluster_mode, cores=4, num_nodes=2)

    tensorboard_dir = "runs"
    writer = SummaryWriter(tensorboard_dir + '/fashion_mnist_experiment_1')
    # constant for classes
    classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
               'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

    # plot some random training images
    dataiter = iter(train_data_creator(config={}, batch_size=4))
    images, labels = next(dataiter)

    # create grid of images
    img_grid = torchvision.utils.make_grid(images)

    # show images
    matplotlib_imshow(img_grid, one_channel=True)

    # write to tensorboard
    writer.add_image('four_fashion_mnist_images', img_grid)

    # inspect the model using tensorboard
    writer.add_graph(model_creator(config={}), images)
    writer.close()

    # training loss vs. epochs
    criterion = nn.CrossEntropyLoss()
    batch_size = 4
    epochs = 5
    if args.backend == "bigdl":
        train_loader = train_data_creator(config={}, batch_size=batch_size)
        test_loader = validation_data_creator(config={}, batch_size=batch_size)

        net = model_creator(config={})
        optimizer = optimizer_creator(model=net, config={"lr": 0.001})
        orca_estimator = Estimator.from_torch(model=net,
                                              optimizer=optimizer,
                                              loss=criterion,
                                              metrics=[Accuracy()],
                                              backend="bigdl")

        orca_estimator.set_tensorboard(tensorboard_dir, "bigdl")

        orca_estimator.fit(data=train_loader,
                           epochs=epochs,
                           validation_data=test_loader,
                           checkpoint_trigger=EveryEpoch())

        res = orca_estimator.evaluate(data=test_loader)
        print("Accuracy of the network on the test images: %s" % res)
    elif args.backend == "torch_distributed":
        orca_estimator = Estimator.from_torch(model=model_creator,
                                              optimizer=optimizer_creator,
                                              loss=criterion,
                                              metrics=[Accuracy()],
                                              backend="torch_distributed")
        stats = orca_estimator.fit(train_data_creator,
                                   epochs=epochs,
                                   batch_size=batch_size)

        for stat in stats:
            writer.add_scalar("training_loss", stat['train_loss'],
                              stat['epoch'])
        print("Train stats: {}".format(stats))
        val_stats = orca_estimator.evaluate(validation_data_creator,
                                            batch_size=batch_size)
        print("Validation stats: {}".format(val_stats))
        orca_estimator.shutdown()
    else:
        raise NotImplementedError(
            "Only bigdl and torch_distributed are supported "
            "as the backend, but got {}".format(args.backend))

    stop_orca_context()
Example #14
        return x


net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

net.train()
orca_estimator = Estimator.from_torch(model=net,
                                      optimizer=optimizer,
                                      loss=criterion,
                                      backend="bigdl")
orca_estimator.fit(data=trainloader,
                   epochs=2,
                   validation_data=testloader,
                   validation_methods=[Accuracy()],
                   checkpoint_trigger=EveryEpoch())
print('Finished Training')
dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

res = orca_estimator.evaluate(data=testloader,
                              validation_methods=[Accuracy()])[0]
total_num = res.total_num
result = res.result
print("Accuracy of the network on the %s test images: %s" %
      (total_num, result))
Example #15
    (trainingDF, validationDF) = labelDF.randomSplit([0.9, 0.1])

    # run training and evaluation
    featureTransformer = ChainedPreprocessing([
        RowToImageFeature(),
        ImageCenterCrop(224, 224),
        ImageChannelNormalize(123.0, 117.0, 104.0, 255.0, 255.0, 255.0),
        ImageMatToTensor(),
        ImageFeatureToTensor()
    ])

    est = Estimator.from_bigdl(model=zoo_model,
                               loss=zoo_loss,
                               optimizer=SGD(learningrate=0.001),
                               feature_preprocessing=featureTransformer,
                               metrics=Accuracy())
    est.fit(data=trainingDF,
            batch_size=16,
            epochs=1,
            feature_cols="image",
            caching_sample=False,
            validation_data=validationDF,
            validation_trigger=EveryEpoch())

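    # take the index of the max probability as the predicted class for comparison with the label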
    shift = udf(lambda p: float(p.index(max(p))), DoubleType())
    predictionDF = est.predict(data=validationDF, feature_cols="image") \
        .withColumn("prediction", shift(col('prediction'))).cache()

    correct = predictionDF.filter("label=prediction").count()
    overall = predictionDF.count()
    accuracy = correct * 1.0 / overall
Example #16
    def test_xshards_spark_estimator(self):
        resource_path = os.path.join(
            os.path.split(__file__)[0], "../../../resources")

        def transform(df):
            result = {
                "x": [df['user'].to_numpy(), df['item'].to_numpy()],
                "y": df['label'].to_numpy()
            }
            return result

        file_path = os.path.join(resource_path, "orca/learn/ncf2.csv")
        data_shard = read_csv(file_path)
        data_shard = data_shard.transform_shard(transform)
        model = Sequential()
        model.add(Linear(2, 2))
        model.add(LogSoftMax())
        optim_method = SGD(learningrate=0.01)
        with tempfile.TemporaryDirectory() as temp_dir_name:
            estimator = Estimator.from_bigdl(
                model=model,
                optimizer=optim_method,
                loss=ClassNLLCriterion(),
                model_dir=temp_dir_name,
                feature_preprocessing=SeqToTensor([2]),
                label_preprocessing=SeqToTensor([1]))
            estimator.set_constant_gradient_clipping(0.1, 1.2)
            r1 = estimator.predict(data=data_shard)
            r_c = r1.collect()
            estimator.set_tensorboard(log_dir=temp_dir_name, app_name="test")
            estimator.fit(data=data_shard,
                          epochs=5,
                          batch_size=8,
                          validation_data=data_shard,
                          validation_metrics=[Accuracy()],
                          checkpoint_trigger=EveryEpoch())
            summary = estimator.get_train_summary(tag="Loss")
            temp_path = os.path.join(temp_dir_name, "save_model")
            estimator.save(temp_path)
            estimator.evaluate(data=data_shard,
                               validation_metrics=[Accuracy()],
                               batch_size=8)
            result = estimator.predict(data=data_shard)
            assert type(result).__name__ == 'SparkXShards'
            result_c = result.collect()
            df = self.get_estimator_df2()
            r0 = estimator.predict(df)
            r0_c = r0.collect()
            assert type(r0).__name__ == 'DataFrame'
            for idx in range(len(r0_c)):
                assert abs(r0_c[idx]["prediction"][0] -
                           result_c[0]["prediction"][idx][0]) == 0
                assert abs(r0_c[idx]["prediction"][1] -
                           result_c[0]["prediction"][idx][1]) == 0
            estimator.fit(data=df,
                          epochs=6,
                          batch_size=8,
                          validation_data=df,
                          validation_metrics=[Accuracy()],
                          validation_trigger=EveryEpoch())
            summary = estimator.get_train_summary()

            # test load from checkpoint
            est2 = Estimator.from_bigdl(model=Sequential(),
                                        optimizer=None,
                                        loss=None,
                                        model_dir=None)
            est2.load(temp_dir_name,
                      loss=ClassNLLCriterion(),
                      is_checkpoint=True)
            r2 = est2.predict(data=data_shard)
            r2_c = r2.collect()
            assert (result_c[0]["prediction"] == r2_c[0]["prediction"]).all()
            # resume training
            est2.fit(data=data_shard,
                     epochs=10,
                     batch_size=8,
                     validation_data=data_shard,
                     validation_metrics=[Accuracy()],
                     checkpoint_trigger=EveryEpoch())
            est2.evaluate(data=data_shard,
                          validation_metrics=[Accuracy()],
                          batch_size=8)
            # test load from saved model
            est3 = Estimator.from_bigdl(model=Sequential(),
                                        optimizer=None,
                                        loss=None,
                                        model_dir=None)
            est3.load(temp_path,
                      optimizer=optim_method,
                      loss=ClassNLLCriterion())
            r3 = est3.predict(data=data_shard)
            r3_c = r3.collect()
            assert (r3_c[0]["prediction"] == r2_c[0]["prediction"]).all()
Example #17
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

net.train()
orca_estimator = Estimator.from_torch(model=net, optimizer=optimizer, loss=criterion,
                                      backend="bigdl")
orca_estimator.fit(data=trainloader, epochs=2, validation_data=testloader,
                   validation_methods=[Accuracy()], checkpoint_trigger=EveryEpoch())
print('Finished Training')
dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

res = orca_estimator.evaluate(data=testloader, validation_methods=[Accuracy()])[0]
total_num = res.total_num
result = res.result
print("Accuracy of the network on the %s test images: %s" % (total_num, result))
stop_orca_context()
Example #18
        return x


net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

net.train()
orca_estimator = Estimator.from_torch(model=net,
                                      optimizer=optimizer,
                                      loss=criterion,
                                      backend="bigdl")
orca_estimator.fit(data=trainloader,
                   epochs=2,
                   validation_data=testloader,
                   validation_metrics=[Accuracy()],
                   checkpoint_trigger=EveryEpoch())
print('Finished Training')
dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

res = orca_estimator.evaluate(data=testloader,
                              validation_metrics=[Accuracy()])[0]
total_num = res.total_num
result = res.result
print("Accuracy of the network on the %s test images: %s" %
      (total_num, result))
Example #19
    def test_bigdl_pytorch_estimator_save_and_load(self):
        class Network(nn.Module):
            def __init__(self):
                super(Network, self).__init__()

                self.fc1 = nn.Linear(28 * 28, 500)
                self.fc2 = nn.Linear(500, 10)

            def forward(self, x):
                x = x.view(-1, 28 * 28)
                x = F.relu(self.fc1(x))
                x = self.fc2(x)
                return F.log_softmax(x, dim=1)

        model = Network()
        model.train()
        criterion = nn.NLLLoss()
        adam = torch.optim.Adam(model.parameters(), 0.001)

        dir = "./dataset"
        batch_size = 320
        train_loader = torch.utils.data.DataLoader(datasets.MNIST(
            dir,
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ])),
                                                   batch_size=batch_size,
                                                   shuffle=True)

        test_loader = torch.utils.data.DataLoader(datasets.MNIST(
            dir,
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ])),
                                                  batch_size=batch_size,
                                                  shuffle=False)

        # epoch 1
        est = Estimator.from_torch(model=model,
                                   optimizer=adam,
                                   loss=criterion,
                                   metrics=[Accuracy()])

        est.fit(data=train_loader,
                epochs=1,
                validation_data=test_loader,
                batch_size=batch_size,
                checkpoint_trigger=EveryEpoch())
        paras1 = list(est.get_model().named_parameters())
        est.save("model_epoch_1")

        # epoch 2
        est.fit(data=train_loader,
                epochs=2,
                validation_data=test_loader,
                batch_size=batch_size,
                checkpoint_trigger=EveryEpoch())
        paras2 = list(est.get_model().named_parameters())
        est.load("model_epoch_1")
        paras3 = list(est.get_model().named_parameters())

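        # the reloaded parameters should differ from the epoch-2 parameters and match the epoch-1 snapshot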
        load_success = 0
        for i in range(len(paras2)):
            name2, para2 = paras2[i]
            name3, para3 = paras3[i]
            if not torch.all(torch.eq(para2, para3)):
                load_success = 1
                break
        if not load_success:
            raise Exception(
                "Load failed. Parameters did not change after loading.")

        for i in range(len(paras1)):
            name1, para1 = paras1[i]
            name3, para3 = paras3[i]
            if not torch.all(torch.eq(para1, para3)):
                raise Exception("After reloading the model," + name1 +
                                "does not match.")
        print("pass")