# Imports assumed by the test fragments below; `spark_session`, `local_store`,
# `create_xor_data`, `create_xor_model`, and `CallbackBackend` are test
# fixtures (hedged sketches of them follow the tests that use them).
import sys
from unittest import mock

import torch
import torch.nn as nn
import torch.nn.functional as F

import horovod.spark.torch as hvd_spark
from horovod.spark.common import util


def test_fit_model(self):
    model = create_xor_model()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    loss = F.binary_cross_entropy

    with spark_session('test_fit_model') as spark:
        df = create_xor_data(spark)

        with local_store() as store:
            torch_estimator = hvd_spark.TorchEstimator(
                num_proc=2,
                store=store,
                model=model,
                optimizer=optimizer,
                loss=loss,
                input_shapes=[[2]],
                feature_cols=['features'],
                label_cols=['y'],
                batch_size=1,
                epochs=3,
                random_seed=1,
                verbose=2,
                sample_weight_col='weight')

            torch_model = torch_estimator.fit(df)

            trained_model = torch_model.getModel()
            pred = trained_model(torch.ones([1, 2], dtype=torch.int32))
            assert len(pred) == 1
            assert pred.dtype == torch.float32
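# `create_xor_model` is not shown in this excerpt. A minimal sketch of what it
# could look like -- a tiny fully connected network with a sigmoid output,
# compatible with binary_cross_entropy/BCELoss and input_shapes=[[2]]; the
# layer sizes here are assumptions, not the real fixture:
def create_xor_model():
    return nn.Sequential(
        nn.Linear(2, 8),    # two XOR inputs -> small hidden layer
        nn.Tanh(),
        nn.Linear(8, 1),    # single probability output
        nn.Sigmoid())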
def test_torch_direct_parquet_train(self):
    with spark_session('test_torch_direct_parquet_train') as spark:
        df = create_xor_data(spark)

        backend = CallbackBackend()
        with local_store() as store:
            # Point the store at the paths written by prepare_data so that
            # fit_on_parquet reads the already-prepared Parquet files.
            store.get_train_data_path = lambda v=None: store._train_path
            store.get_val_data_path = lambda v=None: store._val_path

            with util.prepare_data(backend.num_processes(),
                                   store,
                                   df,
                                   feature_columns=['features'],
                                   label_columns=['y']):
                model = create_xor_model()
                optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
                loss = nn.BCELoss()

                est = hvd_spark.TorchEstimator(
                    backend=backend,
                    store=store,
                    model=model,
                    optimizer=optimizer,
                    input_shapes=[[2]],
                    feature_cols=['features'],
                    label_cols=['y'],
                    batch_size=1,
                    epochs=3,
                    verbose=2)

                # Make sure that setLoss works with a non-list loss.
                est.setLoss(loss)

                transformer = est.fit_on_parquet()
                predictions = transformer.transform(df)
                assert predictions.count() == df.count()
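# `CallbackBackend` is a test helper, not part of the public Horovod API.
# A plausible minimal sketch, assuming the estimator only needs run() and
# num_processes(): it runs the training function in the current process
# instead of launching Spark tasks.
class CallbackBackend(object):
    def run(self, fn, args=(), kwargs={}, env={}):
        # Invoke the function locally, once per "process".
        return [fn(*args, **kwargs)] * self.num_processes()

    def num_processes(self):
        return 1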
def test_fit_model(self):
    # Note: this older variant of the test imports horovod.spark.torch as
    # `hvd` rather than `hvd_spark`.
    if sys.version_info < (3, 0, 0) and is_gloo_used():
        self.skipTest('Horovod on Spark over Gloo only supported on Python3')

    model = create_xor_model()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    loss = F.binary_cross_entropy

    with spark_session('test_fit_model') as spark:
        df = create_xor_data(spark)

        with local_store() as store:
            torch_estimator = hvd.TorchEstimator(
                num_proc=2,
                store=store,
                model=model,
                optimizer=optimizer,
                loss=loss,
                input_shapes=[[2]],
                feature_cols=['features'],
                label_cols=['y'],
                batch_size=1,
                epochs=3,
                verbose=2,
                sample_weight_col='weight')

            torch_model = torch_estimator.fit(df)

            trained_model = torch_model.getModel()
            pred = trained_model(torch.ones([1, 2], dtype=torch.int32))
            assert len(pred) == 1
            assert pred.dtype == torch.float32
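# `create_xor_data` is likewise assumed. A minimal sketch matching how the
# tests use it: the four-row XOR truth table with a DenseVector feature
# column, a float label column 'y', and a 'weight' column for
# sample_weight_col (the uniform weights are an assumption):
from pyspark.ml.linalg import DenseVector

def create_xor_data(spark):
    rows = [(DenseVector([0.0, 0.0]), 0.0, 1.0),
            (DenseVector([0.0, 1.0]), 1.0, 1.0),
            (DenseVector([1.0, 0.0]), 1.0, 1.0),
            (DenseVector([1.0, 1.0]), 0.0, 1.0)]
    return spark.createDataFrame(rows, ['features', 'y', 'weight'])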
def test_torch_direct_parquet_train(self):
    with spark_session('test_torch_direct_parquet_train') as spark:
        df = create_xor_data_with_val(spark)

        backend = CallbackBackend()
        with local_store() as store:
            store.get_train_data_path = lambda v=None: store._train_path
            store.get_val_data_path = lambda v=None: store._val_path

            # Make sure we cover the validation dataloader as well.
            for validation in [None, 'val']:
                # A validation ratio or column is needed to split the data.
                with util.prepare_data(backend.num_processes(),
                                       store,
                                       df,
                                       feature_columns=['features'],
                                       label_columns=['y'],
                                       validation=validation):
                    model = create_xor_model()
                    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
                    loss = nn.BCELoss()

                    for inmemory_cache_all in [False, True]:
                        for reader_pool_type in ['process', 'thread']:
                            est = hvd_spark.TorchEstimator(
                                backend=backend,
                                store=store,
                                model=model,
                                optimizer=optimizer,
                                input_shapes=[[2]],
                                feature_cols=['features'],
                                label_cols=['y'],
                                batch_size=1,
                                epochs=3,
                                verbose=2,
                                reader_pool_type=reader_pool_type,
                                inmemory_cache_all=inmemory_cache_all,
                                validation=validation)

                            # Make sure that setLoss works with a non-list loss.
                            est.setLoss(loss)

                            transformer = est.fit_on_parquet()
                            predictions = transformer.transform(df)
                            assert predictions.count() == df.count()
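# `create_xor_data_with_val` is assumed to extend the XOR data with a column
# that prepare_data can use as the split marker when validation='val'. A
# minimal sketch (the particular 0/1 assignment below is an assumption):
from pyspark.ml.linalg import DenseVector

def create_xor_data_with_val(spark):
    rows = [(DenseVector([0.0, 0.0]), 0.0, 0),
            (DenseVector([0.0, 1.0]), 1.0, 0),
            (DenseVector([1.0, 0.0]), 1.0, 1),   # val=1 rows form the validation set
            (DenseVector([1.0, 1.0]), 0.0, 1)]
    return spark.createDataFrame(rows, ['features', 'y', 'val'])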
def test_restore_from_checkpoint(self):
    model = create_xor_model()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    loss = nn.BCELoss()

    with spark_session('test_restore_from_checkpoint') as spark:
        df = create_xor_data(spark)

        ctx = CallbackBackend()
        run_id = 'run01'

        with local_store() as store:
            torch_estimator = hvd_spark.TorchEstimator(
                backend=ctx,
                store=store,
                model=model,
                optimizer=optimizer,
                loss=loss,
                input_shapes=[[2]],
                feature_cols=['features'],
                label_cols=['y'],
                batch_size=1,
                epochs=1,
                verbose=2,
                run_id=run_id)

            # Wrap _load_checkpoint in a mock that still calls through, so we
            # can assert whether it was invoked.
            torch_estimator._load_checkpoint = mock.Mock(
                side_effect=torch_estimator._load_checkpoint)

            # First fit: no checkpoint exists yet, so nothing is loaded.
            ckpt_path = store.get_checkpoint_path(run_id)
            assert not store.exists(ckpt_path)
            torch_estimator._load_checkpoint.assert_not_called()
            torch_estimator.fit(df)

            # Second fit: resumes from the checkpoint written by the first fit.
            assert store.exists(ckpt_path)
            torch_estimator.fit(df)
            torch_estimator._load_checkpoint.assert_called()
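# `local_store` is another fixture that is not shown. One plausible
# implementation, assuming it wraps horovod.spark.common.store.LocalStore
# around a temporary directory that is cleaned up on exit:
import contextlib
import shutil
import tempfile

from horovod.spark.common.store import LocalStore

@contextlib.contextmanager
def local_store():
    prefix_path = tempfile.mkdtemp()
    try:
        yield LocalStore(prefix_path)
    finally:
        shutil.rmtree(prefix_path)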
# (Fragment from a Spark MNIST example; it assumes `import torch.optim as
# optim`, `import numpy as np`, `from pyspark.sql.functions import udf`,
# `import pyspark.sql.types as T`, `from pyspark.ml.evaluation import
# MulticlassClassificationEvaluator`, and `import horovod.spark.torch as hvd`.)

        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Pass dim explicitly: calling F.log_softmax without it is deprecated.
        return F.log_softmax(x, dim=1)

model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
loss = nn.NLLLoss()

# Train a Horovod Spark Estimator on the DataFrame
torch_estimator = hvd.TorchEstimator(
    num_proc=args.num_proc,
    store=store,
    model=model,
    optimizer=optimizer,
    loss=lambda input, target: loss(input, target.long()),
    input_shapes=[[-1, 1, 28, 28]],
    feature_cols=['features'],
    label_cols=['label'],
    batch_size=args.batch_size,
    epochs=args.epochs,
    verbose=1)

torch_model = torch_estimator.fit(train_df).setOutputCols(['label_prob'])

# Evaluate the model on the held-out test DataFrame
pred_df = torch_model.transform(test_df)

argmax = udf(lambda v: float(np.argmax(v)), returnType=T.DoubleType())
pred_df = pred_df.withColumn('label_pred', argmax(pred_df.label_prob))
evaluator = MulticlassClassificationEvaluator(predictionCol='label_pred',
                                              labelCol='label',
                                              metricName='accuracy')
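# The fragment stops just before the metric is computed. The natural next
# step, using the standard MulticlassClassificationEvaluator API (the exact
# print wording is an assumption):
print('Test accuracy:', evaluator.evaluate(pred_df))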