def _to_bigdl_metric(metric): metric = metric.lower() if metric == "accuracy" or metric == "acc": return metrics.Accuracy() elif metric == "top5accuracy" or metric == "top5acc": return metrics.Top5Accuracy() elif metric == "mae": from bigdl.optim.optimizer import MAE return MAE() elif metric == "auc": return metrics.AUC() elif metric == "loss": return Loss() elif metric == "treennaccuracy": return TreeNNAccuracy() else: raise TypeError("Unsupported metric: %s" % metric)
def main():
    """Train a PyTorch MNIST model distributed on YARN via Analytics Zoo.

    Parses CLI arguments, builds MNIST train/test DataLoaders, wraps the
    PyTorch model and NLL loss into Zoo's TorchNet/TorchCriterion, starts a
    Spark-on-YARN context, then trains with an Estimator and evaluates the
    test set after each epoch. Optionally saves the model's state dict.
    """
    # ---- Training settings (CLI) ----
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=10, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    # pin_memory/num_workers only help when feeding a CUDA device.
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307, ), (0.3081, ))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307, ), (0.3081, ))
                       ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    model = Net().to(device)
    # NOTE(review): --lr/--momentum are parsed but unused below -- training
    # runs on the Zoo estimator with Adam() defaults. (The original built an
    # optim.SGD(model.parameters(), ...) here and never used it; that dead
    # optimizer has been removed.)

    # ---- Spark-on-YARN context ----
    num_executors = 4
    num_cores_per_executor = 1
    hadoop_conf_dir = os.environ.get('HADOOP_CONF_DIR')
    sc = init_spark_on_yarn(
        hadoop_conf=hadoop_conf_dir,
        conda_name=os.environ["ZOO_CONDA_NAME"],  # The name of the created conda-env
        num_executor=num_executors,
        executor_cores=num_cores_per_executor,
        executor_memory="10g",
        driver_memory="10g",
        driver_cores=1,
        spark_conf={"spark.rpc.message.maxSize": "1024"})

    # ---- Wrap the PyTorch model/loss for distributed training ----
    model.train()
    sgd = Adam()
    zooModel = TorchNet.from_pytorch(model, [64, 1, 28, 28])

    def lossFunc(input, target):
        # Targets are flattened to 1-D and cast to long for NLLLoss.
        return nn.NLLLoss().forward(input, target.flatten().long())

    zooCriterion = TorchCriterion.from_pytorch(lossFunc, [1, 2],
                                               torch.LongTensor([1]))
    # zooCriterion = SparseCategoricalCrossEntropy(zero_based_label=True)
    estimator = Estimator(zooModel, optim_methods=sgd)

    # Materialize the whole test set as numpy minibatches for evaluation.
    v_input = []
    v_target = []
    for data, target in test_loader:
        v_input.append([data.numpy()])
        v_target.append([target.numpy()])
    test_featureset = FeatureSet.minibatch(v_input, v_target)

    for epoch in range(1, args.epochs + 1):
        train(args, estimator, zooCriterion, train_loader, epoch)
        # test(args, estimator, zooCriterion, test_featureset)
        estimator.evaluate_minibatch(
            test_featureset, [Loss(zooCriterion), Accuracy()])

    if args.save_model:
        # NOTE(review): training updates happen inside the Zoo estimator --
        # confirm the local model's weights reflect training before relying
        # on this checkpoint.
        torch.save(model.state_dict(), "mnist_cnn.pt")