Exemple #1
0
    def test_weight_decay(self):
        from caffe2.python import brew
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'b', 200, 5)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        add_weight_decay(model, weight_decay=1e-4)
        build_sgd(model, 0.11)

        expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}

        # Check the proto that all weights are decayed and not non-weights
        # are decayed.
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
                if op.output[0] not in expected_weight_grad:
                    print("Unexpected param for weight_decay: {}".format(
                        op.output[0]))
                self.assertTrue(op.output[0] in expected_weight_grad)
                expected_weight_grad.remove(op.output[0])

        self.assertEqual(
            expected_weight_grad, set(),
            "Not all weights were decayed: {}".format(expected_weight_grad))
Exemple #2
0
    def test_caffe2_simple_model(self):
        model = ModelHelper(name="mnist")
        # how come those inputs don't break the forward pass =.=a
        workspace.FeedBlob("data", np.random.randn(1, 3, 64, 64).astype(np.float32))
        workspace.FeedBlob("label", np.random.randn(1, 1000).astype(np.int))

        with core.NameScope("conv1"):
            conv1 = brew.conv(model, "data", 'conv1', dim_in=1, dim_out=20, kernel=5)
            # Image size: 24 x 24 -> 12 x 12
            pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
            # Image size: 12 x 12 -> 8 x 8
            conv2 = brew.conv(model, pool1, 'conv2', dim_in=20, dim_out=100, kernel=5)
            # Image size: 8 x 8 -> 4 x 4
            pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)
        with core.NameScope("classifier"):
            # 50 * 4 * 4 stands for dim_out from previous layer multiplied by the image size
            fc3 = brew.fc(model, pool2, 'fc3', dim_in=100 * 4 * 4, dim_out=500)
            relu = brew.relu(model, fc3, fc3)
            pred = brew.fc(model, relu, 'pred', 500, 10)
            softmax = brew.softmax(model, pred, 'softmax')
            xent = model.LabelCrossEntropy([softmax, "label"], 'xent')
            # compute the expected loss
            loss = model.AveragedLoss(xent, "loss")
        model.net.RunAllOnMKL()
        model.param_init_net.RunAllOnMKL()
        model.AddGradientOperators([loss], skip=1)
        blob_name_tracker = {}
        graph = c2_graph.model_to_graph_def(
            model,
            blob_name_tracker=blob_name_tracker,
            shapes={},
            show_simplified=False,
        )
        compare_proto(graph, self)
    def _createDense(self, dtype=core.DataType.FLOAT):
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        np.random.seed(123)  # make test deterministic
        numpy_dtype = np.float32 if dtype == core.DataType.FLOAT else np.float16
        initializer = Initializer if dtype == core.DataType.FLOAT else pFP16Initializer
        data = np.random.randint(2,
                                 size=(20,
                                       perfect_model.size)).astype(numpy_dtype)
        label = np.dot(data, perfect_model)[:, np.newaxis]

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        out = brew.fc(model,
                      'data',
                      'fc',
                      perfect_model.size,
                      1, ('ConstantFill', {}), ('ConstantFill', {}),
                      axis=0,
                      WeightInitializer=initializer,
                      BiasInitializer=initializer)
        if dtype == core.DataType.FLOAT16:
            out = model.HalfToFloat(out, out + "_fp32")
        sq = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(sq, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['fc_w'], core.BlobReference)
        return (model, perfect_model, data, label)
    def test_optimizer_context(self):
        from caffe2.python import brew, optimizer
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        count = optimizer._optimizer_instance_count['SgdOptimizer']
        cnv_optim = SgdOptimizer(0.15)
        weight_optim = SgdOptimizer(0.2)
        bias_optim = SgdOptimizer(0.1)

        with UseOptimizer(cnv_optim):
            cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        with UseOptimizer({'WEIGHT': weight_optim, 'BIAS': bias_optim}):
            a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'b', 200, 5)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        add_weight_decay(model, weight_decay=1e-4)
        # use the following optimizer if none specified in param_info
        build_sgd(model, 0.11)
        expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}
        expected_learning_rate = {
            "SgdOptimizer_{}_lr_cpu".format(count): -0.15,
            "SgdOptimizer_{}_lr_cpu".format(count + 1): -0.2,
            "SgdOptimizer_{}_lr_cpu".format(count + 2): -0.1,
            "SgdOptimizer_{}_lr_cpu".format(count + 3): -0.11
        }

        for op in model.net.Proto().op:
            # Check the proto that all weights are decayed and not non-weights
            # are decayed.
            if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
                if op.output[0] not in expected_weight_grad:
                    print(
                        "Unexpected param for weight_decay: {}".
                        format(op.output[0])
                    )
                self.assertTrue(op.output[0] in expected_weight_grad)
                expected_weight_grad.remove(op.output[0])
            # Check the learning rate for each parameter
            if op.type == 'LearningRate':
                val = 0
                for arg in op.arg:
                    if arg.name == 'base_lr':
                        val = arg.f
                self.assertAlmostEqual(
                    val,
                    expected_learning_rate[op.output[0]]
                )

        self.assertEqual(
            expected_weight_grad,
            set(),
            "Not all weights were decayed: {}".format(expected_weight_grad)
        )
Exemple #5
0
def createTrainModel(lmdb_path):
    """Create and return a training model, complete with training ops."""
    model = ModelHelper(name='train', arg_scope={'order': 'NCHW'})
    reader = model.CreateDB('train_reader', db=lmdb_path, db_type='lmdb')
    AddInputOps(model, reader, BATCH_SIZE)
    losses = AddForwardPassOps(model)
    model.AddGradientOperators(losses)
    AddOptimizerOps(model)
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model
Exemple #6
0
def main(opt_name):
    workspace.FeedBlob('input', np.random.randn(2, 16).astype(np.float32))
    workspace.FeedBlob('label', np.array([0, 1]).astype(np.float32))

    helper = ModelHelper("sample_model")
    fc = brew.fc(helper, "input", "fc", dim_in=16, dim_out=8)
    relu = helper.Relu(fc, 'relu')
    fc2 = brew.fc(helper, relu, "fc2", dim_in=8, dim_out=1)
    label_ex = helper.ExpandDims("label", "label_ex", dims=[1])
    xent = helper.SigmoidCrossEntropyWithLogits([fc2, label_ex], 'xent')
    loss = helper.AveragedLoss(xent, 'loss')
    helper.AddGradientOperators([loss])

    if opt_name == "manual":
        ONE = helper.param_init_net.ConstantFill([],
                                                 "ONE",
                                                 shape=[1],
                                                 value=1.0)
        LR = helper.param_init_net.ConstantFill([],
                                                "LR",
                                                shape=[1],
                                                value=-0.03)

        for param in helper.params:
            param_grad = helper.param_to_grad[param]
            helper.WeightedSum([param, ONE, param_grad, LR], param)
    elif opt_name == "sgd":
        optimizer.build_sgd(helper, 0.03)
    elif opt_name == "adagrad":
        optimizer.build_adagrad(helper, 0.03)
    # caffe2 does not support rowwise adagrad for dense parameters
    # caffe2 seems not have lamb support yet
    elif opt_name == "adam":
        optimizer.build_adam(helper, 0.03)
    else:
        assert False, f"Unsupported optimizer {opt_name}"

    workspace.RunNetOnce(helper.param_init_net)
    workspace.RunNetOnce(helper.net)

    import pdb
    pdb.set_trace()
Exemple #7
0
    def test_multiple_optimizers(self):
        from caffe2.python import brew, core, optimizer
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test")
        fc1 = brew.fc(model, 'data', 'fc1', 100, 50)
        fc2 = brew.fc(model, fc1, 'fc2', 50, 25)
        pred = brew.fc(model, fc2, 'fc3', 25, 10)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        param_to_device = optimizer._get_param_to_device(model)

        def infer_blob_device(blob_name):
            return optimizer.get_param_device(blob_name,
                                              "{}_grad".format(blob_name),
                                              param_to_device)

        sgd_1 = optimizer.SgdOptimizer(base_learning_rate=0.1)
        sgd_2 = optimizer.SgdOptimizer(base_learning_rate=0.2)
        adagrad = optimizer.AdagradOptimizer()

        # Check same optimizer share the same learning rate.
        with core.DeviceScope(infer_blob_device("fc1_w")):
            sgd_1(model.net, model.param_init_net, "fc1_w", "fc1_w_grad")
        with core.DeviceScope(infer_blob_device("fc1_b")):
            sgd_1(model.net, model.param_init_net, "fc1_b", "fc1_b_grad")
        fc1_lr_blobs = []
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and op.input[0] == 'fc1_w' or \
                    op.input[0] == 'fc1_b':
                fc1_lr_blobs.append(op.input[3])
        self.assertEqual(fc1_lr_blobs[0], fc1_lr_blobs[1])

        # Check different instance of the same optimizer has a different lr.
        with core.DeviceScope(infer_blob_device("fc2_w")):
            sgd_2(model.net, model.param_init_net, "fc2_w", "fc2_w_grad")
        with core.DeviceScope(infer_blob_device("fc2_b")):
            sgd_2(model.net, model.param_init_net, "fc2_b", "fc2_b_grad")
        fc2_lr_blobs = []
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and op.input[0] == 'fc2_w' or \
                    op.input[0] == 'fc2_b':
                self.assertTrue(op.input[3] not in fc1_lr_blobs)
                fc2_lr_blobs.append(op.input[3])
        self.assertEqual(fc2_lr_blobs[0], fc2_lr_blobs[1])

        # Check different optimizer type case
        with core.DeviceScope(infer_blob_device("fc3_w")):
            adagrad(model.net, model.param_init_net, "fc3_w", "fc3_w_grad")
        with core.DeviceScope(infer_blob_device("fc3_b")):
            adagrad(model.net, model.param_init_net, "fc3_b", "fc3_b_grad")
        fc3_lr_blobs = []
        for op in model.net.Proto().op:
            if op.type == 'Adagrad' and op.input[0] == 'fc3_w' or \
                    op.input[0] == 'fc3_b':
                self.assertTrue(op.input[3] not in fc2_lr_blobs)
                self.assertTrue(op.input[3] not in fc1_lr_blobs)
                fc3_lr_blobs.append(op.input[3])
        self.assertEqual(fc3_lr_blobs[0], fc3_lr_blobs[1])
Exemple #8
0
    def test_convolution_sync(self, net_type, num_workers, engine, gc, dc):
        m = ModelHelper(name="test_model")
        n = 1
        d = 2
        depth = 3
        iters = 5
        h = 5
        w = 5
        workspace.ResetWorkspace()

        use_cudnn = (engine == 'CUDNN')

        np.random.seed(1701)
        # Build a binary tree of conv layers, summing at each node.
        for i in reversed(range(depth)):
            for j in range(2**i):
                bottom_1 = "{}_{}".format(i + 1, 2 * j)
                bottom_2 = "{}_{}".format(i + 1, 2 * j + 1)
                mid_1 = "{}_{}_m".format(i + 1, 2 * j)
                mid_2 = "{}_{}_m".format(i + 1, 2 * j + 1)
                top = "{}_{}".format(i, j)
                w1, b1, w2, b2 = np.random.randn(4).tolist()
                brew.conv(m,
                          bottom_1,
                          mid_1,
                          dim_in=d,
                          dim_out=d,
                          kernel=3,
                          weight_init=('ConstantFill', dict(value=w1)),
                          bias_init=('ConstantFill', dict(value=b1)),
                          cudnn_state=np.random.randint(0, 3),
                          stride=1,
                          pad=1,
                          deterministic=1,
                          use_cudnn=use_cudnn,
                          engine=engine)
                brew.conv(m,
                          bottom_2,
                          mid_2,
                          dim_in=d,
                          dim_out=d,
                          kernel=3,
                          stride=1,
                          pad=1,
                          weight_init=('ConstantFill', dict(value=w2)),
                          bias_init=('ConstantFill', dict(value=b2)),
                          deterministic=1,
                          cudnn_state=np.random.randint(0, 3),
                          use_cudnn=use_cudnn,
                          engine=engine)
                m.net.Sum([mid_1, mid_2], top)

        m.net.Flatten(["0_0"], ["0_0_flat"])
        m.net.SquaredL2Distance(["0_0_flat", "label"], "xent")
        m.net.AveragedLoss("xent", "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.Proto().device_option.CopyFrom(gc)
        m.param_init_net.Proto().device_option.CopyFrom(gc)
        m.Proto().type = net_type
        m.Proto().num_workers = num_workers
        self.ws.run(m.param_init_net)

        def run():
            import numpy as np
            np.random.seed(1701)
            input_blobs = ["{}_{}".format(depth, j) for j in range(2**depth)]
            for input_blob in input_blobs:
                self.ws.create_blob(input_blob).feed(np.random.randn(
                    n, d, h, w).astype(np.float32),
                                                     device_option=gc)
                self.ws.create_blob("label").feed(np.random.randn(
                    n, d * h * w).astype(np.float32),
                                                  device_option=gc)
            self.ws.run(m.net)
            gradients = [
                self.ws.blobs[str(input_to_grad[input_blob])].fetch()
                for input_blob in input_blobs
            ]
            return gradients

        outputs = [run() for _ in range(iters)]
        for output in outputs[1:]:
            np.testing.assert_array_equal(outputs[0], output)
            np.testing.assert_allclose(np.sum(np.square(output)),
                                       1763719461732352.0,
                                       rtol=1e-5)
Exemple #9
0
class MLTrainer:
    """ This is meant to be a generic neural net trainer.  It uses minibatch and
    ADAM for momentum/smoothing.
    """
    def __init__(
        self,
        name: str,
        parameters: TrainingParameters,
    ) -> None:
        """

        :param name: A unique name for this trainer used to create the data on the
            caffe2 workspace
        :param parameters: The set of training parameters
        """
        self.model_id = name
        self.optimizer = parameters.optimizer
        self.layers = parameters.layers
        self.activations = parameters.activations
        self.learning_rate = parameters.learning_rate

        self.gamma = parameters.gamma
        self.lr_policy = parameters.lr_policy
        self.dropout_ratio = parameters.dropout_ratio

        self._validate_inputs()
        self._setup_initial_blobs()

        self._build_fwd_pass_score_model()

        self._build_fwd_pass_train_model()
        self._generate_train_model_loss()
        self._update_train_model()

        workspace.RunNetOnce(self.score_model.param_init_net)
        workspace.CreateNet(self.score_model.net)
        workspace.RunNetOnce(self.train_model.param_init_net)
        workspace.CreateNet(self.train_model.net)

    def _validate_inputs(self):
        num_layers = len(self.layers)
        num_activations = len(self.activations)

        if num_activations != num_layers - 1:
            raise Exception(
                "Incompatible input `layers` and `activations` sizes.")

        if not all(x > 0 and int(x) == x for x in self.layers):
            raise Exception(
                "All values in `layers` should be positive integers.")

    def _generate_train_model_loss(self):
        GenerateLossOps(self.train_model, self.model_id + "_train",
                        self.output_blob, self.labels_blob, self.loss_blob)

    def _build_fwd_pass_train_model(self):
        self.train_model.StopGradient(self.labels_blob, self.labels_blob)
        MakeForwardPassOps(self.train_model, self.model_id + "_train",
                           self.input_blob, self.output_blob, self.weights,
                           self.biases, self.activations, self.layers,
                           self.dropout_ratio, False)

    def _build_fwd_pass_score_model(self):
        MakeForwardPassOps(self.score_model, self.model_id + "_score",
                           self.input_blob, self.output_blob, self.weights,
                           self.biases, self.activations, self.layers,
                           self.dropout_ratio, True)

    def _setup_initial_blobs(self):
        # Define models
        self.score_model = ModelHelper(name="score_" + self.model_id)
        self.train_model = ModelHelper(name="train_" + self.model_id)

        # Create input, output, labels, and loss blobs
        self.input_blob = "ModelInput_" + self.model_id
        workspace.FeedBlob(self.input_blob, np.zeros(1, dtype=np.float32))
        self.output_blob = "ModelOutput_" + self.model_id
        workspace.FeedBlob(self.output_blob, np.zeros(1, dtype=np.float32))
        self.labels_blob = "ModelLabels_" + self.model_id
        workspace.FeedBlob(self.labels_blob, np.zeros(1, dtype=np.float32))
        self.loss_blob = "loss"  # "ModelLoss_" + self.model_id
        workspace.FeedBlob(self.loss_blob, np.zeros(1, dtype=np.float32))

        # Create blobs for model parameters
        self.weights: List[str] = []
        self.biases: List[str] = []

        for x in six.moves.range(len(self.layers) - 1):
            dim_in = self.layers[x]
            dim_out = self.layers[x + 1]

            weight_name = "Weights_" + str(x) + "_" + self.model_id
            bias_name = "Biases_" + str(x) + "_" + self.model_id
            self.weights.append(weight_name)
            self.biases.append(bias_name)

            bias = np.zeros(shape=[
                dim_out,
            ], dtype=np.float32)
            workspace.FeedBlob(bias_name, bias)

            gain = np.sqrt(2) if self.activations[x] == 'relu' else 1
            weights = scipy.stats.norm(0, gain * np.sqrt(1 / dim_in)).rvs(
                size=[dim_out, dim_in]).astype(np.float32)
            workspace.FeedBlob(weight_name, weights)

    def _update_train_model(self):
        self.train_model.AddGradientOperators([self.loss_blob])

        for param in self.train_model.params:
            param_grad = self.train_model.param_to_grad[param]
            self.train_model.net.NanCheck([param_grad], [param_grad])

        AddParameterUpdateOps(
            self.train_model,
            optimizer_input=self.optimizer,
            base_learning_rate=self.learning_rate,
            gamma=self.gamma,
            policy=self.lr_policy,
        )

    def build_predictor(self, model, input_blob, output_blob) -> List[str]:
        MakeForwardPassOps(model,
                           self.model_id + "_score",
                           input_blob,
                           output_blob,
                           self.weights,
                           self.biases,
                           self.activations,
                           self.layers,
                           self.dropout_ratio,
                           is_test=True)
        return self.weights + self.biases

    def score(self, inputs: np.ndarray) -> np.ndarray:
        """
        Runs the net on a set of data and returns the outputs.

        :param inputs: Numpy array containing examples to score.
        """
        workspace.FeedBlob(self.input_blob, inputs)
        workspace.RunNet(self.score_model.net)
        return workspace.FetchBlob(self.output_blob)

    def train_batch(self, inputs: np.ndarray, labels: np.ndarray) -> None:
        """
        Trains net on inputs and labels. Please ensure that inputs are batched
        to an appropriate size and are shuffled.

        :param inputs: Numpy array containing training examples.
        :param labels: Numpy array containing training labels.
        """
        workspace.FeedBlob(self.input_blob, inputs)
        workspace.FeedBlob(self.labels_blob, labels)
        workspace.RunNet(self.train_model.net)

    @property
    def output(self) -> np.ndarray:
        return workspace.FetchBlob(self.output_blob)

    @property
    def loss(self) -> np.ndarray:
        return workspace.FetchBlob('loss')
print("\n************* Init Net *************")
print(regression_model.param_init_net.Proto())

# #### Add the training operators and prime the workspace
#
# In this **very important** step, we specify the loss function, setup the SGD training algorithm, prime and initialize the workspace, and initialize our model's weights and biases.

# In[5]:

# The loss function is computed by a squared L2 distance,
#   and then averaged over all items.
dist = regression_model.SquaredL2Distance(['Y_gt', y_pred], "dist")
loss = regression_model.AveragedLoss(dist, "loss")

# Add the gradient operators and setup the SGD algorithm
regression_model.AddGradientOperators([loss])
optimizer.build_sgd(regression_model, base_learning_rate=learning_rate)

# Prime the workspace with some data
workspace.FeedBlob("Y_gt", Y_gt.astype(np.float32))
workspace.FeedBlob("X", X.astype(np.float32))

# Run the init net to prepare the workspace then create the net
workspace.RunNetOnce(regression_model.param_init_net)
workspace.CreateNet(regression_model.net)

# Inject our desired initial weights and bias
workspace.FeedBlob("y_pred_w", np.array([initial_weights]).astype(np.float32))
workspace.FeedBlob("y_pred_b", np.array([0.]).astype(np.float32))

# #### Run the training