Python CNNModelHelper Exemples, caffe2.python.cnn.CNNModelHelper Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : shape_inference_test.py Projet : yaochx/caffe2

    def testShapeInferenceSimpleFC(self):
        """Check the shapes inferred for two chained FC layers.

        The net is ``data -> fc1 -> fc2`` and shape inference is seeded with
        a ``[64, 96]`` shape for ``data``.
        """
        m = cnn.CNNModelHelper()

        m.FC("data", "fc1", dim_in=96, dim_out=32)
        m.FC("fc1", "fc2", dim_in=32, dim_out=55)

        # The inferred types are returned alongside the shapes but are not
        # inspected by this test.
        (shapes,
         _types) = workspace.InferShapesAndTypes([m.param_init_net, m.net],
                                                 {'data': [64, 96]})

        # assertEqual is used instead of the assertEquals alias: the alias
        # was deprecated for years and removed from unittest in Python 3.12.
        self.assertEqual(shapes['data'], [64, 96])
        self.assertEqual(shapes['fc1_w'], [32, 96])
        self.assertEqual(shapes['fc1_b'], [32])
        self.assertEqual(shapes['fc1'], [64, 32])
        self.assertEqual(shapes['fc2_w'], [55, 32])
        self.assertEqual(shapes['fc2_b'], [55])
        self.assertEqual(shapes['fc2'], [64, 55])

Exemple #2

0

Afficher le fichier

Fichier : shape_inference_test.py Projet : yaochx/caffe2

    def testShapeInferenceTranspose(self):
        """Run InferTensorRunAndCompare on Transpose, without and with axes."""
        helper = cnn.CNNModelHelper()

        source = np.random.rand(4, 2, 3, 3, 5).astype(np.float32)
        workspace.FeedBlob("tensor", source)

        # First pass: the operator is added with no explicit axes.
        helper.Transpose(["tensor"], "transpose")
        self.InferTensorRunAndCompare(helper)

        # Second pass: a random permutation of the five axes is supplied.
        helper.Transpose(
            ["tensor"],
            "transpose",
            axes=np.random.permutation(5),
        )

        return self.InferTensorRunAndCompare(helper)

Exemple #3

0

Afficher le fichier

Fichier : executor_test_util.py Projet : natureofnature/caffe2

def gen_test_resnet50(_order, _cudnn_ws):
    """Build a ResNet-50 model in training mode for tests.

    Both arguments are accepted but not used. Returns the tuple
    ``(model, 227)``.
    """
    helper = cnn.CNNModelHelper(
        order="NCHW", name="resnet_50_test", cudnn_exhaustive_search=True)
    data_blob = helper.net.AddExternalInput("data")
    label_blob = helper.net.AddExternalInput("label")
    # The builder returns a pair; neither element is needed afterwards.
    _softmax, _loss = resnet.create_resnet50(
        helper,
        data_blob,
        num_input_channels=3,
        num_labels=1000,
        label=label_blob,
        is_test=False,
    )
    return helper, 227

Exemple #4

0

Afficher le fichier

Fichier : shape_inference_test.py Projet : yaochx/caffe2

    def testShapeInferencePad(self):
        """Run InferTensorRunAndCompare on a PadImage op with uneven pads."""
        helper = cnn.CNNModelHelper(name="padtest")
        helper.PadImage(
            "data", 'padded',
            pad_t=100, pad_l=37, pad_b=28, pad_r=20,
            mode="constant", order="NCHW",
        )

        input_batch = np.random.rand(16, 3, 228, 228).astype(np.float32)
        workspace.FeedBlob("data", input_batch)

        self.InferTensorRunAndCompare(helper)

Exemple #5

0

Afficher le fichier

def test_forward_only_fast_simplenet(
    create_model,
    last_out_blob,
    data_blob="gpu_0/data",
    num_labels=1000,
):
    """Build a test-mode model, optimize it with memonger and run both nets.

    Args:
        create_model: callable invoked as ``create_model(model, data,
            num_input_channels=3, num_labels=..., is_test=True)``.
        last_out_blob: name of the blob fetched after each run.
        data_blob: name of the external input blob that receives the data.
        num_labels: forwarded to ``create_model``.

    Returns:
        ``[(count_after, count_before), num_shared_blobs,
        (loss1, optimized_loss1)]``.
    """
    helper = cnn.CNNModelHelper(
        order="NCHW", name="test", cudnn_exhaustive_search=True)
    with core.NameScope("gpu_0"):
        net_input = helper.net.AddExternalInput(data_blob)
        create_model(
            helper,
            net_input,
            num_input_channels=3,
            num_labels=num_labels,
            is_test=True,
        )

    count_before = count_blobs(helper.net.Proto())
    started = time.time()
    # The second argument is the input blob, the output blob and every
    # external input of the net.
    optim_proto = memonger.optimize_inference_fast(
        helper.net.Proto(),
        {data_blob, last_out_blob} | set(helper.net.Proto().external_input),
    )
    print("Optimization took {} secs".format(time.time() - started))
    count_after = count_blobs(optim_proto)
    num_shared_blobs = count_shared_blobs(optim_proto)

    print(count_after, count_before, num_shared_blobs)

    # Initialize parameters, feed random data, then run the original net
    # followed by the optimized one, fetching the same blob after each run.
    workspace.RunNetOnce(helper.param_init_net)
    batch = np.random.rand(4, 3, 227, 227).astype(np.float32)

    workspace.FeedBlob(data_blob, batch)
    helper.net.Proto().type = 'simple'

    workspace.RunNetOnce(helper.net)
    loss1 = workspace.FetchBlob(last_out_blob)

    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob(last_out_blob)

    blob_counts = (count_after, count_before)
    losses = (loss1, optimized_loss1)
    return [blob_counts, num_shared_blobs, losses]

Exemple #6

0

Afficher le fichier

    def test_copy_gradient_multiple_gpus(self):
        """Copy a blob CPU -> CUDA 0 -> CUDA 1 and check values and devices."""

        def find_single_output_op(helper, blob_name):
            # First op whose only output is `blob_name`, or None.
            candidates = (
                op for op in helper.net.Proto().op
                if len(op.output) == 1 and op.output[0] == blob_name
            )
            return next(candidates, None)

        model = cnn.CNNModelHelper(name="copy_test")

        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
            x_cpu = model.net.AddExternalInputs("x_cpu")

        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            x_gpu_1 = model.CopyCPUToGPU(x_cpu, "x_gpu_1")

        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 1)):
            x_gpu_2 = model.Copy(x_gpu_1, "x_gpu_2")
            loss = model.AveragedLoss(x_gpu_2, "loss")
            gradient_map = model.AddGradientOperators([loss])

        workspace.FeedBlob("x_cpu", np.random.rand(32).astype(np.float32))
        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        print(model.net.Proto())

        # The two GPU copies must hold the same values ...
        first_copy = workspace.FetchBlob("x_gpu_1")
        second_copy = workspace.FetchBlob("x_gpu_2")
        self.assertTrue(np.array_equal(first_copy, second_copy))

        # ... and so must their gradients.
        first_grad = workspace.FetchBlob(gradient_map["x_gpu_1"])
        second_grad = workspace.FetchBlob(gradient_map["x_gpu_2"])
        self.assertTrue(np.array_equal(first_grad, second_grad))

        # Check the device option recorded on the op producing each gradient.
        self.assertEqual(
            find_single_output_op(model, "x_gpu_2_grad").device_option,
            core.DeviceOption(caffe2_pb2.CUDA, 1),
        )
        self.assertEqual(
            find_single_output_op(model, "x_cpu_grad").device_option,
            core.DeviceOption(caffe2_pb2.CUDA, 0),
        )

Exemple #7

0

Afficher le fichier

Fichier : data_parallel_model_test.py Projet : zsk423200/pytorch

        def run(comm_rank, comm_size, tmpdir):
            """Parallelize an empty CPU model and run its net twice.

            Args:
                comm_rank: used as ``shard_id`` in the rendezvous and to
                    decide whether this process sleeps between the runs.
                comm_size: used as ``num_shards`` in the rendezvous.
                tmpdir: path handed to the FileStoreHandlerCreate operator.
            """
            def add_input_ops(model):
                pass

            def add_model_ops(model, loss_scale):
                return []

            def add_optimizer(model):
                pass

            workspace.ResetWorkspace()
            store_handler = "store_handler"
            create_store_op = core.CreateOperator(
                "FileStoreHandlerCreate", [], [store_handler], path=tmpdir)
            workspace.RunOperatorOnce(create_store_op)
            rendezvous = {
                "kv_handler": store_handler,
                "shard_id": comm_rank,
                "num_shards": comm_size,
                "engine": 'GLOO',
            }

            model = cnn.CNNModelHelper(order="NHWC", name="test")
            # Set the default network timeout to 2 seconds. Rank 0 then
            # sleeps for that same duration between the two RunNet calls;
            # the second RunNet is expected to complete without a timeout.
            data_parallel_model._DEFAULT_TIMEOUT_SEC = 2
            data_parallel_model.Parallelize_CPU(
                model,
                input_builder_fun=add_input_ops,
                forward_pass_builder_fun=add_model_ops,
                optimizer_builder_fun=add_optimizer,
                devices=[1, 2, 3],
                rendezvous=rendezvous,
                barrier_net_timeout_sec=5,
            )
            data_parallel_model.RunInitNet(model)
            data_parallel_model.RunNet(model, 2)
            if comm_rank == 0:
                time.sleep(data_parallel_model._DEFAULT_TIMEOUT_SEC)
            data_parallel_model.RunNet(model, 2)

Exemple #8

0

Afficher le fichier

Fichier : leaky_relu_test.py Projet : zlbing/caffe2

    def test_leaky_relu_cnn_helper(self, N, C, H, W, order, alpha, seed):
        """Run LeakyRelu through CNNModelHelper and check the output shape."""
        np.random.seed(seed)
        helper = cnn.CNNModelHelper(order=order)
        helper.LeakyRelu('input', 'output', alpha=alpha)

        channels_last = order == 'NHWC'

        # The input is generated as (N, C, H, W) and moved to channels-last
        # when the helper was built with the NHWC order.
        features = np.random.rand(N, C, H, W).astype(np.float32)
        if channels_last:
            features = np.transpose(features, axes=(0, 2, 3, 1))

        self.ws.create_blob('input').feed(features)

        for net in (helper.param_init_net, helper.net):
            self.ws.create_net(net).run()

        # Bring the result back to (N, C, H, W) before checking its shape.
        result = self.ws.blobs['output'].fetch()
        if channels_last:
            result = np.transpose(result, axes=(0, 3, 1, 2))

        assert result.shape == (N, C, H, W)

Exemple #9

0

Afficher le fichier

 def test_simple_cnnmodel(self):
     """Build a small conv/FC model on MKL and convert it to a graph def.

     The graph def and the contents of 'tests/expect/caffe_overfeat.expect'
     are both split on "node {" and sorted into comparable strings.
     """
     model = cnn.CNNModelHelper("NCHW", name="overfeat")
     workspace.FeedBlob("data", np.random.randn(1, 3, 64, 64).astype(np.float32))
     # The builtin `int` replaces `np.int`: the alias meant exactly `int`,
     # was deprecated in NumPy 1.20 and removed in NumPy 1.24.
     workspace.FeedBlob("label", np.random.randn(1, 1000).astype(int))
     with core.NameScope("conv1"):
         conv1 = model.Conv("data", "conv1", 3, 96, 11, stride=4)
         relu1 = model.Relu(conv1, conv1)
         pool1 = model.MaxPool(relu1, "pool1", kernel=2, stride=2)
     with core.NameScope("classifier"):
         fc = model.FC(pool1, "fc", 4096, 1000)
         pred = model.Softmax(fc, "pred")
         xent = model.LabelCrossEntropy([pred, "label"], "xent")
         loss = model.AveragedLoss(xent, "loss")
     model.net.RunAllOnMKL()
     model.param_init_net.RunAllOnMKL()
     model.AddGradientOperators([loss], skip=1)
     blob_name_tracker = {}
     graph = tb.model_to_graph_def(
         model,
         blob_name_tracker=blob_name_tracker,
         shapes={},
         show_simplified=False,
     )
     #self.assertEqual(
     #    blob_name_tracker['GRADIENTS/conv1/conv1_b_grad'],
     #    'conv1/conv1_b_grad',
     #)
     self.maxDiff = None
     # We can't guarantee the order in which they appear, so we sort
     # both before we compare them
     with open('tests/expect/caffe_overfeat.expect') as f:
         EXPECTED_CNN = f.read()
     sep = "node {"
     expected = "\n".join(sorted(
         sep + "\n  " + part.strip()
         for part in EXPECTED_CNN.strip().split(sep)
         if part.strip()
     ))
     actual = "\n".join(sorted(
         sep + "\n  " + part.strip()
         for part in str(graph).strip().split(sep)
         if part.strip()
     ))
     # NOTE(review): `expected` and `actual` are built but never compared
     # in this block -- confirm whether an assertMultiLineEqual(actual,
     # expected) was intentionally left out.

Exemple #10

0

Afficher le fichier

    def test_net_gradient_checker(self):
        """Run NetGradientChecker on an FC output compared to two constants."""
        helper = cnn.CNNModelHelper(name="test")
        const = helper.net.AddExternalInputs("const1", "const2")
        fc_out = helper.FC(
            dim_in=3, dim_out=4, blob_in="X", blob_out="Y", axis=0)

        # All distance ops are added before any loss op, so the op order in
        # the net stays: distance, distance, loss, loss.
        distances = []
        for target in const:
            distances.append(helper.net.SquaredL2Distance([fc_out, target]))
        losses = []  # two losses are used here
        for distance in distances:
            losses.append(helper.net.AveragedLoss(distance))

        workspace.RunNetOnce(helper.param_init_net)

        feed = {
            "X": np.array([1, 2, 3], dtype="float32"),
            const[0]: np.array([1, 1, 1, 1], dtype="float32"),
            const[1]: np.array([2, 2, 2, 2], dtype="float32"),
        }
        gradient_checker.NetGradientChecker.Check(
            helper.net,
            outputs_with_grad=losses,
            input_values=feed,
            input_to_check="X",
        )

Exemple #11

0

Afficher le fichier

    def _createDense(self):
        """Create a one-output FC regression model plus matching data.

        Returns:
            ``(model, perfect_model, data, label)`` where ``label`` is the
            dot product of ``data`` with ``perfect_model``, as a column.
        """
        perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
        n_features = perfect_model.size

        np.random.seed(123)  # fixed seed keeps the test deterministic
        data = np.random.randint(2, size=(20, n_features))
        data = data.astype(np.float32)
        label = np.dot(data, perfect_model)[:, np.newaxis]

        model = cnn.CNNModelHelper("NCHW", name="test")
        weight_init = ('ConstantFill', {})
        bias_init = ('ConstantFill', {})
        out = model.FC(
            'data', 'fc', n_features, 1, weight_init, bias_init, axis=0)
        squared_dist = model.SquaredL2Distance([out, 'label'])
        loss = model.AveragedLoss(squared_dist, "avg_loss")
        grad_map = model.AddGradientOperators([loss])
        self.assertIsInstance(grad_map['fc_w'], core.BlobReference)
        return (model, perfect_model, data, label)

Exemple #12

0

Afficher le fichier

Fichier : data_parallel_model_test.py Projet : locn1000/caffe2

        def add_parameter_update_ops(model):
            """Add momentum-SGD update ops, then check the checkpoint params.

            NOTE(review): as indented here, everything from the
            ``cnn.CNNModelHelper`` construction onwards is part of this
            function's body, rebinds ``model`` and passes this very function
            as ``param_update_builder_fun``. That part presumably belongs to
            the enclosing test method -- confirm against the original file.
            """
            model.Iter("ITER")
            LR = model.param_init_net.ConstantFill([],
                                                   'LR',
                                                   shape=[1],
                                                   value=0.1)
            # One momentum blob per parameter, created from the parameter
            # itself and filled with zeros.
            for param in model.GetParams():
                param_grad = model.param_to_grad[param]
                param_momentum = model.param_init_net.ConstantFill([param],
                                                                   param +
                                                                   '_momentum',
                                                                   value=0.0)
                model.net.MomentumSGDUpdate(
                    [param_grad, param_momentum, LR, param],
                    [param_grad, param_momentum, param],
                )
            model = cnn.CNNModelHelper(
                order="NHWC",
                name="test",
            )
            data_parallel_model.Parallelize_GPU(
                model,
                input_builder_fun=add_input_ops,
                forward_pass_builder_fun=add_model_ops,
                param_update_builder_fun=add_parameter_update_ops,
                devices=[1, 2, 3],
            )

            # Only gpu_1 params should be returned (gpu_1 is the first gpu)
            checkpoint_params = data_parallel_model.GetCheckpointParams(model)
            for p in model.GetParams("gpu_1/"):
                self.assertTrue(p in checkpoint_params)
                self.assertTrue(p + "_momentum" in checkpoint_params)
            for p in model.GetParams("gpu_2/"):
                # Fixed: this was assertTrue, which contradicted the rule
                # stated above that only gpu_1 params are returned.
                self.assertFalse(p in checkpoint_params)
            for c in model.GetComputedParams("gpu_1/"):
                self.assertFalse(c in checkpoint_params)
            for c in model.GetComputedParams("gpu_2/"):
                self.assertFalse(c in checkpoint_params)
            self.assertFalse(
                core.BlobReference("gpu_1/data") in checkpoint_params)
            self.assertTrue(
                core.BlobReference("gpu_1/ITER") in checkpoint_params)

Exemple #13

0

Afficher le fichier

 def test_simple_cnnmodel(self):
     """Build a small conv/FC model on GPU and compare its graph def text.

     The graph is also written through a SummaryWriter, and one entry of
     the blob name tracker is checked.
     """
     node_marker = "node {"

     def normalized(text):
         # Split on the node marker, drop empty pieces, and sort, because
         # the order in which nodes appear is not guaranteed.
         chunks = (
             node_marker + "\n  " + piece.strip()
             for piece in text.strip().split(node_marker)
             if piece.strip()
         )
         return "\n".join(sorted(chunks))

     model = cnn.CNNModelHelper("NCHW", name="overfeat")
     data, label = model.ImageInput(["db"], ["data", "label"], is_test=0)
     with core.NameScope("conv1"):
         conv_out = model.Conv(data, "conv1", 3, 96, 11, stride=4)
         relu_out = model.Relu(conv_out, conv_out)
         pooled = model.MaxPool(relu_out, "pool1", kernel=2, stride=2)
     with core.NameScope("classifier"):
         logits = model.FC(pooled, "fc", 4096, 1000)
         pred = model.Softmax(logits, "pred")
         xent = model.LabelCrossEntropy([pred, label], "xent")
         loss = model.AveragedLoss(xent, "loss")
     model.net.RunAllOnGPU()
     model.param_init_net.RunAllOnGPU()
     model.AddGradientOperators([loss], skip=1)
     with SummaryWriter(filename_suffix='.test') as writer:
         writer.add_graph(model)

     blob_name_tracker = {}
     graph = tb.model_to_graph_def(
         model,
         blob_name_tracker=blob_name_tracker,
         shapes={},
         show_simplified=False,
     )
     self.assertEqual(
         blob_name_tracker['GRADIENTS/conv1/conv1_b_grad'],
         'conv1/conv1_b_grad',
     )
     self.maxDiff = None
     expected = normalized(EXPECTED_CNN)
     actual = normalized(str(graph))
     self.assertMultiLineEqual(actual, expected)

Exemple #14

0

Afficher le fichier

 def run_test_copy_gradient(self, device_opt):
     """Check that Copy preserves values and gradients under `device_opt`."""
     helper = cnn.CNNModelHelper(name="copy_test")
     with core.DeviceScope(device_opt):
         src = helper.net.AddExternalInputs("x")
         dst = helper.Copy(src, "y")
         loss = helper.AveragedLoss(dst, "loss")
         grads = helper.AddGradientOperators([loss])

         workspace.FeedBlob(src, np.random.rand(32).astype(np.float32))
         workspace.RunNetOnce(helper.param_init_net)
         workspace.RunNetOnce(helper.net)

         # Forward values of source and copy must match.
         src_value = workspace.FetchBlob(src)
         dst_value = workspace.FetchBlob(dst)
         self.assertTrue(np.array_equal(src_value, dst_value))

         # So must the gradients recorded for both blobs.
         src_grad = workspace.FetchBlob(grads[src])
         dst_grad = workspace.FetchBlob(grads[dst])
         self.assertTrue(np.array_equal(src_grad, dst_grad))

Exemple #15

0

Afficher le fichier

Fichier : shape_inference_test.py Projet : yiutechsolutions/caffe2

    def testShapeInferenceConvNet(self):
        """Run InferTensorRunAndCompare on a small conv/BN/pool/FC stack."""
        net = cnn.CNNModelHelper(name="convtest", order="NCHW")
        net.NHWC2NCHW("data", "data_nchw")
        net.Conv(
            "data_nchw", 'conv1', 3, 64,
            weight_init=("MSRAFill", {}),
            kernel=7, stride=2, pad=3, no_bias=0,
        )
        net.SpatialBN('conv1', 'conv1_spatbn_relu', 64, epsilon=1e-3)
        net.Relu('conv1_spatbn_relu', 'conv1_spatbn_relu')
        net.MaxPool('conv1_spatbn_relu', 'pool1', kernel=3, stride=2)
        net.FC('pool1', 'fc', dim_in=(64 * 56 * 56), dim_out=100)
        net.Sigmoid('fc', 'fc_sigm')
        net.Softmax('fc_sigm', 'softmax')

        # The fed batch is channels-last; the first op converts it.
        images = np.random.rand(16, 227, 227, 3).astype(np.float32)
        workspace.FeedBlob("data", images)

        # Hand the model to the shared inference-versus-run comparison.
        self.InferTensorRunAndCompare(net)

Exemple #16

0

Afficher le fichier

    def test_resnet_forward_only(self):
        """Optimize a test-mode ResNet-50 for DAG inference and compare runs."""
        input_name = "gpu_0/data"
        output_name = "gpu_0/last_out_L1000"

        helper = cnn.CNNModelHelper(
            order="NCHW", name="test", cudnn_exhaustive_search=True)
        with core.NameScope("gpu_0"):
            net_input = helper.net.AddExternalInput("gpu_0/data")
            resnet.create_resnet50(
                helper,
                net_input,
                num_input_channels=3,
                num_labels=1000,
                is_test=True,
            )

        count_before = count_blobs(helper.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            helper.net, [input_name], "gpu_0/")
        count_after = count_blobs(optim_proto)
        num_shared_blobs = count_shared_blobs(optim_proto)

        # Run the original net on random data and keep its output.
        workspace.RunNetOnce(helper.param_init_net)
        batch = np.random.rand(4, 3, 227, 227).astype(np.float32)

        workspace.FeedBlob(input_name, batch)
        workspace.RunNetOnce(helper.net)
        helper.net.Proto().type = 'dag'
        helper.net.Proto().num_workers = 4
        loss1 = workspace.FetchBlob(output_name)
        graphs_match = memonger.verify_graph_equality(
            helper.net.Proto(), optim_proto)
        self.assertTrue(graphs_match)

        # Run the optimized net and compare blob counts and outputs.
        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob(output_name)
        self.assertTrue(count_after < count_before)
        self.assertTrue(num_shared_blobs < 7)
        np.testing.assert_almost_equal(loss1, optimized_loss1)

Exemple #17

0

Afficher le fichier

Fichier : memonger_test.py Projet : GeekLiB/caffe2-master

    def test_topological_sort_longest_path(self):
        """Check both memonger topological orderings on a four-op net."""
        helper = cnn.CNNModelHelper()
        # The four ops are added in this order; the expected orderings
        # below refer to them by index 0..3.
        helper.Copy("conv0_w_comp", "conv0_w")                    # op 0
        first_conv = helper.Conv("data", "conv0", 32, 32, 4)      # op 1
        helper.Copy("conv2_w", "conv2_w")                         # op 2
        helper.Conv(first_conv, "conv2", 16, 32, 4)               # op 3

        graph = memonger.compute_interference_graph(helper.net.Proto().op)

        plain_order = memonger.topological_sort_traversal(graph)
        self.assertEqual([2, 0, 1, 3], plain_order)

        # With the longest-path variant the longer chain comes first.
        longest_path_order = memonger.topological_sort_traversal_longest_path(
            graph)
        self.assertEqual([0, 1, 2, 3], longest_path_order)

Exemple #18

0

Afficher le fichier

        def run(comm_rank, comm_size, tmpdir):
            """Parallelize an empty CPU model and synchronize it twice.

            Args:
                comm_rank: used as ``shard_id`` in the rendezvous.
                comm_size: used as ``num_shards`` in the rendezvous.
                tmpdir: path handed to the FileStoreHandlerCreate operator.
            """
            def add_input_ops(model):
                pass

            def add_model_ops(model, loss_scale):
                return []

            def add_optimizer(model):
                pass

            store_handler = "store_handler"
            create_store_op = core.CreateOperator(
                "FileStoreHandlerCreate", [], [store_handler], path=tmpdir)
            workspace.RunOperatorOnce(create_store_op)
            rendezvous = {
                "kv_handler": store_handler,
                "shard_id": comm_rank,
                "num_shards": comm_size,
                "engine": 'GLOO',
            }

            model = cnn.CNNModelHelper(order="NHWC", name="test")
            data_parallel_model.Parallelize_CPU(
                model,
                input_builder_fun=add_input_ops,
                forward_pass_builder_fun=add_model_ops,
                optimizer_builder_fun=add_optimizer,
                devices=[1, 2, 3],
                rendezvous=rendezvous,
            )
            data_parallel_model.RunInitNet(model)

            # Two consecutive synchronizations.
            data_parallel_model.Synchronize(model)
            data_parallel_model.Synchronize(model)

Exemple #19

0

Afficher le fichier

    def test_read_from_db(self):
        """Load video clips from an LMDB through VideoInput and check them.

        A 16-entry list file pointing at one sample video is turned into a
        video DB, read back with ``batch_size=10``, and the fetched label
        and data shape are verified.
        """
        random_label = np.random.randint(0, 100)
        VIDEO = "/mnt/vol/gfsdataswarm-oregon/users/trandu/sample.avi"
        # Close the handle right away; only the path is needed afterwards.
        with tempfile.NamedTemporaryFile(delete=False) as list_file:
            temp_list = list_file.name
        video_db_dir = None
        try:
            line_str = '{} 0 {}\n'.format(VIDEO, random_label)
            self.create_a_list(
                temp_list,
                line_str,
                16)
            video_db_dir = tempfile.mkdtemp()

            self.create_video_db(temp_list, video_db_dir)
            model = cnn.CNNModelHelper(name="Video Loader from LMDB")
            reader = model.CreateDB(
                "sample",
                db=video_db_dir,
                db_type="lmdb")
            model.VideoInput(
                reader,
                ["data", "label"],
                name="data",
                batch_size=10,
                width=171,
                height=128,
                crop=112,
                length=8,
                sampling_rate=2,
                mirror=1,
                use_local_file=0,
                temporal_jitter=1)

            workspace.RunNetOnce(model.param_init_net)
            workspace.RunNetOnce(model.net)
            data = workspace.FetchBlob("data")
            label = workspace.FetchBlob("label")

            np.testing.assert_equal(label, random_label)
            np.testing.assert_equal(data.shape, [10, 3, 8, 112, 112])
        finally:
            # Clean up even when a step above raises or an assertion
            # fails; previously the temp file and directory leaked then.
            os.remove(temp_list)
            if video_db_dir is not None:
                shutil.rmtree(video_db_dir)

Exemple #20

0

Afficher le fichier

Fichier : memonger_test.py Projet : GeekLiB/caffe2-master

    def test_gradient_optim(self, input_dim, output_dim, batch_size):
        """Check share_grad_blobs reduces blobs and keeps loss and gradient."""
        m = cnn.CNNModelHelper()
        with core.NameScope("name_x"):
            # Five stacked FC layers, fc1 .. fc5; only the first one maps
            # from input_dim, the rest are output_dim -> output_dim.
            layer_in, width_in = "data", input_dim
            for idx in range(1, 6):
                layer_in = m.FC(layer_in,
                                "fc{}".format(idx),
                                dim_in=width_in,
                                dim_out=output_dim)
                width_in = output_dim
            fc5 = layer_in
            activated = fc5.Relu([], fc5)
            pred = activated.Softmax([], "pred")
            xent = pred.LabelCrossEntropy(["label"], ["xent"])
            xent.AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["name_x/loss"])

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            m.net,
            ["name_x/loss"],
            set(m.param_to_grad.values()),
            "name_x/",
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Both nets must yield the same loss and the same fc1_w gradient.
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size, )).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)

        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("name_x/loss")
        grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))

        workspace.RunNetOnce(optim_proto)
        optimized_loss = workspace.FetchBlob("name_x/loss")
        optimized_grad = workspace.FetchBlob(
            str(input_to_grad["name_x/fc1_w"]))

        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)

Exemple #21

0

Afficher le fichier

Fichier : data_parallel_model_test.py Projet : locn1000/caffe2

        def add_model_ops(model):
            """Build a conv net with a loss and gradient ops on a new helper.

            NOTE(review): the ``model`` argument is never used; all ops go
            onto a freshly created CNNModelHelper and nothing is returned.
            Confirm this is intended.
            """
            net = cnn.CNNModelHelper(name="convtest", order="NCHW")
            net.NHWC2NCHW("data", "data_nchw")
            net.Conv(
                "data_nchw", 'conv1', 3, 64,
                weight_init=("MSRAFill", {}),
                kernel=7, stride=2, pad=3, no_bias=0,
            )
            net.SpatialBN('conv1', 'conv1_spatbn_relu', 64, epsilon=1e-3)
            net.Relu('conv1_spatbn_relu', 'conv1_spatbn_relu')
            net.MaxPool('conv1_spatbn_relu', 'pool1', kernel=3, stride=2)
            net.FC('pool1', 'fc', dim_in=(64 * 56 * 56), dim_out=100)
            net.Sigmoid('fc', 'fc_sigm')
            net.Softmax('fc_sigm', 'softmax')
            net.LabelCrossEntropy(['softmax', 'label'], 'xent')
            avg_loss = net.AveragedLoss('xent', 'loss')

            net.AddGradientOperators([avg_loss])

Exemple #22

0

Afficher le fichier

def test_forward_only(
    create_model,
    last_out_blob,
    data_blob='gpu_0/data',
    num_labels=1000,
):
    """Build a test-mode model, optimize it for DAG inference, run both nets.

    Args:
        create_model: callable invoked as ``create_model(model, data,
            num_input_channels=3, num_labels=..., is_test=True)``.
        last_out_blob: name of the blob fetched after each run.
        data_blob: name of the external input blob that receives the data.
        num_labels: forwarded to ``create_model``.

    Returns:
        ``[(count_after, count_before), num_shared_blobs,
        (loss1, optimized_loss1)]``.
    """
    helper = cnn.CNNModelHelper(
        order="NCHW", name="test", cudnn_exhaustive_search=True)
    with core.NameScope("gpu_0"):
        net_input = helper.net.AddExternalInput(data_blob)
        create_model(
            helper,
            net_input,
            num_input_channels=3,
            num_labels=num_labels,
            is_test=True,
        )

    count_before = count_blobs(helper.net.Proto())
    optim_proto = memonger.optimize_inference_for_dag(
        helper.net, [data_blob], "gpu_0/")
    count_after = count_blobs(optim_proto)
    num_shared_blobs = count_shared_blobs(optim_proto)

    # Initialize parameters, feed random data, then run the original net
    # followed by the optimized one, fetching the same blob after each run.
    workspace.RunNetOnce(helper.param_init_net)
    batch = np.random.rand(4, 3, 227, 227).astype(np.float32)

    workspace.FeedBlob(data_blob, batch)
    workspace.RunNetOnce(helper.net)
    helper.net.Proto().type = 'dag'
    helper.net.Proto().num_workers = 4
    loss1 = workspace.FetchBlob(last_out_blob)

    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob(last_out_blob)

    blob_counts = (count_after, count_before)
    losses = (loss1, optimized_loss1)
    return [blob_counts, num_shared_blobs, losses]

Exemple #23

0

Afficher le fichier

    def test_caffe2_simple_cnnmodel(self):
        """Convert a small conv/FC model to a graph def and compare it.

        The resulting graph is handed to ``compare_proto`` together with
        this test case.
        """
        model = cnn.CNNModelHelper("NCHW", name="overfeat")
        workspace.FeedBlob("data", np.random.randn(1, 3, 64, 64).astype(np.float32))
        # The builtin `int` replaces `np.int`: the alias meant exactly
        # `int`, was deprecated in NumPy 1.20 and removed in NumPy 1.24.
        workspace.FeedBlob("label", np.random.randn(1, 1000).astype(int))
        with core.NameScope("conv1"):
            conv1 = model.Conv("data", "conv1", 3, 96, 11, stride=4)
            relu1 = model.Relu(conv1, conv1)
            pool1 = model.MaxPool(relu1, "pool1", kernel=2, stride=2)
        with core.NameScope("classifier"):
            fc = model.FC(pool1, "fc", 4096, 1000)
            pred = model.Softmax(fc, "pred")
            xent = model.LabelCrossEntropy([pred, "label"], "xent")
            # The loss op is added to the net; its blob is not used below.
            model.AveragedLoss(xent, "loss")

        blob_name_tracker = {}
        graph = c2_graph.model_to_graph_def(
            model,
            blob_name_tracker=blob_name_tracker,
            shapes={},
            show_simplified=False,
        )
        compare_proto(graph, self)

Exemple #24

0

Afficher le fichier

Fichier : convnet_benchmarks.py Projet : yaochx/caffe2

def MLP(order, cudnn_ws):
    """Build a 20-layer, 3-branch FC network ending in a 1000-way loss.

    Neither argument is used. Returns ``(model, 256)``, 256 being the
    input/output dimension of every hidden FC layer.
    """
    model = cnn.CNNModelHelper()
    hidden = 256
    n_layers = 20
    n_branches = 3
    blob_name = "fc_{}_{}".format

    for layer in range(n_layers):
        for branch in range(n_branches):
            # The first layer of every branch reads from "data".
            if layer > 0:
                source = blob_name(layer, branch)
            else:
                source = "data"
            target = blob_name(layer + 1, branch)
            model.FC(
                source,
                target,
                dim_in=hidden,
                dim_out=hidden,
                weight_init=model.XavierInit,
                bias_init=model.XavierInit,
            )

    # Sum the last layer of all branches, then classify.
    branch_outputs = []
    for branch in range(n_branches):
        branch_outputs.append(blob_name(n_layers, branch))
    model.Sum(branch_outputs, ["sum"])
    model.FC(
        "sum",
        "last",
        dim_in=hidden,
        dim_out=1000,
        weight_init=model.XavierInit,
        bias_init=model.XavierInit,
    )
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, hidden

Exemple #25

0

Afficher le fichier

 def testReLUConsistencyWithCPU(self):
     """Compare Relu results and benchmark timings with and without MKLDNN."""
     mkl_do = core.DeviceOption(caffe2_pb2.MKLDNN)
     samples = np.random.randn(128, 4096).astype(np.float32)

     # Feeding must work both with and without the MKLDNN device option.
     workspace.FeedBlob("X", samples)
     workspace.FeedBlob("X_mkl", samples, device_option=mkl_do)

     # One Relu per input, so both variants live in the same net.
     helper = cnn.CNNModelHelper()
     helper.Relu("X", "Y")
     helper.Relu("X_mkl", "Y_mkl", device_option=mkl_do)
     workspace.CreateNet(helper.net)
     workspace.RunNet(helper.net)

     # Both outputs must agree within a tight tolerance.
     plain_out = workspace.FetchBlob("Y")
     mkl_out = workspace.FetchBlob("Y_mkl")
     np.testing.assert_allclose(plain_out, mkl_out, atol=1e-10, rtol=1e-10)

     runtime = workspace.BenchmarkNet(helper.net.Proto().name, 1, 10, True)
     # The benchmark returns [whole_net, cpu_op, mkl_op]; the MKL op is
     # assumed to be no slower than the CPU one.
     cpu_time = runtime[1]
     mkl_time = runtime[2]
     self.assertTrue(cpu_time >= mkl_time)
     print("CPU runtime {}, MKL runtime {}.".format(runtime[1], runtime[2]))

Exemple #26

0

Afficher le fichier

    def testNonParallelModel(self):
        """Run a data-worker-fed model 500 times and check each batch."""
        workspace.ResetWorkspace()

        helper = cnn.CNNModelHelper(name="test")
        seq_id_before = data_workers.global_coordinator._fetcher_id_seq
        coordinator = data_workers.init_data_input_workers(
            helper,
            ["data", "label"],
            dummy_fetcher,
            32,
            2,
            input_source_name="unittest",
        )
        seq_id_after = data_workers.global_coordinator._fetcher_id_seq
        # The fetcher id sequence must have advanced by exactly two.
        self.assertEqual(seq_id_after, seq_id_before + 2)

        coordinator.start()

        workspace.RunNetOnce(helper.param_init_net)
        workspace.CreateNet(helper.net)

        for _iteration in range(500):
            with timeout_guard.CompleteInTimeOrDie(5):
                workspace.RunNet(helper.net.Proto().name)

            batch = workspace.FetchBlob("data")
            batch_labels = workspace.FetchBlob("label")

            self.assertEqual(batch.shape[0], batch_labels.shape[0])
            self.assertEqual(batch.shape[0], 32)

            # Each label must equal the first three entries of its row.
            for row in range(32):
                for col in range(3):
                    self.assertEqual(batch_labels[row], batch[row, col])

        coordinator.stop_coordinator("unittest")
        self.assertEqual(coordinator._coordinators, [])

Exemple #27

0

Afficher le fichier

def bmuf_process(filestore_dir,
                 process_id,
                 shared_results,
                 cpu_device=False,
                 nesterov=False):
    """Run one shard of a two-process BMUF test and record its blobs.

    Builds a small FC/sigmoid model, parallelizes it with
    ``data_parallel_model.Parallelize_BMUF`` over two devices, runs it, and
    stores a dict of fetched blobs in ``shared_results[process_id]``.

    Args:
        filestore_dir: path passed to the ``FileStoreHandlerCreate`` operator
            that backs the rendezvous.
        process_id: shard id of this process. Process 0 uses devices
            ``[0, 1]``, any other value uses ``[2, 3]``; it also offsets the
            numpy random seed.
        shared_results: container indexed by ``process_id`` that receives the
            results dict.
        cpu_device: when true, run on CPU devices (blob prefix ``"cpu"``)
            instead of GPU devices (blob prefix ``"gpu"``).
        nesterov: forwarded to ``Parallelize_BMUF``.

    Returns:
        None. When ``cpu_device`` is false and GPU/HIP support is missing or
        fewer than 4 GPU devices are reported, the function logs a message
        and returns early without writing to ``shared_results``.
    """
    # We need to import caffe2 in every process to initialize CUDA independently.
    from caffe2.python import core, cnn, data_parallel_model, dyndep, workspace
    from caffe2.proto import caffe2_pb2
    dyndep.InitOpsLibrary("@/caffe2/caffe2/distributed:file_store_handler_ops")

    # GPU mode needs GPU/HIP support and at least 4 devices (two per process);
    # otherwise bail out quietly.
    if not cpu_device:
        if not workspace.has_gpu_support and not workspace.has_hip_support:
            log.info('No GPU support test is Ignored.')
            return
        if workspace.NumGpuDevices() < 4:
            log.info('Not enough GPU support, test IGNORED')
            return

    model = cnn.CNNModelHelper(order="NHWC", name="test")
    # device_prefix is the name-scope prefix used to address per-device blobs
    # below, e.g. "gpu_0/fc_w" or "cpu_0/fc_w".
    if not cpu_device:
        device_type = workspace.GpuDeviceType
        device_prefix = "gpu"
    else:
        device_type = caffe2_pb2.CPU
        device_prefix = "cpu"

    # Each process owns a disjoint pair of device ids.
    devices = [0, 1] if process_id == 0 else [2, 3]

    def _model_build_fun(model, loss_scale):
        # Forward pass: FC(16 -> 1) -> flatten -> sigmoid -> squared L2
        # distance against "label" -> averaged loss, scaled by loss_scale.
        fc = model.FC("data", "fc", 16, 1, ("ConstantFill", {}),
                      ("ConstantFill", {}))
        fc_fl = model.FlattenToVec(fc, "fc_fl")
        sigm = model.Sigmoid(fc_fl, "sigm")
        sq = model.SquaredL2Distance([sigm, "label"], "sq")
        loss = model.AveragedLoss(sq, "loss")
        loss = model.Scale(loss, scale=loss_scale)

        # For testing explicit sync
        model.param_init_net.UniformFill([], ["sync_num"], shape=[1])
        return [loss]

    def _input_builder_fun(model):
        # Inputs are fed directly into the workspace by _generate_data, so
        # no input operators are added to the model.
        return None

    def _param_update_fun(model):
        # Local update with a fixed learning rate of -0.1: each parameter is
        # overwritten by WeightedSum over (param, ONE) and (grad, LR)
        # (presumably param + LR * grad — confirm against the operator docs).
        ITER = model.Iter("ITER")
        LR = model.net.LearningRate(
            [ITER],
            "LR",
            base_lr=(-0.1),
            policy="fixed",
        )
        ONE = model.param_init_net.ConstantFill(
            [],
            "ONE",
            shape=[1],
            value=1.0,
        )
        for param in model.GetParams():
            grad = model.param_to_grad[param]
            model.WeightedSum([param, ONE, grad, LR], param)

    def _generate_data(devices, process_id, device_type, device_prefix):
        # Seed depends on process_id, so each process draws different data.
        np.random.seed(26 + process_id * 10)
        # Each run has same input, independent of number of gpus
        batch_size = 64
        for _ in range(0, 10):
            full_data = np.random.rand(batch_size, 16)
            # Label is the first feature rounded to the nearest integer.
            full_labels = np.round(full_data[:, 0])
            batch_per_device = batch_size // len(devices)

            # Give each device its contiguous slice of the batch, fed under
            # that device's name scope.
            for (j, g) in enumerate(devices):
                st = j * batch_per_device
                en = st + batch_per_device
                data = full_data[st:en, :].astype(np.float32)
                labels = full_labels[st:en].astype(np.float32)
                with core.DeviceScope(core.DeviceOption(device_type, g)):
                    workspace.FeedBlob("{}_{}/data".format(device_prefix, g),
                                       data)
                    workspace.FeedBlob("{}_{}/label".format(device_prefix, g),
                                       labels)

    _generate_data(devices, process_id, device_type, device_prefix)

    # File-backed store handler used by both processes to rendezvous.
    workspace.RunOperatorOnce(
        core.CreateOperator("FileStoreHandlerCreate", [], ["store_handler"],
                            path=filestore_dir))
    rendezvous = dict(kv_handler="store_handler",
                      shard_id=process_id,
                      num_shards=2,
                      engine="GLOO",
                      exit_nets=None)

    data_parallel_model.Parallelize_BMUF(model,
                                         _input_builder_fun,
                                         _model_build_fun,
                                         _param_update_fun,
                                         devices=devices,
                                         rendezvous=rendezvous,
                                         nesterov=nesterov,
                                         add_blobs_to_sync=["sync_num"],
                                         cpu_device=cpu_device)

    data_parallel_model.RunInitNet(model)

    def _device_pid(device, pid):
        # Map a process-local device index (0 or 1) to the device id this
        # process owns: process 1 uses ids shifted by 2.
        if pid == 1:
            return device + 2
        return device

    # After init, the "fc_w_v" blob on this process's first device must be
    # all zeros with shape (1, 16).
    np.testing.assert_equal(
        workspace.FetchBlob("{}_{}/fc_w_v".format(device_prefix,
                                                  _device_pid(0, process_id))),
        np.zeros(16).astype(np.float32).reshape(1, 16))

    # Run the algorithm for one iteration to have non-zero params.
    data_parallel_model.RunNet(model, 1)

    # Save iteration momentum and post local update params
    results = {}
    v_b_ = workspace.FetchBlob("{}_{}/fc_b_v".format(
        device_prefix, _device_pid(0, process_id)))
    v_w_ = workspace.FetchBlob("{}_{}/fc_w_v".format(
        device_prefix, _device_pid(0, process_id)))

    results['v_b_'] = v_b_
    results['v_w_'] = v_w_

    workspace.RunNetOnce(model.net)

    # Snapshot parameters on both of this process's devices; keys with a
    # trailing underscore hold values taken before the global update net runs.
    b_0_ = workspace.FetchBlob("{}_{}/fc_b".format(device_prefix,
                                                   _device_pid(0, process_id)))
    w_0_ = workspace.FetchBlob("{}_{}/fc_w".format(device_prefix,
                                                   _device_pid(0, process_id)))
    b_1_ = workspace.FetchBlob("{}_{}/fc_b".format(device_prefix,
                                                   _device_pid(1, process_id)))
    w_1_ = workspace.FetchBlob("{}_{}/fc_w".format(device_prefix,
                                                   _device_pid(1, process_id)))

    results['b_0_'] = b_0_
    results['w_0_'] = w_0_
    results['b_1_'] = b_1_
    results['w_1_'] = w_1_

    # Test sync
    # Only process 0 overwrites sync_num on device 0; the value read back per
    # device at the end of this function is recorded for the caller to check.
    if process_id == 0:
        workspace.FeedBlob(device_prefix + "_0/sync_num",
                           np.array([2603]).astype(np.float32),
                           device_option=core.DeviceOption(device_type, 0))

    # Compute block gradients.
    b_g_ = workspace.FetchBlob("{}_{}/fc_b_g".format(
        device_prefix, _device_pid(0, process_id)))
    w_g_ = workspace.FetchBlob("{}_{}/fc_w_g".format(
        device_prefix, _device_pid(0, process_id)))
    results['b_g_'] = b_g_
    results['w_g_'] = w_g_
    workspace.RunNetOnce(model._global_model_param_updates_net)

    #  g_b = (b_0_ + b_1_) / 2 - b_g_
    #  g_w = (w_0_ + w_1_) / 2 - w_g_
    # Re-fetch the same blobs after the global update net; these are stored
    # under keys without the trailing underscore.
    v_b = workspace.FetchBlob("{}_{}/fc_b_v".format(device_prefix,
                                                    _device_pid(0,
                                                                process_id)))
    v_w = workspace.FetchBlob("{}_{}/fc_w_v".format(device_prefix,
                                                    _device_pid(0,
                                                                process_id)))
    w_g = workspace.FetchBlob("{}_{}/fc_w_g".format(device_prefix,
                                                    _device_pid(0,
                                                                process_id)))
    b_g = workspace.FetchBlob("{}_{}/fc_b_g".format(device_prefix,
                                                    _device_pid(0,
                                                                process_id)))
    w_0 = workspace.FetchBlob("{}_{}/fc_w".format(device_prefix,
                                                  _device_pid(0, process_id)))
    b_0 = workspace.FetchBlob("{}_{}/fc_b".format(device_prefix,
                                                  _device_pid(0, process_id)))
    w_1 = workspace.FetchBlob("{}_{}/fc_w".format(device_prefix,
                                                  _device_pid(1, process_id)))
    b_1 = workspace.FetchBlob("{}_{}/fc_b".format(device_prefix,
                                                  _device_pid(1, process_id)))
    results['v_b'] = v_b
    results['v_w'] = v_w
    results['w_g'] = w_g
    results['b_g'] = b_g
    results['w_0'] = w_0
    results['b_0'] = b_0
    results['w_1'] = w_1
    results['b_1'] = b_1

    # Test add_blobs_to_sync
    for j in devices:
        sync = workspace.FetchBlob(device_prefix + "_{}/sync_num".format(j))[0]
        results['sync_{}'.format(j)] = sync

    # Hand the collected blobs back to the caller, keyed by shard id.
    shared_results[process_id] = results

Exemple #28

0

Afficher le fichier

def VGGA(order, cudnn_ws):
    """Assemble the VGG-A network on a cuDNN-enabled ``CNNModelHelper``.

    Args:
        order: forwarded as the first positional argument of
            ``cnn.CNNModelHelper``.
        cudnn_ws: forwarded as ``ws_nbytes_limit``.

    Returns:
        A ``(model, 231)`` tuple: the populated model helper and the constant
        231 (presumably the expected input image size — confirm with caller).
    """
    model = cnn.CNNModelHelper(
        order,
        name='vgg-a',
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        ws_nbytes_limit=cudnn_ws,
    )

    # One row per convolution: (blob name, input channels, output channels,
    # name of the max-pool that follows, or None when there is no pooling).
    conv_stack = (
        ("conv1", 3, 64, "pool1"),
        ("conv2", 64, 128, "pool2"),
        ("conv3", 128, 256, None),
        ("conv4", 256, 256, "pool4"),
        ("conv5", 256, 512, None),
        ("conv6", 512, 512, "pool6"),
        ("conv7", 512, 512, None),
        ("conv8", 512, 512, "pool8"),
    )

    blob = "data"
    for conv_name, dim_in, dim_out, pool_name in conv_stack:
        conv = model.Conv(blob, conv_name, dim_in, dim_out, 3,
                          ('XavierFill', {}), ('ConstantFill', {}), pad=1)
        # The ReLU output reuses the convolution's blob name.
        blob = model.Relu(conv, conv_name)
        if pool_name is not None:
            blob = model.MaxPool(blob, pool_name, kernel=2, stride=2)

    # Two hidden fully connected layers, each followed by a ReLU that reuses
    # the layer's blob name.
    hidden_fc_stack = (
        ("fcix", 512 * 7 * 7, 4096),
        ("fcx", 4096, 4096),
    )
    for fc_name, dim_in, dim_out in hidden_fc_stack:
        fc = model.FC(blob, fc_name, dim_in, dim_out, ('XavierFill', {}),
                      ('ConstantFill', {}))
        blob = model.Relu(fc, fc_name)

    # Classifier head: 1000-way FC -> softmax -> cross entropy -> mean loss.
    logits = model.FC(blob, "fcxi", 4096, 1000, ('XavierFill', {}),
                      ('ConstantFill', {}))
    probabilities = model.Softmax(logits, "pred")
    cross_entropy = model.LabelCrossEntropy([probabilities, "label"], "xent")
    model.AveragedLoss(cross_entropy, "loss")
    return model, 231

Exemple #29

0

Afficher le fichier

Fichier : lstm_benchmark.py Projet : sandeepl337/caffe2

def create_model(args, queue, label_queue, input_shape):
    """Build the LSTM benchmark model and initialise its recurrent state.

    Args:
        args: options object; this function reads ``implementation``
            ("own" or "cudnn"), ``input_dim``, ``hidden_dim``,
            ``memory_optimization``, ``forward_only`` and ``batch_size``.
        queue: blob queue from which "input_data" is dequeued.
        label_queue: blob queue from which "label" is dequeued.
        input_shape: shape of the placeholder input filled in for the cuDNN
            implementation.

    Returns:
        A ``(model, output)`` tuple: the model helper and the LSTM output
        blob.

    Raises:
        AssertionError: if ``args.implementation`` is neither "own" nor
            "cudnn".
    """
    model = cnn.CNNModelHelper(name="LSTM_bench")
    seq_lengths, hidden_init, cell_init, target = \
        model.net.AddExternalInputs(
            'seq_lengths',
            'hidden_init',
            'cell_init',
            'target',
        )
    input_blob = model.DequeueBlobs(queue, "input_data")
    labels = model.DequeueBlobs(label_queue, "label")

    if args.implementation == "own":
        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob=input_blob,
            seq_lengths=seq_lengths,
            initial_states=(hidden_init, cell_init),
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="lstm1",
            memory_optimization=args.memory_optimization,
            forward_only=args.forward_only,
            drop_states=True,
        )
    elif args.implementation == "cudnn":
        # We need to feed a placeholder input so that RecurrentInitOp
        # can infer the dimensions.
        model.param_init_net.ConstantFill([], input_blob, shape=input_shape)
        # NOTE(review): assumes the third value returned by cudnn_LSTM is the
        # final cell state — confirm against rnn_cell.cudnn_LSTM.
        output, last_hidden, last_state = rnn_cell.cudnn_LSTM(
            model=model,
            input_blob=input_blob,
            initial_states=(hidden_init, cell_init),
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="cudnnlstm",
            num_layers=1,
        )

    else:
        # Raised explicitly (rather than `assert False`) so the check is not
        # stripped when Python runs with -O.
        raise AssertionError("Unknown implementation")

    weights = model.UniformFill(labels, "weights")
    softmax, loss = model.SoftmaxWithLoss(
        [model.Flatten(output), labels, weights],
        ['softmax', 'loss'],
    )

    if not args.forward_only:
        model.AddGradientOperators([loss])

    # carry states over: the final hidden state seeds the next hidden state
    # and the final cell state seeds the next cell state.
    model.net.Copy(last_hidden, hidden_init)
    model.net.Copy(last_state, cell_init)

    # Both recurrent inputs start as zeros of shape
    # (1, batch_size, hidden_dim).
    workspace.FeedBlob(hidden_init, np.zeros(
        [1, args.batch_size, args.hidden_dim], dtype=np.float32
    ))
    workspace.FeedBlob(cell_init, np.zeros(
        [1, args.batch_size, args.hidden_dim], dtype=np.float32
    ))
    return model, output

Exemple #30

0

Afficher le fichier

    def run_model(self, V, gpu_devices, cpu_indices):
        """Train a small sparse-gather model on the given GPUs for 10 steps.

        Args:
            V: number of rows of the ``vecs`` embedding blob (shape
                ``[V, 16]``); indices are drawn from a permutation of
                ``range(V)``.
            gpu_devices: GPU device ids passed to
                ``data_parallel_model.Parallelize_GPU``; the batch of 64 is
                split evenly across them.
            cpu_indices: when truthy, ``vecs`` stays on CPU, is gathered
                there and copied to the GPU; otherwise ``vecs`` is copied to
                each GPU as "gpuvecs" and gathered on the GPU.

        Returns:
            A two-element list: the fetched embedding blob (``self.vecs``
            when ``cpu_indices`` is truthy, else "gpu_0/gpuvecs") and the
            fetched "gpu_0/fc_w" blob.
        """
        def input_builder_fun(model):
            # Inputs are fed directly into the workspace further below.
            return None

        def model_build_fun(model, loss_scale):
            if cpu_indices:
                with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                    gathered_cpu = model.net.Gather([self.vecs, 'indices'],
                                                    'gathered_cpu')

                gathered = model.CopyCPUToGPU(gathered_cpu, "gathered")
            else:
                gpu_vecs = model.param_init_net.CopyCPUToGPU(
                    self.vecs,
                    "gpuvecs",
                )
                model.params.append(gpu_vecs)
                gathered = model.net.Gather([gpu_vecs, 'indices'], 'gathered')
            flattened = model.Flatten(gathered, "flattened")
            fc = model.FC(flattened, "fc", 16 * 16, 1, ("ConstantFill", {}),
                          ("ConstantFill", {}))
            fc_fl = model.FlattenToVec(fc, "fc_fl")
            sigm = model.Sigmoid(fc_fl, "sigm")
            sq = model.SquaredL2Distance([sigm, "label"], "sq")
            loss = model.AveragedLoss(sq, "loss")
            loss = model.Scale(loss, scale=loss_scale)
            return [loss]

        def param_update_fun(model):
            ONE = model.param_init_net.ConstantFill(
                [],
                "ONE",
                shape=[1],
                value=1.0,
            )
            LR = model.CopyCPUToGPU(self.LR, "LR")
            for param in model.GetParams():
                param_grad = model.param_to_grad[param]
                # Dense gradients get a plain weighted sum; sparse gradients
                # (GradientSlice) go through a sparse momentum SGD update.
                if not isinstance(param_grad, core.GradientSlice):
                    model.WeightedSum([param, ONE, param_grad, LR], param)
                else:
                    param_momentum = model.param_init_net.ConstantFill(
                        [param],
                        '{}_momentum'.format(param),
                        value=0.0,
                    )
                    model.net.SparseMomentumSGDUpdate(
                        [
                            param_grad.values,
                            param_momentum,
                            LR,
                            param,
                            param_grad.indices,
                        ],
                        [param_grad.values, param_momentum, param],
                        momentum=0.1,
                        nesterov=0,
                    )

        workspace.ResetWorkspace()
        model = cnn.CNNModelHelper(
            order="NHWC",
            name="sparse_test{}".format(gpu_devices),
        )

        # Blobs shared by all devices are created on the CPU under the "cpu"
        # name scope.
        with core.NameScope("cpu"):
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                self.ITER = model.Iter("ITER")
                self.LR = model.net.LearningRate(
                    [self.ITER],
                    "LR",
                    base_lr=(-0.1),
                    policy="fixed",
                )
                self.vecs = model.param_init_net.UniformFill([],
                                                             "vecs",
                                                             shape=[V, 16])
                if cpu_indices:
                    model.params.append(self.vecs)
                self.ONE_CPU = model.param_init_net.ConstantFill(
                    [],
                    "ONE_CPU",
                    shape=[1],
                    value=1.0,
                )

        data_parallel_model.Parallelize_GPU(
            model,
            input_builder_fun=input_builder_fun,
            forward_pass_builder_fun=model_build_fun,
            param_update_builder_fun=param_update_fun,
            devices=gpu_devices,
        )

        # Update the vecs
        if cpu_indices:
            with core.NameScope("cpu"):
                with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                    for param in model.GetParams():
                        param_grad = model.param_to_grad[param]
                        model.ScatterWeightedSum([
                            param, self.ONE_CPU, param_grad.indices,
                            param_grad.values, self.LR
                        ], self.vecs)
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
                model.CopyGPUToCPU("gpu_0/gpuvecs", self.vecs)

        np.random.seed(2603)

        # Each run has same input, independent of number of gpus
        batch_size = 64
        for i in range(0, 10):
            # Unique indices per batch: a prefix of a random permutation.
            full_indices = np.random.permutation(V)[:batch_size * 16].reshape(
                batch_size, 16)
            full_labels = full_indices[:, 0] % 2
            batch_per_device = batch_size // len(gpu_devices)

            for (j, g) in enumerate(gpu_devices):
                st = j * batch_per_device
                en = st + batch_per_device
                indices = full_indices[st:en, :].astype(np.int32)
                labels = full_labels[st:en].astype(np.float32)

                device_for_indices = core.DeviceOption(caffe2_pb2.CPU)
                if not cpu_indices:
                    device_for_indices = core.DeviceOption(caffe2_pb2.CUDA, g)

                with core.DeviceScope(device_for_indices):
                    workspace.FeedBlob("gpu_{}/indices".format(g), indices)

                with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, g)):
                    workspace.FeedBlob("gpu_{}/label".format(g), labels)

            if i == 0:
                workspace.RunNetOnce(model.param_init_net)
                # Force vecs to be same on all runs
                orig_vecs = np.random.rand(V, 16).astype(np.float32)
                workspace.FeedBlob(self.vecs, orig_vecs)
                if not cpu_indices:
                    for g in gpu_devices:
                        workspace.FeedBlob(
                            "gpu_{}/gpuvecs".format(g),
                            orig_vecs,
                            device_option=core.DeviceOption(
                                caffe2_pb2.CUDA, g),
                        )
                workspace.CreateNet(model.net)

            workspace.RunNet(model.net.Proto().name)
            if len(gpu_devices) == 2:
                # Dump the net for debugging; the context manager guarantees
                # the file handle is closed after each write.
                with open("dump.txt", "w") as dump_file:
                    dump_file.write(str(model.net.Proto()))
                if not cpu_indices:
                    idx = workspace.FetchBlob("gpu_0/indices")
                    idx = list(idx.flatten())
                    n = len(idx)
                    nu = len(set(idx))
                    # Use the TestCase assertion so the check is not
                    # stripped when Python runs with -O.
                    self.assertEqual(n, nu,
                                     "We cannot have duplicate indices")

        # Sanity check to see the vecs were updated
        self.assertFalse(np.allclose(workspace.FetchBlob(self.vecs),
                                     orig_vecs))
        return [
            workspace.FetchBlob(self.vecs if cpu_indices else "gpu_0/gpuvecs"),
            workspace.FetchBlob("gpu_0/fc_w")
        ]