Example #1
def allcompare_process(filestore_dir, process_id, data, num_procs):
    from caffe2.python import core, data_parallel_model, workspace, dyndep
    from caffe2.python.model_helper import ModelHelper
    from caffe2.proto import caffe2_pb2
    dyndep.InitOpsLibrary("@/caffe2/caffe2/distributed:file_store_handler_ops")

    workspace.RunOperatorOnce(
        core.CreateOperator(
            "FileStoreHandlerCreate", [], ["store_handler"], path=filestore_dir
        )
    )
    rendezvous = dict(
        kv_handler="store_handler",
        shard_id=process_id,
        num_shards=num_procs,
        engine="GLOO",  # collective-ops engine; the snippet's op_engine was undefined
        exit_nets=None
    )

    model = ModelHelper()
    model._rendezvous = rendezvous

    workspace.FeedBlob("test_data", data)

    data_parallel_model._RunComparison(
        model, "test_data", core.DeviceOption(caffe2_pb2.CPU, 0)
    )
Example #2
    def test_validate(self):
        model = ModelHelper(name="test_model")
        model.params.append("aaa")
        model.params.append("bbb")
        self.assertEqual(model._Validate(), [])

        model.params.append("xxx")
        model.params.append("bbb")
        self.assertEqual(model._Validate(), ["bbb"])
Example #3
def GenerateLossOps(
    model: ModelHelper, model_id: str, output_blob: str, label_blob: str,
    loss_blob: str
) -> None:
    """
    Adds loss operators to net. The loss function is computed by a squared L2
    distance, and then averaged over all items in the minibatch.

    :param model: ModelHelper object to add loss operators to.
    :param model_id: String identifier.
    :param output_blob: Blob containing output of net.
    :param label_blob: Blob containing labels.
    :param loss_blob: Blob in which to store loss.
    """
    dist = model.SquaredL2Distance([label_blob, output_blob], model_id + "dist")
    model.AveragedLoss(dist, loss_blob)
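To make the docstring concrete, here is a minimal numpy sketch of the same computation (illustrative shapes, and assuming caffe2's SquaredL2Distance includes its documented factor of 1/2):

import numpy as np

# Toy minibatch: 3 items with 4 features each (hypothetical shapes).
label = np.random.randn(3, 4).astype(np.float32)
output = np.random.randn(3, 4).astype(np.float32)

# SquaredL2Distance produces one value per item: 0.5 * sum((a - b)^2).
dist = 0.5 * ((label - output) ** 2).sum(axis=1)

# AveragedLoss then reduces over the minibatch.
loss = dist.mean()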
Example #4
    def test_caffe2_to_onnx_value_info(self):
        caffe2_net = tempfile.NamedTemporaryFile()
        output = tempfile.NamedTemporaryFile()

        model = ModelHelper(name='caffe2-to-onnx-test')
        brew.relu(model, ["X"], "Y")
        caffe2_net.write(model.net.Proto().SerializeToString())
        caffe2_net.flush()

        args = [caffe2_net.name, '--output', output.name]
        self.assertRaisesRegexp(Exception,
                                'value info',
                                self._run_command, caffe2_to_onnx, args)

        args.extend([
            '--value-info',
            json.dumps({
                'X': (TensorProto.FLOAT, (2, 2)),
            })])
        result = self._run_command(caffe2_to_onnx, args)

        onnx_model = ModelProto()
        onnx_model.ParseFromString(output.read())
        self.assertEqual(len(onnx_model.graph.node), 1)
        self.assertEqual(onnx_model.graph.node[0].op_type, 'Relu')
        self.assertEqual(len(onnx_model.graph.initializer), 0)
Example #5
    def setUp(self):

        def myhelper(model, val=-1):
            return val

        if not brew.has_helper(myhelper):
            brew.Register(myhelper)
        self.myhelper = myhelper

        def myhelper2(model, val=-1):
            return val

        if not brew.has_helper(myhelper2):
            brew.Register(myhelper2)
        self.myhelper2 = myhelper2
        self.model = ModelHelper(name="test_model")
Example #6
    def test_crf_gradient(self, num_tags, num_words):
        base_model = ModelHelper(name='base_model')
        transitions = np.random.randn(num_tags + 2,
                                      num_tags + 2).astype(np.float32)
        predictions = np.random.randn(num_words, 1,
                                      num_tags + 2).astype(np.float32)
        initial = np.random.randn(1, num_tags + 2).astype(np.float32)
        predictions_blob, transitions_blob, initial_blob = (
            base_model.net.AddExternalInputs('predictions_blob',
                                             'crf_transitions', 'initial_blob'))

        workspace.FeedBlob(str(predictions_blob), predictions)
        workspace.FeedBlob(str(transitions_blob), transitions)
        workspace.FeedBlob(str(initial_blob), initial)

        crf_layer = crf.CRFWithLoss(base_model, num_tags, transitions_blob)
        crf_layer.build_crf_net(predictions_blob, initial_blob,
                                transitions_blob)
        op = base_model.net._net.op[-1]
        workspace.RunNetOnce(base_model.param_init_net)
        gradients_to_check = (index
                              for (index, input_name) in enumerate(op.input)
                              if input_name != "crf_net/zero_segment_id")

        inputs = [workspace.FetchBlob(name) for name in op.input]
        for param in gradients_to_check:
            self.assertGradientChecks(
                device_option=hu.cpu_do,
                op=op,
                inputs=inputs,
                outputs_to_check=param,
                outputs_with_grads=[1],
                threshold=0.05,
                stepsize=0.001,
            )
Example #7
    def test_layer_norm_op_c10_preallocated_outputs(self, X, gc, dc):
        # This test case ensures that it works correctly when output tensors are preallocated.
        axis = np.random.randint(0, len(X.shape))
        epsilon = 1e-4
        self.ws.create_blob('input').feed(X)
        m = ModelHelper(name="test")
        m.net.C10LayerNorm_DontUseThisOpYet(["input"],
                                            ["output", "mean", "stdev"],
                                            axis=axis,
                                            epsilon=epsilon)
        self.ws.create_net(m.param_init_net).run()
        net = self.ws.create_net(m.net)
        net.run()
        net.run()  # run a second time to be extra sure that the outputs are preallocated

        expected_norm, expected_mean, expected_stdev = _layer_norm_ref(
            axis, epsilon, X)
        actual_norm = self.ws.fetch_blob('output')
        actual_mean = self.ws.fetch_blob('mean')
        actual_stdev = self.ws.fetch_blob('stdev')

        torch.testing.assert_allclose(expected_norm, actual_norm)
        torch.testing.assert_allclose(expected_mean, actual_mean)
        torch.testing.assert_allclose(expected_stdev, actual_stdev)
Example #8
    def test_arg_scope_single(self):
        X = np.random.rand(64, 3, 32, 32).astype(np.float32) - 0.5

        workspace.FeedBlob("x", X)
        model = ModelHelper(name="test_model")
        with brew.arg_scope(
            brew.conv,
            stride=2,
            pad=2,
            weight_init=('XavierFill', {}),
            bias_init=('ConstantFill', {})
        ):
            brew.conv(
                model=model,
                blob_in="x",
                blob_out="out",
                dim_in=3,
                dim_out=64,
                kernel=3,
            )

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)
        out = workspace.FetchBlob("out")
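        # conv output side: (32 + 2 * pad - kernel) // stride + 1 = (32 + 4 - 3) // 2 + 1 = 17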
        self.assertEqual(out.shape, (64, 64, 17, 17))
Example #9
def _prepare_gru_unit_op(gc,
                         n,
                         d,
                         outputs_with_grads,
                         forward_only=False,
                         drop_states=False,
                         two_d_initial_states=None):
    print("Dims: (n,d) = ({},{})".format(n, d))

    def generate_input_state(n, d):
        if two_d_initial_states:
            return np.random.randn(n, d).astype(np.float32)
        else:
            return np.random.randn(1, n, d).astype(np.float32)

    model = ModelHelper(name='external')

    with scope.NameScope("test_name_scope"):
        hidden_t_prev, gates_t, seq_lengths, timestep = \
            model.net.AddScopedExternalInputs(
                "hidden_t_prev",
                "gates_t",
                'seq_lengths',
                "timestep",
            )
        workspace.FeedBlob(hidden_t_prev,
                           generate_input_state(n, d).astype(np.float32),
                           device_option=gc)
        workspace.FeedBlob(gates_t,
                           generate_input_state(n, 3 * d).astype(np.float32),
                           device_option=gc)

        hidden_t = model.net.GRUUnit(
            [
                hidden_t_prev,
                gates_t,
                seq_lengths,
                timestep,
            ],
            ['hidden_t'],
            forget_bias=0.0,
            drop_states=drop_states,
        )
        model.net.AddExternalOutputs(hidden_t)
        workspace.RunNetOnce(model.param_init_net)

        # 10 is used as a magic number to simulate some reasonable timestep
        # and generate some reasonable seq. lengths
        workspace.FeedBlob(seq_lengths,
                           np.random.randint(1, 10,
                                             size=(n, )).astype(np.int32),
                           device_option=gc)
        workspace.FeedBlob(
            timestep,
            np.random.randint(1, 10, size=(1, )).astype(np.int32),
            device_option=core.DeviceOption(caffe2_pb2.CPU),
        )
        print("Feed {}".format(timestep))

    return hidden_t, model.net
Example #10
    def test_relu(self):
        Xpos = np.ones((5, 5)).astype(np.float32) - 0.5
        Xneg = np.ones((5, 5)).astype(np.float32) - 1.5
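        # Xpos is 0.5 everywhere and Xneg is -0.5 everywhere, so relu should
        # pass Xpos through unchanged and zero Xneg out.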

        workspace.FeedBlob("xpos", Xpos)
        workspace.FeedBlob("xneg", Xneg)
        model = ModelHelper(name="test_model")
        brew.relu(model, "xpos", "out_xpos")
        brew.relu(model, "xneg", "out_xneg")
        model.Validate()
        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        pos = workspace.FetchBlob("out_xpos")
        self.assertAlmostEqual(pos.mean(), 0.5)
        neg = workspace.FetchBlob("out_xneg")
        self.assertAlmostEqual(neg.mean(), 0)
Example #11
    def _create_reward_train_net(self) -> None:
        self.reward_train_model = ModelHelper(name="reward_train_" +
                                              self.model_id)
        C2.set_model(self.reward_train_model)
        self.update_model('states', 'actions', 'rewards')
        workspace.RunNetOnce(self.reward_train_model.param_init_net)
        workspace.CreateNet(self.reward_train_model.net)
        C2.set_model(None)
Example #12
    def _create_all_q_score_net(self) -> None:
        self.all_q_score_model = ModelHelper(name="all_q_score_" +
                                             self.model_id)
        C2.set_model(self.all_q_score_model)
        self.all_q_score_output = self.get_q_values_all_actions("states", True)
        workspace.RunNetOnce(self.all_q_score_model.param_init_net)
        workspace.CreateNet(self.all_q_score_model.net)
        C2.set_model(None)
Example #13
def simple_mlp():
    model = ModelHelper(name="r")
    brew.relu(
        model,
        brew.fc(model,
                brew.relu(model, brew.fc(model, "data", "fc1", 10, 10), "rl1"),
                "fc2", 10, 10), "rl2")
    return model, [(1, 10)]
Example #14
def ResNet50(order, cudnn_ws, mkl):
    my_arg_scope = {'order': order, 'use_cudnn': True,
                    'cudnn_exhaustive_search': True,
                    'ws_nbytes_limit': str(cudnn_ws)}
    model = ModelHelper(name="alexnet", arg_scope=my_arg_scope)
    resnet.create_resnet50(model, "data", 3, 1000, is_test=True,
                           final_avg_kernel=14)
    return model, 448
Example #15
    def test_milstm_params(self):
        model = ModelHelper(name="milstm_params_test")

        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
            output, _, _, _ = rnn_cell.MILSTM(
                model=model,
                input_blob="input",
                seq_lengths="seqlengths",
                initial_states=None,
                dim_in=20,
                dim_out=[40, 20],
                scope="test",
                drop_states=True,
                return_last_layer_only=True,
            )
        for param in model.GetParams():
            self.assertNotEqual(model.get_param_info(param), None)
Example #16
def createTrainModel(lmdb_path, devices):
    """Create and return a training model, complete with training ops."""
    model = ModelHelper(name='train', arg_scope={'order': 'NCHW'})
    reader = model.CreateDB('train_reader', db=lmdb_path, db_type='lmdb')
    data_parallel_model.Parallelize_GPU(
        model,
        input_builder_fun=functools.partial(AddInputOps,
                                            reader=reader,
                                            batch_size=(BATCH_SIZE //
                                                        len(devices))),
        forward_pass_builder_fun=AddForwardPassOps,
        optimizer_builder_fun=AddOptimizerOps,
        devices=devices,
        use_nccl=True)
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model
Example #17
    def test_mobile_exporter(self):
        model = ModelHelper(name="mobile_exporter_test_model")
        # Test LeNet
        brew.conv(model, 'data', 'conv1', dim_in=1, dim_out=20, kernel=5)
        brew.max_pool(model, 'conv1', 'pool1', kernel=2, stride=2)
        brew.conv(model, 'pool1', 'conv2', dim_in=20, dim_out=50, kernel=5)
        brew.max_pool(model, 'conv2', 'pool2', kernel=2, stride=2)
        brew.fc(model, 'pool2', 'fc3', dim_in=50 * 4 * 4, dim_out=500)
        brew.relu(model, 'fc3', 'fc3')
        brew.fc(model, 'fc3', 'pred', 500, 10)
        brew.softmax(model, 'pred', 'out')

        # Create our mobile exportable networks
        workspace.RunNetOnce(model.param_init_net)
        init_net, predict_net = mobile_exporter.Export(workspace, model.net,
                                                       model.params)

        # Populate the workspace with data
        np_data = np.random.rand(1, 1, 28, 28).astype(np.float32)
        workspace.FeedBlob("data", np_data)

        workspace.CreateNet(model.net)
        workspace.RunNet(model.net)
        ref_out = workspace.FetchBlob("out")

        # Clear the workspace
        workspace.ResetWorkspace()

        # Populate the workspace with data
        workspace.RunNetOnce(init_net)
        # Fake "data" is populated by init_net, we have to replace it
        workspace.FeedBlob("data", np_data)

        # Overwrite the old net
        workspace.CreateNet(predict_net, True)
        workspace.RunNet(predict_net.name)
        manual_run_out = workspace.FetchBlob("out")
        np.testing.assert_allclose(ref_out,
                                   manual_run_out,
                                   atol=1e-10,
                                   rtol=1e-10)

        # Clear the workspace
        workspace.ResetWorkspace()

        # Predictor interface test (simulates writing to disk)
        predictor = workspace.Predictor(init_net.SerializeToString(),
                                        predict_net.SerializeToString())

        # Output is a vector of outputs but we only care about the first and only result
        predictor_out = predictor.run([np_data])
        assert len(predictor_out) == 1
        predictor_out = predictor_out[0]

        np.testing.assert_allclose(ref_out,
                                   predictor_out,
                                   atol=1e-10,
                                   rtol=1e-10)
Example #18
    def test_extract(self, T, n, d):
        model = ModelHelper(name='external')
        workspace.ResetWorkspace()

        input_blob, initial_input_blob = model.net.AddExternalInputs(
            'input', 'initial_input')

        step = ModelHelper(name='step', param_model=model)
        input_t, output_t_prev = step.net.AddExternalInput(
            'input_t', 'output_t_prev')
        output_t = step.net.Mul([input_t, output_t_prev])
        step.net.AddExternalOutput(output_t)

        inputs = np.random.randn(T, n, d).astype(np.float32)
        initial_input = np.random.randn(1, n, d).astype(np.float32)
        recurrent.recurrent_net(
            net=model.net,
            cell_net=step.net,
            inputs=[(input_t, input_blob)],
            initial_cell_inputs=[(output_t_prev, initial_input_blob)],
            links={output_t_prev: output_t},
            scope="test_rnn_sum_mull",
        )

        workspace.blobs[input_blob] = inputs
        workspace.blobs[initial_input_blob] = initial_input

        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)

        prefix = "extractTest"

        workspace.RunNet(model.net.Proto().name, T)
        retrieved_blobs = recurrent.retrieve_step_blobs(model.net, prefix)

        # needed for python3.6, which returns bytearrays instead of str
        retrieved_blobs = [x.decode() for x in retrieved_blobs]

        for i in range(T):
            blob_name = prefix + "_" + "input_t" + str(i)
            self.assertTrue(
                blob_name in retrieved_blobs,
                "blob extraction failed on timestep {}.\n\n"
                "Extracted blobs: {}\n\nLooking for {}.".format(
                    i, retrieved_blobs, blob_name))
Example #19
    def test_caffe2_simple_model(self):
        model = ModelHelper(name="mnist")
        # oddly, these input shapes don't match the net below, yet the forward pass doesn't break
        workspace.FeedBlob("data",
                           np.random.randn(1, 3, 64, 64).astype(np.float32))
        workspace.FeedBlob("label", np.random.randn(1, 1000).astype(np.int))

        with core.NameScope("conv1"):
            conv1 = brew.conv(model,
                              "data",
                              'conv1',
                              dim_in=1,
                              dim_out=20,
                              kernel=5)
            # Image size: 24 x 24 -> 12 x 12
            pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
            # Image size: 12 x 12 -> 8 x 8
            conv2 = brew.conv(model,
                              pool1,
                              'conv2',
                              dim_in=20,
                              dim_out=100,
                              kernel=5)
            # Image size: 8 x 8 -> 4 x 4
            pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)
        with core.NameScope("classifier"):
            # 100 * 4 * 4 is dim_out of the previous layer multiplied by the image size
            fc3 = brew.fc(model, pool2, 'fc3', dim_in=100 * 4 * 4, dim_out=500)
            relu = brew.relu(model, fc3, fc3)
            pred = brew.fc(model, relu, 'pred', 500, 10)
            softmax = brew.softmax(model, pred, 'softmax')
            xent = model.LabelCrossEntropy([softmax, "label"], 'xent')
            # compute the expected loss
            loss = model.AveragedLoss(xent, "loss")
        model.net.RunAllOnMKL()
        model.param_init_net.RunAllOnMKL()
        model.AddGradientOperators([loss], skip=1)
        blob_name_tracker = {}
        graph = c2_graph.model_to_graph_def(
            model,
            blob_name_tracker=blob_name_tracker,
            shapes={},
            show_simplified=False,
        )
        compare_proto(graph, self)
Example #20
    def _create_q_score_net(self) -> None:
        self.q_score_model = ModelHelper(name="q_score_" + self.model_id)
        C2.set_model(self.q_score_model)
        self.q_score_output = self.get_q_values("states", "actions", True)
        workspace.RunNetOnce(self.q_score_model.param_init_net)
        self.q_score_model.net.Proto().num_workers = \
            RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
        workspace.CreateNet(self.q_score_model.net)
        C2.set_model(None)
Example #21
def createTestModel(lmdb_path, devices):
    """Create and return a test model. Does not include training ops."""
    model = ModelHelper(name='test',
                        arg_scope={'order': 'NCHW'},
                        init_params=False)
    reader = model.CreateDB('test_reader', db=lmdb_path, db_type='lmdb')
    data_parallel_model.Parallelize_GPU(
        model,
        input_builder_fun=functools.partial(AddInputOps,
                                            reader=reader,
                                            batch_size=(BATCH_SIZE //
                                                        len(devices))),
        forward_pass_builder_fun=AddForwardPassOps,
        param_update_builder_fun=None,
        devices=devices)
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model
Example #22
def complex_resnet():
    model = ModelHelper(name="r", arg_scope={"order": "NCHW", "is_test": True})
    resnet.create_resnet50(model,
                           "data",
                           num_input_channels=1,
                           num_labels=5,
                           is_test=True,
                           no_loss=True)
    return model, [(1, 1, 224, 224)]
Example #23
def simple_resnet():
    model = ModelHelper(name="r", arg_scope={"order": "NCHW", "is_test": True})
    resnet.create_resnet_32x32(model,
                               "data",
                               num_input_channels=1,
                               num_groups=1,
                               num_labels=5,
                               is_test=True)
    return model, [(1, 1, 32, 32)]
Example #24
    def _create_internal_policy_net(self) -> None:
        self.internal_policy_model = ModelHelper(name="internal_policy_" +
                                                 self.model_id)
        C2.set_model(self.internal_policy_model)
        self.internal_policy_output = self.get_q_values_all_actions(
            "states", False)
        workspace.RunNetOnce(self.internal_policy_model.param_init_net)
        workspace.CreateNet(self.internal_policy_model.net)
        C2.set_model(None)
Example #25
    def _create_internal_policy_net(self) -> None:
        self.internal_policy_model = ModelHelper(name="q_score_" +
                                                 self.model_id)
        C2.set_model(self.internal_policy_model)
        self.internal_policy_output = C2.FlattenToVec(
            self.get_q_values('states', 'actions', False))
        workspace.RunNetOnce(self.internal_policy_model.param_init_net)
        workspace.CreateNet(self.internal_policy_model.net)
        C2.set_model(None)
Example #26
    def test_dropout(self):
        p = 0.2
        X = np.ones((100, 100)).astype(np.float32) - p
        workspace.FeedBlob("x", X)
        model = ModelHelper(name="test_model")
        brew.dropout(model, "x", "out", is_test=False)
        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)
        out = workspace.FetchBlob("out")
        self.assertLess(abs(out.mean() - (1 - p)), 0.05)
Example #27
def simple_cnn():
    model = ModelHelper(name="r", arg_scope={"order": "NCHW", "is_test": True})
    brew.conv(
        model, "data", 'conv1', 3, 16, kernel=3, stride=1
    )
    brew.spatial_bn(
        model, 'conv1', 'conv1_spatbn', 16, epsilon=1e-3
    )
    brew.relu(model, 'conv1_spatbn', 'relu1')
    return model, (1, 3, 32, 32)
Example #28
    def test_fc(self):
        m, n, k = (15, 15, 15)
        X = np.random.rand(m, k).astype(np.float32) - 0.5

        workspace.FeedBlob("x", X)
        model = ModelHelper(name="test_model")
        brew.fc(model, "x", "out_1", k, n)

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)
Example #29
    def _create_reward_train_net(self) -> None:
        self.reward_train_model = ModelHelper(name="reward_train_" +
                                              self.model_id)
        C2.set_model(self.reward_train_model)
        self.update_model('states', 'actions', 'rewards')
        workspace.RunNetOnce(self.reward_train_model.param_init_net)
        self.reward_train_model.net.Proto().num_workers = \
            RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
        workspace.CreateNet(self.reward_train_model.net)
        C2.set_model(None)
Example #30
    def test_c10_layer_norm(self, X, gc, dc):
        X = X[0]
        if len(X.shape) == 1:
            X = np.expand_dims(X, axis=0)
        axis = np.random.randint(0, len(X.shape))
        scale_dim = [1] * np.ndim(X)
        scale_dim[axis] = X.shape[axis]

        self.ws.create_blob('input').feed(X)

        model = ModelHelper(name='test_layer_norm_brew_wrapper')
        model.C10LayerNorm_DontUseThisOpYet(
            ['input'],
            ['output_1', 'output_2', 'output_3'],
            axis=axis,
            epsilon=1e-4,
        )

        self.ws.create_net(model.param_init_net).run()
        self.ws.create_net(model.net).run()
Example #31
    def _create_internal_policy_net(self) -> None:
        self.internal_policy_model = ModelHelper(name="internal_policy_" +
                                                 self.model_id)
        C2.set_model(self.internal_policy_model)
        self.internal_policy_output = self.get_q_values_all_actions(
            "states", False)
        workspace.RunNetOnce(self.internal_policy_model.param_init_net)
        self.internal_policy_model.net.Proto().num_workers = \
            RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
        workspace.CreateNet(self.internal_policy_model.net)
        C2.set_model(None)
Example #32
    def _create_reward_train_net(self) -> None:
        self.reward_train_model = ModelHelper(name="reward_train_" +
                                              self.model_id)
        C2.set_model(self.reward_train_model)
        self.update_model("states", "actions", "rewards")
        workspace.RunNetOnce(self.reward_train_model.param_init_net)
        self.reward_train_model.net.Proto().num_workers = (
            RLTrainer.DEFAULT_TRAINING_NUM_WORKERS)
        self.reward_train_model.net.Proto().type = "async_scheduling"
        workspace.CreateNet(self.reward_train_model.net)
        C2.set_model(None)
Example #33
    def _create_rnn_variant(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert RNNs without access to the full model"
        assert pred_model is not None, "cannot convert RNNs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        direction = force_unicode(attrs.pop('direction', 'forward'))

        if n.op_type == 'RNN':
            activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
        elif n.op_type == 'GRU':
            linear_before_reset = attrs.pop('linear_before_reset', 0)

        assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN/GRU/LSTM"

        if n.op_type in ['RNN', 'GRU']:
            input_blob, W, R, B, sequence_lens, initial_h = n.inputs
        elif n.op_type == 'LSTM':
            input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        for x in itertools.chain(init_model.graph.input,
                                 init_model.graph.value_info,
                                 pred_model.graph.input,
                                 pred_model.graph.value_info):
            if x.name == W:
                input_size = x.type.tensor_type.shape.dim[1].dim_value
                break
        else:
            raise RuntimeError("best-effort shape inference for RNN/GRU/LSTM failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        if n.op_type == 'RNN':
            def reform(*args):
                pass

            def make_cell(*args, **kwargs):
                return rnn_cell.BasicRNN(*args, activation=activation, **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h')], sequence_lens,
                    pred_mh, init_net, input_size, hidden_size, 1, direction_offset,
                    "/i2h_b", "/gates_t_b", "/i2h_w", "/gates_t_w",
                    reform, make_cell, lambda x: x)

        elif n.op_type == 'GRU':
            def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
                # caffe2 has a different order from onnx. We need to rearrange
                #  z r h  -> r z h
                reforms = ((W_, 'i2h_w',    True,  [(0,-1)]),
                           (R_, 'gate_t_w', False, [(0,-1)]),
                           (Bi, 'i2h_b',    True,  []),
                           (Br, 'gate_t_b', False, []))
                cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                        ['update', 'reset', 'output'], [1, 0, 2])

            def make_cell(*args, **kwargs):
                return gru_cell.GRU(*args, linear_before_reset=linear_before_reset, **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h')], sequence_lens,
                    pred_mh, init_net, input_size, hidden_size, 3, direction_offset,
                    "_bias_i2h", "_bias_gates", "/i2h_w_pre", "/gates_t_w_pre",
                    reform, make_cell, lambda x: x)

        elif n.op_type == 'LSTM':
            def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
                # caffe2 has a different order from onnx. We need to rearrange
                #   i o f c -> i f o c
                reforms = ((W_, 'i2h_w',     True, [(0, -1)]),
                           (R_, 'gates_t_w', True, [(0, -1)]),
                           (Bi, 'i2h_b',     True, []),
                           (Br, 'gates_t_b', True, []))
                cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                        ['input', 'output', 'forget', 'cell'], [0, 2, 1, 3])

            def make_cell(*args, **kwargs):
                return rnn_cell.LSTM(*args, **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h'), (initial_c, '/initial_c')], sequence_lens,
                    pred_mh, init_net, input_size, hidden_size, 4, direction_offset,
                    "/i2h_b", "/gates_t_b", "/i2h_w", "/gates_t_w",
                    reform, make_cell, lambda x: [x[0], x[1], x[3]])

        if direction == 'forward':
            outputs = make_rnn(0)

            # in the forward case, storage is shared between the
            # last outputs. We need to decouple them so that the
            # VariableLengthSequencePadding only mutates
            # n.outputs[0]
            for i in range(1, len(outputs)):
                pred_mh.net.Copy(outputs[i], n.outputs[i])

            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net", blob_remap={ outputs[0]: n.outputs[0] }
            )
        elif direction == 'bidirectional':
            outputs_f = make_rnn(0)
            outputs_b = make_rnn(1)

            pred_mh.net.Concat([outputs_f[0], outputs_b[0]],
                               [n.outputs[0], cls.dummy_name()], axis=2)
            for i in range(1, len(n.outputs)):
                pred_mh.net.Concat([outputs_f[i], outputs_b[i]],
                                   [n.outputs[i], cls.dummy_name()], axis=0)

        if sequence_lens is not None:
            pred_mh.net.VariableLengthSequencePadding(
                [n.outputs[0], sequence_lens], [n.outputs[0]])

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
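The gate-reordering comments above (ONNX stacks GRU gates as z, r, h while caffe2 expects r, z, h; similarly i, o, f, c versus i, f, o, c for LSTM) amount to a row-block permutation of the stacked weight matrices. A minimal numpy sketch of that idea, with hypothetical shapes rather than the backend's actual _rnn_reform_weights helper:

import numpy as np

hidden_size, input_size = 4, 5

# ONNX stacks the GRU gate weights as [z; r; h] blocks along axis 0.
W_onnx = np.random.randn(3 * hidden_size, input_size).astype(np.float32)

# caffe2 wants [r; z; h]: permute the hidden_size-row blocks, using the
# same [1, 0, 2] index list the GRU reform above passes along.
order = [1, 0, 2]
W_caffe2 = W_onnx.reshape(3, hidden_size, input_size)[order].reshape(
    3 * hidden_size, input_size)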