Esempio n. 1
0
 def testFetchFeedBlobZeroDim(self):
     """Feed an array with a zero-length axis and check it round-trips.

     The fetched blob must keep both the (2, 0, 3) shape and the float32
     dtype of the array that was fed.
     """
     blob_name = "testblob_empty"
     expected_shape = (2, 0, 3)
     empty = np.empty(shape=expected_shape, dtype=np.float32)

     fed = workspace.FeedBlob(blob_name, empty)
     self.assertEqual(fed, True)

     result = workspace.FetchBlob(blob_name)
     self.assertEqual(result.shape, expected_shape)
     self.assertEqual(result.dtype, np.float32)
def run_single_segms(
    net,
    image,
    target_size,
    pixel_means=PIXEL_MEANS_DEFAULT,
    pixel_stds=PIXEL_STDS_DEFAULT,
    rle_encode=True,
    max_size=1333,
):
    """Run ``net`` on one image through ``nnapi_runner`` on an adb device.

    The preprocessed inputs, plus a zero-filled template for every external
    output of ``net``, are serialized into a NetDef and pushed to the device
    with ``adb push``. The runner is then started with ``adb shell`` and its
    ``output_blobs.pb`` is pulled back and deserialized into the workspace.

    Args:
        net: Net definition; its ``op`` list and ``external_output`` names
            are read here.
        image: Input image. It is forwarded to ``utils2.prepare_blobs`` and
            the first two entries of ``image.shape`` are passed to
            ``utils2.compute_segm_results``.
        target_size: Forwarded to ``utils2.prepare_blobs``.
        pixel_means: Forwarded to ``utils2.prepare_blobs``.
        pixel_stds: Forwarded to ``utils2.prepare_blobs``.
        rle_encode: Forwarded to ``utils2.compute_segm_results``.
        max_size: Forwarded to ``utils2.prepare_blobs``.

    Returns:
        dict with keys ``"classids"``, ``"boxes"`` (the boxes with the scores
        appended as a last column), ``"masks"`` and ``"im_masks"``.
        ``"im_masks"`` is an empty list when there are no boxes. If anything
        fails while running the model or reading its outputs, the error is
        printed and all arrays are returned empty.
    """
    inputs = utils2.prepare_blobs(
        image,
        target_size=target_size,
        max_size=max_size,
        pixel_means=pixel_means,
        pixel_stds=pixel_stds,
    )

    # Prepare inputs for AABB and Int8AABB operators
    im_info = inputs["im_info"]
    scale = im_info[0][2]
    inputs["im_infoq"] = np.rint(im_info[:, :2] * 8.0).astype(np.uint16)
    inputs["im_info2"] = im_info[:, :2]

    blob_names = []
    ser_blobs = []

    # Serialize inputs for remote device
    for k, v in inputs.items():
        workspace.FeedBlob(k, v)
        blob_names.append(k)
        ser_blobs.append(workspace.SerializeBlob(k))

    # Serialize output templates for remote device
    fully_quantized = any(op.type == "Int8AABBRoIProposals" for op in net.op)
    bbox_type = np.uint16 if fully_quantized else np.float32
    output_templates = {
        "score_nms": np.zeros((LIMIT,), np.float32),
        "bbox_nms": np.zeros((LIMIT, 4), bbox_type),
        "class_nms": np.zeros((LIMIT,), np.int32),
        "mask_fcn_probs": np.zeros((LIMIT, CLASSES, RES, RES), np.float32),
    }
    for out_name in net.external_output:
        # The template is fed under a scratch name but registered under the
        # real output name.
        fake_name = out_name + "_empty_template"
        blob_names.append(out_name)
        workspace.FeedBlob(fake_name, output_templates[out_name])
        ser_blobs.append(workspace.SerializeBlob(fake_name))

    # Package inputs and output templates
    inout_netdef = caffe2_pb2.NetDef()
    inout_netdef.arg.extend([
        utils.MakeArgument("blob_names", blob_names),
        utils.MakeArgument("ser_blobs", ser_blobs),
    ])

    # Send in/out to the remote device
    with tempfile.NamedTemporaryFile() as inout_file:
        inout_file.write(inout_netdef.SerializeToString())
        # Flush so adb sees the complete file before pushing it.
        inout_file.flush()
        subprocess.check_call(
            ["adb", "push", inout_file.name, "/data/local/tmp/input_output.pb"]
        )

    try:
        # Run the model
        reference_enabled = os.environ.get("USE_CAFFE2_REFERENCE") in (
            "1", "true", "yes", "on"
        )
        use_caffe2 = "--use_caffe2_reference true" if reference_enabled else ""
        remote_cmd = (
            "cd /data/local/tmp ; GLOG_logtostderr=true GLOG_v=0 ./nnapi_runner %s "
            "--init_net init_net.pb --predict_net predict_net.pb "
            "--inout_net input_output.pb --out_path output_blobs.pb" % use_caffe2
        )
        # Passed as an argv list (like the push/pull calls) so no local shell
        # is involved; the remote command line itself is unchanged.
        subprocess.check_call(["adb", "shell", remote_cmd])

        # Retrieve and deserialize outputs
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = os.path.join(tmpdir, "output_blobs.pb")
            subprocess.check_call(
                ["adb", "pull", "/data/local/tmp/output_blobs.pb", output_file]
            )

            out_net = caffe2_pb2.NetDef()
            with open(output_file, "rb") as handle:
                out_net.ParseFromString(handle.read())

        all_outputs = utils.ArgsToDict(out_net.arg)["outputs"]
        for output in all_outputs:
            # Parse once only to learn the blob name, then let the workspace
            # deserialize the same bytes.
            bp = caffe2_pb2.BlobProto()
            bp.ParseFromString(output)
            workspace.DeserializeBlob(bp.name, output)

        classids = workspace.FetchBlob("class_nms")
        scores = workspace.FetchBlob("score_nms")  # bbox scores, (R, )
        boxes = workspace.FetchBlob("bbox_nms")  # i.e., boxes, (R, 4*1)
        masks = workspace.FetchBlob("mask_fcn_probs")  # (R, cls, mask_dim, mask_dim)
        if boxes.dtype == np.uint16:
            # Quantized boxes are stored in 1/8 units (see the * 8.0 above);
            # convert back to float and undo the image scale.
            boxes = boxes.astype(np.float32) * 0.125
            boxes /= scale
    except Exception as e:
        # Deliberate best effort: report the error and fall back to
        # "nothing detected" (may not detect anything at all).
        print(e)
        scores = np.zeros((0,), dtype=np.float32)
        boxes = np.zeros((0, 4), dtype=np.float32)
        # int32 to match the "class_nms" output template above.
        classids = np.zeros((0,), dtype=np.int32)
        masks = np.zeros((0, 1, 1, 1), dtype=np.float32)

    # For non-quantized boxes the division by the image scale is included in
    # the model, so it is not repeated here.

    R = boxes.shape[0]
    im_masks = []
    if R > 0:
        im_dims = image.shape
        im_masks = utils2.compute_segm_results(
            masks, boxes, classids, im_dims[0], im_dims[1], rle_encode=rle_encode
        )

    boxes = np.column_stack((boxes, scores))

    ret = {"classids": classids, "boxes": boxes, "masks": masks, "im_masks": im_masks}
    return ret
Esempio n. 3
0
    def test_prepare_normalization_and_normalize(self):
        """Identify normalization parameters, normalize, and check the output.

        Parameters are identified for every feature returned by
        ``read_data()``, a normalization net is built with
        ``PreprocessorNet.normalize_dense_matrix`` and run on the raw feature
        matrix, and the resulting columns are validated per feature type.
        """
        feature_value_map = read_data()

        normalization_parameters = {}
        for name, values in feature_value_map.items():
            normalization_parameters[name] = normalization.identify_parameter(
                name, values, 10, feature_type=self._feature_type_override(name)
            )
        for k, v in normalization_parameters.items():
            if id_to_type(k) == CONTINUOUS:
                self.assertEqual(v.feature_type, CONTINUOUS)
                self.assertIs(v.boxcox_lambda, None)
                self.assertIs(v.boxcox_shift, None)
            elif id_to_type(k) == BOXCOX:
                self.assertEqual(v.feature_type, BOXCOX)
                self.assertIsNot(v.boxcox_lambda, None)
                self.assertIsNot(v.boxcox_shift, None)
            else:
                # unittest assertion instead of a bare assert, which would be
                # stripped when running under -O.
                self.assertEqual(v.feature_type, id_to_type(k))
        sorted_features, _ = sort_features_by_normalization(normalization_parameters)

        norm_net = core.Net("net")
        C2.set_net(norm_net)
        preprocessor = PreprocessorNet()
        input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)
        for i, feature in enumerate(sorted_features):
            input_matrix[:, i] = feature_value_map[feature]
        input_matrix_blob = "input_matrix_blob"
        # Feed an empty placeholder first so the blob exists while the net is
        # being built; the real matrix is fed just before running.
        workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
        output_blob, _ = preprocessor.normalize_dense_matrix(
            input_matrix_blob, sorted_features, normalization_parameters, "", False
        )
        workspace.FeedBlob(input_matrix_blob, input_matrix)
        workspace.RunNetOnce(norm_net)
        normalized_feature_matrix = workspace.FetchBlob(output_blob)

        # Split the output matrix back into per-feature column groups: ENUM
        # features take one column per possible value, all others one column.
        normalized_features = {}
        on_column = 0
        for feature in sorted_features:
            norm = normalization_parameters[feature]
            if norm.feature_type == ENUM:
                column_size = len(norm.possible_values)
            else:
                column_size = 1
            normalized_features[feature] = normalized_feature_matrix[
                :, on_column : (on_column + column_size)
            ]
            on_column += column_size

        self.assertTrue(
            all(
                np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
                for parameter in normalization_parameters.values()
            )
        )
        for k, v in normalized_features.items():
            self.assertTrue(np.all(np.isfinite(v)))
            feature_type = normalization_parameters[k].feature_type
            if feature_type == identify_types.PROBABILITY:
                sigmoidv = special.expit(v)
                self.assertTrue(
                    np.all(
                        np.logical_and(np.greater(sigmoidv, 0), np.less(sigmoidv, 1))
                    )
                )
            elif feature_type == identify_types.ENUM:
                possible_values = normalization_parameters[k].possible_values
                self.assertEqual(v.shape[0], len(feature_value_map[k]))
                self.assertEqual(v.shape[1], len(possible_values))

                possible_value_map = {}
                for i, possible_value in enumerate(possible_values):
                    possible_value_map[possible_value] = i

                # Each row must be one-hot at the index of the original value.
                for i, row in enumerate(v):
                    original_feature = feature_value_map[k][i]
                    self.assertEqual(
                        possible_value_map[original_feature], np.where(row == 1)[0][0]
                    )
            elif feature_type == identify_types.QUANTILE:
                # NOTE(review): v has a single column here, so v[0] holds one
                # element and only the first sample is compared — confirm
                # whether v[:, 0] was intended.
                for i, feature in enumerate(v[0]):
                    original_feature = feature_value_map[k][i]
                    expected = NumpyFeatureProcessor.value_to_quantile(
                        original_feature, normalization_parameters[k].quantiles
                    )
                    self.assertAlmostEqual(feature, expected, 2)
            elif feature_type == identify_types.BINARY:
                pass
            elif (
                feature_type == identify_types.CONTINUOUS
                or feature_type == identify_types.BOXCOX
            ):
                one_stddev = np.isclose(np.std(v, ddof=1), 1, atol=0.01)
                zero_stddev = np.isclose(np.std(v, ddof=1), 0, atol=0.01)
                zero_mean = np.isclose(np.mean(v), 0, atol=0.01)
                self.assertTrue(
                    np.all(zero_mean),
                    "mean of feature {} is {}, not 0".format(k, np.mean(v)),
                )
                self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
            elif feature_type == identify_types.CONTINUOUS_ACTION:
                less_than_max = v < 1
                more_than_min = v > -1
                self.assertTrue(
                    np.all(less_than_max),
                    "values are not less than 1: {}".format(v[~less_than_max]),
                )
                self.assertTrue(
                    np.all(more_than_min),
                    "values are not more than -1: {}".format(v[~more_than_min]),
                )
            else:
                raise NotImplementedError()
Esempio n. 4
0
def RunEpoch(
    args,
    epoch,
    train_model,
    test_model,
    total_batch_size,
    num_shards,
    expname,
    explog,
    best_accuracy,
):
    '''
    Run one epoch of the trainer.

    Runs ``args.epoch_size / total_batch_size / num_shards`` training
    iterations, optionally runs ``args.test_iters`` iterations of
    ``test_model``, and logs the metrics through ``explog``.

    Args:
        args: Options object; ``num_epochs``, ``epoch_size`` and
            ``test_iters`` are read here.
        epoch: Index of the epoch being run.
        train_model: Model whose net is run for training.
        test_model: Model whose net is run for testing, or None to skip
            testing (test accuracy is then reported as -1).
        total_batch_size: Number of images per training iteration.
        num_shards: Number of shards the epoch is divided between.
        expname: Unused in this function.
        explog: Logger receiving the per-epoch metrics via ``log()``.
        best_accuracy: Best test accuracy seen so far.

    Returns:
        Tuple ``(epoch + 1, best_accuracy)`` with ``best_accuracy`` updated
        if this epoch's test accuracy exceeded it.

    Raises:
        AssertionError: if the fetched training loss is not below 40.

    TODO: add checkpointing here.
    '''
    # TODO: add loading from checkpoint
    log.info("Starting epoch {}/{}".format(epoch, args.num_epochs))
    epoch_iters = int(args.epoch_size / total_batch_size / num_shards)
    # Blob name prefix of the first training device; loop-invariant.
    prefix = "{}_{}".format(train_model._device_prefix,
                            train_model._devices[0])
    # Keep `i` bound even when epoch_iters == 0, since it is used for
    # batch_count after the loop.
    i = 0
    for i in range(epoch_iters):
        # This timeout is required (temporarily) since CUDA-NCCL
        # operators might deadlock when synchronizing between GPUs.
        timeout = 600.0 if i == 0 else 60.0
        with timeout_guard.CompleteInTimeOrDie(timeout):
            t1 = time.time()
            workspace.RunNet(train_model.net.Proto().name)
            t2 = time.time()
            dt = t2 - t1

        fmt = "Finished iteration {}/{} of epoch {} ({:.2f} images/sec)"
        log.info(fmt.format(i + 1, epoch_iters, epoch, total_batch_size / dt))
        accuracy = workspace.FetchBlob(prefix + '/accuracy')
        loss = workspace.FetchBlob(prefix + '/loss')
        train_fmt = "Training loss: {}, accuracy: {}"
        log.info(train_fmt.format(loss, accuracy))

    num_images = epoch * epoch_iters * total_batch_size
    accuracy = workspace.FetchBlob(prefix + '/accuracy')
    loss = workspace.FetchBlob(prefix + '/loss')
    learning_rate = workspace.FetchBlob(
        data_parallel_model.GetLearningRateBlobNames(train_model)[0])
    test_accuracy = 0
    if test_model is not None:
        # Average the accuracy over every device and test iteration.
        ntests = 0
        for _ in range(0, args.test_iters):
            workspace.RunNet(test_model.net.Proto().name)
            for g in test_model._devices:
                # ndarray.item() replaces the deprecated np.asscalar().
                test_accuracy += workspace.FetchBlob(
                    "{}_{}".format(test_model._device_prefix, g) +
                    '/accuracy').item()
                ntests += 1
        if ntests > 0:
            # Guard against test_iters == 0 (or no devices).
            test_accuracy /= ntests
    else:
        test_accuracy = (-1)
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy

    explog.log(input_count=num_images,
               batch_count=(i + epoch * epoch_iters),
               additional_values={
                   'accuracy': accuracy,
                   'loss': loss,
                   'learning_rate': learning_rate,
                   'epoch': epoch,
                   'test_accuracy': test_accuracy,
                   'best_accuracy': best_accuracy,
               })
    assert loss < 40, "Exploded gradients :("

    # TODO: add checkpointing
    return epoch + 1, best_accuracy
Esempio n. 5
0
    def test_cpu2gpu_gpu2cpu_gradients(self):
        """Check parameter updates on a net that copies CPU -> GPU -> CPU.

        An FC layer is built under the "cpu" scope, its output is copied to
        the GPU for a second FC layer, and the prediction is copied back for
        a softmax loss. After one run every parameter must equal its initial
        value minus 2.0 times its gradient.
        """
        model = cnn.CNNModelHelper(name="copy_test")

        batch = 32
        cpu_opt = core.DeviceOption(caffe2_pb2.CPU, 0)
        gpu_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)

        with core.NameScope("cpu"), core.DeviceScope(cpu_opt):
            x_cpu = model.FC('data', 'x_cpu', 16, 8)

        with core.NameScope("gpu_0"), core.DeviceScope(gpu_opt):
            x_gpu = model.CopyCPUToGPU(x_cpu, "x_gpu")
            pred_gpu = model.FC(x_gpu, "pred_gpu", 8, 4)
            pred_cpu = model.CopyGPUToCPU(pred_gpu, "pred_cpu")

        with core.DeviceScope(cpu_opt), core.NameScope("cpu"):
            _, loss = model.SoftmaxWithLoss(
                [pred_cpu, "label"],
                ["softmax", "loss"],
            )

        gradient_map = model.AddGradientOperators([loss])

        # Add param updates (for cpu and gpu)
        init_net = model.param_init_net

        def add_param_updates():
            # Must be called inside the name/device scopes it should apply to.
            one = init_net.ConstantFill([], "ONE", shape=[1], value=1.)
            lr = init_net.ConstantFill([], "LR", shape=[1], value=-2.0)
            for param in model.GetParams():
                model.WeightedSum(
                    [param, one, gradient_map[param], lr],
                    param,
                )

        with core.DeviceScope(cpu_opt), core.NameScope("cpu"):
            add_param_updates()

        with core.NameScope("gpu_0"), core.DeviceScope(gpu_opt):
            add_param_updates()

        with core.DeviceScope(cpu_opt):
            features = np.random.rand(batch, 16).astype(np.float32)
            workspace.FeedBlob('cpu/data', features)
            labels = np.random.randint(4, size=batch).astype(np.int32)
            workspace.FeedBlob('cpu/label', labels)

        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)

        # Snapshot the parameters before and after a single run of the net.
        initial_params = {p: workspace.FetchBlob(p) for p in model.GetParams()}
        workspace.RunNet(model.net.Proto().name)
        updated_params = {p: workspace.FetchBlob(p) for p in model.GetParams()}

        for p in model.GetParams():
            grad = workspace.FetchBlob(gradient_map[p])
            expected = initial_params[p] - 2.0 * grad
            actual = updated_params[p]
            self.assertTrue(
                np.array_equal(expected, actual),
                "Mismatch: {}: {}, {}".format(p, expected, actual),
            )
Esempio n. 6
0
            print("Iter: {}, Loss: {}, Accuracy: {}".format(
                i, loss[i], accuracy[i]))

    #
    # visualize the data and the results
    #
    plt.figure("Summary of Training")
    plt.title("Summary of Training Run")
    plt.plot(loss, 'b')
    plt.plot(accuracy, 'r')
    plt.xlabel("Iteration")
    plt.legend(('Loss', 'Accuracy'), loc='upper right')

    plt.figure("Training Data")
    plt.title("Training Data Sample")
    data = workspace.FetchBlob('data')
    _ = visualize.NCHW.ShowMultiple(data)

    plt.figure("Softmax Prediction")
    plt.title("Softmax Prediction for the first image above")
    plt.ylabel('Confidence')
    plt.xlabel('Label')
    # Grab and visualize the softmax blob for the batch we just visualized. Since batch size
    #  is 64, the softmax blob contains 64 vectors, one for each image in the batch. To grab
    #  the vector for the first image, we can simply index the fetched softmax blob at zero.
    softmax = workspace.FetchBlob('softmax')
    _ = plt.plot(softmax[0], 'ro')

    # if USE_LENET_MODEL:
    #     plt.figure("Conv1 5th Feature Maps")
    #     plt.title("Conv1 Output Feature Maps for Most Recent Mini-batch")
    def test_convolution_relu_fusion(self, stride, pad, kernel, size,
                             input_channels, output_channels,
                             batch_size, use_bias, group, gc, dc):
        """Compare Conv+Relu against manually and automatically fused ConvFusion.

        The reference result comes from running Conv followed by an in-place
        Relu with ``dc[0]``. It is compared with (a) a hand-built
        "ConvFusion" operator on ``dc[1]`` and (b) the single operator left
        after ``optimizeForIDEEP`` rewrites a Conv+Relu net on ``dc[1]``.

        The checks run in a scratch workspace; the previously active
        workspace is restored even if a check fails.
        """
        conv = core.CreateOperator(
            "Conv",
            ["X0", "w0", "b0"] if use_bias else ["X0", "w0"],
            ["Y0"],
            stride=stride,
            pad=pad,
            kernel=kernel,
            group=group,
            device_option=dc[0]
        )
        relu = core.CreateOperator(
            "Relu",
            ["Y0"],
            ["Y0"],
            device_option=dc[0]
        )

        # Manual fusion
        conv_fusion = core.CreateOperator(
            "ConvFusion",
            ["X1", "w1", "b1"] if use_bias else ["X1", "w1"],
            ["Y1"],
            stride=stride,
            pad=pad,
            kernel=kernel,
            group=group,
            fusion_type=1,
            device_option=dc[1]
        )

        # Auto fusion
        old_net = caffe2_pb2.NetDef()
        conv_old = caffe2_pb2.OperatorDef()
        conv_old.CopyFrom(conv)
        conv_old.device_option.CopyFrom(dc[1])
        relu_old = caffe2_pb2.OperatorDef()
        relu_old.CopyFrom(relu)
        relu_old.device_option.CopyFrom(dc[1])
        old_net.op.extend([conv_old, relu_old])
        net = core.Net("net")
        net.Proto().CopyFrom(old_net)
        optimizeForIDEEP(net)
        # The two operators must have been fused into a single ConvFusion.
        self.assertEqual(len(net.Proto().op), 1)
        self.assertEqual(net.Proto().op[0].type, "ConvFusion")

        # Random inputs centred on zero.
        X = np.random.rand(
            batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
        w = np.random.rand(
                output_channels * group, input_channels, kernel, kernel) \
            .astype(np.float32) - 0.5
        b = np.random.rand(output_channels * group).astype(np.float32) - 0.5

        def check_close(reference, candidate, label):
            # Print both outputs and the worst deviation before failing.
            if not np.allclose(reference, candidate, atol=0.01, rtol=0.01):
                print(candidate.flatten())
                print(reference.flatten())
                print(np.max(np.abs(candidate - reference)))
                self.fail("{} output differs from Conv+Relu".format(label))

        old_ws_name = workspace.CurrentWorkspace()
        workspace.SwitchWorkspace("_device_check_", True)
        try:
            # Reference: separate Conv and Relu.
            workspace.FeedBlob('X0', X, dc[0])
            workspace.FeedBlob('w0', w, dc[0])
            workspace.FeedBlob('b0', b, dc[0])
            workspace.RunOperatorOnce(conv)
            workspace.RunOperatorOnce(relu)
            Y0 = workspace.FetchBlob('Y0')

            # Manually fused operator.
            workspace.ResetWorkspace()
            workspace.FeedBlob('X1', X, dc[1])
            workspace.FeedBlob('w1', w, dc[1])
            workspace.FeedBlob('b1', b, dc[1])
            workspace.RunOperatorOnce(conv_fusion)
            Y1 = workspace.FetchBlob('Y1')
            check_close(Y0, Y1, "Manual ConvFusion")

            # Automatically fused operator produced by optimizeForIDEEP.
            workspace.ResetWorkspace()
            workspace.FeedBlob('X0', X, dc[1])
            workspace.FeedBlob('w0', w, dc[1])
            workspace.FeedBlob('b0', b, dc[1])
            workspace.RunOperatorOnce(net.Proto().op[0])
            Y2 = workspace.FetchBlob('Y0')
            check_close(Y0, Y2, "Auto ConvFusion")
        finally:
            # Always return to the caller's workspace, even on failure.
            workspace.SwitchWorkspace(old_ws_name)
Esempio n. 8
0
# print(detection_out.shape)
# Append the detection evaluation operator to the net; it reads the
# 'detection_out' and 'gt_label' blobs and writes 'detection_eval'.
net.DetectionEvalute(
    ['detection_out', 'gt_label'], ['detection_eval'],
    num_classes=21,
    overlap_threshold=0.001,
    resize_valid=False,
    name_size_file=
    '/home/ernie/caffe2/caffe2/python/ssd_test/detection_eval/test_name_size.txt'
)

workspace.FeedBlob("loc", mbox_loc)
workspace.FeedBlob("conf", mbox_conf)
workspace.FeedBlob("prior", mbox_priorbox)
workspace.FeedBlob("gt_label", gt_label)
# workspace.FeedBlob('detection_out', detection_out)

workspace.CreateNet(net.Proto())
# print() with a single argument behaves the same on Python 2 and 3.
print(net.Proto())

workspace.RunNet("detection_eval_test", 1)

conf_softmax_flat = workspace.FetchBlob('conf_softmax_flat')
detections = workspace.FetchBlob('detection_out')
detection_eval = workspace.FetchBlob('detection_eval')

# Print arrays in full. sys.maxsize is the documented way to disable
# summarization; np.NaN is gone in NumPy 2 and NaN is rejected as a threshold.
import sys
np.set_printoptions(threshold=sys.maxsize)

# print(conf_softmax_flat)
# print(detections)
print(detection_eval)
Esempio n. 9
0
    def TrainModel(self):
        """Train the model on ``self.text`` in an endless loop.

        The text is split into ``self.batch_size`` blocks, one per batch
        entry. Every iteration takes the next ``self.seq_length`` characters
        of each block as one-hot input, uses the following characters as
        targets, and runs the model net once. The smoothed loss is logged to
        ``experiment`` on every iteration; every ``self.iters_to_report``
        iterations throughput is printed, sample text is generated and the
        average loss since the last report is logged.

        The loop has no exit condition in this method.
        """
        log.debug("Training model")

        workspace.RunNetOnce(self.model.param_init_net)

        # As though we predict the same probability for each character
        smooth_loss = -np.log(1.0 / self.D) * self.seq_length
        last_n_iter = 0
        last_n_loss = 0.0
        num_iter = 0
        N = len(self.text)

        # We split text into batch_size pieces. Each piece will be used only
        # by a corresponding batch during the training process
        text_block_positions = np.zeros(self.batch_size, dtype=np.int32)
        text_block_size = N // self.batch_size
        text_block_starts = list(range(0, N, text_block_size))
        text_block_sizes = [text_block_size] * self.batch_size
        # The last block absorbs the remainder of the text.
        text_block_sizes[self.batch_size - 1] += N % self.batch_size
        assert sum(text_block_sizes) == N

        # Writing to output states which will be copied to input
        # states within the loop below
        workspace.FeedBlob(
            self.hidden_output,
            np.zeros([1, self.batch_size, self.hidden_size], dtype=np.float32))
        workspace.FeedBlob(
            self.cell_state,
            np.zeros([1, self.batch_size, self.hidden_size], dtype=np.float32))
        workspace.CreateNet(self.prepare_state)

        graph = net_drawer.GetPydotGraph(self.model.net, "mnist", rankdir="LR")
        experiment.set_model_graph(graph)

        # We iterate over text in a loop many times. Each time we pick a
        # seq_length segment and feed it to LSTM as a sequence
        last_time = datetime.now()
        progress = 0
        while True:
            workspace.FeedBlob(
                "seq_lengths",
                np.array([self.seq_length] * self.batch_size, dtype=np.int32))
            workspace.RunNet(self.prepare_state.Name())

            # One-hot encoded inputs; named input_data to avoid shadowing the
            # builtin input().
            input_data = np.zeros([self.seq_length, self.batch_size,
                                   self.D]).astype(np.float32)
            target = np.zeros([self.seq_length * self.batch_size
                               ]).astype(np.int32)

            for e in range(self.batch_size):
                for i in range(self.seq_length):
                    pos = text_block_starts[e] + text_block_positions[e]
                    input_data[i][e][self._idx_at_pos(pos)] = 1
                    # The target is the character following the input one,
                    # wrapping around at the end of the text.
                    target[i * self.batch_size + e] =\
                        self._idx_at_pos((pos + 1) % N)
                    text_block_positions[e] = (text_block_positions[e] +
                                               1) % text_block_sizes[e]
                    progress += 1

            workspace.FeedBlob('input_blob', input_data)
            workspace.FeedBlob('target', target)

            CreateNetOnce(self.model.net)
            workspace.RunNet(self.model.net.Name())

            num_iter += 1
            last_n_iter += 1

            if num_iter % self.iters_to_report == 0:
                new_time = datetime.now()
                print("Characters Per Second: {}".format(
                    int(progress / (new_time - last_time).total_seconds())))
                print("Iterations Per Second: {}".format(
                    int(self.iters_to_report /
                        (new_time - last_time).total_seconds())))

                last_time = new_time
                progress = 0

                print("{} Iteration {} {}".format('-' * 10, num_iter,
                                                  '-' * 10))

            loss = workspace.FetchBlob(self.loss) * self.seq_length
            # Exponential moving average of the loss.
            smooth_loss = 0.999 * smooth_loss + 0.001 * loss
            last_n_loss += loss

            experiment.log_metric("loss", smooth_loss)

            if num_iter % self.iters_to_report == 0:
                self.GenerateText(500, np.random.choice(self.vocab))
                average_loss = last_n_loss / last_n_iter

                log.debug("Loss since last report: {}".format(average_loss))
                log.debug("Smooth loss: {}".format(smooth_loss))

                last_n_loss = 0.0
                last_n_iter = 0
Esempio n. 10
0
def bmuf_process(filestore_dir,
                 process_id,
                 shared_results,
                 cpu_device=False,
                 nesterov=False):
    """Run one shard of a two-shard BMUF test and record the observed blobs.

    Process 0 uses devices [0, 1] and every other process uses [2, 3]. The
    model is parallelized with ``data_parallel_model.Parallelize_BMUF`` using
    a file-store rendezvous rooted at ``filestore_dir``. Parameter, momentum
    and block-gradient blobs are fetched at several points and stored in
    ``shared_results[process_id]``.

    Args:
        filestore_dir: Path given to the FileStoreHandlerCreate operator.
        process_id: Shard id of this process.
        shared_results: Mapping that receives the results dict under
            ``process_id``.
        cpu_device: Use CPU devices instead of GPUs when true.
        nesterov: Forwarded to ``Parallelize_BMUF``.

    Returns:
        None. In GPU mode the function returns early, without writing any
        results, when GPU support is missing or fewer than 4 GPUs exist.
    """
    # We need to import caffe2 in every process to initialize CUDA independently.
    from caffe2.python import core, cnn, data_parallel_model, dyndep, workspace
    from caffe2.proto import caffe2_pb2
    dyndep.InitOpsLibrary("@/caffe2/caffe2/distributed:file_store_handler_ops")

    if not cpu_device:
        if not workspace.has_gpu_support:
            log.info('No GPU support test is Ignored.')
            return
        if workspace.NumGpuDevices() < 4:
            log.info('Not enough GPU support, test IGNORED')
            return

    model = cnn.CNNModelHelper(order="NHWC", name="test")
    if cpu_device:
        device_type, device_prefix = caffe2_pb2.CPU, "cpu"
    else:
        device_type, device_prefix = workspace.GpuDeviceType, "gpu"

    devices = [2, 3] if process_id != 0 else [0, 1]

    def _model_build_fun(model, loss_scale):
        # Zero-initialized FC -> flatten -> sigmoid -> scaled averaged L2 loss.
        const_fill = ("ConstantFill", {})
        fc = model.FC("data", "fc", 16, 1, const_fill, ("ConstantFill", {}))
        flat = model.FlattenToVec(fc, "fc_fl")
        activated = model.Sigmoid(flat, "sigm")
        distance = model.SquaredL2Distance([activated, "label"], "sq")
        averaged = model.AveragedLoss(distance, "loss")
        scaled = model.Scale(averaged, scale=loss_scale)

        # For testing explicit sync
        model.param_init_net.UniformFill([], ["sync_num"], shape=[1])
        return [scaled]

    def _input_builder_fun(model):
        # Inputs are fed directly into the workspace by _generate_data.
        return None

    def _param_update_fun(model):
        # Plain SGD step: param = 1 * param + LR * grad, with fixed LR -0.1.
        iteration = model.Iter("ITER")
        lr = model.net.LearningRate(
            [iteration],
            "LR",
            base_lr=(-0.1),
            policy="fixed",
        )
        one = model.param_init_net.ConstantFill(
            [],
            "ONE",
            shape=[1],
            value=1.0,
        )
        for param in model.GetParams():
            model.WeightedSum(
                [param, one, model.param_to_grad[param], lr], param)

    def _generate_data(devices, process_id, device_type, device_prefix):
        np.random.seed(26 + process_id * 10)
        # Each run has same input, independent of number of gpus
        batch_size = 64
        per_device = batch_size // len(devices)
        for _ in range(0, 10):
            full_data = np.random.rand(batch_size, 16)
            full_labels = np.round(full_data[:, 0])

            for idx, dev in enumerate(devices):
                lo = idx * per_device
                hi = lo + per_device
                shard_data = full_data[lo:hi, :].astype(np.float32)
                shard_labels = full_labels[lo:hi].astype(np.float32)
                with core.DeviceScope(core.DeviceOption(device_type, dev)):
                    workspace.FeedBlob(
                        "{}_{}/data".format(device_prefix, dev), shard_data)
                    workspace.FeedBlob(
                        "{}_{}/label".format(device_prefix, dev), shard_labels)

    def _device_pid(device, pid):
        # Process 1 owns devices 2 and 3, so shift its local device index.
        return device + 2 if pid == 1 else device

    def _fetch(blob, local_device):
        # Fetch "<prefix>_<device>/<blob>" for this process's local device.
        return workspace.FetchBlob("{}_{}/{}".format(
            device_prefix, _device_pid(local_device, process_id), blob))

    _generate_data(devices, process_id, device_type, device_prefix)

    store_op = core.CreateOperator(
        "FileStoreHandlerCreate", [], ["store_handler"], path=filestore_dir)
    workspace.RunOperatorOnce(store_op)
    rendezvous = {
        "kv_handler": "store_handler",
        "shard_id": process_id,
        "num_shards": 2,
        "engine": "GLOO",
        "exit_nets": None,
    }

    data_parallel_model.Parallelize_BMUF(model,
                                         _input_builder_fun,
                                         _model_build_fun,
                                         _param_update_fun,
                                         devices=devices,
                                         rendezvous=rendezvous,
                                         nesterov=nesterov,
                                         add_blobs_to_sync=["sync_num"],
                                         cpu_device=cpu_device)

    data_parallel_model.RunInitNet(model)

    # The fc_w_v blob must be all zeros right after initialization.
    np.testing.assert_equal(
        _fetch("fc_w_v", 0),
        np.zeros(16).astype(np.float32).reshape(1, 16))

    # Run the algorithm for one iteration to have non-zero params.
    data_parallel_model.RunNet(model, 1)

    # Save iteration momentum and post local update params
    results = {}
    results['v_b_'] = _fetch("fc_b_v", 0)
    results['v_w_'] = _fetch("fc_w_v", 0)

    workspace.RunNetOnce(model.net)

    for key, blob, local_device in (
        ('b_0_', "fc_b", 0),
        ('w_0_', "fc_w", 0),
        ('b_1_', "fc_b", 1),
        ('w_1_', "fc_w", 1),
    ):
        results[key] = _fetch(blob, local_device)

    # Test sync
    if process_id == 0:
        workspace.FeedBlob(device_prefix + "_0/sync_num",
                           np.array([2603]).astype(np.float32),
                           device_option=core.DeviceOption(device_type, 0))

    # Compute block gradients.
    results['b_g_'] = _fetch("fc_b_g", 0)
    results['w_g_'] = _fetch("fc_w_g", 0)
    workspace.RunNetOnce(model._global_model_param_updates_net)

    #  g_b = (b_0_ + b_1_) / 2 - b_g_
    #  g_w = (w_0_ + w_1_) / 2 - w_g_
    for key, blob, local_device in (
        ('v_b', "fc_b_v", 0),
        ('v_w', "fc_w_v", 0),
        ('w_g', "fc_w_g", 0),
        ('b_g', "fc_b_g", 0),
        ('w_0', "fc_w", 0),
        ('b_0', "fc_b", 0),
        ('w_1', "fc_w", 1),
        ('b_1', "fc_b", 1),
    ):
        results[key] = _fetch(blob, local_device)

    # Test add_blobs_to_sync
    for dev in devices:
        sync_blob = workspace.FetchBlob(
            "{}_{}/sync_num".format(device_prefix, dev))
        results['sync_{}'.format(dev)] = sync_blob[0]

    shared_results[process_id] = results
Esempio n. 11
0
    def predict(self, float_state_features, int_state_features=None):
        """Feed sparse state features into the workspace and run the net.

        Every feature list is flattened into ``lengths`` / ``keys`` /
        ``values`` blobs, ``self._net`` is run, and the flat output blobs are
        split back into one dict per row using the output lengths.

        :param float_state_features A list of feature -> float value dict examples
        :param int_state_features A list of feature -> int value dict examples;
            no int blobs are fed when this is None
        :return A list of dicts (one per output length entry) mapping the
            UTF-8 decoded output name to its output value
        """
        # Flatten all (feature, value) pairs in example order.
        float_pairs = [
            pair for example in float_state_features
            for pair in example.items()
        ]
        float_lengths = [len(example) for example in float_state_features]
        workspace.FeedBlob(
            "input/float_features.lengths",
            np.array(float_lengths, dtype=np.int32),
        )
        workspace.FeedBlob(
            "input/float_features.keys",
            np.array([key for key, _ in float_pairs], dtype=np.int64),
        )
        workspace.FeedBlob(
            "input/float_features.values",
            np.array([value for _, value in float_pairs],
                     dtype=np.float32).flatten(),
        )

        if int_state_features is not None:
            int_lengths = [len(example) for example in int_state_features]
            workspace.FeedBlob(
                "input/int_features.lengths",
                np.array(int_lengths, dtype=np.int32),
            )
            int_pairs = [
                pair for example in int_state_features
                for pair in example.items()
            ]
            workspace.FeedBlob(
                "input/int_features.keys",
                np.array([key for key, _ in int_pairs],
                         dtype=np.int64).flatten(),
            )
            workspace.FeedBlob(
                "input/int_features.values",
                np.array([value for _, value in int_pairs],
                         dtype=np.int32).flatten(),
            )

        workspace.RunNet(self._net)

        output_prefix = "output/string_weighted_multi_categorical_features.values"
        output_lengths = workspace.FetchBlob(output_prefix + ".lengths")
        output_names = workspace.FetchBlob(output_prefix + ".keys")
        output_values = workspace.FetchBlob(output_prefix + ".values")

        num_outputs = len(output_lengths)
        num_inputs = len(float_state_features)
        assert num_outputs == num_inputs, (
            "Invalid number of outputs: " + str(num_outputs) + " != " +
            str(num_inputs))

        # Walk the flat name/value blobs, slicing off `length` entries per row.
        results = []
        start = 0
        for length in output_lengths:
            stop = start + length
            results.append({
                output_names[i].decode("utf-8"): output_values[i]
                for i in range(start, stop)
            })
            start = stop

        return results
Esempio n. 12
0
 def blob_nbytes(blob):
     """Return the ``nbytes`` of the value fetched from the workspace for ``blob``."""
     fetched = workspace.FetchBlob(blob)
     return fetched.nbytes
Esempio n. 13
0
 def testFeedFetchBlobMKLDNN(self):
     """A float32 array fed with an MKLDNN device option is fetched back unchanged."""
     expected = np.random.randn(2, 3).astype(np.float32)
     mkldnn_option = core.DeviceOption(caffe2_pb2.MKLDNN)
     workspace.FeedBlob("testblob_mkldnn", expected, mkldnn_option)
     actual = workspace.FetchBlob("testblob_mkldnn")
     np.testing.assert_array_equal(expected, actual)
Esempio n. 14
0
 def testFetchFeedPlainString(self):
     """A plain Python string (not a tensor of strings) round-trips through the workspace."""
     # The embedded \0 and \1 characters are part of the payload being checked.
     original = "Hello, world! I have special \0 symbols \1!"
     workspace.FeedBlob('my_plain_string', original)
     round_tripped = workspace.FetchBlob('my_plain_string')
     self.assertEqual(original, round_tripped)
Esempio n. 15
0
    def from_caffe2(self, init_net, predict_net):
        """Build a Relay module and parameter dict from a caffe2 net pair.

        Parameters
        ----------
        init_net : protobuf object
            Run once in the caffe2 workspace before any blob is fetched.
        predict_net : protobuf object
            Each of its operators is handed to ``self._process_op``.

        Returns
        -------
        mod : tvm.IRModule
            The module that optimizations will be performed on.

        params : dict
            A dict of name: tvm.nd.array pairs, used as pretrained weights
        """
        # pylint: disable=import-outside-toplevel
        from caffe2.python import workspace

        workspace.RunNetOnce(init_net)

        # The first input of the first operator is taken as the data input.
        input_name = predict_net.op[0].input[0]

        # Parameters: workspace blobs that some operator consumes, except
        # the data input itself.
        self._params = {}
        used_blobs = {
            name for c2_op in predict_net.op for name in c2_op.input
        }
        for blob in workspace.Blobs():
            if blob == input_name or blob not in used_blobs:
                continue
            self._params[blob] = _nd.array(workspace.FetchBlob(blob))

        # Variables: one per external input, typed from the fetched parameter
        # when available, otherwise from the user-supplied shape/dtype.
        self._nodes = {}
        for blob in predict_net.external_input:
            if blob in self._params:
                param = self._params[blob]
                self._nodes[blob] = new_var(
                    blob, shape=param.shape, dtype=param.dtype)
                continue

            if blob in self._shape:
                shape = self._shape[blob]
            else:
                shape = ()

            dtype = "float32"
            if isinstance(self._dtype, dict) and blob in self._dtype:
                dtype = str(self._dtype[blob])
            elif isinstance(self._dtype, str):
                dtype = self._dtype
            self._nodes[blob] = new_var(blob, shape=shape, dtype=dtype)

        # Ops: record the producer of every output blob first, then process
        # the operators in a second pass.
        for c2_op in predict_net.op:
            for blob in c2_op.output:
                self._ops[blob] = c2_op

        for c2_op in predict_net.op:
            self._process_op(c2_op)

        # Outputs: a single node is returned as-is, several are wrapped in a
        # tuple expression.
        out = [self._nodes[blob] for blob in predict_net.external_output]
        outputs = _expr.Tuple(out) if len(out) > 1 else out[0]

        self._mod["main"] = _function.Function(
            analysis.free_vars(outputs), outputs)

        return self._mod, self._params
Esempio n. 16
0
def test_convolution_nchw():
    """Compare an NCHW ``Conv`` run by caffe2 with the ngraph-imported result.

    For every parameter row a net with random X/W/B fills and a single Conv
    is executed in caffe2, imported through ``C2Importer`` and executed
    again; both outputs must agree within ``atol=1e-4`` / ``rtol=1e-3``.
    """
    # (batch, input_feature_map, spatial, output_feature_map, kernel, stride,
    #  c2_padding_type)
    cases = (
        (1, 3, 2, 1, 2, 2, caffe2_legacy_pb2.NOTSET),
        (1, 1, 4, 1, 2, 2, caffe2_legacy_pb2.NOTSET),
        (2, 3, 8, 1, 2, 2, caffe2_legacy_pb2.NOTSET),
        (8, 2, 5, 4, 3, 1, caffe2_legacy_pb2.NOTSET),
        (1, 2, 5, 2, 3, 1, caffe2_legacy_pb2.NOTSET),
        (8, 3, 4, 4, 3, 3, caffe2_legacy_pb2.VALID),
        (12, 6, 5, 5, 4, 3, caffe2_legacy_pb2.VALID),
        (8, 3, 4, 4, 3, 3, caffe2_legacy_pb2.SAME),
        (12, 6, 5, 5, 4, 3, caffe2_legacy_pb2.SAME),
    )

    def gauss_values(shape):
        # One gaussian sample (mu=0, sigma=10) per element of `shape`.
        return [random.gauss(mu=0, sigma=10) for _ in range(np.prod(shape))]

    for n, ifm, spatial, ofm, kernel, stride, pad_type in cases:
        shape_x = (n, ifm, spatial, spatial)
        shape_w = (ofm, ifm, kernel, kernel)
        shape_b = (ofm, )

        # Draw order X, W, B is kept so the random stream is consumed
        # in the same sequence.
        data_x = gauss_values(shape_x)
        data_w = gauss_values(shape_w)
        data_b = gauss_values(shape_b)

        net = core.Net("net")
        conv_inputs = [
            net.GivenTensorFill([], [blob],
                                shape=shape,
                                values=values,
                                name=blob)
            for blob, shape, values in (
                ("X", shape_x, data_x),
                ("W", shape_w, data_w),
                ("B", shape_b, data_b),
            )
        ]

        net.Conv(conv_inputs,
                 'Y',
                 kernel=kernel,
                 stride=stride,
                 order='NCHW',
                 legacy_pad=pad_type)

        # Reference run in caffe2
        workspace.RunNetOnce(net)

        # Import the same net definition into ngraph and get the handle of Y
        importer = C2Importer()
        importer.parse_net_def(net.Proto(), verbose=False)
        f_ng = importer.get_op_handle("Y")

        with ExecutorFactory() as ex:
            ng_result = ex.executor(f_ng)()

            # compare Caffe2 and ngraph results
            c2_result = workspace.FetchBlob("Y")
            assert np.allclose(ng_result,
                               c2_result,
                               atol=1e-4,
                               rtol=1e-3,
                               equal_nan=False)
Esempio n. 17
0
def get_detections_from_im(cfg,
                           model,
                           im,
                           image_id,
                           featmap_blob_name,
                           feat_blob_name,
                           MIN_BOXES,
                           MAX_BOXES,
                           conf_thresh=0.2,
                           bboxes=None):
    """Detect boxes in one image and keep the most confident regions.

    For every class index from 1 upwards, NMS is applied to that class's
    boxes and each surviving region remembers the class with its highest
    score. Regions above ``conf_thresh`` are kept; if that yields fewer than
    ``MIN_BOXES`` or more than ``MAX_BOXES`` regions, the top ``MIN_BOXES`` /
    ``MAX_BOXES`` regions by confidence are taken instead.

    ``featmap_blob_name`` is accepted but not used by this function.

    Returns a dict with the keys ``image_id``, ``image_h``, ``image_w``,
    ``num_boxes``, ``boxes``, ``region_feat``, ``object``, ``obj_prob`` and
    ``cls_prob``.
    """
    assert conf_thresh >= 0.
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, _ = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        num_regions = scores.shape[0]
        region_feat = workspace.FetchBlob(feat_blob_name)

        # Best class, score and box seen so far for every region.
        best_conf = np.zeros((num_regions, ), dtype=np.float32)
        best_cls = np.zeros((num_regions, ), dtype=np.int32)
        best_box = np.zeros((num_regions, 4), dtype=np.float32)

        # Class index 0 is skipped (presumably background -- confirm).
        for cls_ind in range(1, cfg.MODEL.NUM_CLASSES):
            cls_scores = scores[:, cls_ind]
            first_col = cls_ind * 4
            cls_dets = np.hstack(
                (cls_boxes[:, first_col:first_col + 4],
                 cls_scores[:, np.newaxis])).astype(np.float32)
            survivors = np.array(nms(cls_dets, cfg.TEST.NMS))
            # Among NMS survivors, update only regions this class scores higher on.
            improved = survivors[np.where(
                cls_scores[survivors] > best_conf[survivors])]
            best_conf[improved] = cls_scores[improved]
            best_cls[improved] = cls_ind
            best_box[improved] = cls_dets[improved][:, :4]

        keep_boxes = np.where(best_conf > conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            top_k = MIN_BOXES
        elif len(keep_boxes) > MAX_BOXES:
            top_k = MAX_BOXES
        else:
            top_k = None
        if top_k is not None:
            # Fall back to the top-k regions by descending confidence.
            keep_boxes = np.argsort(best_conf)[::-1][:top_k]

        objects = best_cls[keep_boxes]
        obj_prob = best_conf[keep_boxes]
        obj_boxes = best_box[keep_boxes, :]
        cls_prob = scores[keep_boxes, :]

    # Every selected class index must be a valid class.
    assert (np.sum(objects >= cfg.MODEL.NUM_CLASSES) == 0)

    return {
        "image_id": image_id,
        "image_h": np.size(im, 0),
        "image_w": np.size(im, 1),
        'num_boxes': len(keep_boxes),
        'boxes': obj_boxes,
        'region_feat': region_feat[keep_boxes, :],
        'object': objects,
        'obj_prob': obj_prob,
        'cls_prob': cls_prob
    }
Esempio n. 18
0
    def test_int8_fc(self, n, m, k, rand_seed, quantize_bias, f):
        """Compare an int8 FC reference net with its onnxifi-lowered version.

        Random inputs in ``[-f, f)`` are fed, the weights are packed with
        ``Int8FCPackWeight`` (including the bias when ``quantize_bias`` is
        set) and the ``*NNPI`` reference net is run. The op types are then
        rewritten to ``Int8Quantize`` / ``Int8FC`` / ``Int8Dequantize``, the
        net is lowered through ``onnxifi_caffe2_net`` and must contain
        exactly one ``Onnxifi`` op. The test fails, after dumping debug
        info, when the two ``Y`` outputs are not ``np.allclose``.
        """
        print(
            f"n={n}, m={m}, k={k}, rand_seed={rand_seed}, quantize_bias={quantize_bias}"
        )
        np.random.seed(rand_seed)
        workspace.ResetWorkspace()

        bound = float(f)
        X_fp32 = np.random.uniform(-bound, bound,
                                   size=(m, k)).astype(np.float32)
        W_fp32 = np.random.uniform(-bound, bound,
                                   size=(n, k)).astype(np.float32)
        b_fp32 = np.random.uniform(-bound, bound, size=n).astype(np.float32)

        X_scale, X_zero_point = self._get_scale_zp(X_fp32)
        Y_fp32 = np.dot(X_fp32, W_fp32.T) + b_fp32
        Y_scale, Y_zero_point = self._get_scale_zp(Y_fp32)

        for blob, value in (("X", X_fp32), ("W", W_fp32), ("b", b_fp32)):
            workspace.FeedBlob(blob, value)

        # The bias is only packed (to b_int32) when it is to be quantized;
        # otherwise the fp32 blob "b" is used directly downstream.
        if quantize_bias:
            pack_inputs, pack_outputs = ["W", "b"], ["W_int8", "b_int32"]
            bias_blob = "b_int32"
        else:
            pack_inputs, pack_outputs = ["W"], ["W_int8"]
            bias_blob = "b"

        pack_op = core.CreateOperator(
            "Int8FCPackWeight",
            pack_inputs,
            pack_outputs,
            engine="DNNLOWP",
            save_unpacked_weights=True,
            in_scale=X_scale,
        )
        workspace.RunOperatorOnce(pack_op)

        ref_net = core.Net("net")
        ref_net.Int8QuantizeNNPI(["X"], ["X_int8"],
                                 Y_scale=X_scale,
                                 Y_zero_point=X_zero_point)
        ref_net.Int8FCFakeAcc32NNPI(
            ["X_int8", "W_int8", bias_blob],
            ["Y_int8"],
            Y_scale=Y_scale,
            Y_zero_point=Y_zero_point,
        )
        ref_net.Int8DequantizeNNPI(["Y_int8"], ["Y"])
        ref_net.Proto().external_output.append("Y")

        # run ref_net
        workspace.RunNetOnce(ref_net)
        Y_fbgemm = workspace.FetchBlob("Y")

        # run onnxifi net: same graph, with the non-NNPI op types
        lowered_types = ("Int8Quantize", "Int8FC", "Int8Dequantize")
        for op_index, op_type in enumerate(lowered_types):
            ref_net.Proto().op[op_index].type = op_type
        net_onnxified = onnxifi_caffe2_net(
            ref_net.Proto(),
            {},
            debug=True,
            adjust_batch=False,
            use_onnx=False,
            weight_names=["W_int8", bias_blob],
        )
        num_onnxified_ops = len(
            [o for o in net_onnxified.op if o.type == "Onnxifi"])
        np.testing.assert_equal(num_onnxified_ops, 1)
        workspace.CreateNet(net_onnxified)
        workspace.RunNet(net_onnxified.name)
        Y_glow = workspace.FetchBlob("Y")

        if np.allclose(Y_glow, Y_fbgemm):
            return

        diff_Y = np.abs(Y_glow - Y_fbgemm)
        print_test_debug_info(
            "int8_fc",
            {
                "seed": rand_seed,
                "n": n,
                "m": m,
                "k": k,
                "X": X_fp32,
                "W": W_fp32,
                "b": b_fp32,
                "Y_fbgemm": Y_fbgemm,
                "Y_glow": Y_glow,
                "diff": diff_Y,
                "maxdiff": diff_Y.max(axis=1),
            },
        )
        assert 0
    def test_convolution_sum_relu_fusion(self, stride, pad, kernel, size,
                             input_channels, output_channels,
                             batch_size, use_bias, group, gc, dc):
        """Check fused ``ConvFusion`` against the Conv -> Sum -> Relu chain.

        The reference path runs ``Conv``, adds a random tensor ``S`` with
        ``Sum`` and applies ``Relu`` using the device option ``dc[0]``. The
        fused path runs a single ``ConvFusion`` operator with the same
        inputs on ``dc[1]``. Both results must agree within
        ``atol=0.01`` / ``rtol=0.01``.

        ``gc`` is accepted but not used by this test. The bias blobs are
        always fed; they are only wired into the operators when ``use_bias``
        is set.

        The test runs inside a scratch workspace named ``_device_check_`` and
        always switches back to the previously active workspace, including
        when the comparison fails.
        """
        conv = core.CreateOperator(
            "Conv",
            ["X0", "w0", "b0"] if use_bias else ["X0", "w0"],
            ["Y0"],
            stride=stride,
            pad=pad,
            kernel=kernel,
            group=group,
            device_option=dc[0]
        )
        # Named sum_op so the builtin `sum` is not shadowed.
        sum_op = core.CreateOperator(
            "Sum",
            ["S0", "Y0"],
            ["S0"],
            device_option=dc[0]
        )
        relu = core.CreateOperator(
            "Relu",
            ["S0"],
            ["S0"],
            device_option=dc[0]
        )
        # NOTE(review): fusion_type=3 presumably selects the sum+relu fusion
        # mode -- confirm against the ConvFusion operator definition.
        conv_fusion = core.CreateOperator(
            "ConvFusion",
            ["X1", "w1", "b1", "S1"] if use_bias else ["X1", "w1", "S1"],
            ["S1"],
            stride=stride,
            pad=pad,
            kernel=kernel,
            group=group,
            fusion_type=3,
            device_option=dc[1]
        )
        X = np.random.rand(
            batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
        w = np.random.rand(
                output_channels * group, input_channels, kernel, kernel) \
            .astype(np.float32) - 0.5
        b = np.random.rand(output_channels * group).astype(np.float32) - 0.5

        old_ws_name = workspace.CurrentWorkspace()
        workspace.SwitchWorkspace("_device_check_", True)
        try:
            # Reference: Conv, then Sum with S, then Relu.
            workspace.FeedBlob('X0', X, dc[0])
            workspace.FeedBlob('w0', w, dc[0])
            workspace.FeedBlob('b0', b, dc[0])
            workspace.RunOperatorOnce(conv)
            Y0 = workspace.FetchBlob('Y0')
            # S must match the conv output shape, so it is drawn after Conv.
            S = np.random.rand(*Y0.shape).astype(np.float32) - 0.5
            workspace.FeedBlob('S0', S, dc[0])
            workspace.RunOperatorOnce(sum_op)
            workspace.RunOperatorOnce(relu)
            S0 = workspace.FetchBlob('S0')

            # Fused: a single ConvFusion operator on a cleared workspace.
            workspace.ResetWorkspace()
            workspace.FeedBlob('X1', X, dc[1])
            workspace.FeedBlob('w1', w, dc[1])
            workspace.FeedBlob('b1', b, dc[1])
            workspace.FeedBlob('S1', S, dc[1])
            workspace.RunOperatorOnce(conv_fusion)
            S1 = workspace.FetchBlob('S1')

            if not np.allclose(S0, S1, atol=0.01, rtol=0.01):
                print(S1.flatten())
                print(S0.flatten())
                print(np.max(np.abs(S1 - S0)))
                self.fail(
                    "ConvFusion output differs from Conv + Sum + Relu "
                    "reference")
        finally:
            # Restore the caller's workspace even when the check above fails,
            # so later tests do not run inside "_device_check_".
            workspace.SwitchWorkspace(old_ws_name)
Esempio n. 20
0
    def test_int8_quantize(self, n, rand_seed, non_zero_offset):
        """Check int8 quantize/dequantize through an identity FC layer.

        ``X`` is an ``n x n`` matrix rounded through float16, drawn from
        ``uniform(-1, 1)`` when ``non_zero_offset`` is set and from
        ``rand`` otherwise. The weight is the identity and the bias is zero.
        The ``*NNPI`` reference net output is compared with the output of
        the same net lowered through ``onnxifi_caffe2_net``, which must
        contain exactly one ``Onnxifi`` op.
        """
        print("n={}, rand_seed={}".format(n, rand_seed))
        np.random.seed(rand_seed)
        workspace.ResetWorkspace()

        if non_zero_offset:
            raw_x = np.random.uniform(-1, 1, size=(n, n))
        else:
            raw_x = np.random.rand(n, n)
        # Round through float16 before widening back to float32.
        X_fp32 = raw_x.astype(np.float16).astype(np.float32)

        W_fp32 = np.identity(n, dtype=np.float32)
        b_fp32 = np.zeros((n, ), dtype=np.float32)

        X_scale, X_zero_point = self._get_scale_zp(X_fp32)

        for blob, value in (("X", X_fp32), ("W", W_fp32), ("b", b_fp32)):
            workspace.FeedBlob(blob, value)

        pack_op = core.CreateOperator(
            "Int8FCPackWeight",
            ["W"],
            ["W_int8"],
            engine="DNNLOWP",
            save_unpacked_weights=True,
            in_scale=X_scale,
        )
        workspace.RunOperatorOnce(pack_op)

        # The output reuses the input quantization parameters.
        ref_net = core.Net("net")
        ref_net.Int8QuantizeNNPI(["X"], ["X_int8"],
                                 Y_scale=X_scale,
                                 Y_zero_point=X_zero_point)
        ref_net.Int8FCFakeAcc32NNPI(
            ["X_int8", "W_int8", "b"],
            ["Y_int8"],
            Y_scale=X_scale,
            Y_zero_point=X_zero_point,
        )
        ref_net.Int8DequantizeNNPI(["Y_int8"], ["Y"])
        ref_net.Proto().external_output.append("Y")

        # run ref_net
        workspace.RunNetOnce(ref_net)
        Y_fbgemm = workspace.FetchBlob("Y")

        # run onnxifi net: same graph, with the non-NNPI op types
        lowered_types = ("Int8Quantize", "Int8FC", "Int8Dequantize")
        for op_index, op_type in enumerate(lowered_types):
            ref_net.Proto().op[op_index].type = op_type
        net_onnxified = onnxifi_caffe2_net(
            ref_net.Proto(),
            {},
            debug=True,
            adjust_batch=False,
            use_onnx=False,
            weight_names=["W_int8", "b"],
        )
        num_onnxified_ops = len(
            [o for o in net_onnxified.op if o.type == "Onnxifi"])
        np.testing.assert_equal(num_onnxified_ops, 1)
        workspace.CreateNet(net_onnxified)
        workspace.RunNet(net_onnxified.name)
        Y_glow = workspace.FetchBlob("Y")

        if np.allclose(Y_glow, Y_fbgemm):
            return

        diff_Y = np.abs(Y_glow - Y_fbgemm)
        print_test_debug_info(
            "int8_fc",
            {
                "seed": rand_seed,
                "n": n,
                "X": X_fp32,
                "W": W_fp32,
                "b": b_fp32,
                "Y_fbgemm": Y_fbgemm,
                "Y_glow": Y_glow,
                "diff": diff_Y,
                "maxdiff": diff_Y.max(axis=1),
            },
        )
        assert 0
Esempio n. 21
0
def run_main(config):
    """Run MAMC training with periodic validation, logging and snapshots.

    The four metric blobs ``accuracy``, ``accuracy_5``, ``softmax_loss`` and
    ``loss`` are fetched after every net run. They are averaged over the
    display window for logging, over an epoch (``train_iterations``) for
    plotting, and over ``test_iterations`` during validation. A snapshot is
    taken after every validation round.
    """
    # init model
    initialize(config)

    # NOTE: a net_drawer graph dump used to sit here inside an inert string
    # literal; it never executed and is intentionally left out.

    # experiment tag: "FINETUNE-<name>" or "RETRAIN-<name>"
    name = config['name']
    tag_template = 'FINETUNE-{}' if config['finetune'] else 'RETRAIN-{}'
    tag = tag_template.format(name)

    dir_parts = [config['root_dir'], 'experiments']
    if config['dataset_name'] is not None:
        dir_parts.append(config['dataset_name'])
    root_experiments_dir = os.path.join(*dir_parts)

    experiment = Experiment(root_experiments_dir, tag)
    experiment.add_config_file(config['config_path'])

    # All per-metric state below is kept in lists ordered like metric_blobs.
    metric_blobs = ('accuracy', 'accuracy_5', 'softmax_loss', 'loss')
    training_styles = ('r.--', 'r.--', 'b+--', 'b+--')
    validation_styles = ('c.--', 'c.--', 'g+--', 'g+--')

    # add chart: one chart per metric, with the metric name as y label
    charts = [
        experiment.add_chart(blob, xlabel='epochs', ylabel=blob)
        for blob in metric_blobs
    ]

    # plot params (should be added into 'experiment module'
    # TODO add 'variable' object to Experiment class
    training_history = [[] for _ in metric_blobs]
    validation_history = [[] for _ in metric_blobs]
    epoch_sums = [0, 0, 0, 0]
    window_sums = [0, 0, 0, 0]

    best_acc = 0

    # build model
    training_model = build_training_model(config, experiment)
    validation_model = build_validation_model(config)

    # run the model
    experiment.add_log(
        "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    )
    solver = config['solver']
    for training_iter in tqdm(range(solver['max_iterations'])):
        workspace.RunNet(training_model.net)
        metrics = [workspace.FetchBlob(blob) for blob in metric_blobs]

        epoch_sums = [
            total + value for total, value in zip(epoch_sums, metrics)
        ]
        window_sums = [
            total + value for total, value in zip(window_sums, metrics)
        ]

        # Iteration 0 never triggers any of the periodic actions below.
        step = training_iter + 1

        # display training result
        if training_iter != 0 and step % solver['display'] == 0:
            experiment.add_log(
                ("[TRAIN] epoch: {}   iteration: {}   accuracy: {:.4f}   "
                 "accuracy_5: {:.4f}   softmax_loss: {:.4f}   loss: {:.4f}"
                 ).format(
                     (training_iter // solver['train_iterations'] + 1),
                     training_iter,
                     *[total / solver['display'] for total in window_sums]))
            experiment.add_log("Global learning rate: {}".format(
                workspace.FetchBlob(
                    'MultiPrecisionSgdOptimizer_0_lr_gpu{}'.format(
                        config['gpu_id']))))

            # cleanup the counters
            window_sums = [0, 0, 0, 0]

        # plot training statistics every epoch
        if training_iter != 0 and step % solver['train_iterations'] == 0:
            for history, total in zip(training_history, epoch_sums):
                history.append(total / solver['train_iterations'])

            epoch_sums = [0, 0, 0, 0]

            for chart, history, style in zip(charts, training_history,
                                             training_styles):
                experiment.add_plot(chart, history, style, 'training')

        # start to validate the model
        if training_iter != 0 and step % solver['test_interval'] == 0:
            test_sums = [0, 0, 0, 0]

            for _ in range(solver['test_iterations']):
                workspace.RunNet(validation_model.net)
                metrics = [workspace.FetchBlob(blob) for blob in metric_blobs]

                # update counter
                test_sums = [
                    total + value for total, value in zip(test_sums, metrics)
                ]
                experiment.add_log(
                    ("[VALIDATION] accuracy: {:.4f}   accuracy_5: {:.4f}   "
                     "softmax_loss: {:.4f}   loss: {:.4f}").format(*metrics))

            # end validation
            test_means = [
                total / solver['test_iterations'] for total in test_sums
            ]
            avg_acc, avg_acc_5, avg_softmax_loss, avg_loss = test_means
            if avg_acc > best_acc:
                best_acc = avg_acc
            experiment.add_log(
                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            )
            experiment.add_log(
                ("[VALIDATION] avg_acc: {:.4f}   best_acc: {:.4f}   avg_acc_5: {:.4f}   "
                 "avg_softmax_loss: {:.4f}   avg_loss: {:.4f}").format(
                     avg_acc,
                     best_acc,
                     avg_acc_5,
                     avg_softmax_loss,
                     avg_loss,
                 ))

            # snapshot training model params
            print("[INFO] snapshot the model..... ")
            experiment.add_init_net_snapshot(
                training_model.GetAllParams(),
                workspace,
                config,
                (training_iter // solver['train_iterations'] + 1),
                avg_acc,
                best_acc,
            )
            print("[INFO] snapshot the model. Done.....")

            # plot validation statistics
            for history, mean in zip(validation_history, test_means):
                history.append(mean)

            for chart, history, style in zip(charts, validation_history,
                                             validation_styles):
                experiment.add_plot(chart, history, style, 'validation')

    experiment.add_log(
        "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    )
Esempio n. 22
0
 def _run_zero_even_op(self, X):
     """Feed ``X`` as blob 'X', run the ``ZeroEven`` operator once and return blob 'Y'."""
     workspace.FeedBlob('X', X)
     zero_even = core.CreateOperator('ZeroEven', ['X'], ['Y'])
     workspace.RunOperatorOnce(zero_even)
     return workspace.FetchBlob('Y')
Esempio n. 23
0
    def _test_index_ops(self, entries, dtype, index_create_op):
        """Exercise the index operators end to end for one key type.

        The test creates an index, loads three keys, queries it (which adds
        new keys), freezes it, queries again, checks its size, stores and
        reloads the keys into a second index, and finally round-trips the
        index through Save/Load.

        Args:
            entries: indexable collection of keys; ``entries[0]`` through
                ``entries[7]`` are read (presumably distinct values --
                the expected results below rely on that).
            dtype: numpy dtype used to build every key array fed to the ops.
            index_create_op: name of the operator that creates the index
                blob.
        """
        workspace.RunOperatorOnce(
            core.CreateOperator(index_create_op, [], ['index'],
                                max_elements=10))
        my_entries = np.array([entries[0], entries[1], entries[2]],
                              dtype=dtype)

        workspace.FeedBlob('entries', my_entries)
        workspace.RunOperatorOnce(
            core.CreateOperator('IndexLoad', ['index', 'entries'], ['index']))
        query1 = np.array([entries[0], entries[3], entries[0], entries[4]],
                          dtype=dtype)

        # Querying before the freeze: the two keys that were not loaded
        # (entries[3], entries[4]) are expected to receive ids 4 and 5.
        workspace.FeedBlob('query1', query1)
        workspace.RunOperatorOnce(
            core.CreateOperator('IndexGet', ['index', 'query1'], ['result1']))
        result1 = workspace.FetchBlob('result1')
        np.testing.assert_array_equal([1, 4, 1, 5], result1)

        workspace.RunOperatorOnce(
            core.CreateOperator('IndexFreeze', ['index'], ['index']))

        # Querying after the freeze: keys not already in the index are
        # expected to map to 0.
        query2 = np.array(
            [entries[5], entries[4], entries[0], entries[6], entries[7]],
            dtype=dtype)
        workspace.FeedBlob('query2', query2)
        workspace.RunOperatorOnce(
            core.CreateOperator('IndexGet', ['index', 'query2'], ['result2']))
        result2 = workspace.FetchBlob('result2')
        np.testing.assert_array_equal([0, 5, 1, 0, 0], result2)

        workspace.RunOperatorOnce(
            core.CreateOperator('IndexSize', ['index'], ['index_size']))
        size = workspace.FetchBlob('index_size')
        # assertEqual instead of the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(size, 6)

        workspace.RunOperatorOnce(
            core.CreateOperator('IndexStore', ['index'], ['stored_entries']))
        stored_actual = workspace.FetchBlob('stored_entries')
        new_entries = np.array([entries[3], entries[4]], dtype=dtype)
        np.testing.assert_array_equal(
            np.concatenate((my_entries, new_entries)), stored_actual)

        workspace.RunOperatorOnce(
            core.CreateOperator(index_create_op, [], ['index2']))

        workspace.RunOperatorOnce(
            core.CreateOperator('IndexLoad', ['index2', 'stored_entries'],
                                ['index2'],
                                skip_first_entry=1))

        workspace.RunOperatorOnce(
            core.CreateOperator('IndexSize', ['index2'], ['index2_size']))
        index2_size = workspace.FetchBlob('index2_size')
        self.assertEqual(index2_size, 5)

        # test serde
        with tempfile.NamedTemporaryFile() as tmp:
            workspace.RunOperatorOnce(
                core.CreateOperator('Save', ['index'], [],
                                    absolute_path=1,
                                    db_type='minidb',
                                    db=tmp.name))
            # frees up the blob
            workspace.FeedBlob('index', np.array([]))
            # reloads the index
            workspace.RunOperatorOnce(
                core.CreateOperator('Load', [], ['index'],
                                    absolute_path=1,
                                    db_type='minidb',
                                    db=tmp.name))
            query3 = np.array(
                [entries[0], entries[3], entries[0], entries[4], entries[4]],
                dtype=dtype)

            # The reloaded index must return the same ids as before the save.
            workspace.FeedBlob('query3', query3)
            workspace.RunOperatorOnce(
                core.CreateOperator('IndexGet', ['index', 'query3'],
                                    ['result3']))
            result3 = workspace.FetchBlob('result3')
            np.testing.assert_array_equal([1, 4, 1, 5, 5], result3)
    def InferTensorRunAndCompare(self, model, expected_uninferred_blobs=None):
        '''
        Infer shapes and types for the model's nets, run the model, and
        assert that every blob in the workspace has the inferred shape
        and type.

        'expected_uninferred_blobs' lists blobs that are skipped by the
        comparison because their shape/type cannot be inferred; None means
        no blob is skipped.
        '''
        inferred_shapes, inferred_types = workspace.InferShapesAndTypes(
            [model.param_init_net, model.net],
        )

        # Execute the model so the workspace holds the real blobs.
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        workspace.RunNet(model.Proto().name)

        # numpy dtype -> TensorProto data type; BYTE and STRING have no
        # entry here.
        tensor_proto = caffe2_pb2.TensorProto
        dtype_to_proto = {
            np.dtype('float32'): tensor_proto.FLOAT,
            np.dtype('int32'): tensor_proto.INT32,
            np.dtype('bool'): tensor_proto.BOOL,
            np.dtype('uint8'): tensor_proto.UINT8,
            np.dtype('int8'): tensor_proto.INT8,
            np.dtype('uint16'): tensor_proto.UINT16,
            np.dtype('int16'): tensor_proto.INT16,
            np.dtype('int64'): tensor_proto.INT64,
            np.dtype('float16'): tensor_proto.FLOAT16,
            np.dtype('float64'): tensor_proto.DOUBLE,
        }

        # Record the actual shape and type of every blob after the run.
        actual_shapes = {}
        actual_types = {}
        for blob_name in workspace.Blobs():
            value = workspace.FetchBlob(blob_name)
            actual_shapes[blob_name] = value.shape
            if type(value) is not np.ndarray:
                actual_types[blob_name] = str(type(value))
            elif value.dtype in dtype_to_proto:
                actual_types[blob_name] = dtype_to_proto[value.dtype]
            else:
                actual_types[blob_name] = "unknown {}".format(value.dtype)

        skipped = (
            [] if expected_uninferred_blobs is None
            else expected_uninferred_blobs
        )
        for blob_name in actual_shapes:
            # Blobs whose shape could not be inferred are not compared.
            if blob_name in skipped:
                continue

            shapes_agree = np.array_equal(
                np.array(inferred_shapes[blob_name]).astype(np.int32),
                np.array(actual_shapes[blob_name]).astype(np.int32)
            )
            self.assertTrue(
                shapes_agree,
                "Shape {} mismatch: {} vs. correct {}".format(
                    blob_name,
                    inferred_shapes[blob_name],
                    actual_shapes[blob_name],
                )
            )

            type_missing = (
                blob_name not in inferred_types and blob_name in actual_types
            )
            self.assertFalse(
                type_missing,
                "Type for {} not defined".format(blob_name),
            )
            self.assertEqual(
                inferred_types[blob_name],
                actual_types[blob_name],
                "Type {} mismatch: {} vs. {}".format(
                    blob_name,
                    inferred_types[blob_name],
                    actual_types[blob_name],
                )
            )
def run_single_kpts(
    net,
    image,
    target_size,
    pixel_means=PIXEL_MEANS_DEFAULT,
    pixel_stds=PIXEL_STDS_DEFAULT,
    max_size=1333,
):
    """Run keypoint detection for one image on a device reached through adb.

    The inputs produced by ``utils2.prepare_blobs`` and zero-filled templates
    for the net's external outputs are serialized into a ``NetDef``, pushed
    to ``/data/local/tmp/input_output.pb``, and ``./nnapi_runner`` is started
    through ``adb shell``. The resulting ``output_blobs.pb`` is pulled back
    and its blobs are deserialized into the local workspace.

    Args:
        net: net definition; its ``op`` types are inspected to choose the
            box dtype and its ``external_output`` names select the output
            templates (each name must be one of the template keys below,
            otherwise a ``KeyError`` is raised).
        image: image forwarded to ``utils2.prepare_blobs``.
        target_size: forwarded to ``utils2.prepare_blobs``.
        pixel_means: forwarded to ``utils2.prepare_blobs``.
        pixel_stds: forwarded to ``utils2.prepare_blobs``.
        max_size: forwarded to ``utils2.prepare_blobs``.

    Returns:
        Tuple ``(boxes, xy_preds, classids)``:
        ``boxes`` is the box array divided by the image scale with the
        scores appended as a last column; ``xy_preds`` has shape
        ``(num_rois, 4, num_keypoints)`` and is divided by the image scale;
        ``classids`` is the content of the ``class_nms`` blob.
        If running the model or decoding its outputs raises, the exception
        is printed and empty arrays (zero rois) are returned instead.

    Raises:
        subprocess.CalledProcessError: if the initial ``adb push`` fails
            (it happens outside the best-effort ``try`` block).
    """
    inputs = utils2.prepare_blobs(
        image,
        target_size=target_size,
        max_size=max_size,
        pixel_means=pixel_means,
        pixel_stds=pixel_stds,
    )

    # Prepare inputs for AABB and Int8AABB operators
    im_info = inputs["im_info"]
    # The first two im_info columns scaled by 8 and rounded to uint16
    # (presumably a fixed-point encoding; see the 0.125 factor below).
    inputs["im_infoq"] = np.rint(im_info[:,:2] * 8.0).astype(np.uint16)
    inputs["im_info2"] = im_info[:,:2]

    blob_names = []
    ser_blobs = []

    # Serialize inputs for remote device
    for k, v in inputs.items():
        workspace.FeedBlob(k, v)
        blob_names.append(k)
        ser_blobs.append(workspace.SerializeBlob(k))

    # Serialize output templates for remote device
    fully_quantized = any(op.type == "Int8AABBRoIProposals" for op in net.op)
    bbox_type = np.uint16 if fully_quantized else np.float32
    output_templates = {
            "score_nms": np.zeros((3,), np.float32),
            "keypoint_rois": np.zeros((3, 4), bbox_type),
            "keypoints_out": np.zeros((3, 17, 2), bbox_type),
            "class_nms": np.zeros((3,), np.int32),
            "keypoints_scores_out": np.zeros((3, 17), np.float32),
    }
    for out_name in net.external_output:
        # The template is fed under a temporary name but registered under
        # the real output name in blob_names.
        fake_name = out_name + "_empty_template"
        blob_names.append(out_name)
        workspace.FeedBlob(fake_name, output_templates[out_name])
        ser_blobs.append(workspace.SerializeBlob(fake_name))

    # Package inputs and output templates
    inout_netdef = caffe2_pb2.NetDef()
    inout_netdef.arg.extend([
            utils.MakeArgument("blob_names", blob_names),
            utils.MakeArgument("ser_blobs", ser_blobs),
        ])

    # Send in/out to the remote device
    with tempfile.NamedTemporaryFile() as inout_file:
        inout_file.write(inout_netdef.SerializeToString())
        inout_file.flush()
        subprocess.check_call(["adb", "push", inout_file.name, "/data/local/tmp/input_output.pb"])

    try:
        # Run the model
        use_caffe2 = "--use_caffe2_reference true" if os.environ.get("USE_CAFFE2_REFERENCE") in ("1", "true", "yes", "on") else ""
        # Pass the remote command as a single adb argument instead of going
        # through a local shell; this matches the push/pull calls and sends
        # adb exactly the same command string as before.
        remote_cmd = (
            "cd /data/local/tmp ; GLOG_logtostderr=true GLOG_v=0 "
            "./nnapi_runner %s --init_net init_net.pb "
            "--predict_net predict_net.pb --inout_net input_output.pb "
            "--out_path output_blobs.pb" % use_caffe2
        )
        subprocess.check_call(["adb", "shell", remote_cmd])

        # Retrieve and deserialize outputs
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = os.path.join(tmpdir, "output_blobs.pb")
            subprocess.check_call(["adb", "pull", "/data/local/tmp/output_blobs.pb", output_file])

            out_net = caffe2_pb2.NetDef()
            with open(output_file, "rb") as handle:
                out_net.ParseFromString(handle.read())

        all_outputs = utils.ArgsToDict(out_net.arg)["outputs"]
        for output in all_outputs:
            # Parse once only to learn the blob name, then deserialize the
            # raw bytes into the workspace under that name.
            bp = caffe2_pb2.BlobProto()
            bp.ParseFromString(output)
            workspace.DeserializeBlob(bp.name, output)

        scores = workspace.FetchBlob("score_nms")
        boxes = workspace.FetchBlob("keypoint_rois")
        coords_preds = workspace.FetchBlob("keypoints_out")
        scores_preds = workspace.FetchBlob("keypoints_scores_out")
        classids = workspace.FetchBlob("class_nms")

        # uint16 outputs are converted to float32 and multiplied by 0.125,
        # the inverse of the factor 8.0 applied to im_infoq above.
        if boxes.dtype == np.uint16:
            boxes = boxes.astype(np.float32) * 0.125

        # New output format of AABBRoIKeypoints:
        # - XY coordinates are [num_rois, num_keypoints, 2] array in keypoints_out
        # - Scores are [num_rois, num_keypoints] array in keypoints_scores_out
        if coords_preds.dtype == np.uint16:
            coords_preds = coords_preds.astype(np.float32) * 0.125
        assert coords_preds.shape[:2] == scores_preds.shape
        num_rois, num_keypoints = coords_preds.shape[:2]
        # Stack x, y, score and a zero channel along the last axis, then
        # move the channel axis in front of the keypoint axis.
        xy_preds = np.concatenate(
            (coords_preds, scores_preds.reshape([num_rois, num_keypoints, 1]),
            np.zeros([num_rois, num_keypoints, 1], dtype=np.float32)),
            axis=2)
        assert xy_preds.shape == (num_rois, num_keypoints, 4)
        xy_preds = np.swapaxes(xy_preds, 1, 2)
        assert xy_preds.shape == (num_rois, 4, num_keypoints)

    except Exception as e:
        # Deliberate best-effort: any failure while running the model or
        # decoding its outputs is reported and treated as "no detections".
        print(e)
        # may not detect anything at all
        R = 0
        scores = np.zeros((R,), dtype=np.float32)
        boxes = np.zeros((R, 4), dtype=np.float32)
        xy_preds = np.zeros((R, 4, 1), dtype=np.float32)
        # int32 to agree with the "class_nms" output template above.
        classids = np.zeros((R,), dtype=np.int32)

    # Map boxes and keypoints back to the original image scale.
    scale = inputs["im_info"][0][2]
    boxes /= scale
    # NOTE(review): this divides all four channels of xy_preds by scale,
    # including the score channel -- confirm that is intended.
    if xy_preds is not None:
        xy_preds /= scale

    boxes = np.column_stack((boxes, scores))

    return boxes, xy_preds, classids
# Script fragment: extends the net of model helper `m` (defined outside this
# fragment) with an FC -> Sigmoid -> FloatToHalf -> HalfToFloat ->
# SoftmaxWithLoss chain, runs it on GPU 0 with random inputs and prints the
# half-precision blob.
fc_1 = m.net.FC(["data", "fc_w", "fc_b"], "fc1")
pred = m.net.Sigmoid(fc_1, "pred")
# Round-trip the prediction through half precision: 'pred2' is the output of
# FloatToHalf, 'pred3' the output of HalfToFloat. Note that `pred` is rebound
# to the 'pred3' blob here.
pred2 = m.net.FloatToHalf(pred, 'pred2')
pred = m.net.HalfToFloat(pred2, 'pred3')
softmax, loss = m.net.SoftmaxWithLoss([pred, "label"], ["softmax", "loss"])
# softmax2 = m.net.FloatToHalf(softmax, 'softmax2')
print(m.net.Proto())
print(m.param_init_net.Proto())
# Place every operator of both nets on GPU 0 with cuDNN enabled.
m.net.RunAllOnGPU(gpu_id=0, use_cudnn=True)
m.param_init_net.RunAllOnGPU(gpu_id=0, use_cudnn=True)
workspace.RunNetOnce(m.param_init_net)
workspace.CreateNet(m.net)
# Run 100 x 10 iterations
for _ in range(100):
    # Fresh random batch: 16 rows of 100 float32 features and 16 int32
    # labels in the range 0..9.
    data = np.random.rand(16, 100).astype(np.float32)
    label = (np.random.rand(16) * 10).astype(np.int32)

    # `device_opts` is defined outside this fragment.
    workspace.FeedBlob("data", data, device_opts)
    workspace.FeedBlob("label", label, device_opts)

    workspace.RunNet(m.name, 10)  # run for 10 times
    # `pred2` is rebound from the blob reference to the fetched array.
    pred2 = workspace.FetchBlob('pred2')
    print(pred2)
    print(pred2.dtype)
    # softmax2 = workspace.FetchBlob('softmax2')
    # print(softmax2)
    # print(softmax2.dtype)

# Outputs of the last run.
print(workspace.FetchBlob("softmax"))
print(workspace.FetchBlob("loss"))
Esempio n. 27
0
    def test_preprocessing_network(self):
        """Check that the PreprocessorNet output for every feature agrees,
        within tolerance, with NumpyFeatureProcessor.preprocess."""
        feature_value_map = read_data()

        # One set of normalization parameters per feature.
        normalization_parameters = {
            name: normalization.identify_parameter(
                name, values, feature_type=self._feature_type_override(name)
            )
            for name, values in feature_value_map.items()
        }
        test_features = NumpyFeatureProcessor.preprocess(
            feature_value_map, normalization_parameters
        )

        net = core.Net("PreprocessingTestNet")
        C2.set_net(net)
        preprocessor = PreprocessorNet()
        name_preprocessed_blob_map = {}
        for feature_name in feature_value_map:
            blob_name = str(feature_name)
            # Placeholder value so the blob exists while the net is built.
            workspace.FeedBlob(blob_name, np.array([0], dtype=np.int32))
            preprocessed_blob, _ = preprocessor.preprocess_blob(
                blob_name, [normalization_parameters[feature_name]]
            )
            name_preprocessed_blob_map[feature_name] = preprocessed_blob

        workspace.CreateNet(net)

        # Feed the real values, with a trailing axis of size one appended.
        for feature_name, raw_values in feature_value_map.items():
            workspace.FeedBlob(
                str(feature_name), np.expand_dims(raw_values, -1)
            )
        workspace.RunNetOnce(net)

        for feature_name in feature_value_map:
            normalized_features = workspace.FetchBlob(
                name_preprocessed_blob_map[feature_name]
            )
            if feature_name != ENUM_FEATURE_ID:
                normalized_features = np.squeeze(normalized_features, -1)

            # At the limit, boxcox has some numerical instability
            tolerance = 0.5 if feature_name == BOXCOX_FEATURE_ID else 0.01

            expected = test_features[feature_name]
            close_mask = np.isclose(
                normalized_features,
                expected,
                rtol=tolerance,
                atol=tolerance,
            )
            non_matching = np.where(np.logical_not(close_mask))
            self.assertTrue(
                np.all(close_mask),
                "{} does not match: {} {}".format(
                    feature_name,
                    normalized_features[non_matching].tolist(),
                    expected[non_matching].tolist(),
                ),
            )
Esempio n. 28
0
    def testGatherRecord(self):
        """Gather rows 1, 3 and 4 of a nested record with GatherRecord and
        compare every gathered field with a numpy reference."""

        def counting(count, dtype):
            # Array holding 0, 1, ..., count - 1 in the requested dtype.
            return np.array(list(range(count)), dtype=dtype)

        indices = np.array([1, 3, 4], dtype=np.int32)
        dense = counting(20, np.float32).reshape(10, 2)
        lengths = counting(10, np.int32)
        items = counting(lengths.sum(), np.int64)
        items_lengths = counting(lengths.sum(), np.int32)
        items_items = counting(items_lengths.sum(), np.int64)

        record = self.new_record(
            schema.Struct(
                ('dense', schema.Scalar(np.float32)),
                ('sparse',
                 schema.Struct(
                     ('list', schema.List(np.int64)),
                     ('list_of_list', schema.List(schema.List(np.int64))),
                 )),
                ('empty_struct', schema.Struct()),
            ))
        indices_record = self.new_record(schema.Scalar(np.int32))
        input_record = schema.Struct(
            ('indices', indices_record),
            ('record', record),
        )
        # `lengths` is fed twice: once for 'list' and once for the outer
        # level of 'list_of_list'.
        field_values = [
            indices, dense, lengths, items, lengths, items_lengths,
            items_items,
        ]
        schema.FeedRecord(input_record, field_values)
        gathered_record = self.model.GatherRecord(input_record)
        self.assertTrue(schema.equal_schemas(gathered_record, record))

        self.run_train_net_forward_only()

        def gather_segments(values, starts, sizes):
            # Concatenate values[start:start + size] for every gathered row.
            return np.concatenate([
                values[starts[row]:starts[row] + sizes[row]]
                for row in indices
            ])

        # Dense field: one row per gathered index.
        expected_dense = np.concatenate(
            [dense[row:row + 1] for row in indices])
        np.testing.assert_array_equal(
            expected_dense, workspace.FetchBlob(gathered_record.dense()))

        # Lengths of the flat list field.
        expected_lengths = np.concatenate(
            [lengths[row:row + 1] for row in indices])
        np.testing.assert_array_equal(
            expected_lengths,
            workspace.FetchBlob(gathered_record.sparse.list.lengths()))

        # Items of the flat list field; each row starts at the cumulative
        # length of the rows before it.
        offsets = lengths.cumsum() - lengths
        np.testing.assert_array_equal(
            gather_segments(items, offsets, lengths),
            workspace.FetchBlob(gathered_record.sparse.list.items()))

        # Inner lengths of the list-of-list field.
        np.testing.assert_array_equal(
            gather_segments(items_lengths, offsets, lengths),
            workspace.FetchBlob(
                gathered_record.sparse.list_of_list.items.lengths()))

        # For every row: where its innermost items start and how many
        # innermost items it owns in total.
        nested_offsets = []
        nested_lengths = []
        running_offset = 0
        inner_pos = 0
        for row_len in lengths:
            nested_offsets.append(running_offset)
            row_total = 0
            for _ in range(row_len):
                running_offset += items_lengths[inner_pos]
                row_total += items_lengths[inner_pos]
                inner_pos += 1
            nested_lengths.append(row_total)

        np.testing.assert_array_equal(
            gather_segments(items_items, nested_offsets, nested_lengths),
            workspace.FetchBlob(
                gathered_record.sparse.list_of_list.items.items()))
Esempio n. 29
0
    def test_collect_tensor_ops(self):
        """Collect a bounded number of samples from three identically
        updated blobs with CollectTensor and verify the number of samples,
        their spread over the stream and that all three blobs collected the
        same data."""
        blobs = ['blob_1', 'blob_2', 'blob_3']
        bvec_map = {name: name + '_vec' for name in blobs}
        num_to_collect = 1000
        max_example_to_cover = 100000
        expected_count = min(num_to_collect, max_example_to_cover)

        # Init: every blob starts at zero and gets an empty tensor vector.
        init_net = core.Net('init_net')
        ONE = init_net.ConstantFill([], 'ONE', shape=[1, 2], value=1)
        for name in blobs:
            init_net.ConstantFill([], [name], shape=[1, 2], value=0)
            init_net.CreateTensorVector([], [bvec_map[name]])

        # Reader: each run adds ONE to every blob in place.
        reader_net = core.Net('reader_net')
        for name in blobs:
            reader_net.Add([name, ONE], [name])

        collect_net = core.Net('collect_net')
        bvec = [bvec_map[name] for name in blobs]
        collect_net.CollectTensor(
            bvec + blobs,
            bvec,
            num_to_collect=num_to_collect,
        )

        print('Collect Net Proto: {}'.format(collect_net.Proto()))

        plan = core.Plan('collect_data')
        plan.AddStep(core.execution_step('collect_init', init_net))
        collect_step = core.execution_step(
            'collect_data', [reader_net, collect_net],
            num_iter=max_example_to_cover)
        plan.AddStep(collect_step)
        workspace.RunPlan(plan)

        # concat the collected tensors
        concat_net = core.Net('concat_net')
        bconcated_map = {name: name + '_concated' for name in blobs}
        bsize_map = {name: name + '_size' for name in blobs}
        for name in blobs:
            concat_net.ConcatTensorVector(
                [bvec_map[name]], [bconcated_map[name]])
            concat_net.TensorVectorSize([bvec_map[name]], [bsize_map[name]])

        workspace.RunNetOnce(concat_net)

        # check data
        first_blob = blobs[0]
        reference_result = workspace.FetchBlob(bconcated_map[first_blob])
        self.assertEqual(reference_result.shape, (expected_count, 2))
        size = workspace.FetchBlob(bsize_map[first_blob])
        self.assertEqual(tuple(), size.shape)
        self.assertEqual(expected_count, size.item())

        hist, _ = np.histogram(reference_result[:, 0],
                               bins=10,
                               range=(1, max_example_to_cover))
        print('Sample histogram: {}'.format(hist))

        # Every histogram bin must hold more than 70% of an even share.
        self.assertTrue(all(hist > 0.7 * (num_to_collect / 10)))
        # The remaining blobs must have collected exactly the same samples.
        for name in blobs[1:]:
            result = workspace.FetchBlob(bconcated_map[name])
            self.assertEqual(reference_result.tolist(), result.tolist())
Esempio n. 30
0
    def test_stateful_convolution_forward_only(
        self,
        sequence_length,
        conv_window,
        batch_size,
        state_size,
    ):
        '''
        This unit test demonstrates another way of using RecurrentNetwork.

        Imagine that you want to compute a convolution over a sequence,
        but the sequence elements are not given to you from the beginning,
        so you have to loop over the sequence and compute the convolution
        for each element separately. This situation can occur
        during the inference/generation step of a neural network.

        First of all, you have to provide the actual input via recurrent
        states, since the input of RecurrentNetwork should be known in
        advance. Here, we use `fake_inputs` as the input,
        and it's used by the op to extract batch size and sequence length.
        The actual input sequence is stored in the recurrent state
        `input_state`. At every step we generate a new element via
        input_state_t (in this example, input_state_t is generated at random,
        but in a real situation it can be created using the convolution
        output from the previous step).

        A few important differences from the regular RecurrentNetwork
        use case:

        1. input_state_t_prev is not only a single previous element of the
        input_state sequence. It is the last conv_window elements including
        (!) the current one - input_state_t. We specify that using the
        `link_window` argument of RecurrentNetwork. We need that many
        elements to compute a single convolution step. Also, note that
        `link_window` specifies how many elements to link starting at the
        `timestep` + `link_offset` position.

        2. The first few steps might require additional zero padding from the
        left, since not enough elements of the input_state sequence are
        available. So the initial_state for input_state contains several
        elements (exactly as many pads as we need for the first step). Also,
        because of that, all offsets into the input_state sequence are
        shifted by the length of initial_input_state: see the `link_offset`
        and `alias_offset` arguments of RecurrentNetwork.

        In this test, we assert that we get the same result
        if we apply the convolution over all elements simultaneously,
        since the whole input_state sequence was generated at the end.
        '''
        model = CNNModelHelper(name='model')
        fake_inputs = model.param_init_net.UniformFill(
            [],
            'fake_inputs',
            min=-1.0,
            max=1.0,
            shape=[sequence_length, batch_size, state_size],
        )
        initial_input_state = model.param_init_net.ConstantFill(
            [],
            'initial_input_state',
            value=0.0,
            shape=[conv_window - 1, batch_size, state_size],
        )
        initial_output_state = model.param_init_net.ConstantFill(
            [],
            'initial_output_state',
            value=0.0,
            shape=[1, batch_size, state_size],
        )
        step_model = CNNModelHelper(name='step_model', param_model=model)
        (
            fake_input_t,
            timestep,
            input_state_t_prev,
        ) = step_model.net.AddExternalInputs(
            'fake_input_t',
            'timestep',
            'input_state_t_prev',
        )
        conv_filter = step_model.param_init_net.XavierFill(
            [],
            'conv_filter',
            shape=[state_size, 1, conv_window, state_size],
        )
        conv_bias = step_model.param_init_net.ConstantFill(
            [],
            'conv_bias',
            shape=[state_size],
            value=0.0,
        )
        step_model.params.extend([conv_filter, conv_bias])
        input_state_t = step_model.net.UniformFill(
            [],
            'input_state_t',
            min=-1.0,
            max=1.0,
            shape=[1, batch_size, state_size],
        )
        output_state_t = self._convolution_1d(
            model=step_model,
            inputs=input_state_t_prev,
            conv_window=conv_window,
            conv_filter=conv_filter,
            conv_bias=conv_bias,
            output_name='output_state_t',
            left_pad=False,
        )
        initial_recurrent_states = [initial_input_state, initial_output_state]
        all_inputs = ([fake_inputs] + step_model.params +
                      initial_recurrent_states)
        all_outputs = ['input_state_all', 'output_state_all']
        recurrent_states = ['input_state', 'output_state']
        # The index/name arguments below are built as real lists: under
        # Python 3 `map` returns a one-shot iterator, which is not a valid
        # repeated operator argument once it has been traversed.
        input_state_all, output_state_all, _ = model.net.RecurrentNetwork(
            all_inputs,
            all_outputs + ['step_workspaces'],
            param=[all_inputs.index(p) for p in step_model.params],
            alias_src=recurrent_states,
            alias_dst=all_outputs,
            alias_offset=[conv_window - 1, 1],
            recurrent_states=recurrent_states,
            initial_recurrent_state_ids=[
                all_inputs.index(state) for state in initial_recurrent_states
            ],
            link_internal=[
                str(blob)
                for blob in (input_state_t_prev, input_state_t, output_state_t)
            ],
            link_external=['input_state', 'input_state', 'output_state'],
            link_offset=[0, conv_window - 1, 1],
            link_window=[conv_window, 1, 1],
            backward_link_internal=[],
            backward_link_external=[],
            backward_link_offset=[],
            step_net=str(step_model.net.Proto()),
            backward_step_net='',
            timestep='timestep' if timestep is None else str(timestep),
            outputs_with_grads=[],
        )

        # Reference: the same convolution applied to the whole generated
        # input sequence at once, with left padding.
        output_states_2 = self._convolution_1d(
            model=model,
            inputs=input_state_all,
            conv_window=conv_window,
            conv_filter=conv_filter,
            conv_bias=conv_bias,
            output_name='output_states_2',
            left_pad=True,
        )

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

        # The step-by-step result must match the all-at-once result.
        np.testing.assert_almost_equal(
            workspace.FetchBlob(output_state_all),
            workspace.FetchBlob(output_states_2),
        )