Example 1
    def test_get_predictor_export_meta_and_workspace_full(self):
        model = Model()

        state_normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS)
            for i in range(1, 5)
        }
        action_normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS)
            for i in range(5, 9)
        }

        extractor = PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization_parameters,
            action_normalization_parameters=action_normalization_parameters,
            normalize=False,
        )
        output_transformer = TestOutputTransformer()

        pem, ws = model.get_predictor_export_meta_and_workspace(
            feature_extractor=extractor, output_transformer=output_transformer)
        # model has 2 params + 1 const. extractor has 1 const. output_transformer has 1 const.
        self.assertEqual(5, len(pem.parameters))
        for p in pem.parameters:
            self.assertTrue(ws.HasBlob(p))
        self.assertEqual(3, len(pem.inputs))
        self.assertEqual(5, len(pem.outputs))
        self.assertEqual(
            {
                "output/string_weighted_multi_categorical_features.lengths",
                "output/string_weighted_multi_categorical_features.keys",
                "output/string_weighted_multi_categorical_features.values.lengths",
                "output/string_weighted_multi_categorical_features.values.keys",
                "output/string_weighted_multi_categorical_features.values.values",
            },
            set(pem.outputs),
        )

        input_prototype = model.input_prototype()

        with tempfile.TemporaryDirectory() as tmpdirname:
            db_path = os.path.join(tmpdirname, "model")
            logger.info("DB path: {}".format(db_path))
            db_type = "minidb"
            with ws._ctx:
                save_to_db(db_type, db_path, pem)

            # Load the model from DB file and run it
            net = prepare_prediction_net(db_path, db_type)

            state_features = input_prototype.state.float_features
            action_features = input_prototype.action.float_features
            float_features_values = (torch.cat(
                (state_features, action_features), dim=1).reshape(-1).numpy())
            float_features_keys = np.arange(1, 9)
            float_features_lengths = np.array([8], dtype=np.int32)

            workspace.FeedBlob("input/float_features.keys",
                               float_features_keys)
            workspace.FeedBlob("input/float_features.values",
                               float_features_values)
            workspace.FeedBlob("input/float_features.lengths",
                               float_features_lengths)

            workspace.RunNet(net)

            model_sum, model_mul, model_plus_one, model_linear = model(
                input_prototype)

            lengths = workspace.FetchBlob(
                "output/string_weighted_multi_categorical_features.lengths")
            keys = workspace.FetchBlob(
                "output/string_weighted_multi_categorical_features.keys")
            values_lengths = workspace.FetchBlob(
                "output/string_weighted_multi_categorical_features.values.lengths"
            )
            values_keys = workspace.FetchBlob(
                "output/string_weighted_multi_categorical_features.values.keys"
            )
            values_values = workspace.FetchBlob(
                "output/string_weighted_multi_categorical_features.values.values"
            )

            N = 1
            npt.assert_array_equal(np.ones(N, dtype=np.int32), lengths)
            npt.assert_array_equal(np.zeros(N, dtype=np.int64), keys)
            npt.assert_array_equal([1] * N, values_lengths)
            npt.assert_array_equal(np.array([b"TestAction"], dtype=np.object),
                                   values_keys)
            npt.assert_array_equal(model_linear.detach().numpy().reshape(-1),
                                   values_values)
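
Both predictor tests in this collection follow the same Caffe2 round trip: feed input blobs, run the net by name, then fetch output blobs. A minimal, self-contained sketch of that cycle (the net and blob names below are illustrative, not taken from the test above):

import numpy as np
from caffe2.python import core, workspace

net = core.Net("double")
net.Scale(["x"], ["y"], scale=2.0)  # y = 2 * x

workspace.FeedBlob("x", np.array([1.0, 2.0], dtype=np.float32))
workspace.CreateNet(net)             # instantiate the operators once
workspace.RunNet(net.Proto().name)   # run the created net by name
print(workspace.FetchBlob("y"))      # -> [2. 4.]
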
Example 2
    def testEqualToCudnn(self):
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA)):
            T = 8
            batch_size = 4
            input_dim = 8
            hidden_dim = 31

            workspace.FeedBlob("seq_lengths",
                               np.array([T] * batch_size, dtype=np.int32))
            workspace.FeedBlob(
                "target",
                np.zeros([T, batch_size, hidden_dim], dtype=np.float32))
            workspace.FeedBlob(
                "hidden_init",
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))
            workspace.FeedBlob(
                "cell_init",
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))

            own_model = model_helper.ModelHelper(name="own_lstm")

            input_shape = [T, batch_size, input_dim]
            cudnn_model = model_helper.ModelHelper(name="cudnn_lstm")
            input_blob = cudnn_model.param_init_net.UniformFill(
                [], "input", shape=input_shape)
            workspace.FeedBlob(
                "CUDNN/hidden_init_cudnn",
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))
            workspace.FeedBlob(
                "CUDNN/cell_init_cudnn",
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))

            cudnn_output, cudnn_last_hidden, _, param_extract = rnn_cell.cudnn_LSTM(
                model=cudnn_model,
                input_blob=input_blob,
                initial_states=("hidden_init_cudnn", "hidden_init_cudnn"),
                dim_in=input_dim,
                dim_out=hidden_dim,
                scope="CUDNN",
                return_params=True,
            )
            cudnn_loss = cudnn_model.AveragedLoss(
                cudnn_model.SquaredL2Distance([cudnn_output, "target"],
                                              "CUDNN/dist"), "CUDNN/loss")

            own_output, own_last_hidden, _, last_state, own_params = rnn_cell.LSTM(
                model=own_model,
                input_blob=input_blob,
                seq_lengths="seq_lengths",
                initial_states=("hidden_init", "cell_init"),
                dim_in=input_dim,
                dim_out=hidden_dim,
                scope="OWN",
                return_params=True,
            )
            own_loss = own_model.AveragedLoss(
                own_model.SquaredL2Distance([own_output, "target"],
                                            "OWN/dist"), "OWN/loss")

            # Add gradients
            cudnn_model.AddGradientOperators([cudnn_loss])
            own_model.AddGradientOperators([own_loss])

            # Add parameter updates
            LR = cudnn_model.param_init_net.ConstantFill([],
                                                         shape=[1],
                                                         value=0.01)
            ONE = cudnn_model.param_init_net.ConstantFill([],
                                                          shape=[1],
                                                          value=1.0)
            for param in cudnn_model.GetParams():
                cudnn_model.WeightedSum(
                    [param, ONE, cudnn_model.param_to_grad[param], LR], param)
            for param in own_model.GetParams():
                own_model.WeightedSum(
                    [param, ONE, own_model.param_to_grad[param], LR], param)

            workspace.RunNetOnce(cudnn_model.param_init_net)
            workspace.CreateNet(cudnn_model.net)

            ##
            ##  CUDNN LSTM MODEL EXECUTION
            ##
            # Get initial values from CuDNN LSTM so we can feed them
            # to our own.
            (param_extract_net, param_extract_mapping) = param_extract
            workspace.RunNetOnce(param_extract_net)
            cudnn_lstm_params = {}
            for input_type, pars in param_extract_mapping.items():
                cudnn_lstm_params[input_type] = {}
                for k, v in pars.items():
                    cudnn_lstm_params[input_type][k] = workspace.FetchBlob(
                        v[0])

            # Run the model 3 times, so that some parameter updates are done
            workspace.RunNet(cudnn_model.net.Proto().name, 3)

            ##
            ## OWN LSTM MODEL EXECUTION
            ##
            # Map the cuDNN parameters to our own
            workspace.RunNetOnce(own_model.param_init_net)
            rnn_cell.InitFromLSTMParams(own_params, cudnn_lstm_params)

            # Run the model 3 times, so that some parameter updates are done
            workspace.CreateNet(own_model.net)
            workspace.RunNet(own_model.net.Proto().name, 3)

            ##
            ## COMPARE RESULTS
            ##
            # Then compare that final results after 3 runs are equal
            own_output_data = workspace.FetchBlob(own_output)
            own_last_hidden = workspace.FetchBlob(own_last_hidden)
            own_loss = workspace.FetchBlob(own_loss)

            cudnn_output_data = workspace.FetchBlob(cudnn_output)
            cudnn_last_hidden = workspace.FetchBlob(cudnn_last_hidden)
            cudnn_loss = workspace.FetchBlob(cudnn_loss)

            self.assertTrue(np.allclose(own_output_data, cudnn_output_data))
            self.assertTrue(np.allclose(own_last_hidden, cudnn_last_hidden))
            self.assertTrue(np.allclose(own_loss, cudnn_loss))
Example 3
def im_detections(model, im, anchors):
    """Generate RetinaNet detections on a single image."""
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))

    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1:, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_inds is empty if we impose the 0.05 threshold at all levels.
        # That would leave no detections for this image. Hence, for lvl 7, which
        # has a small spatial resolution, we lower the threshold to 0.0.
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(cls_prob_ravel[candidate_inds],
                               -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])

    # Combine predictions across all levels and retain the top scoring by class
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]
    boxes = detections[:, 0:4]
    scores = detections[:, 4]
    classes = detections[:, 5]
    return boxes, scores, classes
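
A toy illustration of the ravel / argpartition / unravel_index trick used above to pull the top-scoring cells out of the 5-D class-probability tensor (the shape here is made up):

import numpy as np

cls_prob = np.random.rand(1, 3, 4, 2, 2).astype(np.float32)  # (N, A, classes, H, W)
flat = cls_prob.ravel()
top = np.argpartition(flat, -5)[-5:]                # indices of the 5 largest scores
inds_5d = np.array(np.unravel_index(top, cls_prob.shape)).transpose()
anchor_ids, classes = inds_5d[:, 1], inds_5d[:, 2]  # recovered per-axis coordinates
y, x = inds_5d[:, 3], inds_5d[:, 4]
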
Example 4
    def test_slws_fused_8bit_rowwise_acc32_nnpi(self, seed, num_rows,
                                                embedding_dim, batch_size,
                                                max_weight):
        workspace.GlobalInit([
            "caffe2",
            "--glow_global_fp16=0",
            "--glow_global_fused_scale_offset_fp16=0",
            "--glow_global_force_sls_fp16_accum=0",
        ])

        workspace.ResetWorkspace()
        np.random.seed(seed)
        data = np.random.rand(num_rows, embedding_dim).astype(np.float32)
        lengths = np.random.choice(np.arange(1, num_rows),
                                   batch_size).astype(np.int32)

        indices = []
        for length in lengths:
            indices.extend(np.random.choice(np.arange(1, num_rows), length))
        indices = np.asarray(indices).astype(np.int64)

        weights = np.random.uniform(low=0,
                                    high=max_weight,
                                    size=[len(indices)]).astype(np.float32)

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwise",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))

        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        ref_net.external_output.append("Y")
        ref_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))

        workspace.FeedBlob("data", data)
        workspace.RunOperatorOnce(
            core.CreateOperator("FloatToFused8BitRowwiseQuantized", ["data"],
                                ["quantized_data"]))
        onnxified_net = onnxifi_caffe2_net(
            pred_net,
            {},
            max_batch_size=batch_size,
            max_seq_size=batch_size * np.max(lengths),
            debug=True,
            adjust_batch=True,
            use_onnx=False,
        )
        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)
        workspace.FeedBlob("weights", weights)

        workspace.CreateNet(onnxified_net)
        workspace.CreateNet(ref_net)

        workspace.RunNet(onnxified_net.name)
        Y_glow = workspace.FetchBlob("Y")

        workspace.RunNet(ref_net.name)
        Y_ref = workspace.FetchBlob("Y")

        diff = np.abs((Y_ref - Y_glow) / (Y_ref + 1e-8))
        max_err = np.max(diff, axis=1)
        num_offenders = (max_err > 0).sum()
        if num_offenders > 0:
            print_test_debug_info(
                "test_slws_fused_8bit_rowwise_acc32_nnpi",
                {
                    "seed": seed,
                    "num_rows": num_rows,
                    "embedding_dim": embedding_dim,
                    "batch_size": batch_size,
                    "indices": indices,
                    "data": data.shape,
                    "lengths": lengths,
                    "weights": weights,
                    "Y_glow": Y_glow,
                    "Y_ref": Y_ref,
                    "diff": diff,
                    "rowwise_diff": np.max(diff, axis=1),
                },
            )
            assert 0
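
This test and the other NNPI tests below share the same acceptance check: an elementwise relative error reduced to a per-row maximum. A standalone sketch of that check, mirroring the arithmetic above (the helper name is ours):

import numpy as np

def max_rowwise_relative_error(y_ref, y_test, eps=1e-8):
    # elementwise (ref - test) / (ref + eps) in absolute value,
    # reduced to a per-row maximum, exactly as in the test above
    diff = np.abs((y_ref - y_test) / (y_ref + eps))
    return np.max(diff, axis=1)
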
Example 5
    def train_with_eval(
        self,
        num_epoch=1,
        report_interval=0,
        eval_during_training=False,
    ):
        ''' Fastest mode: report_interval = 0
            Medium mode: report_interval > 0, eval_during_training=False
            Slowest mode: report_interval > 0, eval_during_training=True
        '''
        num_batch_per_epoch = int(self.input_data_store['train'][1] /
                                  self.batch_size)
        if self.input_data_store['train'][1] % self.batch_size != 0:
            num_batch_per_epoch += 1
            print('[Warning]: batch_size does not divide the data evenly. ' +
                  'Running on {} examples instead of {}'.format(
                      num_batch_per_epoch * self.batch_size,
                      self.input_data_store['train'][1]))
        print('<<< Run {} iterations'.format(num_epoch * num_batch_per_epoch))

        train_net = self.net_store['train_net']
        if report_interval > 0:
            print('>>> Training with Reports')
            num_eval = int(num_epoch / report_interval)
            num_unit_iter = int((num_batch_per_epoch * num_epoch) / num_eval)
            if eval_during_training and 'eval_net' in self.net_store:
                print('>>> Training with Eval Reports (Slowest mode)')
                eval_net = self.net_store['eval_net']
            for i in range(num_eval):
                workspace.RunNet(train_net.Proto().name,
                                 num_iter=num_unit_iter)
                self.reports['epoch'].append((i + 1) * report_interval)
                train_loss = np.asscalar(schema.FetchRecord(self.loss).get())
                self.reports['train_loss'].append(train_loss)
                # Add metrics
                train_l1_metric = np.asscalar(
                    schema.FetchRecord(
                        self.model.metrics_schema.l1_metric).get())
                self.reports['train_l1_metric'].append(train_l1_metric)
                train_scaled_l1_metric = np.asscalar(
                    schema.FetchRecord(
                        self.model.metrics_schema.scaled_l1_metric).get())
                self.reports['train_scaled_l1_metric'].append(
                    train_scaled_l1_metric)

                if eval_during_training and 'eval_net' in self.net_store:
                    workspace.RunNet(eval_net.Proto().name,
                                     num_iter=num_unit_iter)
                    eval_loss = np.asscalar(
                        schema.FetchRecord(self.loss).get())
                    # Add metrics
                    self.reports['eval_loss'].append(eval_loss)
                    eval_l1_metric = np.asscalar(
                        schema.FetchRecord(
                            self.model.metrics_schema.l1_metric).get())
                    self.reports['eval_l1_metric'].append(eval_l1_metric)
                    eval_scaled_l1_metric = np.asscalar(
                        schema.FetchRecord(
                            self.model.metrics_schema.scaled_l1_metric).get())
                    self.reports['eval_scaled_l1_metric'].append(
                        eval_scaled_l1_metric)
        else:
            print('>>> Training without Reports (Fastest mode)')
            workspace.RunNet(
                train_net,
                num_iter=num_epoch * num_batch_per_epoch,
            )

        print('>>> Saving test model')

        # Save Net
        exporter.save_net(self.net_store['pred_net'], self.model,
                          self.model_name + '_init',
                          self.model_name + '_predict')

        # Save Loss Trend
        if report_interval > 0:
            self.save_loss_trend(self.model_name)
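
The three modes from the docstring map onto calls like the following (the trainer object and its construction are assumed, not shown in this snippet):

trainer.train_with_eval(num_epoch=10)                     # fastest: no reports
trainer.train_with_eval(num_epoch=10, report_interval=1)  # medium: train metrics only
trainer.train_with_eval(num_epoch=10, report_interval=1,
                        eval_during_training=True)        # slowest: train + eval metrics
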
Example 6
    def forward(self, niters):
        workspace.RunNet(self.net, niters, False)
Example 7
                      pad=1)
    conv3 = brew.relu(model, conv3, conv3)
    fc3 = brew.fc(model, conv3, 'fc3', dim_in=256 * 28 * 28, dim_out=512)
    fc3 = brew.relu(model, fc3, fc3)
    pred = brew.fc(model, fc3, 'pred', 512, 10)
    softmax = brew.softmax(model, pred, 'softmax')
    return softmax


core.GlobalInit(['caffe2', '--caffe2_log_level=0'])
root_folder, data_folder = DownloadMNIST()
workspace.ResetWorkspace(root_folder)

arg_scope = {"order": "NCHW"}
test_model = model_helper.ModelHelper(name="mnist_test",
                                      arg_scope=arg_scope,
                                      init_params=True)
data, label = AddInput(test_model,
                       batch_size=1,
                       db=os.path.join(data_folder, 'mnist-test-nchw-lmdb'),
                       db_type='lmdb')

softmax = AddLeNetModel(test_model, data)

# run a test pass on the test net
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
test_accuracy = np.zeros(10000)
for i in tqdm.tqdm(range(10000)):
    workspace.RunNet(test_model.net.Proto().name)
Example 8
               db_type="lmdb",
               load_all=1,
               keep_device=1,
               absolute_path=0)
     workspace.RunNetOnce(load.net)
     workspace.FeedBlob('iter', [iter_val])
     save_trained_model(deploy)
 if load_trained:
     load_crunk(load_trained, device_opts)
 loss = np.zeros(train_iters)
 start = time.time()
 name = train.net.Proto().name
 numstraight = 0
 i = 0
 while i < train_iters:
     workspace.RunNet(name)
     if i == 0:
         realstart = time.time()
     loss[i] = workspace.FetchBlob('avgloss')
     if i % 200 == 0:
         LR = workspace.FetchBlob('LR')
         stop = time.time()
         j = i
         if j == 0: j = 1
         st = workspace.FetchBlob('output')
         steer = st[0, 0]
         lb = workspace.FetchBlob('label')
         label = lb[0, 0]
         outputs = []
         for q in st:
             outputs.append(q[0])
    workspace.FeedBlob("data", image)
    workspace.FeedBlob("label", label)
    break
Example 9
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)

num_correct = 0
total = 0

# Cycle through the test dictionary once, with batch size = 1, meaning we only consider one stack at a time
for stack, label in test_dataset.read(batch_size=1):

    # Run the stack through the predictor and get the result array
    workspace.FeedBlob("data", stack, device_option=device_opts)
    workspace.FeedBlob("label", label, device_option=device_opts)
    workspace.RunNet(test_model.net)
    results = workspace.FetchBlob('softmax')[0]

    print(results)
    # Get the top-1 prediction
    max_index, max_value = max(enumerate(results), key=operator.itemgetter(1))

    print "Prediction: ", max_index
    print "Confidence: ", max_value

    # Update confusion matrix
    cmat[label, max_index] += 1

    if max_index == label:
        num_correct += 1
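
The max(enumerate(...)) idiom above for the top-1 prediction can be written more directly with np.argmax; an equivalent sketch:

import numpy as np

results = np.array([0.1, 0.7, 0.2], dtype=np.float32)  # e.g. a fetched softmax row
max_index = int(np.argmax(results))     # top-1 class index
max_value = float(results[max_index])   # its confidence
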
Example 10
def RunValidation(model, i):
    workspace.RunNet(model.net)
    print('after val:')
    PrintStatistics(i)
Example 11
from caffe2.python import cnn, workspace, core
from caffe2.proto import caffe2_pb2
import numpy as np
import time

#device_opts = caffe2_pb2.DeviceOption()
#device_opts.device_type = caffe2_pb2.CUDA
#device_opts.cuda_gpu_id = 0
device_opts = core.DeviceOption(caffe2_pb2.CUDA, 0)
net = core.Net("smoothL1Loss_test")
net.SmoothL1LossGradient(["data1", "data2", "avg_loss"],
                         "loss",
                         device_option=device_opts)

print(net.Proto())

data1 = np.load('data1.npy')
data2 = np.load('data2.npy')
avg_loss = np.ones(1, dtype=np.float32)

workspace.FeedBlob("data1", data1, device_option=device_opts)
workspace.FeedBlob("data2", data2, device_option=device_opts)
workspace.FeedBlob("avg_loss", avg_loss, device_option=device_opts)
workspace.CreateNet(net.Proto())

workspace.RunNet("smoothL1Loss_test", 1)

caffe2_out = workspace.FetchBlob('loss')

print(caffe2_out)
Example 12
def run_conv_or_fc(test_case,
                   init_net,
                   net,
                   X,
                   W,
                   b,
                   op_type,
                   engine,
                   order,
                   gc,
                   outputs,
                   scale=None,
                   zero_point=None):
    if order:
        # Conv
        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
    else:
        # FC
        Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])

    # We run DNNLOWP ops multiple times to exercise their first runs, which do
    # caching and therefore take different code paths from the subsequent runs.

    # test_case.ws.run re-creates the operators every time, so this also covers
    # the case of multiple nets sharing the same workspace.
    test_case.ws.create_blob("X").feed(X, device_option=gc)
    test_case.ws.create_blob("W").feed(W, device_option=gc)
    test_case.ws.create_blob("b").feed(b, device_option=gc)
    if scale is not None and zero_point is not None:
        test_case.ws.create_blob("scale").feed(scale, device_option=gc)
        test_case.ws.create_blob("zero_point").feed(zero_point,
                                                    device_option=gc)

    if init_net:
        test_case.ws.run(init_net)
    for i in range(1 if engine == "" else 2):
        test_case.ws.run(net)
        Y = test_case.ws.blobs["Y"].fetch()
        if order:
            outputs.append(
                Output(Y=Y, op_type=op_type, engine=engine, order=order))
        else:
            outputs.append(Output(Y=Y, op_type=op_type, engine=engine))

    # workspace.CreateNet + workspace.RunNet reuses the same operator
    if engine != "":
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("W", W)
        workspace.FeedBlob("b", b)
        if scale is not None and zero_point is not None:
            workspace.FeedBlob("scale", scale)
            workspace.FeedBlob("zero_point", zero_point)

        if init_net:
            workspace.RunNetOnce(init_net)
        workspace.CreateNet(net)
        for i in range(2):
            workspace.RunNet(net)
            Y = workspace.FetchBlob("Y")
            if order:
                outputs.append(
                    Output(Y=Y, op_type=op_type, engine=engine, order=order))
            else:
                outputs.append(Output(Y=Y, op_type=op_type, engine=engine))
Example 13
# Weighted sum
train_net.WeightedSum([W, ONE, gradient_map[W], LR], W)
train_net.WeightedSum([B, ONE, gradient_map[B], LR], B)

# Let's show the graph again.
graph = net_drawer.GetPydotGraph(train_net.Proto().op, "train", rankdir="LR")
graph.write_svg('Sixth.svg')

workspace.RunNetOnce(init_net)
workspace.CreateNet(train_net)
# ------------------------------------------------------------------------------------
print("Before training, W is: {}".format(workspace.FetchBlob("W")))
print("Before training, B is: {}".format(workspace.FetchBlob("B")))

for i in range(100):
    workspace.RunNet(train_net.Proto().name)

print("After training, W is: {}".format(workspace.FetchBlob("W")))
print("After training, B is: {}".format(workspace.FetchBlob("B")))

print("Ground truth W is: {}".format(workspace.FetchBlob("W_gt")))
print("Ground truth B is: {}".format(workspace.FetchBlob("B_gt")))

# ------------------------------------------------------------------------------------
workspace.RunNetOnce(init_net)
w_history = []
b_history = []
for i in range(50):
    workspace.RunNet(train_net.Proto().name)
    w_history.append(workspace.FetchBlob("W"))
    b_history.append(workspace.FetchBlob("B"))
Example 14
    def test_get_predictor_export_meta_and_workspace_with_feature_extractor(
            self):
        model = Model()

        state_normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS)
            for i in range(1, 5)
        }
        action_normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS)
            for i in range(5, 9)
        }

        extractor = PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization_parameters,
            action_normalization_parameters=action_normalization_parameters,
            normalize=False,
        )

        pem, ws = model.get_predictor_export_meta_and_workspace(
            feature_extractor=extractor)
        # model has 2 params + 1 const. extractor has 1 const.
        self.assertEqual(4, len(pem.parameters))
        for p in pem.parameters:
            self.assertTrue(ws.HasBlob(p))
        self.assertEqual(3, len(pem.inputs))
        self.assertEqual(4, len(pem.outputs))

        input_prototype = model.input_prototype()

        with tempfile.TemporaryDirectory() as tmpdirname:
            db_path = os.path.join(tmpdirname, "model")
            logger.info("DB path: ", db_path)
            db_type = "minidb"
            with ws._ctx:
                save_to_db(db_type, db_path, pem)

            # Load the model from DB file and run it
            net = prepare_prediction_net(db_path, db_type)

            state_features = input_prototype.state.float_features
            action_features = input_prototype.action.float_features
            float_features_values = (torch.cat(
                (state_features, action_features), dim=1).reshape(-1).numpy())
            float_features_keys = np.arange(1, 9)
            float_features_lengths = np.array([8], dtype=np.int32)

            workspace.FeedBlob("input/float_features.keys",
                               float_features_keys)
            workspace.FeedBlob("input/float_features.values",
                               float_features_values)
            workspace.FeedBlob("input/float_features.lengths",
                               float_features_lengths)

            workspace.RunNet(net)
            net_sum = workspace.FetchBlob("sum")
            net_mul = workspace.FetchBlob("mul")
            net_plus_one = workspace.FetchBlob("plus_one")
            net_linear = workspace.FetchBlob("linear")

            model_sum, model_mul, model_plus_one, model_linear = model(
                input_prototype)

            npt.assert_array_equal(model_sum.numpy(), net_sum)
            npt.assert_array_equal(model_mul.numpy(), net_mul)
            npt.assert_array_equal(model_plus_one.numpy(), net_plus_one)
            npt.assert_allclose(model_linear.detach().numpy(),
                                net_linear,
                                rtol=1e-4)
Example 15
def main():
    root_path = '/home/osboxes/zementis/scalogram/fault_diagnosis'
    data_path = os.path.join(root_path, 'data')
    labels_path = os.path.join(data_path, 'labels.txt')
    labels_to_classes_map = get_labels_to_classes_map(labels_path)
    fault_types_path = {
        'baseLine': os.path.join(data_path, 'raw_signals', 'baseLine'),
        'rollingDefect': os.path.join(data_path, 'raw_signals',
                                      'rollingDefect'),
        'innerRace': os.path.join(data_path, 'raw_signals', 'innerRace'),
        'outerRace': os.path.join(data_path, 'raw_signals', 'outerRace')
    }
    sub_signal_len = 400
    # The sample rate is 12 kHz and the approximate motor speed is 1797 RPM. Therefore, there are approximately 401
    # sample points per revolution (12000 / (1797 / 60)).
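    # Checking that figure: 12000 samples/s / (1797 rev/min / 60 s/min)
    # = 12000 / 29.95 ~= 400.7 samples per revolution, hence sub_signal_len = 400.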
    for fault_type, fault_dir_path in fault_types_path.items():
        sub_signals = get_sub_signals(fault_dir_path, sub_signal_len)
        sub_signal_idx = 0
        for sub_signal in sub_signals:
            sub_signal_idx += 1
            scalo = get_scalogram(sub_signal)
            scaled_scalo = get_scaled_data(
                scalo)  # scales an array to have values between 0.0 and 1.0
            img_obj = PIL.Image.fromarray(scaled_scalo)
            img_f_name = str(sub_signal_idx) + '_' + fault_type + '.tiff'
            img_obj.save(os.path.join(data_path, img_f_name))
            if sub_signal_idx == 50:
                break  # stop after creating 50 images in each class
    # Create txt files mapping image names to classes
    img_to_class_paths = create_img_to_class_files(data_path,
                                                   labels_to_classes_map)
    # Create lmdb files
    lmdb_paths = write_lmdb_files(data_path, img_to_class_paths)
    model_files_path = os.path.join(root_path, 'model_files')
    if not os.path.isdir(model_files_path):
        os.makedirs(model_files_path)
    workspace.ResetWorkspace(model_files_path)
    unique_timestamp = str(
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    checkpoint_dir = os.path.join(model_files_path, unique_timestamp)
    os.makedirs(checkpoint_dir)
    print("Checkpoint output location: ", checkpoint_dir)

    # Dataset specific params
    image_width = sub_signal_len
    image_height = sub_signal_len
    image_channels = 1
    num_classes = 4
    init_net_out_fname = 'init_net.pb'
    predict_net_out_fname = 'predict_net.pb'

    # Training params
    n_iters = 600  # total training iterations
    batch_size = 10  # batch size for training
    n_val_images = 30  # total number of validation images
    validation_interval = 50  # validate every <validation_interval> training iterations
    n_checkpoint_iters = 200  # output checkpoint db every <checkpoint_iters> iterations

    # TRAINING MODEL
    train_model = model_helper.ModelHelper(name="train_net")
    data, label = add_input(train_model,
                            batch_size=batch_size,
                            db=lmdb_paths['train'],
                            db_type='lmdb')
    softmax = add_cnn_model_1(train_model, data, num_classes, image_height,
                              image_width, image_channels)
    add_optmzer_lossfunc(train_model, softmax, label)
    add_check_points(train_model,
                     unique_timestamp,
                     n_checkpoint_iters,
                     db_type="lmdb")

    # VALIDATION MODEL
    # Initialize with ModelHelper class without re-initializing params
    val_model = model_helper.ModelHelper(name="val_net", init_params=False)
    data, label = add_input(val_model,
                            batch_size=n_val_images,
                            db=lmdb_paths['val'],
                            db_type='lmdb')
    softmax = add_cnn_model_1(val_model, data, num_classes, image_height,
                              image_width, image_channels)
    add_accuracy(val_model, softmax, label)

    # DEPLOY MODEL
    # Initialize with ModelHelper class without re-initializing params
    deploy_model = model_helper.ModelHelper(name="deploy_net",
                                            init_params=False)
    # Add model definition, expect input blob called "data"
    add_cnn_model_1(deploy_model, "data", num_classes, image_height,
                    image_width, image_channels)
    print("Training, Validation, and Deploy models all defined!")

    # Initialize and create the training network
    workspace.RunNetOnce(train_model.param_init_net)
    workspace.CreateNet(train_model.net, overwrite=True)
    # Initialize and create validation network
    workspace.RunNetOnce(val_model.param_init_net)
    workspace.CreateNet(val_model.net, overwrite=True)
    # Placeholder to track loss and validation accuracy
    training_loss = np.zeros(int(math.ceil(n_iters / validation_interval)))
    val_accuracy = np.zeros(int(math.ceil(n_iters / validation_interval)))
    val_count = 0
    val_iter_list = np.zeros(int(math.ceil(n_iters / validation_interval)))

    # run the network (forward & backward pass)
    for i in range(n_iters):
        workspace.RunNet(train_model.net)
        # Validate every <validation_interval> training iterations
        if (i % validation_interval) == 0:
            print("Training iter: ", i)
            training_loss[val_count] = workspace.FetchBlob('loss')
            workspace.RunNet(val_model.net)
            val_accuracy[val_count] = workspace.FetchBlob('accuracy')
            print("Loss: ", str(training_loss[val_count]))
            print("Validation accuracy: ", str(val_accuracy[val_count]) + "\n")
            val_iter_list[val_count] = i
            val_count += 1

    fig = pyplot.figure()
    fig.add_subplot(111)
    pyplot.title("Training Loss and Validation Accuracy")
    pyplot.plot(val_iter_list, training_loss, 'b')
    pyplot.plot(val_iter_list, val_accuracy, 'r')
    pyplot.xlabel("Training iteration")
    pyplot.legend(('Training Loss', 'Validation Accuracy'), loc='upper right')
    pyplot.savefig("loss_and_accuracy.png")
    pyplot.close()

    # Save trained model
    workspace.RunNetOnce(deploy_model.param_init_net)
    workspace.CreateNet(deploy_model.net, overwrite=True)
    init_net, predict_net = mobile_exporter.Export(workspace, deploy_model.net,
                                                   deploy_model.params)
    init_net_out_path = os.path.join(checkpoint_dir, init_net_out_fname)
    predict_net_out_path = os.path.join(checkpoint_dir, predict_net_out_fname)
    with open(init_net_out_path, 'wb') as f:
        f.write(init_net.SerializeToString())
    with open(predict_net_out_path, 'wb') as f:
        f.write(predict_net.SerializeToString())
    print("Model saved as " + init_net_out_path + " and " +
          predict_net_out_path)
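
The two protobufs written above are the standard Caffe2 deployment pair; a sketch of loading them back for inference with workspace.Predictor (the file paths here are assumed):

from caffe2.python import workspace

with open("init_net.pb", "rb") as f:
    init_net_str = f.read()
with open("predict_net.pb", "rb") as f:
    predict_net_str = f.read()

p = workspace.Predictor(init_net_str, predict_net_str)
# results = p.run([img])  # img: an NCHW float32 batch matching the deploy model
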
Example 16
def RunEpoch(
    args,
    epoch,
    train_model,
    test_model,
    total_batch_size,
    num_shards,
    expname,
    explog,
):
    '''
    Run one epoch of the trainer.
    TODO: add checkpointing here.
    '''
    # TODO: add loading from checkpoint
    log.info("Starting epoch {}/{}".format(epoch, args.num_epochs))
    epoch_iters = int(args.epoch_size / total_batch_size / num_shards)
    test_epoch_iters = int(args.test_epoch_size / total_batch_size /
                           num_shards)
    for i in range(epoch_iters):
        # This timeout is required (temporarily) since CUDA-NCCL
        # operators might deadlock when synchronizing between GPUs.
        timeout = 600.0 if i == 0 else 60.0
        with timeout_guard.CompleteInTimeOrDie(timeout):
            t1 = time.time()
            workspace.RunNet(train_model.net.Proto().name)
            t2 = time.time()
            dt = t2 - t1

        fmt = "Finished iteration {}/{} of epoch {} ({:.2f} images/sec)"
        log.info(fmt.format(i + 1, epoch_iters, epoch, total_batch_size / dt))
        prefix = "{}_{}".format(train_model._device_prefix,
                                train_model._devices[0])
        accuracy = workspace.FetchBlob(prefix + '/accuracy')
        loss = workspace.FetchBlob(prefix + '/loss')
        train_fmt = "Training loss: {}, accuracy: {}"
        log.info(train_fmt.format(loss, accuracy))

    num_images = epoch * epoch_iters * total_batch_size
    prefix = "{}_{}".format(train_model._device_prefix,
                            train_model._devices[0])
    accuracy = workspace.FetchBlob(prefix + '/accuracy')
    loss = workspace.FetchBlob(prefix + '/loss')
    learning_rate = workspace.FetchBlob(
        data_parallel_model.GetLearningRateBlobNames(train_model)[0])
    test_accuracy = 0
    test_accuracy_top5 = 0
    if test_model is not None:
        # Run test_epoch_iters iterations of testing
        ntests = 0
        for _ in range(test_epoch_iters):
            workspace.RunNet(test_model.net.Proto().name)
            for g in test_model._devices:
                test_accuracy += np.asscalar(
                    workspace.FetchBlob(
                        "{}_{}".format(test_model._device_prefix, g) +
                        '/accuracy'))
                test_accuracy_top5 += np.asscalar(
                    workspace.FetchBlob(
                        "{}_{}".format(test_model._device_prefix, g) +
                        '/accuracy_top5'))
                ntests += 1
        test_accuracy /= ntests
        test_accuracy_top5 /= ntests
    else:
        test_accuracy = (-1)
        test_accuracy_top5 = (-1)

    explog.log(input_count=num_images,
               batch_count=(i + epoch * epoch_iters),
               additional_values={
                   'accuracy': accuracy,
                   'loss': loss,
                   'learning_rate': learning_rate,
                   'epoch': epoch,
                   'top1_test_accuracy': test_accuracy,
                   'top5_test_accuracy': test_accuracy_top5,
               })
    assert loss < 40, "Exploded gradients :("

    # TODO: add checkpointing
    return epoch + 1
Example 17
########################################################################
# Run training procedure
########################################################################
# The parameter initialization network only needs to be run once.
workspace.RunNetOnce(train_model.param_init_net)
# creating the network
workspace.CreateNet(train_model.net, overwrite=True)
# initialize and create validation network
workspace.RunNetOnce(val_model.param_init_net)
workspace.CreateNet(val_model.net, overwrite=True)
# variables to track the accuracy & loss
accuracy = np.zeros(training_iters)
loss = np.zeros(training_iters)
# Now, we will manually run the network for training_iters iterations.
for i in range(training_iters):
    workspace.RunNet(train_model.net)
    accuracy[i] = workspace.FetchBlob('accuracy')
    loss[i] = workspace.FetchBlob('loss')
    if (i % validation_interval == 0):
        print("Training iter: ", i)
        #run validation
        workspace.RunNet(val_model.net.Proto().name)
        val_accuracy = workspace.FetchBlob('accuracy')
        print("Validation accuracy: ", str(val_accuracy))

# After the execution is done, let's plot the values.
pyplot.plot(loss, 'b')
pyplot.plot(accuracy, 'r')
pyplot.legend(('Loss', 'Accuracy'), loc='upper right')
pyplot.show()
Example 18
    def InferTensorRunAndCompare(self, model):
        '''
        Runs shape inference, and then the model to check
        that the inferred shapes agree with the actual ones
        '''
        (shapes, types) = workspace.InferShapesAndTypes(
            [model.param_init_net, model.net],
        )

        # .. Create net
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        workspace.RunNet(model.Proto().name)

        # ... and then check that the inferred shapes and types match the actual ones
        correct_shapes = {}
        correct_types = {}
        for b in workspace.Blobs():
            arr = workspace.FetchBlob(b)
            correct_shapes[b] = arr.shape
            if type(arr) is np.ndarray:
                if arr.dtype == np.dtype('float32'):
                    correct_types[b] = caffe2_pb2.TensorProto.FLOAT
                elif arr.dtype == np.dtype('int32'):
                    correct_types[b] = caffe2_pb2.TensorProto.INT32
                # BYTE
                # STRING
                elif arr.dtype == np.dtype('bool'):
                    correct_types[b] = caffe2_pb2.TensorProto.BOOL
                elif arr.dtype == np.dtype('uint8'):
                    correct_types[b] = caffe2_pb2.TensorProto.UINT8
                elif arr.dtype == np.dtype('int8'):
                    correct_types[b] = caffe2_pb2.TensorProto.INT8
                elif arr.dtype == np.dtype('uint16'):
                    correct_types[b] = caffe2_pb2.TensorProto.UINT16
                elif arr.dtype == np.dtype('int16'):
                    correct_types[b] = caffe2_pb2.TensorProto.INT16
                elif arr.dtype == np.dtype('int64'):
                    correct_types[b] = caffe2_pb2.TensorProto.INT64
                elif arr.dtype == np.dtype('float16'):
                    correct_types[b] = caffe2_pb2.TensorProto.FLOAT16
                elif arr.dtype == np.dtype('float64'):
                    correct_types[b] = caffe2_pb2.TensorProto.DOUBLE
                else:
                    correct_types[b] = "unknown {}".format(arr.dtype)
            else:
                correct_types[b] = str(type(arr))

        for b in correct_shapes:
            self.assertTrue(
                np.array_equal(
                    np.array(shapes[b]).astype(np.int32),
                    np.array(correct_shapes[b]).astype(np.int32)
                ),
                "Shape {} mismatch: {} vs. {}".format(
                    b, shapes[b], correct_shapes[b]
                )
            )
            self.assertFalse(
                b not in types and b in correct_types,
                "Type for {} not defined".format(b),
            )
            self.assertEqual(
                types[b],
                correct_types[b],
                "Type {} mismatch: {} vs. {}".format(
                    b, types[b], correct_types[b],
                )
            )
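
The dtype dispatch chain in InferTensorRunAndCompare can equivalently be written as a lookup table; a sketch covering the same dtypes (the helper name is ours, not from the test):

import numpy as np
from caffe2.proto import caffe2_pb2

DTYPE_TO_PROTO = {
    np.dtype('float32'): caffe2_pb2.TensorProto.FLOAT,
    np.dtype('int32'): caffe2_pb2.TensorProto.INT32,
    np.dtype('bool'): caffe2_pb2.TensorProto.BOOL,
    np.dtype('uint8'): caffe2_pb2.TensorProto.UINT8,
    np.dtype('int8'): caffe2_pb2.TensorProto.INT8,
    np.dtype('uint16'): caffe2_pb2.TensorProto.UINT16,
    np.dtype('int16'): caffe2_pb2.TensorProto.INT16,
    np.dtype('int64'): caffe2_pb2.TensorProto.INT64,
    np.dtype('float16'): caffe2_pb2.TensorProto.FLOAT16,
    np.dtype('float64'): caffe2_pb2.TensorProto.DOUBLE,
}

def proto_type_of(arr):
    return DTYPE_TO_PROTO.get(arr.dtype, "unknown {}".format(arr.dtype))
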
Example 19
my_model.AddGradientOperators([loss])
opt = optimizer.build_sgd(my_model, base_learning_rate=0.1)
for param in my_model.GetOptimizationParamInfo():
    opt(my_model.net, my_model.param_init_net, param)

##################################################################################
# Run the training
workspace.RunNetOnce(my_model.param_init_net)
workspace.CreateNet(my_model.net, overwrite=True)

total_iters = train_iters
accuracy = np.zeros(total_iters)
loss = np.zeros(total_iters)

for i in range(total_iters):
    workspace.RunNet(my_model.net)
    accuracy[i] = workspace.FetchBlob('accuracy')
    loss[i] = workspace.FetchBlob('loss')
    print("accuracy: ", accuracy[i])
    print("loss: ", loss[i])

plt.plot(loss, 'b', label="loss")
plt.plot(accuracy, 'r', label="accuracy")
plt.legend(loc="upper right")
plt.show()

exit()

##################################################################################
# Save the newly finetuned model
deploy_model = model_helper.ModelHelper("finetuned_squeezenet_ucf11_deploy", arg_scope=arg_scope, init_params=False)
Example 20
    def run_model():
        iterations = ITERATIONS
        if model_name == "MLP":
            iterations = 1  # avoid numeric instability with MLP gradients
        workspace.RunNet(model.net, iterations)
Example 21
    def test_small_sls_acc32(self, seed):
        workspace.GlobalInit([
            "caffe2",
            "--glow_global_fp16=0",
            "--glow_global_fused_scale_offset_fp16=0",
            "--glow_global_force_sls_fp16_accum=0",
        ])
        np.random.seed(seed)
        workspace.ResetWorkspace()

        n = 2
        DIM = 3
        data = 4 * (np.random.random_sample((n, DIM)) + 1).astype(np.float32)

        lengths = np.array([n], dtype=np.int32)
        indices = np.array(range(n), dtype=np.int64)
        weights = np.random.uniform(low=0.01, high=0.5,
                                    size=[n]).astype(np.float32)

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwise",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))

        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        ref_net.external_output.append("Y")
        ref_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))

        workspace.FeedBlob("data", data)
        workspace.RunOperatorOnce(
            core.CreateOperator("FloatToFused8BitRowwiseQuantized", ["data"],
                                ["quantized_data"]))

        quantized_data = workspace.FetchBlob("quantized_data")

        onnxified_net = onnxifi_caffe2_net(
            pred_net,
            {},
            max_batch_size=1,
            max_seq_size=n,
            debug=True,
            adjust_batch=True,
            use_onnx=False,
        )
        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)
        workspace.FeedBlob("weights", weights)

        workspace.CreateNet(onnxified_net)
        workspace.CreateNet(ref_net)

        workspace.RunNet(onnxified_net.name)
        Y_glow = workspace.FetchBlob("Y")

        workspace.RunNet(ref_net.name)
        Y_ref = workspace.FetchBlob("Y")

        diff = np.abs((Y_ref - Y_glow) / (Y_ref + 1e-8))
        max_err = np.max(diff, axis=1)
        num_offenders = (max_err > 0).sum()
        if num_offenders > 0:
            np.set_printoptions(precision=12)
            print(
                "ref",
                Y_ref.astype(np.float16).astype(np.float32),
                "glow",
                Y_glow.astype(np.float16).astype(np.float32),
            )
            print_test_debug_info(
                "test_small_sls_acc32",
                {
                    "seed": seed,
                    "num_rows": num_rows,
                    "embedding_dim": embedding_dim,
                    "batch_size": batch_size,
                    "indices": indices,
                    "data": data,
                    "quantized_data": quantized_data,
                    "lengths": lengths,
                    "weights": weights,
                    "Y_glow": Y_glow,
                    "Y_ref": Y_ref,
                    "diff": diff,
                    "rowwise_diff": np.max(diff, axis=1),
                },
            )
            assert 0
Example 22
                        core.DeviceOption(train_model._device_type, g)):
                    workspace.FeedBlob(
                        "{}_{}/data".format(train_model._device_prefix, g),
                        data_device)
                    workspace.FeedBlob(
                        "{}_{}/label".format(train_model._device_prefix, g),
                        labels_device)
            if i == 0 and e == 0:
                workspace.RunNetOnce(train_model.param_init_net)
                workspace.CreateNet(train_model.net)
                workspace.RunNetOnce(test_model.param_init_net)
                workspace.CreateNet(test_model.net, overwrite=True)
                workspace.RunNetOnce(deploy_model.param_init_net)
                workspace.CreateNet(deploy_model.net, overwrite=True)

            workspace.RunNet(train_model.net.Proto().name)
            loss_sum += workspace.FetchBlob("gpu_0/loss")
            correct += workspace.FetchBlob("gpu_0/accuracy")

        time_ep = time.time() - time_ep
        lr = workspace.FetchBlob(
            data_parallel_model.GetLearningRateBlobNames(train_model)[0])

        values = [
            e + 1,
            lr,
            loss_sum / batch_num,
            correct / batch_num,
            test_res['loss'],
            test_res['accuracy'],
            time_ep,
Example 23
def run_training_net(self):
    timeout = 2000.0
    with timeout_guard.CompleteInTimeOrDie(timeout):
        workspace.RunNet(self.train_model.net.Proto().name)
Example 24
    def test_slws_fused_4bit_rowwise(self, seed, num_rows, embedding_dim,
                                     batch_size, max_weight):
        workspace.ResetWorkspace()
        np.random.seed(seed)
        data = np.random.rand(num_rows, embedding_dim).astype(np.float32)
        data = data * 1e-3

        lengths = np.random.choice(np.arange(1, num_rows),
                                   batch_size).astype(np.int32)
        indices = []
        for length in lengths:
            indices.extend(np.random.choice(np.arange(1, num_rows), length))
        indices = np.asarray(indices).astype(np.int64)

        weights = np.random.uniform(
            low=0, high=max_weight, size=[len(indices)]).astype(
                np.float32) - max_weight / 2.0
        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused4BitRowwise",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))

        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        ref_net.external_output.append("Y")
        ref_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused4BitRowwiseFakeFP16NNPI",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))

        workspace.FeedBlob("data", data)
        workspace.RunOperatorOnce(
            core.CreateOperator("FloatToFused4BitRowwiseQuantized", ["data"],
                                ["quantized_data"]))

        pred_net_onnxified = onnxifi_caffe2_net(pred_net, {},
                                                max_batch_size=batch_size,
                                                max_seq_size=np.max(lengths),
                                                debug=True,
                                                adjust_batch=True,
                                                use_onnx=False)

        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)

        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)
        workspace.FeedBlob("weights", weights)

        workspace.CreateNet(pred_net_onnxified)
        workspace.CreateNet(ref_net)

        workspace.RunNet(pred_net_onnxified.name)
        Y_glow = workspace.FetchBlob('Y')

        workspace.RunNet(ref_net.name)
        Y_c2 = workspace.FetchBlob('Y')

        if not np.allclose(Y_c2, Y_glow):
            print_test_debug_info(
                "slws_fused_4bit_rowwise", {
                    "seed": seed,
                    "indices": indices,
                    "data": data.shape,
                    "lengths": lengths,
                    "weights": weights,
                    "Y_c2": Y_c2.shape,
                    "Y_glow": Y_glow.shape,
                    "diff": Y_glow - Y_c2,
                    "rowwise_diff": (Y_glow - Y_c2)[:, 0]
                })
            assert (0)
Example 25
    total_iterations = 501
    Snapshot_interval = 10
    total_iterations = total_iterations * 64

    print(workspace.Blobs())

    accuracy = []
    val_accuracy = []
    loss = []
    lr = []
    start = 0
    while start < total_iterations:
        l = train[start:start + Batch_Size,
                  0].astype(np.int32)  # labels for a given batch
        d = train[start:start + Batch_Size,
                  1:].reshape(l.shape[0], 28,
                              28)  # pixel values for each sample in the batch
        d = d[:, np.newaxis, ...].astype(np.float32)
        d = d * float(
            1. / 256)  # Scaling the pixel values for faster computation
        workspace.FeedBlob("data", d, device_option)
        workspace.FeedBlob("label", l, device_option)
        workspace.RunNet(training_model.net, num_iter=1)
        accuracy.append(workspace.FetchBlob('accuracy'))
        loss.append(workspace.FetchBlob('loss'))
        lr.append(workspace.FetchBlob('SgdOptimizer_0_lr_gpu0'))
        #    lr.append(workspace.FetchBlob('conv1_b_lr'))
        if start % Snapshot_interval == 0:
            # note: `start` advances by Batch_Size, so this only fires when
            # start is a multiple of both Batch_Size and Snapshot_interval
            save_snapshot(training_model, start)
        val_accuracy.append(check_val())
        start += Batch_Size
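
The loop above relies on save_snapshot and check_val, which are defined elsewhere in the script. A minimal sketch of what save_snapshot might do, assuming training_model is a Caffe2 ModelHelper whose parameters live in the global workspace (the pickle-based file naming is hypothetical):

import pickle

from caffe2.python import workspace

def save_snapshot(model, iteration):
    # Fetch every learnable parameter by blob name and pickle the dict,
    # so a run can be inspected or resumed from this point.
    params = {str(p): workspace.FetchBlob(str(p)) for p in model.params}
    with open("snapshot_{}.pkl".format(iteration), "wb") as f:
        pickle.dump(params, f)
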
Example n. 26
0
    def test_slws_fused_4bit_rowwise_all_same(self, seed):
        np.random.seed(seed)
        workspace.ResetWorkspace()
        n = 1
        m = 2
        data = np.ones((n, m)).astype(np.float32) * 0.2 - 0.1
        max_segments = 5
        max_segment_length = 100
        num_lengths = np.random.randint(1, max_segments + 1)
        # number of segments to run
        lengths = np.random.randint(0,
                                    max_segment_length + 1,
                                    size=num_lengths).astype(np.int32)
        num_indices = np.sum(lengths)
        indices = np.zeros(num_indices, dtype=np.int64)
        weights = np.random.uniform(low=-0.5, high=0.5, size=[len(indices)])\
            .astype(np.float32)
        # The random weights above are deliberately overridden: the
        # "all_same" case uses unit weights so every segment sums the
        # same quantized row.
        weights = np.ones(len(indices)).astype(np.float32)
        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused4BitRowwise",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))
        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        ref_net.external_output.append("Y")
        ref_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused4BitRowwiseFakeFP16NNPI",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))
        workspace.FeedBlob("data", data)
        workspace.RunOperatorOnce(
            core.CreateOperator("FloatToFused4BitRowwiseQuantized", ["data"],
                                ["quantized_data"]))
        print("quantized", workspace.FetchBlob("quantized_data"))
        pred_net_onnxified = onnxifi_caffe2_net(
            pred_net, {},
            max_batch_size=max_segments,
            max_seq_size=max_segment_length,
            debug=True,
            adjust_batch=True,
            use_onnx=False)
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in pred_net_onnxified.op)
        np.testing.assert_equal(num_onnxified_ops, 1)
        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)
        workspace.FeedBlob("weights", weights)
        workspace.CreateNet(pred_net_onnxified)
        workspace.CreateNet(ref_net)
        workspace.RunNet(pred_net_onnxified.name)
        Y_glow = workspace.FetchBlob("Y")
        workspace.RunNet(ref_net.name)
        Y_c2 = workspace.FetchBlob("Y")
        if not np.allclose(Y_c2, Y_glow):
            print_test_debug_info(
                "slws_fused_4bit_rowwise", {
                    "seed": seed,
                    "indices": indices,
                    "data": data,
                    "lengths": lengths,
                    "weights": weights,
                    "Y_c2": Y_c2,
                    "Y_glow": Y_glow,
                    "diff": Y_glow - Y_c2,
                    "rowwise_diff": (Y_glow - Y_c2)[:, 0]
                })
            assert 0
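
Both 4-bit tests above feed FloatToFused4BitRowwiseQuantized output into the sparse-lengths ops. A small standalone check of the fused layout, on the assumption (from the fused-rowwise format) that each row packs two 4-bit values per byte followed by an fp16 scale and fp16 bias:

import numpy as np
from caffe2.python import core, workspace

n, m = 3, 8
data = np.random.rand(n, m).astype(np.float32)
workspace.FeedBlob("data", data)
workspace.RunOperatorOnce(
    core.CreateOperator("FloatToFused4BitRowwiseQuantized", ["data"],
                        ["quantized_data"]))
q = workspace.FetchBlob("quantized_data")
# (m + 1) // 2 payload bytes, plus 2-byte scale and 2-byte bias per row
assert q.shape == (n, (m + 1) // 2 + 4)
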
Example n. 27
0
def RunNet(model, num_iterations):
    for net_iter in model._data_parallel_model_nets:
        if isinstance(net_iter, tuple):
            workspace.RunNet(net_iter[0].Proto().name, net_iter[1])
        else:
            workspace.RunNet(net_iter, num_iterations)
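
A hedged usage sketch for the helper above. It is schematic only: `model` is assumed to have been parallelized with caffe2.python.data_parallel_model, which is what attaches the _data_parallel_model_nets list that RunNet iterates:

from caffe2.python import workspace

workspace.RunNetOnce(model.param_init_net)
for net in model._data_parallel_model_nets:
    # entries are either a core.Net or a (core.Net, num_iter) tuple
    workspace.CreateNet(net[0] if isinstance(net, tuple) else net)
RunNet(model, num_iterations=100)
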
Example n. 28
0
def Caffe2LSTM(args):
    T = args.data_size // args.batch_size

    input_blob_shape = [args.seq_length, args.batch_size, args.input_dim]
    queue, label_queue, entry_counts = generate_data(T // args.seq_length,
                                                     input_blob_shape,
                                                     args.hidden_dim,
                                                     args.fixed_shape)

    workspace.FeedBlob(
        "seq_lengths",
        np.array([args.seq_length] * args.batch_size, dtype=np.int32))

    model, output = create_model(args, queue, label_queue, input_blob_shape)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    start_time = time.time()
    num_iters = T // args.seq_length
    total_iters = 0

    # Run the Benchmark
    log.info("------ Warming up ------")
    workspace.RunNet(model.net.Proto().name)

    if args.gpu:
        log.info("Memory stats:")
        stats = utils.GetGPUMemoryUsageStats()
        log.info("GPU memory:\t{} MB".format(stats['max_total'] / 1024 / 1024))

    log.info("------ Starting benchmark ------")
    start_time = time.time()
    last_time = time.time()
    for iteration in range(1, num_iters, args.iters_to_report):
        iters_once = min(args.iters_to_report, num_iters - iteration)
        total_iters += iters_once
        workspace.RunNet(model.net.Proto().name, iters_once)

        new_time = time.time()
        log.info("Iter: {} / {}. Entries Per Second: {}k.".format(
            iteration,
            num_iters,
            np.sum(entry_counts[iteration:iteration + iters_once]) /
            (new_time - last_time) // 100 / 10,
        ))
        last_time = new_time

    log.info("Done. Total EPS excluding 1st iteration: {}k {}".format(
        np.sum(entry_counts[1:]) / (time.time() - start_time) // 100 / 10,
        " (with RNN executor)" if args.rnn_executor else "",
    ))

    if args.gpu:
        log.info("Memory stats:")
        stats = utils.GetGPUMemoryUsageStats()
        log.info("GPU memory:\t{} MB".format(stats['max_total'] / 1024 / 1024))
        if stats['max_total'] != stats['total']:
            log.warning(
                "Max usage differs from current total usage: {} > {}".format(
                    stats['max_total'], stats['total']))
            log.warning("This means that costly deallocations occurred.")

    return time.time() - start_time
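
A possible driver for Caffe2LSTM, reconstructed from the attributes it reads off `args`; the flag names mirror those attributes, but the defaults are assumptions rather than the original script's interface:

import argparse

parser = argparse.ArgumentParser(description="Caffe2 LSTM benchmark")
parser.add_argument("--data_size", type=int, default=1000000)
parser.add_argument("--batch_size", type=int, default=256)
parser.add_argument("--seq_length", type=int, default=32)
parser.add_argument("--input_dim", type=int, default=256)
parser.add_argument("--hidden_dim", type=int, default=512)
parser.add_argument("--fixed_shape", action="store_true")
parser.add_argument("--gpu", action="store_true")
parser.add_argument("--iters_to_report", type=int, default=20)
parser.add_argument("--rnn_executor", action="store_true")
args = parser.parse_args()

elapsed = Caffe2LSTM(args)
print("Benchmark wall time: {:.2f}s".format(elapsed))
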
Example n. 29
0
    def test_int8_small_input(self, n, rand_seed):
        print("n={}, rand_seed={}".format(n, rand_seed))
        np.random.seed(rand_seed)
        workspace.ResetWorkspace()

        X_fp32 = np.random.uniform(0.01, 0.03, size=(n, n)).astype(np.float32)
        W_fp32 = np.identity(n, dtype=np.float32)
        b_fp32 = np.zeros((n,), dtype=np.float32)

        X_scale, X_zero_point = self._get_scale_zp(X_fp32)

        workspace.FeedBlob("X", X_fp32)
        workspace.FeedBlob("W", W_fp32)
        workspace.FeedBlob("b", b_fp32)

        workspace.RunOperatorOnce(
            core.CreateOperator(
                "Int8FCPackWeight",
                ["W"],
                ["W_int8"],
                engine="DNNLOWP",
                save_unpacked_weights=True,
                in_scale=X_scale,
            )
        )

        ref_net = core.Net("net")
        ref_net.Int8QuantizeNNPI(
            ["X"],
            ["X_int8"],
            Y_scale=X_scale,
            Y_zero_point=X_zero_point
        )
        ref_net.Int8FCFakeAcc32NNPI(
            ["X_int8", "W_int8", "b"],
            ["Y_int8"],
            Y_scale=X_scale,
            Y_zero_point=X_zero_point,
        )
        ref_net.Int8DequantizeNNPI(
            ["Y_int8"],
            ["Y"]
        )
        ref_net.Proto().external_output.append("Y")

        # run ref_net
        workspace.RunNetOnce(ref_net)
        Y_fbgemm = workspace.FetchBlob("Y")

        # run onnxifi net
        ref_net.Proto().op[0].type = "Int8Quantize"
        ref_net.Proto().op[1].type = "Int8FC"
        ref_net.Proto().op[2].type = "Int8Dequantize"
        net_onnxified = onnxifi_caffe2_net(
            ref_net.Proto(),
            {},
            debug=True,
            adjust_batch=False,
            use_onnx=False,
            weight_names=["W_int8", "b"],
        )
        num_onnxified_ops = sum(
            1 if o.type == "Onnxifi" else 0 for o in net_onnxified.op
        )
        np.testing.assert_equal(num_onnxified_ops, 1)
        workspace.CreateNet(net_onnxified)
        workspace.RunNet(net_onnxified.name)
        Y_glow = workspace.FetchBlob("Y")

        if not np.allclose(Y_glow, Y_fbgemm):
            diff_Y = np.abs(Y_glow - Y_fbgemm)
            print_test_debug_info(
                "int8_fc",
                {
                    "seed": rand_seed,
                    "n": n,
                    "X": X_fp32,
                    "W": W_fp32,
                    "b": b_fp32,
                    "Y_fbgemm": Y_fbgemm,
                    "Y_glow": Y_glow,
                    "diff": diff_Y,
                    "maxdiff": diff_Y.max(axis=1),
                },
            )
            assert 0
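
test_int8_small_input calls a _get_scale_zp helper that is not shown here. A plausible sketch, assuming standard asymmetric uint8 quantization over the tensor's observed range (the clamping constants are assumptions):

import numpy as np

def _get_scale_zp(self, tensor):
    # Include zero in the range so 0.0 stays exactly representable.
    t_min = min(0.0, float(np.min(tensor)))
    t_max = max(0.0, float(np.max(tensor)))
    scale = max((t_max - t_min) / 255.0, 1e-9)
    zero_point = int(np.clip(round(-t_min / scale), 0, 255))
    return scale, zero_point
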
Example n. 30
0
    t2 = time.time()
    print('Finish loading model in %.4fs' % (t2 - t1))

    t1 = time.time()
    data_list = [
        np.random.uniform(
            -1, 1, (args.batch_size, 3, im_size, im_size)).astype(np.float32)
        for i in range(int(np.ceil(1.0 * args.n_sample / args.batch_size)))
    ]
    t2 = time.time()
    print('Generate %d random images in %.4fs!' % (args.n_sample, t2 - t1))

    # dry run
    for i in range(5):
        workspace.FeedBlob('data', data_list[i], device_opts)
        workspace.RunNet(net_def.name, 1)
    print('Finish dry run (5 times)')

    t_list = []
    t_start = time.time()
    for i in range(args.n_epoch):
        t1 = time.time()

        for j, batch in enumerate(data_list):
            workspace.FeedBlob('data', batch, device_opts)
            workspace.RunNet(net_def.name, 1)

        t2 = time.time()
        t_list.append(t2 - t1)
        if args.verbose:
            print('Epoch %d, finish %d images in %.4fs, speed = %.4f image/s' %