def testFetchFeedBlobZeroDim(self):
    """Round-trip a float32 array with a zero-length axis through the workspace.

    Feeds an array of shape (2, 0, 3) and checks that fetching it back
    yields the same shape and dtype.
    """
    blob_name = "testblob_empty"
    expected_shape = (2, 0, 3)
    empty_arr = np.empty(shape=expected_shape, dtype=np.float32)

    fed_ok = workspace.FeedBlob(blob_name, empty_arr)
    self.assertEqual(fed_ok, True)

    roundtrip = workspace.FetchBlob(blob_name)
    self.assertEqual(roundtrip.shape, expected_shape)
    self.assertEqual(roundtrip.dtype, np.float32)
def run_single_segms(
    net,
    image,
    target_size,
    pixel_means=PIXEL_MEANS_DEFAULT,
    pixel_stds=PIXEL_STDS_DEFAULT,
    rle_encode=True,
    max_size=1333,
):
    """Run segmentation for one image on a device reached through ``adb``.

    The inputs and zero-filled output templates are serialized into a
    NetDef, pushed to ``/data/local/tmp/input_output.pb``, consumed by
    ``./nnapi_runner`` on the device, and the resulting
    ``output_blobs.pb`` is pulled back and deserialized into the
    workspace.

    Args:
        net: Net whose ``op`` list and ``external_output`` names are read.
        image: Input image; its first two ``shape`` entries are forwarded
            to ``utils2.compute_segm_results``.
        target_size: Forwarded to ``utils2.prepare_blobs``.
        pixel_means: Forwarded to ``utils2.prepare_blobs``.
        pixel_stds: Forwarded to ``utils2.prepare_blobs``.
        rle_encode: Forwarded to ``utils2.compute_segm_results``.
        max_size: Forwarded to ``utils2.prepare_blobs``.

    Returns:
        dict with keys ``"classids"``, ``"boxes"`` (boxes with the scores
        appended as a last column), ``"masks"`` and ``"im_masks"``
        (an empty list when there are no boxes).

    Raises:
        subprocess.CalledProcessError: if pushing the inputs to the
            device fails. Failures while running the model or reading
            its outputs are printed and treated as "no detections".
    """
    inputs = utils2.prepare_blobs(
        image,
        target_size=target_size,
        max_size=max_size,
        pixel_means=pixel_means,
        pixel_stds=pixel_stds,
    )

    # Prepare inputs for AABB and Int8AABB operators
    im_info = inputs["im_info"]
    scale = im_info[0][2]
    inputs["im_infoq"] = np.rint(im_info[:, :2] * 8.0).astype(np.uint16)
    inputs["im_info2"] = im_info[:, :2]

    blob_names = []
    ser_blobs = []

    # Serialize inputs for remote device
    for k, v in inputs.items():
        workspace.FeedBlob(k, v)
        blob_names.append(k)
        ser_blobs.append(workspace.SerializeBlob(k))

    # Serialize output templates for remote device
    fully_quantized = any(op.type == "Int8AABBRoIProposals" for op in net.op)
    bbox_type = np.uint16 if fully_quantized else np.float32
    output_templates = {
        "score_nms": np.zeros((LIMIT,), np.float32),
        "bbox_nms": np.zeros((LIMIT, 4), bbox_type),
        "class_nms": np.zeros((LIMIT,), np.int32),
        "mask_fcn_probs": np.zeros((LIMIT, CLASSES, RES, RES), np.float32),
    }
    for out_name in net.external_output:
        fake_name = out_name + "_empty_template"
        blob_names.append(out_name)
        workspace.FeedBlob(fake_name, output_templates[out_name])
        ser_blobs.append(workspace.SerializeBlob(fake_name))

    # Package inputs and output templates
    inout_netdef = caffe2_pb2.NetDef()
    inout_netdef.arg.extend([
        utils.MakeArgument("blob_names", blob_names),
        utils.MakeArgument("ser_blobs", ser_blobs),
    ])

    # Send in/out to the remote device
    with tempfile.NamedTemporaryFile() as inout_file:
        inout_file.write(inout_netdef.SerializeToString())
        inout_file.flush()
        subprocess.check_call(
            ["adb", "push", inout_file.name, "/data/local/tmp/input_output.pb"]
        )

    try:
        # Run the model
        use_caffe2 = (
            "--use_caffe2_reference true"
            if os.environ.get("USE_CAFFE2_REFERENCE") in ("1", "true", "yes", "on")
            else ""
        )
        # The whole command is handed to `adb shell` as one argument, so no
        # local shell is needed to interpret it.
        device_cmd = (
            "cd /data/local/tmp ; GLOG_logtostderr=true GLOG_v=0 "
            "./nnapi_runner %s --init_net init_net.pb "
            "--predict_net predict_net.pb --inout_net input_output.pb "
            "--out_path output_blobs.pb" % use_caffe2
        )
        subprocess.check_call(["adb", "shell", device_cmd])

        # Retrieve and deserialize outputs
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = os.path.join(tmpdir, "output_blobs.pb")
            subprocess.check_call(
                ["adb", "pull", "/data/local/tmp/output_blobs.pb", output_file]
            )

            out_net = caffe2_pb2.NetDef()
            with open(output_file, "rb") as handle:
                out_net.ParseFromString(handle.read())

        all_outputs = utils.ArgsToDict(out_net.arg)["outputs"]
        for output in all_outputs:
            bp = caffe2_pb2.BlobProto()
            bp.ParseFromString(output)
            workspace.DeserializeBlob(bp.name, output)

        classids = workspace.FetchBlob("class_nms")
        scores = workspace.FetchBlob("score_nms")  # bbox scores, (R, )
        boxes = workspace.FetchBlob("bbox_nms")  # i.e., boxes, (R, 4*1)
        masks = workspace.FetchBlob("mask_fcn_probs")  # (R, cls, mask_dim, mask_dim)
        if boxes.dtype == np.uint16:
            # Undo the x8 fixed-point encoding used for quantized boxes.
            boxes = boxes.astype(np.float32) * 0.125
        boxes /= scale
    except Exception as e:
        # Deliberate best effort: any failure is reported and handled as an
        # empty result.
        print(e)
        # may not detect anything at all
        R = 0
        scores = np.zeros((R,), dtype=np.float32)
        boxes = np.zeros((R, 4), dtype=np.float32)
        classids = np.zeros((R,), dtype=np.float32)
        masks = np.zeros((R, 1, 1, 1), dtype=np.float32)

    R = boxes.shape[0]
    im_masks = []
    if R > 0:
        im_dims = image.shape
        im_masks = utils2.compute_segm_results(
            masks, boxes, classids, im_dims[0], im_dims[1], rle_encode=rle_encode
        )

    boxes = np.column_stack((boxes, scores))

    ret = {"classids": classids, "boxes": boxes, "masks": masks, "im_masks": im_masks}
    return ret
def test_prepare_normalization_and_normalize(self):
    """Identify normalization parameters, run the normalization net, and
    check the normalized columns per feature type.

    The parameters are identified from ``read_data()``, the dense matrix is
    normalized through ``PreprocessorNet.normalize_dense_matrix``, and each
    feature's output slice is validated according to its feature type.
    """
    feature_value_map = read_data()

    normalization_parameters = {
        name: normalization.identify_parameter(
            name, values, 10, feature_type=self._feature_type_override(name)
        )
        for name, values in feature_value_map.items()
    }

    # The identified type must agree with the type implied by the feature id.
    for feature_id, param in normalization_parameters.items():
        expected_type = id_to_type(feature_id)
        if expected_type == CONTINUOUS:
            self.assertEqual(param.feature_type, CONTINUOUS)
            self.assertIsNone(param.boxcox_lambda)
            self.assertIsNone(param.boxcox_shift)
        elif expected_type == BOXCOX:
            self.assertEqual(param.feature_type, BOXCOX)
            self.assertIsNotNone(param.boxcox_lambda)
            self.assertIsNotNone(param.boxcox_shift)
        else:
            assert param.feature_type == expected_type

    sorted_features, _ = sort_features_by_normalization(normalization_parameters)

    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()

    input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)
    for col, feature in enumerate(sorted_features):
        input_matrix[:, col] = feature_value_map[feature]

    input_matrix_blob = "input_matrix_blob"
    # An empty placeholder is fed before the net is built; the real matrix is
    # fed afterwards, right before the net runs.
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, sorted_features, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)
    workspace.RunNetOnce(norm_net)
    normalized_feature_matrix = workspace.FetchBlob(output_blob)

    # Slice the output matrix back into per-feature column groups; ENUM
    # features take one column per possible value, all others take one.
    normalized_features = {}
    col_start = 0
    for feature in sorted_features:
        norm = normalization_parameters[feature]
        width = len(norm.possible_values) if norm.feature_type == ENUM else 1
        col_stop = col_start + width
        normalized_features[feature] = normalized_feature_matrix[:, col_start:col_stop]
        col_start = col_stop

    finite_flags = [
        np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
        for parameter in normalization_parameters.values()
    ]
    self.assertTrue(all(finite_flags))

    for k, v in normalized_features.items():
        self.assertTrue(np.all(np.isfinite(v)))
        feature_type = normalization_parameters[k].feature_type

        if feature_type == identify_types.PROBABILITY:
            sigmoidv = special.expit(v)
            in_open_unit_interval = np.logical_and(
                np.greater(sigmoidv, 0), np.less(sigmoidv, 1)
            )
            self.assertTrue(np.all(in_open_unit_interval))
        elif feature_type == identify_types.ENUM:
            possible_values = normalization_parameters[k].possible_values
            self.assertEqual(v.shape[0], len(feature_value_map[k]))
            self.assertEqual(v.shape[1], len(possible_values))

            possible_value_map = {
                value: index for index, value in enumerate(possible_values)
            }
            for row_idx, row in enumerate(v):
                original_feature = feature_value_map[k][row_idx]
                hot_index = np.where(row == 1)[0][0]
                self.assertEqual(possible_value_map[original_feature], hot_index)
        elif feature_type == identify_types.QUANTILE:
            for idx, feature in enumerate(v[0]):
                original_feature = feature_value_map[k][idx]
                expected = NumpyFeatureProcessor.value_to_quantile(
                    original_feature, normalization_parameters[k].quantiles
                )
                self.assertAlmostEqual(feature, expected, 2)
        elif feature_type == identify_types.BINARY:
            pass
        elif feature_type in (identify_types.CONTINUOUS, identify_types.BOXCOX):
            sample_std = np.std(v, ddof=1)
            sample_mean = np.mean(v)
            one_stddev = np.isclose(sample_std, 1, atol=0.01)
            zero_stddev = np.isclose(sample_std, 0, atol=0.01)
            zero_mean = np.isclose(sample_mean, 0, atol=0.01)
            self.assertTrue(
                np.all(zero_mean),
                "mean of feature {} is {}, not 0".format(k, sample_mean),
            )
            self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
        elif feature_type == identify_types.CONTINUOUS_ACTION:
            less_than_max = v < 1
            more_than_min = v > -1
            self.assertTrue(
                np.all(less_than_max),
                "values are not less than 1: {}".format(
                    v[np.logical_not(less_than_max)]
                ),
            )
            self.assertTrue(
                np.all(more_than_min),
                "values are not more than -1: {}".format(
                    v[np.logical_not(more_than_min)]
                ),
            )
        else:
            raise NotImplementedError()
def RunEpoch(
    args,
    epoch,
    train_model,
    test_model,
    total_batch_size,
    num_shards,
    expname,
    explog,
    best_accuracy,
):
    '''
    Run one epoch of the trainer.

    Runs ``int(args.epoch_size / total_batch_size / num_shards)`` training
    iterations, optionally evaluates ``test_model`` for ``args.test_iters``
    iterations, logs the metrics through ``explog`` and asserts that the
    loss stays below 40.

    Args:
        args: options object; ``num_epochs``, ``epoch_size`` and
            ``test_iters`` are read.
        epoch: index of the epoch being run.
        train_model: model whose net is run for training.
        test_model: model whose net is run for evaluation, or None to skip
            evaluation (test accuracy is then reported as -1).
        total_batch_size: images per iteration, used for throughput and
            image-count reporting.
        num_shards: number of shards the epoch is divided across.
        expname: unused here.
        explog: logger receiving the per-epoch metrics.
        best_accuracy: best test accuracy seen before this epoch.

    Returns:
        Tuple ``(epoch + 1, best_accuracy)`` with the updated best accuracy.

    TODO: add checkpointing here.
    '''
    # TODO: add loading from checkpoint
    log.info("Starting epoch {}/{}".format(epoch, args.num_epochs))
    epoch_iters = int(args.epoch_size / total_batch_size / num_shards)
    prefix = "{}_{}".format(train_model._device_prefix, train_model._devices[0])
    for i in range(epoch_iters):
        # This timeout is required (temporarily) since CUDA-NCCL
        # operators might deadlock when synchronizing between GPUs.
        timeout = 600.0 if i == 0 else 60.0
        with timeout_guard.CompleteInTimeOrDie(timeout):
            t1 = time.time()
            workspace.RunNet(train_model.net.Proto().name)
            t2 = time.time()
            dt = t2 - t1

        fmt = "Finished iteration {}/{} of epoch {} ({:.2f} images/sec)"
        log.info(fmt.format(i + 1, epoch_iters, epoch, total_batch_size / dt))
        accuracy = workspace.FetchBlob(prefix + '/accuracy')
        loss = workspace.FetchBlob(prefix + '/loss')
        train_fmt = "Training loss: {}, accuracy: {}"
        log.info(train_fmt.format(loss, accuracy))

    num_images = epoch * epoch_iters * total_batch_size
    accuracy = workspace.FetchBlob(prefix + '/accuracy')
    loss = workspace.FetchBlob(prefix + '/loss')
    learning_rate = workspace.FetchBlob(
        data_parallel_model.GetLearningRateBlobNames(train_model)[0])

    test_accuracy = 0
    if test_model is not None:
        # Average the accuracy blob over every device and test iteration.
        ntests = 0
        for _ in range(0, args.test_iters):
            workspace.RunNet(test_model.net.Proto().name)
            for g in test_model._devices:
                # .item() is what the removed np.asscalar helper called.
                test_accuracy += workspace.FetchBlob(
                    "{}_{}".format(test_model._device_prefix, g) + '/accuracy'
                ).item()
                ntests += 1
        # With zero test iterations or devices there is nothing to average;
        # keep the initial 0 instead of dividing by zero.
        if ntests:
            test_accuracy /= ntests
    else:
        test_accuracy = (-1)

    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy

    # Index of the last training iteration; computed explicitly so that an
    # epoch with zero iterations does not hit an undefined loop variable.
    last_iter = epoch_iters - 1
    explog.log(
        input_count=num_images,
        batch_count=(last_iter + epoch * epoch_iters),
        additional_values={
            'accuracy': accuracy,
            'loss': loss,
            'learning_rate': learning_rate,
            'epoch': epoch,
            'test_accuracy': test_accuracy,
            'best_accuracy': best_accuracy,
        },
    )
    assert loss < 40, "Exploded gradients :("

    # TODO: add checkpointing
    return epoch + 1, best_accuracy
def test_cpu2gpu_gpu2cpu_gradients(self):
    """Check parameter updates for a model that copies CPU -> GPU -> CPU.

    Builds an FC layer under the "cpu" scope, copies its output into the
    "gpu_0" scope for a second FC layer, copies the prediction back, adds
    gradient operators and a WeightedSum update with LR = -2.0, then
    verifies that each parameter equals ``initial - 2.0 * gradient`` after
    one run.
    """
    model = cnn.CNNModelHelper(name="copy_test")
    batch = 32
    cpu_opt = core.DeviceOption(caffe2_pb2.CPU, 0)
    gpu_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)

    with core.NameScope("cpu"), core.DeviceScope(cpu_opt):
        x_cpu = model.FC('data', 'x_cpu', 16, 8)

    with core.NameScope("gpu_0"), core.DeviceScope(gpu_opt):
        x_gpu = model.CopyCPUToGPU(x_cpu, "x_gpu")
        pred_gpu = model.FC(x_gpu, "pred_gpu", 8, 4)
        pred_cpu = model.CopyGPUToCPU(pred_gpu, "pred_cpu")

    with core.DeviceScope(cpu_opt), core.NameScope("cpu"):
        softmax, loss = model.SoftmaxWithLoss(
            [pred_cpu, "label"],
            ["softmax", "loss"],
        )

    gradient_map = model.AddGradientOperators([loss])

    # Add param updates (for cpu and gpu)
    init_net = model.param_init_net
    with core.DeviceScope(cpu_opt), core.NameScope("cpu"):
        ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.)
        LR = init_net.ConstantFill([], "LR", shape=[1], value=-2.0)
        for param in model.GetParams():
            update_inputs = [param, ONE, gradient_map[param], LR]
            model.WeightedSum(update_inputs, param)

    with core.NameScope("gpu_0"), core.DeviceScope(gpu_opt):
        ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.)
        LR = init_net.ConstantFill([], "LR", shape=[1], value=-2.0)
        for param in model.GetParams():
            update_inputs = [param, ONE, gradient_map[param], LR]
            model.WeightedSum(update_inputs, param)

    with core.DeviceScope(cpu_opt):
        data_blob = np.random.rand(batch, 16).astype(np.float32)
        workspace.FeedBlob('cpu/data', data_blob)
        label_blob = np.random.randint(4, size=batch).astype(np.int32)
        workspace.FeedBlob('cpu/label', label_blob)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    # Snapshot the parameters on both sides of a single net run.
    initial_params = {p: workspace.FetchBlob(p) for p in model.GetParams()}
    workspace.RunNet(model.net.Proto().name)
    updated_params = {p: workspace.FetchBlob(p) for p in model.GetParams()}

    for p in model.GetParams():
        grad_blob = gradient_map[p]
        expected = initial_params[p] - 2.0 * workspace.FetchBlob(grad_blob)
        actual = updated_params[p]
        self.assertTrue(
            np.array_equal(expected, actual),
            "Mismatch: {}: {}, {}".format(p, expected, actual),
        )
print("Iter: {}, Loss: {}, Accuracy: {}".format( i, loss[i], accuracy[i])) # # visualize the data and the results # plt.figure("Summary of Training") plt.title("Summary of Training Run") plt.plot(loss, 'b') plt.plot(accuracy, 'r') plt.xlabel("Iteration") plt.legend(('Loss', 'Accuracy'), loc='upper right') plt.figure("Training Data") plt.title("Training Data Sample") data = workspace.FetchBlob('data') _ = visualize.NCHW.ShowMultiple(data) plt.figure("Softmax Prediction") plt.title("Softmax Prediction for the first image above") plt.ylabel('Confidence') plt.xlabel('Label') # Grab and visualize the softmax blob for the batch we just visualized. Since batch size # is 64, the softmax blob contains 64 vectors, one for each image in the batch. To grab # the vector for the first image, we can simply index the fetched softmax blob at zero. softmax = workspace.FetchBlob('softmax') _ = plt.plot(softmax[0], 'ro') # if USE_LENET_MODEL: # plt.figure("Conv1 5th Feature Maps") # plt.title("Conv1 Output Feature Maps for Most Recent Mini-batch")
def test_convolution_relu_fusion(self, stride, pad, kernel, size,
                                 input_channels, output_channels,
                                 batch_size, use_bias, group, gc, dc):
    """Compare Conv + Relu against manually and automatically fused ConvFusion.

    Three results are computed from the same random inputs:
      * Y0: separate "Conv" and "Relu" operators on ``dc[0]``;
      * Y1: a hand-built "ConvFusion" operator (fusion_type=1) on ``dc[1]``;
      * Y2: the single operator produced by ``optimizeForIDEEP`` from a
        Conv + Relu net placed on ``dc[1]``.
    Y1 and Y2 must each match Y0 within atol=rtol=0.01.

    The checks run in a scratch workspace; the previous workspace is
    restored even when a check fails.
    """
    conv = core.CreateOperator(
        "Conv",
        ["X0", "w0", "b0"] if use_bias else ["X0", "w0"],
        ["Y0"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        device_option=dc[0]
    )
    relu = core.CreateOperator(
        "Relu",
        ["Y0"],
        ["Y0"],
        device_option=dc[0]
    )

    # Manual fusion
    conv_fusion = core.CreateOperator(
        "ConvFusion",
        ["X1", "w1", "b1"] if use_bias else ["X1", "w1"],
        ["Y1"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        fusion_type=1,
        device_option=dc[1]
    )

    # Auto fusion
    old_net = caffe2_pb2.NetDef()
    conv_old = caffe2_pb2.OperatorDef()
    conv_old.CopyFrom(conv)
    conv_old.device_option.CopyFrom(dc[1])
    relu_old = caffe2_pb2.OperatorDef()
    relu_old.CopyFrom(relu)
    relu_old.device_option.CopyFrom(dc[1])
    old_net.op.extend([conv_old, relu_old])
    net = core.Net("net")
    net.Proto().CopyFrom(old_net)
    optimizeForIDEEP(net)
    self.assertEqual(len(net.Proto().op), 1)
    self.assertEqual(net.Proto().op[0].type, "ConvFusion")

    X = np.random.rand(
        batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
    w = np.random.rand(
        output_channels * group, input_channels, kernel, kernel) \
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels * group).astype(np.float32) - 0.5

    old_ws_name = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace("_device_check_", True)
    try:
        # Reference: separate Conv and Relu.
        workspace.FeedBlob('X0', X, dc[0])
        workspace.FeedBlob('w0', w, dc[0])
        workspace.FeedBlob('b0', b, dc[0])
        workspace.RunOperatorOnce(conv)
        workspace.RunOperatorOnce(relu)
        Y0 = workspace.FetchBlob('Y0')

        # Manually fused operator.
        workspace.ResetWorkspace()
        workspace.FeedBlob('X1', X, dc[1])
        workspace.FeedBlob('w1', w, dc[1])
        workspace.FeedBlob('b1', b, dc[1])
        workspace.RunOperatorOnce(conv_fusion)
        Y1 = workspace.FetchBlob('Y1')
        if not np.allclose(Y0, Y1, atol=0.01, rtol=0.01):
            print(Y1.flatten())
            print(Y0.flatten())
            print(np.max(np.abs(Y1 - Y0)))
            self.fail("manual ConvFusion output differs from Conv + Relu")

        # Automatically fused operator.
        workspace.ResetWorkspace()
        workspace.FeedBlob('X0', X, dc[1])
        workspace.FeedBlob('w0', w, dc[1])
        workspace.FeedBlob('b0', b, dc[1])
        workspace.RunOperatorOnce(net.Proto().op[0])
        Y2 = workspace.FetchBlob('Y0')
        if not np.allclose(Y0, Y2, atol=0.01, rtol=0.01):
            print(Y2.flatten())
            print(Y0.flatten())
            print(np.max(np.abs(Y2 - Y0)))
            self.fail("optimized ConvFusion output differs from Conv + Relu")
    finally:
        # Always return to the caller's workspace, including on failure.
        workspace.SwitchWorkspace(old_ws_name)
# Evaluate 'detection_out' against 'gt_label' and print the result blob.
import sys

# print(detection_out.shape)
net.DetectionEvalute(
    ['detection_out', 'gt_label'],
    ['detection_eval'],
    num_classes=21,
    overlap_threshold=0.001,
    resize_valid=False,
    name_size_file='/home/ernie/caffe2/caffe2/python/ssd_test/detection_eval/test_name_size.txt'
)

workspace.FeedBlob("loc", mbox_loc)
workspace.FeedBlob("conf", mbox_conf)
workspace.FeedBlob("prior", mbox_priorbox)
workspace.FeedBlob("gt_label", gt_label)
# workspace.FeedBlob('detection_out', detection_out)

workspace.CreateNet(net.Proto())
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(net.Proto())
workspace.RunNet("detection_eval_test", 1)

conf_softmax_flat = workspace.FetchBlob('conf_softmax_flat')
detections = workspace.FetchBlob('detection_out')
detection_eval = workspace.FetchBlob('detection_eval')

# Print arrays in full. NaN is rejected as a threshold by NumPy, which
# recommends sys.maxsize for an untruncated representation.
np.set_printoptions(threshold=sys.maxsize)
# print(conf_softmax_flat)
# print(detections)
print(detection_eval)
def TrainModel(self):
    """Train the model on ``self.text`` in an endless loop.

    The text is split into ``self.batch_size`` blocks; each batch entry
    walks through its own block, wrapping around at the block end. Every
    iteration feeds a one-hot ``input_blob`` and a ``target`` blob, runs
    the prepare-state net and the model net, and logs the smoothed loss.
    Every ``self.iters_to_report`` iterations throughput is printed, a
    text sample is generated and the running loss counters are reset.

    This method does not return on its own (``while True``).
    """
    log.debug("Training model")

    workspace.RunNetOnce(self.model.param_init_net)

    # As though we predict the same probability for each character
    smooth_loss = -np.log(1.0 / self.D) * self.seq_length
    last_n_iter = 0
    last_n_loss = 0.0
    num_iter = 0
    N = len(self.text)

    # We split text into batch_size pieces. Each piece will be used only
    # by a corresponding batch during the training process
    text_block_positions = np.zeros(self.batch_size, dtype=np.int32)
    text_block_size = N // self.batch_size
    text_block_starts = list(range(0, N, text_block_size))
    text_block_sizes = [text_block_size] * self.batch_size
    # The last block absorbs the remainder so the blocks cover the text.
    text_block_sizes[self.batch_size - 1] += N % self.batch_size
    assert sum(text_block_sizes) == N

    # Writing to output states which will be copied to input
    # states within the loop below
    workspace.FeedBlob(
        self.hidden_output,
        np.zeros([1, self.batch_size, self.hidden_size], dtype=np.float32))
    workspace.FeedBlob(
        self.cell_state,
        np.zeros([1, self.batch_size, self.hidden_size], dtype=np.float32))
    workspace.CreateNet(self.prepare_state)

    graph = net_drawer.GetPydotGraph(self.model.net, "mnist", rankdir="LR")
    experiment.set_model_graph(graph)

    # We iterate over text in a loop many times. Each time we pick a
    # seq_length segment and feed it to LSTM as a sequence
    last_time = datetime.now()
    progress = 0
    while True:
        workspace.FeedBlob(
            "seq_lengths",
            np.array([self.seq_length] * self.batch_size, dtype=np.int32))
        workspace.RunNet(self.prepare_state.Name())

        input_seq = np.zeros(
            [self.seq_length, self.batch_size, self.D]).astype(np.float32)
        target = np.zeros(
            [self.seq_length * self.batch_size]).astype(np.int32)

        for e in range(self.batch_size):
            for i in range(self.seq_length):
                pos = text_block_starts[e] + text_block_positions[e]
                # One-hot encode the current character; the target is the
                # character that follows it (wrapping at the text end).
                input_seq[i][e][self._idx_at_pos(pos)] = 1
                target[i * self.batch_size + e] = \
                    self._idx_at_pos((pos + 1) % N)
                text_block_positions[e] = (
                    text_block_positions[e] + 1) % text_block_sizes[e]
                progress += 1

        workspace.FeedBlob('input_blob', input_seq)
        workspace.FeedBlob('target', target)

        CreateNetOnce(self.model.net)
        workspace.RunNet(self.model.net.Name())

        num_iter += 1
        last_n_iter += 1

        if num_iter % self.iters_to_report == 0:
            new_time = datetime.now()
            print("Characters Per Second: {}".format(
                int(progress / (new_time - last_time).total_seconds())))
            print("Iterations Per Second: {}".format(
                int(self.iters_to_report /
                    (new_time - last_time).total_seconds())))

            last_time = new_time
            progress = 0

            print("{} Iteration {} {}".format(
                '-' * 10, num_iter, '-' * 10))

        loss = workspace.FetchBlob(self.loss) * self.seq_length
        smooth_loss = 0.999 * smooth_loss + 0.001 * loss
        last_n_loss += loss
        experiment.log_metric("loss", smooth_loss)

        if num_iter % self.iters_to_report == 0:
            self.GenerateText(500, np.random.choice(self.vocab))

            log.debug("Loss since last report: {}".format(
                last_n_loss / last_n_iter))
            log.debug("Smooth loss: {}".format(smooth_loss))

            last_n_loss = 0.0
            last_n_iter = 0
def bmuf_process(filestore_dir, process_id, shared_results,
                 cpu_device=False, nesterov=False):
    """Run one of the two worker processes of the distributed BMUF test.

    Builds a small FC model, parallelizes it with
    ``data_parallel_model.Parallelize_BMUF`` over two devices (devices
    [0, 1] for process 0, otherwise [2, 3]) with a two-shard rendezvous
    through a file store, runs it, and records the fetched blobs in
    ``shared_results[process_id]``.

    Args:
        filestore_dir: path passed to the "FileStoreHandlerCreate" operator.
        process_id: shard id of this process; selects the device pair.
        shared_results: mapping that receives this process's results dict.
        cpu_device: use CPU devices instead of GPUs.
        nesterov: forwarded to ``Parallelize_BMUF``.

    Returns:
        None. When ``cpu_device`` is False and GPU support is missing or
        fewer than 4 GPUs are reported, the function returns early without
        writing to ``shared_results``.
    """
    # We need to import caffe2 in every process to initialize CUDA independently.
    from caffe2.python import core, cnn, data_parallel_model, dyndep, workspace
    from caffe2.proto import caffe2_pb2
    dyndep.InitOpsLibrary("@/caffe2/caffe2/distributed:file_store_handler_ops")

    if not cpu_device:
        if not workspace.has_gpu_support:
            log.info('No GPU support test is Ignored.')
            return
        if workspace.NumGpuDevices() < 4:
            log.info('Not enough GPU support, test IGNORED')
            return

    model = cnn.CNNModelHelper(
        order="NHWC",
        name="test"
    )
    if not cpu_device:
        device_type = workspace.GpuDeviceType
        device_prefix = "gpu"
    else:
        device_type = caffe2_pb2.CPU
        device_prefix = "cpu"

    # Each process owns a disjoint pair of device ids.
    devices = [0, 1] if process_id == 0 else [2, 3]

    def _model_build_fun(model, loss_scale):
        # FC -> flatten -> sigmoid -> squared L2 distance to "label" ->
        # averaged loss, scaled by loss_scale.
        fc = model.FC(
            "data", "fc", 16, 1, ("ConstantFill", {}), ("ConstantFill", {})
        )
        fc_fl = model.FlattenToVec(fc, "fc_fl")
        sigm = model.Sigmoid(fc_fl, "sigm")
        sq = model.SquaredL2Distance([sigm, "label"], "sq")
        loss = model.AveragedLoss(sq, "loss")
        loss = model.Scale(loss, scale=loss_scale)

        # For testing explicit sync
        model.param_init_net.UniformFill([], ["sync_num"], shape=[1])
        return [loss]

    def _input_builder_fun(model):
        # Inputs are fed directly by _generate_data, so nothing is built here.
        return None

    def _param_update_fun(model):
        # Update with a fixed learning rate of -0.1:
        # param = 1 * param + LR * grad.
        ITER = model.Iter("ITER")
        LR = model.net.LearningRate(
            [ITER],
            "LR",
            base_lr=(-0.1),
            policy="fixed",
        )
        ONE = model.param_init_net.ConstantFill(
            [], "ONE", shape=[1], value=1.0,
        )
        for param in model.GetParams():
            grad = model.param_to_grad[param]
            model.WeightedSum([param, ONE, grad, LR], param)

    def _generate_data(devices, process_id, device_type, device_prefix):
        # Seed depends on the process id, so each process gets its own but
        # reproducible data.
        np.random.seed(26 + process_id * 10)
        # Each run has same input, independent of number of gpus
        batch_size = 64
        for _ in range(0, 10):
            full_data = np.random.rand(batch_size, 16)
            full_labels = np.round(full_data[:, 0])
            batch_per_device = batch_size // len(devices)

            # Give each device its contiguous slice of the batch.
            for (j, g) in enumerate(devices):
                st = j * batch_per_device
                en = st + batch_per_device
                data = full_data[st:en, :].astype(np.float32)
                labels = full_labels[st:en].astype(np.float32)
                with core.DeviceScope(core.DeviceOption(device_type, g)):
                    workspace.FeedBlob("{}_{}/data".format(device_prefix, g), data)
                    workspace.FeedBlob("{}_{}/label".format(device_prefix, g), labels)

    _generate_data(devices, process_id, device_type, device_prefix)

    workspace.RunOperatorOnce(
        core.CreateOperator(
            "FileStoreHandlerCreate", [], ["store_handler"],
            path=filestore_dir))
    rendezvous = dict(
        kv_handler="store_handler",
        shard_id=process_id,
        num_shards=2,
        engine="GLOO",
        exit_nets=None)

    data_parallel_model.Parallelize_BMUF(
        model,
        _input_builder_fun,
        _model_build_fun,
        _param_update_fun,
        devices=devices,
        rendezvous=rendezvous,
        nesterov=nesterov,
        add_blobs_to_sync=["sync_num"],
        cpu_device=cpu_device)

    data_parallel_model.RunInitNet(model)

    def _device_pid(device, pid):
        # Map a local device index (0 or 1) to this process's device id:
        # process 1 uses ids shifted by 2.
        if pid == 1:
            return device + 2
        return device

    # After initialization the fc_w_v blob must be all zeros.
    np.testing.assert_equal(
        workspace.FetchBlob("{}_{}/fc_w_v".format(
            device_prefix, _device_pid(0, process_id))),
        np.zeros(16).astype(np.float32).reshape(1, 16))

    # Run the algorithm for one iteration to have non-zero params.
    data_parallel_model.RunNet(model, 1)

    # Save iteration momentum and post local update params
    results = {}
    v_b_ = workspace.FetchBlob("{}_{}/fc_b_v".format(
        device_prefix, _device_pid(0, process_id)))
    v_w_ = workspace.FetchBlob("{}_{}/fc_w_v".format(
        device_prefix, _device_pid(0, process_id)))

    results['v_b_'] = v_b_
    results['v_w_'] = v_w_

    workspace.RunNetOnce(model.net)

    b_0_ = workspace.FetchBlob("{}_{}/fc_b".format(
        device_prefix, _device_pid(0, process_id)))
    w_0_ = workspace.FetchBlob("{}_{}/fc_w".format(
        device_prefix, _device_pid(0, process_id)))
    b_1_ = workspace.FetchBlob("{}_{}/fc_b".format(
        device_prefix, _device_pid(1, process_id)))
    w_1_ = workspace.FetchBlob("{}_{}/fc_w".format(
        device_prefix, _device_pid(1, process_id)))

    results['b_0_'] = b_0_
    results['w_0_'] = w_0_
    results['b_1_'] = b_1_
    results['w_1_'] = w_1_

    # Test sync
    # Only process 0 overwrites sync_num on device 0; the per-device values
    # are fetched below after the global update net has run.
    if process_id == 0:
        workspace.FeedBlob(
            device_prefix + "_0/sync_num",
            np.array([2603]).astype(np.float32),
            device_option=core.DeviceOption(device_type, 0))

    # Compute block gradients.
    b_g_ = workspace.FetchBlob("{}_{}/fc_b_g".format(
        device_prefix, _device_pid(0, process_id)))
    w_g_ = workspace.FetchBlob("{}_{}/fc_w_g".format(
        device_prefix, _device_pid(0, process_id)))
    results['b_g_'] = b_g_
    results['w_g_'] = w_g_
    workspace.RunNetOnce(model._global_model_param_updates_net)

    # g_b = (b_0_ + b_1_) / 2 - b_g_
    # g_w = (w_0_ + w_1_) / 2 - w_g_
    # Values after the global parameter update (names without a trailing
    # underscore).
    v_b = workspace.FetchBlob("{}_{}/fc_b_v".format(
        device_prefix, _device_pid(0, process_id)))
    v_w = workspace.FetchBlob("{}_{}/fc_w_v".format(
        device_prefix, _device_pid(0, process_id)))
    w_g = workspace.FetchBlob("{}_{}/fc_w_g".format(
        device_prefix, _device_pid(0, process_id)))
    b_g = workspace.FetchBlob("{}_{}/fc_b_g".format(
        device_prefix, _device_pid(0, process_id)))
    w_0 = workspace.FetchBlob("{}_{}/fc_w".format(
        device_prefix, _device_pid(0, process_id)))
    b_0 = workspace.FetchBlob("{}_{}/fc_b".format(
        device_prefix, _device_pid(0, process_id)))
    w_1 = workspace.FetchBlob("{}_{}/fc_w".format(
        device_prefix, _device_pid(1, process_id)))
    b_1 = workspace.FetchBlob("{}_{}/fc_b".format(
        device_prefix, _device_pid(1, process_id)))
    results['v_b'] = v_b
    results['v_w'] = v_w
    results['w_g'] = w_g
    results['b_g'] = b_g
    results['w_0'] = w_0
    results['b_0'] = b_0
    results['w_1'] = w_1
    results['b_1'] = b_1

    # Test add_blobs_to_sync
    for j in devices:
        sync = workspace.FetchBlob(
            device_prefix + "_{}/sync_num".format(j))[0]
        results['sync_{}'.format(j)] = sync

    shared_results[process_id] = results
def predict(self, float_state_features, int_state_features=None):
    """
    Run the net on sparse features and return one result dict per example.

    :param float_state_features A list of feature -> float value dict examples
    :param int_state_features A list of feature -> int value dict examples
    :return A list with one dict per example, mapping each decoded output
        name to its output value
    """
    # Flatten the float examples into parallel key/value lists.
    float_pairs = [
        (key, value)
        for example in float_state_features
        for key, value in example.items()
    ]
    float_state_keys = [key for key, _ in float_pairs]
    float_state_values = [value for _, value in float_pairs]

    float_lengths = np.array(
        [len(e) for e in float_state_features], dtype=np.int32
    )
    workspace.FeedBlob("input/float_features.lengths", float_lengths)
    workspace.FeedBlob(
        "input/float_features.keys", np.array(float_state_keys, dtype=np.int64)
    )
    workspace.FeedBlob(
        "input/float_features.values",
        np.array(float_state_values, dtype=np.float32).flatten(),
    )

    if int_state_features is not None:
        int_lengths = np.array(
            [len(e) for e in int_state_features], dtype=np.int32
        )
        workspace.FeedBlob("input/int_features.lengths", int_lengths)

        int_pairs = [
            (key, value)
            for example in int_state_features
            for key, value in example.items()
        ]
        int_state_keys = [key for key, _ in int_pairs]
        int_state_values = [value for _, value in int_pairs]
        workspace.FeedBlob(
            "input/int_features.keys",
            np.array(int_state_keys, dtype=np.int64).flatten(),
        )
        workspace.FeedBlob(
            "input/int_features.values",
            np.array(int_state_values, dtype=np.int32).flatten(),
        )

    workspace.RunNet(self._net)

    output_prefix = "output/string_weighted_multi_categorical_features.values"
    output_lengths = workspace.FetchBlob(output_prefix + ".lengths")
    output_names = workspace.FetchBlob(output_prefix + ".keys")
    output_values = workspace.FetchBlob(output_prefix + ".values")
    assert len(output_lengths) == len(float_state_features), (
        "Invalid number of outputs: "
        + str(len(output_lengths))
        + " != "
        + str(len(float_state_features))
    )

    # The flat name/value arrays are consumed in consecutive runs whose
    # sizes are given by output_lengths.
    results = []
    start = 0
    for length in output_lengths:
        stop = start + length
        results.append({
            output_names[x].decode("utf-8"): output_values[x]
            for x in range(start, stop)
        })
        start = stop
    return results
def blob_nbytes(blob):
    """Return the ``nbytes`` attribute of the value fetched for ``blob``."""
    fetched = workspace.FetchBlob(blob)
    return fetched.nbytes
def testFeedFetchBlobMKLDNN(self):
    """Feed a float32 array with an MKLDNN device option and fetch it back unchanged."""
    blob_name = "testblob_mkldnn"
    mkldnn_option = core.DeviceOption(caffe2_pb2.MKLDNN)
    arr = np.random.randn(2, 3).astype(np.float32)

    workspace.FeedBlob(blob_name, arr, mkldnn_option)

    fetched = workspace.FetchBlob(blob_name)
    np.testing.assert_array_equal(arr, fetched)
def testFetchFeedPlainString(self):
    """Round-trip a plain string (not a tensor of strings) through the workspace.

    The string contains the control characters ``\\0`` and ``\\1`` to check
    that they survive feeding and fetching.
    """
    blob_name = 'my_plain_string'
    original = "Hello, world! I have special \0 symbols \1!"

    workspace.FeedBlob(blob_name, original)

    fetched = workspace.FetchBlob(blob_name)
    self.assertEqual(original, fetched)
def from_caffe2(self, init_net, predict_net):
    """Build a Relay module from a caffe2 init/predict net pair.

    ``init_net`` is run once in the caffe2 workspace; every workspace blob
    that is consumed by an operator of ``predict_net`` (other than the
    first input of the first operator) becomes a parameter.

    Parameters
    ----------
    init_net : protobuf object
    predict_net : protobuf object

    Returns
    -------
    mod : tvm.IRModule
        The module that optimizations will be performed on.

    params : dict
        A dict of name: tvm.nd.array pairs, used as pretrained weights
    """
    # pylint: disable=import-outside-toplevel
    from caffe2.python import workspace

    workspace.RunNetOnce(init_net)

    # Input
    input_name = predict_net.op[0].input[0]

    # Params
    self._params = {}
    used_blobs = {name for c2_op in predict_net.op for name in c2_op.input}
    for blob in workspace.Blobs():
        if blob not in used_blobs or blob == input_name:
            continue
        self._params[blob] = _nd.array(workspace.FetchBlob(blob))

    # Variables
    self._nodes = {}
    for blob in predict_net.external_input:
        if blob in self._params:
            # Parameters carry their own shape and dtype.
            param = self._params[blob]
            self._nodes[blob] = new_var(blob, shape=param.shape, dtype=param.dtype)
            continue

        shape = self._shape[blob] if blob in self._shape else ()
        # dtype may be given per blob (dict), globally (str), or not at all.
        if isinstance(self._dtype, dict) and blob in self._dtype:
            dtype = str(self._dtype[blob])
        elif isinstance(self._dtype, str):
            dtype = self._dtype
        else:
            dtype = "float32"
        self._nodes[blob] = new_var(blob, shape=shape, dtype=dtype)

    # Ops
    # Register every producer first, then convert the operators in order.
    for c2_op in predict_net.op:
        for blob in c2_op.output:
            self._ops[blob] = c2_op

    for c2_op in predict_net.op:
        self._process_op(c2_op)

    # Outputs
    out = [self._nodes[blob] for blob in predict_net.external_output]
    outputs = _expr.Tuple(out) if len(out) > 1 else out[0]

    func = _function.Function(analysis.free_vars(outputs), outputs)
    self._mod["main"] = func

    return self._mod, self._params
def test_convolution_nchw():
    """Compare NCHW convolution results between Caffe2 and the ngraph importer.

    For each parameter set a net with random X, W and B tensors and a Conv
    operator is run through Caffe2, imported with ``C2Importer`` and
    executed with ``ExecutorFactory``; the two "Y" results must be close
    (atol=1e-4, rtol=1e-3).
    """
    # (batch, input_feature_map, spatial, output_feature_map, kernel, stride,
    #  c2_padding_type)
    param_list = [
        (1, 3, 2, 1, 2, 2, caffe2_legacy_pb2.NOTSET),
        (1, 1, 4, 1, 2, 2, caffe2_legacy_pb2.NOTSET),
        (2, 3, 8, 1, 2, 2, caffe2_legacy_pb2.NOTSET),
        (8, 2, 5, 4, 3, 1, caffe2_legacy_pb2.NOTSET),
        (1, 2, 5, 2, 3, 1, caffe2_legacy_pb2.NOTSET),
        (8, 3, 4, 4, 3, 3, caffe2_legacy_pb2.VALID),
        (12, 6, 5, 5, 4, 3, caffe2_legacy_pb2.VALID),
        (8, 3, 4, 4, 3, 3, caffe2_legacy_pb2.SAME),
        (12, 6, 5, 5, 4, 3, caffe2_legacy_pb2.SAME),
    ]

    for n, ifm, spatial, ofm, kernel, stride, pad_type in param_list:
        shape_x = (n, ifm, spatial, spatial)
        shape_w = (ofm, ifm, kernel, kernel)
        shape_b = (ofm, )

        # Values are drawn in the order X, W, B.
        data_x = [random.gauss(mu=0, sigma=10) for _ in range(np.prod(shape_x))]
        data_w = [random.gauss(mu=0, sigma=10) for _ in range(np.prod(shape_w))]
        data_b = [random.gauss(mu=0, sigma=10) for _ in range(np.prod(shape_b))]

        net = core.Net("net")
        X = net.GivenTensorFill([], ["X"], shape=shape_x, values=data_x, name="X")
        W = net.GivenTensorFill([], ["W"], shape=shape_w, values=data_w, name="W")
        B = net.GivenTensorFill([], ["B"], shape=shape_b, values=data_b, name="B")
        net.Conv(
            [X, W, B],
            'Y',
            kernel=kernel,
            stride=stride,
            order='NCHW',
            legacy_pad=pad_type,
        )

        # Execute via Caffe2
        workspace.RunNetOnce(net)

        # Import caffe2 network into ngraph
        importer = C2Importer()
        importer.parse_net_def(net.Proto(), verbose=False)

        # Get handle
        f_ng = importer.get_op_handle("Y")

        # Execute
        with ExecutorFactory() as ex:
            f_result = ex.executor(f_ng)()

            # compare Caffe2 and ngraph results
            caffe2_result = workspace.FetchBlob("Y")
            assert np.allclose(
                f_result, caffe2_result, atol=1e-4, rtol=1e-3, equal_nan=False
            )
def get_detections_from_im(cfg, model, im, image_id, featmap_blob_name,
                           feat_blob_name, MIN_BOXES, MAX_BOXES,
                           conf_thresh=0.2, bboxes=None):
    """Run box detection on ``im`` and keep the most confident regions.

    For every region the best-scoring non-background class that survives
    per-class NMS is recorded. Regions whose best confidence exceeds
    ``conf_thresh`` are kept; the selection is then forced to at least
    ``MIN_BOXES`` and at most ``MAX_BOXES`` entries by taking the top
    confidences.

    Args:
        cfg: configuration providing ``TEST.SCALE``, ``TEST.MAX_SIZE``,
            ``TEST.NMS`` and ``MODEL.NUM_CLASSES``.
        model: model passed to ``infer_engine.im_detect_bbox``.
        im: image array; its first two axes are reported as height and width.
        image_id: identifier copied into the result.
        featmap_blob_name: unused here; kept for interface compatibility.
        feat_blob_name: workspace blob holding the per-region features.
        MIN_BOXES: lower bound on the number of returned regions.
        MAX_BOXES: upper bound on the number of returned regions.
        conf_thresh: minimum best-class confidence for a region to be kept.
        bboxes: optional boxes forwarded to ``im_detect_bbox``.

    Returns:
        dict with keys ``image_id``, ``image_h``, ``image_w``, ``num_boxes``,
        ``boxes``, ``region_feat``, ``object``, ``obj_prob`` and ``cls_prob``.
    """
    assert conf_thresh >= 0.

    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        num_rpn = scores.shape[0]
        region_feat = workspace.FetchBlob(feat_blob_name)

    max_conf = np.zeros((num_rpn, ), dtype=np.float32)
    max_cls = np.zeros((num_rpn, ), dtype=np.int32)
    max_box = np.zeros((num_rpn, 4), dtype=np.float32)

    # Class 0 is skipped (the loop starts at 1).
    for cls_ind in range(1, cfg.MODEL.NUM_CLASSES):
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes[:, (cls_ind * 4):(cls_ind * 4 + 4)],
                          cls_scores[:, np.newaxis])).astype(np.float32)
        # Force an integer dtype: an empty NMS result would otherwise become
        # a float64 array, which numpy rejects as an index.
        keep = np.asarray(nms(dets, cfg.TEST.NMS), dtype=np.int64)
        inds_update = np.where(cls_scores[keep] > max_conf[keep])
        kinds = keep[inds_update]
        max_conf[kinds] = cls_scores[kinds]
        max_cls[kinds] = cls_ind
        max_box[kinds] = dets[kinds][:, :4]

    keep_boxes = np.where(max_conf > conf_thresh)[0]
    if len(keep_boxes) < MIN_BOXES:
        keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
    elif len(keep_boxes) > MAX_BOXES:
        keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

    objects = max_cls[keep_boxes]
    obj_prob = max_conf[keep_boxes]
    obj_boxes = max_box[keep_boxes, :]
    cls_prob = scores[keep_boxes, :]

    assert (np.sum(objects >= cfg.MODEL.NUM_CLASSES) == 0)

    return {
        "image_id": image_id,
        "image_h": np.size(im, 0),
        "image_w": np.size(im, 1),
        'num_boxes': len(keep_boxes),
        'boxes': obj_boxes,
        'region_feat': region_feat[keep_boxes, :],
        'object': objects,
        'obj_prob': obj_prob,
        'cls_prob': cls_prob
    }
def test_int8_fc(self, n, m, k, rand_seed, quantize_bias, f):
    """Compare an int8 FC reference net with its onnxified counterpart.

    Random ``X`` (m x k), ``W`` (n x k) and ``b`` (n) are drawn uniformly
    from ``[-f, f]``. The reference net uses the ``*NNPI`` operators; the
    same proto is then retyped to the plain int8 operators, lowered with
    ``onnxifi_caffe2_net`` and run again. The two ``Y`` blobs must be close.
    """
    print(
        f"n={n}, m={m}, k={k}, rand_seed={rand_seed}, quantize_bias={quantize_bias}"
    )
    np.random.seed(rand_seed)
    workspace.ResetWorkspace()

    bound = float(f)
    X_fp32 = np.random.uniform(-bound, bound, size=(m, k)).astype(np.float32)
    W_fp32 = np.random.uniform(-bound, bound, size=(n, k)).astype(np.float32)
    b_fp32 = np.random.uniform(-bound, bound, size=n).astype(np.float32)

    X_scale, X_zero_point = self._get_scale_zp(X_fp32)
    Y_fp32 = np.dot(X_fp32, W_fp32.T) + b_fp32
    Y_scale, Y_zero_point = self._get_scale_zp(Y_fp32)

    for blob_name, value in (("X", X_fp32), ("W", W_fp32), ("b", b_fp32)):
        workspace.FeedBlob(blob_name, value)

    # With quantize_bias the packing op also emits an int32 bias blob that
    # replaces the fp32 "b" everywhere below.
    if quantize_bias:
        pack_inputs, pack_outputs, bias_blob = ["W", "b"], ["W_int8", "b_int32"], "b_int32"
    else:
        pack_inputs, pack_outputs, bias_blob = ["W"], ["W_int8"], "b"

    pack_op = core.CreateOperator(
        "Int8FCPackWeight",
        pack_inputs,
        pack_outputs,
        engine="DNNLOWP",
        save_unpacked_weights=True,
        in_scale=X_scale,
    )
    workspace.RunOperatorOnce(pack_op)

    ref_net = core.Net("net")
    ref_net.Int8QuantizeNNPI(
        ["X"], ["X_int8"], Y_scale=X_scale, Y_zero_point=X_zero_point
    )
    ref_net.Int8FCFakeAcc32NNPI(
        ["X_int8", "W_int8", bias_blob],
        ["Y_int8"],
        Y_scale=Y_scale,
        Y_zero_point=Y_zero_point,
    )
    ref_net.Int8DequantizeNNPI(["Y_int8"], ["Y"])
    ref_net.Proto().external_output.append("Y")

    # Reference run.
    workspace.RunNetOnce(ref_net)
    Y_fbgemm = workspace.FetchBlob("Y")

    # Retype the three ops in place, then lower the net through onnxifi.
    proto = ref_net.Proto()
    proto.op[0].type = "Int8Quantize"
    proto.op[1].type = "Int8FC"
    proto.op[2].type = "Int8Dequantize"
    net_onnxified = onnxifi_caffe2_net(
        ref_net.Proto(),
        {},
        debug=True,
        adjust_batch=False,
        use_onnx=False,
        weight_names=["W_int8", bias_blob],
    )

    # The whole net must have collapsed into exactly one Onnxifi op.
    num_onnxified_ops = sum(1 for o in net_onnxified.op if o.type == "Onnxifi")
    np.testing.assert_equal(num_onnxified_ops, 1)

    workspace.CreateNet(net_onnxified)
    workspace.RunNet(net_onnxified.name)
    Y_glow = workspace.FetchBlob("Y")

    if np.allclose(Y_glow, Y_fbgemm):
        return

    diff_Y = np.abs(Y_glow - Y_fbgemm)
    print_test_debug_info(
        "int8_fc",
        {
            "seed": rand_seed,
            "n": n,
            "m": m,
            "k": k,
            "X": X_fp32,
            "W": W_fp32,
            "b": b_fp32,
            "Y_fbgemm": Y_fbgemm,
            "Y_glow": Y_glow,
            "diff": diff_Y,
            "maxdiff": diff_Y.max(axis=1),
        },
    )
    assert 0
def test_convolution_sum_relu_fusion(self, stride, pad, kernel, size,
                                     input_channels, output_channels,
                                     batch_size, use_bias, group, gc, dc):
    """Check that ``ConvFusion`` (fusion_type=3) matches Conv + Sum + Relu.

    The unfused chain runs on ``dc[0]`` and the fused operator on ``dc[1]``,
    both inside a temporary ``_device_check_`` workspace. The previously
    active workspace is restored even when the comparison fails.
    """
    conv = core.CreateOperator(
        "Conv",
        ["X0", "w0", "b0"] if use_bias else ["X0", "w0"],
        ["Y0"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        device_option=dc[0]
    )
    # Named sum_op so the builtin ``sum`` is not shadowed.
    sum_op = core.CreateOperator(
        "Sum",
        ["S0", "Y0"],
        ["S0"],
        device_option=dc[0]
    )
    relu = core.CreateOperator(
        "Relu",
        ["S0"],
        ["S0"],
        device_option=dc[0]
    )
    conv_fusion = core.CreateOperator(
        "ConvFusion",
        ["X1", "w1", "b1", "S1"] if use_bias else ["X1", "w1", "S1"],
        ["S1"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        fusion_type=3,
        device_option=dc[1]
    )

    X = np.random.rand(
        batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
    w = np.random.rand(
        output_channels * group, input_channels, kernel, kernel) \
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels * group).astype(np.float32) - 0.5

    old_ws_name = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace("_device_check_", True)
    try:
        # Unfused reference: Conv -> Sum (in place on S0) -> Relu.
        workspace.FeedBlob('X0', X, dc[0])
        workspace.FeedBlob('w0', w, dc[0])
        workspace.FeedBlob('b0', b, dc[0])
        workspace.RunOperatorOnce(conv)
        Y0 = workspace.FetchBlob('Y0')
        S = np.random.rand(*Y0.shape).astype(np.float32) - 0.5
        workspace.FeedBlob('S0', S, dc[0])
        workspace.RunOperatorOnce(sum_op)
        workspace.RunOperatorOnce(relu)
        S0 = workspace.FetchBlob('S0')

        # Fused operator on a clean workspace with the same inputs.
        workspace.ResetWorkspace()
        workspace.FeedBlob('X1', X, dc[1])
        workspace.FeedBlob('w1', w, dc[1])
        workspace.FeedBlob('b1', b, dc[1])
        workspace.FeedBlob('S1', S, dc[1])
        workspace.RunOperatorOnce(conv_fusion)
        S1 = workspace.FetchBlob('S1')

        if not np.allclose(S0, S1, atol=0.01, rtol=0.01):
            print(S1.flatten())
            print(S0.flatten())
            print(np.max(np.abs(S1 - S0)))
            self.assertTrue(False)
    finally:
        # Always return to the caller's workspace, including on failure.
        workspace.SwitchWorkspace(old_ws_name)
def test_int8_quantize(self, n, rand_seed, non_zero_offset):
    """Compare quantize/dequantize through an identity FC on both backends.

    ``X`` is an n x n matrix rounded through float16; ``W`` is the identity
    and ``b`` is zero, and the FC output reuses the input scale/zero point.
    The reference net uses the ``*NNPI`` operators; the retyped net is
    lowered with ``onnxifi_caffe2_net``. Both ``Y`` blobs must be close.
    """
    print("n={}, rand_seed={}".format(n, rand_seed))
    np.random.seed(rand_seed)
    workspace.ResetWorkspace()

    # non_zero_offset draws from [-1, 1); otherwise from [0, 1).
    if non_zero_offset:
        raw = np.random.uniform(-1, 1, size=(n, n))
    else:
        raw = np.random.rand(n, n)
    X_fp32 = raw.astype(np.float16).astype(np.float32)

    W_fp32 = np.identity(n, dtype=np.float32)
    b_fp32 = np.zeros((n, ), dtype=np.float32)

    X_scale, X_zero_point = self._get_scale_zp(X_fp32)

    for blob_name, value in (("X", X_fp32), ("W", W_fp32), ("b", b_fp32)):
        workspace.FeedBlob(blob_name, value)

    pack_op = core.CreateOperator(
        "Int8FCPackWeight",
        ["W"],
        ["W_int8"],
        engine="DNNLOWP",
        save_unpacked_weights=True,
        in_scale=X_scale,
    )
    workspace.RunOperatorOnce(pack_op)

    ref_net = core.Net("net")
    ref_net.Int8QuantizeNNPI(
        ["X"], ["X_int8"], Y_scale=X_scale, Y_zero_point=X_zero_point
    )
    ref_net.Int8FCFakeAcc32NNPI(
        ["X_int8", "W_int8", "b"],
        ["Y_int8"],
        Y_scale=X_scale,
        Y_zero_point=X_zero_point,
    )
    ref_net.Int8DequantizeNNPI(["Y_int8"], ["Y"])
    ref_net.Proto().external_output.append("Y")

    # Reference run.
    workspace.RunNetOnce(ref_net)
    Y_fbgemm = workspace.FetchBlob("Y")

    # Retype the three ops in place, then lower the net through onnxifi.
    proto = ref_net.Proto()
    proto.op[0].type = "Int8Quantize"
    proto.op[1].type = "Int8FC"
    proto.op[2].type = "Int8Dequantize"
    net_onnxified = onnxifi_caffe2_net(
        ref_net.Proto(),
        {},
        debug=True,
        adjust_batch=False,
        use_onnx=False,
        weight_names=["W_int8", "b"],
    )

    # The whole net must have collapsed into exactly one Onnxifi op.
    num_onnxified_ops = sum(1 for o in net_onnxified.op if o.type == "Onnxifi")
    np.testing.assert_equal(num_onnxified_ops, 1)

    workspace.CreateNet(net_onnxified)
    workspace.RunNet(net_onnxified.name)
    Y_glow = workspace.FetchBlob("Y")

    if np.allclose(Y_glow, Y_fbgemm):
        return

    diff_Y = np.abs(Y_glow - Y_fbgemm)
    # NOTE(review): the debug label is "int8_fc" although this is the
    # quantize test - confirm whether that is intentional.
    print_test_debug_info(
        "int8_fc",
        {
            "seed": rand_seed,
            "n": n,
            "X": X_fp32,
            "W": W_fp32,
            "b": b_fp32,
            "Y_fbgemm": Y_fbgemm,
            "Y_glow": Y_glow,
            "diff": diff_Y,
            "maxdiff": diff_Y.max(axis=1),
        },
    )
    assert 0
def run_main(config):
    '''Run MAMC training with periodic validation, logging, plotting and snapshots.

    Reads from ``config``: ``name``, ``finetune``, ``root_dir``,
    ``dataset_name``, ``config_path``, ``gpu_id`` and, under ``solver``,
    ``max_iterations``, ``display``, ``train_iterations``, ``test_interval``
    and ``test_iterations``.

    Each training iteration runs the training net once and accumulates the
    ``accuracy``, ``accuracy_5``, ``softmax_loss`` and ``loss`` blobs into a
    display-window counter and an epoch counter. Window averages are logged
    every ``display`` iterations, epoch averages are plotted every
    ``train_iterations`` iterations, and validation (followed by a snapshot
    of the training parameters) runs every ``test_interval`` iterations.
    '''
    # init model
    initialize(config)

    # print network graph
    # The string below is disabled graph-drawing code kept as a bare string
    # expression; it is never executed.
    """
    # full-graph
    mamc_graph = net_drawer.GetPydotGraph(
        validation_model.net.Proto().op,
        "mamc_graph",
        rankdir="TB",
    )
    mamc_graph.write_svg("mamc_no_npairloss_graph.svg")
    print("write graph over...")
    sys.exit(0)

    # # mini-graph
    # mamc_graph_mini = net_drawer.GetPydotGraphMinimal(
    #     validation_model.net.Proto().op,
    #     "mamc_graph_minimal",
    #     rankdir="TB",
    #     minimal_dependency=True
    # )
    # mamc_graph_mini.write_svg("mamc_no_npairloss_graph_mini.svg")
    # print("write graph over...")
    # sys.exit(0)
    """

    # experiment params config
    # training mode
    # tag = "imagenet"
    # The experiment tag is the configured name prefixed by the training mode.
    tag = config['name']
    if config['finetune']:
        tag = 'FINETUNE-{}'.format(tag)
    else:
        tag = 'RETRAIN-{}'.format(tag)
    # Experiments live under <root_dir>/experiments[/<dataset_name>].
    root_experiments_dir = os.path.join(config['root_dir'], 'experiments')
    if config['dataset_name'] is not None:
        root_experiments_dir = os.path.join(root_experiments_dir, config['dataset_name'])
    experiment = Experiment(root_experiments_dir, tag)
    experiment.add_config_file(config['config_path'])

    # add chart
    chart_acc = experiment.add_chart('accuracy', xlabel='epochs', ylabel='accuracy')
    chart_acc_5 = experiment.add_chart('accuracy_5', xlabel='epochs', ylabel='accuracy_5')
    chart_softmax_loss = experiment.add_chart('softmax_loss', xlabel='epochs', ylabel='softmax_loss')
    chart_loss = experiment.add_chart('loss', xlabel='epochs', ylabel='loss')

    # plot params (should be added into 'experiment module'
    # TODO add 'variable' object to Experiment class
    # Per-epoch averages that are plotted.
    training_acc_statistics = []
    training_acc5_statistics = []
    training_softmax_loss_statistics = []
    training_loss_statistics = []

    # Running sums over the current epoch (reset every train_iterations).
    epoch_training_acc = 0
    epoch_training_acc5 = 0
    epoch_training_softmax_loss = 0
    epoch_training_loss = 0

    # Running sums over the current display window (reset every display).
    training_accuracy = 0
    training_accuracy_5 = 0
    training_softmax_loss = 0
    training_loss = 0

    # Per-validation-run averages that are plotted.
    validation_acc_statistics = []
    validation_acc5_statistics = []
    validation_softmax_loss_statistics = []
    validation_loss_statistics = []

    # Best average validation accuracy seen so far.
    best_acc = 0

    # build model
    training_model = build_training_model(config, experiment)
    validation_model = build_validation_model(config)

    # run the model
    experiment.add_log(
        "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    )
    for training_iter in tqdm(range(config['solver']['max_iterations'])):
        workspace.RunNet(training_model.net)
        accuracy = workspace.FetchBlob('accuracy')
        accuracy_5 = workspace.FetchBlob('accuracy_5')
        softmax_loss = workspace.FetchBlob('softmax_loss')
        loss = workspace.FetchBlob('loss')

        epoch_training_acc += accuracy
        epoch_training_acc5 += accuracy_5
        epoch_training_softmax_loss += softmax_loss
        epoch_training_loss += loss

        training_accuracy += accuracy
        training_accuracy_5 += accuracy_5
        training_softmax_loss += softmax_loss
        training_loss += loss

        # display training result
        # The `training_iter != 0` guard means iteration 0 never reports,
        # which only matters when the interval is 1.
        if training_iter != 0 and (training_iter + 1) % config['solver']['display'] == 0:
            experiment.add_log("[TRAIN] epoch: {} iteration: {} accuracy: {:.4f} "\
                "accuracy_5: {:.4f} softmax_loss: {:.4f} loss: {:.4f}".format(
                    (training_iter // config['solver']['train_iterations'] + 1),
                    training_iter,
                    training_accuracy / config['solver']['display'],
                    training_accuracy_5 / config['solver']['display'],
                    training_softmax_loss / config['solver']['display'],
                    training_loss / config['solver']['display'],
                ))
            # The learning rate is read from a blob whose name embeds the GPU id.
            experiment.add_log("Global learning rate: {}".format(
                workspace.FetchBlob(
                    'MultiPrecisionSgdOptimizer_0_lr_gpu{}'.format(
                        config['gpu_id']))))

            # cleanup the counters
            training_accuracy = training_accuracy_5 = training_softmax_loss = training_loss = 0

        # plot training statistics every epoch
        if training_iter != 0 and (
                training_iter + 1) % config['solver']['train_iterations'] == 0:
            training_acc_statistics.append(
                epoch_training_acc / config['solver']['train_iterations'])
            training_acc5_statistics.append(
                epoch_training_acc5 / config['solver']['train_iterations'])
            training_softmax_loss_statistics.append(
                epoch_training_softmax_loss / config['solver']['train_iterations'])
            training_loss_statistics.append(
                epoch_training_loss / config['solver']['train_iterations'])

            epoch_training_acc = 0
            epoch_training_acc5 = 0
            epoch_training_softmax_loss = 0
            epoch_training_loss = 0

            experiment.add_plot(chart_acc, training_acc_statistics, 'r.--', 'training')
            experiment.add_plot(chart_acc_5, training_acc5_statistics, 'r.--', 'training')
            experiment.add_plot(chart_softmax_loss, training_softmax_loss_statistics, 'b+--', 'training')
            experiment.add_plot(chart_loss, training_loss_statistics, 'b+--', 'training')

        # start to validate the model
        if training_iter != 0 and (training_iter + 1) % config['solver']['test_interval'] == 0:
            test_accuracy = 0
            test_accuracy_5 = 0
            test_softmax_loss = 0
            test_loss = 0

            for test_iter in range(config['solver']['test_iterations']):
                workspace.RunNet(validation_model.net)
                # NOTE: these reuse (and overwrite) the training metric names.
                accuracy = workspace.FetchBlob('accuracy')
                accuracy_5 = workspace.FetchBlob('accuracy_5')
                softmax_loss = workspace.FetchBlob('softmax_loss')
                loss = workspace.FetchBlob('loss')

                # update counter
                test_accuracy += accuracy
                test_accuracy_5 += accuracy_5
                test_softmax_loss += softmax_loss
                test_loss += loss

                experiment.add_log("[VALIDATION] accuracy: {:.4f} accuracy_5: {:.4f} "\
                    "softmax_loss: {:.4f} loss: {:.4f}".format(
                        accuracy, accuracy_5, softmax_loss, loss))
            # end validation

            # Track the best average validation accuracy.
            if test_accuracy / config['solver']['test_iterations'] > best_acc:
                best_acc = test_accuracy / config['solver']['test_iterations']

            experiment.add_log(
                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            )
            experiment.add_log("[VALIDATION] avg_acc: {:.4f} best_acc: {:.4f} avg_acc_5: {:.4f} "\
                "avg_softmax_loss: {:.4f} avg_loss: {:.4f}".format(
                    test_accuracy / config['solver']['test_iterations'],
                    best_acc,
                    test_accuracy_5 / config['solver']['test_iterations'],
                    test_softmax_loss / config['solver']['test_iterations'],
                    test_loss / config['solver']['test_iterations'],
                )
            )

            # snapshot training model params
            # Both the current average accuracy and best_acc are handed to the
            # snapshot call.
            print("[INFO] snapshot the model..... ")
            experiment.add_init_net_snapshot(
                training_model.GetAllParams(),
                workspace,
                config,
                (training_iter // config['solver']['train_iterations'] + 1),
                test_accuracy / config['solver']['test_iterations'],
                best_acc,
            )
            print("[INFO] snapshot the model. Done.....")

            # plot validation statistics
            validation_acc_statistics.append(
                test_accuracy / config['solver']['test_iterations'])
            validation_acc5_statistics.append(
                test_accuracy_5 / config['solver']['test_iterations'])
            validation_softmax_loss_statistics.append(
                test_softmax_loss / config['solver']['test_iterations'])
            validation_loss_statistics.append(
                test_loss / config['solver']['test_iterations'])

            experiment.add_plot(chart_acc, validation_acc_statistics, 'c.--', 'validation')
            experiment.add_plot(chart_acc_5, validation_acc5_statistics, 'c.--', 'validation')
            experiment.add_plot(chart_softmax_loss, validation_softmax_loss_statistics, 'g+--', 'validation')
            experiment.add_plot(chart_loss, validation_loss_statistics, 'g+--', 'validation')
            experiment.add_log(
                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            )
def _run_zero_even_op(self, X):
    """Run a ``ZeroEven`` operator on ``X`` and return the fetched ``Y`` blob."""
    zero_even = core.CreateOperator('ZeroEven', ['X'], ['Y'])
    workspace.FeedBlob('X', X)
    workspace.RunOperatorOnce(zero_even)
    return workspace.FetchBlob('Y')
def _test_index_ops(self, entries, dtype, index_create_op):
    """Exercise the Index* operators for one key type.

    Covers creation, loading, lookup before and after freezing, size,
    store, loading into a second index with ``skip_first_entry`` and a
    Save/Load round trip through a temporary minidb file.

    Args:
        entries: sequence of at least eight keys (indices 0-7 are read).
        dtype: numpy dtype used for the key arrays.
        index_create_op: name of the operator that creates the index blob.
    """
    def run_op(op_type, inputs, outputs, **kwargs):
        # Create and immediately run a single operator.
        workspace.RunOperatorOnce(
            core.CreateOperator(op_type, inputs, outputs, **kwargs))

    run_op(index_create_op, [], ['index'], max_elements=10)

    my_entries = np.array(
        [entries[0], entries[1], entries[2]], dtype=dtype)
    workspace.FeedBlob('entries', my_entries)
    run_op('IndexLoad', ['index', 'entries'], ['index'])

    # Before freezing, unseen keys (entries[3], entries[4]) get new ids.
    query1 = np.array(
        [entries[0], entries[3], entries[0], entries[4]],
        dtype=dtype)
    workspace.FeedBlob('query1', query1)
    run_op('IndexGet', ['index', 'query1'], ['result1'])
    result1 = workspace.FetchBlob('result1')
    np.testing.assert_array_equal([1, 4, 1, 5], result1)

    # After freezing, unseen keys map to 0.
    run_op('IndexFreeze', ['index'], ['index'])
    query2 = np.array(
        [entries[5], entries[4], entries[0], entries[6], entries[7]],
        dtype=dtype)
    workspace.FeedBlob('query2', query2)
    run_op('IndexGet', ['index', 'query2'], ['result2'])
    result2 = workspace.FetchBlob('result2')
    np.testing.assert_array_equal([0, 5, 1, 0, 0], result2)

    run_op('IndexSize', ['index'], ['index_size'])
    size = workspace.FetchBlob('index_size')
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(size, 6)

    run_op('IndexStore', ['index'], ['stored_entries'])
    stored_actual = workspace.FetchBlob('stored_entries')
    new_entries = np.array([entries[3], entries[4]], dtype=dtype)
    np.testing.assert_array_equal(
        np.concatenate((my_entries, new_entries)), stored_actual)

    run_op(index_create_op, [], ['index2'])
    run_op('IndexLoad', ['index2', 'stored_entries'], ['index2'],
           skip_first_entry=1)
    run_op('IndexSize', ['index2'], ['index2_size'])
    index2_size = workspace.FetchBlob('index2_size')
    self.assertEqual(index2_size, 5)

    # test serde
    with tempfile.NamedTemporaryFile() as tmp:
        run_op('Save', ['index'], [],
               absolute_path=1, db_type='minidb', db=tmp.name)
        # frees up the blob
        workspace.FeedBlob('index', np.array([]))
        # reloads the index
        run_op('Load', [], ['index'],
               absolute_path=1, db_type='minidb', db=tmp.name)
        query3 = np.array(
            [entries[0], entries[3], entries[0], entries[4], entries[4]],
            dtype=dtype)
        workspace.FeedBlob('query3', query3)
        run_op('IndexGet', ['index', 'query3'], ['result3'])
        result3 = workspace.FetchBlob('result3')
        np.testing.assert_array_equal([1, 4, 1, 5, 5], result3)
def InferTensorRunAndCompare(self, model, expected_uninferred_blobs=None):
    '''
    Infers shapes and types for the model, runs it, and asserts that the
    inferred values agree with the blobs actually found in the workspace.

    'expected_uninferred_blobs' is the list of blobs for which type and
    shape cannot be inferred; those blobs are skipped by the checks.
    '''
    (shapes, types) = workspace.InferShapesAndTypes(
        [model.param_init_net, model.net],
    )

    # Materialise every blob by running the init net and the main net.
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, True)
    workspace.RunNet(model.Proto().name)

    # numpy dtype -> TensorProto data type (BYTE and STRING have no entry).
    proto_type_for = {
        np.dtype('float32'): caffe2_pb2.TensorProto.FLOAT,
        np.dtype('int32'): caffe2_pb2.TensorProto.INT32,
        np.dtype('bool'): caffe2_pb2.TensorProto.BOOL,
        np.dtype('uint8'): caffe2_pb2.TensorProto.UINT8,
        np.dtype('int8'): caffe2_pb2.TensorProto.INT8,
        np.dtype('uint16'): caffe2_pb2.TensorProto.UINT16,
        np.dtype('int16'): caffe2_pb2.TensorProto.INT16,
        np.dtype('int64'): caffe2_pb2.TensorProto.INT64,
        np.dtype('float16'): caffe2_pb2.TensorProto.FLOAT16,
        np.dtype('float64'): caffe2_pb2.TensorProto.DOUBLE,
    }

    # Ground truth gathered from the workspace.
    correct_shapes = {}
    correct_types = {}
    for blob_name in workspace.Blobs():
        arr = workspace.FetchBlob(blob_name)
        correct_shapes[blob_name] = arr.shape
        if type(arr) is not np.ndarray:
            correct_types[blob_name] = str(type(arr))
        elif arr.dtype in proto_type_for:
            correct_types[blob_name] = proto_type_for[arr.dtype]
        else:
            correct_types[blob_name] = "unknown {}".format(arr.dtype)

    if expected_uninferred_blobs is None:
        expected_uninferred_blobs = []

    for blob_name in correct_shapes:
        # skip blobs for which shape couldn't be inferred
        if blob_name in expected_uninferred_blobs:
            continue

        inferred_shape = np.array(shapes[blob_name]).astype(np.int32)
        actual_shape = np.array(correct_shapes[blob_name]).astype(np.int32)
        self.assertTrue(
            np.array_equal(inferred_shape, actual_shape),
            "Shape {} mismatch: {} vs. correct {}".format(
                blob_name, shapes[blob_name], correct_shapes[blob_name]
            )
        )

        type_missing = blob_name not in types and blob_name in correct_types
        self.assertFalse(
            type_missing,
            "Type for {} not defined".format(blob_name),
        )
        self.assertEqual(
            types[blob_name],
            correct_types[blob_name],
            "Type {} mismatch: {} vs. {}".format(
                blob_name, types[blob_name], correct_types[blob_name],
            )
        )
def run_single_kpts(
    net,
    image,
    target_size,
    pixel_means=PIXEL_MEANS_DEFAULT,
    pixel_stds=PIXEL_STDS_DEFAULT,
    max_size=1333,
):
    """Run keypoint inference for one image on a device reached through adb.

    Inputs and zero-filled output templates are serialized into a NetDef
    and pushed to the device, ``nnapi_runner`` is executed there, and the
    output blobs are pulled back and deserialized into the workspace.
    If anything in the run/retrieve phase raises, the exception is printed
    and empty results are returned.

    Returns:
        (boxes, xy_preds, classids): boxes divided by the image scale with
        the score stacked as the last column; keypoint predictions shaped
        (num_rois, 4, num_keypoints), also divided by the scale; class ids.
    """
    inputs = utils2.prepare_blobs(
        image,
        target_size=target_size,
        max_size=max_size,
        pixel_means=pixel_means,
        pixel_stds=pixel_stds,
    )

    # Prepare inputs for AABB and Int8AABB operators
    im_info = inputs["im_info"]
    scale = im_info[0][2]
    inputs["im_infoq"] = np.rint(im_info[:,:2] * 8.0).astype(np.uint16)
    inputs["im_info2"] = im_info[:,:2]

    names = []
    payloads = []

    # Serialize inputs for remote device
    for in_name, in_value in inputs.items():
        workspace.FeedBlob(in_name, in_value)
        names.append(in_name)
        payloads.append(workspace.SerializeBlob(in_name))

    # Serialize output templates for remote device
    fully_quantized = any(op.type == "Int8AABBRoIProposals" for op in net.op)
    if fully_quantized:
        bbox_type = np.uint16
    else:
        bbox_type = np.float32
    output_templates = {
        "score_nms": np.zeros((3,), np.float32),
        "keypoint_rois": np.zeros((3, 4), bbox_type),
        "keypoints_out": np.zeros((3, 17, 2), bbox_type),
        "class_nms": np.zeros((3,), np.int32),
        "keypoints_scores_out": np.zeros((3, 17), np.float32),
    }
    for out_name in net.external_output:
        # The template is fed under a placeholder name but registered under
        # the real output name.
        template_name = out_name + "_empty_template"
        names.append(out_name)
        workspace.FeedBlob(template_name, output_templates[out_name])
        payloads.append(workspace.SerializeBlob(template_name))

    # Package inputs and output templates
    inout_netdef = caffe2_pb2.NetDef()
    inout_netdef.arg.extend([
        utils.MakeArgument("blob_names", names),
        utils.MakeArgument("ser_blobs", payloads),
    ])

    # Send in/out to the remote device
    with tempfile.NamedTemporaryFile() as inout_file:
        inout_file.write(inout_netdef.SerializeToString())
        inout_file.flush()
        subprocess.check_call(["adb", "push", inout_file.name, "/data/local/tmp/input_output.pb"])

    try:
        # Run the model
        if os.environ.get("USE_CAFFE2_REFERENCE") in ("1", "true", "yes", "on"):
            use_caffe2 = "--use_caffe2_reference true"
        else:
            use_caffe2 = ""
        run_cmd = (
            "adb shell 'cd /data/local/tmp ; GLOG_logtostderr=true GLOG_v=0 "
            "./nnapi_runner %s --init_net init_net.pb --predict_net predict_net.pb "
            "--inout_net input_output.pb --out_path output_blobs.pb'"
        )
        subprocess.check_call(run_cmd % use_caffe2, shell=True)

        # Retrieve and deserialize outputs
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = os.path.join(tmpdir, "output_blobs.pb")
            subprocess.check_call(["adb", "pull", "/data/local/tmp/output_blobs.pb", output_file])

            out_net = caffe2_pb2.NetDef()
            with open(output_file, "rb") as handle:
                out_net.ParseFromString(handle.read())

        for serialized in utils.ArgsToDict(out_net.arg)["outputs"]:
            blob_proto = caffe2_pb2.BlobProto()
            blob_proto.ParseFromString(serialized)
            workspace.DeserializeBlob(blob_proto.name, serialized)

        scores = workspace.FetchBlob("score_nms")
        boxes = workspace.FetchBlob("keypoint_rois")
        coords_preds = workspace.FetchBlob("keypoints_out")
        scores_preds = workspace.FetchBlob("keypoints_scores_out")
        classids = workspace.FetchBlob("class_nms")

        # uint16 outputs are rescaled by 0.125, the inverse of the 8.0
        # factor applied to im_infoq above.
        if boxes.dtype == np.uint16:
            boxes = boxes.astype(np.float32) * 0.125

        # New output format of AABBRoIKeypoints:
        # - XY coordinates are [num_rois, num_keypoints, 2] array in keypoints_out
        # - Scores are [num_rois, num_keypoints] array in keypoints_scores_out
        if coords_preds.dtype == np.uint16:
            coords_preds = coords_preds.astype(np.float32) * 0.125
        assert coords_preds.shape[:2] == scores_preds.shape
        num_rois, num_keypoints = coords_preds.shape[:2]

        # Stack (x, y, score, 0) per keypoint, then move that axis to position 1.
        score_plane = scores_preds.reshape([num_rois, num_keypoints, 1])
        zero_plane = np.zeros([num_rois, num_keypoints, 1], dtype=np.float32)
        xy_preds = np.concatenate((coords_preds, score_plane, zero_plane), axis=2)
        assert xy_preds.shape == (num_rois, num_keypoints, 4)
        xy_preds = np.swapaxes(xy_preds, 1, 2)
        assert xy_preds.shape == (num_rois, 4, num_keypoints)

    except Exception as e:
        print(e)
        # may not detect anything at all
        R = 0
        scores = np.zeros((R,), dtype=np.float32)
        boxes = np.zeros((R, 4), dtype=np.float32)
        xy_preds = np.zeros((R, 4, 1), dtype=np.float32)
        classids = np.zeros((R,), dtype=np.float32)

    # Map results back to original image coordinates.
    scale = inputs["im_info"][0][2]
    boxes /= scale
    if xy_preds is not None:
        xy_preds /= scale

    boxes = np.column_stack((boxes, scores))
    return boxes, xy_preds, classids
fc_1 = m.net.FC(["data", "fc_w", "fc_b"], "fc1") pred = m.net.Sigmoid(fc_1, "pred") pred2 = m.net.FloatToHalf(pred, 'pred2') pred = m.net.HalfToFloat(pred2, 'pred3') softmax, loss = m.net.SoftmaxWithLoss([pred, "label"], ["softmax", "loss"]) # softmax2 = m.net.FloatToHalf(softmax, 'softmax2') print(m.net.Proto()) print(m.param_init_net.Proto()) m.net.RunAllOnGPU(gpu_id=0, use_cudnn=True) m.param_init_net.RunAllOnGPU(gpu_id=0, use_cudnn=True) workspace.RunNetOnce(m.param_init_net) workspace.CreateNet(m.net) # Run 100 x 10 iterations for _ in range(100): data = np.random.rand(16, 100).astype(np.float32) label = (np.random.rand(16) * 10).astype(np.int32) workspace.FeedBlob("data", data, device_opts) workspace.FeedBlob("label", label, device_opts) workspace.RunNet(m.name, 10) # run for 10 times pred2 = workspace.FetchBlob('pred2') print(pred2) print(pred2.dtype) # softmax2 = workspace.FetchBlob('softmax2') # print(softmax2) # print(softmax2.dtype) print(workspace.FetchBlob("softmax")) print(workspace.FetchBlob("loss"))
def test_preprocessing_network(self):
    """Check the Caffe2 preprocessing net against the numpy preprocessor.

    Normalization parameters are identified per feature, the features are
    normalized once with ``NumpyFeatureProcessor`` and once by a net built
    through ``PreprocessorNet``, and the results are compared per feature.
    """
    feature_value_map = read_data()

    normalization_parameters = {
        name: normalization.identify_parameter(
            name, values, feature_type=self._feature_type_override(name)
        )
        for name, values in feature_value_map.items()
    }
    test_features = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters
    )

    net = core.Net("PreprocessingTestNet")
    C2.set_net(net)
    preprocessor = PreprocessorNet()

    # Build the net: each feature gets a placeholder input blob and a
    # preprocessed output blob.
    name_preprocessed_blob_map = {}
    for feature_name in feature_value_map:
        blob_name = str(feature_name)
        workspace.FeedBlob(blob_name, np.array([0], dtype=np.int32))
        preprocessed_blob, _ = preprocessor.preprocess_blob(
            blob_name, [normalization_parameters[feature_name]]
        )
        name_preprocessed_blob_map[feature_name] = preprocessed_blob

    workspace.CreateNet(net)

    # Replace the placeholders with the real values, adding a trailing axis.
    for feature_name, feature_value in feature_value_map.items():
        workspace.FeedBlob(str(feature_name), np.expand_dims(feature_value, -1))

    workspace.RunNetOnce(net)

    for feature_name in feature_value_map:
        normalized_features = workspace.FetchBlob(
            name_preprocessed_blob_map[feature_name]
        )
        if feature_name != ENUM_FEATURE_ID:
            normalized_features = np.squeeze(normalized_features, -1)

        # At the limit, boxcox has some numerical instability
        tolerance = 0.5 if feature_name == BOXCOX_FEATURE_ID else 0.01

        expected = test_features[feature_name]
        close = np.isclose(
            normalized_features, expected, rtol=tolerance, atol=tolerance
        )
        non_matching = np.where(np.logical_not(close))
        self.assertTrue(
            np.all(close),
            "{} does not match: {} {}".format(
                feature_name,
                normalized_features[non_matching].tolist(),
                test_features[feature_name][non_matching].tolist(),
            ),
        )
def testGatherRecord(self):
    """Gather rows 1, 3 and 4 from a nested record and verify every field.

    The record holds a dense field, a list and a list of lists. The
    expected values are rebuilt in numpy from the offsets implied by the
    length arrays.
    """
    indices = np.array([1, 3, 4], dtype=np.int32)
    dense = np.array(list(range(20)), dtype=np.float32).reshape(10, 2)
    lengths = np.array(list(range(10)), dtype=np.int32)
    items = np.array(list(range(lengths.sum())), dtype=np.int64)
    items_lengths = np.array(list(range(lengths.sum())), dtype=np.int32)
    items_items = np.array(list(range(items_lengths.sum())), dtype=np.int64)

    sparse_schema = schema.Struct(
        ('list', schema.List(np.int64)),
        ('list_of_list', schema.List(schema.List(np.int64))),
    )
    record = self.new_record(
        schema.Struct(
            ('dense', schema.Scalar(np.float32)),
            ('sparse', sparse_schema),
            ('empty_struct', schema.Struct())))
    indices_record = self.new_record(schema.Scalar(np.int32))
    input_record = schema.Struct(
        ('indices', indices_record),
        ('record', record),
    )
    # `lengths` is fed twice: once for `list` and once for `list_of_list`.
    schema.FeedRecord(input_record, [
        indices, dense, lengths, items, lengths, items_lengths, items_items
    ])

    gathered_record = self.model.GatherRecord(input_record)
    self.assertTrue(schema.equal_schemas(gathered_record, record))

    self.run_train_net_forward_only()

    def pick(values, starts, counts):
        # Concatenate values[starts[i]:starts[i] + counts[i]] for each index.
        return np.concatenate(
            [values[starts[i]:starts[i] + counts[i]] for i in indices])

    # Dense rows and list lengths are selected row by row.
    gathered_dense = workspace.FetchBlob(gathered_record.dense())
    expected_dense = np.concatenate([dense[i:i + 1] for i in indices])
    np.testing.assert_array_equal(expected_dense, gathered_dense)

    gathered_lengths = workspace.FetchBlob(
        gathered_record.sparse.list.lengths())
    expected_lengths = np.concatenate([lengths[i:i + 1] for i in indices])
    np.testing.assert_array_equal(expected_lengths, gathered_lengths)

    # Start position of each row's items (exclusive prefix sum of lengths).
    offsets = lengths.cumsum() - lengths

    gathered_items = workspace.FetchBlob(
        gathered_record.sparse.list.items())
    np.testing.assert_array_equal(
        pick(items, offsets, lengths), gathered_items)

    gathered_items_lengths = workspace.FetchBlob(
        gathered_record.sparse.list_of_list.items.lengths())
    np.testing.assert_array_equal(
        pick(items_lengths, offsets, lengths), gathered_items_lengths)

    # For the innermost items, a row's span is the sum of the inner lengths
    # belonging to it.
    nested_offsets = []
    nested_lengths = []
    running_total = 0
    cursor = 0
    for count in lengths:
        row_total = 0
        for inner_length in items_lengths[cursor:cursor + count]:
            row_total += inner_length
        nested_offsets.append(running_total)
        nested_lengths.append(row_total)
        running_total += row_total
        cursor += count

    gathered_items_items = workspace.FetchBlob(
        gathered_record.sparse.list_of_list.items.items())
    np.testing.assert_array_equal(
        pick(items_items, nested_offsets, nested_lengths),
        gathered_items_items)
def test_collect_tensor_ops(self):
    """Collect samples from three incrementing blobs and check the result.

    A plan runs a reader net (adds ONE to every blob) and a collect net
    (``CollectTensor`` with ``num_to_collect``) for
    ``max_example_to_cover`` iterations. The collected vectors are then
    concatenated and checked for shape, size, spread across the value
    range, and equality between the three blobs.
    """
    num_to_collect = 1000
    max_example_to_cover = 100000
    expected_count = min(num_to_collect, max_example_to_cover)

    blobs = ['blob_1', 'blob_2', 'blob_3']
    bvec_map = {}

    init_net = core.Net('init_net')
    ONE = init_net.ConstantFill([], 'ONE', shape=[1, 2], value=1)
    for name in blobs:
        init_net.ConstantFill([], [name], shape=[1, 2], value=0)
        bvec_map[name] = name + '_vec'
        init_net.CreateTensorVector([], [bvec_map[name]])

    reader_net = core.Net('reader_net')
    for name in blobs:
        reader_net.Add([name, ONE], [name])

    collect_net = core.Net('collect_net')
    bvec = [bvec_map[name] for name in blobs]
    collect_net.CollectTensor(
        bvec + blobs,
        bvec,
        num_to_collect=num_to_collect,
    )

    print('Collect Net Proto: {}'.format(collect_net.Proto()))

    plan = core.Plan('collect_data')
    plan.AddStep(core.execution_step('collect_init', init_net))
    collect_step = core.execution_step(
        'collect_data', [reader_net, collect_net],
        num_iter=max_example_to_cover)
    plan.AddStep(collect_step)
    workspace.RunPlan(plan)

    # concat the collected tensors
    concat_net = core.Net('concat_net')
    bconcated_map = {}
    bsize_map = {}
    for name in blobs:
        bconcated_map[name] = name + '_concated'
        bsize_map[name] = name + '_size'
        concat_net.ConcatTensorVector([bvec_map[name]], [bconcated_map[name]])
        concat_net.TensorVectorSize([bvec_map[name]], [bsize_map[name]])

    workspace.RunNetOnce(concat_net)

    # check data
    first_blob = blobs[0]
    reference_result = workspace.FetchBlob(bconcated_map[first_blob])
    self.assertEqual(reference_result.shape, (expected_count, 2))

    size = workspace.FetchBlob(bsize_map[first_blob])
    self.assertEqual(tuple(), size.shape)
    self.assertEqual(expected_count, size.item())

    # Every one of the 10 bins over the value range must hold more than
    # 70% of an even share of the collected samples.
    hist, _ = np.histogram(
        reference_result[:, 0], bins=10, range=(1, max_example_to_cover))
    print('Sample histogram: {}'.format(hist))
    self.assertTrue(all(hist > 0.7 * (num_to_collect / 10)))

    # The remaining blobs must match the first one exactly.
    for other in blobs[1:]:
        result = workspace.FetchBlob(bconcated_map[other])
        self.assertEqual(reference_result.tolist(), result.tolist())
def test_stateful_convolution_forward_only(
    self,
    sequence_length,
    conv_window,
    batch_size,
    state_size,
):
    '''
    This unit test demonstrates another way of using RecurrentNetwork.

    Imagine that you want to compute a convolution over a sequence,
    but the sequence elements are not given to you from the beginning,
    so you have to loop over the sequence and compute the convolution
    for each element separately. This situation can occur during the
    inference/generation step of a neural network.

    First of all, you have to provide the actual input via recurrent
    states, since the input of RecurrentNetwork should be known in advance.
    Here, we use `fake_inputs` as the input, and it's used by the op to
    extract batch size and sequence length. The actual input sequence is
    stored in the recurrent state `input_state`. At every step we generate
    a new element via input_state_t (in this example, input_state_t is
    generated at random, but in a real situation it can be created using
    the convolution output from the previous step).

    A few important differences from the regular RecurrentNetwork use case:

    1. input_state_t_prev is not only a single previous element of the
    input_state sequence. It is the last conv_window elements including (!)
    the current one - input_state_t. We specify that using the
    `link_window` argument of RecurrentNetwork. We need that many elements
    to compute a single convolution step. Also, note that `link_window`
    specifies how many elements to link starting at the
    `timestep` + `link_offset` position.

    2. The first few steps might require additional zero padding from the
    left, since not enough elements of the input_state sequence are
    available yet. So the initial_state for input_state contains several
    elements (exactly as many pads as we need for the first step). Also,
    because of that, all offsetting over the input_state sequence is
    shifted by the length of initial_input_state: see the `link_offset`
    and `alias_offset` arguments of RecurrentNetwork.

    In this test, we assert that we get the same result
    if we apply the convolution over all elements simultaneously,
    since the whole input_state sequence was generated at the end.

    Parameters:
        sequence_length: first dimension of `fake_inputs`.
        conv_window: convolution window; also sizes the filter and the
            (conv_window - 1)-long initial input state.
        batch_size: second dimension of every state/input blob.
        state_size: last dimension of every state/input blob.
    '''
    model = CNNModelHelper(name='model')
    fake_inputs = model.param_init_net.UniformFill(
        [],
        'fake_inputs',
        min=-1.0,
        max=1.0,
        shape=[sequence_length, batch_size, state_size],
    )
    initial_input_state = model.param_init_net.ConstantFill(
        [],
        'initial_input_state',
        value=0.0,
        shape=[conv_window - 1, batch_size, state_size],
    )
    initial_output_state = model.param_init_net.ConstantFill(
        [],
        'initial_output_state',
        value=0.0,
        shape=[1, batch_size, state_size],
    )
    step_model = CNNModelHelper(name='step_model', param_model=model)
    (
        fake_input_t,
        timestep,
        input_state_t_prev,
    ) = step_model.net.AddExternalInputs(
        'fake_input_t',
        'timestep',
        'input_state_t_prev',
    )
    conv_filter = step_model.param_init_net.XavierFill(
        [],
        'conv_filter',
        shape=[state_size, 1, conv_window, state_size],
    )
    conv_bias = step_model.param_init_net.ConstantFill(
        [],
        'conv_bias',
        shape=[state_size],
        value=0.0,
    )
    step_model.params.extend([conv_filter, conv_bias])
    input_state_t = step_model.net.UniformFill(
        [],
        'input_state_t',
        min=-1.0,
        max=1.0,
        shape=[1, batch_size, state_size],
    )
    # Inside the step net the window is already complete (it is linked via
    # link_window below), so no left padding is requested here.
    output_state_t = self._convolution_1d(
        model=step_model,
        inputs=input_state_t_prev,
        conv_window=conv_window,
        conv_filter=conv_filter,
        conv_bias=conv_bias,
        output_name='output_state_t',
        left_pad=False,
    )
    initial_recurrent_states = [initial_input_state, initial_output_state]
    all_inputs = (
        [fake_inputs] + step_model.params + initial_recurrent_states
    )
    all_outputs = ['input_state_all', 'output_state_all']
    recurrent_states = ['input_state', 'output_state']

    # NOTE: these operator arguments are built as real lists rather than
    # with map(): on Python 3 map() returns a one-shot lazy iterator, which
    # is not a valid repeated-value argument and is exhausted after a
    # single pass. List comprehensions behave identically on Python 2.
    param_ids = [all_inputs.index(p) for p in step_model.params]
    initial_state_ids = [
        all_inputs.index(s) for s in initial_recurrent_states
    ]
    internal_links = [
        str(blob)
        for blob in (input_state_t_prev, input_state_t, output_state_t)
    ]

    input_state_all, output_state_all, _ = model.net.RecurrentNetwork(
        all_inputs,
        all_outputs + ['step_workspaces'],
        param=param_ids,
        alias_src=recurrent_states,
        alias_dst=all_outputs,
        alias_offset=[conv_window - 1, 1],
        recurrent_states=recurrent_states,
        initial_recurrent_state_ids=initial_state_ids,
        link_internal=internal_links,
        link_external=['input_state', 'input_state', 'output_state'],
        link_offset=[0, conv_window - 1, 1],
        link_window=[conv_window, 1, 1],
        # Forward-only: no backward links and no backward step net.
        backward_link_internal=[],
        backward_link_external=[],
        backward_link_offset=[],
        step_net=str(step_model.net.Proto()),
        backward_step_net='',
        timestep='timestep' if timestep is None else str(timestep),
        outputs_with_grads=[],
    )
    # Reference computation: convolve the whole generated sequence at once,
    # this time asking the helper for left padding.
    output_states_2 = self._convolution_1d(
        model=model,
        inputs=input_state_all,
        conv_window=conv_window,
        conv_filter=conv_filter,
        conv_bias=conv_bias,
        output_name='output_states_2',
        left_pad=True,
    )

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)

    np.testing.assert_almost_equal(
        workspace.FetchBlob(output_state_all),
        workspace.FetchBlob(output_states_2),
    )