def __init__(self, para, creator, valid, mapping, valid_batch, valid_iter, input_key):
    self.para = para
    network = creator(para)

    # Measure validation error over valid_iter minibatches.
    temp_err = 0
    for i in range(valid_iter):
        data = valid.next_minibatch(valid_batch, input_map=mapping(valid))
        temp_err += network.test_minibatch(data)
    self.accuracy = 1 - temp_err / valid_iter

    # Save the trained model and reload it on the CPU to time inference there.
    model_name = os.path.join('module', '_'.join(map(str, para)))
    network.model.save(model_name)
    cpu_timer = cntk.load_model(model_name, device=cntk.cpu())

    time_cost = []
    for i in range(valid_iter):
        data = valid.next_minibatch(valid_batch, input_map=mapping(valid))
        arr = numpy.array(data[input_key].as_sequences())
        arr = numpy.reshape(arr, (-1,) + input_key.shape)
        # time.clock() was removed in Python 3.8; use a monotonic high-resolution timer.
        current_time = time.perf_counter()
        cpu_timer.eval(arr, device=cntk.cpu())
        current_time = time.perf_counter() - current_time
        time_cost.append(current_time)
    self.time = numpy.mean(time_cost)
def test_to_sequence_basic(device_id):
    dev = cntk_device(device_id)
    x = C.input_variable((C.FreeDimension, 2))
    x_seq = C.to_sequence(x)
    assert len(x_seq.dynamic_axes) == 2

    x_data = np.asarray([[[1, 2], [-1000, -1000]],
                         [[3, 4], [5, 6]]], dtype=np.float32)
    result = x_seq.eval({x: x_data}, device=dev)
    assert np.array_equal(result, x_data)

    x = C.input_variable((C.FreeDimension, 2, 3), is_sparse=True)
    x_seq_lens = C.input_variable(())
    x_seq = C.to_sequence(x, x_seq_lens)

    seq1_data = [[[0, 1, 1], [0, 1, 0]], [[1, 0, 0], [1, 0, 1]]]
    csr_seq1 = _to_csr(seq1_data)
    ndarrayview1 = C.NDArrayView.from_csr(csr_seq1, shape=(2, 2, 3), device=C.cpu())

    seq2_data = [[0, 1, 1], [1, 1, 0]]
    csr_seq2 = _to_csr([seq2_data, [[0, 0, 0], [0, 0, 0]]])
    ndarrayview2 = C.NDArrayView.from_csr(csr_seq2, shape=(2, 2, 3), device=C.cpu())

    x_data = C.Value.create(C.input_variable((2, 2, 3), is_sparse=True),
                            [ndarrayview1, ndarrayview2], device=dev).data
    x_seq_lens_data = np.asarray([2, 1], dtype=np.float32)
    result = x_seq.eval({x: x_data, x_seq_lens: x_seq_lens_data}, device=dev, as_numpy=False)
    result_dense = _to_dense(result, True)
    assert np.array_equal(result_dense[0], seq1_data)
    assert np.array_equal(result_dense[1], [seq2_data])
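# The sequence tests above and below rely on `_to_csr` and `_to_dense` helpers that are
# not defined in this section. A minimal sketch of what they might look like follows;
# this is an assumption about the test harness, not the original helpers.
import numpy as np
import scipy.sparse as sp

def _to_csr(data):
    # Flatten everything but the last axis and return a SciPy CSR matrix.
    arr = np.asarray(data, dtype=np.float32)
    return sp.csr_matrix(arr.reshape(-1, arr.shape[-1]))

def _to_dense(val, is_sequence=False):
    # Densify a sparse CNTK value by multiplying it with an identity matrix.
    if is_sequence:
        x = C.sequence.input_variable(val.shape[2:], is_sparse=True)
    else:
        x = C.input_variable(val.shape[1:], is_sparse=True)
    eye = C.constant(value=np.eye(val.shape[-1], dtype=np.float32))
    return C.times(x, eye).eval({x: val}, device=val.device)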
def test_set_excluded_devices():
    if len(C.device.all_devices()) == 1:
        return
    assert C.try_set_default_device(C.cpu(), False)
    assert C.try_set_default_device(C.gpu(0), False)
    C.set_excluded_devices([C.cpu()])
    assert not C.try_set_default_device(C.cpu(), False)
    C.set_excluded_devices([])
    assert C.try_set_default_device(C.cpu(), False)
def test_2d_sparse_sequences_value(device_id):
    dev = cntk_device(device_id)
    seq1_data = [[[0, 1, 1], [0, 1, 0]], [[1, 0, 0], [1, 0, 1]]]
    csr_seq1 = _to_csr(seq1_data)
    ndarrayview1 = C.NDArrayView.from_csr(csr_seq1, shape=(2, 2, 3), device=C.cpu())
    seq2_data = [[0, 1, 1], [1, 1, 0]]
    csr_seq2 = _to_csr(seq2_data)
    ndarrayview2 = C.NDArrayView.from_csr(csr_seq2, shape=(1, 2, 3), device=C.cpu())
    x = C.sequence.input_variable((2, 3))
    sequence_value = C.Value.create(x, [ndarrayview1, ndarrayview2], device=dev)
    assert np.array_equal(_to_dense(sequence_value.data),
                          [seq1_data, [seq2_data, [[0, 0, 0], [0, 0, 0]]]])
def convert(model_path):
    device = C.cpu()
    model = C.Function.load(model_path, device=device)
    # Replace all python proposal layer user-functions with native proposal layer
    # user-functions.
    return clone_with_native_proposal_layer(model)
def test_native_convolution(tmpdir):
    # this test needs the native binary convolution library built with Halide.
    if not C.contrib.netopt.native_convolve_function_registered:
        pytest.skip()

    z = _create_convolution_model()
    binz = qc.convert_to_binary_convolution(z, _filter)

    # save and load to transfer the model to the CPU device, as native binary
    # convolution does not run on GPU yet.
    model_file = str(tmpdir / 'binary_model.cmf')
    binz.save(model_file)

    eval_device = C.cpu()
    model = C.Function.load(model_file, device=eval_device)

    # convert to the native Halide implementation.
    native_binz = qc.convert_to_native_binary_convolution(model)

    functions = C.logging.graph.depth_first_search(
        native_binz,
        lambda x: type(x) == C.Function and x.op_name == 'BinaryConvolveOp',
        depth=0)
    assert len(functions) == 3

    img_data = np.reshape(dat, (1, 1, 28, 28))
    res = native_binz.eval(img_data, device=eval_device)
    assert len(res) > 0  # evaluation should work with the new model.
def test_output_subset_evaluation(device_id):
    try:
        gpu_device = C.gpu(0)
    except ValueError:
        pytest.skip('Test only runs when GPU available')
    device = cntk_device(device_id)
    x1 = C.input_variable(shape=())
    op1 = C.constant(value=1, shape=(1), device=device) + (
        C.constant(value=1, shape=(1), device=device) + x1)

    x2 = C.input_variable(shape=(1))
    # Deliberately locate the parameter on a different device
    # instead of the actual compute target device, so that
    # if we try to use this parameter, it results in an error
    if device.type() == 0:
        parameter_device = gpu_device
    else:
        parameter_device = C.cpu()
    p = C.parameter(shape=(1), init=C.glorot_uniform(), device=parameter_device)
    op2 = (x2 - C.constant(value=10, shape=(1), device=device)) - p

    op = C.combine([op1, op2])
    _, result = op.forward({x1: np.asarray([1, 2, 3])}, [op1], device=device)
    assert np.array_equal(result[op1], np.asarray([[3], [4], [5]]))
def test_cpu_and_gpu_devices():
    device = C.cpu()
    assert device.type() == C.device.DeviceKind.CPU
    assert device.id() == 0
    for i in range(len(C.device.all_devices()) - 1):
        device = C.gpu(i)
        assert device.type() == C.device.DeviceKind.GPU
        assert device.id() == i
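# Several tests in this section take a pytest `device_id` and call `cntk_device(device_id)`,
# which is not defined here. A plausible sketch of such a helper, assuming the usual
# convention that a negative id selects the CPU and a non-negative id selects a GPU:
def cntk_device(device_id):
    # Map a pytest-style device id onto a CNTK DeviceDescriptor.
    if device_id < 0:
        return C.cpu()
    return C.gpu(device_id)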
def test(n_fold=4):
    input_xs = [np.empty([922, 93], dtype=np.float32)]
    input_xs, _ = fold_batch(xs=input_xs, n_fold=n_fold)
    cntk.device.try_set_default_device(cntk.cpu())
    nn_model = CuteModel(dim_x=93 * n_fold, dim_y=199 * n_fold)
    t1 = time.time()
    output = nn_model.trainer.model.eval(input_xs)
    print(output[0].shape, time.time() - t1)
def test_use_default_device():
    # this will release any previously held device locks
    C.try_set_default_device(C.cpu(), False)
    q = Queue()
    p = Process(target=_use_default_device, args=(q,))
    p.start()
    p.join()
    assert p.exitcode == 0
    assert q.get()
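# The `_use_default_device` worker spawned by test_use_default_device above is not shown
# in this section. A plausible sketch (an assumption about the helper, not its original
# code): the child process queries the default device and reports a boolean via the queue.
def _use_default_device(queue):
    device = C.use_default_device()
    if device.type() != C.device.DeviceKind.GPU:
        queue.put(True)
    else:
        queue.put(device.is_locked())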
def test_set_cpu_as_default_device():
    device = C.cpu()
    assert not is_locked(device)
    assert not C.try_set_default_device(device, True)
    assert not is_locked(device)
    assert C.try_set_default_device(device)
    assert C.try_set_default_device(device, False)
    assert not is_locked(device)
    assert device == C.use_default_device()
def test_lstm_over_lstm_thought_vectors_2(device_id):
    dev = cntk_device(device_id)
    input_vocab_size = 3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    utterances_input = C.sequence.input_variable((input_vocab_size), is_sparse=True, name='utterances')
    conversation_lengths_input = C.input_variable((), name='conversation_sequence_lengths')
    label_input = C.sequence.input_variable(num_labels, is_sparse=True,
                                            sequence_axis=C.Axis('label_sequence'), name='labels')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(utterances_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.sequence.last(model)
        model = C.user_function(UtteranceBatchReshape(model, conversation_lengths_input))
        model = C.to_sequence_like(model, label_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    ce = C.cross_entropy_with_softmax(z, label_input)

    sentinel_utt_data = C.NDArrayView.from_csr(_to_csr([[0, 0, 1]]), device=C.cpu())
    c1_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1], [0, 1, 0], [1, 0, 0]]), device=C.cpu())
    c1_utt2_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0], [0, 1, 1]]), device=C.cpu())
    c1_utt3_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1], [0, 1, 0]]), device=C.cpu())
    c2_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1]]), device=C.cpu())
    c3_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0], [0, 1, 1], [1, 0, 0]]), device=C.cpu())
    c3_utt2_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0]]), device=C.cpu())

    all_utt_data = C.Value.create(C.sequence.input_variable((input_vocab_size), is_sparse=True),
                                  [c1_utt1_data, c1_utt2_data, c1_utt3_data, c2_utt1_data,
                                   sentinel_utt_data, sentinel_utt_data,
                                   c3_utt1_data, c3_utt2_data, sentinel_utt_data],
                                  device=C.cpu()).data
    conversation_lengths_data = np.asarray([3, 1, 2], dtype=np.float32)
    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0]]
    seq3_label_data = [[1, 0], [0, 1]]
    label_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data), _to_csr(seq3_label_data)]
    param_grads, loss_result = ce.grad({utterances_input: all_utt_data,
                                        label_input: label_data,
                                        conversation_lengths_input: conversation_lengths_data},
                                       wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    absolute_tolerance = 0.01
    assert np.allclose(loss_result[0], [[0.678914], [0.668076], [0.728129]], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], [[0.679029]], atol=absolute_tolerance)
    assert np.allclose(loss_result[2], [[0.705393], [0.674243]], atol=absolute_tolerance)
def test_ndarray_properties():
    ndav = C.NDArrayView((2, 3), np.float32, device=C.cpu())

    dev = ndav.device
    assert isinstance(dev, C.DeviceDescriptor)
    assert str(dev) == 'CPU'

    assert ndav.is_read_only == False
    assert ndav.is_sparse == False
    assert ndav.dtype == np.float32
def test_value_properties():
    ndav = C.NDArrayView((1, 2, 3), np.float32, device=C.cpu())
    val = C.Value(batch=ndav)

    dev = val.device
    assert isinstance(dev, C.DeviceDescriptor)
    assert str(dev) == 'CPU'

    assert val.is_read_only == False
    assert val.is_sparse == False
    assert val.dtype == np.float32
def test_lstm_over_lstm_thought_vectors(device_id):
    dev = cntk_device(device_id)
    input_vocab_size = 3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    x_seq_input = C.sequence.input_variable((C.FreeDimension, input_vocab_size), is_sparse=True, name='features')
    label_seq_input = C.sequence.input_variable(num_labels, is_sparse=True,
                                                sequence_axis=C.Axis('label_sequence'), name='labels')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.sequence.last(model)
        model = C.to_sequence_like(model, label_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    ce = C.cross_entropy_with_softmax(z, label_seq_input)

    seq1_data = [[[0, 1, 1], [0, 1, 0], [1, 0, 0]],
                 [[1, 1, 0], [0, 0, 1], [1, 0, 1]],
                 [[1, 0, 0], [0, 0, 1], [1, 1, 0]]]
    csr_seq1 = _to_csr(seq1_data)
    ndarrayview1 = C.NDArrayView.from_csr(csr_seq1, shape=(3, 3, 3), device=C.cpu())
    seq2_data = [[[0, 0, 1], [0, 1, 1], [1, 0, 1]],
                 [[0, 1, 0], [1, 0, 1], [0, 0, 0]]]
    csr_seq2 = _to_csr(seq2_data)
    ndarrayview2 = C.NDArrayView.from_csr(csr_seq2, shape=(2, 3, 3), device=C.cpu())
    x_seq_data = C.Value.create(C.sequence.input_variable((3, 3), is_sparse=True),
                                [ndarrayview1, ndarrayview2], device=C.cpu()).data

    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0], [0, 1]]
    label_seq_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data)]
    param_grads, loss_result = ce.grad({x_seq_input: x_seq_data, label_seq_input: label_seq_data},
                                       wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    absolute_tolerance = 0.02
    assert np.allclose(loss_result[0], [[0.67126], [0.676331], [0.765814]], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], [[0.685199], [0.681736]], atol=absolute_tolerance)
def test_set_gpu_as_default_device():
    if len(C.device.all_devices()) == 1:
        return
    # this will release any previously held device locks
    C.try_set_default_device(C.cpu(), False)
    for i in range(len(C.device.all_devices()) - 1):
        device = C.gpu(i)
        assert C.try_set_default_device(device, False)
        assert not is_locked(device)
        assert device == C.use_default_device()
        if not device.is_locked():
            assert not is_locked(device)
        assert C.try_set_default_device(device, True)
        assert device == C.use_default_device()
        assert is_locked(device)
def test_override_serialize(tmpdir):
    dev = C.cpu()
    a, b = 1.2322341, -0.29084
    op = MyPlusPlus([C.constant(a), C.constant(b)], '++')
    op = MyPlusPlus([op, op], '+++')
    op = MyPlusPlus([op, op], '++++')
    op = C.user_function(op)
    result1 = op.eval({}, device=dev)

    filepath = str(tmpdir / 'test_udf_with_renamed_deserialize.dat')
    op.save(filepath)

    op_reloaded = Function.load(filepath, device=dev)

    assert result1 == op_reloaded.eval({}, device=dev)
def build_test_function():
    dev = C.cpu()
    w_value = np.asarray([[0.5, 2], [-0.5, 1.5]]).astype(np.float32)
    c1_value = 2.718
    c2_value = -3.141

    if not C.cntk_py.is_native_user_function_registered('NativeUserTimesOp'):
        C.ops.register_native_user_function('NativeUserTimesOp',
                                            'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+'),
                                            'CreateUserTimesFunction')

    x = C.input_variable((2))
    w = C.parameter((2, 2), init=w_value, device=dev)

    op = C.user_function(MyPlus(x, C.constant(c1_value)))
    op = C.ops.native_user_function('NativeUserTimesOp', [w, op], user_function_instance_name='my_times')

    return dev, w_value, c1_value, c2_value, C.user_function(MyPlus(op, C.constant(c2_value)))
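# `MyPlus` (and the similarly named `MyPlusPlus` used above) are user-defined functions
# referenced but not defined in this section. A minimal sketch of `MyPlus` following the
# CNTK UserFunction pattern is shown below; treat it as an assumption, not the original
# implementation.
from cntk.ops.functions import UserFunction

class MyPlus(UserFunction):
    def __init__(self, arg1, arg2, name='f1'):
        super(MyPlus, self).__init__([arg1, arg2], name=name)

    def infer_outputs(self):
        # Output has the same shape, dtype and dynamic axes as the first input.
        return [C.output_variable(self.inputs[0].shape, self.inputs[0].dtype,
                                  self.inputs[0].dynamic_axes)]

    def forward(self, arguments, device=None, outputs_to_retain=None):
        # Elementwise sum of the two inputs; no state needs to be retained for backward.
        return None, arguments[0] + arguments[1]

    def backward(self, state, root_gradients, variables):
        # The gradient of a sum passes the root gradient through to each requested input.
        for var in variables:
            variables[var] = root_gradients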
def evaluate(model_path):
    # ProposalLayer currently only runs on the CPU
    eval_device = C.cpu()
    model = C.Function.load(model_path, device=eval_device)

    from FasterRCNN.FasterRCNN_config import cfg as detector_cfg
    from utils.configs.AlexNet_config import cfg as network_cfg
    from utils.configs.Grocery_config import cfg as dataset_cfg
    from utils.config_helpers import merge_configs
    from FasterRCNN.FasterRCNN_train import prepare
    from FasterRCNN.FasterRCNN_eval import compute_test_set_aps

    cfg = merge_configs([detector_cfg, network_cfg, dataset_cfg])
    cfg["CNTK"].FORCE_DETERMINISTIC = True
    prepare(cfg, False)

    eval_results = compute_test_set_aps(model, cfg)
    meanAP = np.nanmean(list(eval_results.values()))
    return meanAP
def test_native_binary_function():
    # user functions need to be registered before being callable by python
    if not nopt.native_convolve_function_registered:
        pytest.skip("Could not find {0} library. "
                    "Please check if HALIDE_PATH is configured properly "
                    "and try building {1} again"
                    .format('Cntk.BinaryConvolution-' + C.__version__.rstrip('+'),
                            'Extnsibiliy\\BinaryConvolution'))

    # be sure to only run on CPU, binary convolution does not have GPU support for now
    dev = C.cpu()
    # create an arbitrary input mimicking a realistic cifar input
    x = input((64, 28, 28))
    # random filter weights for testing
    w = parameter((64, 64, 3, 3),
                  init=np.reshape(2 * (np.random.rand(64 * 64 * 3 * 3) - .5), (64, 64, 3, 3)),
                  dtype=np.float32,
                  device=dev)
    # set the convolution parameters by passing in an attribute dictionary
    #attributes = {'stride' : 1, 'padding' : False, 'size' : 3}
    attributes = {'stride': 1,
                  'padding': False,
                  'size': 3,
                  'h': 28,
                  'w': 28,
                  'channels': 64,
                  'filters': 64}
    # define the binary convolution op
    op = ops.native_user_function('NativeBinaryConvolveFunction', [w, x], attributes, 'native_binary_convolve')
    # also define an op using python custom functions that should have the same output
    op2 = C.convolution(CustomMultibitKernel(w, 1), CustomSign(x), auto_padding=[False])
    # create random input data
    x_data = NDArrayView.from_dense(np.asarray(np.reshape(2 * (np.random.rand(64 * 28 * 28) - .5), (64, 28, 28)),
                                               dtype=np.float32), device=dev)
    # evaluate the CPP binary convolve
    result = op.eval({x: x_data}, device=dev)
    # evaluate the python emulator
    result2 = op2.eval({x: x_data}, device=dev)
    native_times_primitive = op.find_by_name('native_binary_convolve')
    # assert that both have the same result (the comparison is currently omitted)
def test_native_user_function(tmpdir):
    if not C.cntk_py.is_native_user_function_registered('NativeUserTimesOp'):
        C.ops.register_native_user_function('NativeUserTimesOp',
                                            'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+'),
                                            'CreateUserTimesFunction')

    dev = C.cpu()
    x = C.input_variable((2))
    w = C.parameter((2, 2), init=np.asarray([[0.5, 2], [-0.5, 1.5]], dtype=np.float32), device=dev)
    attributes = {'param_rank': 2,
                  'padding': True,
                  'none': None,
                  'nested lists': [[1, 2, 3], [4, 5, 6]],
                  'string': 'string',
                  'some data': np.arange(1, 10, dtype=np.float32).reshape((3, 3))}

    def verify_attributes(udf):
        for k, v in attributes.items():
            if not isinstance(v, np.ndarray):
                assert udf.attributes[k] == v
            else:
                assert (udf.attributes[k] == v).all()

    op = C.ops.native_user_function('NativeUserTimesOp', [w, x], attributes, 'native_user_times_function')

    verify_attributes(op.owner)

    filepath = str(tmpdir / 'test_native_user_function.dat')
    op.save(filepath)
    op_reloaded = Function.load(filepath, device=dev)
    x_data = C.NDArrayView.from_dense(np.asarray([[0.1, 0.2], [-0.1, 0.3]], dtype=np.float32), device=dev)
    result = op_reloaded.eval({op_reloaded.arguments[0]: x_data}, device=dev)

    assert np.allclose(result, [[-0.05, 0.5], [-0.2, 0.25]])

    native_times_primitive = op_reloaded.find_by_name('native_user_times_function')

    verify_attributes(native_times_primitive)
def test_native_binary_function():
    # user functions need to be registered before being callable by python
    ops.register_native_user_function('NativeBinaryConvolveFunction',
                                      'Cntk.BinaryConvolutionExample-' + C.__version__.rstrip('+'),
                                      'CreateBinaryConvolveFunction')
    # be sure to only run on CPU, binary convolution does not have GPU support for now
    dev = cpu()
    # create an arbitrary input mimicking a realistic cifar input
    x = input((64, 30, 30))
    # random filter weights for testing
    w = parameter((64, 64, 3, 3),
                  init=np.reshape(2 * (np.random.rand(64 * 64 * 3 * 3) - .5), (64, 64, 3, 3)),
                  dtype=np.float32,
                  device=dev)
    # set the convolution parameters by passing in an attribute dictionary
    attributes = {'stride': 1, 'padding': False, 'size': 3}
    # define the binary convolution op
    op = ops.native_user_function('NativeBinaryConvolveFunction', [w, x], attributes, 'native_binary_convolve_function')
    # also define an op using python custom functions that should have the same output
    op2 = C.convolution(CustomMultibitKernel(w, 1), CustomSign(x), auto_padding=[False])
    # create random input data
    x_data = NDArrayView.from_dense(np.asarray(np.reshape(2 * (np.random.rand(64 * 30 * 30) - .5), (64, 30, 30)),
                                               dtype=np.float32), device=dev)
    # evaluate the CPP binary convolve
    result = op.eval({x: x_data}, device=dev)
    # evaluate the python emulator
    result2 = op2.eval({x: x_data}, device=dev)
    native_times_primitive = op.find_by_name('native_binary_convolve_function')
    # assert that both have the same result
    assert np.allclose(result, result2, atol=0.001)
def evaluate(model_path):
    # ProposalLayer currently only runs on the CPU
    eval_device = C.cpu()
    model = C.Function.load(model_path, device=eval_device)
    set_global_vars(False)
    return eval_faster_rcnn_mAP(model)
def is_locked_cross_process(queue, device_id):
    device = C.cpu() if device_id < 0 else C.gpu(device_id)
    queue.put(device.is_locked())
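# The `is_locked(device)` helper used by the device-lock tests above is not defined in
# this section. A plausible sketch (an assumption) checks the lock state from a separate
# process via `is_locked_cross_process`, so that the current process's own lock does not
# mask the result:
def is_locked(device):
    q = Queue()
    device_id = -1 if device.type() == C.device.DeviceKind.CPU else device.id()
    p = Process(target=is_locked_cross_process, args=(q, device_id))
    p.start()
    p.join()
    assert p.exitcode == 0
    return q.get()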
from ConvNet_CIFAR10_DataAug import *

#############################
# main function boilerplate #
#############################

if __name__ == '__main__':
    model = create_binary_convolution_model()
    z, criterion = get_z_and_criterion(model)
    reader_train = create_reader(os.path.join(data_path, 'train_map.txt'),
                                 os.path.join(data_path, 'CIFAR-10_mean.xml'), True)
    train_model(reader_train, z, criterion, max_epochs=80)

    # save and load (as an illustration)
    model_path = data_path + "/model.cmf"
    model.save(model_path)

    # We use the NativeBinaryConvolveFunction for testing the model, which currently only runs on the CPU
    eval_device = C.cpu()
    model = Function.load(model_path, device=eval_device)

    # For testing, replace all python binary convolution user-functions with the fast Halide-generated
    # NativeBinaryConvolveFunction. Note, the NativeBinaryConvolveFunction currently only supports eval,
    # and is thus not used for training.
    model_with_native_binary_convolutions = clone_with_native_binary_convolutions(model)
    _, criterion = get_z_and_criterion(model_with_native_binary_convolutions)

    reader_test = create_reader(os.path.join(data_path, 'test_map.txt'),
                                os.path.join(data_path, 'CIFAR-10_mean.xml'), False)

    # TODO: The NativeBinaryConvolveFunction can currently only process one image at a time
    evaluate(reader_test, criterion, device=eval_device, minibatch_size=1, max_samples=1000)
def setup_nn_model(model_path, dim_input=93, dim_output=199, n_fold=1):
    cntk.device.try_set_default_device(cntk.cpu())
    nn_model = CuteModel(dim_x=dim_input * n_fold, dim_y=dim_output * n_fold)
    nn_model.trainer.restore_from_checkpoint(model_path)
    return nn_model.trainer.model
def test_all_devices():
    assert len(C.device.all_devices()) > 0
    assert C.cpu() in C.device.all_devices()
    if len(C.device.all_devices()) > 1:
        assert C.gpu(0) in C.device.all_devices()
### User inputs ###
network_list = ['action+', 'action', 'action_m', 'feature', 'GRP', 'GRP+', 'GRP_feature']

parser = argparse.ArgumentParser()
parser.add_argument('model_type', type=str, action='store', choices=network_list,
                    help='The type of model to use')
parser.add_argument('--data-file', dest='data_file', type=str, action='store',
                    default='data/training_human_data.json')
parser.add_argument('--gpu-id', dest='gpu_id', type=int, default=-2,
                    help="""The GPU to use. -1 for CPU, -2 for default.""")
cmdargs = parser.parse_args(sys.argv[1:])

# Set device to run on
if cmdargs.gpu_id >= 0:
    C.try_set_default_device(C.gpu(cmdargs.gpu_id))
elif cmdargs.gpu_id == -1:
    C.try_set_default_device(C.cpu())

network = cmdargs.model_type
data_file = cmdargs.data_file

######################
###  DATA INPUT   ###
#######################
target_dist = 30
target_var = 50000
#######################
max_velocity = 0.31
learning_rate = 0.1
def test_lstm_over_lstm_thought_vectors(device_id):
    previous_random_seed = C.cntk_py.get_random_seed()
    C.cntk_py.reset_random_seed(0)
    dev = cntk_device(device_id)
    input_vocab_size = 3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    x_seq_input = C.sequence.input((C.FreeDimension, input_vocab_size), is_sparse=True, name='features')
    label_seq_input = C.sequence.input(num_labels, is_sparse=True,
                                       sequence_axis=Axis('label_sequence'), name='labels')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.sequence.last(model)
        model = C.to_sequence_like(model, label_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    ce = C.cross_entropy_with_softmax(z, label_seq_input)

    seq1_data = [[[0, 1, 1], [0, 1, 0], [1, 0, 0]],
                 [[1, 1, 0], [0, 0, 1], [1, 0, 1]],
                 [[1, 0, 0], [0, 0, 1], [1, 1, 0]]]
    csr_seq1 = _to_csr(seq1_data)
    ndarrayview1 = C.NDArrayView.from_csr(csr_seq1, shape=(3, 3, 3), device=C.cpu())
    seq2_data = [[[0, 0, 1], [0, 1, 1], [1, 0, 1]],
                 [[0, 1, 0], [1, 0, 1], [0, 0, 0]]]
    csr_seq2 = _to_csr(seq2_data)
    ndarrayview2 = C.NDArrayView.from_csr(csr_seq2, shape=(2, 3, 3), device=C.cpu())
    x_seq_data = C.Value.create(C.sequence.input((3, 3), is_sparse=True),
                                [ndarrayview1, ndarrayview2], device=C.cpu()).data

    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0], [0, 1]]
    label_seq_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data)]
    param_grads, loss_result = ce.grad({x_seq_input: x_seq_data, label_seq_input: label_seq_data},
                                       wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    # TODO: The tolerance here is inordinately high due to the non-determinism in initialization
    # of parameters as the individual tests are not run in separate processes resulting in the
    # addition or removal of tests to affect the random initialization of parameters in all other
    # tests that do not explicitly specify the random seed. The tolerance should be lowered to
    # 0.01 after this issue in the test infrastructure has been fixed.
    absolute_tolerance = 0.02
    assert np.allclose(loss_result[0], [[0.63504], [0.673343], [0.698446]], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], [[0.772344], [0.64295]], atol=absolute_tolerance)

    C.cntk_py.reset_random_seed(previous_random_seed)