def test_MaxRoiPool(tmpdir): input_map = [[[1., 2., 3.], # (1, 3, 3) input operand (conv feature map) [4., 5., 6.], [7., 8., 9.]]] input_rois = [[1, 1, 2, 2]] conv_input = np.asarray(input_map, dtype=np.float32) roi_input = np.asarray(input_rois, dtype=np.float32) a = C.input_variable(shape=conv_input.shape, dtype=np.float32, needs_gradient=True, name='a') b = C.input_variable(shape=roi_input.shape, dtype=np.float32, needs_gradient=False, name='b') # adding batch and sequence axis conv_input.shape = (1,) + conv_input.shape roi_input.shape = (1,) + roi_input.shape model = C.roipooling(a, b, C.MAX_POOLING, (3,3), 1.) verify_two_input(model, conv_input, roi_input, tmpdir, 'MaxRoiPool_1')
def test_op_batch_times_grad_with_beta_equals_to_one(left_operand, right_operand, device_id, precision): dt_precision = PRECISION_TO_TYPE[precision] a = AA(left_operand, dtype=dt_precision) b = AA(right_operand, dtype=dt_precision) root_gradient = np.ones_like(a) input1 = C.input_variable((2,2), needs_gradient=True) input2 = C.input_variable((2,2), needs_gradient=True) z = input1 + input2 + C.times(input1, input2) state, actual_forward = z.forward({input1: a, input2: b}, [z.output], {z.output}, cntk_device(device_id)) actual_backwards = z.backward(state, {z.output: root_gradient}, [input1, input2]) k = a.shape[0] left_backward = np.ones_like(a) for x in range(k): left_backward[x, ...] += b[x].sum(axis=-1) right_backward = np.ones_like(b) for x in range(k): transpose_axes = list(np.roll(np.arange(len(b.shape[1:])), -1)) sum_axes = tuple(np.arange(0, len(a.shape) - len(b.shape) + 1)) right_backward[x, ...] += np.transpose( AA([a[x].sum(axis=sum_axes)]), axes=transpose_axes) assert np.allclose(actual_backwards[input1], left_backward) assert np.allclose(actual_backwards[input2], right_backward)
def test_composite_source_synced_transforms(tmpdir): from PIL import Image np.random.seed(1) tmpmap = str(tmpdir/'sync_test.map') with open(tmpmap, 'w') as f: for i in range(10): data = np.random.randint(0, 2**8, (224,224,3)) image = Image.fromarray(data.astype('uint8'), "RGB") tmpjpg = str(tmpdir/('%d.jpg'%i)) image.save(tmpjpg) f.write("%s\t0\n"%tmpjpg) def create_reader(map_file1, map_file2): transforms = [xforms.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio'), xforms.scale(width=224, height=224, channels=3, interpolations='linear')] source1 = C.io.ImageDeserializer(map_file1, C.io.StreamDefs( source_image = C.io.StreamDef(field='image', transforms=transforms))) source2 = C.io.ImageDeserializer(map_file2, C.io.StreamDefs( target_image = C.io.StreamDef(field='image', transforms=transforms))) return C.io.MinibatchSource([source1, source2], max_samples=sys.maxsize, randomize=True, multithreaded_deserializer=False) x = C.input_variable((3,224,224)) y = C.input_variable((3,224,224)) loss = C.squared_error(x, y) reader = create_reader(tmpmap, tmpmap) minibatch_size = 2 input_map={ x: reader.streams.source_image, y: reader.streams.target_image } for i in range(30): data=reader.next_minibatch(minibatch_size, input_map=input_map) assert np.allclose(loss.eval(data), np.zeros(minibatch_size))
def test_output_subset_evaluation(device_id): try: gpu_device = C.gpu(0) except ValueError: pytest.skip('Test only runs when GPU available') device = cntk_device(device_id) x1 = C.input_variable(shape=()) op1 = C.constant(value=1, shape=(1), device=device) + (C.constant(value=1, shape=(1), device=device) + x1) x2 = C.input_variable(shape=(1)) # Deliberately locate the parameter on a different device # instead of the actual compute target device, so that # if we try to use this parameter, it results in an error if (device.type() == 0): parameter_device = gpu_device else: parameter_device = C.cpu() p = C.parameter(shape=(1), init=C.glorot_uniform(), device=parameter_device) op2 = (x2 - C.constant(value=10, shape=(1), device=device)) - p op = C.combine([op1, op2]); _, result = op.forward({x1 : np.asarray([1, 2, 3])}, [op1], device=device) assert np.array_equal(result[op1], np.asarray([[3], [4], [5]]))
def test_conv_free_static_axes(warmup_input_size, free_dimension_increment, filter_size, num_output_channels, device_id, precision): dt = PRECISION_TO_TYPE[precision] dev = cntk_device(device_id) conv_size = tuple([num_output_channels, warmup_input_size[0]]+filter_size) total_size = np.prod(conv_size) y = np.arange(total_size, dtype=dt) conv_map = constant(value=y.reshape(conv_size), device=dev) reference_input_size = tuple(warmup_input_size[:-len(free_dimension_increment)] + [x+y for x,y in zip(warmup_input_size[-len(free_dimension_increment):], free_dimension_increment)]) a_ref = C.input_variable(shape=reference_input_size, dtype=dt, needs_gradient=False, name='a_ref') a_test = C.input_variable(shape=tuple(warmup_input_size[:-len(free_dimension_increment)] + [C.FreeDimension]*len(free_dimension_increment)), dtype=dt, needs_gradient=False, name='a_test') from cntk import convolution conv_op_without_free_dim = convolution(conv_map, a_ref, auto_padding=[False] + [True]*len(filter_size)) conv_op_with_free_dim = convolution(conv_map, a_test, auto_padding=[False] + [True]*len(filter_size)) input_img_ref = np.ones(reference_input_size, dtype=dt) output_ref = conv_op_without_free_dim.eval({a_ref: input_img_ref}, device=dev) input_img_warmup = np.ones(warmup_input_size, dtype=dt) _ = conv_op_with_free_dim.eval({a_test: input_img_warmup}, device=dev) output_test = conv_op_with_free_dim.eval({a_test: input_img_ref}, device=dev) assert np.allclose(output_test, output_ref, atol = 1e-4)
def test_h_softmax(): set_fixed_random_seed(1) input_dim = 2 num_output_classes = 4 minibatch_size = 3 # neural network structure for hierarchical softmax h_input = C.input_variable(input_dim) h_target = C.input_variable([1]) h_z, class_probs, all_probs = C.hierarchical_softmax_layer(h_input, h_target, num_output_classes) a = np.reshape(np.arange(minibatch_size * input_dim, dtype = np.float32), (minibatch_size, input_dim)) labels = np.reshape(np.arange(minibatch_size, dtype = np.float32), (minibatch_size, 1)) % num_output_classes val_z = h_z.eval({h_input: a, h_target: labels}) val_class_probs = class_probs.eval({h_input: a, h_target: labels}) val_all_probs = [x.eval({h_input: a, h_target: labels}) for x in all_probs] expected_z = [[[0.17082828]], [[0.17143427]], [[0.0001837]]] expected_class_probs = [[ 0.4046618 , 0.59533817], [ 0.23773022, 0.76226979], [ 0.12518175, 0.87481827]] expected_all_probs = [[[ 0.17082828, 0.23383351], [ 0.06629595, 0.17143427], [ 0.02127092, 0.10391083]], [[1.76951319e-01, 4.18386817e-01], [7.11729145e-03, 7.55152524e-01], [1.83700817e-04, 8.74634564e-01]]] assert np.allclose(expected_z, val_z) assert np.allclose(expected_class_probs, val_class_probs) assert np.allclose(expected_all_probs, val_all_probs)
def test_nce_loss(classes, xdim, batch, expected_value, device_id, precision): dt = PRECISION_TO_TYPE[precision] from cntk.losses import nce_loss import scipy x = C.input_variable(xdim, needs_gradient=True) y = C.input_variable(classes, is_sparse=True) x0 = np.arange(batch * xdim, dtype=dt).reshape((batch, xdim))/(batch * xdim) data = np.ones(batch, dtype=dt) indices = list(range(10,10*batch+1,10)) indptr = list(range(batch+1)) y0 = scipy.sparse.csr_matrix((data, indices, indptr), shape=(batch, classes)) q = np.arange(classes, dtype=dt) + 1 b = C.parameter((classes, 1), init=-np.log(classes)) W = C.parameter((classes, C.InferredDimension), init=C.glorot_uniform(seed=98052)) loss = C.nce_loss(W, b, x, y, q, seed=98052) v = loss.grad({x:x0, y:y0}, wrt=loss.parameters, as_numpy=False) for key in v: assert v[key].is_sparse, "gradient of nce_loss with respect to %s is not sparse"%key losses = np.zeros((100,batch)) for i in range(100): losses[i,:] = loss.eval({x:x0, y:y0}) assert np.allclose(np.mean(losses, axis=0), AA(expected_value))
def train_faster_rcnn_e2e(base_model_file_name, debug_output=False): # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi) image_input = input_variable((num_channels, image_height, image_width), dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name) roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) dims_node = alias(dims_input, name='dims_input') # Instantiate the Faster R-CNN prediction model and loss function loss, pred_error = create_faster_rcnn_predictor(base_model_file_name, image_input, roi_input, dims_node) if debug_output: print("Storing graphs and models to %s." % globalvars['output_path']) plot(loss, os.path.join(globalvars['output_path'], "graph_frcn_train_e2e." + cfg["CNTK"].GRAPH_TYPE)) # Set learning parameters e2e_lr_factor = globalvars['e2e_lr_factor'] e2e_lr_per_sample_scaled = [x * e2e_lr_factor for x in cfg["CNTK"].E2E_LR_PER_SAMPLE] mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB) print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL)) print("lr_per_sample: {}".format(e2e_lr_per_sample_scaled)) train_model(image_input, roi_input, dims_input, loss, pred_error, e2e_lr_per_sample_scaled, mm_schedule, cfg["CNTK"].L2_REG_WEIGHT, globalvars['e2e_epochs']) return create_eval_model(loss, image_input, dims_input)
def test_sequence_unpack_basic(device_id): dev = cntk_device(device_id) # Unpack a placeholder p = C.placeholder() p_unpacked_outputs = C.sequence.unpack(p, padding_value=0).outputs assert len(p_unpacked_outputs) == 2 x = C.input_variable((C.FreeDimension, 2, 3), is_sparse=False) x_seq_lens = C.input_variable(()) x_seq = C.to_sequence(x, x_seq_lens) x_seq_unpacked = C.sequence.unpack(x_seq, padding_value=-1000.0) x_seq_unpacked_value_output = x_seq_unpacked.outputs[0] x_seq_unpacked_mask_output = x_seq_unpacked.outputs[1] assert len(x_seq_unpacked_value_output.dynamic_axes) == 1 assert x_seq_unpacked_value_output.shape == (C.FreeDimension, 2, 3) seq1_data = [[[0, 1, 1], [0, 1, 0]], [[1, 0, 0], [1, 0, 1]]] seq2_data = [[0, 1, 1], [1, 1, 0]] x_data = [np.asarray(seq1_data, dtype=np.float32), np.asarray([seq2_data, [[-100.0, -100.0, -100.0], [-100.0, -100.0, -100.0]]], dtype=np.float32)] x_seq_lens_data = np.asarray([2, 1], dtype=np.float32) result = x_seq_unpacked.eval({x : x_data, x_seq_lens : x_seq_lens_data}, device=dev) value = result[x_seq_unpacked_value_output] mask = result[x_seq_unpacked_mask_output] assert np.array_equal(value[0], seq1_data) assert np.array_equal(value[1], [seq2_data, [[-1000.0, -1000.0, -1000.0], [-1000.0, -1000.0, -1000.0]]]) assert np.array_equal(mask, [[1, 1], [1, 0]])
def test_times_2d_sparse_operand(device_id): from .. import times dev = cntk_device(device_id) vocab_size = 6 sample_shape = (2, vocab_size) input_sparse_indices = [[1, 3], [2, 4], [0, 2]] input_data = C.Value.one_hot(input_sparse_indices, sample_shape, device=dev) a = C.input_variable(shape=sample_shape, is_sparse=True, needs_gradient=True, name='a') w_init = np.eye(vocab_size, dtype=np.float32) w = C.parameter(init=w_init, device=dev) a_dense = times(a, w) # TODO: Also test the results from grad grad = a_dense.grad({a : input_data}, [w, a], as_numpy=False, device=dev) res = a_dense.eval({a : input_data}, device=dev) assert np.array_equal(res, [[w_init[input_sparse_indices[0]]], [w_init[input_sparse_indices[1]]], [w_init[input_sparse_indices[2]]]]) a_no_sequence = C.input_variable(shape=sample_shape, is_sparse=True, name='a', dynamic_axes=[C.Axis.default_batch_axis()]) a_no_sequence_dense = times(a_no_sequence, w) res = a_no_sequence_dense.eval({a_no_sequence : input_data}, device=dev) assert np.array_equal(res, [w_init[input_sparse_indices[0]], w_init[input_sparse_indices[1]], w_init[input_sparse_indices[2]]])
def test_trainer(tmpdir, no_eval_function): in1 = input_variable(shape=(1,)) labels = input_variable(shape=(1,)) p = parameter(shape=(2,), init=10) z = plus(in1, reduce_sum(p), name='z') ce = cross_entropy_with_softmax(z, labels) if no_eval_function: errs = None else: errs = classification_error(z, labels) momentum_time_constant = momentum_as_time_constant_schedule(1100) lr_per_sample = learning_rate_schedule(0.007, UnitType.sample) trainer = Trainer(z, (ce, errs), [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)]) in1_value = [[1],[2]] label_value = [[0], [1]] arguments = {in1: in1_value, labels: label_value} z_output = z.output updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output]) p = str(tmpdir / 'checkpoint.dat') trainer.save_checkpoint(p) trainer.restore_from_checkpoint(p) assert trainer.model.name == 'z' # Ensure that Swig is not leaking raw types assert isinstance(trainer.model, Function) assert trainer.model.__doc__ assert isinstance(trainer.parameter_learners[0], Learner)
def test_splice(shape1, shape2, axis, expected_result, input_data1, input_data2, device_id, precision):
    a = C.input_variable(shape=shape1,
                         dtype=sanitize_dtype_cntk(PRECISION_TO_TYPE[precision]),
                         needs_gradient=True,
                         name='a')
    b = C.input_variable(shape=shape2,
                         dtype=sanitize_dtype_cntk(PRECISION_TO_TYPE[precision]),
                         needs_gradient=True,
                         name='b')

    # create batch
    input_data1.shape = (1,) + input_data1.shape
    input_data2.shape = (1,) + input_data2.shape

    # splice using the operator
    root_op = C.splice(a, b, axis=axis, name='splice_ab')

    forward_input = {a: input_data1, b: input_data2}

    # Backward pass test
    # ==================
    # The gradient of the splice operator is all ones in the shape of the input
    def grad_splice(x):
        return np.ones_like(x)

    expected_forward = [expected_result]
    expected_backward = {
        a: grad_splice(np.asarray(input_data1)),
        b: grad_splice(np.asarray(input_data2))
    }

    unittest_helper(root_op,
                    forward_input, expected_forward, expected_backward,
                    device_id=device_id, precision=precision)
def test_MaxRoiPool(tmpdir, dtype): pytest.skip('MaxRoiPool is failing with ONNX shape inference (input rois). RuntimeError: [ShapeInferenceError] RoIs tensor must have 2 dimensions') with C.default_options(dtype = dtype): input_map = [[[1., 2., 3.], # (1, 3, 3) input operand (conv feature map) [4., 5., 6.], [7., 8., 9.]]] input_rois = [[1, 1, 2, 2]] conv_input = np.asarray(input_map, dtype=dtype) roi_input = np.asarray(input_rois, dtype=dtype) a = C.input_variable(shape=conv_input.shape, dtype=dtype, needs_gradient=True, name='a') b = C.input_variable(shape=roi_input.shape, dtype=dtype, needs_gradient=False, name='b') # adding batch and sequence axis conv_input.shape = (1,) + conv_input.shape roi_input.shape = (1,) + roi_input.shape model = C.roipooling(a, b, C.MAX_POOLING, (3,3), 1.) verify_two_input(model, conv_input, roi_input, tmpdir, 'MaxRoiPool_1')
def test_trainer_with_some_params_not_learned(): input_dim = 2 proj_dim = 2 x = input_variable(shape=(input_dim,)) W = parameter(shape=(input_dim, proj_dim), init=glorot_uniform()) B = parameter(shape=(proj_dim,), init=glorot_uniform()) t = times(x, W) z = t + B W_orig_value = W.value B_orig_value = B.value labels = input_variable(shape=(proj_dim,)) ce = cross_entropy_with_softmax(z, labels) pe = classification_error(z, labels) lr_per_sample = learning_rate_schedule(0.1, UnitType.sample) trainer = Trainer(z, (ce, pe), sgd([W], lr_per_sample)) x_value = [[1, 1],[2, 2]] label_value = [[0, 1], [1, 0]] arguments = {x: x_value, labels: label_value} num_iters = 3 for i in range(num_iters): trainer.train_minibatch(arguments) assert np.array_equal(B.value, B_orig_value) assert not np.array_equal(W.value, W_orig_value) W_orig_value = W.value trainer.test_minibatch(arguments)
def test_load_save_inputs(tmpdir): i1 = C.input_variable((1,2), name='i1') i2 = C.input_variable((2,1), name='i2') root_node = C.plus(i1, i2) input1 = [[[1,2]]] input2 = [[[[1],[2]]]] result = root_node.eval({i1: input1, i2: input2}) expected = [[[[2,3],[3,4]]]] assert np.allclose(result, expected) filename = str(tmpdir / 'i_plus_i_0.mod') root_node.save(filename) loaded_node = C.Function.load(filename) # Test specifying the input nodes by name loaded_result = loaded_node.eval({'i1': input1, 'i2': input2}) assert np.allclose(loaded_result, expected) filename = filename + '.legacy' save_as_legacy_model(root_node, filename) loaded_node = C.Function.load(filename) loaded_result = loaded_node.eval({'i1': input1, 'i2': input2}) assert np.allclose(loaded_result, expected)
def create_resnet_network(network_name):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    return {
        'name': network_name,
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'output': z
    }
def test_learner_logging(): from cntk import Trainer from cntk.logging import ProgressPrinter from cntk import cross_entropy_with_softmax, classification_error features = C.input_variable(shape=(1,), needs_gradient=True, name='a') w_init = 1 w = parameter(shape=(1,), init=w_init) z = features * w labels = C.input_variable(shape=(1,), name='b') ce = cross_entropy_with_softmax(z, labels) errs = classification_error(z, labels) writer = TestProgressWriter(); lr_values = [0.3, 0.2, 0.1, 0] m_values = [0.6, 0.7, 0.8] learner = C.momentum_sgd(z.parameters, learning_rate_schedule(lr_values, UnitType.sample, 1), C.momentum_schedule(m_values, 1)) trainer = Trainer(z, (ce, errs), [learner], writer) for i in range(10): trainer.train_minibatch({features: [[2.]], labels: [[1.]]}) assert len(writer.log_output) == len(lr_values + m_values) values = [j for i in zip(lr_values,m_values) for j in i] + [0] for i in range(len(values)): assert (values[i] == writer.log_output[i])
def test_local_response_normalization(device_id, precision):
    dtype = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    def lrn(x, depth_radius, bias, alpha, beta, name=''):
        x2 = C.square(x)
        # reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis).
        # Note that Python and BrainScript axis orders are reversed.
        x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
        W = C.constant(alpha/(2*depth_radius+1), shape=(1,2*depth_radius+1,1,1), dtype=dtype, name='W')
        # 3D convolution with a filter that has a non-1 size only in the 3rd axis;
        # it does not reduce, since the reduction dimension is fake and of size 1
        y = C.convolution(W, x2s)
        # reshape back to remove the fake singleton reduction dimension
        b = C.reshape(y, C.InferredDimension, 0, 2)
        den = C.exp(beta * C.log(bias + b))
        return C.element_divide(x, den)

    from cntk import local_response_normalization

    img_shape = (64, 32, 32)
    img = np.asarray(np.random.uniform(-1, 1, img_shape), dtype=dtype)
    x_gt = C.input_variable(shape=img_shape, dtype=dtype)
    x_r = C.input_variable(shape=img_shape, dtype=dtype)

    gt = lrn(x_gt, 2, 1.0, 0.0001, 0.75)
    r = local_response_normalization(x_r, 2, 1.0, 0.0001, 0.75)

    ss = gt.eval({x_gt: img})
    sa = r.eval({x_r: img})
    assert np.allclose(sa, ss)
def test_to_sequence_basic(device_id): dev = cntk_device(device_id) x = C.input_variable((C.FreeDimension, 2)) x_seq = C.to_sequence(x) assert len(x_seq.dynamic_axes) == 2 x_data = np.asarray([[[1, 2], [-1000, -1000]], [[3, 4], [5, 6]]], dtype=np.float32) result = x_seq.eval({x : x_data}, device=dev) assert np.array_equal(result, x_data) x = C.input_variable((C.FreeDimension, 2, 3), is_sparse=True) x_seq_lens = C.input_variable(()) x_seq = C.to_sequence(x, x_seq_lens) seq1_data = [[[0, 1, 1], [0, 1, 0]], [[1, 0, 0], [1, 0, 1]]] csr_seq1 = _to_csr(seq1_data) ndarrayview1 = C.NDArrayView.from_csr(csr_seq1, shape=(2, 2, 3), device=C.cpu()) seq2_data = [[0, 1, 1], [1, 1, 0]] csr_seq2 = _to_csr([seq2_data, [[0, 0, 0], [0, 0, 0]]]) ndarrayview2 = C.NDArrayView.from_csr(csr_seq2, shape=(2, 2, 3), device=C.cpu()) x_data = C.Value.create(C.input_variable((2, 2, 3), is_sparse=True), [ndarrayview1, ndarrayview2], device=dev).data x_seq_lens_data = np.asarray([2, 1], dtype=np.float32) result = x_seq.eval({x : x_data, x_seq_lens : x_seq_lens_data}, device=dev, as_numpy=False) result_dense = _to_dense(result, True) assert np.array_equal(result_dense[0], seq1_data) assert np.array_equal(result_dense[1], [seq2_data])
def test_model_not_criterion_subset(): input_dim = 2 proj_dim = 11 model1_dim = 3 model2_dim = 4 x = input_variable((input_dim,)) core = Embedding(proj_dim) model1 = Dense(model1_dim)(sequence.last(core(x))) model1_label = input_variable((model1_dim,), dynamic_axes=[Axis.default_batch_axis()]) ce_model1 = cross_entropy_with_softmax(model1, model1_label) pe_model1 = classification_error(model1, model1_label) model2 = Dense(model2_dim)(core(x)) model2_label = input_variable((model2_dim,)) ce_model2 = cross_entropy_with_softmax(model2, model2_label) pe_model2 = classification_error(model2, model2_label) ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1 lr_schedule = learning_rate_schedule(0.003, UnitType.sample) trainer_multitask = Trainer(model1, (ce, pe_model1), sgd(ce.parameters, lr=lr_schedule)) x_data = np.asarray([[2., 1.], [1., 2.]], np.float32) model1_label_data = np.asarray([1., 0., 0.], np.float32) model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32) trainer_multitask.train_minibatch({x : [x_data], model1_label : [model1_label_data], model2_label : [model2_label_data]})
def train_faster_rcnn_e2e(cfg): # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi) image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH), dynamic_axes=[Axis.default_batch_axis()], name=cfg["MODEL"].FEATURE_NODE_NAME) roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()]) dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()]) dims_node = alias(dims_input, name='dims_input') # Instantiate the Faster R-CNN prediction model and loss function loss, pred_error = create_faster_rcnn_model(image_input, roi_input, dims_node, cfg) if cfg["CNTK"].DEBUG_OUTPUT: print("Storing graphs and models to %s." % cfg.OUTPUT_PATH) plot(loss, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_e2e." + cfg["CNTK"].GRAPH_TYPE)) # Set learning parameters e2e_lr_factor = cfg["MODEL"].E2E_LR_FACTOR e2e_lr_per_sample_scaled = [x * e2e_lr_factor for x in cfg["CNTK"].E2E_LR_PER_SAMPLE] mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB) print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL)) print("lr_per_sample: {}".format(e2e_lr_per_sample_scaled)) train_model(image_input, roi_input, dims_input, loss, pred_error, e2e_lr_per_sample_scaled, mm_schedule, cfg["CNTK"].L2_REG_WEIGHT, cfg["CNTK"].E2E_MAX_EPOCHS, cfg) return create_faster_rcnn_eval_model(loss, image_input, dims_input, cfg)
def test_swapaxes_0d_1d_operands():
    x1 = C.input_variable(())
    with pytest.raises(ValueError):
        swapaxes_0d = C.swapaxes(x1)

    x2 = C.input_variable(2)
    with pytest.raises(ValueError):
        swapaxes_1d = C.swapaxes(x2)
def test_extra_arguments_in_eval():
    x1 = C.input_variable((1,), name='x1')
    x2 = C.input_variable((1,), name='x2')
    x1_plus_1 = x1 + 1
    x1_plus_1_plus_x2 = x1_plus_1 + x2

    result = x1_plus_1.eval({x1: np.asarray([[1]]), x2: np.asarray([[1]])})
    assert np.allclose(result, [[[2]]])
def __init__(self, eval_model, cfg): # load model once in constructor and push images through the model in 'process_image()' self._img_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH) image_input = input_variable(shape=self._img_shape, dynamic_axes=[Axis.default_batch_axis()], name=cfg["MODEL"].FEATURE_NODE_NAME) dims_input = input_variable((1,6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input') self._eval_model = eval_model(image_input, dims_input)
def create_trainer(use_sparse, device):
    a = C.input_variable(shape=input_shape, is_sparse=use_sparse, name='input')
    w = C.parameter(init=w_init, device=device)
    z = times(a, w)

    l = C.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
    loss = cross_entropy_with_softmax(z, l, axis=-1)
    trainer = C.Trainer(z, (loss, None),
                        C.sgd(z.parameters, lr=C.learning_rate_schedule(0.007, C.UnitType.sample)))
    return (a, l, w, trainer)
def _test_binary_op(precision, device_id, op_func, left_operand, right_operand, expected_forward, expected_backward_all, wrap_batch_seq=True, op_param_dict={}, batch_size_greater_than_one=False): dt = PRECISION_TO_TYPE[precision] dev = cntk_device(device_id) left_value = AA(left_operand, dtype=dt) right_value = AA(right_operand, dtype=dt) left_operand_shape = left_value.shape[1:] if batch_size_greater_than_one else left_value.shape right_operand_shape = right_value.shape[1:] if batch_size_greater_than_one else right_value.shape a = C.input_variable(shape=left_operand_shape, dtype=sanitize_dtype_cntk(precision), needs_gradient=True, name='a') b = C.input_variable(shape=right_operand_shape, dtype=sanitize_dtype_cntk(precision), needs_gradient=True, name='b') const_a = constant(left_value, device=dev) const_b = constant(right_value, device=dev) if (type(op_func) == str): input_op_constant = eval('a %s const_b' % op_func) constant_op_input = eval('const_a %s b' % op_func) input_op_input = eval('a %s b' % op_func) else: input_op_constant = op_func(a, const_b, **op_param_dict) constant_op_input = op_func(const_a, b, **op_param_dict) input_op_input = op_func(a, b, **op_param_dict) # create batch by wrapping the data point into a batch of one sample if wrap_batch_seq and not batch_size_greater_than_one: left_value.shape = (1,) + left_value.shape right_value.shape = (1,) + right_value.shape forward_input = {a: left_value, b: right_value} expected_backward = {a: expected_backward_all[ 'left_arg'], b: expected_backward_all['right_arg'], } unittest_helper(input_op_input, forward_input, expected_forward, expected_backward, device_id=device_id, precision=precision) if not batch_size_greater_than_one: forward_input = {a: left_value} expected_backward = {a: expected_backward_all['left_arg'], } unittest_helper(input_op_constant, forward_input, expected_forward, expected_backward, device_id=device_id, precision=precision) forward_input = {b: right_value} expected_backward = {b: expected_backward_all['right_arg'], } unittest_helper(constant_op_input, forward_input, expected_forward, expected_backward, device_id=device_id, precision=precision)
def build_graph(noise_shape, image_shape, G_progress_printer, D_progress_printer):
    input_dynamic_axes = [C.Axis.default_batch_axis()]
    Z = C.input_variable(noise_shape, dynamic_axes=input_dynamic_axes)
    X_real = C.input_variable(image_shape, dynamic_axes=input_dynamic_axes)
    X_real_scaled = 2*(X_real / 255.0) - 1.0

    # Create the model function for the generator and discriminator models
    X_fake = generator(Z)
    D_real = discriminator(X_real_scaled)
    D_fake = D_real.clone(
        method = 'share',
        substitutions = {X_real_scaled.output: X_fake.output}
    )

    # Create loss functions and configure optimization algorithms
    G_loss = 1.0 - C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

    G_learner = C.fsadagrad(
        parameters = X_fake.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )
    D_learner = C.fsadagrad(
        parameters = D_real.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )

    DistG_learner = C.train.distributed.data_parallel_distributed_learner(G_learner)

    # The following API marks a learner as the metric aggregator, which is used by
    # the trainer to determine the training progress.
    # It is required only when more than one learner is provided to a *single* trainer.
    # In this example, we use two trainers, each with a single learner, so it
    # is not required and is set automatically by CNTK for each single learner. However, if you
    # plan to use both learners with a single trainer, it needs to be called before
    # creating the trainer.
    #DistG_learner.set_as_metric_aggregator()

    DistD_learner = C.train.distributed.data_parallel_distributed_learner(D_learner)

    # Instantiate the trainers
    G_trainer = C.Trainer(
        X_fake,
        (G_loss, None),
        DistG_learner,
        G_progress_printer
    )
    D_trainer = C.Trainer(
        D_real,
        (D_loss, None),
        DistD_learner,
        D_progress_printer
    )

    return X_real, X_fake, Z, G_trainer, D_trainer
def test_gather_op(device_id, precision):
    a_data = [AA([[0],[1]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[3],[4]], dtype=PRECISION_TO_TYPE[precision])]
    a = C.input_variable((2,1))
    r_data = np.arange(12).reshape(6,2).astype('f')
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expected = np.asarray([[[[0., 1.]],[[2., 3.]]],[[[6., 7.]],[[8., 9.]]]])
    assert np.array_equal(res, expected)

    grads = C.gather(r, a).grad({a: a_data}, [r])
    expected_grad = np.asarray([[1,1],[1,1],[0,0],[1,1],[1,1],[0,0]], dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    # gather with indices from a learnable parameter (no gradients should be passed
    # through the indices -- 0s should be passed instead)
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params * a)).grad({a: a_data}, [r, indices_params])
    assert np.array_equal(grads[r], expected_grad)
    assert np.array_equal(grads[indices_params], np.asarray([0.0], dtype=np.float32))

    b_data = [AA([[0,2],[1,3]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[2,4],[3,5]], dtype=PRECISION_TO_TYPE[precision])]
    b = C.input_variable((2,2))
    res2 = C.gather(r, b).eval({b: b_data})
    expected2 = np.asarray([[[[0., 1.],[4., 5.]],[[2., 3.],[6., 7.]]],[[[4., 5.],[8., 9.]],[[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)

    # the following small model tests the memory-reuse issue of the gather node
    x = C.input((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # the unpack node is needed to trigger memory reuse
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])
    indices = np.asarray([[[1, 2, 1, 2]]])
    input = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    label = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: input, y: label})
    # the 2nd and 3rd rows should be updated by the gradients
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # the other three rows should remain 1
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
def test_Mean(tmpdir, dtype):
    with C.default_options(dtype = dtype):
        in1 = C.input_variable((4,))
        in2 = C.input_variable((4,))
        model = C.mean([in1, in2])

        in1_data = np.asarray([[1., 2., 3., 4.]], dtype = dtype)
        in2_data = np.asarray([[0., 5., -3., 2.]], dtype = dtype)

        verify_two_input(model, in1_data, in2_data, tmpdir, 'Mean_2')
def test_free_dimension_broadcast():
    i0 = C.sequence.input_variable(shape=(5,))
    i0_unpacked, _ = C.sequence.unpack(i0, padding_value=0).outputs
    i1 = C.input_variable(shape=(5,))
    m = i0_unpacked * i1
    assert m.shape == (-3, 5)

    i1 = C.input_variable(shape=(1,5,))
    m = i0_unpacked * i1
    assert m.shape == (-3, 5)
# Plotting the scatter plot plt.scatter(features, predictions, c='r') plt.xlabel("X") plt.ylabel("Y") plt.show() # adding one dimension for further processing. Input must be formatted as (batch_size,1). features = features[:, None] predictions = predictions[:, None] ################### ##### Network ##### ################### # Output is a single node with a linear operation. input = cntk.input_variable(input_dim) label = cntk.input_variable(num_outputs) pred = Dense(num_outputs)(input) ################## ###### Loss ###### ################## # Defining loss function and evaluation metric loss = cntk.squared_error(pred, label) eval_fun = cntk.squared_error(pred, label) ###################### ###### Training ###### ######################
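# A minimal training sketch for the "Training" section above. This is an assumed
# continuation, not the original tutorial code: the learner type, learning rate,
# minibatch size, and iteration count are illustrative choices only, and the data
# arrays `features`/`predictions` are assumed to already be float32.
learning_rate = 0.02
lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
learner = cntk.sgd(pred.parameters, lr=lr_schedule)
trainer = cntk.Trainer(pred, (loss, eval_fun), [learner])

minibatch_size = 25
num_minibatches = len(features) // minibatch_size
for i in range(num_minibatches):
    # slice one minibatch of (x, y) pairs and run a single training step
    x_batch = features[i * minibatch_size:(i + 1) * minibatch_size]
    y_batch = predictions[i * minibatch_size:(i + 1) * minibatch_size]
    trainer.train_minibatch({input: x_batch, label: y_batch})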
def TrainAndValidate(trainfile): #*****Hyper-Parameters****** q_max_words= 12 p_max_words = 50 emb_dim = 50 num_classes = 2 minibatch_size = 32 epoch_size = 500000 #No.of samples in training set total_epochs = 5 #Total number of epochs to run query_total_dim = q_max_words*emb_dim label_total_dim = num_classes passage_total_dim = p_max_words*emb_dim #****** Create placeholders for reading Training Data *********** query_input_var = C.ops.input_variable((1,q_max_words,emb_dim),np.float32,is_sparse=False) passage_input_var = C.ops.input_variable((1,p_max_words,emb_dim),np.float32,is_sparse=False) output_var = C.input_variable(num_classes,np.float32,is_sparse = False) train_reader = create_reader(trainfile, True, query_total_dim, passage_total_dim, label_total_dim) input_map = { query_input_var : train_reader.streams.queryfeatures, passage_input_var:train_reader.streams.passagefeatures, output_var : train_reader.streams.labels} # ********* Model configuration ******* model_output = cnn_network(query_input_var, passage_input_var, num_classes) loss = C.binary_cross_entropy(model_output, output_var) pe = C.classification_error(model_output, output_var) lr_per_minibatch = C.learning_rate_schedule(0.03, C.UnitType.minibatch) learner = C.adagrad(model_output.parameters, lr=lr_per_minibatch) progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=total_epochs) #************Create Trainer with model_output object, learner and loss parameters************* trainer = C.Trainer(model_output, (loss, pe), learner, progress_printer) C.logging.log_number_of_parameters(model_output) ; print() # **** Train the model in batchwise mode ***** for epoch in range(total_epochs): # loop over epochs print("Epoch : ",epoch) sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = train_reader.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # training step sample_count += data[output_var].num_samples # count samples processed so far trainer.summarize_training_progress() model_output.save("data/models/CNN_{}.dnn".format(epoch)) # Save the model for every epoch #*** Find metrics on validation set after every epoch ******# (Note : you can skip doing this for every epoch instead to optimize the time, do it after every k epochs) predicted_labels=[] for i in range(len(validation_query_vectors)): queryVec = np.array(validation_query_vectors[i],dtype="float32").reshape(1,q_max_words,emb_dim) passageVec = np.array(validation_passage_vectors[i],dtype="float32").reshape(1,p_max_words,emb_dim) scores = model_output(queryVec,passageVec)[0] # do forward-prop on model to get score predictLabel = 1 if scores[1]>=scores[0] else 0 predicted_labels.append(predictLabel) metrics = precision_recall_fscore_support(np.array(validation_labels), np.array(predicted_labels), average='binary') #print("precision : "+str(metrics[0])+" recall : "+str(metrics[1])+" f1 : "+str(metrics[2])+"\n") return model_output
def test_times_const_broadcast():
    x = C.input_variable((3,))
    a = C.constant(np.ones((3,), dtype=np.float32))
    y = C.times_transpose(a, x)
    result = y.eval({x: np.asarray([[1, 2, 3], [1, 2, 3]], dtype=np.float32)})
    assert np.array_equal(result, [[6], [6]])
return runit def get_batch(): inp = np.random.rand(20, 2000, 2) target = np.sum(inp, axis=(1, 2)) target = np.reshape(target, (20, 1)) return inp, target if __name__ == '__main__': HIDDEN_DIM = 128 input_ph = C.sequence.input_variable(2) targets_ph = C.input_variable(shape=1) model = C.layers.Sequential([ C.layers.Recurrence(IndRNN(HIDDEN_DIM, 2).build()), C.layers.Recurrence(IndRNN(1, HIDDEN_DIM).build()), C.sequence.last ]) output = model(input_ph) loss = C.losses.squared_error(output, targets_ph) comp = C.combine(output, loss) lrs = [(1, 0.02), (300, 0.002), (600, 0.0001)] lr_schedule = C.learners.learning_parameter_schedule(lrs) learner = C.learners.adam(loss.parameters, lr_schedule, 0.9) trainer = C.Trainer(output, loss, learner, ProgressPrinter(5))
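# A minimal training-loop sketch for the setup above (illustrative only: the number
# of iterations is an assumption, not part of the original script). get_batch()
# returns float64 arrays, so the batches are cast to float32 to match the CNTK
# default dtype of the input variables.
for step in range(1000):
    inp_batch, target_batch = get_batch()
    trainer.train_minibatch({input_ph: inp_batch.astype(np.float32),
                             targets_ph: target_batch.astype(np.float32)})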
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None, model_dir=None, log_dir=None, tensorboard_logdir=None, gen_heartbeat=False): set_computation_network_trace_level(0) # Input variables denoting the features and label data input_var = C.input_variable((num_channels, image_height, image_width), name='features') label_var = C.input_variable((num_classes)) # create model, and configure learning parameters if network_name == 'resnet20': z = create_cifar10_model(input_var, 3, num_classes) lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01] elif network_name == 'resnet110': z = create_cifar10_model(input_var, 18, num_classes) lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01] else: raise RuntimeError("Unknown model name!") # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # shared training parameters minibatch_size = 128 momentum_time_constant = -minibatch_size / np.log(0.9) l2_reg_weight = 0.0001 # Set learning parameters lr_per_sample = [lr / minibatch_size for lr in lr_per_mb] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample) mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant) # progress writers progress_writers = [ ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs, gen_heartbeat=gen_heartbeat) ] tensorboard_writer = None if tensorboard_logdir is not None: tensorboard_writer = TensorBoardProgressWriter( freq=10, log_dir=tensorboard_logdir, model=z) progress_writers.append(tensorboard_writer) # trainer object learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) trainer = Trainer(z, (ce, pe), learner, progress_writers) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) print() # perform model training if profiler_dir: start_profiler(profiler_dir, True) for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch( min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far trainer.summarize_training_progress() # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed. if tensorboard_writer: for parameter in z.parameters: tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch) if model_dir: z.save( os.path.join(model_dir, network_name + "_{}.dnn".format(epoch))) enable_profiler() # begin to collect profiler data after first epoch if profiler_dir: stop_profiler() # Evaluation parameters test_epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 while sample_count < test_epoch_size: current_minibatch = min(minibatch_size, test_epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. 
sample_count += data[label_var].num_samples print("") trainer.summarize_test_progress() print("") return metric_numer / metric_denom
def test_eval_again_with_prev_outputs_live(device_id): x = C.input_variable(2) dev = cntk_device(device_id) w1 = C.parameter(init=np.asarray([1], dtype=np.float32), device=dev) w2 = C.parameter(init=np.asarray([-1], dtype=np.float32), device=dev) out1 = x + w1 out2 = x + w2 op = C.combine([out1, out2]) result1 = op.eval({x: np.asarray([2, 5], dtype=np.float32)}, device=dev) assert np.array_equal(result1[out1.output], [[3, 6]]) assert np.array_equal(result1[out2.output], [[1, 4]]) result2 = op.eval({x: np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, device=dev) assert np.array_equal(result2[out1.output], [[0, 5], [-3, 8]]) assert np.array_equal(result2[out2.output], [[-2, 3], [-5, 6]]) # result1 should still be valid assert np.array_equal(result1[out1.output], [[3, 6]]) assert np.array_equal(result1[out2.output], [[1, 4]]) result1 = op.eval({x: np.asarray([2, 5], dtype=np.float32)}, device=dev, as_numpy=False) assert np.array_equal(result1[out1.output].asarray(), [[3, 6]]) assert np.array_equal(result1[out2.output].asarray(), [[1, 4]]) result2 = op.eval({x: np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, device=dev, as_numpy=False) assert np.array_equal(result2[out1.output].asarray(), [[0, 5], [-3, 8]]) assert np.array_equal(result2[out2.output].asarray(), [[-2, 3], [-5, 6]]) # Accessing result1 now will cause an error since it was a temporary that # is now erased, due to the subsequent eval call with pytest.raises(RuntimeError): assert np.array_equal(result1[out1.output].asarray(), [[3, 6]]) grad_op = out1 + out2 grad1 = grad_op.grad({x: np.asarray([2, 5], dtype=np.float32)}, wrt=[w1, w2], device=dev) assert np.array_equal(grad1[w1], [2]) assert np.array_equal(grad1[w2], [2]) grad2 = grad_op.grad({x: np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, wrt=[w1, w2], device=dev) assert np.array_equal(grad2[w1], [4]) assert np.array_equal(grad2[w2], [4]) # grad1 should still be valid assert np.array_equal(grad1[w1], [2]) assert np.array_equal(grad1[w2], [2]) grad1 = grad_op.grad({x: np.asarray([2, 5], dtype=np.float32)}, wrt=[w1, w2], device=dev, as_numpy=False) assert np.array_equal(grad1[w1].asarray(), [2]) assert np.array_equal(grad1[w2].asarray(), [2]) grad2 = grad_op.grad({x: np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, wrt=[w1, w2], device=dev, as_numpy=False) assert np.array_equal(grad2[w1].asarray(), [4]) assert np.array_equal(grad2[w2].asarray(), [4]) # Accessing grad1 now will cause an error since it was a temporary that # is now erased, due to the subsequent grad call with pytest.raises(RuntimeError): assert np.array_equal(grad1[w1].asarray(), [2])
def test_Reshape(tmpdir):
    data = np.asarray([[[[0., 1.], [2., 3.], [4., 5.]]]], dtype=np.float32)
    i1 = C.input_variable(shape=(3, 2))
    model = C.reshape(i1, (2, 3))
    verify_one_input(model, data, tmpdir, 'Reshape_1')
def test_op_batch_normalization(use_cudnn, sample, device_id, precision): dtype = PRECISION_TO_TYPE[precision] epsilon = 0.00001 dev = cntk_device(device_id) t = AA(sample, dtype=dtype).reshape(-1, 1) mean = 1 var = 2 init_scale = 3 init_bias = 4 forward = [(x - mean) / np.sqrt(var + epsilon) * init_scale + init_bias for x in t] expected_forward = AA(forward) scale = Parameter(init=AA([init_scale], dtype=dtype), dtype=dtype, device=dev) bias = Parameter(init=AA([init_bias], dtype=dtype), dtype=dtype, device=dev) run_mean = constant(mean, shape=(1), dtype=dtype, device=dev) run_variance = constant(var, shape=(1), dtype=dtype, device=dev) run_count = constant(0, dtype=dtype, device=dev) from cntk import batch_normalization a = C.input_variable(shape=(1), dtype=dtype, needs_gradient=False, name='a') with pytest.warns(Warning): op = batch_normalization( a, scale, bias, run_mean, run_variance, False, #no running_count here, epsilon=epsilon, use_cudnn_engine=use_cudnn) op_node = batch_normalization(a, scale, bias, run_mean, run_variance, running_count=run_count, spatial=False, epsilon=epsilon, use_cudnn_engine=use_cudnn) forward_input = {a: t} unittest_helper(op_node, forward_input, expected_forward, expected_backward=None, device_id=device_id, precision=precision)
def create_alexnet(): # Input variables denoting the features and label data feature_var = C.input_variable((num_channels, image_height, image_width)) label_var = C.input_variable((num_classes)) # apply model to input # remove mean value mean_removed_features = minus(feature_var, constant(114), name='mean_removed_input') with default_options(activation=None, pad=True, bias=True): z = Sequential([ # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU) Convolution2D((11, 11), 96, init=normal(0.01), pad=False, strides=(4, 4), name='conv1'), Activation(activation=relu, name='relu1'), LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm1'), MaxPooling((3, 3), (2, 2), name='pool1'), Convolution2D((5, 5), 192, init=normal(0.01), init_bias=0.1, name='conv2'), Activation(activation=relu, name='relu2'), LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm2'), MaxPooling((3, 3), (2, 2), name='pool2'), Convolution2D((3, 3), 384, init=normal(0.01), name='conv3'), Activation(activation=relu, name='relu3'), Convolution2D((3, 3), 384, init=normal(0.01), init_bias=0.1, name='conv4'), Activation(activation=relu, name='relu4'), Convolution2D((3, 3), 256, init=normal(0.01), init_bias=0.1, name='conv5'), Activation(activation=relu, name='relu5'), MaxPooling((3, 3), (2, 2), name='pool5'), Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'), Activation(activation=relu, name='relu6'), Dropout(0.5, name='drop6'), Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'), Activation(activation=relu, name='relu7'), Dropout(0.5, name='drop7'), Dense(num_classes, init=normal(0.01), name='fc8') ])(mean_removed_features) # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) pe5 = classification_error(z, label_var, topN=5) log_number_of_parameters(z) print() return { 'feature': feature_var, 'label': label_var, 'ce': ce, 'pe': pe, 'pe5': pe5, 'output': z }
def build_model(self):
    c = C.Axis.new_unique_dynamic_axis('c')
    q = C.Axis.new_unique_dynamic_axis('q')
    b = C.Axis.default_batch_axis()
    cgw = C.input_variable(self.wg_dim, dynamic_axes=[b, c], is_sparse=self.use_sparse, name='cgw')
    cnw = C.input_variable(self.wn_dim, dynamic_axes=[b, c], is_sparse=self.use_sparse, name='cnw')
    qgw = C.input_variable(self.wg_dim, dynamic_axes=[b, q], is_sparse=self.use_sparse, name='qgw')
    qnw = C.input_variable(self.wn_dim, dynamic_axes=[b, q], is_sparse=self.use_sparse, name='qnw')
    cc = C.input_variable((1, self.word_size), dynamic_axes=[b, c], name='cc')
    qc = C.input_variable((1, self.word_size), dynamic_axes=[b, q], name='qc')
    ab = C.input_variable(self.a_dim, dynamic_axes=[b, c], name='ab')
    ae = C.input_variable(self.a_dim, dynamic_axes=[b, c], name='ae')

    input_phs = {
        'cgw': cgw, 'cnw': cnw,
        'qgw': qgw, 'qnw': qnw,
        'cc': cc, 'qc': qc,
        'ab': ab, 'ae': ae
    }
    self._input_phs = input_phs
    self.info['query'] = C.splice(qgw, qnw)
    self.info['doc'] = C.splice(cgw, cnw)

    # graph
    pu, qu = self.input_layer(cgw, cnw, cc, qgw, qnw, qc).outputs
    gate_pu, wei1 = self.gate_attention_layer(
        pu, qu,
        common_len=2 * self.hidden_dim,
        attn_kind=self.attn_configs[0])  # [#,c][4*hidden]
    self.info['attn1'] = wei1 * 1.0
    print('[RNet build]gate_pu:{}'.format(gate_pu))
    pv = self.reasoning_layer(gate_pu)  # [#,c][2*hidden]
    gate_self, wei2 = self.gate_attention_layer(
        pv, pv,
        common_len=2 * self.hidden_dim,
        attn_kind=self.attn_configs[1])  # [#,c][4*hidden]
    self.info['attn2'] = wei2 * 1.0
    ph = self.reasoning_layer(gate_self)  # [#,c][2*hidden]
    init_pu = self.weighted_sum(pu)
    start_logits, end_logits = self.output_layer(
        init_pu.outputs[0], ph, 2 * self.hidden_dim)  # [#, c][1]

    # loss
    start_loss = seq_loss(start_logits, ab)
    end_loss = seq_loss(end_logits, ae)
    # paper_loss = start_loss + end_loss
    new_loss = all_spans_loss(start_logits, ab, end_logits, ae)
    self._model = C.combine([start_logits, end_logits])
    self._loss = new_loss
    return self._model, self._loss, self._input_phs
import cntk as C
import numpy as np
import pandas as pd

x = C.input_variable(2)
y = C.input_variable(2)

x0 = np.asarray([[2., 1.]], dtype=np.float32)
y0 = np.asarray([[4., 6.]], dtype=np.float32)

res = C.squared_error(x, y).eval({x: x0, y: y0})
print(type(res))
    for i in range(start, start + count):
        part.append(data[i])
    return np.array(part)

    for i in range(0, len(x[ds]) - BATCH_SIZE, BATCH_SIZE):
        yield as_batch(x[ds], i, BATCH_SIZE), as_batch(y[ds], i, BATCH_SIZE)

# Training parameters
TRAINING_STEPS = 10000
BATCH_SIZE = 100
EPOCHS = 10 if isFast else 100

x_axes = [C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()]
C.input_variable(1, dynamic_axes=x_axes)

# input sequences
x = C.sequence.input_variable(1)

# create the model
z = create_model(x)

# expected output (label); the label input is given the same dynamic axes
# as the model output
l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
print(l)

# the learning rate
learning_rate = 0.02
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
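# A minimal continuation sketch (an assumption, not part of the original tutorial
# text above): wire the model into a loss and a trainer and run a training loop.
# `next_batch` and the data containers X, Y are hypothetical stand-ins for the
# batching generator and data prepared elsewhere in the tutorial.
loss = C.squared_error(z, l)
error = C.squared_error(z, l)
learner = C.fsadagrad(z.parameters, lr=lr_schedule, momentum=C.momentum_schedule(0.9))
trainer = C.Trainer(z, (loss, error), [learner])

for epoch in range(EPOCHS):
    for x_batch, l_batch in next_batch(X, Y, "train"):
        trainer.train_minibatch({x: x_batch, l: l_batch})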
def train(train_x, train_y, seed, model_dir, loss_dir): input_dim = 600 output_dim = 3631 num_epochs = 100 hidden_layer_type = ['TANH', 'TANH'] hidden_layer_size = [1024, 1024] momentum = 0.9 finetune_lr = 0.01 l2_regularization_weight = 0.00001 C.cntk_py.set_fixed_random_seed(seed) print('Creating DNN model...') input = C.input_variable(input_dim) output = C.input_variable(output_dim) dnn_model = create_dnn_model(input, hidden_layer_type, hidden_layer_size, output_dim) epoch_num = 0 current_finetune_lr = finetune_lr current_momentum = momentum train_loss_output = [] print('Learning...') while (epoch_num < num_epochs): print('started epoch %i' % epoch_num) epoch_num += 1 sub_start_time = time.time() lr_schedule = C.learning_rate_schedule(current_finetune_lr, C.UnitType.minibatch) momentum_schedule = C.momentum_schedule(current_momentum) learner = C.momentum_sgd( dnn_model.parameters, lr_schedule, momentum_schedule, unit_gain=False, l1_regularization_weight=0, l2_regularization_weight=l2_regularization_weight) #learner = C.adadelta(dnn_model.parameters, lr_schedule, rho=0.95, epsilon=1e-8, l1_regularization_weight=0, # l2_regularization_weight= 0.00001 ) loss = C.cross_entropy_with_softmax(dnn_model, output) error = loss trainer = C.Trainer(dnn_model, (loss, error), [learner]) train_error = [] for i in range(len(train_x)): temp_train_x = np.float32(train_x[i]) temp_train_y = np.float32(train_y[i]) trainer.train_minibatch({ input: temp_train_x, output: temp_train_y }) train_error.append(trainer.previous_minibatch_loss_average) this_train_loss = np.mean(train_error) sub_end_time = time.time() print('time for 1 epoch is %.1f' % (sub_end_time - sub_start_time)) train_loss_output.append(this_train_loss) print('loss is %.4f' % this_train_loss) if np.remainder(epoch_num, 10) == 0: nnets_file_name = 'dnn_model_ep' + np.str(epoch_num) + '.model' if not os.path.isdir(model_dir): os.makedirs(model_dir) dnn_model.save(os.path.join(model_dir, nnets_file_name)) if not os.path.isdir(loss_dir): os.makedirs(loss_dir) np.savetxt( os.path.join(loss_dir, 'loss_curve_ep' + np.str(epoch_num) + '.csv'), train_loss_output) nnets_file_name = 'dnn_model_final.model' if not os.path.isdir(model_dir): os.makedirs(model_dir) dnn_model.save(os.path.join(model_dir, nnets_file_name)) if not os.path.isdir(loss_dir): os.makedirs(loss_dir) np.savetxt( os.path.join(loss_dir, 'loss_curve_final' + np.str(epoch_num) + '.csv'), train_loss_output)
def main(base_folder, training_mode='majority', model_name='VGG13', max_epochs = 100): # create needed folders. output_model_path = os.path.join(base_folder, R'models') output_model_folder = os.path.join(output_model_path, model_name + '_' + training_mode) if not os.path.exists(output_model_folder): os.makedirs(output_model_folder) # creating logging file logging.basicConfig(filename = os.path.join(output_model_folder, "train.log"), filemode = 'w', level = logging.INFO) logging.getLogger().addHandler(logging.StreamHandler()) logging.info("Starting with training mode {} using {} model and max epochs {}.".format(training_mode, model_name, max_epochs)) # create the model num_classes = len(emotion_table) model = build_model(num_classes, model_name) # set the input variables. input_var = ct.input_variable((1, model.input_height, model.input_width), np.float32) label_var = ct.input_variable((num_classes), np.float32) # read FER+ dataset. logging.info("Loading data...") train_params = FERPlusParameters(num_classes, model.input_height, model.input_width, training_mode, False) test_and_val_params = FERPlusParameters(num_classes, model.input_height, model.input_width, "majority", True) train_data_reader = FERPlusReader.create(base_folder, train_folders, "label.csv", train_params) val_data_reader = FERPlusReader.create(base_folder, valid_folders, "label.csv", test_and_val_params) test_data_reader = FERPlusReader.create(base_folder, test_folders, "label.csv", test_and_val_params) # print summary of the data. display_summary(train_data_reader, val_data_reader, test_data_reader) # get the probalistic output of the model. z = model.model(input_var) pred = ct.softmax(z) epoch_size = train_data_reader.size() minibatch_size = 32 # Training config lr_per_minibatch = [model.learning_rate]*20 + [model.learning_rate / 2.0]*20 + [model.learning_rate / 10.0] mm_time_constant = -minibatch_size/np.log(0.9) lr_schedule = learning_rate_schedule(lr_per_minibatch, unit=UnitType.minibatch, epoch_size=epoch_size) mm_schedule = momentum_as_time_constant_schedule(mm_time_constant) # loss and error cost train_loss = cost_func(training_mode, pred, label_var) pe = classification_error(z, label_var) # construct the trainer learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule) trainer = Trainer(z, (train_loss, pe), learner) # Get minibatches of images to train with and perform model training max_val_accuracy = 0.0 final_test_accuracy = 0.0 best_test_accuracy = 0.0 logging.info("Start training...") epoch = 0 best_epoch = 0 while epoch < max_epochs: train_data_reader.reset() val_data_reader.reset() test_data_reader.reset() # Training start_time = time.time() training_loss = 0 training_accuracy = 0 while train_data_reader.has_more(): images, labels, current_batch_size = train_data_reader.next_minibatch(minibatch_size) # Specify the mapping of input variables in the model to actual minibatch data to be trained with trainer.train_minibatch({input_var : images, label_var : labels}) # keep track of statistics. 
training_loss += trainer.previous_minibatch_loss_average * current_batch_size training_accuracy += trainer.previous_minibatch_evaluation_average * current_batch_size training_accuracy /= train_data_reader.size() training_accuracy = 1.0 - training_accuracy # Validation val_accuracy = 0 while val_data_reader.has_more(): images, labels, current_batch_size = val_data_reader.next_minibatch(minibatch_size) val_accuracy += trainer.test_minibatch({input_var : images, label_var : labels}) * current_batch_size val_accuracy /= val_data_reader.size() val_accuracy = 1.0 - val_accuracy # if validation accuracy goes higher, we compute test accuracy test_run = False if val_accuracy > max_val_accuracy: best_epoch = epoch max_val_accuracy = val_accuracy trainer.save_checkpoint(os.path.join(output_model_folder, "model_{}".format(best_epoch))) test_run = True test_accuracy = 0 while test_data_reader.has_more(): images, labels, current_batch_size = test_data_reader.next_minibatch(minibatch_size) test_accuracy += trainer.test_minibatch({input_var : images, label_var : labels}) * current_batch_size test_accuracy /= test_data_reader.size() test_accuracy = 1.0 - test_accuracy final_test_accuracy = test_accuracy if final_test_accuracy > best_test_accuracy: best_test_accuracy = final_test_accuracy logging.info("Epoch {}: took {:.3f}s".format(epoch, time.time() - start_time)) logging.info(" training loss:\t{:e}".format(training_loss)) logging.info(" training accuracy:\t\t{:.2f} %".format(training_accuracy * 100)) logging.info(" validation accuracy:\t\t{:.2f} %".format(val_accuracy * 100)) if test_run: logging.info(" test accuracy:\t\t{:.2f} %".format(test_accuracy * 100)) epoch += 1 logging.info("") logging.info("Best validation accuracy:\t\t{:.2f} %, epoch {}".format(max_val_accuracy * 100, best_epoch)) logging.info("Test accuracy corresponding to best validation:\t\t{:.2f} %".format(final_test_accuracy * 100)) logging.info("Best test accuracy:\t\t{:.2f} %".format(best_test_accuracy * 100))
def to_input(arg):
    if isinstance(arg, cntk_py.Variable):
        return arg
    else:
        from cntk import input_variable
        return input_variable(arg)
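# Illustrative usage of to_input() above (a hypothetical example, not from the
# original source): a raw shape is wrapped in a fresh input_variable, while an
# existing Variable passes through unchanged.
v = to_input((3, 4))      # creates a new input_variable of shape (3, 4)
assert to_input(v) is v   # Variables are returned as-is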
if not data_found: raise ValueError( "Please generate the data by completing the MNIST data loader in Lab 1" ) print("Data directory is {0}".format(data_dir)) #Model Creation------------------------------------------------------------------------------------------------------------------------B 5 #Our multi-layer perceptron will be relatively simple with 2 hidden layers (num_hidden_layers). The number of nodes in the hidden layer being #a parameter specified by hidden_layers_dim. num_hidden_layers = 2 hidden_layers_dim = 400 #Network input and output: input = C.input_variable(input_dim) label = C.input_variable(num_output_classes) #Multi-layer Perceptron setup-----------------------------------------------------------------------------------------------------B 5.1 #The CNTK Layers module provides a Dense function that creates a fully connected layer -----------------------------------------------B 5.1.1 #which performs the above operations of weighted input summing and bias addition. def create_model(features): with C.layers.default_options(init=C.layers.glorot_uniform(), activation=C.ops.relu): h = features for _ in range(num_hidden_layers): h = C.layers.Dense(hidden_layers_dim)(h) r = C.layers.Dense(num_output_classes, activation=None)(h) return r
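# A minimal sketch of the next step (an assumed continuation, not the original lab
# code): apply the model to the input and define the loss and error metric. Scaling
# the pixel values by 255 is an illustrative preprocessing assumption.
z = create_model(input / 255.0)
loss = C.cross_entropy_with_softmax(z, label)
label_error = C.classification_error(z, label)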
def train_faster_rcnn_alternating(cfg):
    '''
    4-Step Alternating Training scheme from the Faster R-CNN paper:

    # Create initial network, only rpn, without detection network
        # --> train only the rpn (and conv3_1 and up for VGG16)
    # buffer region proposals from rpn
    # Create full network, initialize conv layers with imagenet, use buffered proposals
        # --> train only detection network (and conv3_1 and up for VGG16)
    # Keep conv weights from detection network and fix them
        # --> train only rpn
    # buffer region proposals from rpn
    # Keep conv and rpn weights from step 3 and fix them
        # --> train only detection network
    '''

    # set pre- and post-NMS top N to training values, since buffered proposals are used for further training
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N
    cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

    # learning parameters
    rpn_lr_factor = cfg["MODEL"].RPN_LR_FACTOR
    rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
    frcn_lr_factor = cfg["MODEL"].FRCN_LR_FACTOR
    frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]

    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
    rpn_epochs = cfg["CNTK"].RPN_EPOCHS
    frcn_epochs = cfg["CNTK"].FRCN_EPOCHS

    feature_node_name = cfg["MODEL"].FEATURE_NODE_NAME
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
    print("Using base model:   {}".format(cfg["MODEL"].BASE_MODEL))
    print("rpn_lr_per_sample:  {}".format(rpn_lr_per_sample_scaled))
    print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))

    debug_output = cfg["CNTK"].DEBUG_OUTPUT
    if debug_output:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)

    # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    feat_norm = image_input - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR])
    roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    scaled_gt_boxes = alias(roi_input, name='roi_input')
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')
    rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()])
    rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

    # base image classification model (e.g. VGG16 or AlexNet)
    base_model = load_model(cfg['BASE_MODEL_PATH'])

    print("stage 1a - rpn")
    if True:
        # Create initial network, only rpn, without detection network
        #       initial weights     train?
        # conv: base_model          only conv3_1 and up
        # rpn:  init new            yes
        # frcn: -                   -

        # conv layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, cfg)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage1_rpn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

    print("stage 1a - buffering rpn proposals")
    buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input, cfg)

    print("stage 1b - frcn")
    if True:
        # Create full network, initialize conv layers with imagenet, fix rpn weights
        #       initial weights     train?
        # conv: base_model          only conv3_1 and up
        # rpn:  stage1a rpn model   no --> use buffered proposals
        # frcn: base_model + new    yes

        # conv_layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in the target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, cfg)

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME], [cfg["MODEL"].LAST_HIDDEN_NODE_NAME], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg)
        detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output:
            plot(stage1_frcn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        buffered_proposals_s1 = None

    print("stage 2a - rpn")
    if True:
        # Keep conv weights from detection network and fix them
        #       initial weights     train?
        # conv: stage1b frcn model  no
        # rpn:  stage1a rpn model   yes
        # frcn: -                   -

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"],
                          ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage2_rpn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

    print("stage 2a - buffering rpn proposals")
    buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input, cfg)

    print("stage 2b - frcn")
    if True:
        # Keep conv and rpn weights from step 3 (stage 2a) and fix them
        #       initial weights     train?
        # conv: stage2a rpn model   no
        # rpn:  stage2a rpn model   no --> use buffered proposals
        # frcn: stage1b frcn model  yes

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"],
                           CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output:
            plot(stage2_frcn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        buffered_proposals_s2 = None

    # reset config values to the original test values
    cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
    cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return create_faster_rcnn_eval_model(stage2_frcn_network, image_input, dims_input, cfg, rpn_model=stage2_rpn_network)
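# Editor's note: the alternation above hinges on CloneMethod semantics. A
# minimal, self-contained sketch of Function.clone on a toy model (names are
# illustrative, not from the script above): freeze copies weights as
# constants so later stages cannot change them, while clone copies them as
# fresh, independently trainable parameters.
import cntk as C

x = C.input_variable((4,))
dense = C.layers.Dense(2)(x)
x2 = C.input_variable((4,))

frozen = dense.clone(C.CloneMethod.freeze, {x: x2})   # weights become constants, as in stages 2a/2b
trained = dense.clone(C.CloneMethod.clone, {x: x2})   # weights copied as new parameters
assert len(frozen.parameters) == 0
assert len(trained.parameters) == 2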
labels[X[:, 0] > X[:, 1]] = [0, 0, 1]
labels[X[:, 0] <= X[:, 1]] = [1, 0, 0]
labels[X[:, 1] + X[:, 0] > 1] = [0, 1, 0]

init = C.initializer.normal(0.01)
theta1 = C.Parameter(shape=(2, 12), init=init)
bias1 = C.Parameter(shape=(1, 12), init=init)
theta2 = C.Parameter(shape=(12, 3), init=init)
bias2 = C.Parameter(shape=(1, 3), init=init)

x = C.input_variable(shape=(2,), needs_gradient=False)
t = C.input_variable(shape=(3,), needs_gradient=False)

def forward(x):
    # hidden layer: affine transform followed by a ReLU
    y = C.times(x, theta1) + C.squeeze(bias1, 0)
    y = C.element_max(y, 0.)
    # output layer: affine transform only; softmax is applied separately
    return C.times(y, theta2) + C.squeeze(bias2, 0)

def softmax(x):
    e = C.exp(x)
    s = C.reduce_sum(e, axis=0)
    return e / s
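# Editor's note: the hand-rolled softmax above can overflow exp() for large
# logits. A numerically safer variant (a sketch, not part of the original
# snippet) shifts by the per-sample maximum, which leaves the result
# mathematically unchanged since the constant cancels in the ratio:
def stable_softmax(x):
    z = x - C.reduce_max(x, axis=0)
    e = C.exp(z)
    return e / C.reduce_sum(e, axis=0)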
def next_minibatch(self, num_samples, number_of_workers=1, worker_rank=0, device=None):
    if self._total_num_samples >= self._max_samples:
        return {}

    # determine how many samples, starting from self._cursor, will fit into the requested minibatch size of num_samples
    begin = self._cursor
    end = self._cursor
    assert begin < self._num_samples
    actual_num_samples = {name: 0 for name in self._data.keys()}
    while end < self._num_samples:
        new_num_samples = {
            name: actual_num_samples[name] +
                  (MinibatchSourceFromData._get_len(value[end]) if self._is_sequence[name] else 1)
            for name, value in self._data.items()
        }
        # return up to the requested number of samples, but at least one sequence even if it is longer;
        # also stop if we hit the maximum requested number of samples
        max_num_samples = max(new_num_samples.values())
        if actual_num_samples and (max_num_samples > num_samples or
                                   self._total_num_samples + max_num_samples > self._max_samples):
            break
        actual_num_samples = new_num_samples
        end += 1
    self._total_num_samples += max(actual_num_samples.values())

    # the minibatch data to return
    result = {}  # [stream_info] -> MinibatchData
    at_end = (end == self._num_samples)
    for si in self.streams.values():
        arg = self._data[si.name]
        if isinstance(arg, Value):  # if the entire corpus is one big Value, then slice its NDArrayView directly
            data = arg.data
            sub_shape = data.shape[1:]
            extent = (end - begin,) + sub_shape
            start_offset = (begin,) + tuple(0 for _ in sub_shape)
            if number_of_workers != 1:  # slice_view presently does not support strides
                raise ValueError('distributed reading from Value objects is not supported')
            mb_data = data.slice_view(start_offset, extent, data.is_read_only)
        else:
            # in case of distributed reading, we sub-slice the minibatch
            #print('rank/worker', worker_rank, number_of_workers, 'reading', slice(begin+worker_rank, end+worker_rank, number_of_workers))
            mb_data = arg[begin + worker_rank:end + worker_rank:number_of_workers]
            if number_of_workers != 1:
                mb_data = mb_data.copy()  # un-stride it, to avoid a performance warning
        if isinstance(mb_data, list):  # create a Value object
            if si.name not in self._vars:  # this case is more complex: we need a CNTK Variable
                from cntk import input_variable, device
                self._vars[si.name] = input_variable(**self._types[si.name])
            value = Value.create(self._vars[si.name], mb_data)
        else:
            value = Value(mb_data)
        result[si] = MinibatchData(value, num_sequences=end - begin,
                                   num_samples=actual_num_samples[si.name],
                                   sweep_end=at_end or (self._total_num_samples >= self._max_samples))

    # wrap the cursor around
    self._cursor = 0 if at_end else end
    return result
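# Editor's note: a minimal driving sketch for this reader over dense NumPy
# arrays (stream names x/y are illustrative; the constructor is assumed to
# take a dict of streams plus max_samples, per the CNTK MinibatchSourceFromData
# docs):
import numpy as np
from cntk.io import MinibatchSourceFromData

X = np.arange(20, dtype=np.float32).reshape(10, 2)  # 10 samples of dim 2
Y = np.arange(10, dtype=np.float32).reshape(10, 1)
source = MinibatchSourceFromData(dict(x=X, y=Y), max_samples=10)
mb = source.next_minibatch(4)                        # returns up to 4 samples
assert mb[source.streams.x].num_samples == 4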
def compute_test_set_aps(eval_model, cfg):
    num_test_images = cfg["DATA"].NUM_TEST_IMAGES
    classes = cfg["DATA"].CLASSES
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Create the minibatch source
    minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TEST_MAP_FILE,
        cfg["DATA"].TEST_ROI_FILE,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=False,
        use_flipping=False,
        max_images=cfg["DATA"].NUM_TEST_IMAGES,
        num_classes=cfg["DATA"].NUM_CLASSES,
        proposal_provider=None)

    # define the mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_test_images)] for _ in range(cfg["DATA"].NUM_CLASSES)]

    # evaluate test images and write network output to file
    print("Evaluating Faster R-CNN model for %s images." % num_test_images)
    all_gt_infos = {key: [] for key in classes}
    for img_i in range(0, num_test_images):
        mb_data = minibatch_source.next_minibatch(1, input_map=input_map)

        gt_row = mb_data[roi_input].asarray()
        gt_row = gt_row.reshape((cfg.INPUT_ROIS_PER_IMAGE, 5))
        all_gt_boxes = gt_row[np.where(gt_row[:, -1] > 0)]

        for cls_index, cls_name in enumerate(classes):
            if cls_index == 0:
                continue
            cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:, -1] == cls_index)]
            all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes),
                                           'difficult': [False] * len(cls_gt_boxes),
                                           'det': [False] * len(cls_gt_boxes)})

        output = frcn_eval.eval({image_input: mb_data[image_input], dims_input: mb_data[dims_input]})
        out_dict = dict([(k.name, k) for k in output])
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = output[out_dict['rpn_rois']][0]
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray())

        labels.shape = labels.shape + (1,)
        scores.shape = scores.shape + (1,)
        coords_score_label = np.hstack((regressed_rois, scores, labels))

        # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
        for cls_j in range(1, cfg["DATA"].NUM_CLASSES):
            coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:, -1] == cls_j)]
            all_boxes[cls_j][img_i] = coords_score_label_for_cls[:, :-1].astype(np.float32, copy=False)

        if (img_i + 1) % 100 == 0:
            print("Processed {} samples".format(img_i + 1))

    # calculate mAP
    aps = evaluate_detections(all_boxes, all_gt_infos, classes,
                              use_gpu_nms=cfg.USE_GPU_NMS,
                              device_id=cfg.GPU_ID,
                              nms_threshold=cfg.RESULTS_NMS_THRESHOLD,
                              conf_threshold=cfg.RESULTS_NMS_CONF_THRESHOLD)

    return aps
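# Editor's note: evaluate_detections ultimately reduces each class's ranked
# detections to an average-precision number. For reference, a compact sketch
# of the standard all-point interpolated AP over precision/recall arrays
# (the usual VOC convention, not necessarily this project's exact code):
import numpy as np

def voc_ap(recall, precision):
    # append sentinels at both ends of the curve
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))
    # make precision monotonically non-increasing from right to left
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    # sum rectangle areas where recall changes
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])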
def Input(*args, **kwargs):
    return _name_node(input_variable(*args, **kwargs), 'input')
def test_gather_op(device_id, precision):
    a_data = [AA([[0], [1]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[3], [4]], dtype=PRECISION_TO_TYPE[precision])]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expected = np.asarray([[[[0., 1.]], [[2., 3.]]], [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expected)

    grads = C.gather(r, a).grad({a: a_data}, [r])
    expected_grad = np.asarray([[1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0]], dtype=np.float32)
    assert np.array_equal(grads, expected_grad)

    # gather with indices from a learnable parameter (no gradients should be passed through the indices -- 0s should be passed)
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params * a)).grad({a: a_data}, [r, indices_params])
    assert np.array_equal(grads[r], expected_grad)
    assert np.array_equal(grads[indices_params], np.asarray([0.0], dtype=np.float32))

    b_data = [AA([[0, 2], [1, 3]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[2, 4], [3, 5]], dtype=PRECISION_TO_TYPE[precision])]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})
    expected2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                            [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expected2)

    # the following small model tests the memory-reuse issue of the gather node.
    x = C.input((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # the unpack node is needed to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])
    indices = np.asarray([[[1, 2, 1, 2]]])
    input = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    label = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: input, y: label})
    # the 2nd and 3rd rows should be updated by the gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # the other three rows should stay at 1.
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
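# Editor's note: the expected values in the test above are exactly what
# NumPy's take along the first axis of the parameter produces; a quick
# cross-check of that correspondence:
import numpy as np

r_data = np.arange(12).reshape(6, 2).astype('f')
indices = np.asarray([[[0], [1]], [[3], [4]]], dtype=int)  # batch of two (2, 1) index arrays
expected = np.asarray([[[[0., 1.]], [[2., 3.]]], [[[6., 7.]], [[8., 9.]]]])
np.testing.assert_array_equal(np.take(r_data, indices, axis=0), expected)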
# Call and create the ``test_reader`` object.
test_reader = Batch_Reader(test_data, onehotlabels_test)

##############################
########## Network ###########
##############################

# Architecture parameters
feature_dim = 784
num_classes = 10
num_hidden_layers = 3
hidden_layer_neurons = 400

# Placeholders.
input = C.input_variable(feature_dim)
label = C.input_variable(num_classes)

# Creating the architecture
def create_model(features):
    '''
    This function creates the architecture model.
    :param features: The input features.
    :return: The output of the network, whose dimensionality is num_classes.
    '''
    with C.layers.default_options(init=C.layers.glorot_uniform(), activation=C.ops.relu):
        # Features are the initial values.
        hidden_out = features
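        # Editor's note: the excerpt above is truncated mid-function. Based on
        # the identical MLP pattern earlier in this collection, the body
        # presumably continues along these lines (a reconstruction, not the
        # original text):
        for _ in range(num_hidden_layers):
            hidden_out = C.layers.Dense(hidden_layer_neurons)(hidden_out)
        network_output = C.layers.Dense(num_classes, activation=None)(hidden_out)
    return network_output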
def save_validation_no_input(y, file_path):
    constant_output = np.reshape(y, (y.size))
    c = C.input_variable((1))
    model = c * constant_output
    model.save(file_path, format=C.ModelFormat.ONNX)
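# Editor's note: a round-trip sketch for the file written above;
# load_validation_no_input is a hypothetical helper, not part of the original
# code. A model saved in ONNX format can be reloaded with Function.load and
# re-evaluated:
def load_validation_no_input(file_path):
    loaded = C.Function.load(file_path, format=C.ModelFormat.ONNX)
    probe = np.ones((1, 1), dtype=np.float32)  # illustrative input for the (1,)-shaped argument
    return loaded.eval({loaded.arguments[0]: probe})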
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner.learning_rate() == 0.1

    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.nesterov(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    lr_per_sample = learning_rate_schedule([0.1]*3 + [0.2]*2 + [0.3], UnitType.sample)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.1]*5
    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample, 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.set_default_use_mean_gradient_value(False)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert not use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)

    C.set_default_use_mean_gradient_value(True)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)
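# Editor's note: the pair form [(3, 0.1), (2, 0.2), (1, 0.3)] used above
# means "0.1 for 3 epoch_size units, then 0.2 for 2 units, then 0.3 from
# there on". Schedules can be indexed by sample count to inspect this
# (a sketch, assuming the documented __getitem__ behavior):
s = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], UnitType.sample, epoch_size=100)
assert s[0] == 0.1    # samples 0..299
assert s[300] == 0.2  # samples 300..499
assert s[500] == 0.3  # samples 500 and beyond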
def test_LRN(tmpdir):
    img_shape = (64, 32, 32)
    img = np.asarray(np.random.uniform(-1, 1, img_shape), dtype=np.float32)
    x_r = C.input_variable(shape=img_shape, dtype=np.float32)
    model = C.local_response_normalization(x_r, 2, 1.0, 0.0001, 0.75)
    verify_one_input(model, img, tmpdir, 'LRN_1')
def eval_faster_rcnn_mAP(eval_model):
    img_map_file = globalvars['test_map_file']
    roi_map_file = globalvars['test_roi_file']
    classes = globalvars['classes']
    image_input = input_variable((num_channels, image_height, image_width),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Create the minibatch source
    minibatch_source = ObjectDetectionMinibatchSource(
        img_map_file, roi_map_file,
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width,
        pad_height=image_height,
        pad_value=img_pad_value,
        randomize=False,
        use_flipping=False,
        max_images=cfg["CNTK"].NUM_TEST_IMAGES)

    # define the mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_test_images)] for _ in range(globalvars['num_classes'])]

    # evaluate test images and write network output to file
    print("Evaluating Faster R-CNN model for %s images." % num_test_images)
    all_gt_infos = {key: [] for key in classes}
    for img_i in range(0, num_test_images):
        mb_data = minibatch_source.next_minibatch(1, input_map=input_map)

        gt_row = mb_data[roi_input].asarray()
        gt_row = gt_row.reshape((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5))
        all_gt_boxes = gt_row[np.where(gt_row[:, -1] > 0)]

        for cls_index, cls_name in enumerate(classes):
            if cls_index == 0:
                continue
            cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:, -1] == cls_index)]
            all_gt_infos[cls_name].append({'bbox': np.array(cls_gt_boxes),
                                           'difficult': [False] * len(cls_gt_boxes),
                                           'det': [False] * len(cls_gt_boxes)})

        output = frcn_eval.eval({image_input: mb_data[image_input], dims_input: mb_data[dims_input]})
        out_dict = dict([(k.name, k) for k in output])
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = output[out_dict['rpn_rois']][0]
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, mb_data[dims_input].asarray())

        labels.shape = labels.shape + (1,)
        scores.shape = scores.shape + (1,)
        coords_score_label = np.hstack((regressed_rois, scores, labels))

        # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
        for cls_j in range(1, globalvars['num_classes']):
            coords_score_label_for_cls = coords_score_label[np.where(coords_score_label[:, -1] == cls_j)]
            all_boxes[cls_j][img_i] = coords_score_label_for_cls[:, :-1].astype(np.float32, copy=False)

        if (img_i + 1) % 100 == 0:
            print("Processed {} samples".format(img_i + 1))

    confusions = None
    try:
        conf_file = cfg["CNTK"].CONFUSION_FILE
        conf_file = os.path.join(map_file_path, conf_file)
        confusions = confusions_map(classes, conf_file)
    except Exception:
        confusions = None

    # calculate mAP
    aps, fp_errors = evaluate_detections(all_boxes, all_gt_infos, classes,
                                         nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
                                         conf_threshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD,
                                         soft=cfg["CNTK"].RESULTS_NMS_SOFT,
                                         confusions=confusions)

    if fp_errors:
        output_file = os.path.join(globalvars['output_path'],
                                   "{}_{}_fps.txt".format(cfg["CNTK"].BASE_MODEL,
                                                          "e2e" if globalvars['train_e2e'] else "4stage"))
        log_fp_errors(fp_errors, output_file)

    ap_list = []
    for class_name in aps:
        ap_list += [aps[class_name]]
        print('AP for {:>15} = {:.4f}'.format(class_name, aps[class_name]))
    meanAP = np.nanmean(ap_list)
    print('Mean AP = {:.4f}'.format(meanAP))
    return meanAP
def test_MaxPool(tmpdir):
    img = np.reshape(np.arange(16, dtype=np.float32), [1, 4, 4])
    x = C.input_variable(img.shape)
    model = C.pooling(x, C.MAX_POOLING, (2, 2), (3, 3))
    verify_one_input(model, img, tmpdir, 'MaxPool_1')
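# Editor's note: with a (2, 2) window, (3, 3) strides, and no padding on a
# 4x4 input, only the window anchored at the origin fits, so the pooled
# output should be the single value max([[0, 1], [4, 5]]) == 5. A quick
# NumPy cross-check of that expectation (a sketch, not part of the test):
img = np.reshape(np.arange(16, dtype=np.float32), [1, 4, 4])
assert img[0, 0:2, 0:2].max() == 5.0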
def test_sequential_max_pooling():
    # ===================================================================
    # sequential max pool across rgb images with width in sequence axis
    # ===================================================================
    a = C.sequence.input_variable((2, 4))
    b = SequentialMaxPooling(filter_shape=(2, 2), strides=(2, 2), pad=False)(a)

    n = np.ascontiguousarray(np.arange(2 * 5 * 4).reshape((1, 5, 2, 4)).astype(np.float32))
    output = b.eval({a: n})
    assert isinstance(output, list) and len(output) == 1
    output = output[0]

    a = C.input_variable((2, 4, 5))
    b = C.layers.MaxPooling(filter_shape=(2, 2), strides=(2, 2), pad=False)(a)

    n = np.arange(2 * 5 * 4).reshape((1, 5, 2, 4)).astype(np.float32)
    n = np.ascontiguousarray(np.moveaxis(n, 1, -1))
    desired = b.eval({a: n})
    desired = np.squeeze(np.moveaxis(desired, -1, 1))

    np.testing.assert_almost_equal(output[:2, ...], desired)

    # BUGBUG: Sequential maxpooling will 'right pad' on the sequential axis
    # BUGBUG: once fixed, this assertion should fail
    assert output.shape[0] == 3 and desired.shape[0] == 2
    assert output.shape[0] != desired.shape[0], "Due to bug, sequence length is different between desired and output"

    # ===================================================================
    # sequential max pool across rgb images with width in sequence axis
    # ===================================================================
    a = C.sequence.input_variable((3, 25))
    b = SequentialMaxPooling(filter_shape=(2, 2), strides=(2, 2), pad=False)(a)
    assert b.shape == (3, 12)

    n = np.ascontiguousarray(np.arange(3 * 6 * 25).reshape((1, 6, 3, 25)).astype(np.float32))
    output = b.eval({a: n})
    assert isinstance(output, list) and len(output) == 1
    output = output[0]

    a = C.input_variable((3, 25, 6))
    b = C.layers.MaxPooling(filter_shape=(2, 2), strides=(2, 2), pad=False)(a)

    n = np.arange(3 * 6 * 25).reshape((1, 6, 3, 25)).astype(np.float32)
    n = np.ascontiguousarray(np.moveaxis(n, 1, -1))
    desired = b.eval({a: n})
    desired = np.squeeze(np.moveaxis(desired, -1, 1))

    np.testing.assert_almost_equal(output, desired)

    # ===================================================================
    # sequential max pool across rgb images with width in sequence axis
    # ===================================================================
    a = C.sequence.input_variable((3, 25))
    b = SequentialMaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=False)(a)

    n = np.ascontiguousarray(np.arange(3 * 6 * 25).reshape((1, 6, 3, 25)).astype(np.float32))
    output = b.eval({a: n})
    assert isinstance(output, list) and len(output) == 1
    output = output[0]

    a = C.input_variable((3, 25, 6))
    b = C.layers.MaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=False)(a)

    n = np.arange(3 * 6 * 25).reshape((1, 6, 3, 25)).astype(np.float32)
    n = np.ascontiguousarray(np.moveaxis(n, 1, -1))
    desired = b.eval({a: n})
    desired = np.squeeze(np.moveaxis(desired, -1, 1))

    # BUGBUG: Sequential maxpooling will 'right pad' on the sequential axis
    # BUGBUG: once fixed, this assertion should fail
    np.testing.assert_almost_equal(output[:2], desired)

    # ===================================================================
    # sequential max pool across rgb images with width in sequence axis
    # ===================================================================
    a = C.sequence.input_variable((3, 25))
    b = SequentialMaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=True)(a)
    assert b.shape == (3, 13)

    n = np.ascontiguousarray(np.arange(3 * 6 * 25).reshape((1, 6, 3, 25)).astype(np.float32))
    output = b.eval({a: n})
    assert isinstance(output, list) and len(output) == 1
    output = output[0]

    a = C.input_variable((3, 25, 6))
    b = C.layers.MaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=True)(a)

    n = np.arange(3 * 6 * 25).reshape((1, 6, 3, 25)).astype(np.float32)
    n = np.ascontiguousarray(np.moveaxis(n, 1, -1))
    desired = b.eval({a: n})
    desired = np.squeeze(np.moveaxis(desired, -1, 1))

    np.testing.assert_almost_equal(output, desired)

    # ===================================================================
    # sequential max pool across rgb images with width in sequence axis
    # ===================================================================
    a = C.sequence.input_variable((3, 25))
    b = SequentialMaxPooling(filter_shape=(4, 4), strides=(2, 2), pad=True)(a)

    n = np.ascontiguousarray(np.arange(3 * 6 * 25).reshape((1, 6, 3, 25)).astype(np.float32))
    output = b.eval({a: n})
    assert isinstance(output, list) and len(output) == 1
    output = output[0]

    a = C.input_variable((3, 25, 6))
    b = C.layers.MaxPooling(filter_shape=(4, 4), strides=(2, 2), pad=True)(a)

    n = np.arange(3 * 6 * 25).reshape((1, 6, 3, 25)).astype(np.float32)
    n = np.ascontiguousarray(np.moveaxis(n, 1, -1))
    desired = b.eval({a: n})
    desired = np.squeeze(np.moveaxis(desired, -1, 1))

    np.testing.assert_almost_equal(output, desired)

    # ===================================================================
    # sequential max pool across a sequence of vector or B&W images
    # ===================================================================
    a = C.sequence.input_variable((25,))
    b = SequentialMaxPooling(filter_shape=(4,), strides=(2,), pad=True)(a)

    n = np.ascontiguousarray(np.arange(1 * 6 * 25).reshape((1, 6, 25)).astype(np.float32))
    output = b.eval({a: n})
    assert isinstance(output, list) and len(output) == 1
    output = output[0]

    a = C.input_variable((25, 6))
    b = C.layers.MaxPooling(filter_shape=(4,), strides=(2,), pad=True)(a)

    n = np.arange(1 * 6 * 25).reshape((1, 6, 25)).astype(np.float32)
    n = np.ascontiguousarray(np.moveaxis(n, 1, -1))
    desired = b.eval({a: n})
    desired = np.squeeze(np.moveaxis(desired, -1, 1))

    np.testing.assert_almost_equal(output, desired)
def build_model(self):
    c = C.Axis.new_unique_dynamic_axis('c')
    q = C.Axis.new_unique_dynamic_axis('q')
    b = C.Axis.default_batch_axis()

    cgw = C.input_variable(self.wg_dim, dynamic_axes=[b, c], is_sparse=self.use_sparse, name='cgw')
    cnw = C.input_variable(self.wn_dim, dynamic_axes=[b, c], is_sparse=self.use_sparse, name='cnw')
    qgw = C.input_variable(self.wg_dim, dynamic_axes=[b, q], is_sparse=self.use_sparse, name='qgw')
    qnw = C.input_variable(self.wn_dim, dynamic_axes=[b, q], is_sparse=self.use_sparse, name='qnw')
    cc = C.input_variable((1, self.word_size), dynamic_axes=[b, c], name='cc')
    qc = C.input_variable((1, self.word_size), dynamic_axes=[b, q], name='qc')
    ab = C.input_variable(self.a_dim, dynamic_axes=[b, c], name='ab')
    ae = C.input_variable(self.a_dim, dynamic_axes=[b, c], name='ae')
    qf = C.input_variable(1, dynamic_axes=[b, q], is_sparse=False, name='query_feature')
    df = C.input_variable(3, dynamic_axes=[b, c], is_sparse=False, name='doc_feature')

    input_phs = {'cgw': cgw, 'cnw': cnw, 'qgw': qgw, 'qnw': qnw,
                 'cc': cc, 'qc': qc, 'ab': ab, 'ae': ae, 'qf': qf, 'df': df}
    self._input_phs = input_phs
    self.info['query'] = C.splice(qgw, qnw)
    self.info['doc'] = C.splice(cgw, cnw)

    # graph
    elmo_encoder = self.__elmo_fac.build()

    # input layer
    reduction_cc = C.reshape(cc, (-1,))
    reduction_qc = C.reshape(qc, (-1,))
    c_elmo = elmo_encoder(reduction_cc)
    q_elmo = elmo_encoder(reduction_qc)
    pu, qu = self.input_layer(cgw, cnw, qgw, qnw).outputs
    enhance_pu = C.splice(pu, c_elmo, df)
    enhance_qu = C.splice(qu, q_elmo, qf)

    gate_pu, wei1 = self.gate_attention_layer(enhance_pu, enhance_qu,
                                              common_len=2 * self.hidden_dim + 1024,
                                              att_kind=self.attn_configs[0])  # [#,c][4*hidden]
    self.info['attn1'] = 1.0 * wei1
    pv = self.reasoning_layer(gate_pu)  # [#,c][2*hidden]

    # self attention
    gate_self, wei2 = self.gate_attention_layer(pv, pv,
                                                common_len=2 * self.hidden_dim,
                                                att_kind=self.attn_configs[1])  # [#,c][4*hidden]
    self.info['attn2'] = 1.0 * wei2
    ph = self.reasoning_layer(gate_self)  # [#,c][2*hidden]
    enhance_ph = C.splice(ph, c_elmo, df)

    init_pu = self.weighted_sum(enhance_pu)
    start_logits, end_logits = self.output_layer(init_pu.outputs[0], enhance_ph,
                                                 2 * self.hidden_dim + 1027)  # [#, c][1]
    self.info['start_logits'] = start_logits * 1.0
    self.info['end_logits'] = end_logits * 1.0

    # loss
    start_loss = seq_loss(start_logits, ab)
    end_loss = seq_loss(end_logits, ae)
    # paper_loss = start_loss + end_loss
    new_loss = all_spans_loss(start_logits, ab, end_logits, ae)

    self._model = C.combine([start_logits, end_logits])
    self._loss = new_loss
    return self._model, self._loss, self._input_phs