def test_op_times_reduce_sequence_axis(device_id, precision):
    """times() with TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK must
    share dynamic axes with sequence.reduce_sum(times(...)) and sum the one-hot
    rows of each sequence."""
    from cntk import times, Value, TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK
    from cntk import sequence

    dt_precision = PRECISION_TO_TYPE[precision]
    dim = 10
    seq = [[0, 1, 2], [3], [4, 5, 6, 7, 8, 9]]

    # Sparse one-hot sequences on the right, matching all-ones sequences on the left.
    right_data = Value.one_hot(seq, dim, dtype=dt_precision)
    right_var = sequence.input_variable(shape=(dim), is_sparse=True, dtype=dt_precision)
    left_data = [AA([1] * len(s), dtype=dt_precision) for s in seq]
    left_var = sequence.input_variable(shape=(1), dtype=dt_precision)

    func = times(left_var, right_var,
                 infer_input_rank_to_map=TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK)
    func2 = sequence.reduce_sum(times(left_var, right_var))
    assert func.dynamic_axes == func2.dynamic_axes

    _, forward_output = func.forward({left_var: left_data, right_var: right_data})
    actual_forward = forward_output[func.output]
    expected_forward = AA([[[1, 1, 1, 0, 0, 0, 0, 0, 0, 0]],
                           [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]],
                           [[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]])
    assert np.allclose(actual_forward, expected_forward)
def test_model_not_criterion_subset():
    """Train with a criterion spanning two models that share an embedding core,
    while the trainer's model (model1) is not a superset of the criterion graph."""
    input_dim = 2
    proj_dim = 11
    model1_dim = 3
    model2_dim = 4

    x = sequence.input_variable((input_dim,))
    core = C.layers.Embedding(proj_dim)

    # Model 1: dense layer on the last step of the shared embedded sequence.
    model1 = C.layers.Dense(model1_dim)(sequence.last(core(x)))
    model1_label = C.input_variable((model1_dim,))
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    # Model 2: dense layer applied per sequence step.
    model2 = C.layers.Dense(model2_dim)(core(x))
    model2_label = sequence.input_variable((model2_dim,))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)

    # Combined multi-task criterion over both models.
    ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

    lr_schedule = C.learning_parameter_schedule(0.003, minibatch_size=1)
    trainer_multitask = C.Trainer(model1, (ce, pe_model1),
                                  C.sgd(ce.parameters, lr=lr_schedule))

    x_data = np.asarray([[2., 1.], [1., 2.]], np.float32)
    model1_label_data = np.asarray([1., 0., 0.], np.float32)
    model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32)
    trainer_multitask.train_minibatch({x: [x_data],
                                       model1_label: [model1_label_data],
                                       model2_label: [model2_label_data]})
def test_op_times_reduce_sequence_axis(device_id, precision):
    """Verify that times() with the sequence-axis-reducing map option behaves
    like an explicit sequence.reduce_sum over a plain times()."""
    from cntk import times, Value, TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK
    from cntk import sequence

    precision_dtype = PRECISION_TO_TYPE[precision]
    num_classes = 10
    index_seqs = [[0, 1, 2], [3], [4, 5, 6, 7, 8, 9]]

    one_hot_value = Value.one_hot(index_seqs, num_classes, dtype=precision_dtype)
    one_hot_input = sequence.input_variable(shape=(num_classes), is_sparse=True,
                                            dtype=precision_dtype)
    ones_batches = [AA([1, 1, 1], dtype=precision_dtype),
                    AA([1], dtype=precision_dtype),
                    AA([1, 1, 1, 1, 1, 1], dtype=precision_dtype)]
    ones_input = sequence.input_variable(shape=(1), dtype=precision_dtype)

    reduced = times(ones_input, one_hot_input,
                    infer_input_rank_to_map=TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK)
    reference = sequence.reduce_sum(times(ones_input, one_hot_input))
    # Both formulations must agree on the resulting dynamic axes.
    assert reduced.dynamic_axes == reference.dynamic_axes

    _, outputs = reduced.forward({ones_input: ones_batches,
                                  one_hot_input: one_hot_value})
    actual = outputs[reduced.output]
    expected = AA([[[1, 1, 1, 0, 0, 0, 0, 0, 0, 0]],
                   [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]],
                   [[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]])
    assert np.allclose(actual, expected)
def create_sample_model(device, writer=None,
                        lr_per_sample=C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])):
    """Build a tiny linear model plus an SGD Trainer on the given device.

    Returns a (trainer, feature_input, label_input) triple.
    """
    feature = sequence.input_variable(shape=(input_dim,))
    label = sequence.input_variable(shape=(input_dim,))
    weights = parameter(shape=(input_dim,), init=10, device=device)
    z = plus(feature, reduce_sum(weights), name='z')
    loss = cross_entropy_with_softmax(z, label)
    metric = classification_error(z, label)
    trainer = C.Trainer(z, (loss, metric),
                        [C.sgd(z.parameters, lr_per_sample)], writer)
    return (trainer, feature, label)
def create_sample_model(device, writer=None):
    """Build a tiny linear model with a fixed per-sample LR schedule and a Trainer.

    Returns a (trainer, feature_input, label_input) triple.
    """
    feature = sequence.input_variable(shape=(input_dim, ))
    label = sequence.input_variable(shape=(input_dim, ))
    weights = parameter(shape=(input_dim, ), init=10, device=device)
    z = plus(feature, reduce_sum(weights), name='z')
    loss = cross_entropy_with_softmax(z, label)
    metric = classification_error(z, label)

    lr_per_sample = C.learning_rate_schedule([0.3, 0.2, 0.1, 0.0], C.UnitType.sample)
    learner = C.sgd(z.parameters, lr_per_sample)
    trainer = C.Trainer(z, (loss, metric), [learner], writer)
    return (trainer, feature, label)
def test_usermbsource_training(tmpdir):
    """Train exactly 20 samples from a user-defined minibatch source through
    training_session and verify the trainer's sample count."""
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
        classification_error, learning_rate_schedule, sgd, Trainer, \
        training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim, ))
    label = C.input_variable(shape=(num_output_classes, ))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    # Collapse the sequence axis so z yields one vector per sample.
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])

    input_map = {feature: mbs.fsi, label: mbs.lsi}
    session = training_session(trainer=trainer, mb_source=mbs,
                               model_inputs_to_streams=input_map,
                               mb_size=4, max_samples=20)
    session.train()

    assert trainer.total_number_of_samples_seen == 20
def test_sanitize_batch_sparse():
    """sanitize_batch must pad ragged sparse sequences to (num_seq, max_len, dim)."""
    sparse_seqs = [csr([[1, 0, 2], [2, 3, 0]]), csr([5, 0, 1])]
    sparse_var = sequence.input_variable(3, is_sparse=True)
    value = sanitize_batch(sparse_var, sparse_seqs)
    # 2 sequences, with max seq len of 2 and dimension 3
    assert value.shape == (2, 2, 3)
def test_eval_sparse_dense(tmpdir, device_id):
    """Feeding one-hot data from a CTF reader, from raw CSR matrices and from
    Value.one_hot must all evaluate to the same result."""
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.ops import times

    input_vocab_dim = label_vocab_dim = 69

    # Two sequences of one-hot tokens in CTF format.
    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
                   labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True))),
        randomize=False, max_samples=2)

    raw_input = sequence.input_variable(shape=input_vocab_dim,
                                        sequence_axis=Axis('inputAxis'),
                                        name='raw_input', is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input: mbs.streams.features},
                                  device=cntk_device(device_id))

    # Identity projection: the output equals the densified input.
    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid, device=cntk_device(device_id))

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [[3, 4, 5, 4, 7, 12, 1], [60, 61]]
    data = [csr(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in one_hot_data]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_csr)])

    # One-hot with the raw_input encoding in ctf_data
    data = Value.one_hot(one_hot_data, num_classes=input_vocab_dim,
                         device=cntk_device(device_id))
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_hot)])
def test_one_hot_skip():
    """Value.ONE_HOT_SKIP entries must decode to an all-zero row."""
    one_hot = Value.one_hot([[0, 1, Value.ONE_HOT_SKIP]], 3)
    var = sequence.input_variable(shape=(3,))
    passthrough = var * 1
    expected = [[[1., 0., 0.],
                 [0., 1., 0.],
                 [0., 0., 0.]]]
    assert np.allclose(passthrough.eval({var: one_hot}), expected)
def test_sanitize_batch_sparse():
    """Ragged CSR batches are padded into a single (num_seqs, max_len, dim) value."""
    var = sequence.input_variable(3, is_sparse=True)
    two_step_seq = csr([[1, 0, 2], [2, 3, 0]])
    one_step_seq = csr([5, 0, 1])
    b = sanitize_batch(var, [two_step_seq, one_step_seq])
    # 2 sequences, with max seq len of 2 and dimension 3
    assert b.shape == (2, 2, 3)
def test_mask(batch, seq_starts, expected):
    """sanitize_batch must produce the expected mask, or raise when 'expected'
    is an exception class."""
    scalar_var = sequence.input_variable(())
    if type(expected) == type(ValueError):
        # 'expected' is an exception class: sanitize_batch must raise it.
        with pytest.raises(expected):
            sanitize_batch(scalar_var, batch, seq_starts)
    else:
        value = sanitize_batch(scalar_var, batch, seq_starts)
        assert np.allclose(value.mask, expected)
def test_mask(batch, seq_starts, expected):
    """Check the mask computed by sanitize_batch; an exception class in
    'expected' means sanitize_batch must raise instead."""
    var = sequence.input_variable(())
    expecting_error = type(expected) == type(ValueError)
    if expecting_error:
        with pytest.raises(expected):
            sanitize_batch(var, batch, seq_starts)
    else:
        result = sanitize_batch(var, batch, seq_starts)
        assert np.allclose(result.mask, expected)
def test_one_hot_int_types(dtype):
    """Value.one_hot must accept index sequences of any integer dtype (or lists)."""
    index_seqs = [[0, 2, 1], [1]]
    if dtype is not None:
        index_seqs = [np.asarray(s, dtype=dtype) for s in index_seqs]
    value = Value.one_hot(index_seqs, 3)
    var = sequence.input_variable(shape=(3, ))
    passthrough = var * 1
    expected = [[[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]],
                [[0., 1., 0.]]]
    for actual_seq, expected_seq in zip(passthrough.eval({var: value}), expected):
        assert np.allclose(actual_seq, expected_seq)
def run_distributed_training(tmpdir, create_func):
    """Drive one distributed training step through the learner produced by
    create_func, then exercise checkpoint save/restore and worker sync."""
    feature = sequence.input_variable(shape=1)
    label = sequence.input_variable(shape=1)
    weights = parameter(shape=2, init=10)
    z = plus(feature, reduce_sum(weights), name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True))

    # The current worker must appear in the communicator's worker list.
    communicator = dist_learner.communicator()
    current_worker = communicator.current_worker()
    assert any(current_worker.global_rank == wk.global_rank
               for wk in communicator.workers())

    trainer = C.Trainer(z, (ce, errs), [dist_learner])
    arguments = {feature: [[1], [2]], label: [[0], [1]]}
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z.output])

    checkpoint_path = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(checkpoint_path)
    trainer.restore_from_checkpoint(checkpoint_path)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
def create_recurrent_network():
    """Build a 3-layer LSTM sequence model with CE loss and error metric.

    Returns a dict exposing the 'feature', 'label', 'ce', 'errs' and 'output' nodes.
    """
    # Input variables denoting the features and label data
    features = sequence.input_variable(((2 * context + 1) * feature_dim))
    labels = sequence.input_variable((num_classes))

    # Three stacked LSTM recurrences followed by a dense readout.
    model = Sequential([For(range(3), lambda: Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce': ce,
        'errs': errs,
        'output': z
    }
def create_recurrent_network():
    """Assemble the recurrent speech model: 3 LSTM layers plus a dense output,
    together with its criterion nodes, returned as a lookup dict."""
    # Input variables denoting the features and label data
    feature_var = sequence.input_variable(((2 * context + 1) * feature_dim))
    label_var = sequence.input_variable((num_classes))

    # create network
    lstm_stack = For(range(3), lambda: Recurrence(LSTM(256)))
    network = Sequential([lstm_stack, Dense(num_classes)])
    output = network(feature_var)
    loss = cross_entropy_with_softmax(output, label_var)
    metric = classification_error(output, label_var)

    return dict(feature=feature_var,
                label=label_var,
                ce=loss,
                errs=metric,
                output=output)
def test_eval_sparse_no_seq(batch_index_data, device_id):
    """A one-sample CSR batch must evaluate to the dense product, for both
    sparse and dense input variables."""
    dim = 10
    multiplier = 2
    for var_is_sparse in (True, False):
        in_var = sequence.input_variable(shape=(dim,), is_sparse=var_is_sparse)
        scaled = times(in_var, multiplier * np.eye(dim))
        dense_rows = np.eye(dim)[batch_index_data]
        sparse_val = csr(dense_rows.astype('f'))
        result = scaled.eval({in_var: [sparse_val]}, device=cntk_device(device_id))
        assert np.allclose(result, [dense_rows * multiplier])
def run_distributed_training(tmpdir, create_func):
    """Run a single distributed training step with the learner wrapped by
    create_func, then checkpoint, restore and barrier-sync the workers."""
    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(C.momentum_sgd(z.parameters, lr_per_sample,
                                              momentum_time_constant, True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    # The current worker must be present in the communicator's worker list.
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [ dist_learner ])
    in1_value = [[1],[2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    # NOTE(review): 'p' is rebound here from the parameter node to the
    # checkpoint path string.
    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
def test_sanitize_batch_contiguity():
    """Non-contiguous (transposed) arrays must trigger a RuntimeWarning, while
    contiguous arrays sanitize silently; both yield shape (2, 1, 2, 2)."""
    m1 = AA([[1, 2], [3, 4]])
    m2 = AA([[5, 6], [7, 8]])
    var = sequence.input_variable((2, 2), is_sparse=True)

    # Transposed views are not C-contiguous -> expect a warning.
    with pytest.warns(RuntimeWarning):
        value = sanitize_batch(var, [m1.T, m2.T])
    assert value.shape == (2, 1, 2, 2)

    # Contiguous originals sanitize without warning.
    value = sanitize_batch(var, [m1, m2])
    assert value.shape == (2, 1, 2, 2)
def test_eval_one_hot_seq(one_hot_batch, device_id):
    """One-hot Value batches must evaluate like their dense equivalents for
    both sparse and dense input variables."""
    dim = 10
    multiplier = 2
    scaled_eye = np.eye(dim) * multiplier
    for var_is_sparse in (True, False):
        in_var = sequence.input_variable(shape=(dim,), is_sparse=var_is_sparse)
        # Multiply by a scaled identity so the result stays easy to predict.
        z = times(in_var, scaled_eye)
        expected = [scaled_eye[seq] for seq in one_hot_batch]
        batch = Value.one_hot(one_hot_batch, num_classes=dim,
                              device=cntk_device(device_id))
        result = z.eval({in_var: batch}, device=cntk_device(device_id))
        assert all(np.allclose(a, b) for a, b in zip(result, expected))
def test_one_hot_int_types(dtype):
    """one_hot must handle plain lists as well as numpy integer index arrays."""
    data = [[0, 2, 1], [1]]
    if dtype is not None:
        # Re-wrap the index lists with the integer dtype under test.
        data = [np.asarray(d, dtype=dtype) for d in data]
    value = Value.one_hot(data, 3)
    in_var = sequence.input_variable(shape=(3,))
    passthrough = in_var * 1
    expected = [[[1., 0., 0.],
                 [0., 0., 1.],
                 [0., 1., 0.]],
                [[0., 1., 0.]]]
    for actual_seq, expected_seq in zip(passthrough.eval({in_var: value}), expected):
        assert np.allclose(actual_seq, expected_seq)
def test_sanitize_batch_contiguity():
    """Transposed (non-contiguous) inputs warn; contiguous inputs do not."""
    a1 = AA([[1,2],[3,4]])
    a2 = AA([[5,6],[7,8]])
    var = sequence.input_variable((2,2), is_sparse=True)

    non_contiguous = [a1.T, a2.T]
    with pytest.warns(RuntimeWarning):
        assert sanitize_batch(var, non_contiguous).shape == (2,1,2,2)

    contiguous = [a1, a2]
    assert sanitize_batch(var, contiguous).shape == (2,1,2,2)
def test_times_transpose_sequence_param(device_id, precision):
    """Gradient of times_transpose(parameter, sequence_input) w.r.t. both the
    parameter and the input must compute without error."""
    from cntk import times_transpose, parameter, sequence, Value

    dtype = PRECISION_TO_TYPE[precision]
    dim = 5
    num_sequences = 2
    indices = list(range(dim))
    identity = np.identity(dim, dtype=dtype)  # NOTE(review): unused below; kept for parity

    input_data = Value.one_hot([indices] * num_sequences, dim, dtype=dtype)
    seq_var = sequence.input_variable(shape=(dim), needs_gradient=True, dtype=dtype)
    param = parameter(shape=(dim, ), init=1, dtype=dtype)
    z = times_transpose(param, seq_var)
    e_grad = z.grad({seq_var: input_data}, [param, seq_var])
def test_op_times_sparse_grad(device_id, precision):
    """times/times_transpose over sparse one-hot input must produce the
    expected dense gradient for the identity-initialized parameter."""
    from cntk import times, times_transpose, parameter, reshape, Value, sequence

    dtype = PRECISION_TO_TYPE[precision]
    dim = 5
    num_sequences = 2
    indices = list(range(dim))
    identity = np.identity(dim, dtype=dtype)

    input_data = Value.one_hot([indices] * num_sequences, dim, dtype=dtype)
    input_var = sequence.input_variable(shape=(dim), is_sparse=True,
                                        needs_gradient=False, dtype=dtype)
    e = parameter(shape=(dim, dim), init=identity, dtype=dtype)
    z = reshape(times_transpose(e, times(input_var, e)), dim)
    e_grad = z.grad({input_var: input_data}, [e])

    # Every element of e should accumulate the same gradient value.
    assert np.allclose(e_grad, np.ones((dim, dim)) * 4)
def test_op_times_sparse_grad(device_id, precision):
    """Sparse one-hot input fed through times then times_transpose: the
    parameter gradient must be a uniform matrix (all fours, per the graph)."""
    from cntk import times, times_transpose, parameter, reshape, Value, sequence

    dt_precision = PRECISION_TO_TYPE[precision]
    dim = 5
    num_sequences = 2
    seq = [i for i in range(dim)]
    identity = np.identity(dim, dtype=dt_precision)

    one_hot_batch = Value.one_hot([seq] * num_sequences, dim, dtype=dt_precision)
    sparse_input = sequence.input_variable(shape=(dim), is_sparse=True,
                                           needs_gradient=False, dtype=dt_precision)
    weight = parameter(shape=(dim, dim), init=identity, dtype=dt_precision)
    graph = reshape(times_transpose(weight, times(sparse_input, weight)), dim)
    weight_grad = graph.grad({sparse_input: one_hot_batch}, [weight])

    assert np.allclose(weight_grad, 4 * np.ones((dim, dim)))
def test_eval_sparse_seq_1(batch, device_id):
    """CSR sequence batches (flat or nested per-step) must evaluate to the
    corresponding dense product, for sparse and dense input variables."""
    dim = 4
    multiplier = 2
    for var_is_sparse in (True, False):
        in_var = sequence.input_variable(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in_var, multiplier * np.eye(dim))
        if isinstance(batch[0], list):
            # Each sequence is a list of per-step CSR rows: stack them densely.
            expected = [np.vstack([m.todense() * multiplier for m in seq])
                        for seq in batch]
        else:
            expected = [seq.todense() * multiplier for seq in batch]
        result = z.eval({in_var: batch}, device=cntk_device(device_id))
        assert np.all([np.allclose(a, b) for a, b in zip(result, expected)]), \
            "%s != %s" % (result, expected)
def train_sequence_classifier():
    """Train the LSTM sequence classifier on the bundled CTF training set for
    255 minibatches and return (last_eval_average, last_loss_average)."""
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(features,
                                                     num_output_classes,
                                                     embedding_dim,
                                                     hidden_dim,
                                                     cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    """Train from a user minibatch source with a cross-validation source that
    optionally implements checkpointing; verify the sample count and that the
    CV source's restore hook was invoked exactly once."""
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
        classification_error, learning_rate_schedule, sgd, Trainer, \
        training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    # Reduce over the sequence axis so z yields one vector per sample.
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    #having a large learning rate to prevent the model from converging earlier where not all the intended samples are fed
    #note that training session can end earlier if there is no updates
    lr_per_sample = learning_rate_schedule(0.3, UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }
    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config = C.CrossValidationConfig(minibatch_source=mbs_cv,
                                            max_samples=10,
                                            minibatch_size=2)
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
def train_sequence_classifier():
    """Train the LSTM sequence classifier (modern per-sample LR schedule API)
    for 255 minibatches; return (last_eval_average, last_loss_average)."""
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_parameter_schedule_per_sample(0.0005)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    """Same as the UnitType-based variant but using the modern
    learning_parameter_schedule_per_sample API; verifies sample count and the
    CV source's checkpoint-restore call count."""
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
        classification_error, learning_parameter_schedule_per_sample, sgd, Trainer, \
        training_session, times

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    # Reduce over the sequence axis so z yields one vector per sample.
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    #having a large learning rate to prevent the model from converging earlier where not all the intended samples are fed
    #note that training session can end earlier if there is no updates
    lr_per_sample = learning_parameter_schedule_per_sample(0.3)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }
    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config = C.CrossValidationConfig(minibatch_source=mbs_cv,
                                            max_samples=10,
                                            minibatch_size=2)
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
def test_sweep_based_schedule(tmpdir, device_id):
    """Per-sweep learning-rate schedule: the rate must step 0.3 -> 0.2 -> 0.1
    -> 0.0 as successive minibatches cross sweep boundaries."""
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk import cross_entropy_with_softmax, classification_error, plus, reduce_sum, sequence
    from cntk import Trainer

    input_dim = 69

    # Two sequences in CTF format; one sweep = both sequences.
    ctf_data = '''\
0 |S0 3:1 |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features = StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels = StreamDef(field='S1', shape=input_dim, is_sparse=True)
        )), randomize=False)

    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])

    input_map = {
        in1: mbs.streams.features,
        labels: mbs.streams.labels
    }

    # fetch minibatch (first sequence)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.3

    # fetch minibatch (second sequence, sweep ends at this point)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.2

    # fetch minibatch (both sequences -- entire sweep in one go)
    data = mbs.next_minibatch(9, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.1

    # fetch minibatch (multiple sweeps)
    data = mbs.next_minibatch(30, input_map=input_map)
    trainer.train_minibatch(data, outputs=[z.output])
    assert learner.learning_rate() == 0.0
def test_distributed_mb_source(tmpdir):
    """Partitioned minibatch reading: two emulated workers must each receive
    their own disjoint share of the sequences, with and without randomization."""
    input_dim = 69

    # Eleven sequences (ids 0..10) of varying length in CTF format.
    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
2 |S0 61:1 |# A |S1 32:1 |# ~AH
3 |S0 60:1 |# <s> |S1 3:1 |# <s>
3 |S0 61:1 |# A |S1 32:1 |# ~AH
3 |S0 61:1 |# A |S1 32:1 |# ~AH
3 |S0 61:1 |# A |S1 32:1 |# ~AH
4 |S0 60:1 |# <s> |S1 3:1 |# <s>
5 |S0 60:1 |# <s> |S1 3:1 |# <s>
5 |S0 61:1 |# A |S1 32:1 |# ~AH
6 |S0 60:1 |# <s> |S1 3:1 |# <s>
6 |S0 61:1 |# A |S1 32:1 |# ~AH
7 |S0 60:1 |# <s> |S1 3:1 |# <s>
8 |S0 60:1 |# <s> |S1 3:1 |# <s>
8 |S0 61:1 |# A |S1 32:1 |# ~AH
9 |S0 60:1 |# <s> |S1 3:1 |# <s>
9 |S0 61:1 |# A |S1 32:1 |# ~AH
10 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    # No randomization
    mb0 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0', shape=input_dim, is_sparse=True),
                   labels=StreamDef(field='S1', shape=input_dim, is_sparse=True))),
        randomize=False, max_samples=36)  # A bit more than a sweep
    mb1 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0', shape=input_dim, is_sparse=True),
                   labels=StreamDef(field='S1', shape=input_dim, is_sparse=True))),
        randomize=False, max_samples=36)  # A bit more than a sweep

    input = sequence.input_variable(shape=(input_dim, ))
    label = sequence.input_variable(shape=(input_dim, ))
    input_map = {input: mb0.streams.features, label: mb0.streams.labels}

    # Because we are emulating two workers here, the minibatch_size_in_samples
    # will be split in 2, so below we expect 5 samples per worker.
    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (data[input].num_samples == 7)  # Sequence 0

    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (data[input].num_samples == 4)  # Sequence 3

    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (data[input].num_samples == 5)  # Sequences 5, 7, 9

    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (data[input].num_samples == 7)  # Sequence 0

    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (data[input].num_samples == 4)  # Sequence 3

    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (len(data) == 0)  # No data

    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=1)
    assert (data[input].num_samples == 4)  # Sequences 2, 4

    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=1)
    assert (data[input].num_samples == 5)  # Sequences 6, 8, 10

    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=1)
    assert (data[input].num_samples == 3)  # Sequences 2

    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=1)
    assert (len(data) == 0)  # No data

    # Radomization
    mb3 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0', shape=input_dim, is_sparse=True),
                   labels=StreamDef(field='S1', shape=input_dim, is_sparse=True))),
        max_sweeps=1)
    mb4 = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0', shape=input_dim, is_sparse=True),
                   labels=StreamDef(field='S1', shape=input_dim, is_sparse=True))),
        max_sweeps=1)

    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (data[input].num_samples == 5)

    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (data[input].num_samples == 4)

    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (data[input].num_samples == 4)

    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (data[input].num_samples == 5)

    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert (data[input].num_samples == 7)

    data = mb4.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=1)
    assert (len(data) == 0)  # Due to chunking we do not expect any data for rank 1
def test_sweep_based_schedule(tmpdir, device_id):
    """Verify that a per-sample learning-rate schedule advances one step per data sweep.

    Builds a tiny 2-sequence CTF corpus (9 samples per sweep), trains with a
    4-entry lr schedule, and asserts the learner's rate after each minibatch:
    0.3 during the first sweep, 0.2 after the first sweep boundary, 0.1 after
    consuming an entire sweep in one minibatch, and 0.0 after multiple sweeps.
    """
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk import cross_entropy_with_softmax, classification_error, plus, reduce_sum, sequence
    from cntk import Trainer

    input_dim = 69

    # Two sequences (ids 0 and 2): 7 + 2 = 9 samples in a full sweep.
    # NOTE(review): the original newlines inside this literal were lost in the
    # file's whitespace mangling; reconstructed one CTF record per line.
    ctf_data = '''\
0 |S0 3:1 |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    # Non-randomized reader so sweep boundaries are hit deterministically.
    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features = StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels   = StreamDef(field='S1', shape=input_dim, is_sparse=True)
    )), randomize=False)

    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10)
    # Trivial model; only the learner's lr bookkeeping is under test.
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    # NOTE(review): learning_rate_schedule/UnitType is the legacy schedule API
    # (other tests in this file use C.learning_parameter_schedule); kept as-is
    # because this test's sweep-stepping semantics depend on it — confirm
    # before modernizing.
    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])

    input_map = { in1 : mbs.streams.features, labels : mbs.streams.labels }

    # fetch minibatch (first sequence)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.3

    # fetch minibatch (second sequence, sweep ends at this point)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.2

    # fetch minibatch (both sequences -- entire sweep in one go)
    data = mbs.next_minibatch(9, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.1

    # fetch minibatch (multiple sweeps); also exercises the outputs= path.
    data = mbs.next_minibatch(30, input_map=input_map)
    trainer.train_minibatch(data, outputs=[z.output])
    assert learner.learning_rate() == 0.0
def test_distributed_mb_source(tmpdir):
    """Check minibatch partitioning across two emulated distributed workers.

    Uses a 10-sequence CTF corpus read by two sources (one per simulated
    worker) with num_data_partitions=2. Asserts the exact per-call sample
    counts for each partition, both without randomization (capped by
    max_samples) and with randomization (capped by max_sweeps=1), where
    chunk assignment leaves rank 1 with no data.
    """
    input_dim = 69

    # Sequences 0..10 (no sequence 1) with lengths 7,3,4,1,2,2,1,2,2,1.
    # NOTE(review): the original newlines inside this literal were lost in the
    # file's whitespace mangling; reconstructed one CTF record per line.
    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
2 |S0 61:1 |# A |S1 32:1 |# ~AH
3 |S0 60:1 |# <s> |S1 3:1 |# <s>
3 |S0 61:1 |# A |S1 32:1 |# ~AH
3 |S0 61:1 |# A |S1 32:1 |# ~AH
3 |S0 61:1 |# A |S1 32:1 |# ~AH
4 |S0 60:1 |# <s> |S1 3:1 |# <s>
5 |S0 60:1 |# <s> |S1 3:1 |# <s>
5 |S0 61:1 |# A |S1 32:1 |# ~AH
6 |S0 60:1 |# <s> |S1 3:1 |# <s>
6 |S0 61:1 |# A |S1 32:1 |# ~AH
7 |S0 60:1 |# <s> |S1 3:1 |# <s>
8 |S0 60:1 |# <s> |S1 3:1 |# <s>
8 |S0 61:1 |# A |S1 32:1 |# ~AH
9 |S0 60:1 |# <s> |S1 3:1 |# <s>
9 |S0 61:1 |# A |S1 32:1 |# ~AH
10 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    # No randomization: one independent source per emulated worker.
    mb0 = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features = StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels   = StreamDef(field='S1', shape=input_dim, is_sparse=True)
    )), randomize=False, max_samples=36)  # A bit more than a sweep
    mb1 = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features = StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels   = StreamDef(field='S1', shape=input_dim, is_sparse=True)
    )), randomize=False, max_samples=36)  # A bit more than a sweep

    input = sequence.input_variable(shape=(input_dim,))
    label = sequence.input_variable(shape=(input_dim,))
    input_map = {
        input : mb0.streams.features,
        label : mb0.streams.labels
    }

    # Because we are emulating two workers here, the minibatch_size_in_samples
    # is split in 2, i.e. a budget of ~5 samples per worker per call.
    # NOTE(review): whole sequences are never split, so the actual counts
    # asserted below can exceed 5 (e.g. the 7-sample sequence 0).
    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(data[input].num_samples == 7)  # Sequence 0
    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(data[input].num_samples == 4)  # Sequence 3
    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(data[input].num_samples == 5)  # Sequences 5, 7, 9
    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(data[input].num_samples == 7)  # Sequence 0
    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(data[input].num_samples == 4)  # Sequence 3
    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(len(data) == 0)  # No data: max_samples=36 budget exhausted

    # Worker 1 (partition_index=1) sees the complementary sequences.
    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=1)
    assert(data[input].num_samples == 4)  # Sequences 2, 4
    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=1)
    assert(data[input].num_samples == 5)  # Sequences 6, 8, 10
    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=1)
    assert(data[input].num_samples == 3)  # Sequences 2
    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=1)
    assert(len(data) == 0)  # No data

    # Randomization (default randomize=True), limited to a single sweep.
    mb3 = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features = StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels   = StreamDef(field='S1', shape=input_dim, is_sparse=True)
    )), max_sweeps=1)
    mb4 = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features = StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels   = StreamDef(field='S1', shape=input_dim, is_sparse=True)
    )), max_sweeps=1)

    # Counts below depend on the randomizer's deterministic default seed and
    # chunking; the whole sweep (25 samples) lands on partition 0.
    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(data[input].num_samples == 5)
    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(data[input].num_samples == 4)
    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(data[input].num_samples == 4)
    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(data[input].num_samples == 5)
    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=0)
    assert(data[input].num_samples == 7)

    data = mb4.next_minibatch(minibatch_size_in_samples=10, input_map=input_map, num_data_partitions=2, partition_index=1)
    assert(len(data) == 0)  # Due to chunking we do not expect any data for rank 1
def test_one_hot_skip(): a = Value.one_hot([[0, 1, Value.ONE_HOT_SKIP]], 3) i = sequence.input_variable(shape=(3, )) b = i * 1 expected = [[[1., 0., 0.], [0., 1., 0.], [0., 0., 0.]]] assert np.allclose(b.eval({i: a}), expected)