def train(train_reader, test_reader, model_func, num_sweeps_to_train_with=10):
    """Train the model produced by ``model_func`` and collect per-minibatch stats.

    Parameters:
        train_reader: minibatch source providing ``streams.labels`` / ``streams.features``.
        test_reader: unused here; kept for interface compatibility with callers.
        model_func: callable building the network from the (scaled) input variable.
        num_sweeps_to_train_with: unused here; kept for interface compatibility.

    Returns:
        (train_loss, train_acc): per-minibatch average loss and accuracy lists.
    """
    # Scale raw pixel input to [0, 1] before it reaches the model.
    model = model_func(x / 255)
    loss, label_error = create_criterion_function(model, y)

    learning_rate = 0.2
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    # BUG FIX: the original referenced an undefined name `z` here; the trainer
    # must optimize the freshly built `model`, whose criterion was created above.
    learner = C.sgd(model.parameters, lr_schedule)
    trainer = C.Trainer(model, (loss, label_error), [learner])

    minibatch_size = util.BATCH_SIZE
    num_minibatches_to_train = util.EPOCHS

    input_map = {
        y: train_reader.streams.labels,
        x: train_reader.streams.features
    }

    training_progress_output_freq = 500
    train_loss = []
    train_acc = []
    for i in range(0, int(num_minibatches_to_train)):
        data = train_reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        print_training_progress(trainer, i, training_progress_output_freq, verbose=1)
        train_loss.append(trainer.previous_minibatch_loss_average)
        train_acc.append(1 - trainer.previous_minibatch_evaluation_average)
    return train_loss, train_acc
def test_ext_backpropstate(payload):
    """Verify that the state object a UserFunction returns from forward() is
    handed back unchanged to its backward() call during training."""

    class TestBackPropState(UserFunction):
        def __init__(self, arg, payload, name='f1'):
            self.payload = payload
            super(TestBackPropState, self).__init__([arg])

        def infer_outputs(self):
            # Output mirrors the single input's shape, dtype and dynamic axes.
            return [C.output_variable(self.inputs[0].shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes)]

        def forward(self, argument, device=None, outputs_to_retain=None):
            # Return the payload as the backprop state; input passes through unchanged.
            return self.payload, argument

        def backward(self, state, root_gradients):
            # Core assertion of the test: CNTK round-trips the state untouched.
            assert state == self.payload
            return root_gradients

    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    in1 = C.input_variable(dim, needs_gradient=True, name='i_var')
    m = C.user_function(TestBackPropState(in1, payload))
    z = m + p

    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    # No separate criterion: the model output itself serves as the loss.
    trainer = C.Trainer(None, (z), [C.sgd(z.parameters, lr_per_sample)])

    # 100 updates force forward/backward to run repeatedly.
    for i in range(100):
        input_data = np.random.rand(dim)
        trainer.train_minibatch({in1: [input_data]})
def create_trainer(use_sparse, device):
    """Build a one-layer softmax model and its SGD trainer.

    Parameters:
        use_sparse: whether the input/label variables are sparse.
        device: device on which to allocate the weight parameter.

    Returns:
        (input_var, label_var, weight_param, trainer)
    """
    a = C.input_variable(shape=input_shape, is_sparse=use_sparse, name='input')
    # BUG FIX: the original passed an undefined name `dev`; use the `device`
    # argument that callers supply.
    w = C.parameter(init=w_init, device=device)
    z = times(a, w)

    l = C.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
    loss = cross_entropy_with_softmax(z, l, axis=-1)
    trainer = C.Trainer(z, (loss, None),
                        C.sgd(z.parameters,
                              lr=C.learning_rate_schedule(0.007, C.UnitType.sample)))
    return (a, l, w, trainer)
def create_learner(model):
    '''Create the optimized method'''
    lr_per_minibatch = C.learning_parameter_schedule(opt.lr)
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    # Dispatch on the optimizer name chosen via the command-line options.
    optim = opt.optim
    if optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    if optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_schedule)
    if optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    raise RuntimeError("Invalid optim method: " + opt.optim)
def create_sample_model(device, writer=None,
                        lr_per_sample=C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])):
    """Assemble a tiny plus/reduce_sum model and return (trainer, input, labels)."""
    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))

    # z = in1 + sum(p), with p a learnable bias-like parameter.
    p = parameter(shape=(input_dim,), init=10, device=device)
    z = plus(in1, reduce_sum(p), name='z')

    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    trainer = C.Trainer(z, (ce, errs),
                        [C.sgd(z.parameters, lr_per_sample)],
                        writer)
    return (trainer, in1, labels)
def create_learner(model):
    '''Create the optimized method'''
    # NOTE: this schedule is per-minibatch despite the original's variable name.
    lr_schedule = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_schedule)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_schedule, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_schedule)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
def test_factor_dense_for_prediction():
    """Train a small dense model, factor it with a rank-reducing projection, and
    check that the factored model keeps at least half the original's accuracy."""
    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50
    num_minibatches_to_train = 2000
    minibatch_size = 25
    learning_rate = 0.5

    input = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    z = _create_model_dense(input, input_dim, hidden_layer_dim, num_output_classes)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    # Run the trainer and perform model training.
    # (Removed unused `training_progress_output_freq` and `plotdata` locals.)
    for i in range(0, int(num_minibatches_to_train)):
        features, labels = _generate_random_data_sample(minibatch_size, input_dim,
                                                        num_output_classes)
        # Specify the input variables mapping in the model to actual minibatch data for training
        trainer.train_minibatch({input: features, label: labels})

    # generate some data to predict
    features, labels = _generate_random_data_sample(10, 2, 2)

    # factor the model.
    newz = nc.factor_dense(z, projection_function=_get_rank_reduced_size,
                           filter_function=_filter)
    original_out = C.softmax(z)
    factored_out = C.softmax(newz)

    original_labels_probs = original_out.eval({input: features})
    predicted_label_probs = factored_out.eval({input: features})

    original_prediction_percentage = _percentage_match(labels, original_labels_probs)

    # reduced model should have at least 50% match compared to the original
    # For the test, we reduced the training minibatches, thus the match is lower.
    assert(original_prediction_percentage * 0.5 <= _percentage_match(labels, predicted_label_probs))
def test_clone_freeze():
    """A 'freeze' clone turns parameters into constants that are decoupled
    from later updates to the source graph (and a clone of a constant graph
    is likewise decoupled)."""
    inputs = 3
    outputs = 5

    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)
    weights = C.parameter((inputs, outputs))
    const_weights = C.constant(weights.value)
    z = C.times(features, weights)
    c = C.times(features, const_weights)
    z_clone = z.clone('freeze')
    c_clone = c.clone('freeze')

    def _all_match(values, constants):
        # True iff every (value, constant) pair is element-wise equal.
        return all(np.array_equal(v, q.value) for v, q in zip(values, constants))

    def _none_match(values, constants):
        # True iff every (value, constant) pair differs.
        return all(not np.array_equal(v, q.value) for v, q in zip(values, constants))

    # check that z and z_clone are the same
    assert _all_match([p.value for p in z.parameters], z_clone.constants)
    # check that c and c_clone are the same
    assert _all_match([p.value for p in c.constants], c_clone.constants)

    # keep copies of the old values
    z_copies = [q.value for q in z_clone.constants]
    c_copies = [q.value for q in c_clone.constants]

    # update z by training one minibatch
    trainer = C.Trainer(z, C.squared_error(z, label),
                        C.sgd(z.parameters,
                              C.learning_rate_schedule(1.0, C.UnitType.minibatch)))
    x = np.random.randn(16, 3).astype('f')
    y = np.random.randn(16, 5).astype('f')
    trainer.train_minibatch({features: x, label: y})

    # update c by overwriting its constants directly
    for cc in c.constants:
        cc.value = np.random.randn(*cc.value.shape).astype('f')

    # z changed, but z_clone kept the old values
    assert _none_match([p.value for p in z.parameters], z_clone.constants)
    assert _all_match(z_copies, z_clone.constants)

    # c changed, but c_clone kept the old values
    assert _none_match([p.value for p in c.constants], c_clone.constants)
    assert _all_match(c_copies, c_clone.constants)
def train(nonlinearity, num_hidden_layers, device_id, minibatch_size=10, num_samples=1000):
    """Train a fully connected classifier on random data; return the losses and
    error rates reported at each progress interval."""
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)

    lr_schedule = C.learning_rate_schedule(0.5, C.UnitType.minibatch)
    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)
    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    trainer = C.Trainer(z, (loss, eval_error), [C.sgd(z.parameters, lr_schedule)])

    training_progress_output_freq = 20
    losses = []
    errors = []

    for i in range(int(num_samples / minibatch_size)):
        features, labels = generate_random_data_sample(minibatch_size, input_dim,
                                                       num_output_classes)
        # Map model inputs to the generated minibatch data.
        trainer.train_minibatch({inp: features, label: labels},
                                device=cntk_device(device_id))
        batchsize, loss_val, error = print_training_progress(
            trainer, i, training_progress_output_freq)
        # Only record iterations where progress was actually reported.
        if loss_val != "NA" and error != "NA":
            losses.append(loss_val)
            errors.append(error)

    return losses, errors
def test_data_resize():
    """Exercise unpack_batch/reshape across the batch axis during training."""
    batch_size = 8
    w = C.parameter(shape=(3, 2), name='w1')
    x = C.input_variable(shape=[3], name='x')

    # Flatten the (batch, 2) softmax output into one vector of length 16.
    y = C.softmax(C.times(x, w))
    y = C.reshape(C.unpack_batch(y), [batch_size * 2])
    loss = C.reduce_mean(-C.log(y))

    lr_schedule = C.learning_rate_schedule(0.01, C.UnitType.minibatch)
    learner = C.sgd(y.parameters, lr_schedule,
                    gradient_clipping_threshold_per_sample=1.0)
    trainer = C.Trainer(y, (loss), [learner])

    features = np.random.randn(batch_size, 3)
    trainer.train_minibatch({x: features})
def create_trainer(use_sparse, device):
    """Build a simple recurrent model (input projection plus recurrent feedback)
    and its SGD trainer.

    Parameters:
        use_sparse: whether the input/label variables are sparse.
        device: device on which to allocate the weight parameters.

    Returns:
        (input_var, label_var, input_weight, recurrent_weight, trainer)
    """
    a = C.input_variable(shape=input_shape, is_sparse=use_sparse, name='input')
    # BUG FIX: the original passed an undefined name `dev`; use the `device`
    # argument that callers supply (applied to both weight parameters).
    w_i = C.parameter(init=w_init_i, device=device)
    a_projection = times(a, w_i)

    p_o = C.placeholder_variable()
    h = C.past_value(p_o)
    w_h = C.parameter(init=w_init_h, device=device)
    h_projection = times(h, w_h)

    z = a_projection + h_projection
    # Close the recurrence: the placeholder feeding past_value becomes z itself.
    z = z.replace_placeholder(z)
    z = reshape(z, label_shape)

    l = C.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
    loss = cross_entropy_with_softmax(z, l, axis=-1)
    trainer = C.Trainer(z, (loss, None),
                        C.sgd(z.parameters,
                              lr=C.learning_rate_schedule(0.007, C.UnitType.sample)))
    return (a, l, w_i, w_h, trainer)
def create_distributed_learner(self, mode, config):
    """Wrap a local SGD learner in the distributed learner requested by `mode`
    ('data_parallel' or 'block_momentum'; anything else keeps the local one).

    Returns None when construction raises RuntimeError (distributed training
    unavailable in this build/environment).
    """
    local_learner = C.sgd(self.z.parameters,
                          C.learning_parameter_schedule_per_sample(0.01))
    try:
        if mode == 'data_parallel':
            cfg = config if config is not None else DataParallelConfig(
                num_quantization_bits=32, distributed_after=0)
            learner = C.data_parallel_distributed_learner(
                local_learner,
                num_quantization_bits=cfg.num_quantization_bits,
                distributed_after=cfg.distributed_after)
        elif mode == 'block_momentum':
            # the default config to match data parallel SGD
            cfg = config if config is not None else BlockMomentumConfig(
                block_momentum_as_time_constant=0,
                block_learning_rate=1,
                block_size=NUM_WORKERS,
                distributed_after=0)
            learner = C.block_momentum_distributed_learner(
                local_learner,
                block_momentum_as_time_constant=cfg.block_momentum_as_time_constant,
                block_learning_rate=cfg.block_learning_rate,
                block_size=cfg.block_size,
                distributed_after=cfg.distributed_after)
        else:
            learner = local_learner
    except RuntimeError:
        learner = None
    return learner
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    """Run a training session fed by a UserMinibatchSource, with a second
    source used for cross-validation; optionally exercise the
    checkpoint-capable source variant and verify its restore hook fires."""
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
        classification_error, learning_parameter_schedule_per_sample, sgd, Trainer, \
        training_session, times

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    # Single linear layer over the per-sequence sum of features.
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    # having a large learning rate to prevent the model from converging earlier
    # where not all the intended samples are fed
    # note that training session can end earlier if there is no updates
    lr_per_sample = learning_parameter_schedule_per_sample(0.3)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }

    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config = C.CrossValidationConfig(minibatch_source=mbs_cv, max_samples=10,
                                            minibatch_size=2)
    )
    session.train()

    # max_samples=20 must be honored exactly.
    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        # The CV source's checkpoint restore must have been invoked once.
        assert mbs_cv._restore_from_checkpoint_calls == 1
def train_sequence_classifier():
    """Train an LSTM sequence classifier on the bundled CTF sample data and
    return the last minibatch's (evaluation_average, loss_average)."""
    input_dim = 2000
    hidden_dim = 25
    embedding_dim = 50
    num_classes = 5

    # Input variables denoting the features and label data
    features = C.sequence.input_variable(shape=input_dim, is_sparse=True)
    label = C.input_variable(num_classes)

    # Instantiate the sequence classification model
    classifier_output = lstm_sequence_classifier(features, num_classes,
                                                 embedding_dim, hidden_dim)

    ce = C.cross_entropy_with_softmax(classifier_output, label)
    pe = C.classification_error(classifier_output, label)

    # Data file path is resolved relative to this source file's location.
    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = C.learning_rate_schedule(0.1, C.UnitType.sample)

    # Instantiate the trainer object to drive the model training
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(classifier_output, (ce, pe),
                        C.sgd(classifier_output.parameters, lr=lr_per_sample),
                        progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    # copy.copy snapshots the values before anything else touches the trainer.
    evaluation_average = copy.copy(trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def test_restore_constants(tmpdir):
    """Saving/restoring must round-trip both constants and parameters, for
    trainer checkpoints as well as raw model files."""
    C.device.try_set_default_device(C.device.cpu())

    def _setvalue(x, v):
        # Scalars need a fresh np.array; tensors keep their shape via 0*old + v.
        if len(x.shape) > 0:
            x.value = 0 * x.value + v
        else:
            x.value = np.array(v, dtype=np.float32)

    def _setall(f, v):
        for x in f.constants + f.parameters:
            _setvalue(x, v)

    def _checkall(f, v):
        for x in f.constants + f.parameters:
            assert (x.value == v).all()

    x = C.input_variable(10)
    f = C.layers.BatchNormalization()(x)
    trainer = C.Trainer(f, C.reduce_sum(f),
                        C.sgd(f.parameters, C.learning_rate_schedule(0.1, 'sample')))

    model_filename = str(tmpdir / 'function.out')
    checkpoint_filename = str(tmpdir / 'checkpoint.out')

    _setall(f, 1)
    f.save(model_filename)
    _checkall(f, 1)

    _setall(f, 2)
    trainer.save_checkpoint(checkpoint_filename)
    _checkall(f, 2)

    _setall(f, 3)
    _checkall(f, 3)

    # Checkpoint restore brings back the values captured at save time (2).
    trainer.restore_from_checkpoint(checkpoint_filename)
    _checkall(f, 2)

    # A freshly loaded model carries the values saved to disk (1).
    f2 = C.Function.load(model_filename)
    _checkall(f2, 1)

    _setall(f, 4)
    _checkall(f, 4)
    f.restore(model_filename)
    _checkall(f, 1)

    # The loaded copy is independent of the original function.
    _setall(f2, 5)
    _checkall(f2, 5)
def _train_backcompatible_test(z, loss, eval_error, f_input, l_input,
                               num_output_classes, steps):
    """Run `steps` SGD minibatches over randomly generated 2-D samples."""
    np.random.seed(0)

    input_dim = 2
    minibatch_size = 10

    trainer = Trainer(z, (loss, eval_error),
                      [sgd(z.parameters, learning_parameter_schedule(0.5))])

    for _ in range(steps):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)
        trainer.train_minibatch({f_input: features, l_input: labels})
def test_udf_checkpointing(tmpdir):
    """A trainer over a UDF-based graph must save checkpoints that include
    caller-supplied external state and are loadable as a CNTK Dictionary."""
    dev, w_value, c1_value, c2_value, op = build_test_function()

    label = C.constant(np.asarray([[1, 2], [3, 4]]).astype(np.float32))
    loss = C.cross_entropy_with_softmax(op, label)
    eval_error = C.classification_error(op, label)

    lr_schedule = C.learning_rate_schedule(0.5, C.UnitType.minibatch)
    trainer = C.Trainer(op, (loss, eval_error),
                        [C.sgd(op.parameters, lr_schedule)])

    trainer.train_minibatch(
        {op.arguments[0]: np.random.random((2, 2)).astype(np.float32)},
        device=dev)

    filepath = str(tmpdir / 'test_checkpointing.out')
    trainer.save_checkpoint(filepath, external_state={'test': 'test'})

    # The checkpoint file must be a readable, non-empty dictionary.
    d = C.cntk_py.Dictionary.load(filepath)
    assert len(d.keys()) != 0
def _train(z, loss, eval_error, f_input, l_input, num_output_classes, steps):
    """Run `steps` SGD minibatches with a minibatch-size-agnostic learner."""
    np.random.seed(0)

    input_dim = 2
    minibatch_size = 10

    lr_schedule = C.learning_parameter_schedule(0.5)
    # now we want the learning be compatible with the way in the literature
    # without the per sample benefit:
    learner = sgd(z.parameters, lr_schedule, minibatch_size=C.learners.IGNORE)
    trainer = Trainer(z, (loss, eval_error), [learner])

    for _ in range(steps):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)
        trainer.train_minibatch({f_input: features, l_input: labels})
momentum=C.momentum_schedule(0.9)), lambda params: C.fsadagrad(params, lr=learning_rate_schedule( 1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)), lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)), lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8), lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)), lambda params: C.momentum_sgd(params, lr=learning_rate_schedule( 1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)) ] @pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY) def test_learning_rate_schedule(params, expectation, minibatch_size): l = learning_rate_schedule(*params) assert l.minibatch_size == minibatch_size assert [l[i] for i in range(len(expectation))] == expectation
def main():
    """Train and evaluate a two-output-node binary classifier on the Cleveland
    heart-disease data, then report accuracy on the full data set."""
    print("\nBegin binary classification (two-node technique) \n")
    print("Using CNTK version =" + str(C.__version__) + "\n")

    # define input parameters
    input_dim = 20
    hidden_dim = 20
    output_dim = 2

    train_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              r"../data/hr-cleveland-all-data.txt")

    # 1. create network
    X = C.ops.input_variable(input_dim, dtype=np.float32)
    Y = C.ops.input_variable(output_dim, dtype=np.float32)

    # NOTE(review): the message says "18-20-2" but input_dim is 20 — confirm
    # which number matches the data file's feature count.
    print("Creating a 18-20-2 tanh-softmax NN")
    with C.layers.default_options(
            init=C.initializer.uniform(scale=0.01, seed=1)):
        hLayer = C.layers.Dense(hidden_dim, activation=C.ops.tanh,
                                name='hidLAyer')(X)
        oLayer = C.layers.Dense(output_dim, activation=None,
                                name='outLAyer')(hLayer)
    nnet = oLayer
    # NOTE(review): this reassignment discards the raw-logits binding above, so
    # cross_entropy_with_softmax below operates on an already-softmaxed output
    # (softmax applied twice) — confirm this is intended.
    nnet = C.ops.softmax(oLayer)

    # 2. create learner and trainer
    print("Creating a cross entropy batch=10 SGD LP=0.005 Trainer")
    tr_loss = C.cross_entropy_with_softmax(nnet, Y)
    tr_class = C.classification_error(nnet, Y)

    max_iter = 5000
    batch_size = 10
    learning_rate = 0.005
    learner = C.sgd(nnet.parameters, learning_rate)
    trainer = C.Trainer(nnet, (tr_loss, tr_class), [learner])

    # 3. create reader for train data
    rdr = create_reader(train_file, input_dim, output_dim,
                        rnd_order=False, sweeps=C.io.INFINITELY_REPEAT)
    heart_input_map = {X: rdr.streams.x_src, Y: rdr.streams.y_src}

    # 4. train
    print("\n Starting training")
    for i in range(0, max_iter):
        curr_batch = rdr.next_minibatch(batch_size, input_map=heart_input_map)
        trainer.train_minibatch(curr_batch)
        # Report loss/accuracy ten times over the whole run.
        if i % int(max_iter / 10) == 0:
            mcee = trainer.previous_minibatch_loss_average
            macc = (1.0 - trainer.previous_minibatch_evaluation_average) * 100
            print("batch %4d: mean loss =%0.4f, accuracy = %0.2f" % (i, mcee, macc))
    print("\nTraining complete")

    # 5. evaluate model using all data
    print("\nEvaluating accuracy using built-in test_minibatch() \n")
    rdr = create_reader(train_file, input_dim, output_dim,
                        rnd_order=False, sweeps=1)
    heart_input_map = {X: rdr.streams.x_src, Y: rdr.streams.y_src}
    num_test = 297
    all_test = rdr.next_minibatch(num_test, input_map=heart_input_map)
    acc = (1.0 - trainer.test_minibatch(all_test)) * 100
    print("Classification accuracy on the %d data items = %0.2f" % (num_test, acc))

    # (could save model here)
    # (use trained to make prediction)
    print("\n End Cleveland Heart Disease classification ")
def test_learner_init():
    """Exercise learner construction with the learning_parameter_schedule API:
    reference minibatch sizes, compatible mode, per-learner-type construction,
    and schedule indexing with epoch_size.

    BUG FIX: the epoch_size=512 fsadagrad section previously re-asserted on
    `myadagrad` (copy-paste) instead of the freshly created `myfsadagrad`.
    """
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))
    res = i * w

    # test new API: learning_parameter_schedule

    # explicitly specify reference minibatch size; learning rate is a number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch
    # with a plain-number learning rate, the schedule takes its reference
    # minibatch size from the learner:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    # no reference minibatch size; learning rate given as a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # schedule carries its own reference minibatch size (20); learner's is 25:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # no reference minibatch size; learning rate given as a schedule:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # minibatch_size=IGNORE turns on compatible mode:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # every learner type must honor an explicit minibatch_size=32:
    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9,
                                minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9,
                    variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9,
                              variance_momentum=0.9, minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9,
                            minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsprop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2,
                          dec=0.7, max=10, min=1e-8, minibatch_size=32)
    assert myrmsprop.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule.minibatch_size == 32
    assert myrmsprop.learning_rate() == 0.4

    # list-valued schedules with epoch_size index the rate by sample count:
    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                  minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                                momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                    momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9],
                    minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                          minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                              momentum=[0.9], variance_momentum=[0.9],
                              minibatch_size=32, epoch_size=512)
    # BUG FIX: these five assertions previously checked `myadagrad`.
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsprop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                          gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8,
                          minibatch_size=32, epoch_size=512)
    assert myrmsprop.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule[0] == 0.4
    assert myrmsprop._learning_rate_schedule[512] == 0.1
    assert myrmsprop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # positional / keyword unit_gain variants must all be accepted:
    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum,
               unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3],
                                                minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)],
                                                minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum,
                unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1,
                                                epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
x = cntk.input_variable(2)
y = cntk.input_variable(2)


def getNetwork(_x):
    """Three Dense layers (4-4-2): hidden layers use ReLU, output is linear."""
    with cntk.layers.default_options(init=cntk.layers.glorot_uniform(),
                                     activation=cntk.relu):
        res = _x
        res = cntk.layers.Dense(4, name="l1")(res)
        res = cntk.layers.Dense(4, name="l2")(res)
        res = cntk.layers.Dense(2, name="lo", activation=None)(res)
        return res


fnn = getNetwork(x)
loss = cntk.cross_entropy_with_softmax(fnn, y)
errs = cntk.classification_error(fnn, y)
trainer = cntk.Trainer(
    fnn, (loss, errs),
    [cntk.sgd(fnn.parameters,
              cntk.learning_rate_schedule(0.03, cntk.UnitType.minibatch))])

# One epoch = one pass over the training set; progress shown on one line via \r.
for times in range(1000):
    for data in training_set:
        batch = {x: numpy.array(data[:2], dtype=float32).reshape(2),
                 y: numpy.array(data[2:], dtype=float32).reshape(2)}
        trainer.train_minibatch(batch)
    print("\r" + str(times), end="")
print("")
# print(fnn.lo.b.value)

# Show the predicted class for each of the four boolean input combinations.
out = cntk.softmax(fnn)
print(numpy.argmax(out.eval({x: numpy.array([[0, 0]], dtype=float32).reshape(2)})))
print(numpy.argmax(out.eval({x: numpy.array([[0, 1]], dtype=float32).reshape(2)})))
print(numpy.argmax(out.eval({x: numpy.array([[1, 0]], dtype=float32).reshape(2)})))
print(numpy.argmax(out.eval({x: numpy.array([[1, 1]], dtype=float32).reshape(2)})))
""" Input and output shapes """ feature = C.input((input_dim), np.float32) label = C.input((output_dim), np.float32) # feature = C.input((input_dim), is_sparse=True) # label = C.input((output_dim), np.float32) #netout = self.create_model(input_dim, output_dim, hidden_dim, feature) netout = self.create_model(input_dim, output_dim, hidden_dim, feature) loss = C.squared_error(netout, feature) evaluation = C.squared_error(netout, feature) lr_per_minibatch = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch) learner = C.sgd(netout.parameters, lr=lr_per_minibatch) #learner = C.adagrad(netout.parameters, C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)) progress_printer = C.logging.ProgressPrinter(minibatch_size) trainer = C.Trainer(netout, (loss, evaluation), learner, progress_printer) plotdata = {"loss": []} for epoch in range(100): for i in range(100): d = self.get_next_data(minibatch_size) data = {feature: d, label: d} """ # This is how to get the Numpy typed data from the reader ldata = data[label].asarray() fdata = data[feature].asarray()
def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
                   minibatch_size=1, num_samples=10000):
    """Heuristic memory-leak detector: train a small fully-connected
    classifier on one fixed random minibatch for ``num_samples`` steps,
    sampling process memory before each step, and raise ValueError if
    memory keeps growing beyond the tolerances below.

    Args:
        nonlinearity: activation function under test (passed to the net builder).
        num_hidden_layers: depth of the test network.
        device_id: CNTK device to run on (resolved via cntk_device).
        minibatch_size: samples per training step.
        num_samples: total samples, i.e. num_samples/minibatch_size steps.

    Raises:
        ValueError: if both the fraction of growing iterations and the
            absolute growth exceed their tolerances.
    """
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)  # deterministic data so growth is attributable to training

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    # input_dim / num_output_classes come from module scope.
    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)
    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    mem = np.zeros(num_minibatches_to_train)  # per-iteration memory samples

    features, labels = generate_random_data_sample(minibatch_size,
                                                   input_dim,
                                                   num_output_classes)

    # Set a maximum fraction of iterations, in which the memory is allowed to
    # increase. Most likely these will be the first training runs.
    # Long-term this test needs to be run in a separate process over a longer
    # period of time.
    MEM_INCREASE_FRACTION_TOLERANCE = 0.01
    # Set a maximum allowed memory increase. This tolerance should not be
    # exceeded when run as a standalone process (simply run this file with the
    # Python executable).
    MEM_INCREASE_TOLERANCE = 10 * 1024

    dev = cntk_device(device_id)
    i = 0
    proc = os_process()
    while i < num_minibatches_to_train:
        mem[i] = mem_used(proc)  # sample memory before the training step

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels}, device=dev)
        i += 1

    mem_deltas = np.diff(mem)
    iterations_with_mem_increase = (mem_deltas > 0).sum()
    mem_inc_fraction = iterations_with_mem_increase / num_minibatches_to_train
    # Compare against iteration 10 so allocator warm-up is ignored.
    # NOTE(review): assumes num_minibatches_to_train > 10 — confirm callers.
    mem_diff = mem[-1] - mem[10]

    if mem_inc_fraction > MEM_INCREASE_FRACTION_TOLERANCE and \
            mem_diff > MEM_INCREASE_TOLERANCE:
        # For the rough leak estimation we take the memory footprint after the
        # dust of the first train_minibatch runs has settled.
        mem_changes = mem_deltas[mem_deltas != 0]
        raise ValueError('Potential memory leak of ~ %i KB (%i%% of MBs '
                         'increased memory usage) detected with %s:\n%s' %
                         (int(mem_diff / 1024), int(mem_inc_fraction * 100),
                          nonlinearity, mem_changes))
def do_demo():
    """End-to-end Iris demo: build a 4-2-3 tanh/softmax NN, train it with
    minibatch SGD, evaluate on the test set, predict one unseen flower, and
    dump the learned weights to a file. Returns 0 on success."""
    # create NN, train, test, predict
    input_dim = 4
    hidden_dim = 2
    output_dim = 3
    train_file = "trainData_cntk.txt"
    test_file = "testData_cntk.txt"
    input_Var = C.ops.input(input_dim, np.float32)
    label_Var = C.ops.input(output_dim, np.float32)
    print("Creating a 4-2-3 tanh softmax NN for Iris data ")
    with default_options(init=glorot_uniform()):
        hLayer = C.layers.Dense(hidden_dim, activation=C.ops.tanh,
                                name='hidLayer')(input_Var)
        oLayer = Dense(output_dim, activation=C.ops.softmax,
                       name='outLayer')(hLayer)
    nnet = oLayer
    print("Creating a cross entropy mini-batch Trainer \n")
    # NOTE(review): the output layer already applies softmax and
    # cross_entropy_with_softmax applies softmax again under the hood —
    # confirm this double-softmax is intended before changing.
    ce = C.cross_entropy_with_softmax(nnet, label_Var)
    pe = C.classification_error(nnet, label_Var)
    fixed_lr = 0.05
    lr_per_batch = learning_rate_schedule(fixed_lr, UnitType.minibatch)
    learner = C.sgd(nnet.parameters, lr_per_batch)
    trainer = C.Trainer(nnet, (ce, pe), [learner])

    max_iter = 5000  # maximum number of training iterations
    batch_size = 5  # mini-batch size
    progress_freq = 1000  # print the error every n mini-batches

    reader_train = create_reader(train_file, True, input_dim, output_dim)
    my_input_map = {
        input_Var: reader_train.streams.features,
        label_Var: reader_train.streams.labels
    }
    pp = ProgressPrinter(progress_freq)

    print("Starting training \n")
    for i in range(0, max_iter):
        currBatch = reader_train.next_minibatch(batch_size, input_map=my_input_map)
        trainer.train_minibatch(currBatch)
        pp.update_with_trainer(trainer)
    print("\nTraining complete")

    # ----------------------------------
    print("\nEvaluating test data \n")
    reader_test = create_reader(test_file, False, input_dim, output_dim)
    numTestItems = 30
    allTest = reader_test.next_minibatch(numTestItems, input_map=my_input_map)
    test_error = trainer.test_minibatch(allTest)
    print("Classification error on the 30 test items = %f" % test_error)

    # ----------------------------------
    # Predict the species of an unseen flower
    unknown = np.array([[6.9, 3.1, 4.6, 1.3]], dtype=np.float32)
    print("\nPrevisão de espécies de Íris para as características de entrada:")
    my_print(unknown[0], 1)  # 1 decimal place
    predicted = nnet.eval({input_Var: unknown})
    print("Prediction is: ")
    my_print(predicted[0], 3)  # 3 decimal places

    # ---------------------------------
    print("\nTrained model input-to-hidden weights:\n")
    print(hLayer.hidLayer.W.value)
    print("\nTrained model hidden node biases:\n")
    print(hLayer.hidLayer.b.value)
    print("\nTrained model hidden-to-output weights:\n")
    print(oLayer.outLayer.W.value)
    print("\nTrained model output node biases:\n")
    print(oLayer.outLayer.b.value)
    save_weights("weights.txt", hLayer.hidLayer.W.value, hLayer.hidLayer.b.value,
                 oLayer.outLayer.W.value, oLayer.outLayer.b.value)
    return 0  # success
def create_learner(z, learning_rate=0.5):
    """Build a plain SGD learner for model ``z``.

    Args:
        z: CNTK function whose parameters will be updated.
        learning_rate: per-minibatch learning rate (default 0.5).

    Returns:
        A ``C.sgd`` learner bound to ``z.parameters``.
    """
    schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    return C.sgd(z.parameters, schedule)
# Define the CNTK criterion function. A criterion function maps # (input vectors, labels) to a loss function and an optional additional # metric. The loss function is used to train the model parameters. # We use cross entropy as a loss function. label_one_hot = cntk.input_variable(num_classes, is_sparse=True) loss = cntk.cross_entropy_with_softmax( model, label_one_hot) # this applies softmax to model's output under the hood metric = cntk.classification_error(model, label_one_hot) criterion = cntk.combine( [loss, metric]) # criterion is a tuple-valued function (loss, metric) # Learner object. The learner implements the update algorithm, in this case plain SGD. learning_rate = 0.1 learner = cntk.sgd( model.parameters, cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)) # Trainer. minibatch_size = 32 progress_writer = cntk.logging.ProgressPrinter( 50) # helper for logging progress; log every 50 minibatches trainer = cntk.Trainer(None, criterion, [learner], [progress_writer]) # Train! for i in range(0, len(X_train), minibatch_size): # loop over minibatches x = X_train[i:i + minibatch_size] # get one minibatch worth of data y = Y_train[i:i + minibatch_size] trainer.train_minibatch({ data: x, label_one_hot: y
# Define the CNTK criterion function. A criterion function maps # (input vectors, labels) to a loss function and an optional additional # metric. The loss function is used to train the model parameters. # We use cross entropy as a loss function. # We use CNTK @Function.with_signature to declare a CNTK function with given input types. # The cross-entropy formula requires the labels to be in one-hot format. @cntk.Function.with_signature(cntk.layers.Tensor[input_dim], cntk.layers.SparseTensor[num_classes]) def criterion(data, label_one_hot): z = model(data) # apply model. Computes a non-normalized log probability for every output class. loss = cntk.cross_entropy_with_softmax(z, label_one_hot) # this applies softmax to z under the hood metric = cntk.classification_error(z, label_one_hot) return loss, metric # Learner object. The learner implements the update algorithm, in this case plain SGD. learning_rate = 0.1 learner = cntk.sgd(model.parameters, cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)) # Trainer configuration parameters. progress_writer = cntk.logging.ProgressPrinter(50) # helper for logging progress; log every 50 minibatches # Train! progress = criterion.train((X_train, Y_train), parameter_learners=[learner], callbacks=[progress_writer]) final_loss, final_metric, final_samples = (progress.epoch_summaries[-1].loss, progress.epoch_summaries[-1].metric, progress.epoch_summaries[-1].samples) # Test error rate on the test set. test_metric = criterion.test((X_test, Y_test), callbacks=[progress_writer]).metric # Inspect predictions on one minibatch, for illustration. # For evaluation, we map the output of the network between 0-1 and convert them into probabilities # for the two classes. We use a softmax function to get the probabilities of each of the class. @cntk.Function.with_signature(cntk.layers.Tensor[input_dim])
def asset_trend(self):
    """Train a small feed-forward classifier on engineered price/volume
    features and predict whether the asset goes up or down on the target
    date.

    Returns:
        {"UP": prob} or {"DOWN": prob} with the winning probability, or
        {"Error": message} on any failure (all exceptions are caught).
    """
    try:
        asset_data = self._get_asset_data()

        # Feature name list
        predictor_names = []

        # Column names differ across data providers; accept either casing.
        if "Close" in asset_data and "Volume" in asset_data:
            close_tag = "Close"
            volume_tag = "Volume"
        elif "close" in asset_data and "volume" in asset_data:
            close_tag = "close"
            volume_tag = "volume"
        else:
            return {"Error": "Couldn't find Close|Volume data."}

        # Compute price difference as a feature
        asset_data["diff"] = np.abs(
            (asset_data[close_tag] - asset_data[close_tag].shift(1)) /
            asset_data[close_tag]).fillna(0)
        predictor_names.append("diff")

        # Compute the volume difference as a feature
        asset_data["v_diff"] = np.abs(
            (asset_data[volume_tag] - asset_data[volume_tag].shift(1)) /
            asset_data[volume_tag]).fillna(0)
        predictor_names.append("v_diff")

        # Compute the asset being up (1) or down (0) over different day offsets compared to current closing price
        num_days_back = 8
        for i in range(1, num_days_back + 1):  # i: number of look back days
            asset_data["p_" + str(i)] = np.where(
                asset_data[close_tag] > asset_data[close_tag].shift(i), 1, 0)
            predictor_names.append("p_" + str(i))

        asset_data["next_day"] = np.where(
            asset_data[close_tag].shift(-1) > asset_data[close_tag], 1, 0)
        # The label must be one-hot encoded
        asset_data["next_day_opposite"] = np.where(
            asset_data["next_day"] == 1, 0, 1)

        # Establish the start and end date of our training timeseries
        training_data = asset_data[self.start:self.end]
        training_features = np.asarray(training_data[predictor_names],
                                       dtype="float32")
        training_labels = np.asarray(
            training_data[["next_day", "next_day_opposite"]], dtype="float32")

        # Lets build the network
        input_dim = 2 + num_days_back  # Remember we need to have 2 since we are trying to classify if the market goes up or down 1 hot encoded
        num_output_classes = 2
        num_hidden_layers = 2
        hidden_layers_dim = 2 + num_days_back
        input_dynamic_axes = [C.Axis.default_batch_axis()]
        net_input = C.input_variable(input_dim, dynamic_axes=input_dynamic_axes)
        label = C.input_variable(num_output_classes,
                                 dynamic_axes=input_dynamic_axes)

        z = self._create_model(net_input, num_output_classes,
                               num_hidden_layers, hidden_layers_dim)
        loss = C.cross_entropy_with_softmax(z, label)
        label_error = C.classification_error(z, label)
        lr_per_minibatch = C.learning_parameter_schedule(0.125)
        trainer = C.Trainer(z, (loss, label_error),
                            [C.sgd(z.parameters, lr=lr_per_minibatch)])

        # Initialize the parameters for the trainer, we will train in large minibatches in sequential order
        minibatch_size = 100
        num_minibatches = len(training_data.index) // minibatch_size

        # Run the trainer on and perform model training
        training_progress_output_freq = 1

        # Visualize the loss over minibatch
        plotdata = {"batchsize": [], "loss": [], "error": []}

        # It is key that we make only one pass through the data linearly in time
        num_passes = 1

        # Trim both arrays so they split evenly into num_minibatches chunks.
        l_training_features = len(training_features)
        training_features = training_features[:l_training_features -
                                              (l_training_features % num_minibatches)]
        l_training_labels = len(training_labels)
        training_labels = training_labels[:l_training_labels -
                                          (l_training_labels % num_minibatches)]

        # Train our neural network
        tf = np.split(training_features, num_minibatches)
        tl = np.split(training_labels, num_minibatches)

        for i in range(num_minibatches * num_passes):  # multiply by the
            features = np.ascontiguousarray(tf[i % num_minibatches])
            labels = np.ascontiguousarray(tl[i % num_minibatches])

            # Specify the mapping of input variables in the model to actual minibatch data to be trained with
            trainer.train_minibatch({net_input: features, label: labels})
            batchsize, loss, error = self._print_training_progress(
                trainer, i, training_progress_output_freq, verbose=1)
            if not (loss == "NA" or error == "NA"):
                plotdata["batchsize"].append(batchsize)
                plotdata["loss"].append(loss)
                plotdata["error"].append(error)

        # Now that we have trained the net, and we will do out of sample test to see how we did.
        # and then more importantly analyze how that set did
        test_data = asset_data[self.target_date:self.target_date]
        test_features = np.ascontiguousarray(test_data[predictor_names],
                                             dtype="float32")
        test_labels = np.ascontiguousarray(
            test_data[["next_day", "next_day_opposite"]], dtype="float32")

        avg_error = trainer.test_minibatch({
            net_input: test_features,
            label: test_labels
        })
        log.info("Average error: {0:2.2f}%".format(avg_error * 100))

        sm_out = C.softmax(z)
        predicted_label_prob = sm_out.eval({net_input: test_features})
        test_data["p_up"] = pd.Series(predicted_label_prob[:, 0],
                                      index=test_data.index)
        test_data["p_down"] = predicted_label_prob[:, 1]
        d = test_data.to_dict()
        prob_up = "Fail"
        prob_down = "Fail"
        # The per-column dicts hold the target-date entry; the loops unwrap
        # the (last) value.
        up_d = d["p_up"]
        for k, v in up_d.items():
            prob_up = v
        down_d = d["p_down"]
        for k, v in down_d.items():
            prob_down = v
        if float(prob_up) > float(prob_down):
            k = "UP"
            v = round(prob_up, 2)
        else:
            k = "DOWN"
            v = round(prob_down, 2)
        return {k: v}
    except Exception as e:
        traceback.print_exc()
        log.error(e)
        return {"Error": "Please, check our User's Guide."}
def train_test(train_reader, test_reader, model_func, epoch_size):
    """Train the network built by ``model_func`` with a distributed SGD
    learner via a CNTK ``training_session`` and evaluate it on the test
    source.

    Args:
        train_reader: minibatch source with `features`/`labels` streams.
        test_reader: minibatch source used by TestConfig for evaluation.
        model_func: callable building the network from the (scaled) input.
        epoch_size: samples per epoch (progress reporting frequency).
    """
    # Instantiate the model function; x is the input (feature) variable
    # We will scale the input image pixels within 0-1 range by dividing all input value by 255.
    model = model_func(x / 255)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function(model, y)

    # Instantiate the trainer object to drive the model training
    learning_rate = 0.2
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    # FIX: optimize the parameters of the locally-built `model` (the network
    # the loss is computed from) instead of a module-level `z`. This removes
    # a hidden global dependency and makes the function correct for any
    # `model_func` passed in; when `model_func` is that global model the
    # parameter set is identical, so existing callers see no change.
    learner = C.sgd(model.parameters, lr_schedule)

    # use a distributed learner for multi-GPU training
    # either a data_parallel learner
    distributed_learner = C.distributed.data_parallel_distributed_learner(
        learner=learner, num_quantization_bits=1)
    # or a block momemtum learner
    # distributed_learner = C.train.distributed.block_momentum_distributed_learner(learner, block_size=block_size)

    distributed_sync_report_freq = None
    #if block_size is not None:
    #    distributed_sync_report_freq = 1

    progress_writers = [
        C.logging.ProgressPrinter(
            freq=None,
            tag='Training',
            log_to_file=None,
            rank=C.train.distributed.Communicator.rank(),
            gen_heartbeat=False,
            num_epochs=5,
            distributed_freq=distributed_sync_report_freq)
    ]

    # Same fix as above: the trainer's root model is the local `model`.
    trainer = C.Trainer(model, (loss, label_error), [distributed_learner],
                        progress_writers)

    # Initialize the parameters for the trainer
    minibatch_size = 20000

    # Map the data streams to the input and labels.
    input_map = {
        y: train_reader.streams.labels,
        x: train_reader.streams.features
    }

    # Uncomment below for more detailed logging
    training_progress_output_freq = 500

    # Start a timer
    start = time.time()
    training_session(trainer=trainer,
                     mb_source=train_reader,
                     model_inputs_to_streams=input_map,
                     mb_size=minibatch_size,
                     progress_frequency=epoch_size,
                     test_config=TestConfig(source=test_reader,
                                            mb_size=minibatch_size)).train()

    # Print training time
    print("Training took {:.1f} sec".format(time.time() - start))
def test_learner_empy_parameters_list():
    # sgd() must reject an empty parameter list with ValueError.
    # (The "empy" typo is kept: renaming would change the pytest test id.)
    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    with pytest.raises(ValueError):
        learner = C.sgd([], lr_per_sample)
### Training # Using cross-entropy to measure the loss from the ground truth label = C.input_variable((num_output_classes), np.float32) loss = C.cross_entropy_with_softmax(z, label) eval_error = C.classification_error(z, label) # Configure training """Stochastic gradient descent - Trains over different samples over time to minimize the losses""" """The learning rate is how much we change the parameters in any iteration""" # Instantiate the trainer object to drive the model training learning_rate = 0.5 lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch) learner = C.sgd(z.parameters, lr_schedule) trainer = C.Trainer(z, (loss, eval_error), [learner]) # Defines a utility function to compute the moving average sum """A more efficient implementation is possible with np.cumsum() function""" def moving_average(a, w=10): if len(a) < w: return a[:] return [val if idx < w else sum(a[(idx - w):idx]) / w for idx, val in enumerate(a)] # Defines a utility that prints the training progress def print_training_progress(trainer, mb, frequency, verbose=1): training_loss, eval_error = "NA", "NA" if mb % frequency == 0: training_loss = trainer.previous_minibatch_loss_average
def train_and_test(data_dir):
    """Train an MNIST classifier with CNTK, mirror the same training through
    the ngraph importer, report both frameworks' average test error, and
    save the softmax-wrapped CNTK model."""
    train_file = os.path.join(data_dir, "Train-28x28_cntk_text.txt")
    test_file = os.path.join(data_dir, "Test-28x28_cntk_text.txt")

    input_dim = 784
    output_dim = 10

    input_var = C.input(input_dim)
    label_var = C.input(output_dim)

    # 2 hidden layers of 400 units; pixel values scaled to [0, 1).
    cntk_model = create_model(input_var / 256.0, 2, 400, output_dim)

    cntk_loss = C.cross_entropy_with_softmax(cntk_model, label_var)
    cntk_error = C.classification_error(cntk_model, label_var)

    learning_rate = 0.2
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(cntk_model.parameters, lr_schedule)
    trainer = C.Trainer(cntk_model, (cntk_loss, cntk_error), [learner])

    batch_size = 64

    # ngraph import begin ==================================================================
    ng_model, ng_placeholders = CNTKImporter(batch_size=batch_size).import_model(cntk_model)

    ng_labels = ng.placeholder([ng.make_axis(output_dim), ng.make_axis(batch_size, 'N')])
    ng_placeholders.append(ng_labels)

    transformer = ng.transformers.make_transformer()

    ng_loss = cross_entropy_with_softmax(ng_model, ng_labels)
    parallel_update = CommonSGDOptimizer(learning_rate).minimize(ng_loss, ng_loss.variables())
    training_fun = transformer.computation([ng_loss, parallel_update], *ng_placeholders)

    ng_error = classification_error(ng_model, ng_labels)
    test_fun = transformer.computation(ng_error, *ng_placeholders)
    # ngraph import end ====================================================================

    reader_train = create_reader(train_file, True, input_dim, output_dim)
    train_input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    num_samples = 60000
    num_epochs = 10
    num_minibatches_to_train = (num_samples * num_epochs) / batch_size
    for _ in range(0, int(num_minibatches_to_train)):
        data = reader_train.next_minibatch(batch_size, input_map=train_input_map)
        trainer.train_minibatch(data)

        # ngraph train
        # squeeze + moveaxis: presumably reshapes CNTK's batch-first value
        # layout to the batch-last layout of the ngraph placeholders — confirm.
        features_batch = np.moveaxis(np.squeeze(data[input_var].asarray()), 0, -1)
        labels_batch = np.moveaxis(np.squeeze(data[label_var].asarray()), 0, -1)
        training_fun(features_batch, labels_batch)

    reader_test = create_reader(test_file, False, input_dim, output_dim)
    test_input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels
    }

    cntk_result = 0.0
    ng_error = 0.0
    num_samples = 10000
    num_minibatches_to_test = num_samples // batch_size
    for _ in range(num_minibatches_to_test):
        data = reader_test.next_minibatch(batch_size, input_map=test_input_map)
        cntk_result += trainer.test_minibatch(data)

        # ngraph test
        features_batch = np.moveaxis(np.squeeze(data[input_var].asarray()), 0, -1)
        labels_batch = np.moveaxis(np.squeeze(data[label_var].asarray()), 0, -1)
        ng_error += test_fun(features_batch, labels_batch)

    print("Average CNTK test error: {0:.2f}%".format(cntk_result * 100 / num_minibatches_to_test))
    print("Average ngraph test error: {0:.2f}%".format(ng_error * 100 / num_minibatches_to_test))

    C.softmax(cntk_model).save(os.path.join(MNIST, "MNIST.dnn"))
# NOTE(review): the leading `num_output_classes)` is the tail of a call whose
# opening lies outside this excerpt.
num_output_classes)

# CNTK
input = C.input_variable(input_dim, np.float32)
output_dim = num_output_classes
z = linear_layer(input, output_dim)

label = C.input_variable((num_output_classes), np.float32)
loss = C.cross_entropy_with_softmax(z, label)
eval_error = C.classification_error(z, label)

learning_rate = 0.05
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.sample)
learner = C.sgd(z.parameters, lr_schedule)
trainer = C.Trainer(z, (loss, eval_error), [learner])

# ngraph: import the CNTK loss graph and build an equivalent SGD update.
L, placeholders = CNTKImporter().import_model(loss)
parallel_update = CommonSGDOptimizer(learning_rate).minimize(L, L.variables())
transformer = ng.transformers.make_transformer()
update_fun = transformer.computation([L, parallel_update], *placeholders)

# CNTK training: one sample per minibatch.
for i in range(0, number_of_iterations):
    for xs, ys in zip(features, labels):
        trainer.train_minibatch({input: [xs], label: [ys]})
        training_loss = trainer.previous_minibatch_loss_average
# The cross-entropy formula requires the labels to be in one-hot format. @cntk.Function.with_signature(cntk.layers.Tensor[input_dim], cntk.layers.SparseTensor[num_classes]) def criterion(data, label_one_hot): z = model( data ) # apply model. Computes a non-normalized log probability for every output class. loss = cntk.cross_entropy_with_softmax( z, label_one_hot) # this applies softmax to z under the hood metric = cntk.classification_error(z, label_one_hot) return loss, metric # Learner object. The learner implements the update algorithm, in this case plain SGD. learning_rate = 0.1 learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(learning_rate)) # Trainer configuration parameters. progress_writer = cntk.logging.ProgressPrinter( 50) # helper for logging progress; log every 50 minibatches # Train! progress = criterion.train((X_train, Y_train), parameter_learners=[learner], callbacks=[progress_writer]) final_loss, final_metric, final_samples = ( progress.epoch_summaries[-1].loss, progress.epoch_summaries[-1].metric, progress.epoch_summaries[-1].samples) # Test error rate on the test set. test_metric = criterion.test((X_test, Y_test),
def main():
    """Logistic regression demo: load the age/education/sex data file,
    train a sigmoid model with SGD (minibatch of 1), and print the learned
    weights and bias."""
    print('\nBegin logistic regression training demo')
    ver = C.__version__
    print('(Using CNTK version ' + str(ver) + ')')

    # training data format:
    # 4.0, 3.0, 1
    # 9.0, 5.0, 1
    # . . .

    data_file = '.\\age_edu_sex.txt'
    print('\nLoading data from ' + data_file + '\n')

    features_matrix = np.loadtxt(data_file, dtype=np.float32, delimiter=',',
                                 skiprows=0, usecols=[0, 1])
    print(features_matrix)

    labels_matrix = np.loadtxt(data_file, dtype=np.float32, delimiter=',',
                               skiprows=0, usecols=[2], ndmin=2)
    print(labels_matrix)
    print(labels_matrix.shape)

    print('Training data:')
    combined_matrix = np.concatenate((features_matrix, labels_matrix), axis=1)
    print(combined_matrix)

    # create model
    features_dimension = 2  # x1, x2
    labels_dimension = 1  # always 1 for logistic regression

    X = C.input_variable(features_dimension, np.float32)  # cntk.Variable
    y = C.input_variable(labels_dimension, np.float32)  # correct class value

    W = C.parameter(shape=(features_dimension, 1))  # trainable cntk.Parameter
    b = C.parameter(shape=(labels_dimension))

    z = C.times(X, W) + b  # or z = C.plus(C.times(X, W), b)
    p = 1.0 / (1.0 + C.exp(-z))  # or p = C.sigmoid(z)
    model = p  # create an alias

    # create Learner and Trainer
    cross_entropy_error = C.binary_cross_entropy(
        model, y)  # Cross entropy a bit more principled for Learning Rate
    # squared_error = C.squared_error(model, y)
    learning_rate = 0.010
    learner = C.sgd(
        model.parameters,
        learning_rate)  # stochastic gradient descent, adadelta, adam, nesterov
    trainer = C.Trainer(model, (cross_entropy_error), [learner])
    max_iterations = 4000

    # train
    print('Start training')
    print('Iterations: ' + str(max_iterations))
    print('Learning Rate (LR): ' + str(learning_rate))
    print('Mini-batch = 1')
    np.random.seed(4)
    N = len(features_matrix)
    for i in range(0, max_iterations):
        row = np.random.choice(N, 1)  # pick a random row from training items
        trainer.train_minibatch({
            X: features_matrix[row],
            y: labels_matrix[row]
        })
        if i % 1000 == 0 and i > 0:
            mcee = trainer.previous_minibatch_loss_average
            print(str(i) + ' Cross entropy error on current item = %0.4f ' % mcee)
    print('Training complete')

    # print out results
    np.set_printoptions(precision=4, suppress=True)
    print('Model weights:')
    print(W.value)
    print('Model bias:')
    print(b.value)
def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
                   minibatch_size=1, num_samples=10000):
    """Heuristic memory-leak detector (duplicate of the variant above):
    train a small classifier on one fixed random minibatch, sample process
    memory before each step, and raise ValueError if memory keeps growing
    beyond the tolerances below."""
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)  # deterministic data so growth is attributable to training

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    # input_dim / num_output_classes come from module scope.
    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)
    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    mem = np.zeros(num_minibatches_to_train)  # per-iteration memory samples

    features, labels = generate_random_data_sample(minibatch_size,
                                                   input_dim,
                                                   num_output_classes)

    # Set a maximum fraction of iterations, in which the memory is allowed to
    # increase. Most likely these will be the first training runs.
    # Long-term this test needs to be run in a separate process over a longer
    # period of time.
    MEM_INCREASE_FRACTION_TOLERANCE = 0.01
    # Set a maximum allowed memory increase. This tolerance should not be
    # exceeded when run as a standalone process (simply run this file with the
    # Python executable).
    MEM_INCREASE_TOLERANCE = 10*1024

    dev = cntk_device(device_id)
    i = 0
    proc = os_process()
    while i < num_minibatches_to_train:
        mem[i] = mem_used(proc)  # sample memory before the training step

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels}, device=dev)
        i += 1

    mem_deltas = np.diff(mem)
    iterations_with_mem_increase = (mem_deltas > 0).sum()
    mem_inc_fraction = iterations_with_mem_increase/num_minibatches_to_train
    # Compare against iteration 10 so allocator warm-up is ignored.
    # NOTE(review): assumes num_minibatches_to_train > 10 — confirm callers.
    mem_diff = mem[-1] - mem[10]

    if mem_inc_fraction > MEM_INCREASE_FRACTION_TOLERANCE and \
            mem_diff > MEM_INCREASE_TOLERANCE:
        # For the rough leak estimation we take the memory footprint after the
        # dust of the first train_minibatch runs has settled.
        mem_changes = mem_deltas[mem_deltas != 0]
        raise ValueError('Potential memory leak of ~ %i KB (%i%% of MBs '
                         'increased memory usage) detected with %s:\n%s' %
                         (int(mem_diff/1024), int(mem_inc_fraction*100),
                          nonlinearity, mem_changes))
# Linear model: a single dense layer (times + bias) over `data`.
W = cntk.Parameter((input_dim, num_classes), init=cntk.glorot_uniform(), name='W')
b = cntk.Parameter((num_classes,), init=0, name='b')
model = cntk.times(data, W) + b

# Define the CNTK criterion function. A criterion function maps
# (input vectors, labels) to a loss function and an optional additional
# metric. The loss function is used to train the model parameters.
# We use cross entropy as a loss function.
label_one_hot = cntk.input_variable(num_classes, is_sparse=True)
loss = cntk.cross_entropy_with_softmax(model, label_one_hot)  # this applies softmax to model's output under the hood
metric = cntk.classification_error(model, label_one_hot)
criterion = cntk.combine([loss, metric])  # criterion is a tuple-valued function (loss, metric)

# Learner object. The learner implements the update algorithm, in this case plain SGD.
learning_rate = 0.1
learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(learning_rate))

# Trainer. (model argument is None: the criterion carries the graph.)
minibatch_size = 32
progress_writer = cntk.logging.ProgressPrinter(50)  # helper for logging progress; log every 50 minibatches
trainer = cntk.Trainer(None, criterion, [learner], [progress_writer])

# Train!
for i in range(0, len(X_train), minibatch_size):  # loop over minibatches
    x = X_train[i:i+minibatch_size]  # get one minibatch worth of data
    y = Y_train[i:i+minibatch_size]
    trainer.train_minibatch({data: x, label_one_hot: y})  # update model from one minibatch
trainer.summarize_training_progress()

# Test error rate on the test set.
evaluator = cntk.Evaluator(metric, [progress_writer])
# Flat script: inputs, reader, model, and trainer setup for the Iris data.
input = cntk.input_variable(input_dim)
label = cntk.input_variable(num_output_classes)

# create a reader to read from the file
reader_train = create_reader(
    "D:/Users/Sachit/source/repos/SamplesRepo/IrisData/IrisData/iris-data/trainData_cntk.txt",
    True, input_dim, num_output_classes)

# Create the model
z = create_model(input, hidden_dim, num_output_classes)
loss = cntk.cross_entropy_with_softmax(z, label)
label_error = cntk.classification_error(z, label)

learning_rate = 0.2
lr_schedule = cntk.learning_parameter_schedule(learning_rate)
learner = cntk.sgd(z.parameters, lr_schedule)
trainer = cntk.Trainer(z, (loss, label_error), [learner])

#Init the params for trainer
minibatch_size = 120
num_iterations = 20

# Map the data streams to input and labels
input_map = {
    label: reader_train.streams.labels,
    input: reader_train.streams.features
}
training_output_freq = 2
plotdata = {"batchsize": [], "loss": [], "error": []}
# Each entry appears to be (constructor args, expected per-index values) for
# momentum-schedule tests — confirm against the parametrized test that
# consumes it.
MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2,), [0.2]),
    ((0.2,), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

# Factory lambdas covering the built-in learners, each with a fixed lr of 1
# per minibatch (used to smoke-test every learner type uniformly).
LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.momentum_sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9))]


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    # Legacy API: the schedule must expose the requested minibatch_size and
    # reproduce the expected per-index rates.
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    # New API: learning_parameter_schedule, same contract as above.
    l = learning_parameter_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation
def test_learner_empy_parameters_list():
    # New-API variant: sgd() must reject an empty parameter list with
    # ValueError. ("empy" typo kept: renaming would change the pytest id.)
    lr_per_sample = C.learning_parameter_schedule_per_sample(0.1)
    with pytest.raises(ValueError):
        learner = C.sgd([], lr_per_sample)
    # Tail of a model-factory function whose `def` lies outside this excerpt:
    # one linear layer followed by a sigmoid, with the output named 'o'.
    bias = cntk.parameter(shape=(output_dim), name='b')
    return cntk.sigmoid(cntk.times(input_var, weight) + bias, name='o')

feature = cntk.input_variable(input_dim, np.float32)
model = create_model(feature, num_output_classes)

# Set up inputs and functions used by the trainer
label = cntk.input_variable(num_output_classes, np.float32)
# NOTE(review): squared error is used for both loss and eval on a sigmoid
# output — presumably a simple binary-target demo; confirm.
loss = cntk.squared_error(model, label)
eval_error = cntk.squared_error(model, label)

# Create the trainer using a stochastic gradient descent (sgd) learner
learning_rate = 0.5
lr_schedule = cntk.learning_parameter_schedule(learning_rate)
learner = cntk.sgd(model.parameters, lr_schedule)
trainer = cntk.Trainer(model, (loss, eval_error), [learner])

# Fit the model
for i in range(1000):
    trainer.train_minibatch({feature: features, label: labels})
    if i % 100 == 0:
        print('Batch: {0}, Loss: {1:.4f}, Error: {2:.2f}'.format(
            i, trainer.previous_minibatch_loss_average,
            trainer.previous_minibatch_evaluation_average))

# Save the model for later import into UWP app
model.save('../CNTKUWPApp/model.model')

# Evaluate the model on the training data; threshold the sigmoid output at 0.5.
result = model.eval({feature: features})
predicted = [np.asarray([1], np.float32) if r >= 0.5 else np.asarray([0], np.float32)
             for r in result]
def train_sequence_classifier():
    """Train the LSTM sequence classifier and return its final metrics.

    Reads dimensions from ./data/export_info.json, trains with plain SGD for
    `epoch_max` epochs of `epoch_size` sentences, saving a model snapshot per
    epoch, and returns (evaluation_average, loss_average) of the last
    minibatch.
    """
    import copy  # hoisted from the function tail; used after the loop below

    hidden_dim = 300
    embedding_dim = 300

    # Load the exported configuration file.
    with open("./data/export_info.json", "r") as json_file:
        json_data = json.load(json_file)
    input_dim = int(json_data["wordDimension"])          # one-hot index count
    num_output_classes = int(json_data["labelDimension"])  # number of topics
    print("input_dim:", input_dim)

    # BUG FIX: the relative path was a hard-coded Windows string
    # (r"data\cntk_train_data.tsv"); os.path.join keeps it portable.
    rel_path = os.path.join("data", "cntk_train_data.tsv")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    print("filepath:", path)

    # Declare the training-data format and label variables.
    features = C.sequence.input_variable(
        input_dim, is_sparse=True, name="features")  # word sequence (one-hot vectors)
    label = C.input_variable(num_output_classes, is_sparse=True, name="label",
                             dynamic_axes=C.Axis.default_batch_axis())
    reader = create_reader(path, True, input_dim, num_output_classes)

    # Build the classifier (LSTM layer stack), loss and error metric.
    classifier_output = lstm_sequence_classifier(features, num_output_classes,
                                                 embedding_dim, hidden_dim)
    ce = C.cross_entropy_with_softmax(classifier_output, label)
    pe = C.classification_error(classifier_output, label)

    # Learning rate and SGD trainer.
    lr_per_sample = C.learning_rate_schedule(0.05, C.UnitType.sample)
    trainer = C.Trainer(classifier_output, (ce, pe),
                        C.sgd(classifier_output.parameters, lr=lr_per_sample))

    minibatch_size = 512             # samples per training step (not sentences)
    training_progress_output_freq = 20  # report progress every N loops
    loop_count = 0                   # number of training loops so far
    epoch = 1                        # current epoch (1-based)
    epoch_max = 10                   # stop after this many epochs
    epoch_size = 5000                # sentences per epoch
    samples = 0                      # total sentences trained so far

    # Training loop: one snapshot per epoch, stop after epoch_max epochs.
    while True:
        mb = reader.next_minibatch(minibatch_size, {
            features: reader.streams.features,
            label: reader.streams.labels
        })
        samples += mb[label].num_samples  # sentences trained so far
        trainer.train_minibatch(mb)
        training_loss, eval_crit = print_training_progress(
            trainer, loop_count, training_progress_output_freq, samples, epoch)
        if samples >= epoch_size * epoch:
            # Save a model snapshot at every completed epoch.
            classifier_output.save(
                os.path.join(abs_path, ".", "Models",
                             "lstm_model_epoch{}.dnn".format(epoch)))
            epoch += 1
            if epoch > epoch_max:
                break
        loop_count += 1

    # Metrics of the last trained minibatch.
    evaluation_average = copy.copy(trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
# NOTE(review): collapsed segment — net_model(), top-level trainer wiring,
# cumulative_avg(), and the first two statements of print_progress(), which is
# truncated at the end of this line; code kept verbatim.
# NOTE(review): in cumulative_avg, `np.cumsum(arr, axis=None) / 5` produces a
# whole ARRAY (not a scalar rolling mean) for every index >= diff, and the
# divisor 5 is hard-coded instead of using `diff` — this looks like a bug;
# confirm the intended rolling-average formula before relying on it.
# NOTE(review): `input` here is presumably a CNTK input variable defined
# elsewhere in the file (it shadows the builtin) — confirm.
def net_model(feature): with default_options(init=cntk.glorot_uniform()): layers = Dense(output_classes, activation=None)(feature) return layers input_layer = input / 255.0 net = net_model(input_layer) loss = cntk.cross_entropy_with_softmax(net, labels) error = cntk.classification_error(net, labels) learning_rate = 0.2 learning_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch) learner = cntk.sgd(net.parameters, learning_schedule) trainer = cntk.Trainer(net, (loss, error), [learner]) def cumulative_avg(arr, diff=5): if len(arr) < diff: return arr return [ val if ids < diff else np.cumsum(arr, axis=None) / 5 for ids, val in enumerate(arr) ] def print_progress(trainer, minibatch, freq, flag=True): loss = float() error = float()
def test_learner_init():
    """Exercise learner construction across the whole learner API surface.

    Covers:
      * interaction between the learner's `minibatch_size` and the schedule's,
      * `C.learners.IGNORE` switching on compatible mode,
      * scalar vs. list learning rates (with `epoch_size`) for every learner,
      * unit-gain default toggling.
    """
    i = C.input_variable(shape=(1, ), needs_gradient=True, name='a')
    w = parameter(shape=(1, ))
    res = i * w

    # --- new API: learning_parameter_schedule -------------------------------
    # Explicit reference minibatch size; learning rate given as a number, so
    # the schedule inherits the learner's minibatch size.
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    # No explicit reference minibatch size.
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # Schedule carries its own minibatch size (20); learner keeps its own (25).
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # minibatch_size=IGNORE turns on compatible mode.
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # --- every learner type: scalar lr + explicit minibatch_size ------------
    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9,
                                minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9,
                    variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9,
                              variance_momentum=0.9, minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9,
                            minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsprop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2,
                          dec=0.7, max=10, min=1e-8, minibatch_size=32)
    assert myrmsprop.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule.minibatch_size == 32
    assert myrmsprop.learning_rate() == 0.4

    # --- every learner type: list lr schedule + epoch_size ------------------
    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                  minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                                momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                    momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9],
                    minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                          minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                              momentum=[0.9], variance_momentum=[0.9],
                              minibatch_size=32, epoch_size=512)
    # BUG FIX: these five asserts previously re-checked `myadagrad` (a
    # copy-paste slip), so the fsadagrad learner was never verified.
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                            momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsprop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001],
                          gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8,
                          minibatch_size=32, epoch_size=512)
    assert myrmsprop.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule.minibatch_size == 32
    assert myrmsprop._learning_rate_schedule[0] == 0.4
    assert myrmsprop._learning_rate_schedule[512] == 0.1
    assert myrmsprop._learning_rate_schedule[512 * 2] == 0.001

    # Learners expose their parameters as cntk Parameter objects.
    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    # --- unit-gain default toggling -----------------------------------------
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value
    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)
    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value
    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    # Renamed from max/min to avoid shadowing the builtins.
    gamma, inc, dec, maximum, minimum = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, maximum, minimum, True)
    C.adadelta(res.parameters, lr_per_sample)
def one_step_sgd(loss, data, lr=0.1):
    """Run a single SGD minibatch update on `loss` with input `data`."""
    schedule = C.learning_parameter_schedule(lr)
    optimizer = C.sgd(loss.parameters, schedule)
    progress = C.logging.ProgressPrinter(freq=0)
    trainer = C.train.Trainer(loss, (loss, loss), optimizer, progress)
    trainer.train_minibatch(data)
def main():
    """Logistic regression on the breast-cancer-wisconsin data set.

    Loads the CSV, drops NaN rows, min-max scales the 9 features, encodes
    labels (benign=2 -> 0, malignant=4 -> 1), trains a sigmoid model with
    per-item SGD, then prints and saves the learned weights and bias.
    """
    # HEADERS
    print(
        '\n Begin logistic regression on breast-cancer-wisconsin data training'
    )
    ver = C.__version__
    print('(Using CNTK version ' + str(ver) + ')')

    # LOADING DATA
    data_file = '.\\breast-cancer-wisconsin.data'
    print('\nLoading data from ' + data_file + '\n')
    data_matrix = np.genfromtxt(data_file,
                                dtype=np.float32,
                                delimiter=',',
                                usecols=range(1, 11))

    # BUG FIX: the original nested 699x10 scan only assigned an unused local
    # (`location`) per NaN cell — dead code — and has been removed; the
    # vectorized filter below already drops every row containing a NaN.
    filtered_data_matrix = data_matrix[~np.isnan(data_matrix).any(axis=1)]
    sorted_by_label_data_matrix = filtered_data_matrix[
        filtered_data_matrix[:, 9].argsort()]
    np.savetxt('sorted-breast-cancer-wisconsin.data',
               sorted_by_label_data_matrix,
               delimiter=',',
               newline='\n')

    # features matrix, min-max scaled into [0, 1]
    unnorm_features_matrix = sorted_by_label_data_matrix[:, 0:9]
    min_max_scaler = preprocessing.MinMaxScaler()
    features_matrix = min_max_scaler.fit_transform(unnorm_features_matrix)

    # labels matrix - sorted and encoded to 0 or 1 (raw labels are 2/4)
    unshaped_labels_matrix = sorted_by_label_data_matrix[:, 9]
    uncoded_labels_matrix = np.reshape(unshaped_labels_matrix, (-1, 1))
    labels_logic_matrix = uncoded_labels_matrix > 2
    labels_matrix = labels_logic_matrix.astype(np.float32)

    # making training data
    print('Training data:')
    combined_matrix = np.concatenate((features_matrix, labels_matrix), axis=1)

    # create a model
    features_dimension = 9  # x1..x9
    labels_dimension = 1    # always 1 for logistic regression, y
    X = C.input_variable(features_dimension, np.float32)  # cntk.Variable
    y = C.input_variable(labels_dimension, np.float32)    # correct class value
    W = C.parameter(shape=(features_dimension, 1))        # trainable Parameter
    b = C.parameter(shape=(labels_dimension))
    z = C.times(X, W) + b    # or z = C.plus(C.times(X, W), b)
    p = 1.0 / (1.0 + C.exp(-z))  # or p = C.sigmoid(z)
    model = p  # create 'model' alias

    # create learner
    cross_entropy_error = C.binary_cross_entropy(model, y)
    learning_rate = 0.01
    learner = C.sgd(model.parameters, learning_rate)

    # create trainer
    trainer = C.Trainer(model, (cross_entropy_error), [learner])
    max_iterations = 5000

    # train: one random row per minibatch
    print('Start training')
    print('Iterations: ' + str(max_iterations))
    print('Learning Rate (LR): ' + str(learning_rate))
    print('Mini-batch = 1')
    np.random.seed(4)
    N = len(features_matrix)
    for i in range(0, max_iterations):
        row = np.random.choice(N, 1)
        trainer.train_minibatch({
            X: features_matrix[row],
            y: labels_matrix[row]
        })
        if i % 1000 == 0 and i > 0:
            mcee = trainer.previous_minibatch_loss_average
            print(
                str(i) + ' Cross entropy error on current item = %0.4f ' %
                mcee)
    print('Training complete')

    # print out results - weights and bias
    np.set_printoptions(precision=4, suppress=True)
    print('Model weights:')
    print(W.value)
    print('Model bias:')
    print(b.value)

    # save results
    print('\nSaving files:')
    weights_file_name = str(learning_rate) + '-' + str(
        max_iterations) + '_' + 'weights' + '.txt'
    bias_file_name = str(learning_rate) + '-' + str(
        max_iterations) + '_' + 'bias' + '.txt'
    print(weights_file_name)
    print(bias_file_name)
    np.savetxt(weights_file_name, W.value)
    np.savetxt(bias_file_name, b.value)
    print('Saving complete')
    print('\n End training\n')
def main():
    """Two-node binary classification of the Cleveland heart-disease data.

    Builds a 12-20-2 tanh/softmax network, trains it with plain SGD from a
    CTF reader, exports it as ONNX, evaluates accuracy on a held-out set and
    runs one ad-hoc prediction.
    """
    print("\nBegin binary classification (two-node technique) \n")
    print("Using CNTK version = " + str(C.__version__) + "\n")

    dirname = os.path.dirname(__file__)
    input_dim = 12
    hidden_dim = 20
    output_dim = 2
    # BUG FIX: these Windows paths were plain strings relying on "\H", "\T",
    # "\A" not being recognized escapes; raw strings keep the exact same
    # bytes while avoiding Python's invalid-escape DeprecationWarning.
    onnx_path = os.path.join(dirname, r"..\HeartDiseasePrediction\Assets")
    train_file = os.path.join(dirname, r"input\TrainingData.txt")
    test_file = os.path.join(dirname, r"input\TestData.txt")

    # 1. create network
    X = C.ops.input_variable(input_dim, np.float32)
    Y = C.ops.input_variable(output_dim, np.float32)
    print("Creating a 12-20-2 tanh-softmax NN ")
    with C.layers.default_options(
            init=C.initializer.uniform(scale=0.01, seed=1)):
        hLayer = C.layers.Dense(hidden_dim, activation=C.ops.tanh,
                                name='hidLayer')(X)
        oLayer = C.layers.Dense(output_dim, activation=None,
                                name='outLayer')(hLayer)
    nnet = oLayer                 # raw logits used for training
    model = C.ops.softmax(nnet)   # normalized probabilities for inference

    # 2. create learner and trainer
    print("Creating a cross entropy batch=10 SGD LR=0.005 Trainer ")
    tr_loss = C.cross_entropy_with_softmax(nnet, Y)
    tr_clas = C.classification_error(nnet, Y)
    max_iter = 5000
    batch_size = 10
    learn_rate = 0.005
    learner = C.sgd(nnet.parameters, learn_rate)
    trainer = C.Trainer(nnet, (tr_loss, tr_clas), [learner])

    # 3. create reader for train data
    rdr = create_reader(train_file, input_dim, output_dim, rnd_order=True,
                        sweeps=C.io.INFINITELY_REPEAT)
    heart_input_map = {X: rdr.streams.x_src, Y: rdr.streams.y_src}

    # 4. train, logging loss/accuracy ten times over the run
    print("\nStarting training \n")
    for i in range(0, max_iter):
        curr_batch = rdr.next_minibatch(batch_size, input_map=heart_input_map)
        trainer.train_minibatch(curr_batch)
        if i % int(max_iter / 10) == 0:
            mcee = trainer.previous_minibatch_loss_average
            macc = (1.0 - trainer.previous_minibatch_evaluation_average) * 100
            print("batch %4d: mean loss = %0.4f, accuracy = %0.2f%% " %
                  (i, mcee, macc))
    trainer.summarize_training_progress()
    print("\nTraining complete")

    # Export as ONNX
    model.save(os.path.join(onnx_path, "Heart.onnx"),
               format=C.ModelFormat.ONNX)

    # 5. evaluate model using all data
    print("\nEvaluating accuracy using built-in test_minibatch() \n")
    rdr = create_reader(test_file, input_dim, output_dim, rnd_order=False,
                        sweeps=1)
    heart_input_map = {X: rdr.streams.x_src, Y: rdr.streams.y_src}
    num_test = 91
    all_test = rdr.next_minibatch(num_test, input_map=heart_input_map)
    acc = (1.0 - trainer.test_minibatch(all_test)) * 100
    print("Classification accuracy on the %d data items = %0.2f%%" %
          (num_test, acc))

    # (use trained model to make prediction)
    unknown = np.array(
        [1, 0, 0, 0, 1, 2, 0.0370370373, 0, 0.832061052, 0, 1, 0.6458333],
        dtype=np.float32)
    predicted = model.eval(unknown)
    print(predicted)
    print("\nEnd Cleveland Heart Disease classification ")
# policy_gradient: REINFORCE-style training of a one-hidden-layer policy
# network. Code kept verbatim (statement order is load-bearing); review notes:
# - `input(...)` here is the deprecated cntk alias, shadowing the builtin —
#   presumably imported elsewhere in the file; confirm.
# - Mixes `np.` and `numpy.`/`numpy.random.choice`, so both names must be
#   imported at module level — confirm.
# - `env.action_space['n']` uses dict-style access; gym exposes `.n` — confirm
#   this env wrapper supports subscripting.
# - Gradients for W1/W2 are accumulated in gradBuffer per episode and applied
#   manually via `sgd.update(...)` every BATCH_SIZE_BASELINE episodes.
def policy_gradient(): import cntk as C TOTAL_EPISODES = 2000 if isFast else 10000 H = 100 # number of hidden layer neurons observations = input(STATE_COUNT, np.float32, name="obs") W1 = C.parameter(shape=(STATE_COUNT, H), init=C.glorot_uniform(), name="W1") b1 = C.parameter(shape=H, name="b1") layer1 = C.relu(C.times(observations, W1) + b1) W2 = C.parameter(shape=(H, ACTION_COUNT), init=C.glorot_uniform(), name="W2") b2 = C.parameter(shape=ACTION_COUNT, name="b2") score = C.times(layer1, W2) + b2 # Until here it was similar to DQN probability = C.sigmoid(score, name="prob") input_y = input(1, np.float32, name="input_y") advantages = input(1, np.float32, name="advt") loss = -C.reduce_mean(C.log(C.square(input_y - probability) + 1e-4) * advantages, axis=0, name='loss') lr = 1e-4 lr_schedule = learning_rate_schedule(lr, UnitType.sample) sgd = C.sgd([W1, W2], lr_schedule) gradBuffer = dict((var.name, np.zeros(shape=var.shape)) for var in loss.parameters if var.name in ['W1', 'W2', 'b1', 'b2']) xs, hs, label, drs = [], [], [], [] running_reward = None reward_sum = 0 episode_number = 1 observation = env.reset() actionlist = [i for i in range(env.action_space['n']) ] #%% while episode_number <= TOTAL_EPISODES: x = np.reshape(observation, [1, STATE_COUNT]).astype(np.float32) # Run the policy network and get an action to take.
# Per-step loop body: sample an action from the policy output, step the env,
# record the observation, "fake label" and reward for the episode buffers.
#prob = probability.eval(arguments={observations: x})[0][0][0] prob = probability.eval(arguments={observations: x}) normalized_weights = (prob / np.sum(prob))[0][0] action = numpy.random.choice(actionlist, p=normalized_weights) #action = 1 if np.random.uniform() < prob else 0 xs.append(x) # observation # grad that encourages the action that was taken to be taken y = 1 if action == 0 else 0 # a "fake label" label.append(y) # step the environment and get new measurements observation, reward, done, info = env.step(action) reward_sum += float(reward) # Record reward (has to be done after we call step() to get reward for previous action) drs.append(float(reward)) if done: # Stack together all inputs, hidden states, action gradients, and rewards for this episode epx = np.vstack(xs) epl = np.vstack(label).astype(np.float32) epr = np.vstack(drs).astype(np.float32) xs, label, drs = [], [], [] # reset array memory # Compute the discounted reward backwards through time. discounted_epr = discount_rewards(epr) # Size the rewards to be unit normal (helps control the gradient estimator variance) discounted_epr -= np.mean(discounted_epr) discounted_epr /= (np.std(discounted_epr) + 0.000000000001) # Forward pass arguments = {observations: epx, input_y: epl, advantages: discounted_epr} state, outputs_map = loss.forward(arguments, outputs=loss.outputs, keep_for_backward=loss.outputs) # Backward pass root_gradients = {v: np.ones_like(o) for v, o in outputs_map.items()} vargrads_map = loss.backward(state, root_gradients, variables=set([W1, W2])) for var, grad in vargrads_map.items(): gradBuffer[var.name] += grad # Wait for some batches to finish to reduce noise if episode_number % BATCH_SIZE_BASELINE == 0: grads = {W1: gradBuffer['W1'].astype(np.float32), W2: gradBuffer['W2'].astype(np.float32)} updated = sgd.update(grads, BATCH_SIZE_BASELINE) # reset the gradBuffer gradBuffer = dict((var.name, np.zeros(shape=var.shape)) for var in loss.parameters if var.name in ['W1', 'W2', 'b1',
# Tail of the episode loop: progress report, early stop once the running
# average reward clears REWARD_TARGET, then env reset; the trained policy is
# saved to 'pg.mod' after the loop ends.
'b2']) print('Episode: %d. Average reward for episode %f.' % (episode_number, reward_sum / BATCH_SIZE_BASELINE)) if reward_sum / BATCH_SIZE_BASELINE > REWARD_TARGET: print('Task solved in: %d ' % episode_number) break reward_sum = 0 observation = env.reset() # reset env episode_number += 1 probability.save('pg.mod')
def train_test(train_reader, test_reader, model_func, num_sweeps_to_train_with=10):
    """Train the model for the given number of sweeps, then print the
    average classification error over the 10k-sample test set."""
    # Build the network; pixel values are scaled into [0, 1] by dividing
    # each input by 255.
    model = model_func(x / 255)
    loss, label_error = create_criterion_function(model, y)

    # Trainer with a plain SGD learner at a fixed learning rate of 0.2.
    # NOTE(review): the learner/trainer are built on the global `z`, not the
    # local `model` — presumably the same network; confirm.
    lr_schedule = C.learning_parameter_schedule(0.2)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, label_error), [learner])

    # Training-run bookkeeping.
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) / minibatch_size

    # Bind reader streams to the network inputs.
    input_map = {
        y: train_reader.streams.labels,
        x: train_reader.streams.features
    }
    training_progress_output_freq = 500

    # Timed training loop.
    start = time.time()
    for step in range(0, int(num_minibatches_to_train)):
        batch = train_reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(batch)
        helper.print_training_progress(trainer, step,
                                       training_progress_output_freq,
                                       verbose=1)
    print("Training took {:.1f} sec".format(time.time() - start))

    # Evaluation over the held-out set, averaged across fixed-size batches.
    test_input_map = {
        y: test_reader.streams.labels,
        x: test_reader.streams.features
    }
    test_minibatch_size = 512
    num_samples = 10000
    num_minibatches_to_test = num_samples // test_minibatch_size
    test_result = 0.0
    for _ in range(num_minibatches_to_test):
        batch = test_reader.next_minibatch(test_minibatch_size,
                                           input_map=test_input_map)
        test_result += trainer.test_minibatch(batch)
    print("Average test error: {0:.2f}%".format(
        test_result * 100 / num_minibatches_to_test))
# NOTE(review): script entry point, truncated — the final `init_reader(...)`
# call is cut off at the end of this chunk, so the code is kept verbatim.
# Observations: selects the ConvolutionalMaxPooling model (the MLP line is
# commented out), wires an SGD learner at LR 0.2 per minibatch, and attaches a
# TensorWriter progress writer to the Trainer before loading the MNIST reader.
if __name__ == '__main__': set_devices() input_dim = 784 input_dim_model = (1, 28, 28) num_output_classes = 10 #model_definition = MlPerceptron(input_dim, num_output_classes) model_definition = ConvolutionalMaxPooling(input_dim_model, num_output_classes) learning_rate = 0.2 lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch) learner = cntk.sgd(model_definition.model.parameters, lr_schedule) tensor_writer = TensorWriter(model_definition.model) trainer = cntk.Trainer(model_definition.model, (model_definition.get_loss(), model_definition.get_classification_error()), [learner], tensor_writer.get_writer()) # Trainning minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size reader_train = init_reader(join_paths(MNIST_DATA_FOLDER, 'train.txt'),
# NOTE(review): collapsed fragment — the tail of a create_model(...) CNN
# definition (second conv/pool + dense classifier) whose header is not
# visible, followed by a top-level per-sample training loop; kept verbatim.
# Review notes grounded in the line below:
# - `cnn = create_model(x)` then `cnn = cnn(x / 255)` applies the model
#   function and then re-applies it to scaled input — presumably the first
#   call returns an unbound layer function; confirm.
# - In the label dict entry, the comprehension variable `x` shadows the
#   network input `x` used as the other dict key (legal in Python 3 since
#   comprehension scope is local, but easy to misread).
strides=(1, 1), pad=True, name="second_conv")(h) h = cntk.layers.MaxPooling(filter_shape=(3, 3), strides=(3, 3), name="second_max")(h) r = cntk.layers.Dense(10, activation=None, name="classify")(h) return r cnn = create_model(x) cnn = cnn(x / 255) loss = cntk.cross_entropy_with_softmax(cnn, y) errs = cntk.classification_error(cnn, y) trainer = cntk.Trainer(cnn, (loss, errs), [ cntk.sgd(cnn.parameters, cntk.learning_rate_schedule(0.0105, cntk.UnitType.minibatch)) ]) count = 0 begin_time = time.time() for data in training_set: trainer.train_minibatch({ x: numpy.array(data[1:], dtype=float32).reshape(1, 28, 28), y: numpy.array([1 if x == int(data[0]) else 0 for x in range(10)], dtype=float32) }) count += 1 print("\r%.2f%%" % (count / len(training_set) * 100), file=sys.stderr, end="")
def main():
    """Build a one-hidden-layer classifier, train it from the CTF reader,
    then report the average error over the held-out test set."""
    # -- model creation -------------------------------------------------------
    num_hidden_layers = 1
    hidden_layers_dim = 64
    inputs = cntk.input_variable(input_dim)
    labels = cntk.input_variable(num_output_classes)
    # Normalize the features into [0, 1] and create the model.
    z = create_model(inputs / 255.0, num_hidden_layers, hidden_layers_dim)

    # -- training setup -------------------------------------------------------
    loss = cntk.cross_entropy_with_softmax(z, labels)
    error = cntk.classification_error(z, labels)
    learning_rate = 1e-10  # NOTE(review): extremely small LR — confirm intended
    lr_schedule = cntk.learning_rate_schedule(learning_rate,
                                              cntk.UnitType.sample)
    learner = cntk.sgd(z.parameters, lr_schedule)
    trainer = cntk.Trainer(z, (loss, error), [learner])

    minibatch_size = 64
    epoch_size = 50000  # because there are 50000 training samples
    num_epochs = 5
    num_minibatches_to_train = (epoch_size * num_epochs) / minibatch_size

    reader_train = create_reader(train_file, True, input_dim,
                                 num_output_classes)
    input_map = {
        labels: reader_train.streams.labels,
        inputs: reader_train.streams.features
    }

    # -- training loop --------------------------------------------------------
    training_progress_output_freq = 500
    plotdata = {"batchsize": [], "loss": [], "error": []}
    for step in range(0, int(num_minibatches_to_train)):
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        batchsize, batch_loss, batch_error = print_training_progress(
            trainer, step, training_progress_output_freq, verbose=1)

    # -- evaluation -----------------------------------------------------------
    reader_test = create_reader(test_file, False, input_dim,
                                num_output_classes)
    test_input_map = {
        labels: reader_test.streams.labels,
        inputs: reader_test.streams.features,
    }
    test_minibatch_size = 512
    num_samples = 10000
    num_minibatches_to_test = num_samples // test_minibatch_size
    test_result = 0.0
    for _ in range(num_minibatches_to_test):
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)
        test_result += trainer.test_minibatch(data)
    print("Average test error: {0:.2f}%".format(
        test_result * 100 / num_minibatches_to_test))
def main():
    """Train a logistic-regression model on the visceral-fat-rating data,
    print the learned parameters, and save them to text files.

    Relies on module-level helpers: ``load_data``, ``check_for_NaN``,
    ``sort_data_by_column``, ``linear_layer``, plus ``preprocessing`` from
    sklearn, ``np`` and ``C`` (cntk).
    """
    # --- load and prepare the data ---
    raw_matrix = load_data('.\\visceral-fat-rating.data')
    cleaned_matrix = check_for_NaN(raw_matrix)
    ordered_matrix = sort_data_by_column(cleaned_matrix, 13)
    #save_data(ordered_matrix, 'sorted_visceral-fat-rating.data')

    # First 13 columns are features; min-max scale them.
    scaler = preprocessing.MinMaxScaler()
    features_matrix = scaler.fit_transform(ordered_matrix[:, 0:13])

    # Last column holds the labels, reshaped to an (N, 1) column vector.
    labels_matrix = np.reshape(ordered_matrix[:, 13], (-1, 1))

    print(' Training data:')
    print(np.concatenate((features_matrix, labels_matrix), axis=1))

    # --- build the model: linear layer followed by a sigmoid ---
    features_dimension = 13
    labels_dimension = 1
    X = C.input_variable(features_dimension, np.float32)
    y = C.input_variable(labels_dimension, np.float32)
    z, W, b = linear_layer(X, features_dimension, labels_dimension)
    model = 1.0 / (1.0 + C.exp(-z))   # sigmoid of the linear output

    # --- learner and trainer ---
    cee = C.cross_entropy_with_softmax(model, y)
    eval_error = C.classification_error(model, y)
    learning_rate = 0.1
    learner = C.sgd(model.parameters, learning_rate)
    trainer = C.Trainer(model, (cee, eval_error), [learner])

    # --- train on one randomly chosen row per iteration ---
    max_iterations = 8000
    np.random.seed(4)
    N = len(features_matrix)
    for iteration in range(max_iterations):
        row = np.random.choice(N, 1)
        trainer.train_minibatch({X: features_matrix[row],
                                 y: labels_matrix[row]})
        if iteration % 1000 == 0 and iteration > 0:
            mcee = trainer.previous_minibatch_loss_average
            print(str(iteration) + ' Cross entropy error on current item = %0.4f ' %mcee)

    # --- print out results: weights and bias ---
    np.set_printoptions(precision=4, suppress=True)
    print('Model weights:')
    print(W.value)
    print('Model bias:')
    print(b.value)

    # --- save results ---
    print('\nSaving files:')
    prefix = str(learning_rate) + '-' + str(max_iterations) + '_'
    weights_file_name = prefix + 'weights' + '.txt'
    bias_file_name = prefix + 'bias' + '.txt'
    print(weights_file_name)
    print(bias_file_name)
    np.savetxt(weights_file_name, W.value)
    np.savetxt(bias_file_name, b.value)
    print('Saving complete')

    ##########################
    print('\n ### End training\n')
def main():
    """Train a 12-20-2 tanh network on the Cleveland heart-disease data,
    export it to ONNX, and evaluate accuracy on the held-out test set.

    Relies on the module-level helper ``create_reader`` and imports
    ``C`` (cntk), ``np`` (numpy) and ``os``.
    """
    print("\nBegin binary classification (two-node technique) \n")
    print("Using CNTK version = " + str(C.__version__) + "\n")

    dirname = os.path.dirname(__file__)

    input_dim = 12
    hidden_dim = 20
    output_dim = 2

    # Build paths from components instead of embedded backslashes: the
    # originals ("..\HeartDiseasePrediction\Assets", "input\TrainingData.txt")
    # contained invalid escape sequences (\H, \T, \A) and were Windows-only.
    # os.path.join yields the same paths on Windows and works elsewhere too.
    onnx_path = os.path.join(dirname, "..", "HeartDiseasePrediction", "Assets")
    train_file = os.path.join(dirname, "input", "TrainingData.txt")
    test_file = os.path.join(dirname, "input", "TestData.txt")

    # 1. create network
    X = C.ops.input_variable(input_dim, np.float32)
    Y = C.ops.input_variable(output_dim, np.float32)

    print("Creating a 12-20-2 tanh-softmax NN ")
    with C.layers.default_options(init=C.initializer.uniform(scale=0.01, seed=1)):
        hLayer = C.layers.Dense(hidden_dim, activation=C.ops.tanh,
                                name='hidLayer')(X)
        oLayer = C.layers.Dense(output_dim, activation=None,
                                name='outLayer')(hLayer)
    nnet = oLayer
    # Softmax stays outside 'nnet' because the training loss below applies
    # softmax internally; 'model' is what gets saved and used for prediction.
    model = C.ops.softmax(nnet)

    # 2. create learner and trainer
    print("Creating a cross entropy batch=10 SGD LR=0.005 Trainer ")
    tr_loss = C.cross_entropy_with_softmax(nnet, Y)
    tr_clas = C.classification_error(nnet, Y)

    max_iter = 5000
    batch_size = 10
    learn_rate = 0.005
    learner = C.sgd(nnet.parameters, learn_rate)
    trainer = C.Trainer(nnet, (tr_loss, tr_clas), [learner])

    # 3. create reader for train data
    rdr = create_reader(train_file, input_dim, output_dim,
                        rnd_order=True, sweeps=C.io.INFINITELY_REPEAT)
    heart_input_map = {
        X: rdr.streams.x_src,
        Y: rdr.streams.y_src
    }

    # 4. train
    print("\nStarting training \n")
    # Hoisted out of the loop; '//' replaces the original int(max_iter/10).
    progress_freq = max_iter // 10
    for i in range(max_iter):
        curr_batch = rdr.next_minibatch(batch_size, input_map=heart_input_map)
        trainer.train_minibatch(curr_batch)
        if i % progress_freq == 0:
            mcee = trainer.previous_minibatch_loss_average
            macc = (1.0 - trainer.previous_minibatch_evaluation_average) * 100
            print("batch %4d: mean loss = %0.4f, accuracy = %0.2f%% " % (i, mcee, macc))
    trainer.summarize_training_progress()
    print("\nTraining complete")

    # Export as ONNX
    model.save(os.path.join(onnx_path, "Heart.onnx"), format=C.ModelFormat.ONNX)

    # 5. evaluate model using all data
    print("\nEvaluating accuracy using built-in test_minibatch() \n")
    rdr = create_reader(test_file, input_dim, output_dim, rnd_order=False, sweeps=1)
    heart_input_map = {
        X: rdr.streams.x_src,
        Y: rdr.streams.y_src
    }
    num_test = 91
    all_test = rdr.next_minibatch(num_test, input_map=heart_input_map)
    acc = (1.0 - trainer.test_minibatch(all_test)) * 100
    print("Classification accuracy on the %d data items = %0.2f%%" % (num_test, acc))

    # Use the trained model to make a prediction for one unseen patient.
    unknown = np.array([1, 0, 0, 0, 1, 2, 0.0370370373, 0, 0.832061052,
                        0, 1, 0.6458333], dtype=np.float32)
    predicted = model.eval(unknown)
    print(predicted)

    print("\nEnd Cleveland Heart Disease classification ")
# (constructor args, expected per-step momentum values) pairs.
MOMENTUM_SCHEDULE_PARAMS = [
        ((0.2,), [0.2]),
        ((0.2,), [0.2, 0.2, 0.2, 0.2]),
        (([0.2,0.4], 5), [0.2]*5+[0.4]*20),
        (([(3,0.2),(2,0.4),(1,0.8)], 5), [0.2]*15+[0.4]*10+[0.8]*20),
        ]

# One factory per learner flavour; each takes a parameter list and returns
# a configured learner using a unit learning-rate schedule.
LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
    lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
    lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    """Legacy API: the schedule reports its minibatch size and yields the
    expected value at every step."""
    schedule = learning_rate_schedule(*params)
    assert schedule.minibatch_size == minibatch_size
    observed = [schedule[step] for step in range(len(expectation))]
    assert observed == expectation


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    """New API: the schedule reports its minibatch size and yields the
    expected value at every step."""
    schedule = learning_parameter_schedule(*params)
    assert schedule.minibatch_size == minibatch_size
    observed = [schedule[step] for step in range(len(expectation))]
    assert observed == expectation