def create_trainer(network, epoch_size, num_quantization_bits, warm_up, progress_writers):
    '''Create Trainer'''
    print('Creating the trainer.')

    # Differential Learning rate scheduler
    lr_schedule = C.learning_rate_schedule([2.5], unit=C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    l2_reg_weight = 0.001

    # Create the Adam learner
    learner = C.adam(network['output'].parameters, lr_schedule, mm_schedule,
                     l2_regularization_weight=l2_reg_weight, unit_gain=False)

    # Compute the number of workers
    num_workers = C.distributed.Communicator.num_workers()
    print('Number of workers: {}'.format(num_workers))

    if num_workers > 1:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=num_quantization_bits)
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            parameter_learner, progress_writers)
    else:
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            learner, progress_writers)

    return trainer
def __call__(self, parameters, opt_learning_rate=0.001, **kwargs):
    lr_per_minibatch = cntk.learning_rate_schedule(
        lr=opt_learning_rate, unit=cntk.UnitType.minibatch)
    momentum = cntk.momentum_schedule(momentum=0.99)
    return cntk.adam_sgd(parameters=parameters,
                         lr=lr_per_minibatch,
                         momentum=momentum)
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                             learning_rate_schedule(lr_values, UnitType.sample, 1),
                             C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values, m_values) for j in i] + [0]
    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
def create_trainer(network, epoch_size, num_quantization_bits, warm_up, progress_writers):
    print('Creating the trainer.')

    # Train only the last layers
    lr_schedule = C.learning_rate_schedule([0.01] * 10 + [0.001] * 20 + [0.0001] * 30,
                                           unit=C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    l2_reg_weight = 0.0001

    learner = C.adam(network['output'].parameters, lr_schedule, mm_schedule,
                     l2_regularization_weight=l2_reg_weight, unit_gain=False)

    num_workers = C.distributed.Communicator.num_workers()
    print('Number of workers: {}'.format(num_workers))

    if num_workers > 1:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=num_quantization_bits)
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            parameter_learner, progress_writers)
    else:
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            learner, progress_writers)

    return trainer
def __init__(self, n_in, n_out, init_lr, momentum):
    self.param1 = 512
    self.param2 = 256
    self.n_in = int(n_in)
    self.n_out = int(n_out)
    self.input = C.sequence.input_variable(shape=(self.n_in,))
    self.label = C.sequence.input_variable(shape=(self.n_out,))

    self.three_dnn = C.layers.Sequential([
        C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_1'),
        C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_2'),
        C.layers.Dense(self.param1, activation=C.tanh, name='dnn_three_3')])
    self.final_dnn = C.layers.Dense(self.n_out, name='dnn_final')
    self.dnn_1 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_1')
    self.dnn_2 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_2')
    self.dnn_3 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_3')
    self.dnn_4 = C.layers.Dense(8 * self.param2, bias=False, name='dnn_4')

    self.list_bias = []
    for i in xrange(16):
        self.list_bias.append(C.parameter(shape=(self.param2,), name='bias_' + str(i)))

    self.output = self.model(self.input)
    self.loss = loss_fun(self.output, self.label)
    self.eval_err = loss_fun(self.output, self.label)

    self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
    self.mom_s = C.momentum_schedule(momentum)
    self.learner = C.momentum_sgd(self.output.parameters, lr=self.lr_s, momentum=self.mom_s)
    self.trainer = C.Trainer(self.output, (self.loss, self.eval_err), [self.learner])
def create_network(para, verbose=False):
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.ops.relu):
        # In order to accelerate the debugging step, we choose a simple structure with only 2 parameters
        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[0],
                                      strides=(1, 1), pad=True, name='C1')(network_input / 255.0)
        h = cntk.layers.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2))(h)
        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[1],
                                      strides=(1, 1), pad=True, name='C2')(h)
        h = cntk.layers.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2))(h)
        h = cntk.layers.Convolution2D(filter_shape=(3, 3), num_filters=para[2],
                                      strides=(1, 1), pad=True, name='C2')(h)
        h = cntk.layers.Dense(para[3])(h)
        h = cntk.layers.Dropout(0.25)(h)
        z = cntk.layers.Dense(10, activation=None, name='R')(h)

    loss = cntk.cross_entropy_with_softmax(z, network_label)
    label_error = cntk.classification_error(z, network_label)
    lr_schedule = cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch)
    learner = cntk.momentum_sgd(z.parameters, lr_schedule, cntk.momentum_schedule(0.9))
    trainer = cntk.Trainer(z, (loss, label_error), [learner])

    if verbose:
        log = cntk.logging.ProgressPrinter(100)

    for _ in xrange(20000):
        data = train_reader.next_minibatch(100, input_map=mapping(train_reader))
        trainer.train_minibatch(data)
        if verbose:
            log.update_with_trainer(trainer)

    return trainer
def init_model(m):
    progress_writers = [
        cntk.logging.ProgressPrinter(
            freq=int(BATCHSIZE / 2),
            rank=cntk.train.distributed.Communicator.rank(),
            num_epochs=EPOCHS)
    ]

    # Loss (dense labels); check whether sparse labels are supported
    loss = cntk.cross_entropy_with_softmax(m, labels)

    # Momentum SGD
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb
    # unit_gain=False: momentum_direction = momentum*old_momentum_direction + gradient
    # if unit_gain=True then ...(1-momentum)*gradient
    local_learner = cntk.momentum_sgd(
        m.parameters,
        lr=cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch),
        momentum=cntk.momentum_schedule(MOMENTUM),
        unit_gain=False)

    distributed_learner = cntk.train.distributed.data_parallel_distributed_learner(local_learner)

    trainer = cntk.Trainer(m, (loss, cntk.classification_error(m, labels)),
                           [distributed_learner], progress_writers)

    return trainer, distributed_learner
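The unit_gain comment in the snippet above can be made concrete with a small pure-Python sketch. This only illustrates the two update formulas quoted in the comment (and the linked learners manual); the helper name is illustrative and not part of CNTK's API.

# Minimal sketch of the two momentum update rules referenced above.
# unit_gain=False: direction = momentum * direction + gradient
# unit_gain=True:  direction = momentum * direction + (1 - momentum) * gradient
def momentum_step(direction, gradient, momentum=0.9, unit_gain=False):
    scale = (1.0 - momentum) if unit_gain else 1.0
    return momentum * direction + scale * gradient

# The same gradient produces a smaller step when unit_gain=True:
print(momentum_step(0.0, 1.0, unit_gain=False))  # 1.0
print(momentum_step(0.0, 1.0, unit_gain=True))   # ~0.1 (i.e. 1 - momentum)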
def test_noise_injection_with_checkpointing():
    from cntk import initializer

    shape = (100, 100)

    w1 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))

    lr = learning_rate_schedule(0.5, UnitType.sample)
    m = C.momentum_schedule(0.99)

    learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner2 = C.momentum_sgd([w2], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner3 = C.momentum_sgd([w3], lr, m, gaussian_noise_injection_std_dev=0.5)

    assert np.allclose(w1.value, w2.value) and np.allclose(w1.value, w3.value)

    for i in range(10):
        checkpoint = learner1.create_checkpoint()

        v = np.float32(np.random.rand(100, 100))

        learner1.update({w1: v}, 1)
        learner2.update({w2: v}, 1)
        assert not np.allclose(w1.value, w2.value)

        learner3.restore_from_checkpoint(checkpoint)
        learner3.update({w3: v}, 1)
        assert np.allclose(w1.value, w3.value)
def train(self, report_freq=500, as_policy=True):
    #loss = C.ops.minus(0, C.ops.argmin(self.model) - C.ops.argmin(self.model) + C.ops.minus(self.label_var, 0))
    loss = C.squared_error(self.model, self.label_var)
    evaluation = C.squared_error(self.model, self.label_var)
    schedule = C.momentum_schedule(self.hp.learning_rate)
    progress_printer = C.logging.ProgressPrinter(num_epochs=self.hp.epochs / self.hp.minibatch_size)
    learner = C.adam(self.model.parameters,
                     C.learning_rate_schedule(self.hp.learning_rate, C.UnitType.minibatch),
                     momentum=schedule,
                     l1_regularization_weight=self.hp.l1reg,
                     l2_regularization_weight=self.hp.l2reg)
    trainer = C.Trainer(self.model, (loss, evaluation), learner, progress_printer)

    self.plotdata = {"loss": []}
    for epoch in range(self.hp.epochs):
        indata, label, total_reward = self.get_next_data(self.hp.minibatch_size, as_policy)
        data = {self.input_var: indata, self.label_var: label}
        trainer.train_minibatch(data)
        loss = trainer.previous_minibatch_loss_average
        if not (loss == "NA"):
            self.plotdata["loss"].append(loss)
        if epoch % report_freq == 0:
            print()
            print("last epoch total reward: {}".format(total_reward))
            trainer.summarize_training_progress()
            print()
        # if self.hp.stop_loss > loss:
        #     break

    print()
    trainer.summarize_training_progress()
def main(params):
    # Create output and log directories if they don't exist
    if not os.path.isdir(params['output_folder']):
        os.makedirs(params['output_folder'])

    if not os.path.isdir(params['log_folder']):
        os.makedirs(params['log_folder'])

    # Create the network
    network = create_network()

    # Create readers
    train_reader = cbf_reader(os.path.join(params['input_folder'], 'train{}.cbf'.format(params['prefix'])),
                              is_training=True, max_samples=cntk.io.INFINITELY_REPEAT)
    cv_reader = cbf_reader(os.path.join(params['input_folder'], 'test{}.cbf'.format(params['prefix'])),
                           is_training=False, max_samples=cntk.io.FULL_DATA_SWEEP)
    test_reader = cbf_reader(os.path.join(params['input_folder'], 'test{}.cbf'.format(params['prefix'])),
                             is_training=False, max_samples=cntk.io.FULL_DATA_SWEEP)

    input_map = {
        network['input']: train_reader.streams.front,
        network['target']: train_reader.streams.label
    }

    # Create learner
    mm_schedule = momentum_schedule(0.90)
    lr_schedule = learning_parameter_schedule([(40, 0.1), (40, 0.01)],
                                              minibatch_size=params['minibatch_size'])
    learner = cntk.adam(network['model'].parameters, lr_schedule, mm_schedule,
                        l2_regularization_weight=0.0005,
                        epoch_size=params['epoch_size'],
                        minibatch_size=params['minibatch_size'])

    # Use TensorBoard for visual logging
    log_file = os.path.join(params['log_folder'], 'log.txt')
    pp_writer = cntk.logging.ProgressPrinter(freq=10, tag='Training',
                                             num_epochs=params['max_epochs'],
                                             log_to_file=log_file)
    tb_writer = cntk.logging.TensorBoardProgressWriter(freq=10,
                                                       log_dir=params['log_folder'],
                                                       model=network['model'])

    # Create trainer and training session
    trainer = Trainer(network['model'], (network['loss'], network['metric']),
                      [learner], [pp_writer, tb_writer])
    test_config = TestConfig(minibatch_source=test_reader,
                             minibatch_size=params['minibatch_size'],
                             model_inputs_to_streams=input_map)
    cv_config = CrossValidationConfig(minibatch_source=cv_reader,
                                      frequency=(1, DataUnit.sweep),
                                      minibatch_size=params['minibatch_size'],
                                      model_inputs_to_streams=input_map)
    checkpoint_config = CheckpointConfig(os.path.join(params['output_folder'], model_name),
                                         frequency=(10, DataUnit.sweep),
                                         restore=params['restore'])
    session = training_session(trainer=trainer,
                               mb_source=train_reader,
                               mb_size=params['minibatch_size'],
                               model_inputs_to_streams=input_map,
                               max_samples=params['epoch_size'] * params['max_epochs'],
                               progress_frequency=(1, DataUnit.sweep),
                               checkpoint_config=checkpoint_config,
                               cv_config=cv_config,
                               test_config=test_config)

    cntk.logging.log_number_of_parameters(network['model'])
    session.train()

    # Save the trained model
    path = os.path.join(params['output_folder'], 'final_model.dnn')
    network['model'].save(path)
    print('Saved final model to', path)
def set_optimizer(self, opt_type, opt_conf):
    if opt_type == 'SGD':
        self.lr_schedule = C.learning_rate_schedule(
            opt_conf['lr'], C.UnitType.minibatch)
        self.m_schedule = C.momentum_schedule(
            opt_conf['momentum'], C.UnitType.minibatch)
    else:
        raise NotImplementedError
def train(reader, model_func, max_epochs=10, task='slot_tagging'):

    # Create the containers for input feature (x) and the label (y)
    x = C.sequence.input_variable(vocab_size)
    y = C.sequence.input_variable(num_labels)

    # Instantiate the model function; x is the input (feature) variable
    model = model_func(x)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000   # 18000 samples is half the dataset size
    minibatch_size = 70

    # LR schedule over epochs
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
    lr_per_sample = [3e-4]*4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_parameter_schedule(lr_per_minibatch, epoch_size=epoch_size)

    # Momentum schedule
    momentums = C.momentum_schedule(0.9048374180359595, minibatch_size=minibatch_size)

    # We use the Adam optimizer, which is known to work well on this dataset.
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.adam(parameters=model.parameters,
                     lr=lr_schedule,
                     momentum=momentums,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)

    # Uncomment below for more detailed logging
    #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs)

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)

    # Assign the data fields to be read from the input
    if task == 'slot_tagging':
        data_map = {x: reader.streams.query, y: reader.streams.slot_labels}
    else:
        data_map = {x: reader.streams.query, y: reader.streams.intent}

    t = 0
    for epoch in range(max_epochs):          # loop over epochs
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:                 # loop over minibatches on the epoch
            data = reader.next_minibatch(minibatch_size, input_map=data_map)  # fetch minibatch
            trainer.train_minibatch(data)    # update model with it
            t += data[y].num_samples         # samples so far
        trainer.summarize_training_progress()
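The per-sample to per-minibatch conversion in the snippet above is just a multiplication by the minibatch size; the short check below (illustrative only, not part of the original function) shows roughly what values it produces with the numbers used here.

minibatch_size = 70
lr_per_sample = [3e-4]*4 + [1.5e-4]
lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
print(lr_per_minibatch)  # approximately [0.021, 0.021, 0.021, 0.021, 0.0105]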
def __init__(self, dim_x, dim_y):
    self.dim_x = int(dim_x)
    self.dim_y = int(dim_y)
    self.input = cntk.sequence.input_variable(shape=(self.dim_x,))
    self.label = cntk.sequence.input_variable(shape=(self.dim_y,))
    self.output = self.model(self.input)
    self.loss = loss_fun(self.output, self.label)
    self.eval = loss_fun(self.output, self.label)
    self.learner = cntk.momentum_sgd(parameters=self.output.parameters,
                                     momentum=cntk.momentum_schedule(0.5),
                                     lr=cntk.learning_rate_schedule(0.006, cntk.UnitType.sample))
    self.trainer = cntk.Trainer(self.output, (self.loss, self.eval), [self.learner])
def train(self):
    tmp_d = {"x": [], "y": []}
    num_list = []
    count = 0
    for idx, value in enumerate(self.series):
        if idx % self.h_dims == 0:
            num_list = []
            count += 1
            if (self.h_dims * count) > len(self.series):
                break
        num_list.append(np.float32(value))
        increment_list = []
        for num in num_list:
            increment_list.append(num)
        tmp_d["x"].append(np.array(increment_list))
        tmp_d["y"].append(np.array([np.float32(self.series[self.h_dims * count])]))

    x = {"train": tmp_d["x"]}
    y = {"train": np.array(tmp_d["y"])}

    z = self.create_model(self.input_node, self.h_dims)
    var_l = cntk.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
    learning_rate = 0.005
    lr_schedule = cntk.learning_parameter_schedule(learning_rate)
    loss = cntk.squared_error(z, var_l)
    error = cntk.squared_error(z, var_l)
    momentum_schedule = cntk.momentum_schedule(0.9, minibatch_size=self.batch_size)
    learner = cntk.fsadagrad(z.parameters, lr=lr_schedule, momentum=momentum_schedule)
    trainer = cntk.Trainer(z, (loss, error), [learner])

    # training
    loss_summary = []
    start = time.time()
    for epoch in range(0, self.epochs):
        for x_batch, l_batch in self.next_batch(x, y, "train", self.batch_size):
            trainer.train_minibatch({self.input_node: x_batch, var_l: l_batch})
        if epoch % (self.epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                epoch, training_loss, time.time() - start))

    return z
def lstm_basic(x, y, epochs=1000, batch_size=100, input_dim=5):

    x_axes = [C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()]
    C.input_variable(1, dynamic_axes=x_axes)

    # input sequences
    input_seq = C.sequence.input_variable(1)

    # create the model
    z = create_model(input_seq, input_dim)

    # expected output (label), also the dynamic axes of the model output
    # is specified as the model of the label input
    lb = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, lb)

    # use squared error to determine error for now
    error = C.squared_error(z, lb)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)
    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(x, y, "train", batch_size):
            trainer.train_minibatch({input_seq: x1, lb: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                epoch, training_loss, time.time() - start))

    print("training took {0:.1f} sec".format(time.time() - start))

    return z, trainer, input_seq
def train(self, train_file, output_resources_pickle_file,
          network_type='unidirectional',
          num_epochs=1, batch_size=50,
          dropout=0.2, reg_alpha=0.0,
          num_hidden_units=150, num_layers=1):

    train_X, train_Y = self.reader.read_and_parse_training_data(train_file, output_resources_pickle_file)

    print("Data Shape: ")
    print(train_X.shape)   # (15380, 613)
    print(train_Y.shape)   # (15380, 613, 8)
    #self.wordvecs.shape   # (66962, 50)

    print("Hyper parameters:")
    print("output_resources_pickle_file = {}".format(output_resources_pickle_file))
    print("network_type = {}".format(network_type))
    print("num_epochs = {}".format(num_epochs))
    print("batch_size = {}".format(batch_size))
    print("dropout = {}".format(dropout))
    print("reg_alpha = {}".format(reg_alpha))
    print("num_hidden_units = {}".format(num_hidden_units))
    print("num_layers = {}".format(num_layers))

    # Instantiate the model function
    features = C.sequence.input_variable(self.wordvecs.shape[0])
    labels = C.input_variable(train_Y.shape[2], dynamic_axes=[C.Axis.default_batch_axis()])
    self.model = self.__create_model(features, train_Y.shape[2], num_hidden_units, dropout)

    plot_path = "./lstm_model.png"
    plot(self.model, plot_path)

    # Instantiate the loss and error function
    loss = C.cross_entropy_with_softmax(self.model, labels)
    error = C.classification_error(self.model, labels)

    # LR schedule
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(self.model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(freq=100, first=10, tag='Training',
                                                 num_epochs=num_epochs)

    # Instantiate the trainer. We have all data in memory.
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_feed_data.ipynb
    print('Start training')
    train_summary = loss.train((train_X.astype('float32'), train_Y.astype('float32')),
                               parameter_learners=[learner],
                               callbacks=[progress_printer])
def train(model, reader):
    y_pre = model(x)
    loss, label_error = create_criterion_function(model, y_pre, y, True)

    lr_per_minibatch = [lr] + [lr / 2] + [lr / 4]
    # lr_per_minibatch = [lr * batch_size for lr in lr_per_sample]
    lr_schedule = C.learning_parameter_schedule(lr_per_minibatch, epoch_size=epoch_size)

    # Momentum schedule
    momentums = C.momentum_schedule(0.9048374180359595, minibatch_size=batch_size)

    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epoch)

    # learner = C.sgd(model.parameters, lr_schedule)
    learner = C.adam(y_pre.parameters,
                     lr_schedule,
                     momentum=momentums,
                     gradient_clipping_threshold_per_sample=15)

    trainer = C.Trainer(y_pre, (loss, label_error), learner, progress_printer)

    C.logging.log_number_of_parameters(y_pre)  # print # parameters and # tensor

    loss_summary = []
    step = 0
    data_map = {x: reader.streams.query, y: reader.streams.intent}
    t = 0
    for epoch in range(max_epoch):            # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:                  # loop over minibatches on the epoch
            data = reader.next_minibatch(batch_size, input_map=data_map)  # fetch minibatch
            # print(data)
            trainer.train_minibatch(data)     # update model with it
            t += data[y].num_samples
            if t % 6000 == 0:
                training_loss = trainer.previous_minibatch_loss_average
                error = trainer.previous_minibatch_evaluation_average
                print("epoch: {}, step: {}, loss: {:.5f}, error {:.5f}".format(
                    epoch, t, training_loss, error))
        trainer.summarize_training_progress()
def train(create_model, X, Y, epochs=500, batch_size=10, N=1):
    dim = Y.shape[1]

    # input sequences
    x = C.sequence.input_variable(dim)

    # create the model
    z = create_model(x, N=N, outputs=dim)

    # expected output (label), also the dynamic axes of the model output
    # is specified as the model of the label input
    l = C.input_variable(dim, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, l)

    # use squared error to determine error for now
    error = C.squared_error(z, l)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)
    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(X, Y, batch_size):
            trainer.train_minibatch({x: x1, l: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.5f}".format(epoch, training_loss))

    print("training took {0:.1f} sec".format(time.time() - start))

    return z
def build_SRResNet_graph(lr_image_shape, hr_image_shape, net):
    inp_dynamic_axes = [C.Axis.default_batch_axis()]
    real_X = C.input(lr_image_shape, dynamic_axes=inp_dynamic_axes, name="real_X")
    real_Y = C.input(hr_image_shape, dynamic_axes=inp_dynamic_axes, name="real_Y")

    real_X_scaled = real_X / 255
    real_Y_scaled = real_Y / 255

    genG = net(real_X_scaled)

    G_loss = C.reduce_mean(C.square(real_Y_scaled - genG))

    G_optim = C.adam(G_loss.parameters,
                     lr=C.learning_rate_schedule([(1, 0.01), (1, 0.001), (98, 0.0001)],
                                                 C.UnitType.minibatch, 10000),
                     momentum=C.momentum_schedule(0.9),
                     gradient_clipping_threshold_per_sample=1.0)

    G_G_trainer = C.Trainer(genG, (G_loss, None), G_optim)

    return (real_X, real_Y, genG, real_X_scaled, real_Y_scaled, G_optim, G_G_trainer)
def _create_model(self, input_dim, output_dim, hidden_dims):
    c_in = C.input_variable(input_dim, name='state')
    model = c_in
    for h in hidden_dims:
        model = C.layers.Dense(h, activation=C.relu)(model)
    model = C.layers.Dense(output_dim, activation=C.softmax)(model)

    c_action_prob = model
    c_action_onehot = C.input_variable(output_dim, name='action_onehot')
    c_reward = C.input_variable(1, name='reward')

    action_prob = C.reduce_sum(c_action_prob * c_action_onehot)
    log_action_prog = C.log(action_prob)
    loss = -log_action_prog * c_reward
    loss = C.reduce_mean(loss)

    lr = 1e-2
    lr_schedule = C.learning_parameter_schedule(lr)
    learner = C.adam(model.parameters, lr_schedule, C.momentum_schedule(0.9))
    trainer = C.Trainer(model, (loss, None), learner)

    return model, loss, trainer
def __init__(self, n_in, n_out, init_lr, momentum):
    self.param1 = 512
    self.param2 = 256
    self.n_in = int(n_in)
    self.n_out = int(n_out)

    self.input = C.sequence.input_variable(shape=(self.n_in,))
    self.label = C.sequence.input_variable(shape=(self.n_out,))

    self.three_dnn = Sequential([
        Dense(self.param1, activation=C.tanh),
        Dense(self.param1, activation=C.tanh),
        Dense(self.param1, activation=C.tanh)
    ])
    self.rnn_layer1 = Sequential([(Recurrence(LSTM(self.param2)),
                                   Recurrence(LSTM(self.param2), go_backwards=True)),
                                  C.splice])
    self.rnn_layer2 = Sequential([(Recurrence(LSTM(self.param2)),
                                   Recurrence(LSTM(self.param2), go_backwards=True)),
                                  C.splice])
    self.final_dnn = Dense(self.n_out)

    self.output = self.model(self.input)
    self.loss = loss_fun(self.output, self.label)
    self.eval_err = loss_fun(self.output, self.label)

    self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
    self.mom_s = C.momentum_schedule(momentum)
    self.learner = C.momentum_sgd(self.output.parameters, lr=self.lr_s, momentum=self.mom_s)
    self.trainer = C.Trainer(self.output, (self.loss, self.eval_err), [self.learner])
def train_and_test(reader_train, reader_test, model_func):

    ###############################
    # Training the model
    ###############################

    input = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    model = model_func(input)

    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    epoch_size = 30000
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) // minibatch_size

    lr_per_sample = [3e-4]
    lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size)
    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    learner = C.fsadagrad(model.parameters, lr=lr_schedule, momentum=momentum_schedule)
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for i in range(num_minibatches_to_train):
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric * 100) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from the training data
    #############################################################################

    test_minibatch_size = 32
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for i in range(0, int(num_minibatches_to_test)):
        data = reader_test.next_minibatch(test_minibatch_size, input_map=test_input_map)
        eval_error = trainer.test_minibatch(data)
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size

    test_error = (metric_numer * 100) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
def train(train_x, train_y, seed, model_dir, loss_dir): input_dim = 600 output_dim = 3631 num_epochs = 100 hidden_layer_type = ['TANH', 'TANH'] hidden_layer_size = [1024, 1024] momentum = 0.9 finetune_lr = 0.01 l2_regularization_weight = 0.00001 C.cntk_py.set_fixed_random_seed(seed) print('Creating DNN model...') input = C.input_variable(input_dim) output = C.input_variable(output_dim) dnn_model = create_dnn_model(input, hidden_layer_type, hidden_layer_size, output_dim) epoch_num = 0 current_finetune_lr = finetune_lr current_momentum = momentum train_loss_output = [] print('Learning...') while (epoch_num < num_epochs): print('started epoch %i' % epoch_num) epoch_num += 1 sub_start_time = time.time() lr_schedule = C.learning_rate_schedule(current_finetune_lr, C.UnitType.minibatch) momentum_schedule = C.momentum_schedule(current_momentum) learner = C.momentum_sgd( dnn_model.parameters, lr_schedule, momentum_schedule, unit_gain=False, l1_regularization_weight=0, l2_regularization_weight=l2_regularization_weight) #learner = C.adadelta(dnn_model.parameters, lr_schedule, rho=0.95, epsilon=1e-8, l1_regularization_weight=0, # l2_regularization_weight= 0.00001 ) loss = C.cross_entropy_with_softmax(dnn_model, output) error = loss trainer = C.Trainer(dnn_model, (loss, error), [learner]) train_error = [] for i in range(len(train_x)): temp_train_x = np.float32(train_x[i]) temp_train_y = np.float32(train_y[i]) trainer.train_minibatch({ input: temp_train_x, output: temp_train_y }) train_error.append(trainer.previous_minibatch_loss_average) this_train_loss = np.mean(train_error) sub_end_time = time.time() print('time for 1 epoch is %.1f' % (sub_end_time - sub_start_time)) train_loss_output.append(this_train_loss) print('loss is %.4f' % this_train_loss) if np.remainder(epoch_num, 10) == 0: nnets_file_name = 'dnn_model_ep' + np.str(epoch_num) + '.model' if not os.path.isdir(model_dir): os.makedirs(model_dir) dnn_model.save(os.path.join(model_dir, nnets_file_name)) if not os.path.isdir(loss_dir): os.makedirs(loss_dir) np.savetxt( os.path.join(loss_dir, 'loss_curve_ep' + np.str(epoch_num) + '.csv'), train_loss_output) nnets_file_name = 'dnn_model_final.model' if not os.path.isdir(model_dir): os.makedirs(model_dir) dnn_model.save(os.path.join(model_dir, nnets_file_name)) if not os.path.isdir(loss_dir): os.makedirs(loss_dir) np.savetxt( os.path.join(loss_dir, 'loss_curve_final' + np.str(epoch_num) + '.csv'), train_loss_output)
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size): model_train = create_model_train(s2smodel) criterion = create_criterion_function(model_train) # also wire in a greedy decoder so that we can properly log progress on a validation example # This is not used for the actual training process. model_greedy = create_model_test(s2smodel) # Instantiate the trainer object to drive the model training minibatch_size = 72 lr = 0.001 if use_attention else 0.005 learner = C.fsadagrad(model_train.parameters, #apply the learning rate as if it is a minibatch of size 1 lr = C.learning_parameter_schedule_per_sample([lr]*2+[lr/2]*3+[lr/4], epoch_size), momentum = C.momentum_schedule(0.9366416204111472, minibatch_size=minibatch_size), gradient_clipping_threshold_per_sample=2.3, gradient_clipping_with_truncation=True) trainer = C.Trainer(None, criterion, learner) # records total_samples = 0 mbs = 0 eval_freq = 100 # print out some useful training information C.logging.log_number_of_parameters(model_train) ; print() progress_printer = C.logging.ProgressPrinter(freq=30, tag='Training') # a hack to allow us to print sparse vectors sparse_to_dense = create_sparse_to_dense(input_vocab_dim) for epoch in range(max_epochs): while total_samples < (epoch+1) * epoch_size: # get next minibatch of training data mb_train = train_reader.next_minibatch(minibatch_size) # do the training trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features], criterion.arguments[1]: mb_train[train_reader.streams.labels]}) progress_printer.update_with_trainer(trainer, with_metric=True) # log progress # every N MBs evaluate on a test sequence to visually show how we're doing if mbs % eval_freq == 0: mb_valid = valid_reader.next_minibatch(1) # run an eval on the decoder output model (i.e. don't use the groundtruth) e = model_greedy(mb_valid[valid_reader.streams.features]) print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w)) print("->") print(format_sequences(e, i2w)) # visualizing attention window if use_attention: debug_attention(model_greedy, mb_valid[valid_reader.streams.features]) total_samples += mb_train[train_reader.streams.labels].num_samples mbs += 1 # log a summary of the stats for the epoch progress_printer.epoch_summary(with_metric=True) # done: save the final model model_path = "model_%d.cmf" % epoch print("Saving final model to '%s'" % model_path) s2smodel.save(model_path) print("%d epochs complete." % max_epochs)
]

MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2, ), [0.2]),
    ((0.2, ), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch),
                          momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch),
                               momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch),
                              momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch),
                             gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params,
# expected output (label), also the dynamic axes of the model output
# is specified as the model of the label input
l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

# the learning rate
learning_rate = 0.005
lr_schedule = C.learning_parameter_schedule(learning_rate)

# loss function
loss = C.squared_error(z, l)

# use squared error to determine error for now
error = C.squared_error(z, l)

# use adam optimizer
momentum_schedule = C.momentum_schedule(0.9, minibatch_size=BATCH_SIZE)
learner = C.fsadagrad(z.parameters, lr=lr_schedule, momentum=momentum_schedule)
trainer = C.Trainer(z, (loss, error), [learner])

# training
loss_summary = []

# time to start training
start = time.time()
for epoch in range(0, EPOCHS):
    for x_batch, l_batch in next_batch(X, Y, "train"):
        trainer.train_minibatch({x: x_batch, l: l_batch})
    if epoch % (EPOCHS / 10) == 0:
        training_loss = trainer.previous_minibatch_loss_average
        loss_summary.append(training_loss)
def __train_cntk(self, path_to_folder: str, model_definition, epochs: int, output_model_path: str, classes, minibatch_size: int): import cntk from cntk.learners import learning_parameter_schedule from cntk.ops import input_variable from cntk.io import MinibatchSource, ImageDeserializer, StreamDefs, StreamDef, MinibatchData, UserDeserializer import cntk.io.transforms as xforms from cntk.layers import default_options, Dense, Sequential, Activation, Embedding, Convolution2D, MaxPooling, Stabilizer, Convolution, Dropout, BatchNormalization from cntk.ops.functions import CloneMethod from cntk.logging import ProgressPrinter from cntk.losses import cross_entropy_with_softmax from cntk import classification_error, softmax, relu, ModelFormat, element_times, momentum_schedule, momentum_sgd import pandas as pd path_to_folder = path_to_folder.rstrip('/') map_file_train = path_to_folder + "/train_map.txt" map_file_test = path_to_folder + "/test_map.txt" classes_set = set() num_train = 0 num_test = 0 num_channels = 3 class TrackDataset(UserDeserializer): def __init__(self, map_file, streams, chunksize=100): super(TrackDataset, self).__init__() self._batch_size = chunksize self.dataframes = pd.read_csv(map_file, sep='\t', dtype=str, header=None, names=["features", "labels"]) self._streams = [ cntk.io.StreamInformation(s['name'], i, 'dense', np.float32, s['shape']) for i, s in enumerate(streams) ] self._num_chunks = int( math.ceil(len(self.dataframes) / chunksize)) def _scale_image(self, image, width=224, height=168): try: return image.resize((width, height), Image.LINEAR) except: raise Exception('scale_image error') def stream_infos(self): return self._streams def num_chunks(self): return self._num_chunks def get_chunk(self, chunk_id): images = [] labels = [] maximum = (chunk_id + 1) * self._batch_size if (maximum > len(self.dataframes)): maximum = len(self.dataframes) for i in range(chunk_id * self._batch_size, maximum): img_name = self.dataframes.iloc[i, 0] image = Image.open(img_name) cl = self.dataframes.iloc[i, 1:].values[0] image = self._scale_image(image) image = np.moveaxis((np.array(image).astype('float32')), -1, 0) image -= np.mean(image, keepdims=True) image /= (np.std(image, keepdims=True) + 1e-6) images.append(image) yv = np.zeros(num_classes) yv[classes.index(cl)] = 1 labels.append(yv) result = {} features = np.array(images) lab = np.array(labels).astype('float32') result[self._streams[0].m_name] = features result[self._streams[1].m_name] = lab return result try: with open(map_file_train) as f: csv_reader = csv.reader(f, delimiter='\t') for row in csv_reader: cmd = row[1] classes_set.add(cmd) num_train = num_train + 1 except Exception as e: raise Exception( "No train_map.txt file found in path " + path_to_folder + ". 
Did you create a dataset using create_balanced_dataset()?") num_classes = len(classes) with open(map_file_test) as f: for num_test, l in enumerate(f): pass # transforms = [ # xforms.scale(width=self.__image_width, height=self.__image_height, channels=num_channels, interpolations='linear'), # xforms.mean(mean_file) # ] dataset_train = TrackDataset(map_file=map_file_train, streams=[ dict(name='features', shape=(num_channels, self.__image_height, self.__image_width)), dict(name='labels', shape=(num_classes, )) ]) reader_train = MinibatchSource([dataset_train], randomize=True) # a = dataset_train.num_chunks() dataset_test = TrackDataset(map_file=map_file_test, streams=[ dict(name='features', shape=(num_channels, self.__image_height, self.__image_width)), dict(name='labels', shape=(num_classes, )) ]) reader_test = MinibatchSource([dataset_test], randomize=True) # ImageDeserializer loads images in the BGR format, not RGB # reader_train = MinibatchSource(ImageDeserializer(map_file_train, StreamDefs( # features = StreamDef(field='image', transforms=transforms), # labels = StreamDef(field='label', shape=num_classes) # ))) # reader_test = MinibatchSource(ImageDeserializer(map_file_test, StreamDefs( # features = StreamDef(field='image', transforms=transforms), # labels = StreamDef(field='label', shape=num_classes) # ))) # mb = reader_train.next_minibatch(10) input_var = input_variable( (num_channels, self.__image_height, self.__image_width)) label_var = input_variable((num_classes)) model = model_definition(input_var) ce = cross_entropy_with_softmax(model, label_var) pe = classification_error(model, label_var) epoch_size = num_train lr_per_minibatch = learning_parameter_schedule([0.01] * 10 + [0.003] * 10 + [0.001], epoch_size=epoch_size) momentums = momentum_schedule(0.9, minibatch_size=minibatch_size) l2_reg_weight = 0.001 learner = momentum_sgd(model.parameters, lr=lr_per_minibatch, momentum=momentums, l2_regularization_weight=l2_reg_weight) progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs) trainer = cntk.train.Trainer(model, (ce, pe), [learner], [progress_printer]) input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } print("Training started") batch_index = 0 plot_data = {'batchindex': [], 'loss': [], 'error': []} for epoch in range(epochs): sample_count = 0 while sample_count < epoch_size: data: MinibatchSource = reader_train.next_minibatch( min(minibatch_size, epoch_size - sample_count), input_map=input_map) trainer.train_minibatch(data) sample_count += data[label_var].num_samples batch_index += 1 plot_data['batchindex'].append(batch_index) plot_data['loss'].append( trainer.previous_minibatch_loss_average) plot_data['error'].append( trainer.previous_minibatch_evaluation_average) trainer.summarize_training_progress() metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 epoch_size = num_test while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) data = reader_test.next_minibatch(current_minibatch, input_map=input_map) metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format( minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom)) print("") model.save(output_model_path, format=ModelFormat.ONNX)
                init=np.float32(np.random.normal(0, 0.1, [HIDDEN_DIM, 4])))  #normal(0.1))

out_num = times(cur_h, W_out)
score = softmax(out_num)
loss = cross_entropy_with_softmax(score, ys)
#eval_error = cross_entropy_with_softmax(score, ys)
eval_error = classification_error(score, ys)

learning_rate = 1e-3
lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
#lr_schedule = learning_rate_schedule([(5000*minibatch_size, learning_rate), (1, learning_rate/100)], UnitType.minibatch)
#learner = sgd(score.parameters, lr_schedule)
mom_schedule = momentum_schedule(0.9)
#var_mom_schedule = momentum_schedule(0.999)
learner = adam_sgd(score.parameters, lr_schedule, mom_schedule, l2_regularization_weight=0)
#learner = momentum_sgd(score.parameters, lr_schedule, mom_schedule)
#lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
#learner = adagrad(score.parameters, lr_schedule)

trainer = Trainer(score, loss, eval_error, [learner])

TcmpE = datetime.datetime.now()

# ## Training
# loss = (mkld)
# _q_prime = C.tanh(q)
# _mu = C.reduce_mean(_q_prime, axis=C.Axis.default_batch_axis())
# _sigma = C.reduce_mean(C.square(_q_prime-_mu), axis=C.Axis.default_batch_axis())
# loss += C.reduce_mean(C.square(_mu)) + C.reduce_mean(C.square(_sigma-0.615))
# # _log_mu = C.reduce_mean(C.log(C.abs(q)), axis=C.Axis.default_batch_axis())
# # loss += C.reduce_mean(C.square(_log_mu+0.57))

from IPython import embed; embed()
exit()

lr_rate = 1e-3
learner = C.adam(loss.parameters,
                 C.learning_parameter_schedule_per_sample(lr_rate),
                 C.momentum_schedule(0.99))
trainer = C.Trainer(loss, (loss, None), [learner])

for i in tqdm(range(10000)):
    # v = np.random.uniform(size=(1,2))
    v = datasets.make_moons(n_samples=1000, noise=.05)[0].astype(np.float32)
    trainer.train_minibatch({loss.arguments[0]: v})
    # from IPython import embed; embed()
    if i % 100 == 0:
        print('\n', trainer.previous_minibatch_loss_average)

if len(bn) > 0:  # batch norm
    result = C.combine(bn).eval({loss.arguments[0]: v})
    result = list(result.values())

momentum = C.Constant(0.9)
def test_learner_init(): i = C.input_variable(shape=(1, ), needs_gradient=True, name='a') w = parameter(shape=(1, )) res = i * w #test new API: learning_parameter_schedule #explictly specify reference minibatch size and learning rate is in number: learner = sgd(res.parameters, lr=0.1, minibatch_size=25) assert learner.is_compatible_mode() == False assert learner.minibatch_size == 25 #the learner's reference minibatch #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters: assert learner._learning_rate_schedule.minibatch_size == 25 assert learner.learning_rate() == 0.1 #no explictly specification of reference minibatch size and learning rate is in number: learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1)) assert learner.is_compatible_mode() == False assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters: assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE assert learner.learning_rate() == 0.1 learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=25) assert learner.is_compatible_mode() == False assert learner.minibatch_size == 25 #the learner's reference minibatch #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters: assert learner._learning_rate_schedule.minibatch_size == 20 assert learner.learning_rate() == 0.1 learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20)) assert learner.is_compatible_mode() == False #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters: assert learner._learning_rate_schedule.minibatch_size == 20 assert learner.learning_rate() == 0.1 #no explictly specification of reference minibatch size and learning rate is in number: learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1)) assert learner.is_compatible_mode() == False assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters: assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE assert learner.learning_rate() == 0.1 #no explictly specification of reference minibatch size and learning rate is in number: learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE) assert learner.is_compatible_mode() == True assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters: assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE assert learner.learning_rate() == 0.1 learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE) assert learner.is_compatible_mode() == True assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters: assert 
learner._learning_rate_schedule.minibatch_size == 20 assert learner.learning_rate() == 0.1 #no explictly specification of reference minibatch size and learning rate is in number: learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE) assert learner.is_compatible_mode() == True assert learner.minibatch_size == C.learners.IGNORE #the learner's reference minibatch #with direct learner learning rate number specification, the learning rate schedule get the reference minibatch size from the learner parameters: assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE assert learner.learning_rate() == 0.1 mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32) assert mysgd.minibatch_size == 32 assert mysgd._learning_rate_schedule.minibatch_size == 32 assert mysgd.learning_rate() == 0.4 mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32) assert mymomentum.minibatch_size == 32 assert mymomentum._learning_rate_schedule.minibatch_size == 32 assert mymomentum.learning_rate() == 0.4 myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32) assert myadadelta.minibatch_size == 32 assert myadadelta._learning_rate_schedule.minibatch_size == 32 assert myadadelta.learning_rate() == 0.4 myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32) assert myadam.minibatch_size == 32 assert myadam._learning_rate_schedule.minibatch_size == 32 assert myadam.learning_rate() == 0.4 myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32) assert myadagrad.minibatch_size == 32 assert myadagrad._learning_rate_schedule.minibatch_size == 32 assert myadagrad.learning_rate() == 0.4 myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32) assert myfsadagrad.minibatch_size == 32 assert myfsadagrad._learning_rate_schedule.minibatch_size == 32 assert myfsadagrad.learning_rate() == 0.4 mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32) assert mynesterov.minibatch_size == 32 assert mynesterov._learning_rate_schedule.minibatch_size == 32 assert mynesterov.learning_rate() == 0.4 myrmsrop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8, minibatch_size=32) assert myrmsrop.minibatch_size == 32 assert myrmsrop._learning_rate_schedule.minibatch_size == 32 assert myrmsrop.learning_rate() == 0.4 mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512) assert mysgd.minibatch_size == 32 assert mysgd._learning_rate_schedule.minibatch_size == 32 assert mysgd._learning_rate_schedule[0] == 0.4 assert mysgd._learning_rate_schedule[512] == 0.1 assert mysgd._learning_rate_schedule[512 * 2] == 0.001 mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9], minibatch_size=32, epoch_size=512) assert mymomentum.minibatch_size == 32 assert mymomentum._learning_rate_schedule.minibatch_size == 32 assert mymomentum._learning_rate_schedule[0] == 0.4 assert mymomentum._learning_rate_schedule[512] == 0.1 assert mymomentum._learning_rate_schedule[512 * 2] == 0.001 myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512) assert myadadelta.minibatch_size == 32 assert myadadelta._learning_rate_schedule.minibatch_size == 32 assert myadadelta._learning_rate_schedule[0] == 0.4 assert 
myadadelta._learning_rate_schedule[512] == 0.1 assert myadadelta._learning_rate_schedule[512 * 2] == 0.001 myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9], minibatch_size=32, epoch_size=512) assert myadam.minibatch_size == 32 assert myadam._learning_rate_schedule.minibatch_size == 32 assert myadam._learning_rate_schedule[0] == 0.4 assert myadam._learning_rate_schedule[512] == 0.1 assert myadam._learning_rate_schedule[512 * 2] == 0.001 myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512) assert myadagrad.minibatch_size == 32 assert myadagrad._learning_rate_schedule.minibatch_size == 32 assert myadagrad._learning_rate_schedule[0] == 0.4 assert myadagrad._learning_rate_schedule[512] == 0.1 assert myadagrad._learning_rate_schedule[512 * 2] == 0.001 myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9], variance_momentum=[0.9], minibatch_size=32, epoch_size=512) assert myadagrad.minibatch_size == 32 assert myadagrad._learning_rate_schedule.minibatch_size == 32 assert myadagrad._learning_rate_schedule[0] == 0.4 assert myadagrad._learning_rate_schedule[512] == 0.1 assert myadagrad._learning_rate_schedule[512 * 2] == 0.001 mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9], minibatch_size=32, epoch_size=512) assert mynesterov.minibatch_size == 32 assert mynesterov._learning_rate_schedule.minibatch_size == 32 assert mynesterov._learning_rate_schedule[0] == 0.4 assert mynesterov._learning_rate_schedule[512] == 0.1 assert mynesterov._learning_rate_schedule[512 * 2] == 0.001 myrmsrop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8, minibatch_size=32, epoch_size=512) assert myrmsrop.minibatch_size == 32 assert myrmsrop._learning_rate_schedule.minibatch_size == 32 assert myrmsrop._learning_rate_schedule[0] == 0.4 assert myrmsrop._learning_rate_schedule[512] == 0.1 assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001 learner_parameter = learner.parameters from cntk.variables import Parameter param = learner_parameter[0] assert isinstance(param, Parameter) unit_gain_value = C.default_unit_gain_value() assert unit_gain_value momentum = C.momentum_schedule(0.999, minibatch_size=1) lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1) C.momentum_sgd(res.parameters, lr_per_sample, momentum) C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value) C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value) C.set_default_unit_gain_value(False) unit_gain_value = C.default_unit_gain_value() assert not unit_gain_value lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1) C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum) C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value) C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value) lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3], minibatch_size=1) C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True) C.set_default_unit_gain_value(True) unit_gain_value = C.default_unit_gain_value() assert unit_gain_value lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], minibatch_size=1) C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum) C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value) 
C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value) gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8] lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1, epoch_size=100) C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True) C.adadelta(res.parameters, lr_per_sample)
def test_momentum_schedule_per_sample(params, expectation):
    l = C.momentum_schedule(*params)
    assert [l[i] for i in range(len(expectation))] == expectation
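This parametrized test is driven by the MOMENTUM_SCHEDULE_PARAMS table shown earlier: the schedule object acts as a per-index lookup of momentum values. Below is a minimal sketch of how one row of that table is exercised; the values come straight from the table, while the local variable names are illustrative only.

import cntk as C

# One row from MOMENTUM_SCHEDULE_PARAMS: constructor arguments and the
# values the schedule is expected to yield when indexed.
params = ([0.2, 0.4], 5)                  # schedule values, epoch_size=5
expectation = [0.2] * 5 + [0.4] * 20      # 0.2 for the first 5 units, then 0.4

schedule = C.momentum_schedule(*params)
assert [schedule[i] for i in range(len(expectation))] == expectation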
def test_lattice_deserializer(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    data_dir = ''
    if 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ:
        data_dir = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
    else:
        print('CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY environment variable is not defined')
    print(data_dir)

    data_dir = os.path.join(data_dir, "Speech", "AN4Corpus", "v0")
    os.chdir(data_dir)

    feature_dimension = 33
    feature = C.sequence.input_variable(feature_dimension)

    label_dimension = 133
    label = C.sequence.input_variable(label_dimension)

    axis_lattice = C.Axis.new_unique_dynamic_axis('lattice_axis')
    lattice = C.sequence.input_variable(1, sequence_axis=axis_lattice)

    train_feature_filepath = os.path.join(data_dir, "glob_0000.scp")
    train_label_filepath = os.path.join(data_dir, "glob_0000.mlf")
    train_lattice_index_path = os.path.join(data_dir, "latticeIndex.txt")
    mapping_filepath = os.path.join(data_dir, "state.list")

    train_feature_stream = C.io.HTKFeatureDeserializer(C.io.StreamDefs(
        speech_feature=C.io.StreamDef(shape=feature_dimension, scp=train_feature_filepath)))
    train_label_stream = C.io.HTKMLFDeserializer(
        mapping_filepath,
        C.io.StreamDefs(speech_label=C.io.StreamDef(shape=label_dimension, mlf=train_label_filepath)),
        True)
    train_lattice_stream = C.io.LatticeDeserializer(
        train_lattice_index_path,
        C.io.StreamDefs(speech_lattice=C.io.StreamDef()))
    train_data_reader = C.io.MinibatchSource(
        [train_feature_stream, train_label_stream, train_lattice_stream], frame_mode=False)
    train_input_map = {feature: train_data_reader.streams.speech_feature,
                       label: train_data_reader.streams.speech_label,
                       lattice: train_data_reader.streams.speech_lattice}

    feature_mean = np.fromfile(os.path.join("GlobalStats", "mean.363"), dtype=float, count=feature_dimension)
    feature_inverse_stddev = np.fromfile(os.path.join("GlobalStats", "var.363"), dtype=float, count=feature_dimension)
    feature_normalized = (feature - feature_mean) * feature_inverse_stddev

    with C.default_options(activation=C.sigmoid):
        z = C.layers.Sequential([
            C.layers.For(range(3), lambda: C.layers.Recurrence(C.layers.LSTM(1024))),
            C.layers.Dense(label_dimension)
        ])(feature_normalized)

    mbsize = 1024
    mbs_per_epoch = 10
    max_epochs = 2

    symListPath = os.path.join(data_dir, "CY2SCH010061231_1369712653.numden.lats.symlist")
    phonePath = os.path.join(data_dir, "model.overalltying")
    stateListPath = os.path.join(data_dir, "state.list")
    transProbPath = os.path.join(data_dir, "model.transprob")

    criteria = C.lattice_sequence_with_softmax(label, z, z, lattice, symListPath, phonePath, stateListPath, transProbPath)
    err = C.classification_error(label, z)

    lr = C.learning_parameter_schedule_per_sample([(3, .01), (1, .001)])
    mm = C.momentum_schedule([(1000, 0.9), (0, 0.99)], mbsize)
    learner = C.momentum_sgd(z.parameters, lr, mm)
    trainer = C.Trainer(z, (criteria, err), learner)

    C.logging.log_number_of_parameters(z)
    progress_printer = C.logging.progress_print.ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):
        for mb in range(mbs_per_epoch):
            minibatch = train_data_reader.next_minibatch(mbsize, input_map=train_input_map)
            trainer.train_minibatch(minibatch)
            progress_printer.update_with_trainer(trainer, with_metric=True)
        progress_printer.epoch_summary(with_metric=True)

    assert np.allclose(trainer.previous_minibatch_evaluation_average, 0.15064, atol=TOLERANCE_ABSOLUTE)
    assert np.allclose(trainer.previous_minibatch_loss_average, 0.035923, atol=TOLERANCE_ABSOLUTE)
    assert (trainer.previous_minibatch_sample_count == 218)
    assert (trainer.total_number_of_samples_seen == 5750)
    print("Completed successfully.")
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))
    res = i * w

    # Test the new API: learning_parameter_schedule

    # Explicit reference minibatch size; the learning rate is given as a plain number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch size
    # With a plain-number learning rate, the schedule picks up its reference minibatch size from the learner:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    # No explicit reference minibatch size; the learning rate is a schedule without one either:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size
    # Neither side specifies a reference minibatch size, so the schedule defaults to IGNORE as well:
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # The schedule carries its own reference minibatch size (20), independent of the learner's (25):
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # The schedule's reference minibatch size is kept even when the learner specifies none:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # No explicit reference minibatch size; the learning rate is a schedule without one either:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # Explicitly passing minibatch_size=C.learners.IGNORE switches the learner into compatible mode:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # In compatible mode the schedule still keeps its own reference minibatch size (20):
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # Every built-in learner accepts a plain-number learning rate plus an explicit reference minibatch size:
    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8, minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    # Learning rates given as lists advance every epoch_size samples:
    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9, 0.1, 0.001], variance_momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9], variance_momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8, minibatch_size=32, epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1]*3 + [0.2]*2 + [0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
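For readers skimming the test above, the following condensed sketch restates the reference-minibatch-size rules it asserts; the parameter p is a throwaway example and not part of the original test.

import cntk as C

p = C.parameter(shape=(1,), init=1.0)

# A plain-number learning rate inherits the learner's reference minibatch size ...
sgd_a = C.sgd([p], lr=0.1, minibatch_size=25)
assert sgd_a._learning_rate_schedule.minibatch_size == 25

# ... while an explicit schedule keeps the reference minibatch size it was built with.
sgd_b = C.sgd([p], lr=C.learning_parameter_schedule(0.1, minibatch_size=20), minibatch_size=25)
assert sgd_b._learning_rate_schedule.minibatch_size == 20

# Passing C.learners.IGNORE as the learner's minibatch size switches it into compatible mode.
sgd_c = C.sgd([p], lr=C.learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
assert sgd_c.is_compatible_mode()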
def train_and_test(reader_train, reader_test, model_func):

    ###############################################
    # Training the model
    ###############################################

    # Instantiate the input and the label variables
    input = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    # Create the model function
    model = model_func(input)

    # The labels for this network are the same as the input MNIST image.
    # Note: inside the model we scale the input to the 0-1 range,
    # hence we rescale the label to the same range.
    # We show how one can use a custom loss function:
    #   loss = -(y * log(p) + (1 - y) * log(1 - p)), where p = model output and y = target
    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    # Training config
    epoch_size = 30000            # 30000 samples is half the dataset size
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) // minibatch_size

    # Instantiate the trainer object to drive the model training
    lr_per_sample = [0.00003]
    lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size)

    # Momentum which is applied on every minibatch_size = 64 samples
    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    # We use a variant of the Adam optimizer which is known to work well on this dataset.
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    # Instantiate the trainer
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # Map the data streams to the input and labels.
    # Note: for autoencoders input == label
    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for i in range(num_minibatches_to_train):
        # Read a minibatch from the training data file
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)

        # Run the trainer to perform model training
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric * 100.0) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from the training data
    #############################################################################

    # Test data for trained model
    test_minibatch_size = 32
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for i in range(0, int(num_minibatches_to_test)):
        # We are loading test data in batches specified by test_minibatch_size.
        # Each data point in the minibatch is a MNIST digit image of 784 dimensions
        # with one pixel per dimension that we will encode / decode with the
        # trained model.
        data = reader_test.next_minibatch(test_minibatch_size, input_map=test_input_map)

        # Evaluate the trained model on the minibatch data
        eval_error = trainer.test_minibatch(data)

        # Accumulate the evaluation error weighted by the minibatch size
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size

    # Average of evaluation errors of all test minibatches
    test_error = (metric_numer * 100.0) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
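train_and_test above receives its readers and the globals input_dim / isFast from the surrounding script. A minimal sketch of how reader_train / reader_test could be built follows; the file paths, the create_reader helper, and the stream names assume the standard CNTK MNIST CTF layout and are not part of the original.

import cntk as C

input_dim = 784          # 28 x 28 pixel MNIST images, flattened
num_label_classes = 10

def create_reader(path, is_training):
    # CTF-format MNIST files expose 'features' (784-dim) and 'labels' (10-dim) streams
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            labels=C.io.StreamDef(field='labels', shape=num_label_classes),
            features=C.io.StreamDef(field='features', shape=input_dim))),
        randomize=is_training,
        max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)

# Example usage (paths are placeholders):
# reader_train = create_reader("Train-28x28_cntk_text.txt", True)
# reader_test = create_reader("Test-28x28_cntk_text.txt", False)
# model, train_err, test_err = train_and_test(reader_train, reader_test, model_func)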
    ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
    (([0.2, 0.4], 0, 5), [0.2]*5 + [0.4]*20, 0),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 0, 5), [0.2]*15 + [0.4]*10 + [0.8]*20, 0),
]

MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2,), [0.2]),
    ((0.2,), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2]*5 + [0.4]*20),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2]*15 + [0.4]*10 + [0.8]*20),
]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.momentum_sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9))]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
def main():
    # We keep up to 14 inputs from a day
    TIMESTEPS = int(input("TIMESTEPS: "))

    # 20000 is the maximum total output in our dataset. We normalize all values with
    # this so our inputs are within the 0.0 to 1.0 range.
    NORMALIZE = int(input("NORMALIZE: "))

    # Process batches of 10 days
    BATCH_SIZE = int(input("BATCH_SIZE: "))
    BATCH_SIZE_TEST = int(input("BATCH_SIZE_TEST: "))

    # Specify the internal-state dimensions of the LSTM cell
    H_DIMS = int(input("H_DIMS: "))

    data_source = input("Source(1=solar,2=local,3=sin,4=my): ")
    if data_source == "1" or data_source == "":
        X, Y = get_solar_old(TIMESTEPS, NORMALIZE)
    elif data_source == "2":
        X, Y = get_solar(TIMESTEPS, NORMALIZE)
    elif data_source == "3":
        X, Y = get_sin(5, 5, input("Data length: "))
    else:
        X, Y = get_my_data(H_DIMS, H_DIMS)

    epochs = input("Epochs: ")
    if epochs == "":
        EPOCHS = 100
    else:
        EPOCHS = int(epochs)

    start_time = time.time()

    # Input sequences
    x = C.sequence.input_variable(1)

    model_file = "{}_epochs.model".format(EPOCHS)

    if not os.path.exists(model_file):
        print("Training model {}...".format(model_file))

        # Create the model
        z = create_model(x, H_DIMS)

        # Expected output (label); the dynamic axes of the model output
        # are also used as the dynamic axes of the label input
        var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

        # The learning rate
        learning_rate = 0.005
        lr_schedule = C.learning_parameter_schedule(learning_rate)

        # Loss function
        loss = C.squared_error(z, var_l)

        # Use squared error to determine error for now
        error = C.squared_error(z, var_l)

        # Use the fsadagrad learner (an Adam-style optimizer)
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=BATCH_SIZE)
        learner = C.fsadagrad(z.parameters, lr=lr_schedule, momentum=momentum_schedule)
        trainer = C.Trainer(z, (loss, error), [learner])

        # Training
        loss_summary = []
        start = time.time()
        for epoch in range(0, EPOCHS):
            for x_batch, l_batch in next_batch(X, Y, "train", BATCH_SIZE):
                trainer.train_minibatch({x: x_batch, var_l: l_batch})

            if epoch % (EPOCHS / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f}".format(epoch, training_loss))

        print("Training took {:.1f} sec".format(time.time() - start))

        # Print the train, validation and test errors
        for labeltxt in ["train", "val", "test"]:
            print("mse for {}: {:.6f}".format(
                labeltxt, get_mse(trainer, x, X, Y, BATCH_SIZE, var_l, labeltxt)))

        z.save(model_file)
    else:
        z = C.load_model(model_file)
        x = cntk.logging.find_all_with_name(z, "")[-1]

    # Print out all layers in the model
    print("Loading {} and printing all nodes:".format(model_file))
    node_outputs = cntk.logging.find_all_with_name(z, "")
    for n in node_outputs:
        print(" {}".format(n))

    # Predict
    # f, a = plt.subplots(2, 1, figsize=(12, 8))
    for j, ds in enumerate(["val", "test"]):
        fig = plt.figure()
        a = fig.add_subplot(2, 1, 1)
        results = []
        for x_batch, y_batch in next_batch(X, Y, ds, BATCH_SIZE_TEST):
            pred = z.eval({x: x_batch})
            results.extend(pred[:, 0])
        # Because we normalized the input data, we need to multiply the prediction
        # by NORMALIZE to get back the real values.
        a.plot((Y[ds] * NORMALIZE).flatten(), label=ds + " raw")
        a.plot(np.array(results) * NORMALIZE, label=ds + " pred")
        a.legend()
        fig.savefig("{}_chart_{}_epochs.jpg".format(ds, EPOCHS))

    print("Delta: ", time.time() - start_time)
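The script above calls create_model(x, H_DIMS), which is defined elsewhere. A minimal sketch of one plausible definition follows (a single-layer LSTM regressor); the exact layer composition is an assumption, not a copy of the original.

import cntk as C

def create_model(x, h_dims):
    """Sequence-to-one LSTM regressor: x -> Recurrence(LSTM(h_dims)) -> last step -> Dense(1)."""
    with C.layers.default_options(initial_state=0.1):
        m = C.layers.Recurrence(C.layers.LSTM(h_dims))(x)  # run an LSTM over the input sequence
        m = C.sequence.last(m)                             # keep only the final hidden state
        m = C.layers.Dropout(0.2)(m)                       # light regularization (assumed)
        return C.layers.Dense(1)(m)                        # single regression output per sequence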
    ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
    (([0.2, 0.4], 0, 5), [0.2]*5 + [0.4]*20, 0),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 0, 5), [0.2]*15 + [0.4]*10 + [0.8]*20, 0),
]

MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2,), [0.2]),
    ((0.2,), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2]*5 + [0.4]*20),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2]*15 + [0.4]*10 + [0.8]*20),
]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
    lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
    lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation

@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)