def test_epochsize_wrn_for_momentum_time_constant():
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        C.momentum_as_time_constant_schedule(1100, epoch_size=1000)
        assert len(w) == 1
        assert issubclass(w[-1].category, RuntimeWarning)
        assert "epoch_size" in str(w[-1].message)
def test_momentum_schedule():
    m = 2500
    ms = C.momentum_as_time_constant_schedule([m])
    assert ms[0] == np.exp(-1.0 / np.asarray(m))

    ms = C.momentum_as_time_constant_schedule(m)
    assert ms[0] == np.exp(-1.0 / np.asarray(m))

    mlist = [980, 520]
    msl = C.momentum_as_time_constant_schedule(mlist)
    expected = np.exp(-1.0 / np.asarray(mlist))
    assert all(mi == ei for mi, ei in zip(msl, expected))
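The following is a minimal standalone sketch (not one of the collected snippets) of the relationship the test above asserts: a time constant tc maps to a per-sample momentum of exp(-1/tc). The names tc and per_sample_momentum are illustrative only.

# sketch: time constant vs. the per-sample momentum value the schedule stores
import numpy as np
import cntk as C

tc = 2500                                  # time constant in samples (assumed value)
per_sample_momentum = np.exp(-1.0 / tc)    # ~0.9996
tc_schedule = C.momentum_as_time_constant_schedule(tc)
assert abs(tc_schedule[0] - per_sample_momentum) < 1e-12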
def create_trainer(self):
    try:
        p = self.output.parameters
        # Three of the four parameters are learned by the block_momentum_distributed_learner.
        bmd_learner = cntk.block_momentum_distributed_learner(
            cntk.momentum_sgd([p[0], p[1], p[2]],
                              cntk.learning_parameter_schedule(0.0001),
                              cntk.momentum_as_time_constant_schedule(1000)),
            block_size=1000,
            block_learning_rate=0.01,
            block_momentum_as_time_constant=1000)

        # New API to mark which learner to use for metric aggregation.
        bmd_learner.set_as_metric_aggregator()

        # The last parameter is learned by the data_parallel_distributed_learner.
        momentum_schedule = cntk.momentum_schedule_per_sample(0.9990913221888589)
        lr_per_sample = cntk.learning_parameter_schedule_per_sample(0.007)
        dpd_learner = cntk.data_parallel_distributed_learner(
            cntk.momentum_sgd([p[3]], lr_per_sample, momentum_schedule, True))

        comm_rank = cntk.distributed.Communicator.rank()
        self.trainer = cntk.Trainer(
            self.output, (self.ce, self.err), [bmd_learner, dpd_learner],
            [cntk.logging.ProgressPrinter(freq=progress_freq, tag="Training", rank=comm_rank)])
    except RuntimeError:
        self.trainer = None
    return
def train(streamf):
    global net
    minibatch_size = 1024
    max_epochs = 2000
    epoch_size = 50000
    net = nn(input_var)
    loss = cntk.losses.binary_cross_entropy(net, label_var)
    error = cntk.classification_error(net, label_var)

    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = cntk.learning_rate_schedule(lr_per_minibatch, cntk.UnitType.minibatch)
    momentum_as_time_constant = cntk.momentum_as_time_constant_schedule(700)
    learner = cntk.adam(net.parameters,
                        lr_schedule,
                        momentum=momentum_as_time_constant,
                        gradient_clipping_threshold_per_sample=15,
                        gradient_clipping_with_truncation=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)

    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }

    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
        trainer.summarize_training_progress()
    return trainer
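A small illustrative sketch (values borrowed from the snippet above; variable names exist only for this example) of why the per-sample rates are multiplied by minibatch_size before building a UnitType.minibatch schedule: the schedule value is applied once per minibatch, so scaling by the minibatch size keeps the effective per-sample step roughly unchanged.

# sketch: converting per-sample learning rates to per-minibatch rates
minibatch_size = 1024                        # assumed, matching the snippet above
lr_per_sample = [3e-4] * 4 + [1.5e-4]
lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
# e.g. 3e-4 per sample becomes 0.3072 for a 1024-sample minibatch
assert abs(lr_per_minibatch[0] - 0.3072) < 1e-12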
def train(streamf):
    global net
    minibatch_size = 512
    max_epochs = 2000
    epoch_size = 48985
    net = nn(input_s, input_h, input_l, input_v)
    loss = cntk.losses.cross_entropy_with_softmax(net, label_var)
    error = cntk.classification_error(net, label_var)

    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = cntk.learning_rate_schedule(lr_per_minibatch, cntk.UnitType.minibatch)
    momentum_as_time_constant = cntk.momentum_as_time_constant_schedule(700)
    learner = cntk.fsadagrad(net.parameters, lr_schedule, momentum_as_time_constant)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)

    input_map = {
        input_s: streamf.streams.spread,
        input_h: streamf.streams.high,
        input_l: streamf.streams.low,
        input_v: streamf.streams.volume,
        label_var: streamf.streams.labels
    }

    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
        trainer.summarize_training_progress()
    return trainer
def train(streamf):
    input_var = cntk.input_variable(45, np.float32, name='features',
                                    dynamic_axes=cntk.axis.Axis.default_input_variable_dynamic_axes())
    label_var = cntk.input_variable(3, np.float32, name='labels')
    net = nn(input_var)
    loss = cntk.squared_error(net, label_var)
    error = cntk.squared_error(net, label_var)

    learning_rate = 0.02
    lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(5000 / -np.math.log(0.9))
    learner = cntk.fsadagrad(net.parameters, lr=lr_schedule,
                             momentum=momentum_time_constant, unit_gain=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)

    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }

    minibatch_size = 5000
    num_samples_per_sweep = 2000
    for i in range(0, num_samples_per_sweep):
        dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(dat1)
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        if training_loss < 0.002:
            break
    return trainer
def test_trainer(tmpdir, no_eval_function):
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (ce, errs),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    external_state = {"additional external state": math.pi,
                      "nested dict": {"a": "b"},
                      "list": [1, 2, 3]}
    trainer.save_checkpoint(p, external_state)
    restored_state = trainer.restore_from_checkpoint(p)
    assert external_state == restored_state

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], C.Learner)
def create_trainer():
    loss, label_error = create_criterion_function_preferred(dec, y)

    schedule_step = print_freq
    lr_per_sample = [2e-3] * 2 * schedule_step + [1e-3] * 2 * schedule_step + [5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch, C.UnitType.minibatch, epoch_size)

    momentum_as_time_constant = C.momentum_as_time_constant_schedule(1000)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = C.Trainer(dec, (loss, label_error), learner, progress_printer)
    if restore:
        trainer.restore_from_checkpoint("model-5.cntk")
    C.logging.log_number_of_parameters(dec)
    return trainer
def train(streamf):
    global net
    net = nn(input_var)
    loss = cntk.losses.squared_error(net, label_var)
    error = cntk.squared_error(net, label_var)

    learning_rate = 0.01
    lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(140 / -np.math.log(0.9))
    learner = cntk.fsadagrad(net.parameters, lr=lr_schedule,
                             momentum=momentum_time_constant, unit_gain=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)

    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }

    minibatch_size = 1024
    max_epochs = 500
    epoch_size = 48985
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
        trainer.summarize_training_progress()
    return trainer
def test_momentum_schedule():
    m = 2500
    ms = C.momentum_as_time_constant_schedule([m])
    # all time-constant schedules are per-sample schedules
    assert ms.minibatch_size == 1
    assert ms[0] == np.exp(-1.0 / np.asarray(m))

    ms = C.momentum_as_time_constant_schedule(m)
    assert ms.minibatch_size == 1
    assert ms[0] == np.exp(-1.0 / np.asarray(m))

    mlist = [980, 520]
    msl = C.momentum_as_time_constant_schedule(mlist)
    assert ms.minibatch_size == 1
    expected = np.exp(-1.0 / np.asarray(mlist))
    assert all(mi == ei for mi, ei in zip(msl, expected))
def train(reader, model_func, max_epochs=10):

    # Instantiate the model function; x is the input (feature) variable
    model = model_func(x)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000        # 18000 samples is half the dataset size
    minibatch_size = 70

    # LR schedule over epochs
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
    # (we don't run this many epochs, but if we did, these are good values)
    lr_per_sample = [0.003] * 4 + [0.0015] * 24 + [0.0003]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch, C.UnitType.minibatch, epoch_size)

    # Momentum schedule
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(700)

    # The Adam optimizer is known to work well on this dataset.
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.sgd(
        parameters=model.parameters,
        lr=lr_schedule,
        #momentum=momentum_as_time_constant,
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)

    # Uncomment below for more detailed logging
    #progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs)

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)

    t = 0
    for epoch in range(max_epochs):          # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:                 # loop over minibatches on the epoch
            data = reader.next_minibatch(minibatch_size, input_map={  # fetch minibatch
                x: reader.streams.query,
                y: reader.streams.slot_labels
            })
            trainer.train_minibatch(data)    # update model with it
            t += data[y].num_samples         # samples so far
        trainer.summarize_training_progress()
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features=StreamDef(shape=feature_dim,
                                   context=(context, context),
                                   scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential([For(range(3), lambda: Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
def fineTuneModel(folder_with_data, path_to_label_csv="label.csv",
                  original_model_path="../vgg13.model", max_epochs=10):
    trainingValues = getData(folder_with_data, path_to_label_csv)

    input_var = ct.input((1, height, width), np.float32)
    label_var = ct.input((num_classes), np.float32)

    print("cloning old model")
    z = clone_model(original_model_path, input_var)

    loss = ct.cross_entropy_with_softmax(z, label_var)
    metric = ct.classification_error(z, label_var)

    minibatch_size = 32
    epoch_size = trainingValues.getLengthOfData()
    lr_per_minibatch = [learning_rate] * 10 + [learning_rate / 2.0]
    mm_time_constant = -minibatch_size / np.log(0.9)

    lr_schedule = ct.learning_rate_schedule(lr_per_minibatch,
                                            unit=ct.UnitType.minibatch,
                                            epoch_size=epoch_size)
    mm_schedule = ct.momentum_as_time_constant_schedule(mm_time_constant)

    learner = ct.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    trainer = ct.Trainer(z, (loss, metric), learner)
    print("created trainer and learner")

    print("training started")
    epoch = 0
    while epoch < max_epochs:
        trainingValues.reset()

        # Training
        start_time = time.time()
        training_loss = 0
        training_accuracy = 0

        # mini-batch learning: while there is data for a mini batch
        while trainingValues.hasMoreMinibatches():
            # x - images, y - labels/emotions
            x, y, currBatchSize = trainingValues.getNextMinibatch(minibatch_size)
            trainer.train_minibatch({input_var: x, label_var: y})

            # maintain stats:
            training_loss += trainer.previous_minibatch_loss_average * currBatchSize
            training_accuracy += trainer.previous_minibatch_evaluation_average * currBatchSize

        training_accuracy /= trainingValues.getLengthOfData()
        training_accuracy = 1.0 - training_accuracy

        print("Epoch took:", time.time() - start_time, "seconds")
        print("training accuracy:\t\t{:.2f}%".format(training_accuracy * 100))
        epoch += 1

    # SAVE MODEL
    z.save("../vgg13.model")
def create_learner(model):
    '''Create the optimization method'''
    lr_per_sample = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_sample)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_sample)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
def create_learner(model):
    '''Create the optimization method'''
    lr_per_minibatch = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
def _create_learner(self):
    lr_per_sample = [3e-5] * 10 + [1.5e-5] * 20 + [1e-5]
    lr_per_minibatch = [lr * self.minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch,
                                           C.UnitType.minibatch,
                                           self.epoch_size)
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(20)
    learner = C.adam(parameters=self.model.parameters,
                     lr=3e-4,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=0.21,
                     gradient_clipping_with_truncation=True)
    return learner
def modelInit(self):
    # create output model folder:
    self.output_model_folder = os.path.join(self.base_folder, R'models')
    if not os.path.exists(self.output_model_folder):
        os.makedirs(self.output_model_folder)

    self.model = VGG13(self.num_classes)

    self.input_var = ct.input((1, self.model.input_height, self.model.input_width), np.float32)
    self.label_var = ct.input((self.num_classes), np.float32)
    print("initialized model")

    self.genData()

    # ct.input_variable takes the number of dimensions and automatically creates
    # one-hot encoded input; ct.input doesn't.

    # criterion of the model: loss and metric
    # loss   = cross_entropy_with_softmax
    # metric = classification_error
    self.z = self.model.model(self.input_var)
    loss = ct.cross_entropy_with_softmax(self.z, self.label_var)
    metric = ct.classification_error(self.z, self.label_var)
    """
    pred = ct.softmax(z)
    loss = ct.negate(ct.reduce_sum(ct.element_times(label_var, ct.log(pred)), axis=-1))
    """

    minibatch_size = 32
    epoch_size = self.trainingValues.getLengthOfData()

    # THROW MOMENTUM:
    # use eta for 20 minibatches, then eta/2 for the next 20, then eta/10 for the remaining minibatches
    lr_per_minibatch = [self.model.learning_rate] * 20 + \
                       [self.model.learning_rate / 2.0] * 20 + \
                       [self.model.learning_rate / 10.0]
    mm_time_constant = -minibatch_size / np.log(0.9)

    lr_schedule = ct.learning_rate_schedule(lr_per_minibatch,
                                            unit=ct.UnitType.minibatch,
                                            epoch_size=epoch_size)
    mm_schedule = ct.momentum_as_time_constant_schedule(mm_time_constant)

    # construct the trainer
    # the learner performs the model updates; it can be adam() or sgd()
    learner = ct.momentum_sgd(self.z.parameters, lr_schedule, mm_schedule)

    # The Trainer optimizes the loss by SGD, and logs the metric
    self.trainer = ct.Trainer(self.z, (loss, metric), learner)
    print("created trainer and learner")
def create_trainer(self):
    learner = cntk.block_momentum_distributed_learner(
        cntk.momentum_sgd(self.output.parameters,
                          cntk.learning_parameter_schedule(0.0001),
                          cntk.momentum_as_time_constant_schedule(1000)),
        block_size=1000,
        block_learning_rate=0.01,
        block_momentum_as_time_constant=1000)

    comm_rank = cntk.distributed.Communicator.rank()
    self.trainer = cntk.Trainer(
        self.output, (self.ce, self.err), [learner],
        [cntk.logging.ProgressPrinter(freq=progress_freq, tag="Training", rank=comm_rank)])
def test_output_to_retain():
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (ce, errs),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])
    assert np.allclose(var_map[z_output], np.asarray(in1_value) + 20)
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       True, minibatch_size=0)
    ])

    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')
    z.save(filepath)

    buf = open(filepath, 'rb').read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']
    assert state['forward_calls'] == state['backward_calls'] == 100
def create_trainer():
    masked_dec = dec * C.ops.clip(C.ops.argmax(y), 0, 1)
    loss, label_error = criterion(masked_dec, y)
    loss *= C.ops.clip(C.ops.argmax(y), 0, 1)

    lr_schedule = C.learning_parameter_schedule_per_sample(
        [1e-3] * 2 + [5e-4] * 2 + [1e-4], epoch_size=int(epoch_size))
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(1000)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = C.Trainer(dec, (loss, label_error), learner, progress_printer)
    C.logging.log_number_of_parameters(dec)
    return trainer
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k, when=lambda arg: np.sum(arg) > 1, execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback(
        'conditional_exec_lambda',
        lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0), [
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)
    ])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
def test_ext_lambdafunc(tmpdir):
    dim = 4

    class CallbackCounter(object):
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    p = C.parameter(shape=(dim,), init=1)
    i = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = i * p
    m = LambdaFunc(k, when=lambda arg: np.sum(arg) > 1, execute=cb.inc)
    m = C.user_function(m)
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback(
        'conditional_exec_lambda',
        lambda x, *unused: LambdaFunc(x, when=lambda arg: np.sum(arg) > 1, execute=cb.inc))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0), [
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)
    ])

    i = 0
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
def run_distributed_training(tmpdir, create_func):
    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True

    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [dist_learner])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
def create_trainer():
    loss, label_error = create_criterion_function_preferred(dec, y)

    schedule_step = 1 * print_freq
    lr_per_sample = [1e-3]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch, C.UnitType.minibatch, epoch_size)

    momentum_as_time_constant = C.momentum_as_time_constant_schedule(0)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    trainer = C.Trainer(dec, (loss, label_error), learner)
    trainer.restore_from_checkpoint(model_file)
    return trainer
def test_ext_train(tmpdir):
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0), [
        C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)
    ])

    i = 0
    while i < 100:
        i += 1
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')
    z.save(filepath)

    buf = open(filepath, 'rb').read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']
    assert state['forward_calls'] == state['backward_calls'] == 100
def create_trainer(network, minibatch_size, epoch_size, progress_printer):
    """ Create trainer """
    # Set learning parameters
    lr_per_sample = [0.0015625] * 10 + [0.00046875] * 10 + [0.00015625]
    momentum_time_constant = [0] * 20 + [-minibatch_size / np.log(0.9)]
    l2_reg_weight = 0.002

    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)

    return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
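A short standalone check (with an assumed minibatch size, used only for illustration) of the -minibatch_size / np.log(0.9) pattern that appears here and in several snippets below: since a time constant tc corresponds to a per-sample momentum of exp(-1/tc), choosing tc = -minibatch_size / ln(0.9) makes the accumulated momentum over one minibatch equal 0.9.

# sketch: why tc = -minibatch_size / ln(0.9) targets a per-minibatch momentum of 0.9
import numpy as np

minibatch_size = 64                          # assumed for illustration
tc = -minibatch_size / np.log(0.9)           # ~607.4 samples
per_minibatch_momentum = np.exp(-minibatch_size / tc)
assert abs(per_minibatch_momentum - 0.9) < 1e-12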
def test_learner_init_legacy():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner._learning_rate_schedule.minibatch_size == 1  # the deprecated per-sample schedule should not use compatible mode
    assert learner.learning_rate() == 0.1

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per-minibatch instruction for the learner
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch))
    assert learner.is_compatible_mode() == False
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == 0

    # for backcompatibility test: in reset_learning_rate, the learner won't receive the
    # reference minibatch size from the schedule; the user needs to specify it explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    # for backcompatibility test
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # this will be deprecated in future version: this is a logically invalid combination,
    # but it was the only way to use mean gradient and set the learning rate in the past.
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample),
                  use_mean_gradient=True)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    # test the override in the new version
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch size is still 0

    # for backcompatibility test
    # this will be deprecated in future version
    # The UnitType will provide per-minibatch instruction for the learner
    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.minibatch),
                  minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.learning_rate() == 0.1
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE

    # for backcompatibility test: in reset_learning_rate, the learner won't receive the
    # reference minibatch size from the schedule; the user needs to specify it explicitly
    # this will be deprecated in future version
    learner = sgd(res.parameters, lr=0.1)
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0
    learner.minibatch_size = C.learners.IGNORE  # reset to be per minibatch
    assert learner.minibatch_size == C.learners.IGNORE
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.is_compatible_mode() == True

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    # back-compatible API test
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], unit=UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_rate_schedule([0.1, 0.2], unit=UnitType.sample, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample, use_mean_gradient=True)
def create_trainer(self):
    try:
        learner = cntk.block_momentum_distributed_learner(
            cntk.momentum_sgd(self.output.parameters,
                              cntk.learning_parameter_schedule(0.0001),
                              cntk.momentum_as_time_constant_schedule(1000)),
            block_size=1000,
            block_learning_rate=0.01,
            block_momentum_as_time_constant=1000)

        comm_rank = cntk.distributed.Communicator.rank()
        self.trainer = cntk.Trainer(
            self.output, (self.ce, self.err), [learner],
            [cntk.logging.ProgressPrinter(freq=progress_freq, tag="Training", rank=comm_rank)])
    except RuntimeError:
        self.trainer = None
    return
results = re.findall("Completed successfully.", str_out)
if len(results) != 2:
    print(str_out)
    assert False

if __name__ == '__main__':
    in1 = C.input_variable(shape=1)
    labels = C.input_variable(shape=1)
    p1 = parameter(shape=1)
    p2 = parameter(shape=1)
    n = plus(in1, p1, name='n')
    z = plus(n, p2, name='z')
    ce = squared_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learners = [
        C.distributed.data_parallel_distributed_learner(
            C.momentum_sgd([p1], lr_per_sample, momentum_time_constant, True)),
        C.distributed.data_parallel_distributed_learner(
            C.momentum_sgd([p2], lr_per_sample, momentum_time_constant, True))
    ]

    trainer = C.Trainer(z, ce, dist_learners)
    in1_value = [[1]]
    label_value = [[0]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output

    def check_samples(learners, expected_number_of_samples):
        for learner in learners:
            if learner.total_number_of_samples_seen != expected_number_of_samples:
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    learner = sgd(res.parameters, lr=learning_rate_schedule(0.1, UnitType.sample))
    assert learner.learning_rate() == 0.1
    learner.reset_learning_rate(learning_rate_schedule([1, 2, 3], UnitType.minibatch))
    assert learner.learning_rate() == 1.0

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum_time_constant, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.nesterov(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    lr_per_sample = learning_rate_schedule([0.1] * 3 + [0.2] * 2 + [0.3], UnitType.sample)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_rate_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], UnitType.sample)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    C.fsadagrad(res.parameters, lr_per_sample, momentum_time_constant, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum_time_constant, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.1] * 5
    lr_per_sample = learning_rate_schedule([0.1, 0.2], UnitType.sample, 100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.set_default_use_mean_gradient_value(False)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert not use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)

    C.set_default_use_mean_gradient_value(True)
    use_mean_gradient_value = C.default_use_mean_gradient_value()
    assert use_mean_gradient_value

    C.adadelta(res.parameters, lr_per_sample)
#
# Training action
#

# loss and metric
ce = cross_entropy_with_softmax(z, label_var)
pe = classification_error(z, label_var)

# training config
epoch_size = 6600  #12000 #15000
minibatch_size = 64

# Set training parameters
lr_per_minibatch = learning_rate_schedule([0.01] * 10 + [0.003] * 10 + [0.001],
                                          UnitType.minibatch, epoch_size)
momentum_time_constant = momentum_as_time_constant_schedule(-minibatch_size / np.log(0.9))
l2_reg_weight = 0.001

# trainer object
progress_printer = ProgressPrinter(0)
learner = momentum_sgd(z.parameters,
                       lr=lr_per_minibatch,
                       momentum=momentum_time_constant,
                       l2_regularization_weight=l2_reg_weight)

# =============================================================================
# Create or RESTORE trainer
# =============================================================================
trainer = Trainer(z, (ce, pe), [learner], [progress_printer])
# trainer.restore_from_checkpoint(model_temp_file)
# expected output (label); it shares the dynamic axes of the model output
l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
print(l)

# the learning rate
learning_rate = 0.02
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

# loss function
loss = C.squared_error(z, l)

# use squared error to determine error for now
error = C.squared_error(z, l)

# use fsadagrad optimizer
momentum_time_constant = C.momentum_as_time_constant_schedule(BATCH_SIZE / -math.log(0.9))
learner = C.fsadagrad(z.parameters,
                      lr=lr_schedule,
                      momentum=momentum_time_constant,
                      unit_gain=True)
trainer = C.Trainer(z, (loss, error), [learner])

# train
loss_summary = []
start = time.time()
for epoch in range(0, EPOCHS):
    for x1, y1 in next_batch(X, Y, "train"):
        trainer.train_minibatch({x: x1, l: y1})
    if epoch % (EPOCHS / 10) == 0:
        training_loss = trainer.previous_minibatch_loss_average
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height = train_reader.height
    image_width = train_reader.width
    num_channels = train_reader.channel_count
    sequence_length = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = C.input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with C.default_options(activation=C.relu):
        z = C.layers.Sequential([
            C.layers.Convolution3D((3, 3, 3), 64, pad=True),
            C.layers.MaxPooling((1, 2, 2), (1, 2, 2)),
            C.layers.For(range(3), lambda i: [
                C.layers.Convolution3D((3, 3, 3), [96, 128, 128][i], pad=True),
                C.layers.Convolution3D((3, 3, 3), [96, 128, 128][i], pad=True),
                C.layers.MaxPooling((2, 2, 2), (2, 2, 2))
            ]),
            C.layers.For(range(2), lambda: [
                C.layers.Dense(1024),
                C.layers.Dropout(0.5)
            ]),
            C.layers.Dense(num_output_classes, activation=None)
        ])(input_var)

    # loss and classification error.
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    train_epoch_size = train_reader.size()
    train_minibatch_size = 2

    # Set learning parameters
    lr_per_sample = [0.01] * 10 + [0.001] * 10 + [0.0001]
    lr_schedule = C.learning_rate_schedule(lr_per_sample, epoch_size=train_epoch_size, unit=C.UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule = C.momentum_as_time_constant_schedule([momentum_time_constant])

    # Instantiate the trainer object to drive the model training
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z)
    print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        train_reader.reset()

        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(train_minibatch_size)
            trainer.train_minibatch({input_var: videos, label_var: labels})

        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size = test_reader.size()
    test_minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    test_reader.reset()
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(test_minibatch_size)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch({input_var: videos, label_var: labels}) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom
def train_and_evaluate(reader_train, reader_test, max_epochs, model_func):
    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # Normalize the input
    feature_scale = 1.0 / 256.0
    input_var_norm = element_times(feature_scale, input_var)

    # apply model to input
    z = model_func(input_var_norm, out_dims=num_classes)

    #
    # Training action
    #

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 20000
    minibatch_size = 64

    # Set training parameters
    lr_per_minibatch = learning_rate_schedule([0.01] * 10 + [0.003] * 10 + [0.001],
                                              UnitType.minibatch, epoch_size)
    momentum_time_constant = momentum_as_time_constant_schedule(-minibatch_size / np.log(0.9))
    l2_reg_weight = 0.001

    # trainer object
    progress_printer = ProgressPrinter(0)
    learner = momentum_sgd(z.parameters,
                           lr=lr_per_minibatch,
                           momentum=momentum_time_constant,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), [learner], [progress_printer])

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()
    #progress_printer = ProgressPrinter(tag='Training')

    # perform model training
    stop_run = False
    batch_index = 0
    plot_data = {'batchindex': [], 'loss': [], 'error': []}
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count),
                                               input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it

            sample_count += data[label_var].num_samples  # count samples processed so far

            # For visualization...
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(trainer.previous_minibatch_evaluation_average)

            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            batch_index += 1
            if trainer.previous_minibatch_evaluation_average < 0.025:
                stop_run = True
                break
        if stop_run:
            break
        progress_printer.epoch_summary(with_metric=True)

    #trainer.save_checkpoint(model_temp_file)

    #
    # Evaluation action
    #
    epoch_size = 6600
    minibatch_size = 32

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels
    }

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    # Visualize training result:
    window_width = 32
    loss_cumsum = np.cumsum(np.insert(plot_data['loss'], 0, 0))
    error_cumsum = np.cumsum(np.insert(plot_data['error'], 0, 0))

    # Moving average.
    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0, 0)[window_width:]
    plot_data['avg_loss'] = (loss_cumsum[window_width:] - loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error'] = (error_cumsum[window_width:] - error_cumsum[:-window_width]) / window_width

    plt.figure(1)
    plt.subplot(211)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss ')
    plt.show()

    plt.subplot(212)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error ')
    plt.show()

    return softmax(z)
def main(base_folder, training_mode='majority', model_name='VGG13', max_epochs=100):
    # create needed folders.
    output_model_path = os.path.join(base_folder, R'models')
    output_model_folder = os.path.join(output_model_path, model_name + '_' + training_mode)
    if not os.path.exists(output_model_folder):
        os.makedirs(output_model_folder)

    # creating logging file
    logging.basicConfig(filename=os.path.join(output_model_folder, "train.log"), filemode='w', level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler())

    logging.info("Starting with training mode {} using {} model and max epochs {}.".format(training_mode, model_name, max_epochs))

    # create the model
    num_classes = len(emotion_table)
    model = build_model(num_classes, model_name)

    # set the input variables.
    input_var = ct.input((1, model.input_height, model.input_width), np.float32)
    label_var = ct.input((num_classes), np.float32)

    # read FER+ dataset.
    logging.info("Loading data...")
    train_params = FERPlusParameters(num_classes, model.input_height, model.input_width, training_mode, False)
    test_and_val_params = FERPlusParameters(num_classes, model.input_height, model.input_width, "majority", True)

    train_data_reader = FERPlusReader.create(base_folder, train_folders, "label.csv", train_params)
    val_data_reader = FERPlusReader.create(base_folder, valid_folders, "label.csv", test_and_val_params)
    test_data_reader = FERPlusReader.create(base_folder, test_folders, "label.csv", test_and_val_params)

    # print summary of the data.
    display_summary(train_data_reader, val_data_reader, test_data_reader)

    # get the probabilistic output of the model.
    z = model.model((input_var - 127.5) / 127.5)
    pred = ct.softmax(z)

    epoch_size = train_data_reader.size()
    minibatch_size = 32

    # Training config
    lr_per_minibatch = [model.learning_rate] * 20 + [model.learning_rate / 2.0] * 20 + [model.learning_rate / 10.0]
    mm_time_constant = -minibatch_size / np.log(0.9)
    lr_schedule = ct.learning_rate_schedule(lr_per_minibatch, unit=ct.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule = ct.momentum_as_time_constant_schedule(mm_time_constant)

    # loss and error cost
    train_loss = cost_func(training_mode, pred, label_var)
    pe = ct.classification_error(z, label_var)

    # construct the trainer
    learner = ct.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    trainer = ct.Trainer(z, (train_loss, pe), learner)

    # Get minibatches of images to train with and perform model training
    max_val_accuracy = 0.0
    final_test_accuracy = 0.0
    best_test_accuracy = 0.0

    logging.info("Start training...")
    epoch = 0
    best_epoch = 0
    while epoch < max_epochs:
        train_data_reader.reset()
        val_data_reader.reset()
        test_data_reader.reset()

        # Training
        start_time = time.time()
        training_loss = 0
        training_accuracy = 0
        while train_data_reader.has_more():
            images, labels, current_batch_size = train_data_reader.next_minibatch(minibatch_size)

            # Specify the mapping of input variables in the model to actual minibatch data to be trained with
            trainer.train_minibatch({input_var: images, label_var: labels})

            # keep track of statistics.
            training_loss += trainer.previous_minibatch_loss_average * current_batch_size
            training_accuracy += trainer.previous_minibatch_evaluation_average * current_batch_size

        training_accuracy /= train_data_reader.size()
        training_accuracy = 1.0 - training_accuracy

        # Validation
        val_accuracy = 0
        while val_data_reader.has_more():
            images, labels, current_batch_size = val_data_reader.next_minibatch(minibatch_size)
            val_accuracy += trainer.test_minibatch({input_var: images, label_var: labels}) * current_batch_size

        val_accuracy /= val_data_reader.size()
        val_accuracy = 1.0 - val_accuracy

        # if validation accuracy goes higher, we compute test accuracy
        test_run = False
        if val_accuracy > max_val_accuracy:
            best_epoch = epoch
            max_val_accuracy = val_accuracy

            trainer.save_checkpoint(os.path.join(output_model_folder, "model_{}".format(best_epoch)))

            test_run = True
            test_accuracy = 0
            while test_data_reader.has_more():
                images, labels, current_batch_size = test_data_reader.next_minibatch(minibatch_size)
                test_accuracy += trainer.test_minibatch({input_var: images, label_var: labels}) * current_batch_size

            test_accuracy /= test_data_reader.size()
            test_accuracy = 1.0 - test_accuracy
            final_test_accuracy = test_accuracy
            if final_test_accuracy > best_test_accuracy:
                best_test_accuracy = final_test_accuracy

        logging.info("Epoch {}: took {:.3f}s".format(epoch, time.time() - start_time))
        logging.info("  training loss:\t{:e}".format(training_loss))
        logging.info("  training accuracy:\t\t{:.2f} %".format(training_accuracy * 100))
        logging.info("  validation accuracy:\t\t{:.2f} %".format(val_accuracy * 100))
        if test_run:
            logging.info("  test accuracy:\t\t{:.2f} %".format(test_accuracy * 100))

        epoch += 1

    logging.info("")
    logging.info("Best validation accuracy:\t\t{:.2f} %, epoch {}".format(max_val_accuracy * 100, best_epoch))
    logging.info("Test accuracy corresponding to best validation:\t\t{:.2f} %".format(final_test_accuracy * 100))
    logging.info("Best test accuracy:\t\t{:.2f} %".format(best_test_accuracy * 100))

    pred.save('ferplus.onnx', ct.ModelFormat.ONNX)