def _sanitize_value(shape, value, dtype, device, is_param=False):
    """Turn *value* into an NDArrayView of *dtype* on *device*.

    When ``value`` is None a fresh view is allocated from ``shape``
    (randomly initialized when ``is_param`` is True); otherwise ``value``
    is coerced to a NumPy array of the matching dtype and wrapped.
    Raises ``ValueError`` when both ``shape`` and ``value`` are None.
    """
    np_dtype = utils.sanitize_dtype_numpy(dtype)
    cntk_dtype = utils.sanitize_dtype_cntk(dtype)

    if value is None:
        if shape is None:
            raise ValueError('you need to specify at least shape or value')
        shape = utils.sanitize_shape(shape)
        if is_param:
            # TODO: expose the initialization params
            return NDArrayView.random_uniform_float(shape, -0.05, 0.05, 1, device)
        return utils.create_NDArrayView(shape, cntk_dtype, device)

    # Coerce anything that is not already an ndarray of the right dtype.
    if not isinstance(value, np.ndarray) or value.dtype != np_dtype:
        value = np.asarray(value, dtype=np_dtype)

    if device.type() != 0:
        # Non-CPU target: stage on the CPU first, then copy over.
        # TODO: check whether this copy operation from cpu to gpu is not needed
        staging = utils.create_NDArrayView_from_NumPy(value, dev=DeviceDescriptor.cpu_device())
        target = utils.create_NDArrayView(value.shape, data_type=cntk_dtype, dev=device)
        target.copy_from(staging)
        return target

    return utils.create_NDArrayView_from_NumPy(value, device)
trainer = Trainer(classifier_output, ce, pe,
                  [sgd_learner(classifier_output.owner.parameters(), lr)])

# Pull minibatches of sequences from the source and train until exhausted.
minibatch_size = 200
training_progress_output_freq = 10
i = 0
while True:
    mb = mb_source.get_next_minibatch(minibatch_size)
    if len(mb) == 0:
        # Source exhausted — training is done.
        break
    # Map the model's input variables to the actual minibatch data.
    arguments = {features: mb[features_si].m_data,
                 label: mb[labels_si].m_data}
    trainer.train_minibatch(arguments)
    print_training_progress(trainer, i, training_progress_output_freq)
    i += 1

if __name__ == '__main__':
    # Run all computation on the CPU.
    target_device = DeviceDescriptor.cpu_device()
    DeviceDescriptor.set_default_device(target_device)
    train_sequence_classifier()
# Learner hyperparameters.
lr = learning_rates_per_sample(0.007)
momentum_time_constant = 1100
momentum_per_sample = momentums_per_sample(math.exp(-1.0 / momentum_time_constant))
clipping_threshold_per_sample = 2.3
gradient_clipping_with_truncation = True

trainer = Trainer(z, ce, errs,
                  [momentum_sgd_learner(z.owner.parameters(), lr,
                                        momentum_per_sample,
                                        clipping_threshold_per_sample,
                                        gradient_clipping_with_truncation)])

# Get minibatches of sequences to train with and perform model training.
minibatch_size = 72
training_progress_output_freq = 10
# Fix: the minibatch counter was read in the loop (print_training_progress /
# `i += 1`) without ever being initialized, raising NameError on the first
# minibatch. Initialize it to 0, matching the classifier training loop.
i = 0
while True:
    mb = mb_source.get_next_minibatch(minibatch_size)
    if len(mb) == 0:
        # Source exhausted — training is done.
        break
    # Specify the mapping of input variables in the model to actual
    # minibatch data to be trained with.
    arguments = {raw_input: mb[features_si].m_data,
                 raw_labels: mb[labels_si].m_data}
    trainer.train_minibatch(arguments)
    print_training_progress(trainer, i, training_progress_output_freq)
    i += 1

if __name__ == '__main__':
    # Specify the target device to be used for computing.
    target_device = DeviceDescriptor.cpu_device()
    DeviceDescriptor.set_default_device(target_device)
    train_sequence_to_sequence_translator()