def __init__(self, state_shape, action_count, model_func, vmin, vmax, n, gamma=0.99, lr=0.00025, mm=0.95, use_tensorboard=False):
    """
    Creates a new agent that learns using Categorical DQN as described in
    "A Distributional Perspective on Reinforcement Learning".

    :param state_shape: The shape of each input shape e.g. (4 x 84 x 84) for Atari
    :param action_count: The number of actions e.g. 14
    :param model_func: The model to train
    :param vmin: Minimum value of return distribution
    :param vmax: Maximum value of return distribution
    :param n: Number of support atoms
    :param gamma: Discount factor for Bellman update
    :param lr: The learning rate for Adam SGD
    :param mm: The momentum for Adam SGD
    :param use_tensorboard: If True, attach a TensorBoard progress writer
        that logs under 'metrics'.
    """
    self.state_shape = state_shape
    self.action_count = action_count
    self.gamma = gamma
    self.learning_rate = lr
    self.momentum = mm

    # Distribution parameters
    self.vmin = vmin
    self.vmax = vmax
    self.n = n
    # Spacing between adjacent support atoms (n >= 2 assumed; n == 1 would
    # divide by zero — TODO confirm callers never pass n=1).
    self.dz = (vmax - vmin) / (n - 1)

    # Support atoms: n evenly spaced values in [vmin, vmax].
    self.z = np.linspace(vmin, vmax, n, dtype=np.float32)

    # Model input and output
    self.state_var = C.input_variable(self.state_shape, name='state')
    # Target distribution fed in at training time: one probability vector
    # of length n per action.
    self.action_return_dist = C.input_variable((self.action_count, n), name='ar_dist')

    # Model output assigns a probability to each support atom for each action.
    # `raw` holds the pre-softmax logits; `model` normalizes over the atom axis.
    self.raw = model_func(self.state_var)
    self.model = C.softmax(self.raw, axis=1)

    # Adam-based SGD with cross-entropy loss. The loss is computed on the
    # raw logits (cross_entropy_with_softmax applies the softmax itself).
    loss = C.cross_entropy_with_softmax(self.raw, self.action_return_dist, axis=1, name='loss')
    lr_schedule = C.learning_rate_schedule(self.learning_rate, C.UnitType.sample)
    mom_schedule = C.momentum_schedule(self.momentum)
    vm_schedule = C.momentum_schedule(0.999)
    learner = C.adam(self.raw.parameters, lr_schedule, mom_schedule, variance_momentum=vm_schedule)

    if use_tensorboard:
        self.writer = TensorBoardProgressWriter(log_dir='metrics', model=self.model)
    else:
        self.writer = None

    self.trainer = C.Trainer(self.raw, (loss, None), [learner], self.writer)

    # Create target network as copy of online network.
    # update_target() is expected to populate target_model — defined
    # elsewhere on this class.
    self.target_model = None
    self.update_target()
def train_model(model_details, num_classes, train_map_file, learning_params, max_images=-1):
    """
    Train a transfer-learning model on the images listed in a MAP file.

    :param model_details: Dict describing the base model; 'image_dims' is read here.
    :param num_classes: Number of output classes.
    :param train_map_file: Path to the MAP file (one image entry per line).
    :param learning_params: Dict with 'max_epochs', 'mb_size', 'lr_per_mb',
        'momentum_per_mb', 'l2_reg_weight', 'freeze_weights'.
    :param max_images: If > 0, cap the epoch size at this many images.
    :return: The trained model function.
    """
    num_epochs = learning_params['max_epochs']
    # Epoch size = number of lines in the MAP file. Fix: use a context
    # manager so the file handle is closed (the original left it open).
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for _ in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)
    minibatch_size = learning_params['mb_size']

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, model_details['image_dims'], num_classes)
    image_input = C.input_variable(model_details['image_dims'])
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source['features'],
        label_input: minibatch_source['labels']
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(model_details, num_classes, image_input, freeze=learning_params['freeze_weights'])
    ce = C.cross_entropy_with_softmax(tl_model, label_input)
    pe = C.classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = C.learning_parameter_schedule(learning_params['lr_per_mb'])
    mm_schedule = C.momentum_schedule(learning_params['momentum_per_mb'])
    learner = C.momentum_sgd(
        tl_model.parameters, lr_schedule, mm_schedule,
        l2_regularization_weight=learning_params['l2_reg_weight'])
    trainer = C.Trainer(tl_model, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print(
        "Training transfer learning model for {0} epochs (epoch_size = {1}).".
        format(num_epochs, epoch_size))
    C.logging.log_number_of_parameters(tl_model)
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=num_epochs)
    for epoch in range(num_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            # Only fires when sample_count lands exactly on the boundary,
            # which holds when every minibatch is full-sized.
            if sample_count % (100 * minibatch_size) == 0:
                print("Processed {0} samples".format(sample_count))
        progress_printer.epoch_summary(with_metric=True)

    return tl_model
def train(data_path, model_path, log_file, config_file, restore=False, profiling=False, gen_heartbeat=False):
    """
    Train the PolyMath model described by `config_file`.

    :param data_path: Root directory containing training/validation data and logs.
    :param model_path: Directory where checkpoints are written (file name comes
        from the module-level `model_name` — defined elsewhere in this file).
    :param log_file: Log file name; also used as the TensorBoard sub-directory.
    :param config_file: Importable module name whose `training_config` dict
        drives all hyper-parameters.
    :param restore: If True and a checkpoint exists, resume from it.
    :param profiling: If True, run the CNTK profiler around training.
    :param gen_heartbeat: Forwarded to the ProgressPrinter.
    """
    training_config = importlib.import_module(config_file).training_config
    # Config for using multiple GPUs: map this worker's rank onto a GPU id.
    if training_config['multi_gpu']:
        gpu_pad = training_config['gpu_pad']
        gpu_cnt = training_config['gpu_cnt']
        my_rank = C.Communicator.rank()
        my_gpu_id = (my_rank + gpu_pad) % gpu_cnt
        print("rank = " + str(my_rank) + ", using gpu " + str(my_gpu_id) + " of " + str(gpu_cnt))
        C.try_set_default_device(C.gpu(my_gpu_id))
    else:
        C.try_set_default_device(C.gpu(0))

    # Text log while training
    normal_log = os.path.join(data_path, training_config['logdir'], log_file)
    # TensorBoard files' dir (same path as the text log)
    tensorboard_logdir = os.path.join(data_path, training_config['logdir'], log_file)

    polymath = PolyMath(config_file)
    z, loss = polymath.model()

    max_epochs = training_config['max_epochs']
    log_freq = training_config['log_freq']

    progress_writers = [
        C.logging.ProgressPrinter(num_epochs=max_epochs,
                                  freq=log_freq,
                                  tag='Training',
                                  log_to_file=normal_log,
                                  rank=C.Communicator.rank(),
                                  gen_heartbeat=gen_heartbeat)
    ]
    # Add TensorBoard writer for visualization
    tensorboard_writer = C.logging.TensorBoardProgressWriter(
        freq=10,
        log_dir=tensorboard_logdir,
        rank=C.Communicator.rank(),
        model=z)
    progress_writers.append(tensorboard_writer)

    lr = C.learning_parameter_schedule(training_config['lr'], minibatch_size=None, epoch_size=None)

    # EMA shadow copies of every parameter. Evaluating `dummy` copies the
    # current parameter values into the `ema` constants (straight assign here,
    # not a decayed average — note the sibling `train` in this file decays).
    ema = {}
    dummies_info = {}
    dummies = []
    for p in z.parameters:
        ema_p = C.constant(0, shape=p.shape, dtype=p.dtype, name='ema_%s' % p.uid)
        ema[p.uid] = ema_p
        dummies.append(C.reduce_sum(C.assign(ema_p, p)))
        dummies_info[dummies[-1].output] = (p.name, p.shape)
    dummy = C.combine(dummies)

    learner = C.adadelta(z.parameters, lr)

    if C.Communicator.num_workers() > 1:
        learner = C.data_parallel_distributed_learner(learner)

    trainer = C.Trainer(z, (loss, None), learner, progress_writers)

    if profiling:
        C.debugging.start_profiler(sync_gpu=True)

    train_data_file = os.path.join(data_path, training_config['train_data'])
    train_data_ext = os.path.splitext(train_data_file)[-1].lower()

    model_file = os.path.join(model_path, model_name)
    model = C.combine(list(z.outputs) + [loss.output])
    label_ab = argument_by_name(loss, 'ab')

    epoch_stat = {
        'best_val_err': 100,
        'best_since': 0,
        'val_since': 0,
        'record_num': 0
    }

    if restore and os.path.isfile(model_file):
        trainer.restore_from_checkpoint(model_file)
        # After restore always re-evaluate
        epoch_stat['best_val_err'] = validate_model(
            os.path.join(data_path, training_config['val_data']),
            model, polymath, config_file)

    def post_epoch_work(epoch_stat):
        """Summarize the epoch; every `val_interval` epochs validate on the
        EMA weights and checkpoint on improvement. Returns False to stop."""
        trainer.summarize_training_progress()
        epoch_stat['val_since'] += 1
        if epoch_stat['val_since'] == training_config['val_interval']:
            epoch_stat['val_since'] = 0
            # Swap in the EMA weights for validation, keeping the live
            # weights in `temp` so they can be restored afterwards.
            temp = dict((p.uid, p.value) for p in z.parameters)
            for p in trainer.model.parameters:
                p.value = ema[p.uid].value
            val_err = validate_model(
                os.path.join(data_path, training_config['val_data']),
                model, polymath, config_file)
            if epoch_stat['best_val_err'] > val_err:
                epoch_stat['best_val_err'] = val_err
                epoch_stat['best_since'] = 0
                # NOTE(review): debug shell logging left in deliberately.
                os.system("ls -la >> log.log")
                os.system("ls -la ./Models >> log.log")
                # Retry the checkpoint save up to ~100 times (guards against
                # transient filesystem failures).
                save_flag = True
                fail_cnt = 0
                while save_flag:
                    if fail_cnt > 100:
                        print("ERROR: failed to save models")
                        break
                    try:
                        trainer.save_checkpoint(model_file)
                        epoch_stat['record_num'] += 1
                        record_file = os.path.join(
                            model_path,
                            str(epoch_stat['record_num']) + '-' + model_name)
                        trainer.save_checkpoint(record_file)
                        save_flag = False
                    except Exception:
                        # Fix: was a bare `except:`, which also swallowed
                        # KeyboardInterrupt/SystemExit. Narrowed to Exception.
                        fail_cnt = fail_cnt + 1
                # Restore the live (non-EMA) weights before resuming training.
                for p in trainer.model.parameters:
                    p.value = temp[p.uid]
            else:
                epoch_stat['best_since'] += 1
                if epoch_stat['best_since'] > training_config['stop_after']:
                    return False
        if profiling:
            C.debugging.enable_profiler()
        return True

    if train_data_ext == '.ctf':
        mb_source, input_map = create_mb_and_map(loss, train_data_file, polymath)

        minibatch_size = training_config['minibatch_size']  # number of samples
        epoch_size = training_config['epoch_size']

        for epoch in range(max_epochs):
            num_seq = 0
            while True:
                # After `distributed_after` samples, partition reads across workers.
                if trainer.total_number_of_samples_seen >= training_config['distributed_after']:
                    data = mb_source.next_minibatch(
                        minibatch_size * C.Communicator.num_workers(),
                        input_map=input_map,
                        num_data_partitions=C.Communicator.num_workers(),
                        partition_index=C.Communicator.rank())
                else:
                    data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
                trainer.train_minibatch(data)
                num_seq += trainer.previous_minibatch_sample_count
                # print_para_info(dummy, dummies_info)
                if num_seq >= epoch_size:
                    break
            if not post_epoch_work(epoch_stat):
                break
    else:
        if train_data_ext != '.tsv':
            raise Exception("Unsupported format")

        minibatch_seqs = training_config['minibatch_seqs']  # number of sequences

        for epoch in range(max_epochs):  # loop over epochs
            tsv_reader = create_tsv_reader(loss, train_data_file, polymath,
                                           minibatch_seqs, C.Communicator.num_workers())
            minibatch_count = 0
            for data in tsv_reader:
                # Round-robin: each worker trains on its own share of minibatches.
                if (minibatch_count % C.Communicator.num_workers()) == C.Communicator.rank():
                    trainer.train_minibatch(data)  # update model with it
                    dummy.eval()  # refresh the EMA constants
                minibatch_count += 1
            if not post_epoch_work(epoch_stat):
                break

    if profiling:
        C.debugging.stop_profiler()
# NOTE(review): truncated script fragment — the `input_map` dict below is never
# closed here; the continuation is missing from this chunk.
# NOTE(review): `input` shadows the builtin of the same name.
input = C.input_variable(input_dim)
label = C.input_variable(num_output_classes)
# NOTE(review): this first call is immediately overwritten below — dead code.
z = create_model(input)
# Scale the input to 0-1 range by dividing each pixel by 255.
z = create_model(input / 255.0)
loss = C.cross_entropy_with_softmax(z, label)
label_error = C.classification_error(z, label)
# Instantiate the trainer object to drive the model training
learning_rate = 0.2
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
learner = C.sgd(z.parameters, lr_schedule)
trainer = C.Trainer(z, (loss, label_error), [learner])
# Initialize the parameters for the trainer
minibatch_size = 64
num_samples_per_sweep = 60000
num_sweeps_to_train_with = 10
# float division — presumably consumed as a loop bound elsewhere; verify.
num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
# Create the reader to training data set
reader_train = create_reader(train_file, True, input_dim, num_output_classes)
# Map the data streams to the input and labels.
input_map = {
    label: reader_train.streams.labels,
def deconv_mnist(max_epochs=3):
    """
    Train and evaluate a small convolution/deconvolution auto-encoder on MNIST.

    Reads `data_path` and `model_path` from module scope (defined elsewhere
    in this file — TODO confirm). Saves a model per epoch, renames the last
    one to the final model name, then evaluates RMSE on the test set.

    :param max_epochs: Number of training epochs.
    :return: Average test metric (metric_numer / metric_denom).
    """
    image_height = 28
    image_width = 28
    num_channels = 1
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variable and normalization (0.00390625 == 1/256 pixel scaling)
    input_var = cntk.ops.input((num_channels, image_height, image_width), np.float32)
    scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)

    # Define the auto encoder model: conv -> pool -> unpool -> transposed conv
    cMap = 1
    conv1 = cntk.layers.Convolution2D((5,5), cMap, pad=True, activation=cntk.ops.relu)(scaled_input)
    pool1 = cntk.layers.MaxPooling((4,4), (4,4))(conv1)
    unpool1 = cntk.layers.MaxUnpooling((4,4), (4,4))(pool1, conv1)
    z = cntk.layers.ConvolutionTranspose2D((5,5), num_channels, pad=True, bias=False, init=cntk.glorot_uniform(0.001))(unpool1)

    # define rmse loss function (should be 'err = cntk.ops.minus(deconv1, scaled_input)')
    # f2 recomputes the same scaling as scaled_input above.
    f2 = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)
    err = cntk.ops.reshape(cntk.ops.minus(z, f2), (784))
    sq_err = cntk.ops.element_times(err, err)
    mse = cntk.ops.reduce_mean(sq_err)
    rmse_loss = cntk.ops.sqrt(mse)
    rmse_eval = cntk.ops.sqrt(mse)

    reader_train = create_reader(os.path.join(data_path, 'Train-28x28_cntk_text.txt'), True, input_dim, num_output_classes)

    # training config
    epoch_size = 60000
    minibatch_size = 64

    # Set learning parameters
    lr_schedule = cntk.learning_rate_schedule([0.00015], cntk.learners.UnitType.sample, epoch_size)
    mm_schedule = cntk.learners.momentum_as_time_constant_schedule([600], epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = cntk.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule, unit_gain=True)
    progress_printer = cntk.logging.ProgressPrinter(tag='Training')
    trainer = cntk.Trainer(z, (rmse_loss, rmse_eval), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var : reader_train.streams.features
    }

    cntk.logging.log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += data[input_var].num_samples                     # count samples processed so far

        trainer.summarize_training_progress()
        # Checkpoint after every epoch
        z.save(os.path.join(model_path, "07_Deconvolution_PY_{}.model".format(epoch)))

    # rename final model: last epoch's checkpoint becomes the canonical model
    last_model_name = os.path.join(model_path, "07_Deconvolution_PY_{}.model".format(max_epochs - 1))
    final_model_name = os.path.join(model_path, "07_Deconvolution_PY.model")
    try:
        os.remove(final_model_name)
    except OSError:
        # No previous final model — nothing to remove.
        pass
    os.rename(last_model_name, final_model_name)

    # Load test data
    reader_test = create_reader(os.path.join(data_path, 'Test-28x28_cntk_text.txt'), False, input_dim, num_output_classes)

    input_map = {
        input_var : reader_test.streams.features
    }

    # Test data for trained model
    epoch_size = 10000
    minibatch_size = 1024

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[input_var].num_samples
        minibatch_index += 1

    print("")
    # NOTE(review): minibatch_index was already incremented in the loop, so
    # `minibatch_index+1` overstates the batch count by one — confirm intent.
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
def benchmark(self):
    """
    Train, validate, and test the CNTK model on this benchmark's dataset,
    logging per-batch/per-epoch metrics to an HDF5 file via DLHelper.

    Reads instance state set up elsewhere: x_train/y_train, x_valid/y_valid,
    testImages/testLabels, resize_size, class_num, batch_size, epoch_num,
    network_type, devices, dataset, preprocessing, and constructCNN/cntk_model.
    """
    # Common suffix used in log/model file names
    suffix = "cntk_{}_{}by{}_{}".format(self.dataset, self.resize_size[0], self.resize_size[1], self.preprocessing)

    # Construct model, io and metrics
    cntk_input = C.input_variable((3, self.resize_size[0], self.resize_size[1]), np.float32)
    cntk_output = C.input_variable((self.class_num), np.float32)
    self.constructCNN(cntk_input)  # populates self.cntk_model
    cntk_cost = Softmax(self.cntk_model, cntk_output)
    cntk_error = ClassificationError(self.cntk_model, cntk_output)

    # Prepare training/validation/testing sets.
    # Each image is HWC uint8-like; transpose to CHW and scale to [0, 1].
    cntk_train_x = np.ascontiguousarray(np.vstack([np.expand_dims(x, axis=0).transpose([0,3,1,2]).astype('float32')/255 for x in self.x_train]), dtype=np.float32)
    cntk_valid_x = np.ascontiguousarray(np.vstack([np.expand_dims(x, axis=0).transpose([0,3,1,2]).astype('float32')/255 for x in self.x_valid]), dtype=np.float32)
    cntk_test_x = np.ascontiguousarray(np.vstack([np.expand_dims(x, axis=0).transpose([0,3,1,2]).astype('float32')/255 for x in self.testImages]), dtype=np.float32)

    # One-hot encode the integer labels through a CNTK one_hot op.
    cntk_train_y = C.one_hot(C.input_variable(1), self.class_num, sparse_output=False)(np.expand_dims(np.array(self.y_train, dtype='f'), axis=1))
    cntk_valid_y = C.one_hot(C.input_variable(1), self.class_num, sparse_output=False)(np.expand_dims(np.array(self.y_valid, dtype='f'), axis=1))
    cntk_test_y = C.one_hot(C.input_variable(1), self.class_num, sparse_output=False)(np.expand_dims(np.array(self.testLabels, dtype='f'), axis=1))

    # Trainer and mb source
    cntk_learner = SGD(self.cntk_model.parameters, lr=0.01, momentum=0.9, unit_gain=False, use_mean_gradient=True)  # To compare performance with other frameworks
    cntk_trainer = C.Trainer(self.cntk_model, (cntk_cost, cntk_error), cntk_learner)
    cntk_train_src = C.io.MinibatchSourceFromData(dict(x=C.Value(cntk_train_x), y=C.Value(cntk_train_y)), max_samples=len(cntk_train_x))
    cntk_valid_src = C.io.MinibatchSourceFromData(dict(x=C.Value(cntk_valid_x), y=C.Value(cntk_valid_y)), max_samples=len(cntk_valid_x))
    cntk_test_src = C.io.MinibatchSourceFromData(dict(x=C.Value(cntk_test_x), y=C.Value(cntk_test_y)), max_samples=len(cntk_test_x))

    # Mapping for training, validation and testing
    def getMap(src, bs):
        # Pull one minibatch from `src` and map it onto the network inputs.
        batch = src.next_minibatch(bs)
        return {
            cntk_input: batch[src.streams['x']],
            cntk_output: batch[src.streams['y']]
        }

    # Create log file
    train_batch_count = len(self.x_train) // self.batch_size + 1
    valid_batch_count = len(self.x_valid) // self.batch_size + 1
    test_batch_count = len(self.testImages) // self.batch_size + 1
    filename = "./saved_data/{}/{}/callback_data_{}.h5".format(self.network_type, self.devices[0], suffix)
    f = DLHelper.init_h5py(filename, self.epoch_num, train_batch_count * self.epoch_num)

    # Start training
    try:
        batch_count = 0
        f['.']['time']['train']['start_time'][0] = time.time()

        # Each epoch
        for epoch in range(0, self.epoch_num):
            # Rewind all three sources to the start of their data.
            cntk_train_src.restore_from_checkpoint({'cursor': 0, 'total_num_samples': 0})
            cntk_valid_src.restore_from_checkpoint({'cursor': 0, 'total_num_samples': 0})
            cntk_test_src.restore_from_checkpoint({'cursor': 0, 'total_num_samples': 0})

            # Each batch
            for i in range(train_batch_count):
                batch_count += 1

                # Read a mini batch from the training data file
                data = getMap(cntk_train_src, self.batch_size)

                # Train a batch (timed)
                start = default_timer()
                cntk_trainer.train_minibatch(data)

                # Save training loss
                training_loss = cntk_trainer.previous_minibatch_loss_average

                # Save batch time (measured immediately after train_minibatch)
                train_batch_time = default_timer() - start
                f['.']['time']['train_batch'][batch_count-1] = train_batch_time

                # Continue saving training loss / accuracy
                eval_error = cntk_trainer.previous_minibatch_evaluation_average
                f['.']['cost']['train'][batch_count-1] = np.float32(training_loss)
                f['.']['accuracy']['train'][batch_count-1] = np.float32((1.0 - eval_error) * 100.0)

                if i % 30 == 0:  # Print per 30 batches
                    print("Epoch: {0}, Minibatch: {1}, Loss: {2:.4f}, Error: {3:.2f}%".format(epoch, i, training_loss, eval_error * 100.0))

            # Save batch marker
            f['.']['time_markers']['minibatch'][epoch] = np.float32(batch_count)

            # Validation
            validation_loss = 0
            validation_error = 0
            for j in range(valid_batch_count):
                # Read a mini batch from the validation data file
                data = getMap(cntk_valid_src, self.batch_size)

                # Valid a batch
                batch_x, batch_y = data[cntk_input].asarray(), data[cntk_output].asarray()
                validation_loss += cntk_cost(batch_x, batch_y).sum()
                validation_error += cntk_trainer.test_minibatch(data) * len(batch_x)

            validation_loss /= len(self.x_valid)
            validation_error /= len(self.x_valid)

            # Save validation loss for the whole epoch
            # NOTE(review): stored under ['cost']['loss'] while training uses
            # ['cost']['train'] — confirm this matches the h5 schema.
            f['.']['cost']['loss'][epoch] = np.float32(validation_loss)
            f['.']['accuracy']['valid'][epoch] = np.float32((1.0 - validation_error) * 100.0)
            print("[Validation]")
            print("Epoch: {0}, Loss: {1:.4f}, Error: {2:.2f}%\n".format(epoch, validation_loss, validation_error * 100.0))

        # Save related params
        f['.']['time']['train']['end_time'][0] = time.time()  # Save training time
        f['.']['config'].attrs["total_minibatches"] = batch_count
        f['.']['time_markers'].attrs['minibatches_complete'] = batch_count

        # Testing
        test_error = 0
        for j in range(test_batch_count):
            # Read a mini batch from the test data file
            data = getMap(cntk_test_src, self.batch_size)

            # Test a batch
            test_error += cntk_trainer.test_minibatch(data) * data[cntk_input].num_samples

        test_error /= len(self.testImages)
        f['.']['infer_acc']['accuracy'][0] = np.float32((1.0 - test_error) * 100.0)
        print("Accuracy score is %f" % (1.0 - test_error))

        # NOTE(review): '.pth' extension is unusual for a CNTK model — confirm.
        self.cntk_model.save("./saved_models/{}/{}/{}.pth".format(self.network_type, self.devices[0], suffix))
    except KeyboardInterrupt:
        # Allow manual interruption; metrics file is still closed below.
        pass
    except Exception as e:
        raise e
    finally:
        print("Close file descriptor")
        f.close()
def train_sequence_classifier():
    """
    Train an LSTM sequence classifier on one-hot word sequences.

    Dimensions are read from ./data/export_info.json; training data from
    data\\cntk_train_data.tsv. Saves a model per epoch under `abs_path`
    (module-level — defined elsewhere in this file, TODO confirm).

    :return: (evaluation_average, loss_average) of the last minibatch.
    """
    hidden_dim = 300
    embedding_dim = 300

    # Load the configuration file
    with open("./data/export_info.json", "r") as json_file:
        json_data = json.load(json_file)
    input_dim = int(json_data["wordDimension"])           # number of one-hot indices
    num_output_classes = int(json_data["labelDimension"]) # number of topics
    print("input_dim:", input_dim)

    rel_path = r"data\cntk_train_data.tsv"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    print("filepath:", path)

    # Set up the training-data format and label inputs.
    features = C.sequence.input_variable(
        input_dim, is_sparse=True, name="features")  # word sequence (one-hot vectors)
    label = C.input_variable(num_output_classes,
                             is_sparse=True,
                             name="label",
                             dynamic_axes=C.Axis.default_batch_axis())
    reader = create_reader(path, True, input_dim, num_output_classes)

    # Build the pieces passed to the Trainer.
    # Create the classifier (LSTM layer structure)
    classifier_output = lstm_sequence_classifier(features, num_output_classes, embedding_dim, hidden_dim)
    # Loss function
    ce = C.cross_entropy_with_softmax(classifier_output, label)
    # Error-rate metric
    pe = C.classification_error(classifier_output, label)
    # Learning rate
    lr_per_sample = C.learning_rate_schedule(0.05, C.UnitType.sample)

    # Build the trainer with SGD (one of the gradient-descent methods).
    trainer = C.Trainer(classifier_output, (ce, pe),
                        C.sgd(classifier_output.parameters, lr=lr_per_sample))

    # Settings
    minibatch_size = 512                 # amount of data per training step (not the number of sentences)
    training_progress_output_freq = 20   # show progress every N loops
    loop_count = 0                       # number of loops so far
    epoch = 1                            # initial epoch
    epoch_max = 10                       # maximum epochs; training stops when reached
    epoch_size = 5000                    # epoch size (here 5000 sentences = 1 epoch)
    samples = 0                          # total sentences trained

    # Training loop
    while True:
        mb = reader.next_minibatch(minibatch_size, {
            features: reader.streams.features,
            label: reader.streams.labels
        })
        samples += mb[label].num_samples  # sentences trained so far
        trainer.train_minibatch(mb)       # train on this minibatch
        training_loss, eval_crit = print_training_progress(
            trainer, loop_count, training_progress_output_freq, samples, epoch)  # show training progress
        if samples >= epoch_size * epoch:
            # Save the model after each completed epoch
            classifier_output.save(
                os.path.join(abs_path, ".", "Models", "lstm_model_epoch{}.dnn".format(epoch)))
            epoch += 1
            if epoch > epoch_max:
                break
        loop_count += 1

    import copy
    # Accuracy and loss of the last minibatch
    evaluation_average = copy.copy(
        trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def train(data_path, model_path, log_file, config_file, restore=False, profiling=False, gen_heartbeat=False):
    """
    Train the PolyMath model described by `config_file`.

    Maintains exponential-moving-average (EMA) shadow parameters (decay
    0.999) that are swapped in for validation; checkpoints when the
    validation error improves, and stops after `stop_after` epochs without
    improvement. Checkpoint file name comes from the module-level
    `model_name` (defined elsewhere in this file).

    :param data_path: Root directory for train/validation data.
    :param model_path: Directory where checkpoints are written.
    :param log_file: ProgressPrinter log file.
    :param config_file: Importable module whose `training_config` dict
        supplies all hyper-parameters.
    :param restore: Resume from an existing checkpoint if present.
    :param profiling: Run the CNTK profiler around training.
    :param gen_heartbeat: Forwarded to the ProgressPrinter.
    """
    polymath = PolyMath(config_file)
    z, loss = polymath.model()
    training_config = importlib.import_module(config_file).training_config

    max_epochs = training_config['max_epochs']
    log_freq = training_config['log_freq']

    progress_writers = [C.logging.ProgressPrinter(
        num_epochs = max_epochs,
        freq = log_freq,
        tag = 'Training',
        log_to_file = log_file,
        rank = C.Communicator.rank(),
        gen_heartbeat = gen_heartbeat)]

    lr = C.learning_parameter_schedule(training_config['lr'], minibatch_size=None, epoch_size=None)

    # EMA shadow copies of all parameters. Evaluating `dummy` after each
    # minibatch applies ema_p = 0.999*ema_p + 0.001*p.
    ema = {}
    dummies = []
    for p in z.parameters:
        ema_p = C.constant(0, shape=p.shape, dtype=p.dtype, name='ema_%s' % p.uid)
        ema[p.uid] = ema_p
        dummies.append(C.reduce_sum(C.assign(ema_p, 0.999 * ema_p + 0.001 * p)))
    dummy = C.combine(dummies)

    learner = C.adadelta(z.parameters, lr)

    if C.Communicator.num_workers() > 1:
        learner = C.data_parallel_distributed_learner(learner)

    trainer = C.Trainer(z, (loss, None), learner, progress_writers)

    if profiling:
        C.debugging.start_profiler(sync_gpu=True)

    train_data_file = os.path.join(data_path, training_config['train_data'])
    train_data_ext = os.path.splitext(train_data_file)[-1].lower()

    model_file = os.path.join(model_path, model_name)
    model = C.combine(list(z.outputs) + [loss.output])
    label_ab = argument_by_name(loss, 'ab')

    epoch_stat = {
        'best_val_err' : 100,
        'best_since'   : 0,
        'val_since'    : 0}

    if restore and os.path.isfile(model_file):
        trainer.restore_from_checkpoint(model_file)
        #after restore always re-evaluate
        epoch_stat['best_val_err'] = validate_model(os.path.join(data_path, training_config['val_data']), model, polymath)

    def post_epoch_work(epoch_stat):
        # Summarize the epoch; every `val_interval` epochs swap in the EMA
        # weights, validate, and checkpoint on improvement. Returns False
        # when early stopping triggers.
        trainer.summarize_training_progress()
        epoch_stat['val_since'] += 1
        if epoch_stat['val_since'] == training_config['val_interval']:
            epoch_stat['val_since'] = 0
            # Stash the live weights, then load the EMA weights for validation.
            temp = dict((p.uid, p.value) for p in z.parameters)
            for p in trainer.model.parameters:
                p.value = ema[p.uid].value
            val_err = validate_model(os.path.join(data_path, training_config['val_data']), model, polymath)
            if epoch_stat['best_val_err'] > val_err:
                epoch_stat['best_val_err'] = val_err
                epoch_stat['best_since'] = 0
                trainer.save_checkpoint(model_file)
                # Restore the live weights before training resumes.
                for p in trainer.model.parameters:
                    p.value = temp[p.uid]
            else:
                epoch_stat['best_since'] += 1
                if epoch_stat['best_since'] > training_config['stop_after']:
                    return False
        if profiling:
            C.debugging.enable_profiler()
        return True

    if train_data_ext == '.ctf':
        mb_source, input_map = create_mb_and_map(loss, train_data_file, polymath)

        minibatch_size = training_config['minibatch_size'] # number of samples
        epoch_size = training_config['epoch_size']

        for epoch in range(max_epochs):
            num_seq = 0
            while True:
                # After `distributed_after` samples, partition reads across workers.
                if trainer.total_number_of_samples_seen >= training_config['distributed_after']:
                    data = mb_source.next_minibatch(minibatch_size*C.Communicator.num_workers(), input_map=input_map, num_data_partitions=C.Communicator.num_workers(), partition_index=C.Communicator.rank())
                else:
                    data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
                trainer.train_minibatch(data)
                num_seq += trainer.previous_minibatch_sample_count
                dummy.eval()  # update the EMA parameters
                if num_seq >= epoch_size:
                    break
            if not post_epoch_work(epoch_stat):
                break
    else:
        if train_data_ext != '.tsv':
            raise Exception("Unsupported format")

        minibatch_seqs = training_config['minibatch_seqs'] # number of sequences

        for epoch in range(max_epochs):       # loop over epochs
            tsv_reader = create_tsv_reader(loss, train_data_file, polymath, minibatch_seqs, C.Communicator.num_workers())
            minibatch_count = 0
            for data in tsv_reader:
                # Round-robin minibatches across workers.
                if (minibatch_count % C.Communicator.num_workers()) == C.Communicator.rank():
                    trainer.train_minibatch(data) # update model with it
                    dummy.eval()  # update the EMA parameters
                minibatch_count += 1
            if not post_epoch_work(epoch_stat):
                break

    if profiling:
        C.debugging.stop_profiler()
def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
                   minibatch_size=1, num_samples=10000):
    """
    Train a small fully-connected classifier on a fixed random minibatch and
    sample process memory before every training step; raise ValueError if
    memory growth exceeds both tolerance thresholds.

    Uses module-level `input_dim` / `num_output_classes` and the helpers
    `fully_connected_classifier_net`, `generate_random_data_sample`,
    `cntk_device`, `os_process`, `mem_used` (defined elsewhere in this file).

    :param nonlinearity: Activation function under test (reported on failure).
    :param num_hidden_layers: Depth of the test network.
    :param device_id: Device to run on (resolved via cntk_device).
    :param minibatch_size: Samples per training step.
    :param num_samples: Total samples, i.e. number of iterations * minibatch_size.
    :raises ValueError: If a potential memory leak is detected.
    """
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)  # deterministic data so memory behaviour is reproducible

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    # Memory footprint sampled once per iteration, before the training call.
    mem = np.zeros(num_minibatches_to_train)

    # One fixed batch reused every iteration — any growth is the trainer's.
    features, labels = generate_random_data_sample(minibatch_size,
                                                   input_dim,
                                                   num_output_classes)

    # Set a maximum fraction of iterations, in which the memory is allowed to
    # increase. Most likely these will be the first training runs.
    # Long-term this test needs to be run in a separate process over a longer
    # period of time.
    MEM_INCREASE_FRACTION_TOLERANCE = 0.01
    # Set a maximum allowed memory increase. This tolerance should not be
    # exceeded when run as a standalone process (simply run this file with the
    # Python executable).
    MEM_INCREASE_TOLERANCE = 10 * 1024

    dev = cntk_device(device_id)
    i = 0
    proc = os_process()
    while i < num_minibatches_to_train:
        mem[i] = mem_used(proc)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels}, device=dev)
        i += 1

    mem_deltas = np.diff(mem)
    iterations_with_mem_increase = (mem_deltas > 0).sum()
    mem_inc_fraction = iterations_with_mem_increase / num_minibatches_to_train
    # Compare against iteration 10 to skip start-up allocations
    # (assumes num_minibatches_to_train > 10 — TODO confirm for all callers).
    mem_diff = mem[-1] - mem[10]

    if mem_inc_fraction > MEM_INCREASE_FRACTION_TOLERANCE and \
            mem_diff > MEM_INCREASE_TOLERANCE:
        # For the rough leak estimation we take the memory footprint after the
        # dust of the first train_minibatch runs has settled.
        mem_changes = mem_deltas[mem_deltas != 0]
        raise ValueError('Potential memory leak of ~ %i KB (%i%% of MBs '
                         'increased memory usage) detected with %s:\n%s' %
                         (int(mem_diff / 1024), int(mem_inc_fraction * 100),
                          nonlinearity, mem_changes))
def retrain_model(map_filename, output_dir, num_classes, epoch_size,
                  model_filename, num_epochs, model_type, retraining_type):
    '''
    Coordinates retraining after MAP file creation.

    Loads an AlexNet or ResNet18 base model, then runs a (possibly
    distributed) CNTK training session with fsadagrad, checkpointing to
    `output_dir`. Rank 0 saves the final model.

    :param map_filename: MAP file listing training images.
    :param output_dir: Directory for checkpoints and the final model.
    :param num_classes: Number of output classes.
    :param epoch_size: Samples per epoch.
    :param model_filename: Path of the pre-trained base model.
    :param num_epochs: Number of epochs to train.
    :param model_type: 'alexnet' or 'resnet18' (anything else leaves
        `model` undefined — assumed validated by the caller).
    :param retraining_type: Forwarded to the model-loading helper.
    :return: This worker's rank.
    '''
    # load minibatch and model
    minibatch_source = create_minibatch_source(map_filename, num_classes)
    image_input = cntk.ops.input_variable((3, 224, 224))
    label_input = cntk.ops.input_variable((num_classes))
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }
    if model_type == 'alexnet':
        model = load_alexnet_model(image_input, num_classes, model_filename,
                                   retraining_type)
    elif model_type == 'resnet18':
        model = load_resnet18_model(image_input, num_classes, model_filename,
                                    retraining_type)

    # Set learning parameters
    ce = cntk.losses.cross_entropy_with_softmax(model, label_input)
    pe = cntk.metrics.classification_error(model, label_input)
    l2_reg_weight = 0.0005
    # Staged decay: 33 epochs at each rate, then the final rate.
    lr_per_sample = [0.00001] * 33 + [0.000001] * 33 + [0.0000001]
    momentum_time_constant = 10
    mb_size = 16
    lr_schedule = cntk.learners.learning_rate_schedule(
        lr_per_sample, unit=cntk.UnitType.sample)
    mm_schedule = cntk.learners.momentum_as_time_constant_schedule(
        momentum_time_constant)

    # Instantiate the appropriate trainer object
    my_rank = distributed.Communicator.rank()
    num_workers = distributed.Communicator.num_workers()
    num_minibatches = int(np.ceil(epoch_size / mb_size))
    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(tag='Training',
                                                    num_epochs=num_epochs,
                                                    freq=num_minibatches,
                                                    rank=my_rank)
    ]
    learner = cntk.learners.fsadagrad(parameters=model.parameters,
                                      lr=lr_schedule,
                                      momentum=mm_schedule,
                                      l2_regularization_weight=l2_reg_weight)
    if num_workers > 1:
        # Wrap in a data-parallel learner when multiple workers are present.
        parameter_learner = distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=32)
        trainer = cntk.Trainer(model, (ce, pe), parameter_learner,
                               progress_writers)
    else:
        trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers)

    # Print summary lines to stdout (rank 0 only) and perform training
    if my_rank == 0:
        print('Retraining model for {} epochs.'.format(num_epochs))
        print('Found {} workers'.format(num_workers))
        print('Printing progress every {} minibatches'.format(num_minibatches))
        cntk.logging.progress_print.log_number_of_parameters(model)
    training_session(trainer=trainer,
                     max_samples=num_epochs * epoch_size,
                     mb_source=minibatch_source,
                     mb_size=mb_size,
                     model_inputs_to_streams=input_map,
                     checkpoint_config=CheckpointConfig(
                         frequency=epoch_size,
                         filename=os.path.join(output_dir,
                                               'retrained_checkpoint.model')),
                     progress_frequency=epoch_size).train()
    distributed.Communicator.finalize()
    if my_rank == 0:
        trainer.model.save(os.path.join(output_dir, 'retrained.model'))
    return (my_rank)
def train(input_dir, output_dir, num_epochs):
    ''' Coordinates model creation and training; minibatch creation

    :param input_dir: directory of input images read by MyDataSource.
    :param output_dir: directory for checkpoint and final model files.
    :param num_epochs: total number of epochs to train.
    '''
    num_landcover_classes = 5
    num_color_channels = 4
    block_size = 256
    padding = int(block_size / 4)
    my_rank = distributed.Communicator.rank()
    number_of_workers = distributed.Communicator.num_workers()
    os.makedirs(output_dir, exist_ok=True)

    # We extract 160 sample regions from an input image before moving along to
    # the next image file. Our epoch size is 16,000 samples.
    minibatch_size = 10
    minibatches_per_image = 160
    minibatches_per_epoch = 1600
    epoch_size = minibatch_size * minibatches_per_epoch

    # Define the input variables
    f_dim = (num_color_channels, block_size, block_size)
    l_dim = (num_landcover_classes, block_size, block_size)
    feature = cntk.input_variable(f_dim, np.float32)
    label = cntk.input_variable(l_dim, np.float32)

    # Define the minibatch source
    minibatch_source = MyDataSource(f_dim, l_dim, number_of_workers,
                                    input_dir, minibatches_per_image)
    input_map = {
        feature: minibatch_source.streams.features,
        label: minibatch_source.streams.labels
    }

    # Define the model
    model = model_mini_pub.model(num_landcover_classes, block_size, 2,
                                 [64, 32, 32, 32])(feature)

    # Define the loss function and metric. Note that loss is not computed
    # directly on the model's output; the edges are first dropped.
    output = center_square(
        cntk.reshape(model, (num_landcover_classes, block_size, block_size)),
        block_size, padding)
    label_center = center_square(label, block_size, padding)
    mean_ce, pe = criteria(label_center, output, block_size,
                           num_landcover_classes, [0.0, 1.0, 1.0, 1.0, 1.0])

    # Create the progress writer, learner, and trainer (which will be a
    # distributed trainer if number_of_workers > 1)
    # NOTE(review): ProgressPrinter freq is epoch_size (16000) while the
    # summary below claims progress every minibatches_per_epoch (1600) --
    # the two values disagree; confirm which was intended.
    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(tag='Training',
                                                    num_epochs=num_epochs,
                                                    freq=epoch_size,
                                                    rank=my_rank)
    ]
    # Learning rate is specified per-minibatch, then converted per-sample.
    lr_per_mb = [0.0001] * 30 + [0.00001] * 30 + [0.000001]
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = cntk.learning_rate_schedule(lr_per_sample,
                                              epoch_size=epoch_size,
                                              unit=cntk.UnitType.sample)
    learner = cntk.rmsprop(model.parameters, lr_schedule, 0.95, 1.1, 0.9, 1.1,
                           0.9, l2_regularization_weight=0.00001)
    if number_of_workers > 1:
        parameter_learner = distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=32)
        trainer = cntk.Trainer(output, (mean_ce, pe), parameter_learner,
                               progress_writers)
    else:
        trainer = cntk.Trainer(output, (mean_ce, pe), learner,
                               progress_writers)

    # Perform the training! Note that some progress output will be generated by
    # each of the workers.
    if my_rank == 0:
        print('Retraining model for {} epochs.'.format(num_epochs))
        print('Found {} workers'.format(number_of_workers))
        print('Printing progress every {} minibatches'.format(
            minibatches_per_epoch))
    cntk.logging.progress_print.log_number_of_parameters(model)
    training_session(trainer=trainer,
                     max_samples=num_epochs * epoch_size,
                     mb_source=minibatch_source,
                     mb_size=minibatch_size,
                     model_inputs_to_streams=input_map,
                     checkpoint_config=CheckpointConfig(
                         frequency=epoch_size,
                         filename=os.path.join(output_dir,
                                               'trained_checkpoint.model'),
                         preserve_all=True),
                     progress_frequency=epoch_size).train()
    distributed.Communicator.finalize()
    if my_rank == 0:
        trainer.model.save(os.path.join(output_dir, 'trained.model'))
    return
input_shape = [batch_size, seq_len, input_dim] #Y = C.input_variable(shape=output_shape) X = C.sequence.input_variable(input_dim) Y = C.sequence.input_variable(1) Y_model = create_model(X) # loss function error = loss = C.squared_error(Y_model, Y) # define optimizer learner = C.adam(Y_model.parameters, lr=0.02, momentum=0.99) trainer = C.Trainer(Y_model, (loss, error), [learner]) # create some mocked data (4 sequences of different lenghts arr_dtype = np.float32 s1_x = np.random.randn(10000, input_dim).astype(dtype=arr_dtype) s2_x = np.random.randn(400, input_dim).astype(dtype=arr_dtype) s3_x = np.random.randn(300, input_dim).astype(dtype=arr_dtype) s4_x = np.random.randn(600, input_dim).astype(dtype=arr_dtype) s1_y = np.random.randn(10000, 1).astype(dtype=arr_dtype) s2_y = np.random.randn(400, 1).astype(dtype=arr_dtype) s3_y = np.random.randn(300, 1).astype(dtype=arr_dtype) s4_y = np.random.randn(600, 1).astype(dtype=arr_dtype) data_x = [s1_x, s2_x, s3_x, s4_x] data_y = [s1_y, s2_y, s3_y, s4_y]
def do_demo():
    '''Create a 4-2-3 NN for the Iris data, train it, test it, and predict
    the species of an unseen flower; prints and saves the learned weights.

    Returns 0 on success.
    '''
    # create NN, train, test, predict
    input_dim = 4
    hidden_dim = 2
    output_dim = 3
    train_file = "trainData_cntk.txt"
    test_file = "testData_cntk.txt"
    input_Var = C.ops.input(input_dim, np.float32)
    label_Var = C.ops.input(output_dim, np.float32)
    print("Creating a 4-2-3 tanh softmax NN for Iris data ")
    with default_options(init=glorot_uniform()):
        hLayer = C.layers.Dense(hidden_dim,
                                activation=C.ops.tanh,
                                name='hidLayer')(input_Var)
        oLayer = Dense(output_dim, activation=C.ops.softmax,
                       name='outLayer')(hLayer)
    nnet = oLayer
    print("Creating a cross entropy mini-batch Trainer \n")
    # NOTE(review): oLayer already applies a softmax activation, and
    # cross_entropy_with_softmax applies another softmax internally, so the
    # loss is computed on doubly-squashed values -- confirm this is intended.
    ce = C.cross_entropy_with_softmax(nnet, label_Var)
    pe = C.classification_error(nnet, label_Var)
    fixed_lr = 0.05
    lr_per_batch = learning_rate_schedule(fixed_lr, UnitType.minibatch)
    learner = C.sgd(nnet.parameters, lr_per_batch)
    trainer = C.Trainer(nnet, (ce, pe), [learner])
    max_iter = 5000  # maximum number of training iterations
    batch_size = 5  # size of each mini-batch
    progress_freq = 1000  # print the error every n mini-batches
    reader_train = create_reader(train_file, True, input_dim, output_dim)
    my_input_map = {
        input_Var: reader_train.streams.features,
        label_Var: reader_train.streams.labels
    }
    pp = ProgressPrinter(progress_freq)
    print("Starting training \n")
    for i in range(0, max_iter):
        currBatch = reader_train.next_minibatch(batch_size,
                                                input_map=my_input_map)
        trainer.train_minibatch(currBatch)
        pp.update_with_trainer(trainer)
    print("\nTraining complete")
    # ----------------------------------
    print("\nEvaluating test data \n")
    reader_test = create_reader(test_file, False, input_dim, output_dim)
    numTestItems = 30
    allTest = reader_test.next_minibatch(numTestItems,
                                        input_map=my_input_map)
    test_error = trainer.test_minibatch(allTest)
    print("Classification error on the 30 test items = %f" % test_error)
    # ----------------------------------
    # Predict the species of an unknown flower
    unknown = np.array([[6.9, 3.1, 4.6, 1.3]], dtype=np.float32)
    print(
        "\nPrevisão de espécies de Íris para as características de entrada:")
    my_print(unknown[0], 1)  # 1 decimal place
    predicted = nnet.eval({input_Var: unknown})
    print("Prediction is: ")
    my_print(predicted[0], 3)  # 3 decimal places
    # ---------------------------------
    print("\nTrained model input-to-hidden weights:\n")
    print(hLayer.hidLayer.W.value)
    print("\nTrained model hidden node biases:\n")
    print(hLayer.hidLayer.b.value)
    print("\nTrained model hidden-to-output weights:\n")
    print(oLayer.outLayer.W.value)
    print("\nTrained model output node biases:\n")
    print(oLayer.outLayer.b.value)
    save_weights("weights.txt", hLayer.hidLayer.W.value,
                 hLayer.hidLayer.b.value, oLayer.outLayer.W.value,
                 oLayer.outLayer.b.value)
    return 0  # success
# (tail of a create_model definition that begins above this chunk)
                               strides=(1, 1), pad=True,
                               name="second_conv")(h)
    h = cntk.layers.MaxPooling(filter_shape=(3, 3),
                               strides=(3, 3),
                               name="second_max")(h)
    r = cntk.layers.Dense(10, activation=None, name="classify")(h)
    return r


cnn = create_model(x)
# Re-bind the model to the scaled input (pixel values divided by 255).
cnn = cnn(x / 255)
loss = cntk.cross_entropy_with_softmax(cnn, y)
errs = cntk.classification_error(cnn, y)
trainer = cntk.Trainer(cnn, (loss, errs), [
    cntk.sgd(cnn.parameters,
             cntk.learning_rate_schedule(0.0105, cntk.UnitType.minibatch))
])
count = 0
begin_time = time.time()
# One record per minibatch: data[0] is the digit label, data[1:] the pixels.
for data in training_set:
    trainer.train_minibatch({
        x:
        numpy.array(data[1:], dtype=float32).reshape(1, 28, 28),
        # NOTE(review): the comprehension variable `x` shadows the cntk
        # input variable `x` inside this list comprehension (harmless in
        # Python 3, but confusing to read). Builds a one-hot label vector.
        y:
        numpy.array([1 if x == int(data[0]) else 0 for x in range(10)],
                    dtype=float32)
    })
    count += 1
    # Progress percentage on stderr, overwriting the same line.
    print("\r%.2f%%" % (count / len(training_set) * 100),
          file=sys.stderr,
          end="")
def TrainAndValidate(trainfile):
    '''Trains the query/passage CNN ranker on `trainfile`, saving the model
    every epoch and reporting validation precision/recall/F1 periodically.

    Relies on module-level validation_query_vectors /
    validation_passage_vectors / validation_labels for validation.
    Returns the trained model function.
    '''
    #*****Hyper-Parameters******
    q_max_words = 12
    p_max_words = 50
    emb_dim = 50
    num_classes = 2
    minibatch_size = 250
    epoch_size = 100000  #No.of samples in training set
    total_epochs = 200  #Total number of epochs to run
    query_total_dim = q_max_words * emb_dim
    label_total_dim = num_classes
    passage_total_dim = p_max_words * emb_dim

    #****** Create placeholders for reading Training Data ***********
    query_input_var = C.ops.input_variable((1, q_max_words, emb_dim),
                                           np.float32,
                                           is_sparse=False)
    passage_input_var = C.ops.input_variable((1, p_max_words, emb_dim),
                                             np.float32,
                                             is_sparse=False)
    output_var = C.input_variable(num_classes, np.float32, is_sparse=False)
    train_reader = create_reader(trainfile, True, query_total_dim,
                                 passage_total_dim, label_total_dim)
    input_map = {
        query_input_var: train_reader.streams.queryfeatures,
        passage_input_var: train_reader.streams.passagefeatures,
        output_var: train_reader.streams.labels
    }

    # ********* Model configuration *******
    model_output = cnn_network(query_input_var, passage_input_var,
                               num_classes)
    loss = C.binary_cross_entropy(model_output, output_var)
    pe = C.classification_error(model_output, output_var)
    lr_per_minibatch = C.learning_rate_schedule(0.03, C.UnitType.minibatch)
    learner = C.adagrad(model_output.parameters, lr=lr_per_minibatch)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=total_epochs)

    #************Create Trainer with model_output object, learner and loss parameters*************
    trainer = C.Trainer(model_output, (loss, pe), learner, progress_printer)
    C.logging.log_number_of_parameters(model_output)
    print()

    # **** Train the model in batchwise mode *****
    for epoch in range(total_epochs):  # loop over epochs
        print("Epoch : ", epoch)
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = train_reader.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # training step
            sample_count += data[
                output_var].num_samples  # count samples processed so far
        trainer.summarize_training_progress()
        model_output.save(
            "CNN_{}.dnn".format(epoch))  # Save the model for every epoch

        #*** Find metrics on validation set after every epoch ******#
        # (Note : you can skip doing this for every epoch instead to optimize
        # the time, do it after every k epochs)
        # NOTE(review): despite the comment above, validation actually runs
        # only every 19th epoch (epochs 0, 19, 38, ...).
        if epoch % 19 == 0:
            predicted_labels = []
            for i in range(len(validation_query_vectors)):
                queryVec = np.array(validation_query_vectors[i],
                                    dtype="float32").reshape(
                                        1, q_max_words, emb_dim)
                passageVec = np.array(validation_passage_vectors[i],
                                      dtype="float32").reshape(
                                          1, p_max_words, emb_dim)
                scores = model_output(
                    queryVec,
                    passageVec)[0]  # do forward-prop on model to get score
                # Predict class 1 when its score is at least that of class 0.
                predictLabel = 1 if scores[1] >= scores[0] else 0
                predicted_labels.append(predictLabel)
            metrics = precision_recall_fscore_support(
                np.array(validation_labels),
                np.array(predicted_labels),
                average='binary')
            print("precision : " + str(metrics[0]) + " recall : " +
                  str(metrics[1]) + " f1 : " + str(metrics[2]) + "\n")

    return model_output
# (this line closes a C.layers.Sequential([...]) factory whose opening
# lines are above this chunk -- presumably the RealNVP scale network)
])(C.placeholder(2))
# Second network factory: 2 -> 256 -> 256 -> 2 with leaky-ReLU hidden
# layers; presumably the RealNVP translation ('t') network -- confirm
# against the RealNVP implementation.
nett = lambda: C.layers.Sequential([
    C.layers.Dense(256, activation=C.leaky_relu),
    C.layers.Dense(256, activation=C.leaky_relu),
    C.layers.Dense(2)
])(C.placeholder(2))
# Alternating binary masks for 6 coupling layers.
masks = C.Constant(np.array([[0, 1], [1, 0]] * 3).astype(np.float32),
                   name='mask')
prior = MultivariateNormalDiag(loc=[0., 0.], scale_diag=[1., 1.])
flow = RealNVP(nets, nett, masks, prior)
# Maximum likelihood: minimize the negative mean log-probability.
loss = -C.reduce_mean(flow.log_prob)
learner = C.adam(loss.parameters, C.learning_parameter_schedule(1e-1),
                 C.momentum_schedule(0.9))
trainer = C.Trainer(flow.forward, (loss, None), learner)
# Train on freshly sampled two-moons data each iteration.
for t in range(5001):
    noisy_moons = datasets.make_moons(n_samples=1000,
                                      noise=.05)[0].astype(np.float32)
    trainer.train_minibatch({loss.arguments[0]: noisy_moons})
    if t % 500 == 0:
        print('iter %s:' % t,
              'loss = %.3f' % trainer.previous_minibatch_loss_average)
noisy_moons = datasets.make_moons(n_samples=1000,
                                  noise=.05)[0].astype(np.float32)
f = flow.forward.eval({flow.forward.arguments[0]: noisy_moons})
# v = flow.sample(1000) or
def run_experiment_cntk():
    '''Trains an LSTM sentiment classifier on the IMDB data with CNTK.

    Loads cached .bin files when present, otherwise downloads via Keras,
    pads the sequences, and caches them. The model is saved and reloaded
    once before training (presumably to exercise serialization -- confirm).
    '''
    if os.path.isfile('x_train_imdb.bin'):
        print('Loading from .bin files')
        x_train, y_train, x_test, y_test = load_from_files(
            x_shape=(25000, 500), y_shape=(25000, ))
    else:
        print('Loading data...')
        (x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
            num_words=Constants.max_words)
        print(len(x_train), 'train sequences')
        print(len(x_test), 'test sequences')
        print('Pad sequences (samples x time)')
        x_train = keras.preprocessing.sequence.pad_sequences(
            x_train, maxlen=Constants.maxlen)
        x_test = keras.preprocessing.sequence.pad_sequences(
            x_test, maxlen=Constants.maxlen)
        print('x_train shape:', x_train.shape)
        print('x_test shape:', x_test.shape)
        print('Saving to .bin files')
        save_to_files(x_train, y_train, x_test, y_test)

    # x: sequence of word ids; y: scalar binary label.
    x = cntk.sequence.input_variable(shape=(), dtype=np.float32)
    y = cntk.input_variable(shape=(), dtype=np.float32)
    # Build the graph on a placeholder so the saved model stays input-free;
    # the placeholder is re-bound to x after reloading below.
    x_placeholder = cntk.placeholder(shape=(),
                                     dynamic_axes=[
                                         cntk.Axis.default_batch_axis(),
                                         cntk.Axis.default_dynamic_axis()
                                     ])
    model = cntk.one_hot(x_placeholder,
                         num_classes=Constants.max_words,
                         sparse_output=True)
    model = cntk.layers.Embedding(Constants.embedding_dim)(model)
    model = cntk.layers.Recurrence(cntk.layers.LSTM(32))(model)
    model = cntk.sequence.last(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    model.save('ch6-2.cntk.model')
    model = None
    model = cntk.load_model('ch6-2.cntk.model')
    model.replace_placeholders({model.placeholders[0]: x})

    loss_function = cntk.binary_cross_entropy(model.output, y)
    # Accuracy = mean of (rounded prediction == label).
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements,
                                         axis=cntk.Axis.all_static_axes())

    max_epochs = 10
    batch_size = 128
    # NOTE(review): the second argument (momentum) is built with
    # learning_parameter_schedule_per_sample rather than a momentum
    # schedule -- confirm this is intended.
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.01),
                        cntk.learning_parameter_schedule_per_sample(0.9))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)
    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
def train_and_evaluate(reader_train, reader_test, max_epochs, model_func):
    '''Trains model_func on reader_train, evaluates on reader_test, plots the
    smoothed loss/error curves, and returns the softmaxed model.

    Uses module-level num_channels / image_height / image_width / num_classes.
    '''
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # Normalize pixel values into [0, 1).
    feature_scale = 1.0 / 256.0
    input_var_norm = C.element_times(feature_scale, input_var)
    z = model_func(input_var_norm, out_dims=2)
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    epoch_size = 900
    minibatch_size = 64

    # Set training parameters
    lr_per_minibatch = C.learning_rate_schedule(
        [0.01] * 100 + [0.003] * 100 + [0.001], C.UnitType.minibatch,
        epoch_size)
    m = C.momentum_schedule(0.9)
    l2_reg_weight = 0.001
    learner = C.momentum_sgd(z.parameters,
                             lr=lr_per_minibatch,
                             momentum=m,
                             l2_regularization_weight=l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), [learner], [progress_printer])

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }
    C.logging.log_number_of_parameters(z)
    print()

    # perform model training
    batch_index = 0
    plot_data = {'batchindex': [], 'loss': [], 'error': []}
    # loop over epochs
    for epoch in range(max_epochs):
        sample_count = 0
        # loop over minibatches in the epoch
        while sample_count < epoch_size:
            # fetch minibatch.
            data = reader_train.next_minibatch(min(minibatch_size,
                                                   epoch_size - sample_count),
                                               input_map=input_map)
            # update model with it
            trainer.train_minibatch(data)
            # count samples processed so far
            sample_count += data[label_var].num_samples
            # For visualization...
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(
                trainer.previous_minibatch_evaluation_average)
            batch_index += 1
        trainer.summarize_training_progress()
    z.save("simpleconv3.dnn")

    #
    # Evaluation action
    #
    epoch_size = 100
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0
    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    # Visualize training result: moving average of loss and error over a
    # 32-minibatch window, computed via cumulative sums.
    window_width = 32
    loss_cumsum = np.cumsum(np.insert(plot_data['loss'], 0, 0))
    error_cumsum = np.cumsum(np.insert(plot_data['error'], 0, 0))

    # Moving average.
    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0,
                                        0)[window_width:]
    plot_data['avg_loss'] = (loss_cumsum[window_width:] -
                             loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error'] = (error_cumsum[window_width:] -
                              error_cumsum[:-window_width]) / window_width

    # NOTE(review): plt.show() is called between the two subplots; in
    # non-interactive mode the second subplot lands on a new figure --
    # confirm whether both curves were meant on one figure.
    plt.figure(1)
    plt.subplot(211)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss ')
    plt.show()
    plt.subplot(212)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error ')
    plt.show()
    return C.softmax(z)
def main():
    '''Binary classification of the Cleveland Heart Disease data with a
    two-output-node tanh network; trains with SGD, then evaluates accuracy
    on the full data set via test_minibatch().
    '''
    print("\nBegin binary classification (two-node technique) \n")
    print("Using CNTK version =" + str(C.__version__) + "\n")

    # define input parameters
    input_dim = 20
    hidden_dim = 20
    output_dim = 2
    train_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              r"../data/hr-cleveland-all-data.txt")

    # 1. create network
    X = C.ops.input_variable(input_dim, dtype=np.float32)
    Y = C.ops.input_variable(output_dim, dtype=np.float32)
    print("Creating a 18-20-2 tanh-softmax NN")
    with C.layers.default_options(
            init=C.initializer.uniform(scale=0.01, seed=1)):
        hLayer = C.layers.Dense(hidden_dim,
                                activation=C.ops.tanh,
                                name='hidLAyer')(X)
        oLayer = C.layers.Dense(output_dim, activation=None,
                                name='outLAyer')(hLayer)
    # nnet gives normalized probabilities for prediction/reporting.
    nnet = C.ops.softmax(oLayer)

    # 2. create learner and trainer
    print("Creating a cross entropy batch=10 SGD LP=0.005 Trainer")
    # BUGFIX: the loss was previously computed on nnet (already softmaxed),
    # so cross_entropy_with_softmax applied softmax twice and squashed the
    # gradients. Compute loss/error on the raw logits (oLayer) instead;
    # classification_error is argmax-based, so reported accuracy is
    # unaffected (softmax is monotonic).
    tr_loss = C.cross_entropy_with_softmax(oLayer, Y)
    tr_class = C.classification_error(oLayer, Y)
    max_iter = 5000
    batch_size = 10
    learning_rate = 0.005
    learner = C.sgd(nnet.parameters, learning_rate)
    trainer = C.Trainer(nnet, (tr_loss, tr_class), [learner])

    # 3. create reader for train data
    rdr = create_reader(train_file,
                        input_dim,
                        output_dim,
                        rnd_order=False,
                        sweeps=C.io.INFINITELY_REPEAT)
    heart_input_map = {X: rdr.streams.x_src, Y: rdr.streams.y_src}

    # 4. train, reporting loss/accuracy ten times over the run
    print("\n Starting training")
    for i in range(0, max_iter):
        curr_batch = rdr.next_minibatch(batch_size,
                                        input_map=heart_input_map)
        trainer.train_minibatch(curr_batch)
        if i % int(max_iter / 10) == 0:
            mcee = trainer.previous_minibatch_loss_average
            macc = (1.0 -
                    trainer.previous_minibatch_evaluation_average) * 100
            print("batch %4d: mean loss =%0.4f, accuracy = %0.2f" %
                  (i, mcee, macc))
    print("\nTraining complete")

    # 5. evaluate model using all data (single sweep, no shuffling)
    print("\nEvaluating accuracy using built-in test_minibatch() \n")
    rdr = create_reader(train_file,
                        input_dim,
                        output_dim,
                        rnd_order=False,
                        sweeps=1)
    heart_input_map = {X: rdr.streams.x_src, Y: rdr.streams.y_src}
    num_test = 297
    all_test = rdr.next_minibatch(num_test, input_map=heart_input_map)
    acc = (1.0 - trainer.test_minibatch(all_test)) * 100
    print("Classification accuracy on the %d data items = %0.2f" %
          (num_test, acc))

    # (could save model here)
    # (use trained to make prediction)
    print("\n End Cleveland Heart Disease classification ")
    def create_model(self):
        '''Builds the action-prediction network: convolutional processing of
        self._input, a dense projection of self._target, an LSTM over their
        concatenation, and two heads (360-way direction classification plus
        a scalar velocity regression).

        Returns (model, loss, learner, trainer).
        '''
        modeli = C.layers.Sequential([
            # Convolution layers
            C.layers.Convolution2D((1, 3),
                                   num_filters=8,
                                   pad=True,
                                   reduction_rank=0,
                                   activation=C.ops.tanh,
                                   name='conv_a'),
            C.layers.Convolution2D((1, 3),
                                   num_filters=16,
                                   pad=True,
                                   reduction_rank=1,
                                   activation=C.ops.tanh,
                                   name='conv2_a'),
            C.layers.Convolution2D((1, 3),
                                   num_filters=32,
                                   pad=False,
                                   reduction_rank=1,
                                   activation=C.ops.tanh,
                                   name='conv3_a'),
            ######
            # Dense layers
            #C.layers.Dense(128, activation=C.ops.relu,name='dense1_a'),
            #C.layers.Dense(64, activation=C.ops.relu,name='dense2_a'),
            C.layers.Dense(361, activation=C.ops.relu, name='dense3_a')
        ])(self._input)
        ### target
        modelt = C.layers.Sequential(
            [C.layers.Dense(360, activation=C.ops.relu,
                            name='dense4_a')])(self._target)
        ### concatenate both processed target and observations
        inputs = C.ops.splice(modeli, modelt)
        ### Use input to predict next hidden state, and generate
        ### next observation
        model = C.layers.Sequential([
            ######
            C.layers.Dense(720, activation=C.ops.relu, name='dense5_a'),
            # Recurrence
            C.layers.Recurrence(C.layers.LSTM(2048,
                                              init=C.glorot_uniform()),
                                name='lstm_a'),
            C.layers.Dense(1024, activation=None)
        ])(inputs)
        ######
        # Prediction: direction head (360 logits) and velocity head (one
        # scalar), spliced into a single 361-wide output.
        direction = C.layers.Sequential([
            C.layers.Dense(720, activation=C.ops.relu, name='dense6_a'),
            C.layers.Dense(360, activation=None, name='dense7_a')
        ])(model)
        velocity = C.layers.Sequential([
            C.layers.Dense(128, activation=C.ops.relu),
            C.layers.Dense(64, activation=None),
            C.layers.Dense(1, activation=None)
        ])(model)
        model = C.ops.splice(direction, velocity)
        #model = velocity
        # Optionally resume from a saved model; the two heads are re-sliced
        # out of the loaded graph (first 360 outputs = direction, last =
        # velocity).
        if self._load_model:
            model = C.load_model('dnns/action_predicter.dnn')
            direction = model[0:360]
            velocity = model[360]
        print(model)
        # Combined objective: cross-entropy on the direction head plus
        # squared error on the velocity head.
        loss = C.cross_entropy_with_softmax(
            direction, self._output) + C.squared_error(
                velocity, self._output_velocity)
        error = C.classification_error(
            direction, self._output) + C.squared_error(
                velocity, self._output_velocity)
        #loss = C.squared_error(direction, self._output) + C.squared_error(velocity, self._output_velocity)
        #error = C.classification_error(direction, self._output) + C.squared_error(velocity, self._output_velocity)
        learner = C.adadelta(model.parameters,
                             l2_regularization_weight=0.001)
        progress_printer = C.logging.ProgressPrinter(tag='Training')
        trainer = C.Trainer(model, (loss, error), learner, progress_printer)
        return model, loss, learner, trainer
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    '''Trains a VGG-inspired 3D-convolution video classifier on UCF11 data
    and returns the final test error rate (metric_numer / metric_denom).
    '''
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height = train_reader.height
    image_width = train_reader.width
    num_channels = train_reader.channel_count
    sequence_length = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = C.input_variable(
        (num_channels, sequence_length, image_height, image_width),
        np.float32)
    label_var = C.input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with C.default_options(activation=C.relu):
        z = C.layers.Sequential([
            C.layers.Convolution3D((3, 3, 3), 64, pad=True),
            C.layers.MaxPooling((1, 2, 2), (1, 2, 2)),
            # Three double-conv + pool stages with widths 96, 128, 128.
            C.layers.For(
                range(3), lambda i: [
                    C.layers.Convolution3D(
                        (3, 3, 3), [96, 128, 128][i], pad=True),
                    C.layers.Convolution3D(
                        (3, 3, 3), [96, 128, 128][i], pad=True),
                    C.layers.MaxPooling((2, 2, 2), (2, 2, 2))
                ]),
            C.layers.For(range(2),
                         lambda: [C.layers.Dense(1024),
                                  C.layers.Dropout(0.5)]),
            C.layers.Dense(num_output_classes, activation=None)
        ])(input_var)

    # loss and classification error.
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    train_epoch_size = train_reader.size()
    train_minibatch_size = 2

    # Set learning parameters
    lr_per_sample = [0.01] * 10 + [0.001] * 10 + [0.0001]
    lr_schedule = C.learning_rate_schedule(lr_per_sample,
                                           epoch_size=train_epoch_size,
                                           unit=C.UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule = C.momentum_as_time_constant_schedule(
        [momentum_time_constant])

    # Instantiate the trainer object to drive the model training
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)
    log_number_of_parameters(z)
    print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        train_reader.reset()
        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(
                train_minibatch_size)
            trainer.train_minibatch({input_var: videos, label_var: labels})
        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size = test_reader.size()
    test_minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0
    test_reader.reset()
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(
            test_minibatch_size)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch({
            input_var: videos,
            label_var: labels
        }) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")
    return metric_numer / metric_denom
def main():
    """Logistic-regression training demo: fit a two-feature sigmoid
    classifier with one-item SGD minibatches, then print the learned
    weights and bias."""
    print('\nBegin logistic regression training demo')
    cntk_version = C.__version__
    print('(Using CNTK version ' + str(cntk_version) + ')')

    # training data format:
    # 4.0, 3.0, 1
    # 9.0, 5.0, 1
    # . . .
    data_file = '.\\age_edu_sex.txt'
    print('\nLoading data from ' + data_file + '\n')

    # Predictors are columns 0-1; the class label is column 2.
    train_x = np.loadtxt(data_file, dtype=np.float32, delimiter=',',
                         skiprows=0, usecols=[0, 1])
    print(train_x)
    train_y = np.loadtxt(data_file, dtype=np.float32, delimiter=',',
                         skiprows=0, usecols=[2], ndmin=2)
    print(train_y)
    print(train_y.shape)
    print('Training data:')
    print(np.concatenate((train_x, train_y), axis=1))

    # Build the model graph: p = sigmoid(x.W + b).
    features_dimension = 2  # x1, x2
    labels_dimension = 1  # always 1 for logistic regression
    X = C.input_variable(features_dimension, np.float32)
    y = C.input_variable(labels_dimension, np.float32)
    W = C.parameter(shape=(features_dimension, 1))  # trainable weights
    b = C.parameter(shape=(labels_dimension))  # trainable bias
    z = C.times(X, W) + b  # raw score
    model = 1.0 / (1.0 + C.exp(-z))  # logistic squashing of the score

    # Learner and trainer: binary cross-entropy loss, plain SGD.
    cross_entropy_error = C.binary_cross_entropy(model, y)
    # squared_error = C.squared_error(model, y)
    learning_rate = 0.010
    learner = C.sgd(model.parameters, learning_rate)
    trainer = C.Trainer(model, (cross_entropy_error), [learner])
    max_iterations = 4000

    # Train on one randomly chosen row per iteration.
    print('Start training')
    print('Iterations: ' + str(max_iterations))
    print('Learning Rate (LR): ' + str(learning_rate))
    print('Mini-batch = 1')
    np.random.seed(4)
    item_count = len(train_x)
    for it in range(max_iterations):
        row = np.random.choice(item_count, 1)
        trainer.train_minibatch({X: train_x[row], y: train_y[row]})
        if it > 0 and it % 1000 == 0:
            mcee = trainer.previous_minibatch_loss_average
            print(
                str(it) + ' Cross entropy error on current item = %0.4f ' %
                mcee)
    print('Training complete')

    # Report the fitted parameters.
    np.set_printoptions(precision=4, suppress=True)
    print('Model weights:')
    print(W.value)
    print('Model bias:')
    print(b.value)
def convnet_mnist(debug_output=False,
                  epoch_size=60000,
                  minibatch_size=64,
                  max_epochs=5):
    '''Trains a small convolutional classifier on MNIST, exports the model
    (CNTK format per epoch, plus ONNX), and returns the final test error
    rate. Uses module-level data_path / model_path / create_reader.

    NOTE(review): debug_output is not used anywhere in this body.
    '''
    image_height = 28
    image_width = 28
    num_channels = 1
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variables denoting the features and label data
    input_var = C.ops.input_variable(
        (num_channels, image_height, image_width), np.float32)
    label_var = C.ops.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    # (inputs scaled by 1/256 into [0, 1)).
    scaled_input = C.ops.element_times(C.ops.constant(0.00390625), input_var)
    with C.layers.default_options(activation=C.ops.relu, pad=False):
        conv1 = C.layers.Convolution2D((5, 5), 32, pad=True)(scaled_input)
        pool1 = C.layers.MaxPooling((3, 3), (2, 2))(conv1)
        conv2 = C.layers.Convolution2D((3, 3), 48)(pool1)
        pool2 = C.layers.MaxPooling((3, 3), (2, 2))(conv2)
        conv3 = C.layers.Convolution2D((3, 3), 64)(pool2)
        f4 = C.layers.Dense(96)(conv3)
        drop4 = C.layers.Dropout(0.5)(f4)
        z = C.layers.Dense(num_output_classes, activation=None)(drop4)

    ce = C.losses.cross_entropy_with_softmax(z, label_var)
    pe = C.metrics.classification_error(z, label_var)

    reader_train = create_reader(
        os.path.join(data_path, 'Train-28x28_cntk_text.txt'), True,
        input_dim, num_output_classes)

    # Set learning parameters
    lr_per_sample = [0.001] * 10 + [0.0005] * 10 + [0.0001]
    lr_schedule = C.learning_rate_schedule(lr_per_sample,
                                           C.learners.UnitType.sample,
                                           epoch_size)
    mm_time_constant = [0] * 5 + [1024]
    mm_schedule = C.learners.momentum_as_time_constant_schedule(
        mm_time_constant, epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }
    C.logging.log_number_of_parameters(z)
    print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += data[
                label_var].num_samples  # count samples processed so far
        trainer.summarize_training_progress()
        z.save(os.path.join(model_path,
                            "ConvNet_MNIST_{}.dnn".format(epoch)))

    # Export as ONNX
    z.save(os.path.join(model_path, "MNIST.onnx"), format=C.ModelFormat.ONNX)

    # Load test data
    reader_test = create_reader(
        os.path.join(data_path, 'Test-28x28_cntk_text.txt'), False,
        input_dim, num_output_classes)
    input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels
    }

    # Test data for trained model
    epoch_size = 10000
    minibatch_size = 1024

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0
    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")
    return metric_numer / metric_denom
# feature = C.input((input_dim), is_sparse=True) # label = C.input((output_dim), np.float32) #netout = self.create_model(input_dim, output_dim, hidden_dim, feature) netout = self.create_model(input_dim, output_dim, hidden_dim, feature) loss = C.squared_error(netout, feature) evaluation = C.squared_error(netout, feature) lr_per_minibatch = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch) learner = C.sgd(netout.parameters, lr=lr_per_minibatch) #learner = C.adagrad(netout.parameters, C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)) progress_printer = C.logging.ProgressPrinter(minibatch_size) trainer = C.Trainer(netout, (loss, evaluation), learner, progress_printer) plotdata = {"loss": []} for epoch in range(100): for i in range(100): d = self.get_next_data(minibatch_size) data = {feature: d, label: d} """ # This is how to get the Numpy typed data from the reader ldata = data[label].asarray() fdata = data[feature].asarray() """ trainer.train_minibatch(data) loss = trainer.previous_minibatch_loss_average if not (loss == "NA"): plotdata["loss"].append(loss)
return linear_layer(h, num_output_classes) # Create the fully connected classfier z = fully_connected_classifier_net(input, num_output_classes, hidden_layers_dim, num_hidden_layers, C.sigmoid) loss = C.cross_entropy_with_softmax(z, label) eval_error = C.classification_error(z, label) # Instantiate the trainer object to drive the model training learning_rate = 0.5 lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch) learner = C.sgd(z.parameters, lr_schedule) trainer = C.Trainer(z, (loss, eval_error), [learner]) # Initialize the parameters for the trainer minibatch_size = each_batch_size #50 num_samples = train_count #5000 num_minibatches_to_train = num_samples / minibatch_size print("train our model now") data1 = {"batchsize": [], "loss": [], "error": []} for train_times in xrange(1, 10): data2 = {"batchsize_t": [], "loss_t": [], "error_t": []} for i in range(0, int(num_minibatches_to_train)): features = np.float32(training_batch_x[i]) labels = np.float32(training_batch_y[i]) # Specify the input variables mapping in the model to actual minibatch data for training trainer.train_minibatch({
def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_epochs=80):
    """Train and evaluate a CIFAR-10 ConvNet with local response normalization.

    :param reader_train: minibatch source providing `features`/`labels` streams
        for training (presumably with data augmentation — TODO confirm upstream)
    :param reader_test: minibatch source for evaluation
    :param epoch_size: number of training samples per epoch
    :param max_epochs: number of training epochs
    :return: average test error (metric_numer / metric_denom)

    NOTE(review): relies on module-level names not defined in this block:
    `num_channels`, `image_height`, `image_width`, `num_classes`,
    `LocalResponseNormalization`, `model_path`, `_cntk_py`, `os`.
    """
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # apply model to input; 0.00390625 == 1/256 scales pixel values to [0, 1)
    scaled_input = C.element_times(C.constant(0.00390625), input_var)
    with C.layers.default_options(activation=C.relu, pad=True):
        z = C.layers.Sequential([
            C.layers.For(
                range(2), lambda: [
                    C.layers.Convolution2D((3, 3), 64),
                    C.layers.Convolution2D((3, 3), 64),
                    LocalResponseNormalization(1.0, 4, 0.001, 0.75),
                    C.layers.MaxPooling((3, 3), (2, 2))
                ]),
            C.layers.For(
                range(2),
                lambda i: [C.layers.Dense([256, 128][i]),
                           C.layers.Dropout(0.5)]),
            C.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    minibatch_size = 64

    # Set learning parameters: stepped per-sample LR and momentum schedules
    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [
        0.00015625
    ] * 20 + [0.000046875] * 10 + [0.000015625]
    lr_schedule = C.learning_rate_schedule(lr_per_sample,
                                           unit=C.learners.UnitType.sample,
                                           epoch_size=epoch_size)
    mm_time_constant = [0] * 20 + [600] * 20 + [1200]
    mm_schedule = C.learners.momentum_as_time_constant_schedule(
        mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = C.learners.momentum_sgd(z.parameters,
                                      lr_schedule,
                                      mm_schedule,
                                      unit_gain=True,
                                      l2_regularization_weight=l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z)
    print()

    # perform model training
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
        trainer.summarize_training_progress()
        # checkpoint after every epoch
        z.save(
            os.path.join(model_path,
                         "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action — note epoch_size/minibatch_size are rebound here
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    return metric_numer / metric_denom
# MNIST training setup: download data, build a conv-maxpool model, and wire
# up an SGD trainer plus a reader/input mapping.
input_dim_model = (1, 28, 28)  # one-channel 28x28 images
num_output_classes = 10        # digits 0-9

mnist_downloader = MnistDownloader(MINST_DOWNLOAD_INFO)
mnist_downloader.download_and_save_data()

model_definition = ConvolutionalMaxPooling(input_dim_model, num_output_classes)

learning_rate = 0.2
lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
learner = cntk.sgd(model_definition.model.parameters, lr_schedule)
tensor_writer = TensorWriter(model_definition.model)
trainer = cntk.Trainer(model_definition.model,
                       (model_definition.get_loss(),
                        model_definition.get_classification_error()),
                       [learner], tensor_writer.get_writer())

# Training
minibatch_size = 64
num_samples_per_sweep = 60000
num_sweeps_to_train_with = 10
num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size

# NOTE(review): `init_reader` is called with `input_dim`, which is not defined
# in this chunk (only `input_dim_model` is) — presumably the flattened size
# (784) defined elsewhere; TODO confirm.
reader_train = init_reader(mnist_downloader.train_file, input_dim, num_output_classes)
input_map = {
    model_definition.label: reader_train.streams.labels,
    model_definition.input: reader_train.streams.features
}
def main(input_map_file, output_retrained_model_file, pretrained_model_file):
    """Retrain a pretrained model on the samples listed in a MAP file.

    :param input_map_file: TAB-separated MAP file; one sample per line, the
        last field is the sample's class label
    :param output_retrained_model_file: path where the retrained model is saved
    :param pretrained_model_file: path of the pretrained model to start from
    """
    # One pass over the MAP file: each line is a sample, its last TAB-field
    # names the class, so line count == epoch size and distinct labels ==
    # number of classes.
    class_names = set()
    epoch_size = 0
    with open(input_map_file, 'r') as map_file:
        for row in map_file:
            class_names.add(row.strip().split('\t')[-1])
            epoch_size += 1
    n_classes = len(class_names)

    # Minibatch source over the same MAP file
    minibatch_source = create_minibatch_source(input_map_file, n_classes)

    # Network inputs and their binding to the reader streams
    image_input = cntk.ops.input_variable((3, 224, 224))
    label_input = cntk.ops.input_variable((n_classes))
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels,
    }

    # Load the model file and modify as needed
    model = load_pretrained_model(image_input, n_classes, pretrained_model_file)

    # Loss / metric
    ce = cntk.losses.cross_entropy_with_softmax(model, label_input)
    pe = cntk.metrics.classification_error(model, label_input)

    # Learning parameters: stepped per-sample LR schedule
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 25 + [0.000001] * 15 + [0.0000001]
    momentum_time_constant = 10
    max_epochs = 50
    mb_size = 16
    lr_schedule = cntk.learners.learning_rate_schedule(
        lr_per_sample, unit=cntk.UnitType.sample)
    mm_schedule = cntk.learners.momentum_as_time_constant_schedule(
        momentum_time_constant)

    # Trainer with console progress logging
    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    ]
    learner = cntk.learners.fsadagrad(
        parameters=model.parameters,
        lr=lr_schedule,
        momentum=mm_schedule,
        l2_regularization_weight=l2_reg_weight)
    trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers)

    # Training loop: feed epoch_size samples per epoch in mb_size chunks
    print('Retraining AlexNet model for {} epochs.'.format(max_epochs))
    cntk.logging.progress_print.log_number_of_parameters(model)
    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            remaining = epoch_size - sample_count
            data = minibatch_source.next_minibatch(min(mb_size, remaining),
                                                   input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
        trainer.summarize_training_progress()

    model.save(output_retrained_model_file)
    return
def train(reader, model, max_epochs):
    """Train a slot-tagging model with the legacy CNTK `cntk.learner` API.

    :param reader: minibatch source with `query` and `slot_labels` streams
    :param model: callable model function applied to the query input
    :param max_epochs: number of epochs (epoch_size samples each) to run
    :return: (loss, metric) tuple from the final epoch summary

    NOTE(review): uses the deprecated CNTK 2.0-beta API surface
    (`cntk.blocks.Input`, `cntk.learner.*`, `cntk.utils`); `input_dim` and
    `num_labels` come from module scope outside this block.
    """
    # Input variables denoting the features and label data
    query = cntk.blocks.Input(input_dim, is_sparse=False)
    slot_labels = cntk.blocks.Input(
        num_labels, is_sparse=True)  # TODO: make sparse once it works

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, slot_labels)
    pe = cntk.ops.classification_error(z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100  # NOTE(review): unused in this function

    # TODO: Change to round number. This is 664.39. 700?
    momentum_time_constant = cntk.learner.momentum_as_time_constant_schedule(
        minibatch_size / -math.log(0.9))

    # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)
    lr_schedule = [0.003] * 2 + [0.0015] * 12 + [0.0003]

    # trainer object
    lr_per_sample = cntk.learner.learning_rate_schedule(
        lr_schedule, cntk.learner.UnitType.sample, epoch_size)
    learner = cntk.learner.adam_sgd(z.parameters,
                                    lr=lr_per_sample,
                                    momentum=momentum_time_constant,
                                    low_memory=True,
                                    gradient_clipping_threshold_per_sample=15,
                                    gradient_clipping_with_truncation=True)
    trainer = cntk.Trainer(z, (ce, pe), [learner])

    # define mapping from reader streams to network inputs
    input_map = {
        query: reader.streams.query,
        slot_labels: reader.streams.slot_labels
    }

    # process minibatches and perform model training
    cntk.utils.log_number_of_parameters(z)
    print()
    progress_printer = cntk.ProgressPrinter(
        freq=100, first=10, tag='Training',
        num_epochs=max_epochs)  # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    t = 0  # total samples processed across all epochs
    # loop over epochs
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        # loop over minibatches on the epoch
        while t < epoch_end:
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(
                min(minibatch_size, epoch_end - t),
                input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # update model with it
            t += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress
        #def trace_node(name):
        #    nl = [n for n in z.parameters if n.name() == name]
        #    if len(nl) > 0:
        #        print (name, np.asarray(nl[0].value))
        #trace_node('W')
        #trace_node('stabilizer_param')
        loss, metric, actual_samples = progress_printer.epoch_summary(
            with_metric=True)

    # returns the summary of the final epoch
    return loss, metric
def _run(self):
    """Train ``self.train_model``, checkpointing and validating every epoch.

    Reads config/hyperparameters from ``self.config`` and ``self.params``,
    optionally restores a checkpoint, trains with Adagrad over
    ``self.x_train``/``self.y_train``, logs to file and TensorBoard, and
    finally writes predictions and per-epoch validation costs via
    ``scipy.io.savemat``.

    Fixes applied (the original did not parse/run):
      * ``_target = m.target`` but every feed used ``_targets`` -> NameError
      * ``logits.restore(...)`` / ``log_number_of_parameters(logits)`` with no
        ``logits`` defined -> use ``output`` (the model function)
      * missing ``)`` on ``output.save(model_path(epoch+1)``
      * ``{'mape':[], 'mae':[], 'rmse:',[]}`` comma-for-colon SyntaxError
      * ``if key_name = 'mape'`` -> ``if key.name == 'mape':``
      * stray extra ``)`` on the valid_rmse print
    """
    m = self.train_model
    config = self.config
    params = self.params
    vmax = self.vmax

    output = m.output
    _inputs = m.inputs
    _targets = m.target  # BUGFIX: was `_target`, but fed below as `_targets`
    criterion = m.criterion
    cost = m.cost
    lr = config.learning_rate

    last_epoch = 0

    if not os.path.exists(params.model_dir):
        os.makedirs(params.model_dir)
    if not os.path.exists(params.log_dir):
        os.makedirs(params.log_dir)

    def model_path(epoch):
        # Checkpoint name: <model_dir>/<save_model_name>.cmf.<epoch>
        return os.path.join(params.model_dir, params.save_model_name) + ".cmf." + str(epoch)

    if params.continue_training:
        load_model = os.path.join(params.load_model_dir, params.load_model_name)
        if os.path.exists(load_model):
            # Checkpoint files end in ".<epoch>", so resume from that epoch.
            last_epoch = int(params.load_model_name.split(".")[-1])
            # BUGFIX: was `logits.restore(...)`; `logits` is undefined here.
            output.restore(load_model)
            print("[INFO] Restore from %s at Epoch %d" % (params.load_model_name, last_epoch))
        else:
            raise ValueError("'load model path' can't be found")

    print()
    print("_____________Starting training______________")
    # BUGFIX: was log_number_of_parameters(logits); `logits` is undefined.
    cntk.logging.log_number_of_parameters(output)
    print(output.parameters)
    print()

    # Per-sample LR schedule: lr for 2 epochs, lr/2 for 3, then lr/4.
    learner = cntk.learners.adagrad(
        output.parameters,
        lr=cntk.learners.learning_parameter_schedule_per_sample(
            [lr] * 2 + [lr / 2] * 3 + [lr / 4],
            epoch_size=self.config.train_epoch),
        gradient_clipping_threshold_per_sample=config.grad_clip,
        gradient_clipping_with_truncation=True
    )

    progress_log_file = os.path.join(params.model_dir, params.save_model_name) + ".txt"
    progress_writer = cntk.logging.ProgressPrinter(freq=params.print_freq,
                                                   tag='Training',
                                                   log_to_file=progress_log_file)
    tensorboard_writer = cntk.logging.TensorBoardProgressWriter(log_dir=params.log_dir,
                                                                model=output)
    trainer = cntk.Trainer(None, criterion, learner,
                           progress_writers=tensorboard_writer)

    start_time = time.time()
    localtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
    print(localtime)

    if last_epoch < config.train_epoch:
        for epoch in range(last_epoch, config.train_epoch):
            print("---------------------------------")
            print("epoch ", epoch + 1, " start")
            for step, (x, y) in enumerate(zip(self.x_train, self.y_train)):
                trainer.train_minibatch({_inputs: x, _targets: y})
                progress_writer.update_with_trainer(trainer, with_metric=True)
            progress_writer.epoch_summary(with_metric=True)

            # BUGFIX: missing closing parenthesis on save(...)
            output.save(model_path(epoch + 1))
            print("Saving model to '%s'" % model_path(epoch + 1))

            # BUGFIX: third entry was "'rmse:',[]" (comma instead of colon).
            costs = {'mape': [], 'mae': [], 'rmse': []}
            # NOTE(review): x_v / y_v are not defined anywhere in this method —
            # presumably validation arrays bound elsewhere (e.g. self.x_valid);
            # TODO confirm against the enclosing class.
            _costs = cost.eval({_inputs: x_v, _targets: y_v})
            for key in _costs:
                # BUGFIX: was `if key_name = 'mape'`. The mape cost is recorded
                # as-is; the other costs are rescaled by vmax.
                if key.name == 'mape':
                    costs[key.name].append(_costs[key])
                else:
                    costs[key.name].append(_costs[key] * vmax)
            print("valid_mape:", costs['mape'][-1])
            print("valid_mae:", costs['mae'][-1])
            # BUGFIX: removed stray extra ')' at end of this line
            print("valid_rmse:", costs['rmse'][-1])

    print()
    print("train time:", time.time() - start_time)
    localtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
    print(localtime)

    # Final predictions + collected validation costs, saved in MATLAB format.
    # NOTE(review): the file is named results.txt but savemat writes .mat data.
    y_predict = output.eval({_inputs: x_v, _targets: y_v})
    fileout = os.path.join(params.model_dir, 'results.txt')
    sio.savemat(fileout, {'y_predict': y_predict, 'costs': costs})