def train():
    global sentences, vocabulary, reverse_vocabulary
    # function will create the trainer and train it for specified number of epochs

    G.num_minibatches = G.train_words // G.minibatch_size

    # Print loss 50 times while training
    print_freqency = G.num_minibatches // 50
    pp = ProgressPrinter(print_freqency)

    # get the trainer
    word_one_hot, context_one_hots, negative_one_hots, target, trainer, word_negative_context_product, embedding_layer = create_trainer()

    # Get the input generator
    minibatch_generator = cntk_minibatch_generator(G.minibatch_size, sentences, vocabulary, reverse_vocabulary)

    for train_steps in range(G.num_minibatches):
        # Get mini_batch and train for one minibatch
        word, contexts, negatives, targets = next(minibatch_generator)

        mapping = {word_one_hot: word, target: targets}
        for i in range(context_size):
            mapping[context_one_hots[i]] = contexts[i]
        for i in range(G.negative):
            mapping[negative_one_hots[i]] = negatives[i]

        trainer.train_minibatch(mapping)
        pp.update_with_trainer(trainer)

    return word_negative_context_product
def train():
    global sentences, vocabulary, reverse_vocabulary
    # function will create the trainer and train it for specified number of epochs

    # Print loss 50 times while training
    print_freqency = 50
    pp = ProgressPrinter(print_freqency)

    # get the trainer
    word_one_hot, context_one_hots, negative_one_hots, target, trainer, word_negative_context_product, embedding_layer = create_trainer()

    # Train the network using instances from the input generator
    training_instances = 0
    # print("Calculating the number of training instances")
    # start = time.time()
    # prev_time = time.time()
    # for word, contexts, negatives, targets in cntk_minibatch_generator(G.minibatch_size, sentences, vocabulary, reverse_vocabulary):
    #     training_instances += 1
    #     print(training_instances)
    #     print("time taken for 1 training instance = %.8fsecs" % (time.time() - prev_time))
    #     prev_time = time.time()
    # end = time.time()
    # print("Total training instances =", training_instances)
    # print("Time taken in one entire loop = %.2fsecs" % (end - start))

    training_instances = 0
    start_batch_collection = time.time()
    for word, contexts, negatives, targets in cntk_minibatch_generator(G.minibatch_size, sentences, vocabulary, reverse_vocabulary):
        end_batch_collection = time.time()
        print("Batch collection time = %.6fsecs" % (end_batch_collection - start_batch_collection))
        print("Time taken to collect one training_instance = %.6fsecs" % ((end_batch_collection - start_batch_collection) / G.minibatch_size))

        start_mapping = time.time()
        mapping = {word_one_hot: word, target: targets}
        for i in range(context_size):
            mapping[context_one_hots[i]] = contexts[i]
        for i in range(G.negative):
            mapping[negative_one_hots[i]] = negatives[i]
        end_mapping = time.time()
        print("Mapping time = %.6fsecs" % (end_mapping - start_mapping))

        start_train = time.time()
        trainer.train_minibatch(mapping)
        end_train = time.time()
        print("minibatch train time = %.6fsecs" % (end_train - start_train))
        print("Time per training instance = %.6fsecs" % ((end_train - start_train) / G.minibatch_size))

        pp.update_with_trainer(trainer)
        training_instances += 1
        if training_instances % 20000 == 0:
            # Save embeddings temporarily
            print("training instances till now =", training_instances)
            save_embeddings(word_negative_context_product, vocabulary)
        # start_batch_collection = time.time()

    print("Total training instances =", training_instances)
    return word_negative_context_product
def train_fast_rcnn(debug_output=False):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = input_variable((num_channels, image_height, image_width))
    roi_input = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        roi_input: minibatch_source[roi_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        progress_printer.epoch_summary(with_metric=True)
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch + 1)))

    return frcn_output
def train():
    global sentences, vocabulary, reverse_vocabulary
    # function will create the trainer and train it for specified number of epochs

    # Print loss 50 times while training
    print_freqency = 50
    pp = ProgressPrinter(print_freqency)

    # get the trainer
    word_one_hot, context_one_hots, negative_one_hots, targets, trainer, word_negative_context_product, embedding_layer = create_trainer()

    # Create a CTF reader which reads the sparse inputs
    print("reader started")
    reader = CTFDeserializer(G.CTF_input_file)
    reader.map_input(G.word_input_field, dim=G.embedding_vocab_size, format="sparse")
    # context inputs
    for i in range(context_size):
        reader.map_input(G.context_input_field.format(i), dim=G.embedding_vocab_size, format="sparse")
    # negative inputs
    for i in range(G.negative):
        reader.map_input(G.negative_input_field.format(i), dim=G.embedding_vocab_size, format="sparse")
    # targets
    reader.map_input(G.target_input_field, dim=(G.negative + 1), format="dense")
    print("reader done")

    # Get minibatch source from reader
    is_training = True
    minibatch_source = MinibatchSource(reader, randomize=is_training,
                                       epoch_size=INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP)
    minibatch_source.streams[targets] = minibatch_source.streams[G.target_input_field]
    del minibatch_source.streams[G.target_input_field]
    print("minibatch source done")

    total_minibatches = total_training_instances // G.minibatch_size
    print("training started")
    print("Total minibatches to train =", total_minibatches)
    for i in range(total_minibatches):
        # Collect minibatch
        # start_batch_collection = time.time()
        mb = minibatch_source.next_minibatch(G.minibatch_size, input_map=minibatch_source.streams)
        # end_batch_collection = time.time()
        # print("Batch collection time = %.6fsecs" % (end_batch_collection - start_batch_collection))
        # print("Time taken to collect one training_instance = %.6fsecs" % ((end_batch_collection - start_batch_collection)/G.minibatch_size))

        # Train minibatch
        # start_train = time.time()
        trainer.train_minibatch(mb)
        # end_train = time.time()
        # print("minibatch train time = %.6fsecs" % (end_train - start_train))
        # print("Time per training instance = %.6fsecs" % ((end_train - start_train)/G.minibatch_size))

        # Update progress printer
        pp.update_with_trainer(trainer)
        # start_batch_collection = time.time()

    print("Total training instances =", total_training_instances)
    return word_negative_context_product
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes, dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features, num_output_classes,
                                                    embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    pp = ProgressPrinter(0)
    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        pp.update_with_trainer(trainer, True)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def evaluate_decoding(reader, s2smodel, i2w):
    model_decoding = create_model_greedy(s2smodel)  # wrap the greedy decoder around the model

    progress_printer = ProgressPrinter(tag='Evaluation')

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    minibatch_size = 1024
    num_total = 0
    num_wrong = 0
    while True:
        mb = reader.next_minibatch(minibatch_size)
        if not mb:  # finish when end of test set reached
            break
        e = model_decoding(mb[reader.streams.features])
        outputs = format_sequences(e, i2w)
        labels = format_sequences(sparse_to_dense(mb[reader.streams.labels]), i2w)
        # prepend sentence start for comparison
        outputs = ["<s> " + output for output in outputs]

        num_total += len(outputs)
        num_wrong += sum([label != output for output, label in zip(outputs, labels)])

    rate = num_wrong / num_total
    print("string error rate of {:.1f}% in {} samples".format(100 * rate, num_total))
    return rate
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes, dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features, num_output_classes,
                                                    embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    pp = ProgressPrinter(0)
    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        pp.update_with_trainer(trainer, True)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
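# The two train_sequence_classifier() variants above call LSTM_sequence_classifer_net,
# which is defined elsewhere in the example. Below is a minimal sketch of how such a
# network could be built with the cntk.layers API; it is an illustrative assumption,
# not the original definition.
from cntk.layers import Sequential, Embedding, Recurrence, LSTM, Dense
from cntk.ops.sequence import last


def LSTM_sequence_classifer_net(features, num_output_classes, embedding_dim, LSTM_dim, cell_dim):
    # embed the sparse one-hot input, run an LSTM over the sequence,
    # keep the last hidden state, and project it onto the output classes
    classifier = Sequential([Embedding(embedding_dim),
                             Recurrence(LSTM(LSTM_dim, cell_dim)),
                             last,
                             Dense(num_output_classes)])
    return classifier(features)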
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(tl_model, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)

    return tl_model
def ffnet():
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential([
        Dense(hidden_dimension, activation=C.sigmoid),
        Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    trainer = C.Trainer(z, ce, pe, [sgd(z.parameters, lr=lr_per_minibatch)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    pp = ProgressPrinter(0)
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})
        pp.update_with_trainer(trainer)
    last_avg_error = pp.avg_loss_since_start()

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
def train_fast_rcnn(debug_output=False):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = input_variable((num_channels, image_height, image_width))
    roi_input = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        roi_input: minibatch_source[roi_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch + 1)))

    return frcn_output
def ffnet():
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential([
        Dense(hidden_dimension, activation=C.sigmoid),
        Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    pp = ProgressPrinter(0)
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})
        pp.update_with_trainer(trainer)
    last_avg_error = pp.avg_loss_since_start()

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
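# The ffnet() variants above rely on a generate_random_data() helper that is not shown
# here. A minimal sketch of one plausible implementation, producing separable synthetic
# samples with one-hot labels (an assumption for illustration, not the original helper):
import numpy as np


def generate_random_data(sample_size, feature_dim, num_classes):
    # draw a random class id per sample
    Y = np.random.randint(size=(sample_size, 1), low=0, high=num_classes)
    # shift the features by the class id so the classes are separable
    X = (np.random.randn(sample_size, feature_dim) + 3) * (Y + 1)
    X = X.astype(np.float32)
    # convert class ids to one-hot rows, e.g. class 1 of 2 -> [0, 1]
    class_ind = [Y == class_number for class_number in range(num_classes)]
    Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
    return X, Y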
def ffnet():
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    input = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Instantiate the feedforward classification model
    netout = fully_connected_classifier_net(
        input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, sgd(netout.parameters, lr=lr_per_minibatch))

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    pp = ProgressPrinter(128)
    for i in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({input: features, label: labels})
        pp.update_with_trainer(trainer)
    pp.epoch_summary()

    test_features, test_labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
    avg_error = trainer.test_minibatch({input: test_features, label: test_labels})
    return avg_error
def _hyper_train_target_sub(self, **kwargs):
    """Actual training procedure for a specific set of hyperparameters."""
    if self.saver.log_filename:
        fh = logging.FileHandler(self.saver.log_filename)
        self.logger.addHandler(fh)

    self.logger.info("Training with parameters: {}".format(kwargs))
    X_train, Y_train, X_val, Y_val = self.data_source(**kwargs)
    input_var, label_var, output = self.model(**kwargs)

    loss = cross_entropy_with_softmax(output, label_var)
    label_error = classification_error(output, label_var)
    learner = self.optimizer(parameters=output.parameters, momentum=0.9, **kwargs)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=self.num_epoch)
    trainer = Trainer(output, (loss, label_error), [learner], [progress_printer])

    # input_map = {
    #     input_var: reader_train.streams.features,
    #     label_var: reader_train.streams.labels
    # }

    num_minibatches_to_train = X_train.shape[0] / self.data_source.batch_size
    for i in range(0, int(num_minibatches_to_train)):
        features = X_train[:self.data_source.batch_size]
        labels = Y_train[:self.data_source.batch_size]
        trainer.train_minibatch({input_var: features, label_var: labels})

    if self.saver.log_filename:
        self.logger.removeHandler(fh)
        fh.close()

    best_value = 0.0
    return best_value
def ffnet():
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    input = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Instantiate the feedforward classification model
    netout = fully_connected_classifier_net(input, num_output_classes, hidden_layers_dim,
                                            num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    for i in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({input: features, label: labels})
    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
    avg_error = trainer.test_minibatch({input: test_features, label: test_labels})
    return avg_error
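# The fully_connected_classifier_net() used by the two ffnet() variants above (and by the
# simple_mnist() examples further down) is defined elsewhere. A minimal sketch of such a
# helper built from Dense layers (illustrative; the original definition may differ):
from cntk.layers import Dense


def fully_connected_classifier_net(features, num_output_classes, hidden_layers_dim,
                                   num_hidden_layers, nonlinearity):
    # stack num_hidden_layers hidden Dense layers, then a linear output layer
    h = Dense(hidden_layers_dim, activation=nonlinearity)(features)
    for _ in range(1, num_hidden_layers):
        h = Dense(hidden_layers_dim, activation=nonlinearity)(h)
    return Dense(num_output_classes, activation=None)(h)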
def main():
    # Ensure we always get the same sequence of random numbers (reproducible runs)
    np.random.seed(0)
    global minibatch_size, skip_window

    if len(sys.argv) < 2:
        print('Insufficient number of arguments. For running the example case, run: $ python word2vec.py runexample')
        exit(1)

    filename = sys.argv[1]
    process_text(filename)

    inp, label, trainer = train(emb_size, vocab_size)

    pp = ProgressPrinter(50)
    for _epoch in range(num_epochs):
        i = 0
        while curr_epoch == _epoch:
            features, labels = generate_batch(minibatch_size, skip_window)
            features = get_one_hot(features)
            labels = get_one_hot(labels)

            trainer.train_minibatch({inp: features, label: labels})
            pp.update_with_trainer(trainer)
            i += 1
            if i % 200 == 0:
                print('Saving Embeddings..')
                with open(embpickle, 'wb') as handle:
                    pickle.dump(embeddings.value, handle)

        pp.epoch_summary()

    test_features, test_labels = generate_batch(minibatch_size, skip_window)
    test_features = get_one_hot(test_features)
    test_labels = get_one_hot(test_labels)
    avg_error = trainer.test_minibatch({inp: test_features, label: test_labels})
    print('Avg. Error on Test Set: ', avg_error)
def train_lm(training_file, max_num_minibatches):
    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)

    # and apply it to the input sequence
    z = model(input_sequence)

    # setup the criterions (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (ce, errs), learner)

    sample_freq = 1000
    epochs = 50
    minibatches_per_epoch = int((data_size / minibatch_size))
    minibatches = min(epochs * minibatches_per_epoch, max_num_minibatches)

    # print out some useful training information
    log_number_of_parameters(z)
    print()

    progress_printer = ProgressPrinter(freq=100, tag='Training')

    e = 0
    p = 0
    for i in range(0, minibatches):
        if p + minibatch_size + 1 >= data_size:
            p = 0
            e += 1
            model_filename = "models/shakespeare_epoch%d.dnn" % e
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

        # get the data
        features, labels = get_data(p, minibatch_size, data, char_to_ix, vocab_dim)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [False]
        if p == 0:
            mask = [True]
        arguments = ({input_sequence: features, label_sequence: labels}, mask)
        trainer.train_minibatch(arguments)

        progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        if i % sample_freq == 0:
            print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        p += minibatch_size

    # Do a final save of the model
    model_filename = "models/shakespeare_epoch%d.dnn" % e
    z.save(model_filename)
    return batch, labels


# Creating One-Hot set
def get_one_hot(origlabels):
    global minibatch_size, vocab_size
    labels = np.zeros(shape=(minibatch_size, vocab_size), dtype=np.float32)
    for t in range(len(origlabels)):
        if origlabels[t, 0] < vocab_size and origlabels[t, 0] >= 0:
            labels[t, origlabels[t, 0]] = 1.0
    return labels


# Testing & training
build_dataset()
inp, label, trainer = train(emb_size, vocab_size)
print('Model Creation Done.')

pp = ProgressPrinter(50)
for _epoch in range(num_epochs):
    i = 0
    while curr_epoch == _epoch:
        features, labels = generate_batch(minibatch_size, skip_window)
        features = get_one_hot(features)
        labels = get_one_hot(labels)

        trainer.train_minibatch({inp: features, label: labels})
        pp.update_with_trainer(trainer)
        i += 1
        if i % 200 == 0:
            print('Saving Embeddings..')
            with open(embpickle, 'wb') as handle:
                pickle.dump(embeddings.value, handle)
def train_lm(training_file, max_num_minibatches):
    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)

    # and apply it to the input sequence
    z = model(input_sequence)

    # setup the criterions (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(
        z.parameters, lr_per_sample, momentum_time_constant,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (ce, errs), learner)

    sample_freq = 1000
    epochs = 50
    minibatches_per_epoch = int((data_size / minibatch_size))
    minibatches = min(epochs * minibatches_per_epoch, max_num_minibatches)

    # print out some useful training information
    log_number_of_parameters(z)
    print()

    progress_printer = ProgressPrinter(freq=100, tag='Training')

    e = 0
    p = 0
    for i in range(0, minibatches):
        if p + minibatch_size + 1 >= data_size:
            p = 0
            e += 1
            model_filename = "models/shakespeare_epoch%d.dnn" % e
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

        # get the data
        features, labels = get_data(p, minibatch_size, data, char_to_ix, vocab_dim)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [False]
        if p == 0:
            mask = [True]
        arguments = ({input_sequence: features, label_sequence: labels}, mask)
        trainer.train_minibatch(arguments)

        progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        if i % sample_freq == 0:
            print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        p += minibatch_size

    # Do a final save of the model
    model_filename = "models/shakespeare_epoch%d.dnn" % e
    z.save(model_filename)
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name,
                            num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(tl_model, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)

    return tl_model
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)  # This does not need to be done in training generally though

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    learner = adam_sgd(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4], UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()

    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log")  # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({
                criterion.arguments[0]: mb_train[train_reader.streams.features],
                criterion.arguments[1]: mb_train[train_reader.streams.labels]
            })

            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    features = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), features)
    netout = fully_connected_classifier_net(scaled_input, num_output_classes,
                                            hidden_layers_dim, num_hidden_layers, relu)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    try:
        rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(*"../Image/DataSets/MNIST/Train-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, (ce, pe), sgd(netout.parameters, lr=lr_per_minibatch))

    # Instantiate a ProgressPrinter.
    logdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mnist_log")
    progress_printer = ProgressPrinter(tag='Training', freq=1, tensorboard_log_dir=logdir, model=netout)

    # Get minibatches of images to train with and perform model training
    minibatch_size = 64
    num_samples_per_sweep = 6000
    num_sweeps_to_train_with = 2
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size

    for minibatch_idx in range(0, int(num_minibatches_to_train)):
        trainer.train_minibatch(reader_train.next_minibatch(minibatch_size, input_map=input_map))

        # Take snapshot of loss and eval criterion for the previous minibatch.
        progress_printer.update_with_trainer(trainer, with_metric=True)

        # Log max/min/mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        # Don't want to do that very often though, otherwise will spend too much time computing min/max/mean.
        if minibatch_idx % 10 == 9:
            for p in netout.parameters:
                progress_printer.update_value("mb_" + p.uid + "_max", reduce_max(p).eval(), minibatch_idx)
                progress_printer.update_value("mb_" + p.uid + "_min", reduce_min(p).eval(), minibatch_idx)
                progress_printer.update_value("mb_" + p.uid + "_mean", reduce_mean(p).eval(), minibatch_idx)

    # Load test data
    try:
        rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(*"../Image/DataSets/MNIST/Test-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        features: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        test_result += trainer.test_minibatch(mb)

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), input)
    z = fully_connected_classifier_net(scaled_input, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers, relu)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST")
    path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt"))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch))

    # Get minibatches of images to train with and perform model training
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10
    #training_progress_output_freq = 100

    progress_printer = ProgressPrinter(
        #freq=training_progress_output_freq,
        tag='Training',
        num_epochs=num_sweeps_to_train_with)

    session = training_session(
        training_minibatch_source=reader_train,
        trainer=trainer,
        mb_size_schedule=minibatch_size_schedule(minibatch_size),
        progress_printer=progress_printer,
        model_inputs_to_mb_source_mapping=input_map,
        progress_frequency=num_samples_per_sweep,
        max_training_samples=num_samples_per_sweep * num_sweeps_to_train_with)

    session.train()

    # Load test data
    path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    features = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), features)
    netout = fully_connected_classifier_net(scaled_input, num_output_classes,
                                            hidden_layers_dim, num_hidden_layers, relu)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    try:
        rel_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(
            *"../Image/DataSets/MNIST/Train-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, sgd(netout.parameters, lr=lr_per_minibatch))

    # Instantiate a ProgressPrinter.
    logdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mnist_log")
    progress_printer = ProgressPrinter(tag='Training', freq=1, tensorboard_log_dir=logdir, model=netout)

    # Get minibatches of images to train with and perform model training
    minibatch_size = 64
    num_samples_per_sweep = 6000
    num_sweeps_to_train_with = 2
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size

    for minibatch_idx in range(0, int(num_minibatches_to_train)):
        trainer.train_minibatch(reader_train.next_minibatch(minibatch_size, input_map=input_map))

        # Take snapshot of loss and eval criterion for the previous minibatch.
        progress_printer.update_with_trainer(trainer, with_metric=True)

        # Log max/min/mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        # Don't want to do that very often though, otherwise will spend too much time computing min/max/mean.
        if minibatch_idx % 10 == 9:
            for p in netout.parameters:
                progress_printer.update_value("mb_" + p.uid + "_max", reduce_max(p).eval(), minibatch_idx)
                progress_printer.update_value("mb_" + p.uid + "_min", reduce_min(p).eval(), minibatch_idx)
                progress_printer.update_value("mb_" + p.uid + "_mean", reduce_mean(p).eval(), minibatch_idx)

    # Load test data
    try:
        rel_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(
            *"../Image/DataSets/MNIST/Test-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        features: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        test_result += trainer.test_minibatch(mb)

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
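# The simple_mnist() examples above read CTF-format text files through a create_reader()
# helper that is not shown here. A minimal sketch of the usual CNTK CTFDeserializer
# pattern, mirroring the INFINITELY_REPEAT / FULL_DATA_SWEEP style used elsewhere in
# these snippets (an assumption about how the helper is defined, not the original code):
from cntk.io import (MinibatchSource, CTFDeserializer, StreamDef, StreamDefs,
                     INFINITELY_REPEAT, FULL_DATA_SWEEP)


def create_reader(path, is_training, input_dim, num_label_classes):
    # each CTF line carries a dense 'features' vector and a one-hot 'labels' vector
    ctf = CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='features', shape=input_dim, is_sparse=False),
        labels=StreamDef(field='labels', shape=num_label_classes, is_sparse=False)))
    return MinibatchSource(ctf, randomize=is_training,
                           epoch_size=INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP)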
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)  # This does not need to be done in training generally though

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    learner = adam_sgd(model_train.parameters,
                       lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4], UnitType.sample, epoch_size),
                       momentum=momentum_as_time_constant_schedule(1100),
                       gradient_clipping_threshold_per_sample=2.3,
                       gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()

    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log")  # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features],
                                     criterion.arguments[1]: mb_train[train_reader.streams.labels]})

            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)