def _build(self, batch_size, init):
    print_section('Building model')

    index = T.lscalar()   # index to a [mini]batch
    x = T.matrix('x')     # the data is presented as rasterized images
    y = T.imatrix('y')    # label data
    # Drop switch. Only training should drop units. For testing and validation
    # all units should be used (but output rescaled).
    drop = T.iscalar('drop')
    learning_rate = T.scalar('learning_rate', dtype=theano.config.floatX)
    mix_factor = T.scalar('factor', dtype=theano.config.floatX)

    self.model.build(x, drop, batch_size, init_params=init)
    errors = self.model.get_output_layer().errors(y)

    self.test_model = create_theano_func(
        'test', self.data, x, y, drop, [index], errors, batch_size)
    self.validate_model = create_theano_func(
        'validation', self.data, x, y, drop, [index], errors, batch_size)
    self.get_training_loss = create_theano_func(
        'train', self.data, x, y, drop, [index], errors, batch_size,
        prefix="_loss")

    cost = self.model.get_cost(y, mix_factor) + (
        self.params.l2_reg * self.model.getL2())
    opt = Backpropagation.create(self.model.params)
    grads = T.grad(cost, self.model.params)
    updates = opt.updates(self.model.params, grads, learning_rate,
                          self.params.momentum)

    self.train_model = create_theano_func(
        'train', self.data, x, y, drop, [index, learning_rate, mix_factor],
        cost, batch_size, updates=updates, dropping=True)
    self.tester = create_profiler_func(
        self.data, x, y, drop, [index, mix_factor],
        self.model.get_output_layer(), cost, batch_size)
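The `drop` switch above lets one compiled graph serve both training (units dropped) and evaluation (all units kept, activations rescaled), presumably with `create_theano_func` fixing `drop` to 1 only when `dropping=True`. Below is a minimal sketch of how such a switch can gate dropout inside a layer; the helper name `dropout_switch` and the retain probability are assumptions, not part of this code base.

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

def dropout_switch(activations, drop, p_drop=0.5, seed=1234):
    # Illustrative helper: drop units when drop == 1, otherwise keep all units
    # and rescale by the retain probability, as described in the comment above.
    rng = RandomStreams(seed)
    retain = 1.0 - p_drop
    mask = rng.binomial(size=activations.shape, p=retain,
                        dtype=theano.config.floatX)
    return T.switch(T.eq(drop, 1), activations * mask, activations * retain)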
def load(self, dataset_path, params, batch_size=1):
    print_section('Creating aerial image dataset')

    self.std = params.dataset_std
    chunks = params.chunk_size
    # TODO: ensure that the dataset is as expected.
    creator = Creator(dataset_path,
                      dim=(params.input_dim, params.output_dim),
                      rotation=params.use_rotation,
                      preproccessing=params.use_preprocessing,
                      std=self.std,
                      only_mixed=params.only_mixed_labels,
                      reduce_testing=params.reduce_testing,
                      reduce_training=params.reduce_training,
                      reduce_validation=params.reduce_validation)
    train, valid, test = creator.dynamically_create(
        params.samples_per_image,
        enable_label_noise=params.use_label_noise,
        label_noise=params.label_noise,
        only_mixed=params.only_mixed_labels)

    # Testing dataset size requirements
    AerialDataset.dataset_check('train', train, batch_size)
    AerialDataset.dataset_check('valid', valid, batch_size)
    AerialDataset.dataset_check('test', test, batch_size)

    AerialDataset.dataset_shared_stats(train[0].shape, train[1].shape, chunks)

    self.set_nr_examples(train, valid, test)
    nr_of_chunks = AerialDataset.dataset_sizes(train, valid, test, chunks)
    training_chunks = self._chunkify(train, nr_of_chunks, batch_size)

    AerialDataset.dataset_chunk_stats(len(training_chunks),
                                      len(training_chunks[0][0]),
                                      len(training_chunks[-1][0]))

    self.active = self.shared_dataset(training_chunks[0], cast_to_int=False)
    self.set['train'] = self.active[0], T.cast(self.active[1], 'int32')
    self.set['validation'] = self.shared_dataset(valid, cast_to_int=True)
    self.set['test'] = self.shared_dataset(test, cast_to_int=True)

    # Not stored on the GPU, unlike the shared variables defined above.
    self.all_training = training_chunks
    return True
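Only the first training chunk is uploaded to the GPU as Theano shared variables, with labels kept in floatX and cast to int32 on the fly. The following is a sketch of what a `shared_dataset` helper of this kind typically looks like, following the standard Theano tutorial idiom; the actual method in this code base may differ in details.

import numpy as np
import theano
import theano.tensor as T

def shared_dataset(data_xy, cast_to_int=True, borrow=True):
    # Keep patches and labels in floatX shared variables so they can live on
    # the GPU, and hand back an int32 view of the labels for the loss/errors.
    data_x, data_y = data_xy
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    if cast_to_int:
        return shared_x, T.cast(shared_y, 'int32')
    return shared_x, shared_y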
def load(self, dataset_path, params, batch_size=1):
    print_section('Loading aerial curriculum dataset')

    chunks = params.chunk_size
    self.std = params.dataset_std  # Needed for debugging

    # For later stage loading
    self.stage = 0
    self.stage_path = os.path.join(dataset_path, "train")
    self.nr_of_stages = len(os.listdir(os.path.join(dataset_path, "train")))

    train = self.load_set(dataset_path, "train",
                          stage="stage{}".format(self.stage))
    valid = self.load_set(dataset_path, "valid")
    test = self.load_set(dataset_path, "test")

    # Testing dataset size requirements
    AerialCurriculumDataset.dataset_check('train', train, batch_size)
    AerialCurriculumDataset.dataset_check('valid', valid, batch_size)
    AerialCurriculumDataset.dataset_check('test', test, batch_size)

    AerialCurriculumDataset.dataset_shared_stats(train[0].shape,
                                                 train[1].shape, chunks)

    self.set_nr_examples(train, valid, test)
    nr_of_chunks = AerialCurriculumDataset.dataset_sizes(train, valid, test,
                                                         chunks)
    training_chunks = self._chunkify(train, nr_of_chunks, batch_size)

    AerialCurriculumDataset.dataset_chunk_stats(len(training_chunks),
                                                len(training_chunks[0][0]),
                                                len(training_chunks[-1][0]))

    self.active = self.shared_dataset(training_chunks[0], cast_to_int=False)
    self.set['train'] = self.active[0], T.cast(self.active[1], 'int32')
    self.set['validation'] = self.shared_dataset(valid, cast_to_int=True)
    self.set['test'] = self.shared_dataset(test, cast_to_int=True)

    # Not stored on the GPU, unlike the shared variables defined above.
    self.all_training = training_chunks
    return True
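Because only one chunk of training patches fits in GPU memory at a time, the training loop calls `switch_active_training_set(chunk_index)` before iterating over a chunk's minibatches. A plausible sketch of that operation is shown below, assuming `self.active` holds the shared variables created above and `self.all_training` the host-side chunks; the real implementation may differ.

def switch_active_training_set(self, chunk_index):
    # Overwrite the GPU-resident shared variables in place with the selected
    # chunk: no new allocation, just a host-to-device copy.
    chunk_x, chunk_y = self.all_training[chunk_index]
    self.active[0].set_value(chunk_x, borrow=True)
    self.active[1].set_value(chunk_y, borrow=True)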
def _train(self, batch_size, max_epochs):
    print_section('Training model')

    patience = self.params.initial_patience  # Look at this many examples regardless
    patience_increase = self.params.patience_increase  # Wait this much longer when a new best is found
    improvement_threshold = self.params.improvement_threshold  # A relative improvement of this much is considered significant

    learning_rate = self.params.learning_rate
    learning_adjustment = self.params.learning_adjustment
    learning_decrease = self.params.learning_decrease
    nr_learning_adjustments = 0
    print('---- Initial learning rate {}'.format(learning_rate))

    max_factor = self.params.factor_rate
    factor_adjustment = self.params.factor_adjustment
    factor_decrease = self.params.factor_decrease
    factor_minimum = self.params.factor_minimum
    print('---- Initial loss mixture ratio {}'.format(max_factor))

    curriculum = self.params.curriculum_enable
    curriculum_start = self.params.curriculum_start
    curriculum_adjustment = self.params.curriculum_adjustment

    gui_frequency = 500
    # Go through this many minibatches before checking the network on the validation set.
    validation_frequency = min(self.nr_train_batches, patience / 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    self.start_time = timeit.default_timer()

    storage = ParamStorage()
    nr_chunks = self.data.get_chunk_number()
    epoch = 0
    done_looping = False
    iter = 0

    # ==== INITIAL PERFORMANCE ====
    chunk_batches = self.data.get_elements(0) / batch_size
    validation_score = self._get_validation_score(batch_size, epoch, 0)
    test_score = self._get_test_score(batch_size)
    training_score = self._get_training_score(chunk_batches)

    # ==== UPDATE GUI ====
    if visual_params.gui_enabled:
        interface.server.append_job_update(epoch, training_score,
                                           validation_score, test_score,
                                           learning_rate)

    try:
        while (epoch < max_epochs) and (not done_looping):
            epoch = epoch + 1

            if epoch % learning_adjustment == 0:
                learning_rate *= learning_decrease
                nr_learning_adjustments += 1
                # Temp
                learning_adjustment = max(10, int(learning_adjustment / 2))
                print('---- New learning rate {}'.format(learning_rate))

            if epoch > factor_adjustment:
                max_factor = max(max_factor * factor_decrease, factor_minimum)
                print('---- New convex combination {}'.format(max_factor))

            if epoch % 20 == 0:
                print('---- Storing temp model')
                storage.store_params(self.model.params, id=str(epoch))

            if (curriculum and epoch % curriculum_adjustment == 0
                    and epoch >= curriculum_start):
                print("---- Mixing examples from next stage with training data")
                self.data.mix_in_next_stage()

            # For each chunk of examples currently in GPU memory
            for chunk_index in range(nr_chunks):
                self.data.switch_active_training_set(chunk_index)
                nr_elements = self.data.get_elements(chunk_index)
                chunk_batches = nr_elements / batch_size

                # Each chunk contains a certain number of batches.
                for minibatch_index in range(chunk_batches):
                    cost_ij = self.train_model(minibatch_index, learning_rate,
                                               max_factor)

                    if iter % 1000 == 0:
                        print('---- Training @ iter = {}. Patience = {}. Loss = {}'
                              .format(iter, patience, cost_ij))

                    if visual_params.gui_enabled and iter % gui_frequency == 0:
                        interface.server.get_command_status()

                    if visual_params.gui_enabled and interface.server.is_testing():
                        self._debug(batch_size, chunk_batches, max_factor)

                    # if np.isnan(cost_ij):
                    #     print('cost IS NAN')

                    # ==== EVALUATE ====
                    if (iter + 1) % validation_frequency == 0:
                        # ==== CURRENT PERFORMANCE ====
                        validation_score = self._get_validation_score(
                            batch_size, epoch, minibatch_index)
                        test_score = self._get_test_score(batch_size)
                        # No other purpose than charting
                        train_score = self._get_training_score(chunk_batches)

                        # ==== UPDATE GUI ====
                        if visual_params.gui_enabled:
                            interface.server.append_job_update(
                                epoch, train_score, validation_score,
                                test_score, learning_rate)

                        self.events.append({
                            "epoch": epoch,
                            "training_loss": train_score,
                            "validation_loss": validation_score,
                            "test_loss": test_score,
                            "training_rate": learning_rate
                        })

                        # ==== EARLY STOPPING ====
                        if validation_score < best_validation_loss:
                            # Increase patience if the loss improvement is good enough.
                            if validation_score < best_validation_loss * improvement_threshold:
                                patience = max(patience, iter * patience_increase)
                                print("---- New best validation loss. "
                                      "Patience increased to {}".format(patience))

                            # Save the best validation score and iteration number.
                            best_validation_loss = validation_score
                            best_iter = iter

                    if patience <= iter:
                        done_looping = True
                        break

                    if visual_params.gui_enabled and interface.server.stop:
                        done_looping = True

                    # Increment iteration after each batch has been processed.
                    iter += 1

    except KeyboardInterrupt:
        self.set_result(best_iter, iter, best_validation_loss, test_score,
                        nr_learning_adjustments, epoch)
        print("Interrupted by user. Current model params will be saved now.")
    except Exception:
        print("Unexpected error: {}".format(sys.exc_info()[0]))
        raise

    self.set_result(best_iter, iter, best_validation_loss, test_score,
                    nr_learning_adjustments, epoch)
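Two schedules interact in the loop above: the learning rate is multiplied by `learning_decrease` every `learning_adjustment` epochs, with the adjustment interval itself halved each time (but never below 10 epochs), so the decay accelerates as training progresses; patience, in turn, only grows when the validation loss improves by more than `improvement_threshold`. A small, self-contained illustration of the decay schedule follows, using made-up hyperparameter values rather than the project's configuration.

# Illustrative only: how the learning rate evolves under the schedule in _train,
# assuming learning_rate=0.1, learning_decrease=0.9, learning_adjustment=40.
learning_rate, learning_adjustment, learning_decrease = 0.1, 40, 0.9
for epoch in range(1, 201):
    if epoch % learning_adjustment == 0:
        learning_rate *= learning_decrease
        learning_adjustment = max(10, int(learning_adjustment / 2))
        print('epoch {:3d}: learning rate {:.4f}, next adjustment in {} epochs'
              .format(epoch, learning_rate, learning_adjustment))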
from printing import print_section
from storage import ParamStorage
from precisionrecall import PrecisionRecallCurve
from interface.server import send_precision_recall_data
from interface.command import get_command

'''
This tool creates the datapoints necessary for a precision and recall curve figure.
The tool samples a patch dataset from the test and validation set (-data), and creates
predictions using a trained model (-model). These predictions are thresholded at several
values. The binarized predictions and the labels are then used to calculate the precision
as well as the recall. These values, together with the threshold value, constitute a data
point. Supplying an experiment id (-store_gui) will store the datapoints in the web GUI.

It is worth noting that the measurements are relaxed: relaxed precision and relaxed
recall. This is implemented by the image processing operation dilation. The slack
variable is set to 3 pixels.
'''

print_section("TOOLS: Measure precision and recall of model")
print("-data: path to dataset | -store_gui: job_gui id to store curve in GUI "
      "| -store_path: store results locally | -model: stored model to use")

# ====== Arguments ===============================================
is_dataset_path, dataset_path = get_command(
    '-data', default='/home/olav/Pictures/Mass_roads_alpha')
store_gui, job_id = get_command('-store_gui', default='-1')
is_store_path, store_path = get_command('-store_path', default='./pr_data.json')
is_model, model_path = get_command('-model', default='./results/params.pkl')
# ================================================================

store = ParamStorage()
data = store.load_params(path=model_path)
batch_size = data['optimization'].batch_size
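The relaxed measures described in the docstring credit a predicted road pixel as correct if a label pixel lies within the slack distance of it, and count a label pixel as recovered if a prediction lies within the slack distance of it; both can be computed by dilating the binary maps. A minimal sketch of this idea is given below, using scipy's dilation on boolean numpy arrays; the repo's `PrecisionRecallCurve` class remains the authoritative implementation.

import numpy as np
from scipy.ndimage import binary_dilation

def relaxed_precision_recall(pred, label, slack=3):
    # pred and label are boolean 2D arrays of the same shape.
    dilated_label = binary_dilation(label, iterations=slack)
    dilated_pred = binary_dilation(pred, iterations=slack)
    precision = np.logical_and(pred, dilated_label).sum() / float(max(pred.sum(), 1))
    recall = np.logical_and(label, dilated_pred).sum() / float(max(label.sum(), 1))
    return precision, recall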
import os
import sys

import matplotlib.pyplot as plt

sys.path.append(os.path.abspath("./"))

from interface.command import get_command
from printing import print_section, print_action
from storage import ParamStorage
from config import filename_params, dataset_params, pr_path, dataset_path
from augmenter.aerial import Creator
from data import AerialCurriculumDataset
import tools.util as util

'''
Create histograms of the difference between prediction and label for a dataset.
Allows fine-tuning of the curriculum strategy.
'''

print_section('Generating plot of diff distribution between label and prediction')

# ====== Arguments ===============================================
is_samples, samples = get_command('-samples', default="100")
samples = int(samples)

is_teacher_location, teacher_location = get_command(
    '-teacher', default=filename_params.curriculum_teacher)

verify, stage = get_command('-verify', default="0")
stage = "stage" + stage

is_tradeoff, tradeoff = get_command('-tradeoff', default="0.5")
tradeoff = float(tradeoff)

# Dataset path. Config value used if not supplied.
is_alt_dataset, alt_dataset = get_command('-dataset')
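The plot this tool produces is essentially a histogram of per-patch disagreement between the thresholded teacher prediction and the label, which is what the curriculum stages are keyed on. The sketch below shows one way such a histogram could be computed; the difficulty definition (mean absolute difference per patch) and the function name are illustrative assumptions, while the real tool goes through `AerialCurriculumDataset` and the stored teacher model.

import numpy as np
import matplotlib.pyplot as plt

def plot_diff_histogram(predictions, labels, tradeoff=0.5, bins=50):
    # predictions and labels: arrays of shape (n_patches, n_pixels) in [0, 1].
    binarized = (predictions >= tradeoff).astype(np.float32)
    diffs = np.abs(binarized - labels).mean(axis=1)
    plt.hist(diffs, bins=bins)
    plt.xlabel('Mean |prediction - label| per patch')
    plt.ylabel('Number of patches')
    plt.savefig('./diff_distribution.png')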
def run_cnn(model_params, optimization_params, dataset_path, dataset_params,
            filename_params, visual_params, epochs, verbose=False):
    print(filename_params)
    if not os.path.exists(filename_params.results):
        os.makedirs(filename_params.results)

    is_config, config_values = interface.command.get_command("-config")
    is_curriculum, curriculum_set = interface.command.get_command("-curriculum")
    is_batch_run, batch_index = interface.command.get_command("-batch", default="0")
    is_init_params, param_path = interface.command.get_command("-params")

    if is_config:
        # Assume the config is specifically for running bootstrapping batches.
        config_arr = eval(config_values)
        if len(config_arr) == 2:
            loss_function = config_arr[0]
            label_noise = float(config_arr[1])
            dataset_params.label_noise = label_noise
            model_params.loss = loss_function
            batch_index = loss_function + "-" + str(label_noise) + "-" + batch_index
            print(batch_index)

    if is_curriculum:
        dataset_path = curriculum_set

    weights = None
    if is_init_params:
        store = ParamStorage()
        if not param_path:
            param_path = "./results/params.pkl"
        weights = store.load_params(path=param_path)['params']

    dataset = DataLoader.create()
    dataset.load(dataset_path, dataset_params,
                 optimization_params.batch_size)  # Input stage
    model = ConvModel(model_params, verbose=True)  # Create network stage

    evaluator = Evaluator(model, dataset, optimization_params, dataset_path)
    evaluator.run(epochs=epochs, verbose=verbose, init=weights)
    report = evaluator.get_result()

    network_store_path = filename_params.network_save_name
    result_path = filename_params.results + "/results.json"
    if is_batch_run:
        network_store_path = filename_params.results + "/batch" + batch_index + ".pkl"
        result_path = filename_params.results + "/batch" + batch_index + ".json"

    storage = ParamStorage(path=network_store_path)
    storage.store_params(model.params)

    dataset.destroy()

    if visual_params.gui_enabled:
        interface.server.stop_job(report)

    printing.print_section('Evaluation precision and recall')
    prc = PrecisionRecallCurve(pr_path, model.params, model_params,
                               dataset_params)
    test_datapoints = prc.get_curves_datapoints(optimization_params.batch_size,
                                                set_name="test")
    valid_datapoints = prc.get_curves_datapoints(optimization_params.batch_size,
                                                 set_name="valid")

    # Stores the model params. The model can later be restored.
    printing.print_section('Storing model parameters')
    if visual_params.gui_enabled:
        interface.server.send_precision_recall_data(test_datapoints,
                                                    valid_datapoints)
    storage.store_result(result_path, evaluator.events, test_datapoints,
                         valid_datapoints)
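The `-config` flag is `eval`'d into a two-element list of loss-function name and label-noise fraction, which is then folded into the batch index used for result filenames. A short illustration of that parsing is given below; the loss-function name and values are placeholders, not a documented configuration of this project.

# Illustrative: how run_cnn interprets -config "['bootstrapping', 0.2]" -batch 3
config_values = "['bootstrapping', 0.2]"
batch_index = "3"
config_arr = eval(config_values)       # ['bootstrapping', 0.2]
loss_function = config_arr[0]          # 'bootstrapping'
label_noise = float(config_arr[1])     # 0.2
batch_index = loss_function + "-" + str(label_noise) + "-" + batch_index
print(batch_index)                     # bootstrapping-0.2-3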
The tool creates and saves the prediction stitch image, as well as a hit and miss
image. This image shows where the predictions are correct (green), where they are
missing (red), and where they should not be according to the label (blue).
'''

def store_image(image, job_id, store_gui, name="image"):
    out = Image.resize(image, 1.0)
    if store_gui:
        buf = StringIO.StringIO()
        out.save(buf, format='JPEG')
        send_result_image(job_id, buf.getvalue())
    image.save('./tools/visualize/' + name + '.jpg')
    image.show()


print_section('TOOLS: Visualize result from model')
print("-data: Path to image in dataset you want visualization of | "
      "-store_gui: Upload images to exp with supplied id | "
      "-tradeoff: Threshold value associated with precision recall breakeven | "
      "-storeimage: Include aerial image")

is_image_path, image_path = get_command(
    '-data', default='/home/olav/Pictures/Mass_roads/test/data/10378780_15.tiff')
store_data_image, temp = get_command('-storeimage')
store_gui, job_id = get_command('-store_gui', default="None")

is_tradeoff, bto = get_command('-tradeoff', default="0.5")
bto = float(bto)

is_model, model_path = get_command('-model', default="./results/params.pkl")

store = ParamStorage()
data = store.load_params(path=model_path)
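The hit and miss image encodes agreement between the thresholded prediction and the label as colors: green where predicted road matches the label, red where label road was missed, and blue where road was predicted but is absent from the label. A small sketch of how such an image can be assembled with numpy and PIL follows; the function name and threshold handling are assumptions, and the tool's own stitching code is the reference.

import numpy as np
from PIL import Image

def hit_and_miss_image(pred, label, threshold=0.5):
    # pred: float array in [0, 1]; label: boolean array of the same shape.
    binary_pred = pred >= threshold
    rgb = np.zeros(pred.shape + (3,), dtype=np.uint8)
    rgb[np.logical_and(binary_pred, label)] = (0, 255, 0)    # hit (green)
    rgb[np.logical_and(~binary_pred, label)] = (255, 0, 0)   # miss (red)
    rgb[np.logical_and(binary_pred, ~label)] = (0, 0, 255)   # false positive (blue)
    return Image.fromarray(rgb, mode='RGB')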
'''
This tool pre-generates a patch dataset. The tool is especially necessary for
curriculum learning. The reason for not doing this every time the network is
trained is that a previously trained model needs to be loaded in order to do
difficulty estimation.

There are several properties that can be set on the command line:
-baseline:    No difficulty estimation.
-stages:      Array of floats setting the difficulty threshold per stage.
              Please refrain from using spaces inside the array.
-tradeoff:    Previously trained curriculum teacher's best precision and recall
              tradeoff (threshold value).
-dataset:     Path to dataset.
-initsamples: Samples per image for the first stage.
-currsamples: Samples per image for the remaining stages.
-teacher:     Curriculum teacher model.
-save:        Path to where the pre-generated patch dataset should be stored.

REMEMBER: The patch creator is initialized using the config.py file.
'''

print_section("TOOLS: Creating curriculum learning dataset")

# Baseline will create a curriculum with no example ordering, but the same number of
# examples. This avoids results from curriculum learning being caused by the model
# simply having seen more examples.
is_baseline, baseline = get_command('-baseline')

is_stages, stages = get_command('-stages', default="[0.1, 1.0]")
stages = np.array(eval(stages))

# Precision recall breakeven point. 0.5 used as a default.
is_tradeoff, tradeoff = get_command('-tradeoff')
if is_tradeoff:
    tradeoff = float(tradeoff)

# Dataset path. Config value used if not supplied.
is_alt_dataset, alt_dataset = get_command('-dataset')
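One plausible reading of the `-stages` array is as cumulative difficulty thresholds: with the default `[0.1, 1.0]`, patches whose estimated difficulty falls at or below 0.1 go into the first stage and the rest into the second. The sketch below shows that bucketing under this assumption; the generator's own logic (including baseline mode and per-stage sample counts) is authoritative.

import numpy as np

def assign_stages(difficulties, stages):
    # difficulties: per-patch difficulty estimates in [0, 1].
    # stages: increasing thresholds, e.g. np.array([0.1, 1.0]).
    # Returns, for each stage, the indices of the patches it contains.
    assignments = np.digitize(difficulties, stages, right=True)
    return [np.where(assignments == i)[0] for i in range(len(stages))]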