def __loadmapping(self, fn):
    classmap = dd()
    labelmap = dd()
    currentlabel = -1
    with open(fn, 'r') as f:
        for line in f:
            # Split line
            line = line.replace('\n', '')
            split = line.split('=')

            # Extract key and value (lines without '=' use the whole line as value)
            key = split[0]
            val = split[1] if len(split) == 2 else split[0]

            # Add class description if not in label mapping (from class description to class number)
            if val not in labelmap:
                currentlabel += 1
                labelmap[val] = currentlabel

            # Add key to classmap
            if key not in classmap:
                classmap[key] = dd()

            # Assign description and label to key
            classmap[key].label = labelmap[val]
            classmap[key].desc = val

    return classmap
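# Mapping file format (assumed from the parsing above; keys are hypothetical): one
# "key=description" pair per line, and keys sharing a description share a label, e.g.
#
#     n02084071=dog
#     n02121808=cat
#     n02123045=cat
#
# would give classmap['n02121808'].label == classmap['n02123045'].label.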
def learn(self, inp, trainer, inp_corruption_type=None, inp_corruption_level=0,
          hid_corruption_type=None, hid_corruption_level=0, cost_weight=cast_x(1),
          learn_scale_first=False, debug_path=None, nb_frames=None):
    if trainer:

        # Build noisy autoencoder for training
        train_enc = self(inp, inp_corruption_type, inp_corruption_level, 'full')
        train_dec = self.dec(train_enc, hid_corruption_type, hid_corruption_level)
        train_cost = self.cost(inp, train_dec, cost_weight)

        # Build noiseless autoencoder for validation
        valid_enc = self(inp, border_mode='full')
        valid_dec = self.dec(valid_enc)
        valid_cost = self.cost(inp, valid_dec, cost_weight)

        # Quick training for weight scaling
        if learn_scale_first:
            lookback = trainer.lookback
            momentum = trainer.momentum
            trainer.lookback = int(ceil(trainer.lookback / 20.))
            trainer.momentum = 0
            trainer([self.scale], train_cost, valid_cost,
                    model_id=self.model_id + '_scaling').learn()
            trainer.lookback = lookback
            trainer.momentum = momentum

        # Arguments forwarded to the trainer's debug callback
        debug_args = dd()
        debug_args.debug_path = debug_path
        debug_args.nb_frames = nb_frames
        debug_args.prefix = 'unsupervised'

        self.trainer = trainer(list(self.params.values()), train_cost, valid_cost,
                               model_id=self.model_id,
                               additionnal_updates=self.additionnal_update(),
                               debug_calls=(self.debug_call, debug_args),
                               debug_nodes=dd({'unsupervised_' + self.model_id + '_encoder_act_trainset': train_enc}))

        # Learn model
        self.trainer.learn()
def __log_output(self, epoch):

    # UNSUPERVISED ---------------------------------------------------------------------------
    # Set current model log dictionary
    model_log = dd()
    model_log.nb_epoch = epoch

    if self.cost.valid:
        if "cost" not in model_log:
            model_log.cost = dd()
        model_log.cost.valid = self.cost.valid[-1]
        model_log.cost.valid_smooth = self.cost.valid_ema[-1]

    if self.error.valid:
        if "error" not in model_log:
            model_log.error = dd()
        model_log.error.valid = self.error.valid[-1]
        model_log.error.valid_smooth = self.error.valid_ema[-1]

    if self.cost.train:
        if "cost" not in model_log:
            model_log.cost = dd()
        model_log.cost.train = self.cost.train[-1]
        model_log.cost.train_smooth = self.cost.train_ema[-1]

    if self.error.train:
        if "error" not in model_log:
            model_log.error = dd()
        model_log.error.train = self.error.train[-1]
        model_log.error.train_smooth = self.error.train_ema[-1]

    self.log[self.model_id] = model_log
    self.log.dump(join(self.output_path, "out.pkl"), True)

    if self.cost.train:
        self.subplots.graphs[0, 0].add_point(
            train=(epoch, self.cost.train[-1]),
            train_ema=(epoch, self.cost.train_ema[-1]))
    if self.cost.valid:
        self.subplots.graphs[0, 0].add_point(
            valid=(epoch, self.cost.valid[-1]),
            valid_ema=(epoch, self.cost.valid_ema[-1]))
    if self.error.train:
        self.subplots.graphs[0, 1].add_point(
            train=(epoch, self.error.train[-1]),
            train_ema=(epoch, self.error.train_ema[-1]))
    if self.error.valid:
        self.subplots.graphs[0, 1].add_point(
            valid=(epoch, self.error.valid[-1]),
            valid_ema=(epoch, self.error.valid_ema[-1]))

    self.subplots.graphs.save(join(self.output_path, self.model_id + ".png"), dpi=100)
def __init__(self, examples, valid_ratio=0.1, test_ratio=0.1, shuffle=True):

    # Initialization
    self.__splits = dd([('train', []), ('valid', []), ('test', []), ('full', [])])

    # Extract a copy of the example list
    examples = list(examples)

    # If required, shuffle example list
    if shuffle:
        random.shuffle(examples)

    # Assign examples to splits
    nb_train = int(len(examples) * (1. - test_ratio - valid_ratio))
    nb_valid = int(len(examples) * valid_ratio)
    self.__splits.train = examples[:nb_train]
    self.__splits.valid = examples[nb_train:nb_train + nb_valid]
    self.__splits.test = examples[nb_train + nb_valid:]
    self.__splits.full = examples

    print("\nSplits statistics:")
    for split_id in self.__splits:
        print("    {:7d} items in {} set".format(len(self[split_id]), split_id))
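# Minimal usage sketch (the class name `splitter` is hypothetical; it relies on the
# __getitem__ used by the statistics loop above):
#
#     s = splitter(examples, valid_ratio=0.1, test_ratio=0.1, shuffle=True)
#     train, valid, test = s['train'], s['valid'], s['test']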
def __generate(self):
    tasks = self.maketasks()
    results = mp.Queue(1)
    buff = q.Queue(1)
    nb_tasks = tasks.qsize()
    perftrace = dd([(kw, mp.Queue()) for kw in self.extractor.statics])
    self.__perftraces.append(perftrace)

    # Start workers
    processes = []
    for i in range(self.__nb_workers):
        p = mp.Process(target=self.worker, args=(tasks, results), kwargs=perftrace)
        p.daemon = True
        p.start()
        processes.append(p)

    # Start thread that transfers worker results into the output buffer
    t = Thread(target=extractfromqueue, args=(results, buff))
    t.daemon = True
    t.start()

    # Generator function
    def generator(buff, nb_tasks):
        for i in range(nb_tasks):
            yield buff.get()

    # Instantiate generator
    return generator(buff, nb_tasks)
def __init__(self, classmap_path=None, **models):
    super(marshall, self).__init__()
    self.models = list(models.values())

    # Classmap maps class names from the dataset domain to generalized class names
    self.classmap = self.__loadmapping(classmap_path)

    # Extract class names
    self.classes += [c.desc for c in list(self.classmap.values())]

    # Marshall all examples and convert class descriptions
    frequency = dd([(cls, 0.) for cls in self.classes])
    for model in self.models:
        for e in model.examples:
            e.label = self.classmap[e.desc].label
            e.desc = self.classmap[e.desc].desc
            self.examples += [e]
            frequency[e.desc] += 1

    # Normalize frequencies
    for cls in frequency:
        frequency[cls] /= len(self.examples)

    # Report number of examples per class
    print("Marshalling datasets with a total of {} examples".format(self.nb_examples))
    for cls, freq in list(frequency.items()):
        print("    {:7d} ({:6.3f}%) items of class {}".format(int(freq * self.nb_examples), freq * 100., cls))
def add_line(self, line=None, name=None, x=None, y=None, **line_kwargs):

    # Infer the line name from the Line2D label or from the keyword arguments
    if line is not None and name is None:
        if line.get_label():
            name = line.get_label()
    if line is None and name is None:
        if 'label' in line_kwargs:
            name = line_kwargs['label']

    if name in self.__lines:
        raise ValueError("A line referenced by name {} already exists".format(name))

    x = x if isinstance(x, list) else []
    y = y if isinstance(y, list) else []

    # Create the line, cycling through default colors when none is given
    if not line:
        if 'color' not in line_kwargs:
            line_kwargs['color'] = self.__colorcycle[self.__colorcycle_id % len(self.__colorcycle)]
            self.__colorcycle_id += 1
        line = Line2D(x, y, **line_kwargs)

    # Keep a named reference to the line and its data
    if name:
        line_dd = dd()
        line_dd.x = x
        line_dd.y = y
        line_dd.line = line
        self.__lines[name] = line_dd

    axes_base.add_line(self, line)
    self.relim()
    self.autoscale()
def export(self, path):
    make_dir(path)
    hparams = dd({'hp': self.hp, 'input_sz': self.input_sz})
    hparams.dump(join(path, 'hparams.pkl'))
    for i, layer in enumerate(self.layers):
        layer.export(join(path, '{:03d}_{}'.format(i, layer.name)))
def __iadd__(self, other):
    if not isinstance(other, dict):
        raise TypeError("__iadd__ rhs must be, or derive from, type dict. "
                        "Rather, it is of type {}".format(type(other)))
    self._nb += 1
    for key, value in list(other.items()):
        if key in self:
            self[key].value += [value]
            self[key].index += [self._nb - 1]
        else:
            self[key] = dd()
            self[key].value = [value]
            self[key].index = [self._nb - 1]
    return self
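# Accumulation semantics sketch (assuming this is the expdd container used by analyze()
# and that self._nb starts at 0):
#
#     e = expdd()
#     e += {'lr': 0.01, 'momentum': 0.9}
#     e += {'lr': 0.10}
#     # e['lr'].value == [0.01, 0.10], e['lr'].index == [0, 1]
#     # e['momentum'].value == [0.9], e['momentum'].index == [0]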
def __init__(self, path=None):
    datamodel.__init__(self)
    self.path = path if path else self.path

    # Maps each split name to its list of examples
    self.splits = dd({split: [] for split in ['train', 'valid', 'test']})

    for fn in walk_files(self.path, join=False, explore_subfolders=False):

        # Example class name and index (file names are expected to look like "cat.12345.jpg")
        cls = fn[:3]
        idx = int(fn[4:-4])

        # Add class to class list
        if cls not in self.classes:
            self.classes.append(cls)

        example = element(self.path, fn, label=self.classes.index(cls), desc=cls, index=idx)
        self.examples.append(example)

        # Assign example to a split based on its index
        if idx < 20000 // 2:
            self.splits.train.append(example)
        elif idx < 22500 // 2:
            self.splits.valid.append(example)
        else:
            self.splits.test.append(example)

    print("Cats and dogs dataset is loaded with:")
    for split_id in self.splits:
        print("    {:7d} examples in {} set".format(len(self.splits[split_id]), split_id))
def run(self):
    hp = self.hp

    # Initialize log file
    self.log = dd()

    # Model input and labels
    self.input = T.tensor4("input_tensor4", float_x)
    self.label = T.ivector("label_vector")

    # Initialize dataset
    self.source = source(hp.dataset.path)

    # Iterators that will read and extract images from the dataset
    self.trainiterator = randompatch(self.source.splits.train, load_ratio=0.05, dataset_per_epoch=1,
                                     corrupt=True, nb_workers=1, **hp.iterator)
    self.validiterator = randompatch(self.source.splits.valid, load_ratio=0.1, dataset_per_epoch=1,
                                     corrupt=False, nb_workers=1, **hp.iterator)
    self.testiterator = randompatch(self.source.splits.test, load_ratio=0.1,
                                    corrupt=False, nb_workers=1, **hp.iterator)

    # Learning model
    self.model = model(hp.model, self.trainiterator.shape)

    # Class probability outputs
    train_prob = self.model(self.input, 'train')
    valid_prob = self.model(self.input, 'valid')
    test_prob = self.model(self.input, 'test')

    # Loss functions
    train_loss = cross_entropy(train_prob, self.label)
    valid_loss = cross_entropy(valid_prob, self.label)
    test_loss = cross_entropy(test_prob, self.label)

    # Error functions
    train_error = error(train_prob, self.label)
    valid_error = error(valid_prob, self.label)
    test_error = error(test_prob, self.label)

    # Trainer object
    self.trainer = sgd(self.input, self.trainiterator, self.validiterator,
                       log=self.log,
                       output_path=join(hp.meta.path, hp.meta.result_folder),
                       **hp.trainer)

    # Greedy layer-wise pre-training
    self.model.pretrain(self.input, self.input, self.trainiterator)

    # Training
    self.trainer(self.model.optimizables, train_loss, valid_loss, self.label,
                 train_error, valid_error, self.model.name).learn()

    # Greedy layer-wise post-training
    self.model.posttrain(self.input, self.input, self.trainiterator)

    # Compute final valid and test results
    valid_performance_fn = th.function(inputs=[self.input, self.label],
                                       outputs=[valid_loss, valid_error],
                                       allow_input_downcast=True)
    test_performance_fn = th.function(inputs=[self.input, self.label],
                                      outputs=[test_loss, test_error],
                                      allow_input_downcast=True)

    # Performance on validation set
    valid_loss, valid_error = self.test(self.validiterator, valid_performance_fn)
    print("Final valid loss = {:0.4f}, error = {:0.4f}".format(valid_loss, valid_error))

    # Performance on test set
    test_loss, test_error = self.test(self.testiterator, test_performance_fn)
    print("Final test loss = {:0.4f}, error = {:0.4f}".format(test_loss, test_error))

    # Final results to log file
    self.log[self.model.name].cost.final_valid = valid_loss
    self.log[self.model.name].cost.final_test = test_loss
    self.log[self.model.name].error.final_valid = valid_error
    self.log[self.model.name].error.final_test = test_error
    self.log.dump(join(hp.meta.path, hp.meta.result_folder, "out.pkl"), True)
def generate_hp(self, path):

    # Seed random number generator
    np.random.seed(datetime.now().microsecond)

    # Hyperparameters
    hp = dd()

    # Set debug mode
    hp.debug = False

    # Experiment parameters
    hp.meta = dd()
    hp.meta.path = path
    hp.meta.result_folder = './'
    hp.meta.export_folder = 'export'

    # Dataset parameters
    hp.dataset = dd()
    hp.dataset.path = join(datasets.path(), 'catsanddogs')

    # Feature extraction parameters
    hp.iterator = dd()
    hp.iterator.patch_sz = (90, 90, 3)
    hp.iterator.reshape_sz = (100, 100, 3)

    # Feature learning layers
    hp.model = dd()
    hp.model.name = 'convnet'
    hp.model.layers = dd()

    # Preprocess layer
    i = 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_preprocess'
    hp.model.layers[i].nb_channels = 3
    hp.model.layers[i].nb_pretrain_iterations = 1

    # Convolutional layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_vanilla'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_filters = 16
    hp.model.layers[i].filter_sz = (3, 3)

    # Max pooling layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_maxpool'
    hp.model.layers[i].downsample_sz = 2

    # Batch normalization layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_batchnorm'

    # Convolutional layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_vanilla'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_filters = 32
    hp.model.layers[i].filter_sz = (3, 3)

    # Batch normalization layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_batchnorm'

    # Convolutional layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_vanilla'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_filters = 32
    hp.model.layers[i].filter_sz = (3, 3)

    # Max pooling layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_maxpool'
    hp.model.layers[i].downsample_sz = 2

    # Batch normalization layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_batchnorm'

    # Convolutional layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_vanilla'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_filters = 64
    hp.model.layers[i].filter_sz = (5, 5)

    # Max pooling layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_maxpool'
    hp.model.layers[i].downsample_sz = 2

    # Batch normalization layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_batchnorm'

    # Convolutional layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_vanilla'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_filters = 64
    hp.model.layers[i].filter_sz = (5, 5)

    # Batch normalization layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'conv_batchnorm'

    # Fully connected layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'hidden'
    hp.model.layers[i].activation = "relu"
    hp.model.layers[i].nb_hid = 256  # int(10**np.random.uniform(log(128)/log(10), log(512)/log(10)))

    # Logistic layer
    i += 1
    hp.model.layers[i] = dd()
    hp.model.layers[i].type = 'logistic'
    hp.model.layers[i].nb_out = 2

    # Trainer
    hp.trainer = dd()
    hp.trainer.max_epoch = 1000
    hp.trainer.lookback = randint(10, 30)
    hp.trainer.minibatch_sz = 100
    hp.trainer.init_lr = None
    hp.trainer.incr_lr = None
    hp.trainer.lr = 10**np.random.uniform(log(0.01) / log(10), log(0.1) / log(10))
    hp.trainer.decay_rate = uniform(0.985, 1.0)
    hp.trainer.momentum = 10**uniform(log(0.8) / log(10), log(0.99) / log(10))
    hp.trainer.momentum_reset_prob = 0

    print("Save hyperparameters to file")
    hp.dump(join(hp.meta.path, 'hp.pkl'), save_pretty_textfile=True)
    print(hp)

    return hp
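# Summary of the architecture configured above (derived from the layer list, for quick
# reference): conv_preprocess -> conv 16x(3x3) -> maxpool/2 -> batchnorm ->
# conv 32x(3x3) -> batchnorm -> conv 32x(3x3) -> maxpool/2 -> batchnorm ->
# conv 64x(5x5) -> maxpool/2 -> batchnorm -> conv 64x(5x5) -> batchnorm ->
# fully connected (256, relu) -> logistic (2 classes); all conv layers use relu.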
def analyze(experiment_path, output_path, objectives, ignores, ignore_vals, substitute, out_to_hp):

    # Remove unfinished experiments
    clean(experiment_path, objectives)

    # Get experiment directories
    lpath = walk_folders(experiment_path)

    # Get all experiment files
    ehp = expdd()
    eout = expdd()
    for path in lpath:
        try:
            hp = read_hp(os.path.join(path, 'hp.txt'), substitute)
            out = read_hp(os.path.join(path, 'out.txt'), substitute)
        except Exception:
            continue
        ehp += hp
        eout += out

    # Make output directory
    make_dir(output_path)

    # Transfer output data to input
    for obj in out_to_hp:
        if obj in eout:
            ehp["out." + obj] = eout[obj]

    # Get exclusion mask
    arr, _ = ehp.get_array(list(ehp.keys())[0])
    include = np.ones(arr.size)
    if ignore_vals is not None:
        for key, value in list(ignore_vals.items()):
            arr, _ = ehp.get_array(key)
            include *= (arr != value)

    fig = plt.figure()  # (figsize = (4,3))
    for xlabel in list(ehp.keys()):

        # Skip ignored hyperparameters
        do_ignore = False
        for ignore in ignores:
            print(ignore, xlabel, ignore in xlabel)
            if ignore in xlabel:
                do_ignore = True
                break
        if do_ignore:
            continue

        fig.clf()
        ax = fig.add_subplot(111)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Objective')
        ax.set_title('Hyperparameter optimization for {}'.format(os.path.split(experiment_path)[1]))

        x, this_include = ehp.get_array(xlabel)
        this_include *= include

        # Do not output hyperparameters with a fixed value
        if np.unique(x).size == 1:
            continue
        print("For {}, number of unique x is {}".format(xlabel, np.unique(x).size))
        if np.unique(x).size == 2:
            for val in np.unique(x):
                print('    {}'.format(val))

        # Convert object dtype to string
        if x.dtype == np.object:
            x = [str(obj) for obj in x]

        # Rotate strings to 90 degrees for readability of graph
        if isinstance(x[0], str):
            for i in range(len(x)):
                x[i] = x[i].replace("\\", "/")
                if 'path' in xlabel:
                    tail, head = os.path.split(x[i])
                    x[i] = os.path.join(os.path.split(tail)[1], head)
            un = np.unique(x).astype(sstr)
            xticks = un.copy()
            un = dict(list(zip(un, np.arange(un.size))))
            x = np.asarray([un[val] for val in x], dtype=sstr)
            ax.set_xticks(np.arange(xticks.size))
            ax.set_xticklabels(xticks)
            for ticklabel in ax.get_xticklabels():
                ticklabel.set_rotation(90)
                if 'path' in xlabel:
                    ticklabel.set_fontsize(3)

        # Plot each objective against the hyperparameter
        for ylabel in list(eout.keys()):
            if ylabel in objectives:
                y, _ = eout.get_array(ylabel)
                ax.plot(x[this_include * (y != 0)], y[this_include * (y != 0)],
                        marker='.', markersize=5, linestyle='none', label=ylabel)

        try:
            xlim = ax.get_xlim()
            ax.set_xlim(xlim[0] - (xlim[1] - xlim[0]) * 0.05,
                        xlim[1] + (xlim[1] - xlim[0]) * 0.05)
            ax.legend(loc=1, fontsize=7)
            # ax.set_yscale('log')
            fig.savefig(os.path.join(output_path, os.path.split(experiment_path)[1] + "." + xlabel + '.png'),
                        bbox_inches='tight', dpi=600)
        except Exception:
            print_exc()
def __init__(self, inp, train_data, valid_data=None, max_epoch=10000, lookback=1000,
             minibatch_sz=100, init_lr=None, incr_lr=None, lr=0.01, decay_rate=0,
             momentum=0.9, momentum_reset_prob=0, loops_per_epoch=1, output_path=None, log=dd()):
    """
    Implements stochastic gradient descent.

    Parameters
    ----------
    inp : theano tensor
        model input
    train_data / valid_data : tuple or function object
        training/validation data given as (examples, labels).
        examples should be an ndarray with shape [nb_examples, dim1, dim2, ...].
        labels should be None for unsupervised learning, or an ndarray with shape [nb_examples].
        If train_data is a function object, a call without argument should return such a tuple.
    max_epoch : integer type
        maximum number of epochs
    lookback : integer type
        EMA smoothing period on validation cost, used for early stopping to prevent overfitting
    minibatch_sz : integer type
        number of examples per parameter update
    init_lr : float type
        initial learning rate
    incr_lr : float type
        learning rate increment applied after each epoch until init_lr plus the accumulated
        increments reaches lr; the learning rate lr is then decayed
    lr : float type
        gradient descent's learning rate
    decay_rate : float type
        learning rate decay: learning_rate(t+1) = learning_rate(t) * decay_rate
    momentum : float type
        momentum term: update = last_update * momentum + learning_rate * grad
    momentum_reset_prob : float type
        probability of resetting the momentum at the end of an epoch
    log : dict-like object
        dictionary in which the trainer will log learning's current status
    output_path : str
        output path to which the trainer will dump the log dict as a text file
        and output a graph of the learning progression
    """
    # Model input
    self.inp = inp

    self.max_epoch = max_epoch
    self.lookback = lookback
    self.minibatch_sz = int(minibatch_sz)
    self.init_lr = init_lr
    self.incr_lr = incr_lr
    self.high_lr = lr
    self.decay_rate = decay_rate
    self.momentum = momentum
    self.train_data = train_data
    self.valid_data = valid_data
    self.log = log
    self.output_path = output_path
    self.subplots = dd()
    self.do_validation = valid_data is not None
    self.momentum_reset_prob = momentum_reset_prob
    self.loops_per_epoch = loops_per_epoch

    # Learning data shared variables
    self.shr_train_data = shared_x(np.empty([0] * self.inp.ndim), name='training_set_data')
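# Learning-rate schedule sketch (illustrative only; the actual schedule lives in the
# trainer's learn loop, not here). The docstring above implies:
#
#     rate = init_lr if init_lr else lr
#     for epoch in range(max_epoch):
#         ...
#         if init_lr and rate < lr:
#             rate += incr_lr        # ramp-up phase
#         else:
#             rate *= decay_rate     # decay phase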
def __call__(self, params, train_cost, valid_cost=None, labels=None, train_error=None,
             valid_error=None, model_id='', additionnal_updates=[], debug_calls=[], debug_nodes={}):

    # Determine whether learning is supervised or not
    self.supervised = labels is not None
    self.do_validation = self.do_validation and bool(valid_cost)
    print('\nInitializing sgd for {} learning of {} {} validation.'.format(
        {0: 'unsupervised', 1: 'supervised'}[self.supervised],
        model_id,
        {0: 'without', 1: 'with'}[self.do_validation]))

    # Initializations
    self.additionnal_updates = additionnal_updates if isinstance(additionnal_updates, list) else [additionnal_updates]
    self.learning_divergence_counter = 0
    self.rampup = bool(self.init_lr)
    self.lr = self.init_lr if self.rampup else self.high_lr

    # Outputs
    self.cost = dd()
    self.cost.epoch = []
    self.cost.train = []
    self.cost.train_ema = []
    self.cost.valid = []
    self.cost.valid_ema = []
    self.error = dd()
    self.error.epoch = []
    self.error.train = []
    self.error.train_ema = []
    self.error.valid = []
    self.error.valid_ema = []

    # Validation data
    self.valid_list = []
    self.valid_label_list = []

    # Input parameters
    index = T.lscalar(name='lscalar_index')
    minibatch_sz = T.lscalar(name="lscalar_minibatch_sz")
    momentum = T.fscalar(name="fscalar_momentum")
    learning_rate = T.fscalar(name="fscalar_learning_rate")

    # Save reference to parameters
    self.params = params

    # Compute gradient
    self.grads = T.grad(cost=train_cost, wrt=params, disconnected_inputs='ignore', return_disconnected='None')

    # Keep only gradients that are connected to the update tree
    self.grads = dd([(param, grad) for (param, grad) in zip(params, self.grads) if grad is not None])

    # Save initial value of parameters
    self.init = dd([(param, shared_x(param.get_value())) for param in self.params])

    if self.do_validation:
        self.best_performance = sys.float_info.max
        self.best_params = dd([(param, param.get_value()) for param in self.params])

    # Initialize last updates
    self.last_update = dd([(p, shared_x(np.zeros_like(p.get_value()))) for p in self.params])

    # Learning updates
    updates = []
    for param, grad in list(self.grads.items()):
        last = self.last_update[param]
        gradient = last * momentum + learning_rate * grad
        updates.append((param, param - gradient))
        updates.append((last, gradient))

    # Learning function
    if self.supervised:

        # Learning labels shared variables
        self.shr_train_labels = th.shared(np.empty([0] * labels.ndim, dtype=labels.dtype),
                                          name='training_set_labels')

        # Learning function
        self.learningstep_fn = th.function(
            inputs=[index, minibatch_sz, momentum, learning_rate],
            outputs=[train_cost, train_error],
            updates=updates,
            givens={self.inp: self.shr_train_data[index * minibatch_sz:(index + 1) * minibatch_sz],
                    labels: self.shr_train_labels[index * minibatch_sz:(index + 1) * minibatch_sz]},
            allow_input_downcast=True)

        # Noisy cost on train shared variables
        self.train_cost_fn = th.function(
            inputs=[],
            outputs=[valid_cost, valid_error],
            givens={self.inp: self.shr_train_data, labels: self.shr_train_labels},
            allow_input_downcast=True)

        # Clean reconstruction cost (for validation)
        self.valid_cost_fn = th.function(
            inputs=[self.inp, labels],
            outputs=[valid_cost, valid_error],
            allow_input_downcast=True)

        if debug_nodes:
            self.debug_fn = th.function(
                inputs=[],
                outputs=[output for output in list(debug_nodes.values())],
                givens={self.inp: self.shr_train_data, labels: self.shr_train_labels},
                allow_input_downcast=True,
                on_unused_input='ignore')

    else:

        # Theano learning function
        self.learningstep_fn = th.function(
            inputs=[index, minibatch_sz, momentum, learning_rate],
            outputs=train_cost,
            updates=updates,
            givens={self.inp: self.shr_train_data[index * minibatch_sz:(index + 1) * minibatch_sz]},
            allow_input_downcast=True,
            on_unused_input='ignore')

        # Noisy cost on train shared variables
        self.train_cost_fn = th.function(
            inputs=[],
            outputs=valid_cost,
            givens={self.inp: self.shr_train_data},
            allow_input_downcast=True)

        # Clean reconstruction cost (for validation)
        self.valid_cost_fn = th.function(
            inputs=[self.inp],
            outputs=valid_cost,
            allow_input_downcast=True)

        if debug_nodes:
            self.debug_fn = th.function(
                inputs=[],
                outputs=[output for output in list(debug_nodes.values())],
                givens={self.inp: self.shr_train_data},
                allow_input_downcast=True,
                on_unused_input='ignore')

    # Debug
    self.debug_calls = debug_calls
    self.debug_nodes = dd(debug_nodes)
    self.model_id = model_id

    # For learning stats graph outputs
    self.last_batch_param = dd([(param, shared_copy(param)) for param in self.params])
    self.last_batch_update = dd([(param, shared_zeros_like(param)) for param in self.params])
    self.this_batch_update = dd([(param, shared_zeros_like(param)) for param in self.params])
    updates = []
    for param in self.params:
        updates += [(self.last_batch_update[param], self.this_batch_update[param])]
        updates += [(self.this_batch_update[param], param - self.last_batch_param[param])]
        updates += [(self.last_batch_param[param], param)]
    self.update_learning_stats_fn = th.function(inputs=[], outputs=[], updates=updates)

    # Initialize learning stats plots
    if self.subplots:
        for sp in list(self.subplots.values()):
            sp.clf()
    self.subplots = dd()
    self.subplots.graphs = subplots(1, 1 + self.supervised, 3, 3 + 3 * self.supervised, projection='recurrent')

    line_names = ['p005', 'median', 'p995', 'std']
    line_labels = ['0.5%', 'median', '99.5%', 'std']
    for param in self.params:
        nb_plots = 3 if param.ndim == 1 else 6
        sp = subplots(2, nb_plots, 6, nb_plots * 3, projection='recurrent')
        for i in range(nb_plots):
            for name, label in zip(line_names, line_labels):
                sp[1, i].add_line(name=name, label=label)
            sp[1, i].set(xlabel='epoch', xscale='log', xtick_fontsize=6, ytick_fontsize=6)
            sp[1, i].legend(loc='upper center', fontsize=6)
            sp[0, i].set(xtick_fontsize=6, ytick_fontsize=6)
        if nb_plots == 6:
            sp[1, 3].set(yscale='log')
        self.subplots[param] = sp

    for node in list(self.debug_nodes.values()):
        sp = subplots(2, 1, 6, 3, projection='recurrent')
        for name, label in zip(line_names, line_labels):
            sp[1, 0].add_line(name=name, label=label)
        sp[1, 0].add_line(name='nonzero', label='non-zeros')
        sp[1, 0].set(xlabel='epoch', xscale='log', xtick_fontsize=6, ytick_fontsize=6)
        sp[1, 0].legend(loc='upper center', fontsize=6)
        sp[0, 0].set(xtick_fontsize=6, ytick_fontsize=6)
        self.subplots[node] = sp

    self.subplots.graphs[0, 0].set(xlabel='epoch', yscale='log', xtick_fontsize=8, ytick_fontsize=8)
    self.subplots.graphs[0, 0].set_title('cost - {}'.format(model_id) if model_id else None, fontsize=10)
    self.subplots.graphs[0, 0].add_line(name='train', label='train')
    self.subplots.graphs[0, 0].add_line(name='train_ema', label='train (EMA)')
    if self.do_validation:
        self.subplots.graphs[0, 0].add_line(name='valid', label='valid')
        self.subplots.graphs[0, 0].add_line(name='valid_ema', label='valid (EMA)')
    self.subplots.graphs[0, 0].legend(loc='best', fontsize=6)

    if self.supervised:
        self.subplots.graphs[0, 1].set(xlabel='epoch', yscale='log', xtick_fontsize=8, ytick_fontsize=8)
        self.subplots.graphs[0, 1].set_title('error', fontsize=10)
        self.subplots.graphs[0, 1].add_line(name='train', label='train')
        self.subplots.graphs[0, 1].add_line(name='train_ema', label='train (EMA)')
        if self.do_validation:
            self.subplots.graphs[0, 1].add_line(name='valid', label='valid')
            self.subplots.graphs[0, 1].add_line(name='valid_ema', label='valid (EMA)')
        self.subplots.graphs[0, 1].legend(loc='best', fontsize=6)

    return self
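# Reference sketch of the update rule built in the "Learning updates" loop above
# (plain pseudo-NumPy, illustrative only): classical momentum SGD, with last_update
# stored per parameter in a shared variable.
#
#     update      = last_update * momentum + learning_rate * grad
#     param       = param - update
#     last_update = update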
def __init__(self, function, statics):
    from libutils.dict import dd
    self.__function = function
    self.statics = dd(statics)
def additionnal_update(self):
    # Rescale each filter of W to unit L2 norm after every update
    return th.function(inputs=[], outputs=[],
                       updates=dd({self.W: self.W / T.sqrt((self.W ** 2).sum(3).sum(2).sum(1))[:, None, None, None]}))
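# Equivalent NumPy sketch of the normalization above (illustrative): each filter of W,
# with assumed shape [nb_filters, channels, height, width], is rescaled to unit L2 norm.
#
#     norm = np.sqrt((W ** 2).sum(axis=(1, 2, 3)))[:, None, None, None]
#     W = W / norm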