def test_revisit():
    # Test that each call to monitor revisits exactly the same data

    BATCH_SIZE = 3
    MAX_BATCH_SIZE = 12
    BATCH_SIZE_STRIDE = 3
    NUM_BATCHES = 10
    num_examples = NUM_BATCHES * BATCH_SIZE

    monitoring_dataset = ArangeDataset(num_examples)

    for mon_batch_size in xrange(BATCH_SIZE, MAX_BATCH_SIZE + 1,
                                 BATCH_SIZE_STRIDE):
        for num_mon_batches in [1, 3, num_examples / mon_batch_size, None]:
            for mode in sorted(_iteration_schemes):

                if num_mon_batches is None and mode in ['random_uniform',
                                                        'random_slice']:
                    continue

                if has_uniform_batch_size(mode) and \
                   num_mon_batches is not None and \
                   num_mon_batches * mon_batch_size > num_examples:
                    num_mon_batches = int(num_examples /
                                          float(mon_batch_size))

                model = DummyModel(1)
                monitor = Monitor.get_monitor(model)

                try:
                    monitor.add_dataset(monitoring_dataset, mode,
                                        batch_size=mon_batch_size,
                                        num_batches=num_mon_batches)
                except TypeError:
                    monitor.add_dataset(monitoring_dataset, mode,
                                        batch_size=mon_batch_size,
                                        num_batches=num_mon_batches,
                                        seed=0)

                if has_uniform_batch_size(mode) and num_mon_batches is None:
                    num_mon_batches = int(num_examples /
                                          float(mon_batch_size))
                elif num_mon_batches is None:
                    num_mon_batches = int(np.ceil(float(num_examples) /
                                                  float(mon_batch_size)))

                batches = [None] * num_mon_batches
                visited = [False] * num_mon_batches

                batch_idx = shared(0)

                class RecorderAndValidator(object):

                    def __init__(self):
                        self.validate = False

                    def __call__(self, *data):
                        """
                        Initially, records the batches the monitor shows it.
                        When set to validate mode, makes sure the batches
                        shown on the second monitor call match those from
                        the first.
                        """
                        X, = data

                        idx = batch_idx.get_value()
                        batch_idx.set_value(idx + 1)

                        # Note: if the monitor starts supporting variable
                        # batch sizes, take this out. Maybe move it to a new
                        # test that the iterator's uneven property is set
                        # accurately.
                        warnings.warn("TODO: add unit test that iterators "
                                      "uneven property is set correctly.")
                        # assert X.shape[0] == mon_batch_size

                        if self.validate:
                            previous_batch = batches[idx]
                            assert not visited[idx]
                            visited[idx] = True
                            if not np.allclose(previous_batch, X):
                                print('Visited different data in batch', idx)
                                print(previous_batch)
                                print(X)
                                print('Iteration mode', mode)
                                assert False
                        else:
                            batches[idx] = X
                        # end if
                    # end __call__
                # end class

                prereq = RecorderAndValidator()

                monitor.add_channel(name='dummy',
                                    ipt=model.input_space.make_theano_batch(),
                                    val=0.,
                                    prereqs=[prereq],
                                    data_specs=(model.get_input_space(),
                                                model.get_input_source()))

                try:
                    monitor()
                except RuntimeError:
                    print('monitor raised RuntimeError for iteration mode',
                          mode)
                    raise

                assert None not in batches

                batch_idx.set_value(0)
                prereq.validate = True

                monitor()

                assert all(visited)
def setup(self, model, dataset):
    """
    Compiles the theano functions needed for the train method.

    Parameters
    ----------
    model : a Model instance
    dataset : Dataset
    """
    if self.cost is None:
        self.cost = model.get_default_cost()

    inf_params = [param for param in model.get_params()
                  if np.any(np.isinf(param.get_value()))]
    if len(inf_params) > 0:
        raise ValueError("These params are Inf: " + str(inf_params))
    if any([np.any(np.isnan(param.get_value()))
            for param in model.get_params()]):
        nan_params = [param for param in model.get_params()
                      if np.any(np.isnan(param.get_value()))]
        raise ValueError("These params are NaN: " + str(nan_params))
    self.model = model

    self._synchronize_batch_size(model)
    model._test_batch_size = self.batch_size
    self.monitor = Monitor.get_monitor(model)
    self.monitor._sanity_check()

    # test if force batch size and batch size
    if getattr(model, "force_batch_size", False) and \
       any(dataset.get_design_matrix().shape[0] % self.batch_size != 0
           for dataset in self.monitoring_dataset.values()) and \
       not has_uniform_batch_size(self.monitor_iteration_mode):
        raise ValueError("Dataset size is not a multiple of batch size."
                         "You should set monitor_iteration_mode to "
                         "even_sequential, even_shuffled_sequential or "
                         "even_batchwise_shuffled_sequential")

    data_specs = self.cost.get_data_specs(self.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    # Build a flat tuple of Theano Variables, one for each space.
    # We want that so that if the same space/source is specified
    # more than once in data_specs, only one Theano Variable
    # is generated for it, and the corresponding value is passed
    # only once to the compiled Theano function.
    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s[%s]' % (self.__class__.__name__, source)
        arg = space.make_theano_batch(name=name,
                                      batch_size=self.batch_size)
        theano_args.append(arg)
    theano_args = tuple(theano_args)

    # Methods of `self.cost` need args to be passed in a format compatible
    # with data_specs
    nested_args = mapping.nest(theano_args)
    fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
    self.on_load_batch = fixed_var_descr.on_load_batch

    cost_value = self.cost.expr(model, nested_args,
                                ** fixed_var_descr.fixed_vars)

    if cost_value is not None and cost_value.name is None:
        # Concatenate the name of all tensors in theano_args !?
        cost_value.name = 'objective'

    # Set up monitor to model the objective value, learning rate,
    # momentum (if applicable), and extra channels defined by
    # the cost
    learning_rate = self.learning_rate
    if self.monitoring_dataset is not None:
        if (self.monitoring_batch_size is None and
                self.monitoring_batches is None):
            self.monitoring_batch_size = self.batch_size
            self.monitoring_batches = self.batches_per_iter
        self.monitor.setup(dataset=self.monitoring_dataset,
                           cost=self.cost,
                           batch_size=self.monitoring_batch_size,
                           num_batches=self.monitoring_batches,
                           extra_costs=self.monitoring_costs,
                           mode=self.monitor_iteration_mode)
        dataset_name = self.monitoring_dataset.keys()[0]
        monitoring_dataset = self.monitoring_dataset[dataset_name]
        # TODO: have Monitor support non-data-dependent channels
        self.monitor.add_channel(name='learning_rate',
                                 ipt=None,
                                 val=learning_rate,
                                 data_specs=(NullSpace(), ''),
                                 dataset=monitoring_dataset)

        if self.learning_rule:
            self.learning_rule.add_channels_to_monitor(
                self.monitor,
                monitoring_dataset)

    params = list(model.get_params())
    assert len(params) > 0
    for i, param in enumerate(params):
        if param.name is None:
            param.name = 'sgd_params[%d]' % i
    self.params = params

    grads, updates = self.cost.get_gradients(model, nested_args,
                                             ** fixed_var_descr.fixed_vars)
    if not isinstance(grads, OrderedDict):
        raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                        "something with" + str(type(grads)) + "as its " +
                        "first member. Expected OrderedDict.")

    for param in grads:
        assert param in params
    for param in params:
        assert param in grads

    lr_scalers = model.get_lr_scalers()

    for key in lr_scalers:
        if key not in params:
            raise ValueError("Tried to scale the learning rate on " +
                             str(key) +
                             " which is not an optimization parameter.")

    assert len(updates.keys()) == 0

    def get_func(learn_discriminator, learn_generator,
                 dont_you_fucking_dare_touch_the_generator=False):
        updates = OrderedDict()

        assert (learn_discriminator or learn_generator) and \
            not (learn_discriminator and learn_generator)

        if learn_discriminator:
            cur_params = model.discriminator.get_params()
        else:
            cur_params = model.generator.get_params()

        def check():
            for param in params:
                if param not in cur_params:
                    assert param not in updates

        cur_grads = OrderedDict()
        for param in cur_params:
            cur_grads[param] = grads[param]

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
            assert grads[param].dtype == param.dtype

        cur_lr_scalers = OrderedDict()
        for param in cur_params:
            if param in lr_scalers:
                lr_scaler = lr_scalers[param]
                cur_lr_scalers[param] = lr_scaler

        log.info('Parameter and initial learning rate summary:')
        for param in cur_params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * cur_lr_scalers.get(param, 1.)
            log.info('\t' + param_name + ': ' + str(lr))

        updates.update(self.learning_rule.get_updates(
            learning_rate, cur_grads, cur_lr_scalers))

        check()

        for param in cur_params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        check()
        model.modify_updates(updates)
        check()
        for param in cur_params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if np.any(np.isinf(update_val)):
                    raise ValueError("debug value of %s contains infs" %
                                     update.name)
                if np.any(np.isnan(update_val)):
                    raise ValueError("debug value of %s contains nans" %
                                     update.name)
        check()

        if dont_you_fucking_dare_touch_the_generator:
            for param in model.generator.get_params():
                assert param not in updates

        with log_timing(log, 'Compiling sgd_update'):
            return function(theano_args,
                            updates=updates,
                            name='sgd_update',
                            on_unused_input='ignore',
                            mode=self.theano_function_mode)

    self.d_func = get_func(1, 0,
                           dont_you_fucking_dare_touch_the_generator=True)
    self.g_func = get_func(0, 1)
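# Illustrative sketch (not part of the original source): the two functions
# compiled above, self.d_func and self.g_func, share the same flat argument
# list (theano_args), so a training loop can alternate them on the same
# minibatch. The strict 1:1 discriminator/generator alternation and the
# `batches` iterable are assumptions made for this example, not the trainer's
# actual schedule.
def _alternate_updates_sketch(self, batches):
    for batch in batches:
        # `batch` is a flat tuple of numeric arrays, one per (space, source)
        # pair in theano_args; on_unused_input='ignore' lets each compiled
        # function consume only the inputs it actually needs.
        self.d_func(*batch)  # updates discriminator parameters only
        self.g_func(*batch)  # updates generator parameters only
        self.monitor.report_batch(self.batch_size)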
def setup(self, model, dataset):
    """
    Compiles the theano functions needed for the train method.

    Parameters
    ----------
    model : a Model instance
    dataset : Dataset
    """
    if self.cost is None:
        self.cost = model.get_default_cost()

    inf_params = [param for param in model.get_params()
                  if contains_inf(param.get_value())]
    if len(inf_params) > 0:
        raise ValueError("These params are Inf: " + str(inf_params))
    if any([contains_nan(param.get_value())
            for param in model.get_params()]):
        nan_params = [param for param in model.get_params()
                      if contains_nan(param.get_value())]
        raise ValueError("These params are NaN: " + str(nan_params))
    self.model = model

    self._synchronize_batch_size(model)
    model._test_batch_size = self.batch_size
    self.monitor = Monitor.get_monitor(model)
    self.monitor._sanity_check()

    # Check whether the model forces a fixed batch size and, if so, whether
    # the datasets divide evenly into batches of that size.
    has_force_batch_size = getattr(model, "force_batch_size", False)
    train_dataset_is_uneven = \
        dataset.get_num_examples() % self.batch_size != 0

    has_monitoring_datasets = (self.monitoring_dataset is not None and
                               len(self.monitoring_dataset) > 0)

    if has_monitoring_datasets:
        monitoring_datasets_are_uneven = \
            any(d.get_num_examples() % self.batch_size != 0
                for d in self.monitoring_dataset.values())
    else:
        monitoring_datasets_are_uneven = False  # or True, it doesn't matter

    if has_force_batch_size and train_dataset_is_uneven and \
       not has_uniform_batch_size(self.train_iteration_mode):
        raise ValueError("Dataset size is not a multiple of batch size. "
                         "You should set train_iteration_mode (and "
                         "maybe monitor_iteration_mode) to "
                         "even_sequential, even_shuffled_sequential or "
                         "even_batchwise_shuffled_sequential")

    if has_force_batch_size and has_monitoring_datasets and \
       monitoring_datasets_are_uneven and \
       not has_uniform_batch_size(self.monitor_iteration_mode):
        raise ValueError("Dataset size is not a multiple of batch size. "
                         "You should set monitor_iteration_mode to "
                         "even_sequential, even_shuffled_sequential or "
                         "even_batchwise_shuffled_sequential")

    data_specs = self.cost.get_data_specs(self.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    # Build a flat tuple of Theano Variables, one for each space.
    # We want that so that if the same space/source is specified
    # more than once in data_specs, only one Theano Variable
    # is generated for it, and the corresponding value is passed
    # only once to the compiled Theano function.
    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s[%s]' % (self.__class__.__name__, source)
        arg = space.make_theano_batch(name=name,
                                      batch_size=self.batch_size)
        theano_args.append(arg)
    theano_args = tuple(theano_args)

    # Methods of `self.cost` need args to be passed in a format compatible
    # with data_specs
    nested_args = mapping.nest(theano_args)
    fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
    self.on_load_batch = fixed_var_descr.on_load_batch

    cost_value = self.cost.expr(model, nested_args,
                                ** fixed_var_descr.fixed_vars)

    if cost_value is not None and cost_value.name is None:
        # Concatenate the name of all tensors in theano_args !?
        cost_value.name = 'objective'

    learning_rate = self.learning_rate

    params = list(model.get_params())
    assert len(params) > 0
    for i, param in enumerate(params):
        if param.name is None:
            param.name = 'sgd_params[%d]' % i

    grads, updates = self.cost.get_gradients(model, nested_args,
                                             ** fixed_var_descr.fixed_vars)
    if not isinstance(grads, OrderedDict):
        raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                        "something with " + str(type(grads)) + " as its " +
                        "first member. Expected OrderedDict.")

    for param in grads:
        assert param in params
    for param in params:
        assert param in grads

    for param in grads:
        if grads[param].name is None and cost_value is not None:
            grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                 {'costname': cost_value.name,
                                  'paramname': param.name})
        assert grads[param].dtype == param.dtype

    lr_scalers = model.get_lr_scalers()

    for key in lr_scalers:
        if key not in params:
            raise ValueError("Tried to scale the learning rate on " +
                             str(key) +
                             " which is not an optimization parameter.")

    log.info('Parameter and initial learning rate summary:')
    for param in params:
        param_name = param.name
        if param_name is None:
            param_name = 'anon_param'
        lr = learning_rate.get_value() * lr_scalers.get(param, 1.)
        log.info('\t' + param_name + ': ' + str(lr))

    if self.learning_rule:
        updates.update(self.learning_rule.get_updates(
            learning_rate, grads, lr_scalers))
    else:
        # Use standard SGD updates with fixed learning rate.
        updates.update(dict(safe_zip(params,
                                     [param - learning_rate *
                                      lr_scalers.get(param, 1.) *
                                      grads[param]
                                      for param in params])))

    for param in params:
        if updates[param].name is None:
            updates[param].name = 'sgd_update(' + param.name + ')'
    model.modify_updates(updates)
    for param in params:
        update = updates[param]
        if update.name is None:
            update.name = 'censor(sgd_update(' + param.name + '))'
        for update_val in get_debug_values(update):
            if contains_inf(update_val):
                raise ValueError("debug value of %s contains infs" %
                                 update.name)
            if contains_nan(update_val):
                raise ValueError("debug value of %s contains nans" %
                                 update.name)

    # Set up monitor to model the objective value, learning rate,
    # momentum (if applicable), and extra channels defined by
    # the cost.
    # We have to do that after learning_rule.get_updates has been
    # called, since it may have an effect on
    # learning_rule.add_channels_to_monitor (that is currently the case
    # for AdaDelta and RMSProp).
    self._setup_monitor()

    with log_timing(log, 'Compiling sgd_update'):
        self.sgd_update = function(theano_args,
                                   updates=updates,
                                   name='sgd_update',
                                   on_unused_input='ignore',
                                   mode=self.theano_function_mode)
    self.params = params
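# Illustrative sketch (not part of the original source): how the compiled
# self.sgd_update is expected to be called. Because theano_args was built
# from the flattened data_specs, the function takes one numeric batch per
# flattened (space, source) pair, in that order, and applies the parameter
# updates as a side effect. CompositeSpace is assumed to be imported from
# pylearn2.space; train_iteration_mode and batches_per_iter are assumed to be
# attributes set in the trainer's __init__.
def _sgd_step_sketch(self, dataset):
    data_specs = self.cost.get_data_specs(self.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
    flat_data_specs = (CompositeSpace(space_tuple), source_tuple)
    iterator = dataset.iterator(mode=self.train_iteration_mode,
                                batch_size=self.batch_size,
                                num_batches=self.batches_per_iter,
                                data_specs=flat_data_specs,
                                return_tuple=True)
    for batch in iterator:
        # One gradient step per minibatch; the monitor is told how many
        # examples were consumed so its channels stay in sync.
        self.sgd_update(*batch)
        self.monitor.report_batch(self.batch_size)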
def test_revisit():
    # Test that each call to monitor revisits exactly the same data

    BATCH_SIZE = 3
    MAX_BATCH_SIZE = 12
    BATCH_SIZE_STRIDE = 3
    NUM_BATCHES = 10
    num_examples = NUM_BATCHES * BATCH_SIZE

    monitoring_dataset = ArangeDataset(num_examples)

    for mon_batch_size in xrange(BATCH_SIZE, MAX_BATCH_SIZE + 1,
                                 BATCH_SIZE_STRIDE):
        nums = [1, 3, int(num_examples / mon_batch_size), None]
        for mode in sorted(_iteration_schemes):
            if mode == 'even_sequences' and nums is not None:
                # even_sequences iterator does not support specifying a
                # fixed number of minibatches.
                continue

            for num_mon_batches in nums:
                if num_mon_batches is None and mode in ['random_uniform',
                                                        'random_slice']:
                    continue

                if has_uniform_batch_size(mode) and \
                   num_mon_batches is not None and \
                   num_mon_batches * mon_batch_size > num_examples:
                    num_mon_batches = int(num_examples /
                                          float(mon_batch_size))

                model = DummyModel(1)
                monitor = Monitor.get_monitor(model)

                try:
                    monitor.add_dataset(monitoring_dataset, mode,
                                        batch_size=mon_batch_size,
                                        num_batches=num_mon_batches)
                except TypeError:
                    monitor.add_dataset(monitoring_dataset, mode,
                                        batch_size=mon_batch_size,
                                        num_batches=num_mon_batches,
                                        seed=0)

                if has_uniform_batch_size(mode) and num_mon_batches is None:
                    num_mon_batches = int(num_examples /
                                          float(mon_batch_size))
                elif num_mon_batches is None:
                    num_mon_batches = int(np.ceil(float(num_examples) /
                                                  float(mon_batch_size)))

                batches = [None] * int(num_mon_batches)
                visited = [False] * int(num_mon_batches)

                batch_idx = shared(0)

                class RecorderAndValidator(object):

                    def __init__(self):
                        self.validate = False

                    def __call__(self, *data):
                        """
                        Initially, records the batches the monitor shows it.
                        When set to validate mode, makes sure the batches
                        shown on the second monitor call match those from
                        the first.
                        """
                        X, = data

                        idx = batch_idx.get_value()
                        batch_idx.set_value(idx + 1)

                        # Note: if the monitor starts supporting variable
                        # batch sizes, take this out. Maybe move it to a new
                        # test that the iterator's uneven property is set
                        # accurately.
                        warnings.warn("TODO: add unit test that iterators "
                                      "uneven property is set correctly.")
                        # assert X.shape[0] == mon_batch_size

                        if self.validate:
                            previous_batch = batches[idx]
                            assert not visited[idx]
                            visited[idx] = True
                            if not np.allclose(previous_batch, X):
                                print('Visited different data in batch', idx)
                                print(previous_batch)
                                print(X)
                                print('Iteration mode', mode)
                                assert False
                        else:
                            batches[idx] = X
                        # end if
                    # end __call__
                # end class

                prereq = RecorderAndValidator()

                monitor.add_channel(name='dummy',
                                    ipt=model.input_space.make_theano_batch(),
                                    val=0.,
                                    prereqs=[prereq],
                                    data_specs=(model.get_input_space(),
                                                model.get_input_source()))

                try:
                    monitor()
                except RuntimeError:
                    print('monitor raised RuntimeError for iteration mode',
                          mode)
                    raise

                assert None not in batches

                batch_idx.set_value(0)
                prereq.validate = True

                monitor()

                assert all(visited)
def setup(self, model, dataset):
    """
    Compiles the theano functions needed for the train method.

    Parameters
    ----------
    model : a Model instance
    dataset : Dataset
    """
    self.i = 0

    if self.cost is None:
        self.cost = model.get_default_cost()

    inf_params = [param for param in model.get_params()
                  if np.any(np.isinf(param.get_value()))]
    if len(inf_params) > 0:
        raise ValueError("These params are Inf: " + str(inf_params))
    if any([np.any(np.isnan(param.get_value()))
            for param in model.get_params()]):
        nan_params = [param for param in model.get_params()
                      if np.any(np.isnan(param.get_value()))]
        raise ValueError("These params are NaN: " + str(nan_params))
    self.model = model

    self._synchronize_batch_size(model)
    model._test_batch_size = self.batch_size
    self.monitor = Monitor.get_monitor(model)
    self.monitor._sanity_check()

    # test if force batch size and batch size
    if getattr(model, "force_batch_size", False) and \
       any(dataset.get_design_matrix().shape[0] % self.batch_size != 0
           for dataset in self.monitoring_dataset.values()) and \
       not has_uniform_batch_size(self.monitor_iteration_mode):
        raise ValueError("Dataset size is not a multiple of batch size."
                         "You should set monitor_iteration_mode to "
                         "even_sequential, even_shuffled_sequential or "
                         "even_batchwise_shuffled_sequential")

    data_specs = self.cost.get_data_specs(self.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    # Build a flat tuple of Theano Variables, one for each space.
    # We want that so that if the same space/source is specified
    # more than once in data_specs, only one Theano Variable
    # is generated for it, and the corresponding value is passed
    # only once to the compiled Theano function.
    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s[%s]' % (self.__class__.__name__, source)
        arg = space.make_theano_batch(name=name,
                                      batch_size=self.batch_size)
        theano_args.append(arg)
    theano_args = tuple(theano_args)

    # Methods of `self.cost` need args to be passed in a format compatible
    # with data_specs
    nested_args = mapping.nest(theano_args)
    fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
    self.on_load_batch = fixed_var_descr.on_load_batch

    cost_value = self.cost.expr(model, nested_args,
                                **fixed_var_descr.fixed_vars)

    if cost_value is not None and cost_value.name is None:
        # Concatenate the name of all tensors in theano_args !?
        cost_value.name = 'objective'

    # Set up monitor to model the objective value, learning rate,
    # momentum (if applicable), and extra channels defined by
    # the cost
    learning_rate = self.learning_rate
    if self.monitoring_dataset is not None:
        if (self.monitoring_batch_size is None and
                self.monitoring_batches is None):
            self.monitoring_batch_size = self.batch_size
            self.monitoring_batches = self.batches_per_iter
        self.monitor.setup(dataset=self.monitoring_dataset,
                           cost=self.cost,
                           batch_size=self.monitoring_batch_size,
                           num_batches=self.monitoring_batches,
                           extra_costs=self.monitoring_costs,
                           mode=self.monitor_iteration_mode)
        dataset_name = self.monitoring_dataset.keys()[0]
        monitoring_dataset = self.monitoring_dataset[dataset_name]
        # TODO: have Monitor support non-data-dependent channels
        self.monitor.add_channel(name='learning_rate',
                                 ipt=None,
                                 val=learning_rate,
                                 data_specs=(NullSpace(), ''),
                                 dataset=monitoring_dataset)

        if self.learning_rule:
            self.learning_rule.add_channels_to_monitor(
                self.monitor,
                monitoring_dataset)

    params = list(model.get_params())
    assert len(params) > 0
    for i, param in enumerate(params):
        if param.name is None:
            param.name = 'sgd_params[%d]' % i
    self.params = params

    grads, updates = self.cost.get_gradients(model, nested_args,
                                             **fixed_var_descr.fixed_vars)
    if not isinstance(grads, OrderedDict):
        raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                        "something with" + str(type(grads)) + "as its " +
                        "first member. Expected OrderedDict.")

    for param in grads:
        assert param in params
    for param in params:
        assert param in grads

    lr_scalers = model.get_lr_scalers()

    for key in lr_scalers:
        if key not in params:
            raise ValueError("Tried to scale the learning rate on " +
                             str(key) +
                             " which is not an optimization parameter.")

    assert len(updates.keys()) == 0

    def get_func(learn_discriminator, learn_generator):
        updates = OrderedDict()

        assert (learn_discriminator or learn_generator) and \
            not (learn_discriminator and learn_generator)

        if learn_discriminator:
            cur_params = model.discriminator.get_params()
        else:
            cur_params = model.generator.get_params()

        cur_grads = OrderedDict()
        for param in cur_params:
            cur_grads[param] = grads[param]

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
            assert grads[param].dtype == param.dtype

        cur_lr_scalers = OrderedDict()
        for param in cur_params:
            if param in lr_scalers:
                lr_scaler = lr_scalers[param]
                cur_lr_scalers[param] = lr_scaler

        log.info('Parameter and initial learning rate summary:')
        for param in cur_params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * cur_lr_scalers.get(param, 1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.learning_rule:
            updates.update(self.learning_rule.get_updates(
                learning_rate, cur_grads, cur_lr_scalers))
        else:
            # Use standard SGD updates with fixed learning rate.
            updates.update(dict(safe_zip(params,
                                         [param - learning_rate *
                                          lr_scalers.get(param, 1.) *
                                          grads[param]
                                          for param in params])))

        for param in cur_params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.modify_updates(updates)
        for param in cur_params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if np.any(np.isinf(update_val)):
                    raise ValueError("debug value of %s contains infs" %
                                     update.name)
                if np.any(np.isnan(update_val)):
                    raise ValueError("debug value of %s contains nans" %
                                     update.name)

        with log_timing(log, 'Compiling sgd_update'):
            return function(theano_args,
                            updates=updates,
                            name='sgd_update',
                            on_unused_input='ignore',
                            mode=self.theano_function_mode)

    self.d_func = get_func(1, 0)
    self.g_func = get_func(0, 1)
def setup(self, model, dataset):
    """
    Compiles the theano functions needed for the train method.

    Parameters
    ----------
    model : a Model instance
    dataset : Dataset
    """
    if self.cost is None:
        self.cost = model.get_default_cost()

    inf_params = [param for param in model.get_params()
                  if contains_inf(param.get_value())]
    if len(inf_params) > 0:
        raise ValueError("These params are Inf: " + str(inf_params))
    if any([contains_nan(param.get_value()) for param in model.get_params()]):
        nan_params = [param for param in model.get_params()
                      if contains_nan(param.get_value())]
        raise ValueError("These params are NaN: " + str(nan_params))
    self.model = model

    self._synchronize_batch_size(model)
    model._test_batch_size = self.batch_size
    self.monitor = Monitor.get_monitor(model)
    self.monitor._sanity_check()

    # Check whether the model forces a fixed batch size and, if so, whether
    # the datasets divide evenly into batches of that size.
    has_force_batch_size = getattr(model, "force_batch_size", False)
    train_dataset_is_uneven = dataset.get_num_examples() % self.batch_size != 0

    has_monitoring_datasets = (
        self.monitoring_dataset is not None and len(self.monitoring_dataset) > 0
    )

    if has_monitoring_datasets:
        monitoring_datasets_are_uneven = any(
            d.get_num_examples() % self.batch_size != 0
            for d in self.monitoring_dataset.values()
        )
    else:
        monitoring_datasets_are_uneven = False  # or True, it doesn't matter

    if (
        has_force_batch_size
        and train_dataset_is_uneven
        and not has_uniform_batch_size(self.train_iteration_mode)
    ):
        raise ValueError(
            "Dataset size is not a multiple of batch size. "
            "You should set train_iteration_mode (and "
            "maybe monitor_iteration_mode) to "
            "even_sequential, even_shuffled_sequential or "
            "even_batchwise_shuffled_sequential"
        )

    if (
        has_force_batch_size
        and has_monitoring_datasets
        and monitoring_datasets_are_uneven
        and not has_uniform_batch_size(self.monitor_iteration_mode)
    ):
        raise ValueError(
            "Dataset size is not a multiple of batch size. "
            "You should set monitor_iteration_mode to "
            "even_sequential, even_shuffled_sequential or "
            "even_batchwise_shuffled_sequential"
        )

    data_specs = self.cost.get_data_specs(self.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    # Build a flat tuple of Theano Variables, one for each space.
    # We want that so that if the same space/source is specified
    # more than once in data_specs, only one Theano Variable
    # is generated for it, and the corresponding value is passed
    # only once to the compiled Theano function.
    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = "%s[%s]" % (self.__class__.__name__, source)
        arg = space.make_theano_batch(name=name, batch_size=self.batch_size)
        theano_args.append(arg)
    theano_args = tuple(theano_args)

    # Methods of `self.cost` need args to be passed in a format compatible
    # with data_specs
    nested_args = mapping.nest(theano_args)
    fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
    self.on_load_batch = fixed_var_descr.on_load_batch

    cost_value = self.cost.expr(model, nested_args, **fixed_var_descr.fixed_vars)

    if cost_value is not None and cost_value.name is None:
        # Concatenate the name of all tensors in theano_args !?
        cost_value.name = "objective"

    # Set up monitor to model the objective value, learning rate,
    # momentum (if applicable), and extra channels defined by
    # the cost
    learning_rate = self.learning_rate
    if self.monitoring_dataset is not None:
        if self.monitoring_batch_size is None and self.monitoring_batches is None:
            self.monitoring_batch_size = self.batch_size
            self.monitoring_batches = self.batches_per_iter
        self.monitor.setup(
            dataset=self.monitoring_dataset,
            cost=self.cost,
            batch_size=self.monitoring_batch_size,
            num_batches=self.monitoring_batches,
            extra_costs=self.monitoring_costs,
            mode=self.monitor_iteration_mode,
        )
        dataset_name = self.monitoring_dataset.keys()[0]
        monitoring_dataset = self.monitoring_dataset[dataset_name]
        # TODO: have Monitor support non-data-dependent channels
        self.monitor.add_channel(
            name="learning_rate",
            ipt=None,
            val=learning_rate,
            data_specs=(NullSpace(), ""),
            dataset=monitoring_dataset,
        )

        if self.learning_rule:
            self.learning_rule.add_channels_to_monitor(self.monitor, monitoring_dataset)

    params = list(model.get_params())
    assert len(params) > 0
    for i, param in enumerate(params):
        if param.name is None:
            param.name = "sgd_params[%d]" % i

    grads, updates = self.cost.get_gradients(model, nested_args, **fixed_var_descr.fixed_vars)
    if not isinstance(grads, OrderedDict):
        raise TypeError(
            str(type(self.cost)) + ".get_gradients returned "
            + "something with " + str(type(grads)) + " as its "
            + "first member. Expected OrderedDict."
        )

    for param in grads:
        assert param in params
    for param in params:
        assert param in grads

    for param in grads:
        if grads[param].name is None and cost_value is not None:
            grads[param].name = "grad(%(costname)s, %(paramname)s)" % {
                "costname": cost_value.name,
                "paramname": param.name,
            }
        assert grads[param].dtype == param.dtype

    lr_scalers = model.get_lr_scalers()

    for key in lr_scalers:
        if key not in params:
            raise ValueError(
                "Tried to scale the learning rate on "
                + str(key)
                + " which is not an optimization parameter."
            )

    log.info("Parameter and initial learning rate summary:")
    for param in params:
        param_name = param.name
        if param_name is None:
            param_name = "anon_param"
        lr = learning_rate.get_value() * lr_scalers.get(param, 1.0)
        log.info("\t" + param_name + ": " + str(lr))

    if self.learning_rule:
        updates.update(self.learning_rule.get_updates(learning_rate, grads, lr_scalers))
    else:
        # Use standard SGD updates with fixed learning rate.
        updates.update(
            dict(
                safe_zip(
                    params,
                    [
                        param - learning_rate * lr_scalers.get(param, 1.0) * grads[param]
                        for param in params
                    ],
                )
            )
        )

    for param in params:
        if updates[param].name is None:
            updates[param].name = "sgd_update(" + param.name + ")"
    model.modify_updates(updates)
    for param in params:
        update = updates[param]
        if update.name is None:
            update.name = "censor(sgd_update(" + param.name + "))"
        for update_val in get_debug_values(update):
            if contains_inf(update_val):
                raise ValueError("debug value of %s contains infs" % update.name)
            if contains_nan(update_val):
                raise ValueError("debug value of %s contains nans" % update.name)

    with log_timing(log, "Compiling sgd_update"):
        self.sgd_update = function(
            theano_args,
            updates=updates,
            name="sgd_update",
            on_unused_input="ignore",
            mode=self.theano_function_mode,
        )
    self.params = params