def main():
    var = theano.shared(T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(), name='W')
    updates = [(var, add_uniform(input=var, noise_level=.02))]

    stats = get_stats(var)
    l1 = stats.pop('l1')
    l2 = stats.pop('l2')
    min = stats.pop('min')
    max = stats.pop('max')
    var = stats.pop('var')
    std = stats.pop('std')
    mean = stats.pop('mean')

    mean_monitor = Monitor('mean', mean, train=True, valid=True,
                           out_service=FileService('outs/mean.txt'))
    var_monitor = Monitor('var', var, out_service=FileService('outs/var.txt'))

    w_channel = MonitorsChannel('W', monitors=mean_monitor)
    stat_channel = MonitorsChannel('stats', monitors=[var_monitor])
    monitors = [w_channel, stat_channel]

    train_collapsed_raw = collapse_channels(monitors, train=True)
    train_collapsed = OrderedDict([(item[0], item[1]) for item in train_collapsed_raw])
    train_services = OrderedDict([(item[0], item[2]) for item in train_collapsed_raw])
    valid_collapsed_raw = collapse_channels(monitors, valid=True)
    valid_collapsed = OrderedDict([(item[0], item[1]) for item in valid_collapsed_raw])
    valid_services = OrderedDict([(item[0], item[2]) for item in valid_collapsed_raw])

    log.debug('compiling...')
    f = theano.function(inputs=[], outputs=list(train_collapsed.values()), updates=updates)
    f2 = theano.function(inputs=[], outputs=list(valid_collapsed.values()), updates=updates)
    log.debug('done')

    t1 = time.time()

    for epoch in range(10):
        t = time.time()
        log.debug(epoch)
        vals = f()
        m = OrderedDict(zip(train_collapsed.keys(), vals))
        for name, service in train_services.items():
            if name in m:
                service.write(m[name], "train")
        log.debug('----- ' + make_time_units_string(time.time() - t))

    for epoch in range(10):
        t = time.time()
        log.debug(epoch)
        vals = f2()
        m = OrderedDict(zip(valid_collapsed.keys(), vals))
        for name, service in valid_services.items():
            if name in m:
                service.write(m[name], "valid")
        log.debug('----- ' + make_time_units_string(time.time() - t))

    log.debug("TOTAL TIME " + make_time_units_string(time.time() - t1))

def main():
    var = theano.shared(T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(), name='W')
    updates = [(var, add_uniform(input=var, noise_level=.02))]

    stats = get_stats(var)
    l1 = stats.pop('l1')
    l2 = stats.pop('l2')
    min = stats.pop('min')
    max = stats.pop('max')
    var = stats.pop('var')
    std = stats.pop('std')
    mean = stats.pop('mean')

    mean_monitor = Monitor('mean', mean, train=True, valid=True,
                           out_service=FileService('outs/mean.txt'))
    var_monitor = Monitor('var', var, out_service=FileService('outs/var.txt'))

    w_channel = MonitorsChannel('W', monitors=mean_monitor)
    stat_channel = MonitorsChannel('stats', monitors=[var_monitor])
    monitors = [w_channel, stat_channel]

    train_collapsed_raw = collapse_channels(monitors, train=True)
    train_collapsed = OrderedDict([(item[0], item[1]) for item in train_collapsed_raw])
    train_services = OrderedDict([(item[0], item[2]) for item in train_collapsed_raw])
    valid_collapsed_raw = collapse_channels(monitors, valid=True)
    valid_collapsed = OrderedDict([(item[0], item[1]) for item in valid_collapsed_raw])
    valid_services = OrderedDict([(item[0], item[2]) for item in valid_collapsed_raw])

    log.debug('compiling...')
    f = theano.function(inputs=[], outputs=train_collapsed.values(), updates=updates)
    f2 = theano.function(inputs=[], outputs=valid_collapsed.values(), updates=updates)
    log.debug('done')

    t1 = time.time()

    for epoch in range(10):
        t = time.time()
        log.debug(epoch)
        vals = f()
        m = OrderedDict(zip(train_collapsed.keys(), vals))
        for name, service in train_services.items():
            if name in m:
                service.write(m[name], TRAIN)
        log.debug('----- ' + make_time_units_string(time.time() - t))

    for epoch in range(10):
        t = time.time()
        log.debug(epoch)
        vals = f2()
        m = OrderedDict(zip(valid_collapsed.keys(), vals))
        for name, service in valid_services.items():
            if name in m:
                service.write(m[name], VALID)
        log.debug('----- ' + make_time_units_string(time.time() - t))

    log.debug("TOTAL TIME " + make_time_units_string(time.time() - t1))

def main():
    w = theano.shared(T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(), name='W')
    updates = [(w, add_uniform(input=w, noise_level=.02))]

    stats = get_stats(w)
    l1 = stats.pop('l1')
    l2 = stats.pop('l2')
    min = stats.pop('min')
    max = stats.pop('max')
    var = stats.pop('var')
    std = stats.pop('std')
    mean = stats.pop('mean')

    mean_monitor = Monitor('mean', mean, train=True, valid=True)
    stat_monitor = Monitor('max', max)

    w_channel = MonitorsChannel('W', monitors=mean_monitor)
    stat_channel = MonitorsChannel('stats', monitors=[stat_monitor])
    monitors = [w_channel, stat_channel]

    train_collapsed = collapse_channels(monitors, train=True)
    train_collapsed = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
    valid_collapsed = collapse_channels(monitors, valid=True)
    valid_collapsed = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])

    plot = Plot(bokeh_doc_name='test_plots', monitor_channels=monitors, open_browser=True)

    log.debug('compiling...')
    f = theano.function(inputs=[], outputs=list(train_collapsed.values()), updates=updates)
    f2 = theano.function(inputs=[], outputs=list(valid_collapsed.values()), updates=updates)
    log.debug('done')

    t1 = time.time()

    for epoch in range(100):
        t = time.time()
        log.debug(epoch)
        vals = f()
        m = OrderedDict(zip(train_collapsed.keys(), vals))
        plot.update_plots(epoch, m)
        time.sleep(0.02)
        log.debug('----- ' + make_time_units_string(time.time() - t))

    for epoch in range(100):
        t = time.time()
        log.debug(epoch)
        vals = f2()
        m = OrderedDict(zip(valid_collapsed.keys(), vals))
        plot.update_plots(epoch, m)
        time.sleep(0.02)
        log.debug('----- ' + make_time_units_string(time.time() - t))

    log.debug("TOTAL TIME " + make_time_units_string(time.time() - t1))

def train(self, monitor_channels=None, train_outservice=None, plot=None):
    """
    This method performs the training!!! It is an online training method that goes over minibatches
    from the dataset for a number of epochs, updating parameters after each minibatch.

    You can disrupt training with a KeyboardInterrupt and it should exit/save parameters gracefully.

    Parameters
    ----------
    monitor_channels : list(MonitorsChannel or Monitor), optional
        The list of channels or monitors containing monitor expressions/variables to compile and
        evaluate on the data.
    train_outservice : OutService, optional
        The OutService to use for the automatically created train_cost monitor. Default of None just
        outputs to logs.
    plot : Plot, optional
        The Plot object to use if we want to graph the outputs (uses bokeh server).
    """
    if not self.model:
        log.error("No self.model for the Optimizer!")
        raise AssertionError("Needs to be initialized with a Model! (Or something went wrong if train() "
                             "was called from the Model. Try initializing the Optimizer with the model "
                             "param and calling optimizer.train().)")

    #########################
    # gradients and updates #
    #########################
    # grab the model parameters to use during training
    self.params = self.model.get_params()
    # Now create the training cost function for the model to use while training - update parameters
    # gradient!
    gradients = grad(cost=self.loss_expression, wrt=list(self.params.values()))
    # now create the dictionary mapping the parameter with its gradient
    gradients = OrderedDict(
        [(param, g) for param, g in zip(list(self.params.values()), gradients)]
    )
    # clip gradients if we want.
    gradients = clip_gradients(gradients, self.grad_clip, self.hard_clip)

    # Calculate the optimizer updates each run
    # This is where the magic happens for a lot of sub-implementations of SGD!
    # It tells how to update the params each training epoch
    gradient_updates = self.get_updates(gradients)

    # Combine the updates from the model also if applicable
    updates = self.model.get_updates()
    if updates:
        updates.update(gradient_updates)
    else:
        updates = gradient_updates

    log.info("%s params: %s", self.model._classname, str(list(self.params.keys())))

    ############
    # monitors #
    ############
    # deal with the monitor channels if they were given (or take them from the plot)
    if monitor_channels is None and plot is not None and len(plot.channels) > 0:
        monitor_channels = plot.channels
    self.train_monitors_dict = {}
    self.valid_monitors_dict = {}
    self.test_monitors_dict = {}
    self.train_monitors_outservice_dict = {}
    self.valid_monitors_outservice_dict = {}
    self.test_monitors_outservice_dict = {}
    if monitor_channels:
        # collapse the appropriate monitors into their (name, expression, out_service) tuples
        train_collapsed = collapse_channels(monitor_channels, train=True)
        valid_collapsed = collapse_channels(monitor_channels, valid=True)
        test_collapsed = collapse_channels(monitor_channels, test=True)
        # get name: expression dictionary
        self.train_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
        self.valid_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])
        self.test_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in test_collapsed])
        # get name: outservice dictionary
        self.train_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in train_collapsed])
        self.valid_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in valid_collapsed])
        self.test_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in test_collapsed])
    # finally deal with an outservice provided to monitor training cost
    self.train_outservice = train_outservice
    # remove redundant files made by the fileservice for the train monitor.
    # TODO: THIS FEELS LIKE A HACK. I don't like it.
    if isinstance(self.train_outservice, FileService):
        os.remove(self.train_outservice.valid_filename)
        os.remove(self.train_outservice.test_filename)

    #######################################
    # compile train and monitor functions #
    #######################################
    function_input = raise_to_list(self.model.get_inputs())
    if self.loss_targets is not None:
        function_input += self.loss_targets
    # Compile the training function!
    log.info('Compiling f_learn function for model %s...', self.model._classname)
    t = time.time()
    f_learn = function(inputs=function_input,
                       updates=updates,
                       outputs=[self.loss_expression] + list(self.train_monitors_dict.values()),
                       name='f_learn')
    log.info('f_learn compilation took %s', make_time_units_string(time.time() - t))

    # figure out if we want valid and test (monitors)
    self.valid_flag = (self.dataset.valid_inputs is not None) and (len(self.valid_monitors_dict) > 0)
    self.test_flag = (self.dataset.test_inputs is not None) and (len(self.test_monitors_dict) > 0)
    # Now compile the monitor functions!
    log.debug("Compiling monitor functions...")
    monitor_t = time.time()
    # valid monitors
    if self.valid_flag:
        self.valid_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.valid_monitors_dict.values()),
            name='valid_monitor_function'
        )
    else:
        self.valid_monitor_function = None

    # test monitors
    if self.test_flag:
        self.test_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.test_monitors_dict.values()),
            name='test_monitor_function'
        )
    else:
        self.test_monitor_function = None

    log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

    ##################
    # start training #
    ##################
    log.info("-----------TRAINING %s FOR %d EPOCHS-----------", self.model._classname, self.n_epoch)
    self.STOP = False
    self.epoch_counter = 0
    # reset any decay params
    for decay_param in self.get_decay_params():
        decay_param.reset()
    self.times = []
    self.best_cost = numpy.inf
    self.best_params = None
    self.patience = 0

    t = time.time()

    while not self.STOP:
        try:
            self.STOP = self._perform_one_epoch(f_learn, plot)
        except KeyboardInterrupt:
            log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
            self.STOP = True

    # save params
    if self.best_params is not None:
        log.debug("Restoring best model parameters...")
        for best_param, param_value in self.best_params.items():
            self.params[best_param].set_value(param_value, borrow=False)
    log.debug("Saving model parameters...")
    self.model.save_params('trained_epoch_' + str(self.epoch_counter))

    log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

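def example_train_with_monitors(model, dataset):
    """
    Hypothetical usage sketch (not part of the original source): shows how the Monitor,
    MonitorsChannel, FileService, and Plot objects used in the examples above could be
    wired into the train() method defined above. The SGD optimizer name and its
    constructor arguments are assumptions for illustration; only train()'s signature and
    the monitor classes appear in this file.
    """
    # monitor the model's training cost on both train and valid sets, writing values to a file
    cost_monitor = Monitor('train_cost', model.get_train_cost(), train=True, valid=True,
                           out_service=FileService('outs/train_cost.txt'))
    cost_channel = MonitorsChannel('cost', monitors=[cost_monitor])
    # optional live plotting through the bokeh server
    plot = Plot(bokeh_doc_name='training_run', monitor_channels=[cost_channel], open_browser=True)
    optimizer = SGD(model=model, dataset=dataset)  # assumed constructor signature
    optimizer.train(monitor_channels=[cost_channel], plot=plot)
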
def main():
    var = theano.shared(T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(), name='W')
    updates = [(var, add_uniform(input=var, noise_level=.02))]

    stats = get_stats(var)
    l1 = stats.pop('l1')
    l2 = stats.pop('l2')
    min = stats.pop('min')
    max = stats.pop('max')
    var = stats.pop('var')
    std = stats.pop('std')
    mean = stats.pop('mean')

    mean_monitor = Monitor('mean', mean, train=True, valid=True)
    var_monitor = Monitor('var', var)

    w_channel = MonitorsChannel('W', monitors=mean_monitor)
    stat_channel = MonitorsChannel('stats', monitors=[var_monitor])
    monitors = [w_channel, stat_channel]

    train_collapsed = collapse_channels(monitors, train=True)
    train_collapsed = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
    valid_collapsed = collapse_channels(monitors, valid=True)
    valid_collapsed = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])

    plot = Plot(bokeh_doc_name='test_plots', monitor_channels=monitors, open_browser=True)

    log.debug('compiling...')
    f = theano.function(inputs=[], outputs=list(train_collapsed.values()), updates=updates)
    f2 = theano.function(inputs=[], outputs=list(valid_collapsed.values()), updates=updates)
    log.debug('done')

    t1 = time.time()

    for epoch in range(100):
        t = time.time()
        log.debug(epoch)
        vals = f()
        m = OrderedDict(zip(train_collapsed.keys(), vals))
        plot.update_plots(epoch, m)
        log.debug('----- ' + make_time_units_string(time.time() - t))

    for epoch in range(100):
        t = time.time()
        log.debug(epoch)
        vals = f2()
        m = OrderedDict(zip(valid_collapsed.keys(), vals))
        plot.update_plots(epoch, m)
        log.debug('----- ' + make_time_units_string(time.time() - t))

    log.debug("TOTAL TIME " + make_time_units_string(time.time() - t1))

def train(self, monitor_channels=None, train_outservice=None, plot=None, additional_cost=None):
    """
    This method performs the training!!! It is an online training method that goes over minibatches
    from the dataset for a number of epochs, updating parameters after each minibatch.

    You can disrupt training with a KeyboardInterrupt and it should exit/save parameters gracefully.

    Parameters
    ----------
    monitor_channels : list(MonitorsChannel or Monitor), optional
        The list of channels or monitors containing monitor expressions/variables to compile and
        evaluate on the data.
    train_outservice : OutService, optional
        The OutService to use for the automatically created train_cost monitor. Default of None just
        outputs to logs.
    plot : Plot, optional
        The Plot object to use if we want to graph the outputs (uses bokeh server).
    additional_cost : theano expression or list(theano expression), optional
        Any additional cost expressions to use during training (things like regularization). These
        will be summed with the existing cost.
    """
    if not self.model:
        log.error("No self.model for the Optimizer!")
        raise AssertionError("Needs to be initialized with a Model! (Or something went wrong if train() "
                             "was called from the Model. Try initializing the Optimizer with the model "
                             "param and calling optimizer.train().)")

    #####################################################
    # handle additional costs (normally regularization) #
    #####################################################
    # Create the gradient updates for the model - make sure to handle the possible
    # list of costs used for pretraining of certain parts of the model.
    train_costs = raise_to_list(self.model.get_train_cost())
    # deal with any other additional costs (like regularization, etc.)
    if additional_cost is not None:
        additional_costs = raise_to_list(additional_cost)
        if len(additional_costs) > 1:
            additional_cost = T.sum(additional_costs)

    #########################
    # gradients and updates #
    #########################
    train_updates = []
    self.gradients = []
    for i, train_cost in enumerate(train_costs):
        # Now create the training cost function for the model to use while training - update parameters
        # gradient!
        if len(train_costs) > 1 and additional_cost is not None:
            log.warning("additional_cost will double count with gradients during layer-wise pretraining!")
            warnings.warn("additional_cost will double count with gradients during layer-wise pretraining!")
        # TODO: additional_cost will double count with gradients during layer-wise pretraining.
        # Need to somehow make w.r.t. params appropriate for the individual training costs.
        gradients, _ = self.model.get_gradient(cost=train_cost, additional_cost=additional_cost)
        # clip gradients if we want.
        gradients = clip_gradients(gradients, self.grad_clip, self.hard_clip)
        # append to list
        self.gradients.append(gradients)

        # Calculate the optimizer updates each run
        # This is where the magic happens for a lot of sub-implementations of SGD!
        # It tells how to update the params each training epoch
        gradient_updates = self.get_updates(gradients)

        # Combine the updates from the model also if applicable
        updates = self.model.get_updates()
        if updates:
            updates.update(gradient_updates)
        else:
            updates = gradient_updates
        train_updates.append(updates)

    # grab the model parameters to use during training
    self.params = self.model.get_params()
    log.info("%s params: %s", str(type(self.model)), str(self.params))

    ############
    # monitors #
    ############
    # deal with the monitor channels if they were given (or take them from the plot)
    if monitor_channels is None and plot is not None and len(plot.channels) > 0:
        monitor_channels = plot.channels
    self.train_monitors_dict = {}
    self.valid_monitors_dict = {}
    self.test_monitors_dict = {}
    self.train_monitors_outservice_dict = {}
    self.valid_monitors_outservice_dict = {}
    self.test_monitors_outservice_dict = {}
    if monitor_channels:
        # collapse the appropriate monitors into their (name, expression, out_service) tuples
        train_collapsed = collapse_channels(monitor_channels, train=True)
        valid_collapsed = collapse_channels(monitor_channels, valid=True)
        test_collapsed = collapse_channels(monitor_channels, test=True)
        # get name: expression dictionary
        self.train_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
        self.valid_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])
        self.test_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in test_collapsed])
        # get name: outservice dictionary
        self.train_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in train_collapsed])
        self.valid_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in valid_collapsed])
        self.test_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in test_collapsed])
    # finally deal with an outservice provided to monitor training cost
    self.train_outservice = train_outservice
    # remove redundant files made by the fileservice for the train monitor.
    # TODO: THIS FEELS LIKE A HACK. I don't like it.
    if isinstance(self.train_outservice, FileService):
        os.remove(self.train_outservice.valid_filename)
        os.remove(self.train_outservice.test_filename)

    #######################################
    # compile train and monitor functions #
    #######################################
    function_input = raise_to_list(self.model.get_inputs()) + raise_to_list(self.model.get_targets())
    train_functions = []
    for i, (updates, train_cost) in enumerate(zip(train_updates, train_costs)):
        # Compile the training function!
        log.info('Compiling f_learn %d/%d function for model %s...', i + 1, len(train_updates),
                 str(type(self.model)))
        t = time.time()
        f_learn = function(inputs=function_input,
                           updates=updates,
                           outputs=[train_cost] + list(self.train_monitors_dict.values()),
                           name='f_learn_%d' % i)
        log.info('f_learn %d compilation took %s', i + 1, make_time_units_string(time.time() - t))
        train_functions.append(f_learn)

    # figure out if we want valid and test (monitors)
    self.valid_flag = (self.dataset.valid_inputs is not None) and (len(self.valid_monitors_dict) > 0)
    self.test_flag = (self.dataset.test_inputs is not None) and (len(self.test_monitors_dict) > 0)
    # Now compile the monitor functions!
    log.debug("Compiling monitor functions...")
    monitor_t = time.time()
    # valid monitors
    if self.valid_flag:
        self.valid_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.valid_monitors_dict.values()),
            name='valid_monitor_function'
        )
    else:
        self.valid_monitor_function = None

    # test monitors
    if self.test_flag:
        self.test_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.test_monitors_dict.values()),
            name='test_monitor_function'
        )
    else:
        self.test_monitor_function = None

    log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

    ##################
    # start training #
    ##################
    # make sure to deal with a list of train_cost functions - for layer-wise pretraining!
    # this list of training functions was created during __init__()
    start_time = time.time()
    for func_i, train_function in enumerate(train_functions):
        log.info("-----------TRAINING %s function %d/%d FOR %d EPOCHS-----------",
                 str(type(self.model)), func_i + 1, len(train_functions), self.n_epoch)

        self.STOP = False
        self.epoch_counter = 0
        # reset any decay params
        for decay_param in self.get_decay_params():
            decay_param.reset()
        self.times = []
        self.best_cost = numpy.inf
        self.best_params = None
        self.patience = 0

        t = time.time()

        while not self.STOP:
            try:
                self.STOP = self._perform_one_epoch(train_function, plot)
            except KeyboardInterrupt:
                log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
                self.STOP = True

        # save params
        if self.best_params is not None:
            log.debug("Restoring best model parameters...")
            set_shared_values(self.params, self.best_params)
        log.debug("Saving model parameters...")
        self.model.save_params('trained_epoch_' + str(self.epoch_counter))

        log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

    log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
             str(type(self.model)), make_time_units_string(time.time() - start_time))

def train(self, monitor_channels=None, plot=None):
    """
    This method performs the training!!! It is an online training method that goes over minibatches
    from the dataset for a number of epochs, updating parameters after each minibatch.

    You can disrupt training with a KeyboardInterrupt and it should exit/save parameters gracefully.

    Parameters
    ----------
    monitor_channels : list(MonitorsChannel or Monitor), optional
        The list of channels or monitors containing monitor expressions/variables to compile and
        evaluate on the data.
    plot : Plot, optional
        The Plot object to use if we want to graph the outputs (uses bokeh server).
    """
    if not self.model:
        log.error("No self.model for the Optimizer!")
        raise AssertionError("Needs to be initialized with a Model! (Or something went wrong if train() "
                             "was called from the Model. Try initializing the Optimizer with the model "
                             "param and calling optimizer.train().)")

    #########################
    # gradients and updates #
    #########################
    # grab the model parameters to use during training
    self.params = self.model.get_params()
    # Now create the training cost function for the model to use while training - update parameters
    # gradient!
    # First find the basic variables that will be updated
    params = set()
    for param in self.params.values():
        params.update(base_variables(param))
    params = list(params)
    gradients = grad(cost=self.loss_expression, wrt=params)
    # now create the dictionary mapping the parameter with its gradient
    gradients = OrderedDict(
        [(param, g) for param, g in zip(params, gradients)]
    )
    # clip gradients if we want.
    gradients = clip_gradients(gradients, self.grad_clip, self.hard_clip)

    # Calculate the optimizer updates each run
    # This is where the magic happens for a lot of sub-implementations of SGD!
    # It tells how to update the params each training epoch
    gradient_updates = self.get_updates(gradients)

    # Combine the updates from the model also if applicable
    updates = self.model.get_updates()
    if updates:
        updates.update(gradient_updates)
    else:
        updates = gradient_updates

    log.info("%s params: %s", self.model._classname, str(list(self.params.keys())))

    ############
    # monitors #
    ############
    # deal with the monitor channels if they were given (or take them from the plot)
    if monitor_channels is None and plot is not None and len(plot.channels) > 0:
        monitor_channels = plot.channels
    self.train_monitors_dict = {}
    self.valid_monitors_dict = {}
    self.test_monitors_dict = {}
    self.train_monitors_outservice_dict = {}
    self.valid_monitors_outservice_dict = {}
    self.test_monitors_outservice_dict = {}
    if monitor_channels:
        # collapse the appropriate monitors into their (name, expression, out_service) tuples
        train_collapsed = collapse_channels(monitor_channels, train=True)
        valid_collapsed = collapse_channels(monitor_channels, valid=True)
        test_collapsed = collapse_channels(monitor_channels, test=True)
        # get name: expression dictionary
        self.train_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
        self.valid_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])
        self.test_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in test_collapsed])
        # get name: outservice dictionary
        self.train_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in train_collapsed])
        self.valid_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in valid_collapsed])
        self.test_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in test_collapsed])

    #######################################
    # compile train and monitor functions #
    #######################################
    function_input = raise_to_list(self.model.get_inputs())
    if self.loss_targets is not None:
        function_input += self.loss_targets
    # Compile the training function!
    log.info('Compiling f_learn function for model %s...', self.model._classname)
    t = time.time()
    f_learn = function(inputs=function_input,
                       updates=updates,
                       outputs=[self.loss_expression] + list(self.train_monitors_dict.values()),
                       name='f_learn')
    log.info('f_learn compilation took %s', make_time_units_string(time.time() - t))

    # figure out if we want valid and test (monitors)
    self.valid_flag = (self.dataset.valid_inputs is not None) and (len(self.valid_monitors_dict) > 0)
    self.test_flag = (self.dataset.test_inputs is not None) and (len(self.test_monitors_dict) > 0)
    # Now compile the monitor functions!
    log.debug("Compiling monitor functions...")
    monitor_t = time.time()
    # valid monitors
    if self.valid_flag:
        self.valid_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.valid_monitors_dict.values()),
            name='valid_monitor_function'
        )
    else:
        self.valid_monitor_function = None

    # test monitors
    if self.test_flag:
        self.test_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.test_monitors_dict.values()),
            name='test_monitor_function'
        )
    else:
        self.test_monitor_function = None

    log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

    ##################
    # start training #
    ##################
    log.info("-----------TRAINING %s FOR %d EPOCHS-----------", self.model._classname, self.n_epoch)
    self.STOP = False
    self.epoch_counter = 0
    # reset any decay params
    for decay_param in self.get_decay_params():
        decay_param.reset()
    self.times = []
    self.best_cost = numpy.inf
    self.best_params = None
    self.patience = 0

    t = time.time()

    while not self.STOP:
        try:
            self.STOP = self._perform_one_epoch(f_learn, plot)
        except KeyboardInterrupt:
            log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
            self.STOP = True

    # save params
    if self.best_params is not None:
        log.debug("Restoring best model parameters...")
        self.model.set_param_values(self.best_params, borrow=False)
    log.debug("Saving model parameters...")
    self.model.save_params('trained_epoch_' + str(self.epoch_counter))

    log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

def train(self, monitor_channels=None, train_outservice=None, plot=None, additional_cost=None):
    """
    This method performs the training!!! It is an online training method that goes over minibatches
    from the dataset for a number of epochs, updating parameters after each minibatch.

    You can disrupt training with a KeyboardInterrupt and it should exit/save parameters gracefully.

    Parameters
    ----------
    monitor_channels : list(MonitorsChannel or Monitor), optional
        The list of channels or monitors containing monitor expressions/variables to compile and
        evaluate on the data.
    train_outservice : OutService, optional
        The OutService to use for the automatically created train_cost monitor. Default of None just
        outputs to logs.
    plot : Plot, optional
        The Plot object to use if we want to graph the outputs (uses bokeh server).
    additional_cost : theano expression or list(theano expression), optional
        Any additional cost expressions to use during training (things like regularization). These
        will be summed with the existing cost.
    """
    if not self.model:
        log.error("No self.model for the Optimizer!")
        raise AssertionError("Needs to be initialized with a Model! (Or something went wrong if train() "
                             "was called from the Model. Try initializing the Optimizer with the model "
                             "param and calling optimizer.train().)")

    #####################################################
    # handle additional costs (normally regularization) #
    #####################################################
    # Create the gradient updates for the model - make sure to handle the possible
    # list of costs used for pretraining of certain parts of the model.
    train_costs = raise_to_list(self.model.get_train_cost())
    # deal with any other additional costs (like regularization, etc.)
    if additional_cost is not None:
        additional_costs = raise_to_list(additional_cost)
        if len(additional_costs) > 1:
            additional_cost = T.sum(additional_costs)

    #########################
    # gradients and updates #
    #########################
    train_updates = []
    self.gradients = []
    for i, train_cost in enumerate(train_costs):
        # Now create the training cost function for the model to use while training - update parameters
        # gradient!
        if len(train_costs) > 1 and additional_cost is not None:
            log.warning("additional_cost will double count with gradients during layer-wise pretraining!")
            warnings.warn("additional_cost will double count with gradients during layer-wise pretraining!")
        # TODO: additional_cost will double count with gradients during layer-wise pretraining.
        # Need to somehow make w.r.t. params appropriate for the individual training costs.
        gradients, _ = self.model.get_gradient(cost=train_cost, additional_cost=additional_cost)
        # clip gradients if we want.
        gradients = clip_gradients(gradients, self.grad_clip, self.hard_clip)
        # append to list
        self.gradients.append(gradients)

        # Calculate the optimizer updates each run
        # This is where the magic happens for a lot of sub-implementations of SGD!
        # It tells how to update the params each training epoch
        gradient_updates = self.get_updates(gradients)

        # Combine the updates from the model also if applicable
        updates = self.model.get_updates()
        if updates:
            updates.update(gradient_updates)
        else:
            updates = gradient_updates
        train_updates.append(updates)

    # grab the model parameters to use during training
    self.params = self.model.get_params()
    log.info("%s params: %s", str(type(self.model)), str(self.params))

    ############
    # monitors #
    ############
    # deal with the monitor channels if they were given (or take them from the plot)
    if monitor_channels is None and plot is not None and len(plot.channels) > 0:
        monitor_channels = plot.channels
    self.train_monitors_dict = {}
    self.valid_monitors_dict = {}
    self.test_monitors_dict = {}
    self.train_monitors_outservice_dict = {}
    self.valid_monitors_outservice_dict = {}
    self.test_monitors_outservice_dict = {}
    if monitor_channels:
        # collapse the appropriate monitors into their (name, expression, out_service) tuples
        train_collapsed = collapse_channels(monitor_channels, train=True)
        valid_collapsed = collapse_channels(monitor_channels, valid=True)
        test_collapsed = collapse_channels(monitor_channels, test=True)
        # get name: expression dictionary
        self.train_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
        self.valid_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])
        self.test_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in test_collapsed])
        # get name: outservice dictionary
        self.train_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in train_collapsed])
        self.valid_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in valid_collapsed])
        self.test_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in test_collapsed])
    # finally deal with an outservice provided to monitor training cost
    self.train_outservice = train_outservice
    # remove redundant files made by the fileservice for the train monitor.
    # TODO: THIS FEELS LIKE A HACK. I don't like it.
    if isinstance(self.train_outservice, FileService):
        os.remove(self.train_outservice.valid_filename)
        os.remove(self.train_outservice.test_filename)

    #######################################
    # compile train and monitor functions #
    #######################################
    function_input = raise_to_list(self.model.get_inputs()) + raise_to_list(self.model.get_targets())
    train_functions = []
    for i, (updates, train_cost) in enumerate(zip(train_updates, train_costs)):
        # Compile the training function!
        log.info('Compiling f_learn %d/%d function for model %s...', i + 1, len(train_updates),
                 str(type(self.model)))
        t = time.time()
        f_learn = function(inputs=function_input,
                           updates=updates,
                           outputs=[train_cost] + list(self.train_monitors_dict.values()),
                           name='f_learn_%d' % i)
        log.info('f_learn %d compilation took %s', i + 1, make_time_units_string(time.time() - t))
        train_functions.append(f_learn)

    # figure out if we want valid and test (monitors)
    self.valid_flag = (self.dataset.valid_inputs is not None) and (len(self.valid_monitors_dict) > 0)
    self.test_flag = (self.dataset.test_inputs is not None) and (len(self.test_monitors_dict) > 0)
    # Now compile the monitor functions!
    log.debug("Compiling monitor functions...")
    monitor_t = time.time()
    # valid monitors
    if self.valid_flag:
        self.valid_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.valid_monitors_dict.values()),
            name='valid_monitor_function'
        )
    else:
        self.valid_monitor_function = None

    # test monitors
    if self.test_flag:
        self.test_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=list(self.test_monitors_dict.values()),
            name='test_monitor_function'
        )
    else:
        self.test_monitor_function = None

    log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

    ##################
    # start training #
    ##################
    # make sure to deal with a list of train_cost functions - for layer-wise pretraining!
    # this list of training functions was created during __init__()
    start_time = time.time()
    for func_i, train_function in enumerate(train_functions):
        log.info("-----------TRAINING %s function %d/%d FOR %d EPOCHS-----------",
                 str(type(self.model)), func_i + 1, len(train_functions), self.n_epoch)

        self.STOP = False
        self.epoch_counter = 0
        # reset any decay params
        for decay_param in self.get_decay_params():
            decay_param.reset()
        self.times = []
        self.best_cost = numpy.inf
        self.best_params = None
        self.patience = 0

        t = time.time()

        while not self.STOP:
            try:
                self.STOP = self._perform_one_epoch(train_function, plot)
            except KeyboardInterrupt:
                log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
                self.STOP = True

        # save params
        if self.best_params is not None:
            log.debug("Restoring best model parameters...")
            set_shared_values(self.params, self.best_params)
        log.debug("Saving model parameters...")
        self.model.save_params('trained_epoch_' + str(self.epoch_counter) + '.pkl')

        log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

    log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
             str(type(self.model)), make_time_units_string(time.time() - start_time))

def train(self, monitor_channels=None, train_outservice=None, plot=None, continue_training=False):
    """
    This method performs the training!!! It is an online training method that goes over minibatches
    from the dataset for a number of epochs, updating parameters after each minibatch.

    You can disrupt training with a KeyboardInterrupt and it should exit/save parameters gracefully.

    Parameters
    ----------
    monitor_channels : list(MonitorsChannel or Monitor), optional
        The list of channels or monitors containing monitor expressions/variables to compile and
        evaluate on the data.
    train_outservice : OutService, optional
        The OutService to use for the automatically created train_cost monitor. Default of None just
        outputs to logs.
    plot : Plot, optional
        The Plot object to use if we want to graph the outputs (uses bokeh server).
    continue_training : bool
        Whether to continue training from a previous point.
    """
    ###############################################
    # theano index variable to use on the dataset #
    ###############################################
    # index to a [mini]batch - both start and end
    data_idx = T.iscalar('data_index')
    data_end_idx = T.iscalar('data_end_index')
    function_input = [data_idx, data_end_idx]
    batch_slice = slice(data_idx, data_end_idx)

    # compute number of minibatches for training, validation and testing
    # shapes is list of list - input list of datasets to optimizer (for multiple inputs), and each dataset
    # could be a list of shared variables (like multiple sequences from files)
    train_data_shapes = raise_to_list(self.dataset.getDataShape(TRAIN))
    valid_data_shapes = raise_to_list(self.dataset.getDataShape(VALID))
    test_data_shapes = raise_to_list(self.dataset.getDataShape(TEST))

    # train_batches is going to be lists of tuples that contain the start and end indices for train data.
    # this is more useful in the case of datasets that are lists of sequences, so that the start and end
    # indices can make sure a batch does not cross the sequence boundary on the concatenated data
    train_data_lens = [shape[0] for shape in train_data_shapes]
    self.train_batches = self._get_batch_indices(train_data_lens)

    if valid_data_shapes is not None:
        valid_data_lens = [shape[0] for shape in valid_data_shapes]
        self.valid_batches = self._get_batch_indices(valid_data_lens)
    else:
        self.valid_batches = None
    if test_data_shapes is not None:
        test_data_lens = [shape[0] for shape in test_data_shapes]
        self.test_batches = self._get_batch_indices(test_data_lens)
    else:
        self.test_batches = None

    # create the givens for the input function as pairs of (input_variable: sliced_data)
    train_givens = self._get_givens_subset(TRAIN, batch_slice)
    valid_givens = self._get_givens_subset(VALID, batch_slice)
    test_givens = self._get_givens_subset(TEST, batch_slice)

    # Now time to create the gradient updates for the model - make sure to handle the possible
    # list of costs used for pretraining of certain parts of the model.
    train_costs = raise_to_list(self.model.get_train_cost())
    train_updates = []
    self.gradients = []
    for i, train_cost in enumerate(train_costs):
        # Now create the training cost function for the model to use while training - update parameters
        # gradient!
        gradients, _ = self.model.get_gradient(cost=train_cost)
        self.gradients.append(gradients)

        # Calculate the optimizer updates each run
        # This is where the magic happens for a lot of sub-implementations of SGD!
        # It tells how to update the params each training epoch
        gradient_updates = self.get_updates(gradients)

        # Combine the updates from the model also if applicable
        updates = self.model.get_updates()
        if updates:
            updates.update(gradient_updates)
        else:
            updates = gradient_updates
        train_updates.append(updates)

    # grab the model parameters to use during training
    self.params = self.model.get_params()
    log.info("%s params: %s", str(type(self.model)), str(self.params))

    # deal with the monitor channels if they were given (or take them from the plot)
    if monitor_channels is None and plot is not None and len(plot.channels) > 0:
        monitor_channels = plot.channels
    self.train_monitors_dict = {}
    self.valid_monitors_dict = {}
    self.test_monitors_dict = {}
    self.train_monitors_outservice_dict = {}
    self.valid_monitors_outservice_dict = {}
    self.test_monitors_outservice_dict = {}
    if monitor_channels:
        # collapse the appropriate monitors into their (name, expression, out_service) tuples
        train_collapsed = collapse_channels(monitor_channels, train=True)
        valid_collapsed = collapse_channels(monitor_channels, valid=True)
        test_collapsed = collapse_channels(monitor_channels, test=True)
        # get name: expression dictionary
        self.train_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
        self.valid_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])
        self.test_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in test_collapsed])
        # get name: outservice dictionary
        self.train_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in train_collapsed])
        self.valid_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in valid_collapsed])
        self.test_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in test_collapsed])
    # finally deal with an outservice provided to monitor training cost
    self.train_outservice = train_outservice
    # remove redundant files made by the fileservice for the train monitor.
    # TODO: THIS FEELS LIKE A HACK. I don't like it.
    if isinstance(self.train_outservice, FileService):
        os.remove(self.train_outservice.valid_filename)
        os.remove(self.train_outservice.test_filename)

    #######################################
    # compile train and monitor functions #
    #######################################
    train_functions = []
    for i in range(len(train_costs)):
        updates = train_updates[i]
        train_cost = train_costs[i]
        # Compile the training function!
        log.info('Compiling f_learn %d/%d function for model %s...', i + 1, len(train_updates),
                 str(type(self.model)))
        t = time.time()
        f_learn = function(inputs=function_input,
                           updates=updates,
                           outputs=[train_cost] + self.train_monitors_dict.values(),
                           givens=train_givens,
                           name='f_learn_%d' % i)
        log.info('f_learn compilation took %s', make_time_units_string(time.time() - t))
        train_functions.append(f_learn)

    # figure out if we want valid and test
    self.valid_flag = (self.dataset.getSubset(VALID)[0] is not None) and (len(self.valid_monitors_dict) > 0)
    self.test_flag = (self.dataset.getSubset(TEST)[0] is not None) and (len(self.test_monitors_dict) > 0)
    # Now compile the monitor functions!
    log.debug("Compiling monitor functions...")
    monitor_t = time.time()
    # valid monitors
    if self.valid_flag:
        self.valid_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=self.valid_monitors_dict.values(),
            givens=valid_givens,
            name='valid_monitor_function'
        )
    else:
        self.valid_monitor_function = None

    # test monitors
    if self.test_flag:
        self.test_monitor_function = function(
            inputs=function_input,
            updates=self.model.get_updates(),
            outputs=self.test_monitors_dict.values(),
            givens=test_givens,
            name='test_monitor_function'
        )
    else:
        self.test_monitor_function = None

    log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

    ##################
    # start training #
    ##################
    # make sure to deal with a list of train_cost functions - for layer-wise pretraining!
    # this list of training functions was created during __init__()
    start_time = time.time()
    for func_i, train_function in enumerate(train_functions):
        log.info("-----------TRAINING %s function %d/%d FOR %d EPOCHS (continue_training=%s)-----------",
                 str(type(self.model)), func_i + 1, len(train_functions), self.n_epoch, str(continue_training))
        log.debug("Train dataset size is: %s", self.dataset.getDataShape(TRAIN))
        if self.dataset.getSubset(VALID)[0] is not None:
            log.debug("Valid dataset size is: %s", self.dataset.getDataShape(VALID))
        if self.dataset.getSubset(TEST)[0] is not None:
            log.debug("Test dataset size is: %s", self.dataset.getDataShape(TEST))

        self.STOP = False
        self.epoch_counter = 0
        if not continue_training:
            # reset any decay params
            for decay_param in self.get_decay_params():
                decay_param.reset()
        self.times = []
        self.best_cost = numpy.inf
        self.best_params = None
        self.patience = 0

        t = time.time()

        while not self.STOP:
            try:
                self.STOP = self._perform_one_epoch(train_function, plot)
            except KeyboardInterrupt:
                log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
                self.STOP = True

        # save params
        if self.best_params is not None:
            log.debug("Restoring best model parameters...")
            set_shared_values(self.params, self.best_params)
        log.debug("Saving model parameters...")
        self.model.save_params('trained_epoch_' + str(self.epoch_counter) + '.pkl')

        log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

    log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
             str(type(self.model)), make_time_units_string(time.time() - start_time))