import cPickle
import os
import sys
import time
from math import ceil

import numpy as np

# Repo-internal imports; the module paths below are assumptions inferred from
# the class names used in this file.
from copy_conv_rec_model import CopyConvolutionalRecurrentAttentionalModel
from token_naming_data import TokenCodeNamingData


def load(filename):
    """
    :type filename: str
    :rtype: ConvolutionalAttentionalLearner
    """
    with open(filename, 'rb') as f:
        learner = cPickle.load(f)
    # The Theano model is not pickled: rebuild it and restore its parameters.
    learner.model = CopyConvolutionalRecurrentAttentionalModel(
        learner.hyperparameters,
        len(learner.naming_data.all_tokens_dictionary),
        learner.naming_data.name_empirical_dist)
    learner.model.restore_parameters(learner.parameters)
    return learner
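# Usage sketch for load(). The checkpoint path is hypothetical; it follows the
# "copy_convolutional_att_rec_model<train-file-basename>.pkl" pattern that
# train() below saves to.
#
#   learner = load("copy_convolutional_att_rec_modelnames.json.pkl")
#   print "Vocabulary size: %d" % len(learner.naming_data.all_tokens_dictionary)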
def train(self, input_file, patience=5, max_epochs=1000, minibatch_size=500):
    # assert self.parameters is None, "Model is already trained"
    print "saving best result so far to %s" % (
        "copy_convolutional_att_rec_model"
        + os.path.basename(self.hyperparameters["train_file"]) + ".pkl")

    print "Extracting data..."
    # Split the corpus into training (92%) and validation (8%) data.
    train_data, validation_data, self.naming_data = \
        TokenCodeNamingData.get_data_in_recurrent_copy_convolution_format_with_validation(
            input_file, .92, self.padding_size)
    train_name_targets, train_code_sentences, train_code, train_target_is_unk, train_copy_vectors = train_data
    val_name_targets, val_code_sentences, val_code, val_target_is_unk, val_copy_vectors = validation_data

    # Create the Theano model and train it.
    model = CopyConvolutionalRecurrentAttentionalModel(
        self.hyperparameters, len(self.naming_data.all_tokens_dictionary),
        self.naming_data.name_empirical_dist)
    self.model = model

    def compute_validation_score_names():
        return model.log_prob_with_targets(val_code_sentences, val_copy_vectors,
                                           val_target_is_unk, val_name_targets)

    best_params = [p.get_value() for p in model.train_parameters]
    best_name_score = float('-inf')
    ratios = np.zeros(len(model.train_parameters))
    n_batches = 0
    epochs_not_improved = 0
    print "[%s] Starting training..." % time.asctime()
    for i in xrange(max_epochs):
        start_time = time.time()
        name_ordering = np.arange(len(train_name_targets), dtype=np.int32)
        np.random.shuffle(name_ordering)
        sys.stdout.write(str(i))
        # Clump minibatches: cap the work per epoch at 25 minibatches.
        num_minibatches = min(
            int(ceil(float(len(train_name_targets)) / minibatch_size)) - 1, 25)
        for j in xrange(num_minibatches):
            if (j + 1) * minibatch_size > len(name_ordering):
                j = 0  # a short final minibatch: reuse the first one instead
            name_batch_ids = name_ordering[j * minibatch_size:(j + 1) * minibatch_size]
            batch_code_sentences = train_code_sentences[name_batch_ids]
            for k in xrange(len(name_batch_ids)):
                pos = name_batch_ids[k]
                model.grad_accumulate(batch_code_sentences[k],
                                      train_copy_vectors[pos],
                                      train_target_is_unk[pos],
                                      train_name_targets[pos])
            assert len(name_batch_ids) > 0
            ratios += model.grad_step()
            sys.stdout.write("\r%d %d" % (i, n_batches))
            n_batches += 1
        sys.stdout.write("|")

        if i % 1 == 0:  # validate every epoch; raise the modulus to validate less often
            name_ll = compute_validation_score_names()
            if name_ll > best_name_score:
                best_name_score = name_ll
                best_params = [p.get_value() for p in model.train_parameters]
                self.parameters = best_params
                print "At %s validation: name_ll=%s [best so far]" % (i, name_ll)
                epochs_not_improved = 0
                self.save("copy_convolutional_att_rec_model"
                          + os.path.basename(self.hyperparameters["train_file"]) + ".pkl")
            else:
                print "At %s validation: name_ll=%s" % (i, name_ll)
                epochs_not_improved += 1
            # Report the mean update ratio per parameter since the last validation.
            for k in xrange(len(model.train_parameters)):
                print "%s: %.0e" % (model.train_parameters[k].name,
                                    ratios[k] / n_batches)
            n_batches = 0
            ratios = np.zeros(len(model.train_parameters))
        if epochs_not_improved >= patience:
            print "Not improved for %s epochs. Stopping..." % patience
            break
        elapsed = int(time.time() - start_time)
        print "Epoch elapsed %sh%sm%ss" % (elapsed / 3600, (elapsed / 60) % 60, elapsed % 60)
    print "[%s] Training Finished..." % time.asctime()
    self.parameters = best_params
    model.restore_parameters(best_params)
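# The early-stopping rule used by train(), in isolation: keep the best
# validation score seen so far and stop once `patience` consecutive epochs
# fail to improve on it. A self-contained sketch; the score stream in the
# example is made up.
def _early_stopping_epoch(scores, patience=5):
    """Return the epoch index at which training would stop."""
    best_score, epochs_not_improved = float('-inf'), 0
    for epoch, score in enumerate(scores):
        if score > best_score:
            best_score, epochs_not_improved = score, 0
        else:
            epochs_not_improved += 1
        if epochs_not_improved >= patience:
            return epoch
    return len(scores) - 1

# e.g. _early_stopping_epoch([-9., -7., -6., -6.5, -6.1, -6.2], patience=3) -> 5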
def train(self, input_file, patience=5, max_epochs=1000, minibatch_size=500):
    assert self.parameters is None, "Model is already trained"
    print "Extracting data..."
    # Split the corpus into training (92%) and validation (8%) data.
    train_data, validation_data, self.naming_data = \
        TokenCodeNamingData.get_data_in_recurrent_copy_convolution_format_with_validation(
            input_file, .92, self.padding_size)
    train_name_targets, train_code_sentences, train_code, train_target_is_unk, train_copy_vectors = train_data
    val_name_targets, val_code_sentences, val_code, val_target_is_unk, val_copy_vectors = validation_data

    # Create the Theano model and train it.
    model = CopyConvolutionalRecurrentAttentionalModel(
        self.hyperparameters, len(self.naming_data.all_tokens_dictionary),
        self.naming_data.name_empirical_dist)

    def compute_validation_score_names():
        return model.log_prob_with_targets(val_code_sentences, val_copy_vectors,
                                           val_target_is_unk, val_name_targets)

    best_params = [p.get_value() for p in model.train_parameters]
    best_name_score = float('-inf')
    ratios = np.zeros(len(model.train_parameters))
    n_batches = 0
    epochs_not_improved = 0
    print "[%s] Starting training..." % time.asctime()
    for i in xrange(max_epochs):
        start_time = time.time()
        name_ordering = np.arange(len(train_name_targets), dtype=np.int32)
        np.random.shuffle(name_ordering)
        sys.stdout.write(str(i))
        # Clump minibatches: cap the work per epoch at 25 minibatches.
        num_minibatches = min(
            int(ceil(float(len(train_name_targets)) / minibatch_size)) - 1, 25)
        for j in xrange(num_minibatches):
            name_batch_ids = name_ordering[j * minibatch_size:(j + 1) * minibatch_size]
            batch_code_sentences = train_code_sentences[name_batch_ids]
            for k in xrange(len(name_batch_ids)):
                pos = name_batch_ids[k]
                model.grad_accumulate(batch_code_sentences[k],
                                      train_copy_vectors[pos],
                                      train_target_is_unk[pos],
                                      train_name_targets[pos])
            assert len(name_batch_ids) > 0
            ratios += model.grad_step()
            n_batches += 1
        sys.stdout.write("|")

        if i % 1 == 0:  # validate every epoch; raise the modulus to validate less often
            name_ll = compute_validation_score_names()
            if name_ll > best_name_score:
                best_name_score = name_ll
                best_params = [p.get_value() for p in model.train_parameters]
                print "At %s validation: name_ll=%s [best so far]" % (i, name_ll)
                epochs_not_improved = 0
            else:
                print "At %s validation: name_ll=%s" % (i, name_ll)
                epochs_not_improved += 1
            # Report the mean update ratio per parameter since the last validation.
            for k in xrange(len(model.train_parameters)):
                print "%s: %.0e" % (model.train_parameters[k].name,
                                    ratios[k] / n_batches)
            n_batches = 0
            ratios = np.zeros(len(model.train_parameters))
        if epochs_not_improved >= patience:
            print "Not improved for %s epochs. Stopping..." % patience
            break
        elapsed = int(time.time() - start_time)
        print "Epoch elapsed %sh%sm%ss" % (elapsed / 3600, (elapsed / 60) % 60, elapsed % 60)
    print "[%s] Training Finished..." % time.asctime()
    self.parameters = best_params
    model.restore_parameters(best_params)
    self.model = model
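# End-to-end sketch: after a training run, the checkpoint written by the first
# train() variant can be reloaded with load(). The learner construction is
# elided because its class lives elsewhere in the repo, and the corpus name
# "names.json" is hypothetical.
#
#   learner = ...  # construct an untrained learner (class defined elsewhere)
#   learner.train("names.json", patience=5, max_epochs=100)
#   learner = load("copy_convolutional_att_rec_modelnames.json.pkl")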