def preprocess(self, data): """ We expect an object of type MturkData here. """ content = data.data response = self.preprocessing.preprocess(TextData(content)) return MturkData.create_object_from(data, response)
def take_input(self):
    # return [TextData((next_line, another_line))]
    if self.count == 1:
        raise Exception("Training terminated")
    self.count += 1
    return [TextData("Start")]
def take_input(self):
    try:
        next_line = next(self.reader)
        another_line = next(self.reader)
        return [TextData((next_line, another_line))]
    except StopIteration:
        # End of file
        self.reader.close()
        raise Exception("End of input stream.")
def take_input(self):
    output = [TextData(self.data[self.index])]
    self.index = (self.index + 1) % len(self.data)
    if self.index == 0:
        self.epoch += 1
        if self.epoch == self.max_epochs:
            self.publish('Terminate', system_channels.TRAINING)
            raise Exception("Training finished. Terminating input thread.")
    return output
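# The wrap-around bookkeeping above drives epoch counting: the index
# cycles through the dataset, and each wrap back to zero marks one
# finished epoch. A minimal standalone sketch of that arithmetic (the
# data and max_epochs values below are dummies for illustration):

data = ["a", "b", "c"]
index, epoch, max_epochs = 0, 0, 2

seen = []
while True:
    seen.append(data[index])
    index = (index + 1) % len(data)
    if index == 0:  # wrapped around: one full epoch completed
        epoch += 1
        if epoch == max_epochs:
            break

assert seen == ["a", "b", "c", "a", "b", "c"]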
def model_postprocess(self, outputs):
    if self.mode == system_modes.EXECUTION:
        for output in outputs:
            self.queue_output(TextData(chat.remove_speaker_tokens(output)))
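# chat.remove_speaker_tokens is not shown in this excerpt. A
# hypothetical sketch of what such a helper could look like, assuming
# speaker markers of the form "<speaker_1>" (the marker format is an
# assumption, not the framework's actual one):

import re

def remove_speaker_tokens(text):
    # Strip assumed markers such as "<speaker_1>" or "<speaker_2>".
    return re.sub(r"<speaker_\d+>\s*", "", text)

assert remove_speaker_tokens("<speaker_1> hello there") == "hello there"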
def model_postprocess(self, outputs):
    for data in outputs:
        self.queue_output(TextData(data))
def take_input(self):
    try:
        next_line = next(self.reader)
        return [TextData(next_line)]
    except StopIteration:
        # End of file: close the reader and signal that no input is left.
        self.reader.close()
        return []
def take_input(self):
    data = self.stdin_with_timeout(config['io_timeout'])
    if data:
        return [TextData(data)]
    else:
        return []
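# stdin_with_timeout is not defined in this excerpt. One plausible
# POSIX implementation waits on stdin with select; this is a sketch
# under that assumption, not the framework's actual helper:

import select
import sys

def stdin_with_timeout(timeout_seconds):
    # Block until stdin is readable or the timeout expires.
    ready, _, _ = select.select([sys.stdin], [], [], timeout_seconds)
    if ready:
        return sys.stdin.readline().rstrip("\n")
    return None  # Timed out; the caller then returns an empty list.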
def preprocess(self, data):
    sentence = data.data
    return TextData(sentence.split())
def __init__(self):
    super(FlightBookingAgent, self).__init__()
    self.current_state = GreetingsState(self)
    self.queue_output(TextData('Hi!'))
def extract_information(self, data):
    return TextData('Flight booking confirmed.')
def extract_information(self, data): return TextData("This is a test. The input is {0}".format(data.data))
def process_inputs(self, inputs): """ Feed input to internal model. Parameters ---------- inputs : a list of pairs of (context, response). Returns ------- A list of array, each array has size of batch size. Each entry of each array is the model prediction on whether the context fits with the given response. If there are more input rows than batch size, there will be multiple matrices at output. """ if self.mode == system_modes.EXECUTION: data = { 'c': [row[0] for row in inputs], 'r': [row[1] for row in inputs], 'y': [1] * len(inputs) } loop_count = 1 + (len(inputs) // self.model.batch_size) results = [] for i in xrange(loop_count): self.model.set_shared_variables(data, i) results.append(self.model.get_pred()) return TextData(results) elif self.mode == system_modes.TRAINING: indices = range(self.n_train_batches) epoch = inputs['epoch'] shuffle_batch = inputs['shuffle_batch'] train_data = self.domain_knowledge.dataset.get_training_data() validation_data = self.domain_knowledge.dataset.get_training_data() test_data = self.domain_knowledge.dataset.get_testing_data() if shuffle_batch: indices = np.random.permutation(indices) total_cost = 0 start_time = time.time() for minibatch_index in indices: self.model.set_shared_variables(train_data, minibatch_index) cost_epoch = self.model.train_model() # logger.info("cost epoch:", cost_epoch) total_cost += cost_epoch self.model.set_zero(self.model.zero_vec) end_time = time.time() logger.info("cost: {} took: {}(s)".format( total_cost / len(indices), end_time - start_time)) # Compute TRAIN performance: train_losses = [ self.model.compute_loss(train_data, i) for i in xrange(self.n_train_batches) ] train_perf = 1 - np.sum(train_losses) / len(train_data['y']) logger.info("epoch %i, train perf %f" % (epoch, train_perf * 100)) # evaluation for each model id in train_data['id'] # MODIFIED # self.model.compute_performace_models("train") # Compute VALIDATION performance: val_losses = [ self.model.compute_loss(validation_data, i) for i in xrange(self.n_val_batches) ] self.val_perf = 1 - np.sum(val_losses) / len(validation_data['y']) logger.info('epoch %i, val_perf %f' % (epoch, self.val_perf * 100)) # evaluation for each model id in validation_data['id'] # MODIFIED # self.model.compute_performace_models("val") # If doing better on validation set: if self.val_perf > self.best_val_perf: logger.info("\nImproved validation score!") self.best_val_perf = self.val_perf # Compute TEST performance: test_losses = [ self.model.compute_loss(test_data, i) for i in xrange(self.n_test_batches) ] self.test_perf = 1 - np.sum(test_losses) / len(test_data['y']) logger.info('epoch %i, test_perf %f' % (epoch, self.test_perf * 100)) # evaluation for each model id in test_data['id'] # MODIFIED # self.model.compute_performace_models("test") # Save current best model parameters. logger.info("\nSaving current model parameters...") with open('weights_%s_best.pkl' % self.model.encoder, 'wb') as handle: params = [ np.asarray(p.eval()) for p in lasagne.layers.get_all_params(self.model.l_out) ] cPickle.dump(params, handle) with open('embed_%s_best.pkl' % self.model.encoder, 'wb') as handle: cPickle.dump(self.model.embeddings.eval(), handle) with open('M_%s_best.pkl' % self.model.encoder, 'wb') as handle: cPickle.dump(self.model.M.eval(), handle) logger.info("Saved.\n") # return test_perf, test_probas return None # No output for training