コード例 #1
0
    def preprocess(self, data):
        """
            Run the text-preprocessing pipeline over an MturkData payload.

            The raw text carried by `data` is wrapped in TextData, pushed
            through the configured preprocessor, and re-packaged as a new
            MturkData object derived from the original.
        """
        raw_text = data.data
        processed = self.preprocessing.preprocess(TextData(raw_text))
        return MturkData.create_object_from(data, processed)
コード例 #2
0
    def take_input(self):
        """Supply a single 'Start' message; abort training on the next call."""
        # Only the very first invocation produces data — the second one
        # signals the framework to stop by raising.
        if self.count != 1:
            self.count += 1
            return [TextData("Start")]
        raise Exception("Training terminated")
コード例 #3
0
 def take_input(self):
     """Read the next two lines from the reader as one (context, response) pair.

     Closes the reader and raises a generic Exception once the underlying
     stream is exhausted.
     """
     try:
         first = next(self.reader)
         second = next(self.reader)
     except StopIteration:  # nothing left to read
         self.reader.close()
         raise Exception("End of input stream.")
     else:
         return [TextData((first, second))]
コード例 #4
0
    def take_input(self):
        """Cycle through self.data one item per call, counting epochs.

        Raises once `max_epochs` full passes have completed, after asking
        the rest of the system to shut down.
        """
        item = TextData(self.data[self.index])

        # Advance the cursor, wrapping around at the end of the dataset;
        # a wrap marks the completion of one epoch.
        self.index = (self.index + 1) % len(self.data)
        if self.index == 0:
            self.epoch += 1

        # Once the configured number of epochs has elapsed, broadcast the
        # terminate signal and stop this input thread.
        if self.epoch == self.max_epochs:
            self.publish('Terminate', system_channels.TRAINING)
            raise Exception("Training finished. Terminating input thread.")

        return [item]
コード例 #5
0
 def model_postprocess(self, outputs):
     """In execution mode, strip speaker tokens from each output and queue it."""
     # Post-processing is only meaningful when actually serving responses.
     if self.mode != system_modes.EXECUTION:
         return
     for line in outputs:
         cleaned = chat.remove_speaker_tokens(line)
         self.queue_output(TextData(cleaned))
コード例 #6
0
 def model_postprocess(self, outputs):
     """Wrap every model output in TextData and push it onto the output queue."""
     for item in outputs:
         wrapped = TextData(item)
         self.queue_output(wrapped)
コード例 #7
0
 def take_input(self):
     """Read one line from the instance's reader and wrap it as TextData.

     Returns
     -------
     A one-element list [TextData(line)] on success, or an empty list once
     the stream is exhausted (the reader is closed first).
     """
     try:
         # BUG FIX: the original referenced an undefined global `reader`;
         # the stream lives on the instance, as in the sibling readers.
         next_line = next(self.reader)
         return [TextData(next_line)]
     except StopIteration:  # End of file
         self.reader.close()
         # Return an empty list (not an implicit None) so callers that
         # iterate the result keep working — consistent with the
         # stdin-based take_input, which also returns [] when idle.
         return []
コード例 #8
0
 def take_input(self):
     """Poll stdin with the configured timeout; return the line, or nothing.

     Yields [TextData(line)] when input arrived within the timeout window,
     and an empty list otherwise.
     """
     line = self.stdin_with_timeout(config['io_timeout'])
     return [TextData(line)] if line else []
コード例 #9
0
    def preprocess(self, data):
        """Tokenize the incoming text on whitespace.

        BUG FIX: the original body mixed a tab and spaces for indentation,
        which raises TabError under Python 3; indentation is normalized to
        spaces here. Behavior is otherwise unchanged.
        """
        sentence = data.data
        return TextData(sentence.split())
コード例 #10
0
 def __init__(self):
     """Initialize the agent in the greeting state and send the opening line."""
     super(FlightBookingAgent, self).__init__()
     # The conversation always begins in the greeting state.
     self.current_state = GreetingsState(self)
     # Proactively greet the user before any input arrives.
     greeting = TextData('Hi!')
     self.queue_output(greeting)
コード例 #11
0
 def extract_information(self, data):
     """Return the fixed booking-confirmation message; the input is ignored."""
     confirmation = 'Flight booking confirmed.'
     return TextData(confirmation)
コード例 #12
0
 def extract_information(self, data):
     """Echo the incoming payload inside a fixed test template."""
     template = "This is a test. The input is {0}"
     return TextData(template.format(data.data))
コード例 #13
0
    def process_inputs(self, inputs):
        """
            Feed input to the internal model (a Theano/Lasagne dual encoder;
            Python 2 code — uses xrange and cPickle).

            Parameters
            ----------
            inputs :
                In EXECUTION mode: a list of pairs of (context, response).
                In TRAINING mode: a dict carrying at least 'epoch' and
                'shuffle_batch'.

            Returns
            -------
            EXECUTION mode: TextData wrapping a list of arrays, each array of
            batch size. Each entry of each array is the model prediction on
            whether the context fits with the given response. If there are
            more input rows than batch size, there will be multiple matrices
            at output.
            TRAINING mode: None — one epoch of training runs purely for side
            effects (logging and checkpointing the best weights).
        """

        if self.mode == system_modes.EXECUTION:
            # Label every pair with y=1; the model scores how well each
            # context/response pair goes together.
            data = {
                'c': [row[0] for row in inputs],
                'r': [row[1] for row in inputs],
                'y': [1] * len(inputs)
            }

            # NOTE(review): this always runs one extra batch when len(inputs)
            # is an exact multiple of batch_size — confirm that is intended.
            loop_count = 1 + (len(inputs) // self.model.batch_size)

            # Score one batch at a time through the model's shared variables.
            results = []
            for i in xrange(loop_count):
                self.model.set_shared_variables(data, i)
                results.append(self.model.get_pred())

            return TextData(results)
        elif self.mode == system_modes.TRAINING:
            indices = range(self.n_train_batches)
            epoch = inputs['epoch']
            shuffle_batch = inputs['shuffle_batch']

            train_data = self.domain_knowledge.dataset.get_training_data()
            # NOTE(review): validation_data is loaded via get_training_data()
            # as well — this looks like a copy-paste bug; presumably there is
            # a get_validation_data() accessor. Verify against the dataset
            # API before relying on the reported validation numbers.
            validation_data = self.domain_knowledge.dataset.get_training_data()
            test_data = self.domain_knowledge.dataset.get_testing_data()

            # Optionally visit minibatches in a random order each epoch.
            if shuffle_batch:
                indices = np.random.permutation(indices)

            total_cost = 0
            start_time = time.time()

            # One full pass over the training minibatches.
            for minibatch_index in indices:
                self.model.set_shared_variables(train_data, minibatch_index)
                cost_epoch = self.model.train_model()
                # logger.info("cost epoch:", cost_epoch)
                total_cost += cost_epoch
                # Reset the zero-padding embedding row after each update.
                self.model.set_zero(self.model.zero_vec)

            end_time = time.time()
            logger.info("cost: {} took: {}(s)".format(
                total_cost / len(indices), end_time - start_time))

            # Compute TRAIN performance:
            train_losses = [
                self.model.compute_loss(train_data, i)
                for i in xrange(self.n_train_batches)
            ]
            train_perf = 1 - np.sum(train_losses) / len(train_data['y'])
            logger.info("epoch %i, train perf %f" % (epoch, train_perf * 100))
            # evaluation for each model id in train_data['id']
            # MODIFIED
            # self.model.compute_performace_models("train")

            # Compute VALIDATION performance:
            val_losses = [
                self.model.compute_loss(validation_data, i)
                for i in xrange(self.n_val_batches)
            ]
            self.val_perf = 1 - np.sum(val_losses) / len(validation_data['y'])
            logger.info('epoch %i, val_perf %f' % (epoch, self.val_perf * 100))
            # evaluation for each model id in validation_data['id']
            # MODIFIED
            # self.model.compute_performace_models("val")

            # If doing better on validation set:
            if self.val_perf > self.best_val_perf:
                logger.info("\nImproved validation score!")
                self.best_val_perf = self.val_perf
                # Compute TEST performance:
                test_losses = [
                    self.model.compute_loss(test_data, i)
                    for i in xrange(self.n_test_batches)
                ]
                self.test_perf = 1 - np.sum(test_losses) / len(test_data['y'])
                logger.info('epoch %i, test_perf %f' %
                            (epoch, self.test_perf * 100))
                # evaluation for each model id in test_data['id']
                # MODIFIED
                # self.model.compute_performace_models("test")

                # Save current best model parameters.
                # Checkpoint weights, embeddings, and the M matrix to
                # encoder-specific pickle files in the working directory.
                logger.info("\nSaving current model parameters...")
                with open('weights_%s_best.pkl' % self.model.encoder,
                          'wb') as handle:
                    params = [
                        np.asarray(p.eval()) for p in
                        lasagne.layers.get_all_params(self.model.l_out)
                    ]
                    cPickle.dump(params, handle)
                with open('embed_%s_best.pkl' % self.model.encoder,
                          'wb') as handle:
                    cPickle.dump(self.model.embeddings.eval(), handle)
                with open('M_%s_best.pkl' % self.model.encoder,
                          'wb') as handle:
                    cPickle.dump(self.model.M.eval(), handle)
                logger.info("Saved.\n")

            # return test_perf, test_probas
            return None  # No output for training