Beispiel #1
0
    def __init__(self, params):
        """Load vocabulary, knowledge base, dialogs and labels for all splits.

        All reading and conversion is delegated to the ``dl`` helper module;
        this method only wires the pieces together and stores the results on
        the instance.

        Args:
            params: configuration object. The attributes read here are
                ``batch_size``, ``vocab_size``, ``utc_length``, ``turn_num``,
                ``act_size``, ``vocab_path``, ``kb_path``, ``template_path``,
                ``train_path``, ``dev_path`` and ``test_path``.
        """
        self.batch_size = params.batch_size
        self.vocab_size = params.vocab_size
        self.utc_length = params.utc_length
        self.turn_num = params.turn_num
        self.act_size = params.act_size
        # NOTE(review): presumably the list index is the act id -- confirm
        # against the code that consumes id2act.
        self.id2act = [
            'you_are_welcome', 'request_food', 'api', 'reservation', 'hello',
            'inform_address', 'inform_phone', 'request_number', 'on_it',
            'any_help', 'find_options', 'update', 'another_option',
            'recommend', 'request_area', 'request_price'
        ]

        def load_labels(dialog_path):
            # Same template and KB files are used for every split.
            return dl.get_template_label(dialog_path, params.template_path,
                                         params.kb_path)

        # The merged history of a turn spans turn_num utterances, so the
        # length handed to the converter is turn_num * utc_length.
        input_length = params.turn_num * params.utc_length

        def to_input_ids(history):
            return dl.convert_2D_str2id(history,
                                        self.word2id,
                                        self.names,
                                        self.val2attr,
                                        input_length,
                                        back=True)

        self.word2id, self.id2word = dl.read_word2id(params.vocab_path,
                                                     params.vocab_size)
        (self.names, self.values, self.val2attr,
         self.entities) = dl.read_kb_value(params.kb_path)

        self.train_usr, self.train_sys, train_api = dl.read_dialog(
            params.train_path)
        self.dev_usr, self.dev_sys, dev_api = dl.read_dialog(params.dev_path)
        self.test_usr, self.test_sys, test_api = dl.read_dialog(
            params.test_path)

        self.train_label = load_labels(params.train_path)
        self.dev_label = load_labels(params.dev_path)
        self.test_label = load_labels(params.test_path)

        # Merge the history turns. The number of turns to be merged is
        # decided by params.turn_num.
        train_input = dl.merge_dialog(self.train_usr, self.train_sys,
                                      params.turn_num)
        dev_input = dl.merge_dialog(self.dev_usr, self.dev_sys,
                                    params.turn_num)
        test_input = dl.merge_dialog(self.test_usr, self.test_sys,
                                     params.turn_num)

        # Flatten all history of a turn into a single string.
        train_input = dl.flatten_history(train_input)
        dev_input = dl.flatten_history(dev_input)
        test_input = dl.flatten_history(test_input)

        # Convert the strings to indexes.
        train_input_id = to_input_ids(train_input)
        dev_input_id = to_input_ids(dev_input)
        test_input_id = to_input_ids(test_input)

        # Get number of restaurants in the api_call result.
        train_api_number = dl.get_api_number(train_api, train_input)
        dev_api_number = dl.get_api_number(dev_api, dev_input)
        test_api_number = dl.get_api_number(test_api, test_input)

        # Flatten the 2D lists to 1D (merge all dialogs into a single list).
        self.train_input_id = dl.flatten_2D(train_input_id)
        self.dev_input_id = dl.flatten_2D(dev_input_id)
        self.test_input_id = dl.flatten_2D(test_input_id)

        self.train_api_num = dl.flatten_2D(train_api_number)
        self.dev_api_num = dl.flatten_2D(dev_api_number)
        self.test_api_num = dl.flatten_2D(test_api_number)

        self.num_train = len(self.train_input_id)
        self.num_dev = len(self.dev_input_id)
        self.num_test = len(self.test_input_id)

        # Single parenthesized argument: prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print('\tNumber of turns: train: %d, dev: %d, test: %d' %
              (self.num_train, self.num_dev, self.num_test))
        self._pointer = 0
Beispiel #2
0
    def __init__(self, params):
        """Load vocabulary, knowledge base and dialogs, and index all splits.

        All reading and conversion is delegated to the ``dl`` helper module;
        this method only wires the pieces together and stores the results on
        the instance. Both the merged dialog history (inputs) and the system
        utterances (outputs) are converted to ids.

        Args:
            params: configuration object. The attributes read here are
                ``batch_size``, ``vocab_size``, ``utc_length``, ``turn_num``,
                ``vocab_path``, ``kb_path``, ``train_path``, ``dev_path``
                and ``test_path``.
        """
        self.batch_size = params.batch_size
        self.vocab_size = params.vocab_size
        self.utc_length = params.utc_length
        self.turn_num = params.turn_num

        # The merged history of a turn spans turn_num utterances, so the
        # length handed to the converter is turn_num * utc_length.
        input_length = params.turn_num * params.utc_length

        def to_input_ids(history):
            return dl.convert_2D_str2id(history,
                                        self.word2id,
                                        self.names,
                                        self.val2attr,
                                        input_length,
                                        back=True)

        def to_output_ids(utterances):
            # NOTE(review): add_headrear presumably wraps each utterance in
            # start/end markers -- confirm in dl.convert_2D_str2id.
            return dl.convert_2D_str2id(utterances,
                                        self.word2id,
                                        self.names,
                                        self.val2attr,
                                        params.utc_length,
                                        add_headrear=True)

        self.word2id, self.id2word = dl.read_word2id(params.vocab_path,
                                                     params.vocab_size)
        (self.names, self.values, self.val2attr,
         self.entities) = dl.read_kb_value(params.kb_path)

        self.train_usr, self.train_sys, train_api = dl.read_dialog(
            params.train_path)
        self.dev_usr, self.dev_sys, dev_api = dl.read_dialog(params.dev_path)
        self.test_usr, self.test_sys, test_api = dl.read_dialog(
            params.test_path)

        # Merge the history turns. The number of turns to be merged is
        # decided by params.turn_num.
        train_input = dl.merge_dialog(self.train_usr, self.train_sys,
                                      params.turn_num)
        dev_input = dl.merge_dialog(self.dev_usr, self.dev_sys,
                                    params.turn_num)
        test_input = dl.merge_dialog(self.test_usr, self.test_sys,
                                     params.turn_num)

        # Flatten all history of a turn into a single string.
        train_input = dl.flatten_history(train_input)
        dev_input = dl.flatten_history(dev_input)
        test_input = dl.flatten_history(test_input)

        # Convert the strings to indexes: history first, then system replies.
        train_input_id = to_input_ids(train_input)
        dev_input_id = to_input_ids(dev_input)
        test_input_id = to_input_ids(test_input)

        train_output_id = to_output_ids(self.train_sys)
        dev_output_id = to_output_ids(self.dev_sys)
        test_output_id = to_output_ids(self.test_sys)

        # Get number of restaurants in the api_call result.
        train_api_number = dl.get_api_number(train_api, train_input)
        dev_api_number = dl.get_api_number(dev_api, dev_input)
        test_api_number = dl.get_api_number(test_api, test_input)

        # Flatten the 2D lists to 1D (merge all dialogs into a single list).
        self.train_input_id = dl.flatten_2D(train_input_id)
        self.dev_input_id = dl.flatten_2D(dev_input_id)
        self.test_input_id = dl.flatten_2D(test_input_id)

        self.train_output_id = dl.flatten_2D(train_output_id)
        self.dev_output_id = dl.flatten_2D(dev_output_id)
        self.test_output_id = dl.flatten_2D(test_output_id)

        self.train_api_num = dl.flatten_2D(train_api_number)
        self.dev_api_num = dl.flatten_2D(dev_api_number)
        self.test_api_num = dl.flatten_2D(test_api_number)

        self.num_train = len(self.train_input_id)
        self.num_dev = len(self.dev_input_id)
        self.num_test = len(self.test_input_id)

        # Single parenthesized argument: prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print('\tNumber of turns: train: %d, dev: %d, test: %d' %
              (self.num_train, self.num_dev, self.num_test))
        self._pointer = 0