예제 #1
0
    def get_reordered(self, order, click_id):
        """
        Build a new BatchData whose item-level features follow `order`.

        order: (b, order_len), per-sample item indices local to each sequence
        click_id: (b, order_len), array whose flattened form becomes the new
            `click_id` slot

        return:
            a BatchData created from `self.conf` and `self.tensor_dict` in
            which every item slot except `last_click_id` is gathered by
            `order`, and `click_id` is replaced by the given values.

        `last_click_id` is skipped, i.e. it is not re-gathered here.
        """
        AssertEqual(len(order), self.batch_size())
        AssertEqual(len(click_id), self.batch_size())

        # Shift each sample's local indices by that sample's offset so they
        # address rows of the flat, concatenated value arrays.
        flat_index = np.concatenate(
            [np.array(local_idx) + base
             for local_idx, base in zip(order, self.offset())],
            axis=0)

        reordered = BatchData(self.conf, self.tensor_dict)
        lens_after = [len(local_idx) for local_idx in order]
        for slot in reordered.conf.item_slot_names:
            if slot != 'last_click_id':
                gathered = reordered.tensor_dict[slot].values[flat_index]
                reordered.tensor_dict[slot] = FakeTensor(gathered, lens_after)
        reordered.tensor_dict['click_id'] = FakeTensor(click_id.reshape([-1, 1]), lens_after)
        return reordered
예제 #2
0
    def expand_candidates(self, other_batch_data, lens):
        """
        Extend every sequence of this batch with items sampled from
        `other_batch_data`, which is treated as one shared candidate pool.

        Only item-level slots are expanded, and `self.tensor_dict` is updated
        in place (nothing is returned):
            1. values of self and other_batch_data are stacked
            2. a gather index picks, per sequence, the original items followed
               by `lens[i]` candidates drawn without replacement

        other_batch_data: batch providing the candidate items
        lens: (batch_size,), number of candidates to append to each sequence

        `last_click_id` is ignored.
        """
        AssertEqual(len(lens), self.batch_size())

        n_cand = other_batch_data.total_item_num()
        n_own = self.total_item_num()
        # After stacking, candidate rows sit right behind this batch's own rows.
        pool = np.arange(n_own, n_own + n_cand)

        boundaries = self.lod()[0]
        pieces = []
        for k in range(1, len(boundaries)):
            # Keep the sequence's own items first ...
            pieces.append(np.arange(boundaries[k - 1], boundaries[k]))
            # ... then append the randomly chosen candidates.
            pieces.append(np.random.choice(pool, size=lens[k - 1], replace=False))
        gather_index = np.concatenate(pieces, axis=0)

        grown_lens = [own + extra for own, extra in zip(self.seq_lens(), lens)]
        # update tensor_dict
        for slot in self.conf.item_slot_names:
            if slot == 'last_click_id':
                continue
            stacked = np.concatenate(
                [self.tensor_dict[slot].values,
                 other_batch_data.tensor_dict[slot].values],
                0)
            self.tensor_dict[slot] = FakeTensor(stacked[gather_index], grown_lens)
예제 #3
0
 def __init__(self, values, seq_lens=None):
     """
     Store flat values together with optional per-sequence lengths.

     values: flat container of all rows
     seq_lens: per-sequence lengths, or None for data without sequence info

     When `seq_lens` is given, its sum must equal `len(values)` and `lod`
     holds one level produced by `seq_len_2_lod`; otherwise `lod` is empty.
     """
     self.values = values
     self.seq_lens = seq_lens
     if seq_lens is not None:
         # The lengths must account for every row of `values`.
         AssertEqual(len(values), np.sum(seq_lens))
         self.lod = [seq_len_2_lod(seq_lens)]
     else:
         self.lod = []
예제 #4
0
def click_prob_2_score(click_prob):
    """
    Reduce each row of `click_prob` to a single weighted sum, using the
    column index as the weight of that column.

    args:
        click_prob: (n, dim)
    return:
        click_score: (n,), where click_score[i] = sum_k k * click_prob[i, k]
    """
    AssertEqual(len(click_prob.shape), 2)
    n_cols = click_prob.shape[1]
    # (1, dim) so the weights broadcast over all rows.
    col_weight = np.arange(n_cols).reshape([1, -1])
    return np.sum(click_prob * col_weight, 1)
예제 #5
0
def sequence_gather(input_sequence, lens, index):
    """
    Pick one element (or whatever `index[i]` selects) from every sequence of
    a flat, concatenated input.

    input_sequence: (sum(lens), *)
    lens: (batch_size,)
    index: len() = batch_size; a None entry yields an empty list
    e.g.
        input_sequence = [1,2, 3,4,5,6]
        lens = [2, 0, 4]
        index = [1, None, 2]
        return [2, [], 5]
    """
    AssertEqual(len(input_sequence), np.sum(lens))
    AssertEqual(len(lens), len(index))
    pieces = sequence_unconcat(input_sequence, lens)
    return [
        [] if pick is None else piece[pick]
        for piece, pick in zip(pieces, index)
    ]
예제 #6
0
def sequence_unconcat(input_sequence, lens):
    """
    Split a flat, concatenated input back into consecutive slices of the
    given lengths.

    input_sequence: (sum(lens), *)
    lens: length of every slice, in order
    e.g.
        input_sequence = [1,2,3,4,5,6]
        lens = [2, 0, 4]
        return [[1,2], [], [3,4,5,6]]
    """
    AssertEqual(len(input_sequence), np.sum(lens))
    pieces = []
    cursor = 0
    for size in lens:
        stop = cursor + size
        pieces.append(input_sequence[cursor:stop])
        cursor = stop
    return pieces
예제 #7
0
    def get_candidates(self, pre_items, stop_flags=None):
        """
        List, per sequence, the local item indices not contained in
        `pre_items`.

        pre_items: len() = batch_size, indices to exclude for each sequence
        stop_flags: (batch_size,), a truthy flag yields an empty candidate
            list for that sequence; defaults to all zeros
        return:
            candidate_items: len() = batch_size, e.g. [[2,3,5], [3,4], ...]
        """
        if stop_flags is None:
            stop_flags = np.zeros([len(pre_items)])
        AssertEqual(len(pre_items), len(stop_flags))

        # Candidates are range(seq_len) minus the excluded indices.
        return [
            [] if stopped else np.setdiff1d(np.arange(n_items), chosen)
            for chosen, n_items, stopped in zip(pre_items, self.seq_lens(), stop_flags)
        ]
예제 #8
0
def sequence_expand(input, lens):
    """
    Repeat every entry of `input` `lens[i]` times and stack the copies.

    input: len() = batch_size, e.g. [(dim), [], (dim), ...]
    lens: (batch_size,); entries with length 0 are dropped, so their
        content does not matter
    e.g.
        input = [(dim), [], (dim)]
        lens = [1,0,2]
        return (3, dim)

    np.concatenate raises ValueError when no length is positive, because
    there is nothing to concatenate.
    """
    AssertEqual(len(input), len(lens))
    tiled = [
        np.array([row] * count)     # (count, dim)
        for row, count in zip(input, lens)
        if count > 0
    ]
    return np.concatenate(tiled, axis=0)
예제 #9
0
    def get_reordered(self, order):
        """
        Build a new BatchData whose item-level features follow `order`.

        order: len() = batch_size, per-sample item indices local to each
            sequence

        return:
            a BatchData created from `self.conf` and `self.tensor_dict` with
            every item slot gathered by `order`; its `click_id` entry is
            deleted.
        """
        AssertEqual(len(order), self.batch_size())

        # Shift each sample's local indices by that sample's offset so they
        # address rows of the flat, concatenated value arrays.
        flat_index = np.concatenate(
            [np.array(local_idx) + base
             for local_idx, base in zip(order, self.offset())],
            axis=0)

        reordered = BatchData(self.conf, self.tensor_dict)
        lens_after = [len(local_idx) for local_idx in order]
        for slot in reordered.conf.item_slot_names:
            gathered = reordered.tensor_dict[slot].values[flat_index]
            reordered.tensor_dict[slot] = FakeTensor(gathered, lens_after)
        del reordered.tensor_dict['click_id']
        return reordered
예제 #10
0
파일: gen_utils.py 프로젝트: wgcn96/MBCAL
    def apply_masks(batch_data, list_item_masks, conf=None):
        """
        Replicate every sample once per mask and build a feed dict in which
        the item-level slots are multiplied by the corresponding mask.

        batch_data: batch to expand; `add_last_click_id()` is called on it
        list_item_masks: (n_masks, seq_len), a list of 1d item_mask
        conf: config providing `recent_slot_names` and `item_slot_names`;
            defaults to `batch_data.conf`

        return:
            dict mapping slot name to the tensor built by `create_tensor`,
            each holding batch_size * n_masks sequences

        Masks are applied to item_slot_names except last_click_id; recent
        slots are only replicated. The reshape to (batch_size, -1, ...)
        relies on all sequences of a slot having the same length; only the
        first sequence length is checked against the mask length.
        """
        conf = batch_data.conf if conf is None else conf

        batch_size = batch_data.batch_size()
        first_seq_len = batch_data.seq_lens()[0]
        AssertEqual(len(list_item_masks[0]), first_seq_len)

        batch_data.add_last_click_id()

        n_masks = len(list_item_masks)
        n_copies = batch_size * n_masks
        # All masks laid out back to back, repeated for every sample:
        # (batch_size * n_masks * seq_len)
        flat_masks = np.tile(np.array(list_item_masks).flatten(), [batch_size])

        place = fluid.CPUPlace()
        feed_dict = {}
        for name in conf.recent_slot_names + conf.item_slot_names:
            values = batch_data.tensor_dict[name].values
            trailing = list(values.shape[1:])
            # (batch_size, seq_len/recent_len, ...)
            per_sample = values.reshape([batch_size, -1] + trailing)
            step_count = per_sample.shape[1]
            # Each sample is repeated n_masks times in a row, matching the
            # mask layout above, then flattened back to rows:
            # (batch_size * n_masks * seq_len/recent_len, ...)
            flat = np.repeat(per_sample, n_masks, axis=0).reshape([-1] + trailing)
            if name != 'last_click_id' and name in conf.item_slot_names:
                # Trailing singleton axes let the 1d mask broadcast over features.
                flat = flat * flat_masks.reshape([-1] + [1] * (len(flat.shape) - 1))
            feed_dict[name] = create_tensor(
                flat,
                lod=[seq_len_2_lod([step_count] * n_copies)],
                place=place)
        return feed_dict
예제 #11
0
def sequence_sampling(scores, lens, sampling_type):
    """
    Select one index per sequence from flat, concatenated scores.

    scores: (sum(lens),)
    lens: (batch_size,)
    sampling_type: selection strategy; only 'greedy' (argmax of the
        sequence's scores) is supported
    return: (batch_size,), the selected local index of every sequence, or
        None for an empty sequence
    raises:
        ValueError: if `sampling_type` is not supported and at least one
            sequence is non-empty
    e.g.
        scores = [0.4,0.3, 0.4,0.9,0.8]
        lens = [2, 0, 3]
        return [0, None, 1]
    """
    AssertEqual(len(scores), np.sum(lens))
    scores_unconcat = sequence_unconcat(scores, lens)
    res_index = []
    for sub_score in scores_unconcat:
        if len(sub_score) == 0:
            res_index.append(None)
            continue
        if sampling_type == 'greedy':
            selected_index = np.argmax(sub_score)
        else:
            # Fail loudly instead of hitting an unbound local or silently
            # reusing the index chosen for a previous sequence.
            raise ValueError(
                "unsupported sampling_type: %r (expected 'greedy')" % (sampling_type,))
        res_index.append(selected_index)
    return res_index
예제 #12
0
    def replace_following_items(self, pos, ref_batch_data):
        """
        Keep the first `pos` items of every sequence and take the items from
        position `pos` onward from `ref_batch_data`.

        Applies to item slots and label slots (so click_id is replaced as
        well, if it is a label slot). Used for credit variance calculation.

        pos: first position (within each sequence) to take from the reference
        ref_batch_data: batch with the same batch size providing the
            replacement items

        return:
            a BatchData created from `self.conf` and `self.tensor_dict` whose
            slot values are the spliced arrays

        Ignore last_click_id.

        NOTE(review): the new values are assigned to `.values` of the
        existing tensor objects; whether those objects are shared with
        `self` depends on how BatchData copies `tensor_dict` - confirm.
        """
        batch_size = self.batch_size()
        AssertEqual(batch_size, ref_batch_data.batch_size())

        merged = BatchData(self.conf, self.tensor_dict)
        slot_names = merged.conf.item_slot_names + merged.conf.label_slot_names
        for slot in slot_names:
            if slot == 'last_click_id':
                continue
            own = merged.tensor_dict[slot].values       # (b*seq_len, *)
            ref = ref_batch_data.tensor_dict[slot].values   # (b*seq_len, *)
            # View both as (b, seq_len, *) so the split point is per sequence.
            per_sample_shape = [batch_size, -1] + list(own.shape[1:])
            head = own.reshape(per_sample_shape)[:, :pos]
            tail = ref.reshape(per_sample_shape)[:, pos:]
            spliced = np.concatenate([head, tail], 1)   # (b, seq_len, *)
            merged.tensor_dict[slot].values = spliced.reshape(own.shape)    # (b*seq_len, *)
        return merged