Exemple #1
0
def YouTubeEvaluationSampler(dataset, max_seq_len, user_feature, seed=100, sort=True):
    """Build a single-process evaluation sampler with sequence and user features.

    For every user returned by ``dataset.warm_users()`` that has at least two
    positive items, the last item of the user's history is held out as the
    evaluation target and the (up to) ``max_seq_len`` items preceding it form
    the zero-padded input sequence.

    Args:
        dataset: Object providing ``warm_users()`` and
            ``get_positive_items(user_id, sort=...)``.
        max_seq_len: Length of the zero-padded ``seq_item_id`` field.
        user_feature: Container indexed by user id whose entries are indexed
            by ``'user_gender'`` and ``'user_geo'``.
        seed: Value passed to ``random.seed``.
        sort: Forwarded to ``dataset.get_positive_items``.

    Returns:
        A ``Sampler`` built with ``num_process=1`` around the generator below.
    """
    random.seed(seed)

    def batch(dataset, user_feature=user_feature, max_seq_len=max_seq_len):
        """Yield ``(target_items, input_npy)`` pairs, one user at a time.

        After each user an ``([], [])`` pair is yielded, and after each full
        pass over the warm users a ``(None, None)`` pair is yielded
        (presumably end-of-user / end-of-pass markers for the consumer --
        confirm against ``Sampler`` and the evaluator).
        """
        record_dtype = [('seq_item_id', (np.int32, max_seq_len)),
                        ('seq_len', np.int32),
                        ('user_gender', np.int32),
                        ('user_geo', np.int32)]

        while True:
            for user_id in dataset.warm_users():
                item_list = dataset.get_positive_items(user_id, sort=sort)
                # Need at least one context item plus one held-out target.
                if len(item_list) <= 1:
                    continue

                # Everything but the last item, truncated to the most recent
                # max_seq_len entries, is the model input.
                train_items = item_list[-max_seq_len - 1:-1]
                pad_train_items = np.zeros(max_seq_len, np.int32)
                pad_train_items[:len(train_items)] = train_items

                # Allocate only for users that are actually evaluated.
                input_npy = np.zeros(1, dtype=record_dtype)
                input_npy[0] = (pad_train_items,
                                len(train_items),
                                user_feature[user_id]['user_gender'],
                                user_feature[user_id]['user_geo'])

                # The target is the held-out last item. Yielding
                # train_items[-1] would ask the model to predict an item that
                # is already part of its own input sequence.
                yield [item_list[-1]], input_npy
                yield [], []
            yield None, None

    s = Sampler(dataset=dataset, generate_batch=batch, num_process=1)

    return s
def TemporalSampler(dataset, batch_size, max_seq_len, num_process=5, seed=100):
    """Build a training sampler that emits next-item prediction batches.

    Each record holds a zero-padded window of a randomly chosen user's
    positive items (``seq_item_id``), the number of real items in that window
    (``seq_len``), and the item that directly follows the window (``label``).

    Args:
        dataset: Object providing ``total_users()`` and
            ``get_positive_items(user_id, sort=True)``.
        batch_size: Number of records per yielded array.
        max_seq_len: Length of the padded ``seq_item_id`` field.
        num_process: Forwarded to ``Sampler``.
        seed: Value passed to ``random.seed``.

    Returns:
        A ``Sampler`` wrapping the batch generator.
    """
    random.seed(seed)

    def batch(dataset, max_seq_len=max_seq_len, batch_size=batch_size):
        """Endlessly yield structured arrays of ``batch_size`` records."""
        record_dtype = [('seq_item_id', (np.int32, max_seq_len)),
                        ('seq_len', np.int32),
                        ('label', np.int32)]

        while True:
            records = np.zeros(batch_size, dtype=record_dtype)

            for row in range(batch_size):
                # Keep drawing users until one has at least two positive
                # items (one for the context, one for the label).
                while True:
                    user_id = random.randint(0, dataset.total_users() - 1)
                    history = dataset.get_positive_items(user_id, sort=True)
                    if len(history) > 1:
                        break

                # Position of the label; index 0 is excluded so that the
                # context window is never empty.
                target_pos = random.randint(1, len(history) - 1)
                window_start = max(0, target_pos - max_seq_len)
                context = history[window_start:target_pos]

                padded = np.zeros(max_seq_len, np.int32)
                padded[:len(context)] = context
                records[row] = (padded, len(context), history[target_pos])

            yield records

    return Sampler(dataset=dataset, generate_batch=batch, num_process=num_process)
Exemple #3
0
def VBPREvaluationSampler(batch_size, dataset, item_vfeature, seed=100):
    """Build a single-process evaluation sampler that attaches visual features.

    For every user returned by ``dataset.warm_users()``, the user's positive
    items followed by the user's negative items are emitted in chunks of at
    most ``batch_size`` records, each carrying ``item_vfeature[item_id]``.

    Args:
        batch_size: Maximum number of records per yielded array.
        dataset: Object providing ``warm_users()``,
            ``get_positive_items(user_id)`` and ``get_negative_items(user_id)``.
        item_vfeature: Two-dimensional array indexed by item id; its second
            dimension sets the width of the ``item_vfeature`` field.
        seed: Value passed to ``random.seed``.

    Returns:
        A ``Sampler`` built with ``num_process=1`` around the generator below.
    """
    random.seed(seed)

    def batch(dataset, batch_size=batch_size, item_vfeature=item_vfeature):
        """Yield ``(positive_row_indices, input_npy)`` pairs per chunk.

        After each user an ``([], [])`` pair is yielded, and after each full
        pass over the warm users a ``(None, None)`` pair is yielded
        (presumably end-of-user / end-of-pass markers for the consumer --
        confirm against ``Sampler`` and the evaluator).
        """
        _, dim_v = item_vfeature.shape
        record_dtype = [('user_id', np.int32),
                        ('item_id', np.int32),
                        ('item_vfeature', np.float32, dim_v)]

        while True:
            for user_id in dataset.warm_users():
                positive_items = dataset.get_positive_items(user_id)
                negative_items = dataset.get_negative_items(user_id)
                # Positives come first, so within any chunk the positive rows
                # (if any) form a prefix.
                all_items = positive_items + negative_items

                for start in range(0, len(all_items), batch_size):
                    chunk = all_items[start:start + batch_size]
                    input_npy = np.zeros(len(chunk), dtype=record_dtype)
                    for inst_ind, item_id in enumerate(chunk):
                        input_npy[inst_ind] = (user_id, item_id, item_vfeature[item_id])

                    # Positives remaining from this chunk's start, clamped to
                    # the chunk length so that the yielded indices never point
                    # past the end of input_npy when a user has more positives
                    # than fit in one batch.
                    num_positives = min(len(positive_items) - start, len(chunk))
                    if num_positives > 0:
                        yield range(num_positives), input_npy
                    else:
                        yield [], input_npy

                yield [], []
            yield None, None

    s = Sampler(dataset=dataset, generate_batch=batch, num_process=1)
    return s
    
def StratifiedPointwiseSampler(dataset, batch_size, pos_ratio=0.5, num_process=5, seed=100):
    """Build a pointwise sampler with a fixed share of positive records.

    Each batch contains ``int(batch_size * pos_ratio)`` records labelled
    ``1.0`` taken from ``dataset.next_random_record()``, followed by records
    labelled ``0.0`` whose random (user, item) pairs are rejected while
    ``dataset.is_positive`` reports them as positive.

    Args:
        dataset: Object providing ``next_random_record()``, ``total_users()``,
            ``total_items()`` and ``is_positive(user_id, item_id)``.
        batch_size: Number of records per yielded array.
        pos_ratio: Fraction of each batch filled with positive records.
        num_process: Forwarded to ``Sampler``.
        seed: Value passed to ``random.seed``.

    Returns:
        A ``Sampler`` wrapping the batch generator.
    """
    random.seed(seed)

    def batch(dataset, batch_size=batch_size, pos_ratio=pos_ratio, seed=seed):
        """Endlessly yield structured arrays of ``batch_size`` records."""
        positives_per_batch = int(batch_size * pos_ratio)
        record_dtype = [('user_id', np.int32),
                        ('item_id', np.int32),
                        ('label', np.float32)]

        while True:
            records = np.zeros(batch_size, dtype=record_dtype)

            # Leading rows: observed interactions, label 1.0.
            for row in range(positives_per_batch):
                record = dataset.next_random_record()
                records[row] = (record['user_id'], record['item_id'], 1.0)

            # Remaining rows: random pairs, redrawn until not positive.
            for row in range(positives_per_batch, batch_size):
                while True:
                    user_id = random.randint(0, dataset.total_users() - 1)
                    item_id = random.randint(0, dataset.total_items() - 1)
                    if not dataset.is_positive(user_id, item_id):
                        break
                records[row] = (user_id, item_id, 0.0)

            yield records

    return Sampler(dataset=dataset, generate_batch=batch, num_process=num_process)
Exemple #5
0
def RandomPointwiseSampler(dataset, batch_size, num_process=5, seed=100):
    """Build a pointwise sampler over uniformly random (user, item) pairs.

    Each record holds a random user id, a random item id, and a label that is
    ``1.0`` when ``dataset.is_positive(user_id, item_id)`` is truthy and
    ``0.0`` otherwise.

    Args:
        dataset: Object providing ``total_users()``, ``total_items()`` and
            ``is_positive(user_id, item_id)``.
        batch_size: Number of records per yielded array.
        num_process: Forwarded to ``Sampler``.
        seed: Value passed to ``random.seed``.

    Returns:
        A ``Sampler`` wrapping the batch generator.
    """
    random.seed(seed)

    def batch(dataset, batch_size=batch_size):
        """Endlessly yield structured arrays of ``batch_size`` records."""
        record_dtype = [('user_id', np.int32),
                        ('item_id', np.int32),
                        ('label', np.float32)]

        while True:
            records = np.zeros(batch_size, dtype=record_dtype)

            for row in range(batch_size):
                user_id = random.randint(0, dataset.total_users() - 1)
                item_id = random.randint(0, dataset.total_items() - 1)
                if dataset.is_positive(user_id, item_id):
                    label = 1.0
                else:
                    label = 0.0
                records[row] = (user_id, item_id, label)

            yield records

    return Sampler(dataset=dataset, generate_batch=batch, num_process=num_process)
def VBPRPairwiseSampler(dataset, batch_size, item_vfeature, num_process=5, seed=100):
    """Build a pairwise sampler that attaches visual features to both items.

    Each record pairs the user and item of ``dataset.next_random_record()``
    with the first item returned by ``dataset.sample_negative_items(user_id)``
    and carries the ``item_vfeature`` rows of both items.

    Args:
        dataset: Object providing ``next_random_record()`` and
            ``sample_negative_items(user_id)``.
        batch_size: Number of records per yielded array.
        item_vfeature: Two-dimensional array indexed by item id; its second
            dimension sets the width of the two feature fields.
        num_process: Forwarded to ``Sampler``.
        seed: Value passed to ``random.seed``.

    Returns:
        A ``Sampler`` wrapping the batch generator.
    """
    random.seed(seed)

    def batch(dataset, batch_size=batch_size, item_vfeature=item_vfeature, seed=seed):
        """Endlessly yield structured arrays of ``batch_size`` records."""
        _, dim_v = item_vfeature.shape
        record_dtype = [('user_id', np.int32),
                        ('p_item_id', np.int32),
                        ('n_item_id', np.int32),
                        ('p_item_vfeature', np.float32, dim_v),
                        ('n_item_vfeature', np.float32, dim_v)]

        while True:
            records = np.zeros(batch_size, dtype=record_dtype)

            for row in range(batch_size):
                record = dataset.next_random_record()
                user_id = record['user_id']
                pos_item = record['item_id']
                neg_item = dataset.sample_negative_items(user_id)[0]
                pos_feature = item_vfeature[pos_item]
                neg_feature = item_vfeature[neg_item]
                records[row] = (user_id, pos_item, neg_item, pos_feature, neg_feature)

            yield records

    return Sampler(dataset=dataset, generate_batch=batch, num_process=num_process)
Exemple #7
0
def RandomPairwiseSampler(dataset, batch_size, num_process=5, seed=100):
    """Build a pairwise sampler of (user, positive item, negative item) triples.

    Each record pairs the user and item of ``dataset.next_random_record()``
    with the first item returned by
    ``dataset.sample_negative_items(user_id)``.

    Args:
        dataset: Object providing ``next_random_record()`` and
            ``sample_negative_items(user_id)``.
        batch_size: Number of records per yielded array.
        num_process: Forwarded to ``Sampler``.
        seed: Value passed to ``random.seed``.

    Returns:
        A ``Sampler`` wrapping the batch generator.
    """
    random.seed(seed)

    def batch(dataset, batch_size=batch_size, seed=seed):
        """Endlessly yield structured arrays of ``batch_size`` records."""
        record_dtype = [('user_id', np.int32),
                        ('p_item_id', np.int32),
                        ('n_item_id', np.int32)]

        while True:
            records = np.zeros(batch_size, dtype=record_dtype)

            for row in range(batch_size):
                record = dataset.next_random_record()
                user_id = record['user_id']
                pos_item = record['item_id']
                neg_item = dataset.sample_negative_items(user_id)[0]
                records[row] = (user_id, pos_item, neg_item)

            yield records

    return Sampler(dataset=dataset, generate_batch=batch, num_process=num_process)