Ejemplo n.º 1
0
def generate_train_test_samples(file_name,train_begin_date, train_end_date,
                                test_begin_date, test_end_date,
                                save_name='data', cold_start=False,
                                minT=1, maxT=32, p=0.15, max_length=5):
    """Run the full data-preparation pipeline and pickle every artefact.

    Steps, in order: split the raw file into train/test portions, build a
    ``SocialGraph`` from the training portion, generate random paths on it,
    derive training samples from those paths, and derive test samples from
    the test portion. Each intermediate result is written to disk.

    Args:
        file_name: Forwarded to ``split_train_test_data``.
        train_begin_date, train_end_date: Training window; forwarded to
            ``split_train_test_data`` and embedded in the graph, path and
            training-data file names.
        test_begin_date, test_end_date: Testing window; forwarded to
            ``split_train_test_data`` and embedded in the test-data file name.
        save_name: Prefix of every output file name.
        cold_start: Forwarded to ``generate_test_samples``.
        minT, maxT, p, max_length: Forwarded unchanged to
            ``social_implicit_path_generator``.

    Returns:
        True once all four files have been written.
    """
    def dump(obj, path):
        # Use a context manager so the handle is flushed and closed even if
        # pickling raises; the bare ``open(...)`` used before leaked it.
        with open(path, 'wb') as handle:
            cPickle.dump(obj, handle)

    print('Spliting Data...')
    train_data, test_data = split_train_test_data(
        file_name, train_begin_date, train_end_date, test_begin_date, test_end_date)

    print('Building Graph...')
    g = SocialGraph()
    g.build(data=train_data)
    print('Saving Graph...')
    dump(g, '{}_{}_{}_graph.cpkl'.format(save_name, train_begin_date, train_end_date))

    print('Generating Random Path...')
    random_path = social_implicit_path_generator(g, minT=minT, maxT=maxT, p=p, max_length=max_length)
    print('Saving Random Path...')
    # NOTE: this file uses the '.pkl' suffix while the others use '.cpkl';
    # kept as-is because existing loaders may depend on the name.
    dump(random_path, '{}_{}_{}_path.pkl'.format(save_name, train_begin_date, train_end_date))

    print('Generating Training Data...')
    train_data = sample_generator(g, random_path)
    print('Saving Training Data...')
    dump(train_data, '{}_{}_{}_train_data.cpkl'.format(save_name, train_begin_date, train_end_date))

    print('Generating Testing Data...')
    test_data = generate_test_samples(test_data, cold_start=cold_start)
    print('Saving Testing Data...')
    dump(test_data, '{}_{}_{}_test_data.cpkl'.format(save_name, test_begin_date, test_end_date))

    print('Finish!')
    return True
Ejemplo n.º 2
0
def social_implicit_path_generator(
        g=None, minT=1, maxT=32, p=0.15, max_length=5):
    """Generate random-walk paths on the user-user and item-item graphs.

    Args:
        g: Graph object exposing ``split()``, which must return a
            ``(user_graph, item_graph)`` pair. When omitted, a fresh empty
            ``SocialGraph`` is created for this call.
        minT, maxT, p, max_length: Forwarded unchanged to
            ``weighted_random_walk_generator`` for both graphs.

    Returns:
        dict with two entries: ``'user_path'`` and ``'item_path'``, holding
        whatever ``weighted_random_walk_generator`` returns for the
        respective graph.
    """
    # A ``SocialGraph()`` default in the signature would be built once at
    # definition time and shared by every call; build it lazily instead.
    if g is None:
        g = SocialGraph()

    random_path = dict()
    # Stage 1. Split social graph into user-user and item-item graphs
    user_graph, item_graph = g.split()

    # Stage 2. Random walk in user-user graph
    print('Random walk in user-user graph')
    random_path['user_path'] = weighted_random_walk_generator(user_graph,
                                                              minT=minT,
                                                              maxT=maxT,
                                                              p=p,
                                                              max_length=max_length)
    # Stage 3. Random walk in item-item graph
    print('Random walk in item-item graph')
    random_path['item_path'] = weighted_random_walk_generator(item_graph,
                                                              minT=minT,
                                                              maxT=maxT,
                                                              p=p,
                                                              max_length=max_length)
    return random_path
Ejemplo n.º 3
0
    def fit(self, data, g=None, epoch=5, batch_size=32):
        """Train the joint model on ``data`` for ``epoch`` passes.

        Side effects visible in this method:
          * ``self.config['user_id']`` / ``self.config['item_id']`` are set
            to the graph's node ids, merged with any ids already stored.
          * ``self.config['review_tokenizer']`` is set from ``preprocess``.
          * ``g.review`` is replaced by a dict mapping the same keys to the
            preprocessed reviews.
          * One mean training loss per epoch is appended to
            ``self.config['loss_history']``.

        Args:
            data: Mapping of sample columns; ``len(data['user1'])`` is taken
                as the number of training samples.
            g: Graph exposing ``node_u``, ``node_i`` and ``review`` dicts.
                When omitted, a fresh empty ``SocialGraph`` is created.
            epoch: Number of passes over ``data``.
            batch_size: Number of samples per training step.
        """
        # A ``SocialGraph()`` default in the signature would be a single
        # instance shared (and mutated via ``g.review``) across calls.
        if g is None:
            g = SocialGraph()

        def merge_ids(known, graph_ids):
            # Order-preserving union: ids already stored keep their position,
            # unseen graph ids are appended. The previous expression
            # ``np.asarray(set(known + graph_ids))`` added the arrays
            # element-wise and produced a 0-d object array holding a set.
            if known is None:
                return np.asarray(graph_ids)
            merged = np.asarray(known).tolist()
            seen = set(merged)
            for node_id in graph_ids:
                if node_id not in seen:
                    seen.add(node_id)
                    merged.append(node_id)
            return np.asarray(merged)

        # preprocess data
        # list(...) keeps this working when .keys()/.values() return views.
        self.config['user_id'] = merge_ids(self.config['user_id'], list(g.node_u.keys()))
        self.config['item_id'] = merge_ids(self.config['item_id'], list(g.node_i.keys()))
        print('Review preprocessing...')
        raw_reviews = list(g.review.values())
        if self.config['review_tokenizer'] is None:
            reviews, self.config['review_tokenizer'] = self.preprocess(raw_reviews)
        else:
            reviews, self.config['review_tokenizer'] = self.preprocess(raw_reviews, token=self.config['review_tokenizer'])
        g.review = dict(zip(g.review.keys(), reviews))

        # Build Model
        if self.joint_model is None:
            self.build_model()

        # Training Model
        num_train = len(data['user1'])
        # Ceiling division with integers only; true division would hand a
        # float to range() on Python 3.
        num_iters = (num_train + batch_size - 1) // batch_size
        for i in range(epoch):
            print('{}-th epoch begin:'.format(i))
            data = self.data_shuffle(data)
            iters = tqdm(range(num_iters))
            train_loss = []
            for j in iters:
                train_data = self.train_data_generator(data, g, j, batch_size, num_train)
                history = self.joint_model.fit(train_data, epochs=1, batch_size=batch_size, verbose=0)
                batch_loss = history.history['loss'][-1]
                train_loss.append(batch_loss)
                iters.set_description('Training loss: {:.4} >>>>'.format(batch_loss))
            epoch_loss = np.mean(train_loss)
            self.config['loss_history'].append(epoch_loss)
            print('{}-th epoch ended, training loss {:.4}.'.format(i, epoch_loss))
Ejemplo n.º 4
0
def negative_sampling_prob(g=None, random_path=None):
    """Compute per-node sampling probabilities from random-path frequencies.

    Each node's probability is the number of times it occurs across all
    paths divided by the total number of node occurrences in those paths.

    Args:
        g: Graph exposing ``node_u`` and ``node_i`` dicts keyed by user id
            and item id. When omitted, a fresh empty ``SocialGraph`` is used.
        random_path: Mapping with ``'user_path'`` and ``'item_path'``
            entries, each an iterable of paths (iterables of node ids).

    Returns:
        ``(user_prob, item_prob)``: two 1-D float arrays, ordered like
        ``g.node_u.keys()`` and ``g.node_i.keys()`` respectively.

    Raises:
        KeyError: If a path contains a node id missing from the graph.

    Note:
        If a path collection contains no nodes while the graph does, the
        division by a zero total yields NaN entries (with a NumPy warning).
    """
    # A ``SocialGraph()`` default in the signature would be built once at
    # definition time and shared by every call; build it lazily instead.
    if g is None:
        g = SocialGraph()

    def frequencies(node_ids, paths):
        counts = dict.fromkeys(node_ids, 0)
        total = 0
        for path in paths:
            for node in path:
                counts[node] += 1
                total += 1
        # Build the array explicitly in ``node_ids`` order so positions line
        # up with the graph's key order, and from a real list so NumPy does
        # not wrap a dict view into a 0-d object array.
        ordered = np.asarray([counts[node] for node in node_ids], dtype=float)
        return ordered / total

    users = list(g.node_u.keys())
    items = list(g.node_i.keys())
    user_sample_prob = frequencies(users, random_path['user_path'])
    item_sample_prob = frequencies(items, random_path['item_path'])
    return user_sample_prob, item_sample_prob
Ejemplo n.º 5
0
def get_suggested_friends(user_id):
    """Return ``User`` objects for friends-of-friends of ``user_id``.

    A candidate is suggested when it is reachable through one of the user's
    active friends, is not the user, and is not a node of the local graph
    (the graph's nodes are the user plus the user's own friends).

    Args:
        user_id: Id of the user to build suggestions for.

    Returns:
        List of results of ``User.query.get(...)``, one per distinct
        suggested user id, in discovery order.
    """
    friends = Friend.query.filter(Friend.user_1 == user_id,
                                  Friend.active == True).all()

    # only suggesting friends of friends
    # only user's friends are the keys in the social graph's underlying dict

    social_graph = SocialGraph()
    social_graph.add_friend_node(user_id)
    for friend in friends:
        social_graph.add_friend_edge(user_id, friend.user_2)
        # Graph nodes are user ids, so membership must be tested with the
        # friend's id; testing the Friend row itself was always "not in".
        if friend.user_2 not in social_graph:
            social_graph.add_friend_node(friend.user_2)
            friends_friends = Friend.query.filter(Friend.user_1 == friend.user_2,
                                                  Friend.active == True).all()

            for friend_of_friend in friends_friends:
                social_graph.add_friend_edge(friend.user_2, friend_of_friend.user_2)

    suggested_friends = []
    # Ids already suggested. The result list holds User objects, so checking
    # an id against that list never matched and let duplicates through.
    suggested_ids = set()

    for friend_user_id in social_graph.get_friend_connections(user_id):
        for friend_of_friend in social_graph.get_friend_connections(friend_user_id):
            if (friend_of_friend == user_id
                    or friend_of_friend in social_graph
                    or friend_of_friend in suggested_ids):
                continue
            suggested_ids.add(friend_of_friend)
            suggested_friends.append(User.query.get(friend_of_friend))
            # NOTE(review): this only leaves the inner loop, so later friends
            # can still contribute one suggestion each. Kept as in the
            # original; confirm whether a hard cap of two was intended.
            if len(suggested_friends) > 1:
                break

    return suggested_friends
Ejemplo n.º 6
0
def sample_generator(g=SocialGraph(), random_path=None):
    """Build paired training samples from a graph and its random paths.

    The result is a dict of 14 parallel lists. Entry ``k`` of every list
    describes one pair: an anchor record (``user1``, ``item1``, ``review1``,
    ``rating1``, ``label1``, ``success1``) taken from an observed
    (user, item) edge of ``g``, and a second record (``user2``, ``item2``,
    ``review2``, ``rating2``, ``label2``, ``success2``). ``context_u`` and
    ``context_i`` are 0/1 flags for the pair; ``success1`` is always 1.

    For every (user, item) edge, two passes are made -- one driven by users
    (``g.node_u``) and one driven by items (``g.node_i``). Each pass emits,
    per context neighbour, four pairs: (in-context, real second record),
    (in-context, sampled second record), (out-of-context, real second
    record) and (out-of-context, sampled second record).

    Args:
        g: Graph exposing ``node_u``, ``node_i``, ``review``, ``rating`` and
            ``label`` mappings. NOTE(review): the default ``SocialGraph()``
            is evaluated once, when the function is defined.
        random_path: Mapping with ``'user_path'`` and ``'item_path'``.

    Returns:
        Whatever ``shuffle_samples`` returns for the populated dict.

    NOTE(review): ``review_keys[...]`` indexes the result of
    ``g.review.keys()`` by position, which requires a list (Python 2); on
    Python 3 dict views are not subscriptable.
    """
    samples = {'user1':[], 'item1':[], 'review1':[], 'rating1':[], 'label1':[], 'context_u':[], 'success1':[],
               'user2':[], 'item2':[], 'review2':[], 'rating2':[], 'label2':[], 'context_i':[], 'success2':[]}
    print('Transfer random path to context...')
    # user_context[u] / item_context[i] are iterated and tested with ``in``
    # below; presumably the neighbours that co-occur with the node on a path.
    user_context = path2context(g.node_u.keys(), random_path['user_path'])
    item_context = path2context(g.node_i.keys(), random_path['item_path'])
    user_nodes = np.asarray(g.node_u.keys())  # the user id
    item_nodes = np.asarray(g.node_i.keys())  # the item id
    print('Calculate negative sampling probability...')
    user_sample_prob, item_sample_prob = negative_sampling_prob(g, random_path)
    user_sample_index = 0  # the user negative sampling index
    item_sample_index = 0 # the item negative sampling index
    # Pre-draw pools of candidates (10x the number of nodes) that
    # fast_negative_sample consumes via the running indices above.
    negative_user_set = np.random.choice(user_nodes, size=len(user_nodes)*10, p=user_sample_prob)
    negative_item_set = np.random.choice(item_nodes, size=len(item_nodes)*10, p=item_sample_prob)
    # Generate user samples
    num_reviews = len(g.review)  # number of reviews
    sample_index = np.r_[0:num_reviews]  # the index set used to random selection
    np.random.shuffle(sample_index)
    review_keys = g.review.keys()
    uni_labels = np.unique(g.label.values())  # unique labels
    print('Generate user samples')
    for user in tqdm(user_nodes):
        items = g.node_u[user]
        # item = np.random.choice(items)
        for item in items:
            # In-context user, real second record: a context user paired with
            # one of that user's own items (success2 = 1).
            for other_user in user_context[user]:
                samples['user1'].append(user)
                samples['item1'].append(item)
                samples['review1'].append((user,item))
                samples['rating1'].append(g.rating[(user,item)])
                samples['label1'].append(g.label[(user,item)])
                samples['success1'].append(1)
                samples['context_u'].append(1)
                other_items = g.node_u[other_user]
                other_item = np.random.choice(other_items)
                samples['user2'].append(other_user)
                samples['item2'].append(other_item)
                samples['review2'].append((other_user,other_item))
                samples['rating2'].append(g.rating[(other_user,other_item)])
                samples['label2'].append(g.label[(other_user,other_item)])
                samples['success2'].append(1)
                if other_item in item_context[item]:
                    samples['context_i'].append(1)
                else:
                    samples['context_i'].append(0)
                # In-context user, sampled second record: same context user
                # with an item from negative_sample (presumably one outside
                # other_items). If none is returned, fall back to a random
                # item and mark success2 = 1; otherwise success2 = 0.
                samples['user1'].append(user)
                samples['item1'].append(item)
                samples['review1'].append((user, item))
                samples['rating1'].append(g.rating[(user, item)])
                samples['label1'].append(g.label[(user, item)])
                samples['success1'].append(1)
                samples['context_u'].append(1)
                negative_item = negative_sample(sets=item_nodes, conflicts=other_items)
                if negative_item is None:
                    negative_item = np.random.choice(item_nodes)
                    samples['success2'].append(1)
                else:
                    samples['success2'].append(0)
                samples['user2'].append(other_user)
                samples['item2'].append(negative_item)
                # NOTE(review): update_sample_index is called separately for
                # review2 and rating2; if it advances or randomises the index,
                # the two fields refer to different reviews -- confirm.
                samples['review2'].append(review_keys[update_sample_index(sample_index)])
                samples['rating2'].append(g.rating[review_keys[update_sample_index(sample_index)]])
                samples['label2'].append(np.random.choice(uni_labels))
                if negative_item in item_context[item]:
                    samples['context_i'].append(1)
                else:
                    samples['context_i'].append(0)
            # Out-of-context user, real second record: one iteration per
            # context neighbour, so both halves contribute the same count.
            for i in range(len(user_context[user])):
                samples['user1'].append(user)
                samples['item1'].append(item)
                samples['review1'].append((user, item))
                samples['rating1'].append(g.rating[(user, item)])
                samples['label1'].append(g.label[(user, item)])
                samples['success1'].append(1)
                negative_user, user_sample_index = fast_negative_sample(random_sets=negative_user_set, conflicts=user_context[user], index=user_sample_index)
                # context_u is appended twice on purpose: once for this pair
                # and once for the sampled pair emitted further down, which
                # reuses the same negative_user.
                if negative_user is None:
                    negative_user = np.random.choice(user_nodes)
                    samples['context_u'].append(1)
                    samples['context_u'].append(1)
                else:
                    samples['context_u'].append(0)
                    samples['context_u'].append(0)
                other_items = g.node_u[negative_user]
                other_item = np.random.choice(other_items)
                samples['user2'].append(negative_user)
                samples['item2'].append(other_item)
                samples['review2'].append((negative_user, other_item))
                samples['rating2'].append(g.rating[(negative_user, other_item)])
                samples['label2'].append(g.label[(negative_user, other_item)])
                samples['success2'].append(1)
                if other_item in item_context[item]:
                    samples['context_i'].append(1)
                else:
                    samples['context_i'].append(0)
                # Out-of-context user, sampled second record (context_u for
                # this pair was already appended above).
                samples['user1'].append(user)
                samples['item1'].append(item)
                samples['review1'].append((user, item))
                samples['rating1'].append(g.rating[(user, item)])
                samples['label1'].append(g.label[(user, item)])
                samples['success1'].append(1)
                negative_item = negative_sample(sets=item_nodes, conflicts=other_items)
                if negative_item is None:
                    negative_item = np.random.choice(item_nodes)
                    samples['success2'].append(1)
                else:
                    samples['success2'].append(0)
                samples['user2'].append(negative_user)
                samples['item2'].append(negative_item)
                samples['review2'].append(review_keys[update_sample_index(sample_index)])
                samples['rating2'].append(g.rating[review_keys[update_sample_index(sample_index)]])
                samples['label2'].append(np.random.choice(uni_labels))
                if negative_item in item_context[item]:
                    samples['context_i'].append(1)
                else:
                    samples['context_i'].append(0)
    # Generate item samples
    # Mirror of the user pass with the roles of users and items swapped.
    print('Generate item samples')
    for item in tqdm(item_nodes):
        users = g.node_i[item]
        # user = np.random.choice(users)
        for user in users:
            # In-context item, real second record: a context item paired with
            # one of its own users (success2 = 1).
            for other_item in item_context[item]:
                samples['user1'].append(user)
                samples['item1'].append(item)
                samples['review1'].append((user,item))
                samples['rating1'].append(g.rating[(user,item)])
                samples['label1'].append(g.label[(user,item)])
                samples['success1'].append(1)
                samples['context_i'].append(1)
                other_users = g.node_i[other_item]
                other_user = np.random.choice(other_users)
                samples['user2'].append(other_user)
                samples['item2'].append(other_item)
                samples['review2'].append((other_user,other_item))
                samples['rating2'].append(g.rating[(other_user,other_item)])
                samples['label2'].append(g.label[(other_user,other_item)])
                samples['success2'].append(1)
                if other_user in user_context[user]:
                    samples['context_u'].append(1)
                else:
                    samples['context_u'].append(0)
                # In-context item, sampled second record: a user from
                # negative_sample (presumably one outside other_users).
                samples['user1'].append(user)
                samples['item1'].append(item)
                samples['review1'].append((user, item))
                samples['rating1'].append(g.rating[(user, item)])
                samples['label1'].append(g.label[(user, item)])
                samples['success1'].append(1)
                samples['context_i'].append(1)
                negative_user = negative_sample(sets=user_nodes, conflicts=other_users)
                if negative_user is None:
                    negative_user = np.random.choice(user_nodes)
                    samples['success2'].append(1)
                else:
                    samples['success2'].append(0)
                samples['user2'].append(negative_user)
                # NOTE(review): item2 is the anchor ``item`` here, whereas the
                # mirrored user-side branch stores the context node
                # (other_user) in user2; by symmetry ``other_item`` may have
                # been intended -- confirm.
                samples['item2'].append(item)
                samples['review2'].append(review_keys[update_sample_index(sample_index)])
                samples['rating2'].append(g.rating[review_keys[update_sample_index(sample_index)]])
                samples['label2'].append(np.random.choice(uni_labels))
                if negative_user in user_context[user]:
                    samples['context_u'].append(1)
                else:
                    samples['context_u'].append(0)
            # Out-of-context item, real second record: one iteration per
            # context neighbour.
            for i in range(len(item_context[item])):
                samples['user1'].append(user)
                samples['item1'].append(item)
                samples['review1'].append((user, item))
                samples['rating1'].append(g.rating[(user, item)])
                samples['label1'].append(g.label[(user, item)])
                samples['success1'].append(1)
                negative_item, item_sample_index = fast_negative_sample(random_sets=negative_item_set, conflicts=item_context[item], index=item_sample_index)
                # context_i is appended twice on purpose: once for this pair
                # and once for the sampled pair emitted further down.
                if negative_item is None:
                    negative_item = np.random.choice(item_nodes)
                    samples['context_i'].append(1)
                    samples['context_i'].append(1)
                else:
                    samples['context_i'].append(0)
                    samples['context_i'].append(0)
                other_users = g.node_i[negative_item]
                other_user = np.random.choice(other_users)
                samples['user2'].append(other_user)
                samples['item2'].append(negative_item)
                samples['review2'].append((other_user, negative_item))
                samples['rating2'].append(g.rating[(other_user, negative_item)])
                samples['label2'].append(g.label[(other_user, negative_item)])
                samples['success2'].append(1)
                if other_user in user_context[user]:
                    samples['context_u'].append(1)
                else:
                    samples['context_u'].append(0)
                # Out-of-context item, sampled second record (context_i for
                # this pair was already appended above).
                samples['user1'].append(user)
                samples['item1'].append(item)
                samples['review1'].append((user, item))
                samples['rating1'].append(g.rating[(user, item)])
                samples['label1'].append(g.label[(user, item)])
                samples['success1'].append(1)
                negative_user = negative_sample(sets=user_nodes, conflicts=other_users)
                if negative_user is None:
                    negative_user = np.random.choice(user_nodes)
                    samples['success2'].append(1)
                else:
                    samples['success2'].append(0)
                samples['user2'].append(negative_user)
                samples['item2'].append(negative_item)
                samples['review2'].append(review_keys[update_sample_index(sample_index)])
                samples['rating2'].append(g.rating[review_keys[update_sample_index(sample_index)]])
                samples['label2'].append(np.random.choice(uni_labels))
                if negative_user in user_context[user]:
                    samples['context_u'].append(1)
                else:
                    samples['context_u'].append(0)
    return shuffle_samples(samples)