def init_all_vec():
    """Initialise all embedding/weight tables and write an ``_init`` snapshot.

    ``init_vec`` is applied to the user and item tables, the union of every
    value in ``group_users`` is handed to ``init_user_weight``, and
    ``init_feature_weight`` is applied to ``feature_weight``.  Each table is
    then written to its output prefix with the suffix ``"_init"``.
    """
    init_vec(user_list, user_emb)
    init_vec(itematuser_list, item_emb)

    # Every user that appears in at least one group.
    useratgroup_set = set().union(*group_users.values())
    init_user_weight(useratgroup_set, user_weight_map)
    init_feature_weight(feature_weight)

    snapshots = (
        (emb_to_file, out_user, user_emb),
        (emb_to_file, out_item, item_emb),
        (userweight_to_file, out_user_weight, user_weight_map),
        (featureweight_to_file, out_feature_weight, feature_weight),
    )
    for writer, prefix, table in snapshots:
        writer(prefix + "_init", table)
Exemple #2
0
def init_all_vec():
    """Initialise the four embedding tables and write each one to disk.

    ``user_emb`` and ``item_emb`` are initialised from ``user_list`` and
    ``itematuser_list``.  ``luser_emb`` and ``ruser_emb`` are both initialised
    from the same list of users, namely the union of all values in
    ``group_users``.  Every table is then written to its output path.
    """
    init_vec(user_list, user_emb)
    init_vec(itematuser_list, item_emb)

    # Users that belong to at least one group, as a list.
    useratgroup_list = list(set().union(*group_users.values()))
    for table in (luser_emb, ruser_emb):
        init_vec(useratgroup_list, table)

    outputs = (
        (out_user, user_emb),
        (out_item, item_emb),
        (out_luser, luser_emb),
        (out_ruser, ruser_emb),
    )
    for path, table in outputs:
        emb_to_file(path, table)
Exemple #3
0
def cal_group_emb(members):
    """Compute the weighted group embedding for ``members``.

    For every ordered pair of distinct members ``(m1, m2)`` the raw weight is
    ``math_exp(luser_emb.get(m1).dot(ruser_emb.get(m2)))``.  A member's weight
    is the sum of its row of pair weights divided by the sum over all pairs,
    and the group embedding is the weighted sum of ``user_emb.get(int(m))``.

    The function works in two passes:

    1. A probe pass looks for the first inf/NaN pair weight.  If one is found
       it prints a message, calls ``zoom(1000)`` and dumps the four embedding
       tables with a ``"zoom"`` suffix.
    2. A scoring pass computes every pair weight from scratch.  Only this pass
       contributes to the normaliser; previously the probe pass also added to
       ``sum_weight``, so the normalised weights did not sum to 1.

    Args:
        members: sequence of member ids of one group.

    Returns:
        tuple ``(group_emb, member_index_dict, weight_matrix,
        member_weight_dict)`` where ``member_index_dict`` maps each member to
        its first position in ``members``, ``weight_matrix`` holds the raw
        pair weights, and ``member_weight_dict`` holds normalised weights.

    Raises:
        RuntimeError: a pair weight is inf/NaN during the scoring pass.
        ZeroDivisionError: ``members`` has entries but no pair of distinct
            members, so the integer total stays 0 when normalising.
    """
    # First occurrence wins, exactly like list.index, but in a single scan.
    member_index_dict = {}
    for position, member in enumerate(members):
        member_index_dict.setdefault(member, position)
    group_len = len(members)

    def pair_weight(member1, member2):
        # Raw (unnormalised) weight of member2 as seen from member1.
        return math_exp(luser_emb.get(member1).dot(ruser_emb.get(member2)))

    def not_finite(weight):
        return math.isinf(weight) or math.isnan(weight)

    # Pass 1: probe only.  On the first bad weight, rescale once and stop.
    rescaled = False
    for member1 in members:
        for member2 in members:
            if member1 == member2:
                continue
            if not_finite(pair_weight(member1, member2)):
                print("weight is not a number")
                zoom(1000)
                emb_to_file(out_user + "zoom", user_emb)
                emb_to_file(out_item + "zoom", item_emb)
                emb_to_file(out_luser + "zoom", luser_emb)
                emb_to_file(out_ruser + "zoom", ruser_emb)
                rescaled = True
                break
        if rescaled:
            break

    # Pass 2: the real computation; totals start from zero here.
    weight_matrix = np.zeros((group_len, group_len))
    member_weight_dict = dict()
    sum_weight = 0
    for member1 in members:
        member_weight_dict[member1] = 0
        row = member_index_dict[member1]
        for member2 in members:
            if member1 == member2:
                continue
            weight = pair_weight(member1, member2)
            if not_finite(weight):
                print("weight is not a number")
                raise RuntimeError("still not a number")
            weight_matrix[row][member_index_dict[member2]] = weight
            sum_weight += weight
            member_weight_dict[member1] += weight

    group_emb = np.zeros(DIM, )
    for member, weight in member_weight_dict.items():
        member_weight_dict[member] = weight / sum_weight
        group_emb += member_weight_dict[member] * user_emb.get(int(member))
    return group_emb, member_index_dict, weight_matrix, member_weight_dict
Exemple #4
0
def init_all_vec():
    """Initialise all embedding tables and write the initial state to disk.

    Context, social and preference embeddings are initialised with
    ``init_vec``; per-user view weights and user embeddings with
    ``init_user_view_weight`` and ``init_user``; item embeddings with
    ``init_vec``.  The user and item embeddings are written with
    ``emb_to_file`` and the two mapping matrices are pickled.  All four
    outputs share the suffix ``"_r<reg>N<NEG_N>_init"``.

    The pickle files are opened in ``with`` blocks so the handles are closed
    (and the data flushed) as soon as each dump completes.
    """
    # initialize users' context embedding
    init_vec(view1_friend_list, user_context_emb)
    init_vec(view2_friend_list, user_context_emb)
    # initialize view1 and view2's user embedding
    init_vec(view1_user_list, user_social_emb)
    init_vec(view2_user_list, user_prefer_emb)
    # initialize users' embedding
    init_user_view_weight(all_user_set, user_weight_dict)
    init_user(all_user_set, user_emb)
    # initialize items' embedding
    init_vec(all_item_set, item_emb)

    suffix = "_r" + str(reg) + "N" + str(NEG_N) + "_init"
    emb_to_file(out_user + suffix, user_emb)
    emb_to_file(out_item + suffix, item_emb)
    with open(out_uv_matrix + suffix + ".pkl", 'wb') as matrix_file:
        pk.dump(user_item_map_matrix, matrix_file)
    with open(out_uu_matrix + suffix + ".pkl", 'wb') as matrix_file:
        pk.dump(user_user_map_matrix, matrix_file)
Exemple #5
0
def train_data(type):
    """Run the training schedule selected by ``type``.

    * ``type == 1``: ``training_user_item`` for steps ``0..user_event_N``
      (user/item tables dumped every 500000 steps and at the end), then
      ``training_group_item`` for steps ``0..group_event_N`` (luser/ruser
      tables dumped every ``group_event_N / 10`` steps and at the end).
    * ``type == 2``: ``training_user_item`` for steps ``0..user_event_N / 10``
      without dumps, then ``training_group_item`` for steps
      ``0..group_event_N`` with all four tables dumped every
      ``group_event_N / 10`` steps and at the end.
    * anything else: steps ``0..user_event_N * 8``; each step runs
      ``training_group_item`` when ``bernoilli()`` is truthy and
      ``training_user_item`` otherwise.  All four tables are dumped every
      ``user_event_N`` steps.

    In every loop a linearly decayed learning rate is printed whenever more
    than 10000 steps have passed since the previous report.
    """

    def report_decay(step, last_step, seen, total, label):
        # Returns the updated (last_step, seen) pair; prints at most once.
        if step - last_step <= 10000:
            return last_step, seen
        seen += step - last_step
        rate = init_lr * (1 - seen / (1.0 * (total + 1)))
        print(label + str(step) + "   ##########lr  " + str(rate))
        # NOTE(review): the rate (and its floor below) is only bound to a
        # local name; nothing here hands it to the training calls.  Confirm
        # whether they were meant to see it.
        if rate < init_lr * 0.0001:
            rate = init_lr * 0.0001
        return step, seen

    def dump_user_item(tag):
        emb_to_file(out_user + str(tag), user_emb)
        emb_to_file(out_item + str(tag), item_emb)

    def dump_group(tag):
        emb_to_file(out_luser + str(tag), luser_emb)
        emb_to_file(out_ruser + str(tag), ruser_emb)

    if type == 1:
        # Stage 1: user-event edges.
        step = last_step = seen = 0
        while step <= user_event_N:
            last_step, seen = report_decay(step, last_step, seen,
                                           user_event_N, "Iteration i:   ")
            if step % 500000 == 0 and step != 0 and step != user_event_N:
                dump_user_item(step)
            training_user_item(all_edges_1, user_nei)
            print("user event iteration i:  %d finished." % step)
            step += 1
        dump_user_item(user_event_N)
        print("The first stage training finished.")

        # Stage 2: group-event edges.
        step = last_step = seen = 0
        while step <= group_event_N:
            last_step, seen = report_decay(step, last_step, seen,
                                           group_event_N,
                                           "group event iteration i:   ")
            if (step % (group_event_N / 10) == 0 and step != 0
                    and step != group_event_N):
                dump_group(step)
            training_group_item(all_edges_2, group_nei, type)
            print("group event iteration i:  %d finished." % step)
            step += 1
        dump_group(group_event_N)
    elif type == 2:
        # Stage 1: shortened user-event pass, no intermediate dumps.
        step = last_step = seen = 0
        while step <= (user_event_N / 10):
            last_step, seen = report_decay(step, last_step, seen,
                                           user_event_N, "Iteration i:   ")
            training_user_item(all_edges_1, user_nei)
            print("user event iteration i:  %d finished." % step)
            step += 1
        print("The first stage training finshed.")

        # Stage 2: group-event edges, dumping all four tables.
        step = last_step = seen = 0
        while step <= group_event_N:
            last_step, seen = report_decay(step, last_step, seen,
                                           group_event_N, "Iteration i:   ")
            if (step % (group_event_N / 10) == 0 and step != 0
                    and step != group_event_N):
                dump_user_item(step)
                dump_group(step)
            training_group_item(all_edges_2, group_nei, type)
            print("Iteration i:  %d finished." % step)
            step += 1
        dump_user_item(group_event_N)
        dump_group(group_event_N)
    else:
        # Interleaved schedule: one randomly chosen kind of step per pass.
        step = last_step = seen = 0
        while step <= (user_event_N * 8):
            last_step, seen = report_decay(step, last_step, seen,
                                           user_event_N, "Iteration i:   ")
            if step % (user_event_N) == 0 and step != 0:
                dump_user_item(step)
                dump_group(step)
            if bernoilli():
                training_group_item(all_edges_2, group_nei, type)
            else:
                training_user_item(all_edges_1, user_nei)

            print("Iteration i:  %d finished." % step)
            step += 1
Exemple #6
0
                emb_to_file(out_user + str(iter), user_emb)
                emb_to_file(out_item + str(iter), item_emb)
                emb_to_file(out_luser + str(iter), luser_emb)
                emb_to_file(out_ruser + str(iter), ruser_emb)
            if bernoilli():
                training_group_item(all_edges_2, group_nei, type)
            else:
                training_user_item(all_edges_1, user_nei)

            print("Iteration i:  %d finished." % iter)
            iter += 1


if __name__ == "__main__":
    # Time get_pop_pro(); get_pop_pro_from_file() is the disabled alternative.
    starttime = datetime.datetime.now()
    get_pop_pro()
    endtime = datetime.datetime.now()
    print(endtime - starttime)

    # Set-up that runs before training.
    init_neg_table()
    print("initial neg table")
    init_all_vec()
    init_sigmod_table()

    # Run schedule 3 of train_data.
    print("training starting")
    train_data(3)
    print("training finished")

    # Final snapshot of the four embedding tables.
    for _prefix, _table in (
        (out_user, user_emb),
        (out_item, item_emb),
        (out_luser, luser_emb),
        (out_ruser, ruser_emb),
    ):
        emb_to_file(_prefix + "finished", _table)
def train_data(type):
    """Run the training schedule selected by ``type``.

    * ``type == 1``: load user/item embeddings with ``get_emb``, then run
      ``training_group_item`` for steps ``0..group_event_N * 10``.  The user
      and feature weights are dumped whenever the step is a non-zero multiple
      of ``group_event_N`` other than ``group_event_N`` itself.
    * ``type == 2``: same loading and loop length; the user/item embeddings
      and both weight tables are dumped at every non-zero multiple of
      ``group_event_N``.
    * anything else: steps ``0..user_event_N * 30``; each step runs
      ``training_group_item`` when ``bernoilli()`` is truthy and
      ``training_user_item`` otherwise.  Everything is dumped at every
      non-zero multiple of ``user_event_N * 2``, and progress is printed
      every 10000 steps.

    The last branch used to contain its loop body twice, so each pass of the
    ``while`` ran two training steps and only checked decay and checkpoints
    on every other step.  The duplicate is removed: one pass is one step.

    In every loop a linearly decayed learning rate is printed whenever more
    than 10000 steps have passed since the previous report.
    """

    def report_decay(step, last_step, seen, total, label):
        # Returns the updated (last_step, seen) pair; prints at most once.
        if step - last_step <= 10000:
            return last_step, seen
        seen += step - last_step
        rate = init_lr * (1 - seen / (1.0 * (total + 1)))
        print(label + str(step) + "   ##########lr  " + str(rate))
        # NOTE(review): the rate (and its floor below) is only bound to a
        # local name; nothing here hands it to the training calls.  Confirm
        # whether they were meant to see it.
        if rate < init_lr * 0.0001:
            rate = init_lr * 0.0001
        return step, seen

    def dump_embeddings(tag):
        emb_to_file(out_user + str(tag), user_emb)
        emb_to_file(out_item + str(tag), item_emb)

    def dump_weights(tag):
        userweight_to_file(out_user_weight + str(tag), user_weight_map)
        featureweight_to_file(out_feature_weight + str(tag), feature_weight)

    if type == 1:
        # read trained user and event embedding
        get_emb(init_user, user_emb)
        get_emb(init_item, item_emb)
        print("The first stage training finished.")
        # train with group-event
        step = last_step = seen = 0
        while step <= group_event_N * 10:
            last_step, seen = report_decay(step, last_step, seen,
                                           group_event_N,
                                           "group event iteration i:   ")
            if (step % (group_event_N) == 0 and step != 0
                    and step != group_event_N):
                dump_weights(step)
            training_group_item(all_edges_2, group_nei, type)
            print("group event iteration i:  %d finished." % step)
            step += 1
    elif type == 2:
        get_emb(init_user, user_emb)
        get_emb(init_item, item_emb)
        print("The first stage training finshed.")
        # train with group-event
        step = last_step = seen = 0
        while step <= group_event_N * 10:
            last_step, seen = report_decay(step, last_step, seen,
                                           group_event_N, "Iteration i:   ")
            if step % (group_event_N) == 0 and step != 0:
                dump_embeddings(step)
                dump_weights(step)
            training_group_item(all_edges_2, group_nei, type)
            print("Iteration i:  %d finished." % step)
            step += 1
    else:
        step = last_step = seen = 0
        while step <= (user_event_N * 30):
            last_step, seen = report_decay(step, last_step, seen,
                                           user_event_N, "Iteration i:   ")
            if step % (user_event_N * 2) == 0 and step != 0:
                dump_embeddings(step)
                dump_weights(step)
            if bernoilli():
                training_group_item(all_edges_2, group_nei, type)
            else:
                training_user_item(all_edges_1, user_nei)
            if step % 10000 == 0:
                print("Iteration i:  %d finished." % step)
            step += 1


if __name__ == "__main__":
    get_pop_pro()
    # init_neg_table() is disabled here; the progress line is still printed.
    print("initial neg table")
    init_all_vec()
    init_sigmod_table()

    # Run schedule 2 of train_data.
    print("training starting")
    train_data(2)
    print("training finished")

    # Final snapshot of the embeddings and both weight tables.
    for _writer, _prefix, _table in (
        (emb_to_file, out_user, user_emb),
        (emb_to_file, out_item, item_emb),
        (userweight_to_file, out_user_weight, user_weight_map),
        (featureweight_to_file, out_feature_weight, feature_weight),
    ):
        _writer(_prefix + "_finished", _table)
Exemple #9
0
def train_data():
    """Alternate view-sampling updates and real-link updates for each round.

    Rounds ``0..ITER_ROUND`` each run two phases:

    1. ``sample_T`` steps of ``training_user_in_view``: view 1 when
       ``bernoilli(social_prefer_p)`` is truthy, view 2 otherwise.
    2. ``link_iter_num`` steps of ``train_user_user_relation``.

    On every fourth round (including round 0) the user embeddings are written
    with ``emb_to_file`` and ``user_user_map_matrix`` is pickled.  The pickle
    file is opened in a ``with`` block so the handle is closed once the dump
    completes.  Dumps of ``item_emb`` and ``user_item_map_matrix`` are
    disabled in this variant.
    """
    round_no = 0
    # NOTE(review): im_iter and ex_iter are never advanced in this function,
    # so the logged "Iteration i" is always 0 and the sample counters have
    # the previous last-count subtracted from them.  The last-count values
    # also carry over between rounds while view_iter/link_iter restart at 0.
    # Confirm whether view_iter/link_iter (or running totals) were intended.
    im_iter = 0
    ex_iter = 0
    im_last_count = 0
    ex_last_count = 0
    im_current_sample_count = 0
    ex_current_sample_count = 0
    while round_no <= ITER_ROUND:
        # Phase 1: sampled view relations.
        view_iter = 0
        while view_iter < sample_T:
            if view_iter - im_last_count > 10000:
                im_current_sample_count += im_iter - im_last_count
                im_last_count = view_iter
                implicit_lr = init_lr * (1 - im_current_sample_count /
                                         (1.0 * (view_iter_num + 1)))
                print("Iteration i:   " + str(im_iter) + "   ##########lr  " +
                      str(implicit_lr))
                if implicit_lr < init_lr * 0.0001:
                    implicit_lr = init_lr * 0.0001
            # randomly sample an implicit relation graph
            if bernoilli(social_prefer_p):
                # social user user relation
                training_user_in_view(1)
            else:
                # preference user user relation
                training_user_in_view(2)
            if view_iter % 10000 == 0:
                print("view iter %d finished." % view_iter)
            view_iter += 1

        # Phase 2: update by real links.
        link_iter = 0
        while link_iter < link_iter_num:
            if link_iter - ex_last_count > 10000:
                ex_current_sample_count += ex_iter - ex_last_count
                ex_last_count = link_iter
                # NOTE(review): the denominator is view_iter_num, not
                # link_iter_num; confirm this is intended.
                explicit_lr = init_lr * (1 - ex_current_sample_count /
                                         (1.0 * (view_iter_num + 1)))
                print("Iteration i:   " + str(ex_iter) + "   ##########lr  " +
                      str(explicit_lr))
                if explicit_lr < init_lr * 0.0001:
                    explicit_lr = init_lr * 0.0001
            train_user_user_relation()
            if link_iter % 1000 == 0:
                print("real edge iter %d finished." % link_iter)
            link_iter += 1

        # write embedding into file
        if round_no % 4 == 0:
            tag = ("_r" + str(reg) + "N" + str(NEG_N) + "_round" +
                   str(round_no))
            emb_to_file(out_user + tag, user_emb)
            with open(out_uu_matrix + tag + ".pkl", 'wb') as matrix_file:
                pk.dump(user_user_map_matrix, matrix_file)
        print("Round i:  %d finished." % round_no)
        round_no += 1