def init_all_vec():
    """Initialise every trainable table and dump the initial state to disk.

    User and item embeddings are initialised with ``init_vec``; per-user
    weights are initialised for every user that appears in at least one
    group of ``group_users``; feature weights are initialised last. Each
    table is then written to its output path with an ``_init`` suffix.
    """
    init_vec(user_list, user_emb)
    init_vec(itematuser_list, item_emb)

    # Union of the member collections of all groups.
    users_in_groups = set().union(*group_users.values())
    init_user_weight(users_in_groups, user_weight_map)
    init_feature_weight(feature_weight)

    for path_prefix, table in ((out_user, user_emb), (out_item, item_emb)):
        emb_to_file(path_prefix + "_init", table)
    userweight_to_file(out_user_weight + "_init", user_weight_map)
    featureweight_to_file(out_feature_weight + "_init", feature_weight)
def init_all_vec():
    """Initialise all embedding tables and write them to their output files.

    User and item embeddings are initialised first. Every user that belongs
    to at least one group in ``group_users`` additionally gets an entry in
    ``luser_emb`` and ``ruser_emb``. All four tables are then written with
    ``emb_to_file`` to their un-suffixed output paths.
    """
    init_vec(user_list, user_emb)
    init_vec(itematuser_list, item_emb)

    # Both group-side tables are initialised over the same member ordering.
    group_member_ids = list(set().union(*group_users.values()))
    for side_table in (luser_emb, ruser_emb):
        init_vec(group_member_ids, side_table)

    outputs = (
        (out_user, user_emb),
        (out_item, item_emb),
        (out_luser, luser_emb),
        (out_ruser, ruser_emb),
    )
    for path, table in outputs:
        emb_to_file(path, table)
def _pairwise_member_weights(members, member_index_dict):
    """Compute the unnormalised pairwise weights for one group.

    Returns ``(weight_matrix, member_totals, total)`` where
    ``weight_matrix[i][j]`` is ``math_exp(luser_emb[m_i] . ruser_emb[m_j])``
    for distinct members, ``member_totals`` sums each member's row and
    ``total`` sums everything. Returns ``None`` as soon as a weight is
    infinite or NaN.
    """
    group_len = len(members)
    weight_matrix = np.zeros((group_len, group_len))
    member_totals = {}
    total = 0
    for member1 in members:
        member_totals[member1] = 0
        for member2 in members:
            if member1 == member2:
                continue
            weight = math_exp(
                luser_emb.get(member1).dot(ruser_emb.get(member2)))
            if math.isinf(weight) or math.isnan(weight):
                return None
            weight_matrix[member_index_dict[member1]][
                member_index_dict[member2]] = weight
            total += weight
            member_totals[member1] += weight
    return weight_matrix, member_totals, total


def cal_group_emb(members):
    """Build a group embedding as a weighted sum of its members' embeddings.

    Each member's weight is the sum of its pairwise weights towards the
    other members, divided by the sum over all pairs, so the returned
    weights add up to 1.

    If any pairwise weight overflows to inf/NaN, ``zoom(1000)`` is called
    (presumably rescaling the embeddings -- confirm against ``zoom``), the
    four embedding tables are dumped with a ``zoom`` suffix, and the weights
    are recomputed once from scratch.

    Args:
        members: sequence of member ids of one group.

    Returns:
        Tuple ``(group_emb, member_index_dict, weight_matrix,
        member_weight_dict)``: the ``DIM``-sized group vector, the mapping
        member -> row/column index in ``weight_matrix``, the unnormalised
        pairwise weight matrix, and the normalised per-member weights.

    Raises:
        RuntimeError: if a weight is still inf/NaN after rescaling.
        ZeroDivisionError: presumably when the group has fewer than two
            distinct members, since no pair contributes and the total stays
            zero -- verify against callers.
    """
    # First occurrence wins, exactly like list.index(), but in a single pass.
    member_index_dict = {}
    for position, member in enumerate(members):
        member_index_dict.setdefault(member, position)

    result = _pairwise_member_weights(members, member_index_dict)
    if result is None:
        print("weight is not a number")
        zoom(1000)
        emb_to_file(out_user + "zoom", user_emb)
        emb_to_file(out_item + "zoom", item_emb)
        emb_to_file(out_luser + "zoom", luser_emb)
        emb_to_file(out_ruser + "zoom", ruser_emb)
        # Recompute from a clean slate so no pre-zoom partial sums leak into
        # the normaliser.
        result = _pairwise_member_weights(members, member_index_dict)
        if result is None:
            print("weight is not a number")
            raise RuntimeError("still not a number")
    weight_matrix, member_weight_dict, sum_weight = result

    group_emb = np.zeros(DIM, )
    for member in member_weight_dict:
        share = member_weight_dict[member] / sum_weight
        member_weight_dict[member] = share
        group_emb += share * user_emb.get(int(member))
    return group_emb, member_index_dict, weight_matrix, member_weight_dict
def init_all_vec():
    """Initialise all embeddings and persist the initial state.

    Initialises, in order: the shared context embedding for the friends of
    both views, the per-view user embeddings (social and preference), the
    per-user view weights, the fused user embedding and the item embedding.
    The user and item embeddings are then written with ``emb_to_file`` and
    the two mapping matrices are pickled. Every output name carries the
    suffix ``_r<reg>N<NEG_N>_init``.
    """
    # Initialise users' context embedding (both views share one table).
    init_vec(view1_friend_list, user_context_emb)
    init_vec(view2_friend_list, user_context_emb)
    # Initialise view1 and view2's user embedding.
    init_vec(view1_user_list, user_social_emb)
    init_vec(view2_user_list, user_prefer_emb)
    # Initialise users' embedding.
    init_user_view_weight(all_user_set, user_weight_dict)
    init_user(all_user_set, user_emb)
    # Initialise items' embedding.
    init_vec(all_item_set, item_emb)

    suffix = "_r" + str(reg) + "N" + str(NEG_N) + "_init"
    emb_to_file(out_user + suffix, user_emb)
    emb_to_file(out_item + suffix, item_emb)
    # Context managers make sure the pickle files are flushed and closed.
    with open(out_uv_matrix + suffix + ".pkl", 'wb') as uv_file:
        pk.dump(user_item_map_matrix, uv_file)
    with open(out_uu_matrix + suffix + ".pkl", 'wb') as uu_file:
        pk.dump(user_user_map_matrix, uu_file)
def train_data(type):
    """Run the training schedule selected by ``type``.

    * ``type == 1``: train on user-event edges for ``user_event_N``
      iterations (user/item checkpoints every 500000 iterations), then on
      group-event edges for ``group_event_N`` iterations (luser/ruser
      checkpoints ten times per stage).
    * ``type == 2``: a short user-event stage of ``user_event_N / 10``
      iterations without checkpoints, then the group-event stage with all
      four tables checkpointed ten times and once more at the end.
    * anything else: ``user_event_N * 8`` iterations where ``bernoilli()``
      picks a group-event or user-event step each time; all four tables are
      checkpointed every ``user_event_N`` iterations.

    The group-stage checkpoint interval uses floor division so it stays an
    integer under Python 3 true division; it is clamped to at least 1 so a
    tiny ``group_event_N`` cannot cause a modulo by zero.

    Args:
        type: schedule selector (1, 2, or any other value for mixed mode).
            The name shadows the builtin but is kept for caller
            compatibility.
    """
    lr_floor = init_lr * 0.0001

    def save_user_item(tag):
        emb_to_file(out_user + tag, user_emb)
        emb_to_file(out_item + tag, item_emb)

    def save_group_sides(tag):
        emb_to_file(out_luser + tag, luser_emb)
        emb_to_file(out_ruser + tag, ruser_emb)

    def save_all(tag):
        save_user_item(tag)
        save_group_sides(tag)

    def user_step():
        training_user_item(all_edges_1, user_nei)

    def group_step():
        training_group_item(all_edges_2, group_nei, type)

    def mixed_step():
        if bernoilli():
            group_step()
        else:
            user_step()

    def run_stage(limit, lr_span, lr_label, done_fmt, train_step,
                  save=None, save_every=None, skip_at=None):
        """Run steps 0..limit inclusive, logging lr and checkpointing."""
        step = 0
        last_count = 0
        current_sample_count = 0
        while step <= limit:
            if step - last_count > 10000:
                current_sample_count += step - last_count
                last_count = step
                # NOTE(review): ``lr`` is a local and is never handed to the
                # training step, so the decayed and clamped value is only
                # ever printed -- confirm whether a module-level rate was
                # meant to be updated here.
                lr = init_lr * (1 - current_sample_count /
                                (1.0 * (lr_span + 1)))
                print(lr_label + str(step) + " ##########lr " + str(lr))
                if lr < lr_floor:
                    lr = lr_floor
            if (save is not None and step != 0 and step != skip_at
                    and step % save_every == 0):
                # Write embedding into file.
                save(str(step))
            train_step()
            print(done_fmt % step)
            step += 1

    if type == 1:
        run_stage(user_event_N, user_event_N,
                  "Iteration i: ", "user event iteration i: %d finished.",
                  user_step,
                  save=save_user_item, save_every=500000,
                  skip_at=user_event_N)
        save_user_item(str(user_event_N))
        print("The first stage training finished.")
        # Train with group-event.
        run_stage(group_event_N, group_event_N,
                  "group event iteration i: ",
                  "group event iteration i: %d finished.",
                  group_step,
                  save=save_group_sides,
                  save_every=max(1, group_event_N // 10),
                  skip_at=group_event_N)
        save_group_sides(str(group_event_N))
    elif type == 2:
        # Short user-event warm-up; no intermediate checkpoints here.
        run_stage(user_event_N / 10, user_event_N,
                  "Iteration i: ", "user event iteration i: %d finished.",
                  user_step)
        print("The first stage training finshed.")
        # Train with group-event.
        run_stage(group_event_N, group_event_N,
                  "Iteration i: ", "Iteration i: %d finished.",
                  group_step,
                  save=save_all,
                  save_every=max(1, group_event_N // 10),
                  skip_at=group_event_N)
        save_all(str(group_event_N))
    else:
        run_stage(user_event_N * 8, user_event_N,
                  "Iteration i: ", "Iteration i: %d finished.",
                  mixed_step,
                  save=save_all, save_every=user_event_N)
# Script entry point: build the sampling tables, initialise the embeddings,
# run the mixed training schedule (type 3) and dump the final tables.
if __name__ == "__main__":
    # Time how long computing the popularity distribution takes.
    starttime = datetime.datetime.now()
    get_pop_pro()
    #get_pop_pro_from_file()
    endtime = datetime.datetime.now()
    print(endtime - starttime)
    init_neg_table()
    print("initial neg table")
    init_all_vec()
    init_sigmod_table()
    print("training starting")
    train_data(3)
    print("training finished")
    emb_to_file(out_user + "finished", user_emb)
    emb_to_file(out_item + "finished", item_emb)
    emb_to_file(out_luser + "finished", luser_emb)
    emb_to_file(out_ruser + "finished", ruser_emb)
def train_data(type):
    """Run the training schedule selected by ``type``.

    * ``type == 1``: load pre-trained user/item embeddings with ``get_emb``,
      then run ``group_event_N * 10`` group-event steps, dumping the user
      and feature weights every ``group_event_N`` steps (except at step
      ``group_event_N`` itself).
    * ``type == 2``: same loading and step count, but all four tables are
      dumped every ``group_event_N`` steps.
    * anything else: ``user_event_N * 30`` steps where ``bernoilli()`` picks
      a group-event or a user-event step; all four tables are dumped every
      ``user_event_N * 2`` steps and progress is printed every 10000 steps.

    Args:
        type: schedule selector (1, 2, or any other value for mixed mode).
    """

    def dump_weights(tag):
        userweight_to_file(out_user_weight + tag, user_weight_map)
        featureweight_to_file(out_feature_weight + tag, feature_weight)

    def dump_everything(tag):
        emb_to_file(out_user + tag, user_emb)
        emb_to_file(out_item + tag, item_emb)
        dump_weights(tag)

    def group_step():
        training_group_item(all_edges_2, group_nei, type)

    def mixed_step():
        if bernoilli():
            training_group_item(all_edges_2, group_nei, type)
        else:
            training_user_item(all_edges_1, user_nei)

    def drive(limit, span, lr_prefix, done_msg, step_fn, dump, dump_every,
              skip_at=None, report_every=None):
        """Execute steps 0..limit inclusive with lr logging and dumps."""
        step = 0
        last_count = 0
        seen = 0
        while step <= limit:
            if step - last_count > 10000:
                seen += step - last_count
                last_count = step
                lr = init_lr * (1 - seen / (1.0 * (span + 1)))
                print(lr_prefix + str(step) + " ##########lr " + str(lr))
                if lr < init_lr * 0.0001:
                    lr = init_lr * 0.0001
            if step % dump_every == 0 and step != 0 and step != skip_at:
                # Write the current tables to file.
                dump(str(step))
            step_fn()
            if report_every is None or step % report_every == 0:
                print(done_msg % step)
            step += 1

    if type == 1:
        # Read trained user and event embedding.
        get_emb(init_user, user_emb)
        get_emb(init_item, item_emb)
        print("The first stage training finished.")
        # Train with group-event.
        drive(group_event_N * 10, group_event_N,
              "group event iteration i: ",
              "group event iteration i: %d finished.",
              group_step, dump_weights, group_event_N,
              skip_at=group_event_N)
    elif type == 2:
        get_emb(init_user, user_emb)
        get_emb(init_item, item_emb)
        print("The first stage training finshed.")
        # Train with group-event.
        drive(group_event_N * 10, group_event_N,
              "Iteration i: ", "Iteration i: %d finished.",
              group_step, dump_everything, group_event_N)
    else:
        drive(user_event_N * 30, user_event_N,
              "Iteration i: ", "Iteration i: %d finished.",
              mixed_step, dump_everything, user_event_N * 2,
              report_every=10000)
# Script entry point: prepare the sampling tables, initialise all tables,
# run the group-event schedule (type 2) and dump the final state.
if __name__ == "__main__":
    get_pop_pro()
    #init_neg_table()
    print("initial neg table")
    init_all_vec()
    init_sigmod_table()
    print("training starting")
    train_data(2)
    print("training finished")
    emb_to_file(out_user + "_finished", user_emb)
    emb_to_file(out_item + "_finished", item_emb)
    userweight_to_file(out_user_weight + "_finished", user_weight_map)
    featureweight_to_file(out_feature_weight + "_finished", feature_weight)
def train_data():
    """Alternate implicit-view and explicit-link training for all rounds.

    Each of the ``ITER_ROUND + 1`` rounds runs ``sample_T`` implicit steps
    (``bernoilli(social_prefer_p)`` picks view 1 or view 2) followed by
    ``link_iter_num`` explicit user-user steps. Every fourth round the user
    embedding is written with ``emb_to_file`` and the user-user mapping
    matrix is pickled, both under the suffix
    ``_r<reg>N<NEG_N>_round<round>``.

    ``im_iter`` and ``ex_iter`` count implicit and explicit steps across all
    rounds and drive the learning-rate log, so the logged rate decays
    monotonically instead of depending on the per-round counters.
    """
    lr_floor = init_lr * 0.0001
    im_iter = 0
    ex_iter = 0
    im_last_count = 0
    ex_last_count = 0
    im_current_sample_count = 0
    ex_current_sample_count = 0

    round_idx = 0
    while round_idx <= ITER_ROUND:
        view_iter = 0
        while view_iter < sample_T:
            if im_iter - im_last_count > 10000:
                im_current_sample_count += im_iter - im_last_count
                im_last_count = im_iter
                # NOTE(review): ``implicit_lr`` is local and never passed to
                # the training step; confirm whether a module-level rate
                # should be updated instead.
                implicit_lr = init_lr * (
                    1 - im_current_sample_count / (1.0 * (view_iter_num + 1)))
                print("Iteration i: " + str(im_iter) + " ##########lr "
                      + str(implicit_lr))
                if implicit_lr < lr_floor:
                    implicit_lr = lr_floor
            # Randomly sample an implicit relation graph.
            if bernoilli(social_prefer_p):
                # Social user-user relation.
                training_user_in_view(1)
            else:
                # Preference user-user relation.
                training_user_in_view(2)
            if view_iter % 10000 == 0:
                print("view iter %d finished." % view_iter)
            view_iter += 1
            im_iter += 1

        # Update by real links.
        link_iter = 0
        while link_iter < link_iter_num:
            if ex_iter - ex_last_count > 10000:
                ex_current_sample_count += ex_iter - ex_last_count
                ex_last_count = ex_iter
                # NOTE(review): the denominator uses ``view_iter_num`` as in
                # the implicit schedule; confirm that is the intended total
                # for explicit steps.
                explicit_lr = init_lr * (
                    1 - ex_current_sample_count / (1.0 * (view_iter_num + 1)))
                print("Iteration i: " + str(ex_iter) + " ##########lr "
                      + str(explicit_lr))
                if explicit_lr < lr_floor:
                    explicit_lr = lr_floor
            # Only the user-user relation is trained; the user-item branch
            # is disabled in this version.
            train_user_user_relation()
            if link_iter % 1000 == 0:
                print("real edge iter %d finished." % link_iter)
            link_iter += 1
            ex_iter += 1

        # Write embedding into file (item-side dumps are disabled).
        if round_idx % 4 == 0:
            tag = "_r" + str(reg) + "N" + str(NEG_N) + "_round" + str(round_idx)
            emb_to_file(out_user + tag, user_emb)
            # Context manager so the pickle file is flushed and closed.
            with open(out_uu_matrix + tag + ".pkl", 'wb') as uu_file:
                pk.dump(user_user_map_matrix, uu_file)
        print("Round i: %d finished." % round_idx)
        round_idx += 1