コード例 #1
0
def main():
    """Filter the cosine-similarity restaurants and pickle the survivors.

    Steps, in order:
      1. Load the friend graph from ``../data/yelpFriends.txt``.
      2. Pass every entry of ``<cwd>/../data/yelp_users/`` to
         ``readUserReviews`` (its return value is not used here).
      3. Build the restaurant dictionary with
         ``generate_cosine_similarity_matrix``.
      4. Drop every restaurant for which ``is_chi_square_null`` is truthy.
      5. Pickle the remaining dictionary to
         ``cosSimilarityNoNullRestaurants.pkl`` (path relative to the
         current working directory).
    """
    print("Initializing Friend Graph...")
    friend_graph = init_friend_graph('../data/yelpFriends.txt')

    user_data_directory = os.getcwd() + "/../data/yelp_users/"

    print("Reading reviews from each user...")
    for user in os.listdir(user_data_directory):
        # Reuse the directory prefix instead of rebuilding it for every file.
        readUserReviews(user_data_directory + user)

    restaurant_dict = generate_cosine_similarity_matrix()

    print("initial length of restaurant dictionary: " + str(len(restaurant_dict)))
    # Filter out restaurants that will lead to null chi square values.
    print("Filtering out null chi square value restaurants...")
    filtered_restaurants = {
        k: v
        for (k, v) in restaurant_dict.items()
        if not is_chi_square_null(v, friend_graph)
    }

    print("After filtering out null chi square restaurants: " + str(len(filtered_restaurants)))

    print("Generating pickle object...")
    # The context manager closes the file even if pickle.dump raises.
    with open("cosSimilarityNoNullRestaurants.pkl", "wb") as pickle_object:
        pickle.dump(filtered_restaurants, pickle_object)

    print("Done. Quitting...")
コード例 #2
0
def main():
    """Keep restaurants whose subgraph is not null and save them as JSON.

    Reads the restaurant -> user-ratings map and the friend graph, discards
    every restaurant for which ``isSubgraphNull`` is truthy, prints how many
    restaurants remain, and writes the remaining map to
    ``<cwd>/../data/equalRatingNoNullRestaurants.txt`` as JSON.
    """
    ratings_by_restaurant = read_restaurant_by_user_ratings()
    friend_graph = init_friend_graph('../data/yelpFriends.txt')

    kept = {}
    for restaurant_id, ratings in ratings_by_restaurant.items():
        # Guard clause: skip restaurants flagged as null subgraphs.
        if isSubgraphNull(ratings, friend_graph):
            continue
        kept[restaurant_id] = ratings

    print(len(kept))

    output_path = os.getcwd() + "/../data/equalRatingNoNullRestaurants.txt"
    with open(output_path, 'w') as out:
        json.dump(kept, out)
コード例 #3
0
def main():
    """Compare the real graph's summed chi-square with randomized trials.

    For every restaurant in the reviews map this collects the values
    returned by ``gen_random_chisq_vals`` and adds the restaurant's
    ``calc_chi_sq`` result to a running total. The random values are then
    summed position-wise across restaurants (one total per trial), sorted
    and printed, followed by the real total and its percentile.

    The percentile is the index of the first sorted random total that is
    strictly greater than the real total, divided by the number of trials.
    It stays at 1 when no random total exceeds the real one.
    """
    rest_user_reviews_map = read_restaurant_by_user_reviews()
    friend_graph = init_friend_graph('../data/yelpFriends.txt')

    # A list of lists, where each list corresponds to a restaurant subgraph.
    # Per the original notes, each list holds RAND_NUM chi sq values
    # generated from permuting the graph.
    random_chisq_values = []

    # Running sum of the chi sq values for each restaurant of the real graph.
    real_chisq_sum = 0

    for r_id in rest_user_reviews_map:
        user_reviews = rest_user_reviews_map[r_id]
        combinations = get_user_combos_from_ratings_map(user_reviews['users'])

        # Generate the random chisq values first (kept ahead of the real
        # computation to preserve the original call order).
        chisq_vals = gen_random_chisq_vals(user_reviews, friend_graph,
                                           combinations)
        random_chisq_values.append(chisq_vals)

        # Calculate the chisq value for the actual graph.
        real_chisq_sum += calc_chi_sq(user_reviews['users'],
                                      user_reviews['cos_matrix'], friend_graph,
                                      combinations)

    # zip(*...) groups the i-th value of every restaurant together, so each
    # entry below is the chisq sum of one trial.
    sum_rand_chisq = [sum(trial) for trial in zip(*random_chisq_values)]
    sum_rand_chisq.sort()

    for num in sum_rand_chisq:
        print(str(num))
    print(real_chisq_sum)

    # Calculate what percentile the real sum is in compared to the
    # distribution of random sums.
    percentile = 1
    trial_count = len(sum_rand_chisq)
    for rank, rand_sum in enumerate(sum_rand_chisq):
        if rand_sum > real_chisq_sum:
            percentile = rank / trial_count
            break
    print("Percentile: " + str(percentile))
コード例 #4
0
def main():
    """Write the friend edge list and the combined friend/word edge list.

    Builds the friend edge list from the friend graph, reads every user's
    reviews, generates the user/word matrix and its edge list, then writes
    two text files under ``../data/``. Each file starts with a
    ``"<node count> <edge count>"`` header line followed by one
    ``"<edge[0]> <edge[1]>"`` line per edge.
    """

    def dump_edges(path, node_count, edge_count, edges):
        # Header first, then one space-separated pair per line.
        with open(path, 'w') as out:
            out.write(" ".join([str(node_count), str(edge_count)]) + "\n")
            for edge in edges:
                out.write(" ".join([str(edge[0]), str(edge[1])]) + "\n")

    print("Initializing friend graph...")
    friend_graph = init_friend_graph('../data/yelpFriends.txt')

    friend_edges, user_to_index, index_to_user = friend_edge_list(friend_graph)

    # Walk the per-user review files and load each one.
    user_data_directory = os.getcwd() + "/../data/yelp_users/"
    print("Reading reviews from each user...")
    for review_file in os.listdir(user_data_directory):
        readUserReviews(os.getcwd() + "/../data/yelp_users/" + review_file)

    print("Generating word matrix...")
    word_labels, user_word_matrix = generate_user_word_matrix(index_to_user)
    print(len(word_labels))
    print(word_labels)

    user_count = len(user_to_index)
    friend_edge_count = len(friend_edges)

    word_edges, word_to_index = word_user_edge_list(word_labels,
                                                    user_word_matrix)

    print("Writing edge lists...")
    # Friend-only edge list.
    dump_edges('../data/friend_edge_list.txt', user_count, friend_edge_count,
               friend_edges)

    # Friend edges followed by word edges; nodes are users plus words.
    combined_edges = friend_edges + word_edges
    combined_node_count = user_count + len(word_to_index)
    dump_edges('../data/friend_word_edge_list.txt', combined_node_count,
               len(combined_edges), combined_edges)
コード例 #5
0
def main():
    """Build the friend adjacency matrix and user/word matrix, then pickle them.

    The friend matrix is a V x V boolean array (V = number of keys in the
    friend graph), indexed by each user's position in ``index_to_user``.
    Every friendship is recorded symmetrically. Friends that are not
    themselves keys of the friend graph have no index and are skipped.

    The pickled dictionary, written to
    ``../data/autoencoder_sim_matrices.pkl``, has the keys ``user_labels``,
    ``word_labels``, ``friend_matrix`` and ``user_word_matrix``.
    """
    print("Initializing Friend Graph...")
    friend_graph = init_friend_graph('../data/yelpFriends.txt')
    index_to_user = list(friend_graph.keys())
    user_to_index = {k: v for v, k in enumerate(index_to_user)}
    V = len(index_to_user)
    friend_matrix = np.full((V, V), False)
    for user1, friend_list in friend_graph.items():
        # user1 is a key of friend_graph, so it always has an index;
        # look it up once per user rather than once per friend.
        idx1 = user_to_index[user1]
        for user2 in friend_list:
            idx2 = user_to_index.get(user2)
            if idx2 is None:
                continue
            # Record the edge in both directions. The previous code set
            # [idx2, idx2] (the diagonal) instead of the mirrored entry.
            friend_matrix[idx1, idx2] = True
            friend_matrix[idx2, idx1] = True

    user_data_directory = os.getcwd() + "/../data/yelp_users/"
    print("Reading reviews from each user...")
    for user in os.listdir(user_data_directory):
        # Reuse the directory prefix instead of rebuilding it for every file.
        readUserReviews(user_data_directory + user)

    word_labels, user_word_matrix = generate_user_word_matrix(index_to_user)
    print(user_word_matrix.shape)
    result = {
        'user_labels': index_to_user,
        'word_labels': word_labels,
        'friend_matrix': friend_matrix,
        'user_word_matrix': user_word_matrix,
    }
    print("Generating pickle object...")
    # The context manager closes the file even if pickle.dump raises.
    with open("../data/autoencoder_sim_matrices.pkl", "wb") as pickle_object:
        pickle.dump(result, pickle_object)

    print("Done. Quitting...")