def predict_elite_status_with_linear_regression():
	"""Fit a linear regression predicting 'years_elite' and print its test score.

	Pipeline, in order:
	  1. Load the graph and the user dictionaries (users are read with
	     model_type='linear_regression').
	  2. Compute PageRank over the graph and join it onto the users by 'ID'.
	  3. Drop 'ID', normalize every attribute except 'years_elite', and mark
	     'years_elite' as the label.
	  4. Shuffle, then train on the first 75% of users and score on the rest.

	Prints the per-attribute weights and the test score; returns nothing.
	The shuffle uses the module-level `random` state and is not seeded here,
	so the split (and therefore the score) varies between runs unless the
	caller seeds `random` beforehand.
	"""
	# Generate graph and user dictionaries
	graph = read_graph_from_yelp_JSON_file()
	users = read_users_from_yelp_JSON_file(model_type='linear_regression')

	# Add PageRank to user dictionaries.
	# .items() (rather than the Python-2-only .iteritems()) matches the
	# weights.items() call below and also works on Python 3.
	pagerank_for_node = networkx.pagerank(graph)
	user_pageranks = [
		{'ID': node_ID, 'pagerank': pagerank}
		for node_ID, pagerank in pagerank_for_node.items()
	]
	users = join_dictionaries(user_pageranks, users, 'ID')

	# Prepare users for learning
	users = remove_labels(users, 'ID')
	users = normalize_users(users, excluded_attributes=['years_elite'])
	users = designate_attribute_as_label(users, 'years_elite')
	random.shuffle(users)

	# Split data into training (first 75%) and test (everything after it).
	# Slicing both halves at the same index keeps them disjoint and
	# exhaustive without relying on a negative slice bound.
	training_set_size = int(0.75 * len(users))
	training_set = users[:training_set_size]
	test_set = users[training_set_size:]

	# Fit to hyperplane
	model, weights = regression.get_model_and_weights(training_set)

	# Show us how important each attribute is.
	# Single-argument print(...) produces the same output on Python 2 and 3.
	print('Attribute weights:')
	for attribute, weight in weights.items():
		print(attribute + ': ' + str(weight))

	# Test the model by calculating its coefficient of determination (R^2) on test data
	test_samples, test_labels, _ = regression.prep_data(test_set)
	test_score = model.score(test_samples, test_labels)
	print('Test score: ' + str(test_score))
def predict_pagerank():
	"""Fit a regression predicting each user's PageRank and print the weights.

	Loads the graph, filters it with remove_low_degree_nodes using
	MINIMUM_FRIEND_COUNT, computes PageRank on the result and joins it onto
	the user dictionaries by 'ID'. After dropping 'ID' and 'friend_count' and
	normalizing, 'pagerank' is designated as the label and the model is fit
	on ALL users -- there is no held-out test set, so no score is reported.

	Prints each attribute's weight multiplied by 100; returns nothing.
	"""
	# Hyperparameters
	MINIMUM_FRIEND_COUNT = 1

	# Generate graph and users
	graph = read_graph_from_yelp_JSON_file()
	# NOTE(review): the return value is ignored, so this presumably mutates
	# `graph` in place -- confirm against remove_low_degree_nodes.
	remove_low_degree_nodes(graph, MINIMUM_FRIEND_COUNT)
	users = read_users_from_yelp_JSON_file()

	# Add PageRank to user dictionaries.
	# .items() (rather than the Python-2-only .iteritems()) matches the
	# weights.items() call below and also works on Python 3.
	pagerank_for_node = networkx.pagerank(graph)
	user_pageranks = [
		{'ID': node_ID, 'pagerank': pagerank}
		for node_ID, pagerank in pagerank_for_node.items()
	]
	users = join_dictionaries(user_pageranks, users, 'ID')

	# Prepare users for regression
	users = remove_labels(users, label_name='ID')
	users = remove_labels(users, label_name='friend_count')
	# NOTE(review): 'ID' was already removed above, so excluding it here looks
	# redundant; left untouched because normalize_users is not visible here.
	users = normalize_users(users, excluded_attributes=['ID', 'years_elite'])
	users = designate_attribute_as_label(users, attribute='pagerank')

	# Fit to hyperplane (the whole data set is used for training)
	training_set = users
	model, weights = regression.get_model_and_weights(training_set)

	# Show us how important each attribute is (weights are scaled by 100).
	# Single-argument print(...) produces the same output on Python 2 and 3.
	print('Attribute weights:')
	for attribute, weight in weights.items():
		print(attribute + ': ' + str(weight * 100))