import sys

import adopt_time


def generate_adoption_data(netdata, interact_type, split_timestamp,
                           min_interactions_per_user=1, time_window=None,
                           time_scale=ord('o')):
    print "ALERT: Generating adoption data chronologically"
    # Caution: should do this for all users with interactions, to be safe.
    core_nodes2 = netdata.get_nodes_list(should_have_interactions=True)
    interactions_stream, eligible_nodes = adopt_time.get_interactions_stream(
        core_nodes2, interact_type, split_timestamp,
        min_interactions_per_user, after=True)
    print "Number of interactions %d after time %d" % (
        len(interactions_stream), split_timestamp)

    # Per-node heap storing the last M interactions by random non-friends;
    # the corresponding friends and k-best heaps are built below.
    rnonfr_lastm_heaps = {}
    # We care only about friends of eligible_nodes.
    incoming_friends_dict = adopt_time.compute_incoming_friends(eligible_nodes)
    # Consider only the k nearest neighbors of eligible nodes.
    incoming_kbest_dict = adopt_time.compute_globalk_neighbors3(
        netdata, eligible_nodes, interact_type, k=10,
        min_interactions_per_user=min_interactions_per_user)
    # Build lastm heaps only for eligible nodes.
    friends_lastm_heaps = adopt_time.compute_initial_lastm_heaps(
        eligible_nodes, incoming_friends_dict, interact_type,
        split_timestamp, min_interactions_per_user, time_window)
    kbest_lastm_heaps = adopt_time.compute_initial_lastm_heaps(
        eligible_nodes, incoming_kbest_dict, interact_type,
        split_timestamp, min_interactions_per_user, time_window)
    items_pop = get_initial_items_pop(eligible_nodes, interact_type)

    # We need friends_dict only for nodes that appear in the test set and,
    # more importantly, in the interaction stream (i.e. those with
    # >= min_interactions_per_user), since that is what the susceptibility
    # test compares.
    outf = open("adoptions.dat", "w")
    for node, item_id, timestamp, rating in interactions_stream:
        fr_count, kbest_count, itempop = process_interaction(
            node, item_id, timestamp, rating, friends_lastm_heaps,
            kbest_lastm_heaps, rnonfr_lastm_heaps,
            incoming_friends_dict[node.uid], incoming_kbest_dict[node.uid],
            items_pop)
        if node.has_friends():
            outf.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n".format(
                node.uid, item_id, timestamp, rating, fr_count,
                kbest_count, itempop))
    outf.close()
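# The adopt_time helpers used above (compute_initial_lastm_heaps,
# process_interaction) are defined elsewhere in the project. As a minimal
# sketch only: assuming each per-node heap is a plain heapq min-heap of
# (timestamp, item_id) pairs seeded as [(0, -1)] * time_window (as in the
# commented-out variant kept in generate_fake_preferences below), the
# "last M interactions" structure can be maintained like this.
# update_lastm_heap is a hypothetical helper, not part of adopt_time.
import heapq


def update_lastm_heap(lastm_heap, timestamp, item_id):
    # The root of the min-heap is the oldest of the M retained
    # interactions. A newer interaction replaces it in O(log M) time,
    # so the heap always holds the M most recent entries.
    if timestamp > lastm_heap[0][0]:
        heapq.heapreplace(lastm_heap, (timestamp, item_id))

# Usage sketch: seed a heap of size M = 5, then stream one interaction in.
#   lastm_heap = [(0, -1)] * 5
#   heapq.heapify(lastm_heap)
#   update_lastm_heap(lastm_heap, 1325376000, 42)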
def generate_fake_preferences(netdata, interact_type, split_timestamp,
                              min_interactions_beforeaftersplit_per_user=1,
                              time_window=None, time_scale=ord('o'),
                              method="random"):
    global num_not_influence, num_not_homophily
    global friends_share, nonfriends_share
    num_not_homophily = 0
    num_not_influence = 0
    print "ALERT: Generating some fake data..."
    # Caution: should do this for all users with interactions, to be safe.
    core_nodes2 = netdata.get_nodes_iterable(should_have_interactions=True)
    interactions_stream, eligible_nodes = adopt_time.get_interactions_stream(
        core_nodes2, interact_type, split_timestamp,
        min_interactions_beforeaftersplit_per_user)
    print "Number of interactions to change", len(interactions_stream)

    counter = 0
    lastm_heaps = {}
    if method == "homophily":
        globalk_incoming_dict = adopt_time.compute_globalk_neighbors3(
            netdata, eligible_nodes, interact_type, k=10,
            min_interactions_per_user=min_interactions_beforeaftersplit_per_user)
        lastm_heaps = adopt_time.compute_initial_lastm_heaps(
            eligible_nodes, globalk_incoming_dict, interact_type,
            split_timestamp, min_interactions_beforeaftersplit_per_user,
            time_window)
        # Earlier variant seeded each node's heap directly:
        #   lastm_heaps[node.uid] = [(0, -1)] * time_window
        #   heapq.heapify(lastm_heaps[node.uid])
    elif method == "random":
        items_pop = get_items_dup_array(netdata, interact_type)
        print "Generated items popularity array"
    elif method == "influence":
        # We need friends_dict only for nodes that appear in the test set
        # and, more importantly, in the interaction stream (i.e. those with
        # >= min_interactions_per_user), since that is what the
        # susceptibility test compares.
        incoming_friends_dict = adopt_time.compute_incoming_friends(
            eligible_nodes)
        lastm_heaps = adopt_time.compute_initial_lastm_heaps(
            eligible_nodes, incoming_friends_dict, interact_type,
            split_timestamp, min_interactions_beforeaftersplit_per_user,
            time_window)
        print "Initialized lastm heaps for", len(lastm_heaps), "nodes"
    else:
        print "Invalid fake prefs method"
        sys.exit(1)

    for node, item_id, timestamp, rating in interactions_stream:
        if method == "random":
            new_item_id = select_item_dueto_random(items_pop)
        elif method == "influence":
            new_item_id = select_item_from_neighbors(
                netdata, lastm_heaps, node, timestamp, time_window,
                incoming_friends_dict[node.uid])
        elif method == "homophily":
            new_item_id = select_item_from_neighbors(
                netdata, lastm_heaps, node, timestamp, time_window,
                globalk_incoming_dict[node.uid])
        else:
            print "Invalid fake prefs method"
            sys.exit(1)
        # Swap the node's real interaction for the synthetic one, in place.
        ret = node.change_interacted_item(interact_type, item_id,
                                          new_item_id, timestamp)
        if ret == -1:
            print node.get_interactions(interact_type)
            print "Cannot find item", item_id, "in the above array"
            print "Big Error: item_id not found in interactions list."
            sys.exit(1)
        counter += 1
        if counter % 100000 == 0:
            print "Faked interactions so far:", counter

    if method == "influence":
        print "In influence: number of interactions not generated by influence:", num_not_influence
    elif method == "homophily":
        print "In homophily: friends share and non-friends share of fake data:", friends_share, nonfriends_share
        print "In homophily: number of interactions not generated by homophily:", num_not_homophily
    print "Fake data generated"
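# select_item_dueto_random and get_items_dup_array are project helpers not
# shown in this file. Judging by the names alone (an assumption), the "dup
# array" likely holds one entry per observed interaction, so a uniform draw
# from it is a popularity-proportional draw. A minimal sketch under that
# assumption; sample_item_by_popularity is a hypothetical name:
import random


def sample_item_by_popularity(items_dup_array):
    # Each item id appears once per interaction it received, so a uniform
    # choice over the array weights items by their popularity.
    return random.choice(items_dup_array)

# Usage sketch: item 7 appears twice, so it is drawn twice as often as 3 or 9.
#   fake_item = sample_item_by_popularity([3, 7, 7, 9])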
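# A hypothetical driver, shown only as a usage sketch: load_network_data,
# the interact_type value, and the timestamp are assumptions, not part of
# this module. The num_not_* and *_share globals printed above are expected
# to be updated by select_item_from_neighbors elsewhere in the project.
if __name__ == "__main__":
    netdata = load_network_data()  # hypothetical loader for the dataset
    SPLIT_TS = 1325376000          # e.g. 2012-01-01 00:00:00 UTC
    # Write per-adoption friend/k-best exposure counts to adoptions.dat.
    generate_adoption_data(netdata, interact_type=0,
                           split_timestamp=SPLIT_TS,
                           min_interactions_per_user=5, time_window=100)
    # Replace observed interactions with synthetic ones under the
    # influence model.
    generate_fake_preferences(netdata, interact_type=0,
                              split_timestamp=SPLIT_TS,
                              min_interactions_beforeaftersplit_per_user=5,
                              time_window=100, method="influence")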