def test_community_analysis_single_channel_cutoff_20(self):
    """Check infomap communities of #kubuntu-devel with a message cutoff of 20.

    The message-number graph is built while
    ``config.THRESHOLD_MESSAGE_NUMBER_GRAPH`` is temporarily set to 20,
    written out as a ``.net`` file, and fed to ``community.infomap_igraph``.
    The resulting (graph, communities) pair is compared with the object
    loaded from ``data/output/community_analysis_single_channel_cutoff_20``.
    """
    log_data = reader.linux_input(
        self.log_data_dir, ["#kubuntu-devel"], self.start_date, self.end_date)
    expected_result = util.load_from_disk(
        self.current_directory +
        '/data/output/community_analysis_single_channel_cutoff_20')
    nicks, nick_same_list = nickTracker.nick_tracker(log_data)

    default_cutoff = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20
    # Build the scratch file name once instead of repeating the expression.
    graph_name = ("message-exchange-" + self.start_date + "-cutoff-" +
                  str(config.THRESHOLD_MESSAGE_NUMBER_GRAPH))
    net_file = self.current_directory + "/" + graph_name + '.net'
    try:
        message_number_graph = network.message_number_graph(
            log_data, nicks, nick_same_list, False)
        saver.save_net_nx_graph(
            message_number_graph, self.current_directory, graph_name)
        actual_output = community.infomap_igraph(
            ig_graph=None, net_file_location=net_file)
    finally:
        # Always undo the global config override and drop the scratch file,
        # even when graph construction or infomap raises; otherwise the
        # modified threshold would leak into every test that runs afterwards.
        config.THRESHOLD_MESSAGE_NUMBER_GRAPH = default_cutoff
        if os.path.exists(net_file):
            os.remove(net_file)

    self.assertTrue(expected_result[0].isomorphic(actual_output[0]))
    self.assertEqual(
        compare_communities(expected_result[1], actual_output[1]), 0)
def test_infomap_igraph(self):
    """Communities detected from the stored .net file must match the saved ones."""
    net_path = self.current_directory + '/data/message_number_graph.net'
    fixture_path = self.current_directory + '/data/community'

    message_graph, detected = community.infomap_igraph(
        ig_graph=None, net_file_location=net_path)
    stored = util.load_from_disk(fixture_path)

    # Distance between the detected and the stored community structures;
    # the test expects it to be exactly zero.
    distance = clustering.compare_communities(detected, stored)
    self.assertEqual(distance, 0.0)
def test_community_analysis_multi_channel(self):
    """Check infomap communities of the CC, UU and CU presence graphs.

    For every presence type both the full graph (``adj``) and the reduced
    graph (``radj``) returned by
    ``network.channel_user_presence_graph_and_csv`` are written to ``.net``
    files, run through ``community.infomap_igraph`` and compared with the
    object loaded from ``data/output/community_analysis_multi_channel``.
    """
    log_data = reader.linux_input(
        self.log_data_dir, ["ALL"], self.start_date, self.end_date)
    expected_result = util.load_from_disk(
        self.current_directory +
        '/data/output/community_analysis_multi_channel')
    (nicks, nick_same_list, channels_for_user, nick_channel_dict,
     nicks_hash, channels_hash) = nickTracker.nick_tracker(log_data, True)
    dict_out, _unused_graph = network.channel_user_presence_graph_and_csv(
        nicks, nick_same_list, channels_for_user, nick_channel_dict,
        nicks_hash, channels_hash)

    presence_type = ["CC", "UU", "CU"]
    # (result key / file-name prefix, key of the graph inside dict_out[ptype])
    variants = [("adj", "graph"), ("radj", "reducedGraph")]
    actual_output = {ptype: {} for ptype in presence_type}

    for ptype in presence_type:
        net_files = {
            prefix: self.current_directory + '/' + prefix + ptype + '.net'
            for prefix, _graph_key in variants
        }
        try:
            for prefix, graph_key in variants:
                saver.save_net_nx_graph(
                    dict_out[ptype][graph_key], self.current_directory,
                    prefix + ptype)
            for prefix, _graph_key in variants:
                actual_output[ptype][prefix] = community.infomap_igraph(
                    ig_graph=None, net_file_location=net_files[prefix])
        finally:
            # Remove the scratch files even if saving or infomap raised, so
            # a failing run does not leave .net files in the test directory.
            for net_file in net_files.values():
                if os.path.exists(net_file):
                    os.remove(net_file)

        for prefix, _graph_key in variants:
            expected_graph, expected_comm = (
                expected_result[ptype][prefix][0],
                expected_result[ptype][prefix][1])
            actual_graph, actual_comm = (
                actual_output[ptype][prefix][0],
                actual_output[ptype][prefix][1])
            self.assertTrue(expected_graph.isomorphic(actual_graph))
            self.assertEqual(
                compare_communities(actual_comm, expected_comm), 0)
config.THRESHOLD_MESSAGE_NUMBER_GRAPH = threshold #revert to default config # ============== OUTPUT ================ saver.save_net_nx_graph (message_number_graph, output_directory, "message_number_graph") saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph") saver.save_csv([["response_time_cutoff"], [rt_cutoff_time]], output_directory, "rt_cutoff") saver.save_csv([["month", "users", "directed_messages"], ["Jan-2013", len(message_number_graph), int(message_number_graph.size('weight'))]], output_directory, "users_messages") for dtype in degree_type: saver.save_csv(degree_anal_message_number[dtype]["formatted_for_csv"], output_directory, dtype) saver.save_csv(bin_matrix, output_directory, "MessageNumber_binsize_"+str(config.BIN_LENGTH_MINS)) # =============== VIZ =================== message_graph, message_comm = community.infomap_igraph(ig_graph=None, net_file_location= output_directory + 'message_number_graph.net') vis.plot_infomap_igraph(message_graph, message_comm.membership, output_directory, "message") vis.plot_data (data, output_directory, "bins") for dtype in degree_type: slope,intercept,r_square,mse = vis.generate_log_plots(degree_anal_message_number[dtype]["raw_for_vis"], output_directory, channel_name[0] +dtype) saver.save_csv( [["Y","K","R^2", "MSE"], [slope,intercept,r_square,mse]], output_directory, dtype+"-curve-fit") # ============== PRESENCE ACROSS MULTIPLE CHANNELS ============== # Change analysis to all channels in config log_data = reader.linux_input(log_directory, ["ALL"], starting_date, ending_date) nicks, nick_same_list, channels_for_user, nick_channel_dict, nicks_hash, channels_hash = nickTracker.nick_tracker(log_data, True) dict_out, graph = network.channel_user_presence_graph_and_csv(nicks, nick_same_list, channels_for_user, nick_channel_dict, nicks_hash, channels_hash) saver.save_js_arc(dict_out["CC"]["reducedGraph"], channels_hash, config.OUTPUT_DIRECTORY + "protovis/", "cc.js")
def _as_datetime(value):
    """Return ``value`` as a datetime, parsing a 'YYYY-MM-DD' string if needed."""
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        # rrule works on datetimes; promote a plain date to midnight.
        return datetime.datetime.combine(value, datetime.time())
    return datetime.datetime.strptime(value, '%Y-%m-%d')


def codelengths(log_directory, output_directory, channel_name, start_date, end_date):
    """
        Iterate through the months in the given date range and compute the
        infomap codelength of each month's message-number graph. The
        codelengths are then drawn as a box plot and saved as a CSV, both
        named "codelengths2013".

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are to
                               be stored. File names are appended directly,
                               so it must end with a path separator.
        channel_name(list): channels for which the analysis is to be done
        start_date(datetime or str): starting date for the logs to be
                                     analysed, either a datetime or a
                                     'YYYY-MM-DD' string. This has to be the
                                     beginning of the month.
        end_date(datetime or str): ending date for which the logs are to be
                                   analysed, either a datetime or a
                                   'YYYY-MM-DD' string. This has to be the
                                   end of the month.

    Returns:
       None
    """
    # The previous `start_date.strptime('%Y-%m-%d')` could not succeed for
    # either a datetime (missing argument) or a str (no such attribute).
    start_date = _as_datetime(start_date)
    end_date = _as_datetime(end_date)

    monthly_codelengths = []
    for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
        last_day_of_the_month1 = dt + relativedelta(
            months=1) - datetime.timedelta(days=1)
        log_data_m1 = reader.linux_input(
            log_directory, channel_name, dt.strftime("%Y-%m-%d"),
            last_day_of_the_month1.strftime("%Y-%m-%d"))
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        message_number_graph_m1 = network.message_number_graph(
            log_data_m1, nicks_m1, nick_same_list_m1, False)

        # NOTE: the name only carries the month number, so the same month of
        # two different years reuses (and overwrites) the same file.
        graph_name = "message-exchange-" + str(dt.month)
        net_file = output_directory + graph_name + '.net'
        try:
            # FOS is a reserved word in igraph and if 'fos' is a username in
            # the nx graph, it generates an error
            saver.save_net_nx_graph(
                message_number_graph_m1, output_directory, graph_name)
            _msg_igraph, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file)
        except Exception:
            # Retry once with the offending node renamed. Only Exception is
            # caught so KeyboardInterrupt / SystemExit still propagate.
            labels = {
                label: ("fos_" if label == "fos" else label)
                for label in message_number_graph_m1.nodes()
            }
            message_number_graph_m1 = nx.relabel_nodes(
                message_number_graph_m1, labels)
            saver.save_net_nx_graph(
                message_number_graph_m1, output_directory, graph_name)
            # Single pre-formatted argument: prints the same text under both
            # the Python 2 print statement and the Python 3 print function.
            print("error in %s" % dt.month)
            _msg_igraph, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file)
        monthly_codelengths.append(msg_community.codelength)

    vis.box_plot(monthly_codelengths, output_directory, "codelengths2013")
    saver.save_csv([monthly_codelengths], output_directory, "codelengths2013")
log_data, nicks, nick_same_list, False) saver.save_net_nx_graph( message_number_graph, output_directory, "message-exchange-" + starting_date + "-cutoff-" + str(cutoff)) saver.save_csv([["month", "users", "directed_messages"], [ "Jan-2013", len(message_number_graph), int(message_number_graph.size('weight')) ]], output_directory, "users_messages-" + starting_date + "-cutoff_" + str(cutoff)) del message_number_graph gc.collect() msg_graph, message_community = community.infomap_igraph( ig_graph=None, net_file_location=output_directory + "message-exchange-" + starting_date + "-cutoff-" + str(cutoff) + '.net') if cutoff != 0: # be careful; vis.plot_infomap_igraph() takes a long time to complete on large graphs print("vis.plot_infomap_igraph() starts for", starting_date, "with cutoff=", cutoff, "at: ", datetime.datetime.now(), file=exec_times_file) exec_times_file.flush() vis.plot_infomap_igraph( msg_graph, message_community.membership, output_directory, "message-exchange-" + starting_date + "-cutoff-" + str(cutoff))
def test_infomap_igraph_no_community(self):
    """For the sample graph, infomap_igraph must return that graph and None."""
    returned_graph, returned_comm = community.infomap_igraph(
        self.sample_igraph)
    expected_pair = (self.sample_igraph, None)
    self.assertEqual((returned_graph, returned_comm), expected_pair)