def test_community_analysis_single_channel_cutoff_20(self):
    """Check infomap communities for #kubuntu-devel with a message cutoff of 20.

    The message-exchange graph is built with
    ``config.THRESHOLD_MESSAGE_NUMBER_GRAPH`` temporarily set to 20, written
    to a Pajek ``.net`` file, fed to ``community.infomap_igraph`` and the
    result is compared with the expected result loaded from disk.

    The config override and the temporary ``.net`` file are always undone,
    even when graph generation or infomap raises, so that a failure here
    cannot leak state into other tests.
    """
    log_data = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                  self.start_date, self.end_date)
    expected_result = util.load_from_disk(
        self.current_directory +
        '/data/output/community_analysis_single_channel_cutoff_20')
    nicks, nick_same_list = nickTracker.nick_tracker(log_data)

    default_cutoff = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20
    # Build the file name once instead of repeating the concatenation.
    net_name = ("message-exchange-" + self.start_date + "-cutoff-" +
                str(config.THRESHOLD_MESSAGE_NUMBER_GRAPH))
    net_path = self.current_directory + "/" + net_name + '.net'
    try:
        message_number_graph = network.message_number_graph(
            log_data, nicks, nick_same_list, False)
        saver.save_net_nx_graph(message_number_graph,
                                self.current_directory, net_name)
        expected_output = community.infomap_igraph(
            ig_graph=None, net_file_location=net_path)
    finally:
        # Restore the global cutoff and drop the temporary file no matter
        # how the block above exits.
        config.THRESHOLD_MESSAGE_NUMBER_GRAPH = default_cutoff
        if os.path.isfile(net_path):
            os.remove(net_path)

    self.assertTrue(expected_result[0].isomorphic(expected_output[0]))
    self.assertEqual(
        compare_communities(expected_result[1], expected_output[1]), 0)
def test_save_net_nx_graph(self):
    """Check that ``saver.save_net_nx_graph`` writes a readable Pajek file.

    A three-node directed graph with one edge is saved, the resulting
    ``.net`` file is read back with ``nx.read_pajek`` and must be isomorphic
    to the original graph. The file is removed afterwards even when one of
    the checks fails, so a failing run does not leave artifacts behind.
    """
    graph = nx.DiGraph()
    graph.add_nodes_from([1, 2, 3])
    graph.add_edge(1, 3)

    net_path = self.current_directory + '/test_save_net_nx.net'
    try:
        saver.save_net_nx_graph(graph, self.current_directory,
                                'test_save_net_nx')

        assert os.path.exists(net_path)
        assert os.path.isfile(net_path)

        g = nx.read_pajek(net_path)
        assert nx.is_isomorphic(graph, g)
    finally:
        # Only remove what was actually created, so a failed save is not
        # masked by an error from os.remove.
        if os.path.isfile(net_path):
            os.remove(net_path)
def test_community_analysis_multi_channel(self):
    """Compare infomap output of the presence graphs with stored results.

    For each presence type ("CC", "UU", "CU") both the full graph and the
    reduced graph are written as ``.net`` files, run through
    ``community.infomap_igraph``, deleted again, and the resulting graph and
    communities are checked against the expected result loaded from disk.
    """
    log_data = reader.linux_input(self.log_data_dir, ["ALL"],
                                  self.start_date, self.end_date)
    expected_result = util.load_from_disk(
        self.current_directory +
        '/data/output/community_analysis_multi_channel')

    tracker_result = nickTracker.nick_tracker(log_data, True)
    (nicks, nick_same_list, channels_for_user,
     nick_channel_dict, nicks_hash, channels_hash) = tracker_result
    dict_out, graph = network.channel_user_presence_graph_and_csv(
        nicks, nick_same_list, channels_for_user,
        nick_channel_dict, nicks_hash, channels_hash)

    presence_type = ["CC", "UU", "CU"]
    expected_output = dict((ptype, {}) for ptype in presence_type)

    # (file-name prefix / result key, key of the graph inside dict_out)
    variants = (("adj", "graph"), ("radj", "reducedGraph"))

    for ptype in presence_type:
        # Write both graphs first ...
        for prefix, graph_key in variants:
            saver.save_net_nx_graph(dict_out[ptype][graph_key],
                                    self.current_directory, prefix + ptype)

        # ... then run infomap on each saved file ...
        for prefix, _ in variants:
            net_file = self.current_directory + '/' + prefix + ptype + '.net'
            expected_output[ptype][prefix] = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file)

        # ... and remove the temporary files before asserting.
        for prefix, _ in variants:
            os.remove(self.current_directory + '/' + prefix + ptype + '.net')

        for prefix, _ in variants:
            wanted = expected_result[ptype][prefix]
            produced = expected_output[ptype][prefix]
            self.assertTrue(wanted[0].isomorphic(produced[0]))
            self.assertEqual(compare_communities(produced[1], wanted[1]), 0)
def codelengths(log_directory, output_directory, channel_name):
    """Compute the infomap codelength for every month of 2013 and plot them.

    For each month the message-exchange graph is built, saved as a Pajek
    ``.net`` file and passed to ``community.infomap_igraph``; the resulting
    codelengths are drawn as a box plot and saved as CSV under the name
    "codelengths2013".

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are to
            be stored; it is concatenated directly with the file name when
            the ``.net`` file is read back.
        channel_name(list): channels for which the analysis is to be done

    Returns:
        null
    """
    monthly_codelengths = []
    for month in range(1, 13):
        net_name = "message-exchange-" + str(month)
        net_path = output_directory + net_name + '.net'

        # NOTE(review): day 31 is used as the end of every month; this
        # assumes reader.linux_input tolerates non-existent dates - confirm.
        log_data_m1 = reader.linux_input(log_directory, channel_name,
                                         "2013-" + str(month) + "-1",
                                         "2013-" + str(month) + "-31")
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        message_number_graph_m1 = network.message_number_graph(
            log_data_m1, nicks_m1, nick_same_list_m1, False)

        try:
            # FOS is a reserved word in igraph and if 'fos' is a username in
            # the nx graph, it generates an error
            saver.save_net_nx_graph(message_number_graph_m1,
                                    output_directory, net_name)
            msg_igraph, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_path)
        except Exception:
            # Narrowed from a bare except so Ctrl-C / SystemExit are not
            # swallowed. Retry once with the offending label renamed.
            labels = dict(
                (label, "fos_" if label == "fos" else label)
                for label in message_number_graph_m1.nodes())
            message_number_graph_m1 = nx.relabel_nodes(
                message_number_graph_m1, labels)
            saver.save_net_nx_graph(message_number_graph_m1,
                                    output_directory, net_name)
            # Single-argument call prints the same text on Python 2 and 3.
            print("error in " + str(month))
            msg_igraph, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_path)

        monthly_codelengths.append(msg_community.codelength)

    vis.box_plot(monthly_codelengths, output_directory, "codelengths2013")
    saver.save_csv([monthly_codelengths], output_directory, "codelengths2013")
# Keyword clustering and degree analysis on the message-exchange graph.
# The return value of degree_analysis_on_graph is not used here.
user.keywords_clusters(log_data, nicks, nick_same_list, output_directory, "keywords")
network.degree_analysis_on_graph(message_number_graph)

threshold = config.THRESHOLD_MESSAGE_NUMBER_GRAPH #store original default config
cutoffs = [0, 10, 20]

# Draw one hubs/experts graph per cutoff; the global threshold is
# overridden for each run and restored once the loop is done.
for cutoff in cutoffs:
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = cutoff
    msg_graph_experts, top_hub, top_keyword_overlap, top_auth = network.identify_hubs_and_experts(log_data, nicks, nick_same_list)
    saver.draw_nx_graph (msg_graph_experts, output_directory, "hits-cutoff-"+str(cutoff))

config.THRESHOLD_MESSAGE_NUMBER_GRAPH = threshold #revert to default config

# ============== OUTPUT ================
# The .net file written here is read back in the VIZ section below.
saver.save_net_nx_graph (message_number_graph, output_directory, "message_number_graph")
saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph")

saver.save_csv([["response_time_cutoff"], [rt_cutoff_time]], output_directory, "rt_cutoff")
# NOTE(review): the month label "Jan-2013" is hard-coded - confirm it matches
# the date range of log_data.
saver.save_csv([["month", "users", "directed_messages"], ["Jan-2013", len(message_number_graph), int(message_number_graph.size('weight'))]], output_directory, "users_messages")

# One CSV per degree type, named after the degree type itself.
for dtype in degree_type:
    saver.save_csv(degree_anal_message_number[dtype]["formatted_for_csv"], output_directory, dtype)

saver.save_csv(bin_matrix, output_directory, "MessageNumber_binsize_"+str(config.BIN_LENGTH_MINS))

# =============== VIZ ===================
# output_directory is concatenated directly with the file name, so it is
# expected to already end with a path separator.
message_graph, message_comm = community.infomap_igraph(ig_graph=None, net_file_location= output_directory + 'message_number_graph.net')
vis.plot_infomap_igraph(message_graph, message_comm.membership, output_directory, "message")
vis.plot_data (data, output_directory, "bins")
def codelengths(log_directory, output_directory, channel_name, start_date, end_date):
    """Compute and plot the infomap codelength of every month in a date range.

    The function iterates through the months in the given date range and
    computes the infomap codelength of each month's message-exchange graph.
    It then draws a box plot of the codelengths of all months in the period
    and saves them as CSV (both under the name "codelengths2013").

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are to
            be stored; it is concatenated directly with the file name when
            the ``.net`` file is read back.
        channel_name(list): channels for which the analysis is to be done
        start_date(str or datetime): starting date for the logs to be
            analysed, either a ``datetime`` or a 'YYYY-MM-DD' string. This
            has to be the beginning of the month.
        end_date(str or datetime): ending date for which the logs are to be
            analysed, either a ``datetime`` or a 'YYYY-MM-DD' string. This
            has to be the end of the month.

    Returns:
        null
    """
    date_format = '%Y-%m-%d'
    # strptime needs the string to parse as its first argument; the previous
    # ``start_date.strptime(fmt)`` call could never succeed. datetime
    # objects are passed through unchanged.
    if not isinstance(start_date, datetime.datetime):
        start_date = datetime.datetime.strptime(start_date, date_format)
    if not isinstance(end_date, datetime.datetime):
        end_date = datetime.datetime.strptime(end_date, date_format)

    monthly_codelengths = []
    for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
        # First day of the next month minus one day.
        last_day_of_the_month1 = dt + relativedelta(
            months=1) - datetime.timedelta(days=1)
        # Files are keyed by month number only, so the same month of a
        # different year reuses (overwrites) the same .net file.
        net_name = "message-exchange-" + str(dt.month)
        net_path = output_directory + net_name + '.net'

        log_data_m1 = reader.linux_input(
            log_directory, channel_name, dt.strftime(date_format),
            last_day_of_the_month1.strftime(date_format))
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        message_number_graph_m1 = network.message_number_graph(
            log_data_m1, nicks_m1, nick_same_list_m1, False)

        try:
            # FOS is a reserved word in igraph and if 'fos' is a username in
            # the nx graph, it generates an error
            saver.save_net_nx_graph(message_number_graph_m1,
                                    output_directory, net_name)
            msg_igraph, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_path)
        except Exception:
            # Narrowed from a bare except so Ctrl-C / SystemExit are not
            # swallowed. Retry once with the offending label renamed.
            labels = dict(
                (label, "fos_" if label == "fos" else label)
                for label in message_number_graph_m1.nodes())
            message_number_graph_m1 = nx.relabel_nodes(
                message_number_graph_m1, labels)
            saver.save_net_nx_graph(message_number_graph_m1,
                                    output_directory, net_name)
            # Single-argument call prints the same text on Python 2 and 3.
            print("error in " + str(dt.month))
            msg_igraph, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_path)

        monthly_codelengths.append(msg_community.codelength)

    vis.box_plot(monthly_codelengths, output_directory, "codelengths2013")
    saver.save_csv([monthly_codelengths], output_directory, "codelengths2013")
file=exec_times_file) exec_times_file.flush() del message_number_graph, degree_anal_message_number del slope, intercept, r_square, mse gc.collect() print("msg exchange with cutoff=0 gc completed at: ", datetime.datetime.now(), file=exec_times_file) exec_times_file.flush() # create a smaller message exchange graph for visualization config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20 message_number_graph = network.message_number_graph(log_data, nicks, nick_same_list, False) saver.save_net_nx_graph(message_number_graph, output_directory, "message_number_graph") print("msg exchange graph with cutoff=20 generated at: ", datetime.datetime.now(), file=exec_times_file) exec_times_file.flush() saver.save_net_nx_graph(message_number_graph, output_directory, "message_number_graph_cutoff_20") print("msg exchange graph saved at: ", datetime.datetime.now(), file=exec_times_file) exec_times_file.flush() saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph_cutoff_20") print("msg exchange graph plot completed at: ",