def test_message_number_graph(self, log_data, nicks, nick_same_list):
    """Compare the message-number graph with the stored gpickle output.

    Args:
        log_data: log data handed to ``network.message_number_graph``.
        nicks: nick list handed to ``network.message_number_graph``.
        nick_same_list: nick alias groups handed to
            ``network.message_number_graph``.
    """
    update_expected_output_directory(log_data)
    built_graph = network.message_number_graph(log_data, nicks,
                                               nick_same_list)
    matches_expected = compare_graph_outputs(built_graph,
                                             "message_number_graph.gpickle")
    self.assertTrue(matches_expected, msg=None)
def test_community_analysis_single_channel_cutoff_20(self):
    """Infomap communities of #kubuntu-devel at cutoff 20 match the fixture.

    The message-number graph is built with
    ``config.THRESHOLD_MESSAGE_NUMBER_GRAPH`` temporarily set to 20, written
    to a ``.net`` file, and fed to ``community.infomap_igraph``. The result
    is compared with the stored output: the graphs must be isomorphic and
    ``compare_communities`` must return 0.
    """
    log_data = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                  self.start_date, self.end_date)
    expected_result = util.load_from_disk(
        self.current_directory +
        '/data/output/community_analysis_single_channel_cutoff_20')
    nicks, nick_same_list = nickTracker.nick_tracker(log_data)

    default_cutoff = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20
    try:
        net_file_name = ("message-exchange-" + self.start_date + "-cutoff-" +
                         str(config.THRESHOLD_MESSAGE_NUMBER_GRAPH))
        net_file_path = self.current_directory + "/" + net_file_name + '.net'

        message_number_graph = network.message_number_graph(
            log_data, nicks, nick_same_list, False)
        saver.save_net_nx_graph(message_number_graph, self.current_directory,
                                net_file_name)
        try:
            expected_output = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file_path)
        finally:
            # Remove the temporary .net file even when infomap fails.
            os.remove(net_file_path)
    finally:
        # The threshold is global state; always restore it so a failure
        # here cannot leak the cutoff into other tests.
        config.THRESHOLD_MESSAGE_NUMBER_GRAPH = default_cutoff

    self.assertTrue(expected_result[0].isomorphic(expected_output[0]))
    self.assertEqual(
        compare_communities(expected_result[1], expected_output[1]), 0)
def test_degree_analysis_on_nodes(self, log_data, nicks, nick_same_list):
    """Degree analysis of the three graphs equals the stored JSON results.

    The message-number, message-time and nick-change graphs are built first,
    each is analysed with ``network.degree_analysis_on_graph``, and each
    analysis is compared with the first entry that ``unjson`` puts into the
    list it is given.
    """
    update_expected_output_directory(log_data)

    graphs = [
        network.message_number_graph(log_data, nicks, nick_same_list),
        network.message_time_graph(log_data, nicks, nick_same_list),
        user.nick_change_graph(log_data),
    ]
    analyses = [network.degree_analysis_on_graph(graph) for graph in graphs]

    fixture_names = (
        'degree_anal_message_number.json',
        'degree_anal_message_time.json',
        'degree_anal_nick_change.json',
    )
    expected_holders = []
    for fixture_name in fixture_names:
        holder = []
        unjson(fixture_name, holder)
        expected_holders.append(holder)

    for analysis, holder in zip(analyses, expected_holders):
        self.assertDictEqual(analysis, holder[0], msg=None)
def box_plot_for_degree(log_directory, output_directory, channel_name):
    """Box-plot the monthly degree-distribution fit parameters for 2013.

    For every entry of ``channel_name`` the twelve months of 2013 are
    analysed one at a time. The out-, in- and total-degree fits returned by
    ``vis.generate_log_plots`` are stored per month. Each fit parameter
    (slope, intercept, r_square) is then drawn as a box plot and saved as
    CSV, separately for each degree kind.

    Args:
        log_directory: location of the logs, passed to ``reader.linux_input``.
        output_directory: where plots and CSV files are written.
        channel_name: iterable of channel entries; each entry is passed to
            ``reader.linux_input`` and its element ``[0]`` is used in the
            output names.
    """
    cutoff = 0
    degree_kinds = ("out_degree", "in_degree", "total_degree")

    for channel_name_iter in channel_name:
        fit_table = dict((kind, np.zeros((12, 4))) for kind in degree_kinds)

        for month in range(1, 13):
            month_prefix = "2013-" + str(month)
            # NOTE(review): day 31 is requested for every month; presumably
            # reader.linux_input tolerates dates that do not exist.
            log_data = reader.linux_input(log_directory, channel_name_iter,
                                          month_prefix + "-1",
                                          month_prefix + "-31")
            nicks, nick_same_list = nickTracker.nick_tracker(log_data)
            graph = network.message_number_graph(log_data, nicks,
                                                 nick_same_list, False)
            degree_analysis = network.degree_analysis_on_graph(graph)

            for kind in degree_kinds:
                fit_table[kind][month - 1] = vis.generate_log_plots(
                    degree_analysis[kind]["raw_for_vis"],
                    output_directory, channel_name_iter[0])

        parameters = ['slope', 'intercept', 'r_square']
        for para_ind, parameter in enumerate(parameters):
            suffix = ("_" + str(parameter) + "_2013_" + channel_name_iter[0] +
                      "_cut_" + str(cutoff))

            # All box plots first, then all CSV files.
            for kind in degree_kinds:
                vis.box_plot(fit_table[kind][:, para_ind], output_directory,
                             kind + suffix)
            for kind in degree_kinds:
                saver.save_csv([fit_table[kind][:, para_ind].tolist()],
                               output_directory, kind + suffix)
def test_message_exchange_network(self):
    """Degree analysis of the #kubuntu-devel message graph equals the fixture."""
    channels = ["#kubuntu-devel"]
    log_data = reader.linux_input(self.log_data_dir, channels,
                                  self.start_date, self.end_date)
    stored_analysis = util.load_from_disk(
        self.current_directory +
        '/data/output/degree_anal_message_number_graph_kubuntu-devel')

    nicks, nick_same_list = nickTracker.nick_tracker(log_data)
    graph = network.message_number_graph(log_data, nicks, nick_same_list,
                                         False)
    computed_analysis = network.degree_analysis_on_graph(graph)

    self.assertEqual(stored_analysis, computed_analysis)
def test_reduced_networks_cutoff_20(self):
    """Message-number graph built with cutoff 20 is isomorphic to the fixture.

    ``config.THRESHOLD_MESSAGE_NUMBER_GRAPH`` is set to 20 for the duration
    of the graph construction and restored afterwards.
    """
    default_config = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20
    try:
        log_data = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                      self.start_date, self.end_date)
        expected_result = util.load_from_disk(
            self.current_directory +
            '/data/output/message_number_graph_cutoff_20')
        nicks, nick_same_list = nickTracker.nick_tracker(log_data, False)
        expected_output = network.message_number_graph(
            log_data, nicks, nick_same_list, False)
    finally:
        # The threshold is global state; restore it even when building the
        # graph fails so other tests are not run with the cutoff of 20.
        config.THRESHOLD_MESSAGE_NUMBER_GRAPH = default_config

    self.assertTrue(nx.is_isomorphic(expected_result, expected_output))
def test_message_number_graph_day_analysis(self, mock_get_nick_sen_rec,
                                           mock_rec_list_splice,
                                           mock_correctLastCharCR,
                                           mock_check_if_msg_line,
                                           mock_create_connected_nick_list,
                                           mock_to_graph):
    """Day-by-day message-number graphs match the fixture, with mocked helpers.

    Each mock is primed with a recorded value loaded from disk. The first
    graph of the first two entries returned by
    ``network.message_number_graph`` must be isomorphic to the stored ones,
    and ``to_graph`` / ``create_connected_nick_list`` must each be called
    exactly once with the expected argument.

    The mock arguments are presumably injected by ``patch`` decorators that
    are outside the visible source.
    """
    fixture_dir = self.current_directory + "/data/message_number_graph/"

    to_graph_ret = util.load_from_disk(fixture_dir + "to_graph")
    conn_list = list(connected_components(to_graph_ret))

    mock_to_graph.return_value = to_graph_ret
    mock_rec_list_splice.side_effect = util.load_from_disk(
        fixture_dir + "rec_list_splice")
    mock_create_connected_nick_list.return_value = util.load_from_disk(
        fixture_dir + "conn_comp_list")
    mock_check_if_msg_line.side_effect = util.load_from_disk(
        fixture_dir + "check_if_msg_line")
    mock_correctLastCharCR.side_effect = util.load_from_disk(
        fixture_dir + "correctLastCharCR")
    mock_get_nick_sen_rec.side_effect = util.load_from_disk(
        fixture_dir + "get_nick_sen_rec")

    # Silence whatever the code under test prints. The previous stream is
    # restored (rather than sys.__stdout__) so a test runner's own capture
    # survives, and the restore happens even if the call raises.
    previous_stdout = sys.stdout
    capturedOutput = StringIO.StringIO()
    sys.stdout = capturedOutput
    try:
        ret = network.message_number_graph(self.log_data, self.nicks,
                                           self.nick_same_list,
                                           DAY_BY_DAY_ANALYSIS=True)
        expected_graph_list = util.load_from_disk(
            fixture_dir + "message_number_day_list")
    finally:
        sys.stdout = previous_stdout
        capturedOutput.close()

    mock_to_graph.assert_called_once_with(self.nick_same_list)
    mock_create_connected_nick_list.assert_called_once_with(conn_list)
    self.assertTrue(nx.is_isomorphic(ret[0][0], expected_graph_list[0][0]))
    self.assertTrue(nx.is_isomorphic(ret[1][0], expected_graph_list[1][0]))
def test_message_number_graph(self):
    """Aggregate message-number graph is isomorphic to the stored graph."""
    # Silence whatever the code under test prints. The previous stream is
    # restored (rather than sys.__stdout__) so a test runner's own capture
    # survives, and the restore happens even if the call raises.
    previous_stdout = sys.stdout
    capturedOutput = StringIO.StringIO()
    sys.stdout = capturedOutput
    try:
        graph = network.message_number_graph(self.log_data, self.nicks,
                                             self.nick_same_list,
                                             DAY_BY_DAY_ANALYSIS=False)
    finally:
        sys.stdout = previous_stdout
        capturedOutput.close()

    expected_graph = util.load_from_disk(
        self.test_data_dir +
        "message_number_graph/aggregate_message_number_graph")
    self.assertTrue(nx.is_isomorphic(graph, expected_graph))
def test_message_number_graph_day_analysis(self):
    """Day-by-day message-number graphs are isomorphic to the stored ones.

    Only the first graph of the first two entries of the returned list is
    compared.
    """
    # Silence whatever the code under test prints. The previous stream is
    # restored (rather than sys.__stdout__) so a test runner's own capture
    # survives, and the restore happens even if the call raises.
    previous_stdout = sys.stdout
    capturedOutput = StringIO.StringIO()
    sys.stdout = capturedOutput
    try:
        graph = network.message_number_graph(self.log_data, self.nicks,
                                             self.nick_same_list,
                                             DAY_BY_DAY_ANALYSIS=True)
        expected_graph_list = util.load_from_disk(
            self.test_data_dir +
            "message_number_graph/message_number_day_list")
    finally:
        sys.stdout = previous_stdout
        capturedOutput.close()

    self.assertTrue(
        nx.is_isomorphic(graph[0][0], expected_graph_list[0][0]))
    self.assertTrue(
        nx.is_isomorphic(graph[1][0], expected_graph_list[1][0]))
def test_degree_distribution_message_exchange_network(self):
    """Curve-fit results per degree type equal the stored fixture.

    For out-, in- and total-degree the raw degree data of the
    #kubuntu-devel message-number graph is passed to
    ``vis.generate_log_plots``; the PNG written for each type is removed
    again straight away.
    """
    channel_label = "#kubuntu-devel"
    log_data = reader.linux_input(self.log_data_dir, [channel_label],
                                  self.start_date, self.end_date)
    expected_result = util.load_from_disk(
        self.current_directory +
        '/data/output/message_exchange_network_curve_fit')

    nicks, nick_same_list = nickTracker.nick_tracker(log_data)
    graph = network.message_number_graph(log_data, nicks, nick_same_list,
                                         False)
    degree_analysis = network.degree_analysis_on_graph(graph)

    curve_fits = {}
    for dtype in ("out_degree", "in_degree", "total_degree"):
        plot_name = channel_label + dtype
        curve_fits[dtype] = vis.generate_log_plots(
            degree_analysis[dtype]["raw_for_vis"],
            self.current_directory, plot_name)
        os.remove(self.current_directory + "/" + plot_name + ".png")

    self.assertEqual(expected_result, curve_fits)
def codelengths(log_directory, output_directory, channel_name):
    """Box-plot and save the infomap codelength of every month of 2013.

    For each month the message-number graph is built, written to
    ``output_directory + "message-exchange-<month>.net"`` and passed to
    ``community.infomap_igraph``. The codelength of the resulting community
    structure is collected. The twelve values are drawn as a box plot and
    saved as CSV under the name ``codelengths2013``.

    Args:
        log_directory: location of the logs, passed to ``reader.linux_input``.
        output_directory: where the .net files, the plot and the CSV go. It
            is concatenated directly with the file name, so it presumably
            has to end with a path separator.
        channel_name: channels to analyse, passed to ``reader.linux_input``.
    """
    monthly_codelengths = []
    for month in range(1, 13):
        # NOTE(review): day 31 is requested for every month; presumably
        # reader.linux_input tolerates dates that do not exist.
        log_data_m1 = reader.linux_input(log_directory, channel_name,
                                         "2013-" + str(month) + "-1",
                                         "2013-" + str(month) + "-31")
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        message_number_graph_m1 = network.message_number_graph(
            log_data_m1, nicks_m1, nick_same_list_m1, False)

        net_file_name = "message-exchange-" + str(month)
        net_file_location = output_directory + net_file_name + '.net'
        try:
            # FOS is a reserved word in igraph and if 'fos' is a username in
            # the nx graph, it generates an error
            saver.save_net_nx_graph(message_number_graph_m1,
                                    output_directory, net_file_name)
            _, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file_location)
        except Exception:
            # Catch Exception, not everything: Ctrl-C and SystemExit must
            # still stop the run. Retry once with the node 'fos' renamed.
            labels = dict(
                (label, "fos_" if label == "fos" else label)
                for label in message_number_graph_m1.nodes())
            message_number_graph_m1 = nx.relabel_nodes(
                message_number_graph_m1, labels)
            saver.save_net_nx_graph(message_number_graph_m1,
                                    output_directory, net_file_name)
            # Same output as the statement form on Python 2, and also valid
            # on Python 3.
            print("error in %s" % month)
            _, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file_location)
        monthly_codelengths.append(msg_community.codelength)

    vis.box_plot(monthly_codelengths, output_directory, "codelengths2013")
    saver.save_csv([monthly_codelengths], output_directory,
                   "codelengths2013")
def test_degree_analysis_on_nodes(self, log_data, nicks, nick_same_list):
    """Check the degree analysis of each graph against its JSON fixture.

    Covers the message-number, message-time and nick-change graphs. The
    expected dictionary for each is the first element that ``unjson``
    places into the list passed to it.
    """
    update_expected_output_directory(log_data)

    # Build all three graphs before running any analysis.
    number_graph = network.message_number_graph(log_data, nicks,
                                                nick_same_list)
    time_graph = network.message_time_graph(log_data, nicks, nick_same_list)
    change_graph = user.nick_change_graph(log_data)

    computed = [
        network.degree_analysis_on_graph(graph)
        for graph in (number_graph, time_graph, change_graph)
    ]

    json_names = ['degree_anal_message_number.json',
                  'degree_anal_message_time.json',
                  'degree_anal_nick_change.json']
    loaded = [[] for _ in json_names]
    for json_name, target in zip(json_names, loaded):
        unjson(json_name, target)

    for actual, target in zip(computed, loaded):
        self.assertDictEqual(actual, target[0], msg=None)
# Driver script: reads the configured logs once and runs the network,
# channel and user analyses on them. Results are bound to module-level names.
from lib.in_out import reader, saver
from lib import nickTracker, config, vis, validate
from lib.analysis import network, channel, user, community

# Run parameters, all taken from lib.config.
log_directory = config.LOG_DIRECTORY
channel_name = config.CHANNEL_NAME
starting_date = config.STARTING_DATE
ending_date = config.ENDING_DATE
output_directory = config.OUTPUT_DIRECTORY

# ============== INPUT==================
log_data = reader.linux_input(log_directory, channel_name, starting_date, ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
# The trailing boolean presumably selects day-by-day output (True) versus a
# single aggregate graph (False) - confirm against lib.analysis.network.
message_number_graph = network.message_number_graph(log_data, nicks, nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(log_data, nicks, nick_same_list, True)

# NOTE(review): "numder" looks like a typo for "number"; left unchanged
# because the name may be referenced outside the visible source.
degree_anal_message_numder = network.degree_analysis_on_graph(message_number_graph)

message_time_graph_list = network.message_time_graph(log_data, nicks, nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks, nick_same_list, False)

out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(
    log_data, nicks, nick_same_list)

nick_change_graph_list = user.nick_change_graph(log_data, True)

bin_matrix, total_messages = network.message_number_bins_csv(log_data, nicks, nick_same_list)

conv_len, conv_ref_time = channel.conv_len_conv_refr_time(log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)

# The return value of this call is not kept.
user.keywords_clusters(log_data, nicks, nick_same_list)

# NOTE(review): repeats the degree analysis already run above and discards
# the result - presumably redundant; confirm before removing.
network.degree_analysis_on_graph(message_number_graph)

# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")
def test_message_number_graph(self, log_data, nicks, nick_same_list):
    """Message-number graph must agree with ``message_number_graph.gpickle``.

    The comparison itself is delegated to ``compare_graph_outputs``.
    """
    update_expected_output_directory(log_data)

    graph_under_test = network.message_number_graph(
        log_data, nicks, nick_same_list)
    comparison = compare_graph_outputs(
        graph_under_test, "message_number_graph.gpickle")

    self.assertTrue(comparison, msg=None)
def codelengths(log_directory, output_directory, channel_name, start_date,
                end_date):
    """
    Iterate through the months in the given date range and compute the
    infomap codelength of each month's message-number graph. The codelengths
    of all months in the range are then drawn as one box plot and saved as
    CSV.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are to
            be stored. It is concatenated directly with the file name, so it
            presumably has to end with a path separator.
        channel_name(list): channels for which the analysis is to be done
        start_date(str or datetime): starting date for the logs to be
            analysed, as a 'YYYY-MM-DD' string or a date/datetime object.
            This has to be the beginning of the month.
        end_date(str or datetime): ending date for which the logs are to be
            analysed, in the same forms as start_date. This has to be the
            end of the month.

    Returns:
        null

    Note:
        The .net files are named after the month number only and the outputs
        are always called "codelengths2013", whatever range is given.
    """
    date_format = '%Y-%m-%d'
    # strptime is an alternate constructor taking (string, format); calling
    # it on the value itself with only a format always raised.
    if not isinstance(start_date, datetime.date):
        start_date = datetime.datetime.strptime(start_date, date_format)
    if not isinstance(end_date, datetime.date):
        end_date = datetime.datetime.strptime(end_date, date_format)

    monthly_codelengths = []
    for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
        # First day of the next month minus one day.
        last_day_of_the_month1 = dt + relativedelta(
            months=1) - datetime.timedelta(days=1)
        log_data_m1 = reader.linux_input(
            log_directory, channel_name, dt.strftime(date_format),
            last_day_of_the_month1.strftime(date_format))
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        message_number_graph_m1 = network.message_number_graph(
            log_data_m1, nicks_m1, nick_same_list_m1, False)

        net_file_name = "message-exchange-" + str(dt.month)
        net_file_location = output_directory + net_file_name + '.net'
        try:
            # FOS is a reserved word in igraph and if 'fos' is a username in
            # the nx graph, it generates an error
            saver.save_net_nx_graph(message_number_graph_m1,
                                    output_directory, net_file_name)
            _, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file_location)
        except Exception:
            # Catch Exception, not everything: Ctrl-C and SystemExit must
            # still stop the run. Retry once with the node 'fos' renamed.
            labels = dict(
                (label, "fos_" if label == "fos" else label)
                for label in message_number_graph_m1.nodes())
            message_number_graph_m1 = nx.relabel_nodes(
                message_number_graph_m1, labels)
            saver.save_net_nx_graph(message_number_graph_m1,
                                    output_directory, net_file_name)
            # Same output as the statement form on Python 2, and also valid
            # on Python 3.
            print("error in %s" % dt.month)
            _, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file_location)
        monthly_codelengths.append(msg_community.codelength)

    vis.box_plot(monthly_codelengths, output_directory, "codelengths2013")
    saver.save_csv([monthly_codelengths], output_directory,
                   "codelengths2013")
# Driver script: reads the configured logs once and runs the network, user
# and channel analyses on them. Results are bound to module-level names.
# NOTE(review): `reader` is used below but is not imported in the visible
# lines - presumably imported above this chunk; confirm.
from lib import nickTracker, config, vis, validate
from lib.analysis import network, channel, user, community

# Run parameters, all taken from lib.config.
log_directory = config.LOG_DIRECTORY
channel_name = config.CHANNEL_NAME
starting_date = config.STARTING_DATE
ending_date = config.ENDING_DATE
output_directory = config.OUTPUT_DIRECTORY

# ============== INPUT==================
log_data = reader.linux_input(log_directory, channel_name, starting_date, ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
# The trailing boolean presumably selects day-by-day output (True) versus a
# single aggregate graph (False) - confirm against lib.analysis.network.
message_number_graph = network.message_number_graph(log_data, nicks, nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(
    log_data, nicks, nick_same_list, True)

# NOTE(review): "numder" looks like a typo for "number"; left unchanged
# because the name may be referenced outside the visible source.
degree_anal_message_numder = network.degree_analysis_on_graph(
    message_number_graph)

message_time_graph_list = network.message_time_graph(log_data, nicks, nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks, nick_same_list, False)

out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(
    log_data, nicks, nick_same_list)

nick_change_graph_list = user.nick_change_graph(log_data, True)

bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)

conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
    log_data, nicks, nick_same_list)
def box_plot_for_degree(log_directory, output_directory, channel_name,
                        start_date, end_date):
    """
    Correlational : statistical distribution of curve fit parameters
    generated for degree distribution. The function takes the given time
    duration and selects one month at a time for generation of a degree
    distribution sample. Each degree distribution sample shall have 3 curve
    fit parameters namely slope, intercept & r_square. The function collects
    these parameters for all the months of the given time duration. The
    function produces box plot separately for each parameter.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are to
            be stored
        channel_name(list): channels for which the analysis is to be done.
            Each entry is passed to reader.linux_input and its element [0]
            is used in the output names.
        start_date(str or datetime): starting date for the logs to be
            analysed, as a 'YYYY-MM-DD' string or a date/datetime object.
            This has to be the beginning of the month.
        end_date(str or datetime): ending date for which the logs are to be
            analysed, in the same forms as start_date. This has to be the
            end of the month.

    Returns:
        null

    Note:
        Output names always contain "_2013_", whatever range is given.
    """
    date_format = '%Y-%m-%d'
    # strptime is an alternate constructor taking (string, format); calling
    # it on the value itself with only a format always raised.
    if not isinstance(start_date, datetime.date):
        start_date = datetime.datetime.strptime(start_date, date_format)
    if not isinstance(end_date, datetime.date):
        end_date = datetime.datetime.strptime(end_date, date_format)

    cutoff = 0
    degree_kinds = ("out_degree", "in_degree", "total_degree")
    parameters = ['slope', 'intercept', 'r_square']
    months = list(rrule(MONTHLY, dtstart=start_date, until=end_date))

    for channel_name_iter in channel_name:
        # One row per analysed month. A fixed 12-row table indexed by
        # calendar month left zero rows for months outside the range (which
        # skewed the box plots) and overwrote rows for longer ranges.
        fit_table = dict(
            (kind, np.zeros((len(months), 4))) for kind in degree_kinds)

        for row, dt in enumerate(months):
            # First day of the next month minus one day.
            last_day_of_the_month = dt + relativedelta(
                months=1) - datetime.timedelta(days=1)
            log_data = reader.linux_input(
                log_directory, channel_name_iter, dt.strftime(date_format),
                last_day_of_the_month.strftime(date_format))
            nicks, nick_same_list = nickTracker.nick_tracker(log_data)
            message_number_graph = network.message_number_graph(
                log_data, nicks, nick_same_list, False)
            degree_anal_message_number = network.degree_analysis_on_graph(
                message_number_graph)

            for kind in degree_kinds:
                fit_table[kind][row] = vis.generate_log_plots(
                    degree_anal_message_number[kind]["raw_for_vis"],
                    output_directory, channel_name_iter[0])

        for para_ind, parameter in enumerate(parameters):
            suffix = ("_" + str(parameter) + "_2013_" + channel_name_iter[0] +
                      "_cut_" + str(cutoff))

            # All box plots first, then all CSV files.
            for kind in degree_kinds:
                vis.box_plot(fit_table[kind][:, para_ind], output_directory,
                             kind + suffix)
            for kind in degree_kinds:
                saver.save_csv([fit_table[kind][:, para_ind].tolist()],
                               output_directory, kind + suffix)