def correlational_activity(log_directory, output_directory, channel_name):
    """Correlate month-over-month channel activity for 2013.

    For each consecutive month pair (Jan-Feb ... Nov-Dec) of 2013, builds
    per-bin message-count vectors for both months, computes their Pearson
    correlation coefficient, prints the details, and finally box-plots and
    CSV-saves all eleven coefficients.

    Args:
        log_directory (str): path to the location of the logs.
        output_directory (str): path where results are stored.
        channel_name (list): channels for which the analysis is done.

    Returns:
        None
    """
    pearson = []
    # Months 1..11 only: each month is paired with its successor, so
    # December is still covered as the second month of the last pair.
    for month in xrange(1, 12):
        # NOTE(review): day 31 is requested for every month (e.g. Feb 31);
        # presumably reader.linux_input tolerates non-existent dates -- confirm.
        log_data_m1 = reader.linux_input(log_directory, channel_name,
                                         "2013-" + str(month) + "-1",
                                         "2013-" + str(month) + "-31")
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        bin_matrix_m1, total_messages_m1 = network.message_number_bins_csv(
            log_data_m1, nicks_m1, nick_same_list_m1)
        # Column-wise totals: one aggregate activity value per time bin.
        monthly_sum_bins_m1 = [sum(i) for i in zip(*bin_matrix_m1)]
        log_data_m2 = reader.linux_input(log_directory, channel_name,
                                         "2013-" + str(month + 1) + "-1",
                                         "2013-" + str(month + 1) + "-31")
        nicks_m2, nick_same_list_m2 = nickTracker.nick_tracker(log_data_m2)
        bin_matrix_m2, total_messages_m2 = network.message_number_bins_csv(
            log_data_m2, nicks_m2, nick_same_list_m2)
        monthly_sum_bins_m2 = [sum(i) for i in zip(*bin_matrix_m2)]
        # Pearson r between the two monthly bin vectors.
        corr = np.corrcoef(monthly_sum_bins_m1, monthly_sum_bins_m2)[0, 1]
        print "\n----------------------------------"
        print "For months", month, "and", month + 1
        print "Bins for M1:", monthly_sum_bins_m1
        print "Bins for M2:", monthly_sum_bins_m2
        print "Pearson correlation:", corr
        pearson.append(corr)
    vis.box_plot(pearson, output_directory, "pearson2013")
    saver.save_csv([pearson], output_directory, "pearson2013")
def correlational_activity(log_directory, output_directory, channel_name, start_date, end_date): """ The function selects a month in the given date range and creates heatmap bins for the current month and the next month. It then calculates the correlational calculates the correlational vectors between the two heatmaps and then produces a box plot for all the correlational coefficients of the months in the given date range. Args: log_directory(str): path to the location of Logs output_directory(str): path to the location where the results are to be stored channel_name(list): channels for which the analysis is to be done start_date(datetime): starting date for the logs to be analysed. This has to be the beginning of the month. end_date(datetime): ending date for which the logs are to be analysed. This has to be the end of the month. Returns: null """ start_date = start_date.strptime('%Y-%m-%d') end_date = end_date.strptime('%Y-%m-%d') pearson = [] for dt in rrule(MONTHLY, dtstart=start_date, until=end_date): last_day_of_the_month1 = dt + relativedelta( months=1) - datetime.timedelta(days=1) log_data_m1 = reader.linux_input( log_directory, channel_name, dt.strftime("%Y-%m-%d"), last_day_of_the_month1.strftime("%Y-%m-%d")) nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1) bin_matrix_m1, total_messages_m1 = network.message_number_bins_csv( log_data_m1, nicks_m1, nick_same_list_m1) monthly_sum_bins_m1 = [sum(i) for i in zip(*bin_matrix_m1)] next_month_dt = dt + relativedelta(months=1) last_day_of_the_month2 = next_month_dt + relativedelta( months=1) - datetime.timedelta(days=1) log_data_m2 = reader.linux_input( log_directory, channel_name, next_month_dt.strftime("%Y-%m-%d"), last_day_of_the_month2.strftime("%Y-%m-%d")) nicks_m2, nick_same_list_m2 = nickTracker.nick_tracker(log_data_m2) bin_matrix_m2, total_messages_m2 = network.message_number_bins_csv( log_data_m2, nicks_m2, nick_same_list_m2) monthly_sum_bins_m2 = [sum(i) for i in zip(*bin_matrix_m2)] 
corr = np.corrcoef(monthly_sum_bins_m1, monthly_sum_bins_m2)[0, 1] print "\n----------------------------------" print "For months", dt.month, "and", dt.month + 1 print "Bins for M1:", monthly_sum_bins_m1 print "Bins for M2:", monthly_sum_bins_m2 print "Pearson correlation:", corr pearson.append(corr) vis.box_plot(pearson, output_directory, "pearson2013") saver.save_csv([pearson], output_directory, "pearson2013")
def test_community_analysis_single_channel_cutoff_20(self):
    """Community detection with cutoff 20 must match the stored partition."""
    log_data = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                  self.start_date, self.end_date)
    expected_result = util.load_from_disk(
        self.current_directory +
        '/data/output/community_analysis_single_channel_cutoff_20')
    nicks, nick_same_list = nickTracker.nick_tracker(log_data)

    # Temporarily raise the edge-weight cutoff; restore it afterwards.
    default_cutoff = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20
    message_number_graph = network.message_number_graph(
        log_data, nicks, nick_same_list, False)
    net_file_base = ("message-exchange-" + self.start_date + "-cutoff-" +
                     str(config.THRESHOLD_MESSAGE_NUMBER_GRAPH))
    saver.save_net_nx_graph(message_number_graph, self.current_directory,
                            net_file_base)
    net_file_path = self.current_directory + "/" + net_file_base + '.net'
    expected_output = community.infomap_igraph(
        ig_graph=None, net_file_location=net_file_path)
    os.remove(net_file_path)
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = default_cutoff

    self.assertTrue(expected_result[0].isomorphic(expected_output[0]))
    self.assertEqual(
        compare_communities(expected_result[1], expected_output[1]), 0)
def setUp(self):
    """Load the January 2013 #kubuntu-devel logs once per test."""
    here = os.path.dirname(os.path.realpath(__file__))
    self.current_directory = here
    self.log_data_dir = here + "/data/input/"
    self.log_data_kubuntu_devel = reader.linux_input(
        self.log_data_dir, ["#kubuntu-devel"], '2013-01-01', '2013-01-31')
    tracked = nickTracker.nick_tracker(self.log_data_kubuntu_devel)
    self.nicks, self.nick_same_list = tracked
def test_full_presence_graph(self, log_data):
    """Presence graph built from log_data must be isomorphic to the
    stored full_presence_graph.gpickle reference.

    NOTE(review): an identical definition of this test appears twice in
    this file; the later definition shadows this one.
    """
    update_expected_output_directory(log_data)
    # Track nicks with per-channel information (second arg True).
    nicks1, nick_same_list1, channels_for_user1, nick_channel_dict1, \
        nicks_hash1, channels_hash1 = nick_tracker(log_data, True)
    dict_out, graph = network.channel_user_presence_graph_and_csv(nicks1, nick_same_list1, \
        channels_for_user1, nick_channel_dict1, \
        nicks_hash1, channels_hash1)
    self.assertTrue(compare_graph_outputs(graph, "full_presence_graph.gpickle"), msg=None)
def test_full_presence_graph(self, log_data):
    """The generated presence graph must match the pickled reference."""
    update_expected_output_directory(log_data)
    tracked = nick_tracker(log_data, True)
    (nicks1, nick_same_list1, channels_for_user1, nick_channel_dict1,
     nicks_hash1, channels_hash1) = tracked
    dict_out, graph = network.channel_user_presence_graph_and_csv(
        nicks1, nick_same_list1, channels_for_user1, nick_channel_dict1,
        nicks_hash1, channels_hash1)
    self.assertTrue(
        compare_graph_outputs(graph, "full_presence_graph.gpickle"),
        msg=None)
def keywords_hits_overlap(log_directory, output_directory, channel_name): # Correlational: overlap for keyword digest and HITS for month in xrange(1, 13): log_data_m1 = reader.linux_input(log_directory, channel_name, "2013-" + str(month) + "-1", "2013-" + str(month) + "-31") nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1) message_graph_m1, top_hubs_m1, top_keyword_overlap_m1, top_auth_m1 = network.identify_hubs_and_experts( log_data_m1, nicks_m1, nick_same_list_m1) saver.draw_nx_graph(message_graph_m1, output_directory, "expert-month-" + str(month)) log_data_m2 = reader.linux_input(log_directory, channel_name, "2013-" + str(month + 1) + "-1", "2013-" + str(month + 1) + "-31") nicks_m2, nick_same_list_m2 = nickTracker.nick_tracker(log_data_m1) message_graph_m2, top_hubs_m2, top_keyword_overlap_with_score_m2, top_auth_m2 = network.identify_hubs_and_experts( log_data_m2, nicks_m2, nick_same_list_m2) print "Top 10 HUBS for Month [HITS]", month, ":", top_hubs_m1 print "Top 10 HUBS for Month [HITS]", month + 1, ":", top_hubs_m2 print "Number of common HUBS (from 10) between above 2 months:", len( list(set(top_hubs_m1).intersection(top_hubs_m2))) print "Top 10 Experts by keywords for Months", month, ":", top_keyword_overlap_m1 print "Top 10 Experts by keywords for Months", month + 1, ":", top_keyword_overlap_with_score_m2 print "Number of common Experts by keywords (from 10) between above 2 months:", len( list( set(top_keyword_overlap_m1).intersection( top_keyword_overlap_with_score_m2))) print "Top 10 AUTH for Month [HITS]", month, ":", top_auth_m1 print "Top 10 AUTH for Month [HITS]", month + 1, ":", top_auth_m2 print "Number of common AUTH (from 10) between above 2 months:", len( list(set(top_auth_m1).intersection(top_auth_m2))) print "Number of users common btw HUBS from HITS and Experts by Keywords (from 10) for month", month, ":", len( list(set(top_keyword_overlap_m1).intersection(top_hubs_m1))) print "Number of users common btw AUTH from HITS 
and Experts by Keywords (from 10) for month", month, ":", len( list(set(top_keyword_overlap_m1).intersection(top_auth_m1))) print "Number of users common btw HUBS from HITS and AUTH from HITS (from 10) for month", month, ":", len( list(set(top_hubs_m1).intersection(top_auth_m1))) print "Number of users common btw HUBS, HITS and KEYWORDS", month, ":", len( set(list(set(top_keyword_overlap_m1).intersection( top_hubs_m1))).intersection(top_auth_m1))
def test_user_nick_change_tracking(self):
    """nick_tracker must reproduce the stored nicks / nick_same_list."""
    nicks, nick_same_list = nickTracker.nick_tracker(self.log_data)
    expected_nicks = util.load_from_disk(self.out_dir + "nicks")
    expected_nick_same_list = util.load_from_disk(self.out_dir +
                                                  "nick_same_list")
    self.assertEqual(expected_nicks, nicks)
    self.assertEqual(expected_nick_same_list, nick_same_list)
def test_nick_tracker(self):
    """Check both tracking modes against the fixtures held on self."""
    # Mode 1: plain nick tracking.
    simple = nickTracker.nick_tracker(self.log_data,
                                      track_users_on_channels=False)
    nicks, nick_same_list = simple
    assert nicks == self.nicks1
    assert nick_same_list == self.nick_same_list1
    # Mode 2: per-channel tracking.
    (nicks, nick_same_list, channels_for_user, nick_channel_dict,
     nicks_hash, channels_hash) = nickTracker.nick_tracker(
         self.log_data, track_users_on_channels=True)
    assert nicks == self.nicks2
    assert nick_same_list == self.nick_same_list2
    assert channels_for_user == self.channels_for_user
    # NOTE(review): nick_channel_dict is never asserted here -- confirm
    # whether a fixture for it exists.
    assert nicks_hash == self.nicks_hash
    assert channels_hash == self.channels_hash
def test_keyword_digest(self):
    """keywords_clusters output file must equal the stored reference."""
    nicks, nick_same_list = nickTracker.nick_tracker(self.log_data)
    out_name = "temp_keywords"
    user.keywords_clusters(self.log_data, nicks, nick_same_list, "./",
                           out_name)
    generated = out_name + ".txt"
    self.assertTrue(filecmp.cmp(self.out_dir + generated, generated))
    os.remove(generated)
def box_plot_for_degree(log_directory, output_directory, channel_name):
    """Fit monthly degree distributions per channel and box-plot the fits.

    For each channel, every month of 2013 contributes one row of log-plot
    fit parameters per degree type (out/in/total).  Each parameter column
    is then box-plotted and saved to CSV.

    Args:
        log_directory (str): path to the location of the logs.
        output_directory (str): path where plots/CSVs are stored.
        channel_name (list): list of channels (each itself a list) to analyse.

    Returns:
        None
    """
    cutoff = 0  # constant; only used to tag the output file names
    for channel_name_iter in channel_name:
        # One row per month; columns are the fit values returned by
        # vis.generate_log_plots (4 per fit, per the array shape).
        out_degree_fit_parameters = np.zeros((12, 4))
        in_degree_fit_parameters = np.zeros((12, 4))
        total_degree_fit_parameters = np.zeros((12, 4))
        for month in range(1, 13):
            # NOTE(review): day 31 is requested for every month (e.g.
            # "2013-2-31"); presumably reader.linux_input tolerates it.
            log_data = reader.linux_input(log_directory, channel_name_iter,
                                          "2013-" + str(month) + "-1",
                                          "2013-" + str(month) + "-31")
            nicks, nick_same_list = nickTracker.nick_tracker(log_data)
            message_number_graph = network.message_number_graph(
                log_data, nicks, nick_same_list, False)
            degree_anal_message_number = network.degree_analysis_on_graph(
                message_number_graph)
            out_degree_fit_parameters[month - 1] = vis.generate_log_plots(
                degree_anal_message_number["out_degree"]["raw_for_vis"],
                output_directory, channel_name_iter[0])
            in_degree_fit_parameters[month - 1] = vis.generate_log_plots(
                degree_anal_message_number["in_degree"]["raw_for_vis"],
                output_directory, channel_name_iter[0])
            total_degree_fit_parameters[month - 1] = vis.generate_log_plots(
                degree_anal_message_number["total_degree"]["raw_for_vis"],
                output_directory, channel_name_iter[0])
        parameters = ['slope', 'intercept', 'r_square']
        for para_ind in range(len(parameters)):
            # Box plot + CSV for each parameter column, per degree type.
            vis.box_plot(
                out_degree_fit_parameters[:, para_ind], output_directory,
                "out_degree_" + str(parameters[para_ind]) + "_2013_" +
                channel_name_iter[0] + "_cut_" + str(cutoff))
            vis.box_plot(
                in_degree_fit_parameters[:, para_ind], output_directory,
                "in_degree_" + str(parameters[para_ind]) + "_2013_" +
                channel_name_iter[0] + "_cut_" + str(cutoff))
            vis.box_plot(
                total_degree_fit_parameters[:, para_ind], output_directory,
                "total_degree_" + str(parameters[para_ind]) + "_2013_" +
                channel_name_iter[0] + "_cut_" + str(cutoff))
            saver.save_csv([out_degree_fit_parameters[:, para_ind].tolist()],
                           output_directory,
                           "out_degree_" + str(parameters[para_ind]) +
                           "_2013_" + channel_name_iter[0] + "_cut_" +
                           str(cutoff))
            saver.save_csv([in_degree_fit_parameters[:, para_ind].tolist()],
                           output_directory,
                           "in_degree_" + str(parameters[para_ind]) +
                           "_2013_" + channel_name_iter[0] + "_cut_" +
                           str(cutoff))
            saver.save_csv([total_degree_fit_parameters[:, para_ind].tolist()],
                           output_directory,
                           "total_degree_" + str(parameters[para_ind]) +
                           "_2013_" + channel_name_iter[0] + "_cut_" +
                           str(cutoff))
def test_message_exchange_network(self):
    """Degree analysis of the message-exchange graph matches the fixture."""
    log_data = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                  self.start_date, self.end_date)
    reference = util.load_from_disk(
        self.current_directory +
        '/data/output/degree_anal_message_number_graph_kubuntu-devel')
    nicks, nick_same_list = nickTracker.nick_tracker(log_data)
    exchange_graph = network.message_number_graph(log_data, nicks,
                                                  nick_same_list, False)
    analysed = network.degree_analysis_on_graph(exchange_graph)
    self.assertEqual(reference, analysed)
def test_reduced_networks_cutoff_20(self):
    """Graph built with cutoff 20 must be isomorphic to the stored one."""
    saved_cutoff = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20
    log_data = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                  self.start_date, self.end_date)
    reference_graph = util.load_from_disk(
        self.current_directory +
        '/data/output/message_number_graph_cutoff_20')
    nicks, nick_same_list = nickTracker.nick_tracker(log_data, False)
    built_graph = network.message_number_graph(log_data, nicks,
                                               nick_same_list, False)
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = saved_cutoff
    self.assertTrue(nx.is_isomorphic(reference_graph, built_graph))
def test_degree_distribution_message_exchange_network(self):
    """Curve fits for out/in/total degree must match the stored fixture."""
    log_data = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                  self.start_date, self.end_date)
    expected_result = util.load_from_disk(
        self.current_directory +
        '/data/output/message_exchange_network_curve_fit')
    nicks, nick_same_list = nickTracker.nick_tracker(log_data)
    exchange_graph = network.message_number_graph(log_data, nicks,
                                                  nick_same_list, False)
    degree_analysis = network.degree_analysis_on_graph(exchange_graph)
    fits = {}
    for dtype in ("out_degree", "in_degree", "total_degree"):
        fits[dtype] = vis.generate_log_plots(
            degree_analysis[dtype]["raw_for_vis"], self.current_directory,
            "#kubuntu-devel" + dtype)
        # The PNG is only a plotting side product; remove it right away.
        os.remove(self.current_directory + "/#kubuntu-devel" + dtype +
                  ".png")
    self.assertEqual(expected_result, fits)
def codelengths(log_directory, output_directory, channel_name): codelengths = [] for month in xrange(1, 13): log_data_m1 = reader.linux_input(log_directory, channel_name, "2013-" + str(month) + "-1", "2013-" + str(month) + "-31") nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1) message_number_graph_m1 = network.message_number_graph( log_data_m1, nicks_m1, nick_same_list_m1, False) try: #FOS is a reserved word in igraph and if 'fos' is a username in the nx graph, it generates an error saver.save_net_nx_graph(message_number_graph_m1, output_directory, "message-exchange-" + str(month)) msg_igraph, msg_community = community.infomap_igraph( ig_graph=None, net_file_location=output_directory + "message-exchange-" + str(month) + '.net') codelengths.append(msg_community.codelength) except: node_labels = message_number_graph_m1.nodes() labels = {} for label in node_labels: if label == "fos": labels[label] = "fos_" else: labels[label] = label message_number_graph_m1 = nx.relabel_nodes(message_number_graph_m1, labels) saver.save_net_nx_graph(message_number_graph_m1, output_directory, "message-exchange-" + str(month)) print "error in", month msg_igraph, msg_community = community.infomap_igraph( ig_graph=None, net_file_location=output_directory + "message-exchange-" + str(month) + '.net') codelengths.append(msg_community.codelength) vis.box_plot(codelengths, output_directory, "codelengths2013") saver.save_csv([codelengths], output_directory, "codelengths2013")
def test_identify_hubs_and_experts(self):
    """Hubs, keyword experts and authorities must match stored fixtures."""
    expected_top_hub = util.load_from_disk(self.out_dir + "top_hub")
    expected_top_keyword_overlap = util.load_from_disk(
        self.out_dir + "top_keyword_overlap")
    expected_top_auth = util.load_from_disk(self.out_dir + "top_auth")
    expected_message_graph = util.load_from_disk(self.out_dir +
                                                 "message_num_graph")
    # Silence the chatty analysis call by swallowing its stdout.
    capturedOutput = StringIO.StringIO()
    sys.stdout = capturedOutput
    nicks, nick_same_list = nickTracker.nick_tracker(self.log_data)
    message_num_graph, top_hub, top_keyword_overlap, top_auth = \
        network.identify_hubs_and_experts(self.log_data, nicks,
                                          nick_same_list)
    sys.stdout = sys.__stdout__
    capturedOutput.close()
    self.assertEqual(top_hub, expected_top_hub)
    self.assertEqual(top_keyword_overlap, expected_top_keyword_overlap)
    self.assertEqual(top_auth, expected_top_auth)
    self.assertTrue(
        nx.is_isomorphic(expected_message_graph, message_num_graph))
def correlational_CL_RT_CRT(log_directory, output_directory, start_date,
                            end_date):
    """Box-plot monthly curve-fit parameters for CL, RT and CRT.

    Correlational: statistical distribution as illustrated by box plots
    for response time (RT), conversation length (CL) and conversation
    refresh time (CRT).  For each hard-coded channel and each cutoff
    percentile, every month in the date range contributes one row of
    curve-fit parameters; the a/b/c parameters are then box-plotted and
    CSV-saved per channel/cutoff.

    Args:
        log_directory (str): path to the location of Logs.
        output_directory (str): path where the results are stored.
        start_date (str or datetime): first day of the first month,
            'YYYY-MM-DD' if given as a string.
        end_date (str or datetime): last day of the last month,
            'YYYY-MM-DD' if given as a string.

    Returns:
        None
    """
    # BUGFIX: the original called start_date.strptime('%Y-%m-%d');
    # datetime.strptime is a two-argument classmethod (string, format),
    # so that call always raised TypeError.
    if not isinstance(start_date, datetime.datetime):
        start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    if not isinstance(end_date, datetime.datetime):
        end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')
    # BUGFIX: capture the default percentile exactly once, up front.  It
    # used to be re-read inside the month loop after CUTOFF_PERCENTILE
    # had already been overwritten, so the restored "default" was the
    # first percentile (0), not the configured value.
    default_cutoff = config.CUTOFF_PERCENTILE
    percentiles = [0, 1, 5, 10, 20]
    try:
        for channel_name_iter in [["#kubuntu-devel"], ["#ubuntu-devel"],
                                  ["#kubuntu"]]:
            for cutoff in percentiles:
                config.CUTOFF_PERCENTILE = cutoff
                conv_len_curve_fit_parameters = np.zeros((12, 4))
                resp_time_curve_fit_parameters = np.zeros((12, 4))
                conv_ref_time_curve_fit_parameters = np.zeros((12, 5))
                for dt in rrule(MONTHLY, dtstart=start_date,
                                until=end_date):
                    last_day_of_the_month = dt + relativedelta(
                        months=1) - datetime.timedelta(days=1)
                    log_data = reader.linux_input(
                        log_directory, channel_name_iter,
                        dt.strftime("%Y-%m-%d"),
                        last_day_of_the_month.strftime("%Y-%m-%d"))
                    nicks, nick_same_list = nickTracker.nick_tracker(
                        log_data)
                    truncated_rt, rt_cutoff_time = channel.response_time(
                        log_data, nicks, nick_same_list,
                        config.CUTOFF_PERCENTILE)
                    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
                        log_data, nicks, nick_same_list, rt_cutoff_time,
                        config.CUTOFF_PERCENTILE)
                    conv_len_curve_fit_parameters[
                        dt.month - 1] = vis.exponential_curve_fit_and_plot(
                            conv_len, output_directory,
                            "conv_len_cutoff" + str(cutoff))
                    resp_time_curve_fit_parameters[
                        dt.month - 1] = vis.exponential_curve_fit_and_plot(
                            truncated_rt, output_directory,
                            "resp_time_cutoff" + str(cutoff))
                    conv_ref_time_curve_fit_parameters[
                        dt.month -
                        1] = vis.exponential_curve_fit_and_plot_x_shifted(
                            conv_ref_time, output_directory,
                            "conv_ref_time_cutoff" + str(cutoff))
                # NOTE(review): CRT fits have 5 columns but only a/b/c
                # are plotted, mirroring the original behaviour.
                parameters = ['a', 'b', 'c']
                for para_ind in range(len(parameters)):
                    vis.box_plot(
                        conv_len_curve_fit_parameters[:, para_ind],
                        output_directory,
                        "conv_len_" + str(parameters[para_ind]) +
                        "_2013_" + channel_name_iter[0] + "_cut_" +
                        str(cutoff))
                    vis.box_plot(
                        resp_time_curve_fit_parameters[:, para_ind],
                        output_directory,
                        "resp_time_" + str(parameters[para_ind]) +
                        "_2013_" + channel_name_iter[0] + "_cut_" +
                        str(cutoff))
                    vis.box_plot(
                        conv_ref_time_curve_fit_parameters[:, para_ind],
                        output_directory,
                        "conv_refr_" + str(parameters[para_ind]) +
                        "_2013_" + channel_name_iter[0] + "_cut_" +
                        str(cutoff))
                    saver.save_csv(
                        [conv_len_curve_fit_parameters[:, para_ind].tolist()],
                        output_directory,
                        "conv_len_" + str(parameters[para_ind]) +
                        "_2013_" + channel_name_iter[0] + "_cut_" +
                        str(cutoff))
                    saver.save_csv(
                        [resp_time_curve_fit_parameters[:, para_ind].tolist()],
                        output_directory,
                        "resp_time_" + str(parameters[para_ind]) +
                        "_2013_" + channel_name_iter[0] + "_cut_" +
                        str(cutoff))
                    saver.save_csv(
                        [conv_ref_time_curve_fit_parameters[:, para_ind].tolist()],
                        output_directory,
                        "conv_refr_" + str(parameters[para_ind]) +
                        "_2013_" + channel_name_iter[0] + "_cut_" +
                        str(cutoff))
    finally:
        # Always restore the configured percentile for later callers.
        config.CUTOFF_PERCENTILE = default_cutoff
def test_nick_tracker(self):
    """End-to-end check of nickTracker.nick_tracker in both modes,
    including its printed stdout, against pickled fixtures."""
    log_data_dat = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/log_data")
    nick_same_list_data = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/nick_same_list")
    # --- mode 1: track_users_on_channels=False ---
    log_data = log_data_dat
    expected_nicks1 = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/nicks1")
    expected_nick_same_list = nick_same_list_data
    expected_output = util.load_from_disk(self.current_directory +
                                          "/data/stdout_nick_tracker1")
    # Capture everything nick_tracker prints for comparison below.
    captured_output = StringIO.StringIO()
    sys.stdout = captured_output
    nicks, nick_same_list = nickTracker.nick_tracker(
        log_data, track_users_on_channels=False)
    sys.stdout = sys.__stdout__
    output = captured_output.getvalue()
    captured_output.close()
    self.assertEqual(expected_nicks1, nicks)
    self.assertEqual(expected_nick_same_list, nick_same_list)
    self.assertEqual(expected_output, output)
    # --- mode 2: track_users_on_channels=True ---
    log_data = log_data_dat
    expected_nicks2 = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/nicks2")
    expected_nick_same_list = nick_same_list_data
    expected_channels_for_user = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/channels_for_user")
    expected_nick_channel_dict = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/nick_channel_dict")
    expected_nicks_hash = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/nicks_hash")
    expected_channels_hash = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/channels_hash")
    expected_output = util.load_from_disk(self.current_directory +
                                          "/data/stdout_nick_tracker2")
    captured_output = StringIO.StringIO()
    sys.stdout = captured_output
    nicks, nick_same_list, channels_for_user, nick_channel_dict, nicks_hash, channels_hash = nickTracker.nick_tracker(
        log_data, track_users_on_channels=True)
    sys.stdout = sys.__stdout__
    output = captured_output.getvalue()
    captured_output.close()
    self.assertEqual(expected_nicks2, nicks)
    self.assertEqual(expected_nick_same_list, nick_same_list)
    self.assertEqual(expected_channels_for_user, channels_for_user)
    # NOTE(review): expected_nick_channel_dict is loaded above but never
    # asserted against nick_channel_dict -- confirm whether intentional.
    self.assertEqual(expected_nicks_hash, nicks_hash)
    self.assertEqual(expected_channels_hash, channels_hash)
    self.assertEqual(expected_output, output)
def test_presence_networks(self):
    """CC/UU/CU presence graphs and their metadata must match fixtures."""
    log_data = reader.linux_input(self.log_data_dir, ["ALL"],
                                  self.start_date, self.end_date)
    stored = util.load_from_disk(
        self.current_directory + '/data/output/presence_graph_dict')
    (nicks, nick_same_list, channels_for_user, nick_channel_dict,
     nicks_hash, channels_hash) = nickTracker.nick_tracker(log_data, True)
    generated, graph = network.channel_user_presence_graph_and_csv(
        nicks, nick_same_list, channels_for_user, nick_channel_dict,
        nicks_hash, channels_hash)
    for edge_type in ('CC', 'UU', 'CU'):
        self.assertTrue(
            nx.is_isomorphic(generated[edge_type]['graph'],
                             stored[edge_type]['graph']))
        self.assertTrue(
            nx.is_isomorphic(generated[edge_type]['reducedGraph'],
                             stored[edge_type]['reducedGraph']))
        # Graphs were checked by isomorphism; drop them so the remaining
        # dict contents can be compared exactly.
        for key in ('graph', 'reducedGraph'):
            generated[edge_type].pop(key)
            stored[edge_type].pop(key)
    np.testing.assert_equal(generated, stored)
def test_community_analysis_multi_channel(self):
    """Infomap communities of the presence graphs (full and reduced, for
    CC/UU/CU) must match the pickled reference partitions."""
    log_data = reader.linux_input(self.log_data_dir, ["ALL"],
                                  self.start_date, self.end_date)
    expected_result = util.load_from_disk(
        self.current_directory +
        '/data/output/community_analysis_multi_channel')
    nicks, nick_same_list, channels_for_user, nick_channel_dict, nicks_hash, channels_hash = nickTracker.nick_tracker(
        log_data, True)
    dict_out, graph = network.channel_user_presence_graph_and_csv(
        nicks, nick_same_list, channels_for_user, nick_channel_dict,
        nicks_hash, channels_hash)
    presence_type = ["CC", "UU", "CU"]
    expected_output = {ptype: {} for ptype in presence_type}
    for ptype in presence_type:
        # infomap reads .net files from disk, so write the adjacency
        # ('adj') and reduced adjacency ('radj') nets, analyse them,
        # then clean the temp files up.
        saver.save_net_nx_graph(dict_out[ptype]["graph"],
                                self.current_directory, "adj" + ptype)
        saver.save_net_nx_graph(dict_out[ptype]["reducedGraph"],
                                self.current_directory, "radj" + ptype)
        expected_output[ptype]['adj'] = community.infomap_igraph(
            ig_graph=None,
            net_file_location=self.current_directory + '/adj' + ptype +
            '.net')
        expected_output[ptype]['radj'] = community.infomap_igraph(
            ig_graph=None,
            net_file_location=self.current_directory + '/radj' + ptype +
            '.net')
        os.remove(self.current_directory + '/adj' + ptype + '.net')
        os.remove(self.current_directory + '/radj' + ptype + '.net')
        # Element [0] is the graph (checked by isomorphism), element [1]
        # the community partition (checked by partition distance == 0).
        self.assertTrue(expected_result[ptype]['adj'][0].isomorphic(
            expected_output[ptype]['adj'][0]))
        self.assertEqual(
            compare_communities(expected_output[ptype]['adj'][1],
                                expected_result[ptype]['adj'][1]), 0)
        self.assertTrue(expected_result[ptype]['radj'][0].isomorphic(
            expected_output[ptype]['radj'][0]))
        self.assertEqual(
            compare_communities(expected_output[ptype]['radj'][1],
                                expected_result[ptype]['radj'][1]), 0)
from os import path

# Make the project package importable from the test location.
# NOTE(review): this chunk relies on `sys` and `os` already being imported
# earlier in the file -- confirm against the full module.
sys.path.insert(0, '../IRCLogParser')
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from lib.analysis import network, user, channel
from lib.nickTracker import nick_tracker
from ddt import ddt, data, unpack
from lib import config
from lib.in_out import reader
import networkx as nx

# Module-level fixtures shared by the tests below: January and August
# 2013 logs with their tracked nick data.
current_dir = os.path.dirname(__file__)
log_directory = os.path.join(current_dir, 'data/input/')
expected_output_directory = os.path.join(current_dir, 'data/output/')
channel_name = config.CHANNEL_NAME
log_for_jan = reader.linux_input(log_directory, channel_name, "2013-1-1",
                                 "2013-1-31")
nicks_for_jan, nick_same_list_for_jan = nick_tracker(log_for_jan)
log_for_aug = reader.linux_input(log_directory, channel_name, "2013-8-1",
                                 "2013-8-31")
nicks_for_aug, nick_same_list_for_aug = nick_tracker(log_for_aug)


def update_expected_output_directory(log_data):
    """Repoint the module-global expected_output_directory at
    data/output/<year>/<zero-padded month>/ for the data in log_data."""
    key = log_data.keys()[0]  #get any key as months and year will be same since log_data has monthly data
    global expected_output_directory
    expected_output_directory = os.path.join(
        current_dir, 'data/output/' + str(key.year) + '/')
    month = key.month
    temp = str(month)
    if (month < 10):
        temp = '0' + str(month)
    expected_output_directory += temp + '/'


def compare_graph_outputs(generated_output, stored_output_file_name):
def test_degree_distribution_multi_channel(self):
    """Degree curve fits of the CC/UU/CU presence graphs must match the
    stored fixtures.

    Builds the multi-channel presence graphs, runs the (undirected)
    degree analysis on each, fits log plots, and compares the fit
    results with the pickled references.  The CU distribution is
    truncated at the 0.5 quantile before fitting.
    """
    log_data = reader.linux_input(self.log_data_dir, ["ALL"],
                                  self.start_date, self.end_date)
    expected_result_CC_degree_curve_fit = util.load_from_disk(
        self.current_directory + '/data/output/CC_degree_curve_fit')
    expected_result_CU_degree_curve_fit = util.load_from_disk(
        self.current_directory + '/data/output/CU_degree_curve_fit')
    expected_result_UU_degree_curve_fit = util.load_from_disk(
        self.current_directory + '/data/output/UU_degree_curve_fit')
    nicks, nick_same_list, channels_for_user, nick_channel_dict, nicks_hash, channels_hash = nickTracker.nick_tracker(
        log_data, True)
    dict_out, graph = network.channel_user_presence_graph_and_csv(
        nicks, nick_same_list, channels_for_user, nick_channel_dict,
        nicks_hash, channels_hash)
    degree_anal_message_number_CC = network.degree_analysis_on_graph(
        dict_out["CC"]["graph"], directed=False)
    degree_anal_message_number_UU = network.degree_analysis_on_graph(
        dict_out["UU"]["graph"], directed=False)
    degree_anal_message_number_CU = network.degree_analysis_on_graph(
        dict_out["CU"]["graph"], directed=False)
    Y = degree_anal_message_number_CU["degree"]["raw_for_vis"][1:]
    # FIX(idiom): the locals here were both named `data`, shadowing the
    # `data` decorator imported from ddt at module scope.
    degree_points = [(i, Y[i]) for i in range(len(Y))]
    CU_truncated, cutoff = channel.truncate_table(degree_points, 0.5)
    CU_T = [point[1] for point in list(CU_truncated)]
    expected_output_CC_degree_curve_fit = vis.generate_log_plots(
        degree_anal_message_number_CC["degree"]["raw_for_vis"],
        self.current_directory, "CC_degree_curve_fit")
    expected_output_CU_degree_curve_fit = vis.generate_log_plots(
        CU_T, self.current_directory, "CU_degree_curve_fit")
    expected_output_UU_degree_curve_fit = vis.generate_log_plots(
        degree_anal_message_number_UU["degree"]["raw_for_vis"],
        self.current_directory, "UU_degree_curve_fit")
    # The PNGs are plotting side products; clean them up before asserting.
    os.remove(self.current_directory + "/CC_degree_curve_fit" + ".png")
    os.remove(self.current_directory + "/CU_degree_curve_fit" + ".png")
    os.remove(self.current_directory + "/UU_degree_curve_fit" + ".png")
    self.assertEqual(expected_result_CC_degree_curve_fit,
                     expected_output_CC_degree_curve_fit)
    self.assertEqual(expected_result_CU_degree_curve_fit,
                     expected_output_CU_degree_curve_fit)
    self.assertEqual(expected_result_UU_degree_curve_fit,
                     expected_output_UU_degree_curve_fit)
def test_user_to_channel_tracking(self):
    """Per-channel tracking output must match all six stored fixtures."""
    load = util.load_from_disk
    expected_nicks = load(self.out_dir + "channel_tracking_nicks")
    expected_nick_same_list = load(self.out_dir +
                                   "channel_tracking_nick_same_list")
    expected_channels_for_user = load(self.out_dir + "channels_for_user")
    expected_nick_channel_dict = load(self.out_dir + "nick_channel_dict")
    expected_nicks_hash = load(self.out_dir + "nicks_hash")
    expected_channel_hash = load(self.out_dir + "channels_hash")
    (nicks, nick_same_list, channels_for_user, nick_channel_dict,
     nicks_hash, channels_hash) = nickTracker.nick_tracker(self.log_data,
                                                           True)
    self.assertEqual(expected_nicks, nicks)
    self.assertEqual(expected_nick_same_list, nick_same_list)
    self.assertEqual(expected_channels_for_user, channels_for_user)
    self.assertEqual(expected_nick_channel_dict, nick_channel_dict)
    self.assertEqual(expected_nicks_hash, nicks_hash)
    self.assertEqual(expected_channel_hash, channels_hash)
def test_nick_tracker(self, mock_check_if_msg_line, mock_correctLastCharCR,
                      mock_splice_find, mock_searchChannel):
    """Same end-to-end nick_tracker check as the unmocked variant, but
    with the line-parsing helpers replaced by recorded side-effect
    sequences loaded from disk."""
    log_data_dat = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/log_data")
    nick_same_list_data = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/nick_same_list")
    # Recorded return sequences for the mocked helpers (shared by both
    # modes; the searchChannel sequence differs per mode).
    correctLastCharCR_data = util.load_from_disk(
        self.current_directory + "/data/correctLastCharCR_list")
    check_if_msg_line_data = util.load_from_disk(
        self.current_directory + "/data/check_if_msg_line_list")
    splice_find_data = util.load_from_disk(self.current_directory +
                                           "/data/splice_find_list")
    # --- mode 1: track_users_on_channels=False ---
    log_data = log_data_dat
    mock_searchChannel.side_effect = util.load_from_disk(
        self.current_directory + "/data/searchChannel_list1")
    mock_splice_find.side_effect = splice_find_data
    mock_correctLastCharCR.side_effect = correctLastCharCR_data
    mock_check_if_msg_line.side_effect = check_if_msg_line_data
    expected_nicks1 = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/nicks1")
    expected_nick_same_list = nick_same_list_data
    expected_output = util.load_from_disk(self.current_directory +
                                          "/data/stdout_nick_tracker1")
    # Capture everything nick_tracker prints for comparison below.
    captured_output = StringIO.StringIO()
    sys.stdout = captured_output
    nicks, nick_same_list = nickTracker.nick_tracker(
        log_data, track_users_on_channels=False)
    sys.stdout = sys.__stdout__
    output = captured_output.getvalue()
    captured_output.close()
    self.assertEqual(expected_nicks1, nicks)
    self.assertEqual(expected_nick_same_list, nick_same_list)
    self.assertEqual(expected_output, output)
    # --- mode 2: track_users_on_channels=True ---
    log_data = log_data_dat
    mock_searchChannel.side_effect = util.load_from_disk(
        self.current_directory + "/data/searchChannel_list2")
    mock_splice_find.side_effect = splice_find_data
    mock_correctLastCharCR.side_effect = correctLastCharCR_data
    mock_check_if_msg_line.side_effect = check_if_msg_line_data
    expected_channels_for_user = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/channels_for_user")
    expected_nick_channel_dict = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/nick_channel_dict")
    expected_nicks_hash = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/nicks_hash")
    expected_channels_hash = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/channels_hash")
    expected_output = util.load_from_disk(self.current_directory +
                                          "/data/stdout_nick_tracker2")
    expected_nicks2 = util.load_from_disk(
        self.current_directory +
        "/../../data/test_lib/nickTracker/nicks2")
    expected_nick_same_list = nick_same_list_data
    captured_output = StringIO.StringIO()
    sys.stdout = captured_output
    nicks, nick_same_list, channels_for_user, nick_channel_dict, nicks_hash, channels_hash = nickTracker.nick_tracker(
        log_data, track_users_on_channels=True)
    sys.stdout = sys.__stdout__
    output = captured_output.getvalue()
    captured_output.close()
    self.assertEqual(expected_nicks2, nicks)
    self.assertEqual(expected_nick_same_list, nick_same_list)
    self.assertEqual(expected_output, output)
    self.assertEqual(expected_channels_for_user, channels_for_user)
    self.assertEqual(expected_nick_channel_dict, nick_channel_dict)
    self.assertEqual(expected_nicks_hash, nicks_hash)
    self.assertEqual(expected_channels_hash, channels_hash)
sys.path.insert(0, '../IRCLogParser') sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) from lib.analysis import network, user, channel from lib.nickTracker import nick_tracker from ddt import ddt, data, unpack from lib import config from lib.in_out import reader import networkx as nx current_dir = os.path.dirname(__file__) log_directory = os.path.join(current_dir, 'data/input/') expected_output_directory = os.path.join(current_dir, 'data/output/') channel_name = config.CHANNEL_NAME log_for_jan = reader.linux_input(log_directory, channel_name, "2013-1-1", "2013-1-31") nicks_for_jan, nick_same_list_for_jan = nick_tracker(log_for_jan) log_for_aug = reader.linux_input(log_directory, channel_name, "2013-8-1", "2013-8-31") nicks_for_aug, nick_same_list_for_aug = nick_tracker(log_for_aug) def update_expected_output_directory(log_data): key = list( log_data.keys() )[0] #get any key as months and year will be same since log_data has monthly data global expected_output_directory expected_output_directory = os.path.join( current_dir, 'data/output/' + str(key.year) + '/') month = key.month temp = str(month) if (month < 10):
# Driver script: read the configured IRC logs and run the standard set of
# network / user / channel analyses on them.  All results are bound to
# module-level names so they can be inspected or post-processed elsewhere.
import sys
sys.path.insert(0, "IRCLogParser/")
from lib.in_out import reader, saver
from lib import nickTracker, config, vis, validate
from lib.analysis import network, channel, user, community

# Analysis parameters come from the project-wide config module.
log_directory = config.LOG_DIRECTORY
channel_name = config.CHANNEL_NAME
starting_date = config.STARTING_DATE
ending_date = config.ENDING_DATE
output_directory = config.OUTPUT_DIRECTORY

# ============== INPUT==================
# Parse the raw logs, then build the nick-aliasing data every analysis needs.
log_data = reader.linux_input(log_directory, channel_name, starting_date,
                              ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
# Message-exchange graphs; the final bool presumably toggles day-wise output
# (True => per-day list, False => aggregate) — TODO confirm against network.py.
message_number_graph = network.message_number_graph(log_data, nicks,
                                                    nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(
    log_data, nicks, nick_same_list, True)
# NOTE(review): "numder" looks like a typo for "number"; kept as-is because
# other modules may reference this name.
degree_anal_message_numder = network.degree_analysis_on_graph(
    message_number_graph)
# Message-timing graphs, day-wise and aggregate variants.
message_time_graph_list = network.message_time_graph(log_data, nicks,
                                                     nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks,
                                                nick_same_list, False)
# Per-node degree tables (out / in / total), CSV-oriented.
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(
    log_data, nicks, nick_same_list)
# Day-wise graphs of nick changes.
nick_change_graph_list = user.nick_change_graph(log_data, True)
def keywords_hits_overlap(log_directory, output_directory, channel_name, start_date, end_date): """ The function iterates through the months in the given date range and produces the authorities, top keywords and top hubs for the current month and the next month. It also produces the overlap of authorities, top keywords and top hubs between the current and the next month. Args: log_directory(str): path to the location of Logs output_directory(str): path to the location where the results are to be stored channel_name(list): channels for which the analysis is to be done start_date(datetime): starting date for the logs to be analysed. This has to be the beginning of the month. end_date(datetime): ending date for which the logs are to be analysed. This has to be the end of the month. Returns: null """ start_date = start_date.strptime('%Y-%m-%d') end_date = end_date.strptime('%Y-%m-%d') for dt in rrule(MONTHLY, dtstart=start_date, until=end_date): last_day_of_the_month1 = dt + relativedelta( months=1) - datetime.timedelta(days=1) log_data_m1 = reader.linux_input( log_directory, channel_name, dt.strftime("%Y-%m-%d"), last_day_of_the_month1.strftime("%Y-%m-%d")) nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1) message_graph_m1, top_hubs_m1, top_keyword_overlap_m1, top_auth_m1 = network.identify_hubs_and_experts( log_data_m1, nicks_m1, nick_same_list_m1) saver.draw_nx_graph(message_graph_m1, output_directory, "expert-month-" + str(dt.month)) next_month_dt = dt + relativedelta(months=1) last_day_of_the_month2 = next_month_dt + relativedelta( months=1) - datetime.timedelta(days=1) log_data_m2 = reader.linux_input( log_directory, channel_name, next_month_dt.strftime("%Y-%m-%d"), last_day_of_the_month2.strftime("%Y-%m-%d")) nicks_m2, nick_same_list_m2 = nickTracker.nick_tracker(log_data_m2) message_graph_m2, top_hubs_m2, top_keyword_overlap_with_score_m2, top_auth_m2 = network.identify_hubs_and_experts( log_data_m2, nicks_m2, nick_same_list_m2) print "Top 10 
HUBS for Month [HITS]", dt.month, ":", top_hubs_m1 print "Top 10 HUBS for Month [HITS]", next_month_dt.month, ":", top_hubs_m2 print "Number of common HUBS (from 10) between above 2 months:", len( list(set(top_hubs_m1).intersection(top_hubs_m2))) print "Top 10 Experts by keywords for Months", dt.month, ":", top_keyword_overlap_m1 print "Top 10 Experts by keywords for Months", next_month_dt.month, ":", top_keyword_overlap_with_score_m2 print "Number of common Experts by keywords (from 10) between above 2 months:", len( list( set(top_keyword_overlap_m1).intersection( top_keyword_overlap_with_score_m2))) print "Top 10 AUTH for Month [HITS]", dt.month, ":", top_auth_m1 print "Top 10 AUTH for Month [HITS]", next_month_dt.month, ":", top_auth_m2 print "Number of common AUTH (from 10) between above 2 months:", len( list(set(top_auth_m1).intersection(top_auth_m2))) print "Number of users common btw HUBS from HITS and Experts by Keywords (from 10) for month", dt.month, ":", len( list(set(top_keyword_overlap_m1).intersection(top_hubs_m1))) print "Number of users common btw AUTH from HITS and Experts by Keywords (from 10) for month", dt.month, ":", len( list(set(top_keyword_overlap_m1).intersection(top_auth_m1))) print "Number of users common btw HUBS from HITS and AUTH from HITS (from 10) for month", dt.month, ":", len( list(set(top_hubs_m1).intersection(top_auth_m1))) print "Number of users common btw HUBS, HITS and KEYWORDS", dt.month, ":", len( set(list(set(top_keyword_overlap_m1).intersection( top_hubs_m1))).intersection(top_auth_m1))
# Driver script: run the message-graph / degree / heatmap-bin analyses and
# then sweep several cutoff percentiles.
# NOTE(review): `reader` (used below) is not imported in this excerpt — it is
# presumably imported earlier in the file; verify before running standalone.
import lib.nickTracker as nickTracker, lib.config as config, lib.vis as vis, lib.validate as validate, lib.util as util
from lib.analysis import network, channel, user, community
import numpy as np
import networkx as nx

# Analysis parameters come from the project-wide config module.
log_directory = config.LOG_DIRECTORY
channel_name = config.CHANNEL_NAME
starting_date = config.STARTING_DATE
ending_date = config.ENDING_DATE
output_directory = config.OUTPUT_DIRECTORY
degree_type = ["out_degree", "in_degree", "total_degree"]
presence_type = ["CC", "UU", "CU"]

# ============== INPUT==================
log_data = reader.linux_input(log_directory, channel_name, starting_date,
                              ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
message_number_graph = network.message_number_graph(log_data, nicks,
                                                    nick_same_list, False)
degree_anal_message_number = network.degree_analysis_on_graph(
    message_number_graph)
# Heatmap bins of message counts; `data` becomes [bin indices, per-bin sums].
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
data = [[i for i in range(len(bin_matrix[0]))]]
data.append([sum(i) for i in zip(*bin_matrix)])
# Remember the configured cutoff so it can be restored after the sweep.
default_cutoff = config.CUTOFF_PERCENTILE

# Re-run analyses under several cutoff percentiles by mutating the shared
# config module.  NOTE(review): the loop body appears truncated in this
# excerpt — more statements likely follow in the full file.
percentiles = [0, 1, 5, 10, 20]
for cutoff in percentiles:
    config.CUTOFF_PERCENTILE = cutoff
# Driver script: full analysis pass over the configured logs — message and
# timing graphs, degree tables, nick changes, heatmap bins, conversation
# statistics, response times and keyword clusters.
import sys
sys.path.insert(0, "IRCLogParser/")
from lib.in_out import reader, saver
from lib import nickTracker, config, vis, validate
from lib.analysis import network, channel, user, community

# Analysis parameters come from the project-wide config module.
log_directory = config.LOG_DIRECTORY
channel_name = config.CHANNEL_NAME
starting_date = config.STARTING_DATE
ending_date = config.ENDING_DATE
output_directory = config.OUTPUT_DIRECTORY

# ============== INPUT==================
log_data = reader.linux_input(log_directory, channel_name, starting_date,
                              ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
# Message-exchange graphs; the final bool presumably toggles day-wise output
# (True => per-day list, False => aggregate) — TODO confirm against network.py.
message_number_graph = network.message_number_graph(log_data, nicks,
                                                    nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(
    log_data, nicks, nick_same_list, True)
# NOTE(review): "numder" looks like a typo for "number"; kept as-is because
# other modules may reference this name.
degree_anal_message_numder = network.degree_analysis_on_graph(
    message_number_graph)
message_time_graph_list = network.message_time_graph(log_data, nicks,
                                                     nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks,
                                                nick_same_list, False)
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(
    log_data, nicks, nick_same_list)
nick_change_graph_list = user.nick_change_graph(log_data, True)
# Heatmap bins plus conversation-level statistics.
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
    log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)
# Called for side effects (presumably writes/prints cluster output) — the
# return value is deliberately unused here.
user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)
def box_plot_for_degree(log_directory, output_directory, channel_name,
                        start_date, end_date):
    """
    Correlational : statistical distribution of curve fit parameters generated
    for degree distribution.

    The function takes the given time duration and selects one month at a
    time for generation of a degree distribution sample. Each degree
    distribution sample shall have 3 curve fit parameters namely slope,
    intercept & r_square. The function collects these parameters for all the
    months of the given time duration and produces a box plot separately for
    each parameter.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are to
            be stored
        channel_name(list): channels for which the analysis is to be done.
        start_date(str or datetime): starting date ("%Y-%m-%d") for the logs
            to be analysed. This has to be the beginning of the month.
        end_date(str or datetime): ending date ("%Y-%m-%d") for the logs to be
            analysed. This has to be the end of the month.

    Returns:
       null
    """
    # BUG FIX: the original called start_date.strptime('%Y-%m-%d'), which
    # always raises TypeError — strptime() is a classmethod that needs both
    # the date string AND the format.  Parse explicitly, and accept values
    # that are already datetime objects.
    if not isinstance(start_date, datetime.datetime):
        start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    if not isinstance(end_date, datetime.datetime):
        end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')

    cutoff = 0  # only used to tag the output file names
    for channel_name_iter in channel_name:
        # One row per calendar month; generate_log_plots presumably returns up
        # to 4 fit values per call — TODO confirm against vis.py.
        # NOTE(review): rows are indexed by dt.month - 1, so the date range
        # must lie within a single calendar year or months will collide.
        out_degree_fit_parameters = np.zeros((12, 4))
        in_degree_fit_parameters = np.zeros((12, 4))
        total_degree_fit_parameters = np.zeros((12, 4))
        for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
            last_day_of_the_month = dt + relativedelta(
                months=1) - datetime.timedelta(days=1)
            log_data = reader.linux_input(
                log_directory, channel_name_iter, dt.strftime("%Y-%m-%d"),
                last_day_of_the_month.strftime("%Y-%m-%d"))
            nicks, nick_same_list = nickTracker.nick_tracker(log_data)
            message_number_graph = network.message_number_graph(
                log_data, nicks, nick_same_list, False)
            degree_anal_message_number = network.degree_analysis_on_graph(
                message_number_graph)
            # Curve-fit the out / in / total degree distributions for this
            # month and store the parameters in the month's row.
            out_degree_fit_parameters[dt.month - 1] = vis.generate_log_plots(
                degree_anal_message_number["out_degree"]["raw_for_vis"],
                output_directory, channel_name_iter[0])
            in_degree_fit_parameters[dt.month - 1] = vis.generate_log_plots(
                degree_anal_message_number["in_degree"]["raw_for_vis"],
                output_directory, channel_name_iter[0])
            total_degree_fit_parameters[dt.month - 1] = vis.generate_log_plots(
                degree_anal_message_number["total_degree"]["raw_for_vis"],
                output_directory, channel_name_iter[0])

        # Box-plot and CSV-dump each fit parameter across the months.
        parameters = ['slope', 'intercept', 'r_square']
        for para_ind in range(len(parameters)):
            vis.box_plot(
                out_degree_fit_parameters[:, para_ind], output_directory,
                "out_degree_" + str(parameters[para_ind]) + "_2013_" +
                channel_name_iter[0] + "_cut_" + str(cutoff))
            vis.box_plot(
                in_degree_fit_parameters[:, para_ind], output_directory,
                "in_degree_" + str(parameters[para_ind]) + "_2013_" +
                channel_name_iter[0] + "_cut_" + str(cutoff))
            vis.box_plot(
                total_degree_fit_parameters[:, para_ind], output_directory,
                "total_degree_" + str(parameters[para_ind]) + "_2013_" +
                channel_name_iter[0] + "_cut_" + str(cutoff))
            saver.save_csv([out_degree_fit_parameters[:, para_ind].tolist()],
                           output_directory,
                           "out_degree_" + str(parameters[para_ind]) +
                           "_2013_" + channel_name_iter[0] + "_cut_" +
                           str(cutoff))
            saver.save_csv([in_degree_fit_parameters[:, para_ind].tolist()],
                           output_directory,
                           "in_degree_" + str(parameters[para_ind]) +
                           "_2013_" + channel_name_iter[0] + "_cut_" +
                           str(cutoff))
            saver.save_csv([total_degree_fit_parameters[:, para_ind].tolist()],
                           output_directory,
                           "total_degree_" + str(parameters[para_ind]) +
                           "_2013_" + channel_name_iter[0] + "_cut_" +
                           str(cutoff))
def codelengths(log_directory, output_directory, channel_name, start_date, end_date): """ The function iterate through the months in the given date range and computes the infomap number. It then plots a box plot for the infomap numbers of all the whole months in the given time period. Args: log_directory(str): path to the location of Logs output_directory(str): path to the location where the results are to be stored channel_name(list): channels for which the analysis is to be done start_date(datetime): starting date for the logs to be analysed. This has to be the beginning of the month. end_date(datetime): ending date for which the logs are to be analysed. This has to be the end of the month. Returns: null """ start_date = start_date.strptime('%Y-%m-%d') end_date = end_date.strptime('%Y-%m-%d') codelengths = [] for dt in rrule(MONTHLY, dtstart=start_date, until=end_date): last_day_of_the_month1 = dt + relativedelta( months=1) - datetime.timedelta(days=1) log_data_m1 = reader.linux_input( log_directory, channel_name, dt.strftime("%Y-%m-%d"), last_day_of_the_month1.strftime("%Y-%m-%d")) nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1) message_number_graph_m1 = network.message_number_graph( log_data_m1, nicks_m1, nick_same_list_m1, False) try: #FOS is a reserved word in igraph and if 'fos' is a username in the nx graph, it generates an error saver.save_net_nx_graph(message_number_graph_m1, output_directory, "message-exchange-" + str(dt.month)) msg_igraph, msg_community = community.infomap_igraph( ig_graph=None, net_file_location=output_directory + "message-exchange-" + str(dt.month) + '.net') codelengths.append(msg_community.codelength) except: node_labels = message_number_graph_m1.nodes() labels = {} for label in node_labels: if label == "fos": labels[label] = "fos_" else: labels[label] = label message_number_graph_m1 = nx.relabel_nodes(message_number_graph_m1, labels) saver.save_net_nx_graph(message_number_graph_m1, output_directory, 
"message-exchange-" + str(dt.month)) print "error in", dt.month msg_igraph, msg_community = community.infomap_igraph( ig_graph=None, net_file_location=output_directory + "message-exchange-" + str(dt.month) + '.net') codelengths.append(msg_community.codelength) vis.box_plot(codelengths, output_directory, "codelengths2013") saver.save_csv([codelengths], output_directory, "codelengths2013")