Beispiel #1
0
 def test_message_number_graph(self, log_data, nicks, nick_same_list):
     """Build the message-number graph and compare it with the stored gpickle.

     The comparison itself is delegated to ``compare_graph_outputs``, which is
     handed the generated graph and the fixture name
     ``message_number_graph.gpickle``.
     """
     update_expected_output_directory(log_data)
     generated_graph = network.message_number_graph(log_data, nicks,
                                                    nick_same_list)
     graphs_match = compare_graph_outputs(generated_graph,
                                          "message_number_graph.gpickle")
     self.assertTrue(graphs_match, msg=None)
Beispiel #2
0
    def test_community_analysis_single_channel_cutoff_20(self):
        """Check infomap community detection on the cutoff-20 message graph.

        The test temporarily sets ``config.THRESHOLD_MESSAGE_NUMBER_GRAPH`` to
        20, builds the message-number graph for ``#kubuntu-devel``, writes it
        to a ``.net`` file in ``self.current_directory`` and feeds that file
        to ``community.infomap_igraph``. The result is compared with the
        fixture stored under ``/data/output``.

        Both the config override and the temporary ``.net`` file are undone
        in ``finally`` blocks so a failure in graph building or infomap does
        not leak state into other tests.
        """
        log_data = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                      self.start_date, self.end_date)
        expected_result = util.load_from_disk(
            self.current_directory +
            '/data/output/community_analysis_single_channel_cutoff_20')
        nicks, nick_same_list = nickTracker.nick_tracker(log_data)

        default_cutoff = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
        config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20
        try:
            message_number_graph = network.message_number_graph(
                log_data, nicks, nick_same_list, False)

            # File name (without extension) handed to the saver; the saver is
            # expected to produce "<current_directory>/<stem>.net".
            net_file_stem = ("message-exchange-" + self.start_date +
                             "-cutoff-" +
                             str(config.THRESHOLD_MESSAGE_NUMBER_GRAPH))
            net_file_path = (self.current_directory + "/" + net_file_stem +
                             '.net')
            saver.save_net_nx_graph(message_number_graph,
                                    self.current_directory, net_file_stem)
            try:
                expected_output = community.infomap_igraph(
                    ig_graph=None, net_file_location=net_file_path)
            finally:
                # Only reached once the save succeeded, so the file exists.
                os.remove(net_file_path)
        finally:
            config.THRESHOLD_MESSAGE_NUMBER_GRAPH = default_cutoff

        self.assertTrue(expected_result[0].isomorphic(expected_output[0]))
        self.assertEqual(
            compare_communities(expected_result[1], expected_output[1]), 0)
Beispiel #3
0
    def test_degree_analysis_on_nodes(self, log_data, nicks, nick_same_list):
        """Compare degree analysis of three graphs with their JSON fixtures.

        The message-number, message-time and nick-change graphs are built
        from ``log_data`` and each is passed through
        ``network.degree_analysis_on_graph``. ``unjson`` is given an empty
        list per fixture; element 0 of that list is what gets compared.
        """
        update_expected_output_directory(log_data)

        # Build the three graphs first, then analyse them in the same order.
        number_graph = network.message_number_graph(log_data, nicks,
                                                    nick_same_list)
        time_graph = network.message_time_graph(log_data, nicks,
                                                nick_same_list)
        change_graph = user.nick_change_graph(log_data)

        actual_number = network.degree_analysis_on_graph(number_graph)
        actual_time = network.degree_analysis_on_graph(time_graph)
        actual_change = network.degree_analysis_on_graph(change_graph)

        wanted_number, wanted_time, wanted_change = [], [], []
        unjson('degree_anal_message_number.json', wanted_number)
        unjson('degree_anal_message_time.json', wanted_time)
        unjson('degree_anal_nick_change.json', wanted_change)

        self.assertDictEqual(actual_number, wanted_number[0], msg=None)
        self.assertDictEqual(actual_time, wanted_time[0], msg=None)
        self.assertDictEqual(actual_change, wanted_change[0], msg=None)
Beispiel #4
0
def box_plot_for_degree(log_directory, output_directory, channel_name):
    """Box-plot the monthly degree-distribution fit parameters for 2013.

    For every entry of ``channel_name`` the function walks the twelve months
    of 2013, builds the message-number graph of that month, runs the degree
    analysis and stores the row returned by ``vis.generate_log_plots`` for the
    out-, in- and total-degree data. Afterwards one box plot and one CSV are
    written per (degree kind, fit parameter) combination.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where results are stored
        channel_name(list): iterable of channel selections; each element is
            passed to ``reader.linux_input`` and its item 0 is used in the
            output file names

    Returns:
       null
    """
    cutoff = 0
    degree_kinds = ("out_degree", "in_degree", "total_degree")
    fit_parameter_names = ['slope', 'intercept', 'r_square']

    for channel_name_iter in channel_name:
        # One 12x4 table per degree kind: a row per month.
        fit_tables = {kind: np.zeros((12, 4)) for kind in degree_kinds}

        for month in range(1, 13):
            month_prefix = "2013-" + str(month)
            log_data = reader.linux_input(log_directory, channel_name_iter,
                                          month_prefix + "-1",
                                          month_prefix + "-31")
            nicks, nick_same_list = nickTracker.nick_tracker(log_data)

            monthly_graph = network.message_number_graph(
                log_data, nicks, nick_same_list, False)
            degree_analysis = network.degree_analysis_on_graph(monthly_graph)

            for kind in degree_kinds:
                fit_tables[kind][month - 1] = vis.generate_log_plots(
                    degree_analysis[kind]["raw_for_vis"], output_directory,
                    channel_name_iter[0])

        for column, parameter in enumerate(fit_parameter_names):
            name_suffix = ("_" + str(parameter) + "_2013_" +
                           channel_name_iter[0] + "_cut_" + str(cutoff))

            # All box plots for this parameter first, then all CSV files.
            for kind in degree_kinds:
                vis.box_plot(fit_tables[kind][:, column], output_directory,
                             kind + name_suffix)
            for kind in degree_kinds:
                saver.save_csv([fit_tables[kind][:, column].tolist()],
                               output_directory, kind + name_suffix)
Beispiel #5
0
 def test_message_exchange_network(self):
     """Degree analysis of the #kubuntu-devel message graph matches the fixture.

     The stored result is loaded from ``/data/output`` below
     ``self.current_directory`` and compared for equality with a freshly
     computed ``network.degree_analysis_on_graph`` result.
     """
     channel_logs = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                       self.start_date, self.end_date)
     fixture_path = (self.current_directory +
                     '/data/output/degree_anal_message_number_graph_kubuntu-devel')
     stored_analysis = util.load_from_disk(fixture_path)

     nicks, nick_same_list = nickTracker.nick_tracker(channel_logs)
     exchange_graph = network.message_number_graph(channel_logs, nicks,
                                                   nick_same_list, False)
     computed_analysis = network.degree_analysis_on_graph(exchange_graph)

     self.assertEqual(stored_analysis, computed_analysis)
Beispiel #6
0
 def test_reduced_networks_cutoff_20(self):
     """Message-number graph built with cutoff 20 is isomorphic to the fixture.

     ``config.THRESHOLD_MESSAGE_NUMBER_GRAPH`` is overridden with 20 for the
     duration of the graph construction. The original value is restored in a
     ``finally`` block so that an exception while reading logs, tracking
     nicks or building the graph cannot leak the override into other tests.
     """
     default_config = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
     config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20
     try:
         log_data = reader.linux_input(self.log_data_dir,
                                       ["#kubuntu-devel"], self.start_date,
                                       self.end_date)
         expected_result = util.load_from_disk(
             self.current_directory +
             '/data/output/message_number_graph_cutoff_20')
         nicks, nick_same_list = nickTracker.nick_tracker(log_data, False)
         expected_output = network.message_number_graph(
             log_data, nicks, nick_same_list, False)
     finally:
         config.THRESHOLD_MESSAGE_NUMBER_GRAPH = default_config

     self.assertTrue(nx.is_isomorphic(expected_result, expected_output))
Beispiel #7
0
    def test_message_number_graph_day_analysis(self, mock_get_nick_sen_rec,
                                               mock_rec_list_splice,
                                               mock_correctLastCharCR,
                                               mock_check_if_msg_line,
                                               mock_create_connected_nick_list,
                                               mock_to_graph):
        """Day-by-day message-number graphs match the stored list (mocked).

        Every mocked helper is fed with data loaded from
        ``/data/message_number_graph`` below ``self.current_directory``. The
        function under test is then called with ``DAY_BY_DAY_ANALYSIS=True``
        and the first two returned graphs are compared with the stored ones.

        Anything printed during the call is captured and thrown away. The
        previous ``sys.stdout`` is restored in a ``finally`` block, so a
        failure cannot leave stdout redirected, and a stream installed by the
        test runner is put back instead of being replaced by
        ``sys.__stdout__``.
        """
        fixture_dir = self.current_directory + "/data/message_number_graph/"

        to_graph_ret = util.load_from_disk(fixture_dir + "to_graph")
        conn_list = list(connected_components(to_graph_ret))

        mock_to_graph.return_value = to_graph_ret
        mock_rec_list_splice.side_effect = util.load_from_disk(
            fixture_dir + "rec_list_splice")
        mock_create_connected_nick_list.return_value = util.load_from_disk(
            fixture_dir + "conn_comp_list")
        mock_check_if_msg_line.side_effect = util.load_from_disk(
            fixture_dir + "check_if_msg_line")
        mock_correctLastCharCR.side_effect = util.load_from_disk(
            fixture_dir + "correctLastCharCR")
        mock_get_nick_sen_rec.side_effect = util.load_from_disk(
            fixture_dir + "get_nick_sen_rec")

        previous_stdout = sys.stdout
        capturedOutput = StringIO.StringIO()
        sys.stdout = capturedOutput
        try:
            ret = network.message_number_graph(self.log_data,
                                               self.nicks,
                                               self.nick_same_list,
                                               DAY_BY_DAY_ANALYSIS=True)
            expected_graph_list = util.load_from_disk(
                fixture_dir + "message_number_day_list")
        finally:
            sys.stdout = previous_stdout
            capturedOutput.close()

        mock_to_graph.assert_called_once_with(self.nick_same_list)
        mock_create_connected_nick_list.assert_called_once_with(conn_list)
        self.assertTrue(nx.is_isomorphic(ret[0][0], expected_graph_list[0][0]))
        self.assertTrue(nx.is_isomorphic(ret[1][0], expected_graph_list[1][0]))
    def test_message_number_graph(self):
        """Aggregate message-number graph is isomorphic to the stored graph.

        The graph is built with ``DAY_BY_DAY_ANALYSIS=False`` while stdout is
        captured and discarded. The previous ``sys.stdout`` is restored in a
        ``finally`` block, so an exception cannot leave stdout redirected and
        a stream installed by the test runner is not replaced by
        ``sys.__stdout__``.
        """
        previous_stdout = sys.stdout
        capturedOutput = StringIO.StringIO()
        sys.stdout = capturedOutput
        try:
            graph = network.message_number_graph(self.log_data,
                                                 self.nicks,
                                                 self.nick_same_list,
                                                 DAY_BY_DAY_ANALYSIS=False)
        finally:
            sys.stdout = previous_stdout
            capturedOutput.close()

        expected_graph = util.load_from_disk(
            self.test_data_dir +
            "message_number_graph/aggregate_message_number_graph")
        self.assertTrue(nx.is_isomorphic(graph, expected_graph))
    def test_message_number_graph_day_analysis(self):
        """Day-by-day message-number graphs match the stored list.

        The function under test is called with ``DAY_BY_DAY_ANALYSIS=True``
        and item ``[i][0]`` of the first two results is compared with the
        matching item of the stored list. Output printed meanwhile is captured
        and discarded. The previous ``sys.stdout`` is restored in a
        ``finally`` block, so an exception cannot leave stdout redirected and
        a stream installed by the test runner is not replaced by
        ``sys.__stdout__``.
        """
        previous_stdout = sys.stdout
        capturedOutput = StringIO.StringIO()
        sys.stdout = capturedOutput
        try:
            graph = network.message_number_graph(self.log_data,
                                                 self.nicks,
                                                 self.nick_same_list,
                                                 DAY_BY_DAY_ANALYSIS=True)
            expected_graph_list = util.load_from_disk(
                self.test_data_dir +
                "message_number_graph/message_number_day_list")
        finally:
            sys.stdout = previous_stdout
            capturedOutput.close()

        self.assertTrue(
            nx.is_isomorphic(graph[0][0], expected_graph_list[0][0]))
        self.assertTrue(
            nx.is_isomorphic(graph[1][0], expected_graph_list[1][0]))
Beispiel #10
0
 def test_degree_distribution_message_exchange_network(self):
     """Curve-fit results for the three degree kinds match the stored fixture.

     For out-, in- and total-degree data of the #kubuntu-devel message graph,
     ``vis.generate_log_plots`` is called and its return value collected in a
     dict keyed by degree kind. The ``.png`` written for each kind into
     ``self.current_directory`` is removed right after it is produced.
     """
     channel_logs = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                       self.start_date, self.end_date)
     stored_fits = util.load_from_disk(
         self.current_directory +
         '/data/output/message_exchange_network_curve_fit')
     nicks, nick_same_list = nickTracker.nick_tracker(channel_logs)
     exchange_graph = network.message_number_graph(channel_logs, nicks,
                                                   nick_same_list, False)
     degree_analysis = network.degree_analysis_on_graph(exchange_graph)

     computed_fits = {}
     for kind in ("out_degree", "in_degree", "total_degree"):
         plot_name = "#kubuntu-devel" + kind
         computed_fits[kind] = vis.generate_log_plots(
             degree_analysis[kind]["raw_for_vis"], self.current_directory,
             plot_name)
         # The plot image is a by-product of the fit; drop it immediately.
         os.remove(self.current_directory + "/" + plot_name + ".png")

     self.assertEqual(stored_fits, computed_fits)
Beispiel #11
0
def codelengths(log_directory, output_directory, channel_name):
    """Compute the infomap codelength for every month of 2013 and plot them.

    For each month the message-number graph is built, saved as a ``.net``
    file and handed to ``community.infomap_igraph``; the ``codelength`` of the
    returned community object is collected. The twelve values are written as
    a box plot and as a CSV, both named ``codelengths2013``.

    Exactly one codelength is recorded per month. Previously a month whose
    first attempt succeeded was computed and appended twice, while a month
    that needed the 'fos' workaround was appended once.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where results are stored;
            it is concatenated directly with the file name, so it presumably
            needs a trailing separator - TODO confirm
        channel_name(list): channels for which the analysis is to be done

    Returns:
       null
    """
    monthly_codelengths = []
    for month in range(1, 13):
        # Day 31 is used for every month; this relies on reader.linux_input
        # coping with dates that do not exist in shorter months.
        log_data_m1 = reader.linux_input(log_directory, channel_name,
                                         "2013-" + str(month) + "-1",
                                         "2013-" + str(month) + "-31")
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        message_number_graph_m1 = network.message_number_graph(
            log_data_m1, nicks_m1, nick_same_list_m1, False)

        net_file_stem = "message-exchange-" + str(month)
        net_file_location = output_directory + net_file_stem + '.net'
        try:
            # FOS is a reserved word in igraph and if 'fos' is a username in
            # the nx graph, it generates an error
            saver.save_net_nx_graph(message_number_graph_m1, output_directory,
                                    net_file_stem)
            msg_igraph, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file_location)
        except Exception:
            # Retry once with the offending node renamed to 'fos_'.
            labels = {}
            for label in message_number_graph_m1.nodes():
                labels[label] = "fos_" if label == "fos" else label

            message_number_graph_m1 = nx.relabel_nodes(message_number_graph_m1,
                                                       labels)
            saver.save_net_nx_graph(message_number_graph_m1, output_directory,
                                    net_file_stem)
            # Same text as the former `print "error in", month` statement.
            print("error in %s" % month)
            msg_igraph, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file_location)

        monthly_codelengths.append(msg_community.codelength)

    vis.box_plot(monthly_codelengths, output_directory, "codelengths2013")
    saver.save_csv([monthly_codelengths], output_directory, "codelengths2013")
Beispiel #12
0
    def test_degree_analysis_on_nodes(self, log_data, nicks, nick_same_list):
        """Degree analysis of three graphs equals the stored JSON fixtures.

        Graphs are built and analysed in the order message-number,
        message-time, nick-change. All fixtures are loaded via ``unjson``
        (which is handed an empty list and whose element 0 is used) before
        any assertion runs.
        """
        update_expected_output_directory(log_data)

        graphs = [
            network.message_number_graph(log_data, nicks, nick_same_list),
            network.message_time_graph(log_data, nicks, nick_same_list),
            user.nick_change_graph(log_data),
        ]
        analyses = [network.degree_analysis_on_graph(graph) for graph in graphs]

        fixture_names = ('degree_anal_message_number.json',
                         'degree_anal_message_time.json',
                         'degree_anal_nick_change.json')
        fixtures = []
        for fixture_name in fixture_names:
            holder = []
            unjson(fixture_name, holder)
            fixtures.append(holder)

        for computed, holder in zip(analyses, fixtures):
            self.assertDictEqual(computed, holder[0], msg=None)
Beispiel #13
0
from lib.in_out import reader, saver
from lib import nickTracker, config, vis, validate
from lib.analysis import network, channel, user, community

# Run parameters are read once from the shared config module.
# NOTE(review): output_directory is assigned here but not used in the lines
# shown in this script section.
log_directory = config.LOG_DIRECTORY
channel_name = config.CHANNEL_NAME
starting_date = config.STARTING_DATE
ending_date = config.ENDING_DATE
output_directory = config.OUTPUT_DIRECTORY

# ============== INPUT==================
# Load the log data for the configured channel(s) and date range, then run
# the nick tracker; its two return values feed every analysis below.
log_data = reader.linux_input(log_directory, channel_name, starting_date, ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
# The graph builders are each called twice, differing only in the final
# boolean argument.
# NOTE(review): judging by the result names (`*_day_list` / `*_graph_list`
# when True), the flag presumably selects per-day output - confirm against
# lib.analysis.network and lib.analysis.user.
message_number_graph = network.message_number_graph(log_data, nicks, nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(log_data, nicks, nick_same_list, True)
# NOTE(review): "numder" looks like a typo for "number"; the name is left
# unchanged because it is module-level and may be referenced elsewhere.
degree_anal_message_numder = network.degree_analysis_on_graph(message_number_graph)
message_time_graph_list = network.message_time_graph(log_data, nicks, nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks, nick_same_list, False)
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(log_data, nicks, nick_same_list)
nick_change_graph_list =  user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)

# The two calls below discard their return values, so they matter only for
# whatever side effects the callees have.
# NOTE(review): the degree analysis repeats the call whose result is already
# stored in degree_anal_message_numder above.
user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)

# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")
Beispiel #14
0
 def test_message_number_graph(self, log_data, nicks, nick_same_list):
     """Generated message-number graph must match ``message_number_graph.gpickle``.

     ``compare_graph_outputs`` performs the comparison; its result is asserted
     to be truthy.
     """
     update_expected_output_directory(log_data)
     fixture_name = "message_number_graph.gpickle"
     built_graph = network.message_number_graph(log_data, nicks, nick_same_list)
     comparison = compare_graph_outputs(built_graph, fixture_name)
     self.assertTrue(comparison, msg=None)
Beispiel #15
0
def codelengths(log_directory, output_directory, channel_name, start_date,
                end_date):
    """
        The function iterates through the months in the given date range and computes the infomap codelength of the
        message-exchange graph of each month. It then plots a box plot of these values and saves them as CSV.

        Exactly one codelength is recorded per month; a month is retried with the node 'fos' renamed to 'fos_' when
        the first attempt raises.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str):  path to the location where the results are to be stored; it is concatenated directly
            with the file name, so it presumably needs a trailing separator - TODO confirm
        channel_name(list): channels for which the analysis is to be done
        start_date(datetime or str): starting date for the logs to be analysed, either a datetime or a 'YYYY-MM-DD'
            string. This has to be the beginning of the month.
        end_date(datetime or str): ending date for which the logs are to be analysed, either a datetime or a
            'YYYY-MM-DD' string. This has to be the end of the month.

    Returns:
       null

    Raises:
        ValueError: if a date string does not match '%Y-%m-%d'

    """
    # datetime.strptime is a classmethod taking (date_string, format). The
    # former `start_date.strptime('%Y-%m-%d')` raised for str and datetime
    # arguments alike, so parse strings explicitly and keep datetimes as is.
    if not isinstance(start_date, datetime.datetime):
        start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    if not isinstance(end_date, datetime.datetime):
        end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')

    monthly_codelengths = []
    for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
        # dt is the first day of a month, so this yields that month's last day.
        last_day_of_the_month1 = dt + relativedelta(
            months=1) - datetime.timedelta(days=1)
        log_data_m1 = reader.linux_input(
            log_directory, channel_name, dt.strftime("%Y-%m-%d"),
            last_day_of_the_month1.strftime("%Y-%m-%d"))
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        message_number_graph_m1 = network.message_number_graph(
            log_data_m1, nicks_m1, nick_same_list_m1, False)

        # The file name carries only the month number, so in a range longer
        # than a year a later month overwrites the earlier file; the file is
        # read back immediately, so the computed values are unaffected.
        net_file_stem = "message-exchange-" + str(dt.month)
        net_file_location = output_directory + net_file_stem + '.net'
        try:
            # FOS is a reserved word in igraph and if 'fos' is a username in
            # the nx graph, it generates an error
            saver.save_net_nx_graph(message_number_graph_m1, output_directory,
                                    net_file_stem)
            msg_igraph, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file_location)
        except Exception:
            # Retry once with the offending node renamed to 'fos_'.
            labels = {}
            for label in message_number_graph_m1.nodes():
                labels[label] = "fos_" if label == "fos" else label

            message_number_graph_m1 = nx.relabel_nodes(message_number_graph_m1,
                                                       labels)
            saver.save_net_nx_graph(message_number_graph_m1, output_directory,
                                    net_file_stem)
            # Same text as the former `print "error in", dt.month` statement.
            print("error in %s" % dt.month)
            msg_igraph, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file_location)

        monthly_codelengths.append(msg_community.codelength)

    vis.box_plot(monthly_codelengths, output_directory, "codelengths2013")
    saver.save_csv([monthly_codelengths], output_directory, "codelengths2013")
Beispiel #16
0
from lib import nickTracker, config, vis, validate
from lib.analysis import network, channel, user, community

# Run parameters are read once from the shared config module.
# NOTE(review): output_directory is assigned here but not used in the lines
# shown in this script section.
log_directory = config.LOG_DIRECTORY
channel_name = config.CHANNEL_NAME
starting_date = config.STARTING_DATE
ending_date = config.ENDING_DATE
output_directory = config.OUTPUT_DIRECTORY

# ============== INPUT==================
# Load the log data for the configured channel(s) and date range, then run
# the nick tracker; its two return values feed every analysis below.
# NOTE(review): `reader` is not among the import lines visible directly above
# this section - confirm it is brought into scope elsewhere.
log_data = reader.linux_input(log_directory, channel_name, starting_date,
                              ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
# The graph builders are each called twice, differing only in the final
# boolean argument.
# NOTE(review): judging by the result names (`*_day_list` / `*_graph_list`
# when True), the flag presumably selects per-day output - confirm against
# lib.analysis.network and lib.analysis.user.
message_number_graph = network.message_number_graph(log_data, nicks,
                                                    nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(
    log_data, nicks, nick_same_list, True)
# NOTE(review): "numder" looks like a typo for "number"; the name is left
# unchanged because it is module-level and may be referenced elsewhere.
degree_anal_message_numder = network.degree_analysis_on_graph(
    message_number_graph)
message_time_graph_list = network.message_time_graph(log_data, nicks,
                                                     nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks,
                                                nick_same_list, False)
# Three values are unpacked from the degree/node-number helper: out, in and
# total, in that order.
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(
    log_data, nicks, nick_same_list)
nick_change_graph_list = user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
    log_data, nicks, nick_same_list)
Beispiel #17
0
def box_plot_for_degree(log_directory, output_directory, channel_name,
                        start_date, end_date):
    """
        Correlational : statistical distribution of curve fit parameters generated for degree distribution. The function
        takes the given time duration and selects one month at a time for generation of a degree distribution sample. Each
        degree distribution sample shall have 3 curve fit parameters namely slope, intercept & r_square. The function collects these parameters
        for all the months of the given time duration. The function produces box plot separately for each parameter.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str):  path to the location where the results are to be stored
        channel_name(list): channels for which the analysis is to be done.
        start_date(datetime or str): starting date for the logs to be analysed, either a datetime or a 'YYYY-MM-DD'
            string. This has to be the beginning of the month.
        end_date(datetime or str): ending date for which the logs are to be analysed, either a datetime or a
            'YYYY-MM-DD' string. This has to be the end of the month.

    Returns:
       null

    Raises:
        ValueError: if a date string does not match '%Y-%m-%d'

    """
    # datetime.strptime is a classmethod taking (date_string, format). The
    # former `start_date.strptime('%Y-%m-%d')` raised for str and datetime
    # arguments alike, so parse strings explicitly and keep datetimes as is.
    if not isinstance(start_date, datetime.datetime):
        start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    if not isinstance(end_date, datetime.datetime):
        end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')

    cutoff = 0
    degree_kinds = ("out_degree", "in_degree", "total_degree")
    parameters = ['slope', 'intercept', 'r_square']

    for channel_name_iter in channel_name:
        # One 12x4 table per degree kind, indexed by calendar month. Months
        # outside the requested range keep their zero rows, and a range longer
        # than a year overwrites rows of the same calendar month.
        fit_tables = {kind: np.zeros((12, 4)) for kind in degree_kinds}

        for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
            # dt is the first day of a month, so this is that month's last day.
            last_day_of_the_month = dt + relativedelta(
                months=1) - datetime.timedelta(days=1)
            log_data = reader.linux_input(
                log_directory, channel_name_iter, dt.strftime("%Y-%m-%d"),
                last_day_of_the_month.strftime("%Y-%m-%d"))
            nicks, nick_same_list = nickTracker.nick_tracker(log_data)

            message_number_graph = network.message_number_graph(
                log_data, nicks, nick_same_list, False)
            degree_anal_message_number = network.degree_analysis_on_graph(
                message_number_graph)

            for kind in degree_kinds:
                fit_tables[kind][dt.month - 1] = vis.generate_log_plots(
                    degree_anal_message_number[kind]["raw_for_vis"],
                    output_directory, channel_name_iter[0])

        for para_ind, parameter in enumerate(parameters):
            # Output names keep the historical "_2013_" tag so existing
            # consumers of these files continue to find them.
            name_suffix = ("_" + str(parameter) + "_2013_" +
                           channel_name_iter[0] + "_cut_" + str(cutoff))

            # All box plots for this parameter first, then all CSV files.
            for kind in degree_kinds:
                vis.box_plot(fit_tables[kind][:, para_ind], output_directory,
                             kind + name_suffix)
            for kind in degree_kinds:
                saver.save_csv([fit_tables[kind][:, para_ind].tolist()],
                               output_directory, kind + name_suffix)