def test_conversation_characteristics_cutoff_1(self, mock_savefig):
        """Check the conv_len, resp_time and conv_ref_time curve fits at cutoff 1.

        The response-time and conversation data are computed from
        ``self.log_data_kubuntu_devel`` and each series is passed to the
        matching ``vis`` curve-fit helper; the returned parameters are then
        compared numerically against the recorded reference values.

        Args:
            mock_savefig: unused here; presumably injected by a patch
                decorator outside this view -- TODO confirm.
        """
        # Imported locally so the comparison below does not rely on a
        # module-level numpy import.
        import numpy as np

        cutoff = 1
        expected_result_conv_len = [
            0.45678248067618998, 1.9431782685053713, 0.0030314547153581827,
            3.3570362370587976e-05
        ]
        expected_result_resp_time = [
            0.26876242441433712, 0.38822996056503406, 0.0001355301591146847,
            1.4291783519203551e-05
        ]
        expected_result_conv_ref = [
            0.0031066946048193583, 0.0089793356687177077,
            2.3338045062882878e-05, 8.2373085916393017e-08, 66
        ]

        truncated_rt, rt_cutoff_time = channel.response_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            cutoff)
        conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            rt_cutoff_time, cutoff)
        actual_conv_len_fit = vis.exponential_curve_fit_and_plot(
            conv_len, self.current_directory, "conv_len_cutoff" + str(cutoff))
        actual_resp_time_fit = vis.exponential_curve_fit_and_plot(
            truncated_rt, self.current_directory,
            "resp_time_cutoff" + str(cutoff))
        actual_conv_ref_time_fit = vis.exponential_curve_fit_and_plot_x_shifted(
            conv_ref_time, self.current_directory,
            "conv_ref_time_cutoff" + str(cutoff))

        # assertTrue(a, b) would use b only as the failure message and pass
        # for any truthy a, so the values are compared explicitly.
        self.assertTrue(
            np.allclose(actual_conv_len_fit, expected_result_conv_len))
        self.assertTrue(
            np.allclose(actual_resp_time_fit, expected_result_resp_time))
        self.assertTrue(
            np.allclose(actual_conv_ref_time_fit, expected_result_conv_ref))
    def test_conversation_characteristics_cutoff_0(self, mock_savefig):
        """Check the conv_len, resp_time and conv_ref_time curve fits at cutoff 0.

        The response-time and conversation data are computed from
        ``self.log_data_kubuntu_devel`` and each series is passed to the
        matching ``vis`` curve-fit helper; the returned parameters are then
        compared numerically against the recorded reference values.

        Args:
            mock_savefig: unused here; presumably injected by a patch
                decorator outside this view -- TODO confirm.
        """
        # Imported locally so the comparison below does not rely on a
        # module-level numpy import.
        import numpy as np

        cutoff = 0
        expected_result_conv_len = [
            0.46025248575487415, 1.8745480617100398, 0.0014084453709393393,
            1.8113237256968182e-05
        ]
        expected_result_resp_time = [
            0.26599443483759627, 0.38817554962605116, 0.00012042990450484642,
            1.1831364434688785e-05
        ]
        expected_result_conv_ref = [
            0.004067315269095536, 0.01296093837728012, 1.761952783942606e-05,
            5.6259486000435723e-08, 60
        ]

        truncated_rt, rt_cutoff_time = channel.response_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            cutoff)
        conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            rt_cutoff_time, cutoff)
        actual_conv_len_fit = vis.exponential_curve_fit_and_plot(
            conv_len, self.current_directory, "conv_len_cutoff" + str(cutoff))
        actual_resp_time_fit = vis.exponential_curve_fit_and_plot(
            truncated_rt, self.current_directory,
            "resp_time_cutoff" + str(cutoff))
        actual_conv_ref_time_fit = vis.exponential_curve_fit_and_plot_x_shifted(
            conv_ref_time, self.current_directory,
            "conv_ref_time_cutoff" + str(cutoff))

        # assertTrue(a, b) would use b only as the failure message and pass
        # for any truthy a, so the values are compared explicitly.
        self.assertTrue(
            np.allclose(actual_conv_len_fit, expected_result_conv_len))
        self.assertTrue(
            np.allclose(actual_resp_time_fit, expected_result_resp_time))
        self.assertTrue(
            np.allclose(actual_conv_ref_time_fit, expected_result_conv_ref))
    def test_conversation_characteristics_cutoff_5(self, mock_savefig):
        """Check the conv_len, resp_time and conv_ref_time curve fits at cutoff 5.

        The response-time and conversation data are computed from
        ``self.log_data_kubuntu_devel`` and each series is passed to the
        matching ``vis`` curve-fit helper; the returned parameters are then
        compared numerically against the recorded reference values.

        Args:
            mock_savefig: unused here; presumably injected by a patch
                decorator outside this view -- TODO confirm.
        """
        # Imported locally so the comparison below does not rely on a
        # module-level numpy import.
        import numpy as np

        cutoff = 5
        expected_result_conv_len = [
            0.44916983849233633, 1.9156349592761313, 0.0048790728866266418,
            4.3411589194639429e-05
        ]
        expected_result_resp_time = [
            0.28001731891457893, 0.38845839930487419, 0.00020016446653020847,
            2.896185549800808e-05
        ]
        expected_result_conv_ref = [
            0.0019379564807119043, 0.0048940078069499857,
            4.2070926227686924e-05, 1.7671895171226243e-07, 90
        ]

        truncated_rt, rt_cutoff_time = channel.response_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            cutoff)
        conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
            self.log_data_kubuntu_devel, self.nicks, self.nick_same_list,
            rt_cutoff_time, cutoff)
        actual_conv_len_fit = vis.exponential_curve_fit_and_plot(
            conv_len, self.current_directory, "conv_len_cutoff" + str(cutoff))
        actual_resp_time_fit = vis.exponential_curve_fit_and_plot(
            truncated_rt, self.current_directory,
            "resp_time_cutoff" + str(cutoff))
        actual_conv_ref_time_fit = vis.exponential_curve_fit_and_plot_x_shifted(
            conv_ref_time, self.current_directory,
            "conv_ref_time_cutoff" + str(cutoff))

        # assertTrue(a, b) would use b only as the failure message and pass
        # for any truthy a, so the values are compared explicitly.
        self.assertTrue(
            np.allclose(actual_conv_len_fit, expected_result_conv_len))
        self.assertTrue(
            np.allclose(actual_resp_time_fit, expected_result_resp_time))
        self.assertTrue(
            np.allclose(actual_conv_ref_time_fit, expected_result_conv_ref))
Example #4
0
    def test_exponential_curve_fit_and_plot(self, data, expected_result):
        """Fit ``data`` with the exponential helper and compare the result.

        The plot file ``exponential_plot_test.png`` under ``current_dir`` is
        deleted before the numeric comparison against ``expected_result``.
        """
        fit_parameters = vis.exponential_curve_fit_and_plot(
            data, current_dir, "exponential_plot_test")

        # Delete the generated plot before asserting.
        plot_path = current_dir + '/exponential_plot_test.png'
        os.remove(plot_path)

        assert np.allclose(fit_parameters, expected_result)
Example #5
0
    def test_exponential_curve_fit_and_plot(self, mock_curve_fit,
                                            mock_curve_func,
                                            mock_probability_distribution):
        """Run the exponential fit with its collaborators replaced by mocks.

        Input data, the expected fit and the canned ``curve_fit`` return value
        are all loaded from ``self.test_data_dir``; the curve function and the
        probability-distribution mocks delegate to this test class's
        ``mock_*`` implementations. The plot file written under
        ``self.test_data_dir`` is removed before the result is compared.
        """
        data = util.load_from_disk(self.test_data_dir + "/vis/conv_len")
        expected_result = util.load_from_disk(
            self.test_data_dir + "/vis/conv_len_fit")

        # Wire up the mocks: a fixed curve_fit result plus local stand-ins.
        canned_curve_fit = util.load_from_disk(
            self.test_data_dir + "/vis/curve_fit")
        mock_curve_fit.return_value = canned_curve_fit
        mock_curve_func.side_effect = self.mock_exponential_curve_func
        mock_probability_distribution.side_effect = (
            self.mock_generate_probability_distribution)

        actual_fit = vis.exponential_curve_fit_and_plot(
            data, self.test_data_dir, "exponential_plot_test")

        plot_path = self.test_data_dir + '/exponential_plot_test.png'
        os.remove(plot_path)

        values_match = np.allclose(actual_fit, expected_result)
        self.assertTrue(values_match)
Example #6
0
               "MessageNumber_binsize_" + str(config.BIN_LENGTH_MINS))
# One drawing per entry of the per-day message-number graphs and per
# nick-change graph; output names are numbered from 1.
for day_number, day_entry in enumerate(message_number_graph_day_list,
                                       start=1):
    saver.draw_nx_graph(day_entry[0], output_directory,
                        "mng" + str(day_number))
for graph_number, nick_change_graph in enumerate(nick_change_graph_list,
                                                 start=1):
    saver.draw_nx_graph(nick_change_graph, output_directory,
                        "ncg" + str(graph_number))

# Aggregate graphs, then the three raw series as CSV.
saver.draw_nx_graph(message_number_graph, output_directory, "mnagg")
saver.draw_nx_graph(message_time_graph, output_directory, "mtgagg")
saver.save_csv(conv_len, output_directory, "conv_len")
saver.save_csv(resp_time, output_directory, "resp_time")
saver.save_csv(conv_ref_time, output_directory, "conv_ref_time")

# =============== VIZ ===================
# The second positional argument (20 / 30) is passed straight through to the
# vis helpers; its meaning is defined there.
conv_len_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
    conv_len, 20, output_directory, "conv_len")
resp_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
    resp_time, 20, output_directory, "resp_time")
conv_ref_time_curve_fit_parameters = (
    vis.exponential_curve_fit_and_plot_x_shifted(
        conv_ref_time, 30, output_directory, "conv_ref_time"))
# vis.plot_infomap_igraph(adjCC_graph, adjCC_membership, output_directory, "adjCC_infomaps")
# vis.generate_log_plots(9, out_degree_node_number, output_directory, channel_name[0] +"OUT"+ starting_date + ending_date)

# ============== VALIDATION ==============
# Each inner pair is presumably the [low, high] range accepted for one fitted
# value -- confirm against validate.validate_RT_RL_CRT.
conv_len_expected_ranges = [
    [10.5, 10.6], [2.12, 2.32], [0, 0.2], [0, 0.0002]
]
validate.validate_RT_RL_CRT(conv_len_curve_fit_parameters,
                            conv_len_expected_ranges, "conv_len")
resp_time_expected_ranges = [
    [0.3, 10.4], [10.3, 30.4], [-0.002, 0.002], [0, 0.002]
]
validate.validate_RT_RL_CRT(resp_time_curve_fit_parameters,
                            resp_time_expected_ranges, "resp_time")
validate.validate_RT_RL_CRT(
Example #7
0
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
# data[0] holds the bin indices 0..n-1, data[1] the column-wise sums of
# bin_matrix.
data = [
    list(range(len(bin_matrix[0]))),
    [sum(column) for column in zip(*bin_matrix)],
]

default_cutoff = config.CUTOFF_PERCENTILE
percentiles = [0, 1, 5, 10, 20]

# Repeat the response-time / conversation analysis for every cutoff
# percentile, saving the raw series, the fitted plots and the fit parameters
# under names tagged with the cutoff.
for cutoff in percentiles:
    config.CUTOFF_PERCENTILE = cutoff
    cutoff_label = str(cutoff)

    truncated_rt, rt_cutoff_time = channel.response_time(
        log_data, nicks, nick_same_list, config.CUTOFF_PERCENTILE)
    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
        log_data, nicks, nick_same_list, rt_cutoff_time,
        config.CUTOFF_PERCENTILE)

    saver.save_csv(conv_len, output_directory,
                   "conv_len-cutoff-" + cutoff_label)
    saver.save_csv(truncated_rt, output_directory,
                   "resp_time-cutoff-" + cutoff_label)
    saver.save_csv(conv_ref_time, output_directory,
                   "conv_ref_time-cutoff-" + cutoff_label)

    conv_len_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
        conv_len, output_directory, "conv_len_cutoff" + cutoff_label)
    resp_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
        truncated_rt, output_directory, "resp_time_cutoff" + cutoff_label)
    conv_ref_time_curve_fit_parameters = (
        vis.exponential_curve_fit_and_plot_x_shifted(
            conv_ref_time, output_directory,
            "conv_ref_time_cutoff" + cutoff_label))

    saver.save_csv(
        [["a", "b", "c", "MSE"], [conv_len_curve_fit_parameters]],
        output_directory,
        "conv_len_curve_fit_parameters-cutoff-" + cutoff_label)
    saver.save_csv(
        [["a", "b", "c", "MSE"], [resp_time_curve_fit_parameters]],
        output_directory,
        "resp_time_curve_fit_parameters-cutoff-" + cutoff_label)
    saver.save_csv(
        [["a", "b", "c", "MSE"], [conv_ref_time_curve_fit_parameters]],
        output_directory,
        "conv_ref_time_curve_fit_parameters-cutoff-" + cutoff_label)

# Put the configured cutoff back now that the sweep is finished.
config.CUTOFF_PERCENTILE = default_cutoff

user.keywords_clusters(log_data, nicks, nick_same_list, output_directory,
                       "keywords")
network.degree_analysis_on_graph(message_number_graph)

# Keep the configured threshold so it can be restored after the sweep below.
threshold = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
cutoffs = [0, 10, 20]

for cutoff in cutoffs:
Example #8
0
# Degree tables are tagged with the analysed date range; the bin matrix with
# the configured bin length.
date_range_tag = starting_date + '-' + ending_date
saver.save_csv(in_degree_node_number, output_directory,
               "node_in_degree" + date_range_tag)
saver.save_csv(total_degree_node_number, output_directory,
               "node_total_degree" + date_range_tag)
saver.save_csv(bin_matrix, output_directory,
               "MessageNumber_binsize_" + str(config.BIN_LENGTH_MINS))

# One drawing per per-day message-number entry and per nick-change graph,
# numbered from 1.
for day_number, day_entry in enumerate(message_number_graph_day_list,
                                       start=1):
    saver.draw_nx_graph(day_entry[0], output_directory,
                        "mng" + str(day_number))
for graph_number, nick_change_graph in enumerate(nick_change_graph_list,
                                                 start=1):
    saver.draw_nx_graph(nick_change_graph, output_directory,
                        "ncg" + str(graph_number))

# Aggregate graphs, then the three raw series as CSV.
saver.draw_nx_graph(message_number_graph, output_directory, "mnagg")
saver.draw_nx_graph(message_time_graph, output_directory, "mtgagg")
saver.save_csv(conv_len, output_directory, "conv_len")
saver.save_csv(resp_time, output_directory, "resp_time")
saver.save_csv(conv_ref_time, output_directory, "conv_ref_time")

# =============== VIZ ===================
# The second positional argument (20 / 30) is passed straight through to the
# vis helpers; its meaning is defined there.
conv_len_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
    conv_len, 20, output_directory, "conv_len")
resp_time_curve_fit_parameters = vis.exponential_curve_fit_and_plot(
    resp_time, 20, output_directory, "resp_time")
conv_ref_time_curve_fit_parameters = (
    vis.exponential_curve_fit_and_plot_x_shifted(
        conv_ref_time, 30, output_directory, "conv_ref_time"))
# vis.plot_infomap_igraph(adjCC_graph, adjCC_membership, output_directory, "adjCC_infomaps")
# vis.generate_log_plots(9, out_degree_node_number, output_directory, channel_name[0] +"OUT"+ starting_date + ending_date)

# ============== VALIDATION ==============
# Each inner pair is presumably the [low, high] range accepted for one fitted
# value -- confirm against validate.validate_RT_RL_CRT.
conv_len_expected_ranges = [
    [10.5, 10.6], [2.12, 2.32], [0, 0.2], [0, 0.0002]
]
validate.validate_RT_RL_CRT(conv_len_curve_fit_parameters,
                            conv_len_expected_ranges, "conv_len")
resp_time_expected_ranges = [
    [0.3, 10.4], [10.3, 30.4], [-0.002, 0.002], [0, 0.002]
]
validate.validate_RT_RL_CRT(resp_time_curve_fit_parameters,
                            resp_time_expected_ranges, "resp_time")
conv_ref_time_expected_ranges = [
    [10.05, 10.1], [0.1, 0.2], [0.02, 0.04], [0, 0.0002], [9, 11]
]
validate.validate_RT_RL_CRT(conv_ref_time_curve_fit_parameters,
                            conv_ref_time_expected_ranges, "conv_ref_time")


# ============== PRESENCE ACROSS MULTIPLE CHANNELS ==============
# Change analysis to all channels in config
#nicks, nick_same_list, channels_for_user, nick_channel_dict, nicks_hash, channels_hash = nickTracker.nick_tracker(log_data, True)
#dict_out, graph = network.channel_user_presence_graph_and_csv(nicks, nick_same_list, channels_for_user, nick_channel_dict, nicks_hash, channels_hash)
Example #9
0
def _parse_date_argument(value):
    """Return ``value`` as-is if it is a date/datetime, else parse 'YYYY-MM-DD'."""
    # datetime.datetime is a subclass of datetime.date, so both pass through.
    if isinstance(value, datetime.date):
        return value
    return datetime.datetime.strptime(value, '%Y-%m-%d')


def correlational_CL_RT_CRT(log_directory, output_directory, start_date,
                            end_date):
    """
    Correlational: box plots of the monthly curve-fit parameters for CL, RT
    and CRT.

    For every channel in a fixed list and every cutoff percentile in
    [0, 1, 5, 10, 20], the period from start_date to end_date is walked one
    month at a time. For each month the logs are read, response time,
    conversation length and conversation refresh time are computed, and a
    curve is fitted to each. The fit results are stored in row
    ``month - 1`` of a 12-row array, so a period spanning several years
    overwrites earlier years' rows. Once all months are processed, the
    parameters a, b and c (the first three columns) are each written as a
    box plot and as a CSV file.

    The channel list and the "_2013_" tag in the output file names are
    hard-coded.

    config.CUTOFF_PERCENTILE is modified while the analysis runs and is
    always restored to its original value before the function returns or
    raises.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are
            to be stored
        start_date(datetime or str): starting date for the logs to be
            analysed, either a date/datetime object or a 'YYYY-MM-DD'
            string. This has to be the beginning of the month.
        end_date(datetime or str): ending date for which the logs are to be
            analysed, in the same forms as start_date. This has to be the
            end of the month.

    Returns:
       null

    """
    start_date = _parse_date_argument(start_date)
    end_date = _parse_date_argument(end_date)
    percentiles = [0, 1, 5, 10, 20]
    parameters = ['a', 'b', 'c']

    # Capture the configured cutoff once, before any iteration overwrites
    # it, so the value restored at the end is the real original.
    default_cutoff = config.CUTOFF_PERCENTILE
    try:
        for channel_name_iter in [["#kubuntu-devel"], ["#ubuntu-devel"],
                                  ["#kubuntu"]]:
            for cutoff in percentiles:
                # One row per calendar month.
                conv_len_curve_fit_parameters = np.zeros((12, 4))
                resp_time_curve_fit_parameters = np.zeros((12, 4))
                conv_ref_time_curve_fit_parameters = np.zeros((12, 5))

                for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
                    last_day_of_the_month = dt + relativedelta(
                        months=1) - datetime.timedelta(days=1)

                    log_data = reader.linux_input(
                        log_directory, channel_name_iter,
                        dt.strftime("%Y-%m-%d"),
                        last_day_of_the_month.strftime("%Y-%m-%d"))
                    nicks, nick_same_list = nickTracker.nick_tracker(log_data)

                    config.CUTOFF_PERCENTILE = cutoff
                    truncated_rt, rt_cutoff_time = channel.response_time(
                        log_data, nicks, nick_same_list,
                        config.CUTOFF_PERCENTILE)
                    conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
                        log_data, nicks, nick_same_list, rt_cutoff_time,
                        config.CUTOFF_PERCENTILE)

                    month_row = dt.month - 1
                    conv_len_curve_fit_parameters[month_row] = (
                        vis.exponential_curve_fit_and_plot(
                            conv_len, output_directory,
                            "conv_len_cutoff" + str(cutoff)))
                    resp_time_curve_fit_parameters[month_row] = (
                        vis.exponential_curve_fit_and_plot(
                            truncated_rt, output_directory,
                            "resp_time_cutoff" + str(cutoff)))
                    conv_ref_time_curve_fit_parameters[month_row] = (
                        vis.exponential_curve_fit_and_plot_x_shifted(
                            conv_ref_time, output_directory,
                            "conv_ref_time_cutoff" + str(cutoff)))

                for para_ind, parameter in enumerate(parameters):
                    # Shared tail of every output name for this parameter.
                    name_suffix = (str(parameter) + "_2013_" +
                                   channel_name_iter[0] + "_cut_" +
                                   str(cutoff))

                    vis.box_plot(
                        conv_len_curve_fit_parameters[:, para_ind],
                        output_directory, "conv_len_" + name_suffix)
                    vis.box_plot(
                        resp_time_curve_fit_parameters[:, para_ind],
                        output_directory, "resp_time_" + name_suffix)
                    vis.box_plot(
                        conv_ref_time_curve_fit_parameters[:, para_ind],
                        output_directory, "conv_refr_" + name_suffix)

                    saver.save_csv(
                        [conv_len_curve_fit_parameters[:, para_ind].tolist()],
                        output_directory, "conv_len_" + name_suffix)
                    saver.save_csv(
                        [resp_time_curve_fit_parameters[:, para_ind].tolist()],
                        output_directory, "resp_time_" + name_suffix)
                    saver.save_csv(
                        [conv_ref_time_curve_fit_parameters[:, para_ind]
                         .tolist()],
                        output_directory, "conv_refr_" + name_suffix)
    finally:
        # Restore the global setting even if the analysis fails part-way.
        config.CUTOFF_PERCENTILE = default_cutoff