Example #1
0
def correlational_activity(log_directory, output_directory, channel_name):
    pearson = []
    for month in xrange(1, 12):
        log_data_m1 = reader.linux_input(log_directory, channel_name,
                                         "2013-" + str(month) + "-1",
                                         "2013-" + str(month) + "-31")
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        bin_matrix_m1, total_messages_m1 = network.message_number_bins_csv(
            log_data_m1, nicks_m1, nick_same_list_m1)
        monthly_sum_bins_m1 = [sum(i) for i in zip(*bin_matrix_m1)]

        log_data_m2 = reader.linux_input(log_directory, channel_name,
                                         "2013-" + str(month + 1) + "-1",
                                         "2013-" + str(month + 1) + "-31")
        nicks_m2, nick_same_list_m2 = nickTracker.nick_tracker(log_data_m2)
        bin_matrix_m2, total_messages_m2 = network.message_number_bins_csv(
            log_data_m2, nicks_m2, nick_same_list_m2)
        monthly_sum_bins_m2 = [sum(i) for i in zip(*bin_matrix_m2)]
        corr = np.corrcoef(monthly_sum_bins_m1, monthly_sum_bins_m2)[0, 1]

        print "\n----------------------------------"
        print "For months", month, "and", month + 1
        print "Bins for M1:", monthly_sum_bins_m1
        print "Bins for M2:", monthly_sum_bins_m2
        print "Pearson correlation:", corr
        pearson.append(corr)

    vis.box_plot(pearson, output_directory, "pearson2013")
    saver.save_csv([pearson], output_directory, "pearson2013")
Example #2
0
def correlational_activity(log_directory, output_directory, channel_name,
                           start_date, end_date):
    """
        The function selects a month in the given date range and creates heatmap bins for the current month and the next
        month. It then calculates the correlational calculates the correlational vectors between the two heatmaps and
        then produces a box plot for all the correlational coefficients of the months in the given date range.

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str):  path to the location where the results are to be stored
        channel_name(list): channels for which the analysis is to be done
        start_date(datetime): starting date for the logs to be analysed. This has to be the beginning of the month.
        end_date(datetime): ending date for which the logs are to be analysed. This has to be the end of the month.

    Returns:
       null

    """
    start_date = start_date.strptime('%Y-%m-%d')
    end_date = end_date.strptime('%Y-%m-%d')
    pearson = []
    for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
        last_day_of_the_month1 = dt + relativedelta(
            months=1) - datetime.timedelta(days=1)

        log_data_m1 = reader.linux_input(
            log_directory, channel_name, dt.strftime("%Y-%m-%d"),
            last_day_of_the_month1.strftime("%Y-%m-%d"))
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        bin_matrix_m1, total_messages_m1 = network.message_number_bins_csv(
            log_data_m1, nicks_m1, nick_same_list_m1)
        monthly_sum_bins_m1 = [sum(i) for i in zip(*bin_matrix_m1)]

        next_month_dt = dt + relativedelta(months=1)
        last_day_of_the_month2 = next_month_dt + relativedelta(
            months=1) - datetime.timedelta(days=1)
        log_data_m2 = reader.linux_input(
            log_directory, channel_name, next_month_dt.strftime("%Y-%m-%d"),
            last_day_of_the_month2.strftime("%Y-%m-%d"))
        nicks_m2, nick_same_list_m2 = nickTracker.nick_tracker(log_data_m2)
        bin_matrix_m2, total_messages_m2 = network.message_number_bins_csv(
            log_data_m2, nicks_m2, nick_same_list_m2)
        monthly_sum_bins_m2 = [sum(i) for i in zip(*bin_matrix_m2)]
        corr = np.corrcoef(monthly_sum_bins_m1, monthly_sum_bins_m2)[0, 1]

        print "\n----------------------------------"
        print "For months", dt.month, "and", dt.month + 1
        print "Bins for M1:", monthly_sum_bins_m1
        print "Bins for M2:", monthly_sum_bins_m2
        print "Pearson correlation:", corr
        pearson.append(corr)

    vis.box_plot(pearson, output_directory, "pearson2013")
    saver.save_csv([pearson], output_directory, "pearson2013")
Example #3
0
    def test_message_number_bins_csv(self, mock_util):
        """Check network.message_number_bins_csv against results stored on disk.

        The three helpers on the patched util object replay the values
        loaded from the data directory, and the bin matrix and total
        message count returned by the function are compared with the
        stored expectations. Anything the function prints is captured and
        thrown away.
        """
        data_dir = current_directory + "/data/message_number_bins_csv/"

        mock_util.correctLastCharCR.side_effect = util.load_from_disk(
            data_dir + "correctLastCharCR")
        mock_util.correct_last_char_list.side_effect = util.load_from_disk(
            data_dir + "correct_last_char_list")
        mock_util.rec_list_splice.side_effect = util.load_from_disk(
            data_dir + "rec_list_splice")

        bin_matrix_ = util.load_from_disk(data_dir + "bin_matrix")
        tot_msgs_ = util.load_from_disk(data_dir + "tot_msgs")

        # Remember the stream that is active now (a test runner may have
        # installed its own) and restore it even when the call raises, so a
        # failure here cannot swallow the output of later tests.
        original_stdout = sys.stdout
        capturedOutput = StringIO.StringIO()
        sys.stdout = capturedOutput
        try:
            bin_matrix, tot_msgs = network.message_number_bins_csv(
                self.log_data, self.nicks, self.nick_same_list)
        finally:
            sys.stdout = original_stdout
            capturedOutput.close()

        self.assertEqual(bin_matrix, bin_matrix_)
        self.assertEqual(tot_msgs, tot_msgs_)
    def test_message_number_bins_csv(self, log_data, nicks, nick_same_list):
        """Compare the computed bin rows with message_number_binsize_60.csv.

        Only the first value returned by network.message_number_bins_csv is
        checked; the second one is ignored.
        """
        update_expected_output_directory(log_data)
        actual_rows, _ = network.message_number_bins_csv(
            log_data, nicks, nick_same_list)

        # The list starts empty and is handed to csv_to_list, which is
        # presumably expected to fill it in place - confirm in csv_to_list.
        reference_rows = []
        reference_csv = expected_output_directory + 'message_number_binsize_60.csv'
        csv_to_list(reference_csv, reference_rows, False)
        self.assertListEqual(actual_rows, reference_rows)
Example #5
0
    def test_message_number_bins_csv(self, log_data, nicks, nick_same_list):
        """Compare the computed bin rows with message_number_binsize_30.csv.

        Only the first value returned by network.message_number_bins_csv is
        checked; the second one is ignored.
        """
        update_expected_output_directory(log_data)
        actual_rows, _ = network.message_number_bins_csv(
            log_data, nicks, nick_same_list)

        # The list starts empty and is handed to csv_to_list, which is
        # presumably expected to fill it in place - confirm in csv_to_list.
        reference_rows = []
        reference_csv = expected_output_directory + 'message_number_binsize_30.csv'
        csv_to_list(reference_csv, reference_rows, False)
        self.assertListEqual(actual_rows, reference_rows)
Example #6
0
    def test_message_number_bins_csv(self, mock_rec_list,
                                     mock_correct_last_char_list,
                                     mock_correct_last_char_CR):
        """Check network.message_number_bins_csv against results stored on disk.

        The three mocks replay the values loaded from the test data
        directory, and the bin matrix and total message count returned by
        the function are compared with the stored expectations. Anything
        the function prints is captured and thrown away.
        """
        data_dir = self.test_data_dir + "message_number_bins_csv/"

        mock_correct_last_char_CR.side_effect = util.load_from_disk(
            data_dir + "correctLastCharCR")
        mock_correct_last_char_list.side_effect = util.load_from_disk(
            data_dir + "correct_last_char_list")
        mock_rec_list.side_effect = util.load_from_disk(
            data_dir + "rec_list_splice")

        expected_bin_matrix = util.load_from_disk(data_dir + "bin_matrix")
        expected_tot_msgs = util.load_from_disk(data_dir + "tot_msgs")

        # Remember the stream that is active now (a test runner may have
        # installed its own) and restore it even when the call raises, so a
        # failure here cannot swallow the output of later tests.
        original_stdout = sys.stdout
        capturedOutput = StringIO.StringIO()
        sys.stdout = capturedOutput
        try:
            bin_matrix, tot_msgs = network.message_number_bins_csv(
                self.log_data, self.nicks, self.nick_same_list)
        finally:
            sys.stdout = original_stdout
            capturedOutput.close()

        self.assertEqual(bin_matrix, expected_bin_matrix)
        self.assertEqual(tot_msgs, expected_tot_msgs)
 def test_activity_graph(self):
     """Rebuild the activity-graph rows and compare them with the stored ones.

     The first row holds the bin indices (0 .. number of bins - 1) and the
     second row the column-wise sums of the bin matrix returned by
     network.message_number_bins_csv. The pair is compared with the result
     loaded from "/data/output/activity_graph".
     """
     self.expected_result = util.load_from_disk(
         self.current_directory + "/data/output/activity_graph")
     bin_matrix, _ = network.message_number_bins_csv(
         self.log_data_kubuntu_devel, self.nicks, self.nick_same_list)
     expected_output = [list(range(len(bin_matrix[0])))]
     # zip(*bin_matrix) walks the matrix column by column.
     expected_output.append([sum(column) for column in zip(*bin_matrix)])
     # assertTrue(a, b) treats b as the failure message and passes for any
     # non-empty list, so it never compared the two values.
     self.assertEqual(expected_output, self.expected_result)
Example #8
0
    def test_message_number_bins_csv(self):
        """Check network.message_number_bins_csv against results stored on disk.

        The bin matrix and total message count returned by the function are
        compared with the expectations loaded from the test data directory.
        Anything the function prints is captured and thrown away.
        """
        data_dir = self.test_data_dir + "message_number_bins_csv/"
        expected_bin_matrix = util.load_from_disk(data_dir + "bin_matrix")
        expected_tot_msgs = util.load_from_disk(data_dir + "tot_msgs")

        # Remember the stream that is active now (a test runner may have
        # installed its own) and restore it even when the call raises, so a
        # failure here cannot swallow the output of later tests.
        original_stdout = sys.stdout
        capturedOutput = StringIO.StringIO()
        sys.stdout = capturedOutput
        try:
            bin_matrix, tot_msgs = network.message_number_bins_csv(
                self.log_data, self.nicks, self.nick_same_list)
        finally:
            sys.stdout = original_stdout
            capturedOutput.close()

        self.assertEqual(bin_matrix, expected_bin_matrix)
        self.assertEqual(tot_msgs, expected_tot_msgs)
Example #9
0
# ============== ANALYSIS =============
# log_data, nicks, nick_same_list and output_directory are defined outside
# this excerpt. Every analysis below receives the same three inputs.

# The message-number graph is built twice; only the trailing boolean differs
# (False here, True for the "_day_list" variant).
# NOTE(review): the flag presumably switches to one graph per day - confirm
# against network.message_number_graph.
message_number_graph = network.message_number_graph(log_data, nicks,
                                                    nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(
    log_data, nicks, nick_same_list, True)
# NOTE: "numder" is a misspelling of "number"; the name is left untouched
# because the OUTPUT section below reads it.
degree_anal_message_numder = network.degree_analysis_on_graph(
    message_number_graph)
# Same pattern as above: the call is made once with True ("_list" variant)
# and once with False.
message_time_graph_list = network.message_time_graph(log_data, nicks,
                                                     nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks,
                                                nick_same_list, False)
# degree_node_number_csv returns three values, unpacked as out / in / total.
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(
    log_data, nicks, nick_same_list)
nick_change_graph_list = user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(
    log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(
    log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)

# The return values of the next two calls are discarded.
# NOTE(review): the second call repeats the one whose result is already
# stored in degree_anal_message_numder above - likely redundant.
user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)

# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")

# ============== OUTPUT ================
# Each saver call receives the data, the output directory and a name.
saver.draw_nx_graph(message_number_graph, output_directory,
                    "message_number_graph")
# The degree analysis result is indexed by degree kind and then by
# "formatted_for_csv" to obtain what is passed to saver.save_csv.
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"],
               output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"],
Example #10
0
# log_directory, channel_name and starting_date are defined before this
# excerpt; the two settings below are read from the config module.
ending_date = config.ENDING_DATE
output_directory = config.OUTPUT_DIRECTORY

# ============== INPUT==================
# Read the logs for the configured channel and date range, then derive the
# nick structures that every analysis below takes as input.
log_data = reader.linux_input(log_directory, channel_name, starting_date, ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)

# ============== ANALYSIS =============
# The message-number and message-time graphs are each built twice; only the
# trailing boolean differs (True for the "_list" variants).
# NOTE(review): the flag presumably switches to one graph per day - confirm
# against the network module.
message_number_graph = network.message_number_graph(log_data, nicks, nick_same_list, False)
message_number_graph_day_list = network.message_number_graph(log_data, nicks, nick_same_list, True)
# NOTE: "numder" is a misspelling of "number"; the name is left untouched
# because the OUTPUT section below reads it.
degree_anal_message_numder = network.degree_analysis_on_graph(message_number_graph)
message_time_graph_list = network.message_time_graph(log_data, nicks, nick_same_list, True)
message_time_graph = network.message_time_graph(log_data, nicks, nick_same_list, False)
# degree_node_number_csv returns three values, unpacked as out / in / total.
out_degree_node_number, in_degree_node_number, total_degree_node_number = network.degree_node_number_csv(log_data, nicks, nick_same_list)
nick_change_graph_list =  user.nick_change_graph(log_data, True)
bin_matrix, total_messages = network.message_number_bins_csv(log_data, nicks, nick_same_list)
conv_len, conv_ref_time = channel.conv_len_conv_refr_time(log_data, nicks, nick_same_list)
resp_time = channel.response_time(log_data, nicks, nick_same_list)

# The return values of the next two calls are discarded.
# NOTE(review): the second call repeats the one whose result is already
# stored in degree_anal_message_numder above - likely redundant.
user.keywords_clusters(log_data, nicks, nick_same_list)
network.degree_analysis_on_graph(message_number_graph)

# adjCC_graph, adjCC_membership = community.infomap_igraph(ig_graph=None, net_file_location="/home/rohan/Desktop/adjCC.net")

# ============== OUTPUT ================
# Each saver call receives the data, the output directory and a name.
saver.draw_nx_graph(message_number_graph, output_directory, "message_number_graph")
# The degree analysis result is indexed by degree kind and then by
# "formatted_for_csv" to obtain what is passed to saver.save_csv.
saver.save_csv(degree_anal_message_numder["out_degree"]["formatted_for_csv"], output_directory, "out_degree")
saver.save_csv(degree_anal_message_numder["in_degree"]["formatted_for_csv"], output_directory, "in_degree")
saver.save_csv(degree_anal_message_numder["total_degree"]["formatted_for_csv"], output_directory, "total_degree")
# The date range is appended to these two names by string concatenation, so
# starting_date and ending_date have to be strings at this point.
saver.save_csv(out_degree_node_number, output_directory, "node_out_degree" + starting_date +'-'+ending_date)
saver.save_csv(in_degree_node_number, output_directory, "node_in_degree"+ starting_date +'-'+ending_date)