예제 #1
0
    def test_community_analysis_single_channel_cutoff_20(self):
        """Compare infomap communities for #kubuntu-devel at cutoff 20.

        Builds the message-number graph with
        ``config.THRESHOLD_MESSAGE_NUMBER_GRAPH`` temporarily set to 20,
        writes it to a ``.net`` file in ``self.current_directory``, runs
        infomap on that file, and compares the result with the stored
        expectation loaded from ``data/output``.

        The config override and the temporary ``.net`` file are cleaned up in
        a ``finally`` block, so a failure while building or clustering the
        graph cannot leak the modified threshold (or the file) into other
        tests.
        """
        log_data = reader.linux_input(self.log_data_dir, ["#kubuntu-devel"],
                                      self.start_date, self.end_date)
        expected_result = util.load_from_disk(
            self.current_directory +
            '/data/output/community_analysis_single_channel_cutoff_20')
        nicks, nick_same_list = nickTracker.nick_tracker(log_data)

        cutoff = 20
        graph_name = ("message-exchange-" + self.start_date + "-cutoff-" +
                      str(cutoff))
        net_file = self.current_directory + "/" + graph_name + '.net'

        default_cutoff = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
        config.THRESHOLD_MESSAGE_NUMBER_GRAPH = cutoff
        try:
            message_number_graph = network.message_number_graph(
                log_data, nicks, nick_same_list, False)
            saver.save_net_nx_graph(message_number_graph,
                                    self.current_directory, graph_name)
            expected_output = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file)
        finally:
            # Always undo the global override and remove the scratch file,
            # even when one of the steps above raised.
            config.THRESHOLD_MESSAGE_NUMBER_GRAPH = default_cutoff
            if os.path.exists(net_file):
                os.remove(net_file)

        self.assertTrue(expected_result[0].isomorphic(expected_output[0]))
        self.assertEqual(
            compare_communities(expected_result[1], expected_output[1]), 0)
예제 #2
0
    def test_save_net_nx_graph(self):
        """Check that ``saver.save_net_nx_graph`` writes a readable Pajek file.

        A three-node directed graph with a single edge is saved as
        ``test_save_net_nx.net`` inside ``self.current_directory``; the file
        must exist, be a regular file, and load back (via ``nx.read_pajek``)
        as a graph isomorphic to the original.

        The file is removed in a ``finally`` block so a failing assertion
        does not leave the artifact behind for later runs.
        """
        graph = nx.DiGraph()
        graph.add_nodes_from([1, 2, 3])
        graph.add_edge(1, 3)
        net_file = self.current_directory + '/test_save_net_nx.net'

        try:
            saver.save_net_nx_graph(graph, self.current_directory,
                                    'test_save_net_nx')

            assert os.path.exists(net_file)
            assert os.path.isfile(net_file)

            g = nx.read_pajek(net_file)
            assert nx.is_isomorphic(graph, g)
        finally:
            # Guarded so that a failed save does not mask the real error
            # with a "file not found" raised from the cleanup.
            if os.path.exists(net_file):
                os.remove(net_file)
예제 #3
0
    def test_community_analysis_multi_channel(self):
        """Compare infomap communities of the multi-channel presence graphs.

        For each presence type ("CC", "UU", "CU") the full and the reduced
        graph returned by ``network.channel_user_presence_graph_and_csv`` are
        written to ``adj<type>.net`` / ``radj<type>.net``, clustered with
        infomap, removed again, and compared against the stored expectation.
        """
        log_data = reader.linux_input(self.log_data_dir, ["ALL"],
                                      self.start_date, self.end_date)
        expected_result = util.load_from_disk(
            self.current_directory +
            '/data/output/community_analysis_multi_channel')
        (nicks, nick_same_list, channels_for_user, nick_channel_dict,
         nicks_hash, channels_hash) = nickTracker.nick_tracker(log_data, True)
        dict_out, _graph = network.channel_user_presence_graph_and_csv(
            nicks, nick_same_list, channels_for_user, nick_channel_dict,
            nicks_hash, channels_hash)

        expected_output = {}
        for ptype in ("CC", "UU", "CU"):
            adj_net = self.current_directory + '/adj' + ptype + '.net'
            radj_net = self.current_directory + '/radj' + ptype + '.net'

            # Write both graphs first, then cluster them from the files.
            saver.save_net_nx_graph(dict_out[ptype]["graph"],
                                    self.current_directory, "adj" + ptype)
            saver.save_net_nx_graph(dict_out[ptype]["reducedGraph"],
                                    self.current_directory, "radj" + ptype)

            actual = {}
            actual['adj'] = community.infomap_igraph(
                ig_graph=None, net_file_location=adj_net)
            actual['radj'] = community.infomap_igraph(
                ig_graph=None, net_file_location=radj_net)
            expected_output[ptype] = actual

            # The scratch files are no longer needed once infomap has run.
            os.remove(adj_net)
            os.remove(radj_net)

            for kind in ('adj', 'radj'):
                stored = expected_result[ptype][kind]
                computed = actual[kind]
                self.assertTrue(stored[0].isomorphic(computed[0]))
                self.assertEqual(
                    compare_communities(computed[1], stored[1]), 0)
예제 #4
0
def codelengths(log_directory, output_directory, channel_name):
    """Compute the infomap codelength for every month of 2013 and plot them.

    For each month the message-number graph is built from the logs, saved as
    ``message-exchange-<month>.net`` and clustered with infomap. Exactly one
    codelength per month is collected; the values are drawn as a box plot and
    written to a CSV file, both named "codelengths2013".

    Args:
        log_directory(str): path to the location of the logs
        output_directory(str): path to the location where the results are
            stored. It is concatenated directly with the file name when the
            saved graph is read back, so it is expected to end with a path
            separator.
        channel_name(list): channels to analyse; passed unchanged to
            ``reader.linux_input``

    Returns:
        None
    """
    monthly_codelengths = []
    for month in range(1, 13):
        # NOTE(review): every month is queried up to day 31; presumably
        # reader.linux_input tolerates days that do not exist -- confirm.
        log_data_m1 = reader.linux_input(log_directory, channel_name,
                                         "2013-" + str(month) + "-1",
                                         "2013-" + str(month) + "-31")
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        message_number_graph_m1 = network.message_number_graph(
            log_data_m1, nicks_m1, nick_same_list_m1, False)

        graph_name = "message-exchange-" + str(month)
        net_file = output_directory + graph_name + '.net'
        try:
            #FOS is a reserved word in igraph and if 'fos' is a username in the nx graph, it generates an error
            saver.save_net_nx_graph(message_number_graph_m1, output_directory,
                                    graph_name)
            _, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file)
        except Exception:
            # Retry once with the offending 'fos' label renamed. Only this
            # failure path re-runs infomap; previously the success path ran
            # it a second time as well and appended a duplicate codelength.
            labels = {
                label: ("fos_" if label == "fos" else label)
                for label in message_number_graph_m1.nodes()
            }
            message_number_graph_m1 = nx.relabel_nodes(message_number_graph_m1,
                                                       labels)
            saver.save_net_nx_graph(message_number_graph_m1, output_directory,
                                    graph_name)
            # Single formatted argument: same output on Python 2 and 3.
            print("error in %s" % month)
            _, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file)

        monthly_codelengths.append(msg_community.codelength)

    vis.box_plot(monthly_codelengths, output_directory, "codelengths2013")
    saver.save_csv([monthly_codelengths], output_directory, "codelengths2013")
예제 #5
0
# Keyword clustering and degree analysis on the message-number graph.
user.keywords_clusters(
    log_data, nicks, nick_same_list, output_directory, "keywords")
network.degree_analysis_on_graph(message_number_graph)

# Remember the configured cutoff so it can be put back after the sweep.
threshold = config.THRESHOLD_MESSAGE_NUMBER_GRAPH
cutoffs = [0, 10, 20]

# Draw one hubs-and-experts graph per cutoff value.
for cutoff in cutoffs:
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH = cutoff
    (msg_graph_experts, top_hub, top_keyword_overlap,
     top_auth) = network.identify_hubs_and_experts(
         log_data, nicks, nick_same_list)
    saver.draw_nx_graph(
        msg_graph_experts, output_directory, "hits-cutoff-" + str(cutoff))

# Restore the cutoff that was configured before the sweep.
config.THRESHOLD_MESSAGE_NUMBER_GRAPH = threshold

# ============== OUTPUT ================
saver.save_net_nx_graph(
    message_number_graph, output_directory, "message_number_graph")
saver.draw_nx_graph(
    message_number_graph, output_directory, "message_number_graph")

saver.save_csv(
    [["response_time_cutoff"], [rt_cutoff_time]],
    output_directory, "rt_cutoff")
# One header row followed by one data row: node count and total edge weight.
saver.save_csv(
    [
        ["month", "users", "directed_messages"],
        ["Jan-2013", len(message_number_graph),
         int(message_number_graph.size('weight'))],
    ],
    output_directory, "users_messages")

for dtype in degree_type:
    saver.save_csv(
        degree_anal_message_number[dtype]["formatted_for_csv"],
        output_directory, dtype)

saver.save_csv(
    bin_matrix, output_directory,
    "MessageNumber_binsize_" + str(config.BIN_LENGTH_MINS))

# =============== VIZ ===================
# Cluster the graph from the .net file written in the OUTPUT section above.
message_graph, message_comm = community.infomap_igraph(
    ig_graph=None,
    net_file_location=output_directory + 'message_number_graph.net')
vis.plot_infomap_igraph(
    message_graph, message_comm.membership, output_directory, "message")
vis.plot_data(data, output_directory, "bins")
예제 #6
0
def codelengths(log_directory, output_directory, channel_name, start_date,
                end_date):
    """
        Iterate through the months in the given date range and compute the
        infomap codelength of each month's message-number graph. The
        codelengths (exactly one per month) are then drawn as a box plot and
        saved as a CSV file, both named "codelengths2013".

    Args:
        log_directory(str): path to the location of Logs
        output_directory(str): path to the location where the results are to
            be stored. It is concatenated directly with the file name when
            the saved graph is read back, so it is expected to end with a
            path separator.
        channel_name(list): channels for which the analysis is to be done
        start_date(str or datetime): starting date for the logs to be
            analysed, either a datetime or a '%Y-%m-%d' string. This has to
            be the beginning of the month.
        end_date(str or datetime): ending date for which the logs are to be
            analysed, either a datetime or a '%Y-%m-%d' string. This has to
            be the end of the month.

    Returns:
       None

    """
    # strptime is a classmethod taking (date_string, format); the previous
    # `start_date.strptime('%Y-%m-%d')` raised for strings and datetimes
    # alike. Parse strings and pass datetimes through unchanged.
    if not isinstance(start_date, datetime.datetime):
        start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    if not isinstance(end_date, datetime.datetime):
        end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d')

    monthly_codelengths = []
    for dt in rrule(MONTHLY, dtstart=start_date, until=end_date):
        last_day_of_the_month1 = dt + relativedelta(
            months=1) - datetime.timedelta(days=1)
        log_data_m1 = reader.linux_input(
            log_directory, channel_name, dt.strftime("%Y-%m-%d"),
            last_day_of_the_month1.strftime("%Y-%m-%d"))
        nicks_m1, nick_same_list_m1 = nickTracker.nick_tracker(log_data_m1)
        message_number_graph_m1 = network.message_number_graph(
            log_data_m1, nicks_m1, nick_same_list_m1, False)

        # NOTE(review): the file name only contains the month number, so a
        # range spanning several years reuses (overwrites) the same files.
        graph_name = "message-exchange-" + str(dt.month)
        net_file = output_directory + graph_name + '.net'
        try:
            #FOS is a reserved word in igraph and if 'fos' is a username in the nx graph, it generates an error
            saver.save_net_nx_graph(message_number_graph_m1, output_directory,
                                    graph_name)
            _, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file)
        except Exception:
            # Retry once with the offending 'fos' label renamed. Only this
            # failure path re-runs infomap; previously the success path ran
            # it a second time as well and appended a duplicate codelength.
            labels = {
                label: ("fos_" if label == "fos" else label)
                for label in message_number_graph_m1.nodes()
            }
            message_number_graph_m1 = nx.relabel_nodes(message_number_graph_m1,
                                                       labels)
            saver.save_net_nx_graph(message_number_graph_m1, output_directory,
                                    graph_name)
            # Single formatted argument: same output on Python 2 and 3.
            print("error in %s" % dt.month)
            _, msg_community = community.infomap_igraph(
                ig_graph=None, net_file_location=net_file)

        monthly_codelengths.append(msg_community.codelength)

    vis.box_plot(monthly_codelengths, output_directory, "codelengths2013")
    saver.save_csv([monthly_codelengths], output_directory, "codelengths2013")
예제 #7
0
      file=exec_times_file)
exec_times_file.flush()

# Drop the references to the previous graph and its analysis/regression
# results, then force a garbage-collection pass before the next graph is
# built. The completion time is appended to the execution-times file.
del message_number_graph, degree_anal_message_number
del slope, intercept, r_square, mse
gc.collect()
print("msg exchange with cutoff=0 gc completed at: ",
      datetime.datetime.now(),
      file=exec_times_file)
exec_times_file.flush()

# create a smaller message exchange graph for visualization
# NOTE(review): the global threshold is overwritten here and is not restored
# anywhere within this excerpt -- confirm that later code expects 20.
config.THRESHOLD_MESSAGE_NUMBER_GRAPH = 20
message_number_graph = network.message_number_graph(log_data, nicks,
                                                    nick_same_list, False)
# The cutoff-20 graph is written under the generic name first; the
# "generated at" timestamp below therefore includes this save.
saver.save_net_nx_graph(message_number_graph, output_directory,
                        "message_number_graph")
print("msg exchange graph with cutoff=20 generated at: ",
      datetime.datetime.now(),
      file=exec_times_file)
exec_times_file.flush()

# Same graph saved a second time, under a name that records the cutoff.
saver.save_net_nx_graph(message_number_graph, output_directory,
                        "message_number_graph_cutoff_20")
print("msg exchange graph saved at: ",
      datetime.datetime.now(),
      file=exec_times_file)
exec_times_file.flush()

# Render the cutoff-20 graph under the same cutoff-specific name.
saver.draw_nx_graph(message_number_graph, output_directory,
                    "message_number_graph_cutoff_20")
print("msg exchange graph plot completed at: ",