def nick_change_graph(log_dict, DAY_BY_DAY_ANALYSIS=False):
    """Build graph(s) of nick changes, one edge per change.

    A log line is treated as a nick change when its first character is '='
    and it does not contain "changed the topic of". The old and new nicks
    are extracted with util.splice_find. The edge is stamped using the
    closest earlier line of the same day that does not start with '=': its
    [1:6] slice (the time, per the original comments) is handed to
    util.build_graphs together with the year, month and day of the log.
    When the day holds no such earlier line, the edge is added directly
    with weight ``rem_time``, the [1:6] slice of the last non-'=' line of
    the previously parsed day log (None if there was none).

    Args:
        log_dict (dict): Dictionary of logs created using reader.py. Each
            value is an iterable of day records, and each record provides
            the list of log lines under the "log_data" key.
        DAY_BY_DAY_ANALYSIS (bool): selects which result is returned.

    Returns:
        A list with one nx.MultiDiGraph per day record when
        DAY_BY_DAY_ANALYSIS is true, otherwise the aggregate
        nx.MultiDiGraph covering every day record.
    """
    # Time of the last message of the day log parsed before the current one.
    rem_time = None
    nick_change_day_list = []
    # Graph for nick changes over the whole time span (not day to day).
    aggregate_nick_change_graph = nx.MultiDiGraph()

    for day_content_all_channels in log_dict.values():
        for day_content in day_content_all_channels:
            day_log = day_content["log_data"]
            today_nick_change_graph = nx.MultiDiGraph()

            for current_line_no, line in enumerate(day_log):
                # Only nick changes are of interest; topic changes also
                # start with '=' and must be skipped.
                if line[0] != '=' or "changed the topic of" in line:
                    continue

                nick1 = util.splice_find(line, "=", " is", 3)
                nick2 = util.splice_find(line, "wn as", "\n", 5)

                # Search backwards for the nearest line that is not a '='
                # line. The range stops at index 0 on purpose: indexing
                # with -1 would wrap around to the LAST line of the day and
                # stamp the change with a time from the end of the log.
                for earlier_line_no in range(current_line_no - 1, -1, -1):
                    if day_log[earlier_line_no][0] != '=':
                        year, month, day = util.get_year_month_day(
                            day_content)
                        util.build_graphs(
                            nick1, nick2, day_log[earlier_line_no][1:6],
                            year, month, day,
                            today_nick_change_graph,
                            aggregate_nick_change_graph)
                        break
                else:
                    # No earlier message in this day's log: fall back to
                    # the time remembered from the previous day log.
                    today_nick_change_graph.add_edge(
                        nick1, nick2, weight=rem_time)
                    aggregate_nick_change_graph.add_edge(
                        nick1, nick2, weight=rem_time)

            # Remember the time of the last message of this log so it can
            # be used for nick changes that open the next log.
            for earlier_line in reversed(day_log):
                if earlier_line[0] != '=':
                    rem_time = earlier_line[1:6]
                    break

            nick_change_day_list.append(today_nick_change_graph)

    if DAY_BY_DAY_ANALYSIS:
        return nick_change_day_list
    return aggregate_nick_change_graph
def nick_change_graph(log_dict, DAY_BY_DAY_ANALYSIS=False):
    """Build graph(s) of nick changes, one edge per change.

    A log line is treated as a nick change when its first character is '='
    and it does not contain "changed the topic of". The old and new nicks
    are extracted with util.splice_find. The edge is stamped using the
    closest earlier line of the same day that does not start with '=': its
    [1:6] slice (the time, per the original comments) is handed to
    util.build_graphs together with the year, month and day of the log.
    When the day holds no such earlier line, the edge is added directly
    with weight ``rem_time``, the [1:6] slice of the last non-'=' line of
    the previously parsed day log (None if there was none).

    Args:
        log_dict (dict): Dictionary of logs created using reader.py. Each
            value is an iterable of day records, and each record provides
            the list of log lines under the "log_data" key.
        DAY_BY_DAY_ANALYSIS (bool): selects which result is returned.

    Returns:
        A list with one nx.MultiDiGraph per day record when
        DAY_BY_DAY_ANALYSIS is true, otherwise the aggregate
        nx.MultiDiGraph covering every day record.
    """
    # Time of the last message of the day log parsed before the current one.
    rem_time = None
    nick_change_day_list = []
    # Graph for nick changes over the whole time span (not day to day).
    aggregate_nick_change_graph = nx.MultiDiGraph()

    for day_content_all_channels in log_dict.values():
        for day_content in day_content_all_channels:
            day_log = day_content["log_data"]
            today_nick_change_graph = nx.MultiDiGraph()

            for current_line_no, line in enumerate(day_log):
                # Only nick changes are of interest; topic changes also
                # start with '=' and must be skipped.
                if line[0] != '=' or "changed the topic of" in line:
                    continue

                nick1 = util.splice_find(line, "=", " is", 3)
                nick2 = util.splice_find(line, "wn as", "\n", 5)

                # Search backwards for the nearest line that is not a '='
                # line. The range stops at index 0 on purpose: indexing
                # with -1 would wrap around to the LAST line of the day and
                # stamp the change with a time from the end of the log.
                for earlier_line_no in range(current_line_no - 1, -1, -1):
                    if day_log[earlier_line_no][0] != '=':
                        year, month, day = util.get_year_month_day(
                            day_content)
                        util.build_graphs(
                            nick1, nick2, day_log[earlier_line_no][1:6],
                            year, month, day,
                            today_nick_change_graph,
                            aggregate_nick_change_graph)
                        break
                else:
                    # No earlier message in this day's log: fall back to
                    # the time remembered from the previous day log.
                    today_nick_change_graph.add_edge(
                        nick1, nick2, weight=rem_time)
                    aggregate_nick_change_graph.add_edge(
                        nick1, nick2, weight=rem_time)

            # Remember the time of the last message of this log so it can
            # be used for nick changes that open the next log.
            for earlier_line in reversed(day_log):
                if earlier_line[0] != '=':
                    rem_time = earlier_line[1:6]
                    break

            nick_change_day_list.append(today_nick_change_graph)

    if DAY_BY_DAY_ANALYSIS:
        return nick_change_day_list
    return aggregate_nick_change_graph
def test_get_year_month_day(self, auxiliary_day_content, expected_output):
    """Check util.get_year_month_day against the expected result.

    Args:
        auxiliary_day_content: argument forwarded to
            util.get_year_month_day.
        expected_output: value the call is expected to return.
    """
    actual_output = util.get_year_month_day(auxiliary_day_content)
    self.assertEqual(actual_output, expected_output)
def message_time_graph(log_dict, nicks, nick_same_list,
                       DAY_BY_DAY_ANALYSIS=False):
    """Build directed multigraph(s) of messages addressed between users.

    For every line accepted by util.check_if_msg_line, the sender is the
    text between '<' and '>' and the candidate receivers are parsed from
    the text split on ':' and ','. Whenever a candidate equals one of
    ``nicks`` and differs from the sender's nick, util.build_graphs is
    called with the sender, the receiver, ``line[1:6]`` (the time, per the
    original comments), the year, month and day of the log, the graph of
    the current day and the aggregate graph.

    Args:
        log_dict (dict): Dictionary of logs data created using reader.py.
            Each value is an iterable of day records providing the log
            lines under the "log_data" key.
        nicks (list): List of nicknames created using nickTracker.py.
        nick_same_list (list): List of same-nick names created using
            nickTracker.py; turned into a graph whose connected components
            are used to resolve sender and receiver nicks.
        DAY_BY_DAY_ANALYSIS (bool): selects which result is returned.

    Returns:
        The list of per-day nx.MultiDiGraph objects when
        DAY_BY_DAY_ANALYSIS is true, otherwise the aggregate
        nx.MultiDiGraph.
    """
    daily_graphs = []
    aggregate_graph = nx.MultiDiGraph()
    same_nick_graph = util.to_graph(nick_same_list)
    conn_comp_list = list(connected_components(same_nick_graph))

    def link_if_addressed(candidate, sender_raw_nick, target_nick, log_line):
        # Reads sender, year, month, day and day_graph from the enclosing
        # scope; they are always bound before this helper is invoked.
        if candidate != target_nick or sender_raw_nick == target_nick:
            return
        receiver = nick_receiver_from_conn_comp(target_nick, conn_comp_list)
        util.build_graphs(sender, receiver, log_line[1:6],
                          year, month, day, day_graph, aggregate_graph)

    util.create_connected_nick_list(conn_comp_list)

    for day_content_all_channels in log_dict.values():
        for day_content in day_content_all_channels:
            day_log = day_content["log_data"]
            year, month, day = util.get_year_month_day(day_content)
            # Graph with multiple directed edges between clients.
            day_graph = nx.MultiDiGraph()

            for line in day_log:
                if not util.check_if_msg_line(line):
                    continue

                comma_seen = False
                sender_match = re.search(r"\<(.*?)\>", line)
                spliced_nick = util.correctLastCharCR(
                    sender_match.group(0)[1:-1])
                sender = util.get_nick_sen_rec(
                    config.MAX_EXPECTED_DIFF_NICKS, spliced_nick,
                    conn_comp_list, "")

                for nick_name in nicks:
                    # Receiver list split about ':'.
                    rec_list = [part.strip() for part in line.split(':')]
                    util.rec_list_splice(rec_list)
                    # Index 0 will contain the time, e.g. 14:02.
                    if not rec_list[1]:
                        break
                    rec_list = util.correct_last_char_list(rec_list)

                    for nick_to_search in rec_list:
                        if (nick_to_search == nick_name
                                and spliced_nick != nick_name):
                            receiver = util.get_nick_sen_rec(
                                config.MAX_EXPECTED_DIFF_NICKS, nick_name,
                                conn_comp_list, "")
                            util.build_graphs(
                                sender, receiver, line[1:6],
                                year, month, day,
                                day_graph, aggregate_graph)

                    # Receiver list may be of the form <Dhruv> Rohan, Ram :
                    if "," in rec_list[1]:
                        comma_seen = True
                        comma_parts = util.correct_last_char_list(
                            [part.strip()
                             for part in rec_list[1].split(',')])
                        for nick_to_search in comma_parts:
                            link_if_addressed(nick_to_search, spliced_nick,
                                              nick_name, line)

                    # Receiver list can be <Dhruv> Rohan, Hi!
                    if not comma_seen:
                        rec = line[line.find(">") + 1:line.find(", ")]
                        rec = util.correctLastCharCR(rec[1:])
                        link_if_addressed(rec, spliced_nick, nick_name, line)

            daily_graphs.append(day_graph)

    if DAY_BY_DAY_ANALYSIS:
        return daily_graphs
    return aggregate_graph
def message_number_graph(log_dict, nicks, nick_same_list,
                         DAY_BY_DAY_ANALYSIS=False):
    """Build directed graph(s) weighted by the number of messages exchanged.

    Each node represents an IRC user. Conversation counters are kept as
    lists of ``[count, sender, receiver]`` entries maintained through
    util.extend_conversation_list. An edge sender -> receiver with
    ``weight=count`` is added for every entry whose count reaches
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH and whose two nicks are at least
    config.MINIMUM_NICK_LENGTH characters long.

    Args:
        log_dict (dict): Dictionary of logs. Each value is an iterable of
            day records providing the log lines under the "log_data" key.
        nicks (list): list of all the nicks.
        nick_same_list (list): list of lists mentioning nicks which belong
            to the same users.
        DAY_BY_DAY_ANALYSIS (bool): selects which result is returned.

    Returns:
        When DAY_BY_DAY_ANALYSIS is true, a list of
        ``[nx.DiGraph, "year-month-day"]`` pairs, one per day record;
        otherwise the aggregate nx.DiGraph.
    """
    message_number_day_list = []
    conversations = [[0] for _ in range(config.MAX_EXPECTED_DIFF_NICKS)]
    aggregate_message_number_graph = nx.DiGraph()

    G = util.to_graph(nick_same_list)
    conn_comp_list = list(connected_components(G))
    util.create_connected_nick_list(conn_comp_list)

    def msg_no_analysis_helper(rec_list, corrected_nick, nick,
                               conn_comp_list, conversations,
                               today_conversation):
        # nick_sender is read from the enclosing scope; it is bound before
        # every call. The rebinding below is local to this helper.
        # NOTE(review): the caller's lists only change if
        # util.extend_conversation_list mutates in place -- confirm.
        for receiver in rec_list:
            if receiver == nick and corrected_nick != nick:
                nick_receiver = util.get_nick_sen_rec(
                    config.MAX_EXPECTED_DIFF_NICKS, nick,
                    conn_comp_list, '')
                if DAY_BY_DAY_ANALYSIS:
                    today_conversation = util.extend_conversation_list(
                        nick_sender, nick_receiver, today_conversation)
                else:
                    conversations = util.extend_conversation_list(
                        nick_sender, nick_receiver, conversations)

    def message_no_add_egde(message_graph, conversation):
        # range() instead of xrange() so the helper also runs on Python 3.
        for index in range(config.MAX_EXPECTED_DIFF_NICKS):
            entry = conversation[index]
            if (len(entry) == 3
                    and entry[0] >= config.THRESHOLD_MESSAGE_NUMBER_GRAPH
                    and len(entry[1]) >= config.MINIMUM_NICK_LENGTH
                    and len(entry[2]) >= config.MINIMUM_NICK_LENGTH):
                message_graph.add_edge(entry[1], entry[2], weight=entry[0])
        return message_graph

    for day_content_all_channels in log_dict.values():
        for day_content in day_content_all_channels:
            day_log = day_content["log_data"]
            today_conversation = [
                [0] for _ in range(config.MAX_EXPECTED_DIFF_NICKS)
            ]

            for line in day_log:
                flag_comma = 0
                if not util.check_if_msg_line(line):
                    continue

                parsed_nick = re.search(r"\<(.*?)\>", line)
                corrected_nick = util.correctLastCharCR(
                    parsed_nick.group(0)[1:-1])
                nick_sender = util.get_nick_sen_rec(
                    config.MAX_EXPECTED_DIFF_NICKS, corrected_nick,
                    conn_comp_list, "")

                for nick in nicks:
                    rec_list = [e.strip() for e in line.split(':')]
                    util.rec_list_splice(rec_list)
                    if not rec_list[1]:
                        break
                    rec_list = util.correct_last_char_list(rec_list)
                    msg_no_analysis_helper(
                        rec_list, corrected_nick, nick, conn_comp_list,
                        conversations, today_conversation)

                    if "," in rec_list[1]:
                        flag_comma = 1
                        # Empty fragments are left untouched.
                        rec_list_2 = [
                            util.correctLastCharCR(e) if e else e
                            for e in (p.strip()
                                      for p in rec_list[1].split(','))
                        ]
                        msg_no_analysis_helper(
                            rec_list_2, corrected_nick, nick,
                            conn_comp_list, conversations,
                            today_conversation)

                    if flag_comma == 0:
                        rec = line[line.find(">") + 1:line.find(", ")]
                        rec = util.correctLastCharCR(rec[1:])
                        if rec == nick and corrected_nick != nick:
                            # NOTE(review): the resolved receiver is never
                            # used, so this branch does not update any
                            # conversation counter. The call is kept as-is;
                            # confirm whether a counter update is missing.
                            nick_receiver = nick_receiver_from_conn_comp(
                                nick, conn_comp_list)

            if DAY_BY_DAY_ANALYSIS:
                today_message_number_graph = message_no_add_egde(
                    nx.DiGraph(), today_conversation)
                year, month, day = util.get_year_month_day(day_content)
                message_number_day_list.append(
                    [today_message_number_graph,
                     year + '-' + month + '-' + day])

    # Single-argument print(...) yields the same output under the Python 2
    # print statement and the Python 3 print function.
    print("\nBuilding graph object with EDGE WEIGHT THRESHOLD:" + " "
          + str(config.THRESHOLD_MESSAGE_NUMBER_GRAPH))

    if not DAY_BY_DAY_ANALYSIS:
        aggregate_message_number_graph = message_no_add_egde(
            aggregate_message_number_graph, conversations)

    if config.DEBUGGER:
        print("========> 30 on " + str(len(conversations))
              + " conversations")
        print(conversations[:30])

    if DAY_BY_DAY_ANALYSIS:
        return message_number_day_list
    return aggregate_message_number_graph
def message_time_graph(log_dict, nicks, nick_same_list,
                       DAY_BY_DAY_ANALYSIS=False):
    """Build directed multigraph(s) of messages addressed between users.

    For every line accepted by util.check_if_msg_line, the sender is the
    text between '<' and '>' and the candidate receivers are parsed from
    the text split on ':' and ','. Whenever a candidate equals one of
    ``nicks`` and differs from the sender's nick, util.build_graphs is
    called with the sender, the receiver, ``line[1:6]`` (the time, per the
    original comments), the year, month and day of the log, the graph of
    the current day and the aggregate graph.

    Args:
        log_dict (dict): Dictionary of logs data created using reader.py.
            Each value is an iterable of day records providing the log
            lines under the "log_data" key.
        nicks (list): List of nicknames created using nickTracker.py.
        nick_same_list (list): List of same-nick names created using
            nickTracker.py; turned into a graph whose connected components
            are used to resolve sender and receiver nicks.
        DAY_BY_DAY_ANALYSIS (bool): selects which result is returned.

    Returns:
        The list of per-day nx.MultiDiGraph objects when
        DAY_BY_DAY_ANALYSIS is true, otherwise the aggregate
        nx.MultiDiGraph.
    """
    per_day_graphs = []
    whole_span_graph = nx.MultiDiGraph()
    conn_comp_list = list(connected_components(util.to_graph(nick_same_list)))

    def add_edge_when_addressed(candidate, raw_sender, wanted_nick, msg_line):
        # sender_nick, year, month, day and current_day_graph come from the
        # enclosing scope and are bound before any call to this helper.
        addressed = candidate == wanted_nick
        if addressed and raw_sender != wanted_nick:
            receiver_nick = nick_receiver_from_conn_comp(
                wanted_nick, conn_comp_list)
            util.build_graphs(sender_nick, receiver_nick, msg_line[1:6],
                              year, month, day,
                              current_day_graph, whole_span_graph)

    util.create_connected_nick_list(conn_comp_list)

    for day_content_all_channels in log_dict.values():
        for day_content in day_content_all_channels:
            day_log = day_content["log_data"]
            year, month, day = util.get_year_month_day(day_content)
            # Graph with multiple directed edges between clients.
            current_day_graph = nx.MultiDiGraph()

            for line in day_log:
                if not util.check_if_msg_line(line):
                    continue

                found_comma = False
                bracketed = re.search(r"\<(.*?)\>", line).group(0)
                spliced_nick = util.correctLastCharCR(bracketed[1:-1])
                sender_nick = util.get_nick_sen_rec(
                    config.MAX_EXPECTED_DIFF_NICKS, spliced_nick,
                    conn_comp_list, "")

                for nick_name in nicks:
                    # Receiver list split about ':'.
                    rec_list = [piece.strip() for piece in line.split(':')]
                    util.rec_list_splice(rec_list)
                    # Index 0 will contain the time, e.g. 14:02.
                    if not rec_list[1]:
                        break
                    rec_list = util.correct_last_char_list(rec_list)

                    for nick_to_search in rec_list:
                        if nick_to_search != nick_name:
                            continue
                        if spliced_nick == nick_name:
                            continue
                        receiver_nick = util.get_nick_sen_rec(
                            config.MAX_EXPECTED_DIFF_NICKS, nick_name,
                            conn_comp_list, "")
                        util.build_graphs(
                            sender_nick, receiver_nick, line[1:6],
                            year, month, day,
                            current_day_graph, whole_span_graph)

                    # Receiver list may be of the form <Dhruv> Rohan, Ram :
                    if "," in rec_list[1]:
                        found_comma = True
                        stripped = [piece.strip()
                                    for piece in rec_list[1].split(',')]
                        for nick_to_search in util.correct_last_char_list(
                                stripped):
                            add_edge_when_addressed(
                                nick_to_search, spliced_nick, nick_name,
                                line)

                    # Receiver list can be <Dhruv> Rohan, Hi!
                    if not found_comma:
                        rec = line[line.find(">") + 1:line.find(", ")]
                        rec = util.correctLastCharCR(rec[1:])
                        add_edge_when_addressed(
                            rec, spliced_nick, nick_name, line)

            per_day_graphs.append(current_day_graph)

    if DAY_BY_DAY_ANALYSIS:
        return per_day_graphs
    return whole_span_graph
def message_number_graph(log_dict, nicks, nick_same_list,
                         DAY_BY_DAY_ANALYSIS=False):
    """Build directed graph(s) weighted by the number of messages exchanged.

    Each node represents an IRC user. Conversation counters are kept as
    lists of ``[count, sender, receiver]`` entries maintained through
    util.extend_conversation_list. An edge sender -> receiver with
    ``weight=count`` is added for every entry whose count reaches
    config.THRESHOLD_MESSAGE_NUMBER_GRAPH and whose two nicks are at least
    config.MINIMUM_NICK_LENGTH characters long.

    Args:
        log_dict (dict): Dictionary of logs. Each value is an iterable of
            day records providing the log lines under the "log_data" key.
        nicks (list): list of all the nicks.
        nick_same_list (list): list of lists mentioning nicks which belong
            to the same users.
        DAY_BY_DAY_ANALYSIS (bool): selects which result is returned.

    Returns:
        When DAY_BY_DAY_ANALYSIS is true, a list of
        ``[nx.DiGraph, "year-month-day"]`` pairs, one per day record;
        otherwise the aggregate nx.DiGraph.
    """
    message_number_day_list = []
    conversations = [[0] for _ in range(config.MAX_EXPECTED_DIFF_NICKS)]
    aggregate_message_number_graph = nx.DiGraph()

    G = util.to_graph(nick_same_list)
    conn_comp_list = list(connected_components(G))
    util.create_connected_nick_list(conn_comp_list)

    def msg_no_analysis_helper(rec_list, corrected_nick, nick,
                               conn_comp_list, conversations,
                               today_conversation):
        # nick_sender is read from the enclosing scope; it is bound before
        # every call. The rebinding below is local to this helper.
        # NOTE(review): the caller's lists only change if
        # util.extend_conversation_list mutates in place -- confirm.
        for receiver in rec_list:
            if receiver == nick and corrected_nick != nick:
                nick_receiver = util.get_nick_sen_rec(
                    config.MAX_EXPECTED_DIFF_NICKS, nick,
                    conn_comp_list, '')
                if DAY_BY_DAY_ANALYSIS:
                    today_conversation = util.extend_conversation_list(
                        nick_sender, nick_receiver, today_conversation)
                else:
                    conversations = util.extend_conversation_list(
                        nick_sender, nick_receiver, conversations)

    def message_no_add_egde(message_graph, conversation):
        # range() instead of xrange() so the helper also runs on Python 3.
        for index in range(config.MAX_EXPECTED_DIFF_NICKS):
            entry = conversation[index]
            if (len(entry) == 3
                    and entry[0] >= config.THRESHOLD_MESSAGE_NUMBER_GRAPH
                    and len(entry[1]) >= config.MINIMUM_NICK_LENGTH
                    and len(entry[2]) >= config.MINIMUM_NICK_LENGTH):
                message_graph.add_edge(entry[1], entry[2], weight=entry[0])
        return message_graph

    for day_content_all_channels in log_dict.values():
        for day_content in day_content_all_channels:
            day_log = day_content["log_data"]
            today_conversation = [
                [0] for _ in range(config.MAX_EXPECTED_DIFF_NICKS)
            ]

            for line in day_log:
                flag_comma = 0
                if not util.check_if_msg_line(line):
                    continue

                parsed_nick = re.search(r"\<(.*?)\>", line)
                corrected_nick = util.correctLastCharCR(
                    parsed_nick.group(0)[1:-1])
                nick_sender = util.get_nick_sen_rec(
                    config.MAX_EXPECTED_DIFF_NICKS, corrected_nick,
                    conn_comp_list, "")

                for nick in nicks:
                    rec_list = [e.strip() for e in line.split(':')]
                    util.rec_list_splice(rec_list)
                    if not rec_list[1]:
                        break
                    rec_list = util.correct_last_char_list(rec_list)
                    msg_no_analysis_helper(
                        rec_list, corrected_nick, nick, conn_comp_list,
                        conversations, today_conversation)

                    if "," in rec_list[1]:
                        flag_comma = 1
                        # Empty fragments are left untouched.
                        rec_list_2 = [
                            util.correctLastCharCR(e) if e else e
                            for e in (p.strip()
                                      for p in rec_list[1].split(','))
                        ]
                        msg_no_analysis_helper(
                            rec_list_2, corrected_nick, nick,
                            conn_comp_list, conversations,
                            today_conversation)

                    if flag_comma == 0:
                        rec = line[line.find(">") + 1:line.find(", ")]
                        rec = util.correctLastCharCR(rec[1:])
                        if rec == nick and corrected_nick != nick:
                            # NOTE(review): the resolved receiver is never
                            # used, so this branch does not update any
                            # conversation counter. The call is kept as-is;
                            # confirm whether a counter update is missing.
                            nick_receiver = nick_receiver_from_conn_comp(
                                nick, conn_comp_list)

            if DAY_BY_DAY_ANALYSIS:
                today_message_number_graph = message_no_add_egde(
                    nx.DiGraph(), today_conversation)
                year, month, day = util.get_year_month_day(day_content)
                message_number_day_list.append(
                    [today_message_number_graph,
                     year + '-' + month + '-' + day])

    # Single-argument print(...) yields the same output under the Python 2
    # print statement and the Python 3 print function.
    print("\nBuilding graph object with EDGE WEIGHT THRESHOLD:" + " "
          + str(config.THRESHOLD_MESSAGE_NUMBER_GRAPH))

    if not DAY_BY_DAY_ANALYSIS:
        aggregate_message_number_graph = message_no_add_egde(
            aggregate_message_number_graph, conversations)

    if config.DEBUGGER:
        print("========> 30 on " + str(len(conversations))
              + " conversations")
        print(conversations[:30])

    if DAY_BY_DAY_ANALYSIS:
        return message_number_day_list
    return aggregate_message_number_graph