コード例 #1
0
ファイル: user.py プロジェクト: rohangoel96/IRCLogParser
def nick_change_graph(log_dict, DAY_BY_DAY_ANALYSIS=False):
    """Build graphs that track nick changes, one time-stamped edge per change.

    Every log line that starts with '=' and does not contain
    "changed the topic of" is treated as a nick-change notice.  The old nick
    is spliced out between "=" and " is", the new nick between "wn as" and
    the end of the line.  The edge old nick -> new nick is stamped with the
    time (characters 1..5) of the closest preceding line of the same log that
    is not a '=' notice.  If the notice has no such preceding line, the time
    of the last non-'=' line of the previously parsed log is used instead
    (None when no earlier log supplied one).

    Args:
        log_dict (dict): Dictionary of logs created using reader.py.  Each
            value is iterated as a collection of day records whose lines are
            read from the "log_data" key.
        DAY_BY_DAY_ANALYSIS (bool): Selects which of the two results is
            returned.

    Returns:
        The list of per-day nx.MultiDiGraph objects if DAY_BY_DAY_ANALYSIS is
        true, otherwise the aggregate nx.MultiDiGraph for the whole period.
    """
    # Time of the last non-'=' line of the log parsed before the current one.
    rem_time = None
    nick_change_day_list = []
    # Graph for nick changes over the whole time span (not day to day).
    aggregate_nick_change_graph = nx.MultiDiGraph()

    for day_content_all_channels in log_dict.values():
        for day_content in day_content_all_channels:
            day_log = day_content["log_data"]
            today_nick_change_graph = nx.MultiDiGraph()

            for current_line_no, line in enumerate(day_log):
                # Only nick-change notices are of interest; topic changes
                # also start with '=' and must be skipped.
                if line[0] != '=' or "changed the topic of" in line:
                    continue

                nick1 = util.splice_find(line, "=", " is", 3)
                nick2 = util.splice_find(line, "wn as", "\n", 5)

                # Walk backwards to the nearest line that is not a '=' notice.
                # The scan must stop at index 0: stepping to -1 and indexing
                # day_log[-1] would silently wrap around to the LAST line of
                # the log and stamp the edge with a time from later that day.
                earlier_line_no = current_line_no - 1
                while earlier_line_no >= 0 and day_log[earlier_line_no][0] == '=':
                    earlier_line_no -= 1

                if earlier_line_no >= 0:
                    year, month, day = util.get_year_month_day(day_content)
                    # build_graphs receives both graphs; presumably it adds the
                    # date/time-stamped edge to each of them (defined in util).
                    util.build_graphs(nick1, nick2,
                                      day_log[earlier_line_no][1:6],
                                      year, month, day,
                                      today_nick_change_graph,
                                      aggregate_nick_change_graph)
                else:
                    # No message precedes the notice in this log: fall back to
                    # the time remembered from the previous log.
                    today_nick_change_graph.add_edge(nick1, nick2, weight=rem_time)
                    aggregate_nick_change_graph.add_edge(nick1, nick2, weight=rem_time)

            # Remember the time of this log's last non-'=' line for the next log.
            for earlier_line in reversed(day_log):
                if earlier_line[0] != '=':
                    rem_time = earlier_line[1:6]
                    break

            nick_change_day_list.append(today_nick_change_graph)

    if DAY_BY_DAY_ANALYSIS:
        return nick_change_day_list
    else:
        return aggregate_nick_change_graph
コード例 #2
0
def nick_change_graph(log_dict, DAY_BY_DAY_ANALYSIS=False):
    """Create graphs of nick changes where each edge carries a time stamp.

    A line beginning with '=' that does not mention "changed the topic of"
    is taken to be a nick-change notice.  The previous nick is the text
    spliced between "=" and " is"; the new nick is the text spliced between
    "wn as" and the newline.  The resulting edge is stamped with the time
    (characters 1..5) of the nearest earlier line in the same log that is not
    a '=' notice.  When the notice has no such earlier line, the time of the
    last non-'=' line of the previously parsed log is used (None if there is
    none yet).

    Args:
        log_dict (dict): Dictionary of logs created using reader.py.  Each
            value is iterated as a collection of day records exposing their
            lines under the "log_data" key.
        DAY_BY_DAY_ANALYSIS (bool): Chooses between the per-day list and the
            aggregate graph as the return value.

    Returns:
        list of per-day nx.MultiDiGraph objects when DAY_BY_DAY_ANALYSIS is
        true, else one aggregate nx.MultiDiGraph for the given time period.
    """
    # Remembers the time of the last message of the log parsed before the
    # current one.
    rem_time = None
    nick_change_day_list = []
    # Graph for nick changes in the whole time span (not day to day).
    aggregate_nick_change_graph = nx.MultiDiGraph()

    for day_content_all_channels in log_dict.values():
        for day_content in day_content_all_channels:
            day_log = day_content["log_data"]
            today_nick_change_graph = nx.MultiDiGraph()

            for current_line_no, line in enumerate(day_log):
                # Topic changes also start with '='; only nick changes count.
                is_nick_change = (line[0] == '='
                                  and "changed the topic of" not in line)
                if not is_nick_change:
                    continue

                nick1 = util.splice_find(line, "=", " is", 3)
                nick2 = util.splice_find(line, "wn as", "\n", 5)

                # Scan strictly earlier lines, newest first.  The range ends
                # at index 0 on purpose: the previous implementation stepped
                # to -1 and read day_log[-1], i.e. the last line of the log,
                # which produced a wrongly stamped edge plus a duplicate one.
                for earlier_line_no in range(current_line_no - 1, -1, -1):
                    if day_log[earlier_line_no][0] != '=':
                        year, month, day = util.get_year_month_day(
                            day_content)
                        # build_graphs is given both graphs; presumably it
                        # records the stamped edge in each (defined in util).
                        util.build_graphs(nick1, nick2,
                                          day_log[earlier_line_no][1:6],
                                          year, month, day,
                                          today_nick_change_graph,
                                          aggregate_nick_change_graph)
                        break
                else:
                    # Loop finished without a break: no message precedes the
                    # notice in this log, so use the remembered time.
                    today_nick_change_graph.add_edge(nick1,
                                                     nick2,
                                                     weight=rem_time)
                    aggregate_nick_change_graph.add_edge(nick1,
                                                         nick2,
                                                         weight=rem_time)

            # Set up rem_time for the next log by noting the time of the last
            # non-'=' line in this one.
            for earlier_line in reversed(day_log):
                if earlier_line[0] != '=':
                    rem_time = earlier_line[1:6]
                    break

            nick_change_day_list.append(today_nick_change_graph)

    if DAY_BY_DAY_ANALYSIS:
        return nick_change_day_list
    else:
        return aggregate_nick_change_graph
コード例 #3
0
 def test_get_year_month_day(self, auxiliary_day_content, expected_output):
     """Assert that util.get_year_month_day returns expected_output for the given day record."""
     actual_output = util.get_year_month_day(auxiliary_day_content)
     self.assertEqual(actual_output, expected_output)
コード例 #4
0
ファイル: network.py プロジェクト: rohangoel96/IRCLogParser
def message_time_graph(log_dict, nicks, nick_same_list, DAY_BY_DAY_ANALYSIS=False):
    """Build directed multigraphs with one time-stamped edge per addressed message.

    For every message line, the sender is taken from the text between '<'
    and '>', and each nick in ``nicks`` that the line appears to address
    (matched against the colon-separated and comma-separated parts of the
    line) becomes the receiver of an edge stamped with characters 1..5 of
    the line plus the day's year, month and day.

    Args:
        log_dict (dictionary): Dictionary of logs data created using reader.py
        nicks (List): List of nicknames created using nickTracker.py
        nick_same_list (List): List of same_nick names created using nickTracker.py
        DAY_BY_DAY_ANALYSIS (bool): Selects which result is returned.

    Returns:
        The list of per-day nx.MultiDiGraph objects if DAY_BY_DAY_ANALYSIS is
        true, otherwise the aggregate nx.MultiDiGraph.
    """
    per_day_graphs = []
    aggregate_graph = nx.MultiDiGraph()
    conn_comp_list = list(connected_components(util.to_graph(nick_same_list)))

    def add_edge_if_addressed(candidate, sender_raw, nick_name, line):
        # sender, year, month, day and day_graph are read from the enclosing
        # scope at call time; they are (re)bound by the loops below.
        if candidate != nick_name or sender_raw == nick_name:
            return
        receiver = nick_receiver_from_conn_comp(nick_name, conn_comp_list)
        util.build_graphs(sender, receiver, line[1:6], year, month, day, day_graph, aggregate_graph)

    # Return value is discarded, so this presumably adjusts conn_comp_list in place.
    util.create_connected_nick_list(conn_comp_list)

    for channels_for_day in log_dict.values():
        for day_content in channels_for_day:
            day_log = day_content["log_data"]
            year, month, day = util.get_year_month_day(day_content)
            # Multigraph: several directed edges between the same pair are kept.
            day_graph = nx.MultiDiGraph()

            for line in day_log:
                if not util.check_if_msg_line(line):
                    continue

                has_comma = False
                bracketed = re.search(r"\<(.*?)\>", line).group(0)
                sender_raw = util.correctLastCharCR(bracketed[1:-1])
                sender = util.get_nick_sen_rec(config.MAX_EXPECTED_DIFF_NICKS, sender_raw, conn_comp_list, "")

                for nick_name in nicks:
                    # Receiver list, split about ':' (index 0 holds the time, e.g. 14:02).
                    rec_list = [part.strip() for part in line.split(':')]
                    util.rec_list_splice(rec_list)
                    if not rec_list[1]:
                        break
                    rec_list = util.correct_last_char_list(rec_list)

                    for candidate in rec_list:
                        if candidate == nick_name and sender_raw != nick_name:
                            receiver = util.get_nick_sen_rec(config.MAX_EXPECTED_DIFF_NICKS, nick_name, conn_comp_list, "")
                            util.build_graphs(sender, receiver, line[1:6], year, month, day, day_graph, aggregate_graph)

                    if "," in rec_list[1]:
                        # Receiver list may be of the form "<Dhruv> Rohan, Ram :".
                        has_comma = True
                        comma_parts = [part.strip() for part in rec_list[1].split(',')]
                        comma_parts = util.correct_last_char_list(comma_parts)
                        for candidate in comma_parts:
                            add_edge_if_addressed(candidate, sender_raw, nick_name, line)

                    if not has_comma:
                        # Receiver can also appear as "<Dhruv> Rohan, Hi!".
                        after_sender = line[line.find(">") + 1:line.find(", ")]
                        rec = util.correctLastCharCR(after_sender[1:])
                        add_edge_if_addressed(rec, sender_raw, nick_name, line)

            per_day_graphs.append(day_graph)

    return per_day_graphs if DAY_BY_DAY_ANALYSIS else aggregate_graph
コード例 #5
0
ファイル: network.py プロジェクト: rohangoel96/IRCLogParser
def message_number_graph(log_dict, nicks, nick_same_list, DAY_BY_DAY_ANALYSIS=False):
    """ Creates directed graphs whose edge weights are per-pair message counters.

        Each node represents an IRC user; an edge sender -> receiver is added
        for every conversation entry of the form [count, sender, receiver]
        whose count reaches config.THRESHOLD_MESSAGE_NUMBER_GRAPH and whose
        two nicks are at least config.MINIMUM_NICK_LENGTH characters long.
        The count becomes the edge's ``weight``.

    Args:
        log_dict (dict): dictionary of logs (keyed by date according to the original author);
            each value is iterated as a collection of day records whose lines are read
            from the "log_data" key
        nicks(list): list of all the nicks
        nick_same_list(list): list of lists mentioning nicks which belong to same users
        DAY_BY_DAY_ANALYSIS (bool): selects per-day output instead of the aggregate graph

    Returns:
        If DAY_BY_DAY_ANALYSIS is true: a list of [nx.DiGraph, "year-month-day"] pairs,
        one per day record. Otherwise: a single aggregate nx.DiGraph.

    Side effects:
        Prints the edge weight threshold to stdout, and the first 30 conversation
        entries as well when config.DEBUGGER is set.
    """
    message_number_day_list = []
    # One slot per expected nick; a slot starts as [0] and is only turned into an
    # edge once it has the three-element form checked in message_no_add_egde.
    conversations=[[0] for i in range(config.MAX_EXPECTED_DIFF_NICKS)]
    aggregate_message_number_graph = nx.DiGraph()  # plain DiGraph: at most one directed edge per ordered pair

    G = util.to_graph(nick_same_list)
    conn_comp_list = list(connected_components(G))

    # Return value is discarded, so this presumably adjusts conn_comp_list in place.
    util.create_connected_nick_list(conn_comp_list)

    # Records one sender -> receiver message for every entry of rec_list equal to
    # `nick`, unless the sender's own (corrected) nick is `nick`.
    # nick_sender is read from the enclosing scope at call time.
    # NOTE(review): the assignments below rebind this helper's own parameters, so
    # the caller only sees the update if util.extend_conversation_list mutates the
    # list it is given in place -- confirm against util.
    def msg_no_analysis_helper(rec_list, corrected_nick, nick, conn_comp_list,conversations,today_conversation):
        for receiver in rec_list:
            if(receiver == nick):
                if(corrected_nick != nick):                                 
                    nick_receiver = ''
                    nick_receiver = util.get_nick_sen_rec(config.MAX_EXPECTED_DIFF_NICKS, nick, conn_comp_list, nick_receiver)    

                    if DAY_BY_DAY_ANALYSIS:
                        today_conversation = util.extend_conversation_list(nick_sender, nick_receiver, today_conversation)
                    else:
                        conversations = util.extend_conversation_list(nick_sender, nick_receiver, conversations)

    # Adds one weighted edge to message_graph per qualifying conversation entry
    # ([count, sender, receiver]) and returns the same graph object.
    def message_no_add_egde(message_graph, conversation):
        for index in xrange(config.MAX_EXPECTED_DIFF_NICKS):
            # Only fully populated entries whose count reaches the threshold qualify.
            if(len(conversation[index]) == 3 and conversation[index][0] >= config.THRESHOLD_MESSAGE_NUMBER_GRAPH):
                # Both nicks must meet the minimum length.
                if len(conversation[index][1]) >= config.MINIMUM_NICK_LENGTH and len(conversation[index][2]) >= config.MINIMUM_NICK_LENGTH:
                    message_graph.add_edge(conversation[index][1], conversation[index][2], weight=conversation[index][0])
        return message_graph


    for day_content_all_channels in log_dict.values():
        for day_content in day_content_all_channels:
            day_log = day_content["log_data"]
            # Fresh per-day counters, same layout as `conversations`.
            today_conversation = [[0] for i in range(config.MAX_EXPECTED_DIFF_NICKS)]
            for line in day_log:
                flag_comma = 0

                if(util.check_if_msg_line (line)):
                    # Sender nick is the text between '<' and '>'.
                    parsed_nick = re.search(r"\<(.*?)\>", line)
                    corrected_nick = util.correctLastCharCR(parsed_nick.group(0)[1:-1])
                    nick_sender = ""
                    nick_receiver = ""                    
                    nick_sender = util.get_nick_sen_rec(config.MAX_EXPECTED_DIFF_NICKS, corrected_nick, conn_comp_list, nick_sender)        

                    for nick in nicks:
                        # Candidate receivers: the colon-separated parts of the line.
                        rec_list = [e.strip() for e in line.split(':')]
                        util.rec_list_splice(rec_list)
                        # Nothing after the spliced first part: stop scanning nicks for this line.
                        if not rec_list[1]:
                            break                        
                        rec_list = util.correct_last_char_list(rec_list)       
                        msg_no_analysis_helper(rec_list, corrected_nick, nick, conn_comp_list, conversations,today_conversation)

                        # Several receivers separated by commas.
                        if "," in rec_list[1]:
                            flag_comma = 1
                            rec_list_2=[e.strip() for e in rec_list[1].split(',')]
                            for i in xrange(0,len(rec_list_2)):
                                if(rec_list_2[i]):
                                    rec_list_2[i] = util.correctLastCharCR(rec_list_2[i])                            
                            msg_no_analysis_helper(rec_list_2, corrected_nick, nick, conn_comp_list, conversations, today_conversation)                

                        if(flag_comma == 0):
                            # Text between '>' and the first ", ", minus its first character.
                            rec = line[line.find(">")+1:line.find(", ")]
                            rec = rec[1:]
                            rec = util.correctLastCharCR(rec)
                            if(rec == nick):
                                if(corrected_nick != nick):                                   
                                    # NOTE(review): nick_receiver is computed here but never used
                                    # afterwards in this function -- no conversation entry is
                                    # recorded for this case. Confirm whether that is intended.
                                    nick_receiver = nick_receiver_from_conn_comp(nick, conn_comp_list)        

            if DAY_BY_DAY_ANALYSIS:
                today_message_number_graph = nx.DiGraph()
                today_message_number_graph = message_no_add_egde(today_message_number_graph, today_conversation)                
                year, month, day = util.get_year_month_day(day_content)
                # Pair the day's graph with its "year-month-day" label.
                message_number_day_list.append([today_message_number_graph, year+'-'+month+'-'+day])

    print "\nBuilding graph object with EDGE WEIGHT THRESHOLD:", config.THRESHOLD_MESSAGE_NUMBER_GRAPH

    # The aggregate graph is only populated when per-day output was not requested.
    if not DAY_BY_DAY_ANALYSIS:
        aggregate_message_number_graph = message_no_add_egde(aggregate_message_number_graph, conversations)
        

    if config.DEBUGGER:
        print "========> 30 on " + str(len(conversations)) + " conversations"
        print conversations[:30]

    if DAY_BY_DAY_ANALYSIS:
        return message_number_day_list
    else:
        return aggregate_message_number_graph
コード例 #6
0
def message_time_graph(log_dict,
                       nicks,
                       nick_same_list,
                       DAY_BY_DAY_ANALYSIS=False):
    """Create directed multigraphs in which each edge is one addressed message.

    The sender of a message line is the text between '<' and '>'.  Every
    nick from ``nicks`` that matches one of the colon-separated or
    comma-separated parts of the line becomes a receiver, and an edge
    sender -> receiver is recorded with characters 1..5 of the line and the
    day's year, month and day.

    Args:
        log_dict (dictionary): Dictionary of logs data created using reader.py
        nicks (List): List of nicknames created using nickTracker.py
        nick_same_list (List): List of same_nick names created using nickTracker.py
        DAY_BY_DAY_ANALYSIS (bool): Chooses the per-day list over the
            aggregate graph as the return value.

    Returns:
        list of per-day nx.MultiDiGraph objects when DAY_BY_DAY_ANALYSIS is
        true, else the aggregate nx.MultiDiGraph.
    """
    daily_graphs = []
    overall_graph = nx.MultiDiGraph()
    nick_graph = util.to_graph(nick_same_list)
    conn_comp_list = list(connected_components(nick_graph))

    def link_matching(candidates, raw_sender, nick_name, line):
        # resolved_sender, year, month, day and todays_graph come from the
        # enclosing scope and are looked up when this helper is called.
        for candidate in candidates:
            if candidate == nick_name and raw_sender != nick_name:
                resolved_receiver = nick_receiver_from_conn_comp(
                    nick_name, conn_comp_list)
                util.build_graphs(resolved_sender, resolved_receiver,
                                  line[1:6], year, month, day, todays_graph,
                                  overall_graph)

    # Return value is discarded, so this presumably adjusts conn_comp_list
    # in place.
    util.create_connected_nick_list(conn_comp_list)

    for all_channels in log_dict.values():
        for day_content in all_channels:
            day_log = day_content["log_data"]
            year, month, day = util.get_year_month_day(day_content)
            # Multigraph: repeated edges between the same clients are kept.
            todays_graph = nx.MultiDiGraph()

            for line in day_log:
                if not util.check_if_msg_line(line):
                    continue

                comma_seen = False
                sender_match = re.search(r"\<(.*?)\>", line)
                raw_sender = util.correctLastCharCR(
                    sender_match.group(0)[1:-1])
                resolved_sender = util.get_nick_sen_rec(
                    config.MAX_EXPECTED_DIFF_NICKS, raw_sender,
                    conn_comp_list, "")

                for nick_name in nicks:
                    # Receiver list split about ':'; index 0 holds the time
                    # (e.g. 14:02).
                    rec_list = [piece.strip() for piece in line.split(':')]
                    util.rec_list_splice(rec_list)
                    if not rec_list[1]:
                        break
                    rec_list = util.correct_last_char_list(rec_list)

                    for candidate in rec_list:
                        if candidate != nick_name:
                            continue
                        if raw_sender == nick_name:
                            continue
                        resolved_receiver = util.get_nick_sen_rec(
                            config.MAX_EXPECTED_DIFF_NICKS, nick_name,
                            conn_comp_list, "")
                        util.build_graphs(resolved_sender, resolved_receiver,
                                          line[1:6], year, month, day,
                                          todays_graph, overall_graph)

                    if "," in rec_list[1]:
                        # Receiver list may be of the form
                        # "<Dhruv> Rohan, Ram :".
                        comma_seen = True
                        comma_pieces = util.correct_last_char_list(
                            [piece.strip()
                             for piece in rec_list[1].split(',')])
                        link_matching(comma_pieces, raw_sender, nick_name,
                                      line)

                    if not comma_seen:
                        # Receiver can also appear as "<Dhruv> Rohan, Hi!".
                        tail = line[line.find(">") + 1:line.find(", ")]
                        single = util.correctLastCharCR(tail[1:])
                        link_matching([single], raw_sender, nick_name, line)

            daily_graphs.append(todays_graph)

    if DAY_BY_DAY_ANALYSIS:
        return daily_graphs
    return overall_graph
コード例 #7
0
def message_number_graph(log_dict,
                         nicks,
                         nick_same_list,
                         DAY_BY_DAY_ANALYSIS=False):
    """ Creates directed graphs whose edge weights are per-pair message counters.

        Each node represents an IRC user; an edge sender -> receiver is added
        for every conversation entry of the form [count, sender, receiver]
        whose count reaches config.THRESHOLD_MESSAGE_NUMBER_GRAPH and whose
        two nicks are at least config.MINIMUM_NICK_LENGTH characters long.
        The count becomes the edge's ``weight``.

    Args:
        log_dict (dict): dictionary of logs (keyed by date according to the original author);
            each value is iterated as a collection of day records whose lines are read
            from the "log_data" key
        nicks(list): list of all the nicks
        nick_same_list(list): list of lists mentioning nicks which belong to same users
        DAY_BY_DAY_ANALYSIS (bool): selects per-day output instead of the aggregate graph

    Returns:
        If DAY_BY_DAY_ANALYSIS is true: a list of [nx.DiGraph, "year-month-day"] pairs,
        one per day record. Otherwise: a single aggregate nx.DiGraph.

    Side effects:
        Prints the edge weight threshold to stdout, and the first 30 conversation
        entries as well when config.DEBUGGER is set.
    """
    message_number_day_list = []
    # One slot per expected nick; a slot starts as [0] and is only turned into
    # an edge once it has the three-element form checked in message_no_add_egde.
    conversations = [[0] for i in range(config.MAX_EXPECTED_DIFF_NICKS)]
    aggregate_message_number_graph = nx.DiGraph(
    )  # plain DiGraph: at most one directed edge per ordered pair

    G = util.to_graph(nick_same_list)
    conn_comp_list = list(connected_components(G))

    # Return value is discarded, so this presumably adjusts conn_comp_list in place.
    util.create_connected_nick_list(conn_comp_list)

    # Records one sender -> receiver message for every entry of rec_list equal
    # to `nick`, unless the sender's own (corrected) nick is `nick`.
    # nick_sender is read from the enclosing scope at call time.
    # NOTE(review): the assignments below rebind this helper's own parameters,
    # so the caller only sees the update if util.extend_conversation_list
    # mutates the list it is given in place -- confirm against util.
    def msg_no_analysis_helper(rec_list, corrected_nick, nick, conn_comp_list,
                               conversations, today_conversation):
        for receiver in rec_list:
            if (receiver == nick):
                if (corrected_nick != nick):
                    nick_receiver = ''
                    nick_receiver = util.get_nick_sen_rec(
                        config.MAX_EXPECTED_DIFF_NICKS, nick, conn_comp_list,
                        nick_receiver)

                    if DAY_BY_DAY_ANALYSIS:
                        today_conversation = util.extend_conversation_list(
                            nick_sender, nick_receiver, today_conversation)
                    else:
                        conversations = util.extend_conversation_list(
                            nick_sender, nick_receiver, conversations)

    # Adds one weighted edge to message_graph per qualifying conversation
    # entry ([count, sender, receiver]) and returns the same graph object.
    def message_no_add_egde(message_graph, conversation):
        for index in xrange(config.MAX_EXPECTED_DIFF_NICKS):
            # Only fully populated entries whose count reaches the threshold qualify.
            if (len(conversation[index]) == 3 and conversation[index][0] >=
                    config.THRESHOLD_MESSAGE_NUMBER_GRAPH):
                # Both nicks must meet the minimum length.
                if len(
                        conversation[index]
                    [1]) >= config.MINIMUM_NICK_LENGTH and len(
                        conversation[index][2]) >= config.MINIMUM_NICK_LENGTH:
                    message_graph.add_edge(conversation[index][1],
                                           conversation[index][2],
                                           weight=conversation[index][0])
        return message_graph

    for day_content_all_channels in log_dict.values():
        for day_content in day_content_all_channels:
            day_log = day_content["log_data"]
            # Fresh per-day counters, same layout as `conversations`.
            today_conversation = [
                [0] for i in range(config.MAX_EXPECTED_DIFF_NICKS)
            ]
            for line in day_log:
                flag_comma = 0

                if (util.check_if_msg_line(line)):
                    # Sender nick is the text between '<' and '>'.
                    parsed_nick = re.search(r"\<(.*?)\>", line)
                    corrected_nick = util.correctLastCharCR(
                        parsed_nick.group(0)[1:-1])
                    nick_sender = ""
                    nick_receiver = ""
                    nick_sender = util.get_nick_sen_rec(
                        config.MAX_EXPECTED_DIFF_NICKS, corrected_nick,
                        conn_comp_list, nick_sender)

                    for nick in nicks:
                        # Candidate receivers: the colon-separated parts of the line.
                        rec_list = [e.strip() for e in line.split(':')]
                        util.rec_list_splice(rec_list)
                        # Nothing after the spliced first part: stop scanning nicks for this line.
                        if not rec_list[1]:
                            break
                        rec_list = util.correct_last_char_list(rec_list)
                        msg_no_analysis_helper(rec_list, corrected_nick, nick,
                                               conn_comp_list, conversations,
                                               today_conversation)

                        # Several receivers separated by commas.
                        if "," in rec_list[1]:
                            flag_comma = 1
                            rec_list_2 = [
                                e.strip() for e in rec_list[1].split(',')
                            ]
                            for i in xrange(0, len(rec_list_2)):
                                if (rec_list_2[i]):
                                    rec_list_2[i] = util.correctLastCharCR(
                                        rec_list_2[i])
                            msg_no_analysis_helper(rec_list_2, corrected_nick,
                                                   nick, conn_comp_list,
                                                   conversations,
                                                   today_conversation)

                        if (flag_comma == 0):
                            # Text between '>' and the first ", ", minus its first character.
                            rec = line[line.find(">") + 1:line.find(", ")]
                            rec = rec[1:]
                            rec = util.correctLastCharCR(rec)
                            if (rec == nick):
                                if (corrected_nick != nick):
                                    # NOTE(review): nick_receiver is computed here but never
                                    # used afterwards in this function -- no conversation
                                    # entry is recorded for this case. Confirm whether that
                                    # is intended.
                                    nick_receiver = nick_receiver_from_conn_comp(
                                        nick, conn_comp_list)

            if DAY_BY_DAY_ANALYSIS:
                today_message_number_graph = nx.DiGraph()
                today_message_number_graph = message_no_add_egde(
                    today_message_number_graph, today_conversation)
                year, month, day = util.get_year_month_day(day_content)
                # Pair the day's graph with its "year-month-day" label.
                message_number_day_list.append([
                    today_message_number_graph, year + '-' + month + '-' + day
                ])

    print "\nBuilding graph object with EDGE WEIGHT THRESHOLD:", config.THRESHOLD_MESSAGE_NUMBER_GRAPH

    # The aggregate graph is only populated when per-day output was not requested.
    if not DAY_BY_DAY_ANALYSIS:
        aggregate_message_number_graph = message_no_add_egde(
            aggregate_message_number_graph, conversations)

    if config.DEBUGGER:
        print "========> 30 on " + str(len(conversations)) + " conversations"
        print conversations[:30]

    if DAY_BY_DAY_ANALYSIS:
        return message_number_day_list
    else:
        return aggregate_message_number_graph