def test_correct_seconds(self):
     """
     Check ATSRecord.time_to_second() against every entry of known_values.

     Each entry of self.known_values is indexed as a pair: element 0 is the
     raw text handed to parse_apache_line, element 1 is the value that
     time_to_second() must return for the record built from it.
     """
     for known in self.known_values:
         parsed_fields = parse_apache_line(known[0])
         record = ATSRecord(parsed_fields)
         expected_seconds = known[1]
         actual_seconds = record.time_to_second()
         assert expected_seconds == actual_seconds
Esempio n. 2
0
    def parse_log(self):
        """
        Read each line of the log file and batch the records corresponding
        to each client (ip) make a dictionary of lists each consisting of all
         records
        """
        #to check the performance and the sensitivity of the log mancher
        total_failure_munches = 0
        for log_filename in self._log_file_list:
            try:
                self._log_lines = open(log_filename)
            except IOError:
                raise IOError

            self._log_lines.seek(0, 2)  #go to end to check the size
            total_file_size = self._log_lines.tell()
            self._log_lines.seek(0, 0)  #and go back to the begining
            previous_progress = 0

            print "Parsing ", log_filename.split('/')[-1]

            #we are going to keep track of each ip and last session number corresponding
            #to that ip
            ip_session_tracker = {}
            for cur_rec in self._log_lines:
                new_session = False
                cur_rec_dict = parse_apache_line(cur_rec)

                if cur_rec_dict:
                    cur_ip = cur_rec_dict["host"]
                    cur_ats_rec = ATSRecord(cur_rec_dict)

                    if not cur_ip in ip_session_tracker:
                        ip_session_tracker[cur_ip] = 0
                        new_session = True

                    #now we are checking if we hit a new session
                    #if we already decided that we are in a new session then there is nothing
                    #to investigate
                    if not new_session:
                        #so we have a session already recorded, compare
                        #the time of that last record of that session with
                        #this session
                        if cur_ats_rec.time_to_second(
                        ) - self._ordered_records[
                            (cur_ip, ip_session_tracker[cur_ip]
                             )][-1].time_to_second() > self.DEAD_SESSION_PAUSE:
                            #the session is dead we have to start a new session
                            ip_session_tracker[cur_ip] += 1
                            new_session = True

                    if new_session:
                        self._ordered_records[(cur_ip,
                                               ip_session_tracker[cur_ip])] = [
                                                   cur_ats_rec
                                               ]
                    else:
                        self._ordered_records[(
                            cur_ip,
                            ip_session_tracker[cur_ip])].append(cur_ats_rec)

                else:
                    #unable to munch and grasp the data due to unrecognizable format
                    total_failure_munches += 1

                #reporting progress
                current_progress = (self._log_lines.tell() *
                                    100) / total_file_size
                if (current_progress != previous_progress):
                    print "%", current_progress
                    previous_progress = current_progress

            self._log_lines.close()

        self._log_file_list = []

        #for debug, it should be moved to be dumped in the logger
        print "Parsed ", len(self._ordered_records)
        if total_failure_munches > 0:
            print "Failed to parse ", total_failure_munches, " records"
        self.dict_invalid = False
Esempio n. 3
0
 def test_correct_seconds(self):
     """
     Verify time_to_second() for each sample stored in self.known_values.

     A sample is indexed positionally: [0] is the text given to
     parse_apache_line, [1] is the value the resulting ATSRecord must
     report from time_to_second().
     """
     for sample in self.known_values:
         record = ATSRecord(parse_apache_line(sample[0]))
         wanted = sample[1]
         got = record.time_to_second()
         assert wanted == got
Esempio n. 4
0
    def parse_log(self):
        """
        Read each line of the log file and batch the records corresponding
        to each client (ip) make a dictionary of lists each consisting of all
         records
        """
        #to check the performance and the sensitivity of the log mancher
        total_failure_munches = 0
        for log_filename in self._log_file_list:
            try:
                self._log_lines = open(log_filename)
            except IOError:
                raise IOError

            self._log_lines.seek(0, 2) #go to end to check the size
            total_file_size = self._log_lines.tell()
            self._log_lines.seek(0, 0) #and go back to the begining
            previous_progress = 0

            print "Parsing ", log_filename.split('/')[-1]

            #we are going to keep track of each ip and last session number corresponding
            #to that ip
            ip_session_tracker = {}
            for cur_rec in self._log_lines:
                new_session = False
                cur_rec_dict = parse_apache_line(cur_rec)

                if cur_rec_dict:
                    cur_ip = cur_rec_dict["host"];
                    cur_ats_rec = ATSRecord(cur_rec_dict);

                    if not cur_ip in ip_session_tracker:
                        ip_session_tracker[cur_ip] = 0
                        new_session = True

                    #now we are checking if we hit a new session
                    #if we already decided that we are in a new session then there is nothing
                    #to investigate
                    if not new_session:
                        #so we have a session already recorded, compare
                        #the time of that last record of that session with
                        #this session
                        if cur_ats_rec.time_to_second() - self._ordered_records[(cur_ip, ip_session_tracker[cur_ip])][-1].time_to_second() > self.DEAD_SESSION_PAUSE:
                            #the session is dead we have to start a new session
                            ip_session_tracker[cur_ip] += 1
                            new_session = True

                    if new_session:
                        self._ordered_records[(cur_ip, ip_session_tracker[cur_ip])] = [cur_ats_rec]
                    else:
                        self._ordered_records[(cur_ip, ip_session_tracker[cur_ip])].append(cur_ats_rec)

                else:
                    #unable to munch and grasp the data due to unrecognizable format
                    total_failure_munches += 1

                #reporting progress
                current_progress = (self._log_lines.tell()*100)/total_file_size
                if (current_progress != previous_progress):
                    print "%", current_progress
                    previous_progress = current_progress


            self._log_lines.close()

        self._log_file_list = []

        #for debug, it should be moved to be dumped in the logger
        print "Parsed ", len(self._ordered_records)
        if total_failure_munches > 0:
            print "Failed to parse ", total_failure_munches, " records"
        self.dict_invalid = False