def test_correct_seconds(self):
    """
    Check time_to_second() against every entry of self.known_values.

    Element 0 of each entry is run through parse_apache_line and wrapped
    in an ATSRecord; the record's time_to_second() must equal element 1.
    """
    for sample in self.known_values:
        record = ATSRecord(parse_apache_line(sample[0]))
        assert sample[1] == record.time_to_second()
def parse_log(self):
    """
    Read every queued log file and batch its records per client session.

    Each line of every file in self._log_file_list is handed to
    parse_apache_line. Lines that yield a falsy result are counted as
    failures and skipped. Every other line is wrapped in an ATSRecord and
    stored in self._ordered_records, a dictionary keyed by
    (ip, session_number) whose values are lists of records.

    A new session is opened for an ip the first time it is seen in a file,
    and whenever the gap between the current record and the last record of
    the ip's current session (both measured with time_to_second()) exceeds
    self.DEAD_SESSION_PAUSE.

    When all files are processed the file list is emptied, a summary is
    printed and self.dict_invalid is set to False.

    Raises:
        IOError: if a log file cannot be opened. The exception raised by
            open() is propagated unchanged, so errno and filename survive.
    """
    # to check the performance and the sensitivity of the log muncher
    total_failure_munches = 0

    for log_filename in self._log_file_list:
        # No try/except around open(): catching IOError only to raise a
        # bare IOError would throw away the errno and the file name.
        self._log_lines = open(log_filename)
        try:
            self._log_lines.seek(0, 2)  # go to the end to check the size
            total_file_size = self._log_lines.tell()
            self._log_lines.seek(0, 0)  # and go back to the beginning
            previous_progress = 0
            # tell() cannot be used while iterating a text file on
            # Python 3, so the position is estimated from the lengths of
            # the lines read so far.
            consumed_size = 0

            # Single-argument print() works on Python 2 and 3; the double
            # space reproduces the output of `print "Parsing ", name`.
            print("Parsing  %s" % log_filename.split('/')[-1])

            # we are going to keep track of each ip and the last session
            # number corresponding to that ip
            # NOTE(review): the tracker restarts for every file while
            # self._ordered_records is shared, so an ip present in two
            # files gets its (ip, 0) entry replaced by the later file.
            # Confirm this is intended.
            ip_session_tracker = {}
            for cur_rec in self._log_lines:
                consumed_size += len(cur_rec)
                cur_rec_dict = parse_apache_line(cur_rec)

                if cur_rec_dict:
                    cur_ip = cur_rec_dict["host"]
                    cur_ats_rec = ATSRecord(cur_rec_dict)

                    new_session = cur_ip not in ip_session_tracker
                    if new_session:
                        ip_session_tracker[cur_ip] = 0
                    else:
                        # a session is already recorded for this ip:
                        # compare the time of its last record with the
                        # time of the current record
                        cur_seconds = cur_ats_rec.time_to_second()
                        last_rec = self._ordered_records[
                            (cur_ip, ip_session_tracker[cur_ip])][-1]
                        if (cur_seconds - last_rec.time_to_second()
                                > self.DEAD_SESSION_PAUSE):
                            # the session is dead, start a new one
                            ip_session_tracker[cur_ip] += 1
                            new_session = True

                    session_key = (cur_ip, ip_session_tracker[cur_ip])
                    if new_session:
                        self._ordered_records[session_key] = [cur_ats_rec]
                    else:
                        self._ordered_records[session_key].append(cur_ats_rec)
                else:
                    # unable to munch and grasp the data due to
                    # unrecognizable format
                    total_failure_munches += 1

                # reporting progress; floor division keeps the percentage
                # an integer on every Python version
                if total_file_size > 0:
                    current_progress = (consumed_size * 100) // total_file_size
                    if current_progress != previous_progress:
                        print("%% %s" % current_progress)
                        previous_progress = current_progress
        finally:
            # release the handle even if a line blows up while parsing
            self._log_lines.close()

    self._log_file_list = []

    # for debug, it should be moved to be dumped in the logger
    print("Parsed  %s" % len(self._ordered_records))
    if total_failure_munches > 0:
        print("Failed to parse  %s  records" % total_failure_munches)

    self.dict_invalid = False
def test_correct_seconds(self):
    """
    Check time_to_second() against every entry of self.known_values.

    Element 0 of each entry is run through parse_apache_line and wrapped
    in an ATSRecord; the record's time_to_second() must equal element 1.
    """
    for sample in self.known_values:
        record = ATSRecord(parse_apache_line(sample[0]))
        assert sample[1] == record.time_to_second()
def parse_log(self):
    """
    Read every queued log file and batch its records per client session.

    Each line of every file in self._log_file_list is handed to
    parse_apache_line. Lines that yield a falsy result are counted as
    failures and skipped. Every other line is wrapped in an ATSRecord and
    stored in self._ordered_records, a dictionary keyed by
    (ip, session_number) whose values are lists of records.

    A new session is opened for an ip the first time it is seen in a file,
    and whenever the gap between the current record and the last record of
    the ip's current session (both measured with time_to_second()) exceeds
    self.DEAD_SESSION_PAUSE.

    When all files are processed the file list is emptied, a summary is
    printed and self.dict_invalid is set to False.

    Raises:
        IOError: if a log file cannot be opened. The exception raised by
            open() is propagated unchanged, so errno and filename survive.
    """
    # to check the performance and the sensitivity of the log muncher
    total_failure_munches = 0

    for log_filename in self._log_file_list:
        # No try/except around open(): catching IOError only to raise a
        # bare IOError would throw away the errno and the file name.
        self._log_lines = open(log_filename)
        try:
            self._log_lines.seek(0, 2)  # go to the end to check the size
            total_file_size = self._log_lines.tell()
            self._log_lines.seek(0, 0)  # and go back to the beginning
            previous_progress = 0
            # tell() cannot be used while iterating a text file on
            # Python 3, so the position is estimated from the lengths of
            # the lines read so far.
            consumed_size = 0

            # Single-argument print() works on Python 2 and 3; the double
            # space reproduces the output of `print "Parsing ", name`.
            print("Parsing  %s" % log_filename.split('/')[-1])

            # we are going to keep track of each ip and the last session
            # number corresponding to that ip
            # NOTE(review): the tracker restarts for every file while
            # self._ordered_records is shared, so an ip present in two
            # files gets its (ip, 0) entry replaced by the later file.
            # Confirm this is intended.
            ip_session_tracker = {}
            for cur_rec in self._log_lines:
                consumed_size += len(cur_rec)
                cur_rec_dict = parse_apache_line(cur_rec)

                if cur_rec_dict:
                    cur_ip = cur_rec_dict["host"]
                    cur_ats_rec = ATSRecord(cur_rec_dict)

                    new_session = cur_ip not in ip_session_tracker
                    if new_session:
                        ip_session_tracker[cur_ip] = 0
                    else:
                        # a session is already recorded for this ip:
                        # compare the time of its last record with the
                        # time of the current record
                        cur_seconds = cur_ats_rec.time_to_second()
                        last_rec = self._ordered_records[
                            (cur_ip, ip_session_tracker[cur_ip])][-1]
                        if (cur_seconds - last_rec.time_to_second()
                                > self.DEAD_SESSION_PAUSE):
                            # the session is dead, start a new one
                            ip_session_tracker[cur_ip] += 1
                            new_session = True

                    session_key = (cur_ip, ip_session_tracker[cur_ip])
                    if new_session:
                        self._ordered_records[session_key] = [cur_ats_rec]
                    else:
                        self._ordered_records[session_key].append(cur_ats_rec)
                else:
                    # unable to munch and grasp the data due to
                    # unrecognizable format
                    total_failure_munches += 1

                # reporting progress; floor division keeps the percentage
                # an integer on every Python version
                if total_file_size > 0:
                    current_progress = (consumed_size * 100) // total_file_size
                    if current_progress != previous_progress:
                        print("%% %s" % current_progress)
                        previous_progress = current_progress
        finally:
            # release the handle even if a line blows up while parsing
            self._log_lines.close()

    self._log_file_list = []

    # for debug, it should be moved to be dumped in the logger
    print("Parsed  %s" % len(self._ordered_records))
    if total_failure_munches > 0:
        print("Failed to parse  %s  records" % total_failure_munches)

    self.dict_invalid = False