def calculateFrequenciesFromXML(self):
    """Group the messages of the input log into time-windowed contexts.

    Streams the file named by ``sys.argv[1]`` line by line. For every line
    whose timestamp can be extracted, the text between ``<msg>`` and
    ``</msg>`` is added to the current ``Context``. When a line's timestamp
    is more than ``Globals.CONTEXT_DURATION - 1`` seconds past the current
    context's timestamp, the current context is written out with
    ``self.writeContextToFile``, appended to ``self.context_history``, and a
    new context is started at that line's timestamp.

    Lines for which ``self.extractTimestamp`` returns ``None`` are skipped
    and are not counted in the progress total.

    Side effects: appends to ``self.context_history``, calls
    ``self.writeContextToFile`` once per context, prints a progress message
    every 1,000,000 parsed lines, and closes ``self.output_file`` at the end.

    An empty input file (or one with no parseable timestamps) produces no
    contexts; ``self.output_file`` is still closed.
    """
    # A line further than this from the current context's timestamp starts
    # a new context. Hoisted because it does not change between lines.
    context_span = timedelta(seconds=Globals.CONTEXT_DURATION - 1)
    progress_interval = 1000000

    # Created lazily from the first line with a valid timestamp, so an empty
    # file or a malformed first line cannot seed a broken context.
    current_context = None
    num_lines_parsed = 0

    # Iterate the file object directly: the input may be millions of lines,
    # and the context manager guarantees the handle is closed.
    with open(sys.argv[1], "r") as input_file:
        for line in input_file:
            line_timestamp = self.extractTimestamp(line)
            # Malformed timestamp: ignore the line entirely.
            if line_timestamp is None:
                continue

            # Payload between the <msg> tags (5 == len("<msg>")).
            # NOTE(review): if a tag is missing, str.find returns -1 and
            # this slice yields an arbitrary fragment of the line -- confirm
            # whether such lines can occur in the input.
            line_message = line[line.find("<msg>") + 5:line.find("</msg>")]

            if current_context is None:
                current_context = Context(line_timestamp)
            elif line_timestamp - current_context.timestamp > context_span:
                # The window has elapsed: flush the finished context and
                # open a new one anchored at this line's timestamp.
                self.writeContextToFile(current_context)
                self.context_history.append(current_context)
                current_context = Context(line_timestamp)

            # Every valid line contributes its message, including the line
            # that opened the context (previously that message was dropped
            # for every context after the first).
            current_context.addMessageToContext(line_message)

            num_lines_parsed += 1
            if num_lines_parsed % progress_interval == 0:
                print("Finished: " + str(num_lines_parsed))

    # Flush the final, still-open context (absent when no line was valid).
    if current_context is not None:
        self.context_history.append(current_context)
        self.writeContextToFile(current_context)
    self.output_file.close()