def __init__(self, callback, timeout=10):
    """Set up an empty history that hands records to *callback*.

    Args:
        callback: Stored as ``_callback_f``; it is not invoked here.
        timeout: Stored as ``_timeout``; expressed in seconds.
    """
    # Plain state first: nothing tracked yet, nothing counted yet.
    self._http_comms = {}
    self._counter = 0
    self._timeout = timeout  # Seconds
    self._callback_f = callback

    # Collaborators; the logger is created before the filter, as before.
    self._logger = LoggerFactory.create_logger(self)
    self._filters = HttpFilter()
class BrowsingHistory:
    """Assemble HTTP communication records from individual pattern matches.

    Each incoming result is converted to a communication model and stored
    under that model's ``five_tuple_key``.  A ``GET`` match opens a record;
    ``Host:``, ``Content-Type:`` and ``<title`` matches fill in fields of an
    already-open record.  Finished records are passed to the callback given
    to the constructor, and records older than ``timeout`` seconds are
    dropped by a periodic sweep.
    """

    # Number of processed results between two sweeps for expired records.
    _GC_INTERVAL = 100000

    def __init__(self, callback, timeout=10):
        """Create an empty history.

        Args:
            callback: Callable invoked with a communication record when the
                record is reported.
            timeout: Age in seconds after which an unfinished record is
                removed by the sweep.
        """
        self._logger = LoggerFactory.create_logger(self)
        self._http_comms = {}
        self._timeout = timeout  # Seconds
        self._counter = 0
        self._filters = HttpFilter()
        self._callback_f = callback

    def add_http_result(self, http_result):
        """Merge one pattern match and report the record once it is complete.

        A record is reported when ``_is_http_comm_valid`` accepts it and it
        passes ``_apply_filters``.

        Args:
            http_result: Match object providing ``pattern``, ``result`` and
                the connection fields read by ``to_http_comm``.
        """
        http_comm = self.to_http_comm(http_result)
        key = http_comm.five_tuple_key
        pattern = http_result.pattern

        if pattern == 'GET':
            # A request line starts (or restarts) the record for this key.
            http_comm.uri = http_result.result
            self._http_comms[key] = http_comm
        elif pattern in ('Host:', 'Content-Type:', '<title'):
            if key not in self._http_comms:
                # Nothing to attach the field to; the sweep counter is
                # intentionally left untouched, as in the original flow.
                return
            pending = self._http_comms[key]
            if pattern == 'Host:':
                # The Host header must belong to the same stream as the GET.
                if pending.stream_id == http_comm.stream_id:
                    pending.host = http_result.result
            elif is_request_and_response_pair(pending, http_comm):
                # Content-Type and <title are only accepted when the helper
                # pairs this match with the stored request.
                if pattern == 'Content-Type:':
                    pending.content_type = http_result.result
                else:
                    pending.title = http_result.result

        # Check http comm is valid or not.  An unrecognised pattern on an
        # unseen connection reaches this point with no stored record; the
        # validity check treats that as "not valid" instead of raising.
        if self._is_http_comm_valid(key):
            finished = self._http_comms[key]
            if self._apply_filters(finished):
                self._callback_f(finished)
            # NOTE(review): the record is dropped once complete whether or
            # not it passed the filters -- confirm this matches the intended
            # nesting of the original ``del``.
            del self._http_comms[key]

        self._gc_manager()

    def add_http_result_without_filter(self, http_result):
        """Merge one match and report the record as soon as its host is known.

        Only ``GET`` and ``Host:`` matches are considered; no validity check
        or filter is applied and the record is not removed after reporting.

        Args:
            http_result: Match object providing ``pattern``, ``result`` and
                the connection fields read by ``to_http_comm``.
        """
        http_comm = self.to_http_comm(http_result)
        key = http_comm.five_tuple_key
        pattern = http_result.pattern

        if pattern == 'GET':
            http_comm.uri = http_result.result
            self._http_comms[key] = http_comm
        elif pattern == 'Host:':
            if key not in self._http_comms:
                return
            pending = self._http_comms[key]
            if pending.stream_id == http_comm.stream_id:
                pending.host = http_result.result
                # NOTE(review): callback assumed to fire only when the host
                # was attached -- confirm against the original nesting.
                self._callback_f(pending)

        self._gc_manager()

    def _gc_manager(self):
        """Count one processed result and sweep when the interval is exceeded."""
        self._counter += 1
        if self._counter > self._GC_INTERVAL:
            self._gc()
            self._counter = 0

    @staticmethod
    def to_http_comm(http_result):
        """Convert an HttpResult to an HTTPCommunicationModel."""
        return HTTPCommunicationModel(
            http_result.id,
            http_result.src_ip,
            http_result.dst_ip,
            http_result.src_port,
            http_result.dst_port,
            http_result.timestamp,
            http_result.stream_id,
        )

    def _is_http_comm_valid(self, key):
        """Return True when the record stored under *key* is ready to report.

        A record is ready when its own ``is_valid()`` passes, its content
        type is exactly ``'text/html'`` and it has a non-empty title.  A key
        with no stored record yields False.
        """
        http_comm = self._http_comms.get(key)
        if http_comm is None:
            return False
        return bool(
            http_comm.is_valid()
            and http_comm.content_type == 'text/html'
            and http_comm.title
        )

    def _apply_filters(self, http_comm):
        """Return True when both the URL filter and the title filter accept."""
        return bool(
            self._filters.url(http_comm.url)
            and self._filters.title(http_comm.title)
        )

    def _gc(self):
        """Delete every record older than the timeout and log how many went."""
        current_time = datetime.now()
        # Collect first: the dict must not change size while it is iterated.
        expired = [
            key
            for key, http_comm in self._http_comms.items()
            if (current_time - http_comm.created_at).total_seconds() > self._timeout
        ]
        for key in expired:
            del self._http_comms[key]
        self._logger.info("{0} record are deleted".format(len(expired)))