Example #1
0
 def __init__(self, packetdispatcher):
     '''
     Build the session from the flows and DNS data in packetdispatcher.

     Reads TCP flows from packetdispatcher.tcp.flowdict and DNS records
     from packetdispatcher.udp.dns. Flows that raise http.Error or
     dpkt.dpkt.Error while being parsed are logged and left out.
     '''
     # turn every TCP flow into an http.Flow; log and skip the failures
     self.flows = []
     for tcp_flow in packetdispatcher.tcp.flowdict.itervalues():
         try:
             parsed = http.Flow(tcp_flow)
         except (http.Error, dpkt.dpkt.Error):
             log.warning(sys.exc_info()[1])
         else:
             self.flows.append(parsed)
     # flatten the per-flow request/response pairs into a single list
     pairs = reduce(lambda acc, flow: acc + flow.pairs, self.flows, [])
     # trackers and the result list
     self.user_agents = UserAgentTracker()
     # page tracking is optional, controlled by settings.process_pages
     self.page_tracker = PageTracker() if settings.process_pages else None
     self.entries = []
     # order the pairs by request.ts_connect
     pairs.sort(key=lambda pair: pair.request.ts_connect)
     # make one Entry per pair, feeding the trackers as we go
     for pair in pairs:
         request = pair.request
         entry = Entry(request, pair.response)
         # remember the user-agent, when the request carries one
         headers = request.msg.headers
         if 'user-agent' in headers:
             self.user_agents.add(headers['user-agent'])
         # ask the page tracker (if any) which page this entry belongs to
         if self.page_tracker:
             entry.pageref = self.page_tracker.getref(entry)
         self.entries.append(entry)
     self.user_agent = self.user_agents.dominant_user_agent()
     # DNS is handled AFTER sorting: only the first entry (in sorted
     # order) that mentions a host gets that host's DNS info attached
     seen_hosts = set()
     dns = packetdispatcher.udp.dns
     for entry in self.entries:
         host = entry.request.host
         if host in seen_hosts:
             continue
         if host in dns.by_hostname:
             # TODO: handle multiple DNS queries for now just use last one
             entry.add_dns(dns.by_hostname[host][-1])
         seen_hosts.add(host)
Example #2
0
class HttpSession(object):
    '''
    All of the http traffic found within a pcap.

    Members:
    * flows = [http.Flow], one for each TCP flow that parsed without error
    * entries = [Entry], all http request/response pairs, ordered by
      request.ts_connect
    * user_agents = UserAgentTracker, fed with every request's user-agent
    * user_agent = result of user_agents.dominant_user_agent()
    * page_tracker = PageTracker, supplies each entry's page_ref
    '''
    def __init__(self, packetdispatcher):
        '''
        Build the session from the flows and DNS data in packetdispatcher.

        Reads TCP flows from packetdispatcher.tcp.flowdict and DNS records
        from packetdispatcher.udp.dns. Flows that raise http.Error while
        being parsed are logged and left out.
        '''
        # turn every TCP flow into an http.Flow; log and skip the failures
        self.flows = []
        for tcp_flow in packetdispatcher.tcp.flowdict.itervalues():
            try:
                parsed = http.Flow(tcp_flow)
            except http.Error as error:
                log.warning(error)
            else:
                self.flows.append(parsed)
        # flatten the per-flow request/response pairs into a single list
        pairs = reduce(lambda acc, flow: acc + flow.pairs, self.flows, [])
        # trackers and the result list
        self.user_agents = UserAgentTracker()
        self.page_tracker = PageTracker()
        self.entries = []
        # order the pairs by request.ts_connect
        pairs.sort(key=lambda pair: pair.request.ts_connect)
        # make one Entry per pair, feeding the trackers as we go
        for pair in pairs:
            request = pair.request
            entry = Entry(request, pair.response)
            # remember the user-agent, when the request carries one
            headers = request.msg.headers
            if 'user-agent' in headers:
                self.user_agents.add(headers['user-agent'])
            # ask the page tracker which page this entry belongs to
            entry.page_ref = self.page_tracker.getref(entry)
            self.entries.append(entry)
        self.user_agent = self.user_agents.dominant_user_agent()
        # DNS is handled AFTER sorting: only the first entry (in sorted
        # order) that mentions a host gets that host's DNS info attached
        seen_hosts = set()
        dns = packetdispatcher.udp.dns
        for entry in self.entries:
            host = entry.request.host
            if host in seen_hosts:
                continue
            if host in dns.by_hostname:
                # TODO: handle multiple DNS queries for now just use last one
                entry.add_dns(dns.by_hostname[host][-1])
            seen_hosts.add(host)

    def json_repr(self):
        '''
        Return a JSON serializable python object representation of self.

        The result is a dict with a single 'log' key holding the version,
        creator, browser, pages and entries; entries are sorted by ts_start
        (self.entries itself is left untouched).
        '''
        creator = {'name': 'pcap2har', 'version': '0.1'}
        browser = {'name': self.user_agent, 'version': 'mumble'}
        log_section = {
            'version': '1.1',
            'creator': creator,
            'browser': browser,
            'pages': self.page_tracker,
            'entries': sorted(self.entries, key=lambda entry: entry.ts_start),
        }
        return {'log': log_section}
class HttpSession(object):
    '''
    All of the http traffic found within a pcap.

    Members:
    * flows = [http.Flow], one for each TCP flow that parsed without error
    * entries = [Entry], all http request/response pairs, ordered by
      request.ts_connect
    * user_agents = UserAgentTracker, fed with every request's user-agent
    * user_agent = result of user_agents.dominant_user_agent()
    * page_tracker = PageTracker, or None when settings.process_pages is off
    '''
    def __init__(self, packetdispatcher):
        '''
        Build the session from the flows and DNS data in packetdispatcher.

        Reads TCP flows from packetdispatcher.tcp.flowdict and DNS records
        from packetdispatcher.udp.dns. Flows that raise http.Error or
        dpkt.dpkt.Error while being parsed are logged and left out.
        '''
        # turn every TCP flow into an http.Flow; log and skip the failures
        self.flows = []
        for tcp_flow in packetdispatcher.tcp.flowdict.itervalues():
            try:
                parsed = http.Flow(tcp_flow)
            except (http.Error, dpkt.dpkt.Error):
                log.warning(sys.exc_info()[1])
            else:
                self.flows.append(parsed)
        # flatten the per-flow request/response pairs into a single list
        pairs = reduce(lambda acc, flow: acc + flow.pairs, self.flows, [])
        # trackers and the result list
        self.user_agents = UserAgentTracker()
        # page tracking is optional, controlled by settings.process_pages
        self.page_tracker = PageTracker() if settings.process_pages else None
        self.entries = []
        # order the pairs by request.ts_connect
        pairs.sort(key=lambda pair: pair.request.ts_connect)
        # make one Entry per pair, feeding the trackers as we go
        for pair in pairs:
            request = pair.request
            entry = Entry(request, pair.response)
            # remember the user-agent, when the request carries one
            headers = request.msg.headers
            if 'user-agent' in headers:
                self.user_agents.add(headers['user-agent'])
            # ask the page tracker (if any) which page this entry belongs to
            if self.page_tracker:
                entry.pageref = self.page_tracker.getref(entry)
            self.entries.append(entry)
        self.user_agent = self.user_agents.dominant_user_agent()
        # DNS is handled AFTER sorting: only the first entry (in sorted
        # order) that mentions a host gets that host's DNS info attached
        seen_hosts = set()
        dns = packetdispatcher.udp.dns
        for entry in self.entries:
            host = entry.request.host
            if host in seen_hosts:
                continue
            if host in dns.by_hostname:
                # TODO: handle multiple DNS queries for now just use last one
                entry.add_dns(dns.by_hostname[host][-1])
            seen_hosts.add(host)

    def json_repr(self):
        '''
        Return a JSON serializable python object representation of self.

        The result is a dict with a single 'log' key holding the version,
        creator, browser and entries (sorted by ts_start). A 'pages' item
        is included only when self.page_tracker is truthy.
        '''
        creator = {'name': 'pcap2har', 'version': '0.1'}
        browser = {'name': self.user_agent, 'version': 'mumble'}
        log_section = {
            'version': '1.1',
            'creator': creator,
            'browser': browser,
            'entries': sorted(self.entries, key=lambda entry: entry.ts_start),
        }
        if self.page_tracker:
            log_section['pages'] = self.page_tracker
        return {'log': log_section}
Example #4
0
    def __init__(self, packetdispatcher, drop_response_bodies=False):
        '''
        Parses http.flows from packetdispatcher, and parses those for HAR info

        Arguments:
        * packetdispatcher = supplies the TCP flows (tcp.flows()) and the
          DNS records (udp.dns) for the capture
        * drop_response_bodies = handed unchanged to http.Flow

        Flows that raise http.Error or dpkt.dpkt.Error while being parsed
        are skipped; the error is logged and kept in self.errors as an
        HttpErrorRecord.
        '''
        self.errors = []
        # parse http flows
        self.flows = []
        for flow in packetdispatcher.tcp.flows():
            try:
                self.flows.append(http.Flow(flow, drop_response_bodies))
            except (http.Error, dpkt.dpkt.Error) as error:
                self.errors.append(HttpErrorRecord(error))
                logging.warning(error)
        # combine the messages into a list (single pass, instead of
        # repeatedly concatenating lists)
        pairs = [pair for flow in self.flows for pair in flow.pairs]
        # set-up
        self.user_agents = UserAgentTracker()
        if settings.process_pages:
            self.page_tracker = PageTracker()
        else:
            self.page_tracker = None
        self.entries = []
        # sort pairs on request.ts_connect
        pairs.sort(key=lambda pair: pair.request.ts_connect)
        # iter through messages and do important stuff
        for msg in pairs:
            entry = Entry(msg.request, msg.response)
            # if msg.request has a user-agent, add it to our list
            if 'user-agent' in msg.request.msg.headers:
                self.user_agents.add(msg.request.msg.headers['user-agent'])
            # if we are tracking pages, ask the tracker for this entry's page
            if self.page_tracker:
                entry.pageref = self.page_tracker.getref(entry)
            # add it to the list, if we're supposed to keep it.
            if entry.response or settings.keep_unfulfilled_requests:
                self.entries.append(entry)
        self.user_agent = self.user_agents.dominant_user_agent()
        # handle DNS AFTER sorting
        # this algo depends on first appearance of a name
        # being the actual first mention
        names_mentioned = set()
        dns = packetdispatcher.udp.dns
        # page load times only make sense when pages are being tracked;
        # without a tracker there is nothing to write them to
        track_pages = self.page_tracker is not None
        # pageref -> (earliest entry start, latest entry end)
        page_times = {}
        for entry in self.entries:
            name = entry.request.host
            # if this is the first time seeing the name
            if name not in names_mentioned:
                if name in dns.by_hostname:
                    # attach every DNS record seen for this name
                    # (e.g. separate IPv4/IPv6 lookups)
                    for d in dns.by_hostname[name]:
                        entry.add_dns(d)
                names_mentioned.add(name)
            entry.calc_total_time()
            # handle page network load time
            if track_pages:
                p_time = page_times.get(entry.pageref, (entry.ts_start, 0))
                page_times[entry.pageref] = (
                    min(p_time[0], entry.ts_start),
                    max(p_time[1], entry.ts_start + entry.total_time))

        # write page network load times (skipped when page tracking is off,
        # where self.page_tracker is None and has no pages)
        if track_pages:
            for page in self.page_tracker.pages:
                p_time = page_times.get(page.pageref)
                if p_time:
                    page.network_load_time = p_time[1] - p_time[0]