Example #1
def test_create_asset_timeline(har_data):
    """
    Tests the asset timeline function by making sure that it inserts one object
    correctly.
    """
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)

    entry = har_data('single_entry.har')

    # Get the datetime object of the start time and total load time
    time_key = dateutil.parser.parse(entry['startedDateTime'])
    load_time = int(entry['time'])

    asset_timeline = har_parser.create_asset_timeline([entry])

    # The number of entries in the timeline should match the load time
    assert len(asset_timeline) == load_time

    for t in range(1, load_time):
        assert time_key in asset_timeline
        assert len(asset_timeline[time_key]) == 1
        # Compare the dicts
        for key, value in entry.items():
            assert asset_timeline[time_key][0][key] == value
        time_key = time_key + datetime.timedelta(milliseconds=1)
Example #3
def test_init(har_data):
    # Make sure we only tolerate valid input
    with pytest.raises(ValueError):
        har_parser = HarParser('please_dont_work')
        assert har_parser

    har_data = har_data('humanssuck.net.har')
    har_parser = HarParser(har_data)
    for page in har_parser.pages:
        assert isinstance(page, HarPage)

    assert har_parser.browser == {'name': 'Firefox', 'version': '25.0.1'}
    assert har_parser.version == '1.1'
    assert har_parser.creator == {'name': 'Firebug', 'version': '1.12'}
Example #4
def test_match_status_code(har_data):
    """
    Tests the ability of the parser to match status codes.
    """
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)

    entry = har_data('single_entry.har')

    # TEST THE REGEX FEATURE FIRST #
    assert har_parser.match_status_code(entry, '2.*')
    assert not har_parser.match_status_code(entry, '3.*')
    # TEST LITERAL STRING MATCH #
    assert har_parser.match_status_code(entry, '200', regex=False)
    assert not har_parser.match_status_code(entry, '201', regex=False)
Example #5
def test_http_version(har_data):
    """
    Tests the ability of the parser to match the HTTP version.
    """
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)

    entry = har_data('single_entry.har')

    # TEST THE REGEX FEATURE FIRST #
    assert har_parser.match_http_version(entry, '.*1.1')
    assert not har_parser.match_http_version(entry, '.*2')
    # TEST LITERAL STRING MATCH #
    assert har_parser.match_http_version(entry, 'HTTP/1.1', regex=False)
    assert not har_parser.match_http_version(entry, 'HTTP/2.0', regex=False)
Example #6
def test_init(har_data):
    """
    Test the object loading
    """
    with pytest.raises(ValueError):
        page = HarPage(PAGE_ID)

    init_data = har_data('humanssuck.net.har')

    # Throws PageNotFoundException with bad page ID
    with pytest.raises(PageNotFoundError):
        page = HarPage(BAD_PAGE_ID, har_data=init_data)

    # Make sure it can load with either har_data or a parser
    page = HarPage(PAGE_ID, har_data=init_data)
    assert isinstance(page, HarPage)
    parser = HarParser(init_data)
    page = HarPage(PAGE_ID, har_parser=parser)
    assert isinstance(page, HarPage)

    assert len(page.entries) == 4
    # Make sure that the entries are actually in order. Going a little bit
    # old school here.
    for index in range(0, len(page.entries)):
        if index != len(page.entries) - 1:
            current_date = dateutil.parser.parse(
                page.entries[index]['startedDateTime'])
            next_date = dateutil.parser.parse(
                page.entries[index + 1]['startedDateTime'])
            assert current_date <= next_date
Example #7
def scan_files(path):
    data = []
    # Parse all files in directory
    for filename in os.listdir(path):
        with open(os.path.join(path, filename), 'r') as f:
            har_parser = HarParser(json.loads(f.read()))

        start_time = dateutil.parser.parse(har_parser.pages[0].entries[0]["startedDateTime"]) 
        latest_time = start_time
        
        # Parse all resources (HTML, CSS, JS, ...)
        for entry in har_parser.pages[0].entries:
            if entry["time"] is None:
                s = 0
            else:
                s = float(entry["time"]) / 1000

            current_time = dateutil.parser.parse(entry["startedDateTime"]) + datetime.timedelta(seconds=s)
            if current_time > latest_time:
                latest_time = current_time

        total = latest_time - start_time
        # if total < datetime.timedelta(seconds=1000):
        #     os.remove(os.path.join(path, filename))
        #     print(filename)

        data.append(total.total_seconds() * 1000)
    return data
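A minimal usage sketch for scan_files; the directory name 'hars/' below is a hypothetical example, and the function assumes every file in that directory is a JSON-encoded HAR file.
# Hypothetical usage: print each capture's total load duration in milliseconds.
if __name__ == "__main__":
    durations_ms = scan_files("hars/")  # "hars/" is an assumed example directory
    for duration in durations_ms:
        print("{:.0f} ms".format(duration))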
Example #8
def check_service_in_har(har_data, service_name):
    logging.info('Checking for service --> ' + service_name)
    har_parser = HarParser(json.loads(har_data))
    for x in har_parser.har_data['entries']:
        if x['request']['url'] == service_name:
            logging.info('got service -> ' + service_name)
            return True
    return False
Example #9
def save_har_to_csv(test, testname, service_list, desc_list):
    import csv
    temp_dir = os.path.join(
        os.path.abspath(os.path.join(os.path.abspath(os.path.dirname(__file__)), os.pardir)),
        'temp')
    harname = os.path.join(temp_dir, testname + '.har')
    csv_name = os.path.join(temp_dir, testname + '.csv')
    if os.path.exists(csv_name):
        os.remove(csv_name)
    with open(harname, 'r') as f:
        har_parser = HarParser(json.loads(f.read()))
        with open(csv_name, mode='x') as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=',', lineterminator='\n')
            csv_writer.writerow(['desc','url','status', 'response_type','time', 'starttime'])
            for x in har_parser.har_data['entries']:
                if x['request']['url'] in service_list:
                    desc = desc_list[service_list.index(x['request']['url'])]
                    url = x['request']['url']
                    status = x['response']['status']
                    time = x['time']
                    start = x['startedDateTime']
                    csv_writer.writerow([desc, url, status, 'actual', time, start])
            #write expected to csv
            for x in test['api']:
                csv_writer.writerow([x['description'],  x['servicename'], x['status_code'], 'expected', x['expectedresponseinms'], 0])
    return csv_name
Example #10
    def __get_page_content_from_har(self):
        with open(self.har_path, "r") as f:
            har_parser = HarParser(json.loads(f.read()))

        for page in har_parser.pages[:1]:
            for file in page.html_files:
                return file["response"]["content"]["text"]
        raise Exception("Unable to access HAR file.")
Example #11
 def harparser(self):
     """
     Captures the har and converts to a HarParser object
     :return: HarParser object, a page from har capture
     """
     result_har = json.dumps(self._client.har, ensure_ascii=False)
     har_parser = HarParser(json.loads(result_har))
     return har_parser.pages[0]
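A hedged usage sketch for the harparser property above; `capture` stands in for an instance of the surrounding (unshown) class, and the returned object is a haralyzer HarPage.
# Hypothetical caller; the surrounding class is not shown in the snippet.
page = capture.harparser()
for entry in page.entries:
    print(entry['request']['url'], entry['response']['status'])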
Example #12
def test_match_request_type(har_data):
    """
    Tests the ability of the parser to match a request type.
    """
    # The HarParser does not work without a full har file, but we only want
    # to test a piece, so this initial load is just so we can get the object
    # loaded, we don't care about the data in that HAR file.
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)

    entry = har_data('single_entry.har')

    # TEST THE REGEX FEATURE FIRST #
    assert har_parser.match_request_type(entry, '.*ET')
    assert not har_parser.match_request_type(entry, '.*ST')
    # TEST LITERAL STRING MATCH #
    assert har_parser.match_request_type(entry, 'GET', regex=False)
    assert not har_parser.match_request_type(entry, 'POST', regex=False)
Example #13
 def get_pages(self):
     if self.pages: return self.pages
     try:
         if 'har' not in self: return []
         har_parser = HarParser(self['har'])
         self.pages = har_parser.pages
         return self.pages
     except Exception as e:
         logging.warning('Saw exception when parsing HAR: {}'.format(e))
         return []
Example #14
    def setHeadersFromHarFile(self, fileName, urlMustContain):
        if not os.path.exists(fileName):
            return

        try:
            from pathlib import Path
            
            headersList = []
            
            if Path(fileName).suffix == '.har':
                from haralyzer import HarParser
            
                file = helpers.getFile(fileName)

                j = json.loads(file)

                har_page = HarParser(har_data=j)

                # find the right url
                for page in har_page.pages:
                    for entry in page.entries:
                        if urlMustContain in entry['request']['url']:
                            headersList = entry['request']['headers']
                            break

            else:
                headersList = helpers.getJsonFile(fileName)
                headersList = get(headersList, 'headers')

            headers = []

            for header in headersList:
                name = header.get('name', '')
                value = header.get('value', '')

                # ignore pseudo-headers
                if name.startswith(':'):
                    continue

                if name.lower() == 'content-length' or name.lower() == 'host':
                    continue

                # otherwise response will stay compressed and unreadable
                if name.lower() == 'accept-encoding' and not self.hasBrotli:
                    value = value.replace(', br', '')

                newHeader = (name, value)

                headers.append(newHeader)

            self.headers = OrderedDict(headers)
        
        except Exception as e:
            helpers.handleException(e)
Example #15
def main(args):
    logging.basicConfig(level=args.level)
    with open(args.archive, "r", encoding="utf-8") as f:
        body = json.load(f)
    har_parser = HarParser(body)

    from visitors import HttpArchiveVisitor

    visitor = HttpArchiveVisitor()
    visitor.visit(har_parser)

    visitor.summarize()
Example #16
File: main.py Project: adervish/cdn
def parse_file(f):
    har_parser = HarParser(json.loads(f))

    rows = [['X-CACHE-HEADER', 'BYTES', 'URL']]

    hosts = {}
    size = {}
    total_bytes = 0  #total bytes for all content across the entire thing

    for page in har_parser.pages:
        assert isinstance(page, HarPage)
        for entry in page.entries:
            cdn = []
            headers = entry['response']['headers']
            #print(entry['response'], file=sys.stderr)
            cdn_str = None
            total_bytes += entry['response']['content']['size']
            #pp.pprint(entry['request'])
            url = urlparse(entry['request']['url'])
            for h in headers:
                if (h['name'] == 'x-cache'):
                    hosts[url.netloc] = 1
                    #print(url, file=sys.stderr)
                    cdn_str = h['value']
                    cdn.append(cdn_str)

            if (cdn_str in size):
                size[cdn_str] = size[cdn_str] + entry['response']['content'][
                    'size']
            else:
                size[cdn_str] = entry['response']['content']['size']
            print("\t".join([
                str(cdn),
                str(entry['response']['content']['size']),
                entry['request']['url'], url.netloc
            ]))
            rows.append([
                cdn, entry['response']['content']['size'],
                linkify(entry['request']['url'])
            ])

    bysize = [['CACHE TAG', '% OF BYTES']]
    for sk in size.keys():
        bysize.append([sk, "{:.1%}".format(size[sk] / total_bytes)])

    # Transpose once, outside the loop, so hosts_t exists even when no x-cache headers were found
    bysize_t = list(map(list, zip(*bysize)))
    hosts_t = list(map(list, zip(*[hosts.keys()])))
    return {
        'total_bytes': total_bytes,
        'hosts_t': hosts_t,
        'bysize': bysize,
        'rows': rows
    }
Example #17
def get_entries(filename: str, entry_id: int = None) -> (dict, list):
    """Gets either all the entries or a certain one"""
    with open(
            os.path.join(os.getenv("UPLOAD_FOLDER", "/tmp"),
                         filename),  # nosec
            "r",
            encoding="utf-8",
    ) as process_file:
        render_pages = HarParser(json.loads(process_file.read())).pages
    items = [entry for page in render_pages for entry in page.entries]
    if isinstance(entry_id, int):
        return items[entry_id]
    return items
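A hedged usage sketch for get_entries; 'capture.har' is an assumed uploaded filename. Calling it without entry_id returns every entry across all pages, while an integer entry_id returns just that entry.
all_entries = get_entries("capture.har")               # list of entry dicts
first_entry = get_entries("capture.har", entry_id=0)   # single entry dict
print(len(all_entries), first_entry["request"]["url"])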
Example #18
def test_init_entry_with_no_pageref(har_data):
    '''
    If we find an entry with no pageref it should end up in a HarPage object
    with page ID of unknown
    '''
    data = har_data('missing_pageref.har')
    har_parser = HarParser(data)
    # We should have two pages. One is defined in the pages key of the har file
    # but has no entries. The other should be our unknown page, with a single
    # entry
    assert len(har_parser.pages) == 2
    page = [p for p in har_parser.pages if p.page_id == 'unknown'][0]
    assert len(page.entries) == 1
Example #19
 def capture_url_traffic(self, url, wait_time=0):
     """
     Capture the har for a given url
     :param str url: url to capture traffic for
     :param int wait_time: time to wait after the page load
     :return: HarParser object, a page from har capture
     """
     self._client.new_har(options={'captureHeaders': True})
     self._driver.goto_url(url, absolute=True)
     time.sleep(wait_time)
     result_har = json.dumps(self._client.har, ensure_ascii=False)
     har_parser = HarParser(json.loads(result_har))
     return har_parser.pages[0]
Example #20
    def setHeadersFromHarFile(self, fileName, urlMustContain):
        try:
            from pathlib import Path

            headersList = []

            if Path(fileName).suffix == '.har':
                from haralyzer import HarParser

                file = helpers.getFile(fileName)

                j = json.loads(file)

                har_page = HarParser(har_data=j)

                # find the right url
                for page in har_page.pages:
                    for entry in page.entries:
                        if urlMustContain in entry['request']['url']:
                            headersList = entry['request']['headers']
                            break

            else:
                headersList = helpers.getJsonFile(fileName)
                headersList = get(headersList, 'headers')

            headers = []

            for header in headersList:
                name = header.get('name', '')

                # ignore pseudo-headers
                if name.startswith(':'):
                    continue

                if name.lower() == 'content-length' or name.lower() == 'host':
                    continue

                newHeader = (name, header.get('value', ''))

                headers.append(newHeader)

            self.headers = OrderedDict(headers)

        except Exception as e:
            helpers.handleException(e)
Example #21
def extract_adobe_from_har(file_path_to_har_file):
    list_to_print = []

    with open(file_path_to_har_file, "r") as f:
        har_parser = HarParser(json.loads(f.read()))

    for har_page in har_parser.pages:

        ## POST requests
        post_requests = har_page.post_requests

        # filter for adobe hits
        adobe_post_hits = []
        for request in post_requests:
            if "https://woolworthsfoodgroup.sc.omtrdc" in request["request"]["url"]:
                adobe_post_hits.append(request)
                # print(json.dumps(request, indent=4))

        for adobe_post_hit in adobe_post_hits:
            query = parse_query_string(adobe_post_hit["request"]["postData"]["text"])

            list_to_print.append(query)

        ## GET requests
        get_requests = har_page.get_requests

        # filter adobe requests
        for request in get_requests:
            if "https://woolworthsfoodgroup.sc.omtrdc" in request["request"]["url"]:
                # print(request["request"]["url"])

                my_url = request["request"]["url"]
                parsed = urllib.parse.urlparse(my_url)

                data_sent = urllib.parse.unquote(str(parsed.query))
                query = parse_query_string(parsed.query)

                list_to_print.append(query)

    new_list = sorted(list_to_print, key=lambda k: k["t"])


    return new_list
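A hedged usage sketch for extract_adobe_from_har; 'capture.har' is an assumed path, and each returned item is a parsed query-string dict, sorted by its "t" parameter.
hits = extract_adobe_from_har("capture.har")
for hit in hits:
    print(json.dumps(hit, indent=2))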
Example #23
def parse_har_file(har_file):
    """
    Parse a HAR file into a list of request objects
    This currently filters requests by content_type (text/html)
    """
    har_parser = HarParser(json.load(har_file))

    requests = []

    for page in har_parser.pages:
        entries = page.filter_entries(content_type='text/html')
        for entry in entries:
            entry_request = entry['request']

            request_base_url = "{0.scheme}://{0.netloc}".format(
                urlsplit(entry_request['url']))

            request = {
                'method': entry_request['method'],
                'url': entry_request['url'].replace(request_base_url, ""),
                'datetime': dateutil.parser.parse(entry['startedDateTime']),
            }

            if entry_request['method'] == 'POST':
                request['data'] = {
                    unquote_plus(item['name']): unquote_plus(item['value'])
                    for item in entry_request['postData']['params']
                }
                request['data'].pop('csrf_token', None)

            requests.append(request)

    requests.sort(key=itemgetter('datetime'))

    for request in requests:
        request.pop('datetime', None)

    return {'requests': requests}
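A hedged usage sketch for parse_har_file; 'session.har' is an assumed path. The function expects an open file object because it calls json.load() on it.
with open("session.har", "r", encoding="utf-8") as har_file:
    parsed = parse_har_file(har_file)
for request in parsed["requests"]:
    print(request["method"], request["url"])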
Example #26
def get_response_contents_from_har(har_path):
    response_contents = defaultdict(str)
    with open(har_path, 'r') as f:
        try:
            har_parser = HarParser(json.loads(f.read()))
        except ValueError:
            return response_contents
        for page in har_parser.pages:
            for entry in page.entries:
                try:
                    url = entry["request"]["url"]
                    base_url = url.split("?")[0].split("#")[0]
                    mime_type = entry["response"]["content"]["mimeType"]
                    if "image" in mime_type or "font" in mime_type or \
                            "css" in mime_type:
                        continue
                    # print mime_type
                    body = entry["response"]["content"]["text"]
                    # print url, body[:128]
                    # response_contents.append((url, body))
                    response_contents[base_url] += ("\n======\n" + body)
                except Exception:
                    pass
    return response_contents
Example #27
def get_info_from_har(file_path):
    with open(file_path, 'r', encoding='UTF8') as f:
        har_parser = HarParser(json.loads(f.read()))

    method = har_parser.pages[0].actual_page['request']['method']
    url = har_parser.pages[0].actual_page['request']['url']
    headers = {}
    for header in har_parser.pages[0].actual_page['request']['headers']:
        key = header['name']
        value = header['value']
        headers[key] = value

    queryString = har_parser.pages[0].actual_page['request']['queryString']
    cookies = har_parser.pages[0].actual_page['request']['cookies']

    context = {
        'method': method,
        'url': url,
        'headers': headers,
        'queryString': queryString,
        'cookies': cookies
    }

    return context
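A hedged usage sketch for get_info_from_har; 'login.har' is an assumed path. The returned dict describes the request of the page's actual_page entry.
info = get_info_from_har("login.har")
print(info['method'], info['url'])
print(info['headers'].get('User-Agent'))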
Example #28
def read_har(harfile):
    # Read harfile and return haralyzer parser
    with open(harfile, 'r') as f:
        har_parser = HarParser(json.loads(f.read()))

    return har_parser
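A hedged usage sketch for read_har; 'example.har' is an assumed path. The returned HarParser exposes .pages, and each page exposes .entries.
parser = read_har("example.har")
for page in parser.pages:
    for entry in page.entries:
        print(entry["request"]["url"])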
Example #29
import ast
import json
import csv
from bs4 import BeautifulSoup
from haralyzer import HarParser

with open('../../Downloads/public.tableau.com_base_6.har', 'r') as f:
    # with open('../../Downloads/tahir-data/public.tableau.com_two.har', 'r') as f:
    # with open('../../Downloads/men_baseball/3-harib-mensbaseball.har', 'r') as f:
    # with open('../../Downloads/men_baseball/13full-harib-mensbaseball.har', 'r') as f:
    # with open('../../Downloads/soccer/mens_soccer_6.har', 'r') as f:
    # with open('../../Downloads/soccer/mens_soccer_8.har', 'r') as f:
    # with open('../../Downloads/soccer/mens_soccer_15.har', 'r') as f:
    har_parser = HarParser(json.loads(f.read()))
tree_list = []
for page in har_parser.pages:
    for index, entry in enumerate(page.entries):
        if har_parser.match_request_type(entry, 'POST'):
            text_str = str(entry.get('response').get('content').get('text'))
            if 'School Name:' in text_str and '518965;' not in text_str:
                data = json.loads(text_str.strip())
                # data = text_str
                # import pdb;pdb.set_trace()
                # data = text_str
                # data = ast.literal_eval(entry.get('response').get('content').get('text'))
                print(text_str)
                cmdResultList = data.get('vqlCmdResponse').get('cmdResultList')
                print(cmdResultList)
                for index, i in enumerate(cmdResultList):
                    # try:
Example #30
 def __init__(self, pydict):
     self.__har = pydict
     self.__har_parser = HarParser(pydict)
Example #31
def test_match_headers(har_data):

    # The HarParser does not work without a full har file, but we only want
    # to test a piece, so this initial load is just so we can get the object
    # loaded, we don't care about the data in that HAR file.
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)

    raw_headers = har_data('single_entry.har')

    # Make sure that bad things happen if we don't give it response/request
    test_data = {
        'captain beefheart': {
            'accept': '.*text/html,application/xhtml.*',
            'host': 'humanssuck.*',
            'accept-encoding': '.*deflate',
        },
    }
    with pytest.raises(ValueError):
        _headers_test(har_parser, raw_headers, test_data, True, True)

    # TEST THE REGEX FEATURE FIRST #

    # These should all be True
    test_data = {
        'request': {
            'accept': '.*text/html,application/xhtml.*',
            'host': 'humanssuck.*',
            'accept-encoding': '.*deflate',
        },
        'response': {
            'server': 'nginx',
            'content-type': 'text.*',
            'connection': '.*alive',
        },
    }

    _headers_test(har_parser, raw_headers, test_data, True, True)

    test_data = {
        'request': {
            'accept': '.*text/xml,application/xhtml.*',
            'host': 'humansrule.*',
            'accept-encoding': 'i dont accept that',
        },
        'response': {
            'server': 'apache',
            'content-type': 'application.*',
            'connection': '.*dead',
        },
    }

    _headers_test(har_parser, raw_headers, test_data, False, True)

    # Test literal string matches #

    # These should all be True
    test_data = {
        'request': {
            'accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'host': 'humanssuck.net',
            'accept-encoding': 'gzip, deflate',
        },
        'response': {
            'server': 'nginx',
            'content-type': 'text/html; charset=UTF-8',
            'connection': 'keep-alive',
        },
    }

    _headers_test(har_parser, raw_headers, test_data, True, False)

    test_data = {
        'request': {
            'accept': 'I accept nothing',
            'host': 'humansrule.guru',
            'accept-encoding': 'i dont accept that',
        },
        'response': {
            'server': 'apache',
            'content-type': 'your mom',
            'connection': 'not keep-alive',
        },
    }

    _headers_test(har_parser, raw_headers, test_data, False, False)
Example #32
import sys
import json

from haralyzer import HarParser, HarPage
from numpy import trapz
import pandas as pd
import asciiplotlib as apl
# import matplotlib.pyplot as plt

# Handle too many or not enough inputs
if len(sys.argv) < 2:
    raise Exception("Error: need a path to HAR file as command-line argument")
elif len(sys.argv) > 2:
    raise Exception("Error: gave too many command-line arguments")

# Get HAR archive File name (as command-line argument)
har = sys.argv[1]
with open(har, 'r') as f:
    har_parser = HarParser(json.loads(f.read()))

# Get onLoad per page load
page_onLoad = []
for item in har_parser.har_data["pages"]:
    page_onLoad.append(item.get("pageTimings").get("onLoad"))

# Get total in bytes for _bytesIn and _objectSize
numPages = 0
total_bytesIn = []
total_objectSize = []
for page in har_parser.pages:
    numPages += 1
    byteSize = objSize = 0
    for entry in page.entries:
        byteSize += int(entry["_bytesIn"])
Example #33
    'WWW-Authenticate', 'X-Frame-Options', 'A-IM', 'Accept', 'Accept-Charset',
    'Accept-Datetime', 'Accept-Encoding', 'Accept-Language',
    'Access-Control-Request-Method', 'Access-Control-Request-Headers',
    'Authorization', 'Cache-Control', 'Connection', 'Content-Length',
    'Content-MD5', 'Content-Type', 'Cookie', 'Date', 'Expect', 'Forwarded',
    'From', 'Host', 'HTTP2-Settings', 'If-Match', 'If-Modified-Since',
    'If-None-Match', 'If-Range', 'If-Unmodified-Since', 'Max-Forwards',
    'Origin', 'Pragma', 'Proxy-Authorization', 'Range', 'Referer', 'TE',
    'Upgrade', 'User-Agent', 'Via', 'Warning'
]

FIELDS = []
for a in FIELDSs:
    FIELDS.append(a.lower())
with open('arcCSP.har', 'r') as f:
    data = HarParser(json.loads(f.read()))

    for page in data.pages:
        toprint = ""
        toprint = toprint + "=========================\n" + str(page)
        print(toprint)
        for entry in page.entries:
            tab = entry['request']['headers']
            toprinta = ""
            toprinta = toprinta + entry['request']['url'] + "\n" + entry[
                'request']['httpVersion'] + "\n"
            #print(entry['request']['url'])
            #print(entry['request']['httpVersion'])
            #print(' ')
            i = 0
            for aa in tab:
Example #34
                url = "https://www.instagram.com/p/%s/" % shortcode
                ts = int(time)
                utc = datetime.utcfromtimestamp(ts).strftime(
                    '%Y-%m-%d %H:%M:%S')
                g.write("%s,%s,%s,%s,%s,%s\n" %
                        (shortcode, url, time, utc, likes, comments))
        except Exception as e:
            #print(e)
            pass
    #return shortcode_list2


if __name__ == "__main__":
    with open(sys.argv[1], 'rb') as f:
        har = f.read()
        har_parser = HarParser(json.loads(har))
        har_page = HarPage('page_4', har_data=json.loads(har))
    x = len(har_page.entries)
    for i in range(0, x):
        resource_type = har_page.entries[i]['_resourceType']
        #print(resource_type)
        req_url = har_page.entries[i]['request']['url']
        if req_url == "https://www.instagram.com/katyperry/":
            #First 12 posts
            res = har_page.entries[0]['response']['content']['text']
            #print(res)
            first_12_posts = get_shortcode_first(res)
        elif resource_type == "xhr" and req_url.startswith(
                "https://www.instagram.com/graphql/query/?query_hash="):
            #for other posts
            res = har_page.entries[i]['response']['content']['text']