Example #1
def test_init(har_data):
    # Make sure we only tolerate valid input
    with pytest.raises(ValueError):
        har_parser = HarParser('please_dont_work')
        assert har_parser

    har_data = har_data('humanssuck.net.har')
    har_parser = HarParser(har_data)
    for page in har_parser.pages:
        assert isinstance(page, HarPage)

    assert har_parser.browser == {'name': 'Firefox', 'version': '25.0.1'}
    assert har_parser.version == '1.1'
    assert har_parser.creator == {'name': 'Firebug', 'version': '1.12'}
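These tests rely on a har_data fixture that is not shown on this page. A minimal sketch of what such a fixture might look like, assuming the HAR fixtures live in a tests/data directory next to the test module (both the fixture body and the directory layout are assumptions, not code from haralyzer itself):

import json
import os

import pytest


@pytest.fixture
def har_data():
    # Assumed layout: HAR fixtures such as humanssuck.net.har live in tests/data/
    def load(filename):
        path = os.path.join(os.path.dirname(__file__), 'data', filename)
        with open(path, 'r') as f:
            return json.loads(f.read())
    return load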
Example #2
def test_init(har_data):
    """
    Test the object loading
    """
    with pytest.raises(ValueError):
        page = HarPage(PAGE_ID)

    init_data = har_data('humanssuck.net.har')

    # Throws PageNotFoundException with bad page ID
    with pytest.raises(PageNotFoundError):
        page = HarPage(BAD_PAGE_ID, har_data=init_data)

    # Make sure it can load with either har_data or a parser
    page = HarPage(PAGE_ID, har_data=init_data)
    assert isinstance(page, HarPage)
    parser = HarParser(init_data)
    page = HarPage(PAGE_ID, har_parser=parser)
    assert isinstance(page, HarPage)

    assert len(page.entries) == 4
    # Make sure that the entries are actually in order. Going a little bit
    # old school here.
    for index in range(0, len(page.entries)):
        if index != len(page.entries) - 1:
            current_date = dateutil.parser.parse(
                page.entries[index]['startedDateTime'])
            next_date = dateutil.parser.parse(
                page.entries[index + 1]['startedDateTime'])
            assert current_date <= next_date
Example #3
def save_har_to_csv(test, testname, service_list, desc_list):
    import csv
    temp_dir = os.path.join(
        os.path.abspath(os.path.join(os.path.abspath(os.path.dirname(__file__)), os.pardir)),
        'temp')
    harname = os.path.join(temp_dir, testname + '.har')
    csv_name = os.path.join(temp_dir, testname + '.csv')
    if os.path.exists(csv_name):
        os.remove(csv_name)
    with open(harname, 'r') as f:
        har_parser = HarParser(json.loads(f.read()))
        with open(csv_name, mode='x') as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=',', lineterminator='\n')
            csv_writer.writerow(['desc','url','status', 'response_type','time', 'starttime'])
            for x in har_parser.har_data['entries']:
                if x['request']['url'] in service_list:
                    desc = desc_list[service_list.index(x['request']['url'])]
                    url = x['request']['url']
                    status = x['response']['status']
                    time = x['time']
                    start = x['startedDateTime']
                    csv_writer.writerow([desc, url, status, 'actual', time, start])
            #write expected to csv
            for x in test['api']:
                csv_writer.writerow([x['description'],  x['servicename'], x['status_code'], 'expected', x['expectedresponseinms'], 0])
    return csv_name
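A hypothetical call to save_har_to_csv. The test dict shape, the service URL, and the pre-existing temp/login_test.har capture are all invented for illustration:

test = {
    'api': [{
        'description': 'login call',
        'servicename': 'https://example.com/api/login',
        'status_code': 200,
        'expectedresponseinms': 300,
    }]
}
csv_path = save_har_to_csv(test, 'login_test',
                           service_list=['https://example.com/api/login'],
                           desc_list=['login call'])
print(csv_path)  # path of the generated CSV in the project's temp directory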
Example #4
def test_create_asset_timeline(har_data):
    """
    Tests the asset timeline function by making sure that it inserts one object
    correctly.
    """
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)

    entry = har_data('single_entry.har')

    # Get the datetime object of the start time and total load time
    time_key = dateutil.parser.parse(entry['startedDateTime'])
    load_time = int(entry['time'])

    asset_timeline = har_parser.create_asset_timeline([entry])

    # The number of entries in the timeline should match the load time
    assert len(asset_timeline) == load_time

    for t in range(1, load_time):
        assert time_key in asset_timeline
        assert len(asset_timeline[time_key]) == 1
        # Compare the dicts
        for key, value in iteritems(entry):
            assert asset_timeline[time_key][0][key] == entry[key]
        time_key = time_key + datetime.timedelta(milliseconds=1)
Example #5
def check_service_in_har(har_data, service_name):
    logging.info('Checking for service --> ' + service_name)
    har_parser = HarParser(json.loads(har_data))
    for x in har_parser.har_data['entries']:
        if x['request']['url'] == service_name:
            logging.info('got service -> ' + service_name)
            return True
    return False
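A usage sketch, assuming capture.har is a local HAR capture (the file name and service URL are made up):

with open('capture.har', 'r') as f:
    raw_har = f.read()
if check_service_in_har(raw_har, 'https://example.com/api/login'):
    print('service was requested during the capture')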
Example #6
def scan_files(path):
    data = []
    # Parse all files in directory
    for filename in os.listdir(path):
        with open(os.path.join(path, filename), 'r') as f:
            har_parser = HarParser(json.loads(f.read()))

        start_time = dateutil.parser.parse(har_parser.pages[0].entries[0]["startedDateTime"])
        latest_time = start_time

        # Parse all resources HTML, CSS, JS...
        for entry in har_parser.pages[0].entries:
            if entry["time"] is None:
                s = 0
            else:
                s = float(entry["time"]) / 1000

            current_time = dateutil.parser.parse(entry["startedDateTime"]) + datetime.timedelta(seconds=s)
            if current_time > latest_time:
                latest_time = current_time

        total = latest_time - start_time
        # if total < datetime.timedelta(seconds=1000):
        #     os.remove(os.path.join(path, filename))
        #     print(filename)

        data.append(total.total_seconds() * 1000)
    return data
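A usage sketch, assuming ./hars is a directory holding at least one HAR file:

load_times_ms = scan_files('./hars')
print('average page load: %.1f ms' % (sum(load_times_ms) / len(load_times_ms)))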
Example #7
    def harparser(self):
        """
        Captures the har and converts it to a HarParser object
        :return: HarPage object, the first page from the har capture
        """
        result_har = json.dumps(self._client.har, ensure_ascii=False)
        har_parser = HarParser(json.loads(result_har))
        return har_parser.pages[0]
Example #8
    def __get_page_content_from_har(self):
        with open(self.har_path, "r") as f:
            har_parser = HarParser(json.loads(f.read()))

        for page in har_parser.pages[:1]:
            for file in page.html_files:
                return file["response"]["content"]["text"]
        raise Exception("Unable to access HAR file.")
Example #9
    def get_pages(self):
        if self.pages:
            return self.pages
        try:
            if 'har' not in self:
                return []
            har_parser = HarParser(self['har'])
            self.pages = har_parser.pages
            return self.pages
        except Exception as e:
            logging.warning('Saw exception when parsing HAR: {}'.format(e))
            return []
Example #10
    def setHeadersFromHarFile(self, fileName, urlMustContain):
        if not os.path.exists(fileName):
            return

        try:
            from pathlib import Path
            
            headersList = []
            
            if Path(fileName).suffix == '.har':
                from haralyzer import HarParser
            
                file = helpers.getFile(fileName)

                j = json.loads(file)

                har_page = HarParser(har_data=j)

                # find the right url
                for page in har_page.pages:
                    for entry in page.entries:
                        if urlMustContain in entry['request']['url']:
                            headersList = entry['request']['headers']
                            break

            else:
                headersList = helpers.getJsonFile(fileName)
                headersList = get(headersList, 'headers')

            headers = []

            for header in headersList:
                name = header.get('name', '')
                value = header.get('value', '')

                # ignore pseudo-headers
                if name.startswith(':'):
                    continue

                if name.lower() == 'content-length' or name.lower() == 'host':
                    continue

                # otherwise response will stay compressed and unreadable
                if name.lower() == 'accept-encoding' and not self.hasBrotli:
                    value = value.replace(', br', '')

                newHeader = (name, value)

                headers.append(newHeader)

            self.headers = OrderedDict(headers)
        
        except Exception as e:
            helpers.handleException(e)
Example #11
File: main.py Project: adervish/cdn
def parse_file(f):
    har_parser = HarParser(json.loads(f))

    rows = [['X-CACHE-HEADER', 'BYTES', 'URL']]

    hosts = {}
    size = {}
    total_bytes = 0  # total bytes for all content across the entire capture

    for page in har_parser.pages:
        assert isinstance(page, HarPage)
        for entry in page.entries:
            cdn = []
            headers = entry['response']['headers']
            #print(entry['response'], file=sys.stderr)
            cdn_str = None
            total_bytes += entry['response']['content']['size']
            #pp.pprint(entry['request'])
            url = urlparse(entry['request']['url'])
            for h in headers:
                if h['name'] == 'x-cache':
                    hosts[url.netloc] = 1
                    #print(url, file=sys.stderr)
                    cdn_str = h['value']
                    cdn.append(cdn_str)

            if cdn_str in size:
                size[cdn_str] = size[cdn_str] + entry['response']['content']['size']
            else:
                size[cdn_str] = entry['response']['content']['size']
            print("\t".join([
                str(cdn),
                str(entry['response']['content']['size']),
                entry['request']['url'], url.netloc
            ]))
            rows.append([
                cdn, entry['response']['content']['size'],
                linkify(entry['request']['url'])
            ])

    bysize = [['CACHE TAG', '% OF BYTES']]
    for sk in size.keys():
        bysize.append([sk, "{:.1%}".format(size[sk] / total_bytes)])

    bysize_t = list(map(list, zip(*bysize)))
    hosts_t = list(map(list, zip(*[hosts.keys()])))
    return {
        'total_bytes': total_bytes,
        'hosts_t': hosts_t,
        'bysize': bysize,
        'rows': rows
    }
Example #12
def main(args):
    logging.basicConfig(level=args.level)
    with open(args.archive, "r", encoding="utf-8") as f:
        body = json.load(f)
    har_parser = HarParser(body)

    from visitors import HttpArchiveVisitor

    visitor = HttpArchiveVisitor()
    visitor.visit(har_parser)

    visitor.summarize()
Example #13
from typing import Optional, Union


def get_entries(filename: str, entry_id: Optional[int] = None) -> Union[dict, list]:
    """Gets either all the entries or a certain one"""
    with open(
            os.path.join(os.getenv("UPLOAD_FOLDER", "/tmp"),
                         filename),  # nosec
            "r",
            encoding="utf-8",
    ) as process_file:
        render_pages = HarParser(json.loads(process_file.read())).pages
    items = [entry for page in render_pages for entry in page.entries]
    if isinstance(entry_id, int):
        return items[entry_id]
    return items
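A usage sketch, assuming example.har has already been uploaded to the directory that UPLOAD_FOLDER points to (default /tmp); the file name is made up:

all_entries = get_entries('example.har')      # every entry across all pages
first_entry = get_entries('example.har', 0)   # a single entry dict
print(len(all_entries), first_entry['request']['url'])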
Example #14
    def capture_url_traffic(self, url, wait_time=0):
        """
        Capture the har for a given url
        :param str url: url to capture traffic for
        :param int wait_time: time to wait after the page load
        :return: HarPage object, the first page from the har capture
        """
        self._client.new_har(options={'captureHeaders': True})
        self._driver.goto_url(url, absolute=True)
        time.sleep(wait_time)
        result_har = json.dumps(self._client.har, ensure_ascii=False)
        har_parser = HarParser(json.loads(result_har))
        return har_parser.pages[0]
Example #15
def test_init_entry_with_no_pageref(har_data):
    '''
    If we find an entry with no pageref it should end up in a HarPage object
    with page ID of unknown
    '''
    data = har_data('missing_pageref.har')
    har_parser = HarParser(data)
    # We should have two pages. One is defined in the pages key of the har file
    # but has no entries. The other should be our unknown page, with a single
    # entry
    assert len(har_parser.pages) == 2
    page = [p for p in har_parser.pages if p.page_id == 'unknown'][0]
    assert len(page.entries) == 1
Example #16
def test_match_status_code(har_data):
    """
    Tests the ability of the parser to match status codes.
    """
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)

    entry = har_data('single_entry.har')

    # TEST THE REGEX FEATURE FIRST #
    assert har_parser.match_status_code(entry, '2.*')
    assert not har_parser.match_status_code(entry, '3.*')
    # TEST LITERAL STRING MATCH #
    assert har_parser.match_status_code(entry, '200', regex=False)
    assert not har_parser.match_status_code(entry, '201', regex=False)
Example #17
def test_http_version(har_data):
    """
    Tests the ability of the parser to match HTTP versions.
    """
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)

    entry = har_data('single_entry.har')

    # TEST THE REGEX FEATURE FIRST #
    assert har_parser.match_http_version(entry, '.*1.1')
    assert not har_parser.match_http_version(entry, '.*2')
    # TEST LITERAL STRING MATCH #
    assert har_parser.match_http_version(entry, 'HTTP/1.1', regex=False)
    assert not har_parser.match_http_version(entry, 'HTTP/2.0', regex=False)
Example #18
    def setHeadersFromHarFile(self, fileName, urlMustContain):
        try:
            from pathlib import Path

            headersList = []

            if Path(fileName).suffix == '.har':
                from haralyzer import HarParser

                file = helpers.getFile(fileName)

                j = json.loads(file)

                har_page = HarParser(har_data=j)

                # find the right url
                for page in har_page.pages:
                    for entry in page.entries:
                        if urlMustContain in entry['request']['url']:
                            headersList = entry['request']['headers']
                            break

            else:
                headersList = helpers.getJsonFile(fileName)
                headersList = get(headersList, 'headers')

            headers = []

            for header in headersList:
                name = header.get('name', '')

                # ignore pseudo-headers
                if name.startswith(':'):
                    continue

                if name.lower() == 'content-length' or name.lower() == 'host':
                    continue

                newHeader = (name, header.get('value', ''))

                headers.append(newHeader)

            self.headers = OrderedDict(headers)

        except Exception as e:
            helpers.handleException(e)
Example #19
def test_match_request_type(har_data):
    """
    Tests the ability of the parser to match a request type.
    """
    # The HarParser does not work without a full har file, but we only want
    # to test a piece, so this initial load is just so we can get the object
    # loaded, we don't care about the data in that HAR file.
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)

    entry = har_data('single_entry.har')

    # TEST THE REGEX FEATURE FIRST #
    assert har_parser.match_request_type(entry, '.*ET')
    assert not har_parser.match_request_type(entry, '.*ST')
    # TEST LITERAL STRING MATCH #
    assert har_parser.match_request_type(entry, 'GET', regex=False)
    assert not har_parser.match_request_type(entry, 'POST', regex=False)
Example #20
def extract_adobe_from_har(file_path_to_har_file):
    list_to_print = []

    with open(file_path_to_har_file, "r") as f:
        har_parser = HarParser(json.loads(f.read()))

    for har_page in har_parser.pages:

        ## POST requests
        post_requests = har_page.post_requests

        # filter for adobe hits
        adobe_post_hits = []
        for request in post_requests:
            if "https://woolworthsfoodgroup.sc.omtrdc" in request["request"]["url"]:
                adobe_post_hits.append(request)
                # print(json.dumps(request, indent=4))

        for adobe_post_hit in adobe_post_hits:
            query = parse_query_string(adobe_post_hit["request"]["postData"]["text"])

            list_to_print.append(query)

        ## GET requests
        get_requests = har_page.get_requests

        # filter adobe requests
        for request in get_requests:
            if "https://woolworthsfoodgroup.sc.omtrdc" in request["request"]["url"]:
                # print(request["request"]["url"])

                my_url = request["request"]["url"]
                parsed = urllib.parse.urlparse(my_url)

                data_sent = urllib.parse.unquote(str(parsed.query))
                query = parse_query_string(parsed.query)

                list_to_print.append(query)

    new_list = sorted(list_to_print, key=lambda k: k["t"])


    return new_list
Example #21
def parse_har_file(har_file):
    """
    Parse a HAR file into a list of request objects
    This currently filters requests by content_type (text/html)
    """
    har_parser = HarParser(json.load(har_file))

    requests = []

    for page in har_parser.pages:
        entries = page.filter_entries(content_type='text/html')
        for entry in entries:
            entry_request = entry['request']

            request_base_url = "{0.scheme}://{0.netloc}".format(
                urlsplit(entry_request['url']))

            request = {
                'method': entry_request['method'],
                'url': entry_request['url'].replace(request_base_url, ""),
                'datetime': dateutil.parser.parse(entry['startedDateTime']),
            }

            if entry_request['method'] == 'POST':
                request['data'] = {
                    unquote_plus(item['name']): unquote_plus(item['value'])
                    for item in entry_request['postData']['params']
                }
                request['data'].pop('csrf_token', None)

            requests.append(request)

    requests.sort(key=itemgetter('datetime'))

    for request in requests:
        request.pop('datetime', None)

    return {'requests': requests}
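A usage sketch; session.har is an invented file name:

with open('session.har', 'r') as har_file:
    result = parse_har_file(har_file)
for request in result['requests']:
    print(request['method'], request['url'], request.get('data', {}))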
Example #22
def get_info_from_har(file_path):
    with open(file_path, 'r', encoding='UTF8') as f:
        har_parser = HarParser(json.loads(f.read()))

    request = har_parser.pages[0].actual_page['request']
    method = request['method']
    url = request['url']
    headers = {}
    for header in request['headers']:
        key = header['name']
        value = header['value']
        headers[key] = value

    queryString = request['queryString']
    cookies = request['cookies']

    context = {
        'method': method,
        'url': url,
        'headers': headers,
        'queryString': queryString,
        'cookies': cookies
    }

    return context
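A usage sketch; capture.har is an invented file name, and the User-Agent lookup assumes that header was present in the capture:

context = get_info_from_har('capture.har')
print(context['method'], context['url'])
print(context['headers'].get('User-Agent'))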
Example #23
def get_response_contents_from_har(har_path):
    response_contents = defaultdict(str)
    with open(har_path, 'r') as f:
        try:
            har_parser = HarParser(json.loads(f.read()))
        except ValueError:
            return response_contents
        for page in har_parser.pages:
            for entry in page.entries:
                try:
                    url = entry["request"]["url"]
                    base_url = url.split("?")[0].split("#")[0]
                    mime_type = entry["response"]["content"]["mimeType"]
                    if "image" in mime_type or "font" in mime_type or \
                            "css" in mime_type:
                        continue
                    # print mime_type
                    body = entry["response"]["content"]["text"]
                    # print url, body[:128]
                    # response_contents.append((url, body))
                    response_contents[base_url] += ("\n======\n" + body)
                except Exception:
                    pass
    return response_contents
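A usage sketch with an invented file name:

contents = get_response_contents_from_har('capture.har')
for base_url, body in contents.items():
    print(base_url, len(body), 'characters of response text')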
Example #24
    'WWW-Authenticate', 'X-Frame-Options', 'A-IM', 'Accept', 'Accept-Charset',
    'Accept-Datetime', 'Accept-Encoding', 'Accept-Language',
    'Access-Control-Request-Method', 'Access-Control-Request-Headers',
    'Authorization', 'Cache-Control', 'Connection', 'Content-Length',
    'Content-MD5', 'Content-Type', 'Cookie', 'Date', 'Expect', 'Forwarded',
    'From', 'Host', 'HTTP2-Settings', 'If-Match', 'If-Modified-Since',
    'If-None-Match', 'If-Range', 'If-Unmodified-Since', 'Max-Forwards',
    'Origin', 'Pragma', 'Proxy-Authorization', 'Range', 'Referer', 'TE',
    'Upgrade', 'User-Agent', 'Via', 'Warning'
]

FIELDS = [a.lower() for a in FIELDSs]
with open('arcCSP.har', 'r') as f:
    data = HarParser(json.loads(f.read()))

    for page in data.pages:
        toprint = ""
        toprint = toprint + "=========================\n" + str(page)
        print(toprint)
        for entry in page.entries:
            tab = entry['request']['headers']
            toprinta = ""
            toprinta = toprinta + entry['request']['url'] + "\n" + entry[
                'request']['httpVersion'] + "\n"
            #print(entry['request']['url'])
            #print(entry['request']['httpVersion'])
            #print(' ')
            i = 0
            for aa in tab:
Example #25
import json
import sys

from haralyzer import HarParser, HarPage
from numpy import trapz
import pandas as pd
import asciiplotlib as apl
# import matplotlib.pyplot as plt

# Handle too many or not enough inputs
if len(sys.argv) < 2:
    raise Exception("Error: need a path to HAR file as command-line argument")
elif len(sys.argv) > 2:
    raise Exception("Error: gave too many command-line arguments")

# Get HAR archive File name (as command-line argument)
har = sys.argv[1]
with open(har, 'r') as f:
    har_parser = HarParser(json.loads(f.read()))

# Get onLoad per page load
page_onLoad = []
for item in har_parser.har_data["pages"]:
    page_onLoad.append(item.get("pageTimings").get("onLoad"))

# Get total in bytes for _bytesIn and _objectSize
numPages = 0
total_bytesIn = []
total_objectSize = []
for page in har_parser.pages:
    numPages += 1
    byteSize = objSize = 0
    for entry in page.entries:
        byteSize += int(entry["_bytesIn"])
Example #26
                url = "https://www.instagram.com/p/%s/" % shortcode
                ts = int(time)
                utc = datetime.utcfromtimestamp(ts).strftime(
                    '%Y-%m-%d %H:%M:%S')
                g.write("%s,%s,%s,%s,%s,%s\n" %
                        (shortcode, url, time, utc, likes, comments))
        except Exception as e:
            #print(e)
            pass
    #return shortcode_list2


if __name__ == "__main__":
    with open(sys.argv[1], 'rb') as f:
        har = f.read()
        har_parser = HarParser(json.loads(har))
        har_page = HarPage('page_4', har_data=json.loads(har))
    x = len(har_page.entries)
    for i in range(0, x):
        resource_type = har_page.entries[i]['_resourceType']
        #print(resource_type)
        req_url = har_page.entries[i]['request']['url']
        if req_url == "https://www.instagram.com/katyperry/":
            #First 12 posts
            res = har_page.entries[0]['response']['content']['text']
            #print(res)
            first_12_posts = get_shortcode_first(res)
        elif resource_type == "xhr" and req_url.startswith(
                "https://www.instagram.com/graphql/query/?query_hash="):
            #for other posts
            res = har_page.entries[i]['response']['content']['text']
Example #27
def read_har(harfile):
    # Read harfile and return haralyzer parser
    with open(harfile, 'r') as f:
        har_parser = HarParser(json.loads(f.read()))

    return har_parser
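A usage sketch with an invented file name, iterating the parsed pages and entries:

har_parser = read_har('capture.har')
for page in har_parser.pages:
    for entry in page.entries:
        print(entry['request']['url'], entry['response']['status'])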
Example #28
def test_match_headers(har_data):

    # The HarParser does not work without a full har file, but we only want
    # to test a piece, so this initial load is just so we can get the object
    # loaded, we don't care about the data in that HAR file.
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)

    raw_headers = har_data('single_entry.har')

    # Make sure that bad things happen if we don't give it response/request
    test_data = {
        'captain beefheart': {
            'accept': '.*text/html,application/xhtml.*',
            'host': 'humanssuck.*',
            'accept-encoding': '.*deflate',
        },
    }
    with pytest.raises(ValueError):
        _headers_test(har_parser, raw_headers, test_data, True, True)

    # TEST THE REGEX FEATURE FIRST #

    # These should all be True
    test_data = {
        'request': {
            'accept': '.*text/html,application/xhtml.*',
            'host': 'humanssuck.*',
            'accept-encoding': '.*deflate',
        },
        'response': {
            'server': 'nginx',
            'content-type': 'text.*',
            'connection': '.*alive',
        },
    }

    _headers_test(har_parser, raw_headers, test_data, True, True)

    test_data = {
        'request': {
            'accept': '.*text/xml,application/xhtml.*',
            'host': 'humansrule.*',
            'accept-encoding': 'i dont accept that',
        },
        'response': {
            'server': 'apache',
            'content-type': 'application.*',
            'connection': '.*dead',
        },
    }

    _headers_test(har_parser, raw_headers, test_data, False, True)

    # Test literal string matches #

    # These should all be True
    test_data = {
        'request': {
            'accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'host': 'humanssuck.net',
            'accept-encoding': 'gzip, deflate',
        },
        'response': {
            'server': 'nginx',
            'content-type': 'text/html; charset=UTF-8',
            'connection': 'keep-alive',
        },
    }

    _headers_test(har_parser, raw_headers, test_data, True, False)

    test_data = {
        'request': {
            'accept': 'I accept nothing',
            'host': 'humansrule.guru',
            'accept-encoding': 'i dont accept that',
        },
        'response': {
            'server': 'apache',
            'content-type': 'your mom',
            'connection': 'not keep-alive',
        },
    }

    _headers_test(har_parser, raw_headers, test_data, False, False)
Example #29
    def __init__(self, pydict):
        self.__har = pydict
        self.__har_parser = HarParser(pydict)
Example #30
    if args.encode == "1":
        # IE and Firefox encode non-English characters in a strange way: the UTF-8 hex of "头" is e5a4b4, but IE and Firefox turn it into c3a5c2a4c2b4, so fix it here
        hex = har_text.encode("hex")
        n = 2
        hex = ' '.join([hex[j:j + n] for j in range(0, len(hex), n)])
        hex_r = re.sub(r'c3 a(\w) c2 (\w{2}) c2 (\w{2})', r'e\1\2\3',
                       hex).replace(' ', '')
        har_text = hex_r.decode('hex')
        encodlist = os.popen(
            'grep \'"name": "Content-Type"\' ' + i +
            ' -A 1|grep -iPo \'charset=.*"\'|sort -u|grep -iv "utf-8"'
        ).readlines()
        for encode in encodlist:
            replacestring = encode.strip().replace('"', '').split('=')[1]
            har_text = har_text.replace(replacestring, 'UTF-8')
    har_parser = HarParser(json.loads(har_text))

    #for page in har_parser.pages:
    for entry in har_parser.har_data['entries']:
        entries.append(entry)
print "Found requests number: " + str(len(entries))

# Build a dict keyed by entry list ID holding started_time and url, so the entry list ID can always be used to match a request with its response.
#{0: ['2019-01-31T01:51:06.305Z', 'POST:/dvwa/vulnerabilities/xss_r/?name=test','username=123&passowrd=123']}
start_time_dict = {}
for i in range(len(entries)):
    start_time_dict[i] = []
    start_time_dict[i].append(str(entries[i]['startedDateTime']))
    #print entries[i]['request']['url']
    url_match = re.search(r'(http://|https://)(.*?\/)(.*)',
                          str(entries[i]['request']['url']))