def test_filter_entries_load_time(har_data):
    """Tests ability to filter entries by load time."""
    data = har_data('humanssuck.net_duplicate_url.har')
    page = HarPage(PAGE_ID, har_data=data)
    # Each (threshold_ms, expected_count) pair: raising the load-time floor
    # should progressively exclude entries from the fixture page.
    for threshold, expected in ((100, 4), (300, 3), (500, 0)):
        matches = page.filter_entries(load_time__gt=threshold)
        assert len(matches) == expected
def test_filter_entries(har_data):
    """Tests ability to filter entries, with or without regex.

    Exercises filter_entries() with combinations of request_type,
    content_type and status_code regex filters against a fixture HAR.
    """
    init_data = har_data('humanssuck.net.har')
    page = HarPage(PAGE_ID, har_data=init_data)
    # Filter by request type only
    entries = page.filter_entries(request_type='.*ET')
    assert len(entries) == 4
    for entry in entries:
        assert entry['request']['method'] == 'GET'
    # Filter by request type and content_type
    entries = page.filter_entries(request_type='.*ET', content_type='image.*')
    assert len(entries) == 1
    for entry in entries:
        assert entry['request']['method'] == 'GET'
        for header in entry['response']['headers']:
            if header['name'] == 'Content-Type':
                assert re.search('image.*', header['value'])
    # Filter by request type, content type, and status code
    entries = page.filter_entries(request_type='.*ET', content_type='image.*',
                                  status_code='2.*')
    assert len(entries) == 1
    for entry in entries:
        assert entry['request']['method'] == 'GET'
        assert re.search('2.*', str(entry['response']['status']))
        for header in entry['response']['headers']:
            if header['name'] == 'Content-Type':
                assert re.search('image.*', header['value'])
    # Negative cases: no POST requests, no video content in this fixture
    entries = page.filter_entries(request_type='.*ST')
    assert len(entries) == 0
    entries = page.filter_entries(request_type='.*ET', content_type='video.*')
    assert len(entries) == 0
    # BUGFIX: this result was previously computed and silently discarded —
    # the 3xx-status filter was never actually asserted on. No image entry
    # in the fixture has a 3xx status, so the match set must be empty.
    entries = page.filter_entries(request_type='.*ET', content_type='image.*',
                                  status_code='3.*')
    assert len(entries) == 0
def parser(websites, repetition):
    """For each website, parse `repetition` captured HAR files and print
    size/count statistics plus navigation-timing delays.

    Expects files named '<i><dom>har.har' and '<i><dom>Performance.har'
    in the working directory for i in [0, repetition).
    # NOTE(review): relies on HarPage, json and mean being imported at
    # module level — confirm against the file header (not visible here).
    """
    for dom in websites:
        # Visual separator between websites in the console output
        print(
            '################################################################')
        print(dom)
        # Per-repetition accumulators: byte sizes of each asset class...
        jssize = []
        cssize = []
        imagesize = []
        videosize = []
        pagesize = []
        # ...and counts of successfully-loaded (2xx) entries per class.
        jsnum = []
        csnum = []
        imagenum = []
        videonum = []
        pagenum = []
        for i in range(0, repetition):
            # General har files: one capture per repetition of this site
            string = str(i) + dom + 'har.har'
            with open(string, 'r') as f:
                # for each har file of the web site under review
                har_parser = HarPage(dom, har_data=json.loads(f.read()))
            # print ("Image Load time: " + str(har_parser.image_load_time))
            # print ("Html Load time: " + str(har_parser.html_load_time))
            # Combines all the different har files generated for that specific
            # web site and shows average/min/max.
            # For the size of the different files:
            jssize.append(har_parser.js_size)
            cssize.append(har_parser.css_size)
            imagesize.append(har_parser.image_size)
            videosize.append(har_parser.video_size)
            pagesize.append(har_parser.page_size)
            # For the number of different files (2xx responses only):
            jsnum.append(
                len(
                    har_parser.filter_entries(content_type='js.*',
                                              status_code='2.*')))
            csnum.append(
                len(
                    har_parser.filter_entries(content_type='css.*',
                                              status_code='2.*')))
            imagenum.append(
                len(
                    har_parser.filter_entries(content_type='image.*',
                                              status_code='2.*')))
            videonum.append(
                len(
                    har_parser.filter_entries(content_type='video.*',
                                              status_code='2.*')))
            pagenum.append(len(har_parser.filter_entries(status_code='2.*')))
            # NOTE(review): `h` is computed but never used — dead code?
            h = har_parser.filter_entries(content_type='text.*',
                                          status_code='2.*')
            # NOTE(review): these stats print on EVERY repetition, over the
            # lists accumulated so far. If the intent was to report once per
            # site, this section (and the Performance block below, which
            # still references loop variable `i`) may belong after the loop
            # — confirm against the original source's indentation.
            print("Max page size: " + str(max(pagesize)), end='\n')
            print("Max js size: " + str(max(jssize)), end='\n')
            print("Max css size: " + str(max(cssize)), end='\n')
            print("Max image size: " + str(max(imagesize)), end='\n')
            print("Max video size: " + str(max(videosize)), end='\n')
            print("Max page number: " + str(max(pagenum)), end='\n')
            print("Max js number: " + str(max(jsnum)), end='\n')
            print("Max css number: " + str(max(csnum)), end='\n')
            print("Max image number: " + str(max(imagenum)), end='\n')
            print("Max video number: " + str(max(videonum)), end='\n')
            print(
                '--------------------------------------------------------------------'
            )
            print("Min page size: " + str(min(pagesize)), end='\n')
            print("Min js size: " + str(min(jssize)), end='\n')
            print("Min css size: " + str(min(cssize)), end='\n')
            print("Min image size: " + str(min(imagesize)), end='\n')
            print("Min video size: " + str(min(videosize)), end='\n')
            print("Min page number: " + str(min(pagenum)), end='\n')
            print("Min js number: " + str(min(jsnum)), end='\n')
            print("Min css number: " + str(min(csnum)), end='\n')
            print("Min image number: " + str(min(imagenum)), end='\n')
            print("Min video number: " + str(min(videonum)), end='\n')
            print(
                '--------------------------------------------------------------------'
            )
            print("Average page size: " + str(mean(pagesize)), end='\n')
            print("Average js size: " + str(mean(jssize)), end='\n')
            print("Average css size: " + str(mean(cssize)), end='\n')
            print("Average image size: " + str(mean(imagesize)), end='\n')
            print("Average video size: " + str(mean(videosize)), end='\n')
            print("Average page number: " + str(mean(pagenum)), end='\n')
            print("Average js number: " + str(mean(jsnum)), end='\n')
            print("Average css number: " + str(mean(csnum)), end='\n')
            print("Average image number: " + str(mean(imagenum)), end='\n')
            print("Average video number: " + str(mean(videonum)), end='\n')
            print(
                '--------------------------------------------------------------------'
            )
            # Har files with delay specification (raw Navigation Timing dump)
            string2 = str(i) + dom + 'Performance.har'
            with open(string2) as data_file:
                data = json.load(data_file)
            # All in milliseconds.
            # Calculate the total time required to load a page
            print("Total time required to load the page: " +
                  str(data["timing"]["loadEventEnd"] -
                      data["timing"]["navigationStart"]))
            # Calculate request response times
            print("Request response times: " +
                  str(data["timing"]["responseEnd"] -
                      data["timing"]["requestStart"]))
            # connectStart: the moment the request to open a connection is
            # sent to the network.
            # responseStart: the moment the browser received the first byte
            # of the response.
            print("Time to first byte: " +
                  str(data["timing"]["responseStart"] -
                      data["timing"]["connectStart"]))
Specific max number as limit in the for loop. for x in range (0, n) Make sure CSV file is empty and exists as it writes into it. ''' for x in range(0, 86): with open(os.path.join('har_files', f'{x}.har'), 'r') as f: har_page = HarPage('page_1', har_data=json.loads(f.read())) entries = har_page.filter_entries() with open("output.csv", "a") as csv_file: csv_app = csv.writer(csv_file) csv_app.writerow([ har_page.url, har_page.hostname, har_page.page_load_time, har_page.time_to_first_byte, har_page.page_size, har_page.initial_load_time, har_page.content_load_time, har_page.html_load_time, har_page.css_load_time, har_page.js_load_time, har_page.image_load_time, har_page.audio_load_time, har_page.video_load_time ]) ''' ### WORK WITH LOAD TIMES (all load times are in ms) ### # Link http://pythonhosted.org/haralyzer/haralyzer.html