def test_create_asset_timeline(har_data):
    """Test HarParser.create_asset_timeline with a single entry.

    Verifies that the timeline contains one bucket per millisecond of the
    entry's load time, and that each bucket holds exactly the entry that was
    inserted.

    :param har_data: pytest fixture that loads a named HAR fixture file.
    """
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)
    entry = har_data('single_entry.har')

    # Get the datetime object of the start time and total load time
    time_key = dateutil.parser.parse(entry['startedDateTime'])
    load_time = int(entry['time'])
    asset_timeline = har_parser.create_asset_timeline([entry])

    # The number of entries in the timeline should match the load time
    assert len(asset_timeline) == load_time

    # Walk the timeline one millisecond at a time.
    # NOTE(review): range(1, load_time) checks load_time - 1 buckets, so the
    # final millisecond bucket is never inspected — possibly intentional,
    # TODO confirm against create_asset_timeline's boundary semantics.
    for _ in range(1, load_time):
        assert time_key in asset_timeline
        assert len(asset_timeline[time_key]) == 1
        # Compare the dicts: each field of the bucketed entry must match the
        # original. Plain key iteration replaces the Python-2-era
        # iteritems(entry); the bound value was unused anyway.
        for key in entry:
            assert asset_timeline[time_key][0][key] == entry[key]
        time_key = time_key + datetime.timedelta(milliseconds=1)
def test_create_asset_timeline(har_data):
    """Test HarParser.create_asset_timeline with a single entry.

    Verifies that the timeline contains one bucket per millisecond of the
    entry's load time, and that each bucket holds exactly the entry that was
    inserted.

    :param har_data: pytest fixture that loads a named HAR fixture file.
    """
    init_data = har_data('humanssuck.net.har')
    har_parser = HarParser(init_data)
    entry = har_data('single_entry.har')

    # Get the datetime object of the start time and total load time
    time_key = dateutil.parser.parse(entry['startedDateTime'])
    load_time = int(entry['time'])
    asset_timeline = har_parser.create_asset_timeline([entry])

    # The number of entries in the timeline should match the load time
    assert len(asset_timeline) == load_time

    # Walk the timeline one millisecond at a time.
    # NOTE(review): range(1, load_time) checks load_time - 1 buckets, so the
    # final millisecond bucket is never inspected — possibly intentional,
    # TODO confirm against create_asset_timeline's boundary semantics.
    for _ in range(1, load_time):
        assert time_key in asset_timeline
        assert len(asset_timeline[time_key]) == 1
        # Compare the dicts. BUG FIX: entry.iteritems() is Python 2 only and
        # raises AttributeError on Python 3 dicts; iterate keys directly.
        for key in entry:
            assert asset_timeline[time_key][0][key] == entry[key]
        time_key = time_key + datetime.timedelta(milliseconds=1)
# Parse the HAR content into a HarParser.
# NOTE(review): `f` is a file handle opened earlier in the script (outside this
# chunk) — presumably the HAR file; confirm against the caller.
har_parser = HarParser(json.loads(f.read()))

userRequestPages = []  ### User Requested Pages ###
entries = []

### CREATE A TIMELINE OF ALL THE ENTRIES ###
# One (url, image_load_time, page_size, image_size) tuple per top-level page,
# and a flat list of every entry across all pages.
for page in har_parser.pages:
    userRequestPages.append((page.url, page.image_load_time,page.page_size, page.image_size))
    #userRequestPages.append((page.url,0,0,0))
    for entry in page.entries:
        entries.append(entry)

#User Requested pages
# Dump the per-page summary as a TSV next to the other outputs.
# NOTE(review): column is named 'loadtime' but holds page.image_load_time —
# verify this is the intended metric.
userRequestPagesDF = pd.DataFrame(userRequestPages,columns=['url','loadtime','page_size','image_size'])
userRequestPagesDF.to_csv(outPathFile+'_userRequestPages.tsv',sep='\t',header=True,index=False)

# Bucket every entry into a millisecond-keyed timeline, then flatten it back
# into (request url, entry time) pairs for aggregation.
timeline = har_parser.create_asset_timeline(entries)

webRequestURL = []
for key, value in timeline.items():
    for data in value:
        webRequestURL.append((data['request']['url'],data['time']))
        # ,data['request']['queryString']

# Accumulators populated later in the script (outside this chunk):
# domain            — unique domains seen
# domainURL         — per-domain URL records
# domainDqpKeyValue — decoded query-parameter key/value records
domain = set()
domainURL = []
domainDqpKeyValue = []

# Total time per unique request URL. NOTE: an entry's 'time' appears once per
# millisecond bucket it spans, so this sum scales with load duration — TODO
# confirm that is the intended aggregation.
df = pd.DataFrame(webRequestURL,columns=['url','time'])
df = df.groupby('url')['time'].sum().reset_index()

def cleanDqp(dqp):
    # Normalize a decoded-query-parameter string: collapse runs of '&' and '='
    # to a single character and strip surrounding whitespace.
    return re.sub('=+','=',re.sub('&+','&',dqp)).strip()