Esempio n. 1
0
def main():
    """Run the deployment UI tests through a BrowserMob proxy.

    Starts the proxy server on port 1337, creates a proxy on port 1338,
    runs each test in order and exits the process with the number of
    failed tests as the exit status.  The test session and the proxy
    server are torn down even when a test raises.
    """
    s = Server('/home/creature/browsermob/bin/browsermob-proxy', {'port': 1337})
    s.start()
    failcount = 0
    try:
        proxy = s.create_proxy({'port': 1338})
        d = tu.newProxytest(proxy)
        try:
            proxy.new_har(options={'captureHeaders': False, 'captureContent': True})
            if not deployOneTest(d):
                failcount += 1
            if not deleteTest(d):
                failcount += 1
            if not projectDeployTest(d):
                failcount += 1
                # Dump the captured traffic only when this test fails,
                # so the failure can be inspected afterwards.
                with open('deploy.har', 'w') as out:
                    out.write(str(proxy.har))
            # test all services from multiple projects showing up in services
            if not multiDeployTest(d):
                failcount += 1
            # test that stopping services from services page removes them
            # from project deployments
            if not cleanupTest(d):
                failcount += 1
        finally:
            tu.endtest(d)
    finally:
        s.stop()
    sys.exit(failcount)
def proxy():
    """Yield a BrowserMob proxy client with a HAR titled 'project_har' started.

    The proxy server is stopped when the generator finishes, is closed,
    or has an exception thrown into it at the ``yield``.
    """
    server = Server("/home/sergey/repositories/browsermob-proxy-2.1.4-bin/browsermob-proxy-2.1.4/bin/browsermob-proxy")
    server.start()
    try:
        client = server.create_proxy()
        client.new_har(title='project_har')
        yield client
    finally:
        # Without the finally, an error in the consumer would leave the
        # proxy server process running.
        server.stop()
def browser_and_proxy():
    """Yield a ``(browser, proxy)`` pair: headless Chrome routed through BrowserMob.

    The proxy starts a HAR with content capture enabled.  On exit the
    browser is quit first and the proxy server is stopped afterwards; the
    server is also stopped if creating the proxy or the browser fails.
    """
    server = Server(config.BROWSERMOB_PATH)
    server.start()
    try:
        proxy = server.create_proxy()
        proxy.new_har(options={'captureContent': True})

        # Set up Chrome
        option = webdriver.ChromeOptions()
        option.add_argument('--proxy-server=%s' % proxy.proxy)

        # Content-setting value 2 for images is passed through as a Chrome pref.
        prefs = {"profile.managed_default_content_settings.images": 2}
        option.add_experimental_option("prefs", prefs)
        option.add_argument('--headless')
        option.add_argument('--no-sandbox')
        option.add_argument('--disable-gpu')

        capabilities = DesiredCapabilities.CHROME.copy()
        capabilities['acceptSslCerts'] = True
        capabilities['acceptInsecureCerts'] = True

        browser = webdriver.Chrome(options=option,
                                   desired_capabilities=capabilities,
                                   executable_path=config.CHROME_PATH)
        try:
            yield browser, proxy
        finally:
            browser.quit()
    finally:
        server.stop()
Esempio n. 4
0
def locate(url, search_parameters=config.SEARCH_PARAMETERS):
    """Load *url* in Firefox behind a BrowserMob proxy and collect matching requests.

    Args:
        url: Page to open.
        search_parameters: Iterable of substrings; a request whose URL
            (query string stripped) contains one of them is recorded as a
            stream.

    Returns:
        A ``(streams, subtitles)`` tuple of de-duplicated request
        descriptions, each a dict with ``method``, ``url`` and ``headers``.
    """
    server = Server(config.BROWSERMOB_PROXY)
    server.start()
    try:
        proxy = server.create_proxy()
        options = Options()
        options.headless = config.HEADLESS
        profile = webdriver.FirefoxProfile(config.FIREFOX_PROFILE)
        profile.set_proxy(proxy.selenium_proxy())
        browser = webdriver.Firefox(firefox_profile=profile, options=options)
        try:
            proxy.new_har('source', options={'captureHeaders': True})
            browser.get(url)
            sleep(5)
            # The HAR is fetched from the proxy server, so it must be read
            # while the server is still running.
            entries = proxy.har['log']['entries']
        finally:
            # quit() ends the whole driver session, not just the window.
            browser.quit()
    finally:
        server.stop()

    streams = []
    subtitles = []
    for entry in entries:
        req = entry['request']
        url_path = req['url'].split('?')[0]
        request = {
            'method': req['method'],
            'url': req['url'],
            'headers': {h['name']: h['value'] for h in req['headers']},
        }
        is_subtitle = any(ext in url_path for ext in ('.vtt', '.srt', '.ass'))
        # Classification is done per search parameter: a subtitle URL is
        # only recorded for a parameter it does not match.
        for param in search_parameters:
            if param in url_path:
                if request not in streams:
                    streams.append(request)
            elif is_subtitle:
                if request not in subtitles:
                    subtitles.append(request)

    # Remove the log files left in the working directory.
    for log_name in ('bmp.log', 'geckodriver.log', 'server.log'):
        log_path = os.path.join(os.path.abspath(os.getcwd()), log_name)
        if os.path.exists(log_path):
            os.remove(log_path)
    return streams, subtitles
Esempio n. 5
0
class ProxyDistributor:
    """Owns one BrowserMob server and hands out proxies on consecutive ports."""

    browsermob_proxy_path = "/home/amerigo/PycharmProjects/Progetto/tesi/risorse/browsermob-proxy/bin/browsermob-proxy"

    def __init__(self):
        """Create (but do not start) the server; the first proxy port is 9090."""
        self.server = Server(self.browsermob_proxy_path)
        self.actual_port = 9090

    def get_new_proxy(self):
        """Create a proxy on the next port and advance the port counter."""
        settings = dict(
            port=self.actual_port,
            captureHeaders=True,
            captureContent=True,
            captureBinaryContent=True,
        )
        new_proxy = self.server.create_proxy(settings)
        # Only advance once the proxy has actually been created.
        self.actual_port = self.actual_port + 1
        return new_proxy

    def start(self):
        """Start the server, creating it first if it is missing."""
        if self.server is None:
            self.server = Server(self.browsermob_proxy_path)
        self.server.start()

    def stop(self):
        """Stop the server if there is one."""
        if self.server is None:
            return
        self.server.stop()
Esempio n. 6
0
def getAuthKey():
    """Log in through a proxied Chrome and return the captured Authorization header.

    Credentials and the proxy server path are read from ``config``.  The
    captured HAR is also written to ``data.json``.

    Returns:
        The value of the ``Authorization`` request header sent to the
        ``checkuser`` endpoint, or ``''`` if no such header was captured.
    """
    username = config['Users']['username']
    password = config['Users']['password']
    proxyServerAddress = config['config']['proxyserverpath']

    # Creating Proxy server
    server = Server(proxyServerAddress)
    server.start()
    try:
        proxy = server.create_proxy()
        proxy.whitelist(regexp='*emofid.com*', status_code=123)
        proxy.new_har(title="mofid",
                      options={
                          'captureContent': False,
                          'captureBinaryContent': False,
                          'captureHeaders': True
                      })

        # Creating browser
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
        browser = webdriver.Chrome(chrome_options=chrome_options)
        try:
            url = "https://account.emofid.com/Login?returnUrl=%2Fconnect%2Fauthorize%2Fcallback%3Fclient_id%3Deasy2_client_pkce%26redirect_uri%3Dhttps%253A%252F%252Fd.easytrader.emofid.com%252Fauth-callback%26response_type%3Dcode%26scope%3Deasy2_api%2520openid%26state%3Df8ff796b1d994e0d8f6fa1f6e878f165%26code_challenge%3D7qf19ieakAg4BvrDkBTHbr5h7_A0BSvci7dtp-0ZUWY%26code_challenge_method%3DS256%26response_mode%3Dquery"
            browser.get(url)

            userFiled = browser.find_element_by_xpath('//*[@id="Username"]')
            userFiled.clear()
            userFiled.send_keys(username)

            passwordFiled = browser.find_element_by_xpath('//*[@id="Password"]')
            passwordFiled.clear()
            passwordFiled.send_keys(password, Keys.RETURN)

            element = WebDriverWait(browser, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, "/html/body/app-root/d-release-notes/div/div/button")))
            element.click()

            # Best-effort click: a failure here is reported but not fatal.
            try:
                browser.find_element_by_xpath(
                    '//*[@id="root"]/main/div[2]/div[1]/ul[2]/li[1]/span/i').click()
            except Exception:
                print('Error')

            # The HAR is fetched from the proxy server, so read it once
            # while the server is still running and reuse it below.
            har = proxy.har
        finally:
            browser.quit()
    finally:
        server.stop()

    with open('data.json', 'w') as outfile:
        json.dump(har, outfile)

    tree = Tree(har)
    authKey = ''
    result = tree.execute(
        "$.log.entries.request[@.url is 'https://d11.emofid.com/easy/api/account/checkuser'].headers"
    )
    # Each result item is one request's header list; the last match wins.
    for entry in result:
        for e in entry:
            if e['name'] == 'Authorization':
                authKey = e["value"]
    return authKey
Esempio n. 7
0
def fetch_har_by_url(url, segments, index):
    """Load *url* in Firefox behind a BrowserMob proxy and return the HAR.

    Args:
        url: Page to load; also used as the HAR page reference.
        segments: Unused by this function.
        index: Unused by this function.

    Returns:
        The HAR captured with headers, content and binary content enabled.
    """
    project_dir = os.path.dirname(__file__)
    bpm_path = os.path.join(project_dir,
                            "browsermob-proxy-2.1.1/bin/browsermob-proxy")

    server = Server(bpm_path)
    server.start()
    try:
        proxy = server.create_proxy()

        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())

        driver = webdriver.Firefox(firefox_profile=profile,
                                   executable_path=os.path.join(
                                       project_dir, _geckodriver))
        try:
            proxy.new_har(url,
                          options={
                              'captureHeaders': True,
                              'captureContent': True,
                              'captureBinaryContent': True
                          })
            driver.get(url)
            # Wait for the network to go quiet only after navigating;
            # waiting before the page load has nothing to wait for.
            proxy.wait_for_traffic_to_stop(2000, 10000)
            return proxy.har
        finally:
            driver.quit()
    finally:
        server.stop()
Esempio n. 8
0
class BrowserMobProxyTestCaseMixin(object):
    """Test-case mixin that runs a BrowserMob server around each test."""

    def __init__(self, *args, **kwargs):
        """Take the required ``browsermob_port`` and ``browsermob_script`` kwargs."""
        self.browsermob_server = None
        self.browsermob_port = kwargs.pop('browsermob_port')
        self.browsermob_script = kwargs.pop('browsermob_script')

    def setUp(self):
        """Start the server; raise ValueError when no script was configured."""
        if not self.browsermob_script:
            raise ValueError('Must specify --browsermob-script in order to '
                             'run browsermobproxy tests')
        server_options = {}
        if self.browsermob_port:
            server_options['port'] = self.browsermob_port
        self.browsermob_server = Server(self.browsermob_script,
                                        options=server_options)
        self.browsermob_server.start()

    def create_browsermob_proxy(self):
        """Create a proxy client and set the browser's proxy prefs to its port."""
        proxy_client = self.browsermob_server.create_proxy()
        with self.marionette.using_context('chrome'):
            script = """
                Services.prefs.setIntPref('network.proxy.type', 1);
                Services.prefs.setCharPref('network.proxy.http', 'localhost');
                Services.prefs.setIntPref('network.proxy.http_port', %(port)s);
                Services.prefs.setCharPref('network.proxy.ssl', 'localhost');
                Services.prefs.setIntPref('network.proxy.ssl_port', %(port)s);
            """ % {"port": proxy_client.port}
            self.marionette.execute_script(script)
        return proxy_client

    def tearDown(self):
        """Stop the server if one is running and forget it."""
        if not self.browsermob_server:
            return
        self.browsermob_server.stop()
        self.browsermob_server = None

    # Also stop the server when the instance is garbage-collected.
    __del__ = tearDown
Esempio n. 9
0
def proxy():
    """Yield a BrowserMob proxy client with a HAR titled 'test_har' started.

    The server binary is looked up next to this file.  The proxy server is
    stopped when the generator finishes, is closed, or has an exception
    thrown into it at the ``yield``.
    """
    server = Server(os.path.join(os.path.dirname(__file__), 'browsermob-proxy-2.1.4/bin/browsermob-proxy'))
    server.start()
    try:
        client = server.create_proxy()
        client.new_har(title='test_har')
        yield client
    finally:
        # Without the finally, an error in the consumer would leave the
        # proxy server process running.
        server.stop()
def save_web_page_stats_to_har(url, webdriver_name, save_to_file):
    """Capture the HAR for *url* and write it next to a profiling report.

    The page is loaded with the Selenium webdriver named *webdriver_name*
    through a BrowserMob proxy on a random free port.  Two files are
    written: ``save_to_file + '.har'`` with the JSON HAR and
    ``save_to_file + '.prof'`` with the report produced by
    ``report_har_dictionary``.
    """
    server = Server(Config.browsermob_executable)
    server.start()
    free_port = get_a_random_free_tcp_port()
    proxy_client = server.create_proxy({"port": free_port})
    browser = create_selenium_webdriver(webdriver_name, proxy_client)
    try:
        proxy_client.new_har(url, options={'captureHeaders': True})
        browser.get(url)

        serialized = json.dumps(
            proxy_client.har,
            ensure_ascii=False,
            indent=4,
            separators=(',', ': '),
        )
        # '.HAR' file: the pretty-printed JSON archive.
        har_path = save_to_file + '.har'
        with io.open(har_path, mode='wt', buffering=1, encoding='utf8',
                     errors='backslashreplace', newline=None) as har_file:
            har_file.write(unicode(serialized))

        # '.PROF' file: profiling report (timings, sizes, etc).
        prof_path = save_to_file + '.prof'
        with io.open(prof_path, mode='wb', buffering=1,
                     newline=None) as prof_file:
            report_har_dictionary(proxy_client.har, prof_file)
    finally:
        proxy_client.close()
        server.stop()
        browser.quit()
Esempio n. 11
0
def main(argv):
	"""Open the start URL behind a BrowserMob proxy and click its same-domain links.

	Command-line options: ``-u`` start URL (required), ``-c`` JSON file
	with cookies to add, ``-w`` webdriver type ("firefox" by default, or
	"phantomjs").  For every displayed link whose href contains the start
	URL's network location, the link is clicked, a message is printed and
	``show_status_codes`` is called with the current HAR.

	``argv`` is not used: ``parser.parse_args()`` is called without
	arguments.
	"""
	init()

	parser = argparse.ArgumentParser()
	parser.add_argument('-u', action='store', dest='start_url', help='Set page URL', required=True)
	parser.add_argument('-c', action='store', dest='cookies_file', help='JSON file with cookies', required=False)
	parser.add_argument('-w', action='store', dest='webdriver_type', help='Set WebDriver type (firefox or phantomjs, firebox by default)', default="firefox", required=False)
	results = parser.parse_args()
	
	start_url = results.start_url
	cookies_file = results.cookies_file
	webdriver_type = results.webdriver_type

	# Only links whose href contains this host are followed.
	allowed_domain = urlparse(start_url).netloc

	browsermobproxy_path = get_browsermobproxy_path()

	options = {
		'port': 9090,
	}

	server = Server(browsermobproxy_path,options)
	server.start()
	proxy = server.create_proxy()

	if webdriver_type == "phantomjs":
		# NOTE(review): the proxy address is hard-coded to localhost:9091 rather
		# than taken from `proxy`; presumably the first proxy is expected to be
		# allocated on the port after the server's 9090 - confirm.
		service_args = ['--proxy=localhost:9091','--proxy-type=http',]
		driver = webdriver.PhantomJS(service_args=service_args)
		driver.set_window_size(1440, 1024)
	else:
		profile  = webdriver.FirefoxProfile()
		profile.set_proxy(proxy.selenium_proxy())
		driver = webdriver.Firefox(firefox_profile=profile)

	proxy.new_har('woodpycker', options={'captureHeaders': True, 'captureContent': True})
	driver.get(start_url)

	# Cookies are added after the first page load, then the page is refreshed.
	if not cookies_file is None:
		with open(cookies_file, 'rb') as fp:
		    cookies = json.load(fp)
		for cookie in cookies:
			driver.add_cookie(cookie)
		driver.refresh()

	links = driver.find_elements_by_tag_name('a')
	# NOTE(review): lenl is computed once from the initial list, while `links`
	# is re-queried inside the loop; if the re-queried list is shorter,
	# links[i] can raise IndexError - confirm.
	lenl = len(links)
	for i in range(0,lenl):
		if links[i].is_displayed():
			url = links[i].get_attribute('href')
			text = links[i].get_attribute('text')
			# NOTE(review): assumes href is a string; an anchor without href
			# would presumably make url None here - confirm.
			if url.find(allowed_domain) != -1:
				links[i].click()
				# Python 2 print statement.
				print "%s Clicked on the link '%s' with HREF '%s'" % (Fore.BLUE+"*"+Fore.RESET,Style.BRIGHT+text+Style.RESET_ALL,Style.BRIGHT+url+Style.RESET_ALL)
				show_status_codes(proxy.har,allowed_domain)
			# Runs for every displayed link, clicked or not: go back, refresh
			# and look the anchors up again.
			driver.back()
			driver.refresh()
			links = driver.find_elements_by_tag_name('a')

	driver.quit()
	server.stop()
Esempio n. 12
0
class testAuthBasic(unittest.TestCase):
    """Basic-auth check: Firefox is routed through a BrowserMob proxy that supplies the credentials."""

    def setUp(self):
        """Start the proxy server and a proxied Firefox, then open the demo site."""
        self.server = Server(
            r"C:\Users\user\Documents\ScrinShots\SeleniumLessons\basic_auth\browsermob-proxy\bin\browsermob-proxy"
        )
        self.server.start()
        # Registered right away: tearDown is not run when setUp fails, but
        # cleanups are, so the server process cannot be left behind.
        self.addCleanup(self.server.stop)

        self.proxy = self.server.create_proxy()

        self.profile = webdriver.FirefoxProfile()
        self.profile.set_proxy(self.proxy.selenium_proxy())

        self.driver = webdriver.Firefox(firefox_profile=self.profile)
        # Cleanups run last-in-first-out: the browser quits before the
        # proxy server stops.
        self.addCleanup(self.driver.quit)
        self.driver.implicitly_wait(8)

        self.driver.get("https://auth-demo.aerobatic.io/")

    def test_search(self):
        """Register credentials with the proxy and open the protected page."""
        self.proxy.basic_authentication("auth-demo.aerobatic.io", "aerobatic",
                                        "aerobatic")
        self.driver.get("https://auth-demo.aerobatic.io/protected-standard/")

        # find_element raises if the link is missing, which fails the test.
        assert self.driver.find_element_by_css_selector(
            "a.button-primary[href='/']")
Esempio n. 13
0
def getToken():
    """Open the company detail page behind a proxy and return a captured token.

    The page is refreshed for as long as the "verification expired" dialog
    text is present in the page source.  The token is taken from the
    fifth request header of calls to the ``caDetailList`` API.

    Returns:
        One of the distinct non-empty token values captured.

    Raises:
        IndexError: If no token was captured.
    """
    server = Server(r'F:\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
    server.start()
    try:
        proxy = server.create_proxy()

        chrome_options = Options()
        chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy))

        driver = webdriver.Chrome(chrome_options=chrome_options)
        try:
            base_url = "http://jzsc.mohurd.gov.cn/data/company/detail?id=C5C5C4C3C5C2C7C7C5C5C0C2C7CCC7C7C5C6"
            proxy.new_har("douyin",
                          options={
                              'captureHeaders': True,
                              'captureContent': True
                          })
            driver.get(base_url)
            while '验证已过期,是否重新重新进行验证或停留在当前页面?' in driver.page_source:
                driver.find_element_by_xpath(
                    '//*[@id="app"]/div/header/div[5]/div/div[3]/div/button[1]').click()
                time.sleep(2.5)
                driver.refresh()
                time.sleep(3)
            result = proxy.har
        finally:
            driver.quit()
    finally:
        server.stop()

    token = set()
    for entry in result['log']['entries']:
        request = entry['request']
        if "api/webApi/dataservice/query/comp/caDetailList?qyId" in str(request['url']):
            # NOTE(review): the token is read by header position (index 4),
            # not by header name - confirm the header order is stable.
            access_token = request['headers'][4]['value']
            if access_token != '':
                token.add(access_token)
    if not token:
        raise IndexError("no access token was captured from the caDetailList requests")
    return list(token)[0]
Esempio n. 14
0
class GetWebPageHar:
    """Captures the HAR of a page load in Chrome through one shared BrowserMob proxy."""

    proxy_path = "./browsermob-proxy-2.1.4/bin/browsermob-proxy"

    def __init__(self):
        """Start the proxy server and create the proxy used by every capture."""
        self.server = Server(str(Path(self.proxy_path).absolute()))
        self.server.start()
        try:
            self.proxy = self.server.create_proxy()
        except Exception:
            # Do not leave the server process behind if the proxy cannot
            # be created.
            self.server.stop()
            raise

    def get_har(self, url, output_filename):
        """Load *url* in a fresh incognito Chrome and write the HAR as JSON.

        Args:
            url: Page to load.
            output_filename: Path of the JSON file to write (UTF-8).
        """
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--proxy-server={0}".format(
            self.proxy.proxy))
        chrome_options.add_argument("--ignore-certificate-errors")
        chrome_options.add_argument("--incognito")
        driver = webdriver.Chrome(chrome_options=chrome_options)
        try:
            self.proxy.new_har("google")
            driver.get(url)
            time.sleep(10)
            har_json = json.dumps(self.proxy.har, indent=4, ensure_ascii=False)
        finally:
            driver.quit()
        # ensure_ascii=False leaves non-ASCII characters in the output, so
        # the file encoding must not depend on the platform default.
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(har_json)

    def stop(self):
        """Stop the proxy server."""
        self.server.stop()
def CaptureNetworkTraffic(url, server_ip, headers, file_path):
    """Capture the browser's network traffic for *url* and save it as a HAR file.

    Args:
        url: Page URL to load in Firefox.
        server_ip: Address the three example hosts are remapped to.
        headers: Dictionary of headers to set on the proxy.
        file_path: File in which the HAR is stored as JSON.
    """
    port = {'port': 9090}
    server = Server("G:\\browsermob\\bin\\browsermob-proxy",
                    port)  # Path to the BrowserMobProxy
    server.start()
    try:
        proxy = server.create_proxy()
        proxy.remap_hosts("www.example.com", server_ip)
        proxy.remap_hosts("www.example1.com", server_ip)
        proxy.remap_hosts("www.example2.com", server_ip)
        proxy.headers(headers)
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        try:
            new = {'captureHeaders': 'True', 'captureContent': 'True'}
            proxy.new_har("google", new)
            driver.get(url)
            # The HAR is fetched from the proxy server, so keep the result
            # read while the server is still running.
            har = proxy.har
        finally:
            driver.quit()
    finally:
        server.stop()
    with open(file_path, 'w') as file1:
        json.dump(har, file1)
Esempio n. 16
0
def reloadHeaderAndCookie():
    """Browse united.com behind a proxy and save the captured HAR.

    Frees port 8090, starts the proxy server there, loads the home page
    and a flight-search page in Chrome, then writes the HAR (headers and
    cookies captured) to ``latestUnitedAuth.json``.  The module-level
    ``browser`` is rebound to the Chrome instance, which is quit before
    returning.  The browser and the server are shut down even on error.
    """
    global browser
    killPortProcess(8090)
    browsermob_path = '/usr/local/browsermob-proxy-2.1.4/bin/browsermob-proxy'
    server = Server(browsermob_path, {'port': 8090})
    server.start()
    try:
        time.sleep(10)
        proxy = server.create_proxy()
        time.sleep(1)

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
        browser = webdriver.Chrome(options=chrome_options)
        try:
            url1 = "https://www.united.com/en/us"
            url2 = "https://www.united.com/ual/en/US/flight-search/book-a-flight/results/awd?f=SFO&t=PVG&d=2021-11-07&tt=1&at=1&sc=7&px=1&taxng=1&newHP=True&idx=1"

            options = {'captureHeaders': True, 'captureCookies': True}
            proxy.new_har("united", options=options)
            browser.get(url1)
            time.sleep(5)

            browser.get(url2)

            print("click close button to get actual results")
            time.sleep(5)
            print("we now save all auths")
            newH = proxy.har  # returns a HAR JSON blob
            with open('latestUnitedAuth.json', 'w') as outfile:
                json.dump(newH, outfile)
            time.sleep(10)
        finally:
            browser.quit()
    finally:
        server.stop()
Esempio n. 17
0
def download_file(url, file_name):
    """Find the embedded vhx.tv video requested by *url* and download it.

    The page is loaded in headless Chrome behind a BrowserMob proxy; the
    first captured request URL containing ``https://embed.vhx.tv/videos``
    is handed to ``./youtube-dl``.

    Args:
        url: Page that embeds the video.
        file_name: Output name; ``.mp4`` is appended.

    Raises:
        IndexError: If no embedded video URL was captured.
    """
    server = Server('./browsermob-proxy')  # Local path to BMP
    server.start()
    try:
        # The proxy records the HAR that holds the URLs the page loads from.
        proxy = server.create_proxy()
        chrome_options = Options()
        chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument('--headless')
        driver = webdriver.Chrome(executable_path='./chromedriver',
                                  options=chrome_options)
        try:
            proxy.new_har('filename')
            driver.get(url)
            save = proxy.har
        finally:
            driver.quit()
    finally:
        server.stop()

    results = [entry['request']['url'] for entry in save['log']['entries']]
    embedded_links = [
        res for res in results if "https://embed.vhx.tv/videos" in res
    ]
    if not embedded_links:
        raise IndexError("no https://embed.vhx.tv/videos request was captured")
    # Options and their values are separate argv items; the original relied
    # on adjacent string literals being concatenated ("-f" "best...").
    subprocess.call([
        "./youtube-dl",
        "-f", "best[height=540]",
        "-o", "{}.mp4".format(file_name),
        "--ignore-errors",
        embedded_links[0],
    ])
Esempio n. 18
0
def get_dependencies(url):
    """Return the distinct URLs requested while loading *url* in headless Firefox.

    The page is loaded through a BrowserMob proxy and the request URL of
    every HAR entry is collected; duplicates are dropped, so the order of
    the returned list is not meaningful.
    """
    clean_opened_processes()
    bmp_server = Server("browsermob-proxy/bin/browsermob-proxy")
    bmp_server.start()
    time.sleep(0.5)

    bmp_proxy = bmp_server.create_proxy()
    time.sleep(0.5)

    firefox_options = Options()
    firefox_options.headless = True

    firefox_profile = webdriver.FirefoxProfile()
    firefox_profile.set_proxy(bmp_proxy.selenium_proxy())

    browser = webdriver.Firefox(options=firefox_options,
                                firefox_profile=firefox_profile)
    bmp_proxy.new_har("captured_elems")
    browser.get(url)
    time.sleep(3)

    # Collect into a set so duplicates are removed as they are read.
    unique_urls = set()
    for har_entry in bmp_proxy.har["log"]["entries"]:
        unique_urls.add(har_entry["request"]["url"])

    bmp_server.stop()
    browser.quit()

    return list(unique_urls)
Esempio n. 19
0
def print_hi():
    """Load baidu.com in Chrome behind a BrowserMob proxy and print what was captured.

    Prints the whole HAR, then the response body text of every entry that
    has one.  The browser and the proxy server are shut down even on error.
    """
    server = Server(r'D:\exchange_data\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
    server.start()
    try:
        proxy = server.create_proxy()

        # Configure the driver options
        chrome_options = Options()
        chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy))

        driver = webdriver.Chrome(chrome_options=chrome_options)
        try:
            url = 'https://www.baidu.com/'
            proxy.new_har('fund', options={'captureHeaders': True, 'captureContent': True})
            driver.get(url)
            result = proxy.har
        finally:
            driver.quit()
    finally:
        server.stop()

    print(result)

    for entry in result['log']['entries']:
        # To restrict the output to one data endpoint, filter on
        # entry['request']['url'] here (e.g. "lsjz?callback=" in the URL).
        content = entry['response']['content']
        # 'text' is absent for responses captured without a body.
        text = content.get('text')
        if text is not None:
            # Body returned by the endpoint
            print(text)
def CaptureNetworkTraffic(url, server_ip, headers, file_path):
    """Capture the browser's network traffic for *url* and save it as a HAR file.

    Args:
        url: Page URL to load in Firefox.
        server_ip: Address the three example hosts are remapped to.
        headers: Dictionary of headers to set on the proxy.
        file_path: File in which the HAR is stored as JSON.
    """
    port = {'port': 9090}
    server = Server("G:\\browsermob\\bin\\browsermob-proxy", port)  # Path to the BrowserMobProxy
    server.start()
    try:
        proxy = server.create_proxy()
        proxy.remap_hosts("www.example.com", server_ip)
        proxy.remap_hosts("www.example1.com", server_ip)
        proxy.remap_hosts("www.example2.com", server_ip)
        proxy.headers(headers)
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        try:
            new = {'captureHeaders': 'True', 'captureContent': 'True'}
            proxy.new_har("google", new)
            driver.get(url)
            # The HAR is fetched from the proxy server, so keep the result
            # read while the server is still running.
            har = proxy.har
        finally:
            driver.quit()
    finally:
        server.stop()
    with open(file_path, 'w') as file1:
        json.dump(har, file1)
Esempio n. 21
0
def run(dep, arr, arr_date, num, china):
    """Schedule one crawl task per eligible route and date, then run them all.

    A route is eligible when departure and arrival differ and at least one
    of the two is not in *china*.

    Args:
        dep: Departure cities.
        arr: Arrival cities.
        arr_date: Dates to search.
        num: Value passed to ``asyncio.Semaphore`` to limit concurrency.
        china: Collection of cities; routes entirely inside it are skipped.
    """
    server = Server(path)  # path of the server script
    server.start()
    tasks = []
    semaphore = asyncio.Semaphore(num)  # limits concurrency
    i = 0
    for travel_date in arr_date:
        for origin in dep:
            for destination in arr:
                if origin == destination:
                    continue
                if origin in china and destination in china:
                    continue
                url = search_url(origin, destination, travel_date)
                print(origin + "--" + destination +
                      "开始爬取数据..." + str(i))
                i += 1
                # Retry until the task has been scheduled.
                while True:
                    try:
                        coro = get_request(url, server, semaphore,
                                           origin, destination,
                                           travel_date)
                        future = asyncio.ensure_future(coro)
                        future.add_done_callback(callback)
                        tasks.append(future)
                        break
                    except Exception as e:
                        print(e)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    print("server closed")
    server.stop()
Esempio n. 22
0
    def __init__(self, browsermobDirectory, headless=False):
        """Capture a request signature and the trending hashtags from TikTok.

        Kills any running process named "browsermob-proxy", starts a new
        proxy server on port 8090, loads the trending page in Firefox and
        reads the signature out of the captured list request.

        Args:
            browsermobDirectory: Path to the browsermob-proxy executable,
                e.g. "browsermob-proxy/bin/browsermob-proxy".
            headless: Run Firefox headless when true.

        Sets ``self.hashtag``, ``self.headless`` and
        ``self.browsermobDirectory``; ``self.signature`` is set only if a
        matching request was captured.
        """
        # Imports
        print(
            "New class reference, finding valid signature. This might take a minute."
        )
        from browsermobproxy import Server
        import psutil
        import time
        from selenium import webdriver
        from selenium.webdriver.firefox.options import Options

        # Kills any browsermob-proxy
        for proc in psutil.process_iter():
            # check whether the process name matches
            if proc.name() == "browsermob-proxy":
                proc.kill()

        server = Server(path=browsermobDirectory, options={'port': 8090})
        server.start()
        try:
            time.sleep(1)
            proxy = server.create_proxy()
            time.sleep(1)

            # Creates FF profile
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(proxy.selenium_proxy())
            options = Options()
            if headless:
                options.headless = True
            driver = webdriver.Firefox(firefox_profile=profile, options=options)
            try:
                # Records FF Har
                proxy.new_har("list")
                driver.get("https://www.tiktok.com/en/trending")
                data = proxy.har
                for element in data['log']['entries']:
                    request_url = element['request']['url']
                    if ("https://m.tiktok.com/share/item/list?" in request_url
                            or "https://www.tiktok.com/share/item/list?" in request_url):
                        print("Found signature, continuing.")
                        # NOTE(review): the signature is read by position
                        # (7th query parameter), not by name - confirm.
                        self.signature = element['request']['queryString'][6]['value']

                # Get Trending hashtags
                hashtags = driver.find_elements_by_xpath(
                    '//h3[@class="_list_item_title"]/a')
                hashtagArray = [
                    hashtag.get_attribute('title') for hashtag in hashtags
                ]
            finally:
                driver.quit()
        finally:
            server.stop()

        self.hashtag = hashtagArray
        self.headless = headless
        self.browsermobDirectory = browsermobDirectory
Esempio n. 23
0
def getVideoMp4Url():
    """Fill in the missing ``mp4url`` of every row in the ``video`` table.

    Logs in to the site in a proxied Firefox, then for each video without
    an mp4 URL opens its page and polls the HAR every 3 seconds until a
    request URL containing "mp4" appears; that URL is stored and committed.
    The database connection, browser and proxy server are released even
    on error.
    """
    conn = sqlite3.connect('sqlite.db')
    try:
        cursor = conn.cursor()
        cursor.execute("select id,url,mp4url from video;")
        result = cursor.fetchall()
        server = Server("/bin/browsermob-proxy-2.0-beta-6/bin/browsermob-proxy")
        server.start()
        try:
            proxy = server.create_proxy()

            # Configure the browser
            profile = webdriver.FirefoxProfile()
            profile.set_preference("javascript.enabled", False)
            profile.set_proxy(proxy.selenium_proxy())
            driver = webdriver.Firefox(firefox_profile=profile)
            try:
                driver.get(
                    "http://www.tongzhuo100.com/login/v/?url=http://www.tongzhuo100.com/")

                # Enter account and password
                # NOTE(review): credentials are hard-coded in source.
                driver.find_element_by_name("usr").send_keys("18500951888")
                stu_pwd = driver.find_element_by_name("pwd")
                stu_pwd.send_keys("tz4006345699")

                # Log in
                stu_pwd.send_keys(Keys.RETURN)
                time.sleep(10)

                for video_id, page_url, mp4_url in result:
                    if mp4_url:
                        print("aready update %s" % page_url)
                        continue
                    proxy.new_har("tongzhuo")
                    driver.get(page_url)
                    # Poll until the page has requested an mp4; this loops
                    # forever if it never does.
                    while True:
                        found = None
                        for entry in proxy.har['log']['entries']:
                            url = entry['request']['url']
                            if url.find("mp4") != -1:
                                found = url
                                break
                        if found is not None:
                            print("update video set mp4url='%s' where id='%s';" % (
                                found, video_id))
                            # The URL comes from captured traffic, so it is
                            # bound as a parameter, never spliced into SQL.
                            cursor.execute(
                                "update video set mp4url=? where id=?;",
                                (found, video_id))
                            conn.commit()
                            break
                        time.sleep(3)
            finally:
                driver.quit()
        finally:
            server.stop()
    finally:
        conn.close()
def main(argv):
    """Play every song of a playlist behind a proxy and queue the mp3 URLs seen.

    Starts the ``threadMaster`` thread, makes sure the download folder
    exists, opens the playlist named in ``FLAGS.playlist`` in Chrome and
    plays each song in turn.  Every new request URL matching ``.mp3?`` is
    appended to ``downList`` under ``lock``.  When all songs have been
    played, ``over`` is set and the master thread is joined.

    Args:
        argv: Ignored.
    """
    del argv
    global folder_path, max_thread, over
    folder_path = os.path.abspath(FLAGS.download)
    max_thread = FLAGS.threads
    t1 = threading.Thread(target=threadMaster, name="master")
    t1.start()
    try:
        os.mkdir(folder_path)
    except OSError:
        # An already-existing directory is fine; anything else is fatal.
        if not os.path.isdir(folder_path):
            print("Error!")
            exit(0)
    server = Server(FLAGS.mob)
    server.start()
    px = server.create_proxy()
    co = webdriver.ChromeOptions()
    co.add_argument("--proxy-server={}".format(px.proxy))
    driver = webdriver.Chrome(FLAGS.chromedriver, chrome_options=co)
    with open(FLAGS.playlist, "r") as f:
        playlist_link = f.readline()
    driver.get(playlist_link)
    songs = len(
        driver.find_element_by_css_selector(
            ".track-list").find_elements_by_css_selector(".song-wrap"))
    px.new_har("saavn")
    # Raw string: "\." and "\?" are regex escapes, not string escapes.
    mp3_request = re.compile(r"\.mp3\?")
    seen_urls = set()
    driver.execute_script(
        "Content.playAllSongs(null,true,{},true,null)".format(str(0)))
    _ = input("Press [Enter] when Ready")
    for i in range(songs):
        driver.execute_script(
            "Content.playAllSongs(null,true,{},true,null)".format(str(i)))
        time.sleep(2)
        for ent in px.har["log"]["entries"]:
            song_url = ent["request"]["url"]
            if mp3_request.search(song_url) and song_url not in seen_urls:
                seen_urls.add(song_url)
                with lock:
                    downList.append(song_url)
    with lock:
        over = True
    t1.join()
    # Cleaning up: quit() ends the driver session, not just the window.
    driver.quit()
    server.stop()
Esempio n. 25
0
def get_signature_url(user_url):
    """Load *user_url* behind a proxy and return the first request URL containing "_signature".

    Args:
        user_url: Page to open in Firefox.

    Returns:
        The first captured request URL that contains ``_signature``, or
        ``None`` if none was captured or an error occurred (errors are
        logged, not raised).
    """
    server = None
    driver = None
    try:
        # Proxy service
        bmp_server = Server(proxy_file)
        bmp_server.start()
        # Only remember the server once it has started, so the cleanup
        # below never stops a server that was not running.
        server = bmp_server
        proxy = server.create_proxy()

        options = webdriver.FirefoxOptions()
        # NOTE(review): these look like Chrome switches and HTTP headers
        # passed as Firefox command-line arguments - confirm that Firefox
        # actually routes traffic through the proxy with this setup.
        options.add_argument("--proxy-server={0}".format(proxy.proxy))
        options.add_argument('--disable-gpu')
        options.add_argument('lang=zh_CN.UTF-8')
        options.add_argument("user-agent=" + USER_AGENTS[0])
        options.add_argument('accept=' + accept[0])
        options.add_argument("accept-language=" + accept_language[0])
        options.add_argument('accept-encoding="gzip, deflate, br"')
        options.add_argument("upgrade-insecure-requests=1")
        options.add_argument('cache-control="max-age=0"')
        options.add_argument(
            "Cookie='_ga=GA1.2.500940217.1582528192; _gid=GA1.2.1014623963.1583113987'"
        )
        options.add_argument("Host=www.iesdouyin.com")

        driver = webdriver.Firefox(firefox_options=options,
                                   executable_path=driver_path)
        proxy.new_har("douyin",
                      options={
                          'captureHeaders': True,
                          'captureContent': True
                      })
        # Log the URL that was passed in.
        logger.info("原始URL {}".format(user_url))
        driver.get(user_url)
        time.sleep(6)
        result = proxy.har  # fetch the HAR
        for entry in result['log']['entries']:
            _url = entry['request']['url']
            # The data endpoint is recognized by its "_signature" parameter.
            if "_signature" in _url:
                logger.info("获取到用户第一个数据请求接口------>>>\n{}".format(_url))
                return _url
    except Exception as e:
        logger.exception(e)
    finally:
        if driver is not None:
            driver.quit()
        if server is not None:
            server.stop()
    return None
Esempio n. 26
0
def run():
    """Record a demo-store "add to cart" journey and check it with gaunit.

    Starts a BrowserMob proxy, drives Chrome through it, captures the HAR,
    then checks the recorded hits against ``tracking_plan.json`` (expected
    next to this file) and prints the result.

    The proxy server and the browser are shut down even when one of the
    browsing steps raises, so no orphaned processes are left behind.
    """
    test_case = "ga_demo_store_add_to_cart"

    # set up proxy
    server = Server()  # or add path to binary: 'Server(path="browsermob-proxy")'
    server.start()
    driver = None
    try:
        # 'useEcc' is needed to have decent response time with HTTPS
        proxy = server.create_proxy({"useEcc": True})

        # To use Firefox instead of Chrome:
        #   profile = webdriver.FirefoxProfile()
        #   profile.set_proxy(proxy.selenium_proxy())
        #   driver = webdriver.Firefox(firefox_profile=profile)

        # set up Chrome driver
        options = webdriver.ChromeOptions()
        options.add_argument("--proxy-server=%s" % proxy.proxy)
        # options.add_argument("--headless")  # uncomment if you want headless Chrome
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        capabilities["acceptInsecureCerts"] = True
        driver = webdriver.Chrome(options=options,
                                  desired_capabilities=capabilities)

        # start test case
        driver.implicitly_wait(10)
        # 'captureContent' for POST requests
        proxy.new_har(test_case, options={"captureContent": True})
        driver.get("https://enhancedecommerce.appspot.com/")
        sleep(2)
        driver.find_element_by_id("homepage-9bdd2-1").click()
        sleep(2)
        driver.find_element_by_id("addToCart").click()
        sleep(2)

        # The HAR must be read while the proxy server is still running.
        har = proxy.har
    finally:
        # Same shutdown order as before (server, then browser), but each
        # step now runs even if the previous one fails.
        try:
            server.stop()
        finally:
            if driver is not None:
                driver.quit()

    # To export the HAR for later inspection:
    #   with open(join(abspath(dirname(__file__)), test_case) + ".har",
    #             "w", encoding="utf8") as f:
    #       json.dump(har, f)

    # check hits against tracking plan and print results
    path = join(abspath(dirname(__file__)), "tracking_plan.json")
    tracking_plan = gaunit.TrackingPlan.from_json(path)
    r = gaunit.check_har(test_case, tracking_plan, har=har)

    r.print_result(display_ok=True)
Esempio n. 27
0
class BrowsermobProxy(object):
    """Small convenience wrapper around a BrowserMob ``Server`` instance."""

    def __init__(self, browsermob_proxy_bat_location: str):
        """Configure (but do not start) a server using port 9394."""
        server_options = {'port': 9394}
        self.server = Server(browsermob_proxy_bat_location, server_options)

    def start_server(self):
        """Start the wrapped server."""
        self.server.start()

    def stop_server(self):
        """Stop the wrapped server."""
        self.server.stop()

    def get_proxy(self):
        """Return a new proxy client created by the wrapped server."""
        client = self.server.create_proxy()
        return client
Esempio n. 28
0
class HARCatcher(SeleniumWrapper):
    """Selenium wrapper that routes Firefox through BrowserMob to record a HAR."""

    def __init__(self,
                 url,
                 har_name,
                 browsermob_proxy_location,
                 selector_dictionary=None,
                 default_timeout=None,
                 firefox_binary=None,
                 highlight=False,
                 geckodriver="geckodriver"):
        """Start the proxy and Firefox, begin a HAR and open ``url``.

        Args:
            url: Page opened once the browser is up.
            har_name: Reference name passed to ``proxy.new_har``.
            browsermob_proxy_location: Path of the BrowserMob launcher.
            selector_dictionary: Stored on the instance as-is.
            default_timeout: Stored on the instance; 30 when ``None``.
            firefox_binary: Forwarded to ``webdriver.Firefox``.
            highlight: Stored on the instance as-is.
            geckodriver: Forwarded to ``webdriver.Firefox`` as
                ``executable_path``.
        """
        self.selector_dictionary = selector_dictionary
        self.default_timeout = default_timeout if default_timeout is not None else 30
        self.highlight = highlight

        # Kill leftover proxy processes so port 8090 is free again.
        for proc in psutil.process_iter():
            try:
                if proc.name() == "browsermob-proxy":
                    proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # The process ended while we were iterating, or belongs to
                # another user; neither should abort start-up.
                continue

        options = {'port': 8090}
        self.server = Server(path=browsermob_proxy_location, options=options)
        self.server.start()
        try:
            time.sleep(1)
            self.proxy = self.server.create_proxy()
            time.sleep(1)

            profile = webdriver.FirefoxProfile()
            selenium_proxy = self.proxy.selenium_proxy()
            profile.set_proxy(selenium_proxy)
            self.driver = webdriver.Firefox(firefox_profile=profile,
                                            firefox_binary=firefox_binary,
                                            executable_path=geckodriver)
        except Exception:
            # No instance is returned on failure, so nobody could call
            # deinit(); stop the server here instead of leaking it.
            self.server.stop()
            raise
        self.proxy.new_har(har_name)
        self.driver.get(url)

    def get_har(self):
        """
        Returns the har file as a python dictionary.
        :return: har file as dict
        """
        return self.proxy.har

    def deinit(self):
        """Stop the proxy server and quit the browser (best effort for the latter)."""
        try:
            self.server.stop()
        finally:
            # Runs even if stopping the server failed.
            try:
                self.driver.quit()
            except Exception:
                print(
                    "The driver couldn't be properly closed for an unknown reason."
                )
Esempio n. 29
0
class CreateHar(object):
    """Create an HTTP archive (HAR) file for a scripted login flow."""

    def __init__(self, mob_path, email, password):
        """Store the BrowserMob launcher path and the login credentials.

        Nothing is started here; call ``start_all`` before ``create_har``.
        """
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None
        self.email = email
        self.password = password

    @staticmethod
    def __store_into_file(title, result):
        """Write ``result`` to ``<title>.har``."""
        # The context manager closes the handle even if the write fails.
        with open(title + '.har', 'w') as har_file:
            har_file.write(str(result))

    def __start_server(self):
        """Start the BrowserMob server and create a proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self):
        """Start Firefox configured to send its traffic through the proxy."""
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self):
        """Start the proxy server, then the browser."""
        self.__start_server()
        self.__start_driver()

    def create_har(self, title, url):
        """Open ``url``, submit the login form and save the HAR as ``<title>.har``."""
        # The REST option is spelled 'captureCookies'; the previous
        # 'captureCookie' key was not a recognised option name.
        self.proxy.new_har(ref=title,
                           options={'captureHeaders': True,
                                    'captureCookies': True})
        # NOTE(review): no ip_address is supplied here, so this call may not
        # install any host mapping - confirm what was intended.
        self.proxy.remap_hosts(address="https://unite.nike.com/login?appVersion=833&experienceVersion=833")
        self.driver.get(url)
        self.driver.find_element_by_xpath('.//*[@name="emailAddress"]').send_keys(self.email)
        self.driver.find_element_by_xpath('.//*[@name="password"]').send_keys(self.password)
        self.driver.find_element_by_xpath('.//*[@value="SIGN IN"]').click()
        # Fixed wait so the post-login requests are captured before export.
        time.sleep(14)
        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(title, result)

    def stop_all(self):
        """Stop the server and quit the browser; safe before ``start_all``."""
        try:
            if self.server is not None:
                self.server.stop()
        finally:
            # Quit the browser even when stopping the server raised.
            if self.driver is not None:
                self.driver.quit()
Esempio n. 30
0
class HTTPTracing(object):
    """Context manager that records the HTTP traffic of a page as a HAR."""

    def __init__(self, url):
        """Remember ``url``; the proxy and browser start in ``__enter__``."""
        # NOTE(review): `path` is not defined in this class; presumably a
        # module-level BrowserMob launcher path - confirm.
        self.browser_mob = path
        self.server = self.driver = self.proxy = None
        self.traffic_for_domain = []
        # Result of the most recent url_in_entry() call.
        self.f_entries = []
        self.url = url

    def __start_server(self):
        """Start the BrowserMob server and create a proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __shutdown(self):
        """Stop the server and quit the browser, whichever were started."""
        try:
            if self.server is not None:
                self.server.stop()
        finally:
            if self.driver is not None:
                self.driver.quit()

    def __enter__(self):
        try:
            self.__start_server()
            self.driver = webdriver.Firefox(proxy=self.proxy.selenium_proxy())
            self.driver.implicitly_wait(10)
            self.driver.get(self.url)
            self.proxy.new_har(self.url,
                               options={
                                   'captureHeaders': True,
                                   'captureCookies': True
                               })
        except Exception:
            # __exit__ is not called when __enter__ raises, so release
            # whatever was already started before propagating.
            self.__shutdown()
            raise

        return self

    @property
    def entries(self):
        """Entries of the current HAR, fetched from the proxy on each access."""
        return self.proxy.har["log"]["entries"]

    def wait_change_entry(self, url, duration=2):
        """Poll until an entry whose request URL contains ``url`` appears.

        Polls every 0.1 s for roughly ``duration`` seconds.

        Returns:
            The matching entries; an empty list when none appeared in time.
        """
        num_loops = duration / 0.1
        count = 0
        while count <= num_loops and not self.url_in_entry(url):
            time.sleep(0.1)
            count += 1
        return self.f_entries

    def url_in_entry(self, base_url):
        """Return (and cache in ``f_entries``) entries whose URL contains ``base_url``."""
        self.f_entries = list(
            filter(lambda x: base_url in x["request"]["url"], self.entries))
        return self.f_entries

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the server and quit the browser."""
        self.__shutdown()
def getStreamFile(linkToFilm, timeout=20):
    """Return the tokenised ``.m3u8`` stream URL requested by a film page.

    Opens ``linkToFilm`` in headless Chrome behind a BrowserMob proxy, clicks
    the player iframe and polls the recorded HAR for a request whose first
    query parameter is named ``tok`` and whose URL contains ``m3u8``.

    Args:
        linkToFilm: URL of the page that embeds the player.
        timeout: Approximate number of seconds to keep polling.

    Returns:
        The stream URL, or ``None`` if no matching request appeared in time.
    """
    server = Server(r"c:\Users\Alex\Documents\GitHub\Hackathon_MEGOGO_2018\getStreamFile\browsermob-proxy\bin\browsermob-proxy")
    server.start()
    browser = None
    try:
        proxy = server.create_proxy()

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
        chrome_options.add_argument("headless")
        browser = webdriver.Chrome(chrome_options=chrome_options)

        proxy.new_har("defaultName")
        browser.get(linkToFilm)

        try:
            # that's for serials
            element = browser.find_element_by_xpath('//*[@id="episode-code"]/iframe')
        except NoSuchElementException:
            # and that's for movies
            element = browser.find_element_by_xpath('//*[@id="basplayer_hd"]/iframe')

        element.click()
        element.click()  # double-click to ensure that video will load

        waited = 0
        while waited <= timeout:
            print("wait...")
            time.sleep(0.1)
            waited += 0.1
            for elem in proxy.har['log']['entries']:
                request = elem.get('request', {})
                query = request.get('queryString') or []
                if query and query[0]['name'] == "tok" and "m3u8" in request['url']:
                    return request['url']

        # Previously this message was printed forever and the function never
        # returned; give up and let the caller decide what to do.
        print("Error in getting requests. Restart the app.")  # TODO: restart browser() in case requests won't load
        return None
    finally:
        # Runs on success, timeout and error alike.
        try:
            if browser is not None:
                browser.quit()
        finally:
            server.stop()
Esempio n. 32
0
class RequestLog(object):
    """Record browser traffic through a BrowserMob proxy as a HAR.

    Usable as a context manager; leaving the ``with`` block calls ``close``.
    """

    def __init__(self,
                 har_name,
                 blacklist=None,
                 har_options=None):
        """Start the server, create the proxy and begin a new HAR.

        Args:
            har_name: Reference name passed to ``proxy.new_har``.
            blacklist: Iterable of ``(host, code)`` pairs handed to
                ``setBlackList``. Defaults to no entries.
            har_options: Options for ``proxy.new_har``. Defaults to
                capturing headers and content.
        """
        import os

        # Defaults are built per call: mutable default arguments would be
        # shared between all instances.
        if blacklist is None:
            blacklist = []
        if har_options is None:
            har_options = {'captureHeaders': True, 'captureContent': True}

        # Set first so close() works even if start-up fails part-way.
        self.server = None
        self.proxy = None

        browsermob_exe = os.environ.get(
            'BROWSERMOB', 'D:/browsermob-proxy-2.1.4/bin/browsermob-proxy')
        self.server = Server(browsermob_exe)
        self.server.start()
        self.proxy = self.server.create_proxy(
            {'httpProxy': '111.230.223.37:24342'})
        self.proxy.new_har(har_name, options=har_options)
        for host, code in blacklist:
            self.setBlackList(host, code)

    def setBlackList(self, host, code):
        """Forward ``host`` and ``code`` to ``proxy.blacklist``."""
        self.proxy.blacklist(host, code)

    def setOptions(self, options):
        """Add the ``--proxy-server`` argument for this proxy to ``options``."""
        options.add_argument('--proxy-server={0}'.format(self.proxy.proxy))

    def getHar(self):
        """Return the current HAR as provided by the proxy."""
        return self.proxy.har

    def getEntries(self):
        """Return the ``log.entries`` list of the current HAR."""
        return self.proxy.har['log']['entries']

    def close(self):
        """Close the proxy, then stop the server; safe to call repeatedly."""
        proxy, server = self.proxy, self.server
        self.proxy = None
        self.server = None
        try:
            # The proxy must be closed while the server is still running:
            # closing it talks to the server's REST API.
            if proxy is not None:
                proxy.close()
        finally:
            if server is not None:
                try:
                    server.stop()
                except Exception:
                    # Stopping stays best effort, as before, but no longer
                    # swallows KeyboardInterrupt/SystemExit.
                    pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
Esempio n. 33
0
def run(departureCity, arrivalCity, date, num):
    """Scrape one route and date through a BrowserMob server; block until done.

    Args:
        departureCity: Departure city, forwarded to ``search_url`` and
            ``get_request``.
        arrivalCity: Arrival city, forwarded likewise.
        date: Travel date, forwarded likewise.
        num: Initial value of the semaphore handed to ``get_request``.
    """
    server = Server(path)  # path of the server launcher script
    server.start()
    try:
        semaphore = asyncio.Semaphore(num)  # limits concurrency

        url = search_url(departureCity, arrivalCity, date)
        print(departureCity + "--" + arrivalCity + "开始爬取数据...")
        task = asyncio.ensure_future(
            get_request(url, server, semaphore, departureCity, arrivalCity, date))
        task.add_done_callback(callback)
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.wait([task]))
    finally:
        # Stop the server even when building or running the task failed.
        print("server closed")
        server.stop()
Esempio n. 34
0
def main():
    """Click every visible same-domain link of a page and report status codes.

    The start URL is taken from ``sys.argv[1]``. For each displayed ``<a>``
    whose ``href`` contains the start URL's host, a fresh HAR is started,
    the link is clicked and ``show_status_codes`` is called with the HAR.
    Exits with status 1 when no URL is given.
    """
    init()
    if len(sys.argv) >= 2:
        start_url = sys.argv[1]
    else:
        print("You must specify page URL!")
        sys.exit(1)  # usage error: report failure to the calling shell

    allowed_domain = urlparse(start_url).netloc

    browsermobproxy_path = "/usr/local/opt/browsermobproxy/bin/browsermob-proxy"

    options = {
        'port': 9090,
    }

    server = Server(browsermobproxy_path, options)
    server.start()
    driver = None
    try:
        proxy = server.create_proxy()

        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)

        driver.get(start_url)

        links = driver.find_elements_by_tag_name('a')
        for i in range(len(links)):
            # The list is re-fetched after each click and may have shrunk.
            if i >= len(links):
                break
            link = links[i]
            if not link.is_displayed():
                continue
            url = link.get_attribute('href')
            text = link.get_attribute('text') or ''
            # Anchors without an href yield None; skip them and any link
            # that leaves the allowed domain.
            if not url or allowed_domain not in url:
                continue

            proxy.new_har('demo')
            link.click()
            print("%s Clicked on the link '%s' with HREF '%s'" % (Fore.BLUE+"*"+Fore.RESET,Style.BRIGHT+text+Style.RESET_ALL,Style.BRIGHT+url+Style.RESET_ALL))
            show_status_codes(proxy.har, allowed_domain)

            # Go back only after a click. Doing it for links that were not
            # clicked navigated away from the start page.
            driver.back()
            driver.refresh()
            links = driver.find_elements_by_tag_name('a')
    finally:
        try:
            if driver is not None:
                driver.quit()
        finally:
            server.stop()
class CreateHar(object):
    """Create an HTTP archive (HAR) file for a single page load."""

    def __init__(self, mob_path):
        """Store the BrowserMob launcher path; nothing is started yet."""
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(title, result):
        """Write ``result`` to ``<title>.har``."""
        # The context manager closes the handle even if the write fails.
        with open(title + '.har', 'w') as har_file:
            har_file.write(str(result))

    def __start_server(self):
        """Start the BrowserMob server and create a proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self):
        """Start Firefox configured to send its traffic through the proxy."""
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self):
        """Start the proxy server, then the browser."""
        self.__start_server()
        self.__start_driver()

    def create_har(self, title, url):
        """Load ``url`` and save the recorded HAR as ``<title>.har``."""
        self.proxy.new_har(title)
        self.driver.get(url)
        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(title, result)

    def stop_all(self):
        """Stop the server and quit the browser; safe before ``start_all``."""
        try:
            if self.server is not None:
                self.server.stop()
        finally:
            # Quit the browser even when stopping the server raised.
            if self.driver is not None:
                self.driver.quit()
Esempio n. 36
0
	def fetch(url, config, output_directory, fetchEngine="browsermobproxy+selenium", browser="firefox"):
		"""Capture a HAR for ``url`` and write it below ``output_directory``.

		Args:
			url: Page to load.
			config: Mapping with a ``fetchEngines`` section holding
				``phantomjs_command`` (a shell command containing ``$url``)
				and ``browsermobproxy_binary``.
			output_directory: Directory prefix for the output file.
			fetchEngine: ``"phantomjs"``/``"ph"`` or
				``"browsermobproxy+selenium"``/``"bs"``.
			browser: ``"firefox"``/``"ff"`` selects Firefox; anything else
				selects Chrome (only used by the BrowserMob engine).

		Returns:
			The name of the written file, or ``None`` when no data was captured.

		Exits the process when ``fetchEngine`` is not recognised.
		"""
		if fetchEngine in ("phantomjs", "ph"):

			# NOTE(review): `url` is substituted into a shell command line
			# unescaped. If it can come from untrusted input this allows
			# command injection; quoting depends on how the configured
			# template already quotes `$url`, so it is not changed here.
			data = subprocess.check_output( config['fetchEngines']['phantomjs_command'].replace("$url", url), shell=True )

		elif fetchEngine in ("browsermobproxy+selenium", "bs"):

			from browsermobproxy import Server
			from selenium import webdriver

			server = Server(config['fetchEngines']['browsermobproxy_binary'])
			server.start()
			driver = None
			try:
				proxy = server.create_proxy()

				if browser in ("firefox", "ff"):
					profile = webdriver.FirefoxProfile()
					profile.set_proxy(proxy.selenium_proxy())
					driver = webdriver.Firefox(firefox_profile=profile)
				else:
					chrome_options = webdriver.ChromeOptions()
					chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
					driver = webdriver.Chrome(chrome_options = chrome_options)

				proxy.new_har(url, options={'captureHeaders': True})
				driver.get(url)

				data = json.dumps(proxy.har, ensure_ascii=False)
			finally:
				# Release the proxy and the browser even when the load failed.
				try:
					server.stop()
				finally:
					if driver is not None:
						driver.quit()
		else:
			sys.exit("Unrecognized engine.")

		if not data:
			return None

		# NOTE(review): the URL is used verbatim in the file name, so a URL
		# with a path ("/") points into sub-directories that may not exist.
		fileName = output_directory + "/" + url.replace("http://", "").replace("https://", "") + "_" + strftime("%Y-%m-%d_%H:%M:%S", gmtime()) + ".har"

		# `data` is bytes from check_output and text from json.dumps. Write
		# bytes in binary mode so both cases work on Python 2 and 3.
		payload = data if isinstance(data, bytes) else data.encode("utf8")
		with open(fileName, "wb") as f:
			f.write(payload)

		return fileName
Esempio n. 37
0
class ad_driver():
    """Browser driver whose traffic is recorded by a BrowserMob proxy."""

    _driver = None
    _server = None
    _proxy = None

    def __init__(self, path_to_batch, browser="chrome"):
        """Start the proxy server and a browser that uses it.

        Args:
            path_to_batch: Path of the BrowserMob launcher.
            browser: ``"chrome"`` or ``"ff"``.

        Raises:
            ValueError: ``browser`` is not one of the supported values.
        """
        # start browsermob proxy
        self._server = Server(path_to_batch)
        self._server.start()
        try:
            self._proxy = self._server.create_proxy()

            # Init browser profile. Strings are compared with ==, since `is`
            # tests identity and only matched by accident of interning.
            if browser == "chrome":
                PROXY = "localhost:%s" % self._proxy.port  # IP:PORT or HOST:PORT
                chrome_options = webdriver.ChromeOptions()
                chrome_options.add_argument('--proxy-server=%s' % PROXY)
                self._driver = webdriver.Chrome(chrome_options=chrome_options)
            elif browser == "ff":
                # Previously this branch built a local `driver` from an
                # undefined `proxy` and never set self._driver.
                profile = webdriver.FirefoxProfile()
                profile.set_proxy(self._proxy.selenium_proxy())
                self._driver = webdriver.Firefox(firefox_profile=profile)
            else:
                raise ValueError(
                    "Please set 'browser' variable to any of the value \n 'chrome', 'ff' !")
            self._driver.maximize_window()
            self._driver.implicitly_wait(20)
        except Exception:
            # Construction failed: do not leave the proxy server running.
            self.quit()
            raise

    def execute(self, test):
        """Run one test and return the HAR entries it produced.

        ``test`` is a mapping with ``name`` (HAR reference and name of the
        function looked up on ``test_steps``) and ``file`` (path below
        ``_test_data_dir`` that the browser opens).
        """
        self._proxy.new_har(test["name"])
        self._driver.get(_test_data_dir + os.sep + test['file'])
        time.sleep(2)
        callToTestMethod = getattr(test_steps, test["name"])
        callToTestMethod(self._driver)
        har = self._proxy.har
        requests = har['log']['entries']
        return requests

    def quit(self):
        """Stop the proxy server and quit the browser, whichever exist."""
        try:
            if self._server is not None:
                self._server.stop()
        finally:
            if self._driver is not None:
                self._driver.quit()
Esempio n. 38
0
class Proxy(object):
    """Per-test BrowserMob proxy that records a HAR named after the test id."""

    proxy = None
    proxy_server = None
    test_id = None

    def __init__(self, test_id):
        """Remember ``test_id`` and start the proxy immediately."""
        self.test_id = test_id
        self.start_proxy()

    def start_proxy(self):
        """Start the server, apply the configured blacklist and begin a HAR.

        Returns:
            This instance.
        """
        self.proxy_server = Server(config.proxy_bin)
        self.proxy_server.start()
        self.proxy = self.proxy_server.create_proxy()
        if config.blacklist:
            self.set_blacklist(config.blacklist)
        self.proxy.new_har(self.test_id)
        logger.debug('Browsermob proxy started.')
        return self

    def stop_proxy(self):
        """Write the HAR to ``<test_id>.har`` and stop the server."""
        filename = '{}.har'.format(self.test_id)
        try:
            # The HAR is fetched once; an extra json.dumps() of it was
            # unused and caused a second download from the proxy.
            with open(filename, 'w') as harfile:
                json.dump(self.proxy.har, harfile)
        finally:
            # Stop the server even when fetching or writing the HAR failed.
            self.proxy_server.stop()
            self.proxy = None
            self.proxy_server = None
        logger.debug('Browsermob proxy stopped. HAR created: {}'
                     .format(filename))

    def set_blacklist(self, domain_list):
        """Blacklist every domain in ``domain_list`` with a 404 response."""
        for domain in domain_list:
            # NOTE(review): `domain` is inserted unescaped and the trailing
            # "{}*" makes its last character optional/repeatable - confirm
            # whether entries are meant to be regex fragments.
            self.proxy.blacklist("^https?://([a-z0-9-]+[.])*{}*.*"
                                 .format(domain), 404)
        logger.debug("Proxy blacklist set.")

    def get_blacklist(self):
        """Return the HTTP response of the proxy's ``/blacklist`` endpoint."""
        return requests.get('{}{}/blacklist'
                            .format(config.proxy_api, self.proxy.port))
Esempio n. 39
0
class BrowserMobProxyTestCaseMixin(object):
    """Test-case mixin that owns a BrowserMob server.

    ``create_browsermob_proxy`` uses ``self.marionette``, which the class
    mixing this in has to provide.
    """

    def __init__(self, *args, **kwargs):
        """Consume the ``browsermob_port`` and ``browsermob_script`` kwargs."""
        self.browsermob_server = None
        port = kwargs.pop('browsermob_port')
        script = kwargs.pop('browsermob_script')
        self.browsermob_port = port
        self.browsermob_script = script

    def setUp(self):
        """Start the server; the launcher script is required, the port optional."""
        server_options = {}
        if self.browsermob_port:
            server_options['port'] = self.browsermob_port
        if not self.browsermob_script:
            message = ('Must specify --browsermob-script in order to run '
                       'browsermobproxy tests')
            raise ValueError(message)
        self.browsermob_server = Server(self.browsermob_script,
                                        options=server_options)
        self.browsermob_server.start()

    def create_browsermob_proxy(self):
        """Create a proxy client and set the browser's proxy prefs to its port."""
        client = self.browsermob_server.create_proxy()
        script = """
                Components.utils.import("resource://gre/modules/Preferences.jsm");
                Preferences.set("network.proxy.type", 1);
                Preferences.set("network.proxy.http", "localhost");
                Preferences.set("network.proxy.http_port", {port});
                Preferences.set("network.proxy.ssl", "localhost");
                Preferences.set("network.proxy.ssl_port", {port});
            """.format(port=client.port)
        with self.marionette.using_context('chrome'):
            self.marionette.execute_script(script)
        return client

    def tearDown(self):
        """Stop the server if one is running and forget it."""
        server = self.browsermob_server
        if not server:
            return
        server.stop()
        self.browsermob_server = None

    # Also stop the server when the object is garbage-collected.
    __del__ = tearDown
Esempio n. 40
0
def create_hars(urls, browsermob_dir, run_cached):
    """Save a HAR for every URL, optionally followed by a cached reload.

    Each URL gets its own BrowserMob server and Firefox instance.

    Args:
        urls: Iterable of page URLs to load.
        browsermob_dir: BrowserMob install directory; the launcher is
            expected at ``<browsermob_dir>/bin/browsermob-proxy``.
        run_cached: When true, load each page a second time in the same
            browser and save a second HAR labelled ``-cached``.
    """
    for url in urls:
        print('starting browsermob proxy')
        server = Server('{}/bin/browsermob-proxy'.format(browsermob_dir))
        server.start()
        try:
            proxy = server.create_proxy()
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(proxy.selenium_proxy())
            driver = webdriver.Firefox(firefox_profile=profile)
            try:
                # (HAR label, progress message) for each load of this URL.
                passes = [(slugify(url), 'loading page: {}')]
                if run_cached:
                    passes.append(('{}-cached'.format(slugify(url)),
                                   'loading cached page: {}'))

                for url_slug, message in passes:
                    proxy.new_har(url_slug)

                    print(message.format(url))
                    driver.get(url)

                    har_name = '{}-{}.har'.format(url_slug, time.time())
                    print('saving HAR file: {}'.format(har_name))
                    save_har(har_name, proxy.har)
            finally:
                driver.quit()
        finally:
            # Stop the server even if the browser or a page load failed.
            print('stopping browsermob proxy')
            server.stop()
Esempio n. 41
0
class BrowserMobProxyTestCaseMixin(object):
    """Test-case mixin that owns a BrowserMob server.

    ``create_browsermob_proxy`` uses ``self.marionette``, which the class
    mixing this in has to provide.
    """

    def __init__(self, *args, **kwargs):
        """Consume the ``browsermob_port`` and ``browsermob_script`` kwargs."""
        self.browsermob_server = None
        port = kwargs.pop('browsermob_port')
        script = kwargs.pop('browsermob_script')
        self.browsermob_port = port
        self.browsermob_script = script

    def setUp(self):
        """Start the server; the launcher script is required, the port optional."""
        server_options = {}
        if self.browsermob_port:
            server_options['port'] = self.browsermob_port
        if not self.browsermob_script:
            message = ('Must specify --browsermob-script in order to run '
                       'browsermobproxy tests')
            raise ValueError(message)
        self.browsermob_server = Server(self.browsermob_script,
                                        options=server_options)
        self.browsermob_server.start()

    def create_browsermob_proxy(self):
        """Create a proxy client and set the browser's proxy prefs to its port."""
        client = self.browsermob_server.create_proxy()
        script = """
                Services.prefs.setIntPref('network.proxy.type', 1);
                Services.prefs.setCharPref('network.proxy.http', 'localhost');
                Services.prefs.setIntPref('network.proxy.http_port', %(port)s);
                Services.prefs.setCharPref('network.proxy.ssl', 'localhost');
                Services.prefs.setIntPref('network.proxy.ssl_port', %(port)s);
            """ % {"port": client.port}
        with self.marionette.using_context('chrome'):
            self.marionette.execute_script(script)
        return client

    def tearDown(self):
        """Stop the server if one is running and forget it."""
        server = self.browsermob_server
        if not server:
            return
        server.stop()
        self.browsermob_server = None

    # Also stop the server when the object is garbage-collected.
    __del__ = tearDown
Esempio n. 42
0
    def run_webdriver(self, start_url, port, config, download_dir):
        """Analyse ``start_url`` in Firefox behind a BrowserMob proxy.

        Sets the module-level ``useragent`` and ``referer`` from ``config``
        (with built-in fallbacks), starts a proxy that sends them, configures
        a Firefox profile for unattended downloads into ``download_dir`` and
        calls ``self.analyse_page``. Errors raised while browsing are logged,
        not propagated. The proxy, browser and server are always shut down.

        Args:
            start_url: Parsed URL; ``hostname`` is used as the HAR reference
                and the object is passed to ``analyse_page``.
            port: Port for the BrowserMob server.
            config: Object providing ``referer``, ``useragent``, ``url`` and
                ``firefoxprofile``.
            download_dir: Value for the ``browser.download.dir`` preference.
        """
        global useragent
        global referer
        urllib3_logger = logging.getLogger('urllib3')
        urllib3_logger.setLevel(logging.DEBUG)
        logging.info("Starting WebRunner")

        if config.referer:
            referer = config.referer
        else:
            referer = 'http://www.google.com/search?q={}+&oq={}&oe=utf-8&rls=org.mozilla:en-US:official&client=firefox-a&channel=fflb&gws_rd=cr'.format(
                config.url, config.url)

        if config.useragent:
            useragent = config.useragent
        else:
            useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:24.0) Gecko/20100101 Firefox/24.0'

        logging.debug("Running with UserAgent: {}".format(useragent))
        logging.debug("Running with Referer: {}".format(referer))
        logging.debug("Checking URL: {}".format(config.url))

        server = Server("lib/browsermob/bin/browsermob-proxy", {'port': port})
        server.start()
        proxy = None
        driver = None
        try:
            proxy = server.create_proxy()
            proxy.headers({'User-Agent': useragent, 'Accept-Encoding': "", 'Connection': 'Close'})

            # Interceptor adds the Referer header to requests lacking one.
            request_js = (
                'var referer = request.getProxyRequest().getField("Referer");'
                'addReferer(request);'
                'function addReferer(r){'
                'if (! referer ) {'
                'r.addRequestHeader("Referer","' + referer + '");'
                '}'
                'return;'
                '}')
            proxy.request_interceptor(request_js)
            if config.firefoxprofile:
                firefox_profile = FirefoxProfile(profile_directory=config.firefoxprofile)
            else:
                firefox_profile = FirefoxProfile()

            logging.debug("Using profile {}".format(firefox_profile.path))

            firefox_profile.set_preference("security.OCSP.enabled", 0)
            firefox_profile.set_preference("browser.download.folderList", 2)
            firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
            firefox_profile.set_preference("browser.download.dir", download_dir)
            # Built by implicit concatenation: the previous backslash
            # continuation inside the literal embedded a run of spaces in
            # the middle of "application/msword".
            firefox_profile.set_preference(
                "browser.helperApps.neverAsk.saveToDisk",
                "application/x-xpinstall;application/x-zip;"
                "application/x-zip-compressed;application/octet-stream;"
                "application/zip;application/pdf;application/msword;"
                "text/plain;application/octet")
            firefox_profile.set_preference("browser.helperApps.alwaysAsk.force", False)
            firefox_profile.set_preference("network.proxy.type", 1)
            firefox_profile.set_proxy(proxy.selenium_proxy())

            try:
                driver = WebDriver(firefox_profile)
                proxy.new_har(start_url.hostname,
                              options={"captureHeaders": "true", "captureContent": "true", "captureBinaryContent": "true"})
                self.analyse_page(driver, start_url)
                logging.info("Stopping WebRunner")
                # NOTE(review): the resulting Har object is neither stored
                # nor returned - confirm whether callers expect it.
                har = Har(proxy.har)
            except Exception as e:
                logging.error(e)
        finally:
            # Single shutdown path: proxy first (needs the server), then
            # the browser if it was created, then the server.
            try:
                if proxy is not None:
                    proxy.close()
            finally:
                try:
                    if driver is not None:
                        driver.quit()
                finally:
                    server.stop()
Esempio n. 43
0
class performance(object):
    """Collect a HAR and ``window.performance`` data for one page load."""

    def __init__(self, mob_path):
        """Store the BrowserMob launcher path; nothing is started yet."""
        from datetime import datetime
        print("%s: Go " % (datetime.now()))
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(args, title, result):
        """Write ``result`` to ``<title>.json``, under ``args['path']`` if given."""
        if 'path' in args:
            file_name = args['path'] + '/' + title + '.json'
        else:
            file_name = title + '.json'
        # The context manager closes the handle even if the write fails.
        with open(file_name, 'w') as har_file:
            har_file.write(str(result))

    def __start_server(self):
        """Start the BrowserMob server and create a proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self, args):
        """Start the browser named by ``args['browser']`` behind the proxy.

        Raises:
            ValueError: ``args['browser']`` is neither ``'chrome'`` nor
                ``'firefox'``.
        """
        browser = args['browser']
        if browser == 'chrome':
            print("Browser: Chrome")
            print("URL: {0}".format(args['url']))
            chromedriver = os.getenv("CHROMEDRIVER_PATH", "/chromedriver")
            os.environ["webdriver.chrome.driver"] = chromedriver
            chrome_options = webdriver.ChromeOptions()
            # proxy.proxy is already "host:port". The former round trip
            # through the Python-2-only urlparse module returned it unchanged.
            chrome_options.add_argument("--proxy-server={0}".format(self.proxy.proxy))
            chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(chromedriver, chrome_options=chrome_options)
        elif browser == 'firefox':
            print("Browser: Firefox")
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(self.proxy.selenium_proxy())
            self.driver = webdriver.Firefox(firefox_profile=profile)
        else:
            # Fail here instead of later on a None driver.
            raise ValueError("Unsupported browser: {0!r}".format(browser))

    def start_all(self, args):
        """Start the proxy server, then the browser."""
        self.__start_server()
        self.__start_driver(args)

    def create_har(self, args):
        """Load ``args['url']`` and store ``har.json`` and ``perf.json``."""
        self.proxy.new_har(args['url'], options={'captureHeaders': True})
        self.driver.get(args['url'])

        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(args, 'har', result)

        performance = json.dumps(self.driver.execute_script("return window.performance"), ensure_ascii=False)
        self.__store_into_file(args, 'perf', performance)

    def stop_all(self):
        """Stop the server and quit the browser, whichever were started."""
        from datetime import datetime
        print("%s: Finish" % (datetime.now()))

        try:
            if self.server is not None:
                self.server.stop()
        finally:
            if self.driver is not None:
                self.driver.quit()
Esempio n. 44
0
# Minimal example: load one page in Firefox behind BrowserMob and print the
# HAR that the proxy recorded.
from browsermobproxy import Server
from selenium import webdriver

server = Server("/root/Desktop/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy")
server.start()
driver = None
try:
    proxy = server.create_proxy()

    profile = webdriver.FirefoxProfile()
    profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=profile)

    proxy.new_har("google")
    driver.get("http://www.google.co.uk")
    test = proxy.har  # returns a HAR JSON blob

    # Parenthesised single-argument print works on Python 2 and 3 alike.
    print(test)
finally:
    # Shut down the server and the browser even if the page load failed.
    try:
        server.stop()
    finally:
        if driver is not None:
            driver.quit()
Esempio n. 45
0
class HarProfiler:
    """Load a page through BrowserMob + Firefox and save the HAR to disk.

    Use as a context manager: entering starts the optional virtual display
    and the BrowserMob server, leaving stops them again.
    """

    def __init__(self, config, url, login_first=False):
        """Read settings from ``config`` and derive the HAR labels and names.

        Args:
            config: Mapping with ``browsermob_dir``, ``har_dir``,
                ``label_prefix``, ``run_cached``, ``virtual_display``,
                ``virtual_display_size_x`` and ``virtual_display_size_y``;
                ``login_user`` and ``login_password`` are optional.
            url: Page to profile.
            login_first: Log in before loading ``url``.
        """
        self.url = url
        self.login_first = login_first

        self.login_user = config.get('login_user')
        self.login_password = config.get('login_password')

        self.browsermob_dir = config['browsermob_dir']
        self.har_dir = config['har_dir']
        self.label_prefix = config['label_prefix'] or ''
        self.run_cached = config['run_cached']
        self.virtual_display = config['virtual_display']
        self.virtual_display_size_x = config['virtual_display_size_x']
        self.virtual_display_size_y = config['virtual_display_size_y']

        self.label = '{}{}'.format(self.label_prefix, self.slugify(url))
        self.cached_label = '{}-cached'.format(self.label)

        # Both HAR files of one run share the same timestamp suffix.
        epoch = time.time()
        self.har_name = '{}-{}.har'.format(self.label, epoch)
        self.cached_har_name = '{}-{}.har'.format(self.cached_label, epoch)

    def __enter__(self):
        if self.virtual_display:
            log.info('starting virtual display')
            self.display = Display(visible=0, size=(
                self.virtual_display_size_x,
                self.virtual_display_size_y
            ))
            self.display.start()

        log.info('starting browsermob proxy')
        try:
            self.server = Server('{}/bin/browsermob-proxy'.format(
                self.browsermob_dir)
            )
            self.server.start()
        except Exception:
            # __exit__ is not called when __enter__ raises; do not leave
            # the virtual display running.
            if self.virtual_display:
                self.display.stop()
            raise
        return self

    def __exit__(self, type, value, traceback):
        log.info('stopping browsermob proxy')
        try:
            self.server.stop()
        finally:
            # Stop the display even if stopping the server failed.
            if self.virtual_display:
                log.info('stopping virtual display')
                self.display.stop()

    def _make_proxied_webdriver(self):
        """Return ``(driver, proxy)``: Firefox routed through a new proxy."""
        proxy = self.server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        return (driver, proxy)

    def _save_har(self, har, cached=False):
        """Write ``har`` as JSON into ``har_dir``, creating it if needed."""
        if not os.path.isdir(self.har_dir):
            os.makedirs(self.har_dir)
        har_name = self.cached_har_name if cached else self.har_name

        log.info('saving HAR file: {}'.format(har_name))
        with open(os.path.join(self.har_dir, har_name), 'w') as f:
            json.dump(har, f, indent=2, ensure_ascii=False)

    def _login(self, driver):
        """Log in at courses.edx.org with the configured credentials.

        Raises:
            RuntimeError: user or password is missing from the config.
        """
        log.info('logging in...')

        error_msg = 'must specify login credentials in yaml config file'
        if self.login_user is None or self.login_password is None:
            raise RuntimeError(error_msg)

        driver.get('https://courses.edx.org/login')

        # handle both old and new style logins
        try:
            email_field = driver.find_element_by_id('email')
            password_field = driver.find_element_by_id('password')
        except NoSuchElementException:
            email_field = driver.find_element_by_id('login-email')
            password_field = driver.find_element_by_id('login-password')
        email_field.send_keys(self.login_user)
        password_field.send_keys(self.login_password)
        password_field.submit()

    def _add_page_event_timings(self, driver, har):
        """Add ``onContentLoad``/``onLoad`` to the first page of ``har``.

        Values are offsets from ``navigationStart`` taken from the browser's
        ``performance.timing``. A timing the browser does not report is
        skipped, leaving the corresponding HAR field untouched.

        Returns:
            The same ``har`` object.
        """
        jscript = textwrap.dedent("""
            var performance = window.performance || {};
            var timings = performance.timing || {};
            return timings;
            """)
        # The script itself falls back to {} when timing data is missing,
        # so missing keys must not raise here.
        timings = driver.execute_script(jscript) or {}
        page_timings = har['log']['pages'][0]['pageTimings']
        start = timings.get('navigationStart')
        for har_key, timing_key in (('onContentLoad', 'domContentLoadedEventEnd'),
                                    ('onLoad', 'loadEventEnd')):
            end = timings.get(timing_key)
            if start is not None and end is not None:
                page_timings[har_key] = end - start
        return har

    def load_page(self):
        """Load the page (and optionally a cached reload) and save the HARs."""
        # Created outside the try block: if this fails there is no driver to
        # quit, and the original error must not be masked by a NameError.
        driver, proxy = self._make_proxied_webdriver()
        try:
            if self.login_first:
                self._login(driver)

            proxy.new_har(self.label)
            log.info('loading page: {}'.format(self.url))
            driver.get(self.url)
            har = self._add_page_event_timings(driver, proxy.har)
            self._save_har(har)

            if self.run_cached:
                proxy.new_har(self.cached_label)
                log.info('loading cached page: {}'.format(self.url))
                driver.get(self.url)
                har = self._add_page_event_timings(driver, proxy.har)
                self._save_har(har, cached=True)
        finally:
            driver.quit()

    def slugify(self, text):
        """Lower-case ``text`` and join its alphanumeric runs with ``-``."""
        pattern = re.compile(r'[^a-z0-9]+')
        slug = '-'.join(word for word in pattern.split(text.lower()) if word)
        return slug
Esempio n. 46
0
class HarProfiler:
    """Record HAR files for a page using Firefox behind a BrowserMob proxy.

    Meant to be used as a context manager: entering starts the optional
    virtual display and the BrowserMob server, leaving stops whatever was
    started.
    """

    def __init__(self, config, url):
        """Read settings from ``config`` and derive HAR labels from ``url``.

        ``config`` must provide the keys ``har_dir``, ``browsermob_dir``,
        ``label_prefix``, ``virtual_display``, ``virtual_display_size_x``
        and ``virtual_display_size_y``.
        """
        self.har_dir = config['har_dir']
        self.browsermob_dir = config['browsermob_dir']
        self.label_prefix = config['label_prefix'] or ''
        self.virtual_display = config['virtual_display']
        self.virtual_display_size_x = config['virtual_display_size_x']
        self.virtual_display_size_y = config['virtual_display_size_y']

        # Populated by __enter__; None means "not started", which lets
        # __exit__ skip anything that never came up.
        self.display = None
        self.server = None

        self.label = '{}{}'.format(self.label_prefix, self.slugify(url))
        self.cached_label = '{}-cached'.format(self.label)

        # Both files of one run share the same timestamp suffix.
        epoch = time.time()
        self.har_name = '{}-{}.har'.format(self.label, epoch)
        self.cached_har_name = '{}-{}.har'.format(self.cached_label, epoch)

    def __enter__(self):
        """Start the virtual display (if enabled) and the BrowserMob server."""
        if self.virtual_display:
            log.info('starting virtual display')
            self.display = Display(visible=0, size=(
                self.virtual_display_size_x,
                self.virtual_display_size_y
            ))
            self.display.start()

        log.info('starting browsermob proxy')
        try:
            self.server = Server('{}/bin/browsermob-proxy'.format(
                self.browsermob_dir)
            )
            self.server.start()
        except Exception:
            # __exit__ is not invoked when __enter__ raises, so the display
            # has to be released here.
            self._stop_display()
            raise
        return self

    def __exit__(self, type, value, traceback):
        """Stop the BrowserMob server and the virtual display, if started."""
        try:
            if self.server is not None:
                log.info('stopping browsermob proxy')
                self.server.stop()
        finally:
            self._stop_display()

    def _stop_display(self):
        """Stop the virtual display when one was started; otherwise do nothing."""
        if self.display is not None:
            log.info('stopping virtual display')
            self.display.stop()
            self.display = None

    def _make_proxied_webdriver(self):
        """Return ``(driver, proxy)``: a Firefox driver routed through a new proxy."""
        proxy = self.server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        return (driver, proxy)

    def _save_har(self, har, cached=False):
        """Write ``har`` as indented JSON into ``self.har_dir``.

        The file name is ``self.cached_har_name`` when ``cached`` is true,
        ``self.har_name`` otherwise. The directory is created if missing.
        """
        if not os.path.isdir(self.har_dir):
            os.makedirs(self.har_dir)
        har_name = self.cached_har_name if cached else self.har_name

        log.info('saving HAR file: {}'.format(har_name))
        with open(os.path.join(self.har_dir, har_name), 'w') as f:
            json.dump(har, f, indent=2, ensure_ascii=False)

    def load_page(self, url, run_cached=True):
        """Load ``url`` once (and a second time when ``run_cached``), saving a HAR each time."""
        driver, proxy = self._make_proxied_webdriver()
        try:
            proxy.new_har(self.label)
            log.info('loading page: {}'.format(url))
            driver.get(url)
            self._save_har(proxy.har)

            if run_cached:
                proxy.new_har(self.cached_label)
                log.info('loading cached page: {}'.format(url))
                driver.get(url)
                self._save_har(proxy.har, cached=True)
        finally:
            # Quit even when loading or saving failed so no browser is left behind.
            driver.quit()

    def slugify(self, text):
        """Lower-case ``text`` and join its ``a-z0-9`` runs with dashes."""
        pattern = re.compile(r'[^a-z0-9]+')
        slug = '-'.join(word for word in pattern.split(text.lower()) if word)
        return slug
Esempio n. 47
0
class Browser:
    """Chrome session routed through a BrowserMob proxy that writes one HAR per page."""

    def __init__(self, chromedriverPath, browsermobPath, harfilePath, cookies=None):
        """Start the proxy server and a Chrome driver that sends traffic through it.

        chromedriverPath -- path passed to ``webdriver.Chrome``
        browsermobPath   -- path passed to ``Server``
        harfilePath      -- directory prefix under which ``get`` writes ``.har`` files
        cookies          -- optional path to a JSON file with a list of cookies
                            that are added to the driver
        """
        self.harfilePath = harfilePath

        self.server = Server(browsermobPath)
        self.server.start()
        self.proxy = self.server.create_proxy()

        os.environ["webdriver.chrome.driver"] = chromedriverPath
        proxy_address = urlparse(self.proxy.proxy).path
        options = webdriver.ChromeOptions()
        options.add_argument("--proxy-server={0}".format(proxy_address))

        self.driver = webdriver.Chrome(chromedriverPath, chrome_options=options)
        if cookies:
            print("Loading cookies from "+str(cookies))
            with open(cookies, 'r') as cookie_file:
                stored_cookies = json.load(cookie_file)
            for stored in stored_cookies:
                self.driver.add_cookie(stored)

    def get(self, url, timeout=20):
        """Load ``url``, scroll down in steps, save the HAR and return the page source.

        A page-load timeout is reported and followed by a key press sent to
        the body element. If reading the page source or the HAR times out,
        the whole call is repeated with ``timeout`` raised by 5.
        """
        print(url)
        self.proxy.new_har(url, {"captureContent":True})

        # (script, seconds to wait afterwards) -- scrolls progressively further
        # down the page, pausing after each step.
        scroll_steps = (
            ("window.scrollTo(0, document.body.scrollHeight/5);", .5),
            ("window.scrollTo(0, document.body.scrollHeight/4);", .5),
            ("window.scrollTo(0, document.body.scrollHeight/3);", .5),
            ("window.scrollTo(0, document.body.scrollHeight/2);", .5),
            ("window.scrollTo(0, document.body.scrollHeight);", 4),
        )
        try:
            self.driver.set_page_load_timeout(timeout)
            self.driver.get(url)
            for script, pause in scroll_steps:
                self.driver.execute_script(script)
                time.sleep(pause)
        except TimeoutException:
            print("Timeout")
            body = self.driver.find_element_by_tag_name("body")
            body.send_keys(Keys.CONTROL+Keys.ESCAPE)

        try:
            source = self.driver.page_source
            har_text = json.dumps(self.proxy.har, ensure_ascii=False)
            # File name is the current time in milliseconds.
            har_path = self.harfilePath+"/"+str(int(time.time()*1000.0))+".har"
            with open(har_path, "w") as harfile:
                harfile.write(har_text)
            return source
        except TimeoutException:
            print("Retrying, with a timeout of "+str(timeout+5))
            return self.get(url, timeout=timeout+5)

    def close(self):
        """Stop the proxy server and quit the driver, warning instead of raising."""
        shutdown_steps = (
            (lambda: self.server.stop(), "Warning: Error stopping server"),
            (lambda: self.driver.quit(), "Warning: Error stopping driver"),
        )
        for shutdown, warning in shutdown_steps:
            try:
                shutdown()
            except Exception:
                print(warning)
class BrowserMobLibrary():
    """Robot Framework keyword library wrapping a BrowserMob proxy server.

    Keeps one "active" proxy that most keywords act on, plus a list of every
    proxy created through ``create_proxy``.
    """

    ROBOT_LIBRARY_SCOPE = 'GLOBAL'
    ROBOT_LIBRARY_VERSION = VERSION

    def __init__(self):
        self.isServerStarted = False
        self.activeProxy = None
        self.server = None
        self.proxies = []

    def _proxy(self):
        """Return the active proxy or raise if there is none."""
        if self.activeProxy is None:
            raise Exception("No proxy has been created")
        return self.activeProxy

    def start_browsermob(self, browsermob_path):
        """Start the BrowserMob server found at ``browsermob_path``."""
        self.server = Server(browsermob_path)
        self.server.start()
        self.isServerStarted = True

    def stop_browsermob(self):
        """Stop the BrowserMob server."""
        self.server.stop()
        self.server = None
        self.isServerStarted = False

    def create_proxy(self):
        """Create a proxy, make it the active one, track it and return it."""
        # Create exactly one proxy: the object that is returned must be the
        # same one that is tracked and made active.
        proxy = self.server.create_proxy()
        self.activeProxy = proxy
        self.proxies.append(proxy)
        return proxy

    def close_proxy(self, proxy):
        """Close ``proxy`` and forget it."""
        # A proxy installed via set_active_proxy may never have been tracked.
        if proxy in self.proxies:
            self.proxies.remove(proxy)
        if proxy is self.activeProxy:
            self.activeProxy = None
        proxy.close()

    def close_active_proxy(self):
        """Close the active proxy; raises if no proxy is active."""
        self.close_proxy(self._proxy())

    def set_active_proxy(self, proxy):
        """Make ``proxy`` the target of the proxy keywords."""
        self.activeProxy = proxy

    def get_active_proxy(self):
        """Return the active proxy (``None`` when there is none)."""
        return self.activeProxy

    def get_all_proxies(self):
        """Return the list of tracked proxies."""
        return self.proxies

    def close_all_proxies(self):
        """Close every tracked proxy and empty the list."""
        for proxy in list(self.proxies):
            proxy.close()
            if proxy is self.activeProxy:
                self.activeProxy = None
        # Emptied in place so a list handed out by get_all_proxies stays in sync.
        del self.proxies[:]

    def capture_traffic(self, reference=None, **options):
        """Call ``new_har(reference, options)`` on the active proxy."""
        return self._proxy().new_har(reference, options)

    def get_captured_traffic(self):
        """Return the active proxy's ``har``."""
        return self._proxy().har

    def set_capture_reference(self, reference=None):
        """Call ``new_page(reference)`` on the active proxy."""
        return self._proxy().new_page(reference)

    def ignore_all_traffic_matching(self, regexp, status_code):
        """Call ``blacklist(regexp, status_code)`` on the active proxy."""
        return self._proxy().blacklist(regexp, status_code)

    def only_capture_traffic_matching(self, regexp, status_code):
        """Call ``whitelist(regexp, status_code)`` on the active proxy."""
        return self._proxy().whitelist(regexp, status_code)

    def use_basic_authentication(self, domain, username, password):
        """Call ``basic_authentication(domain, username, password)`` on the active proxy."""
        return self._proxy().basic_authentication(domain, username, password)

    def set_headers(self, headers):
        """Call ``headers(headers)`` on the active proxy."""
        return self._proxy().headers(headers)

    def set_response_interceptor(self, js):
        """Call ``response_interceptor(js)`` on the active proxy."""
        return self._proxy().response_interceptor(js)

    def set_request_interceptor(self, js):
        """Call ``request_interceptor(js)`` on the active proxy."""
        return self._proxy().request_interceptor(js)

    def set_bandwith_limits(self, **options):
        """Call ``limits(options)`` on the active proxy."""
        return self._proxy().limits(options)

    def set_proxy_timeouts(self, **options):
        """Call ``timeouts(options)`` on the active proxy."""
        return self._proxy().timeouts(options)

    def remap_hosts(self, address, ip_address):
        """Call ``remap_hosts(address, ip_address)`` on the active proxy."""
        return self._proxy().remap_hosts(address, ip_address)

    def wait_for_traffic_to_stop(self, quiet_period, timeout):
        """Call ``wait_for_traffic_to_stop(quiet_period, timeout)`` on the active proxy."""
        return self._proxy().wait_for_traffic_to_stop(quiet_period, timeout)

    def clear_proxy_dns_cache(self):
        """Call ``clear_dns_cache()`` on the active proxy."""
        return self._proxy().clear_dns_cache()

    def rewrite_url(self, match, replace):
        """Call ``rewrite_url(match, replace)`` on the active proxy."""
        return self._proxy().rewrite_url(match, replace)
Esempio n. 49
0
def retrieve_har():
    """Load the page in proxied Firefox, click the video player and save the HAR.

    Reads the module-level settings ``har_name``, ``har_save_path``, ``path``
    (BrowserMob launcher), ``ff_profile`` and ``url``.

    The player element named ``vplayer`` inside the first frame is clicked;
    one retry is made after 5 seconds. When both attempts fail the process
    exits without writing a file. The proxy server and the browser are shut
    down on every path, including errors.
    """
    print("Retrieving .har file using generated url...")

    har_name_ex = har_name + ".har"
    complete_har_path = os.path.join(har_save_path, har_name_ex)

    server = Server(path)
    server.start()
    driver = None
    clicked = False
    try:
        proxy = server.create_proxy()

        profile = webdriver.FirefoxProfile(ff_profile)
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        # NOTE: loading the page in Chrome via "--proxy-server" was tried
        # by the original author and did not work; Firefox is used instead.

        proxy.new_har(har_name, options={'captureHeaders': True})
        driver.get(url)

        def click_player():
            # The player lives inside the first frame of the page.
            driver.switch_to.frame(0)
            driver.find_element_by_name('vplayer').click()

        try:
            click_player()
            clicked = True
        except Exception:
            print("Couldn't click player!")
            print("Trying again in 5 seconds...")

            time.sleep(5)

            try:
                # The first attempt may already have entered the frame;
                # start again from the top-level document.
                driver.switch_to.default_content()
                click_player()
                clicked = True
            except Exception:
                print("Not able to click the video player")

        if clicked:
            time.sleep(1)

            # indent=4 puts the .har file on separate lines
            result = json.dumps(proxy.har, ensure_ascii=False, indent=4)

            with open(complete_har_path, 'w') as har_file:
                har_file.write(str(result))
    finally:
        # Stop the server and the driver, whatever happened above.
        try:
            server.stop()
        finally:
            if driver is not None:
                driver.quit()

    if not clicked:
        time.sleep(3)
        sys.exit()
Esempio n. 50
0
    def run_webdriver(self, start_url, port, config, download_dir):
        """
        Run Selenium WebDriver

        Starts a BrowserMob server on ``port``, launches Firefox through a
        proxy created on it (after starting ``self.xvfb``), hands the browser
        to ``self.analyse_page`` and collects the captured traffic as a HAR.

        :param start_url: object whose ``hostname`` names the HAR; also
            passed to ``analyse_page``
        :param port: port for the BrowserMob server
        :param config: options object; ``url``, ``referer``, ``useragent``
            and ``firefoxprofile`` are read
        :param download_dir: value for Firefox's ``browser.download.dir``

        Exceptions raised while the browser is running are logged and
        swallowed after proxy, browser, Xvfb and server have been shut down.
        """
        webdriver = None
        urllib3_logger = logging.getLogger('urllib3')
        urllib3_logger.setLevel(logging.DEBUG)

        self.logger.info("Starting WebRunner")
        har = None

        # Fall back to a Google search result page as referer.
        if config.referer:
            referer = config.referer
        else:
            referer = 'http://www.google.com/search?q={}+&oq={}&oe=utf-8&rls=org.mozilla:en-US:official&client=firefox-a&channel=fflb&gws_rd=cr'.format(
                config.url, config.url)

        if config.useragent:
            useragent = config.useragent
        else:
            useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:24.0) Gecko/20100101 Firefox/24.0'

        self.logger.debug("Running with UserAgent: {}".format(useragent))
        self.logger.debug("Running with Referer: {}".format(referer))
        self.logger.debug("Checking URL: {}".format(config.url))

        server = Server("lib/browsermob/bin/browsermob-proxy", {'port': port})
        server.start()
        proxy = server.create_proxy()
        proxy.headers({'User-Agent': useragent, 'Accept-Encoding': "", 'Connection': 'Close'})
        # Interceptor run by the proxy: adds the Referer header to requests
        # that do not carry one.
        request_js = (
            'var referer = request.getProxyRequest().getField("Referer");'
            'addReferer(request);'
            'function addReferer(r){'
            'if (! referer ) {'
            'r.addRequestHeader("Referer","' + referer + '");'
            '}'
            'return;'
            '}')
        proxy.request_interceptor(request_js)

        if config.firefoxprofile is not None and os.path.isdir(config.firefoxprofile):
            self.logger.debug("Using existing firefox profile")
            firefox_profile = FirefoxProfile(profile_directory=config.firefoxprofile)
        else:
            firefox_profile = FirefoxProfile()

        self.logger.debug("Using profile {}".format(firefox_profile.path))

        firefox_profile.set_preference("security.OCSP.enabled", 0)
        firefox_profile.set_preference("browser.download.folderList", 2)
        firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
        firefox_profile.set_preference("browser.download.dir", download_dir)
        firefox_profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                                       "application/x-xpinstall;application/x-zip;application/x-zip-compressed;application/octet-stream;application/zip;application/pdf;application/msword;text/plain;application/octet")
        firefox_profile.set_preference("browser.helperApps.alwaysAsk.force", False)
        firefox_profile.set_preference("security.mixed_content.block_active_content", False)
        firefox_profile.set_preference("security.mixed_content.block_display_content", False)
        firefox_profile.set_preference("extensions.blocklist.enabled", False)
        firefox_profile.set_preference("network.proxy.type", 1)
        firefox_profile.set_proxy(proxy.selenium_proxy())
        firefox_profile.set_preference("webdriver.log.file", "/tmp/ff.log")
        firefox_profile.set_preference("webdriver.log.driver", "DEBUG")
        firefox_profile.set_preference("browser.newtabpage.enhanced", False)
        firefox_profile.set_preference("browser.newtabpage.enabled", False)
        firefox_profile.set_preference("browser.newtabpage.directory.ping", "")
        firefox_profile.set_preference("browser.newtabpage.directory.source", "")
        firefox_profile.set_preference("browser.search.geoip.url", "")

        try:
            self.xvfb.start()
            # Work on a copy: DesiredCapabilities.FIREFOX is a shared
            # class-level dict and must not be modified in place.
            capabilities = DesiredCapabilities.FIREFOX.copy()
            capabilities['loggingPrefs'] = {'browser':'ALL'}
            # Prefer a Firefox binary shipped inside the profile directory.
            if os.path.exists("{}/firefox".format(firefox_profile.path)):
                binary = FirefoxBinary("{}/firefox".format(firefox_profile.path))
            else:
                binary = FirefoxBinary("/usr/bin/firefox")
            webdriver = WebDriver(capabilities=capabilities, firefox_profile=firefox_profile, firefox_binary=binary)
            proxy.new_har(start_url.hostname,
                          options={"captureHeaders": "true", "captureContent": "true", "captureBinaryContent": "true"})
            self.analyse_page(webdriver, start_url)
            for entry in webdriver.get_log('browser'):
                self.logger.info("Firefox: {}".format(entry))
            har = proxy.har
            self.logger.info("Stopping WebRunner")
            proxy.close()
            server.stop()
            webdriver.quit()
            har = Har(har)
        except Exception as e:
            self.logger.error(e)
            proxy.close()
            if webdriver:
                webdriver.quit()
            self.xvfb.stop()
            server.stop()
class WebTrafficGenerator:
    
    def __init__(self,args):
        
        self.browser_mob_proxy_location = os.environ.get("BROWSERMOBPROXY_BIN")
        
        if not self.browser_mob_proxy_location:
            self.browser_mob_proxy_location = "./browsermob-proxy/bin/browsermob-proxy"
        
        # Parse arguments
        self.urls_file = args['in_file']
        
        self.out_stats_folder = args['out_folder']
        
        self.timeout = args['timeout']
        
        self.save_headers = args['headers']

        self.max_interval = args['max_interval']
        
        self.browsers_num = args['browsers']

        self.max_requests = args['limit_urls']
        
        self.no_sleep = args['no_sleep']
        
        self.no_https = args['no_https']
        
    def run(self):
        
        # create temporary directory for downloads
        self.temp_dir = tempfile.TemporaryDirectory()
        
        try:
            
            # Read URLs and time
            
            self.urls=[]
            self.thinking_times=[]
            
            visit_timestamps=[]
            
            with open(self.urls_file ,"r") as f:
                
                history = f.read().splitlines()
    
            for line in history:
                
                entry = line.split()
                
                if not (entry[1].lower().startswith("file://") or
                    (entry[1].lower().startswith("http://") and 
                     (entry[1].lower().startswith("10.",7) or 
                      entry[1].lower().startswith("192.168.",7))) or 
                    (entry[1].lower().startswith("https://") and 
                     (entry[1].lower().startswith("10.",8) or 
                      entry[1].lower().startswith("192.168.",8)))):
                    
                    # convert timestamp in seconds
                    visit_timestamps.append(float(entry[0])/1000000)
                    
                    if (not self.no_https or not entry[1].lower().startswith("https://")):
                        self.urls.append(entry[1])
            
            if not self.max_requests:
                self.max_requests = len(self.urls)
    
            visit_timestamps.sort()
            
            for i in range(1, len(visit_timestamps)):
                
                think_time=(visit_timestamps[i]-visit_timestamps[i-1])
                
                if think_time<=self.max_interval:
                    
                    self.thinking_times.append(think_time)
            
            self.cdf, self.inverse_cdf, self.cdf_samples = compute_cdf(self.thinking_times)
            
            print ("Number of URLs: "+str(len(self.urls)))
            
            # Create or clean statistics folder
            
            if not os.path.exists(self.out_stats_folder):
                os.makedirs(self.out_stats_folder)
            else:
                for file in os.listdir(self.out_stats_folder):
                    
                    file_path = os.path.join(self.out_stats_folder, file)
                    
                    if os.path.isfile(file_path):
                        os.remove(file_path)
    
            # Plot history statistics
            self.plot_thinking_time_cdf()
            #self.plot_thinking_time_inverse_cdf()
            
            # Start Proxy
            self.server = Server(self.browser_mob_proxy_location)
            
            self.server.start()
            
            # start queues
            self.urls_queue = Queue()
            self.hars_queue = Queue()
            
            # start Barrier (for coordinating proxy server restart) 
            self.barrier = Barrier(self.browsers_num, action = self.restart_proxy_server)
            
            try:
                
                self.workers = [Browser(i, self.server,
                                        self.urls_queue, self.hars_queue,
                                        self.barrier,
                                        self.timeout, self.save_headers,
                                        self.temp_dir.name)
                                for i in range(self.browsers_num)]
                
                for w in self.workers:
                    w.start()
                
                number_of_requests = 0
                # Start requesting pages
                for url in self.urls:
    
                    if number_of_requests==self.max_requests:
                        break
    
                    self.urls_queue.put(url)
                    number_of_requests += 1
                    
                    if not self.no_sleep:
                        time.sleep(self.get_thinking_time())
                
                for w in self.workers:
                    self.urls_queue.put(None)
                
                self.hars = []
                
                for w in self.workers:
                    browser_hars = self.hars_queue.get()
                    self.hars.extend(browser_hars)
                
                # write HAR file
                with open(os.path.join(self.out_stats_folder,"HARs.json"),"w") as f:
                    json.dump(self.hars,f)
                
                # Gather statistics
                self.stats = {
                              "totalTime":[],
                              "blocked":[],
                              "dns":[],
                              "connect":[],
                              "send":[],
                              "wait":[],
                              "receive":[],
                              "ssl":[]
                              }
                
                for har in self.hars:
                    
                    if har["log"]["totalTime"]!=-1:
                        self.stats["totalTime"].append(har["log"]["totalTime"])
                    
                    for entry in har["log"]["entries"]:
                        
                        if (not self.no_https or not entry["request"]["url"].lower().startswith("https://")):
                        
                            # Queuing
                            if entry["timings"]["blocked"]!=-1:
                                self.stats["blocked"].append(entry["timings"]["blocked"])
                                
                            # DNS resolution
                            if entry["timings"]["dns"]!=-1:
                                self.stats["dns"].append(entry["timings"]["dns"])
                                
                            # TCP Connection
                            if entry["timings"]["connect"]!=-1:
                                self.stats["connect"].append(entry["timings"]["connect"])
                                
                            # HTTP Request send
                            if entry["timings"]["send"]!=-1:
                                self.stats["send"].append(entry["timings"]["send"])
                                
                            # Wait the server
                            if entry["timings"]["wait"]!=-1:
                                self.stats["wait"].append(entry["timings"]["wait"])
                                
                            # HTTP Response receive
                            if entry["timings"]["receive"]!=-1:
                                self.stats["receive"].append(entry["timings"]["receive"])
                                
                            if entry["timings"]["ssl"]!=-1:
                                self.stats["ssl"].append(entry["timings"]["ssl"])
                        
                # Save statistics
                self.plot_stats()
                
                for w in self.workers:
                    w.join()
                    
            except KeyboardInterrupt:
                pass
            
            finally:
                self.urls_queue.close()
                self.hars_queue.close()
                self.server.stop()
                
        except Exception as e:
           print("Exception: " + str(e))
           
           import traceback
           traceback.print_exc()
           
        finally:
            
            self.temp_dir.cleanup()

    def restart_proxy_server(self):
        """Replace the proxy server with a freshly started one.

        Registered as the barrier action in ``run``. A failure while stopping
        the old server is reported and otherwise ignored.
        """
        try:
            self.server.stop()
        except Exception as stop_error:
            print("Failed to stop proxy server. Exception: " + str(stop_error))

        # Start Proxy
        replacement = Server(self.browser_mob_proxy_location)
        self.server = replacement
        replacement.start()

        print("Proxy server restarted")
    
    def plot_thinking_time_cdf(self):
        """Save the thinking-time CDF as ``thinking_time_cdf.png`` in the stats folder.

        The curve is evaluated at 10000 evenly spaced points between the
        smallest and largest recorded thinking time.
        """
        shortest = min(self.thinking_times)
        longest = max(self.thinking_times)
        seconds = np.linspace(shortest, longest, num=10000, endpoint=True)

        # Plot the cdf
        figure = plt.figure()
        ax = figure.add_subplot(111)
        ax.plot(seconds, self.cdf(seconds))
        ax.set_ylim((0,1))
        ax.set_xlabel("Seconds")
        ax.set_ylabel("CDF")
        ax.set_title("Thinking time")
        ax.grid(True)

        target = os.path.join(self.out_stats_folder,"thinking_time_cdf.png")
        figure.savefig(target)

    def plot_thinking_time_inverse_cdf(self):
        """Save the inverse thinking-time CDF as ``thinking_time_inverse_cdf.png``.

        The curve is evaluated at 10000 evenly spaced points between the
        smallest and largest value in ``self.cdf_samples``.
        """
        lowest = min(self.cdf_samples)
        highest = max(self.cdf_samples)
        probabilities = np.linspace(lowest, highest, num=10000, endpoint=True)

        # Plot the inverse cdf
        figure = plt.figure()
        ax = figure.add_subplot(111)
        ax.plot(probabilities, self.inverse_cdf(probabilities))
        ax.set_xlim((0,1))
        ax.set_ylabel("Seconds")
        ax.set_xlabel("CDF")
        ax.set_title("Thinking time")
        ax.grid(True)

        target = os.path.join(self.out_stats_folder,"thinking_time_inverse_cdf.png")
        figure.savefig(target)
   
    def get_thinking_time(self):
        
        rand=random.uniform(min(self.cdf_samples),max(self.cdf_samples))
        time = float(self.inverse_cdf(rand))
        return time
    
    def plot_stats(self):
        """Plot the CDFs of the collected statistics and save them as PNG files.

        Reads ``self.stats`` (a mapping from statistic name to a list of
        values) and writes into ``self.out_stats_folder``:

        * ``page_load_cdf.png``   -- the ``totalTime`` series, x values divided by 1000
        * ``timings_cdf.png``     -- every other series on a linear x axis
        * ``timings_cdf_log.png`` -- the same series on a logarithmic x axis

        Series with fewer than two distinct values are skipped.
        """

        fig_total = plt.figure()
        axes_total = fig_total.add_subplot(111)

        fig_timings = plt.figure()
        axes_timings = fig_timings.add_subplot(1,1,1)

        fig_timings_log = plt.figure()
        axes_timings_log = fig_timings_log.add_subplot(1,1,1)

        for key in self.stats:
            # Only series with at least two distinct values are plotted.
            if len(set(self.stats[key]))>1:
                # compute_cdf returns a sequence whose first item is the
                # callable CDF (see its use in run()).
                cdf = compute_cdf(self.stats[key])

                x = np.linspace(min(self.stats[key]), max(self.stats[key]), num=10000, endpoint=True)

                # Plot the cdf
                if key=="totalTime":
                    # x is divided by 1000 to match the "Seconds" axis label
                    # (presumably the values are milliseconds -- TODO confirm).
                    axes_total.plot(x/1000, cdf[0](x), label=key)
                else:
                    axes_timings.plot(x, cdf[0](x), label=key)

                    # zero is not valid with log axes
                    if min(self.stats[key])==0:
                        non_zero_min = find_non_zero_min(self.stats[key])

                        # No usable lower bound: leave this series out of
                        # the logarithmic plot.
                        if non_zero_min == 0:
                            continue

                        # Re-sample starting at the smallest non-zero value.
                        x = np.linspace(non_zero_min, max(self.stats[key]), num=10000, endpoint=True)

                    axes_timings_log.plot(x, cdf[0](x), label=key)

        # Page load time figure.
        axes_total.set_ylim((0,1))
        axes_total.set_xlabel("Seconds")
        axes_total.set_ylabel("CDF")
        axes_total.set_title("Page load time")
        axes_total.grid(True)

        fig_total.savefig(os.path.join(self.out_stats_folder,"page_load_cdf.png"))

        # Per-resource timings, linear x axis.
        axes_timings.set_ylim((0,1))
        axes_timings.set_xlabel("Milliseconds")
        axes_timings.set_ylabel("CDF")
        axes_timings.set_title("Single resource timings")
        axes_timings.grid(True)
        axes_timings.legend(loc='best')

        # Per-resource timings, logarithmic x axis.
        axes_timings_log.set_ylim((0,1))
        axes_timings_log.set_xlabel("Milliseconds")
        axes_timings_log.set_ylabel("CDF")
        axes_timings_log.set_xscale("log")
        axes_timings_log.set_title("Single resource timings")
        axes_timings_log.grid(True, which="both", axis="x")
        axes_timings_log.grid(True, which="major", axis="y")

        axes_timings_log.legend(loc='best')

        fig_timings.savefig(os.path.join(self.out_stats_folder,"timings_cdf.png"))
        fig_timings_log.savefig(os.path.join(self.out_stats_folder,"timings_cdf_log.png"))