def initializeChrome():
    """Start a BrowserMob proxy and a Chrome webdriver routed through it.

    Sets the module-level ``driver``, ``server`` and ``proxy`` globals.
    Tries the Linux install location of browsermob-proxy first, falling
    back to the bundled Windows binary.
    """
    global driver
    global server
    global proxy
    # Renamed from ``dict`` — the original shadowed the builtin.
    proxy_options = {'port': 8090}
    try:
        # Preferred: system-wide Linux install location.
        server = Server(path="/usr/local/bin/skillshare-dl/browsermob-proxy",
                        options=proxy_options)
    except Exception:
        # Narrowed from a bare ``except:`` which would also swallow
        # KeyboardInterrupt/SystemExit. Fall back to the bundled binary.
        path_to_bmp = Path(
            "./binaries/browsermob-proxy-2.1.4/bin/browsermob-proxy").absolute()
        path_on_windows = str(PureWindowsPath(path_to_bmp))
        server = Server(path=path_on_windows, options=proxy_options)
    server.start()
    proxy = server.create_proxy()
    chrome_options = webdriver.ChromeOptions()
    # Route all browser traffic through the BrowserMob proxy.
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
    path_to_chromedriver = str(Path("./binaries/chromedriver.exe").absolute())
    try:
        # First try a chromedriver found on PATH.
        driver = webdriver.Chrome(options=chrome_options)
    except Exception:
        # Fall back to the bundled chromedriver binary.
        driver = webdriver.Chrome(path_to_chromedriver, options=chrome_options)
    print('initialized Chrome window!')
def create_server(browser, websites):
    """Spin up a BrowserMob proxy server, configure it for the given
    browser/websites, then tear it down again."""
    print("=====> Creating New Server - Please Wait... <=====")
    bmp_server = Server(browsermob_proxy_path)
    bmp_server.start()
    bmp_proxy = bmp_server.create_proxy()
    # Delegate setup and teardown to the module-level helpers.
    configure_server(bmp_proxy, browser, websites)
    close_server(bmp_proxy, bmp_server)
def CaptureNetworkTraffic(url, server_ip, headers, file_path):
    '''
    Capture network traffic (headers/cookies/HTTP calls) made by the browser
    while loading *url*, and store the resulting HAR as JSON.

    url        - page url to load
    server_ip  - host to remap the www.example*.com domains to
    headers    - dictionary of headers to set on the proxy
    file_path  - file in which the HAR gets stored
    '''
    port = {'port': 9090}
    # Path to the BrowserMobProxy binary.
    server = Server("G:\\browsermob\\bin\\browsermob-proxy", port)
    server.start()
    proxy = server.create_proxy()
    proxy.remap_hosts("www.example.com", server_ip)
    proxy.remap_hosts("www.example1.com", server_ip)
    proxy.remap_hosts("www.example2.com", server_ip)
    proxy.headers(headers)
    profile = webdriver.FirefoxProfile()
    profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=profile)
    new = {'captureHeaders': 'True', 'captureContent': 'True'}
    proxy.new_har("google", new)
    driver.get(url)
    # BUG FIX: snapshot the HAR *before* stopping the server. The original
    # discarded this value and re-read ``proxy.har`` after ``server.stop()``,
    # when the proxy's REST endpoint is already gone.
    har = proxy.har  # returns a HAR JSON blob
    server.stop()
    driver.quit()
    # Context manager replaces the manual open/close pair.
    with open(file_path, 'w') as har_file:
        json.dump(har, har_file)
def initializeChrome():
    """Launch Chrome through a fresh BrowserMob proxy, reusing the user's
    existing Chrome profile so saved logins are available.

    Populates the module-level ``driver``, ``server`` and ``proxy`` globals.
    """
    global driver
    global server
    global proxy
    bmp_options = {'port': 8090}
    bmp_binary = Path(
        "./binaries/browsermob-proxy-2.1.4/bin/browsermob-proxy").absolute()
    #server = Server(path="binaries/browsermob-proxy-2.1.4/bin/browsermob-proxy", options=dict)
    server = Server(path=str(PureWindowsPath(bmp_binary)), options=bmp_options)
    server.start()
    proxy = server.create_proxy()
    # NOTE(review): this UA string is built but never applied to the driver.
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
    username_windows = input(
        "Please enter your username the way it's written in C:/Users/ - it's needed to locate your Chrome user data."
    )
    user_data_path = Path('C:/Users/' + str(username_windows) +
                          '/AppData/Local/Google/Chrome/User Data')
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--disable-extensions")
    # Funnel traffic through the BrowserMob proxy so requests can be captured.
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
    # Hide the "controlled by automated software" infobar.
    chrome_options.add_experimental_option("excludeSwitches",
                                           ['enable-automation'])
    chrome_options.add_argument(f'--user-data-dir={user_data_path}')
    chromedriver_binary = str(Path("./binaries/chromedriver.exe").absolute())
    driver = webdriver.Chrome(chromedriver_binary, options=chrome_options)
    print('initialized Chrome window!')
def test2():
    """Smoke-test starting a BrowserMob Server and creating a proxy on it."""
    bmp_path = '/usr/local/browsermob-proxy-2.1.4/bin/browsermob-proxy'
    server = Server(bmp_path, {'port': 9999})
    # Short pauses give the Java process time to come up.
    time.sleep(1)
    proxy = server.create_proxy()
    time.sleep(1)
def __init__(self, root="C:/xampp/htdocs/webscrape/", folder=""):
    """Start a BrowserMob proxy plus a Chrome driver routed through it and
    begin recording a HAR with request headers."""
    self.url = ""
    self.root = root + folder + "/"
    # Bring up the BrowserMob proxy first so Chrome can point at it.
    self.server = Server(
        r"C:\webdrivers\browsermob-proxy\bin\browsermob-proxy")
    self.server.start()
    self.proxy = self.server.create_proxy()
    # Chrome setup — note: does not seem to work with headless on.
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
    # Point Chrome at the BrowserMob proxy so it can track requests.
    chrome_opts.add_argument('--proxy-server=%s' % self.proxy.proxy)
    self.w = webdriver.Chrome(r'C:/webdrivers/chromedriver.exe',
                              chrome_options=chrome_opts)
    # Begin capturing traffic, including request/response headers.
    #self.proxy.new_har("Listener" ) # Request listener
    self.proxy.new_har("Listener", options={'captureHeaders': True})
    print("Browser and Server initialized...")
def setupdevices():
    """
    Description:
        Sets up the BrowserMob proxy, a Selenium Firefox driver, and a HAR
        recording.
    Usage:
        [driver, proxy] = setupdevices()
    Inputs: NA
    Output:
        Selenium driver, BrowserMob proxy, BrowserMob server (tuple)
    """
    # Proxy first: the Firefox profile needs its address.
    server = Server("############/browsermob-proxy-2.0-beta-9/bin/browsermob-proxy")
    server.start()
    proxy = server.create_proxy()
    firefox_profile = webdriver.FirefoxProfile()
    firefox_profile.set_proxy(proxy.selenium_proxy())
    proxy.new_har("________")
    # Driver second, using the proxied profile.
    driver = webdriver.Firefox(firefox_profile=firefox_profile)
    return (driver, proxy, server)
def get_driver(request, cmdopt_browser, cmdopt_window):
    """Fixture to create, return and close driver.

    Starts a BrowserMob proxy and builds the requested browser
    (``ie`` / ``firefox`` / ``chrome``, optionally ``headless``) with its
    traffic routed through the proxy. Returns the driver, an
    EventFiringWebDriver for non-headless Chrome, or the string
    ``"unsupported browser"``.
    """
    server = Server(
        "C:\\Program Files (x86)\\browsermob-proxy\\bin\\browsermob-proxy",
        {"port": 9090})
    server.start()
    proxy = server.create_proxy()
    url = urllib.parse.urlparse(proxy.proxy).path
    driver = None
    if cmdopt_browser == "ie":
        driver = webdriver.Ie()
    elif cmdopt_browser == "firefox":
        options = Firefox_options()
        if cmdopt_window == "headless":
            options.add_argument("--headless")
        options.add_argument('--proxy-server={}'.format(url))
        # BUG FIX: the non-headless branch built proxy options but then
        # called webdriver.Firefox() without them; pass them in both cases.
        driver = webdriver.Firefox(firefox_options=options)
        proxy.new_har()
        request.addfinalizer(driver.quit)
    elif cmdopt_browser == "chrome":
        if cmdopt_window == "headless":
            options = Chrome_options()
            options.headless = True
            options.add_argument('--proxy-server={}'.format(url))
            driver = webdriver.Chrome(chrome_options=options)
            proxy.new_har()
            request.addfinalizer(driver.quit)
        else:
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--proxy-server={}'.format(url))
            d = DesiredCapabilities.CHROME
            d['loggingPrefs'] = {'browser': 'ALL'}
            driver = webdriver.Chrome(desired_capabilities=d,
                                      chrome_options=chrome_options)
            proxy.new_har()
            ef_driver = EventFiringWebDriver(driver, MyListener())

            def fin():
                # Dump the browser console log to a timestamped file, print
                # the captured HAR, then tear everything down.
                log_timestamp = str(datetime.datetime.now())[0:-4].replace(
                    '-', '.').replace(' ', '_').replace(':', '.')
                browserlog_filename = log_timestamp + '_browser_log_file.log'
                # Context manager replaces the never-closed file handle.
                with open(browserlog_filename, 'w') as browserlogfile:
                    print('-------------------------')
                    for i in ef_driver.get_log('browser'):
                        print(i)
                        browserlogfile.write(str(i) + '\n')
                print(proxy.har)
                server.stop()
                # BUG FIX: the original referenced ``ef_driver.quit`` without
                # calling it, so the browser was never closed.
                ef_driver.quit()

            request.addfinalizer(fin)
            return ef_driver
    else:
        return "unsupported browser"
    return driver
def proxy():
    """Yield a BrowserMob proxy with an active HAR; stop the server after use."""
    bmp_server = Server("/home/sergey/repositories/browsermob-proxy-2.1.4-bin/browsermob-proxy-2.1.4/bin/browsermob-proxy")
    bmp_server.start()
    bmp_proxy = bmp_server.create_proxy()
    bmp_proxy.new_har(title='project_har')
    # Hand the proxy to the test, then clean up once it returns.
    yield bmp_proxy
    bmp_server.stop()
def main():
    """Run the deployment test suite through a BrowserMob proxy and exit
    with the number of failed tests as the process status code."""
    s = Server('/home/creature/browsermob/bin/browsermob-proxy',
               {'port': 1337})
    s.start()
    proxy = s.create_proxy({'port': 1338})
    failcount = 0
    d = tu.newProxytest(proxy)
    # Capture response bodies but not headers.
    proxy.new_har(options={'captureHeaders': False, 'captureContent': True})
    if not deployOneTest(d):
        failcount += 1
    if not deleteTest(d):
        failcount += 1
    if not projectDeployTest(d):
        failcount += 1
    # Context manager replaces the manual open/write/close triple.
    with open('deploy.har', 'w') as out:
        out.write(str(proxy.har))
    # test all services from multiple projects showing up in services
    if not multiDeployTest(d):
        failcount += 1
    # test that stopping services from services page removes them from
    # project deployments
    if not cleanupTest(d):
        failcount += 1
    tu.endtest(d)
    s.stop()
    sys.exit(failcount)
def login(weburl, user, passwd, proxy_location, webdriver_location):
    """Log into the site via Chrome behind a BrowserMob proxy.

    Returns (browser, wait, proxy, content_id) where content_id is the
    randomized page id of the post-login page.
    """
    bmp_server = Server(proxy_location + r'\browsermob-proxy.bat')
    bmp_server.start()
    proxy = bmp_server.create_proxy()
    proxy.new_har(options={'captureContent': True, 'captureHeaders': True})
    chrome_options = Options()
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument('--proxy-server={}'.format(proxy.proxy))
    browser = webdriver.Chrome(webdriver_location + r'\chromedriver',
                               options=chrome_options)
    browser.set_page_load_timeout(30)
    wait = WebDriverWait(browser, 10)
    # Open the login page.
    browser.get(login_url)
    # The page prefixes every element id with a random token; read it from
    # the page wrapper so the form fields can be located.
    id0 = pq(browser.page_source)('div.z-page').attr('id').replace('_', '')
    # random-id + b: username field
    browser.find_element_by_xpath('//*[@id="{}b"]'.format(id0)).send_keys(
        username)
    # random-id + c: password field
    browser.find_element_by_xpath('//*[@id="{}c"]'.format(id0)).send_keys(
        password)
    # random-id + g: submit button
    browser.find_element_by_xpath('//*[@id="{}g"]'.format(id0)).click()
    time.sleep(4)
    # Random id of the main (post-login) page.
    content_id = pq(browser.page_source)('div.z-page').attr('id').replace(
        '_', '')
    print(id0)
    print(content_id)
    return browser, wait, proxy, content_id
def reloadHeaderAndCookie():
    """Drive a LifeMiles flight search through a BrowserMob proxy so headers
    and cookies can be captured into a HAR."""
    global httpArchive
    browsermob_path = '/usr/local/browsermob-proxy-2.1.4/bin/browsermob-proxy'
    server = Server(browsermob_path)
    # BUG FIX: the original called ``server.start()`` only after navigating
    # the browser; the proxy must exist before create_proxy()/Chrome use it.
    server.start()
    proxy = server.create_proxy()
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
    browser = webdriver.Chrome(options=chrome_options)
    options = {'captureHeaders': True, 'captureCookies': True}
    signIn(browser)
    proxy.new_har("lifemiles", options=options)
    url = "https://www.lifemiles.com/fly/find"
    time.sleep(2)
    browser.get(url)
    time.sleep(5)
    originID = "ar_bkr_fromairport"
    browser.find_element_by_id(originID).send_keys("SFO")
    time.sleep(2)
    destinationID = "ar_bkr_toairport"
    browser.find_element_by_id(destinationID).send_keys("SFO")
    time.sleep(2)
    searchClass = "Booker_bookerActionButtonSmall__3Fh2d"
    try:
        browser.find_element_by_class_name(searchClass).click()
    except Exception:
        # BUG FIX: ``except Exception, e`` is Python-2-only syntax and a
        # SyntaxError under Python 3. Fall back to a JS click.
        print("was not able to click search button, try javascript")
        browser.execute_script(
            "document.querySelector('.{0}').click()".format(searchClass))
def download_file(url, file_name):
    """Load *url* headlessly behind a BrowserMob proxy, find the embedded
    vhx.tv video request in the HAR, and download it with youtube-dl."""
    server = Server('./browsermob-proxy')  # Local path to BMP
    server.start()
    # Proxy generates a HAR containing the URLs the MP3s are loaded from.
    proxy = server.create_proxy()
    chrome_options = Options()
    # Configure Chrome to route traffic through the proxy.
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument('--headless')
    driver = webdriver.Chrome(executable_path='./chromedriver',
                              options=chrome_options)
    proxy.new_har('filename')
    driver.get(url)
    save = proxy.har
    server.stop()
    driver.quit()
    results = [entry['request']['url'] for entry in save['log']['entries']]
    # Raises IndexError when no embed request was captured — intentional.
    embedded_link = [
        res for res in results if "https://embed.vhx.tv/videos" in res
    ][0]
    # BUG FIX: the original relied on accidental implicit string
    # concatenation ("-f" "best…" -> "-fbest…"); pass flag and value as
    # separate argv entries.
    subprocess.call([
        "./youtube-dl", "-f", "best[height=540]", "-o",
        "{}.mp4".format(file_name), "--ignore-errors", embedded_link
    ])
def browser_and_proxy():
    """Yield a headless Chrome browser plus the BrowserMob proxy recording
    its traffic; always tear both down afterwards."""
    bmp_server = Server(config.BROWSERMOB_PATH)
    bmp_server.start()
    bmp_proxy = bmp_server.create_proxy()
    bmp_proxy.new_har(options={'captureContent': True})
    # Chrome configuration: proxied, image-free, headless, sandbox-less.
    option = webdriver.ChromeOptions()
    option.add_argument('--proxy-server=%s' % bmp_proxy.proxy)
    option.add_experimental_option(
        "prefs", {"profile.managed_default_content_settings.images": 2})
    for flag in ('--headless', '--no-sandbox', '--disable-gpu'):
        option.add_argument(flag)
    # Accept the proxy's self-signed certificates.
    capabilities = DesiredCapabilities.CHROME.copy()
    capabilities['acceptSslCerts'] = True
    capabilities['acceptInsecureCerts'] = True
    browser = webdriver.Chrome(options=option,
                               desired_capabilities=capabilities,
                               executable_path=config.CHROME_PATH)
    try:
        yield browser, bmp_proxy
    finally:
        browser.quit()
        bmp_server.stop()
def getAuthKey():
    """Log into emofid.com through a BrowserMob proxy and extract the
    ``Authorization`` header from the captured checkuser request.

    Returns the auth key string ('' when the header was not captured).
    """
    username = config['Users']['username']
    password = config['Users']['password']
    proxyServerAddress = config['config']['proxyserverpath']
    # Creating proxy server.
    server = Server(proxyServerAddress)
    server.start()
    proxy = server.create_proxy()
    proxy.whitelist(regexp='*emofid.com*', status_code=123)
    # Headers only: bodies are not needed to recover the auth key.
    proxy.new_har(title="mofid",
                  options={
                      'captureContent': False,
                      'captureBinaryContent': False,
                      'captureHeaders': True
                  })
    # Creating browser.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
    browser = webdriver.Chrome(chrome_options=chrome_options)
    url = "https://account.emofid.com/Login?returnUrl=%2Fconnect%2Fauthorize%2Fcallback%3Fclient_id%3Deasy2_client_pkce%26redirect_uri%3Dhttps%253A%252F%252Fd.easytrader.emofid.com%252Fauth-callback%26response_type%3Dcode%26scope%3Deasy2_api%2520openid%26state%3Df8ff796b1d994e0d8f6fa1f6e878f165%26code_challenge%3D7qf19ieakAg4BvrDkBTHbr5h7_A0BSvci7dtp-0ZUWY%26code_challenge_method%3DS256%26response_mode%3Dquery"
    browser.get(url)
    userFiled = browser.find_element_by_xpath('//*[@id="Username"]')
    userFiled.clear()
    userFiled.send_keys(username)
    passwordFiled = browser.find_element_by_xpath('//*[@id="Password"]')
    passwordFiled.clear()
    passwordFiled.send_keys(password, Keys.RETURN)
    # Dismiss the release-notes dialog that appears after login.
    element = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located(
            (By.XPATH, "/html/body/app-root/d-release-notes/div/div/button")))
    element.click()
    try:
        # Best-effort click that triggers the checkuser request.
        browser.find_element_by_xpath(
            '//*[@id="root"]/main/div[2]/div[1]/ul[2]/li[1]/span/i').click()
    except Exception:
        # Narrowed from a bare ``except:``; keep the best-effort behavior.
        print('Error')
    with open('data.json', 'w') as outfile:
        json.dump(proxy.har, outfile)
    server.stop()
    # Search the HAR for the Authorization header of the checkuser call.
    tree = Tree(proxy.har)
    authKey = ''
    result = tree.execute(
        "$.log.entries.request[@.url is 'https://d11.emofid.com/easy/api/account/checkuser'].headers"
    )
    for entry in result:
        for e in entry:
            if e['name'] == 'Authorization':
                authKey = e["value"]
    return authKey
def __start_bmproxy(self, config):
    """Start a BrowserMob proxy server when network recording is enabled.

    Locates the proxy binary under the configured tools directory (either a
    direct ``bin`` dir or the newest ``browsermob*`` sub-directory) and
    stores the started server on ``self.__bmproxy_server``.

    Raises Exception when the package cannot be found or the server fails
    to start.
    """
    # BrowserMob
    from browsermobproxy import Server
    from arjuna.tpi.constant import ArjunaOption
    capture_traffic = config.value(ArjunaOption.BROWSER_NETWORK_RECORDER_ENABLED)
    if capture_traffic:
        bmproxy_dir = config.value(ArjunaOption.TOOLS_BMPROXY_DIR)
        sub_dirs = os.listdir(bmproxy_dir)
        bmproxy_bin_dir = None
        if "bin" in sub_dirs:
            bmproxy_bin_dir = os.path.join(bmproxy_dir, "bin")
        else:
            sub_dirs.sort(reverse=True)
            # Last version will be picked.
            # BUG FIX: the original iterated ``sub_dir`` (undefined name,
            # NameError) instead of ``sub_dirs``.
            for d in sub_dirs:
                if d.startswith("browsermob"):
                    bmproxy_bin_dir = os.path.join(bmproxy_dir, d, "bin")
                    break
        if bmproxy_bin_dir is None:
            raise Exception("Network recording is enabled in configuration. There was an error in creating proxy server/server using BrowserMob Proxy. Could not find proxy package at {}".format(bmproxy_dir))
        if platform.system().lower() == "windows":
            exe = "browsermob-proxy.bat"
        else:
            exe = "browsermob-proxy"
        bmproxy_exe_path = os.path.join(bmproxy_bin_dir, exe)
        try:
            self.__bmproxy_server = Server(bmproxy_exe_path)
            self.__bmproxy_server.start()
        except ProxyServerError as e:
            raise Exception("Network recording is enabled in configuration. There was an error in creating proxy server/server using BrowserMob Proxy. Fix and retry. Error message: {}".format(str(e)))
def __init__(self, results, reports, **kwargs):
    """Parse scan arguments and cookies, start a BrowserMob proxy, and
    create the proxied Selenium Chrome driver."""
    self.results = results
    self.reports = reports
    self.args = kwargs
    self.listen_port = 9760
    self.lock = threading.Lock()
    self.vulnerable = []
    self.server = None
    # Parse "k=v; k2=v2" cookie string into a dict, skipping malformed
    # entries without an '='.
    self.cookies = {}
    for raw_cookie in self.args['cookie'].split(';'):
        if raw_cookie.find('=') == -1:
            continue
        name, val = raw_cookie.strip().split('=', 1)
        self.cookies[name] = val
    # Create proxy server.
    logging.info('Starting browsermobproxy server...')
    self.proxy_server = Server(self.args['browsermobproxy'])
    self.proxy_server.start()
    self.proxy = self.proxy_server.create_proxy()
    logging.info('Browsermobproxy server started')
    # Create Chrome engine routed through the proxy.
    logging.info('Creating Selenium Chrome webdriver...')
    self.chrome_options = webdriver.ChromeOptions()
    self.chrome_options.add_argument('--proxy-server={}'.format(
        self.proxy.proxy))
    if 'headless' in self.args:
        for flag in ('--headless', '--disable-gpu', "--disable-extensions"):
            self.chrome_options.add_argument(flag)
    self.driver = webdriver.Chrome(chrome_options=self.chrome_options)
    logging.info('Selenium Chrome webdriver created')
def reloadHeaderAndCookie():
    """Capture United's search-page headers and cookies into
    latestUnitedAuth.json by replaying the search through a proxy."""
    # Free port 8090 from any previous proxy run.
    killPortProcess(8090)
    global browser
    bmp_path = '/usr/local/browsermob-proxy-2.1.4/bin/browsermob-proxy'
    server = Server(bmp_path, {'port': 8090})
    server.start()
    time.sleep(10)
    proxy = server.create_proxy()
    time.sleep(1)
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
    browser = webdriver.Chrome(options=chrome_options)
    url1 = "https://www.united.com/en/us"
    url2 = "https://www.united.com/ual/en/US/flight-search/book-a-flight/results/awd?f=SFO&t=PVG&d=2021-11-07&tt=1&at=1&sc=7&px=1&taxng=1&newHP=True&idx=1"
    # Record headers and cookies for the two navigations.
    proxy.new_har("united", options={'captureHeaders': True,
                                     'captureCookies': True})
    browser.get(url1)
    time.sleep(5)
    browser.get(url2)
    print("click close button to get actual results")
    time.sleep(5)
    print("we now save all auths")
    captured_har = proxy.har  # returns a HAR JSON blob
    with open('latestUnitedAuth.json', 'w') as outfile:
        json.dump(captured_har, outfile)
    time.sleep(10)
    server.stop()
    browser.quit()
def get_dependencies(url):
    """Return the deduplicated list of resource URLs a page loads,
    captured via a headless Firefox behind a BrowserMob proxy."""
    bmp_location = "browsermob-proxy/bin/browsermob-proxy"
    clean_opened_processes()
    server = Server(bmp_location)
    server.start()
    time.sleep(0.5)
    proxy = server.create_proxy()
    time.sleep(0.5)
    ff_options = Options()
    ff_options.headless = True
    ff_profile = webdriver.FirefoxProfile()
    ff_profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(options=ff_options, firefox_profile=ff_profile)
    proxy.new_har("captured_elems")
    driver.get(url)
    # Give the page a moment to finish loading its sub-resources.
    time.sleep(3)
    resources = [
        entry["request"]["url"] for entry in proxy.har["log"]["entries"]
    ]
    server.stop()
    driver.quit()
    # Drop duplicates (order is not significant for callers).
    return list(set(resources))
def __init__(self, url, username, password):
    """Start a proxied Chrome session (with browser logging enabled) for
    capturing the bearer token during login."""
    self.bearer = None
    # Enable browser console logging.
    caps = DesiredCapabilities.CHROME
    caps['loggingPrefs'] = {'browser': 'ALL'}
    self.server = Server("BrowserMob\\bin\\browsermob-proxy")
    cli.print_info("Starting proxy server...")
    self.server.start()
    self.proxy = self.server.create_proxy()
    cli.add_to_print("OK\n\n")
    cli.print_warning("NOTE: Connections will show as INSECURE!\n")
    self.url = url
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument("--proxy-server={0}".format(self.proxy.proxy))
    cli.print_info("Browser started\n")
    self.browser = Chrome(options=chrome_opts,
                          executable_path="./chromedriver.exe",
                          desired_capabilities=caps)
    self.username = username
    self.password = password
    # Drop the local references to the credentials once stored.
    del password
    del username
def reloadHeaderAndCookie():
    """Bring up a BrowserMob server on port 9999 and create a proxy on it."""
    bmp_path = '/usr/local/browsermob-proxy-2.1.4/bin/browsermob-proxy'
    server = Server(bmp_path, {'port': 9999})
    # Brief pauses let the Java process finish starting.
    time.sleep(1)
    proxy = server.create_proxy()
    time.sleep(1)
def __init__(self, url, har_name, browsermob_proxy_location,
             selector_dictionary=None, default_timeout=None,
             firefox_binary=None, highlight=False, geckodriver="geckodriver"):
    """Kill stale proxies, start BrowserMob on port 8090, open a proxied
    Firefox, begin a HAR named *har_name*, and navigate to *url*."""
    self.selector_dictionary = selector_dictionary
    # Fall back to a 30 second timeout when none was supplied.
    self.default_timeout = 30 if default_timeout is None else default_timeout
    self.highlight = highlight
    # Kill any leftover browsermob-proxy process from a previous run.
    for proc in psutil.process_iter():
        if proc.name() == "browsermob-proxy":
            proc.kill()
    # self.server = Server(path="../tools/browsermob-proxy-2.1.4/bin/browsermob-proxy", options=dict)
    self.server = Server(path=browsermob_proxy_location,
                         options={'port': 8090})
    self.server.start()
    time.sleep(1)
    self.proxy = self.server.create_proxy()
    time.sleep(1)
    # Firefox profile routed through the proxy.
    ff_profile = webdriver.FirefoxProfile()
    ff_profile.set_proxy(self.proxy.selenium_proxy())
    self.driver = webdriver.Firefox(firefox_profile=ff_profile,
                                    firefox_binary=firefox_binary,
                                    executable_path=geckodriver)
    self.proxy.new_har(har_name)
    self.driver.get(url)
def print_hi():
    """Load a fund page through a BrowserMob proxy and print the response
    bodies captured in the HAR."""
    server = Server(r'D:\exchange_data\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
    server.start()
    proxy = server.create_proxy()
    # Configure driver options to route traffic through the proxy.
    chrome_options = Options()
    chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy))
    driver = webdriver.Chrome(chrome_options=chrome_options)
    # BUG FIX: ``url`` was commented out in the original but still passed to
    # driver.get(), raising NameError; restore the target URL.
    url = 'https://www.baidu.com/'
    proxy.new_har('fund', options={'captureHeaders': True,
                                   'captureContent': True})
    driver.get(url)
    result = proxy.har
    print(result)
    for entry in result['log']['entries']:
        _url = entry['request']['url']
        # Locate the data endpoint by URL if needed:
        # if "lsjz?callback=" in _url:
        _response = entry['response']
        _content = _response['content']['text']  # response body text
        print(_content)
    server.stop()
def init_browser(self):
    """Start a proxied, headless Chrome, log into the site, and return
    (browser, proxy, wait)."""
    # Response-listening proxy.
    server = Server(r'C:\python_job\khedu-test\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
    server.start()
    proxy = server.create_proxy()
    proxy.new_har(options={'captureContent': True, 'captureHeaders': True})
    # Chromedriver configuration.
    chrome_options = Options()
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy))
    chrome_options.add_argument('--headless')  # headless mode
    browser = webdriver.Chrome(r'C:\python_job\khedu-test\chromedriver.exe',
                               options=chrome_options)
    browser.maximize_window()
    time.sleep(1)  # maximizing needs a moment
    browser.set_page_load_timeout(30)
    wait = WebDriverWait(browser, 30)
    browser.get(login_url)
    # Login: the page generates a random id prefix on every load; read it
    # from the wrapper element to address the form fields.
    id0 = pq(browser.page_source)('div.z-page').attr('id').replace('_', '')
    browser.find_element_by_xpath('//*[@id="{}b"]'.format(id0)).send_keys(username)
    browser.find_element_by_xpath('//*[@id="{}c"]'.format(id0)).send_keys(password)
    browser.find_element_by_xpath('//*[@id="{}g"]'.format(id0)).click()
    time.sleep(2)  # not so fast
    # The page can hang on a spinner; a reload gets past it.
    browser.refresh()
    time.sleep(2)
    return browser, proxy, wait
def run(dep, arr, arr_date, num, china):
    """Crawl fares for every departure/arrival/date combination (skipping
    domestic-domestic pairs) with at most *num* concurrent requests.

    dep/arr  - iterables of city codes
    arr_date - iterable of travel dates
    num      - concurrency limit for the asyncio semaphore
    china    - collection of domestic city codes
    """
    server = Server(path)  # path to the BrowserMob server script
    server.start()
    tasks = []
    semaphore = asyncio.Semaphore(num)  # bound the number of in-flight requests
    i = 0
    for date in arr_date:
        for departureCity in dep:
            for arrivalCity in arr:
                if departureCity != arrivalCity:
                    # Skip purely domestic routes.
                    if departureCity not in china or arrivalCity not in china:
                        url = search_url(departureCity, arrivalCity, date)
                        print(departureCity + "--" + arrivalCity +
                              "开始爬取数据..." + str(i))
                        i = i + 1
                        # Retry task creation until it succeeds.
                        while True:
                            try:
                                c = get_request(url, server, semaphore,
                                                departureCity, arrivalCity,
                                                date)
                                task = asyncio.ensure_future(c)
                                task.add_done_callback(callback)
                                tasks.append(task)
                                break
                            except Exception as e:
                                print(e)
    # BUG FIX: asyncio.wait() raises ValueError on an empty task set; only
    # run the loop when there is work to do.
    if tasks:
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.wait(tasks))
        loop.close()
    print("server closed")
    server.stop()
def _start_ProxyHelper(self, options, proxy_port=None):
    """
    Start the proxy helper used to listen to HTTP requests.

    :param options: Chrome browser launch options (mutated in place to add
        the --proxy-server argument)
    :param proxy_port: proxy port number; defaults to GF.proxy_port()
    :return: the same options object
    """
    assert self._proxyHelper is None, "代理已开启"
    # Resolve the proxy port, preferring the explicit argument.
    if proxy_port is None:
        proxy_port = GF.proxy_port()
    # Bring up the BrowserMob server and a proxy on it.
    server = Server(GF.proxy_path(), {'port': proxy_port})
    server.start({'log_path': GF.cache_path()})
    proxy = server.create_proxy()
    # Begin HAR capture, recording bodies and headers.
    proxy.new_har(options={'captureContent': True, 'captureHeaders': True})
    options.add_argument('--proxy-server={0}'.format(proxy.proxy))
    # Initialize proxy logging, then hand the proxy to the helper.
    self.init_proxy_log()
    self._proxyHelper = ProxyHelper(self, proxy)
    return options
def __init__(self, browsermobDirectory, headless=False):
    """Discover a valid TikTok API signature and the trending hashtags by
    sniffing the trending page's traffic through a BrowserMob proxy.

    browsermobDirectory - path to the browsermob-proxy binary
    headless            - run Firefox headlessly when True
    """
    # Imports
    print(
        "New class reference, finding valid signature. This might take a minute."
    )
    from browsermobproxy import Server
    import psutil
    import json  # duplicate imports of json/time removed
    import time
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options
    # Kill any leftover browsermob-proxy process.
    for proc in psutil.process_iter():
        # check whether the process name matches
        if proc.name() == "browsermob-proxy":
            proc.kill()
    # Renamed from ``dict`` — the original shadowed the builtin.
    proxy_options = {'port': 8090}
    # e.g. "browsermob-proxy/bin/browsermob-proxy"
    server = Server(path=browsermobDirectory, options=proxy_options)
    server.start()
    time.sleep(1)
    proxy = server.create_proxy()
    time.sleep(1)
    # Create a Firefox profile routed through the proxy.
    profile = webdriver.FirefoxProfile()
    profile.set_proxy(proxy.selenium_proxy())
    options = Options()
    if headless:  # idiomatic truth test instead of ``== True``
        options.headless = True
    driver = webdriver.Firefox(firefox_profile=profile, options=options)
    # Record the trending page's traffic into a HAR.
    proxy.new_har("list")
    driver.get("https://www.tiktok.com/en/trending")
    data = proxy.har
    for element in data['log']['entries']:
        if "https://m.tiktok.com/share/item/list?" in element['request'][
                'url'] or "https://www.tiktok.com/share/item/list?" in element[
                    'request']['url']:
            print("Found signature, continuing.")
            # NOTE(review): assumes the signature is the 7th query-string
            # parameter of the list request — fragile if TikTok reorders it.
            self.signature = element['request']['queryString'][6]['value']
    # Get trending hashtags from the rendered page.
    hashtags = driver.find_elements_by_xpath(
        '//h3[@class="_list_item_title"]/a')
    hashtagArray = []
    for hashtag in hashtags:
        hashtagArray.append(hashtag.get_attribute('title'))
    self.hashtag = hashtagArray
    self.headless = headless
    self.browsermobDirectory = browsermobDirectory
    server.stop()
    driver.quit()
def locate(url, search_parameters=config.SEARCH_PARAMETERS):
    """Sniff *url* through a proxied Firefox and return (streams, subtitles):
    captured requests matching *search_parameters*, and .vtt/.srt/.ass
    subtitle requests."""
    server = Server(config.BROWSERMOB_PROXY)
    server.start()
    proxy = server.create_proxy()
    options = Options()
    options.headless = config.HEADLESS
    profile = webdriver.FirefoxProfile(config.FIREFOX_PROFILE)
    profile.set_proxy(proxy.selenium_proxy())
    browser = webdriver.Firefox(firefox_profile=profile, options=options)
    proxy.new_har('source', options={'captureHeaders': True})
    browser.get(url)
    sleep(5)
    browser.close()
    server.stop()
    streams = []
    subtitles = []
    for entry in proxy.har['log']['entries']:
        # Strip any query string before matching on the path.
        entry_path = entry['request']['url'].split('?')[0]
        for param in search_parameters:
            request = {'method': entry['request']['method'],
                       'url': entry['request']['url'],
                       'headers': {h['name']: h['value']
                                   for h in entry['request']['headers']}}
            if param in entry_path:
                if request not in streams:
                    streams.append(request)
            elif any(ext in entry_path for ext in ('.vtt', '.srt', '.ass')):
                if request not in subtitles:
                    subtitles.append(request)
    # Clean up the log files the tools leave behind in the working dir.
    for log_name in ('bmp.log', 'geckodriver.log', 'server.log'):
        log_path = os.path.join(os.path.abspath(os.getcwd()), log_name)
        if os.path.exists(log_path):
            os.remove(log_path)
    return streams, subtitles
def getToken():
    # Fetch an access token for the jzsc.mohurd.gov.cn API by loading a
    # company-detail page through a BrowserMob proxy and pulling the token
    # out of the captured caDetailList request headers.
    # Returns one captured token string; raises IndexError if none was seen.
    server = Server(r'F:\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
    server.start()
    proxy = server.create_proxy()
    chrome_options = Options()
    # Route Chrome's traffic through the capture proxy.
    chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy))
    driver = webdriver.Chrome(chrome_options=chrome_options)
    base_url = "http://jzsc.mohurd.gov.cn/data/company/detail?id=C5C5C4C3C5C2C7C7C5C5C0C2C7CCC7C7C5C6"
    proxy.new_har("douyin", options={
        'captureHeaders': True,
        'captureContent': True
    })
    driver.get(base_url)
    # While the page shows the "verification expired" dialog (the Chinese
    # text below), click its confirm button and reload until it clears.
    while '验证已过期,是否重新重新进行验证或停留在当前页面?' in driver.page_source:
        driver.find_element_by_xpath(
            '//*[@id="app"]/div/header/div[5]/div/div[3]/div/button[1]').click(
            )
        time.sleep(2.5)
        driver.refresh()
        time.sleep(3)
    result = proxy.har
    token = set()
    # Scan the HAR for the data-service request that carries the token.
    for entry in result['log']['entries']:
        _url = entry['request']['url']
        if "api/webApi/dataservice/query/comp/caDetailList?qyId" in str(_url):
            _response = entry['request']
            # NOTE(review): assumes the token is always the 5th request
            # header — fragile; matching by header name would be safer.
            _accessToken = entry['request']['headers'][4]['value']
            if _accessToken != '':
                token.add(_accessToken)
    server.stop()
    driver.quit()
    # Arbitrary element of the (deduplicated) token set.
    return list(token)[0]
def fetch_har_by_url(url, segments, index):
    """Load *url* in a proxied Firefox and return the captured HAR dict.

    url      - page to load
    segments - unused; kept for caller compatibility
    index    - unused; kept for caller compatibility
    """
    project_dir = os.path.dirname(__file__)
    bpm_path = os.path.join(project_dir,
                            "browsermob-proxy-2.1.1/bin/browsermob-proxy")
    server = Server(bpm_path)
    server.start()
    proxy = server.create_proxy()
    profile = webdriver.FirefoxProfile()
    profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=profile,
                               executable_path=os.path.join(
                                   os.path.dirname(__file__), _geckodriver))
    proxy.new_har(url,
                  options={
                      'captureHeaders': True,
                      'captureContent': True,
                      'captureBinaryContent': True
                  })
    driver.get(url)
    # BUG FIX: the original waited for traffic to stop *before* driver.get(),
    # so the quiet-period wait never covered the page load. Wait after
    # navigation so the HAR includes late sub-resource requests.
    proxy.wait_for_traffic_to_stop(2000, 10000)
    har = proxy.har
    server.stop()
    driver.quit()
    return har