def start_xvfb():
    """Launch a virtual X display that is stopped when the interpreter exits.

    Any ``DISPLAY`` value inherited from the environment is removed before
    the display is created, and the display's ``stop`` method is registered
    with ``atexit``.
    """
    from xvfbwrapper import Xvfb

    # Forget the inherited display so it is not picked up by accident.
    os.environ.pop("DISPLAY", None)

    virtual_display = Xvfb()
    virtual_display.start()
    atexit.register(virtual_display.stop)
def main():
    """Command-line entry point: run ``bounds2video`` inside a virtual display.

    Arguments are read from ``sys.argv``:

    * 1-3: forwarded unchanged to ``bounds2video``.
    * 4: optional integer subsample rate (default 10).
    * 5: optional integer speed-up factor (default 10).
    """
    import sys
    # NOTE(review): imageio and pandas are not referenced in this function;
    # the imports are kept in case other code relies on them being loaded.
    import imageio
    import pandas as pd
    import matplotlib.pyplot as plt
    from xvfbwrapper import Xvfb

    plt.ioff()

    # Parse the optional arguments before the display exists, so a malformed
    # value cannot leave an Xvfb process behind.
    subsampleRate = int(sys.argv[4]) if len(sys.argv) >= 5 else 10
    speedup = int(sys.argv[5]) if len(sys.argv) >= 6 else 10

    vdisplay = Xvfb()
    vdisplay.start()
    try:
        bounds2video(sys.argv[1], sys.argv[2], sys.argv[3],
                     subsampleRate, speedup)
    finally:
        # Stop the display even when rendering fails.
        vdisplay.stop()
def __init__(self):
    """Register the crawler in Redis, start its heartbeat and launch Chrome.

    In order: start logging to a timestamped file under ``log/``, obtain the
    crawler id, publish IP and ports through ``self.r``, fetch the account
    credentials, mark the crawler as ``good`` through ``self.r_local``, start
    the heartbeat thread, and open a 640x960 Chrome window (inside a virtual
    display on Linux).
    """
    log.start(logfile=time.strftime("log/%Y%m%d%H%M%S") + ".log", logstdout=False)
    log.msg("initiating crawler...", level=log.INFO)

    self.crawler_id = self.get_crawler_id()
    log.msg("crawler id is %s" % self.crawler_id, level=log.INFO)

    # Look the external IP up once so the stored and the logged value agree.
    external_ip = utils.get_external_ip()
    self.r.set('crawler:ip:%s' % self.crawler_id, external_ip)
    self.r.set('crawler:port:%s' % self.crawler_id, settings.REDIS_LOCAL_PORT)
    self.r.set('crawler:mapping_port:%s' % self.crawler_id,
               settings.REDIS_LOCAL_MAPPING_PORT)
    log.msg("crawler ip is %s, port is %d" % (external_ip, settings.REDIS_LOCAL_PORT),
            level=log.INFO)

    account = self.get_account()
    self.username = account[0]
    self.password = account[1]
    log.msg("crawler account got", level=log.INFO)

    self.r_local.set('crawler:status:%s' % self.crawler_id, 'good')
    self.r_local.set('crawler:update_time:%s' % self.crawler_id,
                     datetime.datetime.utcnow().strftime("%s"))
    log.msg("local crawler status set", level=log.INFO)

    heartbeat_thread = threading.Thread(target=self.maintain_local_heartbeat)
    heartbeat_thread.start()
    log.msg("local crawler heartbeat started", level=log.INFO)

    # Keep the display handle on the instance; as a discarded local it could
    # never be stopped again.
    self.vdisplay = None
    if platform.system() == "Linux":
        # on linux, use virtual display
        self.vdisplay = Xvfb()
        self.vdisplay.start()

    co = ChromeOptions()
    # TODO: Disable image after log in
    # TODO: optimize memory usage
    co.add_experimental_option("prefs", {"profile.default_content_settings": {"popups": 1}})
    self.driver = webdriver.Chrome(chrome_options=co)
    self.driver.set_window_size(640, 960)
class Webdriver(unittest.TestCase):
    """Writes the text of the first ``N`` listing links on ``SITE`` to ``AD_FILE``."""

    def setUp(self):
        """Start a virtual display and the browser selected by ``BROWSER``."""
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        # Registered as a cleanup so the display is also stopped when the
        # browser start-up below fails or exits. Cleanups run after tearDown,
        # i.e. after the browser has been closed.
        self.addCleanup(self.vdisplay.stop)

        system = platform.system()
        if BROWSER == 'firefox':
            if system == 'Darwin':
                self.driver = webdriver.Firefox()
            elif system == 'Linux':
                self.driver = webdriver.Firefox(proxy=proxy)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        elif BROWSER == 'chrome':
            if system == 'Darwin':
                chromedriver = "./experiment/chromedriver/chromedriver_mac"
                os.environ["webdriver.chrome.driver"] = chromedriver
                self.driver = webdriver.Chrome(executable_path=chromedriver)
            elif system == 'Linux':
                chromedriver = "./experiment/chromedriver/chromedriver_linux"
                os.environ["webdriver.chrome.driver"] = chromedriver
                chrome_option = webdriver.ChromeOptions()
                chrome_option.add_argument("--proxy-server=yogi.pdl.cmu.edu:3128")
                self.driver = webdriver.Chrome(executable_path=chromedriver,
                                               chrome_options=chrome_option)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        else:
            print("Unsupported Browser")
            sys.exit(0)

        self.driver.implicitly_wait(10)
        self.base_url = "https://www.google.com/"
        self.verificationErrors = []
        self.driver.set_page_load_timeout(40)
        self.accept_next_alert = True

    def test_webdriver(self):
        """Page through the listing, writing one lower-cased link per line."""
        driver = self.driver
        selector = "li.site-listing div.desc-container p.desc-paragraph a"
        # Mode "w" discards results left over from a previous run.
        with open(AD_FILE, "w") as fo:
            driver.get(SITE)
            count = 0
            while count < N:
                for el in driver.find_elements_by_css_selector(selector):
                    if count >= N:
                        break
                    fo.write(el.get_attribute('innerHTML').lower() + '\n')
                    count += 1
                driver.find_element_by_css_selector("a.next").click()

    def tearDown(self):
        """Close the browser; the display is stopped by the cleanup from setUp."""
        self.driver.quit()
class XvfbRobot(object):
    """Robot library that creates a virtual display on demand."""

    ROBOT_LIBRARY_SCOPE = "GLOBAL"
    ROBOT_LIBRARY_VERSION = VERSION
    _display = None

    def start_virtual_display(self, width=1440, height=900,
                              colordepth=24, **kwargs):
        """Starts a virtual display that is stopped when the interpreter exits.

        Does nothing when a display has already been started.

        *Arguments:*
        - width: width in pixels
        - height: height in pixels
        - colordepth: color depth to use
        - kwargs: extra parameters passed on to Xvfb

        *Example:*

        | Start Virtual Display |
        | Start Virtual Display | 1920 | 1080 |
        | Start Virtual Display | ${1920} | ${1080} | ${16} |
        """
        if self._display is not None:
            return

        logger.info("Using virtual display: '{0}x{1}x{2}'".format(width, height, colordepth))
        display = Xvfb(int(width), int(height), int(colordepth), **kwargs)
        self._display = display
        display.start()
        atexit.register(display.stop)
class TestJavascript(unittest.TestCase):
    """Loads ``index.html`` in Firefox and checks that it reports no error.

    Everything started in ``setUp`` is released through ``addCleanup``, so it
    is freed even when ``setUp`` itself fails part-way through.
    """

    def setUp(self):
        """Serve the working directory on port 9999 and open the page."""
        # Configure a web service.
        handler = http.server.SimpleHTTPRequestHandler
        address = ("127.0.0.1", 9999)
        socketserver.TCPServer.allow_reuse_address = True  # Prevents address conflicts.
        httpd = socketserver.TCPServer(address, handler)
        self.addCleanup(httpd.server_close)

        # Start the web service in a separate daemon thread.
        httpd_thread = threading.Thread(target=httpd.serve_forever)
        httpd_thread.daemon = True
        httpd_thread.start()
        # Cleanups run last-in-first-out: shutdown() runs before server_close().
        self.addCleanup(httpd.shutdown)

        # Start a display.
        self.display = Xvfb()
        self.display.start()
        self.addCleanup(self.display.stop)

        # Start the browser driver for selenium testing; it is registered
        # after the display so that it is closed before the display stops.
        self.driver = webdriver.Firefox()
        self.addCleanup(self.driver.quit)
        self.driver.get("http://localhost:9999/index.html")

    def test_javascript(self):
        """The element with id ``error`` must be empty."""
        error = self.driver.find_elements_by_id("error")[0].text
        self.assertEqual(error, "")
def start_xvfb(module):
    """Start a 1280x720 virtual display and stop it at interpreter exit.

    :param module: object exposing ``fail_json(msg=...)``; it is called with
        ``msg="xvfb broke"`` when the display cannot be started.
    """
    try:
        xvfb = Xvfb(width=1280, height=720)
        xvfb.start()
        atexit.register(xvfb.stop)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must keep propagating.
        module.fail_json(msg="xvfb broke")
def get_mtgox_info(input_vars):
    """Log in to mtgox.com and read the address shown on the funding page.

    :param input_vars: mapping with the keys ``'mtgoxId'`` and
        ``'mtgoxPassword'``.
    :return: text of the address element on the funding-options page.
    """
    address_xpath = ("/html/body/div[2]/div[2]/div[3]/div[3]/section"
                     "/div/div[4]/div[2]/div/p/strong")

    vdisplay = Xvfb()
    vdisplay.start()
    try:
        driver = webdriver.Firefox()
        try:
            driver.get("http://www.mtgox.com")
            uelem = driver.find_element_by_name("username")
            pelem = driver.find_element_by_name("password")
            lelem = driver.find_element_by_name("LOGIN")

            uelem.send_keys(input_vars['mtgoxId'])
            time.sleep(0.25)
            pelem.send_keys(input_vars['mtgoxPassword'])
            time.sleep(0.25)
            lelem.click()
            time.sleep(0.25)

            driver.get("https://www.mtgox.com/trade/funding-options")
            time.sleep(0.25)
            # Block until the logout control is present.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "logout")))

            # Single-element lookup: the plural form returns a list, which
            # has no ``.text`` attribute.
            address_elem = driver.find_element_by_xpath(address_xpath)
            print(address_elem)
            new_address = address_elem.text
            return new_address
        finally:
            driver.quit()
    finally:
        vdisplay.stop()
def test_start_with_arbitrary_kwargs(self):
    """Extra keyword arguments are accepted and the display still starts."""
    display = Xvfb(nolisten='tcp', noreset=None)
    self.addCleanup(display.stop)
    display.start()

    # DISPLAY must point at the freshly started server.
    expected = ':{}'.format(display.new_display)
    self.assertEqual(expected, os.environ['DISPLAY'])
    self.assertIsNotNone(display.proc)
def enable():
    """Sign in to the team's Slack services page and click "Enable".

    E-mail, password, team name and bot user come from the ``settings``
    module. The browser runs inside a virtual display; both are released
    even when a step fails.
    """
    import time
    import requests
    import settings
    from splinter import Browser
    from xvfbwrapper import Xvfb

    print("Trying to enable myself.")
    vdisplay = Xvfb()
    vdisplay.start()
    try:
        email = settings.getEmail()
        password = settings.getPassword()
        team_name = settings.getTeamName()
        bot_user = settings.getBotUser()

        browser = Browser('chrome')
        try:
            url = 'https://{}.slack.com/services/{}'.format(team_name, bot_user)
            browser.visit(url)
            browser.fill('email', email)
            browser.fill('password', password)
            browser.find_by_id('signin_btn').first.click()
            browser.find_link_by_text('Enable').first.click()
            time.sleep(2)  # Sometimes I saw a crash where there was no alert, so we'll wait a bit first.
            alert = browser.get_alert()
            alert.accept()
            time.sleep(2)  # If you close the display too quickly, the request doesn't get processed.
        finally:
            browser.quit()
    finally:
        vdisplay.stop()
class BaseTestCase(unittest.TestCase):
    """Base class that provides ``self.driver`` and ``self.load`` to tests.

    Environment variables:

    * ``ENABLE_XVFB``: run the browser in a virtual display unless the value
      is unset, empty, ``0``, ``false``, ``no`` or ``off`` (case-insensitive).
    * ``BROWSER``: ``Firefox`` selects Firefox; anything else uses Chrome.
    """

    # Values of ENABLE_XVFB that mean "disabled".
    _FALSE_VALUES = ("", "0", "false", "no", "off")

    def setUp(self):
        flag = os.environ.get("ENABLE_XVFB", "")
        # A plain truthiness test would treat the string "0" as enabled.
        self.xvfb = flag.strip().lower() not in self._FALSE_VALUES
        self.browser = os.environ.get("BROWSER", "Chrome")
        self.vdisplay = None
        self.driver = None
        if self.xvfb:
            self.vdisplay = Xvfb(width=1280, height=720)
            self.vdisplay.start()
            # Cleanups also run when driver creation below fails, and they
            # run after tearDown has closed the browser.
            self.addCleanup(self.vdisplay.stop)
        if self.browser == "Firefox":
            self.driver = self.get_ff_driver()
        else:
            self.driver = self.get_chrome_driver()
        self.load = self.driver.get

    def tearDown(self):
        """Close the browser; the display is stopped by the cleanup from setUp."""
        if self.driver:
            self.driver.quit()

    def get_ff_driver(self):
        """Return a new Firefox driver."""
        return webdriver.Firefox()

    def get_chrome_driver(self):
        """Return a new Chrome driver configured for the test environment."""
        opts = Options()
        if "TRAVIS" in os.environ:  # github.com/travis-ci/travis-ci/issues/938
            opts.add_argument("--no-sandbox")
        # Fix for https://code.google.com/p/chromedriver/issues/detail?id=799
        opts.add_experimental_option("excludeSwitches",
                                     ["ignore-certificate-errors"])
        return webdriver.Chrome(chrome_options=opts)
class MainTest(unittest.TestCase):
    """Searches YouTube for 'angular' and dumps the titled links to CSV."""

    def setUp(self):
        self.xvfb = Xvfb(width=1280, height=720)
        self.xvfb.start()
        # Registered first so it runs last: the browser is closed before the
        # display it is drawn on goes away.
        self.addCleanup(self.xvfb.stop)
        self.browser = webdriver.Firefox()
        self.addCleanup(self.browser.quit)

    def test_main(self):
        """Write ``youtube.png`` and ``youtube.csv`` for the search results."""
        self.browser.get("http://www.youtube.com")
        # self.browser.get("http://www.youtube.com//results?search_query=ionic")
        search = self.browser.find_element_by_id("masthead-search-term")
        search.send_keys('angular', Keys.RETURN)
        self.browser.implicitly_wait(2)
        self.browser.save_screenshot("youtube.png")

        link_elements = self.browser.find_elements_by_xpath("//a[@title]")
        # The context manager flushes and closes the file even on failure.
        with open("youtube.csv", "w") as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(["Name", " Link"])
            for element in link_elements:
                name = element.text
                if len(name) > 5:
                    # An anchor without href yields None; write it as empty.
                    href = element.get_attribute("href") or ""
                    writer.writerow([name.encode('utf-8'), " " + href])
def test_start(self):
    """Starting a default display exports it through ``DISPLAY``."""
    display = Xvfb()
    self.addCleanup(display.stop)
    display.start()

    expected = ':{}'.format(display.new_display)
    self.assertEqual(expected, os.environ['DISPLAY'])
    self.assertIsNotNone(display.proc)
class Xvfb(Plugin):
    """Plugin that runs the tests inside a 1024x768 Xvfb virtual display."""

    def options(self, parser, env):
        """Add the ``--with-xvfb-options`` command line option."""
        super(Xvfb, self).options(parser, env)
        parser.add_option("--with-xvfb-options", action="store",
                          dest="xvfb_options",
                          default=env.get("NOSE_WITH_XVFB_OPTIONS"),
                          help="Options to pass to Xvfb. Comma delimited with "
                               "equals as separators if necessary. "
                               "E.g. \"extension=SELINUX, once\". Currently, there is no "
                               "way to provide options that begin with a +. This is a limitation "
                               "in xvfbwrapper. Repetition is not allowed. [NOSE_WITH_XVFB_OPTIONS]")

    def configure(self, options, noseconfig):
        """Parse ``key=value`` pairs into ``self.xvfb_options``.

        Entries without ``=`` map to an empty string. Blank entries (for
        example from a trailing comma) are skipped, and whitespace around
        keys and values is dropped.
        """
        super(Xvfb, self).configure(options, noseconfig)
        self.xvfb_options = {}
        if not options.xvfb_options:
            return
        for item in options.xvfb_options.split(","):
            key, _, value = item.partition("=")
            key = key.strip()
            if not key:
                continue
            self.xvfb_options[key] = value.strip()

    def begin(self):
        """Start the virtual display."""
        logger.info('Starting xvfb virtual display 1024x768 with %s' % self.xvfb_options)
        self.vdisplay = XvfbWrapper(width=1024, height=768, **self.xvfb_options)
        self.vdisplay.start()

    def finalize(self, result):
        """Stop the virtual display."""
        logger.info('Stopping xvfb virtual display')
        self.vdisplay.stop()
class PBSeleniumTest(unittest.TestCase):
    """Base class for Selenium tests that load an extension into Chrome.

    Environment variables read: ``BROWSER_BIN``, ``TRAVIS``, ``ENABLE_XVFB``
    and ``PB_EXT_PATH``.
    """

    def setUp(self):
        env = os.environ
        self.browser_bin = env.get("BROWSER_BIN", "")  # o/w use WD's default
        if "TRAVIS" in os.environ:
            self.xvfb = 1
        else:
            # by default don't use XVFB if we are not running on CI
            self.xvfb = int(env.get("ENABLE_XVFB", 0))
        self.pb_ext_path = self.get_extension_path()  # path to the extension
        if self.xvfb:
            self.vdisplay = Xvfb(width=1280, height=720)
            self.vdisplay.start()
            # Cleanups also run when get_chrome_driver() below fails (e.g.
            # the extension cannot be found) and after tearDown has closed
            # the browser.
            self.addCleanup(self.vdisplay.stop)
        self.driver = self.get_chrome_driver()
        self.js = self.driver.execute_script

    def load_url(self, url, wait_on_site=0):
        """Load a URL and wait before returning."""
        self.driver.get(url)
        sleep(wait_on_site)

    def get_extension_path(self):
        """Return the path to the extension to be tested.

        Uses ``PB_EXT_PATH`` when set; otherwise the ``../../*.crx`` file
        with the greatest ``os.path.getctime``, or ``""`` when none exists.
        """
        if "PB_EXT_PATH" in os.environ:
            return os.environ["PB_EXT_PATH"]
        # check the default path if PB_EXT_PATH env. variable is empty
        print("Can't find the env. variable PB_EXT_PATH, will check ../..")
        exts = glob("../../*.crx")  # get matching files
        return max(exts, key=os.path.getctime) if exts else ""

    def txt_by_css(self, css_selector, timeout=SEL_DEFAULT_WAIT_TIMEOUT):
        """Find an element by CSS selector and return its text."""
        return self.find_el_by_css(css_selector, timeout).text

    def find_el_by_css(self, css_selector, timeout=SEL_DEFAULT_WAIT_TIMEOUT):
        """Wait up to ``timeout`` for the element to be present and return it."""
        return WebDriverWait(self.driver, timeout).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, css_selector)))

    def get_chrome_driver(self):
        """Setup and return a Chrom[e|ium] browser for Selenium."""
        opts = Options()
        absp = os.path.abspath
        if "TRAVIS" in os.environ:  # github.com/travis-ci/travis-ci/issues/938
            opts.add_argument("--no-sandbox")
        opts.add_extension(self.pb_ext_path)  # will fail if ext can't be found
        if self.browser_bin:  # otherwise will use webdriver's default binary
            print("Browser binary: %s" % absp(self.browser_bin))
            opts.binary_location = self.browser_bin  # set binary location
        # Fix for https://code.google.com/p/chromedriver/issues/detail?id=799
        opts.add_experimental_option("excludeSwitches",
                                     ["ignore-certificate-errors"])
        return webdriver.Chrome(chrome_options=opts)

    def tearDown(self):
        """Close the browser; the display is stopped by the cleanup from setUp."""
        self.driver.quit()
def xvfb(self, line, cell=None):
    """Execute code through ``self.shell.ex`` while a virtual display is up.

    ``cell`` is executed when it is given, otherwise ``line``. The display
    is created from ``self.xvfb_kwargs`` and is stopped even when the
    executed code raises.
    """
    display = Xvfb(**self.xvfb_kwargs)
    display.start()
    try:
        self.shell.ex(line if cell is None else cell)
    finally:
        display.stop()
def test_stop(self):
    """Stopping the display restores the previous ``DISPLAY`` value."""
    display_before = os.environ['DISPLAY']

    display = Xvfb()
    display.start()
    self.assertNotEqual(display_before, os.environ['DISPLAY'])

    display.stop()
    self.assertIsNone(display.proc)
    self.assertEqual(display_before, os.environ['DISPLAY'])
def set_up():
    """Create the module-level virtual display and Chrome browser.

    Assigns the globals ``xvfb`` and ``browser``, then prints the browser
    title and the display in ``:<number>`` form.
    """
    global browser, xvfb

    xvfb = Xvfb(width=1280, height=720, colordepth=24)
    xvfb.start()
    browser = webdriver.Chrome()

    print(browser.title)
    print(':%d' % xvfb.vdisplay_num)
def test_with_xvfb():
    """Run ``my_test``, inside a 1920x1080 virtual display if ``use_xvfb`` is set."""
    if not use_xvfb:
        my_test()
        return

    from xvfbwrapper import Xvfb

    display = Xvfb(width=1920, height=1080)
    display.start()
    try:
        my_test()
    finally:
        # Stop the display even when the wrapped test fails.
        display.stop()
class Webdriver(unittest.TestCase):
    """Visits ``SITE`` and appends its ad preferences to ``TARGET_FILE``."""

    def setUp(self):
        """Start a virtual display and the browser selected by ``BROWSER``."""
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        # Registered as a cleanup so the display is also stopped when the
        # browser start-up below fails or exits. Cleanups run after tearDown,
        # i.e. after the browser has been closed.
        self.addCleanup(self.vdisplay.stop)

        system = platform.system()
        if BROWSER == 'firefox':
            if system == 'Darwin':
                self.driver = webdriver.Firefox()
            elif system == 'Linux':
                self.driver = webdriver.Firefox(proxy=proxy)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        elif BROWSER == 'chrome':
            print("WARNING: Expecting chromedriver at specified location !!")
            if system == 'Darwin':
                chromedriver = "./experiment/chromedriver/chromedriver_mac"
                os.environ["webdriver.chrome.driver"] = chromedriver
                self.driver = webdriver.Chrome(executable_path=chromedriver)
            elif system == 'Linux':
                chromedriver = "./experiment/chromedriver/chromedriver_linux"
                os.environ["webdriver.chrome.driver"] = chromedriver
                chrome_option = webdriver.ChromeOptions()
                chrome_option.add_argument("--proxy-server=yogi.pdl.cmu.edu:3128")
                self.driver = webdriver.Chrome(executable_path=chromedriver,
                                               chrome_options=chrome_option)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        else:
            print("Unsupported Browser")
            sys.exit(0)

        self.driver.implicitly_wait(10)
        self.base_url = "https://www.google.com/"
        self.verificationErrors = []
        self.driver.set_page_load_timeout(40)
        self.accept_next_alert = True

    def test_webdriver(self):
        """Opt in, load ``SITE``, wait 20 s and record the preferences found."""
        driver = self.driver
        cole.optIn(driver)
        driver.get(SITE)
        time.sleep(20)
        pref = cole.get_ad_pref(driver)
        print(SITE)
        print('pref= %s' % (pref,))
        if pref != []:
            with open(TARGET_FILE, "a") as fo:
                fo.write(SITE + "||" + "@".join(pref) + '\n')

    def tearDown(self):
        """Close the browser; the display is stopped by the cleanup from setUp."""
        self.driver.quit()
def xvfb(request):
    """Generator that keeps a virtual display running around its ``yield``.

    The display is started before the single ``yield`` and stopped once the
    generator is resumed. ``request`` is accepted but not used.
    """
    from xvfbwrapper import Xvfb

    display = Xvfb()
    display.start()

    yield

    display.stop()
class OFMTransferSiteManager(OFMSiteManager):
    """Site manager that downloads transfer files with Firefox under Xvfb."""

    def __init__(self, user=None):  # pylint: disable=super-init-not-called
        """Store the login credentials and start a virtual display.

        :param user: object with ``ofm_username`` / ``ofm_password``; when
            falsy, the ``OFM_USERNAME`` and ``OFM_PASSWORD`` environment
            variables are used (``KeyError`` if they are missing).
        """
        self.user = user
        if self.user:
            self._login_user = self.user.ofm_username
            self._login_password = self.user.ofm_password
        else:
            # os.environ is a mapping, not a callable.
            self._login_user = os.environ['OFM_USERNAME']
            self._login_password = os.environ['OFM_PASSWORD']
        self.display = Xvfb()
        self.display.start()

    def download_transfer_excels(self, matchdays=None):
        """Open the transfer download page for every matchday lacking a file.

        :param matchdays: iterable of matchdays; defaults to the current one.
        """
        data_dir = os.path.join(BASE_DIR, 'ofm_transfer_data')
        profile = webdriver.FirefoxProfile(os.path.join(data_dir, 'firefox_profile'))
        profile.set_preference("browser.download.dir", data_dir)
        self.browser = webdriver.Firefox(firefox_profile=profile)
        self.browser.set_page_load_timeout(10)
        self.login()

        if not matchdays:
            matchdays = [Matchday.get_current()]

        for matchday in matchdays:
            if self._is_transfer_file_present(matchday):
                continue
            try:
                # NOTE(review): ``self`` is passed explicitly on a bound
                # call; left unchanged because the ``timeout`` decorator is
                # not visible here - confirm this is intended.
                self._jump_to_transfer_page(self, matchday=matchday)  # pylint: disable=redundant-keyword-arg
            except TimeoutError:
                pass

    @staticmethod
    def _is_transfer_file_present(matchday=None):
        """Return True if the CSV for ``matchday`` (default: current) exists."""
        if not matchday:
            matchday = Matchday.get_current()
        file_name = 'ofm_spielerwechsel_{}_{}.csv'.format(
            matchday.season.number, matchday.number)
        return os.path.isfile(os.path.join(BASE_DIR, 'ofm_transfer_data', file_name))

    @timeout(5, use_signals=False)
    def _jump_to_transfer_page(self, matchday=None):
        """Jump to the download frame, for ``matchday`` if one is given."""
        if not matchday:
            self.jump_to_frame(Constants.Transfer.DOWNLOAD_TRANSFERS)
        else:
            self.jump_to_frame(
                Constants.Transfer.DOWNLOAD_TRANSFERS_FROM_MATCHDAY.format(matchday.number))

    def kill_browser(self):
        """Stop the browser client, if one was created, and the display."""
        try:
            # ``browser`` is only assigned by download_transfer_excels().
            if getattr(self, 'browser', None):
                self.browser.stop_client()
        finally:
            self.display.stop()
def test_renderer(self):
    """Recording one 600x600 frame must create ``self.out_file``."""
    vdisplay = Xvfb()
    vdisplay.start()
    try:
        ren = window.Renderer()
        window.record(ren, n_frames=1, out_path=self.out_file, size=(600, 600))
        self.assertTrue(os.path.exists(self.out_file))
    finally:
        # Stop the display even when rendering or the assertion fails.
        vdisplay.stop()
def test_start_without_existing_display(self):
    """A display can be started when ``DISPLAY`` is not set beforehand."""
    del os.environ['DISPLAY']

    display = Xvfb()
    self.addCleanup(display.stop)
    self.addCleanup(self.reset_display)
    display.start()

    expected = ':{}'.format(display.new_display)
    self.assertEqual(expected, os.environ['DISPLAY'])
    self.assertIsNotNone(display.proc)
class T2w(unittest.TestCase):
    # Browser tests that run against a locally started brew controller
    # web server.
    #
    # NOTE(review): the text of this class is corrupted. Part of the source
    # between the ``base_url`` assignment in setUp and the end of a later
    # test method has been replaced by a masked span ("*****:*****"), so the
    # statement marked below is not valid Python and at least one method
    # header is missing. Restore the original from version control before
    # changing the code.

    def setUp(self):
        # Start the web server on a free port with the test recipe list.
        p = helpers.findPort()
        server = webctrl.myserver.myserver(host="localhost", port=p)
        server.quiet = True
        controllers = ctrl.setupControllers(False, True, True)
        self.brewme = webctrl.runbrew(
            controllers, helpers.getTestRecipeList(), server)
        self.brewme.startNonBlocking()
        print "up and running"
        # The virtual display is only used when VIRTUALDISPLAY is truthy.
        if VIRTUALDISPLAY:
            self.vdisplay = Xvfb(width=1280, height=720)
            self.vdisplay.start()
        self.driver = webdriver.Firefox()
        self.driver.implicitly_wait(30)
        # NOTE(review): corrupted line - the masked span begins inside this
        # string literal; everything up to the next ``def`` presumably
        # belongs to a test method whose header was lost.
        self.base_url = "http://*****:*****@name='recipe'])[12]").click()
        driver.find_element_by_css_selector("input[type=\"submit\"]").click()
        self.assertEqual("17 Falconers Flight IPA", driver.find_element_by_css_selector("form > b").text)
        print "===== SUCCESS test_recipeliststatus ====="

    def is_element_present(self, how, what):
        # Return True if the driver finds an element for (how, what),
        # False when NoSuchElementException is raised.
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException, e:
            return False
        return True
def get_display(self):
    """Returns the first display available

    Resolution order:

    1. A display remembered from an earlier call, returned as ``:<number>``.
    2. The ``execution/display_variable`` config option, then ``$DISPLAY``.
       The value is returned unchanged and its display number is remembered.
    3. A freshly started Xvfb server (not available on OSX).

    Raises ``RuntimeError`` on OSX without a display, or when xvfbwrapper
    cannot be imported.
    """
    if self._display is not None:
        return ':%d' % self._display.new_display

    sysdisplay = None
    if self._config.has_option('execution', 'display_variable'):
        sysdisplay = self._config.get('execution', 'display_variable')

    sysdisplay = sysdisplay or os.getenv('DISPLAY')
    if sysdisplay:
        from collections import namedtuple

        def _mock():
            pass

        # Store a fake Xvfb object. The value may look like
        # "host:display.screen" (e.g. "localhost:10.0"); only the display
        # number is an integer, so drop the optional ".screen" suffix.
        ndisp = int(sysdisplay.split(':')[-1].split('.')[0])
        Xvfb = namedtuple('Xvfb', ['new_display', 'stop'])
        self._display = Xvfb(ndisp, _mock)
        return sysdisplay
    else:
        if 'darwin' in sys.platform:
            raise RuntimeError(
                'Xvfb requires root permissions to run in OSX. Please '
                'make sure that an X server is listening and set the '
                'appropriate config on either $DISPLAY or nipype\'s '
                '"display_variable" config. Valid X servers include '
                'VNC, XQuartz, or manually started Xvfb.')

        # If $DISPLAY is empty, it confuses Xvfb so unset
        if sysdisplay == '':
            del os.environ['DISPLAY']
        try:
            from xvfbwrapper import Xvfb
        except ImportError:
            raise RuntimeError(
                'A display server was required, but $DISPLAY is not defined '
                'and Xvfb could not be imported.')

        self._display = Xvfb(nolisten='tcp')
        self._display.start()

        # Older versions of xvfbwrapper used vdisplay_num
        if not hasattr(self._display, 'new_display'):
            setattr(self._display, 'new_display',
                    self._display.vdisplay_num)
        return ':%d' % self._display.new_display
class Webdriver(unittest.TestCase):
    """Applies a treatment and collects measurements for ``RUNS`` rounds."""

    def setUp(self):
        """Start a virtual display and the browser selected by ``BROWSER``."""
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        # Registered as a cleanup so the display is also stopped when the
        # browser start-up below fails or exits. Cleanups run after tearDown,
        # i.e. after the browser has been closed.
        self.addCleanup(self.vdisplay.stop)

        system = platform.system()
        if BROWSER == 'firefox':
            if system == 'Darwin':
                self.driver = webdriver.Firefox()
            elif system == 'Linux':
                self.driver = webdriver.Firefox(proxy=proxy)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        elif BROWSER == 'chrome':
            print("WARNING: Expecting chromedriver at specified location !!")
            if system == 'Darwin':
                chromedriver = "./experiment/chromedriver/chromedriver_mac"
                os.environ["webdriver.chrome.driver"] = chromedriver
                self.driver = webdriver.Chrome(executable_path=chromedriver)
            elif system == 'Linux':
                chromedriver = "./experiment/chromedriver/chromedriver_linux"
                os.environ["webdriver.chrome.driver"] = chromedriver
                chrome_option = webdriver.ChromeOptions()
                chrome_option.add_argument("--proxy-server=yogi.pdl.cmu.edu:3128")
                self.driver = webdriver.Chrome(executable_path=chromedriver,
                                               chrome_options=chrome_option)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        else:
            print("Unsupported Browser")
            sys.exit(0)

        self.driver.implicitly_wait(10)
        self.base_url = "https://www.google.com/"
        self.verificationErrors = []
        self.driver.set_page_load_timeout(40)
        self.accept_next_alert = True

    def test_webdriver(self):
        """Per run: apply the treatment, sync with the other agents, measure."""
        driver = self.driver
        helper.setLogFile(LOG_FILE)
        helper.log("browserStarted||" + str(TREATMENTID), ID)
        run = 0
        while run < RUNS:
            helper.applyTreatment(driver, TREATMENTS[TREATMENTID], ID, TREATMENTID)
            helper.wait_for_others(AGENTS, ID, ROUND)
            time.sleep(20)
            helper.collectMeasurement(driver, MEASUREMENT, ID, TREATMENTID)
            run += 1

    def tearDown(self):
        """Close the browser; the display is stopped by the cleanup from setUp."""
        self.driver.quit()
def suro_ws_js_enabled_page():
    """Open google.com in Firefox under a virtual display and print its title."""
    vdisplay = Xvfb()
    vdisplay.start()
    try:
        browser = webdriver.Firefox()
        try:
            browser.get('http://www.google.com')
            print(browser.title)
        finally:
            browser.quit()
    finally:
        # Reached even when Firefox fails to start or the page fails to load.
        vdisplay.stop()
class T2w(unittest.TestCase):
    """Browser tests against a locally started brew controller web server."""

    def setUp(self):
        """Start the web server on a free port, a virtual display and Firefox."""
        p = helpers.findPort()
        server = webctrl.myserver.myserver(host="localhost", port=p)
        server.quiet = True
        self.brewme = webctrl.runbrew(
            helpers.timerCtrl(), helpers.getSimpleBSMX(), server)
        self.brewme.startNonBlocking()
        # NOTE(review): nothing visible here stops ``self.brewme`` again;
        # confirm whether it needs an explicit shutdown.
        print("up and running")

        # Comment out next three lines to see firefox on local display
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        self.addCleanup(self.vdisplay.stop)

        self.driver = webdriver.Firefox()
        # Registered after the display, so the browser is closed first;
        # without it every test leaves a Firefox process running.
        self.addCleanup(self.driver.quit)
        self.driver.implicitly_wait(30)
        self.base_url = "http://localhost:%i" % p
        self.verificationErrors = []
        self.accept_next_alert = True

    def url_base(self):
        """Return the base URL of the server started in setUp."""
        return self.base_url

    def test_index(self):
        """The button on the home page leads to a page headed 'Hopitty'."""
        driver = self.driver
        driver.get(self.url_base())
        self.assertIn('Hopitty', driver.title)
        driver.find_element_by_css_selector("button").click()
        self.assertEqual("Hopitty",
                         driver.find_element_by_css_selector("h1").text)

    def test_RunControl(self):
        """The second link button opens the 'Run Control' page."""
        driver = self.driver
        driver.get(self.url_base())
        # Make sure we start on home page
        self.assertIn('Hopitty', driver.title)
        driver.find_element_by_xpath("//a[2]/button").click()
        self.assertEqual("Run Control",
                         driver.find_element_by_css_selector("h1").text)

    def is_element_present(self, how, what):
        """Return True if the driver finds an element for ``(how, what)``."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True
def test_start_with_kwargs(self):
    """Width, height and colour depth passed as keywords are kept."""
    width, height, depth = 800, 600, 16

    display = Xvfb(width=width, height=height, colordepth=depth)
    self.addCleanup(display.stop)
    display.start()

    self.assertEqual(width, display.width)
    self.assertEqual(height, display.height)
    self.assertEqual(depth, display.colordepth)
    self.assertEqual(os.environ['DISPLAY'], ':%d' % display.vdisplay_num)
    self.assertIsNotNone(display.proc)
def xvfb_if_supported() -> Generator:
    """Yield once, with a virtual display running if one can be started.

    When creating or starting Xvfb raises ``EnvironmentError``, ``OSError``
    or ``RuntimeError``, the generator yields without a display instead.

    Only the start-up is guarded. An exception of one of those types thrown
    into the generator at the ``yield`` (for instance by a
    context-manager wrapper) propagates after the display is stopped; it
    is not swallowed and followed by a second ``yield``.
    """
    try:
        display = Xvfb()
        display.start()
    except (EnvironmentError, OSError, RuntimeError):
        yield
        return

    try:
        yield
    finally:
        display.stop()
class SeleniumScraper(implements(Scraper, ActionRunner)):
    """Scraper that fetches pages through a Selenium-driven Firefox.

    Named actions (``click``, ``remove``) are registered at construction and
    can be run against the current page before its source is read.
    """

    def __init__(self, log, config: ScraperConfig):
        """Configure the Firefox profile, start the browser, register actions.

        :param log: logger used for warnings and errors.
        :param config: scraper configuration; ``img``, ``xvfb``, ``width``,
            ``height``, ``lat`` and ``lon`` are read through ``.get()``.
        """
        self.log = log
        self.config = config
        # Defined up front so __del__ is safe if start-up fails part-way.
        self.display = None
        self._driver = None

        firefox_profile = webdriver.FirefoxProfile()
        firefox_profile.set_preference('permissions.default.image',
                                       self.config.img.get())
        firefox_profile.set_preference(
            'dom.ipc.plugins.enabled.libflashplayer.so', 'false')

        if self.config.xvfb.get():
            self.display = Xvfb()
            self.display.start()

        self._driver = webdriver.Firefox(firefox_profile)
        self._driver.set_window_size(self.config.width.get(),
                                     self.config.height.get())
        self._driver.set_window_position(self.config.lat.get() or 0,
                                         self.config.lon.get() or 0)
        self.last_url = None
        self.last_page = None
        self._actions = {}
        self._history = []

        click = ClickAction(self, self.log)
        filter_remove = FilterRemoveAction(self, self.log)
        self.register('click', click)
        self.register('remove', filter_remove)

    @property
    def actions(self) -> dict:
        """Mapping of action name to action object."""
        return self._actions

    def get_doc(self, doc):
        """
        get the html element from the url [doc]
        :param doc: the url to the page
        :return: the page source of the loaded url
        """
        self._driver.get(doc)
        page_source = self.driver.page_source
        self.last_url = doc
        self.last_page = page_source
        return self.last_page

    def get(self):
        """Return (and remember) the source of the page currently loaded."""
        self.last_page = self._driver.page_source
        return self.last_page

    def action_get(self, actions: list):
        """Run every action in ``actions``, then return the page source."""
        for action in actions:
            self.execute(action)
        return self.get()

    def filter_get(self, filters: list):
        """Run every filter in ``filters``, then return the page source."""
        for filter_def in filters:
            self.execute_filter(filter_def)
        return self.get()

    @property
    def history(self):
        """List of ``(timestamp, action_name)`` tuples for executed actions."""
        return self._history

    def _run_action(self, action_name, action_path):
        """Run the registered action ``action_name`` or log a warning."""
        if action_name in self._actions:
            self._history.append((time.time(), action_name))
            self._actions[action_name].execute(action_path)
        else:
            self.log.warn(f"Command [{action_name}] not recognised")

    def execute_filter(self, filter_def: dict):
        """Run the first ``{name: path}`` entry of ``filter_def``.

        A ``_pre_`` marker in the name is removed before the lookup.
        """
        action_name, action_path = next(iter(filter_def.items()))
        action_name = action_name.strip().replace('_pre_', '')
        self._run_action(action_name, action_path)

    def execute(self, action_composite: str):
        """Run an action written as ``{name}path``.

        A string without a ``{name}`` marker is reported as an unrecognised
        command; previously it failed with ``AttributeError`` on ``None``.
        """
        action_name, action_path = SeleniumScraper.__get_action(
            action_composite)
        if action_name is not None:
            action_name = action_name.strip()
        self._run_action(action_name, action_path)

    @property
    def driver(self):
        """The underlying Selenium driver."""
        return self._driver

    def make_absolute_url(self, url):
        """Resolve ``url`` against the last URL loaded with ``get_doc``."""
        return urljoin(self.last_url, url)

    @staticmethod
    def __get_action(action_composite):
        """Split ``{name}path`` into ``(name, path)``; ``(None, None)`` if no marker."""
        pattern = '{(.+?)}'
        matches = re.search(pattern, action_composite)
        if not matches:
            return None, None
        action_name = matches.group(1)
        action_xpath = re.sub(pattern, '', action_composite)
        return action_name, action_xpath

    def __del__(self):
        """Best-effort shutdown of the browser window and the display."""
        # getattr: __init__ may have failed before these were assigned.
        driver = getattr(self, '_driver', None)
        display = getattr(self, 'display', None)
        try:
            if driver is not None:
                time.sleep(
                    10)  # wait for operations to complete before closing
                driver.close()
            if display is not None:
                # Also reached when the browser never started, so the Xvfb
                # process is not left behind.
                display.stop()
        except Exception as ex:
            self.log.error(ex)
def startup(self):
    """Validate that all of the external dependencies are installed

    Checks the required Python modules and command line tools, starts Xvfb
    when needed, selects the display to capture from, and installs the
    traffic shaper. Every failed check makes the result False.

    :return: True only if every check passed.
    """
    ret = True
    system = platform.system()

    # default /tmp/wptagent as an alive file on Linux
    if self.options.alive is None:
        if system == "Linux":
            self.options.alive = '/tmp/wptagent'
        else:
            self.options.alive = os.path.join(os.path.dirname(__file__),
                                              'wptagent.alive')
    self.alive()

    ret = self.requires('dns', 'dnspython') and ret
    ret = self.requires('monotonic') and ret
    ret = self.requires('PIL', 'pillow') and ret
    ret = self.requires('psutil') and ret
    ret = self.requires('requests') and ret
    if not self.options.android and not self.options.iOS:
        ret = self.requires('tornado') and ret
    # Windows-specific imports
    if system == "Windows":
        ret = self.requires('win32api', 'pypiwin32') and ret

    # Try patching ws4py with a faster lib (optional, failures are ignored)
    try:
        self.requires('wsaccel')
        import wsaccel
        wsaccel.patch_ws4py()
    except Exception:
        pass

    try:
        subprocess.check_output(['python', '--version'])
    except Exception:
        print("Make sure python 2.7 is available in the path.")
        ret = False

    try:
        subprocess.check_output('{0} -version'.format(self.image_magick['convert']),
                                shell=True)
    except Exception:
        print("Missing convert utility. Please install ImageMagick "
              "and make sure it is in the path.")
        ret = False

    try:
        subprocess.check_output('{0} -version'.format(self.image_magick['mogrify']),
                                shell=True)
    except Exception:
        print("Missing mogrify utility. Please install ImageMagick "
              "and make sure it is in the path.")
        ret = False

    if system == "Linux":
        try:
            subprocess.check_output(['traceroute', '--version'])
        except Exception:
            logging.debug("Traceroute is missing, installing...")
            subprocess.call(['sudo', 'apt-get', '-yq', 'install', 'traceroute'])

    # if we are on Linux and there is no display, enable xvfb by default
    if system == "Linux" and not self.options.android and \
            not self.options.iOS and 'DISPLAY' not in os.environ:
        self.options.xvfb = True

    if self.options.xvfb:
        ret = self.requires('xvfbwrapper') and ret
        if ret:
            from xvfbwrapper import Xvfb
            self.xvfb = Xvfb(width=1920, height=1200, colordepth=24)
            self.xvfb.start()

    # Figure out which display to capture from
    if system == "Linux" and 'DISPLAY' in os.environ:
        logging.debug('Display: %s', os.environ['DISPLAY'])
        self.capture_display = os.environ['DISPLAY']
    elif system == "Darwin":
        proc = subprocess.Popen('ffmpeg -f avfoundation -list_devices true -i ""',
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                shell=True)
        _, err = proc.communicate()
        for line in err.splitlines():
            matches = re.search(r'\[(\d+)\] Capture screen', line)
            if matches:
                self.capture_display = matches.group(1)
                break
    elif system == "Windows":
        self.capture_display = 'desktop'

    if self.options.throttle:
        try:
            subprocess.check_output('sudo cgset -h', shell=True)
        except Exception:
            print("Missing cgroups, make sure cgroup-tools is installed.")
            ret = False

    # Fix Lighthouse install permissions
    if system != "Windows":
        from internal.os_util import run_elevated
        run_elevated('chmod', '-R 777 ~/.config/configstore/')
        try:
            import getpass
            run_elevated('chown', '-R {0}:{0} ~/.config'.format(getpass.getuser()))
        except Exception:
            pass

    # Check the iOS install. Combined with ``and ret`` like every other
    # check, so an earlier failure is not overwritten by a passing install.
    if self.ios is not None:
        ret = self.ios.check_install() and ret

    if not self.options.android and not self.options.iOS and not self.options.noidle:
        self.wait_for_idle(300)

    if self.adb is not None:
        if not self.adb.start():
            print("Error configuring adb. Make sure it is installed and in the path.")
            ret = False

    self.shaper.remove()
    if not self.shaper.install():
        if system == "Windows":
            print("Error configuring traffic shaping, make sure secure boot is disabled.")
        else:
            print("Error configuring traffic shaping, make sure it is installed.")
        ret = False

    return ret
def runner(console: Console, shutdownEvent: Event, headless: bool, user: dict,
           proxy: dict, playlist: str, vnc: bool, sqsEndpoint: str,
           screenshotDir: str, runnerStats: Array, processStates: Array):
    """Register one account in a browser session and queue it on SQS.

    The function starts an optional virtual display (plus an optional VNC
    server attached to it), obtains a Chrome driver, runs the registration
    through ``Adapter`` and finally sends ``{'user', 'playlist'}`` to the
    queue. Every resource that was acquired is released before returning,
    whatever the outcome.

    Args:
        console: Logger-like object providing ``log`` and ``exception``.
        shutdownEvent: Event handed to the driver manager and the adapter.
        headless: A virtual display is started only when this equals False.
        user: Account data; ``email`` is read here and the optional
            ``windowSize`` ("width,height") sizes the virtual display.
        proxy: Proxy description forwarded to the driver manager.
        playlist: Value forwarded in the queue message.
        vnc: Start an X11vnc server on the virtual display when true.
        sqsEndpoint: Queue URL the message is sent to.
        screenshotDir: Prefix for the ``<id>.log`` / ``<id>.png`` files that
            are written when the registration fails.
        runnerStats: Shared counters, indexed by the ``STAT_*`` constants.
        processStates: Accepted for interface compatibility; not used here.
    """
    tid = current_process().pid
    console.log('#%d Start' % tid)
    driver = None
    vdisplay = None
    x11vnc = None
    userDataDir = None
    spotify = None
    try:
        # Kept as an equality test on purpose: only an explicit False (or 0)
        # requests a display, None does not.
        if headless == False:  # noqa: E712
            width = 1280
            height = 1024
            if 'windowSize' in user:
                width, height = user['windowSize'].split(',')
            vdisplay = Xvfb(width=width, height=height, colordepth=24,
                            tempdir=None, noreset='+render')
            vdisplay.start()
            if vnc:
                x11vnc = X11vnc(vdisplay)
                x11vnc.start()
        driverManager = DriverManager(console, shutdownEvent)
        driverData = driverManager.getDriver(type='chrome', uid=tid, user=user,
                                             proxy=proxy, headless=headless)
        del driverManager
        collect()
        if not driverData:
            raise Exception('No driver was returned from adapter')
        # Record both values before validating the driver so that the profile
        # directory is still removed below when no driver came back.
        driver = driverData['driver']
        userDataDir = driverData['userDataDir']
        if not driver:
            raise Exception('No driver was returned from adapter')
    except Exception:
        runnerStats[STAT_DRIVER_NONE] += 1
        console.exception('Driver unavailable')
    else:
        try:
            spotify = Adapter(driver, console, shutdownEvent)
            console.log('#%d Start create account for %s' % (tid, user['email']))
            spotify.register(user)
            try:
                boto3.client('sqs').send_message(
                    QueueUrl=sqsEndpoint,
                    MessageBody=dumps({
                        'user': user,
                        'playlist': playlist
                    }),
                    DelaySeconds=1,
                )
            except Exception:
                console.exception('#%d Failed to send message to the queue' % tid)
            else:
                console.log('#%d Account created for %s' % (tid, user['email']))
                runnerStats[STAT_ACCOUNT_CREATED] += 1
        except Exception as e:
            runnerStats[STAT_ERROR] += 1
            # Best effort: keep the error text and a screenshot for diagnosis.
            try:
                report_id = randint(10000, 99999)
                with open(screenshotDir + ('%d.log' % report_id), 'w') as f:
                    f.write(str(e))
                driver.save_screenshot(screenshotDir + ('%d.png' % report_id))
            except Exception:
                console.exception()

    # Common cleanup. This is the single place where the display and the VNC
    # server are stopped, so neither is stopped twice on the failure path.
    if driver:
        try:
            driver.quit()
        except Exception:
            pass
        del driver
    if spotify:
        del spotify
    if userDataDir:
        try:
            rmtree(path=userDataDir, ignore_errors=True)
        except Exception:
            pass
    if x11vnc:
        # Terminate the VNC server before the display it is attached to.
        try:
            x11vnc.stop()
        except Exception:
            pass
        del x11vnc
    if vdisplay:
        try:
            vdisplay.stop()
        except Exception:
            pass
        del vdisplay
    console.log('#%d Stop' % tid)
    collect()
class WPTAgent(object):
    """Main agent workflow.

    Owns the objects needed to run tests (work-server client, browser
    factory, traffic shaper, optional adb / iOS device, optional Xvfb
    display) and drives the poll / run / upload loop.
    """

    def __init__(self, options, browsers):
        """Create the helpers and install the exit and signal handlers.

        Args:
            options: Parsed command-line options; this class reads, among
                others, ``android``, ``iOS``, ``device``, ``xvfb``, ``alive``,
                ``exit``, ``polling``, ``throttle``, ``webdriver`` and
                ``noidle``.
            browsers: Browser configuration forwarded to ``Browsers``.
        """
        from internal.browsers import Browsers
        from internal.webpagetest import WebPageTest
        from internal.traffic_shaping import TrafficShaper
        from internal.adb import Adb
        from internal.ios_device import iOSDevice
        self.must_exit = False
        self.options = options
        self.capture_display = None
        self.job = None
        self.task = None
        self.xvfb = None
        self.root_path = os.path.abspath(os.path.dirname(__file__))
        self.wpt = WebPageTest(options, os.path.join(self.root_path, "work"))
        self.persistent_work_dir = self.wpt.get_persistent_dir()
        self.adb = Adb(self.options, self.persistent_work_dir) if self.options.android else None
        self.ios = iOSDevice(self.options.device) if self.options.iOS else None
        self.browsers = Browsers(options, browsers, self.adb, self.ios)
        self.shaper = TrafficShaper(options)
        atexit.register(self.cleanup)
        signal.signal(signal.SIGTERM, self.signal_handler)
        signal.signal(signal.SIGINT, self.signal_handler)
        # Default to the tools being on the path; on Windows look for an
        # ImageMagick install under the Program Files directories instead.
        self.image_magick = {'convert': 'convert', 'compare': 'compare', 'mogrify': 'mogrify'}
        if platform.system() == "Windows":
            tools = ('convert', 'compare', 'mogrify')
            paths = [os.getenv('ProgramFiles'), os.getenv('ProgramFiles(x86)')]
            for path in paths:
                if path is not None and os.path.isdir(path):
                    # Reverse-sorted listing, so later names are tried first.
                    dirs = sorted(os.listdir(path), reverse=True)
                    for subdir in dirs:
                        if not subdir.lower().startswith('imagemagick'):
                            continue
                        found = dict((tool, os.path.join(path, subdir, tool + '.exe'))
                                     for tool in tools)
                        if all(os.path.isfile(found[tool]) for tool in tools):
                            for tool in tools:
                                exe = found[tool]
                                # Quote paths with spaces; they are used in shell commands.
                                if exe.find(' ') >= 0:
                                    exe = '"{0}"'.format(exe)
                                self.image_magick[tool] = exe
                            break

    def run_testing(self):
        """Main testing flow.

        Polls for jobs until asked to exit (signal, ``exit`` file in the
        agent directory, the ``options.exit`` run-time limit in minutes, a
        failing message server or adb asking for an exit), runs every task
        of each job and uploads its result, then calls ``cleanup``.
        """
        if sys.version_info >= (3, 0):
            from time import monotonic
        else:
            from monotonic import monotonic
        start_time = monotonic()
        exit_file = os.path.join(self.root_path, 'exit')
        message_server = None
        if not self.options.android and not self.options.iOS:
            from internal.message_server import MessageServer
            message_server = MessageServer()
            message_server.start()
            if not message_server.is_ok():
                logging.error("Unable to start the local message server")
                return
        while not self.must_exit:
            try:
                self.alive()
                if os.path.isfile(exit_file):
                    try:
                        os.remove(exit_file)
                    except Exception:
                        pass
                    self.must_exit = True
                    break
                if message_server is not None and self.options.exit > 0 and \
                        not message_server.is_ok():
                    logging.error("Message server not responding, exiting")
                    break
                if self.browsers.is_ready():
                    self.job = self.wpt.get_test(self.browsers.browsers)
                    if self.job is not None:
                        self.job['image_magick'] = self.image_magick
                        self.job['message_server'] = message_server
                        self.job['capture_display'] = self.capture_display
                        self.job['shaper'] = self.shaper
                        self.task = self.wpt.get_task(self.job)
                        while self.task is not None:
                            start = monotonic()
                            try:
                                self.task['running_lighthouse'] = False
                                if self.job['type'] != 'lighthouse':
                                    self.run_single_test()
                                    self.wpt.get_bodies(self.task)
                                # Lighthouse runs once, after the first uncached
                                # run, and only if that run did not fail.
                                if self.task['run'] == 1 and not self.task['cached'] and \
                                        self.job['warmup'] <= 0 and \
                                        self.task['error'] is None and \
                                        'lighthouse' in self.job and self.job['lighthouse']:
                                    if 'page_result' not in self.task or \
                                            self.task['page_result'] is None or \
                                            self.task['page_result'] == 0 or \
                                            self.task['page_result'] == 99999:
                                        self.task['running_lighthouse'] = True
                                        self.wpt.running_another_test(self.task)
                                        self.run_single_test()
                                elapsed = monotonic() - start
                                logging.debug('Test run time: %0.3f sec', elapsed)
                            except Exception as err:
                                msg = str(err)
                                self.task['error'] = 'Unhandled exception running test: '\
                                    '{0}'.format(msg)
                                logging.exception("Unhandled exception running test: %s", msg)
                                traceback.print_exc(file=sys.stdout)
                            self.wpt.upload_task_result(self.task)
                            # Set up for the next run
                            self.task = self.wpt.get_task(self.job)
                if self.job is not None:
                    self.job = None
                else:
                    self.sleep(self.options.polling)
            except Exception as err:
                msg = str(err)
                if self.task is not None:
                    self.task['error'] = 'Unhandled exception preparing test: '\
                        '{0}'.format(msg)
                logging.exception("Unhandled exception: %s", msg)
                traceback.print_exc(file=sys.stdout)
            if self.options.exit > 0:
                run_time = (monotonic() - start_time) / 60.0
                if run_time > self.options.exit:
                    break
            # Exit if adb is having issues (will cause a reboot after several tries)
            if self.adb is not None and self.adb.needs_exit:
                break
        self.cleanup()

    def run_single_test(self):
        """Run a single test run of ``self.task`` with the job's browser.

        Errors raised while running are recorded in ``self.task['error']``
        instead of propagating; the browser is always stopped afterwards.
        """
        self.alive()
        browser = self.browsers.get_browser(self.job['browser'], self.job)
        if browser is not None:
            browser.prepare(self.job, self.task)
            browser.launch(self.job, self.task)
            try:
                if self.task['running_lighthouse']:
                    # Default message, left in place unless the browser
                    # implementation replaces it.
                    self.task['lighthouse_log'] = \
                        'Lighthouse testing is not supported with this browser.'
                    try:
                        browser.run_lighthouse_test(self.task)
                    except Exception:
                        logging.exception('Error running lighthouse test')
                    if self.task['lighthouse_log']:
                        try:
                            log_file = os.path.join(self.task['dir'], 'lighthouse.log.gz')
                            with gzip.open(log_file, GZIP_TEXT, 7) as f_out:
                                f_out.write(self.task['lighthouse_log'])
                        except Exception:
                            logging.exception('Error compressing lighthouse log')
                else:
                    browser.run_task(self.task)
            except Exception as err:
                msg = str(err)
                self.task['error'] = 'Unhandled exception in test run: '\
                    '{0}'.format(msg)
                logging.exception("Unhandled exception in test run: %s", msg)
                traceback.print_exc(file=sys.stdout)
            browser.stop(self.job, self.task)
            # Delete the browser profile if needed
            if self.task['cached'] or self.job['fvonly']:
                browser.clear_profile(self.task)
        else:
            err = "Invalid browser - {0}".format(self.job['browser'])
            logging.critical(err)
            self.task['error'] = err

    def signal_handler(self, *_):
        """Ctrl+C / SIGTERM handler.

        The first signal requests an orderly exit; a second one exits the
        process immediately with status 1.
        """
        if self.must_exit:
            # sys.exit rather than the site-provided exit(), which is not
            # guaranteed to exist in every interpreter configuration.
            sys.exit(1)
        if self.job is None:
            print("Exiting...")
        else:
            print("Will exit after test completes.  Hit Ctrl+C again to exit immediately")
        self.must_exit = True

    def cleanup(self):
        """Do any cleanup that needs to be run regardless of how we exit.

        Called both at the end of ``run_testing`` and from ``atexit``, so it
        has to tolerate being invoked more than once.
        """
        logging.debug('Cleaning up')
        self.shaper.remove()
        if self.xvfb is not None:
            self.xvfb.stop()
            # Forget the display so a second call does not stop it again.
            self.xvfb = None
        if self.adb is not None:
            self.adb.stop()
        if self.ios is not None:
            self.ios.disconnect()

    def sleep(self, seconds):
        """Sleep wrapped in an exception handler to properly deal with Ctrl+C"""
        try:
            time.sleep(seconds)
        except IOError:
            pass

    def wait_for_idle(self, timeout=30):
        """Wait for the system to go idle for at least 2 seconds.

        Idle means the overall CPU use stays at or below ``50 / cpu_count``
        percent. Gives up silently after ``timeout`` seconds. Nothing is
        done when the CPU count cannot be determined.
        """
        if sys.version_info >= (3, 0):
            from time import monotonic
        else:
            from monotonic import monotonic
        import psutil
        logging.debug("Waiting for Idle...")
        cpu_count = psutil.cpu_count()
        # cpu_count() may return None; comparing None with 0 raises on Python 3.
        if cpu_count is not None and cpu_count > 0:
            target_pct = 50. / float(cpu_count)
            idle_start = None
            end_time = monotonic() + timeout
            idle = False
            while not idle and monotonic() < end_time:
                self.alive()
                check_start = monotonic()
                pct = psutil.cpu_percent(interval=0.5)
                if pct <= target_pct:
                    if idle_start is None:
                        idle_start = check_start
                    if monotonic() - idle_start > 2:
                        idle = True
                else:
                    idle_start = None

    def alive(self):
        """Touch a watchdog file indicating we are still alive"""
        if self.options.alive:
            with open(self.options.alive, 'a'):
                os.utime(self.options.alive, None)

    def requires(self, module, module_name=None):
        """Try importing a module and installing it if it isn't available.

        Args:
            module: Name to import.
            module_name: pip distribution name when it differs from
                ``module``.

        Returns:
            True when the module can be imported. The automatic pip install
            is only attempted on Python 2; otherwise a hint is printed.
        """
        ret = False
        if module_name is None:
            module_name = module
        try:
            __import__(module)
            ret = True
        except ImportError:
            pass
        if not ret and sys.version_info < (3, 0):
            from internal.os_util import run_elevated
            logging.debug('Trying to install %s...', module_name)
            # Remove any broken copy first, both per-user and system-wide.
            subprocess.call([sys.executable, '-m', 'pip', 'uninstall', '-y', module_name])
            run_elevated(sys.executable, '-m pip uninstall -y {0}'.format(module_name))
            subprocess.call([sys.executable, '-m', 'pip', 'install', module_name])
            run_elevated(sys.executable, '-m pip install {0}'.format(module_name))
            try:
                __import__(module)
                ret = True
            except ImportError:
                pass
        if not ret:
            if sys.version_info >= (3, 0):
                print("Missing {0} module. Please run 'pip3 install {1}'".format(module, module_name))
            else:
                print("Missing {0} module. Please run 'pip install {1}'".format(module, module_name))
        return ret

    def startup(self, detected_browsers):
        """Validate that all of the external dependencies are installed.

        Also starts Xvfb when needed, selects the capture display and
        installs traffic shaping.

        Args:
            detected_browsers: Collection of detected browser names; only
                membership of ``'Firefox'`` is checked here.

        Returns:
            True only if every required dependency check succeeded.
        """
        ret = True

        # default /tmp/wptagent as an alive file on Linux
        if self.options.alive is None:
            if platform.system() == "Linux":
                self.options.alive = '/tmp/wptagent'
            else:
                self.options.alive = os.path.join(os.path.dirname(__file__), 'wptagent.alive')
        self.alive()
        ret = self.requires('dns', 'dnspython') and ret
        ret = self.requires('monotonic') and ret
        ret = self.requires('PIL', 'pillow') and ret
        ret = self.requires('psutil') and ret
        ret = self.requires('requests') and ret
        if not self.options.android and not self.options.iOS:
            ret = self.requires('tornado') and ret
        # Windows-specific imports
        if platform.system() == "Windows":
            ret = self.requires('win32api', 'pywin32') and ret

        if self.options.webdriver and 'Firefox' in detected_browsers:
            # Combine with the earlier results instead of overwriting them.
            ret = self.requires('selenium') and ret

        # Optional imports
        self.requires('brotli')
        self.requires('fontTools', 'fonttools')

        # Try patching ws4py with a faster lib
        try:
            self.requires('wsaccel')
            import wsaccel
            wsaccel.patch_ws4py()
        except Exception:
            logging.debug('wsaccel not installed, Chrome debug interface will be slower than it could be')

        try:
            subprocess.check_output(['python', '--version'])
        except Exception:
            print("Make sure python 2.7 is available in the path.")
            ret = False

        try:
            subprocess.check_output('{0} -version'.format(self.image_magick['convert']), shell=True)
        except Exception:
            print("Missing convert utility. Please install ImageMagick and make sure it is in the path.")
            ret = False

        try:
            subprocess.check_output('{0} -version'.format(self.image_magick['mogrify']), shell=True)
        except Exception:
            print("Missing mogrify utility. Please install ImageMagick and make sure it is in the path.")
            ret = False

        if platform.system() == "Linux":
            try:
                subprocess.check_output(['traceroute', '--version'])
            except Exception:
                logging.debug("Traceroute is missing, installing...")
                subprocess.call(['sudo', 'apt', '-yq', 'install', 'traceroute'])

            if self.options.webdriver and 'Firefox' in detected_browsers:
                try:
                    subprocess.check_output(['geckodriver', '-V'])
                except Exception:
                    logging.debug("geckodriver is missing, installing...")
                    subprocess.call(['sudo', 'apt', '-yq', 'install', 'firefox-geckodriver'])

        # If we are on Linux and there is no display, enable xvfb by default
        if platform.system() == "Linux" and not self.options.android and \
                not self.options.iOS and 'DISPLAY' not in os.environ:
            self.options.xvfb = True

        if self.options.xvfb:
            ret = self.requires('xvfbwrapper') and ret
            if ret:
                from xvfbwrapper import Xvfb
                self.xvfb = Xvfb(width=1920, height=1200, colordepth=24)
                self.xvfb.start()

        # Figure out which display to capture from
        if platform.system() == "Linux" and 'DISPLAY' in os.environ:
            logging.debug('Display: %s', os.environ['DISPLAY'])
            self.capture_display = os.environ['DISPLAY']
        elif platform.system() == "Darwin":
            # ffmpeg lists the avfoundation devices on stderr.
            proc = subprocess.Popen('ffmpeg -f avfoundation -list_devices true -i ""',
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
            _, err = proc.communicate()
            for line in err.splitlines():
                matches = re.search(r'\[(\d+)\] Capture screen', line.decode('utf-8'))
                if matches:
                    self.capture_display = matches.group(1)
                    break
        elif platform.system() == "Windows":
            self.capture_display = 'desktop'

        if self.options.throttle:
            try:
                subprocess.check_output('sudo cgset -h', shell=True)
            except Exception:
                print("Missing cgroups, make sure cgroup-tools is installed.")
                ret = False

        # Fix Lighthouse install permissions
        if platform.system() != "Windows" and sys.version_info < (3, 0):
            from internal.os_util import run_elevated
            run_elevated('chmod', '-R 777 ~/.config/configstore/')
            try:
                import getpass
                run_elevated('chown', '-R {0}:{0} ~/.config'.format(getpass.getuser()))
            except Exception:
                pass

        # Check for Node 10+
        if self.get_node_version() < 10.0:
            if platform.system() == "Linux":
                # This only works on debian-based systems
                logging.debug('Updating Node.js to 12.x')
                subprocess.call('curl -sL https://deb.nodesource.com/setup_12.x | sudo -E bash -',
                                shell=True)
                subprocess.call(['sudo', 'apt-get', 'install', '-y', 'nodejs'])
        if self.get_node_version() < 10.0:
            logging.warning("Node.js 10 or newer is required for Lighthouse testing")

        # Check the iOS install
        if self.ios is not None:
            # Combine with the earlier results instead of overwriting them.
            ret = self.ios.check_install() and ret

        if not self.options.android and not self.options.iOS and not self.options.noidle:
            self.wait_for_idle(300)
        if self.adb is not None:
            if not self.adb.start():
                print("Error configuring adb. Make sure it is installed and in the path.")
                ret = False
        # Start from a clean state before installing the shaper.
        self.shaper.remove()
        if not self.shaper.install():
            if platform.system() == "Windows":
                print("Error configuring traffic shaping, make sure secure boot is disabled.")
            else:
                print("Error configuring traffic shaping, make sure it is installed.")
            ret = False

        # Update the Windows root certs
        if platform.system() == "Windows":
            self.update_windows_certificates()

        return ret

    def get_node_version(self):
        """Get the installed version of Node.js.

        Returns:
            ``major.minor`` as a float, or 0 when ``node`` cannot be run or
            its output is not recognised.
        """
        version = 0
        try:
            if sys.version_info >= (3, 0):
                stdout = subprocess.check_output(['node', '--version'], encoding='UTF-8')
            else:
                stdout = subprocess.check_output(['node', '--version'])
            matches = re.match(r'^v(\d+\.\d+)', stdout)
            if matches:
                version = float(matches.group(1))
        except Exception:
            pass
        return version

    def update_windows_certificates(self):
        """Update the root Windows certificates.

        The certificate bundle is regenerated at most every 5 days, based on
        the modification time of the cached file. Failures are ignored.
        """
        try:
            cert_file = os.path.join(self.persistent_work_dir, 'root_certs.sst')
            if not os.path.isdir(self.persistent_work_dir):
                os.makedirs(self.persistent_work_dir)
            needs_update = True
            if os.path.isfile(cert_file):
                days = (time.time() - os.path.getmtime(cert_file)) / 86400
                if days < 5:
                    needs_update = False
            if needs_update:
                logging.debug("Updating Windows root certificates...")
                if os.path.isfile(cert_file):
                    os.unlink(cert_file)
                from internal.os_util import run_elevated
                run_elevated('certutil.exe', '-generateSSTFromWU "{0}"'.format(cert_file))
                # Only import the bundle if certutil actually produced it.
                if os.path.isfile(cert_file):
                    run_elevated('certutil.exe', '-addstore -f Root "{0}"'.format(cert_file))
        except Exception:
            pass
def xvfb_launcher(request):
    """Start a virtual X display when ``config.browser.headless`` is set.

    Returns the started ``Xvfb`` instance, or ``None`` when the setting is
    off. ``request`` is accepted but not used in this function.
    """
    if not config.browser.headless:
        return None
    display = Xvfb()
    display.start()
    return display
def runner(self, t_context: TaskContext):
    """Thread body: register one account and announce it on the queue.

    Sets up an optional virtual display (and VNC server), obtains a Chrome
    driver under ``self.lockDriver``, registers ``t_context.user`` through
    ``Spotify.Adapter`` and, on success, sends the user/playlist message
    under ``self.lockClient`` and bumps the shared account counter. All
    acquired resources are released before the thread ends.

    Returns immediately, without doing anything, when the shutdown event is
    already set.
    """
    tid = current_thread().native_id
    self.p_context.console.log('Start thread %d' % tid)
    driver = None
    vdisplay = None
    x11vnc = None
    userDataDir = None
    spotify = None

    try:
        if self.p_context.shutdownEvent.is_set():
            return
        # Equality test kept as-is: only an explicit False starts a display.
        if t_context.headless == False:
            vdisplay = Xvfb(width=1280,
                            height=1024,
                            colordepth=24,
                            tempdir=None,
                            noreset='+render')
            vdisplay.start()
            if t_context.vnc:
                x11vnc = X11vnc(vdisplay)
                x11vnc.start()
        # Driver creation is serialised across threads.
        with self.lockDriver:
            session = self.driverManager.getDriver(type='chrome',
                                                   uid=tid,
                                                   user=t_context.user,
                                                   proxy=t_context.proxy,
                                                   headless=t_context.headless)
        if not session:
            raise Exception('No driverData was returned from adapter')
        driver = session['driver']
        userDataDir = session['userDataDir']
        if not driver:
            raise Exception('No driver was returned from adapter')
    except:
        self.p_context.console.error('Unavailale webdriver: %s' % format_exc())
    else:
        try:
            email = t_context.user['email']
            spotify = Spotify.Adapter(driver, self.p_context.console,
                                      self.p_context.shutdownEvent,
                                      t_context.batchId)
            self.p_context.console.log('#%d Start create account for %s' %
                                       (tid, email))
            registered = spotify.register(t_context.user)
            if not registered:
                # A failure during shutdown is expected and not reported.
                if not self.p_context.shutdownEvent.is_set():
                    self.p_context.console.error(
                        '#%d Failed to create account for %s' % (tid, email))
            else:
                self.p_context.console.log('#%d Account created for %s' %
                                           (tid, email))
                payload = dumps({
                    'user': t_context.user,
                    'playlist': t_context.playlist
                })
                with self.lockClient:
                    try:
                        self.client.send_message(
                            QueueUrl=self.p_context.config.SQS_ENDPOINT,
                            MessageBody=payload,
                            DelaySeconds=1,
                        )
                    except:
                        self.p_context.console.exception(
                            'T#%d Failed to send message to the queue %s' %
                            (tid, self.p_context.config.SQS_ENDPOINT))
                    else:
                        self.p_context.console.log(
                            '#%d Message sent for %s' % (tid, email))
                        # Only accounts whose message was queued are counted.
                        with self.lockAccountCount:
                            self.totalAccountCreated += 1
                            channel = self.p_context.channel
                            self.p_context.accountsCount[
                                channel] = self.totalAccountCreated
        except:
            self.p_context.console.exception()

    # Release everything that was acquired; each step is best effort.
    if driver:
        try:
            driver.quit()
            del driver
        except:
            pass
    if spotify:
        try:
            del spotify
        except:
            pass
    if userDataDir:
        try:
            rmtree(path=userDataDir, ignore_errors=True)
        except:
            pass
    if x11vnc:
        # Terminate vnc server if any
        try:
            x11vnc.stop()
            del x11vnc
        except:
            pass
    if vdisplay:
        try:
            vdisplay.stop()
            del vdisplay
        except:
            pass
    self.p_context.console.log('Stop thread %d' % tid)
    collect()
def useZstat(zstat, file_path_name_save, file_path_conte, file_path_name_resting_atlas):
    r"""Plot and save the image.

    The z-stat volume is sampled at the Conte69 mid-thickness vertices,
    thresholded, and rendered off-screen on the inflated surfaces in two
    views; the figure is written to ``file_path_name_save``.

    Arguments
    ---------
    zstat : string
        Full file path and name to nii to plot.
    file_path_name_save : string
        Full file path and name to png output. Output dir will be created
        if it doesn't exist.
    file_path_conte : string
        Full file path to Conte atlas
    file_path_name_resting_atlas : string
        Path to the dtseries file whose brain models give the vertex
        indices of the left and right hemispheres.

    Returns
    -------
    None.

    Normal error message:
    pixdim[1,2,3] should be non-zero; setting 0 dims to 1
    plot_brain.py: Fatal IO error: client killed

    Example
    -------
    python plot_brain.py -i /groupAnalysis/l2/zstat1_threshold.nii.gz -o /plots/l2test.png -c /git/bp2/32k_ConteAtlas_v2 -r rfMRI_REST1_LR_Atlas.dtseries.nii

    MIT OM Specific Tip
    -------------------
    Call this function from a shell script to run headless BUT requires:
    source activate mathiasg_vd_env
    export QT_API=pyqt
    module add openmind/xvfb-fix/0.1

    #file_path_name=$1
    #file_path_name_save=$2
    #file_path_conte=$3
    #file_path_name_resting_atlas=$4
    python plot_brain.py \
    -i $1 \
    -o $2 \
    -c $3 \
    -r $4
    """
    import matplotlib.pyplot as plt
    import os
    import numpy as np
    import nibabel as nb
    import nibabel.gifti as gifti

    # Crucial: xvfb must be imported and started before importing mayavi
    from xvfbwrapper import Xvfb
    print('XVb pre')
    vdisplay = Xvfb()
    vdisplay.start()
    # Everything below runs under try/finally so the virtual display is
    # stopped even when loading or rendering fails.
    try:
        print('pre maya')
        # Crashes on this line if run with plain python (not xvfb-run ... python)
        # and if xvfbwrapper is after it.
        from mayavi import mlab
        print('post maya')
        from tvtk.api import tvtk
        print('post tvtk')
        print('display')
        mlab.options.offscreen = True  # offscreen window for rendering

        # Vertex indices of both hemispheres, taken from the dtseries header.
        img = nb.load(file_path_name_resting_atlas)
        mim = img.header.matrix.mims[1]
        bm1 = mim.brainModels[0]
        lidx = bm1.vertexIndices.indices
        bm2 = mim.brainModels[1]
        # Right-hemisphere indices follow the left-hemisphere vertices.
        ridx = bm1.surfaceNumberOfVertices + bm2.vertexIndices.indices
        bidx = np.concatenate((lidx, ridx))

        axis = [0, 0, 1]
        theta = np.pi

        inflated = True
        split_brain = True

        # Mid-thickness surfaces are used for sampling the volume ...
        surf = gifti.read(file_path_conte + '/Conte69.L.midthickness.32k_fs_LR.surf.gii')
        verts_L_data = surf.darrays[0].data
        faces_L_data = surf.darrays[1].data
        surf = gifti.read(file_path_conte + '/Conte69.R.midthickness.32k_fs_LR.surf.gii')
        verts_R_data = surf.darrays[0].data
        faces_R_data = surf.darrays[1].data

        # ... and the inflated ones (when enabled) for display.
        if inflated:
            surf = gifti.read(file_path_conte + '/Conte69.L.inflated.32k_fs_LR.surf.gii')
            verts_L_display = surf.darrays[0].data
            faces_L_display = surf.darrays[1].data
            surf = gifti.read(file_path_conte + '/Conte69.R.inflated.32k_fs_LR.surf.gii')
            verts_R_display = surf.darrays[0].data
            faces_R_display = surf.darrays[1].data
        else:
            verts_L_display = verts_L_data.copy()
            verts_R_display = verts_R_data.copy()
            faces_L_display = faces_L_data.copy()
            faces_R_display = faces_R_data.copy()

        verts_L_display[:, 0] -= max(verts_L_display[:, 0])
        verts_R_display[:, 0] -= min(verts_R_display[:, 0])
        verts_L_display[:, 1] -= (max(verts_L_display[:, 1]) + 1)
        verts_R_display[:, 1] -= (max(verts_R_display[:, 1]) + 1)

        # Right-hemisphere faces are offset by the number of left vertices.
        faces = np.vstack((faces_L_display, verts_L_display.shape[0] + faces_R_display))

        if split_brain:
            verts2 = rotation_matrix(axis, theta).dot(verts_R_display.T).T
        else:
            verts_L_display[:, 1] -= np.mean(verts_L_display[:, 1])
            verts_R_display[:, 1] -= np.mean(verts_R_display[:, 1])
            verts2 = verts_R_display

        verts_rot = np.vstack((verts_L_display, verts2))
        verts = np.vstack((verts_L_data, verts_R_data))

        # Create the output directory; a bare file name has no directory part
        # and os.makedirs('') would raise.
        out_dir = os.path.dirname(file_path_name_save)
        if out_dir and not os.path.exists(out_dir):
            os.makedirs(out_dir)

        print('use zstat')
        img = nb.load(zstat)
        print('loaded img')

        threshold = 2.3  # lower limit
        display_threshold = 6  # upper limit

        data = img.get_data()
        aff = img.affine
        # Map vertex coordinates to voxel indices with the inverse affine.
        homogeneous = np.hstack((verts, np.ones((verts.shape[0], 1))))
        indices = np.round(
            (np.linalg.pinv(aff).dot(homogeneous.T))[:3, :].T).astype(int)
        scalars2 = data[indices[:, 0], indices[:, 1], indices[:, 2]]
        scalars2[np.abs(scalars2) < threshold] = 0.
        scalars = np.zeros(verts.shape[0])
        scalars[bidx] = scalars2[bidx]

        negative = bool(np.any(scalars < 0))
        positive = bool(np.any(scalars > 0))

        # The colour range is fixed to +/- display_threshold on every side
        # that has data.
        nlabels = 2
        vmin = 0
        vmax = 0
        if negative and positive:
            nlabels = 3
            vmin = -display_threshold
            vmax = display_threshold
        elif negative:
            vmin = -display_threshold
            vmax = 0
        elif positive:
            vmin = 0
            vmax = display_threshold

        dual_split = True

        fig1 = mlab.figure(1, bgcolor=(0, 0, 0))
        mlab.clf()
        mesh = tvtk.PolyData(points=verts_rot, polys=faces)
        mesh.point_data.scalars = scalars
        mesh.point_data.scalars.name = 'scalars'
        surf = mlab.pipeline.surface(mesh, colormap='autumn', vmin=vmin, vmax=vmax)
        if dual_split:
            # Second copy of the brain, rotated and shifted below the first.
            verts_rot_shifted = verts_rot.copy()
            verts_rot_shifted = rotation_matrix(axis, theta).dot(verts_rot_shifted.T).T
            verts_rot_shifted[:, 2] -= (np.max(verts_rot_shifted[:, 2]) -
                                        np.min(verts_rot_shifted[:, 2]))
            verts_rot_shifted[:, 0] -= np.max(verts_rot_shifted[:, 0])
            mesh2 = tvtk.PolyData(points=verts_rot_shifted, polys=faces)
            mesh2.point_data.scalars = scalars
            mesh2.point_data.scalars.name = 'scalars'
            surf2 = mlab.pipeline.surface(mesh2, colormap='autumn', vmin=vmin, vmax=vmax)
        mlab.colorbar(surf, nb_labels=nlabels)

        # Build the lookup table: grey (192) below threshold, 'cool' for
        # negative values and 'autumn' for positive ones.
        lut = surf.module_manager.scalar_lut_manager.lut.table.to_array()

        if negative and positive:
            # Floor division: the value is used as a slice bound and as a
            # sample count, both of which must be integers.
            half_index = lut.shape[0] // 2
            index = int(half_index * threshold / vmax)
            lut[(half_index - index + 1):(half_index + index), :] = 192
            lut[(half_index + index):, :] = 255 * plt.cm.autumn(
                np.linspace(0, 255, half_index - index).astype(int))
            lut[:(half_index - index), :] = 255 * plt.cm.cool(
                np.linspace(0, 255, half_index - index).astype(int))
        elif negative:
            index = int(lut.shape[0] * threshold / abs(vmin))
            lut[(lut.shape[0] - index):, :] = 192
            lut[:(lut.shape[0] - index), :] = 255 * plt.cm.cool(
                np.linspace(0, 255, lut.shape[0] - index).astype(int))
        elif positive:
            index = int(lut.shape[0] * threshold / vmax)
            lut[:index, :] = 192
            lut[index:, :] = 255 * plt.cm.autumn(
                np.linspace(0, 255, lut.shape[0] - index).astype(int))
        lut[:, -1] = 255  # fully opaque

        surf.module_manager.scalar_lut_manager.lut.table = lut
        if dual_split:
            surf2.module_manager.scalar_lut_manager.lut.table = lut
        # Hide the scalar bar while changing the font size, then show it again.
        surf.module_manager.scalar_lut_manager.show_scalar_bar = False
        surf.module_manager.scalar_lut_manager.show_legend = False
        surf.module_manager.scalar_lut_manager.label_text_property.font_size = 10
        surf.module_manager.scalar_lut_manager.show_scalar_bar = True
        surf.module_manager.scalar_lut_manager.show_legend = True
        mlab.draw()

        mlab.view(9, 90.0)
        print(file_path_name_save)

        mlab.savefig(file_path_name_save, figure=fig1, magnification=5)
    finally:
        vdisplay.stop()
os.environ['lab_mode'] = lab_mode.split('@')[0] if lab_mode in TRAIN_MODES: run_new_mode(spec_file, spec_name, lab_mode) else: run_old_mode(spec_file, spec_name, lab_mode) def main(): if len(sys.argv) > 1: args = sys.argv[1:] assert len( args ) == 3, f'To use sys args, specify spec_file, spec_name, lab_mode' run_by_mode(*args) return experiments = util.read('config/experiments.json') for spec_file in experiments: for spec_name, lab_mode in experiments[spec_file].items(): run_by_mode(spec_file, spec_name, lab_mode) if __name__ == '__main__': mp.set_start_method('spawn') # for distributed pytorch to work if sys.platform == 'darwin': # avoid xvfb for MacOS: https://github.com/nipy/nipype/issues/1400 main() else: with Xvfb() as xvfb: # safety context for headless machines main()
class Crawler:
    """
    Drive a Selenium-controlled browser with the Privacy Badger extension
    installed across a list of domains, then save the `snitch_map` and
    `action_map` the extension learned.
    """

    def __init__(self, browser, n_sites, exclude, timeout, wait_time,
                 log_stdout, out_path, pb_path, chromedriver_path,
                 firefox_path, **kwargs):  # pylint:disable=too-many-arguments,unused-argument
        """
        Store the crawl configuration and set up logging.

        `browser` must be CHROME or FIREFOX. A log file named `log.txt` is
        created in `out_path`; if `log_stdout` is true, messages are echoed
        to stdout as well. Extra keyword arguments are accepted and ignored.
        """
        self.browser = browser
        assert self.browser in (CHROME, FIREFOX)
        self.n_sites = n_sites
        self.exclude = exclude
        self.timeout = timeout
        self.wait_time = wait_time
        self.out_path = out_path
        self.pb_path = pb_path
        self.chromedriver_path = chromedriver_path
        self.firefox_path = firefox_path

        # version is based on when the crawl started
        self.version = time.strftime('%Y.%-m.%-d', time.localtime())

        # set up logging
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.INFO)
        log_fmt = logging.Formatter('%(asctime)s %(message)s')

        # by default, just log to file
        fh = logging.FileHandler(os.path.join(out_path, 'log.txt'))
        fh.setFormatter(log_fmt)
        self.logger.addHandler(fh)

        # log to stdout as well if configured
        if log_stdout:
            sh = logging.StreamHandler(sys.stdout)
            sh.setFormatter(log_fmt)
            self.logger.addHandler(sh)

        # names of the extension storage objects that get dumped and restored
        self.storage_objects = ['snitch_map', 'action_map']

    def start_driver(self):
        """Start a new Selenium web driver and install the bundled extension."""
        if self.browser == CHROME:
            # make extension ID constant across runs

            # create temp directory
            self.tmp_dir = tempfile.TemporaryDirectory()
            new_extension_path = os.path.join(self.tmp_dir.name, "src")

            # copy extension sources there
            copytree(os.path.join(self.pb_path, 'src'), new_extension_path)

            # update manifest.json
            manifest_path = os.path.join(new_extension_path, "manifest.json")
            with open(manifest_path, "r") as f:
                manifest = json.load(f)
            # this key and the extension ID
            # must both be derived from the same private key
            manifest['key'] = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArMdgFkGsm7nOBr/9qkx8XEcmYSu1VkIXXK94oXLz1VKGB0o2MN+mXL/Dsllgkh61LZgK/gVuFFk89e/d6Vlsp9IpKLANuHgyS98FKx1+3sUoMujue+hyxulEGxXXJKXhk0kGxWdE0IDOamFYpF7Yk0K8Myd/JW1U2XOoOqJRZ7HR6is1W6iO/4IIL2/j3MUioVqu5ClT78+fE/Fn9b/DfzdX7RxMNza9UTiY+JCtkRTmm4ci4wtU1lxHuVmWiaS45xLbHphQr3fpemDlyTmaVoE59qG5SZZzvl6rwDah06dH01YGSzUF1ezM2IvY9ee1nMSHEadQRQ2sNduNZWC9gwIDAQAB"  # noqa:E501 pylint:disable=line-too-long
            with open(manifest_path, "w") as f:
                json.dump(manifest, f)

            opts = Options()
            opts.add_argument('--no-sandbox')
            opts.add_argument("--load-extension=" + new_extension_path)

            prefs = {"profile.block_third_party_cookies": False}
            opts.add_experimental_option("prefs", prefs)
            opts.add_argument('--dns-prefetch-disable')
            self.driver = webdriver.Chrome(self.chromedriver_path,
                                           chrome_options=opts)

        elif self.browser == FIREFOX:
            profile = webdriver.FirefoxProfile()
            profile.set_preference('extensions.webextensions.uuids',
                                   '{"%s": "%s"}' % (FF_EXT_ID, FF_UUID))

            # this is kind of a hack; eventually the functionality to install
            # an extension should be part of Selenium. See
            # https://github.com/SeleniumHQ/selenium/issues/4215
            self.driver = webdriver.Firefox(firefox_profile=profile,
                                            firefox_binary=self.firefox_path)
            command = 'addonInstall'
            info = ('POST', '/session/$sessionId/moz/addon/install')
            self.driver.command_executor._commands[command] = info  # pylint:disable=protected-access
            path = os.path.join(self.pb_path, 'src')
            self.driver.execute(command, params={
                'path': path,
                'temporary': True
            })
            time.sleep(2)

        # apply timeout settings
        self.driver.set_page_load_timeout(self.timeout)
        self.driver.set_script_timeout(self.timeout)

        # wait for Badger to finish initializing
        self.load_extension_page(OPTIONS)
        wait_for_script(self.driver, (
            "return chrome.extension.getBackgroundPage().badger.INITIALIZED"
            " && Object.keys("
            " chrome.extension.getBackgroundPage()"
            " .badger.storage.getBadgerStorageObject('action_map').getItemClones()"
            ").length > 1"))

    def load_extension_page(self, page, retries=3):
        """
        Load a page in the Privacy Badger extension. `page` should either be
        BACKGROUND or OPTIONS.

        The load is attempted up to `retries` times. An unexpected alert is
        dismissed before the next attempt. If no attempt succeeds, the last
        error is logged and re-raised. A `ValueError` is raised when
        `retries` allows no attempt at all.
        """
        if self.browser == CHROME:
            ext_url = (CHROME_URL_FMT + page) % CHROME_EXT_ID
        elif self.browser == FIREFOX:
            ext_url = (FF_URL_FMT + page) % FF_UUID

        # Remember the most recent failure of either kind, so the error path
        # below always has something to report.
        err = None
        for _ in range(retries):
            try:
                self.driver.get(ext_url)
                break
            except UnexpectedAlertPresentException as e:
                err = e
                self.driver.switch_to_alert().dismiss()
            except WebDriverException as e:
                err = e
        else:
            if err is None:
                raise ValueError('retries must be a positive integer')
            self.logger.error('Error loading extension page: %s', err.msg)
            raise err

    def load_user_data(self, data):
        """Load saved user data into Privacy Badger after a restart"""
        self.load_extension_page(OPTIONS)
        for obj in self.storage_objects:
            script = ("(function (data) {"
                      "data = JSON.parse(data);"
                      "let bg = chrome.extension.getBackgroundPage();"
                      "bg.badger.storage.%s.merge(data.%s);"
                      "}(arguments[0]));") % (obj, obj)
            self.driver.execute_script(script, json.dumps(data))

        time.sleep(2)  # wait for localstorage to sync

    def dump_data(self):
        """Extract the objects Privacy Badger learned during its training run."""
        self.load_extension_page(OPTIONS)

        data = {}
        for obj in self.storage_objects:
            script = ("return chrome.extension.getBackgroundPage()."
                      "badger.storage.%s.getItemClones()" % obj)
            data[obj] = self.driver.execute_script(script)
        return data

    def clear_data(self):
        """Clear the training data Privacy Badger starts with."""
        self.load_extension_page(OPTIONS)
        self.driver.execute_script("chrome.extension.getBackgroundPage()."
                                   "badger.storage.clearTrackerData();")

    def timeout_workaround(self):
        """
        Selenium has a bug where a tab that raises a timeout exception can't
        recover gracefully. So we kill the tab and make a new one.
        TODO: find actual bug ticket
        """
        self.driver.close()  # kill the broken site
        self.driver.switch_to_window(self.driver.window_handles.pop())
        before = set(self.driver.window_handles)
        self.driver.execute_script('window.open()')

        # the handle that was not there before is the freshly opened window
        new_window = (set(self.driver.window_handles) ^ before).pop()
        self.driver.switch_to_window(new_window)

    def get_domain(self, domain):
        """
        Try to load a domain over https, and fall back to http if the initial
        load times out. Then sleep `wait_time` seconds on the site to wait for
        AJAX calls to complete.

        Returns the URL that was loaded last.
        """
        try:
            url = "https://%s/" % domain
            self.driver.get(url)
        except TimeoutException:
            self.logger.info('timeout on %s ', url)
            self.timeout_workaround()
            url = "http://%s/" % domain
            self.logger.info('trying %s', url)
            self.driver.get(url)

        time.sleep(self.wait_time)
        return url

    def start_browser(self):
        """Start the driver and wipe the extension's tracker data."""
        self.start_driver()
        self.clear_data()

    def restart_browser(self, data):
        """
        Quit the current browser, start a new one and reload `data` into it.

        Tries up to RESTART_RETRIES times and exits the process with status 1
        if every attempt fails.
        """
        self.logger.info('restarting browser...')

        # It's ugly, but this section needs to be ABSOLUTELY crash-proof.
        # `Exception` (rather than a bare except) keeps it crash-proof while
        # still letting KeyboardInterrupt / SystemExit through.
        for _ in range(RESTART_RETRIES):
            try:
                self.driver.quit()
            except Exception:  # pylint:disable=broad-except
                pass

            try:
                del self.driver
            except AttributeError:
                pass

            try:
                self.start_browser()
                self.load_user_data(data)
                self.logger.error('Success')
                break
            except Exception as e:  # pylint:disable=broad-except
                self.logger.error('Error restarting browser. Trying again...')
                if isinstance(e, WebDriverException):
                    self.logger.error('%s: %s', type(e).__name__, e.msg)
                else:
                    self.logger.error('%s: %s', type(e).__name__, e)
        else:
            # If we couldn't restart the browser after all that, just quit.
            self.logger.error('Could not restart browser.')
            sys.exit(1)

    def crawl(self):
        """
        Visit the top `n_sites` websites in the Tranco List, in order, in a
        virtual browser with Privacy Badger installed. Afterwards, save the
        action_map and snitch_map that the Badger learned.
        """
        domains = get_domain_list(self.n_sites, self.exclude)
        self.logger.info(("starting new crawl:\n"
                          "\ttimeout: %ss\n"
                          "\twait time: %ss\n"
                          "\tbrowser: %s\n"
                          "\tsurvey mode: False\n"
                          "\tdomains to crawl: %d\n"
                          "\tTLDs to exclude: %s"),
                         self.timeout, self.wait_time, self.browser,
                         self.n_sites, self.exclude)

        # create an XVFB virtual display (to avoid opening an actual browser)
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        self.start_browser()

        # list of domains we actually visited
        visited = []
        old_snitches = {}

        # Bound before the loop so the error handlers below can always pass
        # it to restart_browser(), even if the very first dump fails.
        last_data = {}

        for i, domain in enumerate(domains):
            try:
                # This script could fail during the data dump (trying to get
                # the options page), the data cleaning, or while trying to load
                # the next domain.
                last_data = self.dump_data()

                # try to fix misattribution errors
                if i >= 2:
                    clean_data = self.cleanup(domains[i - 2], domains[i - 1],
                                              last_data)
                    if last_data != clean_data:
                        self.clear_data()
                        self.load_user_data(clean_data)

                self.logger.info('visiting %d: %s', i + 1, domain)
                url = self.get_domain(domain)
                visited.append(url)
            except TimeoutException:
                self.logger.info('timeout on %s ', domain)
                # TODO: how to get rid of this nested try?
                try:
                    self.timeout_workaround()
                except WebDriverException as e:
                    if should_restart(e):
                        self.restart_browser(last_data)
            except WebDriverException as e:
                self.logger.error('%s %s: %s', domain, type(e).__name__, e.msg)
                if should_restart(e):
                    self.restart_browser(last_data)
            finally:
                # report any tracker that appeared since the previous site
                self.load_extension_page(OPTIONS)
                snitches = self.driver.execute_script(
                    "return chrome.extension.getBackgroundPage()."
                    "badger.storage.snitch_map._store;")
                diff = set(snitches) - set(old_snitches)
                if diff:
                    self.logger.info("new trackers in snitch_map: %s", diff)
                old_snitches = snitches

        self.logger.info("Finished scan. Visited %d sites and errored on %d.",
                         len(visited), len(domains) - len(visited))

        try:
            self.logger.info('Getting data from browser storage...')
            data = self.dump_data()
        except WebDriverException:
            # If we can't load the background page here, just quit :(
            self.logger.error('Could not get badger storage.')
            sys.exit(1)

        self.driver.quit()
        self.vdisplay.stop()

        self.save(data)

    def cleanup(self, d1, d2, data):
        """
        Remove from snitch map any domains that appear to have been added as a
        result of bugs.

        `d1` and `d2` are two consecutively visited domains (`d1` first).
        `data` is left untouched; a cleaned deep copy is returned.
        """
        new_data = copy.deepcopy(data)
        snitch_map = new_data['snitch_map']
        action_map = new_data['action_map']

        # handle blank domain bug
        if '' in action_map:
            self.logger.info('Deleting blank domain from action map')
            self.logger.info(str(action_map['']))
            del action_map['']

        if '' in snitch_map:
            self.logger.info('Deleting blank domain from snitch map')
            self.logger.info(str(snitch_map['']))
            del snitch_map['']

        extract = TLDExtract()
        d1_base = extract(d1).registered_domain

        # handle the domain-attribution bug (Privacy Badger issue #1997).
        # If a domain we visited was recorded as a tracker on the domain we
        # visited immediately after it, it's probably a bug
        if d1_base in snitch_map and d2 in snitch_map[d1_base]:
            self.logger.info('Likely bug: domain %s tracking on %s',
                             d1_base, d2)
            snitch_map[d1_base].remove(d2)

            # if the bug caused d1 to be added to the action map, remove it
            if not snitch_map[d1_base]:
                self.logger.info(
                    'Deleting domain %s from action & snitch maps', d1_base)
                if d1 in action_map:
                    del action_map[d1]
                if d1_base in action_map:
                    del action_map[d1_base]
                del snitch_map[d1_base]

            # if the bug caused d1 to be blocked, unblock it
            elif len(snitch_map[d1_base]) == 2:
                if d1 in action_map:
                    self.logger.info(
                        'Downgrading domain %s from "block" to "allow"', d1)
                    action_map[d1]['heuristicAction'] = 'allow'
                if d1_base in action_map:
                    self.logger.info(
                        'Downgrading domain %s from "block" to "allow"',
                        d1_base)
                    action_map[d1_base]['heuristicAction'] = 'allow'

        return new_data

    def save(self, data, name='results.json'):
        """Stamp `data` with the crawl version and write it to `out_path/name` as JSON."""
        data['version'] = self.version

        self.logger.info('Saving seed data version %s...', self.version)
        # save the snitch_map in a human-readable JSON file
        with open(os.path.join(self.out_path, name), 'w') as f:
            json.dump(data, f, indent=2, sort_keys=True,
                      separators=(',', ': '))
        self.logger.info('Saved data to %s.', name)
def crawl(self):
    """
    Visit the top `n_sites` websites in the Tranco List (or the domains in
    `self.domain_list`, when one is set), in order, in a virtual browser
    with Privacy Badger installed. Afterwards, save the snitch_map that the
    Badger learned.

    Whenever the dumped data grows beyond `self.max_data_size`, it is saved
    to a `results-<first>-<last>.json` chunk and the browser is restarted.
    At the end the final chunk is saved, followed by the merged data.
    """
    if self.domain_list:
        domains = self.domain_list
    else:
        domains = get_domain_list(self.n_sites, self.exclude)

    self.logger.info(("starting new crawl:\n"
                      "\ttimeout: %ss\n"
                      "\twait time: %ss\n"
                      "\tbrowser: %s\n"
                      "\tsurvey mode: True\n"
                      "\tdomains to crawl: %d\n"
                      "\tTLDs to exclude: %s"),
                     self.timeout, self.wait_time, self.browser,
                     self.n_sites, self.exclude)

    # create an XVFB virtual display (to avoid opening an actual browser)
    self.vdisplay = Xvfb(width=1280, height=720)
    self.vdisplay.start()
    self.start_browser()

    # list of domains we actually visited
    visited = []
    last_data = None
    first_i = 0

    i = None
    for i, domain in enumerate(domains):
        # If we can't load the options page for some reason, treat it like
        # any other error
        try:
            # save the state of privacy badger before we do anything else
            last_data = self.dump_data()

            # If the localstorage data is getting too big, dump and restart
            if size_of(last_data) > self.max_data_size:
                self.save(last_data, 'results-%d-%d.json' % (first_i, i))
                first_i = i + 1
                last_data = {}
                self.restart_browser(last_data)

            self.logger.info('visiting %d: %s', i + 1, domain)
            url = self.get_domain(domain)
            visited.append(url)
        except TimeoutException:
            self.logger.info('timeout on %s ', domain)
            # TODO: how to get rid of this nested try?
            try:
                self.timeout_workaround()
            except WebDriverException as e:
                if should_restart(e):
                    self.restart_browser(last_data)
        except WebDriverException as e:
            self.logger.error('%s %s: %s', domain, type(e).__name__, e.msg)
            if should_restart(e):
                self.restart_browser(last_data)
        except KeyboardInterrupt:
            self.logger.warning(
                "Keyboard interrupt. Ending scan after %d sites.", i + 1)
            break

    # `i` is still None when there was nothing to crawl; fall back to zero
    # so the summary and the file name below stay well-defined.
    attempted = 0 if i is None else i + 1
    last_i = 0 if i is None else i

    self.logger.info('Finished scan. Visited %d sites and errored on %d.',
                     len(visited), attempted - len(visited))

    self.logger.info('Getting data from browser storage...')
    try:
        data = self.dump_data()
    except WebDriverException:
        if last_data:
            self.logger.error(
                "Could not get badger storage. Using cached data...")
            data = last_data
        else:
            self.logger.error('Could not export data. Exiting.')
            sys.exit(1)

    self.driver.quit()
    self.vdisplay.stop()

    self.save(data, 'results-%d-%d.json' % (first_i, last_i))
    self.save(self.merge_saved_data())
# Python 2 script fragment: reads previously visited band names and starts
# walking through them with a Chrome driver on a virtual display.

# Output and input files used by the script.
f = open('network_2.txt', 'w')
visited_f = open('already_visited_2.txt', 'w')
visited = []
frontier = []
f_rv = open("already_visited.txt", "r")
f_songs = open("songs_already_visited.txt", "w")

# One name per line; strip the newline and decode to unicode.
content = f_rv.readlines()
for line in content:
    visited.append(line.replace("\n", "").decode('utf-8'))
counter = 0
stuck = 0

# Start a virtual display before launching Chrome.
display = Xvfb()
display.start()
driver = webdriver.Chrome(
    '/home/laurynas/workspace/knowledge-graph-data-extractor/chromedriver')
visited_size = len(visited)

# Consume the names front to back, pausing one second per name.
while visited:
    time.sleep(1)
    seed_band = visited.pop(0)
    counter += 1
    print "band # " + str(counter) + "/" + str(visited_size) + "|" + str(
        seed_band.encode("utf8")) + "|" + " | with # of songs: "
    query = seed_band + u" songs"
    service_url = u'https://www.google.co.uk/search'
class SurveyCrawler(Crawler):
    """
    Crawler variant that puts Privacy Badger into passive mode and records
    only the `snitch_map`, saving it in chunks that are merged at the end.
    """

    def __init__(self, **kwargs):
        """
        Accepts the same keyword arguments as `Crawler`, plus:

        `max_data_size`: size threshold above which the data collected so
        far is saved and the browser restarted.
        `domain_list`: optional path to a file with one domain per line; when
        given, it replaces the default domain list and is truncated to
        `n_sites` entries if `n_sites` is positive.
        """
        super(SurveyCrawler, self).__init__(**kwargs)

        self.max_data_size = kwargs.get('max_data_size')
        self.storage_objects = ['snitch_map']

        domain_list_path = kwargs.get('domain_list')
        if domain_list_path:
            with open(domain_list_path) as f:
                # skip blank lines so they are not crawled as empty domains
                self.domain_list = [
                    line.strip() for line in f if line.strip()]
            if self.n_sites > 0:
                self.domain_list = self.domain_list[:self.n_sites]
        else:
            self.domain_list = None

    def set_passive_mode(self):
        """Send the extension an `updateSettings` message enabling passiveMode."""
        self.load_extension_page(OPTIONS)
        script = '''
chrome.runtime.sendMessage({
    type: "updateSettings",
    data: { passiveMode: true }
});'''
        self.driver.execute_script(script)

    def start_browser(self):
        """Start the driver and switch the extension to passive mode."""
        self.start_driver()
        # don't block anything, just listen and log
        self.set_passive_mode()

    def merge_saved_data(self):
        """
        Merge the `snitch_map` of every `results-*.json` file in `out_path`.

        For a tracker present in several files, per-snitch entries from later
        files overwrite earlier ones, except the `length` entry, which is
        summed. Returns a dict with `version` and the merged `snitch_map`.
        """
        paths = glob.glob(os.path.join(self.out_path, 'results-*.json'))
        snitch_map = {}
        for p in paths:
            with open(p) as f:
                sm = json.load(f)['snitch_map']
            for tracker, snitches in sm.items():
                if tracker not in snitch_map:
                    snitch_map[tracker] = snitches
                    continue

                for snitch, data in snitches.items():
                    if snitch == 'length':
                        snitch_map[tracker]['length'] = \
                            int(snitch_map[tracker]['length']) + int(data)
                        continue
                    snitch_map[tracker][snitch] = data

        return {'version': self.version, 'snitch_map': snitch_map}

    def crawl(self):
        """
        Visit the top `n_sites` websites in the Tranco List (or the domains
        in `self.domain_list`, when one is set), in order, in a virtual
        browser with Privacy Badger installed. Afterwards, save the
        snitch_map that the Badger learned.

        Whenever the dumped data grows beyond `self.max_data_size`, it is
        saved to a `results-<first>-<last>.json` chunk and the browser is
        restarted. At the end the final chunk is saved, followed by the
        merged data.
        """
        if self.domain_list:
            domains = self.domain_list
        else:
            domains = get_domain_list(self.n_sites, self.exclude)

        self.logger.info(("starting new crawl:\n"
                          "\ttimeout: %ss\n"
                          "\twait time: %ss\n"
                          "\tbrowser: %s\n"
                          "\tsurvey mode: True\n"
                          "\tdomains to crawl: %d\n"
                          "\tTLDs to exclude: %s"),
                         self.timeout, self.wait_time, self.browser,
                         self.n_sites, self.exclude)

        # create an XVFB virtual display (to avoid opening an actual browser)
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        self.start_browser()

        # list of domains we actually visited
        visited = []
        last_data = None
        first_i = 0

        i = None
        for i, domain in enumerate(domains):
            # If we can't load the options page for some reason, treat it
            # like any other error
            try:
                # save the state of privacy badger before we do anything else
                last_data = self.dump_data()

                # If the localstorage data is getting too big, dump and
                # restart
                if size_of(last_data) > self.max_data_size:
                    self.save(last_data,
                              'results-%d-%d.json' % (first_i, i))
                    first_i = i + 1
                    last_data = {}
                    self.restart_browser(last_data)

                self.logger.info('visiting %d: %s', i + 1, domain)
                url = self.get_domain(domain)
                visited.append(url)
            except TimeoutException:
                self.logger.info('timeout on %s ', domain)
                # TODO: how to get rid of this nested try?
                try:
                    self.timeout_workaround()
                except WebDriverException as e:
                    if should_restart(e):
                        self.restart_browser(last_data)
            except WebDriverException as e:
                self.logger.error('%s %s: %s', domain, type(e).__name__,
                                  e.msg)
                if should_restart(e):
                    self.restart_browser(last_data)
            except KeyboardInterrupt:
                self.logger.warning(
                    "Keyboard interrupt. Ending scan after %d sites.", i + 1)
                break

        # `i` is still None when there was nothing to crawl; fall back to
        # zero so the summary and the file name below stay well-defined.
        attempted = 0 if i is None else i + 1
        last_i = 0 if i is None else i

        self.logger.info('Finished scan. Visited %d sites and errored on %d.',
                         len(visited), attempted - len(visited))

        self.logger.info('Getting data from browser storage...')
        try:
            data = self.dump_data()
        except WebDriverException:
            if last_data:
                self.logger.error(
                    "Could not get badger storage. Using cached data...")
                data = last_data
            else:
                self.logger.error('Could not export data. Exiting.')
                sys.exit(1)

        self.driver.quit()
        self.vdisplay.stop()

        self.save(data, 'results-%d-%d.json' % (first_i, last_i))
        self.save(self.merge_saved_data())
class aCloudGuru_qUrlCollectorSpider(Spider):
    # Spider that drives a Selenium Firefox session on a virtual display to
    # collect question URLs from an acloud.guru forum topic and write them
    # to a JSON file. (Python 2 code.)
    name = "aCloudGuru_qUrlCollectorSpider"
    allowed_domains = ["acloud.guru"]

    def __init__(self):
        self.start_urls = ["http://www.google.co.in"]

    def parse(self, response):
        # `response` is not used; the crawl itself runs through Selenium.
        self.setUpBrowser()

        dataDump = {}
        # Per-topic crawl settings, keyed by tag. Numeric settings are stored
        # as strings and converted with int() in collectUrls().
        aCloudTopicUrls = {}
        aCloudTopicUrls['sa-pro-s3'] = {
            'awsTag': 'sa-pro-s3',
            'sourceUrl':
            'https://acloud.guru/forums/aws-certified-solutions-architect-professional/s3',
            'crawled': 'False',
            'pgCrawled': 0,
            'crawlPgLimit': '10',
            'pageLoadWaitTime': '30'
        }
        aCloudTopicUrls['sa-pro-new'] = {
            'awsTag': 'sa-pro-new',
            'sourceUrl':
            'https://acloud.guru/forums/aws-certified-solutions-architect-associate/newest?p=1',
            'crawled': 'False',
            'pgCrawled': 0,
            'crawlPgLimit': '10',
            'pageLoadWaitTime': '25'
        }

        # Let's be nice and crawl only a limited number of pages.
        # Only the 'sa-pro-new' topic is crawled here.
        try:
            dataDump = self.collectUrls(aCloudTopicUrls['sa-pro-new'])
            self.writeToFile(dataDump)
            # print "\n===========Printing in mains=========\n"
            # pprint(dataDump)
        except:
            # NOTE(review): bare except - every failure, including one while
            # writing the file, is reported with this same message.
            print "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            print " Unable to get grab links "
            print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"

        self.tearDownBrowser()

    """ Function to setup the Browser """

    def setUpBrowser(self):
        # Set the web browser parameters to not show gui ( aka headless )
        # Ref - https://github.com/cgoldberg/xvfbwrapper
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()

        self.driver = webdriver.Firefox()

    """ Function to close the Browser """

    def tearDownBrowser(self):
        # Stop the browser & close the display
        # Although github says quit works, it throws me an error
        # Ref - https://github.com/SeleniumHQ/selenium/issues/1469
        self.driver.quit()
        self.vdisplay.stop()

    """ Function to collect the Urls in a given page """

    def collectUrls(self, urlMetadata):
        # Loads urlMetadata['sourceUrl'], walks up to 'crawlPgLimit' pages via
        # the 'Next' link, and returns the same urlMetadata dict with the
        # collected URLs ('uri'), 'crawled', 'pgCrawled' and 'dateScraped'
        # filled in. The dict is mutated in place.
        urlItems = []

        # The XPATH Location identifiers to make it configurable
        xpathDict = {}
        ## The XPATH ID of the element for which the page load waits before processing other requests
        xpathDict[
            'pgLoadConfirmElement'] = "//div[@class='col-sm-8 forum-room-thread-list']/thread-list-component/thread-list-item-component/div[@class='thread-list-item']/@href"
        xpathDict[
            'qPopular'] = "//ul[@class='nav nav-tabs']/li[@heading='Popular']/a"
        xpathDict[
            'qNewest'] = "//ul[@class='nav nav-tabs']/li[@heading='New']/a"
        xpathDict[
            'qText'] = "//div[@class='col-sm-8 forum-room-thread-list']/thread-list-component/thread-list-item-component"
        xpathDict['qURL'] = ".//div[@class='thread-list-item']"
        # nxtPageBtn_XPATH = "//div[@class='clearfix p']/li[@class='paginate_button next']/a"

        # The time to wait for the webpage to load, in seconds
        pgWtTime = int(urlMetadata['pageLoadWaitTime'])
        self.driver.set_page_load_timeout(pgWtTime)

        self.driver.get(urlMetadata['sourceUrl'])

        for crawlCount in range(int(urlMetadata['crawlPgLimit'])):
            try:
                # Check if the page has the necessary elements before we start scraping
                element_present_check_1 = WebDriverWait(
                    self.driver, pgWtTime).until(
                        EC.presence_of_all_elements_located(
                            (By.XPATH, xpathDict['pgLoadConfirmElement'])))
                # element_present_check_2 = WebDriverWait(self.driver, pgWtTime).until(EC.text_to_be_present_in_element_value((By.XPATH, ec_XPATH), "ago"))

                # Move to the most popular questions Tab
                # btnToClick = self.driver.find_element_by_xpath( xpathDict['qPopular'] )

                # Move to the New questions Tab
                btnToClick = self.driver.find_element_by_xpath(
                    xpathDict['qNewest'])
                self.driver.execute_script('arguments[0].click();', btnToClick)
                # Fixed pause after the click before reading the list.
                time.sleep(pgWtTime)

                # Find all the question div tags and iterate in for loop for the link reference
                qTextItems = self.driver.find_elements_by_xpath(
                    xpathDict['qText'])
                for qText in qTextItems:
                    qUrlList = qText.find_elements_by_xpath(xpathDict['qURL'])
                    for qUrl in qUrlList:
                        urlItems.append("https://acloud.guru" +
                                        qUrl.get_attribute('href'))

                urlMetadata['pgCrawled'] += 1
                print "\n\n\t~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
                print "\t All done in page : {0}, Lets go to page : {1}".format(
                    (urlMetadata['pgCrawled'] - 1), urlMetadata['pgCrawled'])
                print "\t~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n"

            except TimeoutException:
                # Stop the page from loading any further and carry on.
                self.driver.execute_script("window.stop();")
                print "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
                print " Timeout Exception : THE PAGE DID NOT LOAD PROPERLY "
                print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
            except:
                print "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
                print " THE PAGE DID NOT LOAD PROPERLY "
                print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
            finally:
                # NOTE(review): the for loop rebinds crawlCount on its next
                # iteration, so this increment has no lasting effect.
                crawlCount += 1

            nextBtn = self.driver.find_element_by_link_text('Next')
            # Wont work because of bug - https://github.com/SeleniumHQ/selenium/issues/2285
            # hover_over_nextBtn = self.driver.find_element_by_link_text('Next')
            # hover = ActionChains(self.driver).move_to_element(hover_over_nextBtn)
            # hover.perform()

            try:
                # Click the next button only if is active and not disabled, else break
                # find the parent and check if it is disabled
                btnClassTxt = nextBtn.find_element_by_xpath(
                    '..').get_attribute('class').encode('utf-8')
                if "disabled" not in btnClassTxt:
                    # Asynchronous execution
                    # self.driver.execute_async_script('arguments[0].click();', nextBtn)
                    self.driver.execute_script('arguments[0].click();',
                                               nextBtn)
                else:
                    print "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
                    print " REACHED THE END OF THE GALAXY "
                    print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
                    break
            except httplib.BadStatusLine:
                print "\n\n\t\tERROR : FAILED - To click on 'Next' button to navigate to next page\n"
                # pass
                break

        # De-duplicate the list
        urlItemsSet = set(urlItems)

        # Prepare data to be dumped to file
        urlMetadata['pgCrawled'] = str(urlMetadata['pgCrawled'])
        urlMetadata['uri'] = list(urlItemsSet)
        urlMetadata['crawled'] = 'True'
        urlMetadata['dateScraped'] = date.today().strftime(
            "%Y-%m-%d") + "-" + datetime.now().strftime('%H-%M')

        return urlMetadata

    def writeToFile(self, dataDump):
        # Writes dataDump as indented, key-sorted JSON into the "LnksToScrape"
        # directory three levels above this file; the file name is built from
        # the 'dateScraped' and 'awsTag' entries.
        outputDir = os.path.abspath(__file__ + "/../../../")
        outputFileName = '{0}-acloudguru-{1}.json'.format(
            dataDump['dateScraped'], dataDump['awsTag'])
        outputFileLoc = os.path.join(outputDir, "LnksToScrape", outputFileName)

        with open(outputFileLoc, 'w') as f:
            json.dump(dataDump, f, indent=4, sort_keys=True)
def startup(self, detected_browsers):
    """Validate that all of the external dependencies are installed.

    Checks required and optional Python modules and command-line tools,
    starts Xvfb when requested (or when running on Linux without a display),
    picks the display to capture from, and prepares adb and traffic shaping.

    `detected_browsers` is a collection of browser names; geckodriver and
    selenium are only checked when it contains 'Firefox' and the webdriver
    option is set.

    Returns True only if every required check passed.
    """
    ret = True

    # default /tmp/wptagent as an alive file on Linux
    if self.options.alive is None:
        if platform.system() == "Linux":
            self.options.alive = '/tmp/wptagent'
        else:
            self.options.alive = os.path.join(os.path.dirname(__file__),
                                              'wptagent.alive')
    self.alive()

    # Required imports. `requires(...)` comes first in each expression so
    # that every module is checked even after an earlier failure.
    ret = self.requires('dns', 'dnspython') and ret
    ret = self.requires('monotonic') and ret
    ret = self.requires('PIL', 'pillow') and ret
    ret = self.requires('psutil') and ret
    ret = self.requires('requests') and ret
    if not self.options.android and not self.options.iOS:
        ret = self.requires('tornado') and ret
    # Windows-specific imports
    if platform.system() == "Windows":
        ret = self.requires('win32api', 'pywin32') and ret
    if self.options.webdriver and 'Firefox' in detected_browsers:
        # Combine with `ret` so earlier failures are not masked.
        ret = self.requires('selenium') and ret

    # Optional imports
    self.requires('brotli')
    self.requires('fontTools', 'fonttools')

    # Try patching ws4py with a faster lib
    try:
        self.requires('wsaccel')
        import wsaccel
        wsaccel.patch_ws4py()
    except Exception:
        logging.debug('wsaccel not installed, Chrome debug interface will be slower than it could be')

    try:
        subprocess.check_output(['python', '--version'])
    except Exception:
        print("Make sure python 2.7 is available in the path.")
        ret = False

    try:
        subprocess.check_output('{0} -version'.format(self.image_magick['convert']), shell=True)
    except Exception:
        print("Missing convert utility. Please install ImageMagick and make sure it is in the path.")
        ret = False

    try:
        subprocess.check_output('{0} -version'.format(self.image_magick['mogrify']), shell=True)
    except Exception:
        print("Missing mogrify utility. Please install ImageMagick and make sure it is in the path.")
        ret = False

    # Missing Linux tools are installed with apt rather than reported.
    if platform.system() == "Linux":
        try:
            subprocess.check_output(['traceroute', '--version'])
        except Exception:
            logging.debug("Traceroute is missing, installing...")
            subprocess.call(['sudo', 'apt', '-yq', 'install', 'traceroute'])

        if self.options.webdriver and 'Firefox' in detected_browsers:
            try:
                subprocess.check_output(['geckodriver', '-V'])
            except Exception:
                logging.debug("geckodriver is missing, installing...")
                subprocess.call(['sudo', 'apt', '-yq', 'install', 'firefox-geckodriver'])

    # If we are on Linux and there is no display, enable xvfb by default
    if platform.system() == "Linux" and not self.options.android and \
            not self.options.iOS and 'DISPLAY' not in os.environ:
        self.options.xvfb = True

    if self.options.xvfb:
        ret = self.requires('xvfbwrapper') and ret
        if ret:
            from xvfbwrapper import Xvfb
            self.xvfb = Xvfb(width=1920, height=1200, colordepth=24)
            self.xvfb.start()

    # Figure out which display to capture from
    if platform.system() == "Linux" and 'DISPLAY' in os.environ:
        logging.debug('Display: %s', os.environ['DISPLAY'])
        self.capture_display = os.environ['DISPLAY']
    elif platform.system() == "Darwin":
        # ffmpeg prints the device list on stderr
        proc = subprocess.Popen('ffmpeg -f avfoundation -list_devices true -i ""',
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        _, err = proc.communicate()
        for line in err.splitlines():
            matches = re.search(r'\[(\d+)\] Capture screen', line.decode('utf-8'))
            if matches:
                self.capture_display = matches.group(1)
                break
    elif platform.system() == "Windows":
        self.capture_display = 'desktop'

    if self.options.throttle:
        try:
            subprocess.check_output('sudo cgset -h', shell=True)
        except Exception:
            print("Missing cgroups, make sure cgroup-tools is installed.")
            ret = False

    # Fix Lighthouse install permissions
    if platform.system() != "Windows" and sys.version_info < (3, 0):
        from internal.os_util import run_elevated
        run_elevated('chmod', '-R 777 ~/.config/configstore/')
        try:
            import getpass
            run_elevated('chown', '-R {0}:{0} ~/.config'.format(getpass.getuser()))
        except Exception:
            # best effort: ownership fix-up failures are deliberately ignored
            pass

    # Check for Node 10+
    if self.get_node_version() < 10.0:
        if platform.system() == "Linux":
            # This only works on debian-based systems
            logging.debug('Updating Node.js to 12.x')
            subprocess.call('curl -sL https://deb.nodesource.com/setup_12.x | sudo -E bash -',
                            shell=True)
            subprocess.call(['sudo', 'apt-get', 'install', '-y', 'nodejs'])
        if self.get_node_version() < 10.0:
            logging.warning("Node.js 10 or newer is required for Lighthouse testing")

    # Check the iOS install
    if self.ios is not None:
        # Combine with `ret` so earlier failures are not masked.
        ret = self.ios.check_install() and ret

    if not self.options.android and not self.options.iOS and not self.options.noidle:
        self.wait_for_idle(300)
    if self.adb is not None:
        if not self.adb.start():
            print("Error configuring adb. Make sure it is installed and in the path.")
            ret = False
    self.shaper.remove()
    if not self.shaper.install():
        if platform.system() == "Windows":
            print("Error configuring traffic shaping, make sure secure boot is disabled.")
        else:
            print("Error configuring traffic shaping, make sure it is installed.")
        ret = False

    # Update the Windows root certs
    if platform.system() == "Windows":
        self.update_windows_certificates()

    return ret
def __enter__(self):
    """Start a virtual display sized `self.size`, create the wx app on it
    and move the simulated mouse pointer to (0, 0).

    Returns `self` so the object can be used as `with ... as ctx`.
    """
    self.vdisplay = Xvfb(width=self.size[0], height=self.size[1])
    self.vdisplay.start()
    try:
        self.app = wx.App(False)
        wx.UIActionSimulator().MouseMove(0, 0)
    except Exception:
        # __exit__ is not called when __enter__ raises, so the display
        # started above has to be stopped here or it would leak.
        self.vdisplay.stop()
        raise
    return self
def _record_error(status, field, exc):
    """Append ``str(exc)`` to ``status[field]``, creating the entry if needed."""
    status[field] = status.get(field, '') + str(exc)


def _read_chat_bubbles(driver):
    """Return the inner text of every chatbot bubble currently in the page."""
    return driver.execute_script(""" var z = document.getElementsByClassName('nds-chat-comment-by-nds-chat'); var arr = Array.prototype.slice.call(z); t = arr.map(function(e){return e.innerText}); return t """)


def run_inputs(instance_id, base_name, base_page):
    """Drive one chat-widget session in Chrome and record what happened.

    The session opens ``base_page``, walks the chatbot dialogue, asks for an
    agent and then either records that no agent was available, exchanges a
    few message batches with the live agent, or times out.

    Args:
        instance_id: Identifier of this session; appended to ``base_name``.
        base_name: Prefix of the instance name (also sent as the user name).
        base_page: URL of the page hosting the chat launcher.

    Returns:
        A dict with a single key ``base_name + str(instance_id)`` whose value
        holds ``timestamps``, ``status`` and, depending on the path taken,
        ``sent``, ``received``, ``status_message``, ``lp_errors``,
        ``other_errors`` and ``closing_error``.

    Errors are never propagated: every stage catches ``BaseException`` and
    stores the message in the returned dict, as the original code did.
    """
    instance_name = base_name + str(instance_id)
    status = {'timestamps': {}}
    timestamps = status['timestamps']
    instance_status = {str(instance_name): status}
    lp_status = 0
    timestamps['0_instance_born'] = str(datetime.utcnow())

    # Live person status:
    #   lp_status = 1 -> "no agents available" message was shown
    #   lp_status = 2 -> connected to LivePerson
    no_agents_available_text_sub_1 = "Por el momento no hay agentes disponibles"
    no_agents_available_text_sub_2 = "Por favor comunícate a nuestro centro de contacto al (01 55) 5721-3390"
    no_agents_available_text = "Por el momento no hay agentes disponibles.Por favor comunícate a nuestro centro de contacto al (01 55) 5721-3390."
    connected_to_lp_text = "¡Buen día! Bienvenido al Chat de HSBC."

    # Bound up front so the error path can tell whether a display exists.
    vdisplay = None
    try:
        if sys.platform != 'darwin' and use_xvfb:
            print('Starting Xvfb')
            vdisplay = Xvfb(width=1280, height=740)
            vdisplay.start()
        chrome_options = webdriver.ChromeOptions()
        if sys.platform != 'darwin':
            chrome_options.add_argument('headless')
            chrome_options.add_argument('no-sandbox')
        print('Will create driver')
        driver = webdriver.Chrome(chrome_options=chrome_options)
        print('driver:' + str(driver))
    except BaseException as e:
        status['status'] = 'could_not_open'
        status['status_message'] = str(e)
        if vdisplay is not None:
            try:
                print('Closing Xvfb')
                vdisplay.stop()
            except BaseException as stop_error:
                print(stop_error)
        return instance_status

    try:
        driver.get(base_page)
        timestamps['1_requested_website'] = str(datetime.utcnow())
        # Wait for chat launcher
        WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.ID, 'nds-chat-launcher')))
        # Click on chat launcher
        timestamps['2_chat_became_available'] = str(datetime.utcnow())
        driver.find_element_by_id('nds-chat-launcher').click()
        # Move to iframe
        WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.ID, 'nds-chat-iframe')))
        iframe = driver.find_elements_by_tag_name('iframe')[0]
        driver.switch_to_frame(iframe)
        # Wait for the customer-segment question
        WebDriverWait(driver, 30, poll_frequency=0.1).until(EC.visibility_of_element_located((By.ID, 'nds-chatbot-message-3')))
        WebDriverWait(driver, 15, poll_frequency=0.1).until(EC.text_to_be_present_in_element((By.XPATH, '//*[@id="nds-chatbot-message-3"]/div[1]/div'), '¿A qué segmento perteneces?'))
        timestamps['3_segmento_cliente_available'] = str(datetime.utcnow())
        # Click on the segment option: inject jQuery and click the third option
        driver.execute_script(""" (function(e,s){e.src=s;e.onload=function(){jQuery.noConflict();console.log('jQuery 2.2.4 injected');jQuery('#nds-chatbot-message-3 > div.nds-chat-comment-option-wrap > div:nth-child(3)').click()};document.head.appendChild(e);})(document.createElement('script'),'//code.jquery.com/jquery-2.2.4.min.js') """)
        timestamps['4_segmento_cliente_selected'] = str(datetime.utcnow())
        time.sleep(10)
        # Wait for name question
        WebDriverWait(driver, 15, poll_frequency=0.1).until(EC.visibility_of_element_located((By.ID, 'nds-chatbot-message-4')))
        WebDriverWait(driver, 15, poll_frequency=0.1).until(EC.text_to_be_present_in_element((By.XPATH, '//*[@id="nds-chatbot-message-4"]/div/div'), '¿Cómo te llamas?'))
        timestamps['5_como_te_llamas_prompted'] = str(datetime.utcnow())
        # Get text input field and send name
        input_field = driver.find_element_by_id('txMessage')
        input_field.send_keys(instance_name, Keys.ENTER)
        timestamps['6_instance_name_sent'] = str(datetime.utcnow())
        # Wait for the "¿Qué puedo hacer por ti?" prompt
        WebDriverWait(driver, 15, poll_frequency=0.1).until(EC.visibility_of_element_located((By.ID, 'nds-chatbot-message-5')))
        # Ask for agent
        input_field.send_keys('agente', Keys.ENTER)
        requested_agent_time = datetime.utcnow()
        timestamps['7_requested_agent'] = str(requested_agent_time)

        time_out = 0
        while lp_status == 0:
            chat_nds_bubbles = _read_chat_bubbles(driver)
            # First bubble that carries either of the two terminal messages.
            last_element_text = ''
            for el in chat_nds_bubbles:
                if connected_to_lp_text in el or (no_agents_available_text_sub_1 in el and no_agents_available_text_sub_2 in el):
                    last_element_text = el
                    break
            # Compare with spaces and newlines removed on both sides.
            if no_agents_available_text.replace(' ', '').replace('\n', '') in last_element_text.replace(' ', '').replace('\n', ''):
                lp_status = 1
                no_agents_available_time = datetime.utcnow()
                timestamps['8_no_agents_available'] = str(no_agents_available_time)
                timestamps['delta_since_agent_requested'] = str(no_agents_available_time - requested_agent_time)
                timestamps['chatbot_history'] = str(chat_nds_bubbles)
                break
            elif connected_to_lp_text in last_element_text:
                connected_to_lp_time = datetime.utcnow()
                timestamps['8_connected_to_lp_time'] = str(connected_to_lp_time)
                timestamps['delta_since_agent_requested'] = str(connected_to_lp_time - requested_agent_time)
                timestamps['chatbot_history'] = str(chat_nds_bubbles)
                lp_status = 2
                status['sent'] = []
                status['received'] = []
                # send first message
                try:
                    initial_input = 'hola!'
                    input_field = driver.find_element_by_id('txMessage')
                    input_field.send_keys(initial_input, Keys.ENTER)
                    sent_time = str(datetime.utcnow())
                    status['sent'].append({sent_time: initial_input})
                except BaseException as e:
                    print('Problems while sending initial_input ' + str(instance_id))
                    print(e)
                    _record_error(status, 'lp_errors', e)
                break
            time.sleep(1)
            time_out += 1
            if time_out > 60 * 3.5:
                timed_out_timestamp = datetime.utcnow()
                timestamps['8_timed_out_timestamp'] = str(timed_out_timestamp)
                timestamps['delta_since_agent_requested'] = str(timed_out_timestamp - requested_agent_time)
                timestamps['chatbot_history'] = str(chat_nds_bubbles)
                break
    except BaseException as e:
        print('Problems with instance ' + str(instance_id))
        print(e)
        _record_error(status, 'other_errors', e)

    if lp_status == 1:
        print(base_name + str(instance_id) + ' got no agents available message')
        status['status'] = 'no_agents'
    elif lp_status == 2:
        print(base_name + str(instance_id) + ' contacted liveperson')
        status['status'] = 'contacted_liveperson'
        time_in_lp = 60 * 3.5 + 1
        finish_lp = 1
        time_to_send_next_batch = 0
        number_of_batches_to_send = 5
        current_send_batch = 0
        time_between_batches = int(int(time_in_lp / number_of_batches_to_send) - 1)
        last_nds_chatbubble_included = 0
        while finish_lp < time_in_lp:
            if finish_lp > time_to_send_next_batch and current_send_batch < number_of_batches_to_send:
                current_send_batch += 1
                time_to_send_next_batch += time_between_batches
                inputs_to_send = ['hola.. ' + str(current_send_batch),
                                  'como estas.. ' + str(current_send_batch),
                                  'quiero ayuda.. ' + str(current_send_batch)]
                try:
                    input_field = driver.find_element_by_id('txMessage')
                    for input_ in inputs_to_send:
                        input_field.send_keys(input_, Keys.ENTER)
                        sent_time = str(datetime.utcnow())
                        status['sent'].append({sent_time: input_})
                except BaseException as e:
                    print('Problems during LivePerson with instance ' + str(instance_id))
                    print(e)
                    _record_error(status, 'lp_errors', e)
            try:
                # Get received messages
                chat_nds_bubbles = _read_chat_bubbles(driver)
                if last_nds_chatbubble_included == 0:
                    # Start from the bubble holding the agent greeting.
                    for index, el in enumerate(chat_nds_bubbles):
                        if connected_to_lp_text in el:
                            last_nds_chatbubble_included = index
                            break
                while last_nds_chatbubble_included < len(chat_nds_bubbles) - 1:
                    last_nds_chatbubble_included += 1
                    new_m = chat_nds_bubbles[last_nds_chatbubble_included].replace('Justo ahora', '').replace('\n', '').replace('1 minuto antes', '').replace('2 minutos antes', '')
                    new_m_timestamp = str(datetime.utcnow())
                    status['received'].append({new_m_timestamp: new_m})
            except BaseException as e:
                print('Problems during LivePerson with instance ' + str(instance_id))
                print(e)
                _record_error(status, 'lp_errors', e)
            time.sleep(1)
            finish_lp += 1
    else:
        print(base_name + str(instance_id) + ' timed out')
        status['status'] = 'timed_out'

    timestamps['9_closing_instance'] = str(datetime.utcnow())
    print('Closing ' + base_name + str(instance_id))
    try:
        # quit() ends the whole WebDriver session; close() only closed the
        # current window and left the driver process running.
        driver.quit()
    except BaseException as e:
        status['closing_error'] = str(e)
    # Stop the display independently so a failing driver shutdown cannot
    # leave the Xvfb process behind.
    if vdisplay is not None:
        try:
            vdisplay.stop()
        except BaseException as e:
            _record_error(status, 'closing_error', e)
    return instance_status
def available_curses(carne, passw, visible=True, close=True):
    """Log into ematricula.ucr.ac.cr and list the pending courses of the plan.

    The pending-course table is printed, a summary is written to
    ``cursos.txt`` in the current directory, and the parsed courses are
    returned.

    Example usage:
        Launch an invisible browser and get the courses.
        -> available_curses('b53777', 'mypassword', visible=False)
        Launch a visible browser to see step by step what is done.
        -> available_curses('b53777', 'mypassword', visible=True)
        Launch a visible browser and don't close it when finished.
        -> available_curses('b53777', 'mypassword', visible=True, close=False)

    Args:
        carne: Student id typed into the ``carne`` login field.
        passw: PIN typed into the ``pin`` login field.
        visible: When false, the browser runs inside an Xvfb virtual display.
        close: When true, the browser is quit before returning.

    Returns:
        A list of ``Subject`` objects with ``sigla``, ``curso`` and
        ``creditos`` filled in, one per table row.
    """
    # Create the virtual display; it is only started for invisible runs.
    vdisplay = Xvfb()
    if not visible:
        vdisplay.start()

    driver = None
    try:
        # Open the browser and log in.
        driver = webdriver.Firefox()
        driver.get('https://ematricula.ucr.ac.cr/ematricula/login.do')
        carne_box = driver.find_element_by_name('carne')
        pass_box = driver.find_element_by_name('pin')
        carne_box.send_keys(carne)
        pass_box.send_keys(passw)
        driver.find_element_by_name('crudMethod').click()
        wait_until_title_contains(driver, 'Sistema eMatricula')

        driver.find_element_by_link_text('Cursos Pendientes del Plan').click()
        wait_until_element_is_located(driver, 'formCarreras')
        career_dropdown = driver.find_element_by_name("carrera")
        # The first <option> is skipped.
        careers = career_dropdown.find_elements_by_tag_name('option')[1:]
        for each_career in careers:
            print(each_career.get_attribute('innerHTML'))
            print(each_career.get_attribute('value'))
            each_career.click()

        wait_until_class_is_located(driver, 'data')
        table = driver.find_element_by_class_name("data")
        body = table.find_element_by_xpath("//tbody")
        # The first <tr> is skipped.
        tr = body.find_elements_by_xpath("//tr")[1:]

        # An empty table yields no cells instead of raising IndexError.
        my_courses = []
        if tr:
            my_courses = [
                data.get_attribute('innerHTML').strip()
                for data in tr[0].find_elements_by_xpath("//td")
            ]
        print(
            "Los cursos se descargaron exitosamente. Cantidad de cursos faltantes"
        )
        print(len(tr))

        clean_courses = []
        with open("cursos.txt", "w") as courses_file:
            # The flat cell list is read five cells per row.
            for i in range(len(tr)):
                new_subject = Subject()
                new_subject.sigla = my_courses[i * 5]
                new_subject.curso = my_courses[i * 5 + 1]
                new_subject.creditos = my_courses[i * 5 + 2]
                clean_courses.append(new_subject)
                courses_file.write("Materia")
                courses_file.write(my_courses[i * 5])
        return clean_courses
    finally:
        # Release the browser and display on errors as well as on success.
        try:
            if close and driver is not None:
                driver.quit()
        finally:
            if not visible:
                vdisplay.stop()
def init():
    """Start a 500x500 Xvfb virtual display, then enable mlab notebook mode."""
    screen_size = {"width": 500, "height": 500}
    virtual_screen = Xvfb(**screen_size)
    virtual_screen.start()
    mlab.init_notebook()
self.driver.execute_script("return window.performance"), ensure_ascii=False) self.__store_into_file(args, 'perf', performance) def stop_all(self): #stop server and driver from datetime import datetime print "%s: Finish" % (datetime.now()) self.server.stop() self.driver.quit() if __name__ == '__main__': # for headless execution with Xvfb() as xvfb: parser = argparse.ArgumentParser( description='Performance Testing using Browsermob-Proxy and Python' ) parser.add_argument('-u', '--url', help='URL to test', required=True) parser.add_argument('-b', '--browser', help='Select Chrome or Firefox', required=True) parser.add_argument('-p', '--path', help='Select path for output files', required=False) args = vars(parser.parse_args()) path = os.getenv('BROWSERMOB_PROXY_PATH', '/browsermob-proxy-2.1.2/bin/browsermob-proxy')
class Client:
    """Browser-driven client that downloads a user's images from tadpoles.com.

    Used as a context manager: entering starts an Xvfb display and a Firefox
    WebDriver, exiting shuts both down.
    """

    COOKIE_FILE = "state/cookies.pkl"
    ROOT_URL = "http://www.tadpoles.com/"
    HOME_URL = "https://www.tadpoles.com/parents"
    MIN_SLEEP = 1
    MAX_SLEEP = 3

    def __init__(self):
        self.init_logging()

    def init_logging(self):
        """Expose the 'app' logger's methods directly on the instance."""
        logger = logging.getLogger('app')
        self.info = logger.info
        self.debug = logger.debug
        self.warning = logger.warning
        self.critical = logger.critical
        self.exception = logger.exception

    def __enter__(self):
        """Start the virtual display and the browser; return ``self``."""
        self.info("Starting xvfb display")
        self.vdisplay = Xvfb()
        self.vdisplay.start()
        try:
            self.info("Starting browser")
            self.br = self.browser = webdriver.Firefox()
            self.br.implicitly_wait(10)
        except BaseException:
            # __exit__ is not called when __enter__ raises, so stop the
            # display here instead of orphaning it.
            self.vdisplay.stop()
            raise
        return self

    def __exit__(self, *args):
        """Quit the browser and always stop the virtual display."""
        self.info("Shutting down browser")
        try:
            self.browser.quit()
        finally:
            self.info("Shutting down xfvb display")
            self.vdisplay.stop()

    def sleep(self, minsleep=None, maxsleep=None):
        """Sleep a random duration between the given bounds, in seconds.

        Falsy bounds fall back to ``MIN_SLEEP`` / ``MAX_SLEEP``.
        """
        _min = minsleep or self.MIN_SLEEP
        _max = maxsleep or self.MAX_SLEEP
        duration = randrange(_min * 100, _max * 100) / 100.0
        self.debug('Sleeping %r' % duration)
        time.sleep(duration)

    def navigate_url(self, url):
        """Load ``url`` in the browser."""
        self.info("Navigating to %r", url)
        self.br.get(url)

    def load_cookies(self):
        """Load pickled cookies from ``COOKIE_FILE`` into ``self.cookies``.

        Creates the ``state`` directory if it is missing. The open call
        raises if the cookie file itself does not exist.
        """
        self.info("Loading cookies.")
        if not isdir('state'):
            os.mkdir('state')
        with open(self.COOKIE_FILE, "rb") as f:
            self.cookies = pickle.load(f)

    def dump_cookies(self):
        """Pickle the browser's current cookies to ``COOKIE_FILE``."""
        self.info("Dumping cookies.")
        with open(self.COOKIE_FILE, "wb") as f:
            pickle.dump(self.br.get_cookies(), f)

    def add_cookies_to_browser(self):
        """Add each loaded cookie whose domain ends the current URL."""
        self.info("Adding the cookies to the browser.")
        for cookie in self.cookies:
            if self.br.current_url.strip('/').endswith(cookie['domain']):
                self.br.add_cookie(cookie)

    def requestify_cookies(self):
        """Build ``self.req_cookies``, a name -> value dict for requests."""
        self.info("Transforming the cookies for requests lib.")
        self.req_cookies = {
            s_cookie["name"]: s_cookie["value"] for s_cookie in self.cookies
        }

    def switch_windows(self):
        '''Switch to the other window.'''
        self.info("Switching windows.")
        all_windows = set(self.br.window_handles)
        self.info("All windows.")
        self.info(all_windows)
        current_window = {self.br.current_window_handle}
        self.info("Current windows.")
        self.info(current_window)
        other_window = (all_windows - current_window).pop()
        self.br.switch_to.window(other_window)

    def do_login(self):
        """Log in through the site's email/password form."""
        # Navigate to login page.
        self.info("Navigating to login page.")
        self.br.find_element_by_id("login-button").click()
        self.br.find_element_by_class_name("tp-block-half").click()
        self.br.find_element_by_class_name("other-login-button").click()
        self.info(self.br.current_url)

        # Enter email.
        self.info(" Sending username.")
        email = self.br.find_element_by_css_selector(
            ".controls input[type='text']")
        email.send_keys(input("Enter email: "))

        # Enter password.
        self.info(" Sending password.")
        passwd = self.br.find_element_by_css_selector(
            ".controls input[type='password']")
        # NOTE(review): this statement was damaged in the source and has been
        # reconstructed; getpass is used so the password is not echoed.
        passwd.send_keys(getpass("Enter password: "))

        # Click "submit".
        self.info("Sleeping 2 seconds.")
        self.sleep(minsleep=2)
        self.info("Clicking 'sumbit' button.")
        self.br.find_element_by_css_selector(
            ".tp-left-contents .btn-primary").click()
        self.sleep(minsleep=2)
        self.info("New url")
        self.info(self.br.current_url)

    def do_google_login(self):
        """Log in through the Google sign-in popup, then switch back."""
        # Navigate to login page.
        self.info("Navigating to login page.")
        self.br.find_element_by_id("login-button").click()
        self.br.find_element_by_class_name("tp-block-half").click()
        for element in self.br.find_elements_by_tag_name("img"):
            if "btn-google.png" in element.get_attribute("src"):
                self.info(element)
                self.info("Clicking Google Button.")
                element.click()

        # Sleeping really quick.
        self.info("Sleeping 2 seconds.")
        self.sleep(minsleep=2)

        # Focus on the google auth popup.
        self.switch_windows()

        # Enter email.
        email = self.br.find_element_by_id("identifierId")
        email.send_keys(input("Enter email: "))
        email.submit()
        self.br.find_element_by_id("identifierNext").click()
        self.info("Sleeping 2 seconds.")
        self.sleep(minsleep=2)

        # Enter password.
        # NOTE(review): the next three statements were damaged in the source.
        # The locator strategy and the submit() call are reconstructed by
        # analogy with the email step above; confirm against the live page.
        password = self.br.find_element_by_name("password")
        password.send_keys(getpass("Enter password:"))
        password.submit()
        self.br.find_element_by_id("passwordNext").click()
        self.info("Sleeping 2 seconds.")
        self.sleep(minsleep=2)

        # Switch back to tadpoles.
        self.info("Switching windows.")
        all_windows = set(self.br.window_handles)
        self.info(all_windows)
        self.info("Switching to window")
        main_window = all_windows.pop()
        self.info(main_window)
        self.br.switch_to.window(main_window)

    def iter_monthyear(self):
        '''Yields pairs of elements for each year/month tile on the right
        hand side of the user's home page.

        Exits the process with status 0 once no further tile is found.
        '''
        month_xpath_tmpl = '//*[@id="app"]/div[4]/div[1]/ul/li[%d]/div/div/div/div/span[%d]'
        month_index = 1
        while True:
            month_xpath = month_xpath_tmpl % (month_index, 1)
            year_xpath = month_xpath_tmpl % (month_index, 2)

            # Go home if not there already.
            if self.br.current_url != self.HOME_URL:
                self.navigate_url(self.HOME_URL)
            try:
                # Find the next month and year elements.
                month = self.br.find_element_by_xpath(month_xpath)
                year = self.br.find_element_by_xpath(year_xpath)
            except NoSuchElementException:
                # We reached the end of months on the profile page.
                self.warning("No months left to scrape. Stopping.")
                sys.exit(0)

            # save_image reads these to build the target path.
            self.month = month
            self.year = year
            yield month, year
            month_index += 1

    def iter_urls(self):
        '''Yield ``(url, day)`` for every image of every month tile.'''
        re_url = re.compile(r'\("([^"]+)')
        # For each month on the dashboard...
        for month, year in self.iter_monthyear():
            # Navigate to the next month.
            month.click()
            self.warning("Getting urls for month: %r" % month.text)
            self.sleep(minsleep=2)
            for div in self.br.find_elements_by_xpath(
                    "//div[@class='well left-panel pull-left']/ul/li/div"):
                url = re_url.search(div.get_attribute("style"))
                if not url:
                    continue
                url = url.group(1)
                # The original followed this with a replace of
                # '&thumbnail=true', which could never match afterwards.
                # Only the effective call is kept so URLs, and the file
                # names derived from them, stay the same.
                url = url.replace('thumbnail=true', '')
                url = 'https://www.tadpoles.com' + url
                daymonth = div.find_element_by_xpath(
                    "./div/div[@class='header note mask']/span[@class='name']/span"
                ).text
                dayarray = daymonth.split('/')
                day = format(int(dayarray[1]), '02d')
                yield url, day

    def _run_exiftool(self, tag_assignment, filename, log_prefix):
        """Run exiftool with one ``TAG=value`` assignment on ``filename``.

        The command is passed as an argument list, not through a shell,
        because ``filename`` is derived from a remote URL.
        """
        import subprocess
        command = ('exiftool -overwrite_original "-' + tag_assignment +
                   '" "' + filename + '"')
        self.info(log_prefix % command)
        try:
            subprocess.call(
                ['exiftool', '-overwrite_original', '-' + tag_assignment,
                 filename])
        except OSError as exc:
            # A missing exiftool binary must not abort the download run.
            self.warning("Could not run exiftool: %s" % exc)

    def save_image(self, url, day):
        '''Save an image locally using requests and stamp its date.

        Raises:
            DownloadError: if the HTTP status is not 200.
        '''
        # Make the local filename.
        _, key = url.split("key=")
        filename_parts = ['img', self.year.text, self.month.text, '%s']
        filename_base = abspath(join(*filename_parts) % key)
        filename = filename_base + '.jpg'

        # Only download if the file doesn't already exist.
        if isfile(filename):
            self.debug("Already downloaded: %s" % filename)
            return
        elif isfile(filename_base + '.png'):
            self.debug("Already downloaded: %s.png" % filename_base)
            return
        else:
            self.info("Saving: %s" % filename)
            self.sleep()

        # Make sure the parent dir exists.
        dr = dirname(filename)
        if not isdir(dr):
            os.makedirs(dr)

        # Download it with requests.
        resp = requests.get(url, cookies=self.req_cookies, stream=True)
        if resp.status_code == 200:
            with open(filename, 'wb') as f:
                for chunk in resp.iter_content(1024):
                    f.write(chunk)
        else:
            msg = 'Error (%r) downloading %r'
            raise DownloadError(msg % (resp.status_code, url))

        # Set date for exif.
        months = dict(jan="01", feb="02", mar="03", apr="04", may="05",
                      jun="06", jul="07", aug="08", sep="09", oct="10",
                      nov="11", dec="12")
        yearmonth = self.year.text + ':' + months[
            self.month.text] + ':' + day + ' 12:00:00'

        # Check if the file is actually a png.
        if imghdr.what(filename) == 'png':
            self.info(" File is a png - renaming")
            os.rename(filename, filename_base + '.png')
            filename = filename_base + '.png'
            self._run_exiftool('PNG:CreationTime=' + yearmonth, filename,
                               " Adding png exif: %s")
        self._run_exiftool('AllDates=' + yearmonth, filename,
                           " Adding exif: %s")

    def download_images(self):
        '''Login to tadpoles.com and download all user's images.

        Uses saved cookies when present; otherwise asks for a login type,
        logs in and saves the cookies.
        '''
        self.navigate_url(self.ROOT_URL)
        try:
            self.load_cookies()
        except FileNotFoundError:
            login_type = None
            while login_type is None:
                # NOTE(review): the prompt and the Google branch header were
                # damaged in the source and are reconstructed to mirror the
                # email branch below.
                input_value = input(
                    "Login Type - [G]oogle or [E]mail/password: ")
                if input_value == "G" or input_value == "g":
                    login_type = 'google'
                    self.info("Doing Google login...")
                    self.do_google_login()
                elif input_value == "E" or input_value == "e":
                    login_type = 'email'
                    self.info("Doing Email login...")
                    self.do_login()
                else:
                    self.info(
                        "-- Invalid choice entered - please choose 'G' or 'E'")
            self.dump_cookies()
            self.load_cookies()
            self.add_cookies_to_browser()
            self.navigate_url(self.HOME_URL)
        else:
            self.add_cookies_to_browser()
            self.navigate_url(self.HOME_URL)

        # Get the cookies ready for requests lib.
        self.requestify_cookies()
        for url, day in self.iter_urls():
            try:
                self.save_image(url, day)
            except DownloadError as exc:
                self.exception(exc)

    def main(self):
        """Run the download inside the display/browser context."""
        with self as client:
            try:
                client.download_images()
            except Exception as exc:
                self.exception(exc)
def setUp(self):
    """Start a 1024x768 virtual display and open Firefox inside it."""
    self.xvfb = Xvfb(width=1024, height=768)
    self.xvfb.start()
    try:
        self.browser = webdriver.Firefox()
    except BaseException:
        # unittest does not run tearDown when setUp raises, so the display
        # must be stopped here or it is left running.
        self.xvfb.stop()
        raise
def imap_py(**kwargs):
    """Log into a site with Firefox, then upload the downloaded CSV to S3.

    The CSV is converted to newline-delimited JSON before the upload. All
    settings are read from ``kwargs['templates_dict']``:
    ``selenium_conn_id`` (connection holding host, user and password),
    ``filename`` (CSV name inside ``/root/Downloads``), ``s3_conn_id``,
    ``s3_bucket``, ``s3_key`` and ``date`` (stored in each row as
    ``run_date``).
    """
    templates = kwargs.get('templates_dict') or {}
    selenium_conn_id = templates.get('selenium_conn_id')
    filename = templates.get('filename')
    s3_conn_id = templates.get('s3_conn_id')
    s3_bucket = templates.get('s3_bucket')
    s3_key = templates.get('s3_key')
    date = templates.get('date')

    @provide_session
    def get_conn(conn_id, session=None):
        conn = (session.query(Connection).filter(
            Connection.conn_id == conn_id).first())
        return conn

    # One lookup is enough; the original queried three times.
    conn = get_conn(selenium_conn_id)
    url = conn.host
    email = conn.user
    pwd = conn.password

    vdisplay = Xvfb()
    vdisplay.start()
    try:
        # Copy so the shared class-level capabilities dict is not mutated.
        caps = webdriver.DesiredCapabilities.FIREFOX.copy()
        caps["marionette"] = True
        profile = webdriver.FirefoxProfile()
        profile.set_preference("browser.download.manager.showWhenStarting",
                               False)
        profile.set_preference('browser.helperApps.neverAsk.saveToDisk',
                               "text/csv")
        logging.info('Profile set...')
        options = Options()
        options.set_headless(headless=True)
        logging.info('Options set...')
        logging.info('Initializing Driver...')
        driver = webdriver.Firefox(firefox_profile=profile,
                                   firefox_options=options,
                                   capabilities=caps)
        try:
            logging.info('Driver Intialized...')
            driver.get(url)
            logging.info('Authenticating...')
            elem = driver.find_element_by_id("email")
            elem.send_keys(email)
            elem = driver.find_element_by_id("password")
            elem.send_keys(pwd)
            elem.send_keys(Keys.RETURN)
            logging.info('Successfully authenticated.')
            sleep_time = 15
            logging.info('Downloading File....Sleeping for {} Seconds.'.format(
                str(sleep_time)))
            # Fixed wait to give the download time to finish.
            time.sleep(sleep_time)
        finally:
            # quit() ends the whole session; close() only closed the window.
            driver.quit()
    finally:
        vdisplay.stop()

    dest_s3 = S3Hook(s3_conn_id=s3_conn_id)
    os.chdir('/root/Downloads')
    output_json = 'file.json'
    with open(filename, 'r') as csvfile, open(output_json, 'w') as jsonfile:
        for row in csv.DictReader(csvfile):
            row = {boa.constrict(k): v for k, v in row.items()}
            row['run_date'] = date
            json.dump(row, jsonfile)
            jsonfile.write('\n')
    # Upload only after the JSON file is closed and fully flushed.
    dest_s3.load_file(filename=output_json,
                      key=s3_key,
                      bucket_name=s3_bucket,
                      replace=True)
    dest_s3.connection.close()
class TestPlotting(unittest.TestCase):
    """Smoke tests for the plotting helpers."""

    def setUp(self):
        """Start a virtual display on Travis and select the Agg backend."""
        if os.getenv('TRAVIS', False):
            from xvfbwrapper import Xvfb
            self.vdisplay = Xvfb()
            self.vdisplay.start()
            # One display is started per test; stop it when the test ends
            # so Xvfb processes do not pile up.
            self.addCleanup(self.vdisplay.stop)

        # To have plt.show() non blocking
        import matplotlib.pyplot as p
        p.switch_backend('Agg')

    @classmethod
    def tearDownClass(cls):
        """Remove the mesh file written by ``test_plot_mesh``, if present."""
        # The file is absent when test_plot_mesh failed before writing it.
        if os.path.exists('planar_mesh_tmp_3.msh'):
            os.remove('planar_mesh_tmp_3.msh')

    def test_plot_planar(self):
        ''' Check plotting of planar sensor.
        '''
        thickness = 200  # [um]
        width = 40  # [um]

        def potential_function(x, y):
            return fields.get_weighting_potential_analytic(
                x, y, D=thickness, S=width, is_planar=True)

        def field_function(x, y):
            return fields.get_weighting_field_analytic(
                x, y, D=thickness, S=width, is_planar=True)

        # Plot with analytical field function
        plot.plot_planar_sensor(pot_func=potential_function,
                                width=width,
                                pitch=width,
                                thickness=thickness,
                                n_pixel=1,
                                V_backplane=0,
                                V_readout=1,
                                field_func=field_function)
        # Plot without a field function
        plot.plot_planar_sensor(pot_func=potential_function,
                                width=width,
                                pitch=width,
                                thickness=thickness,
                                n_pixel=1,
                                V_backplane=0,
                                V_readout=1,
                                field_func=None)

    def test_plot_mesh(self):
        """Mesh a 5-pixel planar sensor and plot the mesh."""
        mesh = geometry.mesh_planar_sensor(n_pixel=5,
                                           width=50.,
                                           thickness=100.,
                                           resolution=100.,
                                           filename='planar_mesh_tmp_3.msh')
        plot.plot_mesh(mesh)
def test_start_fails_with_unknown_kwargs(self):
    """An Xvfb built with an unknown keyword must fail in ``start()``."""
    display = Xvfb(foo='bar')
    self.assertRaises(RuntimeError, display.start)
class BrowserHandler:
    """Owns a Firefox WebDriver and, unless the browser is shown, an Xvfb display."""

    def __init__(self, args):
        """Start the optional virtual display and the Firefox browser.

        Args:
            args: Parsed options, or a falsy value. ``show_browser`` and
                ``verbose`` are read from it when it is truthy.
        """
        self.args = args
        # None means no virtual display was started.
        self.display = None
        if self.args and not self.args.show_browser:
            self.display = Xvfb()
            self.display.start()
        try:
            log_level = self._define_log_level(self.args)
            capabilities = self._create_browser_capabilities(log_level)
            options = self._create_browser_options(log_level)
            profile = self._create_browser_profile()
            self.browser = Firefox(
                firefox_profile=profile,
                capabilities=capabilities,
                options=options,
                log_path=f"{TIMESTAMP}_geckodriver.log",
            )
            # https://stackoverflow.com/questions/42754877/cant-upload-file-using-selenium-with-python-post-post-session-b90ee4c1-ef51-4  # pylint: disable=line-too-long
            self.browser._is_remote = False  # pylint: disable=protected-access
            self.browser.maximize_window()
        except BaseException:
            # No instance is returned on failure, so kill() can never be
            # called; stop the display here.
            if self.display is not None:
                self.display.stop()
            raise

    @staticmethod
    def _define_log_level(args):
        """Map ``args.verbose`` to a log level name.

        3 or more -> "trace", 2 -> "debug", 1 -> "info", anything else
        (including falsy ``args`` or ``verbose``) -> "warn".
        """
        verbose = args.verbose if args else None
        if not verbose:
            return "warn"
        if verbose >= 3:
            return "trace"
        if verbose == 2:
            return "debug"
        if verbose == 1:
            return "info"
        return "warn"

    @staticmethod
    def _create_browser_capabilities(log_level):
        """Return Firefox capabilities carrying the given log level."""
        capabilities = DesiredCapabilities.FIREFOX.copy()
        capabilities["moz:firefoxOptions"] = {
            "log": {
                "level": log_level,
            },
        }
        return capabilities

    @staticmethod
    def _create_browser_options(log_level):
        """Return Firefox options with the log level set."""
        options = Options()
        options.log.level = log_level
        return options

    @staticmethod
    def _create_browser_profile():
        """Return a profile that saves CSV/ZIP downloads to EXPORTS_FOLDER without asking."""
        profile = FirefoxProfile()
        profile.set_preference("browser.download.folderList", 2)
        profile.set_preference("browser.download.manager.showWhenStarting", False)
        profile.set_preference("browser.download.dir", EXPORTS_FOLDER)
        profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv, application/zip")
        profile.set_preference("browser.helperApps.alwaysAsk.force", False)
        profile.set_preference("devtools.jsonview.enabled", False)
        profile.set_preference("media.volume_scale", "0.0")
        # https://github.com/mozilla/geckodriver/issues/858#issuecomment-322512336
        profile.set_preference("dom.file.createInChild", True)
        return profile

    def kill(self):
        """Shut the browser down and stop the virtual display, if any."""
        try:
            self.browser.stop_client()
            self.browser.close()
            try:
                self.browser.quit()
            except WebDriverException:
                # Deliberate best effort, kept from the original.
                pass
        finally:
            # Stop the display even when the browser shutdown raised.
            if self.display is not None:
                self.display.stop()
def _make_game(full_config, seed):
    """Build the environment selected by ``full_config["env_name"]``.

    Returns:
        ``(game, reward_when_falling)`` where ``reward_when_falling`` is the
        per-feedback reward that ``train`` subtracts to obtain the reward
        without feedback.

    Raises:
        NotImplementedError: if the environment name contains ``"zork"``.
    """
    env_name = full_config["env_name"].lower()

    if "racing" in env_name:
        from env_tools.car_racing import CarRacingSafe
        from env_tools.wrapper import CarFrameStackWrapper, CarActionWrapper

        reset_when_out = full_config["reset_when_out"]
        reward_when_falling = full_config["reward_when_out"]
        max_steps = full_config["max_steps"]
        game = CarRacingSafe(reset_when_out=reset_when_out,
                             reward_when_out=reward_when_falling,
                             max_steps=max_steps)

        DEFAULT_FRAME_SKIP = 3
        n_frameskip = full_config.get("frameskip", DEFAULT_FRAME_SKIP)
        game = CarActionWrapper(game)
        game = CarFrameStackWrapper(game, n_frameskip=n_frameskip)

    elif "minigrid" in env_name:
        from gym_minigrid.envs.safe_crossing import SafeCrossing
        from env_tools.wrapper import MinigridFrameStacker

        reward_when_falling = full_config["reward_when_out"]
        game = SafeCrossing(
            size=full_config["size_env"],
            reward_when_falling=reward_when_falling,
            proba_reset=full_config["proba_reset"],
            feedback_when_wall_hit=full_config["feedback_when_wall_hit"],
            use_lava=full_config["use_lava"],
            n_zone=full_config["n_zone"],
            good_zone_action_proba=full_config["good_zone_action_proba"],
            bad_zone_action_proba=full_config["bad_zone_action_proba"],
            obstacle_type=full_config["obstacle_type"],
            prevent_bad_action=full_config["prevent_bad_action"],
            seed=seed)
        game = MinigridFrameStacker(game, full_config["n_frameskip"])

    elif "zork" in env_name:
        raise NotImplementedError(
            "Zork is a pain in the A#%?, i'll do it later")

    elif "text" in env_name:
        import textworld.gym as tw_gym
        from textworld.envs.wrappers.filter import EnvInfos
        from env_tools.wrapper import TextWorldWrapper

        EXTRA_GAME_INFO = {
            "inventory": True,
            "description": True,
            "intermediate_reward": full_config["use_intermediate_reward"],
            "admissible_commands": True,
            "policy_commands": full_config["use_intermediate_reward"],
        }
        reward_when_falling = 0

        game_path = os.path.join("text_game_files", full_config['ulx_file'])
        env_id = tw_gym.register_game(
            game_path,
            max_episode_steps=full_config["max_episode_steps"],
            name="simple1",
            request_infos=EnvInfos(**EXTRA_GAME_INFO))
        game = gym.make(env_id)
        game = TextWorldWrapper(
            env=game,
            use_intermediate_reward=EXTRA_GAME_INFO["intermediate_reward"])

    else:
        game = gym.make(full_config["env_name"])
        # This branch used to leave reward_when_falling unbound, which raised
        # a NameError on the first step.
        # NOTE(review): falling back to the "reward_when_out" config key (or 0)
        # is an assumption - confirm it matches the intended semantics.
        reward_when_falling = full_config.get("reward_when_out", 0)

    return game, reward_when_falling


def train(env_config,
          env_ext,
          model_config,
          model_ext,
          exp_dir,
          seed,
          local_test,
          override_expe=True,
          save_n_random_q_images=0):
    """Train a DQN agent on the configured environment and log to tensorboard.

    Args:
        env_config, env_ext, model_config, model_ext, exp_dir, seed:
            Forwarded to ``load_config`` to build the full configuration and
            the experiment directory.
        local_test: When true, the single-GPU assertions are skipped and no
            Xvfb display is started.
        override_expe: When false, existing ``tfevents`` files in the
            experiment directory are inspected first. If one of them already
            reached ``max_iter_expe - log_stats_every`` steps the function
            returns immediately; unfinished or corrupted event files are
            deleted.
        save_n_random_q_images: When positive, that many step indices are
            drawn at random each episode and the state/Q-value rendering at
            those steps is written to tensorboard.

    Returns:
        ``True``, both when training ran and when the experiment was skipped.

    Raises:
        NotImplementedError: for an unsupported environment or agent type.
    """
    from rl_agent.agent_utils import render_state_and_q_values
    from config import load_config
    from rl_agent.dqn_agent import DQNAgent
    import torch

    print("Expe",
          env_config,
          env_ext,
          model_config,
          model_ext,
          exp_dir,
          seed,
          sep='  ')
    print("Is cuda available ?", torch.cuda.is_available())

    if not local_test:
        assert len(ray.get_gpu_ids()) == 1
        assert torch.cuda.device_count(
        ) == 1, "Should be only 1, is {}".format(torch.cuda.device_count())

    if local_test:
        # Only a do-nothing context manager is needed here; use the null
        # device instead of leaving a stray 'nothing.txt' in the working dir.
        display = open(os.devnull, 'w')
    else:
        from xvfbwrapper import Xvfb
        display = Xvfb(width=100, height=100, colordepth=16)

    full_config, expe_path = load_config(env_config_file=env_config,
                                         model_config_file=model_config,
                                         env_ext_file=env_ext,
                                         model_ext_file=model_ext,
                                         out_dir=exp_dir,
                                         seed=seed)

    MAX_STATE_TO_REMEMBER = 50  # To avoid storing too much images in tensorboard
    DEFAULT_LOG_STATS = 500
    log_stats_every = full_config.get("log_stats_every", DEFAULT_LOG_STATS)

    max_iter_expe = full_config["stop"]["max_iter_expe"]
    score_success = full_config["stop"]["episode_reward_mean"]

    if not override_expe:
        # Check that the experiment has run more than a few episodes
        # If so, DON'T rerun everything (useful for grid search)
        rerun_expe = True

        for entry in os.listdir(expe_path):
            if "tfevents" not in entry:
                continue

            last_iter = 0
            tf_event_path = os.path.join(expe_path, entry)
            try:
                for elem in tf.train.summary_iterator(tf_event_path):
                    if elem.step:
                        last_iter = max(last_iter, elem.step)

                if last_iter < max_iter_expe - log_stats_every:
                    os.remove(tf_event_path)
                    print("Experiment doesn't seem to be over, rerun.")
                else:
                    rerun_expe = False

            except tf.errors.DataLossError as e:
                print(e)
                os.remove(tf_event_path)

        if not rerun_expe:
            print("Expe was over, don't rerun")
            return True

    writer = tensorboardX.SummaryWriter(expe_path)
    print("Expe path : ", expe_path)

    is_racing = "racing" in full_config["env_name"].lower()
    game, reward_when_falling = _make_game(full_config, seed)

    discount_factor = full_config["discount_factor"]
    total_iter = 0
    success_count = 0

    num_episode = 0
    early_stopping = False

    # Per-episode statistics collected since the last log; emptied each time
    # they are written to tensorboard.
    reward_wo_feedback_list = []
    feedback_per_ep_list = []
    percentage_tile_seen_list = []
    iter_this_ep_list = []
    last_reward_undiscount_list = []
    last_reward_discount_list = []

    best_undiscount_reward = -float("inf")

    model_type = full_config["agent_type"]
    if model_type == "dqn":
        model = DQNAgent(config=full_config["dqn_params"],
                         action_space=game.action_space,
                         obs_space=game.observation_space,
                         discount_factor=discount_factor,
                         writer=writer,
                         log_stats_every=log_stats_every)
    else:
        raise NotImplementedError("{} not available for model".format(
            full_config["agent_type"]))

    save_images_at = set(full_config["save_images_at"])

    with display:
        while total_iter < max_iter_expe and not early_stopping:

            state = game.reset()

            done = False
            iter_this_ep = 0
            reward_wo_feedback = 0
            reward_total_discounted = 0
            reward_total_not_discounted = 0
            percentage_tile_seen = 0
            n_feedback_this_ep = 0

            rendered_images = []

            # Do we store images of state and q function associated with it ?
            if save_n_random_q_images > 0:
                steps_images_to_save = np.random.randint(
                    0, game.env.max_steps, save_n_random_q_images)
            elif num_episode in save_images_at:
                steps_images_to_save = range(0, int(1e6))  # save everything
            else:
                steps_images_to_save = []

            while not done:

                # Render state, and compute q values to visualize them later
                if iter_this_ep in steps_images_to_save:
                    array_rendered = render_state_and_q_values(model=model,
                                                               game=game,
                                                               state=state)
                    rendered_images.append(array_rendered)

                    # Save only the last frames, to avoid overloading tensorboard
                    if len(rendered_images) > MAX_STATE_TO_REMEMBER:
                        rendered_images.pop(0)

                action = model.select_action(state['state'])
                next_state, reward, done, info = game.step(action=action)

                if done:
                    next_state['state'] = None

                model.push(state['state'], action, next_state['state'], reward,
                           next_state['gave_feedback'])
                model.optimize(total_iter=total_iter, env=game)

                state = next_state

                total_iter += 1
                iter_this_ep += 1

                percentage_tile_seen = max(
                    info.get('percentage_road_visited', 0),
                    percentage_tile_seen)
                n_feedback_this_ep += info['gave_feedback']

                assert next_state['gave_feedback'] == info[
                    'gave_feedback'], "Problem, info should contain the same info as state"

                reward_total_discounted += reward * (discount_factor**
                                                     iter_this_ep)
                reward_total_not_discounted += reward

                reward_wo_feedback += reward - info[
                    'gave_feedback'] * reward_when_falling

                #=======================
                # LOG STATS HERE
                if total_iter % log_stats_every == 0:
                    # Mean over the episodes finished since the last log.
                    # (The two "last" means used to be computed from each
                    # other's list, and the best-model test compared one
                    # metric while storing another.)
                    last_rewards_undiscount = np.mean(
                        last_reward_undiscount_list)
                    last_reward_wo_feedback = np.mean(reward_wo_feedback_list)

                    iter_this_ep_mean = np.mean(iter_this_ep_list)
                    last_feedback_mean = np.mean(feedback_per_ep_list)

                    if is_racing:
                        writer.add_scalar("data/percentage_tile_seen",
                                          np.mean(percentage_tile_seen_list),
                                          total_iter)

                    writer.add_scalar("data/number_of_feedback",
                                      last_feedback_mean, total_iter)
                    writer.add_scalar(
                        "data/number_of_feedback_over_iter_per_ep",
                        last_feedback_mean / iter_this_ep_mean, total_iter)

                    writer.add_scalar("data/reward_wo_feedback(unbiaised)",
                                      last_reward_wo_feedback, total_iter)
                    writer.add_scalar("data/n_episodes", num_episode,
                                      total_iter)

                    writer.add_scalar("data/iter_per_ep", iter_this_ep_mean,
                                      total_iter)
                    writer.add_scalar("data/n_episode_since_last_log",
                                      len(last_reward_discount_list),
                                      total_iter)

                    # Track the best window by the same metric that is stored,
                    # so successive comparisons are like-for-like.
                    if last_rewards_undiscount > best_undiscount_reward:
                        best_undiscount_reward = last_rewards_undiscount
                        torch.save(model.policy_net.state_dict(),
                                   os.path.join(expe_path, "best_model.pth"))

                    torch.save(model.policy_net.state_dict(),
                               os.path.join(expe_path, "last_model.pth"))

                    # Reset feedback and percentage
                    feedback_per_ep_list = []
                    percentage_tile_seen_list = []
                    last_reward_undiscount_list = []
                    last_reward_discount_list = []
                    iter_this_ep_list = []
                    reward_wo_feedback_list = []

            # DONE, GO HERE :
            # ================

            # Save images of state and q func associated
            if rendered_images != []:
                for i, array_rendered in enumerate(rendered_images):
                    num_iter = iter_this_ep - len(rendered_images) + i + 1
                    writer.add_image('data/{}/state_and_q'.format(num_episode),
                                     global_step=num_iter,
                                     img_tensor=array_rendered,
                                     dataformats="HWC")

            last_reward_undiscount_list.append(reward_total_not_discounted)
            last_reward_discount_list.append(reward_total_discounted)

            feedback_per_ep_list.append(n_feedback_this_ep)
            percentage_tile_seen_list.append(percentage_tile_seen)
            iter_this_ep_list.append(iter_this_ep)

            reward_wo_feedback_list.append(reward_wo_feedback)

            print(
                "End of ep #{}, n_timesteps (estim) {}, iter_this_ep : {}, current_eps {}, zone {}"
                .format(num_episode, total_iter,
                        np.mean(iter_this_ep_list[-1]), model.current_eps,
                        state.get('zone', "Not applicable")))

            print(
                "(Estim) Discounted rew : {} undiscounted : {}, unbiaised : {},  n_feedback {} \n\n"
                .format(np.mean(last_reward_discount_list[-1]),
                        np.mean(last_reward_undiscount_list[-1]),
                        reward_wo_feedback_list[-1],
                        np.mean(feedback_per_ep_list[-1])))

            assert total_iter >= reward_wo_feedback_list[
                -1] + feedback_per_ep_list[-1]

            # Stop early after more than 5 consecutive episodes above target.
            if reward_total_discounted > score_success:
                success_count += 1
                if success_count > 5:
                    early_stopping = True
            else:
                success_count = 0

            num_episode += 1

    print("Experiment over")

    # Enforce cleaning
    writer.close()
    del model.memory
    del model
    del game
    torch.cuda.empty_cache()
    return True
from xvfbwrapper import Xvfb
from selenium import webdriver

# Load one page in Chrome on a virtual display and print its title.
# Both the display and the browser are released even if a step fails, so no
# Xvfb or chromedriver process is left running after the script ends.
vdisplay = Xvfb()
vdisplay.start()
try:
    driver = webdriver.Chrome()
    try:
        driver.get('http://www.adidas.ru/krossovki-deerupt-runner/B41768.html')
        print(driver.title)
    finally:
        driver.quit()
finally:
    vdisplay.stop()
class ProxiedBrowser(object):
    """Firefox on an Xvfb display whose traffic goes through a capturing proxy.

    Usable as a context manager: ``__enter__`` calls :meth:`open` and
    ``__exit__`` calls :meth:`close`.
    """

    def __init__(self, dir, tor_port=9150):
        """Remember the base directory and the Tor port (coerced to ``int``)."""
        self.dir = dir
        self.tor_port = int(tor_port)

    def open(self):
        """Start the proxy, the virtual display and the Firefox webdriver.

        Returns:
            ``self``, so the call can be used from ``__enter__``.

        Raises:
            OSError: if the capture directory cannot be created.
            Any exception raised while starting the display or the browser;
            the proxy, display and log file started so far are released first.
        """
        cap_dir = join(self.dir, "caps")
        try:
            os.makedirs(cap_dir)
        except OSError:
            # An already existing directory is fine; anything else (e.g. a
            # permission problem) should not be silently ignored.
            if not os.path.isdir(cap_dir):
                raise

        self.proxy = TCP(cap_dir=cap_dir, tor_port=self.tor_port)
        self.proxy.start()
        while not self.proxy.running:
            logger.info("Waiting for proxy to start...")
            sleep(1)

        self.vdisplay = None
        self._firefox_log = None
        try:
            logger.info("Starting Xvfb virtual display")
            display = Xvfb(width=1280, height=740)
            display.start()
            self.vdisplay = display

            logger.info("Webdriver starting..")
            # Keep a handle on the log file so close() can release it.
            self._firefox_log = open("firefox.log", "w")
            self.binary = FirefoxBinary(firefox_path=join(self.dir, FIREFOX_PATH),
                                        log_file=self._firefox_log)
            self.binary.add_command_line_options("--verbose")
            self.profile = FirefoxProfile(
                profile_directory=join(self.dir, FIREFOX_PROFILE))
            self.profile.set_preference("network.proxy.socks_port",
                                        self.proxy.port)
            # We started tor manually earlier.
            self.profile.set_preference("extensions.torlauncher.start_tor",
                                        False)
            self.profile.set_preference("browser.startup.homepage", "")
            self.profile.set_preference("browser.startup.page", 0)
            self.profile.update_preferences()

            self.driver = webdriver.Firefox(firefox_binary=self.binary,
                                            firefox_profile=self.profile)
            sleep(2)  # hack: wait until homepage etc have loaded.
        except Exception:
            # Undo whatever was started, then re-raise with the original
            # traceback intact.
            if self._firefox_log is not None:
                self._firefox_log.close()
                self._firefox_log = None
            if self.vdisplay is not None:
                self.vdisplay.stop()
            self.proxy.close()
            raise
        return self

    def __enter__(self):
        return self.open()

    def get(self, url):
        """Load ``url`` and return what was extracted from the proxy captures.

        Pending captures are discarded before the page is loaded; capture
        files produced by the load are parsed and then deleted.
        """
        print("Fetching {url}".format(url=url))
        # clear anything previous, e.g the browsers homepage, whatever update
        # checkers etc.
        self.proxy.consume_results()
        self.driver.get(url)
        capture_files = self.proxy.consume_results()
        responses = list()
        for capture_file in capture_files:
            responses += extract_from_capturefile(capture_file)
            os.remove(capture_file)
        return responses

    def close(self):
        """Quit the webdriver, stop the display and shut the proxy down.

        Every step is attempted even if an earlier one raises; the first
        error still propagates to the caller.
        """
        try:
            logger.info("Closing webdriver")
            self.driver.quit()
        finally:
            firefox_log = getattr(self, "_firefox_log", None)
            if firefox_log is not None:
                firefox_log.close()
                self._firefox_log = None
            try:
                logger.info("Closing virtual display")
                self.vdisplay.stop()
            finally:
                logger.info("Closing proxy")
                self.proxy.close()
                self.proxy.join()

    def __exit__(self, type, value, traceback):
        self.close()
def crawl(self):
    """
    Visit the top `n_sites` websites in the Tranco List, in order, in a
    virtual browser with Privacy Badger installed. Afterwards, save the
    action_map and snitch_map that the Badger learned.

    The browser and the virtual display are shut down on every exit path,
    including the `sys.exit(1)` taken when the badger storage cannot be read
    and any uncaught exception raised while crawling.
    """
    domains = get_domain_list(self.n_sites, self.exclude)
    self.logger.info(("starting new crawl:\n"
                      "\ttimeout: %ss\n"
                      "\twait time: %ss\n"
                      "\tbrowser: %s\n"
                      "\tsurvey mode: False\n"
                      "\tdomains to crawl: %d\n"
                      "\tTLDs to exclude: %s"),
                     self.timeout, self.wait_time, self.browser,
                     self.n_sites, self.exclude)

    # create an XVFB virtual display (to avoid opening an actual browser)
    self.vdisplay = Xvfb(width=1280, height=720)
    self.vdisplay.start()
    try:
        self.start_browser()
        try:
            # list of domains we actually visited
            visited = []
            old_snitches = {}

            for i, domain in enumerate(domains):
                try:
                    # This script could fail during the data dump (trying to
                    # get the options page), the data cleaning, or while
                    # trying to load the next domain.
                    # NOTE(review): if this very first dump raises, last_data
                    # is still unbound when the handlers below pass it to
                    # restart_browser - confirm how that case should behave.
                    last_data = self.dump_data()

                    # try to fix misattribution errors
                    if i >= 2:
                        clean_data = self.cleanup(domains[i - 2],
                                                  domains[i - 1],
                                                  last_data)
                        if last_data != clean_data:
                            self.clear_data()
                            self.load_user_data(clean_data)

                    self.logger.info('visiting %d: %s', i + 1, domain)
                    url = self.get_domain(domain)
                    visited.append(url)
                except TimeoutException:
                    self.logger.info('timeout on %s ', domain)
                    # TODO: how to get rid of this nested try?
                    try:
                        self.timeout_workaround()
                    except WebDriverException as e:
                        if should_restart(e):
                            self.restart_browser(last_data)
                except WebDriverException as e:
                    self.logger.error('%s %s: %s', domain,
                                      type(e).__name__, e.msg)
                    if should_restart(e):
                        self.restart_browser(last_data)
                finally:
                    self.load_extension_page(OPTIONS)
                    snitches = self.driver.execute_script(
                        "return chrome.extension.getBackgroundPage()."
                        "badger.storage.snitch_map._store;")
                    diff = set(snitches) - set(old_snitches)
                    if diff:
                        self.logger.info("new trackers in snitch_map: %s",
                                         diff)
                    old_snitches = snitches

            self.logger.info(
                "Finished scan. Visited %d sites and errored on %d.",
                len(visited), len(domains) - len(visited))

            try:
                self.logger.info('Getting data from browser storage...')
                data = self.dump_data()
            except WebDriverException:
                # If we can't load the background page here, just quit :(
                self.logger.error('Could not get badger storage.')
                sys.exit(1)
        finally:
            # Runs for normal completion, sys.exit() and errors alike, so the
            # browser process never outlives the crawl.
            self.driver.quit()
    finally:
        self.vdisplay.stop()

    self.save(data)