Ejemplo n.º 1
0
def start_xvfb():
  """Start an Xvfb display that is stopped when the interpreter exits.

  Any ``DISPLAY`` value already present in the environment is removed
  before the server is started, and the server's ``stop`` method is
  registered with ``atexit``.
  """
  from xvfbwrapper import Xvfb
  # Forget whatever display the surrounding environment pointed at.
  os.environ.pop("DISPLAY", None)
  virtual_display = Xvfb()
  virtual_display.start()
  atexit.register(virtual_display.stop)
Ejemplo n.º 2
0
def main():
    """Run ``bounds2video`` inside a virtual X display.

    Command-line arguments (``sys.argv``):
        1-3: passed through unchanged to ``bounds2video``.
        4:   optional subsample rate (integer, default 10).
        5:   optional speed-up factor (integer, default 10).

    The Xvfb server is stopped even when ``bounds2video`` raises.
    """
    import sys
    import matplotlib.pyplot as plt
    from xvfbwrapper import Xvfb

    plt.ioff()

    # Parse the optional arguments before starting the display so that a
    # malformed number cannot leave an Xvfb process behind.
    subsampleRate = int(sys.argv[4]) if len(sys.argv) >= 5 else 10
    speedup = int(sys.argv[5]) if len(sys.argv) >= 6 else 10

    vdisplay = Xvfb()
    vdisplay.start()
    try:
        bounds2video(sys.argv[1], sys.argv[2], sys.argv[3],
                     subsampleRate, speedup)
    finally:
        vdisplay.stop()
Ejemplo n.º 3
0
 def __init__(self):
     """Register the crawler, fetch an account and launch Chrome.

     Steps, in order: start logging, obtain a crawler id, publish the
     crawler's ip/ports through ``self.r``, fetch account credentials, mark
     the local status through ``self.r_local``, start the heartbeat thread,
     start a virtual display on Linux, and create the Chrome driver.

     The virtual display is kept on ``self.vdisplay`` (``None`` when no
     display was started) so that it can be stopped later.
     """
     log.start(logfile=time.strftime("log/%Y%m%d%H%M%S")+".log",logstdout=False)
     log.msg("initiating crawler...",level=log.INFO)
     self.crawler_id = self.get_crawler_id()
     log.msg("crawler id is %s" % self.crawler_id,level=log.INFO)

     # Look the external ip up once so the stored and the logged value agree.
     external_ip = utils.get_external_ip()
     self.r.set('crawler:ip:%s' % self.crawler_id, external_ip)
     self.r.set('crawler:port:%s' % self.crawler_id,settings.REDIS_LOCAL_PORT)
     self.r.set('crawler:mapping_port:%s' % self.crawler_id,settings.REDIS_LOCAL_MAPPING_PORT)
     log.msg("crawler ip is %s, port is %d" % (external_ip,settings.REDIS_LOCAL_PORT),level=log.INFO)

     account = self.get_account()
     self.username = account[0]
     self.password = account[1]
     log.msg("crawler account got",level=log.INFO)

     self.r_local.set('crawler:status:%s' % self.crawler_id, 'good')
     # NOTE(review): "%s" is a platform-specific strftime directive and is
     # applied to a naive UTC datetime here - confirm readers of this key
     # expect that value.
     self.r_local.set('crawler:update_time:%s' % self.crawler_id, datetime.datetime.utcnow().strftime("%s"))
     log.msg("local crawler status set",level=log.INFO)

     heartbeat_thread = threading.Thread(target=self.maintain_local_heartbeat)
     heartbeat_thread.start()
     log.msg("local crawler heartbeat started",level=log.INFO)

     self.vdisplay = None
     if platform.system() == "Linux":
         #on linux, use virtual display
         self.vdisplay = Xvfb()
         self.vdisplay.start()

     co = ChromeOptions()
     #TODO: Disable image after log in
     #TODO: optimize memory usage
     co.add_experimental_option("prefs",{"profile.default_content_settings":{"popups":1}})
     self.driver = webdriver.Chrome(chrome_options=co)
     self.driver.set_window_size(640,960)
Ejemplo n.º 4
0
class Webdriver(unittest.TestCase):
    """Collect listing descriptions from SITE into AD_FILE via a headless browser."""

    def setUp(self):
        """Start a virtual display and the browser selected by BROWSER.

        The display is stopped through ``addCleanup``: cleanups run after
        ``tearDown`` (so the browser quits first) and also when ``setUp``
        itself fails, so the Xvfb server is never left running.
        """
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        self.addCleanup(self.vdisplay.stop)

        system = platform.system()
        if BROWSER == 'firefox':
            if system == 'Darwin':
                self.driver = webdriver.Firefox()
            elif system == 'Linux':
                self.driver = webdriver.Firefox(proxy=proxy)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        elif BROWSER == 'chrome':
            if system == 'Darwin':
                chromedriver = "./experiment/chromedriver/chromedriver_mac"
                os.environ["webdriver.chrome.driver"] = chromedriver
                self.driver = webdriver.Chrome(executable_path=chromedriver)
            elif system == 'Linux':
                chromedriver = "./experiment/chromedriver/chromedriver_linux"
                os.environ["webdriver.chrome.driver"] = chromedriver
                chrome_option = webdriver.ChromeOptions()
                chrome_option.add_argument("--proxy-server=yogi.pdl.cmu.edu:3128" )
                self.driver = webdriver.Chrome(executable_path=chromedriver, chrome_options=chrome_option)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        else:
            print("Unsupported Browser")
            sys.exit(0)
        self.driver.implicitly_wait(10)
        self.base_url = "https://www.google.com/"
        self.verificationErrors = []
        self.driver.set_page_load_timeout(40)
        self.accept_next_alert = True

    def test_webdriver(self):
        """Write the lower-cased innerHTML of the first N listing links to AD_FILE."""
        driver = self.driver
        # Opening in "w" mode discards results from a previous run.
        with open(AD_FILE, "w") as fo:
            driver.get(SITE)
            count = 0
            while count < N:
                els = driver.find_elements_by_css_selector("li.site-listing div.desc-container p.desc-paragraph a")
                for el in els:
                    if count >= N:
                        break
                    fo.write(el.get_attribute('innerHTML').lower() + '\n')
                    count += 1
                driver.find_element_by_css_selector("a.next").click()

    def tearDown(self):
        """Quit the browser; the display is stopped by the cleanup from setUp."""
        self.driver.quit()
Ejemplo n.º 5
0
class XvfbRobot(object):
    """
    Robot Framework library that creates a virtual display on demand.
    """

    ROBOT_LIBRARY_SCOPE = "GLOBAL"
    ROBOT_LIBRARY_VERSION = VERSION
    _display = None

    def start_virtual_display(self, width=1440, height=900, colordepth=24, **kwargs):
        """Starts a virtual display that is stopped when the interpreter exits.

        Calling the keyword again while a display exists does nothing.

        *Arguments:*
        - width: display width in pixels
        - height: display height in pixels
        - colordepth: colour depth of the display
        - kwargs: extra parameters passed on to Xvfb

        *Example:*

        | Start Virtual Display |
        | Start Virtual Display | 1920 | 1080 |
        | Start Virtual Display | ${1920} | ${1080} | ${16} |
        """
        if self._display is not None:
            # A display was already created by an earlier call.
            return

        message = "Using virtual display: '{0}x{1}x{2}'".format(width, height, colordepth)
        logger.info(message)

        display = Xvfb(int(width), int(height), int(colordepth), **kwargs)
        self._display = display
        display.start()
        atexit.register(display.stop)
Ejemplo n.º 6
0
class TestJavascript(unittest.TestCase):
    """Load index.html from a local HTTP server in Firefox and check its error element.

    All resources acquired in ``setUp`` are released through ``addCleanup``,
    which runs in reverse order of registration and also when ``setUp`` fails
    part-way, so no ``tearDown`` override is needed.
    """

    def setUp(self):
        """Start the HTTP server, a virtual display and the browser."""
        # Configure a web service.
        handler = http.server.SimpleHTTPRequestHandler
        address = ("127.0.0.1", 9999)
        socketserver.TCPServer.allow_reuse_address = True  # Prevents address conflicts.
        self.httpd = socketserver.TCPServer(address, handler)
        # Release the listening socket so the next test can bind the port.
        self.addCleanup(self.httpd.server_close)

        # Serve requests from a daemon thread.
        httpd_thread = threading.Thread(target=self.httpd.serve_forever)
        httpd_thread.daemon = True
        httpd_thread.start()
        # Registered after server_close, hence executed before it.
        self.addCleanup(self.httpd.shutdown)

        # Start a display.
        self.display = Xvfb()
        self.display.start()
        self.addCleanup(self.display.stop)

        # Start the browser driver for selenium testing.
        self.driver = webdriver.Firefox()
        self.addCleanup(self.driver.quit)
        self.driver.get("http://localhost:9999/index.html")

    def test_javascript(self):
        """The element with id "error" must have empty text."""
        error = self.driver.find_elements_by_id("error")[0].text
        self.assertEqual(error, "")
Ejemplo n.º 7
0
def start_xvfb(module):
    """Start a 1280x720 Xvfb display that is stopped at interpreter exit.

    Args:
        module: object exposing ``fail_json(msg=...)``; it is called when
            the display cannot be created, started or registered.
    """
    try:
        xvfb = Xvfb(width=1280, height=720)
        xvfb.start()
        atexit.register(xvfb.stop)
    except Exception as exc:
        # Catching Exception (not a bare except) lets SystemExit and
        # KeyboardInterrupt propagate; the cause is included in the report.
        module.fail_json(msg="xvfb broke: %s" % exc)
Ejemplo n.º 8
0
def get_mtgox_info(input_vars):
    """Log in to mtgox.com in a headless Firefox and return the funding address text.

    Args:
        input_vars: mapping providing the keys ``'mtgoxId'`` and
            ``'mtgoxPassword'``.

    Returns:
        The text of the element found at the funding-options XPath.

    The browser and the virtual display are released even on failure.
    """
    vdisplay = Xvfb()
    vdisplay.start()
    try:
        driver = webdriver.Firefox()
        try:
            driver.get("http://www.mtgox.com")

            uelem = driver.find_element_by_name("username")
            pelem = driver.find_element_by_name("password")
            lelem = driver.find_element_by_name("LOGIN")

            uelem.send_keys(input_vars['mtgoxId'])
            time.sleep(0.25)
            pelem.send_keys(input_vars['mtgoxPassword'])
            time.sleep(0.25)
            lelem.click()
            time.sleep(0.25)

            driver.get("https://www.mtgox.com/trade/funding-options")
            time.sleep(0.25)

            # Block (up to 10 s) until an element with class "logout" exists;
            # the element itself is not needed afterwards.
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "logout")))

            # find_element (singular) returns one element; the plural form
            # returns a list, which has no ``text`` attribute.
            address_elem = driver.find_element_by_xpath("/html/body/div[2]/div[2]/div[3]/div[3]/section/div/div[4]/div[2]/div/p/strong")

            print(address_elem)
            new_address = address_elem.text
            return new_address
        finally:
            driver.quit()
    finally:
        vdisplay.stop()
Ejemplo n.º 9
0
 def test_start_with_arbitrary_kwargs(self):
     """Starting with extra keyword arguments sets DISPLAY and a process."""
     display = Xvfb(nolisten='tcp', noreset=None)
     self.addCleanup(display.stop)
     display.start()
     expected_display = ':{0}'.format(display.new_display)
     self.assertEqual(expected_display, os.environ['DISPLAY'])
     self.assertIsNotNone(display.proc)
Ejemplo n.º 10
0
def enable():
    """Re-enable the bot user through the Slack web UI in a headless Chrome.

    Credentials, team name and bot user are read from ``settings``. The
    browser is quit and the virtual display stopped even when a step fails.
    """
    import time
    import settings
    from splinter import Browser
    from xvfbwrapper import Xvfb

    print("Trying to enable myself.")
    vdisplay = Xvfb()
    vdisplay.start()
    try:
        email = settings.getEmail()
        password = settings.getPassword()
        team_name = settings.getTeamName()
        bot_user = settings.getBotUser()

        browser = Browser('chrome')
        try:
            url = 'https://{}.slack.com/services/{}'.format(team_name, bot_user)
            browser.visit(url)
            browser.fill('email', email)
            browser.fill('password', password)
            browser.find_by_id('signin_btn').first.click()
            browser.find_link_by_text('Enable').first.click()
            time.sleep(2) # Sometimes I saw a crash where there was no alert, so we'll wait a bit first.
            alert = browser.get_alert()
            alert.accept()
            time.sleep(2) # If you close the display too quickly, the request doesn't get processed.
        finally:
            browser.quit()
    finally:
        vdisplay.stop()
Ejemplo n.º 11
0
class BaseTestCase(unittest.TestCase):
    """Base class that provides ``self.driver`` (Chrome or Firefox) to tests.

    Environment variables:
        ENABLE_XVFB: run the browser inside a virtual display unless the
            value is unset, empty, or one of 0/false/no/off.
        BROWSER: "Firefox" selects Firefox; anything else selects Chrome.
    """

    @staticmethod
    def _env_flag(value):
        """Interpret an environment-variable value as an on/off switch."""
        if value is None:
            return False
        return str(value).strip().lower() not in ("", "0", "false", "no", "off")

    def setUp(self):
        """Start the optional virtual display and create the driver."""
        # A raw environment string such as "0" is truthy, so parse it.
        self.xvfb = self._env_flag(os.environ.get("ENABLE_XVFB"))
        self.browser = os.environ.get("BROWSER", "Chrome")
        self.vdisplay = None
        if self.xvfb:
            self.vdisplay = Xvfb(width=1280, height=720)
            self.vdisplay.start()
            # Cleanups run after tearDown and also when setUp fails, so the
            # display outlives the browser but never the test.
            self.addCleanup(self.vdisplay.stop)
        if self.browser == "Firefox":
            self.driver = self.get_ff_driver()
        else:
            self.driver = self.get_chrome_driver()
        self.load = self.driver.get

    def tearDown(self):
        """Quit the browser; the display is stopped by the cleanup from setUp."""
        if self.driver:
            self.driver.quit()

    def get_ff_driver(self):
        """Return a new Firefox driver."""
        return webdriver.Firefox()

    def get_chrome_driver(self):
        """Return a new Chrome driver configured for the test environment."""
        opts = Options()
        if "TRAVIS" in os.environ:  # github.com/travis-ci/travis-ci/issues/938
            opts.add_argument("--no-sandbox")
        # Fix for https://code.google.com/p/chromedriver/issues/detail?id=799
        opts.add_experimental_option("excludeSwitches",
                                     ["ignore-certificate-errors"])
        return webdriver.Chrome(chrome_options=opts)
Ejemplo n.º 12
0
class MainTest(unittest.TestCase):
    """Search YouTube in a headless Firefox and dump titled links to CSV."""

    def setUp(self):
        """Start the virtual display and the browser.

        Cleanups run last-in-first-out, so the browser quits before the
        display it is drawing on is stopped.
        """
        self.xvfb = Xvfb(width=1280, height=720)
        self.xvfb.start()
        self.addCleanup(self.xvfb.stop)
        self.browser = webdriver.Firefox()
        self.addCleanup(self.browser.quit)

    def test_main(self):
        """Search for 'angular', save a screenshot and write youtube.csv."""
        self.browser.get("http://www.youtube.com")

        search = self.browser.find_element_by_id("masthead-search-term")
        search.send_keys('angular',Keys.RETURN)

        self.browser.implicitly_wait(2)
        self.browser.save_screenshot("youtube.png")

        link_elements = self.browser.find_elements_by_xpath("//a[@title]")
        # The context manager guarantees the rows are flushed and the file
        # closed, even if reading an element fails half-way.
        with open("youtube.csv", "w") as csv_file:
            f = csv.writer(csv_file)
            f.writerow(["Name", "   Link"])
            for element in link_elements:
                if len(element.text) > 5:
                    name = element.text
                    f.writerow([name.encode('utf-8'),"   "+ element.get_attribute("href")])
Ejemplo n.º 13
0
 def test_start(self):
     """Starting a default display sets DISPLAY and a process handle."""
     display = Xvfb()
     self.addCleanup(display.stop)
     display.start()
     expected_display = ':{0}'.format(display.new_display)
     self.assertEqual(expected_display, os.environ['DISPLAY'])
     self.assertIsNotNone(display.proc)
Ejemplo n.º 14
0
class Xvfb(Plugin):
    """Plugin that runs the test session inside a 1024x768 Xvfb display."""

    def options(self, parser, env):
        """Register the ``--with-xvfb-options`` command-line option."""
        super(Xvfb, self).options(parser, env)
        parser.add_option("--with-xvfb-options", action="store",
            dest="xvfb_options",
            default=env.get("NOSE_WITH_XVFB_OPTIONS"),
            help="Options to pass to Xvfb. Comma delimited with "
            "equals as separators if necessary. "
            "E.g. \"extension=SELINUX, once\". Currently, there is no "
            "way to provide options that begin with a +. This is a limitation "
            "in xvfbwrapper. Repetition is not allowed. [NOSE_WITH_XVFB_OPTIONS]")

    def configure(self, options, noseconfig):
        """Parse the comma-delimited option string into ``self.xvfb_options``.

        Each item is ``key`` or ``key=value``; a key without a value maps to
        the empty string. Blank items (for example from a trailing comma)
        are ignored instead of producing an empty option name.
        """
        super(Xvfb, self).configure(options, noseconfig)
        self.xvfb_options = {}
        if options.xvfb_options:
            for item in options.xvfb_options.split(","):
                # partition() already yields '' when there is no "=".
                key, _, value = item.strip().partition("=")
                if not key:
                    continue
                self.xvfb_options[key] = value

    def begin(self):
        """Start the virtual display with the configured options."""
        logger.info('Starting xvfb virtual display 1024x768 with %s' % self.xvfb_options)
        self.vdisplay = XvfbWrapper(width=1024, height=768, **self.xvfb_options)
        self.vdisplay.start()

    def finalize(self, result):
        """Stop the virtual display."""
        logger.info('Stopping xvfb virtual display')
        self.vdisplay.stop()
Ejemplo n.º 15
0
class PBSeleniumTest(unittest.TestCase):
    """Base test case that runs Chrome with the extension under test loaded.

    Environment variables:
        BROWSER_BIN: browser binary to use (WebDriver's default if empty).
        TRAVIS: when present, Xvfb is always used and the sandbox disabled.
        ENABLE_XVFB: integer flag enabling Xvfb outside of Travis.
        PB_EXT_PATH: path to the extension file.
    """

    def setUp(self):
        """Start the optional virtual display and the Chrome driver."""
        env = os.environ
        self.browser_bin = env.get("BROWSER_BIN", "")  # o/w use WD's default
        if "TRAVIS" in os.environ:
            self.xvfb = 1
        else:
            # by default don't use XVFB if we are not running on CI
            self.xvfb = int(env.get("ENABLE_XVFB", 0))
        self.pb_ext_path = self.get_extension_path()  # path to the extension
        self.vdisplay = None
        if self.xvfb:
            self.vdisplay = Xvfb(width=1280, height=720)
            self.vdisplay.start()
            # Driver creation fails when the extension is missing; a cleanup
            # (unlike tearDown) still runs then, so the display never leaks.
            # Cleanups also run after tearDown, i.e. after the browser quit.
            self.addCleanup(self.vdisplay.stop)
        self.driver = self.get_chrome_driver()
        self.js = self.driver.execute_script

    def load_url(self, url, wait_on_site=0):
        """Load a URL and wait before returning."""
        self.driver.get(url)
        sleep(wait_on_site)

    def get_extension_path(self):
        """Return the path to the extension to be tested."""
        if "PB_EXT_PATH" in os.environ:
            return os.environ["PB_EXT_PATH"]
        # PB_EXT_PATH is not set: fall back to the most recently changed
        # crx file two directories up ("" when there is none).
        print("Can't find the env. variable PB_EXT_PATH, will check ../..")
        exts = glob("../../*.crx")  # get matching files
        return max(exts, key=os.path.getctime) if exts else ""

    def txt_by_css(self, css_selector, timeout=SEL_DEFAULT_WAIT_TIMEOUT):
        """Find an element by CSS selector and return its text."""
        return self.find_el_by_css(css_selector, timeout).text

    def find_el_by_css(self, css_selector, timeout=SEL_DEFAULT_WAIT_TIMEOUT):
        """Wait up to ``timeout`` for an element matching the selector and return it."""
        return WebDriverWait(self.driver, timeout).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, css_selector)))

    def get_chrome_driver(self):
        """Setup and return a Chrom[e|ium] browser for Selenium."""
        opts = Options()
        absp = os.path.abspath
        if "TRAVIS" in os.environ:  # github.com/travis-ci/travis-ci/issues/938
            opts.add_argument("--no-sandbox")
        opts.add_extension(self.pb_ext_path)  # will fail if ext can't be found
        if self.browser_bin:  # otherwise will use webdriver's default binary
            print("Browser binary: %s" % absp(self.browser_bin))
            opts.binary_location = self.browser_bin  # set binary location
        # Fix for https://code.google.com/p/chromedriver/issues/detail?id=799
        opts.add_experimental_option("excludeSwitches",
                                     ["ignore-certificate-errors"])
        return webdriver.Chrome(chrome_options=opts)

    def tearDown(self):
        """Quit the browser; the display is stopped by the cleanup from setUp."""
        self.driver.quit()
Ejemplo n.º 16
0
 def xvfb(self, line, cell=None):
     """Execute code through ``self.shell.ex`` while an Xvfb display is running.

     ``cell`` is executed when given, otherwise ``line``. The display is
     created from ``self.xvfb_kwargs`` and is stopped even when the
     executed code raises.
     """
     display = Xvfb(**self.xvfb_kwargs)
     display.start()
     try:
         self.shell.ex(line if cell is None else cell)
     finally:
         display.stop()
Ejemplo n.º 17
0
 def test_stop(self):
     """stop() clears the process handle and restores the previous DISPLAY."""
     display_before = os.environ['DISPLAY']
     display = Xvfb()
     display.start()
     display_while_running = os.environ['DISPLAY']
     self.assertNotEqual(display_before, display_while_running)
     display.stop()
     self.assertIsNone(display.proc)
     display_after = os.environ['DISPLAY']
     self.assertEqual(display_before, display_after)
Ejemplo n.º 18
0
def set_up():
    """Start a 1280x720x24 virtual display and a Chrome browser.

    Both objects are published through the module-level ``xvfb`` and
    ``browser`` globals; the browser title and the display name are printed.
    """
    global browser
    global xvfb
    xvfb = Xvfb(colordepth=24, height=720, width=1280)
    xvfb.start()
    browser = webdriver.Chrome()
    page_title = browser.title
    print(page_title)
    display_name = ':%d' % xvfb.vdisplay_num
    print(display_name)
Ejemplo n.º 19
0
 def test_with_xvfb():
     """Run ``my_test``, inside a 1920x1080 virtual display if ``use_xvfb`` is set.

     The display is stopped even when ``my_test`` raises.
     """
     if not use_xvfb:
         my_test()
         return

     from xvfbwrapper import Xvfb
     display = Xvfb(width=1920, height=1080)
     display.start()
     try:
         my_test()
     finally:
         display.stop()
Ejemplo n.º 20
0
class Webdriver(unittest.TestCase):
    """Visit SITE after opting in and record the resulting ad preferences."""

    def setUp(self):
        """Start a virtual display and the browser selected by BROWSER.

        The display is stopped through ``addCleanup``: cleanups run after
        ``tearDown`` (so the browser quits first) and also when ``setUp``
        itself fails, so the Xvfb server is never left running.
        """
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        self.addCleanup(self.vdisplay.stop)

        system = platform.system()
        if BROWSER == 'firefox':
            if system == 'Darwin':
                self.driver = webdriver.Firefox()
            elif system == 'Linux':
                self.driver = webdriver.Firefox(proxy=proxy)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        elif BROWSER == 'chrome':
            print("WARNING: Expecting chromedriver at specified location !!")
            if system == 'Darwin':
                chromedriver = "./experiment/chromedriver/chromedriver_mac"
                os.environ["webdriver.chrome.driver"] = chromedriver
                self.driver = webdriver.Chrome(executable_path=chromedriver)
            elif system == 'Linux':
                chromedriver = "./experiment/chromedriver/chromedriver_linux"
                os.environ["webdriver.chrome.driver"] = chromedriver
                chrome_option = webdriver.ChromeOptions()
                chrome_option.add_argument("--proxy-server=yogi.pdl.cmu.edu:3128" )
                self.driver = webdriver.Chrome(executable_path=chromedriver, chrome_options=chrome_option)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        else:
            print("Unsupported Browser")
            sys.exit(0)
        self.driver.implicitly_wait(10)
        self.base_url = "https://www.google.com/"
        self.verificationErrors = []
        self.driver.set_page_load_timeout(40)
        self.accept_next_alert = True

    def test_webdriver(self):
        """Append ``SITE||pref1@pref2...`` to TARGET_FILE when preferences are found."""
        driver = self.driver
        cole.optIn(driver)
        driver.get(SITE)
        time.sleep(20)
        pref = cole.get_ad_pref(driver)
        print(SITE)
        print("pref= %s" % (pref,))
        if pref != []:
            with open(TARGET_FILE, "a") as fo:
                fo.write(SITE+"||"+"@".join(pref)+'\n')

    def tearDown(self):
        """Quit the browser; the display is stopped by the cleanup from setUp."""
        self.driver.quit()
Ejemplo n.º 21
0
def xvfb(request):
    """Keep an Xvfb display running around a single ``yield``.

    The display is started before the yield and stopped once the generator
    is resumed. ``request`` is accepted but not used.
    """
    from xvfbwrapper import Xvfb

    display = Xvfb()
    display.start()

    yield

    display.stop()
Ejemplo n.º 22
0
class OFMTransferSiteManager(OFMSiteManager):
    """Site manager that opens the OFM transfer download pages in Firefox."""

    def __init__(self, user=None):  # pylint: disable=super-init-not-called
        """Store the login credentials and start a virtual display.

        Args:
            user: object providing ``ofm_username`` and ``ofm_password``.
                When falsy, the credentials are read from the environment
                variables OFM_USERNAME and OFM_PASSWORD (``None`` if unset).
        """
        self.user = user
        if self.user:
            self._login_user = self.user.ofm_username
            self._login_password = self.user.ofm_password
        else:
            # os.environ is a mapping and cannot be called.
            self._login_user = os.environ.get('OFM_USERNAME')
            self._login_password = os.environ.get('OFM_PASSWORD')

        # Defined up front so kill_browser() works before any download.
        self.browser = None

        self.display = Xvfb()
        self.display.start()

    def download_transfer_excels(self, matchdays=None):
        """Log in and visit the transfer page of every matchday lacking a file.

        Args:
            matchdays: iterable of matchdays; defaults to the current one.
        """
        profile = webdriver.FirefoxProfile(os.path.join(BASE_DIR, 'ofm_transfer_data', 'firefox_profile'))
        profile.set_preference("browser.download.dir", os.path.join(BASE_DIR, 'ofm_transfer_data'))
        self.browser = webdriver.Firefox(firefox_profile=profile)
        self.browser.set_page_load_timeout(10)

        self.login()

        if not matchdays:
            matchdays = [Matchday.get_current()]

        for matchday in matchdays:
            if not self._is_transfer_file_present(matchday):
                try:
                    # NOTE(review): ``self`` is passed explicitly on top of the
                    # bound call - kept as in the original; confirm this is
                    # what the ``timeout`` decorator in use requires.
                    self._jump_to_transfer_page(self, matchday=matchday)  # pylint: disable=redundant-keyword-arg
                except TimeoutError:
                    # Best effort: a page that times out is skipped.
                    pass

    @staticmethod
    def _is_transfer_file_present(matchday=None):
        """Return True if the transfer CSV for ``matchday`` exists on disk."""
        if not matchday:
            matchday = Matchday.get_current()

        file_name = 'ofm_spielerwechsel_{}_{}.csv'.format(
            matchday.season.number,
            matchday.number)
        return os.path.isfile(os.path.join(BASE_DIR, 'ofm_transfer_data', file_name))

    @timeout(5, use_signals=False)
    def _jump_to_transfer_page(self, matchday=None):
        """Open the transfer download frame, optionally for a given matchday."""
        if not matchday:
            self.jump_to_frame(Constants.Transfer.DOWNLOAD_TRANSFERS)
        else:
            self.jump_to_frame(Constants.Transfer.DOWNLOAD_TRANSFERS_FROM_MATCHDAY.format(matchday.number))

    def kill_browser(self):
        """Quit the browser (if one was started) and stop the virtual display."""
        if self.browser:
            # quit() ends the browser session; stop_client() is only a hook.
            self.browser.quit()
            self.browser = None
        self.display.stop()
Ejemplo n.º 23
0
    def test_renderer(self):
        """Record one 600x600 frame and check that the output file exists.

        The virtual display is stopped even when recording or the assertion
        fails.
        """
        vdisplay = Xvfb()
        vdisplay.start()
        try:
            ren = window.Renderer()
            window.record(ren, n_frames=1, out_path=self.out_file, size=(600, 600))
            self.assertTrue(os.path.exists(self.out_file))
        finally:
            vdisplay.stop()
Ejemplo n.º 24
0
 def test_start_without_existing_display(self):
     """Starting works when DISPLAY has been removed from the environment."""
     del os.environ['DISPLAY']
     display = Xvfb()
     # Registration order matters: cleanups run in reverse.
     self.addCleanup(display.stop)
     self.addCleanup(self.reset_display)
     display.start()
     expected_display = ':{0}'.format(display.new_display)
     self.assertEqual(expected_display, os.environ['DISPLAY'])
     self.assertIsNotNone(display.proc)
Ejemplo n.º 25
0
# Selenium test case that starts a brew controller web server and drives it
# with Firefox.
# NOTE(review): this class is damaged in this copy - the ``self.base_url``
# line below is fused with the tail of a later statement (masked URL,
# unbalanced parenthesis), so the end of setUp and the start of at least one
# test method are missing. Restore from the original file before editing.
class T2w(unittest.TestCase):
    def setUp(self):
        # Start the web controller on a port chosen by helpers.findPort().
        p = helpers.findPort()
        server = webctrl.myserver.myserver(host="localhost", port=p)
        server.quiet = True

        controllers = ctrl.setupControllers(False, True, True)
        self.brewme = webctrl.runbrew(
            controllers,
            helpers.getTestRecipeList(),
            server)
        self.brewme.startNonBlocking()

        print "up and running"

        # Run the browser inside a virtual display only when requested.
        if VIRTUALDISPLAY:
            self.vdisplay = Xvfb(width=1280, height=720)
            self.vdisplay.start()

        self.driver = webdriver.Firefox()
        self.driver.implicitly_wait(30)
        # NOTE(review): corrupted line - see the note above the class.
        self.base_url = "http://*****:*****@name='recipe'])[12]").click()
        driver.find_element_by_css_selector("input[type=\"submit\"]").click()
        self.assertEqual("17 Falconers Flight IPA",
                         driver.find_element_by_css_selector("form > b").text)
        print "===== SUCCESS test_recipeliststatus ====="

    def is_element_present(self, how, what):
        # Return True if the driver finds an element for the locator
        # strategy ``how`` and value ``what``, False if it raises
        # NoSuchElementException.
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException, e:
            return False
        return True
Ejemplo n.º 26
0
    def get_display(self):
        """Return the X display to use, as a ``$DISPLAY``-style string.

        Resolution order:

        1. the display already cached on ``self._display``;
        2. the ``execution``/``display_variable`` config option, then the
           ``DISPLAY`` environment variable;
        3. a newly started Xvfb server (not attempted on macOS).

        A display taken from step 2 may carry a host prefix and a screen
        suffix (``[host]:display[.screen]``); only the display number is
        cached, while the full string is returned unchanged.

        Raises:
            RuntimeError: on macOS when no display is configured, or when
                ``xvfbwrapper`` cannot be imported.
            ValueError: if the configured display number is not an integer.
        """
        if self._display is not None:
            return ':%d' % self._display.new_display

        sysdisplay = None
        if self._config.has_option('execution', 'display_variable'):
            sysdisplay = self._config.get('execution', 'display_variable')

        sysdisplay = sysdisplay or os.getenv('DISPLAY')
        if sysdisplay:
            from collections import namedtuple

            def _mock():
                pass

            # Store a fake Xvfb object so later calls take the cached path.
            # Drop an optional ".screen" suffix before converting, otherwise
            # values such as ":0.0" cannot be parsed as an integer.
            ndisp = int(sysdisplay.split(':')[-1].split('.')[0])
            Xvfb = namedtuple('Xvfb', ['new_display', 'stop'])
            self._display = Xvfb(ndisp, _mock)
            return sysdisplay
        else:
            if 'darwin' in sys.platform:
                raise RuntimeError(
                    'Xvfb requires root permissions to run in OSX. Please '
                    'make sure that an X server is listening and set the '
                    'appropriate config on either $DISPLAY or nipype\'s '
                    '"display_variable" config. Valid X servers include '
                    'VNC, XQuartz, or manually started Xvfb.')

            # If $DISPLAY is empty, it confuses Xvfb so unset
            if sysdisplay == '':
                del os.environ['DISPLAY']
            try:
                from xvfbwrapper import Xvfb
            except ImportError:
                raise RuntimeError(
                    'A display server was required, but $DISPLAY is not defined '
                    'and Xvfb could not be imported.')

            self._display = Xvfb(nolisten='tcp')
            self._display.start()

            # Older versions of xvfbwrapper used vdisplay_num
            if not hasattr(self._display, 'new_display'):
                setattr(self._display, 'new_display',
                        self._display.vdisplay_num)

            return ':%d' % self._display.new_display
Ejemplo n.º 27
0
class Webdriver(unittest.TestCase):
    """Apply a treatment in the browser and collect measurements for RUNS rounds."""

    def setUp(self):
        """Start a virtual display and the browser selected by BROWSER.

        The display is stopped through ``addCleanup``: cleanups run after
        ``tearDown`` (so the browser quits first) and also when ``setUp``
        itself fails, so the Xvfb server is never left running.
        """
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        self.addCleanup(self.vdisplay.stop)

        system = platform.system()
        if BROWSER == 'firefox':
            if system == 'Darwin':
                self.driver = webdriver.Firefox()
            elif system == 'Linux':
                self.driver = webdriver.Firefox(proxy=proxy)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        elif BROWSER == 'chrome':
            print("WARNING: Expecting chromedriver at specified location !!")
            if system == 'Darwin':
                chromedriver = "./experiment/chromedriver/chromedriver_mac"
                os.environ["webdriver.chrome.driver"] = chromedriver
                self.driver = webdriver.Chrome(executable_path=chromedriver)
            elif system == 'Linux':
                chromedriver = "./experiment/chromedriver/chromedriver_linux"
                os.environ["webdriver.chrome.driver"] = chromedriver
                chrome_option = webdriver.ChromeOptions()
                chrome_option.add_argument("--proxy-server=yogi.pdl.cmu.edu:3128" )
                self.driver = webdriver.Chrome(executable_path=chromedriver, chrome_options=chrome_option)
            else:
                print("Unidentified Platform")
                sys.exit(0)
        else:
            print("Unsupported Browser")
            sys.exit(0)
        self.driver.implicitly_wait(10)
        self.base_url = "https://www.google.com/"
        self.verificationErrors = []
        self.driver.set_page_load_timeout(40)
        self.accept_next_alert = True

    def test_webdriver(self):
        """Log the browser start, then run treatment/measurement rounds."""
        driver = self.driver
        helper.setLogFile(LOG_FILE)
        helper.log("browserStarted||"+str(TREATMENTID), ID)
        run = 0
        while run < RUNS:
            helper.applyTreatment(driver, TREATMENTS[TREATMENTID], ID, TREATMENTID)
            helper.wait_for_others(AGENTS, ID, ROUND)
            time.sleep(20)
            helper.collectMeasurement(driver, MEASUREMENT, ID, TREATMENTID)
            run += 1

    def tearDown(self):
        """Quit the browser; the display is stopped by the cleanup from setUp."""
        self.driver.quit()
Ejemplo n.º 28
0
def suro_ws_js_enabled_page():
    """Open google.com in a headless Firefox and print the page title.

    The browser is quit and the virtual display stopped even when loading
    the page fails.
    """
    vdisplay = Xvfb()
    vdisplay.start()
    try:
        browser = webdriver.Firefox()
        try:
            browser.get('http://www.google.com')
            print(browser.title)
        finally:
            browser.quit()
    finally:
        vdisplay.stop()
Ejemplo n.º 29
0
class T2w(unittest.TestCase):
    """Browser tests against a brew controller web server started in setUp."""

    def setUp(self):
        """Start the web controller, a virtual display and Firefox."""
        port = helpers.findPort()
        web_server = webctrl.myserver.myserver(host="localhost", port=port)
        web_server.quiet = True

        self.brewme = webctrl.runbrew(helpers.timerCtrl(),
                                      helpers.getSimpleBSMX(),
                                      web_server)
        self.brewme.startNonBlocking()

        print("up and running")

        # Comment out next two lines to see firefox on local display
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()

        firefox = webdriver.Firefox()
        self.driver = firefox
        firefox.implicitly_wait(30)
        self.base_url = "http://localhost:%i" % port
        self.verificationErrors = []
        self.accept_next_alert = True

    def url_base(self):
        """Return the base URL of the server started in setUp."""
        return self.base_url

    def test_index(self):
        """The home page title contains 'Hopitty'; the button leads to a 'Hopitty' heading."""
        browser = self.driver
        browser.get(self.url_base())
        self.assertTrue('Hopitty' in browser.title)

        browser.find_element_by_css_selector("button").click()
        heading = browser.find_element_by_css_selector("h1")
        self.assertEqual("Hopitty", heading.text)

    def test_RunControl(self):
        """The second link's button opens the 'Run Control' page."""
        browser = self.driver
        browser.get(self.url_base())
        # Make sure we start on home page
        self.assertTrue('Hopitty' in browser.title)

        browser.find_element_by_xpath("//a[2]/button").click()
        heading = browser.find_element_by_css_selector("h1")
        self.assertEqual("Run Control", heading.text)

    def is_element_present(self, how, what):
        """Return True if an element is found for locator ``how``/``what``."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        else:
            return True
Ejemplo n.º 30
0
 def test_start_with_kwargs(self):
     """Geometry keyword arguments are stored and the display starts."""
     geometry = {'width': 800, 'height': 600, 'colordepth': 16}
     display = Xvfb(**geometry)
     self.addCleanup(display.stop)
     display.start()
     self.assertEqual(geometry['width'], display.width)
     self.assertEqual(geometry['height'], display.height)
     self.assertEqual(geometry['colordepth'], display.colordepth)
     current_display = os.environ['DISPLAY']
     self.assertEqual(current_display, ':%d' % display.vdisplay_num)
     self.assertIsNotNone(display.proc)
Ejemplo n.º 31
0
def xvfb_if_supported() -> Generator:
    """Yield exactly once, with an Xvfb display running when one can be started.

    If creating or starting the display raises EnvironmentError, OSError or
    RuntimeError, the generator still yields, just without a display.

    Only the start-up is guarded by the ``except`` clause. Guarding the
    ``yield`` as well would turn one of those exceptions, raised by the
    code running at the yield point, into a second ``yield``.
    """
    try:
        display = Xvfb()
        display.start()
    except (EnvironmentError, OSError, RuntimeError):
        display = None
    try:
        yield
    finally:
        if display is not None:
            display.stop()
Ejemplo n.º 32
0
class SeleniumScraper(implements(Scraper, ActionRunner)):
    """Scraper that drives a Firefox WebDriver, optionally inside Xvfb.

    Named actions ('click', 'remove') are registered at construction time
    and run through ``execute`` / ``execute_filter``. Every executed action
    is appended to ``history`` as ``(timestamp, action_name)``.

    NOTE(review): ``register`` is not defined in this class; presumably it
    is provided by the ``ActionRunner`` side of the base. Confirm.
    """

    def __init__(self, log, config: ScraperConfig):
        """Start the optional virtual display and the Firefox driver.

        :param log: logger-like object; ``warn`` and ``error`` are used
        :param config: scraper configuration; the ``img``, ``xvfb``,
            ``width``, ``height``, ``lat`` and ``lon`` entries are read
            through their ``get()`` accessors
        """
        self.log = log
        self.config = config
        # Defined before anything can fail so that __del__ always finds
        # these attributes, even when start-up raises halfway through.
        self.display = None
        self._driver = None
        self.last_url = None
        self.last_page = None
        self._actions = {}
        self._history = []

        firefox_profile = webdriver.FirefoxProfile()
        firefox_profile.set_preference('permissions.default.image',
                                       self.config.img.get())
        firefox_profile.set_preference(
            'dom.ipc.plugins.enabled.libflashplayer.so', 'false')

        if self.config.xvfb.get():
            self.display = Xvfb()
            self.display.start()

        self._driver = webdriver.Firefox(firefox_profile)

        self._driver.set_window_size(self.config.width.get(),
                                     self.config.height.get())
        self._driver.set_window_position(self.config.lat.get() or 0,
                                         self.config.lon.get() or 0)

        click = ClickAction(self, self.log)
        filter_remove = FilterRemoveAction(self, self.log)
        self.register('click', click)
        self.register('remove', filter_remove)

    @property
    def actions(self) -> dict:
        """Mapping of action name to registered action object."""
        return self._actions

    def get_doc(self, doc):
        """
        Load the page at the url [doc] and return its source
        :param doc: the url to the page
        :return: the page source reported by the driver after loading
        """
        self._driver.get(doc)
        page_source = self.driver.page_source
        self.last_url = doc
        self.last_page = page_source
        return self.last_page

    def get(self):
        """Return the current page source without navigating."""
        page_source = self._driver.page_source
        self.last_page = page_source
        return self.last_page

    def action_get(self, actions: list):
        """Run each composite action string, then return the page source."""
        for x in actions:
            self.execute(x)
        return self.get()

    def filter_get(self, filters: list):
        """Run each filter definition, then return the page source."""
        for x in filters:
            self.execute_filter(x)
        return self.get()

    @property
    def history(self):
        """List of ``(timestamp, action_name)`` tuples, in execution order."""
        return self._history

    def execute_filter(self, filter_def: dict):
        """Run the action named by the first item of ``filter_def``.

        The key is the action name (surrounding whitespace and any '_pre_'
        substring are removed); the value is handed to the action's
        ``execute``. Unknown names are logged and ignored.
        """
        action_name, action_path = next(iter(filter_def.items()))
        action_name = action_name.strip().replace('_pre_', '')
        if action_name in self._actions:
            self._history.append((time.time(), action_name))
            self._actions[action_name].execute(action_path)
        else:
            self.log.warn(f"Command [{action_name}] not recognised")

    def execute(self, action_composite: str):
        """Run the action embedded in ``action_composite``.

        The string carries the action name in braces, e.g. ``{click}//a``;
        the rest of the string is handed to the action's ``execute``. A
        string without a ``{name}`` part, or with an unknown name, is
        logged and ignored.
        """
        action_name, action_path = SeleniumScraper.__get_action(
            action_composite)
        if action_name is None:
            # __get_action found no "{name}"; calling .strip() on None
            # would raise AttributeError.
            self.log.warn(f"No action name found in [{action_composite}]")
            return
        action_name = action_name.strip()
        if action_name in self._actions:
            self._history.append((time.time(), action_name))
            self._actions[action_name].execute(action_path)
        else:
            self.log.warn(f"Command [{action_name}] not recognised")

    @property
    def driver(self):
        """The underlying WebDriver instance (``None`` if never started)."""
        return self._driver

    def make_absolute_url(self, url):
        """Resolve ``url`` against the last URL loaded through ``get_doc``."""
        return urljoin(self.last_url, url)

    @staticmethod
    def __get_action(action_composite):
        """Split ``"{name}rest"`` into ``(name, rest)``.

        :return: ``(None, None)`` when no ``{...}`` group is present;
            otherwise the first group's content and the input with every
            ``{...}`` group removed
        """
        pattern = '{(.+?)}'
        matches = re.search(pattern, action_composite)
        if not matches:
            return None, None
        action_name = matches.group(1)
        action_xpath = re.sub(pattern, '', action_composite)
        return action_name, action_xpath

    def __del__(self):
        """Best-effort shutdown of the browser and the virtual display."""
        # getattr keeps this safe on a partially constructed instance.
        driver = getattr(self, "_driver", None)
        if driver is not None:
            try:
                time.sleep(
                    10)  # wait for operations to complete before closing
                # quit() ends the whole session; close() only closes the
                # current window and can leave the driver process behind.
                driver.quit()
            except Exception as ex:
                self.log.error(ex)
        # Stopped in its own try block so that a failing browser shutdown
        # cannot leave the Xvfb server running.
        display = getattr(self, "display", None)
        if display is not None:
            try:
                display.stop()
            except Exception as ex:
                self.log.error(ex)
Ejemplo n.º 33
0
    def startup(self):
        """Validate that all of the external dependencies are installed.

        Checks Python modules (via ``self.requires``) and the external
        tools, starts Xvfb when needed, selects the capture display, and
        installs traffic shaping.

        :return: ``True`` only if every mandatory check passed
        """
        ret = True

        # default /tmp/wptagent as an alive file on Linux
        if self.options.alive is None:
            if platform.system() == "Linux":
                self.options.alive = '/tmp/wptagent'
            else:
                self.options.alive = os.path.join(os.path.dirname(__file__), 'wptagent.alive')
        self.alive()

        ret = self.requires('dns', 'dnspython') and ret
        ret = self.requires('monotonic') and ret
        ret = self.requires('PIL', 'pillow') and ret
        ret = self.requires('psutil') and ret
        ret = self.requires('requests') and ret
        if not self.options.android and not self.options.iOS:
            ret = self.requires('tornado') and ret
        # Windows-specific imports
        if platform.system() == "Windows":
            ret = self.requires('win32api', 'pypiwin32') and ret

        # Try patching ws4py with a faster lib (optional, never fatal)
        try:
            self.requires('wsaccel')
            import wsaccel
            wsaccel.patch_ws4py()
        except Exception:
            logging.debug('wsaccel not installed, Chrome debug interface will be slower than it could be')

        # print(...) with a single string argument behaves the same on
        # Python 2 and Python 3, so the messages below are unchanged.
        try:
            subprocess.check_output(['python', '--version'])
        except Exception:
            print("Make sure python 2.7 is available in the path.")
            ret = False

        try:
            subprocess.check_output('{0} -version'.format(self.image_magick['convert']), shell=True)
        except Exception:
            print("Missing convert utility. Please install ImageMagick "
                  "and make sure it is in the path.")
            ret = False

        try:
            subprocess.check_output('{0} -version'.format(self.image_magick['mogrify']), shell=True)
        except Exception:
            print("Missing mogrify utility. Please install ImageMagick "
                  "and make sure it is in the path.")
            ret = False

        if platform.system() == "Linux":
            try:
                subprocess.check_output(['traceroute', '--version'])
            except Exception:
                logging.debug("Traceroute is missing, installing...")
                subprocess.call(['sudo', 'apt-get', '-yq', 'install', 'traceroute'])

        # if we are on Linux and there is no display, enable xvfb by default
        if platform.system() == "Linux" and not self.options.android and \
                not self.options.iOS and 'DISPLAY' not in os.environ:
            self.options.xvfb = True

        if self.options.xvfb:
            ret = self.requires('xvfbwrapper') and ret
            if ret:
                from xvfbwrapper import Xvfb
                self.xvfb = Xvfb(width=1920, height=1200, colordepth=24)
                self.xvfb.start()

        # Figure out which display to capture from
        if platform.system() == "Linux" and 'DISPLAY' in os.environ:
            logging.debug('Display: %s', os.environ['DISPLAY'])
            self.capture_display = os.environ['DISPLAY']
        elif platform.system() == "Darwin":
            proc = subprocess.Popen('ffmpeg -f avfoundation -list_devices true -i ""',
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
            _, err = proc.communicate()
            for line in err.splitlines():
                # The pipe delivers bytes; decode so the text pattern also
                # matches on Python 3.
                matches = re.search(r'\[(\d+)\] Capture screen', line.decode('utf-8'))
                if matches:
                    self.capture_display = matches.group(1)
                    break
        elif platform.system() == "Windows":
            self.capture_display = 'desktop'

        if self.options.throttle:
            try:
                subprocess.check_output('sudo cgset -h', shell=True)
            except Exception:
                print("Missing cgroups, make sure cgroup-tools is installed.")
                ret = False

        # Fix Lighthouse install permissions
        if platform.system() != "Windows":
            from internal.os_util import run_elevated
            run_elevated('chmod', '-R 777 ~/.config/configstore/')
            try:
                import getpass
                run_elevated('chown', '-R {0}:{0} ~/.config'.format(getpass.getuser()))
            except Exception:
                pass

        # Check the iOS install; "and ret" keeps earlier failures instead
        # of letting a passing iOS check overwrite them.
        if self.ios is not None:
            ret = self.ios.check_install() and ret

        if not self.options.android and not self.options.iOS and not self.options.noidle:
            self.wait_for_idle(300)
        if self.adb is not None:
            if not self.adb.start():
                print("Error configuring adb. Make sure it is installed and in the path.")
                ret = False
        self.shaper.remove()
        if not self.shaper.install():
            if platform.system() == "Windows":
                print("Error configuring traffic shaping, make sure secure boot is disabled.")
            else:
                print("Error configuring traffic shaping, make sure it is installed.")
            ret = False
        return ret
Ejemplo n.º 34
0
def runner(console: Console, shutdownEvent: Event, headless: bool, user: dict,
           proxy: dict, playlist: str, vnc: bool, sqsEndpoint: str,
           screenshotDir: str, runnerStats: Array, processStates: Array):
    """Create one account in a browser and queue it for later processing.

    Starts an Xvfb display (and optionally a VNC server) when not headless,
    obtains a Chrome driver, registers ``user`` through the adapter, and
    sends ``{'user', 'playlist'}`` to the SQS queue. All resources are
    released before returning, whatever the outcome.

    :param console: logger-like object (``log`` / ``exception`` are used)
    :param shutdownEvent: passed through to the driver manager and adapter
    :param headless: when false, a virtual display is started first
    :param user: account data; ``email`` is read, ``windowSize`` (a
        ``"width,height"`` string) is optional
    :param proxy: passed through to the driver manager
    :param playlist: included in the queued message
    :param vnc: when true (and not headless), expose the display over VNC
    :param sqsEndpoint: queue URL for the message
    :param screenshotDir: directory receiving ``<id>.log`` / ``<id>.png``
        when registration fails
    :param runnerStats: shared counters, indexed by the ``STAT_*`` constants
    :param processStates: unused here
    """
    # Local import: the path join below is the only use of os in this block.
    import os

    tid = current_process().pid
    console.log('#%d Start' % tid)
    driver = None
    vdisplay = None
    x11vnc = None
    userDataDir = None
    spotify = None
    try:
        if not headless:
            width = 1280
            height = 1024
            if 'windowSize' in user:
                # Parse to ints so stray whitespace cannot end up in the
                # Xvfb screen geometry.
                width, height = (int(part)
                                 for part in user['windowSize'].split(','))
            vdisplay = Xvfb(width=width,
                            height=height,
                            colordepth=24,
                            tempdir=None,
                            noreset='+render')
            vdisplay.start()
            if vnc:
                x11vnc = X11vnc(vdisplay)
                x11vnc.start()

        driverManager = DriverManager(console, shutdownEvent)
        driverData = driverManager.getDriver(type='chrome',
                                             uid=tid,
                                             user=user,
                                             proxy=proxy,
                                             headless=headless)
        del driverManager
        collect()
        if not driverData:
            raise Exception('No driver was returned from adapter')

        # Record the profile directory before checking the driver so that
        # it is removed below even when no driver came back. The display
        # and VNC server are also stopped by that shared cleanup, VNC first.
        driver = driverData['driver']
        userDataDir = driverData['userDataDir']
        if not driver:
            raise Exception('No driver was returned from adapter')
    except Exception:
        runnerStats[STAT_DRIVER_NONE] += 1
        console.exception('Driver unavailable')
    else:
        try:
            spotify = Adapter(driver, console, shutdownEvent)
            console.log('#%d Start create account for %s' %
                        (tid, user['email']))
            spotify.register(user)
            try:
                boto3.client('sqs').send_message(
                    QueueUrl=sqsEndpoint,
                    MessageBody=dumps({
                        'user': user,
                        'playlist': playlist
                    }),
                    DelaySeconds=1,
                )
            except Exception:
                console.exception('#%d Failed to send message to the queue' %
                                  tid)
            else:
                console.log('#%d Account created for %s' %
                            (tid, user['email']))
                runnerStats[STAT_ACCOUNT_CREATED] += 1
        except Exception as e:
            runnerStats[STAT_ERROR] += 1
            try:
                report_id = randint(10000, 99999)
                # os.path.join works with or without a trailing separator
                # on screenshotDir.
                report_base = os.path.join(screenshotDir, '%d' % report_id)
                with open(report_base + '.log', 'w') as f:
                    f.write(str(e))
                driver.save_screenshot(report_base + '.png')
            except Exception:
                console.exception('#%d Failed to save error report' % tid)

    # Best-effort cleanup: every step runs even if an earlier one fails.
    if driver:
        try:
            driver.quit()
        except Exception:
            console.exception('#%d Failed to quit driver' % tid)
        del driver
    if spotify:
        del spotify
    if userDataDir:
        try:
            rmtree(path=userDataDir, ignore_errors=True)
        except Exception:
            console.exception('#%d Failed to remove user data dir' % tid)
    if x11vnc:  # Terminate vnc server if any, before its display goes away
        try:
            x11vnc.stop()
        except Exception:
            console.exception('#%d Failed to stop x11vnc' % tid)
        del x11vnc
    if vdisplay:
        try:
            vdisplay.stop()
        except Exception:
            console.exception('#%d Failed to stop virtual display' % tid)
        del vdisplay
    console.log('#%d Stop' % tid)
    collect()
Ejemplo n.º 35
0
class WPTAgent(object):
    """Main agent workflow

    Polls for jobs, runs each task in the requested browser, uploads the
    results, and owns the process-wide resources (traffic shaper, Xvfb,
    adb, iOS device) that ``cleanup`` releases on exit.
    """
    def __init__(self, options, browsers):
        """Create the helper objects and register exit/signal handlers.

        :param options: parsed agent options; attributes such as
            ``android``, ``iOS`` and ``device`` are read here
        :param browsers: browser configuration handed to ``Browsers``
        """
        from internal.browsers import Browsers
        from internal.webpagetest import WebPageTest
        from internal.traffic_shaping import TrafficShaper
        from internal.adb import Adb
        from internal.ios_device import iOSDevice
        self.must_exit = False
        self.options = options
        self.capture_display = None
        self.job = None
        self.task = None
        self.xvfb = None
        self.root_path = os.path.abspath(os.path.dirname(__file__))
        self.wpt = WebPageTest(options, os.path.join(self.root_path, "work"))
        self.persistent_work_dir = self.wpt.get_persistent_dir()
        self.adb = Adb(self.options, self.persistent_work_dir) if self.options.android else None
        self.ios = iOSDevice(self.options.device) if self.options.iOS else None
        self.browsers = Browsers(options, browsers, self.adb, self.ios)
        self.shaper = TrafficShaper(options)
        atexit.register(self.cleanup)
        signal.signal(signal.SIGTERM, self.signal_handler)
        signal.signal(signal.SIGINT, self.signal_handler)
        # Default to the tools on PATH; on Windows look for an ImageMagick
        # install under the Program Files directories instead.
        self.image_magick = {'convert': 'convert', 'compare': 'compare', 'mogrify': 'mogrify'}
        if platform.system() == "Windows":
            paths = [os.getenv('ProgramFiles'), os.getenv('ProgramFiles(x86)')]
            for path in paths:
                if path is not None and os.path.isdir(path):
                    dirs = sorted(os.listdir(path), reverse=True)
                    for subdir in dirs:
                        if subdir.lower().startswith('imagemagick'):
                            convert = os.path.join(path, subdir, 'convert.exe')
                            compare = os.path.join(path, subdir, 'compare.exe')
                            mogrify = os.path.join(path, subdir, 'mogrify.exe')
                            if os.path.isfile(convert) and \
                                    os.path.isfile(compare) and \
                                    os.path.isfile(mogrify):
                                # Quote paths with spaces: they are later
                                # interpolated into shell command strings.
                                if convert.find(' ') >= 0:
                                    convert = '"{0}"'.format(convert)
                                if compare.find(' ') >= 0:
                                    compare = '"{0}"'.format(compare)
                                if mogrify.find(' ') >= 0:
                                    mogrify = '"{0}"'.format(mogrify)
                                self.image_magick['convert'] = convert
                                self.image_magick['compare'] = compare
                                self.image_magick['mogrify'] = mogrify
                                break

    def run_testing(self):
        """Main testing flow

        Loops until ``must_exit`` is set, an 'exit' file appears in the
        root directory, the message server stops responding (only checked
        when ``options.exit`` > 0), the ``options.exit`` run-time limit
        (minutes) is exceeded, or adb asks for an exit. Calls ``cleanup``
        before returning from the loop.
        """
        if (sys.version_info >= (3, 0)):
            from time import monotonic
        else:
            from monotonic import monotonic
        start_time = monotonic()
        exit_file = os.path.join(self.root_path, 'exit')
        message_server = None
        if not self.options.android and not self.options.iOS:
            from internal.message_server import MessageServer
            message_server = MessageServer()
            message_server.start()
            if not message_server.is_ok():
                logging.error("Unable to start the local message server")
                return
        while not self.must_exit:
            try:
                self.alive()
                if os.path.isfile(exit_file):
                    try:
                        os.remove(exit_file)
                    except Exception:
                        pass
                    self.must_exit = True
                    break
                if message_server is not None and self.options.exit > 0 and \
                        not message_server.is_ok():
                    logging.error("Message server not responding, exiting")
                    break
                if self.browsers.is_ready():
                    self.job = self.wpt.get_test(self.browsers.browsers)
                    if self.job is not None:
                        self.job['image_magick'] = self.image_magick
                        self.job['message_server'] = message_server
                        self.job['capture_display'] = self.capture_display
                        self.job['shaper'] = self.shaper
                        self.task = self.wpt.get_task(self.job)
                        while self.task is not None:
                            start = monotonic()
                            try:
                                self.task['running_lighthouse'] = False
                                if self.job['type'] != 'lighthouse':
                                    self.run_single_test()
                                    self.wpt.get_bodies(self.task)
                                # Lighthouse runs as a second pass, only on
                                # the first uncached run of an error-free,
                                # non-warmup test.
                                if self.task['run'] == 1 and not self.task['cached'] and \
                                        self.job['warmup'] <= 0 and \
                                        self.task['error'] is None and \
                                        'lighthouse' in self.job and self.job['lighthouse']:
                                    if 'page_result' not in self.task or \
                                            self.task['page_result'] is None or \
                                            self.task['page_result'] == 0 or \
                                            self.task['page_result'] == 99999:
                                        self.task['running_lighthouse'] = True
                                        self.wpt.running_another_test(self.task)
                                        self.run_single_test()
                                elapsed = monotonic() - start
                                logging.debug('Test run time: %0.3f sec', elapsed)
                            except Exception as err:
                                msg = ''
                                if err is not None and err.__str__() is not None:
                                    msg = err.__str__()
                                self.task['error'] = 'Unhandled exception running test: '\
                                    '{0}'.format(msg)
                                logging.exception("Unhandled exception running test: %s", msg)
                                traceback.print_exc(file=sys.stdout)
                            self.wpt.upload_task_result(self.task)
                            # Set up for the next run
                            self.task = self.wpt.get_task(self.job)
                if self.job is not None:
                    self.job = None
                else:
                    self.sleep(self.options.polling)
            except Exception as err:
                msg = ''
                if err is not None and err.__str__() is not None:
                    msg = err.__str__()
                if self.task is not None:
                    self.task['error'] = 'Unhandled exception preparing test: '\
                        '{0}'.format(msg)
                logging.exception("Unhandled exception: %s", msg)
                traceback.print_exc(file=sys.stdout)
            if self.options.exit > 0:
                run_time = (monotonic() - start_time) / 60.0
                if run_time > self.options.exit:
                    break
            # Exit if adb is having issues (will cause a reboot after several tries)
            if self.adb is not None and self.adb.needs_exit:
                break
        self.cleanup()

    def run_single_test(self):
        """Run a single test run

        Uses ``self.job`` / ``self.task``; any failure is recorded in
        ``self.task['error']`` rather than raised, except for errors from
        ``browser.prepare`` / ``browser.launch``, which propagate.
        """
        self.alive()
        browser = self.browsers.get_browser(self.job['browser'], self.job)
        if browser is not None:
            browser.prepare(self.job, self.task)
            browser.launch(self.job, self.task)
            try:
                if self.task['running_lighthouse']:
                    # Default log text; kept when the browser cannot run
                    # Lighthouse (presumably replaced by the browser when
                    # it can).
                    self.task['lighthouse_log'] = \
                        'Lighthouse testing is not supported with this browser.'
                    try:
                        browser.run_lighthouse_test(self.task)
                    except Exception:
                        logging.exception('Error running lighthouse test')
                    if self.task['lighthouse_log']:
                        try:
                            log_file = os.path.join(self.task['dir'], 'lighthouse.log.gz')
                            with gzip.open(log_file, GZIP_TEXT, 7) as f_out:
                                f_out.write(self.task['lighthouse_log'])
                        except Exception:
                            logging.exception('Error compressing lighthouse log')
                else:
                    browser.run_task(self.task)
            except Exception as err:
                msg = ''
                if err is not None and err.__str__() is not None:
                    msg = err.__str__()
                self.task['error'] = 'Unhandled exception in test run: '\
                    '{0}'.format(msg)
                logging.exception("Unhandled exception in test run: %s", msg)
                traceback.print_exc(file=sys.stdout)
            browser.stop(self.job, self.task)
            # Delete the browser profile if needed
            if self.task['cached'] or self.job['fvonly']:
                browser.clear_profile(self.task)
        else:
            err = "Invalid browser - {0}".format(self.job['browser'])
            logging.critical(err)
            self.task['error'] = err
        browser = None

    def signal_handler(self, *_):
        """Ctrl+C handler

        The first signal requests a graceful exit; a second one terminates
        the process immediately with exit status 1.
        """
        if self.must_exit:
            # sys.exit instead of the bare exit(): the latter is injected
            # by the site module and is not guaranteed to exist.
            sys.exit(1)
        if self.job is None:
            print("Exiting...")
        else:
            print("Will exit after test completes.  Hit Ctrl+C again to exit immediately")
        self.must_exit = True

    def cleanup(self):
        """Do any cleanup that needs to be run regardless of how we exit"""
        logging.debug('Cleaning up')
        self.shaper.remove()
        if self.xvfb is not None:
            # cleanup runs both at the end of run_testing and via atexit;
            # clear the reference so the display is stopped only once.
            xvfb, self.xvfb = self.xvfb, None
            xvfb.stop()
        if self.adb is not None:
            self.adb.stop()
        if self.ios is not None:
            self.ios.disconnect()

    def sleep(self, seconds):
        """Sleep wrapped in an exception handler to properly deal with Ctrl+C"""
        try:
            time.sleep(seconds)
        except IOError:
            pass

    def wait_for_idle(self, timeout=30):
        """Wait for the system to go idle for at least 2 seconds

        Idle means total CPU use at or below 50% of a single core. Returns
        after ``timeout`` seconds even if the system never went idle.
        """
        if (sys.version_info >= (3, 0)):
            from time import monotonic
        else:
            from monotonic import monotonic
        import psutil
        logging.debug("Waiting for Idle...")
        cpu_count = psutil.cpu_count()
        if cpu_count > 0:
            target_pct = 50. / float(cpu_count)
            idle_start = None
            end_time = monotonic() + timeout
            idle = False
            while not idle and monotonic() < end_time:
                self.alive()
                check_start = monotonic()
                pct = psutil.cpu_percent(interval=0.5)
                if pct <= target_pct:
                    if idle_start is None:
                        idle_start = check_start
                    if monotonic() - idle_start > 2:
                        idle = True
                else:
                    idle_start = None

    def alive(self):
        """Touch a watchdog file indicating we are still alive"""
        if self.options.alive:
            with open(self.options.alive, 'a'):
                os.utime(self.options.alive, None)

    def requires(self, module, module_name=None):
        """Try importing a module and installing it if it isn't available

        :param module: import name of the module
        :param module_name: pip package name, defaults to ``module``
        :return: ``True`` if the module can be imported. Automatic
            installation is only attempted on Python 2.
        """
        ret = False
        if module_name is None:
            module_name = module
        try:
            __import__(module)
            ret = True
        except ImportError:
            pass
        if not ret and sys.version_info < (3, 0):
            from internal.os_util import run_elevated
            logging.debug('Trying to install %s...', module_name)
            subprocess.call([sys.executable, '-m', 'pip', 'uninstall', '-y', module_name])
            run_elevated(sys.executable, '-m pip uninstall -y {0}'.format(module_name))
            subprocess.call([sys.executable, '-m', 'pip', 'install', module_name])
            run_elevated(sys.executable, '-m pip install {0}'.format(module_name))
            try:
                __import__(module)
                ret = True
            except ImportError:
                pass
        if not ret:
            if (sys.version_info >= (3, 0)):
                print("Missing {0} module. Please run 'pip3 install {1}'".format(module, module_name))
            else:
                print("Missing {0} module. Please run 'pip install {1}'".format(module, module_name))
        return ret

    def startup(self, detected_browsers):
        """Validate that all of the external dependencies are installed

        :param detected_browsers: container of detected browser names;
            only membership of 'Firefox' is checked here
        :return: ``True`` only if every mandatory check passed
        """
        ret = True

        # default /tmp/wptagent as an alive file on Linux
        if self.options.alive is None:
            if platform.system() == "Linux":
                self.options.alive = '/tmp/wptagent'
            else:
                self.options.alive = os.path.join(os.path.dirname(__file__), 'wptagent.alive')
        self.alive()
        ret = self.requires('dns', 'dnspython') and ret
        ret = self.requires('monotonic') and ret
        ret = self.requires('PIL', 'pillow') and ret
        ret = self.requires('psutil') and ret
        ret = self.requires('requests') and ret
        if not self.options.android and not self.options.iOS:
            ret = self.requires('tornado') and ret
        # Windows-specific imports
        if platform.system() == "Windows":
            ret = self.requires('win32api', 'pywin32') and ret

        if self.options.webdriver and 'Firefox' in detected_browsers:
            # "and ret" keeps earlier failures instead of overwriting them.
            ret = self.requires('selenium') and ret

        # Optional imports
        self.requires('brotli')
        self.requires('fontTools', 'fonttools')

        # Try patching ws4py with a faster lib
        try:
            self.requires('wsaccel')
            import wsaccel
            wsaccel.patch_ws4py()
        except Exception:
            logging.debug('wsaccel not installed, Chrome debug interface will be slower than it could be')

        try:
            subprocess.check_output(['python', '--version'])
        except Exception:
            print("Make sure python 2.7 is available in the path.")
            ret = False

        try:
            subprocess.check_output('{0} -version'.format(self.image_magick['convert']), shell=True)
        except Exception:
            print("Missing convert utility. Please install ImageMagick and make sure it is in the path.")
            ret = False

        try:
            subprocess.check_output('{0} -version'.format(self.image_magick['mogrify']), shell=True)
        except Exception:
            print("Missing mogrify utility. Please install ImageMagick and make sure it is in the path.")
            ret = False

        if platform.system() == "Linux":
            try:
                subprocess.check_output(['traceroute', '--version'])
            except Exception:
                logging.debug("Traceroute is missing, installing...")
                subprocess.call(['sudo', 'apt', '-yq', 'install', 'traceroute'])

        if self.options.webdriver and 'Firefox' in detected_browsers:
            try:
                subprocess.check_output(['geckodriver', '-V'])
            except Exception:
                logging.debug("geckodriver is missing, installing...")
                subprocess.call(['sudo', 'apt', '-yq', 'install', 'firefox-geckodriver'])

        # If we are on Linux and there is no display, enable xvfb by default
        if platform.system() == "Linux" and not self.options.android and \
                not self.options.iOS and 'DISPLAY' not in os.environ:
            self.options.xvfb = True

        if self.options.xvfb:
            ret = self.requires('xvfbwrapper') and ret
            if ret:
                from xvfbwrapper import Xvfb
                self.xvfb = Xvfb(width=1920, height=1200, colordepth=24)
                self.xvfb.start()

        # Figure out which display to capture from
        if platform.system() == "Linux" and 'DISPLAY' in os.environ:
            logging.debug('Display: %s', os.environ['DISPLAY'])
            self.capture_display = os.environ['DISPLAY']
        elif platform.system() == "Darwin":
            proc = subprocess.Popen('ffmpeg -f avfoundation -list_devices true -i ""',
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
            _, err = proc.communicate()
            for line in err.splitlines():
                matches = re.search(r'\[(\d+)\] Capture screen', line.decode('utf-8'))
                if matches:
                    self.capture_display = matches.group(1)
                    break
        elif platform.system() == "Windows":
            self.capture_display = 'desktop'

        if self.options.throttle:
            try:
                subprocess.check_output('sudo cgset -h', shell=True)
            except Exception:
                print("Missing cgroups, make sure cgroup-tools is installed.")
                ret = False

        # Fix Lighthouse install permissions
        if platform.system() != "Windows" and sys.version_info < (3, 0):
            from internal.os_util import run_elevated
            run_elevated('chmod', '-R 777 ~/.config/configstore/')
            try:
                import getpass
                run_elevated('chown', '-R {0}:{0} ~/.config'.format(getpass.getuser()))
            except Exception:
                pass

        # Check for Node 10+
        if self.get_node_version() < 10.0:
            if platform.system() == "Linux":
                # This only works on debian-based systems
                logging.debug('Updating Node.js to 12.x')
                subprocess.call('curl -sL https://deb.nodesource.com/setup_12.x | sudo -E bash -',
                                shell=True)
                subprocess.call(['sudo', 'apt-get', 'install', '-y', 'nodejs'])
            if self.get_node_version() < 10.0:
                logging.warning("Node.js 10 or newer is required for Lighthouse testing")

        # Check the iOS install; "and ret" keeps earlier failures instead
        # of letting a passing iOS check overwrite them.
        if self.ios is not None:
            ret = self.ios.check_install() and ret

        if not self.options.android and not self.options.iOS and not self.options.noidle:
            self.wait_for_idle(300)
        if self.adb is not None:
            if not self.adb.start():
                print("Error configuring adb. Make sure it is installed and in the path.")
                ret = False
        self.shaper.remove()
        if not self.shaper.install():
            if platform.system() == "Windows":
                print("Error configuring traffic shaping, make sure secure boot is disabled.")
            else:
                print("Error configuring traffic shaping, make sure it is installed.")
            ret = False

        # Update the Windows root certs
        if platform.system() == "Windows":
            self.update_windows_certificates()

        return ret

    def get_node_version(self):
        """Get the installed version of Node.js

        :return: ``major.minor`` as a float, or 0 when ``node`` cannot be
            run or its output does not look like ``vX.Y``
        """
        version = 0
        try:
            if (sys.version_info >= (3, 0)):
                stdout = subprocess.check_output(['node', '--version'], encoding='UTF-8')
            else:
                stdout = subprocess.check_output(['node', '--version'])
            matches = re.match(r'^v(\d+\.\d+)', stdout)
            if matches:
                version = float(matches.group(1))
        except Exception:
            pass
        return version

    def update_windows_certificates(self):
        """ Update the root Windows certificates

        Refreshes at most once every 5 days, based on the modification
        time of the cached ``root_certs.sst`` file. Failures are ignored.
        """
        try:
            cert_file = os.path.join(self.persistent_work_dir, 'root_certs.sst')
            if not os.path.isdir(self.persistent_work_dir):
                os.makedirs(self.persistent_work_dir)
            needs_update = True
            if os.path.isfile(cert_file):
                days = (time.time() - os.path.getmtime(cert_file)) / 86400
                if days < 5:
                    needs_update = False
            if needs_update:
                logging.debug("Updating Windows root certificates...")
                if os.path.isfile(cert_file):
                    os.unlink(cert_file)
                from internal.os_util import run_elevated
                run_elevated('certutil.exe', '-generateSSTFromWU "{0}"'.format(cert_file))
                if os.path.isfile(cert_file):
                    run_elevated('certutil.exe', '-addstore -f Root "{0}"'.format(cert_file))
        except Exception:
            pass
Ejemplo n.º 36
0
def xvfb_launcher(request):
    """Start and return an Xvfb display when ``config.browser.headless`` is set.

    Returns None (without starting anything) otherwise.  ``request`` is not
    used by the body.
    """
    if not config.browser.headless:
        return None
    display = Xvfb()
    display.start()
    return display
Ejemplo n.º 37
0
    def runner(self, t_context: TaskContext):
        """Thread body: create one account and publish it to the queue.

        Steps, in order:
        1. Unless ``t_context.headless`` is set, start an Xvfb display (and an
           x11vnc server on top of it when ``t_context.vnc`` is set).
        2. Obtain a chrome driver from ``self.driverManager`` under
           ``self.lockDriver``.
        3. Call ``spotify.register(t_context.user)``; on success send a message
           with the user and playlist through ``self.client`` and bump the
           account counter.
        4. Release everything that was acquired (driver, user data directory,
           vnc server, display), ignoring errors during release.

        Exceptions are logged through ``self.p_context.console`` and never
        propagate out of this method.
        """
        tid = current_thread().native_id
        self.p_context.console.log('Start thread %d' % tid)
        # Everything that may need releasing starts as None so that the
        # cleanup section at the bottom can test each one independently.
        driver = None
        vdisplay = None
        x11vnc = None
        userDataDir = None
        spotify = None
        try:
            # NOTE(review): this early return skips the cleanup section and
            # the 'Stop thread' log below; nothing has been acquired yet.
            if self.p_context.shutdownEvent.is_set():
                return

            # A virtual display is only created when the browser is NOT
            # running headless.
            if t_context.headless == False:
                vdisplay = Xvfb(width=1280,
                                height=1024,
                                colordepth=24,
                                tempdir=None,
                                noreset='+render')
                vdisplay.start()
                if t_context.vnc:
                    x11vnc = X11vnc(vdisplay)
                    x11vnc.start()

            # Driver creation is serialized across threads.
            with self.lockDriver:
                driverData = self.driverManager.getDriver(
                    type='chrome',
                    uid=tid,
                    user=t_context.user,
                    proxy=t_context.proxy,
                    headless=t_context.headless)
            if not driverData:
                raise Exception('No driverData was returned from adapter')

            driver = driverData['driver']
            userDataDir = driverData['userDataDir']
            if not driver:
                raise Exception('No driver was returned from adapter')
        except:
            # Any setup failure is logged; the cleanup section still runs.
            self.p_context.console.error('Unavailale webdriver: %s' %
                                         format_exc())
        else:
            # Setup succeeded: attempt the registration.
            try:
                spotify = Spotify.Adapter(driver, self.p_context.console,
                                          self.p_context.shutdownEvent,
                                          t_context.batchId)
                self.p_context.console.log('#%d Start create account for %s' %
                                           (tid, t_context.user['email']))
                if spotify.register(t_context.user):
                    self.p_context.console.log('#%d Account created for %s' %
                                               (tid, t_context.user['email']))
                    message = {
                        'user': t_context.user,
                        'playlist': t_context.playlist
                    }
                    # The queue client is shared between threads.
                    with self.lockClient:
                        try:
                            self.client.send_message(
                                QueueUrl=self.p_context.config.SQS_ENDPOINT,
                                MessageBody=dumps(message),
                                DelaySeconds=1,
                            )
                        except:
                            self.p_context.console.exception(
                                'T#%d Failed to send message to the queue %s' %
                                (tid, self.p_context.config.SQS_ENDPOINT))
                        else:
                            self.p_context.console.log(
                                '#%d Message sent for %s' %
                                (tid, t_context.user['email']))
                            # Only accounts whose message was sent are counted.
                            with self.lockAccountCount:
                                self.totalAccountCreated += 1
                                self.p_context.accountsCount[
                                    self.p_context.
                                    channel] = self.totalAccountCreated
                else:
                    # A failed registration is only reported when it was not
                    # caused by a shutdown request.
                    if not self.p_context.shutdownEvent.is_set():
                        self.p_context.console.error(
                            '#%d Failed to create account for %s' %
                            (tid, t_context.user['email']))
            except:
                self.p_context.console.exception()
        # Cleanup: each resource is released independently and failures are
        # ignored so that one broken resource does not block the others.
        if driver:
            try:
                driver.quit()
                del driver
            except:
                pass
        if spotify:
            try:
                del spotify
            except:
                pass
        if userDataDir:
            try:
                rmtree(path=userDataDir, ignore_errors=True)
            except:
                pass
        if x11vnc:  #Terminate vnc server if any
            try:
                x11vnc.stop()
                del x11vnc
            except:
                pass
        # The display is stopped last, after the driver and vnc server.
        if vdisplay:
            try:
                vdisplay.stop()
                del vdisplay
            except:
                pass
        self.p_context.console.log('Stop thread %d' % tid)
        collect()
Ejemplo n.º 38
0
def useZstat(zstat, file_path_name_save, file_path_conte, file_path_name_resting_atlas):
    """Plot a z-stat volume on the Conte69 surfaces and save it as an image.

    The function starts its own Xvfb virtual display (which must be running
    before mayavi is imported), renders off-screen, saves the figure and
    always stops the display again, even when rendering fails.

    Arguments
    ---------
    zstat : string
        Full file path and name to nii to plot.

    file_path_name_save : string
        Full file path and name to png output.  Output dir will be created
        if it doesn't exist.

    file_path_conte : string
        Full file path to Conte atlas

    file_path_name_resting_atlas : string
        Full file path and name of the resting-state atlas file; its header
        supplies the vertex indices of the two brain models.

    Returns
    -------
    None.  Normal error message:
    pixdim[1,2,3] should be non-zero; setting 0 dims to 1
    plot_brain.py: Fatal IO error: client killed

    Example
    -------
    python plot_brain.py -i /groupAnalysis/l2/zstat1_threshold.nii.gz -o /plots/l2test.png -c /git/bp2/32k_ConteAtlas_v2 -r rfMRI_REST1_LR_Atlas.dtseries.nii

    MIT OM Specific Tip
    -------------------
    Call this function from a shell script to run headless BUT requires:
    source activate mathiasg_vd_env
    export QT_API=pyqt
    module add openmind/xvfb-fix/0.1

    #file_path_name=$1
    #file_path_name_save=$2
    #file_path_conte=$3
    #file_path_name_resting_atlas=$4
    python plot_brain.py -i $1 -o $2 -c $3 -r $4

    """

    import matplotlib.pyplot as plt
    import os
    import numpy as np
    import nibabel as nb
    import nibabel.gifti as gifti

    # Crucial:  xvfb must be imported and started before importing mayavi
    from xvfbwrapper import Xvfb
    print('XVb pre')
    vdisplay = Xvfb()
    vdisplay.start()

    # Everything below runs under try/finally so the Xvfb process is not
    # left behind when loading or rendering raises.
    try:
        print('pre maya')
        # Crashes on this line if run with plain python (not xvfb-run ... python) and if xvfbwrapper is after it.
        from mayavi import mlab
        print('post maya')
        from tvtk.api import tvtk
        print('post tvtk')

        print('display')
        mlab.options.offscreen = True #offscreen window for rendering

        # Vertex indices of the first two brain models in the atlas header;
        # the second model's indices are offset by the first model's size.
        img = nb.load(file_path_name_resting_atlas)
        mim = img.header.matrix.mims[1]
        bm1 = mim.brainModels[0]
        lidx = bm1.vertexIndices.indices
        bm2 = mim.brainModels[1]
        ridx = bm1.surfaceNumberOfVertices + bm2.vertexIndices.indices
        bidx = np.concatenate((lidx, ridx))

        axis = [0, 0, 1]
        theta = np.pi

        inflated = True
        split_brain = True

        # Midthickness surfaces are used to sample the volume ...
        surf = gifti.read(file_path_conte + '/Conte69.L.midthickness.32k_fs_LR.surf.gii')
        verts_L_data = surf.darrays[0].data
        faces_L_data = surf.darrays[1].data

        surf = gifti.read(file_path_conte + '/Conte69.R.midthickness.32k_fs_LR.surf.gii')
        verts_R_data = surf.darrays[0].data
        faces_R_data = surf.darrays[1].data

        # ... while the (optionally inflated) surfaces are what gets drawn.
        if inflated:
            surf = gifti.read(file_path_conte + '/Conte69.L.inflated.32k_fs_LR.surf.gii')
            verts_L_display = surf.darrays[0].data
            faces_L_display = surf.darrays[1].data
            surf = gifti.read(file_path_conte + '/Conte69.R.inflated.32k_fs_LR.surf.gii')
            verts_R_display = surf.darrays[0].data
            faces_R_display = surf.darrays[1].data
        else:
            verts_L_display = verts_L_data.copy()
            verts_R_display = verts_R_data.copy()
            faces_L_display = faces_L_data.copy()
            faces_R_display = faces_R_data.copy()

        # Shift both display surfaces along x and y before combining them.
        verts_L_display[:, 0] -= max(verts_L_display[:, 0])
        verts_R_display[:, 0] -= min(verts_R_display[:, 0])
        verts_L_display[:, 1] -= (max(verts_L_display[:, 1]) + 1)
        verts_R_display[:, 1] -= (max(verts_R_display[:, 1]) + 1)

        # Right-hemisphere faces index into the stacked vertex array.
        faces = np.vstack((faces_L_display, verts_L_display.shape[0] + faces_R_display))

        if split_brain:
            verts2 = rotation_matrix(axis, theta).dot(verts_R_display.T).T
        else:
            verts_L_display[:, 1] -= np.mean(verts_L_display[:, 1])
            verts_R_display[:, 1] -= np.mean(verts_R_display[:, 1])
            verts2 = verts_R_display

        verts_rot = np.vstack((verts_L_display, verts2))
        verts = np.vstack((verts_L_data, verts_R_data))

        # A bare file name has no directory part; os.makedirs('') would raise.
        out_dir = os.path.dirname(file_path_name_save)
        if out_dir and not os.path.exists(out_dir):
            os.makedirs(out_dir)

        print('use zstat')
        img = nb.load(zstat)
        print('loaded img')

        threshold = 2.3 # 1000, lower limit
        display_threshold = 6 #8000, upper limit

        # NOTE(review): get_data() is deprecated in recent nibabel releases;
        # kept as-is because get_fdata() may return a different dtype.
        data = img.get_data()
        aff = img.affine
        # Map surface coordinates to voxel indices with the inverse affine.
        indices = np.round((np.linalg.pinv(aff).dot(np.hstack((verts,
                                                  np.ones((verts.shape[0], 1)))).T))[:3, :].T).astype(int)
        scalars2 = data[indices[:, 0], indices[:, 1], indices[:, 2]]
        scalars2[np.abs(scalars2) < threshold] = 0.
        scalars = np.zeros(verts.shape[0])
        scalars[bidx] = scalars2[bidx]

        negative = positive = False
        if np.any(scalars < 0):
            negative = True
        if np.any(scalars > 0):
            positive = True

        # The lines marked ###### pin the colour range to +/- display_threshold
        # regardless of the data range computed just before them.
        nlabels = 2
        vmin = 0
        vmax = 0
        if negative and positive:
            maxval = max(-scalars.min(), scalars.max())
            if maxval > display_threshold:
                maxval = display_threshold
            vmin = -maxval
            vmax = maxval
            nlabels = 3
            vmin = -display_threshold ######
            vmax = display_threshold ######
        elif negative:
            vmin = scalars.min()
            if vmin < -display_threshold:
                vmin = -display_threshold
            vmax = 0
            vmin = -display_threshold ######
        elif positive:
            vmax = scalars.max()
            if vmax > display_threshold:
                vmax = display_threshold
            vmin = 0
            vmax = display_threshold ######

        dual_split = True

        fig1 = mlab.figure(1, bgcolor=(0, 0, 0))
        mlab.clf()
        mesh = tvtk.PolyData(points=verts_rot, polys=faces)
        mesh.point_data.scalars = scalars
        mesh.point_data.scalars.name = 'scalars'
        surf = mlab.pipeline.surface(mesh, colormap='autumn', vmin=vmin, vmax=vmax)
        if dual_split:
            # Second copy of the mesh, rotated and shifted, in the same figure.
            verts_rot_shifted = verts_rot.copy()
            verts_rot_shifted = rotation_matrix(axis, theta).dot(verts_rot_shifted.T).T
            verts_rot_shifted[:, 2] -= (np.max(verts_rot_shifted[:, 2]) - np.min(verts_rot_shifted[:, 2]))
            verts_rot_shifted[:, 0] -= np.max(verts_rot_shifted[:, 0])
            mesh2 = tvtk.PolyData(points=verts_rot_shifted, polys=faces)
            mesh2.point_data.scalars = scalars
            mesh2.point_data.scalars.name = 'scalars'
            surf2 = mlab.pipeline.surface(mesh2, colormap='autumn', vmin=vmin, vmax=vmax)
        colorbar = mlab.colorbar(surf, nb_labels=nlabels) #, orientation='vertical')
        lut = surf.module_manager.scalar_lut_manager.lut.table.to_array()

        # Rebuild the lookup table: grey (192) below the threshold, 'autumn'
        # for positive values and 'cool' for negative values.
        if negative and positive:
            # Floor division: the value is used as a slice index and as an
            # element count, both of which must be integers on Python 3.
            half_index = lut.shape[0] // 2
            index =  int(half_index * threshold / vmax)
            lut[(half_index - index + 1):(half_index + index), :] = 192
            lut[(half_index + index):, :] = 255 * plt.cm.autumn(np.linspace(0, 255, half_index - index).astype(int))
            lut[:(half_index - index), :] = 255 * plt.cm.cool(np.linspace(0, 255, half_index - index).astype(int))
        elif negative:
            index =  int(lut.shape[0] * threshold / abs(vmin))
            lut[(lut.shape[0] - index):, :] = 192
            lut[:(lut.shape[0] - index), :] = 255 * plt.cm.cool(np.linspace(0, 255, lut.shape[0] - index).astype(int))
        elif positive:
            index =  int(lut.shape[0] * threshold / vmax)
            lut[:index, :] = 192
            lut[index:, :] = 255 * plt.cm.autumn(np.linspace(0, 255, lut.shape[0] - index).astype(int))
        lut[:, -1] = 255

        surf.module_manager.scalar_lut_manager.lut.table = lut
        if dual_split:
            surf2.module_manager.scalar_lut_manager.lut.table = lut
        # The scalar bar is switched off and on again around the font change.
        surf.module_manager.scalar_lut_manager.show_scalar_bar = False
        surf.module_manager.scalar_lut_manager.show_legend = False
        surf.module_manager.scalar_lut_manager.label_text_property.font_size = 10
        surf.module_manager.scalar_lut_manager.show_scalar_bar = True
        surf.module_manager.scalar_lut_manager.show_legend = True
        mlab.draw()

        # Camera settings for the alternative (currently disabled) view call.
        translate = [0, 0, 0]
        if inflated:
            zoom = -700
        else:
            zoom = -600
        if dual_split:
            if inflated:
                translate = [0,   0, -104.01467148]
            else:
                translate = [0,  0, -54.76305802]
            if inflated:
                zoom = -750
            else:
                zoom = -570

        #mlab.view(0, 90.0, zoom, translate)
        mlab.view(9, 90.0)

        print(file_path_name_save)

        mlab.savefig(file_path_name_save, figure=fig1, magnification=5)
    finally:
        vdisplay.stop()
Ejemplo n.º 39
0
    os.environ['lab_mode'] = lab_mode.split('@')[0]
    if lab_mode in TRAIN_MODES:
        run_new_mode(spec_file, spec_name, lab_mode)
    else:
        run_old_mode(spec_file, spec_name, lab_mode)


def main():
    """Run one experiment from the command line, or all configured ones.

    With command-line arguments, exactly three are required (spec_file,
    spec_name, lab_mode) and a single run is started.  Without arguments,
    every (spec_name, lab_mode) pair of every spec file listed in
    ``config/experiments.json`` is run in turn.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        assert len(cli_args) == 3, (
            f'To use sys args, specify spec_file, spec_name, lab_mode')
        run_by_mode(*cli_args)
        return

    experiments = util.read('config/experiments.json')
    for spec_file in experiments:
        spec_modes = experiments[spec_file]
        for spec_name, lab_mode in spec_modes.items():
            run_by_mode(spec_file, spec_name, lab_mode)


if __name__ == '__main__':
    # 'spawn' is required for distributed pytorch to work
    mp.set_start_method('spawn')
    if sys.platform != 'darwin':
        # wrap the run in a virtual display as a safety net on headless
        # machines
        with Xvfb() as xvfb:
            main()
    else:
        # xvfb is avoided on MacOS: https://github.com/nipy/nipype/issues/1400
        main()
Ejemplo n.º 40
0
class Crawler:
    def __init__(self, browser, n_sites, exclude, timeout, wait_time,
                 log_stdout, out_path, pb_path, chromedriver_path,
                 firefox_path, **kwargs):  # pylint:disable=too-many-arguments,unused-argument
        """Store the crawl configuration and set up logging.

        ``browser`` must be CHROME or FIREFOX.  Log records always go to
        ``log.txt`` inside ``out_path`` and additionally to stdout when
        ``log_stdout`` is true.  Extra keyword arguments are accepted and
        ignored.
        """
        self.browser = browser
        assert self.browser in (CHROME, FIREFOX)

        # crawl parameters
        self.n_sites, self.exclude = n_sites, exclude
        self.timeout, self.wait_time = timeout, wait_time

        # filesystem locations
        self.out_path, self.pb_path = out_path, pb_path
        self.chromedriver_path = chromedriver_path
        self.firefox_path = firefox_path

        # version is based on when the crawl started
        self.version = time.strftime('%Y.%-m.%-d', time.localtime())

        # logging goes through the root logger at INFO level
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.INFO)
        log_fmt = logging.Formatter('%(asctime)s %(message)s')

        # a file handler is always installed; stdout only if configured
        handlers = [logging.FileHandler(os.path.join(out_path, 'log.txt'))]
        if log_stdout:
            handlers.append(logging.StreamHandler(sys.stdout))
        for handler in handlers:
            handler.setFormatter(log_fmt)
            self.logger.addHandler(handler)

        # names of the extension storage objects that get dumped/restored
        self.storage_objects = ['snitch_map', 'action_map']

    def start_driver(self):
        """Start a new Selenium web driver and install the bundled
        extension.

        The driver is stored in ``self.driver``.  After start-up the page
        load and script timeouts are applied and the method blocks until the
        extension reports that it finished initializing.
        """
        if self.browser == CHROME:
            # The extension is loaded from a private copy whose manifest is
            # patched with a fixed key, which keeps the extension ID constant
            # across runs.
            self.tmp_dir = tempfile.TemporaryDirectory()
            ext_copy = os.path.join(self.tmp_dir.name, "src")
            copytree(os.path.join(self.pb_path, 'src'), ext_copy)

            manifest_file = os.path.join(ext_copy, "manifest.json")
            with open(manifest_file, "r") as src:
                manifest = json.load(src)
            # this key and the extension ID
            # must both be derived from the same private key
            manifest[
                'key'] = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArMdgFkGsm7nOBr/9qkx8XEcmYSu1VkIXXK94oXLz1VKGB0o2MN+mXL/Dsllgkh61LZgK/gVuFFk89e/d6Vlsp9IpKLANuHgyS98FKx1+3sUoMujue+hyxulEGxXXJKXhk0kGxWdE0IDOamFYpF7Yk0K8Myd/JW1U2XOoOqJRZ7HR6is1W6iO/4IIL2/j3MUioVqu5ClT78+fE/Fn9b/DfzdX7RxMNza9UTiY+JCtkRTmm4ci4wtU1lxHuVmWiaS45xLbHphQr3fpemDlyTmaVoE59qG5SZZzvl6rwDah06dH01YGSzUF1ezM2IvY9ee1nMSHEadQRQ2sNduNZWC9gwIDAQAB"  # noqa:E501 pylint:disable=line-too-long
            with open(manifest_file, "w") as dst:
                json.dump(manifest, dst)

            chrome_opts = Options()
            chrome_opts.add_argument('--no-sandbox')
            chrome_opts.add_argument("--load-extension=" + ext_copy)
            chrome_opts.add_experimental_option(
                "prefs", {"profile.block_third_party_cookies": False})
            chrome_opts.add_argument('--dns-prefetch-disable')
            self.driver = webdriver.Chrome(self.chromedriver_path,
                                           chrome_options=chrome_opts)

        elif self.browser == FIREFOX:
            ff_profile = webdriver.FirefoxProfile()
            uuid_pref = '{"%s": "%s"}' % (FF_EXT_ID, FF_UUID)
            ff_profile.set_preference('extensions.webextensions.uuids',
                                      uuid_pref)

            # this is kind of a hack; eventually the functionality to install
            # an extension should be part of Selenium. See
            # https://github.com/SeleniumHQ/selenium/issues/4215
            self.driver = webdriver.Firefox(firefox_profile=ff_profile,
                                            firefox_binary=self.firefox_path)
            install_cmd = 'addonInstall'
            self.driver.command_executor._commands[install_cmd] = (  # pylint:disable=protected-access
                'POST', '/session/$sessionId/moz/addon/install')
            addon_params = {
                'path': os.path.join(self.pb_path, 'src'),
                'temporary': True
            }
            self.driver.execute(install_cmd, params=addon_params)
            time.sleep(2)

        # apply timeout settings
        for apply_timeout in (self.driver.set_page_load_timeout,
                              self.driver.set_script_timeout):
            apply_timeout(self.timeout)

        # wait for Badger to finish initializing
        self.load_extension_page(OPTIONS)
        ready_check = (
            "return chrome.extension.getBackgroundPage().badger.INITIALIZED"
            " && Object.keys("
            "  chrome.extension.getBackgroundPage()"
            "  .badger.storage.getBadgerStorageObject('action_map').getItemClones()"
            ").length > 1")
        wait_for_script(self.driver, ready_check)

    def load_extension_page(self, page, retries=3):
        """
        Load a page in the Privacy Badger extension. `page` should either be
        BACKGROUND or OPTIONS.

        The load is attempted up to `retries` times. An unexpected alert is
        dismissed before the next attempt. If no attempt succeeds, the error
        is logged and the last exception seen is raised (always a
        WebDriverException or a subclass of it).
        """
        if self.browser == CHROME:
            ext_url = (CHROME_URL_FMT + page) % CHROME_EXT_ID
        elif self.browser == FIREFOX:
            ext_url = (FF_URL_FMT + page) % FF_UUID

        last_error = None
        for _ in range(retries):
            try:
                self.driver.get(ext_url)
                return
            except UnexpectedAlertPresentException as e:
                # Remember this failure too: if every attempt ends here there
                # must still be an exception to report below.
                last_error = e
                self.driver.switch_to_alert().dismiss()
            except WebDriverException as e:
                last_error = e

        if last_error is None:
            # Only reachable when `retries` is zero or negative.
            last_error = WebDriverException(
                'no attempt was made to load %s' % ext_url)
        self.logger.error('Error loading extension page: %s', last_error.msg)
        raise last_error

    def load_user_data(self, data):
        """Load saved user data into Privacy Badger after a restart.

        For every name in ``self.storage_objects`` the matching part of
        ``data`` is merged into the extension's storage object of the same
        name, via a script executed on the options page.
        """
        merge_template = ("(function (data) {"
                          "data = JSON.parse(data);"
                          "let bg = chrome.extension.getBackgroundPage();"
                          "bg.badger.storage.%s.merge(data.%s);"
                          "}(arguments[0]));")

        self.load_extension_page(OPTIONS)
        for name in self.storage_objects:
            # the data travels as a JSON string and is parsed in the page
            self.driver.execute_script(merge_template % (name, name),
                                       json.dumps(data))

        # wait for localstorage to sync
        time.sleep(2)

    def dump_data(self):
        """Extract the objects Privacy Badger learned during its training
        run.

        Returns a dict mapping each name in ``self.storage_objects`` to the
        item clones of the extension storage object with that name.
        """
        self.load_extension_page(OPTIONS)

        getter = ("return chrome.extension.getBackgroundPage()."
                  "badger.storage.%s.getItemClones()")
        return {
            name: self.driver.execute_script(getter % name)
            for name in self.storage_objects
        }

    def clear_data(self):
        """Clear the training data Privacy Badger starts with.

        Opens the options page and calls ``clearTrackerData()`` on the
        extension's storage.
        """
        clear_script = ("chrome.extension.getBackgroundPage()."
                        "badger.storage.clearTrackerData();")
        self.load_extension_page(OPTIONS)
        self.driver.execute_script(clear_script)

    def timeout_workaround(self):
        """
        Replace the current (timed-out) tab with a fresh one.

        Selenium has a bug where a tab that raises a timeout exception can't
        recover gracefully. So we kill the tab and make a new one.
        TODO: find actual bug ticket
        """
        driver = self.driver

        # kill the broken site, then move to the last remaining window
        driver.close()
        driver.switch_to_window(driver.window_handles.pop())

        # open a new window; it is the one handle not known beforehand
        known = set(driver.window_handles)
        driver.execute_script('window.open()')
        opened = set(driver.window_handles).symmetric_difference(known)
        driver.switch_to_window(opened.pop())

    def get_domain(self, domain):
        """
        Load `domain` over https; if that load times out, replace the tab
        and retry once over http. Then sleep `wait_time` seconds on the site
        to wait for AJAX calls to complete.

        Returns the URL that was loaded last. A timeout on the http retry is
        not caught here.
        """
        url = "https://%s/" % domain
        try:
            self.driver.get(url)
        except TimeoutException:
            self.logger.info('timeout on %s ', url)
            self.timeout_workaround()
            url = "http://%s/" % domain
            self.logger.info('trying %s', url)
            self.driver.get(url)

        time.sleep(self.wait_time)
        return url

    def start_browser(self):
        """Start the web driver, then clear the extension's tracker data."""
        self.start_driver()
        self.clear_data()

    def restart_browser(self, data):
        """Quit the current browser, start a new one and reload `data`.

        Up to RESTART_RETRIES attempts are made. Each attempt tears down the
        old driver (ignoring any error), starts a fresh browser and loads
        `data` back into the extension. If no attempt succeeds the process
        exits with status 1.
        """
        self.logger.info('restarting browser...')

        # It's ugly, but this section needs to be ABSOLUTELY crash-proof.
        for _ in range(RESTART_RETRIES):
            try:
                self.driver.quit()
            except:  # noqa:E722 pylint:disable=bare-except
                pass

            try:
                del self.driver
            except:  # noqa:E722 pylint:disable=bare-except
                pass

            try:
                self.start_browser()
                self.load_user_data(data)
                # a successful restart is not an error condition
                self.logger.info('Success')
                break
            except Exception as e:
                self.logger.error('Error restarting browser. Trying again...')
                # WebDriverException carries its text in `.msg`
                if isinstance(e, WebDriverException):
                    self.logger.error('%s: %s', type(e).__name__, e.msg)
                else:
                    self.logger.error('%s: %s', type(e).__name__, e)
        else:
            # If we couldn't restart the browser after all that, just quit.
            self.logger.error('Could not restart browser.')
            sys.exit(1)

    def crawl(self):
        """
        Visit the top `n_sites` websites in the Tranco List, in order, in
        a virtual browser with Privacy Badger installed. Afterwards, save the
        action_map and snitch_map that the Badger learned.

        The virtual display is stopped on every exit path, including the
        `sys.exit(1)` taken when the final data dump fails.
        """
        domains = get_domain_list(self.n_sites, self.exclude)
        self.logger.info(("starting new crawl:\n"
                          "\ttimeout: %ss\n"
                          "\twait time: %ss\n"
                          "\tbrowser: %s\n"
                          "\tsurvey mode: False\n"
                          "\tdomains to crawl: %d\n"
                          "\tTLDs to exclude: %s"), self.timeout,
                         self.wait_time, self.browser, self.n_sites,
                         self.exclude)

        # create an XVFB virtual display (to avoid opening an actual browser)
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        try:
            self.start_browser()

            # list of domains we actually visited
            visited = []
            old_snitches = {}

            # NOTE(review): `last_data` is first assigned inside the loop; if
            # the very first dump_data() call raises and a restart is
            # requested, the handlers below reference it before assignment.
            for i, domain in enumerate(domains):
                try:
                    # This script could fail during the data dump (trying to
                    # get the options page), the data cleaning, or while
                    # trying to load the next domain.
                    last_data = self.dump_data()

                    # try to fix misattribution errors
                    if i >= 2:
                        clean_data = self.cleanup(domains[i - 2],
                                                  domains[i - 1], last_data)
                        if last_data != clean_data:
                            self.clear_data()
                            self.load_user_data(clean_data)

                    self.logger.info('visiting %d: %s', i + 1, domain)
                    url = self.get_domain(domain)
                    visited.append(url)
                except TimeoutException:
                    self.logger.info('timeout on %s ', domain)
                    # TODO: how to get rid of this nested try?
                    try:
                        self.timeout_workaround()
                    except WebDriverException as e:
                        if should_restart(e):
                            self.restart_browser(last_data)
                except WebDriverException as e:
                    self.logger.error('%s %s: %s', domain, type(e).__name__,
                                      e.msg)
                    if should_restart(e):
                        self.restart_browser(last_data)
                finally:
                    # after every domain, report trackers that are new in the
                    # snitch map since the previous domain
                    self.load_extension_page(OPTIONS)
                    snitches = self.driver.execute_script(
                        "return chrome.extension.getBackgroundPage()."
                        "badger.storage.snitch_map._store;")
                    diff = set(snitches) - set(old_snitches)
                    if diff:
                        self.logger.info("new trackers in snitch_map: %s",
                                         diff)
                    old_snitches = snitches

            self.logger.info(
                "Finished scan. Visited %d sites and errored on %d.",
                len(visited),
                len(domains) - len(visited))

            try:
                self.logger.info('Getting data from browser storage...')
                data = self.dump_data()
            except WebDriverException:
                # If we can't load the background page here, just quit :(
                self.logger.error('Could not get badger storage.')
                sys.exit(1)

            self.driver.quit()
        finally:
            # never leave the Xvfb process behind, whatever happened above
            self.vdisplay.stop()

        self.save(data)

    def cleanup(self, d1, d2, data):
        """
        Return a cleaned deep copy of `data` with entries that appear to
        have been added as a result of bugs removed. `data` itself is not
        modified.

        `d1` and `d2` are two consecutively visited domains (`d1` first).
        """
        new_data = copy.deepcopy(data)
        action_map = new_data['action_map']
        snitch_map = new_data['snitch_map']

        # handle blank domain bug
        if '' in action_map:
            self.logger.info('Deleting blank domain from action map')
            self.logger.info(str(action_map['']))
            del action_map['']

        if '' in snitch_map:
            self.logger.info('Deleting blank domain from snitch map')
            self.logger.info(str(snitch_map['']))
            del snitch_map['']

        d1_base = TLDExtract()(d1).registered_domain

        # handle the domain-attribution bug (Privacy Badger issue #1997).
        # If a domain we visited was recorded as a tracker on the domain we
        # visited immediately after it, it's probably a bug
        if d1_base not in snitch_map or d2 not in snitch_map[d1_base]:
            return new_data

        self.logger.info('Likely bug: domain %s tracking on %s', d1_base,
                         d2)
        remaining = snitch_map[d1_base]
        remaining.remove(d2)

        if not remaining:
            # if the bug caused d1 to be added to the action map, remove it
            self.logger.info(
                'Deleting domain %s from action & snitch maps', d1_base)
            for name in (d1, d1_base):
                if name in action_map:
                    del action_map[name]
            del snitch_map[d1_base]
        elif len(remaining) == 2:
            # if the bug caused d1 to be blocked, unblock it
            for name in (d1, d1_base):
                if name in action_map:
                    self.logger.info(
                        'Downgrading domain %s from "block" to "allow"', name)
                    action_map[name]['heuristicAction'] = 'allow'

        return new_data

    def save(self, data, name='results.json'):
        """Stamp `data` with the crawl version and write it as JSON.

        `data` is modified in place (a 'version' key is set) and written to
        the file `name` inside `self.out_path`.
        """
        data['version'] = self.version
        self.logger.info('Saving seed data version %s...', self.version)

        # human-readable output: indented, with sorted keys
        json_style = {
            'indent': 2,
            'sort_keys': True,
            'separators': (',', ': '),
        }
        target = os.path.join(self.out_path, name)
        with open(target, 'w') as out:
            json.dump(data, out, **json_style)
        self.logger.info('Saved data to %s.', name)
Ejemplo n.º 41
0
    def crawl(self):
        """
        Visit the domains in `self.domain_list` (or, when that is not set,
        the top `n_sites` websites in the Tranco List), in order, in a
        virtual browser with Privacy Badger installed. Afterwards, save the
        snitch_map that the Badger learned.

        Whenever the dumped data grows beyond `self.max_data_size` it is
        written to an intermediate results file and the browser is restarted
        with empty data. At the end all results files are merged and saved.
        The virtual display is stopped on every exit path.
        """
        if self.domain_list:
            domains = self.domain_list
        else:
            domains = get_domain_list(self.n_sites, self.exclude)

        self.logger.info(("starting new crawl:\n"
                          "\ttimeout: %ss\n"
                          "\twait time: %ss\n"
                          "\tbrowser: %s\n"
                          "\tsurvey mode: True\n"
                          "\tdomains to crawl: %d\n"
                          "\tTLDs to exclude: %s"), self.timeout,
                         self.wait_time, self.browser, self.n_sites,
                         self.exclude)

        # create an XVFB virtual display (to avoid opening an actual browser)
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        try:
            self.start_browser()

            # list of domains we actually visited
            visited = []
            last_data = None
            first_i = 0

            # -1 (not None) keeps `i + 1` below valid when `domains` is empty
            i = -1
            for i, domain in enumerate(domains):
                # If we can't load the options page for some reason, treat it
                # like any other error
                try:
                    # save the state of privacy badger before we do anything
                    # else
                    last_data = self.dump_data()

                    # If the localstorage data is getting too big, dump and
                    # restart
                    if size_of(last_data) > self.max_data_size:
                        self.save(last_data,
                                  'results-%d-%d.json' % (first_i, i))
                        first_i = i + 1
                        last_data = {}
                        self.restart_browser(last_data)

                    self.logger.info('visiting %d: %s', i + 1, domain)
                    url = self.get_domain(domain)
                    visited.append(url)
                except TimeoutException:
                    self.logger.info('timeout on %s ', domain)
                    # TODO: how to get rid of this nested try?
                    try:
                        self.timeout_workaround()
                    except WebDriverException as e:
                        if should_restart(e):
                            self.restart_browser(last_data)
                except WebDriverException as e:
                    self.logger.error('%s %s: %s', domain, type(e).__name__,
                                      e.msg)
                    if should_restart(e):
                        self.restart_browser(last_data)
                except KeyboardInterrupt:
                    self.logger.warning(
                        "Keyboard interrupt. Ending scan after %d sites.",
                        i + 1)
                    break

            self.logger.info(
                'Finished scan. Visited %d sites and errored on %d.',
                len(visited), i + 1 - len(visited))
            self.logger.info('Getting data from browser storage...')

            try:
                data = self.dump_data()
            except WebDriverException:
                # fall back to the last successful dump, if there is one
                if last_data:
                    self.logger.error(
                        "Could not get badger storage. Using cached data...")
                    data = last_data
                else:
                    self.logger.error('Could not export data. Exiting.')
                    sys.exit(1)

            self.driver.quit()
        finally:
            # never leave the Xvfb process behind, whatever happened above
            self.vdisplay.stop()

        # max() only matters for an empty domain list, where i is still -1
        self.save(data, 'results-%d-%d.json' % (first_i, max(i, 0)))
        self.save(self.merge_saved_data())
Ejemplo n.º 42
0
# Output files for this run (opened for writing, which truncates them).
f = open('network_2.txt', 'w')
visited_f = open('already_visited_2.txt', 'w')

visited = []
frontier = []

# Load the names recorded by an earlier run, one per line; they form the
# work queue processed below.
f_rv = open("already_visited.txt", "r")
f_songs = open("songs_already_visited.txt", "w")
content = f_rv.readlines()
for line in content:
    # Python 2: decode the raw byte string into unicode
    visited.append(line.replace("\n", "").decode('utf-8'))

counter = 0
stuck = 0
# Chrome is started after the Xvfb virtual display.
display = Xvfb()
display.start()
driver = webdriver.Chrome(
    '/home/laurynas/workspace/knowledge-graph-data-extractor/chromedriver')
visited_size = len(visited)
# Process the queue front to back, pausing one second per entry.
while visited:
    time.sleep(1)

    seed_band = visited.pop(0)
    counter += 1

    # progress line: "<position>/<total>|<band name>| ..."
    print "band # " + str(counter) + "/" + str(visited_size) + "|" + str(
        seed_band.encode("utf8")) + "|" + " | with # of songs: "

    # search query and endpoint for this band
    query = seed_band + u" songs"
    service_url = u'https://www.google.co.uk/search'
Ejemplo n.º 43
0
class SurveyCrawler(Crawler):
    """
    Crawler variant that runs Privacy Badger in passive mode.

    The extension is told not to block anything and only to record what it
    sees. Results are written as a series of ``results-<first>-<last>.json``
    files (one each time the browser storage grows past ``max_data_size``)
    and finally merged into a single snitch map.

    Keyword arguments read here (all others are passed to ``Crawler``):

    * ``max_data_size`` -- storage size above which the data is dumped to
      disk and the browser is restarted.
    * ``domain_list`` -- optional path to a text file with one domain per
      line. When absent or empty, ``get_domain_list`` supplies the domains.
    """

    def __init__(self, **kwargs):
        super(SurveyCrawler, self).__init__(**kwargs)

        self.max_data_size = kwargs.get('max_data_size')
        self.storage_objects = ['snitch_map']

        domain_list_path = kwargs.get('domain_list')
        if domain_list_path:
            with open(domain_list_path) as f:
                # Ignore blank lines (e.g. a trailing newline at the end of
                # the file) so we never try to visit an empty domain.
                self.domain_list = [
                    line.strip() for line in f if line.strip()]
            if self.n_sites > 0:
                self.domain_list = self.domain_list[:self.n_sites]
        else:
            self.domain_list = None

    def set_passive_mode(self):
        """Open the extension options page and switch on passive mode."""
        self.load_extension_page(OPTIONS)
        script = '''
chrome.runtime.sendMessage({
    type: "updateSettings",
    data: { passiveMode: true }
});'''
        self.driver.execute_script(script)

    def start_browser(self):
        """Start the web driver, then put the extension in passive mode."""
        self.start_driver()
        # don't block anything, just listen and log
        self.set_passive_mode()

    def merge_saved_data(self):
        """
        Combine every ``results-*.json`` file in ``self.out_path``.

        For a tracker that appears in more than one file, the ``length``
        entries are added together and every other entry is overwritten by
        the file processed later. Files are processed in sorted path order
        so the merged result does not depend on directory listing order.

        Returns a dict with the keys ``version`` and ``snitch_map``.
        """
        paths = sorted(glob.glob(os.path.join(self.out_path,
                                              'results-*.json')))
        snitch_map = {}
        for p in paths:
            with open(p) as f:
                sm = json.load(f)['snitch_map']
            for tracker, snitches in sm.items():
                if tracker not in snitch_map:
                    snitch_map[tracker] = snitches
                    continue

                merged = snitch_map[tracker]
                for snitch, data in snitches.items():
                    if snitch == 'length':
                        merged['length'] = int(merged['length']) + int(data)
                    else:
                        merged[snitch] = data

        return {'version': self.version, 'snitch_map': snitch_map}

    def crawl(self):
        """
        Visit the top `n_sites` websites in the Tranco List (or the domains
        from the user-supplied domain list), in order, in a virtual browser
        with Privacy Badger installed. Afterwards, save the snitch_map that
        the Badger learned.

        Exits the process with status 1 if the final data export fails and
        no cached data is available. The browser and the virtual display
        are shut down on that path as well.
        """
        if self.domain_list:
            domains = self.domain_list
        else:
            domains = get_domain_list(self.n_sites, self.exclude)

        self.logger.info(("starting new crawl:\n"
                          "\ttimeout: %ss\n"
                          "\twait time: %ss\n"
                          "\tbrowser: %s\n"
                          "\tsurvey mode: True\n"
                          "\tdomains to crawl: %d\n"
                          "\tTLDs to exclude: %s"), self.timeout,
                         self.wait_time, self.browser, self.n_sites,
                         self.exclude)

        # create an XVFB virtual display (to avoid opening an actual browser)
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        self.start_browser()

        # list of domains we actually visited
        visited = []
        last_data = None
        first_i = 0

        # Start at -1 so the "i + 1" site counts below come out as zero
        # (instead of raising TypeError) when there are no domains at all.
        i = -1
        for i, domain in enumerate(domains):
            # If we can't load the options page for some reason, treat it like
            # any other error
            try:
                # save the state of privacy badger before we do anything else
                last_data = self.dump_data()

                # If the localstorage data is getting too big, dump and restart
                if size_of(last_data) > self.max_data_size:
                    self.save(last_data, 'results-%d-%d.json' % (first_i, i))
                    first_i = i + 1
                    last_data = {}
                    self.restart_browser(last_data)

                self.logger.info('visiting %d: %s', i + 1, domain)
                url = self.get_domain(domain)
                visited.append(url)
            except TimeoutException:
                self.logger.info('timeout on %s ', domain)
                # TODO: how to get rid of this nested try?
                try:
                    self.timeout_workaround()
                except WebDriverException as e:
                    if should_restart(e):
                        self.restart_browser(last_data)
            except WebDriverException as e:
                self.logger.error('%s %s: %s', domain, type(e).__name__, e.msg)
                if should_restart(e):
                    self.restart_browser(last_data)
            except KeyboardInterrupt:
                self.logger.warning(
                    "Keyboard interrupt. Ending scan after %d sites.", i + 1)
                break

        self.logger.info('Finished scan. Visited %d sites and errored on %d.',
                         len(visited), i + 1 - len(visited))
        self.logger.info('Getting data from browser storage...')

        try:
            try:
                data = self.dump_data()
            except WebDriverException:
                if not last_data:
                    self.logger.error('Could not export data. Exiting.')
                    sys.exit(1)
                self.logger.error(
                    "Could not get badger storage. Using cached data...")
                data = last_data
        finally:
            # Always release the browser and the virtual display, including
            # on the sys.exit() path above; stop the display even if
            # quitting the driver fails.
            try:
                self.driver.quit()
            finally:
                self.vdisplay.stop()

        # max(i, 0) only matters for an empty domain list (i == -1).
        self.save(data, 'results-%d-%d.json' % (first_i, max(i, 0)))
        self.save(self.merge_saved_data())
class aCloudGuru_qUrlCollectorSpider(Spider):
    """
    Spider that collects question URLs from an acloud.guru forum listing.

    The forum pages are driven through Selenium (Firefox inside an Xvfb
    virtual display). The collected URLs plus crawl metadata are written
    to a JSON file.
    """
    name = "aCloudGuru_qUrlCollectorSpider"
    allowed_domains = ["acloud.guru"]

    def __init__(self):
        self.start_urls = ["http://www.google.co.in"]

    def parse(self, response):
        """
        Crawl one configured forum topic and dump the result to disk.

        `response` is not used; the method only serves as the entry point.
        Errors during collection or writing are reported on stdout, and the
        browser is always torn down afterwards.
        """
        aCloudTopicUrls = {}

        aCloudTopicUrls['sa-pro-s3'] = {
            'awsTag': 'sa-pro-s3',
            'sourceUrl':
            'https://acloud.guru/forums/aws-certified-solutions-architect-professional/s3',
            'crawled': 'False',
            'pgCrawled': 0,
            'crawlPgLimit': '10',
            'pageLoadWaitTime': '30'
        }
        aCloudTopicUrls['sa-pro-new'] = {
            'awsTag': 'sa-pro-new',
            'sourceUrl':
            'https://acloud.guru/forums/aws-certified-solutions-architect-associate/newest?p=1',
            'crawled': 'False',
            'pgCrawled': 0,
            'crawlPgLimit': '10',
            'pageLoadWaitTime': '25'
        }

        self.setUpBrowser()

        # Lets be nice and crawl only limited pages
        try:
            dataDump = self.collectUrls(aCloudTopicUrls['sa-pro-new'])
            self.writeToFile(dataDump)
        except Exception:
            # `Exception` rather than a bare except, so Ctrl-C and
            # SystemExit are not swallowed.
            print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
            print("            Unable to get grab links              ")
            print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
        finally:
            self.tearDownBrowser()

    def setUpBrowser(self):
        """Start the virtual display and a Firefox driver inside it."""
        # Set the web browser parameters to not show gui ( aka headless )
        # Ref - https://github.com/cgoldberg/xvfbwrapper

        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()

        try:
            self.driver = webdriver.Firefox()
        except Exception:
            # Do not leave an orphaned Xvfb behind if Firefox cannot start.
            self.vdisplay.stop()
            raise

    def tearDownBrowser(self):
        """Quit the browser, then stop the virtual display."""
        # Although github says quit works, it throws me an error
        # Ref - https://github.com/SeleniumHQ/selenium/issues/1469
        try:
            self.driver.quit()
        finally:
            # Stop the display even when quitting the driver fails.
            self.vdisplay.stop()

    def collectUrls(self, urlMetadata):
        """
        Collect the question URLs of a forum topic, page by page.

        `urlMetadata` is a dict with the keys 'sourceUrl', 'crawlPgLimit',
        'pageLoadWaitTime' and 'pgCrawled'. It is updated in place
        ('pgCrawled', 'uri', 'crawled', 'dateScraped') and also returned.

        Pagination stops at 'crawlPgLimit' pages, when the 'Next' button is
        disabled or missing, or when clicking it fails.
        """
        # Imported locally so the block does not depend on the file-level
        # import list.
        from selenium.common.exceptions import NoSuchElementException

        urlItems = []

        # The XPATH Location identifiers to make it configurable
        xpathDict = {}

        ## The XPATH ID of the element for which the the page load waits before processing other requests
        # NOTE(review): this XPath ends in "/@href", i.e. it selects an
        # attribute node rather than an element - confirm that Selenium
        # accepts it.
        xpathDict[
            'pgLoadConfirmElement'] = "//div[@class='col-sm-8 forum-room-thread-list']/thread-list-component/thread-list-item-component/div[@class='thread-list-item']/@href"
        xpathDict[
            'qPopular'] = "//ul[@class='nav nav-tabs']/li[@heading='Popular']/a"
        xpathDict[
            'qNewest'] = "//ul[@class='nav nav-tabs']/li[@heading='New']/a"
        xpathDict[
            'qText'] = "//div[@class='col-sm-8 forum-room-thread-list']/thread-list-component/thread-list-item-component"
        xpathDict['qURL'] = ".//div[@class='thread-list-item']"

        # The time to wait for the webpage to load in seconds
        pgWtTime = int(urlMetadata['pageLoadWaitTime'])

        self.driver.set_page_load_timeout(pgWtTime)
        self.driver.get(urlMetadata['sourceUrl'])

        for _ in range(int(urlMetadata['crawlPgLimit'])):
            try:
                # Check if the page has the necessary elements before we start scraping
                WebDriverWait(self.driver, pgWtTime).until(
                    EC.presence_of_all_elements_located(
                        (By.XPATH, xpathDict['pgLoadConfirmElement'])))

                # Move to the New questions Tab
                btnToClick = self.driver.find_element_by_xpath(
                    xpathDict['qNewest'])

                self.driver.execute_script('arguments[0].click();', btnToClick)

                time.sleep(pgWtTime)

                # Find all the question div tags and iterate in for loop for the link reference
                qTextItems = self.driver.find_elements_by_xpath(
                    xpathDict['qText'])

                for qText in qTextItems:
                    for qUrl in qText.find_elements_by_xpath(
                            xpathDict['qURL']):
                        href = qUrl.get_attribute('href')
                        # Skip elements without an href instead of failing
                        # the whole page on `str + None`.
                        if href:
                            urlItems.append("https://acloud.guru" + href)

                urlMetadata['pgCrawled'] += 1

                print("\n\n\t~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
                print("\t  All done in page : {0}, Lets go to page : {1}".format(
                    (urlMetadata['pgCrawled'] - 1), urlMetadata['pgCrawled']))
                print("\t~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n")

            except TimeoutException:
                self.driver.execute_script("window.stop();")
                print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
                print("    Timeout Exception : THE PAGE DID NOT LOAD PROPERLY         ")
                print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")

            except Exception:
                print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
                print("           THE PAGE DID NOT LOAD PROPERLY         ")
                print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")

            # Pagination: runs whether or not the page above was scraped.
            try:
                nextBtn = self.driver.find_element_by_link_text('Next')
            except NoSuchElementException:
                # Without a 'Next' link there is nowhere to go; keep what
                # has been collected so far instead of aborting the crawl.
                print("\n\n\t\tNo 'Next' button found - stopping pagination\n")
                break

            # Wont work because of bug - https://github.com/SeleniumHQ/selenium/issues/2285
            # hover_over_nextBtn = self.driver.find_element_by_link_text('Next')
            # hover = ActionChains(self.driver).move_to_element(hover_over_nextBtn)
            # hover.perform()

            try:
                # Click the next button only if is active and not disabled, else break
                # find the parent and check if it is disabled
                btnClassTxt = nextBtn.find_element_by_xpath(
                    '..').get_attribute('class') or ''

                if "disabled" not in btnClassTxt:
                    self.driver.execute_script('arguments[0].click();',
                                               nextBtn)
                else:
                    print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
                    print("         REACHED THE END OF THE GALAXY          ")
                    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
                    break
            except httplib.BadStatusLine:
                print("\n\n\t\tERROR : FAILED - To click on 'Next' button to navigate to next page\n")
                break

        # Prepare data to be dumped to file (URLs de-duplicated via a set)
        urlMetadata['pgCrawled'] = str(urlMetadata['pgCrawled'])
        urlMetadata['uri'] = list(set(urlItems))
        urlMetadata['crawled'] = 'True'
        # Single clock read so date and time cannot straddle midnight.
        urlMetadata['dateScraped'] = datetime.now().strftime(
            "%Y-%m-%d-%H-%M")
        return urlMetadata

    def writeToFile(self, dataDump):
        """
        Write `dataDump` as indented, key-sorted JSON.

        The file goes to the "LnksToScrape" directory located three levels
        above this source file and is named
        '<dateScraped>-acloudguru-<awsTag>.json'.
        """
        outputDir = os.path.abspath(__file__ + "/../../../")
        outputFileName = '{0}-acloudguru-{1}.json'.format(
            dataDump['dateScraped'], dataDump['awsTag'])
        outputFileLoc = os.path.join(outputDir, "LnksToScrape", outputFileName)

        with open(outputFileLoc, 'w') as f:
            json.dump(dataDump, f, indent=4, sort_keys=True)
Ejemplo n.º 45
0
    def startup(self, detected_browsers):
        """Validate that all of the external dependencies are installed

        Checks the required Python modules and external tools, installs a
        few missing ones on Linux (traceroute, geckodriver, Node.js), starts
        Xvfb when requested or when no display is available, picks the
        display to capture from and sets up adb and traffic shaping.

        detected_browsers -- collection of detected browser names; only
        membership of 'Firefox' is checked here.

        Returns True when every required dependency is available and False
        as soon as any required check has failed.
        """
        ret = True

        # default /tmp/wptagent as an alive file on Linux
        if self.options.alive is None:
            if platform.system() == "Linux":
                self.options.alive = '/tmp/wptagent'
            else:
                self.options.alive = os.path.join(os.path.dirname(__file__), 'wptagent.alive')
        self.alive()
        ret = self.requires('dns', 'dnspython') and ret
        ret = self.requires('monotonic') and ret
        ret = self.requires('PIL', 'pillow') and ret
        ret = self.requires('psutil') and ret
        ret = self.requires('requests') and ret
        if not self.options.android and not self.options.iOS:
            ret = self.requires('tornado') and ret
        # Windows-specific imports
        if platform.system() == "Windows":
            ret = self.requires('win32api', 'pywin32') and ret

        if self.options.webdriver and 'Firefox' in detected_browsers:
            # Combine with the earlier results so a previous failure is not
            # masked by selenium being present.
            ret = self.requires('selenium') and ret

        # Optional imports
        self.requires('brotli')
        self.requires('fontTools', 'fonttools')

        # Try patching ws4py with a faster lib
        try:
            self.requires('wsaccel')
            import wsaccel
            wsaccel.patch_ws4py()
        except Exception:
            logging.debug('wsaccel not installed, Chrome debug interface will be slower than it could be')

        try:
            subprocess.check_output(['python', '--version'])
        except Exception:
            print("Make sure python 2.7 is available in the path.")
            ret = False

        try:
            subprocess.check_output('{0} -version'.format(self.image_magick['convert']), shell=True)
        except Exception:
            print("Missing convert utility. Please install ImageMagick and make sure it is in the path.")
            ret = False

        try:
            subprocess.check_output('{0} -version'.format(self.image_magick['mogrify']), shell=True)
        except Exception:
            print("Missing mogrify utility. Please install ImageMagick and make sure it is in the path.")
            ret = False

        if platform.system() == "Linux":
            try:
                subprocess.check_output(['traceroute', '--version'])
            except Exception:
                logging.debug("Traceroute is missing, installing...")
                subprocess.call(['sudo', 'apt', '-yq', 'install', 'traceroute'])

        if self.options.webdriver and 'Firefox' in detected_browsers:
            try:
                subprocess.check_output(['geckodriver', '-V'])
            except Exception:
                logging.debug("geckodriver is missing, installing...")
                subprocess.call(['sudo', 'apt', '-yq', 'install', 'firefox-geckodriver'])

        # If we are on Linux and there is no display, enable xvfb by default
        if platform.system() == "Linux" and not self.options.android and \
                not self.options.iOS and 'DISPLAY' not in os.environ:
            self.options.xvfb = True

        if self.options.xvfb:
            ret = self.requires('xvfbwrapper') and ret
            # Only start the display when nothing has failed so far.
            if ret:
                from xvfbwrapper import Xvfb
                self.xvfb = Xvfb(width=1920, height=1200, colordepth=24)
                self.xvfb.start()

        # Figure out which display to capture from
        if platform.system() == "Linux" and 'DISPLAY' in os.environ:
            logging.debug('Display: %s', os.environ['DISPLAY'])
            self.capture_display = os.environ['DISPLAY']
        elif platform.system() == "Darwin":
            # ffmpeg prints the avfoundation device list on stderr; use the
            # index of the first "Capture screen" entry.
            proc = subprocess.Popen('ffmpeg -f avfoundation -list_devices true -i ""',
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
            _, err = proc.communicate()
            for line in err.splitlines():
                matches = re.search(r'\[(\d+)\] Capture screen', line.decode('utf-8'))
                if matches:
                    self.capture_display = matches.group(1)
                    break
        elif platform.system() == "Windows":
            self.capture_display = 'desktop'

        if self.options.throttle:
            try:
                subprocess.check_output('sudo cgset -h', shell=True)
            except Exception:
                print("Missing cgroups, make sure cgroup-tools is installed.")
                ret = False

        # Fix Lighthouse install permissions
        if platform.system() != "Windows" and sys.version_info < (3, 0):
            from internal.os_util import run_elevated
            run_elevated('chmod', '-R 777 ~/.config/configstore/')
            try:
                import getpass
                run_elevated('chown', '-R {0}:{0} ~/.config'.format(getpass.getuser()))
            except Exception:
                pass

        # Check for Node 10+
        if self.get_node_version() < 10.0:
            if platform.system() == "Linux":
                # This only works on debian-based systems
                logging.debug('Updating Node.js to 12.x')
                subprocess.call('curl -sL https://deb.nodesource.com/setup_12.x | sudo -E bash -',
                                shell=True)
                subprocess.call(['sudo', 'apt-get', 'install', '-y', 'nodejs'])
            # Re-check after the attempted upgrade; this only warns and does
            # not fail startup.
            if self.get_node_version() < 10.0:
                logging.warning("Node.js 10 or newer is required for Lighthouse testing")

        # Check the iOS install
        if self.ios is not None:
            # Keep earlier failures: a passing iOS check must not reset ret.
            ret = self.ios.check_install() and ret

        if not self.options.android and not self.options.iOS and not self.options.noidle:
            self.wait_for_idle(300)
        if self.adb is not None:
            if not self.adb.start():
                print("Error configuring adb. Make sure it is installed and in the path.")
                ret = False
        # Remove any existing shaper configuration before installing again.
        self.shaper.remove()
        if not self.shaper.install():
            if platform.system() == "Windows":
                print("Error configuring traffic shaping, make sure secure boot is disabled.")
            else:
                print("Error configuring traffic shaping, make sure it is installed.")
            ret = False

        # Update the Windows root certs
        if platform.system() == "Windows":
            self.update_windows_certificates()

        return ret
Ejemplo n.º 46
0
 def __enter__(self):
     """Start a virtual display and a wx application inside it.

     The display is sized from ``self.size`` (width, height). After the
     wx.App is created the simulated mouse pointer is moved to (0, 0).
     Returns ``self`` so the object can be used as the ``with`` target.
     """
     self.vdisplay = Xvfb(width=self.size[0], height=self.size[1])
     self.vdisplay.start()
     try:
         self.app = wx.App(False)
         wx.UIActionSimulator().MouseMove(0, 0)
     except BaseException:
         # __exit__ is not called when __enter__ raises, so stop the
         # display here to avoid leaving an orphaned Xvfb process.
         self.vdisplay.stop()
         raise
     return self
Ejemplo n.º 47
0
def _record_error(status, field, err):
    """Append str(err) to status[field], creating the entry on first use."""
    if field in status:
        status[field] += str(err)  # Concatenate previous errors
    else:
        status[field] = str(err)


def run_inputs(instance_id, base_name, base_page):
    """Drive one chatbot session in Chrome and report what happened.

    Opens `base_page`, walks through the chatbot prompts, asks for a human
    agent and then waits for either a "no agents available" message, a
    LivePerson welcome message, or a timeout. When connected to LivePerson
    it keeps sending batches of messages and records the replies.

    instance_id -- identifier appended to `base_name` to name this instance.
    base_name   -- name prefix; `base_name + str(instance_id)` is both the
                   key of the returned dict and the name typed into the chat.
    base_page   -- URL to open.

    Returns a dict with a single key (the instance name) whose value holds
    'timestamps', 'status' ('could_not_open', 'no_agents',
    'contacted_liveperson' or 'timed_out') and, when applicable,
    'status_message', 'sent', 'received', 'lp_errors', 'other_errors' and
    'closing_error'.

    Errors are recorded in the result rather than raised. The handlers catch
    BaseException on purpose (kept from the original behaviour), so even an
    interrupt ends up in the returned status.
    """
    key = str(base_name+str(instance_id))
    status = {'timestamps': {}}
    timestamps = status['timestamps']
    instance_status = {key: status}
    lp_status = 0
    timestamps['0_instance_born'] = str(datetime.utcnow())

    # Stays None unless a virtual display was actually created, so the
    # cleanup paths know whether there is anything to stop.
    vdisplay = None
    try:
        if sys.platform != 'darwin' and use_xvfb:
            print('Starting Xvfb')
            vdisplay = Xvfb(width=1280, height=740)
            vdisplay.start()

        chrome_options = webdriver.ChromeOptions()
        if sys.platform != 'darwin':
            chrome_options.add_argument('headless')
        chrome_options.add_argument('no-sandbox')
        # chrome_options.add_argument('window-size=1200x700')
        print('Will create driver')
        driver = webdriver.Chrome(chrome_options=chrome_options)
        print('driver:'+str(driver))
    except BaseException as e:
        status['status'] = 'could_not_open'
        status['status_message'] = str(e)
        try:
            if vdisplay is not None:
                print('Closing Xvfb')
                vdisplay.stop()
        except BaseException as stop_err:
            print(stop_err)
        return instance_status

    try:
        driver.get(base_page)
        timestamps['1_requested_website'] = str(datetime.utcnow())

        # Wait for chat launcher
        WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.ID, 'nds-chat-launcher')))
        # Click on chat launcher
        timestamps['2_chat_became_available'] = str(datetime.utcnow())
        driver.find_element_by_id('nds-chat-launcher').click()
        # Move to iframe
        WebDriverWait(driver, 30).until(EC.visibility_of_element_located((By.ID, 'nds-chat-iframe')))
        iframe = driver.find_elements_by_tag_name('iframe')[0]
        driver.switch_to_frame(iframe)
        # Wait for the customer-segment question
        WebDriverWait(driver, 30, poll_frequency=0.1).until(EC.visibility_of_element_located((By.ID, 'nds-chatbot-message-3')))
        WebDriverWait(driver, 15, poll_frequency=0.1).until(EC.text_to_be_present_in_element((By.XPATH, '//*[@id="nds-chatbot-message-3"]/div[1]/div'),'¿A qué segmento perteneces?'))
        timestamps['3_segmento_cliente_available'] = str(datetime.utcnow())
        # Click on the segment option (injects jQuery, then clicks the third option)
        driver.execute_script(""" (function(e,s){e.src=s;e.onload=function(){jQuery.noConflict();console.log('jQuery 2.2.4 injected');jQuery('#nds-chatbot-message-3 > div.nds-chat-comment-option-wrap > div:nth-child(3)').click()};document.head.appendChild(e);})(document.createElement('script'),'//code.jquery.com/jquery-2.2.4.min.js') """)
        timestamps['4_segmento_cliente_selected'] = str(datetime.utcnow())
        time.sleep(10)
        # Wait for name question
        WebDriverWait(driver, 15, poll_frequency=0.1).until(EC.visibility_of_element_located((By.ID, 'nds-chatbot-message-4')))
        WebDriverWait(driver, 15, poll_frequency=0.1).until(EC.text_to_be_present_in_element((By.XPATH, '//*[@id="nds-chatbot-message-4"]/div/div'),'¿Cómo te llamas?'))
        timestamps['5_como_te_llamas_prompted'] = str(datetime.utcnow())
        # Get text input field and send name
        input_field = driver.find_element_by_id('txMessage')
        instance_name = base_name + str(instance_id)
        input_field.send_keys(instance_name, Keys.ENTER)
        timestamps['6_instance_name_sent'] = str(datetime.utcnow())
        # Wait for "¿Qué puedo hacer por ti?" prompt
        WebDriverWait(driver, 15, poll_frequency=0.1).until(EC.visibility_of_element_located((By.ID, 'nds-chatbot-message-5')))
        # Ask for agent
        input_field.send_keys('agente', Keys.ENTER)
        requested_agent_time = datetime.utcnow()
        timestamps['7_requested_agent'] = str(requested_agent_time)
        # Live person status;
        # lp_status = 1 No Agents Available (last_element_text equals no_agents_available_text);
        # lp_status = 2 Connected to LivePerson (last_element_text equals connected_to_lp_text);
        no_agents_available_text_sub_1 = "Por el momento no hay agentes disponibles"
        no_agents_available_text_sub_2 = "Por favor comunícate a nuestro centro de contacto al (01 55) 5721-3390"
        no_agents_available_text = "Por el momento no hay agentes disponibles.Por favor comunícate a nuestro centro de contacto al (01 55) 5721-3390."
        connected_to_lp_text = "¡Buen día! Bienvenido al Chat de HSBC."
        time_out = 0
        while lp_status == 0:
            chat_nds_bubbles = driver.execute_script("""
            var z = document.getElementsByClassName('nds-chat-comment-by-nds-chat');
            var arr = Array.prototype.slice.call(z);
            t = arr.map(function(e){return e.innerText});
            return t
            """)
            # First bubble that looks like one of the two terminal messages.
            last_element_text = ''
            for el in chat_nds_bubbles:
                if connected_to_lp_text in el or (no_agents_available_text_sub_1 in el and no_agents_available_text_sub_2 in el):
                    last_element_text = el
                    break

            # Compare with spaces and newlines removed on both sides.
            if no_agents_available_text.replace(' ','').replace('\n','') in last_element_text.replace(' ','').replace('\n',''):
                lp_status = 1
                no_agents_available_time = datetime.utcnow()
                timestamps['8_no_agents_available'] = str(no_agents_available_time)
                timestamps['delta_since_agent_requested'] = str(no_agents_available_time - requested_agent_time)
                timestamps['chatbot_history'] = str(chat_nds_bubbles)
                break
            elif connected_to_lp_text in last_element_text:
                connected_to_lp_time = datetime.utcnow()
                timestamps['8_connected_to_lp_time'] = str(connected_to_lp_time)
                timestamps['delta_since_agent_requested'] = str(connected_to_lp_time - requested_agent_time)
                timestamps['chatbot_history'] = str(chat_nds_bubbles)
                lp_status = 2
                status['sent'] = []
                status['received'] = []
                # send first message
                try:
                    initial_input = 'hola!'
                    input_field = driver.find_element_by_id('txMessage')
                    input_field.send_keys(initial_input, Keys.ENTER)
                    sent_time = str(datetime.utcnow())
                    status['sent'].append({sent_time: initial_input})
                except BaseException as e:
                    print('Problems while sending initial_input '+str(instance_id))
                    print(e)
                    _record_error(status, 'lp_errors', e)
                break
            time.sleep(1)
            time_out += 1
            # Give up after 3.5 minutes of one-second polls.
            if time_out > 60*3.5:
                timed_out_timestamp = datetime.utcnow()
                timestamps['8_timed_out_timestamp'] = str(timed_out_timestamp)
                timestamps['delta_since_agent_requested'] = str(timed_out_timestamp - requested_agent_time)
                timestamps['chatbot_history'] = str(chat_nds_bubbles)
                break

    except BaseException as e:
        print('Problems with instance '+str(instance_id))
        print(e)
        _record_error(status, 'other_errors', e)

    if lp_status == 1:
        print(base_name+str(instance_id)+' got no agents available message')
        status['status'] = 'no_agents'
    elif lp_status == 2:
        print(base_name+str(instance_id)+' contacted liveperson')
        status['status'] = 'contacted_liveperson'

        # Stay in the LivePerson chat for roughly 3.5 minutes, sending
        # number_of_batches_to_send batches spread over that time.
        time_in_lp = 60*3.5 + 1
        finish_lp = 1
        time_to_send_next_batch = 0
        number_of_batches_to_send = 5
        current_send_batch = 0
        time_between_batches = int(int(time_in_lp / number_of_batches_to_send) - 1)
        last_nds_chatbubble_included = 0
        while finish_lp < time_in_lp:
            if finish_lp > time_to_send_next_batch and current_send_batch < number_of_batches_to_send:
                current_send_batch += 1
                time_to_send_next_batch += time_between_batches
                inputs_to_send = ['hola.. '+str(current_send_batch), 'como estas.. '+str(current_send_batch), 'quiero ayuda.. '+str(current_send_batch)]
                try:
                    input_field = driver.find_element_by_id('txMessage')
                    for input_ in inputs_to_send:
                        input_field.send_keys(input_, Keys.ENTER)
                        sent_time = str(datetime.utcnow())
                        status['sent'].append({sent_time: input_})
                except BaseException as e:
                    print('Problems during LivePerson with instance '+str(instance_id))
                    print(e)
                    _record_error(status, 'lp_errors', e)
            try: # Get received messages
                chat_nds_bubbles = driver.execute_script("""
                var z = document.getElementsByClassName('nds-chat-comment-by-nds-chat');
                var arr = Array.prototype.slice.call(z);
                t = arr.map(function(e){return e.innerText});
                return t
                """)
                if last_nds_chatbubble_included == 0: # Initialise to the index of the bubble containing the LivePerson welcome text
                    for index, el in enumerate(chat_nds_bubbles):
                        if connected_to_lp_text in el:
                            last_nds_chatbubble_included = index
                            break

                # Record every bubble after the last one already recorded.
                while last_nds_chatbubble_included < len(chat_nds_bubbles)-1:
                    last_nds_chatbubble_included += 1
                    new_m = chat_nds_bubbles[last_nds_chatbubble_included].replace('Justo ahora','').replace('\n','').replace('1 minuto antes','').replace('2 minutos antes','')
                    new_m_timestamp = str(datetime.utcnow())
                    status['received'].append({new_m_timestamp: new_m})
            except BaseException as e:
                print('Problems during LivePerson with instance '+str(instance_id))
                print(e)
                _record_error(status, 'lp_errors', e)
            time.sleep(1)
            finish_lp += 1
    else:
        print(base_name+str(instance_id)+' timed out')
        status['status'] = 'timed_out'

    timestamps['9_closing_instance'] = str(datetime.utcnow())
    print('Closing '+base_name+str(instance_id))
    try:
        try:
            # quit() ends the whole driver session, not just the current
            # window as close() does.
            driver.quit()
        finally:
            # Stop the display even when shutting the browser down fails.
            if vdisplay is not None:
                vdisplay.stop()
    except BaseException as e:
        status['closing_error'] = str(e)

    return instance_status
Ejemplo n.º 48
0
def available_curses(carne, passw, visible=True, close=True):
    """
    Log in to ematricula.ucr.ac.cr and list the pending courses per career.

    For every option of the "carrera" dropdown (except the first one) the
    function prints the option's label and value, prints how many table
    rows were found, and rewrites ``cursos.txt`` with the course codes read
    from the page.

    Args:
        carne: Student id typed into the ``carne`` login field.
        passw: PIN typed into the ``pin`` login field.
        visible: When false, the browser runs inside an Xvfb virtual
            display that is started before and stopped after the session.
        close: When true, the browser is quit once the work is done (also
            when an error interrupts it).

    Returns:
        None.

    Example usage:

    Launch a visible browser to see step by step what is done (default).
    -> available_curses('b53777', 'mypassword')

    Launch an invisible browser and get the courses.
    -> available_curses('b53777', 'mypassword', visible=False)

    Launch a visible browser and don't close it when finished.
    -> available_curses('b53777', 'mypassword', visible=True, close=False)
    """

    # Only create the virtual display when it is actually needed, so a
    # visible run does not require Xvfb to be installed.
    vdisplay = None
    if not visible:
        vdisplay = Xvfb()
        vdisplay.start()

    driver = None
    try:
        # Open the browser and log in.
        driver = webdriver.Firefox()
        driver.get('https://ematricula.ucr.ac.cr/ematricula/login.do')
        carne_box = driver.find_element_by_name('carne')
        pass_box = driver.find_element_by_name('pin')
        carne_box.send_keys(carne)
        pass_box.send_keys(passw)
        driver.find_element_by_name('crudMethod').click()
        wait_until_title_contains(driver, 'Sistema eMatricula')

        # Go to the "pending courses" section and read the career dropdown.
        driver.find_element_by_link_text('Cursos Pendientes del Plan').click()
        wait_until_element_is_located(driver, 'formCarreras')
        career_dropdown = driver.find_element_by_name("carrera")
        # The first <option> is skipped (presumably a placeholder entry).
        careers = career_dropdown.find_elements_by_tag_name('option')[1:]

        for each_career in careers:
            print(each_career.get_attribute('innerHTML'))
            print(each_career.get_attribute('value'))
            each_career.click()
            wait_until_class_is_located(driver, 'data')
            table = driver.find_element_by_class_name("data")
            # NOTE(review): XPath expressions starting with '//' are
            # evaluated against the whole document, not relative to the
            # element they are called on. The ``i * 5`` indexing below
            # appears to rely on that, so the expressions are kept as-is.
            body = table.find_element_by_xpath("//tbody")
            rows = body.find_elements_by_xpath("//tr")[1:]

            my_courses = []
            if rows:  # an empty table used to raise IndexError here
                for data in rows[0].find_elements_by_xpath("//td"):
                    my_courses.append(data.get_attribute('innerHTML').strip())

            print(
                "Los cursos se descargaron exitosamente. Cantidad de cursos faltantes"
            )
            print(len(rows))

            # Collected for each career but currently neither returned nor
            # stored anywhere else.
            clean_courses = []
            # The file is reopened in "w" mode for every career, so it ends
            # up holding the data of the last career only.
            with open("cursos.txt", "w") as out:
                for i in range(len(rows)):
                    new_Subject = Subject()
                    new_Subject.sigla = my_courses[i * 5]
                    new_Subject.curso = my_courses[i * 5 + 1]
                    new_Subject.creditos = my_courses[i * 5 + 2]
                    clean_courses.append(new_Subject)
                    out.write("Materia")
                    out.write(my_courses[i * 5])
    finally:
        # Release the browser and the display even when scraping failed.
        try:
            if close and driver is not None:
                driver.quit()
        finally:
            if vdisplay is not None:
                vdisplay.stop()
Ejemplo n.º 49
0
def init():
    """Start a 500x500 Xvfb display and call ``mlab.init_notebook()``.

    The display object is only held by a local variable, so its ``stop``
    method is registered with ``atexit`` to make sure the Xvfb process is
    shut down when the interpreter exits.
    """
    import atexit

    display = Xvfb(width=500, height=500)
    display.start()
    atexit.register(display.stop)
    mlab.init_notebook()
            self.driver.execute_script("return window.performance"),
            ensure_ascii=False)
        self.__store_into_file(args, 'perf', performance)

    def stop_all(self):
        """Print a timestamped "Finish" line, then stop the server and quit the driver.

        The driver is quit even when stopping the server raises; the
        exception from ``server.stop()`` still propagates to the caller.
        """
        from datetime import datetime
        # Parenthesised single-argument form works as a Python 2 print
        # statement and as a Python 3 print() call.
        print("%s: Finish" % (datetime.now()))

        try:
            self.server.stop()
        finally:
            self.driver.quit()


# Script entry point. NOTE(review): this sample ends right after ``path`` is
# computed; the code that uses ``args`` and ``path`` is not part of this view.
if __name__ == '__main__':
    # for headless execution
    # Everything below runs while the Xvfb context manager is active.
    with Xvfb() as xvfb:
        # Command line: --url and --browser are mandatory, --path is optional.
        parser = argparse.ArgumentParser(
            description='Performance Testing using Browsermob-Proxy and Python'
        )
        parser.add_argument('-u', '--url', help='URL to test', required=True)
        parser.add_argument('-b',
                            '--browser',
                            help='Select Chrome or Firefox',
                            required=True)
        parser.add_argument('-p',
                            '--path',
                            help='Select path for output files',
                            required=False)
        # vars() turns the parsed Namespace into a plain dict.
        args = vars(parser.parse_args())
        # Browsermob-Proxy location: environment override, else a fixed default.
        path = os.getenv('BROWSERMOB_PROXY_PATH',
                         '/browsermob-proxy-2.1.2/bin/browsermob-proxy')
Ejemplo n.º 51
0
class Client:
    """Selenium-driven scraper that downloads a user's images from tadpoles.com.

    The class is a context manager: entering it starts an Xvfb display and
    a Firefox browser, leaving it shuts both down again. ``main()`` wraps
    the whole download in that context.
    """

    COOKIE_FILE = "state/cookies.pkl"
    ROOT_URL = "http://www.tadpoles.com/"
    HOME_URL = "https://www.tadpoles.com/parents"
    # Default bounds, in seconds, for the random pauses made by sleep().
    MIN_SLEEP = 1
    MAX_SLEEP = 3

    def __init__(self):
        self.init_logging()

    def init_logging(self):
        """Expose the methods of the 'app' logger as attributes of the client."""
        logger = logging.getLogger('app')
        self.info = logger.info
        self.debug = logger.debug
        self.warning = logger.warning
        self.critical = logger.critical
        self.exception = logger.exception

    def __enter__(self):
        """Start the virtual display and the browser; return the client."""
        self.info("Starting xvfb display")
        self.vdisplay = Xvfb()
        self.vdisplay.start()
        self.info("Starting browser")
        try:
            self.br = self.browser = webdriver.Firefox()
        except BaseException:
            # __exit__ is not called when __enter__ raises, so the display
            # has to be released here.
            self.vdisplay.stop()
            raise
        self.br.implicitly_wait(10)
        return self

    def __exit__(self, *args):
        """Quit the browser and stop the display, even if quitting fails."""
        self.info("Shutting down browser")
        try:
            self.browser.quit()
        finally:
            self.info("Shutting down xfvb display")
            self.vdisplay.stop()

    def sleep(self, minsleep=None, maxsleep=None):
        """Pause for a random duration between the two bounds (seconds).

        Falsy bounds fall back to MIN_SLEEP / MAX_SLEEP. The duration has a
        resolution of 1/100 s. When the upper bound is not greater than the
        lower bound, the lower bound is used as the duration instead of
        letting ``randrange`` raise ValueError.
        """
        _min = minsleep or self.MIN_SLEEP
        _max = maxsleep or self.MAX_SLEEP
        # randrange() needs integers; fractional bounds would be rejected.
        low = int(_min * 100)
        high = int(_max * 100)
        if high > low:
            duration = randrange(low, high) / 100.0
        else:
            duration = low / 100.0
        self.debug('Sleeping %r' % duration)
        time.sleep(duration)

    def navigate_url(self, url):
        """Load *url* in the browser."""
        self.info("Navigating to %r", url)
        self.br.get(url)

    def load_cookies(self):
        """Load pickled cookies from COOKIE_FILE into ``self.cookies``.

        Creates the ``state`` directory when missing. Raises
        FileNotFoundError when no cookie file exists yet;
        ``download_images`` relies on that to trigger a login.
        """
        self.info("Loading cookies.")
        if not isdir('state'):
            os.mkdir('state')
        # NOTE: unpickling is only safe because this file is written by
        # dump_cookies(); never point COOKIE_FILE at untrusted data.
        with open(self.COOKIE_FILE, "rb") as f:
            self.cookies = pickle.load(f)

    def dump_cookies(self):
        """Pickle the browser's current cookies into COOKIE_FILE."""
        self.info("Dumping cookies.")
        with open(self.COOKIE_FILE, "wb") as f:
            pickle.dump(self.br.get_cookies(), f)

    def add_cookies_to_browser(self):
        """Add the loaded cookies whose domain matches the current URL."""
        self.info("Adding the cookies to the browser.")
        for cookie in self.cookies:
            if self.br.current_url.strip('/').endswith(cookie['domain']):
                self.br.add_cookie(cookie)

    def requestify_cookies(self):
        """Build ``self.req_cookies``, a name -> value dict for ``requests``."""
        self.info("Transforming the cookies for requests lib.")
        self.req_cookies = {}
        for s_cookie in self.cookies:
            self.req_cookies[s_cookie["name"]] = s_cookie["value"]

    def switch_windows(self):
        '''Switch to the other window.'''
        self.info("Switching windows.")
        all_windows = set(self.br.window_handles)
        self.info("All windows.")
        self.info(all_windows)
        current_window = set([self.br.current_window_handle])
        self.info("Current windows.")
        self.info(current_window)
        # Raises KeyError when there is no second window to switch to.
        other_window = (all_windows - current_window).pop()
        self.br.switch_to.window(other_window)

    def do_login(self):
        """Log in with e-mail and password, both asked for on the terminal."""
        # Navigate to login page.
        self.info("Navigating to login page.")
        self.br.find_element_by_id("login-button").click()
        self.br.find_element_by_class_name("tp-block-half").click()
        self.br.find_element_by_class_name("other-login-button").click()

        self.info(self.br.current_url)

        # Enter email.
        self.info("  Sending username.")
        email = self.br.find_element_by_css_selector(
            ".controls input[type='text']")
        email.send_keys(input("Enter email: "))

        # Enter password. getpass keeps it from being echoed, matching
        # do_google_login.
        self.info("  Sending password.")
        passwd = self.br.find_element_by_css_selector(
            ".controls input[type='password']")
        passwd.send_keys(getpass("Enter password: "))

        # Click "submit".
        self.info("Sleeping 2 seconds.")
        self.sleep(minsleep=2)
        self.info("Clicking 'sumbit' button.")
        self.br.find_element_by_css_selector(
            ".tp-left-contents .btn-primary").click()
        self.sleep(minsleep=2)
        self.info("New url")
        self.info(self.br.current_url)

    def do_google_login(self):
        """Log in through the Google sign-in popup.

        NOTE(review): the password step of this method was damaged in the
        source and has been reconstructed by analogy with the e-mail step;
        confirm the element locators against the live sign-in page.
        """
        # Navigate to login page.
        self.info("Navigating to login page.")
        self.br.find_element_by_id("login-button").click()
        self.br.find_element_by_class_name("tp-block-half").click()

        for element in self.br.find_elements_by_tag_name("img"):
            # get_attribute() returns None for images without a src.
            src = element.get_attribute("src") or ""
            if "btn-google.png" in src:
                self.info(element)
                self.info("Clicking Google Button.")
                element.click()

        # Sleeping really quick.
        self.info("Sleeping 2 seconds.")
        self.sleep(minsleep=2)

        # Focus on the google auth popup.
        self.switch_windows()

        # Enter email.
        email = self.br.find_element_by_id("identifierId")
        email.send_keys(input("Enter email: "))
        email.submit()
        self.br.find_element_by_id("identifierNext").click()

        self.info("Sleeping 2 seconds.")
        self.sleep(minsleep=2)

        # Enter password.
        # NOTE(review): reconstructed locator - verify.
        password = self.br.find_element_by_name("password")
        password.send_keys(getpass("Enter password:"))
        self.br.find_element_by_id("passwordNext").click()

        self.info("Sleeping 2 seconds.")
        self.sleep(minsleep=2)

        # Switch back to tadpoles. The handle is taken from an unordered
        # set, so this presumably expects only the main window to be left.
        self.info("Switching windows.")
        all_windows = set(self.br.window_handles)
        self.info(all_windows)
        self.info("Switching to window")
        main_window = all_windows.pop()
        self.info(main_window)
        self.br.switch_to.window(main_window)

    def iter_monthyear(self):
        '''Yields pairs of month/year elements for each year/month tile on
        the right hand side of the user's home page.

        The most recently yielded pair is also stored on ``self.month`` and
        ``self.year``. Iteration ends when no further tile is found; the
        generator simply finishes instead of terminating the process.
        '''
        month_xpath_tmpl = '//*[@id="app"]/div[4]/div[1]/ul/li[%d]/div/div/div/div/span[%d]'
        month_index = 1
        while True:
            month_xpath = month_xpath_tmpl % (month_index, 1)
            year_xpath = month_xpath_tmpl % (month_index, 2)

            # Go home if not there already.
            if self.br.current_url != self.HOME_URL:
                self.navigate_url(self.HOME_URL)
            try:
                # Find the next month and year elements.
                month = self.br.find_element_by_xpath(month_xpath)
                year = self.br.find_element_by_xpath(year_xpath)
            except NoSuchElementException:
                # We reached the end of months on the profile page.
                self.warning("No months left to scrape. Stopping.")
                return

            self.month = month
            self.year = year
            yield month, year

            month_index += 1

    def iter_urls(self):
        '''Yield ``(url, day)`` for every image tile of every month.

        ``day`` is the zero-padded day taken from the tile's "m/d" label.
        '''
        # Captures the text between '("' and the next '"' in the style
        # attribute. Compiled once, as a raw string to avoid the invalid
        # '\(' escape.
        re_url = re.compile(r'\("([^"]+)')
        # For each month on the dashboard...
        for month, year in self.iter_monthyear():
            # Navigate to the next month.
            month.click()
            self.warning("Getting urls for month: %r" % month.text)
            self.sleep(minsleep=2)
            for div in self.br.find_elements_by_xpath(
                    "//div[@class='well left-panel pull-left']/ul/li/div"):
                url = re_url.search(div.get_attribute("style"))
                if not url:
                    continue
                url = url.group(1)
                # NOTE(review): the second replace can never match because
                # the first one already removed 'thumbnail=true', possibly
                # leaving a dangling '&'. Kept as-is: changing the order
                # would change the keys/filenames derived in save_image.
                url = url.replace('thumbnail=true', '')
                url = url.replace('&thumbnail=true', '')
                url = 'https://www.tadpoles.com' + url
                daymonth = div.find_element_by_xpath(
                    "./div/div[@class='header note mask']/span[@class='name']/span"
                ).text
                dayarray = daymonth.split('/')
                day = format(int(dayarray[1]), '02d')
                yield url, day

    def _run_exiftool(self, label, assignment, filename):
        """Apply one exiftool tag assignment to *filename*, best effort.

        The command is run without a shell so that characters in the
        filename cannot be interpreted as shell syntax. A missing exiftool
        binary is logged and otherwise ignored.
        """
        import subprocess

        argv = ['exiftool', '-overwrite_original', assignment, filename]
        self.info(label % ' '.join(argv))
        try:
            subprocess.call(argv)
        except OSError as exc:
            self.warning("  Could not run exiftool: %s" % exc)

    def save_image(self, url, day):
        '''Save an image locally using requests and stamp its date.

        The file goes to ``img/<year>/<month>/<key>.jpg`` (renamed to
        ``.png`` when the content is a PNG). Nothing is downloaded when
        either file already exists. Raises DownloadError on a non-200
        response.
        '''

        # Make the local filename.
        _, key = url.split("key=")
        filename_parts = ['img', self.year.text, self.month.text, '%s']
        filename_base = abspath(join(*filename_parts) % key)
        filename = filename_base + '.jpg'

        # Only download if the file doesn't already exist.
        if isfile(filename):
            self.debug("Already downloaded: %s" % filename)
            return
        elif isfile(filename_base + '.png'):
            self.debug("Already downloaded: %s.png" % filename_base)
            return
        else:
            self.info("Saving: %s" % filename)
            self.sleep()

        # Make sure the parent dir exists.
        dr = dirname(filename)
        if not isdir(dr):
            os.makedirs(dr)

        # Download it with requests.
        resp = requests.get(url, cookies=self.req_cookies, stream=True)
        if resp.status_code == 200:
            with open(filename, 'wb') as f:
                for chunk in resp.iter_content(1024):
                    f.write(chunk)
        else:
            msg = 'Error (%r) downloading %r'
            raise DownloadError(msg % (resp.status_code, url))

        ## set date for exif
        months = dict(jan="01",
                      feb="02",
                      mar="03",
                      apr="04",
                      may="05",
                      jun="06",
                      jul="07",
                      aug="08",
                      sep="09",
                      oct="10",
                      nov="11",
                      dec="12")
        yearmonth = self.year.text + ':' + months[
            self.month.text] + ':' + day + ' 12:00:00'
        ## check if the file is actually a png
        if imghdr.what(filename) == 'png':
            self.info("  File is a png - renaming")
            os.rename(filename, filename_base + '.png')
            filename = filename_base + '.png'
            self._run_exiftool("  Adding png exif: %s",
                               '-PNG:CreationTime=' + yearmonth, filename)

        self._run_exiftool("  Adding exif: %s", '-AllDates=' + yearmonth,
                           filename)

    def download_images(self):
        '''Login to tadpoles.com and download all user's images.

        Stored cookies are reused when available; otherwise the user is
        asked which login flow to run and the resulting cookies are saved.
        Download errors are logged and do not stop the run.
        '''
        self.navigate_url(self.ROOT_URL)
        try:
            self.load_cookies()
        except FileNotFoundError:

            login_type = None
            while login_type is None:
                input_value = input(
                    "Login Type - [G]oogle or [E]mail/password: ")
                if input_value == "G" or input_value == "g":
                    login_type = 'google'
                    self.info("Doing Google login...")
                    self.do_google_login()
                elif input_value == "E" or input_value == "e":
                    login_type = 'email'
                    self.info("Doing Email login...")
                    self.do_login()
                else:
                    self.info(
                        "-- Invalid choice entered - please choose 'G' or 'E'")

            self.dump_cookies()
            self.load_cookies()
            self.add_cookies_to_browser()
            self.navigate_url(self.HOME_URL)
        else:
            self.add_cookies_to_browser()
            self.navigate_url(self.HOME_URL)

        # Get the cookies ready for requests lib.
        self.requestify_cookies()

        for url, day in self.iter_urls():
            try:
                self.save_image(url, day)
            except DownloadError as exc:
                self.exception(exc)

    def main(self):
        """Run the download inside the display/browser context, logging errors."""
        with self as client:
            try:
                client.download_images()
            except Exception as exc:
                self.exception(exc)
Ejemplo n.º 52
0
 def setUp(self):
     """Start a 1024x768 virtual display, then launch Firefox."""
     screen = {"width": 1024, "height": 768}
     self.xvfb = Xvfb(**screen)
     self.xvfb.start()
     self.browser = webdriver.Firefox()
def imap_py(**kwargs):
    """Log in to a site with Firefox, then upload the downloaded CSV to S3 as JSON lines.

    All parameters are read from ``kwargs['templates_dict']``:
    ``selenium_conn_id`` (connection holding URL and credentials),
    ``filename`` (CSV expected in /root/Downloads), ``s3_conn_id``,
    ``s3_bucket``, ``s3_key`` and ``date`` (stored as ``run_date`` in every
    output row).

    The browser and the virtual display are shut down even when the
    browser steps fail.
    """
    params = kwargs.get('templates_dict') or {}
    selenium_conn_id = params.get('selenium_conn_id')
    filename = params.get('filename')
    s3_conn_id = params.get('s3_conn_id')
    s3_bucket = params.get('s3_bucket')
    s3_key = params.get('s3_key')
    date = params.get('date')

    @provide_session
    def get_conn(conn_id, session=None):
        conn = (session.query(Connection).filter(
            Connection.conn_id == conn_id).first())
        return conn

    # Query the connection once instead of once per attribute.
    selenium_conn = get_conn(selenium_conn_id)
    url = selenium_conn.host
    # NOTE(review): confirm the connection model really exposes ``user``.
    email = selenium_conn.user
    pwd = selenium_conn.password

    vdisplay = Xvfb()
    vdisplay.start()
    driver = None
    try:
        # Copy first: DesiredCapabilities.FIREFOX is a shared class-level
        # dict and must not be modified in place.
        caps = webdriver.DesiredCapabilities.FIREFOX.copy()
        caps["marionette"] = True

        profile = webdriver.FirefoxProfile()
        profile.set_preference("browser.download.manager.showWhenStarting",
                               False)
        profile.set_preference('browser.helperApps.neverAsk.saveToDisk',
                               "text/csv")

        logging.info('Profile set...')
        options = Options()
        options.set_headless(headless=True)
        logging.info('Options set...')
        logging.info('Initializing Driver...')
        driver = webdriver.Firefox(firefox_profile=profile,
                                   firefox_options=options,
                                   capabilities=caps)
        logging.info('Driver Intialized...')
        driver.get(url)
        logging.info('Authenticating...')
        elem = driver.find_element_by_id("email")
        elem.send_keys(email)
        elem = driver.find_element_by_id("password")
        elem.send_keys(pwd)
        elem.send_keys(Keys.RETURN)

        # NOTE(review): nothing here verifies that the login succeeded.
        logging.info('Successfully authenticated.')

        sleep_time = 15

        # Fixed pause that gives the download time to finish.
        logging.info('Downloading File....Sleeping for {} Seconds.'.format(
            sleep_time))
        time.sleep(sleep_time)
    finally:
        try:
            if driver is not None:
                # quit() ends the whole session, not just the current window.
                driver.quit()
        finally:
            vdisplay.stop()

    dest_s3 = S3Hook(s3_conn_id=s3_conn_id)

    # NOTE: this changes the working directory of the whole process.
    os.chdir('/root/Downloads')

    output_json = 'file.json'

    # Convert the CSV into one JSON object per line; both files are closed
    # when the block ends.
    with open(filename, 'r') as csvfile, open(output_json, 'w') as jsonfile:
        for row in csv.DictReader(csvfile):
            record = {boa.constrict(k): v for k, v in row.items()}
            record['run_date'] = date
            json.dump(record, jsonfile)
            jsonfile.write('\n')

    try:
        dest_s3.load_file(filename=output_json,
                          key=s3_key,
                          bucket_name=s3_bucket,
                          replace=True)
    finally:
        dest_s3.connection.close()
Ejemplo n.º 54
0
class TestPlotting(unittest.TestCase):
    """Smoke tests for the plotting helpers: each test passes if no exception is raised."""

    def setUp(self):
        """Start a virtual display when TRAVIS is set and select the Agg backend."""
        if os.getenv('TRAVIS', False):
            from xvfbwrapper import Xvfb
            self.vdisplay = Xvfb()
            self.vdisplay.start()
            # The class has no tearDown, so without this one Xvfb process
            # per test would be left running.
            self.addCleanup(self.vdisplay.stop)

        # To have plt.show() non blocking
        import matplotlib.pyplot as p
        p.switch_backend('Agg')

    @classmethod
    def tearDownClass(cls):
        """Remove the mesh file written by test_plot_mesh, if it exists."""
        mesh_file = 'planar_mesh_tmp_3.msh'
        # The file is missing when test_plot_mesh was deselected or failed
        # before writing it.
        if os.path.exists(mesh_file):
            os.remove(mesh_file)

    def test_plot_planar(self):
        ''' Check plotting of planar sensor.
        '''

        thickness = 200  # [um]
        width = 40  # [um]

        def potential_function(x, y):
            return fields.get_weighting_potential_analytic(x,
                                                           y,
                                                           D=thickness,
                                                           S=width,
                                                           is_planar=True)

        def field_function(x, y):
            return fields.get_weighting_field_analytic(x,
                                                       y,
                                                       D=thickness,
                                                       S=width,
                                                       is_planar=True)

        # Plot with analytical field function
        plot.plot_planar_sensor(pot_func=potential_function,
                                width=width,
                                pitch=width,
                                thickness=thickness,
                                n_pixel=1,
                                V_backplane=0,
                                V_readout=1,
                                field_func=field_function)

        # Plot without a field function
        plot.plot_planar_sensor(pot_func=potential_function,
                                width=width,
                                pitch=width,
                                thickness=thickness,
                                n_pixel=1,
                                V_backplane=0,
                                V_readout=1,
                                field_func=None)

    def test_plot_mesh(self):
        ''' Check plotting of a planar sensor mesh.
        '''
        mesh = geometry.mesh_planar_sensor(n_pixel=5,
                                           width=50.,
                                           thickness=100.,
                                           resolution=100.,
                                           filename='planar_mesh_tmp_3.msh')

        plot.plot_mesh(mesh)
Ejemplo n.º 55
0
 def test_start_fails_with_unknown_kwargs(self):
     """start() must raise RuntimeError when Xvfb got an unknown keyword."""
     display = Xvfb(foo='bar')
     self.assertRaises(RuntimeError, display.start)
Ejemplo n.º 56
0
class BrowserHandler:
    """Owns a Firefox browser and, unless ``args.show_browser`` is set, an Xvfb display.

    ``args`` is expected to provide ``show_browser`` and ``verbose``. When
    ``args`` is falsy no virtual display is started and the log level is
    "warn".
    """

    def __init__(self, args):
        self.args = args
        # Always defined, so kill() can tell whether a display was started.
        self.display = None
        if self.args and not self.args.show_browser:
            self.display = Xvfb()
            self.display.start()

        try:
            log_level = self._define_log_level(self.args)
            capabilities = self._create_browser_capabilities(log_level)
            options = self._create_browser_options(log_level)
            profile = self._create_browser_profile()

            self.browser = Firefox(
                firefox_profile=profile,
                capabilities=capabilities,
                options=options,
                log_path=f"{TIMESTAMP}_geckodriver.log",
            )
        except BaseException:
            # Do not leave the Xvfb process behind when the browser cannot
            # be started.
            if self.display is not None:
                self.display.stop()
            raise
        # https://stackoverflow.com/questions/42754877/cant-upload-file-using-selenium-with-python-post-post-session-b90ee4c1-ef51-4  # pylint: disable=line-too-long
        self.browser._is_remote = False  # pylint: disable=protected-access
        self.browser.maximize_window()

    @staticmethod
    def _define_log_level(args):
        """Map ``args.verbose`` to a log level name.

        1 -> "info", 2 -> "debug", 3 or more -> "trace"; anything else
        (including falsy ``args`` or ``args.verbose``) -> "warn".
        """
        verbosity = args.verbose if args else None
        if not verbosity:
            return "warn"
        if verbosity >= 3:
            return "trace"
        if verbosity == 2:
            return "debug"
        if verbosity == 1:
            return "info"
        return "warn"

    @staticmethod
    def _create_browser_capabilities(log_level):
        """Return a copy of the Firefox capabilities with the log level set."""
        capabilities = DesiredCapabilities.FIREFOX.copy()
        capabilities["moz:firefoxOptions"] = {
            "log": {
                "level": log_level,
            },
        }
        return capabilities

    @staticmethod
    def _create_browser_options(log_level):
        """Return browser options with the log level set."""
        options = Options()
        options.log.level = log_level
        return options

    @staticmethod
    def _create_browser_profile():
        """Return a Firefox profile that saves CSV/ZIP downloads to EXPORTS_FOLDER without prompting."""
        profile = FirefoxProfile()
        profile.set_preference("browser.download.folderList", 2)
        profile.set_preference("browser.download.manager.showWhenStarting",
                               False)
        profile.set_preference("browser.download.dir", EXPORTS_FOLDER)
        profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                               "text/csv, application/zip")
        profile.set_preference("browser.helperApps.alwaysAsk.force", False)
        profile.set_preference("devtools.jsonview.enabled", False)
        profile.set_preference("media.volume_scale", "0.0")
        # https://github.com/mozilla/geckodriver/issues/858#issuecomment-322512336
        profile.set_preference("dom.file.createInChild", True)

        return profile

    def kill(self):
        """Shut down the browser and, if one was started, the virtual display.

        ``quit()`` and ``display.stop()`` run even when ``stop_client()``
        or ``close()`` raise; such an exception still propagates. A
        WebDriverException from ``quit()`` is ignored, as before.
        """
        try:
            try:
                self.browser.stop_client()
                self.browser.close()
            finally:
                try:
                    self.browser.quit()
                except WebDriverException:
                    pass
        finally:
            display = getattr(self, "display", None)
            if display is not None:
                display.stop()
Ejemplo n.º 57
0
def train(env_config,
          env_ext,
          model_config,
          model_ext,
          exp_dir,
          seed,
          local_test,
          override_expe=True,
          save_n_random_q_images=0):
    import argparse

    from rl_agent.agent_utils import render_state_and_q_values

    from config import load_config
    from rl_agent.dqn_agent import DQNAgent

    import torch

    print("Expe",
          env_config,
          env_ext,
          model_config,
          model_ext,
          exp_dir,
          seed,
          sep='  ')
    print("Is cuda available ?", torch.cuda.is_available())

    if not local_test:
        assert len(ray.get_gpu_ids()) == 1
        assert torch.cuda.device_count(
        ) == 1, "Should be only 1, is {}".format(torch.cuda.device_count())

    if local_test:
        display = open('nothing.txt', 'w')
    else:
        from xvfbwrapper import Xvfb
        display = Xvfb(width=100, height=100, colordepth=16)

    full_config, expe_path = load_config(env_config_file=env_config,
                                         model_config_file=model_config,
                                         env_ext_file=env_ext,
                                         model_ext_file=model_ext,
                                         out_dir=exp_dir,
                                         seed=seed)

    MAX_STATE_TO_REMEMBER = 50  # To avoid storing too much images in tensorboard
    DEFAULT_LOG_STATS = 500
    log_stats_every = full_config.get("log_stats_every", DEFAULT_LOG_STATS)

    max_iter_expe = full_config["stop"]["max_iter_expe"]
    score_success = full_config["stop"]["episode_reward_mean"]

    if override_expe == False:
        # Check that the experiment has run more than a few episodes
        # If so, DON'T rerun everything (useful for grid search)

        rerun_expe = True

        for dir in os.listdir(expe_path):

            last_iter = 0

            if "tfevents" in dir:
                tf_event_path = os.path.join(expe_path, dir)

                try:
                    for i, elem in enumerate(
                            tf.train.summary_iterator(tf_event_path)):
                        if elem.step:
                            last_iter = max(last_iter, elem.step)

                    if last_iter < max_iter_expe - log_stats_every:
                        os.remove(tf_event_path)
                        print("Experiment doesn't seem to be over, rerun.")
                    else:
                        rerun_expe = False

                except tf.errors.DataLossError as e:
                    print(e)
                    os.remove(tf_event_path)

        if rerun_expe == False:
            print("Expe was over, don't rerun")
            return True

    writer = tensorboardX.SummaryWriter(expe_path)
    print("Expe path : ", expe_path)

    if "racing" in full_config["env_name"].lower():

        from env_tools.car_racing import CarRacingSafe
        from env_tools.wrapper import CarFrameStackWrapper, CarActionWrapper

        reset_when_out = full_config["reset_when_out"]
        reward_when_falling = full_config["reward_when_out"]
        max_steps = full_config["max_steps"]

        game = CarRacingSafe(reset_when_out=reset_when_out,
                             reward_when_out=reward_when_falling,
                             max_steps=max_steps)

        DEFAULT_FRAME_SKIP = 3
        n_frameskip = full_config.get("frameskip", DEFAULT_FRAME_SKIP)

        game = CarActionWrapper(game)
        game = CarFrameStackWrapper(game, n_frameskip=n_frameskip)

    elif "minigrid" in full_config['env_name'].lower():

        from gym_minigrid.envs.safe_crossing import SafeCrossing
        from env_tools.wrapper import MinigridFrameStacker

        reward_when_falling = full_config["reward_when_out"]
        size = full_config["size_env"]
        feedback_when_wall_hit = full_config["feedback_when_wall_hit"]
        proba_reset = full_config["proba_reset"]
        use_lava = full_config["use_lava"]
        n_zone = full_config["n_zone"]
        good_zone_action_proba = full_config["good_zone_action_proba"]
        bad_zone_action_proba = full_config["bad_zone_action_proba"]
        obstacle_type = full_config["obstacle_type"]
        prevent_bad_action = full_config["prevent_bad_action"]

        game = SafeCrossing(size=size,
                            reward_when_falling=reward_when_falling,
                            proba_reset=proba_reset,
                            feedback_when_wall_hit=feedback_when_wall_hit,
                            use_lava=use_lava,
                            n_zone=n_zone,
                            good_zone_action_proba=good_zone_action_proba,
                            bad_zone_action_proba=bad_zone_action_proba,
                            obstacle_type=obstacle_type,
                            prevent_bad_action=prevent_bad_action,
                            seed=seed)

        game = MinigridFrameStacker(game, full_config["n_frameskip"])

    elif "zork" in full_config['env_name'].lower():
        raise NotImplementedError(
            "Zork is a pain in the A#%?, i'll do it later")
        #game = textworld.start('./zork1.z5')

    elif "text" in full_config['env_name'].lower():

        import textworld.gym as tw_gym
        from textworld.envs.wrappers.filter import EnvInfos
        from env_tools.wrapper import TextWorldWrapper

        EXTRA_GAME_INFO = {
            "inventory": True,
            "description": True,
            "intermediate_reward": full_config["use_intermediate_reward"],
            "admissible_commands": True,
            "policy_commands": full_config["use_intermediate_reward"],
        }

        reward_when_falling = 0

        game_path = os.path.join("text_game_files", full_config['ulx_file'])
        env_id = tw_gym.register_game(
            game_path,
            max_episode_steps=full_config["max_episode_steps"],
            name="simple1",
            request_infos=EnvInfos(**EXTRA_GAME_INFO))
        game = gym.make(env_id)
        game = TextWorldWrapper(
            env=game,
            use_intermediate_reward=EXTRA_GAME_INFO["intermediate_reward"])

    else:
        game = gym.make(full_config["env_name"])

    discount_factor = full_config["discount_factor"]
    total_iter = 0
    success_count = 0

    num_episode = 0
    early_stopping = False

    reward_wo_feedback_list = []
    reward_undiscount_list = []
    reward_discount_list = []
    feedback_per_ep_list = []
    percentage_tile_seen_list = []

    iter_this_ep_list = []
    last_reward_undiscount_list = []
    last_reward_discount_list = []

    self_destruct_list = []
    self_destruct_trial_list = []

    best_undiscount_reward = -float("inf")

    model_type = full_config["agent_type"]
    if model_type == "dqn":
        model = DQNAgent(config=full_config["dqn_params"],
                         action_space=game.action_space,
                         obs_space=game.observation_space,
                         discount_factor=discount_factor,
                         writer=writer,
                         log_stats_every=log_stats_every)
    else:
        raise NotImplementedError("{} not available for model".format(
            full_config["agent_type"]))

    save_images_at = set(full_config["save_images_at"])

    with display as xvfb:

        while total_iter < max_iter_expe and not early_stopping:

            state = game.reset()

            #game.render('human')
            done = False
            iter_this_ep = 0
            reward_wo_feedback = 0
            reward_total_discounted = 0
            reward_total_not_discounted = 0
            percentage_tile_seen = 0

            n_feedback_this_ep = 0

            self_kill_trial = 0

            rendered_images = []

            # Do we store images of state and q function associated with it ?
            if save_n_random_q_images > 0:
                steps_images_to_save = np.random.randint(
                    0, game.env.max_steps, save_n_random_q_images)
            elif num_episode in save_images_at:
                steps_images_to_save = range(0, int(1e6))  # save everything
            else:
                steps_images_to_save = []

            while not done:

                # Render state, and compute q values to visualize them later
                if iter_this_ep in steps_images_to_save:
                    array_rendered = render_state_and_q_values(model=model,
                                                               game=game,
                                                               state=state)
                    rendered_images.append(array_rendered)

                    # Save only the last frames, to avoid overloading tensorboard
                    if len(rendered_images) > MAX_STATE_TO_REMEMBER:
                        rendered_images.pop(0)

                action = model.select_action(state['state'])
                next_state, reward, done, info = game.step(action=action)

                if done:
                    next_state['state'] = None

                model.push(state['state'], action, next_state['state'], reward,
                           next_state['gave_feedback'])
                model.optimize(total_iter=total_iter, env=game)

                state = next_state

                total_iter += 1
                iter_this_ep += 1

                percentage_tile_seen = max(
                    info.get('percentage_road_visited', 0),
                    percentage_tile_seen)
                n_feedback_this_ep += info['gave_feedback']
                self_kill_trial += info.get('tried_destruct', 0)

                assert next_state['gave_feedback'] == info[
                    'gave_feedback'], "Problem, info should contain the same info as state"

                reward_total_discounted += reward * (discount_factor**
                                                     iter_this_ep)
                reward_total_not_discounted += reward

                reward_wo_feedback += reward - info[
                    'gave_feedback'] * reward_when_falling

                #=======================
                # LOG STATS HERE
                if total_iter % log_stats_every == 0:
                    reward_discount_mean = np.mean(reward_discount_list)
                    reward_undiscount_mean = np.mean(reward_undiscount_list)

                    last_rewards_discount = np.mean(
                        last_reward_undiscount_list)
                    last_rewards_undiscount = np.mean(
                        last_reward_discount_list)

                    last_reward_wo_feedback = np.mean(reward_wo_feedback_list)

                    iter_this_ep_mean = np.mean(iter_this_ep_list)

                    last_feedback_mean = np.mean(feedback_per_ep_list)

                    if "racing" in full_config["env_name"].lower():
                        writer.add_scalar("data/percentage_tile_seen",
                                          np.mean(percentage_tile_seen_list),
                                          total_iter)

                    writer.add_scalar("data/number_of_feedback",
                                      last_feedback_mean, total_iter)

                    writer.add_scalar(
                        "data/number_of_feedback_over_iter_per_ep",
                        last_feedback_mean / iter_this_ep_mean, total_iter)

                    # writer.add_scalar("data/reward_discounted", last_rewards_discount, total_iter)
                    # writer.add_scalar("data/reward_not_discounted", last_rewards_undiscount, total_iter)

                    writer.add_scalar("data/reward_wo_feedback(unbiaised)",
                                      last_reward_wo_feedback, total_iter)
                    writer.add_scalar("data/n_episodes", num_episode,
                                      total_iter)

                    #writer.add_scalar("data/self_destruct_trial", np.mean(self_destruct_trial_list), total_iter)
                    #writer.add_scalar("data/self_destruct", np.mean(self_destruct_list), total_iter)

                    # writer.add_scalar("data/running_mean_reward_discounted", reward_discount_mean, total_iter)
                    # writer.add_scalar("data/running_mean_reward_not_discounted", reward_undiscount_mean, total_iter)
                    writer.add_scalar("data/iter_per_ep", iter_this_ep_mean,
                                      total_iter)
                    #writer.add_scalar("data/epsilon", model.current_eps, total_iter)
                    # writer.add_scalar("data/model_update", model.num_update_target, total_iter)
                    writer.add_scalar("data/n_episode_since_last_log",
                                      len(last_reward_discount_list),
                                      total_iter)
                    # writer.add_scalar("data/model_update_ep", model.num_update_target, num_episode)

                    if last_rewards_undiscount > best_undiscount_reward:
                        best_undiscount_reward = reward_discount_mean
                        torch.save(model.policy_net.state_dict(),
                                   os.path.join(expe_path, "best_model.pth"))

                    torch.save(model.policy_net.state_dict(),
                               os.path.join(expe_path, "last_model.pth"))

                    # Reset feedback and percentage
                    feedback_per_ep_list = []
                    percentage_tile_seen_list = []
                    last_reward_undiscount_list = []
                    last_reward_discount_list = []
                    iter_this_ep_list = []
                    reward_wo_feedback_list = []

            # DONE, GO HERE :
            # ================

            # Save images of state and q func associated
            if rendered_images != []:
                for i, array_rendered in enumerate(rendered_images):
                    num_iter = iter_this_ep - len(rendered_images) + i + 1
                    writer.add_image('data/{}/state_and_q'.format(num_episode),
                                     global_step=num_iter,
                                     img_tensor=array_rendered,
                                     dataformats="HWC")

            # Update target network if needed
            #model.callback(epoch=num_episode)

            reward_undiscount_list.append(reward_total_not_discounted)
            reward_discount_list.append(reward_total_discounted)

            last_reward_undiscount_list.append(reward_total_not_discounted)
            last_reward_discount_list.append(reward_total_discounted)

            feedback_per_ep_list.append(n_feedback_this_ep)
            percentage_tile_seen_list.append(percentage_tile_seen)
            iter_this_ep_list.append(iter_this_ep)

            self_destruct_list.append(info.get('self_destruct', 0))
            self_destruct_trial_list.append(self_kill_trial)
            reward_wo_feedback_list.append(reward_wo_feedback)

            print(
                "End of ep #{}, n_timesteps (estim) {}, iter_this_ep : {}, current_eps {}, zone {}"
                .format(num_episode, total_iter,
                        np.mean(iter_this_ep_list[-1]), model.current_eps,
                        state.get('zone', "Not applicable")))

            print(
                "(Estim) Discounted rew : {} undiscounted : {}, unbiaised : {},  n_feedback {} \n\n"
                .format(np.mean(last_reward_discount_list[-1]),
                        np.mean(last_reward_undiscount_list[-1]),
                        reward_wo_feedback_list[-1],
                        np.mean(feedback_per_ep_list[-1])))

            assert total_iter >= reward_wo_feedback_list[
                -1] + feedback_per_ep_list[-1]

            if reward_total_discounted > score_success:
                success_count += 1
                if success_count > 5:
                    early_stopping = True
            else:
                success_count = 0

            num_episode += 1

        print("Experiment over")

    # Enforce cleaning
    writer.close()
    del model.memory
    del model
    del game
    torch.cuda.empty_cache()
    return True
Ejemplo n.º 58
0
from xvfbwrapper import Xvfb
from selenium import webdriver

# Load one product page in Chrome inside a headless Xvfb display and print
# its title. The browser and the display are child processes, so both are
# shut down in `finally` blocks; otherwise they would outlive this script
# whenever the page load fails (and, for the display, even on success).
vdisplay = Xvfb()
vdisplay.start()
try:
    driver = webdriver.Chrome()
    try:
        driver.get('http://www.adidas.ru/krossovki-deerupt-runner/B41768.html')
        print(driver.title)
    finally:
        # Quit the browser before the display it is rendering into goes away.
        driver.quit()
finally:
    vdisplay.stop()
Ejemplo n.º 59
0
class ProxiedBrowser(object):
    """Firefox webdriver session whose traffic goes through a local proxy.

    `open()` starts, in order: a `TCP` proxy (given `<dir>/caps` as its
    capture directory), an Xvfb virtual display, and a Firefox webdriver
    whose SOCKS port preference is set to the proxy's port. The class is
    also a context manager: `__enter__` opens everything and `__exit__`
    closes it again.
    """

    def __init__(self, dir, tor_port=9150):
        """Remember the configuration; nothing is started until `open()`.

        Args:
            dir: Base directory. The Firefox binary (`FIREFOX_PATH`), the
                Firefox profile (`FIREFOX_PROFILE`) and the `caps`
                directory are all resolved relative to it.
            tor_port: Port handed to the `TCP` proxy as `tor_port`; it is
                coerced to `int`, so numeric strings are accepted.
        """
        self.dir = dir
        self.tor_port = int(tor_port)

    def open(self):
        """Start the proxy, the virtual display and the Firefox webdriver.

        Returns:
            This instance, so the call can be chained or used by
            `__enter__`.

        Raises:
            Exception: Whatever the Firefox/webdriver setup raises. The
                display, the log file and the proxy are released before
                the exception is re-raised.
        """
        cap_dir = join(self.dir, "caps")
        try:
            os.makedirs(cap_dir)
        except OSError:
            # Normally this just means the directory already exists. Stay
            # best-effort as before, but leave a trace when the directory
            # is genuinely missing so a later proxy failure is explainable.
            if not os.path.isdir(cap_dir):
                logger.warning("Could not create capture directory %s",
                               cap_dir)
        self.proxy = TCP(cap_dir=cap_dir, tor_port=self.tor_port)
        self.proxy.start()
        while not self.proxy.running:
            logger.info("Waiting for proxy to start...")
            sleep(1)

        logger.info("Starting Xvfb virtual display")
        self.vdisplay = Xvfb(width=1280, height=740)
        self.vdisplay.start()

        logger.info("Webdriver starting..")
        # Keep a reference to the log file so it can be closed on teardown.
        self.log_file = open("firefox.log", "w")
        try:
            self.binary = FirefoxBinary(
                firefox_path=join(self.dir, FIREFOX_PATH),
                log_file=self.log_file)
            self.binary.add_command_line_options("--verbose")
            self.profile = FirefoxProfile(
                profile_directory=join(self.dir, FIREFOX_PROFILE))
            self.profile.set_preference("network.proxy.socks_port",
                                        self.proxy.port)
            self.profile.set_preference(
                "extensions.torlauncher.start_tor",
                False)  # We started tor manually earlier.
            self.profile.set_preference("browser.startup.homepage", "")
            self.profile.set_preference("browser.startup.page", 0)
            self.profile.update_preferences()

            self.driver = webdriver.Firefox(firefox_binary=self.binary,
                                            firefox_profile=self.profile)
            sleep(2)  # hack: wait until homepage etc have loaded.
        except Exception:
            # `__exit__` is never invoked when `__enter__` fails, so
            # everything started above has to be released right here.
            self._release_support()
            raise  # bare raise keeps the original traceback
        return self

    def _release_support(self):
        """Stop the display, close the Firefox log file and close the proxy.

        Each step is attempted even if an earlier one raises.
        """
        try:
            logger.info("Closing virtual display")
            self.vdisplay.stop()
        finally:
            try:
                self.log_file.close()
            finally:
                logger.info("Closing proxy")
                self.proxy.close()

    def __enter__(self):
        """Open the browser stack and return this instance."""
        return self.open()

    def get(self, url):
        """Load `url` in the browser and return what the proxy captured.

        Capture results pending from before the request are consumed and
        discarded first. Each capture file produced by the request is
        parsed with `extract_from_capturefile` and then deleted.

        Args:
            url: Address passed to the webdriver's `get`.

        Returns:
            A list holding the concatenated items extracted from every
            capture file.
        """
        print("Fetching {url}".format(url=url))
        self.proxy.consume_results(
        )  # clear anything previous, e.g the browsers homepage, whatever update checkers etc.
        self.driver.get(url)
        capture_files = self.proxy.consume_results()
        responses = list()
        for capture_file in capture_files:
            responses += extract_from_capturefile(capture_file)
            os.remove(capture_file)
        return responses

    def close(self):
        """Quit the webdriver, then stop the display and the proxy.

        The display and the proxy are released even when quitting the
        webdriver raises. The proxy is only joined after it was closed
        successfully.
        """
        try:
            logger.info("Closing webdriver")
            self.driver.quit()
        finally:
            self._release_support()
            self.proxy.join()

    def __exit__(self, type, value, traceback):
        """Close everything; exceptions from the `with` body propagate."""
        self.close()
Ejemplo n.º 60
0
    def crawl(self):
        """
        Visit the top `n_sites` websites in the Tranco List, in order, in
        a virtual browser with Privacy Badger installed. Afterwards, save the
        action_map and snitch_map that the Badger learned.

        The browser is quit and the Xvfb display is stopped on every exit
        path, including uncaught exceptions and the `sys.exit(1)` taken when
        the final data dump fails. Data is only saved after a successful
        final dump.
        """
        domains = get_domain_list(self.n_sites, self.exclude)
        self.logger.info(("starting new crawl:\n"
                          "\ttimeout: %ss\n"
                          "\twait time: %ss\n"
                          "\tbrowser: %s\n"
                          "\tsurvey mode: False\n"
                          "\tdomains to crawl: %d\n"
                          "\tTLDs to exclude: %s"), self.timeout,
                         self.wait_time, self.browser, self.n_sites,
                         self.exclude)

        # create an XVFB virtual display (to avoid opening an actual browser)
        self.vdisplay = Xvfb(width=1280, height=720)
        self.vdisplay.start()
        browser_started = False
        try:
            self.start_browser()
            browser_started = True

            # list of domains we actually visited
            visited = []
            old_snitches = {}
            # Bound up front so the restart handlers below cannot hit an
            # unbound local when the very first dump_data() call fails.
            # NOTE(review): confirm restart_browser() tolerates None here.
            last_data = None

            for i, domain in enumerate(domains):
                try:
                    # This script could fail during the data dump (trying to
                    # get the options page), the data cleaning, or while
                    # trying to load the next domain.
                    last_data = self.dump_data()

                    # try to fix misattribution errors
                    if i >= 2:
                        clean_data = self.cleanup(domains[i - 2],
                                                  domains[i - 1], last_data)
                        if last_data != clean_data:
                            self.clear_data()
                            self.load_user_data(clean_data)

                    self.logger.info('visiting %d: %s', i + 1, domain)
                    url = self.get_domain(domain)
                    visited.append(url)
                except TimeoutException:
                    self.logger.info('timeout on %s ', domain)
                    # TODO: how to get rid of this nested try?
                    try:
                        self.timeout_workaround()
                    except WebDriverException as e:
                        if should_restart(e):
                            self.restart_browser(last_data)
                except WebDriverException as e:
                    self.logger.error('%s %s: %s', domain,
                                      type(e).__name__, e.msg)
                    if should_restart(e):
                        self.restart_browser(last_data)
                finally:
                    # Runs after every domain, successful or not: report
                    # snitch_map keys that were not present last time.
                    self.load_extension_page(OPTIONS)
                    snitches = self.driver.execute_script(
                        "return chrome.extension.getBackgroundPage()."
                        "badger.storage.snitch_map._store;")
                    diff = set(snitches) - set(old_snitches)
                    if diff:
                        self.logger.info("new trackers in snitch_map: %s",
                                         diff)
                    old_snitches = snitches

            self.logger.info(
                "Finished scan. Visited %d sites and errored on %d.",
                len(visited),
                len(domains) - len(visited))

            try:
                self.logger.info('Getting data from browser storage...')
                data = self.dump_data()
            except WebDriverException:
                # If we can't load the background page here, just quit :(
                self.logger.error('Could not get badger storage.')
                sys.exit(1)
        finally:
            # Same teardown order as before (browser first, then display),
            # but now also reached on sys.exit() and on uncaught errors.
            try:
                if browser_started:
                    self.driver.quit()
            finally:
                self.vdisplay.stop()

        self.save(data)