def virtual_display_if_enabled(enabled):
    """Return a started virtual display when enabled, else a no-op stand-in."""
    if not enabled:
        return NoopDisplay()
    virtual_display = Display(visible=0, size=(800, 600))
    virtual_display.start()
    return virtual_display
def run(self):
    """Run the SelScraper."""
    # Start a headless X display so the browser can run without a real screen.
    display = Display(visible=0, size=(800, 600))
    display.start()
    #self._set_xvfb_display()
    if not self._get_webdriver():
        raise_or_log('{}: Aborting due to no available selenium webdriver.'.format(self.name),
                     exception_obj=SeleniumMisconfigurationError)
    try:
        # Tile 400x400 browser windows in a 4-column grid so parallel
        # scraper instances (browser_num) don't overlap on screen.
        self.webdriver.set_window_size(400, 400)
        self.webdriver.set_window_position(400 * (self.browser_num % 4),
                                           400 * (math.floor(self.browser_num // 4)))
    except WebDriverException as e:
        out('Cannot set window size: {}'.format(e), lvl=4)
    super().before_search()
    if self.startable:
        self.build_search()
        self.search()
    # NOTE(review): the Display started above is never stopped in this method —
    # confirm cleanup happens elsewhere (or it leaks an Xvfb process).
    if self.webdriver:
        self.webdriver.close()
def webthumb(url, filename, is_flash=False): script = """ var s = document.createElement('script'); s.src = 'http://cruels.net/sb/flashfix.js'; document.body.appendChild(s); """ print "webthumb(%s, %s)" % (url, filename) display = Display(visible=0, size=(1200, 900)) display.start() browser = webdriver.Firefox() browser.get(url) if is_flash: time.sleep(1) else: browser.execute_script(script) time.sleep(6) tmpfile = "%s.tmp" % filename browser.get_screenshot_as_file(tmpfile) img = pil.open(tmpfile) width, height = img.size if is_flash: resized = img.resize((LIBRARYFILE_THUMB_WIDTH, LIBRARYFILE_THUMB_HEIGHT), pil.ANTIALIAS) else: ratio = float(width) / float(height) resized = img.resize((LIBRARYFILE_THUMB_WIDTH, int(LIBRARYFILE_THUMB_WIDTH / ratio)), pil.ANTIALIAS) resized.save(filename) os.remove(tmpfile) print "Saved %s." % filename browser.quit() display.stop() return True
class BrowserManager:
    """Owns a single splinter Browser behind a virtual display, guarded by a crude flag-lock."""

    def __init__(self):
        self._lock = False

    def bootup(self):
        # Headless display for the browser process.
        self._display = Display(visible=0, size=(1024, 768))
        self._display.start()
        prefs = {}
        if 'HTTP_PROXY' in os.environ:
            # Parse "http://host:port" into host/port pieces.
            proxy_url = os.environ['HTTP_PROXY']
            host = proxy_url.split(':')[1][2:]
            port = proxy_url.split(':')[-1]
            prefs['network.proxy.type'] = 1
            prefs['network.proxy.http'] = host
            prefs['network.proxy.http_port'] = port
            prefs['network.proxy.https'] = host
            prefs['network.proxy.https_port'] = port
        self.browser = Browser(profile_preferences=prefs)

    def obtain(self, background):
        # Busy-wait until the flag clears, then claim the browser.
        while self._lock:
            background.wait('Browser lock', 15)
        self._lock = True
        return self.browser

    def release(self, background):
        self._lock = False

    def shutdown(self):
        self.browser.quit()
        self._display.stop()
class UITestCase(LiveServerTestCase):
    """Live-server UI tests; falls back to an Xvfb display when no real one exists."""

    def use_xvfb(self):
        # Imported lazily so machines with a real display never need pyvirtualdisplay.
        from pyvirtualdisplay import Display
        self.display = Display('xvfb', visible=1, size=(1280, 1024))
        self.display.start()
        self.driver = WebDriver()

    def setUp(self):
        try:
            self.driver = WebDriver()
        except WebDriverException:
            # No usable display: retry inside a virtual one.
            self.use_xvfb()
        self.driver.implicitly_wait(10)
        super(UITestCase, self).setUp()

    def tearDown(self):
        self.driver.quit()
        # The display attribute only exists when use_xvfb() was taken.
        if hasattr(self, 'display'):
            self.display.stop()
        super(UITestCase, self).tearDown()
def main(): '''business logic for when running this module as the primary one!''' display = Display(visible=0, size=(1024, 768)) display.start() fresh_cl_post = find_cl_post() prev_cl_post = {"title":"","link":""} old_cl_post = {"title":"","link":""} # find_cl_post() while True: # print "TEST" + str(datetime.date.today()) fresh_cl_post = find_cl_post() try: if fresh_cl_post['title'] != prev_cl_post['title']: old_cl_post = prev_cl_post prev_cl_post = fresh_cl_post send_cl_email(fresh_cl_post) except: print "Failed to test & send mail at: "+str(datetime.datetime.now()) gc.collect() time.sleep(SLEEP_SECONDS) display.stop()
class Xvfb(object):
    """Context manager around a pyvirtualdisplay Display (headless X server)."""

    def __init__(self, width=1366, height=768, visible=0):
        self.__virtual_display = None
        self.width = width
        self.height = height
        self.visible = visible

    def __init_display(self):
        # Lazily create and start the display on first use only.
        if self.__virtual_display is not None:
            return
        self.__virtual_display = Display(visible=self.visible,
                                         size=(self.width, self.height))
        self.__virtual_display.start()

    def __enter__(self):
        self.__init_display()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._close_display()

    def _close_display(self):
        if not self.__virtual_display:
            return
        try:
            self.__virtual_display.close()
        except:
            # Best effort: the display may already be gone.
            pass
        self.__virtual_display = None

    @staticmethod
    def run(func, *args, **kwargs):
        """Execute func(*args, **kwargs) inside a temporary virtual display."""
        with Xvfb():
            return func(*args, **kwargs)
def __init__(self, domain, dte):
    """Prepare dump/output directories, a virtual display and a Firefox driver.

    domain -- target domain (stored for later use by other methods)
    dte    -- date-like tag (stored for later use by other methods)
    """
    self.domain = domain
    self.dte = dte
    self.mydump = "mydump_pro_similatr"
    try:
        os.makedirs(self.mydump)
    except:
        # Directory probably already exists; ignore.
        pass
    # Daily output directory, e.g. dirpro25122016.
    self.directory = "dirpro%s" % (time.strftime("%d%m%Y"))
    try:
        os.makedirs(self.directory)
    except:
        pass
    display = Display()
    # NOTE(review): this stores the return value of start(), not the Display
    # object itself — only works on pyvirtualdisplay versions where start()
    # returns self; confirm before relying on self.display.stop().
    self.display = display.start()
    fp = webdriver.FirefoxProfile()
    # Auto-download CSV files into the current working directory without prompting.
    fp.set_preference("browser.download.folderList", 2)
    fp.set_preference("browser.download.manager.showWhenStarting", False)
    fp.set_preference("browser.download.dir", os.getcwd())
    fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")
    self.driver = webdriver.Firefox(firefox_profile=fp)
    self.driver.maximize_window()
    self.driver.implicitly_wait(60)
    self.driver.set_page_load_timeout(120)
def getupc(data, sleeptime):
    """Google '<name> upc' for each item in *data* and attach a UPC when found.

    data      -- list of dicts with at least a 'name' key; mutated in place
                 (an 'upc' key is added when a upcitemdb.com result exists).
    sleeptime -- seconds to pause between page interactions.
    Returns the same *data* list.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    a = webdriver.Firefox()
    # /ncr = "no country redirect", keeps result layout consistent.
    a.get('https://www.google.com/ncr')
    time.sleep(sleeptime)
    search = WebDriverWait(a, 5).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@type='text']")))
    for i in data:
        ActionChains(a).move_to_element(search).click(search).send_keys(
            i['name'] + ' upc', Keys.ENTER).perform()
        time.sleep(sleeptime)
        contents = WebDriverWait(a, 5).until(
            EC.presence_of_all_elements_located((By.XPATH, "//div[@class='g']")))
        try:
            # First result linking to upcitemdb.com; the UPC is the last URL path segment.
            upc = next(
                (re.split(r'/', href.find_element_by_tag_name('a').get_attribute('href'))[-1]
                 for href in contents
                 if href.find_element_by_tag_name('a').get_attribute('href').startswith(
                     'http://www.upcitemdb.com/upc')))
            i['upc'] = upc
        except StopIteration:
            # No upcitemdb result for this item; leave the dict untouched.
            pass
        # Re-locate the (possibly stale) search box and clear it for the next query.
        search = WebDriverWait(a, 5).until(
            EC.element_to_be_clickable((By.XPATH, "//input[@type='text']")))
        search.clear()
    a.close()
    display.stop()
    return data
def load(self): min_time = 3600 # 1 hour in seconds max_time = 7179 # 2 hours in seconds (less 21) tasktime = randint(min_time, max_time) threading.Timer(tasktime, self.load).start() tasktime_m , tasktime_s = divmod( tasktime , 60) tasktime_h , tasktime_m = divmod( tasktime_m , 60) output_content = "Load execution - waiting %dh %02dmin %02dsec for the next time." % (tasktime_h, tasktime_m, tasktime_s) print "[KeepUp]" , output_content from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as ec from selenium.webdriver.common.keys import Keys from pyvirtualdisplay import Display # Initial display = Display(visible=0, size=(1600, 900)) display.start() profile = webdriver.FirefoxProfile() profile.set_preference("browser.cache.disk.enable", False) profile.set_preference("browser.cache.memory.enable", False) profile.set_preference("browser.cache.offline.enable", False) profile.set_preference("network.http.use-cache", False) driver = webdriver.Firefox() driver.get("https://c9.io/dashboard.html") driver.save_screenshot(self.directory_img + 'login.png') #Username username = driver.find_element_by_id("id-username") username.click() username.clear() username.send_keys(self.user, Keys.ARROW_DOWN) #Password password = driver.find_element_by_id("id-password") password.click() password.clear() password.send_keys(self.password, Keys.ARROW_DOWN) #Submit submit_button = driver.find_element_by_css_selector("button[type=submit]") # print submit_button.text # Click submition submit_button.click(); time.sleep(5) driver.save_screenshot(self.directory_img + 'user_profile.png') # Target dir driver.get(self.target_workspace) time.sleep(10) self.log({'log_html': driver.page_source, 'log_file': output_content}) #make log driver.save_screenshot(self.directory_img + 'final_workspace.png') # End driver.quit() display.stop()
class BCCVLTestCase(unittest.TestCase):
    """Base test case driving Firefox against a configurable BCCVL deployment."""

    def setUp(self):
        # Credentials/URL come from the environment, with dev-box defaults.
        self.username = os.getenv("BCCVL_TEST_USERNAME", "admin")
        self.password = os.getenv("BCCVL_TEST_PASSWORD", "admin")
        self.url = os.getenv("BCCVL_TEST_URL", "https://192.168.100.200/")
        # Blocking wait selenium applies while locating elements.
        implicit_wait = int(os.getenv("BCCVL_TEST_IMPLICIT_WAIT", "15"))
        use_xvfb = os.getenv("BCCVL_TEST_VIRTUAL_DISPLAY", "false") == "true"
        # Optional virtual display (xvfb) for headless runs.
        if use_xvfb:
            self.display = Display(visible=0, size=(1920, 1080))
            self.display.start()
        else:
            self.display = None
        # Firefox webdriver with a fixed window size (maximize left disabled).
        self.driver = webdriver.Firefox()
        self.driver.implicitly_wait(implicit_wait)
        self.driver.set_window_size(1200, 800)
        # Start every test on the BCCVL homepage.
        self.driver.get(self.url)

    def tearDown(self):
        if self.display:
            self.display.stop()
        self.driver.quit()
def rzhd(): directions=[create_url(),] while raw_input('Want to add more directions? y/n ')=='y': directions.append(create_url()) print "------------------" # n=raw_input('Check tickets every ...(seconds)? ') n = 60 place=choose_place() i = 0 display = Display(visible=0, size=(5, 5)) display.start() # Запускаем вирутальный дисплей while len(directions)!=0: i+=1 print print "----------------->Searching for PLATSKART<-----------------" print "try #",i print time.asctime() print for url in directions: if find_train(url, place)==True: send_email('*****@*****.**', url) if raw_input('Did you buy ticket? y/n ')=='y': directions.remove(url) if len(directions) == 0: print "Successfully bought all tickets!" return True print str(n)+" seconds until next try..." time.sleep(float(n)) # Дадим браузеру корректно завершиться display.stop() # Закрываем виртуальный дисплей
def get_screenshot(site_id, update_id): """ Create a screenshot and save it to the database """ # Get the objects we're working with site = Site.objects.get(id=site_id) update = Update.objects.get(id=update_id) # Fire up a headless display to work in display = Display(visible=0, size=(1680, 1050)) display.start() # Fire up a Selenium browsers browser = webdriver.Firefox() # Set a timeout for the pageload seconds = 15 browser.command_executor._commands['setPageLoadTimeout'] = ( 'POST', '/session/$sessionId/timeouts' ) browser.execute("setPageLoadTimeout", { 'ms': 1000*seconds, 'type':'page load' }) # Snap a screenshot of the target site logger.debug("Opening %s" % site.url) timestamp = timezone.now() try: browser.get(site.url + "?x=" + get_random_string()) logger.debug("Response received for %s" % site.url) except TimeoutException, e: logger.error("Request for %s timed out" % site.url) pass
def main(param): if len(param) != 2: sys.exit(-9) if len(param[1]) <= 0: sys.exit(-8) paths = param[0] shotsdir = paths.get('path', 'output.shotsdir').lstrip('"').rstrip('"') targets = param[1] display = Display(visible=0, size=(800, 600)) display.start() binary = FirefoxBinary('/opt/firefox/firefox') browser = webdriver.Firefox(firefox_binary=binary) tgt_len = len(targets) for i, tgt in enumerate(targets): browser.get(tgt[0]) browser.save_screenshot(shotsdir+'/'+tgt[1]+'.png') print '( %3d / %3d ) Took %s.png' % (i+1, tgt_len, tgt[1]) browser.quit() display.stop()
class TestContext(object):
    """Thin wrapper around a Firefox webdriver plus an optional virtual display."""

    def open_browser(self):
        # Remote-webdriver variant kept for reference:
        # if test_config.SELENIUM_USE_REMOTE:
        #     dc = getattr(DesiredCapabilities, self.driver.upper())
        #     dc['name'] = test_config.SELENIUM_TEST_NAME
        #     cmd_exec = test_config.SELENIUM_REMOTE_CMD_EXEC
        #     self.browser = webdriver.Remote(desired_capabilities=dc, command_executor=cmd_exec)
        if test_config.SELENIUM_USE_VIRTUALDISPLAY:
            # NOTE: keeps whatever start() returns; requires a pyvirtualdisplay
            # where start() returns the display object itself.
            self.virtualdisplay = Display(
                backend=test_config.SELENIUM_VIRTUALDISPLAY_BACKEND,
                size=(600, 800)).start()
        firefox_binary = FirefoxBinary(test_config.SELENIUM_FIREFOX_PATH)
        self.browser = webdriver.Firefox(firefox_binary=firefox_binary)
        self.browser.implicitly_wait(test_config.SELENIUM_PAGE_WAIT)

    def close(self):
        self.browser.quit()
        # Only stop the display when open_browser() actually created one.
        if hasattr(self, 'virtualdisplay'):
            self.virtualdisplay.stop()

    def get(self, url):
        self.browser.get(url)
        self.url = url

    def follow_link(self, link):
        link.click()
        self.url = self.browser.current_url

    def wait_for(self, by, thing):
        waiter = WebDriverWait(self.browser, test_config.SELENIUM_PAGE_WAIT)
        waiter.until(EC.presence_of_element_located((by, thing)))
def loadSite(url):
    """Load *url* through a proxied headless Firefox and print network-info cells.

    Always returns 1; the scraped <td> texts are printed, not returned.
    """
    profile = webdriver.FirefoxProfile()
    # Route all HTTP traffic through a hard-coded proxy.
    profile.set_preference("network.proxy.type", 1)
    profile.set_preference("network.proxy.http", "74.84.131.34")
    profile.set_preference("network.proxy.http_port", int('80'))
    profile.update_preferences()
    #
    display = Display(visible=0, size=(800, 600))
    display.start()
    # NOTE(review): despite the variable name, this path points at geckodriver
    # (Firefox), not chromedriver — confirm.
    path_to_chromedriver = '/home/alexandr/www/html/python/prs/files/geckodriver'
    browser = webdriver.Firefox(firefox_profile = profile, executable_path = path_to_chromedriver)
    # browser.delete_all_cookies()
    browser.get(url)
    #print(browser.page_source)
    #print(browser.page_source)
    tree = etree.HTML( browser.page_source)
    # browser.close()
    # NOTE(review): the browser itself is never closed/quit here — likely leak.
    display.stop()
    #
    nodes = tree.xpath('//table[@class="network-info"]//tr/td')
    for node in nodes:
        print(node.text)
    return 1
def process_install_form (self):
    """Drive the Omeka web installer form with the configured credentials.

    Fills the /install form of the freshly created Omeka instance and waits
    for the post-install page ("Tableau" link) before quitting the driver.
    """
    if (self.args.xvfb):
        print "Omeka is being installed in: " + self.folder_name
        # Headless mode: run the browser inside a virtual display.
        display = Display(visible=0, size=(800, 600))
        display.start()
    driver = webdriver.Firefox()
    driver.get("http://localhost/omeka/" + self.folder_name + "/install")
    inputElement = driver.find_element_by_name("username")
    inputElement.send_keys(self.omeka_user)
    inputElement = driver.find_element_by_name("password")
    inputElement.send_keys(self.omeka_passwd)
    inputElement = driver.find_element_by_name("password_confirm")
    inputElement.send_keys(self.omeka_passwd)
    inputElement = driver.find_element_by_name("super_email")
    inputElement.send_keys("*****@*****.**")
    inputElement = driver.find_element_by_name("administrator_email")
    inputElement.send_keys("*****@*****.**")
    inputElement = driver.find_element_by_name("site_title")
    inputElement.send_keys(self.omeka_title)
    inputElement.submit()
    try:
        # Success page contains a "Tableau ..." link.
        WebDriverWait(driver, 10).until(
            lambda driver : driver.find_element_by_partial_link_text("Tableau"))
    finally:
        driver.quit()
    # NOTE(review): the Display started above is never stopped — confirm
    # whether cleanup happens elsewhere or this leaks an Xvfb process.
class TestCase(unittest.TestCase):
    """Flask + selenium test case backed by an in-memory SQLite database."""

    def setUp(self):
        app.config['TESTING'] = True
        app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///:memory:'
        self.app = app.test_client()
        db.create_all()
        # Headless display + real Firefox for the JS-dependent pages.
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.driver = webdriver.Firefox()

    def tearDown(self):
        db.session.remove()
        db.drop_all()
        self.driver.quit()
        self.display.stop()

    def test_extract_funds(self):
        # The live driver is used because the local fixture's javascript
        # could not be mocked:
        # 'file:///%s/t/test_files/list_mutual_funds.html' % basedir,
        funds = extract_funds(self.driver)
        self.assertTrue(len(funds) > 110)
def main(args):
    """Log into the PATRIC web UI, screenshot the home page, then open the workspace.

    args -- argv-style list (argparse re-reads the process arguments).
    Returns 0 on completion.
    """
    parser = argparse.ArgumentParser(description="Program for running tests on the PATRIC web interface.")
    parser.add_argument("user", metavar="user", help="Patric login username.")
    parser.add_argument("passwd", metavar="passwd", help="Patric login password.")
    parser.add_argument("--firebug", action="store_true", help="Open Firebug during test.")
    args = parser.parse_args()
    fp = webdriver.FirefoxProfile()
    if args.firebug:
        fp.add_extension(extension='extras/firebug-2.0.9.xpi')
        fp.set_preference("extensions.firebug.currentVersion", "2.0.9")  # Avoid startup screen
        fp.set_preference("extensions.firebug.console.enableSites", "true")
        fp.set_preference("extensions.firebug.net.enableSites", "true")
        fp.set_preference("extensions.firebug.script.enableSites", "true")
        fp.set_preference("extensions.firebug.allPagesActivation", "on")
    # Create virtual display
    display = Display(visible=0, size=(1400, 950))
    display.start()
    # Create webdriver and retrieve url
    driver = webdriver.Firefox(firefox_profile=fp)
    driver.get(SITE_URL + '/login')
    # Wait for username input box to appear
    WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.ID, "dijit_form_TextBox_0")))
    # Set username and password, click login button
    userElement = driver.find_element_by_id("dijit_form_TextBox_0")
    pwdElement = driver.find_element_by_id("dijit_form_TextBox_1")
    userElement.send_keys(args.user)
    pwdElement.send_keys(args.passwd)
    loginElement = driver.find_element_by_id("dijit_form_Button_1")
    loginElement.click()
    time.sleep(3)
    # Retrieve home page, wait for an expected page element to load, take a screenshot
    driver.get(SITE_URL + '/portal/portal/patric/Home')
    WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.ID, "cart")))
    driver.set_window_size(1400, 950)
    driver.execute_script("window.scrollTo(0,0);")
    driver.get_screenshot_as_file("homepage_after_login.jpg")
    print "Saved screenshot to: homepage_after_login.jpg\n"
    # Retrieve ws url, wait for create folder button to appear
    ws_url = SITE_URL + '/workspace/' + args.user + '@patricbrc.org/home'
    driver.get(ws_url)
    WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.CLASS_NAME, "ActionButtonContainer")))
    time.sleep(5)
    # Have to reload page, because often time the workspace is empty on first load
    driver.get(ws_url)
    WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.CLASS_NAME, "ActionButtonContainer")))
    # createFolderButton = driver.find_element_by_class_name("ActionButton fa icon-folder-plus fa-2x")
    # createFolderButton.click()
    time.sleep(30)
    driver.quit()
    display.stop()
    return 0
def get_news():
    """Scrape headline <h3> texts from deutschlandfunk.de.

    Returns the list of headlines, or None (after printing an error)
    when there is no internet connection.
    """
    if not check_wlan():
        print("Error: Not connected to the internet")
        return
    # Imported lazily so offline runs never touch these modules.
    from pyvirtualdisplay import Display
    import re
    display = Display(visible=0, size=(800, 600))
    display.start()
    driver = webdriver.Firefox()
    url = "http://www.deutschlandfunk.de/"
    driver.get(url)
    source = driver.find_element_by_xpath('//*[@id="wrapper"]/div/section[2]/div[1]').get_attribute('innerHTML')
    n_articles = source.count('<article')
    print(str(n_articles) + " articles found.")
    result = re.findall('<h3>(.+)</h3>', source)
    driver.close()
    display.stop()
    return result
class FunctionalTest(StaticLiveServerTestCase):
    """Functional tests running Firefox under Xvfb against a live server."""

    @classmethod
    def setUpClass(cls):
        # A '--liveserver=host' CLI argument points the suite at an
        # external server instead of starting the django one.
        for arg in sys.argv:
            if 'liveserver' in arg:
                cls.server_url = 'http://' + arg.split('=')[1]
                return
        super().setUpClass()
        cls.server_url = cls.live_server_url

    @classmethod
    def tearDownClass(cls):
        # Only tear down the django live server when we actually started one.
        if cls.server_url == cls.live_server_url:
            super().tearDownClass()

    def setUp(self):
        self.display = Display(visible=0, size=(1024, 768))
        self.display.start()
        self.browser = webdriver.Firefox()
        # self.browser.implicitly_wait(3)

    def tearDown(self):
        self.browser.quit()
        self.display.stop()

    def check_for_row_in_list_table(self, row_text):
        table = self.browser.find_element_by_id('id_list_table')
        row_texts = [row.text for row in table.find_elements_by_tag_name('tr')]
        self.assertIn(row_text, row_texts)
class AdminTestCase(LiveServerTestCase):
    """End-to-end payment flow test driven through Firefox under Xvfb."""

    def setUp(self):
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.selenium = webdriver.Firefox()
        super(AdminTestCase, self).setUp()

    def tearDown(self):
        self.selenium.quit()
        self.display.stop()
        super(AdminTestCase, self).tearDown()

    def test_payment(self):
        """ payment will be successful. """
        self.selenium.get("%s/pay" % self.live_server_url)
        self.selenium.implicitly_wait(20)
        self.selenium.maximize_window()
        # Enter the amount and submit the payment form.
        self.selenium.find_element_by_name("amount").send_keys("100000")
        self.selenium.find_element_by_xpath('//input[@value="pay"]').click()
        # Come back from the gateway and verify the success message.
        self.selenium.find_element_by_id("btn3").click()
        self.assertIn("successful", self.selenium.page_source)
def get_image(self):
    ## Uses supplied scrape site to find new pictures
    """Scrape self.scrape_site for a fresh, usable image and save its HD version.

    Sets self.img_id and self.description on success; raises Exception when
    no image on the page passes the id/ratio checks.
    """
    url = self.scrape_site
    # virtual display for headless runs
    display = Display(visible=0, size=(800, 600))
    display.start()
    with closing(Firefox()) as browser:
        browser.get(url)
        # TODO: fix with something less static, but still
        # multipurpose considering scrape_site as a db var
        time.sleep(5)
        imgs = browser.find_elements_by_tag_name('img')
        # TODO: fix this temporary workaround that prevents ad server data
        # from reaching the image checks
        no_ad_imgs = [i for i in imgs if 'adsrvr' not in \
            i.get_attribute('src')]
        for img in no_ad_imgs:
            src = img.get_attribute('src')
            alt = img.get_attribute('alt')
            # The photo id is the path segment right after /photo/.
            image_id = re.findall("/photo/(.+?)/", src)[0]
            if(self._check_id(image_id) and self._check_ratios(src)):
                # First acceptable image wins; remember it and save the HD copy.
                self.img_id = image_id
                self.description = alt
                self._save_hd_image()
                break
    display.stop()
    if (self.img_id):
        return
    raise Exception('Failed to find a suitable image: all out or bugged')
def get_all_items():
    """Scrape metadata for every report form listed on the Federal Reserve site.

    Walks the report-form drop-down, submits each form, and collects the
    labelled description fields into one dict per form number.
    Returns a list of dicts.
    """
    #list to store alll scraped data
    all_items = list()
    #Display - read about pyvirtualdisplay
    display = Display(visible=0, size=(1024, 768))
    display.start()
    #webdriver - read about selenium.webdriver
    driver = webdriver.Firefox()
    #this is a starting page we are scraping
    driver.get("http://www.federalreserve.gov/apps/reportforms/default.aspx")
    # Every element on the HTML page can be located using CSS selectors.
    # The drop-down menu's id (MainContent_ddl_ReportForms) was found by
    # inspecting the page in the browser's developer tools.
    main_menu = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CSS_SELECTOR,"#MainContent_ddl_ReportForms")))
    # The drop-down is an HTML table of <option> rows - one per report form.
    form_options = main_menu.find_elements_by_tag_name("option")
    #We count them
    option_count = len(form_options)
    # Loop over all of them - like scrolling the drop-down and clicking each form.
    for form_i in xrange(1,option_count):
        #Get web element corresponding to a form
        form = form_options[form_i]
        #Click as a mouse click-action in browser
        form.click()
        #Get text, because we need to store the form number
        form_id = form.text
        # Locate the submit button by the CSS selector found via inspection.
        submit_button = WebDriverWait(driver,3).until(EC.presence_of_element_located((By.CSS_SELECTOR,"#MainContent_btn_GetForm")))
        #Click as a mouse click-action in browser
        submit_button.click()
        #Prepare data structures to store all the info we want to scrape
        a = dict.fromkeys(['Description','OMB','Background','RespondentPanel','Frequency','PublicRelease'])
        # On the post-submit page, look up each field's labelled element.
        for el in a.keys():
            try:
                item = driver.find_element_by_css_selector("#MainContent_lbl_"+el+"_data")
                # Found: store its text; otherwise fall through and leave None.
                a[el] = item.text
            except:
                #case when there is no such field
                pass
        #we need form number as well
        a['FormNumber'] = form_id
        # One dict per form number - later, one row in the output spreadsheet.
        all_items.append(a)
        # Reload the start page and re-fetch the options each iteration;
        # reusing the old elements after navigation raises
        # StaleElementReferenceException (element no longer in the cache).
        driver.get("http://www.federalreserve.gov/apps/reportforms/default.aspx")
        main_menu = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CSS_SELECTOR,"#MainContent_ddl_ReportForms")))
        form_options = main_menu.find_elements_by_tag_name("option")
    driver.close()
    display.stop()
    return all_items
class Spider(scrapy.Spider): name = "mayors" allowed_domains = ["www.cec.gov.tw"] start_urls = ["https://www.cec.gov.tw/pc/zh_TW/IDX/indexC.html",] download_delay = 1 def __init__(self, ad=None, *args, **kwargs): super(Spider, self).__init__(*args, **kwargs) self.display = Display(visible=0, size=(800, 600)) self.display.start() self.driver = webdriver.Chrome("/var/chromedriver/chromedriver") def spider_closed(self, spider): self.display.close() def parse(self, response): self.driver.get(response.url) nodes = scrapy.Selector(text=self.driver.page_source).xpath('//a[@target="_top"]') for node in nodes: county = node.xpath('text()').extract_first() print county yield response.follow(node, callback=self.parse_list, meta={'meta': county}) def parse_list(self, response): for tr in response.css(u'table.tableT tr.trT'): d = {} d['type'] = 'mayors' d['county'] = response.meta['meta'] d['constituency'] = 0 d['elected'] = tr.xpath('td[1]/text()').extract_first().strip() d['number'] = int(tr.xpath('td[2]/text()').extract_first()) d['votes'] = int(re.sub('\D', '', tr.xpath('td[5]/text()').extract_first())) d['votes_percentage'] = tr.xpath('td[6]/text()').extract_first() yield d
def openurl(companyname=first_arg):
    """Google '<companyname> crunchbase', open the top hit and archive its HTML.

    Saves the page to '0515<name>.html' and records the name in smallname.txt;
    on any failure the company name is logged to missedname.txt instead.
    Returns the page source, or 'none' on failure.
    """
    display = Display(visible=0, size=(1024, 768))
    display.start()
    browser = webdriver.Firefox()
    time.sleep(randint(8, 10))
    try:
        browser.get('http://www.google.com')
        time.sleep(5)
        search = browser.find_element_by_name('q')
        input_text = companyname + str(" crunchbase")
        search.send_keys(input_text)
        time.sleep(randint(10, 15))
        search.send_keys(Keys.RETURN)
        time.sleep(randint(10, 15))
        # First result heading; its text before ' | ' becomes the file tag.
        gn = browser.find_element_by_tag_name('h3').text
        gnc = str(gn).split(' | ')[0].replace(" ", "")
        output_file = '0515' + gnc + '.html'
        browser.find_element_by_link_text(gn).click()
        time.sleep(randint(55, 60))
        company_html = browser.page_source
        time.sleep(randint(5, 10))
        with open("smallname.txt", 'a') as myfile:
            json.dump(output_file, myfile)
        with open(output_file, 'a+') as myfile:
            myfile.write(company_html)
    except Exception:
        # BUG FIX: was a bare `except:`; narrowed so KeyboardInterrupt and
        # SystemExit still propagate instead of being logged as a miss.
        company_html = 'none'
        with open("missedname.txt", "a") as myfile:
            json.dump(companyname, myfile)
    time.sleep(1)
    browser.close()
    time.sleep(1)
    display.stop()
    return company_html
def process_screenshots(app, env):
    """Build all screenshots recorded in *env* by spawning the screenshot command.

    No-ops when nothing was recorded, screenshot creation is disabled, or the
    build is only extracting gettext messages. Runs inside a virtual display
    when SPHINX_SS_USE_PVD=true. Raises when the child process fails.
    """
    if not hasattr(env, 'screenshot_all_screenshots'):
        return
    if not app.config['screenshots_create']:
        print("Not doing screenshots on maggies farm no more")
        return
    # Don't bother building screenshots if we're just collecting messages.
    # Just checks if we invoked the build command with "gettext" in there somewhere.
    # BUG FIX: moved before the display setup — previously a gettext build
    # with SPHINX_SS_USE_PVD=true started a display and leaked it.
    if "gettext" in sys.argv:
        return
    if 'SPHINX_SS_USE_PVD' in os.environ.keys() and os.environ['SPHINX_SS_USE_PVD'] == "true":
        from pyvirtualdisplay import Display
        # Start a virtual headless display
        display = Display(visible=0, size=(1024, 768))
        display.start()
    else:
        display = None
    # BUG FIX: list comprehension instead of map() — json.dumps below needs a
    # sequence, and map() returns a lazy iterator on Python 3.
    all_args = [s['from_str_arg'] for s in env.screenshot_all_screenshots]
    # If building in a different language, start the server in a different language
    command = SCREENSHOT_COMMAND + SCREENSHOT_COMMAND_OPTS + \
        [re.sub(r"\s", r"", "--from-str={0}".format(json.dumps(all_args)))]
    language = env.config.language
    if language:
        command += ["--lang={0}".format(language)]
    # Renamed from `subprocess`, which shadowed the stdlib module name.
    proc = Popen(command)
    proc.wait()
    try:
        if proc.returncode:
            raise Exception("Screenshot process had nonzero return code: {0}".format(proc.returncode))
    finally:
        if display:
            display.stop()
def run_selenium(landmark):
    """Visit http://www.<landmark> while capturing a tcpdump and perf timings.

    Side effects: writes a pcap and a pickled window.performance.timing dict
    into EXP_DIR, and appends progress lines to TEST_LOG.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    logTo(TEST_LOG,'Selenium : Starting Selenium for '+landmark,'INFO','a')
    # The capture interface name is read from a one-line file on the desktop.
    interFace=open(HOME_DIR+'/Desktop/one-time-test-suite/iface.txt','r')
    tmp=interFace.readlines()
    iface=tmp[0].split('\n')[0]
    # Epoch-seconds timestamp used to make the capture filename unique.
    tmpstmp=datetime.now().strftime("%s")
    profile = webdriver.FirefoxProfile()
    profile.update_preferences()
    browser = webdriver.Firefox(firefox_profile=profile) # assign profile to browser
    browser.delete_all_cookies()
    logTo(TEST_LOG,' Selenium : Starting tcpdump .. ','INFO','a')
    tcpcmd='tcpdump -i '+iface+' -w '+EXP_DIR+'/'+'tcpdump_'+landmark.split('.')[0]+'_'+tmpstmp
    args=shlex.split(tcpcmd)
    ptcpdmp=sub.Popen((args))
    # Give tcpdump time to come up before generating traffic.
    time.sleep(10)
    logTo(TEST_LOG,' Selenium : Starting get '+landmark,'INFO','a')
    browser.get('http://www.'+landmark)
    time.sleep(5)
    # Navigation-timing data straight from the browser.
    perfData=browser.execute_script('return window.performance.timing')
    fname=EXP_DIR+'/'+'perfdata_'+landmark.split('/')[0]
    fname=fname.replace('.','-')
    # NOTE(review): the file handle passed to pickle.dump is never closed.
    pickle.dump(perfData,open(fname,'wb'))
    logTo(TEST_LOG,'Selenium : Writing done to '+EXP_DIR+'/perfdata_'+landmark,'INFO','a')
    browser.quit()
    display.stop()
    # Stop the capture only after the browser is fully down.
    ptcpdmp.terminate()
    logTo(TEST_LOG,'Finished Selenium for '+landmark,'INFO','a')
class SeleniumRunner(object):
    """Context manager / method decorator supplying a Chrome driver under Xvfb."""

    def __call__(self, f):
        @functools.wraps(f)
        def decorated(_self, *args, **kwargs):
            # A fresh display + driver per decorated call.
            with self as driver:
                return f(_self, driver, *args, **kwargs)
        return decorated

    def __enter__(self):
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.driver = webdriver.Chrome()
        return self.driver

    def __exit__(self, *args, **kwargs):
        # Tear down defensively: either attribute may have been removed by
        # someone messing with the browser/display.
        try:
            self.driver.quit()
        except AttributeError:
            pass
        try:
            self.display.stop()
        except AttributeError:
            pass
def work():
    """Take the weekly Tower screenshot; return True on success, False otherwise."""
    logging.info("start weeklys screenshot work")
    print ("start ... ")
    display = None
    if not DISPLAY:
        print ("hide display ... ")
        display = Display(visible=0, size=(1366, 768))
        display.start()
    try:
        config = getConfigObj()
        if config is None:
            return False
        userName = config.get("USER", "UserName")
        userPWD = config.get("USER", "userPWD")
        ret = getTowerWeeklyScreenshot(userName, userPWD, DEFAULT_SAVE_PATH)
        if not ret:
            print ('Error, abort. Please check the log file "%s"' % LOG_FILE)
            return False
        logging.info("finish all work, exit.")
        return True
    finally:
        # BUG FIX: the early `return False` paths used to leak the started
        # virtual display; it is now always stopped.
        if display is not None:
            display.stop()
def setUp(self):
    """Configure the INGInious frontend and a selenium driver for TEST_ENV.

    Supported environments: "boot2docker" (remote selenium grid),
    "boot2docker-local" (local Firefox), and "jenkins" (currently disabled —
    see note below). Any other value skips the test.
    """
    # Base frontend configuration shared by all environments.
    self.frontend_config = {
        "backend": "remote",
        "docker_daemons": [{
            "remote_host": "192.168.59.103",
            "remote_docker_port": 2375,
            "remote_agent_port": 63456
        }],
        "mongo_opt": {
            "host": "localhost",
            "database": "INGIniousFrontendTest"
        },
        "tasks_directory": "./inginious/tasks",
        "containers": {
            "default": "ingi/inginious-c-default",
            "sekexe": "ingi/inginious-c-sekexe",
        },
        "superadmins": ["test"],
        "plugins": [{
            "plugin_module": "inginious.frontend.webapp.plugins.auth.demo_auth",
            "users": {
                "test": "test",
                "test2": "test",
                "test3": "test"
            }
        }]
    }
    if TEST_ENV == "boot2docker":
        # Selenium grid running inside the boot2docker VM.
        self.display = None
        self.driver = webdriver.Remote(
            command_executor=(CUSTOM_SELENIUM_EXECUTOR or 'http://192.168.59.103:4444/wd/hub'),
            desired_capabilities=DesiredCapabilities.FIREFOX)
        self.base_url = CUSTOM_SELENIUM_BASE_URL or "http://192.168.59.3:8081"
        self.frontend_host = "192.168.59.3"
        self.frontend_port = 8081
        self.frontend_ssh_port = 8082
    elif TEST_ENV == "boot2docker-local":
        # Local Firefox against the host-bound frontend.
        self.display = None
        self.driver = webdriver.Firefox()
        self.base_url = CUSTOM_SELENIUM_BASE_URL or "http://127.0.0.1:8081"
        self.frontend_host = "127.0.0.1"
        self.frontend_port = 8081
        self.frontend_ssh_port = 8082
    elif False and TEST_ENV == "jenkins":
        # NOTE(review): the `False and` makes this branch permanently dead —
        # confirm whether jenkins support was intentionally disabled.
        self.display = Display(visible=0, size=(1920, 1080))
        self.display.start()
        self.driver = webdriver.Firefox()
        self.base_url = CUSTOM_SELENIUM_BASE_URL or "http://localhost:8081"
        self.frontend_host = "localhost"
        self.frontend_port = 8081
        self.frontend_ssh_port = 8082
        self.frontend_config["backend"] = "local"
    else:
        raise SkipTest(
            "Env variable TEST_ENV is not properly configured. Please take a look a the documentation to properly configure your "
            "test environment.")
    self.driver.maximize_window()
    self.driver.implicitly_wait(30)
    self.verificationErrors = []
    self.accept_next_alert = True
    # Start from a clean database, then boot the frontend under test.
    _drop_database(self.frontend_config["mongo_opt"])
    self.frontend_thread, self.frontend_server, self.close_app_func = _start_frontend(
        self.frontend_config, self.frontend_host, self.frontend_port, self.frontend_ssh_port)
def __init__(self):
    """Start a virtual display and a Firefox driver with default capabilities."""
    # BUG FIX: the Display was previously kept only in a local variable, so
    # it could never be stopped; keep a reference for later cleanup.
    self.display = Display(visible=0, size=(800, 800))
    self.display.start()
    os.environ["PATH"] = "YOUR PATHS"  # placeholder — set to the real driver paths
    capa = DesiredCapabilities.FIREFOX
    self.browser = webdriver.Firefox(capabilities=capa)
class QQRobot(object):
    """Selenium-driven robot that logs into QQ Mail and sends/checks mail.

    Drives Firefox (optionally through an HTTP proxy and, on Linux, inside
    a virtual display), solves login captchas via an external
    ``get_qq_captcha_code`` service, and exposes ``check``/``send_email``
    operations on the logged-in mailbox.
    """

    # Full ptlogin2 login URL with all query parameters baked in.
    LOGIN_URL = "https://xui.ptlogin2.qq.com/cgi-bin/xlogin?appid=522005705&daid=4&s_url=https://mail.qq.com/cgi-bin/login?vt=passport%26vm=wpt%26ft=loginpage%26target=&style=25&low_login=1&proxy_url=https://mail.qq.com/proxy.html&need_qr=0&hide_border=1&border_radius=0&self_regurl=http://zc.qq.com/chs/index.html?type=1&app_id=11005?t=regist&pt_feedback_link=http://support.qq.com/discuss/350_1.shtml&css=https://res.mail.qq.com/zh_CN/htmledition/style/ptlogin_input24e6b9.css"

    def __init__(self, username, passwd, proxy_ip=None, proxy_port=None):
        """
        :param username: account user name
        :param passwd: account password
        :param proxy_ip: IP used to reach QQ Mail; when empty the local IP
            is used directly (no proxy)
        :param proxy_port: port used to talk to ``proxy_ip``.
            NOTE(review): the original comment says the default is 3128
            (squid's default port) but the code below defaults to 31218 —
            confirm which is intended.
        """
        self.username = username
        self.passwd = passwd
        self.proxy_ip = proxy_ip
        self.proxy_port = proxy_port or 31218
        self.is_login = False
        self.platform = platform  # sys.platform-style string, set elsewhere in the module
        if self.platform == "win32":
            self.geckopath = "F:\software\geckodriver\geckodriver.exe"
        else:
            self.geckopath = "/usr/bin/geckodriver"

    def refresh(self):
        """Reload the current page."""
        log.info("refresh firefox, user: {}, proxy_ip: {}".format(self.username, self.proxy_ip))
        self.driver.refresh()

    def quit(self):
        """Shut down the webdriver and (on Linux) the virtual display.

        All exceptions are logged and swallowed so quit() is safe to call
        from error paths even when the driver/display never started.
        """
        log.info("quit user: {}, proxy_ip: {}".format(self.username, self.proxy_ip))
        try:
            self.driver.quit()
        except BaseException as e:
            log.info(e)
        if self.platform == "linux":
            try:
                self.display.stop()
            except BaseException as e:
                log.info(e)

    def login(self):
        """Full login flow: plain login first, then captcha-assisted retry.

        Returns True on success; quits the browser and raises ValueError
        when both attempts fail.
        """
        self.set_driver()
        self.set_login()
        if self.set_login_check(timeout=1):
            return True
        self.set_login_verify()
        if self.set_login_check(timeout=3):
            return True
        self.quit()
        raise ValueError(u"不能登录QQ邮箱,重试")

    def set_profile(self):
        """Build the Firefox proxy profile.

        Returns a FirefoxProfile routing HTTP/SSL through the configured
        proxy, or None when no proxy_ip was given.
        """
        profile = None
        if self.proxy_ip:
            profile = webdriver.FirefoxProfile()
            profile.set_preference('network.proxy.type', 1)  # 1 = manual proxy configuration
            profile.set_preference('network.proxy.http', self.proxy_ip)
            profile.set_preference('network.proxy.http_port', self.proxy_port)
            profile.set_preference('network.proxy.ssl', self.proxy_ip)
            profile.set_preference('network.proxy.ssl_port', self.proxy_port)
            profile.update_preferences()
        return profile

    def set_driver(self):
        """Set up the browser and open the login page.

        On Linux a hidden virtual display is started first. Any failure
        tears everything down and raises LoginError.
        """
        try:
            if self.platform == "linux":
                self.display = Display(visible=0, size=(800, 600))
                self.display.start()
            self.driver = webdriver.Firefox(executable_path=self.geckopath, firefox_profile=self.set_profile())
            self.driver.delete_all_cookies()
            # Guard against pages that never finish loading.
            self.driver.set_page_load_timeout(300)
            self.driver.implicitly_wait(10)
            self.wait = WebDriverWait(self.driver, 30)
            # Open the initial login page.
            self.driver.get(self.LOGIN_URL)
        except BaseException as e:
            self.quit()
            log.error(traceback.format_exc())
            raise LoginError("WebDriverException, can not set driver...")

    def set_login(self):
        """Submit the login form, retrying once on any failure."""
        try:
            self.set_login_submit()
            # Assert that login succeeded ("退出" = the logout link).
            assert "退出" in self.driver.page_source
            # self.driver.find_element_by_xpath('''//div[@id="newVcodeIframe"]/iframe[1]''')
        except BaseException as e:
            try:
                log.info("login user: {}, retry login...".format(self.username))
                self.set_login_submit()
            except:
                pass

    def set_login_check(self, timeout=5):
        """Check whether we are logged in.

        Polls the page title up to 3 times, sleeping `timeout` seconds
        between attempts; sets self.is_login and returns True on success.
        """
        index = 3
        while index:
            if self.driver.title.strip() == u"QQ邮箱":
                self.is_login = True
                return True
            index -= 1
            time.sleep(timeout)
        return False

    def set_login_submit(self):
        """Fill in username/password and click the login button."""
        # Switch from QR-code login to password login first.
        self.driver.find_element_by_id("switcher_plogin").click()
        # self.wait.until(EC.presence_of_element_located((By.ID, 'u')))
        elem_user = self.driver.find_element_by_name("u")
        elem_user.clear()
        time.sleep(0.1)
        elem_user.send_keys(self.username)
        elem_pwd = self.driver.find_element_by_name("p")
        elem_pwd.clear()
        time.sleep(0.1)
        elem_pwd.send_keys(self.passwd)
        elem_but = self.driver.find_element_by_id("login_button")
        # elem_pwd.send_keys(Keys.RETURN)
        time.sleep(0.1)
        elem_but.click()

    def set_login_verify(self):
        """Handle a captcha-protected login, up to 3 attempts."""
        index = 3
        while index:
            try:
                time.sleep(0.5)
                log.info("get captcha_img user: {}, index: {}".format(self.username, index))
                newVcodeIframe = self.driver.find_element_by_xpath('''//div[@id="newVcodeIframe"]/iframe[1]''')
                self.driver.switch_to.frame(newVcodeIframe)
                captcha_img = self.set_login_save_img('capImg')
                rs, verify_code = get_qq_captcha_code(captcha_img)
                log.info(
                    'login user: {} captcha_img: {}, verifycode: {}'.format(self.username, captcha_img, verify_code))
                if not rs:
                    # Captcha recognition failed: burn one attempt and retry.
                    log.error('login user: {}, verify img fail'.format(self.username))
                    index -= 1
                    continue
                ele_verifycode = self.driver.find_element_by_id("capAns")
                ele_verifycode.send_keys(verify_code)
                self.driver.find_element_by_id("submit").click()
            except BaseException as e:
                log.error('user: %s, verifycode err, msg: %s' % (self.username, e))
                # log.error(traceback.format_exc())
            index -= 1
            if index == 1:
                # One attempt left: fall back to a plain login retry.
                log.info("verify_login user: {}, retry login...".format(self.username))
                self.set_login()

    def set_login_save_img(self, imgid, uid=None):
        """Save the captcha image.

        Takes a full-page screenshot, crops the element `imgid` out of it
        with OpenCV and returns the path of the cropped captcha file.
        """
        if not uid:
            uid = str(uuid.uuid1())
        screenshot_img = os.path.join(IMG_DIR, "screenshot_{}.png".format(uid))
        captcha_img = os.path.join(IMG_DIR, "captcha_{}.png".format(uid))
        self.driver.save_screenshot(screenshot_img)
        img = self.driver.find_element_by_id(imgid)
        loc = img.location
        print("loc:")
        print(loc)
        image = cv2.imread(screenshot_img, True)
        # Crop region sized to the captcha element (48x130 px).
        # roi = image[int(loc['y']):int(loc['y']) + 40, int(loc['x']):int(loc['x']) + 138]
        roi = image[int(loc['y']):int(loc['y'])+48, int(loc['x']):int(loc['x'])+130]
        cv2.imwrite(captcha_img, roi)
        return captcha_img

    @login_required
    def check(self, addrs):
        """Validate addresses by typing them into the compose "To" field.

        QQ Mail marks invalid recipients with an error style; this returns
        the list of addresses flagged invalid (the trailing sentinel
        address is used to detect that validation has finished), or None
        when every attempt failed (in which case is_login is cleared).
        """
        res = None
        index = 3
        while index:
            try:
                if index == 2:
                    self.refresh()
                if index == 1:
                    time.sleep(5)
                # Break straight out of every frame.
                self.driver.switch_to.default_content()
                # Click "Compose".
                # self.wait.until(EC.presence_of_element_located((By.ID, 'composebtn')))
                elem_but_w = self.driver.find_element_by_id("composebtn")
                elem_but_w.click()
                # Switch to the main iframe on the right.
                main_Frame1 = self.driver.find_element_by_id("mainFrame")
                self.driver.switch_to.frame(main_Frame1)
                # Address field: append a sentinel address after the ones under test.
                check_addrs = "{};[email protected];".format(addrs) if addrs else "[email protected];"
                self.driver.find_element_by_xpath('''//div[@id="toAreaCtrl"]/div[2]/input''').send_keys(check_addrs)
                count = 30
                while count:
                    _t = self.driver.find_element_by_xpath('''//div[@id="toAreaCtrl"]''')
                    errors = _t.find_elements_by_css_selector("div.addr_base.addr_error")
                    res = [e.text.strip().replace(";", "") for e in errors]
                    if res and res[-1] == '*****@*****.**':
                        break
                    count -= 1
                    time.sleep(0.5)
                index = 0
            except BaseException as e:
                log.error('user: %s, check err, msg: %s' % (self.username, e))
                log.error(traceback.format_exc())
                index -= 1
        if res is None:
            self.is_login = False
        return res

    @login_required
    def send_email(self, addrs, subject, content, subtype="html"):
        """Compose and send a mail through the web UI.

        When sending trips a captcha dialog, a second pass tries to solve
        it; on persistent failure is_login is cleared and the worker backs
        off for an hour. The browser is quit at the end either way.
        NOTE(review): the control flow below was reconstructed from
        whitespace-mangled source — the nesting of the two try blocks and
        the final quit() should be confirmed against the original file.
        """
        try:
            self.driver.switch_to.default_content()
            # Click "Compose".
            # self.wait.until(EC.presence_of_element_located((By.ID, 'composebtn')))
            elem_but_w = self.driver.find_element_by_id("composebtn")
            elem_but_w.click()
            # Switch to the main iframe on the right.
            main_Frame1 = self.driver.find_element_by_id("mainFrame")
            self.driver.switch_to.frame(main_Frame1)
            # Recipient field.
            self.driver.find_element_by_xpath('''//div[@id="toAreaCtrl"]/div[2]/input''').send_keys(addrs)
            # Enter the subject.
            # self.driver.find_element_by_xpath('''//input[@id="subject"]''').send_keys(subject)
            self.driver.find_element_by_id('subject').send_keys(subject)
            # self.driver.find_element_by_xpath('''//input[@id="subject"]''').send_keys(subject)
            # Enter the body.
            o = self.driver.find_elements_by_class_name("qmEditorIfrmEditArea")
            o[0].click()  # !!!!!!!must click!!!!!!!
            o[0].send_keys(content)
            time.sleep(1)
            # Click the send button.
            self.driver.find_element_by_xpath("//*[@id='toolbar']/div/a[1]").click()
            # driver.find_element_by_xpath('//a[@name="sendbtn" and @tabindex="9"]').click()
            time.sleep(3)
            # Assert the mail was sent ("再写一封" = "compose another").
            assert "再写一封" in self.driver.page_source
        except:
            log.error("弹出验证框")
            self.refresh()
            return
        try:
            self.driver.switch_to.default_content()
            log.error("弹出验证框")
            # time.sleep(600)
            captcha_img = self.set_login_save_img('QMVerify_QMDialog_verify_img_code')
            rs, verify_code = get_qq_captcha_code(captcha_img)
            log.info(
                'send email user: {} captcha_img: {}, verifycode: {}'.format(
                    self.username, captcha_img, verify_code))
            if not rs:
                log.error('login user: {}, verify img fail'.format(self.username))
                raise
            ele_verifycode = self.driver.find_element_by_id("QMVerify_QMDialog_verifycodeinput")
            ele_verifycode.send_keys(verify_code)
            self.driver.find_element_by_id("QMVerify_QMDialog_btnConfirm").click()
            time.sleep(3)
            assert "再写一封" in self.driver.page_source
        except:
            log.error(traceback.format_exc())
            self.is_login = False
            time.sleep(3600)
        # Close the browser.
        self.quit()
logfile = './logger.txt' #日志文件的保存位置 fh = logging.FileHandler(logfile, mode='w') #日志文件输出配置 fh.setLevel(logging.DEBUG) ch = logging.StreamHandler() ch.setLevel(logging.WARNING) #控制台输出配置 formatter = logging.Formatter( "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s") fh.setFormatter(formatter) ch.setFormatter(formatter) logger.addHandler(fh) logger.addHandler(ch) #设置display display = Display(visible=0, size=(1440, 900)) display.start() class MyWeb: def __init__(self): self.groupDict = {} #self.chromedirverPath = '/Users/Homosum/Downloads/chromedriver' #chromedirver的位置 self.chromedirverPath = '/Users/Homosum/Downloads/geckodriver' self.qqSavePath = '/opt/scripts/qqspider/qqfile' #结果csv文件的保存位置 self.groupSavePath = '/opt/scripts/qqspider/groupfile' def get_group(self, user, password): driver = webdriver.Firefox() driver.get("https://qun.qq.com/member.html") print('i got it')
from queue import Queue from selenium import webdriver import requests from selenium.webdriver.support.ui import Select from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC import os import re lock = threading.Lock() import shutil from pyvirtualdisplay import Display display = Display(visible=0, size=(1280, 1024)) display.start() DB_ADDRESS = "35.154.103.232" DB_USER = "******" DB_PASS = "******" DB_NAME = "office_nw" KARVY_USER = '******' KARVY_PASS = '******' #CAMS_USER = '******' #CAMS_PASS = '******' CAMS_USER = '******' CAMS_PASS = '******'
def get_browser(): global display display = Display(visible=0, size=(1024, 768)) display.start() return webdriver.Firefox()
def before_all(context):
    """Behave hook: build the whole test environment once per run.

    Registers cleanup, configures the Selenium builder, starts (when local)
    a virtual display plus window manager, obtains the driver, creates the
    FIFOs and spawns the backend server, and wires up log checkers.
    Populates many fields on ``context`` that later hooks/steps rely on.
    """
    atexit.register(cleanup, context, True)

    if not hasattr(context, "step_registry"):
        raise Exception("you must use a test runner that exposes "
                        "step_registry on the context object")

    # We need to set these to None so that cleanup does not fail. It
    # expects to be able to check these fields without having to check
    # first for their existence.
    context.driver = None
    context.wm = None
    context.display = None
    context.server = None
    context.server_tempdir = None
    context.download_dir = None
    context.tunnel_id = None
    context.tunnel = None
    context.builder = None
    context.created_documents = {}
    context.selenium_quit = os.environ.get("SELENIUM_QUIT")
    context.behave_keep_tempdirs = os.environ.get("BEHAVE_KEEP_TEMPDIRS")
    context.visible = os.environ.get("SELENIUM_VISIBLE")

    userdata = context.config.userdata
    context.builder = builder = Builder(conf_path, userdata)
    dump_config(builder)

    # --check_selenium_config mode: dump and bail out without testing.
    if userdata.get("check_selenium_config", False):
        exit(0)

    setup_screenshots(context)

    # Map builder browser names to the short tags used by active tags.
    browser_to_tag_value = {
        "INTERNETEXPLORER": "ie",
        "CHROME": "ch",
        "FIREFOX": "ff",
        "EDGE": "edge"
    }
    values = {
        'browser': browser_to_tag_value[builder.config.browser],
    }
    platform = builder.config.platform
    if platform.startswith("OS X "):
        values['platform'] = 'osx'
    elif platform.startswith("WINDOWS "):
        values['platform'] = 'win'
    elif platform == "LINUX" or platform.startswith("LINUX "):
        values['platform'] = 'linux'

    # We have some cases that need to match a combination of platform
    # and browser
    values['platform_browser'] = values['platform'] + "," + values['browser']

    context.active_tag_matcher = ActiveTagMatcher(values)

    # Without this, window sizes vary depending on the actual browser
    # used.
    initial_window_size = context.initial_window_size = \
        {"width": 1366, "height": 768}

    if not builder.remote:
        # Local run: host the browser in our own (possibly visible) display
        # plus a minimal window manager.
        visible = context.visible or \
            context.selenium_quit in ("never", "on-success", "on-enter")
        context.display = Display(visible=visible,
                                  size=(initial_window_size["width"],
                                        initial_window_size["height"]))
        context.display.start()
        print("Display started")
        builder.update_ff_binary_env('DISPLAY')
        context.wm = subprocess.Popen(["openbox", "--sm-disable"])
        print("Window manager started")

        chrome_options = builder.local_conf.get("CHROME_OPTIONS", None)
        if chrome_options:
            # We set a temporary directory for Chrome downloads. Even if
            # we do not test downloads, this will prevent Chrome from
            # polluting our *real* download directory.
            context.download_dir = tempfile.mkdtemp()
            prefs = {"download.default_directory": context.download_dir}
            chrome_options.add_experimental_option("prefs", prefs)
    else:
        # Remote run: no local display; reuse or start a tunnel instead.
        context.display = None
        context.wm = None

        tunnel_id = os.environ.get("TUNNEL_ID")
        if not tunnel_id:
            context.tunnel_id = builder.start_tunnel()
        else:
            builder.set_tunnel_id(tunnel_id)

    driver = builder.get_driver()
    context.driver = driver
    print("Obtained driver")
    context.util = selenic.util.Util(driver, 5)

    behave_wait = os.environ.get("BEHAVE_WAIT_BETWEEN_STEPS")
    context.behave_wait = behave_wait and float(behave_wait)

    # Two FIFOs give us a bidirectional control channel to the server
    # process started below.
    context.server_tempdir = tempfile.mkdtemp()
    server_write_fifo = os.path.join(context.server_tempdir, "fifo_to_server")
    os.mkfifo(server_write_fifo)
    server_read_fifo = os.path.join(context.server_tempdir, "fifo_from_server")
    os.mkfifo(server_read_fifo)

    nginx_port = str(builder.get_unused_port())
    server = subprocess.Popen([
        "utils/start_server", server_write_fifo, server_read_fifo, nginx_port
    ], close_fds=True)
    context.server = ServerControl(server, server_read_fifo, server_write_fifo)
    # React if any child (server, wm, ...) dies unexpectedly.
    signal.signal(signal.SIGCHLD, lambda *_: sigchld(context))

    # We must add the port to the server
    context.builder.SERVER += ":" + nginx_port

    context.selenium_logs = os.environ.get("SELENIUM_LOGS", False)

    remove_server_limit()

    lognames = subprocess.check_output(
        ["./manage.py", "btwworker", "lognames"])
    context.log_checkers = [LogChecker(name) for name in lognames.splitlines()]
class Agent:
    """Distributed-RL actor process.

    Owns a copy of the actor (policy) network, runs episodes against an
    environment process over queues, computes N-step returns, feeds a
    shared replay buffer, logs to TensorBoard, and (agent 1 only) runs
    periodic noise-free test episodes and optional video rendering.
    """

    def __init__(self, sess, n_agent, agent_to_env, env_to_agent, replay_buffer, writer, filename,
                 learner_policy_parameters, agent_to_learner, learner_to_agent):
        """Build the actor network, its update op, and summary ops.

        :param sess: shared TensorFlow session
        :param n_agent: 1-based index of this agent (agent 1 doubles as the test agent)
        :param agent_to_env / env_to_agent: queues to/from this agent's environment process
        :param replay_buffer: shared experience replay buffer
        :param writer: TensorBoard summary writer
        :param filename: run name used for save paths
        :param learner_policy_parameters: learner-side policy variables to copy from
        :param agent_to_learner / learner_to_agent: queues used at render time
        """
        print("Initializing agent " + str(n_agent) + "...")

        # Saving inputs to self object for later use
        self.n_agent = n_agent
        self.sess = sess
        self.replay_buffer = replay_buffer
        self.filename = filename
        self.learner_policy_parameters = learner_policy_parameters
        self.agent_to_env = agent_to_env
        self.env_to_agent = env_to_agent
        self.agent_to_learner = agent_to_learner
        self.learner_to_agent = learner_to_agent

        # Build this Agent's actor network
        self.build_actor()

        # Build the operations to update the actor network
        self.build_actor_update_operation()

        # Establish the summary functions for TensorBoard logging.
        self.create_summary_functions()
        self.writer = writer

        # If we want to record video, launch one hidden display
        if Settings.RECORD_VIDEO and self.n_agent == 1:
            self.display = Display(visible = False, size = (1400,900))
            self.display.start()

        print("Agent %i initialized!" % self.n_agent)

    def create_summary_functions(self):
        """Create the TensorBoard summary ops for episode stats."""
        # Logging the timesteps used for each episode for each agent
        self.timestep_number_placeholder = tf.placeholder(tf.float32)
        self.episode_reward_placeholder = tf.placeholder(tf.float32)
        timestep_number_summary = tf.summary.scalar("Agent_" + str(self.n_agent) + "/Number_of_timesteps", self.timestep_number_placeholder)
        episode_reward_summary = tf.summary.scalar("Agent_" + str(self.n_agent) + "/Episode_reward", self.episode_reward_placeholder)
        self.regular_episode_summary = tf.summary.merge([timestep_number_summary, episode_reward_summary])

        # If this is agent 1, the agent who will also test performance, additionally log the reward
        if self.n_agent == 1:
            test_time_episode_reward_summary = tf.summary.scalar("Test_agent/Episode_reward", self.episode_reward_placeholder)
            test_time_timestep_number_summary = tf.summary.scalar("Test_agent/Number_of_timesteps", self.timestep_number_placeholder)
            self.test_time_episode_summary = tf.summary.merge([test_time_episode_reward_summary, test_time_timestep_number_summary])

    def build_actor(self):
        """Generate the actor's policy neural network."""
        agent_name = 'agent_' + str(self.n_agent)  # agent name 'agent_3', for example
        self.state_placeholder = tf.placeholder(dtype = tf.float32, shape = [None, Settings.OBSERVATION_SIZE], name = 'state_placeholder')  # the * lets Settings.OBSERVATION_SIZE be not restricted to only a scalar

        #############################
        #### Generate this Actor ####
        #############################
        self.policy = BuildActorNetwork(self.state_placeholder, scope = agent_name)

    def build_actor_update_operation(self):
        """Build ops that copy the learner's policy weights into this actor."""
        # Update agent's policy network parameters from the most up-to-date version from the learner
        update_operations = []
        source_variables = self.learner_policy_parameters
        destination_variables = self.policy.parameters

        # For each parameters in the network
        for source_variable, destination_variable in zip(source_variables, destination_variables):
            # Directly copy from the learner to the agent
            update_operations.append(destination_variable.assign(source_variable))

        # Save the operation that performs the actor update
        self.update_actor_parameters = update_operations

    def reset_action_augment_log(self):
        """Reset the queue of past actions used for state augmentation."""
        # Create state-augmentation queue (holds previous actions)
        self.past_actions = queue.Queue(maxsize = Settings.AUGMENT_STATE_WITH_ACTION_LENGTH)

        # Fill it with zeros to start
        for i in range(Settings.AUGMENT_STATE_WITH_ACTION_LENGTH):
            self.past_actions.put(np.zeros(Settings.ACTION_SIZE), False)

    def augment_state_with_actions(self, total_state):
        """Append the logged past actions to a state and roll the log."""
        # Just received a total_state from the environment, need to augment
        # it with the past action data and return it
        past_action_data = np.asarray(self.past_actions.queue).reshape([-1])  # past actions reshaped into a column
        augmented_state = np.concatenate([total_state, past_action_data])

        # Remove the oldest entry from the action log queue
        self.past_actions.get(False)

        return augmented_state

    def run(self, stop_run_flag, replay_buffer_dump_flag, starting_episode_number):
        """Main actor loop.

        Runs the agent in its own environment for a specified number of
        episodes or until ``stop_run_flag`` is set.
        """
        print("Starting to run agent %i at episode %i." % (self.n_agent, starting_episode_number[self.n_agent -1]))

        # Initializing parameters for agent network
        self.sess.run(self.update_actor_parameters)

        # Getting the starting episode number. If we are restarting a training
        # run that has crashed, the starting episode number will not be 1.
        episode_number = starting_episode_number[self.n_agent - 1]

        # Resetting the noise scale
        noise_scale = 0.

        # Start time
        start_time = time.time()

        # Creating the temporary memory space for calculating N-step returns
        self.n_step_memory = deque()

        # For all requested episodes or until user flags for a stop (via Ctrl + C)
        while episode_number <= Settings.NUMBER_OF_EPISODES and not stop_run_flag.is_set():
            ####################################
            #### Getting this episode ready ####
            ####################################

            # Clearing the N-step memory for this episode
            self.n_step_memory.clear()

            # Reset the action_log, if applicable
            if Settings.AUGMENT_STATE_WITH_ACTION_LENGTH > 0:
                self.reset_action_augment_log()

            # Checking if this is a test time (when we run an agent in a
            # noise-free environment to see how the training is going).
            # Only agent_1 is used for test time
            test_time = (self.n_agent == 1) and (episode_number % Settings.CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES == 0 or episode_number == 1)

            # Resetting the environment for this episode by sending a boolean
            if test_time and Settings.TEST_ON_DYNAMICS:
                self.agent_to_env.put((True, test_time))  # Reset into a dynamics environment only if it's test time and desired
            else:
                self.agent_to_env.put((False, test_time))  # Reset into a kinematics environment
            total_state = self.env_to_agent.get()

            # Augment total_state with past actions, if appropriate
            if Settings.AUGMENT_STATE_WITH_ACTION_LENGTH > 0:
                total_state = self.augment_state_with_actions(total_state)

            # Calculating the noise scale for this episode. The noise scale
            # allows for changing the amount of noise added to the actor during training.
            if test_time:
                # It's test time! Run this episode without noise (if desired) to evaluate performance.
                if Settings.NOISELESS_AT_TEST_TIME:
                    noise_scale = 0

                # Additionally, if it's time to render, make a statement to the user
                if Settings.RECORD_VIDEO and (episode_number % (Settings.CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES*Settings.VIDEO_RECORD_FREQUENCY) == 0 or episode_number == 1):
                    # Also log the states & actions encountered in this episode because we are going to render them!
                    raw_total_state_log = []
                    observation_log = []
                    action_log = []
                    next_observation_log = []
                    instantaneous_reward_log = []
                    cumulative_reward_log = []
                    done_log = []
                    discount_factor_log = []
                    guidance_position_log = []
                    raw_total_state_log.append(total_state)
            else:
                # Regular training episode, use noise.
                # Noise is decayed during the training
                noise_scale = Settings.NOISE_SCALE * Settings.NOISE_SCALE_DECAY ** episode_number

            # Normalizing the total_state to 1 separately along each dimension
            # to avoid the 'vanishing gradients' problem
            if Settings.NORMALIZE_STATE:
                total_state = (total_state - Settings.STATE_MEAN)/Settings.STATE_HALF_RANGE

            # Discarding irrelevant states to obtain the observation
            observation = np.delete(total_state, Settings.IRRELEVANT_STATES)

            # Resetting items for this episode
            episode_reward = 0
            timestep_number = 0
            done = False

            # Stepping through time until episode completes.
            while not done:
                ##############################
                ##### Running the Policy #####
                ##############################
                action = self.sess.run(self.policy.action_scaled, feed_dict = {self.state_placeholder: np.expand_dims(observation,0)})[0]  # Expanding the observation to be a 1x3 instead of a 3

                # Calculating random action to be added to the noise chosen from the policy to force exploration.
                if Settings.UNIFORM_OR_GAUSSIAN_NOISE:
                    # Uniform noise (sampled between -/+ the action range)
                    exploration_noise = np.random.uniform(low = -Settings.ACTION_RANGE, high = Settings.ACTION_RANGE, size = Settings.ACTION_SIZE)*noise_scale
                else:
                    # Gaussian noise (standard normal distribution scaled to half the action range)
                    exploration_noise = np.random.normal(size = Settings.ACTION_SIZE)*Settings.ACTION_RANGE*noise_scale  # random number multiplied by the action range

                # Add exploration noise to original action, and clip it incase we've exceeded the action bounds
                action = np.clip(action + exploration_noise, Settings.LOWER_ACTION_BOUND, Settings.UPPER_ACTION_BOUND)

                # Adding the action taken to the past_actions log
                if Settings.AUGMENT_STATE_WITH_ACTION_LENGTH > 0:
                    self.past_actions.put(action)

                ################################################
                #### Step the dynamics forward one timestep ####
                ################################################
                # Send the action to the environment process
                self.agent_to_env.put((np.concatenate([action, np.zeros([1])]),))  # The concatenated 0 is to command 0 altitude acceleration

                # Receive results from stepped environment
                next_total_state, reward, done, *guidance_position = self.env_to_agent.get()  # The * means the variable will be unpacked only if it exists

                # Add reward we just received to running total for this episode
                episode_reward += reward

                # Augment total_state with past actions, if appropriate
                if Settings.AUGMENT_STATE_WITH_ACTION_LENGTH > 0:
                    next_total_state = self.augment_state_with_actions(next_total_state)

                if self.n_agent == 1 and Settings.RECORD_VIDEO and (episode_number % (Settings.CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES*Settings.VIDEO_RECORD_FREQUENCY) == 0 or episode_number == 1) and not Settings.ENVIRONMENT == 'gym':
                    if not done:
                        raw_total_state_log.append(next_total_state)

                # Normalize the state
                if Settings.NORMALIZE_STATE:
                    next_total_state = (next_total_state - Settings.STATE_MEAN)/Settings.STATE_HALF_RANGE

                # Discarding irrelevant states
                next_observation = np.delete(next_total_state, Settings.IRRELEVANT_STATES)

                # Store the data in this temporary buffer until we calculate the n-step return
                self.n_step_memory.append((observation, action, reward))

                # If the n-step memory is full enough and we aren't testing performance
                if (len(self.n_step_memory) >= Settings.N_STEP_RETURN):
                    # Grab the oldest data from the n-step memory
                    observation_0, action_0, reward_0 = self.n_step_memory.popleft()

                    # N-step reward starts with reward_0
                    n_step_reward = reward_0

                    # Initialize gamma
                    discount_factor = Settings.DISCOUNT_FACTOR
                    for (observation_i, action_i, reward_i) in self.n_step_memory:
                        # Calculate the n-step reward
                        n_step_reward += reward_i*discount_factor
                        discount_factor *= Settings.DISCOUNT_FACTOR  # for the next step, gamma**(i+1)

                    # Dump data into large replay buffer
                    # If the prioritized replay buffer is currently dumping data,
                    # wait until that is done before adding more data to the buffer
                    if not test_time:
                        replay_buffer_dump_flag.wait()  # blocks until replay_buffer_dump_flag is True
                        self.replay_buffer.add((observation_0, action_0, n_step_reward, next_observation, done, discount_factor))

                    # If this episode is being rendered, log the state for rendering later
                    if self.n_agent == 1 and Settings.RECORD_VIDEO and (episode_number % (Settings.CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES*Settings.VIDEO_RECORD_FREQUENCY) == 0 or episode_number == 1) and not Settings.ENVIRONMENT == 'gym':
                        observation_log.append(observation_0)
                        action_log.append(action_0)
                        next_observation_log.append(next_observation)
                        cumulative_reward_log.append(episode_reward)
                        instantaneous_reward_log.append(n_step_reward)
                        done_log.append(done)
                        discount_factor_log.append(discount_factor)
                        guidance_position_log.append(guidance_position)

                # End of timestep -> next state becomes current state
                observation = next_observation
                timestep_number += 1

                # If this episode is done, drain the N-step buffer, calculate
                # returns, and dump in replay buffer unless it is test time.
                if done:
                    # Episode has just finished, calculate the remaining N-step entries
                    while len(self.n_step_memory) > 0:
                        # Grab the oldest data from the n-step memory
                        observation_0, action_0, reward_0 = self.n_step_memory.popleft()

                        # N-step reward starts with reward_0
                        n_step_reward = reward_0

                        # Initialize gamma
                        discount_factor = Settings.DISCOUNT_FACTOR
                        for (observation_i, action_i, reward_i) in self.n_step_memory:
                            # Calculate the n-step reward
                            n_step_reward += reward_i*discount_factor
                            discount_factor *= Settings.DISCOUNT_FACTOR  # for the next step, gamma**(i+1)

                        # dump data into large replay buffer
                        if not test_time:
                            replay_buffer_dump_flag.wait()
                            self.replay_buffer.add((observation_0, action_0, n_step_reward, next_observation, done, discount_factor))

                        # If this episode is being rendered, log the state for rendering later
                        if self.n_agent == 1 and Settings.RECORD_VIDEO and (episode_number % (Settings.CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES*Settings.VIDEO_RECORD_FREQUENCY) == 0 or episode_number == 1) and not Settings.ENVIRONMENT == 'gym':
                            observation_log.append(observation_0)
                            action_log.append(action_0)
                            next_observation_log.append(next_observation)
                            cumulative_reward_log.append(episode_reward)
                            instantaneous_reward_log.append(n_step_reward)
                            done_log.append(done)
                            discount_factor_log.append(discount_factor)
                            guidance_position_log.append(guidance_position)

            ################################
            ####### Episode Complete #######
            ################################

            # If this episode is being rendered, render it now.
            if self.n_agent == 1 and Settings.RECORD_VIDEO and (episode_number % (Settings.CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES*Settings.VIDEO_RECORD_FREQUENCY) == 0 or episode_number == 1) and not Settings.ENVIRONMENT == 'gym':
                print("Rendering Actor %i at episode %i" % (self.n_agent, episode_number))

                os.makedirs(os.path.dirname(Settings.MODEL_SAVE_DIRECTORY + self.filename + '/trajectories/'), exist_ok=True)
                np.savetxt(Settings.MODEL_SAVE_DIRECTORY + self.filename + '/trajectories/' + str(episode_number) + '.txt',np.asarray(raw_total_state_log))

                # Ask the learner to tell us the value distributions of the state-action pairs encountered in this episode
                self.agent_to_learner.put((np.asarray(observation_log), np.asarray(action_log), np.asarray(next_observation_log), np.asarray(instantaneous_reward_log), np.asarray(done_log), np.asarray(discount_factor_log)))

                # Wait for the results
                try:
                    critic_distributions, target_critic_distributions, projected_target_distribution, loss_log = self.learner_to_agent.get(timeout = 3)

                    bins = np.linspace(Settings.MIN_V, Settings.MAX_V, Settings.NUMBER_OF_BINS)

                    # Render the episode
                    environment_file.render(np.asarray(raw_total_state_log), np.asarray(action_log), np.asarray(instantaneous_reward_log), np.asarray(cumulative_reward_log), critic_distributions, target_critic_distributions, projected_target_distribution, bins, np.asarray(loss_log), np.squeeze(np.asarray(guidance_position_log)), episode_number, self.filename, Settings.MODEL_SAVE_DIRECTORY)
                except queue.Empty:
                    print("Skipping this animation!")
                    # NOTE(review): SystemExit here terminates the whole actor
                    # when the learner misses the 3 s deadline — confirm intent.
                    raise SystemExit

            # Periodically update the agent with the learner's most recent version of the actor network parameters
            if episode_number % Settings.UPDATE_ACTORS_EVERY_NUM_EPISODES == 0:
                self.sess.run(self.update_actor_parameters)

            # Periodically print to screen how long it's taking to run these episodes
            if episode_number % Settings.DISPLAY_ACTOR_PERFORMANCE_EVERY_NUM_EPISODES == 0:
                print("Actor " + str(self.n_agent) + " ran " + str(Settings.DISPLAY_ACTOR_PERFORMANCE_EVERY_NUM_EPISODES) + " episodes in %.1f minutes, and is now at episode %i" % ((time.time() - start_time)/60, episode_number))
                start_time = time.time()

            ###################################################
            ######## Log training data to tensorboard #########
            ###################################################
            # Logging the number of timesteps and the episode reward.
            feed_dict = {self.episode_reward_placeholder: episode_reward, self.timestep_number_placeholder: timestep_number}
            if test_time:
                summary = self.sess.run(self.test_time_episode_summary, feed_dict = feed_dict)
            else:
                summary = self.sess.run(self.regular_episode_summary, feed_dict = feed_dict)
            self.writer.add_summary(summary, episode_number)

            # Increment the episode counter
            episode_number += 1

        #################################
        ##### All episodes complete #####
        #################################
        # If were recording video, stop the display
        if Settings.RECORD_VIDEO and self.n_agent == 1:
            self.display.stop()

        # Notify user of completion
        print("Actor %i finished after running %i episodes!" % (self.n_agent, episode_number - 1))
def AdjustResolution(): display = Display(visible=0, size=(800, 800)) display.start()
} } return elemInfo; } return JSON.stringify(getElemInfo()); ''' returned = driver.execute_script(js_script) with open('page_elements.json','w',encoding='utf-8') as f: f.write(returned) try: print("Initializing...") display = Display(visible=0, size=(1280, 720)) display.start() firefox_profile = FirefoxProfile() driver = webdriver.Firefox(firefox_profile) driver.set_page_load_timeout(90) driver.implicitly_wait(30) print("Initialization completed") # Load jQuery with open('jquery.min.js') as f: driver.execute_script(f.read()) main() finally: print("Running Garbage Collection") driver.quit()
class Deamonizer:
    """Wrap a long-running task in an infinite retry loop with simple
    colored logging, optional log-file redirection, optional virtual
    display, and a 'compile' mode that writes run/crontab helper scripts.

    NOTE(review): this is Python 2 code (print statements). The class name
    'Deamonizer' is a typo for 'Daemonizer' but is part of the public API,
    so it is left unchanged.
    """

    def __init__(self):
        # Slots for the task to run repeatedly; callers fill these in
        # before invoking run().
        self.main_functionality = {
            "function": None,
            "args": None,
            "kwargs": None
        }
        # Optional one-shot setup task executed once before the loop.
        self.pre_functionality = {
            "function": None,
            "args": None,
            "kwargs": None
        }
        self.print_text = None   # logging callable, set by parse_command_line()
        self.visibility = False  # True => do not start a virtual display
        self.display = None      # pyvirtualdisplay handle when headless

    def format_log(self, priority, description, text):
        """
        DEBUG - for genuinely debug-level info; will not be seen in production or
        shipped product, as INFO will be the minimum level; good for capturing
        timings, number of occurrences of events, etc

        INFO - minimum level for production/shipped usage; record data likely to
        be useful in forensic investigations and to confirm successful outcomes
        ("stored 999 items in DB OK"); all info here must be such that you would
        be OK with end users/customers seeing it and sending you it, if need be
        (no secrets, no profanity!)

        WARN - not an error level as such, but useful to know the system may be
        entering dodgy territory, e.g. business logic stuff like "number of
        ordered products < 0" which suggests a bug somewhere, but isn't a system
        exception; I tend not to use it that much to be honest, finding things
        tend to be more natural fits to INFO or ERROR

        ERROR - use this for exceptions (unless there's a good reason to reduce
        to WARN or INFO); log full stacktraces along with important variable
        values without which diagnosis is impossible; use only for app/system
        errors, not bad business logic circumstances

        FATAL - only use this for an error of such high severity that it
        literally prevents the app from starting / continuing
        (http://stackoverflow.com/questions/7486596/commons-logging-priority-best-practices)
        Retrieved 1453177472
        """
        start_color = ''
        end_color = bcolors.ENDC
        # NOTE(review): typo'd local name; the lowercased value is never used,
        # so the substring checks below are effectively case-sensitive.
        priotity = priority.lower()
        if 'info' in priority:
            start_color = bcolors.OKBLUE
        elif 'warn' in priority:
            start_color = bcolors.WARNING
        elif 'error' in priority:
            start_color = bcolors.FAIL
        elif 'fatal' in priority:
            start_color = bcolors.FAIL
        else:
            # Unknown level: emit without any color codes at all.
            end_color = ''
        # Truncate long descriptions so columns stay aligned.
        if len(description) > 30:
            description = description[:27] + '...'
        return '{4}{0!s:30} {1!s:20} {2!s:30} {3}{5}'.format(
            time.ctime(time.time()), priority, description, text,
            start_color, end_color)

    def parse_command_line(self):
        """Configure output target, visibility and (maybe) a virtual display.

        Side effects: may rebind sys.stdout/sys.stderr, write helper scripts,
        or call sys.exit() (in 'compile' mode).
        """
        output_deamonizer = 'stdout'
        self.print_text = print_optional(output_deamonizer)
        # ------------------------------------------------------------------------------
        # | Routines for parsing the command line                                      |
        # |                                                                            |
        # | Usage: python name_of_file.py [stdout|file_name] [visible|nonvisible]      |
        # ------------------------------------------------------------------------------
        if len(sys.argv) == 2:
            if sys.argv[1] == 'compile':
                # Saving of the run file
                fileTmpName = os.path.basename(__file__)
                filenameRun = 'run_' + fileTmpName.replace('.py', '') + '.sh'
                result_string = ''
                result_string += 'cd ' + os.path.dirname(
                    os.path.abspath(fileTmpName)
                ) + ' && ' + sys.executable + ' ' + fileTmpName + ' ' + fileTmpName.replace(
                    '.py', '') + '.log nonvisible 2>&1'
                f = open(filenameRun, 'w')
                f.write(result_string)
                f.close()
                # Saving of the crontab script
                filename = 'crontab_' + fileTmpName.replace('.py', '') + '.txt'
                result_string = ''
                result_string += '30 7 * * * sh ' + os.path.dirname(
                    os.path.abspath(fileTmpName)) + '/' + filenameRun
                print result_string
                f = open(filename, 'w')
                f.write(result_string)
                f.close()
                sys.exit()
            else:
                # Single non-'compile' argument: treat it as a log-file path.
                sys.stdout = open(sys.argv[1], 'a')
                output_deamonizer = 'file'
                self.print_text = print_optional(output_deamonizer)
        elif len(sys.argv) == 3:
            if sys.argv[1] != 'stdout':
                sys.stdout = open(sys.argv[1], 'a')
                output_deamonizer = 'file'
                self.print_text = print_optional(output_deamonizer)
            self.visibility = sys.argv[2] == 'visible'
        # Send stderr wherever stdout now points (file or console).
        sys.stderr = sys.stdout
        if not self.visibility:
            from pyvirtualdisplay import Display
            self.display = Display(visible=0, size=(1024, 768))
            self.display.start()
            self.print_text(
                self.format_log('debug', 'message', 'Using virtual display.'))
        else:
            self.print_text(
                self.format_log('debug', 'message',
                                'Not using virtual display.'))

    def run(self):
        """Parse argv, run the optional pre-task, then loop forever running
        the main task, sleeping five minutes between iterations and while
        offline."""
        self.parse_command_line()
        # Try to set up a counter for exceptions.
        exceptionsTimeouts = 0
        # Run Pre
        if self.pre_functionality["function"] != None:
            self.pre_functionality["function"](
                *self.pre_functionality["args"],
                **self.pre_functionality["kwargs"])
        while True:
            self.print_text(
                self.format_log('debug', 'message',
                                'Inside the infinite loop.'))
            # Scape the fate of no internet
            self.print_text(
                self.format_log('info', 'current time', str(datetime.now())))
            while not connected_to_internet():
                self.print_text(
                    self.format_log(
                        'warning', 'connection error',
                        'Not connected to the internet. Going to sleep for five minutes'
                    ))
                time.sleep(60 * 5)
            self.print_text(
                self.format_log('debug', 'message',
                                'About to start the try except'))
            try:
                if self.main_functionality["function"] != None:
                    self.main_functionality["function"](
                        *self.main_functionality["args"],
                        **self.main_functionality["kwargs"])
            except TimeoutException as e:
                # Selenium timeouts are expected; count them and retry.
                self.print_text(
                    self.format_log(
                        'error', 'exception',
                        'Timeout exception of selenium. Trying again.'))
                exceptionsTimeouts += 1
                # if exceptionsTimeouts % 6 == 0:
                #     os.system("python send_text.py \"Error in quickbooks Too many timeouts. "+str(e)+"\"")
            except Exception as e:
                # NOTE(review): swallows all other exceptions and loops anyway.
                self.print_text(
                    self.format_log('fatal', 'exception',
                                    'Unrecognized exception.'))
            self.print_text(
                self.format_log('debug', 'message',
                                'Going to sleep for five minutes'))
            time.sleep(60 * 5)
        # NOTE(review): unreachable — the `while True` above never exits, so
        # the virtual display is never stopped here.
        if not self.visibility:
            self.display.stop()
def deploy_firefox(status_queue, browser_params, manager_params, crash_recovery):
    """Launch a Firefox instance configured by ``browser_params``.

    Progress is reported to ``status_queue`` as ('STATUS', stage, payload)
    tuples. Returns (driver, browser_profile_path, profile_settings).

    NOTE(review): Python 2 code (uses dict.has_key). Relies on several
    project helpers (loggingclient, load_profile, configure_firefox) whose
    semantics are not visible here.
    """
    root_dir = os.path.dirname(__file__)  # directory of this file
    logger = loggingclient(*manager_params['logger_address'])

    display_pid = None
    display_port = None

    # A fresh temporary selenium profile; its path is reused below for
    # dropping config files and certificate DBs into the profile.
    fp = webdriver.FirefoxProfile()
    browser_profile_path = fp.path + '/'
    status_queue.put(('STATUS', 'Profile Created', browser_profile_path))

    # Set all prefs related to mobile js
    mobile_platform = browser_params['mobile_platform']
    configure_firefox.set_mobile_prefs(fp, mobile_platform)

    profile_settings = None  # Imported browser settings
    if browser_params['profile_tar'] and not crash_recovery:
        # First launch with a seed profile: optionally restore flash data.
        logger.debug("BROWSER %i: Loading initial browser profile from: %s" %
                     (browser_params['crawl_id'], browser_params['profile_tar']))
        profile_settings = load_profile(
            browser_profile_path, manager_params, browser_params,
            browser_params['profile_tar'],
            load_flash=browser_params['disable_flash'] is False)
    elif browser_params['profile_tar']:
        # Crash recovery: reload the previously-saved profile as-is.
        logger.debug("BROWSER %i: Loading recovered browser profile from: %s" %
                     (browser_params['crawl_id'], browser_params['profile_tar']))
        profile_settings = load_profile(browser_profile_path, manager_params,
                                        browser_params,
                                        browser_params['profile_tar'])
    status_queue.put(('STATUS', 'Profile Tar', None))

    if browser_params['random_attributes'] and profile_settings is None:
        logger.debug("BROWSER %i: Loading random attributes for browser" %
                     browser_params['crawl_id'])
        profile_settings = dict()

        # choose a random screen-res from list
        resolutions = list()
        with open(os.path.join(root_dir, 'screen_resolutions.txt'), 'r') as f:
            for line in f:
                resolutions.append(tuple(line.strip().split(',')))
        profile_settings['screen_res'] = random.choice(resolutions)

        # set a random user agent from list
        ua_strings = list()
        with open(os.path.join(root_dir, 'user_agent_strings.txt'), 'r') as f:
            for line in f:
                ua_strings.append(line.strip())
        profile_settings['ua_string'] = random.choice(ua_strings)

    # If profile settings still not set - set defaults
    if profile_settings is None:
        profile_settings = dict()
        if mobile_platform == "android":
            profile_settings['screen_res'] = ANDROID_SCREEN_RES
            profile_settings['ua_string'] = "Mozilla/5.0 (Android 7.0; Mobile; rv:55.0) Gecko/55.0 Firefox/55.0"
            profile_settings['color_depth'] = 24
        elif mobile_platform == "iphone":
            profile_settings['screen_res'] = IPHONE_SCREEN_RES
            profile_settings['ua_string'] = "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) FxiOS/7.5b3349 Mobile/14F89 Safari/603.2.4"
            profile_settings['color_depth'] = 32
        else:
            raise ValueError("Mobile mobile_platform value is not recognized")

    if profile_settings['ua_string'] is not None:
        logger.debug(
            "BROWSER %i: Overriding user agent string with the following: %s" %
            (browser_params['crawl_id'], profile_settings['ua_string']))
        fp.set_preference("general.useragent.override",
                          profile_settings['ua_string'])

    if browser_params['headless']:
        display = Display(visible=0, size=profile_settings['screen_res'])  # ,color_depth=profile_settings['color_depth'])
        display.start()
        display_pid = display.pid
        # NOTE(review): digs the X display number out of pyvirtualdisplay's
        # command line — fragile, depends on that library's internals.
        display_port = display.cmd_param[5][1:]
    status_queue.put(('STATUS', 'Display', (display_pid, display_port)))

    # Write extension configuration
    if browser_params['extension_enabled']:
        ext_loc = os.path.join(root_dir + "/../",
                               'Extension/firefox/openwpm.xpi')
        ext_loc = os.path.normpath(ext_loc)
        fp.add_extension(extension=ext_loc)
        # NOTE(review): this preference name looks redacted ("*****@*****.**");
        # presumably an extension-id-keyed logging pref — confirm upstream.
        fp.set_preference("*****@*****.**", "all")
        extension_config = dict()
        extension_config.update(browser_params)
        extension_config['logger_address'] = manager_params['logger_address']
        extension_config['sqlite_address'] = manager_params['aggregator_address']
        if manager_params.has_key('ldb_address'):
            extension_config['leveldb_address'] = manager_params['ldb_address']
        else:
            extension_config['leveldb_address'] = None
        extension_config['testing'] = manager_params['testing']
        # The extension reads its config from a JSON file inside the profile.
        with open(browser_profile_path + 'browser_params.json', 'w') as f:
            json.dump(extension_config, f)
        logger.debug("BROWSER %i: OpenWPM Firefox extension loaded" %
                     browser_params['crawl_id'])

    if browser_params['proxy']:
        logger.warning("BROWSER %i: Use of the proxy is DEPRECATED and will be "
                       "removed from future releases. Use http_instrument." %
                       browser_params['crawl_id'])
        PROXY_HOST = "localhost"
        PROXY_PORT = browser_params['proxy']
        # Direct = 0, Manual = 1, PAC = 2, AUTODETECT = 4, SYSTEM = 5
        fp.set_preference("network.proxy.type", 1)
        fp.set_preference("network.proxy.http", PROXY_HOST)
        fp.set_preference("network.proxy.http_port", PROXY_PORT)
        fp.set_preference("network.proxy.ssl", PROXY_HOST)  # https sites
        fp.set_preference("network.proxy.ssl_port", PROXY_PORT)
        # set this to exclude sites from using proxy
        # http://kb.mozillazine.org/Network.proxy.no_proxies_on
        fp.set_preference("network.proxy.no_proxies_on", "")
        # copy the dbs into temp profile
        # these were created by manually adding the cert to
        # a previous tmp selenium profile
        shutil.copy(os.path.join(root_dir + "/../", 'Proxy/key3.db'),
                    fp.path + '/key3.db')
        shutil.copy(os.path.join(root_dir + "/../", 'Proxy/cert8.db'),
                    fp.path + '/cert8.db')

    # Disable flash
    if browser_params['disable_flash']:
        fp.set_preference('plugin.state.flash', 0)

    # Configure privacy settings
    configure_firefox.privacy(browser_params, fp, root_dir,
                              browser_profile_path)

    # Set various prefs to improve speed and eliminate traffic to Mozilla
    configure_firefox.optimize_prefs(fp)

    # Launch the webdriver
    status_queue.put(('STATUS', 'Launch Attempted', None))
    fb = FirefoxBinary(root_dir + "/../../firefox-bin/firefox")
    driver = webdriver.Firefox(firefox_profile=fp, firefox_binary=fb)
    status_queue.put(('STATUS', 'Browser Launched',
                      (int(driver.binary.process.pid), profile_settings)))

    # set window size
    driver.set_window_size(*profile_settings['screen_res'])

    return driver, browser_profile_path, profile_settings
def __init__(self):
    """Bring up a hidden 1600x1024 X display, then launch Firefox on it
    and clear any leftover cookies before first use."""
    virtual_screen = Display(visible=0, size=(1600, 1024))
    virtual_screen.start()
    self.driver = webdriver.Firefox()
    self.driver.delete_all_cookies()
# У$ - Economics # С$ - Sociology # Ч$ - CultureС$ ENV = 'PRODUCTION' # ENV = '' if ENV == 'PRODUCTION': from pyvirtualdisplay import Display options = webdriver.ChromeOptions() options.add_argument('--disable-dev-shm-usage') options.add_argument('--no-sandbox') options.binary_location = '/usr/bin/google-chrome-stable' display = Display(visible=0, size=(1024, 768)) display.start() driver = webdriver.Chrome( executable_path= '/srv/graduate-work-table/graduate_report/parser/unix/chromedriver', service_args=['--verbose'], chrome_options=options) else: driver = webdriver.Chrome('win32/chromedriver.exe') driver.get( 'http://irbis-nbuv.gov.ua/cgi-bin/irbis64r_81/cgiirbis_64.exe?C21COM=F&I21DBN=ARD_EX&P21DBN=ARD&S21FMT=&S21ALL=&Z21ID=' ) time.sleep(5)
def generate_map(config):
    """Render the map described by ``config`` in a headless Firefox and
    return a screenshot of it.

    ``config`` is consumed destructively: ``renderer`` (default 'leaflet'),
    ``format`` ('byte' -> raw PNG bytes, 'base64' -> ascii bytes), ``width``
    and ``height`` are popped; the remainder is passed to the renderer HTML
    template as JSON.

    Returns the image (bytes) or None if rendering never signalled ready.

    Fixes over the previous version:
    - browser.quit() and display.stop() now run in ``finally`` blocks, so an
      exception mid-render no longer leaks the Firefox process or the Xvfb
      display (previously they were only cleaned up on the success path).
    - tempfile.mkstemp() is called before the try/finally that removes the
      file, so a failed mkstemp can no longer raise NameError on html_path
      in the cleanup path.
    - removed the unreachable trailing ``return image``.
    """
    renderer = config.pop('renderer', 'leaflet')
    output_format = config.pop('format', 'byte')
    width = config.pop('width')
    height = config.pop('height')

    display = Display(visible=0, size=(width, height))
    display.start()
    try:
        # Profile that auto-saves downloads into TMP_DIR without prompting.
        fp = webdriver.FirefoxProfile()
        fp.set_preference('browser.download.folderList', 2)
        fp.set_preference('browser.download.manager.showWhenStarting', False)
        fp.set_preference('browser.download.dir', TMP_DIR)
        fp.set_preference('browser.helperApps.neverAsk.saveToDisk',
                          'octet/stream')
        browser = webdriver.Firefox(firefox_profile=fp)
        try:
            # Measure window chrome so the *viewport* ends up width x height.
            dx, dy = browser.execute_script(
                'let w=window; return [w.outerWidth - w.innerWidth, w.outerHeight - w.innerHeight];'
            )
            browser.set_window_size(width + dx, height + dy)

            with open('{}.html'.format(renderer), 'r') as f:
                html = f.read()

            image = None
            # Created before the inner try so the finally can always remove it.
            html_fd, html_path = tempfile.mkstemp(suffix='.html', dir=WORK_DIR)
            try:
                config['image_name'] = str(uuid.uuid4())
                with open(html_path, 'w') as f:
                    f.write(
                        html % {
                            'WD': os.getcwd(),
                            'WIDTH': width,
                            'HEIGHT': height,
                            'CONFIG': json.dumps(config)
                        })
                os.close(html_fd)

                browser.get('file://{}'.format(html_path))
                delay = 10  # seconds per wait for the page's #Ready marker
                tries = 3   # timeout retry budget
                while True:
                    try:
                        WebDriverWait(browser, delay).until(
                            EC.presence_of_element_located((By.ID, 'Ready')))
                        screenshot = browser.get_screenshot_as_base64()
                        if output_format == 'byte':
                            image = base64.b64decode(screenshot)
                        elif output_format == 'base64':
                            image = bytes(screenshot, 'ascii')
                        break
                    except TimeoutException:
                        if tries == 0:
                            break
                        tries -= 1
                return image
            finally:
                os.remove(html_path)
        finally:
            browser.quit()
    finally:
        display.stop()
def multi_mode(cli_parsed):
    """EyeWitness multi-target driver: screenshot web targets with a pool of
    worker processes, optionally re-run per user agent, then capture VNC/RDP
    targets via a Qt4/twisted reactor. Results land in <output dir>/ew.db.

    NOTE(review): Python 2 code (print statements, xrange, iteritems).
    """
    dbm = db_manager.DB_Manager(cli_parsed.d + '/ew.db')
    dbm.open_connection()
    if not cli_parsed.resume:
        dbm.initialize_db()
    dbm.save_options(cli_parsed)
    m = Manager()
    targets = m.Queue()
    lock = m.Lock()
    multi_counter = m.Value('i', 0)
    display = None

    def exitsig(*args):
        # Ctrl-C handler: close DB and tell the user how to resume.
        dbm.close()
        if current_process().name == 'MainProcess':
            print ''
            print 'Resume using ./EyeWitness.py --resume {0}'.format(
                cli_parsed.d + '/ew.db')
        os._exit(1)

    signal.signal(signal.SIGINT, exitsig)

    if cli_parsed.resume:
        # On resume the target objects already exist in the DB.
        pass
    else:
        url_list, rdp_list, vnc_list = target_creator(cli_parsed)
        if any((cli_parsed.web, cli_parsed.headless)):
            for url in url_list:
                dbm.create_http_object(url, cli_parsed)
        for rdp in rdp_list:
            dbm.create_vnc_rdp_object('rdp', rdp, cli_parsed)
        for vnc in vnc_list:
            dbm.create_vnc_rdp_object('vnc', vnc, cli_parsed)

    if any((cli_parsed.web, cli_parsed.headless)):
        # Hide the real browser behind Xvfb unless the user asked to watch.
        if cli_parsed.web and not cli_parsed.show_selenium:
            display = Display(visible=0, size=(1920, 1080))
            display.start()
        multi_total = dbm.get_incomplete_http(targets)
        if multi_total > 0:
            if cli_parsed.resume:
                print 'Resuming Web Scan ({0} Hosts Remaining)'.format(
                    str(multi_total))
            else:
                print 'Starting Web Requests ({0} Hosts)'.format(
                    str(multi_total))
        if multi_total < cli_parsed.threads:
            num_threads = multi_total
        else:
            num_threads = cli_parsed.threads
        # One None sentinel per worker marks end-of-queue.
        for i in xrange(num_threads):
            targets.put(None)
        try:
            workers = [Process(target=worker_thread, args=(
                cli_parsed, targets, lock,
                (multi_counter, multi_total))) for i in xrange(num_threads)]
            for w in workers:
                w.start()
            for w in workers:
                w.join()
        except Exception as e:
            print str(e)

    # Set up UA table here
    if cli_parsed.cycle is not None:
        ua_dict = get_ua_values(cli_parsed.cycle)
        if not cli_parsed.ua_init:
            # Seed the per-user-agent work table from successful first-pass hits.
            dbm.clear_table("ua")
            completed = dbm.get_complete_http()
            completed[:] = [x for x in completed if x.error_state is None]
            for item in completed:
                for browser, ua in ua_dict.iteritems():
                    dbm.create_ua_object(item, browser, ua)
            cli_parsed.ua_init = True
            dbm.clear_table("opts")
            dbm.save_options(cli_parsed)
        for browser, ua in ua_dict.iteritems():
            # Fresh queue and counter for each user-agent pass.
            targets = m.Queue()
            multi_counter.value = 0
            multi_total = dbm.get_incomplete_ua(targets, browser)
            if multi_total > 0:
                print("[*] Starting requests for User Agent {0}"
                      " ({1} Hosts)").format(browser, str(multi_total))
            if multi_total < cli_parsed.threads:
                num_threads = multi_total
            else:
                num_threads = cli_parsed.threads
            for i in xrange(num_threads):
                targets.put(None)
            workers = [Process(target=worker_thread,
                               args=(cli_parsed, targets, lock,
                                     (multi_counter, multi_total),
                                     (browser, ua)))
                       for i in xrange(num_threads)]
            for w in workers:
                w.start()
            for w in workers:
                w.join()

    if any((cli_parsed.vnc, cli_parsed.rdp)):
        log._LOG_LEVEL = log.Level.ERROR
        multi_total, targets = dbm.get_incomplete_vnc_rdp()
        if multi_total > 0:
            print ''
            print 'Starting VNC/RDP Requests ({0} Hosts)'.format(
                str(multi_total))

            # Qt event loop plus a no-op timer keeps the GUI responsive
            # while twisted drives the protocol connections.
            app = QtGui.QApplication(sys.argv)
            timer = QTimer()
            timer.start(10)
            timer.timeout.connect(lambda: None)

            # add qt4 reactor
            import qt4reactor
            qt4reactor.install()
            from twisted.internet import reactor

            for target in targets:
                if os.path.dirname(cli_parsed.d) != os.path.dirname(
                        target.screenshot_path):
                    target.set_paths(cli_parsed.d)
                tdbm = db_manager.DB_Manager(cli_parsed.d + '/ew.db')
                if target.proto == 'vnc':
                    reactor.connectTCP(
                        target.remote_system, target.port,
                        vnc_module.RFBScreenShotFactory(
                            target.screenshot_path, reactor, app, target,
                            tdbm))
                else:
                    reactor.connectTCP(
                        target.remote_system, int(target.port),
                        rdp_module.RDPScreenShotFactory(
                            reactor, app, 1200, 800, target.screenshot_path,
                            cli_parsed.timeout, target, tdbm))
            reactor.runReturn()
            app.exec_()

    if display is not None:
        display.stop()
    results = dbm.get_complete_http()
    vnc_rdp = dbm.get_complete_vnc_rdp()
    dbm.close()
    m.shutdown()
    write_vnc_rdp_data(cli_parsed, vnc_rdp)
    sort_data_and_write(cli_parsed, results)
from pyvirtualdisplay import Display
from selenium import webdriver

# Smoke test (Python 2): open Baidu in Firefox hidden behind a virtual
# X display, print the page title, then tear everything down.
print 'start'
display = Display(visible=0, size=(800, 600))
display.start()
browser = webdriver.Firefox()
browser.get('http://www.baidu.com')
print browser.title
browser.quit()
display.stop()
def setUpClass(cls):
    # Start one shared 800x600 virtual display for the entire test class
    # before delegating to the parent's setup.
    # NOTE(review): presumably a matching cls.display.stop() lives in
    # tearDownClass — confirm, otherwise the Xvfb process leaks.
    cls.display = Display(visible=0, size=(800, 600))
    cls.display.start()
    super(FMTestCase, cls).setUpClass()
def get_urls(query, url, verbose=False, warning=True, user_agent=None, proxy=None, **kwargs):
    """
    Bypass Google captchas and Google API by using selenium-webdriver to
    gather the Google URL. This will open a robot controlled browser window
    and attempt to get a URL from Google that will be used for scraping
    afterwards. Only downside to this method is that your IP and user agent
    will be visible until the application pulls the URL.

    query:      search terms typed into the engine's 'q' box
    url:        search-engine landing page to drive
    user_agent: string forced via general.useragent.override (may be None)
    proxy:      single-entry {scheme: address} dict, or None
    Returns the browser's current URL after the search submits.
    """
    if verbose:
        logger.debug(
            set_color("setting up the virtual display to hide the browser...",
                      level=10))
    # Keep the automated Firefox off-screen for the whole session.
    ff_display = Display(visible=0, size=(800, 600))
    ff_display.start()
    logger.info(
        set_color(
            "firefox browser display will be hidden while it performs the query..."
        ))
    if warning:
        logger.warning(
            set_color(
                "your web browser will be automated in order for Zeus to successfully "
                "bypass captchas and API calls. this is done in order to grab the URL "
                "from the search and parse the results. please give selenium time to "
                "finish it's task...", level=30))
    if verbose:
        logger.debug(
            set_color("running selenium-webdriver and launching browser...",
                      level=10))
    if verbose:
        logger.debug(
            set_color(
                "adjusting selenium-webdriver user-agent to '{}'...".format(
                    user_agent), level=10))
    if proxy is not None:
        # proxy is expected to hold exactly one scheme->address pair;
        # ''.join(...) flattens that single key/value for the Proxy config.
        proxy_type = proxy.keys()
        proxy_to_use = Proxy({''.join(proxy_type): proxy[''.join(proxy_type)]})
        if verbose:
            logger.debug(
                set_color("setting selenium proxy to '{}'...".format(
                    ''.join(proxy_type) + "://" + ''.join(proxy.values())),
                    level=10))
    else:
        proxy_to_use = None

    profile = webdriver.FirefoxProfile()
    # NOTE(review): applied even when user_agent is None — confirm that a
    # None override is harmless for the target Firefox version.
    profile.set_preference("general.useragent.override", user_agent)
    browser = webdriver.Firefox(profile, proxy=proxy_to_use)
    logger.info(set_color("browser will open shortly..."))
    browser.get(url)
    if verbose:
        logger.debug(
            set_color(
                "searching search engine for the 'q' element (search button)...",
                level=10))
    search = browser.find_element_by_name('q')
    logger.info(
        set_color("searching '{}' using query '{}'...".format(url, query)))
    search.send_keys(query)
    search.send_keys(Keys.RETURN)  # hit return after you enter search text
    time.sleep(3)  # give the results page time to load before reading the URL
    if verbose:
        logger.debug(set_color("obtaining URL from selenium..."))
    retval = browser.current_url
    if verbose:
        logger.debug(
            set_color("found current URL from selenium browser '{}'...".format(
                retval), level=10))
    logger.info(set_color("closing the browser and continuing process.."))
    browser.close()
    ff_display.stop()
    return retval
from pyvirtualdisplay import Display
import scipy.optimize
import random
import scipy.misc
import torch
from scripts.rl_zforcing import ZForcing
import os
import matplotlib
# Select a non-interactive backend before pyplot is imported, so plotting
# works without a real display.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

# Virtual display for environment rendering during evaluation.
display_ = Display(visible=0, size=(550, 500))
display_.start()

# Parse arguments
# NOTE(review): `argparse` is not imported in this visible span — presumably
# imported earlier in the file; confirm.
parser = argparse.ArgumentParser()
parser.add_argument("--env", required=True,
                    help="name of the environment to be run (REQUIRED)")
parser.add_argument("--demos-origin", default=None,
                    help="origin of the demonstrations: human | agent (REQUIRED or --model REQUIRED)")
parser.add_argument("--seed", type=int, default=None,
                    help="random seed (default: 0 if model agent, 1 if demo agent)")
parser.add_argument("--model", default=None,
                    help="name of the trained model (REQUIRED or --demos-origin REQUIRED)")
### Grab the information from our configuration file config = scraperfunctions.load_config() ### Get the current time if we don't already have one (and transform into a date object) curr_time = scraperfunctions.get_curr_time(curr_time, parsefile) ### Establish our MySQL Connection (for logging, etc.) engine, connection, metadata, mysql_table_name, mysql_log_name = scraperfunctions.create_mysql_engine(config) ########### Download actions if download_desktop == 1: try: ### Initiate our virtual display print("Initiating virtual display") display = Display(visible=0, size=(1920, 1080)) display.start() ### Let's start our browser browser = scraperfunctions.create_browser() ### Let's load the page work scraperfunctions.load_homepage(browser, pubshort, puburl) ### See if the MV list requires extra actions if puburl_mv_extraactions != None: ### Actions for acquiring MV List pass ### Let's first store the source code html_code = browser.page_source
def setup_func():
    """Set up test fixtures: a hidden X display and a running gnumeric."""
    global process, screen

    screen = Display(visible=0)
    screen.start()

    # Launch gnumeric in the background, then give it three seconds to
    # finish drawing before any test interacts with it.
    gnumeric = EasyProcess('gnumeric')
    process = gnumeric.start().sleep(3)
start = time.process_time() env = gym.make('Pong-v4') print("Number of obswervations: {}".format(env.observation_space)) print("Number of allowed actions: {}".format(env.action_space)) print(tf.__version__) print(tf.keras.__version__) optimizer = tf.train.AdamOptimizer(learning_rate) model = create_model() # model.load_weights('model/agentcycle1750-agent99gamma1kepochs') # print(model.summary()) # print('Model loaded successfully!') memory = Memory() import skvideo.io from pyvirtualdisplay import Display display = Display(visible=0) display.start() out = skvideo.io.FFmpegWriter(filename+'.mp4') start_training = time.process_time() for cycle in range(num_cycles): observation = env.reset() previous_frame = pre_process(observation) while True: frame = env.render(mode='rgb_array') out.writeFrame(np.array(frame)) current_frame = pre_process(observation) delta_frame = current_frame - previous_frame action = next_action(observations = delta_frame, model = model) next_observation, reward, done, info = env.step(action) memory.add_to_memory(delta_frame, action, reward) if done:
def _fill_and_submit(driver, xpath, value):
    """Click the text input at ``xpath``, type ``value`` and submit it,
    retrying once if the element goes stale mid-interaction."""
    try:
        field = driver.find_element_by_xpath(xpath)
        field.click()
        field.send_keys(value)
        field.submit()
    except StaleElementReferenceException:
        field = driver.find_element_by_xpath(xpath)
        field.click()
        field.send_keys(value)
        field.submit()


def find_data():
    """Flask endpoint body: search the FSSAI licensing portal with the
    criteria posted as JSON and return [name, state, registration] rows.

    Expects a JSON body with keys: state, business_name, district,
    registration_num, business_kind, product_desc (missing ones become
    the string 'None' / are skipped when empty).

    Fixes over the previous version: the registration-number field used
    ``select3.click`` (attribute access, no call) twice, so the field was
    never actually clicked; the duplicated click/type/submit retry blocks
    are now one helper, which also applies the fix.
    """
    display = Display(visible=0, size=(1920, 1080))
    display.start()
    # Init headless ChromeDriver
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(chrome_options=chrome_options)
    driver.set_window_size(1920, 1080)
    url = 'https://foodlicensing.fssai.gov.in/index.aspx'

    # Receive JSON data as a dictionary and parse the inputs.
    inputs = request.get_json()
    instate = str(inputs.get('state'))
    inbizname = str(inputs.get('business_name'))
    indist = str(inputs.get('district'))
    inregnum = inputs.get('registration_num')
    inbizkind = str(inputs.get('business_kind'))
    inproddesc = str(inputs.get('product_desc'))

    # Go to URL; the portal sometimes serves a refresh interstitial.
    try:
        driver.get(url)
        driver.find_element_by_xpath(
            '//*[@id="demo-tabs-vertical"]/ul[2]/li[2]').click()
    except NoSuchElementException:
        driver.find_element_by_link_text('Click here to Refresh').click()

    # Click on the 'FBO Search' tab
    try:
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, 'demo-tabs-vertical')))
        driver.find_element_by_xpath(
            '//*[@id="demo-tabs-vertical"]/ul[2]/li[2]').click()
    except StaleElementReferenceException:
        print('Its taking too long!')

    ## Fill in the form with the provided inputs.
    # State is mandatory; the option dump aids debugging of mismatched names.
    select0 = Select(
        driver.find_element_by_xpath('//*[@id="ctl00_content_ddlState"]'))
    print([o.text for o in select0.options])
    select0.select_by_visible_text(instate)

    # District (optional)
    if indist != '':
        select1 = Select(
            driver.find_element_by_xpath(
                '//*[@id="ctl00_content_ddlDistrict"]'))
        print([o.text for o in select1.options])
        select1.select_by_visible_text(indist)

    # Company name (optional)
    if inbizname != '':
        _fill_and_submit(driver, '//*[@id="ctl00_content_txtName"]',
                         inbizname)

    # License/registration number (optional)
    if inregnum != '':
        _fill_and_submit(driver, '//*[@id="ctl00_content_txtLicense"]',
                         inregnum)

    # Kind of business (optional)
    if inbizkind != '':
        select4 = Select(
            driver.find_element_by_xpath('//*[@id="ctl00_content_ddlKOB"]'))
        print([o.text for o in select4.options])
        select4.select_by_visible_text(inbizkind)

    # Product description (optional)
    if inproddesc != '':
        _fill_and_submit(driver, '//*[@id="ctl00_content_txtProduct"]',
                         inproddesc)

    # Click search
    driver.find_element_by_xpath('//*[@id="ctl00_content_btnsearch"]').click()

    # Wait for the results to load
    try:
        WebDriverWait(driver, 7).until(
            EC.presence_of_element_located((By.ID, 'ctl00_content_update')))
    except TimeoutException:
        print('Results took too long to load!')

    # Switch the active tab to the search results window.
    window_before = driver.window_handles[0]
    window_after = driver.window_handles[1]
    driver.switch_to.window(window_after)

    # Handle pagination: ask for 300 rows per page when a page selector exists.
    try:
        select6 = Select(
            driver.find_element_by_xpath('//*[@id="ctl00_content_ddlPage"]'))
        print([o.text for o in select6.options])
        select6.select_by_visible_text('300')
        try:
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located(
                    (By.ID, 'ctl00_content_update')))
        except TimeoutException:
            print('Results took too long to load!')
    except NoSuchElementException:
        print('No pagination!')

    # Scrape the results table row by row.
    info = []
    rows = driver.find_element_by_xpath(
        '//*[@id="ctl00_content_gvDetails"]/tbody').find_elements_by_tag_name(
            'tr')
    row_number = 1
    for row in rows:
        cells = row.find_element_by_xpath(
            '//*[@id="ctl00_content_gvDetails"]/tbody/tr[' + str(row_number) +
            ']').text
        # First comma-separated token is the name; drop its 2-char prefix.
        name = cells.split(", ")[0]
        name = name[2:]
        registration = 0
        # A valid FSSAI registration number is a 14-digit integer.
        regis = [int(s) for s in cells.split() if s.isdigit()]
        registr = [word for word in regis if len(str(word)) == 14]
        if len(registr) == 1:
            registration = registr[0]
        row_number += 1
        info.append([name, instate, registration])
    info = info[1:]  # drop the header row
    return jsonify(info)
from easyprocess import EasyProcess
from pyvirtualdisplay import Display
#~ from pyvirtualdisplay.smartdisplay import SmartDisplay # needs pyscreenshot
import logging
logging.basicConfig(level=logging.DEBUG)
import time

# Demo script (Python 2): run the awesome window manager plus SciTE inside
# a 700x600 nested display.
_W = 700
_H = 600
# height percents
hp1 = 0.6
hp2 = 1 - hp1

# visible=1 => the nested display is shown on screen (not headless Xvfb).
# NOTE(review): the Display handle is discarded, so it is never stopped.
Display(visible=1, size=(_W, _H)).start()

# EasyProcess.start() # spawns process in background
# EasyProcess.check() # loops process in foreground
try:
    EasyProcess('awesome -c rc.lua').start()
except Exception, detail:
    print detail
time.sleep(2)
try:
    EasyProcess('bash -c "cd $HOME && scite"').start()
except Exception, detail:
    print detail
time.sleep(2)
def codechef():
    """Interactively submit a solution file (sys.argv[2]) to CodeChef via a
    headless Firefox, then poll until the judge verdict appears.

    NOTE(review): the credential-prompt section near the bottom was corrupted
    by an automated secret-redaction pass (the `user`/`passw` input lines are
    garbled/missing), so this function cannot run as-is.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()

    def check_exists_by_tag_name(scope_to_search, tag):
        # True iff `scope_to_search` contains a child element with tag `tag`.
        try:
            scope_to_search.find_element_by_tag_name(tag)
        except NoSuchElementException:
            return False
        return True

    driver = webdriver.Firefox()

    def get_lang_for_submission(File_Path):
        # Map a source-file extension to CodeChef's language keyword.
        if File_Path.endswith(".py"):
            return "python3"
        elif File_Path.endswith(".cpp"):
            return "cpp"
        elif File_Path.endswith(".java"):
            return "java"
        elif File_Path.endswith(".c"):
            return "c"
        elif File_Path.endswith(".php"):
            return "php"
        elif File_Path.endswith(".pl"):
            return "perl"
        elif File_Path.endswith(".rb"):
            return "ruby"
        elif File_Path.endswith(".go"):
            return "go"
        elif File_Path.endswith(".sh"):
            return "bash"
        elif File_Path.endswith(".sql"):
            return "sql"
        elif File_Path.endswith(".pas"):
            return "pascal"
        elif File_Path.endswith(".cs"):
            return "csharp"
        elif File_Path.endswith(".r"):
            return "r"
        elif File_Path.endswith(".js"):
            return "rhino"
        elif File_Path.endswith(".m"):
            return "octave"
        elif File_Path.endswith(".coffee"):
            return "coffeescript"
        elif File_Path.endswith(".b"):
            return "brainfuck"
        elif File_Path.endswith(".swift"):
            return "swift"
        elif File_Path.endswith(".lua"):
            return "lua"
        elif File_Path.endswith(".kt"):
            return "kotlin"
        else:
            return None

    def codechef_login(user, passw):
        # Log in, handle the concurrent-session limit page if shown,
        # then paste and submit the solution and poll for the verdict.
        username = driver.find_element_by_id("edit-name")
        username.send_keys(user)
        password = driver.find_element_by_id("edit-pass")
        password.send_keys(passw)
        submit = driver.find_element_by_id("edit-submit")
        submit.click()
        url = driver.current_url
        if url == "https://www.codechef.com/session/limit":
            # Disconnect every other active session, keeping this one.
            box = driver.find_elements_by_xpath("//input[@type='checkbox']")
            for check in box:
                check.click()
            box[len(box) - 1].click()
            submit_session = driver.find_element_by_id("edit-submit")
            submit_session.click()
        language_choice = get_lang_for_submission(sys.argv[2])
        option_value = ""
        # Numeric <option value> ids used by CodeChef's language dropdown.
        if language_choice == "cpp":
            option_value = "44"
        elif language_choice == "java":
            option_value = "10"
        elif language_choice == "python3":
            option_value = "116"
        elif language_choice == "c":
            option_value = "11"
        elif language_choice == "rhino":
            option_value = "35"
        else:
            option_value = None
        code_script = open(sys.argv[2], 'r')
        code_script = code_script.read()
        time.sleep(5)
        text_area = driver.find_element_by_id("edit-program")
        text_area.send_keys(code_script)
        select = Select(driver.find_element_by_id("edit-language"))
        select.select_by_value(option_value)
        # langauge_button=driver.find_element_by_xpath("//select[@name='language']")
        # driver.execute_script("arguments[0].click();",langauge_button)
        # language_choose=driver.find_element_by_xpath("//select[@name='language']/option[text()='C++14(gcc 6.3)']")
        # language_choose=driver.find_element_by_xpath("//select[@name='language']/option[@value='"+option_value+"']")
        # driver.execute_script("arguments[0].click();",language_choose)
        code_submit = driver.find_element_by_id("edit-submit-1")
        driver.execute_script("arguments[0].click();", code_submit)
        time.sleep(4)
        # Poll every 4s until the verdict (<strong> inside #display_result)
        # shows up, then print it.
        while True:
            result = driver.find_element_by_id("display_result")
            if check_exists_by_tag_name(result, "strong") == False:
                time.sleep(4)
            else:
                result_has_come = result.find_element_by_tag_name("strong")
                print(result_has_come.text)
                break

    # ANSI escape codes for the interactive prompts.
    GREEN = '\033[92m'
    GRAY = '\033[90m'
    CYAN = '\033[36m'
    RED = '\033[31m'
    YELLOW = '\033[33m'
    END = '\033[0m'
    UNDERLINE = '\033[4m'
    BOLD = '\033[1m'
    print(RED + BOLD + "Contest[Y/N]" + END, end=' ')
    choice = input()
    contest_id = ""
    if choice[0] == "Y":
        print(YELLOW + BOLD + "Enter contest_id" + END, end=' ')
        contest_id = input()
    codechef_link = "https://www.codechef.com/"
    if contest_id != "":
        codechef_link = codechef_link + contest_id + "/submit/"
    else:
        codechef_link = codechef_link + "submit/"
    print(RED + BOLD + "Enter question id:" + END, end=' ')
    question_id = input()
    # NOTE(review): the following line was mangled by credential redaction;
    # the reads into `user` and `passw` are lost. Left byte-for-byte as found.
    print(GRAY + BOLD + "Enter your username:"******"Enter your password:"******"Enter your password: " + END)
    codechef_link = codechef_link + question_id
    driver.get(codechef_link)
    codechef_login(user, passw)
    display.stop()
    # codechef()
class Scraper(object):
    """Headless-Chrome scraper that walks redfin.com's sitemap.

    Navigation order: /sitemap -> first state -> each county ->
    'Recently Sold Homes', appending every result page's JSON-LD payloads
    to ``<output_dir>/results.csv``.
    """

    def __init__(self):
        """Resolve paths, configure Chrome download prefs, then start crawling."""
        # Project root: two directories above this source file.
        self.project_directory = os.path.dirname(
            os.path.dirname(
                os.path.abspath(inspect.getfile(inspect.currentframe()))))
        # self.output_dir_path = os.path.join(self.project_directory, "scraper_output")
        # results.csv is written under this fixed path.
        self.output_dir_path = '/home/InternResults'
        self.download_dir_path = os.path.join(self.project_directory, "proxy_download_directory")
        # NOTE(review): self.headers is never used by the selenium driver below;
        # presumably kept for a requests-based fallback — confirm before removing.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36"
        }
        # Chrome profile prefs: no popups, downloads land in download_dir_path.
        self.prefs = {}
        self.prefs["profile.default_content_settings.popups"] = 0
        self.prefs["download.default_directory"] = self.download_dir_path
        # Constructor kicks off the whole crawl immediately.
        self.initialize_driver()
        self.get_states()
        # subprocess.check_output("cp {}/results.csv /home/InternResults/".format(self.output_dir_path), shell=True)

    def initialize_driver(self):
        """Start a virtual display and a headless Chrome, then open the sitemap."""
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        # Set driver options
        self.driver_options = webdriver.ChromeOptions()
        self.driver_options.add_experimental_option("prefs", self.prefs)
        self.driver_options.add_argument('--headless')
        self.driver_options.add_argument('--no-sandbox')
        self.driver_options.add_argument('--disable-dev-shm-usage')
        self.driver = webdriver.Chrome(options=self.driver_options)
        # Page load timeout 30 seconds
        self.driver.implicitly_wait(30)
        # self.random_request_limit = scraper_utils.generate_random_request_count()
        # self.random_delay = scraper_utils.return_randomized_delay()
        # self.current_proxy = ""
        self.driver.get('http://www.redfin.com/sitemap')

    def get_states(self):
        """Click through state links on the sitemap (currently only the first).

        The xpath is re-evaluated before each click because the previous click
        navigated away, which would leave stale element references.
        """
        state_urls = self.driver.find_elements_by_xpath(
            "//div[@class='sitemap-section']/div[2]/ul[@class='list']/li/div/a"
        )
        for i in range(len(state_urls)):
            self.driver.find_elements_by_xpath(
                "//div[@class='sitemap-section']/div[2]/ul[@class='list']/li/div/a"
            )[i].click()
            # state_url.click()
            self.timeout(10)
            self.get_county_for_state()
            # Deliberately stops after the first state.
            break

    def get_county_for_state(self):
        """Visit every county link on the current state page, scraping each."""
        county_urls = self.driver.find_elements_by_xpath(
            "//div[@class='sitemap-section'][1]/div[2]/ul[@class='list']/li/div/a"
        )
        for i in range(len(county_urls)):
            print(i)
            # Re-find to avoid stale references after the prior back().
            self.driver.find_elements_by_xpath(
                "//div[@class='sitemap-section'][1]/div[2]/ul[@class='list']/li/div/a"
            )[i].click()
            # county_url.click()
            self.timeout(10)
            self.get_listings_for_each_county()
            # break
            # Return to the county list for the next iteration.
            self.driver.back()
            self.timeout(10)

    def get_listings_for_each_county(self):
        """Open 'Recently Sold Homes' for the current county and harvest JSON-LD.

        Walks forward through pagination while two paging buttons exist
        (prev + next), appending each page's structured data to results.csv,
        then backs out the same number of pages. Bare excepts deliberately
        swallow missing-link / missing-pagination cases.
        """
        try:
            recently_sold_url = self.driver.find_element_by_xpath(
                "//div[@class='sitemap-section'][2]//ul/li//a[contains(text(), 'Recently Sold Homes')]"
            )
            recently_sold_url.click()
            self.timeout(4)
            # Collect every <script type="application/ld+json"> body, stripped
            # of its surrounding [ ] so lines concatenate into CSV-ish rows.
            jscode = 'var list = document.querySelectorAll(\'script[type="application/ld+json"]\');\nld_list = Array(list.length).join(",").split(",").map((i, index) => list[index].text.replace(\'[\', \'\').replace(\']\',\'\'));\nreturn ld_list;'
            try:
                print(self.driver.current_url)
                # Save the results of the first page
                if not os.path.isdir(self.output_dir_path):
                    os.mkdir(self.output_dir_path)
                # print("Here")
                # print(self.driver.execute_script(jscode))
                with open('{}/results.csv'.format(self.output_dir_path), 'a') as file:
                    file.write("\n".join(self.driver.execute_script(jscode)))
                # first iteration use find_element_by_xpath, if element not present this throws error
                pagination_result = self.driver.find_element_by_xpath(
                    "//div[@class='PagingControls']/button[@class='clickable buttonControl button-text']"
                )
                # print("Pagination found")
                pagination_result.click()
                # page 1 to page n
                forward_count = 1
                pagination_result = self.driver.find_elements_by_xpath(
                    "//div[@class='PagingControls']/button[@class='clickable buttonControl button-text']"
                )
                # Two matching buttons means both 'previous' and 'next' exist,
                # i.e. we are on a middle page — keep going forward.
                while (len(pagination_result) == 2):
                    if not os.path.isdir(self.output_dir_path):
                        os.mkdir(self.output_dir_path)
                    with open('{}/results.csv'.format(self.output_dir_path), 'a') as file:
                        file.write("\n".join(
                            self.driver.execute_script(jscode)))
                    pagination_result[1].click()
                    self.timeout(4)
                    pagination_result = self.driver.find_elements_by_xpath(
                        "//div[@class='PagingControls']/button[@class='clickable buttonControl button-text']"
                    )
                    forward_count += 1
                # Save the results of the nth page
                if not os.path.isdir(self.output_dir_path):
                    os.mkdir(self.output_dir_path)
                with open('{}/results.csv'.format(self.output_dir_path), 'a') as file:
                    file.write("\n".join(self.driver.execute_script(jscode)))
                # page n to page 1
                while (forward_count != 1):
                    self.driver.back()
                    forward_count -= 1
                    self.timeout(4)
            except:
                pass
                # print("No pagination found")
            # Leave the recently-sold page back to the county page.
            self.driver.back()
            self.timeout(5)
        except:
            pass
        # NOTE(review): this back() also runs after the success path above,
        # backing out twice — confirm the intended navigation depth.
        self.driver.back()
        self.timeout(5)

    def timeout(self, explicit_time: int = 0) -> None:
        """Sleep for explicit_time seconds, or a randomized delay when 0."""
        if not explicit_time:
            self.random_delay = scraper_utils.return_randomized_delay()
            time.sleep(self.random_delay)
        else:
            time.sleep(explicit_time)
def startBrowsing():
    """Start a virtual X display and a Chrome webdriver.

    Returns:
        The Chrome driver. The running ``Display`` is attached to it as
        ``driver.virtual_display`` so callers can stop it; previously the
        Display object was discarded, leaking the Xvfb process with no way
        to ever call ``display.stop()``.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    driver = webdriver.Chrome()
    # Backward-compatible: callers still receive the driver as before,
    # but can now do driver.virtual_display.stop() after driver.quit().
    driver.virtual_display = display
    return driver
def real_scrapping():
    """Log into the realdealbet affiliate dashboard and scrape its statistics.

    Returns:
        A list of strings: month-to-date quick-stats cells (for both dropdown
        views), then yesterday's first-number-per-row figures (impressions,
        clicks, registrations, new depositors, commission) and the date.

    Fix: ``Real`` is now initialised to None before the try block. Previously,
    if ``webdriver.Chrome(...)`` raised, ``finally: Real.quit()`` hit an
    unbound local and the resulting NameError masked the original error.
    """
    display = Display(visible=0, size=(1200, 900))
    display.start()
    Real = None  # guard for the finally block below
    try:
        # SECURITY NOTE(review): credentials are hard-coded; move to env/config.
        Real = webdriver.Chrome(
            executable_path=os.path.abspath("/usr/bin/chromedriver"))
        Real.get("http://affiliates.realdealbet.com/")
        Real.find_element_by_name("username").send_keys("id_betfyuk")
        Real.find_element_by_name("password").send_keys("dontfuckwithme")
        pwd = Real.find_element_by_name("password")
        pwd.send_keys(Keys.RETURN)
        Real.implicitly_wait(10)
        # Login opens the dashboard in a second browser window.
        window_after = Real.window_handles[1]
        Real.switch_to_window(window_after)
        mtd_valArr = []
        table = Real.find_element(by=By.ID, value="dashboard_quick_stats")
        mtds_val = table.find_element(by=By.CLASS_NAME, value="row_light_color")
        for mtd_val in mtds_val.find_elements_by_tag_name("td"):
            mtd_valArr.append(mtd_val.text)
        time.sleep(2)
        # Switch the quick-stats dropdown to its second option and re-read.
        Real.find_element_by_xpath(
            '//*[@id="dashboard"]/div[1]/div[1]/div/div[1]/div/div/select[1]/option[2]'
        ).click()
        time.sleep(40)
        table = Real.find_element(by=By.ID, value="dashboard_quick_stats")
        mtds_val = table.find_element(by=By.CLASS_NAME, value="row_light_color")
        for mtd_val in mtds_val.find_elements_by_tag_name("td"):
            if mtd_val.text != 'Total -':
                mtd_valArr.append(mtd_val.text)
        Real.get(
            "https://partners.realdealbet.com/reporting/quick_summary_report.asp"
        )
        # Derive yesterday's date from the report form's end-date field.
        toDate = Real.find_element_by_id('enddate').get_attribute('value')
        toDateObj = datetime.datetime.strptime(toDate, '%Y/%m/%d').date()
        delta = datetime.timedelta(days=1)
        aDayAgo = toDateObj - delta
        aDayAgoObj = aDayAgo.strftime("%Y/%m/%d")
        reportDiv = Real.find_element_by_id("reportcriteria")
        merchantDiv = reportDiv.find_elements_by_tag_name("tr")[3]
        merchantId = merchantDiv.find_element_by_tag_name("select")
        merchant = merchantId.find_elements_by_tag_name("option")[0]
        Real.execute_script(
            "document.getElementById('startdate').value = '{0}'".format(
                aDayAgoObj))
        Real.execute_script(
            "document.getElementById('enddate').value = '{0}'".format(
                aDayAgoObj))
        merchant.click()
        time.sleep(5)
        Real.find_element_by_class_name("button").click()
        time.sleep(20)
        tableDiv = Real.find_element_by_id("internalreportdata")
        table = tableDiv.find_element_by_tag_name("table")
        todayVal = table.find_elements_by_tag_name("tr")
        # First signed decimal number in each row of interest.
        pattern = re.compile(r'[\-\d.\d]+')
        impreto = pattern.search(todayVal[1].text).group(0)
        mtd_valArr.append(impreto)
        clito = pattern.search(todayVal[2].text).group(0)
        mtd_valArr.append(clito)
        regto = pattern.search(todayVal[4].text).group(0)
        mtd_valArr.append(regto)
        ndto = pattern.search(todayVal[7].text).group(0)
        mtd_valArr.append(ndto)
        commito = pattern.search(todayVal[-1].text).group(0)
        mtd_valArr.append(commito)
        mtd_valArr.append(aDayAgoObj)
        print(mtd_valArr)
        return mtd_valArr
    finally:
        # Only quit a driver that actually started; always stop the display.
        if Real is not None:
            Real.quit()
        display.stop()
def Main(): parser = OptionParser() parser.add_option("--crawl", dest="crawl", action="store_true", help="crawl url", default=False) parser.add_option("--crawl-landing", dest="crawl_landing", action="store_true", help="crawl url", default=False) parser.add_option("--working-dir", dest="workingdir", type="string", help="working directory", default='.') parser.add_option("--db-name", dest="db_name", type="string", help="database name", default='skillscommons') parser.add_option("--table-name", dest="table_name", type="string", help="table name", default='skill') parser.add_option("--main-table-name", dest="main_table_name", type="string", help="main table name", default='skillscommons') parser.add_option("--attachment-table-name", dest="attachment_table_name", type="string", help="attachment table name", default='attachment') parser.add_option("--meta-table-name", dest="meta_table_name", type="string", help="meta table name", default='meta_data') parser.add_option("--use-firefox", dest="use_firefox", action="store_true", help="use-firefox", default=True) (options, args) = parser.parse_args() workingdir = options.workingdir.rstrip('/') if not os.path.exists(workingdir): parser.error("workingdir not exists") try: display = None from pyvirtualdisplay import Display display = Display(visible=0, size=(1000,900)) display.start() except: print 'No Xvfb!' 
db = mysql.DB(db=options.db_name) db.set_autocommit(True) driver = crawlutils.open_driver(use_firefox=options.use_firefox) links=["https://www.skillscommons.org/discover?rpp=2000&page=1&group_by=none&etal=0", "https://www.skillscommons.org/discover?rpp=2000&page=2&group_by=none&etal=0", "https://www.skillscommons.org/discover?rpp=2000&page=3&group_by=none&etal=0"] try: if options.crawl: count = 0 for link in links: print "Link :",link driver.get(link) time.sleep(5) medium_results=driver.find_element_by_class_name("medium-results") li=medium_results.find_elements_by_tag_name("li") for tag in li: count+=1 print "Count :",count link_tag=tag.find_element_by_tag_name("a") title=link_tag.text.strip() url=link_tag.get_attribute("href") types=tag.find_elements_by_class_name("type") if len(types)==2: type=types[0].text.strip() institution=types[1].text.strip() else: type=None institution=types[0].text.strip() description=tag.find_element_by_class_name("abstract").text.strip() print "title :", title print "url :",url print "type :",type print "institution :",institution print "description :",description data = { 'title':title, 'institution':institution, 'url':url, 'type':type, 'description':description, } db.insert(options.table_name, data) if options.crawl_landing: count=0 skill=db.query("select distinct url from skill where crawled=0") print "Number of urls to crawl ",len(skill) for (src_url,) in skill: print "source url :",src_url print "count %s"%count count+=1 driver.get(src_url) author=None col=driver.find_element_by_class_name("col-sm-8") title=col.find_element_by_tag_name("h1").text.strip() m = hashlib.md5() m.update(title+src_url) document_id=m.hexdigest() toc_html="/mnt/data/kendavar/skillscommons/%s.html"%document_id file(toc_html,"w","utf8").write(driver.page_source) authors=col.find_element_by_class_name("authors") if not authors.find_elements_by_tag_name("div"): author=authors.text.strip() description=col.find_element_by_class_name("abstract").text 
files=col.find_element_by_class_name("files") file_information=files.find_elements_by_class_name("file-information") attachment=[] for attach in file_information: attachment.append((attach.text.strip(),attach.find_element_by_tag_name("a").get_attribute("href"))) dls=col.find_elements_by_tag_name("dl") meta={} string='' for dl in dls: for div in dl.find_elements_by_tag_name("div"): string='' dd=div.find_element_by_tag_name("dd") if dd.find_elements_by_tag_name("li"): for li in dd.find_elements_by_tag_name("li"): string=string+li.text.strip()+"," elif dd.find_elements_by_tag_name("a"): string=[dd.text.strip()] anchors=[] for anchor in dd.find_elements_by_tag_name("a"): if anchor.get_attribute("href") not in anchors: anchors.append(anchor.get_attribute("href")) string.append(anchor.get_attribute("href")) else: string=dd.text.strip() meta[div.find_element_by_tag_name("dt").text.replace(":","").strip()]=string print "title :",title print "author :",author print "description :",description print "toc_path",toc_html data={ "document_id":document_id, "title":title, "author":author, "description":description, "toc_path":toc_html } db.insert(options.main_table_name, data) for (attachment_title,attachment_url) in attachment: print "document_id":document_id, print "attachment_title":attachment_title, print "attachment_url":attachment_url data={ "document_id":document_id, "attachment_title":attachment_title, "attachment_url":attachment_url } db.insert(options.attachment_table_name, data) for key,value in meta.iteritems(): if value[-1]==",": value=value[:-1] print '%s : %s'%(key,value) if type(value) is list: for val in value: meta_title=key if i%2==0 : meta_value=val else: meta_url=val print "meta_title":meta_title print "meta_value":meta_value print "meta_url":meta_url data={ "document_id":document_id, "meta_title":meta_title, "meta_value":meta_value, "meta_url":meta_url } db.insert(options.meta_table_name, data) else: meta_title=key meta_url=None meta_value=value print 
"meta_title":meta_title print "meta_value":meta_value print "meta_url":meta_url data={ "document_id":document_id, "meta_title":meta_title, "meta_value":meta_value, "meta_url":meta_url } db.insert(options.meta_table_name, data) data={ "crawled":1 } db.update(options.table_name,data,"url='%s'"%src_url) print "updated the table" except: traceback.print_exc() if driver: driver.save_screenshot(workingdir + '/error.png') print workingdir + '/error.png' finally: if driver: driver.quit() if display: display.stop()