def setUpClass(cls):
    """Class-level test fixture: start a virtual display (headless mode) and a shared WebDriver.

    NOTE(review): source was collapsed; the placement of the WebDriver creation
    outside the HEADLESS branch follows the usual Django/selenium pattern — confirm.
    """
    if settings.HEADLESS:
        # Run the browser inside an Xvfb virtual display so no real screen is needed.
        cls.display = Display(visible=0, size=(1024, 768))
        cls.display.start()
    # One browser instance shared by all tests in the class.
    cls.selenium = WebDriver()
    super(BrowserTestBase, cls).setUpClass()
def send_message(name, email, phone_num, url, custom_msg_bool):
    """Fill and (optionally) submit a contact-inquiry form on a listing page.

    Retries the page load up to 3 times on WebDriverException.  Returns the
    timestamp string of the send on success, or "Failed: New Address" when the
    page cannot be loaded / the listing is gone.

    NOTE(review): `options` is built with '-headless' but never passed to
    webdriver.Firefox() (the kwargs are commented out), so the browser does NOT
    actually run headless here — confirm whether that is intentional.
    """
    fail_count = 0  # counter for how many times this script fails
    while True:
        page_restart = 0  # tracker for restarting the function
        options = Options()
        options.add_argument('-headless')  # run in windowless mode
        driver = webdriver.Firefox()#executable_path='/usr/local/bin/geckodriver',firefox_binary='/usr/bin/firefox',firefox_options=options)
        driver.set_page_load_timeout(30)  # set a timeout of 30 seconds
        print('Drive Launched!')
        try:
            display = Display(visible=0, size=(1024, 768))  # create the virtual display
            display.start()  # start the display
            driver.get(url)  # load the listing page
            print(driver.title)  # print the title of the page
        except WebDriverException:  # page load failed / timed out
            driver.quit()
            display.stop()
            fail_count += 1
            page_restart = 1  # loop again with a fresh driver
            if fail_count > 3:
                return "Failed: New Address"
        if page_restart == 0:  # page loaded; proceed with the form
            page_num = 1  # indicates the page type CAN BE 1 OR 2
            send_handle = None  # variable for the button click
            try:
                name_handle = driver.find_element_by_css_selector(page_one_name__css)
            except NoSuchElementException:
                # Form not present — listing is no longer active.
                driver.quit()
                display.stop()
                print('Listing Already Sold')
                return "Failed: New Address"
            print('PAGE NUMBER IS ' + str(page_num))
            # set the variables (module-level CSS selector constants)
            name_css = page_one_name__css
            email_css = page_one_email_css
            phone_css = page_one_phone_css
            send_css = page_one_send__css
            #while name_handle == 0:
            # Explicitly wait for the name field before typing.
            name_cond = EC.presence_of_element_located((By.CSS_SELECTOR, name_css))
            name_handle = wait_and_get(driver, name_cond, 30)
            name_handle.send_keys(name)  # once it is found, send the name string to it
            # send the email string
            email_handle = driver.find_element_by_css_selector(email_css)
            email_handle.send_keys((str(email) + '@gmail.com'))
            # send the phone string
            phone_handle = driver.find_element_by_css_selector(phone_css)
            phone_handle.send_keys(str(phone_num))
            if custom_msg_bool != True:
                # Replace the default message text with the custom message.
                message_handle = driver.find_element_by_css_selector(page_one_mess__css)
                message_handle.click()
                text_box = driver.find_element_by_css_selector(page_one_textb_css)
                text_box.send_keys(Keys.COMMAND + 'a')
                text_box.send_keys(Keys.BACKSPACE)
                text_box.send_keys(custom_message)
            send_handle = driver.find_element_by_css_selector(send_css)
            # Send inquiry. This should be commented out until ready to test
            # send_handle.click()
            # save the current time and date
            time_sent = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print('all handled')
            # sleep in case the page doesn't finish sending the inquiry
            sleep(2)
            # quit the driver and display
            driver.quit()
            display.stop()
            return time_sent
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from pyvirtualdisplay import Display
import time
import urllib
import os

# To observe the virtual screen, change the parameter to visible=1
xephyr = Display(visible=0, size=(320, 240)).start()


# The incoming variables are strings, except for the parameter j,
# which in this case is the counter.
def sendTigo(name, Tigo, Mensaje, j):
    """Fill the bolivia-sms.com Tigo SMS form: sender alias, area code and number.

    Tigo is the full phone number as a string; its first two digits are the
    area code selected from the drop-down, the rest is typed char by char.
    """
    driver = webdriver.Chrome()
    driver.get('http://www.bolivia-sms.com/tigo.html')
    # The form lives inside an iframe; switch context into it first.
    driver.switch_to_frame(driver.find_element_by_tag_name("iframe"))
    Alias = driver.find_element_by_id("nickname")
    Alias.click()
    # Type the alias one character at a time.
    for i in name:
        Alias.send_keys(i)
    select1 = Select(driver.find_element_by_id('carea'))
    select1.select_by_value(Tigo[:2])  # area code = first two digits
    number = driver.find_element_by_id('celnum')
    number.click()
    for i in Tigo[2:]:
        number.send_keys(i)
def multi_mode(cli_parsed):
    """Run the full multi-protocol scan: HTTP screenshots (optionally per
    user agent) via worker processes, then VNC/RDP captures via twisted/Qt,
    writing all results into the run's sqlite database and final reports.

    Supports --resume by reusing the existing db; SIGINT closes the db and
    prints the resume command before hard-exiting.
    """
    dbm = db_manager.DB_Manager(cli_parsed.d + '/ew.db')
    dbm.open_connection()
    if not cli_parsed.resume:
        dbm.initialize_db()
    dbm.save_options(cli_parsed)
    m = Manager()
    targets = m.Queue()
    lock = m.Lock()
    multi_counter = m.Value('i', 0)
    display = None

    def exitsig(*args):
        # SIGINT handler: persist state and tell the user how to resume.
        dbm.close()
        if current_process().name == 'MainProcess':
            print ''
            print 'Resume using ./EyeWitness.py --resume {0}'.format(cli_parsed.d + '/ew.db')
        os._exit(1)

    signal.signal(signal.SIGINT, exitsig)
    if cli_parsed.resume:
        pass  # targets already in the db from the interrupted run
    else:
        url_list, rdp_list, vnc_list = target_creator(cli_parsed)
        if any((cli_parsed.web, cli_parsed.headless)):
            for url in url_list:
                dbm.create_http_object(url, cli_parsed)
        for rdp in rdp_list:
            dbm.create_vnc_rdp_object('rdp', rdp, cli_parsed)
        for vnc in vnc_list:
            dbm.create_vnc_rdp_object('vnc', vnc, cli_parsed)
    if any((cli_parsed.web, cli_parsed.headless)):
        if cli_parsed.web and not cli_parsed.show_selenium:
            # Hide the real browser behind a virtual display.
            display = Display(visible=0, size=(1920, 1080))
            display.start()
        multi_total = dbm.get_incomplete_http(targets)
        if multi_total > 0:
            if cli_parsed.resume:
                print 'Resuming Web Scan ({0} Hosts Remaining)'.format(str(multi_total))
            else:
                print 'Starting Web Requests ({0} Hosts)'.format(str(multi_total))
        if multi_total < cli_parsed.threads:
            num_threads = multi_total
        else:
            num_threads = cli_parsed.threads
        # One sentinel None per worker so each knows when to stop.
        for i in xrange(num_threads):
            targets.put(None)
        try:
            workers = [Process(target=worker_thread, args=(
                cli_parsed, targets, lock, (multi_counter, multi_total)))
                for i in xrange(num_threads)]
            for w in workers:
                w.start()
            for w in workers:
                w.join()
        except Exception as e:
            print str(e)
        # Set up UA table here
        if cli_parsed.cycle is not None:
            ua_dict = get_ua_values(cli_parsed.cycle)
            if not cli_parsed.ua_init:
                # First pass: seed one UA row per successful http result.
                dbm.clear_table("ua")
                completed = dbm.get_complete_http()
                completed[:] = [x for x in completed if x.error_state is None]
                for item in completed:
                    for browser, ua in ua_dict.iteritems():
                        dbm.create_ua_object(item, browser, ua)
                cli_parsed.ua_init = True
                dbm.clear_table("opts")
                dbm.save_options(cli_parsed)
            # Re-scan every target once per user-agent string.
            for browser, ua in ua_dict.iteritems():
                targets = m.Queue()
                multi_counter.value = 0
                multi_total = dbm.get_incomplete_ua(targets, browser)
                if multi_total > 0:
                    print("[*] Starting requests for User Agent {0}"
                          " ({1} Hosts)").format(browser, str(multi_total))
                if multi_total < cli_parsed.threads:
                    num_threads = multi_total
                else:
                    num_threads = cli_parsed.threads
                for i in xrange(num_threads):
                    targets.put(None)
                workers = [Process(target=worker_thread,
                                   args=(cli_parsed, targets, lock,
                                         (multi_counter, multi_total),
                                         (browser, ua)))
                           for i in xrange(num_threads)]
                for w in workers:
                    w.start()
                for w in workers:
                    w.join()
    if any((cli_parsed.vnc, cli_parsed.rdp)):
        log._LOG_LEVEL = log.Level.ERROR
        multi_total, targets = dbm.get_incomplete_vnc_rdp()
        if multi_total > 0:
            print ''
            print 'Starting VNC/RDP Requests ({0} Hosts)'.format(str(multi_total))
            app = QtGui.QApplication(sys.argv)
            timer = QTimer()
            timer.start(10)
            timer.timeout.connect(lambda: None)  # keep the Qt event loop ticking
            # add qt4 reactor
            import qt4reactor
            qt4reactor.install()
            from twisted.internet import reactor
            for target in targets:
                # Re-root screenshot paths when resuming into a different dir.
                if os.path.dirname(cli_parsed.d) != os.path.dirname(target.screenshot_path):
                    target.set_paths(cli_parsed.d)
                tdbm = db_manager.DB_Manager(cli_parsed.d + '/ew.db')
                if target.proto == 'vnc':
                    reactor.connectTCP(
                        target.remote_system, target.port,
                        vnc_module.RFBScreenShotFactory(
                            target.screenshot_path, reactor, app, target, tdbm))
                else:
                    reactor.connectTCP(
                        target.remote_system, int(target.port),
                        rdp_module.RDPScreenShotFactory(
                            reactor, app, 1200, 800, target.screenshot_path,
                            cli_parsed.timeout, target, tdbm))
            reactor.runReturn()
            app.exec_()
    if display is not None:
        display.stop()
    results = dbm.get_complete_http()
    vnc_rdp = dbm.get_complete_vnc_rdp()
    dbm.close()
    m.shutdown()
    write_vnc_rdp_data(cli_parsed, vnc_rdp)
    sort_data_and_write(cli_parsed, results)
def textRelayCrawler(gameIdLst, path): display = Display(visible=0, size=(800, 800)) display.start() browser_text = webdriver.Chrome() browser_line = webdriver.Chrome() browser_info = webdriver.Chrome() for id in gameIdLst: newText = True try: state = open(RAW_TXT_PATH + "/current_data.txt", "r") for line in state.readlines(): if line.strip() == id: newText = False state.close() except IOError: open(RAW_TXT_PATH+'current_data.txt','w+').close() #if newText and id[:4]=="2015": # for 2015 season if id[:4]=="2015": # for 2015 season date = id[:8] if not os.path.exists("%s/%s"%(path, date)): os.makedirs("%s%s"%(path, date)) text = open("%s%s/%s"%(path, date, id + ".txt"), "w+") date = id[:8] # need to changed to datetime # date here if date > "20150806": print date break url_text = 'http://sports.news.naver.com/gameCenter/miniTextRelay.nhn?category=kbo&date=%s&gameId=%s'%(date, id) url_line = 'http://sports.news.naver.com/gameCenter/textRelay.nhn?gameId=%s&category=kbo'%id url_gameInfo = 'http://sports.news.naver.com/gameCenter/gameRecord.nhn?gameId=%s&category=kbo'%id print "parsing from %s"%id browser_text.get(url_text) browser_line.get(url_line) browser_info.get(url_gameInfo) try: while True: try: # crawling gameInfo away = get_team_by_code(id[8:10]).sponsor home = get_team_by_code(id[10:12]).sponsor d = datetime.datetime.strptime(id[0:8], "%Y%m%d") text.write(d.strftime("%Y.%-m.%d") + " " + away + " vs " + home + "\n") print d.strftime("%Y.%-m.%d") + " " + away + " vs " + home + "\n" game_place = browser_info.find_element_by_id('box_gamePlace') stadium = game_place.text text.write("구장 : "+stadium[13:15]+"구장\n") print "구장 : "+stadium[13:15]+"구장\n" detail_info = browser_info.find_element_by_id("box_detail") text.write(detail_info.text+"\n") print detail_info.text+"\n" # crawling lineup button = browser_line.find_element_by_id('away_lineup_btn') button.click() content = browser_line.find_element_by_id('lineup_box') away_raw = content.text button.click() button = 
browser_line.find_element_by_id('home_lineup_btn') button.click() content = browser_line.find_element_by_id('lineup_box') home_raw = content.text away_lst = [elem for elem in away_raw.split("\n") if elem!=""] home_lst = [elem for elem in home_raw.split("\n") if elem!=""] #for elem in home_lst: #print elem text.write("LineUp\n") text.write("%s" % away_raw) text.write("%s" % home_raw) # writing home lineup text.write("Home\n") for name, pos in parseContent(home_lst): text.write("%s - %s\n"%(name, pos)) # writing away lineup text.write("Away\n") for name, pos in parseContent(away_lst): text.write("%s - %s\n"%(name, pos)) break except selenium.common.exceptions.NoSuchElementException: pass text.write("Start Relay Text\n") # crawling relay text button = browser_text.find_element_by_id('inning_tab_all') button.click() # Get Relay_Text relay_text = browser_text.find_element_by_id('relay_text') text.write(relay_text.text) text.close() # write to current data state = open(RAW_TXT_PATH+"current_data.txt", "a+") state.write(id + "\n") state.close() #print relay_text.text except selenium.common.exceptions: pass
## Extract after loading ## from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException from pyvirtualdisplay import Display def getElementValue(driver, search_type, css_class): try: element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((search_type, css_class)) ) # element = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.CLASS_NAME, cssclass)) ) if element.text: return element.text else: return None except TimeoutException: print "Selenium timeout for <"+css_class+">" finally: pass display = Display(visible=0, size=(1024, 768)) display.start() driver = webdriver.Firefox() driver.get('https://www.google.com/search?q=rowe+digital') twitter = getElementValue(driver, By.CLASS_NAME, '_ksh') #_ksh wikipedia = getElementValue(driver, By.CLASS_NAME, 'kno-ecr-pt') sitelinks = getElementValue(driver, By.CSS_SELECTOR, 'table.nrgt')
def twitter(self):
    """Crawl Twitter search results for self.scan_name, save the raw tweet
    texts to a .txt file and render a noun-frequency word cloud to a .png.

    Fix vs original: `plt.savefig(..., bbox_inces='tight')` was a typo —
    matplotlib expects `bbox_inches`, so the tight bounding box was never
    applied.  Also writes the text file via a context manager.
    """
    cr_name = 'twitter'
    # Ensure the image output directory exists (create parents as needed).
    save_path = os.path.join(self.img_path, cr_name)
    if os.path.isdir(save_path):
        print(cr_name + ' 이미지 경로 확인 완료')
    elif os.path.isdir(self.img_path):
        os.mkdir(save_path)
    else:
        os.mkdir(self.img_path)
        os.mkdir(save_path)
    # Ensure the text output directory exists.
    text_save_path = os.path.join(self.text_path, cr_name)
    if os.path.isdir(text_save_path):
        print(cr_name + ' 텍스트 경로 확인 완료')
    elif os.path.isdir(self.text_path):
        os.mkdir(text_save_path)
    else:
        os.mkdir(self.text_path)
        os.mkdir(text_save_path)
    keyword = self.scan_name
    # Browser setup: on linux run headless chrome inside a virtual display.
    if self.platform == 'linux':
        display = Display(visible=0, size=(1024, 768))
        display.start()
        options = Options()
        options.binary_location = "/usr/bin/google-chrome"
        options.headless = True
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-gpu')
        options.add_argument('--disable-dev-shm-usage')
        chrome = webdriver.Chrome(executable_path=self.driver_path,
                                  options=options)
    else:
        chrome = self.generate_chrome(driver_path=self.driver_path,
                                      headless=self.headless,
                                      download_path=self.DOWNLOAD_DIR)
    # Open the Twitter search page for the keyword.
    print("Twitter 접속중")
    url = 'https://twitter.com/search?q={}&src=typed_query'.format(keyword)
    chrome.get(url)
    chrome.implicitly_wait(30)
    body = chrome.find_element_by_css_selector('body')
    text2 = chrome.find_elements_by_css_selector(
        '#react-root > div > div > div.css-1dbjc4n.r-18u37iz.r-13qz1uu.r-417010 > main > div > div > div > div > div > div:nth-child(2) > div > div > section > div'
    )
    result = []
    # Scroll 10 times (3 page-downs each) collecting the visible tweet text.
    for i in range(10):
        for q in range(3):
            body.send_keys(Keys.PAGE_DOWN)
            time.sleep(1)
        for ttt in text2:
            result.append(re.sub('\n', '', ttt.text))
        print(result)
        time.sleep(1)
    if self.platform == 'linux':
        chrome.close()
        display.stop()
    # Morphological analysis: extract nouns using the custom dictionary.
    t = Twitter()
    t.add_dictionary(self.sajun(), 'Noun')
    print('단어사전 추출완료')
    tokens_ko = []
    for i in range(len(result)):
        tokens_ko.append(t.nouns(result[i]))
    final = []
    for _, q in enumerate(tokens_ko):
        for i in range(len(q)):
            final.insert(-1, q[i])
    print('형태소분석 완료!')
    ko = nltk.Text(final, name="첫번째")
    data = ko.vocab().most_common(1000)
    # Save the raw tweets to a text file (context manager closes reliably).
    with open(text_save_path + '/twitter{}.txt'.format(self.date),
              'w', encoding='utf-8') as fp:
        for review in result:
            fp.write(review + '\n')
    # Render the word cloud from the noun frequencies.
    tmp_data = dict(data)
    wordcloud = WordCloud(
        font_path=self.fontPath,
        background_color='white',
        max_words=230).generate_from_frequencies(tmp_data)
    plt.figure(figsize=(10, 8))
    plt.imshow(wordcloud)
    plt.axis('off'), plt.xticks([]), plt.yticks([])
    plt.tight_layout()
    plt.subplots_adjust(left=0, bottom=0, right=1, top=1, hspace=0, wspace=0)
    # bbox_inches (was misspelled 'bbox_inces', silently ignored/failing).
    plt.savefig(save_path + "/twitter_{}.png".format(self.date),
                bbox_inches='tight', dpi=400, pad_inches=0)
def deploy_firefox(status_queue, browser_params, manager_params, crash_recovery):
    """Launch a Firefox instance configured from browser_params/manager_params.

    Reports progress milestones through status_queue as ("STATUS", tag, data)
    tuples.  Returns (driver, profile_directory, profile_settings).
    """
    firefox_binary_path = get_firefox_binary_path()
    root_dir = os.path.dirname(__file__)  # directory of this file
    fp = FirefoxProfile()
    browser_profile_path = fp.path + "/"
    status_queue.put(("STATUS", "Profile Created", browser_profile_path))
    # Use Options instead of FirefoxProfile to set preferences since the
    # Options method has no "frozen"/restricted options.
    # https://github.com/SeleniumHQ/selenium/issues/2106#issuecomment-320238039
    fo = Options()
    profile_settings = None  # Imported browser settings
    if browser_params["seed_tar"] and not crash_recovery:
        # Fresh run with a seed profile tarball.
        logger.info("BROWSER %i: Loading initial browser profile from: %s"
                    % (browser_params["browser_id"], browser_params["seed_tar"]))
        profile_settings = load_profile(
            browser_profile_path,
            manager_params,
            browser_params,
            browser_params["seed_tar"],
        )
    elif browser_params["recovery_tar"]:
        # Restarting after a crash: restore the recovered profile.
        logger.debug(
            "BROWSER %i: Loading recovered browser profile from: %s"
            % (browser_params["browser_id"], browser_params["recovery_tar"]))
        profile_settings = load_profile(
            browser_profile_path,
            manager_params,
            browser_params,
            browser_params["recovery_tar"],
        )
    status_queue.put(("STATUS", "Profile Tar", None))
    if browser_params["random_attributes"] and profile_settings is None:
        logger.debug("BROWSER %i: Loading random attributes for browser"
                     % browser_params["browser_id"])
        profile_settings = dict()
        # choose a random screen-res from list
        resolutions = list()
        with open(os.path.join(root_dir, "screen_resolutions.txt"), "r") as f:
            for line in f:
                resolutions.append(tuple(line.strip().split(",")))
        profile_settings["screen_res"] = random.choice(resolutions)
        # set a random user agent from list
        ua_strings = list()
        with open(os.path.join(root_dir, "user_agent_strings.txt"), "r") as f:
            for line in f:
                ua_strings.append(line.strip())
        profile_settings["ua_string"] = random.choice(ua_strings)
    # If profile settings still not set - set defaults
    if profile_settings is None:
        profile_settings = dict()
        profile_settings["screen_res"] = DEFAULT_SCREEN_RES
        profile_settings["ua_string"] = None
    if profile_settings["ua_string"] is not None:
        logger.debug(
            "BROWSER %i: Overriding user agent string to '%s'"
            % (browser_params["browser_id"], profile_settings["ua_string"]))
        fo.set_preference("general.useragent.override",
                          profile_settings["ua_string"])
    display_mode = browser_params["display_mode"]
    display_pid = None
    display_port = None
    if display_mode == "headless":
        # Firefox's native headless mode; no Xvfb needed.
        fo.set_headless(True)
        fo.add_argument("--width={}".format(DEFAULT_SCREEN_RES[0]))
        fo.add_argument("--height={}".format(DEFAULT_SCREEN_RES[1]))
    if display_mode == "xvfb":
        try:
            display = Display(visible=0, size=profile_settings["screen_res"])
            display.start()
            # cmd_param[-1] is the display arg (e.g. ':1001'); strip the colon.
            display_pid, display_port = display.pid, display.cmd_param[-1][1:]
        except EasyProcessError:
            raise RuntimeError("Xvfb could not be started. \
                Please ensure it's on your path. \
                See www.X.org for full details. \
                Commonly solved on ubuntu with `sudo apt install xvfb`")
    # Must do this for all display modes,
    # because status_queue is read off no matter what.
    status_queue.put(("STATUS", "Display", (display_pid, display_port)))
    if browser_params[
            "callstack_instrument"] and not browser_params["js_instrument"]:
        raise BrowserConfigError(
            "The callstacks instrument currently doesn't work without "
            "the JS instrument enabled. see: "
            "https://github.com/mozilla/OpenWPM/issues/557")
    if browser_params["save_content"]:
        # Validate the comma-separated resource-type list, if given as a string.
        if isinstance(browser_params["save_content"], str):
            configured_types = set(browser_params["save_content"].split(","))
            if not configured_types.issubset(ALL_RESOURCE_TYPES):
                diff = configured_types.difference(ALL_RESOURCE_TYPES)
                raise BrowserConfigError((
                    "Unrecognized resource types provided ",
                    "in browser_params['save_content`] (%s)" % diff,
                ))
    if browser_params["extension_enabled"]:
        # Write config file
        extension_config = dict()
        extension_config.update(browser_params)
        extension_config["logger_address"] = manager_params["logger_address"]
        extension_config["aggregator_address"] = manager_params[
            "aggregator_address"]
        if "ldb_address" in manager_params:
            extension_config["leveldb_address"] = manager_params["ldb_address"]
        else:
            extension_config["leveldb_address"] = None
        extension_config["testing"] = manager_params["testing"]
        ext_config_file = browser_profile_path + "browser_params.json"
        with open(ext_config_file, "w") as f:
            json.dump(extension_config, f)
        logger.debug("BROWSER %i: Saved extension config file to: %s"
                     % (browser_params["browser_id"], ext_config_file))
    # TODO restore detailed logging
    # fo.set_preference("*****@*****.**", "all")
    # Configure privacy settings
    configure_firefox.privacy(browser_params, fp, fo, root_dir,
                              browser_profile_path)
    # Set various prefs to improve speed and eliminate traffic to Mozilla
    configure_firefox.optimize_prefs(fo)
    # Intercept logging at the Selenium level and redirect it to the
    # main logger. This will also inform us where the real profile
    # directory is hiding.
    interceptor = FirefoxLogInterceptor(browser_params["browser_id"],
                                        browser_profile_path)
    interceptor.start()
    # Set custom prefs. These are set after all of the default prefs to allow
    # our defaults to be overwritten.
    for name, value in browser_params["prefs"].items():
        logger.info("BROWSER %i: Setting custom preference: %s = %s"
                    % (browser_params["browser_id"], name, value))
        fo.set_preference(name, value)
    # Launch the webdriver
    status_queue.put(("STATUS", "Launch Attempted", None))
    fb = FirefoxBinary(firefox_path=firefox_binary_path)
    driver = webdriver.Firefox(
        firefox_profile=fp,
        firefox_binary=fb,
        firefox_options=fo,
        log_path=interceptor.fifo,
    )
    # Add extension
    if browser_params["extension_enabled"]:
        # Install extension
        ext_loc = os.path.join(root_dir, "../Extension/firefox/openwpm.xpi")
        ext_loc = os.path.normpath(ext_loc)
        driver.install_addon(ext_loc, temporary=True)
        logger.debug("BROWSER %i: OpenWPM Firefox extension loaded"
                     % browser_params["browser_id"])
    # set window size
    driver.set_window_size(*profile_settings["screen_res"])
    # Get browser process pid
    if hasattr(driver, "service") and hasattr(driver.service, "process"):
        pid = driver.service.process.pid
    elif hasattr(driver, "binary") and hasattr(driver.binary, "process"):
        pid = driver.binary.process.pid
    else:
        raise RuntimeError("Unable to identify Firefox process ID.")
    status_queue.put(
        ("STATUS", "Browser Launched", (int(pid), profile_settings)))
    return driver, driver.capabilities["moz:profile"], profile_settings
//*[@id="site-layout"]/div[5]/div[2]/div/div/ul/li[1]/button
"""
# NOTE(review): the two lines above are the tail of a module docstring whose
# opening quotes are earlier in the file (an XPath kept for reference).
url1 = "https://www.realcanadiansuperstore.ca/Shop-by-Category/c/017377000000"
url0 = "https://www.realcanadiansuperstore.ca/"
# Accumulators filled during scraping.
ITEM = []
PRICE = []
PRICE_CLEAN = []
RUN_ON_PI = False
# Set variable if running on pi
# Heuristic: any cwd other than the dev workstation path means Raspberry Pi.
if os.getcwd() != '/home/girard/Scripts/Python/WebScraping/superstore_scraper':
    from pyvirtualdisplay import Display
    RUN_ON_PI = True
    display = Display(visible=0, size=(1024, 768))  # For headless RPi
    display.start()
    print("Running headless. Display started.")
    #DRIVER_PATH = '()'
    # edit driver paths; make explicit
    driver = webdriver.Chrome()
    WAIT_TIME = 20
    wait = WebDriverWait(driver, 10, 5)
else:
    #DRIVER_PATH = '/home/girard/Scripts/Python/WebScraping/WebDriver/chromedriver'
    # edit driver paths; make explicit
    driver = webdriver.Chrome(
        '/home/girard/Scripts/Python/WebScraping/WebDriver/chromedriver')
    WAIT_TIME = 10
    wait = WebDriverWait(driver, 10, 2)
#driver = webdriver.Chrome(DRIVER_PATH)
def update_now(self, entity, attribute, old, new, kwargs):
    """Log in to the tank-monitor site, scrape the fill percentage, and render
    an SVG gauge to PNG at img_save_path.

    Fix vs original: `browser` and `display` are initialised to None and the
    cleanup is guarded — previously, any failure before they were created
    raised NameError in `finally`, masking the real exception.
    """
    browser = None
    display = None
    try:
        login_name = self.args["login_name"]
        login_pass = self.args["login_pass"]
        site_url = self.args["site_url"]
        img_save_path = self.args["img_save_path"]
        display = Display(visible=0, size=(800, 600))
        display.start()
        browser = webdriver.Firefox()
        self.log("Browser created.")
        browser.implicitly_wait(30)
        browser.get(site_url)
        self.log("Getting website %s." % site_url)
        username = browser.find_element_by_id("input_0")
        self.log("Found username field.")
        password = browser.find_element_by_id("input_1")
        self.log("Found password field.")
        submit = browser.find_element_by_id("btnLogin")
        self.log("Found submit button.")
        username.send_keys(login_name)
        self.log("Sent username %s." % login_name)
        password.send_keys(login_pass)
        self.log("Sent password %s." % login_pass)
        submit.click()
        browser.implicitly_wait(60)
        self.log(
            browser.find_element_by_class_name("tank-pct").get_attribute(
                "innerHTML"))
        # Strip the trailing '%' to get a plain integer string.
        pct = browser.find_element_by_class_name("tank-pct").get_attribute(
            "innerHTML").replace("%", "")
        self.log(pct + "%")
        svg_part_1 = """<svg xmlns="http://www.w3.org/2000/svg" class="tank" viewBox="0 0 200 125"> <path class="tankpath" d="M97.624 4.98c-13.16 0-23.755 10.262-23.755 23.008h-21.379c-26.32 0-47.51 20.523-47.51 46.016s21.189 46.016 47.51 46.016h95.02c26.32 0 47.51-20.523 47.51-46.016s-21.189-46.016-47.51-46.016h-21.379c0-12.746-10.595-23.008-23.755-23.008h-4.751z" id="path4610" style="stroke:#bafcfb;stroke-opacity:1;fill:#0d7ba4;fill-opacity:1" />"""
        # Pick the fill path matching the current percentage bucket.
        if int(pct) >= 75:
            svg_part_2 = """<path class="tank-fill-80 " d="M 15.505,44.973 184.805,45 c 22.28241,30.147861 5.54653,71.45116 -34.28348,74.79999 l -101.21636,0.2852 C 1.3631941,114.55969 -3.5255293,64.489954 15.506,44.972 Z" style="display:inline;fill:#0056a4;fill-opacity:1;stroke-width:0.99000001" ></path>"""
        elif int(pct) >= 65 and int(pct) < 75:
            svg_part_2 = """<path class="tank-fill-70 " d="m 12.629,53.413 177.35876,-0.011 c 15.39061,29.063593 -5.37286,64.63153 -40.23765,66.49429 l -99.097722,0.0926 C 15.827493,118.94423 -5.6706678,82.216778 9.9325439,53.412 Z" style="display:inline;fill:#0056a4;fill-opacity:1;stroke-width:0.99000001" ></path>"""
        elif int(pct) >= 55 and int(pct) < 65:
            svg_part_2 = """<path class="tank-fill-60 tank-fill" d="m 193.64863,62.992 c 5.94794,25.706756 -10.02215,54.65902 -45.03407,57.07382 l -95.991981,-0.0278 C 13.207023,118.4254 0.1674334,83.2465 6.3659784,63.018038 Z" style="display:inline;fill:#0056a4;fill-opacity:1;stroke-width:0.99000001" ></path>"""
        elif int(pct) >= 45 and int(pct) < 55:
            print(int(pct))
            svg_part_2 = """<path class="tank-fill-50 " d="m 195.0035,72.006962 c 0.81316,24.467867 -16.32249,46.210268 -46.73481,48.045218 l -97.27547,0.002 C 18.478405,117.63067 4.3676285,93.353155 5.0073944,72.006 Z" style="display:inline;fill:#0056a4;fill-opacity:1;stroke-width:0.99000001" ></path>"""
        elif int(pct) >= 35 and int(pct) < 45:
            svg_part_2 = """<path class="tank-fill-40 " d="m 194.47138,81.018076 c -3.50791,22.329874 -23.13371,38.913544 -47.10875,39.023664 l -96.016052,-0.0157 C 26.224568,119.8284 8.2369447,100.00246 5.532886,80.98 Z" style="display:inline;fill:#0056a4;fill-opacity:1;stroke-width:0.99000001" ></path>"""
        elif int(pct) >= 25 and int(pct) < 35:
            svg_part_2 = """<path class="tank-fill-30 " d="m 192.07627,89.983924 c -6.91925,17.768676 -23.43993,29.707806 -44.66549,30.033746 l -95.498266,0.003 C 31.529321,119.69798 14.693792,107.61371 7.9073832,89.989976 Z" style="display:inline;fill:#0056a4;fill-opacity:1;stroke-width:0.99000001" ></path>"""
        elif int(pct) >= 15 and int(pct) < 25:
            svg_part_2 = """<path class="tank-fill-20 " d="m 187.43954,99.026048 c -6.40291,10.147762 -20.35578,20.968682 -39.68157,20.992082 l -96.411392,-0.004 C 38.907512,120.14951 22.607557,114.15437 12.627,98.982 Z" style="display:inline;fill:#0056a4;fill-opacity:1;stroke-width:0.99000001" ></path>"""
        elif int(pct) < 15:
            svg_part_2 = """<path class="tank-fill-10 " d="m 179.56145,108.01008 c -9.99212,8.26245 -18.08352,11.23693 -31.75342,12.06777 l -96.36515,-0.0398 c -11.127238,0.2693 -23.04633,-5.29912 -31.096147,-12.05319 z" style="display:inline;fill:#0056a4;fill-opacity:1;stroke-width:0.99000001" ></path>"""
        svg_part_3 = """<text class="tank-pct" alignment-baseline="baseline" x="111.86662" y="77.073944" id="text4608" style="font-size:37.13151169px;text-anchor:middle;display:inline;fill:#00b7ea;fill-opacity:1;stroke-width:3.09429264" transform="scale(0.89216954,1.1208632)">"""
        svg_part_4 = "%</text></svg>"
        svg_tank = svg_part_1 + svg_part_2 + svg_part_3 + pct + svg_part_4
        svg2png(bytestring=svg_tank, write_to=img_save_path)
    finally:
        # Only tear down what was actually created.
        if browser is not None:
            browser.quit()
        if display is not None:
            display.sendstop()
def main():
    """ Main function

    Drives one headless-Chrome video-playback run for an ABR experiment:
    argv = [ipaddr, abr_algo, run_time, process_id, trace_file, sleep_time].
    Waits for the ABR server to drop a lock file, then tears everything down.
    """
    ipaddr = sys.argv[1]
    abr_algo = sys.argv[2]
    dummy_run_time = int(sys.argv[3])
    process_id = sys.argv[4]
    trace_file = sys.argv[5]
    sleep_time = sys.argv[6]
    # Prevent multiple process from being synchronized
    sleep(int(sleep_time))
    # Generate URL
    url = "http://{}/myindex_{}.html".format(ipaddr, abr_algo)
    mlog(abr_algo=abr_algo, trace_file=trace_file,
         msg="Server URL: {}".format(url))
    # Set timeout alarm and handler
    signal.signal(signal.SIGALRM, timeout_handler)
    # Set timeout value depending on what algorithm is being used
    # FIXED and BOLA take longer time to playback (from experience)
    if abr_algo == "FIXED":
        curr_runtime = FIXED_RUN_TIME
    elif abr_algo == "BOLA":
        curr_runtime = BOLA_RUN_TIME
    else:
        curr_runtime = ABR_RUN_TIME
    # Timeout set after current run time as decided
    signal.alarm(curr_runtime)
    mlog(abr_algo=abr_algo, trace_file=trace_file,
         msg="Run time alarm set at {}.".format(curr_runtime))
    try:
        # Copy over the chrome user dir
        default_chrome_user_dir = "../abr_browser_dir/chrome_data_dir"
        chrome_user_dir = "/tmp/chrome_user_dir_id_{}".format(process_id)
        system("rm -r {}".format(chrome_user_dir))
        system("cp -r {} {}".format(default_chrome_user_dir, chrome_user_dir))
        # Display the page in browser: Yes/No
        if BROWSER_DISPLAY:
            mlog(abr_algo=abr_algo, trace_file=trace_file,
                 msg="Started display on browser.")
        else:
            # Headless: render into a virtual display instead.
            display = Display(visible=0, size=(800, 600))
            display.start()
            mlog(abr_algo=abr_algo, trace_file=trace_file,
                 msg="Started supressed display.")
        # Initialize Chrome driver
        options = Options()
        chrome_driver = "../abr_browser_dir/chromedriver"
        options.add_argument("--user-data-dir={}".format(chrome_user_dir))
        options.add_argument("--ignore-certificate-errors")
        driver = webdriver.Chrome(chrome_driver, chrome_options=options)
        # Run Chrome
        driver.set_page_load_timeout(10)
        driver.get(url)
        mlog(abr_algo=abr_algo, trace_file=trace_file,
             msg="Video playback started.")
        # Sleep until lock is created by ABR server
        lock_file_path = "./locks/video_log_" + abr_algo + "_" + trace_file + ".lock"
        mlog(abr_algo=abr_algo, trace_file=trace_file,
             msg="Looking for log file: {}".format(opa(lock_file_path)))
        sleep(200)  # running time of video is 193s
        while not ope(lock_file_path):
            mlog(abr_algo=abr_algo, trace_file=trace_file,
                 msg="Not found lock file, going back to sleep for 20 secs.")
            sleep(20)
        # Remove lock after it's existence is known
        orm(lock_file_path)
        # Quit the video playback
        driver.quit()
        if BROWSER_DISPLAY:
            mlog(abr_algo=abr_algo, trace_file=trace_file,
                 msg="Stopped Chrome driver.")
        else:
            display.stop()
            mlog(abr_algo=abr_algo, trace_file=trace_file,
                 msg="Stopped supressed display and Chrome driver.")
        print 'DONE!'
    except Exception as exception1:
        # Best-effort teardown: each step gets its own try so one failure
        # doesn't prevent the next cleanup step.
        mlog(abr_algo=abr_algo, trace_file=trace_file,
             msg="Exception: {}".format(exception1))
        if not BROWSER_DISPLAY:
            try:
                display.stop()
                mlog(abr_algo=abr_algo, trace_file=trace_file,
                     msg="Exception Handler: Stopped suppressed display.")
            except Exception as exception2:
                mlog(abr_algo=abr_algo, trace_file=trace_file,
                     msg="Exception Again (Suppressed display): {}".format(
                         exception2))
        try:
            driver.quit()
            mlog(abr_algo=abr_algo, trace_file=trace_file,
                 msg="Exception Handler (Chrome driver): Quit Chrome driver.")
        except Exception as exception3:
            mlog(abr_algo=abr_algo, trace_file=trace_file,
                 msg="Exception Again (Chrome driver): {}".format(exception3))
def open_virtual_display(self):
    """Start a headless X virtual display (Xvfb) sized 1080x608.

    Fix vs original: the started Display was bound to a local and discarded,
    so the Xvfb process could never be stopped.  The handle is now kept on
    the instance (``self.virtual_display``) so callers can stop it later.
    Return value (None) and signature are unchanged.
    """
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(1080, 608))
    display.start()
    # Keep a reference for later cleanup (display.stop()).
    self.virtual_display = display
def __init__(self, ad=None, *args, **kwargs):
    """Initialise the spider: a virtual display plus a Chrome webdriver."""
    super(Spider, self).__init__(*args, **kwargs)
    # Headless X display so Chrome can run without a real screen.
    virtual_display = Display(visible=0, size=(800, 600))
    virtual_display.start()
    self.display = virtual_display
    # Chrome driver at the fixed deployment path.
    self.driver = webdriver.Chrome("/var/chromedriver/chromedriver")
# GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import argparse as ap, datetime as dt, numpy as np, numpy.random as npr, os, psutil, random, requests, signal, sys, tarfile, time import urllib.request, urllib.robotparser as robotparser, urllib.parse as uprs from selenium import webdriver from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from io import BytesIO from faker import Factory # headless Raspberry Pi try: from pyvirtualdisplay import Display display = Display(visible=0, size=(1296, 1018)) display.start() except ImportError: pass # nice this process on UNIX if hasattr(os, 'nice'): os.nice(15) gb_per_month = 50 # How many gigabytes to pollute per month max_links_cached = 100000 # Maximum number of links to cache for download max_links_per_page = 200 # Maximum number of links to add per page max_links_per_domain = 400 # Maximum number of links to add per domain search_url = 'http://www.google.com/search' # keep unencrypted for ISP DPI wordsite_url = 'http://svnweb.freebsd.org/csrg/share/dict/words?view=co&content-type=text/plain' timeout = 20
def run(self, options):
    """
    Generate a patient PDF report from the classifier's JSON output.

    Reads ``prediction-default.json`` (required) and ``severity.json``
    (optional) from ``options.inputdir``, fills the matching HTML
    template in ``pdftemplate/``, and renders it with pdfkit (under a
    suppressed X display) to ``patient_analysis.pdf`` in
    ``options.outputdir``.
    """
    print(Gstr_title)
    print('Version: %s' % self.get_version())

    # fetch input data
    with open('{}/prediction-default.json'.format(options.inputdir)) as f:
        classification_data = json.load(f)
    # Severity scores are optional (only produced for COVID-positive runs).
    # FIX: was a bare `except:` — narrow it so real bugs (NameError, etc.)
    # are not silently swallowed.  ValueError also covers JSONDecodeError.
    try:
        with open('{}/severity.json'.format(options.inputdir)) as f:
            severityScores = json.load(f)
    except (OSError, ValueError):
        severityScores = None

    # output pdf here
    print("Creating pdf file in {}...".format(options.outputdir))
    template_file = "pdf-covid-positive-template.html"
    if classification_data['prediction'] != "COVID-19" or severityScores is None:
        template_file = "pdf-covid-negative-template.html"

    # put image file in pdftemplate folder to use it in pdf
    shutil.copy(options.inputdir + '/' + options.imagefile, "pdftemplate/")

    with open("pdftemplate/{}".format(template_file)) as f:
        txt = f.read()

    # replace the placeholder values in the template
    txt = txt.replace("${PATIENT_TOKEN}", options.patientId)
    txt = txt.replace("${PREDICTION_CLASSIFICATION}",
                      classification_data['prediction'])
    txt = txt.replace("${COVID-19}", classification_data['COVID-19'])
    txt = txt.replace("${NORMAL}", classification_data['Normal'])
    txt = txt.replace("${PNEUMONIA}", classification_data['Pneumonia'])
    txt = txt.replace("${X-RAY-IMAGE}", options.imagefile)

    # FIX: renamed local from `time` to `now` so the stdlib `time` module
    # name is no longer shadowed inside this method.
    now = datetime.datetime.now()
    txt = txt.replace("${month-date}", now.strftime("%c"))
    txt = txt.replace("${year}", now.strftime("%Y"))

    # add the severity values only for a covid-positive report
    if template_file == "pdf-covid-positive-template.html":
        txt = txt.replace("${GEO_SEVERITY}",
                          severityScores["Geographic severity"])
        txt = txt.replace("${GEO_EXTENT_SCORE}",
                          severityScores["Geographic extent score"])
        txt = txt.replace("${OPC_SEVERITY}",
                          severityScores["Opacity severity"])
        txt = txt.replace("${OPC_EXTENT_SCORE}",
                          severityScores['Opacity extent score'])

    with open("pdftemplate/specificPatient.html", 'w') as writeF:
        writeF.write(txt)

    # Render under a suppressed X display; Display.start() returns the
    # display itself.  FIX: start the display *before* entering the try
    # block — previously a failure in Display() left `disp` undefined and
    # the finally clause raised a masking NameError.
    disp = Display().start()
    try:
        pdfkit.from_file(['pdftemplate/specificPatient.html'],
                         '{}/patient_analysis.pdf'.format(options.outputdir))
    finally:
        disp.stop()

    # cleanup of the temporary template artifacts
    os.remove("pdftemplate/specificPatient.html")
    os.remove("pdftemplate/{}".format(options.imagefile))
def download_images(self, tag=None):
    """Scrape Instagram explore/tags/<tag> and save matching images + captions.

    Python 2 code.  Images are saved to ``self.savePath + self.tag + '/'``
    as ``<n>.jpg`` with the caption in a sibling ``<n>.txt``; numbering
    continues from whatever is already in the folder.
    """
    # set tag:
    if (tag != None):
        self.set_tag(tag)
    # create folder if one does not exist:
    if (not os.path.isdir(self.savePath + self.tag + "/")):
        os.mkdir(self.savePath + self.tag + "/")
    #url to go to:
    page_url = "https://www.instagram.com/explore/tags/" + self.tag + "/"
    # Suppressed X display so Chrome can run headless on a server.
    display = Display(visible=0, size=(800, 600))
    display.start()
    #Chrome stuff:
    driver = webdriver.Chrome()
    driver.get(page_url)
    soup = BeautifulSoup(driver.page_source, "lxml")
    # check how many images are in the folder already:
    # invariant: every saved .jpg has a matching .txt caption file
    count = len(glob(self.savePath + self.tag + "/" + "*jpg"))
    self.txtFiles = glob(self.savePath + self.tag + "/" + "*txt")
    assert count == len(self.txtFiles)
    #iterate over the pictures
    for index, pictureParent in enumerate(
            soup.find_all('div', attrs={'class': '_4rbun'})):
        # omit the top posts (the first 9 tiles are "Top posts", not recent)
        if (index >= 9):
            # get the description:
            # NOTE(review): broad try/except is deliberate best-effort —
            # any malformed post is skipped rather than aborting the crawl.
            try:
                caption = pictureParent.contents[0]['alt']
                captionText = caption.encode(
                    'UTF-8').lower()  # lowercase the description
                # temp.txt is presumably read by check_if_file_exists() —
                # TODO confirm against that helper.
                f = open("./temp.txt", "w+")  #the + lets it create the file
                f.write(captionText)
                f.close()
                # proceed only if actual hashtag is in the description and if file has already been downloaded:
                if ("#" + self.tag in captionText and self.check_if_file_exists()):
                    count += 1
                    #download the image:
                    url = pictureParent.contents[0]['src']
                    # name the image file:
                    imageFilename = self.savePath + self.tag + "/" + str(
                        count - 1) + ".jpg"
                    urllib.urlretrieve(url, imageFilename)
                    #save the caption in a text file:
                    # name the description file:
                    textFilename = self.savePath + self.tag + "/" + str(
                        count - 1) + ".txt"
                    f = open(textFilename, "w+")  #the + lets it create the file
                    f.write(captionText)
                    f.close()
            except:
                print "Some error"
    # always tear down the browser and the virtual display
    driver.quit()
    display.stop()
import time
from pyvirtualdisplay import Display


def strike_card(url):
    """Open the check-in page at ``url``, click the check-in button, then quit.

    Uses the module-level ``driver``; blocks on stdin before quitting so the
    user can verify the result.
    """
    driver.get(url)
    time.sleep(1)  # give the page a moment to render the button
    strike_card_btn = driver.find_element_by_xpath(
        "//button[@class='btn btn-large btn-success']")
    strike_card_btn.click()
    print("打卡成功,请注意查收~")
    input("press any key。。。")
    driver.quit()


# --- script body: log in to shanbay.com under a headless Chrome ---
display = Display(visible=0, size=(1920, 1080))
display.start()
chrom_options = Options()
chrom_options.add_argument('--headless')
chrom_options.add_argument('--disable-gpu')
driver = webdriver.Chrome(chrome_options=chrom_options)
driver.set_window_size(1920, 1080)
try:
    driver.get("https://www.shanbay.com/web/account/login/")
    # credentials are read interactively from the terminal
    username_input = input("请输入用户名:")
    password_input = input("请输入密码:")
    username = driver.find_element_by_name("username")
    username.send_keys(username_input)
    password = driver.find_element_by_name("password")
    password.send_keys(password_input)
    form = driver.find_element_by_name("login-form")
import requests
from lxml import etree
import json
import pdb
from base import *
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from pyvirtualdisplay import Display

# Module-level virtual display: started at import time so Chrome can run
# on a headless server.  Nothing in this view stops it.
display = Display(visible=0, size=(1600, 1200))
display.start()


class Rchs_pastperfectonline_com:
    """Scraper for rchs.pastperfectonline.com; crawling starts on construction."""

    name = 'rchs_pastperfectonline_com'
    base_url = 'https://rchs.pastperfectonline.com'
    delay_time = 10  # presumably a politeness delay in seconds — confirm at usage sites

    def __init__(self):
        # connect_mysql_db comes from `base` (star import) — TODO confirm.
        self.db, self.cursor = connect_mysql_db()
        self.session = requests.Session()
        options = Options()
        # self.driver = webdriver.Chrome('./chromedriver.exe', options=options)
        self.driver = webdriver.Chrome(options=options)
        # kick off the crawl immediately
        self.start_requests()

    def start_requests(self):
def deploy_firefox(status_queue, browser_params, manager_params, crash_recovery):
    """
    launches a firefox instance with parameters set by the input dictionary

    Progress is reported back through ``status_queue`` as
    ``('STATUS', <stage>, <payload>)`` tuples.  Returns
    ``(driver, browser_profile_path, profile_settings)``.
    """
    root_dir = os.path.dirname(__file__)  # directory of this file
    logger = loggingclient(*manager_params['logger_address'])
    display_pid = None
    display_port = None

    fp = webdriver.FirefoxProfile()
    browser_profile_path = fp.path + '/'
    status_queue.put(('STATUS', 'Profile Created', browser_profile_path))

    profile_settings = None  # Imported browser settings
    # Fresh start: load the seed profile tar (optionally with flash state).
    if browser_params['profile_tar'] and not crash_recovery:
        logger.debug("BROWSER %i: Loading initial browser profile from: %s" %
                     (browser_params['crawl_id'], browser_params['profile_tar']))
        profile_settings = load_profile(browser_profile_path, manager_params,
                                        browser_params,
                                        browser_params['profile_tar'],
                                        load_flash=browser_params['disable_flash'] is False)
    # Crash recovery: reload the previously-dumped profile tar.
    elif browser_params['profile_tar']:
        logger.debug("BROWSER %i: Loading recovered browser profile from: %s" %
                     (browser_params['crawl_id'], browser_params['profile_tar']))
        profile_settings = load_profile(browser_profile_path, manager_params,
                                        browser_params,
                                        browser_params['profile_tar'])
    status_queue.put(('STATUS', 'Profile Tar', None))

    if browser_params['random_attributes'] and profile_settings is None:
        logger.debug("BROWSER %i: Loading random attributes for browser" %
                     browser_params['crawl_id'])
        profile_settings = dict()

        # choose a random screen-res from list
        resolutions = list()
        with open(os.path.join(root_dir, 'screen_resolutions.txt'), 'r') as f:
            for line in f:
                resolutions.append(tuple(line.strip().split(',')))
        profile_settings['screen_res'] = random.choice(resolutions)

        # set a random user agent from list
        ua_strings = list()
        with open(os.path.join(root_dir, 'user_agent_strings.txt'), 'r') as f:
            for line in f:
                ua_strings.append(line.strip())
        profile_settings['ua_string'] = random.choice(ua_strings)

    # If profile settings still not set - set defaults
    if profile_settings is None:
        profile_settings = dict()
        profile_settings['screen_res'] = DEFAULT_SCREEN_RES
        profile_settings['ua_string'] = None

    if profile_settings['ua_string'] is not None:
        logger.debug("BROWSER %i: Overriding user agent string with the following: %s" %
                     (browser_params['crawl_id'], profile_settings['ua_string']))
        fp.set_preference("general.useragent.override",
                          profile_settings['ua_string'])

    if browser_params['headless']:
        display = Display(visible=0, size=profile_settings['screen_res'])
        display.start()
        display_pid = display.pid
        # NOTE(review): digs the display port out of pyvirtualdisplay's
        # command line (6th arg, leading char stripped) — fragile across
        # pyvirtualdisplay versions; confirm against the installed version.
        display_port = display.cmd_param[5][1:]
        status_queue.put(('STATUS', 'Display', (display_pid, display_port)))

    if browser_params['debugging']:
        firebug_loc = os.path.join(root_dir,
                                   'firefox_extensions/firebug-1.11.0.xpi')
        fp.add_extension(extension=firebug_loc)
        fp.set_preference("extensions.firebug.currentVersion",
                          "1.11.0")  # Avoid startup screen

    if browser_params['extension']['enabled']:
        ext_loc = os.path.join(root_dir + "/../",
                               'Extension/firefox/@openwpm-0.0.1.xpi')
        ext_loc = os.path.normpath(ext_loc)
        fp.add_extension(extension=ext_loc)
        # The extension reads its aggregator address and instrumentation
        # flags from this CSV-style file in the profile directory.
        with open(browser_profile_path + 'database_settings.txt', 'w') as f:
            host, port = manager_params['aggregator_address']
            crawl_id = browser_params['crawl_id']
            f.write(host + ',' + str(port) + ',' + str(crawl_id))
            f.write(','+str(browser_params['extension']['cookieInstrument']))
            f.write(','+str(browser_params['extension']['jsInstrument']))
            f.write(','+str(browser_params['extension']['cpInstrument']))
        logger.debug("BROWSER %i: OpenWPM Firefox extension loaded" %
                     browser_params['crawl_id'])

    if browser_params['proxy']:
        PROXY_HOST = "localhost"
        PROXY_PORT = browser_params['proxy']
        # Direct = 0, Manual = 1, PAC = 2, AUTODETECT = 4, SYSTEM = 5
        fp.set_preference("network.proxy.type", 1)
        fp.set_preference("network.proxy.http", PROXY_HOST)
        fp.set_preference("network.proxy.http_port", PROXY_PORT)
        fp.set_preference("network.proxy.ssl", PROXY_HOST)  # https sites
        fp.set_preference("network.proxy.ssl_port", PROXY_PORT)
        # set this to exclude sites from using proxy
        # http://kb.mozillazine.org/Network.proxy.no_proxies_on
        fp.set_preference("network.proxy.no_proxies_on", "")
        # copy the dbs into temp profile
        # these were created by manually adding the cert to
        # a previous tmp selenium profile
        shutil.copy(os.path.join(root_dir + "/../", 'Proxy/key3.db'),
                    fp.path + '/key3.db')
        shutil.copy(os.path.join(root_dir + "/../", 'Proxy/cert8.db'),
                    fp.path + '/cert8.db')

    # Disable flash
    if browser_params['disable_flash']:
        fp.set_preference('plugin.state.flash', 0)

    # Configure privacy settings
    configure_firefox.privacy(browser_params, fp, root_dir,
                              browser_profile_path)

    # Set various prefs to improve speed and eliminate traffic to Mozilla
    configure_firefox.optimize_prefs(fp)

    # Launch the webdriver
    status_queue.put(('STATUS', 'Launch Attempted', None))
    fb = FirefoxBinary(root_dir + "/../../firefox-bin/firefox")
    driver = webdriver.Firefox(firefox_profile=fp, firefox_binary=fb)
    status_queue.put(('STATUS', 'Browser Launched',
                      (int(driver.binary.process.pid), profile_settings)))

    # set window size
    driver.set_window_size(*profile_settings['screen_res'])

    return driver, browser_profile_path, profile_settings
def paradox_polling():
    """Poll a Paradox alarm panel's web UI and forward status changes to Firehose.

    Logs in using the PARADOX_* environment variables, reads area/zone
    names once, then loops reading ``statuslive.html`` every second and
    pushes a record to the Kinesis Firehose stream named by
    ``KINESIS_STREAM`` whenever any status changes.  If ``KEYPRESS_CHECK``
    is set, pressing ENTER exits the loop.  Always logs out and tears
    down the driver and virtual display on exit.
    """
    options = Options()
    options.headless = True
    # Set screen resolution to 1366 x 768 like most 15" laptops
    display = Display(visible=0, size=(1366, 768))
    display.start()
    driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver',
                               options=options)
    ipaddress = os.environ['PARADOX_IPADDRESS']
    if not paradox_login(driver, ipaddress, os.environ['PARADOX_USERCODE'],
                         os.environ['PARADOX_PASSWORD']):
        print('Login failed.')
        driver.quit()
        # FIX: also stop the virtual display on this early-exit path
        # (it previously leaked the Xvfb process).
        display.stop()
        exit()
    producer = boto3.client('firehose')
    try:
        # Getting Info: read the area/zone name tables once up front.
        print('Getting info..')
        driver.get('http://{}/index.html'.format(ipaddress))
        sleep(2)
        area_name = get_array_from_source('tbl_areanam', driver.page_source)
        area_name = [x.replace('"', '') for x in area_name]
        print('Area Name: {}'.format(area_name))
        zone_name = get_array_from_source('tbl_zone', driver.page_source)
        zone_name = [x.replace('"', '') for x in zone_name]
        zone_name = [x.replace(' ', '_') for x in zone_name]
        print('Zone Name: {}'.format(zone_name))
        if len(area_name) == 0:
            # The panel only serves one session at a time.
            print('Server in use. Exit.')
            driver.quit()
            exit()
        if 'KEYPRESS_CHECK' in os.environ:
            print('Starting loop. Press ENTER to exit.')
        stay = True
        zone_status, last_zone_status = [], []
        area_status, last_area_status = [], []
        while stay:
            sleep(1)
            driver.get('http://{}/statuslive.html'.format(ipaddress))
            zone_status = get_array_from_source('tbl_statuszone',
                                                driver.page_source)
            zone_status = [int(x) for x in zone_status]
            area_status = get_array_from_source('tbl_useraccess',
                                                driver.page_source)
            area_status = [int(x) for x in area_status]
            # Empty tables mean the session was lost — stop polling.
            if (len(zone_status) == 0) or (len(area_status) == 0):
                stay = False
            else:
                # Only emit a Firehose record when something changed.
                if (zone_status != last_zone_status) or (area_status != last_area_status):
                    print('Status Zone: {}'.format(zone_status))
                    print('Status Area: {}'.format(area_status))
                    firehose_record = {
                        'time': datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
                    }
                    for i in range(len(area_status)):
                        if area_status[i] != 0:
                            firehose_record['area.{}.{}'.format(
                                i + 1, area_name[i])] = area_status[i]
                    # zone_name is a flat [area, name, area, name, ...] list.
                    for i in range(len(zone_name) // 2):
                        if int(zone_name[i * 2]) != 0:
                            firehose_record['area.{}.zone.{}'.format(
                                zone_name[i * 2],
                                zone_name[i * 2 + 1])] = zone_status[i]
                    response = producer.put_record(
                        DeliveryStreamName=os.environ['KINESIS_STREAM'],
                        Record={'Data': json.dumps(firehose_record) + '\n'})
                    last_area_status = area_status
                    last_zone_status = zone_status
            if 'KEYPRESS_CHECK' in os.environ:
                # Non-blocking check for an ENTER keypress on stdin.
                if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                    line = input()
                    break
    finally:
        print('Logout..')
        driver.get('http://{}/logout.html'.format(ipaddress))
        sleep(1)
        driver.quit()
        # FIX: was `display, stop()` — a tuple expression calling an
        # undefined name `stop`, which raised NameError inside finally.
        display.stop()
def Naver(self):
    """Crawl Naver's daily popular-news ranking, save articles to CSV and a word cloud.

    Collects article links from the ranking page (via Selenium), downloads
    each article with requests, writes the text to
    ``<text_path>/naver/네이버종합뉴스_<date>.csv``, then tokenises the
    contents (Twitter/konlpy) and renders a word-cloud PNG to
    ``<img_path>/naver/naver_<date>.png``.
    """
    cr_name = 'naver'
    # ensure the image save directory exists
    save_path = os.path.join(self.img_path, cr_name)
    if os.path.isdir(save_path):
        print(cr_name + ' 이미지 경로 확인 완료')
    elif os.path.isdir(self.img_path):
        os.mkdir(save_path)
    else:
        os.mkdir(self.img_path)
        os.mkdir(save_path)
    # ensure the text save directory exists
    text_save_path = os.path.join(self.text_path, cr_name)
    if os.path.isdir(text_save_path):
        print(cr_name + ' 텍스트 경로 확인 완료')
    elif os.path.isdir(self.text_path):
        os.mkdir(text_save_path)
    else:
        os.mkdir(self.text_path)
        os.mkdir(text_save_path)
    # collectors for the Naver headline links
    result = []
    res = []
    # browser setup: virtual display + headless Chrome on Linux,
    # otherwise the helper-configured Chrome
    if self.platform == 'linux':
        display = Display(visible=0, size=(800, 600))
        display.start()
        options = Options()
        options.binary_location = "/usr/bin/google-chrome"
        # chrome_options = webdriver.ChromeOptions()
        options.headless = True
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-gpu')
        options.add_argument('--disable-dev-shm-usage')
        chrome = webdriver.Chrome(executable_path=self.driver_path,
                                  options=options)
    else:
        chrome = self.generate_chrome(driver_path=self.driver_path,
                                      headless=self.headless,
                                      download_path=self.DOWNLOAD_DIR)
    # open the Naver daily-popular ranking page
    print("Naver 접속중")
    # driver = webdriver.Chrome(executable_path="./chromedriver.exe")
    # driver.implicitly_wait(30)
    url = 'https://news.naver.com/main/ranking/popularDay.nhn?rankingType=popular_day&date={}'.format(
        self.date)
    chrome.get(url)
    chrome.implicitly_wait(30)
    # scroll(3)
    # ranking sections live in div[4]..div[9] of the layout table
    for sun in range(4, 10):
        pr = chrome.find_elements_by_xpath(
            '//*[@id="wrap"]/table/tbody/tr/td[2]/div/div[{}]'.format(sun))
        for p in pr:
            result.append(p.find_elements_by_tag_name('a'))
    # print(result)
    for i, q in enumerate(result):
        for e in q:
            res.append(e.get_attribute('href'))
    # de-duplicate and drop links back to the ranking page itself
    http = list(set(res))
    https = []
    for idx in range(len(http)):
        if http[idx].find('popularDay') >= 0:
            continue
        else:
            https.append(http[idx])
    files = pd.DataFrame()
    # the browser is no longer needed once the links are collected
    if self.platform == 'linux':
        chrome.close()
        display.stop()
    # fetch each article and accumulate title/contents/link rows
    for i in range(len(https)):
        res = requests.get(https[i])
        soup = BeautifulSoup(res.content, 'html.parser')
        body = soup.select('._article_body_contents')
        files = files.append(
            pd.DataFrame(
                {
                    'Title': soup.find('div', attrs={
                        'class': 'article_info'
                    }).h3.text,
                    'Contents': re.sub(
                        ' ', '',
                        re.sub(
                            ' ', '',
                            re.sub(
                                '\t', '',
                                self.cleanText(body[0].text)[
                                    (self.cleanText(body[0].text)
                                     ).find('{}') + 2:]))),
                    'link': https[i]
                },
                index=[i]))
    text2 = files.Contents
    # save the articles as CSV
    files.to_csv(text_save_path + '/네이버종합뉴스_{}.csv'.format(self.date),
                 index=False,
                 encoding='utf-8')
    # -------------------------------------
    # build the noun dictionary and tokenise
    t = Twitter()
    t.add_dictionary(self.sajun(), 'Noun')
    tokens_ko = []
    for i in range(len(text2)):
        tokens_ko.append(t.nouns(text2[i]))
    final = []
    for _, q in enumerate(tokens_ko):
        for i in range(len(q)):
            final.insert(-1, q[i])
    ko = nltk.Text(final, name="첫번째")
    data = ko.vocab().most_common(1000)
    # keep only tokens of length >= 2
    data_1 = []
    for i in range(len(data)):
        for q in range(0, 1, 1):
            if len(data[i][0]) >= 2:
                data_1.append(data[i])
    tmp_data = dict(data_1)
    wordcloud = WordCloud(
        font_path=self.fontPath, background_color='white',
        max_words=230).generate_from_frequencies(tmp_data)
    plt.figure(figsize=(10, 8))
    plt.imshow(wordcloud)
    plt.axis('off'), plt.xticks([]), plt.yticks([])
    plt.tight_layout()
    plt.subplots_adjust(left=0, bottom=0, right=1, top=1, hspace=0, wspace=0)
    # FIX: was `bbox_inces` — a misspelled keyword, so the tight bounding
    # box was never applied.
    plt.savefig(save_path + "/naver_{}.png".format(self.date),
                bbox_inches='tight',
                dpi=400,
                pad_inches=0)
for link in link_collection.find(): if link['url'] not in item_collection_links: mongo.close() return False mongo.close() return True if __name__ == "__main__": Macys_Scraper() while not check_all_links_visited(): mongo = MongoClient() database = mongo['Macys_Scraper'] link_collection = database['jeans_product_links'] item_collection = database['jeans'] display = Display(visible=0, size=(1500, 800)) display.start() mongo = MongoClient() driver = webdriver.Chrome( '/home/nishaf/PycharmProjects/Forever21_Scraper/chromedriver') driver.implicitly_wait(20) count = 0 item_count = 1 visited_links = [doc['link'] for doc in item_collection.find()] print(visited_links) for link in link_collection.find(no_cursor_timeout=False): print(len(visited_links)) if link['url'] in visited_links: print("Visited Before") print("Deleting link data --> " + link['url']) item_collection.delete_many({"link": link['url']})
from random import choice
from time import sleep


def get_words(source):
    """Extract the typing-test words from a 10fastfingers page source.

    Python 2 code.  Returns the text of every <span> inside the
    div#row1 word list.
    """
    soup = Soup(source)
    words = []
    for div in soup.findAll("div", {"id": "row1"}):
        for span in div.findAll("span"):
            words.append(span.text)
    print "[>] Gathered words: {}".format(", ".join(words))
    return words


if __name__ == "__main__":
    # visible=1: the display is shown, not suppressed
    display = Display(visible=1, size=(800, 800))
    display.start()
    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")
    chrome_path = "/usr/bin/chromedriver"
    browser = webdriver.Chrome(chrome_path, chrome_options=chrome_options)
    # log in via Facebook — credentials were blanked before committing;
    # fill in send_keys("") below to use.
    browser.get("https://10fastfingers.com/account/facebook_login")
    email = browser.find_element_by_xpath('//*[@id="email"]')
    email.send_keys("")
    password = browser.find_element_by_xpath('//*[@id="pass"]')
    password.send_keys("")
    password.send_keys(u'\ue007')  # \ue007 is the ENTER key
    sleep(2)
    words = get_words(browser.page_source)
    type_form = browser.find_element_by_xpath('//*[@id="inputfield"]')
def _search(self, start):
    """Run one Google results-page query through a proxied Firefox.

    Python 2 code.  Builds the query string from instance settings,
    fetches ``self.googleHost`` through the next proxy from
    ProxyManager, and returns the list of result hrefs; an empty result
    list invalidates the proxy.  Any failure is re-raised after the
    driver/display are cleaned up.
    """
    payload = {}
    # query text may already be unicode-safe; fall back to the raw value
    try:
        payload['q'] = self.query.encode('utf8')  # query to lookup
    except:
        payload['q'] = self.query  # query to lookup
    payload['start'] = start  # start point
    payload['gl'] = self.country  # query from country
    payload['hl'] = self.language  # user query language
    payload['lr'] = 'lang_%s' % self.language  # restrict language pages
    payload['num'] = GoogleSelenium.PAGE_LIMIT
    payload['safe'] = 'off'
    params = urllib.urlencode(payload)
    display = Display(visible=0, size=(800, 600))
    try:
        display.start()
        proxyInfo = ProxyManager.getNextProxy()
        myProxy = '%s:%s' % (proxyInfo.host, proxyInfo.port)
        proxy = Proxy({
            'proxyType': ProxyType.MANUAL,
            'httpProxy': myProxy,
            'ftpProxy': myProxy,
            'sslProxy': myProxy,
            'noProxy': ''  # set this value as desired
        })
        driver = webdriver.Firefox(proxy=proxy)
        try:
            # (Spanish, kept verbatim) "On any failure here, check that the
            # IP is registered with buyproxies.com"
            '''
            Cualquier fallo aquí, revisar que la ip está dada de alta en buyproxies.com
            '''
            driver.implicitly_wait(10)
            driver.get('%s?%s' % (self.googleHost, params))
            app_error_logger.info(u"%s" % driver.current_url)
            results = []
            # each organic result is an <h3 class="r"> wrapping the link
            h3List = driver.find_elements_by_xpath("//h3[@class='r']")
            for h3 in h3List:
                link = h3.find_element_by_tag_name('a')
                results.append(link.get_attribute("href"))
        except Exception as ex:
            raise ex
        finally:
            driver.close()
    except Exception as ex:
        raise ex
    finally:
        display.stop()
    # no results at all suggests the proxy is blocked/burned
    if not results:
        ProxyManager.invalidateProxy()
    return results
def AdjustResolution():
    """Start an 800x800 suppressed virtual display and return it.

    The original implementation discarded the ``Display`` handle, so the
    Xvfb process could never be stopped.  Returning it is backward
    compatible (callers that ignored the ``None`` return are unaffected)
    and lets new callers call ``.stop()`` when done.

    Returns:
        pyvirtualdisplay.Display: the started display.
    """
    display = Display(visible=0, size=(800, 800))
    display.start()
    return display
part_sec = 86400 / int(part) # how many seconds in 1 part per- day #prt = int(sys.argv[4]) opsy = platform.system() #operation system (windows or linux) #35.185.98.205 proxy_ = [ "163.172.39.13", "163.172.39.13", "51.15.13.157", "51.15.13.157", "209.205.212.34" ] port_start = [1151, 1163, 3226, 3239, 3000] port_end = [1162, 1175, 3238, 3250, 3250] if (opsy == 'Linux'): #for server run with virtual display from pyvirtualdisplay import Display display = Display(visible=0, size=(1366, 768)) display.start() repeat = 0 while (1): pp = 0 vv = 0 while (vv < int(part)): try: if (opsy == 'Linux'): common.heart.kill_process(pid) driver.close() except: err = 1 pos_ = int(proxy_number)
from utils import (
    clip_states,
    info_extractor,
    display_info,
    save_progress,
    save_graph,
    prep_data_to_send,
    send_result,
)
from pyvirtualdisplay import Display

# Group Info :
GROUP_NAME = "ADMIN"

# Module-level virtual display started at import time so the environment
# can render on a headless machine.  Nothing in this view stops it.
display = Display(visible=0, size=(1400, 900))
display.start()


class SpaceXRL:
    """Holds rocket-landing training state across episodes."""

    def __init__(self):
        # per-episode / aggregate landing statistics
        self.landed_ticks = 0
        self.number_of_landings = 0
        self.fraction_good_landings = 0
        # whether the level has been solved — presumably; confirm in run()
        self.cracked = False
        # environment handle, created later (outside this view)
        self.env = None
        self.level_number = 0

    def run(
        self,
        n_episodes,
def __init__(self):
    """
    Instantiates Selenium WebDriver and Chrome instance. Chrome is headless or not depending
    on the value of :any:`EcAppParam.gcm_headless` (boolean)

    Raises :any:`BrowserDriverException` if ``EcAppParam.gcm_browser`` is
    neither ``'Chrome'`` nor ``'Firefox'``.
    """
    # instantiates class logger
    self.m_logger = logging.getLogger('BrowserDriver')

    # create members so that they exist in __init__. In fact their real instantiation is in login_as_scrape()
    self.m_creationDate = datetime.datetime.now(tz=pytz.utc)
    # ~10 years in the future, i.e. effectively "never expires" until set properly
    self.m_expirationDate = datetime.datetime.now(
        tz=pytz.utc) + datetime.timedelta(days=3650)
    self.m_vpn_handle = None

    # FB User ID for progress messages
    self.m_phantomID = ''
    # FB User ID and password for API access
    self.m_user_api = ''
    self.m_pass_api = ''
    # FB token for API access + expiry date
    self.m_token_api = ''
    self.m_token_expiry = datetime.datetime.now(
        tz=pytz.utc) + datetime.timedelta(hours=1)

    if EcAppParam.gcm_headless:
        # if headless mode requested, starts the pyvirtualdisplay xvfb driver
        self.m_logger.info("Launching xvfb")
        self.m_display = Display(visible=0,
                                 size=(EcAppParam.gcm_headlessWidth,
                                       EcAppParam.gcm_headlessHeight))
        self.m_display.start()
    else:
        # no virtual display in windowed mode
        self.m_display = None

    # Launch Chrome (or Firefox) Webdriver
    if EcAppParam.gcm_browser == 'Chrome':
        # option object to be passed to chrome
        l_option = Options()

        # notification disabling option to be passed to Chrome
        l_option.add_argument('disable-notifications')
        if not EcAppParam.gcm_headless:
            l_option.add_argument('start-maximized')
        else:
            l_option.add_argument('start-fullscreen')

        # Create a new instance of the Chrome driver
        self.m_logger.info("Launching Chrome")
        self.m_driver = webdriver.Chrome(chrome_options=l_option)

        if not EcAppParam.gcm_headless:
            # Move the window to position x/y
            self.m_driver.set_window_position(700, 0)
            # Resize the window to the screen width/height
            self.m_driver.set_window_size(EcAppParam.gcm_width,
                                          EcAppParam.gcm_height)
            self.m_browserWidth, self.m_browserHeight = EcAppParam.gcm_width, EcAppParam.gcm_height
        else:
            self.m_browserWidth, self.m_browserHeight = \
                EcAppParam.gcm_headlessWidth, EcAppParam.gcm_headlessHeight
    elif EcAppParam.gcm_browser == 'Firefox':
        # Create a new instance of the Firefox driver
        self.m_logger.info("Launching Firefox")
        self.m_driver = webdriver.Firefox()

        if not EcAppParam.gcm_headless:
            # Resize the window to the screen width/height
            self.m_driver.set_window_size(EcAppParam.gcm_width,
                                          EcAppParam.gcm_height)
            # Move the window to position x/y
            self.m_driver.set_window_position(800, 0)
            self.m_browserWidth, self.m_browserHeight = EcAppParam.gcm_width, EcAppParam.gcm_height
        else:
            self.m_browserWidth, self.m_browserHeight = \
                EcAppParam.gcm_headlessWidth, EcAppParam.gcm_headlessHeight
    else:
        # unsupported browser type: log and abort construction
        l_message = '[BrowserDriver] Browser type not supported: {0}'.format(
            EcAppParam.gcm_browser)
        self.m_logger.critical(l_message)
        raise BrowserDriverException(l_message)

    self.m_dnl_ses_id = None
    self.m_loggedIn = False
#!/usr/bin/env python from pyvirtualdisplay import Display from selenium import webdriver display = Display(visible=0, size=(800, 600)) display.start() # now Firefox will run in a virtual display. # you will not see the browser. browser = webdriver.Firefox() # Cannot test from inside against devops.vcloudair.io not gateway support for hairpin nat (out and back in again) browser.get('http://*****:*****@value='Submit']") #submit = browser.find_element_by_xpath("//form[1]/input[3]") submit.click() browser.save_screenshot('screenshot.png') element = browser.find_element_by_css_selector('a').click()
def do_selenium(url, user_agent, domain, source):
    """Visit a suspected-phish URL in headless Firefox and screencap it.

    Python 2 code.  Loads ``url`` with the given ``user_agent``, handles
    popup alerts, filters out parked/error pages by page source and page
    title, and saves a timestamped screenshot named with ``source`` and
    ``domain``.  Returns False on timeout/error/known-FP page source,
    True otherwise.
    """
    # start up the virtual display
    display = Display(visible=0, size=(1366, 768))
    display.start()

    # start up browser with the spoofed user agent
    profile = webdriver.FirefoxProfile()
    profile.set_preference("general.useragent.override", user_agent)
    browser = webdriver.Firefox(firefox_profile=profile)
    browser.set_page_load_timeout(15)
    try:
        # hard 60s cap around the page load, on top of the 15s driver timeout
        with Timeout(60):
            browser.get(url)
    except Timeout.Timeout:
        print bcolors.WARNING + " [-] " + url + " has timed out. :(" + bcolors.ENDC
        return False
    except Exception:
        e = sys.exc_info()[0]
        print bcolors.WARNING + " [-] " + url + " has errored: %s" % e + bcolors.ENDC
        return False

    # accept a pop up alert if one comes up
    try:
        alert = browser.switch_to.alert
        print "\n[+] Popup alert observed: %s\n" % alert.text
        # leave tech-support-scam credential popups untouched
        if re.search("(?:requesting your username|zeus|call microsoft|call apple|call support)", alert.text, re.IGNORECASE):
            print "\n [-] This looks like it might be a tech support scam user/password popup, leaving it alone."
            pass
        else:
            alert.accept()
            print "[+] Popup Alert observed, bypassing..."
    except Exception:
        # no alert present — nothing to do
        pass

    # check page source to eliminate looking at pages that are parked and stuff we dont care about
    try:
        pagesource = browser.page_source
        # known parked-domain / registrar-placeholder fingerprints
        if re.search("<iframe src=\"http:\/\/mcc\.godaddy\.com\/park\/|https:\/\/www\.godaddy\.com\/domains\/search\.aspx|\/px\.js\?ch=1\"><\/script>|<td bgcolor=\"#788298\">|http:\/\/findbetterresults\.com\/\?dn=|Sponsored Listings displayed above are served automatically by a third party|La Caja Verde RSS Feed|http:\/\/imptestrm\.com\/rg\-erdr\.php\?_dnm=|http:\/\/c\.parkingcrew\.net\/|parking\.jino\.ru\/static\/main\.js|cdn\.dsultra\.com\/js\/registrar\.js|http:\/\/www\.findingresult\.com\/\?dn=|This page is parked free, courtesy of Media Temple|\/js\/standard\.js\?rte=1&tm=2&dn=|This error is generated when there was no web page with the name you specified at the web site|url: \'\/logpstatus\.php|the domain that was pointed to by this Ow\.ly link has been blocked because it was used|href=\"\/hosting_static_403\/style\.css|Ow\.ly link bandito \(404 error\)|Please contact your service provider for more details|The page that you have requested could not be found", pagesource):
            print bcolors.FAIL + " [-] Pagesource triggered a known FP string, omitting screenshot." + bcolors.ENDC
            return False
    except Exception:
        return False

    # do the screencap and sort it into known tp, known fp, or unknown
    try:
        pagetitle = browser.title.lower()
        # known error/parked/suspended page titles (multi-language)
        if not re.search("403|404|503|301|500|nicht verf[^\s]+gbar|request rejected|sayfa bulunamad|^error$|nothing found|contact support|nie znaleziono|strona nie zosta.+znaleziona|is for sale\!|not found|forbidden|account suspended|bandwidth limit exceeded|pagina non trovata|no se encontr|o encontrada|has expired|coming soon|host is not delegated|maintenance mode|website is blocked|site unavailable|unknown domain|1freehosting\.com|under construction|sucuri website firewall|pagina suspendata|site maintenance|page non trouv|sitio web suspendido|there has been an error processing your request|this website is temporarily suspended|seite wurde nicht gefunden|hugedomains.com|pagina niet gevonden|^wordpress.+error$|hosting linux e windows|bluehost.com|000webhost\.com|resource limit is reached|your access to this site has been limited|whoops\! there was an error|suspended by ranca\.com|under construction|400 bad request|seite wurde nicht gefunden|sidan kunde inte hittas|site not installed|web site currently not available|domain for sale|coming soon: another fine website hosted by|502 bad gateway|this website is currently unavailable|this account has been suspended|we can't find that page|web filter violation|girls near you|pagina niet gevonden|pagina nu a fost|sua imobiliaria em brumadinho|database error|website is inactive|seite nicht gefunden|^4club$|account disabled|the page cannot be found|we don\'t have that page|forbes|service unavailable|suspended website|gwen stefani shares blake shelton|ukraine\.com\.ua|apache http server test page|temporary error 502|expired registration recovery policy|welcome to nginx|absolutely free dynamic dns|suspended site|create a website |dropbox \- 460|410 gone|a small hello|bad request|sitebuilder|cuenta suspendida|contact admin|domain default page|apache http server test page|microsoft azure app service|site en construction|student from cornell university|shrink your urls and get paid|cheap domain names|domain does not exist|linkbucks\.com|hostmonster|the request could not be satisfied|hospedagem de sites|apache2 ubuntu default page|no se encuentra la|domain seo service registration corp|site no longer available|buy sell rent properties in|parallels h\-sphere|domain profile \- afternic|cloudyfiles\.co|free reliable file hosting|canadian web hosting|web hosting canada|film streaming|byethost free hosting|unlimited free subdomain hosting|something lost|error page|dns resolution error|appserv open project|powered by discuz!|free web hosting|high cpu notice|powered by phpwind", pagetitle):
            shot_name = time.strftime("%Y%m%d-%H%M%S") + '-' + source + '-' + domain + '.png'
            try:
                browser.save_screenshot(shot_name)
                print " [+] Screencapped %s as %s" % (url, shot_name)
            except Exception:
                print bcolors.FAIL + " [-] Unable to screencap " + url + bcolors.ENDC
                pass
            # putting this section on hold, as the fuzzy hashing is giving me FP
            # gs_shot = "gs_" + shot_name
            # subprocess.call(['convert', shot_name, '-colorspace', 'Gray', gs_shot])
            # resized_shot = "rs_" + gs_shot
            # subprocess.call(['convert', gs_shot, '-resize', '250x250!', resized_shot])
            # subprocess.call(['rm', gs_shot])
            # hash_response_fp=subprocess.check_output(['ssdeep', '-bm', 'gs_phish_fps.hashes', resized_shot])
            # hash_response_tp=subprocess.check_output(['ssdeep', '-bm', 'gs_phish_positives.hashes', resized_shot])
            # if 'matches' in hash_response_fp:
            #     shot_result="False Positive"
            #     subprocess.call(['mv', shot_name, 'fp_screencaps'])
            #     subprocess.call(['rm', resized_shot])
            # elif 'matches' in hash_response_tp:
            #     shot_result="True Positive"
            #     subprocess.call(['mv', shot_name, 'tp_screencaps'])
            #     subprocess.call(['rm', resized_shot])
            # else:
            #     shot_result="Unknown"
            #     subprocess.call(['mv', shot_name, 'unk_screencaps'])
            #     subprocess.call(['rm', resized_shot])
        else:
            print bcolors.FAIL + " [-] Pagetitle triggered a known FP string, omitting screenshot." + bcolors.ENDC
            pass
    except Exception:
        print bcolors.FAIL + " [-] An error occured, unable to screencap " + url + bcolors.ENDC
        pass

    # screencaps.close()
    # tear down the browser and virtual display
    # NOTE(review): the early `return False` paths above leave the browser
    # and display running — pre-existing leak, not changed here.
    browser.quit()
    display.stop()
    return True