def __set_webdriver_log_level(self, log_level):
    """Set the verbosity of Selenium's remote-connection logger.

    The webdriver logger is too verbose by default. A falsy ``log_level``
    falls back to ``logging.INFO``.
    """
    from selenium.webdriver.remote.remote_connection import LOGGER

    effective_level = log_level or logging.INFO
    LOGGER.setLevel(effective_level)
def setUpClass(cls):
    """Start the optional display, launch the browser and log in once.

    Stores the driver on ``cls.wd``, the server URL on ``cls.server_url``
    and the base64-encoded ``user:password`` pair on ``cls.base64String``.
    The login persists between tests, so the credentials are only typed
    when the login form is actually present on the page.
    """
    # Equality (not truthiness) is kept on purpose so that an unset/None
    # GUIDISPLAY does not start the display.
    if settings.GUIDISPLAY == False:  # noqa: E712
        settings.DISPLAY.start()
    LOGGER.setLevel(logging.WARNING)

    # Instantiating the WebDriver will load your browser
    cls.wd = CustomWebDriver()
    cls.server_url = settings.TEST_SERVER_URL

    # base64.encodestring() was removed in Python 3.9 and never accepted
    # text on Python 3. b64encode() emits no newlines, so no stripping is
    # needed; the result is normalised back to a native str.
    credentials = "%s:%s" % ("ionadmin", "ionadmin")
    encoded = base64.b64encode(credentials.encode("utf-8"))
    if not isinstance(encoded, str):
        encoded = encoded.decode("ascii")
    cls.base64String = encoded

    cls.wd.get("%s%s" % (cls.server_url, '/login'))
    try:
        # enter the username and password
        cls.wd.find_css('#id_username').send_keys('ionadmin')
        cls.wd.find_css("#id_password").send_keys('ionadmin')
        # click the login link
        cls.wd.find_element_by_xpath('//button[@type="submit"]').click()
        # wait for the Ajax on the HOME page
        cls.wd.wait_for_ajax()
    except NoSuchElementException:
        # No username field: the session is already logged in.
        pass
def setUpPackage():
    """Start the test app on a daemon thread and register the web actors.

    The running app is stored under ``web_actors['server']`` and a new
    ``Browser`` under ``web_actors['browser']``; Selenium logging is then
    reduced to warnings.
    """
    app = App()

    server_thread = threading.Thread(target=app.start)
    server_thread.daemon = True
    server_thread.start()

    web_actors['server'] = app
    web_actors['browser'] = Browser()

    LOGGER.setLevel(logging.WARNING)
def __init__(self, thread_id, config, url):
    """Store the thread settings, open a Firefox browser and init the thread.

    Args:
        thread_id: identifier used in log messages and kept on the instance.
        config: configuration object kept as ``self._cfg``.
        url: target URL kept as ``self._url``.
    """
    class_name = self.__class__.__name__
    message = "Instantiating the thread '%s' for the class '%s'" % (
        thread_id, class_name)
    logging.debug(message)

    self._thread_id = thread_id
    self._cfg = config
    self._url = url
    self.data = None
    self.log = None

    self.browser = webdriver.Firefox()
    LOGGER.setLevel(logging.WARNING)

    threading.Thread.__init__(self)
def _init_selenium_driver(self):
    """Create the Firefox driver on ``self.d`` and size its window.

    The window dimensions come from ``config.BROWSER_WIDTH`` and
    ``config.BROWSER_HEIGHT``.
    """
    LOGGER.setLevel(logging.WARNING)

    # Chrome and PhantomJS variants were tried here before; Firefox is the
    # backend currently in use.
    firefox = webdriver.Firefox()
    self.d = firefox

    window_size = (config.BROWSER_WIDTH, config.BROWSER_HEIGHT)
    firefox.set_window_size(*window_size)
def with_firefox():
    """Provide a maximised Firefox browser on ``world.browser`` around a yield.

    The geckodriver binary is selected by operating system and looked up in
    the ``conf`` directory that replaces ``app`` in this file's directory
    path. The browser is always quit and removed from ``world`` afterwards,
    even when the code running during the ``yield`` raises.

    Raises:
        RuntimeError: if no geckodriver is bundled for the current OS.
    """
    LOGGER.setLevel(logging.WARNING)

    # platform.platform() is capitalised ("Linux-...", "Windows-...") and
    # "win" is a substring of "Darwin", so substring tests on it pick the
    # wrong driver or none at all. platform.system() is unambiguous.
    driver_names = {
        "windows": "geckodriver.exe",
        "linux": "geckodriver-linux",
        "darwin": "geckodriver-macos",
    }
    system = platform.system().lower()
    if system not in driver_names:
        raise RuntimeError(
            "No bundled geckodriver for platform %r" % platform.system())

    # Build the relative part with the native separator instead of a
    # hard-coded backslash (which was also an invalid "\g" escape).
    here = os.path.dirname(os.path.realpath(__file__))
    driver_path = here.replace(
        "app", os.path.join("conf", driver_names[system]))

    world.browser = webdriver.Firefox(executable_path=driver_path)
    try:
        world.browser.maximize_window()
        yield
    finally:
        world.browser.quit()
        delattr(world, 'browser')
def __init__(self):
    """Create a PhantomJS driver with a desktop user agent and fixed timeouts.

    The driver is stored on ``self.driver``; the timeout values used are
    also attached to it as ``self.driver.timeout`` so there is a single
    place to read them from.
    """
    phantom_dir = abspath(
        join(dirname(dirname(__file__)), "phantomjs-2.1.1-64"))
    # phantomjs needs to be in path when running from pycharm
    sys.path.append(phantom_dir)

    capabilities = dict(DesiredCapabilities.PHANTOMJS)
    capabilities["phantomjs.page.settings.userAgent"] = ("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0 ")

    phantom_args = [
        "--webdriver-loglevel=DEBUG",
        "--cookies-file=ghostdriver.cookies",
    ]
    ghostdriver_logger.setLevel(logging.DEBUG)

    self.driver = webdriver.PhantomJS(
        executable_path=phantom_dir,
        desired_capabilities=capabilities,
        service_args=phantom_args)

    # One dictionary so every wait in the code base uses the same values.
    timeouts = {
        "implicit": 10,
        "explicit": 10,
        "page_load": 30
    }
    self.driver.timeout = timeouts

    self.driver.implicitly_wait(self.driver.timeout["implicit"])
    self.driver.set_window_size(1280, 768)
    self.driver.maximize_window()
    # driver.get uses this timeout when calling requests.get
    self.driver.set_page_load_timeout(self.driver.timeout["page_load"])
def __init__(self, xvfb=True, driver='Firefox', mootools=False, timeout=90,
             width=1440, height=1200, firefox_version=46,
             desired_capabilities='FIREFOX',
             command_executor='http://127.0.0.1:4444/wd/hub'):
    """Record the browser/display options for later use.

    Args:
        xvfb: run headless inside XVFB; forced off when the ``skip_xvfb``
            environment variable is set.
        driver: driver name, e.g. Firefox or PhantomJS (must be installed).
        mootools: use MooTools instead of jQuery.
        timeout: global timeout for all ``wait_*`` functions.
        width: XVFB virtual monitor width.
        height: XVFB virtual monitor height.
        firefox_version: accepted but not stored by this constructor.
        desired_capabilities: capabilities name kept on the instance.
        command_executor: remote hub URL kept on the instance.
    """
    self.driver = driver
    self.mootools = mootools
    self.timeout = timeout
    self.width = width
    self.height = height
    self.desired_capabilities = desired_capabilities
    self.command_executor = command_executor

    # The environment switch wins over the constructor argument.
    self.xvfb = False if os.environ.get('skip_xvfb') else xvfb

    # Turn off annoying selenium logs
    s_logger.setLevel(logging.WARNING)
def _init_selenium_driver(self):
    """Create the Chrome driver on ``self.d`` and size its window.

    Uses the system chromedriver binary; the window dimensions come from
    ``config.BROWSER_WIDTH`` and ``config.BROWSER_HEIGHT``.
    """
    LOGGER.setLevel(logging.WARNING)

    # Proxied Firefox and PhantomJS variants were tried here before; Chrome
    # is the backend currently in use.
    chromedriver_bin = "/usr/lib/chromium-browser/chromedriver"
    chrome = webdriver.Chrome(executable_path=chromedriver_bin)
    self.d = chrome

    window_size = (config.BROWSER_WIDTH, config.BROWSER_HEIGHT)
    chrome.set_window_size(*window_size)
import os, inspect, nose, re, logging DB_URL_RE = re.compile(r'([^\:]+)\:\/\/([^\:]+)\:([^\@]+)\@([^\/]+)\/([^\s]+)') environ = {} if not pylons.test.pylonsapp: logging.error("Cannot run tests: need to specify config file") # if this isn't set up right, let it fail in order for nose to display the error SetupCommand('setup-app').run([pylons.test.pylonsapp.config['__file__']]) try: from selenium.webdriver.remote.remote_connection import LOGGER LOGGER.setLevel(logging.INFO) except ImportError, e: print "These tests are likely going to fail as Selenium had an import error..." def CSS(source, selector): return source.find_elements_by_css_selector(selector) class SeleniumContextTest(TestCase): """ A Selenium test that correctly operates within a QTools context. Whether or not a subclass of this test runs depends on whether it is in a directory specified by the 'nose.selenium.test_paths' variable in the active Pylons configuration. It will also trigger Firefox as the webdriver to check the front-end
""" import logging import os import time import unittest import warnings from pyvirtualdisplay import Display from selenium import webdriver from selenium.common.exceptions import NoSuchElementException, WebDriverException from selenium.webdriver.chrome.options import Options from selenium.webdriver.remote.remote_connection import LOGGER LOGGER.setLevel(logging.WARNING) target_cache = {} class WebdriverTestPrimitives(unittest.TestCase): """Webdriver Test Primitives""" url = 'http://localhost' headless = True size = (1024, 768) logger = logging.getLogger(__name__) path = '.' driver_name = os.environ.get('ZOOM_TEST_DRIVER', 'chrome') def setUp(self):
patch_pytest_selenium_screenshot() def pytest_addoption(parser): """Add support for headless option to pytest-selenium.""" parser.addoption('--headless', action='store', help='enable headless mode for supported browsers.') # Disable selenium's remote_connection.py DEBUG statements that pollute # test stdout. # They aren't needed as driver logs are included into links of HTML report. SELENIUM_LOGGER.setLevel(logging.INFO) @pytest.fixture(autouse=True) def reset_dict_executed_fixtures(): """Reset dict_executed_fixtures between tests""" dynamic_fixtures.dict_executed_fixtures = {} @pytest.fixture(scope="session") def session_capabilities(session_capabilities): """Log browser (console log) and performance (request / response headers) data. They will appear in `Links` section of pytest-html's html report. """ session_capabilities["loggingPrefs"] = {
except urllib.error.HTTPError as error: print('Error code: ', error.code) return error.code except Exception as detail: print("ERROR:", detail) return True return False SCHED = BlockingScheduler() REQ_PROXY = RequestProxy() PROXIES = REQ_PROXY.get_proxy_list() NEXT_RUN = datetime.now() + timedelta(hours=3) selenium_logger.setLevel(logging.WARNING) class Parser: """ Main Parser class. """ def __init__(self, use_proxy=False): """ Parser initialisation by launching the browser. """ self.browser = self.browser_setup(use_proxy=use_proxy) self.tg_sources_path = 'telegram_parsing/channels.txt' self.keywords = Keywords() def parse_telegram(self):
def execute():
    """Scrape Amazon.ca search results for a fixed query into a CSV file.

    A headless Chrome session is opened through a randomly chosen proxy
    from ``globalvars.req_proxy_list``; proxies that fail to load the start
    page are dropped from that list. Up to ``NUM_PAGES`` result pages are
    walked and up to ``NUM_ITEMS`` matching items (name, price, URL) are
    written to ``<query>_amazon_results.csv``.

    The browser is always quit and the CSV file always closed, including on
    the early-return and error paths.
    """
    import logging
    from selenium.webdriver.remote.remote_connection import LOGGER
    LOGGER.setLevel(logging.WARNING)
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import TimeoutException
    from bs4 import BeautifulSoup
    from selenium.webdriver.common.keys import Keys
    import csv
    import random
    from handlers import globalvars

    DRIVER_PATH = 'chromedriver.exe'
    SOURCE_URL = 'https://www.amazon.ca'
    NUM_PAGES = 4
    NUM_ITEMS = 100

    # if an error occurs where element cannot be found then
    # can't run headless due to captcha
    # try to capture element name for captcha to practice
    options = Options()
    options.headless = True
    browser = webdriver.Chrome(DRIVER_PATH, options=options)
    try:
        browser.set_page_load_timeout(3)

        while True:
            if not globalvars.req_proxy_list:
                print("No working proxy left; giving up.")
                return
            proxy = random.choice(globalvars.req_proxy_list)
            PROXY = proxy.get_address()
            try:
                webdriver.DesiredCapabilities.CHROME['proxy'] = {
                    "httpProxy": PROXY,
                    "ftpProxy": PROXY,
                    "sslProxy": PROXY,
                    "proxyType": "MANUAL",
                }
                browser.get(SOURCE_URL)
                break
            except Exception:
                # Drop the proxy *object*; comparing its address string
                # against the list never matched, so bad proxies were
                # retried forever.
                if proxy in globalvars.req_proxy_list:
                    globalvars.req_proxy_list.remove(proxy)

        # search_content = input ("What would you like to scrape?\n")
        search_content = 'scooter'
        browser.save_screenshot("sc.png")
        try:
            search = WebDriverWait(browser, 5).until(
                EC.presence_of_element_located((By.ID, 'twotabsearchtextbox')))
        except TimeoutException:
            print("oh oh..where it go?")
            return
        search.send_keys(search_content)
        search.send_keys(Keys.RETURN)

        search_split = search_content.split(' ')
        file_name = search_content + '_amazon_results.csv'
        item_map = {}
        with open(file_name, 'w+', newline='', encoding='utf-8') as f:
            c = csv.writer(f)
            c.writerow(["Item Name", "Price", "URL"])

            i = 0
            while i < NUM_PAGES and len(item_map) < NUM_ITEMS:
                html = browser.page_source
                soup = BeautifulSoup(html, 'html.parser')
                items = soup.find_all(
                    "div", {
                        "class":
                        "sg-col-4-of-12 s-result-item s-asin sg-col-4-of-16 sg-col sg-col-4-of-20"
                    })
                for item in items:
                    name = item.find(
                        "span",
                        {"class": "a-size-base-plus a-color-base a-text-normal"})
                    if name is None:
                        # Result tile without a title span (e.g. an ad).
                        continue
                    if not any(word in name.get_text().lower()
                               for word in search_split):
                        continue
                    link = name.parent
                    name = name.get_text()
                    if link.name == 'a':
                        link = link['href']
                        link = SOURCE_URL + link
                    price = item.find("span", {"class": "a-offscreen"})
                    if not price:
                        continue
                    price = price.get_text()
                    item_map[id(link)] = [name, price, link]
                    if len(item_map) >= NUM_ITEMS:
                        break
                i += 1
                try:
                    next_page_ele = browser.find_element_by_xpath(
                        "//li[@class='a-last']")
                    next_page_ele.find_element_by_tag_name("a").click()
                except Exception:
                    # No further result page.
                    break

            for ele in my_sorted(item_map):
                c.writerow(item_map[ele])
    finally:
        browser.quit()
def setUpClass(self):
    """Prepare the functional-test fixture: DB, web2py server, taxa, browser.

    Steps:
      1. Refuse to run unless ``models/db.py`` sets ``is_testing=True``.
      2. Open the DB connection and start the web2py server.
      3. Look up the IDs/OTTs of a few well-known taxa through the search
         API and store them as ``humanID``/``humanOTT``, ``dogID``/...,
         ``catID``/..., ``oakID``/... and ``mammalID``/``mammalOTT``.
      4. Quieten noisy loggers and start Chrome with browser logging on.

    Raises:
        AssertionError: if ``is_testing`` is not set or a taxon is missing
            from the API response.
    """

    def striptext_in_file(line, file):
        """Return True if any line of *file* starts with *line*.

        Both sides are compared with all spaces removed.
        """
        wanted = line.strip().replace(" ", "")
        return any(
            l.strip().replace(" ", "").startswith(wanted) for l in file)

    db_py_loc = os.path.realpath(
        os.path.join(web2py_app_dir, "models", "db.py"))
    with open(db_py_loc, 'r') as db_py:
        assert striptext_in_file(
            "is_testing=True", db_py
        ), "To do any functional testing you must set is_testing=True in " + db_py_loc

    self.db = get_db_connection()
    try:
        self.web2py = Web2py_server(self.appconfig_loc)
    except AttributeError:
        # No appconfig_loc on this test class: use the server defaults.
        self.web2py = Web2py_server()

    # (attribute prefix, scientific name, API flag). The human query read
    # "H**o sapiens" (a mangled "Homo sapiens"), which can never match.
    # Mammalia is an internal node, so it is searched with nodes_only and
    # callers should use its ID rather than its OTT.
    lookups = (
        ("human", "Homo sapiens", "leaves_only"),
        ("dog", "Canis lupus", "leaves_only"),
        ("cat", "Felis silvestris", "leaves_only"),
        ("oak", "Quercus robur", "leaves_only"),
        ("mammal", "Mammalia", "nodes_only"),
    )
    try:
        for prefix, sciname, flag in lookups:
            resp = requests.get(base_url + "API/search_for_sciname.json",
                                params={"query": sciname, flag: 1}).json()
            taxon_id, taxon_ott = resp["result"][0][0:2]
            setattr(self, prefix + "ID", taxon_id)
            setattr(self, prefix + "OTT", taxon_ott)
    except LookupError:
        # Raised explicitly so the check survives ``python -O``.
        raise AssertionError(
            "Could not find human, dog, cat, oak, or mammal OTTs")

    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    selenium_logger.setLevel(logging.WARNING)

    #chrome_options = webdriver.ChromeOptions()
    #chrome_options.add_experimental_option("mobileEmulation", { "deviceName": "iPhone 7" })
    self.caps = webdriver.ChromeOptions().to_capabilities()
    # enable browser logging
    self.caps['loggingPrefs'] = {'browser': 'ALL'}
    self.browser = webdriver.Chrome(desired_capabilities=self.caps)
    self.browser.implicitly_wait(1)
def parse(self, response):
    """Drive Chrome over the listing page and dump agent contacts to CSV.

    For every ``div.srpBlock`` element on ``response.url`` the company and
    agent name are read into ``self.item``, the contact button is clicked
    and, while ``self.count == 1`` and the phone form is displayed, the
    contact form (name, mobile, e-mail, SMS code) is filled in once. The
    phone number is then read and one row per block is written to
    ``magicagents.csv``.

    Nothing is returned or yielded; any exception is printed and swallowed.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source — confirm it against the original file,
    in particular where the ``if "pupWrap popContainer"`` bodies end.
    """
    LOGGER.setLevel(logging.WARNING)
    try:
        driver = webdriver.Chrome()
        driver.get(response.url)
        time.sleep(2)
        driver.implicitly_wait(50)
        # NOTE(review): file and driver are only closed on the success path.
        f = open('magicagents.csv', 'w')
        # One element per agent/listing block on the results page.
        cont = driver.find_elements_by_xpath('//div[@class="srpBlock"]')
        # f = open('agents.csv','a')
        # f.write('Name,Company,Phone,Emails,"Date of Addition"\n')
        # f.close()
        for c in cont:
            self.item['platform'] = 'Magicbricks'
            self.item['listing_date'] = dt.now().strftime(
                '%m/%d/%Y %H:%M:%S')
            try:
                self.item['company'] = c.find_element_by_xpath(
                    'div[@class="proDetail"]/div[@class="proNameAndPrice"]/div[@class="proName"]/p[@class="proHeading"]/strong'
                ).text
                print(self.item['company'])
                self.item['agent_name'] = c.find_element_by_xpath(
                    'div[@class="proDetail"]/div[@class="proNameAndPrice"]/div[@class="proName"]/p[@class="proGroup"]'
                ).text.replace('Contact Person: ', '')
                print(self.item['agent_name'])
                driver.implicitly_wait(300)
                # Click the second left-hand button of the block; a failure
                # here is printed and the block is still processed.
                try:
                    buttn = c.find_element_by_xpath(
                        'div[@class="srpBtnWrap"]/div[@class="srpBlockLeftBtn"]/ul/li[2]/a'
                    )
                    act_butt = ActionChains(driver)
                    act_butt.move_to_element(buttn)
                    act_butt.click(buttn).perform()
                    driver.implicitly_wait(50)
                except Exception as e:
                    print(e)
                # The contact form is only filled in while self.count is 1;
                # it is reset to 0 at the end of this branch.
                if (self.count == 1
                    ) and ('display: block;' in c.find_element_by_xpath(
                        'div[@class="srpBtnWrap"]/div[@class="contactForms"]/div[@class="formsWrap viewPhoneForm"]'
                    ).get_attribute('style')):
                    ind = c.find_element_by_xpath(
                        './/input[contains(@id,"userTypeA")]')
                    act_ind = ActionChains(driver)
                    act_ind.move_to_element(ind)
                    act_ind.click(ind).perform()
                    driver.implicitly_wait(50)
                    name = c.find_element_by_xpath(
                        './/input[contains(@id,"name")]'
                    )  # div[@class="srpBtnWrap"]/div[@class="contactForms"]/div[@class="formsWrap viewPhoneForm"]/div[@class=" "]/div[@class="formCont propUpdatePop forForm"]/div[@class="formCont propUpdatePop"]/form/div/div[@class="formBlock"]/ul/li[2]/div[@class="formValue"]/input
                    act_name = ActionChains(driver)
                    act_name.move_to_element(name)
                    act_name.click(name)
                    act_name.send_keys('Chintan').perform()
                    driver.implicitly_wait(50)
                    time.sleep(2)
                    mob = c.find_element_by_xpath(
                        './/input[contains(@id,"userMobile")]'
                    )  # div[@class="srpBtnWrap"]/div[@class="contactForms"]/div[@class="formsWrap viewPhoneForm"]/div[@class=" "]/div[@class="formCont propUpdatePop forForm"]/div[@class="formCont propUpdatePop"]/form/div/div[@class="formBlock"]/ul/li[3]/div[@class="formValue"]/div[@class="ftlt"]/input
                    act_mob = ActionChains(driver)
                    act_mob.move_to_element(mob)
                    act_mob.click(mob)
                    act_mob.send_keys('7715093176').perform()
                    driver.implicitly_wait(50)
                    time.sleep(2)
                    email = c.find_element_by_xpath(
                        './/input[contains(@id,"userEmail")]'
                    )  #div[@class="srpBtnWrap"]/div[@class="contactForms"]/div[@class="formsWrap viewPhoneForm"]/div[@class=" "]/div[@class="formCont propUpdatePop forForm"]/div[@class="formCont propUpdatePop"]/form/div/div[@class="formBlock"]/ul/li[4]/div[@class="formValue"]/input
                    act_email = ActionChains(driver)
                    act_email.move_to_element(email)
                    act_email.click(email)
                    act_email.send_keys('*****@*****.**').perform()
                    driver.implicitly_wait(50)
                    time.sleep(2)
                    # Submit the contact form.
                    view = c.find_element_by_xpath(
                        './/form[contains(@id,"propertyCForm")]/div/div[2]/ul/li[6]/a'
                    )
                    act_view = ActionChains(driver)
                    act_view.move_to_element(view)
                    act_view.click(view).perform()
                    driver.implicitly_wait(50)
                    time.sleep(10)
                    # NOTE(review): the driver is quit when this popup marker
                    # is in the page, yet it is used again right below —
                    # confirm the intended behaviour.
                    if "pupWrap popContainer" in driver.page_source:
                        driver.quit()
                    time.sleep(2)
                    # Type the SMS code and confirm it.
                    dig = c.find_element_by_id('smsNo')
                    act_dig = ActionChains(driver)
                    act_dig.move_to_element(dig)
                    act_dig.click(dig)
                    act_dig.send_keys('100').perform()
                    driver.implicitly_wait(50)
                    time.sleep(2)
                    press = c.find_element_by_xpath(
                        '//div[@id="smsWrapper"]/div[2]/div[2]/a')
                    act_press = ActionChains(driver)
                    act_press.move_to_element(press)
                    act_press.click(press).perform()
                    driver.implicitly_wait(50)
                    self.count = 0
                    time.sleep(2)
            except Exception as e:
                print(e)
            if "pupWrap popContainer" in driver.page_source:
                driver.quit()
            time.sleep(2)
            # Fall back to the literal string 'None' when no phone is shown.
            try:
                self.item['phone'] = c.find_element_by_xpath(
                    '//div[contains(@id,"mobileDiv")]/strong').text
            except:
                self.item['phone'] = 'None'
            # Commas would break the hand-built CSV row below.
            if ',' in self.item['phone']:
                self.item['phone'] = self.item['phone'].replace(',', ';')
            f.write('"' + self.item['agent_name'] + '","' +
                    self.item['company'] + '",' + self.item['phone'] +
                    ',"*****@*****.**","' +
                    dt.strftime(dt.now(), '%Y-%m-%d %H:%M:%S') + '","' +
                    self.item['platform'] + '"' + '\n')
            driver.implicitly_wait(50)
            print("++++++++++++++++++++++++++++++++++++++")
            print(self.item)
            print("++++++++++++++++++++++++++++++++++++++")
            time.sleep(2)
        f.close()
        driver.quit()
    except Exception as e:
        print(e)
# Django integration from django.utils import timezone from decimal import Decimal # Logging: Scrapy native, Selenium import logging from selenium.webdriver.remote.remote_connection import LOGGER as logger_Selenium # Debugging from scrapy.shell import inspect_response # Get your settings from settings.py: settings = get_project_settings() # Configure Selenium logging logger_Selenium.setLevel(logging.INFO) # Configure Scrapy logging # Raise loglevel for this spider logging.getLogger('scrapy').setLevel(logging.INFO) def null_to_blank(s): if s is None: return "" else: return s class DecathlonSpider(scrapy.Spider): name = "decathlon" start_urls = []
class ProductsSpider(scrapy.Spider):
    """Crawl uniqlo.com/sg product pages via Splash, using Selenium for menus.

    The category links only appear after hovering the top navigation, so
    they are collected with a headless Chrome driver; category and item
    pages themselves are rendered through ``SplashRequest``.
    """

    # NOTE(review): '__file__' is a quoted string here, so this resolves
    # relative to the current working directory rather than this module.
    # Left unchanged because existing setups may rely on it.
    basedir = os.path.dirname(os.path.realpath('__file__'))
    chrome_driver_path = os.path.join(basedir, 'chromedriver')
    LOGGER.setLevel(logging.WARNING)

    name = 'products'
    allowed_domains = ['uniqlo.com']
    start_urls = ['https://www.uniqlo.com/sg/en']

    def __init__(self, *args, **kwargs):
        """Initialise the spider and start the headless Chrome driver.

        Positional and keyword arguments are forwarded to
        ``scrapy.Spider.__init__`` so spider arguments (``-a key=value``)
        keep working.
        """
        super(ProductsSpider, self).__init__(*args, **kwargs)
        chrome_options = Options()
        chrome_options.add_argument("--window-size=1920x1080")
        chrome_options.add_argument('--headless')
        self.driver = webdriver.Chrome(options=chrome_options,
                                       executable_path=self.chrome_driver_path)

    def closed(self, reason):
        """Quit the Chrome driver when the spider is closed."""
        self.driver.quit()

    def start_requests(self):
        """Request every start URL through Splash with a 2 second wait."""
        for url in self.start_urls:
            yield SplashRequest(url, self.parse, args={'wait': 2})

    def parse(self, response):
        """Yield one Splash request per category link found with Selenium."""
        # links to men, women, child, kids
        # links = response.css('a[class="fr-global-nav-item px-s"]').css('a::attr(href)').getall()
        count = 0
        for url, text in self.get_category_links_selenium(response.url):
            yield SplashRequest(url,
                                self.parse_category_link,
                                meta={
                                    'url': url,
                                    'text': text,
                                    'nav': 'women'
                                },
                                args={'wait': 5})
            count += 1
            # NOTE(review): debug output; kept inside the loop so `text` is
            # always bound — confirm the original placement.
            print('\n\n', count, '\n\n')
            print('\n\n', text, '\n\n')

    def get_category_links_selenium(self, url):
        """Return ``(href, text)`` pairs of the first nav item's categories.

        Loads *url* in Chrome, hovers the first ``.fr-global-nav-item``
        element and collects every ``.w12-f [href]`` element that appears.
        """
        self.driver.get(url)
        wait = WebDriverWait(self.driver, 3)
        navs = wait.until(
            EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, ".fr-global-nav-item")))
        nav_women = navs[0]
        print('\n\n' + nav_women.text + '\n\n')
        # Hovering opens the drop-down that holds the category links.
        ActionChains(self.driver).move_to_element(nav_women).perform()
        categories = wait.until(
            EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, ".w12-f [href]")))
        categories_urls = [elem.get_attribute('href') for elem in categories]
        categories_text = [elem.get_attribute('text') for elem in categories]
        return zip(categories_urls, categories_text)

    def parse_category_link(self, response):
        """Yield a Splash request for every product link on a category page."""
        item_links = response.css(".fr-grid-item").css('.w4').css(
            'a::attr(href)').getall()
        for item_link in item_links:
            url = response.urljoin(item_link)
            yield SplashRequest(url, self.parse_item, args={'wait': 5})

    def parse_item(self, response):
        """Extract name, price and description of one product page."""
        item = UniqloItem()
        summary = response.css('div[data-test="product-detail-summary"]')
        item['name'] = response.xpath(
            '//span[has-class("title", "fr-no-uppercase")]/text()').get()
        item['price'] = summary.css('div.price').css('span::text').get()
        item['description'] = summary.css('div.fr-text::text').get()
        print('\n\n')
        print(item)
        print('\n\n')
        yield item
def __init__(self, driver):
    """Bind the page object to *driver* and set up its logger.

    Selenium's own logger is reduced to warnings before the parent class
    is initialised with the same driver.
    """
    LOGGER.setLevel(logging.WARNING)

    self.driver = driver
    parent = super(Login, self)
    parent.__init__(self.driver)

    self.logger = logging.getLogger(__name__)
def __enter__(self):
    """Open a Firefox browser, keep it on ``self._browser`` and return it."""
    LOGGER.setLevel(logging.WARNING)
    firefox = webdriver.Firefox()
    self._browser = firefox
    return self._browser
a nested X server, so you see what is going on: selenium/webdriver makes the browser do things. """ import logging import os # from pyvirtualdisplay import Display from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.webdriver.remote.remote_connection import LOGGER from subprocess import call import time import unittest from webdriver_utils import (Server, Client) LOGGER.setLevel(logging.WARNING) # silence logging """ this setting controls whether the browser will be visible (1) or not (0) """ is_visible = 1 # configuration of testing framework cfg = { 'app': { 'host': '0.0.0.0', 'port': '6543', 'db': 'webdrivertest.db', 'ini': "webdrivertest.ini", 'appSettings': {}, }, }
use_proxy = True http_proxy = "157.65.25.144:3128" logs = True start_year = 2004 # If this is something other than 2004 then please change the month_days accordingly. start_day = 1 """ CONFIGURATION_SETUP -> Using the configuration to setup the program. """ if autoinstall_driver: chromedriver_autoinstaller.install(cwd=True) os.environ['PATH'] += os.path.pathsep + magick_path LOGGER.setLevel(logging.WARNING) # Remove unncesessary logs from the console. chrome_options = Options() chrome_options.add_argument( 'log-level=3') # Remove unnecessary logs from the console. if chrome_headless: chrome_options.headless = True capabilities = webdriver.DesiredCapabilities.CHROME if use_proxy: proxy = Proxy() proxy.proxy_type = ProxyType.MANUAL proxy.http_proxy = http_proxy proxy.add_to_capabilities(capabilities) print("Using proxy =", proxy.http_proxy) """
from splinter import Browser import datetime import json import os import logging import time logging.basicConfig() logger = logging.getLogger("splinter_tests.__init__") # Quiet down the Selenium logging from selenium.webdriver.remote.remote_connection import \ LOGGER as _selenium_logger _selenium_logger.setLevel(logging.INFO) SERVER_URL = "http://localhost:5859" #SERVER_URL = "https://xbeewifi.herokuapp.com" #SERVER_URL = "https://xbgw-333.herokuapp.com" def make_url(uri): return '{}{}'.format(SERVER_URL, uri) browser = None driver = "phantomjs" def _put_local_phantomjs_on_path(): # Use local PhantomJS installation (from node_modules) by putting the path
def __init__(self, selenium_driver):
    """Keep *selenium_driver*, maximise its window and configure logging.

    Selenium's logger is limited to warnings, while this module's own
    logger is set to INFO.
    """
    LOGGER.setLevel(logging.WARNING)

    module_logger = logging.getLogger(__name__)
    module_logger.setLevel(logging.INFO)
    self.logger = module_logger

    self.driver = selenium_driver
    self.driver.maximize_window()
# from splinter import Browser import datetime import json import os import logging import time logging.basicConfig() logger = logging.getLogger("splinter_tests.__init__") # Quiet down the Selenium logging from selenium.webdriver.remote.remote_connection import \ LOGGER as _selenium_logger _selenium_logger.setLevel(logging.INFO) SERVER_URL = "http://localhost:5859" #SERVER_URL = "https://xbeewifi.herokuapp.com" #SERVER_URL = "https://xbgw-333.herokuapp.com" def make_url(uri): return '{}{}'.format(SERVER_URL, uri) browser = None driver = "phantomjs" def _put_local_phantomjs_on_path():
def download(cfg):
    """Download the supplier price list with Firefox and rotate local copies.

    Settings are read from the ``download`` section of *cfg*:
    ``filename_new``, ``filename_old``, ``login``, ``password``, ``url_lk``
    and ``url_file``. The browser saves the file into ``./tmp`` (emptied
    first); exactly one new file is expected there afterwards.

    Returns:
        True when a spreadsheet/CSV was downloaded and copied to
        ``filename_new`` (the previous ``filename_new`` becomes
        ``filename_old``); False when nothing or more than one file was
        downloaded; None for the legacy zip branch or any other extension.
    """
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.remote.remote_connection import LOGGER
    LOGGER.setLevel(logging.WARNING)

    filename_new = cfg.get('download', 'filename_new')
    filename_old = cfg.get('download', 'filename_old')
    login = cfg.get('download', 'login')
    password = cfg.get('download', 'password')
    url_lk = cfg.get('download', 'url_lk')
    url_file = cfg.get('download', 'url_file')

    # Start from an empty download directory so that any file present
    # afterwards must come from this run.
    download_path = os.path.join(os.getcwd(), 'tmp')
    if not os.path.exists(download_path):
        os.mkdir(download_path)
    for fName in os.listdir(download_path):
        os.remove(os.path.join(download_path, fName))
    dir_befo_download = set(os.listdir(download_path))

    if os.path.exists('geckodriver.log'):
        os.remove('geckodriver.log')

    driver = None
    try:
        ffprofile = webdriver.FirefoxProfile()
        ffprofile.set_preference("browser.download.dir", download_path)
        ffprofile.set_preference("browser.download.folderList", 2)
        # MIME types Firefox should save without showing a dialog.
        ffprofile.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            ",application/octet-stream" + ",application/vnd.ms-excel" +
            ",application/vnd.msexcel" + ",application/x-excel" +
            ",application/x-msexcel" + ",application/zip" +
            ",application/xls" + ",application/vnd.ms-excel" +
            ",application/vnd.ms-excel.addin.macroenabled.12" +
            ",application/vnd.ms-excel.sheet.macroenabled.12" +
            ",application/vnd.ms-excel.template.macroenabled.12" +
            ",application/vnd.ms-excelsheet.binary.macroenabled.12" +
            ",application/vnd.ms-fontobject" +
            ",application/vnd.ms-htmlhelp" + ",application/vnd.ms-ims" +
            ",application/vnd.ms-lrm" + ",application/vnd.ms-officetheme" +
            ",application/vnd.ms-pki.seccat" +
            ",application/vnd.ms-pki.stl" +
            ",application/vnd.ms-word.document.macroenabled.12" +
            ",application/vnd.ms-word.template.macroenabed.12" +
            ",application/vnd.ms-works" + ",application/vnd.ms-wpl" +
            ",application/vnd.ms-xpsdocument" +
            ",application/vnd.openofficeorg.extension" +
            ",application/vnd.openxmformats-officedocument.wordprocessingml.document"
            +
            ",application/vnd.openxmlformats-officedocument.presentationml.presentation"
            +
            ",application/vnd.openxmlformats-officedocument.presentationml.slide"
            +
            ",application/vnd.openxmlformats-officedocument.presentationml.slideshw"
            +
            ",application/vnd.openxmlformats-officedocument.presentationml.template"
            +
            ",application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            +
            ",application/vnd.openxmlformats-officedocument.spreadsheetml.template"
            +
            ",application/vnd.openxmlformats-officedocument.wordprocessingml.template"
            + ",application/x-ms-application" + ",application/x-ms-wmd" +
            ",application/x-ms-wmz" + ",application/x-ms-xbap" +
            ",application/x-msaccess" + ",application/x-msbinder" +
            ",application/x-mscardfile" + ",application/x-msclip" +
            ",application/x-msdownload" + ",application/x-msmediaview" +
            ",application/x-msmetafile" + ",application/x-mspublisher" +
            ",application/x-msschedule" + ",application/x-msterminal" +
            ",application/x-mswrite" + ",application/xml" +
            ",application/xml-dtd" + ",application/xop+xml" +
            ",application/xslt+xml" + ",application/xspf+xml" +
            ",application/xv+xml" + ",application/excel")

        if os.name == 'posix':
            driver = webdriver.Firefox(
                ffprofile,
                executable_path=
                r'/usr/local/Cellar/geckodriver/0.19.1/bin/geckodriver')
        elif os.name == 'nt':
            driver = webdriver.Firefox(ffprofile)
        else:
            raise RuntimeError('Unsupported OS: ' + os.name)

        driver.implicitly_wait(30)
        driver.get(url_lk)
        time.sleep(2)

        # Use the credentials from the config file; they used to be read
        # and then ignored in favour of values hard-coded here.
        driver.find_element_by_id(
            "cntMain_ctrlLogin_loginControl_Username").clear()
        driver.find_element_by_id(
            "cntMain_ctrlLogin_loginControl_Username").send_keys(login)
        driver.find_element_by_id(
            "cntMain_ctrlLogin_loginControl_Password").clear()
        driver.find_element_by_id(
            "cntMain_ctrlLogin_loginControl_Password").send_keys(password)
        driver.find_element_by_id(
            "cntMain_ctrlLogin_loginControl_Login").click()
        time.sleep(3)

        # Walk through the download dialog step by step.
        driver.find_element_by_id("cntMain_btnDownload").click()
        time.sleep(3)
        driver.find_element_by_id("cntModal_chkTerms").click()
        time.sleep(3)
        driver.find_element_by_id("cntModal_btnOneSheetExcel").click()
        time.sleep(3)
        driver.find_element_by_id("cntModal_lbDownload").click()
        time.sleep(5)
        driver.find_element_by_xpath("(//button[@type='button'])[3]").click()
        time.sleep(3)
    except Exception as e:
        # Best effort: the outcome is judged below by what landed on disk.
        log.debug('Exception: <' + str(e) + '>')
    finally:
        # Always release the browser, also when a step above failed.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                log.debug('Exception: <' + str(e) + '>')

    dir_afte_download = set(os.listdir(download_path))
    new_files = list(dir_afte_download.difference(dir_befo_download))
    print(new_files)
    if len(new_files) == 0:
        log.error('Не удалось скачать файл прайса ')
        return False
    elif len(new_files) > 1:
        log.error('Скачалось несколько файлов. Надо разбираться ...')
        return False
    else:
        new_file = new_files[0]  # exactly one file was downloaded
        new_ext = os.path.splitext(new_file)[-1].lower()
        DnewFile = os.path.join(download_path, new_file)
        new_file_date = os.path.getmtime(DnewFile)
        log.info(
            'Скачанный файл ' + new_file + ' имеет дату ' +
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(new_file_date)))

        print(new_ext)
        if new_ext in ('.xls', '.xlsx', '.xlsb', '.xlsm', '.csv'):
            # Rotate: current "new" becomes "old", then install the download.
            if os.path.exists(filename_new) and os.path.exists(filename_old):
                os.remove(filename_old)
                os.rename(filename_new, filename_old)
            if os.path.exists(filename_new):
                os.rename(filename_new, filename_old)
            shutil.copy2(DnewFile, filename_new)
            return True

        elif new_ext == '.zip':
            # Legacy branch, never verified: archive handling is unfinished
            # and the extracted file is not installed.
            log.debug('Zip-архив. Разархивируем.')
            work_dir = os.getcwd()
            os.chdir(os.path.join(download_path))
            dir_befo_download = set(os.listdir(os.getcwd()))
            # NOTE(review): shell command built from a downloaded file name.
            os.system('unzip -oj ' + new_file)
            os.remove(new_file)
            dir_afte_download = set(os.listdir(os.getcwd()))
            new_files = list(dir_afte_download.difference(dir_befo_download))
            if len(new_files) == 1:
                new_file = new_files[0]  # exactly one file was extracted
                new_ext = os.path.splitext(new_file)[-1]
                DnewFile = os.path.join(os.getcwd(), new_file)
                new_file_date = os.path.getmtime(DnewFile)
                log.debug('Файл из архива ' + DnewFile + ' имеет дату ' +
                          time.strftime("%Y-%m-%d %H:%M:%S",
                                        time.localtime(new_file_date)))
                DnewPrice = DnewFile
            elif len(new_files) > 1:
                log.debug('В архиве не единственный файл. Надо разбираться.')
                DnewPrice = "dummy"
            else:
                log.debug(
                    'Нет новых файлов после разархивации. Загляни в папку юниттеста поставщика.'
                )
                DnewPrice = "dummy"
            os.chdir(work_dir)
def parse(self, response):
    """Yield one item per building row found for each numeric search term.

    For every number from 580 to 9999 a fresh Chrome session logs in to
    prorealtors.in, searches for that number in the third search tab and
    yields a ``ProrealtorsItem`` for each result row after the header row.

    Each row gets its own item and rows that cannot be read are skipped,
    so no stale or half-filled item is emitted. The browser is always quit,
    also when a step fails.

    Raises:
        SystemExit: with status 1 when the login step fails.
    """
    LOGGER.setLevel(logging.WARNING)
    try:
        for i in range(580, 10000):
            driver = Chrome()
            try:
                try:
                    driver.get('http://prorealtors.in/Home/Login')
                    time.sleep(2)
                    mail_input = driver.find_element_by_id('txtUserName')
                    mail_input.send_keys('*****@*****.**')
                    password_input = driver.find_element_by_id('txtPassword')
                    password_input.send_keys('prorealtor')
                    login_submit = driver.find_element_by_id('btnSubmit')
                    login_submit.click()
                    time.sleep(2)
                except Exception as e:
                    print(e)
                    # Same effect as the former quit(1), without relying on
                    # the interactive-only builtin.
                    raise SystemExit(1)

                try:
                    print('Current No:', i)
                    driver.get('http://prorealtors.in/')
                    time.sleep(2)
                    ca_assist = driver.find_element_by_xpath(
                        '//section[@id="main"]/section[1]/section/div/div/ul/li[3]/a')
                    ca_assist.click()
                    time.sleep(2)
                    ca_input = driver.find_element_by_id('txtBuildNameC')
                    ca_input.send_keys(str(i))
                    time.sleep(2)
                    find_ca_assist = driver.find_element_by_xpath(
                        '//div[@id="tab3"]/form/div[@class="inner"]/a/button')
                    find_ca_assist.click()
                    time.sleep(2)
                    # print('Windows Handles as of Now : ', driver.window_handles)
                    table_rows = driver.find_elements_by_xpath(
                        '/html/body/section[3]/div/div/table/tbody/tr')
                    # The first row is skipped (header).
                    for row in table_rows[1:]:
                        item = ProrealtorsItem()
                        try:
                            item['Building_Name'] = row.find_element_by_xpath('./td[2]').text
                            item['CS_Info'] = row.find_element_by_xpath('./td[4]').text
                            item['Location'] = row.find_element_by_xpath('./td[6]').text
                            item['platform'] = 'http://prorealtors.in'
                            item['scraped_time'] = datetime.now().strftime('%m/%d/%Y')
                        except Exception:
                            # Unreadable row: skip it.
                            continue
                        yield item
                except Exception:
                    # Best effort: move on to the next number.
                    pass
            finally:
                driver.quit()
    except Exception as e:
        print(e)
from glob import glob from bs4 import BeautifulSoup from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.remote.remote_connection import LOGGER, logging from webdriver_manager.chrome import ChromeDriverManager LOGGER.setLevel(logging.FATAL) options = Options() options.add_argument('--headless') options.add_argument('--disable-gpu') driver = webdriver.Chrome(ChromeDriverManager().install(), chrome_options=options) app = FastAPI() # origins = [ # "http://localhost.tiangolo.com", # "https://localhost.tiangolo.com", # "http://localhost", # "http://localhost:8080", # ] origins = '*' app.add_middleware( CORSMiddleware,
def start_logging() -> None:
    """Set up root logging and quieten the noisy third-party loggers.

    The root logger prints bare messages at DEBUG level, urllib3 is limited
    to errors, Selenium to warnings and above, and Python warnings are
    ignored altogether.
    """
    logging.basicConfig(format='%(message)s', level=logging.DEBUG)

    quiet_levels = (
        (logging.getLogger('urllib3'), logging.ERROR),  # Reduce urllib logs.
        (SELENIUM_LOGGER, logging.WARNING),  # No info and debug logs.
    )
    for noisy_logger, minimum_level in quiet_levels:
        noisy_logger.setLevel(minimum_level)

    warnings.filterwarnings('ignore')  # Hide warnings logs
import logging import os # from pyvirtualdisplay import Display from selenium import webdriver from selenium.webdriver.common.keys import Keys from selenium.webdriver.remote.remote_connection import LOGGER from subprocess import call import time import unittest from webdriver_utils import ( Server, Client ) LOGGER.setLevel(logging.WARNING) # silence logging """ this setting controls whether the browser will be visible (1) or not (0) """ is_visible = 1 # configuration of testing framework cfg = { 'app': { 'host': '0.0.0.0', 'port': '6543', 'db': 'webdrivertest.db', 'ini': "webdrivertest.ini", 'appSettings': {}, },
# import all required modules from selenium import webdriver import time, re, hashlib, csv import pandas as pd from selenium.webdriver.common.keys import Keys import usaddress import logging, datetime, sys from selenium.webdriver.remote.remote_connection import LOGGER LOGGER.setLevel(logging.ERROR) username = '******' salutation = '' #Mr. Miss client_name = '' client_address = '' billing_card_number = '' card_cvv_number = '' billing_address = '' billing_expiry_date = '' # log file initialize logging.basicConfig( level=logging.DEBUG, filename='logfile_' + username + '.log', # log to this file format='%(asctime)s -- %(message)s') # include timestamp headers = ['Date' , 'Client_Name', 'State', 'County', 'searchterm', 'recordyear', 'last_document_type', \ 'last_document_left', 'right_side_name', 'right_side_address', 'right_side_date', 'Doctypes', 'Record_Count', 'last data record found', \ 'parsed_rows', 'client_name', 'client_address', 'billing_card_number', 'card_cvv_number', 'billing_address', 'billing_expiry_date'] ignore_keywords = [ 'escrow', 'law', 'associates', 'recorder', 'recorders', '$', 'iiii', 'service', 'closing', 'title'
def main():
    """Run the appointment checker until an appointment is found.

    Parses the command-line arguments (or asks the user for them unless
    ``NOTuserinteractive`` is set), configures logging to a per-postcode
    debug file and to stdout, and then loops forever: each cycle starts a
    new Chrome driver, walks through the booking pages and checks whether
    the final page reports appointments.

    The process exits with status 0 when an appointment is found (after the
    user presses Enter) or on ``KeyboardInterrupt``. Every other outcome
    closes the browser, waits and starts the next cycle.
    """
    # --- Set up Chrome options ---
    options = webdriver.ChromeOptions()
    # Keep the browser open after the program ends
    options.add_experimental_option("detach", True)
    # Suppress a spurious warning (Bluetooth error)
    options.add_experimental_option("excludeSwitches", ["enable-logging"])
    # --- Set up arguments ---
    parser = init_argument_parser()
    arguments = parser.parse_args()
    # Unless the non-interactive flag is set, re-parse with the arguments
    # collected from the user.
    if not arguments.NOTuserinteractive:
        arguments = parser.parse_args(get_arguments_from_user())
    code = arguments.code
    plz = arguments.plz
    bundesland = arguments.bundesland
    driver_path = arguments.driver
    minimized = arguments.minimized
    sound = arguments.soundpath
    pushsafer_code = arguments.pushsaferCode
    wait = arguments.wait
    zyklus = arguments.zyklus
    debug = arguments.debug
    url = "https://www.impfterminservice.de/impftermine"
    # --- Set up logger ---
    seleniumLogger.setLevel(logging.WARNING)
    urllibLogger.setLevel(logging.WARNING)
    # Everything (DEBUG and above) goes to the file, INFO and above to stdout.
    file_handler = logging.FileHandler(f"debug-{plz}.log")
    file_handler.setLevel(logging.DEBUG)
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(logging.INFO)
    logging.basicConfig(
        format='%(asctime)s\t%(levelname)s\t%(lineno)d: %(message)s',
        handlers=[file_handler, stream_handler],
        level=logging.DEBUG)
    # --- Start message ---
    logging.info("=== START ===")
    logging.info("Code: %s", code)
    logging.info("PLZ: %s", plz)
    logging.info("Bundesland: %s", bundesland)
    logging.info("Treiber: %s", driver_path)
    logging.info("Minimiert: %s", minimized)
    logging.info("Sound Pfad: %s", sound)
    logging.info("Warte auf Seite: %s sek.", wait)
    logging.info("Zyklus: %s sek.", zyklus)
    logging.info("url: %s", url)
    logging.info("debug: %s", debug)
    logging.info("=============")
    while True:
        # --- Create driver ---
        driver = webdriver.Chrome(driver_path, options=options)
        if minimized:
            logging.debug("Minimize Funktion ausser Betrieb")
            # driver.minimize_window()
        # --- Start calling the web pages
        try:
            # Start page
            driver.get(url)
            print_countdown(wait, "Warte auf Seite... ")
            # Choose the vaccination centre
            impfzentrum_waehlen(bundesland, plz, driver)
            print_countdown(wait, "Warte auf Seite... ")
            # Skip the waiting room if there is one
            check_queue(driver)
            print_countdown(wait, "Warte auf Seite... ")
            # Enter the code
            vermittlungscode_eingeben(code, driver, wait)
            print_countdown(wait, "Warte auf Seite... ")
            # Search for an appointment
            termin_suchen(driver)
            print_countdown(wait + 15, "Warte auf Seite... ")
            # A page containing "Fehler" is routed to the generic
            # Exception handler below.
            if driver.page_source.find("Fehler") != -1:
                raise Exception
        # --- Error handling when an element was not found ---
        except NoSuchElementException as error:
            if driver.page_source.find("Warteraum") != -1:
                logging.warning(
                    "Seite befindet sich im Warteraum - Pause von %s sek wird eingelegt",
                    zyklus)
            else:
                if debug:
                    create_screenshot(driver, "debug_NoSuchElementException")
                logging.warning(
                    "Element zum klicken konnte nicht gefunden werden, bitte prüfen - Pause von %s sek wird eingelegt",
                    zyklus)
                logging.debug("Error message: ", exc_info=error)
            driver.quit()
            print_countdown(zyklus)
        # --- Restart when the program has made too many requests ---
        except StuckedException as error:
            logging.info(
                "Programm wird aufgrund von vielen Fehlveruchen neu gestartet - evtl. Zyklus hochsetzten"
            )
            logging.info(
                "Längere Pause von 5min wird eingelegt um das Problem zu beheben"
            )
            driver.quit()
            print_countdown(60 * 5)
        # --- Errors raised by WebDriverWait ---
        except (ElementClickInterceptedException, TimeoutException) as error:
            if debug:
                create_screenshot(driver,
                                  "debug_ElementClickInterceptedException")
            logging.info("Element zum klicken konnte nicht gefunden werden")
            logging.debug("Fehler: ", exc_info=error)
            driver.quit()
            print_countdown(zyklus)
        except KeyboardInterrupt as error:
            logging.info("Programm beenden", exc_info=error)
            driver.quit()
            sys.exit(0)
        # --- Unknown error: log it, wait five minutes and retry ---
        except Exception as error:
            if debug:
                create_screenshot(driver, "debug_allgemeine_Exception")
            logging.critical("Unerwarteter Fehler!", exc_info=error)
            driver.quit()
            print_countdown(60 * 5, "Starte neu in...")
        # --- Pages were loaded successfully. Check whether an appointment is available ---
        else:
            # The absence of the text "keine Termine" is treated as
            # "appointment found".
            if driver.page_source.find("keine Termine") == -1:
                if debug:
                    create_screenshot(driver, "debug_termin_gefunden")
                logging.info("Termin gefunden! - %s", plz)
                if pushsafer_code:
                    send_push_nachricht(f"Termine verfügbar!!! - {plz}",
                                        pushsafer_code)
                    logging.info("Pushbenachrichtigung gesendet")
                play_sound(sound)
                logging.info("Sound abgespielt")
                # The driver is not quit on this path; the program waits
                # for Enter and then exits.
                input("Zum Beenden Enter drücken...")
                sys.exit(0)
            else:
                driver.close()
                logging.info(
                    "Kein Termin gefunden - Browser geschlossen - erneuter Versuch in %s Sekunden",
                    zyklus)
                print_countdown(zyklus)
import sys import os import pickle from urllib.parse import quote import asyncio from getpass import getuser from logging import WARNING from itertools import zip_longest if getuser() != 'pi': # orangepi 上不检查优惠券信息 from selenium.webdriver.remote.remote_connection import LOGGER LOGGER.setLevel(WARNING) from selenium import webdriver from selenium.common.exceptions import NoSuchElementException from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.firefox.options import Options #-# from pyvirtualdisplay import Display else: webdriver = None NoSuchElementException = None from IPython import embed embed if __name__ == '__main__': sys.path.append( os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) from applib.tools_lib import pcformat from applib.conf_lib import getConf from applib.log_lib import app_log info, debug, warn, error = app_log.info, app_log.debug, app_log.warning, app_log.error
def __init__(self, *args, **kwargs):
    """Initialise the spider and start the PhantomJS driver it uses.

    Args:
        *args: Positional arguments forwarded to ``scrapy.Spider.__init__``.
        **kwargs: Keyword arguments forwarded to ``scrapy.Spider.__init__``.
            Accepting them keeps spider arguments (``-a key=value``)
            working instead of raising ``TypeError``.
    """
    scrapy.Spider.__init__(self, *args, **kwargs)
    self.driver = webdriver.PhantomJS(
        executable_path='/usr/local/bin/phantomjs')
    # Limit Selenium's remote-connection logger to warnings and above.
    LOGGER.setLevel(logging.WARNING)
from pageobjects import SideBar from pageobjects import Cluster from pageobjects import Variables from pageobjects import Plan from pageobjects import Deploy from terraformCmd import TerraformCmd from selenium.webdriver.remote.remote_connection import (LOGGER as seleniumLogger) from urllib3.connectionpool import log as urllibLogger import logging import uuid import pytest import time import pickle seleniumLogger.setLevel(logging.INFO) urllibLogger.setLevel(logging.INFO) @pytest.fixture def prepare_env(cmdopt, logger): logger.info("Prepare for GKE testing") creds = open(os.environ.get('GCE_SERVICE_ACCOUNT')).read() domain = (os.environ.get('FQDN') or "gke{}.anton.bear454.codes".format(str(uuid.uuid4())[:3])) email = (os.environ.get('EMAIL') or '*****@*****.**') variables_values = { 'project': os.environ.get('GCE_PROJECT'), 'location': os.environ.get('GCE_TEST_LOCATION'), 'credentials_json': creds, 'dns_credentials_json': creds,
#!/usr/bin/env python3 # -*- coding: utf-8 -*- import logging import time from selenium import webdriver from selenium.webdriver.support.ui import Select from selenium.common.exceptions import NoSuchElementException, NoAlertPresentException from selenium.webdriver.remote.remote_connection import LOGGER LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s" logging.basicConfig(level=logging.INFO, format=LOG_FORMAT) logger = logging.getLogger(__name__) LOGGER.setLevel(logging.INFO) IMPLICIT_WAIT_TIME = 20 def addLog(f): def wrapped_f(*args, **kwargs): logger.info("Enter: {}".format(f.__name__)) result = f(*args, **kwargs) logger.info("Exit: {}".format(f.__name__)) return result return wrapped_f class CucmWebApp(object): ccmadmin = "ccmadmin"
#logger.addHandler(fileHandler) logger.addHandler(streamHandler) # 설정 출력 logger.info("Ini file Location : " + file_name) logger.info("Driver Location : " + driver_location) logger.info("Telegram Bot Token : " + my_token) logger.info("Telegram Channel ID : " + channel_id) logger.info("Start Timer : " + str(start_timer)) logger.info("Delay Timer : " + str(delay_timer)) logger.info("초기 구성 완료.") # 웹드라이버 및 봇 초기화 my_bot = telegram.Bot(token=my_token) driver = webdriver.Chrome(driver_location) LOGGER.setLevel(logging.CRITICAL) driver.maximize_window() driver.get("https://upbit.com/exchange?") # 프로그램 시작 upbit_avail = [] binance_avail = [] if program_mode == "Test": upbit_init = True binance_init = True else: upbit_init = False binance_init = False sleep(start_timer)
import os
from logging import WARNING

from splinter.browser import Browser
from selenium.webdriver.remote.remote_connection import LOGGER

# Limit Selenium's remote-connection logger to warnings and above.
LOGGER.setLevel(WARNING)


def before_all(context):
    """Start the browser used by the whole test run.

    The browser name is taken from ``context.config.browser``, then from the
    ``browser`` environment variable, and finally defaults to ``'firefox'``.
    """
    browser_name = context.config.browser
    if not browser_name:
        browser_name = os.environ.get('browser')
    if not browser_name:
        browser_name = 'firefox'
    context.browser = Browser(browser_name)


def after_all(context):
    """Quit the browser started in ``before_all`` and clear the reference."""
    running_browser = context.browser
    running_browser.quit()
    context.browser = None
from selenium.webdriver.common.keys import Keys from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By import urlparse from demerio_utils.log import * from selenium.webdriver.remote.remote_connection import LOGGER LOGGER.setLevel(logging.WARNING) TIME_TO_WAIT = 10 def convert_url_to_query_dict(url): return dict(urlparse.parse_qsl(urlparse.urlsplit(url).query)) def enter_text_by_id(driver, id, content, press_return=True): wait = WebDriverWait(driver, TIME_TO_WAIT) element = wait.until(EC.element_to_be_clickable((By.ID, id))) element.send_keys(content) if press_return: element.send_keys(Keys.RETURN) def click_button_by_id(driver, id): wait = WebDriverWait(driver, TIME_TO_WAIT) btn = wait.until(EC.element_to_be_clickable((By.ID, id))) btn.click() def enter_text_by_xpath(driver, xpath, content, press_return=True):
def download(cfg):
    """Download the price-list file through Firefox and store it locally.

    The ``tmp`` directory under the current working directory is emptied,
    Firefox is driven through the login and document pages at ``url_lk``,
    and whatever file appears in ``tmp`` afterwards is treated as the
    download. A spreadsheet download replaces ``filename_new``; the
    previous ``filename_new`` is kept as ``filename_old``.

    Args:
        cfg: Configuration object whose ``get(section, option)`` supplies
            the ``download`` options ``filename_new``, ``filename_old``,
            ``login``, ``password`` and ``url_lk``.

    Returns:
        bool: ``True`` when exactly one new file with a spreadsheet
        extension was downloaded and stored, ``False`` otherwise.
    """
    from selenium import webdriver
    from selenium.webdriver.remote.remote_connection import LOGGER
    LOGGER.setLevel(logging.WARNING)

    filename_new = cfg.get('download', 'filename_new')
    filename_old = cfg.get('download', 'filename_old')
    login = cfg.get('download', 'login')
    password = cfg.get('download', 'password')
    url_lk = cfg.get('download', 'url_lk')

    # Start from an empty download directory so that any file present
    # afterwards must come from this run.
    download_path = os.path.join(os.getcwd(), 'tmp')
    if not os.path.exists(download_path):
        os.mkdir(download_path)
    for fName in os.listdir(download_path):
        os.remove(os.path.join(download_path, fName))
    dir_befo_download = set(os.listdir(download_path))

    if os.path.exists('geckodriver.log'):
        os.remove('geckodriver.log')

    driver = None
    try:
        ffprofile = webdriver.FirefoxProfile()
        ffprofile.set_preference("browser.download.dir", download_path)
        ffprofile.set_preference("browser.download.folderList", 2)
        # MIME types Firefox saves without showing a dialog. The value is
        # kept exactly as it was, including the leading comma.
        ffprofile.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            ",application/octet-stream" +
            ",application/vnd.ms-excel" +
            ",application/vnd.msexcel" +
            ",application/x-excel" +
            ",application/x-msexcel" +
            ",application/zip" +
            ",application/xls" +
            ",application/vnd.ms-excel" +
            ",application/vnd.ms-excel.addin.macroenabled.12" +
            ",application/vnd.ms-excel.sheet.macroenabled.12" +
            ",application/vnd.ms-excel.template.macroenabled.12" +
            ",application/vnd.ms-excelsheet.binary.macroenabled.12" +
            ",application/vnd.ms-fontobject" +
            ",application/vnd.ms-htmlhelp" +
            ",application/vnd.ms-ims" +
            ",application/vnd.ms-lrm" +
            ",application/vnd.ms-officetheme" +
            ",application/vnd.ms-pki.seccat" +
            ",application/vnd.ms-pki.stl" +
            ",application/vnd.ms-word.document.macroenabled.12" +
            ",application/vnd.ms-word.template.macroenabed.12" +
            ",application/vnd.ms-works" +
            ",application/vnd.ms-wpl" +
            ",application/vnd.ms-xpsdocument" +
            ",application/vnd.openofficeorg.extension" +
            ",application/vnd.openxmformats-officedocument.wordprocessingml.document" +
            ",application/vnd.openxmlformats-officedocument.presentationml.presentation" +
            ",application/vnd.openxmlformats-officedocument.presentationml.slide" +
            ",application/vnd.openxmlformats-officedocument.presentationml.slideshw" +
            ",application/vnd.openxmlformats-officedocument.presentationml.template" +
            ",application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" +
            ",application/vnd.openxmlformats-officedocument.spreadsheetml.template" +
            ",application/vnd.openxmlformats-officedocument.wordprocessingml.template" +
            ",application/x-ms-application" +
            ",application/x-ms-wmd" +
            ",application/x-ms-wmz" +
            ",application/x-ms-xbap" +
            ",application/x-msaccess" +
            ",application/x-msbinder" +
            ",application/x-mscardfile" +
            ",application/x-msclip" +
            ",application/x-msdownload" +
            ",application/x-msmediaview" +
            ",application/x-msmetafile" +
            ",application/x-mspublisher" +
            ",application/x-msschedule" +
            ",application/x-msterminal" +
            ",application/x-mswrite" +
            ",application/xml" +
            ",application/xml-dtd" +
            ",application/xop+xml" +
            ",application/xslt+xml" +
            ",application/xspf+xml" +
            ",application/xv+xml" +
            ",application/excel")
        if os.name == 'posix':
            driver = webdriver.Firefox(
                ffprofile,
                executable_path=r'/usr/local/Cellar/geckodriver/0.19.1/bin/geckodriver')
        elif os.name == 'nt':
            driver = webdriver.Firefox(ffprofile)
        driver.implicitly_wait(30)
        driver.get(url_lk)
        time.sleep(1)
        driver.set_page_load_timeout(10)
        driver.find_element_by_id("Email").clear()
        driver.find_element_by_id("Email").send_keys(login)
        driver.find_element_by_id("Password").clear()
        driver.find_element_by_id("Password").send_keys(password)
        driver.find_element_by_xpath(u"//input[@value='Войти']").click()
        driver.find_element_by_link_text(u"Kабинет").click()
        driver.find_element_by_link_text(u"Мои документы").click()
        driver.find_element_by_xpath("(//button[@type='submit'])[2]").click()
        # Give the browser time to finish the download before it is closed.
        time.sleep(10)
    except Exception as e:
        log.debug('Exception: <' + str(e) + '>')
    finally:
        # Close the browser even when a step above failed; previously a
        # Selenium error left the Firefox process running.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                log.debug('Exception: <' + str(e) + '>')

    dir_afte_download = set(os.listdir(download_path))
    new_files = list(dir_afte_download.difference(dir_befo_download))
    print(new_files)
    if not new_files:
        log.error('Не удалось скачать файл прайса ')
        return False
    if len(new_files) > 1:
        log.error('Скачалось несколько файлов. Надо разбираться ...')
        return False

    new_file = new_files[0]  # exactly one file was downloaded
    new_ext = os.path.splitext(new_file)[-1].lower()
    DnewFile = os.path.join(download_path, new_file)
    new_file_date = os.path.getmtime(DnewFile)
    log.info('Скачанный файл ' + new_file + ' имеет дату ' +
             time.strftime("%Y-%m-%d %H:%M:%S",
                           time.localtime(new_file_date)))
    print(new_ext)
    if new_ext not in ('.xls', '.xlsx', '.xlsb', '.xlsm', '.csv'):
        # Unsupported file type: report failure explicitly instead of
        # falling off the end of the function and returning None.
        return False

    # Keep the previous price list as filename_old. The old copy is removed
    # first because os.rename does not overwrite on every platform.
    if os.path.exists(filename_new):
        if os.path.exists(filename_old):
            os.remove(filename_old)
        os.rename(filename_new, filename_old)
    shutil.copy2(DnewFile, filename_new)
    return True
def download(cfg):
    """Download the Excel price list through Firefox and store it locally.

    The ``tmp`` directory under the current working directory is emptied,
    Firefox logs in at ``url_lk`` and opens the price-list page, and
    whatever file appears in ``tmp`` afterwards is treated as the download.
    A spreadsheet (or a zip archive containing exactly one file) replaces
    ``filename_new``; the previous ``filename_new`` is kept as
    ``filename_old``.

    Args:
        cfg: Configuration object whose ``get(section, option)`` supplies
            the ``download`` options ``filename_new``, ``filename_old``,
            ``login``, ``password`` and ``url_lk``.

    Returns:
        bool: ``True`` when a new price file was stored, ``False``
        otherwise.
    """
    import subprocess

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.remote.remote_connection import LOGGER
    LOGGER.setLevel(logging.WARNING)

    filename_new = cfg.get('download', 'filename_new')
    filename_old = cfg.get('download', 'filename_old')
    login = cfg.get('download', 'login')
    password = cfg.get('download', 'password')
    url_lk = cfg.get('download', 'url_lk')

    def _store_as_new_price(src_path):
        # Keep the previous price list as filename_old, then install the
        # download as filename_new. The old copy is removed first because
        # os.rename does not overwrite on every platform.
        if os.path.exists(filename_new):
            if os.path.exists(filename_old):
                os.remove(filename_old)
            os.rename(filename_new, filename_old)
        shutil.copy2(src_path, filename_new)

    # Start from an empty download directory so that any file present
    # afterwards must come from this run.
    download_path = os.path.join(os.getcwd(), 'tmp')
    if not os.path.exists(download_path):
        os.mkdir(download_path)
    for fName in os.listdir(download_path):
        os.remove(os.path.join(download_path, fName))
    dir_befo_download = set(os.listdir(download_path))

    if os.path.exists('geckodriver.log'):
        os.remove('geckodriver.log')

    driver = None
    try:
        ffprofile = webdriver.FirefoxProfile()
        ffprofile.set_preference("browser.download.dir", download_path)
        ffprofile.set_preference("browser.download.folderList", 2)
        # MIME types Firefox saves without showing a dialog. The value is
        # kept exactly as it was, including the leading comma.
        ffprofile.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            ",application/octet-stream" +
            ",application/vnd.ms-excel" +
            ",application/vnd.msexcel" +
            ",application/x-excel" +
            ",application/x-msexcel" +
            ",application/zip" +
            ",application/xls" +
            ",application/vnd.ms-excel" +
            ",application/vnd.ms-excel.addin.macroenabled.12" +
            ",application/vnd.ms-excel.sheet.macroenabled.12" +
            ",application/vnd.ms-excel.template.macroenabled.12" +
            ",application/vnd.ms-excelsheet.binary.macroenabled.12" +
            ",application/vnd.ms-fontobject" +
            ",application/vnd.ms-htmlhelp" +
            ",application/vnd.ms-ims" +
            ",application/vnd.ms-lrm" +
            ",application/vnd.ms-officetheme" +
            ",application/vnd.ms-pki.seccat" +
            ",application/vnd.ms-pki.stl" +
            ",application/vnd.ms-word.document.macroenabled.12" +
            ",application/vnd.ms-word.template.macroenabed.12" +
            ",application/vnd.ms-works" +
            ",application/vnd.ms-wpl" +
            ",application/vnd.ms-xpsdocument" +
            ",application/vnd.openofficeorg.extension" +
            ",application/vnd.openxmformats-officedocument.wordprocessingml.document" +
            ",application/vnd.openxmlformats-officedocument.presentationml.presentation" +
            ",application/vnd.openxmlformats-officedocument.presentationml.slide" +
            ",application/vnd.openxmlformats-officedocument.presentationml.slideshw" +
            ",application/vnd.openxmlformats-officedocument.presentationml.template" +
            ",application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" +
            ",application/vnd.openxmlformats-officedocument.spreadsheetml.template" +
            ",application/vnd.openxmlformats-officedocument.wordprocessingml.template" +
            ",application/x-ms-application" +
            ",application/x-ms-wmd" +
            ",application/x-ms-wmz" +
            ",application/x-ms-xbap" +
            ",application/x-msaccess" +
            ",application/x-msbinder" +
            ",application/x-mscardfile" +
            ",application/x-msclip" +
            ",application/x-msdownload" +
            ",application/x-msmediaview" +
            ",application/x-msmetafile" +
            ",application/x-mspublisher" +
            ",application/x-msschedule" +
            ",application/x-msterminal" +
            ",application/x-mswrite" +
            ",application/xml" +
            ",application/xml-dtd" +
            ",application/xop+xml" +
            ",application/xslt+xml" +
            ",application/xspf+xml" +
            ",application/xv+xml" +
            ",application/excel")
        if os.name == 'posix':
            driver = webdriver.Firefox(
                ffprofile, executable_path=r'/usr/local/bin/geckodriver')
        elif os.name == 'nt':
            driver = webdriver.Firefox(ffprofile)
        driver.implicitly_wait(20)
        driver.set_page_load_timeout(20)
        driver.get(url_lk)
        driver.set_window_size(1000, 800)
        driver.find_element(By.CSS_SELECTOR,
                            "#dropdown-login > strong").click()
        driver.find_element(By.XPATH,
                            "//fieldset/div[2]/input").send_keys(login)
        print('debug-00')
        time.sleep(1)
        driver.find_element(By.XPATH, "//fieldset/div[3]/div/input").click()
        driver.find_element(
            By.XPATH, "//fieldset/div[3]/div/input").send_keys(password)
        print('debug-01')
        time.sleep(1)
        # -- example of working with a non-clickable element: press and
        # hold the button, move to the submit button, then release.
        element = driver.find_element(By.CSS_SELECTOR, ".btn:nth-child(6)")
        actions = ActionChains(driver)
        actions.move_to_element(element).click_and_hold().perform()
        print('debug-2')
        time.sleep(1)
        element = driver.find_element(By.XPATH, "//button[@type=\'submit\']")
        actions = ActionChains(driver)
        actions.move_to_element(element).perform()
        print('debug-3')
        element = driver.find_element(By.CSS_SELECTOR, ".btn:nth-child(6)")
        actions = ActionChains(driver)
        actions.move_to_element(element).release().perform()
        print('debug-4')
        time.sleep(5)
        # -- end of example
        driver.get("https://www.extron.com/myaccount/excelpricelist")
        print('debug-5')
        time.sleep(2)
        driver.find_element(By.XPATH, "//p[2]/a").click()
        time.sleep(3)
    except Exception as e:
        log.debug('Exception: <' + str(e) + '>')
    time.sleep(2)
    # The driver is still None when Firefox could not be started; quitting
    # it unconditionally used to raise NameError and hide the real error.
    if driver is not None:
        driver.quit()

    dir_afte_download = set(os.listdir(download_path))
    new_files = list(dir_afte_download.difference(dir_befo_download))
    print(new_files)
    if not new_files:
        log.error('Не удалось скачать файл прайса ')
        return False
    if len(new_files) > 1:
        log.error('Скачалось несколько файлов. Надо разбираться ...')
        return False

    new_file = new_files[0]  # exactly one file was downloaded
    new_ext = os.path.splitext(new_file)[-1].lower()
    DnewFile = os.path.join(download_path, new_file)
    new_file_date = os.path.getmtime(DnewFile)
    log.info('Скачанный файл ' + new_file + ' имеет дату ' +
             time.strftime("%Y-%m-%d %H:%M:%S",
                           time.localtime(new_file_date)))
    print(new_ext)

    if new_ext in ('.xls', '.xlsx', '.xlsb', '.xlsm', '.csv'):
        _store_as_new_price(DnewFile)
        return True

    if new_ext != '.zip':
        return False

    # Zip archive. This branch was marked obsolete and untested by its
    # author, and the handling is unfinished.
    log.debug('Zip-архив. Разархивируем.')
    dir_befo_download = set(os.listdir(download_path))
    try:
        # Argument list plus cwd instead of a shell string and os.chdir:
        # the downloaded file name never reaches a shell, and the process
        # working directory is left untouched.
        subprocess.run(['unzip', '-oj', new_file], cwd=download_path)
    except OSError as e:
        log.debug('Exception: <' + str(e) + '>')
    os.remove(DnewFile)
    dir_afte_download = set(os.listdir(download_path))
    new_files = list(dir_afte_download.difference(dir_befo_download))

    if len(new_files) == 1:
        new_file = new_files[0]  # exactly one file was extracted
        DnewFile = os.path.join(download_path, new_file)
        new_file_date = os.path.getmtime(DnewFile)
        log.debug('Файл из архива ' + DnewFile + ' имеет дату ' +
                  time.strftime("%Y-%m-%d %H:%M:%S",
                                time.localtime(new_file_date)))
        _store_as_new_price(DnewFile)
        return True
    if len(new_files) > 1:
        log.debug('В архиве не единственный файл. Надо разбираться.')
        return False
    log.debug(
        'Нет новых файлов после разархивации. Загляни в папку юниттеста поставщика.'
    )
    return False
def parse(self, response):
    """Scrape the dog-licence search results from the MCGM portal.

    Opens the portal in Chrome, enters the two ward numbers, runs the
    search and walks through the result pages by clicking the "next"
    button until that fails. One item is yielded for every table row
    except the first on each page.

    Args:
        response: The response that triggered this callback. It is not
            read; the page is fetched through Selenium.

    Yields:
        DoglicensesItem: A fresh item per table row. Each row gets its own
        item so that a row which fails to parse cannot re-emit values left
        over from the previous row; such a row yields whatever fields were
        read before the failure.
    """
    LOGGER.setLevel(logging.WARNING)
    platform_url = 'http://www.mcgm.gov.in/irj/portal/anonymous/qldoglican?guest_user=english'
    # Item field -> XPath of the cell, relative to the table row.
    cell_xpaths = (
        ('ward', './/td[2]/span'),
        ('valid_from', './/td[3]'),
        ('valid_to', './/td[4]'),
        ('house_no', './/td[5]'),
        ('house_name', './/td[6]'),
        ('street_no', './/td[7]'),
        ('area', './/td[8]'),
        ('area1', './/td[9]'),
        ('postal_code', './/td[10]'),
        ('tele_no', './/td[11]'),
        ('dog_name', './/td[12]'),
        ('gender', './/td[13]'),
        ('breed', './/td[14]'),
        ('age_year', './/td[15]'),
        ('age_month', './/td[16]'),
        ('vaccinate', './/td[17]'),
        ('dr_name', './/td[18]'),
    )
    driver = webdriver.Chrome()
    # try/finally closes the browser on every exit path, including an error
    # in the initial page load and the consumer closing this generator
    # early.
    try:
        driver.get(platform_url)
        time.sleep(5)
        driver.implicitly_wait(5)
        try:
            driver.switch_to.frame(
                driver.find_element_by_id('ivuFrm_page0ivu0'))
            time.sleep(3)
            driver.implicitly_wait(5)
            driver.switch_to.frame(
                driver.find_element_by_xpath('//frameset[1]/frame[1]'))
            time.sleep(3)
            driver.implicitly_wait(5)
            first_ward_no = driver.find_element_by_id('WD1C')
            first_ward_no.clear()
            first_ward_no.send_keys('50000226')
            second_ward_no = driver.find_element_by_id('WD20')
            second_ward_no.clear()
            second_ward_no.send_keys('50000227')
            search_records = driver.find_element_by_id('WD5B')
            search_records.click()
            time.sleep(5)
            while True:
                try:
                    table_rows = driver.find_elements_by_xpath(
                        '//td[@id="WD62-content"]/table/tbody/tr')
                    for row in table_rows[1:]:
                        item = DoglicensesItem()
                        try:
                            for field, xpath in cell_xpaths:
                                item[field] = row.find_element_by_xpath(
                                    xpath).text
                            item['scraped_time'] = datetime.now().strftime(
                                '%m/%d/%Y')
                            item['platform'] = platform_url
                        except Exception:
                            # Best effort: a malformed row still produces
                            # an item with the fields read so far.
                            pass
                        yield item
                    try:
                        next_record = driver.find_element_by_id('WDBE-btn-3')
                        next_record.click()
                        time.sleep(2)
                    except Exception:
                        # No usable "next" button: stop paging.
                        break
                except Exception:
                    break
        except Exception as exc:
            print(exc)
        time.sleep(1)
    finally:
        driver.quit()
import sys import furl as furl import logging import time from selenium.webdriver.remote.remote_connection import LOGGER from requests import get from requests.exceptions import RequestException from contextlib import closing from bs4 import BeautifulSoup from selenium import webdriver LOGGER.setLevel(logging.WARNING) # Selenium logging level chromepath = 'C:/chromedriver/chromedriver.exe' # //change this to your chromedriver path def simple_get(url): try: with closing(get(url, stream=True)) as resp: if is_good_response(resp): return resp.content else: return None except RequestException as e: print('Error during requests to {0} : {1}'.format(url, str(e))) return None def is_good_response(resp): content_type = resp.headers['Content-Type'].lower() return (resp.status_code == 200 and content_type is not None
from time import sleep from .utils import ( Utils, URLS, TIMEOUTS, Participant, tries_n_time_until_true, names_cleaner, ) from .exceptions import LoginError from .interfaces import IModule, IOnAirModule from selenium.webdriver.remote.remote_connection import LOGGER import logging LOGGER.setLevel(logging.WARNING) # reducing selenium verbosity @retry(stop_max_attempt_number=3) def _create_hangout_event(browser, name, attendees): """ Creates hangout event on google plus. As arguments takes event name and attendees list, also should be provided with 'browser' object where visitor is logged in """ browser.get(URLS.onair) browser.by_text('Create a Hangout On Air').click(TIMEOUTS.fast) # Setting name browser.xpath( '//input[@aria-label="Give it a name"]').send_keys(name) # cleaning 'share' field and send attendees list there
from sys import platform
import tempfile
import logging
import click
import time
import json
import os


def open_config():
    """Return the ``Config`` object of ``anime_downloader.config``."""
    # Imported inside the function rather than at module level.
    # NOTE(review): presumably to avoid an import cycle - confirm.
    from anime_downloader.config import Config
    return Config


cache = False
# serverLogger is not defined in this excerpt; it must be brought into
# scope elsewhere in the file.
serverLogger.setLevel(logging.ERROR)
logger = logging.getLogger(__name__)
# Cache directory inside the system temporary directory.
TEMP_FOLDER = os.path.join(tempfile.gettempdir(), 'AnimeDL-SeleniumCache')
data = open_config()

# Create the cache directory at import time if it does not exist yet.
if not os.path.isdir(TEMP_FOLDER):
    os.makedirs(TEMP_FOLDER)


def get_data_dir():
    '''
    Return the directory in which selescrape stores its data, such as
    cookies, browser extensions and logs.

    The path is the ``data`` sub-directory of click's application directory
    for the app name ``'anime downloader'``. The directory is not created
    here.
    '''
    APP_NAME = 'anime downloader'
    return os.path.join(click.get_app_dir(APP_NAME), 'data')