Esempio n. 1
0
def getDriver():
    """Create a PhantomJS driver with a random user agent and an HTTP proxy.

    The user agent is picked at random from ``user_agent_list``.  The proxy
    address is obtained from ``get_proxy().get("proxy")`` and is also stored
    in the module-level ``headers`` mapping under the ``"preProxy"`` key.

    Returns:
        A ``webdriver.PhantomJS`` instance with image loading disabled and
        both the page-load and the script timeout set to 5 seconds.
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    # Disguise the browser with a randomly chosen user agent.  The earlier
    # version first stored the whole ``headers`` object under this key and
    # overwrote it on the very next statement; that dead store was removed.
    dcap["phantomjs.page.settings.userAgent"] = random.choice(user_agent_list)
    # Not loading images makes crawling pages a lot faster.
    dcap["phantomjs.page.settings.loadImages"] = False

    # Keep the chosen proxy in the shared headers mapping, as before.
    headers["preProxy"] = get_proxy().get("proxy")
    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': headers["preProxy"],
    })
    # Add the proxy address to the capabilities handed to PhantomJS.
    proxy.add_to_capabilities(dcap)

    driver = webdriver.PhantomJS(executable_path='download/phantomjs.exe',
                                 desired_capabilities=dcap)
    # driver.get() takes no timeout argument, and a get() that never returns
    # (without raising) would hang the program, so bound the page load time.
    driver.set_page_load_timeout(5)
    # Bound asynchronous script execution to 5 seconds as well.
    driver.set_script_timeout(5)
    return driver
Esempio n. 2
0
    def openDriver(self):
        """Start Chrome through a fixed HTTP proxy and open the CORDIS site.

        The driver is stored in ``self.driver``.  After the page at
        ``https://cordis.europa.eu`` is requested the window is maximized and
        a 10-second implicit wait is configured.
        """
        # Other proxy addresses noted by the original author:
        #   123.115.240.148:8118, 111.77.100.60:8118
        proxy = Proxy({
            'httpProxy': '117.63.156.123:8118'
        })
        # Start from the Chrome capability set: the driver created below is
        # Chrome, so the Firefox template used previously did not match it.
        desired_capabilities = DesiredCapabilities.CHROME.copy()
        # Add the proxy address to the capabilities.
        proxy.add_to_capabilities(desired_capabilities)
        host = "https://cordis.europa.eu"
        # Raw string: sequences such as "\P" or "\G" are invalid escapes in a
        # normal literal (same resulting text, but flagged by newer Pythons).
        chromePath = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"

        self.driver = webdriver.Chrome(
            executable_path=chromePath,
            desired_capabilities=desired_capabilities)

        self.driver.get(host)
        self.driver.maximize_window()
        self.driver.implicitly_wait(10)
Esempio n. 3
0
    def _create_remote_driver(self, driver, **kwargs):
        if not 'remote_url' in kwargs:
            raise ValueError('Remote drivers require the declaration of a remote_url')

        remote_url = kwargs.get('remote_url')

        logger.info('Creating remot driver "%s" (remote_url=%s)', driver, remote_url)

        try:
            # Get a copy of the desired capabilities object. (to avoid overwriting the global.)
            capabilities = self.DRIVER_CAPABILITIES[driver].copy()
        except KeyError:
            raise TypeError("Unsupported Browser Type {0}".format(driver))

        if 'capabilities' in kwargs:
            for c in kwargs.get('capabilities'):
                capabilities.update(c)

        if 'proxy' in kwargs:
            proxy_url = kwargs.get('proxy')
            proxy = Proxy({
                'httpProxy': proxy_url,
                'ftpProxy': proxy_url,
                'sslProxy': proxy_url,
                'noProxy': None,
                'proxyType': ProxyType.MANUAL,
                'autodetect': False
            })
            proxy.add_to_capabilities(capabilities)

        driver_instance = webdriver.Remote(
            desired_capabilities=capabilities,
            command_executor=remote_url
        )
        return driver_instance
Esempio n. 4
0
    def __open_browser(use_proxy: bool = False):
        """Start Chrome, optionally routed through a random proxy.

        Args:
            use_proxy: When true, a proxy string is taken from
                ``Proxies.get_random_proxy()``.  A string containing ``@`` is
                parsed as ``user:password@ip:port`` and configured through an
                extension archive written to ``plugin_file``; any other
                string is used directly as the HTTP, SOCKS and SSL proxy
                address.

        Returns:
            The created ``webdriver.Chrome`` instance.
        """
        # TODO: add user agent
        chrome_options = webdriver.ChromeOptions()
        # Copy the template: mutating the shared CHROME dict would leak this
        # call's proxy into every later driver built from it.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        if use_proxy:
            random_proxy = Proxies.get_random_proxy()
            if '@' in random_proxy:
                # Split on the LAST '@' and the FIRST ':' of the credentials
                # so passwords containing '@' or ':' are parsed correctly.
                auth, _, ip_port = random_proxy.rpartition('@')
                user, pwd = auth.split(':', 1)
                ip, port = ip_port.split(':')

                with zipfile.ZipFile(plugin_file, 'w') as zp:
                    zp.writestr("manifest.json", manifest_json)
                    zp.writestr("background.js",
                                background_js % (ip, port, user, pwd))
                chrome_options.add_extension(plugin_file)
            else:
                prox = Proxy()
                prox.proxy_type = ProxyType.MANUAL
                prox.http_proxy = random_proxy
                prox.socks_proxy = random_proxy
                prox.ssl_proxy = random_proxy
                prox.add_to_capabilities(capabilities)

        # The capabilities must be handed to the driver; previously they were
        # built and then dropped, so the unauthenticated proxy never applied.
        return webdriver.Chrome(chrome_options=chrome_options,
                                desired_capabilities=capabilities)
Esempio n. 5
0
    def __init__(self, *args, seleniumwire_options=None, **kwargs):
        """Create a Firefox WebDriver wired to the Selenium Wire backend.

        Args:
            seleniumwire_options: The seleniumwire options dictionary.
        """
        if seleniumwire_options is None:
            seleniumwire_options = {}

        # Reuse the caller's options object or install a fresh one.
        if 'options' not in kwargs:
            kwargs['options'] = FirefoxOptions()
        firefox_options = kwargs['options']

        # Without this preference Firefox would bypass the Selenium Wire
        # proxy for localhost addresses.
        firefox_options.set_preference(
            'network.proxy.allow_hijacking_localhost', True)
        firefox_options.accept_insecure_certs = True

        config = self._setup_backend(seleniumwire_options)
        auto_config = seleniumwire_options.get('auto_config', True)

        if auto_config and SELENIUM_V4:
            # Selenium v4.0.0 onwards no longer accepts browser proxy
            # settings through desired capabilities, so they are attached to
            # the options object.
            backend_proxy = config['proxy']
            proxy = Proxy()
            proxy.http_proxy = backend_proxy['httpProxy']
            proxy.ssl_proxy = backend_proxy['sslProxy']
            if 'noProxy' in backend_proxy:
                proxy.no_proxy = backend_proxy['noProxy']
            firefox_options.proxy = proxy
        elif auto_config:
            # Older Selenium versions take the settings via capabilities.
            capabilities = kwargs.get('capabilities',
                                      kwargs.get('desired_capabilities'))
            if capabilities is None:
                capabilities = DesiredCapabilities.FIREFOX
            # Merge into a copy so the caller's / global dict is untouched.
            capabilities = capabilities.copy()
            capabilities.update(config)
            kwargs['capabilities'] = capabilities

        super().__init__(*args, **kwargs)
Esempio n. 6
0
    def openDriver(self):
        """Start Chrome behind a fixed proxy, open the search page and run
        ``self.listSearch()`` after clicking the tab with id ``"1_3"``.

        The driver is stored in ``self.driver``.  When the tab element does
        not exist a message is printed and nothing else happens.
        """
        # Imported locally so this method does not depend on a module-level
        # import that may be missing.
        from selenium.common.exceptions import NoSuchElementException

        # Proxy is the proxy class; it is initialised from a dict of settings.
        myProxy = "120.83.106.27:9999"
        proxy = Proxy({
            'proxyType': ProxyType.MANUAL,
            'httpProxy': myProxy,
            'ftpProxy': myProxy,
            'sslProxy': myProxy
        })

        # webdriver.Chrome keyword arguments, for reference:
        #   executable_path="chromedriver"  path of the browser driver
        #   port=0                          port
        #   options=None                    options
        #   service_args=None               arguments for the driver service
        #   desired_capabilities=None       dict of capabilities such as the
        #                                   proxy or logging preferences
        #   service_log_path=None           path of the log file
        #   chrome_options=None
        #   keep_alive=True
        myCapabilities = DesiredCapabilities.CHROME.copy()
        proxy.add_to_capabilities(myCapabilities)
        self.driver = webdriver.Chrome(executable_path=self.chromePath,
                                       desired_capabilities=myCapabilities)
        self.driver.maximize_window()
        self.driver.get(self.searchUrl)

        try:
            self.driver.implicitly_wait(10)
        except Exception as e:
            # Best effort, as in the original: report and carry on.
            print(e)

        # find_element_* raises NoSuchElementException instead of returning
        # None, so the old ``if None != tabLi`` test could never reach the
        # "not clicked" message below.
        try:
            tabLi = self.driver.find_element_by_id("1_3")
        except NoSuchElementException:
            print("没有点击 专业检索!")
            return

        try:
            tabA = tabLi.find_element(By.TAG_NAME, "a")
        except NoSuchElementException:
            # No link inside the tab: nothing to click and nothing to search,
            # which is what the former ``if None != tabA`` guard intended.
            return

        tabA.click()
        # The implicit wait set above stays configured on the driver, so it
        # is not set a second time after the click.
        self.listSearch()
    def prepare_desired_capabilities(self):
        """Return a copy of the Firefox capabilities with JavaScript enabled.

        NOTE: a manual proxy object is still configured from
        ``configs.RANDOM_PROXY`` but it is not added to the returned
        capabilities (the ``add_to_capabilities`` call is disabled).
        """
        firefox_caps = {**DesiredCapabilities.FIREFOX,
                        'javascriptEnabled': True}
        # firefox_caps['pageLoadStrategy'] = 'normal'

        # Proxy set-up (currently not attached, see NOTE above).
        address = configs.RANDOM_PROXY(return_tuple=False)
        detached_proxy = Proxy()
        detached_proxy.proxy_type = ProxyType.MANUAL
        detached_proxy.http_proxy = address
        detached_proxy.ssl_proxy = address
        # detached_proxy.ftp_proxy = address
        # detached_proxy.socks_proxy = address
        # detached_proxy.add_to_capabilities(firefox_caps)

        return firefox_caps
Esempio n. 8
0
def _create_firefox_driver(proxy_url):
    """Start Firefox, using *proxy_url* for HTTP, FTP and SSL when it is set.

    A falsy *proxy_url* starts Firefox with ``proxy=None``.
    """
    if not proxy_url:
        return webdriver.Firefox(proxy=None)

    settings = {'proxyType': 'MANUAL'}
    for scheme_key in ('httpProxy', 'ftpProxy', 'sslProxy'):
        settings[scheme_key] = proxy_url
    return webdriver.Firefox(proxy=Proxy(settings))
Esempio n. 9
0
def get_proxy(proxy_url: str):
    """Build a manual ``Proxy`` that uses *proxy_url* for HTTP, FTP and SSL.

    ``localhost`` is listed under ``noProxy``; adjust that value as desired.
    """
    settings = {'proxyType': ProxyType.MANUAL}
    for scheme_key in ('httpProxy', 'ftpProxy', 'sslProxy'):
        settings[scheme_key] = proxy_url
    settings['noProxy'] = 'localhost'
    return Proxy(settings)
Esempio n. 10
0
    def setUpClass(cls):
        """Create the shared WebDriver behind the fixed proxy, then run the
        parent class set-up.

        The driver is stored on the class as ``cls.selenium``.
        """
        proxy_address = "http://rproxy.mcp.com:3128"
        settings = dict(
            proxyType=ProxyType.MANUAL,
            httpProxy=proxy_address,
            ftpProxy=proxy_address,
            sslProxy=proxy_address,
            noProxy='',
        )
        cls.selenium = WebDriver(proxy=Proxy(settings))

        super(MySeleniumTests, cls).setUpClass()
def webDriverPhantomJS(url, **kwargs):
    """Load *url* in PhantomJS through a random proxy and return its HTML.

    Args:
        url: Address to open.
        **kwargs: ``action`` (optional) is forwarded to ``__doAction``
            together with the driver after the page has been requested.

    Returns:
        ``driver.page_source`` of the loaded page.  A screenshot is written
        to ``test2.png`` and the driver is always quit before returning.
    """
    # SECURITY NOTE(review): this switches off HTTPS certificate verification
    # for every default SSL context in the process, not just for this call.
    # Kept because callers may rely on it; confirm that it is really needed.
    ssl._create_default_https_context = ssl._create_unverified_context
    print("[info]webDriver:设置Header/代理IP")
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/25.0 "
    )
    ipDict = getRandomOneIP()
    # format() accepts a port stored as int as well as str; the previous
    # string concatenation only worked for str.
    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': '{}:{}'.format(ipDict['ip'], ipDict['port'])
    })  # proxy ip and port
    proxy.add_to_capabilities(dcap)
    print("[info]webDriver:获取代理IP成功{}".format(str(ipDict)))

    # Only the executable location differs between the two platforms.
    if 'Linux' in platform.system():
        phantomjs_path = '/root/xiaociwei_download/zywa_crawl_platform/plug/geckodriver/phantomjs-2.1.1-linux-x86_64/bin/phantomjs'
    else:
        phantomjs_path = '/Users/magic/PycharmProjects/zywa-spider-xiaociwei/plug/geckodriver/phantomjs-2.1.1-macosx/bin/phantomjs'
    driver = webdriver.PhantomJS(
        executable_path=phantomjs_path,
        service_args=['--ssl-protocol=any'],
        desired_capabilities=dcap)
    try:
        print("[info]webDriver:初始化webDriver成功")
        driver.get(url)
        print("[info]webDriver:访问成功")
        __doAction(kwargs.get('action'), driver)
        print("[info]执行操作码成功")
        driver.save_screenshot('test2.png')
        return driver.page_source
    finally:
        # Report success only after quit() has actually returned.
        driver.quit()
        print("[info]关闭driver成功")
Esempio n. 12
0
 def process_request(self, request, spider):
     """Fetch ``request.url`` with ``spider.chrome`` and return it as a
     ``TextResponse``.

     The page content comes from ``self.get_response_content``.  When more
     than one window is open afterwards, the current window is closed and
     the driver is switched to a remaining one.

     NOTE: no proxy is applied here.  The previous version built a
     capabilities dict and a placeholder ``Proxy`` ('ip:port') but never
     passed them to the browser; that dead code was removed.
     """
     browser = spider.chrome
     browser.get(request.url)
     content = self.get_response_content(browser)
     if len(browser.window_handles) > 1:
         browser.close()
         # close() leaves the driver pointing at the closed window; select a
         # surviving handle so the next request does not fail on it.
         browser.switch_to.window(browser.window_handles[0])
     body = content.encode('utf-8')
     return TextResponse(url=request.url, request=request, body=body)
Esempio n. 13
0
 def openDriver(self):
     """Start Firefox (no proxy) and open the CORDIS site.

     The driver is stored in ``self.driver``; the window is maximized and a
     10-second implicit wait is configured after the page is requested.

     NOTE: the ``Proxy`` object and the Chrome driver path that used to be
     assigned here were never used (the proxied start-up was commented
     out), so they were removed.  The Chrome path literal also contained
     invalid escape sequences.
     """
     host = "https://cordis.europa.eu"
     firePath = "D:\\software\\Firefox\\geckodriver.exe"
     self.driver = webdriver.Firefox(executable_path=firePath)
     self.driver.get(host)
     # Maximize the window.
     self.driver.maximize_window()
     # Implicit wait of 10 s.
     self.driver.implicitly_wait(10)
Esempio n. 14
0
def setup_driver():
    """Create a Firefox driver with a manual proxy and preference overrides.

    The proxy address comes from ``get_proxy()`` and is used for HTTP, FTP
    and SSL.  Returns the created driver.
    """
    proxy_address = get_proxy()
    browser_proxy = Proxy(dict(
        proxyType=ProxyType.MANUAL,
        httpProxy=proxy_address,
        ftpProxy=proxy_address,
        sslProxy=proxy_address,
        noProxy='',
    ))

    firefox_profile = webdriver.FirefoxProfile()
    firefox_options = Options()
    # ("javascript.enabled", True) is intentionally not set.
    overrides = (
        ("general.useragent.override",
         "Mozilla/5.0 Gecko/20100101 Firefox/66"),
        ("extensions.lastPlatformVersion", "66"),
        ("distribution.abut", "Mozilla Firefox"),
        ("intl.accept_languages", "en,en_US"),
    )
    for pref_name, pref_value in overrides:
        firefox_options.preferences.update({pref_name: pref_value})

    return webdriver.Firefox(firefox_profile=firefox_profile,
                             options=firefox_options,
                             proxy=browser_proxy)
Esempio n. 15
0
import os
import time

from selenium import webdriver
from selenium.webdriver import Proxy, FirefoxProfile
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options as firefox_options

# Proxy object; it is configured here but not passed to the driver below.
p = Proxy()
p.http_proxy = "1.1.1.1"

# Firefox profile; likewise not passed to the driver below.
fp = FirefoxProfile()
fp.accept_untrusted_certs = True
fp.assume_untrusted_cert_issuer = False
# fp.set_proxy(proxy)

# NOTE: this rebinds the name of the imported Firefox ``Options`` alias to
# an instance of it.
firefox_options = firefox_options()
firefox_options.headless = True
chrome_options = Options()
chrome_options.headless = False

# driver = webdriver.firefox(executable_path=os.getcwd() + os.path.sep + "geckodriver_mac")
driver = webdriver.Chrome(
    executable_path=os.path.sep.join((os.getcwd(), "chromedriver_2.45")),
    options=chrome_options,
)
driver.implicitly_wait(30)
driver.maximize_window()
driver.get("https://www.google.com")
print(driver.title)
# Send the query text (followed by SHIFT), then ENTER; the field named "q"
# is looked up again for each send.
for keys in (("ramnath gokul", Keys.SHIFT), (Keys.ENTER,)):
    driver.find_element_by_name("q").send_keys(*keys)
# driver.find_element_by_name("btnK").click()
Esempio n. 16
0
from pathlib import Path

from selenium import webdriver
from selenium.webdriver import FirefoxProfile, Proxy
import os, time

from selenium.webdriver.common.proxy import ProxyType
from selenium.webdriver.firefox.options import Options


# Manual proxy configuration.  The address has to be assigned through the
# ``http_proxy`` property; the previous ``p.httpProxy = ...`` only created an
# unrelated attribute, so the proxy address was never part of the settings.
p = Proxy()
p.proxy_type = ProxyType.MANUAL
p.http_proxy = "1.1.1.1:8080"

# Firefox options with ``accept_insecure_certs`` switched on.
x = Options()
x.accept_insecure_certs = True

# x.headless = True
# x.proxy = p
# x.accept_insecure_certs = True
# x.set_preference("browser.download.defaultFolder", str(Path(os.getcwd()).parent) + os.path.sep +  "AutomationDownloads")

# myProxy = "86.111.144.194:3128"
# proxy = Proxy({
#     'proxyType': ProxyType.MANUAL,
#     'httpProxy': myProxy,
#     'ftpProxy': myProxy,
#     'sslProxy': myProxy,
#     'noProxy':''})

Esempio n. 17
0
    def __init__(self,
                 src,
                 dest=None,
                 unknown=None,
                 chromedriver_location=None,
                 proxy_server=None,
                 fast_proxy=False):
        """Prepare folders, the log file and two Chrome drivers.

        Args:
            src: Folder whose entries are listed into ``self.images``.
            dest: Folder for sorted images.  When None, a ``sorted_images``
                folder next to the running script is used (created if
                missing).
            unknown: Folder for images without a source.  When None, an
                ``unknown`` folder inside the destination is used.
            chromedriver_location: Path of the chromedriver executable.
                When None, the ``chromedriver_win32`` folder next to the
                running script is used.
            proxy_server: ``host:port`` used as HTTP and SSL proxy for the
                second driver; a built-in default address is used when None.
            fast_proxy: Stored as ``self.fast_proxy``.

        Side effects:
            Changes the working directory to the destination folder, opens a
            hard-coded log file in append mode (``self.f``) and exits the
            process with status 1 when ``src`` or an explicitly given
            ``dest`` does not exist.
        """
        if chromedriver_location is None:
            # Raw string: "\c" is not a valid escape sequence in a normal
            # literal (same text, but flagged by newer Python versions).
            self.chromedriver_location = os.path.abspath(
                os.path.dirname(
                    sys.argv[0])) + r"\chromedriver_win32\chromedriver.exe"
        else:
            self.chromedriver_location = chromedriver_location
        if debug:
            print("Chrome location:", self.chromedriver_location)
            print("src:", src)
            print("dest:", dest)

        self.f = open(r'C:\Python34\Projects\pimp-my-collection\text.txt', 'a')
        self.f.write('\n' + str(datetime.today()) + '\n')
        self.titles = []

        # Where the sorted images go.
        if dest is None:
            try:
                self.dest = os.path.abspath(os.path.dirname(sys.argv[0]))
                os.chdir(self.dest)
                os.mkdir("sorted_images")
            except OSError:
                if debug:
                    print("dest folder already exists")
            finally:
                # Raw string for the same reason as above ("\s").
                self.dest = (self.dest + r"\sorted_images")
                os.chdir(self.dest)
        else:
            self.dest = dest
            try:
                os.chdir(self.dest)
            except FileNotFoundError:
                print("No such directory:", self.dest)
                exit(1)

        # Folder for images without a source.
        if unknown is None:
            try:
                # Relative to the working directory, which is self.dest now.
                os.mkdir("unknown")
            except OSError:
                if debug:
                    print("unkn folder already exists")
            finally:
                self.unknown = self.dest + r"\unknown"
        else:
            self.unknown = unknown
            try:
                os.mkdir(self.unknown)
            except OSError:
                if debug:
                    print("unknown folder already exists")

        if debug:
            print("dest:", self.dest)
            print("unknown:", self.unknown)

        # Where the images are taken from.
        self.folder = src
        try:
            self.images = os.listdir(path=self.folder)
        except FileNotFoundError:
            print("No such directory:", self.folder)
            exit(1)

        if debug:
            for i in self.images:
                try:
                    print(i)
                except UnicodeEncodeError:
                    i = i.encode('ascii', 'ignore')
                    print("bad unicode:", i)

        self.sleep_time = 3
        self.proxy_sleep_time = 3
        self.waiting_time = 15
        self.fast_proxy = fast_proxy

        # Proxy settings; both former branches built the same object and
        # differed only in the default address.
        from selenium.webdriver import Proxy
        if proxy_server is None:
            proxy_server = "163.172.175.210:3128"  # https://free-proxy-list.net/
        settings = {"httpProxy": proxy_server, "sslProxy": proxy_server}
        self.proxy_server = Proxy(settings)

        from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
        cap = DesiredCapabilities.CHROME.copy()
        cap['platform'] = "WINDOWS"
        cap['version'] = "10"
        # Driver without proxy.
        self.driver = ChromeDriver(desired_capabilities=cap,
                                   executable_path=self.chromedriver_location)

        # Driver with proxy: the proxy is added to the capabilities only
        # after the first driver has been created from them.
        self.proxy_server.add_to_capabilities(cap)
        self.driver2 = ChromeDriver(desired_capabilities=cap,
                                    executable_path=self.chromedriver_location)
Esempio n. 18
0
    'security.warn_entering_secure.show_once': True,
    'security.warn_entering_weak': False,
    'security.warn_entering_weak._show_once': True,
    'security.warn_leaving_secure': False,
    'security.warn_leaving_secure.show_once': True,
    'security.warn_leaving_weak': False,
    'security.warn_leaving_weak._show_once': True,
    'security.warn_submit_insecure': False,
    'security.warn_viewing_mixed': False,
    'security.warn_viewing_mixed.show_once': True,
}
profile = FirefoxProfile()
for name, value in default_profile.items():
    profile.set_preference(name, value)

proxy = Proxy()
proxy.ftp_proxy = proxy.ssl_proxy = proxy.http_proxy = None

browser = Firefox(firefox_profile=profile, proxy=proxy)

tokyo_url = 'https://ticketcamp.net/venue/tokyo/'  #東京に接続
browser.get(tokyo_url)
'''

'''

url_list = queue.Queue()


def get_url_from_this_page():
    tags = browser.find_elements_by_class_name('name')
Esempio n. 19
0
def get_tuned_driver(parser_name: str,
                     logger: 'Logger',
                     proxy_ip: Optional[str] = None,
                     proxy_port: Optional[str] = None,
                     headless: bool = True) -> 'WebDriver':
    """Create a Chrome driver, optionally headless and behind a proxy.

    Args:
        parser_name: Name used in the log line about the proxy in use.
        logger: Logger for proxy-related messages.
        proxy_ip: Proxy host.  The proxy is used only when both
            ``proxy_ip`` and ``proxy_port`` are truthy.
        proxy_port: Proxy port.
        headless: When true, the headless-mode arguments are added.

    Returns:
        The created driver, with a 5-second implicit wait.

    Raises:
        requests.RequestException: If the proxy probe request fails (the
            proxy is marked as failed first).
        SystemExit: If the probe answers with a status other than 200.

    Side effects:
        Sets the ``DISPLAY`` environment variable to ``:99`` and records the
        proxy status through ``update_proxy_status``.
    """
    os.environ["DISPLAY"] = ':99'

    # Every option has to be set BEFORE webdriver.Chrome() is called: the
    # options are sent when the session is created, so the ones that used to
    # be added after driver creation had no effect.
    chrome_options = Options()
    prefs = {"profile.default_content_setting_values.notifications": 2}
    chrome_options.add_experimental_option('prefs', prefs)
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_experimental_option('excludeSwitches',
                                           ['enable-automation'])
    chrome_options.add_argument('start-maximized')
    chrome_options.add_argument('incognito')
    if headless:
        for argument in ("--no-sandbox",
                         "--headless",
                         "--remote-debugging-port=9222",
                         "--disable-infobars",
                         "--disable-extensions",
                         "--disable-dev-shm-usage"):
            chrome_options.add_argument(argument)

    # Copy the template: mutating DesiredCapabilities.CHROME itself would
    # leak this call's proxy into later calls made without a proxy.
    capabilities = DesiredCapabilities.CHROME.copy()
    capabilities['goog:loggingPrefs'] = {'browser': 'ALL'}
    if proxy_ip and proxy_port:
        proxy_address = f"{proxy_ip}:{proxy_port}"
        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.http_proxy = proxy_address
        prox.ssl_proxy = proxy_address
        try:
            # Probe the proxy first; the timeout keeps a dead proxy from
            # blocking forever (a timeout is a RequestException too).
            response = requests.get('https://google.com',
                                    proxies={
                                        'http': proxy_address,
                                        'https': proxy_address,
                                    },
                                    timeout=10)
        except requests.RequestException:
            update_proxy_status(proxy_ip, AccessStatus.fail)
            raise
        if response.status_code != 200:
            update_proxy_status(proxy_ip, AccessStatus.fail)
            logger.critical(f'proxy {proxy_ip}:{proxy_port} not work')
            exit(-1)
        update_proxy_status(proxy_ip, AccessStatus.success)
        prox.add_to_capabilities(capabilities)

        logger.info(f'{parser_name} use proxy: {proxy_ip}:{proxy_port}')

    driver = webdriver.Chrome(options=chrome_options,
                              desired_capabilities=capabilities)

    # Script evaluated on every new document to hide navigator.webdriver.
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument", {
            "source":
            """
        Object.defineProperty(navigator, 'webdriver', {
          get: () => undefined,
          enumerable: false,
          configurable: true
        });
        const newProto = navigator.__proto__;
        delete newProto.webdriver;
        navigator.__proto__ = newProto;
        delete navigator.webdriver;
      """
        })

    driver.execute_cdp_cmd(
        'Network.setUserAgentOverride', {
            "userAgent":
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/83.0.4103.53 Safari/537.36'
        })

    driver.implicitly_wait(5)
    return driver
Esempio n. 20
0
class Pimper:
    def __init__(self,
                 src,
                 dest=None,
                 unknown=None,
                 chromedriver_location=None,
                 proxy_server=None,
                 fast_proxy=False):
        """Prepare folders, the log file and two Chrome drivers.

        Args:
            src: Folder whose entries are listed into ``self.images``.
            dest: Folder for sorted images.  When None, a ``sorted_images``
                folder next to the running script is used (created if
                missing).
            unknown: Folder for images without a source.  When None, an
                ``unknown`` folder inside the destination is used.
            chromedriver_location: Path of the chromedriver executable.
                When None, the ``chromedriver_win32`` folder next to the
                running script is used.
            proxy_server: ``host:port`` used as HTTP and SSL proxy for the
                second driver; a built-in default address is used when None.
            fast_proxy: Stored as ``self.fast_proxy``.

        Side effects:
            Changes the working directory to the destination folder, opens a
            hard-coded log file in append mode (``self.f``) and exits the
            process with status 1 when ``src`` or an explicitly given
            ``dest`` does not exist.
        """
        if chromedriver_location is None:
            # Raw string: "\c" is not a valid escape sequence in a normal
            # literal (same text, but flagged by newer Python versions).
            self.chromedriver_location = os.path.abspath(
                os.path.dirname(
                    sys.argv[0])) + r"\chromedriver_win32\chromedriver.exe"
        else:
            self.chromedriver_location = chromedriver_location
        if debug:
            print("Chrome location:", self.chromedriver_location)
            print("src:", src)
            print("dest:", dest)

        self.f = open(r'C:\Python34\Projects\pimp-my-collection\text.txt', 'a')
        self.f.write('\n' + str(datetime.today()) + '\n')
        self.titles = []

        # Where the sorted images go.
        if dest is None:
            try:
                self.dest = os.path.abspath(os.path.dirname(sys.argv[0]))
                os.chdir(self.dest)
                os.mkdir("sorted_images")
            except OSError:
                if debug:
                    print("dest folder already exists")
            finally:
                # Raw string for the same reason as above ("\s").
                self.dest = (self.dest + r"\sorted_images")
                os.chdir(self.dest)
        else:
            self.dest = dest
            try:
                os.chdir(self.dest)
            except FileNotFoundError:
                print("No such directory:", self.dest)
                exit(1)

        # Folder for images without a source.
        if unknown is None:
            try:
                # Relative to the working directory, which is self.dest now.
                os.mkdir("unknown")
            except OSError:
                if debug:
                    print("unkn folder already exists")
            finally:
                self.unknown = self.dest + r"\unknown"
        else:
            self.unknown = unknown
            try:
                os.mkdir(self.unknown)
            except OSError:
                if debug:
                    print("unknown folder already exists")

        if debug:
            print("dest:", self.dest)
            print("unknown:", self.unknown)

        # Where the images are taken from.
        self.folder = src
        try:
            self.images = os.listdir(path=self.folder)
        except FileNotFoundError:
            print("No such directory:", self.folder)
            exit(1)

        if debug:
            for i in self.images:
                try:
                    print(i)
                except UnicodeEncodeError:
                    i = i.encode('ascii', 'ignore')
                    print("bad unicode:", i)

        self.sleep_time = 3
        self.proxy_sleep_time = 3
        self.waiting_time = 15
        self.fast_proxy = fast_proxy

        # Proxy settings; both former branches built the same object and
        # differed only in the default address.
        from selenium.webdriver import Proxy
        if proxy_server is None:
            proxy_server = "163.172.175.210:3128"  # https://free-proxy-list.net/
        settings = {"httpProxy": proxy_server, "sslProxy": proxy_server}
        self.proxy_server = Proxy(settings)

        from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
        cap = DesiredCapabilities.CHROME.copy()
        cap['platform'] = "WINDOWS"
        cap['version'] = "10"
        # Driver without proxy.
        self.driver = ChromeDriver(desired_capabilities=cap,
                                   executable_path=self.chromedriver_location)

        # Driver with proxy: the proxy is added to the capabilities only
        # after the first driver has been created from them.
        self.proxy_server.add_to_capabilities(cap)
        self.driver2 = ChromeDriver(desired_capabilities=cap,
                                    executable_path=self.chromedriver_location)

    def find_on_yandere(self):
        try:
            source = self.driver.find_element_by_class_name(
                'tag-type-copyright')
        except NoSuchElementException:
            if debug:
                print("no source")
            return None

        if debug:
            print(source)
            print(source.text)

        source2 = source.find_elements_by_css_selector('a')

        if debug:
            print(source2)
            for i in source2:
                print(i.text)
                print(i.get_attribute('href'))

            print(source2[1].text)

        return source2[1].text

    def find_on_sankaku(self, addr):
        try:
            source = self.driver2.find_element_by_class_name(
                'tag-type-copyright')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out")
            self.driver2.get(addr)
            sleep(self.proxy_sleep_time)
            source = self.driver2.find_element_by_class_name(
                'tag-type-copyright')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except NoSuchElementException:
            if not self.fast_proxy:
                if debug:
                    print("no element")
                self.driver2.get(addr)
                sleep(self.proxy_sleep_time)

                try:
                    source = self.driver2.find_element_by_class_name(
                        'tag-type-copyright')
                    sleep(self.proxy_sleep_time)
                except NoSuchElementException:
                    if debug:
                        print("actually no element")
                    return None
            else:
                return None

        if debug:
            print(source)
            print(source.text)

        if not self.fast_proxy:
            sleep(self.proxy_sleep_time)

        try:
            source2 = source.find_elements_by_css_selector('a')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            sleep(self.proxy_sleep_time)

        if debug:
            print(source2)
            for i in source2:
                print(i.text)
                print(i.get_attribute('href'))

            print(source2[0].text)

        return source2[0].text

    def find_on_eshuushuu(self):
        got_source = False
        source = self.driver.find_elements_by_class_name('quicktag')
        check = self.driver.find_elements_by_tag_name('dt')

        if debug:
            for i in source:
                it = i.text
                try:
                    print(it)
                    print(i.get_attribute('span'))
                except UnicodeEncodeError:
                    it = it.encode('ascii', 'ignore')
                    print("bad unicode:", it)

            print(check)
            print("possible source:",
                  source[1].text[1:len(source[1].text) - 1])

        for i in check:
            if debug:
                print(i.text)
            if i.text.find("Source") != -1:
                return source[1].text[1:len(source[1].text) - 1]

        return None

    def find_on_danbooru(self, addr):
        """Extract the source title from the danbooru page open in ``self.driver2``.

        Looks up the element with class ``category-3`` and returns the text
        of the second ``<a>`` link inside it. If the lookup times out, or the
        element is missing while ``self.fast_proxy`` is false, the page at
        ``addr`` is reloaded and the lookup is retried once.

        Args:
            addr: URL of the danbooru post; used only to reload the page
                before a retry.

        Returns:
            The text of the second link inside the ``category-3`` element, or
            ``None`` when the element or that link cannot be obtained.
        """
        try:
            source = self.driver2.find_element_by_class_name('category-3')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out")
            self.driver2.get(addr)
            sleep(self.proxy_sleep_time)
            try:
                source = self.driver2.find_element_by_class_name('category-3')
            except NoSuchElementException:
                # A handler's own exception is not caught by sibling
                # ``except`` clauses, so handle the missing element here.
                if debug:
                    print("actually no element")
                return None
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except NoSuchElementException:
            if self.fast_proxy:
                return None
            if debug:
                print("no element")
            self.driver2.get(addr)
            sleep(self.proxy_sleep_time)

            try:
                source = self.driver2.find_element_by_class_name(
                    'category-3')
                sleep(self.proxy_sleep_time)
            except NoSuchElementException:
                if debug:
                    print("actually no element")
                return None

        if debug:
            print(source)

        try:
            source2 = source.find_elements_by_css_selector('a')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out source 2")
            sleep(self.proxy_sleep_time)
            # No link list was obtained; report "not found" rather than
            # falling through to an unbound variable.
            return None

        if debug:
            print(source2)
            for link in source2:
                print(link.text)
                print(link.get_attribute('href'))

        # The title is the second link; fewer links means no usable title.
        if len(source2) < 2:
            return None

        title = source2[1].text
        if debug:
            print("source:", title)
        return title

    def find_on_gelbooru(self, addr):
        """Extract the source title from the gelbooru page open in ``self.driver2``.

        Looks up the element with class ``tag-type-copyright`` and returns
        the text of the second ``<a>`` link inside it. If the lookup times
        out, or the element is missing while ``self.fast_proxy`` is false,
        the page at ``addr`` is reloaded and the lookup is retried once.

        Args:
            addr: URL of the gelbooru post; used only to reload the page
                before a retry.

        Returns:
            The text of the second link inside the ``tag-type-copyright``
            element, or ``None`` when the element or that link cannot be
            obtained.
        """
        try:
            source = self.driver2.find_element_by_class_name(
                'tag-type-copyright')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out")
            self.driver2.get(addr)
            sleep(self.proxy_sleep_time)
            try:
                source = self.driver2.find_element_by_class_name(
                    'tag-type-copyright')
            except NoSuchElementException:
                # A handler's own exception is not caught by sibling
                # ``except`` clauses, so handle the missing element here.
                if debug:
                    print("actually no element")
                return None
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except NoSuchElementException:
            if self.fast_proxy:
                return None
            if debug:
                print("no element")
            self.driver2.get(addr)
            sleep(self.proxy_sleep_time)

            try:
                source = self.driver2.find_element_by_class_name(
                    'tag-type-copyright')
                sleep(self.proxy_sleep_time)
            except NoSuchElementException:
                if debug:
                    print("actually no element")
                return None

        if debug:
            print(source)

        try:
            source2 = source.find_elements_by_css_selector('a')
            if not self.fast_proxy:
                sleep(self.proxy_sleep_time)
        except TimeoutException:
            if debug:
                print("time out source 2")
            sleep(self.proxy_sleep_time)
            # No link list was obtained; report "not found" rather than
            # falling through to an unbound variable.
            return None

        if debug:
            print(source2)
            for link in source2:
                print(link.text)
                print(link.get_attribute('href'))

        # The title is the second link; fewer links means no usable title.
        if len(source2) < 2:
            return None

        title = source2[1].text
        if debug:
            print("source:", title)
        return title

    def move_image(self, folder_name):
        """Move the current image into the folder for its source title.

        The file at ``self.folder + self.img_name`` is copied to the target
        and the original is then removed. With ``folder_name`` set to
        ``None`` the target is ``self.unknown``. Otherwise the characters
        backslash, slash, colon, asterisk, question mark, double quote,
        angle brackets and pipe are stripped from ``folder_name``; the
        cleaned name is appended to ``self.f`` and ``self.titles`` if it is
        not already listed, a folder with that name is created, and the
        image is copied into it.

        Args:
            folder_name: Source title to file the image under, or ``None``
                when no source was found.
        """
        src_path = self.folder + self.img_name
        # Skip the leading path separator of img_name for log output.
        img = self.img_name[1:].encode('ascii', 'ignore')
        # Failures that mean "the file could not be copied/removed".
        move_errors = (OSError, IOError, shutil.Error)

        # Source was not found.
        if folder_name is None:
            dest = self.unknown.encode('ascii', 'ignore')
            if debug:
                try:
                    print("src:", src_path)
                except UnicodeEncodeError:
                    print("bad unicode")
                print("dst:", dest)
            try:
                shutil.copy(src_path, self.unknown)
                os.remove(src_path)
                print("image", img, "successfully moved in", dest)
            except move_errors:
                print("Error while moving image", img)
            return

        # Source was found: remove characters forbidden in a folder name.
        # Raw string so the backslash is part of the character class.
        forbidden = r'[\\/:*?"<>|]'
        forbidden_symbols = re.findall(forbidden, folder_name)
        if forbidden_symbols:
            if debug:
                for symb in forbidden_symbols:
                    print(symb)
            # NOTE(review): the name is lower-cased only when something was
            # stripped; kept as in the original code.
            folder_name = re.sub(forbidden, "", folder_name).lower()
        if debug:
            print("new folder name:", folder_name)

        # Built after sanitizing so the log names the folder actually used.
        # NOTE(review): the folder is created relative to the working
        # directory; presumably that is self.dest -- confirm.
        dest = (self.dest + '\\' + folder_name).encode('ascii', 'ignore')

        print("writing...")
        if folder_name not in self.titles:
            try:
                self.f.write(folder_name + '\n')
                self.titles.append(folder_name)
            except UnicodeEncodeError:
                # Best effort: a title the file cannot encode is not recorded.
                pass
        try:
            os.mkdir(folder_name)
        except OSError:
            if debug:
                print("folder", folder_name, "already exists")
        try:
            shutil.copy(src_path, folder_name)
            os.remove(src_path)
            print("image", img, "successfully moved in", dest)
        except move_errors:
            print("Error while moving image", img)
        sleep(self.sleep_time)

    # Site priority
    def sort_addresses(self, pic_addr):
        """Pick which matched site to query for the image's source.

        Sites are ranked by a fixed priority where lower wins: yande 1,
        shuushuu 2, danbooru 3, sankaku 4, gelbooru 5, anything else 6. The
        first link sets the starting choice. Later links replace it only if
        their site ranks strictly better, and the scan stops as soon as a
        shuushuu or yande link is chosen, or when the ``td`` cells under the
        ``pages`` element run out.

        Args:
            pic_addr: Non-empty sequence of link elements for the matches;
                each must support ``get_attribute('href')``.

        Returns:
            A ``(best_addr, priority)`` tuple: the chosen URL and its rank.
        """
        # (URL fragment, debug label, priority); checked in this order.
        sites = (
            ("danbooru", "danbooru", 3),
            ("sankaku", "sankaku", 4),
            ("gelbooru", "gelbooru", 5),
            ("shuushuu", "shuushuu", 2),
            ("yande", "yandere", 1),
        )

        def classify(url):
            """Return (label, priority) for a URL, or (None, 6) if unknown."""
            url = url or ""
            for fragment, label, rank in sites:
                if fragment in url:
                    return label, rank
            return None, 6

        variants = self.driver.find_element_by_id(
            'pages').find_elements_by_tag_name('td')
        has_seventh = len(variants) > 6

        if debug:
            print("find %")
            for cell in variants:
                try:
                    print(cell.text)
                except UnicodeEncodeError:
                    print("bad unicode:", cell.text.encode('ascii', 'ignore'))
            for addr in pic_addr:
                print("trying", addr.get_attribute('href'))
            if has_seventh:
                print("1st variant:", variants[6].text, "len =",
                      len(variants), "len var = ", len(variants[6].text))

        # Index of the second match's similarity cell; it shifts by one
        # depending on whether the seventh cell is empty.
        if has_seventh and len(variants[6].text) == 0:
            pos = 9
        else:
            pos = 10

        best_addr = pic_addr[0].get_attribute('href')
        label, priority = classify(best_addr)
        if debug and label is not None:
            print(label + "[0]")

        if priority > 1:
            for addr in pic_addr[1:]:
                addr2 = addr.get_attribute('href')

                # ``>=``: variants[pos] must exist for this match to count.
                if pos >= len(variants):
                    break
                if debug:
                    # Similarity is informational only, so a cell without
                    # digits must not abort the scan.
                    found = re.search(r'\d+', variants[pos].text)
                    similarity = int(found.group()) if found else None
                    print("similarity =", similarity)

                label, rank = classify(addr2)
                if label is not None:
                    if debug:
                        print(label, priority)
                    if priority > rank:
                        best_addr = addr2
                        priority = rank
                        # shuushuu/yande are the top choices: stop looking.
                        if rank <= 2:
                            break

                pos += 4  # Next similarity cell.

        if debug:
            print("best_addr:", best_addr)
        return best_addr, priority

    def search_for_source(self, pic_addr):
        """Open the best matching site, read the source title, file the image.

        ``sort_addresses`` chooses the URL and its priority. Priorities 1
        (yandere) and 2 (e-shuushuu) load the page with ``self.driver``;
        priorities 3 (danbooru), 4 (sankaku) and 5 (gelbooru) load it with
        ``self.driver2`` and tolerate a page-load timeout. Any other
        priority performs no lookup. The resulting title, or ``None``, is
        passed to ``move_image``.

        Args:
            pic_addr: Link elements of the matches, as accepted by
                ``sort_addresses``.

        Raises:
            SystemExit: with status 1 when loading the page fails with a
                ``WebDriverException`` that is not a tolerated timeout.
        """
        # ``exit()`` is injected by the ``site`` module for interactive use;
        # ``sys.exit`` is the supported way to stop a program.
        import sys

        best_addr, priority = self.sort_addresses(pic_addr)
        folder_name = None

        if debug:
            print("trying", best_addr)

        # priority -> (name for the log line, loaded via driver2?, finder)
        sites = {
            1: ("yandere", False, self.find_on_yandere),
            2: ("e-shuushuu", False, self.find_on_eshuushuu),
            3: ("danbooru", True, self.find_on_danbooru),
            4: ("sankaku", True, self.find_on_sankaku),
            5: ("gelbooru", True, self.find_on_gelbooru),
        }
        site = sites.get(priority)

        if site is not None:
            name, via_driver2, finder = site
            print("searching on " + name)

            if via_driver2:
                try:
                    self.driver2.get(best_addr)
                    sleep(self.proxy_sleep_time)
                except TimeoutException:
                    # Must precede WebDriverException so a timeout is
                    # tolerated here instead of ending the program.
                    if debug:
                        print("time out in if")
                    sleep(self.proxy_sleep_time)
                except WebDriverException as inst:
                    if debug:
                        print(inst)
                    sys.exit(1)
                folder_name = finder(best_addr)
            else:
                try:
                    self.driver.get(best_addr)
                except WebDriverException as inst:
                    if debug:
                        print(inst)
                    sys.exit(1)
                folder_name = finder()

        if folder_name is None:
            print("No relevant match for", self.img_name[1:])
        self.move_image(folder_name)

    def iqdb_actions(self):
        """Submit every image in ``self.images`` to iqdb.org and file it.

        For each image name: ``self.img_name`` is set to a backslash plus the
        name, the method sleeps ``self.sleep_time``, and names not ending in
        ``.jpg``, ``.png`` or ``.jpeg`` are reported and skipped. Otherwise
        the file path is typed into the ``file`` input on http://iqdb.org/
        and the form is submitted. If no ``.image a`` links appear within
        ``self.waiting_time`` the image is moved with ``move_image(None)``.
        If the first header cell of the results table contains "No" the
        image is likewise moved as unknown; otherwise the links are handed
        to ``search_for_source``.
        """
        total = len(self.images)
        # enumerate gives the running number directly; list.index() would
        # rescan the list each time and repeat the number for duplicates.
        for number, image in enumerate(self.images, 1):

            print("\nprocessing", number, "of", total)

            self.img_name = '\\' + image

            if debug:
                try:
                    print(self.folder + self.img_name)
                except UnicodeEncodeError:
                    print("bad unicode")
            sleep(self.sleep_time)

            if not image.endswith((".jpg", ".png", ".jpeg")):
                try:
                    print("Unsupported format:", image)
                except UnicodeEncodeError:
                    print(image.encode('ascii', 'ignore'))
                continue

            self.driver.get("http://iqdb.org/")
            wait = ui.WebDriverWait(self.driver, self.waiting_time)

            # Insert the image.
            element = wait.until(
                lambda driver: driver.find_element_by_id("file"))
            if debug:
                print(element)
            element.send_keys(self.folder + self.img_name)

            # Submit the form.
            element = wait.until(
                lambda driver: driver.find_element_by_xpath(
                    "//input[@value='submit']"))
            if debug:
                print(element)

            try:
                element.click()
            except TimeoutException:
                sleep(self.sleep_time)

            sleep(self.sleep_time)

            # Look for the best match.
            try:
                pic_addr = wait.until(
                    lambda driver: driver.find_elements_by_css_selector(
                        '.image a'))
            except TimeoutException:
                print("Image", image, "is to o large")
                self.move_image(None)
                continue

            if debug:
                print(pic_addr)

            matches = wait.until(
                lambda driver: driver.find_element_by_xpath(
                    '//*[@id="pages"]/div[2]/table/tbody/tr[1]/th'))
            verdict = matches.text

            if debug:
                print("matches:", matches)
                print(verdict)

            # A header containing "No" is treated as "no match found".
            if "No" in verdict:
                print(verdict, "for", image)
                self.move_image(None)
            else:
                self.search_for_source(pic_addr)

            sleep(self.sleep_time)

    def pimp(self):
        """Run ``iqdb_actions`` and always release the drivers and the file.

        A ``KeyboardInterrupt`` during processing is reported and swallowed.
        Afterwards ``self.driver`` and ``self.driver2`` are quit and
        ``self.f`` is closed; each cleanup step runs even if an earlier one
        raises. "Job's done" is printed only when all cleanup succeeded.
        """
        if debug:
            print("proxy mode:", self.fast_proxy)
        try:
            self.iqdb_actions()
        except KeyboardInterrupt:
            print("Stop working...")
        finally:
            # Nested so that a failing quit() cannot skip the remaining
            # cleanup and leave a browser or the file open.
            try:
                self.driver.quit()
            finally:
                try:
                    self.driver2.quit()
                finally:
                    self.f.close()
            print("Job's done")