예제 #1
0
class SeleniumMiddleware(object):
    """Fetch pages with a Selenium-driven Chrome browser.

    ``chrome_request`` starts a new Chrome instance on every call. On the
    success path the live browser is returned without being closed, so the
    caller is responsible for calling ``close`` afterwards.
    """

    def __init__(self):
        """Create the project settings object and load the common settings."""
        self.settings = Settings()
        self.settings.CreateCommonSettings()

    def init(self, timeout=None, executable_path=None, proxy=None):
        """Start Chrome and store the browser, wait helper and logger on self.

        Args:
            timeout: Value passed to ``set_page_load_timeout`` and to
                ``WebDriverWait``.
            executable_path: Path of the chromedriver binary.
            proxy: Optional ``host:port`` string; when given, Chrome is
                started with ``--proxy-server=http://<proxy>``.
        """
        self.file = FileIOMiddleware()
        self.timeout = timeout
        chrome_options = webdriver.ChromeOptions()
        for flag in ('--disable-gpu', '--no-sandbox',
                     '--disable-dev-shm-usage'):
            chrome_options.add_argument(flag)
        if proxy is not None:
            chrome_options.add_argument('--proxy-server=http://{0}'.format(proxy))
        self.browser = webdriver.Chrome(executable_path=executable_path, chrome_options=chrome_options)
        try:
            self.load_timeout = self.browser.set_page_load_timeout(self.timeout)
            self.wait = WebDriverWait(self.browser, self.timeout)
        except Exception:
            # Chrome is already running at this point; shut it down so a
            # failed setup does not leave an orphaned browser process.
            self.browser.quit()
            raise

    def close(self):
        """Shut the browser down and drop the per-request attributes.

        Safe to call more than once: when no browser is attached the call
        only clears whatever per-request attributes are still present.
        ``quit`` is always attempted, even when ``close`` raises; in that
        case the original error still propagates after the cleanup.
        """
        browser = getattr(self, 'browser', None)
        try:
            if browser is not None:
                try:
                    browser.close()
                finally:
                    browser.quit()
        finally:
            for name in ('browser', 'file', 'timeout', 'load_timeout', 'wait'):
                self.__dict__.pop(name, None)
            gc.collect()

    def chrome_request(self, url, log_path, proxy):
        """Open ``url`` in a new Chrome instance and return the browser.

        Args:
            url: Address to load.
            log_path: Log file path handed to the file logger.
            proxy: Optional proxy forwarded to ``init``.

        Returns:
            The browser object. On success it is still open. When the page
            load times out the browser has ALREADY been closed before it is
            returned, so it must not be used for further commands.

        Raises:
            Any non-timeout error raised while loading the page; the browser
            is closed before the error is re-raised.
        """
        self.init(timeout=self.settings.SELENIUM_TIMEOUT, executable_path=self.settings.CHROMEDRIVER_PATH, proxy=proxy)
        try:
            self.file.logger(log_path, 'Starting Chrome for: {0}'.format(url))
            self.browser.get(url)
            return self.browser
        except TimeoutException:
            browser = self.browser
            self.file.logger(log_path, 'Chrome timeout for: {0}'.format(url))
            self.close()
            return browser
        except Exception:
            # Any other failure would otherwise leave Chrome running with no
            # reference handed back to the caller.
            self.close()
            raise
예제 #2
0
class FileTransferMiddleware():
    """Upload local files to a remote host over SFTP and delete them locally."""

    def __init__(self):
        """Load the common project settings and silence logging errors."""
        self.settings = Settings()
        self.settings.CreateCommonSettings()
        # Stop the logging module from reporting errors that occur while a
        # log record is being handled.
        logging.raiseExceptions = False

    def singleUpload(self, local_file_path, remote_file_path, host_name,
                     user_name, password, port):
        """Upload one file over SFTP, then remove the local copy.

        Args:
            local_file_path: Path of the file to send.
            remote_file_path: Destination path on the remote host.
            host_name: Remote host to connect to.
            user_name: Login name.
            password: Login password.
            port: Remote port.

        Errors raised while transferring or deleting are printed and
        swallowed. Errors raised while connecting propagate to the caller,
        as before. The transport is closed in every case.
        """
        transport = paramiko.Transport((host_name, port))
        try:
            transport.connect(username=user_name, password=password)
            sftp = paramiko.SFTPClient.from_transport(transport)
            try:
                print('start to transfer: {0}'.format(local_file_path))
                sftp.put(local_file_path, remote_file_path)
                print('finished to transfer: {0}'.format(local_file_path))
                print('start to delete: {0}'.format(local_file_path))
                os.remove(local_file_path)
                print('finished to delete: {0}'.format(local_file_path))
            except Exception as e:
                # Format the exception itself: ``e.message`` is empty for
                # multi-argument errors such as IOError(errno, text).
                print('Exception to transfer: {0} for {1}'.format(
                    local_file_path, e))
        finally:
            # Always release the connection, including after a failed
            # connect, transfer or delete.
            transport.close()
        gc.collect()

    def startUpload(self, local_diractory, remote_diractory, processes,
                    host_name, user_name, password, port):
        """Upload every entry of a local directory through a worker pool.

        Args:
            local_diractory: Directory whose entries are uploaded.
            remote_diractory: Remote directory; entry names are appended
                with a ``/`` separator.
            processes: Worker count handed to ``Pool``.
            host_name: Remote host to connect to.
            user_name: Login name.
            password: Login password.
            port: Remote port.

        Returns:
            None. Returns early, after printing a message, when the local
            directory is missing or empty.
        """
        if not os.path.exists(local_diractory):
            print('{0} is not exits'.format(local_diractory))
            return
        file_names = os.listdir(local_diractory)
        if not file_names:
            print('No new file to upload in {0}'.format(local_diractory))
            return
        pool = Pool(processes)
        for file_name in file_names:
            local_file_path = os.path.join(local_diractory, file_name)
            # Remote paths are always joined with a forward slash.
            remote_file_path = '{0}/{1}'.format(remote_diractory, file_name)
            # NOTE(review): the async results are never inspected, so an
            # error raised inside a worker is not reported here.
            pool.apply_async(self.singleUpload,
                             args=(local_file_path, remote_file_path,
                                   host_name, user_name, password, port))
        pool.close()
        pool.join()
        print('Done')
        gc.collect()
예제 #3
0
class RequestsMiddleware():
    """Fetch URLs with ``requests`` and pass each response to a callback."""

    def __init__(self):
        """Create the project settings object and load the common settings."""
        self.settings = Settings()
        self.settings.CreateCommonSettings()

    def init(self, headers=None, host=None, referer=None):
        """Prepare the logger, the requests module handle and the headers.

        Args:
            headers: Ready-made header dict. When given it is used as is and
                ``host`` / ``referer`` are ignored.
            host: Value for the ``Host`` header of the generated headers.
            referer: Value for the ``Referer`` header of the generated
                headers.
        """
        self.file = FileIOMiddleware()
        self.requests = requests
        if headers is not None:
            self.headers = headers
            return
        settings = self.settings
        self.headers = {
            'Accept': settings.ACCEPT,
            # NOTE(review): the attribute name is spelled with a zero
            # ("ENC0DING"); confirm it matches the Settings definition.
            'Accept-Encoding': settings.ACCEPT_ENC0DING,
            'Accept-Language': settings.ACCEPT_LANGUAGE,
            'Cache-Control': settings.CACHE_CONTROL,
            'Connection': settings.CONNECTION,
            'Host': host,
            'Upgrade-Insecure-Requests': settings.UPGRADE_INSECURE_REQUESTS,
            'Referer': referer,
            'Pragma': settings.PRAGMA,
            # Pick one of the configured user agents at random.
            'User-Agent': random.choice(settings.USER_AGENTS),
        }

    def requests_request(self, url, headers=None, host=None, referer=None):
        """GET ``url`` and return the response, or None when the call fails.

        Any exception raised by the request is logged to
        ``settings.LOG_PATH`` and swallowed.

        NOTE(review): no timeout is passed to ``requests.get`` and the
        ``proxy`` stored by ``start_requests`` is not used here.
        """
        self.init(headers=headers, host=host, referer=referer)
        try:
            self.file.logger(self.settings.LOG_PATH, 'Starting Requests')
            return self.requests.get(url=url, headers=self.headers)
        except Exception as e:
            self.file.logger(self.settings.LOG_PATH,
                             'Requests Timeout: {0}'.format(e))
            return None

    def run_task(self,
                 url_title=[],
                 callback=callable,
                 headers=None,
                 host=None):
        """Fetch one URL and hand the result to ``callback``.

        Relies on ``self.file`` and ``self.log_path``, which
        ``start_requests`` sets before scheduling tasks.

        Args:
            url_title: Sequence whose first item is the URL and whose second
                item is the title. The default list is never mutated.
            callback: Called with a dict holding ``response``,
                ``request_url`` and ``request_title``. ``response`` is None
                when the request failed.
            headers: Optional header dict forwarded to the request.
            host: Optional ``Host`` header value forwarded to the request.

        An exception raised by the callback is logged and printed, and the
        task then ends without the "End" message.
        """
        request_url = url_title[0]
        self.file.logger(self.log_path, 'Start: {0}'.format(request_url))
        print('Start: {0}'.format(request_url))
        # The requested URL doubles as the Referer value.
        response = self.requests_request(request_url, headers, host,
                                         request_url)
        try:
            try:
                callback({
                    'response': response,
                    'request_url': request_url,
                    'request_title': url_title[1]
                })
            except Exception as e:
                message = 'Exception: {0} for {1}'.format(e, request_url)
                self.file.logger(self.log_path, message)
                print(message)
                return
            # A failed request yields no response object; fall back to the
            # URL that was requested.
            final_url = getattr(response, 'url', request_url)
            self.file.logger(self.log_path, 'End: {0}'.format(final_url))
            print('End: {0}'.format(final_url))
        finally:
            gc.collect()

    def start_requests(self,
                       url_titles,
                       processes,
                       log_path,
                       headers,
                       host,
                       proxy,
                       callback=callable):
        """Run ``run_task`` for every (url, title) pair through a pool.

        Args:
            url_titles: Iterable of (url, title) sequences.
            processes: Worker count handed to ``Pool``.
            log_path: Log file path used by the tasks.
            headers: Header dict forwarded to every task (may be None).
            host: ``Host`` header value forwarded to every task.
            proxy: Stored on the instance; not used by the code shown here.
            callback: Callable forwarded to every task.
        """
        self.file = FileIOMiddleware()
        self.content = []
        self.log_path = log_path
        self.proxy = proxy
        pool = Pool(processes)
        for url_title in url_titles:
            # NOTE(review): the async results are never inspected, so an
            # error raised inside a task is not reported here.
            pool.apply_async(self.run_task,
                             args=(url_title, callback, headers, host))
        pool.close()
        pool.join()
        self.file.logger(self.log_path, 'Done')
        print('Done')
        del self.file
        gc.collect()