class ExtractorHtml:
    """Load a URL in a QWebEnginePage and block until its HTML is available.

    The constructor returns only after the page source has been stored in
    the ``html`` attribute.
    """

    def __init__(self, url):
        self.html = None

        # Small workaround for fetching pages of http://gama-gama.ru:
        # the load finishes twice -- first a chunk of tricky javascript code,
        # then the real page with its content -- so finished loads are counted.
        self._counter_finished = 0

        app = QApplication([])

        self._page = QWebEnginePage()
        self._page.loadFinished.connect(self._load_finished_handler)
        self._page.load(QUrl(url))

        # Wait for the page to load and for its content to be delivered.
        # Pumping events in this loop makes the asynchronous code synchronous.
        while self.html is None:
            app.processEvents()

        app.quit()

        # Drop the page reference to avoid crashes of the script.
        self._page = None

    def _callable(self, data):
        """Receive the page source from ``toHtml`` and store it in ``html``."""
        self.html = data

    def _load_finished_handler(self, _):
        """Count finished loads and request the HTML on the second one."""
        self._counter_finished += 1

        if self._counter_finished != 2:
            return

        self._page.toHtml(self._callable)
class ExtractorHtml:
    """Load a URL in a QWebEnginePage and block until its HTML is accepted.

    The constructor returns only after ``html`` has been set by
    ``_callable``.
    """

    def __init__(self, url):
        self.html = None

        app = QApplication([])

        page = QWebEnginePage()
        self._page = page
        page.load(QUrl(url))
        page.loadFinished.connect(self._load_finished_handler)

        # Wait for the page to load and for its content to be delivered.
        # Pumping events in this loop makes the asynchronous code synchronous.
        while self.html is None:
            app.processEvents()

        app.quit()
        self._page = None

    def _callable(self, data):
        """Store ``data`` in ``html`` unless ``check_content_func`` rejects it.

        When ``check_content_func`` is falsy the data is always stored.
        """
        if not check_content_func or check_content_func(data):
            self.html = data

    def _load_finished_handler(self):
        """Request the page source once a load has finished."""
        self._page.toHtml(self._callable)
def loadPage(url):
    """Load ``url`` in a QWebEnginePage and return the page's HTML.

    ``QWebEnginePage.toHtml`` is asynchronous: it returns nothing and
    delivers the markup to a callback later. The page therefore has to stay
    alive, with an event loop running, until that callback fires. Returning
    straight after ``toHtml`` would destroy the local page first.

    The module-level ``get_html`` callback is still invoked with the HTML,
    as before.

    Args:
        url: Address of the page to load.

    Returns:
        The HTML passed to the callback, or ``None`` if the callback was
        never invoked.
    """
    page = QWebEnginePage()

    loadLoop = QEventLoop()  # Create event loop
    page.loadFinished.connect(loadLoop.quit)  # Connect loadFinished to loop quit
    page.load(QUrl(url))
    loadLoop.exec_()  # Run event loop, it will end on loadFinished

    # A separate loop is used for the HTML wait so that a later loadFinished
    # (still connected to loadLoop.quit) cannot end this wait prematurely.
    htmlLoop = QEventLoop()
    captured = []

    def _deliver(html):
        captured.append(html)
        try:
            get_html(html)
        finally:
            htmlLoop.quit()

    page.toHtml(_deliver)
    # Only block if the callback has not already run.
    if not captured:
        htmlLoop.exec_()

    return captured[0] if captured else None
class ExtractorHtml:
    """Load a URL in a QWebEnginePage and block until its HTML is available.

    The HTML is requested 5 seconds after the most recent moment the load
    progress reached 100, and is then stored in the ``html`` attribute.
    """

    def __init__(self, url):
        self.html = None

        app = QApplication([])
        self._page = QWebEnginePage()

        # Small workaround for fetching pages of
        # https://www.origin.com/rus/ru-ru/search?searchString=
        # The page loads gradually -- the data does not appear at once, so we
        # have to wait until it stops loading. A single-shot timer gives 5
        # seconds after every completed load before the page data is pulled.
        settle_timer = QTimer()
        settle_timer.setInterval(5000)
        settle_timer.setSingleShot(True)
        settle_timer.timeout.connect(self._load_finished_handler)

        def restart_timer_on_complete(progress):
            # (Re)start the countdown each time progress reports 100.
            if progress == 100:
                settle_timer.start()

        self._page.loadProgress.connect(restart_timer_on_complete)
        self._page.load(QUrl(url))

        # Wait for the page to load and for its content to be delivered.
        # Pumping events in this loop makes the asynchronous code synchronous.
        while self.html is None:
            app.processEvents()

        app.quit()
        self._page = None

    def _callable(self, data):
        """Receive the page source from ``toHtml`` and store it in ``html``."""
        self.html = data

    def _load_finished_handler(self):
        """Request the page source; invoked by the settle timer's timeout."""
        self._page.toHtml(self._callable)
class MainWindow(QMainWindow):
    """Main application window.

    Wires up the generated UI, the logging handlers, the auto-search model
    with its two tree views, the system tray icon, the embedded web view and
    the periodic refresh timer, and turns unread comments on the messages
    page into tray (and optionally e-mail) notifications.
    """

    # Emitted with an error text; connected to showError().
    error = pyqtSignal(str)

    def __init__(self):
        super().__init__()
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        self.setWindowIcon(readIcon)
        # permalinks of comments we already notified the user of
        self.permalinks = []
        self.quitting = False
        self.error.connect(self.showError)

        # Logger
        self.fileHandler = None
        self.handler = Handler()
        self.handler.newMessage.connect(self.logMessage)
        s = QSettings(orgName, appName)
        level = s.value('misc/loglevel', defaultLevel, type=int)
        self.handler.setLevel(logLevels[level])
        self.handler.setFormatter(logging.Formatter(logFormat))
        logging.getLogger().addHandler(self.handler)
        self.updateLogger()

        # Auto search
        self.model = Model()
        self.model.statusMessage.connect(self.showStatusMessage)
        self.model.progress.connect(self.showProgress)
        self.updateAutoSearch()
        self.autoSearchPage = QWebEnginePage()
        self.autoSearchPage.loadFinished.connect(self.searchPageLoaded)

        self.progressBar = QProgressBar()
        self.progressBar.setTextVisible(False)
        self.progressBar.setMaximumWidth(100)
        self.progressBar.setMinimum(0)
        self.progressBar.setMaximum(99)
        self.statusBar().addPermanentWidget(self.progressBar)
        self.progressBar.hide()

        self.delegate = ItemDelegate()
        self.fontSize = 14

        self.ui.treeView.setHeaderHidden(True)
        self.ui.treeView.setIconSize(QSize(40, 40))
        self.ui.treeView.setStyleSheet(self._treeStyleSheet())
        self.ui.treeView.setModel(self.model)
        self.ui.treeView.setItemDelegate(self.delegate)
        self.ui.treeView.hideColumn(1)
        self.ui.treeView.setContextMenuPolicy(Qt.CustomContextMenu)
        self.ui.treeView.customContextMenuRequested.connect(self.onCustomMenu)

        bookmarks = s.value('bookmarks/bookmarks_list', '')
        self.bookmarksList = [
            baseUrl(line) for line in bookmarks.split('\n') if baseUrl(line)
        ]
        # Drop duplicates while keeping the stored order.
        self.bookmarksList = list(dict.fromkeys(self.bookmarksList))
        self.bookmarksModel = BookmarksModel(self.bookmarksList)
        self.bookmarksModel.setSourceModel(self.model)

        self.ui.bookmarksTreeView.setHeaderHidden(True)
        self.ui.bookmarksTreeView.setIconSize(QSize(40, 40))
        self.ui.bookmarksTreeView.setStyleSheet(self._treeStyleSheet())
        self.ui.bookmarksTreeView.setModel(self.bookmarksModel)
        self.ui.bookmarksTreeView.setItemDelegate(self.delegate)
        self.ui.bookmarksTreeView.hideColumn(1)
        self.ui.bookmarksTreeView.setContextMenuPolicy(Qt.CustomContextMenu)
        self.ui.bookmarksTreeView.customContextMenuRequested.connect(
            self.onCustomMenu)

        # Tray icon
        self.ui.prefsAction.triggered.connect(self.showPrefs)
        self.ui.refreshAction.triggered.connect(self.refresh)
        self.ui.quitAction.triggered.connect(self.quit)
        self.ui.zoomInAction.triggered.connect(self.zoomIn)
        self.ui.zoomOutAction.triggered.connect(self.zoomOut)
        minimizeAction = QAction("Mi&nimize", self)
        minimizeAction.triggered.connect(self.hide)
        restoreAction = QAction("&Restore", self)
        restoreAction.triggered.connect(self.show)
        trayMenu = QMenu(self)
        trayMenu.addAction(minimizeAction)
        trayMenu.addAction(restoreAction)
        trayMenu.addAction(self.ui.prefsAction)
        trayMenu.addAction(self.ui.refreshAction)
        trayMenu.addAction(self.ui.quitAction)
        self.trayIcon = QSystemTrayIcon()
        self.trayIcon.setContextMenu(trayMenu)
        self.trayIcon.activated.connect(self.iconActivated)
        self.trayIcon.setIcon(readIcon)
        self.trayIcon.setVisible(True)

        # Keep the URL line edit and the web view in sync.
        self.ui.webView.loadStarted.connect(
            lambda self=self: self.ui.urlLineEdit.setText(
                self.ui.webView.url().toString()))
        self.ui.webView.loadFinished.connect(self.loadFinished)
        self.ui.urlLineEdit.returnPressed.connect(
            lambda self=self: self.ui.webView.setUrl(
                QUrl(self.ui.urlLineEdit.text())))
        self.ui.webView.setUrl(stUrl)

        # Periodic refresh
        self.timer = QTimer()
        self.timer.timeout.connect(self.refresh)
        self.updateInterval(s.value('misc/interval', defaultInterval,
                                    type=int))
        self.timer.start()

        self.messagesPage = QWebEnginePage()
        self.messagesPage.loadFinished.connect(self.messagesPageLoaded)
        self.refresh()

    def _treeStyleSheet(self):
        """Return the QTreeView stylesheet for the current ``fontSize``."""
        return "QTreeView {{font-size: {}pt;}}".format(self.fontSize)

    def _applyTreeFontSize(self):
        """Apply the current ``fontSize`` to both tree views."""
        styleSheet = self._treeStyleSheet()
        self.ui.treeView.setStyleSheet(styleSheet)
        self.ui.bookmarksTreeView.setStyleSheet(styleSheet)

    def zoomIn(self):
        """Zoom in the widget shown on the current tab.

        Tab 0 zooms the web view, tabs 1 and 2 grow the tree-view font (up
        to 20pt) and tab 3 zooms the log text edit.
        """
        currentIndex = self.ui.tabWidget.currentIndex()
        if currentIndex == 0:
            self.ui.webView.setZoomFactor(self.ui.webView.zoomFactor() + 0.25)
        elif currentIndex in (1, 2):
            if self.fontSize >= 20:
                return
            self.fontSize += 1
            self._applyTreeFontSize()
        elif currentIndex == 3:
            self.ui.logTextEdit.zoomIn()

    def zoomOut(self):
        """Zoom out the widget shown on the current tab.

        Tab 0 zooms the web view, tabs 1 and 2 shrink the tree-view font
        (down to 8pt) and tab 3 zooms the log text edit.
        """
        currentIndex = self.ui.tabWidget.currentIndex()
        if currentIndex == 0:
            self.ui.webView.setZoomFactor(self.ui.webView.zoomFactor() - 0.25)
        elif currentIndex in (1, 2):
            if self.fontSize <= 8:
                return
            self.fontSize -= 1
            self._applyTreeFontSize()
        elif currentIndex == 3:
            self.ui.logTextEdit.zoomOut()

    def onCustomMenu(self, point):
        """Show the context menu for the tree-view item at ``point``.

        The URL is read from column 1 of the clicked row; nothing is shown
        when ``baseUrl`` yields no URL for it.
        """
        sender = self.sender()
        index = sender.indexAt(point)
        urlIndex = index.siblingAtColumn(1)
        url = str(sender.model().data(urlIndex, role=Qt.DisplayRole))
        url = baseUrl(url)
        if not url:
            return

        self.contextMenu = QMenu()

        # Values are bound as lambda defaults so each action keeps its own.
        action = QAction("Copy url", self.contextMenu)
        action.triggered.connect(
            lambda checked, arg=url: QApplication.clipboard().setText(arg))
        self.contextMenu.addAction(action)

        action = QAction("Open in browser", self.contextMenu)
        action.triggered.connect(
            lambda checked, self=self, url=url: self.toBrowser(url))
        self.contextMenu.addAction(action)

        action = QAction("Open in default browser", self.contextMenu)
        action.triggered.connect(lambda checked, arg=url: subprocess.call(
            [sys.executable, '-m', 'webbrowser', '-t', arg]))
        self.contextMenu.addAction(action)

        if url not in self.bookmarksList:
            enabled = True
            action = QAction("Add to bookmarks", self.contextMenu)
        else:
            enabled = False
            action = QAction("Remove from bookmarks", self.contextMenu)
        action.triggered.connect(
            lambda checked, self=self, url=url, enabled=enabled:
            self.setBookmarked(url, enabled))
        self.contextMenu.addAction(action)

        action = QAction("Check now", self.contextMenu)
        action.triggered.connect(
            lambda checked, self=self, url=url: self.model.checkNow(url))
        self.contextMenu.addAction(action)

        self.contextMenu.exec(sender.viewport().mapToGlobal(point))

    def setBookmarked(self, url, enabled):
        """Add (``enabled`` true) or remove ``url`` from the bookmarks.

        The bookmarks model is invalidated and the list is written back to
        the settings in either case.
        """
        bookmarked = url in self.bookmarksList
        s = QSettings(orgName, appName)
        if bookmarked and not enabled:
            self.bookmarksList.remove(url)
        elif enabled and not bookmarked:
            self.bookmarksList.append(url)
        self.bookmarksModel.invalidate()
        s.setValue('bookmarks/bookmarks_list', '\n'.join(self.bookmarksList))

    def toBrowser(self, url):
        """Open ``url`` in the embedded web view and switch to tab 0."""
        self.ui.webView.setUrl(QUrl(url))
        self.ui.tabWidget.setCurrentIndex(0)

    def logMessage(self, msg):
        """Append ``msg`` to the log text edit."""
        self.ui.logTextEdit.appendPlainText(msg)

    def updateLogLevel(self, newLevel):
        """Set the root logger and all its handlers to ``logLevels[newLevel]``."""
        logging.debug('setting log level ' + str(newLevel))
        logging.getLogger().setLevel(logLevels[newLevel])
        for h in logging.getLogger().handlers:
            h.setLevel(logLevels[newLevel])

    def updateLogger(self):
        """Recreate the optional log-file handler from the current settings."""
        logging.debug("updating logger handlers")
        s = QSettings(orgName, appName)
        if self.fileHandler:
            logging.getLogger().removeHandler(self.fileHandler)
            self.fileHandler = None
        if s.value('logfile/enable', False, type=bool):
            f = s.value('logfile/filename', defaultLogfile)
            # Relative file names are resolved against baseDir.
            if not os.path.isabs(f):
                f = os.path.join(baseDir, f)
            logging.debug('new file handler for ' + f)
            fileHandler = logging.FileHandler(f)
            level = s.value('misc/loglevel', defaultLevel, type=int)
            fileHandler.setLevel(logLevels[level])
            fileHandler.setFormatter(logging.Formatter(logFormat))
            self.fileHandler = fileHandler
            logging.getLogger().addHandler(self.fileHandler)

    def updateAutoSearch(self):
        """Reload the auto-search flag and the have/want lists from settings.

        Each list is stored as newline-separated text; entries are stripped,
        lower-cased and blank lines are dropped before being handed to the
        model.
        """
        s = QSettings(orgName, appName)
        self.autoSearchEnabled = s.value('autosearch/enable', False,
                                         type=bool)
        haveListStr = s.value('autosearch/have_list', '')
        haveList = [
            line.strip().lower() for line in haveListStr.split('\n')
            if line.strip()
        ]
        wantListStr = s.value('autosearch/want_list', '')
        wantList = [
            line.strip().lower() for line in wantListStr.split('\n')
            if line.strip()
        ]
        self.model.updateLists(haveList, wantList)

    def searchPageLoaded(self, ok):
        """Pass the auto-search page's HTML to the model once it has loaded."""
        if not ok:
            logging.warning('Failed to load URL: ' + stUrl.toString())
            return
        self.autoSearchPage.toHtml(self.model.parseSearchResults)

    def refresh(self):
        """Reload the messages page and, if auto search is on, the searches.

        With auto search enabled the search page is reloaded and every
        bookmarked URL is queued on the model.
        """
        self.messagesPage.setUrl(messagesUrl)
        if not self.autoSearchEnabled:
            return
        self.autoSearchPage.setUrl(stUrl)
        for url in self.bookmarksList:
            self.model.queueUrl(url, False)

    def updateInterval(self, newInterval):
        """Set the refresh timer interval to ``newInterval`` minutes."""
        logging.info(
            'setting refresh interval: {} minutes'.format(newInterval))
        self.timer.setInterval(newInterval * 1000 * 60)

    def quit(self):
        """Cancel the workers, wait for the thread pool and close the window."""
        self.trayIcon.setVisible(False)
        # closeEvent() only accepts the close once this flag is set.
        self.quitting = True
        self.model.cancelAll.emit()
        self.statusBar().showMessage('Waiting for worker threads...')
        QThreadPool.globalInstance().waitForDone()
        QApplication.setQuitOnLastWindowClosed(True)
        self.close()

    def loadFinished(self, ok):
        """Handle a finished load of the embedded web view.

        Updates the URL line edit and, when auto search is enabled, feeds
        search pages to the model's parser and queues single trade pages.
        """
        if not ok:
            logging.warning('failed to load URL: ' +
                            self.ui.webView.url().toString())
            return
        self.ui.urlLineEdit.setText(self.ui.webView.url().toString())
        self.ui.urlLineEdit.setCursorPosition(0)
        if not self.autoSearchEnabled:
            return
        url = self.ui.webView.url().toString()
        if url == stUrl.toString() or url.startswith(
                'https://www.steamtrades.com/trades/search'):
            self.ui.webView.page().toHtml(self.model.parseSearchResults)
        elif url.startswith('https://www.steamtrades.com/trade/'):
            self.model.queueUrl(url, False)

    def messagesPageLoaded(self, ok):
        """Pass the messages page's HTML to checkMessages() once loaded."""
        if not ok:
            logging.warning('failed to load URL: ' +
                            self.messagesPage.url().toString())
            return
        self.messagesPage.toHtml(self.checkMessages)

    def closeEvent(self, event):
        """Hide the window instead of closing it unless quit() was called."""
        if not self.quitting:
            self.hide()
            event.ignore()
        else:
            event.accept()

    def iconActivated(self, reason):
        """Toggle window visibility on tray activation.

        Activations with reason ``Context`` or ``Trigger`` are ignored.
        """
        if reason in (QSystemTrayIcon.Context, QSystemTrayIcon.Trigger):
            return
        if self.isVisible():
            self.hide()
        else:
            self.show()

    def showPrefs(self):
        """Open the preferences dialog and apply its change signals."""
        d = PrefsDialog(self)
        d.intervalChanged.connect(self.updateInterval)
        d.loglevelChanged.connect(self.updateLogLevel)
        d.logfileChanged.connect(self.updateLogger)
        d.autoSearchChanged.connect(self.updateAutoSearch)
        d.exec_()

    def showError(self, message):
        """Show ``message`` as a warning balloon on the tray icon."""
        self.trayIcon.showMessage('', message, QSystemTrayIcon.Warning)

    def showStatusMessage(self, message):
        """Show ``message`` in the status bar for 5 seconds."""
        self.statusBar().showMessage(message, 5000)

    def showProgress(self, percent):
        """Show ``percent`` on the progress bar; hide the bar outside 0..99."""
        if 0 <= percent <= 99:
            self.progressBar.setValue(percent)
            if self.progressBar.isHidden():
                self.progressBar.show()
        else:
            self.progressBar.hide()

    def onMailError(self, msg, permalink):
        """Report a mail error and forget ``permalink``.

        Removing the permalink from ``permalinks`` means checkMessages()
        will treat that comment as not yet notified.
        """
        self.showError(msg)
        if permalink in self.permalinks:
            self.permalinks.remove(permalink)

    def _sendMailNotification(self, count, author, message, permalink):
        """Queue an e-mail about an unread comment if e-mail notify is on.

        Args:
            count: Text of the unread-message counter, used in the template.
            author: Name of the comment's author.
            message: Text of the comment.
            permalink: Link of the comment, handed to the mail sender.
        """
        s = QSettings(orgName, appName)
        if not s.value('email/notify', False, type=bool):
            return

        sender = s.value('email/sender')
        recipient = s.value('email/recipient')
        smtpServer = s.value('email/host')
        smtpPort = s.value('email/port')
        encryption = s.value('email/encryption_type') if s.value(
            'email/encrypt', False, type=bool) else ''
        username = s.value('email/username') if s.value(
            'email/login', False, type=bool) else ''
        password = ''
        try:
            if s.value('email/login', False, type=bool):
                password = keyring.get_password(sysName, "email/password")
        except Exception as e:
            # Best effort: continue with an empty password.
            logging.warning('Cannot read password from keyring: ' + str(e))

        mailSender = MailSender(
            sender, recipient, smtpServer, smtpPort, encryption, username,
            password,
            messageTemplate.format(sender=sender,
                                   recipient=recipient,
                                   count=count,
                                   author=author,
                                   message=message), permalink)
        logging.info('sending email...')
        mailSender.emitter.error.connect(self.onMailError)
        QThreadPool.globalInstance().start(mailSender)

    def checkMessages(self, page):
        """Parse the messages page HTML and notify about unread comments.

        Sets the read/unread icons, and for every unread comment whose
        permalink is not yet in ``permalinks`` shows a tray message and
        optionally queues an e-mail. Any exception raised while parsing the
        comments is logged and re-emitted through the ``error`` signal.

        Args:
            page: HTML source of the messages page.
        """
        url = self.messagesPage.url()
        logging.info('loaded page ' + url.toString())
        soup = BeautifulSoup(page, 'html.parser')
        if url.host() in ('www.steamtrades.com', 'steamtrades.com'):
            if '<span>Messages' not in page:
                logging.warning(
                    'log in to SteamTrades to receive message notifications')
                return
        if url != messagesUrl:
            return

        messageCount = soup.find('span', attrs={'class': 'message_count'})
        if not messageCount:
            self.trayIcon.setIcon(readIcon)
            self.setWindowIcon(readIcon)
            return

        logging.debug('message count:' + messageCount.text)
        self.trayIcon.setIcon(unreadIcon)
        self.setWindowIcon(unreadIcon)
        try:
            parsed = 0
            for comment in soup.find_all('div',
                                         attrs={'class': 'comment_inner'}):
                # Stop once as many unread comments as the counter reports
                # have been handled.
                if parsed >= int(messageCount.text):
                    break
                if comment.find('div',
                                attrs={'class': 'comment_unread'}) is None:
                    continue
                parsed += 1
                author = comment.find('a', attrs={
                    'class': 'author_name'
                }).text.strip()
                message = comment.find('div', attrs={
                    'class': 'comment_body_default markdown'
                }).text.strip()
                permalink = comment.find_all('a')[-1]['href']
                if permalink not in self.permalinks:
                    logging.debug('unread comment: \n' + str(comment))
                    logging.debug('author: ' + author)
                    logging.debug('message: \n' + message)
                    logging.debug('permalink:' + permalink)
                    self.trayIcon.showMessage("New message from " + author,
                                              message)
                    self._sendMailNotification(messageCount.text, author,
                                               message, permalink)
                    self.permalinks.append(permalink)
        except Exception as e:
            logging.error(str(e))
            self.error.emit(str(e))
class Worker(QRunnable):
    """Load one trade page and report matches against the have/want lists.

    The page is loaded through a QWebEnginePage created in the constructor.
    Once its HTML has been delivered the worker queues itself on the global
    thread pool, where run() parses the HTML and reports through the
    ``emitter`` signals. A 3-minute single-shot timer calls cancel().
    """

    def __init__(self, url, haveList=None, wantList=None, id_=-1):
        """Create the worker and start loading ``url``.

        Args:
            url: Address of the trade page.
            haveList: Strings matched against the page's "want" section;
                defaults to an empty list.
            wantList: Strings matched against the page's "have" section;
                defaults to an empty list.
            id_: Identifier passed back with every emitted signal.
        """
        super().__init__()
        self.url = url
        # None instead of a mutable default, so instances never share a list.
        self.haveList = [] if haveList is None else haveList
        self.wantList = [] if wantList is None else wantList
        self.id_ = id_
        self.state = WorkerState.PENDING
        self.mutex = QMutex()
        self.emitter = Emitter()
        self.page = QWebEnginePage()
        self.html = ''
        self.page.loadFinished.connect(self.loadFinished)
        self.page.setUrl(QUrl(self.url))
        # Set timeout to 3 minutes
        QTimer.singleShot(3 * 60 * 1000, self.cancel)

    def changeState(self, new, old=None):
        """Switch to state ``new``, optionally only from state ``old``.

        Args:
            new: State to switch to.
            old: If given, the switch happens only while the current state
                equals ``old``.

        Returns:
            True if the state was changed, False otherwise.
        """
        # The locker holds the mutex for as long as this local is alive.
        mutexLocker = QMutexLocker(self.mutex)
        # Compare against None explicitly so that a falsy state member is
        # still honoured as the required previous state.
        if old is None or old == self.state:
            self.state = new
            return True
        return False

    def _emitMatches(self, soup, cssClass, games, prefix, nodeType):
        """Emit ``newNode`` for each line of a section that contains a game.

        Looks up the ``div`` with class ``cssClass`` and, for every entry of
        ``games``, emits every line of the section's text whose lower-cased
        form contains that entry, prefixed with ``prefix``.
        """
        section = soup.find('div', attrs={'class': cssClass})
        if not section:
            return
        lines = section.text.split('\n')
        for game in games:
            for line in lines:
                if game in line.lower():
                    self.emitter.newNode.emit(self.id_, prefix + line,
                                              nodeType)

    def run(self):
        """Parse the loaded HTML and emit title, icon URL and matches.

        Does nothing unless the worker is still pending. Parsing errors are
        logged; ``finished`` is emitted either way.
        """
        if not self.changeState(WorkerState.RUNNING, WorkerState.PENDING):
            return
        try:
            soup = BeautifulSoup(self.html, 'html.parser')
            closedTag = soup.find('div',
                                  attrs={'class': 'notification yellow'})
            if closedTag and closedTag.text.startswith('Closed'):
                title = 'Closed'
            else:
                title = soup.find('div', attrs={
                    'class': 'page_heading'
                }).find('h1').text
            style = soup.find('div', attrs={
                'class': 'comment_inner'
            }).find('a', attrs={'class': 'author_avatar'})['style']
            # Raw string: '\(' is not a valid escape in a normal literal.
            res = re.findall(r'url\((.*)\);', style)
            iconUrl = res[0] if len(res) == 1 else ''
            self.emitter.updateName.emit(self.id_, title)
            self.emitter.updateIconUrl.emit(self.id_, iconUrl)
            # What the page offers is matched against what we want, and
            # what the page asks for against what we have.
            self._emitMatches(soup, 'have markdown', self.wantList, '[H] ',
                              NodeType.H_GAME)
            self._emitMatches(soup, 'want markdown', self.haveList, '[W] ',
                              NodeType.W_GAME)
        except Exception as e:
            logging.error('Error parsing trade page: ' + str(e))
        self.changeState(WorkerState.FINISHED)
        self.emitter.finished.emit(self.id_)

    def loadFinished(self, ok):
        """Request the page HTML, or report a load error when ``ok`` is false."""
        if not ok:
            logging.warning('Failed to load page: ' + self.url)
            self.changeState(WorkerState.FINISHED)
            self.emitter.loadError.emit(self.id_)
            self.emitter.finished.emit(self.id_)
            return
        self.page.toHtml(self.processPage)

    def processPage(self, html):
        """Store ``html`` and queue this worker on the global thread pool."""
        self.html = html
        QThreadPool.globalInstance().start(self)

    def cancel(self):
        """Stop loading and report an error if the worker is still pending."""
        if not self.changeState(WorkerState.FINISHED, WorkerState.PENDING):
            return
        self.page.triggerAction(QWebEnginePage.Stop)
        logging.warning('Canceling ' + self.url)
        self.emitter.loadError.emit(self.id_)
        self.emitter.finished.emit(self.id_)
# NOTE(review): the statements down to the print() call contain a bare
# `return`, so they are presumably the tail of a function whose header lies
# outside this view; `update_date_text`, `href` and `file_url` are not defined
# here -- confirm against the full file.
import re
# Look for a date-like token: up to two digits, any character, up to two
# digits, any character, four digits.
match = re.search(r'\d{,2}.\d{,2}.\d{4}', update_date_text)
if match is None:
    return

date_string = match.group()

import os
# Prefix the file's base name with the date that was found.
file_name = os.path.basename(href)
file_name = date_string + '_' + file_name

from datetime import datetime
print(datetime.today().date(), file_name, file_url)


url = 'http://www.dns-shop.ru/'

app = QApplication([])

page = QWebEnginePage()
page.load(QUrl(url))
# Hand the page source to _callable every time a load finishes.
page.loadFinished.connect(lambda x=None: page.toHtml(_callable))

# Schedule a reload of the page every 10 hours
timer = QTimer()
timer.setInterval(10 * 60 * 60 * 1000)
timer.timeout.connect(lambda x=None: page.load(QUrl(url)))
timer.start()

app.exec()