class mainWindow(QtGui.QWidget):
    """Top-level widget that hosts a single WebView inside a horizontal layout.

    On construction the widget is positioned at (10, 10) with the requested
    size, the local test page is loaded, and the window is shown.
    """

    def __init__(self, width=600, height=800):
        """Size the window, then build and show its contents.

        Args:
            width: Window width passed to setGeometry().
            height: Window height passed to setGeometry().
        """
        super(mainWindow, self).__init__()
        # NOTE(review): these instance attributes shadow the QWidget.width()
        # and QWidget.height() methods on this object; kept as-is because
        # callers may already read them as plain integers.
        self.width, self.height = width, height
        self.setGeometry(10, 10, width, height)
        self.initial()

    def initial(self):
        """Create the layout and web view, start the page load, show the window."""
        layout = QtGui.QHBoxLayout()
        view = WebView()
        self.MainHLayout = layout
        self.WebView = view

        view.show()
        view.load(QtCore.QUrl("http://127.0.0.1/tpcdebt/test.php"))
        layout.addWidget(view)

        view.loadFinished.connect(self.loadPageOK)
        # Every link is delegated, so clicks are reported by the page
        # rather than followed automatically.
        view.page().setLinkDelegationPolicy(QtWebKit.QWebPage.DelegateAllLinks)

        self.setLayout(layout)
        self.show()

    def loadPageOK(self, isFinished):
        """Print a confirmation when the page load reports success.

        Args:
            isFinished: Success flag delivered by the loadFinished signal.
        """
        if not isFinished:
            return
        print("load OK")
class mainWindow(QtGui.QWidget):
    """Top-level widget that hosts a single WebView inside a horizontal layout.

    On construction the widget is positioned at (10, 10) with the requested
    size, the local test page is loaded, and the window is shown.
    """

    def __init__(self, width=600, height=800):
        """Size the window, then build and show its contents.

        Args:
            width: Window width passed to setGeometry().
            height: Window height passed to setGeometry().
        """
        super(mainWindow, self).__init__()
        # NOTE(review): these instance attributes shadow the QWidget.width()
        # and QWidget.height() methods on this object; kept as-is because
        # callers may already read them as plain integers.
        self.width, self.height = width, height
        self.setGeometry(10, 10, width, height)
        self.initial()

    def initial(self):
        """Create the layout and web view, start the page load, show the window."""
        layout = QtGui.QHBoxLayout()
        view = WebView()
        self.MainHLayout = layout
        self.WebView = view

        view.show()
        view.load(QtCore.QUrl("http://127.0.0.1/tpcdebt/test.php"))
        layout.addWidget(view)

        view.loadFinished.connect(self.loadPageOK)
        # Every link is delegated, so clicks are reported by the page
        # rather than followed automatically.
        view.page().setLinkDelegationPolicy(QtWebKit.QWebPage.DelegateAllLinks)

        self.setLayout(layout)
        self.show()

    def loadPageOK(self, isFinished):
        """Print a confirmation when the page load reports success.

        Args:
            isFinished: Success flag delivered by the loadFinished signal.
        """
        if not isFinished:
            return
        print("load OK")
class Connection(QtCore.QObject):
    """Serve one client socket by driving a dedicated WebView.

    Commands are read from the socket in service(); forceStop() tears the
    connection down and announces it through closeConnection.
    """

    # Emitted by forceStop() with this connection's web view as payload.
    # Declared with one argument so the emit() call matches the signature.
    closeConnection = QtCore.Signal(object)

    def __init__(self, TcpSocket=None):
        """Bind the connection to an already-accepted socket.

        Args:
            TcpSocket: Socket object used for reading commands and writing
                replies. Required despite the ``None`` default.

        Raises:
            ValueError: If no socket is supplied.
        """
        if TcpSocket is None:
            raise ValueError("Connection requires a TcpSocket")
        # The QObject base must be initialised before signals, thread() or
        # deleteLater() can be used on this instance.
        super(Connection, self).__init__()
        self.stop = False
        self.webview = WebView()
        # Wrapped in QUrl for consistency with the other load() calls here.
        self.webview.load(QtCore.QUrl("http://127.0.0.1"))
        self.TcpSocket = TcpSocket

    def widget(self):
        """Return the web view owned by this connection."""
        return self.webview

    @QtCore.Slot()
    def forceStop(self):
        """Stop the service loop, drop the socket and schedule self-deletion."""
        self.stop = True
        self.closeConnection.emit(self.webview)
        self.TcpSocket.abort()
        self.TcpSocket.deleteLater()
        # self.webview.deleteLater()
        self.deleteLater()
        # NOTE(review): assumes the owning thread object provides forceStop();
        # that is not visible in this file -- confirm.
        self.thread().forceStop()

    @staticmethod
    def _decodeCommand(raw):
        """Decode raw socket data into a command dict, or None if invalid.

        NOTE(review): the wire format is not defined in this file; commands
        are assumed to be JSON objects carrying a "type" key -- confirm
        against the client implementation.
        """
        import json

        payload = raw.data() if hasattr(raw, "data") else raw
        if isinstance(payload, bytes):
            payload = payload.decode("utf-8", "replace")
        try:
            command = json.loads(payload)
        except ValueError:
            return None
        return command if isinstance(command, dict) else None

    @QtCore.Slot()
    def service(self):
        """Read commands from the socket and act on them until stopped.

        Handles "loadUrl" (load the given URL and acknowledge) and "close"
        (tear the connection down). Anything else is reported and ignored.
        """
        print(self.thread().currentThreadId())
        while not self.stop:
            # Wait briefly for data instead of spinning on empty reads; the
            # short timeout lets the stop flag be re-checked regularly.
            if not self.TcpSocket.waitForReadyRead(100):
                continue
            command = self._decodeCommand(self.TcpSocket.read(4096))
            kind = command.get("type") if command is not None else None
            if kind == "loadUrl" and "url" in command:
                self.webview.load(QtCore.QUrl(command["url"]))
                # NOTE(review): the reply is written right after load() is
                # called, not when the view reports loadFinished -- confirm
                # this is the intended acknowledgement point.
                self.TcpSocket.write(QtCore.QByteArray(b"load finished"))
                self.TcpSocket.flush()
            elif kind == "close":
                self.forceStop()
            else:
                print("unknow command")
class Connection(QtCore.QObject):
    """Serve one client socket by driving a dedicated WebView.

    Commands are read from the socket in service(); forceStop() tears the
    connection down and announces it through closeConnection.
    """

    # Emitted by forceStop() with this connection's web view as payload.
    # Declared with one argument so the emit() call matches the signature.
    closeConnection = QtCore.Signal(object)

    def __init__(self, TcpSocket=None):
        """Bind the connection to an already-accepted socket.

        Args:
            TcpSocket: Socket object used for reading commands and writing
                replies. Required despite the ``None`` default.

        Raises:
            ValueError: If no socket is supplied.
        """
        if TcpSocket is None:
            raise ValueError("Connection requires a TcpSocket")
        # The QObject base must be initialised before signals, thread() or
        # deleteLater() can be used on this instance.
        super(Connection, self).__init__()
        self.stop = False
        self.webview = WebView()
        # Wrapped in QUrl for consistency with the other load() calls here.
        self.webview.load(QtCore.QUrl("http://127.0.0.1"))
        self.TcpSocket = TcpSocket

    def widget(self):
        """Return the web view owned by this connection."""
        return self.webview

    @QtCore.Slot()
    def forceStop(self):
        """Stop the service loop, drop the socket and schedule self-deletion."""
        self.stop = True
        self.closeConnection.emit(self.webview)
        self.TcpSocket.abort()
        self.TcpSocket.deleteLater()
        # self.webview.deleteLater()
        self.deleteLater()
        # NOTE(review): assumes the owning thread object provides forceStop();
        # that is not visible in this file -- confirm.
        self.thread().forceStop()

    @staticmethod
    def _decodeCommand(raw):
        """Decode raw socket data into a command dict, or None if invalid.

        NOTE(review): the wire format is not defined in this file; commands
        are assumed to be JSON objects carrying a "type" key -- confirm
        against the client implementation.
        """
        import json

        payload = raw.data() if hasattr(raw, "data") else raw
        if isinstance(payload, bytes):
            payload = payload.decode("utf-8", "replace")
        try:
            command = json.loads(payload)
        except ValueError:
            return None
        return command if isinstance(command, dict) else None

    @QtCore.Slot()
    def service(self):
        """Read commands from the socket and act on them until stopped.

        Handles "loadUrl" (load the given URL and acknowledge) and "close"
        (tear the connection down). Anything else is reported and ignored.
        """
        print(self.thread().currentThreadId())
        while not self.stop:
            # Wait briefly for data instead of spinning on empty reads; the
            # short timeout lets the stop flag be re-checked regularly.
            if not self.TcpSocket.waitForReadyRead(100):
                continue
            command = self._decodeCommand(self.TcpSocket.read(4096))
            kind = command.get("type") if command is not None else None
            if kind == "loadUrl" and "url" in command:
                self.webview.load(QtCore.QUrl(command["url"]))
                # NOTE(review): the reply is written right after load() is
                # called, not when the view reports loadFinished -- confirm
                # this is the intended acknowledgement point.
                self.TcpSocket.write(QtCore.QByteArray(b"load finished"))
                self.TcpSocket.flush()
            elif kind == "close":
                self.forceStop()
            else:
                print("unknow command")
class Crawler(QtGui.QWidget):
    """Small browser window with a URL bar, a tabbed WebView and a load button.

    After each successful page load the URL bar is synchronised with the
    view. When the page is the default.asp frameset, every child frame's
    ``td`` elements are handed to findAllElementsChild().
    """

    # Carries the URL typed in the URL bar from the button click to the loader.
    signalClickedToLoadUrl = QtCore.Signal(QtCore.QUrl)

    def __init__(self, width=800, height=600):
        """Place the window at (10, 10) with the given size and build the UI.

        Args:
            width: Window width passed to setGeometry().
            height: Window height passed to setGeometry().
        """
        super(Crawler, self).__init__()
        self.setGeometry(10, 10, width, height)
        self.setupUI()

    def setupUI(self):
        """Create the widgets, lay them out, wire the signals and show the window."""
        self.mainHBoxLayout = QtGui.QHBoxLayout()
        self.mainRVBoxLayout = QtGui.QVBoxLayout()
        self.mainLVBoxLayout = QtGui.QVBoxLayout()
        self.executeCrawler = QtGui.QPushButton("Get Content!!!")
        self.urlBarLineEdit = QtGui.QLineEdit(
            "http://www2.tpa.edu.tw/tpaedu/Home/login.asp")
        self.myWebView = WebView()
        self.myTabWidget = TabWidget()
        # self.myWebView.settings().setDefaultTextEncoding("big5")

        self.myWebView.show()
        self.myTabWidget.addTab(self.myWebView, "my Web View")

        # Left column: URL bar above the tabbed view. Right column: button.
        self.mainLVBoxLayout.addWidget(self.urlBarLineEdit)
        self.mainLVBoxLayout.addWidget(self.myTabWidget)
        self.mainRVBoxLayout.addWidget(self.executeCrawler)
        self.mainHBoxLayout.addLayout(self.mainLVBoxLayout)
        self.mainHBoxLayout.addLayout(self.mainRVBoxLayout)
        self.setLayout(self.mainHBoxLayout)

        self.executeCrawler.clicked.connect(self.slotClickedToLoadUrl)
        self.signalClickedToLoadUrl.connect(self.slotLoadUrl)
        self.myWebView.loadFinished.connect(self.slotWebViewLoadFinished)
        self.show()

    @QtCore.Slot()
    def slotClickedToLoadUrl(self):
        """Print the URL bar text and re-emit it as a QUrl."""
        print(self.urlBarLineEdit.text())
        self.signalClickedToLoadUrl.emit(
            QtCore.QUrl(self.urlBarLineEdit.text()))

    @QtCore.Slot(QtCore.QUrl)
    def slotLoadUrl(self, qUrl):
        """Load ``qUrl`` in the web view."""
        self.myWebView.load(qUrl)

    @QtCore.Slot(bool)
    def slotWebViewLoadFinished(self, isFinished):
        """Sync the URL bar and walk the frames of the default.asp frameset.

        Args:
            isFinished: Success flag delivered by the loadFinished signal;
                nothing happens when it is false.
        """
        if not isFinished:
            return

        self.urlBarLineEdit.setText(self.myWebView.url().toString())
        mainFrame = self.myWebView.page().mainFrame()
        pageFrames = mainFrame.childFrames()
        # Computed once; it is used for both the log line and the page check.
        baseUrl = mainFrame.baseUrl().toString()
        print("base Url := " + baseUrl)

        if baseUrl != "http://www2.tpa.edu.tw/tpaedu/default.asp":
            # No handling exists yet for other pages, including the login
            # page at http://www2.tpa.edu.tw/tpaedu/Home/login.asp.
            return

        print("it is tpa local net")
        for frame in pageFrames:
            print(frame.frameName())
            elements = frame.findAllElements("td")
            print(elements.count())
            # The original indexed at(index + 1) for index in range(count),
            # which reads one past the end on the last pass. Valid indices
            # are 0..count-1, so iterate 1..count-1 directly.
            # NOTE(review): element 0 is still skipped, as before -- confirm
            # whether skipping the first <td> is intended.
            for index in range(1, elements.count()):
                findAllElementsChild(elements.at(index).firstChild())
            # Per-frame handling is not implemented yet. Planned hooks:
            #   "logo" - nothing to do
            #   "menu" - get menu for javascript robot action
            #   "main" - get main content to crawl data
class Crawler(QtGui.QWidget):
    """Small browser window with a URL bar, a tabbed WebView and a load button.

    After each successful page load the URL bar is synchronised with the
    view. When the page is the default.asp frameset, every child frame's
    ``td`` elements are handed to findAllElementsChild().
    """

    # Carries the URL typed in the URL bar from the button click to the loader.
    signalClickedToLoadUrl = QtCore.Signal(QtCore.QUrl)

    def __init__(self, width=800, height=600):
        """Place the window at (10, 10) with the given size and build the UI.

        Args:
            width: Window width passed to setGeometry().
            height: Window height passed to setGeometry().
        """
        super(Crawler, self).__init__()
        self.setGeometry(10, 10, width, height)
        self.setupUI()

    def setupUI(self):
        """Create the widgets, lay them out, wire the signals and show the window."""
        self.mainHBoxLayout = QtGui.QHBoxLayout()
        self.mainRVBoxLayout = QtGui.QVBoxLayout()
        self.mainLVBoxLayout = QtGui.QVBoxLayout()
        self.executeCrawler = QtGui.QPushButton("Get Content!!!")
        self.urlBarLineEdit = QtGui.QLineEdit(
            "http://www2.tpa.edu.tw/tpaedu/Home/login.asp")
        self.myWebView = WebView()
        self.myTabWidget = TabWidget()
        # self.myWebView.settings().setDefaultTextEncoding("big5")

        self.myWebView.show()
        self.myTabWidget.addTab(self.myWebView, "my Web View")

        # Left column: URL bar above the tabbed view. Right column: button.
        self.mainLVBoxLayout.addWidget(self.urlBarLineEdit)
        self.mainLVBoxLayout.addWidget(self.myTabWidget)
        self.mainRVBoxLayout.addWidget(self.executeCrawler)
        self.mainHBoxLayout.addLayout(self.mainLVBoxLayout)
        self.mainHBoxLayout.addLayout(self.mainRVBoxLayout)
        self.setLayout(self.mainHBoxLayout)

        self.executeCrawler.clicked.connect(self.slotClickedToLoadUrl)
        self.signalClickedToLoadUrl.connect(self.slotLoadUrl)
        self.myWebView.loadFinished.connect(self.slotWebViewLoadFinished)
        self.show()

    @QtCore.Slot()
    def slotClickedToLoadUrl(self):
        """Print the URL bar text and re-emit it as a QUrl."""
        print(self.urlBarLineEdit.text())
        self.signalClickedToLoadUrl.emit(
            QtCore.QUrl(self.urlBarLineEdit.text()))

    @QtCore.Slot(QtCore.QUrl)
    def slotLoadUrl(self, qUrl):
        """Load ``qUrl`` in the web view."""
        self.myWebView.load(qUrl)

    @QtCore.Slot(bool)
    def slotWebViewLoadFinished(self, isFinished):
        """Sync the URL bar and walk the frames of the default.asp frameset.

        Args:
            isFinished: Success flag delivered by the loadFinished signal;
                nothing happens when it is false.
        """
        if not isFinished:
            return

        self.urlBarLineEdit.setText(self.myWebView.url().toString())
        mainFrame = self.myWebView.page().mainFrame()
        pageFrames = mainFrame.childFrames()
        # Computed once; it is used for both the log line and the page check.
        baseUrl = mainFrame.baseUrl().toString()
        print("base Url := " + baseUrl)

        if baseUrl != "http://www2.tpa.edu.tw/tpaedu/default.asp":
            # No handling exists yet for other pages, including the login
            # page at http://www2.tpa.edu.tw/tpaedu/Home/login.asp.
            return

        print("it is tpa local net")
        for frame in pageFrames:
            print(frame.frameName())
            elements = frame.findAllElements("td")
            print(elements.count())
            # The original indexed at(index + 1) for index in range(count),
            # which reads one past the end on the last pass. Valid indices
            # are 0..count-1, so iterate 1..count-1 directly.
            # NOTE(review): element 0 is still skipped, as before -- confirm
            # whether skipping the first <td> is intended.
            for index in range(1, elements.count()):
                findAllElementsChild(elements.at(index).firstChild())
            # Per-frame handling is not implemented yet. Planned hooks:
            #   "logo" - nothing to do
            #   "menu" - get menu for javascript robot action
            #   "main" - get main content to crawl data