class mainWindow(QtGui.QWidget):
    """Top-level widget that hosts one WebView showing a fixed local test page.

    The requested size is applied through setGeometry() only.  It is
    deliberately NOT stored as ``self.width`` / ``self.height``: instance
    attributes with those names hide the inherited QWidget.width() and
    QWidget.height() accessors, so ``window.width()`` would raise
    ``TypeError: 'int' object is not callable`` and the stored numbers would
    go stale as soon as the window is resized.
    """

    def __init__(self, width=600, height=800):
        """Place the window at (10, 10) with the given size and build the UI.

        Args:
            width: initial window width handed to setGeometry().
            height: initial window height handed to setGeometry().
        """
        super(mainWindow, self).__init__()
        self.setGeometry(10, 10, width, height)
        self.initial()

    def initial(self):
        """Create the web view, start loading the test page and show the window."""
        self.MainHLayout = QtGui.QHBoxLayout()
        self.WebView = WebView()
        self.WebView.show()

        # Connect before load() so the completion handler is in place before
        # the request is started.
        self.WebView.loadFinished.connect(self.loadPageOK)
        self.WebView.load(QtCore.QUrl("http://127.0.0.1/tpcdebt/test.php"))
        self.MainHLayout.addWidget(self.WebView)

        # With DelegateAllLinks the page reports link clicks through its
        # linkClicked signal instead of following them itself.  This class
        # does not connect a handler for that signal.
        self.WebView.page().setLinkDelegationPolicy(
            QtWebKit.QWebPage.DelegateAllLinks)

        self.setLayout(self.MainHLayout)
        self.show()

    def loadPageOK(self, isFinished):
        """Print "load OK" when the view reports a successful page load.

        Args:
            isFinished: the bool carried by the view's loadFinished signal;
                nothing is printed when it is false.
        """
        if isFinished:
            print("load OK")
class mainWindow(QtGui.QWidget):
    """Top-level widget that hosts one WebView showing a fixed local test page.

    The requested size is applied through setGeometry() only.  It is
    deliberately NOT stored as ``self.width`` / ``self.height``: instance
    attributes with those names hide the inherited QWidget.width() and
    QWidget.height() accessors, so ``window.width()`` would raise
    ``TypeError: 'int' object is not callable`` and the stored numbers would
    go stale as soon as the window is resized.
    """

    def __init__(self, width=600, height=800):
        """Place the window at (10, 10) with the given size and build the UI.

        Args:
            width: initial window width handed to setGeometry().
            height: initial window height handed to setGeometry().
        """
        super(mainWindow, self).__init__()
        self.setGeometry(10, 10, width, height)
        self.initial()

    def initial(self):
        """Create the web view, start loading the test page and show the window."""
        self.MainHLayout = QtGui.QHBoxLayout()
        self.WebView = WebView()
        self.WebView.show()

        # Connect before load() so the completion handler is in place before
        # the request is started.
        self.WebView.loadFinished.connect(self.loadPageOK)
        self.WebView.load(QtCore.QUrl("http://127.0.0.1/tpcdebt/test.php"))
        self.MainHLayout.addWidget(self.WebView)

        # With DelegateAllLinks the page reports link clicks through its
        # linkClicked signal instead of following them itself.  This class
        # does not connect a handler for that signal.
        self.WebView.page().setLinkDelegationPolicy(
            QtWebKit.QWebPage.DelegateAllLinks)

        self.setLayout(self.MainHLayout)
        self.show()

    def loadPageOK(self, isFinished):
        """Print "load OK" when the view reports a successful page load.

        Args:
            isFinished: the bool carried by the view's loadFinished signal;
                nothing is printed when it is false.
        """
        if isFinished:
            print("load OK")
class Crawler(QtGui.QWidget):
    """Small browser-style widget: a URL bar, a tabbed web view and a button.

    Clicking the button emits ``signalClickedToLoadUrl`` with the URL bar's
    text, which is wired to ``slotLoadUrl`` and loads the page in the web
    view.  When a load finishes, the URL bar is refreshed and, for the site's
    home page, every ``<td>`` of every child frame is handed to the
    ``findAllElementsChild`` helper (defined outside this class).
    """

    # Emitted with the URL taken from the URL bar when the button is clicked.
    signalClickedToLoadUrl = QtCore.Signal(QtCore.QUrl)

    # Pages this widget knows about; compared against the main frame's base URL.
    _LOGIN_URL = "http://www2.tpa.edu.tw/tpaedu/Home/login.asp"
    _HOME_URL = "http://www2.tpa.edu.tw/tpaedu/default.asp"

    def __init__(self, width=800, height=600):
        """Place the window at (10, 10) with the given size and build the UI.

        Args:
            width: initial window width handed to setGeometry().
            height: initial window height handed to setGeometry().
        """
        super(Crawler, self).__init__()
        self.setGeometry(10, 10, width, height)
        self.setupUI()

    def setupUI(self):
        """Create the child widgets, lay them out, wire the signals and show."""
        self.mainHBoxLayout = QtGui.QHBoxLayout()
        self.mainRVBoxLayout = QtGui.QVBoxLayout()
        self.mainLVBoxLayout = QtGui.QVBoxLayout()

        self.executeCrawler = QtGui.QPushButton("Get Content!!!")
        self.urlBarLineEdit = QtGui.QLineEdit(self._LOGIN_URL)
        self.myWebView = WebView()
        self.myTabWidget = TabWidget()
        # Left disabled by the original author:
        # self.myWebView.settings().setDefaultTextEncoding("big5")
        self.myWebView.show()
        self.myTabWidget.addTab(self.myWebView, "my Web View")

        # Left column: URL bar above the tab widget.  Right column: the button.
        self.mainLVBoxLayout.addWidget(self.urlBarLineEdit)
        self.mainLVBoxLayout.addWidget(self.myTabWidget)
        self.mainRVBoxLayout.addWidget(self.executeCrawler)
        self.mainHBoxLayout.addLayout(self.mainLVBoxLayout)
        self.mainHBoxLayout.addLayout(self.mainRVBoxLayout)
        self.setLayout(self.mainHBoxLayout)

        self.executeCrawler.clicked.connect(self.slotClickedToLoadUrl)
        self.signalClickedToLoadUrl.connect(self.slotLoadUrl)
        self.myWebView.loadFinished.connect(self.slotWebViewLoadFinished)

        self.show()

    @QtCore.Slot()
    def slotClickedToLoadUrl(self):
        """Print the URL bar's text and emit it as a QUrl."""
        urlText = self.urlBarLineEdit.text()
        print(urlText)
        self.signalClickedToLoadUrl.emit(QtCore.QUrl(urlText))

    @QtCore.Slot(QtCore.QUrl)
    def slotLoadUrl(self, qUrl):
        """Load ``qUrl`` in the web view."""
        self.myWebView.load(qUrl)

    @QtCore.Slot(bool)
    def slotWebViewLoadFinished(self, isFinished):
        """Refresh the URL bar and walk the loaded page's frames.

        Args:
            isFinished: the bool carried by the view's loadFinished signal;
                nothing happens when it is false.
        """
        if not isFinished:
            return

        self.urlBarLineEdit.setText(self.myWebView.url().toString())

        mainFrame = self.myWebView.page().mainFrame()
        pageFrames = mainFrame.childFrames()
        baseUrl = mainFrame.baseUrl().toString()
        print("base Url := " + baseUrl)

        if baseUrl == self._HOME_URL:
            print("it is tpa local net")
            for frame in pageFrames:
                frameName = frame.frameName()
                print(frameName)

                elements = frame.findAllElements("td")
                elementCount = elements.count()
                print(elementCount)
                # Valid indices are 0 .. count - 1.  The previous code read
                # at(index + 1), which skipped the first <td> and asked for
                # an element one past the end on the last iteration.
                for index in range(elementCount):
                    findAllElementsChild(elements.at(index).firstChild())

                # Per-frame handling is not implemented yet.
                if frameName == "logo":
                    pass
                elif frameName == "menu":
                    # get menu for javascript robot action
                    pass
                elif frameName == "main":
                    # get main content to crawler data
                    pass
        elif baseUrl == self._LOGIN_URL:
            # Nothing is done for the login page yet.
            pass
class Crawler(QtGui.QWidget):
    """Small browser-style widget: a URL bar, a tabbed web view and a button.

    Clicking the button emits ``signalClickedToLoadUrl`` with the URL bar's
    text, which is wired to ``slotLoadUrl`` and loads the page in the web
    view.  When a load finishes, the URL bar is refreshed and, for the site's
    home page, every ``<td>`` of every child frame is handed to the
    ``findAllElementsChild`` helper (defined outside this class).
    """

    # Emitted with the URL taken from the URL bar when the button is clicked.
    signalClickedToLoadUrl = QtCore.Signal(QtCore.QUrl)

    # Pages this widget knows about; compared against the main frame's base URL.
    _LOGIN_URL = "http://www2.tpa.edu.tw/tpaedu/Home/login.asp"
    _HOME_URL = "http://www2.tpa.edu.tw/tpaedu/default.asp"

    def __init__(self, width=800, height=600):
        """Place the window at (10, 10) with the given size and build the UI.

        Args:
            width: initial window width handed to setGeometry().
            height: initial window height handed to setGeometry().
        """
        super(Crawler, self).__init__()
        self.setGeometry(10, 10, width, height)
        self.setupUI()

    def setupUI(self):
        """Create the child widgets, lay them out, wire the signals and show."""
        self.mainHBoxLayout = QtGui.QHBoxLayout()
        self.mainRVBoxLayout = QtGui.QVBoxLayout()
        self.mainLVBoxLayout = QtGui.QVBoxLayout()

        self.executeCrawler = QtGui.QPushButton("Get Content!!!")
        self.urlBarLineEdit = QtGui.QLineEdit(self._LOGIN_URL)
        self.myWebView = WebView()
        self.myTabWidget = TabWidget()
        # Left disabled by the original author:
        # self.myWebView.settings().setDefaultTextEncoding("big5")
        self.myWebView.show()
        self.myTabWidget.addTab(self.myWebView, "my Web View")

        # Left column: URL bar above the tab widget.  Right column: the button.
        self.mainLVBoxLayout.addWidget(self.urlBarLineEdit)
        self.mainLVBoxLayout.addWidget(self.myTabWidget)
        self.mainRVBoxLayout.addWidget(self.executeCrawler)
        self.mainHBoxLayout.addLayout(self.mainLVBoxLayout)
        self.mainHBoxLayout.addLayout(self.mainRVBoxLayout)
        self.setLayout(self.mainHBoxLayout)

        self.executeCrawler.clicked.connect(self.slotClickedToLoadUrl)
        self.signalClickedToLoadUrl.connect(self.slotLoadUrl)
        self.myWebView.loadFinished.connect(self.slotWebViewLoadFinished)

        self.show()

    @QtCore.Slot()
    def slotClickedToLoadUrl(self):
        """Print the URL bar's text and emit it as a QUrl."""
        urlText = self.urlBarLineEdit.text()
        print(urlText)
        self.signalClickedToLoadUrl.emit(QtCore.QUrl(urlText))

    @QtCore.Slot(QtCore.QUrl)
    def slotLoadUrl(self, qUrl):
        """Load ``qUrl`` in the web view."""
        self.myWebView.load(qUrl)

    @QtCore.Slot(bool)
    def slotWebViewLoadFinished(self, isFinished):
        """Refresh the URL bar and walk the loaded page's frames.

        Args:
            isFinished: the bool carried by the view's loadFinished signal;
                nothing happens when it is false.
        """
        if not isFinished:
            return

        self.urlBarLineEdit.setText(self.myWebView.url().toString())

        mainFrame = self.myWebView.page().mainFrame()
        pageFrames = mainFrame.childFrames()
        baseUrl = mainFrame.baseUrl().toString()
        print("base Url := " + baseUrl)

        if baseUrl == self._HOME_URL:
            print("it is tpa local net")
            for frame in pageFrames:
                frameName = frame.frameName()
                print(frameName)

                elements = frame.findAllElements("td")
                elementCount = elements.count()
                print(elementCount)
                # Valid indices are 0 .. count - 1.  The previous code read
                # at(index + 1), which skipped the first <td> and asked for
                # an element one past the end on the last iteration.
                for index in range(elementCount):
                    findAllElementsChild(elements.at(index).firstChild())

                # Per-frame handling is not implemented yet.
                if frameName == "logo":
                    pass
                elif frameName == "menu":
                    # get menu for javascript robot action
                    pass
                elif frameName == "main":
                    # get main content to crawler data
                    pass
        elif baseUrl == self._LOGIN_URL:
            # Nothing is done for the login page yet.
            pass