Ejemplo n.º 1
0
    def run(self):
        """Download ``self.url``, save the page and its converted text, and log success.

        The raw response body is written to ``self.fileName`` and the result
        of ``TextAnalyze.textTransfer`` on that body to ``self.textName``.
        The body, the converted text and the URL are then appended to the
        module-level ``pagesContent``, ``textContent`` and ``triedUrl`` lists
        while ``self.thLock`` is held, and a "Success!" line built from
        ``self.logLine`` is printed and written to ``self.logFile``.

        The module-level lists are only mutated in place (``append``), so no
        ``global`` declarations are required.

        NOTE(review): failure handling is disabled in this version, so any
        exception raised while downloading, converting or writing propagates
        out of ``run()`` and the URL is recorded in neither ``triedUrl`` nor
        ``failedUrl``. Confirm this is intended outside of debugging.
        """
        print(self.url)

        response = urllib2.urlopen(self.url)
        try:
            htmlContent = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
        transText = TextAnalyze.textTransfer(htmlContent)

        # ``with`` closes each file even if write() raises.
        with open(self.fileName, 'w') as fOut:
            fOut.write(htmlContent)
        with open(self.textName, 'w') as tOut:
            tOut.write(transText)

        # Presumably the result lists and the log file are shared with other
        # workers; ``with`` guarantees the lock is released even if the log
        # write raises, so a failure here cannot leave the lock held.
        with self.thLock:
            pagesContent.append(htmlContent)
            textContent.append(transText)
            triedUrl.append(self.url)
            sSuccess = 'Success!  ' + self.logLine
            print(sSuccess)
            self.logFile.write(sSuccess + '\n')
Ejemplo n.º 2
0
    def run(self):
        """Download ``self.url``, save the page and its converted text, and log the outcome.

        On success the raw response body is written to ``self.fileName`` and
        the result of ``TextAnalyze.textTransfer`` on that body to
        ``self.textName``; the body, the converted text and the URL are
        appended to the module-level ``pagesContent``, ``textContent`` and
        ``triedUrl`` lists, and a "Success!" line built from ``self.logLine``
        is printed and written to ``self.logFile``.

        If downloading, converting or writing raises an ``Exception``, the URL
        is appended to ``triedUrl`` and ``failedUrl`` instead, a "Failed!"
        line is printed and logged, and the method returns early.

        All bookkeeping happens while ``self.thLock`` is held. The
        module-level lists are only mutated in place (``append``), so no
        ``global`` declarations are required.

        Returns:
            None in both the success and the failure case.
        """
        try:
            response = urllib2.urlopen(self.url)
            try:
                htmlContent = response.read()
            finally:
                # Release the connection even when read() fails.
                response.close()
            transText = TextAnalyze.textTransfer(htmlContent)

            # ``with`` closes each file even if write() raises.
            with open(self.fileName, 'w') as fOut:
                fOut.write(htmlContent)
            with open(self.textName, 'w') as tOut:
                tOut.write(transText)

        except Exception:
            # ``except Exception`` (rather than a bare ``except``) lets
            # SystemExit and KeyboardInterrupt propagate instead of being
            # recorded as a failed URL.
            with self.thLock:
                triedUrl.append(self.url)
                failedUrl.append(self.url)
                sFailed = 'Failed!   ' + self.logLine
                print(sFailed)
                self.logFile.write(sFailed + '\n')
            return None

        # ``with`` guarantees the lock is released even if the log write
        # raises, so a failure here cannot leave the lock held.
        with self.thLock:
            pagesContent.append(htmlContent)
            textContent.append(transText)
            triedUrl.append(self.url)
            sSuccess = 'Success!  ' + self.logLine
            print(sSuccess)
            self.logFile.write(sSuccess + '\n')