def processSection(self, bsection):
    '''Process every page of one forum section and save the collected data.

    Pages are requested from ``urljoin(self._url, bsection) + "/" + <page>``
    starting with page 1. The number of pages is taken from the first
    fetched page for which ``PageParser.getSectionLenght()`` returns a
    truthy value; the walk stops once that page number has been reached.

    bsection -- section path, joined onto ``self._url``; with "/" replaced
                by "_" it is also passed as ``name`` to
                ``createXmlandWrite``.

    Returns None. Any exception raised while handling a page is logged and
    not propagated; the walk then goes on with the next page.
    '''
    actualPage = 1
    sectionLength = None
    url = urljoin(self._url, bsection)
    while True:
        try:
            page = getDataFrom(url + "/" + str(actualPage), None, None)
            if page is None:
                if sectionLength is None:
                    # No data and no known page count: nothing to walk.
                    log.debug("none data, return")
                    return
                # Skip this page but fall through to the bottom of the
                # loop so the page counter advances; a bare `continue`
                # here would re-request the same URL forever.
                log.debug("none data, continue")
            else:
                pageParser = PageParser(page)
                if not sectionLength:
                    # Get the highest page number of the section.
                    sectionLength = pageParser.getSectionLenght()
                    log.debug("sectionLenght is %s", str(sectionLength))
                self.processData(pageParser.getListOfItems())
                # Add the stationary (constant) data.
                self.addItemToFinalList("source", "http://board.netdoktor.de/")
                self.addItemToFinalList("section", "netdoktor")
                self.addItemToFinalList("lang", "de")
                # Save.
                self.createXmlandWrite(name=bsection.replace("/", "_"))
        except Exception as e:
            log.critical("%s", e)
            log.exception("Some exception in process section")
        # Stop when the last page is reached, or when the page count was
        # never obtained (e.g. the first page raised) so there is no bound.
        if sectionLength is None or actualPage >= sectionLength:
            return
        actualPage += 1
def processSection(self, bsection):
    '''Process every page of one forum section and save the collected data.

    Pages are requested from ``urljoin(self._url, bsection) + "/" + <page>``
    starting with page 1. The number of pages is taken from the first
    fetched page for which ``PageParser.getSectionLenght()`` returns a
    truthy value; the walk stops once that page number has been reached.

    bsection -- section path, joined onto ``self._url``; with "/" replaced
                by "_" it is also passed as ``name`` to
                ``createXmlandWrite``.

    Returns None. Any exception raised while handling a page is logged and
    not propagated; the walk then goes on with the next page.
    '''
    actualPage = 1
    sectionLength = None
    url = urljoin(self._url, bsection)
    while True:
        try:
            page = getDataFrom(url + "/" + str(actualPage), None, None)
            if page is None:
                if sectionLength is None:
                    # No data and no known page count: nothing to walk.
                    log.debug("none data, return")
                    return
                # Skip this page but fall through to the bottom of the
                # loop so the page counter advances; a bare `continue`
                # here would re-request the same URL forever.
                log.debug("none data, continue")
            else:
                pageParser = PageParser(page)
                if not sectionLength:
                    # Get the highest page number of the section.
                    sectionLength = pageParser.getSectionLenght()
                    log.debug("sectionLenght is %s", str(sectionLength))
                self.processData(pageParser.getListOfItems())
                # Add the stationary (constant) data.
                self.addItemToFinalList("source", "http://board.netdoktor.de/")
                self.addItemToFinalList("section", "netdoktor")
                self.addItemToFinalList("lang", "de")
                # Save.
                self.createXmlandWrite(name=bsection.replace("/", "_"))
        except Exception as e:
            log.critical("%s", e)
            log.exception("Some exception in process section")
        # Stop when the last page is reached, or when the page count was
        # never obtained (e.g. the first page raised) so there is no bound.
        if sectionLength is None or actualPage >= sectionLength:
            return
        actualPage += 1
def updateContent(self, url):
    '''Fetch one page and queue its new entries for saving.

    The entries parsed from the page are handed to
    ``self.pageCache.cacheAndReturnNew``; whatever it returns is tagged
    with the page address and appended to ``self.dataToStore``.

    url -- address of the page to fetch; it is also stored under the
           "link" key of every returned entry.

    Returns False when no page data could be fetched, True otherwise
    (also when there were no entries to add).
    '''
    page = getDataFrom(url, None, None)
    if page is None:
        return False
    parsedEntries = PageParser(page).getEntriesList()
    # Presumably only the entries not seen before come back from the
    # cache (judging by the method name) -- confirm against pageCache.
    newEntries = self.pageCache.cacheAndReturnNew(url, parsedEntries)
    for entry in newEntries:
        entry["link"] = url
    log.debug("ADD %d new entries", len(newEntries))
    self.dataToStore.extend(newEntries)
    return True