def __getimgs(self):
    """Download the images listed in ``self.imgs`` into ``self.db_img``.

    Each entry is read through its ``'name'`` and ``'url'`` keys. The loop
    stops once the number of processed images reaches
    ``self.singleitemlimit``; because the limit is checked after each
    download, the first image is always attempted when ``self.imgs`` is
    non-empty.
    """
    for processed, image in enumerate(self.imgs, 1):
        filename = image['name']
        destination = os.path.join(self.db_img, filename)
        Util.downloadimg(image['url'], destination, filename)
        if processed >= self.singleitemlimit:
            break
def __getimgs(self):
    """Fetch the entries of ``self.imgs`` into the ``self.db_img`` directory.

    Every entry supplies a ``'name'`` (used as the file name and passed on
    to the downloader) and a ``'url'``. Processing ends as soon as the
    count of handled images is at least ``self.singleitemlimit``; the check
    runs after the download, so a non-empty ``self.imgs`` always yields at
    least one download attempt.
    """
    for position, entry in enumerate(self.imgs, 1):
        name = entry['name']
        target = os.path.join(self.db_img, name)
        Util.downloadimg(entry['url'], target, name)
        if position >= self.singleitemlimit:
            break
def __getdetail(self):
    """Request the detail document for ``self.itemid`` and validate it.

    The response of ``Util.http_get`` is parsed as JSON and checked for a
    ``status.code`` equal to 1001 and a ``result`` entry. The first failed
    check is reported through ``logging.debug``; any exception raised while
    parsing or checking is reported through ``logging.exception``.

    Returns:
        None in every case. NOTE(review): the extracted ``result`` is only
        bound to a local name in the code visible here - confirm whether
        further processing is expected.
    """
    raw = Util.http_get(config.url['detail'] % self.itemid)
    try:
        payload = json.loads(raw)
        # Pick the message template of the first check that fails.
        if not payload:
            problem = '%s no detail'
        elif 'status' not in payload:
            problem = '%s no status'
        elif 'code' not in payload['status']:
            problem = '%s no code'
        elif payload['status']['code'] != 1001:
            problem = '%s code not 1001'
        elif 'result' not in payload:
            problem = '%s no result'
        else:
            problem = None

        if problem is not None:
            logging.debug(problem % self.itemid)
            return
        payload = payload['result']
    except Exception:
        logging.exception('%s exception' % self.itemid)
        return
def __getnextsection(self):
    """Fetch the next section of the current book and queue its items.

    Nothing is fetched once ``self.section`` has reached
    ``self.sectionlimit``. When ``self.currentsection`` equals
    ``self.singlepagelimit`` the crawler moves to the next page:
    ``self.currentpage`` and ``self.section`` are incremented,
    ``self.currentsection`` is reset to 2 and the portal page is re-read
    through ``__getfirstsection``. Otherwise ``self.payload`` is posted to
    ``config.url['ajaxbook']`` and every entry of ``result.list`` in the
    JSON response is put on ``self.items``.

    Returns:
        True when ``self.items`` is non-empty after the fetch; False when
        the section limit is reached, the response is empty or lacks a
        non-empty ``result.list``, or handling the response raised.
    """
    # The original format string had no placeholder for currentsection, so
    # eager '%' formatting raised TypeError on every call.
    logging.info(
        'get next section, title: %s category: %s, category id: %s, '
        'currentpage: %s, currentsection: %s',
        self.title, self.category, self.bookid, self.currentpage,
        self.currentsection)

    if self.section >= self.sectionlimit:
        return False

    if self.currentsection == self.singlepagelimit:
        # Page exhausted: advance to the next page and reload its portal.
        self.currentpage += 1
        self.section += 1
        self.currentsection = 2
        self.__getfirstsection()
        return self.items.qsize() > 0

    self.payload['section'] = self.currentsection
    response = Util.http_post(config.url['ajaxbook'], self.payload)
    # Counters advance even when the request fails, so a bad section is
    # not requested again.
    self.currentsection += 1
    self.section += 1
    if not response:
        logging.warning(
            'getnextsection response null, payload: %s', self.payload)
        return False

    try:
        parsed = json.loads(response)
        if ('result' not in parsed
                or 'list' not in parsed['result']
                or not parsed['result']['list']):
            logging.warning(
                'getnextsection list null, payload: %s', self.payload)
            return False
        for item in parsed['result']['list']:
            self.items.put(item)
        # qsize must be called; comparing the bound method itself to 0 is
        # always True on Python 2 and a TypeError on Python 3.
        return self.items.qsize() > 0
    except Exception:
        # Best effort: a malformed response is logged and reported as
        # "no items" instead of aborting the crawl.
        logging.exception('getnextsection exception: %s', self.payload)
        return False
def __getfirstsection(self):
    """Load the portal page of the current book and queue its first items.

    The page at ``config.url['portalbook']`` is fetched for
    ``self.category`` / ``self.bookid``. ``config.regex['profile']`` must
    yield a non-empty ``book`` group, which is stored in ``self.book`` and
    ``self.payload['book']``. ``config.regex['firstdata']`` must yield a
    ``firstdata`` group, which is parsed as JSON; each parsed element is
    put on ``self.items``. Finally ``self.currentsection`` is set to 2.

    Raises:
        SystemExit: with status 2 when the page cannot be fetched or the
            ``book`` / ``firstdata`` data is missing.
    """
    # ``exit`` is injected by the ``site`` module for interactive sessions
    # and can be absent (``python -S``, frozen builds); ``sys.exit`` raises
    # the same SystemExit reliably.
    import sys

    def abort(reason):
        # Log the failure together with the crawl position, then stop.
        logging.error('{}, category: {}, bookid: {}, page: {}'.format(
            reason, self.category, self.bookid, self.currentpage))
        sys.exit(2)

    logging.info(
        'get portal page, title: %s category: %s, category id: %s, '
        'currentpage: %s',
        self.title, self.category, self.bookid, self.currentpage)

    url = config.url['portalbook'] % (self.category, self.bookid)
    html = Util.http_get(url)
    if not html:
        abort('no portal html')

    profile = config.regex['profile'].search(html)
    if not profile or 'book' not in profile.groupdict():
        abort('no book')
    self.book = profile.groupdict()['book']
    self.payload['book'] = self.book
    if not self.book:
        abort('no valid book')

    firstdata = config.regex['firstdata'].search(html)
    if not firstdata or 'firstdata' not in firstdata.groupdict():
        abort('no firstdata')
    for item in json.loads(firstdata.groupdict()['firstdata']):
        self.items.put(item)
    self.currentsection = 2
def __getdetail(self):
    """Fetch and validate the detail response for ``self.itemid``.

    The body returned by ``Util.http_get`` is decoded as JSON. The decoded
    document must be non-empty, carry ``status.code == 1001`` and contain a
    ``result`` entry; the first unmet condition is logged at debug level.
    Exceptions raised during decoding or validation are logged with
    ``logging.exception``.

    Returns:
        None in every case. NOTE(review): ``result`` is only assigned to a
        local variable in the code visible here - confirm whether more
        processing is meant to follow.
    """
    response = Util.http_get(config.url['detail'] % self.itemid)
    try:
        document = json.loads(response)
        # Determine which validation step (if any) fails first.
        if not document:
            failure = '%s no detail'
        elif 'status' not in document:
            failure = '%s no status'
        elif 'code' not in document['status']:
            failure = '%s no code'
        elif document['status']['code'] != 1001:
            failure = '%s code not 1001'
        elif 'result' not in document:
            failure = '%s no result'
        else:
            failure = None

        if failure is not None:
            logging.debug(failure % self.itemid)
            return
        document = document['result']
    except Exception:
        logging.exception('%s exception' % self.itemid)
        return
def __getnextsection(self):
    """Fetch the next section of the current book and queue its items.

    Nothing is fetched once ``self.section`` has reached
    ``self.sectionlimit``. When ``self.currentsection`` equals
    ``self.singlepagelimit`` the crawler moves to the next page:
    ``self.currentpage`` and ``self.section`` are incremented,
    ``self.currentsection`` is reset to 2 and the portal page is re-read
    through ``__getfirstsection``. Otherwise ``self.payload`` is posted to
    ``config.url['ajaxbook']`` and every entry of ``result.list`` in the
    JSON response is put on ``self.items``.

    Returns:
        True when ``self.items`` is non-empty after the fetch; False when
        the section limit is reached, the response is empty or lacks a
        non-empty ``result.list``, or handling the response raised.
    """
    # The original format string had no placeholder for currentsection, so
    # eager '%' formatting raised TypeError on every call.
    logging.info(
        'get next section, title: %s category: %s, category id: %s, '
        'currentpage: %s, currentsection: %s',
        self.title, self.category, self.bookid, self.currentpage,
        self.currentsection)

    if self.section >= self.sectionlimit:
        return False

    if self.currentsection == self.singlepagelimit:
        # Page exhausted: advance to the next page and reload its portal.
        self.currentpage += 1
        self.section += 1
        self.currentsection = 2
        self.__getfirstsection()
        return self.items.qsize() > 0

    self.payload['section'] = self.currentsection
    response = Util.http_post(config.url['ajaxbook'], self.payload)
    # Counters advance even when the request fails, so a bad section is
    # not requested again.
    self.currentsection += 1
    self.section += 1
    if not response:
        logging.warning(
            'getnextsection response null, payload: %s', self.payload)
        return False

    try:
        parsed = json.loads(response)
        section_items = parsed['result']['list'] if (
            'result' in parsed and 'list' in parsed['result']) else None
        if not section_items:
            logging.warning(
                'getnextsection list null, payload: %s', self.payload)
            return False
        for item in section_items:
            self.items.put(item)
        # qsize must be called; comparing the bound method itself to 0 is
        # always True on Python 2 and a TypeError on Python 3.
        return self.items.qsize() > 0
    except Exception:
        # Best effort: a malformed response is logged and reported as
        # "no items" instead of aborting the crawl.
        logging.exception('getnextsection exception: %s', self.payload)
        return False
def __getfirstsection(self):
    """Load the portal page of the current book and queue its first items.

    The page at ``config.url['portalbook']`` is fetched for
    ``self.category`` / ``self.bookid``. ``config.regex['profile']`` must
    yield a non-empty ``book`` group, which is stored in ``self.book`` and
    ``self.payload['book']``. ``config.regex['firstdata']`` must yield a
    ``firstdata`` group, which is parsed as JSON; each parsed element is
    put on ``self.items``. Finally ``self.currentsection`` is set to 2.

    Raises:
        SystemExit: with status 2 when the page cannot be fetched or the
            ``book`` / ``firstdata`` data is missing.
    """
    # ``exit`` is injected by the ``site`` module for interactive sessions
    # and can be absent (``python -S``, frozen builds); ``sys.exit`` raises
    # the same SystemExit reliably.
    import sys

    def fail(reason):
        # Log the failure together with the crawl position, then stop.
        logging.error('{}, category: {}, bookid: {}, page: {}'.format(
            reason, self.category, self.bookid, self.currentpage))
        sys.exit(2)

    logging.info(
        'get portal page, title: %s category: %s, category id: %s, '
        'currentpage: %s',
        self.title, self.category, self.bookid, self.currentpage)

    html = Util.http_get(
        config.url['portalbook'] % (self.category, self.bookid))
    if not html:
        fail('no portal html')

    profile = config.regex['profile'].search(html)
    if not profile or 'book' not in profile.groupdict():
        fail('no book')
    self.book = profile.groupdict()['book']
    self.payload['book'] = self.book
    if not self.book:
        fail('no valid book')

    match = config.regex['firstdata'].search(html)
    if not match or 'firstdata' not in match.groupdict():
        fail('no firstdata')
    for item in json.loads(match.groupdict()['firstdata']):
        self.items.put(item)
    self.currentsection = 2