def __getfirstsection(self):
    """Fetch the portal page of the current book and queue its first section.

    Downloads the ``portalbook`` URL built from ``self.category`` and
    ``self.bookid``, stores the ``book`` group matched by
    ``config.regex['profile']`` on ``self.book`` and ``self.payload['book']``,
    decodes the JSON captured by ``config.regex['firstdata']``, puts every
    decoded item on ``self.items`` and finally sets ``self.currentsection``
    to 2.

    Raises:
        SystemExit: with status 2 when the page cannot be fetched, the book
            profile is missing or empty, or the first-section data is
            missing or cannot be decoded as JSON.

    NOTE(review): a second definition with this same name appears later in
    this file; if both live in the same class, only the later one is
    effective.
    """

    def abort(reason):
        # Every failure is logged with the same context and then terminates
        # the process with status 2.
        logging.error('{}, category: {}, bookid: {}, page: {}'.format(
            reason, self.category, self.bookid, self.currentpage))
        # The bare ``exit`` name is injected by the ``site`` module for
        # interactive use and is not guaranteed to exist; raising SystemExit
        # directly is what it does under the hood.
        raise SystemExit(2)

    logging.info(
        'get portal page, title: %s category: %s, category id: %s, '
        'currentpage: %s',
        self.title, self.category, self.bookid, self.currentpage)

    url = config.url['portalbook'] % (self.category, self.bookid)
    html = Util.http_get(url)
    if not html:
        abort('no portal html')

    profile = config.regex['profile'].search(html)
    if not profile or 'book' not in profile.groupdict():
        abort('no book')
    # The attributes are assigned before the emptiness check, so they are
    # updated even when the captured value turns out to be empty.
    self.book = profile.groupdict()['book']
    self.payload['book'] = self.book
    if not self.book:
        abort('no valid book')

    firstdata = config.regex['firstdata'].search(html)
    if not firstdata or 'firstdata' not in firstdata.groupdict():
        abort('no firstdata')
    try:
        items = json.loads(firstdata.groupdict()['firstdata'])
    except (TypeError, ValueError):
        # TypeError: the named group did not participate in the match (None).
        # ValueError: the captured text is not valid JSON.
        abort('invalid firstdata')

    for item in items:
        self.items.put(item)
    self.currentsection = 2
def __getdetail(self):
    """Fetch the detail document for ``self.itemid`` and validate its envelope.

    Requests the ``detail`` URL for ``self.itemid``, decodes the response as
    JSON and checks that it is non-empty, has a ``status`` entry containing
    ``code`` equal to 1001, and has a ``result`` entry. Each failed check is
    logged at debug level and ends the method early. Any exception raised
    while decoding or validating is logged with its traceback and swallowed.

    Returns:
        None in every case.

    NOTE(review): the extracted ``result`` value is neither returned nor
    stored in the code visible here -- confirm whether follow-up processing
    is missing.
    """
    url = config.url['detail'] % self.itemid
    detail = Util.http_get(url)
    try:
        detail = json.loads(detail)
        if not detail:
            logging.debug('%s no detail', self.itemid)
            return
        if 'status' not in detail:
            logging.debug('%s no status', self.itemid)
            return
        if 'code' not in detail['status']:
            logging.debug('%s no code', self.itemid)
            return
        if detail['status']['code'] != 1001:
            logging.debug('%s code not 1001', self.itemid)
            return
        if 'result' not in detail:
            logging.debug('%s no result', self.itemid)
            return
        detail = detail['result']
    except Exception:
        # Deliberately broad: a missing response or an unexpectedly shaped
        # payload is logged and skipped rather than propagated to the caller.
        logging.exception('%s exception', self.itemid)
        return
def __getfirstsection(self):
    """Fetch the portal page of the current book and queue its first section.

    Downloads the ``portalbook`` URL built from ``self.category`` and
    ``self.bookid``, stores the ``book`` group matched by
    ``config.regex['profile']`` on ``self.book`` and ``self.payload['book']``,
    decodes the JSON captured by ``config.regex['firstdata']``, puts every
    decoded item on ``self.items`` and finally sets ``self.currentsection``
    to 2.

    Raises:
        SystemExit: with status 2 when the page cannot be fetched, the book
            profile is missing or empty, or the first-section data is
            missing or cannot be decoded as JSON.

    NOTE(review): an earlier definition with this same name appears in this
    file; if both live in the same class, this later one is the effective
    one.
    """

    def abort(reason):
        # Every failure is logged with the same context and then terminates
        # the process with status 2.
        logging.error('{}, category: {}, bookid: {}, page: {}'.format(
            reason, self.category, self.bookid, self.currentpage))
        # The bare ``exit`` name is injected by the ``site`` module for
        # interactive use and is not guaranteed to exist; raising SystemExit
        # directly is what it does under the hood.
        raise SystemExit(2)

    logging.info(
        'get portal page, title: %s category: %s, category id: %s, '
        'currentpage: %s',
        self.title, self.category, self.bookid, self.currentpage)

    url = config.url['portalbook'] % (self.category, self.bookid)
    html = Util.http_get(url)
    if not html:
        abort('no portal html')

    profile = config.regex['profile'].search(html)
    if not profile or 'book' not in profile.groupdict():
        abort('no book')
    # The attributes are assigned before the emptiness check, so they are
    # updated even when the captured value turns out to be empty.
    self.book = profile.groupdict()['book']
    self.payload['book'] = self.book
    if not self.book:
        abort('no valid book')

    firstdata = config.regex['firstdata'].search(html)
    if not firstdata or 'firstdata' not in firstdata.groupdict():
        abort('no firstdata')
    try:
        items = json.loads(firstdata.groupdict()['firstdata'])
    except (TypeError, ValueError):
        # TypeError: the named group did not participate in the match (None).
        # ValueError: the captured text is not valid JSON.
        abort('invalid firstdata')

    for item in items:
        self.items.put(item)
    self.currentsection = 2