Beispiel #1
0
 def __getimgs(self):
     """Download the images listed in ``self.imgs`` into ``self.db_img``.

     Each entry is read through its ``'name'`` and ``'url'`` keys. The limit
     ``self.singleitemlimit`` is checked only after a download, so the
     first image is always attempted when ``self.imgs`` is non-empty.
     """
     for fetched, entry in enumerate(self.imgs, 1):
         name = entry['name']
         target = os.path.join(self.db_img, name)
         Util.downloadimg(entry['url'], target, name)
         # Stop once the per-item quota has been reached.
         if fetched >= self.singleitemlimit:
             break
Beispiel #2
0
    def __getnextsection(self):
        """Load the next batch of items into ``self.items``.

        When ``self.currentsection`` equals ``self.singlepagelimit`` the
        method moves on to the next page and reloads the portal page through
        ``__getfirstsection``. Otherwise it posts ``self.payload`` (with
        ``'section'`` set to the current section) to the ``ajaxbook`` URL and
        queues every entry found under ``result.list`` in the JSON reply.

        Returns:
            ``True`` if ``self.items`` is non-empty after the fetch, else
            ``False``. ``False`` is also returned when ``self.section`` has
            reached ``self.sectionlimit``, when the response is empty, when
            the reply carries no usable list, or when decoding/queuing
            raised an exception (which is logged with its traceback).
        """
        # The format string needs one placeholder per tuple element; a missing
        # one makes the ``%`` operator raise TypeError on every call.
        logging.info('get next section, title: %s category: %s, category id: %s, currentpage: %s, currentsection: %s'%(self.title, self.category, self.bookid, self.currentpage, self.currentsection))
        if self.section >= self.sectionlimit:
            return False
        if self.currentsection == self.singlepagelimit:
            # Current page is used up: advance the page and start again from
            # the portal page of the new page.
            self.currentpage += 1
            self.section += 1
            self.currentsection = 2
            self.__getfirstsection()
            return self.items.qsize() > 0

        self.payload['section'] = self.currentsection
        response = Util.http_post(config.url['ajaxbook'], self.payload)
        # The counters advance before the response is inspected, so they move
        # on even when the request produced nothing.
        self.currentsection += 1
        self.section += 1
        if not response:
            logging.warning('getnextsection response null, payload: %s'%self.payload)
            return False
        try:
            reply = json.loads(response)
            if 'result' not in reply or 'list' not in reply['result'] or not reply['result']['list']:
                logging.warning('getnextsection list null, payload: %s'%self.payload)
                return False
            for item in reply['result']['list']:
                self.items.put(item)
            # qsize must be *called*; comparing the bound method itself to 0
            # never reflects the real queue length.
            return self.items.qsize() > 0
        except Exception:
            # Best-effort boundary: log the traceback and report "no items".
            logging.exception('getnextsection exception: %s'%self.payload)
            return False
Beispiel #3
0
    def __getfirstsection(self):
        """Fetch the portal page and seed ``self.items`` from it.

        Downloads the ``portalbook`` URL built from ``self.category`` and
        ``self.bookid``, takes the ``book`` group matched by
        ``config.regex['profile']`` (stored in ``self.book`` and
        ``self.payload['book']``), decodes the JSON held in the ``firstdata``
        group matched by ``config.regex['firstdata']``, queues every decoded
        entry and finally sets ``self.currentsection`` to 2.

        If the page, the book value or the first-data blob is missing, an
        error is logged and ``exit(2)`` is called.
        """
        banner_args = (self.title, self.category, self.bookid, self.currentpage)
        logging.info('get portal page, title: %s category: %s, category id: %s, currentpage: %s'%banner_args)

        portal_url = config.url['portalbook']%(self.category, self.bookid)
        page = Util.http_get(portal_url)
        if not page:
            logging.error('no portal html, category: {}, bookid: {}, page: {}'.format(self.category, self.bookid, self.currentpage))
            exit(2)

        # Book identifier embedded in the page.
        profile_match = config.regex['profile'].search(page)
        profile_groups = profile_match.groupdict() if profile_match else {}
        if 'book' not in profile_groups:
            logging.error('no book, category: {}, bookid: {}, page: {}'.format(self.category, self.bookid, self.currentpage))
            exit(2)
        self.book = profile_groups['book']
        self.payload['book'] = self.book
        if not self.book:
            logging.error('no valid book, category: {}, bookid: {}, page: {}'.format(self.category, self.bookid, self.currentpage))
            exit(2)

        # JSON blob with the first batch of items.
        seed_match = config.regex['firstdata'].search(page)
        seed_groups = seed_match.groupdict() if seed_match else {}
        if 'firstdata' not in seed_groups:
            logging.error('no firstdata, category: {}, bookid: {}, page: {}'.format(self.category, self.bookid, self.currentpage))
            exit(2)

        for entry in json.loads(seed_groups['firstdata']):
            self.items.put(entry)
        self.currentsection = 2
Beispiel #4
0
 def __getdetail(self):
     """Fetch the detail document for ``self.itemid`` and unwrap its result.

     The reply from the ``detail`` URL is decoded as JSON. It must be
     non-empty, carry ``status.code == 1001`` and contain a ``result`` key;
     the first check that fails is logged at debug level and the method
     returns. Any exception raised while decoding or validating is logged
     with its traceback and the method returns as well. When every check
     passes, the local ``detail`` is rebound to the ``result`` value.
     """
     url = config.url['detail']%self.itemid
     detail = Util.http_get(url)
     try:
         detail = json.loads(detail)
         # Pick the debug template of the first failed check, if any.
         if not detail:
             complaint = '%s no detail'
         elif 'status' not in detail:
             complaint = '%s no status'
         elif 'code' not in detail['status']:
             complaint = '%s no code'
         elif detail['status']['code'] != 1001:
             complaint = '%s code not 1001'
         elif 'result' not in detail:
             complaint = '%s no result'
         else:
             complaint = None
         if complaint is not None:
             logging.debug(complaint % self.itemid)
             return
         detail = detail['result']
     except Exception as e:
         logging.exception('%s exception'%self.itemid)
         return