Esempio n. 1
0
 def __getimgs(self):
     """Download this item's images into the ``self.db_img`` directory.

     Entries of ``self.imgs`` are processed in order; each one supplies a
     ``'name'`` (used for the destination file name) and a ``'url'``.
     The loop stops once ``self.singleitemlimit`` images have been handed
     to ``Util.downloadimg``.  The limit is checked after each download,
     so the first image is always fetched when ``self.imgs`` is non-empty.
     """
     for fetched, img in enumerate(self.imgs, 1):
         name = img['name']
         target = os.path.join(self.db_img, name)
         Util.downloadimg(img['url'], target, name)
         if fetched >= self.singleitemlimit:
             break
Esempio n. 2
0
 def __getimgs(self):
     """Fetch the images listed in ``self.imgs``, capped per item.

     For every entry the destination path is built from ``self.db_img``
     and the entry's ``'name'``, then ``Util.downloadimg`` is called with
     the entry's ``'url'``, that path and the name.  Iteration ends as
     soon as the number of processed entries reaches
     ``self.singleitemlimit`` (the check runs after the download).
     """
     done = 0
     for entry in self.imgs:
         filename = entry['name']
         destination = os.path.join(self.db_img, filename)
         Util.downloadimg(entry['url'], destination, filename)
         done += 1
         if done < self.singleitemlimit:
             # Still under the per-item cap: keep going.
             continue
         break
Esempio n. 3
0
 def __getdetail(self):
     """Fetch the detail document for ``self.itemid`` and validate its envelope.

     The body returned by ``Util.http_get`` for ``config.url['detail']`` is
     parsed as JSON and accepted only when it contains a ``status`` entry
     whose ``code`` equals 1001 and a ``result`` entry.  Each rejection is
     logged at debug level and the method returns ``None``.  Any exception
     raised while parsing or inspecting the document is logged with its
     traceback and likewise ends the method with ``None``.
     """
     url = config.url['detail'] % self.itemid
     detail = Util.http_get(url)
     try:
         detail = json.loads(detail)
         if not detail:
             logging.debug('%s no detail', self.itemid)
             return
         if 'status' not in detail:
             logging.debug('%s no status', self.itemid)
             return
         if 'code' not in detail['status']:
             logging.debug('%s no code', self.itemid)
             return
         if detail['status']['code'] != 1001:
             logging.debug('%s code not 1001', self.itemid)
             return
         if 'result' not in detail:
             logging.debug('%s no result', self.itemid)
             return
         # NOTE(review): the unwrapped result is only bound to this local
         # name; nothing visible here consumes it -- confirm against the
         # rest of the method/class.
         detail = detail['result']
     except Exception:
         # Best-effort fetch: a malformed or missing response must not
         # propagate, so log the traceback and give up on this item.
         logging.exception('%s exception', self.itemid)
         return
Esempio n. 4
0
    def __getnextsection(self):
        """Queue the next batch of items for the current book.

        When the section budget (``self.sectionlimit``) is used up nothing is
        fetched.  When the current page has reached ``self.singlepagelimit``
        sections, the page counter advances and the portal page is reloaded
        through ``__getfirstsection``.  Otherwise the next section is
        requested with a POST to ``config.url['ajaxbook']`` and every entry of
        ``result.list`` in the JSON reply is put on ``self.items``.

        Returns:
            bool: True when ``self.items`` is non-empty after a successful
            load; False when the budget is exhausted, the response is empty,
            the reply carries no list, or parsing raises.
        """
        # The original format string lacked the fifth placeholder, which made
        # the %-formatting raise TypeError before anything else could run.
        logging.info(
            'get next section, title: %s category: %s, category id: %s, '
            'currentpage: %s, currentsection: %s',
            self.title, self.category, self.bookid, self.currentpage,
            self.currentsection)
        if self.section >= self.sectionlimit:
            return False
        if self.currentsection == self.singlepagelimit:
            self.currentpage += 1
            self.section += 1
            self.currentsection = 2
            self.__getfirstsection()
            return self.items.qsize() > 0

        self.payload['section'] = self.currentsection
        response = Util.http_post(config.url['ajaxbook'], self.payload)
        # Counters advance whether or not the request produced data.
        self.currentsection += 1
        self.section += 1
        if not response:
            logging.warning('getnextsection response null, payload: %s',
                            self.payload)
            return False
        try:
            items = json.loads(response)
            if ('result' not in items or 'list' not in items['result']
                    or not items['result']['list']):
                logging.warning('getnextsection list null, payload: %s',
                                self.payload)
                return False
            for item in items['result']['list']:
                self.items.put(item)
            # qsize must be *called*; comparing the bound method itself to 0
            # never reflects the queue's length.
            return self.items.qsize() > 0
        except Exception:
            logging.exception('getnextsection exception: %s', self.payload)
            return False
Esempio n. 5
0
    def __getfirstsection(self):
        """Load the book's portal page and queue the items embedded in it.

        The page at ``config.url['portalbook']`` is fetched, the ``book``
        group of ``config.regex['profile']`` is stored in ``self.book`` and
        ``self.payload['book']``, and the JSON captured by the ``firstdata``
        group of ``config.regex['firstdata']`` is decoded; each decoded
        element is put on ``self.items``.  ``self.currentsection`` is then
        reset to 2.

        Raises:
            SystemExit: with status 2 when the page cannot be fetched, or
                when the book value or the first-data blob is missing.
        """
        def abort(template):
            # Log the failure with the current position and terminate.
            # SystemExit is raised directly because the ``exit`` helper is
            # injected by the ``site`` module for interactive use and is not
            # guaranteed to exist in every runtime.
            logging.error(template.format(self.category, self.bookid,
                                          self.currentpage))
            raise SystemExit(2)

        logging.info(
            'get portal page, title: %s category: %s, category id: %s, '
            'currentpage: %s',
            self.title, self.category, self.bookid, self.currentpage)
        url = config.url['portalbook'] % (self.category, self.bookid)
        html = Util.http_get(url)
        if not html:
            abort('no portal html, category: {}, bookid: {}, page: {}')

        profile = config.regex['profile'].search(html)
        if not profile or 'book' not in profile.groupdict():
            abort('no book, category: {}, bookid: {}, page: {}')
        self.book = profile.groupdict()['book']
        self.payload['book'] = self.book
        if not self.book:
            # The group exists but matched nothing usable.
            abort('no valid book, category: {}, bookid: {}, page: {}')

        firstdata = config.regex['firstdata'].search(html)
        if not firstdata or 'firstdata' not in firstdata.groupdict():
            abort('no firstdata, category: {}, bookid: {}, page: {}')
        firstdata = json.loads(firstdata.groupdict()['firstdata'])

        for item in firstdata:
            self.items.put(item)
        self.currentsection = 2
Esempio n. 6
0
 def __getdetail(self):
     """Retrieve and sanity-check the detail response for ``self.itemid``.

     ``Util.http_get`` is called on ``config.url['detail']`` formatted with
     the item id, and the reply is decoded as JSON.  The decoded document
     must be non-empty, contain ``status`` with a ``code`` of 1001, and
     contain ``result``; the first failed check is logged at debug level
     and the method returns ``None``.  Exceptions raised during decoding
     or checking are logged with a traceback and also yield ``None``.
     """
     url = config.url['detail'] % self.itemid
     detail = Util.http_get(url)
     try:
         detail = json.loads(detail)
         if not detail:
             logging.debug('%s no detail', self.itemid)
             return
         if 'status' not in detail:
             logging.debug('%s no status', self.itemid)
             return
         if 'code' not in detail['status']:
             logging.debug('%s no code', self.itemid)
             return
         if detail['status']['code'] != 1001:
             logging.debug('%s code not 1001', self.itemid)
             return
         if 'result' not in detail:
             logging.debug('%s no result', self.itemid)
             return
         # NOTE(review): the result is unwrapped into the local name only;
         # no consumer is visible in this excerpt -- confirm against the
         # rest of the method/class.
         detail = detail['result']
     except Exception:
         # Deliberately broad: any decoding/lookup failure is logged with
         # its traceback and treated as "no detail available".
         logging.exception('%s exception', self.itemid)
         return
Esempio n. 7
0
    def __getnextsection(self):
        """Load the next section of the book into ``self.items``.

        Nothing is fetched once ``self.section`` has reached
        ``self.sectionlimit``.  If the current page has hit
        ``self.singlepagelimit`` sections, the page counter is advanced and
        the portal page is reloaded via ``__getfirstsection``.  Otherwise
        ``self.payload`` (with its ``'section'`` set to the current section)
        is POSTed to ``config.url['ajaxbook']`` and each element of
        ``result.list`` in the JSON reply is queued.

        Returns:
            bool: True if ``self.items`` holds at least one item after a
            successful load; False if the budget is exhausted, the reply is
            empty or lacks a non-empty ``result.list``, or an exception
            occurs while decoding.
        """
        # Five values need five placeholders; the missing one used to make
        # the eager %-formatting raise TypeError on every call.
        logging.info(
            'get next section, title: %s category: %s, category id: %s, '
            'currentpage: %s, currentsection: %s',
            self.title, self.category, self.bookid, self.currentpage,
            self.currentsection)
        if self.section >= self.sectionlimit:
            return False
        if self.currentsection == self.singlepagelimit:
            self.currentpage += 1
            self.section += 1
            self.currentsection = 2
            self.__getfirstsection()
            return self.items.qsize() > 0

        self.payload['section'] = self.currentsection
        response = Util.http_post(config.url['ajaxbook'], self.payload)
        # Both counters move forward even when the request fails.
        self.currentsection += 1
        self.section += 1
        if not response:
            logging.warning('getnextsection response null, payload: %s',
                            self.payload)
            return False
        try:
            items = json.loads(response)
            has_list = ('result' in items and 'list' in items['result']
                        and items['result']['list'])
            if not has_list:
                logging.warning('getnextsection list null, payload: %s',
                                self.payload)
                return False
            for item in items['result']['list']:
                self.items.put(item)
            # Call qsize(); the bare attribute is a bound method, not a
            # number, so comparing it to 0 is meaningless.
            return self.items.qsize() > 0
        except Exception:
            logging.exception('getnextsection exception: %s', self.payload)
            return False
Esempio n. 8
0
    def __getfirstsection(self):
        """Fetch the portal page for the book and enqueue its initial items.

        Steps performed:
        1. GET ``config.url['portalbook']`` formatted with the category and
           book id.
        2. Extract the ``book`` group matched by ``config.regex['profile']``
           into ``self.book`` and ``self.payload['book']``.
        3. Decode the JSON captured by the ``firstdata`` group of
           ``config.regex['firstdata']`` and put every element on
           ``self.items``.
        4. Reset ``self.currentsection`` to 2.

        Raises:
            SystemExit: with status 2 if the page is empty, or the book
                value or the first-data blob cannot be extracted.
        """
        def abort(template):
            # Report where we were and stop the process.  ``SystemExit`` is
            # raised explicitly instead of calling ``exit()``, which only
            # exists when the ``site`` module has been loaded.
            logging.error(
                template.format(self.category, self.bookid,
                                self.currentpage))
            raise SystemExit(2)

        logging.info(
            'get portal page, title: %s category: %s, category id: %s, '
            'currentpage: %s',
            self.title, self.category, self.bookid, self.currentpage)
        url = config.url['portalbook'] % (self.category, self.bookid)
        html = Util.http_get(url)
        if not html:
            abort('no portal html, category: {}, bookid: {}, page: {}')

        profile = config.regex['profile'].search(html)
        if not profile or 'book' not in profile.groupdict():
            abort('no book, category: {}, bookid: {}, page: {}')
        self.book = profile.groupdict()['book']
        self.payload['book'] = self.book
        if not self.book:
            # Group present but empty/unmatched.
            abort('no valid book, category: {}, bookid: {}, page: {}')

        firstdata = config.regex['firstdata'].search(html)
        if not firstdata or 'firstdata' not in firstdata.groupdict():
            abort('no firstdata, category: {}, bookid: {}, page: {}')
        firstdata = json.loads(firstdata.groupdict()['firstdata'])

        for item in firstdata:
            self.items.put(item)
        self.currentsection = 2