def init(self, data=None):
        """Store the book id found in ``data`` on this instance.

        Args:
            data: Non-empty dict that must contain an ``'id'`` key. Its value
                is saved as ``self.qid``.

        Raises:
            InputException: If ``data`` is empty, is not a dict, or lacks the
                ``'id'`` key.
        """
        # The parenthesised single-argument form prints the same text under
        # the Python 2 print statement and the Python 3 print function.
        print('quanben init')

        if not data or not isinstance(data, dict):
            # The required field is 'id' (the old message said 'sid').
            raise InputException("required dict data with fields: id")
        # `in` replaces dict.has_key(), which was removed in Python 3.
        if 'id' not in data:
            raise InputException("required field 'id' in data")

        self.qid = data['id']
def mianfeiSearch(name, top = 5):
    """Search for books by name and return at most ``top`` results.

    The search URL is ``MianFeiTXTSearchBaseUrl`` followed by the URL-quoted,
    UTF-8 encoded ``name``. The page is fetched with ``getSoupByUrl`` and the
    ``<li>`` entries below the ``#J-items`` element are converted to dicts.

    Args:
        name: Book name to search for.
        top: Maximum number of results to return; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with the keys ``title``, ``img``, ``author``,
        ``finishwb`` and ``id``. The list is empty when the page contains no
        ``#J-items`` element.

    Raises:
        InputException: If a result's title link has no ``id=`` marker.
    """
    url = MianFeiTXTSearchBaseUrl + quote(name.encode('utf-8'))
    soup = getSoupByUrl(url)
    container = soup.select_one('#J-items')
    if container is None:
        # select_one() returns None when nothing matches; treat a page without
        # a result container as "no results" instead of failing on None.
        return []

    books = []
    # Run the <li> selector once and keep only the first `top` entries.
    for bookTag in container.select('li')[:max(top, 0)]:
        titleTag = bookTag.select_one('.title')
        statusTag = bookTag.select_one('.finishwb')

        book = {}
        book['title'] = titleTag.get_text()
        book['img'] = bookTag.select_one('.img img')['src']
        # NOTE(review): both replace() targets look like a plain space here;
        # one may originally have been a non-ASCII space -- confirm.
        book['author'] = bookTag.select_one('.author').get_text().replace(' ','').replace(' ','')
        # The default status is used when the entry has no .finishwb element.
        book['finishwb'] = statusTag.get_text() if statusTag else u'连载'

        href = titleTag['href']
        index = href.find('id=')
        if index < 0:
            raise InputException('cant find id in mianfeiTXT')
        # Everything after 'id=' is the id once the surrounding punctuation
        # (commas, closing parenthesis, backslashes, quotes) is stripped.
        book['id'] = href[index + 3:].replace(',', '').replace(')','').replace('\\','').replace("'",'')

        books.append(book)

    return books
def getSourceId(qid):
    """Return the ``book_source_id`` of the zhuishuwang source for ``qid``.

    Fetches ``srcListBaseUrl % str(qid)`` with ``getContentWithUA``, parses
    the body as JSON and reads the ``'api.zhuishuwang.com'`` entry of its
    ``'items'`` mapping.

    Args:
        qid: Book id substituted into ``srcListBaseUrl``.

    Returns:
        The ``book_source_id`` of the first ``'api.zhuishuwang.com'`` entry,
        or ``None`` when the response body is empty, has no ``'items'`` key,
        or ``'items'`` is empty (the latter two cases are logged).

    Raises:
        InputException: If items exist but none is usable for
            ``'api.zhuishuwang.com'``.
    """
    srcUrl = srcListBaseUrl % str(qid)

    srcListContent = getContentWithUA(srcUrl)
    if not srcListContent:
        return None
    srcJsonObj = json.loads(srcListContent)
    # `in` replaces dict.has_key(), which was removed in Python 3.
    if not srcJsonObj or 'items' not in srcJsonObj:
        myLogging.error('no  srcObj items qid %s', qid)
        return None

    srcItems = srcJsonObj['items']

    # Truthiness covers both an empty mapping and a JSON null.
    if not srcItems:
        myLogging.error('  srcObj items len < 1 qid %s', qid)
        return None

    if 'api.zhuishuwang.com' in srcItems:
        zhuishuSources = srcItems['api.zhuishuwang.com']
        # An empty list means there is no usable zhuishuwang source; fall
        # through to the same error as a missing key instead of IndexError.
        if zhuishuSources:
            return zhuishuSources[0]['book_source_id']

    # A fallback that picked the source with the largest update_time used to
    # live here; it was disabled, so any other source is rejected.
    raise InputException('no zhuishuwang source, skip')
예제 #4
0
    def POST(self):
        """Search books with the requested crawlers and optionally crawl them.

        Request parameters are read through ``getParams``: ``name`` (search
        text), ``crawlerName`` (comma-separated crawler names), ``top``,
        ``andCrawl``, ``crawler_count`` and ``output_count``. Each named
        crawler's ``search`` is called; when ``andCrawl`` is set, one ``Task``
        is started per book found.

        Returns:
            A JSON string with ``code``, ``msg`` and ``data``. ``data`` is a
            list of ``{'crawlerName': ..., 'books': ...}`` entries. Any
            exception, including an empty ``name`` or an unknown crawler
            name, is reported as ``code`` 500 with the exception text in
            ``msg``.
        """
        web.header("Content-Type", "application/json; charset=UTF-8")
        response = {'code': 200, 'msg': 'ok'}
        respData = []
        try:
            params = getParams(web, name="", andCrawl=False, crawler_count=1, top=5, output_count=1, crawlerName = 'mianFeiTXT')

            if '' == params['name']:
                raise InputException('no input search name')

            manager = crawlManager
            crawlerNames = params['crawlerName'].split(',')
            # Validate every requested name on its own. Looking up the whole
            # comma-joined string rejected any multi-crawler request. Raising
            # keeps the reply on the common JSON error path below instead of
            # returning the raw dict.
            if any(crawlerName not in manager.crawlers for crawlerName in crawlerNames):
                raise InputException('no crawler name!')

            for crawlerName in crawlerNames:
                crawler = manager.crawlers[crawlerName]()

                searchResult = crawler.search(params['name'], params['top'])
                if params['andCrawl']:
                    # NOTE(review): the same crawler instance is re-initialised
                    # for every book while earlier tasks were already started
                    # with it -- confirm Task does not rely on crawler state
                    # set by init() after start().
                    for book in searchResult:
                        crawler.init(book)
                        task = Task(crawler, params['crawler_count'], params['output_count'])
                        task.start()
                respData.append({'crawlerName': crawlerName, 'books': searchResult})

        except Exception as e:
            # Request boundary: report every failure to the client as JSON.
            # `unicode` is the Python 2 builtin used throughout this file.
            response['msg'] = unicode(e)
            response['code'] = 500

        response['data'] = respData
        return json.dumps(response)
예제 #5
0
def getMianTxtSign(paramMap):
    """Compute the request signature for a dict of request parameters.

    The parameters are sorted, turned into one string by ``tup2UrlStr``,
    suffixed with ``"&"`` plus a fixed key, and passed to ``getMD5``.

    Args:
        paramMap: Dict of request parameters to sign.

    Returns:
        Whatever ``getMD5`` returns for the assembled string (presumably an
        MD5 hex digest -- confirm against ``getMD5``).

    Raises:
        InputException: If ``paramMap`` is not a dict.
    """
    if not isinstance(paramMap, dict):
        raise InputException("input must be dict")

    # Sorting the (key, value) pairs gives a deterministic parameter order.
    orderedPairs = sorted(paramMap.items())

    # tup2UrlStr presumably renders the pairs as 'key=value&key=value...'
    # (e.g. 'algorithm=MD5&apiKey=001&appId=26&...&version=3.4.0') -- confirm.
    queryString = tup2UrlStr(orderedPairs)

    # NOTE(review): the signing key is hard-coded in source.
    signingKey = "9dbfbfd095fe6648cbc14a8d19952791"
    return getMD5(queryString + "&" + signingKey)
예제 #6
0
def updateByDbBookId(dbid):
    """Update the book identified by the database id ``dbid``.

    Args:
        dbid: Id passed to ``getBookObjById`` to load the book object.

    Raises:
        InputException: If ``getBookObjById`` returns a falsy value.
    """
    record = getBookObjById(dbid)
    if record:
        updateByBookObj(record)
        return
    raise InputException('wrong id')