def init(self, data=None):
    """Initialise this crawler from a search-result dict.

    Args:
        data: dict that must contain the key 'id' (the quanben book id).

    Raises:
        InputException: when data is missing, not a dict, or lacks 'id'.
    """
    # print() call form works on both Python 2 and 3 (original used the
    # py2-only print statement).
    print('quanben init')
    if not data or not isinstance(data, dict):
        # fixed message: was "requried dict data with fields: sid" although
        # the field actually checked below is 'id'
        raise InputException("required dict data with fields: id")
    if 'id' not in data:  # 'in' replaces deprecated dict.has_key
        raise InputException("required field 'id' in data")
    self.qid = data['id']
def mianfeiSearch(name, top=5):
    """Search mianfeiTXT for *name* and return up to *top* result dicts.

    Each returned dict has keys: title, img, author, finishwb, id.

    Args:
        name: unicode search keyword (URL-encoded as UTF-8).
        top: maximum number of results to return (default 5).

    Raises:
        InputException: when a result link contains no parsable 'id='.
    """
    url = MianFeiTXTSearchBaseUrl + quote(name.encode('utf-8'))
    soup = getSoupByUrl(url)
    bookTags = soup.select_one('#J-items')
    books = []
    # Query the <li> list ONCE and slice to `top`; the original re-ran
    # bookTags.select('li') on every iteration (accidental O(n^2)).
    for bookTag in bookTags.select('li')[:top]:
        book = dict()
        book['title'] = bookTag.select_one('.title').get_text()
        book['img'] = bookTag.select_one('.img img')['src']
        # NOTE(review): the two .replace calls look identical — the second
        # may originally have targeted a full-width space; confirm.
        book['author'] = bookTag.select_one('.author').get_text().replace(' ', '').replace(' ', '')
        # default status is "serializing" unless the page shows a finish tag
        book['finishwb'] = u'连载'
        if bookTag.select_one('.finishwb'):
            book['finishwb'] = bookTag.select_one('.finishwb').get_text()
        href = bookTag.select_one('.title')['href']
        index = href.find('id=')
        if index < 0:
            raise InputException('cant find id in mianfeiTXT')
        # strip javascript-call punctuation around the raw id value
        book['id'] = href[index + 3:].replace(',', '').replace(')', '').replace('\\', '').replace("'", '')
        books.append(book)
    return books
def getSourceId(qid):
    """Resolve the zhuishuwang book_source_id for book *qid*.

    Returns:
        The zhuishuwang book_source_id, or None when the source list
        cannot be fetched or contains no items.

    Raises:
        InputException: when sources exist but none is from zhuishuwang.
    """
    srcUrl = srcListBaseUrl % str(qid)
    srcListContent = getContentWithUA(srcUrl)
    if not srcListContent:
        return
    srcJsonObj = json.loads(srcListContent)
    # 'in' replaces deprecated dict.has_key
    if not srcJsonObj or 'items' not in srcJsonObj:
        myLogging.error('no srcObj items qid %s', qid)
        return
    srcItems = srcJsonObj['items']
    if len(srcItems) < 1:  # len(d) == len(d.keys()) without the extra list
        myLogging.error(' srcObj items len < 1 qid %s', qid)
        return
    # Only the zhuishuwang source is accepted; the old fallback that picked
    # the most recently updated source was already disabled (dead code removed).
    if 'api.zhuishuwang.com' in srcItems:
        return srcItems['api.zhuishuwang.com'][0]['book_source_id']
    raise InputException('no zhuishuwang source, skip')
def POST(self):
    """Search one or more crawlers by name; optionally start crawl tasks.

    Reads params: name (required), crawlerName (comma-separated list),
    top, andCrawl, crawler_count, output_count.

    Returns:
        JSON string: {code, msg, data: [{crawlerName, books}, ...]}.
    """
    web.header("Content-Type", "application/json; charset=UTF-8")
    response = {'code': 200, 'msg': 'ok'}
    respData = []
    try:
        params = getParams(web, name="", andCrawl=False, crawler_count=1,
                           top=5, output_count=1, crawlerName='mianFeiTXT')
        if '' == params['name']:
            raise InputException('no input search name')
        manager = crawlManager
        # NOTE(review): this membership test uses the full comma-joined
        # string while the loop below splits it — multi-crawler requests
        # may never pass this check; confirm intended behavior.
        if params['crawlerName'] not in manager.crawlers:
            response['msg'] = 'no crawler name!'
            # BUG FIX: this branch returned the raw dict while every other
            # path returns a JSON string; serialize for a consistent type.
            return json.dumps(response)
        for crawlerName in params['crawlerName'].split(','):
            crawler = manager.crawlers[crawlerName]()
            searchResult = crawler.search(params['name'], params['top'])
            if params['andCrawl']:
                # init() configures the crawler for each found book, then a
                # Task is started for it.
                for book in searchResult:
                    crawler.init(book)
                    task = Task(crawler, params['crawler_count'], params['output_count'])
                    task.start()
            respData.append({'crawlerName': crawlerName, 'books': searchResult})
    except Exception as e:
        response['msg'] = unicode(e)
        response['code'] = 500
    response['data'] = respData
    return json.dumps(response)
def getMianTxtSign(paramMap):
    """Build the mianfeiTXT API request signature.

    Sorts the parameters by key, serializes them to a query string, appends
    the app secret, and returns the MD5 hex digest.

    Args:
        paramMap: dict of request parameters.

    Raises:
        InputException: when paramMap is not a dict.
    """
    if not isinstance(paramMap, dict):
        raise InputException("input must be dict")
    # sorted() is behavior-identical to items()+list.sort() on py2 and also
    # works on py3, where dict.items() returns a view with no .sort().
    sortedPairs = sorted(paramMap.items())
    paramStr = tup2UrlStr(sortedPairs)
    # app secret is appended before hashing
    return getMD5(paramStr + "&" + "9dbfbfd095fe6648cbc14a8d19952791")
def updateByDbBookId(dbid):
    """Update the book stored under database id *dbid*.

    Raises:
        InputException: when no book exists for the given id.
    """
    record = getBookObjById(dbid)
    if not record:
        raise InputException('wrong id')
    updateByBookObj(record)