def step2(self, params): """""" try: key = params.customized['key'] soup = BeautifulSoup(params.content, 'html5lib') #print soup #searchListOne = soup.select('.searchListOne > ul') searchListOne = soup.select('.searchListOne > ul > li > div') if not searchListOne: Logger.getlogging().warning('{}:40000 No urllist'.format( params.originalurl)) return lis = soup.select( '.searchListOne > ul > li' )[:-1] #最后一个<li id=search_msg style="display:none"></li>,过滤掉 urllist = [] for li in lis: url = li.select_one('h3 > a').get('href') #print '*********',url tm = li.select('.source > span')[0].get_text() tm = getuniformtime(tm) now = getuniformtime(str(time.time())) cmt_num = li.select('.source > span')[-1].get_text() title = li.select_one('h3').get_text() if Common.checktitle(Common.urldec(key), title): if compareNow(tm, self.querylastdays): urllist.append(url) if len(urllist) > 0: self.__storeurllist__(urllist, SPIDER_S2_WEBSITE_TIEBA) except: #traceback.print_exc() Logger.printexception() Logger.getlogging().error( 'extract comment error from {site}'.format(site=params.url))
def _create_direcotry(self):
    if self._dbPath:
        directory = os.path.abspath(self._dbPath)
        directory = os.path.split(directory)[0]
        Common.create_dir(directory)
    else:
        Log.error('empty db path')
def step2(self, params): """""" q = params.customized['query'] soup = BeautifulSoup(params.content, 'html5lib') divs = soup.select('.videobox') if not divs: Logger.log(params.originalurl, constant.ERRORCODE_SITE_NOGET_COMMNETS) return urllist = [] for div in divs: title = div.select_one('.title').get_text() #print title tm = getuniformtime(div.select_one('.date').get_text()) url = div.select_one('.title > a').get('href') Logger.getlogging().debug(title) if not compareNow(tm, self.querylastdays): Logger.log(url, constant.ERRORCODE_WARNNING_NOMATCHTIME) continue if not Common.checktitle(Common.urldec(q), title): Logger.log(url, constant.ERRORCODE_WARNNING_NOMATCHTITLE) continue urllist.append(url) #获取最终url列表 if len(urllist) > 0: self.__storeurllist__(urllist, SPIDER_S2_WEBSITE_VIDEO)
def request_string(self, url, headers=None, data=None, method=None,
                   encoding=None, cache=None, retryTimes=None):
    if not retryTimes:
        retryTimes = 1
    if cache is None:
        cache = Common.debug()
    response = None
    for _ in range(retryTimes):
        response = self.request_data(url, headers=headers, data=data,
                                     method=method, cache=cache)
        if response is not None:
            break
    s = None
    if response is not None:
        if encoding:
            s = Common.decode_data(response, encoding=encoding)
        else:
            # No encoding given: try UTF-8 first, then fall back to GBK.
            s = Common.decode_data(response, encoding='utf-8')
            if not s:
                s = Common.decode_data(response, encoding='gbk')
    return s
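# A minimal usage sketch, assuming this method lives on a downloader class
# (here hypothetically called HttpDownloader) whose request_data() performs the
# actual HTTP request. The class name and instantiation are assumptions for
# illustration only.
downloader = HttpDownloader()
html = downloader.request_string('http://example.com/page',
                                 retryTimes=3,       # retry the request up to 3 times
                                 encoding='utf-8')   # skip the utf-8/gbk fallback
if html:
    print(html[:200])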
def analysis(self, line, method):
    try:
        js = json.loads(line)
        param = ProcessParam()
        param.crawler_time = TimeUtility.getuniformtime(js['crawler_time'])
        param.url = Common.urldec(js['foundin'])
        param.content = js['html']
        if method == constant.REQUEST_TYPE_POST:
            param.data = js['data']
        if js['html'][:3] == constant.GZIP_CODE:
            param.content = zlib.decompress(param.content, 16 + zlib.MAX_WBITS)
        # Decode the content.
        content = Common.urldec(param.content)
        charset = RegexUtility.getid('charset', content)
        content = Common.trydecode(content, charset)
        param.content = content
        return param
    except:
        # Not a JSON record: treat the line as a plain URL (or a POST record with url/data).
        line = line.replace('\n', '').strip()
        if not line or line[0] == '#':
            return
        Logger.getlogging().debug(line)
        param = ProcessParam()
        param.url = line
        if method == constant.REQUEST_TYPE_POST:
            js = json.loads(line)
            param.url = js['url']
            param.data = js['data']
        param.content = HttpCacher.getcontent(line, method)
        if param.content is None:
            return
        return param
def step2(self, params):
    info = Common.urldec(params.customized['info'])
    soup = BeautifulSoup(params.content, 'html5lib')
    text_divs = soup.select('.s_r_txt')
    urllist = []
    if text_divs:
        for item in text_divs:
            title = item.select_one('h3 > a').get_text()
            url = item.select_one('h3 > a').get('href')
            curtime = item.select('p')[-1].get_text().strip()
            try:
                if TimeUtility.compareNow(TimeUtility.getuniformtime(curtime), self.querylastdays):
                    if Common.checktitle(info, title):
                        urllist.append(url)
                    else:
                        Logger.log(url, constant.ERRORCODE_WARNNING_NOMATCHTITLE)
                else:
                    Logger.log(url, constant.ERRORCODE_WARNNING_NOMATCHTIME)
            except:
                # If the publish time cannot be parsed, keep the URL anyway.
                urllist.append(url)
    self.__storeurllist__(urllist, SPIDER_S2_WEBSITE_VIDEO)
def get(self, url):
    saveJson = {}
    try:
        Logger.getlogging().debug('Downloading: {url}'.format(url=url))
        request = urllib2.Request(url, headers=self.headers)
        response = urllib2.urlopen(request, timeout=self.timeout)
        code = response.getcode()
        info = response.info()
        # If the HTTP status code is not 200, return None.
        if code == 200:
            html = response.read()
            if ("Content-Encoding" in info) and (info['Content-Encoding'] == "gzip"):
                html = zlib.decompress(html, 16 + zlib.MAX_WBITS)
            Logger.getlogging().debug('Request Succeeded: {url}'.format(url=url))
        else:
            Logger.getlogging().error('open {url} error, code = {code}'.format(url=url, code=code))
            Logger.getlogging().error('Request Failed: {url}'.format(url=url))
            return None
    except:
        Logger.getlogging().error('Request Failed: {url}'.format(url=url))
        Logger.printexception()
        return None
    charset = RegexUtility.getid('charset', html)
    html = Common.trydecode(html, charset)
    saveJson['foundin'] = Common.urlenc(url)
    saveJson['html'] = Common.urlenc(html.encode(constant.CHARSET_UTF8))
    saveJson['crawler_time'] = int(time.time())
    jsonStr = json.dumps(saveJson)
    return jsonStr
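# A hypothetical usage sketch: assuming this method belongs to a downloader
# class (here called HttpDownloader) configured with self.headers and
# self.timeout, the returned JSON string carries the 'foundin', 'html' and
# 'crawler_time' fields that the analysis() parsers in this section consume.
# The class name is an assumption for illustration.
import json

record = HttpDownloader().get('http://example.com/page')
if record:
    print(json.loads(record)['crawler_time'])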
def storecmt(url, content, pubdate, user):
    content = Common.strfilter(content)
    user = Common.strfilter(user)
    pubdate = TimeUtility.getuniformtime(pubdate)
    if not CMTStorage.exist(url, content, pubdate, user):
        Logger.getlogging().debug(
            'url:{url}, content:{content}, pubdate:{pubdate}, user:{user}'.format(
                url=url, content=content, pubdate=pubdate, user=user))
        id = CMTStorage.getid(url, content, pubdate, user)
        data = {
            SQLDAO.SPIDER_TABLE_COMMENTS_ID: id,
            SQLDAO.SPIDER_TABLE_COMMENTS_URL: url,
            SQLDAO.SPIDER_TABLE_COMMENTS_PUBLISH_DATE: pubdate,
            SQLDAO.SPIDER_TABLE_COMMENTS_USER: user,
            SQLDAO.SPIDER_TABLE_COMMENTS_CONTENT: content,
            SQLDAO.SPIDER_TABLE_COMMENTS_CREATE_DATE: SpiderConfigure.getinstance().starttime()
        }
        SQLDAO.getinstance().insert(
            SQLDAO.SPIDER_TABLE_COMMENTS,
            SQLDAO.SPIDER_TABLE_COMMENTS_KEYS,
            SQLDAO.getvaluesfromkeys(data, SQLDAO.SPIDER_TABLE_COMMENTS_KEYS))
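# A minimal usage sketch, assuming storecmt is exposed on CMTStorage alongside
# the exist()/getid() helpers it calls; the URL, comment text, timestamp and
# user name below are illustrative values only. Duplicate comments are skipped
# by the exist() check, so calling this repeatedly with the same data is safe.
CMTStorage.storecmt('http://example.com/article/1',
                    'Nice article!',           # raw comment body
                    '2017-01-01 12:00:00',     # raw publish time, normalized inside
                    'some_user')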
def __init__(self, files, distdir):
    super(Package, self).__init__()
    self.files = files
    self.distdir = distdir
    Common.create_dir(distdir)
    Log.debug('packages: %s' % files)
    Log.debug('distdir: %s' % distdir)
def query(self,info): Logger.getlogging().info("query") querykey=Common.urlenc(Common.trydecode(info).encode('gbk')) #querykey = Common.urlenc(info) query_url = [S2Query.S2_URL % ('1',querykey)] Logger.getlogging().debug(query_url[0]) self.__storeqeuryurllist__(query_url, self.STEP_1, {'key':querykey})
def getid(url):
    idformat = '{machine}_{query}_{url}_{starttime}'
    id = idformat.format(
        machine=NewsStorage.LOCALMACHINEFLAG,
        query=Common.urlenc(SpiderConfigure.getinstance().getquery()),
        url=Common.urlenc(url),
        starttime=SpiderConfigure.getinstance().starttime())
    return Common.md5(id)
def backup_with_config(config_path):
    content = Common.read_file(config_path)
    arr = Common.str2json(content)
    if arr:
        for x in arr:
            backup(x.get('src'),
                   x.get('dst_dir'),
                   x.get('retemtion_days'),
                   hours_last_day=x.get('hours_last_day'),
                   ignore_hours=x.get('ignore_hours'))
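# A hypothetical sketch of the JSON config this function reads: a list of
# backup jobs whose keys mirror the .get() calls above (including the original
# spelling 'retemtion_days'). The values are illustrative; the real schema is
# defined by backup() elsewhere.
#
# [
#   {"src": "/data/app.db", "dst_dir": "/backup/app",
#    "retemtion_days": 7, "hours_last_day": 12, "ignore_hours": null}
# ]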
def query(self, info): Logger.getlogging().info("query") #keyvalue = Common.urlenc(info) keyvalue = Common.urlenc(Common.trydecode(info)) # step1: 根据key, 拼出下面的url # http://q1.fun.tv/ajax/filter_videos/?c=0&p={pageno}&word={key} url = FunS2Query.QUERY_TEMPLATE.format(pageno=1, key=keyvalue) urls = [url] Logger.getlogging().debug(urls[0]) self.__storeqeuryurllist__(urls, self.S2QUERY_FIRST_PAGE, {'query': info})
def process(self, params):
    if params.step == S2Query.STEP_1:
        html = etree.HTML(params.content)
        results = html.xpath('//*[@id="results"]')
        if not results:
            return
        totalpage = html.xpath('//*[@id="div_3"]/*[@class="page"]/span/text()')
        if totalpage:
            totalpage = self.r.parse(r'(\d+)', totalpage[0].split('/')[-1])[0]
        else:
            Logger.getlogging().info("there are no results you want!")
            return
        urllist = []
        if int(totalpage) >= self.maxpages:
            totalpage = self.maxpages
        if int(totalpage) != 0:
            for pages in range(0, int(totalpage)):
                searchurl = S2Query.S2_URL % (pages + 1, params.customized['key'])
                urllist.append(searchurl)
            self.__storeqeuryurllist__(urllist, S2Query.STEP_2, {'key': params.customized['key']})
        else:
            return
    elif params.step == S2Query.STEP_2:
        comquerkey = Common.urldec(params.customized['key']).decode('gbk').encode('utf-8')
        soup = BeautifulSoup(params.content, 'html5lib')
        urllist = []
        divs = soup.find_all(attrs={'class': 'result f s0'})
        if not divs:
            return
        for div in divs:
            title = div.select_one('h3.c-title').get_text()
            title = ''.join(title.strip().split())
            url_tm = div.select_one('.c-showurl').get_text()
            tm = getuniformtime(url_tm.split('/')[-1])
            url = 'http://' + '/'.join(url_tm.split('/')[0:-1])
            Logger.getlogging().debug(title)
            if not Common.checktitle(comquerkey, title):
                Logger.getlogging().warning('{url}:40000 out of range, the title!'.format(url=params.originalurl))
                continue
            if not compareNow(tm, self.querylastdays):
                Logger.getlogging().warning('{url}:40000 out of range, the time!'.format(url=params.originalurl))
                continue
            urllist.append(url)
        self.__storeurllist__(urllist, SPIDER_S2_WEBSITE_VIDEO)
def scale_image_file(cls, src, dst, newWidth):
    if not Common.isfile(src):
        return
    Common.remove(dst)
    img = Image.open(src)
    size = img.size
    print(size)
    # Preserve the aspect ratio when scaling to the new width.
    newHeight = newWidth * size[1] / size[0]
    img.resize((newWidth, int(newHeight)), Image.ANTIALIAS).save(dst)
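# A hypothetical usage sketch: assuming this is a classmethod of an image
# helper class (here called ImageUtility) and Pillow is installed, a thumbnail
# could be produced like this. Note that recent Pillow releases replace
# Image.ANTIALIAS with Image.LANCZOS.
ImageUtility.scale_image_file('photo.jpg', 'photo_small.jpg', 320)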
def exist(url, content, pubdate, user):
    content = Common.strfilter(content)
    user = Common.strfilter(user)
    pubdate = TimeUtility.getuniformtime(pubdate)
    id = CMTStorage.getid(url, content, pubdate, user)
    # Check the in-memory id cache first, then fall back to the database.
    if id in CMTStorage.__cidset:
        return True
    if SQLDAO.getinstance().exists(SQLDAO.SPIDER_TABLE_COMMENTS,
                                   {SQLDAO.SPIDER_TABLE_COMMENTS_ID: id}):
        CMTStorage.__cidset.add(id)
        return True
    return False
def seturlinfos(params):
    id = NewsStorage.getid(params.url)
    if NewsStorage.exist(params.url):
        # Update the existing record.
        doc = NewsStorage.getdoc(params.url)
        data = {}
        data[SQLDAO.SPIDER_TABLE_NEWS_TITLE] = Common.strfilter(params.title)
        if params.type != constant.SPIDER_S2_WEBSITE_VIDEO:
            data[SQLDAO.SPIDER_TABLE_NEWS_BODY] = Common.strfilter(params.body)
        # Only overwrite the publish date if the stored one is still the default.
        if doc.get(SQLDAO.SPIDER_TABLE_NEWS_PUBLISH_DATE,
                   TimeUtility.getintformtime(0)) == TimeUtility.getintformtime(0):
            data[SQLDAO.SPIDER_TABLE_NEWS_PUBLISH_DATE] = TimeUtility.getuniformtime(params.pubtime)
        data[SQLDAO.SPIDER_TABLE_NEWS_CMTNUM] = params.cmtnum
        data[SQLDAO.SPIDER_TABLE_NEWS_CLICKNUM] = params.clicknum
        data[SQLDAO.SPIDER_TABLE_NEWS_FANSNUM] = params.fansnum
        data[SQLDAO.SPIDER_TABLE_NEWS_VOTENUM] = params.votenum
        data[SQLDAO.SPIDER_TABLE_NEWS_UPDATE_DATE] = SQLDAO.gettime()
        SQLDAO.getinstance().update(SQLDAO.SPIDER_TABLE_NEWS,
                                    {SQLDAO.SPIDER_TABLE_NEWS_ID: id}, data)
    else:
        # Insert a new record.
        data = {}
        data[SQLDAO.SPIDER_TABLE_NEWS_TYPE] = params.type
        data[SQLDAO.SPIDER_TABLE_NEWS_TITLE] = Common.strfilter(params.title)
        if params.type != constant.SPIDER_S2_WEBSITE_VIDEO:
            data[SQLDAO.SPIDER_TABLE_NEWS_BODY] = Common.strfilter(params.body)
        data[SQLDAO.SPIDER_TABLE_NEWS_PUBLISH_DATE] = TimeUtility.getuniformtime(params.pubtime)
        data[SQLDAO.SPIDER_TABLE_NEWS_CMTNUM] = params.cmtnum
        data[SQLDAO.SPIDER_TABLE_NEWS_CLICKNUM] = params.clicknum
        data[SQLDAO.SPIDER_TABLE_NEWS_FANSNUM] = params.fansnum
        data[SQLDAO.SPIDER_TABLE_NEWS_VOTENUM] = params.votenum
        data[SQLDAO.SPIDER_TABLE_NEWS_UPDATE_DATE] = SQLDAO.gettime()
        data[SQLDAO.SPIDER_TABLE_NEWS_ID] = id
        data[SQLDAO.SPIDER_TABLE_NEWS_URL] = params.url
        data[SQLDAO.SPIDER_TABLE_NEWS_QUERY] = params.query
        data[SQLDAO.SPIDER_TABLE_NEWS_CHANNEL] = params.channel
        data[SQLDAO.SPIDER_TABLE_NEWS_CREATE_DATE] = params.createtime
        data[SQLDAO.SPIDER_TABLE_NEWS_MACHINEFLAG] = NewsStorage.LOCALMACHINEFLAG
        SQLDAO.getinstance().insert(SQLDAO.SPIDER_TABLE_NEWS,
                                    SQLDAO.SPIDER_TABLE_NEWS_KEYS,
                                    SQLDAO.getvaluesfromkeys(data))
def pageprocess(self, params):
    # Parse the page content.
    xparser = XPathUtility(params.content)
    # Collect all hyperlinks on this page.
    hreflist = xparser.xpath('//h3/a/@href')
    hrefs = []
    for mid_url in hreflist:
        mid = self.preprocess(mid_url)
        if mid is not None:
            hrefs.append(mid)
    # Collect the publish time of every entry on this page.
    publictime = xparser.xpath('//*[@class="scontent"]/text()[1]')
    publicTimes = []
    for timeindex in publictime:
        middle = str(timeindex).replace('\n', '').replace('\t', '').strip()
        parts = middle.split(' ')
        publicTimes.append(parts[0] + ' ' + parts[1])
    # Collect all titles on this page.
    titles = []
    titles_list = xparser.getlist('//h3')
    for title in titles_list:
        mid_title = str(title).replace('\n', '').replace('\t', '').strip()
        titles.append(mid_title)
    # The search keyword.
    KEY_mid = params.customized['KEY']
    KEY = Common.urldec(KEY_mid)
    # Pattern used to match titles.
    titlePatten = KEY
    # Compute the cut-off date, self.inputtime days ago.
    today = datetime.datetime.now()
    before_days = today + datetime.timedelta(-self.inputtime)
    before_arr = str(before_days).split('.')
    before_time = before_arr[0]
    urllist = []
    len_hrefs = len(hrefs)
    number = 0
    for index in publicTimes[:len_hrefs]:
        # Check whether the title matches the keyword.
        flg = Common.checktitle(titlePatten, str(titles[number]))
        # Keep the entry if it was published within the window and the title matches.
        if index > before_time and flg:
            url = hrefs[number]
            urllist.append(url)
        number = number + 1
    # Store the final URL list.
    if len(urllist) > 0:
        self.__storeurllist__(urllist, SPIDER_S2_WEBSITE_TIEBA)
def _cleanBuildInfo(self):
    for path in self.files:
        directory = Common.split_path(path)[0]
        Common.remove(Common.join_paths(directory, '__pycache__'))
        Common.remove(Common.join_paths(directory, 'build'))
        # Remove leftover PyInstaller .spec files next to each source file.
        specs = [x for x in os.listdir(directory)
                 if os.path.isfile(Common.join_paths(directory, x))
                 and os.path.splitext(x)[1] == '.spec']
        for x in specs:
            spec_path = Common.join_paths(directory, x)
            Common.remove(spec_path)
def analysis(line):
    param = ProcessParam()
    js = json.loads(line)
    param.url = js['foundin']
    param.content = js['html']
    if js['html'][:3] == constant.GZIP_CODE:
        param.content = zlib.decompress(param.content, 16 + zlib.MAX_WBITS)
    # Decode the content.
    content = Common.urldec(param.content)
    charset = RegexUtility.getid('charset', content)
    content = Common.trydecode(content, charset)
    param.content = content
    return param
def _buildPys(self):
    for path in self.files:
        # Remove any previous build outputs for this script before rebuilding.
        f1 = os.path.splitext(Common.join_paths(self.distdir, Common.split_path(path)[-1]))[0]
        f2 = f1 + '.exe'
        Common.remove(f1)
        Common.remove(f2)
        cmd = 'pyinstaller %s -F --distpath %s' % (path, self.distdir)
        Common.system_cmd(cmd, directory=Common.split_path(path)[0])
def getpagecomments(self, params):
    info = params.customized['query']
    xpath = XPathUtility(html=params.content)
    hrefs = xpath.xpath('//*[@class="sosResult"]/strong/a/@href')
    titles = xpath.getlist('//*[@class="sosResult"]/strong/a')
    pubtimes = xpath.xpath('//*[@class="sosResult"]/span/cite[3]')
    today = datetime.datetime.strptime(TimeUtility.getcurrentdate(),
                                       TimeUtility.DATE_FORMAT_DEFAULT).date()
    urllist = []
    for index in range(0, len(titles), 1):
        # Keep results whose title contains the query keyword.
        if Common.checktitle(info, titles[index]):
            pubtimestr = TimeUtility.getuniformtime(pubtimes[index].text).split(' ')[0]
            pubtime = datetime.datetime.strptime(pubtimestr, TimeUtility.DATE_FORMAT_DEFAULT).date()
            inteveral = today - pubtime
            # Keep results published within the configured time window.
            if inteveral.days <= int(self.querylastdays):
                newurl = self.preprocess(hrefs[index])
                if newurl is not None:
                    urllist.append(newurl)
    if len(urllist) > 0:
        self.__storeurllist__(urllist, SPIDER_S2_WEBSITE_TIEBA)
def s2query(self):
    self.conf.setchannel(SPIDER_CHANNEL_S2)
    s2file = SpiderConfigure.getinstance().gets2file()
    file = FileUtility.getfilename(s2file)
    s2temppath = Storage.getstoragelocation(const.SPIDER_QUERY_TEMP_PATH) + file
    if FileUtility.exists(s2temppath):
        with open(s2temppath, 'r') as fp:
            querylist = []
            firstline = True
            for strquery in fp.readlines():
                if firstline:
                    firstline = False
                    if strquery[:3] == codecs.BOM_UTF8:
                        Logger.getlogging().warning('Remove BOM from {file}!'.format(file=file))
                        strquery = strquery[3:]
                strquery = Common.strip(strquery)
                if not strquery:
                    continue
                Logger.getlogging().info('S2 {query} start...'.format(query=strquery))
                self.conf.setquery(strquery)
                URLStorage.updaterecycle()
                querylist.append(strquery)
                for site in self.factory.getall():
                    site.s2query(strquery.replace('&', ' '))
            sitelist = []
            for site in self.factory.getall():
                if site.exists2():
                    sitelist.append(site)
            SpiderReport.loadquery(querylist)
            SpiderReport.loadsites(sitelist)
def get_list(self, u_id, u_mobile, order_id, ctime_st, ctime_ed, order_type, page, count):
    if order_type == 'all':
        orders, total = yield self.context_repos.order_repo.select_for_background_all(
            u_id, u_mobile, order_id, ctime_st, ctime_ed, page, count)
    else:
        # Map the order_type string to its numeric state code. Callers must pass
        # one of the types below, otherwise `state` is left unbound.
        if order_type == 'need_pay':
            state = 0
        elif order_type == 'need_send':
            state = 1
        elif order_type == 'need_receive':
            state = 2
        elif order_type == 'complete':
            state = 3
        elif order_type == 'cancel':
            state = 4
        elif order_type == 'overtime':
            state = 5
        orders, total = yield self.context_repos.order_repo.select_for_background(
            u_id, u_mobile, order_id, state, ctime_st, ctime_ed, page, count)
    res = {
        'orders': orders,
        'pagination': Common().pagination(total, page, count)
    }
    raise gen.Return(res)
def pageprocess(self, params):
    # Step 3: from the returned HTML, take the result titles via
    # //*[@class="scout_anim_titletext"] and the result URLs via
    # //*[@class="scout_anim_title"]/div/a/@href.
    # The response is JSONP, so strip the wrapping callback first.
    indexstart = params.content.find('(')
    indexstop = params.content.rfind(')')
    if indexstart > -1 and indexstop > -1:
        jsonvalue = params.content[indexstart + 1:indexstop]
        jsondata = json.loads(jsonvalue)
        info = params.customized['query']
        soup = BeautifulSoup(jsondata['content'], 'html5lib')
        uls = soup.select('.scout_anim_odd > .scout_anim_odd_ul')
        if uls:
            for ul in uls:
                titles = ul.select_one('.scout_anim_titletext').get_text()
                Logger.getlogging().debug(titles)
                if not Common.checktitle(info, titles):
                    return
                content = ul.select('.scout_anim_content > div > ul > li')
                if content:
                    # Keep at most the last three episodes.
                    if len(content) > 3:
                        content = content[-3:]
                    urllist = ['https://donghua.dmzj.com' + item.find('a').get('href')
                               for item in content]
                    self.__storeurllist__(urllist, SPIDER_S2_WEBSITE_VIDEO)
def query(self, info): Logger.getlogging().info("AngeeksS2Query.query") keyvalue = Common.urlenc(info) # step1: 根据key, 拼出下面的url if int(self.querylastdays) <= 7: datevalue = self.WEEKLY elif int(self.querylastdays) <= 30: datevalue = self.MONTHLY else: datevalue = None if datevalue is None: urls = [ AngeeksS2Query.QUERY_TEMPLATE_ALL.format(key=keyvalue, page=0) ] else: urls = [ AngeeksS2Query.QUERY_TEMPLATE.format(key=keyvalue, page=0, date=datevalue) ] Logger.getlogging().debug(urls[0]) self.__storeqeuryurllist__(urls, self.S2QUERY_FIRST_PAGE, { 'query': info, 'date': datevalue })
def gets2url(self, params):
    # Parse the JSON search results.
    contents = json.loads(params.content)
    query = Common.urldec(params.customized['query'])
    urllist = []
    for item in contents['video_list']:
        try:
            vid = item['vid']
            # "体育" is the sports category, which is served from a different host.
            if item.get('categoryName', '') == u"体育":
                url = 'http://sports.le.com/video/{vid}.html'.format(vid=vid)
            else:
                url = 'http://www.le.com/ptv/vplay/{vid}.html'.format(vid=vid)
            curtime = item['ctime']
            title = item['name']
            if self.compareNow(curtime):
                if self.checktitle(query, title):
                    urllist.append(url)
                else:
                    Logger.log(url, constant.ERRORCODE_WARNNING_NOMATCHTITLE)
            else:
                Logger.log(url, constant.ERRORCODE_WARNNING_NOMATCHTIME)
        except:
            Logger.printexception()
    # Store the final URL list.
    if len(urllist) > 0:
        self.__storeurllist__(urllist, SPIDER_S2_WEBSITE_VIDEO)
def step1(self, params):
    # Parse the video search result page and fan out over its pagination.
    info = params.customized['query']
    keyvalue = Common.trydecode(info)
    soup = BeautifulSoup(params.content, 'html5lib')
    page_numlist = soup.select('#sort > .page > a')
    if soup.select_one('.no-result'):
        Logger.log(params.originalurl, constant.ERRORCODE_WARNNING_NORESULTS)
        return
    if page_numlist:
        page_num = int(page_numlist[-2].get_text())
    else:
        page_num = 1
    if page_num >= self.maxpages:
        page_num = self.maxpages
    querylist = []
    for page in range(1, page_num + 1):
        if page == 1:
            # The first page is already in params.content, so parse it directly.
            self.step2(params)
            continue
        url = S2Query.S2_URL.format(key=keyvalue, page=page)
        querylist.append(url)
    self.__storeqeuryurllist__(querylist, S2Query.STEP_2, {'query': info, 'page_num': page_num})
def step2(self, params):
    keyword = params.customized['keyword']
    query = Common.urldec(keyword)
    jsondata = json.loads(params.content)
    # The result list is returned as an HTML fragment inside the JSON payload.
    html = jsondata['html']
    soup = bs(html, 'html5lib')
    videoUrlList = []
    videoList = soup.select('li.video')
    for video in videoList:
        try:
            videoUrl = 'https:' + video.select_one('a').get('href')
            videoUrl = videoUrl.split('?')[0] + '/'
            title = video.select_one('a').get('title')
            pubtime = video.find(attrs={'class': 'so-icon time'}).get_text().strip()
            if self.compareNow(TimeUtility.getuniformtime(pubtime)):
                if self.checktitle(query, title):
                    videoUrlList.append(videoUrl)
                    self.__storeurl__(videoUrl, pubtime, SPIDER_S2_WEBSITE_VIDEO)
                else:
                    Logger.log(videoUrl, constant.ERRORCODE_WARNNING_NOMATCHTITLE)
            else:
                Logger.log(videoUrl, constant.ERRORCODE_WARNNING_NOMATCHTIME)
        except:
            Logger.printexception()
def query(self, info):
    q = Common.urlenc(info)
    urls = [One7173S2Query.V17173_QUERY_P.format(q=q, ps=0)]
    self.__storeqeuryurllist__(urls, self.FIRST, {'query': q, 'pages_num': 0})