Ejemplo n.º 1
0
 def __storeurllist__(self, urllist, type=constant.SPIDER_S2_WEBSITE_VIDEO, referlist=None):
     """Register every URL in ``urllist`` that is not already in cold storage.

     For each URL a ``PageBasicInfo`` carrying the URL and ``type`` is built.
     It is handed to ``NewsStorage.seturlinfos`` only when
     ``NewsStorage.exist_cold(url)`` is false.

     Args:
         urllist: Iterable of URLs to register.
         type: Value assigned to ``PageBasicInfo.type``; defaults to
             ``constant.SPIDER_S2_WEBSITE_VIDEO``.
         referlist: Currently unused; accepted only so that existing
             callers that pass it keep working.
     """
     for url in urllist:
         params = PageBasicInfo()
         params.url = url
         params.type = type
         # Check whether the URL is already in the cold database; only
         # URLs that are not there are inserted into the hot database.
         if not NewsStorage.exist_cold(url):
             NewsStorage.seturlinfos(params)
Ejemplo n.º 2
0
 def __storeurl__(self, url, publishdate, type=constant.SPIDER_S2_WEBSITE_VIDEO):
     """Register a single URL unless it is already in cold storage.

     Builds a ``PageBasicInfo`` holding the URL, its ``type`` and its
     publish time, then passes it to ``NewsStorage.seturlinfos`` when
     ``NewsStorage.exist_cold(url)`` is false.

     Args:
         url: URL to register.
         publishdate: Value assigned to ``PageBasicInfo.pubtime``.
         type: Value assigned to ``PageBasicInfo.type``; defaults to
             ``constant.SPIDER_S2_WEBSITE_VIDEO``.
     """
     page_info = PageBasicInfo()
     page_info.url = url
     page_info.type = type
     page_info.pubtime = publishdate
     # A URL already present in the cold database is skipped.
     if NewsStorage.exist_cold(url):
         return
     # Otherwise insert it into the hot database.
     NewsStorage.seturlinfos(page_info)