def __storeurllist__(self, urllist, type=constant.SPIDER_S2_WEBSITE_VIDEO, referlist=None):
    """Register every URL in ``urllist`` that is not already in cold storage.

    For each URL a ``PageBasicInfo`` record carrying the URL and ``type`` is
    built; it is passed to ``NewsStorage.seturlinfos`` only when
    ``NewsStorage.exist_cold`` reports the URL as absent.

    Args:
        urllist: Iterable of URLs to register.
        type: Value assigned to each record's ``type`` attribute. The name
            shadows the builtin but is kept so keyword callers still work.
        referlist: Unused. Accepted only so existing callers keep working;
            the URLManager-based code path that read it was disabled.
            Defaults to ``None`` instead of a shared mutable list.
    """
    for url in urllist:
        params = PageBasicInfo()
        params.url = url
        params.type = type
        # Skip URLs already present in the cold database; per the original
        # note, the remaining ones are inserted into the hot database.
        if not NewsStorage.exist_cold(url):
            NewsStorage.seturlinfos(params)
def __storeurl__(self, url, publishdate, type=constant.SPIDER_S2_WEBSITE_VIDEO):
    """Register one URL with its publish date unless it is in cold storage.

    Args:
        url: URL to register.
        publishdate: Value stored on the record's ``pubtime`` attribute.
        type: Value stored on the record's ``type`` attribute.
    """
    page_info = PageBasicInfo()
    page_info.url, page_info.type, page_info.pubtime = url, type, publishdate

    # Nothing to do when the URL is already present in the cold database.
    already_cold = NewsStorage.exist_cold(url)
    if already_cold:
        return

    # Per the original note, this inserts the record into the hot database.
    NewsStorage.seturlinfos(page_info)