def divisionTarget(startcontext, startfilter, endfilter):
    """Split ``startcontext`` into a target segment and the text after it.

    The segment starts at the offset ``CommonsSpiderUtils.filterContext``
    reports for ``startfilter`` and stops ``len(endfilter)`` past the offset
    it reports for ``endfilter``.

    Returns a dict with two keys: ``'targetContext'`` (the segment) and
    ``'nextContext'`` (everything from the end of the segment onwards).
    """
    # NOTE(review): nothing here guards against a marker that is absent;
    # presumably filterContext reports that with a negative offset -- confirm.
    begin = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    end_marker_at = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    finish = end_marker_at + len(endfilter)
    parts = {}
    parts['targetContext'] = startcontext[begin:finish]
    parts['nextContext'] = startcontext[finish:]
    return parts
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and drop a few site-specific fragments.

    The text is first passed through
    ``CommonsSpiderUtils.removeSpecialCharacter``; afterwards every
    occurrence of "./", " " and "..." is removed, in that order.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    # Order is significant: each removal works on the previous result.
    for fragment in ("./", " ", "..."):
        cleaned = cleaned.replace(fragment, "")
    return cleaned
def filterContextByTarget(context, startfilter, endfilter):
    """Forward to ``CommonsSpiderUtils.filterContextByTarget`` unchanged.

    All three arguments are passed through positionally and the helper's
    result is returned as-is.
    """
    extracted = CommonsSpiderUtils.filterContextByTarget(
        context, startfilter, endfilter)
    return extracted
def filterAfterContext(startContext, filterContext):
    """Return the tail of ``startContext`` that follows ``filterContext``.

    The cut point is the offset reported by
    ``CommonsSpiderUtils.filterContext`` plus ``len(filterContext)``.
    """
    marker_at = CommonsSpiderUtils.filterContext(startContext, filterContext)
    cut = marker_at + len(filterContext)
    return startContext[cut:]
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and strip relative-path prefixes.

    After ``CommonsSpiderUtils.removeSpecialCharacter`` has run, every
    '../../../' is removed first and every './' second.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    # The longer prefix must go first; './' would otherwise break it up.
    for fragment in ('../../../', './'):
        cleaned = cleaned.replace(fragment, '')
    return cleaned
def returnStartContext(link, startTarget):
    """Return the common start context for ``link`` decoded from GB2312.

    ``link`` and ``startTarget`` are handed to
    ``CommonsSpiderUtils.returnCommonStartContext`` and its result is
    decoded as GB2312 text.
    """
    # Assumes returnCommonStartContext yields a byte string -- TODO confirm.
    raw = CommonsSpiderUtils.returnCommonStartContext(link, startTarget)
    # Decode exactly once.  The former trailing ``.decode('UTF-8')`` was
    # applied to already-decoded text: on Python 2 that implicitly encodes
    # to ASCII first and raises UnicodeEncodeError for any non-ASCII
    # character, and on Python 3 text has no ``decode`` method at all.
    # For pure-ASCII input it was a no-op, so the result is unchanged
    # wherever the old code succeeded.
    return raw.decode('gb2312')
def returnStartContext(link, startTarget):
    """Return the common start context for ``link`` decoded from GB2312.

    The result of ``CommonsSpiderUtils.returnCommonStartContext(link,
    startTarget)`` is decoded as GB2312 and returned as text.
    """
    # Assumes returnCommonStartContext yields a byte string -- TODO confirm.
    encoded = CommonsSpiderUtils.returnCommonStartContext(link, startTarget)
    # A second ``.decode("UTF-8")`` used to follow this call.  Decoding text
    # that is already decoded fails for non-ASCII characters on Python 2
    # (implicit ASCII encode) and is not available on Python 3, while for
    # ASCII-only text it changed nothing -- so a single decode is correct.
    return encoded.decode("gb2312")
def findAllImageTarget(startContext):
    """Count the thumbnail-image markers found in ``startContext``.

    Returns the length of whatever ``CommonsSpiderUtils.findAllTarget``
    yields for the ``<div class="thumbnailimage">`` pattern.
    """
    matches = CommonsSpiderUtils.findAllTarget(
        startContext, r'<div class="thumbnailimage">')
    return len(matches)
def findAllTarget(context):
    """Look up record-row markers in ``context``.

    Returns the result of ``CommonsSpiderUtils.findAllTarget`` for the
    ``<tr class="record" valign="top">`` pattern, unmodified.
    """
    matches = CommonsSpiderUtils.findAllTarget(
        context, r'<tr class="record" valign="top">')
    return matches
def filterContextByTarget(context, startfilter, endfilter):
    """Return the part of ``context`` between two markers.

    The slice starts ``len(startfilter)`` past the offset
    ``CommonsSpiderUtils.filterContext`` reports for ``startfilter`` and
    stops at the offset it reports for ``endfilter``.
    """
    begin = CommonsSpiderUtils.filterContext(context, startfilter)
    begin += len(startfilter)
    finish = CommonsSpiderUtils.filterContext(context, endfilter)
    return context[begin:finish]
def findAllTarget(context, filter):
    """Count the matches of ``filter`` in ``context``.

    ``filter`` is %-formatted into a string and passed to
    ``CommonsSpiderUtils.findAllTarget``; the length of its result is
    returned.
    """
    matches = CommonsSpiderUtils.findAllTarget(context, r'%s' % filter)
    return len(matches)
def returnFilterMainContext(startcontext, startfilter, endfilter):
    """Return the text of ``startcontext`` enclosed by two markers.

    The start offset is the one ``CommonsSpiderUtils.filterContext``
    reports for ``startfilter`` plus ``len(startfilter)``; the end offset
    is the one it reports for ``endfilter``.
    """
    begin = len(startfilter) + CommonsSpiderUtils.filterContext(
        startcontext, startfilter)
    finish = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    main_part = startcontext[begin:finish]
    return main_part
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and strip leftover HTML fragments.

    After ``CommonsSpiderUtils.removeSpecialCharacter`` has run, every
    '<br />' and then every '</div>' is removed from the text.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    for fragment in ('<br />', '</div>'):
        cleaned = cleaned.replace(fragment, '')
    return cleaned
def retrunStartContext(link):
    """Open ``link`` and return its start context for record rows.

    The page obtained from ``CommonsSpiderUtils.openUrl`` is handed to
    ``CommonsSpiderUtils.startContext`` together with the
    ``<tr class="record" valign="top">`` marker.
    """
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(
        page, '<tr class="record" valign="top">')
def filterTargetFlag(filterContext, filter):
    """Tell whether ``filter`` is reported as present in ``filterContext``.

    Returns ``False`` when ``CommonsSpiderUtils.filterContext`` yields a
    value below zero and ``True`` otherwise.
    """
    offset = CommonsSpiderUtils.filterContext(filterContext, filter)
    return not (offset < 0)
def findAllImageTarget(context, param):
    """Count the matches of ``param`` in ``context``.

    ``param`` is %-formatted into a string, passed to
    ``CommonsSpiderUtils.findAllTarget``, and the length of the result is
    returned.
    """
    matches = CommonsSpiderUtils.findAllTarget(context, r'%s' % param)
    return len(matches)
def filterContextByTarget(context, startfilter, endfilter):
    """Extract the text of ``context`` lying between two markers.

    Offsets come from ``CommonsSpiderUtils.filterContext``; the start
    offset is advanced by ``len(startfilter)`` so the slice begins after
    that many characters, and the slice ends at the ``endfilter`` offset.
    """
    start_at = CommonsSpiderUtils.filterContext(context, startfilter)
    start_at = start_at + len(startfilter)
    stop_at = CommonsSpiderUtils.filterContext(context, endfilter)
    between = context[start_at:stop_at]
    return between
def returnStartContext(link, filter):
    """Open ``link`` and return its start context for ``filter``.

    The page returned by ``CommonsSpiderUtils.openUrl`` is passed, along
    with ``filter``, to ``CommonsSpiderUtils.startContext``.
    """
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(page, filter)
def returnImageListStartContext(link):
    """Open ``link`` and return its start context for thumbnail images.

    The page from ``CommonsSpiderUtils.openUrl`` is handed to
    ``CommonsSpiderUtils.startContext`` with the
    ``<div class="thumbnailimage">`` marker.
    """
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(
        page, '<div class="thumbnailimage">')
def findAllTarget(context):
    """Return every record-row match found in ``context``.

    This is the unmodified result of ``CommonsSpiderUtils.findAllTarget``
    for the ``<tr class="record" valign="top">`` pattern.
    """
    found = CommonsSpiderUtils.findAllTarget(
        context, r'<tr class="record" valign="top">')
    return found
def filterTargetFlag(filterContext, filter):
    """Report whether ``CommonsSpiderUtils.filterContext`` located ``filter``.

    A result below zero maps to ``False``; any other result maps to
    ``True``.
    """
    position = CommonsSpiderUtils.filterContext(filterContext, filter)
    if position < 0:
        return False
    return True
def retrunStartContext(link):
    """Fetch ``link`` and return the start context at the first record row.

    ``CommonsSpiderUtils.openUrl`` supplies the page, which is then given
    to ``CommonsSpiderUtils.startContext`` with the
    ``<tr class="record" valign="top">`` marker.
    """
    fetched = CommonsSpiderUtils.openUrl(link)
    record_context = CommonsSpiderUtils.startContext(
        fetched, '<tr class="record" valign="top">')
    return record_context
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and delete every " " it still contains.

    The text goes through ``CommonsSpiderUtils.removeSpecialCharacter``
    before the replacement is applied.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    without_blanks = cleaned.replace(" ", "")
    return without_blanks
def returnStartContext(link, startFlag):
    """Fetch ``link``, transcode it from GBK to UTF-8, return its start context.

    The bytes from ``CommonsSpiderUtils.openInternetUrl`` are decoded as
    GBK and re-encoded as UTF-8 before being handed to
    ``CommonsSpiderUtils.startContext`` together with ``startFlag``
    (formatted through ``'%s'``).  Uses the Python 2 ``unicode`` builtin.
    """
    raw_page = CommonsSpiderUtils.openInternetUrl(link)
    utf8_page = unicode(raw_page, 'GBK').encode('UTF-8')
    return CommonsSpiderUtils.startContext(utf8_page, '%s' % startFlag)
def findAllTarget(context, filterTarget):
    """Forward to ``CommonsSpiderUtils.findAllTargets`` unchanged.

    Both arguments are passed positionally and the helper's result is
    returned as-is.
    """
    targets = CommonsSpiderUtils.findAllTargets(context, filterTarget)
    return targets
def returnFilterMainContext(startcontext, startfilter, endfilter):
    """Slice ``startcontext`` between ``startfilter`` and ``endfilter``.

    Both offsets come from ``CommonsSpiderUtils.filterContext``; the first
    is advanced by ``len(startfilter)`` and the second is used as the
    exclusive end of the slice.
    """
    lower = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    lower = lower + len(startfilter)
    upper = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    return startcontext[lower:upper]
def divisionTarget(startcontext, startfilter, endfilter):
    """Forward to ``CommonsSpiderUtils.divisionTarget`` unchanged.

    The three arguments are passed positionally and the helper's result
    is returned without modification.
    """
    divided = CommonsSpiderUtils.divisionTarget(
        startcontext, startfilter, endfilter)
    return divided
def findAllTarget(context, filter):
    """Return how many matches of ``filter`` exist in ``context``.

    The pattern is ``filter`` run through ``'%s'`` formatting; the count
    is the length of what ``CommonsSpiderUtils.findAllTarget`` returns.
    """
    hits = CommonsSpiderUtils.findAllTarget(context, r'%s' % filter)
    hit_count = len(hits)
    return hit_count
def returnStartContext(link, startTarget):
    """Forward to ``CommonsSpiderUtils.returnStartContext`` unchanged.

    ``link`` and ``startTarget`` are passed positionally; the helper's
    result is returned as-is.
    """
    start_context = CommonsSpiderUtils.returnStartContext(link, startTarget)
    return start_context
def returnStartContext(link, filter):
    """Fetch ``link`` and return the start context selected by ``filter``.

    ``CommonsSpiderUtils.openUrl`` provides the page;
    ``CommonsSpiderUtils.startContext`` is then called with that page and
    ``filter``.
    """
    fetched = CommonsSpiderUtils.openUrl(link)
    selected = CommonsSpiderUtils.startContext(fetched, filter)
    return selected
def findAllTarget(context):
    """Count the article markers found in ``context``.

    Returns the length of what ``CommonsSpiderUtils.findAllTarget`` yields
    for the ``<div class='Article'>`` pattern.
    """
    articles = CommonsSpiderUtils.findAllTarget(
        context, r"<div class='Article'>")
    return len(articles)
def returnStartContext(link):
    """Open ``link`` and return its start context for image models.

    The page from ``CommonsSpiderUtils.openUrl`` is passed to
    ``CommonsSpiderUtils.startContext`` with the
    ``<div class="imgModel">`` marker.
    """
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(page, '<div class="imgModel">')
def divisionTarget(startcontext, startfilter, endfilter):
    """Cut ``startcontext`` into a target segment and the remaining text.

    Offsets are obtained from ``CommonsSpiderUtils.filterContext``.  The
    segment runs from the ``startfilter`` offset up to ``len(endfilter)``
    past the ``endfilter`` offset.

    Returns a dict holding the segment under ``'targetContext'`` and the
    text following it under ``'nextContext'``.
    """
    # NOTE(review): a marker that cannot be found is not handled here;
    # presumably filterContext reports it as a negative offset -- confirm.
    head = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    tail = len(endfilter) + CommonsSpiderUtils.filterContext(
        startcontext, endfilter)
    target = startcontext[head:tail]
    remainder = startcontext[tail:]
    return {'targetContext': target, 'nextContext': remainder}
def findAllTarget(context):
    """Count the image-model markers found in ``context``.

    Returns the length of what ``CommonsSpiderUtils.findAllTarget`` yields
    for the ``<div class="imgModel">`` pattern.
    """
    models = CommonsSpiderUtils.findAllTarget(
        context, r'<div class="imgModel">')
    return len(models)
def returnStartContext(link):
    """Fetch ``link``, transcode GBK to UTF-8, return the article start context.

    The bytes from ``CommonsSpiderUtils.openInternetUrl`` are decoded as
    GBK and re-encoded as UTF-8, then passed to
    ``CommonsSpiderUtils.startContext`` with the ``<div class='Article'>``
    marker.  Uses the Python 2 ``unicode`` builtin.
    """
    raw_page = CommonsSpiderUtils.openInternetUrl(link)
    decoded_page = unicode(raw_page, 'GBK')
    utf8_page = decoded_page.encode('UTF-8')
    return CommonsSpiderUtils.startContext(utf8_page, "<div class='Article'>")
def targetAfterContext(context, filterContext):
    """Return ``context`` from the ``filterContext`` offset to its end.

    The offset is whatever ``CommonsSpiderUtils.filterContext`` reports;
    unlike a "text after the marker" cut, nothing is added to it here.
    """
    offset = CommonsSpiderUtils.filterContext(context, filterContext)
    return context[offset:]