def divisionTarget(startcontext, startfilter, endfilter):
    """Split ``startcontext`` into a target segment and the remaining text.

    Both markers are looked up with ``CommonsSpiderUtils.filterContext`` and
    the results are used directly as slice offsets into ``startcontext``.
    NOTE(review): presumably ``filterContext`` returns the position of the
    first match; its not-found value is not visible here -- confirm.

    :param startcontext: text to split.
    :param startfilter: marker whose offset starts the target segment.
    :param endfilter: marker whose end closes the target segment.
    :return: dict with ``'targetContext'`` (slice from the start offset up to
        the end offset plus ``len(endfilter)``) and ``'nextContext'``
        (everything after that point).
    """
    begin = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    # Advance by the end marker's length so the marker stays in the target.
    stop = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    stop = stop + len(endfilter)
    segments = {}
    segments['targetContext'] = startcontext[begin:stop]
    segments['nextContext'] = startcontext[stop:]
    return segments
def filterContextByTarget(context, startfilter, endfilter):
    """Forward to ``CommonsSpiderUtils.filterContextByTarget`` unchanged.

    :param context: text handed to the shared helper.
    :param startfilter: start marker handed to the shared helper.
    :param endfilter: end marker handed to the shared helper.
    :return: whatever the shared helper returns (presumably the text between
        the two markers -- confirm against ``CommonsSpiderUtils``).
    """
    extracted = CommonsSpiderUtils.filterContextByTarget(
        context, startfilter, endfilter)
    return extracted
def filterAfterContext(startContext, filterContext):
    """Return the tail of ``startContext`` that follows ``filterContext``.

    The offset from ``CommonsSpiderUtils.filterContext`` is advanced by the
    marker length and used as the slice start.
    NOTE(review): presumably that helper returns the marker's position; its
    not-found value is not visible here -- confirm.

    :param startContext: text to cut.
    :param filterContext: marker to skip past.
    :return: ``startContext`` sliced from just after the marker to the end.
    """
    markerAt = CommonsSpiderUtils.filterContext(startContext, filterContext)
    resumeAt = markerAt + len(filterContext)
    return startContext[resumeAt:]
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and strip ``'./'``, spaces and ``'...'``.

    The text is first passed through
    ``CommonsSpiderUtils.removeSpecialCharacter``; the three fragments are
    then removed from that result.

    :param removeContext: text to clean.
    :return: the cleaned text with the fragments removed.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    # Order matters: './' is removed before spaces and before '...'.
    for fragment in ('./', ' ', '...'):
        cleaned = cleaned.replace(fragment, '')
    return cleaned
def removeSpecialCharacter(removeContext):
    """Forward to ``CommonsSpiderUtils.removeSpecialCharacter`` unchanged.

    :param removeContext: text handed to the shared helper.
    :return: whatever the shared helper returns.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    return cleaned
def returnStartContext(link, startTarget):
    """Return the start context for ``link`` transcoded from GB2312 to UTF-8.

    The value from ``CommonsSpiderUtils.returnCommonStartContext`` is decoded
    as GB2312 and then encoded as UTF-8.  Calling ``.decode('UTF-8')`` on the
    already-decoded unicode object (as this function used to) makes Python 2
    implicitly ASCII-encode it first, which raises ``UnicodeEncodeError`` as
    soon as the text contains a non-ASCII character.
    NOTE(review): assumes the helper returns a GB2312 byte string -- confirm.

    :param link: passed through to the shared helper.
    :param startTarget: passed through to the shared helper.
    :return: the start context as a UTF-8 encoded byte string.
    """
    rawContext = CommonsSpiderUtils.returnCommonStartContext(link, startTarget)
    # decode -> unicode, encode -> UTF-8 bytes (never decode twice).
    return rawContext.decode('gb2312').encode('UTF-8')
def returnStartContext(link, startFlag):
    """Load ``link``, transcode GBK to UTF-8 and return its start context.

    The result of ``CommonsSpiderUtils.openInternetUrl`` is decoded as GBK,
    re-encoded as UTF-8 and handed to ``CommonsSpiderUtils.startContext``
    together with ``startFlag`` formatted through ``"%s"``.

    :param link: passed to ``openInternetUrl``.
    :param startFlag: marker; string-formatted before being passed on.
    :return: whatever ``CommonsSpiderUtils.startContext`` returns.
    """
    rawPage = CommonsSpiderUtils.openInternetUrl(link)
    utf8Page = unicode(rawPage, "GBK").encode("UTF-8")
    return CommonsSpiderUtils.startContext(utf8Page, "%s" % startFlag)
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and strip ``'./'``, spaces and ``'...'``.

    ``CommonsSpiderUtils.removeSpecialCharacter`` runs first; the three
    fragments are then removed one after another, in that order.

    :param removeContext: text to clean.
    :return: the cleaned text with the fragments removed.
    """
    text = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    text = text.replace('./', '')
    text = text.replace(' ', '')
    text = text.replace('...', '')
    return text
def targetAfterContext(context, filterContext):
    """Return ``context`` from the ``filterContext`` offset to the end.

    The offset comes from ``CommonsSpiderUtils.filterContext`` and is used
    as the slice start without adjustment.
    NOTE(review): presumably the helper returns the marker's position, which
    would keep the marker in the result -- confirm.

    :param context: text to cut.
    :param filterContext: marker to look up.
    :return: ``context`` sliced from the returned offset onward.
    """
    offset = CommonsSpiderUtils.filterContext(context, filterContext)
    return context[offset:]
def returnStartContext(link, startTarget):
    """Load ``link``, transcode GBK to UTF-8 and return its start context.

    The result of ``CommonsSpiderUtils.openInternetUrl`` is decoded as GBK,
    re-encoded as UTF-8 and handed to ``CommonsSpiderUtils.startContext``
    together with ``startTarget``.

    :param link: passed to ``openInternetUrl``.
    :param startTarget: marker passed to ``startContext``.
    :return: whatever ``CommonsSpiderUtils.startContext`` returns.
    """
    fetched = CommonsSpiderUtils.openInternetUrl(link)
    utf8Text = unicode(fetched, 'GBK').encode('UTF-8')
    return CommonsSpiderUtils.startContext(utf8Text, startTarget)
def findAllTarget(context, filterTarget):
    """Count the matches of ``filterTarget`` reported for ``context``.

    ``filterTarget`` is string-formatted and passed to
    ``CommonsSpiderUtils.findAllTarget``; the length of its result is
    returned.

    :param context: text to search.
    :param filterTarget: pattern to look for (presumably a regular
        expression -- confirm against the helper).
    :return: number of items in the helper's result.
    """
    pattern = r'%s' % filterTarget
    matches = CommonsSpiderUtils.findAllTarget(context, pattern)
    return len(matches)
def targetAfterContext(context, filterContext):
    """Return ``context`` from the ``filterContext`` offset to the end.

    The slice start is the value of ``CommonsSpiderUtils.filterContext``,
    used as-is.
    NOTE(review): presumably that value is the marker's position -- confirm.

    :param context: text to cut.
    :param filterContext: marker to look up.
    :return: ``context`` sliced from the returned offset onward.
    """
    startAt = CommonsSpiderUtils.filterContext(context, filterContext)
    remainder = context[startAt:]
    return remainder
def returnStartContext(link, startTarget):
    """Return the start context for ``link`` transcoded from GB2312 to UTF-8.

    The value from ``CommonsSpiderUtils.returnCommonStartContext`` is decoded
    as GB2312 and then encoded as UTF-8.  The earlier second ``.decode``
    call operated on a unicode object, which in Python 2 forces an implicit
    ASCII encode and fails with ``UnicodeEncodeError`` on non-ASCII text.
    NOTE(review): assumes the helper returns a GB2312 byte string -- confirm.

    :param link: passed through to the shared helper.
    :param startTarget: passed through to the shared helper.
    :return: the start context as a UTF-8 encoded byte string.
    """
    gb2312Context = CommonsSpiderUtils.returnCommonStartContext(
        link, startTarget)
    unicodeContext = gb2312Context.decode("gb2312")
    # Encode (not decode) to obtain UTF-8 bytes from the unicode text.
    return unicodeContext.encode("UTF-8")
def filterContextByTarget(context, startfilter, endfilter):
    """Return the slice of ``context`` between ``startfilter`` and ``endfilter``.

    The slice starts at the ``startfilter`` offset plus its length and stops
    at the ``endfilter`` offset; both offsets come from
    ``CommonsSpiderUtils.filterContext`` applied to the whole ``context``.
    NOTE(review): presumably the helper returns the first match position;
    its not-found value is not visible here -- confirm.

    :param context: text to cut.
    :param startfilter: marker excluded from the front of the result.
    :param endfilter: marker at whose offset the result stops.
    :return: the sliced text.
    """
    bodyStart = CommonsSpiderUtils.filterContext(context, startfilter)
    bodyStart = bodyStart + len(startfilter)
    bodyEnd = CommonsSpiderUtils.filterContext(context, endfilter)
    return context[bodyStart:bodyEnd]
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and drop ``<b>`` / ``</b>`` tags.

    ``CommonsSpiderUtils.removeSpecialCharacter`` runs first; the opening
    and closing bold tags are then removed from its result.

    :param removeContext: text to clean.
    :return: the cleaned text without the bold tags.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    for tag in ('<b>', '</b>'):
        cleaned = cleaned.replace(tag, '')
    return cleaned
def filtetContextExpertise(context, startfilter, endfilter):
    """Extract the text between the markers, searching from ``startfilter``.

    ``context`` is first trimmed so it begins at the offset returned by
    ``CommonsSpiderUtils.filterContext`` for ``startfilter``; the trimmed
    text is then handed to ``filterContextByTarget``.
    NOTE(review): presumably this keeps an ``endfilter`` that occurs before
    ``startfilter`` from being picked up -- confirm.

    :param context: text to search.
    :param startfilter: start marker.
    :param endfilter: end marker.
    :return: whatever ``filterContextByTarget`` returns for the trimmed text.
    """
    markerPos = CommonsSpiderUtils.filterContext(context, startfilter)
    trimmed = context[markerPos:]
    return filterContextByTarget(trimmed, startfilter, endfilter)
def crawMarketSentimentDataSource(link):
    """Load ``link``, print its cleaned text and return an empty list.

    The page from ``CommonsSpiderUtils.openInternetUrl`` is passed through
    ``CommonsSpiderUtils.removeSpecialCharacter`` and written to stdout.
    Nothing is collected: the returned list is always empty.

    :param link: passed to ``openInternetUrl``.
    :return: a new empty list.
    """
    page = CommonsSpiderUtils.openInternetUrl(link)
    page = CommonsSpiderUtils.removeSpecialCharacter(page)
    # Single-argument form prints the same under the Python 2 statement.
    print(page)
    return []
def returnStartContext(
        link, startTarget='<div class="articleCell SG_j_linedot1">'):
    """Load ``link`` and return its start context for ``startTarget``.

    The page comes from ``CommonsSpiderUtils.openUrl`` and is handed to
    ``CommonsSpiderUtils.startContext`` together with the marker.  The
    marker used to be hard-coded; it is now an optional argument whose
    default is the original value, so one-argument callers are unaffected.

    :param link: passed to ``openUrl``.
    :param startTarget: marker passed to ``startContext``; defaults to the
        article-cell ``<div>`` opening tag.
    :return: whatever ``CommonsSpiderUtils.startContext`` returns.
    """
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(page, startTarget)
def divisionTarget(startcontext, startfilter, endfilter):
    """Forward to ``CommonsSpiderUtils.divisionTarget`` unchanged.

    :param startcontext: text handed to the shared helper.
    :param startfilter: start marker handed to the shared helper.
    :param endfilter: end marker handed to the shared helper.
    :return: whatever the shared helper returns.
    """
    division = CommonsSpiderUtils.divisionTarget(
        startcontext, startfilter, endfilter)
    return division
def findAllTarget(
        context, filterTarget=r'<div class="articleCell SG_j_linedot1">'):
    """Count the matches of ``filterTarget`` reported for ``context``.

    The pattern is passed to ``CommonsSpiderUtils.findAllTarget`` and the
    length of its result is returned.  The pattern used to be hard-coded; it
    is now an optional argument whose default is the original value, so
    one-argument callers are unaffected.

    :param context: text to search.
    :param filterTarget: pattern to look for; defaults to the article-cell
        ``<div>`` opening tag.
    :return: number of items in the helper's result.
    """
    matches = CommonsSpiderUtils.findAllTarget(context, filterTarget)
    return len(matches)
def returnStartContext(link, startTarget):
    """Forward to ``CommonsSpiderUtils.returnStartContext`` unchanged.

    :param link: passed through to the shared helper.
    :param startTarget: passed through to the shared helper.
    :return: whatever the shared helper returns.
    """
    startContext = CommonsSpiderUtils.returnStartContext(link, startTarget)
    return startContext
def findAllTarget(context, filterTarget):
    """Count the matches of ``filterTarget`` reported for ``context``.

    ``filterTarget`` is string-formatted, passed to
    ``CommonsSpiderUtils.findAllTarget``, and the size of the result is
    returned.

    :param context: text to search.
    :param filterTarget: pattern to look for (presumably a regular
        expression -- confirm against the helper).
    :return: number of items in the helper's result.
    """
    needle = r'%s' % filterTarget
    found = CommonsSpiderUtils.findAllTarget(context, needle)
    return len(found)
def findAllTarget(context, filterTarget):
    """Forward to ``CommonsSpiderUtils.findAllTargets`` unchanged.

    :param context: text handed to the shared helper.
    :param filterTarget: pattern handed to the shared helper.
    :return: whatever the shared helper returns.
    """
    targets = CommonsSpiderUtils.findAllTargets(context, filterTarget)
    return targets
def divisionTarget(startcontext, startfilter, endfilter):
    """Cut ``startcontext`` into the target segment and what follows it.

    Offsets for both markers come from ``CommonsSpiderUtils.filterContext``
    applied to the full text; the end offset is advanced by
    ``len(endfilter)`` before slicing.
    NOTE(review): presumably ``filterContext`` returns the first match
    position; its not-found value is not visible here -- confirm.

    :param startcontext: text to split.
    :param startfilter: marker whose offset starts the target segment.
    :param endfilter: marker whose end closes the target segment.
    :return: dict with keys ``'targetContext'`` and ``'nextContext'``.
    """
    headAt = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    tailAt = len(endfilter) + CommonsSpiderUtils.filterContext(
        startcontext, endfilter)
    targetPart = startcontext[headAt:tailAt]
    remainingPart = startcontext[tailAt:]
    return {'targetContext': targetPart, 'nextContext': remainingPart}
def returnStartContext(link, startTarget):
    """Load ``link``, transcode GBK to UTF-8 and return its start context.

    ``CommonsSpiderUtils.openInternetUrl`` supplies the page, which is
    decoded as GBK and re-encoded as UTF-8 before being handed to
    ``CommonsSpiderUtils.startContext`` with ``startTarget``.

    :param link: passed to ``openInternetUrl``.
    :param startTarget: marker passed to ``startContext``.
    :return: whatever ``CommonsSpiderUtils.startContext`` returns.
    """
    pageBytes = CommonsSpiderUtils.openInternetUrl(link)
    pageUnicode = unicode(pageBytes, 'GBK')
    pageUtf8 = pageUnicode.encode('UTF-8')
    return CommonsSpiderUtils.startContext(pageUtf8, startTarget)
def returnStartContext(link, startTarget):
    """Return the start context for ``link`` transcoded from GB2312 to UTF-8.

    The value from ``CommonsSpiderUtils.returnCommonStartContext`` is decoded
    as GB2312 and then encoded as UTF-8.  Decoding a second time, as this
    function used to, acts on a unicode object: Python 2 implicitly
    ASCII-encodes it first and raises ``UnicodeEncodeError`` on non-ASCII
    characters.
    NOTE(review): assumes the helper returns a GB2312 byte string -- confirm.

    :param link: passed through to the shared helper.
    :param startTarget: passed through to the shared helper.
    :return: the start context as a UTF-8 encoded byte string.
    """
    common = CommonsSpiderUtils.returnCommonStartContext(link, startTarget)
    # bytes -> unicode -> UTF-8 bytes; the last step must be an encode.
    return common.decode('gb2312').encode('UTF-8')
def filterContextByTarget(context, startfilter, endfilter):
    """Return the slice of ``context`` between ``startfilter`` and ``endfilter``.

    Both offsets come from ``CommonsSpiderUtils.filterContext`` applied to
    the whole ``context``; the start offset is advanced past ``startfilter``
    and the end offset is used as-is.
    NOTE(review): presumably the helper returns the first match position;
    its not-found value is not visible here -- confirm.

    :param context: text to cut.
    :param startfilter: marker excluded from the front of the result.
    :param endfilter: marker at whose offset the result stops.
    :return: the sliced text.
    """
    afterStart = len(startfilter)
    afterStart = CommonsSpiderUtils.filterContext(
        context, startfilter) + afterStart
    beforeEnd = CommonsSpiderUtils.filterContext(context, endfilter)
    return context[afterStart:beforeEnd]
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and drop ``<b>`` / ``</b>`` tags.

    The shared ``CommonsSpiderUtils.removeSpecialCharacter`` runs first;
    the opening tag and then the closing tag are removed from its result.

    :param removeContext: text to clean.
    :return: the cleaned text without the bold tags.
    """
    text = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    text = text.replace("<b>", "")
    text = text.replace("</b>", "")
    return text