def divisionTarget(startcontext, startfilter, endfilter):
    """Split ``startcontext`` into a target section and the remainder.

    The section begins at the index ``CommonsSpiderUtils.filterContext``
    reports for ``startfilter`` and ends ``len(endfilter)`` positions past
    the index it reports for ``endfilter``.  Both lookups run against the
    full ``startcontext``.

    NOTE(review): assumes ``filterContext`` returns an integer index into
    ``startcontext`` -- confirm against CommonsSpiderUtils.

    Returns a dict with the keys ``'targetContext'`` (the section) and
    ``'nextContext'`` (everything after the section).
    """
    begin = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    marker_at = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    finish = marker_at + len(endfilter)
    section = startcontext[begin:finish]
    remainder = startcontext[finish:]
    return {'targetContext': section, 'nextContext': remainder}
def filterContextByTarget(context, startfilter, endfilter):
    """Forward all three arguments to
    ``CommonsSpiderUtils.filterContextByTarget`` and return its result."""
    extracted = CommonsSpiderUtils.filterContextByTarget(
        context, startfilter, endfilter)
    return extracted
def filterAfterContext(startContext, filterContext):
    """Return the tail of ``startContext`` that follows ``filterContext``.

    The cut point is the index reported by
    ``CommonsSpiderUtils.filterContext`` plus ``len(filterContext)``.

    NOTE(review): assumes the helper returns an integer index into
    ``startContext`` -- confirm against CommonsSpiderUtils.
    """
    marker_at = CommonsSpiderUtils.filterContext(startContext, filterContext)
    cut = marker_at + len(filterContext)
    return startContext[cut:]
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and strip a few extra fragments.

    The text is first passed through
    ``CommonsSpiderUtils.removeSpecialCharacter``; afterwards every
    occurrence of ``'./'``, ``' '`` and ``'...'`` is removed, in that order.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    # Order matters: removing './' first can change which '...' runs remain.
    for fragment in ('./', ' ', '...'):
        cleaned = cleaned.replace(fragment, '')
    return cleaned
def filterContextByTarget(context, startfilter, endfilter):
    """Thin wrapper: return whatever
    ``CommonsSpiderUtils.filterContextByTarget`` returns for these
    arguments."""
    between = CommonsSpiderUtils.filterContextByTarget(
        context,
        startfilter,
        endfilter,
    )
    return between
def filterAfterContext(startContext, filterContext):
    """Drop everything up to and including the marker position.

    Slices ``startContext`` from the index given by
    ``CommonsSpiderUtils.filterContext`` plus ``len(filterContext)`` to the
    end.

    NOTE(review): presumably the helper returns where ``filterContext``
    starts inside ``startContext`` -- verify in CommonsSpiderUtils.
    """
    offset = CommonsSpiderUtils.filterContext(startContext, filterContext)
    offset = offset + len(filterContext)
    return startContext[offset:]
def removeSpecialCharacter(removeContext):
    """Return ``removeContext`` as processed by
    ``CommonsSpiderUtils.removeSpecialCharacter`` (no extra handling)."""
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    return cleaned
Example #8
0
def returnStartContext(link, startTarget):
    """Return the start context for ``link``, decoded from GB2312.

    The value from ``CommonsSpiderUtils.returnCommonStartContext`` is
    decoded exactly once.  The previous code chained a second
    ``.decode('UTF-8')`` onto the already-decoded text.  On Python 2 that
    forces an implicit encode with the default codec first, which raises
    ``UnicodeEncodeError`` for non-ASCII text under the stock ASCII default.
    Whenever that second call succeeded it produced an equal unicode value,
    so dropping it keeps every previously working result.

    NOTE(review): assumes the helper returns a GB2312-encoded byte string
    -- confirm against CommonsSpiderUtils.
    """
    raw_context = CommonsSpiderUtils.returnCommonStartContext(
        link, startTarget)
    return raw_context.decode('gb2312')
def returnStartContext(link, startFlag):
    """Fetch ``link``, transcode it from GBK to UTF-8 and return the start
    context.

    The page comes from ``CommonsSpiderUtils.openInternetUrl``; the
    transcoded bytes and ``startFlag`` (formatted through ``"%s"``) are
    handed to ``CommonsSpiderUtils.startContext``, whose result is returned.
    Relies on the Python 2 ``unicode`` builtin.
    """
    raw_page = CommonsSpiderUtils.openInternetUrl(link)
    utf8_page = unicode(raw_page, "GBK").encode("UTF-8")
    return CommonsSpiderUtils.startContext(utf8_page, "%s" % startFlag)
def removeSpecialCharacter(removeContext):
    """Run ``CommonsSpiderUtils.removeSpecialCharacter`` on the input, then
    delete ``'./'``, ``' '`` and ``'...'`` (applied in that order)."""
    text = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    text = text.replace('./', '')
    text = text.replace(' ', '')
    text = text.replace('...', '')
    return text
Example #11
0
def targetAfterContext(context, filterContext):
    """Return ``context`` from the marker position onwards.

    The slice starts at the index reported by
    ``CommonsSpiderUtils.filterContext``; unlike a "skip past the marker"
    helper, ``len(filterContext)`` is not added here.

    NOTE(review): assumes the helper returns an integer index into
    ``context`` -- confirm against CommonsSpiderUtils.
    """
    marker_at = CommonsSpiderUtils.filterContext(context, filterContext)
    return context[marker_at:]
def returnStartContext(link, startTarget):
    """Download ``link``, re-encode it GBK -> UTF-8 and locate the start
    context.

    Uses ``CommonsSpiderUtils.openInternetUrl`` for the download and returns
    the result of ``CommonsSpiderUtils.startContext`` for the re-encoded
    page and ``startTarget``.  Relies on the Python 2 ``unicode`` builtin.
    """
    page_bytes = CommonsSpiderUtils.openInternetUrl(link)
    page_utf8 = unicode(page_bytes, 'GBK').encode('UTF-8')
    return CommonsSpiderUtils.startContext(page_utf8, startTarget)
def findAllTarget(context, filterTarget):
    """Return how many items ``CommonsSpiderUtils.findAllTarget`` finds.

    ``filterTarget`` is formatted through ``'%s'`` before being passed on,
    and the length of the helper's result is returned.
    """
    pattern = r'%s' % filterTarget
    matches = CommonsSpiderUtils.findAllTarget(context, pattern)
    return len(matches)
def targetAfterContext(context, filterContext):
    """Slice ``context`` starting at the index that
    ``CommonsSpiderUtils.filterContext`` reports for ``filterContext``.

    NOTE(review): presumably that index is where the marker begins, so the
    marker stays in the result -- verify in CommonsSpiderUtils.
    """
    start_at = CommonsSpiderUtils.filterContext(context, filterContext)
    tail = context[start_at:]
    return tail
def returnStartContext(link, startTarget):
    """Return the start context for ``link``, decoded from GB2312.

    ``CommonsSpiderUtils.returnCommonStartContext`` supplies the raw value,
    which is decoded a single time.  The earlier version appended a second
    ``.decode("UTF-8")`` to the already-decoded text; on Python 2 that
    triggers an implicit encode with the default codec and raises
    ``UnicodeEncodeError`` for non-ASCII text under the stock ASCII
    default.  In every case where the second call succeeded it yielded an
    equal unicode value, so removing it does not change working results.

    NOTE(review): assumes the helper returns a GB2312-encoded byte string
    -- confirm against CommonsSpiderUtils.
    """
    raw_context = CommonsSpiderUtils.returnCommonStartContext(
        link, startTarget)
    return raw_context.decode("gb2312")
Example #16
0
def filterContextByTarget(context, startfilter, endfilter):
    """Return the part of ``context`` between two markers.

    The slice starts ``len(startfilter)`` positions past the index reported
    for ``startfilter`` and stops at the index reported for ``endfilter``.
    Both indices come from ``CommonsSpiderUtils.filterContext`` run against
    the full ``context``.

    NOTE(review): assumes the helper returns an integer index into
    ``context`` -- confirm against CommonsSpiderUtils.
    """
    start_at = CommonsSpiderUtils.filterContext(context, startfilter)
    body_begin = start_at + len(startfilter)
    body_end = CommonsSpiderUtils.filterContext(context, endfilter)
    return context[body_begin:body_end]
Example #17
0
def removeSpecialCharacter(removeContext):
    """Clean the text via ``CommonsSpiderUtils.removeSpecialCharacter`` and
    then remove every ``'<b>'`` and ``'</b>'`` substring."""
    stripped = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    for tag in ('<b>', '</b>'):
        stripped = stripped.replace(tag, '')
    return stripped
Example #18
0
def filtetContextExpertise(context, startfilter, endfilter):
    """Extract the text between two markers, searching from ``startfilter``.

    ``context`` is first cut at the index reported by
    ``CommonsSpiderUtils.filterContext`` for ``startfilter``; the remaining
    text is then handed to ``filterContextByTarget`` together with both
    markers, and that call's result is returned.
    """
    start_at = CommonsSpiderUtils.filterContext(context, startfilter)
    trimmed = context[start_at:]
    return filterContextByTarget(trimmed, startfilter, endfilter)
Example #19
0
def crawMarketSentimentDataSource(link):
    """Fetch ``link``, clean the text, print it and return an empty list.

    The page is read with ``CommonsSpiderUtils.openInternetUrl`` and passed
    through ``CommonsSpiderUtils.removeSpecialCharacter``.  The cleaned text
    is only printed; nothing is ever added to the returned list.
    """
    page = CommonsSpiderUtils.openInternetUrl(link)
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(page)
    # Parenthesised single-argument form prints the same on Python 2.
    print(cleaned)
    return []
Example #20
0
def returnStartContext(link,
                       startTarget='<div class="articleCell SG_j_linedot1">'):
    """Open ``link`` and return its start context for ``startTarget``.

    The page is read with ``CommonsSpiderUtils.openUrl`` and handed to
    ``CommonsSpiderUtils.startContext`` together with ``startTarget``.

    ``startTarget`` used to be hard-coded inside the body.  It is now an
    optional parameter whose default is the original marker, so existing
    one-argument calls behave exactly as before while other markers can be
    supplied.
    """
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(page, startTarget)
def divisionTarget(startcontext, startfilter, endfilter):
    """Forward to ``CommonsSpiderUtils.divisionTarget`` and return its
    result unchanged."""
    parts = CommonsSpiderUtils.divisionTarget(
        startcontext, startfilter, endfilter)
    return parts
Example #22
0
def findAllTarget(context,
                  filterTarget=r'<div class="articleCell SG_j_linedot1">'):
    """Return how many items ``CommonsSpiderUtils.findAllTarget`` finds in
    ``context`` for ``filterTarget``.

    ``filterTarget`` used to be hard-coded inside the body.  It is now an
    optional parameter whose default is the original pattern, so existing
    one-argument calls behave exactly as before while other patterns can be
    counted.
    """
    matches = CommonsSpiderUtils.findAllTarget(context, filterTarget)
    return len(matches)
def returnStartContext(link, startTarget):
    """Forward to ``CommonsSpiderUtils.returnStartContext`` and return its
    result unchanged."""
    start_context = CommonsSpiderUtils.returnStartContext(link, startTarget)
    return start_context
Example #24
0
def findAllTarget(context, filterTarget):
    """Count the items ``CommonsSpiderUtils.findAllTarget`` returns.

    ``filterTarget`` is formatted through ``'%s'`` first; the function
    returns the length of the helper's result.
    """
    formatted = r'%s' % filterTarget
    found = CommonsSpiderUtils.findAllTarget(context, formatted)
    return len(found)
def findAllTarget(context, filterTarget):
    """Forward to ``CommonsSpiderUtils.findAllTargets`` (plural) and return
    its result unchanged."""
    result = CommonsSpiderUtils.findAllTargets(context, filterTarget)
    return result
Example #26
0
def divisionTarget(startcontext, startfilter, endfilter):
    """Cut one section out of ``startcontext`` and keep what follows it.

    The section runs from the index reported for ``startfilter`` up to
    ``len(endfilter)`` positions past the index reported for ``endfilter``;
    both indices come from ``CommonsSpiderUtils.filterContext`` applied to
    the whole ``startcontext``.

    NOTE(review): assumes the helper returns an integer index into
    ``startcontext`` -- confirm against CommonsSpiderUtils.

    Returns a dict: ``'targetContext'`` holds the section and
    ``'nextContext'`` holds the text after it.
    """
    first = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    last = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    last += len(endfilter)
    result = {}
    result['targetContext'] = startcontext[first:last]
    result['nextContext'] = startcontext[last:]
    return result
def divisionTarget(startcontext, startfilter, endfilter):
    """Thin wrapper: return whatever ``CommonsSpiderUtils.divisionTarget``
    returns for these arguments."""
    divided = CommonsSpiderUtils.divisionTarget(
        startcontext,
        startfilter,
        endfilter,
    )
    return divided
Example #28
0
def returnStartContext(link, startTarget):
    """Read ``link``, convert the bytes from GBK to UTF-8 and return the
    start context.

    ``CommonsSpiderUtils.openInternetUrl`` provides the page and
    ``CommonsSpiderUtils.startContext`` is applied to the converted page
    with ``startTarget``.  Relies on the Python 2 ``unicode`` builtin.
    """
    fetched = CommonsSpiderUtils.openInternetUrl(link)
    as_utf8 = unicode(fetched, 'GBK').encode('UTF-8')
    return CommonsSpiderUtils.startContext(as_utf8, startTarget)
def returnStartContext(link, startTarget):
    """Return the start context for ``link``, decoded from GB2312.

    The result of ``CommonsSpiderUtils.returnCommonStartContext`` is decoded
    once.  A second ``.decode('UTF-8')`` used to follow; applied to
    already-decoded text on Python 2 it performs an implicit encode with
    the default codec and raises ``UnicodeEncodeError`` for non-ASCII text
    under the stock ASCII default.  Where that call did succeed it gave an
    equal unicode value, so removing it preserves all working results.

    NOTE(review): assumes the helper returns a GB2312-encoded byte string
    -- confirm against CommonsSpiderUtils.
    """
    raw_context = CommonsSpiderUtils.returnCommonStartContext(
        link, startTarget)
    return raw_context.decode('gb2312')
Example #30
0
def filterContextByTarget(context, startfilter, endfilter):
    """Slice out the text lying between ``startfilter`` and ``endfilter``.

    Start of slice: index reported for ``startfilter`` plus
    ``len(startfilter)``.  End of slice: index reported for ``endfilter``.
    Both lookups use ``CommonsSpiderUtils.filterContext`` on the whole
    ``context``.

    NOTE(review): presumably the helper returns where each marker begins
    -- verify in CommonsSpiderUtils.
    """
    lower = CommonsSpiderUtils.filterContext(context, startfilter)
    lower += len(startfilter)
    upper = CommonsSpiderUtils.filterContext(context, endfilter)
    return context[lower:upper]
def findAllTarget(context, filterTarget):
    """Thin wrapper: return whatever ``CommonsSpiderUtils.findAllTargets``
    (plural) returns for these arguments."""
    targets = CommonsSpiderUtils.findAllTargets(context, filterTarget)
    return targets
def removeSpecialCharacter(removeContext):
    """Pass the text through ``CommonsSpiderUtils.removeSpecialCharacter``
    and then delete all ``"<b>"`` and ``"</b>"`` substrings."""
    result = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    result = result.replace("<b>", "")
    result = result.replace("</b>", "")
    return result