def divisionTarget(startcontext, startfilter, endfilter):
    """Split *startcontext* into a target slice and the text that follows it.

    The slice bounds come from ``CommonsSpiderUtils.filterContext``, whose
    results are used directly as slice indices (presumably the position of
    the filter inside the text -- confirm against the helper).

    Returns a dict with two keys:
      * ``'targetContext'`` -- from the start-filter index up to and
        including the end filter (end index plus ``len(endfilter)``).
      * ``'nextContext'`` -- everything after that cut point.
    """
    begin = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    end_hit = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    # Move the cut point past the end filter so the filter text is kept.
    cut = end_hit + len(endfilter)
    target = startcontext[begin:cut]
    remainder = startcontext[cut:]
    return {'targetContext': target, 'nextContext': remainder}
def filterContextByTarget(context, startfilter, endfilter):
    """Forward to ``CommonsSpiderUtils.filterContextByTarget``.

    All three arguments are passed through unchanged and the helper's
    result is returned as is.
    """
    delegate = CommonsSpiderUtils.filterContextByTarget
    return delegate(context, startfilter, endfilter)
def filterAfterContext(startContext, filterContext):
    """Return the part of *startContext* that follows *filterContext*.

    The index reported by ``CommonsSpiderUtils.filterContext`` is advanced
    by ``len(filterContext)`` so the filter text itself is skipped.
    """
    hit = CommonsSpiderUtils.filterContext(startContext, filterContext)
    resume_at = hit + len(filterContext)
    return startContext[resume_at:]
def removeSpecialCharacter(removeContext):
    """Clean *removeContext* and strip a few extra fragments.

    The text is first passed through
    ``CommonsSpiderUtils.removeSpecialCharacter``; then ``'./'``, ``' '``
    and ``'...'`` are removed, in that order.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    # Order matters: './' is removed before '...' is looked for.
    for fragment in ('./', ' ', '...'):
        cleaned = cleaned.replace(fragment, '')
    return cleaned
def filterContextByTarget(context, startfilter, endfilter):
    """Thin wrapper around ``CommonsSpiderUtils.filterContextByTarget``.

    Returns whatever the helper returns for the same three arguments.
    """
    extracted = CommonsSpiderUtils.filterContextByTarget(
        context, startfilter, endfilter)
    return extracted
def filterAfterContext(startContext, filterContext):
    """Drop everything up to and including *filterContext*.

    ``CommonsSpiderUtils.filterContext`` supplies the index used as the
    slice start; ``len(filterContext)`` is added so the returned text
    begins right after the filter.
    """
    position = CommonsSpiderUtils.filterContext(startContext, filterContext)
    tail_start = position + len(filterContext)
    return startContext[tail_start:]
def removeSpecialCharacter(removeContext):
    """Return *removeContext* as cleaned by ``CommonsSpiderUtils.removeSpecialCharacter``."""
    cleaner = CommonsSpiderUtils.removeSpecialCharacter
    return cleaner(removeContext)
Esempio n. 8
0
def returnStartContext(link, startTarget):
    """Return the start context for *link* as decoded (unicode) text.

    Delegates to ``CommonsSpiderUtils.returnCommonStartContext`` and decodes
    its result from GB2312.

    The earlier version chained a second ``.decode('UTF-8')`` onto the
    already-decoded text. On Python 2 that implicitly ASCII-encodes the
    unicode object first and raises ``UnicodeEncodeError`` for any
    non-ASCII character; on Python 3 ``str`` has no ``decode`` method at
    all. For ASCII-only text the second decode was a no-op, so removing it
    leaves previously working results unchanged.
    """
    raw = CommonsSpiderUtils.returnCommonStartContext(link, startTarget)
    # assumes the helper returns GB2312-encoded bytes -- TODO confirm
    return raw.decode('gb2312')
def returnStartContext(link, startFlag):
    """Fetch *link* and return its start context for *startFlag*.

    The payload from ``CommonsSpiderUtils.openInternetUrl`` is decoded as
    GBK and re-encoded as UTF-8 before being handed to
    ``CommonsSpiderUtils.startContext``. *startFlag* is rendered through
    ``"%s"`` formatting first.
    """
    raw_page = CommonsSpiderUtils.openInternetUrl(link)
    utf8_page = unicode(raw_page, "GBK").encode("UTF-8")
    flag_text = "%s" % startFlag
    return CommonsSpiderUtils.startContext(utf8_page, flag_text)
def removeSpecialCharacter(removeContext):
    """Clean *removeContext*, then delete ``'./'``, ``' '`` and ``'...'``.

    Cleaning is done by ``CommonsSpiderUtils.removeSpecialCharacter``; the
    three fragments are removed afterwards, one after another in the
    listed order.
    """
    text = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    without_dot_slash = text.replace('./', '')
    without_spaces = without_dot_slash.replace(' ', '')
    return without_spaces.replace('...', '')
Esempio n. 11
0
def targetAfterContext(context, filterContext):
    """Return *context* from the *filterContext* index onward.

    The slice start is the value returned by
    ``CommonsSpiderUtils.filterContext``; nothing is added to it, so the
    result starts at that index rather than after the filter text.
    """
    start_at = CommonsSpiderUtils.filterContext(context, filterContext)
    return context[start_at:]
def returnStartContext(link, startTarget):
    """Download *link*, transcode GBK to UTF-8, and return its start context.

    ``CommonsSpiderUtils.openInternetUrl`` provides the raw payload and
    ``CommonsSpiderUtils.startContext`` is applied to the UTF-8 encoded
    text together with *startTarget*.
    """
    payload = CommonsSpiderUtils.openInternetUrl(link)
    decoded = unicode(payload, 'GBK')
    return CommonsSpiderUtils.startContext(decoded.encode('UTF-8'),
                                           startTarget)
def findAllTarget(context, filterTarget):
    """Return how many items ``CommonsSpiderUtils.findAllTarget`` reports.

    *filterTarget* is rendered through ``'%s'`` formatting before being
    passed to the helper; the length of the helper's result is returned.
    """
    pattern = r'%s' % filterTarget
    found = CommonsSpiderUtils.findAllTarget(context, pattern)
    return len(found)
def targetAfterContext(context, filterContext):
    """Slice *context* starting at the index found for *filterContext*.

    The index comes from ``CommonsSpiderUtils.filterContext`` and is used
    unmodified as the slice start.
    """
    offset = CommonsSpiderUtils.filterContext(context, filterContext)
    remainder = context[offset:]
    return remainder
def returnStartContext(link, startTarget):
    """Return the GB2312-decoded start context for *link*.

    The value comes from ``CommonsSpiderUtils.returnCommonStartContext``.

    A trailing ``.decode("UTF-8")`` used to be applied to the decoded
    text. Decoding text that is already unicode makes Python 2 encode it
    to ASCII implicitly, which raises ``UnicodeEncodeError`` as soon as
    the page contains a non-ASCII character (Python 3 ``str`` has no
    ``decode`` at all). The extra call did nothing for ASCII-only text,
    so it is simply removed.
    """
    encoded = CommonsSpiderUtils.returnCommonStartContext(link, startTarget)
    # assumes the helper yields GB2312-encoded bytes -- TODO confirm
    return encoded.decode("gb2312")
Esempio n. 16
0
def filterContextByTarget(context, startfilter, endfilter):
    """Return the text of *context* lying between the two filters.

    The slice begins ``len(startfilter)`` past the index reported for
    *startfilter* and stops at the index reported for *endfilter*; both
    indices come from ``CommonsSpiderUtils.filterContext`` applied to the
    whole *context*.
    """
    begin = CommonsSpiderUtils.filterContext(context, startfilter) + len(startfilter)
    finish = CommonsSpiderUtils.filterContext(context, endfilter)
    return context[begin:finish]
Esempio n. 17
0
def removeSpecialCharacter(removeContext):
    """Clean *removeContext* and strip ``<b>`` / ``</b>`` tags.

    ``CommonsSpiderUtils.removeSpecialCharacter`` runs first; the opening
    and then the closing bold tag are removed from its result.
    """
    stripped = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    for tag in ('<b>', '</b>'):
        stripped = stripped.replace(tag, '')
    return stripped
Esempio n. 18
0
def filtetContextExpertise(context, startfilter, endfilter):
    """Extract the text between the filters, searching from *startfilter* on.

    *context* is first trimmed so it begins at the index that
    ``CommonsSpiderUtils.filterContext`` reports for *startfilter*; the
    trimmed text is then passed to ``filterContextByTarget`` with the same
    two filters.
    """
    offset = CommonsSpiderUtils.filterContext(context, startfilter)
    return filterContextByTarget(context[offset:], startfilter, endfilter)
Esempio n. 19
0
def crawMarketSentimentDataSource(link):
    """Fetch *link*, clean the text, print it, and return an empty list.

    The page is read with ``CommonsSpiderUtils.openInternetUrl`` and
    cleaned with ``CommonsSpiderUtils.removeSpecialCharacter``. Nothing is
    ever added to the returned list.
    """
    page = CommonsSpiderUtils.openInternetUrl(link)
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(page)
    # A single parenthesised argument prints exactly like the statement form.
    print(cleaned)
    return []
Esempio n. 20
0
def returnStartContext(link):
    """Open *link* and return its start context for the article-cell marker.

    The page comes from ``CommonsSpiderUtils.openUrl``; the marker passed
    to ``CommonsSpiderUtils.startContext`` is the fixed
    ``<div class="articleCell SG_j_linedot1">`` opening tag.
    """
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(
        page, '<div class="articleCell SG_j_linedot1">')
def divisionTarget(startcontext, startfilter, endfilter):
    """Forward to ``CommonsSpiderUtils.divisionTarget`` and return its result."""
    split = CommonsSpiderUtils.divisionTarget
    return split(startcontext, startfilter, endfilter)
Esempio n. 22
0
def findAllTarget(context):
    """Count the article-cell markers found in *context*.

    Returns the length of what ``CommonsSpiderUtils.findAllTarget`` yields
    for the fixed ``<div class="articleCell SG_j_linedot1">`` pattern.
    """
    hits = CommonsSpiderUtils.findAllTarget(
        context, r'<div class="articleCell SG_j_linedot1">')
    return len(hits)
def returnStartContext(link, startTarget):
    """Return ``CommonsSpiderUtils.returnStartContext(link, startTarget)`` unchanged."""
    start_context = CommonsSpiderUtils.returnStartContext(link, startTarget)
    return start_context
Esempio n. 24
0
def findAllTarget(context, filterTarget):
    """Return the number of results for *filterTarget* in *context*.

    *filterTarget* goes through ``'%s'`` formatting, the formatted value is
    given to ``CommonsSpiderUtils.findAllTarget``, and the length of the
    helper's result is returned.
    """
    formatted = r'%s' % filterTarget
    results = CommonsSpiderUtils.findAllTarget(context, formatted)
    return len(results)
def findAllTarget(context, filterTarget):
    """Forward to ``CommonsSpiderUtils.findAllTargets``.

    The helper's return value is passed back as is; no length is taken
    here.
    """
    lookup = CommonsSpiderUtils.findAllTargets
    return lookup(context, filterTarget)
Esempio n. 26
0
def divisionTarget(startcontext, startfilter, endfilter):
    """Cut *startcontext* into the target section and the remaining text.

    Both bounds are obtained from ``CommonsSpiderUtils.filterContext``.
    The end bound is pushed forward by ``len(endfilter)`` so the end
    filter stays inside the target section.

    Returns ``{'targetContext': <section>, 'nextContext': <rest>}``.
    """
    first = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    last = CommonsSpiderUtils.filterContext(startcontext, endfilter) + len(endfilter)
    pieces = {}
    pieces['targetContext'] = startcontext[first:last]
    pieces['nextContext'] = startcontext[last:]
    return pieces
Esempio n. 27
0
def divisionTarget(startcontext, startfilter, endfilter):
    """Thin wrapper: return what ``CommonsSpiderUtils.divisionTarget`` returns."""
    divided = CommonsSpiderUtils.divisionTarget(
        startcontext, startfilter, endfilter)
    return divided
Esempio n. 28
0
def returnStartContext(link, startTarget):
    """Fetch *link* and return the start context located by *startTarget*.

    The downloaded payload (``CommonsSpiderUtils.openInternetUrl``) is
    decoded as GBK, re-encoded as UTF-8, and then passed to
    ``CommonsSpiderUtils.startContext``.
    """
    fetched = CommonsSpiderUtils.openInternetUrl(link)
    as_utf8 = unicode(fetched, 'GBK').encode('UTF-8')
    return CommonsSpiderUtils.startContext(as_utf8, startTarget)
Esempio n. 29
0
def returnStartContext(link, startTarget):
    """Return the common start context for *link*, decoded from GB2312.

    ``CommonsSpiderUtils.returnCommonStartContext`` supplies the value.

    The old implementation decoded twice (``gb2312`` then ``UTF-8``). The
    second decode operated on text that was already unicode: Python 2
    first encodes such text to ASCII implicitly and fails with
    ``UnicodeEncodeError`` on any non-ASCII character, and Python 3
    ``str`` objects have no ``decode``. For pure-ASCII text the second
    call changed nothing, so dropping it preserves the working cases.
    """
    common = CommonsSpiderUtils.returnCommonStartContext(link, startTarget)
    # assumes the helper returns GB2312-encoded bytes -- TODO confirm
    return common.decode('gb2312')
Esempio n. 30
0
def filterContextByTarget(context, startfilter, endfilter):
    """Slice out the part of *context* between *startfilter* and *endfilter*.

    ``CommonsSpiderUtils.filterContext`` is called on the full *context*
    for each filter. The start index is advanced by ``len(startfilter)``
    so the start filter is excluded; the slice ends at the end-filter
    index.
    """
    start_hit = CommonsSpiderUtils.filterContext(context, startfilter)
    inner_start = start_hit + len(startfilter)
    inner_end = CommonsSpiderUtils.filterContext(context, endfilter)
    return context[inner_start:inner_end]
Esempio n. 31
0
def findAllTarget(context, filterTarget):
    """Return the result of ``CommonsSpiderUtils.findAllTargets`` for the given arguments."""
    targets = CommonsSpiderUtils.findAllTargets(context, filterTarget)
    return targets
def removeSpecialCharacter(removeContext):
    """Clean *removeContext* and drop bold markup.

    After ``CommonsSpiderUtils.removeSpecialCharacter`` has run, every
    ``<b>`` and then every ``</b>`` is removed from the text.
    """
    text = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    no_open_tag = text.replace("<b>", "")
    return no_open_tag.replace("</b>", "")