def divisionTarget(startcontext, startfilter, endfilter):
    """Split ``startcontext`` into a target section and the text after it.

    The section begins at the position ``CommonsSpiderUtils.filterContext``
    reports for ``startfilter`` and ends ``len(endfilter)`` past the position
    it reports for ``endfilter``.

    Returns:
        A dict with two keys: 'targetContext' (the section itself) and
        'nextContext' (everything from the end position onwards).

    NOTE(review): nothing here checks for a marker that is not found; the
    result then depends on what filterContext returns -- confirm.
    """
    begin = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    stop = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    stop = stop + len(endfilter)
    target = startcontext[begin:stop]
    remainder = startcontext[stop:]
    return {'targetContext': target, 'nextContext': remainder}
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` with the shared helper, then drop extra tokens.

    After ``CommonsSpiderUtils.removeSpecialCharacter`` has run, the
    substrings "./", " " and "..." are removed, in that order.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    # Order matters: each pass works on the output of the previous one.
    for token in ("./", " ", "..."):
        cleaned = cleaned.replace(token, "")
    return cleaned
def filterContextByTarget(context, startfilter, endfilter):
    """Forward all three arguments to ``CommonsSpiderUtils.filterContextByTarget``.

    Returns whatever that helper returns, unchanged.
    """
    extracted = CommonsSpiderUtils.filterContextByTarget(
        context, startfilter, endfilter)
    return extracted
def filterAfterContext(startContext, filterContext):
    """Return the tail of ``startContext`` that follows ``filterContext``.

    The cut point is the position ``CommonsSpiderUtils.filterContext``
    reports for the marker plus the marker's length, so the marker itself is
    not part of the result.
    """
    cut = CommonsSpiderUtils.filterContext(startContext, filterContext)
    cut = cut + len(filterContext)
    return startContext[cut:]
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and strip relative-path prefixes.

    Runs ``CommonsSpiderUtils.removeSpecialCharacter`` first, then removes
    every '../../../' and afterwards every './' from the result.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    without_parent_hops = cleaned.replace('../../../', '')
    return without_parent_hops.replace('./', '')
def returnStartContext(link, startTarget):
    """Fetch the start context for ``link`` and decode it from GB2312.

    The previous implementation chained ``.decode('gb2312').decode('UTF-8')``.
    The first decode already yields text, so the second one could not work:
    on Python 3 ``str`` has no ``decode`` method, and on Python 2 decoding a
    ``unicode`` object implicitly ASCII-encodes it first, which raises
    UnicodeEncodeError as soon as the page holds a non-ASCII character.
    Only the GB2312 decode is kept.

    Args:
        link: passed through to ``CommonsSpiderUtils.returnCommonStartContext``.
        startTarget: passed through to the same helper.

    Returns:
        The helper's result decoded as GB2312 text.
    """
    # assumes the helper returns GB2312-encoded bytes -- TODO confirm
    raw = CommonsSpiderUtils.returnCommonStartContext(link, startTarget)
    return raw.decode('gb2312')
def returnStartContext(link, startTarget):
    """Fetch the start context for ``link`` and decode it from GB2312.

    The earlier version called ``.decode("gb2312").decode("UTF-8")``. Decoding
    an already-decoded value fails: Python 3 ``str`` has no ``decode``, and
    Python 2 implicitly ASCII-encodes the ``unicode`` object before decoding,
    raising UnicodeEncodeError for any non-ASCII character. The redundant
    second decode is therefore dropped.

    Args:
        link: forwarded to ``CommonsSpiderUtils.returnCommonStartContext``.
        startTarget: forwarded to the same helper.

    Returns:
        The helper's result decoded as GB2312 text.
    """
    # assumes the helper returns GB2312-encoded bytes -- TODO confirm
    encoded = CommonsSpiderUtils.returnCommonStartContext(link, startTarget)
    return encoded.decode("gb2312")
def findAllImageTarget(startContext):
    """Count thumbnail markers in ``startContext``.

    Returns the length of what ``CommonsSpiderUtils.findAllTarget`` produces
    for the ``<div class="thumbnailimage">`` pattern.
    """
    hits = CommonsSpiderUtils.findAllTarget(
        startContext, r'<div class="thumbnailimage">')
    return len(hits)
# Example #9
# 0
def findAllTarget(context):
    """Look up record rows in ``context``.

    Returns the result of ``CommonsSpiderUtils.findAllTarget`` for the
    ``<tr class="record" valign="top">`` pattern, unchanged.
    """
    rows = CommonsSpiderUtils.findAllTarget(
        context, r'<tr class="record" valign="top">')
    return rows
def filterContextByTarget(context, startfilter, endfilter):
    """Return the part of ``context`` between ``startfilter`` and ``endfilter``.

    The slice starts right after ``startfilter`` (its reported position plus
    its length) and stops at the position reported for ``endfilter``; both
    positions come from ``CommonsSpiderUtils.filterContext``.
    """
    begin = CommonsSpiderUtils.filterContext(context, startfilter)
    begin = begin + len(startfilter)
    stop = CommonsSpiderUtils.filterContext(context, endfilter)
    return context[begin:stop]
def findAllTarget(context, filter):
    """Count the matches of ``filter`` in ``context``.

    ``filter`` is formatted through ``'%s'`` and handed to
    ``CommonsSpiderUtils.findAllTarget``; the length of its result is
    returned.
    """
    hits = CommonsSpiderUtils.findAllTarget(context, r'%s' % filter)
    count = len(hits)
    return count
def returnFilterMainContext(startcontext, startfilter, endfilter):
    """Extract the text lying between ``startfilter`` and ``endfilter``.

    Positions are obtained from ``CommonsSpiderUtils.filterContext``. The
    start marker is skipped (its length is added to the start position) and
    the slice stops at the end marker's position.
    """
    begin = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    begin = begin + len(startfilter)
    stop = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    return startcontext[begin:stop]
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and strip leftover HTML fragments.

    Runs ``CommonsSpiderUtils.removeSpecialCharacter`` first, then removes
    every '<br />' and afterwards every '</div>'.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    for fragment in ('<br />', '</div>'):
        cleaned = cleaned.replace(fragment, '')
    return cleaned
# Example #14
# 0
def retrunStartContext(link):
    """Open ``link`` and locate the start of the record rows.

    The page from ``CommonsSpiderUtils.openUrl`` is handed to
    ``CommonsSpiderUtils.startContext`` together with the
    ``<tr class="record" valign="top">`` marker; that helper's result is
    returned.
    """
    # The misspelled name is kept as-is: callers refer to it.
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(
        page, '<tr class="record" valign="top">')
# Example #15
# 0
def filterTargetFlag(filterContext, filter):
    """Tell whether ``filter`` was located in ``filterContext``.

    Returns False when ``CommonsSpiderUtils.filterContext`` reports a value
    below zero, True otherwise.
    """
    position = CommonsSpiderUtils.filterContext(filterContext, filter)
    return not (position < 0)
def findAllImageTarget(context, param):
    """Count the matches of ``param`` in ``context``.

    ``param`` is formatted through ``'%s'`` and given to
    ``CommonsSpiderUtils.findAllTarget``; the length of the result is
    returned.
    """
    hits = CommonsSpiderUtils.findAllTarget(context, r'%s' % param)
    return len(hits)
def filterContextByTarget(context, startfilter, endfilter):
    """Slice ``context`` from just after ``startfilter`` up to ``endfilter``.

    Both positions come from ``CommonsSpiderUtils.filterContext``; the start
    position is advanced by ``len(startfilter)`` so the start marker is
    excluded.
    """
    first = CommonsSpiderUtils.filterContext(context, startfilter)
    first = first + len(startfilter)
    last = CommonsSpiderUtils.filterContext(context, endfilter)
    return context[first:last]
def returnStartContext(link, filter):
    """Open ``link`` and locate the start context for ``filter``.

    The content returned by ``CommonsSpiderUtils.openUrl`` is passed with
    ``filter`` to ``CommonsSpiderUtils.startContext``, whose result is
    returned.
    """
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(page, filter)
def returnImageListStartContext(link):
    """Open ``link`` and locate the start of the thumbnail list.

    The page from ``CommonsSpiderUtils.openUrl`` is handed to
    ``CommonsSpiderUtils.startContext`` with the
    ``<div class="thumbnailimage">`` marker; that result is returned.
    """
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(
        page, '<div class="thumbnailimage">')
def findAllTarget(context):
    """Find record rows in ``context``.

    Returns, unchanged, what ``CommonsSpiderUtils.findAllTarget`` produces
    for the ``<tr class="record" valign="top">`` pattern.
    """
    found = CommonsSpiderUtils.findAllTarget(
        context, r'<tr class="record" valign="top">')
    return found
def filterTargetFlag(filterContext, filter):
    """Report whether ``filter`` could be located in ``filterContext``.

    True unless ``CommonsSpiderUtils.filterContext`` reports a value below
    zero, in which case False.
    """
    located_at = CommonsSpiderUtils.filterContext(filterContext, filter)
    return not (located_at < 0)
def retrunStartContext(link):
    """Open ``link`` and locate the first record row.

    Passes the content from ``CommonsSpiderUtils.openUrl`` and the
    ``<tr class="record" valign="top">`` marker to
    ``CommonsSpiderUtils.startContext`` and returns its result.
    """
    # Name spelling ("retrun") is preserved for existing callers.
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(
        page, '<tr class="record" valign="top">')
def removeSpecialCharacter(removeContext):
    """Clean ``removeContext`` and remove every space character.

    ``CommonsSpiderUtils.removeSpecialCharacter`` runs first; spaces are
    stripped from its result.
    """
    cleaned = CommonsSpiderUtils.removeSpecialCharacter(removeContext)
    return cleaned.replace(" ", "")
def returnStartContext(link, startFlag):
    """Fetch ``link``, transcode it from GBK to UTF-8 and find the start.

    The content from ``CommonsSpiderUtils.openInternetUrl`` is decoded as
    GBK and re-encoded as UTF-8, then passed with ``startFlag`` (formatted
    through ``'%s'``) to ``CommonsSpiderUtils.startContext``.

    Relies on the Python 2 ``unicode`` builtin.
    """
    raw = CommonsSpiderUtils.openInternetUrl(link)
    utf8_page = unicode(raw, 'GBK').encode('UTF-8')
    return CommonsSpiderUtils.startContext(utf8_page, '%s' % startFlag)
def findAllTarget(context, filterTarget):
    """Forward to ``CommonsSpiderUtils.findAllTargets`` and return its result.

    NOTE(review): this calls ``findAllTargets`` (plural) while similar
    wrappers call ``findAllTarget`` -- confirm the helper name is intended.
    """
    found = CommonsSpiderUtils.findAllTargets(context, filterTarget)
    return found
def returnFilterMainContext(startcontext, startfilter, endfilter):
    """Return the text between ``startfilter`` and ``endfilter``.

    Uses ``CommonsSpiderUtils.filterContext`` for both positions; the start
    position is moved past the start marker, and the slice ends where the
    end marker is reported.
    """
    after_start = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    after_start = after_start + len(startfilter)
    before_end = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    return startcontext[after_start:before_end]
def divisionTarget(startcontext, startfilter, endfilter):
    """Forward all three arguments to ``CommonsSpiderUtils.divisionTarget``.

    Returns that helper's result unchanged.
    """
    divided = CommonsSpiderUtils.divisionTarget(
        startcontext, startfilter, endfilter)
    return divided
def findAllTarget(context, filter):
    """Return how many matches of ``filter`` are found in ``context``.

    ``filter`` is formatted through ``'%s'`` before being given to
    ``CommonsSpiderUtils.findAllTarget``; the length of that result is the
    return value.
    """
    found = CommonsSpiderUtils.findAllTarget(context, r'%s' % filter)
    return len(found)
def returnStartContext(link, startTarget):
    """Forward to ``CommonsSpiderUtils.returnStartContext``.

    Both arguments are passed through and the helper's result is returned
    unchanged.
    """
    start = CommonsSpiderUtils.returnStartContext(link, startTarget)
    return start
def returnStartContext(link, filter):
    """Open ``link`` and find the start context marked by ``filter``.

    Feeds the content from ``CommonsSpiderUtils.openUrl`` and ``filter`` to
    ``CommonsSpiderUtils.startContext`` and returns the outcome.
    """
    fetched = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(fetched, filter)
def findAllTarget(context):
    """Count article markers in ``context``.

    Returns the length of what ``CommonsSpiderUtils.findAllTarget`` produces
    for the ``<div class='Article'>`` pattern.
    """
    articles = CommonsSpiderUtils.findAllTarget(
        context, r"<div class='Article'>")
    return len(articles)
def returnStartContext(link):
    """Open ``link`` and locate the start of the image model section.

    The page from ``CommonsSpiderUtils.openUrl`` is handed to
    ``CommonsSpiderUtils.startContext`` with the ``<div class="imgModel">``
    marker; that result is returned.
    """
    page = CommonsSpiderUtils.openUrl(link)
    return CommonsSpiderUtils.startContext(page, '<div class="imgModel">')
def divisionTarget(startcontext, startfilter, endfilter):
    """Cut one target section out of ``startcontext``.

    The section runs from the position ``CommonsSpiderUtils.filterContext``
    reports for ``startfilter`` to ``len(endfilter)`` past the position
    reported for ``endfilter``.

    Returns:
        A dict holding the section under 'targetContext' and the text that
        follows it under 'nextContext'.

    NOTE(review): a marker that is not found is not handled here; the
    outcome depends on what filterContext returns in that case -- confirm.
    """
    head = CommonsSpiderUtils.filterContext(startcontext, startfilter)
    tail = CommonsSpiderUtils.filterContext(startcontext, endfilter)
    tail = tail + len(endfilter)
    section = startcontext[head:tail]
    rest = startcontext[tail:]
    return {'targetContext': section, 'nextContext': rest}
def findAllTarget(context):
    """Count image model markers in ``context``.

    Returns the length of what ``CommonsSpiderUtils.findAllTarget`` produces
    for the ``<div class="imgModel">`` pattern.
    """
    models = CommonsSpiderUtils.findAllTarget(
        context, r'<div class="imgModel">')
    return len(models)
def returnStartContext(link):
    """Fetch ``link``, transcode it from GBK to UTF-8 and find the articles.

    The content from ``CommonsSpiderUtils.openInternetUrl`` is decoded as
    GBK and re-encoded as UTF-8, then passed with the
    ``<div class='Article'>`` marker to ``CommonsSpiderUtils.startContext``.

    Relies on the Python 2 ``unicode`` builtin.
    """
    raw = CommonsSpiderUtils.openInternetUrl(link)
    utf8_page = unicode(raw, 'GBK').encode('UTF-8')
    return CommonsSpiderUtils.startContext(utf8_page, "<div class='Article'>")
def targetAfterContext(context, filterContext):
    """Return ``context`` from the position of ``filterContext`` onwards.

    The slice starts exactly at the position reported by
    ``CommonsSpiderUtils.filterContext``; the marker's length is not added.
    """
    offset = CommonsSpiderUtils.filterContext(context, filterContext)
    return context[offset:]