예제 #1
0
def crawSXMetalComments(link):
    """Collect one comment record from the content obtained for ``link``.

    The content returned by ``JTMetalNetSpiderUtils.returnStartContext`` for
    the ``<ul class="tab_conbox" id="tab_conbox2">`` marker is narrowed with
    the ``<div>`` / ``</div>`` markers, and the link, title and date fields
    are then pulled out of that fragment.
    NOTE(review): the helpers are defined outside this file; presumably they
    do marker-based substring extraction -- confirm against the utils module.

    Args:
        link: Address passed to ``returnStartContext``; it is also used as
            the prefix of the extracted ``href`` value.

    Returns:
        A list containing exactly one record of the form
        ``[uuid string, url, title, publish date, description, 'METAL', 'GTNET']``.
    """
    utils = JTMetalNetSpiderUtils
    section = utils.returnStartContext(link, '<ul class="tab_conbox" id="tab_conbox2">')
    entryHtml = utils.filterContextByTarget(section, '<div>', '</div>')

    articleUrl = link + utils.filterContextByTarget(entryHtml, '<a href="', '" title')
    headline = utils.filterContextByTarget(entryHtml, '<font style="color:red;" >', '</font>')
    published = utils.filterContextByTarget(entryHtml, '<span>', '</span>')
    # The description is obtained through a second lookup on the article URL.
    description = crawDescriptContext(articleUrl)

    record = [str(uuid.uuid1()), articleUrl, headline, published, description, 'METAL', 'GTNET']
    return [record]
예제 #2
0
def crawDailyMetalComments(link,webLink):
    """Collect seven comment records from the ``<ul id="lie">`` list for ``link``.

    The list content is consumed one ``<li>`` / ``</li>`` item at a time via
    ``JTMetalNetSpiderUtils.divisionTarget``, whose result supplies the item
    itself (``'targetContext'``) and the content still to be processed
    (``'nextContext'``).
    NOTE(review): exactly seven items are always requested; how the helper
    behaves when fewer items exist is not visible here -- confirm.

    Args:
        link: Address passed to ``returnStartContext``.
        webLink: Prefix prepended to each extracted ``href`` value.

    Returns:
        A list of seven records, each of the form
        ``[uuid string, url, title, publish date, description, 'METAL', 'GTNET']``.
    """
    utils = JTMetalNetSpiderUtils
    records = []
    remaining = utils.returnStartContext(link, '<ul id="lie">')
    remaining = utils.filterContextByTarget(remaining, '<ul id="lie">', '</ul>')

    for _ in range(7):
        split = utils.divisionTarget(remaining, '<li>', '</li>')
        remaining = split['nextContext']
        itemHtml = split['targetContext']

        articleUrl = webLink + utils.filterContextByTarget(itemHtml, '<a href="', '" title')
        headline = utils.filterContextByTarget(itemHtml, 'title="', '">')
        published = utils.filterContextByTarget(itemHtml, '<span>', '</span>')
        description = crawDescriptContext(articleUrl)

        records.append(
            [str(uuid.uuid1()), articleUrl, headline, published, description, 'METAL', 'GTNET']
        )

    return records
예제 #3
0
def crawDescriptContext(link):
    """Return the cleaned description text obtained for ``link``.

    The content located with the ``</strong></p>`` marker is passed through
    ``filterAfterContext`` with that same marker, narrowed with the ``<p>`` /
    ``</p>`` markers, and finally run through ``removeSpecialCharacter``.
    NOTE(review): all helpers live in ``JTMetalNetSpiderUtils`` outside this
    file; their exact semantics should be confirmed there.

    Args:
        link: Address passed to ``returnStartContext``.

    Returns:
        Whatever ``removeSpecialCharacter`` produces for the extracted fragment.
    """
    utils = JTMetalNetSpiderUtils
    marker = '</strong></p>'

    located = utils.returnStartContext(link, marker)
    afterMarker = utils.filterAfterContext(located, marker)
    paragraph = utils.filterContextByTarget(afterMarker, '<p>', '</p>')
    return utils.removeSpecialCharacter(paragraph)