コード例 #1
0
def crawMorningForexDailyNews(linkUrl):
    """Collect the news entries dated today from a news listing page.

    The listing is processed one ``<li>`` entry at a time with the
    ``FXNewsForexNetSpiderUtils`` string helpers. Processing stops at the
    first entry whose publication date (first 10 characters of the
    ``<h5>`` text) differs from today's local date.
    NOTE(review): stopping at the first mismatch presumably relies on the
    page listing entries newest-first -- confirm against the site.

    Args:
        linkUrl: Location of the listing page; passed unchanged to
            ``FXNewsForexNetSpiderUtils.returnStartContext``.

    Returns:
        A list with one row per accepted entry, each row being
        ``[id, articleUrl, imageUrl, title, pubDate, description,
        'FOREX', 'FXNET']`` where ``id`` is a fresh ``uuid1`` string.
    """
    newsList = []
    # Computed once: the cutoff date is loop-invariant, and a single value
    # keeps the comparison consistent even if the crawl crosses midnight.
    today = time.strftime("%Y-%m-%d", time.localtime())

    listContext = FXNewsForexNetSpiderUtils.returnStartContext(linkUrl, '<div class="yjl_fx168_news_listBox">')
    listContext = FXNewsForexNetSpiderUtils.filterContextByTarget(listContext, '<ul>', '</ul>')
    # Named itemCount (not `len`) so the builtin is not shadowed.
    itemCount = FXNewsForexNetSpiderUtils.findAllTarget(listContext, '<li>')

    for _ in range(itemCount):
        # Split off the next <li>...</li> entry and keep the remainder
        # for the following iteration.
        division = FXNewsForexNetSpiderUtils.divisionTarget(listContext, '<li>', '</li>')
        listContext = division['nextContext']
        itemContext = division['targetContext']

        # The date is read from the raw entry, before special characters
        # are removed.
        pubDate = FXNewsForexNetSpiderUtils.filterContextByTarget(itemContext, '<h5>', '</h5>')
        # Guard clause: stop before parsing the remaining fields of an
        # entry that will be discarded anyway.
        if pubDate[:10] != today:
            break

        # NOTE(review): the markers below contain no spaces
        # ('<divclass=', 'imglazy-src'), so removeSpecialCharacter
        # presumably strips whitespace -- confirm in the helper.
        itemContext = FXNewsForexNetSpiderUtils.removeSpecialCharacter(itemContext)
        itemContext = FXNewsForexNetSpiderUtils.filterAfterContext(itemContext, '<divclass="yjl_fx168_news_listPhoto">')
        # Stored under its own name so the linkUrl parameter is not clobbered.
        articleUrl = FXNewsForexNetSpiderUtils.filterContextByTarget(itemContext, 'href="', '"title=')
        title = FXNewsForexNetSpiderUtils.filterContextByTarget(itemContext, 'title="', '><imglazy-src')
        imageUrl = FXNewsForexNetSpiderUtils.filterContextByTarget(itemContext, 'imglazy-src="', '"width=')
        description = FXNewsForexNetSpiderUtils.filterContextByTarget(itemContext, '<pclass="del">', '</div></li>')

        newsList.append([str(uuid.uuid1()), articleUrl, imageUrl, title, pubDate, description, 'FOREX', 'FXNET'])
    return newsList