コード例 #1
0
def crawDailyStockComments(link):
    """Build one record per ``<li>`` item of the stock-comment list at ``link``.

    The text returned by ``ZZStockNetSpiderUtils.returnStartContext`` for the
    ``<div class="column-box">`` marker is narrowed with
    ``filterContextByTarget`` (``<ul>`` .. ``<li class="nobg">``) and then
    consumed item by item with ``divisionTarget``.

    Args:
        link: Value handed to ``returnStartContext``; it is also prepended to
            every extracted article path (presumably the page URL - confirm
            against the helper module).

    Returns:
        A list of 7-element lists
        ``[uuid, linkUrl, title, pubDate, '', 'STOCK', 'ZZNET']``. Items for
        which ``removeSpecialCharacter`` returns ``''`` are skipped.
    """
    utils = ZZStockNetSpiderUtils
    currentList = []
    startContext = utils.returnStartContext(link, '<div class="column-box">')
    startContext = utils.filterContextByTarget(
        startContext, '<ul>', '<li class="nobg">')
    # Named itemCount (not ``len``) so the builtin is not shadowed.
    itemCount = utils.findAllTarget(startContext, '<li>')
    # Loop-invariant: the current local year is prefixed to each scraped date
    # (presumably because the page prints dates without a year - confirm).
    currentYear = time.strftime('%Y') + '-'
    for _ in range(itemCount):
        targetContext = utils.divisionTarget(startContext, '<li>', '</li>')
        # Continue the next iteration from the remainder after this item.
        startContext = targetContext['nextContext']
        currentContext = targetContext['targetContext']
        pubDate = currentYear + utils.filterContextByTarget(
            currentContext, '<span class="ctime">(', ')</span>')
        title = utils.filterContextByTarget(currentContext, '.html">', '</a>')
        linkUrl = utils.removeSpecialCharacter(currentContext)
        if linkUrl == '':
            continue
        # The closing marker 'html">' is cut off by the filter, so the
        # '.html' suffix is added back.
        linkUrl = utils.filterContextByTarget(
            linkUrl, '<ahref="', 'html">') + '.html'
        currentList.append([
            str(uuid.uuid1()), link + linkUrl, title, pubDate, '', 'STOCK',
            'ZZNET'
        ])
    return currentList
コード例 #2
0
def crawDailyStockDescriptContext(linkUrl):
    """Print and return the ``<p>``-filtered text of the page at ``linkUrl``.

    Args:
        linkUrl: Value handed to ``ZZStockNetSpiderUtils.returnStartContext``
            (presumably the article URL - confirm against the helper module).

    Returns:
        The result of ``filterContextByTarget(startContext, '<p>', '</p>')``.
        Earlier versions returned ``None``; the value is now returned so a
        caller can store it.
    """
    startContext = ZZStockNetSpiderUtils.returnStartContext(
        linkUrl,
        '<div class="Dtext z_content" id="ozoom1" style="zoom: 100%;">')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(startContext)
    filterContext = ZZStockNetSpiderUtils.filterContextByTarget(
        startContext, '<p>', '</p>')
    print(filterContext)
    return filterContext
コード例 #3
0
def crawDailyStockComments(link):
    """Return the stock-comment records scraped from the list at ``link``.

    Args:
        link: Value passed to ``ZZStockNetSpiderUtils.returnStartContext`` and
            also used as the prefix of each extracted article path
            (presumably the page URL - confirm against the helper module).

    Returns:
        A list with one entry per kept ``<li>`` item, each of the form
        ``[uuid, linkUrl, title, pubDate, '', 'STOCK', 'ZZNET']``. An item is
        dropped when ``removeSpecialCharacter`` returns ``''`` for it.
    """
    currentList = []
    startContext = ZZStockNetSpiderUtils.returnStartContext(
        link, '<div class="column-box">')
    startContext = ZZStockNetSpiderUtils.filterContextByTarget(
        startContext, '<ul>', '<li class="nobg">')
    # Do not call this ``len``: that would shadow the builtin.
    liCount = ZZStockNetSpiderUtils.findAllTarget(startContext, '<li>')
    # Computed once; it is the same for every item. The scraped date is
    # prefixed with the current local year.
    yearPrefix = time.strftime('%Y') + '-'
    for _ in range(liCount):
        targetContext = ZZStockNetSpiderUtils.divisionTarget(
            startContext, '<li>', '</li>')
        # The remainder after this item feeds the next iteration.
        startContext = targetContext['nextContext']
        currentContext = targetContext['targetContext']
        pubDate = yearPrefix + ZZStockNetSpiderUtils.filterContextByTarget(
            currentContext, '<span class="ctime">(', ')</span>')
        title = ZZStockNetSpiderUtils.filterContextByTarget(
            currentContext, '.html">', '</a>')
        linkUrl = ZZStockNetSpiderUtils.removeSpecialCharacter(currentContext)
        if linkUrl != '':
            # The end marker 'html">' is excluded by the filter, so '.html'
            # is appended again before prefixing the page link.
            linkUrl = link + ZZStockNetSpiderUtils.filterContextByTarget(
                linkUrl, '<ahref="', 'html">') + '.html'
            currentList.append([
                str(uuid.uuid1()), linkUrl, title, pubDate, '', 'STOCK',
                'ZZNET'
            ])
    return currentList
コード例 #4
0
def crawDailyStockDescriptContext(linkUrl):
    """Print the article body context for ``linkUrl`` and return its ``<p>`` text.

    Args:
        linkUrl: Value passed to ``ZZStockNetSpiderUtils.returnStartContext``
            (presumably the article URL - confirm against the helper module).

    Returns:
        Whatever ``filterContextByTarget`` yields for the ``<p>`` / ``</p>``
        markers. The function used to return ``None``; returning the value
        lets a caller store it.
    """
    bodyMarker = '<div class="Dtext z_content" id="ozoom1" style="zoom: 100%;">'
    startContext = ZZStockNetSpiderUtils.returnStartContext(linkUrl, bodyMarker)
    # print(x) with one argument works on both Python 2 and Python 3.
    print(startContext)
    filterContext = ZZStockNetSpiderUtils.filterContextByTarget(
        startContext, '<p>', '</p>')
    print(filterContext)
    return filterContext