def writeDailyThemeNews():
    """Crawl the STCN theme-news list page and delete the stored STCNNET rows.

    The list page is crawled with ``crawDailyThemeNews`` first; afterwards all
    rows of ``STOCK_POOL_THEME_NEWS_TABLE`` whose ``SOURCEFLAG`` is
    ``'STCNNET'`` are deleted and the deletion is committed. If the database
    raises ``conn.Error``, the transaction is rolled back and the error is
    printed instead of being propagated.

    Returns:
        None.
    """
    link = 'http://stock.stcn.com/bankuai/1.shtml'
    # The crawl runs before the DELETE, so an exception raised while crawling
    # leaves the table untouched.
    # NOTE(review): the crawled rows are not used by this function as it
    # stands; presumably they are meant to be inserted after the delete --
    # confirm against the complete file.
    currentList = crawDailyThemeNews(link)
    conn = StcnThemeNewsSpiderUtils.getMySQLConn()
    cursor = conn.cursor()
    try:
        cursor.execute("DELETE FROM STOCK_POOL_THEME_NEWS_TABLE WHERE SOURCEFLAG = 'STCNNET'")
        conn.commit()
    except conn.Error as e:
        # Roll back before reporting so a problem while formatting the
        # message cannot cause the rollback to be skipped.
        conn.rollback()
        if len(e.args) >= 2:
            print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
        else:
            # Some errors carry fewer than two args; indexing args[1] on
            # them would raise IndexError inside the handler.
            print("Mysql Error: %s" % (e,))
    finally:
        # NOTE(review): only the cursor is released here; whether the
        # connection from getMySQLConn() is shared is not visible, so it is
        # left open -- confirm ownership.
        cursor.close()
Example #2
0
def writeDailyThemeNews():
    """Crawl the STCN theme-news list page and delete the stored STCNNET rows.

    The list page is crawled with ``crawDailyThemeNews`` first; afterwards all
    rows of ``STOCK_POOL_THEME_NEWS_TABLE`` whose ``SOURCEFLAG`` is
    ``'STCNNET'`` are deleted and the deletion is committed. If the database
    raises ``conn.Error``, the transaction is rolled back and the error is
    printed instead of being propagated.

    Returns:
        None.
    """
    link = 'http://stock.stcn.com/bankuai/1.shtml'
    # The crawl runs before the DELETE, so an exception raised while crawling
    # leaves the table untouched.
    # NOTE(review): the crawled rows are not used by this function as it
    # stands; presumably they are meant to be inserted after the delete --
    # confirm against the complete file.
    currentList = crawDailyThemeNews(link)
    conn = StcnThemeNewsSpiderUtils.getMySQLConn()
    cursor = conn.cursor()
    try:
        cursor.execute("DELETE FROM STOCK_POOL_THEME_NEWS_TABLE WHERE SOURCEFLAG = 'STCNNET'")
        conn.commit()
    except conn.Error as e:
        # Roll back before reporting so a problem while formatting the
        # message cannot cause the rollback to be skipped.
        conn.rollback()
        if len(e.args) >= 2:
            print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
        else:
            # Some errors carry fewer than two args; indexing args[1] on
            # them would raise IndexError inside the handler.
            print("Mysql Error: %s" % (e,))
    finally:
        # NOTE(review): only the cursor is released here; whether the
        # connection from getMySQLConn() is shared is not visible, so it is
        # left open -- confirm ownership.
        cursor.close()
def crawDailyThemeNews(link):
    """Collect the theme-news entries dated today from the list page at ``link``.

    The markup between ``<ul class="mainlist" id="mainlist">`` and ``</ul>``
    is extracted with the ``StcnThemeNewsSpiderUtils`` helpers and its
    ``<li>`` items are consumed one at a time. Scanning stops at the first
    item whose publication date (first 10 characters) is not today's local
    date; items with an empty title are skipped.

    Args:
        link: URL of the list page, passed to
            ``StcnThemeNewsSpiderUtils.returnStartContext``.

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, 'STCNNET']`` lists,
        where ``keyid`` is a freshly generated ``uuid.uuid1()`` string.
    """
    listMarker = '<ul class="mainlist" id="mainlist">'
    currentList = []
    startContext = StcnThemeNewsSpiderUtils.returnStartContext(link, listMarker)
    startContext = StcnThemeNewsSpiderUtils.filterContextByTarget(startContext, listMarker, '</ul>')
    # Named itemCount rather than `len` so the builtin is not shadowed.
    itemCount = StcnThemeNewsSpiderUtils.findAllTarget(startContext, '<li>')
    # Computed once: the value is loop-invariant, and every item is then
    # compared against the same date even if the crawl spans midnight.
    today = time.strftime("%Y-%m-%d", time.localtime())
    for _ in range(itemCount):
        targetContext = StcnThemeNewsSpiderUtils.divisionTarget(startContext, '<li>', '</li>')
        # Continue the next iteration from the markup after this item.
        startContext = targetContext['nextContext']
        currentcontext = targetContext['targetContext']
        linkUrl = StcnThemeNewsSpiderUtils.filterContextByTarget(currentcontext, 'href="', '.shtml') + '.shtml'
        title = StcnThemeNewsSpiderUtils.filterContextByTarget(currentcontext, '.shtml">', '</a>')
        # presumably the date sits in the <span> following the first
        # </span> of the item -- helper semantics to be confirmed.
        pubDate = StcnThemeNewsSpiderUtils.filterAfterContext(currentcontext, '</span>')
        pubDate = StcnThemeNewsSpiderUtils.filterContextByTarget(pubDate, '<span>', '</span>')
        if pubDate[:10] != today:
            # NOTE(review): stopping at the first non-today item assumes the
            # page lists newest entries first -- confirm.
            break
        if title != '':
            # The id is generated only for entries that are actually kept.
            keyid = str(uuid.uuid1())
            currentList.append([keyid, linkUrl, pubDate, title, 'STCNNET'])

    return currentList
Example #4
0
def crawDailyThemeNews(link):
    """Collect the theme-news entries dated today from the list page at ``link``.

    The markup between ``<ul class="mainlist" id="mainlist">`` and ``</ul>``
    is extracted with the ``StcnThemeNewsSpiderUtils`` helpers and its
    ``<li>`` items are consumed one at a time. Scanning stops at the first
    item whose publication date (first 10 characters) is not today's local
    date; items with an empty title are skipped.

    Args:
        link: URL of the list page, passed to
            ``StcnThemeNewsSpiderUtils.returnStartContext``.

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, 'STCNNET']`` lists,
        where ``keyid`` is a freshly generated ``uuid.uuid1()`` string.
    """
    listMarker = '<ul class="mainlist" id="mainlist">'
    currentList = []
    startContext = StcnThemeNewsSpiderUtils.returnStartContext(link, listMarker)
    startContext = StcnThemeNewsSpiderUtils.filterContextByTarget(startContext, listMarker, '</ul>')
    # Named itemCount rather than `len` so the builtin is not shadowed.
    itemCount = StcnThemeNewsSpiderUtils.findAllTarget(startContext, '<li>')
    # Computed once: the value is loop-invariant, and every item is then
    # compared against the same date even if the crawl spans midnight.
    today = time.strftime("%Y-%m-%d", time.localtime())
    for _ in range(itemCount):
        targetContext = StcnThemeNewsSpiderUtils.divisionTarget(startContext, '<li>', '</li>')
        # Continue the next iteration from the markup after this item.
        startContext = targetContext['nextContext']
        currentcontext = targetContext['targetContext']
        linkUrl = StcnThemeNewsSpiderUtils.filterContextByTarget(currentcontext, 'href="', '.shtml') + '.shtml'
        title = StcnThemeNewsSpiderUtils.filterContextByTarget(currentcontext, '.shtml">', '</a>')
        # presumably the date sits in the <span> following the first
        # </span> of the item -- helper semantics to be confirmed.
        pubDate = StcnThemeNewsSpiderUtils.filterAfterContext(currentcontext, '</span>')
        pubDate = StcnThemeNewsSpiderUtils.filterContextByTarget(pubDate, '<span>', '</span>')
        if pubDate[:10] != today:
            # NOTE(review): stopping at the first non-today item assumes the
            # page lists newest entries first -- confirm.
            break
        if title != '':
            # The id is generated only for entries that are actually kept.
            keyid = str(uuid.uuid1())
            currentList.append([keyid, linkUrl, pubDate, title, 'STCNNET'])

    return currentList