def _collect_flagged_news(list_context, news_flag):
    """Turn the ``<li>`` entries of one news list into result rows.

    Args:
        list_context: The text between ``<ul>`` and ``</ul>`` of one list,
            as returned by ``ThemeNewsSpiderUtils.filterContextByTarget``.
        news_flag: Label stored as the last element of every row.

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, news_flag]`` lists, one
        per ``<li>`` entry whose extracted link is non-empty.
    """
    rows = []
    # The number of iterations is fixed up front from the count of '<li'
    # occurrences; each pass then consumes one '<li>...</li>' entry.
    item_count = ThemeNewsSpiderUtils.findAllTarget(list_context, '<li')
    for _ in range(item_count):
        split = ThemeNewsSpiderUtils.divisionTarget(list_context, '<li>', '</li>')
        list_context = split['nextContext']  # remainder for the next pass
        item_html = split['targetContext']

        link_url = ThemeNewsSpiderUtils.filterContextByTarget(item_html, '<a href="', '">')
        pub_date = ThemeNewsSpiderUtils.filterContextByTarget(item_html, '<span>', '</span>')
        title = ThemeNewsSpiderUtils.filterContextByTarget(item_html, '">', '</a>')

        if link_url != '':
            # The id is generated only for rows that are actually kept.
            rows.append([str(uuid.uuid1()), link_url, pub_date, title, news_flag])
    return rows


def crawCompanyNews(link):
    """Collect the news rows of both "TacticNewsList" blocks for ``link``.

    The block with id ``TacticNewsList1`` is labelled ``'good'`` and the
    hidden block with id ``TacticNewsList2`` is labelled ``'bad'``; rows of
    the first block precede rows of the second.

    Args:
        link: Passed unchanged to ``ThemeNewsSpiderUtils.returnStartContext``
            (presumably the URL of the page to scrape -- confirm against
            that helper).

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, newsFlag]`` lists where
        ``keyid`` is a fresh ``uuid.uuid1()`` string.
    """
    # (start marker of the block, flag attached to its rows)
    sections = (
        ('<div class="listnews" id="TacticNewsList1" >', 'good'),
        ('<div class="listnews" id="TacticNewsList2" style="display:none;">', 'bad'),
    )

    currentList = []
    for start_marker, news_flag in sections:
        section_context = ThemeNewsSpiderUtils.returnStartContext(link, start_marker)
        list_context = ThemeNewsSpiderUtils.filterContextByTarget(section_context, '<ul>', '</ul>')
        currentList.extend(_collect_flagged_news(list_context, news_flag))
    return currentList
def crawCompanyNews(link):
    """Collect today's news rows from the ``listnews`` block for ``link``.

    Entries are read in document order and reading stops at the first entry
    whose date does not start with today's local date (``YYYY-MM-DD``).

    Args:
        link: Passed unchanged to ``ThemeNewsSpiderUtils.returnStartContext``
            (presumably the URL of the page to scrape -- confirm against
            that helper).

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, 'STOCKSTAR']`` lists,
        one per accepted entry with a non-empty link; ``keyid`` is a fresh
        ``uuid.uuid1()`` string.
    """
    section_context = ThemeNewsSpiderUtils.returnStartContext(
        link, '<div class="listnews">')
    list_context = ThemeNewsSpiderUtils.filterContextByTarget(
        section_context, '<ul>', '</ul>')
    item_count = ThemeNewsSpiderUtils.findAllTarget(list_context, '<li')

    # Computed once: the reference date does not need to be rebuilt for
    # every entry.
    today = time.strftime("%Y-%m-%d", time.localtime())

    currentList = []
    for _ in range(item_count):
        split = ThemeNewsSpiderUtils.divisionTarget(
            list_context, '<li>', '</li>')
        list_context = split['nextContext']  # remainder for the next pass
        item_html = split['targetContext']

        link_url = ThemeNewsSpiderUtils.filterContextByTarget(
            item_html, '<a href="', '">')
        pub_date = ThemeNewsSpiderUtils.filterContextByTarget(
            item_html, '<span>', '</span>')
        title = ThemeNewsSpiderUtils.filterContextByTarget(
            item_html, '">', '</a>')

        # Only the first 10 characters of the <span> text are compared.
        # NOTE(review): stopping here assumes the list is ordered newest
        # first -- confirm against the page layout.
        if pub_date[:10] != today:
            break
        if link_url != '':
            # The id is generated only for rows that are actually kept.
            currentList.append(
                [str(uuid.uuid1()), link_url, pub_date, title, 'STOCKSTAR'])
    return currentList
def _collect_flagged_news(list_context, news_flag):
    """Turn the ``<li>`` entries of one news list into result rows.

    Args:
        list_context: The text between ``<ul>`` and ``</ul>`` of one list,
            as returned by ``ThemeNewsSpiderUtils.filterContextByTarget``.
        news_flag: Label stored as the last element of every row.

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, news_flag]`` lists, one
        per ``<li>`` entry whose extracted link is non-empty.
    """
    rows = []
    # The number of iterations is fixed up front from the count of '<li'
    # occurrences; each pass then consumes one '<li>...</li>' entry.
    item_count = ThemeNewsSpiderUtils.findAllTarget(list_context, '<li')
    for _ in range(item_count):
        split = ThemeNewsSpiderUtils.divisionTarget(
            list_context, '<li>', '</li>')
        list_context = split['nextContext']  # remainder for the next pass
        item_html = split['targetContext']

        link_url = ThemeNewsSpiderUtils.filterContextByTarget(
            item_html, '<a href="', '">')
        pub_date = ThemeNewsSpiderUtils.filterContextByTarget(
            item_html, '<span>', '</span>')
        title = ThemeNewsSpiderUtils.filterContextByTarget(
            item_html, '">', '</a>')

        if link_url != '':
            # The id is generated only for rows that are actually kept.
            rows.append(
                [str(uuid.uuid1()), link_url, pub_date, title, news_flag])
    return rows


def crawCompanyNews(link):
    """Collect the news rows of both "TacticNewsList" blocks for ``link``.

    The block with id ``TacticNewsList1`` is labelled ``'good'`` and the
    hidden block with id ``TacticNewsList2`` is labelled ``'bad'``; rows of
    the first block precede rows of the second.

    Args:
        link: Passed unchanged to ``ThemeNewsSpiderUtils.returnStartContext``
            (presumably the URL of the page to scrape -- confirm against
            that helper).

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, newsFlag]`` lists where
        ``keyid`` is a fresh ``uuid.uuid1()`` string.
    """
    # (start marker of the block, flag attached to its rows)
    sections = (
        ('<div class="listnews" id="TacticNewsList1" >', 'good'),
        ('<div class="listnews" id="TacticNewsList2" style="display:none;">',
         'bad'),
    )

    currentList = []
    for start_marker, news_flag in sections:
        section_context = ThemeNewsSpiderUtils.returnStartContext(
            link, start_marker)
        list_context = ThemeNewsSpiderUtils.filterContextByTarget(
            section_context, '<ul>', '</ul>')
        currentList.extend(_collect_flagged_news(list_context, news_flag))
    return currentList
def crawThemeDailyNews(link):
    """Collect today's theme news rows from the ``listnews`` block.

    Entries are read in document order and reading stops at the first entry
    whose date does not start with today's local date (``YYYY-MM-DD``).

    Args:
        link: Passed unchanged to ``ThemeNewsSpiderUtils.returnStartContext``
            (presumably the URL of the page to scrape -- confirm against
            that helper).

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, "STOCKNET"]`` lists,
        one per accepted entry with a non-empty link; ``keyid`` is a fresh
        ``uuid.uuid1()`` string.
    """
    section_context = ThemeNewsSpiderUtils.returnStartContext(link, '<div class="listnews">')
    list_context = ThemeNewsSpiderUtils.filterContextByTarget(section_context, "<ul>", "</ul>")
    item_count = ThemeNewsSpiderUtils.findAllTarget(list_context, "<li")

    # Computed once: the reference date does not need to be rebuilt for
    # every entry.
    today = time.strftime("%Y-%m-%d", time.localtime())

    currentList = []
    for _ in range(item_count):
        split = ThemeNewsSpiderUtils.divisionTarget(list_context, "<li>", "</li>")
        list_context = split["nextContext"]  # remainder for the next pass
        item_html = split["targetContext"]

        link_url = ThemeNewsSpiderUtils.filterContextByTarget(item_html, '<a href="', '">')
        pub_date = ThemeNewsSpiderUtils.filterContextByTarget(item_html, "<span>", "</span>")
        title = ThemeNewsSpiderUtils.filterContextByTarget(item_html, '">', "</a>")

        # Only the first 10 characters of the <span> text are compared.
        # NOTE(review): stopping here assumes the list is ordered newest
        # first -- confirm against the page layout.
        if pub_date[:10] != today:
            break
        if link_url != "":
            # The id is generated only for rows that are actually kept.
            currentList.append([str(uuid.uuid1()), link_url, pub_date, title, "STOCKNET"])
    return currentList
def crawCompanyNews(link):
    """Collect today's news rows from the ``listnews`` block for ``link``.

    Entries are read in document order and reading stops at the first entry
    whose date does not start with today's local date (``YYYY-MM-DD``).

    Args:
        link: Passed unchanged to ``ThemeNewsSpiderUtils.returnStartContext``
            (presumably the URL of the page to scrape -- confirm against
            that helper).

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, 'STOCKSTAR']`` lists,
        one per accepted entry with a non-empty link; ``keyid`` is a fresh
        ``uuid.uuid1()`` string.
    """
    section_context = ThemeNewsSpiderUtils.returnStartContext(link, '<div class="listnews">')
    list_context = ThemeNewsSpiderUtils.filterContextByTarget(section_context, '<ul>', '</ul>')
    item_count = ThemeNewsSpiderUtils.findAllTarget(list_context, '<li')

    # Computed once: the reference date does not need to be rebuilt for
    # every entry.
    today = time.strftime("%Y-%m-%d", time.localtime())

    currentList = []
    for _ in range(item_count):
        split = ThemeNewsSpiderUtils.divisionTarget(list_context, '<li>', '</li>')
        list_context = split['nextContext']  # remainder for the next pass
        item_html = split['targetContext']

        link_url = ThemeNewsSpiderUtils.filterContextByTarget(item_html, '<a href="', '">')
        pub_date = ThemeNewsSpiderUtils.filterContextByTarget(item_html, '<span>', '</span>')
        title = ThemeNewsSpiderUtils.filterContextByTarget(item_html, '">', '</a>')

        # Only the first 10 characters of the <span> text are compared.
        # NOTE(review): stopping here assumes the list is ordered newest
        # first -- confirm against the page layout.
        if pub_date[:10] != today:
            break
        if link_url != '':
            # The id is generated only for rows that are actually kept.
            currentList.append([str(uuid.uuid1()), link_url, pub_date, title, 'STOCKSTAR'])
    return currentList