def jfinfo2(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_query = "//div[@class='t-tit']/span[1]/text()"
    body_query = "//div[@class='t-context f16 picture']/p/text()"

    raw_time = "".join(root.xpath(time_query))
    stamp = my_utils.get_timestamp(raw_time)
    body = "".join(root.xpath(body_query))

    item.set('timestamp', stamp)
    item.set('content', body)
def ybzhan1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Only the first text node matched by the time query is used.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//span[@class='time']/text()")
    # Slice instead of indexing: a page without the time element then yields
    # an empty string (as the sibling extractors do) rather than IndexError.
    time_text = "".join(time_nodes[:1])
    ts = my_utils.get_timestamp(time_text)
    item.set('timestamp', ts)

    content = "".join(root.xpath("//div[@id='newsContent']/div/text()"))
    item.set('content', content)
def cheaa1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    The time text is reduced to its first two whitespace-separated tokens,
    concatenated without a separator, before being parsed.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    info_text = "".join(root.xpath("//div[@id='NewsInfo']/text()"))
    tokens = info_text.strip().split()
    stamp = my_utils.get_timestamp("".join(tokens[:2]))

    body = "".join(root.xpath("//div[@id='ctrlfscont']/p/text()"))

    item.set('timestamp', stamp)
    item.set('content', body)
def tom1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//span[@class='infor_time']/text()")
    raw_time = "".join(time_nodes).strip()
    item.set('timestamp', my_utils.get_timestamp(raw_time))

    paragraphs = root.xpath("//div[@class='news_box_text']/p/text()")
    item.set('content', "".join(paragraphs))
def zgny1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    The time text is taken from the 4th and 5th whitespace-separated tokens
    of the info paragraph, concatenated without a separator.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    info_line = "".join(root.xpath("//p[@class='xinXi']/text()"))
    tokens = info_line.split()
    raw_time = "".join(tokens[3:5])
    item.set('timestamp', my_utils.get_timestamp(raw_time))

    paragraphs = root.xpath("//div[@class='wenZi_02']/p/text()")
    item.set('content', "".join(paragraphs))
def cri1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//span[@id='acreatedtime']/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))
    item.set('timestamp', stamp)

    paragraphs = root.xpath("//div[@id='abody']/p/text()")
    item.set('content', "".join(paragraphs))
def xinhua2(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    # The query must be evaluated against ``root`` and must select text
    # nodes. Joining the XPath literal itself would hand the expression
    # string, not the page's time text, to the timestamp parser.
    time_text = "".join(
        root.xpath("//div[@class='source']/span[@class='time']/text()"))
    time_text = time_text.strip()
    ts = my_utils.get_timestamp(time_text)
    item.set('timestamp', ts)

    content = "".join(root.xpath("//div[@id='p-detail']/p/text()"))
    item.set('content', content)
def machine3651(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//div[@class='newliIn_Sti']/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))

    paragraphs = root.xpath("//div[@class='newliIn_Zti']/p/text()")
    body = "".join(paragraphs)

    item.set('timestamp', stamp)
    item.set('content', body)
def takungpao1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    The body is built from every descendant text node of the content div,
    not only from its ``<p>`` children.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_query = "//div[@class='tkp_con_author']/span[1]/text()"
    raw_time = "".join(root.xpath(time_query))
    item.set('timestamp', my_utils.get_timestamp(raw_time))

    text_nodes = root.xpath("//div[@class='tkp_content']//text()")
    item.set('content', "".join(text_nodes))
def p21jingji1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    The date and the hour live in separate elements; they are combined as
    ``"<date> <hour>"`` before being parsed.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    date_part = "".join(root.xpath("//p[@class='Wh']/span[1]/text()"))
    hour_part = "".join(root.xpath("//span[@class='hour']/text()"))
    combined = date_part + " " + hour_part
    item.set('timestamp', my_utils.get_timestamp(combined))

    paragraphs = root.xpath("//div[@class='detailCont']/p/text()")
    item.set('content', "".join(paragraphs))
def people1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Only the first whitespace-separated token of the time text is parsed;
    an empty text yields an empty string.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    header_query = "//div[@class='box01']/div[@class='fl']/text()"
    header_text = "".join(root.xpath(header_query))
    first_token = "".join(header_text.split()[:1])
    stamp = my_utils.get_timestamp(first_token)

    body = "".join(root.xpath("//div[@id='rwb_zw']/p/text()"))

    item.set('timestamp', stamp)
    item.set('content', body)
def aweb1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Only the first text node matched by the time query is used.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath(
        "//div[@class='newsLeft newscontentB']/h5/text()")
    # Slice instead of indexing: a page without the heading then yields an
    # empty string (as the sibling extractors do) rather than IndexError.
    time_text = "".join(time_nodes[:1])
    ts = my_utils.get_timestamp(time_text)
    item.set('timestamp', ts)

    content = "".join(root.xpath("//div[@class='conT f14px'][1]/p/text()"))
    item.set('content', content)
def smm1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    The time text is reduced to its first two whitespace-separated tokens,
    concatenated without a separator, before being parsed.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    label_query = "//p[@class='news-tips']/label[1]/span/text()"
    label_text = "".join(root.xpath(label_query))
    tokens = label_text.split()
    item.set('timestamp', my_utils.get_timestamp("".join(tokens[:2])))

    paragraphs = root.xpath("//div[@class='news-main']/p/text()")
    item.set('content', "".join(paragraphs))
def china5e1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Only the first whitespace-separated token of the title-info text is
    parsed as the time.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_text = "".join(
        root.xpath(
            "//div[@class='showtitle']/div[@class='showtitinfo']/text()"))
    # ``split()[0]`` raises IndexError when the text is empty or only
    # whitespace; the slice form (same as people1) yields "" instead.
    time_text = "".join(time_text.split()[:1])
    ts = my_utils.get_timestamp(time_text)

    content = "".join(root.xpath("//div[@id='showcontent']/div[1]/p/text()"))

    item.set('timestamp', ts)
    item.set('content', content)
def texnet1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    The time text is built from the 2nd and 3rd whitespace-separated tokens
    of the info paragraph, joined with a single space.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    info_line = "".join(root.xpath("//p[@class='line22 fontgrey']/text()"))
    tokens = info_line.split()
    stamp = my_utils.get_timestamp(" ".join(tokens[1:3]))

    body_query = "//div[@class='detail-text line25 font14px']/div/p/text()"
    body = "".join(root.xpath(body_query))

    item.set('timestamp', stamp)
    item.set('content', body)
def tnc1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    The time text is built from the first two whitespace-separated tokens
    of the title paragraph, joined with a single space.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    title_line = "".join(root.xpath("//div[@class='article-title']/p/text()"))
    tokens = title_line.split()
    stamp = my_utils.get_timestamp(" ".join(tokens[:2]))

    body_query = "//div[@class='article-content c-mb-20']/p/text()"
    body = "".join(root.xpath(body_query))

    item.set('timestamp', stamp)
    item.set('content', body)
def chinaventure1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_query = "//div[@class='details_01_l']/span[3]/text()"
    body_query = "//div[@class='content_01 m_t_30 detasbmo']/p/text()"

    stamp = my_utils.get_timestamp("".join(root.xpath(time_query)))
    body = "".join(root.xpath(body_query))

    item.set('timestamp', stamp)
    item.set('content', body)
def chinabreed1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_query = "//div[@class='artInfo']/span[@id='pub_date']/text()"
    raw_time = "".join(root.xpath(time_query))
    item.set('timestamp', my_utils.get_timestamp(raw_time))

    body_query = (
        "//div[@class='blkContainerSblk']/div[@id='artibody']/div/text()")
    item.set('content', "".join(root.xpath(body_query)))
def ftchinese1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Does nothing (and leaves ``item`` untouched) when ``root`` is ``None``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element),
            or ``None``.
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    if root is not None:
        time_nodes = root.xpath("//span[@class='story-time']/text()")
        stamp = my_utils.get_timestamp("".join(time_nodes))

        paragraphs = root.xpath("//div[@id='story-body-container']/p/text()")
        body = "".join(paragraphs)

        item.set('timestamp', stamp)
        item.set('content', body)
def jrj2(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_query = "//div[@class='titmain']/p[@class='inftop']/span[1]/text()"
    raw_time = "".join(root.xpath(time_query)).strip()
    stamp = my_utils.get_timestamp(raw_time)
    item.set('timestamp', stamp)

    body_query = "//div[@class='titmain']/div[@class='texttit_m1']/p/text()"
    body = "".join(root.xpath(body_query))
    item.set('content', body)
def qq1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    The body query is relative to ``root`` and collects all descendant text
    of ``<p>`` elements whose ``style`` attribute does not contain
    ``'COLOR: red'``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_query = "//div[@class='a_Info']/span[@class='a_time']/text()"
    raw_time = "".join(root.xpath(time_query))
    item.set('timestamp', my_utils.get_timestamp(raw_time))

    body_query = (
        ".//div[contains(@id,'Cnt-Main-Article-QQ')]/p[not(contains(@style,'COLOR: red'))]//text()"
    )
    body = "".join(root.xpath(body_query))
    item.set('content', body)
def p21food1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_query = (
        "//div[@class='news_detail_t']/div[@class='ws_det_p2']/i[1]/text()")
    body_query = (
        "//div[@class='news_detail_t']/div[@class='ws_det_p3']/p/text()")

    stamp = my_utils.get_timestamp("".join(root.xpath(time_query)))
    body = "".join(root.xpath(body_query))

    item.set('timestamp', stamp)
    item.set('content', body)
def dayoo1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//span[@class='time']/text()")
    item.set('timestamp', my_utils.get_timestamp("".join(time_nodes)))

    paragraphs = root.xpath("//div[@id='text_content']/p/text()")
    item.set('content', "".join(paragraphs))
def bbtnews1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//div[@class='info']/span/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))
    item.set('timestamp', stamp)

    paragraphs = root.xpath("//div[@id='pageContent']/p/text()")
    item.set('content', "".join(paragraphs))
def lswb1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    The time text is read from the first ``<li>`` of the breadcrumb list.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    crumb_nodes = root.xpath("//ol[@class='breadcrumb']/li[1]/text()")
    stamp = my_utils.get_timestamp("".join(crumb_nodes))
    item.set('timestamp', stamp)

    paragraphs = root.xpath("//div[@class='article-content']/p/text()")
    item.set('content', "".join(paragraphs))
def banyuetan1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//div[@class='detail_tit_time']/text()")
    item.set('timestamp', my_utils.get_timestamp("".join(time_nodes)))

    paragraphs = root.xpath("//div[@id='detail_content']/p/text()")
    item.set('content', "".join(paragraphs))
def sohu3(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//span[@id='news-time']/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))
    item.set('timestamp', stamp)

    paragraphs = root.xpath("//article[@id='mp-editor']/p/text()")
    item.set('content', "".join(paragraphs))
def iresearch1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//div[@class='origin']/em/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))

    paragraphs = root.xpath("//div[@class='m-article']/p/text()")
    body = "".join(paragraphs)

    item.set('timestamp', stamp)
    item.set('content', body)
def gongkong1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//span[@class='f1405'][3]/text()")
    item.set('timestamp', my_utils.get_timestamp("".join(time_nodes)))

    paragraphs = root.xpath("//div[@id='article']/p/text()")
    item.set('content', "".join(paragraphs))
def carnoc1(root, item):
    """Store the timestamp and body text found in ``root`` on ``item``.

    Args:
        root: Parsed page exposing ``xpath()`` (presumably an lxml element).
        item: Record exposing ``set(key, value)``; receives ``'timestamp'``
            and ``'content'``.
    """
    time_nodes = root.xpath("//span[@id='pubtime_baidu']/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))

    paragraphs = root.xpath("//div[@id='newstext']/p/text()")
    body = "".join(paragraphs)

    item.set('timestamp', stamp)
    item.set('content', body)