コード例 #1
0
def jfinfo2(root, item):
    """Fill ``item`` with the timestamp and content found under ``root``.

    The time string is the joined text of ``span[1]`` inside
    ``div[@class='t-tit']``; it is converted by ``my_utils.get_timestamp``.
    The content is the joined text of the ``p`` children of
    ``div[@class='t-context f16 picture']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``; receives the keys
            ``'timestamp'`` and ``'content'``.
    """
    time_nodes = root.xpath("//div[@class='t-tit']/span[1]/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))
    body_nodes = root.xpath("//div[@class='t-context f16 picture']/p/text()")
    body = "".join(body_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)
コード例 #2
0
def ybzhan1(root, item):
    """Store the time and content extracted from ``root`` on ``item``.

    Only the first text node matched by ``//span[@class='time']/text()`` is
    used as the time string; it is converted with
    ``my_utils.get_timestamp``. The content is the joined text of the
    ``div`` children of ``div[@id='newsContent']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``; receives the keys
            ``'timestamp'`` and ``'content'``.
    """
    # Slice rather than index: when the span is absent the time string
    # becomes "" instead of the lookup raising IndexError.
    time_text = "".join(root.xpath("//span[@class='time']/text()")[:1])
    ts = my_utils.get_timestamp(time_text)
    item.set('timestamp', ts)
    content = "".join(root.xpath("//div[@id='newsContent']/div/text()"))
    item.set('content', content)
コード例 #3
0
def cheaa1(root, item):
    """Copy the time and content from ``root`` onto ``item``.

    The text of ``div[@id='NewsInfo']`` is stripped and split on whitespace;
    its first two tokens are concatenated (no separator) and handed to
    ``my_utils.get_timestamp``. The content is the joined text of the ``p``
    children of ``div[@id='ctrlfscont']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``; receives the keys
            ``'timestamp'`` and ``'content'``.
    """
    info_text = "".join(root.xpath("//div[@id='NewsInfo']/text()"))
    leading_tokens = info_text.strip().split()[:2]
    stamp = my_utils.get_timestamp("".join(leading_tokens))
    paragraphs = root.xpath("//div[@id='ctrlfscont']/p/text()")
    body = "".join(paragraphs)
    item.set('timestamp', stamp)
    item.set('content', body)
コード例 #4
0
def tom1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined, stripped text of ``span[@class='infor_time']`` is converted
    with ``my_utils.get_timestamp``. The content is the joined text of the
    ``p`` children of ``div[@class='news_box_text']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    raw_nodes = root.xpath("//span[@class='infor_time']/text()")
    when = "".join(raw_nodes).strip()
    item.set('timestamp', my_utils.get_timestamp(when))
    body = "".join(root.xpath("//div[@class='news_box_text']/p/text()"))
    item.set('content', body)
コード例 #5
0
def zgny1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of ``p[@class='xinXi']`` is split on whitespace and the tokens
    at positions 3 and 4 (zero-based) are concatenated before being passed
    to ``my_utils.get_timestamp``. The content is the joined text of the
    ``p`` children of ``div[@class='wenZi_02']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    info_line = "".join(root.xpath("//p[@class='xinXi']/text()"))
    tokens = info_line.split()
    # The two selected tokens are glued together without a separator.
    stamp = my_utils.get_timestamp("".join(tokens[3:5]))
    item.set('timestamp', stamp)
    paragraphs = root.xpath("//div[@class='wenZi_02']/p/text()")
    item.set('content', "".join(paragraphs))
コード例 #6
0
def cri1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``span[@id='acreatedtime']`` is converted with
    ``my_utils.get_timestamp``; the content is the joined text of the ``p``
    children of ``div[@id='abody']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    def joined(expression):
        # Concatenate every text node the expression selects.
        return "".join(root.xpath(expression))

    created = joined("//span[@id='acreatedtime']/text()")
    item.set('timestamp', my_utils.get_timestamp(created))
    item.set('content', joined("//div[@id='abody']/p/text()"))
コード例 #7
0
def xinhua2(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined, stripped text of ``span[@class='time']`` inside
    ``div[@class='source']`` is converted with ``my_utils.get_timestamp``.
    The content is the joined text of the ``p`` children of
    ``div[@id='p-detail']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    # The expression has to be evaluated against ``root`` and select text
    # nodes; joining the query string itself would pass the literal XPath
    # text to get_timestamp instead of the page's time string.
    time_text = "".join(
        root.xpath("//div[@class='source']/span[@class='time']/text()"))
    time_text = time_text.strip()
    ts = my_utils.get_timestamp(time_text)
    item.set('timestamp', ts)
    content = "".join(root.xpath("//div[@id='p-detail']/p/text()"))
    item.set('content', content)
コード例 #8
0
def machine3651(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``div[@class='newliIn_Sti']`` is converted with
    ``my_utils.get_timestamp``; the content is the joined text of the ``p``
    children of ``div[@class='newliIn_Zti']``. Both values are computed
    before either is stored.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_expr = "//div[@class='newliIn_Sti']/text()"
    body_expr = "//div[@class='newliIn_Zti']/p/text()"
    stamp = my_utils.get_timestamp("".join(root.xpath(time_expr)))
    body = "".join(root.xpath(body_expr))
    for key, value in (('timestamp', stamp), ('content', body)):
        item.set(key, value)
コード例 #9
0
def takungpao1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``span[1]`` inside ``div[@class='tkp_con_author']``
    is converted with ``my_utils.get_timestamp``. The content is every
    descendant text node of ``div[@class='tkp_content']``, concatenated.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    author_span = root.xpath("//div[@class='tkp_con_author']/span[1]/text()")
    stamp = my_utils.get_timestamp("".join(author_span))
    item.set('timestamp', stamp)
    # ``//text()`` selects all descendant text nodes, not only <p> text.
    all_text = root.xpath("//div[@class='tkp_content']//text()")
    item.set('content', "".join(all_text))
コード例 #10
0
def p21jingji1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The time string is assembled from two places: the text of ``span[1]``
    inside ``p[@class='Wh']`` and the text of ``span[@class='hour']``,
    separated by a single space, then converted with
    ``my_utils.get_timestamp``. The content is the joined text of the ``p``
    children of ``div[@class='detailCont']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    day_part = "".join(root.xpath("//p[@class='Wh']/span[1]/text()"))
    hour_part = "".join(root.xpath("//span[@class='hour']/text()"))
    stamp = my_utils.get_timestamp(" ".join((day_part, hour_part)))
    item.set('timestamp', stamp)
    body_nodes = root.xpath("//div[@class='detailCont']/p/text()")
    item.set('content', "".join(body_nodes))
コード例 #11
0
def people1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of ``div[@class='fl']`` inside ``div[@class='box01']`` is split
    on whitespace and only its first token (or ``""`` when there is none)
    is passed to ``my_utils.get_timestamp``. The content is the joined text
    of the ``p`` children of ``div[@id='rwb_zw']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    header_nodes = root.xpath("//div[@class='box01']/div[@class='fl']/text()")
    tokens = "".join(header_nodes).split()
    first_token = tokens[0] if tokens else ""
    stamp = my_utils.get_timestamp(first_token)
    body = "".join(root.xpath("//div[@id='rwb_zw']/p/text()"))
    item.set('timestamp', stamp)
    item.set('content', body)
コード例 #12
0
def aweb1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    Only the first text node of the ``h5`` inside
    ``div[@class='newsLeft newscontentB']`` is used as the time string; it
    is converted with ``my_utils.get_timestamp``. The content is the joined
    text of the ``p`` children matched by
    ``//div[@class='conT f14px'][1]``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    # Slice rather than index: a page without the <h5> text yields ""
    # instead of the lookup raising IndexError.
    time_text = "".join(
        root.xpath("//div[@class='newsLeft newscontentB']/h5/text()")[:1])
    ts = my_utils.get_timestamp(time_text)
    item.set('timestamp', ts)
    content = "".join(root.xpath("//div[@class='conT f14px'][1]/p/text()"))
    item.set('content', content)
コード例 #13
0
def smm1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text selected by ``//p[@class='news-tips']/label[1]/span/text()`` is
    split on whitespace; its first two tokens are concatenated (no
    separator) and passed to ``my_utils.get_timestamp``. The content is the
    joined text of the ``p`` children of ``div[@class='news-main']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    tip_nodes = root.xpath("//p[@class='news-tips']/label[1]/span/text()")
    tip_tokens = "".join(tip_nodes).split()
    compact_time = "".join(tip_tokens[:2])
    item.set('timestamp', my_utils.get_timestamp(compact_time))
    main_nodes = root.xpath("//div[@class='news-main']/p/text()")
    item.set('content', "".join(main_nodes))
コード例 #14
0
def china5e1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of ``div[@class='showtitinfo']`` inside
    ``div[@class='showtitle']`` is split on whitespace and its first token
    is passed to ``my_utils.get_timestamp``. The content is the joined text
    selected by ``//div[@id='showcontent']/div[1]/p/text()``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_text = "".join(
        root.xpath(
            "//div[@class='showtitle']/div[@class='showtitinfo']/text()"))
    # Join a one-element slice so empty or whitespace-only header text gives
    # "" rather than ``split()[0]`` raising IndexError.
    time_text = "".join(time_text.split()[:1])
    ts = my_utils.get_timestamp(time_text)
    content = "".join(root.xpath("//div[@id='showcontent']/div[1]/p/text()"))
    item.set('timestamp', ts)
    item.set('content', content)
コード例 #15
0
def texnet1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of ``p[@class='line22 fontgrey']`` is split on whitespace; the
    tokens at positions 1 and 2 (zero-based) are joined with a single space
    and passed to ``my_utils.get_timestamp``. The content is the joined text
    selected by
    ``//div[@class='detail-text line25 font14px']/div/p/text()``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    grey_line = "".join(root.xpath("//p[@class='line22 fontgrey']/text()"))
    picked = grey_line.split()[1:3]
    stamp = my_utils.get_timestamp(" ".join(picked))
    body_nodes = root.xpath(
        "//div[@class='detail-text line25 font14px']/div/p/text()")
    body = "".join(body_nodes)
    item.set('timestamp', stamp)
    item.set('content', body)
コード例 #16
0
def tnc1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The text of the ``p`` children of ``div[@class='article-title']`` is
    split on whitespace; the first two tokens are joined with a single space
    and passed to ``my_utils.get_timestamp``. The content is the joined text
    of the ``p`` children of ``div[@class='article-content c-mb-20']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    title_text = "".join(root.xpath("//div[@class='article-title']/p/text()"))
    head_tokens = title_text.split()[:2]
    stamp = my_utils.get_timestamp(" ".join(head_tokens))
    article_nodes = root.xpath(
        "//div[@class='article-content c-mb-20']/p/text()")
    article = "".join(article_nodes)
    item.set('timestamp', stamp)
    item.set('content', article)
コード例 #17
0
def chinaventure1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``span[3]`` inside ``div[@class='details_01_l']`` is
    converted with ``my_utils.get_timestamp``. The content is the joined
    text of the ``p`` children of
    ``div[@class='content_01 m_t_30 detasbmo']``. Both values are computed
    before either is stored.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_expr = "//div[@class='details_01_l']/span[3]/text()"
    body_expr = "//div[@class='content_01 m_t_30 detasbmo']/p/text()"
    stamp = my_utils.get_timestamp("".join(root.xpath(time_expr)))
    body = "".join(root.xpath(body_expr))
    item.set('timestamp', stamp)
    item.set('content', body)
コード例 #18
0
def chinabreed1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``span[@id='pub_date']`` inside
    ``div[@class='artInfo']`` is converted with ``my_utils.get_timestamp``.
    The content is the joined text selected by
    ``//div[@class='blkContainerSblk']/div[@id='artibody']/div/text()``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    date_expr = "//div[@class='artInfo']/span[@id='pub_date']/text()"
    pub_date = "".join(root.xpath(date_expr))
    item.set('timestamp', my_utils.get_timestamp(pub_date))
    body_expr = (
        "//div[@class='blkContainerSblk']/div[@id='artibody']/div/text()")
    body = "".join(root.xpath(body_expr))
    item.set('content', body)
コード例 #19
0
def ftchinese1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    Does nothing (and leaves ``item`` untouched) when ``root`` is ``None``.
    Otherwise the joined text of ``span[@class='story-time']`` is converted
    with ``my_utils.get_timestamp`` and the joined text of the ``p``
    children of ``div[@id='story-body-container']`` becomes the content.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller), or ``None``.
        item: Object exposing ``set(key, value)``.
    """
    if root is not None:
        time_nodes = root.xpath("//span[@class='story-time']/text()")
        stamp = my_utils.get_timestamp("".join(time_nodes))
        body_nodes = root.xpath("//div[@id='story-body-container']/p/text()")
        body = "".join(body_nodes)
        item.set('timestamp', stamp)
        item.set('content', body)
コード例 #20
0
def jrj2(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined, stripped text selected by
    ``//div[@class='titmain']/p[@class='inftop']/span[1]/text()`` is
    converted with ``my_utils.get_timestamp``. The content is the joined
    text of the ``p`` children of ``div[@class='texttit_m1']`` inside
    ``div[@class='titmain']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    head_nodes = root.xpath(
        "//div[@class='titmain']/p[@class='inftop']/span[1]/text()")
    stamp_source = "".join(head_nodes).strip()
    stamp = my_utils.get_timestamp(stamp_source)
    item.set('timestamp', stamp)
    body_nodes = root.xpath(
        "//div[@class='titmain']/div[@class='texttit_m1']/p/text()")
    item.set('content', "".join(body_nodes))
コード例 #21
0
def qq1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``span[@class='a_time']`` inside
    ``div[@class='a_Info']`` is converted with ``my_utils.get_timestamp``.
    The content is every descendant text node of the ``p`` elements under a
    ``div`` whose id contains ``Cnt-Main-Article-QQ``, skipping paragraphs
    whose ``style`` attribute contains ``COLOR: red``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_nodes = root.xpath(
        "//div[@class='a_Info']/span[@class='a_time']/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))
    item.set('timestamp', stamp)
    # Relative (".//") query, unlike the absolute one used for the time.
    body_expr = ".//div[contains(@id,'Cnt-Main-Article-QQ')]/p[not(contains(@style,'COLOR: red'))]//text()"
    item.set('content', "".join(root.xpath(body_expr)))
コード例 #22
0
def p21food1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    Both queries are rooted at ``div[@class='news_detail_t']``: the time
    string is the joined text of ``i[1]`` inside ``div[@class='ws_det_p2']``
    (converted with ``my_utils.get_timestamp``) and the content is the
    joined text of the ``p`` children of ``div[@class='ws_det_p3']``. Both
    values are computed before either is stored.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    def joined(expression):
        # Concatenate every text node the expression selects.
        return "".join(root.xpath(expression))

    stamp = my_utils.get_timestamp(joined(
        "//div[@class='news_detail_t']/div[@class='ws_det_p2']/i[1]/text()"))
    body = joined(
        "//div[@class='news_detail_t']/div[@class='ws_det_p3']/p/text()")
    item.set('timestamp', stamp)
    item.set('content', body)
コード例 #23
0
def dayoo1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``span[@class='time']`` is converted with
    ``my_utils.get_timestamp``; the content is the joined text of the ``p``
    children of ``div[@id='text_content']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_nodes = root.xpath("//span[@class='time']/text()")
    stamp = my_utils.get_timestamp("".join(time_nodes))
    item.set('timestamp', stamp)
    paragraph_nodes = root.xpath("//div[@id='text_content']/p/text()")
    item.set('content', "".join(paragraph_nodes))
コード例 #24
0
def bbtnews1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of the ``span`` children of ``div[@class='info']`` is
    converted with ``my_utils.get_timestamp``; the content is the joined
    text of the ``p`` children of ``div[@id='pageContent']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    def joined(expression):
        # Concatenate every text node the expression selects.
        return "".join(root.xpath(expression))

    info_text = joined("//div[@class='info']/span/text()")
    item.set('timestamp', my_utils.get_timestamp(info_text))
    item.set('content', joined("//div[@id='pageContent']/p/text()"))
コード例 #25
0
def lswb1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``li[1]`` inside ``ol[@class='breadcrumb']`` is
    converted with ``my_utils.get_timestamp``; the content is the joined
    text of the ``p`` children of ``div[@class='article-content']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_expr = "//ol[@class='breadcrumb']/li[1]/text()"
    crumb_text = "".join(root.xpath(time_expr))
    stamp = my_utils.get_timestamp(crumb_text)
    item.set('timestamp', stamp)
    body_expr = "//div[@class='article-content']/p/text()"
    article = "".join(root.xpath(body_expr))
    item.set('content', article)
コード例 #26
0
def banyuetan1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``div[@class='detail_tit_time']`` is converted with
    ``my_utils.get_timestamp``; the content is the joined text of the ``p``
    children of ``div[@id='detail_content']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    title_time_nodes = root.xpath("//div[@class='detail_tit_time']/text()")
    stamp = my_utils.get_timestamp("".join(title_time_nodes))
    item.set('timestamp', stamp)
    detail_nodes = root.xpath("//div[@id='detail_content']/p/text()")
    item.set('content', "".join(detail_nodes))
コード例 #27
0
def sohu3(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``span[@id='news-time']`` is converted with
    ``my_utils.get_timestamp``; the content is the joined text of the ``p``
    children of ``article[@id='mp-editor']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    news_time = "".join(root.xpath("//span[@id='news-time']/text()"))
    item.set('timestamp', my_utils.get_timestamp(news_time))
    editor_nodes = root.xpath("//article[@id='mp-editor']/p/text()")
    article = "".join(editor_nodes)
    item.set('content', article)
コード例 #28
0
def iresearch1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of the ``em`` children of ``div[@class='origin']`` is
    converted with ``my_utils.get_timestamp``; the content is the joined
    text of the ``p`` children of ``div[@class='m-article']``. Both values
    are computed before either is stored.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    origin_nodes = root.xpath("//div[@class='origin']/em/text()")
    stamp = my_utils.get_timestamp("".join(origin_nodes))
    article_nodes = root.xpath("//div[@class='m-article']/p/text()")
    article = "".join(article_nodes)
    for key, value in (('timestamp', stamp), ('content', article)):
        item.set(key, value)
コード例 #29
0
def gongkong1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text selected by ``//span[@class='f1405'][3]/text()`` is
    converted with ``my_utils.get_timestamp``; the content is the joined
    text of the ``p`` children of ``div[@id='article']``.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    time_expr = "//span[@class='f1405'][3]/text()"
    stamp = my_utils.get_timestamp("".join(root.xpath(time_expr)))
    item.set('timestamp', stamp)
    body_expr = "//div[@id='article']/p/text()"
    item.set('content', "".join(root.xpath(body_expr)))
コード例 #30
0
def carnoc1(root, item):
    """Set ``'timestamp'`` and ``'content'`` on ``item`` from ``root``.

    The joined text of ``span[@id='pubtime_baidu']`` is converted with
    ``my_utils.get_timestamp``; the content is the joined text of the ``p``
    children of ``div[@id='newstext']``. Both values are computed before
    either is stored.

    Args:
        root: Object exposing ``xpath()`` (presumably a parsed HTML tree --
            confirm with the caller).
        item: Object exposing ``set(key, value)``.
    """
    pub_nodes = root.xpath("//span[@id='pubtime_baidu']/text()")
    stamp = my_utils.get_timestamp("".join(pub_nodes))
    news_nodes = root.xpath("//div[@id='newstext']/p/text()")
    news_text = "".join(news_nodes)
    item.set('timestamp', stamp)
    item.set('content', news_text)