Exemple #1
0
    def extract_publishedtime(self, tree):
        """Extract the publication date from ``tree`` and return it formatted.

        The first node matched by ``self.publishedtime_xpath`` is stripped and
        its first whitespace-separated token is parsed, trying each known
        date layout in turn.

        Args:
            tree: Object exposing ``xpath()``; the first match must be a
                string-like value (it is stripped and split).

        Returns:
            The result of ``TimeTransform.struct_to_string`` applied to the
            parsed ``time.struct_time``.  When no layout matches, the raw
            token itself is handed to ``struct_to_string`` (best-effort
            behaviour kept from the previous implementation).

        Raises:
            IndexError: If the XPath matches nothing or the matched text is
                blank.
        """
        raw = tree.xpath(self.publishedtime_xpath)[0].strip().split()[0]

        # The non-ASCII layout is a ``u""`` literal instead of
        # ``"...".decode('utf8')``: same value on Python 2 (the file already
        # had to be UTF-8 for the decode to work) and also valid on Python 3.
        layouts = ("%Y-%m-%d", u"%Y年%m月%d日%H%M")

        parsed = raw
        for layout in layouts:
            try:
                parsed = time.strptime(raw, layout)
            except ValueError:
                # strptime reports a layout mismatch with ValueError; only
                # that is swallowed so unrelated errors are no longer hidden.
                continue
            break

        return TimeTransform.struct_to_string(parsed)
Exemple #2
0
    def parse_publishedtime(self, element):
        """Pull the publication date out of a search-result ``element``.

        Two page layouts are tried in order: the ``bbs f13`` block (marked in
        the original code as a Baidu Tieba search result), then the
        ``c-abstract`` time span.

        Args:
            element: Object exposing ``xpath()`` that yields text nodes.

        Returns:
            The value of ``TimeTransform.struct_to_string`` for the parsed
            date, or ``None`` when neither layout yields any text.

        Raises:
            ValueError: If the ``bbs f13`` text lacks the post-time marker.
            IndexError: If the selected text has no whitespace-separated token.
        """
        tieba_texts = element.xpath(".//div[@class='bbs f13']/text()")  # Baidu Tieba search result

        if tieba_texts:
            text = tieba_texts[0]
            marker_pos = text.index('发帖时间'.decode('utf8'))
            # Skip five characters from the marker start: the four-character
            # marker plus one more (presumably a separator -- confirm).
            text = text[marker_pos + 5:]
        else:
            abstract_texts = element.xpath(
                ".//div[@class='c-abstract']//span[@class=' newTimeFactor_before_abs m']/text()"
            )
            if not abstract_texts:
                return None
            text = abstract_texts[0]

        date_token = text.split()[0]
        parsed = TimeTransform.date_to_struct(date_token)
        return TimeTransform.struct_to_string(parsed)
Exemple #3
0
 def extract_publishedtime(self, tree):
     """Return the publication time found in ``tree`` as a formatted string.

     The first node matched by ``self.publishedtime_xpath`` is stripped and
     parsed as ``YYYY-MM-DD HH:MM:SS``; the resulting ``time.struct_time`` is
     passed to ``TimeTransform.struct_to_string``.

     Raises:
         IndexError: If the XPath matches nothing.
         ValueError: If the text does not match the expected layout.
     """
     matches = tree.xpath(self.publishedtime_xpath)
     raw_stamp = matches[0].strip()
     parsed = time.strptime(raw_stamp, "%Y-%m-%d %H:%M:%S")
     return TimeTransform.struct_to_string(parsed)
Exemple #4
0
 def extract_publishedtime(self, tree):
     """Return the publication time found in ``tree`` as a formatted string.

     The first node matched by ``self.publishedtime_xpath`` is stripped and
     parsed with the Chinese-style ``%Y年%m月%d日 %H:%M`` layout; the resulting
     ``time.struct_time`` is passed to ``TimeTransform.struct_to_string``.

     Raises:
         IndexError: If the XPath matches nothing.
         ValueError: If the text does not match the expected layout.
     """
     first_match = tree.xpath(self.publishedtime_xpath)[0]
     raw_stamp = first_match.strip()
     parsed = time.strptime(raw_stamp, "%Y年%m月%d日 %H:%M".decode('utf8'))
     return TimeTransform.struct_to_string(parsed)
 def extract_publishedtime(self, tree):
     """Return the publication date found in ``tree`` as a formatted string.

     Of all nodes matched by ``self.publishedtime_xpath``, the last one that
     is not blank is parsed as ``YYYY-MM-DD`` and the resulting
     ``time.struct_time`` is passed to ``TimeTransform.struct_to_string``.

     Args:
         tree: Object exposing ``xpath()`` that yields string-like values.

     Raises:
         IndexError: If every matched node is blank or nothing matches.
         ValueError: If the chosen text is not a ``YYYY-MM-DD`` date.
     """
     # Strip before selecting: the blank-check already ignored surrounding
     # whitespace, but the unstripped text used to reach strptime, which
     # rejects leading or trailing whitespace with ValueError.
     stripped = [text.strip() for text in tree.xpath(self.publishedtime_xpath)]
     stamp = [text for text in stripped if text][-1]
     parsed = time.strptime(stamp, "%Y-%m-%d")
     return TimeTransform.struct_to_string(parsed)
 def extract_publishedtime(self, tree):
     """Return the publication time found in ``tree`` as a formatted string.

     The first node matched by ``self.publishedtime_xpath`` is stripped, its
     first five characters are discarded, and the remainder is passed to
     ``TimeTransform.struct_to_string``.

     Raises:
         IndexError: If the XPath matches nothing.
     """
     first_match = tree.xpath(self.publishedtime_xpath)[0]
     # Discard the first five characters (presumably a label prefix -- confirm).
     trimmed = first_match.strip()[5:]
     # NOTE(review): a plain string, not a time.strptime() result, is handed
     # to struct_to_string here -- confirm the helper accepts that.
     return TimeTransform.struct_to_string(trimmed)
Exemple #7
0
 def extract_publishedtime(self, tree):
     """Return the publication date found in ``tree`` as a formatted string.

     All nodes matched by ``self.publishedtime_xpath`` are concatenated; the
     first whitespace-separated token of the result is parsed as
     ``YYYY-MM-DD`` and the resulting ``time.struct_time`` is passed to
     ``TimeTransform.struct_to_string``.

     Raises:
         IndexError: If the concatenated text is empty or blank.
         ValueError: If the token is not a ``YYYY-MM-DD`` date.
     """
     fragments = tree.xpath(self.publishedtime_xpath)
     joined = ''.join(fragments)
     date_token = joined.strip().split()[0]
     parsed = time.strptime(date_token, "%Y-%m-%d")
     return TimeTransform.struct_to_string(parsed)
 def extract_publishedtime(self, tree):
     """Return the publication date found in ``tree`` as a formatted string.

     The first whitespace-separated token of the first node matched by
     ``self.publishedtime_xpath`` has its first five characters removed; the
     rest is parsed as ``YYYY-MM-DD`` and the resulting ``time.struct_time``
     is passed to ``TimeTransform.struct_to_string``.

     Raises:
         IndexError: If the XPath matches nothing or the text is blank.
         ValueError: If the remaining text is not a ``YYYY-MM-DD`` date.
     """
     first_text = tree.xpath(self.publishedtime_xpath)[0]
     token = first_text.strip().split()[0]
     # Discard the first five characters (presumably a label prefix -- confirm).
     date_part = token[5:]
     parsed = time.strptime(date_part, "%Y-%m-%d".decode('utf8'))
     return TimeTransform.struct_to_string(parsed)