def trans_time(self, t):
    """Convert a scraped timestamp label into a time struct.

    Three shapes of input are recognised, checked in this order:

    * text containing the day marker (U+65E5): everything up to and
      including the marker is handed to ``TimeTransform.date_to_struct``
      and that call's result is returned unchanged;
    * relative text containing the "ago" marker (U+524D) together with a
      unit (days, hours, minutes, seconds): the integer written before the
      unit is subtracted from the current time;
    * anything else: the current local time.

    Args:
        t: Unicode text to interpret.

    Returns:
        Whatever ``TimeTransform.date_to_struct`` returns for absolute
        dates, otherwise a ``time.struct_time``.
    """
    day_marker = u'日'
    if day_marker in t:
        return TimeTransform.date_to_struct(t[:t.index(day_marker) + 1])

    if u'前' in t:
        # Units are tried from largest to smallest; the first one present wins.
        units = (
            (u'天', 24 * 60 * 60),
            (u'小时', 60 * 60),
            (u'分钟', 60),
            (u'秒', 1),
        )
        for unit, seconds in units:
            if unit not in t:
                continue
            try:
                amount = int(t[:t.index(unit)])
            except ValueError:
                # The text before the unit is not a plain integer; treat the
                # label as unrecognised instead of crashing the caller.
                break
            # NOTE(review): relative times are expressed in UTC (gmtime)
            # while the fallback below uses local time -- confirm which one
            # callers expect before unifying them.
            return time.gmtime(time.time() - amount * seconds)

    return time.localtime()
def parse_publishedtime(self, element):
    """Extract the publication time of one search-result element.

    The forum-style block (``div.bbs.f13``) is looked up first; when it is
    present, the date is the text that follows the "post time" marker plus
    one separator character. Otherwise the abstract's time span is used.
    The first whitespace-delimited token of that text is passed through
    ``TimeTransform.date_to_struct`` and ``TimeTransform.struct_to_string``.

    Args:
        element: Object exposing an ``xpath(query)`` method that returns a
            list of text nodes.

    Returns:
        The result of ``TimeTransform.struct_to_string``, or ``None`` when
        no usable time text is found (no matching node, marker missing, or
        nothing left after the marker).
    """
    marker = u'发帖时间'
    # Baidu Tieba search results carry the time inside this block.
    forum_text = element.xpath(".//div[@class='bbs f13']/text()")
    if forum_text:
        raw = forum_text[0]
        start = raw.find(marker)
        if start < 0:
            # Marker absent: there is nothing to parse in this block.
            return None
        # Skip the marker itself plus the single separator that follows it.
        raw = raw[start + len(marker) + 1:]
    else:
        abstract_text = element.xpath(
            ".//div[@class='c-abstract']//span[@class=' newTimeFactor_before_abs m']/text()"
        )
        if not abstract_text:
            return None
        raw = abstract_text[0]

    tokens = raw.split()
    if not tokens:
        return None
    return TimeTransform.struct_to_string(TimeTransform.date_to_struct(tokens[0]))
def read_keywords(self):
    """Read the search time range and keywords from ``self.text``.

    Only the first non-blank line of ``self.text`` is consumed. It is split
    on ``;``: the first field must hold exactly two values separated by a
    single space (start and end of the range), the optional second field
    holds whitespace-separated keywords.

    Side effects:
        Sets ``self.starttime`` and ``self.endtime`` (via
        ``Utils.transform_coding`` and ``TimeTransform.date_to_struct``)
        when a non-blank line exists, and always sets ``self.allwords``.
        Keywords are de-duplicated, so their order is not preserved. When
        the line has no keyword field, a single default keyword is used.
        When every line is blank, ``self.allwords`` is an empty list.

    Raises:
        ValueError: If the first field does not split into exactly two
            parts on a single space.
    """
    keywords = []
    for line in self.text:
        line = line.strip()
        if not line:
            continue

        try:
            line = line.decode('utf8')
        except (UnicodeError, AttributeError):
            # Best effort: keep the line as-is when it cannot be decoded or
            # is already text.
            # NOTE(review): the previous code retried the very same UTF-8
            # decode a second time; a different fallback encoding may have
            # been intended -- confirm.
            pass

        fields = line.split(';')
        starttime, endtime = fields[0].split(' ')
        self.starttime = TimeTransform.date_to_struct(
            Utils.transform_coding(starttime))
        self.endtime = TimeTransform.date_to_struct(
            Utils.transform_coding(endtime))
        # NOTE: a disabled variant used to push endtime forward by one day.

        if len(fields) > 1:
            # split() never yields empty or blank items, so no further
            # cleanup is needed after de-duplication.
            keywords.extend(set(fields[1].split()))
        else:
            keywords = [u'地震']
        # Only the first non-blank line is meaningful.
        break

    self.allwords = keywords
def extract_publishedtime(self, tree):
    """Extract the publication time from a parsed page.

    Takes the first whitespace-delimited token of the first result of
    ``self.publishedtime_xpath`` and tries two layouts in turn:
    ``%Y-%m-%d`` and the year/month/day layout written with CJK markers
    followed by ``%H%M``.

    Args:
        tree: Object exposing an ``xpath(query)`` method.

    Returns:
        The result of ``TimeTransform.struct_to_string``. When neither
        layout matches, the raw token is passed through unparsed
        (best effort, as before).

    Raises:
        IndexError: If the XPath yields nothing or the text is blank.
    """
    token = tree.xpath(self.publishedtime_xpath)[0].strip().split()[0]
    parsed = token
    # Only parse failures are tolerated; anything else should surface.
    for layout in ("%Y-%m-%d", u"%Y年%m月%d日%H%M"):
        try:
            parsed = time.strptime(token, layout)
        except ValueError:
            continue
        break
    return TimeTransform.struct_to_string(parsed)
def extract_publishedtime(self, tree):
    """Extract the publication time from a parsed page.

    The first result of ``self.publishedtime_xpath`` is trimmed, parsed as
    ``%Y-%m-%d %H:%M:%S`` and handed to ``TimeTransform.struct_to_string``.

    Args:
        tree: Object exposing an ``xpath(query)`` method.

    Returns:
        The result of ``TimeTransform.struct_to_string``.

    Raises:
        IndexError: If the XPath yields nothing.
        ValueError: If the text does not match the expected layout.
    """
    first_node = tree.xpath(self.publishedtime_xpath)[0]
    parsed = time.strptime(first_node.strip(), "%Y-%m-%d %H:%M:%S")
    return TimeTransform.struct_to_string(parsed)
def extract_publishedtime(self, tree):
    """Extract the publication time from a parsed page.

    The first result of ``self.publishedtime_xpath`` is trimmed, parsed
    with the year/month/day layout written with CJK markers followed by
    ``%H:%M``, and handed to ``TimeTransform.struct_to_string``.

    Args:
        tree: Object exposing an ``xpath(query)`` method.

    Returns:
        The result of ``TimeTransform.struct_to_string``.

    Raises:
        IndexError: If the XPath yields nothing.
        ValueError: If the text does not match the expected layout.
    """
    layout = "%Y年%m月%d日 %H:%M".decode('utf8')
    first_node = tree.xpath(self.publishedtime_xpath)[0]
    parsed = time.strptime(first_node.strip(), layout)
    return TimeTransform.struct_to_string(parsed)
def extract_publishedtime(self, tree):
    """Extract the publication time from a parsed page.

    Uses the last non-blank result of ``self.publishedtime_xpath``, parses
    it as ``%Y-%m-%d`` and hands it to ``TimeTransform.struct_to_string``.

    Args:
        tree: Object exposing an ``xpath(query)`` method.

    Returns:
        The result of ``TimeTransform.struct_to_string``.

    Raises:
        IndexError: If the XPath yields no non-blank text.
        ValueError: If the text does not match ``%Y-%m-%d``.
    """
    # Trim each node before filtering: strptime rejects surrounding
    # whitespace, so the selected text must be stripped as well.
    trimmed = [text.strip() for text in tree.xpath(self.publishedtime_xpath)]
    last_text = [text for text in trimmed if text][-1]
    parsed = time.strptime(last_text, "%Y-%m-%d")
    return TimeTransform.struct_to_string(parsed)
def extract_publishedtime(self, tree):
    """Extract the publication time from a parsed page.

    The first result of ``self.publishedtime_xpath`` is trimmed, its first
    five characters are dropped, and the remainder is handed to
    ``TimeTransform.struct_to_string``.

    Args:
        tree: Object exposing an ``xpath(query)`` method.

    Returns:
        The result of ``TimeTransform.struct_to_string``.

    Raises:
        IndexError: If the XPath yields nothing.
    """
    first_node = tree.xpath(self.publishedtime_xpath)[0]
    # Drop the five-character prefix that precedes the time text.
    remainder = first_node.strip()[5:]
    # NOTE(review): the remainder is still a string here (no strptime call),
    # unlike the sibling extractors -- confirm struct_to_string accepts it.
    return TimeTransform.struct_to_string(remainder)
def extract_publishedtime(self, tree):
    """Extract the publication time from a parsed page.

    All results of ``self.publishedtime_xpath`` are concatenated; the first
    whitespace-delimited token of that text is parsed as ``%Y-%m-%d`` and
    handed to ``TimeTransform.struct_to_string``.

    Args:
        tree: Object exposing an ``xpath(query)`` method.

    Returns:
        The result of ``TimeTransform.struct_to_string``.

    Raises:
        IndexError: If the concatenated text is blank.
        ValueError: If the token does not match ``%Y-%m-%d``.
    """
    joined = ''.join(tree.xpath(self.publishedtime_xpath))
    date_token = joined.strip().split()[0]
    parsed = time.strptime(date_token, "%Y-%m-%d")
    return TimeTransform.struct_to_string(parsed)
def extract_publishedtime(self, tree):
    """Extract the publication time from a parsed page.

    Takes the first whitespace-delimited token of the first result of
    ``self.publishedtime_xpath``, drops its first five characters, parses
    the rest as ``%Y-%m-%d`` and hands it to
    ``TimeTransform.struct_to_string``.

    Args:
        tree: Object exposing an ``xpath(query)`` method.

    Returns:
        The result of ``TimeTransform.struct_to_string``.

    Raises:
        IndexError: If the XPath yields nothing or the text is blank.
        ValueError: If the remaining text does not match ``%Y-%m-%d``.
    """
    node_text = tree.xpath(self.publishedtime_xpath)[0]
    leading_token = node_text.strip().split()[0]
    # Drop the five-character prefix that precedes the date.
    date_part = leading_token[5:]
    parsed = time.strptime(date_part, "%Y-%m-%d".decode('utf8'))
    return TimeTransform.struct_to_string(parsed)