def __timeEx(self):
    """Extract the time expressions found in ``self.target``.

    ``self.pattern`` is run over ``self.target``; matches that start exactly
    where the previous match ended are concatenated into a single piece.
    Each piece is then wrapped in a ``TimeUnit``.

    :return: the ``TimeUnit`` list after ``self.__filterTimeUnit`` has
        processed it
    """
    pieces = []
    prev_end = -1
    for hit in self.pattern.finditer(self.target):
        if hit.start() == prev_end:
            # This match begins right where the previous one stopped:
            # glue it onto the previous piece instead of starting a new one.
            pieces[-1] += hit.group()
        else:
            pieces.append(hit.group())
        prev_end = hit.end()

    # Time context: the previously recognized time serves as the context of
    # the next one. This handles several times in one phrase, e.g.
    # "Saturday 3 o'clock to 5 o'clock", where the second "5 o'clock" should
    # also resolve to Saturday.
    units = []
    context_tp = TimePoint()
    for piece in pieces:
        unit = TimeUnit(piece, self, context_tp)
        units.append(unit)
        context_tp = unit.tp

    return self.__filterTimeUnit(units)
def __timeEx(self):
    """Extract time expressions from ``self.target``, sentence by sentence.

    URLs in ``self.target`` are replaced by the placeholder ``[url]``, the
    text is passed through ``cut_sent`` and the result is split on newlines.
    Within each sentence, matches of ``self.pattern`` that start exactly
    where the previous match ended are merged into one fragment, and every
    fragment is turned into a ``TimeUnit``.

    Side effect: for every sentence that yields at least one fragment,
    ``self.isPreferFuture`` is set to whether that sentence contains '将'.

    :return: the collected ``TimeUnit`` list after ``self.__filterTimeUnit``
        has processed it
    """
    # Raw string: the pattern contains ``\(`` and ``\)``, which are invalid
    # escape sequences in a normal string literal.
    url_pattern = (
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|'
        r'(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    sents = cut_sent(re.sub(url_pattern, '[url]', self.target)).split('\n')

    res = []
    for sent in sents:
        fragments = []
        endline = -1
        for m in self.pattern.finditer(sent):
            if m.start() == endline:
                # Adjacent to the previous match: extend the last fragment.
                fragments[-1] += m.group()
            else:
                fragments.append(m.group())
            endline = m.end()

        if not fragments:
            continue

        isPreferFuture = '将' in sent
        self.isPreferFuture = isPreferFuture

        # Time context: the previously recognized time serves as the context
        # of the next one. This handles several times in one phrase, e.g.
        # "Saturday 3 o'clock to 5 o'clock", where the second "5 o'clock"
        # should also resolve to Saturday.
        contextTp = TimePoint()
        for fragment in fragments:
            unit = TimeUnit(fragment, self, contextTp, sent, isPreferFuture)
            res.append(unit)
            # Take the context from the unit just created. ``res`` spans all
            # sentences, so indexing it with the per-sentence position would
            # pick up a unit that belongs to an earlier sentence.
            contextTp = unit.tp

    res = self.__filterTimeUnit(res)
    return res
def __timeEx(self):
    """Collect time-expression fragments from ``self.target`` and parse them.

    Matches of ``self.pattern`` that start exactly where the previous match
    ended are joined into one fragment. Every fragment is wrapped in a
    ``TimeUnit``, and ``self.timeBase`` is overwritten with the current time
    (``arrow.now()``) formatted as ``'YYYY-M-D-H-m-s'``.

    :return: a two-element list ``[fragments, units]``: the merged match
        strings, and the ``TimeUnit`` list after ``self.__filterTimeUnit``
        has processed it
    """
    fragments = []
    last_end = -1
    for found in self.pattern.finditer(self.target):
        text = found.group()
        if found.start() != last_end:
            fragments.append(text)
        else:
            # Directly follows the previous match: extend the last fragment.
            fragments[-1] = fragments[-1] + text
        last_end = found.end()

    # Time context: the previously recognized time serves as the context of
    # the next one. This handles several times in one phrase, e.g.
    # "Saturday 3 o'clock to 5 o'clock", where the second "5 o'clock" should
    # also resolve to Saturday.
    parsed = []
    contextTp = TimePoint()
    for fragment in fragments:
        # TimeUnit receives this object itself as its second argument.
        current = TimeUnit(fragment, self, contextTp)
        parsed.append(current)
        contextTp = current.tp
        # NOTE(review): the base time is reset once per unit here; confirm it
        # was not meant to run a single time after the loop.
        now = arrow.get(arrow.now())
        self.timeBase = now.format('YYYY-M-D-H-m-s')

    parsed = self.__filterTimeUnit(parsed)
    return [fragments, parsed]
def __timeEx(self):
    """Find and parse the time expressions contained in ``self.target``.

    Matches of ``self.pattern`` that start exactly where the previous match
    ended are merged into a single string. The merged strings are printed to
    stdout and then each is wrapped in a ``TimeUnit``.

    :return: the ``TimeUnit`` list after ``self.__filterTimeUnit`` has
        processed it
    """
    time_extractor = []
    boundary = -1
    for match in self.pattern.finditer(self.target):
        touches_previous = match.start() == boundary
        if touches_previous:
            time_extractor[-1] = time_extractor[-1] + match.group()
        else:
            time_extractor.append(match.group())
        boundary = match.end()

    # Time context: the previously recognized time serves as the context of
    # the next one. This handles several times in one phrase, e.g.
    # "Saturday 3 o'clock to 5 o'clock", where the second "5 o'clock" should
    # also resolve to Saturday.
    contextTp = TimePoint()
    # NOTE(review): unconditional output on every call; consider moving this
    # to a logger if it is only meant for debugging.
    print('time_extractor:', time_extractor)

    units = []
    for expression in time_extractor:
        # TimeUnit receives this object itself as its second argument.
        unit = TimeUnit(expression, self, contextTp)
        units.append(unit)
        contextTp = unit.tp

    return self.__filterTimeUnit(units)