def get_new_data(self, url, searcher: Searcher):
    """Fetch ``url`` and record the preprocessed text found there in ``self.data``.

    The page is downloaded, parsed with BeautifulSoup, and every text node
    that is not an HTML comment, does not begin with a whitespace character,
    and does not sit directly inside a non-content tag (``style``,
    ``script``, ``head``, ``title``, ``meta`` or the document root) is passed
    to ``searcher.preprocess(unit, True)``. Each non-empty result is stored
    as a key of ``self.data``, mapping to a dict keyed by this call's
    timestamp whose value is a list with one entry per occurrence.

    Args:
        url: Address of the page to download.
        searcher: Object whose ``preprocess(text, True)`` turns a text node
            into the value used as a key of ``self.data``.
            NOTE(review): the result is used directly as a dict key, so it
            is assumed to be hashable (e.g. a string) -- confirm against
            ``Searcher.preprocess``.

    Returns:
        Always ``0``.
    """
    content = requests.get(url=url).content
    # Millisecond timestamp shared by everything recorded during this call.
    ts = datetime.now().timestamp() * 1000
    # Pass the raw bytes so BeautifulSoup can detect the encoding itself.
    # str(content) would parse the "b'...'" repr instead, in which newlines
    # and non-ASCII bytes appear as literal backslash escapes.
    soup = bs4.BeautifulSoup(content)
    skipped_parents = (
        'style', 'script', '[document]', 'head', 'title', 'meta'
    )

    words = []
    for unit in soup.find_all(text=True):
        if isinstance(unit, bs4.element.Comment):
            continue
        # re.match anchors at the start: nodes that begin with whitespace
        # (including whitespace-only nodes) are skipped.
        if re.match(r"[\s\r\n]", unit):
            continue
        if unit.parent.name in skipped_parents:
            continue
        tokens = searcher.preprocess(unit, True)
        if len(tokens) > 0:
            words.append(tokens)

    # convert to dict
    for word in words:
        per_word = self.data.get(word)
        if per_word is None:
            per_word = self.data[word] = dict()
        # A word first seen in an earlier call has no entry for this call's
        # timestamp yet; create the list on demand instead of raising
        # KeyError on the in-place extend.
        per_word.setdefault(ts, []).append(ts * 10**12)
    return 0