def __iter__(self):
    """Iterate over the parsed records of ``<self.path>.info``.

    Every line of the file is passed through ``self._parse_info``. A record
    is skipped when ``self.clean_deleted`` is set and its author is
    ``"[deleted]"``, or when ``self.clean_bots`` is set and its author is
    either recognised by ``is_bot`` or listed in ``FILTERED_USERS``.

    Yields:
        The value returned by ``self._parse_info`` for each surviving line.
    """
    with open(self.path + ".info") as info_file:
        for raw_line in info_file:
            record = self._parse_info(raw_line)

            # The author is only looked up when the matching flag is enabled.
            from_deleted = (
                self.clean_deleted and record["author"] == "[deleted]"
            )
            # The bot check is skipped for records already rejected above.
            from_bot = (
                not from_deleted
                and self.clean_bots
                and (
                    is_bot(record["author"])
                    or record["author"] in FILTERED_USERS
                )
            )

            if not (from_deleted or from_bot):
                yield record
def __iter__(self, week=None):
    """Iterate over title records, each paired with its deserialized ``Doc``.

    ``<self.path>.title.bin`` and ``<self.path>.info`` are read in lockstep:
    one info line is consumed for every byte string that ``Doc.read_bytes``
    produces, so skipped records keep the two files aligned.

    Args:
        week: When not ``None``, only records for which
            ``get_week(record["timestamp"]) == week`` are yielded.

    Yields:
        The value returned by ``self._parse_info`` for each surviving line,
        with a ``"doc"`` entry holding the deserialized ``Doc``.
    """
    with open(self.path + ".info") as info:
        # The serialized docs are raw bytes, so the file must be opened in
        # binary mode: text mode decodes the data on Python 3 and translates
        # newlines on Windows, either of which corrupts the stream.
        with open(self.path + ".title.bin", "rb") as title_bin:
            for byte_string in Doc.read_bytes(title_bin):
                # NOTE(review): readline() returns "" once the info file is
                # exhausted; confirm _parse_info copes if the files can
                # differ in length.
                info_line = info.readline()
                comment_info = self._parse_info(info_line)

                if week is not None and get_week(comment_info["timestamp"]) != week:
                    continue
                if self.clean_deleted and comment_info["author"] == "[deleted]":
                    continue
                if self.clean_bots and (
                    is_bot(comment_info["author"])
                    or comment_info["author"] in FILTERED_USERS
                ):
                    continue

                # Deserialize only after every filter has passed, so that
                # skipped records never pay for building a Doc.
                comment_info["doc"] = Doc(self._vocab).from_bytes(byte_string)
                yield comment_info
def __iter__(self, week=None):
    """Iterate over comment records with their deserialized ``Doc`` and text.

    ``<self.path>.bin`` and ``<self.path>.info`` are read in lockstep: one
    info line is consumed for every byte string that ``Doc.read_bytes``
    produces, so skipped records keep the two files aligned. Iteration ends
    when either file is exhausted.

    Args:
        week: When not ``None``, only records for which
            ``get_week(record["timestamp"]) == week`` are yielded.

    Yields:
        The value returned by ``self._parse_info`` for each surviving line,
        with a ``"doc"`` entry (the deserialized ``Doc``) and a ``"text"``
        entry (the result of ``self._text_from_doc`` on that doc).
    """
    # Named ``bin_file`` rather than ``bin`` to avoid shadowing the builtin.
    with open(self.path + ".bin", "rb") as bin_file:
        with open(self.path + ".info") as info:
            for byte_string in Doc.read_bytes(bin_file):
                # The builtin next() works on Python 2.6+ and 3, whereas the
                # file.next() method exists only on Python 2. The default
                # value keeps StopIteration from escaping the generator,
                # which PEP 479 would turn into a RuntimeError.
                info_line = next(info, None)
                if info_line is None:
                    return
                comment_info = self._parse_info(info_line)

                if week is not None and get_week(comment_info["timestamp"]) != week:
                    continue
                if self.clean_deleted and comment_info["author"] == "[deleted]":
                    continue
                if self.clean_bots and (
                    is_bot(comment_info["author"])
                    or comment_info["author"] in FILTERED_USERS
                ):
                    continue

                # Deserialize only after every filter has passed, so that
                # skipped records never pay for building a Doc.
                doc = Doc(self._vocab).from_bytes(byte_string)
                comment_info["doc"] = doc
                comment_info["text"] = self._text_from_doc(doc)
                yield comment_info