def _get_backfill_events(self, txn, room_id, event_list, limit):
    """Collect event IDs reachable from ``event_list`` via prev-event edges.

    Starting from the given events, repeatedly takes the pending event with
    the greatest depth, records it, and queues the prev events that the
    ``event_edges`` table lists for it (non-state edges only).

    Args:
        txn: Database cursor; must support ``execute`` and row iteration.
        room_id: Room the starting events are looked up in.
        event_list: Event IDs to start the walk from.
        limit: Maximum number of event IDs to collect.

    Returns:
        set: The collected event IDs (at most ``limit`` of them).
    """
    logger.debug(
        "_get_backfill_events: %s, %s, %s",
        room_id, repr(event_list), limit
    )

    visited = set()

    # We want to make sure that we do a breadth-first, "depth" ordered
    # search.
    edge_sql = (
        "SELECT depth, prev_event_id FROM event_edges"
        " INNER JOIN events"
        " ON prev_event_id = events.event_id"
        " WHERE event_edges.event_id = ?"
        " AND event_edges.is_state = ?"
        " LIMIT ?"
    )

    # Entries are (-depth, event_id) so the deepest event is popped first.
    frontier = PriorityQueue()

    for seed_id in event_list:
        lookup = {
            "event_id": seed_id,
            "room_id": room_id,
        }
        seed_depth = self._simple_select_one_onecol_txn(
            txn,
            table="events",
            keyvalues=lookup,
            retcol="depth",
            allow_none=True,
        )
        # Seeds with no row (None) or a falsy depth are not queued.
        if not seed_depth:
            continue
        frontier.put((-seed_depth, seed_id))

    while not frontier.empty() and len(visited) < limit:
        try:
            _, current_id = frontier.get_nowait()
        except Empty:
            break

        # The same event may have been queued through several successors.
        if current_id in visited:
            continue
        visited.add(current_id)

        remaining = limit - len(visited)
        txn.execute(edge_sql, (current_id, False, remaining))

        for prev_depth, prev_id in txn:
            if prev_id not in visited:
                frontier.put((-prev_depth, prev_id))

    return visited
class LayersApplier(object):
    """Apply queued replacement layers to a piece of text.

    Most layers replace content. We try to do this intelligently here, so
    that layers don't step over each other: queued elements are applied
    starting with the longest ``original`` string.
    """

    # Matches one HTML tag: "<" up to the nearest ">".
    HTML_TAG_REGEX = re.compile(r'<[^>]*?>')

    def __init__(self):
        # Entries are (-len(original), (original, replacement, locations)).
        self.queue = PriorityQueue()
        # Working text; set by apply_layers().
        self.text = None

    def enqueue_from_list(self, elements_list):
        """Queue every layer element in ``elements_list``."""
        for le in elements_list:
            self.enqueue(le)

    def enqueue(self, layer_element):
        """Queue one ``(original, replacement, locations)`` layer element.

        The priority is the negated length of ``original`` so that longer
        originals come off the queue first.
        """
        original, replacement, locations = layer_element
        priority = len(original)
        item = (original, replacement, locations)
        self.queue.put((-priority, item))

    def location_replace(self, xml_node, original, replacement, locations):
        """Delegate to ``LocationReplace.location_replace`` for ``xml_node``."""
        LocationReplace().location_replace(xml_node, original, replacement,
                                           locations)

    def replace_all(self, original, replacement):
        """Replace all occurrences of ``original`` with ``replacement``.

        This is HTML aware: only the text outside of HTML tags is touched,
        the tags themselves (including their attributes) are copied verbatim.
        """
        text_chunks = []
        index = 0
        for match in self.HTML_TAG_REGEX.finditer(self.text):
            between = self.text[index:match.start()]
            text_chunks.append(between.replace(original, replacement))
            text_chunks.append(match.group(0))  # the tag itself
            index = match.end()
        # Text after the last tag (or the whole text when there are no tags)
        # is ordinary text as well and must receive the replacement too.
        text_chunks.append(self.text[index:].replace(original, replacement))
        self.text = "".join(text_chunks)

    def replace_at(self, original, replacement, locations):
        """Replace the occurrences of ``original`` at the given locations.

        ``locations`` is handed to ``LocationReplace.location_replace_text``
        in sorted order; the caller's sequence is left unmodified.
        """
        self.text = LocationReplace().location_replace_text(
            self.text, original, replacement, sorted(locations))

    def apply_layers(self, original_text):
        """Apply every queued layer element to ``original_text``.

        Elements with no locations are applied everywhere via
        ``replace_all``; the others only at their locations via
        ``replace_at``. The queue is empty afterwards.

        Returns:
            The text after all replacements.
        """
        self.text = original_text

        while not self.queue.empty():
            _, (original, replacement, locations) = self.queue.get()

            if locations:
                self.replace_at(original, replacement, locations)
            else:
                self.replace_all(original, replacement)

        return self.text
def _create_files_list(self):
    """Return ``(txt_file, wav_file)`` pairs ordered by wav file size.

    For every path in ``self._txt_files`` the matching wav path is the same
    path with its extension replaced by ``.wav``. Pairs are ordered by the
    on-disk size of the wav file, smallest first; equal sizes fall back to
    comparing the pairs themselves.
    """
    sized_pairs = []
    for transcript_path in self._txt_files:
        stem, _ext = os.path.splitext(transcript_path)
        audio_path = stem + ".wav"
        audio_size = os.path.getsize(audio_path)
        sized_pairs.append((audio_size, (transcript_path, audio_path)))

    # Ascending sort on (size, pair) yields the same order as draining a
    # priority queue filled with the same tuples.
    sized_pairs.sort()

    return [pair for _size, pair in sized_pairs]
class LayersApplier(object):
    """Apply queued replacement layers to a piece of text.

    Most layers replace content. We try to do this intelligently here, so
    that layers don't step over each other: queued elements are applied
    starting with the longest ``original`` string.
    """

    # Matches one HTML tag: "<" up to the nearest ">".
    HTML_TAG_REGEX = re.compile(r"<[^>]*?>")

    def __init__(self):
        # Entries are (-len(original), (original, replacement, locations)).
        self.queue = PriorityQueue()
        # Working text; set by apply_layers().
        self.text = None

    def enqueue_from_list(self, elements_list):
        """Queue every layer element in ``elements_list``."""
        for le in elements_list:
            self.enqueue(le)

    def enqueue(self, layer_element):
        """Queue one ``(original, replacement, locations)`` layer element.

        The priority is the negated length of ``original`` so that longer
        originals come off the queue first.
        """
        original, replacement, locations = layer_element
        priority = len(original)
        item = (original, replacement, locations)
        self.queue.put((-priority, item))

    def location_replace(self, xml_node, original, replacement, locations):
        """Delegate to ``LocationReplace.location_replace`` for ``xml_node``."""
        LocationReplace().location_replace(xml_node, original, replacement, locations)

    def unescape_text(self):
        """Unescape HTML entities in the working text.

        Because of the way we do replace_all(), we need to unescape HTML
        entities.
        """
        # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
        # its replacement. Interpreters without the ``html`` module keep
        # using the original call.
        try:
            from html import unescape
        except ImportError:
            unescape = HTMLParser().unescape
        self.text = unescape(self.text)

    def replace_all(self, original, replacement):
        """Replace all occurrences of ``original`` with ``replacement``.

        This is HTML aware: only the text outside of HTML tags is touched,
        the tags themselves (including their attributes) are copied verbatim.
        HTML entities in the result are unescaped afterwards.
        """
        text_chunks = []
        index = 0
        for match in self.HTML_TAG_REGEX.finditer(self.text):
            between = self.text[index : match.start()]
            text_chunks.append(between.replace(original, replacement))
            text_chunks.append(match.group(0))  # the tag itself
            index = match.end()
        # Text after the last tag (or the whole text when there are no tags)
        # is ordinary text as well and must receive the replacement too.
        text_chunks.append(self.text[index:].replace(original, replacement))
        self.text = "".join(text_chunks)
        self.unescape_text()

    def replace_at(self, original, replacement, locations):
        """Replace the occurrences of ``original`` at the given locations.

        ``locations`` is handed to ``LocationReplace.location_replace_text``
        in sorted order; the caller's sequence is left unmodified. HTML
        entities in the result are unescaped afterwards.
        """
        self.text = LocationReplace().location_replace_text(
            self.text, original, replacement, sorted(locations)
        )
        self.unescape_text()

    def apply_layers(self, original_text):
        """Apply every queued layer element to ``original_text``.

        Elements with no locations are applied everywhere via
        ``replace_all``; the others only at their locations via
        ``replace_at``. The queue is empty afterwards.

        Returns:
            The text after all replacements.
        """
        self.text = original_text

        while not self.queue.empty():
            _, (original, replacement, locations) = self.queue.get()

            if locations:
                self.replace_at(original, replacement, locations)
            else:
                self.replace_all(original, replacement)

        return self.text