def fill_example_queue(self):
    """Read raw (article, abstract) pairs from the data files and place
    processed Examples onto the example queue.

    Loops until the underlying generator is exhausted. In single_pass
    mode exhaustion is expected and ends the thread cleanly; otherwise
    it indicates a data error and raises.
    """
    raw_pairs = self.text_generator(
        data.example_generator(self._data_path, self._single_pass))

    while True:
        try:
            # Each item is a (article, abstract) pair of raw strings.
            article, abstract = next(raw_pairs)
        except StopIteration:
            if self._single_pass:
                # Dataset fully read in single_pass mode: stop this thread.
                self._finished_reading = True
                break
            raise Exception(
                "single_pass mode is off but the example generator is out of data; error."
            )

        # Split the abstract on its <s>/</s> sentence tags.
        abstract_sentences = [
            s.strip() for s in data.abstract2sents(abstract)
        ]
        # Process into an Example and enqueue it.
        self._example_queue.put(
            Example(article, abstract_sentences, self._vocab))
def __getitem__(self, index):
    """Return the dataset item at `index` as a processed Example."""
    raw_article = self.db["articles"][index]
    raw_abstract = self.db["abstracts"][index]

    # Split the abstract on its <s>/</s> sentence tags, then reuse the
    # Example type from the threaded Batcher code.
    sentences = [s.strip() for s in data.abstract2sents(raw_abstract)]
    return Example(raw_article, sentences, self.vocabulary)
def run(self):
    """Producer thread loop: pull (abstract, article) pairs from the
    generator, filter out pairs that are too short, and enqueue
    processed Examples.

    Runs until the generator is exhausted (StopIteration) or
    `self.alive` is cleared externally. ValueError from the generator
    (malformed record) skips that record.

    Fix: the original busy-spun — `while self.alive` re-checked
    `queue.full()` with no pause, burning a full CPU core whenever the
    queue was full. Now sleeps briefly before re-checking.
    """
    import time  # local import so this fix is self-contained

    self.example_generator = self.generator()
    while self.alive:
        if self.queue.full():
            # Queue is full: yield the CPU instead of spinning, but keep
            # the loop short enough to stay responsive to self.alive.
            time.sleep(0.05)
            continue
        try:
            abstract, article = next(self.example_generator)
        except StopIteration:
            # Generator exhausted: shut this producer down.
            self.alive = False
            break
        except ValueError:
            # Malformed record — skip it and keep reading.
            continue
        # Drop pairs too short to be useful training examples.
        if len(abstract) <= self.min_len or len(article) <= self.min_len:
            continue
        abstract_sentences = [
            sent.strip() for sent in data.abstract2sents(abstract)
        ]
        self.queue.put(Example(article, abstract_sentences, self.vocab))