def __iter__(self):
    """Yield the training samples from the Reuters document stream.

    Each sample is a ``(content, label)`` tuple where ``label`` is the
    result of ``"acq" in document["topics"]``.  The first
    ``num_train_samples`` documents of the stream are yielded as training
    samples.  Documents after that are never yielded; while
    ``self.first_iteration`` is true they are appended to ``test_samples``
    instead, so the held-out set is collected only once.

    ``num_train_samples`` and ``test_samples`` are looked up in an
    enclosing scope that is not defined inside this method.
    """
    for index, document in enumerate(stream_reuters_documents()):
        # todo: maybe try "usa" or "earn"
        sample = document["content"], "acq" in document["topics"]
        # `>=` rather than `>`: indices are 0-based, so indices
        # 0 .. num_train_samples - 1 are exactly num_train_samples documents.
        if index >= num_train_samples:
            if self.first_iteration:
                test_samples.append(sample)
        else:
            yield sample
    # Reached only when the stream has been consumed to the end; later
    # passes then stop adding to test_samples.
    self.first_iteration = False
def __iter__(self):
    """Iterate over the training portion of the Reuters document stream.

    Produces ``(content, label)`` tuples, with ``label`` being the result
    of ``"acq" in document["topics"]``.  Only the first
    ``num_train_samples`` documents are yielded.  Every later document is
    withheld from the caller and, while ``self.first_iteration`` is true,
    appended to ``test_samples`` so that the test set is built exactly once.

    ``num_train_samples`` and ``test_samples`` come from an enclosing scope
    that is not defined inside this method.
    """
    for position, document in enumerate(stream_reuters_documents()):
        # todo: maybe try "usa" or "earn"
        sample = document["content"], "acq" in document["topics"]
        if position < num_train_samples:
            # 0-based positions 0 .. num_train_samples - 1 form the training
            # split (the previous `count > num_train_samples` test let one
            # extra document through).
            yield sample
        elif self.first_iteration:
            test_samples.append(sample)
    # Executed only after the whole stream has been consumed; from then on
    # repeated passes no longer touch test_samples.
    self.first_iteration = False
def get_documents(self):
    """Lazily yield ``simple_preprocess`` applied to each document's content.

    Documents are pulled one at a time from ``stream_reuters_documents()``;
    nothing is materialised up front.
    """
    for record in stream_reuters_documents():
        text = record["content"]
        yield simple_preprocess(text)
def get_topic_counts():
    """Count how many times each topic occurs across the document stream.

    Returns:
        A ``defaultdict(int)`` mapping each topic found in a document's
        ``"topics"`` entry to its number of occurrences.
    """
    topic_counts = defaultdict(int)
    for record in stream_reuters_documents():
        for topic_name in record["topics"]:
            topic_counts[topic_name] = topic_counts[topic_name] + 1
    return topic_counts