예제 #1
0
파일: benchmark.py 프로젝트: phdowling/mSDA
 def __iter__(self):
     """Yield the training portion of the Reuters stream as ``(content, label)`` pairs.

     Each sample pairs a document's ``"content"`` with a boolean that is
     true when ``"acq"`` is among the document's ``"topics"``.

     The first ``num_train_samples`` documents are yielded to the caller.
     While ``self.first_iteration`` is set, every later document is appended
     to ``test_samples`` instead.  The flag is cleared once a pass runs to
     completion, so subsequent passes leave ``test_samples`` untouched.

     ``num_train_samples`` and ``test_samples`` are taken from the enclosing
     scope; they are not defined in this method.
     """
     for index, document in enumerate(stream_reuters_documents()):
         # todo: maybe try "usa" or "earn"
         sample = document["content"], "acq" in document["topics"]
         # ``index`` is zero-based, so ``<`` yields exactly
         # ``num_train_samples`` samples (``>`` on a zero-based counter
         # let one extra document into the training split).
         if index < num_train_samples:
             yield sample
         elif self.first_iteration:
             test_samples.append(sample)
         else:
             # The held-out samples were collected on the first pass; the
             # rest of the stream would be read only to be discarded.
             break
     self.first_iteration = False
예제 #2
0
 def __iter__(self):
     """Yield the training portion of the Reuters stream as ``(content, label)`` pairs.

     Each sample pairs a document's ``"content"`` with a boolean that is
     true when ``"acq"`` is among the document's ``"topics"``.

     The first ``num_train_samples`` documents are yielded to the caller.
     While ``self.first_iteration`` is set, every later document is appended
     to ``test_samples`` instead.  The flag is cleared once a pass runs to
     completion, so subsequent passes leave ``test_samples`` untouched.

     ``num_train_samples`` and ``test_samples`` are taken from the enclosing
     scope; they are not defined in this method.
     """
     for index, document in enumerate(stream_reuters_documents()):
         # todo: maybe try "usa" or "earn"
         sample = document["content"], "acq" in document["topics"]
         # ``index`` is zero-based, so ``<`` yields exactly
         # ``num_train_samples`` samples (``>`` on a zero-based counter
         # let one extra document into the training split).
         if index < num_train_samples:
             yield sample
         elif self.first_iteration:
             test_samples.append(sample)
         else:
             # The held-out samples were collected on the first pass; the
             # rest of the stream would be read only to be discarded.
             break
     self.first_iteration = False
예제 #3
0
 def get_documents(self):
     """Lazily yield the preprocessed ``"content"`` of each Reuters document.

     Documents come from ``stream_reuters_documents()``; each one's
     ``"content"`` field is passed through ``simple_preprocess`` and the
     result is yielded, one per document, in stream order.
     """
     reuters_stream = stream_reuters_documents()
     for entry in reuters_stream:
         raw_text = entry["content"]
         processed = simple_preprocess(raw_text)
         yield processed
예제 #4
0
def get_topic_counts():
    """Count how many times each topic occurs across the Reuters stream.

    Returns a ``defaultdict(int)`` mapping every topic found in a
    document's ``"topics"`` to its number of occurrences.
    """
    tally = defaultdict(int)
    # Flatten the per-document topic lists into one lazy sequence.
    every_topic = (
        topic
        for entry in stream_reuters_documents()
        for topic in entry["topics"]
    )
    for topic in every_topic:
        tally[topic] += 1
    return tally
예제 #5
0
파일: benchmark.py 프로젝트: phdowling/mSDA
 def get_documents(self):
     """Lazily yield the preprocessed ``"content"`` of each Reuters document.

     Documents come from ``stream_reuters_documents()``; each one's
     ``"content"`` field is passed through ``simple_preprocess`` and the
     result is yielded, one per document, in stream order.
     """
     reuters_stream = stream_reuters_documents()
     for entry in reuters_stream:
         raw_text = entry["content"]
         processed = simple_preprocess(raw_text)
         yield processed
예제 #6
0
파일: benchmark.py 프로젝트: phdowling/mSDA
def get_topic_counts():
    """Count how many times each topic occurs across the Reuters stream.

    Returns a ``defaultdict(int)`` mapping every topic found in a
    document's ``"topics"`` to its number of occurrences.
    """
    tally = defaultdict(int)
    # Flatten the per-document topic lists into one lazy sequence.
    every_topic = (
        topic
        for entry in stream_reuters_documents()
        for topic in entry["topics"]
    )
    for topic in every_topic:
        tally[topic] += 1
    return tally