def _load_corpus(self, corpus_dir):
    """Load every document record found under ``corpus_dir``.

    Resets ``self.documents`` to an empty list, then walks ``corpus_dir``
    recursively.  Each file is opened in binary mode and read through a
    ``RecordReader``; every blob it returns is parsed into a ``Document``
    (constructed with ``self.model.num_topics``) and appended to
    ``self.documents``.

    Args:
        corpus_dir: Path of the directory tree holding the corpus files.

    Returns:
        False if ``corpus_dir`` does not exist (an error is logged),
        True otherwise.
    """
    self.documents = []
    if not os.path.exists(corpus_dir):
        logging.error('The corpus directory %s does not exists.', corpus_dir)
        return False

    for root, _dirs, files in os.walk(corpus_dir):
        for name in files:
            # The context manager closes each file even if parsing raises,
            # so walking a large corpus does not accumulate open handles.
            with open(os.path.join(root, name), 'rb') as fp:
                record_reader = RecordReader(fp)
                while True:
                    blob = record_reader.read()
                    # The loop ends when read() returns None; an identity
                    # check keeps other falsy blobs from being confused
                    # with that sentinel.
                    if blob is None:
                        break
                    document = Document(self.model.num_topics)
                    document.parse_from_string(blob)
                    self.documents.append(document)

    return True
Exemple #2
0
    def _load_corpus(self, corpus_dir):
        """Load every document record found under ``corpus_dir``.

        Resets ``self.documents`` to an empty list, then walks
        ``corpus_dir`` recursively.  Each file is opened in binary mode and
        read through a ``RecordReader``; every blob it returns is parsed
        into a ``Document`` (constructed with ``self.model.num_topics``)
        and appended to ``self.documents``.

        Args:
            corpus_dir: Path of the directory tree holding the corpus files.

        Returns:
            False if ``corpus_dir`` does not exist (an error is logged),
            True otherwise.
        """
        self.documents = []
        if not os.path.exists(corpus_dir):
            logging.error('The corpus directory %s does not exists.',
                          corpus_dir)
            return False

        for root, _dirs, files in os.walk(corpus_dir):
            for name in files:
                # The context manager closes each file even if parsing
                # raises, so walking a large corpus does not accumulate
                # open handles.
                with open(os.path.join(root, name), 'rb') as fp:
                    record_reader = RecordReader(fp)
                    while True:
                        blob = record_reader.read()
                        # The loop ends when read() returns None; an
                        # identity check keeps other falsy blobs from
                        # being confused with that sentinel.
                        if blob is None:
                            break
                        document = Document(self.model.num_topics)
                        document.parse_from_string(blob)
                        self.documents.append(document)

        return True