Example #1
0
    def preprocess_all(self, raw_files):
        '''
        Build the document collection and its bag of words from raw input.

        The raw files are handed to the ``preprocess_all`` helper found in
        module scope; its result is stored on ``self.documents`` and then
        passed to ``bag_of_documents``, whose result is stored on
        ``self.docs_bag``.

        Args:
            raw_files: dictionary mapping document key -> document content.

        Returns:
            None. The results are kept on the instance.
        '''
        logger.info("Preprocessing...")
        # Inside a method body the bare name is looked up in module scope,
        # not in the class, so this calls the helper rather than this method.
        parsed = preprocess_all(raw_files)
        self.documents = parsed
        self.docs_bag = bag_of_documents(self.documents)
Example #2
0
    def preprocess_all(self, raw_files):
        '''
        Turn the raw files into documents and derive per-collection data.

        Steps, in order:
            1. build the documents via the ``preprocess_all`` helper found
               in module scope and store them on ``self.documents``
            2. store the number of documents on ``self.docs_no``
            3. store the result of ``bag_of_documents`` on ``self.docs_bag``
               (per the original notes: for each term, the documents that
               contain it and how many times it occurs)

        Args:
            raw_files: dict (key = document key, value = document text)

        Returns:
            None. The results are kept on the instance.
        '''
        logger.info("Preprocessing...")
        # The bare name resolves in module scope, not in the class, so this
        # calls the helper rather than re-entering this method.
        built_documents = preprocess_all(raw_files)
        self.documents = built_documents
        document_count = len(self.documents)
        self.docs_no = document_count
        self.docs_bag = bag_of_documents(self.documents)
Example #3
0
    def preprocess_all(self, raw_files):
        '''
        Preprocess the raw files and cache the derived structures.

        Populates three instance attributes, in this order:
            documents: result of the ``preprocess_all`` helper found in
                module scope, applied to ``raw_files``
            docs_no: ``len`` of ``documents``
            docs_bag: result of ``bag_of_documents`` applied to
                ``documents`` (per the original notes: for each term, the
                documents in which it exists and its occurrence counts)

        Args:
            raw_files: dict (key = document key, value = document text)

        Returns:
            None.
        '''
        logger.info("Preprocessing...")
        # Name lookup inside a method skips the class namespace, so this is
        # the helper from module scope, not a recursive call.
        docs = preprocess_all(raw_files)
        self.documents = docs
        total = len(self.documents)
        self.docs_no = total
        self.docs_bag = bag_of_documents(self.documents)