Exemple #1
0
    def train(self, corpus_dir):
        """Learn ham/spam header data from a corpus labelled by !truth.txt.

        The corpus directory is remembered on ``self.train_corpus_dir``.
        When the directory holds no ``!truth.txt`` file, ``self.trained`` is
        set to ``False`` and the method returns without raising. Otherwise
        every file listed in the corpus' ``truth_dict`` is parsed for its
        sender and subject; entries labelled with ``self.neg_tag`` are passed
        to ``save_ham_header``, all others to ``save_spam_header``.
        """
        self.train_corpus_dir = corpus_dir

        # No label file means nothing to learn from: mark the filter as
        # untrained and leave quietly (deliberately no exception).
        labels_path = os.path.join(corpus_dir, '!truth.txt')
        if not os.path.exists(labels_path):
            self.trained = False
            return

        # TrainingCorpus supplies the label mapping and the e-mail parsing.
        corpus = TrainingCorpus(corpus_dir)

        for fname in corpus.truth_dict:
            sender, subject = corpus.parse_email(fname)
            # Anything not tagged with neg_tag is treated as spam.
            is_ham = corpus.truth_dict[fname] == self.neg_tag
            store = self.save_ham_header if is_ham else self.save_spam_header
            store(sender, subject)