Ejemplo n.º 1
0
 def get_dictionary():
     """Build a Dictionary from the Diceware data file.

     Reads the file at ``Diceware.fname`` line by line. Each line is handed
     to ``Diceware.parse_line``; lines for which it returns ``None`` are
     skipped. Every other line yields a ``Word`` built from
     ``tokens['word']`` that is added to the resulting dictionary.

     A percentage is written to stdout roughly every 5% of the expected
     number of lines.

     Returns:
         The populated ``Dictionary`` instance.
     """
     dicto = Dictionary()

     print('Parsing Diceware data...')

     # NOTE(review): hard-coded; presumably the number of lines expected in
     # the Diceware file. It only scales the progress display -- confirm.
     n_lines = 7780
     # Report every 5% using integer arithmetic. A float test such as
     # (progress % 0.05) < 1e-4 silently misses marks because of rounding
     # (e.g. 0.15 % 0.05 evaluates to ~0.05 instead of 0).
     step = max(1, n_lines // 20)
     count = 0

     # open file for reading
     with open(Diceware.fname, 'r') as fid:
         for line in fid:
             tokens = Diceware.parse_line(line)
             if tokens is None:
                 continue

             # The running count of accepted words is passed as the last
             # Word argument; the meaning of the two -1 arguments is not
             # visible here -- TODO confirm against the Word constructor.
             dicto.add_word(Word(tokens['word'], -1, -1, count))

             # increment counter and show progress
             count += 1
             if count % step == 0:
                 sys.stdout.write("\r%2.2f%%" % (100.0 * count / n_lines))
                 sys.stdout.flush()

     print('\nDone.')

     return dicto
Ejemplo n.º 2
0
    def preproc(self):
        """Normalize the dictionary by cleaning or dropping problematic words.

        Digits (0-9) are stripped from every word in ``self.dicto``. A word
        is dropped when nothing is left after stripping, or when the
        remainder still contains a regex non-word character. Surviving
        words are updated in place through ``set_string`` and collected
        into a fresh ``Dictionary`` that replaces ``self.dicto``.
        """
        print('Pre-processing dictionary...')

        # Raw strings keep the backslash escapes for the regex engine; a
        # plain '[\W]' literal is an invalid string escape and triggers a
        # warning on current Python versions.
        # Note: \W treats the underscore as a word character, so words
        # containing '_' are kept.
        non_word = re.compile(r'[\W]')
        digits = re.compile(r'[0-9]')

        cleaned = Dictionary()
        for word in self.dicto.get_words_iter():
            # remove numbers from words
            stripped = digits.sub('', word.string)
            if not stripped:
                # the word consisted solely of digits
                continue

            # skip words that contain non-word characters
            if non_word.search(stripped):
                continue

            word.set_string(stripped)
            cleaned.add_word(word)

        print('Done.')

        self.dicto = cleaned