def read_dir(directory):
    '''Return the combined text of every entry listed for a directory.

    Each name produced by ``dir_list(directory)`` is read with
    ``read_file(directory + '/' + name)``. Every file's text is followed by
    a single space, so a non-empty result always ends with a space. An
    empty listing gives ``''``.
    '''
    # Build all "<text> " pieces first, then glue them together in one pass.
    pieces = [
        read_file(directory + '/' + name) + ' '
        for name in dir_list(directory)
    ]
    return ''.join(pieces)
def smart_cloud(self, input, max_text_size=72, min_text_size=12, exclude_words=True):
    '''Create a word cloud from a directory, a file, or a plain string.

    ``input`` is dispatched in this order:

    * path of an existing directory -> ``self.directory_cloud(input, ...)``
    * path of an existing file -> the file is read with ``read_file`` and
      its text is handed to ``self.text_cloud``
    * any other string -> treated as the text itself and handed to
      ``self.text_cloud``

    Anything that is not a string is reported on stdout as unsupported and
    nothing else happens.

    ``max_text_size`` and ``min_text_size`` are forwarded unchanged to the
    cloud-building method. ``exclude_words`` is stored on
    ``self.exclude_words`` before dispatching; set it to true if you want
    to eliminate words that only occur once.

    Returns None in every case.
    '''
    # NOTE: the parameter name ``input`` shadows the builtin, but it is part
    # of the public signature (callers may pass it by keyword), so it stays.
    self.exclude_words = exclude_words

    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring

    # Check the type BEFORE touching the filesystem: isdir()/isfile() raise
    # TypeError for values such as None, which previously made the
    # "not supported" message unreachable for those inputs.
    if not isinstance(input, string_types):
        print('Input type not supported.')
        print('Supported types: String, Directory, .txt file')
        return

    if isdir(input):
        self.directory_cloud(input, max_text_size, min_text_size)
    elif isfile(input):
        self.text_cloud(read_file(input), max_text_size, min_text_size)
    else:
        # Not a path that exists on disk: use the string itself as the text.
        self.text_cloud(input, max_text_size, min_text_size)
def dir_freq(directory):
    '''Count, for each word, how many files of a directory it shows up in.

    Every name returned by ``dir_list(directory)`` is read with
    ``read_file(directory + '/' + name)`` and passed through
    ``eliminate_repeats``; each word in that result adds one to the word's
    tally. The tally is therefore a per-file count, presuming
    ``eliminate_repeats`` yields each distinct word once (it is defined
    elsewhere -- confirm).

    Returns a list of ``(word, count)`` tuples in the dictionary's
    iteration order; an empty listing gives ``[]``.
    '''
    freqdict = {}
    for filename in dir_list(directory):
        filewords = eliminate_repeats(read_file(directory + '/' + filename))
        for word in filewords:
            # dict.get replaces the has_key() test, which Python 3 removed.
            freqdict[word] = freqdict.get(word, 0) + 1
    # items() already pairs each key with its value; list() keeps the return
    # type a real list on Python 3 as well.
    return list(freqdict.items())