def process_texts(self):
        """Collect the best TF-IDF keywords of every file under ``data/wiki``.

        Each directory entry is read as raw bytes, decoded as UTF-8 (bytes
        that cannot be decoded are dropped) and wrapped in a ``TextBlob``.
        For every document, each of its words is scored with
        ``TfIdf.compute_tfidf``. The score of a word is then stored under the
        last element of the sorted ``self.get_similar(...)`` result for that
        word, and the ten highest-scoring keys of the document are kept.

        The name of each processed file is printed as progress output.

        Returns:
            set: the union, over all documents, of each document's (at most)
            ten highest-scoring keys.
        """
        path = os.path.join('data', 'wiki')

        documents = []
        for file_name in os.listdir(path):
            file_path = os.path.join(path, file_name)
            # Read bytes and decode explicitly: the handle is released even if
            # reading/decoding/TextBlob fails, and the decode step behaves the
            # same on Python 2 and Python 3.
            with open(file_path, 'rb') as handle:
                text = handle.read().decode('UTF-8', 'ignore')
            documents.append((file_name, TextBlob(text)))

        tfidf = TfIdf(documents)

        # A set gives constant-time de-duplication; the result is a set anyway.
        relevant_words = set()
        for file_name, document in documents:
            # Parenthesised single argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(file_name)

            scores = {word: tfidf.compute_tfidf(word, document)
                      for word in document.words}

            # Re-key each score by the last item of the sorted similar-word
            # list. When several words share that key, the one visited last
            # overwrites the earlier scores.
            # NOTE(review): assumes get_similar never returns an empty
            # collection (otherwise similars[-1] raises IndexError) - confirm.
            selected_scores = {}
            for word in scores:
                similars = sorted(self.get_similar(scores.keys(), word))
                selected_scores[similars[-1]] = scores[word]

            sorted_words = sorted(selected_scores.items(),
                                  key=lambda item: item[1], reverse=True)
            relevant_words.update(word for word, _ in sorted_words[:10])

        return relevant_words