def main():
    """Build and save a TF-IDF corpus from a set of downloaded documents.

    Runs ``clean_html_thread(url, content)`` NUM_PAGES times on worker
    threads, where ``url`` is Wikipedia's "Special:Random" address and
    ``content`` is a list shared by all workers. Every item found in
    ``content`` once the workers have finished is added to the ``TfIdf``
    model and passed to ``print_keywords``; the model is then written back
    to the corpus and stopword files it was loaded from.

    NOTE(review): this assumes ``clean_html_thread`` appends its result to
    the ``content`` list it is given -- confirm against its definition.
    """
    # SETTINGS
    NUM_PAGES = 10000
    # Cap on simultaneously running workers, so that we do not create
    # NUM_PAGES threads (and as many concurrent requests) all at once.
    MAX_CONCURRENT_THREADS = 50
    corpus_filename = "corpus10k.txt"
    stopwords_filename = "stopwords10k.txt"
    url = "http://en.wikipedia.org/wiki/Special:Random"

    myTfIdf = TfIdf(corpus_filename, stopwords_filename)
    content = []

    # Run the workers in bounded batches. join() without a timeout only
    # returns once the thread has terminated, so no is-alive bookkeeping is
    # needed afterwards (the removed Thread.isAlive() call is gone with it).
    for batch_start in range(0, NUM_PAGES, MAX_CONCURRENT_THREADS):
        batch_size = min(MAX_CONCURRENT_THREADS, NUM_PAGES - batch_start)
        batch = [
            threading.Thread(target=clean_html_thread, args=(url, content))
            for _ in range(batch_size)
        ]
        for worker in batch:
            worker.start()
        for worker in batch:
            worker.join()

    for document in content:
        myTfIdf.add_input_document(document)
        print_keywords(document)

    myTfIdf.save_corpus_to_file(corpus_filename, stopwords_filename)