def main(pos_tagged, document_key, pos_tag_key, language, dump_verbs, dump_tf_idf,
         dump_stdev, dump_popularity, dump_final, processes):
    """ Computes the three verb rankings: average TF-IDF, average of TF-IDF
        standard deviation and popularity, then combines them into a final
        harmonic ranking and dumps everything to the given file handles.

        :param pos_tagged: POS-tagged corpus items
        :param document_key: key holding each item's document text
        :param pos_tag_key: key holding each item's POS tags
        :param language: language of the corpus
        :param dump_verbs: writable file for the lemma-to-token map
        :param dump_tf_idf: writable file for the average TF-IDF ranking
        :param dump_stdev: writable file for the TF-IDF standard deviation ranking
        :param dump_popularity: writable file for the popularity ranking
        :param dump_final: writable file for the combined final ranking
        :param processes: number of worker processes for the ranking computations
    """
    logger.info('Computing lemma-to-token map and TF-IDF matrix ...')
    # Run the two independent preprocessing steps in parallel (2 workers)
    lemma_tokens, (vectorizer, tf_idf_matrix) = parallel.execute(
        2,
        produce_lemma_tokens, (pos_tagged, pos_tag_key, language),
        compute_tf_idf_matrix, (pos_tagged, document_key)
    )

    logger.info('Scoring verbs by popularity ...')
    pop_ranking = PopularityRanking(pos_tagged, pos_tag_key).find_ranking(processes)

    logger.info('Scoring verbs by TF-IDF based metrics (average and standard deviation) ...')
    tfidf_ranking, stdev_ranking = TFIDFRanking(vectorizer, lemma_tokens, tf_idf_matrix).find_ranking(processes)

    logger.info('Producing combined final ranking ...')
    final_ranking = harmonic_ranking(pop_ranking, tfidf_ranking, stdev_ranking)

    json.dump(tfidf_ranking, dump_tf_idf, indent=2)
    json.dump(stdev_ranking, dump_stdev, indent=2)
    json.dump(pop_ranking, dump_popularity, indent=2)
    json.dump(final_ranking, dump_final, indent=2)
    # Lazy %-style args: the message is only formatted if the level is enabled
    logger.info('Dumped all the rankings to %s',
                [dump_tf_idf.name, dump_stdev.name, dump_popularity.name, dump_final.name])

    # Token collections (presumably sets) are not JSON-serializable; coerce to list
    json.dump(lemma_tokens, dump_verbs, default=list, indent=2)
    logger.info("Dumped lemma-to-token map to '%s'", dump_verbs.name)
def test_execute(self):
    """ parallel.execute run sequentially (0 processes) must match a plain map. """
    # Flatten (function, args) pairs into one argument sequence:
    # function, [0], function, [1], ... — a linear comprehension instead of
    # the quadratic reduce-with-tuple-concatenation idiom.
    funcs = [item for x in xrange(10) for item in (self.function, [x])]
    self.assertEqual(parallel.execute(0, *funcs), map(self.function, xrange(10)))