Esempio n. 1
0
def main(preprocessed_node_path, argument_path, dictionary_path, tfidf_path):
    """Fit a TF-IDF model on a pickled dictionary and save it to disk.

    The model is fitted directly on the ``dictionary`` attribute of the
    unpickled object. An earlier variant that restricted fitting to the
    preprocessed nodes referenced by arguments is disabled, which is why
    two of the parameters are currently not read.

    Args:
        preprocessed_node_path: Currently unused; kept so existing callers
            keep working.
        argument_path: Currently unused; kept so existing callers keep
            working.
        dictionary_path: Location of the pickled dictionary object. The
            unpickled object must expose a ``dictionary`` attribute.
        tfidf_path: Destination handed to ``TfIdf.save``.
    """
    # Read the function's own parameters instead of a module-level `args`
    # namespace, so the values passed by the caller are actually honoured.
    dictionary_path = Path(dictionary_path)
    tfidf_path = Path(tfidf_path)

    # NOTE: unpickling executes arbitrary code; only load dictionary files
    # that come from a trusted source.
    # The context manager guarantees the file handle is closed after loading.
    with dictionary_path.open('rb') as dictionary_file:
        dictionary = pkl.load(dictionary_file)

    tfidf = TfIdf()
    tfidf.fit(dictionary.dictionary)
    tfidf.save(tfidf_path)