# Build the echo-state-network word classifier: two classes (one per author),
# reservoir hyper-parameters taken from the rc_* settings above.
classifier = EchoWordClassifier(
    classes=[0, 1],
    size=rc_size,
    input_scaling=rc_input_scaling,
    leak_rate=rc_leak_rate,
    input_sparsity=rc_input_sparsity,
    converter=wv_converter,
    spectral_radius=rc_spectral_radius,
    w_sparsity=rc_w_sparsity
)

    # Add examples: feed every training document of both authors to the
    # classifier, labelled with the author's index (0 or 1).
    for author_index, author_id in enumerate((args.author1, args.author2)):
        # str(author_id) for consistency with the test loop below, which
        # already coerces the id when building the path.
        author_path = os.path.join(args.dataset, "total", str(author_id))
        for file_index in training_set_indexes:
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            # Use a context manager so the file handle is closed after
            # reading (the original io.open(...).read() leaked handles).
            with io.open(file_path, 'r') as f:
                classifier.train(f.read(), author_index)
        # end for
    # end for

    # Finalize model training
    classifier.finalize(verbose=True)

    # Init test epoch: list of (document text, true author index) pairs.
    test_set = list()

    # Get text for each held-out document of both authors.
    for author_index, author_id in enumerate((args.author1, args.author2)):
        author_path = os.path.join(args.dataset, "total", str(author_id))
        for file_index in test_set_indexes:
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            # Context manager closes the file handle after reading
            # (the original io.open(...).read() leaked handles).
            with io.open(file_path, 'r') as f:
                test_set.append((f.read(), author_index))
        # end for
    # end for

    # Success rate of the trained classifier on the test set.
    success_rate = Metrics.success_rate(classifier, test_set, verbose=True, debug=True)
# Example no. 2  (scraped-page separator; original text: "Esempio n. 2")
# 0  (stray digit from the page extraction, kept for fidelity)
    # Add examples: here every document is its own class, so document_index
    # serves as the training label and increases across all authors.
    document_index = 0
    for author_id in np.arange(1, args.n_authors + 1):
        author_path = os.path.join(args.dataset, "total", str(author_id))
        for file_index in range(args.n_documents):
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            logger.info(u"Adding document {} as {}".format(
                file_path, document_index))
            # Context manager closes the file handle after reading
            # (the original io.open(...).read() leaked handles).
            with io.open(file_path, 'r') as f:
                classifier.train(f.read(), document_index)
            document_index += 1
        # end for
    # end for

    # Finalize model training
    classifier.finalize(verbose=args.verbose)

    # Pull the learned per-document embedding matrix out of the trained
    # classifier and log its dimensions.
    document_embeddings = classifier.get_embeddings()
    embeddings_shape = document_embeddings.shape
    logger.info(u"Document embeddings shape : {}".format(embeddings_shape))

    # Display similar doc for the first document of each author with each distance measure
    # NOTE(review): "euclidian" is misspelled, but it is a runtime key passed to
    # get_similar_documents — confirm what that helper expects before correcting.
    for distance_measure in ["euclidian", "cosine", "cosine_abs"]:
        print(u"###################### {} ######################".format(
            distance_measure))
        # NOTE(review): training above is author-major with args.n_documents docs
        # per author, so "first document of each author" would be a stride of
        # args.n_documents, not args.n_authors — verify whether this stride is
        # intentional (it only matches when n_documents == n_authors).
        for document_index in np.arange(0, n_total_docs, args.n_authors):
            similar_doc = get_similar_documents(
                document_index,
                document_embeddings,
                distance_measure=distance_measure)