Ejemplo n.º 1
0
                converter = RCNLPWordVectorConverter(resize=-1, pca_model=pca_model)
            # end if

            # >> 3. Array for results
            # Starts empty; presumably one success rate per sample is appended
            # further down (not visible in this excerpt) -- TODO confirm.
            average_success_rate = np.array([])

            # Repeat the whole train/test experiment args.samples times
            for s in range(0, args.samples):
                # Progress marker: one "#" per sample
                print("#")
                # >> 5. Prepare training and test set.
                # Training files: a contiguous window of args.training_size
                # indexes starting at s, taken from 0..99 (the window shifts by
                # one file for each sample).
                training_set_indexes = np.arange(0, 100, 1)[s:s+args.training_size]
                # Test files: the indexes of 0..99 that are not in the training
                # window, truncated to the first args.test_size of them.
                test_set_indexes = np.delete(np.arange(0, 100, 1), training_set_indexes, axis=0)[:args.test_size]

                # >> 6. Create Echo Word Classifier
                # A fresh two-class classifier is built for every sample; the
                # same pre-built matrix w is handed to each of them.
                classifier = RCNLPEchoWordClassifier(size=rc_size, input_scaling=rc_input_scaling, leak_rate=rc_leak_rate,
                                                     input_sparsity=rc_input_sparsity, converter=converter, n_classes=2,
                                                     spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity, w=w)

                # >> 7. Add examples
                # The position in the (author1, author2) pair is the class label:
                # args.author1 -> 0, args.author2 -> 1.
                for author_index, author_id in enumerate((args.author1, args.author2)):
                    # Texts are read from <dataset>/total/<author_id>/<file_index>.txt
                    author_path = os.path.join(args.dataset, "total", author_id)
                    for file_index in training_set_indexes:
                        classifier.add_example(os.path.join(author_path, str(file_index) + ".txt"), author_index)
                    # end for
                # end for

                # >> 8. Train model
                classifier.train()

                # >> 9. Test model performance
                # Float accumulator for the evaluation that follows this excerpt.
                success = 0.0
Ejemplo n.º 2
0
    # Starts empty; presumably one success rate per fold is appended further
    # down (not visible in this excerpt) -- TODO confirm.
    average_success_rate = np.array([])

    # n-Fold cross validation: fold k is held out for testing, all the other
    # folds are used for training.
    for k in range(0, args.k):
        # Info
        print(u"K-Fold {}".format(k))

        # Prepare training and test set
        # NOTE(review): assumes `indexes` is a 2-D array with one row of text
        # indexes per fold -- confirm where `indexes` is built.
        test_set_indexes = indexes[k]
        training_set_indexes = indexes
        # np.delete returns a new array, so `indexes` itself is left untouched.
        training_set_indexes = np.delete(training_set_indexes, k, axis=0)
        # Flatten the remaining folds in place into a single 1-D list of
        # (n_texts - n_fold_samples) text indexes.
        training_set_indexes.shape = (n_texts - n_fold_samples)

        # Create Echo Word Classifier (a fresh two-class model for every fold)
        classifier = RCNLPEchoWordClassifier(size=rc_size, input_scaling=rc_input_scaling, leak_rate=rc_leak_rate,
                                             input_sparsity=rc_input_sparsity, converter=converter, n_classes=2,
                                             spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity)

        # Add examples
        print(u"Adding examples...")
        for training_index in training_set_indexes:
            # text_codes maps a text index to its code; the code is both the
            # file name (<dataset>/<code>.txt) and the key into texts_data,
            # whose value is passed to the classifier as the example's label.
            training_text_path = os.path.join(args.dataset, text_codes[training_index] + ".txt")
            training_text_author = texts_data[text_codes[training_index]]
            classifier.add_example(training_text_path, training_text_author)
        # end for

        # Train model
        print(u"Training...")
        classifier.train()

        # Test model performance
    # >> 4. For each reservoir size to evaluate
    for reservoir_size in reservoir_sizes:
        print("Reservoir size %d" % reservoir_size)

        # Success rates collected for this reservoir size (starts empty)
        reservoir_size_average_success_rate = np.array([])

        # >> 5. Repeat the experiment args.samples times
        for s in range(0, args.samples):
            # NOTE(review): the matching except clause is outside this excerpt,
            # so which errors are tolerated here cannot be told from this view.
            try:
                # >> 6. Create Echo Word Classifier
                # One class per author (args.n_authors); the reservoir size is
                # the swept parameter, all other settings are held fixed.
                classifier = RCNLPEchoWordClassifier(
                    size=reservoir_size,
                    input_scaling=rc_input_scaling,
                    leak_rate=rc_leak_rate,
                    input_sparsity=rc_input_sparsity,
                    converter=converter,
                    n_classes=args.n_authors,
                    spectral_radius=rc_spectral_radius,
                    w_sparsity=rc_w_sparsity)
                # >> 7. Add examples
                # Author directories are numbered 1..args.n_authors, while the
                # class label passed to the classifier is the zero-based
                # position (author_index).
                for author_index, author_id in enumerate(
                        np.arange(1, args.n_authors + 1, 1)):
                    # author_id is a NumPy integer; os.path.join only accepts
                    # string / path-like components, so convert it explicitly.
                    author_path = os.path.join(args.dataset, "total",
                                               str(author_id))
                    for file_index in training_set_indexes:
                        # Training text: <dataset>/total/<author_id>/<file_index>.txt
                        classifier.add_example(
                            os.path.join(author_path,
                                         str(file_index) + ".txt"),
                            author_index)
    # W matrix
    # Each entry of the (rc_size x rc_size) matrix is first drawn as 0.0 or 1.0,
    # with probability rc_w_sparsity of being 1.0 ...
    w = mdp.numx.random.choice([0.0, 1.0], (rc_size, rc_size), p=[1.0 - rc_w_sparsity, rc_w_sparsity])
    # ... then every entry equal to 1 is replaced by its own random value
    # (presumably uniform in [0, 1), if mdp.numx is NumPy -- TODO confirm).
    w[w == 1] = mdp.numx.random.rand(len(w[w == 1]))

    # >> 4. For each leak rate
    # The same matrix w built above is passed to the classifier of both runs.
    for index, leaky_rate in enumerate((args.lr1, args.lr2)):
        print("Leaky rate %f" % leaky_rate)

        # >> 5. Prepare training and test set.
        # Files 1..99 are used for training; file 0 is the single test text.
        training_set_indexes = np.arange(1, 100, 1)
        test_set_indexes = np.array([0])

        # >> 6. Create Echo Word Classifier
        # Two-class model; leak_rate is the only argument that changes between
        # the two iterations.
        classifier = RCNLPEchoWordClassifier(size=rc_size, input_scaling=rc_input_scaling, leak_rate=leaky_rate,
                                             input_sparsity=rc_input_sparsity, converter=converter, n_classes=2,
                                             spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity, w=w)

        # >> 7. Add examples
        # The position in the (author1, author2) pair is the class label:
        # args.author1 -> 0, args.author2 -> 1.
        for author_index, author_id in enumerate((args.author1, args.author2)):
            # Texts are read from <dataset>/total/<author_id>/<file_index>.txt
            author_path = os.path.join(args.dataset, "total", author_id)
            for file_index in training_set_indexes:
                classifier.add_example(os.path.join(author_path, str(file_index) + ".txt"), author_index)
            # end for
        # end for

        # >> 8. Train model
        classifier.train()

        # >> 9. Test model performance
        # Rebinds author_index (previously the loop variable above) to 0 for
        # the evaluation code that follows this excerpt.
        author_index = 0
Ejemplo n.º 5
0
    # >> 4. Repeat the experiment args.samples times
    for s in range(args.samples):
        # Running total of the values returned by get_n_token() for the
        # author's training files of this sample.
        n_token = 0

        # Choose a random author among the ids 1..50
        the_author = np.random.choice(np.arange(1, 51, 1))

        # >> 5. Prepare training and test set.
        # Training: a single randomly chosen file index in 0..99 (size=1).
        training_set_indexes = np.random.choice(np.arange(0, 100), size=1, replace=False)
        # Test: the remaining file indexes, truncated to the first args.test_size.
        test_set_indexes = np.delete(np.arange(0, 100, 1), training_set_indexes)[:args.test_size]
        # Negative examples: files 0..args.negatives-1 of the "others" directory.
        negatives_set_indexes = np.arange(0, args.negatives, 1)
        # Ids 1..50 without the chosen author (id i sits at position i-1).
        # NOTE(review): not used in the visible part of this excerpt.
        other_authors = np.delete(np.arange(1, 51, 1), the_author-1)

        # >> 6. Create Echo Word Classifier
        # Two classes: 0 = the chosen author, 1 = everybody else.
        classifier = RCNLPEchoWordClassifier(size=rc_size, input_scaling=rc_input_scaling, leak_rate=rc_leak_rate,
                                             input_sparsity=rc_input_sparsity, converter=converter, n_classes=2,
                                             spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity, w=w)

        # >> 7. Add authors examples (label 0)
        # Texts are read from <dataset>/total/<the_author>/<file_index>.txt
        author_path = os.path.join(args.dataset, "total", str(the_author))
        for file_index in training_set_indexes:
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            classifier.add_example(file_path, 0)
            n_token += get_n_token(file_path)
        # end for

        # >> 8. Add negative examples (label 1)
        # Texts are read from <dataset>/total/others/<file_index>.txt
        others_path = os.path.join(args.dataset, "total", "others")
        for file_index in negatives_set_indexes:
            file_path = os.path.join(others_path, str(file_index) + ".txt")
            classifier.add_example(file_path, 1)