Example 1
0
            # Repeat the experiment over `args.samples` different train/test
            # splits to average out variance from any single split choice.
            for s in range(0, args.samples):
                print("#")
                # >> 5. Prepare training and test set.
                # Sliding window of `training_size` consecutive file indexes
                # starting at s. NOTE(review): assumes each author directory
                # holds files numbered 0..99 — confirm against the dataset.
                training_set_indexes = np.arange(0, 100, 1)[s:s+args.training_size]
                # Remaining indexes (values == positions in arange(0, 100))
                # become the test set, truncated to `test_size`.
                test_set_indexes = np.delete(np.arange(0, 100, 1), training_set_indexes, axis=0)[:args.test_size]

                # >> 6. Create Echo Word Classifier
                # A fresh classifier per split; `w` is passed in from outside —
                # presumably a pre-generated reservoir weight matrix shared
                # across samples so only the split varies — TODO confirm.
                classifier = RCNLPEchoWordClassifier(size=rc_size, input_scaling=rc_input_scaling, leak_rate=rc_leak_rate,
                                                     input_sparsity=rc_input_sparsity, converter=converter, n_classes=2,
                                                     spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity, w=w)

                # >> 7. Add examples
                # Binary task: the label is the author's position (0 or 1) in
                # the (author1, author2) pair.
                for author_index, author_id in enumerate((args.author1, args.author2)):
                    author_path = os.path.join(args.dataset, "total", author_id)
                    for file_index in training_set_indexes:
                        classifier.add_example(os.path.join(author_path, str(file_index) + ".txt"), author_index)
                    # end for
                # end for

                # >> 8. Train model
                classifier.train()

                # >> 9. Test model performance
                success = 0.0
                count = 0.0
                for author_index, author_id in enumerate((args.author1, args.author2)):
                    author_path = os.path.join(args.dataset, "total", author_id)
                    for file_index in test_set_indexes:
                        # pred() returns a 3-tuple; only the predicted class
                        # index is used here.
                        author_pred, _, _ = classifier.pred(os.path.join(author_path, str(file_index) + ".txt"))
                        if author_pred == author_index:
                            success += 1.0
Example 2
0
        # >> 5. Prepare training and test set.
        # NOTE(review): size=1 draws a single random training text — confirm
        # this one-shot setting is intended (other experiments in this project
        # size the training set with args.training_size).
        training_set_indexes = np.random.choice(np.arange(0, 100), size=1, replace=False)
        # All indexes not used for training, truncated to `test_size`.
        test_set_indexes = np.delete(np.arange(0, 100, 1), training_set_indexes)[:args.test_size]
        # First `args.negatives` files of the shared "others" pool are used as
        # negative examples.
        negatives_set_indexes = np.arange(0, args.negatives, 1)
        # All author ids 1..50 except the target author (ids are 1-based,
        # hence the_author-1 as the deletion position).
        other_authors = np.delete(np.arange(1, 51, 1), the_author-1)

        # >> 6. Create Echo Word Classifier
        # Binary setting: class 0 = target author, class 1 = negatives.
        classifier = RCNLPEchoWordClassifier(size=rc_size, input_scaling=rc_input_scaling, leak_rate=rc_leak_rate,
                                             input_sparsity=rc_input_sparsity, converter=converter, n_classes=2,
                                             spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity, w=w)

        # >> 7. Add authors examples
        author_path = os.path.join(args.dataset, "total", str(the_author))
        for file_index in training_set_indexes:
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            classifier.add_example(file_path, 0)
            # Accumulate the token count of the training material —
            # presumably for later reporting; verify against the caller.
            n_token += get_n_token(file_path)
        # end for

        # >> 8. Add negative examples
        others_path = os.path.join(args.dataset, "total", "others")
        for file_index in negatives_set_indexes:
            file_path = os.path.join(others_path, str(file_index) + ".txt")
            classifier.add_example(file_path, 1)
        # end for

        # >> 8. Train model
        # NOTE(review): step number ">> 8." is duplicated (also used above for
        # negative examples) — renumber when convenient.
        classifier.train()

        # >> 9. Test model performances
        success = 0.0
Example 3
0
        # k-fold cross-validation split: fold k is the test set; every other
        # fold is concatenated into the training set.
        test_set_indexes = indexes[k]
        training_set_indexes = indexes
        training_set_indexes = np.delete(training_set_indexes, k, axis=0)
        # Flatten the remaining (n_folds-1) x n_fold_samples folds into a
        # single index vector of length n_texts - n_fold_samples.
        training_set_indexes.shape = (n_texts - n_fold_samples)

        # Create Echo Word Classifier
        # No `w` argument here, unlike the other experiments — the reservoir
        # matrix is presumably generated internally per fold; TODO confirm
        # against RCNLPEchoWordClassifier's constructor.
        classifier = RCNLPEchoWordClassifier(size=rc_size, input_scaling=rc_input_scaling, leak_rate=rc_leak_rate,
                                             input_sparsity=rc_input_sparsity, converter=converter, n_classes=2,
                                             spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity)

        # Add examples
        print(u"Adding examples...")
        for training_index in training_set_indexes:
            # text_codes maps positional index -> file code;
            # texts_data maps file code -> author label.
            training_text_path = os.path.join(args.dataset, text_codes[training_index] + ".txt")
            training_text_author = texts_data[text_codes[training_index]]
            classifier.add_example(training_text_path, training_text_author)
        # end for

        # Train model
        print(u"Training...")
        classifier.train()

        # Test model performance
        success = 0.0
        # NOTE(review): count starts at 1.0 here, while the sibling
        # experiments in this project initialize it to 0.0 — confirm this is
        # not skewing the accuracy denominator by one.
        count = 1.0
        for test_index in test_set_indexes:
            test_text_path = os.path.join(args.dataset, text_codes[test_index] + ".txt")
            observed_author = texts_data[text_codes[test_index]]
            # NOTE(review): elsewhere pred() is unpacked as a 3-tuple
            # (pred, _, _) — confirm the return type of this classifier
            # version matches this bare assignment.
            predicted_author = classifier.pred(test_text_path)
            if observed_author == predicted_author:
                success += 1
Example 4
0
                    # (continuation of an RCNLPEchoWordClassifier(...) call
                    # whose opening line lies above this chunk)
                    leak_rate=rc_leak_rate,
                    input_sparsity=rc_input_sparsity,
                    converter=converter,
                    n_classes=2,
                    spectral_radius=rc_spectral_radius,
                    w_sparsity=rc_w_sparsity)

                # >> 7. Add authors examples
                # Binary task: author_index (0 or 1) within the
                # (author1, author2) pair is the training label.
                for author_index, author_id in enumerate(
                    (args.author1, args.author2)):
                    author_path = os.path.join(args.dataset, "total",
                                               str(author_id))
                    for file_index in training_set_indexes:
                        file_path = os.path.join(author_path,
                                                 str(file_index) + ".txt")
                        classifier.add_example(file_path, author_index)
                        # Accumulate the token count of the training material
                        # — presumably for later reporting; verify in caller.
                        n_token += get_n_token(file_path)
                    # end for
                # end for

                # >> 8. Train model
                classifier.train()

                # >> 9. Test model performance
                success = 0.0
                count = 0.0
                for author_index, author_id in enumerate(
                    (args.author1, args.author2)):
                    author_path = os.path.join(args.dataset, "total",
                                               str(author_id))
                    for file_index in test_set_indexes: