Ejemplo n.º 1
0
                    # Directory holding this author's text files.
                    author_path = os.path.join(args.dataset, "total", author_id)
                    # Feed each training file to the classifier, labelled with the author's index.
                    for file_index in training_set_indexes:
                        classifier.add_example(os.path.join(author_path, str(file_index) + ".txt"), author_index)
                    # end for
                # end for

                # >> 8. Train model
                classifier.train()

                # >> 9. Test model performance
                # success/count accumulate document-level hits to compute an accuracy percentage.
                success = 0.0
                count = 0.0
                for author_index, author_id in enumerate((args.author1, args.author2)):
                    author_path = os.path.join(args.dataset, "total", author_id)
                    for file_index in test_set_indexes:
                        # pred() presumably returns (predicted_class, prob_a, prob_b) -- TODO confirm signature
                        author_pred, _, _ = classifier.pred(os.path.join(author_path, str(file_index) + ".txt"))
                        if author_pred == author_index:
                            success += 1.0
                        # end if
                        count += 1.0
                    # end for
                # end for

                # >> 11. Save results
                # Store this fold's success rate (as a percentage) for later averaging.
                average_success_rate = np.append(average_success_rate, [(success / count) * 100.0])

                # Delete variables
                del classifier
            # end for

            # Log results
Ejemplo n.º 2
0
                         str(file_index) + ".txt"), 1)
    # end for

    # >> 8. Train model
    classifier.train()

    # >> 9. Test truth author
    # Count how often the classifier assigns the target author (class 0) to held-out files.
    success = 0.0
    count = 0.0
    author_path = os.path.join(args.dataset, "total", args.author)
    for file_index in test_set_indexes:
        file_path = os.path.join(author_path, str(file_index) + ".txt")

        # Document success rate
        if not args.sentence:
            author_pred, _ = classifier.pred(file_path)
            if author_pred == 0:
                success += 1.0
            # end if
            count += 1.0
        else:
            # Sentence success rate
            # NOTE(review): spaCy model is reloaded for every file -- spacy.load() should be hoisted out of the loop.
            nlp = spacy.load(args.lang)
            # NOTE(review): file handle is never closed -- prefer a 'with' block.
            doc = nlp(io.open(file_path, 'r').read())
            for sentence in doc.sents:
                sentence_pred, _ = classifier.pred_text(sentence.text)
                if sentence_pred == 0:
                    success += 1.0
                # end if
                count += 1.0
            # end for
Ejemplo n.º 3
0
            # Path and gold-standard author label for this training text.
            training_text_path = os.path.join(args.dataset, text_codes[training_index] + ".txt")
            training_text_author = texts_data[text_codes[training_index]]
            classifier.add_example(training_text_path, training_text_author)
        # end for

        # Train model
        print(u"Training...")
        classifier.train()
        success = 0.0
        count = 1.0
        for test_index in test_set_indexes:
            test_text_path = os.path.join(args.dataset, text_codes[test_index] + ".txt")
            observed_author = texts_data[text_codes[test_index]]
            predicted_author = classifier.pred(test_text_path)
            if observed_author == predicted_author:
                success += 1
            # end if
            count += 1
        # end for

        # >> 11. Save results
        average_success_rate = np.append(average_success_rate, [(success / count) * 100.0])

        # Delete variables
        del classifier
    # end for

    # Log results
    logging.save_results(u"Average success rate ", np.average(average_success_rate), display=True)
Ejemplo n.º 4
0
                        os.path.join(author_path,
                                     str(file_index) + ".txt"), author_index)
                # end for
            # end for

            # >> 8. Train model
            classifier.train()

            # >> 9. Test model performance
            # Document-level accuracy over the candidate authors in c.
            success = 0.0
            count = 0.0
            for author_index, author_id in enumerate(c):
                author_path = os.path.join(args.dataset, "total", author_id)
                for file_index in test_set_indexes:
                    # show_graph=True displays the classifier output for each prediction.
                    author_pred = classifier.pred(os.path.join(
                        author_path,
                        str(file_index) + ".txt"),
                                                  show_graph=True)
                    if author_pred == author_index:
                        success += 1.0
                    # end if
                    count += 1.0
                # end for
            # end for

            # >> 11. Save results
            # Store this fold's success percentage for later averaging.
            average_success_rate = np.append(average_success_rate,
                                             [(success / count) * 100.0])

            # Delete variables
            del classifier
        # end for
        # Only file 0 is used as the test set in this branch.
        test_set_indexes = np.array([0])

        # >> 6. Create Echo Word Classifier
        # Echo-state-network based word classifier with two output classes.
        classifier = RCNLPEchoWordClassifier(size=rc_size, input_scaling=rc_input_scaling, leak_rate=leaky_rate,
                                             input_sparsity=rc_input_sparsity, converter=converter, n_classes=2,
                                             spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity, w=w)

        # >> 7. Add examples
        # Label each author's training files with that author's index (0 or 1).
        for author_index, author_id in enumerate((args.author1, args.author2)):
            author_path = os.path.join(args.dataset, "total", author_id)
            for file_index in training_set_indexes:
                classifier.add_example(os.path.join(author_path, str(file_index) + ".txt"), author_index)
            # end for
        # end for

        # >> 8. Train model
        classifier.train()

        # >> 9. Test model performance
        # Only author1's file 0 is predicted here; print_outputs dumps the raw classifier outputs.
        author_index = 0
        author_id = args.author1
        author_path = os.path.join(args.dataset, "total", author_id)
        print("Testing model performances with %d text files for author from %s..." % (test_set_indexes.shape[0],
                                                                                       author_path))
        classifier.pred(os.path.join(author_path, str(0) + ".txt"), show_graph=False, print_outputs=True)

        # Delete variables
        del classifier
    # end for

# end if
            # end for

            # >> 8. Train model
            classifier.train()

            # >> 9. Test model performance
            success = 0.0
            count = 0.0
            # Evaluate both candidate authors on the held-out files.
            for author_index, author_id in enumerate(
                (args.author1, args.author2)):
                author_path = os.path.join(args.dataset, "total", author_id)
                for file_index in test_set_indexes:
                    file_path = os.path.join(author_path,
                                             str(file_index) + ".txt")
                    # NOTE(review): this prediction is recomputed below in the
                    # document branch, so this call looks redundant; confirm no
                    # later code relies on author_pred before removing it.
                    author_pred, _, _ = classifier.pred(os.path.join(
                        author_path,
                        str(file_index) + ".txt"),
                                                        show_graph=False)
                    # Success rate
                    if not args.sentence:
                        author_pred, _, _ = classifier.pred(
                            os.path.join(author_path,
                                         str(file_index) + ".txt"))
                        if author_pred == author_index:
                            success += 1.0
                        # end if
                        count += 1.0
                    else:
                        # Sentence success rate
                        # NOTE(review): spaCy model is reloaded for every file -- hoist spacy.load() out of the loop.
                        nlp = spacy.load(args.lang)
                        doc = nlp(io.open(file_path, 'r').read())
                        for sentence in doc.sents:
Ejemplo n.º 7
0
        # >> 8. Train model
        classifier.train()

        # >> 9. Test model performances
        success = 0.0
        count = 0.0
        # Track the highest same-author probability seen and which branch produced it.
        max_prob = 0
        max_cat = ''

        # >> 10. Test same author
        for file_index in test_set_indexes:
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            # Doc. success rate
            if not args.sentence:
                author_pred, same_prob, diff_prob = classifier.pred(file_path)
                same_probs = np.append(same_probs, same_prob)
                if same_prob > max_prob:
                    max_prob = same_prob
                    max_cat = 'same'
                # end if
                # Count a success only when "same" beats "different" AND clears the threshold.
                if same_prob > diff_prob and same_prob > args.threshold:
                    success += 1.0
                # end if
                count += 1.0
            else:
                # Sentence success rate
                # NOTE(review): spaCy model is reloaded for every file; file handle is never closed.
                nlp = spacy.load(args.lang)
                doc = nlp(io.open(file_path, 'r').read())
                for sentence in doc.sents:
                    author_pred, same_prob, diff_prob = classifier.pred_text(sentence.text)
                # >> 7. Add authors examples
                # Positive example: text1 labelled as class 0 ("same author").
                classifier.add_example(text1_path, 0)

                # >> 8. Add negative examples
                # NOTE(review): others_path is built from "total/others" but never used;
                # the loop below joins "others" directly under args.dataset instead.
                # One of the two paths is likely wrong -- confirm against the dataset layout.
                others_path = os.path.join(args.dataset, "total", "others")
                for file_index in range(0, args.negatives):
                    file_path = os.path.join(args.dataset, "others", str(file_index) + ".txt")
                    classifier.add_example(file_path, 1)
                # end for

                # >> 8. Train model
                classifier.train()

                # Get similarity
                # The "same author" probability is used as the similarity score between text1 and text2.
                author_pred, same_prob, diff_prob = classifier.pred(text2_path)
                print("%s : %f" % (text2, same_prob))
                # Save
                similarity_matrix[index1, index2] = same_prob
            # end if
            index2 += 1
        # end for
        index1 += 1
    # end for
    # Display the similarity matrix as a grayscale image.
    plt.imshow(similarity_matrix, cmap='gray')
    plt.show()
    # FIX: pickle requires a binary-mode file ('w' raises TypeError under
    # Python 3 and corrupts output on Windows under Python 2); the 'with'
    # block also guarantees the handle is flushed and closed.
    with open(args.matrix, 'wb') as matrix_file:
        pickle.dump(similarity_matrix, matrix_file)

    # Get links
    # Counter for the number of author links detected below.
    count_links = 0