def create_model(name):
    """
    Create classifier model
    :param name: Classifier's name
    :return: A new classifier instance for a known name; falls through
        (no return) for unknown names, exactly like the original chain.
    """
    # Map each known classifier name to a zero-argument factory.  The
    # lambdas defer construction — module-level hyper-parameters such as
    # sl_smoothing_param / rc_* are only evaluated when a name matches,
    # just as with the original if/elif chain.
    builders = {
        'SLTextClassifier-DP': lambda: SLTextClassifier(
            classes=[0, 1], smoothing="dp", smoothing_param=sl_smoothing_param),
        'SLTextClassifier-JM': lambda: SLTextClassifier(
            classes=[0, 1], smoothing="jm", smoothing_param=sl_smoothing_param),
        'TFIDFTextClassifier': lambda: TFIDFTextClassifier(classes=[0, 1]),
        'EchoWordClassifier': lambda: EchoWordClassifier(
            classes=[0, 1], size=rc_size, input_scaling=rc_input_scaling,
            leak_rate=rc_leak_rate, input_sparsity=rc_input_sparsity,
            converter=converter, spectral_radius=rc_spectral_radius,
            w_sparsity=rc_w_sparsity),
        'SL2GramTextClassifier-DP': lambda: SL2GramTextClassifier(
            classes=[0, 1], smoothing="dp", smoothing_param=sl_smoothing_param),
        'SL2GramTextClassifier-JM': lambda: SL2GramTextClassifier(
            classes=[0, 1], smoothing="jm", smoothing_param=sl_smoothing_param),
        'TFIDF2GramTextClassifier': lambda: TFIDF2GramTextClassifier(classes=[0, 1]),
    }
    builder = builders.get(name)
    if builder is not None:
        return builder()
    # Unknown names fall through without returning, as in the original.
    # NOTE(review): everything below looks like a separate script fragment
    # concatenated into create_model()'s body (extraction artifact): it
    # references names create_model does not define (pca_model, args,
    # rc_*, np, os, io) and never returns its result — confirm against
    # the original source before relying on it.

    # Word-vector converter, optionally backed by a pre-trained PCA model.
    wv_converter = WVConverter(pca_model=pca_model)

    # Prepare training and test set indexes.
    # Assumes exactly 100 documents per author split into args.K folds of
    # equal size — TODO confirm 100 % args.K == 0 is guaranteed upstream.
    n_fold_samples = int(100 / args.K)
    indexes = np.arange(0, 100, 1)
    # In-place reshape to one row per fold (numpy allows shape assignment).
    indexes.shape = (args.K, n_fold_samples)

    # Prepare training and test set.
    # Fold args.k is held out for testing; the remaining K-1 fold rows are
    # flattened back into a 1-D array of training document indexes.
    test_set_indexes = indexes[args.k]
    training_set_indexes = indexes
    training_set_indexes = np.delete(training_set_indexes, args.k, axis=0)
    # numpy accepts a plain int here; equivalent to (100 - n_fold_samples,).
    training_set_indexes.shape = (100 - n_fold_samples)

    # Classifier
    # Echo State Network word classifier for a two-author (0/1) task.
    classifier = EchoWordClassifier(classes=[0, 1], size=rc_size, input_scaling=rc_input_scaling,
                                    leak_rate=rc_leak_rate,
                                    input_sparsity=rc_input_sparsity, converter=wv_converter,
                                    spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity)

    # Add examples
    # Documents live under <dataset>/total/<author_id>/<n>.txt; the
    # enumerate index (0 or 1) doubles as the class label.
    for author_index, author_id in enumerate((args.author1, args.author2)):
        author_path = os.path.join(args.dataset, "total", author_id)
        for file_index in training_set_indexes:
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            # NOTE(review): file handle is never closed — tolerable in a
            # one-shot script, but a `with` block would be safer.
            classifier.train(io.open(file_path, 'r').read(), author_index)
        # end for
    # end for

    # Finalize model training
    classifier.finalize(verbose=True)

    # Init test epoch
# --- 예제 #3 (Example #3), score "0": snippet-boundary marker left over
# --- from the scraped listing; commented out so it no longer breaks parsing.
        # NOTE(review): orphaned fragment — the `if` header matching the
        # `else:` below is missing from this chunk (extraction artifact).
        # This branch reduces word vectors to args.in_components dims via PCA.
        converter = WVConverter(resize=args.in_components, pca_model=pca_model)
    else:
        # No PCA model: fall back to one-hot encoding over a fixed
        # vocabulary of args.voc_size words for language args.lang.
        word2vec = Word2Vec(dim=args.voc_size, mapper='one-hot')
        converter = OneHotConverter(lang=args.lang,
                                    voc_size=args.voc_size,
                                    word2vec=word2vec)
    # end if

    # Total number of authors
    # NOTE(review): n_total_docs is used below but never computed in this
    # fragment — its definition was presumably lost with the missing lines.

    # Create Echo Word Classifier
    # One class per document: document_index (not author) is the label,
    # so the classifier distinguishes individual documents.
    classifier = EchoWordClassifier(classes=range(n_total_docs),
                                    size=rc_size,
                                    input_scaling=rc_input_scaling,
                                    leak_rate=rc_leak_rate,
                                    input_sparsity=rc_input_sparsity,
                                    converter=converter,
                                    spectral_radius=rc_spectral_radius,
                                    w_sparsity=rc_w_sparsity,
                                    use_sparse_matrix=args.sparse)

    # Add examples
    # Authors are numbered 1..n_authors on disk; each contributes
    # args.n_documents files, each trained under a unique document label.
    document_index = 0
    for author_id in np.arange(1, args.n_authors + 1):
        author_path = os.path.join(args.dataset, "total", str(author_id))
        for file_index in range(args.n_documents):
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            logger.info(u"Adding document {} as {}".format(
                file_path, document_index))
            # NOTE(review): file handle is never closed — tolerable in a
            # one-shot script, but a `with` block would be safer.
            classifier.train(io.open(file_path, 'r').read(), document_index)
            document_index += 1
    # NOTE(review): another snippet boundary — n_fold_samples is consumed
    # below but not defined anywhere in this fragment.
    # 100 document indexes reshaped to one row per cross-validation fold.
    # Here args.k (lowercase) is the fold COUNT, whereas the earlier
    # fragment used args.K for the count and args.k for the held-out
    # fold index — TODO confirm which convention applies.
    indexes = np.arange(0, 100, 1)
    indexes.shape = (args.k, n_fold_samples)

    # Aggregation
    # How per-word outputs are combined into a document-level decision.
    if args.multi:
        aggregation = 'multiplication'
    else:
        aggregation = 'average'
    # end if

    # Create Echo Word Classifier
    # Binary author-attribution classifier (classes 0 and 1).
    classifier = EchoWordClassifier(classes=[0, 1],
                                    size=rc_size,
                                    input_scaling=rc_input_scaling,
                                    leak_rate=rc_leak_rate,
                                    input_sparsity=rc_input_sparsity,
                                    converter=converter,
                                    spectral_radius=rc_spectral_radius,
                                    w_sparsity=rc_w_sparsity,
                                    use_sparse_matrix=args.sparse,
                                    aggregation=aggregation)

    # Success rates
    # One slot per fold, to be filled in by the loop below.
    success_rates = np.zeros(args.k)

    # k-Fold cross validation
    for k in range(0, args.k):
        # Prepare training and test set.
        # Hold out fold k for testing; flatten the remaining rows into a
        # 1-D array of training document indexes.
        test_set_indexes = indexes[k]
        training_set_indexes = indexes
        training_set_indexes = np.delete(training_set_indexes, k, axis=0)
        # numpy accepts a plain int here; equivalent to (100 - n_fold_samples,).
        training_set_indexes.shape = (100 - n_fold_samples)
# --- 예제 #5 (Example #5), score "0": snippet-boundary marker left over
# --- from the scraped listing; commented out so it no longer breaks parsing.
    # >> 3. Array for results
    # Results accumulator — presumably one success rate is appended per
    # fold further down (not visible in this chunk).
    average_success_rate = np.array([])

    # >> 4. n-Fold cross validation
    for k in range(0, n_fold):
        print("%d-Fold" % k)

        # >> 5. Prepare training and test set.
        # NOTE(review): this fragment TRAINS on the single fold k and
        # TESTS on all remaining folds — the reverse of conventional
        # k-fold splitting. Presumably intentional (small training set
        # experiment); verify against the original experiment design.
        training_set_indexes = indexes[k]
        test_set_indexes = indexes
        test_set_indexes = np.delete(test_set_indexes, k, axis=0)
        # numpy accepts a plain int here; flattens the remaining rows to 1-D.
        test_set_indexes.shape = (100 - n_training_samples)

        # >> 6. Create Echo Word Classifier
        # NOTE(review): uses n_classes=2 instead of the classes=[0, 1]
        # keyword seen elsewhere in this file — looks like an older
        # EchoWordClassifier API; confirm before reuse.
        classifier = EchoWordClassifier(size=rc_size, input_scaling=rc_input_scaling, leak_rate=rc_leak_rate,
                                             input_sparsity=rc_input_sparsity, converter=converter, n_classes=2,
                                             spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity)

        # >> 7. Add positive examples (label 0 = the target author args.author)
        print(training_set_indexes)
        author_path = os.path.join(args.dataset, "total", str(args.author))
        for file_index in training_set_indexes:
            print("Adding positive example %s" % os.path.join(author_path, str(file_index) + ".txt"))
            classifier.add_example(os.path.join(author_path, str(file_index) + ".txt"), 0)
        # end for

        # >> 7. Add negative examples — counters for the sampling loop
        # below, which draws texts until args.negative_samples is reached.
        n_negative_samples = 0
        author_index = 0
        text_index = 0
        while n_negative_samples < args.negative_samples: