# Classifier hyper-parameters (sl_smoothing_param, rc_*) and converter are module-level globals.
def create_model(name):
    """
    Create a classifier model from its name.
    :param name: Classifier's name
    :return: The corresponding classifier instance
    """
    if name == 'SLTextClassifier-DP':
        return SLTextClassifier(classes=[0, 1], smoothing="dp", smoothing_param=sl_smoothing_param)
    elif name == 'SLTextClassifier-JM':
        return SLTextClassifier(classes=[0, 1], smoothing="jm", smoothing_param=sl_smoothing_param)
    elif name == 'TFIDFTextClassifier':
        return TFIDFTextClassifier(classes=[0, 1])
    elif name == 'EchoWordClassifier':
        return EchoWordClassifier(classes=[0, 1], size=rc_size, input_scaling=rc_input_scaling,
                                  leak_rate=rc_leak_rate, input_sparsity=rc_input_sparsity,
                                  converter=converter, spectral_radius=rc_spectral_radius,
                                  w_sparsity=rc_w_sparsity)
    elif name == 'SL2GramTextClassifier-DP':
        return SL2GramTextClassifier(classes=[0, 1], smoothing="dp", smoothing_param=sl_smoothing_param)
    elif name == 'SL2GramTextClassifier-JM':
        return SL2GramTextClassifier(classes=[0, 1], smoothing="jm", smoothing_param=sl_smoothing_param)
    elif name == 'TFIDF2GramTextClassifier':
        return TFIDF2GramTextClassifier(classes=[0, 1])
# end create_model
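# A dictionary-based registry is a possible alternative to the if/elif chain
# above; this is only a sketch reusing the same classifier classes and the
# same module-level hyper-parameters, not the author's original code.
from functools import partial

MODEL_REGISTRY = {
    'SLTextClassifier-DP': partial(SLTextClassifier, classes=[0, 1], smoothing="dp",
                                   smoothing_param=sl_smoothing_param),
    'SLTextClassifier-JM': partial(SLTextClassifier, classes=[0, 1], smoothing="jm",
                                   smoothing_param=sl_smoothing_param),
    'TFIDFTextClassifier': partial(TFIDFTextClassifier, classes=[0, 1]),
}

def create_model_from_registry(name):
    # Same behavior as create_model, but new names only need a registry entry
    if name not in MODEL_REGISTRY:
        raise ValueError("Unknown classifier: {}".format(name))
    return MODEL_REGISTRY[name]()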
wv_converter = WVConverter(pca_model=pca_model)

# Prepare training and test set indexes.
n_fold_samples = int(100 / args.K)
indexes = np.arange(0, 100, 1)
indexes.shape = (args.K, n_fold_samples)

# Prepare training and test set.
test_set_indexes = indexes[args.k]
training_set_indexes = np.delete(indexes, args.k, axis=0)
training_set_indexes.shape = (100 - n_fold_samples,)

# Classifier
classifier = EchoWordClassifier(classes=[0, 1], size=rc_size, input_scaling=rc_input_scaling,
                                leak_rate=rc_leak_rate, input_sparsity=rc_input_sparsity,
                                converter=wv_converter, spectral_radius=rc_spectral_radius,
                                w_sparsity=rc_w_sparsity)

# Add examples
for author_index, author_id in enumerate((args.author1, args.author2)):
    author_path = os.path.join(args.dataset, "total", author_id)
    for file_index in training_set_indexes:
        file_path = os.path.join(author_path, str(file_index) + ".txt")
        classifier.train(io.open(file_path, 'r').read(), author_index)
    # end for
# end for

# Finalize model training
classifier.finalize(verbose=True)

# Init test epoch
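# Standalone illustration of the fold-index bookkeeping above, with K=5 folds
# over 100 documents (illustrative values, not the original experiment's).
import numpy as np

K, n_docs = 5, 100
n_fold_samples = n_docs // K                                   # 20 texts per fold
indexes = np.arange(n_docs).reshape(K, n_fold_samples)

k = 2                                                          # fold held out for testing
test_set_indexes = indexes[k]                                  # shape (20,)
training_set_indexes = np.delete(indexes, k, axis=0).ravel()   # shape (80,)

# Training and test folds are disjoint and together cover all 100 documents
assert len(np.intersect1d(test_set_indexes, training_set_indexes)) == 0
assert len(test_set_indexes) + len(training_set_indexes) == n_docs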
    converter = WVConverter(resize=args.in_components, pca_model=pca_model)
else:
    word2vec = Word2Vec(dim=args.voc_size, mapper='one-hot')
    converter = OneHotConverter(lang=args.lang, voc_size=args.voc_size, word2vec=word2vec)
# end if

# Total number of authors

# Create Echo Word Classifier
classifier = EchoWordClassifier(classes=range(n_total_docs), size=rc_size,
                                input_scaling=rc_input_scaling, leak_rate=rc_leak_rate,
                                input_sparsity=rc_input_sparsity, converter=converter,
                                spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity,
                                use_sparse_matrix=args.sparse)

# Add examples
document_index = 0
for author_id in np.arange(1, args.n_authors + 1):
    author_path = os.path.join(args.dataset, "total", str(author_id))
    for file_index in range(args.n_documents):
        file_path = os.path.join(author_path, str(file_index) + ".txt")
        logger.info(u"Adding document {} as {}".format(file_path, document_index))
        classifier.train(io.open(file_path, 'r').read(), document_index)
        document_index += 1
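# Minimal sketch of what a one-hot word encoding does (one_hot_encode is a
# hypothetical helper; the project's OneHotConverter/Word2Vec internals are
# not shown here). Each word index selects a row of the identity matrix, so a
# text of n words maps to an (n, voc_size) input matrix for the reservoir.
import numpy as np

def one_hot_encode(token_ids, voc_size):
    # token_ids: integer word indexes in [0, voc_size)
    inputs = np.zeros((len(token_ids), voc_size))
    inputs[np.arange(len(token_ids)), token_ids] = 1.0
    return inputs

x = one_hot_encode([3, 0, 2], voc_size=5)
print(x.shape)  # (3, 5)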
indexes = np.arange(0, 100, 1)
indexes.shape = (args.k, n_fold_samples)

# Aggregation
if args.multi:
    aggregation = 'multiplication'
else:
    aggregation = 'average'
# end if

# Create Echo Word Classifier
classifier = EchoWordClassifier(classes=[0, 1], size=rc_size, input_scaling=rc_input_scaling,
                                leak_rate=rc_leak_rate, input_sparsity=rc_input_sparsity,
                                converter=converter, spectral_radius=rc_spectral_radius,
                                w_sparsity=rc_w_sparsity, use_sparse_matrix=args.sparse,
                                aggregation=aggregation)

# Success rates
success_rates = np.zeros(args.k)

# k-Fold cross validation
for k in range(0, args.k):
    # Prepare training and test set.
    test_set_indexes = indexes[k]
    training_set_indexes = np.delete(indexes, k, axis=0)
    training_set_indexes.shape = (100 - n_fold_samples,)
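# Sketch of the two aggregation modes selected above (assumed semantics: the
# reservoir classifier yields one class-probability vector per word, and
# aggregation collapses them into a single document-level score).
import numpy as np

def aggregate(stepwise_probs, mode):
    # stepwise_probs: (n_steps, n_classes) per-word class probabilities
    if mode == 'average':
        return stepwise_probs.mean(axis=0)
    elif mode == 'multiplication':
        # Product of per-word probabilities, computed in log-space for stability
        return np.exp(np.log(stepwise_probs + 1e-12).sum(axis=0))
    raise ValueError("Unknown aggregation: {}".format(mode))

probs = np.array([[0.6, 0.4], [0.7, 0.3], [0.2, 0.8]])
print(aggregate(probs, 'average'))         # [0.5 0.5]
print(aggregate(probs, 'multiplication'))  # [0.084 0.096]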
# >> 3. Array for results
average_success_rate = np.array([])

# >> 4. n-Fold cross validation
for k in range(0, n_fold):
    print("%d-Fold" % k)

    # >> 5. Prepare training and test set.
    training_set_indexes = indexes[k]
    test_set_indexes = np.delete(indexes, k, axis=0)
    test_set_indexes.shape = (100 - n_training_samples,)

    # >> 6. Create Echo Word Classifier
    classifier = EchoWordClassifier(size=rc_size, input_scaling=rc_input_scaling,
                                    leak_rate=rc_leak_rate, input_sparsity=rc_input_sparsity,
                                    converter=converter, n_classes=2,
                                    spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity)

    # >> 7. Add positive examples
    print(training_set_indexes)
    author_path = os.path.join(args.dataset, "total", str(args.author))
    for file_index in training_set_indexes:
        print("Adding positive example %s" % os.path.join(author_path, str(file_index) + ".txt"))
        classifier.add_example(os.path.join(author_path, str(file_index) + ".txt"), 0)
    # end for

    # >> 8. Add negative examples
    n_negative_samples = 0
    author_index = 0
    text_index = 0
    while n_negative_samples < args.negative_samples:
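# The negative-sampling loop is truncated above; a plausible body is sketched
# separately here under stated assumptions: negatives are drawn from the other
# authors' documents (class 1), cycling through authors and wrapping to the
# next text index after each full pass. n_authors is an assumed variable; the
# original loop body is not shown in the source.
while n_negative_samples < args.negative_samples:
    if str(author_index) != str(args.author):
        negative_path = os.path.join(args.dataset, "total", str(author_index),
                                     str(text_index) + ".txt")
        classifier.add_example(negative_path, 1)  # class 1 = "not the target author"
        n_negative_samples += 1
    # end if
    author_index += 1
    if author_index >= n_authors:
        author_index = 0
        text_index += 1
    # end if
# end while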