def test_initialize():
    """Check that a CRFDataset built from two CRFDicts exposes their keys.

    Every entry of KEYS1 must equal the entry at the same position in the
    dataset's feature list, and every entry of KEYS2 must equal the entry at
    the same position in its label list.
    """
    dataset = CRFDataset(CRFDict(KEYS1), CRFDict(KEYS2))

    feature_list = dataset.get_feature_list()
    for position, expected in enumerate(KEYS1):
        # Index into the returned list so a too-short list fails loudly.
        assert expected == feature_list[position]

    label_list = dataset.get_label_list()
    for position, expected in enumerate(KEYS2):
        assert expected == label_list[position]
def test_algorithms(n_samples=100, n_features=200, n_instances=10):
    """Smoke-test that every algorithm in ALGORITHMS trains on random data.

    A random feature matrix (entries below 0.8 are zeroed, then converted to
    CSR) and random integer labels in [0, 10) are loaded into a CRFDataset,
    and a CRFTrainer is trained once per algorithm. The test passes when no
    call raises.

    Parameters
    ----------
    n_samples : int
        Number of rows in the random feature matrix.
    n_features : int
        Number of columns in the random feature matrix.
    n_instances : int
        Spacing between consecutive values of the ``instances`` array when
        ``n_samples`` is a multiple of it (presumably the number of samples
        per instance -- TODO confirm against ``add_group_from_array``).
    """
    X = np.random.random((n_samples, n_features))
    # Zero out most entries so the CSR matrix is actually sparse.
    X[X < 0.8] = 0
    X = csr_matrix(X)
    labels = np.random.randint(10, size=n_samples)

    # ``num`` must be an integer: true division would hand np.linspace a
    # float under Python 3, which raises TypeError in current NumPy.
    n_points = n_samples // n_instances + 1
    # Drop the final point (== n_samples) and round to whole values.
    instances = np.round(np.linspace(0, n_samples, n_points)[:-1])

    data = CRFDataset()
    data.add_group_from_array(X, labels, instances)

    for algorithm in ALGORITHMS:
        trainer = CRFTrainer(algorithm=algorithm, quiet=True)
        trainer.train(data)
def test_matrix_conversion(n_samples=50, n_features=100, n_instances=10):
    """Test conversion of a CSR matrix to and from CRFDataset.

    A random feature matrix (entries below 0.8 are zeroed, then converted to
    CSR) is loaded into a CRFDataset with random integer labels in
    [0, len(KEYS1)); ``to_matrix()`` must then return a matrix whose dense
    form is almost equal to the dense form of the input.

    Parameters
    ----------
    n_samples : int
        Number of rows in the random feature matrix.
    n_features : int
        Number of columns in the random feature matrix.
    n_instances : int
        Spacing between consecutive values of the ``instances`` array when
        ``n_samples`` is a multiple of it (presumably the number of samples
        per instance -- TODO confirm against ``add_group_from_array``).
    """
    X = np.random.random((n_samples, n_features))
    # Zero out most entries so the CSR matrix is actually sparse.
    X[X < 0.8] = 0
    X = csr_matrix(X)
    labels = np.random.randint(len(KEYS1), size=n_samples)

    # ``num`` must be an integer: true division would hand np.linspace a
    # float under Python 3, which raises TypeError in current NumPy.
    n_points = n_samples // n_instances + 1
    # Drop the final point (== n_samples) and round to whole values.
    instances = np.round(np.linspace(0, n_samples, n_points)[:-1])

    data = CRFDataset()
    data.add_group_from_array(X, labels, instances)

    mat = data.to_matrix()
    assert_array_almost_equal(mat.toarray(), X.toarray())
from crfsuite import (
    crfsuite_learn,
    CRFDataset,
    CRFDict,
    CRFTagger,
)

# Dimensions of the synthetic data set built below.
n_samples = 100
n_features = 100
n_instances = 10
n_labels = 10
features_per_sample = 5

# Synthetic feature matrix: for each row, set the randomly drawn columns to 1.
data = np.zeros((n_samples, n_features))
indices = np.random.randint(n_features, size=(n_samples, features_per_sample))
row_selector = np.arange(n_samples)[:, None]
data[row_selector, indices] += 1

# One random integer label in [0, n_labels) per sample.
labels = np.random.randint(n_labels, size=n_samples)

# Evenly spaced integer values over [0, n_samples), excluding the end point.
instances = np.floor(
    np.linspace(0, n_samples, n_instances + 1)[:-1]
).astype(int)

# The synthetic arrays above are only used by this disabled alternative:
# crf_data = CRFDataset().add_group_from_array(data, labels, instances)
crf_data = CRFDataset().add_groups_from_files('example_files/train_small.txt')

# Learn a model from the training data, then tag the test file with it.
model = crfsuite_learn(crf_data)
crf_data_test = model.get_tagging_data_from_file(
    'example_files/test_small.txt'
)
tagger = model.get_tagger()
output = tagger.tag(crf_data_test)