def make_reber_classification(n_samples, invalid_size=0.5):
    """
    Generate random dataset for Reber grammar classification.
    Invalid words contain the same letters as the Reber grammar,
    but they are built without the grammar rules.

    Parameters
    ----------
    n_samples : int
        Number of samples in dataset. Must be at least 2.
    invalid_size : float
        Proportion of invalid words in dataset, defaults to `0.5`.
        Value must be strictly between 0 and 1.

    Returns
    -------
    tuple
        Result of ``shuffle`` applied to the array of words and the
        array of labels. Label ``1`` marks a word produced by
        ``make_reber`` and label ``0`` marks a randomly built word.

    Raises
    ------
    ValueError
        If ``n_samples`` is less than 2 or ``invalid_size`` is not
        strictly between 0 and 1.

    Examples
    --------
    >>> from neupy.datasets import make_reber_classification
    >>>
    >>> data, labels = make_reber_classification(10, invalid_size=0.5)
    >>> data
    array(['SXSXVSXXVX', 'VVPS', 'VVPSXTTS', 'VVS', 'VXVS', 'VVS',
           'PPTTTXPSPTV', 'VTTSXVPTXVXT', 'VSSXSTX', 'TTXVS'],
          dtype='<U12')
    >>> labels
    array([0, 1, 0, 1, 1, 1, 0, 0, 0, 1])
    """
    if n_samples < 2:
        raise ValueError("There are must be at least 2 samples")

    if invalid_size <= 0 or invalid_size >= 1:
        raise ValueError("`invalid_size` property must be "
                         "between zero and one")

    # `invalid_size` is the share of *invalid* words, so it defines the
    # invalid counter; the remaining samples are valid words.
    n_invalid_words = int(math.ceil(n_samples * invalid_size))
    n_valid_words = n_samples - n_invalid_words

    valid_words = make_reber(n_valid_words)
    valid_labels = [1] * n_valid_words

    # Random words are drawn letter by letter; they are not checked
    # against the grammar here.
    invalid_words = [
        ''.join(choice(avaliable_letters) for _ in range(randint(3, 14)))
        for _ in range(n_invalid_words)
    ]
    # One label per invalid word, so words and labels stay aligned.
    invalid_labels = [0] * n_invalid_words

    return shuffle(
        np.array(valid_words + invalid_words),
        np.array(valid_labels + invalid_labels)
    )
def make_reber_classification(n_samples, invalid_size=0.5):
    """
    Generate random dataset for Reber grammar classification.
    Invalid words contain the same letters as the Reber grammar,
    but they are built without the grammar rules.

    Parameters
    ----------
    n_samples : int
        Number of samples in dataset. Must be at least 2.
    invalid_size : float
        Proportion of invalid words in dataset, defaults to `0.5`.
        Value must be strictly between 0 and 1.

    Returns
    -------
    tuple
        Result of ``shuffle`` applied to the array of words and the
        array of labels. Label ``1`` marks a word produced by
        ``make_reber`` and label ``0`` marks a randomly built word.

    Raises
    ------
    ValueError
        If ``n_samples`` is less than 2 or ``invalid_size`` is not
        strictly between 0 and 1.

    Examples
    --------
    >>> from neupy.datasets import make_reber_classification
    >>>
    >>> data, labels = make_reber_classification(10, invalid_size=0.5)
    >>> data
    array(['SXSXVSXXVX', 'VVPS', 'VVPSXTTS', 'VVS', 'VXVS', 'VVS',
           'PPTTTXPSPTV', 'VTTSXVPTXVXT', 'VSSXSTX', 'TTXVS'],
          dtype='<U12')
    >>> labels
    array([0, 1, 0, 1, 1, 1, 0, 0, 0, 1])
    """
    if n_samples < 2:
        raise ValueError("There are must be at least 2 samples")

    if invalid_size <= 0 or invalid_size >= 1:
        raise ValueError("`invalid_size` property must be "
                         "between zero and one")

    # The requested proportion describes invalid words, therefore it
    # determines the invalid counter and the rest are valid words.
    n_invalid_words = int(math.ceil(n_samples * invalid_size))
    n_valid_words = n_samples - n_invalid_words

    valid_words = make_reber(n_valid_words)
    valid_labels = [1] * n_valid_words

    invalid_words = []
    for _ in range(n_invalid_words):
        word_length = randint(3, 14)
        letters = [choice(avaliable_letters) for _ in range(word_length)]
        invalid_words.append(''.join(letters))

    # Sized from the invalid counter so that every word has a label.
    invalid_labels = [0] * n_invalid_words

    return shuffle(np.array(valid_words + invalid_words),
                   np.array(valid_labels + invalid_labels))
def test_shuffle_with_nones(self):
    """Check that ``shuffle`` hands back ``None`` arguments unchanged."""
    expected = (None, None)
    result = shuffle(None, None)
    self.assertEqual(expected, result)
def test_shuffle_invalid_shapes_exception(self):
    """
    Check that ``shuffle`` raises ``ValueError`` for arrays of
    different length and that the message mentions both shapes.
    """
    input_data = np.arange(10)
    shorter_data = input_data[:-1]

    # ``assertRaisesRegexp`` is a deprecated alias that was removed in
    # Python 3.12. Prefer the current name and fall back to the old one
    # only on interpreters that do not provide it.
    assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
    if assert_raises_regex is None:
        assert_raises_regex = self.assertRaisesRegexp

    with assert_raises_regex(ValueError, r'\(10,\), \(9,\)'):
        shuffle(input_data, shorter_data)
def test_shuffle_single_input(self):
    """Check the value ``shuffle`` returns for a single array argument."""
    ones = np.ones(10)

    # Every element is identical, so any ordering equals the input.
    # The result is expected to be the shuffled array itself rather
    # than a tuple that wraps it.
    np.testing.assert_array_equal(ones, shuffle(ones))
def test_shuffle_empty_input(self):
    """Check that ``shuffle`` without arguments matches an empty tuple."""
    expected = ()
    np.testing.assert_array_equal(expected, shuffle())
def test_shuffle_basic(self):
    """
    Check that the same array passed twice to ``shuffle`` comes back
    as two equal arrays, i.e. both inputs get the same ordering.
    """
    sequence = np.arange(10)
    first, second = shuffle(sequence, sequence)
    np.testing.assert_array_equal(first, second)
def test_shuffle_invalid_shapes_exception(self):
    """Check that ``shuffle`` rejects arrays of different length."""
    full = np.arange(10)
    # Same data without the last element, so the lengths disagree.
    truncated = full[:-1]

    with self.assertRaises(ValueError):
        shuffle(full, truncated)
def make_reber_classification(n_samples, invalid_size=0.5,
                              return_indeces=False):
    """
    Generate random dataset for Reber grammar classification.
    Invalid words contain the same letters as the Reber grammar,
    but they are built without the grammar rules.

    Parameters
    ----------
    n_samples : int
        Number of samples in dataset. Must be at least 2.
    invalid_size : float
        Proportion of invalid words in dataset, defaults to ``0.5``.
        Value must be strictly between ``0`` and ``1``.
    return_indeces : bool
        If ``True``, the shuffled words are passed through
        ``convert_letters_to_indeces`` so that each letter is replaced
        with its index. Defaults to ``False``.

    Returns
    -------
    tuple
        Shuffled samples and their labels. Label ``1`` marks a word
        produced by ``make_reber`` and label ``0`` marks a randomly
        built word.

    Raises
    ------
    ValueError
        If ``n_samples`` is less than 2 or ``invalid_size`` is not
        strictly between 0 and 1.

    Examples
    --------
    >>> from neupy.datasets import make_reber_classification
    >>>
    >>> data, labels = make_reber_classification(10, invalid_size=0.5)
    >>> data
    array(['SXSXVSXXVX', 'VVPS', 'VVPSXTTS', 'VVS', 'VXVS', 'VVS',
           'PPTTTXPSPTV', 'VTTSXVPTXVXT', 'VSSXSTX', 'TTXVS'],
          dtype='<U12')
    >>> labels
    array([0, 1, 0, 1, 1, 1, 0, 0, 0, 1])
    >>>
    >>> data, labels = make_reber_classification(
    ...     4, invalid_size=0.5, return_indeces=True)
    >>> data
    array([array([1, 3, 1, 4]),
           array([0, 3, 0, 3, 0, 4, 3, 0, 4, 4]),
           array([1, 3, 1, 2, 3, 1, 2, 4]),
           array([0, 3, 0, 0, 3, 0, 4, 2, 4, 1, 0, 4, 0])], dtype=object)
    """
    if n_samples < 2:
        raise ValueError("There are must be at least 2 samples")

    if not 0 < invalid_size < 1:
        raise ValueError("`invalid_size` argument value must be between "
                         "zero and one, got {}".format(invalid_size))

    # `invalid_size` is the share of *invalid* words, so it defines the
    # invalid counter; the remaining samples are valid words.
    n_invalid_words = int(math.ceil(n_samples * invalid_size))
    n_valid_words = n_samples - n_invalid_words

    valid_words = make_reber(n_valid_words)
    valid_labels = [1] * n_valid_words

    # Random words are drawn letter by letter; they are not checked
    # against the grammar here.
    invalid_words = [
        ''.join(choice(avaliable_letters) for _ in range(randint(3, 14)))
        for _ in range(n_invalid_words)
    ]
    # One label per invalid word, so words and labels stay aligned.
    invalid_labels = [0] * n_invalid_words

    samples, labels = shuffle(np.array(valid_words + invalid_words),
                              np.array(valid_labels + invalid_labels))

    if return_indeces:
        samples = convert_letters_to_indeces(samples)

    return samples, labels