def get_data(dataset_path, normalization, unlabeled_percentage, seed=None):
    """Load the TIMIT training data and split it for semi-supervised training.

    Loads the training set, carves out a validation split, normalizes both,
    and strips the labels from a fraction of the training utterances.

    Args:
        dataset_path: path to the TIMIT dataset root.
        normalization: normalization mode forwarded to ``normalize``.
        unlabeled_percentage: fraction of training utterances to unlabel.
        seed: optional seed for the validation split and the unlabeling.

    Returns:
        Tuple ``(x_train_labeled, x_train_unlabeled, y_train_labeled,
        x_val, y_val)`` of numpy arrays.
    """
    train_set, _ = timit.load_data(dataset_path)
    train_set, val_set = timit.split_validation(train_set, seed=seed)
    train_set, val_set = normalize(train_set, val_set, mode=normalization)
    train_set = unlabel(train_set, unlabeled_percentage, seed=seed)

    # Partition once; an utterance is "labeled" iff it kept its 'labels' key.
    labeled = [u for u in train_set if 'labels' in u]
    unlabeled = [u for u in train_set if 'labels' not in u]

    x_train_labeled = np.array([u['features'] for u in labeled])
    x_train_unlabeled = np.array([u['features'] for u in unlabeled])
    y_train_labeled = np.array([u['labels'] for u in labeled])
    x_val = np.array([u['features'] for u in val_set])
    y_val = np.array([u['labels'] for u in val_set])
    return x_train_labeled, x_train_unlabeled, y_train_labeled, x_val, y_val
def test_normalize(self):
    """Full-dataset normalization must yield zero mean and unit variance."""
    dataset_path = get_root_dir() / 'data' / 'timit'
    train_set, _ = timit.load_data(dataset_path)

    # test normalization on whole dataset
    normalized_train_set, _ = normalize(train_set, mode='full')
    features = np.concatenate(
        [utterance['features'] for utterance in normalized_train_set])

    # Check each feature dimension independently.
    mean = features.mean(axis=0)
    var = features.var(axis=0)
    for m, v in zip(mean, var):
        self.assertAlmostEqual(m, 0)
        self.assertAlmostEqual(v, 1)
def test_unlabel(self):
    """unlabel() must partition the training set into labeled/unlabeled parts."""
    dataset_path = get_root_dir() / 'data' / 'timit'
    train_set, _ = timit.load_data(dataset_path)
    n_total = len(train_set)

    # Unlabel 70% of the utterances; fixed seed makes the counts reproducible.
    unlabel(train_set, 0.7, seed=1)

    n_labeled = len(
        [utterance for utterance in train_set if 'labels' in utterance])
    n_unlabeled = len(
        [utterance for utterance in train_set if 'labels' not in utterance])

    # Every utterance is still present, just partitioned.
    self.assertEqual(n_labeled + n_unlabeled, n_total)
    # assertLess reports both operands on failure, unlike assertTrue(a < b).
    self.assertLess(n_labeled, n_unlabeled)
    # Exact counts expected for seed=1 — TODO confirm against unlabel()'s RNG.
    self.assertEqual(n_labeled, 1104)
    self.assertEqual(n_unlabeled, 2592)
def main():
    """Restore a model from a training checkpoint and export its weights."""
    args = get_command_line_arguments()

    # Ensure the directory holding the output file exists before saving.
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    train_set, _ = timit.load_data(args.data)

    model = DualStudent(
        n_classes=get_number_of_classes(),
        n_hidden_layers=N_HIDDEN_LAYERS,
        n_units=N_UNITS,
        padding_value=PADDING_VALUE,
        version=args.version,
    )
    # Build with the feature shape of one utterance; batch dimension unknown.
    model.build(input_shape=(None, ) + train_set[0]['features'].shape)

    # Load the trained variables from the checkpoint, then write the weights.
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint.restore(args.checkpoint)
    model.save_weights(str(output_path))
def get_data(dataset_path, normalization):
    """Load the TIMIT test set, normalized consistently with the training set.

    Args:
        dataset_path: path to the TIMIT dataset root.
        normalization: normalization mode forwarded to ``normalize``.

    Returns:
        Tuple ``(x_test, y_test)`` of numpy arrays with test features/labels.
    """
    train_set, test_set = timit.load_data(dataset_path)
    # The training set is only needed to fit the normalization statistics;
    # we keep just the normalized test set.
    _, test_set = normalize(train_set, test_set, mode=normalization)

    x_test = np.array([u['features'] for u in test_set])
    y_test = np.array([u['labels'] for u in test_set])
    return x_test, y_test