Ejemplo n.º 1
0
def get_data(dataset_path, normalization, unlabeled_percentage, seed=None):
    """Load the dataset and build the arrays used for semi-supervised training.

    The training portion returned by ``timit.load_data`` is split into a
    training and a validation set, both are passed through ``normalize``,
    and the training set is then passed through ``unlabel``.

    Args:
        dataset_path: dataset location, forwarded to ``timit.load_data``.
        normalization: forwarded to ``normalize`` as its ``mode`` argument.
        unlabeled_percentage: forwarded to ``unlabel``.
        seed: forwarded to both ``timit.split_validation`` and ``unlabel``.

    Returns:
        A tuple ``(x_train_labeled, x_train_unlabeled, y_train_labeled,
        x_val, y_val)`` of NumPy arrays built from the ``'features'`` and
        ``'labels'`` entries of the utterances.
    """
    def collect(utterances, key):
        # NOTE(review): if the per-utterance arrays differ in length this is
        # a ragged array, which recent NumPy versions reject unless
        # dtype=object is given -- confirm against the pinned NumPy version.
        return np.array([utt[key] for utt in utterances])

    training, _ = timit.load_data(dataset_path)
    training, validation = timit.split_validation(training, seed=seed)
    training, validation = normalize(training, validation, mode=normalization)
    training = unlabel(training, unlabeled_percentage, seed=seed)

    # An utterance is treated as labeled when it carries a 'labels' key.
    labeled = [utt for utt in training if 'labels' in utt]
    unlabeled = [utt for utt in training if 'labels' not in utt]

    x_train_labeled = collect(labeled, 'features')
    x_train_unlabeled = collect(unlabeled, 'features')
    y_train_labeled = collect(labeled, 'labels')
    x_val = collect(validation, 'features')
    y_val = collect(validation, 'labels')

    return x_train_labeled, x_train_unlabeled, y_train_labeled, x_val, y_val
Ejemplo n.º 2
0
    def test_normalize(self):
        # Checks that mode='full' normalization yields, for every column of
        # the concatenated features, a mean of ~0 and a variance of ~1.
        train_set, _ = timit.load_data(get_root_dir() / 'data' / 'timit')

        # test normalization on whole dataset
        normalized, _ = normalize(train_set, mode='full')
        all_features = np.concatenate([utt['features'] for utt in normalized])
        column_means = np.mean(all_features, axis=0)
        column_vars = np.var(all_features, axis=0)

        n_columns = all_features.shape[1]
        for col in range(n_columns):
            self.assertAlmostEqual(column_means[col], 0)
            self.assertAlmostEqual(column_vars[col], 1)
Ejemplo n.º 3
0
    def test_unlabel(self):
        # Checks the labeled/unlabeled counts after calling `unlabel` with a
        # percentage of 0.7 and seed 1.
        train_set, _ = timit.load_data(get_root_dir() / 'data' / 'timit')
        n_total = len(train_set)

        # The return value is ignored and the counts below are read from
        # `train_set` itself, so this test relies on `unlabel` modifying the
        # utterances it is given.
        unlabel(train_set, 0.7, seed=1)

        # Count both groups independently so the total check is meaningful.
        n_labeled = sum(1 for utt in train_set if 'labels' in utt)
        n_unlabeled = sum(1 for utt in train_set if 'labels' not in utt)

        self.assertEqual(n_labeled + n_unlabeled, n_total)
        self.assertTrue(n_labeled < n_unlabeled)
        self.assertEqual(n_labeled, 1104)
        self.assertEqual(n_unlabeled, 2592)
def main():
    """Restore a DualStudent model from a checkpoint and save its weights.

    Reads ``data``, ``checkpoint``, ``version`` and ``output`` from the
    command-line arguments, creates the parent directory of the output path,
    builds the model for the feature shape of the first training utterance,
    restores the checkpoint into it and writes the weights to the output
    path.
    """
    args = get_command_line_arguments()
    dataset_path, checkpoint_path, version = (
        args.data, args.checkpoint, args.version)

    # Create the destination directory up front; the path is only converted
    # to a string at the point where the weights are saved.
    output_file = Path(args.output)
    output_file.parent.mkdir(parents=True, exist_ok=True)

    train_set, _ = timit.load_data(dataset_path)

    model = DualStudent(
        n_classes=get_number_of_classes(),
        n_hidden_layers=N_HIDDEN_LAYERS,
        n_units=N_UNITS,
        padding_value=PADDING_VALUE,
        version=version,
    )

    # The input shape is a leading None dimension followed by the feature
    # shape of the first training utterance.
    feature_shape = train_set[0]['features'].shape
    model.build(input_shape=(None, ) + feature_shape)

    # Restore into the built model, then export only the weights.
    tf.train.Checkpoint(model=model).restore(checkpoint_path)
    model.save_weights(str(output_file))
Ejemplo n.º 5
0
def get_data(dataset_path, normalization):
    """Load the dataset and return the normalized test features and labels.

    Args:
        dataset_path: dataset location, forwarded to ``timit.load_data``.
        normalization: forwarded to ``normalize`` as its ``mode`` argument.

    Returns:
        A tuple ``(x_test, y_test)`` of NumPy arrays built from the
        ``'features'`` and ``'labels'`` entries of the test utterances.
    """
    training, testing = timit.load_data(dataset_path)

    # The training set is passed to `normalize` alongside the test set; only
    # the second returned value (the test set) is used here.
    _, testing = normalize(training, testing, mode=normalization)

    features, labels = [], []
    for utt in testing:
        features.append(utt['features'])
        labels.append(utt['labels'])

    return np.array(features), np.array(labels)