Example #1
    def get_cnn(dataprovider=None):
        if dataprovider is None:
            dataprovider = dp
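        # CNN embedding that maps every input to a fixed-size (100-dimensional) feature vector.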
        en = CnnEmbedding(output_size=100,
                          cnn_layers_per_block=1,
                          block_feature_counts=[32, 64],
                          fc_layer_feature_counts=[100],
                          hidden_activation='relu',
                          final_activation='relu',
                          batch_norm_for_init_layer=True,
                          dropout_after_max_pooling=0.5,
                          dropout_after_fc=0.5)

        c_nn = ClusterNNTry04_Ddbc(dataprovider,
                                   20,
                                   en,
                                   lstm_layers=0,
                                   lstm_units=1,
                                   cluster_count_dense_layers=0,
                                   cluster_count_dense_units=1,
                                   output_dense_layers=0,
                                   output_dense_units=1,
                                   cluster_count_lstm_layers=0,
                                   cluster_count_lstm_units=1)
        c_nn.minibatch_size = 35
        c_nn.validate_every_nth_epoch = 10
        c_nn.optimizer = Adadelta(clipnorm=0.5)

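        # Disable the cluster-count and similarity losses and fix the predicted cluster count to the maximum.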
        c_nn.use_cluster_count_loss = False
        c_nn.use_similarities_loss = False
        c_nn.fixed_cluster_count_output = dataprovider.get_max_cluster_count()
        return c_nn
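
A minimal usage sketch for the factory above, assuming only the build_networks call and keyword arguments that also appear in Example #8; it is an illustration, not the project's training script:

    # Illustrative only: build the (untrained) networks returned by the factory above
    # and print their summaries, mirroring the build_networks call used in Example #8.
    c_nn = get_cnn()
    c_nn.build_networks(print_summaries=True, build_training_model=False)
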
Example #2
    def get_cnn(dataprovider=None):
        if dataprovider is None:
            dataprovider = dp

        en = CnnEmbedding(
            output_size=256, cnn_layers_per_block=1, block_feature_counts=[32, 64, 128],
            fc_layer_feature_counts=[256], hidden_activation=LeakyReLU(), final_activation=LeakyReLU(),
            batch_norm_for_init_layer=False, batch_norm_after_activation=True, batch_norm_for_final_layer=True
        )

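        # LSTM-based clustering network with separate heads for the cluster count and the pairwise similarity output.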
        c_nn = ClusterNNTry00_V135(dataprovider, 20, en, lstm_layers=14, internal_embedding_size=96*3, cluster_count_dense_layers=1, cluster_count_dense_units=256,
                                  output_dense_layers=0, output_dense_units=256, cluster_count_lstm_layers=1, cluster_count_lstm_units=128,
                                  kl_embedding_size=128, kl_divergence_factor=0., simplified_center_loss_factor=0.,
                                  cluster_assignment_regularization_factor=10.0, use_v02_cluster_assignment_loss=True)
        c_nn.include_self_comparison = False
        c_nn.weighted_classes = True
        c_nn.class_weights_approximation = 'stochastic'
        c_nn.minibatch_size = 25
        c_nn.class_weights_post_processing_f = lambda x: np.sqrt(x)
        c_nn.set_loss_weight('similarities_output', 5.0)
        c_nn.optimizer = Adadelta(lr=5.0)

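        # Validation and early-stopping configuration.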
        validation_factor = 10
        c_nn.early_stopping_iterations = 15001
        c_nn.validate_every_nth_epoch = 10 * validation_factor
        c_nn.validation_data_count = c_nn.minibatch_size * validation_factor
        # c_nn.prepend_base_name_to_layer_name = False
        return c_nn
        ds_dir + 'datasets/TIMIT/traininglist_100/testlist_20.txt')
    dp = TIMITDataProvider(
        # data_dir=top_dir + "/test/TIMIT_mini", cache_directory=top_dir + "/test/cache",
        data_dir=top_dir + "/TIMIT",
        cache_directory=top_dir + "/test/cache",
        min_cluster_count=1,
        max_cluster_count=5,
        return_1d_audio_data=False,
        test_classes=TIMIT_lst,
        validate_classes=TIMIT_lst,
        concat_audio_files_of_speaker=True)
    en = CnnEmbedding(output_size=256,
                      cnn_layers_per_block=1,
                      block_feature_counts=[32, 64],
                      fc_layer_feature_counts=[],
                      hidden_activation=LeakyReLU(),
                      final_activation=LeakyReLU(),
                      batch_norm_for_init_layer=False,
                      batch_norm_after_activation=True,
                      batch_norm_for_final_layer=True)

    c_nn = ClusterNNTry00_V23(dp,
                              20,
                              en,
                              lstm_layers=7,
                              internal_embedding_size=96,
                              cluster_count_dense_layers=1,
                              cluster_count_dense_units=256,
                              output_dense_layers=1,
                              output_dense_units=256,
                              cluster_count_lstm_layers=2,
    dp = TIMITDataProvider(
        # data_dir=top_dir + "/test/TIMIT_mini", cache_directory=top_dir + "/test/cache",
        data_dir=top_dir + "/TIMIT", cache_directory=top_dir + "/test/cache",
        min_cluster_count=1,
        max_cluster_count=5,
        return_1d_audio_data=False,
        test_classes=TIMIT_lst[:100],
        validate_classes=TIMIT_lst[100:],
        concat_audio_files_of_speaker=True,

        minimum_snippets_per_cluster=2,
        window_width=200
    )
    en = CnnEmbedding(
        output_size=256, cnn_layers_per_block=1, block_feature_counts=[32, 64, 128], fc_layer_feature_counts=[512],
        hidden_activation=LeakyReLU(), final_activation=LeakyReLU(),
        batch_norm_for_init_layer=False, batch_norm_after_activation=True, batch_norm_for_final_layer=True,
        dropout_init=0.25, dropout_after_max_pooling=[0.25, 0.25, 0.25], dropout_after_fc=0.25
    )

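    # Factory that wires the data provider and embedding defined above into the clustering network.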
    def get_cnn():
        c_nn = ClusterNNTry00_V98(dp, 20, en, lstm_layers=14, internal_embedding_size=96*3, cluster_count_dense_layers=1, cluster_count_dense_units=256,
                                  output_dense_layers=0, output_dense_units=256, cluster_count_lstm_layers=1, cluster_count_lstm_units=128,
                                  kl_embedding_size=128, kl_divergence_factor=0.1, simplified_center_loss_factor=0.5)
        c_nn.include_self_comparison = False
        c_nn.weighted_classes = True
        c_nn.class_weights_approximation = 'stochastic'
        c_nn.minibatch_size = 15
        c_nn.class_weights_post_processing_f = lambda x: np.sqrt(x)
        c_nn.set_loss_weight('similarities_output', 5.0)
        c_nn.optimizer = Adadelta(lr=5.0)
        concat_audio_files_of_speaker=True,
        test_classes=TIMIT_lst[:200],
        validate_classes=TIMIT_lst[:200],

        # # Create at least two 1-second snippets per speaker and also create some hints
        # window_width=[(100, 100), (200, 200), (300, 300), (400, 400)],
        # minimum_snippets_per_cluster=[(100, 100), (100, 100)],
        # split_audio_pieces_longer_than_and_create_hints=100
        window_width=256,
        minimum_snippets_per_cluster=2)
    en = CnnEmbedding(output_size=256,
                      cnn_layers_per_block=1,
                      block_feature_counts=[32, 64, 128],
                      fc_layer_feature_counts=[256],
                      hidden_activation=LeakyReLU(),
                      final_activation=LeakyReLU(),
                      batch_norm_for_init_layer=False,
                      batch_norm_after_activation=True,
                      batch_norm_for_final_layer=True,
                      max_pooling_size=[(4, 2), (2, 2), (2, 2)],
                      max_pooling_stride=[(4, 2), (2, 2), (2, 2)])

    def get_cnn():
        c_nn = ClusterNNTry00_V93(dp,
                                  40,
                                  en,
                                  lstm_layers=14,
                                  internal_embedding_size=96 * 3,
                                  cluster_count_dense_layers=1,
                                  cluster_count_dense_units=512,
                                  output_dense_layers=1,
    dp = TIMITDataProvider(
        # data_dir=top_dir + "/test/TIMIT_mini", cache_directory=top_dir + "/test/cache",
        data_dir=top_dir + "/TIMIT", cache_directory=top_dir + "/test/cache",
        min_cluster_count=10,
        max_cluster_count=10,
        return_1d_audio_data=False,

        train_classes=speaker_list,
        test_classes=speaker_list,
        validate_classes=speaker_list,

        concat_audio_files_of_speaker=True
    )

    en = CnnEmbedding(
        output_size=100, cnn_layers_per_block=1, block_feature_counts=[32, 64],
        fc_layer_feature_counts=[100], hidden_activation='relu', final_activation='relu',
        batch_norm_for_init_layer=True, cnn_filter_size=5
    )

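    # Same setup as in Example #1: the recurrent and dense parts are disabled and the cluster count is fixed.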
    c_nn = ClusterNNTry04_Ddbc(dp, 20, en, lstm_layers=0, lstm_units=1, cluster_count_dense_layers=0, cluster_count_dense_units=1,
                              output_dense_layers=0, output_dense_units=1, cluster_count_lstm_layers=0, cluster_count_lstm_units=1)
    c_nn.minibatch_size = 40
    c_nn.validate_every_nth_epoch = 10
    c_nn.optimizer = Adadelta(clipnorm=0.5)

    c_nn.use_cluster_count_loss = False
    c_nn.use_similarities_loss = False
    c_nn.fixed_cluster_count_output = dp.get_max_cluster_count()

    # c_nn.f_cluster_count = lambda: 10
    # c_nn.minibatch_size = 200
Example #7
        'MPRT0', 'MCTT0', 'FEME0', 'MCRE0'
    ]
    dp = TIMITDataProvider(
        # data_dir=top_dir + "/test/TIMIT_mini", cache_directory=top_dir + "/test/cache",
        data_dir=top_dir + "/TIMIT",
        cache_directory=top_dir + "/test/cache",
        min_cluster_count=1,
        max_cluster_count=5,
        return_1d_audio_data=False,
        test_classes=TIMIT20_lst,
        validate_classes=TIMIT20_lst,
        concat_audio_files_of_speaker=True)
    en = CnnEmbedding(output_size=128,
                      cnn_layers_per_block=1,
                      block_feature_counts=[32, 64],
                      fc_layer_feature_counts=[],
                      hidden_activation='relu',
                      final_activation='tanh',
                      batch_norm_for_init_layer=True)
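    # Apply L2 regularization (factor 0.001) to the embedding network.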
    en.regularizer = l2(0.001)

    c_nn = ClusterNNTry00_V17(dp,
                              20,
                              en,
                              lstm_layers=7,
                              lstm_units=96,
                              cluster_count_dense_layers=1,
                              cluster_count_dense_units=256,
                              output_dense_layers=1,
                              output_dense_units=256,
                              cluster_count_lstm_layers=2,
Example #8
    required_input_count = dp.get_required_input_count_for_full_test('test')
    print("Required input count: {}".format(required_input_count))

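    # Fetch data for the required number of inputs.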
    data = dp.get_data(required_input_count, 1)


    # dp = Birds200DataProvider(
    #     min_cluster_count=1,
    #     max_cluster_count=2,
    # )
    # dp = Simple2DPointDataProvider(min_cluster_count=1, max_cluster_count=2, allow_less_clusters=False)

    # en = SimpleFCEmbedding(hidden_layers=[3])
    # en = BDLSTMEmbedding()
    # en = None
    en = CnnEmbedding(block_feature_counts=[1, 2, 3], fc_layer_feature_counts=[4], output_size=3, dimensionality='auto')

    # cnn = MinimalClusterNN(dp, 5, en, weighted_classes=True)
    input_count = required_input_count
    cnn = ClusterNNHint(dp, input_count, en, weighted_classes=True, lstm_layers=2, cluster_count_lstm_units=2, cluster_count_lstm_layers=1, cluster_count_dense_layers=1,
                                cluster_count_dense_units=1, output_dense_layers=1, output_dense_units=1)
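    # Note: the ClusterNNHint instance above is immediately replaced by a ClusterNNMergedInputs network.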
    cnn = ClusterNNMergedInputs(dp, input_count, en, weighted_classes=True)
    # cnn.class_weights_approximation = 'stochastic'
    # cnn.build_networks(print_summaries=True)
    t_start = time()
    cnn.build_networks(print_summaries=False, build_training_model=False)
    t_end = time()
    print("Required time to build the networks: {} s".format(t_end - t_start))
    cnn.minibatch_size = 2
    cnn.validate_every_nth_epoch = 1
Example #9
    from impl.nn.base.embedding_nn.cnn_embedding import CnnEmbedding

    is_linux = platform == "linux" or platform == "linux2"
    top_dir = "/cluster/home/meierbe8/data/MT/" if is_linux else "G:/tmp/"
    ds_dir = "./" if is_linux else "../"

    dp = FashionMNISTDataProvider(
        min_cluster_count=1,
        max_cluster_count=5,
    )
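    # CNN embedding with heavy dropout (0.5) at the input and after each max-pooling block.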
    en = CnnEmbedding(output_size=32,
                      cnn_layers_per_block=2,
                      block_feature_counts=[64, 128, 256],
                      fc_layer_feature_counts=[512],
                      hidden_activation=LeakyReLU(),
                      final_activation=LeakyReLU(),
                      batch_norm_for_init_layer=False,
                      batch_norm_after_activation=True,
                      batch_norm_for_final_layer=True,
                      dropout_init=.5,
                      dropout_after_max_pooling=[.5, .5, .5])

    c_nn = ClusterNNTry00_V30(dp,
                              20,
                              en,
                              lstm_layers=7,
                              internal_embedding_size=96,
                              cluster_count_dense_layers=1,
                              cluster_count_dense_units=256,
                              output_dense_layers=1,
                              output_dense_units=256,
        # data_dir=top_dir + "/test/TIMIT_mini", cache_directory=top_dir + "/test/cache",
        data_dir=top_dir + "/TIMIT",
        cache_directory=top_dir + "/test/cache",
        min_cluster_count=1,
        max_cluster_count=5,
        return_1d_audio_data=True,
        test_classes=TIMIT_lst,
        validate_classes=TIMIT_lst,
        concat_audio_files_of_speaker=True,
        minimum_snippets_per_cluster=[(200, 200), (100, 100)],
        window_width=[(100, 200)])
    en = CnnEmbedding(output_size=256,
                      cnn_layers_per_block=1,
                      block_feature_counts=[64, 128, 256],
                      fc_layer_feature_counts=[256],
                      hidden_activation=LeakyReLU(),
                      final_activation=LeakyReLU(),
                      batch_norm_for_init_layer=False,
                      batch_norm_after_activation=True,
                      batch_norm_for_final_layer=True,
                      dimensionality='1d')

    c_nn = ClusterNNTry00_V51(dp,
                              20,
                              en,
                              lstm_layers=7,
                              internal_embedding_size=96,
                              cluster_count_dense_layers=1,
                              cluster_count_dense_units=256,
                              output_dense_layers=1,
                              output_dense_units=256,
                              cluster_count_lstm_layers=1,