def get_cnn(dataprovider=None):
    """Build and configure a ``ClusterNNTry04_Ddbc`` network.

    Args:
        dataprovider: Data provider handed to the network. When ``None``,
            the module-level ``dp`` is used instead.

    Returns:
        The configured (not yet built or trained) ``ClusterNNTry04_Ddbc``
        instance.
    """
    if dataprovider is None:
        dataprovider = dp

    en = CnnEmbedding(
        output_size=100,
        cnn_layers_per_block=1,
        block_feature_counts=[32, 64],
        fc_layer_feature_counts=[100],
        hidden_activation='relu',
        final_activation='relu',
        batch_norm_for_init_layer=True,
        dropout_after_max_pooling=0.5,
        dropout_after_fc=0.5,
    )

    # Use the resolved provider, not the global ``dp``: otherwise a provider
    # passed in by the caller would be silently ignored.
    c_nn = ClusterNNTry04_Ddbc(
        dataprovider, 20, en,
        lstm_layers=0, lstm_units=1,
        cluster_count_dense_layers=0, cluster_count_dense_units=1,
        output_dense_layers=0, output_dense_units=1,
        cluster_count_lstm_layers=0, cluster_count_lstm_units=1,
    )
    c_nn.minibatch_size = 35
    c_nn.validate_every_nth_epoch = 10
    c_nn.optimizer = Adadelta(clipnorm=0.5)

    # Both auxiliary losses are switched off and the cluster-count output is
    # pinned to the maximum cluster count reported by the same provider the
    # network was constructed with.
    c_nn.use_cluster_count_loss = False
    c_nn.use_similarities_loss = False
    c_nn.fixed_cluster_count_output = dataprovider.get_max_cluster_count()
    return c_nn
def get_cnn(dataprovider=None):
    """Create a fully parameterised ``ClusterNNTry00_V135`` network.

    Args:
        dataprovider: Data provider passed to the network; the module-level
            ``dp`` is used when this is ``None``.

    Returns:
        The configured ``ClusterNNTry00_V135`` instance.
    """
    provider = dp if dataprovider is None else dataprovider

    # Embedding network settings.
    embedding_settings = dict(
        output_size=256,
        cnn_layers_per_block=1,
        block_feature_counts=[32, 64, 128],
        fc_layer_feature_counts=[256],
        hidden_activation=LeakyReLU(),
        final_activation=LeakyReLU(),
        batch_norm_for_init_layer=False,
        batch_norm_after_activation=True,
        batch_norm_for_final_layer=True,
    )
    embedding = CnnEmbedding(**embedding_settings)

    # Clustering network settings.
    network_settings = dict(
        lstm_layers=14,
        internal_embedding_size=96 * 3,
        cluster_count_dense_layers=1,
        cluster_count_dense_units=256,
        output_dense_layers=0,
        output_dense_units=256,
        cluster_count_lstm_layers=1,
        cluster_count_lstm_units=128,
        kl_embedding_size=128,
        kl_divergence_factor=0.,
        simplified_center_loss_factor=0.,
        cluster_assignment_regularization_factor=10.0,
        use_v02_cluster_assignment_loss=True,
    )
    network = ClusterNNTry00_V135(provider, 20, embedding, **network_settings)

    # Class weighting and loss setup (assignment order kept as in the
    # original in case any of these attributes are properties).
    network.include_self_comparison = False
    network.weighted_classes = True
    network.class_weights_approximation = 'stochastic'
    network.minibatch_size = 25
    network.class_weights_post_processing_f = lambda x: np.sqrt(x)
    network.set_loss_weight('similarities_output', 5.0)
    network.optimizer = Adadelta(lr=5.0)

    # Validation cadence and validation set size both scale with one factor.
    validation_factor = 10
    network.early_stopping_iterations = 15001
    network.validate_every_nth_epoch = 10 * validation_factor
    network.validation_data_count = network.minibatch_size * validation_factor

    # network.prepend_base_name_to_layer_name = False

    return network
ds_dir + 'datasets/TIMIT/traininglist_100/testlist_20.txt') dp = TIMITDataProvider( # data_dir=top_dir + "/test/TIMIT_mini", cache_directory=top_dir + "/test/cache", data_dir=top_dir + "/TIMIT", cache_directory=top_dir + "/test/cache", min_cluster_count=1, max_cluster_count=5, return_1d_audio_data=False, test_classes=TIMIT_lst, validate_classes=TIMIT_lst, concat_audio_files_of_speaker=True) en = CnnEmbedding(output_size=256, cnn_layers_per_block=1, block_feature_counts=[32, 64], fc_layer_feature_counts=[], hidden_activation=LeakyReLU(), final_activation=LeakyReLU(), batch_norm_for_init_layer=False, batch_norm_after_activation=True, batch_norm_for_final_layer=True) c_nn = ClusterNNTry00_V23(dp, 20, en, lstm_layers=7, internal_embedding_size=96, cluster_count_dense_layers=1, cluster_count_dense_units=256, output_dense_layers=1, output_dense_units=256, cluster_count_lstm_layers=2,
dp = TIMITDataProvider( # data_dir=top_dir + "/test/TIMIT_mini", cache_directory=top_dir + "/test/cache", data_dir=top_dir + "/TIMIT", cache_directory=top_dir + "/test/cache", min_cluster_count=1, max_cluster_count=5, return_1d_audio_data=False, test_classes=TIMIT_lst[:100], validate_classes=TIMIT_lst[100:], concat_audio_files_of_speaker=True, minimum_snippets_per_cluster=2, window_width=200 ) en = CnnEmbedding( output_size=256, cnn_layers_per_block=1, block_feature_counts=[32, 64, 128], fc_layer_feature_counts=[512], hidden_activation=LeakyReLU(), final_activation=LeakyReLU(), batch_norm_for_init_layer=False, batch_norm_after_activation=True, batch_norm_for_final_layer=True, dropout_init=0.25, dropout_after_max_pooling=[0.25, 0.25, 0.25], dropout_after_fc=0.25 ) def get_cnn(): c_nn = ClusterNNTry00_V98(dp, 20, en, lstm_layers=14, internal_embedding_size=96*3, cluster_count_dense_layers=1, cluster_count_dense_units=256, output_dense_layers=0, output_dense_units=256, cluster_count_lstm_layers=1, cluster_count_lstm_units=128, kl_embedding_size=128, kl_divergence_factor=0.1, simplified_center_loss_factor=0.5) c_nn.include_self_comparison = False c_nn.weighted_classes = True c_nn.class_weights_approximation = 'stochastic' c_nn.minibatch_size = 15 c_nn.class_weights_post_processing_f = lambda x: np.sqrt(x) c_nn.set_loss_weight('similarities_output', 5.0) c_nn.optimizer = Adadelta(lr=5.0)
concat_audio_files_of_speaker=True, test_classes=TIMIT_lst[:200], validate_classes=TIMIT_lst[:200], # # Create at least two 1s snippets per speaker and create also some hints # window_width=[(100, 100), (200, 200), (300, 300), (400, 400)], # minimum_snippets_per_cluster=[(100, 100), (100, 100)], # split_audio_pieces_longer_than_and_create_hints=100 window_width=256, minimum_snippets_per_cluster=2) en = CnnEmbedding(output_size=256, cnn_layers_per_block=1, block_feature_counts=[32, 64, 128], fc_layer_feature_counts=[256], hidden_activation=LeakyReLU(), final_activation=LeakyReLU(), batch_norm_for_init_layer=False, batch_norm_after_activation=True, batch_norm_for_final_layer=True, max_pooling_size=[(4, 2), (2, 2), (2, 2)], max_pooling_stride=[(4, 2), (2, 2), (2, 2)]) def get_cnn(): c_nn = ClusterNNTry00_V93(dp, 40, en, lstm_layers=14, internal_embedding_size=96 * 3, cluster_count_dense_layers=1, cluster_count_dense_units=512, output_dense_layers=1,
# data_dir=top_dir + "/test/TIMIT_mini", cache_directory=top_dir + "/test/cache", data_dir=top_dir + "/TIMIT", cache_directory=top_dir + "/test/cache", min_cluster_count=10, max_cluster_count=10, return_1d_audio_data=False, train_classes=speaker_list, test_classes=speaker_list, validate_classes=speaker_list, concat_audio_files_of_speaker=True ) en = CnnEmbedding( output_size=100, cnn_layers_per_block=1, block_feature_counts=[32, 64], fc_layer_feature_counts=[100], hidden_activation='relu', final_activation='relu', batch_norm_for_init_layer=True, cnn_filter_size=5 ) c_nn = ClusterNNTry04_Ddbc(dp, 20, en, lstm_layers=0, lstm_units=1, cluster_count_dense_layers=0, cluster_count_dense_units=1, output_dense_layers=0, output_dense_units=1, cluster_count_lstm_layers=0, cluster_count_lstm_units=1) c_nn.minibatch_size = 40 c_nn.validate_every_nth_epoch = 10 c_nn.optimizer = Adadelta(clipnorm=0.5) c_nn.use_cluster_count_loss = False c_nn.use_similarities_loss = False c_nn.fixed_cluster_count_output = dp.get_max_cluster_count() # c_nn.f_cluster_count = lambda: 10 # c_nn.minibatch_size = 200
'MPRT0', 'MCTT0', 'FEME0', 'MCRE0' ] dp = TIMITDataProvider( # data_dir=top_dir + "/test/TIMIT_mini", cache_directory=top_dir + "/test/cache", data_dir=top_dir + "/TIMIT", cache_directory=top_dir + "/test/cache", min_cluster_count=1, max_cluster_count=5, return_1d_audio_data=False, test_classes=TIMIT20_lst, validate_classes=TIMIT20_lst, concat_audio_files_of_speaker=True) en = CnnEmbedding(output_size=128, cnn_layers_per_block=1, block_feature_counts=[32, 64], fc_layer_feature_counts=[], hidden_activation='relu', final_activation='tanh', batch_norm_for_init_layer=True) en.regularizer = l2(0.001) c_nn = ClusterNNTry00_V17(dp, 20, en, lstm_layers=7, lstm_units=96, cluster_count_dense_layers=1, cluster_count_dense_units=256, output_dense_layers=1, output_dense_units=256, cluster_count_lstm_layers=2,
# Ask the data provider how many inputs a full run over the 'test' split
# needs, report it, and fetch data of that size.
# NOTE(review): the meaning of the second argument to get_data (1) is not
# visible here -- confirm against the data provider.
required_input_count = dp.get_required_input_count_for_full_test('test')
print("Required input count: {}".format(required_input_count))
data = dp.get_data(required_input_count, 1)

# Alternative data providers / embeddings kept for quick manual switching.
# dp = Birds200DataProvider(
#     min_cluster_count=1,
#     max_cluster_count=2,
# )
# dp = Simple2DPointDataProvider(min_cluster_count=1, max_cluster_count=2, allow_less_clusters=False)
# en = SimpleFCEmbedding(hidden_layers=[3])
# en = BDLSTMEmbedding()
# en = None
en = CnnEmbedding(block_feature_counts=[1, 2, 3], fc_layer_feature_counts=[4], output_size=3, dimensionality='auto')

# cnn = MinimalClusterNN(dp, 5, en, weighted_classes=True)
# The network input count is set to the count required for a full test run.
input_count = required_input_count
# NOTE(review): this ClusterNNHint instance is replaced by the
# ClusterNNMergedInputs instance on the next statement; only the latter is
# built below. Presumably a manual toggle between the two variants -- confirm.
cnn = ClusterNNHint(dp, input_count, en, weighted_classes=True, lstm_layers=2, cluster_count_lstm_units=2, cluster_count_lstm_layers=1, cluster_count_dense_layers=1, cluster_count_dense_units=1, output_dense_layers=1, output_dense_units=1)
cnn = ClusterNNMergedInputs(dp, input_count, en, weighted_classes=True)
# cnn.class_weights_approximation = 'stochastic'

# cnn.build_networks(print_summaries=True)
# Measure how long building the networks takes (summaries and the training
# model are both switched off for this build).
t_start = time()
cnn.build_networks(print_summaries=False, build_training_model=False)
t_end = time()
print("Required time to build the networks: {} s".format(t_end - t_start))

# Small batch size and per-epoch validation.
cnn.minibatch_size = 2
cnn.validate_every_nth_epoch = 1
from impl.nn.base.embedding_nn.cnn_embedding import CnnEmbedding is_linux = platform == "linux" or platform == "linux2" top_dir = "/cluster/home/meierbe8/data/MT/" if is_linux else "G:/tmp/" ds_dir = "./" if is_linux else "../" dp = FashionMNISTDataProvider( min_cluster_count=1, max_cluster_count=5, ) en = CnnEmbedding(output_size=32, cnn_layers_per_block=2, block_feature_counts=[64, 128, 256], fc_layer_feature_counts=[512], hidden_activation=LeakyReLU(), final_activation=LeakyReLU(), batch_norm_for_init_layer=False, batch_norm_after_activation=True, batch_norm_for_final_layer=True, dropout_init=.5, dropout_after_max_pooling=[.5, .5, .5]) c_nn = ClusterNNTry00_V30(dp, 20, en, lstm_layers=7, internal_embedding_size=96, cluster_count_dense_layers=1, cluster_count_dense_units=256, output_dense_layers=1, output_dense_units=256,
# data_dir=top_dir + "/test/TIMIT_mini", cache_directory=top_dir + "/test/cache", data_dir=top_dir + "/TIMIT", cache_directory=top_dir + "/test/cache", min_cluster_count=1, max_cluster_count=5, return_1d_audio_data=True, test_classes=TIMIT_lst, validate_classes=TIMIT_lst, concat_audio_files_of_speaker=True, minimum_snippets_per_cluster=[(200, 200), (100, 100)], window_width=[(100, 200)]) en = CnnEmbedding(output_size=256, cnn_layers_per_block=1, block_feature_counts=[64, 128, 256], fc_layer_feature_counts=[256], hidden_activation=LeakyReLU(), final_activation=LeakyReLU(), batch_norm_for_init_layer=False, batch_norm_after_activation=True, batch_norm_for_final_layer=True, dimensionality='1d') c_nn = ClusterNNTry00_V51(dp, 20, en, lstm_layers=7, internal_embedding_size=96, cluster_count_dense_layers=1, cluster_count_dense_units=256, output_dense_layers=1, output_dense_units=256, cluster_count_lstm_layers=1,