def create_and_train(num_epochs=1000, batch_size=100, epoch_batches=10,
                     network_params_file_in=None, network_params_file_out=None,
                     train_file=None,
                     network_fun=nf.create_network_10_speakers,
                     with_validation=True):
    """
    Builds a network, trains it with the KL-divergence loss functions and
    optionally stores the resulting parameter values.

    :param num_epochs: Number of epochs handed to ``train``.
    :param batch_size: First argument of both batch iterators.
    :param epoch_batches: Second argument of both batch iterators
        (presumably the number of batches per epoch -- confirm against the iterators).
    :param network_params_file_in: Optional file whose parameter values are
        loaded into the network before training. Skipped when None.
    :param network_params_file_out: Optional file the parameter values are
        saved to after training. Skipped when None.
    :param train_file: Path of the pickle that holds ``(X, y, speaker_names)``.
    :param network_fun: Callable that builds the network from the input variable.
    :param with_validation: When falsy, ``None`` is passed to ``train`` instead
        of the validation function.
    """
    # Read the pickled training set; the speaker names are not used here.
    with open(train_file, 'rb') as train_handle:
        features, labels, _ = pickle.load(train_handle)

    # Symbolic theano variables for the inputs, the targets and the margin.
    inputs_sym = T.tensor4('inputs')
    targets_sym = T.ivector('targets')
    margin_sym = T.scalar('margin')

    # Build the network and, if requested, start from stored parameter values.
    net = network_fun(inputs_sym)
    if network_params_file_in is not None:
        stored_values = pickler.load(network_params_file_in)
        lasagne.layers.set_all_param_values(net, stored_values)

    train_fn, val_fn = create_loss_functions_kl_div(inputs_sym, net, targets_sym, margin_sym)
    validation_fn = val_fn if with_validation else None

    # Both iterators read the module-level ``config``.
    train_iterator = SpectTrainBatchIterator(batch_size, epoch_batches, config)
    valid_iterator = SpectValidBatchIterator(batch_size, epoch_batches, config)
    train(features, labels, num_epochs, train_fn, validation_fn, train_iterator, valid_iterator)

    # Nothing to persist when no output file was given.
    if network_params_file_out is None:
        return
    pickler.save(lasagne.layers.get_all_param_values(net), network_params_file_out)
def analyse_results(network_name, checkpoint_names, set_of_predicted_clusters,
                    set_of_true_clusters, embedding_numbers, set_of_times,
                    set_of_utterance_embeddings):
    """
    Analyses each checkpoint with the values of set_of_predicted_clusters and
    set_of_true_clusters. Per-checkpoint results are cached in an intermediate
    pickle and reused if that pickle already exists. After the analysis the
    results are stored in the Pickle network_name.pickle and the best Result
    according to min MR is stored in network_name_best.pickle. Finally the
    intermediate analysis and test pickles of every checkpoint are removed.

    :param network_name: The name for the result pickle.
    :param checkpoint_names: A list of names from the checkpoints. Later used as curve names.
    :param set_of_predicted_clusters: A 2D array of the predicted clusters from the network.
    [checkpoint, clusters]
    :param set_of_true_clusters: A 2D array of the validation clusters.
    [checkpoint, validation-clusters]
    :param embedding_numbers: A list which represents the number of embeddings in each checkpoint.
    :param set_of_times: A 2D array of the time per utterance [checkpoint, times]
    :param set_of_utterance_embeddings: Indexed per checkpoint; the entry is
    handed to ``_calculate_eer_result`` to obtain the EER of that checkpoint.
    """
    log = get_logger('analysis', logging.INFO)
    log.info('Run analysis')

    # One row per metric, one column per checkpoint.
    metric_sets = []
    for _ in range(len(metric_names)):
        metric_sets.append([None] * len(set_of_predicted_clusters))

    for position, clusters in enumerate(set_of_predicted_clusters):
        checkpoint_name = checkpoint_names[position]
        log.info('Analysing checkpoint:' + checkpoint_name)

        # Compute and cache the values unless an earlier run already stored them.
        cache_file = get_results_intermediate_analysis(checkpoint_name)
        if not os.path.isfile(cache_file):
            metric_results = _calculate_analysis_values(
                clusters, set_of_true_clusters[position], set_of_times[position])
            eer_result = _calculate_eer_result(
                set_of_utterance_embeddings[position])
            save((metric_results, eer_result), cache_file)
        else:
            metric_results, eer_result = load(cache_file)

        log.info("\tEER: {}".format(round(eer_result, 5)))

        # Spread this checkpoint's values over the per-metric rows.
        for metric_index, value in enumerate(metric_results):
            metric_sets[metric_index][position] = value

    _write_result_pickle(network_name, checkpoint_names, metric_sets, embedding_numbers)
    _save_best_results(network_name, checkpoint_names, metric_sets, embedding_numbers)

    log.info('Clearing intermediate result checkpoints')
    for checkpoint_name in checkpoint_names:
        leftovers = (get_results_intermediate_analysis(checkpoint_name),
                     get_results_intermediate_test(checkpoint_name))
        for leftover in leftovers:
            if os.path.exists(leftover):
                os.remove(leftover)

    log.info('Analysis done')
def write_result_pickle(network_name, checkpoint_names, set_of_mrs, set_of_homogeneity_scores,
                        set_of_completeness_scores, number_of_embeddings, algorithm):
    """
    Saves the analysis results of one algorithm and logs the target path.

    The stored object is the tuple (checkpoint_names, set_of_mrs,
    set_of_homogeneity_scores, set_of_completeness_scores, number_of_embeddings).

    :param network_name: First part of the result pickle name.
    :param checkpoint_names: Stored as first element of the tuple.
    :param set_of_mrs: Stored as second element of the tuple.
    :param set_of_homogeneity_scores: Stored as third element of the tuple.
    :param set_of_completeness_scores: Stored as fourth element of the tuple.
    :param number_of_embeddings: Stored as fifth element of the tuple.
    :param algorithm: Appended to network_name, separated by an underscore,
        to form the name handed to ``get_result_pickle``.
    """
    log = get_logger('analysis', logging.INFO)

    result_key = network_name + "_" + algorithm
    payload = (checkpoint_names, set_of_mrs, set_of_homogeneity_scores,
               set_of_completeness_scores, number_of_embeddings)

    save(payload, get_result_pickle(result_key))
    log.info('Write result pickle to ' + str(get_result_pickle(result_key)))
def write_result_pickle(network_name, checkpoint_names, set_of_mrs, set_of_homogeneity_scores,
                        set_of_completeness_scores, number_of_embeddings):
    """
    Saves the analysis results under the result pickle of network_name.

    The stored object is the tuple (checkpoint_names, set_of_mrs,
    set_of_homogeneity_scores, set_of_completeness_scores, number_of_embeddings).

    :param network_name: Name handed to ``get_result_pickle`` to obtain the target.
    :param checkpoint_names: Stored as first element of the tuple.
    :param set_of_mrs: Stored as second element of the tuple.
    :param set_of_homogeneity_scores: Stored as third element of the tuple.
    :param set_of_completeness_scores: Stored as fourth element of the tuple.
    :param number_of_embeddings: Stored as fifth element of the tuple.
    """
    log = get_logger('analysis', logging.INFO)
    log.info('Write result pickle')

    payload = (checkpoint_names, set_of_mrs, set_of_homogeneity_scores,
               set_of_completeness_scores, number_of_embeddings)
    save(payload, get_result_pickle(network_name))
def train_network(self):
    """
    Fits one Gaussian mixture per entry of the training features and saves
    the list of fitted models.

    The number of mixture components is read from the config option
    'mixturecount' in section 'gmm'. The training data is loaded from the
    speaker pickle named by config option 'pickle' in section 'train' with
    the suffix '_mfcc'. Each saved entry is a dict with the keys 'mfccs'
    (the features of that entry) and 'gmm' (the fitted mixture).
    """
    n_mixtures = self.config.getint('gmm', 'mixturecount')
    pickle_name = self.config.get('train', 'pickle') + '_mfcc'
    # Only the features are needed; labels and speaker names are discarded.
    X, _, _ = load(get_speaker_pickle(pickle_name))

    def fit_speaker(mfccs):
        # The mixture is fitted on the transposed features (presumably to get
        # one row per frame -- confirm against the pickle layout).
        speaker_gmm = mixture.GaussianMixture(n_components=n_mixtures,
                                              covariance_type='diag', n_init=1)
        speaker_gmm.fit(mfccs.transpose())
        return {'mfccs': mfccs, 'gmm': speaker_gmm}

    save([fit_speaker(mfccs) for mfccs in X], get_experiment_nets(self.name))
def create_and_train(self, training_data):
    """
    Creates the network, fits it on the given training data and saves it to
    ``self.net_path``.

    :param training_data: Argument for ``load``; the loaded object must unpack
        into (x, y, speaker_names). ``x.shape[1]`` is passed to ``self.create_paper``.
    """
    features, labels, _ = load(training_data)

    # Build the network from the description returned by create_paper.
    net_spec = self.create_paper(features.shape[1])
    net = create_net(net_spec)

    # Train and test each get their own segment batch iterator.
    for iterator_attr in ('batch_iterator_train', 'batch_iterator_test'):
        setattr(net, iterator_attr, SegmentBatchIterator(batch_size=128))
    net.train_split = TrainSplit(eval_size=0)

    self.logger.info("Fitting...")
    net.fit(features, labels)

    # Comments from old spectrogram_cnn_100 implementation, don't delete yet if eventually needed later
    # net.load_params_from('../data/experiments/paper/networks/net_100_81_not_reynolds.pickle');
    # net.save_params_to('../../data/experiments/paper/networks/net_100_81_not_reynolds.pickle');
    # network_helper.save(net, '../../data/experiments/paper/networks/net_100_81_not_reynolds.pickle')
    save(net, self.net_path)
def create_and_train(network_file, train_file, out_file):
    """
    Trains a network without convolution layers on top of a separately
    prepared convolution output function and saves its parameter values.

    :param network_file: Passed to ``prepare`` together with a symbolic input
        to obtain the convolution output function.
    :param train_file: Path of the pickle that holds ``(X, y, speaker_names)``.
    :param out_file: Target handed to ``pickler.save`` for the parameter values.
    """
    print("Loading static convolution...")
    conv_input = T.tensor4('x')
    get_conv_output = prepare(network_file, conv_input)
    print("Static convolution loaded!")

    print("Create vanilla network...")
    inputs_sym = T.tensor4('inputs')
    targets_sym = T.ivector('targets')
    margin_sym = T.scalar('margin')
    net = network_factory.create_network_KL_clustering_no_convolution(
        inputs_sym, input_size=1000, output_size=100)
    train_fn, val_fn = create_loss_functions_kl_div(inputs_sym, net, targets_sym, margin_sym)

    # Both iterators are configured identically.
    iterator_settings = dict(batchsize=100, batches_per_epoch=10, config=config,
                             input_dim=1000, get_conv_output=get_conv_output)
    train_batches = SpectWithSeparateConvTrainBatchIterator(**iterator_settings)
    valid_batches = SpectWithSeparateConvValidBatchIterator(**iterator_settings)
    print("Vanilla network created!")

    # The speaker names stored in the pickle are not used here.
    with open(train_file, 'rb') as train_handle:
        features, labels, _ = pickle.load(train_handle)

    clustering_network.train(features, labels, num_epochs=1000,
                             train_fn=train_fn, val_fn=val_fn,
                             train_iterator=train_batches,
                             validation_iterator=valid_batches)
    pickler.save(layers.get_all_param_values(net), out_file)
def _write_result_pickle(network_name, checkpoint_names, metric_sets, number_of_embeddings):
    """
    Saves the tuple (checkpoint_names, metric_sets, number_of_embeddings)
    under the result pickle of network_name.

    :param network_name: Name handed to ``get_result_pickle`` to obtain the target.
    :param checkpoint_names: Stored as first element of the tuple.
    :param metric_sets: Stored as second element of the tuple.
    :param number_of_embeddings: Stored as third element of the tuple.
    """
    log = get_logger('analysis', logging.INFO)
    log.info('Write result pickle')

    payload = (checkpoint_names, metric_sets, number_of_embeddings)
    save(payload, get_result_pickle(network_name))