def __init__(self, root_dir=None, use_memmap=True,
             sliding_window=None, dimension=None):
    """Open an existing precomputed-features directory or initialize one.

    When ``<root_dir>/metadata.yml`` exists, the sliding window and the
    dimension are read from it and compared with the optional
    `sliding_window` / `dimension` arguments. Otherwise the file is
    created from those two arguments, which are then both mandatory.

    Parameters
    ----------
    root_dir : str or Path
        Directory holding ``metadata.yml``. Despite the ``None`` default
        it has to be provided, since it is passed straight to ``Path``.
    use_memmap : bool, optional
        Stored unchanged on ``self.use_memmap``.
    sliding_window : optional
        Object exposing ``start``, ``duration`` and ``step`` attributes.
    dimension : optional
        Expected (existing directory) or declared (new directory)
        feature dimension.

    Raises
    ------
    ValueError
        If the stored metadata disagrees with the provided arguments, or
        if no metadata exists and one of `sliding_window` / `dimension`
        is missing.
    """
    super(Precomputed, self).__init__()
    self.root_dir = Path(root_dir).expanduser().resolve(strict=False)
    self.use_memmap = use_memmap

    path = self.root_dir / 'metadata.yml'

    if path.exists():
        with io.open(path, 'r') as f:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects and is rejected by recent PyYAML
            # releases; consider yaml.safe_load once it is confirmed that
            # the metadata only ever contains plain scalars.
            params = yaml.load(f)

        self.dimension_ = params.pop('dimension')
        # remaining keys (start, duration, step) describe the window
        self.sliding_window_ = SlidingWindow(**params)

        if dimension is not None and self.dimension_ != dimension:
            msg = 'inconsistent "dimension" (is: {0}, should be: {1})'
            # was `self.dimensions_`: the typo raised AttributeError and
            # hid the intended ValueError
            raise ValueError(msg.format(dimension, self.dimension_))

        if ((sliding_window is not None) and
            ((sliding_window.start != self.sliding_window_.start) or
             (sliding_window.duration != self.sliding_window_.duration) or
             (sliding_window.step != self.sliding_window_.step))):
            msg = 'inconsistent "sliding_window"'
            raise ValueError(msg)

    else:
        if sliding_window is None or dimension is None:
            msg = (
                f'Either directory {self.root_dir} does not exist or it '
                f'does not contain precomputed features. In case it exists '
                f'and this was done on purpose, please provide both '
                f'`sliding_window` and `dimension` parameters when '
                f'instantianting `Precomputed`.')
            raise ValueError(msg)

        # create parent directory
        mkdir_p(path.parent)

        params = {
            'start': sliding_window.start,
            'duration': sliding_window.duration,
            'step': sliding_window.step,
            'dimension': dimension,
        }
        with io.open(path, 'w') as f:
            yaml.dump(params, f, default_flow_style=False)

        self.sliding_window_ = sliding_window
        self.dimension_ = dimension
def __init__(self, root_dir=None, use_memmap=True,
             sliding_window=None, dimension=None):
    """Open an existing precomputed-features directory or initialize one.

    When ``<root_dir>/metadata.yml`` exists, the sliding window and the
    dimension are read from it and compared with the optional
    `sliding_window` / `dimension` arguments. Otherwise the file is
    created from those two arguments, which are then both mandatory.

    Parameters
    ----------
    root_dir : str or Path
        Directory holding ``metadata.yml``. Despite the ``None`` default
        it has to be provided, since it is passed straight to ``Path``.
    use_memmap : bool, optional
        Stored unchanged on ``self.use_memmap``.
    sliding_window : optional
        Object exposing ``start``, ``duration`` and ``step`` attributes.
    dimension : optional
        Expected (existing directory) or declared (new directory)
        feature dimension.

    Raises
    ------
    ValueError
        If the stored metadata disagrees with the provided arguments, or
        if no metadata exists and one of `sliding_window` / `dimension`
        is missing.
    """
    super(Precomputed, self).__init__()
    self.root_dir = Path(root_dir).expanduser().resolve(strict=False)
    self.use_memmap = use_memmap

    path = self.root_dir / 'metadata.yml'

    if path.exists():
        with io.open(path, 'r') as f:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects and is rejected by recent PyYAML
            # releases; consider yaml.safe_load once it is confirmed that
            # the metadata only ever contains plain scalars.
            params = yaml.load(f)

        self.dimension_ = params.pop('dimension')
        # remaining keys (start, duration, step) describe the window
        self.sliding_window_ = SlidingWindow(**params)

        if dimension is not None and self.dimension_ != dimension:
            msg = 'inconsistent "dimension" (is: {0}, should be: {1})'
            # was `self.dimensions_`: the typo raised AttributeError and
            # hid the intended ValueError
            raise ValueError(msg.format(dimension, self.dimension_))

        if ((sliding_window is not None) and
            ((sliding_window.start != self.sliding_window_.start) or
             (sliding_window.duration != self.sliding_window_.duration) or
             (sliding_window.step != self.sliding_window_.step))):
            msg = 'inconsistent "sliding_window"'
            raise ValueError(msg)

    else:
        if sliding_window is None or dimension is None:
            msg = (
                f'Either directory {self.root_dir} does not exist or it '
                f'does not contain precomputed features. In case it exists '
                f'and this was done on purpose, please provide both '
                f'`sliding_window` and `dimension` parameters when '
                f'instantianting `Precomputed`.')
            raise ValueError(msg)

        # create parent directory
        mkdir_p(path.parent)

        params = {'start': sliding_window.start,
                  'duration': sliding_window.duration,
                  'step': sliding_window.step,
                  'dimension': dimension}
        with io.open(path, 'w') as f:
            yaml.dump(params, f, default_flow_style=False)

        self.sliding_window_ = sliding_window
        self.dimension_ = dimension
def __init__(self, log_dir, restart=False):
    """Prepare the logging directory used to store checkpoints.

    Parameters
    ----------
    log_dir : str
        Logging directory; stored on ``self.log_dir`` after being
        resolved with ``os.path.realpath``.
    restart : bool, optional
        Stored on ``self.restart``. When falsy, the weights directory is
        created with ``os.makedirs``.
    """
    super(Checkpoint, self).__init__()

    # work with the resolved path from here on
    resolved_dir = os.path.realpath(log_dir)
    self.log_dir = resolved_dir
    mkdir_p(self.log_dir)

    self.restart = restart
    if self.restart:
        return

    # os.makedirs fails when the directory already exists. This is on
    # purpose: the weights directory usually holds the output of very
    # long computations that must not be erased by mistake.
    os.makedirs(self.WEIGHTS_DIR.format(log_dir=self.log_dir))
def apply(self, protocol_name, output_dir):
    """Run the pipeline on every file of a protocol and save the output.

    One text file named ``<protocol_name>.txt`` is written in
    `output_dir`. Each line starts with the file identifier followed by
    the segment start and end times; when the pipeline output is not a
    `Timeline`, the track and label returned by ``itertracks`` are
    appended as well.

    Parameters
    ----------
    protocol_name : str
        Name passed to `get_protocol`.
    output_dir : str or Path
        Directory receiving the output file (created if needed).
    """
    protocol = get_protocol(protocol_name,
                            progress=True,
                            preprocessors=self.preprocessors_)

    mkdir_p(output_dir)
    output_txt = Path(output_dir) / f'{protocol_name}.txt'

    with open(output_txt, mode='w') as fp:
        for current_file in FileFinder.protocol_file_iter(
                protocol, extra_keys=['audio']):

            uri = get_unique_identifier(current_file)
            hypothesis = self.pipeline_.apply(current_file)

            if not isinstance(hypothesis, Timeline):
                # annotation-like output: segment, track and label
                for s, t, l in hypothesis.itertracks(yield_label=True):
                    fp.write(f'{uri} {s.start:.3f} {s.end:.3f} {t} {l}\n')
            else:
                # timeline output: segments only
                for s in hypothesis:
                    fp.write(f'{uri} {s.start:.3f} {s.end:.3f}\n')
def validate(self, protocol_name, subset='development', every=1, start=0,
             end=None, in_order=False, **kwargs):
    """Validate epochs one by one and log their metrics.

    For each epoch yielded by ``self.validate_iter``, the metrics
    returned by ``self.validate_epoch`` are written to a summary writer
    located in the validation directory, and the progress bar
    description is updated with the current and best value of every
    metric.

    Parameters
    ----------
    protocol_name : str
        Protocol name, forwarded to ``validate_init`` / ``validate_epoch``.
    subset : str, optional
        Protocol subset. Defaults to 'development'.
    every, start, end, in_order :
        Forwarded to ``self.validate_iter`` (as ``step``, ``start``,
        ``end`` and ``in_order``).
    **kwargs :
        Forwarded to ``self.validate_init``.
    """
    minimize, values = {}, {}

    validate_dir = self.VALIDATE_DIR.format(train_dir=self.train_dir_,
                                            protocol=protocol_name,
                                            subset=subset)
    mkdir_p(validate_dir)

    writer = tensorboardX.SummaryWriter(log_dir=validate_dir)
    try:
        validation_data = self.validate_init(protocol_name,
                                             subset=subset,
                                             **kwargs)

        progress_bar = tqdm(unit='epoch')
        try:
            epochs = self.validate_iter(start=start, end=end, step=every,
                                        in_order=in_order)
            for i, epoch in enumerate(epochs):

                # {'metric1': {'minimize': True, 'value': 0.2},
                #  'metric2': {'minimize': False, 'value': 0.9}}
                metrics = self.validate_epoch(
                    epoch, protocol_name, subset=subset,
                    validation_data=validation_data)

                # metric bookkeeping is set up from the first epoch only
                if i == 0:
                    for metric, details in metrics.items():
                        minimize[metric] = details.get('minimize', True)
                        values[metric] = SortedDict()

                description = 'Epoch #{epoch}'.format(epoch=epoch)

                for metric, details in sorted(metrics.items()):
                    value = details['value']
                    values[metric][epoch] = value

                    writer.add_scalar(
                        f'validate/{protocol_name}.{subset}/{metric}',
                        values[metric][epoch],
                        global_step=epoch)

                    # metrics flagged 'NA' have no notion of "best"
                    if minimize[metric] == 'NA':
                        continue

                    # keep track of best epoch so far
                    pick = np.argmin if minimize[metric] else np.argmax
                    best_epoch = \
                        values[metric].iloc[pick(values[metric].values())]
                    best_value = values[metric][best_epoch]

                    # values below 1 are displayed as percentages
                    if abs(best_value) < 1:
                        addon = (' : {metric} = {value:.3f}% '
                                 '[{best_value:.3f}%, #{best_epoch}]')
                        description += addon.format(
                            metric=metric,
                            value=100 * value,
                            best_value=100 * best_value,
                            best_epoch=best_epoch)
                    else:
                        addon = (' : {metric} = {value:.3f} '
                                 '[{best_value:.3f}, #{best_epoch}]')
                        description += addon.format(
                            metric=metric,
                            value=value,
                            best_value=best_value,
                            best_epoch=best_epoch)

                progress_bar.set_description(description)
                progress_bar.update(1)
        finally:
            progress_bar.close()
    finally:
        # flush pending events even when validation is interrupted
        writer.close()
def apply(self, protocol_name, output_dir, step=None, internal=False):
    """Extract embeddings for every file of a protocol.

    The epoch with the lowest value in ``self.validate_txt_`` is loaded,
    then embeddings are extracted on the 'development', 'test' and
    'train' subsets (when defined) and stored as HDF5 files below
    `output_dir`, next to a metadata file describing the sliding window
    and the dimension.

    Parameters
    ----------
    protocol_name : str
        Name passed to `get_protocol`.
    output_dir : str
        Root directory of the extracted embeddings.
    step : float, optional
        Step of the extraction window. Defaults to half the sequence
        duration guessed from the experiment directory name.
    internal : optional
        Forwarded to `Extraction`.
    """
    # load best performing model
    with open(self.validate_txt_, 'r') as fp:
        eers = SortedDict(np.loadtxt(fp))
    best_epoch = int(eers.iloc[np.argmin(eers.values())])
    embedding = SequenceEmbeddingAutograd.load(self.train_dir_, best_epoch)

    # guess sequence duration from path (.../3.2+0.8/...)
    directory = basename(dirname(self.experiment_dir))
    duration, _, _, _ = self._directory_to_params(directory)
    if step is None:
        step = 0.5 * duration

    # initialize embedding extraction
    batch_size = self.approach_.batch_size
    extraction = Extraction(embedding, self.feature_extraction_, duration,
                            step=step, batch_size=batch_size,
                            internal=internal)
    sliding_window = extraction.sliding_window
    dimension = extraction.dimension

    metadata = (
        ('start', sliding_window.start),
        ('duration', sliding_window.duration),
        ('step', sliding_window.step),
        ('dimension', dimension),
    )

    def write_metadata(h5_file):
        # same attributes go in the root metadata file and in every file
        for key, value in metadata:
            h5_file.attrs[key] = value

    # create metadata file at root that contains
    # sliding window and dimension information
    path = Precomputed.get_config_path(output_dir)
    mkdir_p(dirname(path))
    # explicit mode: h5py >= 3 opens read-only when no mode is given
    with h5py.File(path, mode='a') as f:
        write_metadata(f)

    # file generator
    protocol = get_protocol(protocol_name, progress=True,
                            preprocessors=self.preprocessors_)

    for subset in ['development', 'test', 'train']:

        # probe the subset: protocols that do not define it raise
        # NotImplementedError, and an empty one has nothing to process
        try:
            next(getattr(protocol, subset)())
        except (NotImplementedError, StopIteration):
            continue

        for current_file in getattr(protocol, subset)():
            fX = extraction.apply(current_file)

            path = Precomputed.get_path(output_dir, current_file)
            mkdir_p(dirname(path))

            with h5py.File(path, mode='a') as f:
                write_metadata(f)
                f.create_dataset('features', data=fX.data)
def validate(self, protocol_name, subset='development', aggregate=False,
             every=1, start=0):
    """Watch the training directory and evaluate epochs as they complete.

    This method loops forever: it waits for new weights, computes the
    equal error rate (EER) of the corresponding model on the validation
    set, appends it to the validation text file and refreshes the
    PNG/EPS plots. It only stops when interrupted or on error.

    Parameters
    ----------
    protocol_name : str
        Protocol used to build the validation set.
    subset : str, optional
        Protocol subset. Defaults to 'development'.
    aggregate : bool, optional
        When True, use the output of the 'internal' layer, summed over
        each group of sequences, instead of the model prediction.
    every : int, optional
        Epoch increment between two validations.
    start : int, optional
        First epoch to validate.
    """
    # prepare paths
    validate_dir = self.VALIDATE_DIR.format(train_dir=self.train_dir_,
                                            protocol=protocol_name)
    aggregate_prefix = 'aggregate.' if aggregate else ''
    validate_txt = self.VALIDATE_TXT.format(
        validate_dir=validate_dir, subset=subset,
        aggregate=aggregate_prefix)
    validate_png = self.VALIDATE_PNG.format(
        validate_dir=validate_dir, subset=subset,
        aggregate=aggregate_prefix)
    validate_eps = self.VALIDATE_EPS.format(
        validate_dir=validate_dir, subset=subset,
        aggregate=aggregate_prefix)

    # create validation directory
    mkdir_p(validate_dir)

    # Build validation set
    if aggregate:
        X, n, y = self._validation_set_z(protocol_name, subset=subset)
    else:
        X, y = self._validation_set_y(protocol_name, subset=subset)

    # list of equal error rates, and epoch to process
    eers, epoch = SortedDict(), start

    desc_format = ('Best EER = {best_eer:.2f}% @ epoch #{best_epoch:d} ::'
                   ' EER = {eer:.2f}% @ epoch #{epoch:d} :')

    progress_bar = tqdm(unit='epoch')

    # the loop below never exits normally, so the progress bar is closed
    # in a `finally` clause (it used to be closed by unreachable code)
    try:
        with open(validate_txt, mode='w') as fp:

            # watch and evaluate forever
            while True:

                # last completed epochs
                completed_epochs = self.get_epochs(self.train_dir_) - 1

                if completed_epochs < epoch:
                    time.sleep(60)
                    continue

                # if last completed epoch has already been processed
                # go back to first epoch that hasn't been processed yet
                process_epoch = epoch if completed_epochs in eers \
                    else completed_epochs

                # do not validate this epoch if it has been done before...
                if process_epoch == epoch and epoch in eers:
                    epoch += every
                    progress_bar.update(every)
                    continue

                weights_h5 = LoggingCallback.WEIGHTS_H5.format(
                    log_dir=self.train_dir_, epoch=process_epoch)

                # this is needed for corner case when training is started
                # from an epoch > 0
                if not isfile(weights_h5):
                    time.sleep(60)
                    continue

                # sleep 5 seconds to let the checkpoint callback finish
                time.sleep(5)

                embedding = keras.models.load_model(
                    weights_h5, custom_objects=CUSTOM_OBJECTS,
                    compile=False)

                if aggregate:

                    def embed(X):
                        func = K.function([
                            embedding.get_layer(name='input').input,
                            K.learning_phase()
                        ], [embedding.get_layer(name='internal').output])
                        return func([X, 0])[0]
                else:
                    embed = embedding.predict

                # embed all validation sequences
                fX = embed(X)

                if aggregate:
                    # n holds the number of sequences of each group:
                    # sum embeddings group by group, then normalize
                    indices = np.hstack([[0], np.cumsum(n)])
                    fX = np.stack([
                        np.sum(np.sum(fX[i:j], axis=0), axis=0)
                        for i, j in pairwise(indices)
                    ])
                    fX = l2_normalize(fX)

                # compute pairwise distances
                y_pred = pdist(fX, metric=self.approach_.metric)
                # compute pairwise groundtruth
                y_true = pdist(y, metric='chebyshev') < 1
                # estimate equal error rate
                _, _, _, eer = det_curve(y_true, y_pred, distances=True)
                eers[process_epoch] = eer

                # save equal error rate to file
                fp.write(self.VALIDATE_TXT_TEMPLATE.format(
                    epoch=process_epoch, eer=eer))
                fp.flush()

                # keep track of best epoch so far
                best_epoch = eers.iloc[np.argmin(eers.values())]
                best_eer = eers[best_epoch]

                progress_bar.set_description(
                    desc_format.format(epoch=process_epoch,
                                       eer=100 * eer,
                                       best_epoch=best_epoch,
                                       best_eer=100 * best_eer))

                # plot
                fig = plt.figure()
                try:
                    plt.plot(eers.keys(), eers.values(), 'b')
                    plt.plot([best_epoch], [best_eer], 'bo')
                    plt.plot([eers.iloc[0], eers.iloc[-1]],
                             [best_eer, best_eer], 'k--')
                    plt.grid(True)
                    plt.xlabel('epoch')
                    plt.ylabel('EER on {subset}'.format(subset=subset))
                    TITLE = '{best_eer:.5g} @ epoch #{best_epoch:d}'
                    title = TITLE.format(best_eer=best_eer,
                                         best_epoch=best_epoch)
                    plt.title(title)
                    plt.tight_layout()
                    plt.savefig(validate_png, dpi=75)
                    plt.savefig(validate_eps)
                finally:
                    # do not leak the figure if plotting/saving fails
                    plt.close(fig)

                # go to next epoch
                if epoch == process_epoch:
                    epoch += every
                    progress_bar.update(every)
                else:
                    progress_bar.update(0)
    finally:
        progress_bar.close()
def data(self, protocol_name, duration=3.2, min_duration=None, step=None,
         heterogeneous=False):
    """Precompute fixed-duration labeled feature sequences.

    For each of the 'train', 'development' and 'test' subsets defined by
    the protocol, one HDF5 file is written with three datasets:
    'X' (feature sequences), 'y' (labels) and 'z' (speech turn
    identifier, incremented every time the label changes).

    Parameters
    ----------
    protocol_name : str
        Name passed to `get_protocol`.
    duration, min_duration, step, heterogeneous :
        Forwarded to `SlidingLabeledSegments`; they also determine the
        name of the output directory.
    """
    # labeled segment generator
    generator = SlidingLabeledSegments(duration=duration,
                                       min_duration=min_duration,
                                       step=step,
                                       heterogeneous=heterogeneous,
                                       source='annotated')

    data_dir = self.DATA_DIR.format(
        root_dir=self.root_dir_,
        params=self._params_to_directory(duration=duration,
                                         min_duration=min_duration,
                                         step=step,
                                         heterogeneous=heterogeneous))

    # file generator
    protocol = get_protocol(protocol_name, progress=True,
                            preprocessors=self.preprocessors_)

    for subset in ['train', 'development', 'test']:

        # probe the subset: protocols that do not define it raise
        # NotImplementedError, and an empty one has nothing to store
        try:
            next(getattr(protocol, subset)())
        except (NotImplementedError, StopIteration):
            continue

        file_generator = getattr(protocol, subset)()

        data_h5 = self.DATA_H5.format(data_dir=data_dir,
                                      protocol=protocol_name,
                                      subset=subset)
        mkdir_p(dirname(data_h5))

        with h5py.File(data_h5, mode='w') as fp:

            # initialize with a fixed number of sequences
            n_sequences = 1000

            # dataset meant to store the speaker identifier
            Y = fp.create_dataset('y',
                                  shape=(n_sequences, ),
                                  dtype=h5py.special_dtype(vlen=bytes),
                                  maxshape=(None, ))

            # dataset meant to store the speech turn unique ID
            Z = fp.create_dataset('z',
                                  shape=(n_sequences, ),
                                  dtype=np.int64,
                                  maxshape=(None, ))

            # X can only be created once the first sequence is known
            X = None

            i = 0  # number of sequences
            z = 0  # speech turn identifier

            for item in file_generator:

                # feature extraction
                features = self.feature_extraction_(item)

                for segment, y in generator.from_file(item):

                    # extract feature sequence
                    x = features.crop(segment, mode='center',
                                      fixed=duration)

                    # create X dataset to store feature sequences
                    # this cannot be done before because we need
                    # the number of samples per sequence and the
                    # dimension of feature vectors.
                    if i == 0:
                        # get number of samples and feature dimension
                        # from the first sequence...
                        n_samples, n_features = x.shape

                        # create X dataset accordingly
                        X = fp.create_dataset(
                            'X',
                            dtype=x.dtype,
                            compression='gzip',
                            shape=(n_sequences, n_samples, n_features),
                            chunks=(1, n_samples, n_features),
                            maxshape=(None, n_samples, n_features))

                        # make sure the speech turn identifier
                        # will not be erroneously incremented
                        prev_y = y

                    # increase the size of the datasets when full
                    if i == n_sequences:
                        n_sequences = int(n_sequences * 1.1)
                        X.resize(n_sequences, axis=0)
                        Y.resize(n_sequences, axis=0)
                        Z.resize(n_sequences, axis=0)

                    # save current feature sequence and its label
                    X[i] = x
                    Y[i] = y

                    # a change of label indicates that a new speech turn
                    # has begun. increment speech turn identifier (z)
                    # accordingly
                    if y != prev_y:
                        prev_y = y
                        z += 1

                    # save speech turn identifier
                    Z[i] = z

                    # increment number of sequences
                    i += 1

            # rows 0..i-1 are filled: trim to exactly i rows (this used
            # to be i - 1, which dropped the last stored sequence)
            if X is not None:
                X.resize(i, axis=0)
            Y.resize(i, axis=0)
            Z.resize(i, axis=0)
def apply(self, protocol_name, subset='test'):
    """Apply the tuned model on a protocol subset and binarize its output.

    Raw predictions are stored as HDF5 files below the apply directory
    (with a root metadata file written from the first prediction), then
    read back, binarized with the tuned onset/offset thresholds and
    written to a single MDTM file.

    Parameters
    ----------
    protocol_name : str
        Name passed to `get_protocol`.
    subset : str, optional
        Protocol subset. Defaults to 'test'.
    """
    apply_dir = self.APPLY_DIR.format(tune_dir=self.tune_dir_)
    mkdir_p(apply_dir)

    # load tuning results
    tune_yml = self.TUNE_YML.format(tune_dir=self.tune_dir_)
    with io.open(tune_yml, 'r') as fp:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; consider yaml.safe_load if the tuning
        # file only contains plain scalars.
        self.tune_ = yaml.load(fp)

    # load model for epoch 'epoch'
    epoch = self.tune_['epoch']
    sequence_labeling = SequenceLabeling.from_disk(self.train_dir_, epoch)

    # initialize sequence labeling
    duration = self.config_['sequences']['duration']
    step = self.config_['sequences']['step']
    aggregation = SequenceLabelingAggregation(sequence_labeling,
                                              self.feature_extraction_,
                                              duration=duration,
                                              step=step)

    def write_metadata(h5_file, prediction):
        # sliding window of the prediction + hard-coded dimension
        window = prediction.sliding_window
        h5_file.attrs['start'] = window.start
        h5_file.attrs['duration'] = window.duration
        h5_file.attrs['step'] = window.step
        h5_file.attrs['dimension'] = 2

    # initialize protocol
    protocol = get_protocol(protocol_name, progress=True,
                            preprocessors=self.preprocessors_)

    for i, item in enumerate(getattr(protocol, subset)()):

        prediction = aggregation.apply(item)

        if i == 0:
            # create metadata file at root that contains
            # sliding window and dimension information
            path = Precomputed.get_config_path(apply_dir)
            # explicit mode: h5py >= 3 opens read-only when no mode is
            # given
            with h5py.File(path, mode='a') as f:
                write_metadata(f, prediction)

        path = Precomputed.get_path(apply_dir, item)

        # create parent directory
        mkdir_p(dirname(path))

        with h5py.File(path, mode='a') as f:
            write_metadata(f, prediction)
            f.create_dataset('features', data=prediction.data)

    # initialize binarizer
    onset = self.tune_['onset']
    offset = self.tune_['offset']
    binarize = Binarize(onset=onset, offset=offset)

    precomputed = Precomputed(root_dir=apply_dir)

    writer = MDTMParser()
    path = self.HARD_MDTM.format(apply_dir=apply_dir,
                                 protocol=protocol_name,
                                 subset=subset)
    with io.open(path, mode='w') as gp:
        for item in getattr(protocol, subset)():
            prediction = precomputed(item)
            segmentation = binarize.apply(prediction, dimension=1)
            writer.write(segmentation.to_annotation(),
                         f=gp, uri=item['uri'], modality='speaker')
def validate(protocol, train_dir, validation_dir, subset='development'):
    """Watch `train_dir` and log the equal error rate of each epoch.

    Starting from epoch 0, this function waits for the weights file of
    the current epoch, evaluates the corresponding model on `subset`,
    appends the result to ``<validation_dir>/<subset>.eer.txt`` and
    refreshes ``<validation_dir>/<subset>.eer.png``. The loop has no
    exit condition: it runs until interrupted or until an error occurs.

    Parameters
    ----------
    protocol :
        Protocol instance; evaluation depends on whether it is a
        `SpeakerRecognitionProtocol` or a `SpeakerDiarizationProtocol`.
    train_dir : str
        Training directory. Its base name encodes the durations, and it
        contains 'architecture.yml' and 'weights/{epoch:04d}.h5'.
    validation_dir : str
        Output directory (created if needed).
    subset : str, optional
        Protocol subset. Defaults to 'development'.
    """
    mkdir_p(validation_dir)

    # -- DURATIONS --
    duration, min_duration, step, heterogeneous = \
        path_to_duration(os.path.basename(train_dir))

    # -- CONFIGURATION --
    # config.yml is looked up three directory levels above train_dir
    config_dir = os.path.dirname(os.path.dirname(os.path.dirname(train_dir)))
    config_yml = config_dir + '/config.yml'
    with open(config_yml, 'r') as fp:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; confirm the configuration is trusted.
        config = yaml.load(fp)

    # -- DISTANCE --
    distance = config['glue'].get('params', {}).get('distance', 'sqeuclidean')

    # -- PREPROCESSORS --
    # each entry names a class of pyannote.audio.preprocessors, which is
    # instantiated and registered on the protocol under the same key
    for key, preprocessor in config.get('preprocessors', {}).items():
        preprocessor_name = preprocessor['name']
        preprocessor_params = preprocessor.get('params', {})
        preprocessors = __import__('pyannote.audio.preprocessors',
                                   fromlist=[preprocessor_name])
        Preprocessor = getattr(preprocessors, preprocessor_name)
        protocol.preprocessors[key] = Preprocessor(**preprocessor_params)

    # -- FEATURE EXTRACTION --
    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    architecture_yml = train_dir + '/architecture.yml'
    WEIGHTS_H5 = train_dir + '/weights/{epoch:04d}.h5'

    # one line per validated epoch: epoch, ISO timestamp, EER
    EER_TEMPLATE = '{epoch:04d} {now} {eer:5f}\n'
    eers = []

    path = validation_dir + '/{subset}.eer.txt'.format(subset=subset)
    with open(path, mode='w') as fp:

        epoch = 0
        while True:

            # wait until weight file is available
            weights_h5 = WEIGHTS_H5.format(epoch=epoch)
            if not os.path.isfile(weights_h5):
                time.sleep(60)
                continue

            now = datetime.datetime.now().isoformat()

            # load current model
            sequence_embedding = SequenceEmbedding.from_disk(
                architecture_yml, weights_h5)

            # if speaker recognition protocol
            if isinstance(protocol, SpeakerRecognitionProtocol):

                aggregation = SequenceEmbeddingAggregation(
                    sequence_embedding, feature_extraction,
                    duration=duration, min_duration=min_duration,
                    step=step, internal=-2, batch_size=8192)
                aggregation.cache_preprocessed_ = False

                # compute equal error rate
                _, _, _, eer = speaker_recognition_xp(
                    aggregation, protocol, subset=subset, distance=distance)

            elif isinstance(protocol, SpeakerDiarizationProtocol):

                # the test set is generated once (first epoch) and reused
                # for every following epoch
                if epoch == 0:
                    X, y = generate_test(
                        protocol, subset, feature_extraction,
                        duration, min_duration=min_duration, step=step,
                        heterogeneous=heterogeneous)

                _, _, _, eer = speaker_diarization_xp(
                    sequence_embedding, X, y, distance=distance)

            # NOTE(review): `eer` is unbound here when `protocol` is
            # neither of the two protocol types handled above.
            fp.write(EER_TEMPLATE.format(epoch=epoch, eer=eer, now=now))
            fp.flush()

            eers.append(eer)
            # epochs are validated in order from 0, so the index in
            # `eers` is the epoch number
            best_epoch = np.argmin(eers)
            best_value = np.min(eers)

            # redraw the EER curve, with the best epoch highlighted
            fig = plt.figure()
            plt.plot(eers, 'b')
            plt.plot([best_epoch], [best_value], 'bo')
            plt.plot([0, epoch], [best_value, best_value], 'k--')
            plt.grid(True)
            plt.xlabel('epoch')
            plt.ylabel('EER on {subset}'.format(subset=subset))
            TITLE = 'EER = {best_value:.5g} on {subset} @ epoch #{best_epoch:d}'
            title = TITLE.format(best_value=best_value,
                                 best_epoch=best_epoch,
                                 subset=subset)
            plt.title(title)
            plt.tight_layout()
            path = validation_dir + '/{subset}.eer.png'.format(subset=subset)
            plt.savefig(path, dpi=75)
            plt.close(fig)

            # skip to next epoch
            epoch += 1
def dump(self, item, features):
    """Save the data of `features` at the location associated to `item`.

    Parameters
    ----------
    item :
        Passed to ``self.get_path`` to obtain the destination path.
    features :
        Object whose ``data`` attribute is saved with ``np.save``.
    """
    destination = Path(self.get_path(item))

    # make sure the containing directory exists before writing
    mkdir_p(destination.parent)
    np.save(destination, features.data)
def extract(database_name, task_name, protocol_name, preprocessors,
            experiment_dir, robust=False):
    """Extract and store features for every file of a protocol.

    Features are computed with the extractor described in
    ``<experiment_dir>/config.yml`` and saved as one HDF5 file per item,
    next to a root metadata file describing the sliding window and the
    dimension. Items whose output file already exists are skipped.

    Parameters
    ----------
    database_name, task_name, protocol_name : str
        Identify the protocol. `task_name` must be 'SpeakerDiarization'
        or 'SpeakerRecognition'.
    preprocessors :
        Passed to `get_database`.
    experiment_dir : str
        Directory containing 'config.yml'; also the output root.
    robust : bool, optional
        When True, failed extractions (exception, ``None`` or NaN) only
        trigger a warning and the file is skipped. Defaults to False.

    Raises
    ------
    ValueError
        If `task_name` is not supported.
    PyannoteFeatureExtractionError
        If extraction fails and `robust` is False.
    """
    database = get_database(database_name, preprocessors=preprocessors)
    protocol = database.get_protocol(task_name, protocol_name, progress=True)

    if task_name == 'SpeakerDiarization':
        items = itertools.chain(protocol.train(),
                                protocol.development(),
                                protocol.test())

    elif task_name == 'SpeakerRecognition':
        items = itertools.chain(protocol.train(yield_name=False),
                                protocol.development_enroll(yield_name=False),
                                protocol.development_test(yield_name=False),
                                protocol.test_enroll(yield_name=False),
                                protocol.test_test(yield_name=False))

    else:
        # used to surface later as an unbound `items` variable
        msg = 'Unsupported task "{task}".'
        raise ValueError(msg.format(task=task_name))

    # load configuration file
    config_yml = experiment_dir + '/config.yml'
    with open(config_yml, 'r') as fp:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; confirm the configuration is trusted.
        config = yaml.load(fp)

    feature_extraction_name = config['feature_extraction']['name']
    features_module = __import__('pyannote.audio.features',
                                 fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features_module, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    sliding_window = feature_extraction.sliding_window()
    dimension = feature_extraction.dimension()

    def write_metadata(h5_file):
        # same attributes go in the root metadata file and in every file
        h5_file.attrs['start'] = sliding_window.start
        h5_file.attrs['duration'] = sliding_window.duration
        h5_file.attrs['step'] = sliding_window.step
        h5_file.attrs['dimension'] = dimension

    # create metadata file at root that contains
    # sliding window and dimension information
    path = Precomputed.get_config_path(experiment_dir)
    # explicit mode: h5py >= 3 opens read-only when no mode is given
    with h5py.File(path, mode='a') as f:
        write_metadata(f)

    for item in items:

        uri = get_unique_identifier(item)
        path = Precomputed.get_path(experiment_dir, item)

        # features were already extracted for this item
        if os.path.exists(path):
            continue

        try:
            # NOTE item contains the 'channel' key
            features = feature_extraction(item)
        except PyannoteFeatureExtractionError:
            if not robust:
                raise
            msg = 'Feature extraction failed for file "{uri}".'
            msg = msg.format(uri=uri)
            warnings.warn(msg)
            continue

        if features is None:
            msg = 'Feature extraction returned None for file "{uri}".'
            msg = msg.format(uri=uri)
            if not robust:
                raise PyannoteFeatureExtractionError(msg)
            warnings.warn(msg)
            continue

        data = features.data

        if np.any(np.isnan(data)):
            msg = 'Feature extraction returned NaNs for file "{uri}".'
            msg = msg.format(uri=uri)
            if not robust:
                raise PyannoteFeatureExtractionError(msg)
            warnings.warn(msg)
            continue

        # create parent directory
        mkdir_p(os.path.dirname(path))

        with h5py.File(path, mode='a') as f:
            write_metadata(f)
            f.create_dataset('features', data=data)
def validate(self, protocol_name, subset='development'):
    """Watch the training directory and evaluate every epoch in turn.

    Starting from epoch 0, this method waits for the weights file of the
    current epoch, computes the equal error rate (EER) averaged over the
    files of the subset, appends it to the validation text file and
    refreshes the PNG/EPS plots. The loop has no exit condition: it runs
    until interrupted or until an error occurs.

    Parameters
    ----------
    protocol_name : str
        Name passed to `get_protocol`.
    subset : str, optional
        Protocol subset. Defaults to 'development'.
    """
    # prepare paths
    validate_dir = self.VALIDATE_DIR.format(train_dir=self.train_dir_,
                                            protocol=protocol_name)
    validate_txt = self.VALIDATE_TXT.format(validate_dir=validate_dir,
                                            subset=subset)
    validate_png = self.VALIDATE_PNG.format(validate_dir=validate_dir,
                                            subset=subset)
    validate_eps = self.VALIDATE_EPS.format(validate_dir=validate_dir,
                                            subset=subset)

    # create validation directory
    mkdir_p(validate_dir)

    # Build validation set
    y = self._validation_set(protocol_name, subset=subset)

    # list of equal error rates, and current epoch
    eers, epoch = [], 0

    desc_format = ('EER = {eer:.2f}% @ epoch #{epoch:d} ::'
                   ' Best EER = {best_eer:.2f}% @ epoch #{best_epoch:d} :')
    progress_bar = tqdm(unit='epoch', total=1000)

    # the loop below never exits normally, so the progress bar is closed
    # in a `finally` clause (it used to be closed by unreachable code)
    try:
        with open(validate_txt, mode='w') as fp:

            # watch and evaluate forever
            while True:

                weights_h5 = LoggingCallback.WEIGHTS_H5.format(
                    log_dir=self.train_dir_, epoch=epoch)

                # wait until weight file is available
                if not isfile(weights_h5):
                    time.sleep(60)
                    continue

                # load model for current epoch
                sequence_labeling = SequenceLabeling.from_disk(
                    self.train_dir_, epoch)

                # initialize sequence labeling
                duration = self.config_['sequences']['duration']
                step = duration  # hack to make things faster
                # step = self.config_['sequences']['step']
                aggregation = SequenceLabelingAggregation(
                    sequence_labeling, self.feature_extraction_,
                    duration=duration, step=step)
                aggregation.cache_preprocessed_ = False

                # estimate equal error rate (average of all files)
                eers_ = []

                protocol = get_protocol(protocol_name, progress=False,
                                        preprocessors=self.preprocessors_)
                file_generator = getattr(protocol, subset)()

                for current_file in file_generator:

                    identifier = get_unique_identifier(current_file)
                    uem = get_annotated(current_file)
                    y_true = y[identifier].crop(uem)[:, 1]

                    # skip files where one of the two classes is missing
                    counts = Counter(y_true)
                    if counts[0] * counts[1] == 0:
                        continue

                    y_pred = aggregation.apply(current_file).crop(uem)[:, 1]

                    _, _, _, eer = det_curve(y_true, y_pred,
                                             distances=False)

                    eers_.append(eer)

                eer = np.mean(eers_)
                eers.append(eer)

                # save equal error rate to file
                fp.write(self.VALIDATE_TXT_TEMPLATE.format(
                    epoch=epoch, eer=eer))
                fp.flush()

                # keep track of best epoch so far
                best_epoch, best_eer = np.argmin(eers), np.min(eers)

                progress_bar.set_description(
                    desc_format.format(epoch=epoch,
                                       eer=100 * eer,
                                       best_epoch=best_epoch,
                                       best_eer=100 * best_eer))
                progress_bar.update(1)

                # plot
                fig = plt.figure()
                try:
                    plt.plot(eers, 'b')
                    plt.plot([best_epoch], [best_eer], 'bo')
                    plt.plot([0, epoch], [best_eer, best_eer], 'k--')
                    plt.grid(True)
                    plt.xlabel('epoch')
                    plt.ylabel('EER on {subset}'.format(subset=subset))
                    TITLE = '{best_eer:.5g} @ epoch #{best_epoch:d}'
                    title = TITLE.format(best_eer=best_eer,
                                         best_epoch=best_epoch)
                    plt.title(title)
                    plt.tight_layout()
                    plt.savefig(validate_png, dpi=75)
                    plt.savefig(validate_eps)
                finally:
                    # do not leak the figure if plotting/saving fails
                    plt.close(fig)

                # validate next epoch
                epoch += 1
    finally:
        progress_bar.close()
def apply(protocol, train_dir, store_dir, threshold, subset='development',
          epoch=None, min_duration=1.0):
    """Apply peak detection on a protocol subset and save '.seg' files.

    Parameters
    ----------
    protocol :
        Protocol instance providing the `subset` generator.
    train_dir : str
        Training directory; 'config.yml' is looked up two directory
        levels above it.
    store_dir : str
        Output root. Files go to ``<store_dir>/<threshold>/<uri>.0.seg``.
    threshold : float
        Peak detection threshold (`alpha` of `Peak`).
    subset : str, optional
        Protocol subset. Defaults to 'development'.
    epoch : int, optional
        Epoch of the model to use. Defaults to the last available one.
    min_duration : float, optional
        Forwarded to `Peak`.

    Raises
    ------
    ValueError
        If `epoch` is not lower than the number of available epochs.
    """
    # -- LOAD MODEL --
    # count consecutive weight files, starting from epoch 0
    nb_epoch = 0
    while True:
        weights_h5 = LoggingCallback.WEIGHTS_H5.format(log_dir=train_dir,
                                                       epoch=nb_epoch)
        if not os.path.isfile(weights_h5):
            break
        nb_epoch += 1

    config_dir = os.path.dirname(os.path.dirname(train_dir))
    config_yml = config_dir + '/config.yml'
    with open(config_yml, 'r') as fp:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; confirm the configuration is trusted.
        config = yaml.load(fp)

    # -- FEATURE EXTRACTION --
    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    # -- SEQUENCE GENERATOR --
    duration = config['sequences']['duration']
    step = config['sequences']['step']

    def saveSeg(filepath, filename, segmentation):
        # one line per segment; start and length are written as integer
        # multiples of 1/100 of the original unit
        with open(filepath, 'w') as f:
            for idx, val in enumerate(segmentation):
                line = filename + ' ' + str(idx) + ' 1 ' + str(int(
                    val[0] * 100)) + ' ' + str(
                        int(val[1] * 100 - val[0] * 100)) + '\n'
                f.write(line)

    filepath = store_dir + '/' + str(threshold) + '/'
    mkdir_p(filepath)

    # -- CHOOSE MODEL --
    # resolve the default first: comparing None with an int raises
    # TypeError on Python 3
    if epoch is None:
        epoch = nb_epoch - 1
    elif epoch >= nb_epoch:
        # valid epochs are 0 .. nb_epoch - 1
        raise ValueError('Epoch should be less than ' + str(nb_epoch))

    sequence_labeling = SequenceLabeling.from_disk(train_dir, epoch)

    aggregation = SequenceLabelingAggregation(sequence_labeling,
                                              feature_extraction,
                                              duration=duration,
                                              step=step)

    # -- PREDICTION --
    predictions = {}
    for dev_file in getattr(protocol, subset)():
        uri = dev_file['uri']
        predictions[uri] = aggregation.apply(dev_file)

    # initialize peak detection algorithm
    peak = Peak(alpha=threshold, min_duration=min_duration)

    for dev_file in getattr(protocol, subset)():
        uri = dev_file['uri']
        hypothesis = peak.apply(predictions[uri])
        filepath = store_dir + '/' + str(threshold) + '/' + uri + '.0.seg'
        saveSeg(filepath, uri, hypothesis)
def evaluate(protocol, train_dir, store_dir, subset='development',
             epoch=None, min_duration=1.0):
    """Report segmentation purity and coverage for a range of thresholds.

    Peak detection is applied with 20 thresholds evenly spaced between 0
    and 1. The resulting purity and coverage are printed and appended to
    ``<store_dir>/res.txt``.

    Parameters
    ----------
    protocol :
        Protocol instance providing the `subset` generator.
    train_dir : str
        Training directory; 'config.yml' is looked up two directory
        levels above it.
    store_dir : str
        Output directory (created if needed).
    subset : str, optional
        Protocol subset. Defaults to 'development'.
    epoch : int, optional
        Epoch of the model to use. Defaults to the last available one.
    min_duration : float, optional
        Forwarded to `Peak`.

    Raises
    ------
    ValueError
        If `epoch` is not lower than the number of available epochs.
    """
    mkdir_p(store_dir)

    # -- LOAD MODEL --
    # count consecutive weight files, starting from epoch 0
    nb_epoch = 0
    while True:
        weights_h5 = LoggingCallback.WEIGHTS_H5.format(log_dir=train_dir,
                                                       epoch=nb_epoch)
        if not os.path.isfile(weights_h5):
            break
        nb_epoch += 1

    config_dir = os.path.dirname(os.path.dirname(train_dir))
    config_yml = config_dir + '/config.yml'
    with open(config_yml, 'r') as fp:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; confirm the configuration is trusted.
        config = yaml.load(fp)

    # -- FEATURE EXTRACTION --
    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    # -- SEQUENCE GENERATOR --
    duration = config['sequences']['duration']
    step = config['sequences']['step']

    groundtruth = {}
    for dev_file in getattr(protocol, subset)():
        uri = dev_file['uri']
        groundtruth[uri] = dev_file['annotation']

    # -- CHOOSE MODEL --
    # resolve the default first: comparing None with an int raises
    # TypeError on Python 3
    if epoch is None:
        epoch = nb_epoch - 1
    elif epoch >= nb_epoch:
        # valid epochs are 0 .. nb_epoch - 1
        raise ValueError('Epoch should be less than ' + str(nb_epoch))

    sequence_labeling = SequenceLabeling.from_disk(train_dir, epoch)

    aggregation = SequenceLabelingAggregation(sequence_labeling,
                                              feature_extraction,
                                              duration=duration,
                                              step=step)

    # -- PREDICTION --
    predictions = {}
    for dev_file in getattr(protocol, subset)():
        uri = dev_file['uri']
        predictions[uri] = aggregation.apply(dev_file)

    # one (purity, coverage) accumulator pair per threshold
    alphas = np.linspace(0, 1, 20)
    purity = [SegmentationPurity(parallel=False) for _ in alphas]
    coverage = [SegmentationCoverage(parallel=False) for _ in alphas]

    # -- SAVE RESULTS --
    for i, alpha in enumerate(alphas):
        # initialize peak detection algorithm
        peak = Peak(alpha=alpha, min_duration=min_duration)
        for uri, reference in groundtruth.items():
            # apply peak detection
            hypothesis = peak.apply(predictions[uri])
            # compute purity and coverage
            purity[i](reference, hypothesis)
            coverage[i](reference, hypothesis)

    TEMPLATE = '{alpha:g} {purity:.3f}% {coverage:.3f}%'
    with open(store_dir + '/res.txt', 'a') as fp:
        for i, a in enumerate(alphas):
            p = 100 * abs(purity[i])
            c = 100 * abs(coverage[i])
            line = TEMPLATE.format(alpha=a, purity=p, coverage=c)
            print(line)
            fp.write(line + '\n')
def test(protocol, tune_dir, apply_dir, subset='test', beta=1.0):
    """Apply the tuned segmentation model and evaluate it.

    For every file of the subset, the soft output is pickled to
    ``<apply_dir>/<uri>.soft.pkl`` and the hard segmentation is dumped
    to ``<apply_dir>/<uri>.hard.json``. Files providing both
    'annotation' and 'annotated' keys are also evaluated: purity,
    coverage and f-measure are appended to ``<apply_dir>/eval.txt``,
    followed by a final 'ALL' line.

    Parameters
    ----------
    protocol :
        Protocol instance providing the `subset` generator.
    tune_dir : str
        Directory containing 'tune.yml' (with 'epoch' and 'alpha' keys).
        The training directory is three levels above it.
    apply_dir : str
        Output directory. It must not exist yet (``os.makedirs``).
    subset : str, optional
        Protocol subset. Defaults to 'test'.
    beta : float, optional
        Forwarded to `f_measure`.
    """
    os.makedirs(apply_dir)

    train_dir = os.path.dirname(os.path.dirname(os.path.dirname(tune_dir)))

    # the training directory is named after the sequence duration
    duration = float(os.path.basename(train_dir))

    config_dir = os.path.dirname(os.path.dirname(os.path.dirname(train_dir)))
    config_yml = config_dir + '/config.yml'
    with open(config_yml, 'r') as fp:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects; confirm the configuration is trusted.
        config = yaml.load(fp)

    # -- FEATURE EXTRACTION --
    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    # -- HYPER-PARAMETERS --
    tune_yml = tune_dir + '/tune.yml'
    with open(tune_yml, 'r') as fp:
        tune = yaml.load(fp)

    architecture_yml = train_dir + '/architecture.yml'
    WEIGHTS_H5 = train_dir + '/weights/{epoch:04d}.h5'
    weights_h5 = WEIGHTS_H5.format(epoch=tune['epoch'])

    sequence_embedding = SequenceEmbedding.from_disk(
        architecture_yml, weights_h5)

    segmentation = Segmentation(
        sequence_embedding, feature_extraction,
        duration=duration, step=0.100)

    peak = Peak(alpha=tune['alpha'])

    HARD_JSON = apply_dir + '/{uri}.hard.json'
    SOFT_PKL = apply_dir + '/{uri}.soft.pkl'

    eval_txt = apply_dir + '/eval.txt'
    TEMPLATE = '{uri} {purity:.5f} {coverage:.5f} {f_measure:.5f}\n'
    purity = SegmentationPurity()
    coverage = SegmentationCoverage()
    fscore = []

    for test_file in getattr(protocol, subset)():

        soft = segmentation.apply(test_file)
        hard = peak.apply(soft)

        uri = get_unique_identifier(test_file)

        path = SOFT_PKL.format(uri=uri)
        mkdir_p(os.path.dirname(path))
        # pickle produces bytes: the file has to be opened in binary
        # mode (text mode fails on Python 3)
        with open(path, 'wb') as fp:
            pickle.dump(soft, fp)

        path = HARD_JSON.format(uri=uri)
        mkdir_p(os.path.dirname(path))
        with open(path, 'w') as fp:
            pyannote.core.json.dump(hard, fp)

        # only evaluate files that provide both keys
        try:
            reference = test_file['annotation']
            # NOTE(review): 'annotated' is only checked for presence; its
            # value is not passed to the metrics below -- confirm intent
            test_file['annotated']
        except KeyError:
            continue

        p = purity(reference, hard)
        c = coverage(reference, hard)
        f = f_measure(c, p, beta=beta)
        fscore.append(f)

        line = TEMPLATE.format(
            uri=uri, purity=p, coverage=c, f_measure=f)
        with open(eval_txt, 'a') as fp:
            fp.write(line)

    # aggregated purity/coverage, and average of per-file f-measures
    p = abs(purity)
    c = abs(coverage)
    f = np.mean(fscore)
    line = TEMPLATE.format(
        uri='ALL', purity=p, coverage=c, f_measure=f)
    with open(eval_txt, 'a') as fp:
        fp.write(line)
def tune(self, protocol_name, subset='development'):
    """Tune clustering hyper-parameters with Bayesian optimization.

    The search space is made of the covariance type ('full' or 'diag')
    and a penalty coefficient in [0, 5]. After every call of the
    objective function, the convergence plot and the best parameters
    found so far are saved in the tuning directory.

    Parameters
    ----------
    protocol_name : str
        Name passed to `get_protocol`.
    subset : str, optional
        Protocol subset. Defaults to 'development'.

    Returns
    -------
    params : dict
        Contains the best 'covariance_type'.
    objective : float
        Best value of the objective function (``res.fun``).
    """
    tune_dir = self.TUNE_DIR.format(experiment_dir=self.experiment_dir,
                                    protocol=protocol_name,
                                    subset=subset)
    mkdir_p(tune_dir)

    tune_yml = self.TUNE_YML.format(tune_dir=tune_dir)
    tune_png = self.TUNE_PNG.format(tune_dir=tune_dir)

    protocol = get_protocol(protocol_name, progress=False,
                            preprocessors=self.preprocessors_)

    items = list(getattr(protocol, subset)())

    # segmentation
    segmentation_mdtm = self.SEGMENTATION_MDTM.format(
        segmentation_dir=self.segmentation_dir_,
        protocol=protocol_name, subset=subset)
    parser = MDTMParser().read(segmentation_mdtm)
    segmentations = [parser(item['uri']) for item in items]

    # features
    features = [self.feature_extraction_(item) for item in items]

    n_jobs = min(cpu_count(), len(items))
    # the pool is only used (hence only created) with several jobs
    pool = Pool(n_jobs) if n_jobs > 1 else None
    print(n_jobs, 'jobs')

    def callback(res):

        # plot convergence
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        import skopt.plots
        _ = skopt.plots.plot_convergence(res)
        plt.savefig(tune_png, dpi=75)
        plt.close()

        # save state
        params = {
            'status': {
                'objective': float(res.fun)
            },
            'covariance_type': str(res.x[0]),
            'penalty_coef': float(res.x[1])
        }

        with io.open(tune_yml, 'w') as fp:
            yaml.dump(params, fp, default_flow_style=False)

    def objective_function(params):

        metric = GreedyDiarizationErrorRate()

        covariance_type, penalty_coef, = params

        process_one_file = functools.partial(
            helper_cluster_tune, metric=metric,
            covariance_type=covariance_type, penalty_coef=penalty_coef)

        if n_jobs > 1:
            # NOTE(review): `metric` is sent to worker processes here;
            # confirm that their updates reach the instance read by
            # abs(metric) below.
            pool.map(process_one_file,
                     zip(items, segmentations, features))
        else:
            for isf in zip(items, segmentations, features):
                process_one_file(isf)

        return abs(metric)

    space = [
        skopt.space.Categorical(['full', 'diag']),
        skopt.space.Real(0., 5., prior='uniform')
    ]

    try:
        res = skopt.gp_minimize(objective_function, space,
                                random_state=1337, n_calls=20,
                                n_random_starts=10, verbose=True,
                                callback=callback)
    finally:
        # release worker processes, even if optimization fails
        if pool is not None:
            pool.close()
            pool.join()

    # NOTE(review): only 'covariance_type' is returned although
    # 'penalty_coef' is tuned (and saved by the callback) as well --
    # confirm this is intended.
    return {'covariance_type': str(res.x[0])}, res.fun
def train(self, protocol_name, subset='development', n_calls=1):
    """Tune the pipeline and log progress.

    Iterates over the statuses yielded by `self.pipeline_.tune_iter`,
    logs latest/best loss and parameters through a `SummaryWriter`
    (one log directory per process id), dumps every new best set of
    parameters to `params.yml`, and finally prints the best parameters.

    Parameters
    ----------
    protocol_name : str
        Name passed to `get_protocol`.
    subset : str, optional
        Subset passed to `tune_iter`. Defaults to 'development'.
    n_calls : int, optional
        The loop stops as soon as the 1-based index of the status yielded
        by `tune_iter` equals `n_calls`. Defaults to 1.
    """

    train_dir = self.TRAIN_DIR.format(
        experiment_dir=self.experiment_dir,
        protocol=protocol_name,
        subset=subset)
    mkdir_p(train_dir)

    protocol = get_protocol(protocol_name, progress=False,
                            preprocessors=self.preprocessors_)

    tune_db = f'{train_dir}/tune.db'
    params_yml = f'{train_dir}/params.yml'
    params_yml_lock = f'{train_dir}/params.yml.lock'

    pid = os.getpid()
    writer = SummaryWriter(log_dir=f"{train_dir}/{pid}")

    progress_bar = tqdm(unit='trial')
    progress_bar.set_description('Trial #1 : ...')
    progress_bar.update(0)

    # only known once a status carrying 'new_best' has been received
    best_loss = None
    n_trials = None

    try:
        iterations = self.pipeline_.tune_iter(
            tune_db, protocol, subset=subset,
            sampler=self.sampler_)

        for s, status in enumerate(iterations):

            # NOTE(review): the check happens before logging, so the
            # `n_calls`-th status itself is never logged (and nothing is
            # logged with the default `n_calls=1`) -- confirm this is
            # the intended behavior.
            if s + 1 == n_calls:
                break

            loss = status['latest']['loss']
            writer.add_scalar(
                f'train/{protocol_name}.{subset}/loss/latest',
                loss, global_step=s + 1)
            writer.add_scalars(
                f'train/{protocol_name}.{subset}/params/latest',
                status['latest']['params'], global_step=s + 1)

            if 'new_best' in status:
                _ = self.dump(status['new_best'], params_yml,
                              params_yml_lock)
                n_trials = status['new_best']['n_trials']
                best_loss = status['new_best']['loss']
                writer.add_scalar(
                    f'train/{protocol_name}.{subset}/loss/best',
                    best_loss, global_step=n_trials)
                writer.add_scalars(
                    f'train/{protocol_name}.{subset}/params/best',
                    status['new_best']['params'], global_step=n_trials)

            # progress bar
            # losses whose magnitude is below 1 are displayed as percentages
            desc = f"Trial #{s+1}"
            as_percentage = abs(loss) < 1
            if as_percentage:
                desc += f" = {100 * loss:.3f}%"
            else:
                desc += f" = {loss:.3f}"

            # skip the "Best" part as long as no best trial has been
            # reported (used to raise UnboundLocalError)
            if best_loss is not None:
                if as_percentage:
                    desc += (f" : Best = {100 * best_loss:.3f}% "
                             f"after {n_trials} trials")
                else:
                    desc += (f" : Best = {best_loss:.3f} "
                             f"after {n_trials} trials")

            progress_bar.set_description(desc=desc)
            progress_bar.update(1)

    finally:
        # release the progress bar and flush/close the event file,
        # whether tuning completed or failed
        progress_bar.close()
        writer.close()

    best = self.pipeline_.best(tune_db)
    content = self.dump(best, params_yml, params_yml_lock)

    # separator as wide as the longest printed line
    longest_line = max(len(line) for line in content.split('\n'))
    sep = "=" * max(len(params_yml), longest_line)
    print(f"\n{sep}\n{params_yml}\n{sep}\n{content}{sep}")
    print(f"Loss = {best['loss']:g} | {best['n_trials']} trials")
    print(f"{sep}")
def embed(protocol, tune_dir, apply_dir, subset='test',
          step=None, internal=None, aggregate=False):
    """Extract embeddings for every file of a protocol subset.

    The training directory is taken to be two levels above `tune_dir`,
    and `config.yml` is read three levels above the training directory.
    The epoch whose weights are loaded comes from `tune.yml` in `tune_dir`.

    One HDF5 file is written per item (at `Precomputed.get_path`), with
    the extracted data stored in a 'features' dataset. Sliding window
    and dimension are stored as attributes of each of these files, and of
    the file located at `Precomputed.get_config_path(apply_dir)`.

    Parameters
    ----------
    protocol : protocol instance
        Must expose a method named after `subset` that yields items.
    tune_dir : str
        Directory containing `tune.yml`.
    apply_dir : str
        Output directory (created if needed).
    subset : str, optional
        Defaults to 'test'.
    step, internal, aggregate : optional
        Passed unchanged to `Extraction`.
    """

    mkdir_p(apply_dir)

    train_dir = os.path.dirname(os.path.dirname(tune_dir))

    duration, _, _, _ = path_to_duration(os.path.basename(train_dir))

    config_dir = os.path.dirname(os.path.dirname(os.path.dirname(train_dir)))
    config_yml = config_dir + '/config.yml'
    # explicit Loader: a bare `yaml.load(fp)` is rejected by recent PyYAML
    # releases. SafeLoader only builds plain YAML types.
    # NOTE(review): confirm config files do not rely on python-specific tags.
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp, Loader=yaml.SafeLoader)

    # -- FEATURE EXTRACTION --
    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    # -- HYPER-PARAMETERS --
    tune_yml = tune_dir + '/tune.yml'
    with open(tune_yml, 'r') as fp:
        tune = yaml.load(fp, Loader=yaml.SafeLoader)

    architecture_yml = train_dir + '/architecture.yml'
    WEIGHTS_H5 = train_dir + '/weights/{epoch:04d}.h5'
    weights_h5 = WEIGHTS_H5.format(epoch=tune['epoch'])

    sequence_embedding = SequenceEmbedding.from_disk(
        architecture_yml, weights_h5)

    extraction = Extraction(sequence_embedding, feature_extraction,
                            duration=duration, step=step,
                            internal=internal, aggregate=aggregate)

    dimension = extraction.dimension
    sliding_window = extraction.sliding_window

    # attributes shared by the root metadata file and every per-item file
    metadata = {
        'start': sliding_window.start,
        'duration': sliding_window.duration,
        'step': sliding_window.step,
        'dimension': dimension,
    }

    # create metadata file at root that contains
    # sliding window and dimension information
    # ('a' = read/write, create if missing; made explicit because newer
    # h5py releases default to read-only when no mode is given)
    path = Precomputed.get_config_path(apply_dir)
    with h5py.File(path, 'a') as f:
        for key, value in metadata.items():
            f.attrs[key] = value

    for item in getattr(protocol, subset)():

        path = Precomputed.get_path(apply_dir, item)

        extracted = extraction.apply(item)

        # create parent directory
        mkdir_p(os.path.dirname(path))

        # context manager closes the file even if writing fails
        with h5py.File(path, 'a') as f:
            for key, value in metadata.items():
                f.attrs[key] = value
            f.create_dataset('features', data=extracted.data)
def tune(self, protocol_name, subset='development'):
    """Select the best epoch (and matching binarizer thresholds).

    `skopt.gp_minimize` explores epochs in [0, n_epochs - 1], starting
    from the last one. Each epoch is scored by `tune_binarizer`, which
    returns a pair (binarizer parameters, metric); both are memoized so
    that an epoch is never evaluated twice. After each call, the
    convergence plot (TUNE_PNG) and current best state (TUNE_YML) are
    written to the tuning directory.

    Parameters
    ----------
    protocol_name : str
    subset : str, optional
        Defaults to 'development'.

    Returns
    -------
    params : dict
        {'epoch': best epoch}
    objective : float
        Best objective value (`res.fun`).
    """

    tune_dir = self.TUNE_DIR.format(
        train_dir=self.train_dir_,
        protocol=protocol_name,
        subset=subset)
    mkdir_p(tune_dir)

    n_epochs = self.get_epochs(self.train_dir_)
    search_space = [skopt.space.Integer(0, n_epochs - 1)]

    # memoization tables, keyed by the tuple of tested parameters
    binarizer_by_point = {}
    metric_by_point = {}

    tune_yml = self.TUNE_YML.format(tune_dir=tune_dir)
    tune_png = self.TUNE_PNG.format(tune_dir=tune_dir)

    def save_progress(result):
        """Save convergence plot and best-so-far parameters to disk."""

        # matplotlib is imported lazily, with a non-interactive backend
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        import skopt.plots

        _ = skopt.plots.plot_convergence(result)
        plt.savefig(tune_png, dpi=75)
        plt.close()

        state = {}
        state['status'] = {
            'epochs': n_epochs,
            'objective': float(result.fun),
        }
        state['epoch'] = int(result.x[0])
        state['onset'] = float(
            binarizer_by_point[tuple(result.x)]['onset'])
        state['offset'] = float(
            binarizer_by_point[tuple(result.x)]['offset'])

        with io.open(tune_yml, 'w') as fp:
            yaml.dump(state, fp, default_flow_style=False)

    def evaluate(point):
        """Return the metric for the epoch described by `point`."""

        key = tuple(point)
        candidate_epoch, = key

        # reuse the outcome of a previous trial on the same epoch
        try:
            return metric_by_point[key]
        except KeyError:
            pass

        binarizer, value = tune_binarizer(
            self, candidate_epoch, protocol_name, subset=subset)

        binarizer_by_point[key] = binarizer
        metric_by_point[key] = value
        return value

    outcome = skopt.gp_minimize(
        evaluate,
        search_space,
        random_state=1337,
        n_calls=20,
        n_random_starts=10,
        x0=[n_epochs - 1],
        verbose=True,
        callback=save_progress)

    # TODO tune Binarize a bit longer with the best epoch

    best = {'epoch': outcome.x[0]}
    return best, outcome.fun