Python SequenceLabeling Examples, pyannote.audio.labeling.base.SequenceLabeling Python Examples

Example #1

0

Show file

def train(protocol, experiment_dir, train_dir, subset='train'):
    """Train a sequence labeling model on change detection batches.

    Reads `<experiment_dir>/config.yml`, instantiates the feature
    extraction and architecture classes it names, then fits a
    `SequenceLabeling` model on batches produced by a
    `ChangeDetectionBatchGenerator`.

    Parameters
    ----------
    protocol : object
        Must provide `stats(subset)` and a method named after `subset`
        that returns the files to train on.
    experiment_dir : str
        Directory that contains `config.yml`.
    train_dir : str
        Forwarded to `SequenceLabeling.fit` as `log_dir`.
    subset : str, optional
        Name of the protocol subset to train on. Defaults to 'train'.
    """

    # -- TRAINING --
    # these two are fixed here; they are not read from the configuration
    n_epochs = 100
    optimizer = SSMORMS3()

    # NOTE(review): yaml.load is called without an explicit Loader;
    # confirm that config.yml always comes from a trusted source.
    with open(experiment_dir + '/config.yml', 'r') as fp:
        config = yaml.load(fp)

    # -- FEATURE EXTRACTION --
    # the class is looked up by name in pyannote.audio.features
    extraction_config = config['feature_extraction']
    extraction_name = extraction_config['name']
    features_module = __import__('pyannote.audio.features',
                                 fromlist=[extraction_name])
    extraction_class = getattr(features_module, extraction_name)
    feature_extraction = extraction_class(
        **extraction_config.get('params', {}))

    # -- ARCHITECTURE --
    # same by-name lookup, this time in pyannote.audio.labeling.models
    architecture_config = config['architecture']
    architecture_name = architecture_config['name']
    models_module = __import__('pyannote.audio.labeling.models',
                               fromlist=[architecture_name])
    architecture_class = getattr(models_module, architecture_name)
    architecture = architecture_class(
        **architecture_config.get('params', {}))

    # -- SEQUENCE GENERATOR --
    sequences = config['sequences']
    batch_size = sequences.get('batch_size', 1024)
    duration = sequences['duration']
    step = sequences['step']
    balance = sequences['balance']
    batch_generator = ChangeDetectionBatchGenerator(feature_extraction,
                                                    batch_size=batch_size,
                                                    duration=duration,
                                                    step=step,
                                                    balance=balance)

    # number of batches needed to slide over the annotated duration once
    annotated = protocol.stats(subset)['annotated']
    n_sequences = annotated / step
    steps_per_epoch = int(np.ceil(n_sequences / batch_size))

    # input shape (n_frames, n_features)
    input_shape = batch_generator.shape

    labeling = SequenceLabeling()
    batches = batch_generator(getattr(protocol, subset)(), infinite=True)
    labeling.fit(input_shape,
                 architecture,
                 batches,
                 steps_per_epoch,
                 n_epochs,
                 loss='binary_crossentropy',
                 optimizer=optimizer,
                 log_dir=train_dir)

Example #2

0

Show file

    def train(self, protocol_name, subset='train'):
        """Fit a sequence labeling model for speech activity detection.

        Parameters
        ----------
        protocol_name : str
            Name handed to `get_protocol`.
        subset : str, optional
            Name of the protocol subset to train on. Defaults to 'train'.

        Returns
        -------
        labeling : SequenceLabeling
            The object on which `fit` was called.
        """

        train_dir = self.TRAIN_DIR.format(
            experiment_dir=self.experiment_dir,
            protocol=protocol_name,
            subset=subset)

        # sequence batch generator, configured from the 'sequences' section
        sequences = self.config_['sequences']
        batch_size = sequences.get('batch_size', 8192)
        duration = sequences['duration']
        step = sequences['step']
        batch_generator = SpeechActivityDetectionBatchGenerator(
            self.feature_extraction_,
            duration=duration,
            step=step,
            batch_size=batch_size)
        batch_generator.cache_preprocessed_ = self.cache_preprocessed_

        protocol = get_protocol(protocol_name,
                                progress=False,
                                preprocessors=self.preprocessors_)

        # number of batches per epoch, derived from the annotated duration
        annotated = protocol.stats(subset)['annotated']
        n_sequences = annotated / step
        steps_per_epoch = int(np.ceil(n_sequences / batch_size))

        # input shape (n_frames, n_features)
        input_shape = batch_generator.shape

        # endless stream of batches drawn from the training files
        batches = batch_generator(getattr(protocol, subset)(),
                                  infinite=True)

        labeling = SequenceLabeling()
        # the literal 1000 is passed in the same position where the other
        # examples pass their number of epochs
        labeling.fit(input_shape,
                     self.architecture_,
                     batches,
                     steps_per_epoch,
                     1000,
                     optimizer=SSMORMS3(),
                     log_dir=train_dir)

        return labeling

Example #3

0

Show file

def tune_binarizer(app, epoch, protocol_name, subset='development'):
    """Tune the onset/offset thresholds of `Binarize` for one epoch.

    Parameters
    ----------
    app : SpeechActivityDetection
        Provides `preprocessors_`, `train_dir_`, `config_` and
        `feature_extraction_`.
    epoch : int
        Epoch whose model is loaded from `app.train_dir_`.
    protocol_name : str
        E.g. 'Etape.SpeakerDiarization.TV'
    subset : {'train', 'development', 'test'}, optional
        Defaults to 'development'.

    Returns
    -------
    params : dict
        See Binarize.tune
    metric : float
        Best achieved detection error rate
    """

    protocol = get_protocol(protocol_name,
                            progress=False,
                            preprocessors=app.preprocessors_)

    # model saved for the requested epoch
    model = SequenceLabeling.from_disk(app.train_dir_, epoch)

    # sliding-window aggregation of the model output
    sequences = app.config_['sequences']
    aggregation = SequenceLabelingAggregation(
        model,
        app.feature_extraction_,
        duration=sequences['duration'],
        step=sequences['step'])
    aggregation.cache_preprocessed_ = False

    # search thresholds that optimise the detection error rate
    params, error_rate = Binarize.tune(
        getattr(protocol, subset)(),
        aggregation.apply,
        get_metric=DetectionErrorRate,
        dimension=1)

    return params, error_rate

Example #4

0

Show file

File: speech_activity_detection.py Project: GregGovit/pyannote-audio

def train(dataset, medium_template, config_yml):
    """Train a stacked-LSTM speech activity detection model.

    Parameters
    ----------
    dataset : str
        Four dot-separated fields: database, task, protocol and subset.
    medium_template : object
        Forwarded to `get_database` as `medium_template`.
    config_yml : str
        Path to the YAML configuration file. Model weights and logs go to
        the sub-directory `<directory of config_yml>/<dataset>`.

    Raises
    ------
    NotImplementedError
        If the protocol has no attribute named after the subset.
    """

    # NOTE(review): yaml.load is called without an explicit Loader;
    # confirm that the configuration file is always trusted.
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp)

    # weights are saved after each epoch next to the configuration file
    log_dir = os.path.dirname(config_yml) + '/' + dataset

    # -- DATASET --
    db_name, task_name, protocol_name, subset = dataset.split('.')
    database = get_database(db_name, medium_template=medium_template)
    protocol = database.get_protocol(task_name, protocol_name)
    if not hasattr(protocol, subset):
        raise NotImplementedError('')
    file_generator = getattr(protocol, subset)()

    # -- FEATURE EXTRACTION --
    extraction = config['feature_extraction']
    # input sequence duration
    duration = extraction['duration']
    # MFCCs
    feature_extractor = YaafeMFCC(**extraction['mfcc'])
    # normalization
    normalize = extraction['normalize']

    # -- NETWORK STRUCTURE --
    network = config['network']
    lstm = network['lstm']
    dense = network['dense']
    bidirectional = network['bidirectional']

    # -- TRAINING --
    training = config['training']
    # hours of training data (speech + non speech) to use in each epoch
    # FIXME -- update ETAPE so that we can query this information directly
    hours_per_epoch = training['hours_per_epoch']
    # overlap ratio between consecutive windows
    overlap = training['overlap']
    batch_size = training['batch_size']
    nb_epoch = training['nb_epoch']
    optimizer = training['optimizer']

    # two-class labeling model
    design_model = StackedLSTM(n_classes=2,
                               lstm=lstm,
                               bidirectional=bidirectional,
                               dense=dense)
    labeling = SequenceLabeling(design_model,
                                optimizer=optimizer,
                                log_dir=log_dir)

    # window hop implied by the overlap ratio
    step = duration * (1. - overlap)
    batch_generator = SpeechActivityDetectionBatchGenerator(
        feature_extractor,
        duration=duration,
        normalize=normalize,
        step=step,
        batch_size=batch_size)

    # log loss and accuracy during training and
    # keep track of best models for both metrics
    callback = LoggingCallback(
        log_dir=log_dir,
        log=[('train', 'loss'), ('train', 'accuracy')])

    # number of samples per epoch, rounded up to a whole number of batches
    windows_per_epoch = 3600 * hours_per_epoch / step
    batches_per_epoch = int(np.ceil(windows_per_epoch / batch_size))
    samples_per_epoch = batch_size * batches_per_epoch

    # input shape (n_frames, n_features)
    input_shape = batch_generator.get_shape()

    batches = batch_generator(file_generator, infinite=True)

    labeling.fit(input_shape,
                 batches,
                 samples_per_epoch,
                 nb_epoch,
                 callbacks=[callback])

Example #5

0

Show file

File: speech_activity_detection.py Project: GregGovit/pyannote-audio

def test(dataset, medium_template, config_yml, weights_h5, output_dir):
    """Evaluate a trained speech activity detection model on a dataset.

    For every file of the requested subset, predictions are aggregated,
    binarized, scored and dumped to `<output_dir>/<uri>.json`. One line per
    file, followed by one line for the whole corpus, is written to
    `<output_dir>/eval.<dataset>.<subset>.txt`.

    Parameters
    ----------
    dataset : str
        Four dot-separated fields: database, task, protocol and subset.
    medium_template : object
        Forwarded to `get_database` as `medium_template`.
    config_yml : str
        Path to the YAML configuration file.
    weights_h5 : str
        Path to the model weights. The architecture file is expected two
        directory levels above it, as `architecture.yml`.
    output_dir : str
        Directory receiving the evaluation report and the JSON dumps.

    Raises
    ------
    NotImplementedError
        If the protocol has no attribute named after the subset.
    """

    # NOTE(review): yaml.load is called without an explicit Loader;
    # confirm that the configuration file is always trusted.
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp)

    # this is where model architecture was saved
    architecture_yml = os.path.dirname(
        os.path.dirname(weights_h5)) + '/architecture.yml'

    # -- DATASET --
    db, task, protocol, subset = dataset.split('.')
    database = get_database(db, medium_template=medium_template)
    protocol = database.get_protocol(task, protocol)

    if not hasattr(protocol, subset):
        raise NotImplementedError('')

    file_generator = getattr(protocol, subset)()

    # -- FEATURE EXTRACTION --
    # input sequence duration
    duration = config['feature_extraction']['duration']
    # MFCCs
    feature_extractor = YaafeMFCC(**config['feature_extraction']['mfcc'])
    # normalization
    normalize = config['feature_extraction']['normalize']

    # -- TESTING --
    # overlap ratio between each window
    overlap = config['testing']['overlap']
    step = duration * (1. - overlap)

    # prediction smoothing: use the thresholds from the configuration
    # (they used to be read and then ignored in favour of a fixed 0.5)
    onset = config['testing']['binarize']['onset']
    offset = config['testing']['binarize']['offset']
    binarizer = Binarize(onset=onset, offset=offset)

    sequence_labeling = SequenceLabeling.from_disk(architecture_yml,
                                                   weights_h5)

    aggregation = SequenceLabelingAggregation(sequence_labeling,
                                              feature_extractor,
                                              normalize=normalize,
                                              duration=duration,
                                              step=step)

    # all four metrics share the same collar
    collar = 0.500
    error_rate = DetectionErrorRate(collar=collar)
    accuracy = DetectionAccuracy(collar=collar)
    precision = DetectionPrecision(collar=collar)
    recall = DetectionRecall(collar=collar)

    LINE = '{uri} {e:.3f} {a:.3f} {p:.3f} {r:.3f} {f:.3f}\n'

    # distinct names for the two path templates so that the per-file dump
    # path no longer shadows the report path
    REPORT_PATH = '{output_dir}/eval.{dataset}.{subset}.txt'
    DUMP_PATH = '{output_dir}/{uri}.json'
    report_path = REPORT_PATH.format(output_dir=output_dir,
                                     dataset=dataset,
                                     subset=subset)

    with open(report_path, 'w') as fp:

        header = '# uri error accuracy precision recall f_measure\n'
        fp.write(header)
        fp.flush()

        for current_file in file_generator:

            uri = current_file['uri']
            wav = current_file['medium']['wav']
            annotated = current_file['annotated']
            annotation = current_file['annotation']

            predictions = aggregation.apply(wav)
            hypothesis = binarizer.apply(predictions, dimension=1)

            # calling a metric scores this file; the metric objects are
            # queried again after the loop for the corpus-level line
            e = error_rate(annotation, hypothesis, uem=annotated)
            a = accuracy(annotation, hypothesis, uem=annotated)
            p = precision(annotation, hypothesis, uem=annotated)
            r = recall(annotation, hypothesis, uem=annotated)
            f = f_measure(p, r)

            # flush after each file so partial results survive a crash
            fp.write(LINE.format(uri=uri, e=e, a=a, p=p, r=r, f=f))
            fp.flush()

            dump_to(hypothesis,
                    DUMP_PATH.format(output_dir=output_dir, uri=uri))

        # average on whole corpus
        # NOTE(review): abs(metric) presumably returns the value
        # accumulated over all scored files -- confirm against the
        # metric classes.
        uri = '{dataset}.{subset}'.format(dataset=dataset, subset=subset)
        e = abs(error_rate)
        a = abs(accuracy)
        p = abs(precision)
        r = abs(recall)
        f = f_measure(p, r)
        fp.write(LINE.format(uri=uri, e=e, a=a, p=p, r=r, f=f))
        fp.flush()

Example #6

0

Show file

def apply(protocol,
          train_dir,
          store_dir,
          threshold,
          subset='development',
          epoch=None,
          min_duration=1.0):
    """Run peak detection on model predictions and write `.seg` files.

    One file named `<uri>.0.seg` is written per protocol file into
    `<store_dir>/<threshold>/`.

    Parameters
    ----------
    protocol : object
        Must provide a method named after `subset` that yields files
        (dict-like, with at least a 'uri' key).
    train_dir : str
        Directory holding the per-epoch weights. `config.yml` is expected
        two directory levels above it.
    store_dir : str
        Root directory for the output.
    threshold : float
        Passed to `Peak` as `alpha`; also used as output sub-directory name.
    subset : str, optional
        Defaults to 'development'.
    epoch : int, optional
        Epoch of the model to load. Defaults to the last epoch for which a
        weights file exists.
    min_duration : float, optional
        Passed to `Peak`. Defaults to 1.0.

    Raises
    ------
    ValueError
        If `epoch` is not smaller than the number of available epochs.
    """

    # -- LOAD MODEL --
    # count consecutive weight files, starting at epoch 0
    nb_epoch = 0
    while os.path.isfile(
            LoggingCallback.WEIGHTS_H5.format(log_dir=train_dir,
                                              epoch=nb_epoch)):
        nb_epoch += 1

    config_dir = os.path.dirname(os.path.dirname(train_dir))
    config_yml = config_dir + '/config.yml'
    # NOTE(review): yaml.load is called without an explicit Loader;
    # confirm that config.yml always comes from a trusted source.
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp)

    # -- FEATURE EXTRACTION --
    # the class is looked up by name in pyannote.audio.features
    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    # -- SEQUENCE GENERATOR --
    duration = config['sequences']['duration']
    step = config['sequences']['step']

    def _save_seg(seg_path, filename, segmentation):
        """Write one line per segment: name, index, '1', start, length."""
        # the context manager closes the file even if a write fails
        with open(seg_path, 'w') as f:
            for idx, val in enumerate(segmentation):
                # both values are scaled by 100 and truncated to int
                start = int(val[0] * 100)
                length = int(val[1] * 100 - val[0] * 100)
                f.write(filename + ' ' + str(idx) + ' 1 ' +
                        str(start) + ' ' + str(length) + '\n')

    output_dir = store_dir + '/' + str(threshold) + '/'
    mkdir_p(output_dir)

    # -- CHOOSE MODEL --
    # Resolve the default first: comparing None with an int raises
    # TypeError on Python 3. Valid epochs are 0 .. nb_epoch - 1.
    if epoch is None:
        epoch = nb_epoch - 1
    elif epoch >= nb_epoch:
        raise ValueError('Epoch should be less than ' + str(nb_epoch))

    sequence_labeling = SequenceLabeling.from_disk(train_dir, epoch)
    aggregation = SequenceLabelingAggregation(sequence_labeling,
                                              feature_extraction,
                                              duration=duration,
                                              step=step)

    # -- PREDICTION --
    predictions = {}
    for dev_file in getattr(protocol, subset)():
        predictions[dev_file['uri']] = aggregation.apply(dev_file)

    # initialize peak detection algorithm
    peak = Peak(alpha=threshold, min_duration=min_duration)

    for dev_file in getattr(protocol, subset)():
        uri = dev_file['uri']
        hypothesis = peak.apply(predictions[uri])
        _save_seg(output_dir + uri + '.0.seg', uri, hypothesis)

Example #7

0

Show file

def evaluate(protocol,
             train_dir,
             store_dir,
             subset='development',
             epoch=None,
             min_duration=1.0):
    """Report segmentation purity and coverage for 20 peak thresholds.

    Results are printed and appended to `<store_dir>/res.txt`, one line per
    threshold.

    Parameters
    ----------
    protocol : object
        Must provide a method named after `subset` that yields files
        (dict-like, with at least 'uri' and 'annotation' keys).
    train_dir : str
        Directory holding the per-epoch weights. `config.yml` is expected
        two directory levels above it.
    store_dir : str
        Directory receiving `res.txt`; created if needed.
    subset : str, optional
        Defaults to 'development'.
    epoch : int, optional
        Epoch of the model to load. Defaults to the last epoch for which a
        weights file exists.
    min_duration : float, optional
        Passed to `Peak`. Defaults to 1.0.

    Raises
    ------
    ValueError
        If `epoch` is not smaller than the number of available epochs.
    """

    mkdir_p(store_dir)

    # -- LOAD MODEL --
    # count consecutive weight files, starting at epoch 0
    nb_epoch = 0
    while os.path.isfile(
            LoggingCallback.WEIGHTS_H5.format(log_dir=train_dir,
                                              epoch=nb_epoch)):
        nb_epoch += 1

    config_dir = os.path.dirname(os.path.dirname(train_dir))
    config_yml = config_dir + '/config.yml'
    # NOTE(review): yaml.load is called without an explicit Loader;
    # confirm that config.yml always comes from a trusted source.
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp)

    # -- FEATURE EXTRACTION --
    # the class is looked up by name in pyannote.audio.features
    feature_extraction_name = config['feature_extraction']['name']
    features = __import__('pyannote.audio.features',
                          fromlist=[feature_extraction_name])
    FeatureExtraction = getattr(features, feature_extraction_name)
    feature_extraction = FeatureExtraction(
        **config['feature_extraction'].get('params', {}))

    # -- SEQUENCE GENERATOR --
    duration = config['sequences']['duration']
    step = config['sequences']['step']

    # reference annotation of every file, indexed by uri
    groundtruth = {}
    for dev_file in getattr(protocol, subset)():
        groundtruth[dev_file['uri']] = dev_file['annotation']

    # -- CHOOSE MODEL --
    # Resolve the default first: comparing None with an int raises
    # TypeError on Python 3. Valid epochs are 0 .. nb_epoch - 1.
    if epoch is None:
        epoch = nb_epoch - 1
    elif epoch >= nb_epoch:
        raise ValueError('Epoch should be less than ' + str(nb_epoch))

    sequence_labeling = SequenceLabeling.from_disk(train_dir, epoch)

    aggregation = SequenceLabelingAggregation(sequence_labeling,
                                              feature_extraction,
                                              duration=duration,
                                              step=step)

    # -- PREDICTION --
    # predictions are computed once and reused for every threshold
    predictions = {}
    for dev_file in getattr(protocol, subset)():
        predictions[dev_file['uri']] = aggregation.apply(dev_file)

    alphas = np.linspace(0, 1, 20)

    # one pair of metric accumulators per threshold
    purity = [SegmentationPurity(parallel=False) for _ in alphas]
    coverage = [SegmentationCoverage(parallel=False) for _ in alphas]

    # -- SAVE RESULTS --
    for i, alpha in enumerate(alphas):
        # initialize peak detection algorithm
        peak = Peak(alpha=alpha, min_duration=min_duration)
        for uri, reference in groundtruth.items():
            # apply peak detection
            hypothesis = peak.apply(predictions[uri])
            # compute purity and coverage
            purity[i](reference, hypothesis)
            coverage[i](reference, hypothesis)

    TEMPLATE = '{alpha:g} {purity:.3f}% {coverage:.3f}%'
    # append mode: results of earlier runs are kept
    with open(store_dir + '/res.txt', 'a') as fp:
        for i, a in enumerate(alphas):
            # NOTE(review): abs(metric) presumably returns the value
            # accumulated over all files -- confirm against the metric
            # classes.
            p = 100 * abs(purity[i])
            c = 100 * abs(coverage[i])
            line = TEMPLATE.format(alpha=a, purity=p, coverage=c)
            print(line)
            fp.write(line + '\n')

Example #8

0

Show file

    def apply(self, protocol_name, subset='test'):
        """Apply the tuned model to a subset and store its output.

        Raw predictions are written as one HDF5 file per item below the
        directory derived from `self.APPLY_DIR`, plus one metadata file at
        its root. The predictions are then read back, binarized with the
        tuned thresholds and written to a single MDTM file.

        Parameters
        ----------
        protocol_name : str
            Name handed to `get_protocol`.
        subset : str, optional
            Name of the protocol subset to process. Defaults to 'test'.
        """

        apply_dir = self.APPLY_DIR.format(tune_dir=self.tune_dir_)

        mkdir_p(apply_dir)

        # load tuning results
        # NOTE(review): yaml.load is called without an explicit Loader;
        # confirm that the tuning file is always trusted.
        tune_yml = self.TUNE_YML.format(tune_dir=self.tune_dir_)
        with io.open(tune_yml, 'r') as fp:
            self.tune_ = yaml.load(fp)

        # load model for epoch 'epoch'
        epoch = self.tune_['epoch']
        sequence_labeling = SequenceLabeling.from_disk(self.train_dir_, epoch)

        # initialize sequence labeling
        duration = self.config_['sequences']['duration']
        step = self.config_['sequences']['step']
        aggregation = SequenceLabelingAggregation(sequence_labeling,
                                                  self.feature_extraction_,
                                                  duration=duration,
                                                  step=step)

        # initialize protocol
        protocol = get_protocol(protocol_name,
                                progress=True,
                                preprocessors=self.preprocessors_)

        for i, item in enumerate(getattr(protocol, subset)()):

            prediction = aggregation.apply(item)
            window = prediction.sliding_window

            if i == 0:
                # create metadata file at root that contains
                # sliding window and dimension information
                path = Precomputed.get_config_path(apply_dir)
                # mode 'a' (create if missing) was h5py's implicit default
                # before 3.0; newer versions default to read-only, so it
                # is spelled out here. The context manager closes the
                # file even if a write fails.
                with h5py.File(path, mode='a') as f:
                    f.attrs['start'] = window.start
                    f.attrs['duration'] = window.duration
                    f.attrs['step'] = window.step
                    f.attrs['dimension'] = 2

            path = Precomputed.get_path(apply_dir, item)

            # create parent directory
            mkdir_p(dirname(path))

            with h5py.File(path, mode='a') as f:
                f.attrs['start'] = window.start
                f.attrs['duration'] = window.duration
                f.attrs['step'] = window.step
                f.attrs['dimension'] = 2
                f.create_dataset('features', data=prediction.data)

        # initialize binarizer
        onset = self.tune_['onset']
        offset = self.tune_['offset']
        binarize = Binarize(onset=onset, offset=offset)

        # reads back the predictions stored by the loop above
        precomputed = Precomputed(root_dir=apply_dir)

        writer = MDTMParser()
        path = self.HARD_MDTM.format(apply_dir=apply_dir,
                                     protocol=protocol_name,
                                     subset=subset)
        with io.open(path, mode='w') as gp:
            for item in getattr(protocol, subset)():
                prediction = precomputed(item)
                segmentation = binarize.apply(prediction, dimension=1)
                writer.write(segmentation.to_annotation(),
                             f=gp,
                             uri=item['uri'],
                             modality='speaker')

Example #9

0

Show file

    def validate(self, protocol_name, subset='development'):
        """Watch the training directory and score each epoch as it appears.

        For every epoch whose weights file exists, the model is loaded, an
        equal error rate (EER) is computed per file and averaged, the
        average is written to the validation text file, and a plot of all
        EERs so far is saved as PNG and EPS. The loop polls every 60
        seconds while the next weights file is missing and never
        terminates on its own.

        Parameters
        ----------
        protocol_name : str
            Name handed to `get_protocol`.
        subset : str, optional
            Name of the protocol subset to validate on. Defaults to
            'development'.
        """

        # prepare paths
        validate_dir = self.VALIDATE_DIR.format(train_dir=self.train_dir_,
                                                protocol=protocol_name)
        validate_txt = self.VALIDATE_TXT.format(validate_dir=validate_dir,
                                                subset=subset)
        validate_png = self.VALIDATE_PNG.format(validate_dir=validate_dir,
                                                subset=subset)
        validate_eps = self.VALIDATE_EPS.format(validate_dir=validate_dir,
                                                subset=subset)

        # create validation directory
        mkdir_p(validate_dir)

        # Build validation set
        # NOTE(review): `y` is indexed below by the unique identifier of
        # each file and presumably holds the reference labels -- confirm
        # against `_validation_set`.
        y = self._validation_set(protocol_name, subset=subset)

        # list of equal error rates, and current epoch
        eers, epoch = [], 0

        desc_format = ('EER = {eer:.2f}% @ epoch #{epoch:d} ::'
                       ' Best EER = {best_eer:.2f}% @ epoch #{best_epoch:d} :')
        progress_bar = tqdm(unit='epoch', total=1000)

        # mode 'w': results of a previous validation run are overwritten
        with open(validate_txt, mode='w') as fp:

            # watch and evaluate forever
            while True:

                weights_h5 = LoggingCallback.WEIGHTS_H5.format(
                    log_dir=self.train_dir_, epoch=epoch)

                # wait until weight file is available
                if not isfile(weights_h5):
                    time.sleep(60)
                    continue

                # load model for current epoch
                sequence_labeling = SequenceLabeling.from_disk(
                    self.train_dir_, epoch)

                # initialize sequence labeling
                duration = self.config_['sequences']['duration']
                step = duration  # hack to make things faster
                # step = self.config_['sequences']['step']
                aggregation = SequenceLabelingAggregation(
                    sequence_labeling,
                    self.feature_extraction_,
                    duration=duration,
                    step=step)
                aggregation.cache_preprocessed_ = False

                # estimate equal error rate (average of all files)
                eers_ = []
                protocol = get_protocol(protocol_name,
                                        progress=False,
                                        preprocessors=self.preprocessors_)
                file_generator = getattr(protocol, subset)()
                for current_file in file_generator:
                    identifier = get_unique_identifier(current_file)
                    uem = get_annotated(current_file)
                    # column 1 of the reference, restricted to `uem`
                    y_true = y[identifier].crop(uem)[:, 1]
                    counts = Counter(y_true)
                    # skip files in which label 0 or label 1 never occurs
                    if counts[0] * counts[1] == 0:
                        continue
                    y_pred = aggregation.apply(current_file).crop(uem)[:, 1]

                    # only the last returned value (the EER) is used
                    _, _, _, eer = det_curve(y_true, y_pred, distances=False)

                    eers_.append(eer)
                # NOTE(review): if every file was skipped above, `eers_` is
                # empty and np.mean returns nan -- confirm this is acceptable.
                eer = np.mean(eers_)
                eers.append(eer)

                # save equal error rate to file
                fp.write(
                    self.VALIDATE_TXT_TEMPLATE.format(epoch=epoch, eer=eer))
                fp.flush()

                # keep track of best epoch so far
                # (epochs start at 0, so the index into `eers` is the epoch)
                best_epoch, best_eer = np.argmin(eers), np.min(eers)

                progress_bar.set_description(
                    desc_format.format(epoch=epoch,
                                       eer=100 * eer,
                                       best_epoch=best_epoch,
                                       best_eer=100 * best_eer))
                progress_bar.update(1)

                # plot
                # EER curve, marker on the best epoch, dashed line at best EER
                fig = plt.figure()
                plt.plot(eers, 'b')
                plt.plot([best_epoch], [best_eer], 'bo')
                plt.plot([0, epoch], [best_eer, best_eer], 'k--')
                plt.grid(True)
                plt.xlabel('epoch')
                plt.ylabel('EER on {subset}'.format(subset=subset))
                TITLE = '{best_eer:.5g} @ epoch #{best_epoch:d}'
                # `subset` is passed but TITLE has no such placeholder
                title = TITLE.format(best_eer=best_eer,
                                     best_epoch=best_epoch,
                                     subset=subset)
                plt.title(title)
                plt.tight_layout()
                plt.savefig(validate_png, dpi=75)
                plt.savefig(validate_eps)
                # close the figure so figures do not pile up across epochs
                plt.close(fig)

                # validate next epoch
                epoch += 1

        # NOTE(review): unreachable -- the loop above has no break, so the
        # progress bar is only released when the process ends.
        progress_bar.close()