Beispiel #1
0
def example_of_usage():
    """!
    Show how the audio mixture constructor is meant to be called.

    The path two levels above the folder holding this file is pushed to
    the front of ``sys.path`` before the ``spatial_two_mics`` modules are
    imported. An ``AudioMixtureConstructor`` is then created, the example
    mixture description receives positions for two sources obtained from
    a ``RandomCirclePositioner``, and whatever ``construct_mixture``
    returns is handed to ``pprint``.
    """

    import os
    import sys
    this_folder = os.path.dirname(os.path.realpath(__file__))
    root_dir = os.path.join(this_folder, '../../')
    sys.path.insert(0, root_dir)
    import spatial_two_mics.examples.mixture_example as me

    constructor_settings = {
        'n_fft': 1024,
        'win_len': 1024,
        'hop_len': 512,
        'mixture_duration': 2.0,
        'force_delays': [-1, 1],
    }
    mixture_creator = AudioMixtureConstructor(**constructor_settings)

    mixture_info = me.mixture_info_example()

    import spatial_two_mics.data_generator.source_position_generator \
        as position_generator

    # replace the positions of the example with randomly generated ones
    n_example_sources = 2
    random_positioner = position_generator.RandomCirclePositioner()
    mixture_info['positions'] = random_positioner.get_sources_locations(
        n_example_sources)

    tf_mixtures = mixture_creator.construct_mixture(mixture_info)

    pprint(tf_mixtures)
    def gather_mixtures_information(self,
                                    speakers,
                                    n_sources_in_mix=2,
                                    n_mixtures=0):
        """
        Collect one mixture description per valid combination of
        sentences spoken by the given speakers.

        speakers_dic (self.data_dic[self.subset_of_speakers]) should be
        able to return a dic like this:
            'speaker_id_i': {
                'dialect': which dialect the speaker belongs to,
                'gender': f or m,
                'sentences': {
                    'sentence_id_j': {
                        'wav': wav_on_a_numpy_matrix,
                        'sr': Fs in Hz integer,
                        'path': Path of the located wav
                    }
                }
            }

        combination_info should be in the following format:
           [{'gender': 'm', 'sentence_id': 'sx298', 'speaker_id': 'mctt0'},
            {'gender': 'm', 'sentence_id': 'sx364', 'speaker_id': 'mrjs0'},
            {'gender': 'f', 'sentence_id': 'sx369', 'speaker_id': 'fgjd0'}]
        The candidate sources built here additionally carry a
        'sentence_name' entry: the part of the sentence id that follows
        its last backslash.

        The candidates are shuffled in place, filtered through
        self.get_only_valid_mixture_combinations (n_sources_in_mix is
        passed as n_mixed_sources, n_mixtures is passed as is) and every
        surviving combination is turned into a mixture info by
        self.construct_mixture_info, together with positions for as many
        sources as the combination has entries.

        Returns the list of those mixture infos.
        """
        speakers_dic = self.data_dic[self.subset_of_speakers]

        possible_sources = []
        for speaker in speakers:
            speaker_entry = speakers_dic[speaker]
            sentence_ids = list(speaker_entry['sentences'])
            # keep only what follows the last backslash of each id
            names = [sentence_id.rsplit('\\', 1)[-1]
                     for sentence_id in sentence_ids]
            gender = speaker_entry['gender']
            possible_sources.extend(
                {'speaker_id': speaker,
                 'gender': gender,
                 'sentence_id': sentence_id,
                 'sentence_name': name}
                for sentence_id, name in zip(sentence_ids, names))

        shuffle(possible_sources)

        valid_combinations = self.get_only_valid_mixture_combinations(
            possible_sources,
            speakers_dic,
            n_mixed_sources=n_sources_in_mix,
            n_mixtures=n_mixtures)

        random_positioner = positions_generator.RandomCirclePositioner()

        mixtures_info = []
        for combination in valid_combinations:
            # one position set per combination, sized to its sources
            positions = random_positioner.get_sources_locations(
                len(combination))
            mixtures_info.append(self.construct_mixture_info(
                speakers_dic, combination, positions))

        return mixtures_info
    def gather_mixtures_information(self,
                                    samples,
                                    n_sources_in_mix=2,
                                    n_mixtures=0):
        """
        Collect one mixture description per valid combination of the
        given sound samples.

        samples_dic (self.available_sound_samples) is indexed by sample
        id and every entry has to provide at least:
            'sample_id_i': {
                'type': type of the sample
            }
        Any further fields of an entry are not read here; they are only
        seen by the helpers that receive samples_dic.

        Each candidate source built here has the format:
            {'sample_id': 'sample_id_i', 'type': type of the sample}

        The candidates are shuffled in place, filtered through
        self.get_only_valid_mixture_combinations (n_sources_in_mix is
        passed as n_mixed_sources, n_mixtures is passed as is) and every
        surviving combination is turned into a mixture info by
        self.construct_mixture_info, together with positions for as many
        sources as the combination has entries.

        samples: iterable with the ids of the samples to combine, all of
            them keys of self.available_sound_samples (KeyError
            otherwise)
        Returns the list of those mixture infos.
        """
        samples_dic = self.available_sound_samples

        # the id itself is stored; the original `sample[]` did not parse
        possible_sources = [{'sample_id': sample,
                             'type': samples_dic[sample]['type']}
                            for sample in samples]

        shuffle(possible_sources)

        valid_combinations = self.get_only_valid_mixture_combinations(
            possible_sources,
            samples_dic,
            n_mixed_sources=n_sources_in_mix,
            n_mixtures=n_mixtures)

        random_positioner = positions_generator.RandomCirclePositioner()

        # samples_dic, not the undefined speakers_dic, is the lookup
        # table that belongs to these combinations
        mixtures_info = [self.construct_mixture_info(
            samples_dic,
            combination,
            random_positioner.get_sources_locations(len(
                combination)))
            for combination in valid_combinations]

        return mixtures_info
Beispiel #4
0
    def __init__(self, n_sources: int):
        """
        Store the number of sources and create the helper objects.

        Creates, in this order, a RandomCirclePositioner, an
        AudioMixtureConstructor and two TFMaskEstimator instances: one
        using the 'Ground_truth' inference method and one using
        'duet_Kmeans' that also returns the raw duet features.

        n_sources: number of sources, must be at least 1
        Raises ValueError when n_sources is smaller than 1.
        """
        # refuse zero or negative source counts before creating anything
        if n_sources < 1:
            raise ValueError('n_sources should be positive integer')

        self.n_sources = n_sources
        self.positions_generator = positions_generator.RandomCirclePositioner()

        mixture_settings = dict(n_fft=512,
                                win_len=512,
                                hop_len=128,
                                mixture_duration=1.5,
                                force_delays=None)
        self.mixture_generator = mix_constructor.AudioMixtureConstructor(
            **mixture_settings)

        estimator_class = mask_estimator.TFMaskEstimator
        self.gt_estimator = estimator_class(inference_method='Ground_truth')
        self.sl_estimator = estimator_class(inference_method='duet_Kmeans',
                                            return_duet_raw_features=True)