Ejemplo n.º 1
0
    def audio_container(cls):
        """Build a synthetic two-row AudioContainer for examples.

        The container is created with fs=44100 and filled with two sine
        signals (220 and 440 cycles per unit of the time axis) evaluated over
        the half-open interval [0, 2).

        Returns
        -------
        AudioContainer

        """
        from dcase_util.containers import AudioContainer

        example = AudioContainer(fs=44100)

        # Time axis: 2 * fs points over [0, 2), end point excluded
        time_axis = numpy.linspace(0, 2, 2 * example.fs, endpoint=False)

        # One sine row per frequency, stacked row-wise in the same order
        tones = [
            numpy.sin(frequency * 2 * numpy.pi * time_axis)
            for frequency in (220, 440)
        ]
        example.data = numpy.vstack(tones)

        return example
Ejemplo n.º 2
0
    def __init__(self,
                 data=None,
                 fs=44100,
                 focus_start_samples=None,
                 focus_stop_samples=None,
                 focus_channel=None,
                 mono=False,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        data : DataContainer
            Data to initialize the container
            Default value None

        fs : int
            Target sampling rate when reading audio
            Default value 44100

        focus_start_samples : int
            Sample id of the focus segment start
            Default value None

        focus_stop_samples : int
            Sample id of the focus segment stop
            Default value None

        focus_channel : int or str
            Focus segment channel
            Default value None

        mono : bool
            Mix down multi-channel audio during the reading stage.
            Default value False

        """

        # Fold the named arguments into kwargs so that every initializer
        # below receives the same complete parameter set.
        kwargs.update(
            data=data,
            fs=fs,
            focus_start_samples=focus_start_samples,
            focus_stop_samples=focus_stop_samples,
            focus_channel=focus_channel,
            mono=mono
        )

        # Explicit initializers first: ProcessorMixin, then AudioContainer
        ProcessorMixin.__init__(self, **kwargs)
        AudioContainer.__init__(self, **kwargs)

        # Then the super() initializer, so the mixins get initialized as well
        super(AudioReadingProcessor, self).__init__(**kwargs)

        self.mono = kwargs.get('mono', False)
Ejemplo n.º 3
0
    def feature_container(cls, filename=None):
        """Extract mel features from an audio file into a FeatureContainer.

        Parameters
        ----------
        filename : str
            Audio file to process. When None, the name returned by
            cls.audio_filename() is used.
            Default value None

        Returns
        -------
        FeatureContainer

        """
        if filename is None:
            filename = cls.audio_filename()

        from dcase_util.containers import AudioContainer, FeatureContainer
        from dcase_util.features import MelExtractor

        # Load the audio, then mix it down
        loaded_audio = AudioContainer(filename=filename).load()
        mixed_audio = loaded_audio.mixdown()

        # Extractor is configured with the sampling rate of the loaded audio
        extractor = MelExtractor(fs=mixed_audio.fs)

        return FeatureContainer(
            data=extractor.extract(mixed_audio.data),
            time_resolution=extractor.hop_length_seconds
        )
Ejemplo n.º 4
0
    def process(self,
                data=None,
                filename=None,
                focus_start_samples=None,
                focus_stop_samples=None,
                focus_duration_samples=None,
                focus_start_seconds=None,
                focus_stop_seconds=None,
                focus_duration_seconds=None,
                focus_channel=None,
                store_processing_chain=False,
                **kwargs):
        """Audio reading

        Creates an AudioContainer from the processor's init parameters,
        optionally loads `filename` into it, and sets the focus segment.

        Parameters
        ----------
        data :
            Must be None; this processor does not accept input data.

        filename : str
            Filename

        focus_start_samples : int
            Sample index of focus segment start

        focus_stop_samples : int
            Sample index of focus segment stop

        focus_duration_samples : int
            Sample count of focus segment

        focus_start_seconds : float
            Time stamp (in seconds) of focus segment start

        focus_stop_seconds : float
            Time stamp (in seconds) of focus segment stop

        focus_duration_seconds : float
            Duration (in seconds) of focus segment

        focus_channel : int or str
            Audio channel id or name to focus. In case of stereo signal, valid channel labels to select
            single channel are 'L', 'R', 'left', and 'right' or 0, 1, and to get mixed down version
            of all channels 'mixdown'.

        store_processing_chain : bool
            Store processing chain to data container returned
            Default value False

        Raises
        ------
        ValueError
            If `data` is not None, or the processor input type is not
            ProcessingChainItemType.NONE.

        Returns
        -------
        AudioContainer

        """

        # Guard: reject any input data / unexpected input type up front
        if not (data is None and self.input_type == ProcessingChainItemType.NONE):
            message = '{name}: Wrong input data type, type required [{input_type}].'.format(
                name=self.__class__.__name__, input_type=self.input_type)

            self.logger.exception(message)
            raise ValueError(message)

        audio_container = AudioContainer(**self.init_parameters)

        if filename:
            audio_container.load(filename=filename,
                                 mono=self.init_parameters.get('mono'))

        # Set focus segment and channel
        audio_container.set_focus(start=focus_start_samples,
                                  stop=focus_stop_samples,
                                  duration=focus_duration_samples,
                                  start_seconds=focus_start_seconds,
                                  stop_seconds=focus_stop_seconds,
                                  duration_seconds=focus_duration_seconds,
                                  channel=focus_channel)

        if not store_processing_chain:
            return audio_container

        chain_item = self.get_processing_chain_item()

        if 'process_parameters' not in chain_item:
            chain_item['process_parameters'] = {}

        # Record the call arguments in the chain item, in a fixed order
        recorded_parameters = (
            ('filename', filename),
            ('focus_start_samples', focus_start_samples),
            ('focus_stop_samples', focus_stop_samples),
            ('focus_duration_samples', focus_duration_samples),
            ('focus_start_seconds', focus_start_seconds),
            ('focus_stop_seconds', focus_stop_seconds),
            ('focus_duration_seconds', focus_duration_seconds),
            ('focus_channel', focus_channel),
        )
        for key, value in recorded_parameters:
            chain_item['process_parameters'][key] = value

        # Push the chain item into the container's processing chain,
        # creating the chain first when the container does not have one yet
        if hasattr(audio_container, 'processing_chain'):
            audio_container.processing_chain.push_processor(**chain_item)

        else:
            audio_container.processing_chain = ProcessingChain(
            ).push_processor(**chain_item)

        return audio_container
Ejemplo n.º 5
0
def download_file(result_dir, filename):
    """Download the audio segment described by an AudioSet filename from YouTube.

    The video id and the segment boundaries are parsed from the filename itself:
    characters 1..11 are used as the YouTube id, and the text between index 13 and
    the last four characters is split on '_' into segment start and segment end.
    The downloaded file is loaded with only that segment, saved under `result_dir`
    as `filename`, and the temporary download is removed.

    Parameters
    ----------

    result_dir : str, result directory which will contain the downloaded file

    filename : str, AudioSet filename to download

    Return
    ------

    list : list, Empty list if the file is downloaded, otherwise contains the filename and the error associated

    """
    LOG.debug(filename)
    tmp_filename = ""
    query_id = filename[1:12]

    # Split the "<start>_<end>" part once and pick both boundaries from it
    segment_bounds = filename[13:-4].split('_')
    segment_start = segment_bounds[0]
    segment_end = segment_bounds[1]
    audio_container = AudioContainer()

    # Define download parameters
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': TMP_FOLDER+'%(id)s.%(ext)s',
        'noplaylist': True,
        'quiet': True,
        'prefer_ffmpeg': True,
        'logger': MyLogger(),
        'audioformat': 'wav'
    }

    try:
        # Download file
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            meta = ydl.extract_info(
                'https://www.youtube.com/watch?v={query_id}'.format(query_id=query_id), download=True)

        # Keep only the audio-only formats (entries reporting no video codec)
        audio_formats = [f for f in meta["formats"] if f.get('vcodec') == 'none']

        # Emptiness must be tested by truthiness: `audio_formats is []` compares
        # identity with a brand-new list and is therefore never true.
        if not audio_formats:
            return [filename, "no audio format available"]

        # get the best audio format
        best_audio_format = audio_formats[-1]

        tmp_filename = TMP_FOLDER + query_id + "." + best_audio_format["ext"]

        audio_container.load(filename=tmp_filename, fs=44100, res_type='kaiser_best',
                             start=float(segment_start), stop=float(segment_end))

        # Save segmented audio
        audio_container.filename = filename
        audio_container.detect_file_format()
        audio_container.save(filename=os.path.join(result_dir, filename))

        # Remove temporary file
        os.remove(tmp_filename)
        return []

    except (KeyboardInterrupt, SystemExit):
        # Remove temporary files and current audio file.
        for fpath in glob.glob(TMP_FOLDER + query_id + "*"):
            os.remove(fpath)
        raise

    # youtube-dl error, file often removed
    except (ExtractorError, DownloadError, OSError) as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)

        return [filename, str(e)]

    # multiprocessing can give this error
    except IndexError as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        LOG.info(filename)
        LOG.info(str(e))
        return [filename, "Index Error"]
Ejemplo n.º 6
0
def download_file(filename, result_dir, platform="youtube"):
    """ download a file from youtube or vimeo given an audioSet filename. (It takes only a part of the file thanks to
    information provided in the filename)

    The segment start and end are the last two '_'-separated fields of the filename without its extension.
    The video id is `filename[1:12]` for youtube and the text before the first '_' for vimeo.
    If the target file already exists in `result_dir`, nothing is downloaded.

    Args:
        filename : str, AudioSet filename to download
        result_dir : str, result directory which will contain the downloaded file
        platform: str, name of the platform, here youtube or vimeo

    Return:
        list, Empty list if the file is downloaded (or already present), otherwise contains the filename and
        the error associated

    Raises:
        NotImplementedError: if `platform` is neither "youtube" nor "vimeo" (case-insensitive)

    """
    logger = create_logger(__name__ + "/" +
                           inspect.currentframe().f_code.co_name)
    tmp_filename = ""
    fname_no_ext = os.path.splitext(filename)[0]

    # Split once; the two trailing fields are the segment boundaries
    name_fields = fname_no_ext.split('_')
    segment_start = name_fields[-2]
    segment_end = name_fields[-1]
    audio_container = AudioContainer()

    # Define download parameters
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': TMP_FOLDER + '%(id)s.%(ext)s',
        'noplaylist': True,
        'quiet': True,
        'prefer_ffmpeg': True,
        'logger': MyLogger(),
        'audioformat': 'wav'
    }

    platform_name = platform.lower()
    if platform_name == "youtube":
        query_id = filename[1:12]  # Remove the Y in front of the file.
        baseurl = "https://www.youtube.com/watch?v="
    elif platform_name == "vimeo":
        query_id = filename.split('_')[0]
        baseurl = "https://vimeo.com/"
    else:
        raise NotImplementedError("platform can only be vimeo or youtube")

    # Nothing to do when the segment has already been downloaded
    if os.path.isfile(os.path.join(result_dir, filename)):
        # The filename is passed as a %-format argument; passing a second
        # positional string to a message without placeholders makes the
        # logging module fail while formatting the record.
        logger.debug("%s exists, skipping", filename)
        return []

    try:
        logger.debug(filename)
        # Download file
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            meta = ydl.extract_info(f"{baseurl}{query_id}", download=True)

        # Keep only the audio-only formats (entries reporting no video codec)
        audio_formats = [
            f for f in meta["formats"] if f.get('vcodec') == 'none'
        ]

        # Emptiness must be tested by truthiness: `audio_formats is []`
        # compares identity with a brand-new list and is never true.
        if not audio_formats:
            return [filename, "no audio format available"]

        # get the best audio format
        best_audio_format = audio_formats[-1]

        tmp_filename = TMP_FOLDER + query_id + "." + best_audio_format["ext"]
        audio_container.load(filename=tmp_filename,
                             fs=44100,
                             res_type='kaiser_best',
                             start=float(segment_start),
                             stop=float(segment_end),
                             auto_trimming=True)

        # Save segmented audio
        audio_container.filename = filename
        audio_container.detect_file_format()
        audio_container.save(filename=os.path.join(result_dir, filename))

        # Remove temporary file
        os.remove(tmp_filename)
        return []

    except (KeyboardInterrupt, SystemExit):
        # Remove temporary files and current audio file.
        for fpath in glob.glob(TMP_FOLDER + query_id + "*"):
            os.remove(fpath)
        raise

    # youtube-dl error, file often removed, IO Error is for AudioContainer error if length of file is different.
    except (ExtractorError, DownloadError, IOError) as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)

        return [filename, str(e)]

    # multiprocessing can give this error
    except IndexError as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        logger.info(filename)
        logger.info(str(e))
        return [filename, "Index Error"]
Ejemplo n.º 7
0
    def prepare(self):
        """Prepare dataset for the usage.

        Steps performed:

        1. Make sure the audio and evaluation setup directories exist.
        2. If 'audio' is among the included content types, download every
           segment listed in testing_set.csv and training_set.csv that is not
           yet on disk. Query ids whose download raised IOError are recorded
           in item_access_error.log.csv and skipped on later runs.
        3. If any of the fold 1 train / test / evaluate setup files is
           missing, generate all three from the ground truth CSV files.
        4. If the meta file is missing, build it from the train and evaluate
           setup files.

        Returns
        -------
        self

        """

        if is_jupyter():
            from tqdm import tqdm_notebook as tqdm
        else:
            from tqdm import tqdm

        # Make sure audio directory exists
        Path().makedirs(path=os.path.join(self.local_path, 'audio'))

        # Make sure evaluation_setup directory exists
        Path().makedirs(
            path=os.path.join(self.local_path, self.evaluation_setup_folder))

        if 'audio' in self.included_content_types:
            # Collect file ids
            files = []
            files += ListDictContainer(filename=os.path.join(
                self.local_path, 'testing_set.csv')).load(
                    fields=['query_id', 'segment_start', 'segment_end'])

            files += ListDictContainer(filename=os.path.join(
                self.local_path, 'training_set.csv')).load(
                    fields=['query_id', 'segment_start', 'segment_end'])

            file_progress = tqdm(files,
                                 desc="{0: <25s}".format('Files'),
                                 file=sys.stdout,
                                 leave=False,
                                 disable=self.disable_progress_bar,
                                 ascii=self.use_ascii_progress_bar)

            non_existing_videos = {}

            # Load list of already identified non-accessible videos
            item_access_log_filename = os.path.join(
                self.local_path, 'item_access_error.log.csv')
            if os.path.isfile(item_access_log_filename):
                for item in ListDictContainer(
                        filename=item_access_log_filename).load(
                            fields=['query_id', 'error']):
                    non_existing_videos[item['query_id']] = item

            # Check that audio files exists
            for file_data in file_progress:
                audio_filename = os.path.join(
                    self.local_path, 'audio',
                    'Y{query_id}_{segment_start}_{segment_end}.{extension}'.
                    format(query_id=file_data['query_id'],
                           segment_start=file_data['segment_start'],
                           segment_end=file_data['segment_end'],
                           extension=self.default_audio_extension))

                # Download segment if it does not exists
                if not os.path.isfile(audio_filename) and file_data[
                        'query_id'] not in non_existing_videos:
                    try:
                        AudioContainer().load_from_youtube(
                            query_id=file_data['query_id'],
                            start=file_data['segment_start'],
                            stop=file_data['segment_end']).save(
                                filename=audio_filename)

                    except IOError as e:
                        # Use str(e): exceptions have no `message` attribute
                        # on Python 3, so `e.message` would raise
                        # AttributeError inside this handler.
                        non_existing_videos[file_data['query_id']] = {
                            'error': str(e).replace('\n', ' '),
                            'query_id': file_data['query_id']
                        }

            # Save list of non-accessible videos
            ListDictContainer(list(non_existing_videos.values()),
                              filename=item_access_log_filename).save(
                                  fields=['query_id', 'error'])

        # Evaluation setup filenames (single fold, fold 1); reused below for
        # both writing the setup files and building the meta file
        train_filename = self.evaluation_setup_filename(setup_part='train',
                                                        fold=1,
                                                        scene_label='youtube',
                                                        file_extension='txt')

        test_filename = self.evaluation_setup_filename(setup_part='test',
                                                       fold=1,
                                                       scene_label='youtube',
                                                       file_extension='txt')

        evaluate_filename = self.evaluation_setup_filename(
            setup_part='evaluate',
            fold=1,
            scene_label='youtube',
            file_extension='txt')

        # Check that evaluation setup exists (all three files are required)
        evaluation_setup_exists = all(
            os.path.isfile(setup_filename)
            for setup_filename in (train_filename, test_filename,
                                   evaluate_filename))

        if not evaluation_setup_exists:
            # Evaluation setup was not found, generate one
            train_meta = MetaDataContainer()
            for item in MetaDataContainer().load(
                    os.path.join(self.local_path,
                                 'groundtruth_weak_label_training_set.csv')):
                if not item.filename.endswith(self.default_audio_extension):
                    item.filename = os.path.join(
                        'audio', 'Y' + os.path.splitext(item.filename)[0] +
                        '.' + self.default_audio_extension)

                # Set scene label
                item.scene_label = 'youtube'

                # Translate event onset and offset, weak labels
                # (offset first, while onset still holds its original value)
                item.offset -= item.onset
                item.onset -= item.onset

                # Only collect items which exists if audio present
                if 'audio' in self.included_content_types:
                    if os.path.isfile(
                            os.path.join(self.local_path, item.filename)):
                        train_meta.append(item)
                else:
                    train_meta.append(item)

            train_meta.save(filename=train_filename)

            evaluate_meta = MetaDataContainer()
            for item in MetaDataContainer().load(
                    os.path.join(self.local_path,
                                 'groundtruth_strong_label_testing_set.csv')):
                if not item.filename.endswith(self.default_audio_extension):
                    item.filename = os.path.join(
                        'audio', 'Y' + os.path.splitext(item.filename)[0] +
                        '.' + self.default_audio_extension)
                # Set scene label
                item.scene_label = 'youtube'

                # Only collect items which exists
                if 'audio' in self.included_content_types:
                    if os.path.isfile(
                            os.path.join(self.local_path, item.filename)):
                        evaluate_meta.append(item)
                else:
                    evaluate_meta.append(item)

            evaluate_meta.save(filename=evaluate_filename)

            # Test setup lists only the filenames of the evaluate items
            test_meta = MetaDataContainer()
            for item in evaluate_meta:
                test_meta.append(MetaDataItem({'filename': item.filename}))

            test_meta.save(filename=test_filename)

            # Load meta and cross validation
            self.load()

        if not self.meta_container.exists():
            # Meta file is the train setup followed by the evaluate setup
            meta_data = MetaDataContainer()
            meta_data += MetaDataContainer().load(train_filename)
            meta_data += MetaDataContainer().load(evaluate_filename)

            # Save meta
            meta_data.save(filename=self.meta_file)

            # Load meta and cross validation
            self.load()

        return self