Example #1
0
    def take_action(self, parsed_args):
        """Print summary information about a data set on disk.

        Dynamically loads the parser class named by ``parsed_args.parser``
        (a dotted path such as ``audeep.backend.parsers.MyParser``), parses
        the data set at ``parsed_args.basedir``, reads sample rate, channel
        count, and length of every audio file, and prints the aggregated
        information as a table.

        Raises:
            ValueError: if the parser class does not inherit from
                ``Parser``, or cannot parse the specified data set.
        """
        # Resolve "package.module.ClassName" into the class object.
        module_name, class_name = parsed_args.parser.rsplit(".", 1)
        parser_class = getattr(importlib.import_module(module_name),
                               class_name)

        if not issubclass(parser_class, Parser):
            raise ValueError(
                "specified parser does not inherit audeep.backend.parsers.Parser"
            )

        parser = parser_class(parsed_args.basedir)

        if not parser.can_parse():
            raise ValueError(
                "specified parser is unable to parse data set at {}".format(
                    parsed_args.basedir))

        lengths = []       # per-file length in seconds (seekable files only)
        sample_rates = []  # per-file sample rate in Hz
        channels = []      # per-file channel count

        non_seekable_files = False

        instance_metadata = parser.parse()

        self.log.info("reading audio file information")

        # Build the progress message template once, outside the loop. The
        # index field width is the number of digits of the instance count,
        # so that e.g. " 7/100" aligns with "42/100".
        num_instances = len(instance_metadata)
        index_width = len(str(num_instances)) if num_instances else 1
        progress_format = "processing %%s (%%%dd/%%d)" % index_width

        for index, metadata in enumerate(instance_metadata):
            self.log.debug(progress_format, metadata.path, index + 1,
                           num_instances)

            with SoundFile(str(metadata.path)) as sf:
                sample_rates.append(sf.samplerate)
                channels.append(sf.channels)

                # length in seconds = frame count / sample rate; the frame
                # count is obtained by seeking to the end of the file
                if sf.seekable():
                    lengths.append(sf.seek(0, SEEK_END) / sf.samplerate)
                else:
                    non_seekable_files = True

        if non_seekable_files:
            self.log.warning(
                "could not determine the length of some files - information may be inaccurate"
            )

        # label_map may legitimately be None for unlabeled data sets
        num_labels = 0 if parser.label_map is None else len(parser.label_map)

        information = [
            ("number of audio files", parser.num_instances),
            ("number of labels", num_labels),
            ("cross validation folds", parser.num_folds),
        ]

        # Length statistics are available only if at least one file was
        # seekable; np.min/np.max raise on an empty sequence.
        if lengths:
            information.append(
                ("minimum sample length", "%.2f s" % np.min(lengths)))
            information.append(
                ("maximum sample length", "%.2f s" % np.max(lengths)))

        if len(np.unique(sample_rates)) > 1:
            information.append(("sample rates", "%d Hz - %d Hz" %
                                (np.min(sample_rates), np.max(sample_rates))))
        else:
            information.append(("sample rate", "%d Hz" % sample_rates[0]))

        if len(np.unique(channels)) > 1:
            information.append(
                ("channels", "%d - %d" % (np.min(channels), np.max(channels))))
        else:
            information.append(("channels", channels[0]))

        TableFormatter("lr").print(data=information,
                                   header="data set information")
Example #2
0
    def take_action(self, parsed_args):
        """Extract spectrograms from a data set, or preview a single one.

        If ``parsed_args.pretend`` is set, processes only the instance at
        that index and shows its spectrogram and amplitude histogram with
        matplotlib instead of writing any output. Otherwise, processes all
        instances and writes the resulting data set to
        ``parsed_args.output``.

        Raises:
            ValueError: if only one of --chunk-count/--chunk-length is
                given, if the parser class does not inherit from
                ``Parser``, if the data set cannot be parsed, or if the
                parser yields no instances.
        """
        # --chunk-count and --chunk-length must be given together (XOR).
        if (parsed_args.chunk_count is not None) ^ (parsed_args.chunk_length
                                                    is not None):
            raise ValueError(
                "--chunk-count can only be used with --chunk-length and vice-versa"
            )

        # Resolve "package.module.ClassName" into the class object.
        module_name, class_name = parsed_args.parser.rsplit(".", 1)
        parser_class = getattr(importlib.import_module(module_name),
                               class_name)

        if not issubclass(parser_class, Parser):
            raise ValueError(
                "specified parser does not inherit audeep.backend.parsers.Parser"
            )

        parser = parser_class(parsed_args.basedir)
        preprocessor = self.get_preprocessor(parsed_args)

        if not parser.can_parse():
            raise ValueError(
                "specified parser is unable to parse data set at {}".format(
                    parsed_args.basedir))

        self.log.info("parsing data set at %s", parsed_args.basedir)

        instance_metadata = parser.parse()

        if parsed_args.pretend is not None:
            # Preview mode: process a single instance and plot it.
            metadata = instance_metadata[
                parsed_args.pretend]  # type: _InstanceMetadata

            sxx, f, t = preprocessor.process(metadata.path)

            # noinspection PyTypeChecker
            spectrogram_info = [("data file", metadata.path)]

            if parser.label_map is not None:
                spectrogram_info.append(("label", "{} ({})".format(
                    metadata.label_nominal,
                    parser.label_map[metadata.label_nominal])))
            else:
                spectrogram_info.append(
                    ("label", "{} ({})".format(metadata.label_nominal,
                                               metadata.label_numeric)))

            if metadata.cv_folds:
                # noinspection PyTypeChecker
                spectrogram_info.append(
                    ("cross validation splits",
                     ",".join([x.name for x in metadata.cv_folds])))

            if metadata.partition is not None:
                spectrogram_info.append(("partition", metadata.partition.name))

            # noinspection PyTypeChecker
            spectrogram_info.append(("number of chunks", len(sxx)))
            spectrogram_info.append(
                ("spectrogram time steps", [x.shape[1] for x in sxx]))
            spectrogram_info.append(
                ("spectrogram frequency bands", f.shape[0]))

            TableFormatter().print(data=spectrogram_info)

            fig = plt.figure()
            # Concatenate the chunks back into one contiguous spectrogram
            # for display.
            sxx_full = np.concatenate(sxx, axis=1)
            t_full = np.concatenate(t)

            nxticks = sxx_full.shape[1] // 25
            nyticks = 4

            # spectrogram
            ax = fig.add_subplot(2, 1, 1)
            plt.title("Spectrogram")
            ax.set_xticks(
                np.arange(0, t_full.shape[0], t_full.shape[0] // nxticks))
            ax.set_xticklabels(np.round(t_full[::t_full.shape[0] // nxticks]))
            ax.set_xlabel("Time (s)")

            ax.set_yticks(np.arange(0, len(f), len(f) // nyticks))
            # frequency axis is drawn bottom-up, matching sxx_full[::-1]
            ax.set_yticklabels(np.round(f[::-len(f) // nyticks]))
            ax.set_ylabel("Frequency (Hz)")

            ax.imshow(sxx_full[::-1], cmap="magma")

            # histogram
            ax = fig.add_subplot(2, 1, 2)
            plt.title("Amplitude Histogram")
            ax.set_xlabel("Amplitude (dB)")
            ax.set_ylabel("Probability")

            # Exclude the clipping boundary values themselves from the
            # histogram range, since they accumulate clipped mass.
            range_min = parsed_args.clip_below + 0.01 if parsed_args.clip_below is not None else sxx_full.min(
            )
            range_max = parsed_args.clip_above - 0.01 if parsed_args.clip_above is not None else 0

            # NOTE: "normed" was removed from matplotlib in 3.1;
            # "density" is the equivalent, supported parameter.
            ax.hist(sxx_full.flatten(),
                    range=(range_min, range_max),
                    bins=100,
                    density=True,
                    histtype="stepfilled")

            plt.tight_layout()
            plt.show()
        else:
            # Full extraction mode: process every instance and build the
            # output data set.
            num_instances = parser.num_instances * (
                1 if parsed_args.chunk_count is None else
                parsed_args.chunk_count)
            data_set = None

            index = 0

            # Build the progress message template once, outside the loop;
            # the index field width is the number of digits of the count.
            num_files = len(instance_metadata)
            index_width = len(str(num_files)) if num_files else 1
            progress_format = "processing %%s (%%%dd/%%d)" % index_width

            for file_index, metadata in enumerate(
                    instance_metadata):  # type: Tuple[int, _InstanceMetadata]
                self.log.info(progress_format, metadata.path, file_index + 1,
                              num_files)

                sxx, _, _ = preprocessor.process(metadata.path)

                for chunk_nr, sxx_chunk in enumerate(sxx):
                    # The data set is allocated lazily, once the first
                    # chunk reveals the spectrogram dimensions.
                    if data_set is None:
                        data_set = empty(num_instances=num_instances,
                                         feature_dimensions=[
                                             ("time", sxx_chunk.shape[1]),
                                             ("freq", sxx_chunk.shape[0])
                                         ],
                                         num_folds=parser.num_folds)
                        data_set.label_map = parser.label_map

                    instance = data_set[index]
                    instance.filename = metadata.filename
                    instance.chunk_nr = chunk_nr
                    instance.label_nominal = metadata.label_nominal

                    # With a label map, numeric labels are derived from it;
                    # without one, copy the parser-provided numeric label.
                    if data_set.label_map is None:
                        instance.label_numeric = metadata.label_numeric

                    instance.cv_folds = metadata.cv_folds
                    instance.partition = metadata.partition
                    instance.features = np.transpose(sxx_chunk)

                    index += 1

            # Guard against an empty parse result, which would otherwise
            # surface as an opaque AttributeError on None.
            if data_set is None:
                raise ValueError(
                    "no instances were processed - cannot write output")

            data_set.save(parsed_args.output)
Example #3
0
    def take_action(self, parsed_args):
        """Print information about a previously stored data set.

        Shows global metadata, optionally details about a single instance
        (``parsed_args.instance``), and — if ``parsed_args.detailed_folds``
        is set and the data set is fully labeled with cross-validation
        info — a per-fold breakdown of label counts in the train and
        validation splits.

        Raises:
            IOError: if no data set exists at ``parsed_args.input``.
        """
        if not parsed_args.input.exists():
            raise IOError("unable to open data set at {}".format(
                parsed_args.input))

        data_set = load(parsed_args.input)
        formatter = TableFormatter()

        # print global information
        global_information = [
            ("number of instances", data_set.num_instances),
            ("cross validation info", data_set.has_cv_info),
            ("partition info", data_set.has_partition_info),
            ("fully labeled", data_set.is_fully_labeled),
            ("feature dimensions", data_set.feature_dims),
        ]

        print()
        formatter.print(data=global_information,
                        header="global data set information")
        print()

        # print instance information
        if parsed_args.instance is not None:
            instance = data_set[parsed_args.instance]

            instance_information = [
                ("data file", instance.filename),
                ("chunk number", instance.chunk_nr),
                ("label", "{} ({})".format(instance.label_nominal,
                                           instance.label_numeric)),
                ("cross validation splits", ", ".join([
                    "None" if x is None else x.name for x in instance.cv_folds
                ]) or None),
                ("partition", None
                 if instance.partition is None else instance.partition.name),
                ("shape", instance.feature_shape),
            ]

            formatter.print(data=instance_information,
                            header="instance {} information:".format(
                                parsed_args.instance))
            print()

        if parsed_args.detailed_folds and data_set.has_cv_info and data_set.is_fully_labeled:
            formatter = TableFormatter(alignment="lrrrr")

            # numeric label -> nominal label, for display
            inverse_label_map = dict(map(reversed, data_set.label_map.items()))

            for fold in range(data_set.num_folds):
                train_split = data_set.split(fold, Split.TRAIN)
                valid_split = data_set.split(fold, Split.VALID)

                # The two splits may contain different label sets, so the
                # count arrays returned by np.unique cannot be indexed in
                # parallel; map labels to counts explicitly instead.
                train_labels, train_counts = np.unique(
                    train_split.labels_numeric, return_counts=True)
                valid_labels, valid_counts = np.unique(
                    valid_split.labels_numeric, return_counts=True)

                train_count_map = dict(zip(train_labels, train_counts))
                valid_count_map = dict(zip(valid_labels, valid_counts))

                # all labels present in either split, in sorted order
                labels = np.union1d(train_labels, valid_labels)

                train_total = sum(train_counts)
                valid_total = sum(valid_counts)

                fold_information = []

                for label in labels:
                    train_count = train_count_map.get(label, 0)
                    valid_count = valid_count_map.get(label, 0)

                    # guard against empty splits (0/0 division)
                    train_relative = (100 * train_count / train_total
                                      if train_total else 0.0)
                    valid_relative = (100 * valid_count / valid_total
                                      if valid_total else 0.0)

                    fold_information.append(
                        (inverse_label_map[label], train_count,
                         "%2.2f%%" % train_relative, valid_count,
                         "%2.2f%%" % valid_relative))

                fold_information.append(
                    ("total", train_total, "", valid_total, ""))

                formatter.print(data=fold_information,
                                header="fold {} information:".format(fold + 1),
                                dividers=[len(labels) - 1])
                print()