Code Example #1
    def extract_spectra(self, offset=50, scaling=80):
        # Loop over subsets
        for subset in ["Train", "Val", "Test"]:
            # Get audio files
            dir_audio = os.path.join(self._dir_root, "Ego-Noise Prediction",
                                     "Dataset", subset, "Audio")
            files_audio = retrieve_files(dir_audio)
            # directory for the unsynchronized spectra
            dir_output = os.path.join(self._dir_root_set, "Unsynchronized",
                                      subset, "Spectra")
            # Refresh directory
            refresh_directory(dir_output)

            # Loop through files in set
            for f in files_audio:
                # Extract spectrum
                Z = fh.extract_spectrum(f, self._feature)
                # Scale spectrum
                Z += offset
                Z /= scaling
                # Save to appropriate directory
                fn = os.path.split(f)[-1].replace(".wav", ".csv")
                fp = os.path.join(dir_output, fn)
                pd.DataFrame(Z).to_csv(fp, index=False, header=False)

            # Report completion once per subset
            print_verbose(
                self.super_verbose,
                "Finished extracting features for '%s' set." % subset,
            )
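
The offset/scaling step above appears to map the extracted (presumably dB-scaled) spectrum into roughly the [0, 1] range before it is written to CSV. Assuming the same constants are used throughout, a minimal sketch of the corresponding inverse transform could look like this (the helper name is illustrative, not part of the project):

    import numpy as np

    def denormalize_spectrum(Z_scaled, offset=50, scaling=80):
        # Undo the normalization applied in extract_spectra:
        # Z_scaled = (Z + offset) / scaling  ->  Z = Z_scaled * scaling - offset
        return np.asarray(Z_scaled) * scaling - offset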
Code Example #2
    def generate_silence_category(self, n_instances, duration):
        """Generates a 'silence' category consisting of white noise.

        Keyword arguments:
            n_instances -- number of instances (recordings) generated,
            duration -- duration of each recording in seconds.
        """
        # generate silence
        np.random.seed(42)
        silence = np.random.uniform(low=-1.0,
                                    high=1.0,
                                    size=(n_instances,
                                          duration * self._sample_rate))
        # loop over instances
        for i in range(n_instances):
            # export to file
            fn = "silence_%02d.wav" % (i + 1)
            fp = os.path.join(self._dir_root, "Aircraft Classification",
                              "Audio", "Full", fn)
            sf.write(fp, silence[i], samplerate=self._sample_rate)
            print_verbose(self.super_verbose, "Generated file: '%s'" % fp)

        print_verbose(
            self.verbose,
            "Finished generating %d instances of silence." % n_instances)
Code Example #3
    def save_network(self, network, loss, overwrite=False):

        # generate output filename and directory for model and config
        network_id = "%.6f_c%d" % (loss, self._states["context_frames"])
        dir_model = os.path.join(self._dir_root_set, "Models", network_id)
        fn_model = "enp_model.pt"
        fn_config = "enp_config.json"

        # create or overwrite directory
        if os.path.exists(dir_model) and not overwrite:
            print_verbose(self.verbose, "Network already exists.")
            return dir_model
        refresh_directory(dir_model)

        # save network
        torch.save(network.state_dict(), os.path.join(dir_model, fn_model))

        # save network config and settings
        with open(os.path.join(dir_model, fn_config), "w") as config_file:
            json.dump(
                [
                    self._net_config, self._spectrum, self._states,
                    self._train_settings
                ],
                config_file,
            )

        return dir_model
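
A hypothetical counterpart for loading a saved model back from disk, assuming the JSON list is read in the same order it was dumped and that the network class takes its config values positionally (as in _create_network further below); load_network is not part of the listing:

    import json
    import os
    import torch

    def load_network(dir_model, net_class, device="cpu"):
        # Read the config list back in the order it was dumped by save_network
        with open(os.path.join(dir_model, "enp_config.json"), "r") as f:
            net_config, spectrum, states, train_settings = json.load(f)
        # Rebuild the network and restore its weights
        network = net_class(*net_config.values())
        network.load_state_dict(
            torch.load(os.path.join(dir_model, "enp_model.pt"),
                       map_location=device))
        network.eval()
        return network, (spectrum, states, train_settings)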
Code Example #4
    def _synchronize_pair(self, file_spectrum, file_states, dir_root_output,
                          skip_takeoff):
        # load spectrum
        Z = pd.read_csv(file_spectrum, header=None).to_numpy()
        # get time vector from the spectrum
        t_mic = librosa.times_like(
            Z,
            sr=self._feature["fft_sample_rate"],
            hop_length=self._feature["stft_hop_length"],
        )
        # load states
        S = pd.read_csv(file_states, header=0)
        # get time vector from state data
        t_mav = S["delta_t"].to_numpy()
        # scale the states and transpose: (time, states) -> (states, time)
        S.drop(columns=["delta_t"], inplace=True)
        S = fh.scale_states(S).to_numpy().transpose()

        # pair each frame time in t_mic with the index of the first state
        # timestamp in t_mav that is not earlier than it
        assert len(t_mic) < len(t_mav)  # requires hop_size / fft_freq > 100
        idx = np.searchsorted(t_mav, t_mic)
        # only keep the matched pairs from t_mav
        S = S[:, idx]

        if skip_takeoff:
            # identify take off by delta-rpm spike (ROW 4-7)
            delta_rpms = S[4:8]
            max_spike = abs(delta_rpms).max()
            _, spikes = np.where(
                abs(delta_rpms) > 0.5 * max_spike)  # get column idx only
            last_spike = spikes.max()  # last column
            # cut off spikes + buffer frames
            buf = 5
            S = S[:, last_spike + 1 + buf:]
            Z = Z[:, last_spike + 1 + buf:]

        # filename of output identical to input
        fn = os.path.split(file_states)[-1]
        # export the synchronized spectra
        fp = os.path.join(dir_root_output, "Spectra", fn)
        pd.DataFrame(Z).to_csv(fp, header=False, index=False)
        # export the synchronized states
        fp = os.path.join(dir_root_output, "States", fn)
        pd.DataFrame(S).to_csv(fp, header=False, index=False)

        print_verbose(
            self.super_verbose,
            "Synchronized '%s' (%d datapoints)" %
            (fn, min(len(t_mic), len(t_mav))),
        )
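
Note that np.searchsorted returns, for each spectrogram frame time, the insertion index into the denser state time vector, i.e. the index of the first state timestamp that is not earlier than the frame time, rather than the strictly nearest one. A toy illustration of the pairing:

    import numpy as np

    t_mic = np.array([0.05, 0.15, 0.25])     # spectrogram frame times (s)
    t_mav = np.array([0.0, 0.1, 0.2, 0.3])   # denser state timestamps (s)
    idx = np.searchsorted(t_mav, t_mic)      # -> array([1, 2, 3])
    print(t_mav[idx])                        # state times paired with each frame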
Code Example #5
    def import_audio_esc50(self):
        """Download and extract the ESC-50 dataset."""
        # set destination
        dir_dest = os.path.join(self._dir_root, "Raw", "Aircraft")
        if not os.path.exists(dir_dest):
            os.makedirs(dir_dest)
        # download
        fp_dest = os.path.join(dir_dest, "ESC-50-master.zip")
        fp_unzipped = os.path.join(dir_dest, "ESC-50-master")
        if not os.path.exists(fp_unzipped):
            print_verbose(self.verbose, "Downloading...")
            url = "https://github.com/karoldvl/ESC-50/archive/master.zip"
            wget.download(url, dir_dest)
            print_verbose(self.verbose, "Download finished.")
            # unzip
            with zipfile.ZipFile(fp_dest, "r") as zip_ref:
                zip_ref.extractall(dir_dest)
            print_verbose(self.verbose,
                          "Extracted ESC-50 to %s" % os.path.abspath(fp_unzipped))
        else:
            print_verbose(
                self.verbose,
                "ESC-50 has already been extracted to %s" %
                os.path.abspath(fp_unzipped),
            )
Code Example #6
    def save_network(self, network, loss, overwrite=False):

        # generate output filename and directory for model and config
        network_id = "%.6f" % loss
        dir_model = os.path.join(self._dir_root_set, "Models", network_id)
        fn_model = "ac_model.pt"
        fn_config = "ac_config.json"

        # create or overwrite directory
        if os.path.exists(dir_model) and not overwrite:
            print_verbose(self.verbose, "Network already exists.")
            return dir_model
        refresh_directory(dir_model)

        # save network
        torch.save(network.state_dict(), os.path.join(dir_model, fn_model))

        # save network config and settings
        config = [self._net_config, self._spectrum, self._feature]
        if hasattr(self, "_states"):
            config.append(self._states)
        config += [self._classification, self._train_settings]
        with open(os.path.join(dir_model, fn_config), "w") as config_file:
            json.dump(config, config_file)

        return dir_model
Code Example #7
def _create_network(net_config, verbose=True, super_verbose=False):
    # set up network
    network = Net(*net_config.values())
    network.to(net_config["device"])

    print_verbose(verbose, "Device: %s." % net_config["device"])
    print_verbose(super_verbose, network)
    print_verbose(
        verbose,
        "Number of trainable parameters in network: %d." %
        sum(p.numel() for p in network.parameters() if p.requires_grad),
    )

    return network
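
Because the constructor is called as Net(*net_config.values()), net_config is expected to be an ordered dict whose values match the constructor's positional parameters, with "device" also serving as the target device. The keys below are purely illustrative (the project's actual config keys are not shown in this listing):

    import torch

    net_config = {
        "n_input": 128,     # illustrative: input feature dimension
        "n_hidden": 256,    # illustrative: hidden layer width
        "n_output": 128,    # illustrative: output dimension
        "device": "cuda" if torch.cuda.is_available() else "cpu",
    }
    network = _create_network(net_config, verbose=True)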
Code Example #8
    def split_features(
        self, subset=None, augmentations=None, noise_set=None, noise_ratio=None
    ):
        # default 'noise' is no noise (clean)
        if noise_set is None:
            noise_set = "Clean"

        if subset is not None:
            if isinstance(subset, str):
                subset = [subset]
            subsets = subset
        else:
            # split 'Train', 'Val', 'Test' set if no specific subset is given
            if augmentations is not None:
                if isinstance(augmentations, str):
                    augmentations = [augmentations]
                # add specific augmentation(s) to default sets
                subsets = ["Train", "Val", "Test"]
                subsets += ["Train " + a for a in augmentations]
            else:
                # use all available augmentations
                if noise_set == "Clean":
                    subsets = os.listdir(
                        os.path.join(self._dir_root_set, "Features", "Clean", "Spectra")
                    )
                else:
                    subsets = os.listdir(
                        os.path.join(
                            self._dir_root_set,
                            "Features",
                            noise_set,
                            "Spectra",
                            "Ratio_%.2f" % noise_ratio,
                        )
                    )

        # root input directory spectra
        dir_root_spectra_in = os.path.join(
            self._dir_root_set, "Features", noise_set, "Spectra"
        )
        if noise_set != "Clean":
            dir_root_spectra_in = os.path.join(
                dir_root_spectra_in, "Ratio_%.2f" % noise_ratio
            )

        for subset in sorted(subsets):
            # load 5-second spectra belonging to categories
            dir_in_spectra = os.path.join(dir_root_spectra_in, subset)
            files_spectra = retrieve_files(dir_in_spectra)
            files_spectra = [
                f
                for f in files_spectra
                if os.path.split(f)[-1].split("_")[0]
                in self._classification["categories"]
            ]

            # set output directory (augmentations i.e. 'Train Denoised' go in 'Train')
            dir_out_spectra = os.path.join(
                self._dir_root_set, "Dataset", subset.split(" ")[0], "Spectra"
            )
            # refresh directories only for non-augmented sets
            if subset in ["Train", "Val", "Test"]:
                refresh_directory(dir_out_spectra)

            # split spectra
            part = partial(self._split_spectra, dir_output=dir_out_spectra)
            with Pool(processes=os.cpu_count() - 1) as pool:
                pool.map(part, files_spectra)

            # split the states in case of implicit denoising
            if hasattr(self, "_states"):
                # load states
                dir_in_states = os.path.join(
                    self._dir_root_set, "Features", "Mixed", "States", subset
                )
                files_states = retrieve_files(dir_in_states)
                files_states = [
                    f
                    for f in files_states
                    if os.path.split(f)[-1].split("_")[0]
                    in self._classification["categories"]
                ]

                dir_out_states = os.path.join(
                    self._dir_root_set, "Dataset", subset.split(" ")[0], "States"
                )
                # refresh output directory only for non-augmented sets
                if subset in ["Train", "Val", "Test"]:
                    refresh_directory(dir_out_states)

                # split states
                part = partial(self._split_states, dir_output=dir_out_states)
                with Pool(processes=os.cpu_count() - 1) as pool:
                    pool.map(part, files_states)

            print_verbose(
                self.verbose,
                "Split %d files (%d categories) into %d files"
                % (
                    len(files_spectra),
                    len(self._classification["categories"]),
                    len(os.listdir(dir_out_spectra)),
                ),
            )
Code Example #9
def train_network(
    train_settings,
    train_set,
    val_set,
    net_config,
    loss_fn,
    verbose=True,
    super_verbose=False,
):

    # reset the seed
    torch.manual_seed(42)

    # create network from config, forwarding the verbosity flags
    network = _create_network(net_config, verbose, super_verbose)

    # copy optimizer settings to avoid modifying train_settings
    dict_optimizer = train_settings["optimizer"].copy()
    # select the optimizer in torch.optim from settings
    optimizer = getattr(torch.optim, dict_optimizer.pop("optimizer"))
    # bind network, unpack optimizer settings
    optimizer = optimizer(network.parameters(), **dict_optimizer)

    if "lr_scheduler" in train_settings:
        # copy scheduler settings to avoid modifying train_settings
        dict_scheduler = train_settings["lr_scheduler"].copy()
        # select the lr scheduler in torch.optim from settings
        lr_scheduler = getattr(torch.optim.lr_scheduler,
                               dict_scheduler.pop("scheduler"))
        # bind optimizer, unpack scheduler settings
        lr_scheduler = lr_scheduler(optimizer, **dict_scheduler)

    # create train dataloader
    train_loader = torch.utils.data.DataLoader(
        dataset=train_set,
        batch_size=train_settings["batch_size"],
        shuffle=True,
        drop_last=False,
    )
    # create validation dataloader
    val_batch_size = min(len(val_set), 2048)  # cap batch size to avoid memory issues
    val_loader = torch.utils.data.DataLoader(dataset=val_set,
                                             batch_size=val_batch_size,
                                             drop_last=False)

    if "es_patience" in train_settings:
        # set up early stopping checkpoint
        fp_checkpoint = "checkpoint-es.pt"
        early_stopping = es.EarlyStopping(
            patience=train_settings["es_patience"],
            delta=1e-7,
            verbose=super_verbose,
            output_fp=fp_checkpoint,
        )

    training_loss_history = []
    validation_loss_history = []
    # loop over epochs
    for epoch in range(train_settings["epochs"]):
        train_losses = []
        # set in training mode
        network.train()
        for data in train_loader:
            # to device (gpu/cpu)
            x_train = data[0].to(net_config["device"])
            if len(data) > 2:
                x2_train = data[1].to(net_config["device"])
            y_train = data[-1].to(net_config["device"])
            # clear gradient of optimizer
            optimizer.zero_grad()
            # forward pass
            if len(data) == 2:
                yhat = network(x_train)
            else:
                yhat = network(x_train, x2_train)
            # compute loss
            loss = loss_fn(yhat, y_train)
            # backward pass
            loss.backward()
            # record loss
            train_losses.append(loss.item())
            # update parameters
            optimizer.step()
        # record loss and update loss history
        training_loss = np.mean(train_losses)
        training_loss_history.append(training_loss)

        # validation loss
        with torch.no_grad():
            val_losses = []
            # set in eval mode
            network.eval()
            for data in val_loader:
                # to device (gpu/cpu)
                x_val = data[0].to(net_config["device"])
                if len(data) > 2:
                    x2_val = data[1].to(net_config["device"])
                y_val = data[-1].to(net_config["device"])
                # forward pass
                if len(data) == 2:
                    yhat = network(x_val)
                else:
                    yhat = network(x_val, x2_val)
                # compute loss
                val_loss = loss_fn(yhat, y_val)
                # record loss
                val_losses.append(val_loss.item())
        # record loss and update loss history
        validation_loss = np.mean(val_losses)
        validation_loss_history.append(validation_loss)

        print_verbose(
            super_verbose,
            "epoch %d: training loss = %.6f, validation loss = %.6f" %
            (epoch + 1, training_loss, validation_loss),
        )

        if "es_patience" in train_settings:
            # check early stopping criterion
            early_stopping(validation_loss, network)
            if early_stopping.early_stop:
                # get training loss at best epoch
                training_loss = training_loss_history[
                    epoch - train_settings["es_patience"]]
                # get validation loss at best epoch
                validation_loss = early_stopping.val_loss_min
                print_verbose(
                    super_verbose,
                    "Early stopping (using model at epoch %d with val. loss %.5f)"
                    % (epoch + 1 - train_settings["es_patience"],
                       validation_loss),
                )
                # end training
                break

        if "lr_scheduler" in train_settings:
            # update learning rate
            lr_scheduler.step(validation_loss)

    if "es_patience" in train_settings:
        # load network from checkpoint
        network.load_state_dict(torch.load(early_stopping.output_fp))
        # delete checkpoint !!!

    loss = (training_loss, validation_loss)
    loss_history = (training_loss_history, validation_loss_history)
    return network, loss, loss_history
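
The optimizer and (optional) learning-rate scheduler are selected by name with getattr, so train_settings only needs class names and keyword arguments that exist in torch.optim / torch.optim.lr_scheduler. A hypothetical call, assuming train_set, val_set and net_config have been prepared elsewhere (all values below are illustrative):

    import torch

    train_settings = {
        "epochs": 100,
        "batch_size": 256,
        "es_patience": 10,  # optional: enables early stopping
        "optimizer": {"optimizer": "Adam", "lr": 1e-3, "weight_decay": 1e-5},
        "lr_scheduler": {"scheduler": "ReduceLROnPlateau",
                         "factor": 0.5, "patience": 5},
    }
    network, (train_loss, val_loss), history = train_network(
        train_settings, train_set, val_set, net_config,
        loss_fn=torch.nn.MSELoss(),
    )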
Code Example #10
    def extract_audio_from_esc50(self,
                                 categories,
                                 db_trim=30,
                                 overwrite=False):
        """Export relevant categories from the ESC-50 dataset.

        Keyword arguments:
            categories -- iterable containing the categories from the ESC-50
            dataset that should be extracted,
            db_trim -- threshold for trimming leading/trailing silence
            (default: 30 dB),
            overwrite -- whether to overwrite existing data (default: False).
        """
        # set directories
        dir_esc50 = os.path.join(self._dir_root, "Raw", "Aircraft",
                                 "ESC-50-master")
        fp_esc50_csv = os.path.join(dir_esc50, "meta", "esc50.csv")
        dir_input = os.path.join(dir_esc50, "audio")
        dir_output = os.path.join(self._dir_root, "Aircraft Classification",
                                  "Audio", "Full")
        # check if output directory exists
        if os.path.exists(dir_output):
            if overwrite:
                shutil.rmtree(dir_output)
            else:
                print_verbose("Output directory already exists.")
                return
        os.makedirs(dir_output)
        # get dataframe with filenames, categories
        df = pd.read_csv(fp_esc50_csv)
        df.drop(["fold", "target", "esc10", "src_file", "take"],
                axis=1,
                inplace=True)
        df.replace("_", "-", inplace=True, regex=True)  # less tedious later
        # extract relevant categories
        df_binary = df.loc[df["category"].isin(categories)]
        categories = df_binary["category"].unique()

        # loop over categories
        for cat in categories:
            # load files belonging to category
            files_src = df_binary.loc[df_binary["category"] ==
                                      cat]["filename"].to_list()
            # loop over files
            for i, file in enumerate(files_src):
                src = os.path.join(dir_input, file)
                # load audio
                y, sr = librosa.load(src, sr=self._sample_rate)
                # trim audio
                y_trim, _ = librosa.effects.trim(y, top_db=db_trim)
                # export audio
                fn_out = "%s_%02d.wav" % (cat, i + 1)
                dest = os.path.join(dir_output, fn_out)
                sf.write(dest, y_trim, samplerate=sr)
                # printing
                if self.super_verbose:
                    # set trim message
                    if len(y_trim) < len(y):
                        trim_msg = " (trimmed to %.3f sec.)" % (len(y_trim) /
                                                                sr)
                    else:
                        trim_msg = ""
                    print("%s ---> %s%s" % (file, dest, trim_msg))

        print_verbose(
            self.verbose,
            "Finished exporting %d files (sr = %d Hz)" % (len(df_binary), sr),
        )
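
librosa.effects.trim removes leading and trailing regions that fall more than top_db decibels below the signal's peak, and also returns the interval of kept samples. A small self-contained check of that behaviour:

    import numpy as np
    import librosa

    sr = 22050
    # one second of silence, one second of noise, one second of silence
    y = np.concatenate([np.zeros(sr), 0.1 * np.random.randn(sr), np.zeros(sr)])
    y_trim, interval = librosa.effects.trim(y, top_db=30)
    print(len(y) / sr, "->", len(y_trim) / sr, "seconds; kept samples:", interval)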
Code Example #11
    def augment_training_data(self, overwrite=False):
        """Augment the training data.

        Keyword arguments:
            overwrite -- whether to overwrite existing data (default: False).
        Augmentations include:
            Pitch Shift at [-2, -1, 1, 2] octaves,
            Time Stretch with ratios of [0.70, 0.85, 1.15, 1.30],
            Intra-category mixing with four random files belonging to the
            same category.
        The 'silence' category (if generated) is omitted from the augmentation.
        """
        # set directories
        dir_input = os.path.join(self._dir_root, "Aircraft Classification",
                                 "Audio", "Train")
        dir_root_output = os.path.join(self._dir_root,
                                       "Aircraft Classification", "Audio")

        # get files, but ignore augmentation for 'silence' category
        files = [
            os.path.join(dir_input, f) for f in sorted(os.listdir(dir_input))
            if os.path.split(f)[-1].split("_")[0] != "silence"
        ]

        # loop through augmentations
        augmentations = ["Pitch Shift", "Time Stretch", "Class Mix"]
        do_augmentations = []
        for aug in augmentations:
            # set output directory
            dir_output = os.path.join(dir_root_output, "Train " + aug)
            # check if it exists or should be overwritten
            if overwrite or not os.path.exists(dir_output):
                refresh_directory(dir_output)
                do_augmentations.append(aug)

        # do augmentations
        if len(do_augmentations) > 0:
            for aug in do_augmentations:
                dir_output = os.path.join(dir_root_output, "Train " + aug)
                if aug == "Class Mix":
                    # generate a list of file-specific 'seeds' from a fixed seed
                    # to preserve reproducible randomness while multiprocessing
                    np.random.seed(42)
                    seeds = np.random.randint(0, 10 * len(files), len(files))

                    part = partial(self._augment_class_mix,
                                   dir_out=dir_output,
                                   all_files=files)

                    with Pool(processes=os.cpu_count() - 1) as pool:
                        pool.starmap(part, list(zip(files, seeds)))

                elif aug == "Pitch Shift":
                    part = partial(self._augment_pitch_shift,
                                   dir_out=dir_output)

                    with Pool(processes=os.cpu_count() - 1) as pool:
                        pool.map(part, files)

                elif aug == "Time Stretch":
                    part = partial(self._augment_time_stretch,
                                   dir_out=dir_output)

                    with Pool(processes=os.cpu_count() - 1) as pool:
                        pool.map(part, files)

            print_verbose(
                self.verbose,
                "Augmentation: %d --> %d files using %s augmentation" % (
                    len(files),
                    len(do_augmentations) * 4 * len(files),
                    do_augmentations,
                ),
            )
        else:
            print_verbose(self.verbose, "Augmentation has already been done.")
Code Example #12
    def split_dataset(self,
                      train_test_ratio=0.8,
                      train_val_ratio=0.8,
                      overwrite=False):
        """Split the dataset into a training, validation and test subset.

        Keyword arguments:
            train_test_ratio -- ratio of the training set over the complete
            set; the remainder will be assigned to the test subset
            (default: 0.8),
            train_val_ratio -- ratio of the actual training set over the
            training split; the remainder will be assigned to the validation
            subset (default: 0.8),
            overwrite -- whether to overwrite existing data (default: False).
        """
        # directories
        dir_input = os.path.join(self._dir_root, "Aircraft Classification",
                                 "Audio", "Full")
        dir_root_output = os.path.join(self._dir_root,
                                       "Aircraft Classification", "Audio")
        # check if data should be overwritten if it exists
        if os.path.exists(os.path.join(dir_root_output,
                                       "Train")) and not overwrite:
            print_verbose(
                self.verbose,
                "Dataset already exists and should not be overwritten.")
            return
        # refresh the output directories
        subdirs = ["Train", "Val", "Test"]
        for subdir in subdirs:
            refresh_directory(os.path.join(dir_root_output, subdir))

        # read files into array for easy slicing
        files = np.array(retrieve_files(dir_input))
        # get categories
        file_categories = np.array(
            [os.path.split(f)[-1].split("_")[0] for f in files])
        categories = np.unique(file_categories)
        files_per_category = len(files) // len(categories)

        # get train, val, test indices per category
        train_idcs, test_idcs = train_test_split(np.arange(files_per_category),
                                                 train_size=train_test_ratio,
                                                 random_state=42)
        train_idcs, val_idcs = train_test_split(train_idcs,
                                                train_size=train_val_ratio,
                                                random_state=42)
        print_verbose(
            self.verbose,
            "Split per category (Train, Val, Test): (%d, %d, %d)" %
            (len(train_idcs), len(val_idcs), len(test_idcs)),
        )

        # extract train, val, test files using indices and export to subdirs
        indices = [train_idcs, val_idcs, test_idcs]
        for idcs, subdir in zip(indices, subdirs):
            files_set = [
                f for f in files
                if int(os.path.split(f)[-1].split("_")[-1].split(".")[0]) -
                1 in idcs
            ]
            for file in files_set:
                dest = os.path.join(dir_root_output, subdir,
                                    os.path.split(file)[-1])
                shutil.copyfile(file, dest)

        # remove the now redundant 'Full' input directory
        shutil.rmtree(dir_input)
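
Because the validation indices are carved out of the training split, the default ratios give a 64/16/20 percent split per category. A quick check with the same sklearn call:

    import numpy as np
    from sklearn.model_selection import train_test_split

    idx = np.arange(100)                      # e.g. 100 files per category
    train_idx, test_idx = train_test_split(idx, train_size=0.8, random_state=42)
    train_idx, val_idx = train_test_split(train_idx, train_size=0.8,
                                          random_state=42)
    print(len(train_idx), len(val_idx), len(test_idx))   # 64 16 20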