Example #1
def get_predictions(model, valid_dataset, decoder, save_predictions=None):
    for i, (input, _) in enumerate(valid_dataset):
        [input] = to_cuda_if_available([input])

        pred_strong, _ = model(input.unsqueeze(0))
        pred_strong = pred_strong.cpu()
        pred_strong = pred_strong.squeeze(0).detach().numpy()
        if i == 0:
            LOG.debug(pred_strong)
        pred_strong = ProbabilityEncoder().binarization(pred_strong, binarization_type="global_threshold",
                                                        threshold=0.5)
        pred_strong = scipy.ndimage.filters.median_filter(pred_strong, (cfg.median_window, 1))
        pred = decoder(pred_strong)
        pred = pd.DataFrame(pred, columns=["event_label", "onset", "offset"])
        pred["filename"] = valid_dataset.filenames.iloc[i]
        if i == 0:
            LOG.debug("predictions: \n{}".format(pred))
            LOG.debug("predictions strong: \n{}".format(pred_strong))
            prediction_df = pred.copy()
        else:
            prediction_df = prediction_df.append(pred)

    if save_predictions is not None:
        LOG.info("Saving predictions at: {}".format(save_predictions))
        prediction_df.to_csv(save_predictions, index=False, sep="\t")
    return prediction_df
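
For reference, a minimal call sketch mirroring how this function is used in the test_model examples further down (crnn, dataset, validation_df, many_hot_encoder and transforms_valid are assumed to be set up as shown there; the output path is hypothetical):

crnn = crnn.eval()
[crnn] = to_cuda_if_available([crnn])
validation_data = DataLoadDf(validation_df, dataset.get_feature_file,
                             many_hot_encoder.encode_strong_df,
                             transform=transforms_valid)
predictions = get_predictions(crnn, validation_data, many_hot_encoder.decode_strong,
                              save_predictions="stored_data/predictions.tsv")  # hypothetical path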
Example #2
    def means(self, dataset):
        """Compute the mean and the mean of squares over a dataset.

        :param dataset: dataset, from DataLoad class, each sample is an (X, y) tuple.
        """
        LOG.info('computing mean')
        start = time.time()
        sum_ = 0
        sum_square = 0
        n = 0
        n_sq = 0

        for sample in dataset:
            if type(sample) in [tuple, list] and len(sample) == 2:
                batch_x, _ = sample
            else:
                batch_x = sample
            if type(batch_x) is torch.Tensor:
                batch_x_arr = batch_x.numpy()
            else:
                batch_x_arr = batch_x

            su, nn = self.sum(batch_x_arr, axis=-1)
            sum_ += su
            n += nn

            su_sq, nn_sq = self.sum(batch_x_arr ** 2, axis=-1)
            sum_square += su_sq
            n_sq += nn_sq

        self.mean_ = sum_ / n
        self.mean_of_square_ = sum_square / n_sq

        LOG.debug('time to compute means: ' + str(time.time() - start))
        return self
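
A rough standalone illustration of the accumulation pattern above; _sum is a stand-in for the class's self.sum helper, which is assumed to return the per-feature sum together with the number of elements summed over the chosen axis:

import numpy as np

def _sum(arr, axis=-1):
    # stand-in for self.sum: per-feature sum and number of summed elements
    return arr.sum(axis=axis), arr.shape[axis]

batches = [np.random.rand(64, 40) for _ in range(3)]  # toy (features, frames) batches
sum_, n = 0, 0
for batch in batches:
    su, nn = _sum(batch, axis=-1)
    sum_ += su
    n += nn
mean_ = sum_ / n  # per-feature mean over all frames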
Example #3
def get_weak_predictions(model,
                         valid_dataset,
                         weak_decoder,
                         save_predictions=None):
    for i, (data, _) in enumerate(valid_dataset):
        data = to_cuda_if_available(data)

        pred_weak = model(data.unsqueeze(0))
        pred_weak = pred_weak.cpu()
        pred_weak = pred_weak.squeeze(0).detach().numpy()
        if i == 0:
            LOG.debug(pred_weak)
        pred_weak = ProbabilityEncoder().binarization(
            pred_weak, binarization_type="global_threshold", threshold=0.5)
        pred = weak_decoder(pred_weak)
        pred = pd.DataFrame(pred, columns=["event_labels"])
        pred["filename"] = valid_dataset.filenames.iloc[i]
        if i == 0:
            LOG.debug("predictions: \n{}".format(pred))
            prediction_df = pred.copy()
        else:
            prediction_df = prediction_df.append(pred)

    if save_predictions is not None:
        LOG.info("Saving predictions at: {}".format(save_predictions))
        prediction_df.to_csv(save_predictions, index=False, sep="\t")
    return prediction_df
Example #4
def measure_embeddings(set_embed, model, emb_path, figure_path, set_name=''):
    df, embed = calculate_embedding(set_embed,
                                    model,
                                    savedir=emb_path,
                                    concatenate="append")
    df = df.dropna()
    embed = embed[df.index]
    LOG.debug("embed shape: {}".format(embed.shape))
    LOG.debug("df shape: {}".format(df.shape))

    tsne_emb = TSNE().fit_transform(X=embed.reshape(embed.shape[0], -1))
    tsne_plots(tsne_emb, df, savefig=figure_path)
    scatter = scatter_ratio(embed.reshape(embed.shape[0], -1),
                            df.reset_index())
    silhouette = sklearn.metrics.silhouette_score(embed.reshape(
        embed.shape[0], -1),
                                                  df.event_labels,
                                                  metric='euclidean')
    # Just informative
    LOG.info(
        f"{set_name} silhouette for all classes in 2D (tsne) : "
        f"{sklearn.metrics.silhouette_score(df[['X', 'Y']], df.event_labels, metric='euclidean')}"
    )

    proto = proto_acc(embed.reshape(embed.shape[0], -1), df.reset_index())
    LOG.info("Proto accuracy {} : {}".format(set_name, proto))

    return {
        "scatter" + set_name: scatter,
        "silhouette" + set_name: silhouette,
        "proto" + set_name: proto
    }
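
A hedged usage sketch (the embedding model, the embedding set and the output paths are illustrative placeholders):

metrics = measure_embeddings(valid_embed_set, emb_model,
                             emb_path=os.path.join("stored_data", "embeddings"),
                             figure_path=os.path.join("stored_data", "tsne_valid.png"),
                             set_name="valid")
LOG.info(metrics)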
Example #5
def train(train_loader,
          model,
          optimizer,
          epoch,
          weak_mask=None,
          strong_mask=None):
    class_criterion = nn.BCELoss()
    [class_criterion] = to_cuda_if_available([class_criterion])

    meters = AverageMeterSet()
    meters.update('lr', optimizer.param_groups[0]['lr'])

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    for i, (batch_input, target) in enumerate(train_loader):
        [batch_input, target] = to_cuda_if_available([batch_input, target])
        LOG.debug(batch_input.mean())

        strong_pred, weak_pred = model(batch_input)
        loss = 0
        if weak_mask is not None:
            # Weak BCE Loss
            # Trick to not take unlabeled data
            # Todo figure out another way
            target_weak = target.max(-2)[0]
            weak_class_loss = class_criterion(weak_pred[weak_mask],
                                              target_weak[weak_mask])
            if i == 1:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug(weak_class_loss)
            meters.update('Weak loss', weak_class_loss.item())

            loss += weak_class_loss

        if strong_mask is not None:
            # Strong BCE loss
            strong_class_loss = class_criterion(strong_pred[strong_mask],
                                                target[strong_mask])
            meters.update('Strong loss', strong_class_loss.item())

            loss += strong_class_loss

        assert not (np.isnan(loss.item())
                    or loss.item() > 1e5), 'Loss explosion: {}'.format(
                        loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))
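
As a sketch, one way the masks could be built when weakly and strongly labeled clips are concatenated inside each batch (the slices, loader and model names are illustrative, not taken from the repository):

weak_mask = slice(0, cfg.batch_size // 2)                 # first half of the batch: weakly labeled clips
strong_mask = slice(cfg.batch_size // 2, cfg.batch_size)  # second half: strongly labeled clips
for epoch in range(cfg.n_epoch):
    train(training_loader, crnn, optimizer, epoch,
          weak_mask=weak_mask, strong_mask=strong_mask)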
Example #6
def train_triplet_epoch(loader, model_triplet, optimizer, semi_hard_input=None,
                        semi_hard_embed=None, pit=False, margin=None, swap=False, acc_grad=False):

    start = time.time()
    loss_mean_triplet = []
    nb_triplets_used = 0
    nb_triplets = 0
    if acc_grad:
        lder = loader.batch_sampler
    else:
        lder = loader
    # for i, samples in enumerate(concat_loader_triplet):
    for i, samples in enumerate(lder):
        optimizer.zero_grad()
        if acc_grad:
            outs = loop_batches_acc_grad(samples, loader.dataset, model_triplet,
                                         semi_hard_input, semi_hard_embed, i=i)
        else:
            outs = loop_batches(samples, model_triplet, semi_hard_input, semi_hard_embed, i=i)

        outputs, outputs_pos, outputs_neg = outs
        if i == 0:
            LOG.debug("output CNN shape: {}".format(outputs.shape))
            LOG.debug(outputs.mean())
            LOG.debug(outputs_pos.mean())
            LOG.debug(outputs_neg.mean())
        dist_pos, dist_neg = get_distances(outputs, outputs_pos, outputs_neg,
                                           pit, swap,
                                           )

        if margin is not None:
            loss_triplet = torch.clamp(margin + dist_pos - dist_neg, min=0.0)
        else:
            loss_triplet = ratio_loss(dist_pos, dist_neg)

        pair_cnt = (loss_triplet.detach() > 0).sum().item()
        nb_triplets_used += pair_cnt
        nb_triplets += len(loss_triplet)

        # Normalize based on the number of pairs.
        if pair_cnt > 0:
            # loss_triplet = loss_triplet.sum() / pair_cnt
            loss_triplet = loss_triplet.mean()

            loss_triplet.backward()
            optimizer.step()
            loss_mean_triplet.append(loss_triplet.item())
        else:
            LOG.debug("batch doesn't have any loss > 0")

    epoch_time = time.time() - start
    LOG.info("Loss: {:.4f}\t"
             "Time: {}\t"
             "\tnb_triplets used: {} / {}\t"
             "".format(np.mean(loss_mean_triplet), epoch_time, nb_triplets_used, nb_triplets))
    ratio_triplet_used = nb_triplets_used / nb_triplets
    return model_triplet, loss_mean_triplet, ratio_triplet_used
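
A hedged call sketch; the loader, model and hyperparameters are placeholders:

for epoch in range(n_epochs):
    model_triplet, losses, ratio_used = train_triplet_epoch(triplet_loader, model_triplet, optimizer,
                                                            margin=0.2, swap=True)
    LOG.info("epoch {}, ratio of triplets used: {:.2f}".format(epoch, ratio_used))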
Example #7
def compute_strong_metrics(predictions, valid_df, pooling_time_ratio):
    # In seconds
    predictions.onset = predictions.onset * pooling_time_ratio / (cfg.sample_rate / cfg.hop_length)
    predictions.offset = predictions.offset * pooling_time_ratio / (cfg.sample_rate / cfg.hop_length)

    metric_event = event_based_evaluation_df(valid_df, predictions, t_collar=0.200,
                                             percentage_of_length=0.2)
    metric_segment = segment_based_evaluation_df(valid_df, predictions, time_resolution=1.)
    LOG.info(metric_event)
    LOG.info(metric_segment)
    return metric_event
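
For illustration only (these values are not the repository's configuration): with sample_rate=16000, hop_length=512 and pooling_time_ratio=4, one pooled frame lasts 4 / (16000 / 512) = 0.128 s, so a predicted onset at frame 100 is converted to 12.8 s.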
Example #8
    def means(self, dataset):
        """Compute the mean and the mean of squares over a dataset.

        :param dataset: dataset, from DataLoad class, each sample is an (X, y) tuple.
        """
        LOG.info('computing mean')
        start = time.time()

        shape = None

        counter = 0
        for sample in dataset:
            if type(sample) in [tuple, list] and len(sample)==2:
                batch_X, _ = sample
            else:
                batch_X = sample
            if type(batch_X) is torch.Tensor:
                batch_X_arr = batch_X.numpy()
            else:
                batch_X_arr = batch_X
            data_square = batch_X_arr ** 2
            counter += 1

            if shape is None:
                shape = batch_X_arr.shape
            else:
                if not batch_X_arr.shape == shape:
                    raise NotImplementedError("Not possible to add data with different shape in mean calculation yet")

            # assume first item will have shape info
            if self.mean_ is None:
                self.mean_ = self.mean(batch_X_arr, axis=-1)
            else:
                self.mean_ += self.mean(batch_X_arr, axis=-1)

            if self.mean_of_square_ is None:
                self.mean_of_square_ = self.mean(data_square, axis=-1)
            else:
                self.mean_of_square_ += self.mean(data_square, axis=-1)

        self.mean_ /= counter
        self.mean_of_square_ /= counter

        # To be used if the data have different shapes, but then the iteration needs to stop before the last item.
        # rest = len(dataset) - i
        # if rest != 0:
        #     weight = rest / float(i + rest)
        #     X, y = dataset[-1]
        #     data_square = X ** 2
        #     mean = mean * (1 - weight) + self.mean(X, axis=-1) * weight
        #     mean_of_square = mean_of_square * (1 - weight) + self.mean(data_square, axis=-1) * weight

        LOG.debug('time to compute means: ' + str(time.time() - start))
        return self
Example #9
def compute_strong_metrics(predictions, valid_df, pooling_time_ratio=None):
    if pooling_time_ratio is not None:
        LOG.warning("pooling_time_ratio is deprecated, use it in get_predictions() instead.")
        # In seconds
        predictions.onset = predictions.onset * pooling_time_ratio / (cfg.sample_rate / cfg.hop_length)
        predictions.offset = predictions.offset * pooling_time_ratio / (cfg.sample_rate / cfg.hop_length)

    metric_event = event_based_evaluation_df(valid_df, predictions, t_collar=0.200,
                                             percentage_of_length=0.2)
    metric_segment = segment_based_evaluation_df(valid_df, predictions, time_resolution=1.)
    LOG.info(metric_event)
    LOG.info(metric_segment)
    return metric_event
Example #10
def get_model(state, args, init_model_name=None):
    if init_model_name is not None and os.path.exists(init_model_name):
        model, optimizer, state = load_model(init_model_name,
                                             return_optimizer=True,
                                             return_state=True)
    else:
        if "conv_dropout" in args:
            conv_dropout = args.conv_dropout
        else:
            conv_dropout = cfg.conv_dropout
        cnn_args = {1}

        if args.fixed_segment is not None:
            frames = cfg.frames
        else:
            frames = None

        nb_layers = 4
        cnn_kwargs = {
            "activation": cfg.activation,
            "conv_dropout": conv_dropout,
            "batch_norm": cfg.batch_norm,
            "kernel_size": nb_layers * [3],
            "padding": nb_layers * [1],
            "stride": nb_layers * [1],
            "nb_filters": [16, 16, 32, 65],
            "pooling": [(2, 2), (2, 2), (1, 4), (1, 2)],
            "aggregation": args.agg_time,
            "norm_out": args.norm_embed,
            "frames": frames,
        }
        nb_frames_staying = cfg.frames // (2**2)
        model = CNN(*cnn_args, **cnn_kwargs)
        # model.apply(weights_init)
        state.update({
            'model': {
                "name": model.__class__.__name__,
                'args': cnn_args,
                "kwargs": cnn_kwargs,
                'state_dict': model.state_dict()
            },
            'nb_frames_staying': nb_frames_staying
        })
        if init_model_name is not None:
            save_model(state, init_model_name)
    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    LOG.info(
        "number of parameters in the model: {}".format(pytorch_total_params))
    return model, state
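
A hedged sketch of how this could be called (the initial state dict and the checkpoint path are illustrative; args is expected to provide at least conv_dropout, fixed_segment, agg_time and norm_embed, as read above):

args = parser.parse_args()    # parser as set up in the argparse examples further down
state = {"args": vars(args)}  # hypothetical initial training state
model, state = get_model(state, args,
                         init_model_name=os.path.join("stored_data", "model", "init_model"))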
Example #11
    def extract_features_from_meta(self,
                                   csv_audio,
                                   feature_dir,
                                   subpart_data=None):
        """Extract log mel spectrogram features.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            feature_dir: str, the path to the directory where the features are
            subpart_data: int, number of files to extract features from the csv.
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        for ind, wav_name in enumerate(df_meta.filename.unique()):
            if ind % 500 == 0:
                LOG.debug(ind)
            wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
            wav_path = os.path.join(wav_dir, wav_name)

            out_filename = os.path.join(feature_dir,
                                        name_only(wav_name) + ".npy")

            if not os.path.exists(out_filename):
                if not os.path.isfile(wav_path):
                    LOG.error(
                        "File %s is in the csv file but the feature is not extracted!"
                        % wav_path)
                    df_meta = df_meta.drop(
                        df_meta[df_meta.filename == wav_name].index)
                else:
                    (audio, _) = read_audio(wav_path, cfg.sample_rate)
                    if audio.shape[0] == 0:
                        print("File %s is corrupted!" % wav_path)
                    else:
                        mel_spec = self.calculate_mel_spec(
                            audio, log_feature=self.save_log_feature)

                        np.save(out_filename, mel_spec)

                    LOG.debug("compute features time: %s" % (time.time() - t1))

        return df_meta.reset_index(drop=True)
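
A hedged call sketch; cfg.weak is an assumed metadata csv of weakly labeled clips, and subpart_data is set low for a quick test:

df_weak = dataset.extract_features_from_meta(cfg.weak,           # assumed csv path from config
                                             feature_dir=cfg.feature_dir,
                                             subpart_data=500)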
Example #12
def test_model(state,
               reference_tsv_path,
               reduced_number_of_data=None,
               strore_predicitions_fname=None):
    dataset = DatasetDcase2019Task4(os.path.join(cfg.workspace),
                                    base_feature_dir=os.path.join(
                                        cfg.workspace, "dataset", "features"),
                                    save_log_feature=False)

    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    crnn.load(parameters=state["model"]["state_dict"])
    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(
        state["many_hot_encoder"])

    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    LOG.info(reference_tsv_path)
    df = dataset.initialize_and_get_df(reference_tsv_path,
                                       reduced_number_of_data)
    strong_dataload = DataLoadDf(df,
                                 dataset.get_feature_file,
                                 many_hot_encoder.encode_strong_df,
                                 transform=transforms_valid)

    predictions = get_predictions(crnn,
                                  strong_dataload,
                                  many_hot_encoder.decode_strong,
                                  pooling_time_ratio,
                                  save_predictions=strore_predicitions_fname)
    compute_strong_metrics(predictions, df)

    weak_dataload = DataLoadDf(df,
                               dataset.get_feature_file,
                               many_hot_encoder.encode_weak,
                               transform=transforms_valid)
    weak_metric = get_f_measure_by_class(
        crnn, len(classes), DataLoader(weak_dataload,
                                       batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(
        pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
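
A hedged sketch of evaluating a saved checkpoint with this function, reusing the load_model helper seen in the other examples (model_path is a placeholder):

_, state = load_model(model_path, return_optimizer=False, return_state=True)
test_model(state, reference_tsv_path=cfg.validation,
           strore_predicitions_fname=os.path.join("stored_data", "predictions.tsv"))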
Example #13
def proto_acc(embed, df):
    classes = ['Alarm_bell_ringing', 'Blender', 'Cat', 'Dishes', 'Dog', 'Electric_shaver_toothbrush', 'Frying',
               'Running_water', 'Speech', 'Vacuum_cleaner']
    vector_embed = embed.reshape(embed.shape[0], -1)
    classes_mean = np.zeros((10, embed.shape[-1]))
    for i, c in enumerate(classes):
        class_df = df[df.event_labels.fillna("").str.contains(c)]
        if not class_df.empty:
            class_embed = vector_embed[class_df.index]
            mean_class = np.mean(class_embed, axis=0)
            classes_mean[i] = mean_class

    acc_per_class = np.zeros((len(classes)))
    for i, c in enumerate(classes):
        class_df = df[df.event_labels.fillna("").str.contains(c)]
        if not class_df.empty:
            class_embed = vector_embed[class_df.index]
            distance_to_min = scipy.spatial.distance.cdist(class_embed, classes_mean)
            labels = distance_to_min.argmin(-1)
            acc_per_class[i] = (labels == i).mean()

    LOG.info(pd.DataFrame([classes, acc_per_class.tolist()]).transpose())
    return acc_per_class.mean()
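
A toy sanity check with random embeddings, purely illustrative:

import numpy as np
import pandas as pd

embed = np.random.rand(20, 1, 64)  # 20 clips, 64-dimensional embeddings
df = pd.DataFrame({"event_labels": ["Cat"] * 10 + ["Dog"] * 10})
print(proto_acc(embed, df))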
Example #14
    def trunc_pad_segment(df, fixed_segment):
        def apply_ps_func(row, length):
            duration = (row["offset"] - row["onset"])
            # Choose fixed segment in the event
            if duration > length:
                ra = np.random.uniform(-1, 1)
                onset_bias = fixed_segment * ra
                row["onset"] = max(0, row["onset"] + onset_bias)
            # Bias the onset and the offset accordingly
            else:
                ra = np.random.rand()
                onset_bias = fixed_segment * ra
                row["onset"] = max(0, row["onset"] - onset_bias)

            row["offset"] = row["onset"] + fixed_segment
            if row["offset"] > cfg.max_len_seconds:
                row["offset"] = cfg.max_len_seconds
                row["onset"] = row["offset"] - fixed_segment
            return row

        assert "onset" in df.columns and "offset" in df.columns, "bias label only available with strong labels"
        LOG.info(f"Fix labels {fixed_segment} seconds")
        df = df.apply(apply_ps_func, axis=1, args=[fixed_segment])
        return df
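
A hedged sketch on a tiny strongly labeled DataFrame (the method is assumed to be called through the dataset class it belongs to, and cfg.max_len_seconds to match the 10 s clips of DCASE Task 4):

df_strong = pd.DataFrame({"onset": [0.0, 3.2],
                          "offset": [1.5, 9.8],
                          "event_label": ["Dishes", "Vacuum_cleaner"]})
df_fixed = dataset.trunc_pad_segment(df_strong, fixed_segment=2.0)  # every event now spans 2.0 s
LOG.debug(df_fixed)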
Example #15
def train_classifier(train_loader, classif_model, optimizer_classif, many_hot_encoder=None,
                     valid_loader=None, state={},
                     dir_model="model", result_path="res", recompute=True):
    criterion_bce = nn.BCELoss()
    classif_model, criterion_bce = to_cuda_if_available(classif_model, criterion_bce)
    print(classif_model)

    early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup",
                                        init_patience=cfg.first_early_wait)
    save_best_call = SaveBest(val_comp="sup")

    # scheduler = ReduceLROnPlateau(optimizer_classif, 'max', factor=0.1, patience=cfg.reduce_lr,
    #                               verbose=True)
    print(optimizer_classif)

    save_results = pd.DataFrame()

    create_folder(dir_model)
    if cfg.save_best:
        model_path_sup1 = os.path.join(dir_model, "best_model")
    else:
        model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
    print("path of model : " + model_path_sup1)

    state['many_hot_encoder'] = many_hot_encoder.state_dict()

    if not os.path.exists(model_path_sup1) or recompute:
        for epoch_ in range(cfg.n_epoch_classifier):
            print(classif_model.training)
            start = time.time()
            loss_mean_bce = []
            for i, samples in enumerate(train_loader):
                inputs, pred_labels = samples
                if i == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                # zero the parameter gradients
                optimizer_classif.zero_grad()
                inputs = to_cuda_if_available(inputs)

                # forward + backward + optimize
                weak_out = classif_model(inputs)
                weak_out = to_cpu(weak_out)
                # print(output)
                loss_bce = criterion_bce(weak_out, pred_labels)
                loss_mean_bce.append(loss_bce.item())
                loss_bce.backward()
                optimizer_classif.step()

            loss_mean_bce = np.mean(loss_mean_bce)
            classif_model.eval()
            n_class = len(many_hot_encoder.labels)
            macro_f_measure_train = get_f_measure_by_class(classif_model, n_class,
                                                           train_loader)
            if valid_loader is not None:
                macro_f_measure = get_f_measure_by_class(classif_model, n_class,
                                                         valid_loader)
                mean_macro_f_measure = np.mean(macro_f_measure)
            else:
                # No validation loader: keep placeholder values so the results dict below stays defined
                macro_f_measure = np.array([-1.])
                mean_macro_f_measure = -1
            classif_model.train()
            print("Time to train an epoch: {}".format(time.time() - start))
            # print statistics
            print('[%d / %d, %5d] loss: %.3f' %
                  (epoch_ + 1, cfg.n_epoch_classifier, i + 1, loss_mean_bce))

            results = {"train_loss": loss_mean_bce,
                       "macro_measure_train": np.mean(macro_f_measure_train),
                       "class_macro_train": np.array_str(macro_f_measure_train, precision=2),
                       "macro_measure_valid": mean_macro_f_measure,
                       "class_macro_valid": np.array_str(macro_f_measure, precision=2),
                       }
            for key in results:
                LOG.info("\t\t ---->  {} : {}".format(key, results[key]))

            save_results = save_results.append(results, ignore_index=True)
            # scheduler.step(mean_macro_f_measure)

            # ##########
            # # Callbacks
            # ##########
            state['epoch'] = epoch_ + 1
            state["model"]["state_dict"] = classif_model.state_dict()
            state["optimizer"]["state_dict"] = optimizer_classif.state_dict()
            state["loss"] = loss_mean_bce
            state.update(results)

            if cfg.early_stopping is not None:
                if early_stopping_call.apply(mean_macro_f_measure):
                    print("EARLY STOPPING")
                    break

            if cfg.save_best and save_best_call.apply(mean_macro_f_measure):
                save_model(state, model_path_sup1)

        if cfg.save_best:
            LOG.info(
                "best model at epoch : {} with macro {}".format(save_best_call.best_epoch, save_best_call.best_val))
            LOG.info("loading model from: {}".format(model_path_sup1))
            classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
        else:
            model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
            save_model(state, model_path_sup1)
        LOG.debug("model path: {}".format(model_path_sup1))
        LOG.debug('Finished Training')
    else:
        classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
    LOG.info("#### End classif")
    save_results.to_csv(result_path, sep="\t", header=True, index=False)

    return classif_model, state
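
A hedged call sketch; the model, optimizer, loaders and paths are placeholders, and the state dict only needs the "model" and "optimizer" entries that the function updates:

state = {"model": {"name": classif_model.__class__.__name__,
                   "state_dict": classif_model.state_dict()},
         "optimizer": {"name": optimizer_classif.__class__.__name__,
                       "state_dict": optimizer_classif.state_dict()}}
classif_model, state = train_classifier(train_loader, classif_model, optimizer_classif,
                                        many_hot_encoder=many_hot_encoder,
                                        valid_loader=valid_loader, state=state,
                                        dir_model=os.path.join("stored_data", "model"),
                                        result_path=os.path.join("stored_data", "results.tsv"))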
Example #16
def test_model(state, reduced_number_of_data, strore_predicitions_fname=None):
    crnn_kwargs = state["model"]["kwargs"]
    crnn = CRNN(**crnn_kwargs)
    crnn.load(parameters=state["model"]["state_dict"])
    LOG.info("Model loaded at epoch: {}".format(state["epoch"]))
    pooling_time_ratio = state["pooling_time_ratio"]

    crnn.load(parameters=state["model"]["state_dict"])
    scaler = Scaler()
    scaler.load_state_dict(state["scaler"])
    classes = cfg.classes
    many_hot_encoder = ManyHotEncoder.load_state_dict(
        state["many_hot_encoder"])

    # ##############
    # Validation
    # ##############
    crnn = crnn.eval()
    [crnn] = to_cuda_if_available([crnn])
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)

    # # 2018
    # LOG.info("Eval 2018")
    # eval_2018_df = dataset.initialize_and_get_df(cfg.eval2018, reduced_number_of_data)
    # # Strong
    # eval_2018_strong = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                               transform=transforms_valid)
    # predictions = get_predictions(crnn, eval_2018_strong, many_hot_encoder.decode_strong)
    # compute_strong_metrics(predictions, eval_2018_df, pooling_time_ratio)
    # # Weak
    # eval_2018_weak = DataLoadDf(eval_2018_df, dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                             transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(eval_2018_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # Validation 2019
    # LOG.info("Validation 2019 (original code)")
    # b_dataset = B_DatasetDcase2019Task4(cfg.workspace,
    #                                   base_feature_dir=os.path.join(cfg.workspace, 'dataset', 'features'),
    #                                   save_log_feature=False)
    # b_validation_df = b_dataset.initialize_and_get_df(cfg.validation, reduced_number_of_data)
    # b_validation_df.to_csv('old.csv')
    # b_validation_strong = B_DataLoadDf(b_validation_df,
    #                                  b_dataset.get_feature_file, many_hot_encoder.encode_strong_df,
    #                                  transform=transforms_valid)

    # predictions2 = get_predictions(crnn, b_validation_strong, many_hot_encoder.decode_strong,
    #                               save_predictions=strore_predicitions_fname)
    # compute_strong_metrics(predictions2, b_validation_df, pooling_time_ratio)

    # b_validation_weak = B_DataLoadDf(b_validation_df, b_dataset.get_feature_file, many_hot_encoder.encode_weak,
    #                              transform=transforms_valid)
    # weak_metric = get_f_measure_by_class(crnn, len(classes), DataLoader(b_validation_weak, batch_size=cfg.batch_size))
    # LOG.info("Weak F1-score per class: \n {}".format(pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    # LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))

    # ============================================================================================
    # ============================================================================================
    # ============================================================================================

    dataset = DatasetDcase2019Task4(feature_dir=cfg.feature_dir,
                                    local_path=cfg.workspace,
                                    exp_tag=cfg.exp_tag,
                                    save_log_feature=False)
    # Validation 2019
    LOG.info("Validation 2019")
    validation_df = dataset.initialize_and_get_df(cfg.validation,
                                                  reduced_number_of_data)
    validation_strong = DataLoadDf(validation_df,
                                   dataset.get_feature_file,
                                   many_hot_encoder.encode_strong_df,
                                   transform=transforms_valid)

    predictions = get_predictions(crnn,
                                  validation_strong,
                                  many_hot_encoder.decode_strong,
                                  save_predictions=strore_predicitions_fname)
    vdf = validation_df.copy()
    vdf.filename = vdf.filename.str.replace('.npy', '.wav')
    pdf = predictions.copy()
    pdf.filename = pdf.filename.str.replace('.npy', '.wav')
    compute_strong_metrics(pdf, vdf, pooling_time_ratio)

    validation_weak = DataLoadDf(validation_df,
                                 dataset.get_feature_file,
                                 many_hot_encoder.encode_weak,
                                 transform=transforms_valid)
    weak_metric = get_f_measure_by_class(
        crnn, len(classes),
        DataLoader(validation_weak, batch_size=cfg.batch_size))
    LOG.info("Weak F1-score per class: \n {}".format(
        pd.DataFrame(weak_metric * 100, many_hot_encoder.labels)))
    LOG.info("Weak F1-score macro averaged: {}".format(np.mean(weak_metric)))
Example #17
    from utils.Transforms import ApplyLog, Unsqueeze, PadOrTrunc, ToTensor, Normalize, Compose
    from utils.utils import load_model, ManyHotEncoder

    # ###########
    # ## Argument
    # ###########
    t = time.time()
    print("Arguments have been set for a certain group of experiments, feel free to change them.")
    parser = argparse.ArgumentParser(description="")
    parser.add_argument('--subpart_data', type=int, default=None)
    parser.add_argument('--model_path', type=str, default=None)
    parser.add_argument('--embed_name', type=str, default=None)
    # Experiments to compare the impact of the number of labeled vs unlabeled triplets
    # Be careful if subpart data is not None!!!!!!
    f_args = parser.parse_args()
    LOG.info(pformat(vars(f_args)))
    model_path = f_args.model_path
    assert model_path is not None, "model_path has to be defined to compute an embedding"
    embed_name = f_args.embed_name
    if embed_name is None:
        embed_name = model_path.split("/")[-2]
    ############
    #  Experiment parameters
    ###########
    subpart_data = f_args.subpart_data
    dataset = DesedSynthetic("../dcase2019",
                             base_feature_dir="../dcase2019/features",
                             save_log_feature=False)
    emb_model, state = load_model(model_path, return_state=True)
    epoch_model = state["epoch"]
    LOG.info("model loaded at epoch: {}".format(epoch_model))
Example #18
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))

    print("\ncheck_cus_weak:\n", check_cus_weak / count)


if __name__ == '__main__':
    LOG.info("MEAN TEACHER")
    parser = argparse.ArgumentParser(description="")
    parser.add_argument(
        "-s",
        '--subpart_data',
        type=int,
        default=None,
        dest="subpart_data",
        help=
        "Number of files to be used. Useful when testing on small number of files."
    )

    parser.add_argument("-n",
                        '--no_synthetic',
                        dest='no_synthetic',
                        action='store_true',
Example #19
        if train:
            loss.backward()
            optimizer.step()

        cnt += 1
    if cnt > 0:
        loss_mean = loss_mean / cnt
        acc_mean = acc_mean / cnt
    else:
        warnings.warn("No training has been performed")
    return loss_mean, acc_mean


if __name__ == '__main__':
    LOG.info(__file__)
    t = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument('--max-epoch', type=int, default=200)
    parser.add_argument('--save-epoch', type=int, default=20)
    parser.add_argument('--shot', type=int,
                        default=1)  # How many to get for proto
    parser.add_argument('--query', type=int, default=1)  # How many to eval
    parser.add_argument('--train-way', type=int, default=10)
    parser.add_argument('--test-way', type=int, default=10)

    parser.add_argument('--n_layers_RNN', type=int, default=2)
    parser.add_argument('--dim_RNN', type=int, default=64)

    parser.add_argument('--test-only', action="store_true", default=False)
    parser.add_argument('--load',
Example #20
def train(train_loader,
          model,
          optimizer,
          epoch,
          ema_model=None,
          weak_mask=None,
          strong_mask=None):
    """ One epoch of a Mean Teacher model
    :param train_loader: torch.utils.data.DataLoader, iterator of training batches for an epoch.
    Should return 3 values: student input, teacher input, labels
    :param model: torch.Module, student model to be trained, should return a weak and strong prediction
    :param optimizer: torch.Module, optimizer used to train the model
    :param epoch: int, the current epoch of training
    :param ema_model: torch.Module, teacher model (exponential moving average of the student),
    should return a weak and strong prediction
    :param weak_mask: mask the batch to get only the weakly labeled data (used to calculate the loss)
    :param strong_mask: mask the batch to get only the strongly labeled data (used to calculate the loss)
    """
    class_criterion = nn.BCELoss()

    ##################################################
    class_criterion1 = nn.BCELoss(reduction='none')
    ##################################################

    consistency_criterion = nn.MSELoss()

    # [class_criterion, consistency_criterion] = to_cuda_if_available(
    #     [class_criterion, consistency_criterion])
    [class_criterion, class_criterion1,
     consistency_criterion] = to_cuda_if_available(
         [class_criterion, class_criterion1, consistency_criterion])

    meters = AverageMeterSet()

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    rampup_length = len(train_loader) * cfg.n_epoch // 2

    print("Train\n")
    # LOG.info("Weak[k] -> Weak[k]")
    # LOG.info("Weak[k] -> strong[k]")

    # print(weak_mask.start)
    # print(strong_mask.start)
    # exit()
    count = 0
    check_cus_weak = 0
    difficulty_loss = 0
    loss_w = 1
    LOG.info("loss parameter: {}".format(loss_w))
    for i, (batch_input, ema_batch_input, target) in enumerate(train_loader):
        # print(batch_input.shape)
        # print(ema_batch_input.shape)
        # exit()
        global_step = epoch * len(train_loader) + i
        if global_step < rampup_length:
            rampup_value = ramps.sigmoid_rampup(global_step, rampup_length)
        else:
            rampup_value = 1.0

        # Todo check if this improves the performance
        # adjust_learning_rate(optimizer, rampup_value, rampdown_value)
        meters.update('lr', optimizer.param_groups[0]['lr'])

        [batch_input, ema_batch_input,
         target] = to_cuda_if_available([batch_input, ema_batch_input, target])
        LOG.debug("batch_input:{}".format(batch_input.mean()))

        # print(batch_input)
        # exit()

        # Outputs
        ##################################################
        # strong_pred_ema, weak_pred_ema = ema_model(ema_batch_input)
        strong_pred_ema, weak_pred_ema, sof_ema = ema_model(ema_batch_input)
        sof_ema = sof_ema.detach()
        ##################################################

        strong_pred_ema = strong_pred_ema.detach()
        weak_pred_ema = weak_pred_ema.detach()

        ##################################################
        # strong_pred, weak_pred = model(batch_input)
        strong_pred, weak_pred, sof = model(batch_input)
        ##################################################

        ##################################################
        # custom_ema_loss = Custom_BCE_Loss(ema_batch_input, class_criterion1)

        if difficulty_loss == 0:
            LOG.info("############### Define Difficulty Loss ###############")
            difficulty_loss = 1
        custom_ema_loss = Custom_BCE_Loss_difficulty(ema_batch_input,
                                                     class_criterion1,
                                                     paramater=loss_w)
        custom_ema_loss.initialize(strong_pred_ema, sof_ema)

        # custom_loss = Custom_BCE_Loss(batch_input, class_criterion1)
        custom_loss = Custom_BCE_Loss_difficulty(batch_input,
                                                 class_criterion1,
                                                 paramater=loss_w)
        custom_loss.initialize(strong_pred, sof)
        ##################################################

        # print(strong_pred.shape)
        # print(strong_pred)
        # print(weak_pred.shape)
        # print(weak_pred)
        # exit()

        loss = None
        # Weak BCE Loss
        # Take the max in the time axis
        # torch.set_printoptions(threshold=10000)
        # print(target[-10])
        # # print(target.max(-2))
        # # print(target.max(-2)[0])
        # print(target.max(-1)[0][-10])
        # exit()

        target_weak = target.max(-2)[0]
        if weak_mask is not None:
            weak_class_loss = class_criterion(weak_pred[weak_mask],
                                              target_weak[weak_mask])
            ema_class_loss = class_criterion(weak_pred_ema[weak_mask],
                                             target_weak[weak_mask])

            print(
                "normal_weak:",
                class_criterion(weak_pred[weak_mask], target_weak[weak_mask]))

            ##################################################
            custom_weak_class_loss = custom_loss.weak(target_weak, weak_mask)
            custom_ema_class_loss = custom_ema_loss.weak(
                target_weak, weak_mask)
            print("custom_weak:", custom_weak_class_loss)
            ##################################################

            count += 1
            check_cus_weak += custom_weak_class_loss
            # print(custom_weak_class_loss.item())

            if i == 0:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug("Target_weak mask: {}".format(
                    target_weak[weak_mask]))
                LOG.debug(custom_weak_class_loss)  ###
                LOG.debug("rampup_value: {}".format(rampup_value))
            meters.update('weak_class_loss',
                          custom_weak_class_loss.item())  ###
            meters.update('Weak EMA loss', custom_ema_class_loss.item())  ###

            # loss = weak_class_loss
            loss = custom_weak_class_loss

            ####################################################################################
            # weak_class_loss = class_criterion(strong_pred[weak_mask], target[weak_mask])
            # ema_class_loss = class_criterion(strong_pred_ema[weak_mask], target[weak_mask])
            # # if i == 0:
            # #     LOG.debug("target: {}".format(target.mean(-2)))
            # #     LOG.debug("Target_weak: {}".format(target))
            # #     LOG.debug("Target_weak mask: {}".format(target[weak_mask]))
            # #     LOG.debug(weak_class_loss)
            # #     LOG.debug("rampup_value: {}".format(rampup_value))
            # meters.update('weak_class_loss', weak_class_loss.item())
            # meters.update('Weak EMA loss', ema_class_loss.item())

            # loss = weak_class_loss
            ####################################################################################

        # Strong BCE loss
        if strong_mask is not None:
            strong_class_loss = class_criterion(strong_pred[strong_mask],
                                                target[strong_mask])
            # meters.update('Strong loss', strong_class_loss.item())

            strong_ema_class_loss = class_criterion(
                strong_pred_ema[strong_mask], target[strong_mask])
            # meters.update('Strong EMA loss', strong_ema_class_loss.item())

            print(
                "normal_strong:",
                class_criterion(strong_pred[strong_mask], target[strong_mask]))

            ##################################################
            custom_strong_class_loss = custom_loss.strong(target, strong_mask)
            meters.update('Strong loss', custom_strong_class_loss.item())

            custom_strong_ema_class_loss = custom_ema_loss.strong(
                target, strong_mask)
            meters.update('Strong EMA loss',
                          custom_strong_ema_class_loss.item())
            print("custom_strong:", custom_strong_class_loss)
            ##################################################

            if loss is not None:
                # loss += strong_class_loss
                loss += custom_strong_class_loss
            else:
                # loss = strong_class_loss
                loss = custom_strong_class_loss

        # print("check_weak:", class_criterion1(weak_pred[weak_mask], target_weak[weak_mask]).mean())
        # print("check_strong:", class_criterion1(strong_pred[strong_mask], target[strong_mask]).mean())
        # print("\n")

        # exit()

        # Teacher-student consistency cost
        if ema_model is not None:

            consistency_cost = cfg.max_consistency_cost * rampup_value
            meters.update('Consistency weight', consistency_cost)
            # Take consistency about strong predictions (all data)
            consistency_loss_strong = consistency_cost * consistency_criterion(
                strong_pred, strong_pred_ema)
            meters.update('Consistency strong', consistency_loss_strong.item())
            if loss is not None:
                loss += consistency_loss_strong
            else:
                loss = consistency_loss_strong

            meters.update('Consistency weight', consistency_cost)
            # Take consistency about weak predictions (all data)
            consistency_loss_weak = consistency_cost * consistency_criterion(
                weak_pred, weak_pred_ema)
            meters.update('Consistency weak', consistency_loss_weak.item())
            if loss is not None:
                loss += consistency_loss_weak
            else:
                loss = consistency_loss_weak

        assert not (np.isnan(loss.item())
                    or loss.item() > 1e5), 'Loss explosion: {}'.format(
                        loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))

    print("\ncheck_cus_weak:\n", check_cus_weak / count)
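
update_ema_variables is not shown in these examples; below is a minimal sketch of the standard Mean Teacher exponential-moving-average update such a helper typically performs (the repository's actual implementation may differ):

def update_ema_variables(model, ema_model, alpha, global_step):
    # Ramp alpha up from 0 towards its final value, then keep the teacher as an EMA of the student
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)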
Example #21
from utils.Transforms import ApplyLog, Unsqueeze, ToTensor, View, Normalize, Compose
from utils.Samplers import CategoriesSampler
from pprint import pformat
import config as cfg
from DesedSynthetic import DesedSynthetic
from evaluation_measures import get_f_measure_by_class, measure_classif
from common import get_model, get_optimizer, shared_args, get_dfs, measure_embeddings
from models.FullyConnected import FullyConnected
from models.CombineModel import CombineModel
from utils.Logger import LOG
from utils.Scaler import ScalerSum
from utils.utils import ManyHotEncoder, create_folder, to_cuda_if_available, EarlyStopping, SaveBest, to_cpu, \
    load_model, save_model, ViewModule

if __name__ == '__main__':
    LOG.info(__file__)
    t = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_layers_classif', type=int, default=1)
    parser.add_argument('--conv_dropout', type=float, default=cfg.conv_dropout)
    parser.add_argument('--dropout_classif',
                        type=float,
                        default=cfg.dropout_non_recurrent)
    parser.add_argument('--nb_layers', type=int, default=cfg.nb_layers)
    parser.add_argument('--pool_freq', type=int, default=cfg.pool_freq)
    parser.add_argument('--last_layer', type=int, default=cfg.last_layer)
    parser.add_argument('--epochs', type=float, default=cfg.n_epoch_classifier)

    parser = shared_args(parser)

    args = parser.parse_args()
Example #22
class MyLogger(object):
    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        pass


if __name__ == "__main__":
    base_missing_files_folder = ".."
    dataset_folder = os.path.join("..", "dataset")

    LOG.info("Download_data")
    LOG.info("\n\nOnce database is downloaded, do not forget to check your missing_files\n\n")

    LOG.info("You can change N_JOBS and CHUNK_SIZE to speed up the download with more processes.")
    # Set it to the number of processes you want, but be careful: YouTube can block you if you use too many.
    N_JOBS = 3

    # Only useful when multiprocessing:
    # if CHUNK_SIZE is high, the download is faster. Be careful, the progress bar only updates after each chunk.
    CHUNK_SIZE = 10

    LOG.info("Validation data")
    test = os.path.join(dataset_folder, "metadata", "validation", "validation.tsv")
    result_dir = os.path.join(dataset_folder, "audio", "validation")
    # read metadata file and get only one filename once
    df = pd.read_csv(test, header=0, sep='\t')
Example #23
    def extract_features_from_meta(self,
                                   csv_audio,
                                   subpart_data=None,
                                   training=False):
        """Extract log mel spectrogram features.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            subpart_data: int, number of files to extract features from the csv.
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        df_all = list()
        feature_fns = list()
        LOG.info('Extracting/loading features')
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        augmentation_funcs = [
            ('orig', None),  # original signal
        ]

        if training:
            augmentation_funcs += [
                # ('lpf4k', partial(lpf, wc=4000, fs=cfg.sample_rate)),
                # ('lpf8k', partial(lpf, wc=8000, fs=cfg.sample_rate)),
                # ('lpf16k', partial(lpf, wc=16000, fs=cfg.sample_rate)),
                # ('ps-6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-6)),
                # ('ps-3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-3)),
                # ('ps+3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=3)),
                # ('ps+6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=6)),
                # ('ts1.25', partial(time_stretch, rate=1.25)),
                # ('ts1.5', partial(time_stretch, rate=1.5)),
                # ('amp0.5', partial(amplitude_scale, coeff=0.5)),
                # ('amp0.75', partial(amplitude_scale, coeff=0.75)),
                # ('hp0.25', partial(hp_reweight, lam=0.25)),
                # ('hp0.75', partial(hp_reweight, lam=0.75))
            ]

        wav_fns = df_meta.filename.unique()
        flag = False
        for ind, wav_name in tqdm(enumerate(wav_fns), total=len(wav_fns)):
            if ind % 500 == 0:
                LOG.debug(ind)

            # verify the audio file is present
            wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
            wav_path = os.path.join(wav_dir, wav_name)
            if os.path.isfile(wav_path):
                # defer loading audio until the need for feature extraction is verified
                audio = None

                # perform all augmentations (including no augmentation)
                for name, func in augmentation_funcs:
                    if name == 'orig':
                        out_filename = os.path.splitext(wav_name)[0] + ".npy"
                    else:
                        out_filename = os.path.splitext(
                            wav_name)[0] + '_' + name + ".npy"
                    out_path = os.path.join(self.feature_dir, out_filename)

                    # add the metadata
                    meta = df_meta.loc[df_meta.filename == wav_name]
                    df_all.append(meta)

                    # for synthetic data with time annotation of events, the meta df will have several entries for
                    # each wav file. therefore, we need to append the feature filename len(meta) times.
                    if len(meta) > 1:
                        feature_fns += [out_filename] * len(meta)
                        if flag:
                            print('Length of meta: {}'.format(len(meta)))
                            flag = False
                    else:
                        feature_fns.append(out_filename)

                    if not os.path.exists(out_path):
                        if audio is None:
                            (audio, _) = read_audio(wav_path, cfg.sample_rate)
                            if audio.shape[0] == 0:
                                print("File %s is corrupted!" % wav_path)
                                del feature_fns[-1]
                                del df_all[-1]

                        # perform any augmentation, extract features, save features
                        # LOG.info('extracting {}'.format(out_filename))
                        if func is not None:
                            mel_spec = self.calculate_mel_spec(func(audio))
                        else:
                            mel_spec = self.calculate_mel_spec(audio)
                        np.save(out_path, mel_spec)

                        LOG.debug("compute features time: %s" %
                                  (time.time() - t1))
            else:
                LOG.error(
                    "File %s is in the csv file but the feature is not extracted!"
                    % wav_path)
                # df_meta = df_meta.drop(df_meta[df_meta.filename == wav_name].index)

        # form the final DataFrame of meta data for features from original and augmented audio
        df_all = pd.concat(df_all).reset_index(drop=True)
        df_all['feature_filename'] = feature_fns

        return df_all
Example #24
    def extract_features_from_meta_segment(self,
                                           csv_audio,
                                           feature_dir,
                                           subpart_data=None,
                                           fixed_segment=None):
        """Extract log mel spectrogram features, but the csv needs to be strongly labeled.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            feature_dir: str, the path of the features directory.
            subpart_data: int, number of files to extract features from the csv.
            fixed_segment: float, in seconds, the size of the kept segment. If longer than the audio, the audio length is kept.
                If longer than the labeled event, the surrounding audio is included (allows creating weak labels).
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        self.get_classes(df_meta)
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        ext_name = "_segment_"
        if subpart_data:
            ext_name += str(subpart_data)

        if fixed_segment is not None:
            LOG.debug(
                f" durations before: "
                f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
            )
            ext_name += f"fix{fixed_segment}"
            df_meta = self.trunc_pad_segment(df_meta, fixed_segment)
            LOG.debug(
                f" durations after: "
                f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
            )

        meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
        csv_features = os.path.join(self.metadata_dir,
                                    meta_base + ext_name + meta_ext)

        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
        df_features = pd.DataFrame()

        path_exists = os.path.exists(csv_features)

        if not path_exists:
            # Loop in all the filenames
            for ind, wav_name in enumerate(df_meta.filename.unique()):
                if ind % 500 == 0:
                    LOG.debug(ind)

                wav_path = os.path.join(wav_dir, wav_name)
                if not os.path.isfile(wav_path):
                    LOG.error(
                        "File %s is in the csv file but the feature is not extracted, deleting...!"
                        % wav_path)
                    df_meta = df_meta.drop(
                        df_meta[df_meta.filename == wav_name].index)
                else:
                    try:
                        audio_len_sec = soundfile.info(wav_path).duration
                    except Exception as e:
                        print("File %s is corrupted, not added to df!" %
                              wav_path)
                        print(e)
                        continue
                    if audio_len_sec == 0:
                        print("File %s is corrupted, not added to df!" %
                              wav_path)
                    else:
                        files_exist = True
                        # How many features we can compute from this file ?
                        sub_df = df_meta[df_meta.filename == wav_name]
                        cnt_max = len(sub_df)

                        if cnt_max == 0:
                            # No labeled rows for this file: skip it instead of aborting the whole loop
                            continue

                        base_wav_name = name_only(wav_name)
                        ext_featname = "_seg"
                        if fixed_segment:
                            ext_featname += f"fix{fixed_segment}"
                            files_exist = False  # We should always recompute because of the randomness of onset offset
                        # Check if files already exist (the first segment is saved without a numeric suffix)
                        out_filenames = [
                            base_wav_name + ext_featname +
                            (str(cnt) if cnt > 0 else "") + ".npy"
                            for cnt in range(cnt_max)
                        ]
                        for fname in out_filenames:
                            fpath = os.path.join(feature_dir, fname)
                            if not os.path.exists(fpath):
                                files_exist = False
                                break

                        add_item = {
                            "raw_filename": [],
                            "filename": [],
                            "event_labels": []
                        }
                        for ii, (i, row) in enumerate(sub_df.iterrows()):
                            if not pd.isna(row.event_label):
                                if ii > 0:
                                    extnb = str(ii)
                                else:
                                    extnb = ""
                                out_filename = os.path.join(
                                    feature_dir, name_only(wav_name))
                                out_filename += ext_featname + extnb + ".npy"
                                if not files_exist:
                                    sr = soundfile.info(wav_path).samplerate
                                    (audio,
                                     _) = read_audio(wav_path,
                                                     cfg.sample_rate,
                                                     start=int(row.onset * sr),
                                                     stop=int(row.offset * sr))
                                    mel_spec = self.calculate_mel_spec(
                                        audio,
                                        log_feature=self.save_log_feature)
                                    if fixed_segment:
                                        pad_trunc_length = int(
                                            fixed_segment * cfg.sample_rate //
                                            cfg.hop_length)
                                        mel_spec = pad_trunc_seq(
                                            mel_spec, pad_trunc_length)
                                    np.save(out_filename, mel_spec)

                                add_item["raw_filename"].append(wav_name)
                                add_item["filename"].append(out_filename)
                                add_item["event_labels"].append(
                                    row["event_label"])

                        df_features = df_features.append(
                            pd.DataFrame(add_item), ignore_index=True)

            df_features.to_csv(csv_features,
                               sep="\t",
                               header=True,
                               index=False)
            df_features = pd.read_csv(
                csv_features,
                sep="\t")  # Otherwise event_labels is "" and not NaN
        else:
            df_features = self.get_df_from_meta(
                csv_features)  # No subpart_data here: it is already encoded in the csv name

        LOG.debug("compute features time: %s" % (time.time() - t1))
        return df_features
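
    # --- Hedged sketch (not part of the original file) --------------------------------
    # pad_trunc_seq is not shown in this excerpt; the helper below only illustrates the
    # kind of operation the `fixed_segment` branch above relies on: the target frame
    # count is int(fixed_segment * cfg.sample_rate // cfg.hop_length), and the mel
    # spectrogram is zero-padded or truncated along the time axis to that length.
    # Example values such as sample_rate=44100 and hop_length=511 (-> 86 frames for a
    # 1 s segment) are illustrative assumptions, not read from config.py; `np` is the
    # module-level numpy import already used above (np.save).
    @staticmethod
    def _pad_or_trunc_time_axis(mel_spec, target_len):
        """Pad with zeros or truncate `mel_spec` (time x mel_bins) to `target_len` frames."""
        n_frames = mel_spec.shape[0]
        if n_frames < target_len:
            pad = np.zeros((target_len - n_frames,) + mel_spec.shape[1:], dtype=mel_spec.dtype)
            return np.concatenate((mel_spec, pad), axis=0)
        return mel_spec[:target_len]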
Ejemplo n.º 25
0
                                                   )

                if margin is not None:
                    triplet_loss = torch.clamp(margin + dist_pos - dist_neg, min=0.0).mean()
                else:
                    triplet_loss = ratio_loss(dist_pos, dist_neg).mean()

            triplet_loss = to_cpu(triplet_loss)
            validation_loss.append(triplet_loss.item())
    validation_loss = np.mean(validation_loss)
    triplet_model.train()
    return validation_loss
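

# --- Hedged sketch (not part of the original file) ------------------------------------
# The two objectives selected above: the margin variant is exactly the clamp shown in
# the snippet; `ratio_loss` is not defined in this excerpt, so the version below is only
# one common formulation (softmax over the positive/negative distances, as in Hoffer &
# Ailon's triplet network), not necessarily this repository's implementation.
import torch


def margin_triplet_loss(dist_pos, dist_neg, margin):
    # Hinge on the distance gap: push dist_neg at least `margin` above dist_pos.
    return torch.clamp(margin + dist_pos - dist_neg, min=0.0).mean()


def softmax_ratio_loss(dist_pos, dist_neg):
    # Margin-free alternative: softmax over the two distances, per sample
    # (mean reduction is applied by the caller, as in the snippet above);
    # the loss goes to 0 when dist_pos is much smaller than dist_neg.
    d_plus = torch.exp(dist_pos) / (torch.exp(dist_pos) + torch.exp(dist_neg))
    return d_plus ** 2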


if __name__ == '__main__':
    LOG.info(__file__)
    # ###########
    # ## Argument
    # ###########
    t = time.time()
    print("Arguments have been set for a certain group of experiments, feel free to change it.")
    parser = argparse.ArgumentParser(description="")
    parser.add_argument('--margin', type=float, default=None, dest="margin")
    parser.add_argument('--type_positive', type=str, default="nearest", dest="type_positive")
    parser.add_argument('--type_negative', type=str, default="semi_hard", dest="type_negative")
    # Experiments comparing the impact of the number of labeled vs unlabeled triplets
    # Be careful if subpart_data is not None!
    parser.add_argument('--nb_labeled_triplets', type=int, default=None, dest="nb_labeled_triplets")
    parser.add_argument('--nb_unlabeled_triplets', type=int, default=None, dest="nb_unlabeled_triplets")
    parser.add_argument('--pit', action="store_true", default=False)
    parser.add_argument('--swap', action="store_true", default=False)
Ejemplo n.º 26
0
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))


if __name__ == '__main__':
    LOG.info("Simple CRNNs")
    parser = argparse.ArgumentParser(description="")
    parser.add_argument(
        "-s",
        '--subpart_data',
        type=int,
        default=None,
        dest="subpart_data",
        help=
        "Number of files to be used. Useful when testing on small number of files."
    )
    parser.add_argument("-n",
                        '--no_weak',
                        dest='no_weak',
                        action='store_true',
                        default=False,
Ejemplo n.º 27
0
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))


if __name__ == '__main__':
    LOG.info("MEAN TEACHER")
    parser = argparse.ArgumentParser(description="")
    parser.add_argument(
        "-s",
        '--subpart_data',
        type=int,
        default=None,
        dest="subpart_data",
        help=
        "Number of files to be used. Useful when testing on small number of files."
    )

    parser.add_argument("-n",
                        '--no_synthetic',
                        dest='no_synthetic',
                        action='store_true',
Ejemplo n.º 28
0
    def extract_features_from_meta_frames(self,
                                          csv_audio,
                                          feature_dir,
                                          frames_in_sec,
                                          subpart_data=None):
        """Extract log mel spectrogram features.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            feature_dir: str, the directory where the features are or will be created
            subpart_data: int, number of files to extract features from the csv.
            frames_in_sec: float, duration in seconds of each sub-segment (converted to a number of
                frames using the sample rate and hop length).
        """
        frames = int(frames_in_sec * cfg.sample_rate / cfg.hop_length)
        t1 = time.time()
        df_meta = pd.read_csv(csv_audio, header=0, sep="\t")
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        # Csv to store the features
        ext_name = "_" + str(frames)
        if subpart_data is not None and subpart_data < len(
                df_meta.filename.unique()):
            ext_name += "_sub" + str(subpart_data)
            df_meta = self.get_subpart_data(df_meta, subpart_data)

        self.get_classes(df_meta)

        meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
        csv_features = os.path.join(self.metadata_dir,
                                    meta_base + ext_name + meta_ext)

        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
        df_features = pd.DataFrame()

        path_exists = os.path.exists(csv_features)

        if not path_exists:
            LOG.debug("Creating new feature df")

            # Loop over all the filenames
            cnt_new_features = 0
            for ind, wav_name in enumerate(df_meta.filename.unique()):
                wav_path = os.path.join(wav_dir, wav_name)
                if not os.path.isfile(wav_path):
                    LOG.error(
                        "File %s is listed in the csv but missing on disk, removing it from the metadata!"
                        % wav_path)
                    df_meta = df_meta.drop(
                        df_meta[df_meta.filename == wav_name].index)
                else:
                    try:
                        audio_len_sec = soundfile.info(wav_path).duration
                    except Exception as e:
                        print("File %s is corrupted, not added to df!" %
                              wav_path)
                        print(e)
                        continue
                    if audio_len_sec == 0:
                        print("File %s is corrupted, not added to df!" %
                              wav_path)
                    else:
                        files_exist = True
                        # How many features can we compute from this file?
                        cnt_max = min(
                            int(audio_len_sec // frames_in_sec),
                            int(cfg.max_len_seconds // frames_in_sec))
                        if cnt_max == 0:
                            cnt_max = 1

                        base_wav_name = os.path.join(feature_dir,
                                                     name_only(wav_name))
                        # Check if files already exist
                        out_filenames = [
                            base_wav_name + "fr" + str(frames) + "_" +
                            str(cnt * frames) + "-" + str(
                                (cnt + 1) * frames) + ".npy"
                            for cnt in range(cnt_max)
                        ]
                        for fname in out_filenames:
                            if not os.path.exists(fname):
                                files_exist = False
                                break

                        if not files_exist:
                            if cnt_new_features % 500 == 0:
                                LOG.debug(f"new features, {cnt_new_features}")
                            cnt_new_features += 1
                            audio, cnt_max = self.get_features(
                                wav_path, feature_dir, frames)
                            out_filenames = [
                                base_wav_name + "fr" + str(frames) + "_" +
                                str(cnt * frames) + "-" + str(
                                    (cnt + 1) * frames) + ".npy"
                                for cnt in range(cnt_max)
                            ]

                        # feature labels to add to the dataframe
                        add_item = self.get_labels(ind, df_meta, wav_name,
                                                   frames, out_filenames)

                        df_features = df_features.append(
                            pd.DataFrame(add_item), ignore_index=True)

            LOG.info(csv_features)
            df_features.to_csv(csv_features,
                               sep="\t",
                               header=True,
                               index=False)
            df_features = pd.read_csv(
                csv_features,
                sep="\t")  # Otherwise event_labels is "" and not NaN
        else:
            df_features = self.get_df_from_meta(
                csv_features)  # No subpart_data here: it is already encoded in the csv name

        LOG.debug("compute features time: %s" % (time.time() - t1))
        return df_features
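
    # --- Hedged sketch (not part of the original file) --------------------------------
    # Worked example of the sub-segment bookkeeping above, with illustrative values that
    # are NOT read from config.py: with sample_rate=16000, hop_length=255 and
    # frames_in_sec=0.5, each sub-segment spans frames = int(0.5 * 16000 / 255) = 31
    # frames, and a 10 s file (with max_len_seconds=10) gives cnt_max = 20 feature files
    # named "<name>fr31_0-31.npy", "<name>fr31_31-62.npy", ...
    @staticmethod
    def _subsegment_filenames(base_wav_name, frames, cnt_max):
        """Reproduce the naming scheme used by extract_features_from_meta_frames."""
        return [
            "{}fr{}_{}-{}.npy".format(base_wav_name, frames, cnt * frames, (cnt + 1) * frames)
            for cnt in range(cnt_max)
        ]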
Ejemplo n.º 29
0
def download_file(result_dir, filename):
    """ download a file from youtube given an audioSet filename. (It takes only a part of the file thanks to
    information provided in the filename)

    Parameters
    ----------

    result_dir : str, result directory which will contain the downloaded file

    filename : str, AudioSet filename to download

    Return
    ------

    list : list, Empty list if the file is downloaded, otherwise contains the filename and the error associated

    """
    LOG.debug(filename)
    tmp_filename = ""
    query_id = filename[1:12]
    segment_start = filename[13:-4].split('_')[0]
    segment_end = filename[13:-4].split('_')[1]
    audio_container = AudioContainer()

    # Define download parameters
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': TMP_FOLDER+'%(id)s.%(ext)s',
        'noplaylist': True,
        'quiet': True,
        'prefer_ffmpeg': True,
        'logger': MyLogger(),
        'audioformat': 'wav'
    }

    try:
        # Download file
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            meta = ydl.extract_info(
                'https://www.youtube.com/watch?v={query_id}'.format(query_id=query_id), download=True)

        audio_formats = [f for f in meta["formats"] if f.get('vcodec') == 'none']

        if not audio_formats:
            return [filename, "no audio format available"]

        # get the best audio format
        best_audio_format = audio_formats[-1]

        tmp_filename = TMP_FOLDER + query_id + "." + best_audio_format["ext"]

        audio_container.load(filename=tmp_filename, fs=44100, res_type='kaiser_best',
                             start=float(segment_start), stop=float(segment_end))

        # Save segmented audio
        audio_container.filename = filename
        audio_container.detect_file_format()
        audio_container.save(filename=os.path.join(result_dir, filename))

        # Remove temporary file
        os.remove(tmp_filename)
        return []

    except (KeyboardInterrupt, SystemExit):
        # Remove temporary files and current audio file.
        for fpath in glob.glob(TMP_FOLDER + query_id + "*"):
            os.remove(fpath)
        raise

    # youtube-dl error, file often removed
    except (ExtractorError, DownloadError, OSError) as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)

        return [filename, str(e)]

    # multiprocessing can give this error
    except IndexError as e:
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        LOG.info(filename)
        LOG.info(str(e))
        return [filename, "Index Error"]
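

# --- Hedged illustration (not part of the original file) ------------------------------
# The slicing at the top of download_file assumes AudioSet-style names of the form
# "Y<11-char YouTube id>_<start>_<end>.wav". The filename below is purely hypothetical;
# it only shows how query_id and the segment boundaries are recovered.
def parse_audioset_filename(filename):
    query_id = filename[1:12]                       # 11-character YouTube video id
    segment_start, segment_end = filename[13:-4].split('_')
    return query_id, float(segment_start), float(segment_end)


# parse_audioset_filename("YABCDEFGHIJK_30.000_40.000.wav") -> ("ABCDEFGHIJK", 30.0, 40.0)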
Ejemplo n.º 30
0
def train(cfg,
          train_loader,
          model,
          optimizer,
          epoch,
          ema_model=None,
          weak_mask=None,
          strong_mask=None):
    """ One epoch of a Mean Teacher model
    :param train_loader: torch.utils.data.DataLoader, iterator of training batches for an epoch.
    Should return 3 values: teacher input, student input, labels
    :param model: torch.Module, model to be trained, should return a weak and strong prediction
    :param optimizer: torch.Module, optimizer used to train the model
    :param epoch: int, the current epoch of training
    :param ema_model: torch.Module, student model, should return a weak and strong prediction
    :param weak_mask: mask the batch to get only the weak labeled data (used to calculate the loss)
    :param strong_mask: mask the batch to get only the strong labeled data (used to calcultate the loss)
    """
    class_criterion = nn.BCELoss()
    consistency_criterion_strong = nn.MSELoss()
    lds_criterion = LDSLoss(xi=cfg.vat_xi,
                            eps=cfg.vat_eps,
                            n_power_iter=cfg.vat_n_power_iter)
    [class_criterion, consistency_criterion_strong,
     lds_criterion] = to_cuda_if_available(
         [class_criterion, consistency_criterion_strong, lds_criterion])

    meters = AverageMeterSet()

    LOG.debug("Nb batches: {}".format(len(train_loader)))
    start = time.time()
    rampup_length = len(train_loader) * cfg.n_epoch // 2
    for i, (batch_input, ema_batch_input, target) in enumerate(train_loader):
        global_step = epoch * len(train_loader) + i
        if global_step < rampup_length:
            rampup_value = ramps.sigmoid_rampup(global_step, rampup_length)
        else:
            rampup_value = 1.0

        # Todo check if this improves the performance
        # adjust_learning_rate(optimizer, rampup_value, rampdown_value)
        meters.update('lr', optimizer.param_groups[0]['lr'])

        [batch_input, ema_batch_input,
         target] = to_cuda_if_available([batch_input, ema_batch_input, target])
        LOG.debug(batch_input.mean())
        # Outputs
        strong_pred_ema, weak_pred_ema = ema_model(ema_batch_input)
        strong_pred_ema = strong_pred_ema.detach()
        weak_pred_ema = weak_pred_ema.detach()

        strong_pred, weak_pred = model(batch_input)
        loss = None
        # Weak BCE Loss
        # Take the max over the time axis (dim=-2)
        if len(target.shape) > 2:
            target_weak = target.max(-2)[0]
        else:
            target_weak = target

        if weak_mask is not None:
            weak_class_loss = class_criterion(weak_pred[weak_mask],
                                              target_weak[weak_mask])
            ema_class_loss = class_criterion(weak_pred_ema[weak_mask],
                                             target_weak[weak_mask])

            if i == 0:
                LOG.debug("target: {}".format(target.mean(-2)))
                LOG.debug("Target_weak: {}".format(target_weak))
                LOG.debug("Target_weak mask: {}".format(
                    target_weak[weak_mask]))
                LOG.debug(weak_class_loss)
                LOG.debug("rampup_value: {}".format(rampup_value))
            meters.update('weak_class_loss', weak_class_loss.item())

            meters.update('Weak EMA loss', ema_class_loss.item())

            loss = weak_class_loss

        # Strong BCE loss
        if strong_mask is not None:
            strong_class_loss = class_criterion(strong_pred[strong_mask],
                                                target[strong_mask])
            meters.update('Strong loss', strong_class_loss.item())

            strong_ema_class_loss = class_criterion(
                strong_pred_ema[strong_mask], target[strong_mask])
            meters.update('Strong EMA loss', strong_ema_class_loss.item())
            if loss is not None:
                loss += strong_class_loss
            else:
                loss = strong_class_loss

        # Teacher-student consistency cost
        if ema_model is not None:

            consistency_cost = cfg.max_consistency_cost * rampup_value
            meters.update('Consistency weight', consistency_cost)
            # Consistency on the strong predictions (applied to the whole batch, including weak and unlabeled data)
            consistency_loss_strong = consistency_cost * consistency_criterion_strong(
                strong_pred, strong_pred_ema)
            meters.update('Consistency strong', consistency_loss_strong.item())
            if loss is not None:
                loss += consistency_loss_strong
            else:
                loss = consistency_loss_strong

            meters.update('Consistency weight', consistency_cost)
            # Consistency on the weak predictions (applied to the whole batch, including weak and unlabeled data)
            consistency_loss_weak = consistency_cost * consistency_criterion_strong(
                weak_pred, weak_pred_ema)
            meters.update('Consistency weak', consistency_loss_weak.item())
            if loss is not None:
                loss += consistency_loss_weak
            else:
                loss = consistency_loss_weak

        # LDS loss
        if cfg.vat_enabled:
            lds_loss = cfg.vat_coeff * lds_criterion(model, batch_input,
                                                     weak_pred)
            # lds_loss already includes cfg.vat_coeff (see above)
            LOG.info('loss: {:.3f}, lds loss: {:.3f}'.format(
                loss.item(), lds_loss.detach().cpu().item()))
            loss += lds_loss
        else:
            if i % 25 == 0:
                LOG.info('loss: {:.3f}'.format(loss))

        assert not (np.isnan(loss.item())
                    or loss.item() > 1e5), 'Loss explosion: {}'.format(
                        loss.item())
        assert not loss.item() < 0, 'Loss problem, cannot be negative'
        meters.update('Loss', loss.item())

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1
        if ema_model is not None:
            update_ema_variables(model, ema_model, 0.999, global_step)

    epoch_time = time.time() - start

    LOG.info('Epoch: {}\t'
             'Time {:.2f}\t'
             '{meters}'.format(epoch, epoch_time, meters=meters))
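

# --- Hedged sketches (not part of the original file) -----------------------------------
# ramps.sigmoid_rampup and update_ema_variables are referenced above but not shown in
# this excerpt. The versions below follow the usual Mean Teacher formulation
# (Tarvainen & Valpola, 2017); the repository's own helpers may differ in detail.
import numpy as np
import torch


def sigmoid_rampup(current, rampup_length):
    """Exponential ramp-up from 0 to 1 over `rampup_length` steps."""
    if rampup_length == 0:
        return 1.0
    current = np.clip(current, 0.0, rampup_length)
    phase = 1.0 - current / rampup_length
    return float(np.exp(-5.0 * phase * phase))


def update_ema_variables(model, ema_model, alpha, global_step):
    """Update the teacher as an exponential moving average of the student weights."""
    # Use the true average until the exponential average becomes meaningful.
    alpha = min(1 - 1 / (global_step + 1), alpha)
    with torch.no_grad():
        for ema_param, param in zip(ema_model.parameters(), model.parameters()):
            ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)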