        list_datasets = [train_synth_data]
        training_data = train_synth_data

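    # Fit normalization statistics on the training features (the mean is logged below for inspection).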
    scaler = Scaler()
    scaler.calculate_scaler(training_data)
    LOG.debug(scaler.mean_)

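    # Validation transforms reuse the fitted scaler but apply no augmentation.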
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)
    # The validation dataset is only used to get an idea of what the results could be on the evaluation dataset.
    validation_dataset = DataLoadDf(validation_df,
                                    dataset.get_feature_file,
                                    many_hot_encoder.encode_strong_df,
                                    transform=transforms_valid)

    transforms = get_transforms(cfg.max_frames, scaler)
    train_synth_data.set_transform(transforms)
    if not no_weak:
        train_weak_data.set_transform(transforms)
        concat_dataset = ConcatDataset([train_weak_data, train_synth_data])
        # Draw equal numbers of weak and synthetic (strongly labeled) samples per batch.
        sampler = MultiStreamBatchSampler(
            concat_dataset,
            batch_sizes=[cfg.batch_size // 2, cfg.batch_size // 2])
        training_data = DataLoader(concat_dataset, batch_sampler=sampler)
        valid_weak_data = DataLoadDf(valid_weak_df,
                                     dataset.get_feature_file,
                                     many_hot_encoder.encode_strong_df,
                                     transform=transforms_valid)
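        # Each batch concatenates the two streams in order: first half weak, second half strong (synthetic).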
        weak_mask = slice(cfg.batch_size // 2)
        strong_mask = slice(cfg.batch_size // 2, cfg.batch_size)
    else:
        # Assumption (the branch body is not shown in the source): train on the
        # synthetic set alone, so every sample in the batch is strongly labeled.
        training_data = DataLoader(train_synth_data,
                                   batch_size=cfg.batch_size,
                                   shuffle=True,
                                   drop_last=True)
        weak_mask = None
        strong_mask = slice(cfg.batch_size)
Example #2
    scaler = Scaler()
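    # Reuse a cached scaler when available; otherwise fit it on the full training set and cache it.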
    if path.exists(cfg.scaler_fn):
        LOG.info('Loading scaler from {}'.format(cfg.scaler_fn))
        scaler.load(cfg.scaler_fn)
    else:
        scaler.calculate_scaler(ConcatDataset(list_dataset))
        LOG.info('Saving scaler to {}'.format(cfg.scaler_fn))
        scaler.save(cfg.scaler_fn)

    LOG.debug(scaler.mean_)

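    # Training transforms add noise augmentation; validation and test transforms apply none.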
    transforms = get_transforms(cfg.max_frames, scaler, augment_type="noise")
    transforms_valid = get_transforms(cfg.max_frames, scaler=scaler)
    for ds in list_dataset:
        ds.set_transform(transforms)
    validation_data.set_transform(transforms_valid)
    test_data.set_transform(transforms_valid)

    concat_dataset = ConcatDataset(list_dataset)
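    # Draw batch_sizes[i] samples per batch from the i-th dataset of the concatenation.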
    sampler = MultiStreamBatchSampler(concat_dataset, batch_sizes=batch_sizes)
    training_data = DataLoader(concat_dataset, batch_sampler=sampler)

    # ##############
    # Model
    # ##############
    crnn_kwargs = cfg.crnn_kwargs
    crnn = CRNN(**crnn_kwargs)
    crnn_ema = CRNN(**crnn_kwargs)
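    # crnn_ema presumably acts as the teacher network, updated as an exponential moving average of crnn (mean-teacher training).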

    if path.exists(cfg.load_weights_fn):
        model_cfg = torch.load(cfg.load_weights_fn)
Example #3
    train_set = DataLoadDf(train_weak_df,
                           many_hot_encoder.encode_weak,
                           Compose(list_trans_fr),
                           return_indexes=False)
    LOG.debug("len train : {}".format(len(train_set)))
    # train_load = DataLoader(train_set, batch_size=batch_size, num_workers=num_workers, shuffle=True,
    #                         drop_last=True, collate_fn=default_collate)

    # scaler = Scaler()
    scaler = ScalerSum()
    scaler.calculate_scaler(train_set)
    LOG.debug(scaler.mean_)

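    # Append normalization with the fitted scaler to the training transform chain.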
    list_trans_fr.append(Normalize(scaler))
    train_set.set_transform(Compose(list_trans_fr))
    # Validation data
    valid_weak_df = dfs["valid"]
    if valid_weak_df is not None:
        valid_set = DataLoadDf(valid_weak_df,
                               many_hot_encoder.encode_weak,
                               Compose(list_trans_fr),
                               return_indexes=False)

    list_trans_val = deepcopy(list_trans_fr)
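    # For full clips (no fixed segments), add a leading dimension, presumably a batch/channel axis.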
    if not args.segment:
        list_trans_val.append(Unsqueeze(0))

    train_dl_emb = DataLoadDf(train_weak_df,
                              many_hot_encoder.encode_weak,
                              Compose(list_trans_val))
Example #4
def datasets_classif(model,
                     train_weak_embed,
                     valid_weak_dl_fr,
                     test_dl_fr,
                     args,
                     many_hot_encoder,
                     classes,
                     save_name="",
                     eval_dl=None):
    encode_function_label = many_hot_encoder.encode_weak
    num_workers = cfg.num_workers
    model.eval()
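    # Embeddings are extracted in eval mode and cached under stored_data/embeddings/<save_name>.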
    embed_dir = "stored_data/embeddings"
    embed_dir = os.path.join(embed_dir, save_name)
    create_folder(embed_dir)
    fig_dir = os.path.join(embed_dir, "figures")
    create_folder(fig_dir)

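    # Flatten time-aggregated embeddings to a single vector; otherwise keep the time axis.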
    if args.agg_time is not None:
        trans_embedding = [ToTensor(), View(-1)]
    else:
        trans_embedding = [ToTensor()]

    model = to_cuda_if_available(model)
    embed_set = "final"
    train_embed_dir = os.path.join(embed_dir, embed_set)
    df_weak, embed_weak = calculate_embedding(train_weak_embed,
                                              model,
                                              savedir=train_embed_dir,
                                              concatenate="append")
    weak_embed = DataLoadDf(df_weak,
                            encode_function_label,
                            transform=Compose(trans_embedding))
    LOG.info(f"len weak embed: {len(weak_embed)}")
    weak_embed.set_transform(Compose(trans_embedding))

    batch_size_classif = cfg.batch_size_classif
    df_valid, embed_valid = calculate_embedding(valid_weak_dl_fr,
                                                model,
                                                savedir=train_embed_dir,
                                                concatenate="append")

    valid_embed = DataLoadDf(df_valid,
                             encode_function_label,
                             transform=Compose(trans_embedding))
    embed_set = "final_test"
    test_embed_dir = os.path.join(embed_dir, embed_set)
    df_test_embed, emb_test = calculate_embedding(test_dl_fr,
                                                  model,
                                                  savedir=test_embed_dir,
                                                  concatenate="append")

    test_embed = DataLoadDf(df_test_embed,
                            encode_function_label,
                            transform=Compose(trans_embedding))

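    # With balancing, CategoriesSampler yields class-balanced batches of roughly n_per_class samples per class.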
    if args.balance:
        n_per_class = max(round(batch_size_classif / len(classes)), 1)
        weak_sampler = CategoriesSampler(weak_embed.df.event_labels, classes,
                                         n_per_class)
        weak_embed_loader = DataLoader(weak_embed,
                                       batch_sampler=weak_sampler,
                                       num_workers=num_workers)
        valid_sampler = CategoriesSampler(valid_embed.df.event_labels, classes,
                                          n_per_class)
        valid_embed_loader = DataLoader(valid_embed,
                                        batch_sampler=valid_sampler,
                                        num_workers=num_workers)
        test_sampler = CategoriesSampler(test_embed.df.event_labels, classes,
                                         n_per_class)
        test_embed_loader = DataLoader(test_embed,
                                       batch_sampler=test_sampler,
                                       num_workers=num_workers)
    else:
        weak_embed_loader = DataLoader(weak_embed,
                                       batch_size=batch_size_classif,
                                       num_workers=num_workers,
                                       shuffle=True,
                                       drop_last=True)
        valid_embed_loader = DataLoader(valid_embed,
                                        batch_size=batch_size_classif,
                                        shuffle=False,
                                        num_workers=num_workers,
                                        drop_last=False)
        test_embed_loader = DataLoader(test_embed,
                                       batch_size=batch_size_classif,
                                       shuffle=False,
                                       num_workers=num_workers,
                                       drop_last=False)

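    # Optionally extract embeddings for an evaluation set through the same pipeline.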
    if eval_dl is not None:
        model = to_cuda_if_available(model)
        embed_set = "final_eval"
        eval_embed_dir = os.path.join(embed_dir, embed_set)
        df_eval_embed, embed_eval = calculate_embedding(eval_dl,
                                                        model,
                                                        savedir=eval_embed_dir,
                                                        concatenate="append")

        eval_embed = DataLoadDf(df_eval_embed,
                                encode_function_label,
                                transform=Compose(trans_embedding))
        if args.balance:
            eval_sampler = CategoriesSampler(eval_embed.df.event_labels,
                                             classes, n_per_class)
            eval_embed_loader = DataLoader(eval_embed,
                                           batch_sampler=eval_sampler,
                                           num_workers=num_workers)
        else:
            eval_embed_loader = DataLoader(eval_embed,
                                           batch_size=batch_size_classif,
                                           shuffle=False,
                                           num_workers=num_workers,
                                           drop_last=False)
    else:
        eval_embed_loader = None

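    # Move the model back to CPU to free GPU memory before returning the loaders.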
    model = to_cpu(model)
    return {
        "train": weak_embed_loader,
        "valid": valid_embed_loader,
        "test": test_embed_loader,
        "eval": eval_embed_loader
    }
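
# Usage sketch (argument names taken from the signature above; "exp1" is an
# assumed value): train a downstream classifier on the returned embedding loaders.
# loaders = datasets_classif(model, train_weak_embed, valid_weak_dl_fr, test_dl_fr,
#                            args, many_hot_encoder, classes, save_name="exp1")
# for embeddings, labels in loaders["train"]:
#     ...  # fit a small classifier on the frozen embeddings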