Example #1
def aggregate_all_command(n_tracks):
    aggrdata_dir = Path(current_app.config['AGGRDATA_DIR'])
    aggrdata_dir.mkdir(parents=True, exist_ok=True)

    models = get_models()
    for model in models.get_all_offline():
        aggregate(model, aggrdata_dir / str(model), n_tracks)
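The `*_command` suffix and the use of `current_app` suggest these functions are Flask CLI entry points. A minimal sketch of how such a command might be registered, assuming Click; the command and option names here are illustrative, not the project's actual CLI:

import click
from flask.cli import with_appcontext

@click.command('aggregate-all')  # hypothetical command name
@click.option('--n-tracks', type=int, default=None)
@with_appcontext  # makes current_app available inside the command
def aggregate_all_cli(n_tracks):
    aggregate_all_command(n_tracks)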
Example #2
def measure_spread_command(output_dir, n_tracks, n_tags):
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    for model in get_models().get_all_offline():
        logging.info(f'Analyzing {model}..')
        measure_model_spread(model, output_dir / str(model), n_tracks)
        measure_tag_spread(model, output_dir / str(model), n_tags)
Example #3
def index_all_embeddings(n_trees=16, n_tracks=None, dry=False, force=False):
    models = get_models()
    for model in models.get_combinations():
        index_embeddings(
            model,
            n_trees, n_tracks, dry, force
        )
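`index_embeddings` itself is not shown; the `n_trees` parameter hints at an Annoy index under the hood. A sketch of the core indexing step under that assumption (the function name, metric, and path are made up):

from annoy import AnnoyIndex

def build_annoy_index(embeddings, n_trees=16, path='index.ann'):
    # embeddings: 2-D array, one row of floats per track
    index = AnnoyIndex(embeddings.shape[1], 'angular')
    for i, vector in enumerate(embeddings):
        index.add_item(i, vector)
    index.build(n_trees)  # more trees give better recall at the cost of index size
    index.save(path)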
Example #4
def extract_all(models_dir, dry=False, force=False):
    # TODO: incorporate dry properly
    from app.processing.essentia_wrappers import get_embeddings, get_melspecs, get_predictors
    app = current_app
    audio_dir = Path(app.config['AUDIO_DIR'])
    data_root_dir = Path(app.config['DATA_DIR'])
    models_dir = Path(models_dir)

    models = get_models()
    predictors = get_predictors(models_dir, models.data['architectures'])

    tracks_to_delete = []
    for track in tqdm(Track.get_all()):
        audio_file = audio_dir / track.path

        if force or not _already_extracted(track, models, data_root_dir):
            melspecs = get_melspecs(audio_file, models.data['algorithms'])
            embeddings = get_embeddings(melspecs, models.data['architectures'],
                                        predictors)

            if embeddings is None:
                tracks_to_delete.append(track)
            else:
                for model_name, embedding in embeddings.items():
                    embeddings_file = (data_root_dir / model_name /
                                       track.get_embeddings_filename())
                    if force or not embeddings_file.exists():
                        embeddings_file.parent.mkdir(parents=True,
                                                     exist_ok=True)
                        np.save(embeddings_file, embedding.astype(np.float16))

    for track in tracks_to_delete:
        track.delete()
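The `_already_extracted` helper is referenced above but not shown. One plausible implementation, assuming it mirrors the per-model file layout used when saving (this is a guess, not the project's code):

def _already_extracted(track, models, data_root_dir):
    # Hypothetical: a track counts as extracted once every model's
    # embeddings file already exists on disk.
    return all(
        (data_root_dir / str(model) / track.get_embeddings_filename()).exists()
        for model in models.get_combinations()
    )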
Example #5
def synthesize_all_spread_metrics_command(input_dir, output_file):
    results = {}
    # only regular models, no projections
    for model in get_models().get_combinations():
        logging.info(f'Analyzing {model}..')
        results[str(model)] = synthesize_spread_metric(model, input_dir)

    df = pd.DataFrame.from_dict(results, orient='index', columns=['std'])
    df.to_csv(output_file, float_format=FLOAT_FORMAT)
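`from_dict(..., orient='index')` turns the per-model results dict into one labelled row per model. A tiny standalone illustration with made-up model names and values:

import pandas as pd

results = {'msd-musicnn-1': 0.42, 'mtt-vgg-2': 0.37}  # hypothetical metrics
df = pd.DataFrame.from_dict(results, orient='index', columns=['std'])
print(df)
#                 std
# msd-musicnn-1  0.42
# mtt-vgg-2      0.37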
Example #6
def app():
    app = create_app()
    models = get_models()

    with app.app_context():
        database.create_tables(models)
        yield app
        database.drop_tables(models)

    os.unlink(config_.get('DATABASE').get('database'))
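This generator has the shape of a pytest fixture (setup, `yield`, teardown); its decorator was most likely dropped during extraction. A hedged usage sketch, assuming it is registered with `@pytest.fixture`:

import pytest
from flask import current_app

@pytest.fixture  # assumed decorator, not visible in the listing
def app():
    ...  # body as in Example #6

def test_app_context_is_active(app):
    # The fixture yields inside app.app_context(), so the context
    # (and the freshly created tables) are live for the whole test.
    assert current_app.name == app.name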
Example #7
def reduce_all(projection=None, n_tracks=None, dry=False, force=False):
    app = current_app
    data_dir = Path(app.config['DATA_DIR'])

    models = get_models()
    if projection is not None:
        projection_models = models.get_offline_projections(projection)
    else:
        projection_models = models.get_all_offline_projections()

    for model in projection_models:
        logging.info(f'Generating {model}')
        reduce(
            data_dir / str(model.without_projection()),
            data_dir / str(model),
            model.projection,
            n_tracks, dry, force
        )
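`reduce` is not shown; `model.projection` suggests a dimensionality-reduction step applied to the stored embeddings. A sketch of the core transform for a PCA projection, purely as an assumption (scikit-learn stands in for whatever the project actually uses):

from sklearn.decomposition import PCA

def reduce_embeddings(embeddings, n_components=2):
    # embeddings: 2-D array of shape (n_tracks, n_dimensions)
    return PCA(n_components=n_components).fit_transform(embeddings)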
Example #8
def measure_hubness(n_tracks, output_file, metric, projection, dimensions, n_jobs, random):
    from skhubness import Hubness
    tracks = Track.get_all(limit=n_tracks, random=random)

    models = get_models()
    models_iter = models.get_combinations() if projection is None else models.get_offline_projections(projection)
    results = []
    for model in list(models_iter):
        for _dimensions in tqdm(range(2, dimensions+1), desc=str(model)):
            embeddings = model.get_embeddings(tracks, dimensions=slice(_dimensions))
            embeddings_stacked = np.vstack(embeddings)

            hub = Hubness(k=10, metric=metric, return_value='all', n_jobs=n_jobs)
            hub.fit(embeddings_stacked[:, :_dimensions])
            result = {key: value for key, value in hub.score().items() if key in RETURN_VALUES}
            result.update({
                'model': f'{model.dataset}-{model.architecture}',
                'layer': model.layer,
                'dimensions': _dimensions
            })
            results.append(result)

    results_df = pd.DataFrame(results)
    results_df.to_csv(output_file, float_format=FLOAT_FORMAT)
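For reference, `skhubness.Hubness` with `return_value='all'` makes `score()` return a dict of hubness measures, which is what the `RETURN_VALUES` filter above relies on. A minimal standalone run on random data:

import numpy as np
from skhubness import Hubness

X = np.random.rand(200, 32)
hub = Hubness(k=10, metric='euclidean', return_value='all')
hub.fit(X)
scores = hub.score()  # dict of measures, e.g. k-skewness and the Robin Hood index
print(sorted(scores))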
Example #9
def aggregate_command(dataset, architecture, layer, output_file, projection,
                      n_tracks):
    model = Model(get_models(), dataset, architecture, layer, projection)
    aggregate(model, Path(output_file), n_tracks)
Example #10
def measure_model_spread_command(dataset, architecture, layer, output_dir, n_tracks):
    model = Model(get_models().data, dataset, architecture, layer, projection=None)
    measure_model_spread(model, Path(output_dir), n_tracks)
Example #11
def plot_dimension_spread_command(dataset, architecture, layer, projection, input_dir):
    model = Model(get_models().data, dataset, architecture, layer, projection=projection)
    plot_dimension_spread(model, input_dir)