def aggregate_all_command(n_tracks):
    """Aggregate embeddings for every offline model into AGGRDATA_DIR."""
    aggrdata_dir = Path(current_app.config['AGGRDATA_DIR'])
    aggrdata_dir.mkdir(parents=True, exist_ok=True)

    models = get_models()
    for model in models.get_all_offline():
        aggregate(model, aggrdata_dir / str(model), n_tracks)


def measure_spread_command(output_dir, n_tracks, n_tags):
    """Measure per-model and per-tag spread for every offline model."""
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    for model in get_models().get_all_offline():
        logging.info(f'Analyzing {model}...')
        measure_model_spread(model, output_dir / str(model), n_tracks)
        measure_tag_spread(model, output_dir / str(model), n_tags)


def index_all_embeddings(n_trees=16, n_tracks=None, dry=False, force=False):
    """Build a nearest-neighbour index for every model combination."""
    models = get_models()
    for model in models.get_combinations():
        index_embeddings(model, n_trees, n_tracks, dry, force)


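# For reference, a hedged sketch of the kind of index an index_embeddings()
# call could build: the n_trees parameter suggests an Annoy-style forest,
# but this helper and its names are illustrative, not the project's code.
def _build_annoy_index_sketch(vectors, n_trees=16, metric='angular'):
    from annoy import AnnoyIndex

    index = AnnoyIndex(len(vectors[0]), metric)  # dimensionality of one vector
    for i, vector in enumerate(vectors):
        index.add_item(i, vector)
    index.build(n_trees)  # more trees: better recall, larger index
    return index

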
def extract_all(models_dir, dry=False, force=False):
    # TODO: incorporate dry properly
    from app.processing.essentia_wrappers import get_embeddings, get_melspecs, get_predictors

    app = current_app
    audio_dir = Path(app.config['AUDIO_DIR'])
    data_root_dir = Path(app.config['DATA_DIR'])
    models_dir = Path(models_dir)

    models = get_models()
    predictors = get_predictors(models_dir, models.data['architectures'])

    tracks_to_delete = []
    for track in tqdm(Track.get_all()):
        audio_file = audio_dir / track.path
        if force or not _already_extracted(track, models, data_root_dir):
            melspecs = get_melspecs(audio_file, models.data['algorithms'])
            embeddings = get_embeddings(melspecs, models.data['architectures'], predictors)
            if embeddings is None:
                # Extraction failed: collect the track and delete it after the
                # loop so we don't mutate the collection while iterating.
                tracks_to_delete.append(track)
            else:
                for model_name, embedding in embeddings.items():
                    embeddings_file = data_root_dir / model_name / track.get_embeddings_filename()
                    if force or not embeddings_file.exists():
                        embeddings_file.parent.mkdir(parents=True, exist_ok=True)
                        # Store as float16 to halve on-disk size.
                        np.save(embeddings_file, embedding.astype(np.float16))

    for track in tracks_to_delete:
        track.delete()


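# `_already_extracted` is defined elsewhere in the project; a plausible
# version (a hypothetical sketch, not the actual implementation) would
# check that an embeddings file already exists for every model:
def _already_extracted_sketch(track, models, data_root_dir):
    return all(
        (data_root_dir / str(model) / track.get_embeddings_filename()).exists()
        for model in models.get_combinations()
    )

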
def synthesize_all_spread_metrics_command(input_dir, output_file):
    results = {}
    # Only regular models, no projections.
    for model in get_models().get_combinations():
        logging.info(f'Analyzing {model}...')
        results[str(model)] = synthesize_spread_metric(model, input_dir)

    df = pd.DataFrame.from_dict(results, orient='index', columns=['std'])
    df.to_csv(output_file, float_format=FLOAT_FORMAT)


@pytest.fixture
def app():
    """Yield an app backed by a fresh database; tear everything down afterwards."""
    app = create_app()
    models = get_models()
    with app.app_context():
        database.create_tables(models)
        yield app
        database.drop_tables(models)
    os.unlink(config_.get('DATABASE').get('database'))


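# Example use of the fixture above -- a minimal sketch; the test name and
# the '/' route are assumptions, not taken from the project's test suite.
def test_index_page(app):
    client = app.test_client()
    assert client.get('/').status_code == 200

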
def reduce_all(projection=None, n_tracks=None, dry=False, force=False):
    app = current_app
    data_dir = Path(app.config['DATA_DIR'])
    models = get_models()

    if projection is not None:
        projection_models = models.get_offline_projections(projection)
    else:
        projection_models = models.get_all_offline_projections()

    for model in projection_models:
        logging.info(f'Generating {model}')
        reduce(
            data_dir / str(model.without_projection()),
            data_dir / str(model),
            model.projection,
            n_tracks,
            dry,
            force,
        )


def measure_hubness(n_tracks, output_file, metric, projection, dimensions, n_jobs, random):
    from skhubness import Hubness

    tracks = Track.get_all(limit=n_tracks, random=random)
    models = get_models()
    models_iter = (models.get_combinations() if projection is None
                   else models.get_offline_projections(projection))

    results = []
    for model in list(models_iter):
        # Score hubness at every dimensionality from 2 up to `dimensions`.
        for _dimensions in tqdm(range(2, dimensions + 1), desc=str(model)):
            embeddings = model.get_embeddings(tracks, dimensions=slice(_dimensions))
            embeddings_stacked = np.vstack(embeddings)

            hub = Hubness(k=10, metric=metric, return_value='all', n_jobs=n_jobs)
            hub.fit(embeddings_stacked[:, :_dimensions])

            result = {key: value for key, value in hub.score().items()
                      if key in RETURN_VALUES}
            result.update({
                'model': f'{model.dataset}-{model.architecture}',
                'layer': model.layer,
                'dimensions': _dimensions,
            })
            results.append(result)

    results_df = pd.DataFrame(results)
    results_df.to_csv(output_file, float_format=FLOAT_FORMAT)


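# Quick way to eyeball the CSV written above -- a usage sketch; the
# 'k_skewness' column is an assumption that depends on RETURN_VALUES.
def _summarize_hubness_sketch(csv_file):
    hubness_df = pd.read_csv(csv_file)
    return hubness_df.pivot_table(index='dimensions', columns='model',
                                  values='k_skewness')

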
def aggregate_command(dataset, architecture, layer, output_file, projection, n_tracks):
    model = Model(get_models().data, dataset, architecture, layer, projection)
    aggregate(model, Path(output_file), n_tracks)


def measure_model_spread_command(dataset, architecture, layer, output_dir, n_tracks):
    model = Model(get_models().data, dataset, architecture, layer, projection=None)
    measure_model_spread(model, Path(output_dir), n_tracks)


def plot_dimension_spread_command(dataset, architecture, layer, projection, input_dir):
    model = Model(get_models().data, dataset, architecture, layer, projection=projection)
    plot_dimension_spread(model, input_dir)