Example no. 1
def main():
    # Run parameters: clustering backend, batching of the latent representations,
    # and which diagnostics (plots, silhouette analysis, mosaics) to produce.
    _CLUSTERING_TYPE = 'n2d'
    _BATCH_SIZE = 496
    _N_BATCH = 5
    _PLOTS = True
    _MOSAICS = True
    _SILHOUETTE = True
    _OUTPUT_IMAGE_WIDTH = 96
    _OUTPUT_IMAGE_HEIGHT = 54
    _MOSAIC_NROW = 10
    _MOSAIC_NCOL_MAX = 10

    # Load the trained model referenced by the clustering configuration and compute
    # the latent representation of the training pictures (_N_BATCH batches of
    # _BATCH_SIZE pictures each).
    model, model_config = Tools.load_model(
        CONFIG,
        CONFIG.get('clustering')[_CLUSTERING_TYPE]['model']['type'],
        CONFIG.get('clustering')[_CLUSTERING_TYPE]['model']['name'])
    filenames = Tools.list_directory_filenames(
        CONFIG.get('sampling')['autoencoder']['directory']['train'])
    pictures_id, intermediate_output = Tools.load_latent_representation(
        CONFIG, model, model_config, filenames, _BATCH_SIZE, _N_BATCH, False)

    # Run the selected clustering on the latent vectors and persist the result.
    clustering = run_clustering(CONFIG, _CLUSTERING_TYPE, pictures_id,
                                intermediate_output)

    clustering.save()

    if _PLOTS:
        run_plots(CONFIG, _CLUSTERING_TYPE, clustering)

    if _SILHOUETTE:
        plot_silhouette(CONFIG, _CLUSTERING_TYPE, clustering)

    if _MOSAICS:
        plot_mosaics(CONFIG, _CLUSTERING_TYPE, clustering, _OUTPUT_IMAGE_WIDTH,
                     _OUTPUT_IMAGE_HEIGHT, _MOSAIC_NROW, _MOSAIC_NCOL_MAX)
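
# Usage sketch: an entry-point guard so the pipeline above runs when the module is
# executed directly (assuming this script is meant to be launched as a standalone
# module).
if __name__ == '__main__':
    main()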
Example no. 2
import numpy as np

# Tools and the clustering classes (N2DClustering, ClassicalClustering,
# DBScanClustering) are assumed to be imported from the project package.


def populate_embedding(config, db_manager, clustering_type, clustering_version, clustering_model_type, clustering_model_name, drop=False):

    # Only the stored configuration is needed here; the clustering object itself is
    # re-instantiated below according to `clustering_type`.
    _, clustering_config = Tools.load_clustering(config, clustering_type, clustering_version, clustering_model_type, clustering_model_name)

    if drop:
        db_manager.drop_embedding_partition(clustering_type, clustering_version, clustering_model_type, clustering_model_name)

    if clustering_type == 'n2d':
        clustering = N2DClustering(clustering_config)
    elif clustering_type == 'classical':
        clustering = ClassicalClustering(clustering_config)
    elif clustering_type == 'dbscan':
        clustering = DBScanClustering(clustering_config)
    else:
        raise ValueError("Unknown clustering type: %s" % clustering_type)

    clustering.load()
    model, model_config = Tools.load_model(config, clustering_model_type, clustering_model_name)
    filenames = Tools.list_directory_filenames(config.get('directory')['collections'])
    # Generator mode: yields (ids, latents) batches of 496 pictures over the whole collection.
    generator = Tools.load_latent_representation(config, model, model_config, filenames, 496, None, True)

    count = 0
    for ids, latents in generator:
        pictures_embedding = clustering.predict_embedding(latents)
        pictures_label = clustering.predict_label(pictures_embedding)
        rows = []
        for i, picture_id in enumerate(ids):
            rows.append((
                picture_id,
                float(np.round(pictures_embedding[i][0], 4)),
                float(np.round(pictures_embedding[i][1], 4)),
                int(pictures_label[i]),
                clustering_type,
                clustering_version,
                clustering_model_type,
                clustering_model_name
            ))
        count += db_manager.insert_row_pictures_embedding(rows)
        print("Nombre d'insertion: %s / %s" % (count, len(filenames)))


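# A compact alternative sketch to the if/elif dispatch used in populate_embedding,
# assuming (as the branches there suggest) that the three clustering classes share
# the same single-argument constructor.
_CLUSTERING_CLASSES = {
    'n2d': N2DClustering,
    'classical': ClassicalClustering,
    'dbscan': DBScanClustering,
}


def build_clustering(clustering_type, clustering_config):
    """Instantiate the clustering backend for `clustering_type` or raise ValueError."""
    try:
        return _CLUSTERING_CLASSES[clustering_type](clustering_config)
    except KeyError:
        raise ValueError("Unknown clustering type: %s" % clustering_type) from None
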
# Config, SimpleConvAutoEncoder, Tools and ClassicalClustering are assumed to be
# imported from the project package.
import os

cfg = Config(project_dir=os.getenv("PROJECT_DIR"), mode=os.getenv("MODE"))

#%% [markdown]
# ## Loading the model

#%%
## load the model
model_type = 'simple_conv'
cfg.get('models')[model_type]['model_name'] = 'model_colab'
model = SimpleConvAutoEncoder(cfg.get('models')[model_type])

#%% [markdown]
# ## Loading the images

#%%
filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/')
# Generator yielding 10 batches of 496 pictures as numpy arrays, resized to target_size.
generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size=(27, 48), batch=496, nb_batch=10)

#%%
pictures_id, pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model)

#%%
intermediate_output = pictures_preds.reshape((pictures_preds.shape[0], 3*6*16))
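
#%% [markdown]
# Quick sanity check (a sketch): the reshape above flattens each encoded picture to
# 3*6*16 = 288 features, so the resulting matrix should have one 288-dimensional
# row per picture.

#%%
assert intermediate_output.shape == (pictures_preds.shape[0], 288)
print(intermediate_output.shape)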


#%% [markdown]
# ## PCA
# Dimensionality reduction

#%%
clustering = ClassicalClustering(cfg.get('clustering')['classical'], pictures_id, intermediate_output)
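
#%% [markdown]
# Next-step sketch: Example no. 2 calls `predict_embedding` and `predict_label` on a
# fitted clustering object, so the same calls are assumed to work here once this
# ClassicalClustering instance has been fitted (or loaded); the fitting call itself
# is not shown in this notebook.

#%%
pictures_embedding = clustering.predict_embedding(intermediate_output)
pictures_label = clustering.predict_label(pictures_embedding)
print(len(pictures_label), "pictures labelled")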