예제 #1
0
def plot_mosaics(config, clustering_type, clustering, output_image_width,
                 output_image_height, mosaic_nrow, mosaic_ncol_max):
    """
    Build and save one picture mosaic per cluster.

    For every distinct value of `clustering.final_labels`, the pictures whose
    result row carries that label are read from the collections directory,
    assembled with `Tools.display_mosaic` and written to
    `<clustering.save_directory>/cluster_<label>.png`.

    `clustering_type` is accepted but not used by this function.

    Returns the array of distinct cluster labels.
    """
    labels = np.unique(clustering.final_labels)
    results = clustering.get_results()
    picture_size = (output_image_height, output_image_width)

    for label in labels:
        pictures = []
        for result in results:
            # result[0] is the picture id, result[1] its cluster label
            if result[1] != label:
                continue
            picture_path = os.path.join(
                config.get('directory')['collections'], "%s.jpg" % result[0])
            pictures.append(
                Tools.read_np_picture(picture_path, target_size=picture_size))

        mosaic = Tools.display_mosaic(pictures,
                                      nrow=mosaic_nrow,
                                      ncol_max=mosaic_ncol_max)
        mosaic_name = "cluster_%s.png" % str(label).zfill(2)
        mosaic.save(os.path.join(clustering.save_directory, mosaic_name),
                    "PNG")

    return labels
예제 #2
0
def main():
    """
    Run the 'n2d' clustering on the autoencoder training pictures.

    Loads the model configured for the clustering, computes the latent
    representation of the training pictures, fits and saves the clustering,
    then produces the plots, the silhouette graph and the cluster mosaics
    (all three are switched on by the local flags below).
    """
    clustering_type = 'n2d'
    batch_size, n_batch = 496, 5
    with_plots, with_silhouette, with_mosaics = True, True, True
    image_width, image_height = 96, 54
    mosaic_nrow, mosaic_ncol_max = 10, 10

    model_settings = CONFIG.get('clustering')[clustering_type]['model']
    model, model_config = Tools.load_model(CONFIG, model_settings['type'],
                                           model_settings['name'])

    train_directory = CONFIG.get(
        'sampling')['autoencoder']['directory']['train']
    filenames = Tools.list_directory_filenames(train_directory)

    pictures_id, latent_output = Tools.load_latent_representation(
        CONFIG, model, model_config, filenames, batch_size, n_batch, False)

    clustering = run_clustering(CONFIG, clustering_type, pictures_id,
                                latent_output)
    clustering.save()

    if with_plots:
        run_plots(CONFIG, clustering_type, clustering)

    if with_silhouette:
        plot_silhouette(CONFIG, clustering_type, clustering)

    if with_mosaics:
        plot_mosaics(CONFIG, clustering_type, clustering, image_width,
                     image_height, mosaic_nrow, mosaic_ncol_max)
예제 #3
0
def create_sprite(config, df):
    """
    Build a sprite atlas from the pictures listed in `df['pictures_id']`.

    The atlas is a grid of SPRITE_NB_LIGNE x SPRITE_NB_COLONNE tiles, each
    TARGET_SIZE_HEIGHT x TARGET_SIZE_WIDTH pixels, filled row by row in the
    order of `df['pictures_id']`. Tiles without a picture stay black (zeros).

    Pictures are read one at a time and only while there is room left in the
    grid, so an empty `df` yields an all-black atlas instead of an
    IndexError, and pictures beyond the grid capacity are never loaded.

    Returns the image produced by `Tools.display_one_picture`.
    """
    collections_dir = config.get('directory')['collections']
    capacity = SPRITE_NB_LIGNE * SPRITE_NB_COLONNE

    sprite = np.zeros((TARGET_SIZE_HEIGHT * SPRITE_NB_LIGNE,
                       TARGET_SIZE_WIDTH * SPRITE_NB_COLONNE, 3))

    for index, picture_id in enumerate(df['pictures_id']):
        if index >= capacity:
            # the grid is full: remaining pictures are ignored
            break

        row, col = divmod(index, SPRITE_NB_COLONNE)
        top = row * TARGET_SIZE_HEIGHT
        left = col * TARGET_SIZE_WIDTH

        sprite[top:top + TARGET_SIZE_HEIGHT,
               left:left + TARGET_SIZE_WIDTH, :] = Tools.read_np_picture(
                   os.path.join(collections_dir, "%s.jpg" % picture_id),
                   target_size=(TARGET_SIZE_HEIGHT, TARGET_SIZE_WIDTH))

    return Tools.display_one_picture(sprite)
예제 #4
0
    def save(self):
        """
        Dump the fitted UMAP and DBSCAN objects with joblib.

        `Tools.create_dir_if_not_exists` is called on `save_directory` first;
        each object then goes to its own file inside that directory, named by
        `umap_save_name` and `dbscan_save_name`.
        """
        out_dir = self.save_directory
        Tools.create_dir_if_not_exists(out_dir)

        umap_path = os.path.join(out_dir, self.umap_save_name)
        joblib.dump(self.umap_fit, umap_path)

        dbscan_path = os.path.join(out_dir, self.dbscan_save_name)
        joblib.dump(self.dbscan_fit, dbscan_path)
예제 #5
0
    def save(self):
        """
        Dump the fitted PCA, k-means and CAH objects with joblib.

        `Tools.create_dir_if_not_exists` is called on `save_directory` first;
        each object then goes to its own file inside that directory, named by
        `pca_save_name`, `kmeans_save_name` and `cah_save_name`.
        """
        out_dir = self.save_directory
        Tools.create_dir_if_not_exists(out_dir)

        pca_path = os.path.join(out_dir, self.pca_save_name)
        joblib.dump(self.pca_fit, pca_path)

        kmeans_path = os.path.join(out_dir, self.kmeans_save_name)
        joblib.dump(self.kmeans_fit, kmeans_path)

        cah_path = os.path.join(out_dir, self.cah_save_name)
        joblib.dump(self.cah_fit, cah_path)
예제 #6
0
    def save(self):
        """
        Dump the fitted PCA, k-means and DBSCAN objects with joblib.

        The target directory is read from `self.config['save_directory']` and
        passed to `Tools.create_dir_if_not_exists` before anything is written;
        file names come from `pca_save_name`, `kmeans_save_name` and
        `dbscan_save_name`.
        """
        out_dir = self.config['save_directory']
        Tools.create_dir_if_not_exists(out_dir)

        pca_path = os.path.join(out_dir, self.pca_save_name)
        joblib.dump(self.pca_fit, pca_path)

        kmeans_path = os.path.join(out_dir, self.kmeans_save_name)
        joblib.dump(self.kmeans_fit, kmeans_path)

        dbscan_path = os.path.join(out_dir, self.dbscan_save_name)
        joblib.dump(self.dbscan_fit, dbscan_path)
예제 #7
0
    def __init__(self, config, pictures_id, pictures_np):
        """
        Store the inputs and prepare the output directory.

        The save directory is `<config['save_directory']>/<type>_<name>_<version>`
        where type and name come from `config['model']` and version from
        `config['version']`; `Tools.create_dir_if_not_exists` is called on it.
        `final_labels` and `colors` start as None.
        """
        self.config = config

        model_settings = self.config['model']
        run_name = '%s_%s_%s' % (model_settings['type'],
                                 model_settings['name'],
                                 self.config['version'])
        self.save_directory = os.path.join(self.config['save_directory'],
                                           run_name)

        self.pictures_id = pictures_id
        self.pictures_np = pictures_np

        # filled in later, once the clustering has been computed
        self.final_labels = None
        self.colors = None

        Tools.create_dir_if_not_exists(self.save_directory)
예제 #8
0
 def compute_colors(self):
     """
     Compute one color per entry of `final_labels` and store them in `colors`.

     Each color comes from `Tools.get_color_from_label(label, n_classes)`,
     n_classes being the number of distinct labels. Returns `self`.
     """
     labels = self.final_labels
     n_classes = len(set(labels))

     colors = []
     for label in labels:
         colors.append(Tools.get_color_from_label(label, n_classes))

     self.colors = colors
     return self
예제 #9
0
def main():
    """
    Generate the sprite atlas and the two facets HTML reports.

    Data is requested through the db manager, the sprite is saved as PNG
    under `<reports>/figures/`, then `facets.html` and `facets-dive.html` are
    written in the reports directory.
    """
    db_manager = Tools.create_db_manager(CONFIG)
    df = request_data(CONFIG, db_manager)

    # sprite atlas
    sprite = create_sprite(CONFIG, df)
    sprite_path = os.path.join(
        CONFIG.get('directory')['reports'], 'figures', 'sprite_altas.png')
    sprite.save(sprite_path, "PNG")

    # facets overview
    html_facets = generate_facets(CONFIG, df)
    facets_path = os.path.join(
        CONFIG.get('directory')['reports'], 'facets.html')
    with open(facets_path, 'w') as facets_file:
        facets_file.write(html_facets)

    # facets dive, which references the sprite with a path relative to the
    # reports directory
    html_facets_dive = generate_facets_dive(CONFIG, df,
                                            './figures/sprite_altas.png')
    facets_dive_path = os.path.join(
        CONFIG.get('directory')['reports'], 'facets-dive.html')
    with open(facets_dive_path, 'w') as facets_dive_file:
        facets_dive_file.write(html_facets_dive)
예제 #10
0
def create_poster_picture(config, poster_config, poster, pictures_df):
    """
    Fill every part of `poster` with a picture and return the result.

    For each part of `poster_config['DECOUPAGE']`:

    * a picture is chosen: sampled from `pictures_df` among the labels listed
      in `part['cluster']` (and removed from the local pool so it is not
      picked twice), or drawn with `random.choice` from `part['picture_ids']`
      when `part['cluster']` is empty;
    * the picture is read from the collections directory, or from
      `<project_dir>/<part['directory']>` when the part defines one;
    * it is pasted in `poster` at `part['origin']` (x, y) with `part['size']`
      (width, height).

    The picture is resized to at least O_HEIGHT x O_WIDTH and at least the
    part size, then cropped around its centre to the part size. Row offsets
    are derived from heights and column offsets from widths; parts smaller
    than the reference size in only one dimension are handled as well.

    `poster` is modified in place. Returns `(poster, pictures_id_positions)`
    where the second item lists, per part, the chosen picture id and
    `part['position']`.
    """
    pictures_id_positions = []

    for part in poster_config['DECOUPAGE']:
        directory = config.get('directory')['collections']
        if part.get('directory'):
            directory = os.path.join(
                config.get('directory')['project_dir'], part['directory'])

        if part['cluster']:
            selection = pictures_df.loc[pictures_df.label.isin(
                part['cluster'])].sample(1)
            # local rebinding only: the caller's dataframe is left untouched
            pictures_df = pictures_df.drop(selection.index)
            picture_id = selection.pictures_id.values[0]
        else:
            picture_id = random.choice(part['picture_ids'])

        pictures_id_positions.append({
            'pictures_id': picture_id,
            'position': part['position']
        })

        part_width, part_height = part['size'][0], part['size'][1]
        origin_x, origin_y = part['origin'][0], part['origin'][1]

        # never read the picture smaller than the reference size, nor smaller
        # than the part itself, so that the centred crop below always fits
        read_height = max(part_height, O_HEIGHT)
        read_width = max(part_width, O_WIDTH)
        picture = Tools.read_np_picture(
            os.path.join(directory, "%s.jpg" % picture_id),
            target_size=(read_height, read_width))

        # axis 0 is rows (height), axis 1 is columns (width)
        row_start = (read_height - part_height) // 2
        col_start = (read_width - part_width) // 2
        part_array = picture[row_start:row_start + part_height,
                             col_start:col_start + part_width, :]

        poster[origin_y:origin_y + part_height,
               origin_x:origin_x + part_width, :] = part_array

    return poster, pictures_id_positions
예제 #11
0
def populate_embedding(config, db_manager, clustering_type, clustering_version, clustering_model_type, clustering_model_name, drop=False):
    """
    Compute and insert the 2D embedding and cluster label of every picture.

    The clustering configuration is loaded through `Tools.load_clustering`,
    the matching clustering class is instantiated and loaded, then the latent
    representation of every picture of the collections directory is streamed
    by batches, projected, labelled and inserted with
    `db_manager.insert_row_pictures_embedding`.

    :param config: project configuration, used for every lookup
    :param db_manager: database manager receiving the rows
    :param clustering_type: one of 'n2d', 'classical', 'dbscan'
    :param clustering_version: version stored with each row
    :param clustering_model_type: type of the model producing the latents
    :param clustering_model_name: name of the model producing the latents
    :param drop: when true, `db_manager.drop_embedding_partition` is called
        for this clustering before inserting
    :raises ValueError: when `clustering_type` is not supported
    """
    _, clustering_config = Tools.load_clustering(config, clustering_type, clustering_version, clustering_model_type, clustering_model_name)

    if drop:
        db_manager.drop_embedding_partition(clustering_type, clustering_version, clustering_model_type, clustering_model_name)

    if clustering_type == 'n2d':
        clustering = N2DClustering(clustering_config)
    elif clustering_type == 'classical':
        clustering = ClassicalClustering(clustering_config)
    elif clustering_type == 'dbscan':
        clustering = DBScanClustering(clustering_config)
    else:
        raise ValueError("Unknown clustering type: %r" % (clustering_type,))

    clustering.load()
    model, model_config = Tools.load_model(config, clustering_model_type, clustering_model_name)
    filenames = Tools.list_directory_filenames(config.get('directory')['collections'])
    generator = Tools.load_latent_representation(config, model, model_config, filenames, 496, None, True)

    count = 0
    for ids, latents in generator:
        pictures_embedding = clustering.predict_embedding(latents)
        pictures_label = clustering.predict_label(pictures_embedding)

        # plain Python floats / ints so the rows do not carry numpy scalars
        rows = [
            (
                picture_id,
                float(np.round(pictures_embedding[i][0], 4)),
                float(np.round(pictures_embedding[i][1], 4)),
                int(pictures_label[i]),
                clustering_type,
                clustering_version,
                clustering_model_type,
                clustering_model_name,
            )
            for i, picture_id in enumerate(ids)
        ]
        count += db_manager.insert_row_pictures_embedding(rows)
        print("Nombre d'insertion: %s / %s" % (count, len(filenames)))
예제 #12
0
def write_poster_picture(config, poster_config, poster, poster_id=None):
    """
    Save `poster` as a JPEG file and return the id used in its name.

    The file is written to
    `<data_dir>/posters/version_<poster_config['version']>_<poster_id>.jpg`,
    `data_dir` being read from the `config` argument. When `poster_id` is
    missing or falsy, a new `uuid.uuid1()` is generated.
    """
    if not poster_id:
        poster_id = uuid.uuid1()

    filename = "version_%s_%s.jpg" % (poster_config['version'], poster_id)
    img = Tools.display_one_picture(poster)
    img.save(
        os.path.join(config.get('directory')['data_dir'], 'posters', filename),
        "JPEG")

    return poster_id
예제 #13
0
def main(action='populate_embedding'):
    """
    Populate database tables according to `action`.

    * 'populate_locations' (the former spelling 'population_locations' is
      still accepted): calls `populate_locations`.
    * 'populate_embedding' (default): creates the pictures embedding table
      and loads versions 1 to 3 of the 'n2d' clustering built on the
      'simple_conv' / 'model_colab' model. A failure on one version is
      printed and does not prevent the following ones from being loaded.

    Any other action does nothing.
    """
    db_manager = Tools.create_db_manager(CONFIG)

    if action in ('populate_locations', 'population_locations'):
        populate_locations(CONFIG, db_manager)
    elif action == 'populate_embedding':
        db_manager.create_pictures_embedding_table(False)
        to_load = [
            {
                'clustering_type': 'n2d',
                'clustering_version': version,
                'clustering_model_type': 'simple_conv',
                'clustering_model_name': 'model_colab',
                'drop': False,
            }
            for version in (1, 2, 3)
        ]
        for kwargs in to_load:
            try:
                populate_embedding(CONFIG, db_manager, **kwargs)
            except Exception as err:
                # best effort: report the failure and go on with the next one
                print(err)
예제 #14
0
def main(config_id, generate, poster_id):
    """
    Generate `generate` posters from the poster configuration `config_id`.

    The posters table is created, the template poster is written with the id
    "template", then for each generation the poster is filled with pictures,
    saved through `save_poster_picture` (which receives the `poster_id`
    argument and returns the id to use) and written to disk under that id.
    """
    requested_poster_id = poster_id
    total = generate

    db_manager = Tools.create_db_manager(CONFIG)

    # database table
    db_manager.create_posters_table()

    poster_config = POSTERS_CONFIG[config_id]
    pictures_df = get_pictures_df(CONFIG, db_manager, poster_config)

    # empty canvas, then the template drawn on it
    poster = create_empty_poster(CONFIG, poster_config)
    poster = create_poster_template_picture(CONFIG, poster_config, poster)
    write_poster_picture(CONFIG, poster_config, poster, poster_id="template")

    for step in range(1, total + 1):
        print("%s / %s" % (step, total))

        poster, positions = create_poster_picture(CONFIG, poster_config,
                                                  poster, pictures_df)

        saved_poster_id = save_poster_picture(CONFIG, poster_config,
                                              positions, db_manager,
                                              requested_poster_id)

        write_poster_picture(CONFIG, poster_config, poster, saved_poster_id)
예제 #15
0
# Project configuration, located through the PROJECT_DIR and MODE
# environment variables.
cfg = Config(project_dir = os.getenv("PROJECT_DIR"), mode = os.getenv("MODE"))

#%% [markdown]
# ## Loading the model

#%%
## load the model
model_type = 'simple_conv'
# override the configured model name before building the autoencoder
cfg.get('models')[model_type]['model_name'] = 'model_colab'
model = SimpleConvAutoEncoder(cfg.get('models')[model_type])

#%% [markdown]
## Loading the pictures

#%%
filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/')
# NOTE(review): batch / nb_batch presumably mean 10 batches of 496 pictures - confirm against Tools
generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (27, 48), batch = 496, nb_batch = 10)

#%%
pictures_id, pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model)

#%%
# flatten each encoded picture into a single row of 3*6*16 = 288 values
intermediate_output = pictures_preds.reshape((pictures_preds.shape[0], 3*6*16))


#%% [markdown]
# ## PCA
# Dimensionality reduction

#%%
clustering = ClassicalClustering(cfg.get('clustering')['classical'], pictures_id, intermediate_output)
예제 #16
0
def _save_cluster_scatter(embedding, colors, output_path):
    """Scatter the first two columns of `embedding`, colored by `colors`, and save it to `output_path`."""
    fig, ax = plt.subplots(figsize=(24, 14))
    try:
        scatter = ax.scatter(embedding[:, 0], embedding[:, 1], c=colors)
        legend = ax.legend(*scatter.legend_elements(),
                           loc="lower left",
                           title="Classes")
        ax.add_artist(legend)
        fig.savefig(output_path)
    finally:
        # release the figure: pyplot keeps every figure alive until closed
        plt.close(fig)


def run_plots(config, clustering_type, clustering):
    """
    Produce the graphs that apply to `clustering_type`.

    * 'classical': PCA and t-SNE scatter plots of the final clusters, plus
      the hierarchical clustering dendrogram.
    * 'n2d' / 'dbscan': UMAP scatter plot of the final clusters.
    * all three: an interactive bokeh projection (UMAP or t-SNE) whose hover
      tooltip shows the picture and its cluster.

    Every file is written in `clustering.save_directory`; matplotlib figures
    are closed once saved. Returns True.
    """
    save_directory = clustering.save_directory

    if clustering_type == 'classical':
        ## Graphs of PCA and final clusters
        _save_cluster_scatter(clustering.pca_reduction, clustering.colors,
                              os.path.join(save_directory, 'pca_clusters.png'))

        ## Graphs of TSNE and final clusters
        _save_cluster_scatter(clustering.tsne_embedding, clustering.colors,
                              os.path.join(save_directory,
                                           'tsne_clusters.png'))

    if clustering_type in ['n2d', 'dbscan']:
        ## Graphs of UMAP and final clusters
        _save_cluster_scatter(clustering.umap_embedding, clustering.colors,
                              os.path.join(save_directory,
                                           'umap_clusters.png'))

    if clustering_type in ['n2d', 'classical', 'dbscan']:
        ## Interactive projection with picture tooltips
        filenames = [
            os.path.join(
                config.get('directory')['collections'], "%s.jpg" % one_res[0])
            for one_res in clustering.get_results()
        ]
        images_array = [
            Tools.read_np_picture(img_filename, target_size=(54, 96))
            for img_filename in filenames
        ]
        base64_images = [Tools.base64_image(img) for img in images_array]

        if clustering_type in ['n2d', 'dbscan']:
            x = clustering.umap_embedding[:, 0]
            y = clustering.umap_embedding[:, 1]
            html_file = 'umap_bokeh.html'
            title = 'UMAP projection of iss clusters'
        elif clustering_type == 'classical':
            x = clustering.tsne_embedding[:, 0]
            y = clustering.tsne_embedding[:, 1]
            html_file = 'tsne_bokeh.html'
            title = 't-SNE projection of iss clusters'

        df = pd.DataFrame({'x': x, 'y': y})
        df['image'] = base64_images
        df['label'] = clustering.final_labels.astype(str)
        # NOTE(review): called here with the (string) label only, whereas
        # compute_colors passes (label, n_classes) - confirm the signature
        df['color'] = df['label'].apply(Tools.get_color_from_label)

        datasource = ColumnDataSource(df)

        output_file(os.path.join(save_directory, html_file))

        plot_figure = figure(
            title=title,
            # plot_width=1200,
            # plot_height=1200,
            tools=('pan, wheel_zoom, reset'))

        plot_figure.add_tools(
            HoverTool(tooltips="""
        <div>
            <div>
                <img src='@image' style='float: left; margin: 5px 5px 5px 5px'/>
            </div>
            <div>
                <span style='font-size: 16px'>Cluster:</span>
                <span style='font-size: 18px'>@label</span>
            </div>
        </div>
        """))

        plot_figure.circle('x',
                           'y',
                           source=datasource,
                           color=dict(field='color'),
                           line_alpha=0.6,
                           fill_alpha=0.6,
                           size=4)

        show(plot_figure)

    if clustering_type == 'classical':
        ## Dendrogram
        fig, ax = plt.subplots(figsize=(24, 14))
        try:
            plt.title('Hierarchical Clustering Dendrogram')
            Tools.plot_dendrogram(clustering.cah_fit,
                                  labels=clustering.cah_labels)
            plt.savefig(os.path.join(save_directory, 'dendograms.png'))
        finally:
            plt.close(fig)

    return True
예제 #17
0
# -*- coding: utf-8 -*-

# `os` is required by the os.getenv calls below
import os

from iss.tools import Config
from iss.tools import Tools
from iss.models import SimpleConvAutoEncoder
from iss.clustering import ClassicalClustering
from dotenv import find_dotenv, load_dotenv

## Config
# load the .env file so that PROJECT_DIR and MODE are available
load_dotenv(find_dotenv())
cfg = Config(project_dir = os.getenv("PROJECT_DIR"), mode = os.getenv("MODE"))

## load the model
model_type = 'simple_conv'
cfg.get('models')[model_type]['model_name'] = 'model_colab'
model = SimpleConvAutoEncoder(cfg.get('models')[model_type])

## Picture generator
filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/')
generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (27, 48), batch = 496, nb_batch = 2)

## Generate pictures