Exemple #1
0
def plot_train_radii_separation(obj_dir=C.obj_dir, std=False):
    """
    Plot the average cluster radius and the average distance to other
    clusters during training.

    For every iteration found in the validation history read from
    ``obj_dir``, the centroid and radius of each class are computed with
    ``T.centroid`` / ``T.radius``. Two error-bar series are then drawn
    against the sorted iteration keys:

    * 'Radii' - mean of the class radii, with the stdev of the class
      radii as error bar.
    * 'Average separation' - mean over classes of each class's average
      centroid distance to every *other* class, with the stdev of all
      those centroid-to-centroid distances as error bar. Self-distances
      (always zero) are excluded so they do not drag the figures down.

    Parameters:
        obj_dir: location handed to ``read_validation_history``.
        std: currently unused; kept so existing callers keep working.

    Returns:
        The figure created by ``plt.subplots``.
    """
    vh = read_validation_history(obj_dir)  # history is stored squeezed
    its = sorted(vh)

    rads, rads_std = [], []
    dists, dists_std = [], []
    for i in its:
        vecs = dict_unsqueeze(vh[i])  # unsqueeze as we go
        cents = {c: T.centroid(vecs[c]) for c in vecs}
        radii = [T.radius(cents[c], vecs[c]) for c in vecs]

        class_sep = []   # per-class mean distance to the other centroids
        pair_dists = []  # every centroid-to-other-centroid distance
        for c in cents:
            # Every centroid is already known at this point, so the
            # average really covers all other classes, not only the ones
            # visited earlier in the loop.
            to_others = [T.dist(cents[c], cents[c2])
                         for c2 in cents if c2 != c]
            # A lone class has no neighbours; use 0.0 so the statistics
            # below still have data to work on.
            to_others = to_others or [0.0]
            class_sep.append(Average(to_others))
            pair_dists.extend(to_others)

        rads.append(Average(radii))
        rads_std.append(stdev(radii))
        dists.append(Average(class_sep))
        dists_std.append(stdev(pair_dists))

    fig, ax = plt.subplots(1)
    ax.errorbar(its, rads, yerr=rads_std, linewidth=3, linestyle='dashed',
                label='Radii', elinewidth=1, capsize=10)
    ax.errorbar(its, dists, yerr=dists_std, linewidth=3,
                label='Average separation', elinewidth=1, capsize=10)
    ax.legend()
    ax.set(xlabel='Iteration')
    ax.set(ylabel='radius')
    fig.suptitle('Average class radii and separation during training', fontsize=16)
    return fig
Exemple #2
0
    else:
        train_step(optimizer=optimizer)
    # Apply the configured decay factor to the learning rate, then save the
    # model under the name save_name() yields for step i.
    C.learn_rate = C.learn_rate * C.lr_decay
    base_model.save(save_name(i))

    # Get the vectors for the validation directory and store them under
    # C.obj_dir as 'val_pred_<i>'.
    # NOTE(review): `vs` is iterated and indexed like a mapping below,
    # presumably class -> vectors - confirm against T.get_vectors.
    vs = T.get_vectors(base_model, C.val_dir)
    T.save_obj(vs, os.path.join(C.obj_dir, 'val_pred_' + str(i)))
    # `c` feeds both the confusion report and the accuracy report below.
    c = T.count_nearest_centroid(vs)
    log('Summarizing ' + str(i))
    # Per-step report files in C.log_dir; opened with 'w', so an existing
    # file for the same step is overwritten.
    with open(os.path.join(C.log_dir, 'summarize.' + str(i) + '.log'),
              'w') as sumfile:
        T.summarize(vs, outfile=sumfile)
    with open(os.path.join(C.log_dir, 'clusters.' + str(i) + '.log'),
              'w') as cfile:
        T.confusion_counts(c, outfile=cfile)
    # Centroid and radius for every key of `vs` at this step.
    c_tmp = {}
    r_tmp = {}
    for v in vs:
        c_tmp[v] = T.centroid(vs[v])
        r_tmp[v] = T.radius(c_tmp[v], vs[v])
    # round(100 * x) / 100 keeps two decimals for the log output.
    c_rad = [round(100 * r_tmp[v]) / 100 for v in vs]
    # Distance between this step's centroid and the one held in `cents`.
    # NOTE(review): `cents` is read here before it is reassigned below, so
    # it has to be set by code outside this excerpt (presumably the
    # centroids of the previous step) - confirm.
    c_mv = [round(100 * T.dist(c_tmp[v], cents[v])) / 100 for v in vs]
    log('Centroid radius: ' + str(c_rad))
    log('Centroid moved: ' + str(c_mv))
    # Keep this step's centroids as the reference for the next comparison.
    cents = c_tmp

    # The main log file is opened in append mode, unlike the per-step files.
    with open(C.logfile, 'a') as f:
        T.accuracy_counts(c, outfile=f)
    # todo: avg cluster radius, avg cluster distances
    log('Avg centr rad: %.2f move: %.2f' % (avg(c_rad), avg(c_mv)))
Exemple #3
0
def plotly_animate_spheres(validation_history, classes=None, dims=[0,1],
                   ax_lims=1., notebook=False):
    """
    Create a 2D Plotly animation of cluster evolution during training,
    representing each cluster as a single marker for clarity and speed.

    At every step the centroid (``T.centroid``) and radius (``T.radius``)
    of each class are computed. The class is drawn at the two centroid
    components selected by ``dims``, with a marker size of radius * 100.
    A slider and Play/Pause buttons move between the steps.

    Parameters:
        validation_history: mapping from step to the per-class data of
            that step; each entry is passed through ``dict_unsqueeze``
            before use.
        classes: classes to draw. Defaults to the keys of the first step.
            The list is used in sorted order; the caller's list is left
            untouched.
        dims: the two centroid components used as x and y. The default
            list is never modified here.
        ax_lims: both axes span ``[-ax_lims, ax_lims]``.
        notebook: if true, render with ``iplot``; otherwise with ``plot``.

    Returns:
        None; the figure is only displayed.

    Raises:
        ValueError: if ``validation_history`` contains no steps.
    """
    vh = validation_history
    its = sorted(vh)
    if not its:
        raise ValueError('validation_history is empty')
    if classes is None:
        classes = list(vh[its[0]])
    # sorted() copies, so a list supplied by the caller is not reordered.
    classes = sorted(classes)
    axl = ax_lims

    # One list of marker traces per step, one trace per class.
    circles = {}
    for e in its:
        vecs = dict_unsqueeze(vh[e])  # unsqueeze once per step
        traces = []
        for i, c in enumerate(classes):
            centre = T.centroid(vecs[c])
            rad = T.radius(centre, vecs[c])
            traces.append(
                dict(
                    text=c,
                    name=c,
                    mode='markers',
                    x=[centre[dims[0]]],
                    y=[centre[dims[1]]],
                    # cycle through the palette when there are more
                    # classes than colours
                    marker=dict(color=colours[i % len(colours)],
                                size=rad * 100,
                                ),
                )
            )
        circles[e] = traces

    # Starting data: the first step, which is also where the slider
    # (active=0) starts.
    data = circles[its[0]]
    # List of dicts of data items to update at each step.
    frames = [dict(data=circles[e],
                   name='frame{}'.format(e)
                   ) for e in its]

    sliders = [dict(steps=[dict(method='animate',  # Plotly method called when
                                                   # the slider value changes
                                args=[['frame{}'.format(e)],  # arguments passed
                                                              # to that method
                                      dict(mode='immediate',
                                           frame=dict(duration=300, redraw=False),
                                           transition=dict(duration=300, easing='cubic-in-out')
                                           )
                                      ],
                                label='{}'.format(e)
                                ) for e in its],
                    transition=dict(duration=300, easing='cubic-in-out'),
                    currentvalue=dict(font=dict(size=12),
                                      prefix='Step: ',
                                      visible=True,
                                      xanchor='center'
                                      ),
                    active=0,
                    len=1.0)  # slider length
               ]

    layout = dict(
                title='Interactive Cluster Shapes, dims={}'.format(dims),
                xaxis=dict(range=[-axl, axl], zeroline=True),
                yaxis=dict(range=[-axl, axl], zeroline=True),
                sliders=sliders,
                legend=dict(itemsizing='constant'),
                updatemenus=[{
                            'buttons': [
                                {
                                    # NOTE(review): `transition` is not
                                    # defined inside this function; it has
                                    # to come from the enclosing module,
                                    # otherwise this raises NameError -
                                    # confirm.
                                    'args': [None, {'frame': {'duration': 500, 'redraw': False},
                                            'fromcurrent': True, 'transition': transition}],
                                    'label': 'Play',
                                    'method': 'animate'
                                },
                                {
                                    'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                                    'transition': {'duration': 0}}],
                                    'label': 'Pause',
                                    'method': 'animate'
                                }
                            ],
                            'direction': 'left',
                            'pad': {'r': 10, 't': 87},
                            'showactive': False,
                            'type': 'buttons',
                            'x': 0.1,
                            'xanchor': 'right',
                            'y': 0,
                            'yanchor': 'top'
                        }]
                )

    fig = dict(data=data, layout=layout, frames=frames)
    if notebook:
        iplot(fig, validate=False)
    else:
        plot(fig, validate=False)
    return
Exemple #4
0
    # K-means clustering (unsupervised): fit one cluster per known class on
    # the validation vectors, then predict a cluster index for each vector.
    # NOTE(review): `cluster` is presumably scikit-learn's cluster module
    # - confirm against the file's imports.
    classes = list(vs)
    classes.sort()
    n_clusters = len(classes)
    centersSKL = cluster.MiniBatchKMeans(n_clusters)
    centersSKL.fit(X_val) 
    y_pred = centersSKL.predict(X_val)
    # Identify the clusters by class (hopefully).
    centroids = {} # for the predicted clusters, keyed by predicted label
    radii = {}
    # Find centroids and radii of the detected clusters: gather the rows of
    # X_val assigned to each predicted label k.
    for k in set(y_pred):
        k_vecs = [X_val[i, :] for i in range(len(y_pred)) if y_pred[i]==k]
        centroids[k] = T.centroid(k_vecs)
        radii[k] = T.radius(centroids[k], k_vecs)
    # Find centroids and radii of the true classes, grouping the same rows
    # by their label in y_val instead.
    centroids_actual = {} 
    radii_actual = {}
    for k in set(y_val):
        c_vecs = [X_val[i, :] for i in range(len(y_val)) if y_val[i]==k]
        centroids_actual[k] = T.centroid(c_vecs)
        radii_actual[k] = T.radius(centroids_actual[k], c_vecs)
    # True-class centroids and radii as lists in sorted class order.
    # NOTE(review): this assumes every key of `vs` also occurs in y_val;
    # otherwise the lookups raise KeyError - confirm.
    cent_list = [centroids_actual[c] for c in classes]
    rad_list = [radii_actual[c] for c in classes]
    # Match each detected cluster to the closest class using the metric
    # below, which adds the centroid distance to the radius distance.
    def cluster_metric(c1, c2, r1, r2):
        """Return T.dist(c1, c2) + T.dist(r1, r2) for two clusters.

        c1 and c2 are the centroids, r1 and r2 the radii, of the two
        clusters being compared.
        """
        return T.dist(c1, c2) + T.dist(r1,r2)
    def find_nearest_cluster(centroid, radius, centroid_list, radius_list):
        (c1, r1) = (centroid, radius)