def plot_train_radii_separation(obj_dir=C.obj_dir, std=False):
    """
    Plot the average cluster radius and average centroid separation
    during training.

    For every iteration in the validation history read from `obj_dir`,
    the centroid and radius of each class are computed with `T.centroid`
    and `T.radius`, and the centroid-to-centroid distances with `T.dist`.
    Two error-bar curves are drawn against the sorted iteration keys:

    * 'Radii': mean of the per-class radii, with their `stdev` as error bar.
    * 'Average separation': mean over classes of each class's average
      distance to all class centroids, with the `stdev` of every pairwise
      distance as error bar. The self-distance (c2 == c) is included in
      both the averages and the spread.

    Args:
        obj_dir: directory passed to `read_validation_history`.
        std: accepted for interface compatibility; it is not read by this
            function (error bars are always drawn).

    Returns:
        The figure object returned by `plt.subplots`.
    """
    vh = read_validation_history(obj_dir)  # is squeezed

    radii = {}     # radii[i][c]   -> radius of class c at iteration i
    av_dist = {}   # av_dist[i][c] -> mean distance from c's centroid to all centroids
    dist_all = {}  # dist_all[i]   -> flat list of every pairwise centroid distance

    for i in vh:
        # unsqueeze as we go:
        vecs = dict_unsqueeze(vh[i])
        classes = list(vecs)

        centroids = {c: T.centroid(vecs[c]) for c in classes}
        radii[i] = {c: T.radius(centroids[c], vecs[c]) for c in classes}

        av_dist[i] = {}
        dist_all[i] = []
        for c in classes:
            # Build the complete row of distances from c before averaging,
            # so every class is averaged over the same full set of
            # centroids regardless of its position in the iteration order.
            row = [T.dist(centroids[c], centroids[c2]) for c2 in classes]
            dist_all[i].extend(row)
            av_dist[i][c] = Average(row)

    its = sorted(vh)

    rads = [Average(list(radii[i].values())) for i in its]
    rads_std = [stdev(list(radii[i].values())) for i in its]
    dists = [Average(list(av_dist[i].values())) for i in its]
    dists_std = [stdev(dist_all[i]) for i in its]

    fig, ax = plt.subplots(1)
    ax.errorbar(its, rads, yerr=rads_std, linewidth=3, linestyle='dashed',
                label='Radii', elinewidth=1, capsize=10)
    ax.errorbar(its, dists, yerr=dists_std, linewidth=3,
                label='Average separation', elinewidth=1, capsize=10)
    ax.legend()
    ax.set(xlabel='Iteration')
    ax.set(ylabel='radius')
    fig.suptitle('Average class radii and separation during training', fontsize=16)
    return fig
else: train_step(optimizer=optimizer) C.learn_rate = C.learn_rate * C.lr_decay base_model.save(save_name(i)) vs = T.get_vectors(base_model, C.val_dir) T.save_obj(vs, os.path.join(C.obj_dir, 'val_pred_' + str(i))) c = T.count_nearest_centroid(vs) log('Summarizing ' + str(i)) with open(os.path.join(C.log_dir, 'summarize.' + str(i) + '.log'), 'w') as sumfile: T.summarize(vs, outfile=sumfile) with open(os.path.join(C.log_dir, 'clusters.' + str(i) + '.log'), 'w') as cfile: T.confusion_counts(c, outfile=cfile) c_tmp = {} r_tmp = {} for v in vs: c_tmp[v] = T.centroid(vs[v]) r_tmp[v] = T.radius(c_tmp[v], vs[v]) c_rad = [round(100 * r_tmp[v]) / 100 for v in vs] c_mv = [round(100 * T.dist(c_tmp[v], cents[v])) / 100 for v in vs] log('Centroid radius: ' + str(c_rad)) log('Centroid moved: ' + str(c_mv)) cents = c_tmp with open(C.logfile, 'a') as f: T.accuracy_counts(c, outfile=f) # todo: avg cluster radius, avg cluster distances log('Avg centr rad: %.2f move: %.2f' % (avg(c_rad), avg(c_mv)))
def plotly_animate_spheres(validation_history, classes=None, dims=[0,1], ax_lims=1., notebook=False):
    """
    Animate cluster evolution during training as a 2D Plotly figure.

    Each class is drawn as a single marker placed at its centroid
    (projected onto the two components selected by `dims`) and sized by
    its radius * 100, which is clearer and faster than plotting every
    point. One animation frame is produced per step of the history, with
    a slider and Play/Pause buttons to move between frames.

    Args:
        validation_history: mapping step -> per-class vectors; each entry
            is passed through `dict_unsqueeze` before use.
        classes: class keys to draw. Defaults to the sorted keys of the
            first step. The position of a class in this list selects its
            colour from `colours`.
        dims: the two centroid components used as x and y. The default
            list is never mutated here.
        ax_lims: both axes span [-ax_lims, ax_lims].
        notebook: if true the figure is shown with `iplot`, otherwise
            with `plot`.

    Returns:
        None. The figure is rendered as a side effect.
    """
    vh = validation_history
    its = sorted(vh)
    if classes is None:
        classes = sorted(vh[its[0]])

    axl = ax_lims

    # Single transition spec, used by the slider steps, the slider itself
    # and the Play button so that all three animate identically.
    transition = dict(duration=300, easing='cubic-in-out')

    # circles[e] is the list of marker traces (one per class) for step e.
    circles = {}
    for e in its:
        # Unsqueeze once per step rather than once per class lookup.
        vecs = dict_unsqueeze(vh[e])
        traces = []
        for i, c in enumerate(classes):
            centre = T.centroid(vecs[c])
            radius = T.radius(centre, vecs[c])
            traces.append(
                dict(
                    text=c,
                    name=c,
                    mode='markers',
                    x=[centre[dims[0]]],
                    y=[centre[dims[1]]],
                    marker=dict(
                        color=colours[i % len(colours)],
                        size=radius * 100,
                    ),
                )
            )
        circles[e] = traces

    # Starting data: the first step, matching the slider's active=0.
    data = circles[its[0]]

    # List of dicts of data items to update at each step.
    frames = [dict(data=circles[e], name='frame{}'.format(e)) for e in its]

    steps = [
        dict(
            # Plotly method to be called when the slider value is changed.
            method='animate',
            # Arguments passed to that method on slide.
            args=[
                ['frame{}'.format(e)],
                dict(
                    mode='immediate',
                    frame=dict(duration=300, redraw=False),
                    transition=transition,
                ),
            ],
            label='{}'.format(e),
        )
        for e in its
    ]

    sliders = [
        dict(
            steps=steps,
            transition=transition,
            currentvalue=dict(
                font=dict(size=12),
                prefix='Step: ',
                visible=True,
                xanchor='center',
            ),
            active=0,
            len=1.0,  # slider length
        )
    ]

    play_button = {
        'args': [None, {'frame': {'duration': 500, 'redraw': False},
                        'fromcurrent': True,
                        'transition': transition}],
        'label': 'Play',
        'method': 'animate',
    }
    pause_button = {
        'args': [[None], {'frame': {'duration': 0, 'redraw': False},
                          'mode': 'immediate',
                          'transition': {'duration': 0}}],
        'label': 'Pause',
        'method': 'animate',
    }

    layout = dict(
        title='Interactive Cluster Shapes, dims={}'.format(dims),
        xaxis=dict(range=[-axl, axl], zeroline=True),
        yaxis=dict(range=[-axl, axl], zeroline=True),
        sliders=sliders,
        legend=dict(itemsizing='constant'),
        updatemenus=[{
            'buttons': [play_button, pause_button],
            'direction': 'left',
            'pad': {'r': 10, 't': 87},
            'showactive': False,
            'type': 'buttons',
            'x': 0.1,
            'xanchor': 'right',
            'y': 0,
            'yanchor': 'top',
        }],
    )

    fig = dict(data=data, layout=layout, frames=frames)

    if notebook:
        iplot(fig, validate=False)
    else:
        plot(fig, validate=False)
    return
#K-means clustering (unsupervised) classes = list(vs) classes.sort() n_clusters = len(classes) centersSKL = cluster.MiniBatchKMeans(n_clusters) centersSKL.fit(X_val) y_pred = centersSKL.predict(X_val) #identify the clusters by class (hopefully) centroids = {} #for the predicted clusters radii = {} #find centroids, radii of detected clusters for k in set(y_pred): k_vecs = [X_val[i, :] for i in range(len(y_pred)) if y_pred[i]==k] centroids[k] = T.centroid(k_vecs) radii[k] = T.radius(centroids[k], k_vecs) #find centroids, radii of the true classes centroids_actual = {} radii_actual = {} for k in set(y_val): c_vecs = [X_val[i, :] for i in range(len(y_val)) if y_val[i]==k] centroids_actual[k] = T.centroid(c_vecs) radii_actual[k] = T.radius(centroids_actual[k], c_vecs) cent_list = [centroids_actual[c] for c in classes] rad_list = [radii_actual[c] for c in classes] #match each detected cluster to closest class #by minimising L2 norm of (centroid; radius) difference def cluster_metric(c1, c2, r1, r2): return T.dist(c1, c2) + T.dist(r1,r2) def find_nearest_cluster(centroid, radius, centroid_list, radius_list): (c1, r1) = (centroid, radius)