Ejemplo n.º 1
0
    def plot(self, xy = (0,1)):
        """
        Draw a 2D embedded plot from two selected columns of `pos`.

        :param xy: the two dimensions (column indices) of `pos` to plot;
            only ``xy[0]`` and ``xy[1]`` are read.
        :type xy: tuple, optional
        :return: the value returned by `plot_clusters`.

        """
        # Keep every row of `pos`, but only the two requested columns.
        embedding = self.pos[:, [xy[0], xy[1]]]
        return plot_clusters(embedding, self.labels, clusters=self._cls)
Ejemplo n.º 2
0
			break
		else:
			print "Replacing cluster %i with trajectory %i" % (clusterIndex, tjcIndex)
			means[clusterIndex] = tjcs[tjcIndex]
			meansIndex.append(tjcIndex)
			ll_old = 0
			i = 0
			continue
	else:
		ll_old = ll_new
		i += 1
	print "Iteration Number: ", i 
    
# Plot the results of the E-M algorithm.
fig2 = plt.figure()
myplt.plot_clusters(clusters, tjcs, "Results after Clustering", "600x500+850+0")

# Save the means structure, which represents the clusters' typical
# trajectories, into a timestamped npy file inside the "data" directory.
timestr = time.strftime("%Y%m%d-%H%M%S")
# Pass the directory and the file name as separate components so that join
# actually builds the path (assumes `path` is os.path -- TODO confirm).
filename = path.join("data", timestr + "-clusters")
np.save(filename, means)

# Plot the clusters that were found.
fig3 = plt.figure()
myplt.plot_time_model(means, "Clusters", "600x500+200+600")

# Plot cluster contribs: sum `clusters` along axis 0.
cluster_contribs = np.sum(clusters, 0)
fig4 = plt.figure()
wm = plt.get_current_fig_manager()
Ejemplo n.º 3
0
def main(_):
    """
    Run the task selected by ``FLAGS.task``.

    Supported tasks:

    * ``'metrics'``: read the scored phrase list and write the bottom-k,
      mid-k and top-k selections to ``FLAGS.output`` in the data folder.
    * ``'clustering'``: cluster the data set, write one ``cluster<id>.txt``
      file per predicted cluster into ``FLAGS.cluster_folder`` and hand the
      results to ``save_cluster_numpy``.
    * ``'plot'``: read the data set, load the saved cluster assignments and
      plot them.
    * ``'compare'``: read segmentation metrics and plot both phrase curves.

    Any other task value falls through and does nothing.

    :param _: unused; presumably the argument list supplied by the flags/app
        runner -- confirm against the caller.
    """
    data_folder = FLAGS.data_folder
    filename = FLAGS.filename

    if FLAGS.task == 'metrics':
        output = FLAGS.output
        k = FLAGS.k
        # find top-k, mid-k and bottom-k
        scores, phrases = read_phrase_list(data_folder=data_folder,
                                           filename=filename)
        # Sections are computed and written in this order: bottom, mid, top.
        sections = (
            ('Bottom-k', bottom_k(scores, phrases, k=k)),
            ('Mid-k', mid_k(scores, phrases, k=k)),
            ('Top-k', top_k(scores, phrases, k=k)),
        )

        with open(data_folder + '/' + output, 'w') as out:
            for position, (title, entries) in enumerate(sections):
                if position:
                    # A newline separates consecutive sections; nothing is
                    # written after the last one.
                    out.write('\n')
                out.write(title + ' (' + str(k) + ')' + '\n')
                for entry in entries:
                    out.write(entry)

    elif FLAGS.task == 'clustering':
        clusters_folder = data_folder + '/' + FLAGS.cluster_folder
        print('Current step: Reading data set', end='\r')
        y_train, x_train = read_dataset(filename=filename,
                                        data_folder=data_folder)

        print('Current step: Performing clustering', end='\r')
        task = PhraseClustering()
        y_pred = task.run(x_train,
                          n_clusters=FLAGS.n_clusters,
                          distance=FLAGS.distance)

        # create result files
        print('Current step: Saving clustered phrases', end='\r')
        # exist_ok avoids the race between checking for the folder and
        # creating it.
        os.makedirs(clusters_folder, exist_ok=True)

        files = {}
        try:
            # One output file per distinct predicted cluster id.
            for cluster in np.unique(y_pred):
                files[cluster] = open(
                    clusters_folder + '/cluster' + str(cluster) + '.txt',
                    'w')

            # The i-th prediction decides which file receives y_train[i].
            for i, cluster in enumerate(y_pred.tolist()):
                files[cluster].write(y_train[i] + '\n')
        finally:
            # Close every handle that was opened, even if opening a later
            # file or writing a phrase failed.
            for handle in files.values():
                handle.close()

        print('Current step: Saving numpy files', end='\r')
        save_cluster_numpy(x_train, y_pred, data_folder)
    elif FLAGS.task == 'plot':
        print('Current step: Reading data set', end='\r')
        y_train, x_train = read_dataset(filename=filename,
                                        data_folder=data_folder)

        y_pred = load_cluster_numpy(data_folder)
        print('Current step: Generating plots', end='\r')
        plot_clusters(x_train, y_pred, FLAGS.n_clusters)
    elif FLAGS.task == 'compare':
        data = read_segmentation_metrics(filename=filename,
                                         data_folder=data_folder)
        plot_avg_phrases_curve(data)
        plot_total_phrases_curve(data)
Ejemplo n.º 4
0
    if i < iterations - 1:
        c_index, c_score = em.worst_cluster(clusters)
        t_index, t_score = em.worst_trajectory(clusters, c_index, c_score, 
                                               tjcIndex, tjcs, covariance)
        # If a worst trajectory is not found.
        if t_index == -1:
            #break
            means[c_index] = -1
        else:    
            #print "Replacing cluster %i with trajectory %i" % ( c_index, t_index )
            means[c_index] = tjcs[t_index]
            tjcIndex.append( t_index )
            
    # This is for plotting the results.
    plt.title("Clustering, Iteration Number: %s" %(i+1))
    myplt.plot_clusters(clusters, tjcs)
    plt.pause(0.01)

# Save the means structure, which represents the clusters' typical
# trajectories, into an npy file.
np.save("data/clusters.npy", means)
    
# Plot the clusters that were found, in a new figure.
fig3 = plt.figure()
# Set the figure window's geometry string through the figure manager
# (presumably a Tk-based backend, "WIDTHxHEIGHT+X+Y" -- TODO confirm).
wm = plt.get_current_fig_manager()
wm.window.wm_geometry("600x500+350+200")
# NOTE(review): the title's return value is bound to `fi3`, not `fig3`;
# possibly a typo -- confirm whether `fi3` is used anywhere.
fi3 = plt.title("Clusters")
plt.grid()
# Ticks at unit steps: 0..15 on the x axis and 0..11 on the y axis.
plt.xticks(np.arange(0, 16, 1.0))
plt.yticks(np.arange(0, 12, 1.0))
plt.xlabel('X (mts)')
    #####################################################################
    #####################################################################
    best_clustering = get_best_clustering(working_directory)
    best_clustering = Clustering.from_dic(best_clustering["clustering"])
    scores = []
    for a_cluster in best_clustering.clusters:
        scores.append((score_cluster(a_cluster, m_handler.distance_matrix, all_metrics), a_cluster))

    # Remember: first metric is spawning point,  second metric must be energy
    most_negative = find_most_negative_cluster(scores)
    most_negative_cluster = most_negative[1]

    #######################################################################################################################
    # Plot clusters
    #######################################################################################################################
    plot_clusters(os.path.join(base_dir, "clusters.svg"), all_metrics, scores, scores.index(most_negative))

    #######################################################################################################################
    # Store the elements of the most negative cluster
    #######################################################################################################################
    # This works because we have only one traj. Merging before should be mandatory.
    trajectory_path = os.path.join(os.getcwd(), traj_pdb)
    in_handler = open(trajectory_path,"r")
    most_negative_path = os.path.join(base_dir,"most_negative_cluster.pdb")
    out_handler = open(most_negative_path,"w")
    extract_frames_from_trajectory_sequentially(file_handler_in = in_handler,
                                               number_of_frames = get_number_of_frames(trajectory_path),
                                               file_handler_out = out_handler,
                                               frames_to_save = most_negative_cluster.all_elements,
                                               keep_header = True,
                                               write_frame_number_instead_of_correlative_model_number = True)