# Assumes the anytree library for tree traversal.
from anytree import PreOrderIter


def identify_cluster(self, tree):
    """Walk the topic tree, build candidate clusters per subtree, and keep
    only the non-empty, non-redundant ones."""
    gn = tree
    can_cluster = []
    final_cluster = []
    for pn in gn.children:
        # One candidate cluster per top-level node.
        first_cluster = Cluster(pn.name, pn.doc)
        can_cluster.append(first_cluster)
        for cn in pn.children:
            # Children are labelled with their path, e.g. "parent child".
            next_cluster = Cluster(cn.parent.name + " " + cn.name, cn.doc)
            temp = next_cluster
            can_cluster.append(next_cluster)
            for ccn in PreOrderIter(cn, filter_=lambda n: n.name != cn.name):
                new_cluster = Cluster(next_cluster.label + " " + ccn.name,
                                      ccn.doc)
                can_cluster.append(new_cluster)
                # A leaf resets the label prefix to the subtree root; an
                # inner node extends the prefix for its descendants.
                if ccn.is_leaf:
                    next_cluster = temp
                else:
                    next_cluster = new_cluster
        # Compute cluster similarity for this subtree's candidates.
        self.compute_clus_sim(can_cluster)
        final_cluster += can_cluster
        can_cluster = []
    # Filter useless clusters: drop empty ones, and of any two clusters
    # with overlapping documents drop the smaller. Iterate over a snapshot
    # copy so removals do not disturb the traversal.
    for cluster in reversed(final_cluster[:]):
        if not cluster.documents:
            final_cluster.remove(cluster)
        else:
            for cluster2 in filter(lambda x: x != cluster, final_cluster):
                if set(cluster.documents) & set(cluster2.documents):
                    if len(cluster.documents) <= len(cluster2.documents):
                        final_cluster.remove(cluster)
                        break
    return final_cluster
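# A minimal, hypothetical sketch of the input identify_cluster() expects:
# anytree Nodes carrying a `doc` attribute listing the documents attached
# to each topic node. The names and documents are made up for illustration;
# the real tree is built elsewhere in the project.
from anytree import Node

root = Node("root", doc=[])
sports = Node("sports", parent=root, doc=["d1", "d2"])
football = Node("football", parent=sports, doc=["d1"])
tennis = Node("tennis", parent=sports, doc=["d2"])

# clusters = self.identify_cluster(root)  # called on the owning object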
import os

import nipype.pipeline.engine as pe
import nipype.interfaces.io as nio
import nipype.interfaces.utility as util

# Cluster, ClusterMap and the configuration globals (workingdir, fsaverage,
# hemispheres, cluster_types, n_clusters, epsilon, clustering_dg_template,
# clustering_dg_args, clusterdir) come from the project's own modules.


def get_wf():
    wf = pe.Workflow(name="main_workflow")
    wf.base_dir = os.path.join(workingdir, "clustering_pipeline")
    wf.config['execution']['crashdump_dir'] = wf.base_dir + "/crash_files"

    ##Infosource##
    fs_infosource = pe.Node(util.IdentityInterface(fields=['fs']),
                            name="fs_infosource")
    fs_infosource.iterables = ('fs', fsaverage)

    hemi_infosource = pe.Node(util.IdentityInterface(fields=['hemi']),
                              name="hemi_infosource")
    hemi_infosource.iterables = ('hemi', hemispheres)

    cluster_infosource = pe.Node(util.IdentityInterface(fields=['cluster']),
                                 name="cluster_infosource")
    cluster_infosource.iterables = ('cluster', cluster_types)

    n_clusters_infosource = pe.Node(
        util.IdentityInterface(fields=['n_clusters']),
        name="n_clusters_infosource")
    n_clusters_infosource.iterables = ('n_clusters', n_clusters)

    ##Datagrabber##
    datagrabber = pe.Node(nio.DataGrabber(
        infields=['fs', 'hemi'],
        outfields=['simmatrix', 'maskindex', 'targetmask']),
        name="datagrabber")
    datagrabber.inputs.base_directory = '/'
    datagrabber.inputs.template = '*'
    datagrabber.inputs.field_template = clustering_dg_template
    datagrabber.inputs.template_args = clustering_dg_args
    datagrabber.inputs.sort_filelist = True
    wf.connect(fs_infosource, 'fs', datagrabber, 'fs')
    wf.connect(hemi_infosource, 'hemi', datagrabber, 'hemi')

    ##Clustering##
    clustering = pe.Node(Cluster(), name='clustering')
    clustering.inputs.epsilon = epsilon
    wf.connect(hemi_infosource, 'hemi', clustering, 'hemi')
    wf.connect(cluster_infosource, 'cluster', clustering, 'cluster_type')
    wf.connect(n_clusters_infosource, 'n_clusters', clustering, 'n_clusters')
    wf.connect(datagrabber, 'simmatrix', clustering, 'in_File')

    ##Reinflate to surface indices##
    clustermap = pe.Node(ClusterMap(), name='clustermap')
    wf.connect(clustering, 'out_File', clustermap, 'clusteredfile')
    wf.connect(datagrabber, 'maskindex', clustermap, 'indicesfile')
    wf.connect(datagrabber, 'targetmask', clustermap, 'maskfile')

    ##Datasink##
    ds = pe.Node(nio.DataSink(), name="datasink")
    ds.inputs.base_directory = clusterdir
    wf.connect(clustermap, 'clustermapfile', ds, 'clustered')
    wf.connect(clustermap, 'clustermaptext', ds, 'clustered.@1')

    wf.write_graph()
    return wf
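# Hypothetical driver for the clustering workflow, as a sketch only. The
# configuration values below are assumptions (the real ones live in the
# project's config module) and would need to be module globals visible to
# get_wf(); epsilon, the DataGrabber templates and clusterdir would have
# to be defined the same way.
if __name__ == '__main__':
    workingdir = "/scr/working_dir"          # assumed path
    fsaverage = ["fsaverage4"]               # assumed surface template
    hemispheres = ["lh", "rh"]
    cluster_types = ["kmeans", "spectral"]   # assumed algorithm names
    n_clusters = [2, 4, 6]

    wf = get_wf()
    # Run serially; Nipype also offers e.g. plugin='MultiProc' to parallelise.
    wf.run(plugin='Linear')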
def computeClusters(self, symbolClasses):
    """Cluster each symbol class's learning set into global_v.K clusters."""
    for cl in symbolClasses:
        cl.clusters = []
        # Work on a copy so the learning set itself is left untouched.
        distortedClassesOfSingleClass = cl.learning_set[:]
        centroids, labels = self.__computeClusters(
            distortedClassesOfSingleClass)
        # Distinguish K clusters.
        for j in range(0, global_v.K):
            # Collect the points assigned to cluster j.
            points = []
            for c in range(0, len(distortedClassesOfSingleClass)):
                if labels[c] == j:
                    points.append(
                        distortedClassesOfSingleClass[c].characteristicsValues)
            cluster = Cluster(centroids[j], points, cl.name, j)
            cl.clusters.append(cluster)
def compute(k, training_set):
    """Cluster the training set into k clusters and return them."""
    clusters = []
    centroids, labels = __computeClusters(k, training_set)
    # Distinguish k clusters.
    for j in range(0, k):
        # Collect the points assigned to cluster j.
        points = []
        for c in range(0, len(training_set)):
            if labels[c] == j:
                points.append(training_set[c].characteristicsValues)
        # Note: c leaks from the loop above, so the cluster is labelled
        # with the name of the last sample visited.
        cluster = Cluster(centroids[j], points, training_set[c].name, j,
                          give_info=False, do_ellipsoid=False,
                          do_cuboid=False)
        clusters.append(cluster)
    return clusters
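# __computeClusters itself is defined elsewhere in the project. Below is a
# plausible sketch using scikit-learn's KMeans, assuming every training
# sample exposes a characteristicsValues feature vector; the real helper
# may be implemented differently.
from sklearn.cluster import KMeans


def __computeClusters(k, training_set):
    # Stack the per-sample feature vectors and run k-means on them.
    features = [sample.characteristicsValues for sample in training_set]
    kmeans = KMeans(n_clusters=k).fit(features)
    return kmeans.cluster_centers_, kmeans.labels_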
def get_wf():
    wf = pe.Workflow(name="main_workflow")
    wf.base_dir = os.path.join(workingdir, "consensus_pipeline")
    wf.config['execution']['crashdump_dir'] = wf.base_dir + "/crash_files"

    ##Infosource##
    #session_infosource = pe.Node(util.IdentityInterface(fields=['session']),
    #                             name="session_infosource")
    #session_infosource.iterables = ('session', analysis_sessions)

    hemi_infosource = pe.Node(util.IdentityInterface(fields=['hemi']),
                              name="hemi_infosource")
    hemi_infosource.iterables = ('hemi', hemispheres)

    sim_infosource = pe.Node(util.IdentityInterface(fields=['sim']),
                             name="sim_infosource")
    sim_infosource.iterables = ('sim', similarity_types)

    cluster_infosource = pe.Node(util.IdentityInterface(fields=['cluster']),
                                 name="cluster_infosource")
    cluster_infosource.iterables = ('cluster', cluster_types)

    n_clusters_infosource = pe.Node(
        util.IdentityInterface(fields=['n_clusters']),
        name="n_clusters_infosource")
    n_clusters_infosource.iterables = ('n_clusters', n_clusters)

    ##Datagrabber for subjects##
    dg_subjects = pe.Node(nio.DataGrabber(
        infields=['hemi', 'cluster', 'sim', 'n_clusters'],
        outfields=['all_subjects', 'maskindex', 'targetmask']),
        name="dg_subjects")
    dg_subjects.inputs.base_directory = '/'
    dg_subjects.inputs.template = '*'
    dg_subjects.inputs.field_template = consensus_dg_template
    dg_subjects.inputs.template_args = consensus_dg_args
    dg_subjects.inputs.sort_filelist = True
    #wf.connect(session_infosource, 'session', dg_subjects, 'session')
    wf.connect(hemi_infosource, 'hemi', dg_subjects, 'hemi')
    wf.connect(cluster_infosource, 'cluster', dg_subjects, 'cluster')
    wf.connect(sim_infosource, 'sim', dg_subjects, 'sim')
    wf.connect(n_clusters_infosource, 'n_clusters', dg_subjects, 'n_clusters')

    ##Consensus between subjects##
    intersubject = pe.Node(Consensus(), name='intersubject')
    wf.connect(dg_subjects, 'all_subjects', intersubject, 'in_Files')
    wf.connect(dg_subjects, 'targetmask', intersubject, 'maskfile')

    ##Cluster the consensus matrix##
    intersubject_cluster = pe.Node(Cluster(), name='intersubject_cluster')
    wf.connect(intersubject, 'consensus_mat', intersubject_cluster, 'in_File')
    wf.connect(hemi_infosource, 'hemi', intersubject_cluster, 'hemi')
    wf.connect(cluster_infosource, 'cluster',
               intersubject_cluster, 'cluster_type')
    wf.connect(n_clusters_infosource, 'n_clusters',
               intersubject_cluster, 'n_clusters')

    ##Decompress##
    clustermap = pe.Node(ClusterMap(), name='clustermap')
    wf.connect(intersubject_cluster, 'out_File', clustermap, 'clusteredfile')
    wf.connect(dg_subjects, 'maskindex', clustermap, 'indicesfile')
    wf.connect(dg_subjects, 'targetmask', clustermap, 'maskfile')

    ##Datasink##
    ds = pe.Node(nio.DataSink(), name="datasink")
    ds.inputs.base_directory = consensusdir
    wf.connect(intersubject, 'variation_mat', ds, 'variation_mat')
    wf.connect(intersubject, 'consensus_mat', ds, 'consensus_not_clustered')
    #wf.connect(intersession_cluster, 'out_File', ds, 'consensus_intersession')
    wf.connect(clustermap, 'clustermapfile', ds, 'consensus_clustered')

    wf.write_graph()
    return wf