def identify_cluster(self, tree):
        gn = tree
        can_cluster = []
        final_cluster = []
        # Build candidate clusters: one per top-level child, then one per
        # descendant, with labels accumulated along the path.
        for pn in gn.children:
            first_cluster = Cluster(pn.name, pn.doc)
            can_cluster.append(first_cluster)
            for cn in pn.children:
                next_cluster = Cluster(cn.parent.name + " " + cn.name, cn.doc)
                temp = next_cluster
                can_cluster.append(next_cluster)
                for ccn in PreOrderIter(cn, filter_=lambda n: n is not cn):
                    new_cluster = Cluster(next_cluster.label + " " + ccn.name,
                                          ccn.doc)
                    can_cluster.append(new_cluster)
                    # After a leaf, reset the label prefix back to the
                    # grandchild cluster; keep extending it through
                    # internal nodes.
                    if ccn.is_leaf:
                        next_cluster = temp
                    else:
                        next_cluster = new_cluster

            # compute cluster similarity for this subtree's candidates
            self.compute_clus_sim(can_cluster)
            final_cluster += can_cluster
            can_cluster = []

        # Filter out useless clusters: drop empty ones and, when two clusters
        # share documents, keep only the larger one. Iterate over a copy so
        # that removing elements does not skip neighbors.
        for cluster in reversed(final_cluster[:]):
            if not cluster.documents:
                final_cluster.remove(cluster)
            else:
                for cluster2 in filter(lambda x: x != cluster, final_cluster):
                    if set(cluster.documents) & set(cluster2.documents):
                        if len(cluster.documents) <= len(cluster2.documents):
                            final_cluster.remove(cluster)
                            break


        return final_cluster
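For context, identify_cluster walks an anytree hierarchy whose nodes carry a doc attribute. A minimal sketch of building such an input tree (node names and doc payloads are illustrative; Cluster and compute_clus_sim are project-specific):

from anytree import Node, PreOrderIter

# Each node carries a `doc` attribute, as identify_cluster expects.
root = Node("root", doc=[])
science = Node("science", parent=root, doc=["doc1"])
physics = Node("physics", parent=science, doc=["doc2"])
biology = Node("biology", parent=science, doc=["doc3"])

print([n.name for n in PreOrderIter(root)])
# ['root', 'science', 'physics', 'biology']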
Example #2
import os

import nipype.pipeline.engine as pe
import nipype.interfaces.io as nio
import nipype.interfaces.utility as util

# Cluster and ClusterMap are project-specific nipype interfaces, and the
# config names (workingdir, fsaverage, hemispheres, cluster_types,
# n_clusters, clustering_dg_template, clustering_dg_args, clusterdir,
# epsilon) are defined elsewhere in the source module.


def get_wf():

    wf = pe.Workflow(name="main_workflow")
    wf.base_dir = os.path.join(workingdir, "clustering_pipeline")
    wf.config['execution']['crashdump_dir'] = wf.base_dir + "/crash_files"

    ##Infosource##

    fs_infosource = pe.Node(util.IdentityInterface(fields=['fs']),
                            name="fs_infosource")
    fs_infosource.iterables = ('fs', fsaverage)

    hemi_infosource = pe.Node(util.IdentityInterface(fields=['hemi']),
                              name="hemi_infosource")
    hemi_infosource.iterables = ('hemi', hemispheres)

    cluster_infosource = pe.Node(util.IdentityInterface(fields=['cluster']),
                                 name="cluster_infosource")
    cluster_infosource.iterables = ('cluster', cluster_types)

    n_clusters_infosource = pe.Node(
        util.IdentityInterface(fields=['n_clusters']),
        name="n_clusters_infosource")
    n_clusters_infosource.iterables = ('n_clusters', n_clusters)

    ##Datagrabber##
    datagrabber = pe.Node(nio.DataGrabber(
        infields=['fs', 'hemi'],
        outfields=['simmatrix', 'maskindex', 'targetmask']),
                          name="datagrabber")
    datagrabber.inputs.base_directory = '/'
    datagrabber.inputs.template = '*'
    datagrabber.inputs.field_template = clustering_dg_template
    datagrabber.inputs.template_args = clustering_dg_args
    datagrabber.inputs.sort_filelist = True

    wf.connect(fs_infosource, 'fs', datagrabber, 'fs')
    wf.connect(hemi_infosource, 'hemi', datagrabber, 'hemi')

    ##clustering##
    clustering = pe.Node(Cluster(), name='clustering')
    clustering.inputs.epsilon = epsilon
    wf.connect(hemi_infosource, 'hemi', clustering, 'hemi')
    wf.connect(cluster_infosource, 'cluster', clustering, 'cluster_type')
    wf.connect(n_clusters_infosource, 'n_clusters', clustering, 'n_clusters')
    wf.connect(datagrabber, 'simmatrix', clustering, 'in_File')

    ##reinflate to surface indices##
    clustermap = pe.Node(ClusterMap(), name='clustermap')
    wf.connect(clustering, 'out_File', clustermap, 'clusteredfile')
    wf.connect(datagrabber, 'maskindex', clustermap, 'indicesfile')
    wf.connect(datagrabber, 'targetmask', clustermap, 'maskfile')

    ##Datasink##
    ds = pe.Node(nio.DataSink(), name="datasink")
    ds.inputs.base_directory = clusterdir
    wf.connect(clustermap, 'clustermapfile', ds, 'clustered')
    wf.connect(clustermap, 'clustermaptext', ds, 'clustered.@1')
    wf.write_graph()
    return wf
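A minimal usage sketch, assuming the module-level configuration and the project-specific Cluster and ClusterMap interfaces noted above are in scope:

wf = get_wf()
# Run locally across cores; nipype also supports grid plugins such as 'SGE'.
wf.run(plugin='MultiProc', plugin_args={'n_procs': 4})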
Example #3
    def computeClusters(self, symbolClasses):
        for cl in symbolClasses:
            cl.clusters = []
            distortedClassesOfSingleClass = cl.learning_set[:]

            centroids, labels = self.__computeClusters(
                distortedClassesOfSingleClass)

            # distinguish k clusters
            for j in range(0, global_v.K):
                # points of this cluster
                points = []
                for c in range(0, len(distortedClassesOfSingleClass)):
                    if labels[c] == j:
                        points.append(distortedClassesOfSingleClass[c]
                                      .characteristicsValues)

                cluster = Cluster(centroids[j], points, cl.name, j)
                cl.clusters.append(cluster)
Example #4
def compute(k, training_set):
    clusters = []
    centroids, labels = __computeClusters(k, training_set)

    # distinguish k clusters
    for j in range(0,k):
        # points of this cluster
        points = []
        for c in range(0, len(training_set)):
            if labels[c] == j:
                points.append(training_set[c].characteristicsValues)

        # training_set[c] here is the last item examined, so this assumes
        # every item in training_set shares the same class name.
        cluster = Cluster(centroids[j], points, training_set[c].name, j,
                          give_info=False,
                          do_ellipsoid=False,
                          do_cuboid=False)

        clusters.append(cluster)

    return clusters
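Both compute and computeClusters above delegate to a __computeClusters helper that is not shown. A plausible sketch of such a helper using scikit-learn's KMeans (an assumption; the original implementation may differ):

from sklearn.cluster import KMeans

def __computeClusters(k, training_set):
    # Each training item exposes its feature vector via characteristicsValues.
    data = [item.characteristicsValues for item in training_set]
    km = KMeans(n_clusters=k, n_init=10).fit(data)
    return km.cluster_centers_, km.labels_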
Example #5
import os

import nipype.pipeline.engine as pe
import nipype.interfaces.io as nio
import nipype.interfaces.utility as util

# Cluster, ClusterMap, and Consensus are project-specific nipype interfaces;
# the config names (workingdir, hemispheres, similarity_types, cluster_types,
# n_clusters, consensus_dg_template, consensus_dg_args, consensusdir) are
# defined elsewhere in the source module.


def get_wf():
    wf = pe.Workflow(name="main_workflow")
    wf.base_dir = os.path.join(workingdir, "consensus_pipeline")
    wf.config['execution']['crashdump_dir'] = wf.base_dir + "/crash_files"

    ##Infosource##
    #session_infosource = pe.Node(util.IdentityInterface(fields=['session']), name="session_infosource")
    #session_infosource.iterables = ('session', analysis_sessions)

    hemi_infosource = pe.Node(util.IdentityInterface(fields=['hemi']),
                              name="hemi_infosource")
    hemi_infosource.iterables = ('hemi', hemispheres)

    sim_infosource = pe.Node(util.IdentityInterface(fields=['sim']),
                             name="sim_infosource")
    sim_infosource.iterables = ('sim', similarity_types)

    cluster_infosource = pe.Node(util.IdentityInterface(fields=['cluster']),
                                 name="cluster_infosource")
    cluster_infosource.iterables = ('cluster', cluster_types)

    n_clusters_infosource = pe.Node(
        util.IdentityInterface(fields=['n_clusters']),
        name="n_clusters_infosource")
    n_clusters_infosource.iterables = ('n_clusters', n_clusters)

    ##Datagrabber for subjects##
    dg_subjects = pe.Node(nio.DataGrabber(
        infields=['hemi', 'cluster', 'sim', 'n_clusters'],
        outfields=['all_subjects', 'maskindex', 'targetmask']),
                          name="dg_subjects")
    dg_subjects.inputs.base_directory = '/'
    dg_subjects.inputs.template = '*'
    dg_subjects.inputs.field_template = consensus_dg_template
    dg_subjects.inputs.template_args = consensus_dg_args
    dg_subjects.inputs.sort_filelist = True

    #wf.connect(session_infosource, 'session', dg_subjects, 'session')
    wf.connect(hemi_infosource, 'hemi', dg_subjects, 'hemi')
    wf.connect(cluster_infosource, 'cluster', dg_subjects, 'cluster')
    wf.connect(sim_infosource, 'sim', dg_subjects, 'sim')
    wf.connect(n_clusters_infosource, 'n_clusters', dg_subjects, 'n_clusters')

    ##Consensus between subjects##
    intersubject = pe.Node(Consensus(), name='intersubject')
    wf.connect(dg_subjects, 'all_subjects', intersubject, 'in_Files')
    wf.connect(dg_subjects, 'targetmask', intersubject, 'maskfile')

    ##Cluster the Consensus Matrix##
    intersubject_cluster = pe.Node(Cluster(), name='intersubject_cluster')
    wf.connect(intersubject, 'consensus_mat', intersubject_cluster, 'in_File')
    wf.connect(hemi_infosource, 'hemi', intersubject_cluster, 'hemi')
    wf.connect(cluster_infosource, 'cluster', intersubject_cluster,
               'cluster_type')
    wf.connect(n_clusters_infosource, 'n_clusters', intersubject_cluster,
               'n_clusters')

    ##Decompress##
    clustermap = pe.Node(ClusterMap(), name='clustermap')
    wf.connect(intersubject_cluster, 'out_File', clustermap, 'clusteredfile')
    wf.connect(dg_subjects, 'maskindex', clustermap, 'indicesfile')
    wf.connect(dg_subjects, 'targetmask', clustermap, 'maskfile')

    ##Datasink##
    ds = pe.Node(nio.DataSink(), name="datasink")
    ds.inputs.base_directory = consensusdir
    wf.connect(intersubject, 'variation_mat', ds, 'variation_mat')
    wf.connect(intersubject, 'consensus_mat', ds, 'consensus_not_clustered')
    #wf.connect(intersession_cluster, 'out_File', ds, 'consensus_intersession')
    wf.connect(clustermap, 'clustermapfile', ds, 'consensus_clustered')
    wf.write_graph()
    return wf
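The four iterables above make nipype expand this workflow into one run per parameter combination. A sketch of the grid it enumerates, with assumed example values for the module-level config lists:

from itertools import product

hemispheres = ['lh', 'rh']              # assumed example values
similarity_types = ['eta2']             # assumed example values
cluster_types = ['kmeans', 'spectral']  # assumed example values
n_clusters = [2, 5]                     # assumed example values

for hemi, sim, cluster, k in product(hemispheres, similarity_types,
                                     cluster_types, n_clusters):
    print(hemi, sim, cluster, k)  # one pipeline instance per combination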