Example #1
def sscAdj_inNeighbour(file):  ## use neighbour adj as prior, limiting ssc's adj choice to be within neighbours
    """Cluster trajectories per connected component with SSC, constraining the adjacency to each component's neighbours."""

    xtrj = file['x_re']
    ytrj = file['y_re']
    xspd = file['xspd']
    yspd = file['yspd']
    trjID = file['trjID']
    trjAdj = (file['adj'] > 0).astype('float')  ## adj mtx
    CClabel = file['c']  # labels from connected Component

    # dataFeature = np.concatenate((xtrj,xspd,ytrj,yspd), axis = 1)
    dataFeature = np.concatenate((xtrj, ytrj), axis=1)

    labels = np.zeros(CClabel.size)
    adj = np.zeros((CClabel.size, CClabel.size))
    for i in np.unique(CClabel):
        print(i)
        sub_index = np.where(CClabel == i)[1]
        if sub_index.size > 3:
            if sub_index.size < 100:  # small component: let SSC construct the adjacency from the features
                project_dimension = int(np.floor(sub_index.size / 100) + 1)
                ssc = sparse_subspace_clustering(lambd=2000, dataset=dataFeature[sub_index, :],
                                                 n_dimension=project_dimension)
                ssc.construct_adjacency()
            else:  # component too big: fall back to the binary neighbour adjacency
                project_dimension = int(np.floor(sub_index.size / 100) + 1)
                sub_adjMtx = trjAdj[sub_index][:, sub_index]
                # restrict the dataset to this component so it matches the supplied sub-adjacency
                ssc = sparse_subspace_clustering(lambd=2000000, dataset=dataFeature[sub_index, :],
                                                 n_dimension=project_dimension)
                ssc.get_adjacency(sub_adjMtx)

            """adj assignment not successful!!! whyyyyyyyy"""
            # adjInNeighbour = ssc.adjacency
            # adj[sub_index][:,sub_index]= adjInNeighbour[:][:] 


            ssc.manifold()
            sub_labels, model = ssc.clustering(n_components=int(np.floor(sub_index.size / 2) + 1), alpha=0.1)
            # sub_labels = ssc.clustering_kmeans(int(np.floor(sub_index.size/4)+1))
            # visulize(ssc.embedding_,sub_labels,model)
            labels[sub_index] = np.max(labels) + (sub_labels + 1)
            print('number of trajectories in this connected component: %s  unique labels: %s'
                  % (sub_labels.size, np.unique(sub_labels).size))
        else:
            sub_labels = np.ones(sub_index.size)
            labels[sub_index] = np.max(labels) + sub_labels
            print('number of trajectories: %s  unique labels: %s' % (sub_labels.size, np.unique(sub_labels).size))
    # relabel to consecutive integers 0, 1, ..., k-1
    j = 0
    labels_new = np.zeros(labels.shape)
    unique_array = np.unique(labels)

    for i in range(unique_array.size):
        labels_new[np.where(labels == unique_array[i])] = j
        j = j + 1
    labels = labels_new

    return trjID, labels
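
The relabelling loop at the end of sscAdj_inNeighbour can be collapsed into a single NumPy call; a minimal sketch with a made-up label array (np.unique's return_inverse gives exactly the 0..k-1 relabelling the loop computes):

import numpy as np

labels = np.array([3., 3., 7., 1., 7., 7.])  # illustrative label array
# for each element, return_inverse holds its index into the sorted unique values
_, labels_consecutive = np.unique(labels, return_inverse=True)
print(labels_consecutive)  # [1 1 2 0 2 2]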
Example #2
def sscConstructedAdj_CC(DataPathobj):  # use SSC to construct the adjacency from the concatenated raw trajectory data
    """Construct the adjacency with SSC on the raw trajectory features, then cluster via connected components."""
    # if len(sorted(glob.glob(DataPathobj.adjpath+'extremeValue'+self.DirName[directionInd]+str(matidx+1).zfill(3))))>0:
    #     print "mean_std_ForKernel(ignore) and extremeValue already stored, load..."
    #     self.extremeValue = pickle.load(open(DataPathobj.adjpath+'extremeValue'+self.DirName[directionInd]+str(matidx+1).zfill(3),'rb'))

    # NOTE: self, feaName, directionInd, matidx and file come from the enclosing
    # class/method scope; this snippet was lifted out of a method, so they are not defined locally
    if len(sorted(glob.glob(self.savePath + feaName + self.DirName[directionInd] +
                            str(matidx + 1).zfill(3)))) > 0:
        print("distance diff already stored, load...")
        self.feature_diff_tensor = pickle.load(
            open(self.savePath + feaName + self.DirName[directionInd] +
                 str(matidx + 1).zfill(3), 'rb'))

    xtrj = self.feature_diff_tensor[:, :, 0]
    ytrj = file['y_re']
    xspd = file['xspd']
    yspd = file['yspd']
    trjID = file['trjID']
    dataFeature = np.concatenate((xtrj, xspd, ytrj, yspd), axis=1)
    project_dimension = int(np.floor(dataFeature.shape[0] / 10) + 1)  # assumes roughly 10 trajectories per group
    ssc = sparse_subspace_clustering(lambd=2000,
                                     dataset=dataFeature,
                                     n_dimension=project_dimension)
    ssc.construct_adjacency()
    adj = ssc.adjacency
    ssc.manifold()
    labels = ssc.clustering_connected(threshold=0,
                                      min_sample_cluster=50,
                                      alpha=0.1)
    return trjID, labels, adj
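
The returned (trjID, labels, adj) can be regrouped per cluster for downstream use; a minimal sketch with illustrative stand-in arrays (the real ones come from the function above):

import numpy as np

trjID = np.array([101, 102, 103, 104, 105])   # stand-in trajectory IDs
labels = np.array([0, 0, 1, 2, 1])            # stand-in cluster labels

# map each cluster label to the trajectory IDs it contains
clusters = {int(lab): trjID[labels == lab].tolist() for lab in np.unique(labels)}
for lab in sorted(clusters):
    print('cluster %d: %s' % (lab, clusters[lab]))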
Example #3
def ssc_with_Adj_CC(trjAdj, CClabel, trjID, Parameterobj):
    """Cluster each connected component with SSC, using the precomputed trajectory adjacency as input."""
    small_CC_oneCls = []

    labels = np.ones(CClabel.size) * (-222)  # sentinel value for not-yet-assigned trajectories

    print('connected component number:', len(np.unique(CClabel)))
    # the clustering below is driven by the precomputed adjacency (sub_FeatureMtx stays an empty placeholder)
    # FeatureMtx = pickle.load(open('./NGSIM_FeatureMtx', 'rb'))
    # FeatureMtx = pickle.load(open('./Johnson00115_FeatureMtx', 'rb'))
    # FeatureMtx[np.isnan(FeatureMtx)] = 0

    # color_choice = np.array([np.random.randint(0, 255) for _ in range(3 * int(np.unique(CClabel).size))]).reshape(int(np.unique(CClabel).size), 3)
    # for i in np.unique(CClabel):
    uniqueCC = np.unique(CClabel)
    for ind in range(len(uniqueCC)):
        i = uniqueCC[ind]
        # color = ((color_choice[ind].T) / 255.)
        # CClabel becomes a 2-D (1, N) array after the .mat round-trip, so np.where's [0]
        # is all zeros (row indices); take the column indices at [1] instead
        sub_index = np.where(CClabel == i)[1]
        sub_adjMtx = trjAdj[sub_index][:, sub_index]
        # sub_FeatureMtx = FeatureMtx[sub_index,:]
        sub_FeatureMtx = []  # placeholder; the adjacency is supplied directly via get_adjacency below

        if Parameterobj.smallclssize < sub_index.size < 50:
            # project_dimension = int(np.floor(sub_index.size/Parameterobj.embedding_projection_factor) + 1)
            project_dimension = 10

            print("CC size:", sub_index.size)
            # restrict the projection dimension, otherwise the embedding is too slow
            print("project dimension is: ",
                  min(int(np.sqrt(sub_index.size)), project_dimension))  ## embedded lower dimension
            ssc = sparse_subspace_clustering(2000000, sub_FeatureMtx,
                                             n_dimension=min(int(np.sqrt(sub_index.size)), project_dimension))
            sub_adjMtx = csr_matrix(sub_adjMtx, shape=sub_adjMtx.shape).toarray()  # densify for get_adjacency
            ssc.get_adjacency(sub_adjMtx)

            if Parameterobj.clustering_choice == 'labels_DPGMM_':
                ssc.manifold()
                # DPGMM
                # n_components_DPGMM = int(np.floor(sub_index.size/Parameterobj.DPGMM_num_component_shirink_factor))
                # n_components_DPGMM = max(1,n_components_DPGMM)
                n_components_DPGMM = sub_index.size
                sub_labels_DPGMM = ssc.clustering_DPGMM(n_components=n_components_DPGMM,
                                                        alpha=Parameterobj.DPGMM_alpha)
                # visulize(ssc.embedding_,sub_labels,model,color)
                # vis rearrangement of the adj
                # visAdj_rearrange(sub_adjMtx,sub_labels_DPGMM)
                labels[sub_index] = max(np.max(labels), 0) + (sub_labels_DPGMM + 1)

            elif Parameterobj.clustering_choice == 'labels_spectral_':
                # k-means (alternative, kept for reference)
                # num_cluster_prior = len(np.unique(sub_labels_DPGMM))
                # num_cluster_prior = n_components_DPGMM
                # sub_labels_k_means = ssc.clustering_kmeans(num_cluster_prior)

                ###################################################################################
                # N-cut spectral
                # n_components_spectral = int(np.floor(sub_index.size/Parameterobj.spectral_num_component_shirink_factor))
                # n_components_spectral = max(2,n_components_spectral)
                # n_components_spectral = min(sub_index.size,n_components_spectral)
                n_components_spectral = 2
                print('spectral clustering num_cluster is', n_components_spectral)
                sub_labels_spectral = ssc.clustering_spectral(n_components_spectral)
                # visAdj_rearrange(sub_adjMtx,sub_labels_spectral)
                labels[sub_index] = max(np.max(labels), 0) + (sub_labels_spectral + 1)

            elif Parameterobj.clustering_choice == 'labels_affinity_':
                # affinity propagation
                sub_labels_affinity = ssc.clustering_Affini_prpoga()
                labels[sub_index] = max(np.max(labels), 0) + (sub_labels_affinity + 1)

        else:  ## if size small, treat as one group
            sub_labels = np.ones(sub_index.size)
            labels[sub_index] = max(np.max(labels), 0) + sub_labels
            small_CC_oneCls.append(sub_index)

    labels = uniqulizeLabel(labels)
    return labels, small_CC_oneCls
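
The offsetting pattern max(np.max(labels), 0) + (sub_labels + 1) is what keeps cluster IDs globally unique across connected components; a minimal numeric sketch with made-up per-component labels:

import numpy as np

labels = np.ones(6) * (-222)        # sentinel for unassigned trajectories
comp_a = np.array([0, 1, 2])        # indices of the first connected component
comp_b = np.array([3, 4, 5])        # indices of the second connected component

sub_labels_a = np.array([0, 1, 0])  # made-up SSC labels inside component A
labels[comp_a] = max(np.max(labels), 0) + (sub_labels_a + 1)  # offset 0 -> 1, 2, 1

sub_labels_b = np.array([0, 0, 1])  # made-up SSC labels inside component B
labels[comp_b] = max(np.max(labels), 0) + (sub_labels_b + 1)  # offset 2 -> 3, 3, 4

print(labels)  # labels are now 1, 2, 1, 3, 3, 4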