Esempio n. 1
0
    def nodes_graph_custom(self,
                           datacustom,
                           colnum=0,
                           show=False,
                           printout=True,
                           path='./',
                           name=''):
        """Plot a 2D map with hexagonal nodes coloured by custom values.

        When ``self.colorEx`` is True the first three weights of every node
        are used as the node colour and ``datacustom`` is ignored. Otherwise
        the nodes are coloured with ``datacustom`` and a colorbar is added.

        Args:
            datacustom (np.array): Values used to colour the nodes. A 1D
                array is used as is; otherwise column ``colnum`` is taken.
            colnum (int): The column of ``datacustom`` shown as colormap
                (also used in the colorbar label).
            show (bool, optional): Choose to display the plot.
            printout (bool, optional): Choose to save the plot to a file.
            path (str, optional): Directory where the image is saved.
            name (str, optional): Text appended to the plot title and to the
                output file name.

        """

        centers = [[node.pos[0], node.pos[1]] for node in self.nodeList]

        # Figure size scales with the grid: 100 px per node at 72 dpi.
        widthP = 100
        dpi = 72
        xInch = self.netWidth * widthP / dpi
        yInch = self.netHeight * widthP / dpi
        fig = plt.figure(figsize=(xInch, yInch), dpi=dpi)

        if self.colorEx == True:
            # The builtin float replaces np.float, an alias that was removed
            # in NumPy 1.24.
            cols = [[float(weight) for weight in node.weights[:3]]
                    for node in self.nodeList]
            ax = hx.plot_hex(fig, centers, cols)
            ax.set_title('Node Grid w Color Features', size=80)
            printName = os.path.join(path, 'nodesColors.png')

        else:
            if len(datacustom.shape) == 1:
                cols = datacustom
            else:
                cols = datacustom[:, colnum]
            ax = hx.plot_hex(fig, centers, cols)
            ax.set_title('Node Grid feature' + name, size=80)
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.0)
            cbar = plt.colorbar(ax.collections[0], cax=cax)
            cbar.set_label('Feature #' + str(colnum) + ' value',
                           size=80,
                           labelpad=50)
            cbar.ax.tick_params(labelsize=60)
            plt.sca(ax)
            printName = os.path.join(path,
                                     'nodesFeatureCustom_' + name + '.png')

        if printout == True:
            plt.savefig(printName, bbox_inches='tight', dpi=dpi)
        if show == True:
            plt.show()
        # The figure is only cleared when it was both shown and saved.
        if show != False and printout != False:
            plt.clf()
Esempio n. 2
0
    def nodes_graph(self,
                    colnum=0,
                    show=False,
                    printout=True,
                    path='./',
                    colname=None,
                    cbar=True,
                    cmap_type="Greys"):
        """Plot a 2D map with hexagonal nodes and weights values

        When ``self.colorEx`` is True the first three weights of every node
        are used as the node colour; otherwise the weight at index ``colnum``
        is shown through a colormap.

        Args:
            colnum (int): The index of the weight that will be shown as colormap.
            show (bool, optional): Choose to display the plot.
            printout (bool, optional): Choose to save the plot to a file.
            path (str, optional): Directory where the image is saved.
            colname (str, optional): Name of the column to be shown on the map.
                Defaults to ``str(colnum)``.
            cbar (bool, optional): Choose to draw a colorbar next to the map
                (only used when ``self.colorEx`` is not True).
            cmap_type (str, optional): Forwarded to ``hx.plot_hex`` as
                ``cmap_type``.
        """

        if not colname:
            colname = str(colnum)

        centers = [[node.pos[0], node.pos[1]] for node in self.nodeList]

        # Figure size scales with the grid: 100 px per node at 72 dpi.
        widthP = 100
        dpi = 72
        xInch = self.netWidth * widthP / dpi
        yInch = self.netHeight * widthP / dpi
        fig = plt.figure(figsize=(xInch, yInch), dpi=dpi)

        if self.colorEx == True:
            # The builtin float replaces np.float, an alias that was removed
            # in NumPy 1.24.
            cols = [[float(weight) for weight in node.weights[:3]]
                    for node in self.nodeList]
            ax = hx.plot_hex(fig, centers, cols, cmap_type=cmap_type)
            ax.set_title('Node Grid w Color Features', size=80)
            printName = os.path.join(path, 'nodesColors.png')

        else:
            cols = [node.weights[colnum] for node in self.nodeList]
            ax = hx.plot_hex(fig, centers, cols, cmap_type=cmap_type)
            ax.set_title('Node Grid w Feature ' + colname, size=80)
            divider = make_axes_locatable(ax)
            if cbar:
                cax = divider.append_axes("right", size="5%", pad=0.0)
                # Separate local name so the ``cbar`` flag is not overwritten.
                colorbar = plt.colorbar(ax.collections[0], cax=cax)
                colorbar.set_label(colname, size=80, labelpad=50)
                colorbar.ax.tick_params(labelsize=60)
            plt.sca(ax)
            # The output file is named after the weight index, not colname.
            printName = os.path.join(path,
                                     'nodesFeature_' + str(colnum) + '.png')

        if printout == True:
            plt.savefig(printName, bbox_inches='tight', dpi=dpi)
        if show == True:
            plt.show()
        # The figure is only cleared when it was both shown and saved.
        if show != False and printout != False:
            plt.clf()
Esempio n. 3
0
    def diff_graph(self, show=False, printout=True, returns=False, path='./'):
        """Compute, and optionally plot, each node's weight difference from its neighbours.

        For every node the weight distances to all other nodes lying within
        a node distance of 1.001 are summed. The result can be drawn as a
        hexagonal map with a colorbar.

        Args:
            show (bool, optional): Choose to display the plot.
            printout (bool, optional): Choose to save the plot to
                'nodesDifference.png' inside ``path``.
            returns (bool, optional): Choose to return the difference value.
            path (str, optional): Directory where the image is saved.

        Returns:
            (list): difference value for each node, only when ``returns``
                is True; otherwise None.
        """

        all_nodes = self.nodeList

        # Neighbours of a node: every other node at most ~one grid step away.
        adjacency = [[
            other for other in all_nodes
            if node != other and node.get_nodeDistance(other) <= 1.001
        ] for node in all_nodes]

        diffs = []
        for node, nearby in zip(all_nodes, adjacency):
            total = 0
            for other in nearby:
                total = total + node.get_distance(other.weights)
            diffs.append(total)

        centers = [[node.pos[0], node.pos[1]] for node in all_nodes]

        result = diffs if returns == True else None

        # Nothing to draw: skip all the matplotlib work.
        if not (show == True or printout == True):
            return result

        # Figure size scales with the grid: 100 px per node at 72 dpi.
        dpi = 72
        pixels_per_node = 100
        fig = plt.figure(figsize=(self.netWidth * pixels_per_node / dpi,
                                  self.netHeight * pixels_per_node / dpi),
                         dpi=dpi)

        ax = hx.plot_hex(fig, centers, diffs)
        ax.set_title('Nodes Grid w Weights Difference', size=80)

        cax = make_axes_locatable(ax).append_axes("right",
                                                  size="5%",
                                                  pad=0.0)
        cbar = plt.colorbar(ax.collections[0], cax=cax)
        cbar.set_label('Weights Difference', size=80, labelpad=50)
        cbar.ax.tick_params(labelsize=60)
        plt.sca(ax)

        if printout == True:
            plt.savefig(os.path.join(path, 'nodesDifference.png'),
                        bbox_inches='tight',
                        dpi=dpi)
        if show == True:
            plt.show()
        # The figure is only cleared when it was both shown and saved.
        if show != False and printout != False:
            plt.clf()

        return result
Esempio n. 4
0
    def show_exogenous(self, array, exogenous, colname=None):
        """Plots an exogenous variable into the grid

        Each datapoint is mapped to its best matching unit (the first item
        returned by ``self.find_bmu``), and the mean of the exogenous values
        falling on every such node is drawn as a hexagonal map. Only nodes
        that received at least one datapoint are plotted.

        Args:
            array (np.array): An array containing datapoints to be clustered.
            exogenous (np.array): An array containing the value of the
                variable to be plotted, one entry per row of ``array``.
            colname (str, optional): Name used for the plot title and the
                output file ('SOM_<colname>.png'); defaults to
                'Exogenous variable' / 'SOM_exogenous.png'.

        Returns:
            None. The figure is saved and/or shown according to
            ``self.printout`` and ``self.show`` and is then cleared.

        """

        # Group by node position rounded to 2 decimals. A tuple key cannot
        # collide the way a combined numeric hash (x * 1000 + y) could.
        mean_per_node = {}  # (x, y): [point_coords, sum, N]

        for row, ex in zip(array, exogenous):
            point = self.find_bmu(row)[0].pos
            key = (round(point[0], 2), round(point[1], 2))

            if key in mean_per_node:
                entry = mean_per_node[key]
                # Rebind instead of += so an array-valued first sample (a
                # view into ``exogenous``) is never modified in place.
                entry[1] = entry[1] + ex
                entry[2] += 1
            else:
                mean_per_node[key] = [np.array(point), ex, 1]

        points = [point for point, _, _ in mean_per_node.values()]
        values = [total / count for _, total, count in mean_per_node.values()]

        printName = os.path.join(
            self.path,
            'SOM_%s.png' % ('exogenous' if colname is None else colname))

        fig = self.get_fig()
        # Use a set cmap if the exog. var. is a class (fewer than 8 values).
        cmap = 'Set3' if len(np.unique(exogenous)) < 8 else self.cmap
        ax = hx.plot_hex(fig, points, values, cmap=cmap, radius=self.radius)
        ax.set_title('Exogenous variable' if colname is None else colname,
                     size=self.text_size)

        if self.bar:
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.0)
            cbar = plt.colorbar(ax.collections[0], cax=cax)
            cbar.ax.tick_params(labelsize=self.text_size)

        plt.sca(ax)

        plt.gca().invert_yaxis()
        ax.axis('off')

        if self.printout == True:
            plt.savefig(printName, bbox_inches='tight', dpi=self.dpi)
        if self.show == True:
            plt.show()
        plt.clf()
Esempio n. 5
0
    def diff_graph(self, colors=None, return_ax_only=False):
        """Plot a 2D map with nodes and weights difference among neighbouring nodes.

        The differences and node centers come from ``self.get_diffs()``.
        Appearance and output (radius, colormap, text size, colorbar, output
        directory, dpi, show/printout) are read from attributes of ``self``.

        Args:
            colors (list, optional): Forwarded to ``hx.plot_hex`` as
                ``edges``. Defaults to an empty list.
            return_ax_only (bool, optional): When True the figure is neither
                saved nor shown, only built and returned.

        Returns:
            The axes object returned by ``hx.plot_hex``.
        """

        # A fresh list per call avoids sharing one mutable default object.
        edges = [] if colors is None else colors

        diffs, _, centers = self.get_diffs()
        fig = self.get_fig()

        ax = hx.plot_hex(fig,
                         centers,
                         diffs,
                         edges=edges,
                         radius=self.radius,
                         cmap=self.cmap)
        ax.set_title('Weights Difference', size=self.text_size)

        if self.bar:
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.0)
            cbar = plt.colorbar(ax.collections[0], cax=cax)
            cbar.ax.tick_params(labelsize=self.text_size)
        plt.sca(ax)

        if not return_ax_only:
            printName = os.path.join(self.path, 'SOM_difference.png')
            if self.printout == True:
                plt.savefig(printName, bbox_inches='tight', dpi=self.dpi)
            if self.show == True:
                plt.show()
            # The figure is only cleared when it was both shown and saved.
            if self.show != False and self.printout != False:
                plt.clf()

        return ax
Esempio n. 6
0
    def find_clusters(self, array, type='qthresh', cutoff=5, quant=0.2, percent=0.02, numcl=8,
                      savefile=True, filetype='dat'):
        """Clusters the data in a given array according to the SOM trained map.
            The clusters can also be plotted.

        The datapoints are first projected onto the map through
        ``self.project``; the clustering runs on the resulting bmu positions.
        A plot is produced when ``self.printout`` or ``self.show`` is True.

        Args:
            array (np.array): An array containing datapoints to be clustered.
            type (str, optional): The type of clustering to be applied:
                'qthresh' (quality threshold), 'dpeak' (density peak), or one
                of 'MeanShift', 'DBSCAN', 'KMeans', which go through the
                ``cluster`` module and do not support PBC. Any other value
                terminates the program through ``sys.exit``.
            cutoff (float, optional): Cutoff for the quality threshold algorithm. This also doubles as
                maximum distance of two points to be considered in the same cluster with DBSCAN.
            quant (float, optional): Quantile used to calculate the bandwidth of the mean shift algorithm.
            percent (float, optional): The percentile that defines the reference distance in density
                peak clustering (dpeak). Currently not forwarded to the density peak call.
            numcl (int, optional): The number of clusters for K-Means clustering
            savefile (bool, optional): Choose to save the resulting clusters in a text file.
            filetype (string, optional): Format of the file where the clusters will be saved (csv or dat)

        Returns:
            None. The results are stored on the instance: ``self.clusters``
            (nested list with the indexes of the input array points, one list
            per cluster) together with ``self.xc``, ``self.yc`` and
            ``self.color`` (plot coordinates and colour value per point).

        """

        # Call project to first find the bmu for each array datapoint, but
        # without producing any graph.
        bmuList = self.project(array, return_only=True)
        clusters = []

        if type == 'qthresh':
            #Cluster according to the quality threshold algorithm (slow!).
            clusters = qt.qualityThreshold(bmuList, cutoff, self.PBC,
                                           self.netHeight, self.netWidth)

        elif type == 'dpeak':
            #Cluster according to the density peak algorithm.
            clusters = dp.densityPeak(bmuList,
                                      PBC=self.PBC,
                                      netHeight=self.netHeight,
                                      netWidth=self.netWidth)

        elif type in ['MeanShift', 'DBSCAN', 'KMeans']:
            #Cluster according to algorithms implemented in sklearn.
            if self.PBC == True:
                print(
                    "Warning: Only Quality Threshold and Density Peak clustering work with PBC"
                )
            try:
                if type == 'MeanShift':
                    bandwidth = cluster.estimate_bandwidth(np.asarray(bmuList),
                                                           quantile=quant,
                                                           n_samples=500)
                    cl = cluster.MeanShift(bandwidth=bandwidth,
                                           bin_seeding=True).fit(bmuList)
                elif type == 'DBSCAN':
                    cl = cluster.DBSCAN(eps=cutoff, min_samples=5).fit(bmuList)
                else:
                    cl = cluster.KMeans(n_clusters=numcl).fit(bmuList)

                clLabs = cl.labels_

                # One list of point indexes per distinct label.
                for label in np.unique(clLabs):
                    clusters.append([
                        idx for idx, point_label in enumerate(clLabs)
                        if point_label == label
                    ])
            except Exception:
                # Report the error type, then let the caller deal with it.
                print(('Unexpected error: ', sys.exc_info()[0]))
                raise
        else:
            sys.exit("Error: unkown clustering algorithm " + type)

        if savefile == True:
            separator = ',' if filetype == 'csv' else ' '
            outName = os.path.join(self.path,
                                   'SOM_' + type + '_clusters.' + filetype)
            # The context manager closes the file even if a write fails.
            with open(outName, 'w') as outFile:
                for members in clusters:
                    # One cluster per line; every index is followed by the
                    # separator (including the last one).
                    outFile.write(''.join(
                        str(idx) + separator for idx in members) + '\n')

        xc, yc, color = [], [], []
        for i, members in enumerate(clusters):
            for c in members:
                #again, invert y and x to be consistent with the previous maps
                xc.append(bmuList[int(c)][0])
                yc.append(self.netHeight - bmuList[int(c)][1])
                color.append(i / len(clusters))

        if self.printout == True or self.show == True:
            printName = os.path.join(self.path,
                                     'SOM_' + type + '_clusters.png')

            fig = self.get_fig()
            ax = hx.plot_hex(fig,
                             list(zip(xc, yc)),
                             color,
                             cmap='Set3',
                             radius=self.radius)
            ax.set_title('Clusters', size=self.text_size)
            plt.sca(ax)

            plt.gca().invert_yaxis()
            ax.axis('off')

            if self.printout == True:
                plt.savefig(printName, bbox_inches='tight', dpi=self.dpi)
            if self.show == True:
                plt.show()
            plt.clf()

        self.clusters, self.xc, self.yc, self.color = clusters, xc, yc, color
Esempio n. 7
0
    def nodes_graph(self,
                    colnum=0,
                    colname=None,
                    colors=None,
                    return_ax_only=False):
        """Plot a 2D map with hexagonal nodes and weights values

        When ``self.colorEx`` is True the first three weights of every node
        are used as the node colour; otherwise the weight at index ``colnum``
        is shown through a colormap. Appearance and output (radius, colormap,
        text size, colorbar, output directory, dpi, show/printout) are read
        from attributes of ``self``.

        Args:
            colnum (int): The index of the weight that will be shown as colormap.
            colname (str, optional): Name of the column to be shown on the map.
                Defaults to ``str(colnum)``.
            colors (list, optional): Forwarded to ``hx.plot_hex`` as
                ``edges`` (weights map only). Defaults to an empty list.
            return_ax_only (bool, optional): When True the figure is neither
                saved nor shown, only built and returned.

        Returns:
            The axes object returned by ``hx.plot_hex``.
        """

        if not colname:
            colname = str(colnum)

        # A fresh list per call avoids sharing one mutable default object.
        edges = [] if colors is None else colors

        centers = [[node.pos[0], node.pos[1]] for node in self.nodeList]
        fig = self.get_fig()

        if self.colorEx == True:
            # The builtin float replaces np.float, an alias that was removed
            # in NumPy 1.24.
            cols = [[float(weight) for weight in node.weights[:3]]
                    for node in self.nodeList]
            ax = hx.plot_hex(fig,
                             centers,
                             cols,
                             radius=self.radius,
                             cmap=self.cmap)
            ax.set_title('Node Grid with Color Features', size=self.text_size)
            printName = os.path.join(self.path, 'nodesColors.png')

        else:
            cols = [node.weights[colnum] for node in self.nodeList]
            ax = hx.plot_hex(fig,
                             centers,
                             cols,
                             edges=edges,
                             radius=self.radius)
            # colname is always set at this point (it falls back to
            # str(colnum) above), so it is used directly for title and file.
            ax.set_title(str(colname), size=self.text_size)

            if self.bar:
                divider = make_axes_locatable(ax)
                cax = divider.append_axes("right", size="5%", pad=0.0)
                cbar = plt.colorbar(ax.collections[0], cax=cax)
                cbar.set_label(colname, size=self.text_size, labelpad=10)
                cbar.ax.tick_params(labelsize=self.text_size)
            plt.sca(ax)
            printName = os.path.join(self.path,
                                     'SOM_' + str(colname) + '.png')

        if not return_ax_only:
            if self.printout == True:
                plt.savefig(printName, bbox_inches='tight', dpi=self.dpi)
            if self.show == True:
                plt.show()
            # The figure is only cleared when it was both shown and saved.
            if self.show != False and self.printout != False:
                plt.clf()

        return ax
Esempio n. 8
0
    def projectsample(self,
                      array,
                      colnum=-1,
                      name="",
                      normbybmu=False,
                      show=False,
                      printout=True,
                      path='./',
                      meanmatrix=None,
                      clim=3):
        """Accumulate per-sample values on the SOM nodes and optionally plot them.

        Every value of ``array`` is added to the node given by the
        corresponding entry of ``self.bmuList``. If requested, a hexagonal
        map of the accumulated values minus the accumulated ``meanmatrix``
        values is drawn.

        Args:
            array (np.array): Values to be accumulated, one per entry of
                ``self.bmuList``. For a 2D array column ``colnum`` is used.
            colnum (int): The column of a 2D ``array`` to use (default: last).
            name (str, optional): Plot title and prefix of the output file
                name (``name + 'centeredSampleProj.png'``).
            normbybmu (bool, optional): Divide the value of each node by the
                number of entries of ``self.bmuList`` mapped to it.
            show (bool, optional): Choose to display the plot.
            printout (bool, optional): Choose to save the plot to a file.
            path (str, optional): Directory where the image is saved.
            meanmatrix (sequence, optional): Per-sample values accumulated
                like ``array`` and subtracted in the plot only. Ignored when
                empty or None.
            clim (number, optional): Currently unused.

        Returns:
            (np.array): accumulated (optionally normalized) value for each
                node; the plotted mean subtraction is not applied to it.
                Returns 0 when ``self.bmuList`` is empty.

        """
        if len(self.bmuList) == 0:
            print("load setBMUtable before")
            return 0

        # The builtin float replaces np.float, an alias that was removed in
        # NumPy 1.24.
        nodeCount = len(self.nodeList)
        proj = np.zeros(nodeCount, dtype=float)
        projmean = np.zeros(nodeCount, dtype=float)
        if len(array.shape) == 2:
            array = array[:, colnum]

        # Checked once: this does not change while accumulating.
        hasMean = meanmatrix is not None and len(meanmatrix) != 0
        for i, val in enumerate(array):
            bmu = self.bmuList[i]
            proj[bmu] += val
            if hasMean:
                projmean[bmu] += meanmatrix[i]

        #normalize by the number of kmer by node
        if normbybmu:
            uniqubmu, countbmu = np.unique(self.bmuList, return_counts=True)
            # Nodes without any sample keep a count of 1 to avoid dividing
            # by zero.
            allcountbmu = np.ones(proj.shape)
            allcountbmu[uniqubmu] = countbmu
            proj = proj / allcountbmu

        if show == True or printout == True:
            centers = [[node.pos[0], node.pos[1]] for node in self.nodeList]

            # Figure size scales with the grid: 100 px per node at 72 dpi.
            widthP = 100
            dpi = 72
            xInch = self.netWidth * widthP / dpi
            yInch = self.netHeight * widthP / dpi
            fig = plt.figure(figsize=(xInch, yInch), dpi=dpi)

            # The map shows the values centered on the accumulated means.
            ax = hx.plot_hex(fig, centers, proj - projmean)
            ax.set_title(name)

            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.0)
            cbar = plt.colorbar(ax.collections[0], cax=cax)
            cbar.set_label('Weights Difference')
            plt.sca(ax)

            printName = os.path.join(path, name + 'centeredSampleProj.png')

            if printout == True:
                plt.savefig(printName, bbox_inches='tight', dpi=dpi)
            if show == True:
                plt.show()
            if show != False and printout != False:
                plt.clf()
            plt.close()

        return proj