Ejemplo n.º 1
0
    def getClusters(self, plot=False):
        """Cluster the filtered image with iDBSCAN and build superclusters.

        The image is median-filtered and passed through ``tl.noisereductor``;
        the pixels that are still non-zero after rounding are clustered with
        ``iDBSCAN``.  Every non-noise label holding more than one pixel is
        wrapped in a ``Cluster``.  The pixel groups whose label was tagged with
        iteration 1 or 2 are then handed to
        ``SuperClusterAlgorithm.findSuperClusters``.

        Args:
            plot: when true, draw the contour of every saved cluster on top of
                ``self.image`` and save it as ``<plotDir>/<name>.png`` / ``.pdf``.

        Returns:
            A ``(clusters, superclusters)`` tuple: the list of ``Cluster``
            objects and the first value returned by ``findSuperClusters``.
            Both are empty lists when no pixel survives the filtering.

        Side effects:
            * creates ``options.plotDir`` (and tries to copy ``utils/index.php``
              into it) when it does not exist yet;
            * resets ``options.flag_plot_noise`` to 0 when ``debug_mode`` is 0;
            * writes the debug plots / pickles selected by the ``options.flag_*``
              switches when ``debug_mode`` is 1.
        """
        import shutil
        from iDBSCAN import iDBSCAN

        debug_on = self.options.debug_mode == 1

        # pyplot must be bound before its first use in this function: a local
        # import placed further down would make every earlier `plt` reference
        # raise UnboundLocalError.  It is still only imported when some plot
        # can actually be requested.
        if plot or debug_on:
            import matplotlib.pyplot as plt

        outname = self.options.plotDir
        if outname and not os.path.exists(outname):
            # Best effort, without going through a shell (paths containing
            # spaces or shell metacharacters are handled correctly).
            try:
                os.makedirs(outname, exist_ok=True)
                shutil.copy("utils/index.php", outname)
            except OSError as err:
                print("WARNING: could not prepare plot directory {}: {}".format(
                    outname, err))

        #   Plot parameters  #
        # colour-scale limits used for every plot of the rebinned image
        vmin = 99
        vmax = 125

        #   IDBSCAN parameters  #
        tip = self.options.tip
        scale = 1
        iterative = self.options.iterative  # number of iterations for iDBSCAN
        vector_eps = list(
            np.array(self.options.vector_eps, dtype=float) * scale)
        vector_min_samples = list(
            np.array(self.options.vector_min_samples, dtype=float) * scale)
        cuts = self.options.cuts

        #-----Pre-Processing----------------#
        # NOTE(review): the 2048 side length is hard-coded here.
        rescale = int(2048 / self.rebin)
        rebin_image = tl.rebin(self.img_ori, (rescale, rescale))

        edges = median_filter(self.image, size=4)
        edcopyTight = tl.noisereductor(edges.copy(), rescale,
                                       self.options.min_neighbors_average)

        # Coordinates of the pixels that are non-zero after rounding; this
        # drops every pixel whose filtered value rounds to 0.
        points = np.array(np.nonzero(np.round(edcopyTight))).astype(int).T
        lp = points.shape[0]

        if tip == '3D':
            # Simulate a Z dimension: after the lp real coordinates, each
            # coordinate is appended int(self.image[ix, iy]) - 1 more times
            # (never a negative number of times), in the same pixel order.
            X1 = points.copy()  # the plain 2D coordinates
            photons = np.asarray(self.image)[points[:, 0], points[:, 1]]
            nreplicas = np.clip(photons.astype(int) - 1, 0, None)
            X = np.concatenate(
                [points, np.repeat(points, nreplicas, axis=0)])
        else:
            X = points.copy()
            X1 = X

        if self.options.debug_mode == 0:
            self.options.flag_plot_noise = 0

        # returned collections
        clusters = []
        superclusters = []

        # clustering will crash if the vector of pixels is empty (it may
        # happen after the zero-suppression + noise filtering)
        if len(X) == 0:
            return clusters, superclusters

        # ---- small plotting helpers (only called when pyplot is imported) ----
        def file_stem(*suffixes):
            """Return '<plotDir>/<name>' followed by one '_<suffix>' per argument."""
            stem = '{pdir}/{name}'.format(pdir=outname, name=self.name)
            return stem + ''.join('_{}'.format(s) for s in suffixes)

        def save_and_close(stem, fig=None):
            """Save the current figure as png and pdf, pickle `fig` if given, close all."""
            for ext in ['png', 'pdf']:
                plt.savefig('{stem}.{ext}'.format(stem=stem, ext=ext),
                            bbox_inches='tight',
                            pad_inches=0)
            if fig is not None:
                with open(stem + '.pkl', "wb") as fp:
                    pickle.dump(fig, fp, protocol=4)
            plt.gcf().clear()
            plt.close('all')

        def new_debug_figure():
            """Open a figure with the size configured in the options."""
            return plt.figure(figsize=(self.options.figsizeX,
                                       self.options.figsizeY))

        def show_rebinned(title):
            """Open a debug figure showing the rebinned image under `title`."""
            fig = new_debug_figure()
            plt.imshow(rebin_image,
                       cmap=self.options.cmapcolor,
                       vmin=vmin,
                       vmax=vmax,
                       origin='lower')
            plt.title(title)
            return fig

        def draw_cluster_edges(cluster_points, style, label=None):
            """Plot the tl.findedges contours of each cluster; label the first cluster."""
            # Iterating the list directly avoids np.shape() on a ragged list
            # of arrays, which recent NumPy versions reject.
            for j, pts in enumerate(cluster_points):
                ybox = pts[:, 0]
                xbox = pts[:, 1]
                if len(ybox) == 0 or len(xbox) == 0:
                    continue
                line = None
                for contour in tl.findedges(ybox, xbox, self.rebin):
                    line, = plt.plot(contour[:, 1],
                                     contour[:, 0],
                                     style,
                                     linewidth=2.5)
                # Only label a line that was really drawn for this cluster.
                if j == 0 and label is not None and line is not None:
                    line.set_label(label)

        # - - - - - - - - - - - - - -
        db = iDBSCAN(iterative=iterative,
                     vector_eps=vector_eps,
                     vector_min_samples=vector_min_samples,
                     cuts=cuts,
                     flag_plot_noise=self.options.flag_plot_noise).fit(X)

        if debug_on and self.options.flag_plot_noise == 1:
            save_and_close(file_stem('0th'))

        # Returning to '2' dimensions: keep only the entries of the lp real
        # coordinates, excluding the fake repetitions appended after them.
        if tip == '3D':
            db.labels_ = db.labels_[:lp]
            db.tag_ = db.tag_[:lp]
        # - - - - - - - - - - - - - -

        labels = db.labels_

        # Sorted unique labels, and the tag of the first pixel of each label.
        unique_lab, first_idx = np.unique(labels, return_index=True)
        first_tags = db.tag_[first_idx]

        def clusters_of_iteration(iteration):
            """Return the pixel arrays of all labels tagged with `iteration`."""
            return [
                X1[db.labels_ == lab]
                for lab in unique_lab[first_tags == iteration].tolist()
            ]

        if plot:
            plt.figure(figsize=(10, 10))
            plt.imshow(self.image,
                       cmap='viridis',
                       vmin=1,
                       vmax=25,
                       interpolation=None,
                       origin='lower')

        contour_style = {1: 'r', 2: 'b', 3: 'y'}
        for k in unique_lab:
            # Skip the noise label (-1) instead of stopping at it, so the
            # result does not depend on where the noise label shows up in the
            # iteration order.
            if k < 0:
                continue

            members = (labels == k)
            xy = X1[members]

            # only clusters with more than one pixel are saved in the event
            if len(xy) <= 1:
                continue

            cl = Cluster(xy,
                         self.rebin,
                         self.image_fr,
                         self.image_fr_zs,
                         debug=False)
            cl.iteration = db.tag_[members][0]
            cl.nclu = k
            cl.pearson = 999  # placeholder, no correlation is computed

            clusters.append(cl)
            if plot:
                xri, yri = tl.getContours(xy[:, 1], xy[:, 0])
                plt.plot(xri,
                         yri,
                         '-{lcol}'.format(
                             lcol=contour_style.get(cl.iteration, 'k')),
                         linewidth=0.5)

        ## SUPERCLUSTERING
        from supercluster import SuperClusterAlgorithm
        scAlgo = SuperClusterAlgorithm(shape=rescale,
                                       debugmode=self.options.debug_mode)
        allclusters_it12 = clusters_of_iteration(1) + clusters_of_iteration(2)
        # note: passing the edges, not the filtered ones for deeper information
        superclusters, superclusterContours = scAlgo.findSuperClusters(
            allclusters_it12, edges, self.image_fr, self.image_fr_zs, 0)

        if plot:
            save_and_close(file_stem())

        ## DEBUG MODE
        if debug_on:
            print('[DEBUG-MODE ON]')
            print('[%s Method]' % (self.options.tip))

            if self.options.flag_full_image == 1:
                fig = new_debug_figure()
                plt.imshow(self.image_fr.T,
                           cmap=self.options.cmapcolor,
                           vmin=1,
                           vmax=25,
                           origin='upper')
                plt.title("Original Image")
                save_and_close(file_stem('oriIma'), fig)

            if self.options.flag_rebin_image == 1:
                show_rebinned("Rebin Image")
                save_and_close(file_stem('rebinIma'))

            if self.options.flag_edges_image == 1:
                new_debug_figure()
                plt.imshow(edcopyTight,
                           cmap=self.options.cmapcolor,
                           vmin=0,
                           vmax=1,
                           origin='lower')
                plt.title('Edges after Filtering')
                save_and_close(file_stem('edgesIma'))

            if self.options.flag_stats == 1:
                print('[Statistics]')
                n_clusters_ = len(unique_lab) - (1 if -1 in unique_lab else 0)
                print("Total number of Clusters: %d" % (n_clusters_))
                for iteration in (1, 2, 3):
                    print("Clusters found in iteration %d: %d" %
                          (iteration,
                           np.count_nonzero(first_tags == iteration)))
                print("SuperClusters found: %d" % len(superclusters))

            # (flag, iteration, file tag, line style, also pickle the figure)
            per_iteration = [
                (self.options.flag_first_it, 1, '1st', '-r', True),
                (self.options.flag_second_it, 2, '2nd', '-b', True),
                (self.options.flag_third_it, 3, '3rd', '-y', False),
            ]
            for flag, iteration, tag, style, dump in per_iteration:
                if flag != 1:
                    continue
                print('[Plotting {} iteration]'.format(tag))
                fig = show_rebinned(
                    "Clusters found in iteration {}".format(iteration))
                draw_cluster_edges(clusters_of_iteration(iteration), style)
                save_and_close(file_stem(tag, self.options.tip),
                               fig if dump else None)

            if self.options.flag_all_it == 1:
                print('[Plotting ALL iteration]')
                fig = show_rebinned("Final Image")
                draw_cluster_edges(clusters_of_iteration(1), '-r',
                                   '1st Iteration')
                draw_cluster_edges(clusters_of_iteration(2), '-b',
                                   '2nd Iteration')
                draw_cluster_edges(clusters_of_iteration(3), '-y',
                                   '3rd Iteration')

                if len(superclusters):
                    plt.contour(superclusterContours, [0.5],
                                colors='limegreen',
                                linewidths=2)
                    # Empty proxy line carrying the legend entry, so the label
                    # does not rely on ContourSet internals.
                    plt.plot([], [],
                             color='limegreen',
                             linewidth=2,
                             label='supercluster')
                # Build the legend once every labelled artist exists.
                plt.legend(loc='upper left')
                save_and_close(file_stem('all', self.options.tip), fig)

            #################### PLOT SUPERCLUSTER ONLY ###############################
            if self.options.flag_supercluster == 1:
                # The figure is created unconditionally so that the object
                # pickled below is always the one of this section.
                fig = new_debug_figure()
                if len(superclusters):
                    plt.contour(superclusterContours, [0.5],
                                colors='limegreen',
                                linewidths=2,
                                alpha=0.5)
                    plt.imshow(rebin_image,
                               cmap=self.options.cmapcolor,
                               vmin=vmin,
                               vmax=vmax,
                               origin='lower')
                    plt.title("Superclusters found")
                save_and_close(file_stem('sc', self.options.tip), fig)
            #################### PLOT SUPERCLUSTER ONLY ###############################

            if self.options.nclu >= 0:
                print('[Plotting just the cluster %d]' % (self.options.nclu))
                show_rebinned('Plotting just the cluster %d' %
                              (self.options.nclu))
                draw_cluster_edges([X1[db.labels_ == self.options.nclu]],
                                   '-r')
                save_and_close(
                    file_stem(self.options.tip, self.options.nclu))

        return clusters, superclusters
Ejemplo n.º 2
0
 def arrrebin(self, img, rebin):
     """Return ``tl.rebin(img, (n, n))`` with ``n = int(2048 / rebin)``."""
     side = int(2048 / rebin)
     target_shape = (side, side)
     return tl.rebin(img, target_shape)
Ejemplo n.º 3
0
 def arrrebin(self, img, rebin):
     """Return ``tl.rebin(img, (n, n))`` with ``n = int(self.geometry.npixx / rebin)``."""
     side = int(self.geometry.npixx / rebin)
     target_shape = (side, side)
     return tl.rebin(img, target_shape)