Ejemplo n.º 1
0
    def plot_kde(self, metric_to_x, metric_to_y, output_folder, kde_structs):
        """
        Given 2 metrics, it generates the kde plot.

        The dataframe is sorted by the Y metric in ascending order and only
        its first rows are represented, as a filled bivariate KDE with a
        color bar. The image is written inside the output folder.

        Parameters
        ----------
        metric_to_x : str or int
            The metric id to plot in the X axis. It can be either a string
            with the name of the metric or an integer with the column index
        metric_to_y : str or int
            The metric id to plot in the Y axis. It can be either a string
            with the name of the metric or an integer with the column index
        output_folder : str
            The path where the plot will be saved
        kde_structs : int
            The number of structures to represent in the plot

        Returns
        -------
        output_name : str
            The path of the image that was saved
        """
        import seaborn as sb
        from matplotlib import pyplot as plt

        check_make_folder(output_folder)
        # The third value returned by _get_column_names is not needed here
        metric_to_x, metric_to_y, _ = \
            self._get_column_names(metric_to_x, metric_to_y)

        # Define output path
        output_name = "{}_{}_kde.png".format(metric_to_x, metric_to_y)
        output_name = output_name.replace(" ", "_")
        output_name = os.path.join(output_folder, output_name)

        # Filter out the number of structures from dataframe to plot
        # NOTE(review): the upper bound is len(dataframe) - 1, so the last
        # row after sorting is never plotted even when kde_structs covers
        # the whole dataframe -- confirm whether this is intended
        structures_to_keep = min(int(kde_structs), len(self._dataframe) - 1)
        sorted_df = self._dataframe.sort_values(metric_to_y, ascending=True)
        top = sorted_df[0:structures_to_keep]

        # Draw on a dedicated figure: relying on the implicit current axes
        # makes consecutive calls overlay their plots on the same figure
        fig, ax = plt.subplots()
        try:
            # x and y are passed by keyword and 'fill' replaces the
            # deprecated 'shade' alias (which used to override fill=False)
            sb.kdeplot(x=top[metric_to_x], y=top[metric_to_y],
                       cmap="crest", fill=True, cbar=True, ax=ax)
            fig.savefig(output_name)
        finally:
            # Release the figure even if plotting or saving fails
            plt.close(fig)

        return output_name
Ejemplo n.º 2
0
    def plot_two_metrics(self,
                         metric_to_x,
                         metric_to_y,
                         metric_to_z=None,
                         output_name=None,
                         output_folder="."):
        """
        It generates a scatter plot of two metrics. When a third metric
        is supplied, it is used to color the points and it is displayed
        as a color bar.

        Parameters
        ----------
        metric_to_x : str or int
            The metric id to plot in the X axis. It can be either a string
            with the name of the metric or an integer with the column index
        metric_to_y : str or int
            The metric id to plot in the Y axis. It can be either a string
            with the name of the metric or an integer with the column index
        metric_to_z : str or int
            The metric id to plot in the color bar. It can be either a string
            with the name of the metric or an integer with the column index.
            Default is None
        output_name : str
            The name that will be given to the resulting plot. When it is
            None, the name is built from the metric names. Default is None
        output_folder : str
            The path where the plot will be saved. Default is '.', so it
            will be stored in the local directory

        Returns
        -------
        output_name : str
            The path of the image that was saved
        """
        from pele_platform.Utilities.Helpers.helpers import backup_logger

        check_make_folder(output_folder)

        # Translate the metric ids into dataframe column names
        metric_to_x, metric_to_y, metric_to_z = self._get_column_names(
            metric_to_x, metric_to_y, metric_to_z)
        has_color_metric = metric_to_z is not None

        # The metrics that take part in the plot, in axis order
        plotted_metrics = [metric_to_x, metric_to_y]
        if has_color_metric:
            plotted_metrics.append(metric_to_z)

        # Build a default file name out of the metric names
        if output_name is None:
            output_name = "_".join(
                "{}".format(metric) for metric in plotted_metrics)
            output_name += "_plot.png"

        # Whitespaces are not allowed in the file name
        output_name = os.path.join(output_folder,
                                   output_name.replace(" ", "_"))

        # Select the non-interactive backend before loading pyplot
        import matplotlib

        matplotlib.use("Agg")
        from matplotlib import pyplot as plt

        fig, ax = plt.subplots()

        x_values = self._dataframe[metric_to_x]
        y_values = self._dataframe[metric_to_y]

        if has_color_metric:
            # Points are colored according to the third metric
            points = ax.scatter(x_values,
                                y_values,
                                c=self._dataframe[metric_to_z],
                                s=20)
            color_bar = plt.colorbar(points)
            color_bar.ax.set_ylabel(metric_to_z)
            message = "Plotted {} vs {} vs {}".format(
                metric_to_x, metric_to_y, metric_to_z)
        else:
            ax.scatter(x_values, y_values)
            message = "Plotted {} vs {}".format(metric_to_x, metric_to_y)

        ax.set_xlabel(metric_to_x)
        ax.set_ylabel(metric_to_y)
        plt.savefig(output_name)
        backup_logger(self._logger, message)

        plt.close("all")
        return output_name
Ejemplo n.º 3
0
    def plot_clusters(self,
                      metric_to_x,
                      metric_to_y,
                      output_folder,
                      clusters,
                      representative_structures=None):
        """
        It creates a scatter plot with the two metrics that are supplied
        and displays the points belonging to each top cluster with a
        different color.

        Points whose cluster label is -1 are drawn below the rest and
        are listed as "Others" at the end of the legend. When
        representative structures are supplied, their points are drawn
        with a cross marker on top of the rest and they get their own
        legend entry.

        Parameters
        ----------
        metric_to_x : str or int
            The metric id to plot in the X axis. It can be either a string
            with the name of the metric or an integer with the column index
        metric_to_y : str or int
            The metric id to plot in the Y axis. It can be either a string
            with the name of the metric or an integer with the column index
        output_folder : str
            The path where the plot will be saved
        clusters : a numpy.array object
            The array of cluster labels that were obtained
        representative_structures : dict[str, tuple[str, int]]
            Dictionary containing the representative structures that
            were selected. Cluster label is the key and value is a list
            with [trajectory, step] of each cluster. If supplied, points
            belonging to representative structures will be represented

        Returns
        -------
        output_name : str
            The path of the image that was saved
        """
        import copy
        from itertools import repeat
        from matplotlib.colors import LinearSegmentedColormap

        from pele_platform.analysis.clustering import get_cluster_label
        from pele_platform.Utilities.Helpers.helpers import backup_logger

        check_make_folder(output_folder)

        # Translate the metric ids into dataframe column names (the third
        # value returned by _get_column_names is not needed here)
        metric_to_x, metric_to_y, _ = \
            self._get_column_names(metric_to_x, metric_to_y)

        import matplotlib

        matplotlib.use("Agg")
        from matplotlib import pyplot as plt
        from matplotlib import colors

        # Initialize figure
        fig, ax = plt.subplots(figsize=(6, 6),
                               dpi=100,
                               facecolor="w",
                               edgecolor="k")
        fig.subplots_adjust(right=0.8)  # To make room for the legend

        # Set axis labels
        ax.set_xlabel(metric_to_x)
        ax.set_ylabel(metric_to_y)

        # Configure grid
        ax.set_axisbelow(True)
        ax.grid(True)
        ax.xaxis.grid(color="#AEB6BF", linestyle="dashed")
        ax.yaxis.grid(color="#AEB6BF", linestyle="dashed")
        ax.spines["right"].set_visible(False)
        ax.spines["top"].set_visible(False)
        ax.spines["left"].set_color("black")
        ax.spines["bottom"].set_color("black")
        ax.set_facecolor("#E6E9EB")

        # Extract the list of cluster labels
        cluster_labels = sorted(set(clusters))
        n_clusters = len(cluster_labels)

        # Configure colormap. plt.get_cmap is used because cm.get_cmap is
        # no longer available in recent matplotlib releases. Registered
        # colormaps are copied since set_under modifies them in place
        if n_clusters > 18:
            cmap = copy.copy(plt.get_cmap("jet"))
        elif n_clusters > 9:
            cmap = LinearSegmentedColormap.from_list('custom_tab20',
                                                     constants.custom_colors)
        else:
            cmap = copy.copy(plt.get_cmap("Set1"))

        norm = colors.Normalize(vmin=0, vmax=n_clusters)
        # Label -1 falls below vmin, so it is painted with the 'under' color
        cmap.set_under("grey")

        # Values to plot
        all_xs = self._dataframe[metric_to_x]
        all_ys = self._dataframe[metric_to_y]

        with_representatives = representative_structures is not None
        if with_representatives:
            rep_pairs = [(traj, step)
                         for (traj, step)
                         in representative_structures.values()]
            trajectories = self._dataframe['trajectory']
            steps = self._dataframe['numberOfAcceptedPeleSteps']
        else:
            # Nothing to match against, so no point is a representative
            rep_pairs = []
            trajectories = steps = repeat(None)

        def is_representative(traj, step):
            # A point is a representative structure when exactly one of
            # the supplied [trajectory, step] pairs matches it
            matches = sum(1 for rep_traj, rep_step in rep_pairs
                          if rep_traj == traj and rep_step == step)
            return matches == 1

        # Distribute the points in a single pass over the data
        points_by_cluster = {label: ([], []) for label in cluster_labels}
        rep_xs = []
        rep_ys = []
        for x, y, cluster, traj, step in zip(all_xs, all_ys, clusters,
                                             trajectories, steps):
            if is_representative(traj, step):
                rep_xs.append(x)
                rep_ys.append(y)
                continue
            bucket = points_by_cluster.get(cluster)
            if bucket is not None:
                bucket[0].append(x)
                bucket[1].append(y)

        # Legend handles are stored together with their names, so an entry
        # can never be paired with the label of a different cluster
        cluster_entries = []
        others_entries = []
        for label in cluster_labels:
            xs, ys = points_by_cluster[label]

            # In case all the points of the cluster are representative
            # structures there is nothing left to draw for it
            if not xs:
                continue

            is_others = label == -1
            sc = ax.scatter(xs,
                            ys,
                            c=[label, ] * len(xs),
                            cmap=cmap,
                            norm=norm,
                            alpha=0.7,
                            zorder=1 if is_others else 2)

            name = "Others" if is_others else get_cluster_label(label)
            entries = others_entries if is_others else cluster_entries
            entries.extend(
                (handle, name) for handle in sc.legend_elements()[0])

        # Draw representative structures on top of the cluster points
        rep_entries = []
        if rep_xs:
            sc = ax.scatter(rep_xs,
                            rep_ys,
                            c=[1, ] * len(rep_xs),
                            zorder=3,
                            marker='x',
                            cmap=plt.get_cmap('binary'),
                            norm=colors.Normalize(vmin=0, vmax=1))
            rep_entries = [(handle, "Representative\nstructure")
                           for handle in sc.legend_elements()[0]]

        # Configure legend: clusters first, then "Others" and finally the
        # marker of representative structures
        legend_entries = cluster_entries + others_entries + rep_entries
        ax.legend([handle for handle, _ in legend_entries],
                  [name for _, name in legend_entries],
                  title="Clusters",
                  loc='center left',
                  bbox_to_anchor=(1, 0.5))

        # Set output name
        if with_representatives:
            output_name = "{}_{}_representatives_plot.png".format(
                metric_to_x, metric_to_y)
        else:
            output_name = "{}_{}_plot.png".format(metric_to_x, metric_to_y)
        output_name = output_name.replace(" ", "_")
        output_name = os.path.join(output_folder, output_name)

        fig.savefig(output_name,
                    dpi=200,
                    edgecolor="k",
                    orientation="portrait",
                    transparent=True,
                    bbox_inches="tight")

        backup_logger(self._logger,
                      "Plotted {} vs {}".format(metric_to_x, metric_to_y))
        plt.close("all")

        return output_name
Ejemplo n.º 4
0
    def plot_kde(self, metric_to_x, metric_to_y, output_folder, kde_structs):
        """
        Given 2 metrics, it generates the kde plot.

        The dataframe is sorted by the Y metric in ascending order and only
        its first rows are represented. The central panel is a scatter plot
        of both metrics and the margins display the KDE of each of them.
        The image is written inside the output folder.

        Parameters
        ----------
        metric_to_x : str or int
            The metric id to plot in the X axis. It can be either a string
            with the name of the metric or an integer with the column index
        metric_to_y : str or int
            The metric id to plot in the Y axis. It can be either a string
            with the name of the metric or an integer with the column index
        output_folder : str
            The path where the plot will be saved
        kde_structs : int
            The number of structures to represent in the plot

        Returns
        -------
        output_name : str
            The path of the image that was saved
        """
        import seaborn as sb
        from matplotlib import pyplot as plt

        check_make_folder(output_folder)
        # The third value returned by _get_column_names is not needed here
        metric_to_x, metric_to_y, _ = \
            self._get_column_names(metric_to_x, metric_to_y)

        # Define output path
        output_name = "{}_{}_kde.png".format(metric_to_x, metric_to_y)
        output_name = output_name.replace(" ", "_")
        output_name = os.path.join(output_folder, output_name)

        # Filter out the number of structures from dataframe to plot
        # NOTE(review): the upper bound is len(dataframe) - 1, so the last
        # row after sorting is never plotted even when kde_structs covers
        # the whole dataframe -- confirm whether this is intended
        structures_to_keep = min(int(kde_structs), len(self._dataframe) - 1)
        sorted_df = self._dataframe.sort_values(metric_to_y, ascending=True)
        top = sorted_df[0:structures_to_keep]

        x_values = top[metric_to_x]
        y_values = top[metric_to_y]

        fill_color = "lightskyblue"
        edge_color = "royalblue"

        # JointGrid creates its own figure, which must be closed explicitly
        grid = sb.JointGrid(x=x_values, y=y_values)
        try:
            # Central panel: scatter plot of both metrics
            grid.plot_joint(sb.scatterplot,
                            color=fill_color,
                            edgecolor=edge_color,
                            marker='o',
                            alpha=0.7,
                            s=20)

            # Marginal panels: KDE of each metric. 'fill' replaces the
            # deprecated 'shade' alias
            sb.kdeplot(x=x_values,
                       ax=grid.ax_marg_x,
                       color=fill_color,
                       fill=True,
                       alpha=0.5,
                       edgecolor=edge_color)
            sb.kdeplot(y=y_values,
                       ax=grid.ax_marg_y,
                       color=fill_color,
                       fill=True,
                       alpha=0.5,
                       edgecolor=edge_color)

            grid.ax_joint.set_xlabel(metric_to_x, fontweight='bold')
            grid.ax_joint.set_ylabel(metric_to_y, fontweight='bold')

            grid.savefig(output_name)
        finally:
            # Release the figure even if plotting or saving fails
            plt.close(grid.ax_joint.figure)

        return output_name
Ejemplo n.º 5
0
    def plot_clusters(self, metric_to_x, metric_to_y, output_folder,
                      clusters):
        """
        It creates a scatter plot with the two metrics that are supplied
        and displays the points belonging to each top cluster with a
        different color.

        Points whose cluster label is -1 are drawn below the rest and
        are listed as "Others" at the end of the legend.

        Parameters
        ----------
        metric_to_x : str or int
            The metric id to plot in the X axis. It can be either a string
            with the name of the metric or an integer with the column index
        metric_to_y : str or int
            The metric id to plot in the Y axis. It can be either a string
            with the name of the metric or an integer with the column index
        output_folder : str
            The path where the plot will be saved
        clusters : a numpy.array object
            The array of cluster labels that were obtained

        Returns
        -------
        output_name : str
            The path of the image that was saved
        """
        import copy
        from matplotlib.colors import LinearSegmentedColormap

        from pele_platform.analysis.clustering import get_cluster_label
        from pele_platform.Utilities.Helpers.helpers import backup_logger

        check_make_folder(output_folder)

        # Translate the metric ids into dataframe column names (the third
        # value returned by _get_column_names is not needed here)
        metric_to_x, metric_to_y, _ = \
            self._get_column_names(metric_to_x, metric_to_y)

        import matplotlib

        matplotlib.use("Agg")
        from matplotlib import pyplot as plt
        from matplotlib import colors

        # Initialize figure
        fig, ax = plt.subplots(figsize=(6, 6), dpi=100, facecolor="w",
                               edgecolor="k")
        fig.subplots_adjust(right=0.8)  # To make room for the legend

        # Set axis labels
        ax.set_xlabel(metric_to_x)
        ax.set_ylabel(metric_to_y)

        # Configure grid
        ax.set_axisbelow(True)
        ax.grid(True)
        ax.xaxis.grid(color="#AEB6BF", linestyle="dashed")
        ax.yaxis.grid(color="#AEB6BF", linestyle="dashed")
        ax.spines["right"].set_visible(False)
        ax.spines["top"].set_visible(False)
        ax.spines["left"].set_color("black")
        ax.spines["bottom"].set_color("black")
        ax.set_facecolor("#E6E9EB")

        # Extract the list of cluster labels
        cluster_labels = sorted(set(clusters))
        n_clusters = len(cluster_labels)

        # Configure colormap. plt.get_cmap is used because cm.get_cmap is
        # no longer available in recent matplotlib releases. Registered
        # colormaps are copied since set_under modifies them in place
        if n_clusters > 18:
            cmap = copy.copy(plt.get_cmap("jet"))
        elif n_clusters > 9:
            cmap = LinearSegmentedColormap.from_list('custom_tab20',
                                                     constants.custom_colors)
        else:
            cmap = copy.copy(plt.get_cmap("Set1"))

        norm = colors.Normalize(vmin=0, vmax=n_clusters)
        # Label -1 falls below vmin, so it is painted with the 'under' color
        cmap.set_under("grey")

        # Values to plot
        all_xs = self._dataframe[metric_to_x]
        all_ys = self._dataframe[metric_to_y]

        # Distribute the points in a single pass over the data
        points_by_cluster = {label: ([], []) for label in cluster_labels}
        for x, y, cluster in zip(all_xs, all_ys, clusters):
            bucket = points_by_cluster.get(cluster)
            if bucket is not None:
                bucket[0].append(x)
                bucket[1].append(y)

        # Draw points. Legend handles are stored together with their
        # names so both lists always stay aligned
        cluster_entries = []
        others_entries = []
        for label in cluster_labels:
            xs, ys = points_by_cluster[label]
            is_others = label == -1
            sc = ax.scatter(xs, ys, c=[label, ] * len(xs),
                            cmap=cmap, norm=norm, alpha=0.7,
                            zorder=1 if is_others else 2)

            name = "Others" if is_others else get_cluster_label(label)
            entries = others_entries if is_others else cluster_entries
            entries.extend(
                (handle, name) for handle in sc.legend_elements()[0])

        # Configure legend: "Others" goes after the actual clusters
        legend_entries = cluster_entries + others_entries
        ax.legend([handle for handle, _ in legend_entries],
                  [name for _, name in legend_entries],
                  title="Clusters",
                  loc='center left', bbox_to_anchor=(1, 0.5))

        # Set output name
        output_name = "{}_{}_plot.png".format(metric_to_x, metric_to_y)
        output_name = output_name.replace(" ", "_")
        output_name = os.path.join(output_folder, output_name)

        try:
            fig.savefig(output_name, dpi=200, edgecolor="k",
                        orientation="portrait", transparent=True,
                        bbox_inches="tight")
        finally:
            # Release the figure so repeated calls do not accumulate them
            plt.close(fig)

        backup_logger(self._logger,
                      "Plotted {} vs {}".format(metric_to_x, metric_to_y))

        return output_name