Example #1
import pandas as pd
import param

class MyParamDataFrame(param.Parameterized):
    d = {'col1': [1, 2], 'col2': [3, 4]}
    df = pd.DataFrame(data=d)
    dataset = param.DataFrame(df)
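
Beyond wrapping an existing frame, param.DataFrame can also validate shape and schema on every assignment. A minimal sketch of that behavior, assuming only param and pandas (the columns/rows constraints shown are illustrative):

import pandas as pd
import param

class ValidatedData(param.Parameterized):
    # Require exactly the columns col1/col2 and at least one row.
    dataset = param.DataFrame(
        default=pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}),
        columns={'col1', 'col2'},
        rows=(1, None),
    )

v = ValidatedData()
try:
    v.dataset = 'not a dataframe'  # rejected: value must be a pd.DataFrame
except ValueError as err:
    print(err)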
Example #2
class KickstarterDashboard(param.Parameterized):
    # pylint: disable=line-too-long
    """The purpose of the Kickstarter Dashboard is to test if the claims regarding Bokeh as of Jan 2018 in the
    [bokeh-dash-best-dashboard-framework](https://www.sicara.ai/blog/2018-01-30-bokeh-dash-best-dashboard-framework-python)
    article holds for Panel and the HoloViews suite of tools as of Dec 2019.

    The claims where

    - Data in Bokeh becomes inconsistent
    - Cannot link charts to dataframe
    - Bokeh is slow for big datasets
    - Interactions take a long time to develop

    You can evaluate this dashboard and the code to make your personal evaluation of the above
    statements.

    My evaluation is

    - the **first two statements no longer hold**.
    - The third is up for discussion. I would also like the dashboard updates to be a bit faster. Maybe it's because I don't yet know how to implement this efficiently.
    - The fourth I've also experienced;
    see this [discussion](https://discourse.holoviz.org/t/how-to-create-a-parameterized-dashboard-with-seperation-between-data-transforms-and-data-views/53/13).

    I can see that I made a lot of mistakes because it takes time for me to understand how the API works.
    There is a lot I need to learn across the HoloViz suite of tools."""
    # pylint: enable=line-too-long
    kickstarter_df = param.DataFrame()
    categories = param.ListSelector()
    scatter_df = param.DataFrame()
    bar_df = param.DataFrame()
    rangexy = param.ClassSelector(
        class_=hv.streams.RangeXY,
        default=hv.streams.RangeXY(),
    )

    def __init__(self, kickstarter_df: Optional[pd.DataFrame] = None, **kwargs):
        if not isinstance(
            kickstarter_df,
            pd.DataFrame,
        ):
            kickstarter_df = self.get_kickstarter_df()
        categories = self.get_categories(kickstarter_df)

        self.param.kickstarter_df.default = kickstarter_df
        self.param.categories.default = categories
        self.param.categories.objects = categories
        self.param.scatter_df.default = kickstarter_df
        self.param.bar_df.default = kickstarter_df

        super().__init__(**kwargs)

    @param.depends(
        "kickstarter_df",
        "categories",
        watch=True,
    )
    def _set_scatter_df(
        self,
    ):
        self.scatter_df = self.filter_on_categories(
            self.kickstarter_df,
            self.categories,
        )

    @param.depends("scatter_df")
    def scatter_plot_view(
        self,
    ):
        """A Reactive View of the scatter plot"""
        # Potential improvements:
        # - Rename columns to capitalized labels without underscores
        # - Add the name of the project to the hover tooltip
        scatter_plot = self.get_scatter_plot(self.scatter_df)
        # Note: depending on how the scatter_plot is generated, it might be a Scatter
        # or an NdOverlay object.
        # In the first case use scatter_plot; in the second, use scatter_plot.last.
        self.rangexy.source = scatter_plot.last
        return scatter_plot

    @param.depends(
        "scatter_df",
        "rangexy.x_range",
        "rangexy.y_range",
        watch=True,
    )
    def _set_bar_df(
        self,
    ):
        """Update the bar_df dataframe"""
        self.bar_df = self.filter_on_ranges(
            self.scatter_df,
            self.rangexy.x_range,  # pylint: disable=no-member
            self.rangexy.y_range,  # pylint: disable=no-member
        )

    @param.depends("bar_df")
    def bar_chart_view(
        self,
    ):
        """A Reactive View of the Bar Chart"""
        return self.get_bar_chart(self.bar_df)

    def view(
        self,
    ):
        """A Reactive View of the KickstarterDashboard"""
        return pn.Column(
            pn.pane.Markdown(__doc__),
            pn.layout.HSpacer(height=25),
            pn.Row(
                pn.Column(self.scatter_plot_view, self.bar_chart_view, sizing_mode="stretch_width"),
                pn.Param(
                    self.param.categories,
                    widgets={
                        "categories": {
                            "max_width": 125,
                            "size": len(self.categories),
                        }
                    },
                    width=150,
                    height=500,
                    sizing_mode="fixed",
                ),
                sizing_mode="stretch_width",
            ),
            sizing_mode="stretch_width",
        )

    @staticmethod
    def _extract() -> pd.DataFrame:
        """Extracts the kickstarter data into a DataFrame

        Returns:
            pd.DataFrame -- A Dataframe of kickstarter data with
            columns=["created_at", "usd_pledged", "state", "category_slug"]
        """
        return pd.read_csv(
            KICKSTARTER_PATH,
            parse_dates=DATE_COLUMNS,
        )

    @staticmethod
    def _transform(
        source_data: pd.DataFrame,
        n_samples: int = N_SAMPLES,
    ) -> pd.DataFrame:
        """Transform the data by

        - adding broader_category,
        - converting usd_pledged to millions
        - sampling to n_samples

        Arguments:
            source_data {pd.DataFrame} -- The source kickstarter data

        Returns:
            pd.DataFrame -- The transformed DataFrame with
            columns=["created_at", "usd_pledged", "state", "category_slug", "broader_category"]
        """
        source_data["broader_category"] = source_data["category_slug"].str.split("/").str.get(0)
        source_data["usd_pledged"] = source_data["usd_pledged"] / 10 ** 6
        return source_data.sample(n_samples)

    @classmethod
    def get_kickstarter_df(
        cls,
    ) -> pd.DataFrame:
        """The Dataframe of Kickstarter Data

        Returns:
            [pd.DataFrame] -- The Dataframe of Kickstarter Data
        """
        source_data = cls._extract()
        kickstarter_df = cls._transform(source_data)
        return kickstarter_df

    @staticmethod
    def get_categories(
        kickstarter_df,
    ) -> List[str]:
        """The list of kickstarter broader categories

        Arguments:
            kickstarter_df {[type]} -- [description]

        Returns:
            List[str] -- [description]
        """
        return list(kickstarter_df["broader_category"].unique())

    @classmethod
    def filter_on_categories(
        cls,
        kickstarter_df: pd.DataFrame,
        categories: List[str],
    ) -> pd.DataFrame:
        """Filters the kickstarter_df by the specified categories

        Arguments:
            kickstarter_df {pd.DataFrame} -- A Kickstarter Dataframe
            categories {List[str]} -- The list of broader_category in the DataFrame

        Returns:
            pd.DataFrame -- The filtered DataFrame
        """
        if categories is None or categories == []:
            categories = cls.get_categories(kickstarter_df)
        categories_filter = kickstarter_df["broader_category"].isin(categories)
        return kickstarter_df[categories_filter]

    @staticmethod
    def filter_on_ranges(
        kickstarter_df: pd.DataFrame,
        x_range,
        y_range,
    ) -> pd.DataFrame:
        """Filter the kickstarter_df by x_range and y_range

        Arguments:
            kickstarter_df {pd.DataFrame} -- A Kickstarter DataFrame
            x_range {tuple} -- The created_at range
            y_range {tuple} -- The usd_pledged range

        Returns:
            pd.DataFrame -- The filtered DataFrame
        """
        sub_df = kickstarter_df
        if y_range:
            y_filter = (kickstarter_df["usd_pledged"] >= y_range[0]) & (
                kickstarter_df["usd_pledged"] <= y_range[1]
            )
            sub_df = sub_df[y_filter]
        if x_range:
            x_filter = (kickstarter_df["created_at"] >= x_range[0]) & (
                kickstarter_df["created_at"] <= x_range[1]
            )
            sub_df = sub_df[x_filter]
        return sub_df

    @staticmethod
    def get_scatter_plot(
        kickstarter_df: pd.DataFrame,
    ):  # pylint: disable=missing-return-type-doc
        """A Scatter plot of the kickstarter_df

        Arguments:
            kickstarter_df {pd.DataFrame} -- The DataFrame of kickstarter data

        Returns:
            [type] -- A Scatter plot
        """
        # Potential improvements:
        # - Rename columns to capitalized labels without underscores
        # - Add the name of the project to the hover tooltip
        kickstarter_df["color"] = kickstarter_df["state"]
        return kickstarter_df.hvplot.scatter(
            x="created_at",
            y="usd_pledged",
            # color="color",
            by="state",
            cmap=list(CMAP.values()),
            height=400,
            responsive=True,
            yformatter="%.1fM",
        )

    @staticmethod
    def get_bar_chart(
        kickstarter_df: pd.DataFrame,
    ):  # pylint: disable=missing-return-type-doc
        """A bar chart of the kickstarter_df

        Arguments:
            kickstarter_df {pd.DataFrame} -- A DataFrame of Kickstarter data

        Returns:
            [type] -- A bar chart of the kickstarter_df
        """
        # Potential improvements:
        # - Sort by "Number of projects" descending to make it easier to see which
        #   categories are large and small

        # Filter
        stacked_barchart_df = (
            kickstarter_df[
                [
                    "broader_category",
                    "state",
                    "created_at",
                ]
            ]
            .groupby(
                [
                    "broader_category",
                    "state",
                ]
            )
            .count()
            .rename(columns={"created_at": "Number of projects"})
        )

        # Plot
        bar_chart = stacked_barchart_df.hvplot.bar(
            stacked=True,
            height=400,
            responsive=True,
            xlabel="Number of projects",
            cmap=CMAP,
        )
        return bar_chart
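
The pattern this dashboard uses to rebut the second claim (linking a chart back to a DataFrame) reduces to a few lines: attach a RangeXY stream to a plot and let a DynamicMap re-filter the frame. A hedged standalone sketch with made-up column names:

import holoviews as hv
import numpy as np
import pandas as pd

hv.extension('bokeh')

df = pd.DataFrame({'x': np.random.randn(200), 'y': np.random.randn(200)})
points = hv.Points(df, kdims=['x', 'y'])
rangexy = hv.streams.RangeXY(source=points)

def filtered_table(x_range, y_range):
    # Re-filter the backing DataFrame whenever the plot is zoomed or panned.
    sub = df
    if x_range:
        sub = sub[sub['x'].between(*x_range)]
    if y_range:
        sub = sub[sub['y'].between(*y_range)]
    return hv.Table(sub)

layout = points + hv.DynamicMap(filtered_table, streams=[rangexy])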
Example #3
class OverallParameters(param.Parameterized):
    localisation = param.String(default="Jegun", label="")
    score = param.Range(default=(0, 250), bounds=(0, 250),)

    tout_axes = param.Boolean(False, label="")
    interfaces_num = param.ListSelector(label="")
    infos_num = param.ListSelector(label="")
    comp_admin = param.ListSelector(label="")
    comp_usage_num = param.ListSelector(label="")

    point_ref = param.Selector(
        default=SELECT[2], objects=SELECT, label="Point de référence",
    )

    niveau_observation = param.Selector(
        default=SELECT[2], objects=SELECT, label="Niveau d'observation",
    )

    niveau_details = param.Selector(
        default=SELECT[2], objects=SELECT, label="Niveau de détail",
    )

    donnees_infra = param.Action(
        lambda x: x, doc="""Données Infra-Communales""", precedence=0.7
    )

    file_name = param.String(
        default="Export_mednum.csv",
        doc="""
      The filename to save to.""",
    )
    edit_report = param.Action(
        lambda x: x.timestamps.append(dt.datetime.utcnow()),
        doc="""Editer un rapport""",
        precedence=0.7,
    )
    tiles = gv.tile_sources.StamenTerrain

    df_merged = param.DataFrame()
    df_score = param.DataFrame()

    def __init__(self, **params):
        super(OverallParameters, self).__init__(**params)
        interim_data, cont_iris, indice_frag = self.define_paths()

        # Merged
        output_data_path = interim_data / "add_geom_data_to_merged_data.trc.pqt"
        if output_data_path.exists():
            import geopandas as gpd

            self.df_merged = gpd.read_parquet(output_data_path)
        else:
            self.df_merged = add_geom_data_to_merged_data(
                iris_df(cont_iris), read_merged_data(indice_frag)
            )

        # Create multindex
        self.set_dataframes_indexes()
        self.set_dataframes_level()

        # Create widgets for indicators
        self.define_indices_params()

        # Define what is level 0 and level 1 to consider
        self.set_entity_levels()

        # What is selected in each level
        self.get_selected_indice_by_level()

        # Define define_searchable_element
        self.define_searchable_element()

        self.score_calculation()

        # Download
        self.download = pn.widgets.FileDownload(
            label="""Exporter les résultats""",
            filename=self.file_name,
            callback=self._download_callback,
        )

    def define_paths(self):
        data_path = Path("../data")

        if not data_path.exists():
            data_path = Path("./data")
        if not data_path.exists():
            data_path = Path("../../data")
        raw_data = data_path / "raw/"

        external_data = data_path / "external/"

        interim_data = data_path / "interim/"

        cont_iris = external_data / "france-geojson" / "contours-iris.geojson"

        processed_data = data_path / "processed/"  # assumed to sit next to raw/, external/ and interim/
        indice_frag = processed_data / "MERGE_data_clean.csv"
        return interim_data, cont_iris, indice_frag

    def define_searchable_element(self):
        self.searchable_localisation = list(
            self.df_merged.index.get_level_values(self.level_0_column_names).unique()
        )

    def define_indices_params(self):
        """ 
        Create all indices parameters -> Will become a TreeCheckBox or Checkbox
        """
        self.g_params = []
        for k, widget_opts in TREEVIEW_CHECK_BOX.items():
            # See whether the description can be passed in
            widgets_params = self.create_checkbox_type_widget_params(widget_opts)

            self.g_params.append(pn.Param(self.param[k], widgets={k: widgets_params}))

    def _download_callback(self):
        """
            A FileDownload callback will return a file-like object which can be serialized
            and sent to the client.
            """
        self.file_name = "Export_%s.csv" % self.point_ref
        self.download.filename = self.file_name
        sio = io.StringIO()
        self.df_score.drop("geometry", axis=1).to_csv(sio, index=False)
        sio.seek(0)
        return sio

    def get_params(self):
        parameter_names = [par[0] for par in self.get_param_values()]
        return pn.Param(
            self.param,
            parameters=[par for par in parameter_names if par != "df_merged"],
        )

    def set_dataframes_level(self):
        real_name_level = []
        for col in self.df_merged.columns:
            if col in CATEGORIES_INDICES.keys():
                real_name_level.append((col, CATEGORIES_INDICES[col]))
            else:
                real_name_level.append((col, col))

        self.df_merged.columns = pd.MultiIndex.from_tuples(
            real_name_level, names=["variable", "nom"]
        )

    def set_dataframes_indexes(self):
        indexes = list(
            set(
                list(MAP_COL_WIDGETS["level_0"].values())
                + list(MAP_COL_WIDGETS["level_1"].values())
            )
        )
        self.df_merged.set_index(indexes, inplace=True)

    @pn.depends("localisation", "point_ref", watch=True)
    def set_entity_levels(self):
        """Set the entity levels and point values for this entity.
        """
        self.level_0_column, self.level_1_column = (
            MAP_COL_WIDGETS["level_0"]["index"],
            MAP_COL_WIDGETS["level_1"][self.point_ref],
        )
        self.level_0_column_names = MAP_COL_WIDGETS["level_0"]["names"]
        self.level_0_value = self.localisation

    @pn.depends(
        "tout_axes",
        "interfaces_num",
        "infos_num",
        "comp_admin",
        "comp_usage_num",
        watch=True,
    )
    def get_selected_indice_by_level(self):
        """get the indices of the selected column

        Args:
            self ([type]): [description]

        Returns:
            [type]: [description]
        """
        param_values = {k: v for k, v in self.param.get_param_values()}
        selected_col = []
        for axe, indices in param_values.items():
            if axe in TREEVIEW_CHECK_BOX.keys() and indices:
                for indice in indices:
                    try:
                        selected_col += [CATEGORIES_INDICES_REV[indice]]
                    except KeyError:  # indice not in the reverse mapping
                        pass

        self.selected_indices_level_0 = list(set(selected_col))
        self.selected_indices_level_1 = list(set(selected_col))
        return self.selected_indices_level_0, self.selected_indices_level_1

    def create_checkbox_type_widget_params(self, widget_opts):
        """Create dict of widget type and checkbox params .

        Args:
            widget_opts ([type]): [description]

        Returns:
            [type]: [description]
        """
        if len(widget_opts.items()) > 3:
            select_options = [
                val["nom"]
                for opt, val in widget_opts.items()
                if opt not in ["nom", "desc"]
            ]
            descriptions = [
                val["desc"]
                for opt, val in widget_opts.items()
                if opt not in ["nom", "desc"]
            ]
            widget_type = TreeViewCheckBox
            widgets_params = {
                "type": widget_type,
                "select_options": select_options,
                "select_all": widget_opts["nom"],
                "desc": descriptions,
            }
        else:
            descriptions = widget_opts["desc"]
            widget_type = Checkbox
            widgets_params = {
                "name": widget_opts["nom"],
                "type": widget_type,
                "value": True,
                "desc": descriptions,
            }
        return widgets_params

    @staticmethod
    def set_real_name(df):
        real_name_level = []
        for col in df.columns:
            if col in CATEGORIES_INDICES.keys():
                real_name_level.append((col, CATEGORIES_INDICES[col]))
            else:
                real_name_level.append((col, col))

        return real_name_level

    def info_localisation(self):
        info_loc = {}
        index = self.df_merged.xs(
            self.localisation, level=self.level_0_column_names, drop_level=False
        ).index
        ids = index.unique().to_numpy()[0]
        names = index.names
        for k, v in zip(names, ids):
            info_loc[k] = v
        return info_loc

    def get_indices_properties(self):
        indices_properties = {}
        import copy

        tree = copy.deepcopy(TREEVIEW_CHECK_BOX)
        for indic_dict in tree.values():
            indic_dict.pop("nom", None)
            indic_dict.pop("desc", None)
            indices_properties.update(indic_dict)
        return indices_properties

    @pn.depends(
        "localisation",
        "point_ref",
        "tout_axes",
        "interfaces_num",
        "infos_num",
        "comp_admin",
        "comp_usage_num",
        watch=True,
    )
    def score_calculation(self):
        indices_properties = self.get_indices_properties()
        selected_indices = self.selected_indices_level_0
        df = self.df_merged.copy().droplevel("nom", axis=1)
        info_loc = self.info_localisation()
        if selected_indices != []:
            selected_indices_aggfunc = {
                k: indices_properties[k]["aggfunc"] for k in selected_indices
            }

            #
            map_info = [self.level_0_column_names]
            vdims = map_info + selected_indices

            # Aggregate with the specified function (mean, median) at the
            # level_1_column level over the selected indices selected_indices_aggfunc

            score_agg_niveau = (
                df.xs(
                    info_loc[self.level_1_column],
                    level=self.level_1_column,
                    drop_level=False,
                )
                .groupby(self.level_1_column)
                .agg(selected_indices_aggfunc)
            )

            # Divide by the aggregate over the level_1_column zone (weighting)
            score_niveau = (
                df.xs(
                    info_loc[self.level_1_column],
                    level=self.level_1_column,
                    drop_level=False,
                )[selected_indices].div(score_agg_niveau)
                * 100
            )

            # Dissolve (i.e. aggregate geographically) at the desired level_0_column subdivision
            df = df.xs(
                info_loc[self.level_1_column],
                level=self.level_1_column,
                drop_level=False,
            ).dissolve(
                by=[self.level_0_column, self.level_0_column_names],
                aggfunc=selected_indices_aggfunc,
            )
            # Score for the indices, merged on the index to recover the geometry.
            # _BRUT: the initial value
            # _SCORE: the indice's score over the level_0_column subdivision, divided by
            # the aggregation function at level_1_column
            scores = df.merge(
                score_niveau,
                on=[self.level_0_column, self.level_0_column_names],
                suffixes=("_BRUT", "_SCORE"),
            ).drop_duplicates()  # Drop duplicates (homogeneous zones)

            # Compute the scores for each axis and in total
            number_axes = 0
            for axe, indices in AXES_INDICES.items():
                selected_in_axes = [
                    k + "_SCORE" for k in indices.keys() if k in selected_indices
                ]
                if selected_in_axes != []:
                    scores.loc[:, axe] = scores[selected_in_axes].mean(axis=1)
                    number_axes += 1
                else:
                    scores.loc[:, axe] = 0

            # Total score
            scores.loc[:, "tout_axes"] = scores[list(AXES_INDICES.keys())].sum(axis=1)
            if number_axes != 0:
                scores.loc[:, "tout_axes"] /= number_axes

            #
            self.df_score = df.merge(
                scores, on=[self.level_0_column, self.level_0_column_names, "geometry"]
            ).drop_duplicates()  # Remove duplicates for communes split into IRIS zones

        else:
            df = df.xs(
                info_loc[self.level_1_column],
                level=self.level_1_column,
                drop_level=False,
            ).dissolve(
                by=[self.level_0_column, self.level_0_column_names],
                # aggfunc='first',
            )

            for axe, indices in AXES_INDICES.items():
                df.loc[:, axe] = 0
            df.loc[:, "tout_axes"] = 0
            self.df_score = df
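
The weighting step in score_calculation (aggregate an indicator at one level, then express each row as a percentage of that aggregate) can be written compactly with a groupby-transform. A toy sketch with invented columns, equivalent in spirit to the xs/agg/div sequence above:

import pandas as pd

df = pd.DataFrame({
    'dept': ['32', '32', '31'],
    'commune': ['Jegun', 'Auch', 'Toulouse'],
    'indice': [10.0, 20.0, 30.0],
}).set_index(['dept', 'commune'])

# Each commune's indicator as a percentage of its department's mean.
df['indice_score'] = (
    df['indice'] / df.groupby(level='dept')['indice'].transform('mean') * 100
)
print(df)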
Example #4
class OutputControl(param.Parameterized):
    output_control_option = param.ObjectSelector(
        default='Specify output frequency (OC)',
        objects=['Specify output frequency (OC)', 'Specify autobuild (SERIES AWRITE)'],
        doc='Output control option to specify output time using a time series (OC) or an autobuild time series (OS).',
        precedence=1,
    )
    oc_time_series_id = param.Integer(
        default=0,
        bounds=(0, None),
        doc='OC: Time Series ID for output times.',
        precedence=2,
    )
    output_flow_strings = param.DataFrame(
        default=None,
        doc='FLX: CARD S_ID, '
            'CARD - FLX, '
            'S_ID - String ID for mid string or edge string for which flow is to be output.',
    )
    print_adaptive_mesh = param.Boolean(
        default=False,
        doc='PC ADP: Adaptive mesh printing active.',
        precedence=4,
    )
    print_numerical_fish_surrogate = param.Boolean(
        default=False,
        doc='PC ELM: Print numerical fish surrogate information in TecPlot format.',
        precedence=5,
    )
    screen_output_residual = param.Boolean(
        default=False,
        doc='SOUT RESID: output the residual to the screen.',
        precedence=6,
    )
    screen_output_all = param.Boolean(
        default=False,
        doc='SOUT ALL: output all information to the screen.',
        precedence=7,
    )
    screen_output_mass_error = param.Boolean(
        default=False,
        doc='SOUT MERROR: Screen output mass error active.',
        precedence=8,
    )
    screen_output_worst_nonlinear_node = param.Boolean(
        default=False,
        doc='SOUT NLNODE: output the id of the worst nonlinear node.',
        precedence=9,
    )
    screen_output_worst_linear_node = param.Boolean(
        default=False,
        doc='SOUT LNODE: output the id of the worst linear node.',
        precedence=10,
    )
    file_output_wind = param.Boolean(
        default=False,
        doc='FOUT WIND: output wind to a file.',
        precedence=11,
    )
    file_output_wave = param.Boolean(
        default=False,
        doc='FOUT WAVE: output wave to a file.',
        precedence=12,
    )
    file_output_adapted_grid = param.Boolean(
        default=False,
        doc='FOUT ADAPT GRID: write adapted grid to a file.',
        precedence=13,
    )
    file_output_adapted_solution = param.Boolean(
        default=False,
        doc='FOUT ADAPT SW: write the adapted grid and solution to a file.',
        precedence=14,
    )
    file_output_adapted_transport = param.Boolean(
        default=False,
        doc='FOUT ADAPT CON: write the adapted transport files (does not include sediment).',
        precedence=15,
    )
    file_output_sediment = param.Boolean(
        default=False,
        doc='FOUT SED: write the adapted sediment files.',
        precedence=16,
    )

    def panel(self):
        return pn.panel(self.param, show_name=False)
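
Because every parameter above carries a precedence, the generated widgets come out in a stable order and pn.Param's display_threshold can trim the list. A possible usage sketch (not from the original source):

import panel as pn

oc = OutputControl()
full = oc.panel()  # all widgets, ordered by precedence
# Hide widgets whose precedence is below 4, leaving only the boolean toggles.
compact = pn.Param(oc.param, display_threshold=4, show_name=False)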
Example #5
class HpcJobMonitor(HpcConfigurable):
    uit_client = param.ClassSelector(Client)
    jobs = param.List()
    update = param.Action(lambda self: self.update_statuses())
    statuses = param.DataFrame()
    selected_job = param.ObjectSelector(label='Job')
    active_sub_job = param.ObjectSelector(label='Iteration')
    log = param.ObjectSelector(objects=[], label='Log File')
    custom_logs = param.List(default=[])
    num_log_lines = param.Integer(default=100, label='n')
    file_viewer = param.ClassSelector(FileViewer)
    ready = param.Boolean()
    next_btn = param.Action(lambda self: self.next(), label='Next')

    def __init__(self, **params):
        super().__init__(**params)
        self.tabs = [
            ('Status', self.status_panel),
            ('Logs', self.logs_panel),
            ('Files', self.file_browser_panel),
        ]

    def next(self):
        self.ready = True

    @param.output(finished_job_ids=list)
    def finished_jobs(self):
        return self.statuses[self.statuses['status'] == 'F']['job_id'].tolist()

    @param.depends('jobs', watch=True)
    def update_selected_job(self):
        self.param.selected_job.names = {j.job_id: j for j in self.jobs}
        self.param.selected_job.objects = self.jobs
        self.selected_job = self.jobs[0] if self.jobs else None

    @param.depends('selected_job', watch=True)
    def update_statuses(self):
        self.statuses = None
        sub_jobs = self.selected_job.sub_jobs
        self.statuses = PbsJob.update_statuses(sub_jobs, as_df=True)
        objects = [j for j in sub_jobs if j.status != 'Q']
        self.param.active_sub_job.names = {j.job_id: j for j in objects}
        self.param.active_sub_job.objects = objects
        if objects:
            self.active_sub_job = objects[0]

    @param.depends('active_sub_job', watch=True)
    def update_log(self):
        self.param.log.objects = ['stdout', 'stderr'] + [
            self.active_sub_job.resolve_path(p) for p in self.custom_logs
        ]
        self.log = 'stdout'

    @param.depends('active_sub_job')
    def out_log(self):
        return self.get_log(lambda job: job.get_stdout_log())

    @param.depends('active_sub_job')
    def err_log(self):
        return self.get_log(lambda job: job.get_stderr_log())

    @param.depends('active_sub_job')
    def x_log(self, log_file):
        try:
            return self.get_log(lambda job: job.get_custom_log(
                log_file, num_lines=self.num_log_lines))
        except RuntimeError as e:
            log.exception(e)

    def get_log(self, func):
        job = self.active_sub_job
        if job is not None:
            log_contents = func(job)
            return pn.pane.Str(log_contents, width=800)

    @param.depends('statuses')
    def statuses_panel(self):
        statuses = pn.panel(self.statuses, width=1300) \
            if self.statuses is not None \
            else pn.pane.GIF(resource_filename('panel', 'assets/spinner.gif'))
        return statuses

    @param.depends('selected_job')
    def status_panel(self):
        if self.selected_job:
            return pn.Column(
                self.statuses_panel,
                pn.Param(self.param.update,
                         widgets={
                             'update': {
                                 'button_type': 'primary',
                                 'width': 100
                             }
                         }),
            )
        else:
            return pn.pane.HTML('<h2>No jobs are available</h2>')

    @param.depends('active_sub_job', 'log')
    def log_pane(self):
        if self.log == 'stdout':
            return self.out_log()
        elif self.log == 'stderr':
            return self.err_log()
        else:
            return self.x_log(self.log)

    @param.depends('jobs')
    def logs_panel(self):
        return pn.Column(
            pn.Param(self,
                     parameters=['active_sub_job', 'log'],
                     show_name=False,
                     width=300),
            self.log_pane,
        )

    @param.depends('uit_client', watch=True)
    def configure_file_viewer(self):
        self.file_viewer = FileViewer(uit_client=self.uit_client)
        self.file_viewer.configure_file_selector()

    @param.depends('selected_job')
    def file_browser_panel(self):
        viewer = self.file_viewer.panel if self.file_viewer else pn.Spacer()
        if self.file_viewer and self.selected_job:
            self.file_viewer.file_path = str(self.selected_job.working_dir)
        return pn.Column(
            viewer,
            name='Files',
            width_policy='max',
        )

    def panel(self):
        return pn.Column(
            '# Job Status',
            pn.Row(
                pn.panel(self.param.selected_job, width_policy='max'),
                pn.Param(self.param.next_btn,
                         widgets={
                             'next_btn': {
                                 'button_type': 'success',
                                 'width': 100
                             }
                         }),
            ),
            pn.layout.Tabs(*self.tabs, ),
        )
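
The watcher cascade above (jobs -> selected_job -> active_sub_job -> log) is a standard Param idiom: each watcher repopulates the next selector's objects and picks a default. A minimal standalone sketch with toy names:

import param

class Cascade(param.Parameterized):
    jobs = param.List(default=[])
    selected_job = param.ObjectSelector()

    @param.depends('jobs', watch=True)
    def _update_selected_job(self):
        # Repopulate the selector whenever the upstream list changes.
        self.param.selected_job.objects = self.jobs
        self.selected_job = self.jobs[0] if self.jobs else None

c = Cascade()
c.jobs = ['job-1', 'job-2']  # triggers the watcher
print(c.selected_job)        # -> 'job-1'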
Example #6
class ObservationsExplorer(param.Parameterized):
    """Param interface for inspecting observations"""
    observation_df = param.DataFrame(
        doc='The DataFrame for the observations.',
        precedence=-1  # Don't show widget
    )
    images_df = param.DataFrame(
        doc='The DataFrame for the images from the selected observations.',
        precedence=-1  # Don't show widget
    )
    show_recent = param.Boolean(label='Show recent observations',
                                doc='Show recent observations',
                                default=True)
    search_name = param.String(
        label='Coordinates for object',
        doc='Field name for coordinate lookup',
    )
    coords = param.XYCoordinates(label='RA/Dec Coords [deg]',
                                 doc='RA/Dec Coords [degrees]',
                                 default=(0, 0))
    radius = param.Number(label='Search radius [deg]',
                          doc='Search radius [degrees]',
                          default=15.,
                          bounds=(0, 180),
                          softbounds=(0, 25))
    time = param.DateRange(
        label='Date Range',
        default=(pendulum.parse('2016-01-01').replace(tzinfo=None), now),
        bounds=(pendulum.parse('2016-01-01').replace(tzinfo=None), now))
    min_num_images = param.Integer(doc='Minimum number of images.',
                                   default=1,
                                   bounds=(1, 50),
                                   softbounds=(1, 10))
    unit_id = param.ListSelector(
        doc='Unit IDs',
        label='Unit IDs',
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        logger.debug(f'Getting recent stats from {BASE_URL}')
        self._observations_path = download_file(f'{BASE_URL}',
                                                cache='update',
                                                show_progress=False,
                                                pkgname='panoptes')
        self._observations_df = pd.read_csv(
            self._observations_path).convert_dtypes()

        # Setup up widgets

        # Set some default for the params now that we have data.
        units = sorted(self._observations_df.unit_id.unique())
        units.insert(0, 'The Whole World! 🌎')
        self.param.unit_id.objects = units
        self.unit_id = [units[0]]

        # Create the source objects.
        self.update_dataset()

    @param.depends('coords', 'radius', 'time', 'min_num_images', 'unit_id',
                   'search_name')
    def update_dataset(self):
        if self.show_recent:
            # Get just the recent result on initial load
            df = search_observations(ra=180,
                                     dec=0,
                                     radius=180,
                                     start_date=now.subtract(months=1),
                                     end_date=now,
                                     min_num_images=1,
                                     source=self._observations_df).sort_values(
                                         by=['time', 'unit_id', 'camera_id'],
                                         ascending=False)
        else:
            # If using the default unit_ids option, then search for all.
            unit_ids = self.unit_id
            if unit_ids == self.param.unit_id.objects[0:1]:
                unit_ids = self.param.unit_id.objects[1:]

            if self.search_name != '':
                coords = SkyCoord.from_name(self.search_name)
                self.coords = (round(coords.ra.value,
                                     3), round(coords.dec.value, 3))

            # Search for the observations given the current params.
            df = search_observations(ra=self.coords[0],
                                     dec=self.coords[1],
                                     radius=self.radius,
                                     start_date=self.time[0],
                                     end_date=self.time[1],
                                     min_num_images=self.min_num_images,
                                     unit_id=unit_ids).sort_values(
                                         by=['time', 'unit_id', 'camera_id'],
                                         ascending=False)

        df.time = pd.to_datetime(df.time)
        cds = ColumnDataSource(data=df, name='observations_source')

        def obs_row_selected(attrname, old_row_index, new_row_index):
            # We only lookup one even if they select multiple rows.
            newest_index = new_row_index[-1]
            row = df.iloc[newest_index]
            print(f'Looking up sequence_id={row.sequence_id}')
            self.images_df = get_metadata(sequence_id=row.sequence_id)
            if self.images_df is not None:
                self.images_df = self.images_df.dropna()

        cds.selected.on_change('indices', obs_row_selected)

        return cds

    @param.depends("images_df")
    def selected_title(self):
        try:
            sequence_id = self.images_df.sequence_id.iloc[0]
        except AttributeError:
            sequence_id = ''
        return pn.panel(f'<h5>{sequence_id}</h5>')

    @param.depends('images_df')
    def image_table(self):
        columns = [('time', 'Time [UTC]')]
        try:
            images_table = self.images_df.hvplot.table(columns=columns).opts(
                width=250,
                height=100,
                title=f'Images ({len(self.images_df)})',
            )
        except AttributeError:
            images_table = self.images_df

        return images_table

    @param.depends('images_df')
    def image_preview(self):
        try:
            image_url = self.images_df.public_url.dropna().iloc[0].replace(
                '.fits.fz', '.jpg')
            return pn.pane.HTML(f'''
                <div class="media" style="width: 300px; height: 200px">
                    <a href="{image_url}" target="_blank">
                      <img src="{image_url}" class="card-img-top" alt="Observation Image">
                    </a>
                </div>
            ''')
        except AttributeError:
            return ''

    @param.depends('images_df')
    def fits_file_list_to_csv_cb(self):
        """ Generates a CSV file from current image list."""
        df = self.images_df.public_url.dropna()
        sio = StringIO()
        df.to_csv(sio, index=False, header=False)
        sio.seek(0)
        return sio

    def table_download_button(self):
        """ A button for downloading the images CSV."""
        try:
            sequence_id = self.images_df.sequence_id.iloc[0]
            return pn.widgets.FileDownload(
                callback=self.fits_file_list_to_csv_cb,
                filename=f'fits-list-{sequence_id}.txt',
                label='Download FITS List (.txt)',
            )
        except AttributeError:
            return ''

    def sources_download_button(self):
        try:
            sequence_id = self.images_df.sequence_id.iloc[0]
            parquet_url = f'{OBSERVATIONS_BASE_URL}/{sequence_id}-sources.parquet'
            source_btn = pn.widgets.Button(
                name='Download sources list (.parquet)', )

            source_btn.js_on_click(args=dict(url=parquet_url),
                                   code='''
                window.open(url, '_blank')
            ''')

            return source_btn
        except AttributeError:
            return ''

    def table(self):
        columns = [
            TableColumn(
                field="unit_id",
                title="Unit ID",
                width=60,
            ),
            TableColumn(
                field="camera_id",
                title="Camera ID",
                width=60,
            ),
            TableColumn(
                field="time",
                title="Time [UTC]",
                formatter=DateFormatter(format='%Y-%m-%d %H:%M'),
                width=130,
            ),
            TableColumn(
                field="field_name",
                title="Field Name",
                width=240,
            ),
            TableColumn(
                field="ra",
                title="RA [deg]",
                formatter=NumberFormatter(format="0.000"),
                width=70,
            ),
            TableColumn(
                field="dec",
                title="Dec [deg]",
                formatter=NumberFormatter(format="0.000"),
                width=70,
            ),
            TableColumn(
                field="num_images",
                title="Images",
                width=40,
            ),
            TableColumn(
                field="status",
                title="Status",
                width=75,
            ),
            TableColumn(
                field="exptime",
                title="Exptime [sec]",
                formatter=NumberFormatter(format="0.00"),
                width=60,
            ),
            TableColumn(
                field="total_minutes_exptime",
                title="Total Minutes",
                formatter=NumberFormatter(format="0.0"),
                width=60,
            ),
        ]

        cds = self.update_dataset()
        data_table = DataTable(
            source=cds,
            name='observations_table',
            columns=columns,
            index_position=None,
            min_width=1100,
            fit_columns=True,
            sizing_mode='stretch_both',
        )

        return data_table
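
The row-selection hookup inside update_dataset follows the standard Bokeh ColumnDataSource pattern; isolated, it looks like this (a hedged sketch):

from bokeh.models import ColumnDataSource

cds = ColumnDataSource(data={'x': [1, 2, 3]})

def on_selected(attr, old, new):
    # `new` holds the indices of the currently selected rows.
    print('selected rows:', new)

cds.selected.on_change('indices', on_selected)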
Example #7
class Model(param.Parameterized):
    meta = param.Dict(default={
        'Analyst': "N/A",
        'Element': "N/A",
    })
    g_inputs = param.ClassSelector(GlobalInputs,
                                   GlobalInputs(),
                                   instantiate=False)
    u_inputs = param.Dict(
        default={'uncertainty': ng.NormalRandom(mu=1, sigma=.25)})
    uncertainty = param.Number(1)
    simulate = param.Action(lambda self: self.run_simulation(100))
    results = param.DataFrame(
        default=pd.DataFrame(
            columns=['Element', 'APPN', 'BaseYear', 'FY', 'Value']),
        doc=
        '''Results container shared by all estimates. This value should be updated using the generic _calc method''',
        columns=set(['Element', 'APPN', 'BaseYear', 'FY', 'Value']),
        precedence=.1)
    sim_results = param.DataFrame(precedence=.1)

    def __init__(self, **params):
        super().__init__(**params)
        # Automatically set results to a calculation given defaults
        self._calc()

    def _calc(self):
        print(self.name, "Not Implemented")
        self.results = self.results  # reassign to (re)trigger the results watcher

    @param.depends('results', watch=True)
    def _update_results(self):
        self.results['Value'] = self.results['Value'] * self.uncertainty

    def _prepare_sim(self):
        if self.u_inputs is not None:
            self.param.set_param(**self.u_inputs)

    def _end_sim(self):
        if self.u_inputs is not None:
            for key, val in self.u_inputs.items():
                self.param.set_param(**{key: self.param[key].default})
        self._calc()

    def run_simulation(self,
                       trials=100,
                       clear_previous_sim=True,
                       agg_results=True,
                       agg_columns=['APPN', 'FY']):
        self._prepare_sim()
        if clear_previous_sim:
            self.sim_results = pd.DataFrame()
        for i in range(trials):
            self._prepare_sim()
            self._calc()
            if agg_results:
                trial_results = (self.results.groupby(by=agg_columns)['Value']
                                 .sum().reset_index().assign(Trial=i))
            else:
                trial_results = self.results.assign(Trial=i)
            # DataFrame.append was removed in pandas 2.0; use pd.concat instead
            self.sim_results = pd.concat([self.sim_results, trial_results])
        self._end_sim()

    def run_simulation_parallel(self,
                                trials=100,
                                agg_results=True,
                                agg_columns=['APPN', 'FY']):
        import multiprocessing
        with multiprocessing.Pool() as pool:
            pool.map(self.run_simulation, range(len(self.models)))

    def build_panel_app(self):
        self.app = pn.panel(self)

    def build_app(self):
        try:
            import panel  # noqa: F401
            self.build_panel_app()
        except ImportError:
            try:
                import ipywidgets  # noqa: F401
            except ImportError:
                print(
                    "No dashboard apps available. Try installing panel or ipywidgets"
                )
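
A hedged sketch of how a concrete estimate might plug into this base class: override _calc to populate results, and run_simulation resamples the uncertain inputs each trial. LaborEstimate and its numbers are invented for illustration, and the sketch assumes Model's own dependencies (GlobalInputs, numbergen as ng) are importable:

import pandas as pd

class LaborEstimate(Model):
    def _calc(self):
        # Toy deterministic phasing; the results watcher then applies `uncertainty`.
        self.results = pd.DataFrame({
            'Element': ['Labor'] * 3,
            'APPN': ['OM'] * 3,
            'BaseYear': [2024] * 3,
            'FY': [2024, 2025, 2026],
            'Value': [100.0, 110.0, 120.0],
        })

est = LaborEstimate()
est.run_simulation(trials=10)
print(est.sim_results.groupby('FY')['Value'].describe())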
Example #8
class OrthoAnnotationDashBoard(OrthoSegmentationDashBoard):

    annot_channel = param.Parameter(doc='id of annotation channel')
    z_freehand_editor = param.Parameter(FreehandEditor())
    y_freehand_editor = param.Parameter(FreehandEditor())
    x_freehand_editor = param.Parameter(FreehandEditor())
    old_subdf = param.DataFrame()

    def _make_dataset(self, key, img):
        if key == self.annot_channel:
            annot_dataset = EditableHvDataset(img=img,
                                              label=self.index_to_str(key),
                                              spacing=self.spacing)
            # force reset drawing tool axis
            self.z_freehand_editor = FreehandEditor(
                dataset=annot_dataset, slicer=self.ortho_viewer.z_viewer)
            self.y_freehand_editor = FreehandEditor(
                dataset=annot_dataset, slicer=self.ortho_viewer.y_viewer)
            self.x_freehand_editor = FreehandEditor(
                dataset=annot_dataset,
                slicer=self.ortho_viewer.x_viewer,
                swap_axes=True)

            return annot_dataset
        else:
            return HvDataset(img=img,
                             label=self.index_to_str(key),
                             spacing=self.spacing)

    # NOTE overriding base class --> watch=True not needed (else triggers double update)
    @param.depends('_dynamic_update_counter')
    def _dynamic_img_update(self):
        self.save_annot()

        for hv_ds, img in zip(self.hv_datasets, self.loaded_objects.values()):
            hv_ds.img = img

    def dmap(self):

        if not self.segmentation_viewer.channel_viewers or self._has_multiselect_changed:
            selected_channel_config = {
                key: self.channel_config[key]
                for key in self.loaded_objects.keys()
            }
            self.segmentation_viewer = SegmentationViewer.from_channel_config(
                selected_channel_config,
                composite_channels=self.composite_channels,
                overlay_channels=self.overlay_channels)
            self._widget_update_counter += 1

        self.hv_datasets = [
            self._make_dataset(key, img)
            for key, img in self.loaded_objects.items()
        ]
        dmaps = [hv_ds.dmap() for hv_ds in self.hv_datasets]

        dmaps = [self.ortho_viewer(dmap) for dmap in dmaps]

        # invert slices and channels
        dmaps = list(zip(*dmaps))

        # add crosshair overlay + drawingtool overlay, bug if adding to an existing overlay
        # NOTE: workaround to overlay drawingtool. does not work if overlayed after Overlay + collate
        # similar to reported holoviews bug. tap stream attached to a dynamic map does not update
        # https://github.com/holoviz/holoviews/issues/3533
        cross = self.ortho_viewer.get_crosshair()
        freehands = [(self.z_freehand_editor.path_plot, ),
                     (self.x_freehand_editor.path_plot, ),
                     (self.y_freehand_editor.path_plot, )]
        dmaps = [
            dmap + cr + fh for dmap, cr, fh in zip(dmaps, cross, freehands)
        ]

        dmaps = [self.segmentation_viewer(dmap) for dmap in dmaps]

        @param.depends(self.z_freehand_editor.param.draw_in_3D, watch=True)
        def _sync_freehands_3D(draw_in_3D):
            self.x_freehand_editor.draw_in_3D = draw_in_3D
            self.y_freehand_editor.draw_in_3D = draw_in_3D

        @param.depends(self.z_freehand_editor.param.tool_width, watch=True)
        def _sync_freehands_toolsize(tool_width):
            self.x_freehand_editor.tool_width = tool_width
            self.y_freehand_editor.tool_width = tool_width

        return dmaps

    def save_annot(self, event=None):
        npimg = self.z_freehand_editor.dataset.img.astype(np.int16)
        if npimg.shape != (2, 2) and self.old_subdf is not None:
            single_index = list(
                set(self.old_subdf.index.names) -
                set(self.multi_select_levels))
            row = self.old_subdf.reset_index(single_index).dc[
                self.annot_channel]
            row.dc.write(npimg, compress=9, overwrite=True)

    def discard_changes(self, event=None):

        single_index = list(
            set(self.old_subdf.index.names) - set(self.multi_select_levels))
        row = self.old_subdf.reset_index(single_index).dc[self.annot_channel]
        img = row.dc.read()[0]

        self.z_freehand_editor.dataset.img = img

    @param.depends('subdf', watch=True)
    def _backup_subdf(self):
        self.old_subdf = self.subdf

    @param.depends('_widget_update_counter')
    def widgets(self):
        wg = super().widgets()

        save_button = pn.widgets.Button(name='save')
        save_button.on_click(self.save_annot)

        discard_button = pn.widgets.Button(name='discard changes')
        discard_button.on_click(self.discard_changes)

        edit_wg = self.z_freehand_editor.dataset.widgets()
        edit_wg.append(self.z_freehand_editor.param.tool_width)
        edit_wg.append(self.z_freehand_editor.param.draw_in_3D)
        edit_wg.append(save_button)
        edit_wg.append(discard_button)

        return pn.Column(self.io_widgets, self.segmentation_viewer.widgets,
                         edit_wg)
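
The two @param.depends(..., watch=True) closures above show how parameters of separate Parameterized instances can be kept in sync without a shared class. A minimal standalone version of the same idiom:

import param

class Editor(param.Parameterized):
    tool_width = param.Number(1.0)

a, b = Editor(), Editor()

@param.depends(a.param.tool_width, watch=True)
def _sync_tool_width(tool_width):
    # Mirror a's tool_width onto b whenever it changes.
    b.tool_width = tool_width

a.tool_width = 5.0
print(b.tool_width)  # -> 5.0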
Example #9
class GlobalPowerPlantDatabaseApp(param.Parameterized):

    data = param.DataFrame(precedence=-1)

    opacity = param.Number(default=0.8, step=0.05, bounds=(0, 1))

    pitch = param.Number(default=0, bounds=(0, 90))

    zoom = param.Integer(default=1, bounds=(1, 22))

    view_state = param.ObjectSelector(default=VIEW_STATES["World"],
                                      objects=VIEW_STATES)

    def __init__(self, nrows: Optional[int] = None, **params):
        if "data" not in params:
            if nrows:
                params["data"] = self._get_pp_data(nrows=nrows)
            else:
                params["data"] = self._get_pp_data()

        super(GlobalPowerPlantDatabaseApp, self).__init__(**params)

        self._view_state = pdk.ViewState(
            latitude=52.2323,
            longitude=-1.415,
            zoom=self.zoom,
            min_zoom=self.param.zoom.bounds[0],
            max_zoom=self.param.zoom.bounds[1],
        )
        self._scatter = pdk.Layer(
            "ScatterplotLayer",
            data=self.data,
            get_position=["longitude", "latitude"],
            get_fill_color="[color_r, color_g, color_b, color_a]",
            get_radius="capacity_mw*10",
            pickable=True,
            opacity=self.opacity,
            filled=True,
            wireframe=True,
        )
        self._deck = pdk.Deck(
            map_style="mapbox://styles/mapbox/light-v9",
            initial_view_state=self._view_state,
            layers=[self._scatter],
            tooltip=True,
            mapbox_key=MAPBOX_KEY,
        )
        self.pane = pn.pane.DeckGL(self._deck,
                                   sizing_mode="stretch_width",
                                   height=700)
        self.param.watch(self._update, ["data", "opacity", "pitch", "zoom"])

    @staticmethod
    def _get_pp_data(nrows: Optional[int] = None):
        pp_data = pd.read_csv(POWER_PLANT_PATH, nrows=nrows)

        pp_data["primary_fuel_color"] = pp_data.primary_fuel.map(FUEL_COLORS)
        pp_data["primary_fuel_color"] = pp_data["primary_fuel_color"].fillna(
            "gray")
        pp_data["color_r"] = pp_data["primary_fuel_color"].map(COLORS_R)
        pp_data["color_g"] = pp_data["primary_fuel_color"].map(COLORS_G)
        pp_data["color_b"] = pp_data["primary_fuel_color"].map(COLORS_B)
        pp_data["color_a"] = 140

        # "name", "primary_fuel", "capacity_mw",
        pp_data = pp_data[[
            "latitude",
            "longitude",
            "name",
            "capacity_mw",
            "color_r",
            "color_g",
            "color_b",
            "color_a",
        ]]
        return pp_data

    @pn.depends("pane.hover_state", "data")
    def _info_pane(self):
        index = self.pane.hover_state.get("index", -1)
        if index == -1:
            index = slice(0, 0)
        return self.data.iloc[index][["name", "capacity_mw"]]

    @pn.depends("view_state", watch=True)
    def _update_view_state_from_selection(self):
        self._view_state.latitude = self.view_state.latitude
        self._view_state.longitude = self.view_state.longitude
        self._view_state.zoom = self.view_state.zoom
        self.pane.param.trigger("object")
        print(self._view_state)

    @pn.depends("pane.view_State", watch=True)
    def _update(self):
        print("update")
        state = self.pane.view_state
        self._view_state.longitude = state["longitude"]
        self._view_state.latitude = state["latitude"]

    def _update(self, event):
        print(event.name)
        if event.name == "data":
            self._scatter.data = self.data
        if event.name == "opacity":
            self._scatter.opacity = self.opacity
        if event.name == "zoom":
            self._view_state.zoom = self.zoom
        if event.name == "pitch":
            self._view_state.pitch = self.pitch
        self.pane.param.trigger("object")

    def _view_state_pane(self):
        return pn.Param(
            self,
            parameters=["view_state"],
            show_name=False,
            widgets={"view_state": pn.widgets.RadioButtonGroup},
            default_layout=pn.Column,
        )

    def _settings_pane(self):
        return pn.Param(
            self,
            parameters=["opacity", "pitch", "zoom"],
            show_name=False,
            widgets={"view_state": pn.widgets.RadioButtonGroup},
        )

    def view(self):
        # self._info_pane,  does not work
        return pn.Row(
            pn.Column(self._view_state_pane, self.pane),
            pn.Column(self._settings_pane, width=300, sizing_mode="fixed"),
        )
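
A plausible way to serve this dashboard (hedged: it assumes the module-level constants such as POWER_PLANT_PATH, MAPBOX_KEY and VIEW_STATES are defined as in the original source):

import panel as pn

pn.extension('deckgl')

app = GlobalPowerPlantDatabaseApp(nrows=1000)  # subsample for a quick start
app.view().servable()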
Example #10
class TraceParam(param.Parameterized):
    select_column = param.Parameter()

    data = param.DataFrame(precedence=-1)

    listeEntier = param.Selector(default=[])
    listeDecimal = param.Selector(default=[])
    listeBooleen = param.Selector(default=[])
    listeTexte = param.Selector(default=[])
    listeDate = param.Selector(default=[])
    listeObjet = param.Selector(default=[])

    x = param.String(default='x', doc="Colonne à afficher sur l'axe X.")
    y = param.String(default='frequence',
                     doc="Colonne à afficher sur l'axe Y.")

    opts = param.Dict(default={}, doc="Options a appliquer sur le graphique.")

    def __init__(self, **params):
        self.progress = ProgressExtMod()
        self.completed = self.progress.completed
        self.viz = params.get("viz")
        self.trace = params.get("trace")
        self.data = self.getUpdatedData(self.trace.dataFrame)
        self.overlayElement = None
        self.overlay = None
        self.idOverlayElement = None
        self.groupeName = None
        self.labelName = None

        super().__init__(data=self.data)  # viz is a plain attribute, not a declared param

    @param.depends("progress.completed")
    def viewProgress(self):
        print("TraceParam ", self, " id:", id(self))

        return pn.Column(self.progress.view)

    def view(self):
        if not self.overlay:
            table = pn.pane.Markdown("## Pas de donnée chargée")
        else:

            #dataCentroid = GeomUtil.convertGeomToCentroid(self.trace,self.data)
            #dataCentroid['x']= dataCentroid["geometry"].x
            #dataCentroid['y'] = dataCentroid["geometry"].y
            #longitude = Dimension('x', label= 'Longitude')
            #latitude = Dimension('y', label='Latitude')
            data = self.data
            if isinstance(self.data, gpd.GeoDataFrame):
                data = GeomUtil.transformGdfToDf(self.trace, data)
            table = hv.Table(data)

            if len(table) == len(self.overlayElement):
                pass
                #DataLink(table,self.overlayElement)

        return pn.Row(table)

    def panel(self):
        panel = pn.Row(self.viz.getConfigTracePanel)
        return panel

    def populateListeType(self, dataFrame):

        self.listeEntier = list(dataFrame.select_dtypes('int64').columns)
        self.listeDecimal = list(dataFrame.select_dtypes('float64').columns)
        self.listeObjet = list(dataFrame.select_dtypes('object').columns)
        self.listeTexte = list(dataFrame.select_dtypes('string').columns)

    def setOverlay(self, overlay):

        self.overlay = overlay
        if isinstance(overlay, Overlay) or isinstance(overlay, NdOverlay):

            for k, v in self.overlay.items():
                self.overlayElement = v
                self.idOverlayElement = v.id
                self.groupeName = v.group
                self.labelName = v.label

    def getUpdatedData(self, dataFrame):

        if self.viz.type not in GEOMPLOT and not dataFrame.empty:  # drop the geometry if we're not in a GEOMPLOT viz
            dataFrame = GeomUtil.transformGdfToDf(self.trace, dataFrame)

        self.populateListeType(dataFrame)
        return dataFrame

    def settingWidgetSelectColumn(self):
        columnList = list(self.data)

        panelWidgetSelectCol = pn.widgets.Select(
            name="Select a column", options=columnList)

        widgetColumnSelector = pn.Param(
            self.param,
            parameters=['select_column'],
            widgets={'select_column': panelWidgetSelectCol},
            sizing_mode='stretch_width')
        return widgetColumnSelector

    @param.depends('select_column', watch=True)
    def onChangeSelectColumn(self):
        pass
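# populateListeType above buckets columns by dtype via DataFrame.select_dtypes.
# A standalone sketch of the same idea on a toy frame (illustrative data):
import pandas as pd

df = pd.DataFrame({
    "count": [1, 2],      # int64
    "ratio": [0.5, 1.5],  # float64
    "label": ["a", "b"],  # object
})

int_columns = list(df.select_dtypes("int64").columns)      # ['count']
float_columns = list(df.select_dtypes("float64").columns)  # ['ratio']
object_columns = list(df.select_dtypes("object").columns)  # ['label']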
Example #11
class AnnotationDashBoard(SegmentationDashBoard):

    annot_channel = param.Parameter(doc='id of annotation channel')
    freehand_editor = param.Parameter(FreehandEditor())
    old_subdf = param.DataFrame()

    def _make_dataset(self, key, img):
        if key == self.annot_channel:
            annot_dataset = EditableHvDataset(img=img,
                                              label=self.index_to_str(key))
            # force reset drawing tool axis
            self.freehand_editor = FreehandEditor(dataset=annot_dataset,
                                                  slicer=self.slicer)
            return annot_dataset
        else:
            return HvDataset(img=img, label=self.index_to_str(key))

    # NOTE overriding base class --> watch=True not needed (else triggers double update)
    @param.depends('_dynamic_update_counter')
    def _dynamic_img_update(self):
        self.save_annot()

        for hv_ds, img in zip(self.hv_datasets, self.loaded_objects.values()):
            hv_ds.img = img

    @param.depends('_complete_update_counter')
    def dmap(self):
        self.save_annot()

        if not self.segmentation_viewer.channel_viewers or self._has_multiselect_changed:
            selected_channel_config = {
                key: self.channel_config[key]
                for key in self.loaded_objects.keys()
            }
            self.segmentation_viewer = SegmentationViewer.from_channel_config(
                selected_channel_config,
                composite_channels=self.composite_channels,
                overlay_channels=self.overlay_channels)
            self._widget_update_counter += 1

        self.hv_datasets = [
            self._make_dataset(key, img)
            for key, img in self.loaded_objects.items()
        ]
        dmaps = [hv_ds.dmap() for hv_ds in self.hv_datasets]

        # apply slicer if 3d image
        if next(iter(self.loaded_objects.values())).ndim > 2:
            dmaps = [self.slicer(dmap) for dmap in dmaps]

        # NOTE: workaround to overlay the drawing tool; it does not work if overlaid after Overlay + collate.
        # Similar to a reported holoviews bug: a tap stream attached to a dynamic map does not update
        # https://github.com/holoviz/holoviews/issues/3533
        dmaps.append(self.freehand_editor.path_plot)
        dmap = self.segmentation_viewer(dmaps)

        # Note
        # dmap * self.freehand_editor.path_plot does not work (no drawing tool available)
        # self.freehand_editor.path_plot * dmap works but path is drawn behind the image
        return dmap

    def save_annot(self, event=None):
        npimg = self.freehand_editor.dataset.img.astype(np.int16)
        if npimg.shape != (2, 2) and self.old_subdf is not None:
            single_index = list(
                set(self.old_subdf.index.names) -
                set(self.multi_select_levels))
            row = self.old_subdf.reset_index(single_index).dc[
                self.annot_channel]
            row.dc.write(npimg, compress=9, overwrite=True)

    def discard_changes(self, event=None):

        single_index = list(
            set(self.old_subdf.index.names) - set(self.multi_select_levels))
        row = self.old_subdf.reset_index(single_index).dc[self.annot_channel]
        img = row.dc.read()[0]

        self.freehand_editor.dataset.img = img

    @param.depends('subdf', watch=True)
    def _backup_subdf(self):
        self.old_subdf = self.subdf

    def widgets(self):
        wg = super().widgets()

        save_button = pn.widgets.Button(name='save')
        save_button.on_click(self.save_annot)

        discard_button = pn.widgets.Button(name='discard changes')
        discard_button.on_click(self.discard_changes)

        edit_wg = self.freehand_editor.widgets()
        edit_wg.append(save_button)
        edit_wg.append(discard_button)

        return pn.Column(wg, edit_wg)
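# _backup_subdf above shows a common Param pattern: watching one DataFrame
# parameter to snapshot it into another. A minimal sketch; BackupDemo is
# hypothetical, not part of the dashboard.
import pandas as pd
import param

class BackupDemo(param.Parameterized):
    subdf = param.DataFrame()
    old_subdf = param.DataFrame()

    @param.depends("subdf", watch=True)
    def _backup_subdf(self):
        # Runs automatically whenever subdf is reassigned
        self.old_subdf = self.subdf

demo = BackupDemo()
demo.subdf = pd.DataFrame({"a": [1]})
assert demo.old_subdf.equals(demo.subdf)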
Example #12
class ReactiveDashboard(param.Parameterized):
    title = pn.pane.Markdown("# Booze Cruise YYC")
    # Add a widget that picks the environment and bucket
    number_dest = param.Integer(len(DEFAULT_DEST),
                                label="Select number of destinations",
                                bounds=(0, 15))
    waypoints_per_batch = param.Integer(
        10, label="Waypoints per batch in Google Maps URL", bounds=(1, 12))

    progress_bar = pnw.misc.Progress(
        active=False,
        bar_color="light",
        value=None,
        width_policy="max",
        sizing_mode="stretch_width",
    )

    date_custom_map: Dict = {}
    get_best_route_action = pnw.Button(name="Optimize Route",
                                       button_type="primary")
    get_batch_destinations = pnw.Button(name="Import Destinations",
                                        button_type="primary")

    destinations_pane = param.Parameter(default=destinations_pane_default)
    destinations_wlist = param.List(default=destinations_wlist_default)

    destinations_latlongs = param.List(default=[(0, 0), (0, 0)],
                                       precedence=-0.5)
    gmaps_urls = param.List(default=['', ''], precedence=-0.5)

    destinations_addresses = param.List(default=[(0, 0), (0, 0)],
                                        precedence=-0.5)
    all_dates_forecast = default_altair()
    default_plot = pn.Pane(default_altair())

    start_location = param.String(label='Departure Point')
    end_location = param.String(label='Destination Point')
    batch_import_str = pnw.TextAreaInput(
        name='Batch import',
        placeholder='Add locations here, e.g. by copy-pasting from a spreadsheet',
        width=300,
        height=450,
        sizing_mode='scale_both')
    is_start_equal_end = param.Boolean(
        default=True, label='My final destination same as Departure Point')
    start_latlong = param.Tuple(default=(0, 0), precedence=-0.5)
    end_latlong = param.Tuple(default=(0, 0), precedence=-0.5)
    df_label = param.DataFrame(precedence=-0.5, default=pd.DataFrame())
    df_all_pts = param.DataFrame(precedence=-0.5, default=pd.DataFrame())

    # Placeholder for tabs:
    tabs = pn.Tabs(('Batch Location Import', pn.Row()))

    tmp_buffer = 'Temporary buffer'

    @param.depends("number_dest", watch=True)
    def change_destinations_number(self):
        new_destinations = create_destination_inputs(
            n=self.number_dest, prev_destinations=self.destinations_wlist)
        self.destinations_pane, self.destinations_wlist = (
            new_destinations[0],
            new_destinations[1],
        )
        self.tabs.active = 0
        return self.destinations_pane

    def geocode_dest_list_latlong(self, event, destinations_list):
        self.progress_bar.bar_color = 'info'
        self.progress_bar.active = True

        logger_bc.info(event)
        destinations_str = [_pull_value_wlist(x) for x in destinations_list]
        logger_bc.info(f"Geocoding the destinations list: {destinations_str}")
        destinations_jsons = [
            _geocode_destination_here(x) for x in destinations_str
        ]
        latlongs = [
            _pull_lat_long_here(x, n_entry=0) for x in destinations_jsons
        ]
        addresses = [
            _pull_address_here(x, n_entry=0) for x in destinations_jsons
        ]

        logger_bc.info(latlongs)
        logger_bc.info(addresses)

        # latlongs = [(random.randint(i, 20), random.randint(i, 40)) for i in range(len(destinations_list))]
        self.destinations_latlongs = latlongs
        self.destinations_addresses = addresses
        logger_bc.info(self.destinations_latlongs)
        logger_bc.info(self.destinations_addresses)

        self.progress_bar.bar_color = 'light'
        self.progress_bar.active = False

    @param.depends('destinations_latlongs')
    def show_latlongs(self):
        destinations_str = [
            _pull_value_wlist(x) for x in self.destinations_wlist
        ]

        x = f' Length = {len(self.destinations_wlist)}, vals = {destinations_str}'
        x += f' Latlongs = {len(self.destinations_latlongs)}, vals = {self.destinations_addresses}'

        res_md = pn.pane.Markdown(x)
        return res_md

    def find_best_route(self,
                        event,
                        latlong_list,
                        start_point: Tuple = (0, 0),
                        end_point: Tuple = (0, 0)):
        '''
        Find the optimal route using the TomTom routing service.
        :param event:
        :param latlong_list:
        :param start_point:
        :param end_point:
        :return:
        '''
        self.progress_bar.bar_color = 'info'
        self.progress_bar.active = True

        latlongs = [start_point] + latlong_list + [end_point]
        latlong_concat = concat_latlongs(latlongs)

        url_locations = f'{base_url_tomtom}/{latlong_concat}/json'
        params = {
            'key': API_KEY_TOMTOM,
            'travelMode': 'car',
            'computeBestOrder': 'true',
            'traffic': 'true',
            'instructionsType': 'text',
            'computeTravelTimeFor': 'all',
        }
        response = requests.get(url_locations, params=params)
        response_json = response.json()
        latlongs_original_optimal = rearrange_waypoints(response_json)

        sorted_addresses = self.get_ordered_addresses(
            latlongs_original_optimal)
        sorted_addresses_with_terminals = [
            self.start_location
        ] + sorted_addresses + [self.end_location]
        _, urls = construct_gmaps_urls(
            sorted_addresses_with_terminals,
            waypoints_batch_size=self.waypoints_per_batch)
        self.gmaps_urls = urls

        # Prepare dataframes to feed Bokeh with
        self.df_label = create_label_df(start_point,
                                        end_point,
                                        latlongs_original_optimal,
                                        sorted_addresses=sorted_addresses,
                                        start_location=self.start_location,
                                        end_location=self.end_location)
        self.df_all_pts = create_legs_df(response_json)

        self.progress_bar.bar_color = 'light'
        self.progress_bar.active = False

    @param.depends('df_all_pts')
    def plot_bokeh(self):
        if self.df_all_pts.shape[0] > 0:
            print('Plotting bokeh')
            p = create_bokeh_figure(df_all_pts=self.df_all_pts,
                                    df_label=self.df_label)
        else:
            p = figure()
        return p

    def get_ordered_addresses(self, ordered_latlongs):
        """
        Sort geocoded addresses into optimal order
        """
        def closest_node(node, nodes):
            nodes = np.asarray(nodes)
            deltas = nodes - node
            dist_2 = np.einsum('ij,ij->i', deltas, deltas)
            return np.argmin(dist_2)

        sort_vector = [
            closest_node(x, self.destinations_latlongs)
            for x in ordered_latlongs
        ]
        sorted_addresses = [
            self.destinations_addresses[x]['label'] for x in sort_vector
        ]
        return sorted_addresses

    @param.depends('gmaps_urls')
    def show_urls(self):
        base_url_string = """
        ### The route links for navigation in Google Maps:
        
        URL
        """
        urls_links_md = [
            f'**[Group {i}]({u})**' for i, u in enumerate(self.gmaps_urls)
        ]
        url_string = '\n\n'.join(urls_links_md)
        base_url_string = base_url_string.replace('URL', url_string)
        res_md = pn.pane.Markdown(base_url_string)
        print(res_md)
        return res_md

    def optimize_route(self, event):
        print(f'start_loc: {self.start_location}')
        start_latlong = _pull_lat_long_here(
            _geocode_destination_here(self.start_location))
        if self.is_start_equal_end:
            end_latlong = start_latlong
            self.end_latlong = start_latlong
            self.end_location = self.start_location
        else:
            end_latlong = _pull_lat_long_here(
                _geocode_destination_here(self.end_location))
        self.start_latlong = start_latlong
        self.end_latlong = end_latlong
        self.geocode_dest_list_latlong(
            event, destinations_list=self.destinations_wlist)
        self.find_best_route(event,
                             latlong_list=self.destinations_latlongs,
                             start_point=start_latlong,
                             end_point=end_latlong)

    def destinations_from_import_str(self, event):
        self.progress_bar.bar_color = 'info'
        self.progress_bar.active = True

        destinations_new = self.batch_import_str.value.split('\n')
        self.destinations_pane, self.destinations_wlist = create_destination_inputs(
            n=len(destinations_new),
            prev_destinations=None,
            init_vals=destinations_new)
        self.number_dest = len(destinations_new)
        self.progress_bar.bar_color = 'light'
        self.progress_bar.active = False

    @param.depends('is_start_equal_end')
    def start_end_widget(self):
        if self.is_start_equal_end:
            self.end_location = self.start_location
            self.end_latlong = self.start_latlong
            return pn.Column(self.param.start_location,
                             self.param.is_start_equal_end)
        else:
            return pn.Column(self.param.start_location,
                             self.param.is_start_equal_end,
                             self.param.end_location)

    def panel(self):
        # Attach a callback to geocoding & optimal route search
        self.get_best_route_action.on_click(lambda x: self.optimize_route(x))
        # Attach a callback to batch import:
        self.batch_import_str.link(self.batch_import_str,
                                   callbacks={'value': clean_space_callback})
        self.batch_import_str.value = ''
        # Attach a callback to Import Destinations button so the destinations pasted propagate into the Destinations list & sidebar
        self.get_batch_destinations.on_click(
            lambda x: self.destinations_from_import_str(x))

        # Setup the sidebar:
        widgets_sliders = pn.Column(self.param.number_dest,
                                    self.param.waypoints_per_batch)
        widgets_start_end = self.start_end_widget
        buttons_ = pn.Column(self.get_best_route_action)
        progress_bar = pn.Pane(self.progress_bar,
                               sizing_mode="stretch_width",
                               width_policy="max")

        # Set up tabs
        tab_bokeh = pn.Column(pn.Column(self.plot_bokeh),
                              self.show_urls,
                              sizing_mode="stretch_width",
                              width_policy="max")
        tab_import = pn.Row(self.batch_import_str, self.get_batch_destinations)
        self.tabs = pn.Tabs(('Optimal Route Map', tab_bokeh),
                            ('Batch Location Import', tab_import))

        result = pn.Row(
            pn.Column(
                self.title,
                widgets_sliders,
                progress_bar,
                widgets_start_end,
                buttons_,
                self.change_destinations_number,
            ),
            self.tabs,
            sizing_mode="stretch_width",
        )
        return result
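# change_destinations_number above rebuilds a widget column whenever
# number_dest changes. A reduced sketch of that watch pattern; DestinationList
# and its widgets are illustrative.
import panel as pn
import param

class DestinationList(param.Parameterized):
    number_dest = param.Integer(default=2, bounds=(0, 15))

    def __init__(self, **params):
        super().__init__(**params)
        self.destinations_pane = pn.Column()
        self._rebuild()

    @param.depends("number_dest", watch=True)
    def _rebuild(self):
        # Recreate one TextInput per destination whenever the count changes
        self.destinations_pane.objects = [
            pn.widgets.TextInput(name=f"Destination {i + 1}")
            for i in range(self.number_dest)
        ]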
Example #13
class trainer(param.Parameterized):

    display_df = param.DataFrame(default=pd.DataFrame())

    results = param.Boolean(default=False)

    X = param.Array(default=None)

    result_string = param.String(default='')

    def __init__(self, **params):
        super().__init__(**params)
        self.name_of_page = 'Test and Train'

        self.test_slider = pn.widgets.IntSlider(name='Test Percentage',
                                                start=0,
                                                end=100,
                                                step=10,
                                                value=20)

        self.tt_button = pn.widgets.Button(name='Train and Test',
                                           button_type='primary')
        self.tt_button.on_click(self.train_test)

        self.tt_model = pn.widgets.Select(
            name='Select', options=['Random Forest Classifier'])

    def train_test(self, event):

        #get values from sentiment.
        self.display_df = convert_sentiment_values(self.display_df)

        y = self.display_df['label']

        #get train test sets
        X_train, X_test, y_train, y_test = train_test_split(
            self.X, y, test_size=self.test_slider.value / 100, random_state=0)

        if self.tt_model.value == 'Random Forest Classifier':
            sentiment_classifier = RandomForestClassifier(n_estimators=1000,
                                                          random_state=0)

            sentiment_classifier.fit(X_train, y_train)

            y_pred = sentiment_classifier.predict(X_test)

        self.y_test = y_test
        self.y_pred = y_pred
        self.analyze()

    def analyze(self):
        self.cm = confusion_matrix(self.y_test, self.y_pred)
        self.cr = classification_report(self.y_test, self.y_pred)
        self.acc_score = accuracy_score(self.y_test, self.y_pred)

        splits = self.cr.split('\n')
        cml = self.cm.tolist()
        self.result_string = f"""
            ### Classification Report
            <pre>
            {splits[0]}
            {splits[1]}
            {splits[2]}
            {splits[3]}
            {splits[4]}
            {splits[5]}
            {splits[6]}
            {splits[7]}
            {splits[8]}
            </pre>
            ### Confusion Matrix
            <pre>
            {cml[0]}
            {cml[1]}

            </pre>

            ### Accuracy Score
            <pre>
            {round(self.acc_score, 4)}
            </pre>
            """

        self.results = True

    def options_page(self, help_text):

        return pn.WidgetBox(help_text,
                            self.tt_model,
                            self.test_slider,
                            self.tt_button,
                            height=375,
                            width=300)

    @pn.depends('results')
    def df_pane(self):

        if not self.results:
            self.result_pane = self.display_df

        else:
            self.result_pane = pn.pane.Markdown(f"""
                {self.result_string}
                """,
                                                width=500,
                                                height=350)

        return pn.WidgetBox(self.result_pane, height=375, width=450)

    def panel(self):

        help_text = (
            "Your text will now be trained and tested using the selected model. You may "
            "choose a percentage of your data to reserve for testing; the rest will be used "
            "for training. For example, if you reserve 20%, the remaining 80% will be used "
            "for training, and the 20% will be used to determine how well the trained model "
            "assigns a sentiment label to the test text. Currently, the only model available "
            "is the sklearn Random Forest Classifier model.")

        return pn.Row(
            pn.Column(
                pn.pane.Markdown('## Train and Test'),
                self.options_page(help_text),
            ), pn.Column(
                pn.Spacer(height=52),
                self.df_pane,
            ))
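# analyze above splits scikit-learn's plain-text classification_report into
# lines so it can sit inside a Markdown <pre> block. The underlying report can
# be produced standalone; the toy labels below are illustrative.
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

y_test = [0, 1, 1, 0]  # toy ground-truth labels
y_pred = [0, 1, 0, 0]  # toy predictions

print(classification_report(y_test, y_pred))  # per-class precision/recall/f1
print(confusion_matrix(y_test, y_pred))       # 2x2 matrix of counts
print(accuracy_score(y_test, y_pred))         # 0.75
Example #14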
class WordEmbedder(base_page):

    spark_df = param.ClassSelector(class_=sdf)

    display_df = param.DataFrame(default=pd.DataFrame())

    df = param.DataFrame()
    X = param.Array(default=None)

    def __init__(self, **params):
        super().__init__(**params)
        # self.spark_df = spark_df

        self.param.name_of_page.default = 'Word Embedding'
        self.we_model = pn.widgets.Select(
            name='Select',
            options=['SKLearn Count Vectorizer', 'Glove', 'Bert'])

        self.we_button = pn.widgets.Button(name='Transform', button_type='primary')
        self.we_button.on_click(self.transform)

    def options_page(self):

        return pn.WidgetBox(self.we_model,
                            self.we_button,
                            height=300,
                            width=300)
    
    def transform(self, event):
        print('embedding')

        if self.we_model.value == 'Glove':
            print('glove')
            from sparknlp.annotator import WordEmbeddingsModel
            word_embeddings = WordEmbeddingsModel.pretrained()
            word_embeddings.setInputCols(['document', 'stem'])
            word_embeddings.setOutputCol('embeddings')

            self.spark_df = word_embeddings.transform(self.spark_df)

            embeddings_df = get_all_lines(self.spark_df,
                                          'embeddings.embeddings',
                                          col='embeddings')

        elif self.we_model.value == 'SKLearn Count Vectorizer':
            from sklearn.feature_extraction.text import CountVectorizer
            print('join lines')
            corpus = join_lines(self.display_df)
            print('doing vectorizer')
            vectorizer = CountVectorizer(max_features=1500)
            print('vectorizing 2')
            X = vectorizer.fit_transform(corpus).toarray()

            cnt = self.spark_df.count()
            print('getting sentiment from spark df')
            labels = self.spark_df.select('sentiment').take(cnt)

            for n in range(cnt):
                labels[n] = labels[n][0]
            print('done getting sentiment, creating dataframe')
            xlist = []
            for n in range(len(X)):
                xlist.append(list(X[n]))
            self.X = X
            embeddings_df = pd.DataFrame({'embeddings': xlist, 'sentiment': labels})

        else:
            print('bert')
            from sparknlp.annotator import BertEmbeddings
            bertEmbeddings = BertEmbeddings.pretrained()
            bertEmbeddings.setInputCols(['document', 'stem'])
            bertEmbeddings.setOutputCol('embeddings')

            self.spark_df = bertEmbeddings.transform(self.spark_df)

            embeddings_df = get_all_lines(self.spark_df,
                                          'embeddings.embeddings',
                                          col='embeddings')

        self.display_df = embeddings_df
        self.continue_button.disabled = False
    
    
    @param.output('X', 'display_df')
    def output(self):
        return self.X, self.display_df
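# @param.output above declares which attributes this stage hands to the next
# stage of a pn.pipeline.Pipeline. A minimal two-stage sketch of that
# mechanism; Stage1/Stage2 are illustrative.
import panel as pn
import param

class Stage1(param.Parameterized):
    value = param.Integer(default=3)

    @param.output("value")
    def output(self):
        return self.value

    def panel(self):
        return pn.Param(self.param.value)

class Stage2(param.Parameterized):
    value = param.Integer(default=0)

    def panel(self):
        return pn.pane.Markdown(f"Received value = {self.value}")

pipeline = pn.pipeline.Pipeline()
pipeline.add_stage("Produce", Stage1)
pipeline.add_stage("Consume", Stage2)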
        
        
        
Example #15
class Test(param.Parameterized):
    df = param.DataFrame(default=valid_df, columns={'a', 'd'})
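# Context for this one-liner: when columns is a set, the DataFrame must have
# exactly those columns, in any order. A self-contained sketch; this valid_df
# is constructed for illustration.
import pandas as pd
import param

valid_df = pd.DataFrame({"a": [1, 2], "d": [3, 4]})

class Test(param.Parameterized):
    df = param.DataFrame(default=valid_df, columns={"a", "d"})

t = Test()
try:
    t.df = pd.DataFrame({"a": [1, 2]})  # column 'd' missing
except ValueError as e:
    print(e)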
Example #16
class RiskRewardCalculation(param.Parameterized):
    """A model of a Risk Reward Calculation"""

    days_to_delivery_start: int = param.Integer(DEFAULT_DAYS_TO_DELIVERY_START,
                                                bounds=DAYS_TO_DELIVERY_BOUNDS)
    days_to_delivery_end: int = param.Integer(DEFAULT_DAYS_TO_DELIVERY_END,
                                              bounds=DAYS_TO_DELIVERY_BOUNDS)
    aggregation: str = param.ObjectSelector(
        default=DEFAULT_RISK_REWARD_AGGREGATION,
        objects=RISK_REWARD_AGGREGATIONS)

    spreads: pd.DataFrame = param.DataFrame()

    analysis = param.DataFrame()
    payoff_up = param.Number()
    payoff_down = param.Number()
    risk_reward = param.Number()

    @param.depends(
        "days_to_delivery_start",
        "days_to_delivery_end",
        "aggregation",
        "spreads",
        watch=True,
    )
    def _update(self):
        print("_update")
        spreads_filter = (self.spreads["days_to_delivery"]
                          == self.days_to_delivery_start) | (
                              self.spreads["days_to_delivery"]
                              == self.days_to_delivery_end)
        spreads_on_days_to_delivery = self.spreads[spreads_filter]

        analysis = spreads_on_days_to_delivery.pivot(
            columns="days_to_delivery",
            values="value",
            index="spread",
        )
        analysis["change"] = (analysis[self.days_to_delivery_end] -
                              analysis[self.days_to_delivery_start])
        analysis = analysis.dropna()

        up_filter = analysis["change"] > 0
        down_filter = analysis["change"] < 0
        up_data = analysis[up_filter]
        down_data = analysis[down_filter]

        if self.aggregation == "mean":
            payoff_up = up_data.change.mean()
            payoff_down = down_data.change.mean()
        elif self.aggregation == "sum":
            payoff_up = up_data.change.sum()
            payoff_down = down_data.change.sum()

        if payoff_up and payoff_down:
            risk_reward = -payoff_up / payoff_down
        else:
            payoff_up = 0.0
            payoff_down = 0.0
            risk_reward = 0.0

        print(analysis.round(1).index)
        self.analysis = analysis.round(1)
        self.payoff_up = round(payoff_up, 1)
        self.payoff_down = round(payoff_down, 1)
        self.risk_reward = round(risk_reward, 1)
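# The pivot in _update reshapes the long spreads frame into one column per
# days_to_delivery, so the change between two dates becomes a row-wise
# subtraction. A toy illustration with made-up values:
import pandas as pd

spreads = pd.DataFrame({
    "spread": ["A", "A", "B", "B"],
    "days_to_delivery": [10, 5, 10, 5],
    "value": [1.0, 1.5, 2.0, 1.2],
})

analysis = spreads.pivot(columns="days_to_delivery", values="value", index="spread")
analysis["change"] = analysis[5] - analysis[10]
print(analysis)  # change is +0.5 for spread A and -0.8 for spread B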
Example #17
class PandasProfilingApp(param.Parameterized):
    """The PandasProfilingApp showcases how to integrate the Pandas Profiling Report with Panel"""

    csv_url = param.String(label="CSV URL")

    dataframe = param.DataFrame()
    report = param.ClassSelector(class_=ProfileReport)
    html_report = param.String()

    update_report = param.Action(label="UPDATE REPORT")
    random_report = param.Action(label="RANDOM REPORT")

    progress = param.Parameter()
    html_report_pane = param.ClassSelector(class_=pn.pane.HTML)
    view = param.ClassSelector(class_=pn.layout.Reactive)

    config = param.ClassSelector(class_=Config, instantiate=False)

    def __init__(self, **params):
        self._csv_urls_cycle = cycle(CSV_URLS)
        params["config"] = Config()
        params["update_report"] = self._update_report
        params["random_report"] = self._random_report
        params["progress"], params["html_report_pane"], params[
            "view"] = self._get_view(params["config"])

        super().__init__(**params)

        self._set_random_csv_url()

    def _update_report(self, _=None):
        self.progress.active = True

        self._generate_report()

        self.html_report_pane.object = HTML_LOADING_REPORT

        html_report = html.escape(self.html_report)
        self.html_report_pane.object = (
            f"""<iframe srcdoc="{html_report}" frameborder="0" allowfullscreen></iframe>"""
        )

        self.progress.active = False
        self.csv_url = self.csv_url

    def _random_report(self, _=None):
        self.progress.active = True
        self._set_random_csv_url()
        self._update_report()

    def _get_view(self, config):
        style = pn.pane.HTML(STYLE,
                             width=0,
                             height=0,
                             margin=0,
                             sizing_mode="fixed")
        description = pn.pane.Markdown(__doc__)
        app_bar = pn.Row(
            pn.pane.PNG(
                LOGO_URL,
                embed=False,
                height=50,
                width=62,
                sizing_mode="fixed",
                margin=(10, 0, 10, 25),
            ),
            pn.pane.Markdown(
                "# Pandas Profiling Report",
                sizing_mode="stretch_width",
                margin=(None, None, None, 25),
                align="center",
            ),
            sizing_mode="stretch_width",
            margin=(25, 5, 0, 5),
            css_classes=["app-bar"],
            background=GREEN,
        )
        progress = pn.widgets.Progress(bar_color="secondary",
                                       width=335,
                                       sizing_mode="fixed",
                                       margin=(0, 5, 10, 5))
        progress.active = False
        widgets = {
            "csv_url": {
                "sizing_mode": "stretch_width",
            },
            "update_report": {
                "align": "end",
                "width": 150,
                "sizing_mode": "fixed"
            },
            "random_report": {
                "button_type": "success",
                "align": "end",
                "width": 150,
                "sizing_mode": "fixed",
                "css_classes": ["id-random-report-button"],
            },
        }
        top_selections = pn.Param(
            self,
            parameters=["csv_url", "update_report", "random_report"],
            widgets=widgets,
            default_layout=pn.Row,
            show_name=False,
            sizing_mode="stretch_width",
        )

        html_report_pane = pn.pane.HTML(EMPTY_HTML_REPORT,
                                        height=900,
                                        sizing_mode="stretch_both")

        report_tab = pn.Column(
            top_selections,
            html_report_pane,
            sizing_mode="stretch_width",
            name="Report",
        )
        config_tab = pn.Param(config,
                              sizing_mode="stretch_width",
                              name="Configuration",
                              show_name=False)
        tabs = pn.Tabs(
            report_tab,
            config_tab,
        )

        _view = pn.Column(
            style,
            description,
            app_bar,
            pn.Row(pn.layout.HSpacer(), progress, sizing_mode="stretch_width"),
            tabs,
            pn.layout.HSpacer(height=400),  # Gives better scrolling
            sizing_mode="stretch_width",
        )

        return progress, html_report_pane, _view

    def _generate_report(self):
        print(self.csv_url, self.config.title, self.config.minimal)
        self.html_report_pane.object = HTML_LOADING_DATA
        self.dataframe = self._get_dataframe(self.csv_url)
        self.html_report_pane.object = HTML_CREATING_PROFILER
        self.report = self._get_profile_report(self.csv_url, self.config.title,
                                               self.config.minimal)
        self.html_report_pane.object = HTML_GENERATING_REPORT
        self.html_report = self._get_html_report(self.csv_url,
                                                 self.config.title,
                                                 self.config.minimal)

    @staticmethod
    @lru_cache(maxsize=128)
    def _get_dataframe(url):
        return pd.read_csv(url, nrows=MAX_ROWS)

    @lru_cache(maxsize=128)
    def _get_profile_report(self, url, title, minimal):
        print(url, title, minimal)
        return ProfileReport(self.dataframe, minimal=minimal, title=title)

    @lru_cache(maxsize=128)
    def _get_html_report(self, url, title, minimal):
        print(url, title, minimal)
        return self.report.to_html()

    def _set_random_csv_url(self):
        self.csv_url = next(self._csv_urls_cycle)

    def __str__(self):
        return "Pandas Profiler App"

    def __repr__(self):
        return self.__str__()
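# The srcdoc technique in _update_report (escape the report HTML, then inline
# it into an iframe) also works in isolation; report_html here is a stand-in
# for the generated profiling report.
import html

import panel as pn

report_html = "<h1>Report</h1><p>Generated offline.</p>"
escaped = html.escape(report_html)  # escapes quotes so the HTML fits in an attribute
iframe_pane = pn.pane.HTML(
    f'<iframe srcdoc="{escaped}" frameborder="0" width="100%" height="400"></iframe>'
)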
Example #18
class DeepLearningConfig(WorkflowParams, DatasetParams, OutputParams,
                         OptimizerParams, TrainerParams, GenericConfig):
    """
    A class that holds all settings that are shared across segmentation models and regression/classification models.
    """
    _model_category: ModelCategory = param.ClassSelector(
        class_=ModelCategory,
        doc="The high-level model category described by this config.")

    num_dataload_workers: int = param.Integer(
        2,
        bounds=(0, None),
        doc="The number of data loading workers (processes). When set to 0,"
        "data loading is running in the same process (no process startup "
        "cost, hence good for use in unit testing. However, it "
        "does not give the same result as running with 1 worker process)")
    shuffle: bool = param.Boolean(
        True,
        doc="If true, the dataset will be shuffled randomly during training.")
    train_batch_size: int = param.Integer(
        4,
        bounds=(0, None),
        doc="The number of crops that make up one minibatch during training.")
    use_model_parallel: bool = param.Boolean(
        False,
        doc="If true, neural network model is partitioned across all "
        "available GPUs to fit in a large model. It shall not be used "
        "together with data parallel.")
    pin_memory: bool = param.Boolean(
        True, doc="Value of pin_memory argument to DataLoader")
    restrict_subjects: Optional[str] = \
        param.String(doc="Use at most this number of subjects for train, val, or test set (must be > 0 or None). "
                         "If None, do not modify the train, val, or test sets. If a string of the form 'i,j,k' where "
                         "i, j and k are integers, modify just the corresponding sets (i for train, j for val, k for "
                         "test). If any of i, j or j are missing or are negative, do not modify the corresponding "
                         "set. Thus a value of 20,,5 means limit training set to 20, keep validation set as is, and "
                         "limit test set to 5. If any of i,j,k is '+', discarded members of the other sets are added "
                         "to that set.",
                     allow_None=True)
    _dataset_data_frame: Optional[DataFrame] = \
        param.DataFrame(default=None,
                        doc="The dataframe that contains the dataset for the model. This is usually read from disk "
                            "from dataset.csv")
    avoid_process_spawn_in_data_loaders: bool = \
        param.Boolean(is_windows(), doc="If True, use a data loader logic that avoids spawning new processes at the "
                                        "start of each epoch. This speeds up training on both Windows and Linux, but "
                                        "on Linux, inference is currently disabled as the data loaders hang. "
                                        "If False, use the default data loader logic that starts new processes for "
                                        "each epoch.")
    max_batch_grad_cam: int = param.Integer(
        default=0,
        doc="Max number of validation batches for which "
        "to save gradCam images. By default "
        "visualizations are saved for all images "
        "in the validation set")
    label_smoothing_eps: float = param.Number(
        0.0,
        bounds=(0.0, 1.0),
        doc="Target smoothing value for label smoothing")
    log_to_parent_run: bool = param.Boolean(
        default=False,
        doc="If true, hyperdrive child runs will log their metrics"
        "to their parent run.")
    use_imbalanced_sampler_for_training: bool = param.Boolean(
        default=False,
        doc="If True, use an imbalanced sampler during training.")
    drop_last_batch_in_training: bool = param.Boolean(
        default=False,
        doc="If True, drop the last incomplete batch during"
        "training. If all batches are complete, no batch gets "
        "dropped. If False, keep all batches.")
    log_summaries_to_files: bool = param.Boolean(
        default=True,
        doc=
        "If True, model summaries are logged to files in logs/model_summaries; "
        "if False, to stdout or driver log")
    mean_teacher_alpha: float = param.Number(
        bounds=(0, 1),
        allow_None=True,
        default=None,
        doc="If this value is set, the mean teacher model will be computed. "
        "Currently only supported for scalar models. In this case, we only "
        "report metrics and cross-validation results for "
        "the mean teacher model. Likewise the model used for inference "
        "is the mean teacher model. The student model is only used for "
        "training. Alpha is the momentum term for weight updates of the mean "
        "teacher model. After each training step the mean teacher model "
        "weights are updated using mean_teacher_"
        "weight = alpha * (mean_teacher_weight) "
        " + (1-alpha) * (current_student_weights). ")
    #: Name of the csv file providing information on the dataset to be used.
    dataset_csv: str = param.String(
        DATASET_CSV_FILE_NAME,
        doc=
        "Name of the CSV file providing information on the dataset to be used. "
        "For segmentation models, this file must contain at least the fields: `subject`, `channel`, `filePath`."
    )

    def __init__(self, **params: Any) -> None:
        self._model_name = type(self).__name__
        # This should be annotated as torch.utils.data.Dataset, but we don't want to import torch here.
        self._datasets_for_training: Optional[Dict[ModelExecutionMode,
                                                   Any]] = None
        self._datasets_for_inference: Optional[Dict[ModelExecutionMode,
                                                    Any]] = None
        self.recovery_start_epoch = 0
        super().__init__(throw_if_unknown_param=True, **params)
        logging.info("Creating the default output folder structure.")
        self.create_filesystem(fixed_paths.repository_root_directory())
        # Disable the PL progress bar because all InnerEye models have their own console output
        self.pl_progress_bar_refresh_rate = 0
        self.extra_downloaded_run_id: Optional[Any] = None

    def validate(self) -> None:
        """
        Validates the parameters stored in the present object.
        """
        WorkflowParams.validate(self)
        OptimizerParams.validate(self)

        if self.azure_dataset_id is None and self.local_dataset is None:
            raise ValueError(
                "Either of local_dataset or azure_dataset_id must be set.")

    @property
    def model_category(self) -> ModelCategory:
        """
        Gets the high-level model category that this configuration object represents (segmentation or scalar output).
        """
        return self._model_category

    @property
    def is_segmentation_model(self) -> bool:
        """
        Returns True if the present model configuration belongs to the high-level category ModelCategory.Segmentation.
        """
        return self.model_category == ModelCategory.Segmentation

    @property
    def is_scalar_model(self) -> bool:
        """
        Returns True if the present model configuration belongs to the high-level category ModelCategory.Scalar
        i.e. for Classification or Regression models.
        """
        return self.model_category.is_scalar

    @property
    def compute_grad_cam(self) -> bool:
        return self.max_batch_grad_cam > 0

    @property
    def dataset_data_frame(self) -> Optional[DataFrame]:
        """
        Gets the pandas data frame that the model uses.
        :return:
        """
        return self._dataset_data_frame

    @dataset_data_frame.setter
    def dataset_data_frame(self, data_frame: Optional[DataFrame]) -> None:
        """
        Sets the pandas data frame that the model uses.
        :param data_frame: The data frame to set.
        """
        self._dataset_data_frame = data_frame

    def get_train_epochs(self) -> List[int]:
        """
        Returns the epochs for which training will be performed.
        :return:
        """
        return list(range(self.recovery_start_epoch + 1, self.num_epochs + 1))

    def get_total_number_of_training_epochs(self) -> int:
        """
        Returns the number of epochs for which a model will be trained.
        :return:
        """
        return len(self.get_train_epochs())

    def get_total_number_of_validation_epochs(self) -> int:
        """
        Returns the number of epochs for which a model will be validated.
        :return:
        """
        return self.get_total_number_of_training_epochs()

    @property
    def compute_mean_teacher_model(self) -> bool:
        """
        Returns True if the mean teacher model should be computed.
        """
        return self.mean_teacher_alpha is not None

    def __str__(self) -> str:
        """Returns a string describing the present object, as a list of key: value strings."""
        arguments_str = "\nArguments:\n"
        # Avoid callable params, the bindings that are printed out can be humongous.
        # Avoid dataframes
        skip_params = {
            name
            for name, value in self.param.params().items()
            if isinstance(value, (param.Callable, param.DataFrame))
        }
        for key, value in self.param.get_param_values():
            if key not in skip_params:
                arguments_str += f"\t{key:40}: {value}\n"
        return arguments_str

    def load_checkpoint_and_modify(self,
                                   path_to_checkpoint: Path) -> Dict[str, Any]:
        """
        By default, uses torch.load to read and return the state dict from the checkpoint file, without modifying
        the checkpoint file.

        Overloading this function:
        When weights_url or local_weights_path is set, the file downloaded may not be in the exact
        format expected by the model's load_state_dict() - for example, pretrained Imagenet weights for networks
        may have mismatched layer names in different implementations.
        In such cases, you can overload this function to extract the state dict from the checkpoint.

        NOTE: The model checkpoint will be loaded using the torch function load_state_dict() with argument strict=False,
        so extra care needs to be taken to check that the state dict is valid.
        Check the logs for warnings related to missing and unexpected keys.
        See https://pytorch.org/tutorials/beginner/saving_loading_models.html#warmstarting-model-using-parameters
        -from-a-different-model
        for an explanation on why strict=False is useful when loading parameters from other models.
        :param path_to_checkpoint: Path to the checkpoint file.
        :return: Dictionary with model and optimizer state dicts. The dict should have at least the following keys:
        1. Key ModelAndInfo.MODEL_STATE_DICT_KEY and value set to the model state dict.
        2. Key ModelAndInfo.EPOCH_KEY and value set to the checkpoint epoch.
        Other (optional) entries corresponding to keys ModelAndInfo.OPTIMIZER_STATE_DICT_KEY and
        ModelAndInfo.MEAN_TEACHER_STATE_DICT_KEY are also supported.
        """
        return load_checkpoint(path_to_checkpoint=path_to_checkpoint,
                               use_gpu=self.use_gpu)
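# dataset_data_frame above wraps the underscore-prefixed _dataset_data_frame
# parameter in a plain Python property. A stripped-down sketch of the pattern;
# ConfigSketch is illustrative.
from typing import Optional

import pandas as pd
import param

class ConfigSketch(param.Parameterized):
    _dataset_data_frame: Optional[pd.DataFrame] = param.DataFrame(default=None)

    @property
    def dataset_data_frame(self) -> Optional[pd.DataFrame]:
        return self._dataset_data_frame

    @dataset_data_frame.setter
    def dataset_data_frame(self, data_frame: Optional[pd.DataFrame]) -> None:
        self._dataset_data_frame = data_frame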
Example #19
class Dashboard(param.Parameterized):
    # Setup
    model_options = {
        'Linear Regression': LinearRegression(),
        **{f'Poly Degree {i}': PolyRegression(i)
           for i in range(2, 16, 2)}
    }
    relationships = {'linear': 'linear', 'sine wave': 'sine_wave'}

    # Widgets for controlling the simulation
    n = param.Integer(default=100, bounds=(20, MAX_N), step=20)
    Noise_Amplitude = param.Number(default=1, bounds=(0, 10))
    noise = param.ObjectSelector(default='normal',
                                 objects=['normal', 'constant'])
    Underlying_Relation = param.ObjectSelector(
        default=relationships['linear'], objects=relationships)

    # Widgets for modeling
    estimator = param.ObjectSelector(
        default=model_options['Linear Regression'], objects=model_options)
    N_Folds = param.Integer(default=10, bounds=(5, MAX_N_FOLDS))
    Shuffle_Folds = param.Boolean(False)

    # Widgets for changing visuals
    Show_Unselected_Folds = param.Boolean(True)
    Select_Fold = param.ObjectSelector(
        default='all',
        objects={
            'all': 'all',
            **{
                f'fold:{fold}': f'fold:{fold}'
                for fold in range(N_Folds.default)
            }
        })
    # interactive changes on data
    data = param.DataFrame(data_generator(n.default, noise.default,
                                          Noise_Amplitude.default,
                                          Underlying_Relation.default),
                           precedence=-1)
    data_extra = param.DataFrame(data_generator(N_EXTERNAL_TEST, noise.default,
                                                Noise_Amplitude.default,
                                                Underlying_Relation.default),
                                 precedence=-1)
    data_splitted = param.DataFrame(KFold_split(data.default,
                                                data_extra.default,
                                                N_Folds.default, False),
                                    precedence=-1)
    data_plot = param.DataFrame(fit_transform(data_splitted.default,
                                              estimator.default),
                                precedence=-1)

    @param.depends('n',
                   'noise',
                   'Noise_Amplitude',
                   'Underlying_Relation',
                   watch=True)
    def update_data_creation(self):
        self.data = data_generator(self.n, self.noise, self.Noise_Amplitude,
                                   self.Underlying_Relation)
        self.data_extra = data_generator(N_EXTERNAL_TEST, self.noise,
                                         self.Noise_Amplitude,
                                         self.Underlying_Relation)

    @param.depends('data',
                   'data_extra',
                   'N_Folds',
                   'Shuffle_Folds',
                   watch=True)
    def update_split(self):

        # Never use more folds than samples; otherwise sklearn raises an error
        if self.N_Folds > self.n:
            self.N_Folds = self.n
        if self.N_Folds > self.param['N_Folds'].bounds[1]:
            self.N_Folds = self.param['N_Folds'].bounds[1]

        self.param['N_Folds'].bounds = (5, min(MAX_N_FOLDS, self.n))

        self.data_splitted = KFold_split(self.data, self.data_extra,
                                         self.N_Folds, self.Shuffle_Folds)

    @param.depends('data_splitted', 'estimator', watch=True)
    def update_estimator(self):
        self.data_plot = fit_transform(self.data_splitted, self.estimator)

    @param.depends('data_plot', 'Show_Unselected_Folds', 'Select_Fold')
    def view(self):
        ds = hv.Dataset(self.data_plot,
                        kdims=['x', 'data_split', 'in_train_set', 'y'],
                        vdims=['y_pred'])

        self.param['Select_Fold'].objects = {
            'all': 'all',
            **{f'fold:{fold}': f'fold:{fold}'
               for fold in range(self.N_Folds)}
        }

        scatter = ds.apply(create_scatter, fold=self.Select_Fold)
        lines = ds.apply(create_line,
                         fold=self.Select_Fold,
                         show_unselected=self.Show_Unselected_Folds)
        dist_plot = ds.apply(create_dist_plot, fold=self.Select_Fold)
        return pn.Column(pn.Row((scatter * lines), dist_plot))
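# The three watch=True methods above form a reactive chain: changing n
# regenerates data, which re-splits folds, which refits the estimator.
# A minimal chain with the same shape; ReactiveChain and its toy transforms
# are illustrative.
import param

class ReactiveChain(param.Parameterized):
    n = param.Integer(default=3)
    data = param.List(default=[])
    doubled = param.List(default=[])

    @param.depends("n", watch=True)
    def _make_data(self):
        self.data = list(range(self.n))

    @param.depends("data", watch=True)
    def _transform(self):
        self.doubled = [2 * x for x in self.data]

chain = ReactiveChain()
chain.n = 4           # triggers _make_data, which in turn triggers _transform
print(chain.doubled)  # [0, 2, 4, 6]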
Example #20
class Test(param.Parameterized):
    df = param.DataFrame(default=invalid_df, columns=(None, 2))
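# Here columns is a tuple, read as (lower, upper) bounds on the number of
# columns; (None, 2) means at most two. Assuming invalid_df has three columns,
# as its name suggests, the declaration itself raises:
import pandas as pd
import param

invalid_df = pd.DataFrame({"a": [1], "b": [2], "c": [3]})  # three columns

try:
    class Test(param.Parameterized):
        df = param.DataFrame(default=invalid_df, columns=(None, 2))
except ValueError as e:
    print(e)  # three columns exceed the upper bound of 2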
Example #21
class AnnotationMission(param.Parameterized):
    # (input) parameters
    cpr = param.Parameter()
    signals = param.Parameter()
    titles = param.Parameter()
    mission_id = param.Parameter()
    annotations = param.DataFrame(default=df_default)
    next_classification = param.ObjectSelector(default=CLASSIFICATIONS[0],
                                               objects=CLASSIFICATIONS)
    pending_start = param.Number(default=None)

    ############################
    ##  ANNOTATIONS PLOT
    ############################
    # Display the interactive elements, like annotations as colorful ranges over the plot
    def plot_annotations(self, **kwargs):
        flag = [
            str(i) == str(self.mission_id)
            for i in self.annotations.mission_id.values
        ]
        rows = self.annotations[flag].iterrows()
        plots = []
        # We remember the first double-click and draw a vertical line if we expect another click to happen
        if self.pending_start:
            plots.append(hv.VLine(self.pending_start).opts(line_dash="dashed"))
        plots.extend([
            hv.VSpan(r["start_clock_ms"], r["end_clock_ms"]).opts(
                color=color_dict.get(r["classification"], "yellow"))  #*
            #hv.Text((r["start_clock_ms"]+r["end_clock_ms"])/2,0.9,str(r["classification"])).opts(color="red")
            for ix, r in rows
        ])
        return hv.Overlay(plots)

    ############################
    ##  ANNOTATIONS REFRESH
    ############################
    def refresh_annotations(self):
        if hasattr(self, "_plot_update_stream"):
            self._plot_update_stream.event()

    ############################
    ##  SIGNALS PLOT
    ############################
    # Plot and datashade the ECG signal
    def plot_signal(self, **kwargs):

        curves = []
        curves.append(hv.Curve(self.cpr))
        curves.append(hv.Curve(self.signals[0]))
        curves.append(
            hv.Curve(self.signals[1], label=self.titles[1]).opts(opt))
        curves.append(
            hv.Curve(self.signals[2], label=self.titles[2]).opts(opt))

        return curves

    ############################
    ##  PLOT
    ############################

    def plot(self):
        signal_curves = self.plot_signal()
        # This is the clicking behaviour.
        self._plot_update_stream = hv.streams.Counter()

        def on_tap(x, y):
            # We have two modes, either there is no annotation pending,
            # so we remember the first click, or we record the annotation and reset the pending state.
            if not self.pending_start:
                self.pending_start = x
            else:
                values = (self.pending_start, x)
                start, end = min(values), max(values)
                # pd.concat (DataFrame.append was removed in pandas 2.0)
                new_row = pd.DataFrame({
                    "mission_id": [self.mission_id],
                    "annotation_time": [datetime.now()],
                    "start_clock_ms": [start],
                    "end_clock_ms": [end],
                    "classification": [self.next_classification],
                })
                self.annotations = pd.concat([self.annotations, new_row],
                                             ignore_index=True)
                self.pending_start = None
            self.refresh_annotations()

        tap0 = hv.streams.DoubleTap(source=signal_curves[1])
        tap1 = hv.streams.DoubleTap(source=signal_curves[2])
        tap2 = hv.streams.DoubleTap(source=signal_curves[3])

        @tap0.add_subscriber
        def on_tap0(x, y):
            on_tap(x, y)

        @tap1.add_subscriber
        def on_tap1(x, y):
            on_tap(x, y)

        @tap2.add_subscriber
        def on_tap2(x, y):
            on_tap(x, y)

        ## annotation dynamic map
        annotations_dmap = hv.DynamicMap(self.plot_annotations,
                                         streams=[self._plot_update_stream])

        ## ECG and CPR plot overlay
        ecg_opt = hv.opts.Overlay(title='ECG and CPR')
        ecg_curve = hv.Overlay([
            datashade(signal_curves[1], cmap=["grey", "black"]).opts(opt),
            annotations_dmap,
        ])

        ecg_annot = hv.Overlay([
            ecg_curve,
            signal_curves[0].opts(color="red"),
        ]).opts(ecg_opt)

        ## output plot I
        output_plots = []
        output_plots.append(ecg_annot)

        ## output plot II
        output_plots.append(
            hv.Overlay([
                datashade(signal_curves[2], cmap=["grey", "black"]).opts(opt),
                annotations_dmap,
            ]).opts(opt))

        ## output plot III
        output_plots.append(
            hv.Overlay([
                datashade(signal_curves[3], cmap=["grey", "black"]).opts(opt),
                annotations_dmap,
            ]).opts(opt))
        return tuple(output_plots)

    ############################
    ##  ANNOTATION REMOVE
    ############################

    # These are the handlers for the "detail table"
    def on_remove_annotation(self, ix):
        self.annotations = self.annotations.drop(ix)
        self.refresh_annotations()

    ############################
    ##  ANNOTATION CHANGE
    ############################

    def on_change_annotation(self, ix, value):
        self.annotations.loc[ix, "classification"] = value
        # This line is needed to notify param of the inplace updated annotations dataframe
        self.annotations = self.annotations
        self.refresh_annotations()

    ############################
    ##  ANNOTATION SAVE
    ############################
    @param.depends("annotations")
    def action_save_annotations(self):
        try:
            self.annotations.to_csv(pth_df, mode='w')
        except Exception:
            self.annotations.to_csv(pth_df)

    save_annotations = param.Action(action_save_annotations,
                                    doc="Save Changes",
                                    label="Save Mission Changes")
    ############################
    ##  CONTROL PANEL
    ############################

    # This is the detail table below where you can change the annotation made, or remove it.
    @param.depends("annotations")
    def plot_annotation_details(self):

        elements = []
        for i, (ix, r) in enumerate(
                self.annotations
                # Sorting the dataframe here is necessary,
                # otherwise we would number the ranges by their insertion, not by their time.
                .sort_values("start_clock_ms").iterrows()):
            if str(r["mission_id"]) == str(self.mission_id):
                select = pn.widgets.RadioButtonGroup(
                    name="Select classification",
                    options=CLASSIFICATIONS,
                    value=r["classification"],
                    inline=True,
                )

                remove = pn.widgets.Button(
                    name="remove",
                    width=40,
                )
                clock_ms = int(float(r['start_clock_ms']) / 1000)
                tstamp = datetime.fromtimestamp(clock_ms).strftime("%H:%M:%S")
                select.param.watch(
                    partial(
                        lambda ix, event: self.on_change_annotation(
                            ix, event.new), ix), "value")
                remove.param.watch(
                    partial(lambda ix, event: self.on_remove_annotation(ix),
                            ix), "clicks")
                elements.extend([
                    pn.widgets.StaticText(name=f"@ {tstamp} ", value=""),
                    remove, select
                ])
        return pn.GridBox(*elements, ncols=3, width=1200)

    def render(self):
        return pn.Column(
            pn.pane.Markdown(
                '### Start annotating by double clicking into the plot. This will mark the start of a range. Double click again to mark the end of the range.',
                style={
                    'font-family': "serif",
                    'color': "#ff0000"
                }),
            pn.Row(
                pn.pane.Markdown('### Classification for next annotation:',
                                 style={'font-family': "serif"}),
                pn.Param(
                    self.param.next_classification,
                    widgets={
                        "next_classification":
                        pn.widgets.RadioButtonGroup(
                            options=CLASSIFICATIONS,
                        )  #style={'font-size':'10pt'},css_classes=["widget-button"])
                    }),
                pn.Spacer(background='white', width=100, height=10),
                #self.param.remove_last_annotation,
                self.param.save_annotations,
            ),
            *(self.plot()),
            pn.pane.Markdown(
                f"### List of annotations for mission {self.mission_id}",
                style={'font-family': "serif"}),
            self.plot_annotation_details,
        )
Example #22
class Test(param.Parameterized):
    df = param.DataFrame(default=valid_df, rows=(None, 3))
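A minimal sketch of what the `rows` bound enforces, assuming param's documented DataFrame validation; `valid_df` here is a hypothetical two-row stand-in, since the example does not show it:

import pandas as pd
import param

valid_df = pd.DataFrame({"a": [1, 2]})  # hypothetical two-row frame

class Test(param.Parameterized):
    # rows=(None, 3): no lower bound on row count, at most 3 rows
    df = param.DataFrame(default=valid_df, rows=(None, 3))

t = Test()                                    # passes: 2 rows <= 3
try:
    t.df = pd.DataFrame({"a": [1, 2, 3, 4]})  # 4 rows exceed the upper bound
except ValueError as err:
    print(err)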
Example #23
class WaveformWatcher(param.Parameterized):
    DIMS = [
        ["cs1", "cs2"],
        ["z", "r"],
        ["e_light", 'e_charge'],
        ["e_light", 'e_ces'],
        ["drift_time", "n_peaks"],
    ]

    dates = param.DateRange(
        default=(dt.datetime(2016, 11, 10), dt.datetime.utcnow()),
        bounds=(dt.datetime(2016, 11, 10), dt.datetime.utcnow()),
    )
    runs = param.List(default=[])
    sources = param.List(default=[])
    linked_selection = param.Parameter()
    selection_spaces = param.List(default=DIMS)
    events = param.DataFrame(default=pd.DataFrame())

    def __init__(self, **params):
        super().__init__(**params)
        self.linked_selection = hv.selection.link_selections.instance()

    @param.depends("selection_spaces", watch=True)
    def event_selection(self):
        if not self.selection_spaces:
            return hv.Points(dset, ["cs1", "cs2"]).opts(color="blue")
        colors = hv.Cycle('Category10').values
        plots = [
            hv.Points(dset, dims).opts(color=c)
            for c, dims in zip(colors, self.selection_spaces)
        ]

        layout = hv.Layout(plots).cols(6)
        lsp = hv.selection.link_selections.instance()
        self.linked_selection = lsp
        layout = self.linked_selection(layout)

        return layout

    @param.depends("linked_selection.selection_expr")
    def selection(self):
        table = hv.Table(dset).opts(width=1550)

        if self.linked_selection and self.linked_selection.selection_expr:
            selected = table[self.linked_selection.selection_expr.apply(table)]
            self.events = selected.data
            return selected
        self.events = table.data
        return table

    def panel(self):
        date_picker = self.param.dates
        runs_picker = pn.widgets.MultiChoice(value=["181028_0045"],
                                             name="Runs",
                                             options=["181028_0045"],
                                             solid=False,
                                             width=1000)
        runs_picker.link(self, value="runs")
        source_picker = pn.widgets.CheckButtonGroup(
            value=["None"],
            name="Source",
            options=["None", "AmBe", "NG", "Rn220"])
        source_picker.link(self, value="source")

        selection_spaces = pn.widgets.CheckButtonGroup(
            value=self.DIMS,
            name="Selection spaces",
            options={f"{x} vs {y}": [x, y]
                     for x, y in self.DIMS},
            width=1000)
        selection_spaces.link(self, value="selection_spaces")

        return pn.Column(
            pn.layout.Divider(),
            pn.pane.Markdown(
                "## First allow the user to load events by date range/run_id/source"
            ),
            date_picker,
            runs_picker,
            pn.pane.Markdown("  Source"),
            source_picker,
            pn.layout.Divider(),
            pn.pane.Markdown(
                "## Allow user to choose the selection spaces of interest e.g. cut spaces, energy etc."
            ),
            selection_spaces,
            pn.pane.Markdown(
                "## Plot events in selection spaces of interest for user to apply selections."
            ),
            pn.panel(self.event_selection),
            pn.layout.Divider(),
            pn.pane.Markdown("## Preview selected events with properties"),
            self.selection,
            width=1600,
        )
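A hedged serving sketch for the class above, assuming `dset` is a module-level dataset already loaded in a form that hv.Points and hv.Table can consume:

# Hypothetical entry point for serving the dashboard locally
watcher = WaveformWatcher()
pn.serve(watcher.panel(), port=5006, show=False)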
Example #24
class Test(param.Parameterized):
    df = param.DataFrame(default=invalid_df, rows=(5, 7))
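A sketch of the failure this example exercises, assuming `invalid_df` (not shown) violates the bound; param validates the default when the Parameter is constructed, so the class body itself raises:

import pandas as pd
import param

invalid_df = pd.DataFrame({"a": [1, 2]})  # hypothetical: only 2 rows

try:
    class Test(param.Parameterized):
        df = param.DataFrame(default=invalid_df, rows=(5, 7))  # needs 5-7 rows
except ValueError as err:
    print(err)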
Example #25
File: utils.py Project: erdc/pyuit
class StatusTab(TabView):
    title = param.String(default='Status')
    statuses = param.DataFrame(precedence=0.1)
    update = param.Action(lambda self: self.update_statuses(), precedence=0.2)
    terminate_btn = param.Action(lambda self: None,
                                 label='Terminate',
                                 precedence=0.3)
    yes_btn = param.Action(lambda self: self.terminate_job(),
                           label='Yes',
                           precedence=0.4)
    cancel_btn = param.Action(lambda self: None,
                              label='Cancel',
                              precedence=0.5)
    disable_update = param.Boolean()

    @param.depends('parent.selected_job', watch=True)
    def update_statuses(self):
        if self.selected_job is not None:
            if self.disable_update:
                qstat = self.selected_job.qstat
                if qstat is None:
                    statuses = None
                elif self.is_array:
                    statuses = pd.DataFrame.from_dict(qstat).T
                else:
                    statuses = pd.DataFrame(qstat, index=[0])
            else:
                jobs = [self.selected_job]
                if self.is_array:
                    jobs += self.selected_job.sub_jobs
                statuses = PbsJob.update_statuses(jobs, as_df=True)
                self.update_terminate_btn()
            if statuses is not None:
                statuses.set_index('job_id', inplace=True)
            self.statuses = statuses

    def terminate_job(self):
        self.selected_job.terminate()
        time.sleep(10)
        self.update_statuses()

    def update_terminate_btn(self):
        self.param.terminate_btn.constant = self.selected_job.status not in (
            'Q', 'R', 'B')

    @param.depends('statuses')
    def statuses_panel(self):
        statuses_table = pn.widgets.DataFrame.from_param(self.param.statuses, width=1300) \
            if self.statuses is not None else pn.pane.Alert('No status information available.', alert_type='info')

        if self.disable_update:
            buttons = None
        else:
            update_btn = pn.widgets.Button.from_param(self.param.update,
                                                      button_type='primary',
                                                      width=100)
            terminate_btn = pn.widgets.Button.from_param(
                self.param.terminate_btn, button_type='danger', width=100)
            yes_btn = pn.widgets.Button.from_param(self.param.yes_btn,
                                                   button_type='danger',
                                                   width=100)
            cancel_btn = pn.widgets.Button.from_param(self.param.cancel_btn,
                                                      button_type='success',
                                                      width=100)

            yes_btn.visible = False
            cancel_btn.visible = False

            msg = pn.indicators.String(
                value='Are you sure you want to terminate the job? This cannot be undone.',
                css_classes=['bk', 'alert', 'alert-danger'],
                default_color='inherit',
                font_size='inherit',
                visible=False,
            )

            terminate_confirmation = pn.Column(
                msg,
                pn.Row(yes_btn, cancel_btn, margin=20),
                background='#ffffff',
            )

            args = {
                'update_btn': update_btn,
                'terminate_btn': terminate_btn,
                'statuses_table': statuses_table,
                'msg': msg,
                'yes_btn': yes_btn,
                'cancel_btn': cancel_btn,
                'term_col': terminate_confirmation
            }
            terminate_code = 'update_btn.disabled=true; terminate_btn.visible=false; ' \
                             'msg.visible=true; yes_btn.visible=true; cancel_btn.visible=true; ' \
                             'term_col.css_classes=["panel-widget-box"]'
            cancel_code = 'update_btn.disabled=false; terminate_btn.visible=true; ' \
                          'msg.visible=false; yes_btn.visible=false; cancel_btn.visible=false; term_col.css_classes=[]'

            terminate_btn.js_on_click(args=args, code=terminate_code)
            cancel_btn.js_on_click(args=args, code=cancel_code)

            code = 'btn.css_classes.push("pn-loading", "arcs"); btn.properties.css_classes.change.emit(); ' \
                   'other_btn.disabled=true; ' \
                   'statuses_table.css_classes.push("pn-loading", "arcs"); statuses_table.properties.css_classes.change.emit();'

            update_btn.js_on_click(
                args={
                    'btn': update_btn,
                    'other_btn': terminate_btn,
                    'statuses_table': statuses_table
                },
                code=code,
            )
            yes_btn.js_on_click(
                args={
                    'btn': terminate_btn,
                    'other_btn': update_btn,
                    'statuses_table': statuses_table
                },
                code=code,
            )

            buttons = pn.Row(update_btn, terminate_btn, terminate_confirmation)

        return pn.Column(
            statuses_table,
            buttons,
            sizing_mode='stretch_width',
        )

    @param.depends('parent.selected_job')
    def panel(self):
        if self.selected_job:
            return self.statuses_panel
        else:
            return pn.pane.HTML('<h2>No jobs are available</h2>')
Example #26
class Test(param.Parameterized):
    df = param.DataFrame(valid_df)
Example #27
class DeepLearningConfig(GenericConfig, CudaAwareConfig):
    """
    A class that holds all settings that are shared across segmentation models and regression/classification models.
    """
    _model_category: ModelCategory = param.ClassSelector(
        class_=ModelCategory,
        doc="The high-level model category described by this config.")
    _model_name: str = param.String(
        None,
        doc="The human readable name of the model (for example, Liver). This is "
        "usually set from the class name.")

    random_seed: int = param.Integer(
        42, doc="The seed to use for all random number generators.")
    azure_dataset_id: str = param.String(
        doc=
        "If provided, the ID of the dataset to use. This dataset must exist as a "
        "folder of the same name in the 'datasets' "
        "container in the datasets storage account.")
    local_dataset: Optional[Path] = param.ClassSelector(
        class_=Path,
        default=None,
        allow_None=True,
        doc="The path of the dataset to use, when training is running "
        "outside Azure.")
    num_dataload_workers: int = param.Integer(
        8,
        bounds=(0, None),
        doc="The number of data loading workers (processes). When set to 0,"
        "data loading is running in the same process (no process startup "
        "cost, hence good for use in unit testing. However, it "
        "does not give the same result as running with 1 worker process)")
    shuffle: bool = param.Boolean(
        True,
        doc="If true, the dataset will be shuffled randomly during training.")
    num_epochs: int = param.Integer(100,
                                    bounds=(1, None),
                                    doc="Number of epochs to train.")
    start_epoch: int = param.Integer(
        0,
        bounds=(0, None),
        doc="The first epoch to train. Set to 0 to start a new "
        "training. Set to a value larger than zero for starting"
        " from a checkpoint.")

    l_rate: float = param.Number(1e-4,
                                 doc="The initial learning rate",
                                 bounds=(0, None))
    _min_l_rate: float = param.Number(
        0.0,
        doc=
        "The minimum learning rate for the Polynomial and Cosine schedulers.",
        bounds=(0.0, None))
    l_rate_scheduler: LRSchedulerType = param.ClassSelector(
        default=LRSchedulerType.Polynomial,
        class_=LRSchedulerType,
        instantiate=False,
        doc="Learning rate decay method (Cosine, Polynomial, "
        "Step, MultiStep or Exponential)")
    l_rate_exponential_gamma: float = param.Number(
        0.9,
        doc="Controls the rate of decay for the Exponential "
        "LR scheduler.")
    l_rate_step_gamma: float = param.Number(
        0.1, doc="Controls the rate of decay for the "
        "Step LR scheduler.")
    l_rate_step_step_size: int = param.Integer(
        50, bounds=(0, None), doc="The step size for Step LR scheduler")
    l_rate_multi_step_gamma: float = param.Number(
        0.1,
        doc="Controls the rate of decay for the "
        "MultiStep LR scheduler.")
    l_rate_multi_step_milestones: Optional[List[int]] = param.List(
        None,
        bounds=(1, None),
        allow_None=True,
        class_=int,
        doc="The milestones for MultiStep decay.")
    l_rate_polynomial_gamma: float = param.Number(
        1e-4,
        doc="Controls the rate of decay for the "
        "Polynomial LR scheduler.")
    l_rate_warmup: LRWarmUpType = param.ClassSelector(
        default=LRWarmUpType.NoWarmUp,
        class_=LRWarmUpType,
        instantiate=False,
        doc="The type of learning rate warm up to use. "
        "Can be NoWarmUp (default) or Linear.")
    l_rate_warmup_epochs: int = param.Integer(
        0,
        bounds=(0, None),
        doc="Number of warmup epochs (linear warmup) before the "
        "scheduler starts decaying the learning rate. "
        "For example, if you are using MultiStepLR with "
        "milestones [50, 100, 200] and warmup epochs = 100, warmup "
        "will last for 100 epochs and the first decay of LR "
        "will happen on epoch 150")
    optimizer_type: OptimizerType = param.ClassSelector(
        default=OptimizerType.Adam,
        class_=OptimizerType,
        instantiate=False,
        doc="The optimizer_type to use")
    opt_eps: float = param.Number(
        1e-4, doc="The epsilon parameter of RMSprop or Adam")
    rms_alpha: float = param.Number(0.9, doc="The alpha parameter of RMSprop")
    adam_betas: TupleFloat2 = param.NumericTuple(
        (0.9, 0.999),
        length=2,
        doc="The betas parameter of Adam, default is (0.9, 0.999)")
    momentum: float = param.Number(
        0.6, doc="The momentum parameter of the optimizers")
    weight_decay: float = param.Number(
        1e-4, doc="The weight decay used to control L2 regularization")

    save_start_epoch: int = param.Integer(
        100,
        bounds=(0, None),
        doc="Save epoch checkpoints only when epoch is "
        "larger or equal to this value.")
    save_step_epochs: int = param.Integer(
        50,
        bounds=(0, None),
        doc="Save epoch checkpoints when epoch number is a "
        "multiple of save_step_epochs")
    train_batch_size: int = param.Integer(
        4,
        bounds=(0, None),
        doc="The number of crops that make up one minibatch during training.")
    detect_anomaly: bool = param.Boolean(
        False,
        doc="If true, test gradients for anomalies (NaN or Inf) during "
        "training.")
    use_mixed_precision: bool = param.Boolean(
        False,
        doc="If true, mixed precision training is activated during "
        "training.")
    use_model_parallel: bool = param.Boolean(
        False,
        doc="If true, neural network model is partitioned across all "
        "available GPUs to fit in a large model. It shall not be used "
        "together with data parallel.")
    test_diff_epochs: Optional[int] = param.Integer(
        None,
        doc="Number of different epochs of the same model to test",
        allow_None=True)
    test_step_epochs: Optional[int] = param.Integer(
        None, doc="How many epochs to move for each test", allow_None=True)
    test_start_epoch: Optional[int] = param.Integer(
        None,
        doc="The first epoch on which testing should run.",
        allow_None=True)
    monitoring_interval_seconds: int = param.Integer(
        0,
        doc="Seconds delay between logging GPU/CPU resource "
        "statistics. If 0 or less, do not log any resource "
        "statistics.")
    number_of_cross_validation_splits: int = param.Integer(
        0,
        bounds=(0, None),
        doc="Number of cross validation splits for k-fold cross "
        "validation")
    cross_validation_split_index: int = param.Integer(
        DEFAULT_CROSS_VALIDATION_SPLIT_INDEX,
        bounds=(-1, None),
        doc="The index of the cross validation fold this model is "
        "associated with when performing k-fold cross validation")
    file_system_config: DeepLearningFileSystemConfig = param.ClassSelector(
        default=DeepLearningFileSystemConfig(),
        class_=DeepLearningFileSystemConfig,
        instantiate=False,
        doc="File system related configs")
    pin_memory: bool = param.Boolean(
        True, doc="Value of pin_memory argument to DataLoader")
    _overrides: Dict[str, Any] = param.Dict(
        instantiate=True,
        doc="Model config properties that were overridden from the commandline"
    )
    restrict_subjects: Optional[str] = \
        param.String(doc="Use at most this number of subjects for train, val, or test set (must be > 0 or None). "
                         "If None, do not modify the train, val, or test sets. If a string of the form 'i,j,k' where "
                         "i, j and k are integers, modify just the corresponding sets (i for train, j for val, k for "
                         "test). If any of i, j or j are missing or are negative, do not modify the corresponding "
                         "set. Thus a value of 20,,5 means limit training set to 20, keep validation set as is, and "
                         "limit test set to 5. If any of i,j,k is '+', discarded members of the other sets are added "
                         "to that set.",
                     allow_None=True)
    perform_training_set_inference: bool = \
        param.Boolean(False,
                      doc="If False (default), run full image inference on validation and test set after training. If "
                          "True, also run full image inference on the training set")
    perform_validation_and_test_set_inference: bool = \
        param.Boolean(True,
                      doc="If True (default), run full image inference on validation and test set after training.")
    _metrics_data_frame_loggers: MetricsDataframeLoggers = param.ClassSelector(
        default=None,
        class_=MetricsDataframeLoggers,
        instantiate=False,
        doc="Data frame loggers for this model "
        "config")
    _dataset_data_frame: Optional[DataFrame] = \
        param.DataFrame(default=None,
                        doc="The dataframe that contains the dataset for the model. This is usually read from disk "
                            "from dataset.csv")
    _use_gpu: Optional[bool] = param.Boolean(
        None,
        doc="If true, a CUDA capable GPU with at least 1 device is "
        "available. If None, the use_gpu property has not yet been called.")
    avoid_process_spawn_in_data_loaders: bool = \
        param.Boolean(is_windows(), doc="If True, use a data loader logic that avoid spawning new processes at the "
                                        "start of each epoch. This speeds up training on both Windows and Linux, but"
                                        "on Linux, inference is currently disabled as the data loaders hang. "
                                        "If False, use the default data loader logic that starts new processes for "
                                        "each epoch.")
    # The default multiprocessing start_method in both PyTorch and the Python standard library is "fork" for Linux and
    # "spawn" (the only available method) for Windows. There is some evidence that using "forkserver" on Linux
    # can reduce the chance of stuck jobs.
    multiprocessing_start_method: MultiprocessingStartMethod = \
        param.ClassSelector(class_=MultiprocessingStartMethod,
                            default=(MultiprocessingStartMethod.spawn if is_windows()
                                     else MultiprocessingStartMethod.fork),
                            doc="Method to be used to start child processes in pytorch. Should be one of forkserver, "
                                "fork or spawn. If not specified, fork is used on Linux and spawn on Windows. "
                                "Set to forkserver as a possible remedy for stuck jobs.")
    output_to: Optional[str] = \
        param.String(default=None,
                     doc="If provided, the run outputs will be written to the given folder. If not provided, outputs "
                         "will go into a subfolder of the project root folder.")
    max_batch_grad_cam: int = param.Integer(
        default=0,
        doc="Max number of validation batches for which "
        "to save gradCam images. By default "
        "visualizations are saved for all images "
        "in the validation set")
    label_smoothing_eps: float = param.Number(
        0.0,
        bounds=(0.0, 1.0),
        doc="Target smoothing value for label smoothing")
    log_to_parent_run: bool = param.Boolean(
        default=False,
        doc="If true, hyperdrive child runs will log their metrics"
        "to their parent run.")

    use_imbalanced_sampler_for_training: bool = param.Boolean(
        default=False,
        doc="If True, use an imbalanced sampler during training.")
    drop_last_batch_in_training: bool = param.Boolean(
        default=False,
        doc="If True, drop the last incomplete batch during"
        "training. If all batches are complete, no batch gets "
        "dropped. If False, keep all batches.")
    log_summaries_to_files: bool = param.Boolean(
        default=True,
        doc=
        "If True, model summaries are logged to files in logs/model_summaries; "
        "if False, to stdout or driver log")
    mean_teacher_alpha: float = param.Number(
        bounds=(0, 1),
        allow_None=True,
        default=None,
        doc="If this value is set, the mean teacher model will be computed. "
        "Currently only supported for scalar models. In this case, we only "
        "report metrics and cross-validation results for "
        "the mean teacher model. Likewise the model used for inference "
        "is the mean teacher model. The student model is only used for "
        "training. Alpha is the momentum term for weight updates of the mean "
        "teacher model. After each training step the mean teacher model "
        "weights are updated using mean_teacher_"
        "weight = alpha * (mean_teacher_weight) "
        " + (1-alpha) * (current_student_weights). ")

    def __init__(self, **params: Any) -> None:
        self._model_name = type(self).__name__
        # This should be annotated as torch.utils.data.Dataset, but we don't want to import torch here.
        self._datasets_for_training: Optional[Dict[ModelExecutionMode,
                                                   Any]] = None
        self._datasets_for_inference: Optional[Dict[ModelExecutionMode,
                                                    Any]] = None
        super().__init__(throw_if_unknown_param=True, **params)
        logging.info("Creating the default output folder structure.")
        self.create_filesystem(fixed_paths.repository_root_directory())

    def validate(self) -> None:
        """
        Validates the parameters stored in the present object.
        """
        if len(self.adam_betas) < 2:
            raise ValueError(
                "The adam_betas parameter should be the coefficients used for computing running averages of "
                "gradient and its square")

        if self.azure_dataset_id is None and self.local_dataset is None:
            raise ValueError(
                "Either of local_dataset or azure_dataset_id must be set.")

        if self.number_of_cross_validation_splits == 1:
            raise ValueError(
                f"At least two splits required to perform cross validation found "
                f"number_of_cross_validation_splits={self.number_of_cross_validation_splits}"
            )
        if 0 < self.number_of_cross_validation_splits <= self.cross_validation_split_index:
            raise ValueError(
                f"Cross validation split index is out of bounds: {self.cross_validation_split_index}, "
                f"which is invalid for CV with {self.number_of_cross_validation_splits} splits."
            )
        elif self.number_of_cross_validation_splits == 0 and self.cross_validation_split_index != -1:
            raise ValueError(
                f"Cross validation split index must be -1 for a non cross validation run, "
                f"found number_of_cross_validation_splits = {self.number_of_cross_validation_splits} "
                f"and cross_validation_split_index={self.cross_validation_split_index}"
            )

        if self.l_rate_scheduler == LRSchedulerType.MultiStep:
            if not self.l_rate_multi_step_milestones:
                raise ValueError(
                    "Must specify l_rate_multi_step_milestones to use LR scheduler MultiStep"
                )
            if sorted(set(self.l_rate_multi_step_milestones)
                      ) != self.l_rate_multi_step_milestones:
                raise ValueError(
                    "l_rate_multi_step_milestones must be a strictly increasing list"
                )
            if self.l_rate_multi_step_milestones[0] <= 0:
                raise ValueError(
                    "l_rate_multi_step_milestones cannot be negative or 0.")

    @property
    def model_name(self) -> str:
        """
        Gets the human readable name of the model (e.g., Liver). This is usually set from the class name.
        :return: A model name as a string.
        """
        return self._model_name

    @property
    def model_category(self) -> ModelCategory:
        """
        Gets the high-level model category that this configuration object represents (segmentation or scalar output).
        """
        return self._model_category

    @property
    def is_segmentation_model(self) -> bool:
        """
        Returns True if the present model configuration belongs to the high-level category ModelCategory.Segmentation.
        """
        return self.model_category == ModelCategory.Segmentation

    @property
    def is_scalar_model(self) -> bool:
        """
        Returns True if the present model configuration belongs to the high-level category ModelCategory.Scalar
        i.e. for Classification or Regression models.
        """
        return self.model_category.is_scalar

    @property
    def compute_grad_cam(self) -> bool:
        return self.max_batch_grad_cam > 0

    @property
    def min_l_rate(self) -> float:
        return self._min_l_rate

    @min_l_rate.setter
    def min_l_rate(self, value: float) -> None:
        if value > self.l_rate:
            raise ValueError(
                "l_rate must be >= min_l_rate, found: {}, {}".format(
                    self.l_rate, value))
        self._min_l_rate = value

    @property
    def outputs_folder(self) -> Path:
        """Gets the full path in which the model outputs should be stored."""
        return self.file_system_config.outputs_folder

    @property
    def logs_folder(self) -> Path:
        """Gets the full path in which the model logs should be stored."""
        return self.file_system_config.logs_folder

    @property
    def checkpoint_folder(self) -> str:
        """Gets the full path in which the model checkpoints should be stored during training."""
        return str(self.outputs_folder / CHECKPOINT_FOLDER)

    @property
    def visualization_folder(self) -> Path:
        """Gets the full path in which the visualizations notebooks should be saved during training."""
        return self.outputs_folder / VISUALIZATION_FOLDER

    @property
    def perform_cross_validation(self) -> bool:
        """
        True if cross validation will be performed as part of the training procedure.
        :return:
        """
        return self.number_of_cross_validation_splits > 1

    @property
    def overrides(self) -> Optional[Dict[str, Any]]:
        return self._overrides

    @property
    def dataset_data_frame(self) -> Optional[DataFrame]:
        """
        Gets the pandas data frame that the model uses.
        :return:
        """
        return self._dataset_data_frame

    @dataset_data_frame.setter
    def dataset_data_frame(self, data_frame: Optional[DataFrame]) -> None:
        """
        Sets the pandas data frame that the model uses.
        :param data_frame: The data frame to set.
        """
        self._dataset_data_frame = data_frame

    @property
    def metrics_data_frame_loggers(self) -> MetricsDataframeLoggers:
        """
        Gets the metrics data frame loggers for this config.
        :return:
        """
        return self._metrics_data_frame_loggers

    def set_output_to(self, output_to: PathOrString) -> None:
        """
        Adjusts the file system settings in the present object such that all outputs are written to the given folder.
        :param output_to: The absolute path to a folder that should contain the outputs.
        """
        if isinstance(output_to, Path):
            output_to = str(output_to)
        self.output_to = output_to
        self.create_filesystem()

    def create_filesystem(
        self, project_root: Path = fixed_paths.repository_root_directory()
    ) -> None:
        """
        Creates new file system settings (outputs folder, logs folder) based on the information stored in the
        present object. If any of the folders do not yet exist, they are created.
        :param project_root: The root folder for the codebase that triggers the training run.
        """
        self.file_system_config = DeepLearningFileSystemConfig.create(
            project_root=project_root,
            model_name=self.model_name,
            is_offline_run=self.is_offline_run,
            output_to=self.output_to)

    def create_dataframe_loggers(self) -> None:
        """
        Initializes the metrics loggers that are stored in self._metrics_data_frame_loggers
        :return:
        """
        self._metrics_data_frame_loggers = MetricsDataframeLoggers(
            outputs_folder=self.outputs_folder)

    def should_load_checkpoint_for_training(self) -> bool:
        """Returns true if start epoch > 0, that is, if an existing checkpoint is used to continue training."""
        return self.start_epoch > 0

    def should_save_epoch(self, epoch: int) -> bool:
        """Returns True if the present epoch should be saved, as per the save_start_epoch and save_step_epochs
        settings. Epoch writing starts with the first epoch that is >= save_start_epoch, and that
        is evenly divisible by save_step_epochs. A checkpoint is always written for the last epoch (num_epochs),
        such that it is easy to overwrite num_epochs on the commandline without having to change the test parameters
        at the same time.
        :param epoch: The current epoch. The first epoch is assumed to be 1."""
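        # Worked example: with save_start_epoch=100, save_step_epochs=50 and
        # num_epochs=120, epoch 100 is saved (>= 100 and divisible by 50),
        # epochs 101-119 are not, and 120 is saved as the last epoch.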
        should_save_epoch = epoch >= self.save_start_epoch \
                            and epoch % self.save_step_epochs == 0
        is_last_epoch = epoch == self.num_epochs
        return should_save_epoch or is_last_epoch

    def get_train_epochs(self) -> List[int]:
        """
        Returns the epochs for which training will be performed.
        :return:
        """
        return list(range(self.start_epoch + 1, self.num_epochs + 1))

    def get_total_number_of_training_epochs(self) -> int:
        """
        Returns the number of epochs for which a model will be trained.
        :return:
        """
        return len(self.get_train_epochs())

    def get_total_number_of_save_epochs(self) -> int:
        """
        Returns the number of epochs for which a model checkpoint will be saved.
        :return:
        """
        return len(
            list(filter(self.should_save_epoch, self.get_train_epochs())))

    def get_total_number_of_validation_epochs(self) -> int:
        """
        Returns the number of epochs for which a model will be validated.
        :return:
        """
        return self.get_total_number_of_training_epochs()

    def get_test_epochs(self) -> List[int]:
        """
        Returns the list of epochs for which the model should be evaluated on full images in the test set.
        These are all epochs starting at self.test_start_epoch, in intervals of self.test_step_epochs.
        The last training epoch is always included. If any of the self.test_* fields is missing (set to None),
        only the last training epoch is returned.
        :return:
        """
        test_epochs = {self.num_epochs}
        if self.test_diff_epochs is not None and self.test_start_epoch is not None and \
                self.test_step_epochs is not None:
            for j in range(self.test_diff_epochs):
                epoch = self.test_start_epoch + self.test_step_epochs * j
                if epoch > self.num_epochs:
                    break
                test_epochs.add(epoch)
        return sorted(test_epochs)

    def get_path_to_checkpoint(self, epoch: int) -> Path:
        """
        Returns the full path to a checkpoint given an epoch.
        :param epoch: the epoch number
        :return: path to a checkpoint given an epoch
        """
        return create_checkpoint_path(
            path=fixed_paths.repository_root_directory() /
            self.checkpoint_folder,
            epoch=epoch)

    def get_effective_random_seed(self) -> int:
        """
        Returns the random seed set as part of this configuration. If the configuration corresponds
        to a cross validation split, then the cross validation fold index will be added to the
        set random seed in order to return the effective random seed.
        :return:
        """
        seed = self.random_seed
        if self.perform_cross_validation:
            # offset the random seed based on the cross validation split index so each
            # fold has a different initial random state.
            seed += self.cross_validation_split_index
        return seed

    @property  # type: ignore
    def use_gpu(self) -> bool:  # type: ignore
        """
        Returns True if a CUDA capable GPU is present and should be used, False otherwise.
        """
        if self._use_gpu is None:
            # Use a local import here because we don't want the whole file to depend on pytorch.
            from InnerEye.ML.utils.ml_util import is_gpu_available
            self._use_gpu = is_gpu_available()
        return self._use_gpu

    @use_gpu.setter
    def use_gpu(self, value: bool) -> None:
        """
        Sets the flag that controls the use of the GPU. Raises a ValueError if the value is True, but no GPU is
        present.
        """
        if value:
            # Use a local import here because we don't want the whole file to depend on pytorch.
            from InnerEye.ML.utils.ml_util import is_gpu_available
            if not is_gpu_available():
                raise ValueError(
                    "Can't set use_gpu to True if there is not CUDA capable GPU present."
                )
        self._use_gpu = value

    @property
    def use_data_parallel(self) -> bool:
        """
        Data parallel is used if GPUs are usable and the number of CUDA devices is greater than 1.
        :return:
        """
        _devices = self.get_cuda_devices()
        return _devices is not None and len(_devices) > 1

    def write_args_file(self, root: Optional[Path] = None) -> None:
        """
        Writes the current config to disk. The file is written either to the given folder, or if omitted,
        to the default outputs folder.
        """
        dst = (root or self.outputs_folder) / ARGS_TXT
        dst.write_text(data=str(self))

    def should_wait_for_other_cross_val_child_runs(self) -> bool:
        """
        Returns True if the current run is an online run and is the 0th cross validation split.
        In this case, this will be the run that will wait for all other child runs to finish in order
        to aggregate their results.
        :return:
        """
        return (
            not self.is_offline_run) and self.cross_validation_split_index == 0

    @property
    def is_offline_run(self) -> bool:
        """
        Returns True if the run is executing outside AzureML, or False if inside AzureML.
        """
        return is_offline_run_context(RUN_CONTEXT)

    @property
    def compute_mean_teacher_model(self) -> bool:
        """
        Returns True if the mean teacher model should be computed.
        """
        return self.mean_teacher_alpha is not None

    def __str__(self) -> str:
        """Returns a string describing the present object, as a list of key == value pairs."""
        arguments_str = "\nArguments:\n"
        property_dict = vars(self)
        keys = sorted(property_dict)
        for key in keys:
            arguments_str += "\t{:18}: {}\n".format(key, property_dict[key])
        return arguments_str
Example #28
class Test(param.Parameterized):
    df = param.DataFrame(default=empty)
Example #29
class Test(param.Parameterized):
    test = param.DataFrame(default=valid_df, columns=['b', 'a', 'c'])
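A sketch of the column-name constraint, assuming param's documented behavior that a list of names requires those columns to be present (ordering can additionally be enforced with the `ordered` flag); `valid_df` is hypothetical:

import pandas as pd
import param

valid_df = pd.DataFrame({"b": [2], "a": [1], "c": [3]})  # hypothetical

class Test(param.Parameterized):
    test = param.DataFrame(default=valid_df, columns=['b', 'a', 'c'])

t = Test()                         # passes: the named columns are all present
try:
    t.test = valid_df[['b', 'a']]  # missing column 'c'
except ValueError as err:
    print(err)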
Example #30
class PanelDeck(param.Parameterized):
    """
    PanelDeck class for panel.pane.DeckGL + multi_select(Boolean) parameter
    """

    x = param.String("x")
    data = param.DataFrame()
    colors = param.DataFrame()
    indices = set()  # selection state; re-created per instance in __init__
    multi_select = param.Boolean(False, doc="multi-select")
    callback = param.Callable()
    spec = param.Dict()
    default_color = param.List([211, 211, 211, 50])
    sizing_mode = param.String("stretch_both")
    height = param.Integer(400)
    width = param.Integer(400)
    tooltip_include_cols = param.List(
        [], doc="list of columns to include in tooltip")

    def get_tooltip_html(self):
        """
        get tooltip info from dataframe columns, if not already present
        """
        html_str = ""
        tooltip_columns = (list(
            set(self.data.columns) -
            set(["index", "coordinates"] + list(self.colors.columns))) if len(
                self.tooltip_include_cols) == 0 else self.tooltip_include_cols)

        for i in tooltip_columns:
            html_str += f"<b> {i} </b>: {{{i}}} <br>"
        return html_str

    def __init__(self, **params):
        """
        initialize pydeck object, and set a listener on self.data
        """
        super().__init__(**params)
        self.indices = set()  # per-instance selection state (avoids sharing the class-level set)
        self._view_state = pdk.ViewState(**self.spec["initialViewState"],
                                         bearing=0.45)
        self._layers = pdk.Layer("PolygonLayer",
                                 data=self.data,
                                 **self.spec["layers"][0])
        self._tooltip = {"html": self.get_tooltip_html()}
        self._deck = pdk.Deck(
            mapbox_key=self.spec["mapboxApiAccessToken"],
            views=[
                pdk.View(
                    type="MapView",
                    controller=True,
                    height="100%",
                    width="100%",
                )
            ],
            layers=[self._layers],
            initial_view_state=self._view_state,
            tooltip=self._tooltip,
        )
        if self.spec["map_style"]:
            self._deck.map_style = self.spec["map_style"]
        self.pane = pn.pane.DeckGL(
            self._deck,
            sizing_mode=self.sizing_mode,
            height=self.height,
            css_classes=["deck-chart"],
        )
        self.param.watch(self._update, ["data"])

    def selected_points(self):
        """
        returns the currently selected values of column self.x as a list
        """
        return self.data[self.x].loc[self.indices].tolist()

    @pn.depends("pane.click_state")
    def click_event(self):
        """
        callback for click events, highlights the selected indices
        (single_select/multi_select) and sets the color of
        unselected indices to default_color
        """
        index = self.pane.click_state.get("index", -1)
        old_indices = list(self.indices)
        if index == -1:
            index = slice(0, 0)
            self.indices = set()
            self.data[self.colors.columns] = self.colors
        else:
            if self.multi_select:
                if index not in self.indices:
                    self.indices.add(index)
                else:
                    self.indices.remove(index)
            else:
                # Single-select: clicking the selected row toggles it off;
                # clicking a different row moves the selection there.
                already_selected = index in self.indices
                self.indices.clear()
                if not already_selected:
                    self.indices.add(index)
            temp_colors = self.colors.copy()
            if len(self.indices) > 0:
                temp_colors.loc[set(self.data.index) - self.indices,
                                self.colors.columns] = self.default_color
            self.data[self.colors.columns] = temp_colors
        self._layers.data = self.data
        self.pane.param.trigger("object")
        self.callback(
            self.data[self.x].loc[old_indices].tolist(),
            self.data[self.x].loc[list(self.indices)].tolist(),
        )

    def _update(self, event):
        """
        trigger deck_gl pane when layer data is updated
        """
        if event.name == "data":
            self._layers.data = self.data
        self.pane.param.trigger("object")

    def view(self):
        """
        view object
        """
        x = pn.Column(
            self.param.multi_select,
            sizing_mode=self.sizing_mode,
            css_classes=["multi-select"],
        )

        return pn.Column(
            x,
            self.click_event,
            self.pane,
            width=self.width,
            height=self.height,
            sizing_mode=self.sizing_mode,
        )
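A hedged construction sketch for PanelDeck; every input below is a placeholder chosen to match what __init__ and click_event read (a "coordinates" column for the PolygonLayer, per-row RGBA color columns, and the spec keys consumed above):

import pandas as pd

# Hypothetical inputs; keys mirror what __init__ consumes from spec
polygon_df = pd.DataFrame({
    "name": ["cell-0"],
    "coordinates": [[[0.0, 0.0], [0.0, 1.0], [1.0, 1.0], [1.0, 0.0]]],
})
color_df = pd.DataFrame({"r": [255], "g": [0], "b": [0], "a": [140]})
polygon_df[color_df.columns] = color_df  # click_event writes these columns back

spec = {
    "initialViewState": {"latitude": 0.0, "longitude": 0.0, "zoom": 3},
    "layers": [{"get_polygon": "coordinates", "pickable": True}],
    "mapboxApiAccessToken": "<your-mapbox-token>",
    "map_style": "",
}
deck = PanelDeck(
    x="name",
    data=polygon_df,
    colors=color_df,
    spec=spec,
    callback=lambda old, new: print("selection:", old, "->", new),
)
deck.view().servable()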