Ejemplo n.º 1
0
    def get_numeric_channels_values(self, *channel_names):
        """
        Fetch the logged points of the given numeric channels as one DataFrame.

        Besides one column per requested channel, the result carries an extra column x.

        For instance, get_numeric_channels_values('loss', 'auc') yields a DataFrame laid out as:
            x, loss, auc

        NaNs may appear in the result when one of the channels has more values than the others.

        Args:
            *channel_names: names of the channels whose values should be fetched
                (any number of positional string arguments).

        Returns:
            `pandas.DataFrame`: values of the requested numeric channels.

        Raises:
            KeyError: presumably, when a requested name is missing from the channels
                returned by ``self.get_channels()`` (assuming that is a plain mapping).

        Examples:
            Instantiate a session.

            >>> from neptune.sessions import Session
            >>> session = Session()

            Fetch a project and a list of experiments.

            >>> project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
            >>> experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)

            Get an experiment instance.

            >>> exp = experiments[0]

            Get numeric channel value for channels 'unet_0 batch sum loss' and 'unet_1 batch sum loss'.

            >>> batch_channels = exp.get_numeric_channels_values('unet_0 batch sum loss', 'unet_1 batch sum loss')
            >>> epoch_channels = exp.get_numeric_channels_values('unet_0 epoch_val sum loss', 'Learning Rate')

        Note:
            Request together only channels that share a common temporal/iteration axis x,
            e.g. put epoch channels into one DataFrame and batch channels into another.
        """

        available_channels = self.get_channels()
        frames_by_channel = {}

        for name in channel_names:
            channel_id = available_channels[name].id
            # Each channel contributes its own x/y column pair; the pairs are merged on x below.
            column_names = ['x_{}'.format(name), 'y_{}'.format(name)]
            try:
                frame = pd.read_csv(
                    self._client.get_channel_points_csv(self, channel_id),
                    header=None,
                    names=column_names,
                    dtype=float
                )
            except EmptyDataError:
                # Nothing to parse for this channel: fall back to an empty frame
                # with the same column pair so the concat below still works.
                frame = pd.DataFrame(columns=column_names, dtype=float)
            frames_by_channel[name] = frame

        side_by_side = pd.concat(frames_by_channel.values(), axis=1, sort=False)
        return align_channels_on_x(side_by_side)
Ejemplo n.º 2
0
    def test_fraction_x(self):
        """Channels with fractional, unsorted x values are merged onto one shared x column."""
        # given
        nan = np.nan
        raw_channels = pd.DataFrame(
            {
                'x_batch_channel': [1.2, 0.3, 0.9, 123.4],
                'y_batch_channel': [7.3, 2.1, 9.5, 1.2],
                # the epoch channel starts with an empty (NaN, NaN) row
                'x_epoch_channel': [nan, 1.7, 2.9, 4.5],
                'y_epoch_channel': [nan, 0.35, 5.4, 0.9]
            },
            dtype=float)

        expected = sort_df_by_columns(
            pd.DataFrame(
                {
                    'x': [0.3, 0.9, 1.2, 1.7, 2.9, 4.5, 123.4],
                    'batch_channel': [2.1, 9.5, 7.3, nan, nan, nan, 1.2],
                    'epoch_channel': [nan, nan, nan, 0.35, 5.4, 0.9, nan]
                },
                dtype=float))

        # when
        actual = sort_df_by_columns(align_channels_on_x(raw_channels))

        # then
        assert_frame_equal(actual, expected)
Ejemplo n.º 3
0
    def test_ordered_x(self):
        """Channels whose x values are already ordered 0..k-1 align row by row."""
        # given
        total_rows = 10

        def nan_padded(values):
            # Extend a shorter channel with NaNs up to the length of the longest one.
            return values + [np.nan] * (total_rows - len(values))

        # Seeded so the fixture is reproducible; the draw order (10, 5, 7) matters.
        np.random.seed(1234)
        batch_y = np.random.random(10).tolist()
        epoch_y = np.random.random(5).tolist()
        odd_y = np.random.random(7).tolist()

        raw_channels = pd.DataFrame(
            {
                'x_batch_channel': list(range(total_rows)),
                'y_batch_channel': batch_y,
                'x_epoch_channel': nan_padded(list(range(5))),
                'y_epoch_channel': nan_padded(epoch_y),
                'x_odd_channel': nan_padded(list(range(7))),
                'y_odd_channel': nan_padded(odd_y)
            },
            dtype=float)

        expected = sort_df_by_columns(
            pd.DataFrame(
                {
                    'x': list(range(total_rows)),
                    'batch_channel': batch_y,
                    'epoch_channel': nan_padded(epoch_y),
                    'odd_channel': nan_padded(odd_y)
                },
                dtype=float))

        # when
        actual = sort_df_by_columns(align_channels_on_x(raw_channels))

        # then
        assert_frame_equal(actual, expected)
Ejemplo n.º 4
0
    def get_numeric_channels_values(self, *channel_names):
        """Fetch the values of the given metrics (numeric logs) as one DataFrame.

        The resulting
        `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`_
        holds one column per requested metric plus 1 additional column `x`.

        Args:
            *channel_names (one or more :obj:`str`): comma-separated metric names.

        Returns:
            :obj:`pandas.DataFrame` - DataFrame with the values of the requested metrics.

            | ``NaN`` s may appear in the result when one of the metrics has more values than the others.

        Example:
            Calling ``get_numeric_channels_values('loss', 'auc')`` gives a DataFrame with columns
            `x`, `loss`, `auc`.

            Assuming that `experiment` is an instance of :class:`~neptune.experiments.Experiment`:

            .. code:: python3

                batch_channels = experiment.get_numeric_channels_values('batch-1-loss', 'batch-2-metric')
                epoch_channels = experiment.get_numeric_channels_values('epoch-1-loss', 'epoch-2-metric')

        Note:
            Prefer requesting together metrics that follow a common temporal pattern
            (like iteration or batch/epoch number), so that every row of the returned DataFrame
            describes the same moment of the experiment.
            For example, combine epoch metrics into one DataFrame and batch metrics into another.
        """

        available_channels = self._backend.get_channels(self)
        frames_by_channel = {}

        for name in channel_names:
            channel_id = available_channels[name].id
            # Each metric contributes its own x/y column pair; the pairs are merged on x below.
            column_names = ["x_{}".format(name), "y_{}".format(name)]
            try:
                points_csv = self._backend.get_channel_points_csv(
                    self, channel_id, name)
                frame = pd.read_csv(
                    points_csv,
                    header=None,
                    names=column_names,
                    dtype=float,
                )
            except EmptyDataError:
                # Nothing to parse for this metric: fall back to an empty frame
                # with the same column pair so the concat below still works.
                frame = pd.DataFrame(columns=column_names, dtype=float)
            frames_by_channel[name] = frame

        side_by_side = pd.concat(
            frames_by_channel.values(), axis=1, sort=False)
        return align_channels_on_x(side_by_side)
Ejemplo n.º 5
0
    def test_shuffled_x(self):
        """Channels with shuffled, partly overlapping x values end up on one sorted x column."""
        # given
        nan = np.nan
        raw_channels = pd.DataFrame(
            {
                'x_batch_channel': [4, 2, 10, 28],
                'y_batch_channel': [7, 2, 9, 1],
                # leading empty (NaN, NaN) row
                'x_epoch_channel': [nan, 1, 2, 21],
                'y_epoch_channel': [nan, 3, 5, 9],
                'x_odd_channel': [21, 10, 15, 4],
                'y_odd_channel': [21, 15, 4, 3],
                # trailing empty (NaN, NaN) row; shares no x value with the other channels
                'x_detached_channel': [3, 5, 9, nan],
                'y_detached_channel': [1, 5, 12, nan]
            },
            dtype=float)

        expected = sort_df_by_columns(
            pd.DataFrame(
                {
                    'x':
                    [1, 2, 3, 4, 5, 9, 10, 15, 21, 28],
                    'batch_channel':
                    [nan, 2, nan, 7, nan, nan, 9, nan, nan, 1],
                    'epoch_channel':
                    [3, 5, nan, nan, nan, nan, nan, nan, 9, nan],
                    'odd_channel':
                    [nan, nan, nan, 3, nan, nan, 15, 4, 21, nan],
                    'detached_channel':
                    [nan, nan, 1, nan, 5, 12, nan, nan, nan, nan]
                },
                dtype=float))

        # when
        actual = sort_df_by_columns(align_channels_on_x(raw_channels))

        # then
        assert_frame_equal(actual, expected)