Ejemplo n.º 1
0
def test_promotion(columns, default):
    # Lists stay as lists
    assert promote_columns(columns, default) == columns

    # Tuples promoted to lists
    assert promote_columns(tuple(columns), default) == columns

    # Singleton promoted to list
    assert promote_columns(columns[0], default) == [columns[0]]

    # None gives us the default
    assert promote_columns(None, default) == default
Ejemplo n.º 2
0
def xds_from_ms(ms, columns=None, index_cols=None, group_cols=None, **kwargs):
    """
    Generator yielding a series of xarray datasets representing
    the contents a Measurement Set.
    It defers to :func:`xds_from_table`, which should be consulted
    for more information.

    Parameters
    ----------
    ms : str
        Measurement Set filename
    columns : tuple or list, optional
        Columns present on the resulting dataset.
        Defaults to all if ``None``.
    index_cols  : tuple or list, optional
        Sequence of indexing columns.
        Defaults to :code:`%(index)s`
    group_cols  : tuple or list, optional
        Sequence of grouping columns.
        Defaults to :code:`%(parts)s`
    **kwargs : optional

    Returns
    -------
    datasets : list of :class:`xarray.Dataset`
        xarray datasets for each group
    """

    columns = promote_columns(columns, [])
    index_cols = promote_columns(index_cols, _DEFAULT_INDEX_COLUMNS)
    group_cols = promote_columns(group_cols, _DEFAULT_GROUP_COLUMNS)

    kwargs.setdefault("table_schema", "MS")

    return xds_from_table(ms,
                          columns=columns,
                          index_cols=index_cols,
                          group_cols=group_cols,
                          **kwargs)
Ejemplo n.º 3
0
def xds_from_table(table_name,
                   columns=None,
                   index_cols=None,
                   group_cols=None,
                   **kwargs):
    """
    Create multiple :class:`xarray.Dataset` objects
    from CASA table ``table_name`` with the rows lexicographically
    sorted according to the columns in ``index_cols``.
    If ``group_cols`` is supplied, the table data is grouped into
    multiple :class:`xarray.Dataset` objects, each associated with a
    permutation of the unique values for the columns in ``group_cols``.

    Notes
    -----
    Both ``group_cols`` and ``index_cols`` should consist of
    columns that are part of the table index.

    However, this may not always be possible as CASA tables
    may not always contain indexing columns.
    The ``ANTENNA`` or ``SPECTRAL_WINDOW`` Measurement Set subtables
    are examples in which the ``row id`` serves as the index.

    Generally, calling

    .. code-block:: python

        antds = list(xds_from_table("WSRT.MS::ANTENNA"))

    is fine, since the data associated with each row of the ``ANTENNA``
    table has the same shape and so a dask or numpy array can be
    constructed around the contents of the table.

    This may not be the case for the ``SPECTRAL_WINDOW`` subtable.
    Here, each row defines a separate spectral window, but each
    spectral window may contain different numbers of frequencies.
    In this case, it is probably better to group the subtable
    by ``row``.

    There is a *special* group column :code:`"__row__"`
    that can be used to group the table by row.

    .. code-block:: python

        for spwds in xds_from_table("WSRT.MS::SPECTRAL_WINDOW",
                                            group_cols="__row__"):
            ...

    If :code:`"__row__"` is used for grouping, then no other
    column may be used. It should also only be used for *small*
    tables, as the number of datasets produced, may be prohibitively
    large.

    Parameters
    ----------
    table_name : str
        CASA table
    columns : list or tuple, optional
        Columns present on the returned dataset.
        Defaults to all if ``None``
    index_cols  : list or tuple, optional
        List of CASA table indexing columns. Defaults to :code:`()`.
    group_cols : list or tuple, optional
        List of columns on which to group the CASA table.
        Defaults to :code:`()`
    table_schema : dict or str or list of dict or str, optional
        A schema dictionary defining the dimension naming scheme for
        each column in the table. For example:

        .. code-block:: python

            {
                "UVW": {'dims': ('uvw',)},
                "DATA": {'dims': ('chan', 'corr')},
            }

        will result in the UVW and DATA arrays having dimensions
        :code:`('row', 'uvw')` and :code:`('row', 'chan', 'corr')`
        respectively.

        A string can be supplied, which will be matched
        against existing default schemas. Examples here include
        ``MS``, ``ANTENNA`` and ``SPECTRAL_WINDOW``
        corresponding to ``Measurement Sets`` the ``ANTENNA`` subtable
        and the ``SPECTRAL_WINDOW`` subtable, respectively.

        By default, the end of ``table_name`` will be
        inspected to see if it matches any default schemas.

        It is also possible to supply a list of strings or dicts defining
        a sequence of schemas which are combined. Later elements in the
        list override previous elements. In the following
        example, the standard UVW MS component name scheme is overridden
        with "my-uvw".

        .. code-block:: python

            ["MS", {"UVW": {'dims': ('my-uvw',)}}]

    table_keywords : {False, True}, optional
        If True, returns table keywords.
        Changes return type of the function into a tuple

    column_keywords : {False, True}, optional
        If True return keywords for each column on the table
        Changes return type of the function into a tuple

    table_proxy : {False, True}, optional
        If True returns the Table Proxy associated with the Dataset

    taql_where : str, optional
        TAQL where clause. For example, to exclude auto-correlations

        .. code-block:: python

            xds_from_table("WSRT.MS", taql_where="ANTENNA1 != ANTENNA2")

    chunks : list of dicts or dict, optional
        A :code:`{dim: chunk}` dictionary, specifying the chunking
        strategy of each dimension in the schema.
        Defaults to :code:`{'row': 100000 }` which will partition
        the row dimension into chunks of 100000.

        * If a dict, the chunking strategy is applied to each group.
        * If a list of dicts, each element is applied
          to the associated group. The last element is
          extended over the remaining groups if there
          are insufficient elements.

        It's also possible to specify the individual chunks for
        multiple dimensions:

        .. code-block:: python

            {'row': (40000, 60000, 40000, 60000),
             'chan': (16, 16, 16, 16),
             'corr': (1, 2, 1)}

        The above chunks a 200,000 row, 64 channel and 4 correlation
        space into 4 x 4 x 3 = 48 chunks, but requires prior
        knowledge of dimensionality, probably obtained with an
        initial call to :func:`xds_from_table`.

    Returns
    -------
    datasets : list of :class:`xarray.Dataset`
        datasets for each group, each ordered by indexing columns
    table_keywords : dict, optional
        Returned if ``table_keywords is True``
    column_keywords : dict, optional
        Returned if ``column_keywords is True``
    table_proxy : :class:`daskms.TableProxy`, optional
        Returned if ``table_proxy is True``


    """
    columns = promote_columns(columns, [])
    index_cols = promote_columns(index_cols, [])
    group_cols = promote_columns(group_cols, [])

    return DatasetFactory(table_name, columns, group_cols, index_cols,
                          **kwargs).datasets()
Ejemplo n.º 4
0
def xds_from_table(table_name, columns=None,
                   index_cols=None, group_cols=None,
                   **kwargs):
    """
    Create multiple :class:`xarray.Dataset` objects
    from CASA table ``table_name`` with the rows lexicographically
    sorted according to the columns in ``index_cols``.
    If ``group_cols`` is supplied, the table data is grouped into
    multiple :class:`xarray.Dataset` objects, each associated with a
    permutation of the unique values for the columns in ``group_cols``.

    Notes
    -----
    Both ``group_cols`` and ``index_cols`` should consist of
    columns that are part of the table index.

    However, this may not always be possible as CASA tables
    may not always contain indexing columns.
    The ``ANTENNA`` or ``SPECTRAL_WINDOW`` Measurement Set subtables
    are examples in which the ``row id`` serves as the index.

    Generally, calling

    .. code-block:: python

        antds = list(xds_from_table("WSRT.MS::ANTENNA"))

    is fine, since the data associated with each row of the ``ANTENNA``
    table has the same shape and so a dask or numpy array can be
    constructed around the contents of the table.

    This may not be the case for the ``SPECTRAL_WINDOW`` subtable.
    Here, each row defines a separate spectral window, but each
    spectral window may contain different numbers of frequencies.
    In this case, it is probably better to group the subtable
    by ``row``.

    There is a *special* group column :code:`"__row__"`
    that can be used to group the table by row.

    .. code-block:: python

        for spwds in xds_from_table("WSRT.MS::SPECTRAL_WINDOW",
                                            group_cols="__row__"):
            ...

    If :code:`"__row__"` is used for grouping, then no other
    column may be used. It should also only be used for *small*
    tables, as the number of datasets produced, may be prohibitively
    large.

    Parameters
    ----------
    table_name : str
        CASA table
    columns : list or tuple, optional
        Columns present on the returned dataset.
        Defaults to all if ``None``
    index_cols  : list or tuple, optional
        List of CASA table indexing columns. Defaults to :code:`()`.
    group_cols : list or tuple, optional
        List of columns on which to group the CASA table.
        Defaults to :code:`()`
    table_schema : dict or str or list of dict or str, optional
        A schema dictionary defining the dimension naming scheme for
        each column in the table. For example:

        .. code-block:: python

            {
                "UVW": {'dims': ('uvw',)},
                "DATA": {'dims': ('chan', 'corr')},
            }

        will result in the UVW and DATA arrays having dimensions
        :code:`('row', 'uvw')` and :code:`('row', 'chan', 'corr')`
        respectively.

        A string can be supplied, which will be matched
        against existing default schemas. Examples here include
        ``MS``, ``ANTENNA`` and ``SPECTRAL_WINDOW``
        corresponding to ``Measurement Sets`` the ``ANTENNA`` subtable
        and the ``SPECTRAL_WINDOW`` subtable, respectively.

        By default, the end of ``table_name`` will be
        inspected to see if it matches any default schemas.

        It is also possible to supply a list of strings or dicts defining
        a sequence of schemas which are combined. Later elements in the
        list override previous elements. In the following
        example, the standard UVW MS component name scheme is overridden
        with "my-uvw".

        .. code-block:: python

            ["MS", {"UVW": {'dims': ('my-uvw',)}}]

    table_keywords : {False, True}, optional
        If True, returns table keywords.
        Changes return type of the function into a tuple

    column_keywords : {False, True}, optional
        If True return keywords for each column on the table
        Changes return type of the function into a tuple

    taql_where : str, optional
        TAQL where clause. For example, to exclude auto-correlations

        .. code-block:: python

            xds_from_table("WSRT.MS", taql_where="ANTENNA1 != ANTENNA2")

    chunks : list of dicts or dict, optional
        A :code:`{dim: chunk}` dictionary, specifying the chunking
        strategy of each dimension in the schema.
        Defaults to :code:`{'row': 100000 }`.

        * If a dict, the chunking strategy is applied to each group.
        * If a list of dicts, each element is applied
          to the associated group. The last element is
          extended over the remaining groups if there
          are insufficient elements.

    Returns
    -------
    datasets : list of :class:`xarray.Dataset`
        datasets for each group, each ordered by indexing columns
    table_keywords : dict, optional
        Returned if ``table_keywords==True``
    column_keywords : dict, optional
        return if ``column_keywords==True``

    """
    columns = promote_columns(columns, [])
    index_cols = promote_columns(index_cols, [])
    group_cols = promote_columns(group_cols, [])

    dask_datasets = DatasetFactory(table_name, columns,
                                   group_cols, index_cols,
                                   **kwargs).datasets()

    # Return dask datasets if xarray is not available
    if xr is None:
        return dask_datasets

    xarray_datasets = []

    # Extract dataset list in case of table_keyword and column_keyword returns
    if isinstance(dask_datasets, tuple):
        extra = dask_datasets[1:]
        dask_datasets = dask_datasets[0]
    else:
        extra = ()

    # Convert each dask dataset into an xarray dataset
    for ds in dask_datasets:
        data_vars = collections.OrderedDict()
        coords = collections.OrderedDict()

        for k, v in sorted(ds.data_vars.items()):
            data_vars[k] = xr.DataArray(v.data, dims=v.dims, attrs=v.attrs)

        for k, v in sorted(ds.coords.items()):
            coords[k] = xr.DataArray(v.data, dims=v.dims, attrs=v.attrs)

        xarray_datasets.append(xr.Dataset(data_vars,
                                          attrs=dict(ds.attrs),
                                          coords=coords))

    if len(extra) > 0:
        return (xarray_datasets,) + extra

    return xarray_datasets