Example #1
def missing_spectrum(  # pylint: disable=too-many-locals
        df: DataArray, bins: int) -> Dict[str, da.Array]:
    """Calculate a missing spectrum for each column."""

    nrows, ncols = df.shape
    data = df.nulls

    if nrows > 1:
        num_bins = min(bins, nrows - 1)
        bin_size = nrows // num_bins
        chunk_size = min(1024 * 1024 * 128, nrows *
                         ncols)  # at most 1024 * 1024 * 128 bytes of bool values
        nbins_per_chunk = max(chunk_size // (bin_size * data.shape[1]), 1)
        chunk_size = nbins_per_chunk * bin_size
        data = data.rechunk((chunk_size, None))
        sep = nrows // chunk_size * chunk_size
    else:
        # avoid division or modulo by zero
        bin_size = 1
        nbins_per_chunk = 1
        chunk_size = 1
        data = data.rechunk((chunk_size, None))
        sep = 1

    spectrum_missing_percs = data[:sep].map_blocks(
        missing_perc_blockwise(bin_size),
        chunks=(nbins_per_chunk, *data.chunksize[1:]),
        dtype=float,
    )

    # calculation for the last chunk
    if sep != nrows:
        spectrum_missing_percs_remain = data[sep:].map_blocks(
            missing_perc_blockwise(bin_size),
            chunks=(int(np.ceil((nrows - sep) / bin_size)), *data.shape[1:]),
            dtype=float,
        )
        spectrum_missing_percs = da.concatenate(
            [spectrum_missing_percs, spectrum_missing_percs_remain], axis=0)

    num_bins = spectrum_missing_percs.shape[0]

    locs0 = da.arange(num_bins) * bin_size
    locs1 = da.minimum(locs0 + bin_size, nrows)
    locs_middle = locs0 + bin_size / 2

    return {
        "column":
        da.repeat(da.from_array(df.columns.values, (1, )), num_bins),
        "location":
        da.tile(locs_middle, ncols),
        "missing_rate":
        spectrum_missing_percs.T.ravel().rechunk(locs_middle.shape[0]),
        "loc_start":
        da.tile(locs0, ncols),
        "loc_end":
        da.tile(locs1, ncols),
    }
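A note on the repeat/tile pairing in the returned dict: each column name is repeated once per bin, while the per-bin locations are tiled once per column, so row i of every output array refers to the same (column, bin) pair. A minimal sketch with hypothetical column names and three bins (da.repeat and da.tile behave like their NumPy counterparts for this usage):

import numpy as np

cols = np.array(["a", "b"])      # hypothetical column names
locs0 = np.arange(3) * 10        # hypothetical bin start locations

column = np.repeat(cols, 3)      # ['a' 'a' 'a' 'b' 'b' 'b'] -> one entry per (column, bin) pair
location = np.tile(locs0, 2)     # [ 0 10 20  0 10 20]       -> bin locations cycle once per column
assert list(zip(column, location))[4] == ("b", 10)   # row 4 pairs column 'b' with its second bin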
Example #2
def missing_spectrum(  # pylint: disable=too-many-locals
        data: da.Array, cols: np.ndarray, bins: int) -> dd.DataFrame:
    """
    Calculate a missing spectrum for each column
    """
    nrows, ncols = data.shape
    num_bins = min(bins, nrows - 1)
    bin_size = nrows // num_bins
    chunk_size = min(1024 * 1024 * 128,
                     nrows * ncols)  # at most 1024 * 1024 * 128 bytes of bool values
    nbins_per_chunk = max(chunk_size // (bin_size * data.shape[1]), 1)

    chunk_size = nbins_per_chunk * bin_size

    data = data.rechunk((chunk_size, None))

    sep = nrows // chunk_size * chunk_size
    spectrum_missing_percs = data[:sep].map_blocks(
        missing_perc_blockwise(bin_size),
        chunks=(nbins_per_chunk, *data.shape[1:]),
        dtype=float,
    )

    # calculation for the last chunk
    if sep != nrows:
        spectrum_missing_percs_remain = data[sep:].map_blocks(
            missing_perc_blockwise(bin_size),
            chunks=(int(np.ceil((nrows - sep) / bin_size)), *data.shape[1:]),
            dtype=float,
        )
        spectrum_missing_percs = da.concatenate(
            [spectrum_missing_percs, spectrum_missing_percs_remain], axis=0)

    num_bins = spectrum_missing_percs.shape[0]

    locs0 = da.arange(num_bins) * bin_size
    locs1 = da.minimum(locs0 + bin_size, nrows)
    locs_middle = locs0 + bin_size / 2

    df = dd.from_dask_array(
        da.repeat(da.from_array(cols, (1, )), num_bins),
        columns=["column"],
    )

    df = df.assign(
        location=da.tile(locs_middle, ncols),
        missing_rate=spectrum_missing_percs.T.ravel().rechunk(
            locs_middle.shape[0]),
        loc_start=da.tile(locs0, ncols),
        loc_end=da.tile(locs1, ncols),
    )

    return df
Example #3
def test_write_bw_inverted_ir_fill():
    """Test saving a BW image with transparency."""
    area = STEREOGRAPHIC_AREA
    scale = 1.0 / 120
    offset = 70.0 / 120
    attrs = dict([('resolution', 1050),
                  ('polarization', None),
                  ('platform_name', 'NOAA-18'),
                  ('sensor', 'avhrr-3'),
                  ('units', 'K'),
                  ('name', '4'),
                  ('level', None),
                  ('modifiers', ()),
                  ('wavelength', (10.3, 10.8, 11.3)),
                  ('calibration', 'brightness_temperature'),
                  ('start_time', TIME - datetime.timedelta(minutes=35)),
                  ('end_time', TIME - datetime.timedelta(minutes=30)),
                  ('area', area),
                  ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    kwargs = {'ch_min_measurement_unit': np.array([-70]),
              'ch_max_measurement_unit': np.array([50]),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 900015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': 'C', 'nbits': 8}

    data1 = da.tile(da.repeat(da.arange(4, chunks=1024) /
                              3.0, 256), 256).reshape((1, 256, 1024))
    datanan = da.ones((1, 256, 1024), chunks=1024) * np.nan
    data2 = da.tile(da.repeat(da.arange(4, chunks=1024) /
                              3.0, 256), 512).reshape((1, 512, 1024))
    data = da.concatenate((data1, datanan, data2), axis=1)
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=[
                        'bands', 'y', 'x'], attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value
        for i in range(3):
            assert(np.all(np.array(colormap[i * 256:(i + 1) * 256]) == np.arange(255, -1, -1) * 256))
        assert(np.all(res[0, ::256] == np.array([1,  86, 170, 255])))
        assert(np.all(res[256, :] == 0))
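The repeat-then-tile construction above builds the banded test data: da.repeat spreads the four grey levels into blocks of 256 columns, and da.tile stacks that row for every image line. A minimal sketch of the 256-row variant, assuming the same sizes as data1:

import numpy as np
import dask.array as da

row = da.repeat(da.arange(4, chunks=1024) / 3.0, 256)    # 1024 values: 0, 1/3, 2/3, 1 in blocks of 256
img = da.tile(row, 256).reshape((1, 256, 1024))          # every image line repeats the same band pattern

assert img.shape == (1, 256, 1024)
assert np.allclose(img[0, 0].compute(), img[0, -1].compute())         # all rows identical
assert np.allclose(img[0, 0, ::256].compute(), [0, 1 / 3, 2 / 3, 1])  # one sample per band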
Example #4
def test_write_rgb_classified():
    """Test saving a transparent RGB."""
    area = STEREOGRAPHIC_AREA

    x_size, y_size = 1024, 1024
    arr = np.zeros((3, y_size, x_size))

    attrs = dict([('platform_name', 'NOAA-18'),
                  ('resolution', 1050),
                  ('polarization', None),
                  ('start_time', TIME - datetime.timedelta(minutes=65)),
                  ('end_time', TIME - datetime.timedelta(minutes=60)),
                  ('level', None),
                  ('sensor', 'avhrr-3'),
                  ('ancillary_variables', []),
                  ('area', area),
                  ('wavelength', None),
                  ('optional_datasets', []),
                  ('standard_name', 'overview'),
                  ('name', 'overview'),
                  ('prerequisites', [0.6, 0.8, 10.8]),
                  ('optional_prerequisites', []),
                  ('calibration', None),
                  ('modifiers', None),
                  ('mode', 'P')])

    kwargs = {'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 1700015, 'data_cat': 'PPRN', 'data_source': 'SMHI', 'nbits': 8}

    data1 = da.tile(da.repeat(da.arange(4, chunks=1024), 256), 256).reshape((1, 256, 1024))
    datanan = da.ones((1, 256, 1024), chunks=1024) * 4
    data2 = da.tile(da.repeat(da.arange(4, chunks=1024), 256), 512).reshape((1, 512, 1024))
    data = da.concatenate((data1, datanan, data2), axis=1)
    data = xr.DataArray(data, coords={'bands': ['P']}, dims=['bands', 'y', 'x'], attrs=attrs)

    img = XRImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        res = tif[0].asarray()
        for idx in range(3):
            np.testing.assert_allclose(res[:, :, idx], np.round(
                np.nan_to_num(arr[idx, :, :]) * 255).astype(np.uint8))
        np.testing.assert_allclose(res[:, :, 3] == 0, np.isnan(arr[0, :, :]))
Example #5
def missing_spectrum(df: dd.DataFrame, bins: int,
                     ncols: int) -> Tuple[dd.DataFrame, dd.DataFrame]:
    """
    Calculate a missing spectrum for each column
    """
    # pylint: disable=too-many-locals
    num_bins = min(bins, len(df) - 1)

    df = df.iloc[:, :ncols]
    cols = df.columns[:ncols]
    ncols = len(cols)
    nrows = len(df)
    chunk_size = len(df) // num_bins
    data = df.isnull().to_dask_array()
    data.compute_chunk_sizes()
    data = data.rechunk((chunk_size, None))

    notnull_counts = data.sum(axis=0) / data.shape[0]
    total_missing_percs = {
        col: notnull_counts[idx]
        for idx, col in enumerate(cols)
    }

    spectrum_missing_percs = data.map_blocks(missing_perc_blockwise,
                                             chunks=(1, data.shape[1]),
                                             dtype=float)
    nsegments = len(spectrum_missing_percs)

    locs0 = da.arange(nsegments) * chunk_size
    locs1 = da.minimum(locs0 + chunk_size, nrows)
    locs_middle = locs0 + chunk_size / 2

    df = dd.from_dask_array(
        da.repeat(da.from_array(cols.values, (1, )), nsegments),
        columns=["column"],
    )

    df = df.assign(
        location=da.tile(locs_middle, ncols),
        missing_rate=spectrum_missing_percs.T.ravel(),
        loc_start=da.tile(locs0, ncols),
        loc_end=da.tile(locs1, ncols),
    )

    return df, total_missing_percs
Example #6
def test_write_bw():
    """Test saving a BW image."""
    from pyninjotiff.ninjotiff import save
    from pyninjotiff.tifffile import TiffFile

    area = FakeArea(
        {
            'ellps': 'WGS84',
            'lat_0': '90.0',
            'lat_ts': '60.0',
            'lon_0': '0.0',
            'proj': 'stere'
        }, (-1000000.0, -4500000.0, 2072000.0, -1428000.0), 1024, 1024)
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', '%'), ('name', '1'), ('level', None),
                  ('modifiers', ()), ('wavelength', (10.3, 10.8, 11.3)),
                  ('calibration', 'brightness_temperature'),
                  ('start_time', TIME - datetime.timedelta(minutes=5)),
                  ('end_time', TIME), ('area', area),
                  ('ancillary_variables', []),
                  ('enhancement_history', [{
                      'offset': offset,
                      'scale': scale
                  }])])

    kwargs = {
        'ch_min_measurement_unit': np.array([0]),
        'ch_max_measurement_unit': np.array([120]),
        'compute': True,
        'fill_value': None,
        'sat_id': 6300014,
        'chan_id': 100015,
        'data_cat': 'P**N',
        'data_source': 'SMHI',
        'physic_unit': '%',
        'nbits': 8
    }

    data = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                   1024).reshape((1, 1024, 1024))
    data = xr.DataArray(data,
                        coords={'bands': ['L']},
                        dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        res = tif[0].asarray()
        assert (np.allclose(res[0, 0, ::256],
                            np.array([256, 22016, 43520, 65280])))
Example #7
    def _preprocess(self, coords):  # da.array function
        adjacent_coords = da.tile(coords, (1, self.N_ATOMS, 1)).reshape(
            coords.shape[0], self.N_ATOMS, self.N_ATOMS, 3)
        adjacent_coords = adjacent_coords.rechunk(chunks=('auto', -1, -1, -1))

        descriptors = da.subtract(adjacent_coords,
                                  adjacent_coords.transpose(0, 2, 1, 3))

        return descriptors
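What the tile/reshape/transpose combination computes, assuming coords has shape (frames, N_ATOMS, 3): entry [f, i, j] of the result is the displacement coords[f, j] - coords[f, i], i.e. an all-pairs difference. A small sketch checking this against plain broadcasting:

import numpy as np
import dask.array as da

frames, n_atoms = 2, 3                                    # hypothetical sizes
coords = da.from_array(np.random.random((frames, n_atoms, 3)), chunks=-1)

adj = da.tile(coords, (1, n_atoms, 1)).reshape(frames, n_atoms, n_atoms, 3)
desc = adj - adj.transpose(0, 2, 1, 3)                    # desc[f, i, j] == coords[f, j] - coords[f, i]

ref = coords.compute()
np.testing.assert_allclose(desc.compute(), ref[:, None, :, :] - ref[:, :, None, :])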
Example #8
def tile_grid_areas(cube, fx_files):
    """
    Tile the grid area data to match the dataset cube.

    Parameters
    ----------
    cube: iris.cube.Cube
        input cube.
    fx_files: dict
        dictionary of field:filename for the fx_files

    Returns
    -------
    iris.cube.Cube
        Freshly tiled grid areas cube.
    """
    grid_areas = None
    if fx_files:
        for key, fx_file in fx_files.items():
            if fx_file is None:
                continue
            logger.info('Attempting to load %s from file: %s', key, fx_file)
            fx_cube = iris.load_cube(fx_file)

            grid_areas = fx_cube.core_data()
            if cube.ndim == 4 and grid_areas.ndim == 2:
                grid_areas = da.tile(grid_areas,
                                     [cube.shape[0], cube.shape[1], 1, 1])
            elif cube.ndim == 4 and grid_areas.ndim == 3:
                grid_areas = da.tile(grid_areas, [cube.shape[0], 1, 1, 1])
            elif cube.ndim == 3 and grid_areas.ndim == 2:
                grid_areas = da.tile(grid_areas, [cube.shape[0], 1, 1])
            else:
                raise ValueError('Grid and dataset number of dimensions not '
                                 'recognised: {} and {}.'
                                 ''.format(cube.ndim, grid_areas.ndim))
    return grid_areas
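The da.tile calls here only replicate the cell-area grid along the leading (time, and optionally level) axes so that it matches the cube's shape for area weighting. A minimal sketch with hypothetical sizes for the 3-D case:

import numpy as np
import dask.array as da

grid_areas = da.from_array(np.random.random((4, 5)), chunks=-1)   # hypothetical (lat, lon) cell areas
ntime = 3

tiled = da.tile(grid_areas, [ntime, 1, 1])                         # -> shape (ntime, lat, lon)
assert tiled.shape == (ntime, 4, 5)
assert np.allclose(tiled[0].compute(), tiled[-1].compute())        # every time slice is a copy of the grid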
Example #9
def test_write_p():
    """Test saving an image in P mode.

    Values are 0, 1, 2, 3, 4, Palette is black, red, green, blue, gray.
    """
    area = STEREOGRAPHIC_AREA

    palette = [np.array((0, 0, 0, 1)),
               np.array((1, 0, 0, 1)),
               np.array((0, 1, 0, 1)),
               np.array((0, 0, 1, 1)),
               np.array((.5, .5, .5, 1)),
               ]
    attrs = dict([('resolution', 1050),
                  ('polarization', None),
                  ('platform_name', 'MSG'),
                  ('sensor', 'seviri'),
                  ("palette", palette),
                  ('name', 'msg_cloudtop_height'),
                  ('level', None),
                  ('modifiers', ()),
                  ('start_time', TIME - datetime.timedelta(minutes=85)),
                  ('end_time', TIME - datetime.timedelta(minutes=80)),
                  ('area', area),
                  ('ancillary_variables', [])])

    data = da.tile(da.repeat(da.arange(5, chunks=1024, dtype=np.uint8), 205)[:-1],
                   1024).reshape((1, 1024, 1024))[:, :1024]
    data = xr.DataArray(data, coords={'bands': ['P']}, dims=[
                        'bands', 'y', 'x'], attrs=attrs)
    kwargs = {'compute': True, 'fill_value': None, 'sat_id': 9000014,
              'chan_id': 1900015, 'data_cat': 'GPRN', 'data_source': 'SMHI',
              'physic_unit': 'NONE', "physic_value": "NONE",
              "description": "NWCSAF Cloud Top Height"}

    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        colormap, res = _load_file_values_with_colormap(filename)

        np.testing.assert_array_equal(res[0, ::205], [0, 1, 2, 3, 4])
        assert(len(colormap) == 768)
        for i, line in enumerate(palette):
            np.testing.assert_array_equal(colormap[i::256], (line[:3] * 255).astype(int))
Example #10
def test_write_bw():
    """Test saving a BW image.

    Reflectances.
    """
    area = STEREOGRAPHIC_AREA
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050),
                  ('polarization', None),
                  ('platform_name', 'NOAA-18'),
                  ('sensor', 'avhrr-3'),
                  ('units', '%'),
                  ('name', '1'),
                  ('level', None),
                  ('modifiers', ()),
                  ('wavelength', (0.5, 0.6, 0.7)),
                  ('calibration', 'reflectance'),
                  ('start_time', TIME - datetime.timedelta(minutes=5)),
                  ('end_time', TIME),
                  ('area', area),
                  ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    kwargs = {'ch_min_measurement_unit': xr.DataArray(0),
              'ch_max_measurement_unit': xr.DataArray(120),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 100015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': '%', 'nbits': 8}

    data = da.tile(da.repeat(da.arange(4, chunks=1024) /
                             3.0, 256), 1024).reshape((1, 1024, 1024))
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=[
                        'bands', 'y', 'x'], attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value
        for i in range(3):
            assert(np.all(np.array(colormap[i * 256:(i + 1) * 256]) == np.arange(256) * 256))
        assert(np.all(res[0, ::256] == np.array([1,  86, 170, 255])))
Example #11
    def from_array(
        cls,
        data: ArrayLike,
        *,
        name: str = "unnamed",
        label: str = "unlabeled",
        unit: str = "",
        axes: Optional[Sequence[ArrayLike]] = None,
    ) -> "GridDataset":
        if not isinstance(data, da.Array):
            data = da.asanyarray(data)

        if axes is None:
            axes = ()
            time_steps = None
            for i, l in enumerate(data.shape):
                if i == 0:
                    time_steps = l
                    time = Axis.from_array(da.arange(time_steps),
                                           name="time",
                                           label="time")
                    axes += (time, )
                else:
                    axis_shape = (time_steps, 1)
                    axis = Axis.from_array(da.tile(da.arange(l), axis_shape),
                                           name=f"axis{i-1}")
                    axes += (axis, )

        else:
            # ensure that every element in axes is an axis
            if any(not isinstance(ax, Axis) for ax in axes):
                tmp = []

                for i, ax in enumerate(axes):
                    name = "time" if i == 0 else f"axis{i-1}"
                    label = "time" if i == 0 else "unlabeled"

                    if not isinstance(ax, Axis):
                        ax = Axis.from_array(da.asanyarray(ax),
                                             name=name,
                                             label=label)

                    tmp.append(ax)

                axes = tuple(tmp)

        return cls(data, axes, name, label, unit)
Example #12
def _match_array_shape(array_to_reshape,array_to_match):
    # Reshape in_weight to match the dimensionality of vis_data (vis_dataset[imaging_weights_parms['data_name']]).
    # The order is assumed to be the same (dimensions may be missing); array_to_reshape is a subset of array_to_match.
    import dask.array as da
    import numpy as np
    
    match_array_chunksize = array_to_match.data.chunksize
    
    reshape_dims = np.ones(len(match_array_chunksize), dtype=int)  # missing dimensions will be added with reshape
    tile_dims = np.ones(len(match_array_chunksize), dtype=int)  # tiling makes the number of elements in each dimension match
    
    array_to_match_dims = array_to_match.dims
    array_to_reshape_dims = array_to_reshape.dims
    
    for i in range(len(match_array_chunksize)):
        if array_to_match_dims[i] in array_to_reshape_dims:
            reshape_dims[i] = array_to_match.shape[i]
        else:
            tile_dims[i] = array_to_match.shape[i]
            
    return da.tile(da.reshape(array_to_reshape.data,reshape_dims),tile_dims).rechunk(match_array_chunksize)
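The reshape-then-tile pattern above first inserts length-1 axes for the dimensions that array_to_reshape is missing and then tiles those axes up to the target sizes. A minimal sketch with hypothetical shapes, expanding a per-channel weight to a (time, chan) array of identical rows:

import numpy as np
import dask.array as da

weights = da.from_array(np.array([0.5, 1.0, 2.0]), chunks=-1)   # hypothetical per-channel weights
n_time, n_chan = 4, 3                                            # hypothetical target shape (time, chan)

expanded = da.tile(da.reshape(weights, (1, n_chan)), (n_time, 1))
assert expanded.shape == (n_time, n_chan)
assert np.allclose(expanded.compute()[0], expanded.compute()[-1])  # every row is the original weights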
Example #13
def assign_randzs(ztype="LZEE", num=seed):
    ## Add named columns to Martin's randoms.fits, with redshifts drawn from the data.
    dfname = output_dirs[
        mock_output] + '/desi/logmocks/lognormal_bgs_seed-%03d.fits' % num
    rfname = "/global/homes/m/mjwilson/desi/randoms/randoms.fits"

    print("Loading:  ", dfname, rfname)

    data = FITSCatalog(dfname)
    rand = FITSCatalog(rfname)  ## Martin's DESI / BGS randoms.

    ngal = len(data)

    ## 20 x randoms as galaxies.
    rand = rand.gslice(0, 20 * ngal, redistribute=False)
    nrand = len(rand)

    ncopy = np.int(np.floor(1.0 * nrand / ngal))

    ## Damp removal of intrinsic radial structure.
    data['blur'] = 0.05 * (da.random.uniform(
        low=0.0, high=1.0, size=data['GZEE'].shape, chunks=chunks) - 0.5)

    print("Calculating randoms redshifts for z type: %s" % ztype)

    shuf = np.arange(ngal)

    np.random.shuffle(
        shuf)  ## Make sure there's no clustering in the redshift assignment
    ## Would be a problem if randoms are ordered on the sky.

    ##  Check that blurred redshifts are positive.
    array = da.tile(data[ztype][shuf] + data['blur'], ncopy)

    rand[ztype] = da.from_array(
        array, chunks=chunks
    )  ## da.random.choice(data[ztype] + data['blur'], size = rand['RA'].shape, chunks=chunks)

    return data, rand
Example #14
def _calc_ant_pointing_ra_dec(mxds, use_pointing_table, gcf_parms, sel_parms):

    vis_dataset = mxds.attrs[sel_parms['xds']]

    if use_pointing_table:
        ant_ra_dec = mxds.POINTING.DIRECTION.interp(
            time=vis_dataset.time,
            assume_sorted=False,
            method=gcf_parms['interpolation_method'])[:, :, 0, :]
        ant_ra_dec = ant_ra_dec.chunk(
            {"time": vis_dataset[sel_parms['data']].chunks[0][0]})
    else:
        antenna_ids = mxds.antenna_ids.data
        field_dataset = mxds.attrs['FIELD']
        field_id = np.max(vis_dataset.FIELD_ID, axis=1).compute(
        )  #np.max ignores int nan values (nan values are large negative numbers for int).
        n_field = field_dataset.dims['d0']
        ant_ra_dec = field_dataset.PHASE_DIR.isel(d0=field_id)
        if n_field != 1:
            ant_ra_dec = ant_ra_dec[:, 0, :]
        ant_ra_dec = ant_ra_dec.expand_dims('ant', 1)
        n_ant = len(antenna_ids)
        ant_ra_dec = da.tile(ant_ra_dec.data, (1, n_ant, 1))

        time_chunksize = mxds.attrs[sel_parms['xds']][
            sel_parms['data']].chunks[0][0]
        ant_ra_dec = xr.DataArray(ant_ra_dec, {
            'time': vis_dataset.time,
            'ant': antenna_ids
        },
                                  dims=('time', 'ant', 'pair')).chunk({
                                      'time': time_chunksize,
                                      'ant': n_ant,
                                      'pair': 2
                                  })

    return ant_ra_dec
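Here da.tile simply replicates the single per-time (ra, dec) direction across a new antenna axis so the pointing has one entry per antenna. A minimal sketch with hypothetical sizes:

import numpy as np
import dask.array as da

n_time, n_ant = 5, 3                                                  # hypothetical sizes
ra_dec = da.from_array(np.random.random((n_time, 1, 2)), chunks=-1)   # one (ra, dec) pair per time step

per_ant = da.tile(ra_dec, (1, n_ant, 1))                              # -> shape (n_time, n_ant, 2)
assert per_ant.shape == (n_time, n_ant, 2)
assert np.allclose(per_ant[:, 0].compute(), per_ant[:, -1].compute())  # same direction for every antenna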
Example #15
def test_tile_array_reps(shape, chunks, reps):
    x = np.random.random(shape)
    d = da.from_array(x, chunks=chunks)

    with pytest.raises(NotImplementedError):
        da.tile(d, reps)
Example #16
def read_ms(infile, ddis=None, ignore=None, chunks=(400, 400, 64, 2)):
    """
    Read legacy format MS to xarray Visibility Dataset

    The CASA MSv2 format is converted to the MSv3 schema per the
    specified definition here: https://drive.google.com/file/d/10TZ4dsFw9CconBc-GFxSeb2caT6wkmza/view?usp=sharing

    The MS is partitioned by DDI, which guarantees a fixed data shape per partition. This results in separate xarray
    dataset (xds) partitions contained within a main xds (mxds).  There is no DDI in MSv3, so this simply serves as
    a partition id for each xds.

    Parameters
    ----------
    infile : str
        Input MS filename
    ddis : list
        List of specific DDIs to read. DDI's are integer values, or use 'global' string for subtables. Leave as None to read entire MS
    ignore : list
        List of subtables to ignore (case sensitive and generally all uppercase). This is useful if a particular subtable is causing errors
        or is very large and slowing down reads. Default is None
    chunks: 4-D tuple of ints
        Shape of desired chunking in the form of (time, baseline, channel, polarization). Larger values reduce the number of chunks and
        speed up the reads at the cost of more memory. Chunk size is the product of the four numbers. Default is (400, 400, 64, 2)

    Returns
    -------
    xarray.core.dataset.Dataset
      Main xarray dataset of datasets for this visibility set
    """
    import os
    import xarray
    import dask.array as da
    import numpy as np
    import cngi._utils._table_conversion2 as tblconv
    import cngi._utils._io as xdsio
    import warnings
    warnings.filterwarnings('ignore', category=FutureWarning)

    # parse filename to use
    infile = os.path.expanduser(infile)

    # as part of MSv3 conversion, these columns in the main table are no longer needed
    ignorecols = ['FLAG_CATEGORY', 'FLAG_ROW', 'DATA_DESC_ID']
    if ignore is None: ignore = []

    # we need to assume an explicit ordering of dims
    dimorder = ['time', 'baseline', 'chan', 'pol']

    # we need the spectral window, polarization, and data description tables for processing the main table
    spw_xds = tblconv.read_simple_table(infile,
                                        subtable='SPECTRAL_WINDOW',
                                        ignore=ignorecols,
                                        add_row_id=True)
    pol_xds = tblconv.read_simple_table(infile,
                                        subtable='POLARIZATION',
                                        ignore=ignorecols)
    ddi_xds = tblconv.read_simple_table(infile,
                                        subtable='DATA_DESCRIPTION',
                                        ignore=ignorecols)

    # let's assume that each DATA_DESC_ID (ddi) is a fixed shape that may differ from others
    # form a list of ddis to process; each will be placed in its own xarray dataset and partition
    if ddis is None:
        ddis = list(ddi_xds['d0'].values) + ['global']
    else:
        ddis = np.atleast_1d(ddis)
    xds_list = []

    ####################################################################
    # process each selected DDI from the input MS, assume a fixed shape within the ddi (should always be true)
    # each DDI is written to its own subdirectory under the parent folder
    for ddi in ddis:
        if ddi == 'global': continue  # handled afterwards
        ddi = int(ddi)

        # convert columns that are common to MSv2 and MSv3
        xds = tblconv.read_main_table(infile,
                                      subsel=ddi,
                                      ignore=ignorecols,
                                      chunks=chunks)
        if len(xds.dims) == 0: continue

        # convert and append the ANTENNA1 and ANTENNA2 columns separately so we can squash the unnecessary time dimension
        xds = xds.assign({
            'ANTENNA1': xds.ANTENNA1.max(axis=0),
            'ANTENNA2': xds.ANTENNA2.max(axis=0)
        })

        # MSv3 changes to weight/sigma column handling
        # 1. DATA_WEIGHT = 1/SIGMA**2
        # 2. CORRECTED_DATA_WEIGHT = WEIGHT
        # 3. if SIGMA_SPECTRUM or WEIGHT_SPECTRUM present, use them instead of SIGMA and WEIGHT
        # 4. discard SIGMA, WEIGHT, SIGMA_SPECTRUM and WEIGHT_SPECTRUM from converted ms
        # 5. set shape of DATA_WEIGHT / CORRECTED_DATA_WEIGHT to (time, baseline, chan, pol) padding as necessary
        if 'DATA' in xds.data_vars:
            if 'SIGMA_SPECTRUM' in xds.data_vars:
                xds = xds.assign({
                    'DATA_WEIGHT': 1 / xds.SIGMA_SPECTRUM**2
                }).drop('SIGMA_SPECTRUM')
            elif 'SIGMA' in xds.data_vars:
                wts = xds.SIGMA.shape[:2] + (1, ) + (xds.SIGMA.shape[-1], )
                wt_da = da.tile(da.reshape(xds.SIGMA.data, wts),
                                (1, 1, len(xds.chan), 1)).rechunk(chunks)
                xds = xds.assign({
                    'DATA_WEIGHT':
                    xarray.DataArray(1 / wt_da**2, dims=dimorder)
                })
        if 'CORRECTED_DATA' in xds.data_vars:
            if 'WEIGHT_SPECTRUM' in xds.data_vars:
                xds = xds.rename({'WEIGHT_SPECTRUM': 'CORRECTED_DATA_WEIGHT'})
            elif 'WEIGHT' in xds.data_vars:
                wts = xds.WEIGHT.shape[:2] + (1, ) + (xds.WEIGHT.shape[-1], )
                wt_da = da.tile(da.reshape(xds.WEIGHT.data, wts),
                                (1, 1, len(xds.chan), 1)).rechunk(chunks)
                xds = xds.assign({
                    'CORRECTED_DATA_WEIGHT':
                    xarray.DataArray(wt_da, dims=dimorder)
                }).drop('WEIGHT')

        xds = xds.drop_vars(
            ['WEIGHT', 'SIGMA', 'SIGMA_SPECTRUM', 'WEIGHT_SPECTRUM'],
            errors='ignore')

        # add in relevant data grouping, spw and polarization attributes
        attrs = {'data_groups': [{}]}
        if ('DATA' in xds.data_vars) and ('DATA_WEIGHT' in xds.data_vars):
            attrs['data_groups'][0][str(len(attrs['data_groups'][0]))] = {
                'id': str(len(attrs['data_groups'][0])),
                'data': 'DATA',
                'uvw': 'UVW',
                'flag': 'FLAG',
                'weight': 'DATA_WEIGHT'
            }
        if ('CORRECTED_DATA' in xds.data_vars) and ('CORRECTED_DATA_WEIGHT'
                                                    in xds.data_vars):
            attrs['data_groups'][0][str(len(attrs['data_groups'][0]))] = {
                'id': str(len(attrs['data_groups'][0])),
                'data': 'CORRECTED_DATA',
                'uvw': 'UVW',
                'flag': 'FLAG',
                'weight': 'CORRECTED_DATA_WEIGHT'
            }

        for dv in spw_xds.data_vars:
            attrs[dv.lower()] = spw_xds[dv].values[
                ddi_xds['spectral_window_id'].values[ddi]]
            attrs[dv.lower()] = int(attrs[dv.lower()]) if type(attrs[dv.lower(
            )]) is np.bool_ else attrs[dv.lower()]  # convert bools
        for dv in pol_xds.data_vars:
            attrs[dv.lower()] = pol_xds[dv].values[
                ddi_xds['polarization_id'].values[ddi]]
            attrs[dv.lower()] = int(attrs[dv.lower()]) if type(attrs[dv.lower(
            )]) is np.bool_ else attrs[dv.lower()]  # convert bools

        # grab the channel frequency values from the spw table data and pol idxs from the polarization table, add spw and pol ids
        chan = attrs.pop('chan_freq')[:len(xds.chan)]
        pol = attrs.pop('corr_type')[:len(xds.pol)]

        # truncate per-chan values to the actual number of channels and move to coordinates
        chan_width = xarray.DataArray(da.from_array(
            attrs.pop('chan_width')[:len(xds.chan)], chunks=chunks[2]),
                                      dims=['chan'])
        effective_bw = xarray.DataArray(da.from_array(
            attrs.pop('effective_bw')[:len(xds.chan)], chunks=chunks[2]),
                                        dims=['chan'])
        resolution = xarray.DataArray(da.from_array(
            attrs.pop('resolution')[:len(xds.chan)], chunks=chunks[2]),
                                      dims=['chan'])

        coords = {
            'chan': chan,
            'pol': pol,
            'spw_id': [ddi_xds['spectral_window_id'].values[ddi]],
            'pol_id': [ddi_xds['polarization_id'].values[ddi]],
            'chan_width': chan_width,
            'effective_bw': effective_bw,
            'resolution': resolution
        }
        xds = xds.assign_coords(coords).assign_attrs(attrs)
        xds_list += [('xds' + str(ddi), xds)]

    # read other subtables
    skip_tables = ['DATA_DESCRIPTION', 'SORTED_TABLE'] + ignore
    subtables = sorted([
        tt for tt in os.listdir(infile)
        if os.path.isdir(os.path.join(infile, tt)) and tt not in skip_tables
    ])
    if 'global' in ddis:
        for ii, subtable in enumerate(subtables):
            if subtable == 'POINTING':  # expand the dimensions of the pointing table
                sxds = tblconv.read_pointing_table(
                    os.path.join(infile, subtable),
                    chunks=chunks[:2] + (20, 20))
            else:
                add_row_id = (subtable in [
                    'ANTENNA', 'FIELD', 'OBSERVATION', 'SCAN',
                    'SPECTRAL_WINDOW', 'STATE'
                ])
                sxds = tblconv.read_simple_table(infile,
                                                 subtable=subtable,
                                                 timecols=['TIME'],
                                                 ignore=ignorecols,
                                                 add_row_id=add_row_id)
            if len(sxds.dims) != 0: xds_list += [(subtable, sxds)]

    # build the master xds to return
    mxds = xdsio.vis_xds_packager(xds_list)
    return mxds
Example #17
def test_tile_np_kroncompare_examples(shape, reps):
    x = np.random.random(shape)
    d = da.asarray(x)

    assert_eq(np.tile(x, reps), da.tile(d, reps))
Example #18
def test_tile_zero_reps(shape, chunks, reps):
    x = np.random.random(shape)
    d = da.from_array(x, chunks=chunks)

    assert_eq(np.tile(x, reps), da.tile(d, reps))
Example #19
def convert_ms(infile,
               outfile=None,
               ddis=None,
               ignore=['HISTORY'],
               compressor=None,
               chunks=(100, 400, 32, 1),
               sub_chunks=10000,
               append=False):
    """
    Convert legacy format MS to xarray Visibility Dataset and zarr storage format

    This function requires CASA6 casatools module. The CASA MSv2 format is converted to the MSv3 schema per the
    specified definition here: https://drive.google.com/file/d/10TZ4dsFw9CconBc-GFxSeb2caT6wkmza/view?usp=sharing
    
    The MS is partitioned by DDI, which guarantees a fixed data shape per partition. This results in different subdirectories
    under the main vis.zarr folder.  There is no DDI in MSv3, so this simply serves as a partition id in the zarr directory.

    Parameters
    ----------
    infile : str
        Input MS filename
    outfile : str
        Output zarr filename. If None, will use infile name with .vis.zarr extension
    ddis : list
        List of specific DDIs to convert. DDI's are integer values, or use 'global' string for subtables. Leave as None to convert entire MS
    ignore : list
        List of subtables to ignore (case sensitive and generally all uppercase). This is useful if a particular subtable is causing errors.
        Default is None. Note: the default is now temporarily set to ignore the HISTORY table due to a CASA6 issue in the table tool affecting a small
        set of test cases (set back to None if HISTORY is needed)
    compressor : numcodecs.blosc.Blosc
        The blosc compressor to use when saving the converted data to disk using zarr.
        If None, the zstd compression algorithm is used with compression level 2.
    chunks: 4-D tuple of ints
        Shape of desired chunking in the form of (time, baseline, channel, polarization); use -1 for an entire axis in one chunk. Default is (100, 400, 32, 1)
        Note: chunk size is the product of the four numbers, and data is batch processed by time axis, so that will drive memory needed for conversion.
    sub_chunks: int
        Chunking used for subtable conversion (except for POINTING which will use time/baseline dims from chunks parameter). This is a single integer
        used for the row-axis (d0) chunking only, no other dims in the subtables will be chunked.
    append : bool
        Keep destination zarr store intact and add new DDI's to it. Note that duplicate DDI's will still be overwritten. Default False deletes and replaces
        entire directory.
    Returns
    -------
    xarray.core.dataset.Dataset
      Master xarray dataset of datasets for this visibility set
    """
    import itertools
    import os
    import xarray
    import dask.array as da
    import numpy as np
    import time
    import cngi._utils._table_conversion as tblconv
    import cngi._utils._io as xdsio
    import warnings
    import importlib_metadata
    warnings.filterwarnings('ignore', category=FutureWarning)

    # parse filename to use
    infile = os.path.expanduser(infile)
    prefix = infile[:infile.rindex('.')]
    if outfile is None: outfile = prefix + '.vis.zarr'
    outfile = os.path.expanduser(outfile)

    # need to manually remove existing zarr file (if any)
    if not append:
        os.system("rm -fr " + outfile)
        os.system("mkdir " + outfile)

    # as part of MSv3 conversion, these columns in the main table are no longer needed
    ignorecols = ['FLAG_CATEGORY', 'FLAG_ROW', 'DATA_DESC_ID']
    if ignore is None: ignore = []

    # we need to assume an explicit ordering of dims
    dimorder = ['time', 'baseline', 'chan', 'pol']

    # we need the spectral window, polarization, and data description tables for processing the main table
    spw_xds = tblconv.convert_simple_table(infile,
                                           outfile='',
                                           subtable='SPECTRAL_WINDOW',
                                           ignore=ignorecols,
                                           nofile=True,
                                           add_row_id=True)
    pol_xds = tblconv.convert_simple_table(infile,
                                           outfile='',
                                           subtable='POLARIZATION',
                                           ignore=ignorecols,
                                           nofile=True)
    ddi_xds = tblconv.convert_simple_table(infile,
                                           outfile='',
                                           subtable='DATA_DESCRIPTION',
                                           ignore=ignorecols,
                                           nofile=True)

    # let's assume that each DATA_DESC_ID (ddi) is a fixed shape that may differ from others
    # form a list of ddis to process; each will be placed in its own xarray dataset and partition
    if ddis is None: ddis = list(ddi_xds['d0'].values) + ['global']
    else: ddis = np.atleast_1d(ddis)
    xds_list = []

    # extra data selection to split autocorr and crosscorr into separate xds
    # extrasels[0] is for autocorrelation
    # extrasels[1] is for the rest (cross-correlations, correlations between feeds)
    extrasels = [
        'ANTENNA1 == ANTENNA2 && FEED1 == FEED2',
        'ANTENNA1 != ANTENNA2 || FEED1 != FEED2'
    ]

    ####################################################################
    # process each selected DDI from the input MS, assume a fixed shape within the ddi (should always be true)
    # each DDI is written to its own subdirectory under the parent folder
    for extrasel, ddi in itertools.product(extrasels, ddis):
        if ddi == 'global': continue  # handled afterwards

        extra_sel_index = extrasels.index(extrasel)
        if extra_sel_index == 0:
            xds_prefix = 'xdsa'
        else:
            xds_prefix = 'xds'
        xds_name = f'{xds_prefix}{ddi}'

        ddi = int(ddi)
        print('Processing ddi', ddi, f'xds name is {xds_name}', end='\r')
        start_ddi = time.time()

        # these columns are different / absent in MSv3 or need to be handled as special cases
        msv2 = [
            'WEIGHT', 'WEIGHT_SPECTRUM', 'SIGMA', 'SIGMA_SPECTRUM', 'ANTENNA1',
            'ANTENNA2', 'UVW'
        ]

        # convert columns that are common to MSv2 and MSv3
        xds = tblconv.convert_expanded_table(infile,
                                             os.path.join(outfile, xds_name),
                                             keys={
                                                 'TIME':
                                                 'time',
                                                 ('ANTENNA1', 'ANTENNA2'):
                                                 'baseline'
                                             },
                                             subsel={'DATA_DESC_ID': ddi},
                                             timecols=['time'],
                                             dimnames={
                                                 'd2': 'chan',
                                                 'd3': 'pol'
                                             },
                                             ignore=ignorecols + msv2,
                                             compressor=compressor,
                                             chunks=chunks,
                                             nofile=False,
                                             extraselstr=extrasel)
        if len(xds.dims) == 0: continue

        # convert and append UVW separately so we can handle its special dimension
        uvw_chunks = (chunks[0], chunks[1], 3)  #No chunking over uvw_index
        uvw_xds = tblconv.convert_expanded_table(
            infile,
            os.path.join(outfile, 'tmp'),
            keys={
                'TIME': 'time',
                ('ANTENNA1', 'ANTENNA2'): 'baseline'
            },
            subsel={'DATA_DESC_ID': ddi},
            timecols=['time'],
            dimnames={'d2': 'uvw_index'},
            ignore=ignorecols + list(xds.data_vars) + msv2[:-1],
            compressor=compressor,
            chunks=uvw_chunks,
            nofile=False,
            extraselstr=extrasel)
        uvw_xds.to_zarr(os.path.join(outfile, xds_name),
                        mode='a',
                        compute=True,
                        consolidated=True)

        # convert and append the ANTENNA1 and ANTENNA2 columns separately so we can squash the unnecessary time dimension
        ant_xds = tblconv.convert_expanded_table(
            infile,
            os.path.join(outfile, 'tmp'),
            keys={
                'TIME': 'time',
                ('ANTENNA1', 'ANTENNA2'): 'baseline'
            },
            subsel={'DATA_DESC_ID': ddi},
            timecols=['time'],
            ignore=ignorecols + list(xds.data_vars) + msv2[:4] + ['UVW'],
            compressor=compressor,
            chunks=chunks[:2],
            nofile=False,
            extraselstr=extrasel)
        ant_xds = ant_xds.assign({
            'ANTENNA1': ant_xds.ANTENNA1.max(axis=0),
            'ANTENNA2': ant_xds.ANTENNA2.max(axis=0)
        }).drop_dims('time')
        ant_xds.to_zarr(os.path.join(outfile, xds_name),
                        mode='a',
                        compute=True,
                        consolidated=True)

        # now convert just the WEIGHT and WEIGHT_SPECTRUM (if present)
        # WEIGHT needs to be expanded to full dimensionality (time, baseline, chan, pol)
        wt_xds = tblconv.convert_expanded_table(
            infile,
            os.path.join(outfile, 'tmp'),
            keys={
                'TIME': 'time',
                ('ANTENNA1', 'ANTENNA2'): 'baseline'
            },
            subsel={'DATA_DESC_ID': ddi},
            timecols=['time'],
            dimnames={},
            ignore=ignorecols + list(xds.data_vars) + msv2[-3:],
            compressor=compressor,
            chunks=chunks,
            nofile=False,
            extraselstr=extrasel)

        # MSv3 changes to weight/sigma column handling
        # 1. DATA_WEIGHT = 1/SIGMA**2
        # 2. CORRECTED_DATA_WEIGHT = WEIGHT
        # 3. if SIGMA_SPECTRUM or WEIGHT_SPECTRUM present, use them instead of SIGMA and WEIGHT
        # 4. discard SIGMA, WEIGHT, SIGMA_SPECTRUM and WEIGHT_SPECTRUM from converted ms
        # 5. set shape of DATA_WEIGHT / CORRECTED_DATA_WEIGHT to (time, baseline, chan, pol) padding as necessary
        if 'DATA' in xds.data_vars:
            if 'SIGMA_SPECTRUM' in wt_xds.data_vars:
                wt_xds = wt_xds.rename(
                    dict(zip(wt_xds.SIGMA_SPECTRUM.dims, dimorder))).assign(
                        {'DATA_WEIGHT': 1 / wt_xds.SIGMA_SPECTRUM**2})
            elif 'SIGMA' in wt_xds.data_vars:
                wts = wt_xds.SIGMA.shape[:2] + (1, ) + (
                    wt_xds.SIGMA.shape[-1], )
                wt_da = da.tile(da.reshape(wt_xds.SIGMA.data, wts),
                                (1, 1, len(xds.chan), 1)).rechunk(chunks)
                wt_xds = wt_xds.assign({
                    'DATA_WEIGHT':
                    xarray.DataArray(1 / wt_da**2, dims=dimorder)
                })
        if 'CORRECTED_DATA' in xds.data_vars:
            if 'WEIGHT_SPECTRUM' in wt_xds.data_vars:
                wt_xds = wt_xds.rename(
                    dict(zip(wt_xds.WEIGHT_SPECTRUM.dims, dimorder))).assign(
                        {'CORRECTED_DATA_WEIGHT': wt_xds.WEIGHT_SPECTRUM})
            elif 'WEIGHT' in wt_xds.data_vars:
                wts = wt_xds.WEIGHT.shape[:2] + (1, ) + (
                    wt_xds.WEIGHT.shape[-1], )
                wt_da = da.tile(da.reshape(wt_xds.WEIGHT.data, wts),
                                (1, 1, len(xds.chan), 1)).rechunk(chunks)
                wt_xds = wt_xds.assign({
                    'CORRECTED_DATA_WEIGHT':
                    xarray.DataArray(wt_da, dims=dimorder)
                })

        wt_xds = wt_xds.drop([cc for cc in msv2 if cc in wt_xds.data_vars])
        wt_xds.to_zarr(os.path.join(outfile, xds_name),
                       mode='a',
                       compute=True,
                       consolidated=True)

        # add in relevant data grouping, spw and polarization attributes
        attrs = {'data_groups': [{}]}
        if ('DATA' in xds.data_vars) and ('DATA_WEIGHT' in wt_xds.data_vars):
            attrs['data_groups'][0][str(len(attrs['data_groups'][0]))] = {
                'id': str(len(attrs['data_groups'][0])),
                'data': 'DATA',
                'uvw': 'UVW',
                'flag': 'FLAG',
                'weight': 'DATA_WEIGHT'
            }
        if ('CORRECTED_DATA' in xds.data_vars) and ('CORRECTED_DATA_WEIGHT'
                                                    in wt_xds.data_vars):
            attrs['data_groups'][0][str(len(attrs['data_groups'][0]))] = {
                'id': str(len(attrs['data_groups'][0])),
                'data': 'CORRECTED_DATA',
                'uvw': 'UVW',
                'flag': 'FLAG',
                'weight': 'CORRECTED_DATA_WEIGHT'
            }

        for dv in spw_xds.data_vars:
            attrs[dv.lower()] = spw_xds[dv].values[
                ddi_xds['spectral_window_id'].values[ddi]]
            attrs[dv.lower()] = int(attrs[dv.lower()]) if type(attrs[dv.lower(
            )]) is np.bool_ else attrs[dv.lower()]  # convert bools
        for dv in pol_xds.data_vars:
            attrs[dv.lower()] = pol_xds[dv].values[
                ddi_xds['polarization_id'].values[ddi]]
            attrs[dv.lower()] = int(attrs[dv.lower()]) if type(attrs[dv.lower(
            )]) is np.bool_ else attrs[dv.lower()]  # convert bools

        # grab the channel frequency values from the spw table data and pol idxs from the polarization table, add spw and pol ids
        chan = attrs.pop('chan_freq')[:len(xds.chan)]
        pol = attrs.pop('corr_type')[:len(xds.pol)]

        # truncate per-chan values to the actual number of channels and move to coordinates
        chan_width = xarray.DataArray(da.from_array(
            attrs.pop('chan_width')[:len(xds.chan)], chunks=chunks[2]),
                                      dims=['chan'])
        effective_bw = xarray.DataArray(da.from_array(
            attrs.pop('effective_bw')[:len(xds.chan)], chunks=chunks[2]),
                                        dims=['chan'])
        resolution = xarray.DataArray(da.from_array(
            attrs.pop('resolution')[:len(xds.chan)], chunks=chunks[2]),
                                      dims=['chan'])

        coords = {
            'chan': chan,
            'pol': pol,
            'spw_id': [ddi_xds['spectral_window_id'].values[ddi]],
            'pol_id': [ddi_xds['polarization_id'].values[ddi]],
            'chan_width': chan_width,
            'effective_bw': effective_bw,
            'resolution': resolution
        }
        aux_xds = xarray.Dataset(coords=coords, attrs=attrs)

        aux_xds.to_zarr(os.path.join(outfile, xds_name),
                        mode='a',
                        compute=True,
                        consolidated=True)
        xds = xarray.open_zarr(os.path.join(outfile, xds_name))

        xds_list += [(xds_name, xds)]
        print('Completed ddi %i  process time {:0.2f} s'.format(time.time() -
                                                                start_ddi) %
              ddi)

    # clean up the tmp directory created by the weight conversion to MSv3
    os.system("rm -fr " + os.path.join(outfile, 'tmp'))

    # convert other subtables to their own partitions, denoted by 'global_' prefix
    skip_tables = ['DATA_DESCRIPTION', 'SORTED_TABLE'] + ignore
    subtables = sorted([
        tt for tt in os.listdir(infile)
        if os.path.isdir(os.path.join(infile, tt)) and tt not in skip_tables
    ])
    if 'global' in ddis:
        start_ddi = time.time()
        for ii, subtable in enumerate(subtables):
            print('processing subtable %i of %i : %s' %
                  (ii, len(subtables), subtable),
                  end='\r')
            if subtable == 'POINTING':  # expand the dimensions of the pointing table
                xds_sub_list = [(subtable,
                                 tblconv.convert_expanded_table(
                                     infile,
                                     os.path.join(outfile, 'global'),
                                     subtable=subtable,
                                     keys={
                                         'TIME': 'time',
                                         'ANTENNA_ID': 'antenna_id'
                                     },
                                     timecols=['time'],
                                     chunks=chunks))]
            else:
                add_row_id = (subtable in [
                    'ANTENNA', 'FIELD', 'OBSERVATION', 'SCAN',
                    'SPECTRAL_WINDOW', 'STATE'
                ])
                xds_sub_list = [(subtable,
                                 tblconv.convert_simple_table(
                                     infile,
                                     os.path.join(outfile, 'global'),
                                     subtable,
                                     timecols=['TIME'],
                                     ignore=ignorecols,
                                     compressor=compressor,
                                     nofile=False,
                                     chunks=(sub_chunks, -1),
                                     add_row_id=add_row_id))]

            if len(xds_sub_list[-1][1].dims) != 0:
                xds_list += xds_sub_list
            #else:
            #    print('Empty Subtable:',subtable)

        print(
            'Completed subtables  process time {:0.2f} s'.format(time.time() -
                                                                 start_ddi))

    # write sw version that did this conversion to zarr directory
    try:
        version = importlib_metadata.version('cngi-prototype')
    except:
        version = '0.0.0'

    with open(outfile + '/.version', 'w') as fid:
        fid.write('cngi-prototype ' + version + '\n')

    # build the master xds to return
    mxds = xdsio.vis_xds_packager(xds_list)
    print(' ' * 50)

    return mxds
Example #20
def convert_ms(infile,
               outfile=None,
               ddis=None,
               ignore=['HISTORY'],
               compressor=None,
               chunk_shape=(100, 400, 32, 1),
               append=False):
    """
    Convert legacy format MS to xarray Visibility Dataset and zarr storage format

    This function requires CASA6 casatools module. The CASA MSv2 format is converted to the MSv3 schema per the
    specified definition here: https://drive.google.com/file/d/10TZ4dsFw9CconBc-GFxSeb2caT6wkmza/view?usp=sharing
    
    The MS is partitioned by DDI, which guarantees a fixed data shape per partition. This results in different subdirectories
    under the main vis.zarr folder.  There is no DDI in MSv3, so this simply serves as a partition id in the zarr directory.

    Parameters
    ----------
    infile : str
        Input MS filename
    outfile : str
        Output zarr filename. If None, will use infile name with .vis.zarr extension
    ddis : list
        List of specific DDIs to convert. DDI's are integer values, or use 'global' string for subtables. Leave as None to convert entire MS
    ignore : list
        List of subtables to ignore (case sensitive and generally all uppercase). This is useful if a particular subtable is causing errors.
        Default is None. Note: the default is now temporarily set to ignore the HISTORY table due to a CASA6 issue in the table tool affecting a small
        set of test cases (set back to None if HISTORY is needed)
    compressor : numcodecs.blosc.Blosc
        The blosc compressor to use when saving the converted data to disk using zarr.
        If None, the zstd compression algorithm is used with compression level 2.
    chunk_shape: 4-D tuple of ints
        Shape of desired chunking in the form of (time, baseline, channel, polarization); use -1 for an entire axis in one chunk. Default is (100, 400, 32, 1)
        Note: chunk size is the product of the four numbers, and data is batch processed by time axis, so that will drive memory needed for conversion.
    append : bool
        Keep destination zarr store intact and add new DDI's to it. Note that duplicate DDI's will still be overwritten. Default False deletes and replaces
        entire directory.
    Returns
    -------
    xarray.core.dataset.Dataset
      Master xarray dataset of datasets for this visibility set
    """
    import os
    import xarray
    import dask.array as da
    import numpy as np
    import time
    import cngi._utils._table_conversion as tblconv
    import cngi._utils._io as xdsio
    import warnings
    import importlib_metadata
    warnings.filterwarnings('ignore', category=FutureWarning)

    # parse filename to use
    infile = os.path.expanduser(infile)
    prefix = infile[:infile.rindex('.')]
    if outfile is None: outfile = prefix + '.vis.zarr'
    outfile = os.path.expanduser(outfile)

    # need to manually remove existing zarr file (if any)
    if not append:
        os.system("rm -fr " + outfile)
        os.system("mkdir " + outfile)

    # as part of MSv3 conversion, these columns in the main table are no longer needed
    ignorecols = ['FLAG_CATEGORY', 'FLAG_ROW', 'DATA_DESC_ID']
    if ignore is None: ignore = []

    # we need the spectral window, polarization, and data description tables for processing the main table
    spw_xds = tblconv.convert_simple_table(infile,
                                           outfile='',
                                           subtable='SPECTRAL_WINDOW',
                                           ignore=ignorecols,
                                           nofile=True)
    pol_xds = tblconv.convert_simple_table(infile,
                                           outfile='',
                                           subtable='POLARIZATION',
                                           ignore=ignorecols,
                                           nofile=True)
    ddi_xds = tblconv.convert_simple_table(infile,
                                           outfile='',
                                           subtable='DATA_DESCRIPTION',
                                           ignore=ignorecols,
                                           nofile=True)

    # let's assume that each DATA_DESC_ID (ddi) is a fixed shape that may differ from others
    # form a list of ddis to process; each will be placed in its own xarray dataset and partition
    if ddis is None: ddis = list(ddi_xds['d0'].values) + ['global']
    else: ddis = np.atleast_1d(ddis)
    xds_list = []

    ####################################################################
    # process each selected DDI from the input MS, assume a fixed shape within the ddi (should always be true)
    # each DDI is written to its own subdirectory under the parent folder
    for ddi in ddis:
        if ddi == 'global': continue  # handled afterwards
        ddi = int(ddi)
        print('Processing ddi', ddi, end='\r')
        start_ddi = time.time()

        # these columns are different / absent in MSv3 or need to be handled as special cases
        msv2 = ['WEIGHT', 'WEIGHT_SPECTRUM', 'SIGMA', 'SIGMA_SPECTRUM', 'UVW']

        # convert columns that are common to MSv2 and MSv3
        xds = tblconv.convert_expanded_table(infile,
                                             os.path.join(
                                                 outfile, 'xds' + str(ddi)),
                                             keys={
                                                 'TIME':
                                                 'time',
                                                 ('ANTENNA1', 'ANTENNA2'):
                                                 'baseline'
                                             },
                                             subsel={'DATA_DESC_ID': ddi},
                                             timecols=['time'],
                                             dimnames={
                                                 'd2': 'chan',
                                                 'd3': 'pol'
                                             },
                                             ignore=ignorecols + msv2,
                                             compressor=compressor,
                                             chunk_shape=chunk_shape,
                                             nofile=False)

        # convert and append UVW separately so we can handle its special dimension
        uvw_xds = tblconv.convert_expanded_table(
            infile,
            os.path.join(outfile, 'tmp'),
            keys={
                'TIME': 'time',
                ('ANTENNA1', 'ANTENNA2'): 'baseline'
            },
            subsel={'DATA_DESC_ID': ddi},
            timecols=['time'],
            dimnames={'d2': 'uvw_index'},
            ignore=ignorecols + list(xds.data_vars) + msv2[:-1],
            compressor=compressor,
            chunk_shape=chunk_shape,
            nofile=False)
        uvw_xds.to_zarr(os.path.join(outfile, 'xds' + str(ddi)),
                        mode='a',
                        compute=True,
                        consolidated=True)

        # now convert just the WEIGHT and WEIGHT_SPECTRUM (if present)
        # WEIGHT needs to be expanded to full dimensionality (time, baseline, chan, pol)
        wt_xds = tblconv.convert_expanded_table(infile,
                                                os.path.join(outfile, 'tmp'),
                                                keys={
                                                    'TIME':
                                                    'time',
                                                    ('ANTENNA1', 'ANTENNA2'):
                                                    'baseline'
                                                },
                                                subsel={'DATA_DESC_ID': ddi},
                                                timecols=['time'],
                                                dimnames={},
                                                ignore=ignorecols +
                                                list(xds.data_vars) + msv2[2:],
                                                compressor=compressor,
                                                chunk_shape=chunk_shape,
                                                nofile=False)

        # if WEIGHT_SPECTRUM is present, append it to the main xds as the new WEIGHT column
        # otherwise expand the dimensionality of WEIGHT and add it to the xds
        if 'WEIGHT_SPECTRUM' in wt_xds.data_vars:
            wt_xds = wt_xds.drop_vars('WEIGHT').rename(
                dict(
                    zip(wt_xds.WEIGHT_SPECTRUM.dims,
                        ['time', 'baseline', 'chan', 'pol'])))
            wt_xds.to_zarr(os.path.join(outfile, 'xds' + str(ddi)),
                           mode='a',
                           compute=True,
                           consolidated=True)
        else:
            wts = wt_xds.WEIGHT.shape[:2] + (1, ) + (wt_xds.WEIGHT.shape[-1], )
            wt_da = da.tile(da.reshape(wt_xds.WEIGHT.data, wts),
                            (1, 1, len(xds.chan), 1)).rechunk(chunk_shape)
            wt_xds = wt_xds.drop_vars('WEIGHT').assign({
                'WEIGHT':
                xarray.DataArray(wt_da,
                                 dims=['time', 'baseline', 'chan', 'pol'])
            })
            wt_xds.to_zarr(os.path.join(outfile, 'xds' + str(ddi)),
                           mode='a',
                           compute=True,
                           consolidated=True)

        # add in relevant spw and polarization attributes
        attrs = {}
        for dv in spw_xds.data_vars:
            attrs[dv.lower()] = spw_xds[dv].values[
                ddi_xds['spectral_window_id'].values[ddi]]
            if isinstance(attrs[dv.lower()], np.bool_):  # convert bools
                attrs[dv.lower()] = int(attrs[dv.lower()])
        for dv in pol_xds.data_vars:
            attrs[dv.lower()] = pol_xds[dv].values[
                ddi_xds['polarization_id'].values[ddi]]
            if isinstance(attrs[dv.lower()], np.bool_):  # convert bools
                attrs[dv.lower()] = int(attrs[dv.lower()])

        # grab the channel frequency values from the spw table data and pol idxs from the polarization table, add spw and pol ids
        chan = attrs.pop('chan_freq')[:len(xds.chan)]
        pol = attrs.pop('corr_type')[:len(xds.pol)]

        # truncate per-chan values to the actual number of channels and move to coordinates
        chan_width = xarray.DataArray(attrs.pop('chan_width')[:len(xds.chan)],
                                      dims=['chan'])
        effective_bw = xarray.DataArray(
            attrs.pop('effective_bw')[:len(xds.chan)], dims=['chan'])
        resolution = xarray.DataArray(attrs.pop('resolution')[:len(xds.chan)],
                                      dims=['chan'])

        coords = {
            'chan': chan,
            'pol': pol,
            'spw_id': [ddi_xds['spectral_window_id'].values[ddi]],
            'pol_id': [ddi_xds['polarization_id'].values[ddi]],
            'chan_width': chan_width,
            'effective_bw': effective_bw,
            'resolution': resolution
        }
        aux_xds = xarray.Dataset(coords=coords, attrs=attrs)

        aux_xds.to_zarr(os.path.join(outfile, 'xds' + str(ddi)),
                        mode='a',
                        compute=True,
                        consolidated=True)
        xds = xarray.open_zarr(os.path.join(outfile, 'xds' + str(ddi)))

        xds_list += [('xds' + str(ddi), xds)]
        print('Completed ddi {}  process time {:0.2f} s'.format(
            ddi,
            time.time() - start_ddi))

    # clean up the tmp directory created by the weight conversion to MSv3
    os.system("rm -fr " + os.path.join(outfile, 'tmp'))

    # convert other subtables to their own partitions, denoted by 'global_' prefix
    skip_tables = ['DATA_DESCRIPTION', 'SORTED_TABLE'] + ignore
    subtables = sorted([
        tt for tt in os.listdir(infile)
        if os.path.isdir(os.path.join(infile, tt)) and tt not in skip_tables
    ])
    if 'global' in ddis:
        start_ddi = time.time()
        for ii, subtable in enumerate(subtables):
            print('processing subtable %i of %i : %s' %
                  (ii, len(subtables), subtable),
                  end='\r')
            if subtable == 'POINTING':  # expand the dimensions of the pointing table
                xds_sub_list = [(subtable,
                                 tblconv.convert_expanded_table(
                                     infile,
                                     os.path.join(outfile, 'global'),
                                     subtable=subtable,
                                     keys={
                                         'TIME': 'time',
                                         'ANTENNA_ID': 'antenna_id'
                                     },
                                     timecols=['time'],
                                     chunk_shape=chunk_shape))]
            else:
                xds_sub_list = [(subtable,
                                 tblconv.convert_simple_table(
                                     infile,
                                     os.path.join(outfile, 'global'),
                                     subtable,
                                     timecols=['TIME'],
                                     ignore=ignorecols,
                                     compressor=compressor,
                                     nofile=False))]

            if len(xds_sub_list[-1][1].dims) != 0:
                # to conform to MSv3, we need to add explicit ID fields to certain tables
                if subtable in [
                        'ANTENNA', 'FIELD', 'OBSERVATION', 'SCAN',
                        'SPECTRAL_WINDOW', 'STATE'
                ]:
                    #if 'd0' in xds_sub_list[-1][1].dims:
                    aux_xds = xarray.Dataset(
                        coords={
                            subtable.lower() + '_id':
                            xarray.DataArray(xds_sub_list[-1][1].d0.values,
                                             dims=['d0'])
                        })
                    aux_xds.to_zarr(os.path.join(outfile,
                                                 'global/' + subtable),
                                    mode='a',
                                    compute=True,
                                    consolidated=True)
                    xds_sub_list[-1] = (subtable,
                                        xarray.open_zarr(
                                            os.path.join(
                                                outfile,
                                                'global/' + subtable)))

                xds_list += xds_sub_list
            #else:
            #    print('Empty Subtable:',subtable)

        print(
            'Completed subtables  process time {:0.2f} s'.format(time.time() -
                                                                 start_ddi))

    # write the software version that did this conversion to the zarr directory
    with open(outfile + '/.version', 'w') as fid:
        fid.write('cngi-prototype ' +
                  importlib_metadata.version('cngi-prototype') + '\n')

    # build the master xds to return
    mxds = xdsio.vis_xds_packager(xds_list)
    print(' ' * 50)

    return mxds
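For orientation, a minimal usage sketch of the converter above. The function name and import path are assumptions (the enclosing def is not shown in this excerpt); only infile is required, and the remaining arguments mirror the docstring defaults.

import numcodecs
from cngi.conversion import convert_ms  # assumed name/path for the function defined above

mxds = convert_ms(
    infile='mydata.ms',                               # hypothetical input MeasurementSet
    outfile='mydata.vis.zarr',                        # defaults to <prefix>.vis.zarr
    ddis=[0, 'global'],                               # DDI 0 plus the 'global' subtables
    compressor=numcodecs.Blosc(cname='zstd', clevel=2, shuffle=0),
    chunk_shape=(100, 400, 20, 1),                    # (time, baseline, channel, polarization)
    append=False,
)
print(mxds)                                           # master xarray dataset of datasets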
Beispiel #21
0
def test_write_bw_colormap():
    """Test saving a BW image with a colormap.

    Albedo with a colormap.

    Reflectances are 0, 29.76, 60, 90.24, 120.
    """
    area = STEREOGRAPHIC_AREA
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050),
                  ('polarization', None),
                  ('platform_name', 'NOAA-18'),
                  ('sensor', 'avhrr-3'),
                  ('units', '%'),
                  ('name', '1'),
                  ('level', None),
                  ('modifiers', ()),
                  ('wavelength', (0.5, 0.6, 0.7)),
                  ('calibration', 'reflectance'),
                  ('start_time', TIME - datetime.timedelta(minutes=75)),
                  ('end_time', TIME - datetime.timedelta(minutes=70)),
                  ('area', area),
                  ('ancillary_variables', []),
                  ('enhancement_history', [{'offset': offset, 'scale': scale}])])

    cm_vis = [0, 4095, 5887, 7167, 8191, 9215, 9983, 10751, 11519, 12287, 12799,
              13567, 14079, 14847, 15359, 15871, 16383, 16895, 17407, 17919, 18175,
              18687, 19199, 19711, 19967, 20479, 20735, 21247, 21503, 22015, 22271,
              22783, 23039, 23551, 23807, 24063, 24575, 24831, 25087, 25599, 25855,
              26111, 26367, 26879, 27135, 27391, 27647, 27903, 28415, 28671, 28927,
              29183, 29439, 29695, 29951, 30207, 30463, 30975, 31231, 31487, 31743,
              31999, 32255, 32511, 32767, 33023, 33279, 33535, 33791, 34047, 34303,
              34559, 34559, 34815, 35071, 35327, 35583, 35839, 36095, 36351, 36607,
              36863, 37119, 37119, 37375, 37631, 37887, 38143, 38399, 38655, 38655,
              38911, 39167, 39423, 39679, 39935, 39935, 40191, 40447, 40703, 40959,
              40959, 41215, 41471, 41727, 41983, 41983, 42239, 42495, 42751, 42751,
              43007, 43263, 43519, 43519, 43775, 44031, 44287, 44287, 44543, 44799,
              45055, 45055, 45311, 45567, 45823, 45823, 46079, 46335, 46335, 46591,
              46847, 46847, 47103, 47359, 47615, 47615, 47871, 48127, 48127, 48383,
              48639, 48639, 48895, 49151, 49151, 49407, 49663, 49663, 49919, 50175,
              50175, 50431, 50687, 50687, 50943, 50943, 51199, 51455, 51455, 51711,
              51967, 51967, 52223, 52223, 52479, 52735, 52735, 52991, 53247, 53247,
              53503, 53503, 53759, 54015, 54015, 54271, 54271, 54527, 54783, 54783,
              55039, 55039, 55295, 55551, 55551, 55807, 55807, 56063, 56319, 56319,
              56575, 56575, 56831, 56831, 57087, 57343, 57343, 57599, 57599, 57855,
              57855, 58111, 58367, 58367, 58623, 58623, 58879, 58879, 59135, 59135,
              59391, 59647, 59647, 59903, 59903, 60159, 60159, 60415, 60415, 60671,
              60671, 60927, 60927, 61183, 61439, 61439, 61695, 61695, 61951, 61951,
              62207, 62207, 62463, 62463, 62719, 62719, 62975, 62975, 63231, 63231,
              63487, 63487, 63743, 63743, 63999, 63999, 64255, 64255, 64511, 64511,
              64767, 64767, 65023, 65023, 65279]

    kwargs = {'ch_min_measurement_unit': np.array([0]),
              'ch_max_measurement_unit': np.array([120]),
              'compute': True, 'fill_value': None, 'sat_id': 6300014,
              'chan_id': 100015, 'data_cat': 'P**N', 'data_source': 'SMHI',
              'physic_unit': '%', 'nbits': 8, 'cmap': [cm_vis] * 3}

    data = da.tile(da.repeat(da.arange(5, chunks=1024) / 4.0, 205)[:-1],
                   1024).reshape((1, 1024, 1024))[:, :1024]
    data = xr.DataArray(data, coords={'bands': ['L']}, dims=[
                        'bands', 'y', 'x'], attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        colormap, res = _load_file_values_with_colormap(filename)

        assert(len(colormap) == 768)
        assert(np.allclose(colormap[:256], cm_vis))
        assert(np.allclose(colormap[256:512], cm_vis))
        assert(np.allclose(colormap[512:], cm_vis))
        assert(np.allclose(res[0, ::205], np.array([1,  64, 128, 192, 255])))
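A small numpy-only sketch (no pyninjotiff needed) of what the da.tile(da.repeat(...)) pattern above builds: the five reflectance levels 0, 0.25, ..., 1.0 are each repeated 205 times (1025 samples, trimmed to 1024) and that row is tiled into a (1, 1024, 1024) image, so columns 0, 205, 410, 615 and 820 carry the plateau values checked by res[0, ::205].

import numpy as np

row = np.repeat(np.arange(5) / 4.0, 205)[:-1]          # 1025 samples trimmed to 1024
image = np.tile(row, 1024).reshape((1, 1024, 1024))    # same layout as the dask version above

assert image.shape == (1, 1024, 1024)
print(image[0, 0, ::205])                              # -> [0.   0.25 0.5  0.75 1.  ]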
Beispiel #22
0
def test_tile_array_reps(shape, chunks, reps):
    x = np.random.random(shape)
    d = da.from_array(x, chunks=chunks)

    with pytest.raises(NotImplementedError):
        da.tile(d, reps)
Beispiel #23
0
def test_tile_neg_reps(shape, chunks, reps):
    x = np.random.random(shape)
    d = da.from_array(x, chunks=chunks)

    with pytest.raises(ValueError):
        da.tile(d, reps)
Beispiel #24
0
def test_tile(shape, chunks, reps):
    x = np.random.random(shape)
    d = da.from_array(x, chunks=chunks)

    assert_eq(np.tile(x, reps), da.tile(d, reps))
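The shape, chunks and reps arguments used by these tile tests are pytest parameters supplied elsewhere in the suite; a hedged sketch of how such a test might be wired up (the concrete parameter values below are illustrative assumptions, not the upstream ones):

import numpy as np
import dask.array as da
import pytest
from dask.array.utils import assert_eq


@pytest.mark.parametrize("shape, chunks", [((10,), (3,)), ((4, 6), (2, 3))])
@pytest.mark.parametrize("reps", [1, 2, 3, 5])
def test_tile_parametrized(shape, chunks, reps):
    x = np.random.random(shape)
    d = da.from_array(x, chunks=chunks)

    # for positive integer reps, da.tile should agree with np.tile
    assert_eq(np.tile(x, reps), da.tile(d, reps))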
Beispiel #25
0
def test_tile_basic(reps):
    a = da.asarray([0, 1, 2])
    b = [[1, 2], [3, 4]]

    assert_eq(np.tile(a.compute(), reps), da.tile(a, reps))
    assert_eq(np.tile(b, reps), da.tile(b, reps))
Beispiel #26
0
def test_tile_neg_reps(shape, chunks, reps):
    x = np.random.random(shape)
    d = da.from_array(x, chunks=chunks)

    with pytest.raises(ValueError):
        da.tile(d, reps)
Beispiel #27
0
def test_write_bw_fill():
    """Test saving a BW image with transparency."""
    from pyninjotiff.ninjotiff import save
    from pyninjotiff.tifffile import TiffFile

    area = FakeArea(
        {
            'ellps': 'WGS84',
            'lat_0': 90.0,
            'lat_ts': 60.0,
            'lon_0': 0.0,
            'proj': 'stere'
        }, (-1000000.0, -4500000.0, 2072000.0, -1428000.0), 1024, 1024)
    scale = 1.0 / 120
    offset = 0.0
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', '%'), ('name', '1'), ('level', None),
                  ('modifiers', ()), ('wavelength', (0.5, 0.6, 0.7)),
                  ('calibration', 'reflectance'),
                  ('start_time', TIME - datetime.timedelta(minutes=25)),
                  ('end_time', TIME - datetime.timedelta(minutes=20)),
                  ('area', area), ('ancillary_variables', []),
                  ('enhancement_history', [{
                      'offset': offset,
                      'scale': scale
                  }])])

    kwargs = {
        'ch_min_measurement_unit': np.array([0]),
        'ch_max_measurement_unit': np.array([120]),
        'compute': True,
        'fill_value': None,
        'sat_id': 6300014,
        'chan_id': 100015,
        'data_cat': 'P**N',
        'data_source': 'SMHI',
        'physic_unit': '%',
        'nbits': 8
    }

    data1 = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                    256).reshape((1, 256, 1024))
    datanan = da.ones((1, 256, 1024), chunks=1024) * np.nan
    data2 = da.tile(da.repeat(da.arange(4, chunks=1024) / 3.0, 256),
                    512).reshape((1, 512, 1024))
    data = da.concatenate((data1, datanan, data2), axis=1)
    data = xr.DataArray(data,
                        coords={'bands': ['L']},
                        dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value
        for i in range(3):
            assert (np.all(
                np.array(colormap[i * 256:(i + 1) * 256]) == np.arange(256) *
                256))
        assert (np.all(res[0, ::256] == np.array([1, 86, 170, 255])))
        assert (np.all(res[256, :] == 0))
Beispiel #28
0
def test_tile_empty_array(shape, chunks, reps):
    x = np.empty(shape)
    d = da.from_array(x, chunks=chunks)

    assert_eq(np.tile(x, reps), da.tile(d, reps))
Beispiel #29
0
def network_from_cif(path_to_cif, min_length=30, grid_spacing=0.2, probe_size=1.8, maxima_threshold=2):

    import numpy as np
    import os
    import pore_analyzer as pa
    import ase
    import ase.build
    from ase.io import read

    data = read(path_to_cif)  # Read the CIF file
    print("Computing distance grid...")
    dgrid = pa.compute_dgrid_gpu(data, spacing=grid_spacing,
                                 chunk_size=10000)  # Compute a fine distance grid on one unit cell

    # Tile the grid to make supercell
    import dask.array as da
    import cupy as cp
    dgrid = cp.asnumpy(dgrid)

    # determine nx_cells ny_cells nz_cells automatically
    la = data.get_cell_lengths_and_angles()[0]
    lb = data.get_cell_lengths_and_angles()[1]
    lc = data.get_cell_lengths_and_angles()[2]
    alpha = data.get_cell_lengths_and_angles()[3] * (np.pi / 180.0)
    beta = data.get_cell_lengths_and_angles()[4] * (np.pi / 180.0)
    gamma = data.get_cell_lengths_and_angles()[5] * (np.pi / 180.0)
    vol = data.get_volume()
    eA = [la, 0, 0]
    eB = [lb * np.cos(gamma), lb * np.sin(gamma), 0]
    eC = [lc * np.cos(beta), lc * (np.cos(alpha) - np.cos(beta) * np.cos(gamma)) / np.sin(gamma),
          vol / (la * lb * np.sin(gamma))]

    # Find the perpendicular box lengths.
    # These are the projections of the lattice vectors onto the x, y and z axes;
    # it can be shown that these lengths equal the inverse magnitudes of the
    # corresponding reciprocal lattice vectors, e.g. a.i = 1/|a*|

    lx_unit = vol / np.linalg.norm(np.cross(eB, eC))
    ly_unit = vol / np.linalg.norm(np.cross(eC, eA))
    lz_unit = vol / np.linalg.norm(np.cross(eA, eB))

    nx_cells = int(np.ceil(min_length / lx_unit))  # repeats needed so the supercell spans at least min_length
    ny_cells = int(np.ceil(min_length / ly_unit))
    nz_cells = int(np.ceil(min_length / lz_unit))

    # Tile the distance grid
    dgrid_tiled = da.tile(dgrid, (nx_cells, ny_cells, nz_cells))

    # ASE atoms object for the super cell
    data_supercell = ase.build.make_supercell(data, [[nx_cells, 0, 0], [0, ny_cells, 0],
                                                     [0, 0, nz_cells]])  # Make an nx x ny x nz supercell.

    print("Computing region labels...")

    # Compute the region labels, local maxima and the maxima locations
    region_labels, localmaxi, maxivals, maxima_coordinates = pa.make_labels_grid(dgrid_tiled.compute(), data_supercell,
                                                                                 peak_min=maxima_threshold,
                                                                                 dist_min=probe_size, apply_pbc=False)

    print("Computing connections and windows...")
    # Compute the connections
    connections = pa.find_windows_fixed_faster(data_supercell, region_labels)

    return maxima_coordinates, connections, maxivals
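A quick numerical check of the perpendicular-width formula used above to pick the supercell repeats. It is a sketch assuming an orthorhombic cell (all angles 90 degrees), where the answer is obvious: vol / |b x c| returns the a edge length itself, and min_length / lx_unit then gives the number of repeats along x.

import numpy as np

# orthorhombic 12 x 15 x 20 cell (illustrative values only)
eA, eB, eC = np.array([12.0, 0, 0]), np.array([0, 15.0, 0]), np.array([0, 0, 20.0])
vol = np.dot(eA, np.cross(eB, eC))

lx_unit = vol / np.linalg.norm(np.cross(eB, eC))   # perpendicular width along x -> 12.0
nx_cells = int(np.ceil(30.0 / lx_unit))            # repeats for a >= 30 supercell -> 3
print(lx_unit, nx_cells)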
def calibration_double_ended_wls(ds,
                                 st_label,
                                 ast_label,
                                 rst_label,
                                 rast_label,
                                 st_var,
                                 ast_var,
                                 rst_var,
                                 rast_var,
                                 calc_cov=True,
                                 solver='sparse',
                                 dtype32=False):
    """


    Parameters
    ----------
    ds : DataStore
    st_label
    ast_label
    rst_label
    rast_label
    st_var
    ast_var
    rst_var
    rast_var
    calc_cov
    solver : {'sparse', 'stats'}

    Returns
    -------

    """

    # x_alpha_set_zero=0.,
    # set one alpha for all times to zero
    # x_alpha_set_zeroi = np.argmin(np.abs(ds.x.data - x_alpha_set_zero))
    # x_alpha_set_zeroidata = np.arange(nt) * no + x_alpha_set_zeroi

    cal_ref = ds.ufunc_per_section(label=st_label,
                                   ref_temp_broadcasted=True,
                                   calc_per='all')

    st = ds.ufunc_per_section(label=st_label, calc_per='all')
    ast = ds.ufunc_per_section(label=ast_label, calc_per='all')
    rst = ds.ufunc_per_section(label=rst_label, calc_per='all')
    rast = ds.ufunc_per_section(label=rast_label, calc_per='all')
    z = ds.ufunc_per_section(label='x', calc_per='all')

    nx = z.size

    _xsorted = np.argsort(ds.x.data)
    _ypos = np.searchsorted(ds.x.data[_xsorted], z)
    x_index = _xsorted[_ypos]

    no, nt = ds[st_label].data.shape

    p0_est = np.asarray([482., 0.1] + nt * [1.4] + no * [0.])

    # Data for F and B temperature, 2 * nt * nx items
    data1 = da.repeat(1 / (cal_ref.T.ravel() + 273.15), 2)  # gamma
    # data2 = da.tile(np.array([0., -1.]), nt * nx)  # alphaint
    data2 = da.stack((da.zeros(nt * nx, chunks=nt * nx),
                      -da.ones(nt * nx, chunks=nt * nx))).T.ravel()
    # data3 = da.tile(np.array([-1., -1.]), nt * nx)  # C
    data3 = -da.ones(2 * nt * nx, chunks=2 * nt * nx)
    # data5 = da.tile(np.array([-1., 1.]), nt * nx)  # alph
    data5 = da.stack((-da.ones(nt * nx, chunks=nt * nx),
                      da.ones(nt * nx, chunks=nt * nx))).T.ravel()

    # Data for alpha, nt * no items
    # data6 = da.repeat(np.array([-0.5]), nt * no)  # alphaint
    data6 = da.ones(nt * no, dtype=float,
                    chunks=(nt * no, )) * -0.5  # alphaint
    data9 = da.ones(nt * no, dtype=float, chunks=(nt * no, ))  # alpha

    # alpha should start at zero. But then the sparse solver crashes
    # data9[x_alpha_set_zeroidata] = 0.

    data = da.concatenate([data1, data2, data3, data5, data6, data9]).compute()

    # Coords (irow, icol)
    coord1row = da.arange(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # gamma
    coord2row = da.arange(2 * nt * nx, dtype=int,
                          chunks=(nt * nx, ))  # alphaint
    coord3row = da.arange(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # C
    coord5row = da.arange(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # alpha

    coord6row = da.arange(2 * nt * nx,
                          2 * nt * nx + nt * no,
                          dtype=int,
                          chunks=(nt * no, ))  # alphaint
    coord9row = da.arange(2 * nt * nx,
                          2 * nt * nx + nt * no,
                          dtype=int,
                          chunks=(nt * no, ))  # alpha

    coord1col = da.zeros(2 * nt * nx, dtype=int, chunks=(nt * nx, ))  # gamma
    coord2col = da.ones(2 * nt * nx, dtype=int, chunks=(nt * nx, )) * (
        2 + nt + no - 1)  # alphaint
    coord3col = da.repeat(da.arange(nt, dtype=int, chunks=(nt, )) + 2,
                          2 * nx).rechunk(nt * nx)  # C
    coord5col = da.tile(np.repeat(x_index, 2) + nt + 2,
                        nt).rechunk(nt * nx)  # alpha

    coord6col = da.ones(nt * no, dtype=int,
                        chunks=(nt * no, ))  # * (2 + nt + no - 1)  # alphaint
    coord9col = da.tile(
        da.arange(no, dtype=int, chunks=(nt * no, )) + nt + 2, nt)  # alpha

    rows = [coord1row, coord2row, coord3row, coord5row, coord6row, coord9row]
    cols = [coord1col, coord2col, coord3col, coord5col, coord6col, coord9col]
    coords = (da.concatenate(rows).compute(), da.concatenate(cols).compute())

    # try scipy.sparse.bsr_matrix
    X = sp.coo_matrix((data, coords),
                      shape=(2 * nx * nt + nt * no, nt + 2 + no),
                      dtype=float,
                      copy=False)

    # Spooky way to interleave and ravel arrays in correct order. Works!
    y1F = da.log(st / ast).T.ravel()
    y1B = da.log(rst / rast).T.ravel()
    y1 = da.stack([y1F, y1B]).T.ravel()

    y2F = da.log(ds[st_label].data / ds[ast_label].data).T.ravel()
    y2B = da.log(ds[rst_label].data / ds[rast_label].data).T.ravel()
    y2 = (y2B - y2F) / 2
    y = da.concatenate([y1, y2]).compute()

    # Calculate the reciprocal of the variance (not the std)
    w1F = (1 / st**2 * st_var + 1 / ast**2 * ast_var).T.ravel()
    w1B = (1 / rst**2 * rst_var + 1 / rast**2 * rast_var).T.ravel()
    w1 = da.stack([w1F, w1B]).T.ravel()

    w2 = (0.5 / ds[st_label].data**2 * st_var +
          0.5 / ds[ast_label].data**2 * ast_var +
          0.5 / ds[rst_label].data**2 * rst_var +
          0.5 / ds[rast_label].data**2 * rast_var).T.ravel()
    w = da.concatenate([w1, w2]).compute()

    if solver == 'sparse':
        p_sol, p_var, p_cov = wls_sparse(X,
                                         y,
                                         w=w,
                                         x0=p0_est,
                                         calc_cov=calc_cov,
                                         dtype32=dtype32)

    elif solver == 'stats':
        p_sol, p_var, p_cov = wls_stats(X, y, w=w, calc_cov=calc_cov)

    if calc_cov:
        return nt, z, p_sol, p_var, p_cov
    else:
        return nt, z, p_sol, p_var
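A minimal standalone sketch (plain dask only, no DataStore required) of the interleave-and-ravel trick used above: stacking two equally sized 1-D arrays and ravelling the transpose alternates their elements, which is how the forward (F) and backward (B) observations end up interleaved per measurement point.

import dask.array as da

f = da.arange(4, chunks=2)          # forward observations, e.g. [0 1 2 3]
b = da.arange(4, chunks=2) + 10     # backward observations, e.g. [10 11 12 13]

interleaved = da.stack([f, b]).T.ravel()
print(interleaved.compute())        # -> [ 0 10  1 11  2 12  3 13]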
Beispiel #31
0
def test_write_ir_colormap():
    """Test saving a IR image with a colormap.

    IR with a colormap.

    Temperatures are -70, -40.24, -10, 20.24, 50.
    """
    from pyninjotiff.ninjotiff import save
    from pyninjotiff.tifffile import TiffFile

    area = FakeArea(
        {
            'ellps': 'WGS84',
            'lat_0': 90.0,
            'lat_ts': 60.0,
            'lon_0': 0.0,
            'proj': 'stere'
        }, (-1000000.0, -4500000.0, 2072000.0, -1428000.0), 1024, 1024)
    scale = 1.0 / 120
    offset = 70.0 / 120
    attrs = dict([('resolution', 1050), ('polarization', None),
                  ('platform_name', 'NOAA-18'), ('sensor', 'avhrr-3'),
                  ('units', 'K'), ('name', '4'), ('level', None),
                  ('modifiers', ()), ('wavelength', (10.3, 10.8, 11.3)),
                  ('calibration', 'brightness_temperature'),
                  ('start_time', TIME - datetime.timedelta(minutes=85)),
                  ('end_time', TIME - datetime.timedelta(minutes=80)),
                  ('area', area), ('ancillary_variables', []),
                  ('enhancement_history', [{
                      'offset': offset,
                      'scale': scale
                  }])])

    ir_map = [
        255, 1535, 2559, 3327, 4095, 4863, 5375, 5887, 6399, 6911, 7423, 7935,
        8447, 8959, 9471, 9983, 10239, 10751, 11263, 11519, 12031, 12287,
        12799, 13055, 13567, 13823, 14335, 14591, 14847, 15359, 15615, 16127,
        16383, 16639, 17151, 17407, 17663, 17919, 18431, 18687, 18943, 19199,
        19711, 19967, 20223, 20479, 20735, 21247, 21503, 21759, 22015, 22271,
        22527, 22783, 23295, 23551, 23807, 24063, 24319, 24575, 24831, 25087,
        25343, 25599, 25855, 26367, 26623, 26879, 27135, 27391, 27647, 27903,
        28159, 28415, 28671, 28927, 29183, 29439, 29695, 29951, 30207, 30463,
        30719, 30975, 31231, 31487, 31743, 31999, 31999, 32255, 32511, 32767,
        33023, 33279, 33535, 33791, 34047, 34303, 34559, 34815, 35071, 35327,
        35327, 35583, 35839, 36095, 36351, 36607, 36863, 37119, 37375, 37375,
        37631, 37887, 38143, 38399, 38655, 38911, 39167, 39167, 39423, 39679,
        39935, 40191, 40447, 40703, 40703, 40959, 41215, 41471, 41727, 41983,
        41983, 42239, 42495, 42751, 43007, 43263, 43263, 43519, 43775, 44031,
        44287, 44287, 44543, 44799, 45055, 45311, 45311, 45567, 45823, 46079,
        46335, 46335, 46591, 46847, 47103, 47359, 47359, 47615, 47871, 48127,
        48127, 48383, 48639, 48895, 49151, 49151, 49407, 49663, 49919, 49919,
        50175, 50431, 50687, 50687, 50943, 51199, 51455, 51455, 51711, 51967,
        52223, 52223, 52479, 52735, 52991, 52991, 53247, 53503, 53759, 53759,
        54015, 54271, 54527, 54527, 54783, 55039, 55039, 55295, 55551, 55807,
        55807, 56063, 56319, 56319, 56575, 56831, 57087, 57087, 57343, 57599,
        57599, 57855, 58111, 58367, 58367, 58623, 58879, 58879, 59135, 59391,
        59391, 59647, 59903, 60159, 60159, 60415, 60671, 60671, 60927, 61183,
        61183, 61439, 61695, 61695, 61951, 62207, 62463, 62463, 62719, 62975,
        62975, 63231, 63487, 63487, 63743, 63999, 63999, 64255, 64511, 64511,
        64767, 65023, 65023, 65279
    ]

    kwargs = {
        'ch_min_measurement_unit': np.array([-70]),
        'ch_max_measurement_unit': np.array([50]),
        'compute': True,
        'fill_value': None,
        'sat_id': 6300014,
        'chan_id': 900015,
        'data_cat': 'P**N',
        'data_source': 'SMHI',
        'physic_unit': 'C',
        'nbits': 8,
        'cmap': [ir_map] * 3
    }

    data = da.tile(da.repeat(da.arange(5, chunks=1024) / 4.0, 205)[:-1],
                   1024).reshape((1, 1024, 1024))[:, :1024]
    data = xr.DataArray(data,
                        coords={'bands': ['L']},
                        dims=['bands', 'y', 'x'],
                        attrs=attrs)
    img = FakeImage(data)
    with tempfile.NamedTemporaryFile(delete=DELETE_FILES) as tmpfile:
        filename = tmpfile.name
        if not DELETE_FILES:
            print(filename)
        save(img, filename, data_is_scaled_01=True, **kwargs)
        tif = TiffFile(filename)
        page = tif[0]
        res = page.asarray(colormapped=False).squeeze()
        colormap = page.tags['color_map'].value

        assert (len(colormap) == 768)
        assert (np.allclose(colormap[:256], ir_map))
        assert (np.allclose(colormap[256:512], ir_map))
        assert (np.allclose(colormap[512:], ir_map))
        assert (np.allclose(res[0, ::205], np.array([1, 64, 128, 192, 255])))