Example #1
def resolve_dict_iterator(iterator: Any, nrows: int = None) -> tuple:
    """Note that this function produces sorted arrays."""
    sort_fields = ["row", "col", "amount", "uncertainty_type"]
    data = (dictionary_formatter(row) for row in iterator)
    array = create_structured_array(
        data,
        INDICES_DTYPE + [("amount", np.float32)] + UNCERTAINTY_DTYPE + [("flip", bool)],
        nrows=nrows,
        sort=True,
        sort_fields=sort_fields,
    )
    return (
        array["amount"],
        # Not repacking fields would cause this multi-field index to return a view
        # All columns would be serialized
        # See https://numpy.org/doc/stable/user/basics.rec.html#indexing-structured-arrays
        repack_fields(array[["row", "col"]]),
        repack_fields(
            array[
                [
                    "uncertainty_type",
                    "loc",
                    "scale",
                    "shape",
                    "minimum",
                    "maximum",
                    "negative",
                ]
            ]
        ),
        array["flip"],
    )
Example #3
    def test_repack_fields(self):
        dt = np.dtype('u1,f4,i8', align=True)
        a = np.zeros(2, dtype=dt)

        assert_equal(repack_fields(dt), np.dtype('u1,f4,i8'))
        assert_equal(repack_fields(a).itemsize, 13)
        assert_equal(repack_fields(repack_fields(dt), align=True), dt)

        # make sure type is preserved
        dt = np.dtype((np.record, dt))
        assert_(repack_fields(dt).type is np.record)
Example #4
def pointcloud2_to_array(cloud_msg, split_rgb=False, remove_padding=True):
    ''' Converts a rospy PointCloud2 message to a numpy recordarray

    Reshapes the returned array to have shape (height, width), even if the height is 1.

    The reason for using np.frombuffer rather than struct.unpack is speed... especially
    for large point clouds, this will be <much> faster.
    '''
    # construct a numpy record type equivalent to the point type of this cloud
    dtype_list = pointcloud2_to_dtype(cloud_msg)

    # parse the cloud into an array
    cloud_arr = np.frombuffer(cloud_msg.data, dtype_list)

    # remove the dummy fields that were added
    if remove_padding:
        cloud_arr = recfuncs.repack_fields(cloud_arr[[
            fname for fname, _type in dtype_list
            if not (fname[:len(DUMMY_FIELD_PREFIX)] == DUMMY_FIELD_PREFIX)
        ]])

    if split_rgb:
        cloud_arr = split_rgb_field(cloud_arr)

    return np.reshape(cloud_arr, (cloud_msg.height, cloud_msg.width))
Example #5
def recarray_drop_columns(array, columns):
    '''
    Remove columns from rec array
    '''
    to_use = [col for col in array.dtype.names if col not in columns]
    subset = array.view(subset_dtype(array.dtype, to_use))
    return repack_fields(subset)
Example #6
async def coadd():
    ''' 
    '''
    release = request.args.get('release', 'dr16')
    run2d = request.args.get('run2d', None)
    plate = int(request.args.get('plate', None))
    mjd = int(request.args.get('mjd', None))
    fiber = int(request.args.get('fiber', 1))
    sample = int(request.args.get('sample', 1))
    bands = request.args.get('bands', 'all')
    debug_flag = request.args.get('debug', '')

    survey = 'sdss' if not run2d.startswith('v') else 'eboss'

    root = '/ssd0/sdss/%s/%s/spectro/redux/' % (release,survey)
    path = root + '%s/%04i/' % (run2d, plate)
    fname = path + 'spec-%04i-%05i-%04i.npy' %  (plate,mjd,fiber)

    print(fname)
    if bands == 'all':
        with open(fname, 'rb') as fd:
            _bytes = fd.read()
        return web.Response(body=_bytes)
    else:
        data = np.load(str(fname))
        dbands = data[[c for c in list(data.dtype.names) if c in bands]]
        dbands = rfn.repack_fields(dbands)

        tmp_file = NamedTemporaryFile(delete=False, dir='/tmp').name
        np.save(tmp_file, dbands, allow_pickle=False)
        with open(tmp_file+'.npy', 'rb') as fd:
            _bytes = fd.read()
        os.unlink(tmp_file)
        return web.Response(body=_bytes)
Example #7
    def drop_column(array, column):
        from numpy.lib.recfunctions import repack_fields

        cols = list(array.dtype.names)
        if column in cols:
            cols.remove(column)

        return repack_fields(array[cols])
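A hedged usage sketch of the same column-dropping pattern outside the class; the structured array and field names below are made up for illustration:

import numpy as np
from numpy.lib.recfunctions import repack_fields

arr = np.array([(1, 2.0, 3.0), (4, 5.0, 6.0)],
               dtype=[("id", "i4"), ("x", "f8"), ("y", "f8")])
cols = list(arr.dtype.names)
cols.remove("y")
trimmed = repack_fields(arr[cols])   # packed copy without the "y" field
print(trimmed.dtype.names)           # ('id', 'x')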
Example #8
def rec_to_ndarr(rec_arr, data_type=float):
    """
    Function to transform a numpy record array to a nd array.
    """
    # fix for numpy >= 1.16.0 with masked arrays
    # https://numpy.org/devdocs/release/1.16.0-notes.html#multi-field-views-return-a-view-instead-of-a-copy
    return np.array(recFunc.structured_to_unstructured(
        recFunc.repack_fields(rec_arr[list(rec_arr.dtype.names)])),
                    dtype=data_type)
Example #9
def _rec_to_ndarr(rec_arr, data_type=float):
    """
    Function to transform a numpy record array to a nd array.
    dupe of SimPEG.electromagnetics.natural_source.utils.rec_to_ndarr to avoid circular import
    """
    # fix for numpy >= 1.16.0
    # https://numpy.org/devdocs/release/1.16.0-notes.html#multi-field-views-return-a-view-instead-of-a-copy
    return np.array(recFunc.structured_to_unstructured(recFunc.repack_fields(rec_arr[list(rec_arr.dtype.names)])),
                    dtype=data_type)
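Both helpers above apply the workaround recommended by the NumPy 1.16 release notes: repack the multi-field selection, then flatten it. A minimal sketch of that combination, assuming only NumPy:

import numpy as np
from numpy.lib import recfunctions as recFunc

rec = np.array([(1, 2.5), (3, 4.5)], dtype=[("a", "i4"), ("b", "f4")])
nd = np.array(
    recFunc.structured_to_unstructured(recFunc.repack_fields(rec[list(rec.dtype.names)])),
    dtype=float)
print(nd)   # [[1.  2.5]
            #  [3.  4.5]]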
Example #10
def load_structured_data(file): ## file for data (x,y)
    if Path(str(file)).is_file():
        structured_data = np.genfromtxt(file, delimiter=',', names=True, dtype=float)
        data = rf.structured_to_unstructured(rf.repack_fields(structured_data)) 
    else:
        raise FileNotFoundError(file) # raise error
    data = data.reshape(1, -1) if len(data.shape) == 1 else data
    names = structured_data.dtype.names
    return names, data 
Example #11
def polys_to_segments(self, as_basic=True, to_orig=False, as_3d=False):
    """Segment poly* structures into o-d pairs from start to finish.

    as_basic : boolean
        True, returns an Nx4 array (x0, y0, x1, y1) of from-to coordinates.
        False, returns a structured array
        If `as_3d` is True, then `as_basic` is set to False.
    to_orig : boolean
        True, moves the coordinates back to their original position
        defined by the `LL` property of the Geo array.
    as_3d : boolean
        True, the point pairs are returned as a 3D array in the form
        [['X_orig', 'Y_orig'], ['X_dest', 'Y_dest']], without the distances.

    Notes
    -----
    Use `prn_tbl` if you want to see a well formatted output.
    """
    if self.K not in (1, 2):
        print("Poly* features required.")
        return None
    # -- basic return as ndarray used by common_segments
    if as_3d:  # The array cannot be basic if it is 3d
        as_basic = False
    if to_orig:
        tmp = self.XY + self.LL
        b_vals = [tmp[ft[0]:ft[1]] for ft in self.FT]  # shift to orig extent
    else:
        b_vals = self.bits
    # -- Do the concatenation
    fr_to = np.concatenate(
        [np.concatenate((b[:-1], b[1:]), axis=1) for b in b_vals], axis=0)
    # -- return if simple and not 3d representation
    if as_basic:
        return fr_to
    # -- return 3d from-to representation
    if as_3d:
        fr_to = fr_to[:, :4]
        s0, s1 = fr_to.shape
        return fr_to.reshape(s0, s1 // 2, s1 // 2)
    # -- structured array section
    # add bit ids and lengths to the output array
    b_ids = self.IFT
    segs = np.asarray([[[b_ids[i][0], *(b_ids[i][-2:])],
                        len(b) - 1] for i, b in enumerate(b_vals)],
                      dtype='O')
    s_ids = np.concatenate([np.tile(i[0], i[1]).reshape(-1, 3) for i in segs],
                           axis=0)
    dist = (np.sqrt(np.sum((fr_to[:, :2] - fr_to[:, 2:4])**2, axis=1)))
    fr_to = np.hstack((fr_to, s_ids, dist.reshape(-1, 1)))
    dt = np.dtype([('X_fr', 'f8'), ('Y_fr', 'f8'), ('X_to', 'f8'),
                   ('Y_to', 'f8'), ('Orig_id', 'i4'), ('Part', 'i4'),
                   ('Seq_ID', 'i4'), ('Length', 'f8')])
    fr_to = uts(fr_to, dtype=dt)
    return repack_fields(fr_to)
Example #12
def convert_pc_to_numpy(pc):
    # Extracts the 3D array of points - needs to do some management of structured arrays for efficiency
    pts_struct = numpify(pc)[['x', 'y', 'z']]
    if numpy_ver[1] >= 15:
        from numpy.lib.recfunctions import repack_fields
        pts_struct = repack_fields(pts_struct)
    pts = pts_struct.view((pts_struct.dtype[0], 3))
    if len(pts.shape) == 3:
        pts = pts.transpose(2, 0, 1).reshape(3, -1).T

    return pts
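The repack step above matters because selecting ('x', 'y', 'z') from a padded point cloud only yields a view with gaps, and .view((dtype, 3)) needs a contiguous, homogeneous layout. A small sketch under that assumption; the extra "intensity" field stands in for whatever padding a real cloud carries:

import numpy as np
from numpy.lib.recfunctions import repack_fields

cloud = np.zeros(4, dtype=[("x", "f4"), ("y", "f4"), ("z", "f4"), ("intensity", "f4")])
pts_struct = repack_fields(cloud[["x", "y", "z"]])   # drop the gap left by "intensity"
pts = pts_struct.view((pts_struct.dtype[0], 3))      # plain (N, 3) float32 array
print(pts.shape)   # (4, 3)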
Example #13
def plot_results(setup, results, samples, constraints, gridnames, obs, obs_err, photbands):

    # check for 10 possible plots. Should be enough for now.
    for i in range(10):

        pindex = 'plot' + str(i)
        if not pindex in setup: continue

        if setup[pindex]['type'] == 'sed_fit':

            res = setup[pindex].get('result', 'best')

            pl.figure(i)
            pl.clf()
            pl.subplots_adjust(wspace=0.25)
            plotting.plot_fit(obs, obs_err, photbands, pars=results, constraints=constraints, grids=setup['grids'],
                              gridnames=gridnames, result=res)

            if not setup[pindex].get('path', None) is None:
                pl.savefig(setup[pindex].get('path', 'sed_fit.png'))

        if setup[pindex]['type'] == 'constraints':

            pl.figure(i, figsize=(2 * len(constraints), 6))
            pl.clf()
            pl.subplots_adjust(wspace=0.40, left=0.07, right=0.98)

            plotting.plot_constraints(constraints, samples, results)

            if not setup[pindex].get('path', None) is None:
                pl.savefig(setup[pindex].get('path', 'constraints.png'))

        if setup[pindex]['type'] == 'distribution':

            pars1 = []
            for p in setup[pindex].get('parameters', ['teff', 'rad', 'L', 'd']):
                if p in samples.dtype.names: pars1.append(p)

            data = repack_fields(samples[pars1])

            if setup[pindex].get('show_best', False):
                truths = [results[p][0] for p in data.dtype.names]
            else:
                truths = None

            fig = corner.corner(data.view(np.float64).reshape(data.shape + (-1,)),
                                labels=data.dtype.names,
                                quantiles=setup[pindex].get('quantiles', [0.025, 0.16, 0.5, 0.84, 0.975]),
                                levels=setup[pindex].get('levels', [0.393, 0.865, 0.95]),
                                truths=truths,
                                show_titles=True, title_kwargs={"fontsize": 12}, )

            if not setup[pindex].get('path', None) is None:
                pl.savefig(setup[pindex].get('path', 'distribution.png'))
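The corner call above relies on every selected sample column being float64, so that after repacking the structured array can be reinterpreted as a plain 2-D matrix. A minimal sketch of that reinterpretation, assuming homogeneous float64 fields:

import numpy as np
from numpy.lib.recfunctions import repack_fields

samples = np.zeros(5, dtype=[("teff", "f8"), ("rad", "f8"), ("L", "f8"), ("d", "f8")])
data = repack_fields(samples[["teff", "rad"]])
matrix = data.view(np.float64).reshape(data.shape + (-1,))   # shape (5, 2)
print(matrix.shape)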
Example #14
def coadd():
    """ 
    """
    if request.method == 'POST':  # Get the login user name
        release = request.form['release']
        run2d = request.form['run2d']
        plate = request.form['plate']
        mjd = request.form['mjd']
        fiber = request.form['fiber']
        sample = request.form['sample']
        if 'bands' in request.form:
            bands = request.form['bands']
        else:
            bands = 'all'
        debug_flag = request.form['debug']
    else:
        release = request.args.get('release', 'dr16')
        run2d = request.args.get('run2d', None)
        plate = request.args.get('plate', None)
        mjd = request.args.get('mjd', None)
        fiber = int(request.args.get('fiber', 1))
        sample = int(request.args.get('sample', 1))
        bands = request.args.get('bands', 'all')
        debug_flag = request.args.get('debug', '')

    run2d = int(run2d)
    plate = int(plate)
    mjd = int(mjd)

    root = '/ssd0/sdss/%s/sdss/spectro/redux/' % release
    path = root + '%d/%04i/' % (run2d, plate)
    fname = path + 'spec-%04i-%05i-%04i.npy' % (plate, mjd, fiber)

    if bands == 'all':
        return send_file(fname, mimetype='application/octet-stream')
    else:
        from numpy.lib import recfunctions as rfn
        from tempfile import NamedTemporaryFile

        data = np.load(str(fname))
        cols = data.dtype.names
        dbands = data[[c for c in list(data.dtype.names) if c in bands]]
        dbands = rfn.repack_fields(dbands)
        #bobj = BytesIO()
        #np.save(bobj, dbands, allow_pickle=False)
        #result =  send_file(bobj, mimetype='application/octet-stream')
        #return result

        tmp_file = NamedTemporaryFile(delete=False, dir='/tmp').name
        np.save(tmp_file, dbands, allow_pickle=False)
        result = send_file(tmp_file + '.npy',
                           mimetype='application/octet-stream')
        os.unlink(tmp_file)
        return result
Example #15
    def _send_work_order(self, Work, w):
        """Sends an allocation function order to a worker
        """
        logger.debug("Manager sending work unit to worker {}".format(w))
        self.wcomms[w - 1].send(Work['tag'], Work)
        work_rows = Work['libE_info']['H_rows']
        if len(work_rows):
            if 'repack_fields' in dir():
                self.wcomms[w - 1].send(
                    0, repack_fields(self.hist.H[Work['H_fields']][work_rows]))
            else:
                self.wcomms[w - 1].send(
                    0, self.hist.H[Work['H_fields']][work_rows])
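The 'repack_fields' in dir() test above guards against NumPy versions older than 1.16, where numpy.lib.recfunctions.repack_fields does not exist. A hedged sketch of the same guard written as a try/except import; the array and field names are illustrative only:

import numpy as np

try:
    from numpy.lib.recfunctions import repack_fields   # NumPy >= 1.16
except ImportError:
    repack_fields = None

H = np.zeros(3, dtype=[("x", "f8"), ("f", "f8"), ("sim_id", "i4")])
rows, fields = [0, 2], ["x", "f"]
selection = H[fields][rows]
payload = repack_fields(selection) if repack_fields is not None else selection
print(payload.dtype.names)   # ('x', 'f')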
Example #16
    def __init__(self, fileName, fileNameEdges=None, preprocessingArgs=None):

        self.result_dir = "REVC_results_" + fileName
        self.figure_dir = "REVC_figures_" + fileName

        self.fileName = fileName

        HierarchichalPrinter.__init__(self)

        if not fileNameEdges:
            return

        self.prst("Reading file", fileNameEdges)

        edges = np.genfromtxt(
            fileNameEdges,
            delimiter=",",
            skip_header=True,
            dtype={
                "names":
                ["ID", "from_to_original", "length", "inspection", "lakeID"],
                'formats':
                [IDTYPE, '2' + IDTYPE, "double", "3" + IDTYPE, IDTYPE]
            },
            autostrip=True)

        from_to = np.vstack(
            (edges["from_to_original"], edges["from_to_original"][:, ::-1]))

        edgeData = rf.repack_fields(edges[["ID", "length", "lakeID"]])
        edgeData = np.concatenate((edgeData, edgeData))

        edgeData = add_fields(edgeData, ["inspection"], [object], [None])

        vertexID = np.zeros(0, dtype=IDTYPE)
        vertexData = np.zeros(0, dtype=[("significant", bool)])

        graph = FlexibleGraph(from_to,
                              edgeData,
                              vertexID,
                              vertexData,
                              replacementMode="shortest",
                              lengthLabel="length")

        graph.set_default_vertex_data(True)
        super().__init__(graph, "length", "significant")
        self.preprocessing(preprocessingArgs)

        if fileName:
            self.save(fileName)
Example #17
def mst(arr, calc_dist=True):
    """Determine the minimum spanning tree for a set of points represented
    by their inter-point distances. ie their `W`eights

    Parameters
    ----------
    W : array, normally an interpoint distance array
        Edge weights for example, distance, time, for a set of points.
        W needs to be a square array or a np.triu perhaps

    calc_dist : boolean
        True, if W is a points array, calculate W as the interpoint distance.
        False means that W is not a points array, but some other `weight`
        representing the interpoint relationship

    Returns
    -------
    fr_to : structured array of the (X_orig, Y_orig, X_dest, Y_dest)
        coordinate pairs for the nodes that form the tree edges.
    """
    arr = np.unique(arr, True, False, False, axis=0)[0]
    W = arr[~np.isnan(arr[:, 0])]
    a_copy = np.copy(W)
    if calc_dist:
        W = _e_dist_(W)
    if W.shape[0] != W.shape[1]:
        raise ValueError("W needs to be square matrix of edge weights")
    Np = W.shape[0]
    pairs = []
    pnts_seen = [0]  # Add the first point
    n_seen = 1
    # exclude self connections by assigning inf to the diagonal
    diag = np.arange(Np)
    W[diag, diag] = np.inf
    #
    while n_seen != Np:
        new_edge = np.argmin(W[pnts_seen], axis=None)
        new_edge = divmod(new_edge, Np)
        new_edge = [pnts_seen[new_edge[0]], new_edge[1]]
        pairs.append(new_edge)
        pnts_seen.append(new_edge[1])
        W[pnts_seen, new_edge[1]] = np.inf
        W[new_edge[1], pnts_seen] = np.inf
        n_seen += 1
    pairs = np.array(pairs)
    frum = a_copy[pairs[:, 0]]
    too = a_copy[pairs[:, 1]]
    fr_to = np.concatenate((frum, too), axis=1)  # np.vstack(pairs)
    fr_to = uts(fr_to, names=['X_orig', 'Y_orig', 'X_dest', 'Y_dest'])
    return repack_fields(fr_to)
Example #18
def rasterize(moc_data, order=None):
    """Convert a multi-order HEALPix dataset to fixed-order NESTED ordering.

    Parameters
    ----------
    moc_data : `numpy.ndarray`
        A multi-order HEALPix dataset stored as a Numpy record array whose
        first column is called UNIQ and contains the NUNIQ pixel index. Every
        point on the unit sphere must be contained in exactly one pixel in the
        dataset.
    order : int, optional
        The desired output resolution order, or :obj:`None` for the maximum
        resolution present in the dataset.

    Returns
    -------
    nested_data : `numpy.ndarray`
        A fixed-order, NESTED-ordering HEALPix dataset with all of the columns
        that were in moc_data, with the exception of the UNIQ column.

    """
    if order is None or order < 0:
        order = -1
    else:
        orig_order, orig_nest = uniq2nest(moc_data['UNIQ'])
        to_downsample = order < orig_order
        if np.any(to_downsample):
            to_keep = table.Table(moc_data[~to_downsample], copy=False)
            orig_order = orig_order[to_downsample]
            orig_nest = orig_nest[to_downsample]
            to_downsample = table.Table(moc_data[to_downsample], copy=False)

            ratio = 1 << (2 * np.int64(orig_order - order))
            weights = 1.0 / ratio
            for colname, column in to_downsample.columns.items():
                if colname != 'UNIQ':
                    column *= weights
            to_downsample['UNIQ'] = nest2uniq(order, orig_nest // ratio)
            to_downsample = to_downsample.group_by('UNIQ').groups.aggregate(
                np.sum)

            moc_data = table.vstack((to_keep, to_downsample))

    # Ensure that moc_data has appropriate padding for each of its columns to
    # be properly aligned in order to avoid undefined behavior.
    moc_data = repack_fields(np.asarray(moc_data), align=True)

    return _rasterize(moc_data, order=order)
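The align=True call above goes in the opposite direction of the usual packing: it re-adds the padding that C-aligned field access expects. A minimal sketch, assuming only NumPy (the field names are made up):

import numpy as np
from numpy.lib.recfunctions import repack_fields

packed = np.zeros(2, dtype=[("UNIQ", "i8"), ("PROBDENSITY", "f8"), ("flag", "u1")])
aligned = repack_fields(packed, align=True)   # restore aligned offsets and trailing padding
print(packed.dtype.itemsize, aligned.dtype.itemsize)   # 17 24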
Example #19
    def _reorder_prob(self, prob, model, gmm):
        """Reorder probabilities to have consistent output.

        Parameters
        ----------
        prob : numpy.ndarray, shape (N, n_components)
            Probabilities predicted by the GMM, to be reordered.
        model : str
            Model used for prediction.
        gmm : sklearn.mixture.GaussianMixture
            GMM object of model used for prediction.

        Returns
        -------
        prob : numpy.ndarray, shape (N, n_components)
            Probabilities reordered to a consistent component order.
        """
        n_components = _models[model]['n_components']
        if n_components == 3:
            pins = _polin_pins
        elif n_components == 4:
            pins = _branch_pins

        fields = _models[model]['fields']
        pin_data = pins[fields]

        #  sklearn.gmm can't take in structured arrays, so a workaround...
        try:
            arr = pin_data[fields].copy().view((float, len(fields)))
            pin_prob = gmm.predict_proba(arr)
        except ValueError:
            arr = repack_fields(pin_data[fields]).view((float, len(fields)))
            pin_prob = gmm.predict_proba(arr)

        ordered_indices = [np.argmax(p) for p in pin_prob]

        # check for duplicates
        if len(set(ordered_indices)) != n_components:
            print(f'{model} probabilities were not reordered')
            return prob

        prob[:, list(range(n_components))] = prob[:, ordered_indices]

        return prob
Example #20
    def job(worker_id):
        i_start, i_end = ranges[worker_id]

        # collect data
        for i in range(i_start, i_end):
            # load file into Numpy structured array
            data = np.genfromtxt(valid_files[i],
                                 skip_header=1,
                                 dtype=SAMPLE_DTYPE,
                                 delimiter=',')

            # remove field
            data_cleaned = repack_fields(rmfield(data, 'lon', 'lat'))

            np.savetxt(valid_files[i],
                       data_cleaned,
                       delimiter=',',
                       header=','.join(new_header),
                       fmt=','.join(
                           ['%d', '%f', '%f', '%f', '%f', '%f', '%f', '%s']))
        return
Example #21
def crosstab_array(a, flds=None):
    """Frequency and crosstabulation for structured arrays.

    Parameters
    ----------
    a : array
       Input structured array.
    flds : string or list
       Fields/columns to use in the analysis.  For a single column, a string
       is all that is needed.  Multiple columns require a list of field names.

    Notes
    -----
    (1) Slice the input array by the classification fields.
    (2) Sort the sliced array using the flds as sorting keys.
    (3) Use unique on the sorted array to return the results.
    (4) Reassemble the original columns and the new count data.
    """
    if flds is None:
        return None
    if isinstance(flds, (str)):
        flds = [flds]
    a = repack_fields(a[flds])  # need to repack fields
    # a = _keep_fields(a, flds)  # alternative to repack_fields
    idx = np.argsort(a, axis=0, order=flds)  # (2) sort
    a_sort = a[idx]
    uni, cnts = np.unique(a_sort, return_counts=True)  # (3) unique, count
    dt = uni.dtype.descr
    dt.append(('Count', '<i4'))
    fr = np.empty_like(uni, dtype=dt)
    names = fr.dtype.names
    vals = list(zip(*uni)) + [cnts.tolist()]  # (4) reassemble
    N = len(names)
    for i in range(N):
        fr[names[i]] = vals[i]
    return fr
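A hedged usage sketch for crosstab_array with a tiny made-up structured array, assuming the function above and its repack_fields import are in scope:

import numpy as np

a = np.array([(1, "a"), (1, "a"), (2, "b"), (1, "b")],
             dtype=[("Class", "i4"), ("Code", "U1")])
freq = crosstab_array(a, flds=["Class", "Code"])
print(freq)   # [(1, 'a', 2) (1, 'b', 1) (2, 'b', 1)], fields ('Class', 'Code', 'Count')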
Example #22
async def getSpec(request):
    ''' 
    '''
    params = await request.post()
    try:
        id_list = params['id_list']
        values = params['values']  # NYI
        cutout = params['cutout']  # NYI
        fmt = params['format']
        align = (params['align'].lower() == 'true')
        w0 = float(params['w0'])
        w1 = float(params['w1'])
        context = params['context']
        profile = params['profile']
        debug = (params['debug'].lower() == 'true')
        verbose = (params['verbose'].lower() == 'true')
    except Exception as e:
        logging.error('Param Error: ' + str(e))
        return web.Response(text='Param Error: ' + str(e))

    st_time = time.time()

    # Instantiate the dataset service based on the context.
    svc = _getSvc(context)
    svc.debug = debug
    svc.verbose = verbose

    # From the service call we get a string which we'll need to map to
    # an array of identifiers valid for the service.
    ids = svc.expandIDList(id_list)
    if debug:
        print('GETSPEC ----------')
        print('len ids = ' + str(len(ids)))
        print('ty ids = ' + str(type(ids)))
        print('ty ids elem = ' + str(type(ids[0])))

    # If called from something other than the client API we might not know
    # the wavelength limits of the collection, so compute it here so we can
    # still align properly.
    if w0 in [None, 0.0] and w1 in [None, 0.0] and align:
        w0, w1, nspec = _listSpan(svc, ids)

    res = None
    align = (w0 != w1)
    nspec = 0
    ptime = 0.0
    for id in ids:
        p0 = time.time()
        nspec = nspec + 1
        if fmt.lower() == 'fits':
            fname = svc.dataPath(id, 'fits')
            data = svc.readFile(str(fname))
            return web.Response(body=data)
        else:
            fname = svc.dataPath(id, 'npy')
            data = svc.getData(str(fname))

        if values != 'all':
            # Extract the subset of values.
            dvalues = data[[c for c in list(data.dtype.names) if c in values]]
            data = rfn.repack_fields(dvalues)

        if not align:
            f = data
        else:
            wmin, wmax = data['loglam'][0], data['loglam'][-1]
            disp = float((wmax - wmin) / float(len(data['loglam'])))
            lpad = int(np.around(max((wmin - w0) / disp, 0.0)))
            rpad = int(np.around(max((w1 - wmax) / disp, 0.0)))
            if lpad == 0 and rpad == 0:
                f = data
            else:
                f = np.pad(data, (lpad, rpad),
                           mode='constant',
                           constant_values=0)
                f['loglam'] = np.linspace(w0, w1,
                                          len(f))  # patch wavelength array

            if debug:
                print(str(id))
                print(fname)
                print('wmin,wmax = (%g,%g)  disp=%g' % (wmin, wmax, disp))
                print('w0,w1 = (%g,%g)  pad = (%d,%d)' % (w0, w1, lpad, rpad))
                print('len f = %d   len data = %d' % (len(f), len(data)))

        if res is None:
            res = f
        else:
            res = np.vstack((res, f))
        p1 = time.time()
        ptime = ptime + (p1 - p0)

    if debug:
        print('res type: ' + str(type(res)) + ' shape: ' + str(res.shape))

    # Convert the array to bytes for return.
    fd = BytesIO()
    np.save(fd, res, allow_pickle=False)
    _bytes = fd.getvalue()

    en_time = time.time()
    logging.info ('getSpec time: %g  NSpec: %d  Bytes: %d' % \
                  (en_time-st_time,nspec,len(_bytes)))

    return web.Response(body=_bytes)
Example #23
def download_and_repack(country_code=None,
                        network_code=None,
                        circle=None,
                        token=None,
                        source=None,
                        destination=None,
                        byte_order="b",
                        verbose=False):
    """
    Downloads and packs the base station data.
    So far, only opencellid.org supported.
    Args:
        country_code (int): the country code;
        network_code (int): the network code;
        circle (tuple): latitude, longitude (degrees) and radius in km;
        token (str): service token;
        source (str): downloaded file name;
        destination (str): destination file name;
        byte_order (str): byte order;
        verbose (bool): prints verbose output;
    """
    def v(*args, **kwargs):
        if verbose:
            print(*args, **kwargs)

    byte_order = byte_order.lower()
    if byte_order not in "bl":
        raise ValueError("Unknown byte order: {}".format(byte_order))
    byte_order = dict(b=">", l="<")[byte_order]

    if destination is None:
        if country_code is None:
            destination = "all.bin"
        else:
            if network_code is None:
                destination = "{country_code}.bin".format(
                    country_code=country_code)
            else:
                destination = "{country_code}-{network_code}.bin".format(
                    country_code=country_code, network_code=network_code)

    v("Target: {}".format(destination))

    if source is None:
        if token is None:
            v("No token specified: downloading from git")
            if country_code is None:
                raise ValueError("Cannot download worldwide database yet")
            else:
                url = "https://github.com/pulkin/agps-data/raw/master/opencellid.org/{country_code}.csv.gz".format(
                    country_code=country_code)
        else:
            if country_code is None:
                url = "https://opencellid.org/ocid/downloads?token={token}&type=full&file=cell_towers.csv.gz".format(
                    token=token)
            else:
                url = "https://opencellid.org/ocid/downloads?token={token}&type=mcc&file={country_code}.csv.gz".format(
                    token=token, country_code=country_code)
        v("Downloading {} ...".format(url))
        response = urllib.request.urlopen(
            urllib.request.Request(
                url,
                headers={
                    'User-Agent':
                    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
                }))
        buf = io.BytesIO(response.read())

    else:
        v("Reading {} ...".format(source))
        buf = open(source, 'rb')

    v("Unzipping ...")
    buf_raw = gzip.GzipFile(fileobj=buf, mode='rb')
    buf_txt = io.TextIOWrapper(buf_raw)

    v("Parsing ...")
    dtype = [
        ('radio_type', 'S4'),
        ('mcc', byte_order + 'u2'),
        ('mnc', byte_order + 'u2'),
        ('area_code', byte_order + 'u2'),
        ('cell', byte_order + 'u2'),
        ('lon', byte_order + 'f4'),
        ('lat', byte_order + 'f4'),
    ]
    data = numpy.genfromtxt(buf_txt,
                            dtype=dtype,
                            skip_header=1,
                            delimiter=",",
                            usecols=(0, 1, 2, 3, 4, 6, 7))

    v("Filtering ...")
    data = data[data["radio_type"] == b"GSM"][[
        "mcc", "mnc", "area_code", "cell", "lon", "lat"
    ]]
    if country_code is not None:
        v(" - mcc: {:d}".format(len(data)), end="")
        data = data[data["mcc"] == country_code]
        v(" -> {:d}".format(len(data)))
    if network_code is not None:
        v(" - mnc: {:d}".format(len(data)), end="")
        data = data[data["mnc"] == network_code]
        v(" -> {:d}".format(len(data)))
    if circle is not None:
        v(" - circle: {:d}".format(len(data)), end="")
        theta0, phi0, r0 = circle
        theta = (data["lat"] - theta0) * numpy.pi / 180
        phi = (data["lon"] - phi0) * numpy.pi / 180
        mask = (theta**2 + phi**2 * numpy.cos(theta0)**2) < (r0 * 1e3 /
                                                             earth_radius)**2
        data = data[mask]
        v(" -> {:d}".format(len(data)))

    if len(data) == 0:
        raise ValueError("No data to save")

    v("Sorting ...")
    data = numpy.sort(data, order=("mcc", "mnc", "area_code", "cell"))
    v("Items total: {:d}".format(len(data)))

    v("Preparing tables ...")
    keys = "mcc", "mnc"
    mask = numpy.zeros(len(data), dtype=bool)
    mask[0] = True
    for k in keys:
        mask[1:] |= data[k][1:] != data[k][:-1]
    table_ptrs = numpy.where(mask)[0]
    table_data = recfunctions.repack_fields(data[table_ptrs][list(keys)])

    v("Saving ...")
    with open(destination, 'wb') as f:
        f.write(b'agps-bin')
        f.write(b'\x00')
        f.write({">": b">", "<": b"<"}[byte_order])
        f.write(struct.pack(byte_order + "L", len(table_ptrs)))
        for _d, _p in zip(table_data, table_ptrs):
            f.write(struct.pack(byte_order + "HHL", *_d, _p))
        recfunctions.repack_fields(data[["area_code", "cell", "lon",
                                         "lat"]]).tofile(f)

        v("Total size: {:d} bytes".format(f.tell()))
    v("Done")
Example #24
    def write(self, filename, nside_output, write_pos=False):
        """Write galaxy catalog to disk.

        Returns
        -------
        None
        """

        domain = self.nbody.domain

        if 'ID' not in list(self.catalog.keys()):
            self.catalog['ID'] = np.zeros(len(self.catalog['PX']))

        if 'TRA' not in list(self.catalog.keys()):
            self.catalog['TRA'], self.catalog['TDEC'] = hp.vec2ang(np.vstack(
                [self.catalog['PX'], self.catalog['PY'],
                 self.catalog['PZ']]).T,
                                                                   lonlat=True)

        self.catalog['EPSILON'] = np.zeros((len(self.catalog['PX']), 2))
        self.catalog['SIZE'] = np.zeros(len(self.catalog['PX']))
        self.catalog['KAPPA'] = np.zeros(len(self.catalog['PX']))
        self.catalog['MU'] = np.zeros(len(self.catalog['PX']))
        self.catalog['W'] = np.zeros(len(self.catalog['PX']))

        self.catalog['GAMMA1'] = np.zeros(len(self.catalog['PX']))
        self.catalog['GAMMA2'] = np.zeros(len(self.catalog['PX']))

        self.catalog['DEC'] = np.zeros(len(self.catalog['PX']))
        self.catalog['RA'] = np.zeros(len(self.catalog['PX']))

        self.catalog['LMAG'] = np.zeros_like(self.catalog['TMAG'])
        self.catalog['OMAG'] = np.zeros_like(self.catalog['TMAG'])
        self.catalog['OMAGERR'] = np.zeros_like(self.catalog['TMAG'])
        self.catalog['FLUX'] = np.zeros_like(self.catalog['TMAG'])
        self.catalog['IVAR'] = np.zeros_like(self.catalog['TMAG'])

        cdtype = np.dtype(
            list(
                zip(self.catalog.keys(),
                    [(self.catalog[k].dtype.type, self.catalog[k].shape[1])
                     if len(self.catalog[k].shape) > 1 else
                     self.catalog[k].dtype.type
                     for k in self.catalog.keys()])))

        out = np.zeros(len(self.catalog[list(self.catalog.keys())[0]]),
                       dtype=cdtype)
        for k in self.catalog.keys():
            out[k] = self.catalog[k]

        r = np.sqrt(out['PX']**2 + out['PY']**2 + out['PZ']**2)
        pix = hp.vec2pix(domain.nside,
                         out['PX'],
                         out['PY'],
                         out['PZ'],
                         nest=domain.nest)

        boxnum = domain.boxnum
        # cut off buffer region, make sure we only have the pixel we want
        print('Cutting catalog to {} <= z < {}'.format(
            self.nbody.cosmo.zofR(domain.rbins[boxnum][domain.rbin]),
            self.nbody.cosmo.zofR(domain.rbins[boxnum][domain.rbin + 1])))

        sys.stdout.flush()
        idx = ((domain.rbins[boxnum][domain.rbin] <= r) &
               (r < domain.rbins[boxnum][domain.rbin + 1]) &
               (domain.pix == pix))

        out = out[idx]
        del idx

        keys = list(self.catalog.keys())

        if len(keys) == 0:
            return

        for k in keys:
            del self.catalog[k]

        del self.catalog

        if nside_output != domain.nside:
            map_in = np.arange(12 * domain.nside**2)

            if domain.nest:
                order = 'NESTED'
            else:
                order = 'RING'

            map_out = hp.ud_grade(map_in,
                                  nside_output,
                                  order_in=order,
                                  order_out=order)
            pix, = np.where(map_out == domain.pix)

        else:
            pix = [domain.pix]

        for p in pix:
            fname = '{}.{}.fits'.format(filename, p)
            print('Writing to {}'.format(fname))

            if write_pos:
                pfname = '{}.{}.lens.fits'.format(filename, p)

            if os.path.exists(fname):
                f = fitsio.FITS(fname)
                ngal = f[-1].read_header()['NAXIS2']
                f.close()
            else:
                ngal = 0

            pix = hp.vec2pix(nside_output,
                             out['PX'],
                             out['PY'],
                             out['PZ'],
                             nest=domain.nest)

            idx = pix == p
            if np.sum(idx) < 100:
                continue

            out['ID'][idx] = (p * 1e9 + np.arange(len(out['PX'][idx])) +
                              ngal).astype(np.int64)

            if os.path.exists(fname):
                with fitsio.FITS(fname, 'rw') as f:
                    f[-1].append(out[idx])
            else:
                fitsio.write(fname, out[idx])

            if write_pos:
                if os.path.exists(pfname):
                    with fitsio.FITS(pfname, 'rw') as f:
                        f[-1].append(
                            repack_fields(out[['ID', 'PX', 'PY', 'PZ']][idx]))
                else:
                    fitsio.write(
                        pfname,
                        repack_fields(out[['ID', 'PX', 'PY', 'PZ']][idx]))

        del out
Example #25
def convert2hdf5(modellist,
                 star_columns=None,
                 binary_columns=None,
                 profile_columns=None,
                 add_stopping_condition=True,
                 skip_existing=True,
                 star1_history_file='LOGS/history1.data',
                 star2_history_file='LOGS/history2.data',
                 binary_history_file='LOGS/binary_history.data',
                 log_file='log.txt',
                 profile_files=None,
                 profiles_path='',
                 profile_pattern='*.profile',
                 input_path_kw='path',
                 input_path_prefix='',
                 output_path=None,
                 verbose=False):

    if not os.path.isdir(output_path):
        os.mkdir(output_path)

    for i, model in modellist.iterrows():

        print(input_path_prefix, model[input_path_kw])

        if not os.path.isdir(Path(input_path_prefix, model[input_path_kw])):
            continue

        if skip_existing and os.path.isfile(
                Path(output_path, model[input_path_kw]).with_suffix('.h5')):
            if verbose:
                print(i, model[input_path_kw], ': exists, skipping')
            continue

        if verbose:
            print(i, model[input_path_kw], ': processing')

        # store all columns of the input file in the hdf5 file
        data = {}
        extra_info = {}
        for col in model.index:
            extra_info[col] = model[col]

        # obtain the termination code and store if requested
        termination_code = 'uk'
        if add_stopping_condition:
            lines = get_end_log_file(
                Path(input_path_prefix, model[input_path_kw], log_file))
            for line in lines:
                if 'termination code' in line:
                    termination_code = line.split()[-1]

        extra_info['termination_code'] = termination_code

        # store the nnaps-version in the output data.
        extra_info['nnaps-version'] = __version__

        data['extra_info'] = extra_info

        # check if all history files that are requested are available and can be read. If there is an error,
        # skip to the next model
        history = {}
        if star1_history_file is not None:
            try:
                d1 = read_mesa_output(
                    Path(input_path_prefix, model[input_path_kw],
                         star1_history_file))[1]
                if star_columns is not None:
                    d1 = rf.repack_fields(d1[star_columns])
                history['star1'] = d1
            except Exception as e:
                if verbose:
                    print("Error in reading star1: ", e)
                continue

        if star2_history_file is not None:
            try:
                d2 = read_mesa_output(
                    Path(input_path_prefix, model[input_path_kw],
                         star2_history_file))[1]
                if star_columns is not None:
                    d2 = rf.repack_fields(d2[star_columns])
                history['star2'] = d2
            except Exception as e:
                if verbose:
                    print("Error in reading star2: ", e)
                continue

        if binary_history_file is not None:
            try:
                d3 = read_mesa_output(
                    Path(input_path_prefix, model[input_path_kw],
                         binary_history_file))[1]
                if binary_columns is not None:
                    d3 = rf.repack_fields(d3[binary_columns])
                history['binary'] = d3
            except Exception as e:
                if verbose:
                    print("Error in reading binary: ", e)
                continue

        data['history'] = history

        # check if profiles exist and store them if requested. Also make a profile lookup table (legend)
        profiles = {}
        profile_legend = []
        profile_name_length = 0  # store longest profile name to create recarray of profile_legend
        if profile_files is not None:
            if profile_files == 'all':
                profile_paths = Path(input_path_prefix, model[input_path_kw],
                                     profiles_path).glob(profile_pattern)
            else:
                profile_paths = [
                    Path(input_path_prefix, model[input_path_kw],
                         profiles_path, p) for p in profile_files
                ]

            for filepath in profile_paths:
                if not filepath.is_file():
                    continue

                profile_name = filepath.stem
                header, profile_data = read_mesa_output(filename=filepath,
                                                        only_first=False)

                if profile_columns is not None:
                    profile_data = rf.repack_fields(
                        profile_data[profile_columns])
                profiles[profile_name] = profile_data

                if len(profile_name) > profile_name_length:
                    profile_name_length = len(profile_name)
                profile_legend.append((header['model_number'], profile_name))

        if len(profiles.keys()) >= 1:
            data['profiles'] = profiles
            profile_legend = np.array(profile_legend,
                                      dtype=[('model_number', 'f8'),
                                             ('profile_name',
                                              'a' + str(profile_name_length))])
            data['profile_legend'] = profile_legend

        # rather annoying way to assure that Path doesn't cut off part of the folder name when adding the .h5 suffix
        # if not this will happen: M1.080_M0.502_P192.67_Z0.01129 -> M1.080_M0.502_P192.67_Z0.h5
        output_file = Path(output_path, model[input_path_kw])
        output_file = output_file.with_suffix(output_file.suffix + '.h5')
        fileio.write2hdf5(data, output_file, update=False)
Example #26
def process_sim(idx,
                total,
                net,
                sim_flp,
                tmp_dir,
                warmup_prc,
                keep_prc,
                sequential=False):
    """
    Loads and processes data from a single simulation.

    For logging purposes, "idx" is the index of this simulation amongst "total"
    simulations total. Uses "net" to determine the relevant input and output
    features. "sim_flp" is the path to the simulation file. The parsed results
    are stored in "tmp_dir". Drops the first "warmup_prc" percent of packets.
    Of the remaining packets, only "keep_prc" percent are kept. See
    utils.save_tmp_file() for the format of the results file.

    Returns the path to the results file and a descriptive utils.Sim object.
    """
    sim, dat = utils.load_sim(sim_flp,
                              msg=f"{idx + 1:{f'0{len(str(total))}'}}/{total}")
    if dat is None:
        return None

    # Drop the first few packets so that we consider steady-state behavior only.
    dat = dat[math.floor(dat.shape[0] * warmup_prc / 100):]
    # Split each data matrix into two separate matrices: one with the input
    # features only and one with the output features only. The names of the
    # columns correspond to the feature names in in_spc and out_spc.
    assert net.in_spc, f"{sim_flp}: Empty in spec."
    assert net.out_spc, f"{sim_flp}: Empty out spec."
    dat_in = recfunctions.repack_fields(dat[net.in_spc])
    dat_out = recfunctions.repack_fields(dat[net.out_spc])
    # Convert output features to class labels.
    dat_out_raw = dat_out
    dat_out = net.convert_to_class(sim, dat_out)

    # If the results contains NaNs or Infs, then discard this
    # simulation.
    def has_non_finite(arr):
        for fet in arr.dtype.names:
            if not np.isfinite(arr[fet]).all():
                print(f"    Simulation {sim_flp} has NaNs of Infs in feature "
                      f"{fet}")
                return True
        return False

    if has_non_finite(dat_in) or has_non_finite(dat_out):
        return None

    # Verify data.
    assert dat_in.shape[0] == dat_out.shape[0], \
        f"{sim_flp}: Input and output should have the same number of rows."
    # Find the uniques classes in the output features and make sure
    # that they are properly formed. Assumes that dat_out is a
    # structured numpy array containing a column named "class".
    for cls in set(dat_out["class"].tolist()):
        assert 0 <= cls < net.num_clss, f"Invalid class: {cls}"

    # Transform the data as required by this specific model.
    dat_in, dat_out, dat_out_raw, dat_out_oracle, scl_grps = net.modify_data(
        sim,
        dat_in,
        dat_out,
        dat_out_raw,
        # Must put the column name in a list for the result to be
        # a structured array.
        dat_out_oracle=dat[["mathis model label-ewma-alpha0.01"]],
        sequential=sequential)

    # Select a fraction of the data.
    num_rows = dat_in.shape[0]
    num_to_pick = math.ceil(num_rows * keep_prc / 100)
    idxs = np.random.randint(0, num_rows, num_to_pick)
    dat_in = dat_in[idxs]
    dat_out = dat_out[idxs]
    dat_out_raw = dat_out_raw[idxs]
    dat_out_oracle = dat_out_oracle[idxs]

    # To avoid errors with sending large matrices between processes,
    # store the results in a temporary file.
    dat_flp = path.join(tmp_dir, f"{path.basename(sim_flp)[:-4]}_tmp.npz")
    utils.save_tmp_file(dat_flp, dat_in, dat_out, dat_out_raw, dat_out_oracle,
                        scl_grps)
    return dat_flp, sim
Example #27
def extract_fets(dat, split_name, net):
    """
    Extracts net's the input and output features from dat. Returns a tuple of
    the form:
        (dat_in, dat_out, dat_extra, scaling groups).
    """
    # Split each data matrix into two separate matrices: one with the input
    # features only and one with the output features only. The names of the
    # columns correspond to the feature names in in_spc and out_spc.
    assert net.in_spc, f"{net.name}: Empty in spec."
    num_out_fets = len(net.out_spc)
    # This is not a strict requirement from a modeling point of view,
    # but is assumed to make data processing easier.
    assert num_out_fets == 1, \
        (f"{net.name}: Out spec must contain a single feature, but actually "
         f"contains: {net.out_spc}")

    # Remove samples where the ground truth output is unknown.
    len_before = dat.shape[0]
    dat = dat[dat[list(net.out_spc)] != -1][0]
    removed = len_before - dat.shape[0]
    if removed > 0:
        print(f"Removed {removed} rows with unknown out_spc from split "
              f"\"{split_name}\".")

    dat_in = recfunctions.repack_fields(dat[list(net.in_spc)])
    dat_out = recfunctions.repack_fields(dat[list(net.out_spc)])
    # Create a structured array to hold extra data that will not be used as
    # features but may be needed by the training/testing process.
    dtype_extra = (
        # The "raw" entry is the unconverted out_spc.
        [("raw", [typ
                  for typ in dat.dtype.descr if typ[0] in net.out_spc][0][1])]
        + [typ for typ in dat.dtype.descr if typ[0] in features.EXTRA_FETS])
    dat_extra = np.empty(shape=dat.shape, dtype=dtype_extra)
    dat_extra["raw"] = dat_out
    for typ in features.EXTRA_FETS:
        dat_extra[typ] = dat[typ]
    dat_extra = recfunctions.repack_fields(dat_extra)

    is_dt = isinstance(net, models.HistGbdtSklearnWrapper)
    if not is_dt:
        # Verify that there are no NaNs or Infs in the data.
        for fet in dat_in.dtype.names:
            assert (not (
                np.isnan(dat_in[fet]).any() or
                np.isinf(dat_in[fet]).any())), \
                ("Warning: NaNs or Infs in input feature for split "
                 f"\"{split_name}\": {fet}")
        assert (not (
            np.isnan(dat_out[features.LABEL_FET]).any() or
            np.isinf(dat_out[features.LABEL_FET]).any())), \
            f"Warning: NaNs or Infs in ground truth for split \"{split_name}\"."

    if dat_in.shape[0] > 0:
        # Convert all instances of -1 (feature value unknown) to either the mean for
        # that feature or NaN.
        bad_fets = []
        for fet in dat_in.dtype.names:
            invalid = dat_in[fet] == -1
            if invalid.all():
                bad_fets.append(fet)
                continue
            dat_in[fet][invalid] = (float("NaN") if is_dt else np.mean(
                dat_in[fet][np.logical_not(invalid)]))
            assert (dat_in[fet] != -1).all(), \
                f"Found \"-1\" in split \"{split_name}\" feature: {fet}"
        assert not bad_fets, \
            (f"Features in split \"{split_name}\" contain only \"-1\" "
             f"({len(bad_fets)}): {bad_fets}")

    # Convert output features to class labels.
    dat_out = net.convert_to_class(dat_out)

    # Verify data.
    assert dat_in.shape[0] == dat_out.shape[0], \
        "Input and output should have the same number of rows."
    # Find the uniques classes in the output features and make sure that they
    # are properly formed. Assumes that dat_out is a structured numpy array
    # containing a single column specified by features.LABEL_FET.
    for cls in np.unique(dat_out[features.LABEL_FET]).tolist():
        assert 0 <= cls < net.num_clss, f"Invalid class: {cls}"

    # Transform the data as required by this specific model.
    # TODO: Refactor this to be compatible with bulk data splits.
    # dat_in, dat_out, dat_extra, scl_grps = net.modify_data(
    #     exp, dat_in, dat_out, dat_extra, sequential=sequential)
    scl_grps = list(range(len(dat_in.dtype.names)))

    return dat_in, dat_out, dat_extra, scl_grps
Example #28
def fc_to_Geo(in_fc, geom_kind=2, minX=0, minY=0, sp_ref=None, info=""):
    """Convert a FeatureClassToNumPyArray to a Geo array.

    This works with the geometry only.  Skip the attributes for later.  The
    processing requirements are listed below.  Just copy and paste.

    Parameters
    ----------
    in_fc : featureclass
        Featureclass in a file geodatabase.
    geom_kind : integer
        Points (0), Polylines (1) and Polygons (2)

    minX, minY : numbers
        If these values are 0, then the minimum values will be determined and
        used to shift the data towards the origin.
    sp_ref : text
        Spatial reference name.  eg `'NAD_1983_CSRS_MTM_9'`

    Notes
    -----
    The `arcpy.da.Describe` method takes a substantial amount of time.
    >>> %timeit Describe(fc2)
    ... 355 ms ± 17.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    """
    def _area_part_(a):
        """Mini e_area, used by areas and centroids."""
        x0, y1 = (a.T)[:, 1:]
        x1, y0 = (a.T)[:, :-1]
        e0 = np.einsum('...i,...i->...i', x0, y0)
        e1 = np.einsum('...i,...i->...i', x1, y1)
        return np.sum((e0 - e1) * 0.5)

    def _cw_(a):
        """Clockwise check."""
        return 1 if _area_part_(a) > 0. else 0

    # -- (1) Foundational steps
    # Create the array, extract the object id values.
    # To avoid floating point issues, extract the coordinates, round them to a
    # finite precision and shift them to the x-y origin
    #
    kind = geom_kind
    if sp_ref is None:  # sp_ref = get_SR(in_fc, verbose=False)
        sp_ref = "undefined"
    a = FeatureClassToNumPyArray(
        in_fc, ['OID@', 'SHAPE@X', 'SHAPE@Y'],
        explode_to_points=True)  # spatial_reference=sp_ref
    oids = a['OID@']
    xy = a[['SHAPE@X', 'SHAPE@Y']]
    mn = [np.min(xy['SHAPE@X']), np.min(xy['SHAPE@Y'])]
    mx = [np.max(xy['SHAPE@X']), np.max(xy['SHAPE@Y'])]
    extent = np.array([mn, mx])
    # -- shift if needed
    dx, dy = mn
    if minX != 0.:
        dx = minX  # mn[0] - minX
    if minY != 0.:
        dy = minY  # mn[1] - minY
    xy['SHAPE@X'] = np.round(xy['SHAPE@X'] - dx, 3)
    xy['SHAPE@Y'] = np.round(xy['SHAPE@Y'] - dy, 3)
    xy.dtype.names = ['X', 'Y']
    xy = repack_fields(xy)
    #
    # -- (2) Prepare the oid data for use in identifying from-to points.
    uniq, indx, cnts = np.unique(oids, True, return_counts=True)
    id_vals = oids[indx]
    indx = np.concatenate((indx, [a.shape[0]]))
    #
    # -- (3) Construct the IFT data using `id_fr_to` to carry the load.
    IFT_ = np.asarray(id_fr_to(xy, oids))
    cols = IFT_.shape[0]
    IFT = np.full((cols, 6), -1, dtype=np.int32)
    IFT[:, :3] = IFT_
    #
    # -- (4) clockwise check for polygon parts to identify outer/inner rings
    xy_arr = stu(xy)  # View the data as an unstructured array
    if kind == 2:  # polygons
        cl_wise = np.array([_cw_(xy_arr[i[1]:i[2]]) for i in IFT_])
    else:  # not relevant for polylines or points
        cl_wise = np.full_like(oids, -1)
    IFT[:, 3] = cl_wise
    #
    # -- (5) construct part_ids and pnt_nums
    if kind == 2:
        parts = [np.cumsum(IFT[:, 3][IFT[:, 0] == i]) for i in id_vals]
        part_ids = np.concatenate(parts)
        ar = np.where(IFT[:, 3] == 1)[0]
        ar0 = np.stack((ar[:-1], ar[1:])).T
        pnt_nums = np.zeros(IFT.shape[0], dtype=np.int32)
        for (i, j) in ar0:  # now provide the point numbers per part per shape
            pnt_nums[i:j] = np.arange((j - i))  # smooth!!!
    else:
        part_ids = np.ones_like(oids)
        pnt_nums = np.ones_like(oids)
    IFT[:, 4] = part_ids
    IFT[:, 5] = pnt_nums
    #
    # -- (6) Create the output array... as easy as ``a`` to ``z``
    z = Geo(xy_arr, IFT, kind, Extent=extent, Info="test", SR=sp_ref)
    out = copy.deepcopy(z)
    return out