Exemple #1
0
def into(a, b, **kwargs):
    """Build a ``bcolz.ctable`` from the named fields of ``b``.

    The field names are read from element ``[1]`` of
    ``dshape(nd.dshape_of(b))`` and each field is fetched from ``b`` by
    attribute access.  A field whose numpy dtype (as reported by
    ``to_numpy_dtype``) is ``object`` goes through ``nd.as_py`` and
    ``np.asarray``; any other field is converted by calling ``into`` with
    an empty ``np.ndarray`` as the target.

    ``a`` is not read in this body (presumably it only selects this
    conversion when ``into`` is dispatched on types -- confirm at the
    registration site).  ``kwargs`` are forwarded to ``bcolz.ctable``.
    """
    names = dshape(nd.dshape_of(b))[1].names
    fields = [getattr(b, name) for name in names]
    object_dtype = np.dtype('O')

    columns = []
    for field in fields:
        if to_numpy_dtype(dshape(nd.dshape_of(field))) == object_dtype:
            converted = np.asarray(nd.as_py(field))
        else:
            converted = into(np.ndarray(0), field)
        columns.append(converted)

    return bcolz.ctable(columns, names=names, **kwargs)
Exemple #2
0
def into(a, b, **kwargs):
    """Convert the named fields of ``b`` into columns of a ``bcolz.ctable``.

    Names come from ``dshape(nd.dshape_of(b))[1].names``; every name is
    looked up on ``b`` with ``getattr``.  Fields with an ``object`` numpy
    dtype are materialised via ``nd.as_py`` + ``np.asarray``; all others are
    handed back to ``into`` with an empty ``np.ndarray`` target.

    ``a`` is unused in this body; extra keyword arguments are passed on to
    ``bcolz.ctable``.
    """
    def as_column(field):
        # Object-typed fields cannot take the ndarray route used below.
        np_dtype = to_numpy_dtype(dshape(nd.dshape_of(field)))
        if np_dtype == np.dtype('O'):
            return np.asarray(nd.as_py(field))
        return into(np.ndarray(0), field)

    names = dshape(nd.dshape_of(b))[1].names
    fields = [getattr(b, name) for name in names]
    columns = [as_column(field) for field in fields]

    return bcolz.ctable(columns, names=names, **kwargs)
    def groupby(self, json_cmd):
        """Run a group-by on a session array and record both results.

        ``json_cmd`` is a JSON object string with:

        * ``'fields'`` (required): names passed to ``nd.fields`` to select
          the grouping key.
        * ``'input'`` (optional): URL of the input array.  Defaults to
          ``self.base_url + self.array_name`` and must start with
          ``self.base_url``.

        Two deferred arrays are created through ``self.array_provider``: one
        for the evaluated group-by result and one for its ``groups``.  For
        each, element 0 of the returned pair is written to and closed, and
        element 1 is appended to ``self.base_url`` to form the output URL.

        Returns a ``(content_type, body)`` tuple; ``body`` is a JSON string
        holding the session URL, both output URLs and both datashapes.

        Raises ``RuntimeError`` when the input URL does not start with
        ``self.base_url`` and ``KeyError`` when ``'fields'`` is missing.
        """
        print('GroupBy operation')
        cmd = json.loads(json_cmd)
        array_url = cmd.get('input', self.base_url + self.array_name)
        if not array_url.startswith(self.base_url):
            raise RuntimeError('Input array must start with the base url')
        array_name = array_url[len(self.base_url):]
        fields = cmd['fields']

        arr = self.get_session_array(array_name)[...].ddesc.dynd_arr()

        # Do the groupby, get its groups, then
        # evaluate it because deferred operations
        # through the groupby won't work well yet.
        res = nd.groupby(arr, nd.fields(arr, *fields))
        groups = res.groups
        res = res.eval()

        # Write out the groupby result
        defarr_gb = self.array_provider.create_deferred_array_filename(
            self.session_name, 'groupby_', array(res))
        try:
            dshape_gb = nd.dshape_of(res)
            defarr_gb[0].write(
                json.dumps({
                    'dshape': dshape_gb,
                    'command': 'groupby',
                    'params': {
                        'fields': fields
                    }
                }))
        finally:
            # Release the handle even if serialising or writing fails.
            defarr_gb[0].close()

        # Write out the groups
        defarr_groups = self.array_provider.create_deferred_array_filename(
            self.session_name, 'groups_', groups)
        try:
            dshape_groups = nd.dshape_of(groups)
            defarr_groups[0].write(
                json.dumps({
                    'dshape': dshape_groups,
                    'command': 'groupby.groups',
                    'params': {
                        'fields': fields
                    }
                }))
        finally:
            defarr_groups[0].close()

        content_type = 'application/json; charset=utf-8'
        body = json.dumps({
            'session': self.base_url + self.session_name,
            'output_gb': self.base_url + defarr_gb[1],
            'dshape_gb': dshape_gb,
            'output_groups': self.base_url + defarr_groups[1],
            'dshape_groups': dshape_groups
        })
        return (content_type, body)
    def groupby(self, json_cmd):
        """Group a session array by the requested fields and store the output.

        ``json_cmd`` is a JSON object string.  ``'fields'`` (required) lists
        the names handed to ``nd.fields``; ``'input'`` (optional) is the
        input array URL, defaulting to ``self.base_url + self.array_name``.

        The evaluated group-by result and its ``groups`` are each registered
        as a deferred array via ``self.array_provider``; a small JSON record
        describing the command is written to element 0 of each returned pair,
        which is then closed.

        Returns ``(content_type, body)`` where ``body`` is a JSON string with
        the session URL, the two output URLs and the two datashapes.

        Raises ``RuntimeError`` if the input URL does not start with
        ``self.base_url``; ``KeyError`` if ``'fields'`` is absent.
        """
        def write_record(handle, record):
            # Always close the handle, including when the write fails.
            try:
                handle.write(json.dumps(record))
            finally:
                handle.close()

        print('GroupBy operation')
        cmd = json.loads(json_cmd)
        array_url = cmd.get('input', self.base_url + self.array_name)
        if not array_url.startswith(self.base_url):
            raise RuntimeError('Input array must start with the base url')
        array_name = array_url[len(self.base_url):]
        fields = cmd['fields']

        arr = self.get_session_array(array_name)[...].ddesc.dynd_arr()

        # Do the groupby, get its groups, then
        # evaluate it because deferred operations
        # through the groupby won't work well yet.
        res = nd.groupby(arr, nd.fields(arr, *fields))
        groups = res.groups
        res = res.eval()

        # Write out the groupby result
        defarr_gb = self.array_provider.create_deferred_array_filename(
                        self.session_name, 'groupby_', array(res))
        try:
            dshape_gb = nd.dshape_of(res)
        except Exception:
            # Do not leave the handle open when the datashape lookup fails.
            defarr_gb[0].close()
            raise
        write_record(defarr_gb[0], {
                'dshape': dshape_gb,
                'command': 'groupby',
                'params': {
                    'fields': fields
                }
            })

        # Write out the groups
        defarr_groups = self.array_provider.create_deferred_array_filename(
                        self.session_name, 'groups_', groups)
        try:
            dshape_groups = nd.dshape_of(groups)
        except Exception:
            defarr_groups[0].close()
            raise
        write_record(defarr_groups[0], {
                'dshape': dshape_groups,
                'command': 'groupby.groups',
                'params': {
                    'fields': fields
                }
            })

        content_type = 'application/json; charset=utf-8'
        body = json.dumps({
                'session': self.base_url + self.session_name,
                'output_gb': self.base_url + defarr_gb[1],
                'dshape_gb': dshape_gb,
                'output_groups': self.base_url + defarr_groups[1],
                'dshape_groups': dshape_groups
            })
        return (content_type, body)
    def make_computed_fields(self, json_cmd):
        """Add computed fields to a session array and record the result.

        ``json_cmd`` is a JSON object string with:

        * ``'fields'`` (required): passed through to
          ``nd.make_computed_fields``.
        * ``'input'`` (optional): input array URL; defaults to
          ``self.base_url + self.array_name`` and must start with
          ``self.base_url``.
        * ``'replace_undim'`` (optional, default ``0``) and ``'fnname'``
          (optional, default ``None``): also passed through unchanged.

        The result is registered as a deferred array through
        ``self.array_provider``; a JSON record of the command is written to
        element 0 of the returned pair, which is then closed.

        Returns ``(content_type, body)`` where ``body`` is a JSON string with
        the session URL, the output URL and the result datashape.

        Raises ``RuntimeError`` if the input URL does not start with
        ``self.base_url``; ``KeyError`` if ``'fields'`` is absent.
        """
        print('Adding computed fields')
        cmd = json.loads(json_cmd)
        array_url = cmd.get('input', self.base_url + self.array_name)
        if not array_url.startswith(self.base_url):
            raise RuntimeError('Input array must start with the base url')
        array_name = array_url[len(self.base_url):]
        fields = cmd['fields']
        replace_undim = cmd.get('replace_undim', 0)
        fnname = cmd.get('fnname', None)

        arr = self.get_session_array(array_name).ddesc.dynd_arr()

        res = nd.make_computed_fields(arr, replace_undim, fields, fnname)
        defarr = self.array_provider.create_deferred_array_filename(
            self.session_name, 'computed_fields_', array(res))
        try:
            dshape = nd.dshape_of(res)
            defarr[0].write(
                json.dumps({
                    'dshape': dshape,
                    'command': 'make_computed_fields',
                    'params': {
                        'fields': fields,
                        'replace_undim': replace_undim,
                        'fnname': fnname
                    }
                }))
        finally:
            # Release the handle even if serialising or writing fails.
            defarr[0].close()
        content_type = 'application/json; charset=utf-8'
        body = json.dumps({
            'session': self.base_url + self.session_name,
            'output': self.base_url + defarr[1],
            'dshape': dshape
        })
        return (content_type, body)
 def sort(self, json_cmd):
     """Sort a session array by one field and record the result.

     ``json_cmd`` is a JSON object string with:

     * ``'field'`` (required): name of the field whose values drive
       ``np.argsort``.
     * ``'input'`` (optional): input array URL; defaults to
       ``self.base_url + self.array_name`` and must start with
       ``self.base_url``.

     The array is converted with ``as_numpy``, reordered by the argsort
     indices and wrapped in ``nd.ndobject``.  The result is registered as a
     deferred array through ``self.array_provider``; a JSON record of the
     command is written to element 0 of the returned pair, which is then
     closed.

     Returns ``(content_type, body)`` where ``body`` is a JSON string with
     the session URL, the output URL and the result datashape.

     Raises ``RuntimeError`` if the input URL does not start with
     ``self.base_url``; ``KeyError`` if ``'field'`` is absent.
     """
     import numpy as np
     print('sorting')
     cmd = json.loads(json_cmd)
     array_url = cmd.get('input', self.base_url + self.array_name)
     if not array_url.startswith(self.base_url):
         raise RuntimeError('Input array must start with the base url')
     array_name = array_url[len(self.base_url):]
     field = cmd['field']
     arr = self.get_session_array(array_name)
     nparr = as_numpy(arr)
     idxs = np.argsort(nparr[field])
     res = nd.ndobject(nparr[idxs])
     defarr = self.array_provider.create_deferred_array_filename(
         self.session_name, 'sort_', res)
     try:
         dshape = nd.dshape_of(res)
         defarr[0].write(
             json.dumps({
                 'dshape': dshape,
                 'command': 'sort',
                 'params': {
                     'field': field,
                 }
             }))
     finally:
         # Release the handle even if serialising or writing fails.
         defarr[0].close()
     content_type = 'application/json; charset=utf-8'
     body = json.dumps({
         'session': self.base_url + self.session_name,
         'output': self.base_url + defarr[1],
         'dshape': dshape
     })
     return (content_type, body)
    def make_computed_fields(self, json_cmd):
        """Compute extra fields on a session array and store the output.

        ``json_cmd`` is a JSON object string.  ``'fields'`` is required;
        ``'input'`` defaults to ``self.base_url + self.array_name``,
        ``'replace_undim'`` defaults to ``0`` and ``'fnname'`` defaults to
        ``None``.  ``fields``, ``replace_undim`` and ``fnname`` are passed
        unchanged to ``nd.make_computed_fields``.

        The result is registered as a deferred array via
        ``self.array_provider``.  A JSON record describing the command is
        written to element 0 of the returned pair, which is closed afterwards
        whether or not the write succeeded.

        Returns ``(content_type, body)``; ``body`` is a JSON string with the
        session URL, the output URL and the result datashape.

        Raises ``RuntimeError`` if the input URL does not start with
        ``self.base_url``; ``KeyError`` if ``'fields'`` is absent.
        """
        print('Adding computed fields')
        cmd = json.loads(json_cmd)
        array_url = cmd.get('input', self.base_url + self.array_name)
        if not array_url.startswith(self.base_url):
            raise RuntimeError('Input array must start with the base url')
        array_name = array_url[len(self.base_url):]
        fields = cmd['fields']
        replace_undim = cmd.get('replace_undim', 0)
        fnname = cmd.get('fnname', None)

        arr = self.get_session_array(array_name).ddesc.dynd_arr()

        res = nd.make_computed_fields(arr, replace_undim, fields, fnname)
        defarr = self.array_provider.create_deferred_array_filename(
                        self.session_name, 'computed_fields_', array(res))
        handle = defarr[0]
        try:
            dshape = nd.dshape_of(res)
            record = json.dumps({
                'dshape': dshape,
                'command': 'make_computed_fields',
                'params': {
                    'fields': fields,
                    'replace_undim': replace_undim,
                    'fnname': fnname
                }
            })
            handle.write(record)
        finally:
            # The handle must not stay open when the write fails.
            handle.close()
        content_type = 'application/json; charset=utf-8'
        body = json.dumps({
                'session': self.base_url + self.session_name,
                'output': self.base_url + defarr[1],
                'dshape': dshape
            })
        return (content_type, body)
 def sort(self, json_cmd):
     """Sort a session array by a single field and store the output.

     ``json_cmd`` is a JSON object string.  ``'field'`` is required and
     names the field passed to ``np.argsort``; ``'input'`` defaults to
     ``self.base_url + self.array_name``.

     The array is converted with ``as_numpy``, indexed by the argsort
     result and wrapped in ``nd.ndobject``.  That result is registered as a
     deferred array via ``self.array_provider``; a JSON record describing
     the command is written to element 0 of the returned pair, which is
     closed afterwards whether or not the write succeeded.

     Returns ``(content_type, body)``; ``body`` is a JSON string with the
     session URL, the output URL and the result datashape.

     Raises ``RuntimeError`` if the input URL does not start with
     ``self.base_url``; ``KeyError`` if ``'field'`` is absent.
     """
     import numpy as np
     print('sorting')
     cmd = json.loads(json_cmd)
     array_url = cmd.get('input', self.base_url + self.array_name)
     if not array_url.startswith(self.base_url):
         raise RuntimeError('Input array must start with the base url')
     array_name = array_url[len(self.base_url):]
     field = cmd['field']
     arr = self.get_session_array(array_name)
     nparr = as_numpy(arr)
     idxs = np.argsort(nparr[field])
     res = nd.ndobject(nparr[idxs])
     defarr = self.array_provider.create_deferred_array_filename(
                     self.session_name, 'sort_', res)
     handle = defarr[0]
     try:
         dshape = nd.dshape_of(res)
         record = json.dumps({
             'dshape': dshape,
             'command': 'sort',
             'params': {
                 'field': field,
             }
         })
         handle.write(record)
     finally:
         # The handle must not stay open when the write fails.
         handle.close()
     content_type = 'application/json; charset=utf-8'
     body = json.dumps({
             'session': self.base_url + self.session_name,
             'output': self.base_url + defarr[1],
             'dshape': dshape
         })
     return (content_type, body)
 def __init__(self, dyndarr, nindex):
     """Store ``dyndarr`` together with the datashape left after indexing.

     ``nindex`` is the number of indices to be applied.  The stored
     datashape is ``subarray(nindex)`` of the array's datashape, and the
     stored type is ``ndt.type`` built from that datashape's string form.

     Raises ``IndexError`` when ``nindex`` exceeds ``nd.ndim_of(dyndarr)``.
     """
     ndim = nd.ndim_of(dyndarr)
     if nindex > ndim:
         raise IndexError('Cannot have more indices than dimensions')

     full_ds = datashape.dshape(nd.dshape_of(dyndarr))
     remaining_ds = full_ds.subarray(nindex)
     remaining_type = ndt.type(str(remaining_ds))

     self._nindex = nindex
     self._dshape = remaining_ds
     self._c_dtype = remaining_type
     self._dyndarr = dyndarr
 def test_array_from_ptr(self):
     """Exercise ``_lowlevel.array_from_ptr`` over a ctypes int32 buffer.

     Covers a read-write and a read-only ``cfixed[3]`` view, a read-only
     ``3 * int32`` view, and the error raised for a ``strided`` type.
     """
     # cfixed_dim arrmeta is redundant so this is ok
     a = (ctypes.c_int32 * 3)(3, 6, 9)

     # Readwrite version using cfixed
     view = _lowlevel.array_from_ptr(
         'cfixed[3] * int32', ctypes.addressof(a), a, 'readwrite')
     self.assertEqual(_lowlevel.data_address_of(view), ctypes.addressof(a))
     self.assertEqual(nd.dshape_of(view), '3 * int32')
     self.assertEqual(nd.as_py(view), [3, 6, 9])
     # A write through the view must land in the ctypes buffer.
     view[1] = 10
     self.assertEqual(a[1], 10)

     # Readonly version using cfixed
     view = _lowlevel.array_from_ptr(
         'cfixed[3] * int32', ctypes.addressof(a), a, 'readonly')
     self.assertEqual(nd.as_py(view), [3, 10, 9])

     def assign_to(target):
         target[1] = 100

     self.assertRaises(RuntimeError, assign_to, view)

     # Using a fixed dim default-constructs the arrmeta, so works too
     view = _lowlevel.array_from_ptr(
         '3 * int32', ctypes.addressof(a), a, 'readonly')
     self.assertEqual(nd.as_py(view), [3, 10, 9])

     # Should get an error if we try strided, because the size is unknown
     def make_strided():
         return _lowlevel.array_from_ptr(
             'strided * int32', ctypes.addressof(a), a, 'readonly')

     self.assertRaises(RuntimeError, make_strided)
 def __init__(self, dyndarr):
     """Set up iteration state over the first dimension of ``dyndarr``.

     Stores the array, its length, a start index of 0, the datashape of
     one element (``subarray(1)`` of the array's datashape) and the
     ``ndt.type`` built from that element datashape's string form.

     Raises ``IndexError`` when ``nd.ndim_of(dyndarr)`` is not positive.
     """
     if not nd.ndim_of(dyndarr) > 0:
         raise IndexError('Need at least one dimension for iteration')

     length = len(dyndarr)
     element_ds = datashape.dshape(nd.dshape_of(dyndarr)).subarray(1)
     element_type = ndt.type(str(element_ds))

     self._index = 0
     self._len = length
     self._dshape = element_ds
     self._c_dtype = element_type
     self._dyndarr = dyndarr
Exemple #12
0
def into(a, b):
    """Convert ``b`` into a ``pandas.DataFrame``.

    Column labels are chosen in this order:

    1. the columns of ``a``, when it has any;
    2. the field names of the last element of ``b``'s datashape, when that
       element is a ``Record``;
    3. otherwise none are passed and pandas assigns its defaults.

    The data itself always comes from ``nd.as_py(b)``.

    Emptiness is tested with ``len`` rather than truthiness: ``a.columns``
    is a pandas ``Index``, and truth-testing one raises ``ValueError``.
    """
    ds = dshape(nd.dshape_of(b))
    if len(a.columns) > 0:
        names = a.columns
    elif isinstance(ds[-1], Record):
        names = ds[-1].names
    else:
        names = None

    # ``names`` may be a pandas Index here, so never use it as a boolean.
    if names is not None and len(names) > 0:
        return pd.DataFrame(nd.as_py(b), columns=names)
    return pd.DataFrame(nd.as_py(b))
 def __init__(self, dyndarr):
     """Set up buffered iteration state over ``dyndarr``'s first dimension.

     Stores the array, its length, a start index of 0, the element
     datashape (``subarray(1)`` of the array's datashape) and the matching
     ``ndt.type``.  ``_usebuffer`` is true when the ``ndt.type`` built from
     the full datashape string differs from ``nd.type_of(dyndarr)``; the
     buffer itself starts as ``None`` with a buffer index of -1.

     Raises ``IndexError`` when ``nd.ndim_of(dyndarr)`` is not positive.
     """
     if not nd.ndim_of(dyndarr) > 0:
         raise IndexError('Need at least one dimension for iteration')

     length = len(dyndarr)
     full_ds = datashape.dshape(nd.dshape_of(dyndarr))
     element_ds = full_ds.subarray(1)
     element_type = ndt.type(str(element_ds))
     needs_buffer = (ndt.type(str(full_ds)) != nd.type_of(dyndarr))

     self._index = 0
     self._len = length
     self._dshape = element_ds
     self._c_dtype = element_type
     self._usebuffer = needs_buffer
     self._buffer = None
     self._buffer_index = -1
     self._dyndarr = dyndarr
Exemple #14
0
def into(a, b):
    """Build a ``pandas.DataFrame`` from ``b``, labelled like ``a`` if possible.

    The columns of ``a`` are used when it has any.  Failing that, if the
    last element of ``dshape(nd.dshape_of(b))`` is a ``Record``, its field
    names are used.  With no usable names the frame is built without a
    ``columns`` argument.  The data always comes from ``nd.as_py(b)``.

    ``a.columns`` is a pandas ``Index``, whose truth value raises
    ``ValueError``, so emptiness is checked with ``len`` instead.
    """
    ds = dshape(nd.dshape_of(b))
    names = None
    if len(a.columns) > 0:
        names = a.columns
    elif isinstance(ds[-1], Record):
        names = ds[-1].names

    # Explicit length check: ``names`` can be an Index, not just a list.
    has_names = names is not None and len(names) > 0
    if has_names:
        return pd.DataFrame(nd.as_py(b), columns=names)
    return pd.DataFrame(nd.as_py(b))
Exemple #15
0
 def test_array_from_ptr(self):
     """Exercise ``_lowlevel.array_from_ptr`` with a ``3 * int32`` type.

     Checks a read-write view (address, datashape, contents, write-through)
     and a read-only view (contents, assignment raises ``RuntimeError``).
     """
     a = (ctypes.c_int32 * 3)(3, 6, 9)

     # Readwrite version
     view = _lowlevel.array_from_ptr(
         ndt.type('3 * int32'), ctypes.addressof(a), a, 'readwrite')
     self.assertEqual(_lowlevel.data_address_of(view), ctypes.addressof(a))
     self.assertEqual(nd.dshape_of(view), '3 * int32')
     self.assertEqual(nd.as_py(view), [3, 6, 9])
     # A write through the view must land in the ctypes buffer.
     view[1] = 10
     self.assertEqual(a[1], 10)

     # Readonly version
     view = _lowlevel.array_from_ptr(
         ndt.type('3 * int32'), ctypes.addressof(a), a, 'readonly')
     self.assertEqual(nd.as_py(view), [3, 10, 9])

     def assign_to(target):
         target[1] = 100

     self.assertRaises(RuntimeError, assign_to, view)
Exemple #16
0
    def handle_array_query(self, environ, start_response):
        """WSGI handler for a request addressed to a single array.

        The array name and indexers are parsed from ``PATH_INFO`` and the
        array is fetched with ``self.get_array``.  A ``GET`` with an empty
        query string returns the HTML page from ``self.html_array``.
        Otherwise the ``r`` parameter (from the query string for ``GET``, or
        from the request body for ``POST``) selects the response:

        * ``data.json``      -- the array formatted as JSON
        * ``datashape``      -- ``nd.dshape_of(arr)``
        * ``dyndtype``       -- ``str(nd.type_of(arr))``
        * ``dynddebug``      -- ``str(nd.debug_repr(arr))``
        * ``create_session`` -- creates a ``compute_session``, stores it in
          ``self.sessions`` and returns its creation response

        Error responses: ``400 Bad Request`` when ``r`` is missing or
        unknown, ``404 Not Found`` for other request methods and for any
        exception raised while handling the request (the traceback is
        printed and included in the response body).

        Returns the WSGI response body as a one-element list.
        """
        print('Handling array query')
        try:
            array_name, indexers = split_array_base(environ['PATH_INFO'])
            arr = self.get_array(array_name, indexers)

            base_url = wsgi_reconstruct_base_url(environ)
            request_method = environ['REQUEST_METHOD']
            if request_method == 'GET' and environ['QUERY_STRING'] == '':
                # This version of the array information is for human consumption
                content_type = 'text/html; charset=utf-8'
                body = self.html_array(arr, base_url, array_name, indexers)
            else:
                if request_method == 'GET':
                    q = parse_qs(environ['QUERY_STRING'])
                elif request_method == 'POST':
                    # the environment variable CONTENT_LENGTH may be empty or missing
                    try:
                        request_body_size = int(environ.get('CONTENT_LENGTH', 0))
                    except ValueError:
                        request_body_size = 0
                    request_body = environ['wsgi.input'].read(request_body_size)
                    q = parse_qs(request_body)
                else:
                    # NOTE(review): 405 would be the conventional status for
                    # an unsupported method; 404 is kept so existing clients
                    # see no change.
                    status = '404 Not Found'
                    response_headers = [('content-type', 'text/plain')]
                    start_response(status, response_headers)
                    return ['Unsupported request method']

                # Function-call form so this parses on Python 2 and 3 alike.
                print(q)
                if 'r' not in q:
                    status = '400 Bad Request'
                    response_headers = [('content-type', 'text/plain')]
                    # No exception is being handled here, so exc_info is not
                    # passed to start_response.
                    start_response(status, response_headers)
                    return ['Blaze server request requires the ?r= query request type']
                q_req = q['r'][0]
                if q_req == 'data.json':
                    content_type = 'application/json; charset=utf-8'
                    body = nd.as_py(nd.format_json(arr).view_scalars(ndt.bytes))
                elif q_req == 'datashape':
                    content_type = 'text/plain; charset=utf-8'
                    body = nd.dshape_of(arr)
                elif q_req == 'dyndtype':
                    content_type = 'application/json; charset=utf-8'
                    body = str(nd.type_of(arr))
                elif q_req == 'dynddebug':
                    content_type = 'text/plain; charset=utf-8'
                    body = str(nd.debug_repr(arr))
                elif q_req == 'create_session':
                    session = compute_session(self.array_provider, base_url,
                                              add_indexers_to_url(array_name, indexers))
                    self.sessions[session.session_name] = session
                    content_type, body = session.creation_response()
                else:
                    status = '400 Bad Request'
                    response_headers = [('content-type', 'text/plain')]
                    start_response(status, response_headers)
                    return ['Unknown Blaze server request ?r=%s' % q['r'][0]]
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit still propagate to the server.
            traceback.print_exc()
            status = '404 Not Found'
            response_headers = [('content-type', 'text/plain')]
            start_response(status, response_headers, sys.exc_info())
            return ['Error getting Blaze Array\n\n' + traceback.format_exc()]

        status = '200 OK'
        response_headers = [
            ('content-type', content_type),
            ('content-length', str(len(body)))
        ]
        start_response(status, response_headers)
        return [body]
Exemple #17
0
def discover(arr):
    """Return ``dshape`` applied to the string from ``nd.dshape_of(arr)``."""
    ds_text = nd.dshape_of(arr)
    return dshape(ds_text)
 def __init__(self, dyndarr):
     """Wrap ``dyndarr`` and cache its datashape.

     Raises ``TypeError`` when ``dyndarr`` is not an ``nd.array``.
     """
     if isinstance(dyndarr, nd.array):
         self._dyndarr = dyndarr
         self._dshape = dshape(nd.dshape_of(dyndarr))
     else:
         raise TypeError('object is not a dynd array, has type %s' %
                         type(dyndarr))
 def test_var_dshape(self):
     """``nd.dshape_of`` reports concrete sizes for the leading var dims."""
     # Getting the dshape can see into leading var dims
     nested = [[[1], [2, 3]]]
     arr = nd.array(nested, type='var * var * var * int32')
     observed = nd.dshape_of(arr)
     self.assertEqual(observed, '1 * 2 * var * int32')
Exemple #20
0
def discover(arr):
    """Build a datashape for ``arr`` via ``dshape(nd.dshape_of(arr))``."""
    described = nd.dshape_of(arr)
    result = dshape(described)
    return result
 def _dshape(self):
     """Return ``nd.dshape_of`` applied to ``self.arr``."""
     wrapped = self.arr
     return nd.dshape_of(wrapped)
 def test_var_dshape(self):
     """Check the datashape string reported for a nested var-dim array."""
     # Getting the dshape can see into leading var dims
     expected = '1 * 2 * var * int32'
     arr = nd.array([[[1], [2, 3]]], type='var * var * var * int32')
     self.assertEqual(nd.dshape_of(arr), expected)
Exemple #23
0
 def __init__(self, dyndarr):
     """Keep a reference to ``dyndarr`` along with its datashape.

     Raises ``TypeError`` when ``dyndarr`` is not an instance of
     ``nd.array``.
     """
     is_dynd_array = isinstance(dyndarr, nd.array)
     if not is_dynd_array:
         found = type(dyndarr)
         raise TypeError('object is not a dynd array, has type %s' % found)
     self._dyndarr = dyndarr
     self._dshape = dshape(nd.dshape_of(dyndarr))