def str2bool(value):
    """
    Tries to transform a string supposed to represent a boolean to a boolean.

    Parameters
    ----------
    value : str or bytes
        The string that is transformed to a boolean.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    value = value.upper()
    # Byte strings are decoded so that text and bytes input are compared
    # against the same literals below.
    if isinstance(value, bytes):
        value = value.decode('latin1')
    if value == 'TRUE':
        return True
    elif value == 'FALSE':
        return False
    else:
        raise ValueError("Invalid boolean")
def nc_generator(ncfile, input):
    """
    an iteration-based approach to nc_streamer above

    Yields the header of `ncfile` first, then the raw bytes of every array
    pulled from `input`, so the caller can write the pieces to any sink.

    input should be an iterable of numpy arrays with which to fill the
    netcdf file

    Examples
    --------
    >>> from itertools import chain
    >>> import numpy
    >>> nc = netcdf_file(None)
    >>> # add attributes, dimensions and variables to the netcdf_file object
    >>> def input():
    >>>     yield numpy.random.random((100, 100))
    >>> def more_input():
    >>>     yield numpy.arange(10000).reshape(100, 100)
    >>> pipeline = nc_generator(nc, chain(input(), more_input()))
    >>> f = open('foo.nc', 'wb')
    >>> for block in pipeline:
    >>>     f.write(block)
    """
    input = check_byteorder(input)
    assert type(ncfile) == netcdf_file
    count = 0
    ncfile._calc_begins()
    yield ncfile._header()
    count += len(ncfile._header())

    try:
        if ncfile.variables and ncfile.non_recvars:
            for var in ncfile.non_recvars.values():
                end = var._begin + var._vsize if var.dimensions else var._begin
                while count < end:
                    # builtin next() works on Python 2.6+ and Python 3,
                    # unlike the Python-2-only ``.next()`` method
                    data = next(input)
                    chunk = data.tostring()
                    count += len(chunk)
                    # padding
                    if end - count < data.itemsize:
                        chunk += asbytes("0") * (end - count)
                        count = end
                    yield chunk

        # Record variables... keep taking data until it stops coming
        # (i.e. a StopIteration is raised)
        if ncfile.variables and ncfile.recvars:
            rec_vars = ncfile.recvars.values()
            while True:
                for var in rec_vars:
                    data = next(input)
                    chunk = data.tostring()
                    yield chunk
                    # NOTE(review): this emits ``len % 4`` pad bytes, which
                    # does not round the record up to a 4-byte boundary
                    # (that would be ``-len % 4``) -- confirm the intended
                    # on-disk layout before changing the output.
                    padding = len(chunk) % 4
                    if padding:
                        yield asbytes("0") * padding
                    count += len(chunk) + padding
    except StopIteration:
        # The input iterable is exhausted: nothing more to stream.
        pass
def test_recarray_returntypes(self):
    """Check the types returned by attribute and item access on a recarray."""
    qux_fields = {'C': (np.dtype('S5'), 0), 'D': (np.dtype('S5'), 6)}
    rec_dtype = [('foo', 'S4'),
                 ('bar', [('A', int), ('B', int)]),
                 ('baz', int),
                 ('qux', qux_fields)]
    rows = [('abc ', (1,1), 1, ('abcde', 'fgehi')),
            ('abc', (2,3), 1, ('abcde', 'jklmn'))]
    rec = np.rec.array(rows, dtype=rec_dtype)

    # Plain (unstructured) fields come back as ndarrays.
    assert_equal(type(rec.foo), np.ndarray)
    assert_equal(type(rec['foo']), np.ndarray)

    # Structured fields come back as recarrays of records.
    assert_equal(type(rec.bar), np.recarray)
    assert_equal(type(rec['bar']), np.recarray)
    assert_equal(rec.bar.dtype.type, np.record)
    assert_equal(type(rec['qux']), np.recarray)
    assert_equal(rec.qux.dtype.type, np.record)
    assert_equal(dict(rec.qux.dtype.fields), qux_fields)
    assert_equal(type(rec.baz), np.ndarray)
    assert_equal(type(rec['baz']), np.ndarray)

    # Element access: nested structured values are records.
    assert_equal(type(rec[0].bar), np.record)
    assert_equal(type(rec[0]['bar']), np.record)
    assert_equal(rec[0].bar.A, 1)
    assert_equal(rec[0].bar['A'], 1)
    assert_equal(rec[0]['bar'].A, 1)
    assert_equal(rec[0]['bar']['A'], 1)
    assert_equal(rec[0].qux.D, asbytes('fgehi'))
    assert_equal(rec[0].qux['D'], asbytes('fgehi'))
    assert_equal(rec[0]['qux'].D, asbytes('fgehi'))
    assert_equal(rec[0]['qux']['D'], asbytes('fgehi'))
def test_space_delimiter(self):
    "Test space delimiter"
    # The input carries a double space between "4" and "5": splitting on a
    # single space therefore yields one empty field, while splitting on a
    # double space cuts the line into exactly two fields.
    strg = asbytes(" 1 2 3 4  5 # test")
    test = LineSplitter(asbytes(' '))(strg)
    assert_equal(test, asbytes_nested(['1', '2', '3', '4', '', '5']))
    test = LineSplitter(asbytes('  '))(strg)
    assert_equal(test, asbytes_nested(['1 2 3 4', '5']))
def test_int64_dtype(self):
    """Check that int64 integer types can be specified"""
    to_int64 = StringConverter(np.int64, default=0)

    # Large negative value
    negative = asbytes("-9223372036854775807")
    assert_(to_int64(negative) == -9223372036854775807)

    # Large positive value
    positive = asbytes("9223372036854775807")
    assert_(to_int64(positive) == 9223372036854775807)
def test_method_array(self):
    """Build a big-endian record array from a raw byte buffer."""
    raw = asbytes('abcdefg') * 100
    records = np.rec.array(raw, formats='i2,a3,i4', shape=3,
                           byteorder='big')
    second = records[1].item()
    assert_equal(second, (25444, asbytes('efg'), 1633837924))
def test_bad_header():
    """Malformed headers must make read_array_header_1_0 raise ValueError."""
    read_header = format.read_array_header_1_0

    # header of length less than 2 should fail
    stream = BytesIO()
    assert_raises(ValueError, read_header, stream)
    stream = BytesIO(asbytes('1'))
    assert_raises(ValueError, read_header, stream)

    # header shorter than indicated size should fail
    stream = BytesIO(asbytes('\x01\x00'))
    assert_raises(ValueError, read_header, stream)

    # headers without the exact keys required should fail
    # NOTE(review): the stream is not rewound after the header is written,
    # so the read presumably starts at end-of-stream -- confirm that the
    # ValueError really comes from the key check.
    missing_key = {"shape": (1, 2), "descr": "x"}
    stream = BytesIO()
    format.write_array_header_1_0(stream, missing_key)
    assert_raises(ValueError, read_header, stream)

    extra_key = {"shape": (1, 2), "fortran_order": False, "descr": "x",
                 "extrakey": -1}
    stream = BytesIO()
    format.write_array_header_1_0(stream, extra_key)
    assert_raises(ValueError, read_header, stream)
def gen_for_simple(ncfileobj):
    '''Yield (assert_equal, actual, expected) checks for the example file object.'''
    # Global attribute
    yield assert_equal, ncfileobj.history, asbytes('Created for a test')

    # The 'time' variable: units, shape and last value
    time_var = ncfileobj.variables['time']
    yield assert_equal, time_var.units, asbytes('days since 2008-01-01')
    yield assert_equal, time_var.shape, (N_EG_ELS,)
    yield assert_equal, time_var[-1], N_EG_ELS-1
def test_space_delimiter(self):
    "Test space delimiter"
    # The input carries a double space between "4" and "5": splitting on a
    # single space therefore yields one empty field, while splitting on a
    # double space cuts the line into exactly two fields.
    strg = asbytes(" 1 2 3 4  5 # test")
    test = LineSplitter(asbytes(" "))(strg)
    assert_equal(test, asbytes_nested(["1", "2", "3", "4", "", "5"]))
    test = LineSplitter(asbytes("  "))(strg)
    assert_equal(test, asbytes_nested(["1 2 3 4", "5"]))
def gen_for_simple(ncfileobj):
    """Generate the checks run against the example file object.

    Each yielded item is a tuple ``(assert_equal, actual, expected)``.
    """
    yield assert_equal, ncfileobj.history, asbytes("Created for a test")

    time_variable = ncfileobj.variables["time"]
    yield assert_equal, time_variable.units, asbytes("days since 2008-01-01")
    yield assert_equal, time_variable.shape, (N_EG_ELS,)
    yield assert_equal, time_variable[-1], N_EG_ELS - 1
def save_nparray_to_hdfs(fname, X, hdfs):
    '''
    An instance of numpy's savetext function to enable saving numpy arrays
    in HDFS as text files

    Parameters
    ----------
    fname : path of the target file, passed through to `hdfs`.
    X : array_like
        Data to save; converted with ``np.asarray``. A 1-D array without
        named fields is written as a single column.
    hdfs : object providing ``create_file(fname, data, overwrite=True)``
        and ``appendfile(fname, data)``.

    Notes
    -----
    Every value is formatted with ``'%.18e'``, values on a row are joined
    with a space and each row ends with a newline. The first row creates
    (overwrites) the file; every later row is appended.
    '''
    fmt = '%.18e'
    delimiter = ' '
    newline = '\n'

    X = np.asarray(X)
    if X.ndim == 1:
        if X.dtype.names is None:
            # Plain 1-D data becomes a single column
            X = np.atleast_2d(X).T
            ncol = 1
        else:
            ncol = len(X.dtype.descr)
    else:
        ncol = X.shape[1]

    # One format specifier per column
    row_format = delimiter.join([fmt] * ncol)

    for index, row in enumerate(X):
        line = asbytes(row_format % tuple(row) + newline)
        if index == 0:
            hdfs.create_file(fname, line, overwrite=True)
        else:
            hdfs.appendfile(fname, line)
def _write_var_data(self, name): var = self.variables[name] # Set begin in file header. the_beguine = self.fp.tell() self.fp.seek(var._begin) self._pack_begin(the_beguine) self.fp.seek(the_beguine) # Write data. if not var.isrec: self.fp.write(var.data.tostring()) count = var.data.size * var.data.itemsize self.fp.write(asbytes('0') * (var._vsize - count)) else: # record variable # Handle rec vars with shape[0] < nrecs. if self._recs > len(var.data): shape = (self._recs,) + var.data.shape[1:] var.data.resize(shape) pos0 = pos = self.fp.tell() for rec in var.data: # Apparently scalars cannot be converted to big endian. If we # try to convert a ``=i4`` scalar to, say, '>i4' the dtype # will remain as ``=i4``. if not rec.shape and (rec.dtype.byteorder == '<' or (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)): rec = rec.byteswap() self.fp.write(rec.tostring()) # Padding count = rec.size * rec.itemsize self.fp.write(asbytes('0') * (var._vsize - count)) pos += self._recsize self.fp.seek(pos) self.fp.seek(pos0 + var._vsize)
def writeout(self, f=None):
    """write all the dump items and the summary out to file(s)

    Parameters
    ----------
    f : filename or filehandle
        If specified then all summary and object data will go in one file.
        If None is specified then type specific files will be generated
        in the dump_dir
        If a filehandle is specified then it must be a byte mode file
        as numpy.savetxt is used, and requires this.
    """
    fall = None
    closefall = False
    closefs = False

    if hasattr(f, 'write'):
        # Caller supplied an open stream: everything goes to it and it is
        # left open afterwards.
        if 'b' not in f.mode:
            raise RuntimeError("File stream must be in binary mode")
        fall = fs = f
    elif f:
        # Assume f is a filename: one file receives everything.
        fall = fs = open(f, 'wb')
        closefall = True
    else:
        # No target given: write type specific files into the dump dir.
        self.create_dump_dir()
        if self.dump_summary:
            fs = open(self.summary_file, 'wb')
            closefs = True

    if self.dump_summary:
        for ecs in self.evo_summary:
            value_line = ecs.get_value_line
            if ecs.idx == 0:
                # The first entry is preceded by the header line.
                text = "{}\n{}\n".format(
                    ecs.get_header_line(self.summary_sep),
                    value_line(self.summary_sep))
            else:
                text = "{}\n".format(value_line(self.summary_sep))
            fs.write(asbytes(text))

        if closefs:
            fs.close()
            logger.info("Dynamics dump summary saved to {}".format(
                self.summary_file))

    for di in self.evo_dumps:
        di.writeout(fall)

    if closefall:
        fall.close()
        logger.info("Dynamics dump saved to {}".format(f))
    elif fall:
        logger.info("Dynamics dump saved to specified stream")
    else:
        logger.info("Dynamics dump saved to {}".format(self.dump_dir))
def test_variable_fixed_width(self):
    """Split a line into fixed-width fields of differing widths."""
    # Column layout of the input (comment stripped):
    #   cols 0-2 "  1", 3-8 "     3", 9-14 "  4  5", 15-17 "  6"
    strg = asbytes("  1     3  4  5  6# test")
    test = LineSplitter((3, 6, 6, 3))(strg)
    assert_equal(test, asbytes_nested(["1", "3", "4  5", "6"]))
    #
    # Same input, wider fields: 0-5 "  1   ", 6-11 "  3  4", 12-17 "  5  6"
    strg = asbytes("  1     3  4  5  6# test")
    test = LineSplitter((6, 6, 9))(strg)
    assert_equal(test, asbytes_nested(["1", "3  4", "5  6"]))
def test_tab_delimiter(self):
    """Test tab delimiter"""
    split_on_tab = LineSplitter(asbytes("\t"))

    # One value per tab-separated field, except the last one
    line = asbytes(" 1\t 2\t 3\t 4\t 5 6")
    fields = split_on_tab(line)
    assert_equal(fields, asbytes_nested(["1", "2", "3", "4", "5 6"]))

    # Spaces inside a field are preserved
    line = asbytes(" 1 2\t 3 4\t 5 6")
    fields = LineSplitter(asbytes("\t"))(line)
    assert_equal(fields, asbytes_nested(["1 2", "3 4", "5 6"]))
def test_variable_fixed_width(self):
    """Split a line into fixed-width fields of differing widths."""
    # Column layout of the input (comment stripped):
    #   cols 0-2 "  1", 3-8 "     3", 9-14 "  4  5", 15-17 "  6"
    strg = asbytes("  1     3  4  5  6# test")
    test = LineSplitter((3, 6, 6, 3))(strg)
    assert_equal(test, asbytes_nested(['1', '3', '4  5', '6']))
    #
    # Same input, wider fields: 0-5 "  1   ", 6-11 "  3  4", 12-17 "  5  6"
    strg = asbytes("  1     3  4  5  6# test")
    test = LineSplitter((6, 6, 9))(strg)
    assert_equal(test, asbytes_nested(['1', '3  4', '5  6']))
def test_tab_delimiter(self):
    """Test tab delimiter"""
    tab = asbytes('\t')

    # Fields separated by tabs; the last field keeps its inner space
    raw_line = asbytes(" 1\t 2\t 3\t 4\t 5 6")
    result = LineSplitter(tab)(raw_line)
    assert_equal(result, asbytes_nested(['1', '2', '3', '4', '5 6']))

    # Every field holds two space-separated values
    raw_line = asbytes(" 1 2\t 3 4\t 5 6")
    result = LineSplitter(asbytes('\t'))(raw_line)
    assert_equal(result, asbytes_nested(['1 2', '3 4', '5 6']))
def _check_from(self, s, value, **kw):
    """
    Check that `s` parses to `value` both from memory and from a file.

    `s` is converted with ``asbytes`` and parsed with ``np.fromstring``;
    the same bytes are then written to ``self.filename`` and read back
    with ``np.fromfile``. Extra keyword arguments go to both readers.
    """
    payload = asbytes(s)

    y = np.fromstring(payload, **kw)
    assert_array_equal(y, value)

    # The context manager closes the file even if the write fails.
    with open(self.filename, 'wb') as f:
        f.write(payload)
    y = np.fromfile(self.filename, **kw)
    assert_array_equal(y, value)
def setup(self):
    """Build the masked/plain record pair stored in ``self.data``."""
    ddtype = [("a", int), ("b", float), ("c", "|S8")]
    shared_mask = [0, 0, 1]

    # Three masked columns: integers, floats and byte strings
    ints = ma.array([1, 2, 3], mask=shared_mask, dtype=int)
    floats = ma.array([1.1, 2.2, 3.3], mask=shared_mask, dtype=float)
    words = ma.array(list(map(asbytes, ["one", "two", "three"])),
                     mask=shared_mask, dtype="|S8")

    fill = (asbytes("99999"), asbytes("99999."), asbytes("N/A"))
    mrec = fromarrays([ints, floats, words], dtype=ddtype, fill_value=fill)
    nrec = recfromarrays((ints._data, floats._data, words._data),
                         dtype=ddtype)
    self.data = (mrec, nrec, ddtype)
def test_other_delimiter(self):
    """Test LineSplitter on delimiter"""
    expected = asbytes_nested(["1", "2", "3", "4", "", "5"])

    # Bare comma-separated line
    line = asbytes("1,2,3,4,,5")
    fields = LineSplitter(asbytes(","))(line)
    assert_equal(fields, expected)

    # Same values with surrounding blanks and a trailing comment
    line = asbytes(" 1,2,3,4,,5 # test")
    fields = LineSplitter(asbytes(","))(line)
    assert_equal(fields, asbytes_nested(["1", "2", "3", "4", "", "5"]))
def test_other_delimiter(self):
    """Test LineSplitter on delimiter"""
    comma = asbytes(',')

    # Plain input
    raw = asbytes("1,2,3,4,,5")
    parts = LineSplitter(comma)(raw)
    assert_equal(parts, asbytes_nested(['1', '2', '3', '4', '', '5']))

    # Input padded with blanks and followed by a comment
    raw = asbytes(" 1,2,3,4,,5 # test")
    parts = LineSplitter(asbytes(','))(raw)
    assert_equal(parts, asbytes_nested(['1', '2', '3', '4', '', '5']))
def test_unused_converter(self):
    """A converter only matters when its column is among `usecols`."""
    def from_hex(s):
        return int(s, 16)

    stream = StringIO()
    stream.writelines([asbytes("1 21\n"), asbytes("3 42\n")])

    # Converter registered for column 0, which is not read
    stream.seek(0)
    data = np.loadtxt(stream, usecols=(1,), converters={0: from_hex})
    assert_array_equal(data, [21, 42])

    # Converter registered for the column that is read
    stream.seek(0)
    data = np.loadtxt(stream, usecols=(1,), converters={1: from_hex})
    assert_array_equal(data, [33, 66])
def test_keep_default(self):
    """Make sure we don't lose an explicit default"""
    sample = asbytes("3.14159265")

    # Non-zero explicit default
    conv = StringConverter(None, missing_values=asbytes(""), default=-999)
    conv.upgrade(sample)
    assert_equal(conv.default, -999)
    assert_equal(conv.type, np.dtype(float))

    # Explicit default of zero
    conv = StringConverter(None, missing_values=asbytes(""), default=0)
    conv.upgrade(asbytes("3.14159265"))
    assert_equal(conv.default, 0)
    assert_equal(conv.type, np.dtype(float))
def grep_dependencies(self, file, deps):
    """
    Return the entries of `deps` found in the dependency listing of `file`.

    Each entry of `deps` is compiled as a regular expression (after
    ``asbytes``) and searched for in every line returned by
    ``self.get_dependencies(file)``. An entry is reported once per
    matching line.
    """
    output = self.get_dependencies(file)

    patterns = dict((asbytes(dep), re.compile(asbytes(dep))) for dep in deps)

    founds = []
    for line in output.splitlines():
        founds.extend(key for key, pattern in patterns.items()
                      if pattern.search(line))
    return founds
def test_rjust(self):
    """Right-justification with scalar/array widths and a fill character."""
    assert issubclass(self.A.rjust(10).dtype.type, np.string_)

    # Per-column widths
    C = self.A.rjust([10, 20])
    assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])

    # Explicit fill character
    C = self.A.rjust(20, asbytes("#"))
    assert np.all(C.startswith(asbytes("#")))
    assert_array_equal(C.endswith(asbytes("#")),
                       [[False, True], [False, False], [False, False]])

    # Free function with a scalar input and an array of widths. The
    # expected strings are spelled with str.rjust so that their padding
    # matches the requested widths exactly.
    C = np.char.rjust(asbytes("FOO"), [[10, 20], [15, 8]])
    assert issubclass(C.dtype.type, np.string_)
    assert_array_equal(C, asbytes_nested([["FOO".rjust(10), "FOO".rjust(20)],
                                          ["FOO".rjust(15), "FOO".rjust(8)]]))
def test_center(self):
    """Centering with scalar/array widths and a fill character."""
    assert_(issubclass(self.A.center(10).dtype.type, np.string_))

    # Per-column widths
    C = self.A.center([10, 20])
    assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])

    # Explicit fill character
    C = self.A.center(20, asbytes("#"))
    assert_(np.all(C.startswith(asbytes("#"))))
    assert_(np.all(C.endswith(asbytes("#"))))

    # Free function with a scalar input and an array of widths. The
    # expected strings are spelled with str.center so that their padding
    # matches the requested widths exactly.
    C = np.char.center(asbytes("FOO"), [[10, 20], [15, 8]])
    assert_(issubclass(C.dtype.type, np.string_))
    assert_array_equal(C, asbytes_nested([["FOO".center(10), "FOO".center(20)],
                                          ["FOO".center(15), "FOO".center(8)]]))
def test_comments(self):
    """Test the stripping of comments"""
    expected = np.array([1, 2, 3, 5], int)

    # Comment on its own line
    source = StringIO("# comment\n1,2,3,5\n")
    parsed = np.ndfromtxt(source, dtype=int, delimiter=asbytes(","),
                          comments=asbytes("#"))
    assert_equal(parsed, expected)

    # Comment at the end of a line
    source = StringIO("1,2,3,5# comment\n")
    parsed = np.ndfromtxt(source, dtype=int, delimiter=asbytes(","),
                          comments=asbytes("#"))
    assert_equal(parsed, expected)
def test_upgrademapper(self):
    """Tests updatemapper"""
    fallback = date(2000, 1, 1)
    dateparser = _bytes_to_date
    StringConverter.upgrade_mapper(dateparser, date(2000, 1, 1))
    convert = StringConverter(dateparser, fallback)

    # Valid dates are parsed
    assert_equal(convert(asbytes('2001-01-01')), date(2001, 1, 1))
    assert_equal(convert(asbytes('2009-01-01')), date(2009, 1, 1))

    # An empty string falls back to the default
    assert_equal(convert(asbytes('')), date(2000, 1, 1))
def read(file, mmap=False):
    """
    Return the sample rate (in samples/sec) and data from a WAV file

    Parameters
    ----------
    file : file
        Input wav file.
    mmap : bool, optional
        Whether to read data as memory mapped. (Default: False)

        .. versionadded:: 0.12.0

    Returns
    -------
    rate : int
        Sample rate of wav file
    data : numpy array
        Data read from wav file

    Notes
    -----

    * The file can be an open file or a filename.

    * The returned sample rate is a Python integer
    * The data is returned as a numpy array with a
      data-type determined from the file.

    * A file opened here from a filename is always closed again, also when
      reading fails. An open file passed in by the caller is left open.

    """
    if hasattr(file, 'read'):
        fid = file
        opened_here = False
    else:
        fid = open(file, 'rb')
        opened_here = True

    try:
        fsize = _read_riff_chunk(fid)
        # Defaults used if the data chunk precedes the fmt chunk
        noc = 1
        bits = 8
        while fid.tell() < fsize:
            # read the next chunk
            chunk_id = fid.read(4)
            if chunk_id == asbytes('fmt '):
                size, comp, noc, rate, sbytes, ba, bits = _read_fmt_chunk(fid)
            elif chunk_id == asbytes('data'):
                data = _read_data_chunk(fid, noc, bits, mmap=mmap)
            elif chunk_id == asbytes('LIST'):
                # Someday this could be handled properly but for now skip it
                _skip_unknown_chunk(fid)
            else:
                warnings.warn("Chunk (non-data) not understood, skipping it.",
                              WavFileWarning)
                _skip_unknown_chunk(fid)
    finally:
        # Only release handles this function created itself.
        if opened_here:
            fid.close()

    return rate, data
def test_invalid_converter(self):
    """Converters that fail on some field must raise ConverterError."""
    def strip_rand(x):
        return float((asbytes('r') in x.lower() and x.split()[-1]) or
                     (asbytes('r') not in x.lower() and x.strip() or 0.0))

    def strip_per(x):
        return float((asbytes('%') in x.lower() and x.split()[0]) or
                     (asbytes('%') not in x.lower() and x.strip() or 0.0))

    source = StringIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n"
                      "L24U05,12/5/2003, 2 %,1,300, 150.5\r\n"
                      "D02N03,10/10/2004,R 1,,7,145.55")
    options = dict(converters={2 : strip_per, 3 : strip_rand},
                   delimiter=",", dtype=None)
    assert_raises(ConverterError, np.genfromtxt, source, **options)
def test_numeric_carray_compare(self, level=rlevel):
    """Ticket #341"""
    char_array = np.array(['X'], 'c')
    expected = asbytes('X')
    assert_equal(char_array, expected)
Notes ----- The ``.npy`` format, including reasons for creating it and a comparison of alternatives, is described fully in the "npy-format" NEP. """ import cPickle import numpy import sys from numpy.lib.utils import safe_eval from numpy.compat import asbytes, isfileobj MAGIC_PREFIX = asbytes('\x93NUMPY') MAGIC_LEN = len(MAGIC_PREFIX) + 2 def magic(major, minor): """ Return the magic string for the given file format version. Parameters ---------- major : int in [0, 255] minor : int in [0, 255] Returns ------- magic : str
def parseTrajectories(fh,framesNumber):
    r"""Converts rows of motion capture data into a dictionary

    This function is only in scope from within `loadCSV`.

    Parameters
    ----------
    fh : list iterator object
        Iterator for rows of motion capture data. The first 3 rows
        in `fh` contain the frequency, labels, and field headers
        respectively. All elements of the rows in `fh` are strings.
        See Examples.
    framesNumber : int
        Number of rows iterated over in `fh`. Not read by the body of
        this function.

    Returns
    -------
    labels, rows, rowsUnlabeled, freq : tuple
        `labels` is a list of marker names.
        `rows` is a list of dict of motion capture data.
        `rowsUnlabeled` is of the same type as `rows`, but for
        unlabeled data.
        `freq` is the frequency in Hz.

    Examples
    --------
    This example uses a loop and numpy.array_equal to test the equality
    of individual dictionary elements since python does not guarantee
    the order of dictionary elements.

    Example for 2 markers, LFHD and RFHD, and one frame of trial.

    >>> from numpy import array, array_equal

    # Rows will hold frequency, headers, fields, and one row of data
    >>> rows = [None, None, None, None]
    >>> rows[0] = '240.000000,Hz\n'
    >>> rows[1] = ',LFHD,,,RFHD\n'
    >>> rows[2] = 'Field #,X,Y,Z,X,Y,Z\n'
    >>> rows[3] = '1,-1003.583618,81.007614,1522.236938,-1022.270447,-47.190071,1519.680420\n'
    >>> fh = iter(rows)
    >>> framesNumber = 1 #Indicates one row of data
    >>> labels, rows, rowsUnlabeled, freq = parseTrajectories(fh, framesNumber)
    >>> labels
    ['LFHD', 'RFHD']
    >>> expectedRows = [{'LFHD': array([-1003.583618, 81.007614, 1522.236938]),
    ...                  'RFHD': array([-1022.270447, -47.190071, 1519.68042 ])}]
    >>> flag = True #False if any values are not equal
    >>> for i in range(len(expectedRows)):
    ...     for key in rows[i]:
    ...         if (not array_equal(rows[i][key], expectedRows[i][key])):
    ...             flag = False
    >>> flag
    True
    >>> rowsUnlabeled
    [{}]
    >>> freq
    240.0
    """
    # The builtin next() works on both Python 2.6+ and Python 3, so no
    # interpreter-version branching is needed to advance the iterator.
    freq = np.float64(split_line(next(fh))[0])
    # Labels sit in every third column, starting at column 1
    labels = split_line(next(fh))[1::3]
    # Third row holds the field headers; it is consumed but not used.
    next(fh)

    rows = []
    rowsUnlabeled = []

    # The first data row fixes the expected number of columns.
    first_line = next(fh)
    first_elements = split_line(first_line)[1:]
    colunsNum = len(first_elements)
    first_elements, first_elements_unlabeled = rowToDict(first_elements, labels)
    rows.append(first_elements)
    rowsUnlabeled.append(first_elements_unlabeled)

    for row in fh:
        row = split_line(row)[1:]
        # A row with a different column count ends the section.
        if len(row) != colunsNum:
            break
        elements, unlabeled_elements = rowToDict(row, labels)
        rows.append(elements)
        rowsUnlabeled.append(unlabeled_elements)

    return labels,rows,rowsUnlabeled,freq
def test_from_string(self):
    """A char array built from a single byte string holds one element."""
    char_arr = np.char.array(asbytes('abc'))
    assert_equal(len(char_arr), 1)
    first = char_arr[0]
    assert_equal(len(first), 3)
    assert_(issubclass(char_arr.dtype.type, np.string_))
def test_method_array(self):
    """Interpret a repeated byte pattern as three big-endian records."""
    buf = asbytes('abcdefg') * 100
    rec = np.rec.array(buf, formats='i2,a3,i4', shape=3, byteorder='big')
    expected = (25444, asbytes('efg'), 1633837924)
    assert_equal(rec[1].item(), expected)
def test_bytes(self):
    """np.random.bytes returns a fixed byte string for the seeded generator."""
    np.random.seed(self.seed)
    produced = np.random.bytes(10)
    expected = asbytes('\x82Ui\x9e\xff\x97+Wf\xa5')
    np.testing.assert_equal(produced, expected)
#A place for code to be called from C-code # that implements more complicated stuff. import re import sys from numpy.compat import asbytes, bytes if (sys.byteorder == 'little'): _nbo = asbytes('<') else: _nbo = asbytes('>') def _makenames_list(adict): from multiarray import dtype allfields = [] fnames = adict.keys() for fname in fnames: obj = adict[fname] n = len(obj) if not isinstance(obj, tuple) or n not in [2, 3]: raise ValueError("entry not a 2- or 3- tuple") if (n > 2) and (obj[2] == fname): continue num = int(obj[1]) if (num < 0): raise ValueError("invalid offset.") format = dtype(obj[0]) if (format.itemsize == 0): raise ValueError("all itemsizes must be fixed.")
def _pack_string(self, s):
    """Write `s` to ``self.fp``: its length, its bytes, then zero padding.

    The padding brings the number of bytes written for the string up to
    a multiple of four.
    """
    length = len(s)
    self._pack_int(length)
    self.fp.write(asbytes(s))
    pad_len = -length % 4
    self.fp.write(b'\x00' * pad_len)
def test_fromstring_crash(self):
    """Ticket #1345: parsing non-numeric text must not crash."""
    # Only the absence of a crash matters; the result is discarded.
    text = asbytes('aa, aa, 1.0')
    np.fromstring(text, sep=',')
def _unpack_string(self):
    """Read a length-prefixed string from ``self.fp``.

    Trailing NUL bytes are stripped from the value, and the padding that
    follows the string in the file is consumed.
    """
    length = self._unpack_int()
    raw = self.fp.read(length)
    s = raw.rstrip(asbytes('\x00'))
    # Skip the padding up to the next multiple of four.
    self.fp.read(-length % 4)
    return s
#are automatically stored in the ``_attributes`` attribute by overloading #``__setattr__``. This is the reason why the code sometimes uses #``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``; #otherwise the key would be inserted into userspace attributes. __all__ = ['netcdf_file', 'netcdf_variable'] from operator import mul from mmap import mmap, ACCESS_READ import numpy as np from numpy.compat import asbytes, asstr from numpy import fromstring, ndarray, dtype, empty, array, asarray from numpy import little_endian as LITTLE_ENDIAN ABSENT = asbytes('\x00\x00\x00\x00\x00\x00\x00\x00') ZERO = asbytes('\x00\x00\x00\x00') NC_BYTE = asbytes('\x00\x00\x00\x01') NC_CHAR = asbytes('\x00\x00\x00\x02') NC_SHORT = asbytes('\x00\x00\x00\x03') NC_INT = asbytes('\x00\x00\x00\x04') NC_FLOAT = asbytes('\x00\x00\x00\x05') NC_DOUBLE = asbytes('\x00\x00\x00\x06') NC_DIMENSION = asbytes('\x00\x00\x00\n') NC_VARIABLE = asbytes('\x00\x00\x00\x0b') NC_ATTRIBUTE = asbytes('\x00\x00\x00\x0c') TYPEMAP = { NC_BYTE: ('b', 1), NC_CHAR: ('c', 1), NC_SHORT: ('h', 2),
def test_pickle_python2_python3():
    """Load object arrays stored by Python 2 and Python 3 with np.load.

    Reads the four ``py{2,3}-objarr.{npy,npz}`` files from the ``data``
    directory next to this file, once per encoding, and compares the
    loaded array with the expected contents.
    """
    # Test that loading object arrays saved on Python 2 works both on
    # Python 2 and Python 3 and vice versa
    data_dir = os.path.join(os.path.dirname(__file__), 'data')

    # ``xrange`` is part of the expected array; use the running
    # interpreter's equivalent object.
    if sys.version_info[0] >= 3:
        xrange = range
    else:
        import __builtin__
        xrange = __builtin__.xrange

    expected = np.array([None, xrange, sixu('\u512a\u826f'),
                         asbytes('\xe4\xb8\x8d\xe8\x89\xaf')],
                        dtype=object)

    for fname in ['py2-objarr.npy', 'py2-objarr.npz',
                  'py3-objarr.npy', 'py3-objarr.npz']:
        path = os.path.join(data_dir, fname)

        if (fname.endswith('.npz') and sys.version_info[0] == 2 and
                sys.version_info[1] < 7):
            # Reading object arrays directly from zipfile appears to fail
            # on Py2.6, see cfae0143b4
            continue

        for encoding in ['bytes', 'latin1']:
            if (sys.version_info[0] >= 3 and sys.version_info[1] < 4 and
                    encoding == 'bytes'):
                # The bytes encoding is available starting from Python 3.4
                continue

            data_f = np.load(path, encoding=encoding)
            if fname.endswith('.npz'):
                # .npz archives expose the array under the key 'x'
                data = data_f['x']
                data_f.close()
            else:
                data = data_f

            if sys.version_info[0] >= 3:
                if encoding == 'latin1' and fname.startswith('py2'):
                    assert_(isinstance(data[3], str))
                    assert_array_equal(data[:-1], expected[:-1])
                    # mojibake occurs
                    assert_array_equal(data[-1].encode(encoding), expected[-1])
                else:
                    assert_(isinstance(data[3], bytes))
                    assert_array_equal(data, expected)
            else:
                assert_array_equal(data, expected)

        # On Python 3, loading Python 2 files with the default arguments
        # (or without fix_imports) is expected to raise.
        if sys.version_info[0] >= 3:
            if fname.startswith('py2'):
                if fname.endswith('.npz'):
                    data = np.load(path)
                    assert_raises(UnicodeError, data.__getitem__, 'x')
                    data.close()
                    data = np.load(path, fix_imports=False, encoding='latin1')
                    assert_raises(ImportError, data.__getitem__, 'x')
                    data.close()
                else:
                    assert_raises(UnicodeError, np.load, path)
                    assert_raises(ImportError, np.load, path,
                                  encoding='latin1', fix_imports=False)
def build_module_distutils(source_files, config_code, module_name, **kw):
    """
    Build a module via distutils and import it.

    Parameters
    ----------
    source_files : sequence of str
        Files copied into the module directory before building.
    config_code : str
        Python statements placed inside the generated ``configuration``
        function; they may use the local name ``config``.
    module_name : str
        Name of the module to import once the build has succeeded.
    **kw
        Accepted for interface compatibility; not used.

    Returns
    -------
    The imported module, taken from ``sys.modules``.

    Raises
    ------
    RuntimeError
        If a source file does not exist or the build exits with a
        non-zero status.
    """
    from numpy.distutils.misc_util import Configuration
    from numpy.distutils.core import setup

    d = get_module_dir()

    # Copy files
    dst_sources = []
    for fn in source_files:
        if not os.path.isfile(fn):
            raise RuntimeError("%s is not a file" % fn)
        dst = os.path.join(d, os.path.basename(fn))
        shutil.copyfile(fn, dst)
        dst_sources.append(dst)

    # Build script
    # Continuation lines of the user code are indented by four spaces so
    # that the whole snippet sits inside ``configuration`` in the template.
    config_code = textwrap.dedent(config_code).replace("\n", "\n    ")

    code = """\
import os
import sys
sys.path = %(syspath)s

def configuration(parent_name='',top_path=None):
    from numpy.distutils.misc_util import Configuration
    config = Configuration('', parent_name, top_path)
    %(config_code)s
    return config

if __name__ == "__main__":
    from numpy.distutils.core import setup
    setup(configuration=configuration)
""" % dict(config_code=config_code, syspath=repr(sys.path))

    script = os.path.join(d, get_temp_module_name() + '.py')
    dst_sources.append(script)
    # The context manager closes the script even if the write fails.
    with open(script, 'wb') as f:
        f.write(asbytes(code))

    # Build
    cwd = os.getcwd()
    try:
        os.chdir(d)
        cmd = [sys.executable, script, 'build_ext', '-i']
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        out, err = p.communicate()
        if p.returncode != 0:
            # Report the full command: it has only four entries, so a
            # ``cmd[4:]`` slice would always print an empty list.
            raise RuntimeError("Running distutils build failed: %s\n%s"
                               % (cmd, asstr(out)))
    finally:
        os.chdir(cwd)

        # Partial cleanup
        for fn in dst_sources:
            os.unlink(fn)

    # Import
    __import__(module_name)
    return sys.modules[module_name]
def test_array_astype():
    """Exercise the keyword arguments of ``ndarray.astype``.

    Covers the default behaviour and the `copy`, `order`, `casting` and
    `subok` parameters, followed by conversions from object arrays of
    strings to fixed-length string dtypes.
    """
    a = np.arange(6, dtype='f4').reshape(2, 3)
    # Default behavior: allows unsafe casts, keeps memory layout,
    #                   always copies.
    b = a.astype('i4')
    assert_equal(a, b)
    assert_equal(b.dtype, np.dtype('i4'))
    assert_equal(a.strides, b.strides)
    b = a.T.astype('i4')
    assert_equal(a.T, b)
    assert_equal(b.dtype, np.dtype('i4'))
    assert_equal(a.T.strides, b.strides)
    b = a.astype('f4')
    assert_equal(a, b)
    assert_(not (a is b))

    # copy=False parameter can sometimes skip a copy
    b = a.astype('f4', copy=False)
    assert_(a is b)

    # order parameter allows overriding of the memory layout,
    # forcing a copy if the layout is wrong
    b = a.astype('f4', order='F', copy=False)
    assert_equal(a, b)
    assert_(not (a is b))
    assert_(b.flags.f_contiguous)

    b = a.astype('f4', order='C', copy=False)
    assert_equal(a, b)
    assert_(a is b)
    assert_(b.flags.c_contiguous)

    # casting parameter allows catching bad casts
    b = a.astype('c8', casting='safe')
    assert_equal(a, b)
    assert_equal(b.dtype, np.dtype('c8'))

    assert_raises(TypeError, a.astype, 'i4', casting='safe')

    # subok=False passes through a non-subclassed array
    b = a.astype('f4', subok=0, copy=False)
    assert_(a is b)

    a = np.matrix([[0, 1, 2], [3, 4, 5]], dtype='f4')

    # subok=True passes through a matrix
    b = a.astype('f4', subok=True, copy=False)
    assert_(a is b)

    # subok=True is default, and creates a subtype on a cast
    b = a.astype('i4', copy=False)
    assert_equal(a, b)
    assert_equal(type(b), np.matrix)

    # subok=False never returns a matrix
    b = a.astype('f4', subok=False, copy=False)
    assert_equal(a, b)
    assert_(not (a is b))
    assert_(type(b) != np.matrix)

    # Make sure converting from string object to fixed length string
    # does not truncate.
    a = np.array([asbytes('a') * 100], dtype='O')
    b = a.astype('S')
    assert_equal(a, b)
    assert_equal(b.dtype, np.dtype('S100'))
    a = np.array([u'a' * 100], dtype='O')
    b = a.astype('U')
    assert_equal(a, b)
    assert_equal(b.dtype, np.dtype('U100'))

    # Same test as above but for strings shorter than 64 characters
    a = np.array([asbytes('a') * 10], dtype='O')
    b = a.astype('S')
    assert_equal(a, b)
    assert_equal(b.dtype, np.dtype('S10'))
    a = np.array([u'a' * 10], dtype='O')
    b = a.astype('U')
    assert_equal(a, b)
    assert_equal(b.dtype, np.dtype('U10'))
def test_recarray_slices(self):
    """Indexing into a strided slice of a record array returns the right record."""
    rows = [(1, 11, 'a'), (2, 22, 'b'),
            (3, 33, 'c'), (4, 44, 'd'),
            (5, 55, 'ex'), (6, 66, 'f'),
            (7, 77, 'g')]
    rec = np.rec.array(rows, formats='u1,f4,a1')

    # Every second record starting at index 1, then the second of those
    picked = rec[1::2][1].item()
    assert_equal(picked, (4, 44.0, asbytes('d')))
def test_it(self):
    """Check the fixture's shape and the bytes of its upper-cased head."""
    assert_equal(self.A.shape, (4, ))
    head = self.A.upper()[:2]
    assert_equal(head.tobytes(), asbytes('AB'))
def test_pickle_withstring(self):
    """Unpickling a stored ufunc reference gives back ``np.cos`` itself."""
    import pickle

    payload = asbytes("cnumpy.core\n_ufunc_reconstruct\np0\n"
                      "(S'numpy.core.umath'\np1\nS'cos'\np2\ntp3\nRp4\n.")
    restored = pickle.loads(payload)
    assert restored is np.cos
def test_expandtabs(self):
    """expandtabs() replaces tab characters by runs of spaces."""
    T = self.A.expandtabs()
    # NOTE(review): assumes the fixture entry self.A[2, 0] starts with
    # '123 \t 345 \0' -- confirm against the class setup. Deriving the
    # expectation with str.expandtabs keeps the exact run of spaces
    # explicit instead of relying on a hand-typed literal.
    expected = asbytes('123 \t 345 \0'.expandtabs())
    assert_(T[2, 0] == expected)
def test_basic(self):
    """datetime_data reports the unit information of a datetime64 dtype."""
    dates = array(['1980-03-23'], dtype=datetime64)
    info = datetime_data(dates.dtype)
    assert_equal(info, (asbytes('us'), 1, 1, 1))
def _write(self, stream, a, comment='', field=None, precision=None,
           symmetry=None):
    """
    Write matrix `a` to `stream` in Matrix Market text form.

    Parameters
    ----------
    stream : object with a ``write`` method accepting bytes
    a : list, tuple, ndarray, object with ``__array__``, or sparse matrix
        Array-likes must be 2-dimensional and are written in the array
        format; anything else must satisfy ``isspmatrix`` and is written
        in the coordinate format.
    comment : str, optional
        Written after the header, one ``%``-prefixed line per line of text.
    field : optional
        Value field. When None it is derived from ``a.dtype.kind``
        ('i' -> 'integer', 'f' -> 'real', 'c' -> 'complex').
    precision : int, optional
        Number of digits. When None, 8 for type codes 'f'/'F', else 16.
    symmetry : optional
        When None it is obtained from ``self._get_symmetry(a)``.

    Raises
    ------
    ValueError
        For an array-like that is not 2-dimensional, an unknown matrix
        type, or the pattern field combined with the array format.
    TypeError
        For an unexpected dtype kind or an unknown field type.
    """
    if isinstance(a, list) or isinstance(a, ndarray) or \
       isinstance(a, tuple) or hasattr(a, '__array__'):
        rep = self.FORMAT_ARRAY
        a = asarray(a)
        if len(a.shape) != 2:
            raise ValueError('Expected 2 dimensional array')
        rows, cols = a.shape

        # Cast to the requested field type where the dtype does not
        # already match it.
        if field is not None:

            if field == self.FIELD_INTEGER:
                a = a.astype('i')
            elif field == self.FIELD_REAL:
                if a.dtype.char not in 'fd':
                    a = a.astype('d')
            elif field == self.FIELD_COMPLEX:
                if a.dtype.char not in 'FD':
                    a = a.astype('D')

    else:
        if not isspmatrix(a):
            raise ValueError('unknown matrix type: %s' % type(a))
        rep = 'coordinate'
        rows, cols = a.shape

    typecode = a.dtype.char

    if precision is None:
        if typecode in 'fF':
            precision = 8
        else:
            precision = 16

    if field is None:
        kind = a.dtype.kind
        if kind == 'i':
            field = 'integer'
        elif kind == 'f':
            field = 'real'
        elif kind == 'c':
            field = 'complex'
        else:
            raise TypeError('unexpected dtype kind ' + kind)

    if symmetry is None:
        symmetry = self._get_symmetry(a)

    # validate rep, field, and symmetry
    self.__class__._validate_format(rep)
    self.__class__._validate_field(field)
    self.__class__._validate_symmetry(symmetry)

    # write initial header line
    # (str.format leaves '%%' untouched, so the line starts with two '%')
    stream.write(
        asbytes('%%MatrixMarket matrix {0} {1} {2}\n'.format(
            rep, field, symmetry)))

    # write comments
    for line in comment.split('\n'):
        stream.write(asbytes('%%%s\n' % (line)))

    template = self._field_template(field, precision)

    # write dense format
    if rep == self.FORMAT_ARRAY:

        # write shape spec
        stream.write(asbytes('%i %i\n' % (rows, cols)))

        if field in (self.FIELD_INTEGER, self.FIELD_REAL):

            if symmetry == self.SYMMETRY_GENERAL:
                # column by column, every entry
                for j in range(cols):
                    for i in range(rows):
                        stream.write(asbytes(template % a[i, j]))
            else:
                # column by column, entries on and below the diagonal
                for j in range(cols):
                    for i in range(j, rows):
                        stream.write(asbytes(template % a[i, j]))

        elif field == self.FIELD_COMPLEX:

            if symmetry == self.SYMMETRY_GENERAL:
                for j in range(cols):
                    for i in range(rows):
                        aij = a[i, j]
                        stream.write(
                            asbytes(template %
                                    (real(aij), imag(aij))))
            else:
                for j in range(cols):
                    for i in range(j, rows):
                        aij = a[i, j]
                        stream.write(
                            asbytes(template %
                                    (real(aij), imag(aij))))

        elif field == self.FIELD_PATTERN:
            raise ValueError('pattern type inconsisted with dense format')

        else:
            raise TypeError('Unknown field type %s' % field)

    # write sparse format
    else:

        coo = a.tocoo()  # convert to COOrdinate format

        # if symmetry format used, remove values above main diagonal
        if symmetry != self.SYMMETRY_GENERAL:
            lower_triangle_mask = coo.row >= coo.col
            coo = coo_matrix((coo.data[lower_triangle_mask],
                              (coo.row[lower_triangle_mask],
                               coo.col[lower_triangle_mask])),
                             shape=coo.shape)

        # write shape spec
        stream.write(asbytes('%i %i %i\n' % (rows, cols, coo.nnz)))

        # make indices and data array
        if field == self.FIELD_PATTERN:
            IJV = vstack((coo.row, coo.col)).T
        elif field in [self.FIELD_INTEGER, self.FIELD_REAL]:
            IJV = vstack((coo.row, coo.col, coo.data)).T
        elif field == self.FIELD_COMPLEX:
            IJV = vstack(
                (coo.row, coo.col, coo.data.real, coo.data.imag)).T
        else:
            raise TypeError('Unknown field type %s' % field)
        IJV[:, :2] += 1  # change base 0 -> base 1

        # formats for row indices, col indices and data columns
        fmt = ('%i', '%i') + ('%%.%dg' % precision, ) * (IJV.shape[1] - 2)

        # save to file
        savetxt(stream, IJV, fmt=fmt)
class StringConverter(object):
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`.  If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : sequence of str, optional
        Sequence of strings indicating a missing value.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    # Ordered (type, conversion function, default) triples.  The list is a
    # class attribute, so `upgrade_mapper` changes it for every instance.
    _mapper = [(nx.bool_, str2bool, False),
               (nx.integer, int, -1),
               (nx.floating, float, nx.nan),
               (complex, _bytes_to_complex, nx.nan + 0j),
               (nx.string_, bytes, asbytes('???'))]
    (_defaulttype, _defaultfunc, _defaultfill) = list(zip(*_mapper))

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    # This is a bit annoying. We want to return the "general" type in most
    # cases (ie. "string" rather than "S10"), but we want to return the
    # specific type for datetime64 (ie. "datetime64[us]" rather than
    # "datetime64").
    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function
        and its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper.  The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions.  A sequence whose first
            item is a tuple or list is inserted item by item as is.
        default : any or sequence, optional
            Default for a single function, or one default per function.
            A sequence shorter than `func` is padded with None.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single function
        if hasattr(func, '__call__'):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # extend, not append: append added one nested list, which
                # gave a bogus default and dropped the trailing functions.
                default.extend([None] * (len(func) - len(default)))
            for (fct, dft) in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Convert unicode (for Py3)
        if isinstance(missing_values, str):
            missing_values = asbytes(missing_values)
        elif isinstance(missing_values, (list, tuple)):
            missing_values = asbytes_nested(missing_values)
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a function"
                              " or a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func(asbytes('0'))
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)
            # Set the status according to the dtype
            _status = -1
            for (i, (deftype, func, default_def)) in enumerate(self._mapper):
                if np.issubdtype(dtype.type, deftype):
                    _status = i
                    if default is None:
                        self.default = default_def
                    else:
                        self.default = default
                    break
            if _status == -1:
                # We never found a match in the _mapper...
                _status = 0
                self.default = default
            self._status = _status
            # If the input was a dtype, set the function to the last we saw
            # (`func` is the loop variable left over from the search above)
            if self.func is None:
                self.func = func
            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = set([asbytes('')])
        else:
            if isinstance(missing_values, bytes):
                missing_values = missing_values.split(asbytes(","))
            self.missing_values = set(list(missing_values) + [asbytes('')])
        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`; return the default if `func` raises ValueError."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`; on ValueError return the default only when the
        stripped value is a registered missing value, else raise ValueError.
        """
        try:
            return self.func(value)
        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        return self._callingfunction(value)

    def upgrade(self, value):
        """
        Try to find the best converter for a given string, and return the
        result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried.  The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        Raises
        ------
        ConverterLockError
            If the conversion fails and the converter is locked.
        ConverterError
            If `_status` equals the length of the mapper.

        """
        self._checked = True
        try:
            # Return the converted value, as documented; the result used to
            # be discarded and None returned.
            return self._strict_call(value)
        except ValueError:
            # Raise an exception if we locked the converter...
            if self._locked:
                errmsg = "Converter is locked and cannot be upgraded"
                raise ConverterLockError(errmsg)
            _statusmax = len(self._mapper)
            # Complains if we try to upgrade by the maximum
            _status = self._status
            if _status == _statusmax:
                errmsg = "Could not find a valid conversion function"
                raise ConverterError(errmsg)
            elif _status < _statusmax - 1:
                _status += 1
            (self.type, self.func, default) = self._mapper[_status]
            self._status = _status
            if self._initial_default is not None:
                self.default = self._initial_default
            else:
                self.default = default
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until every item of `value` converts.

        `value` may be a single item or an iterable of items; a single item
        is wrapped in a tuple.  Nothing is returned.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value, )
        _strict_call = self._strict_call
        try:
            list(map(_strict_call, value))
        except ValueError:
            # Raise an exception if we locked the converter...
            if self._locked:
                errmsg = "Converter is locked and cannot be upgraded"
                raise ConverterLockError(errmsg)
            _statusmax = len(self._mapper)
            # Complains if we try to upgrade by the maximum
            _status = self._status
            if _status == _statusmax:
                raise ConverterError(
                    "Could not find a valid conversion function")
            elif _status < _statusmax - 1:
                _status += 1
            (self.type, self.func, default) = self._mapper[_status]
            if self._initial_default is not None:
                self.default = self._initial_default
            else:
                self.default = default
            self._status = _status
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values=asbytes(''), locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : sequence of str, optional
            Sequence of strings indicating a missing value.  Passing None
            clears the stored missing values.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked
        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            try:
                tester = func(testing_value or asbytes('1'))
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))
        # Add the missing values to the existing set
        if missing_values is not None:
            if _is_bytes_like(missing_values):
                self.missing_values.add(missing_values)
            elif hasattr(missing_values, '__iter__'):
                for val in missing_values:
                    self.missing_values.add(val)
        else:
            # Keep the attribute a set, as everywhere else in the class, so
            # a later `update` can still call `.add` on it.
            self.missing_values = set()
def _write(self, stream, a, comment='', field=None, precision=None,
           symmetry=None):
    """
    Write the matrix `a` to `stream` in Matrix Market format.

    Parameters
    ----------
    stream : file-like
        Object whose ``write`` method receives the output of `asbytes`.
    a : list, tuple, ndarray, object with ``__array__``, or sparse matrix
        Array-likes are converted with `asarray` and written in the dense
        ('array') layout; anything else must satisfy `isspmatrix` and is
        written in the 'coordinate' layout.
    comment : str, optional
        Text written after the header; every '\\n'-separated line is
        prefixed with '%'.
    field : str, optional
        One of the ``FIELD_*`` constants.  When None it is derived from
        the dtype kind of `a` ('i', 'f', 'c' or 'u').
    precision : int, optional
        Number of digits used for floating point output.  Defaults to 8
        for the typecodes 'f'/'F' and 16 otherwise.
    symmetry : str, optional
        Symmetry keyword; when None, ``self._get_symmetry(a)`` is used.

    Raises
    ------
    ValueError
        If a dense input is not 2-dimensional, if `a` is neither
        array-like nor sparse, or if the 'pattern' field is requested for
        a dense matrix.
    OverflowError
        If integer output is requested and the dtype of `a` cannot be
        safely cast to 'intp'.
    TypeError
        If the dtype kind or the field is not supported.
    """
    if isinstance(a, (list, ndarray, tuple)) or hasattr(a, '__array__'):
        rep = self.FORMAT_ARRAY
        a = asarray(a)
        if len(a.shape) != 2:
            raise ValueError('Expected 2 dimensional array')
        rows, cols = a.shape

        if field is not None:

            if field == self.FIELD_INTEGER:
                if not can_cast(a.dtype, 'intp'):
                    raise OverflowError("mmwrite does not support integer "
                                        "dtypes larger than native 'intp'.")
                a = a.astype('intp')
            elif field == self.FIELD_REAL:
                if a.dtype.char not in 'fd':
                    a = a.astype('d')
            elif field == self.FIELD_COMPLEX:
                if a.dtype.char not in 'FD':
                    a = a.astype('D')

    else:
        if not isspmatrix(a):
            raise ValueError('unknown matrix type: %s' % type(a))
        rep = 'coordinate'
        rows, cols = a.shape

    typecode = a.dtype.char

    if precision is None:
        if typecode in 'fF':
            precision = 8
        else:
            precision = 16

    if field is None:
        kind = a.dtype.kind
        if kind == 'i':
            if not can_cast(a.dtype, 'intp'):
                raise OverflowError("mmwrite does not support integer "
                                    "dtypes larger than native 'intp'.")
            field = 'integer'
        elif kind == 'f':
            field = 'real'
        elif kind == 'c':
            field = 'complex'
        elif kind == 'u':
            field = 'unsigned-integer'
        else:
            raise TypeError('unexpected dtype kind ' + kind)

    if symmetry is None:
        symmetry = self._get_symmetry(a)

    # validate rep, field, and symmetry
    self.__class__._validate_format(rep)
    self.__class__._validate_field(field)
    self.__class__._validate_symmetry(symmetry)

    # write initial header line; str.format leaves '%%' untouched, so the
    # banner starts with a literal '%%MatrixMarket'
    stream.write(asbytes('%%MatrixMarket matrix {0} {1} {2}\n'.format(
        rep, field, symmetry)))

    # write comments
    for line in comment.split('\n'):
        stream.write(asbytes('%%%s\n' % (line)))

    template = self._field_template(field, precision)

    # write dense format
    if rep == self.FORMAT_ARRAY:

        # write shape spec
        stream.write(asbytes('%i %i\n' % (rows, cols)))

        # Entries are written column by column.  For a symmetric layout the
        # rows of column j start at the diagonal; for skew-symmetric they
        # start one row below it (strictly lower triangle).
        if symmetry == self.SYMMETRY_SKEW_SYMMETRIC:
            diag_shift = 1
        else:
            diag_shift = 0

        if field in (self.FIELD_INTEGER, self.FIELD_REAL,
                     self.FIELD_UNSIGNED):

            if symmetry == self.SYMMETRY_GENERAL:
                for j in range(cols):
                    for i in range(rows):
                        stream.write(asbytes(template % a[i, j]))
            else:
                for j in range(cols):
                    for i in range(j + diag_shift, rows):
                        stream.write(asbytes(template % a[i, j]))

        elif field == self.FIELD_COMPLEX:

            if symmetry == self.SYMMETRY_GENERAL:
                for j in range(cols):
                    for i in range(rows):
                        aij = a[i, j]
                        stream.write(asbytes(template % (real(aij),
                                                         imag(aij))))
            else:
                # Same row range as the real-valued branch: the diagonal of
                # a skew-symmetric matrix used to be written here although
                # the real-valued branch already left it out.
                for j in range(cols):
                    for i in range(j + diag_shift, rows):
                        aij = a[i, j]
                        stream.write(asbytes(template % (real(aij),
                                                         imag(aij))))

        elif field == self.FIELD_PATTERN:
            raise ValueError('pattern type inconsisted with dense format')

        else:
            raise TypeError('Unknown field type %s' % field)

    # write sparse format
    else:

        coo = a.tocoo()  # convert to COOrdinate format

        # if symmetry format used, remove values above main diagonal
        if symmetry != self.SYMMETRY_GENERAL:
            lower_triangle_mask = coo.row >= coo.col
            coo = coo_matrix((coo.data[lower_triangle_mask],
                              (coo.row[lower_triangle_mask],
                               coo.col[lower_triangle_mask])),
                             shape=coo.shape)

        # write shape spec
        stream.write(asbytes('%i %i %i\n' % (rows, cols, coo.nnz)))

        # the coordinate layout asks for a template with one digit less
        template = self._field_template(field, precision-1)

        # indices are shifted from base 0 to base 1 while writing
        if field == self.FIELD_PATTERN:
            for r, c in zip(coo.row+1, coo.col+1):
                stream.write(asbytes("%i %i\n" % (r, c)))
        elif field in (self.FIELD_INTEGER, self.FIELD_REAL,
                       self.FIELD_UNSIGNED):
            for r, c, d in zip(coo.row+1, coo.col+1, coo.data):
                stream.write(asbytes(("%i %i " % (r, c)) +
                                     (template % d)))
        elif field == self.FIELD_COMPLEX:
            for r, c, d in zip(coo.row+1, coo.col+1, coo.data):
                stream.write(asbytes(("%i %i " % (r, c)) +
                                     (template % (d.real, d.imag))))
        else:
            raise TypeError('Unknown field type %s' % field)
def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
            shape=None, order='C'):
    """
    Create an array of type `subtype` backed by a memory map of a file.

    Parameters
    ----------
    filename : str or file-like
        Path of the file to map, or an object with a ``read`` attribute
        that is used as the already opened file.
    dtype : data-type, optional
        Passed to `dtypedescr` to obtain the element type.
    mode : str, optional
        A key of `mode_equivalents` or an entry of `valid_filemodes`.
        'c' maps copy-on-write, 'r' read-only, anything else writable.
    offset : int, optional
        Position in the file, in bytes, at which the array data starts.
    shape : int or tuple of int, optional
        Shape of the array.  When None, a 1-D array covering the rest of
        the file after `offset` is created.  Required for mode 'w+'.
    order : str, optional
        Passed through to ``ndarray.__new__``.

    Raises
    ------
    ValueError
        If `mode` is not recognised, if `shape` is missing in mode 'w+',
        or if the data after `offset` is not a whole number of items.
    """
    # Import here to minimize 'import numpy' overhead
    import mmap
    import os.path
    try:
        mode = mode_equivalents[mode]
    except KeyError:
        if mode not in valid_filemodes:
            # list(...) so that the concatenation also works where keys()
            # returns a view instead of a list
            raise ValueError("mode must be one of %s" %
                             (valid_filemodes + list(mode_equivalents.keys())))

    # Check before opening: opening with 'w+b' already creates or truncates
    # the file, and the handle would be left open by the raise.
    if (mode == 'w+') and shape is None:
        raise ValueError("shape must be given")

    if hasattr(filename, 'read'):
        fid = filename
    else:
        fid = open(filename, ('r' if mode == 'c' else mode) + 'b')

    fid.seek(0, 2)
    flen = fid.tell()
    descr = dtypedescr(dtype)
    _dbytes = descr.itemsize

    if shape is None:
        nbytes = flen - offset
        if (nbytes % _dbytes):
            fid.close()
            raise ValueError("Size of available data is not a "
                             "multiple of data-type size.")
        size = nbytes // _dbytes
        shape = (size, )
    else:
        if not isinstance(shape, tuple):
            shape = (shape, )
        size = 1
        for k in shape:
            size *= k

    nbytes = long(offset + size * _dbytes)

    # Grow the file to the required length by writing its last byte.
    if mode == 'w+' or (mode == 'r+' and flen < nbytes):
        fid.seek(nbytes - 1, 0)
        fid.write(asbytes('\0'))
        fid.flush()

    if mode == 'c':
        acc = mmap.ACCESS_COPY
    elif mode == 'r':
        acc = mmap.ACCESS_READ
    else:
        acc = mmap.ACCESS_WRITE

    if sys.version_info[:2] >= (2, 6):
        # The offset keyword in mmap.mmap needs Python >= 2.6
        # The map starts at a multiple of ALLOCATIONGRANULARITY; the array
        # then starts `array_offset` bytes into the map.
        start = offset - offset % mmap.ALLOCATIONGRANULARITY
        nbytes -= start
        array_offset = offset - start
        mm = mmap.mmap(fid.fileno(), nbytes, access=acc, offset=start)
    else:
        array_offset = offset
        mm = mmap.mmap(fid.fileno(), nbytes, access=acc)

    self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
                           offset=array_offset, order=order)
    self._mmap = mm
    # Record the offset the caller asked for, not the remainder left after
    # aligning the map start.
    self.offset = offset
    self.mode = mode

    if isinstance(filename, basestring):
        self.filename = os.path.abspath(filename)
    elif hasattr(filename, "name"):
        self.filename = os.path.abspath(filename.name)

    return self
def test_junk_in_string_fields_of_recarray(self, level=rlevel):
    """Ticket #483: a value stored in an '|S20' field reads back unchanged."""
    expected = asbytes('abc')
    record_layout = [('var1', '|S20')]
    records = np.array([[expected]], dtype=record_layout)
    stored = records['var1'][0][0]
    assert asbytes(stored) == expected
# NOTE(review): this assignment may be the tail of a setup routine that
# starts above this chunk -- confirm its nesting against the full file.
urllib_request.urlopen = urlopen_stub


def teardown():
    """Put the ``urlopen`` saved in ``old_urlopen`` back in place."""
    urllib_request.urlopen = old_urlopen


# A valid website for more robust testing
http_path = 'http://www.google.com/'
http_file = 'index.html'

http_fakepath = 'http://fake.abc.web/site/'
http_fakefile = 'fake.txt'

malicious_files = [
    '/etc/shadow',
    '../../shadow',
    '..\\system.dat',
    'c:\\windows\\system.dat',
]

magic_line = asbytes('three is the magic number')


# Utility functions used by many TestCases

def valid_textfile(filedir):
    """Create a ``dstmp_*.txt`` file inside `filedir` and return its path."""
    handle, filename = mkstemp(dir=filedir, prefix='dstmp_', suffix='.txt',
                               text=True)
    os.close(handle)
    return filename


def invalid_textfile(filedir):
    """Return a ``dstmp_*.txt`` path inside `filedir` whose file was removed."""
    handle, filename = mkstemp(dir=filedir, prefix='dstmp_', suffix='.txt')
    os.close(handle)
    os.remove(filename)
    return filename
def __init__(self, dtype_or_func=None, default=None, missing_values=None,
             locked=False):
    """
    Set up the conversion function, default value and missing-value markers.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        None selects `str2bool`; anything accepted by ``np.dtype`` selects
        an entry of ``_mapper``; any other callable is used as is.
    default : any, optional
        Value stored as the default; when None, one is taken from the
        matching ``_mapper`` entry or guessed by calling the function.
    missing_values : str, bytes, list or tuple, optional
        Markers of missing data; a bytes value is split on ``b","``.
        ``b''`` is always included.
    locked : bool, optional
        Stored as ``_locked`` after conversion with ``bool``.

    Raises
    ------
    TypeError
        If `dtype_or_func` is neither a dtype nor callable.
    """
    # Bring the missing-value markers to bytes (unicode input, for Py3).
    if isinstance(missing_values, unicode):
        missing_values = asbytes(missing_values)
    elif isinstance(missing_values, (list, tuple)):
        missing_values = asbytes_nested(missing_values)

    # Lock against upgrades.
    self._locked = bool(locked)

    if dtype_or_func is None:
        # Nothing requested: minimal initialization as a boolean converter.
        self.func = str2bool
        self._status = 0
        self.default = default or False
        dtype = np.dtype('bool')
    else:
        # First assume a dtype was given; fall back to a callable.
        self.func = None
        try:
            dtype = np.dtype(dtype_or_func)
        except TypeError:
            if not hasattr(dtype_or_func, '__call__'):
                errmsg = ("The input argument `dtype` is neither a"
                          " function nor a dtype (got '%s' instead)")
                raise TypeError(errmsg % type(dtype_or_func))
            self.func = dtype_or_func
            # Without a default, try the function on b'0' to guess one.
            if default is None:
                try:
                    default = self.func(b'0')
                except ValueError:
                    default = None
            dtype = self._getdtype(default)

        # First pass: earliest mapper entry the dtype is a sub-dtype of.
        # `func` deliberately stays bound after the loop (to the match, or
        # to the last entry when nothing matched).
        status = -1
        for position, (kind, func, fill) in enumerate(self._mapper):
            if np.issubdtype(dtype.type, kind):
                status = position
                self.default = fill if default is None else default
                break

        # Second pass: an entry registered for exactly this type wins.
        chosen = func
        for position, (kind, func, fill) in enumerate(self._mapper):
            if dtype.type == kind:
                status = position
                chosen = func
                self.default = fill if default is None else default
                break

        if status == -1:
            # No mapper entry matched at all.
            status = 0
            self.default = default
        self._status = status

        # A dtype input takes the function picked by the search.
        if self.func is None:
            self.func = chosen

        # The plain integer converter is swapped for something more robust.
        if self.func == self._mapper[1][1]:
            if issubclass(dtype.type, np.uint64):
                self.func = np.uint64
            elif issubclass(dtype.type, np.int64):
                self.func = np.int64
            else:
                self.func = lambda x: int(float(x))

    # Set of strings that stand for a missing value.
    markers = set([b''])
    if missing_values is not None:
        if isinstance(missing_values, bytes):
            missing_values = missing_values.split(b",")
        markers.update(missing_values)
    self.missing_values = markers

    self._callingfunction = self._strict_call
    self.type = self._dtypeortype(dtype)
    self._checked = False
    self._initial_default = default
Some things are more easily handled Python. """ from __future__ import division, absolute_import, print_function import re import sys from numpy.compat import asbytes, basestring from .multiarray import dtype, array, ndarray import ctypes from .numerictypes import object_ if (sys.byteorder == 'little'): _nbo = asbytes('<') else: _nbo = asbytes('>') def _makenames_list(adict, align): allfields = [] fnames = list(adict.keys()) for fname in fnames: obj = adict[fname] n = len(obj) if not isinstance(obj, tuple) or n not in [2, 3]: raise ValueError("entry not a 2- or 3- tuple") if (n > 2) and (obj[2] == fname): continue num = int(obj[1])
def test_chararray_rstrip(self, level=rlevel):
    """Ticket #222: ``rstrip`` on a chararray removes trailing whitespace."""
    padded = np.chararray((1, ), 5)
    padded[0] = asbytes('a ')
    stripped = padded.rstrip()
    assert_equal(stripped[0], asbytes('a'))
def readsav(file_name, idict=None, python_dict=False,
            uncompressed_file_name=None, verbose=False):
    '''
    Read an IDL .sav file

    Parameters
    ----------
    file_name : str
        Name of the IDL save file.
    idict : dict, optional
        Dictionary in which to insert .sav file variables
    python_dict: bool, optional
        By default, the object return is not a Python dictionary, but a
        case-insensitive dictionary with item, attribute, and call access
        to variables. To get a standard Python dictionary, set this option
        to True.
    uncompressed_file_name : str, optional
        This option only has an effect for .sav files written with the
        /compress option. If a file name is specified, compressed .sav
        files are uncompressed to this file. Otherwise, readsav will use
        the `tempfile` module to determine a temporary filename
        automatically, and will remove the temporary file upon successfully
        reading it in.
    verbose : bool, optional
        Whether to print out information about the save file, including
        the records read, and available variables.

    Returns
    -------
    idl_dict : AttrDict or dict
        If `python_dict` is set to False (default), this function returns a
        case-insensitive dictionary with item, attribute, and call access
        to variables. If `python_dict` is set to True, this function
        returns a Python dictionary with all variable names in lowercase.
        If `idict` was specified, then variables are written to the
        dictionary specified, and the updated dictionary is returned.

    Raises
    ------
    Exception
        If the file does not start with the 'SR' signature or carries an
        unknown record format.
    '''

    # Initialize record and variable holders
    records = []
    # NOTE(review): an empty dict passed as `idict` is falsy, so it is
    # treated like None here and at the end -- confirm this is intended.
    if python_dict or idict:
        variables = {}
    else:
        variables = AttrDict()

    # Open the IDL file
    f = open(file_name, 'rb')
    fout = None

    # try/finally so that both the input file and the expanded copy are
    # closed when a header check or a record read fails.
    try:

        # Read the signature, which should be 'SR'
        signature = _read_bytes(f, 2)
        if signature != asbytes('SR'):
            raise Exception("Invalid SIGNATURE: %s" % signature)

        # Next, the record format, which is '\x00\x04' for normal .sav
        # files, and '\x00\x06' for compressed .sav files.
        recfmt = _read_bytes(f, 2)

        if recfmt == asbytes('\x00\x04'):
            pass

        elif recfmt == asbytes('\x00\x06'):

            # Single-argument print(...) calls behave the same as a print
            # statement and as the print function.
            if verbose:
                print("IDL Save file is compressed")

            if uncompressed_file_name:
                fout = open(uncompressed_file_name, 'w+b')
            else:
                fout = tempfile.NamedTemporaryFile(suffix='.sav')

            if verbose:
                print(" -> expanding to %s" % fout.name)

            # Write header
            fout.write(asbytes('SR\x00\x04'))

            # Cycle through records
            while True:

                # Read record type
                rectype = _read_long(f)
                fout.write(struct.pack('>l', int(rectype)))

                # Read position of next record and return as int.  The two
                # 32-bit halves are turned into plain ints first so that
                # the 64-bit value cannot overflow a fixed-width scalar.
                nextrec = int(_read_uint32(f))
                nextrec += int(_read_uint32(f)) * 2**32

                # Read the unknown 4 bytes
                unknown = f.read(4)

                # Check if the end of the file has been reached
                if RECTYPE_DICT[rectype] == 'END_MARKER':
                    fout.write(struct.pack('>I', int(nextrec) % 2**32))
                    fout.write(struct.pack(
                        '>I', int((nextrec - (nextrec % 2**32)) // 2**32)))
                    fout.write(unknown)
                    break

                # Find current position
                pos = f.tell()

                # Decompress record
                string = zlib.decompress(f.read(nextrec - pos))

                # Find new position of next record
                nextrec = fout.tell() + len(string) + 12

                # Write out record (low word, then high word)
                fout.write(struct.pack('>I', int(nextrec % 2**32)))
                fout.write(struct.pack(
                    '>I', int((nextrec - (nextrec % 2**32)) // 2**32)))
                fout.write(unknown)
                fout.write(string)

            # Close the original compressed file
            f.close()

            # Set f to be the decompressed file, and skip the first four
            # bytes
            f = fout
            f.seek(4)

        else:
            raise Exception("Invalid RECFMT: %s" % recfmt)

        # Loop through records, and add them to the list
        while True:
            r = _read_record(f)
            records.append(r)
            if 'end' in r:
                if r['end']:
                    break

    finally:
        # Close the file(s); closing twice is harmless
        f.close()
        if fout is not None:
            fout.close()

    # Find heap data variables
    heap = {}
    for r in records:
        if r['rectype'] == "HEAP_DATA":
            heap[r['heap_index']] = r['data']

    # Find all variables
    for r in records:
        if r['rectype'] == "VARIABLE":
            # Follow pointers into the heap until actual data is reached
            while isinstance(r['data'], Pointer):
                r['data'] = heap[r['data'].index]
            variables[r['varname'].lower()] = r['data']

    if verbose:

        # Print out timestamp info about the file
        for record in records:
            if record['rectype'] == "TIMESTAMP":
                print("-" * 50)
                print("Date: %s" % record['date'])
                print("User: %s" % record['user'])
                print("Host: %s" % record['host'])
                break

        # Print out version info about the file
        for record in records:
            if record['rectype'] == "VERSION":
                print("-" * 50)
                print("Format: %s" % record['format'])
                print("Architecture: %s" % record['arch'])
                print("Operating System: %s" % record['os'])
                print("IDL Version: %s" % record['release'])
                break

        # Print out identification info about the file
        for record in records:
            if record['rectype'] == "IDENTIFICATON":
                print("-" * 50)
                print("Author: %s" % record['author'])
                print("Title: %s" % record['title'])
                print("ID Code: %s" % record['idcode'])
                break

        print("-" * 50)
        print("Successfully read %i records of which:" %
              (len(records)))

        # Create convenience list of record types
        rectypes = [r['rectype'] for r in records]

        for rt in set(rectypes):
            if rt != 'END_MARKER':
                print(" - %i are of type %s" % (rectypes.count(rt), rt))
        print("-" * 50)

        if 'VARIABLE' in rectypes:
            print("Available variables:")
            for var in variables:
                print(" - %s [%s]" % (var, type(variables[var])))
            print("-" * 50)

    if idict:
        for var in variables:
            idict[var] = variables[var]
        return idict
    else:
        return variables
def test_bytes(self):
    """A seeded ``rnd.bytes(10)`` must reproduce the reference byte string."""
    reference = asbytes('\xa4\xde\xde{\xb4\x88\xe6\x84*2')
    rnd.seed(self.seed, self.brng)
    produced = rnd.bytes(10)
    np.testing.assert_equal(produced, reference)