def roundtrip_truncated(arr):
    """Serialize *arr* to an in-memory .npy stream, then read it back from a
    copy that is missing its final byte (exercises truncated-input handling)."""
    sink = BytesIO()
    format.write_array(sink, arr)
    # Drop the last byte so the payload is one byte short of the header's promise.
    clipped = BytesIO(sink.getvalue()[:-1])
    return format.read_array(clipped)
def _unpickle_array(bytes):
    """Deserialize a numpy array from raw ``.npy`` bytes.

    All datetimes should be stored as M8[ns]; numpy 1.6 unpickles them as
    M8[us], so any datetime64 result is re-viewed as M8[ns] before returning.
    """
    arr = read_array(BytesIO(bytes))
    if is_datetime64_dtype(arr):
        return arr.view(_NS_DTYPE)
    return arr
def test_version_2_0(): f = BytesIO() # requires more than 2 byte for header dt = [(("%d" % i) * 100, float) for i in range(500)] d = np.ones(1000, dtype=dt) format.write_array(f, d, version=(2, 0)) with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', UserWarning) format.write_array(f, d) assert_(w[0].category is UserWarning) f.seek(0) n = format.read_array(f) assert_array_equal(d, n) # 1.0 requested but data cannot be saved this way assert_raises(ValueError, format.write_array, f, d, (1, 0))
def _unpickle_array(bytes): arr = read_array(BytesIO(bytes)) return arr
def _unpickle_array(bytes):
    """Reconstruct a numpy array from a serialized string.

    NOTE(review): wraps the payload in ``StringIO`` — this is a Python 2
    code path; under Python 3 ``read_array`` requires a binary stream.
    """
    return read_array(StringIO(bytes))
def roundtrip(arr):
    """Serialize *arr* to .npy bytes and deserialize them again, allowing
    pickled object arrays on the read side."""
    sink = BytesIO()
    format.write_array(sink, arr)
    source = BytesIO(sink.getvalue())
    return format.read_array(source, allow_pickle=True)
def roundtrip_randsize(arr):
    """Round-trip *arr* through a stream whose reads return variable-sized
    chunks (``BytesIOSRandomSize``), stressing the reader's buffering."""
    sink = BytesIO()
    format.write_array(sink, arr)
    chunked = BytesIOSRandomSize(sink.getvalue())
    return format.read_array(chunked)
def toArray(self, s):
    """Parse the serialized string *s* back into a numpy array.

    NOTE(review): uses ``StringIO`` — a Python 2 code path; Python 3's
    ``format.read_array`` expects a binary stream.
    """
    return format.read_array(StringIO(s))
# Load the info file and get the task and metric info_file = ls(os.path.join(input_dir, basename + '*_public.info'))[0] info = get_info(info_file) score_name = info['task'][0:-15] + info['metric'][0:-7].upper() predict_name = basename try: # Get the last prediction from the res subdirectory (must end with # '.predict') predict_file = ls(os.path.join( prediction_dir, basename + '_' + set_name + '*.predict'))[-1] if (predict_file == []): raise IOError('Missing prediction file {}'.format(basename)) predict_name = predict_file[-predict_file[::-1].index(filesep):- predict_file[::-1].index('.') - 1] # Read the solution and prediction values into numpy arrays solution = read_array(solution_file) prediction = read_array(predict_file) if (solution.shape != prediction.shape): raise ValueError( 'Bad prediction shape {}'.format(prediction.shape)) try: # Compute the score prescribed by the info file (for regression # scores, no normalization) if info['metric'] == 'r2_metric' or info[ 'metric'] == 'a_metric': # Remove NaN and Inf for regression solution = sanitize_array(solution) prediction = sanitize_array(prediction) score = eval(info['metric'] + '(solution, prediction, "' + info['task'] + '")')
def toArray(s):
    """Parse the serialized string *s* back into a numpy array.

    NOTE(review): uses ``StringIO`` — a Python 2 code path; Python 3's
    ``format.read_array`` expects a binary stream.
    """
    stream = StringIO(s)
    return format.read_array(stream)
def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
         encoding='ASCII'):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    Parameters
    ----------
    file : file-like object, string, or pathlib.Path
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods. Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes).  A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray.  Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.
    allow_pickle : bool, optional
        Allow loading pickled object arrays stored in npy files. Reasons for
        disallowing pickles include security, as loading pickled data can
        execute arbitrary code. If pickles are disallowed, loading object
        arrays will fail.
        Default: True
    fix_imports : bool, optional
        Only useful when loading Python 2 generated pickled files on Python 3,
        which includes npy/npz files containing object arrays. If `fix_imports`
        is True, pickle will try to map the old Python 2 names to the new names
        used in Python 3.
    encoding : str, optional
        What encoding to use when reading Python 2 strings. Only useful when
        loading Python 2 generated pickled files in Python 3, which includes
        npy/npz files containing object arrays. Values other than 'latin1',
        'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
        data.
        Default: 'ASCII'

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.
    ValueError
        The file contains an object array, but allow_pickle=False given.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored in the
      pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the context
      manager protocol in a similar fashion to the open function::

        with load('data/foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with' block.
    """
    # Track whether we opened the file ourselves, so we only close what we own.
    own_fid = False
    if isinstance(file, basestring):
        fid = open(file, "rb")
        own_fid = True
    elif is_pathlib_path(file):
        fid = file.open("rb")
        own_fid = True
    else:
        fid = file

    if encoding not in ('ASCII', 'latin1', 'bytes'):
        # The 'encoding' value for pickle also affects what encoding
        # the serialized binary data of NumPy arrays is loaded
        # in. Pickle does not pass on the encoding information to
        # NumPy. The unpickling code in numpy.core.multiarray is
        # written to assume that unicode data appearing where binary
        # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'.
        #
        # Other encoding values can corrupt binary data, and we
        # purposefully disallow them. For the same reason, the errors=
        # argument is not exposed, as values other than 'strict'
        # result can similarly silently corrupt numerical data.
        raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'")

    if sys.version_info[0] >= 3:
        pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports)
    else:
        # Nothing to do on Python 2
        pickle_kwargs = {}

    try:
        # Code to distinguish from NumPy binary files and pickles.
        _ZIP_PREFIX = b'PK\x03\x04'
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        # If the file size is less than N, we need to make sure not
        # to seek past the beginning of the file
        fid.seek(-min(N, len(magic)), 1)  # back-up
        if magic.startswith(_ZIP_PREFIX):
            # zip-file (assume .npz); transfer file ownership to NpzFile.
            # BUG FIX: the previous code inspected ``data['t']`` against the
            # current calendar day and silently rebuilt the archive into a
            # plain dict of its first four members — a date-dependent,
            # KeyError-prone mutation of the result. The archive object is
            # returned as-is.
            tmp = own_fid
            own_fid = False
            return np.lib.npyio.NpzFile(fid, own_fid=tmp,
                                        allow_pickle=allow_pickle,
                                        pickle_kwargs=pickle_kwargs)
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid, allow_pickle=allow_pickle,
                                         pickle_kwargs=pickle_kwargs)
        else:
            # Try a pickle
            if not allow_pickle:
                raise ValueError(
                    "allow_pickle=False, but file does not contain "
                    "non-pickled data")
            try:
                # BUG FIX: was ``np.pickle.load`` — numpy exposes no
                # ``pickle`` attribute, so the fallback always raised.
                import pickle
                return pickle.load(fid, **pickle_kwargs)
            except Exception:
                raise IOError("Failed to interpret file %s as a pickle" %
                              repr(file))
    finally:
        if own_fid:
            fid.close()
def from_string(s):
    """Deserialize the string *s* into a numpy array.

    NOTE(review): uses ``StringIO`` — a Python 2 code path; Python 3's
    ``format.read_array`` expects a binary stream.
    """
    return format.read_array(StringIO(s))
def roundtrip(arr):
    """Write *arr* to an in-memory .npy stream and read it back."""
    sink = BytesIO()
    format.write_array(sink, arr)
    source = BytesIO(sink.getvalue())
    return format.read_array(source)
async def post_ensemble_record_matrix(
    *,
    db: Session = Depends(get_db),
    record: ds.Record = Depends(new_record_matrix),
    content_type: str = Header("application/json"),
    request: Request,
) -> js.RecordOut:
    """
    Assign an n-dimensional float matrix, encoded in JSON, to the given `name` record.

    The matrix payload is decoded according to ``Content-Type``: JSON,
    raw ``.npy`` bytes, CSV or Parquet. CSV/Parquet column and index labels
    are preserved alongside the values. Raises ``exc.UnprocessableError``
    when the payload cannot be parsed as a matrix, or when an ensemble-wide
    parameter matrix has fewer than 2 dimensions.
    """
    if content_type == "application/x-dataframe":
        logger.warning(
            "Content-Type with 'application/x-dataframe' is deprecated. Use 'text/csv' instead."
        )
        content_type = "text/csv"

    labels = None

    try:
        if content_type == "application/json":
            content = np.array(await request.json(), dtype=np.float64)
        elif content_type == "application/x-numpy":
            from numpy.lib.format import read_array

            stream = io.BytesIO(await request.body())
            content = read_array(stream)
        elif content_type == "text/csv":
            stream = io.BytesIO(await request.body())
            df = pd.read_csv(stream, index_col=0, float_precision="round_trip")
            content = df.values
            labels = [
                [str(v) for v in df.columns.values],
                [str(v) for v in df.index.values],
            ]
        elif content_type == "application/x-parquet":
            stream = io.BytesIO(await request.body())
            df = pd.read_parquet(stream)
            content = df.values
            labels = [
                [v for v in df.columns.values],
                [v for v in df.index.values],
            ]
        else:
            # Unknown content type: funnel into the same error path as a
            # malformed payload.
            raise ValueError()
    except ValueError:
        # BUG FIX: message previously read "record '...' for needs to be a
        # matrix" (stray "for").
        if record.realization_index is None:
            message = f"Ensemble-wide record '{record.name}' needs to be a matrix"
        else:
            message = (
                f"Forward-model record '{record.name}' for realization "
                f"{record.realization_index} needs to be a matrix"
            )
        raise exc.UnprocessableError(message)

    # Require that the dimensionality of an ensemble-wide parameter matrix is at least 2
    if (
        record.realization_index is None
        and record.record_class is ds.RecordClass.parameter
    ):
        if content.ndim <= 1:
            # BUG FIX: the two adjacent literals previously concatenated
            # without a separating space ("...'must have...").
            raise exc.UnprocessableError(
                f"Ensemble-wide parameter record '{record.name}' for ensemble "
                f"'{record.record_info.ensemble.id}' must have dimensionality "
                "of at least 2"
            )

    matrix_obj = ds.F64Matrix(content=content.tolist(), labels=labels)
    record.f64_matrix = matrix_obj
    return _create_record(db, record)