Example #1
def roundtrip_truncated(arr):
    f = BytesIO()
    format.write_array(f, arr)
    # the copied buffer is one byte short
    f2 = BytesIO(f.getvalue()[0:-1])
    arr2 = format.read_array(f2)
    return arr2
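A minimal usage sketch (not from the original source) showing why the truncated stream matters: read_array raises ValueError when the payload is cut short.

import numpy as np
from io import BytesIO
from numpy.lib import format

arr = np.arange(10)
f = BytesIO()
format.write_array(f, arr)
truncated = BytesIO(f.getvalue()[:-1])  # drop the last byte
try:
    format.read_array(truncated)
except ValueError:
    print('truncated stream rejected as expected')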
Example #2
File: pickle.py Project: bkandel/pandas
def _unpickle_array(bytes):
    arr = read_array(BytesIO(bytes))

    # All datetimes should be stored as M8[ns].  When unpickling with
    # numpy 1.6, it will read these as M8[us], so this ensures all
    # datetime64 types are read as M8[ns].
    if is_datetime64_dtype(arr):
        arr = arr.view(_NS_DTYPE)

    return arr
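A hedged sketch of the normalization above; _NS_DTYPE is pandas' internal alias for numpy's nanosecond datetime dtype, reproduced here as an assumption.

import numpy as np

_NS_DTYPE = np.dtype('M8[ns]')  # assumed definition of pandas' _NS_DTYPE
raw = np.array([1, 2, 3], dtype='M8[us]')  # as numpy 1.6 would read it
fixed = raw.view(_NS_DTYPE)  # reinterpret the underlying int64 counts as nanoseconds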
Example #3
def test_version_2_0():
    f = BytesIO()
    # header is too large for format 1.0's 2-byte header-length field,
    # so version 2.0 is required
    dt = [(("%d" % i) * 100, float) for i in range(500)]
    d = np.ones(1000, dtype=dt)

    format.write_array(f, d, version=(2, 0))
    with warnings.catch_warnings(record=True) as w:
        warnings.filterwarnings('always', '', UserWarning)
        format.write_array(f, d)
        assert_(w[0].category is UserWarning)

    f.seek(0)
    n = format.read_array(f)
    assert_array_equal(d, n)

    # 1.0 requested but data cannot be saved this way
    assert_raises(ValueError, format.write_array, f, d, (1, 0))
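A small sketch (assuming numpy.lib.format's read_magic helper) showing how the version requirement shows up on disk: the version bytes follow the magic string.

import numpy as np
from io import BytesIO
from numpy.lib import format

dt = [(str(i) * 100, float) for i in range(500)]  # long field names inflate the header
f = BytesIO()
format.write_array(f, np.ones(10, dtype=dt), version=(2, 0))
f.seek(0)
print(format.read_magic(f))  # (2, 0): the header no longer fits format 1.0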
Example #4
File: common.py Project: while/pandas
def _unpickle_array(bytes):
    arr = read_array(BytesIO(bytes))
    return arr
Example #5
File: common.py Project: timClicks/pandas
def _unpickle_array(bytes):
    arr = read_array(StringIO(bytes))
    return arr
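An editorial aside, not from the source: StringIO here is Python 2-era code, where str is a byte string. On Python 3 the .npy payload is binary, so BytesIO is required, as in the other examples.

import numpy as np
from io import BytesIO
from numpy.lib.format import read_array, write_array

buf = BytesIO()
write_array(buf, np.arange(3))
arr = read_array(BytesIO(buf.getvalue()))  # Python 3 equivalent of the snippet above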
Example #6
def roundtrip(arr):
    f = BytesIO()
    format.write_array(f, arr)
    f2 = BytesIO(f.getvalue())
    arr2 = format.read_array(f2, allow_pickle=True)
    return arr2
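A hedged sketch of what allow_pickle controls: object arrays are serialized via pickle, so reading them back with allow_pickle=False fails.

import numpy as np
from io import BytesIO
from numpy.lib import format

obj = np.array([{'a': 1}], dtype=object)
f = BytesIO()
format.write_array(f, obj)
f.seek(0)
try:
    format.read_array(f, allow_pickle=False)
except ValueError:
    pass  # object arrays cannot be loaded without pickle
f.seek(0)
print(format.read_array(f, allow_pickle=True))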
Example #7
def roundtrip_randsize(arr):
    f = BytesIO()
    format.write_array(f, arr)
    f2 = BytesIOSRandomSize(f.getvalue())
    arr2 = format.read_array(f2)
    return arr2
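BytesIOSRandomSize is a helper from numpy's test suite; a sketch of its likely shape (an assumption, not the verbatim class) follows.

import random
from io import BytesIO

class BytesIOSRandomSize(BytesIO):
    # read() returns at most a random number of the requested bytes,
    # exercising read_array's handling of short reads
    def read(self, size=None):
        if size:
            size = random.randint(1, size)
        return super().read(size)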
Example #8
def toArray(self, s):
    f = StringIO(s)
    arr = format.read_array(f)
    return arr
Example #9
def _unpickle_array(bytes):
    arr = read_array(BytesIO(bytes))

    return arr
Example #10
        # Load the info file and get the task and metric
        info_file = ls(os.path.join(input_dir, basename + '*_public.info'))[0]
        info = get_info(info_file)
        score_name = info['task'][0:-15] + info['metric'][0:-7].upper()
        predict_name = basename
        try:
            # Get the last prediction from the res subdirectory (must end with
            # '.predict')
            predict_files = ls(os.path.join(
                prediction_dir, basename + '_' + set_name + '*.predict'))
            if not predict_files:
                raise IOError('Missing prediction file {}'.format(basename))
            predict_file = predict_files[-1]
            predict_name = predict_file[-predict_file[::-1].index(filesep):-
                                        predict_file[::-1].index('.') - 1]
            # Read the solution and prediction values into numpy arrays
            solution = read_array(solution_file)
            prediction = read_array(predict_file)
            if (solution.shape != prediction.shape):
                raise ValueError(
                    'Bad prediction shape {}'.format(prediction.shape))

            try:
                # Compute the score prescribed by the info file (for regression
                # scores, no normalization)
                if info['metric'] == 'r2_metric' or info[
                        'metric'] == 'a_metric':
                    # Remove NaN and Inf for regression
                    solution = sanitize_array(solution)
                    prediction = sanitize_array(prediction)
                    score = eval(info['metric'] + '(solution, prediction, "' +
                                 info['task'] + '")')
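The eval() call above builds the scorer call from strings; a hedged alternative is an explicit lookup table. The metric functions below are stubs standing in for the real scorers.

def r2_metric(solution, prediction, task):
    return 0.0  # stub for the real scorer

def a_metric(solution, prediction, task):
    return 0.0  # stub for the real scorer

METRICS = {'r2_metric': r2_metric, 'a_metric': a_metric}

def compute_score(info, solution, prediction):
    return METRICS[info['metric']](solution, prediction, info['task'])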
Example #11
def toArray(s):
    f = StringIO(s)
    arr = format.read_array(f)
    return arr
Example #12
def load(file,
         mmap_mode=None,
         allow_pickle=True,
         fix_imports=True,
         encoding='ASCII'):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    Parameters
    ----------
    file : file-like object, string, or pathlib.Path
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods. Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes).  A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray.  Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.
    allow_pickle : bool, optional
        Allow loading pickled object arrays stored in npy files. Reasons for
        disallowing pickles include security, as loading pickled data can
        execute arbitrary code. If pickles are disallowed, loading object
        arrays will fail.
        Default: True
    fix_imports : bool, optional
        Only useful when loading Python 2 generated pickled files on Python 3,
        which includes npy/npz files containing object arrays. If `fix_imports`
        is True, pickle will try to map the old Python 2 names to the new names
        used in Python 3.
    encoding : str, optional
        What encoding to use when reading Python 2 strings. Only useful when
        loading Python 2 generated pickled files in Python 3, which includes
        npy/npz files containing object arrays. Values other than 'latin1',
        'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
        data. Default: 'ASCII'

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.
    ValueError
        The file contains an object array, but allow_pickle=False given.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('data/foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    """
    own_fid = False
    if isinstance(file, basestring):
        fid = open(file, "rb")
        own_fid = True
    elif is_pathlib_path(file):
        fid = file.open("rb")
        own_fid = True
    else:
        fid = file

    if encoding not in ('ASCII', 'latin1', 'bytes'):
        # The 'encoding' value for pickle also affects what encoding
        # the serialized binary data of NumPy arrays is loaded
        # in. Pickle does not pass on the encoding information to
        # NumPy. The unpickling code in numpy.core.multiarray is
        # written to assume that unicode data appearing where binary
        # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'.
        #
        # Other encoding values can corrupt binary data, and we
        # purposefully disallow them. For the same reason, the errors=
        # argument is not exposed, as values other than 'strict' can
        # similarly silently corrupt numerical data.
        raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'")

    if sys.version_info[0] >= 3:
        pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports)
    else:
        # Nothing to do on Python 2
        pickle_kwargs = {}

    try:
        # Code to distinguish between NumPy binary files and pickles.
        _ZIP_PREFIX = b'PK\x03\x04'
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        # If the file size is less than N, we need to make sure not
        # to seek past the beginning of the file
        fid.seek(-min(N, len(magic)), 1)  # back-up
        if magic.startswith(_ZIP_PREFIX):
            # zip-file (assume .npz)
            # Transfer file ownership to NpzFile
            tmp = own_fid
            own_fid = False
            data = np.lib.npyio.NpzFile(fid,
                                        own_fid=tmp,
                                        allow_pickle=allow_pickle,
                                        pickle_kwargs=pickle_kwargs)
            return data
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid,
                                         allow_pickle=allow_pickle,
                                         pickle_kwargs=pickle_kwargs)
        else:
            # Try a pickle
            if not allow_pickle:
                raise ValueError(
                    "allow_pickle=False, but file does not contain "
                    "non-pickled data")
            try:
                return pickle.load(fid, **pickle_kwargs)
            except Exception:
                raise IOError("Failed to interpret file %s as a pickle" %
                              repr(file))
    finally:
        if own_fid:
            fid.close()
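A usage sketch for the loader above, mirroring numpy.load's documented behavior ('data.npz' is a scratch file created here):

import numpy as np
from io import BytesIO

buf = BytesIO()
np.save(buf, np.arange(5))
buf.seek(0)
print(np.load(buf))                # .npy path: a single array

np.savez('data.npz', a=np.arange(3))
with np.load('data.npz') as data:  # .npz path: dict-like, context-managed
    print(data['a'])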
Example #13
File: server.py Project: patykov/SD_2016.1
def from_string(s):
    f = StringIO(s)
    arr = format.read_array(f)
    return arr
Example #14
File: common.py Project: willgrass/pandas
def _unpickle_array(bytes):
    arr = read_array(StringIO(bytes))
    return arr
Example #15
def roundtrip(arr):
    f = BytesIO()
    format.write_array(f, arr)
    f2 = BytesIO(f.getvalue())
    arr2 = format.read_array(f2)
    return arr2
Example #16
def roundtrip_randsize(arr):
    f = BytesIO()
    format.write_array(f, arr)
    f2 = BytesIOSRandomSize(f.getvalue())
    arr2 = format.read_array(f2)
    return arr2
Example #17
def roundtrip(arr):
    f = BytesIO()
    format.write_array(f, arr)
    f2 = BytesIO(f.getvalue())
    arr2 = format.read_array(f2, allow_pickle=True)
    return arr2
Example #18
def roundtrip(arr):
    f = BytesIO()
    format.write_array(f, arr)
    f2 = BytesIO(f.getvalue())
    arr2 = format.read_array(f2)
    return arr2
Example #19
async def post_ensemble_record_matrix(
    *,
    db: Session = Depends(get_db),
    record: ds.Record = Depends(new_record_matrix),
    content_type: str = Header("application/json"),
    request: Request,
) -> js.RecordOut:
    """
    Assign an n-dimensional float matrix, encoded in JSON, to the given `name` record.
    """
    if content_type == "application/x-dataframe":
        logger.warning(
            "Content-Type with 'application/x-dataframe' is deprecated. Use 'text/csv' instead."
        )
        content_type = "text/csv"

    labels = None

    try:
        if content_type == "application/json":
            content = np.array(await request.json(), dtype=np.float64)
        elif content_type == "application/x-numpy":
            from numpy.lib.format import read_array

            stream = io.BytesIO(await request.body())
            content = read_array(stream)
        elif content_type == "text/csv":
            stream = io.BytesIO(await request.body())
            df = pd.read_csv(stream, index_col=0, float_precision="round_trip")
            content = df.values
            labels = [
                [str(v) for v in df.columns.values],
                [str(v) for v in df.index.values],
            ]
        elif content_type == "application/x-parquet":
            stream = io.BytesIO(await request.body())
            df = pd.read_parquet(stream)
            content = df.values
            labels = [
                [v for v in df.columns.values],
                [v for v in df.index.values],
            ]
        else:
            raise ValueError()
    except ValueError:
        if record.realization_index is None:
            message = f"Ensemble-wide record '{record.name}' for needs to be a matrix"
        else:
            message = f"Forward-model record '{record.name}' for realization {record.realization_index} needs to be a matrix"

        raise exc.UnprocessableError(message)

    # Require that the dimensionality of an ensemble-wide parameter matrix is at least 2
    if (
        record.realization_index is None
        and record.record_class is ds.RecordClass.parameter
    ):
        if content.ndim <= 1:
            raise exc.UnprocessableError(
                f"Ensemble-wide parameter record '{record.name}' for ensemble '{record.record_info.ensemble.id}'"
                "must have dimensionality of at least 2"
            )

    matrix_obj = ds.F64Matrix(content=content.tolist(), labels=labels)

    record.f64_matrix = matrix_obj
    return _create_record(db, record)
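A hedged client-side sketch for the application/x-numpy branch above; the endpoint URL is hypothetical and the requests library is assumed available. np.save produces the .npy bytes that read_array consumes on the server.

import io
import numpy as np
import requests

buf = io.BytesIO()
np.save(buf, np.random.rand(3, 4))  # .npy bytes, readable by read_array
requests.post(
    'http://localhost:8000/ensembles/ens1/records/coeffs/matrix',  # hypothetical URL
    data=buf.getvalue(),
    headers={'Content-Type': 'application/x-numpy'},
)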