Ejemplo n.º 1
0
Archivo: hdfs.py Proyecto: bkanuka/pymc
def save_nparray_to_hdfs(fname, X, hdfs):
	'''
	An instance of numpy's savetext function to enable saving numpy 
	arrays in HDFS as text files
	'''
	fmt = '%.18e'
	delimiter = ' '
	newline = '\n'
	if isinstance(fmt, bytes):
		fmt = asstr(fmt)
	delimiter = asstr(delimiter)
	X = np.asarray(X)
	if X.ndim == 1:
		if X.dtype.names is None:
			X = np.atleast_2d(X).T
			ncol = 1
		else:
			ncol = len(X.dtype.descr)
	else:
		ncol = X.shape[1]
	n_fmt_chars = fmt.count('%')
	fmt = [fmt, ] * ncol
	format = delimiter.join(fmt)
	first = True
	for row in X:
		if first:
			hdfs.create_file(fname, asbytes(format % tuple(row) + newline), overwrite=True)
			first = False
		else:	
			hdfs.appendfile(fname, asbytes(format % tuple(row) + newline))
Ejemplo n.º 2
0
def _read_metadata(bitmap):
    metadata = {}
    models = [(name[5:], number) for name, number in
        METADATA_MODELS.__dict__.items() if name.startswith('FIMD_')]

    tag = ctypes.c_void_p()
    for model_name, number in models:
        mdhandle = _FI.FreeImage_FindFirstMetadata(number, bitmap,
                                                   ctypes.byref(tag))
        mdhandle = ctypes.c_void_p(mdhandle)
        if mdhandle:
            more = True
            while more:
                tag_name = asstr(_FI.FreeImage_GetTagKey(tag))
                tag_type = _FI.FreeImage_GetTagType(tag)
                byte_size = _FI.FreeImage_GetTagLength(tag)
                char_ptr = ctypes.c_char * byte_size
                tag_str = char_ptr.from_address(_FI.FreeImage_GetTagValue(tag))
                if tag_type == METADATA_DATATYPE.FIDT_ASCII:
                    tag_val = asstr(tag_str.value)
                else:
                    tag_val = numpy.fromstring(tag_str,
                            dtype=METADATA_DATATYPE.dtypes[tag_type])
                    if len(tag_val) == 1:
                        tag_val = tag_val[0]
                metadata[(model_name, tag_name)] = tag_val
                more = _FI.FreeImage_FindNextMetadata(mdhandle, ctypes.byref(tag))
            _FI.FreeImage_FindCloseMetadata(mdhandle)
    return metadata
Ejemplo n.º 3
0
    def info(self, source):
        source, close_it = self._open(source)

        try:

            # read and validate header line
            line = source.readline()
            mmid, matrix, format, field, symmetry  = \
              [asstr(part.strip().lower()) for part in line.split()]
            if not mmid.startswith('%%matrixmarket'):
                raise ValueError,'source is not in Matrix Market format'

            assert matrix == 'matrix',`line`

            # ??? Is this necessary?  I don't see 'dense' or 'sparse' in the spec
            # http://math.nist.gov/MatrixMarket/formats.html
            if format == 'dense': format = self.FORMAT_ARRAY
            elif format == 'sparse': format = self.FORMAT_COORDINATE

            # skip comments
            while line.startswith(asbytes('%')): line = source.readline()

            line = line.split()
            if format == self.FORMAT_ARRAY:
                assert len(line)==2,`line`
                rows,cols = map(float, line)
                entries = rows*cols
            else:
                assert len(line)==3,`line`
                rows, cols, entries = map(float, line)

            return (rows, cols, entries, format, field, symmetry)

        finally:
            if close_it: source.close()
Ejemplo n.º 4
0
    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        variable_names   - optional list of variable names to get

        If variable_names is None, then get all variables in file
        '''
        if isinstance(variable_names, basestring):
            variable_names = [variable_names]
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        mdict = {}
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = asstr(hdr.name)
            if variable_names and name not in variable_names:
                self.mat_stream.seek(next_position)
                continue
            mdict[name] = self.read_var_array(hdr)
            self.mat_stream.seek(next_position)
            if variable_names:
                variable_names.remove(name)
                if len(variable_names) == 0:
                    break
        return mdict
Ejemplo n.º 5
0
    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            variable name, or sequence of variable names to get from Mat file /
            file stream.  If None, then get all variables in file
        '''
        if isinstance(variable_names, string_types):
            variable_names = [variable_names]
        elif variable_names is not None:
            variable_names = list(variable_names)
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        mdict = {}
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = asstr(hdr.name)
            if variable_names is not None and name not in variable_names:
                self.mat_stream.seek(next_position)
                continue
            mdict[name] = self.read_var_array(hdr)
            self.mat_stream.seek(next_position)
            if variable_names is not None:
                variable_names.remove(name)
                if len(variable_names) == 0:
                    break
        return mdict
Ejemplo n.º 6
0
    def _read_var(self):
        name = asstr(self._unpack_string())
        dimensions = []
        shape = []
        dims = self._unpack_int()

        for i in range(dims):
            dimid = self._unpack_int()
            dimname = self._dims[dimid]
            dimensions.append(dimname)
            dim = self.dimensions[dimname]
            shape.append(dim)
        dimensions = tuple(dimensions)
        shape = tuple(shape)

        attributes = self._read_att_array()
        nc_type = self.fp.read(4)
        vsize = self._unpack_int()
        begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()

        typecode, size = TYPEMAP[nc_type]
        if typecode is 'c':
            dtype_ = '>c'
        else:
            dtype_ = '>%s' % typecode
            if size > 1: dtype_ += str(size)

        return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize
Ejemplo n.º 7
0
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize

    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if last_token_was_number and token_type == tokenize.NAME and token_string == "L":
            continue
        else:
            tokens.append(token)
        last_token_was_number = token_type == tokenize.NUMBER
    return tokenize.untokenize(tokens)
Ejemplo n.º 8
0
def build_module(source_files, options=[], skip=[], only=[], module_name=None):
    """
    Compile and import a f2py module, built from the given files.

    """

    code = ("import sys; sys.path = %s; import numpy.f2py as f2py2e; "
            "f2py2e.main()" % repr(sys.path))

    d = get_module_dir()

    # Copy files
    dst_sources = []
    for fn in source_files:
        if not os.path.isfile(fn):
            raise RuntimeError("%s is not a file" % fn)
        dst = os.path.join(d, os.path.basename(fn))
        shutil.copyfile(fn, dst)
        dst_sources.append(dst)

        fn = os.path.join(os.path.dirname(fn), '.f2py_f2cmap')
        if os.path.isfile(fn):
            dst = os.path.join(d, os.path.basename(fn))
            if not os.path.isfile(dst):
                shutil.copyfile(fn, dst)

    # Prepare options
    if module_name is None:
        module_name = get_temp_module_name()
    f2py_opts = ['-c', '-m', module_name] + options + dst_sources
    if skip:
        f2py_opts += ['skip:'] + skip
    if only:
        f2py_opts += ['only:'] + only

    # Build
    cwd = os.getcwd()
    try:
        os.chdir(d)
        cmd = [sys.executable, '-c', code] + f2py_opts
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        out, err = p.communicate()
        if p.returncode != 0:
            raise RuntimeError("Running f2py failed: %s\n%s"
                               % (cmd[4:], asstr(out)))
    finally:
        os.chdir(cwd)

        # Partial cleanup
        for fn in dst_sources:
            os.unlink(fn)

    # Import
    __import__(module_name)
    return sys.modules[module_name]
Ejemplo n.º 9
0
Archivo: idl.py Proyecto: Kitchi/scipy
def _read_string(f):
    '''Read a string'''
    length = _read_long(f)
    if length > 0:
        chars = _read_bytes(f, length)
        _align_32(f)
        chars = asstr(chars)
    else:
        chars = ''
    return chars
Ejemplo n.º 10
0
    def _read_att_array(self):
        header = self.fp.read(4)
        assert header in [ZERO, NC_ATTRIBUTE]
        count = self._unpack_int()

        attributes = {}
        for attr in range(count):
            name = asstr(self._unpack_string())
            attributes[name] = self._read_values()
        return attributes
Ejemplo n.º 11
0
    def _read_dim_array(self):
        header = self.fp.read(4)
        assert header in [ZERO, NC_DIMENSION]
        count = self._unpack_int()

        for dim in range(count):
            name = asstr(self._unpack_string())
            length = self._unpack_int() or None  # None for record dimension
            self.dimensions[name] = length
            self._dims.append(name)  # preserve order
Ejemplo n.º 12
0
def _read_string(f):
    """Read a string"""
    length = _read_long(f)
    if length > 0:
        chars = _read_bytes(f, length)
        _align_32(f)
        chars = asstr(chars)
    else:
        chars = None
    return chars
Ejemplo n.º 13
0
Archivo: idl.py Proyecto: b-t-g/Sim
def _read_string(f):
    '''Read a string'''
    length = _read_long(f)
    if length > 0:
        chars = _read_bytes(f, length)
        _align_32(f)
        chars = asstr(chars)
    else:
        warnings.warn("warning: empty strings are now set to '' instead of None")
        chars = ''
    return chars
Ejemplo n.º 14
0
    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        variable_names   - optional list of variable names to get

        If variable_names is None, then get all variables in file
        '''
        if isinstance(variable_names, string_types):
            variable_names = [variable_names]
        elif variable_names is not None:
            variable_names = list(variable_names)

        self.mat_stream.seek(0)
        # Here we pass all the parameters in self to the reading objects
        self.initialize_read()
        mdict = self.read_file_header()
        mdict['__globals__'] = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = asstr(hdr.name)
            if name in mdict:
                warnings.warn('Duplicate variable name "%s" in stream'
                              ' - replacing previous with new\n'
                              'Consider mio5.varmats_from_mat to split '
                              'file into single variable files' % name,
                              MatReadWarning, stacklevel=2)
            if name == '':
                # can only be a matlab 7 function workspace
                name = '__function_workspace__'
                # We want to keep this raw because mat_dtype processing
                # will break the format (uint8 as mxDOUBLE_CLASS)
                process = False
            else:
                process = True
            if variable_names and name not in variable_names:
                self.mat_stream.seek(next_position)
                continue
            try:
                res = self.read_var_array(hdr, process)
            except MatReadError as err:
                warnings.warn(
                    'Unreadable variable "%s", because "%s"' %
                    (name, err),
                    Warning, stacklevel=2)
                res = "Read error: %s" % err
            self.mat_stream.seek(next_position)
            mdict[name] = res
            if hdr.is_global:
                mdict['__globals__'].append(name)
            if variable_names:
                variable_names.remove(name)
                if len(variable_names) == 0:
                    break
        return mdict
Ejemplo n.º 15
0
    def _read_dim_array(self):
        header = self.fp.read(4)
        if header not in [ZERO, NC_DIMENSION]:
            raise ValueError("Unexpected header.")
        count = self._unpack_int()

        for dim in range(count):
            name = asstr(self._unpack_string())
            length = self._unpack_int() or None  # None for record dimension
            self.dimensions[name] = length
            self._dims.append(name)  # preserve order
Ejemplo n.º 16
0
    def _read_att_array(self):
        header = self.fp.read(4)
        if header not in [ZERO, NC_ATTRIBUTE]:
            raise ValueError("Unexpected header.")
        count = self._unpack_int()

        attributes = {}
        for attr in range(count):
            name = asstr(self._unpack_string())
            attributes[name] = self._read_values()
        return attributes
Ejemplo n.º 17
0
    def list_variables(self):
        ''' list variables from stream '''
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        vars = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = asstr(hdr.name)
            shape = self._matrix_reader.shape_from_header(hdr)
            info = mclass_info.get(hdr.mclass, 'unknown')
            vars.append((name, shape, info))

            self.mat_stream.seek(next_position)
        return vars
Ejemplo n.º 18
0
def read_minimat_vars(rdr):
    rdr.initialize_read()
    mdict = {'__globals__': []}
    i = 0
    while not rdr.end_of_stream():
        hdr, next_position = rdr.read_var_header()
        name = asstr(hdr.name)
        if name == '':
            name = 'var_%d' % i
            i += 1
        res = rdr.read_var_array(hdr, process=False)
        rdr.mat_stream.seek(next_position)
        mdict[name] = res
        if hdr.is_global:
            mdict['__globals__'].append(name)
    return mdict
Ejemplo n.º 19
0
    def list_variables(self):
        ''' list variables from stream '''
        self.mat_stream.seek(0)
        # Here we pass all the parameters in self to the reading objects
        self.initialize_read()
        self.read_file_header()
        vars = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = asstr(hdr.name)
            if name == '':
                # can only be a matlab 7 function workspace
                name = '__function_workspace__'

            shape = self._matrix_reader.shape_from_header(hdr)
            info = mclass_info.get(hdr.mclass, 'unknown')
            vars.append((name, shape, info))

            self.mat_stream.seek(next_position)
        return vars
Ejemplo n.º 20
0
    def info(self, source):
        source, close_it = self._open(source)

        try:

            # read and validate header line
            line = source.readline()
            mmid, matrix, format, field, symmetry = \
              [asstr(part.strip()) for part in line.split()]
            if not mmid.startswith('%%MatrixMarket'):
                raise ValueError('source is not in Matrix Market format')
            if not matrix.lower() == 'matrix':
                raise ValueError("Problem reading file header: " + line)

            # http://math.nist.gov/MatrixMarket/formats.html
            if format.lower() == 'array':
                format = self.FORMAT_ARRAY
            elif format.lower() == 'coordinate':
                format = self.FORMAT_COORDINATE

            # skip comments
            while line.startswith(b'%'):
                line = source.readline()

            line = line.split()
            if format == self.FORMAT_ARRAY:
                if not len(line) == 2:
                    raise ValueError("Header line not of length 2: " + line)
                rows, cols = map(int, line)
                entries = rows * cols
            else:
                if not len(line) == 3:
                    raise ValueError("Header line not of length 3: " + line)
                rows, cols, entries = map(int, line)

            return (rows, cols, entries, format, field.lower(), symmetry.lower())

        finally:
            if close_it:
                source.close()
Ejemplo n.º 21
0
def svn_version():
    from numpy.compat import asstr

    env = os.environ.copy()
    env['LC_ALL'] = 'C'
    try:
        out = subprocess.Popen(['svn', 'info'], stdout=subprocess.PIPE,
                env=env).communicate()[0]
    except OSError:
        warnings.warn(" --- Could not run svn info --- ")
        return ""

    r = re.compile('Revision: ([0-9]+)')
    svnver = None
    for line in asstr(out).split('\n'):
        m = r.match(line)
        if m:
            svnver = m.group(1)

    if not svnver:
        raise ValueError("Error while parsing svn version ?")
    return svnver
Ejemplo n.º 22
0
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # adding newline as python 2.7.5 workaround
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # removing newline (see above) as python 2.7.5 workaround
    return tokenize.untokenize(tokens)[:-1]
Ejemplo n.º 23
0
    def _read_var(self):
        name = asstr(self._unpack_string())
        dimensions = []
        shape = []
        dims = self._unpack_int()

        for i in range(dims):
            dimid = self._unpack_int()
            dimname = self._dims[dimid]
            dimensions.append(dimname)
            dim = self.dimensions[dimname]
            shape.append(dim)
        dimensions = tuple(dimensions)
        shape = tuple(shape)

        attributes = self._read_att_array()
        nc_type = self.fp.read(4)
        vsize = int(fromstring(self.fp.read(4), 'int32')[0])

        start = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
        type = TYPEMAP(nc_type)

        return name, dimensions, shape, attributes, type, start, vsize
Ejemplo n.º 24
0
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.

    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    for token in tokenize.generate_tokens(StringIO(asstr(s)).read):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    return tokenize.untokenize(tokens)
Ejemplo n.º 25
0
    def _read_var(self):
        name = asstr(self._unpack_string())
        dimensions = []
        shape = []
        dims = self._unpack_int()

        for i in range(dims):
            dimid = self._unpack_int()
            dimname = self._dims[dimid]
            dimensions.append(dimname)
            dim = self.dimensions[dimname]
            shape.append(dim)
        dimensions = tuple(dimensions)
        shape = tuple(shape)

        attributes = self._read_att_array()
        nc_type = self.fp.read(4)
        vsize = self._unpack_int()
        begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()

        typecode, size = TYPEMAP[nc_type]
        dtype_ = '>%s' % typecode

        return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize
Ejemplo n.º 26
0
def savetxt_nice(fname,
                 X,
                 fmt='%.18e',
                 delimiter=' ',
                 newline='\n',
                 header='',
                 footer='',
                 comments='# '):
    """
    Reimplmenentation of numpy's savetxt that doesn't complain about
    bytes when saving to unicode files in Python 3.

    Save an array to a text file.

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:
            a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted
                like `' (%s+%sj)' % (fmt, fmt)`
            b) a full string specifying every real and imaginary part, e.g.
                `' %.4e %+.4j %.4e %+.4j %.4e %+.4j'` for 3 columns
            c) a list of specifiers, one per column - in this case, the real
                and imaginary part must have separate specifiers,
                e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ',  as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0


    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <http://docs.python.org/library/string.html#
           format-specification-mini-language>`_, Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0,5.0,1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x,y,z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation

    """
    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    own_fh = False
    if _is_string_like(fname):
        own_fh = True
        if fname.endswith('.gz'):
            import gzip
            fh = gzip.open(fname, 'wb')
        else:
            if sys.version_info[0] >= 3:
                fh = open(fname, 'wb')
            else:
                fh = open(fname, 'w')
    elif hasattr(fname, 'write'):
        fh = fname
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.descr)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '$10d')
        print("type(fmt) = %s" % type(fmt))
        if isinstance(fmt, (list, tuple)):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
            format = asstr(delimiter).join(map(asstr, fmt))
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats:  %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [
                        ' (%s+%sj)' % (fmt, fmt),
                    ] * ncol
                else:
                    fmt = [
                        fmt,
                    ] * ncol
                format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt, ))

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + header + newline))
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                fh.write(asbytes(format % tuple(row2) + newline))
        else:
            for row in X:
                try:
                    print('format = %r' % format, type(format))
                    fh.write(asbytes(format % tuple(row) + newline))
                except TypeError:
                    raise TypeError("Mismatch between array dtype ('%s') and "
                                    "format specifier ('%s')" %
                                    (str(X.dtype), format))
        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + footer + newline))
    finally:
        if own_fh:
            fh.close()
Ejemplo n.º 27
0
Archivo: mmio.py Proyecto: Kitchi/scipy
    def info(self, source):
        """
        Return size, storage parameters from Matrix Market file-like 'source'.

        Parameters
        ----------
        source : str or file-like
            Matrix Market filename (extension .mtx) or open file-like object

        Returns
        -------
        rows : int
            Number of matrix rows.
        cols : int
            Number of matrix columns.
        entries : int
            Number of non-zero entries of a sparse matrix
            or rows*cols for a dense matrix.
        format : str
            Either 'coordinate' or 'array'.
        field : str
            Either 'real', 'complex', 'pattern', or 'integer'.
        symmetry : str
            Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
        """

        stream, close_it = self._open(source)

        try:

            # read and validate header line
            line = stream.readline()
            mmid, matrix, format, field, symmetry = \
                [asstr(part.strip()) for part in line.split()]
            if not mmid.startswith('%%MatrixMarket'):
                raise ValueError('source is not in Matrix Market format')
            if not matrix.lower() == 'matrix':
                raise ValueError("Problem reading file header: " + line)

            # http://math.nist.gov/MatrixMarket/formats.html
            if format.lower() == 'array':
                format = self.FORMAT_ARRAY
            elif format.lower() == 'coordinate':
                format = self.FORMAT_COORDINATE

            # skip comments
            while line.startswith(b'%'):
                line = stream.readline()

            # skip empty lines
            while not line.strip():
                line = stream.readline()

            line = line.split()
            if format == self.FORMAT_ARRAY:
                if not len(line) == 2:
                    raise ValueError("Header line not of length 2: " + line)
                rows, cols = map(int, line)
                entries = rows * cols
            else:
                if not len(line) == 3:
                    raise ValueError("Header line not of length 3: " + line)
                rows, cols, entries = map(int, line)

            return (rows, cols, entries, format, field.lower(),
                    symmetry.lower())

        finally:
            if close_it:
                stream.close()
Ejemplo n.º 28
0
def varmats_from_mat(file_obj):
    """ Pull variables out of mat 5 file as a sequence of mat file objects

    This can be useful with a difficult mat file, containing unreadable
    variables.  This routine pulls the variables out in raw form and puts them,
    unread, back into a file stream for saving or reading.  Another use is the
    pathological case where there is more than one variable of the same name in
    the file; this routine returns the duplicates, whereas the standard reader
    will overwrite duplicates in the returned dictionary.

    The file pointer in `file_obj` will be undefined.  File pointers for the
    returned file-like objects are set at 0.

    Parameters
    ----------
    file_obj : file-like
        file object containing mat file

    Returns
    -------
    named_mats : list
        list contains tuples of (name, BytesIO) where BytesIO is a file-like
        object containing mat file contents as for a single variable.  The
        BytesIO contains a string with the original header and a single var. If
        ``var_file_obj`` is an individual BytesIO instance, then save as a mat
        file with something like ``open('test.mat',
        'wb').write(var_file_obj.read())``

    Examples
    --------
    >>> import scipy.io

    BytesIO is from the ``io`` module in python 3, and is ``cStringIO`` for
    python < 3.

    >>> mat_fileobj = BytesIO()
    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
    >>> varmats = varmats_from_mat(mat_fileobj)
    >>> sorted([name for name, str_obj in varmats])
    ['a', 'b']
    """
    rdr = MatFile5Reader(file_obj)
    file_obj.seek(0)
    # Raw read of top-level file header
    hdr_len = MDTYPES[native_code]['dtypes']['file_header'].itemsize
    raw_hdr = file_obj.read(hdr_len)
    # Initialize variable reading
    file_obj.seek(0)
    rdr.initialize_read()
    mdict = rdr.read_file_header()
    next_position = file_obj.tell()
    named_mats = []
    while not rdr.end_of_stream():
        start_position = next_position
        hdr, next_position = rdr.read_var_header()
        name = asstr(hdr.name)
        # Read raw variable string
        file_obj.seek(start_position)
        byte_count = next_position - start_position
        var_str = file_obj.read(byte_count)
        # write to stringio object
        out_obj = BytesIO()
        out_obj.write(raw_hdr)
        out_obj.write(var_str)
        out_obj.seek(0)
        named_mats.append((name, out_obj))
    return named_mats
Ejemplo n.º 29
0
def savetxt(fname,
            X,
            fmt='%.18e',
            delimiter=' ',
            newline='\n',
            header='',
            footer='',
            comments='# ',
            encoding=None,
            progress_callback=None):
    """numpy.savetxt modified to output progress """

    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    class WriteWrap:
        """Convert to bytes on bytestream inputs.
        """
        def __init__(self, ifh, iencoding):
            self.fh = ifh
            self.encoding = iencoding
            self.do_write = self.first_write
            self.write = None

        def close(self):
            self.fh.close()

        def write(self, v):
            self.do_write(v)

        def write_bytes(self, v):
            if isinstance(v, bytes):
                self.fh.write(v)
            else:
                self.fh.write(v.encode(self.encoding))

        def write_normal(self, v):
            self.fh.write(asunicode(v))

        def first_write(self, v):
            try:
                self.write_normal(v)
                self.write = self.write_normal
            except TypeError:
                # input is probably a bytestream
                self.write_bytes(v)
                self.write = self.write_bytes

    own_fh = False
    if isinstance(fname, os_PathLike):
        fname = os_fspath(fname)
    if isinstance(fname, str):
        # datasource doesn't vis_support creating a new file ...
        open(fname, 'wt').close()
        fh = np.lib._datasource.open(fname, 'wt', encoding=encoding)
        own_fh = True
    elif hasattr(fname, 'write'):
        # wrap to handle byte output streams
        fh = WriteWrap(fname, encoding or 'latin1')
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 0 or X.ndim > 2:
            raise ValueError("Expected 1D or 2D array, got %dD array instead" %
                             X.ndim)
        elif X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.names)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '$10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
            iformat = asstr(delimiter).join(map(asstr, fmt))
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats:  %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [
                        ' (%s+%sj)' % (fmt, fmt),
                    ] * ncol
                else:
                    fmt = [
                        fmt,
                    ] * ncol
                iformat = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif (not iscomplex_X) and n_fmt_chars != ncol:
                raise error
            else:
                iformat = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt, ))

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(comments + header + newline)
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                s = iformat % tuple(row2) + newline
                fh.write(s.replace('+-', '-'))
        else:
            length = len(X)
            for ind, row in enumerate(X):
                try:
                    v = iformat % tuple(row) + newline
                except TypeError:
                    raise TypeError("Mismatch between array dtype ('%s') and "
                                    "format specifier ('%s')" %
                                    (str(X.dtype), iformat))
                fh.write(v)

                progress_callback.emit((ind + 1) * 100 / length)

        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(comments + footer + newline)
    finally:
        if own_fh:
            fh.close()
Ejemplo n.º 30
0
def varmats_from_mat(file_obj):
    """ Pull variables out of mat 5 file as a sequence of mat file objects

    This can be useful with a difficult mat file, containing unreadable
    variables.  This routine pulls the variables out in raw form and puts them,
    unread, back into a file stream for saving or reading.  Another use is the
    pathological case where there is more than one variable of the same name in
    the file; this routine returns the duplicates, whereas the standard reader
    will overwrite duplicates in the returned dictionary.

    The file pointer in `file_obj` will be undefined.  File pointers for the
    returned file-like objects are set at 0.

    Parameters
    ----------
    file_obj : file-like
        file object containing mat file

    Returns
    -------
    named_mats : list
        list contains tuples of (name, BytesIO) where BytesIO is a file-like
        object containing mat file contents as for a single variable.  The
        BytesIO contains a string with the original header and a single var. If
        ``var_file_obj`` is an individual BytesIO instance, then save as a mat
        file with something like ``open('test.mat',
        'wb').write(var_file_obj.read())``

    Examples
    --------
    >>> import scipy.io

    BytesIO is from the ``io`` module in python 3, and is ``cStringIO`` for
    python < 3.

    >>> mat_fileobj = BytesIO()
    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
    >>> varmats = varmats_from_mat(mat_fileobj)
    >>> sorted([name for name, str_obj in varmats])
    ['a', 'b']
    """
    rdr = MatFile5Reader(file_obj)
    file_obj.seek(0)
    # Raw read of top-level file header
    hdr_len = MDTYPES[native_code]['dtypes']['file_header'].itemsize
    raw_hdr = file_obj.read(hdr_len)
    # Initialize variable reading
    file_obj.seek(0)
    rdr.initialize_read()
    mdict = rdr.read_file_header()
    next_position = file_obj.tell()
    named_mats = []
    while not rdr.end_of_stream():
        start_position = next_position
        hdr, next_position = rdr.read_var_header()
        name = asstr(hdr.name)
        # Read raw variable string
        file_obj.seek(start_position)
        byte_count = next_position - start_position
        var_str = file_obj.read(byte_count)
        # write to stringio object
        out_obj = BytesIO()
        out_obj.write(raw_hdr)
        out_obj.write(var_str)
        out_obj.seek(0)
        named_mats.append((name, out_obj))
    return named_mats
Ejemplo n.º 31
0
def savetxt(fname,
            X,
            fmt='%.18e',
            delimiter=' ',
            newline='\n',
            header='',
            footer='',
            comments='# '):
    """This code is from NumPy 1.9.1. For help see there.

    It was included because features are used that were added in version 1.7
    but on some machines only NumPy version 1.6.2 is available.
    """
    ## needed for the rest
    from numpy.compat import asstr, asbytes

    def _is_string_like(obj):
        try:
            obj + ''
        except (TypeError, ValueError):
            return False
        return True

    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
        delimiter = asstr(delimiter)

    own_fh = False
    if _is_string_like(fname):
        own_fh = True
        if fname.endswith('.gz'):
            import gzip
            fh = gzip.open(fname, 'wb')
        else:
            if os.sys.version_info[0] >= 3:
                fh = open(fname, 'wb')
            else:
                fh = open(fname, 'w')
    elif hasattr(fname, 'write'):
        fh = fname
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.descr)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '%10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
            format = asstr(delimiter).join(map(asstr, fmt))
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats:  %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [
                        ' (%s+%sj)' % (fmt, fmt),
                    ] * ncol
                else:
                    fmt = [
                        fmt,
                    ] * ncol
                format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt, ))

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + header + newline))
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                fh.write(asbytes(format % tuple(row2) + newline))
        else:
            for row in X:
                fh.write(asbytes(format % tuple(row) + newline))
        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(asbytes(comments + footer + newline))
    finally:
        if own_fh:
            fh.close()
Ejemplo n.º 32
0
def build_module_distutils(source_files, config_code, module_name, **kw):
    """
    Build a module via distutils and import it.

    """
    from numpy.distutils.misc_util import Configuration
    from numpy.distutils.core import setup

    d = get_module_dir()

    # Copy files
    dst_sources = []
    for fn in source_files:
        if not os.path.isfile(fn):
            raise RuntimeError("%s is not a file" % fn)
        dst = os.path.join(d, os.path.basename(fn))
        shutil.copyfile(fn, dst)
        dst_sources.append(dst)

    # Build script
    config_code = textwrap.dedent(config_code).replace("\n", "\n    ")

    code = """\
import os
import sys
sys.path = %(syspath)s

def configuration(parent_name='',top_path=None):
    from numpy.distutils.misc_util import Configuration
    config = Configuration('', parent_name, top_path)
    %(config_code)s
    return config

if __name__ == "__main__":
    from numpy.distutils.core import setup
    setup(configuration=configuration)
""" % dict(config_code=config_code, syspath=repr(sys.path))

    script = os.path.join(d, get_temp_module_name() + '.py')
    dst_sources.append(script)
    f = open(script, 'wb')
    f.write(asbytes(code))
    f.close()

    # Build
    cwd = os.getcwd()
    try:
        os.chdir(d)
        cmd = [sys.executable, script, 'build_ext', '-i']
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        out, err = p.communicate()
        if p.returncode != 0:
            raise RuntimeError("Running distutils build failed: %s\n%s"
                               % (cmd[4:], asstr(out)))
    finally:
        os.chdir(cwd)

        # Partial cleanup
        for fn in dst_sources:
            os.unlink(fn)

    # Import
    __import__(module_name)
    return sys.modules[module_name]
Ejemplo n.º 33
0
def build_module_distutils(source_files, config_code, module_name, **kw):
    """
    Build a module via distutils and import it.

    """
    from numpy.distutils.misc_util import Configuration
    from numpy.distutils.core import setup

    d = get_module_dir()

    # Copy files
    dst_sources = []
    for fn in source_files:
        if not os.path.isfile(fn):
            raise RuntimeError("%s is not a file" % fn)
        dst = os.path.join(d, os.path.basename(fn))
        shutil.copyfile(fn, dst)
        dst_sources.append(dst)

    # Build script
    config_code = textwrap.dedent(config_code).replace("\n", "\n    ")

    code = """\
import os
import sys
sys.path = %(syspath)s

def configuration(parent_name='',top_path=None):
    from numpy.distutils.misc_util import Configuration
    config = Configuration('', parent_name, top_path)
    %(config_code)s
    return config

if __name__ == "__main__":
    from numpy.distutils.core import setup
    setup(configuration=configuration)
""" % dict(config_code=config_code, syspath = repr(sys.path))

    script = os.path.join(d, get_temp_module_name() + '.py')
    dst_sources.append(script)
    f = open(script, 'wb')
    f.write(asbytes(code))
    f.close()

    # Build
    cwd = os.getcwd()
    try:
        os.chdir(d)
        cmd = [sys.executable, script, 'build_ext', '-i']
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        out, err = p.communicate()
        if p.returncode != 0:
            raise RuntimeError("Running distutils build failed: %s\n%s"
                               % (cmd[4:], asstr(out)))
    finally:
        os.chdir(cwd)

        # Partial cleanup
        for fn in dst_sources:
            os.unlink(fn)

    # Import
    __import__(module_name)
    return sys.modules[module_name]
Ejemplo n.º 34
0
def savetxt(fname,
            X,
            fmt='%.18e',
            delimiter=' ',
            newline='\n',
            preamble=None,
            comment_character='#',
            header=None):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored.
    delimiter : str
        Character separating columns.
    newline : str
        .. versionadded:: 1.5.0

        Character separating lines.
    preamble : str or sequence of strs, optional
        If specified, the content of the strings will be added at the top
        of the file. Each line will be preceded by the `comment_character` string
        and terminated by `newline`. In default configuration, numpy.loadtxt
        recognizes the preamble as a comment and, thus, ignores it.
    comment_character : str, optional
        The string which is intended to introduce the lines of the `preamble`
        (default is '#'). Please note that numpy.loadtxt uses the `comments`
        keyword to refer to this string.
    header : bool or sequence of strs, optional
        The column names; will be added after the `preamble` (if specified),
        but before the data. If header is 'True', the names will either
        be inferred from `X`.dtype.names or default names (f1, f2..fn)
        will be used. If a sequence of strs is given, these
        will be used as names.


    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into a ``.npz`` compressed archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to preceed result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.
    
    Explanation of `preamble` and `header`:
        Both preamble and header are used to add information on the
        data at the top of the file.
        In default configuration, the
        preamble preserves compatibility of the
        output file with numpy.loadtxt, while the header not necessarily
        does.
        The header is a single line containing, for example, the name of each
        column. While this is very desirable, it may become a problem when
        the data is recovered with numpy.loadtxt, which
        (in default configuration) does not recognize the header and probably
        fails, because it cannot convert the header entries to the target type.
        Additionally, a preamble can be used to put an introductory text
        at the top of the file.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <http://docs.python.org/library/string.html#
           format-specification-mini-language>`_, Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0,5.0,1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x,y,z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation
    >>> recar = np.zeros(3, dtype={'names':['col1', 'col2'], 'formats':['i4','f4']})
    >>> np.savetxt('test.out', recar, header=True)
    >>> np.savetxt('test.out', recar, preamble=['This is the most','important result.'])
    >>> np.savetxt('test.out', recar, header=['Apples', 'Oranges'])
    """
    import sys
    import numpy as np
    from numpy.compat import asbytes, asstr, asbytes_nested, bytes

    def _is_string_like(v):
        try:
            v + ''
        except:
            return False
        return True

    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    own_fh = False
    if _is_string_like(fname):
        own_fh = True
        if fname.endswith('.gz'):
            import gzip
            fh = gzip.open(fname, 'wb')
        else:
            if sys.version_info[0] >= 3:
                fh = open(fname, 'wb')
            else:
                fh = open(fname, 'w')
    elif hasattr(fname, 'seek'):
        fh = fname
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = 0
                for desc in X.dtype.descr:
                    if len(desc) == 3: ncol = ncol + desc[2]
                    else: ncol = ncol + 1
        else:
            ncol = X.shape[1]

        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '$10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
            format = asstr(delimiter).join(map(asstr, fmt))
        elif type(fmt) is str:
            if fmt.count('%') == 1:
                fmt = [
                    fmt,
                ] * ncol
                format = delimiter.join(fmt)
            elif fmt.count('%') != ncol:
                raise AttributeError(
                    'fmt has wrong number of %% formats.  %s' % fmt)
            else:
                format = fmt

        if preamble is not None:
            # A comment object was specified.
            # Check whether the comment_character is valid.
            if not _is_string_like(comment_character):
                raise ValueError("'comment_character' has to be a string.")
            # If the comment is a plane string, make it an element of a list
            # to be iterated over below.
            if _is_string_like(preamble):
                preamble = [preamble]
            # Iterate over comment argument if possible.
            if hasattr(preamble, '__iter__'):
                for co in preamble:
                    # Check whether individual elements are string-like
                    if _is_string_like(co):
                        # Remove trailing newline character and split string
                        # at occurrence of newline character to avoid new
                        # preamble lines without introducing comment character.
                        co = co.rstrip(newline)
                        cosplit = co.split(newline)
                        for c in cosplit:
                            fh.write(
                                asbytes(''.join(
                                    (comment_character, c, newline))))

        if header is not None:
            # Check whether column names are given as strings, or names
            # must be infered from dtype.
            if isinstance(header, bool):
                if header:
                    # If possible, infer column names from dtype and use default
                    # (f1, f2 .. fn) otherwise.
                    if X.dtype.names is not None:
                        column_names = X.dtype.names
                    else:
                        column_names = []
                        for i in range(ncol):
                            column_names.append(''.join(('f', str(i + 1))))
            # Check whether column names are given as a list/tuple and check number.
            elif isinstance(header, list) or isinstance(header, tuple):
                if len(header) != ncol:
                    raise ValueError(''.join(
                        ("Table has %i column(s) but header has %i entrie(s)" %
                         (ncol, len(header)))))
                column_names = header
            # Write column names (seperated by delimiter).
            headerline = ''
            for colname in column_names:
                headerline = ''.join((headerline, colname, delimiter))
            headerline = headerline.rstrip(delimiter)
            fh.write(asbytes(''.join((headerline, newline))))

        for row in X:
            fh.write(asbytes(format % tuple(row) + newline))
    finally:
        if own_fh:
            fh.close()
Ejemplo n.º 35
0
def savetxt_nice(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
                 footer='', comments='# ', encoding=None):
    """
    Save an array to a text file.  This is 95% a backport of numpy 1.15.1's
    savetxt.  It does not support:
      - DataSource's URL
      - pathlib fnames
      - gz files

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : 1D or 2D array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:

        * a single specifier, `fmt='%.4e'`, resulting in numbers formatted
          like `' (%s+%sj)' % (fmt, fmt)`
        * a full string specifying every real and imaginary part, e.g.
          `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns
        * a list of specifiers, one per column - in this case, the real
          and imaginary part must have separate specifiers,
          e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ',  as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0
    encoding : {None, str}, optional
        Encoding used to encode the outputfile. Does not apply to output
        streams. If the encoding is something other than 'bytes' or 'latin1'
        you will not be able to load the file in NumPy versions < 1.14. Default
        is 'latin1'.

        .. versionadded:: 1.14.0


    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <http://docs.python.org/library/string.html#
           format-specification-mini-language>`_, Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0,5.0,1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x,y,z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation

    """
    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    class WriteWrap:
        """Convert to unicode in py2 or to bytes on bytestream inputs."""
        def __init__(self, fh, encoding):
            self.fh = fh
            self.encoding = encoding
            self.do_write = self.first_write

        def close(self):
            self.fh.close()

        def write(self, v):
            self.do_write(v)

        def write_bytes(self, v):
            if isinstance(v, bytes):
                self.fh.write(v)
            else:
                self.fh.write(v.encode(self.encoding))

        def write_normal(self, v):
            self.fh.write(asunicode(v))

        def first_write(self, v):
            try:
                self.write_normal(v)
                self.write = self.write_normal
            except TypeError:
                # input is probably a bytestream
                self.write_bytes(v)
                self.write = self.write_bytes

    own_fh = False
    #if is_pathlib_path(fname):
        #fname = str(fname)

    if _is_string_like(fname):
        # datasource doesn't support creating a new file ...
        open(fname, 'wt').close()
        fh = open(fname, 'wt', encoding=encoding)
        own_fh = True
        # need to convert str to unicode for text io output
        if sys.version_info[0] == 2:
            fh = WriteWrap(fh, encoding or 'latin1')
    elif hasattr(fname, 'write'):
        # wrap to handle byte output streams
        fh = WriteWrap(fname, encoding or 'latin1')

    #if _is_string_like(fname):
        #own_fh = True
        #if fname.endswith('.gz'):
            #import gzip
            #fh = gzip.open(fname, 'wb')
        #else:
            #if sys.version_info[0] >= 3:
                #fh = open(fname, 'wb')
            #else:
                #fh = open(fname, 'w')
    #elif hasattr(fname, 'write'):
        #fh = fname
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 0 or X.ndim > 2:
            raise ValueError(
                "Expected 1D or 2D array, got %dD array instead" % X.ndim)
        elif X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.descr)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '$10d')
        if isinstance(fmt, (list, tuple)):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
            txt_format = asstr(delimiter).join(map(asstr, fmt))
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats:  %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
                else:
                    fmt = [fmt, ] * ncol
                txt_format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                txt_format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt,))

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(comments + header + newline)
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                s = txt_format % tuple(row2) + newline
                fh.write(s.replace('+-', '-'))
        else:
            for row in X:
                try:
                    v = txt_format % tuple(row) + newline
                except TypeError:
                    raise TypeError("Mismatch between array dtype ('%s') and "
                                    "format specifier ('%s')"
                                    % (str(X.dtype), txt_format))
                fh.write(v)

        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(comments + footer + newline)
    finally:
        if own_fh:
            fh.close()
Ejemplo n.º 36
0
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', preamble=None, comment_character='#', header=None):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored.
    delimiter : str
        Character separating columns.
    newline : str
        .. versionadded:: 1.5.0

        Character separating lines.
    preamble : str or sequence of strs, optional
        If specified, the content of the strings will be added at the top
        of the file. Each line will be preceded by the `comment_character` string
        and terminated by `newline`. In default configuration, numpy.loadtxt
        recognizes the preamble as a comment and, thus, ignores it.
    comment_character : str, optional
        The string which is intended to introduce the lines of the `preamble`
        (default is '#'). Please note that numpy.loadtxt uses the `comments`
        keyword to refer to this string.
    header : bool or sequence of strs, optional
        The column names; will be added after the `preamble` (if specified),
        but before the data. If header is 'True', the names will either
        be inferred from `X`.dtype.names or default names (f1, f2..fn)
        will be used. If a sequence of strs is given, these
        will be used as names.


    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into a ``.npz`` compressed archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to preceed result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.
    
    Explanation of `preamble` and `header`:
        Both preamble and header are used to add information on the
        data at the top of the file.
        In default configuration, the
        preamble preserves compatibility of the
        output file with numpy.loadtxt, while the header not necessarily
        does.
        The header is a single line containing, for example, the name of each
        column. While this is very desirable, it may become a problem when
        the data is recovered with numpy.loadtxt, which
        (in default configuration) does not recognize the header and probably
        fails, because it cannot convert the header entries to the target type.
        Additionally, a preamble can be used to put an introductory text
        at the top of the file.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <http://docs.python.org/library/string.html#
           format-specification-mini-language>`_, Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0,5.0,1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x,y,z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation
    >>> recar = np.zeros(3, dtype={'names':['col1', 'col2'], 'formats':['i4','f4']})
    >>> np.savetxt('test.out', recar, header=True)
    >>> np.savetxt('test.out', recar, preamble=['This is the most','important result.'])
    >>> np.savetxt('test.out', recar, header=['Apples', 'Oranges'])
    """
    import sys
    import numpy as np
    from numpy.compat import asbytes, asstr, asbytes_nested, bytes
    def _is_string_like(v):
        try: v + ''
        except: return False
        return True

    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    own_fh = False
    if _is_string_like(fname):
        own_fh = True
        if fname.endswith('.gz'):
            import gzip
            fh = gzip.open(fname, 'wb')
        else:
            if sys.version_info[0] >= 3:
                fh = open(fname, 'wb')
            else:
                fh = open(fname, 'w')
    elif hasattr(fname, 'seek'):
        fh = fname
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = 0
                for desc in X.dtype.descr:
                    if len(desc) == 3: ncol = ncol + desc[2]
                    else : ncol = ncol + 1
        else:
            ncol = X.shape[1]

        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '$10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
            format = asstr(delimiter).join(map(asstr, fmt))
        elif type(fmt) is str:
            if fmt.count('%') == 1:
                fmt = [fmt, ]*ncol
                format = delimiter.join(fmt)
            elif fmt.count('%') != ncol:
                raise AttributeError('fmt has wrong number of %% formats.  %s'
                                     % fmt)
            else:
                format = fmt

        if preamble is not None:
            # A comment object was specified.
            # Check whether the comment_character is valid.
            if not _is_string_like(comment_character):
                raise ValueError("'comment_character' has to be a string.")
            # If the comment is a plane string, make it an element of a list
            # to be iterated over below.
            if _is_string_like(preamble):
                preamble = [preamble]
            # Iterate over comment argument if possible.
            if hasattr(preamble, '__iter__'):
                for co in preamble:
                    # Check whether individual elements are string-like
                    if _is_string_like(co):
                        # Remove trailing newline character and split string
                        # at occurrence of newline character to avoid new
                        # preamble lines without introducing comment character.
                        co = co.rstrip(newline)
                        cosplit = co.split(newline)
                        for c in cosplit:
                            fh.write(asbytes(''.join((comment_character,c,newline))))
   
        if header is not None:
          # Check whether column names are given as strings, or names
          # must be infered from dtype.
          if isinstance(header, bool):
              if header:
                  # If possible, infer column names from dtype and use default
                  # (f1, f2 .. fn) otherwise.
                  if X.dtype.names is not None:
                    column_names = X.dtype.names
                  else:
                    column_names = []
                    for i in range(ncol):
                      column_names.append(''.join(('f',str(i+1))))
          # Check whether column names are given as a list/tuple and check number.
          elif isinstance(header, list) or isinstance(header, tuple):
            if len(header) != ncol:
              raise ValueError(''.join(("Table has %i column(s) but header has %i entrie(s)" % (ncol,len(header)))))
            column_names = header
          # Write column names (seperated by delimiter).
          headerline = ''
          for colname in column_names:
              headerline = ''.join((headerline, colname, delimiter))
          headerline = headerline.rstrip(delimiter)
          fh.write(asbytes(''.join((headerline,newline))))
 

        for row in X:
            fh.write(asbytes(format % tuple(row) + newline))
    finally:
        if own_fh:
            fh.close()
Ejemplo n.º 37
0
    def info(self, source):
        """
        Return size, storage parameters from Matrix Market file-like 'source'.

        Parameters
        ----------
        source : str or file-like
            Matrix Market filename (extension .mtx) or open file-like object

        Returns
        -------
        rows : int
            Number of matrix rows.
        cols : int
            Number of matrix columns.
        entries : int
            Number of non-zero entries of a sparse matrix
            or rows*cols for a dense matrix.
        format : str
            Either 'coordinate' or 'array'.
        field : str
            Either 'real', 'complex', 'pattern', or 'integer'.
        symmetry : str
            Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
        """

        stream, close_it = self._open(source)

        try:

            # read and validate header line
            line = stream.readline()
            mmid, matrix, format, field, symmetry = \
                [asstr(part.strip()) for part in line.split()]
            if not mmid.startswith('%%MatrixMarket'):
                raise ValueError('source is not in Matrix Market format')
            if not matrix.lower() == 'matrix':
                raise ValueError("Problem reading file header: " + line)

            # http://math.nist.gov/MatrixMarket/formats.html
            if format.lower() == 'array':
                format = self.FORMAT_ARRAY
            elif format.lower() == 'coordinate':
                format = self.FORMAT_COORDINATE

            # skip comments
            while line.startswith(b'%'):
                line = stream.readline()

            line = line.split()
            if format == self.FORMAT_ARRAY:
                if not len(line) == 2:
                    raise ValueError("Header line not of length 2: " + line)
                rows, cols = map(int, line)
                entries = rows * cols
            else:
                if not len(line) == 3:
                    raise ValueError("Header line not of length 3: " + line)
                rows, cols, entries = map(int, line)

            return (rows, cols, entries, format, field.lower(),
                    symmetry.lower())

        finally:
            if close_it:
                stream.close()