Exemple #1
0
def guessdtype(table):
    """
    Build a numpy structured dtype for `table` by letting numpy infer a
    dtype for each field separately.

    The first item obtained from ``iterpeek(table, 1)`` is used as the list
    of field names; each field's values (as returned by ``columns``) are
    loaded into a 1D array and that array's dtype is recorded.

    Raises `UnsatisfiedDependency` if numpy cannot be imported.

    """
    try:
        import numpy as np
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    header, table = iterpeek(table, 1)
    by_field = columns(table)
    # a throwaway 1D array per column is how we ask numpy for its best guess
    spec = [(name, np.array(by_field[name]).dtype) for name in header]
    return np.dtype(spec)
Exemple #2
0
def toarray(table, dtype=None, count=-1, sample=1000):
    """
    Convenience function to load data from the given `table` into a numpy
    structured array. E.g.::

        >>> from petl import look
        >>> from petlx.array import toarray
        >>> look(table)
        +-----------+-------+-------+
        | 'foo'     | 'bar' | 'baz' |
        +===========+=======+=======+
        | 'apples'  | 1     | 2.5   |
        +-----------+-------+-------+
        | 'oranges' | 3     | 4.4   |
        +-----------+-------+-------+
        | 'pears'   | 7     | 0.1   |
        +-----------+-------+-------+

        >>> a = toarray(table)
        >>> a
        array([('apples', 1, 2.5), ('oranges', 3, 4.4), ('pears', 7, 0.1)],
              dtype=[('foo', '|S7'), ('bar', '<i8'), ('baz', '<f8')])
        >>> a['foo']
        array(['apples', 'oranges', 'pears'],
              dtype='|S7')
        >>> a['bar']
        array([1, 3, 7])
        >>> a['baz']
        array([ 2.5,  4.4,  0.1])
        >>> a['foo'][0]
        'apples'
        >>> a['bar'][1]
        3
        >>> a['baz'][2]
        0.10000000000000001

    If no datatype is specified, `sample` rows will be examined to infer an
    appropriate datatype for each field.

    The datatype can be specified as a string, e.g.:

        >>> a = toarray(table, dtype='a4, i2, f4')
        >>> a
        array([('appl', 1, 2.5), ('oran', 3, 4.400000095367432),
               ('pear', 7, 0.10000000149011612)],
              dtype=[('foo', '|S4'), ('bar', '<i2'), ('baz', '<f4')])

    The datatype can also be partially specified, in which case datatypes will
    be inferred for other fields, e.g.:

        >>> a = toarray(table, dtype={'foo': 'a4'})
        >>> a
        array([('appl', 1, 2.5), ('oran', 3, 4.4), ('pear', 7, 0.1)],
              dtype=[('foo', '|S4'), ('bar', '<i8'), ('baz', '<f8')])

    Arguments:

    * `table` -- iterable of rows; the first row is taken as the field names.
    * `dtype` -- ``None`` (infer everything), a comma-separated string of
      type codes (paired with the field names in order), a dict keyed by
      field name (partial specification), or any other value, which is
      handed to numpy unchanged. A dict that already has both ``'names'``
      and ``'formats'`` keys is treated as a complete numpy dtype spec and
      is also handed over unchanged.
    * `count` -- passed straight through to ``numpy.fromiter``.
    * `sample` -- passed to ``iterpeek`` to decide how much of the table is
      buffered for datatype inference.

    Raises `UnsatisfiedDependency` if numpy cannot be imported.

    """

    try:
        import numpy as np
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # string-dtype branch below does not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    it = iter(table)
    peek, it = iterpeek(it, sample)
    # builtin next() instead of the Python-2-only ``it.next()`` method
    fields = next(it)

    if dtype is None:
        dtype = guessdtype(peek)

    elif isinstance(dtype, string_types):
        # pair each type code with the field name in the same position
        typestrings = [s.strip() for s in dtype.split(',')]
        dtype = list(zip(fields, typestrings))

    elif (isinstance(dtype, dict)
          and ('names' not in dtype or 'formats' not in dtype)):
        # partial specification: fill in whatever the caller left out
        cols = columns(peek)
        formats = []
        for f in fields:
            if f not in dtype:
                # nothing given for this field, let numpy infer it
                formats.append(np.array(cols[f]).dtype)
            elif isinstance(dtype[f], tuple):
                # tuple value: only its first element (the format) is used,
                # any further elements are ignored
                formats.append(dtype[f][0])
            else:
                # plain format given directly
                formats.append(dtype[f])
        dtype = {'names': list(fields), 'formats': formats}

    # any other dtype value is left as-is for numpy to interpret

    # numpy is fussy about having tuples, need to make sure
    rows = (tuple(row) for row in it)
    return np.fromiter(rows, dtype=dtype, count=count)
Exemple #3
0
def toarray(table, dtype=None, count=-1, sample=1000):
    """
    Convenience function to load data from the given `table` into a numpy
    structured array. E.g.::

        >>> from petl import look
        >>> from petlx.array import toarray
        >>> look(table)
        +-----------+-------+-------+
        | 'foo'     | 'bar' | 'baz' |
        +===========+=======+=======+
        | 'apples'  | 1     | 2.5   |
        +-----------+-------+-------+
        | 'oranges' | 3     | 4.4   |
        +-----------+-------+-------+
        | 'pears'   | 7     | 0.1   |
        +-----------+-------+-------+

        >>> a = toarray(table)
        >>> a
        array([('apples', 1, 2.5), ('oranges', 3, 4.4), ('pears', 7, 0.1)],
              dtype=[('foo', '|S7'), ('bar', '<i8'), ('baz', '<f8')])
        >>> a['foo']
        array(['apples', 'oranges', 'pears'],
              dtype='|S7')
        >>> a['bar']
        array([1, 3, 7])
        >>> a['baz']
        array([ 2.5,  4.4,  0.1])
        >>> a['foo'][0]
        'apples'
        >>> a['bar'][1]
        3
        >>> a['baz'][2]
        0.10000000000000001

    If no datatype is specified, `sample` rows will be examined to infer an
    appropriate datatype for each field.

    The datatype can be specified as a string, e.g.:

        >>> a = toarray(table, dtype='a4, i2, f4')
        >>> a
        array([('appl', 1, 2.5), ('oran', 3, 4.400000095367432),
               ('pear', 7, 0.10000000149011612)],
              dtype=[('foo', '|S4'), ('bar', '<i2'), ('baz', '<f4')])

    The datatype can also be partially specified, in which case datatypes will
    be inferred for other fields, e.g.:

        >>> a = toarray(table, dtype={'foo': 'a4'})
        >>> a
        array([('appl', 1, 2.5), ('oran', 3, 4.4), ('pear', 7, 0.1)],
              dtype=[('foo', '|S4'), ('bar', '<i8'), ('baz', '<f8')])

    Arguments:

    * `table` -- iterable of rows; the first row is taken as the field names.
    * `dtype` -- ``None`` (infer everything), a comma-separated string of
      type codes (matched to field names by position), a dict keyed by
      field name (partial specification), or any other value, which is
      passed to numpy untouched. A dict containing both ``'names'`` and
      ``'formats'`` counts as a complete numpy dtype spec and is likewise
      passed on untouched.
    * `count` -- forwarded unchanged to ``numpy.fromiter``.
    * `sample` -- forwarded to ``iterpeek`` to control how much of the table
      is buffered for datatype inference.

    Raises `UnsatisfiedDependency` if numpy cannot be imported.

    """

    try:
        import numpy as np
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    # Python 3 has no ``basestring``; without this fallback the string check
    # below would raise NameError there.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    it = iter(table)
    peek, it = iterpeek(it, sample)
    # use the builtin rather than ``it.next()``, which only exists on Python 2
    fields = next(it)

    if dtype is None:
        dtype = guessdtype(peek)

    elif isinstance(dtype, string_types):
        # insert field names from source table
        typestrings = [s.strip() for s in dtype.split(',')]
        dtype = list(zip(fields, typestrings))

    elif (isinstance(dtype, dict)
          and ('names' not in dtype or 'formats' not in dtype)):
        # allow for partial specification of dtype
        cols = columns(peek)
        newdtype = {'names': [], 'formats': []}
        for f in fields:
            newdtype['names'].append(f)
            if f not in dtype:
                # not specified at all: infer from the sampled column
                fmt = np.array(cols[f]).dtype
            elif isinstance(dtype[f], tuple):
                # tuple given: keep its first element as the format and
                # ignore whatever else the tuple carries
                fmt = dtype[f][0]
            else:
                # format given directly
                fmt = dtype[f]
            newdtype['formats'].append(fmt)
        dtype = newdtype

    # otherwise leave dtype as-is and let numpy interpret it

    # numpy is fussy about having tuples, need to make sure
    it = (tuple(row) for row in it)
    sa = np.fromiter(it, dtype=dtype, count=count)

    return sa
Exemple #4
0
def test_columns():
    """Check that ``columns`` returns each field's values in row order."""
    rows = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]]
    by_field = columns(rows)
    expectations = (
        ('foo', ['a', 'b', 'b']),
        ('bar', [1, 2, 3]),
    )
    for field, expected in expectations:
        eq_(expected, by_field[field])