Python discover 예제들, datashape.discovery.discover Python 예제들

예제 #1

0

파일 보기

파일: test_discovery.py 프로젝트: pskyp/shareapplication

def test_unite_records():
    """Records whose fields hold '' or None in some rows unify to Option types."""
    # An empty string in an otherwise integer field.
    rows = [{'name': 'Alice', 'balance': 100}, {'name': 'Bob', 'balance': ''}]
    found = discover(rows)
    assert found == 2 * Record([['balance', Option(int64)], ['name', string]])

    # None in an otherwise string field.
    rows = [{'name': 'Alice', 's': 'foo'}, {'name': 'Bob', 's': None}]
    found = discover(rows)
    assert found == 2 * Record([['name', string], ['s', Option(string)]])

    # None in both a string field and a float field.
    rows = [
        {'name': 'Alice', 's': 'foo', 'f': 1.0},
        {'name': 'Bob', 's': None, 'f': None},
    ]
    found = discover(rows)
    assert found == 2 * Record([
        ['f', Option(float64)],
        ['name', string],
        ['s', Option(string)],
    ])

예제 #2

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_discover_mixed():
    """Check discovery of rows that mix integer and float values."""
    int_shape = discover(1)
    float_shape = discover(1.0)

    # Ten identical rows: each column keeps its own type inside a Tuple.
    row = [1, 2, 1.0, 2.0]
    expected = 10 * Tuple([int_shape, int_shape, float_shape, float_shape])
    assert dshape(discover([row] * 10)) == expected

    # Rows alternate int/float per column: expected shape is 4 floats per row.
    flipped = [1.0, 2.0, 1, 2]
    expected = 10 * (4 * float_shape)
    assert dshape(discover([row, flipped] * 5)) == expected

예제 #3

0

파일 보기

파일: test_discovery.py 프로젝트: pskyp/shareapplication

def test_unite_tuples():
    """Rows containing '' in some positions unify to Option-typed tuple slots."""
    rows = [[1, 1, 'hello'], [1, '', ''], [1, 1, 'hello']]
    found = discover(rows)
    assert found == 3 * Tuple([int64, Option(int64), Option(string)])

    # Same data with a trailing, always-present integer column.
    rows = [[1, 1, 'hello', 1], [1, '', '', 1], [1, 1, 'hello', 1]]
    found = discover(rows)
    assert found == 3 * Tuple(
        [int64, Option(int64), Option(string), int64])

예제 #4

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_unite_tuples():
    """Empty strings inside list rows make the matching tuple slots optional."""
    three_wide = [[1, 1, "hello"], [1, "", ""], [1, 1, "hello"]]
    discovered = discover(three_wide)
    wanted = 3 * Tuple([int64, Option(int64), Option(string)])
    assert discovered == wanted

    four_wide = [[1, 1, "hello", 1], [1, "", "", 1], [1, 1, "hello", 1]]
    discovered = discover(four_wide)
    wanted = 3 * Tuple([int64, Option(int64), Option(string), int64])
    assert discovered == wanted

예제 #5

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_discover_mixed():
    """Discovery over lists of rows that combine ints and floats."""
    as_int, as_float = discover(1), discover(1.0)

    # Uniform rows -> a Tuple of per-column shapes, repeated 10 times.
    exp = 10 * Tuple([as_int, as_int, as_float, as_float])
    uniform = [[1, 2, 1.0, 2.0]] * 10
    assert dshape(discover(uniform)) == exp

    # Rows whose columns swap int/float -> every element expected as float.
    exp = 10 * (4 * as_float)
    alternating = [[1, 2, 1.0, 2.0], [1.0, 2.0, 1, 2]] * 5
    assert dshape(discover(alternating)) == exp

예제 #6

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_discover_mock():
    """discover() on a Mock must raise NotImplementedError.

    Regression test: this used to segfault because mocks were being
    sent into numpy.
    """
    try:
        from unittest.mock import Mock
    except ImportError:
        # Fall back to the standalone ``mock`` package.
        from mock import Mock

    stand_in = Mock()
    with pytest.raises(NotImplementedError):
        discover(stand_in)

예제 #7

0

파일 보기

파일: test_discovery.py 프로젝트: CaptainAL/Spyder

def test_unite_tuples():
    """Tuple slots that are '' in some rows are discovered as Option types."""
    data = [
        [1, 1, 'hello'],
        [1, '', ''],
        [1, 1, 'hello'],
    ]
    result = discover(data)
    assert result == 3 * Tuple([int64, Option(int64), Option(string)])

    data = [
        [1, 1, 'hello', 1],
        [1, '', '', 1],
        [1, 1, 'hello', 1],
    ]
    result = discover(data)
    assert result == 3 * Tuple([int64, Option(int64), Option(string), int64])

예제 #8

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_unite_records():
    """Dict rows with '' or None values yield records with Option fields."""
    people = [{"name": "Alice", "balance": 100}, {"name": "Bob", "balance": ""}]
    shape = discover(people)
    assert shape == 2 * Record([["balance", Option(int64)], ["name", string]])

    people = [{"name": "Alice", "s": "foo"}, {"name": "Bob", "s": None}]
    shape = discover(people)
    assert shape == 2 * Record([["name", string], ["s", Option(string)]])

    people = [
        {"name": "Alice", "s": "foo", "f": 1.0},
        {"name": "Bob", "s": None, "f": None},
    ]
    shape = discover(people)
    assert shape == 2 * Record(
        [["f", Option(float64)], ["name", string], ["s", Option(string)]]
    )

예제 #9

0

파일 보기

파일: test_discovery.py 프로젝트: CaptainAL/Spyder

def test_unite_records():
    """Check that partially-missing record fields are discovered as Option."""
    # '' standing in for a missing integer.
    alice = {'name': 'Alice', 'balance': 100}
    bob = {'name': 'Bob', 'balance': ''}
    actual = discover([alice, bob])
    assert actual == 2 * Record([['balance', Option(int64)],
                                 ['name', string]])

    # None standing in for a missing string.
    alice = {'name': 'Alice', 's': 'foo'}
    bob = {'name': 'Bob', 's': None}
    actual = discover([alice, bob])
    assert actual == 2 * Record([['name', string],
                                 ['s', Option(string)]])

    # None standing in for both a missing string and a missing float.
    alice = {'name': 'Alice', 's': 'foo', 'f': 1.0}
    bob = {'name': 'Bob', 's': None, 'f': None}
    actual = discover([alice, bob])
    assert actual == 2 * Record([['f', Option(float64)],
                                 ['name', string],
                                 ['s', Option(string)]])

예제 #10

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_list_of_dicts_difference():
    """Keys present in only some dicts become optional fields of the record."""
    alice = {'name': 'Alice', 'amount': 100}
    bob = {'name': 'Bob', 'house_color': 'blue'}
    found = discover([alice, bob])
    wanted = dshape(
        '2 * {amount: ?int64, house_color: ?string, name: string}')
    assert found == wanted

예제 #11

0

파일 보기

파일: json.py 프로젝트: vitan/blaze

    def __init__(self, path, mode='rt', schema=None, dshape=None, open=open,
                 nrows_discovery=50):
        """Record the file location and resolve the schema/dshape to use.

        Parameters
        ----------
        path : path of the JSON file; stored as given and as an absolute path.
        mode : file mode, stored unchanged on the instance.
        schema : optional datashape (anything ``datashape.dshape`` accepts)
            describing one element.
        dshape : optional datashape for the whole dataset.
        open : callable used to open ``path``; also stored as ``self.open``.
        nrows_discovery : stop index passed to ``islice`` when sampling lines
            for discovery.

        Raises
        ------
        ValueError
            If neither ``schema`` nor ``dshape`` is given and the file cannot
            be opened for discovery.
        """
        self.path = path
        self._abspath = os.path.abspath(path)
        self.mode = mode
        self.open = open
        if dshape:
            dshape = datashape.dshape(dshape)
        if schema:
            schema = datashape.dshape(schema)
        # Derive whichever of schema/dshape is missing from the other one.
        if dshape and not schema and isdimension(dshape[0]):
            schema = dshape.subshape[0]
        if schema and not dshape:
            dshape = var * schema

        if not schema and not dshape:
            try:
                f = open(self.path, 'r')
            except Exception:
                raise ValueError('No schema detected')
            # Close the handle even when a sampled line is not valid JSON.
            try:
                # NOTE(review): islice starts at index 1, so the first line of
                # the file is not sampled -- confirm this is intentional.
                data = list(map(json.loads,
                                islice(f, 1, nrows_discovery)))
            finally:
                f.close()
            dshape = discover(data)
            schema = dshape.subshape[0]
        # Initially the array is not loaded (is this necessary?)
        self._cache_arr = None

        self._schema = schema
        self._dshape = dshape

예제 #12

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_integrative():
    """Numeric-looking strings inside records are discovered as int64."""
    names_and_amounts = (('Alice', '100'), ('Bob', '200'), ('Charlie', '300'))
    data = [{'name': name, 'amount': amount}
            for name, amount in names_and_amounts]

    found = dshape(discover(data))
    assert found == dshape('3 * {amount: int64, name: string}')

예제 #13

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_discover_array_like():
    """An object exposing only ``shape`` and ``dtype`` is discoverable."""

    class MyArray(object):
        # Minimal stand-in carrying just the two attributes set below.
        def __init__(self, shape, dtype):
            self.shape, self.dtype = shape, dtype

    array_like = MyArray((4, 3), "f4")
    assert discover(array_like) == dshape("4 * 3 * float32")

예제 #14

0

파일 보기

파일: json.py 프로젝트: B-Rich/blaze

    def __init__(self, path, mode='rt', schema=None, dshape=None, open=open,
                 nrows_discovery=50):
        """Store the file location and work out the schema/dshape to use.

        Parameters
        ----------
        path : path of the JSON file.
        mode : file mode, stored unchanged on the instance.
        schema : optional datashape (anything ``datashape.dshape`` accepts)
            describing one element.
        dshape : optional datashape for the whole dataset.
        open : callable used to open ``path``; also stored as ``self.open``.
        nrows_discovery : stop index passed to ``islice`` when sampling lines
            for discovery.

        Raises
        ------
        ValueError
            If neither ``schema`` nor ``dshape`` is given and the file cannot
            be opened for discovery.
        """
        self.path = path
        self.mode = mode
        self.open = open
        if dshape:
            dshape = datashape.dshape(dshape)
        if schema:
            schema = datashape.dshape(schema)
        # Derive whichever of schema/dshape is missing from the other one.
        if dshape and not schema and isdimension(dshape[0]):
            schema = dshape.subshape[0]
        if schema and not dshape:
            dshape = var * schema

        if not schema and not dshape:
            try:
                f = open(self.path, 'r')
            except Exception:
                raise ValueError('No schema detected')
            # Close the handle even when a sampled line is not valid JSON.
            try:
                # NOTE(review): islice starts at index 1, so the first line of
                # the file is not sampled -- confirm this is intentional.
                data = list(map(json.loads,
                                islice(f, 1, nrows_discovery)))
            finally:
                f.close()
            dshape = discover(data)
            schema = dshape.subshape[0]
        # Initially the array is not loaded (is this necessary?)
        self._cache_arr = None

        self._schema = schema
        self._dshape = dshape

예제 #15

0

파일 보기

파일: json.py 프로젝트: vitan/blaze

    def __init__(self, path, mode='rt', schema=None, dshape=None, open=open,
            **kwargs):
        """Store the file location and resolve the dshape, discovering if needed.

        Parameters
        ----------
        path : path of the JSON file; stored as given and as an absolute path.
        mode : file mode, stored unchanged on the instance.
        schema : optional datashape (anything ``datashape.dshape`` accepts).
        dshape : optional datashape for the whole dataset.
        open : callable used to open ``path``; also stored as ``self.open``.
        **kwargs : accepted but not used by this method.

        Raises
        ------
        ValueError
            If neither ``schema`` nor ``dshape`` is given and the file cannot
            be opened for discovery.
        """
        self.path = path
        self._abspath = os.path.abspath(path)
        self.mode = mode
        self.open = open
        if dshape:
            dshape = datashape.dshape(dshape)
        if schema:
            schema = datashape.dshape(schema)
        if dshape and not schema and isdimension(dshape[0]):
            schema = dshape.subarray(1)

        if not schema and not dshape:
            try:
                f = open(self.path, 'r')
            except Exception:
                raise ValueError('No schema detected')
            # Close the handle even when parsing or discovery fails.
            try:
                dshape = discover(json.load(f))
            finally:
                f.close()
        # Initially the array is not loaded (is this necessary?)
        self._cache_arr = None

        self._schema = schema
        self._dshape = dshape

예제 #16

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_timedelta_strings():
    """'<number> <unit>' strings discover as TimeDelta with that unit."""
    samples = (
        "1 day",
        "-2 hours",
        "3 seconds",
        "1 microsecond",
        "1003 milliseconds",
    )
    for text in samples:
        found = discover(text)
        # The unit is the second whitespace-separated token of the input.
        assert found == TimeDelta(unit=text.split()[1])

    # An unknown unit is rejected by the TimeDelta constructor itself.
    with pytest.raises(ValueError):
        TimeDelta(unit="buzz light-years")

예제 #17

0

파일 보기

파일: test_discovery.py 프로젝트: pskyp/shareapplication

def test_timedelta_strings():
    """Strings of the form '<number> <unit>' are discovered as TimeDelta."""
    for ts in ("1 day", "-2 hours", "3 seconds", "1 microsecond",
               "1003 milliseconds"):
        discovered = discover(ts)
        unit_token = ts.split()[1]
        assert discovered == TimeDelta(unit=unit_token)

    # Constructing a TimeDelta with a nonsense unit must fail.
    with pytest.raises(ValueError):
        TimeDelta(unit='buzz light-years')

예제 #18

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_discover_array_like():
    """Array-like discovery works and emits exactly one DeprecationWarning."""

    class MyArray(object):
        # Carries only the ``shape`` and ``dtype`` attributes set below.
        def __init__(self, shape, dtype):
            self.shape, self.dtype = shape, dtype

    with catch_warnings(record=True) as caught:
        # Make sure no warning is filtered out while recording.
        simplefilter('always')
        assert discover(MyArray((4, 3), 'f4')) == dshape('4 * 3 * float32')

    assert len(caught) == 1
    warning = caught[0]
    assert issubclass(warning.category, DeprecationWarning)
    # The warning text is expected to name the offending class.
    assert 'MyArray' in str(warning.message)

예제 #19

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_simple():
    """Scalar Python values map onto the expected basic datashapes."""
    cases = [
        (3, int64),
        (3.0, float64),
        (3.0 + 1j, complex128),
        ("Hello", string),
        (True, bool_),
        (None, null),
    ]
    for value, expected in cases:
        assert discover(value) == expected

예제 #20

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_time_string():
    """Time-of-day strings discover as time_; date+time strings as datetime_."""
    time_texts = ("12:00:01", "12:00:01.000", "12:00:01.123456", "12:00:01.1234")
    for text in time_texts:
        assert discover(text) == time_

    datetime_texts = ("10-10-01T12:00:01", "10-10-01 12:00:01")
    for text in datetime_texts:
        assert discover(text) == datetime_

예제 #21

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_time_string():
    """Check discovery of time strings and of combined date/time strings."""
    cases = [
        ('12:00:01', time_),
        ('12:00:01.000', time_),
        ('12:00:01.123456', time_),
        ('12:00:01.1234', time_),
        ('10-10-01T12:00:01', datetime_),
        ('10-10-01 12:00:01', datetime_),
    ]
    for text, expected in cases:
        assert discover(text) == expected

예제 #22

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_simple():
    """discover() on plain Python scalars returns the matching datashape."""
    inputs = (3, 3.0, 3.0 + 1j, 'Hello', True, None)
    shapes = (int64, float64, complex128, string, bool_, null)
    for obj, shape in zip(inputs, shapes):
        assert discover(obj) == shape

예제 #23

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_datetime():
    """A variety of date-time strings and a datetime object give datetime_."""
    text_inputs = [
        "1991-02-03 04:05:06",
        "11/12/1822 06:47:26.00",
        "1822-11-12T06:47:26",
        "Fri Dec 19 15:10:11 1997",
        "Friday, November 11, 2005 17:56:21",
        "1982-2-20 5:02:00",
        "20030331 05:59:59.9",
        "Jul  6 2030  5:55PM",
        "1994-10-20 T 11:15",
        "2013-03-04T14:38:05.123",
    ]
    # Formats left out of the check in the original test:
    #   "15MAR1985:14:15:22"
    #   "201303041438"
    candidates = text_inputs + [datetime(2014, 1, 1, 12, 1, 1)]
    for candidate in candidates:
        assert discover(candidate) == datetime_

예제 #24

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_letters_only_strings():
    """Words made only of letters stay strings, even date-like words."""
    weekdays = ("sunday", "monday", "tuesday", "wednesday", "thursday",
                "friday", "saturday")
    others = ("a", "b", "now", "yesterday", "tonight")
    for word in weekdays + others:
        assert discover(word) == string

예제 #25

0

파일 보기

def discover_csv(path,
                 encoding=DEFAULT_ENCODING,
                 nrows_discovery=50,
                 header=None,
                 dialect=None,
                 types=None,
                 columns=None,
                 typehints=None):
    """Build a Record datashape for a CSV file from its first chunk of rows.

    The first ``nrows_discovery`` rows are read with pandas as objects. When
    ``types`` is not supplied, the column types come from ``discover`` on
    those rows. When ``columns`` is not supplied, names come from the header
    (if ``header`` is truthy) or are generated as ``_0``, ``_1``, ...
    ``typehints`` may override the type of individual columns by name.

    Raises ValueError when the discovered row type is neither a Tuple nor a
    Fixed dimension of a Unit.
    """
    compression = {'gz': 'gzip', 'bz2': 'bz2'}.get(ext(path))
    header_row = 0 if header else None
    reader = pd.read_csv(path,
                         dtype='O',
                         encoding=encoding,
                         chunksize=nrows_discovery,
                         compression=compression,
                         header=header_row,
                         **clean_dialect(dialect))
    frame = reader.get_chunk()

    if not types:
        # Missing cells become '' before the rows are handed to discover().
        rows = frame.fillna('').to_records(index=False).tolist()
        rowtype = discover(rows).subshape[0]
        if isinstance(rowtype[0], Tuple):
            types = []
            for column_shape in rowtype[0].dshapes:
                column_type = unpack(column_shape)
                if column_type == null:
                    column_type = string
                types.append(safely_option(column_type))
        elif isinstance(rowtype[0], Fixed) and isinstance(rowtype[1], Unit):
            # Homogeneous row: repeat the unit type once per column.
            types = int(rowtype[0]) * [rowtype[1]]
        else:
            raise ValueError("Could not discover schema from data.\n"
                             "Please specify schema.")

    if not columns:
        columns = (list(frame.columns) if header
                   else ['_%d' % i for i in range(len(types))])

    if typehints:
        types = [typehints.get(name, kind)
                 for name, kind in zip(columns, types)]

    return dshape(Record(list(zip(columns, types))))

예제 #26

0

파일 보기

파일: csv.py 프로젝트: Casolt/blaze

def discover_csv(path, encoding=DEFAULT_ENCODING, nrows_discovery=50,
        header=None, dialect=None, types=None, columns=None,
        typehints=None):
    """Discover the Record datashape of a CSV file.

    Reads the first chunk of ``nrows_discovery`` rows with pandas, infers the
    column types via ``discover`` unless ``types`` is given, picks column
    names from the header or generates ``_<index>`` names unless ``columns``
    is given, then applies any per-column ``typehints``.

    Raises ValueError when the discovered row type is neither a Tuple nor a
    Fixed dimension of a Unit.
    """
    compression_by_ext = {'gz': 'gzip', 'bz2': 'bz2'}
    read_options = dict(
        dtype='O',
        encoding=encoding,
        chunksize=nrows_discovery,
        compression=compression_by_ext.get(ext(path)),
        header=0 if header else None,
    )
    df = pd.read_csv(path, **dict(read_options, **clean_dialect(dialect))) \
        if False else pd.read_csv(path, dtype=read_options['dtype'],
                                  encoding=read_options['encoding'],
                                  chunksize=read_options['chunksize'],
                                  compression=read_options['compression'],
                                  header=read_options['header'],
                                  **clean_dialect(dialect))
    df = df.get_chunk()

    if not types:
        # Empty cells are replaced by '' before discovery.
        sample_rows = df.fillna('').to_records(index=False).tolist()
        rowtype = discover(sample_rows).subshape[0]
        first = rowtype[0]
        if isinstance(first, Tuple):
            unpacked = [unpack(t) for t in first.dshapes]
            non_null = [string if t == null else t for t in unpacked]
            types = [safely_option(t) for t in non_null]
        elif isinstance(first, Fixed) and isinstance(rowtype[1], Unit):
            types = int(first) * [rowtype[1]]
        else:
            raise ValueError("Could not discover schema from data.\n"
                    "Please specify schema.")

    if not columns:
        if header:
            columns = list(df.columns)
        else:
            columns = ['_%d' % position for position in range(len(types))]

    if typehints:
        types = [typehints.get(col, typ) for col, typ in zip(columns, types)]

    fields = list(zip(columns, types))
    return dshape(Record(fields))

예제 #27

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_string():
    """Strings spelling a number or boolean discover like the value itself."""
    pairs = [('1', 1), ('1.0', 1.0), ('True', True), ('true', True)]
    for text, value in pairs:
        assert discover(text) == discover(value)

예제 #28

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_timedelta():
    """timedelta objects built from 1 to 7 positional args give timedelta_."""
    for nargs in range(1, 8):
        # Positional arguments count down from 10: (10,), (10, 9), ...
        args = range(10, 10 - nargs, -1)
        delta = timedelta(*args)
        assert discover(delta) == timedelta_

예제 #29

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_date():
    """Both an ISO date string and a date object are discovered as date_."""
    for value in ("2014-01-01", date(2014, 1, 1)):
        assert discover(value) == date_

예제 #30

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_string():
    """Text forms of 1, 1.0 and True discover the same as the real values."""
    texts = ("1", "1.0", "True", "true")
    values = (1, 1.0, True, True)
    for text, value in zip(texts, values):
        assert discover(text) == discover(value)

예제 #31

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_set():
    """A one-element set discovers as a length-1 dimension of its element."""
    single = set([1])
    element_shape = discover(single)
    assert element_shape == 1 * discover(1)

예제 #32

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_timedelta():
    """Every timedelta built from descending positional args is timedelta_."""
    arg_lists = [range(10, 10 - count, -1) for count in range(1, 8)]
    for args in arg_lists:
        assert discover(timedelta(*args)) == timedelta_

예제 #33

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_time():
    """A datetime.time object is discovered as time_."""
    one_past_noon = time(12, 0, 1)
    assert discover(one_past_noon) == time_

예제 #34

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_string_that_looks_like_date():
    """A date-looking string from issue GH 91 must be discovered as string."""
    # GH 91
    lookalike = "31-DEC-99 12.00.00.000000000"
    assert discover(lookalike) == string

예제 #35

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_single_space_string_is_not_date():
    """A string holding one space is discovered as string."""
    blank = ' '
    assert discover(blank) == string

예제 #36

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_date():
    """An ISO date string and a datetime.date both discover as date_."""
    as_text = '2014-01-01'
    assert discover(as_text) == date_

    as_object = date(2014, 1, 1)
    assert discover(as_object) == date_

예제 #37

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_frozenset():
    """A one-element frozenset discovers as 1 * the element's datashape."""
    frozen = frozenset([1])
    found = discover(frozen)
    assert found == 1 * discover(1)

예제 #38

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_record():
    """A dict discovers as a Record whose fields are listed amount, name."""
    person = {'name': 'Alice', 'amount': 100}
    found = discover(person)
    fields = [['amount', discover(100)], ['name', discover('Alice')]]
    assert found == Record(fields)

예제 #39

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_list_of_dicts_difference():
    """Dicts with differing keys unify; keys missing from a row are optional."""
    rows = [
        {"name": "Alice", "amount": 100},
        {"name": "Bob", "house_color": "blue"},
    ]
    discovered = discover(rows)
    spec = "2 * {amount: ?int64, house_color: ?string, name: string}"
    assert discovered == dshape(spec)

예제 #40

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_nested_complex_record_type():
    """A numpy array with a nested structured dtype yields a nested record."""
    inner = [("c", "int64", 2), ("d", "float64")]
    record_dtype = np.dtype([("a", "U7"), ("b", inner)])
    zeros = np.zeros(5, record_dtype)
    expected = "5 * {a: string[7, 'U32'], b: {c: 2 * int64, d: float64}}"
    assert discover(zeros) == dshape(expected)

예제 #41

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_set():
    """discover() on a single-element set gives 1 * the element's shape."""
    found = discover(set([1]))
    assert found == 1 * discover(1)

예제 #42

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_nested_complex_record_type():
    """Structured numpy dtypes with a nested field map to nested records."""
    nested_field = ('b', [('c', 'int64', 2), ('d', 'float64')])
    dt = np.dtype([('a', 'U7'), nested_field])
    array = np.zeros(5, dt)
    found = discover(array)
    assert found == dshape(
        "5 * {a: string[7, 'U32'], b: {c: 2 * int64, d: float64}}")

예제 #43

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_discover_empty_sequence(seq):
    """The given sequence must be discovered as ``var * string``.

    NOTE(review): ``seq`` is presumably supplied by a parametrize decorator
    or fixture that is not visible here -- confirm against the full file.
    """
    found = discover(seq)
    assert found == var * string

예제 #44

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_list():
    """A homogeneous list discovers as its length times the element shape."""
    for items in ([1, 2, 3], [1.0, 2.0, 3.0]):
        found = discover(items)
        assert found == 3 * discover(items[0])

예제 #45

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_list():
    """Lists of three ints or three floats discover as 3 * element shape."""
    ints = [1, 2, 3]
    assert discover(ints) == 3 * discover(1)

    floats = [1.0, 2.0, 3.0]
    assert discover(floats) == 3 * discover(1.0)

예제 #46

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_heterogeneous_ordered_container():
    """A (str, int) tuple discovers as a Tuple of the two element shapes."""
    pair = ("Hello", 1)
    found = discover(pair)
    assert found == Tuple([discover("Hello"), discover(1)])

예제 #47

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_discover_undiscoverable():
    """discover() on an instance of a bare class raises NotImplementedError."""

    class MyClass(object):
        pass

    instance = MyClass()
    with pytest.raises(NotImplementedError):
        discover(instance)

예제 #48

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_record():
    """A single dict is discovered as a Record of its discovered values."""
    found = discover({"name": "Alice", "amount": 100})
    amount_field = ["amount", discover(100)]
    name_field = ["name", discover("Alice")]
    assert found == Record([amount_field, name_field])

예제 #49

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_discover_bytes():
    """A bytes object is discovered as ``String('A')``."""
    payload = b'abcdefg'
    found = discover(payload)
    assert found == String('A')

예제 #50

0

파일 보기

파일: test_discovery.py 프로젝트: ymarfoq/outilACVDesagregation

def test_time():
    """discover() maps a datetime.time value to time_."""
    found = discover(time(12, 0, 1))
    assert found == time_

예제 #51

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_letters_only_strings():
    """Letter-only words (weekday names, 'now', ...) are plain strings."""
    words = [
        'sunday', 'monday', 'tuesday', 'wednesday',
        'thursday', 'friday', 'saturday',
        'a', 'b',
        'now', 'yesterday', 'tonight',
    ]
    for word in words:
        assert discover(word) == string

예제 #52

0

파일 보기

파일: csv.py 프로젝트: holdenk/blaze

    def __init__(self, path, mode='rt',
            schema=None, columns=None, types=None, typehints=None,
            dialect=None, header=None, open=open, nrows_discovery=50,
            **kwargs):
        """Set up the CSV resource, discovering dialect, header and schema.

        Parameters
        ----------
        path : path of the CSV file.
        mode : file mode; must not contain 'w' unless ``schema`` is given.
        schema : optional schema; when absent (and not writing) it is built
            from ``columns`` and ``types``.
        columns : optional column names; otherwise read from the header line
            or generated as ``_0``, ``_1``, ...
        types : optional column types; otherwise discovered from sample rows.
        typehints : optional mapping of column name to a type that overrides
            the discovered one.
        dialect : passed to ``discover_dialect`` together with ``kwargs``.
        header : whether the file has a header; when None it is decided by
            ``has_header`` on a sample of the file.
        open : callable used to open the file; also stored as ``self.open``.
        nrows_discovery : stop index passed to ``islice`` when sampling rows.
        **kwargs : extra dialect options; ``sep`` is also copied to
            ``delimiter``.

        Raises
        ------
        ValueError
            If the file does not exist in a read mode, if no schema is given
            for a writable file, or if the schema cannot be discovered.
        """
        if 'r' in mode and not os.path.isfile(path):
            raise ValueError('CSV file "%s" does not exist' % path)
        if not schema and 'w' in mode:
            raise ValueError('Please specify schema for writable CSV file')
        self.path = path
        self.mode = mode
        self.open = open

        if os.path.exists(path) and mode != 'w':
            f = self.open(path)
            try:
                sample = f.read(16384)
            finally:
                # Some file-like objects returned by ``open`` have no close().
                try:
                    f.close()
                except AttributeError:
                    pass
        else:
            sample = ''

        # Pandas uses sep instead of delimiter.
        # Lets support that too
        if 'sep' in kwargs:
            kwargs['delimiter'] = kwargs['sep']

        dialect = discover_dialect(sample, dialect, **kwargs)
        assert dialect
        if header is None:
            header = has_header(sample)

        if not schema and 'w' not in mode:
            if not types:
                with open(self.path) as f:
                    # Row 0 is skipped; sampling stops at nrows_discovery.
                    data = list(it.islice(csv.reader(f, **dialect), 1, nrows_discovery))
                discovered = discover(data)
                rowtype = discovered.subshape[0]
                if isinstance(rowtype[0], Tuple):
                    types = [unpack(t) for t in rowtype[0].dshapes]
                    types = [string if t == null else t for t in types]
                    # Wrap every non-string column type in Option.
                    types = [t if isinstance(t, Option) or t == string
                             else Option(t)
                             for t in types]
                elif (isinstance(rowtype[0], Fixed) and
                      isinstance(rowtype[1], CType)):
                    types = int(rowtype[0]) * [rowtype[1]]
                else:
                    # The original built this exception without raising it.
                    raise ValueError("Could not discover schema from data.\n"
                                     "Please specify schema.")
            if not columns:
                if header:
                    with open(self.path) as f:
                        columns = next(csv.reader([next(f)], **dialect))
                else:
                    columns = ['_%d' % i for i in range(len(types))]
            if typehints:
                types = [typehints.get(c, t) for c, t in zip(columns, types)]

            schema = dshape(Record(list(zip(columns, types))))

        self._schema = schema

        self.header = header
        self.dialect = dialect

예제 #53

0

파일 보기

파일: test_discovery.py 프로젝트: dan-coates/datashape

def test_heterogeneous_ordered_container():
    """A tuple of mixed types discovers as a Tuple of per-element shapes."""
    found = discover(('Hello', 1))
    element_shapes = [discover('Hello'), discover(1)]
    assert found == Tuple(element_shapes)