Esempio n. 1
0
def test_stype_instantiate_bad():
    from datatable import stype
    with pytest.raises(dt.exceptions.ValueError):
        print(stype(-1))
    with pytest.raises(dt.exceptions.ValueError):
        print(stype(0))
    with pytest.raises(dt.exceptions.ValueError):
        print(stype(["i", "4"]))
    with pytest.raises(dt.exceptions.ValueError):
        print(stype(1.5))
    with pytest.raises(dt.exceptions.ValueError):
        print(stype(True))
Esempio n. 2
0
def test_stype_instantiate_from_numpy(numpy):
    from datatable import stype
    assert stype(numpy.dtype("void")) is stype.void
    assert stype(numpy.dtype("bool")) is stype.bool8
    assert stype(numpy.dtype("int8")) is stype.int8
    assert stype(numpy.dtype("int16")) is stype.int16
    assert stype(numpy.dtype("int32")) is stype.int32
    assert stype(numpy.dtype("int64")) is stype.int64
    assert stype(numpy.dtype("float32")) is stype.float32
    assert stype(numpy.dtype("float64")) is stype.float64
    assert stype(numpy.dtype("str")) is stype.str64
    assert stype(numpy.dtype("object")) is stype.obj64
Esempio n. 3
0
def parse_html_repr(html):
    # Here `re.S` means "single-line mode", i.e. allow '.' to match any
    # character, including the newline (by default '.' does not match '\n').
    mm = re.search("<div class='datatable'>(.*)</div>", html, re.S)
    html = mm.group(1).strip()
    mm = re.match(r"<table class='frame'>(.*)</table>\s*"
                  r"<div class='footer'>(.*)</div>", html, re.S)
    frame = mm.group(1).strip()
    footer = mm.group(2).strip()
    mm = re.match(r"<div class='frame_dimensions'>"
                  r"(\d+) rows? &times; (\d+) columns?</div>", footer, re.S)
    shape = (int(mm.group(1).strip()), int(mm.group(2).strip()))
    mm = re.match(r"<thead>(.*)</thead>\s*<tbody>(.*)</tbody>", frame, re.S)
    thead = mm.group(1).strip()
    tbody = mm.group(2).strip()
    mm = re.match("<tr class='colnames'><td class='row_index'></td>(.*)</tr>"
                  "\\s*"
                  "<tr class='coltypes'><td class='row_index'></td>(.*)</tr>",
                  thead, re.S)
    str_colnames = mm.group(1).strip()
    str_coltypes = mm.group(2).strip()
    colnames = re.findall("<th>(.*?)</th>", str_colnames)
    coltypes = re.findall("<td class='\\w+' title='(\\w+)'>", str_coltypes)
    str_rows = re.findall("<tr>(.*?)</tr>", tbody, re.S)
    rows = []
    for str_row in str_rows:
        row = re.findall("<td>(.*?)</td>", str_row, re.S)
        rows.append(row)
    html_repr = namedtuple("html_repr", ["names", "stypes", "shape", "data"])
    return html_repr(names=tuple(colnames),
                     stypes=tuple(dt.stype(s) for s in coltypes),
                     shape=shape,
                     data=rows)
Esempio n. 4
0
    def tonumpy(self, stype=None):
        """
        Convert Frame into a numpy array, optionally forcing it into a
        specific stype/dtype.

        Parameters
        ----------
        stype: datatable.stype, numpy.dtype or str
            Cast datatable into this dtype before converting it into a numpy
            array.
        """
        numpy = load_module("numpy")
        st = 0
        if stype:
            st = datatable.stype(stype).value
        self.internal.use_stype_for_buffers(st)
        res = numpy.array(self.internal)
        self.internal.use_stype_for_buffers(0)
        return res
Esempio n. 5
0
    def to_numpy(self, stype=None):
        """
        Convert Frame into a numpy array, optionally forcing it into a
        specific stype/dtype.

        Parameters
        ----------
        stype: datatable.stype, numpy.dtype or str
            Cast datatable into this dtype before converting it into a numpy
            array.
        """
        numpy = load_module("numpy")
        if not hasattr(numpy, "array"):  # pragma: no cover
            raise ImportError("Unsupported numpy version: `%s`"
                              % (getattr(numpy, "__version__", "???"), ))
        st = 0
        if stype:
            st = datatable.stype(stype).value
        self.internal.use_stype_for_buffers(st)
        res = numpy.array(self.internal)
        self.internal.use_stype_for_buffers(0)
        return res
Esempio n. 6
0
def test_stype_instantiate():
    from datatable import stype
    for st in stype:
        assert stype(st) is st
        assert stype(st.value) is st
        assert stype(st.name) is st
        assert stype(st.code) is st
    assert stype(bool) is stype.bool8
    assert stype("b1") is stype.bool8
    assert stype("bool") is stype.bool8
    assert stype("boolean") is stype.bool8
    assert stype(int) is stype.int64
    assert stype("int") is stype.int64
    assert stype("integer") is stype.int64
    assert stype("int8") is stype.int8
    assert stype("int16") is stype.int16
    assert stype("int32") is stype.int32
    assert stype("int64") is stype.int64
    assert stype(float) is stype.float64
    assert stype("real") is stype.float64
    assert stype("float") is stype.float64
    assert stype("float32") is stype.float32
    assert stype("float64") is stype.float64
    assert stype(str) is stype.str64
    assert stype("str") is stype.str64
    assert stype("str32") is stype.str32
    assert stype("str64") is stype.str64
    assert stype(object) is stype.obj64
    assert stype("obj") is stype.obj64
    assert stype("object") is stype.obj64