Exemple #1
0
def _test_convert_nested(orig, expected_type, expected_value):
    """Check ``apply_schema`` with ``deep=True`` on a single-cell dataset.

    A one-row, one-column input ``[[orig]]`` is converted against the schema
    ``"a:<expected_type>"`` twice: once with the default copy behaviour and
    once with ``copy=False``.

    :param orig: the raw cell value to convert
    :param expected_type: type expression appended to ``"a:"`` to build the
        schema
    :param expected_value: value the converted cell must compare equal to
    """
    rows = [[orig]]
    pa_schema = Schema("a:" + expected_type).pa_schema
    # The copying call runs first so it sees the untouched input row.
    copied_row = list(apply_schema(pa_schema, rows, deep=True))[0]
    same_row = list(apply_schema(pa_schema, rows, copy=False, deep=True))[0]
    assert expected_value == copied_row[0]
    assert expected_value == same_row[0]
    # Default mode must hand back a new row object; copy=False must reuse
    # the caller's row object.
    assert copied_row is not rows[0]
    assert same_row is rows[0]
Exemple #2
0
def _test_convert(orig, expected_type, expected_value):
    """Check ``apply_schema`` on a single-cell dataset.

    A one-row, one-column input ``[[orig]]`` is converted against the schema
    ``"a:<expected_type>"`` twice: once with the default copy behaviour and
    once with ``copy=False``.

    :param orig: the raw cell value to convert
    :param expected_type: type expression appended to ``"a:"`` to build the
        schema
    :param expected_value: expected converted cell; float NaN and ``pd.NaT``
        are matched specially because they cannot be checked with ``==``
    """
    rows = [[orig]]
    pa_schema = Schema("a:" + expected_type).pa_schema
    # The copying call runs first so it sees the untouched input row.
    copied_row = list(apply_schema(pa_schema, rows))[0]
    same_row = list(apply_schema(pa_schema, rows, copy=False))[0]

    # How to compare depends only on the expectation, so classify it once.
    wants_nan = isinstance(expected_value, float) and math.isnan(expected_value)
    wants_nat = expected_value is pd.NaT

    for row in (copied_row, same_row):
        actual = row[0]
        if wants_nan:
            assert math.isnan(actual)
        elif wants_nat:
            assert actual is pd.NaT
        else:
            assert expected_value == actual

    # Default mode must hand back a new row object; copy=False must reuse
    # the caller's row object.
    assert copied_row is not rows[0]
    assert same_row is rows[0]
Exemple #3
0
    def as_array_iterable(
        self,
        df: T,
        schema: Optional[pa.Schema] = None,
        columns: Optional[List[str]] = None,
        type_safe: bool = False,
    ) -> Iterable[List[Any]]:
        """Iterate over a pandas like dataframe, yielding each row as a list.

        :param df: pandas like dataframe
        :param schema: schema of the input. When None it is inferred with
          ``self.to_schema(df)``; per the original note, inference may be
          wrong for nested types, so try to be explicit
        :param columns: columns to output, None for all columns
        :param type_safe: whether to enforce the types in schema, if False, it
            will return the original values from the dataframe
        :return: iterable of rows, each row is a list

        :Notice:
        If there are nested types in schema, the conversion can be slower
        """
        if self.empty(df):
            return
        if schema is None:
            schema = self.to_schema(df)
        if columns is not None:
            # Narrow both the data and the schema to the requested columns,
            # in the requested order.
            df = df[columns]
            schema = pa.schema([schema.field(name) for name in columns])

        if not type_safe:
            # No type enforcement: emit the raw dataframe values.
            yield from map(list, df.itertuples(index=False, name=None))
            return

        has_nested = any(pa.types.is_nested(tp) for tp in schema.types)
        if not has_nested:
            # Flat schema: convert through arrow, then transpose the
            # per-column value lists back into rows.
            table = self.as_arrow(df, schema)
            by_name = table.to_pydict()
            yield from map(list, zip(*(by_name[name] for name in schema.names)))
            return

        # If schema has nested types, the conversion will be much slower
        yield from apply_schema(
            schema,
            df.itertuples(index=False, name=None),
            copy=True,
            deep=True,
            str_as_json=True,
        )
Exemple #4
0
 def as_array_iterable(
     self,
     columns: Optional[List[str]] = None,
     type_safe: bool = False,
 ) -> Iterable[Any]:
     """Iterate over the rows held in ``self.native``.

     :param columns: columns to output, None for all columns
     :param type_safe: if True, rows are passed through ``apply_schema``
         using ``self.schema.pa_schema``; if False, the items of
         ``self.native`` are yielded as they are
     :return: iterable of rows
     """
     if columns is not None:
         # Column selection is delegated to a dataframe built over this one
         # with the extracted sub-schema.
         projected = IterableDataFrame(self, self.schema.extract(columns))
         yield from projected.as_array_iterable(type_safe=type_safe)
         return
     if type_safe:
         yield from apply_schema(
             self.schema.pa_schema,
             self.native,
             copy=True,
             deep=True,
             str_as_json=True,
         )
     else:
         yield from self.native
Exemple #5
0
 def as_array_iterable(
     self,
     columns: Optional[List[str]] = None,
     type_safe: bool = False,
 ) -> Iterable[Any]:
     """Iterate over rows produced by ``self._iter_cols``.

     :param columns: columns to output, None for all columns; when given it
         must not be empty
     :param type_safe: if True, rows are passed through ``apply_schema``
         using the schema of the selected columns; if False, the rows from
         ``self._iter_cols`` are yielded as they are
     :return: iterable of rows
     """
     # An empty position list is what ``_iter_cols`` receives when no column
     # selection was requested.
     pos = (
         []
         if columns is None
         else [self.schema.index_of_key(key) for key in columns]
     )
     if columns is not None:
         assert_or_throw(len(pos) > 0, "columns if set must be non empty")

     if not type_safe:
         yield from self._iter_cols(pos)
         return

     if columns is None:
         sub = self.schema
     else:
         sub = self.schema.extract(columns)
     yield from apply_schema(
         sub.pa_schema,
         self._iter_cols(pos),
         copy=True,
         deep=True,
         str_as_json=True,
     )
Exemple #6
0
def _assert_not_supported(orig, expected_type, deep=False):
    """Expect ``NotImplementedError`` when converting ``orig``.

    :param orig: the raw cell value to convert
    :param expected_type: type expression appended to ``"a:"`` to build the
        schema
    :param deep: forwarded to ``apply_schema`` as ``deep``
    """
    rows = [[orig]]
    # Schema construction stays inside the guarded region, so an error
    # raised there is accepted exactly like one raised by the conversion.
    with raises(NotImplementedError):
        pa_schema = Schema("a:" + expected_type).pa_schema
        _ = list(apply_schema(pa_schema, rows, deep=deep))[0]
Exemple #7
0
def _assert_raise(orig, expected_type, deep=False):
    """Expect ``ValueError`` when converting ``orig``.

    :param orig: the raw cell value to convert
    :param expected_type: type expression appended to ``"a:"`` to build the
        schema
    :param deep: forwarded to ``apply_schema`` as ``deep``
    """
    rows = [[orig]]
    # Schema construction stays inside the guarded region, so an error
    # raised there is accepted exactly like one raised by the conversion.
    with raises(ValueError):
        pa_schema = Schema("a:" + expected_type).pa_schema
        _ = list(apply_schema(pa_schema, rows, deep=deep))[0]