Beispiel #1
0
def select_or_selectable_to_frame(el, bind=None, dshape=None, **kwargs):
    """Convert a select/selectable ``el`` into a ``pandas.DataFrame``.

    On postgresql the result is streamed through an in-memory CSV buffer
    and parsed with ``pd.read_csv``; on other dialects rows are fetched
    via ``batch`` and packed into a structured numpy array.

    Parameters
    ----------
    el : selectable or select expression to materialize
    bind : optional engine/connection; resolved via ``getbind``
    dshape : datashape describing the result; drives dtype selection
    **kwargs : forwarded to the CSV append step (e.g. ``escapechar``)
    """
    bind = getbind(el, bind)
    if bind.dialect.name == 'postgresql':
        buf = StringIO()
        append(CSV(None, buffer=buf), el, bind=bind, **kwargs)
        buf.seek(0)
        datetime_fields = []
        other_dtypes = {}
        optional_string_fields = []
        try:
            fields = dshape.measure.fields
        except AttributeError:
            # scalar measure: synthesize a single (placeholder, type) pair
            fields = [(0, dshape.measure)]

        for n, (field, dtype) in enumerate(fields):
            if isdatelike(dtype):
                datetime_fields.append(field)
            elif isinstance(dtype, Option):
                ty = dtype.ty
                if ty in datashape.integral:
                    # nullable integers must be read as float so NaN fits
                    other_dtypes[field] = 'float64'
                else:
                    other_dtypes[field] = ty.to_numpy_dtype()
                    if ty == string:
                        # store the positional index, not the name: for a
                        # scalar measure the placeholder name 0 is not a
                        # real column label and df[0] would fail in the
                        # fixup loop below
                        optional_string_fields.append(n)
            else:
                other_dtypes[field] = dtype.to_numpy_dtype()

        df = pd.read_csv(
            buf,
            parse_dates=datetime_fields,
            dtype=other_dtypes,
            skip_blank_lines=False,
            escapechar=kwargs.get('escapechar', '\\'),
        )
        # read_csv really wants missing values to be NaN, but for
        # string (object) columns, we want None to be missing
        columns = df.columns
        for field_ix in optional_string_fields:
            # resolve the real label, then use ``df.loc`` because boolean
            # masks are not supported by ``df.iloc``
            field = columns[field_ix]
            df.loc[df[field].isnull(), field] = None
        return df

    columns, rows = batch(el, bind=bind)
    dtypes = {}
    try:
        fields = dshape.measure.fields
    except AttributeError:
        fields = [(columns[0], dshape.measure)]
    for field, dtype in fields:
        if isinstance(dtype, Option):
            ty = dtype.ty
            if ty in datashape.integral:
                # nullable integers widen to float64 for NaN support
                dtypes[field] = 'float64'
            else:
                dtypes[field] = ty.to_numpy_dtype()
        else:
            dtypes[field] = dtype.to_numpy_dtype()

    return pd.DataFrame(
        np.array(list(map(tuple, rows)),
                 dtype=[(str(c), dtypes[c]) for c in columns]))
Beispiel #2
0
    """ Vector norm

    See np.linalg.norm
    """
    if ord is None or ord == 'fro':
        ord = 2
    if ord == inf:
        return max(abs(expr), axis=axis, keepdims=keepdims)
    elif ord == -inf:
        return min(abs(expr), axis=axis, keepdims=keepdims)
    elif ord == 1:
        return sum(abs(expr), axis=axis, keepdims=keepdims)
    elif ord % 2 == 0:
        return sum(expr**ord, axis=axis, keepdims=keepdims)**(1.0 / ord)
    return sum(abs(expr)**ord, axis=axis, keepdims=keepdims)**(1.0 / ord)


# Register which reductions are available for which datashapes: each entry
# pairs a predicate over a datashape with the set of methods it enables.
dshape_method_list.extend([
    # counting applies to any collection
    (iscollection, {count, nelements}),
    # min/max require an orderable element type
    (lambda ds:
     (iscollection(ds) and
      (isstring(ds) or isnumeric(ds) or isboolean(ds) or isdatelike(ds))),
     {min, max}),
    # row-oriented reductions only make sense for 1-d shapes
    (lambda ds: len(ds.shape) == 1, {nrows, nunique}),
    # any/all require boolean elements
    (lambda ds: iscollection(ds) and isboolean(ds), {any, all}),
    # arithmetic reductions require numeric (or boolean) elements
    (lambda ds: iscollection(ds) and (isnumeric(ds) or isboolean(ds)),
     {mean, sum, std, var, vnorm}),
])

# expose nrows as an attribute-style property rather than a method call
method_properties.update([nrows])
Beispiel #3
0
    See np.linalg.norm
    """
    if ord is None or ord == 'fro':
        ord = 2
    if ord == inf:
        return max(abs(expr), axis=axis, keepdims=keepdims)
    elif ord == -inf:
        return min(abs(expr), axis=axis, keepdims=keepdims)
    elif ord == 1:
        return sum(abs(expr), axis=axis, keepdims=keepdims)
    elif ord % 2 == 0:
        return sum(expr ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord)
    return sum(abs(expr) ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord)


# Register which reductions are available for which datashapes: each entry
# pairs a predicate over a datashape with the set of methods it enables.
dshape_method_list.extend([
    # counting applies to any collection
    (iscollection, {count, nelements}),
    # min/max require an orderable element type (timedeltas included)
    (lambda ds: (iscollection(ds) and
                 (isstring(ds) or isnumeric(ds) or isboolean(ds) or
                  isdatelike(ds) or isinstance(ds, TimeDelta))),
     {min, max}),
    # row-oriented reductions only make sense for 1-d shapes
    (lambda ds: len(ds.shape) == 1,
     {nrows, nunique}),
    # any/all require boolean elements
    (lambda ds: iscollection(ds) and isboolean(ds),
     {any, all}),
    # arithmetic reductions require numeric (or boolean) elements
    (lambda ds: iscollection(ds) and (isnumeric(ds) or isboolean(ds)),
     {mean, sum, std, var, vnorm}),
])

# expose nrows as an attribute-style property rather than a method call
method_properties.update([nrows])
Beispiel #4
0
    See np.linalg.norm
    """
    if ord is None or ord == 'fro':
        ord = 2
    if ord == inf:
        return max(abs(expr), axis=axis, keepdims=keepdims)
    elif ord == -inf:
        return min(abs(expr), axis=axis, keepdims=keepdims)
    elif ord == 1:
        return sum(abs(expr), axis=axis, keepdims=keepdims)
    elif ord % 2 == 0:
        return sum(expr ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord)
    return sum(abs(expr) ** ord, axis=axis, keepdims=keepdims) ** (1.0 / ord)


# Register which reductions are available for which datashapes: each entry
# pairs a predicate over a datashape with the set of methods it enables.
dshape_method_list.extend([
    # counting applies to any collection
    (iscollection, {count, nelements}),
    # min/max require an orderable element type
    (lambda ds: (iscollection(ds) and
                 (isstring(ds) or isnumeric(ds) or isboolean(ds) or
                  isdatelike(ds))),
     {min, max}),
    # row-oriented reductions only make sense for 1-d shapes
    (lambda ds: len(ds.shape) == 1,
     {nrows, nunique}),
    # any/all require boolean elements
    (lambda ds: iscollection(ds) and isboolean(ds),
     {any, all}),
    # arithmetic reductions require numeric (or boolean) elements
    (lambda ds: iscollection(ds) and (isnumeric(ds) or isboolean(ds)),
     {mean, sum, std, var, vnorm}),
])

# expose nrows as an attribute-style property rather than a method call
method_properties.update([nrows])
Beispiel #5
0
    """ Vector norm

    See np.linalg.norm
    """
    if ord is None or ord == 'fro':
        ord = 2
    if ord == inf:
        return max(abs(expr), axis=axis, keepdims=keepdims)
    elif ord == -inf:
        return min(abs(expr), axis=axis, keepdims=keepdims)
    elif ord == 1:
        return sum(abs(expr), axis=axis, keepdims=keepdims)
    elif ord % 2 == 0:
        return sum(expr**ord, axis=axis, keepdims=keepdims)**(1.0 / ord)
    return sum(abs(expr)**ord, axis=axis, keepdims=keepdims)**(1.0 / ord)


# Register which reductions are available for which datashapes: each entry
# pairs a predicate over a datashape with the set of methods it enables.
dshape_method_list.extend([
    # counting applies to any collection
    (iscollection, {count, nelements}),
    # min/max require an orderable element type (timedeltas included)
    (lambda ds: (iscollection(ds) and
                 (isstring(ds) or isnumeric(ds) or isboolean(ds) or
                  isdatelike(ds) or isinstance(ds, TimeDelta))),
     {min, max}),
    # row-oriented reductions only make sense for 1-d shapes
    (lambda ds: len(ds.shape) == 1, {nrows, nunique}),
    # any/all require boolean elements
    (lambda ds: iscollection(ds) and isboolean(ds), {any, all}),
    # arithmetic reductions require numeric (or boolean) elements
    (lambda ds: iscollection(ds) and (isnumeric(ds) or isboolean(ds)),
     {mean, sum, std, var, vnorm}),
])

# expose nrows as an attribute-style property rather than a method call
method_properties.update([nrows])
Beispiel #6
0
def select_or_selectable_to_frame(el, bind=None, dshape=None, **kwargs):
    """Materialize the select/selectable ``el`` as a ``pandas.DataFrame``.

    The postgresql path round-trips the data through an in-memory CSV
    buffer and delegates parsing to ``pd.read_csv``; every other dialect
    fetches rows with ``batch`` and builds a structured numpy array.

    Parameters
    ----------
    el : select or selectable expression to convert
    bind : optional engine/connection, resolved through ``getbind``
    dshape : datashape of the result; determines the column dtypes
    **kwargs : passed through to the CSV dump (e.g. ``escapechar``)
    """
    bind = getbind(el, bind)
    if bind.dialect.name == 'postgresql':
        buf = StringIO()
        append(CSV(None, buffer=buf), el, bind=bind, **kwargs)
        buf.seek(0)

        try:
            measure_fields = dshape.measure.fields
        except AttributeError:
            # scalar measure: fabricate a single (placeholder, type) pair
            measure_fields = [(0, dshape.measure)]

        date_cols = []
        dtype_map = {}
        nullable_string_ixs = []
        for ix, (name, typ) in enumerate(measure_fields):
            if isdatelike(typ):
                date_cols.append(name)
                continue
            if isinstance(typ, Option):
                inner = typ.ty
                if inner in datashape.integral:
                    # nullable integers are read as float so NaN fits
                    dtype_map[name] = 'float64'
                else:
                    dtype_map[name] = inner.to_numpy_dtype()
                    if inner == string:
                        # remember the *position*: for scalar measures the
                        # placeholder name is not a usable column label, so
                        # a name lookup would fail in the fixup loop below
                        nullable_string_ixs.append(ix)
            else:
                dtype_map[name] = typ.to_numpy_dtype()

        df = pd.read_csv(
            buf,
            parse_dates=date_cols,
            dtype=dtype_map,
            skip_blank_lines=False,
            escapechar=kwargs.get('escapechar', '\\'),
        )
        # read_csv encodes missing values as NaN, but object (string)
        # columns should hold None for "missing" instead
        labels = df.columns
        for ix in nullable_string_ixs:
            # translate position -> label, then assign through ``df.loc``
            # since ``df.iloc`` does not accept boolean masks
            label = labels[ix]
            df.loc[df[label].isnull(), label] = None
        return df

    columns, rows = batch(el, bind=bind)
    try:
        measure_fields = dshape.measure.fields
    except AttributeError:
        measure_fields = [(columns[0], dshape.measure)]

    dtype_map = {}
    for name, typ in measure_fields:
        if isinstance(typ, Option):
            inner = typ.ty
            # nullable integers widen to float64 for NaN support
            dtype_map[name] = ('float64' if inner in datashape.integral
                               else inner.to_numpy_dtype())
        else:
            dtype_map[name] = typ.to_numpy_dtype()

    record_dtype = [(str(c), dtype_map[c]) for c in columns]
    return pd.DataFrame(np.array(list(map(tuple, rows)), dtype=record_dtype))
Beispiel #7
0
def select_or_selectable_to_frame(el, bind=None, dshape=None, **kwargs):
    """Materialize the select/selectable ``el`` as a ``pandas.DataFrame``.

    The postgresql path round-trips the data through an in-memory CSV
    buffer and delegates parsing to ``pd.read_csv``; every other dialect
    fetches rows with ``batch`` and builds a structured numpy array.

    Parameters
    ----------
    el : select or selectable expression to convert
    bind : optional engine/connection, resolved through ``getbind``
    dshape : datashape of the result; determines the column dtypes
    **kwargs : passed through to the CSV dump (e.g. ``escapechar``)
    """
    bind = getbind(el, bind)
    if bind.dialect.name == 'postgresql':
        buf = StringIO()
        append(CSV(None, buffer=buf), el, bind=bind, **kwargs)
        buf.seek(0)

        try:
            measure_fields = dshape.measure.fields
        except AttributeError:
            # scalar measure: fabricate a single (placeholder, type) pair
            measure_fields = [(0, dshape.measure)]

        date_cols = []
        dtype_map = {}
        nullable_string_ixs = []
        for ix, (name, typ) in enumerate(measure_fields):
            if isdatelike(typ):
                date_cols.append(name)
                continue
            if isinstance(typ, Option):
                inner = typ.ty
                if inner in datashape.integral:
                    # nullable integers are read as float so NaN fits
                    dtype_map[name] = 'float64'
                else:
                    dtype_map[name] = inner.to_numpy_dtype()
                    if inner == string:
                        # remember the *position*: for scalar measures the
                        # placeholder name is not a usable column label, so
                        # a name lookup would fail in the fixup loop below
                        nullable_string_ixs.append(ix)
            else:
                dtype_map[name] = typ.to_numpy_dtype()

        df = pd.read_csv(
            buf,
            parse_dates=date_cols,
            dtype=dtype_map,
            skip_blank_lines=False,
            escapechar=kwargs.get('escapechar', '\\'),
        )
        # read_csv encodes missing values as NaN, but object (string)
        # columns should hold None for "missing" instead
        labels = df.columns
        for ix in nullable_string_ixs:
            # translate position -> label, then assign through ``df.loc``
            # since ``df.iloc`` does not accept boolean masks
            label = labels[ix]
            df.loc[df[label].isnull(), label] = None
        return df

    columns, rows = batch(el, bind=bind)
    try:
        measure_fields = dshape.measure.fields
    except AttributeError:
        measure_fields = [(columns[0], dshape.measure)]

    dtype_map = {}
    for name, typ in measure_fields:
        if isinstance(typ, Option):
            inner = typ.ty
            # nullable integers widen to float64 for NaN support
            dtype_map[name] = ('float64' if inner in datashape.integral
                               else inner.to_numpy_dtype())
        else:
            dtype_map[name] = typ.to_numpy_dtype()

    record_dtype = [(str(c), dtype_map[c]) for c in columns]
    return pd.DataFrame(np.array(list(map(tuple, rows)), dtype=record_dtype))