Ejemplo n.º 1
0
    def _parse_html_result(self, response, verbose=False):
        """Parse the last HTML table found in ``response`` into a table.

        Parameters
        ----------
        response : object
            Response object whose ``content`` attribute holds the HTML
            document to parse.
        verbose : bool, optional
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        table
            On Python 2, the object returned by the ``astropy.io.ascii`` HTML
            reader; otherwise the result of
            ``Table.read(..., format='ascii.html')``.

        Raises
        ------
        IndexError
            If the document contains no ``<table>`` element.
        """
        root = BeautifulSoup(response.content, 'html5lib')

        # ``find_all`` is the current bs4 spelling of the legacy ``findAll``.
        htmltables = root.find_all('table')
        if not htmltables:
            # Same exception type that indexing the empty result would raise,
            # but with a message that explains what went wrong.
            raise IndexError("no <table> element found in the HTML response")

        # The number of tables is deliberately not validated: when several
        # are present only the last one is parsed.
        string_to_parse = htmltables[-1].encode('ascii')

        if six.PY2:
            from astropy.io.ascii import html
            from astropy.io.ascii.core import convert_numpy
            htmlreader = html.HTML({'parser': 'html5lib'})
            outputter = htmlreader.outputter
            # Extend a copy instead of appending in place, so that a
            # converter list shared beyond this reader instance is not grown
            # on every call.
            # NOTE(review): presumably ``default_converters`` is a class-level
            # list in astropy -- confirm.
            outputter.default_converters = (
                list(outputter.default_converters)
                + [convert_numpy(np.unicode)])
            table = htmlreader.read(string_to_parse)
        else:
            # ``Table.read`` is given text here, so decode the bytes first.
            table = Table.read(string_to_parse.decode('utf-8'),
                               format='ascii.html')

        return table
Ejemplo n.º 2
0
    def _convert_vals(self, cols):
        """READ: Convert str_vals in `cols` to final arrays with correct dtypes.

        This is adapted from ``BaseOutputter._convert_vals``. In the case of ECSV
        there is no guessing and all types are known in advance. A big change
        is handling the possibility of JSON-encoded values, both unstructured
        object data and structured values that may contain masked data.

        Parameters
        ----------
        cols : iterable
            Column objects providing ``name``, ``dtype``, ``subtype``,
            ``shape`` and ``str_vals``. Each column is updated in place: the
            converted values are stored in ``col.data``, and variable-length
            columns additionally get ``shape`` and ``dtype`` reset.

        Warns
        -----
        AstropyUserWarning
            If a scalar (shapeless, non-object) column has a ``subtype`` set;
            the subtype is ignored and ``col.dtype`` is used.

        Raises
        ------
        ValueError
            If a column fails to convert: a value is not valid JSON, the
            converted data does not match the declared shape, or any other
            error occurs. The original exception is chained as the cause.
        """
        for col in cols:
            try:
                # 1-d or N-d object columns are serialized as JSON.
                if col.subtype == 'object':
                    _check_dtype_is_str(col)
                    col_vals = [json.loads(val) for val in col.str_vals]
                    # Allocate first and then assign, so the decoded values
                    # are placed into an object array of the declared shape.
                    col.data = np.empty([len(col_vals)] + col.shape,
                                        dtype=object)
                    col.data[...] = col_vals

                # Variable length arrays with shape (n, m, ..., *) for fixed
                # n, m, .. and variable in last axis. Masked values here are
                # not currently supported.
                elif col.shape and col.shape[-1] is None:
                    _check_dtype_is_str(col)
                    # Remake as a 1-d object column of numpy ndarrays using the
                    # datatype specified in the ECSV file.
                    col_vals = [
                        np.array(json.loads(val), dtype=col.subtype)
                        for val in col.str_vals
                    ]
                    # The column is now a flat object column, so the shape
                    # check at the end of the loop compares against ().
                    col.shape = ()
                    col.dtype = np.dtype(object)
                    # np.array(col_vals_arr, dtype=object) fails ?? so this workaround:
                    col.data = np.empty(len(col_vals), dtype=object)
                    col.data[:] = col_vals

                # Multidim columns with consistent shape (n, m, ...). These
                # might be masked.
                elif col.shape:
                    _check_dtype_is_str(col)
                    col_vals = [json.loads(val) for val in col.str_vals]
                    # Make a numpy object array of col_vals to look for None
                    # (masked values)
                    data = np.array(col_vals, dtype=object)
                    # Element-wise comparison is intended, hence ``==``
                    # rather than ``is``.
                    mask = (data == None)  # noqa: E711
                    if not np.any(mask):
                        # No None's, just convert to required dtype
                        col.data = data.astype(col.subtype)
                    else:
                        # Replace all the None with an appropriate fill value
                        kind = np.dtype(col.subtype).kind
                        data[mask] = {'U': '', 'S': b''}.get(kind, 0)
                        # Finally make a MaskedArray with the filled data + mask
                        col.data = np.ma.array(data.astype(col.subtype),
                                               mask=mask)

                # Regular scalar value column
                else:
                    if col.subtype:
                        warnings.warn(
                            f'unexpected subtype {col.subtype!r} set for column '
                            f'{col.name!r}, using dtype={col.dtype!r} instead.',
                            category=AstropyUserWarning)
                    converter_func, _ = convert_numpy(col.dtype)
                    col.data = converter_func(col.str_vals)

                # The per-row shape of the result must agree with the column
                # specifier. This error is re-wrapped by the generic handler
                # below so that it also names the column.
                if col.data.shape[1:] != tuple(col.shape):
                    raise ValueError(
                        'shape mismatch between value and column specifier')

            except json.JSONDecodeError as exc:
                # Chain the decoder error so its details stay in the traceback.
                raise ValueError(f'column {col.name!r} failed to convert: '
                                 'column value is not valid JSON') from exc
            except Exception as exc:
                # Report every other failure uniformly as a ValueError that
                # names the column, keeping the original error as the cause.
                raise ValueError(
                    f'column {col.name!r} failed to convert: {exc}') from exc
Ejemplo n.º 3
0
    def _convert_vals(self, cols):
        """READ: Convert str_vals in `cols` to final arrays with correct dtypes.

        This is adapted from ``BaseOutputter._convert_vals``. In the case of ECSV
        there is no guessing and all types are known in advance. A big change
        is handling the possibility of JSON-encoded values, both unstructured
        object data and structured values that may contain masked data.

        Parameters
        ----------
        cols : iterable
            Column objects providing ``name``, ``dtype``, ``subtype``,
            ``shape`` and ``str_vals``, and optionally ``mask``. Each column
            is updated in place: the converted values are stored in
            ``col.data``; variable-length columns additionally get ``shape``
            and ``dtype`` reset; entries of ``str_vals`` flagged by ``mask``
            are rewritten before decoding; and for fixed-shape multidim
            columns the ``mask`` attribute is deleted.

        Warns
        -----
        InvalidEcsvDatatypeWarning
            If a scalar (shapeless, non-object) column has a ``subtype`` set;
            the subtype is ignored and ``col.dtype`` is used.

        Raises
        ------
        ValueError
            If a column fails to convert: a value is not valid JSON, the
            converted data does not match the declared shape, or any other
            error occurs. The original exception is chained as the cause.
        """
        for col in cols:
            try:
                # 1-d or N-d object columns are serialized as JSON.
                if col.subtype == 'object':
                    _check_dtype_is_str(col)
                    col_vals = [json.loads(val) for val in col.str_vals]
                    # Allocate first and then assign, so the decoded values
                    # are placed into an object array of the declared shape.
                    col.data = np.empty([len(col_vals)] + col.shape,
                                        dtype=object)
                    col.data[...] = col_vals

                # Variable length arrays with shape (n, m, ..., *) for fixed
                # n, m, .. and variable in last axis. Masked values here are
                # not currently supported.
                elif col.shape and col.shape[-1] is None:
                    _check_dtype_is_str(col)

                    # Empty (blank) values in original ECSV are changed to "0"
                    # in str_vals with corresponding col.mask being created and
                    # set accordingly. Instead use an empty list here.
                    if hasattr(col, 'mask'):
                        for idx in np.nonzero(col.mask)[0]:
                            col.str_vals[idx] = '[]'

                    # Remake as a 1-d object column of numpy ndarrays or
                    # MaskedArray using the datatype specified in the ECSV file.
                    col_vals = []
                    for str_val in col.str_vals:
                        obj_val = json.loads(str_val)  # list or nested lists
                        try:
                            arr_val = np.array(obj_val, dtype=col.subtype)
                        except TypeError:
                            # obj_val has entries that are inconsistent with
                            # dtype. For a valid ECSV file the only possibility
                            # is None values (indicating missing values).
                            data = np.array(obj_val, dtype=object)
                            # Replace all the None with an appropriate fill value
                            mask = (data == None)  # noqa: E711
                            kind = np.dtype(col.subtype).kind
                            data[mask] = {'U': '', 'S': b''}.get(kind, 0)
                            arr_val = np.ma.array(data.astype(col.subtype),
                                                  mask=mask)

                        col_vals.append(arr_val)

                    # The column is now a flat object column, so the shape
                    # check at the end of the loop compares against ().
                    col.shape = ()
                    col.dtype = np.dtype(object)
                    # np.array(col_vals_arr, dtype=object) fails ?? so this workaround:
                    col.data = np.empty(len(col_vals), dtype=object)
                    col.data[:] = col_vals

                # Multidim columns with consistent shape (n, m, ...). These
                # might be masked.
                elif col.shape:
                    _check_dtype_is_str(col)

                    # Change empty (blank) values in original ECSV to something
                    # like "[[null, null],[null,null]]" so subsequent JSON
                    # decoding works. Delete `col.mask` so that later code in
                    # core TableOutputter.__call__() that deals with col.mask
                    # does not run (since handling is done here already).
                    if hasattr(col, 'mask'):
                        all_none_arr = np.full(shape=col.shape,
                                               fill_value=None,
                                               dtype=object)
                        all_none_json = json.dumps(all_none_arr.tolist())
                        for idx in np.nonzero(col.mask)[0]:
                            col.str_vals[idx] = all_none_json
                        del col.mask

                    col_vals = [json.loads(val) for val in col.str_vals]
                    # Make a numpy object array of col_vals to look for None
                    # (masked values)
                    data = np.array(col_vals, dtype=object)
                    # Element-wise comparison is intended, hence ``==``
                    # rather than ``is``.
                    mask = (data == None)  # noqa: E711
                    if not np.any(mask):
                        # No None's, just convert to required dtype
                        col.data = data.astype(col.subtype)
                    else:
                        # Replace all the None with an appropriate fill value
                        kind = np.dtype(col.subtype).kind
                        data[mask] = {'U': '', 'S': b''}.get(kind, 0)
                        # Finally make a MaskedArray with the filled data + mask
                        col.data = np.ma.array(data.astype(col.subtype),
                                               mask=mask)

                # Regular scalar value column
                else:
                    if col.subtype:
                        warnings.warn(
                            f'unexpected subtype {col.subtype!r} set for column '
                            f'{col.name!r}, using dtype={col.dtype!r} instead.',
                            category=InvalidEcsvDatatypeWarning)
                    converter_func, _ = convert_numpy(col.dtype)
                    col.data = converter_func(col.str_vals)

                # The per-row shape of the result must agree with the column
                # specifier. This error is re-wrapped by the generic handler
                # below so that it also names the column.
                if col.data.shape[1:] != tuple(col.shape):
                    raise ValueError(
                        'shape mismatch between value and column specifier')

            except json.JSONDecodeError as exc:
                # Chain the decoder error so its details stay in the traceback.
                raise ValueError(f'column {col.name!r} failed to convert: '
                                 'column value is not valid JSON') from exc
            except Exception as exc:
                # Report every other failure uniformly as a ValueError that
                # names the column, keeping the original error as the cause.
                raise ValueError(
                    f'column {col.name!r} failed to convert: {exc}') from exc