def _parse_html_result(self, response, verbose=False):
    # parse the HTML return...
    root = BeautifulSoup(response.content, 'html5lib')
    htmltable = root.findAll('table')
    # if len(htmltable) != 1:
    #     raise ValueError("Found the wrong number of tables: {0}"
    #                      .format(len(htmltable)))
    string_to_parse = htmltable[-1].encode('ascii')
    if six.PY2:
        from astropy.io.ascii import html
        from astropy.io.ascii.core import convert_numpy
        htmlreader = html.HTML({'parser': 'html5lib'})
        htmlreader.outputter.default_converters.append(convert_numpy(np.unicode))
        table = htmlreader.read(string_to_parse)
    else:
        table = Table.read(string_to_parse.decode('utf-8'), format='ascii.html')
    return table
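# A minimal standalone sketch of the same idea (illustrative, not from the
# original module): grab the last <table> element in an HTML payload and hand
# it to astropy's ``ascii.html`` reader. The helper name
# ``parse_last_html_table`` is hypothetical, and it takes raw HTML bytes
# rather than a response object.
def parse_last_html_table(html_bytes):
    from bs4 import BeautifulSoup
    from astropy.table import Table

    root = BeautifulSoup(html_bytes, 'html5lib')
    # Same selection as above: keep only the last <table> element.
    last_table = root.find_all('table')[-1]
    return Table.read(str(last_table), format='ascii.html')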
def _convert_vals(self, cols):
    """READ: Convert str_vals in `cols` to final arrays with correct dtypes.

    This is adapted from ``BaseOutputter._convert_vals``. In the case of ECSV
    there is no guessing and all types are known in advance. A big change is
    handling the possibility of JSON-encoded values, both unstructured object
    data and structured values that may contain masked data.
    """
    for col in cols:
        try:
            # 1-d or N-d object columns are serialized as JSON.
            if col.subtype == 'object':
                _check_dtype_is_str(col)
                col_vals = [json.loads(val) for val in col.str_vals]
                col.data = np.empty([len(col_vals)] + col.shape, dtype=object)
                col.data[...] = col_vals

            # Variable length arrays with shape (n, m, ..., *) for fixed
            # n, m, .. and variable in last axis. Masked values here are
            # not currently supported.
            elif col.shape and col.shape[-1] is None:
                _check_dtype_is_str(col)

                # Remake as a 1-d object column of numpy ndarrays using the
                # datatype specified in the ECSV file.
                col_vals = [np.array(json.loads(val), dtype=col.subtype)
                            for val in col.str_vals]
                col.shape = ()
                col.dtype = np.dtype(object)
                # np.array(col_vals_arr, dtype=object) fails ?? so this workaround:
                col.data = np.empty(len(col_vals), dtype=object)
                col.data[:] = col_vals

            # Multidim columns with consistent shape (n, m, ...). These
            # might be masked.
            elif col.shape:
                _check_dtype_is_str(col)
                col_vals = [json.loads(val) for val in col.str_vals]
                # Make a numpy object array of col_vals to look for None
                # (masked values)
                data = np.array(col_vals, dtype=object)
                mask = (data == None)  # noqa: E711
                if not np.any(mask):
                    # No None's, just convert to required dtype
                    col.data = data.astype(col.subtype)
                else:
                    # Replace all the None with an appropriate fill value
                    kind = np.dtype(col.subtype).kind
                    data[mask] = {'U': '', 'S': b''}.get(kind, 0)
                    # Finally make a MaskedArray with the filled data + mask
                    col.data = np.ma.array(data.astype(col.subtype), mask=mask)

            # Regular scalar value column
            else:
                if col.subtype:
                    warnings.warn(
                        f'unexpected subtype {col.subtype!r} set for column '
                        f'{col.name!r}, using dtype={col.dtype!r} instead.',
                        category=AstropyUserWarning)
                converter_func, _ = convert_numpy(col.dtype)
                col.data = converter_func(col.str_vals)

            if col.data.shape[1:] != tuple(col.shape):
                raise ValueError('shape mismatch between value and column specifier')

        except json.JSONDecodeError:
            raise ValueError(f'column {col.name!r} failed to convert: '
                             'column value is not valid JSON')
        except Exception as exc:
            raise ValueError(f'column {col.name!r} failed to convert: {exc}')
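# Illustrative sketch (not from the original module) of the multidim branch
# above: JSON-decode fixed-shape string values, locate ``None`` entries, and
# build a MaskedArray with a dtype-appropriate fill value. The sample
# ``str_vals`` and the 'int64' subtype are assumptions for demonstration only.
def _demo_multidim_masked_conversion():
    import json
    import numpy as np

    subtype = 'int64'
    str_vals = ['[1, 2]', '[3, null]']        # second row has a masked entry

    col_vals = [json.loads(val) for val in str_vals]
    data = np.array(col_vals, dtype=object)
    mask = (data == None)  # noqa: E711 -- None marks masked entries
    if not np.any(mask):
        return data.astype(subtype)
    kind = np.dtype(subtype).kind
    data[mask] = {'U': '', 'S': b''}.get(kind, 0)  # fill value per dtype kind
    return np.ma.array(data.astype(subtype), mask=mask)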
def _convert_vals(self, cols):
    """READ: Convert str_vals in `cols` to final arrays with correct dtypes.

    This is adapted from ``BaseOutputter._convert_vals``. In the case of ECSV
    there is no guessing and all types are known in advance. A big change is
    handling the possibility of JSON-encoded values, both unstructured object
    data and structured values that may contain masked data.
    """
    for col in cols:
        try:
            # 1-d or N-d object columns are serialized as JSON.
            if col.subtype == 'object':
                _check_dtype_is_str(col)
                col_vals = [json.loads(val) for val in col.str_vals]
                col.data = np.empty([len(col_vals)] + col.shape, dtype=object)
                col.data[...] = col_vals

            # Variable length arrays with shape (n, m, ..., *) for fixed
            # n, m, .. and variable in last axis. Masked values here are
            # not currently supported.
            elif col.shape and col.shape[-1] is None:
                _check_dtype_is_str(col)

                # Empty (blank) values in original ECSV are changed to "0"
                # in str_vals with corresponding col.mask being created and
                # set accordingly. Instead use an empty list here.
                if hasattr(col, 'mask'):
                    for idx in np.nonzero(col.mask)[0]:
                        col.str_vals[idx] = '[]'

                # Remake as a 1-d object column of numpy ndarrays or
                # MaskedArray using the datatype specified in the ECSV file.
                col_vals = []
                for str_val in col.str_vals:
                    obj_val = json.loads(str_val)  # list or nested lists
                    try:
                        arr_val = np.array(obj_val, dtype=col.subtype)
                    except TypeError:
                        # obj_val has entries that are inconsistent with
                        # dtype. For a valid ECSV file the only possibility
                        # is None values (indicating missing values).
                        data = np.array(obj_val, dtype=object)
                        # Replace all the None with an appropriate fill value
                        mask = (data == None)  # noqa: E711
                        kind = np.dtype(col.subtype).kind
                        data[mask] = {'U': '', 'S': b''}.get(kind, 0)
                        arr_val = np.ma.array(data.astype(col.subtype), mask=mask)
                    col_vals.append(arr_val)

                col.shape = ()
                col.dtype = np.dtype(object)
                # np.array(col_vals_arr, dtype=object) fails ?? so this workaround:
                col.data = np.empty(len(col_vals), dtype=object)
                col.data[:] = col_vals

            # Multidim columns with consistent shape (n, m, ...). These
            # might be masked.
            elif col.shape:
                _check_dtype_is_str(col)

                # Change empty (blank) values in original ECSV to something
                # like "[[null, null],[null,null]]" so subsequent JSON
                # decoding works. Delete `col.mask` so that later code in
                # core TableOutputter.__call__() that deals with col.mask
                # does not run (since handling is done here already).
                if hasattr(col, 'mask'):
                    all_none_arr = np.full(shape=col.shape, fill_value=None, dtype=object)
                    all_none_json = json.dumps(all_none_arr.tolist())
                    for idx in np.nonzero(col.mask)[0]:
                        col.str_vals[idx] = all_none_json
                    del col.mask

                col_vals = [json.loads(val) for val in col.str_vals]
                # Make a numpy object array of col_vals to look for None
                # (masked values)
                data = np.array(col_vals, dtype=object)
                mask = (data == None)  # noqa: E711
                if not np.any(mask):
                    # No None's, just convert to required dtype
                    col.data = data.astype(col.subtype)
                else:
                    # Replace all the None with an appropriate fill value
                    kind = np.dtype(col.subtype).kind
                    data[mask] = {'U': '', 'S': b''}.get(kind, 0)
                    # Finally make a MaskedArray with the filled data + mask
                    col.data = np.ma.array(data.astype(col.subtype), mask=mask)

            # Regular scalar value column
            else:
                if col.subtype:
                    warnings.warn(
                        f'unexpected subtype {col.subtype!r} set for column '
                        f'{col.name!r}, using dtype={col.dtype!r} instead.',
                        category=InvalidEcsvDatatypeWarning)
                converter_func, _ = convert_numpy(col.dtype)
                col.data = converter_func(col.str_vals)

            if col.data.shape[1:] != tuple(col.shape):
                raise ValueError('shape mismatch between value and column specifier')

        except json.JSONDecodeError:
            raise ValueError(f'column {col.name!r} failed to convert: '
                             'column value is not valid JSON')
        except Exception as exc:
            raise ValueError(f'column {col.name!r} failed to convert: {exc}')
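# Illustrative sketch (not from the original module) of the variable-length
# branch above: each row decodes to its own ndarray, falling back to a
# MaskedArray when the row contains JSON ``null`` entries. The sample
# ``str_vals`` and the 'int64' subtype are assumptions for demonstration only.
def _demo_variable_length_conversion():
    import json
    import numpy as np

    subtype = 'int64'
    str_vals = ['[1, 2, 3]', '[]', '[4, null]']   # second row was blank/masked

    col_vals = []
    for str_val in str_vals:
        obj_val = json.loads(str_val)
        try:
            arr_val = np.array(obj_val, dtype=subtype)
        except TypeError:
            # Row contains None entries: fill them and carry an explicit mask.
            data = np.array(obj_val, dtype=object)
            mask = (data == None)  # noqa: E711
            data[mask] = {'U': '', 'S': b''}.get(np.dtype(subtype).kind, 0)
            arr_val = np.ma.array(data.astype(subtype), mask=mask)
        col_vals.append(arr_val)

    # Same workaround as above: fill an object array slot-by-slot because
    # np.array(col_vals, dtype=object) does not build the intended 1-d array.
    out = np.empty(len(col_vals), dtype=object)
    out[:] = col_vals
    return out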