def _parse_parameter_file(self):
    """Populate dataset attributes from the remote ``yt_index.json`` header.

    Fetches ``<base_url>/yt_index.json`` and copies domain geometry, time,
    cosmology, and file-count metadata onto ``self``.

    Raises
    ------
    RuntimeError
        If the index file cannot be retrieved over HTTP.
    """
    self.dimensionality = 3
    self.refine_by = 2
    self.parameters["HydroMethod"] = "sph"
    # Here's where we're going to grab the JSON index file
    index_url = self.base_url + "/yt_index.json"
    hreq = requests.get(index_url)
    if hreq.status_code != 200:
        # Include the URL and status code so the failure is diagnosable.
        raise RuntimeError(
            f"Failed to fetch {index_url} (HTTP status {hreq.status_code})"
        )
    header = json.loads(hreq.content)
    # JSON object keys are always strings; particle counts are keyed by
    # integer particle type, so convert the keys back to int.
    header["particle_count"] = {
        int(k): v for k, v in header["particle_count"].items()
    }
    # Merge rather than replace: assigning ``header`` directly would
    # silently discard the "HydroMethod" entry set above.
    self.parameters.update(header)
    # Now we get what we need
    self.domain_left_edge = np.array(header["domain_left_edge"], "float64")
    self.domain_right_edge = np.array(header["domain_right_edge"], "float64")
    self.domain_dimensions = np.ones(3, "int32")
    self._periodicity = (True, True, True)
    self.current_time = header["current_time"]
    self.unique_identifier = header.get("unique_identifier", time.time())
    self.cosmological_simulation = int(header["cosmological_simulation"])
    for attr in (
        "current_redshift",
        "omega_lambda",
        "omega_matter",
        "hubble_constant",
    ):
        setattr(self, attr, float(header[attr]))
    self.file_count = header["num_files"]
def _is_valid(cls, filename, *args, **kwargs): if not filename.startswith("http://"): return False try: return requests.get(filename + "/yt_index.json").status_code == 200 except ImportError: # requests is not installed return False
def _open_stream(self, data_file, field):
    # This does not actually stream yet!
    """Fetch the raw bytes for *field* of *data_file* over HTTP.

    Parameters
    ----------
    data_file
        Object with a ``file_id`` attribute identifying the remote file.
    field : tuple
        ``(field_type, field_name)`` pair used to build the URL path.

    Returns
    -------
    bytes
        The raw response payload.

    Raises
    ------
    RuntimeError
        If the server does not answer with HTTP 200.
    """
    ftype, fname = field
    url = f"{self._url}/{data_file.file_id}/{ftype}/{fname}"
    mylog.info("Loading URL %s", url)
    resp = requests.get(url)
    if resp.status_code != 200:
        # Name the URL and status so callers can tell what failed.
        raise RuntimeError(
            f"Failed to fetch {url} (HTTP status {resp.status_code})"
        )
    # Track cumulative download volume for diagnostics.
    self.total_bytes += len(resp.content)
    return resp.content
def get_data_registry_table():
    """
    Load the sample data registry as a pandas.Dataframe instance.

    This function is considered experimental and is exposed for exploratory
    purposed. The output format is subject to change.

    The output of this function is cached so it will only generate one
    request per session.
    """
    # it would be nicer to have an actual api on the yt website server,
    # but this will do for now
    api_url = "https://raw.githubusercontent.com/yt-project/website/master/data/datafiles.json"
    response = requests.get(api_url)
    if not response.ok:
        raise RuntimeError(
            "Could not retrieve registry data. Please check your network setup."
        )
    website_json = response.json()

    # this dict follows this schema: {frontend_name: {flat dataframe-like}}
    columns = ["code", "filename", "size", "url", "description"]
    frames = (pd.DataFrame(entry) for entry in website_json.values())
    website_table = pd.concat(frames)[columns]

    # add a int-type byte size column
    # note that we cast to pandas specific type "Int64" because we expect
    # missing values; see
    # https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html#integer-dtypes-and-missing-data
    byte_sizes = website_table["size"].apply(_parse_byte_size)
    website_table["byte_size"] = byte_sizes.astype("Int64")

    # normalize urls to match the local json
    def _to_https(u):
        return u.replace("http:", "https:")

    website_table["url"] = website_table["url"].apply(_to_https)

    # load local data
    with pkg_resources.resource_stream("yt", "sample_data_registry.json") as fh:
        pooch_json = json.load(fh)
    pooch_table = pd.DataFrame(pooch_json.values())

    # merge tables
    unified_table = website_table.merge(pooch_table, on="url", how="outer")

    # PR 3089
    # ideally we should be able to do this, but it's not possible
    # at the time of writing because fhe "filename" is incomplete
    # see the companion comment in load_sample
    # unified_table.set_index("filename", inplace=True)
    # unified_table.index.rename("id", inplace=True)
    return unified_table
def _is_valid(cls, filename, *args, **kwargs): sdf_header = kwargs.get("sdf_header", filename) if sdf_header.startswith("http"): try: hreq = requests.get(sdf_header, stream=True) except ImportError: # requests is not installed return False if hreq.status_code != 200: return False # Grab a whole 4k page. line = next(hreq.iter_content(4096)) elif os.path.isfile(sdf_header): with open(sdf_header, encoding="ISO-8859-1") as f: line = f.read(10).strip() else: return False return line.startswith("# SDF")
def fancy_download_file(url, filename, requests=None):
    """Download *url* to *filename*, showing a progress bar when possible.

    Parameters
    ----------
    url : str
        Remote location to fetch.
    filename : str
        Local path to write the payload to.
    requests : module
        The ``requests`` module, injected by the caller; there is no
        fallback, so it must not be left as None.

    Returns
    -------
    str
        The *filename* that was written.
    """
    response = requests.get(url, stream=True)
    try:
        total_length = response.headers.get("content-length")
        with open(filename, "wb") as fh:
            if total_length is None:
                # No length advertised: just buffer everything in one go.
                fh.write(response.content)
            else:
                blocksize = 4 * 1024 ** 2
                iterations = int(float(total_length) / float(blocksize))
                pbar = get_pbar(
                    "Downloading %s to %s " % os.path.split(filename)[::-1],
                    iterations,
                )
                for iteration, chunk in enumerate(
                    response.iter_content(chunk_size=blocksize), start=1
                ):
                    fh.write(chunk)
                    pbar.update(iteration)
                pbar.finish()
    finally:
        # A streamed response holds its connection open until fully
        # consumed or closed; release it even if writing fails.
        response.close()
    return filename