from pathlib import Path
from shutil import rmtree

import pytest

from pyhf.contrib.utils import download
from pyhf.exceptions import InvalidArchive, InvalidArchiveHost


def test_download_archive_type(
    tmpdir, mocker, requests_mock, tarfile_path, tarfile_uncompressed_path, zipfile_path
):
    archive_url = "https://www.hepdata.net/record/resource/1408476?view=true"
    output_directory = tmpdir.join("likelihoods").strpath

    # Give BytesIO a tarfile
    requests_mock.get(archive_url, content=open(tarfile_path, "rb").read())
    download(archive_url, output_directory)

    # Give BytesIO an uncompressed tarfile
    requests_mock.get(
        archive_url, content=open(tarfile_uncompressed_path, "rb").read()
    )
    download(archive_url, output_directory)

    # Give BytesIO a zipfile
    requests_mock.get(archive_url, content=open(zipfile_path, "rb").read())

    # Run without and with an existing output_directory to cover both
    # cases of the shutil.rmtree logic
    rmtree(Path(output_directory))
    download(archive_url, output_directory)  # without
    download(archive_url, output_directory)  # with

    # Give BytesIO a zipfile (using the same requests_mock as above) but have
    # zipfile.is_zipfile reject it
    mocker.patch("zipfile.is_zipfile", return_value=False)
    with pytest.raises(InvalidArchive):
        download(archive_url, output_directory)
import logging
from pathlib import Path

log = logging.getLogger(__name__)


def download(archive_url, output_directory, verbose, force, compress):
    """
    Download the patchset archive from the remote URL and extract it in a
    directory at the path given.

    Example:

    .. code-block:: shell

        $ pyhf contrib download --verbose https://doi.org/10.17182/hepdata.90607.v3/r3 1Lbb-likelihoods

        \b
        1Lbb-likelihoods/patchset.json
        1Lbb-likelihoods/README.md
        1Lbb-likelihoods/BkgOnly.json

    Raises:
        :class:`~pyhf.exceptions.InvalidArchiveHost`: if the provided archive host name is not known to be valid
    """
    try:
        from pyhf.contrib import utils

        utils.download(archive_url, output_directory, force, compress)

        if verbose:
            file_list = [str(file) for file in Path(output_directory).glob("*")]
            print("\n".join(file_list))
    except AttributeError:
        log.error(
            "\nInstallation of the contrib extra is required to use the contrib CLI API"
            + "\nPlease install with: python -m pip install pyhf[contrib]\n",
            exc_info=True,
        )
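# A minimal sketch of calling the contrib API directly from Python instead of
# going through the CLI wrapper above, assuming the pyhf[contrib] extra is
# installed; the output directory name here is illustrative.
from pathlib import Path

from pyhf.contrib.utils import download

download("https://doi.org/10.17182/hepdata.90607.v3/r3", "1Lbb-likelihoods")
for file in Path("1Lbb-likelihoods").glob("*"):
    print(file)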
def test_download_archive_force(tmpdir, requests_mock, tarfile_path):
    archive_url = "https://www.cern.ch/record/resource/123456789"
    requests_mock.get(
        archive_url, content=open(tarfile_path, "rb").read(), status_code=200
    )

    # An untrusted host is rejected unless the download is forced
    with pytest.raises(InvalidArchiveHost):
        download(archive_url, tmpdir.join("likelihoods").strpath, force=False)

    download(archive_url, tmpdir.join("likelihoods").strpath, force=True)
import json
from pathlib import Path
from time import sleep

import pyhf
from funcx.sdk.client import FuncXClient

from pyhf.contrib.utils import download

# prepare_workspace, infer_hypotest, and count_complete are helper functions
# defined elsewhere in this script


def main(args):
    if args.config_file is not None:
        with open(args.config_file, "r") as infile:
            config = json.load(infile)

    backend = args.backend

    pallet_path = Path(config["input_prefix"]).joinpath(config["pallet_name"])

    # locally get pyhf pallet for analysis
    if not pallet_path.exists():
        download(config["pallet_url"], pallet_path)

    analysis_name = config["analysis_name"]
    analysis_prefix_str = "" if analysis_name is None else f"{analysis_name}_"
    if config["analysis_dir"] is not None:
        pallet_path = pallet_path.joinpath(config["analysis_dir"])

    with open(
        pallet_path.joinpath(f"{analysis_prefix_str}BkgOnly.json")
    ) as bkgonly_json:
        bkgonly_workspace = json.load(bkgonly_json)

    # Initialize funcX client
    fxc = FuncXClient()
    fxc.max_requests = 200

    with open("endpoint_id.txt") as endpoint_file:
        pyhf_endpoint = str(endpoint_file.read().rstrip())

    # register functions
    prepare_func = fxc.register_function(prepare_workspace)
    infer_func = fxc.register_function(infer_hypotest)

    # execute background only workspace
    prepare_task = fxc.run(
        bkgonly_workspace, backend, endpoint_id=pyhf_endpoint, function_id=prepare_func
    )

    # Read patchset in while background only workspace is running
    with open(
        pallet_path.joinpath(f"{analysis_prefix_str}patchset.json")
    ) as patchset_json:
        patchset = pyhf.PatchSet(json.load(patchset_json))

    # Poll for the prepared workspace, retrying until the task completes
    workspace = None
    while not workspace:
        try:
            workspace = fxc.get_result(prepare_task)
        except Exception as excep:
            print(f"prepare: {excep}")
            sleep(10)

    print("--------------------")
    print(workspace)

    # execute patch fits across workers and retrieve them when done
    n_patches = len(patchset.patches)
    tasks = {}
    for patch_idx in range(n_patches):
        patch = patchset.patches[patch_idx]
        task_id = fxc.run(
            workspace,
            patch.metadata,
            [patch.patch],
            backend,
            endpoint_id=pyhf_endpoint,
            function_id=infer_func,
        )
        tasks[patch.name] = {"id": task_id, "result": None}

    while count_complete(tasks.values()) < n_patches:
        for task in tasks.keys():
            if not tasks[task]["result"]:
                try:
                    result = fxc.get_result(tasks[task]["id"])
                    print(
                        f"Task {task} complete, there are {count_complete(tasks.values()) + 1} results now"
                    )
                    tasks[task]["result"] = result
                except Exception as excep:
                    print(f"inference: {excep}")
                    sleep(15)

    print("--------------------")
    print(tasks.values())
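# Hypothetical example of the JSON config file that main() above expects; the
# keys mirror the config[...] lookups in main() and the values are
# illustrative only (the pallet URL is the 1Lbb example used elsewhere in
# this section):
#
# {
#     "input_prefix": "input",
#     "pallet_name": "1Lbb-pallet",
#     "pallet_url": "https://doi.org/10.17182/hepdata.90607.v3/r3",
#     "analysis_name": null,
#     "analysis_dir": null
# }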
def test_download_compress(tmpdir, requests_mock):
    archive_url = "https://www.hepdata.net/record/resource/1408476?view=true"
    requests_mock.get(archive_url)

    download(archive_url, tmpdir.join("likelihoods").strpath, compress=True)
def test_download_invalid_archive(tmpdir, requests_mock):
    archive_url = "https://www.hepdata.net/record/resource/1408476?view=true"
    requests_mock.get(archive_url, status_code=404)

    # A failed request should raise InvalidArchive
    with pytest.raises(InvalidArchive):
        download(archive_url, tmpdir.join("likelihoods").strpath)
def test_download_untrusted_archive_host(tmpdir, requests_mock):
    archive_url = "https://www.pyhfthisdoesnotexist.org"
    requests_mock.get(archive_url)

    # A host not on the approved list should raise InvalidArchiveHost
    with pytest.raises(InvalidArchiveHost):
        download(archive_url, tmpdir.join("likelihoods").strpath)
import json
from pathlib import Path
from time import sleep

from funcx.sdk.client import FuncXClient

from pyhf.contrib.utils import download


def prepare_workspace(data):
    import pyhf

    return pyhf.Workspace(data)


if __name__ == "__main__":
    # locally get pyhf pallet for analysis
    if not Path("1Lbb-pallet").exists():
        download("https://doi.org/10.17182/hepdata.90607.v3/r3", "1Lbb-pallet")
    with open("1Lbb-pallet/BkgOnly.json") as bkgonly_json:
        bkgonly_workspace = json.load(bkgonly_json)

    # Use privately assigned endpoint id
    with open("endpoint_id.txt") as endpoint_file:
        pyhf_endpoint = str(endpoint_file.read().rstrip())

    fxc = FuncXClient()

    # Register function and execute on worker node
    prepare_func = fxc.register_function(prepare_workspace)
    prepare_task = fxc.run(
        bkgonly_workspace, endpoint_id=pyhf_endpoint, function_id=prepare_func
    )
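    # A sketch of retrieving the prepared workspace, following the same
    # polling pattern as main() elsewhere in this section; the 10 second
    # retry interval is illustrative.
    workspace = None
    while workspace is None:
        try:
            workspace = fxc.get_result(prepare_task)
        except Exception as excep:
            print(f"prepare: {excep}")
            sleep(10)
    print(workspace)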