def wipe_data(client, dsquery):
    """Delete all datafiles from IDS relating to datasets matching the query.

    The argument dsquery must be a Query object searching for
    datasets.  All datafiles relating to corresponding datasets must
    have been uploaded to ids.server (Issue icatproject/ids.server #61).

    :param client: the ICAT client to act on.
    :param dsquery: a Query object searching for Dataset entities.
    :raises ValueError: if dsquery does not search for datasets.
    """
    require_ids_version("1.6.0", "Issue #42")
    if dsquery.entity.BeanName != 'Dataset':
        # Bug fix: this used to interpolate the undefined name 'query',
        # raising NameError instead of the intended ValueError.
        raise ValueError("Invalid query '%s'" % dsquery)

    def _delete_batch(datasets):
        """Delete a batch: files from IDS, then the objects from ICAT.

        Best effort: a dataset may have gone offline in the meantime;
        it will be picked up again in the next round of the outer loop.
        """
        try:
            client.deleteData(datasets)
            client.deleteMany(datasets)
        except icat.IDSDataNotOnlineError:
            pass

    # Query for any remaining datafile having actual content in IDS;
    # used as the termination test of the outer loop.
    dfquery = Query(client, "Datafile",
                    conditions={"location": "IS NOT NULL"}, limit=(0, 1))
    for a, c in dsquery.conditions.items():
        dfquery.addConditions({"dataset.%s" % a: c})
    while True:
        deleteDatasets = []
        restoreDatasets = []
        for ds in client.searchChunked(dsquery):
            status = client.ids.getStatus(DataSelection([ds]))
            if status == "ONLINE":
                # Collect online datasets and delete them in batches of 25.
                deleteDatasets.append(ds)
                if len(deleteDatasets) >= 25:
                    _delete_batch(deleteDatasets)
                    deleteDatasets = []
            elif status == "ARCHIVED":
                # Request restore of at most 25 archived datasets per round.
                if len(restoreDatasets) < 25:
                    restoreDatasets.append(ds)
        if deleteDatasets:
            _delete_batch(deleteDatasets)
        if restoreDatasets:
            client.ids.restore(DataSelection(restoreDatasets))
        client.autoRefresh()
        # If any Datafile is left we need to continue the loop.
        if client.search(dfquery):
            time.sleep(60)
        else:
            break
def test_ingest_existing(icatconfig, dataset, delay):
    """Try to ingest a dataset that already exist.

    This must yield an error and must not in any way damage the
    already existing dataset.
    """
    client, conf, _ = icatconfig
    query = icat.query.Query(client, "Investigation",
                             conditions=dataset.proposal.as_conditions())
    investigation = client.assertedSearch(query)[0]
    existing = DatasetBase(client, investigation, dataset.name,
                           fileCount=4, fileSize=MemorySpace("10 KiB"))
    existing.uploadFiles(client)
    if delay:
        # Request archive of the old dataset and wait until it is
        # written to archive storage and removed from main storage.
        client.ids.archive(DataSelection([existing.dataset]))
        time.sleep(90)
    # OSError is raised if the ZIP file in archive storage exists,
    # RuntimeError is raised if the directory in main storage exists,
    # icat.ICATObjectExistsError is raised if neither files exist, but
    # the dataset in ICAT.
    expected = (OSError, RuntimeError, icat.ICATObjectExistsError)
    with pytest.raises(expected):
        dataset.ingest(conf)
    existing.download(client)
    existing.cleanup()
def getDfSelections(status=None):
    """Yield selections of Datafiles.
    """
    offset = 0
    while True:
        # Page through all datasets, datafiles included.
        searchexp = ("SELECT o FROM Dataset o INCLUDE o.datafiles "
                     "LIMIT %d, %d" % (offset, searchlimit))
        offset += searchlimit
        page = client.search(searchexp)
        if not page:
            break
        selection = DataSelection()
        for ds in page:
            located = [df for df in ds.datafiles if df.location is not None]
            if not located:
                continue
            # Skip datasets not matching the requested status, if any.
            if status and client.ids.getStatus(DataSelection([ds])) != status:
                continue
            selection.extend(located)
        if selection:
            yield selection
def test_getinfo(client, case):
    """Call getStatus() and getSize() to get some informations on a dataset.
    """
    conditions = {
        "name": "= '%s'" % case['dsname'],
        "investigation.name": "= '%s'" % case['invname'],
    }
    query = Query(client, "Dataset", conditions=conditions)
    selection = DataSelection(client.assertedSearch(query))
    size = client.ids.getSize(selection)
    print("Size of dataset %s: %d" % (case['dsname'], size))
    expected_size = sum(df['size'] for df in case['dfs'])
    assert size == expected_size
    status = client.ids.getStatus(selection)
    print("Status of dataset %s: %s" % (case['dsname'], status))
    assert status in {"ONLINE", "RESTORING", "ARCHIVED"}
def test_getDatafileIds(client, case):
    """Call getDatafileIds() to get the Datafile ids from a dataset.
    """
    if client.ids.apiversion < '1.5.0':
        pytest.skip("IDS %s is too old, need 1.5.0 or newer"
                    % client.ids.apiversion)
    conditions = {
        "name": "= '%s'" % case['dsname'],
        "investigation.name": "= '%s'" % case['invname'],
    }
    query = Query(client, "Dataset", conditions=conditions)
    ds = client.assertedSearch(query)[0]
    dfids = client.ids.getDatafileIds(DataSelection([ds]))
    print("Datafile ids of dataset %s: %s" % (case['dsname'], str(dfids)))
    # Cross-check against the ids known to ICAT itself.
    refquery = "Datafile.id <-> Dataset [id=%d]" % ds.id
    assert set(dfids) == set(client.search(refquery))
def upload(conf, client_kwargs, dataset):
    try:
        # Note: I'm not sure whether Suds is thread safe.  Therefore
        # use a separate local client object in each thread.
        client = icat.Client(conf.url, **client_kwargs)
        client.login(conf.auth, conf.credentials)
        name = dataset.name
        dff = dataset.getDatafileFormat(client)
        localfile = Datafile(dataset.datasetdir, "upload.dat", 32)
        # Get the dataset object from ICAT, continue to retry while it
        # does not exist yet.
        while True:
            try:
                ds = dataset.search(client)
                log.info("Upload: dataset %s found.", name)
                break
            except icat.SearchAssertionError:
                log.info("Upload: dataset %s not found (yet).", name)
                time.sleep(0.2)
        selection = DataSelection([ds])
        datafile = client.new("datafile", name=localfile.fname,
                              dataset=ds, datafileFormat=dff)
        # Do the upload.  This may (or even should) fail due to the
        # dataset not online.  The error triggers a restore, so we
        # continue to retry until the restore has been completed.
        while True:
            try:
                client.putData(localfile.path, datafile)
                log.info("Upload to dataset %s succeeded.", name)
                break
            except icat.IDSDataNotOnlineError:
                status = client.ids.getStatus(selection)
                log.info("Upload: dataset %s is %s.", name, status)
                time.sleep(0.2)
        resultQueue.put(localfile)
        client.logout()
    except Exception as err:
        # Forward any failure to the main thread via the result queue.
        resultQueue.put(err)
def test_status_no_sessionId(client, case):
    """Call getStatus() while logged out.

    IDS 1.5.0 and newer allow the sessionId to be omitted from the
    getStatus() call.
    """
    if client.ids.apiversion < '1.5.0':
        pytest.skip("IDS %s is too old, need 1.5.0 or newer"
                    % client.ids.apiversion)
    conditions = {
        "name": "= '%s'" % case['dsname'],
        "investigation.name": "= '%s'" % case['invname'],
    }
    query = Query(client, "Dataset", conditions=conditions)
    selection = DataSelection(client.assertedSearch(query))
    # Temporarily drop the sessionId for the duration of the call.
    with tmpSessionId(client, None):
        status = client.ids.getStatus(selection)
    print("Status of dataset %s: %s" % (case['dsname'], status))
    assert status in {"ONLINE", "RESTORING", "ARCHIVED"}
def test_restore(client, case):
    """Call restore() on a dataset.
    """
    if not client.ids.isTwoLevel():
        pytest.skip("This IDS does not use two levels of data storage")
    query = Query(client, "Dataset", conditions={
        "name": "= '%s'" % case['dsname'],
        "investigation.name": "= '%s'" % case['invname'],
    })
    selection = DataSelection(client.assertedSearch(query))
    status = client.ids.getStatus(selection)
    if status != "ARCHIVED":
        # Bug fix: the skip message wrongly said "not online" (copied
        # from test_archive); the guard actually checks for ARCHIVED.
        pytest.skip("The dataset is not archived")
    client.ids.restore(selection)
    print("Request restore of dataset %s" % (case['dsname']))
    status = client.ids.getStatus(selection)
    # Do not assert status == "RESTORING" because same remark as for
    # archive() applies: there is no guarantee whatsoever on the
    # outcome of the restore() call.
    print("Status of dataset %s is now %s" % (case['dsname'], status))
def test_archive(client, case):
    """Call archive() on a dataset.
    """
    if not client.ids.isTwoLevel():
        pytest.skip("This IDS does not use two levels of data storage")
    conditions = {
        "name": "= '%s'" % case['dsname'],
        "investigation.name": "= '%s'" % case['invname'],
    }
    query = Query(client, "Dataset", conditions=conditions)
    selection = DataSelection(client.assertedSearch(query))
    if client.ids.getStatus(selection) != "ONLINE":
        pytest.skip("The dataset is not online")
    client.ids.archive(selection)
    print("Request archive of dataset %s" % (case['dsname']))
    status = client.ids.getStatus(selection)
    # Do not assert status == "ARCHIVED" because the archive could be
    # deferred by the server or an other operation on the same dataset
    # could intervene.  So, there is no guarantee whatsoever on the
    # outcome of the archive() call.
    print("Status of dataset %s is now %s" % (case['dsname'], status))
def provokeIDSNotFoundError(client):
    """Trigger an IDSNotFoundError by requesting data for a bogus dataset id."""
    print("Provoke an IDSNotFoundError ...")
    client.ids.getData(DataSelection({'datasetIds': [-11]}))