Example #1
def wipe_data(client, dsquery):
    """Delete all datafiles from IDS relating to datasets matching the query.

    The argument dsquery must be a Query object searching for
    datasets.  All datafiles belonging to the matching datasets must
    have been uploaded to ids.server
    (Issue icatproject/ids.server #61).
    """
    require_ids_version("1.6.0", "Issue #42")
    if dsquery.entity.BeanName != 'Dataset':
        raise ValueError("Invalid query '%s'" % dsquery)

    dfquery = Query(client,
                    "Datafile",
                    conditions={"location": "IS NOT NULL"},
                    limit=(0, 1))
    for a, c in dsquery.conditions.items():
        dfquery.addConditions({"dataset.%s" % a: c})

    while True:
        deleteDatasets = []
        restoreDatasets = []
        for ds in client.searchChunked(dsquery):
            status = client.ids.getStatus(DataSelection([ds]))
            if status == "ONLINE":
                deleteDatasets.append(ds)
                if len(deleteDatasets) >= 25:
                    try:
                        client.deleteData(deleteDatasets)
                        client.deleteMany(deleteDatasets)
                    except icat.IDSDataNotOnlineError:
                        pass
                    deleteDatasets = []
            elif status == "ARCHIVED":
                if len(restoreDatasets) < 25:
                    restoreDatasets.append(ds)
        if len(deleteDatasets) > 0:
            try:
                client.deleteData(deleteDatasets)
                client.deleteMany(deleteDatasets)
            except icat.IDSDataNotOnlineError:
                pass
        if len(restoreDatasets) > 0:
            client.ids.restore(DataSelection(restoreDatasets))
        client.autoRefresh()
        # If any Datafile is left we need to continue the loop.
        if client.search(dfquery):
            time.sleep(60)
        else:
            break
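A minimal usage sketch for this function (hedged: the investigation name is made up, and the standard python-icat configuration setup is assumed):

import icat
import icat.config
from icat.query import Query

# Hypothetical setup: get connection parameters from the standard
# python-icat configuration mechanism and log in.
config = icat.config.Config(ids="mandatory")
client, conf = config.getconfig()
client.login(conf.auth, conf.credentials)

# Wipe the data of all datasets of one (made-up) investigation.
dsquery = Query(client, "Dataset",
                conditions={"investigation.name": "= '12100409-ST'"})
wipe_data(client, dsquery)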
Example #2
def test_ingest_existing(icatconfig, dataset, delay):
    """Try to ingest a dataset that already exist.  This must yield an
    error and must not in any way damage the already existing dataset.
    """
    client, conf, _ = icatconfig
    query = icat.query.Query(client,
                             "Investigation",
                             conditions=dataset.proposal.as_conditions())
    investigation = client.assertedSearch(query)[0]
    old_dataset = DatasetBase(client,
                              investigation,
                              dataset.name,
                              fileCount=4,
                              fileSize=MemorySpace("10 KiB"))
    old_dataset.uploadFiles(client)
    if delay:
        # Request archive of the old dataset and wait until it is
        # written to archive storage and removed from main storage.
        client.ids.archive(DataSelection([old_dataset.dataset]))
        time.sleep(90)
    # OSError is raised if the ZIP file in archive storage exists,
    # RuntimeError is raised if the directory in main storage exists,
    # icat.ICATObjectExistsError is raised if neither file exists,
    # but the dataset does exist in ICAT.
    with pytest.raises((OSError, RuntimeError, icat.ICATObjectExistsError)):
        dataset.ingest(conf)
    old_dataset.download(client)
    old_dataset.cleanup()
Example #3
def getDfSelections(status=None):
    """Yield selections of Datafiles.
    """
    skip = 0
    while True:
        searchexp = ("SELECT o FROM Dataset o INCLUDE o.datafiles "
                     "LIMIT %d, %d" % (skip, searchlimit))
        skip += searchlimit
        datasets = client.search(searchexp)
        if not datasets:
            break
        selection = DataSelection()
        for ds in datasets:
            dfs = [df for df in ds.datafiles if df.location is not None]
            if dfs and (not status or
                        client.ids.getStatus(DataSelection([ds])) == status):
                selection.extend(dfs)
        if selection:
            yield selection
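For illustration, one hedged way to consume these selections, e.g. to request archival of all datafiles that are currently online (client and searchlimit are assumed to be module-level globals, as in the function itself):

# Hypothetical driver: archive every datafile that is online.
for selection in getDfSelections(status="ONLINE"):
    client.ids.archive(selection)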
Example #4
def test_getinfo(client, case):
    """Call getStatus() and getSize() to get some informations on a dataset.
    """
    query = Query(client,
                  "Dataset",
                  conditions={
                      "name": "= '%s'" % case['dsname'],
                      "investigation.name": "= '%s'" % case['invname'],
                  })
    selection = DataSelection(client.assertedSearch(query))
    size = client.ids.getSize(selection)
    print("Size of dataset %s: %d" % (case['dsname'], size))
    assert size == sum(f['size'] for f in case['dfs'])
    status = client.ids.getStatus(selection)
    print("Status of dataset %s: %s" % (case['dsname'], status))
    assert status in {"ONLINE", "RESTORING", "ARCHIVED"}
Example #5
def test_getDatafileIds(client, case):
    """Call getDatafileIds() to get the Datafile ids from a dataset.
    """
    if client.ids.apiversion < '1.5.0':
        pytest.skip("IDS %s is too old, need 1.5.0 or newer" %
                    client.ids.apiversion)
    query = Query(client,
                  "Dataset",
                  conditions={
                      "name": "= '%s'" % case['dsname'],
                      "investigation.name": "= '%s'" % case['invname'],
                  })
    ds = client.assertedSearch(query)[0]
    selection = DataSelection([ds])
    dfids = client.ids.getDatafileIds(selection)
    print("Datafile ids of dataset %s: %s" % (case['dsname'], str(dfids)))
    query = "Datafile.id <-> Dataset [id=%d]" % ds.id
    assert set(dfids) == set(client.search(query))
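The last search uses python-icat's concise query syntax. For comparison, a sketch of an equivalent search built with the Query class (note: the keyword is spelled attribute rather than attributes in older python-icat releases):

# Equivalent JPQL-style query built with the Query class.
idquery = Query(client, "Datafile", attributes="id",
                conditions={"dataset.id": "= %d" % ds.id})
assert set(dfids) == set(client.search(idquery))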
Example #6
def upload(conf, client_kwargs, dataset):
    try:
        # Note: I'm not sure whether Suds is thread safe.  Therefore
        # use a separate local client object in each thread.
        client = icat.Client(conf.url, **client_kwargs)
        client.login(conf.auth, conf.credentials)
        name = dataset.name
        datafileFormat = dataset.getDatafileFormat(client)
        f = Datafile(dataset.datasetdir, "upload.dat", 32)
        while True:
            # Get the dataset object from ICAT, continue to retry
            # while it does not exist yet.
            try:
                ds = dataset.search(client)
                log.info("Upload: dataset %s found.", name)
                break
            except icat.SearchAssertionError:
                log.info("Upload: dataset %s not found (yet).", name)
                time.sleep(0.2)
                continue
        selection = DataSelection([ds])
        datafile = client.new("datafile",
                              name=f.fname,
                              dataset=ds,
                              datafileFormat=datafileFormat)
        while True:
            # Do the upload.  This may (or even should) fail due to
            # the dataset not being online.  The error triggers a
            # restore, so we continue to retry until the restore has
            # been completed.
            try:
                df = client.putData(f.path, datafile)
                log.info("Upload to dataset %s succeeded.", name)
                break
            except icat.IDSDataNotOnlineError:
                status = client.ids.getStatus(selection)
                log.info("Upload: dataset %s is %s.", name, status)
                time.sleep(0.2)
                continue
        resultQueue.put(f)
        client.logout()
    except Exception as err:
        resultQueue.put(err)
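A sketch of how this worker might be driven; the resultQueue global and the call arguments are assumptions, and upload is assumed to live in the same module so that it sees the queue:

import queue
import threading

# Hypothetical driver: run the upload in a background thread and
# collect the result, which is a Datafile on success and the raised
# exception otherwise.
resultQueue = queue.Queue()
t = threading.Thread(target=upload, args=(conf, client_kwargs, dataset))
t.start()
result = resultQueue.get(timeout=300)
t.join()
if isinstance(result, Exception):
    raise result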
Example #7
def test_status_no_sessionId(client, case):
    """Call getStatus() while logged out.

    IDS 1.5.0 and newer allow the sessionId to be omitted from the
    getStatus() call.
    """
    if client.ids.apiversion < '1.5.0':
        pytest.skip("IDS %s is too old, need 1.5.0 or newer" %
                    client.ids.apiversion)
    query = Query(client,
                  "Dataset",
                  conditions={
                      "name": "= '%s'" % case['dsname'],
                      "investigation.name": "= '%s'" % case['invname'],
                  })
    selection = DataSelection(client.assertedSearch(query))
    with tmpSessionId(client, None):
        status = client.ids.getStatus(selection)
    print("Status of dataset %s: %s" % (case['dsname'], status))
    assert status in {"ONLINE", "RESTORING", "ARCHIVED"}
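The tmpSessionId helper is not shown here; a minimal sketch, assuming it does no more than temporarily swap out client.sessionId:

from contextlib import contextmanager

@contextmanager
def tmpSessionId(client, sessionId):
    # Temporarily replace the session id on the client, restoring the
    # original one on exit, even if an error occurs in the with block.
    saved = client.sessionId
    client.sessionId = sessionId
    try:
        yield client
    finally:
        client.sessionId = saved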
Example #8
def test_restore(client, case):
    """Call restore() on a dataset.
    """
    if not client.ids.isTwoLevel():
        pytest.skip("This IDS does not use two levels of data storage")
    query = Query(client,
                  "Dataset",
                  conditions={
                      "name": "= '%s'" % case['dsname'],
                      "investigation.name": "= '%s'" % case['invname'],
                  })
    selection = DataSelection(client.assertedSearch(query))
    status = client.ids.getStatus(selection)
    if status != "ARCHIVED":
        pytest.skip("The dataset is not online")
    client.ids.restore(selection)
    print("Request restore of dataset %s" % (case['dsname']))
    status = client.ids.getStatus(selection)
    # Do not assert status == "RESTORING" because the same remark as
    # for archive() applies: there is no guarantee whatsoever on the
    # outcome of the restore() call.
    print("Status of dataset %s is now %s" % (case['dsname'], status))
Example #9
def test_archive(client, case):
    """Call archive() on a dataset.
    """
    if not client.ids.isTwoLevel():
        pytest.skip("This IDS does not use two levels of data storage")
    query = Query(client,
                  "Dataset",
                  conditions={
                      "name": "= '%s'" % case['dsname'],
                      "investigation.name": "= '%s'" % case['invname'],
                  })
    selection = DataSelection(client.assertedSearch(query))
    status = client.ids.getStatus(selection)
    if status != "ONLINE":
        pytest.skip("The dataset is not online")
    client.ids.archive(selection)
    print("Request archive of dataset %s" % (case['dsname']))
    status = client.ids.getStatus(selection)
    # Do not assert status == "ARCHIVED" because the archive could be
    # deferred by the server or another operation on the same dataset
    # could intervene.  So, there is no guarantee whatsoever on the
    # outcome of the archive() call.
    print("Status of dataset %s is now %s" % (case['dsname'], status))
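Since there is no such guarantee, a caller that actually needs the dataset to reach archive storage would have to poll, e.g. along these lines (the timeout and polling interval are arbitrary choices):

import time

# Hypothetical polling loop: wait up to five minutes for the dataset
# to become ARCHIVED, checking every five seconds.
timeout = 300
while client.ids.getStatus(selection) != "ARCHIVED":
    timeout -= 5
    if timeout <= 0:
        raise RuntimeError("timeout waiting for the dataset to be archived")
    time.sleep(5)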
Example #10
def provokeIDSNotFoundError(client):
    print("Provoke an IDSNotFoundError ...")
    selection = DataSelection({'datasetIds': [-11]})
    client.ids.getData(selection)
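A caller would typically want to verify that the expected error is in fact raised; a minimal sketch using the corresponding exception class from python-icat:

import icat

try:
    provokeIDSNotFoundError(client)
except icat.IDSNotFoundError as err:
    print("Got the expected error: %s" % err)
else:
    raise AssertionError("expected an IDSNotFoundError")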