Example 1
def test_updated_file(session_db, tmpdir, caplog):
    session, db = session_db

    # Make tmpdir a concrete Path, otherwise the filesystem ops below won't work
    tmpdir = Path(tmpdir)

    ncfile = "test1.nc"
    ncpath = Path("test/data/indexing/longnames/output000/") / ncfile
    shutil.copy(str(ncpath), str(tmpdir / ncfile))
    indexed = database.build_index(str(tmpdir), session)
    assert indexed == 1

    # Should not reindex
    reindexed = database.build_index(str(tmpdir), session)
    assert reindexed == 0

    # Should reindex as file is updated
    time.sleep(1)
    (tmpdir / ncfile).touch()
    reindexed = database.build_index(str(tmpdir), session)
    assert reindexed == 1

    # Should not reindex: flagging the file as missing does not remove it
    # from the database, so the updated file will not be reindexed
    time.sleep(1)
    (tmpdir / ncfile).touch()
    with caplog.at_level(logging.WARNING):
        reindexed = database.build_index(str(tmpdir), session, prune="flag")
        assert reindexed == 0
        assert "Set prune to 'delete' to reindex updated files" in caplog.text
Example 2
def test_delete_experiment(session_db):
    """Test that we can completely delete an experiment
    and its associated data.
    """

    session, db = session_db
    database.build_index("test/data/indexing/longnames", session)

    # make sure we actually did index something
    expt = (session.query(database.NCExperiment).filter(
        database.NCExperiment.experiment == "longnames").one_or_none())
    assert expt is not None

    database.delete_experiment("longnames", session)
    expt = (session.query(database.NCExperiment).filter(
        database.NCExperiment.experiment == "longnames").one_or_none())
    assert expt is None

    # check that all files are removed
    files = session.query(sa.func.count(database.NCFile.id)).scalar()
    assert files == 0

    # make sure all ncvars are removed
    vars = session.query(sa.func.count(database.NCVar.id)).scalar()
    assert vars == 0
Example 3
def test_multiple_experiments(session_db):
    session, db = session_db
    # index multiple experiments, which have duplicate data and therefore push
    # against some unique constraints
    database.build_index(['test/data/indexing/multiple/experiment_a',
                          'test/data/indexing/multiple/experiment_b'], session)

    q = session.query(database.NCExperiment)
    assert q.count() == 2
Example 4
def test_reindex_noupdate(session_db):
    session, db = session_db
    database.build_index('test/data/indexing/broken_file', session)
    assert db.check()

    # re-run the index, make sure we don't re-index anything
    reindexed = database.build_index('test/data/indexing/broken_file', session)
    assert reindexed == 0
Example 5
def test_distributed(client, session_db):
    session, db = session_db
    database.build_index("test/data/indexing/broken_file", session, client)

    assert db.check()
    q = session.query(database.NCExperiment)
    r = q.all()
    assert len(r) == 1
Example 6
def test_reindex_force(session_db):
    session, db = session_db
    database.build_index("test/data/indexing/broken_file", session)
    assert db.check()

    # re-run the index with force=True, make sure the file is reindexed
    reindexed = database.build_index("test/data/indexing/broken_file",
                                     session,
                                     force=True)
    assert reindexed == 1
Example 7
def test_update_nonew(session_db):
    session, db = session_db
    database.build_index("test/data/indexing/broken_file", session)
    assert db.check()

    # re-run the index, make sure we don't re-index anything
    reindexed = database.build_index("test/data/indexing/broken_file",
                                     session,
                                     prune="flag")
    assert reindexed == 0
Example 8
def test_update_newfile(session_db, tmpdir):
    session, db = session_db
    shutil.copy('test/data/indexing/longnames/output000/test1.nc',
                str(tmpdir / 'test1.nc'))
    database.build_index(str(tmpdir), session)

    # add another file
    shutil.copy('test/data/indexing/longnames/output000/test2.nc',
                str(tmpdir / 'test2.nc'))
    database.build_index(str(tmpdir), session, update=True)
Example 9
def test_update_newfile(session_db, tmpdir):
    session, db = session_db
    shutil.copy("test/data/indexing/longnames/output000/test1.nc",
                str(tmpdir / "test1.nc"))
    database.build_index(str(tmpdir), session)

    # add another file
    shutil.copy("test/data/indexing/longnames/output000/test2.nc",
                str(tmpdir / "test2.nc"))
    reindexed = database.build_index(str(tmpdir), session)
    assert reindexed == 1
Example 10
def test_getvar_with_metadata(session_db):

    session, db = session_db
    database.build_index("test/data/indexing/metadata", session)

    with querying.getvar("metadata", "test", session, decode_times=False) as v:
        assert v.attrs["long_name"] == "Test Variable"
        assert v.attrs["contact"] == "The ACCESS Oracle"
        assert v.attrs["email"] == "*****@*****.**"
        assert v.attrs["created"] == "2018-01-01"
        assert "description" in v.attrs
Example 11
def test_single_broken(session_db):
    session, db = session_db
    database.build_index("test/data/indexing/single_broken_file", session)

    # query ncfiles table -- should have two entries
    q = session.query(func.count(database.NCFile.id))
    assert q.scalar() == 2

    # query ncvars table -- should have a single entry
    q = session.query(func.count(database.NCVar.id))
    assert q.scalar() == 1
Example 12
def test_metadata(session_db):
    session, db = session_db
    database.build_index('test/data/indexing/metadata', session)

    # query metadata
    q = session.query(database.NCExperiment.contact,
                      database.NCExperiment.created,
                      database.NCExperiment.description)
    r = q.one()
    assert r[0] == 'The ACCESS Oracle'
    assert r[1] == datetime(2018, 1, 1)
    assert len(r[2]) > 0
Example 13
def test_longnames(session_db):
    session, db = session_db
    database.build_index("test/data/indexing/longnames", session)

    # query ncvars table -- should have two entries
    q = session.query(func.count(database.NCVar.id))
    assert q.scalar() == 2

    # query generic table -- should only be a single variable
    q = session.query(database.CFVariable)
    r = q.all()
    assert len(r) == 1
    assert r[0].long_name == "Test Variable"
Example 14
def test_missing_time_bounds(session_db):
    session, db = session_db
    database.build_index("test/data/indexing/time_bounds", session)

    # Should have one experiment
    q = session.query(database.NCExperiment)
    assert q.count() == 1

    # And one correctly indexed (present) file
    q = session.query(database.NCFile)
    r = q.all()
    assert len(r) == 1
    assert r[0].present
Example 15
def test_same_expt_name(session_db):
    session, db = session_db
    # index multiple experiments with different root directories, but the same
    # final path component (experiment name)
    database.build_index(['test/data/indexing/multiple/experiment_a',
                          'test/data/indexing/alternate/experiment_a'], session)

    # the indexing shouldn't fail, and we should have two distinct experiments
    # with the same name

    q = (session
         .query(database.NCExperiment)
         .filter(database.NCExperiment.experiment == 'experiment_a'))
    r = q.all()
    assert len(r) == 2
    assert r[0].root_dir != r[1].root_dir
Example 16
def test_metadata(session_db):
    """Test that metadata.yaml is read for an experiment during indexing"""

    session, db = session_db
    database.build_index("test/data/indexing/metadata", session)

    # query metadata
    q = session.query(
        database.NCExperiment.contact,
        database.NCExperiment.created,
        database.NCExperiment.description,
    )
    r = q.one()
    assert r[0] == "The ACCESS Oracle"
    assert r[1] == "2018-01-01"
    assert len(r[2]) > 0
Example 17
def test_following_symlinks(session_db):
    session, db = session_db

    # Indexing the symlinked experiment should index nothing with default arguments
    database.build_index("test/data/indexing/symlinked/experiment_a", session)

    q = session.query(database.NCExperiment)
    assert q.count() == 0

    # Now specify to follow symlinks
    database.build_index("test/data/indexing/symlinked/experiment_a",
                         session,
                         followsymlinks=True)

    q = session.query(database.NCExperiment)
    assert q.count() == 1
Example 18
def test_prune_missing_experiment(session_db):
    session, db = session_db
    database.build_index("test/data/indexing/broken_file", session)

    assert db.check()

    # check that we have one file
    q = session.query(database.NCFile)
    r = q.all()
    assert len(r) == 1

    # prune experiment
    experiment = "incorrect_experiment"
    with pytest.raises(RuntimeError,
                       match="No such experiment: ".format(experiment)):
        database.prune_experiment(experiment, session)
Example 19
def test_prune_broken(session_db):
    session, db = session_db
    database.build_index("test/data/indexing/broken_file", session)

    assert db.check()

    # check that we have one file
    q = session.query(database.NCFile)
    r = q.all()
    assert len(r) == 1

    # prune experiment
    database.prune_experiment("broken_file", session)

    # now the database should be empty
    q = session.query(database.NCFile)
    r = q.all()
    assert len(r) == 0
Example 20
def test_time_dimension(session_db):
    session, db = session_db
    database.build_index("test/data/indexing/time", session)

    q = session.query(database.NCFile.time_start, database.NCFile.time_end)
    assert q.count() == 5  # should pick up 5 files

    q = q.filter((database.NCFile.time_start.is_(None))
                 | (database.NCFile.time_end.is_(None)))
    assert q.count() == 0  # but all of them should have times populated

    # there should be 5 separate time variables
    q = session.query(database.CFVariable)
    assert q.count() == 5

    # each file should have exactly one time dimension
    q = (session.query(func.count(database.NCFile.ncvars)).join(
        database.NCFile.ncvars).group_by(database.NCFile.id))
    for r in q.all():
        assert r[0] == 1
Example 21
def test_index_with_prune_delete(session_db, tmpdir):
    session, db = session_db
    expt_dir = tmpdir / "expt"
    expt_dir.mkdir()

    # copy the file to a new experiment directory and index
    shutil.copy("test/data/indexing/longnames/output000/test1.nc",
                str(expt_dir / "test1.nc"))
    database.build_index(str(expt_dir), session)

    # check that we have a valid file
    q = session.query(database.NCFile).filter(database.NCFile.present)
    r = q.all()
    assert len(r) == 1

    # remove the file and build with pruning
    os.remove(expt_dir / "test1.nc")
    database.build_index(str(expt_dir), session, prune="delete")

    # the database should now contain no files
    q = session.query(database.NCFile)
    r = q.one_or_none()
    assert r is None
Example 22
def test_index_attributes(session_db):
    session, db = session_db
    database.build_index("test/data/querying", session)

    inspector = inspect(session.get_bind())
    assert inspector.get_indexes("ncattributes")[0] == {
        "name": "ix_ncattributes_ncvar_id",
        "column_names": ["ncvar_id"],
        "unique": 0,
    }

    ncfile = "output000/ocean.nc"

    # check that we have the right attributes for a file (just use a subset)
    f = session.query(
        database.NCFile).filter(database.NCFile.ncfile == ncfile).one()

    file_attrs = {
        "filename": "ocean.nc",
        "title": "MOM5",
        "grid_type": "mosaic",
        "grid_tile": "1",
    }
    for attr, attr_val in file_attrs.items():
        assert attr in f.attrs and f.attrs[attr] == attr_val

    # and check a particular variable
    v = (session.query(database.NCVar).join(
        database.NCFile).filter(database.NCFile.ncfile == ncfile).filter(
            database.NCVar.varname == "temp").one())
    var_attrs = {
        "long_name": "Potential temperature",
        "cell_methods": "time: mean",
        "coordinates": "geolon_t geolat_t",
    }
    for attr, attr_val in var_attrs.items():
        assert attr in v.attrs and v.attrs[attr] == attr_val
Example 23
def test_prune_nodelete(session_db, tmpdir):
    session, db = session_db
    expt_dir = tmpdir / "expt"
    expt_dir.mkdir()

    # copy the file to a new experiment directory and index
    shutil.copy("test/data/indexing/longnames/output000/test1.nc",
                str(expt_dir / "test1.nc"))
    database.build_index(str(expt_dir), session)

    # check that we have a valid file
    q = session.query(database.NCFile).filter(database.NCFile.present)
    r = q.all()
    assert len(r) == 1

    # remove the file and prune
    os.remove(expt_dir / "test1.nc")
    database.prune_experiment("expt", session, delete=False)

    # we should still have one file, but it is no longer marked as present
    q = session.query(database.NCFile)
    r = q.one_or_none()
    assert r is not None
    assert not r.present
Example 24
def test_empty_file(session_db):
    session, db = session_db
    indexed = database.build_index("test/data/indexing/empty_file", session)

    # as with test_broken, we should have seen a single file,
    # but it should be marked as empty
    assert db.check()
    assert indexed == 1
    q = session.query(database.NCFile)
    r = q.all()
    assert len(r) == 1
    assert not r[0].present

    # but there should be a valid variable
    q = session.query(func.count(
        database.NCVar.id)).filter(database.NCVar.varname == "ty_trans_rho")
    assert q.scalar() == 1
Example 25
def test_broken(session_db):
    session, db = session_db
    indexed = database.build_index("test/data/indexing/broken_file", session)

    # make sure the database was created
    assert db.check()

    # we indexed a single file
    assert indexed == 1

    # query ncfiles table -- should have a single file, marked as empty
    q = session.query(database.NCFile)
    r = q.all()
    assert len(r) == 1
    assert not r[0].present

    # query ncvars table -- should be empty
    q = session.query(func.count(database.NCVar.id))
    assert q.scalar() == 0
Example 26
def test_unreadable(session_db, unreadable_dir):
    session, db = session_db

    with pytest.warns(UserWarning,
                      match="Some files or directories could not be read"):
        indexed = database.build_index(str(unreadable_dir), session)
Example 27
def test_get_experiments_metadata(session_db):
    """Test that get_experiments returns metadata correctly"""

    session, db = session_db
    database.build_index("test/data/indexing/metadata", session)

    r = querying.get_experiments(session, contact=True)
    df = pd.DataFrame.from_dict({
        "experiment": ["metadata"],
        "contact": ["The ACCESS Oracle"],
        "ncfiles": [1]
    })
    assert_frame_equal(r, df)

    r = querying.get_experiments(session, email=True)
    df = pd.DataFrame.from_dict({
        "experiment": ["metadata"],
        "email": ["*****@*****.**"],
        "ncfiles": [1]
    })
    assert_frame_equal(r, df)

    r = querying.get_experiments(session, url=True)
    df = pd.DataFrame.from_dict({
        "experiment": ["metadata"],
        "url": ["https://github.com/COSIMA/oracle"],
        "ncfiles": [1],
    })
    assert_frame_equal(r, df)

    r = querying.get_experiments(session, description=True)
    df = pd.DataFrame.from_dict({
        "experiment": ["metadata"],
        "description":
        [("Attempted spinup, using salt flux fix "
          "https://arccss.slack.com/archives/C6PP0GU9Y/p1515460656000124 "
          "and https://github.com/mom-ocean/MOM5/pull/208/commits/9f4ee6f8b72b76c96a25bf26f3f6cdf773b424d2 "
          "from the start. Used mushy ice from July year 1 onwards to avoid vertical thermo error in cice "
          "https://arccss.slack.com/archives/C6PP0GU9Y/p1515842016000079")],
        "ncfiles": [1],
    })
    assert_frame_equal(r, df)

    r = querying.get_experiments(session, notes=True)
    df = pd.DataFrame.from_dict({
        "experiment": ["metadata"],
        "notes":
        [("Stripy salt restoring: "
          "https://github.com/OceansAus/access-om2/issues/74 tripole seam bug: "
          "https://github.com/OceansAus/access-om2/issues/86 requires dt=300s "
          "in May, dt=240s in Aug to maintain CFL in CICE near tripoles (storms "
          "in those months in 8485RYF); all other months work with dt=400s")],
        "ncfiles": [1],
    })
    assert_frame_equal(r, df)

    r = querying.get_experiments(session, created=True)
    df = pd.DataFrame.from_dict({
        "experiment": ["metadata"],
        "created": ["2018-01-01"],
        "ncfiles": [1]
    })
    assert_frame_equal(r, df)

    r = querying.get_experiments(session, root_dir=True)
    # Won't try to match a path that can change between platforms
    # assert_frame_equal(r, df)
    assert r.shape == (1, 3)

    r = querying.get_experiments(session, all=True)
    # Won't try and match everything, just check dimensions are correct
    assert r.shape == (1, 9)

    # Test turning off returning experiment (bit dumb, but hey ...)
    r = querying.get_experiments(session, experiment=False)
    df = pd.DataFrame.from_dict({"ncfiles": [1]})
    assert_frame_equal(r, df)
Example 28
def test_broken_metadata(session_db):
    session, db = session_db
    indexed = database.build_index("test/data/indexing/broken_metadata",
                                   session)

    assert indexed == 1
Example 29
def test_get_experiments_with_keywords(session_db):
    """Test retrieval of experiments with keyword filtering"""
    session, db = session_db
    database.build_index("test/data/metadata/keywords", session)
    database.build_index("test/data/metadata/keywords2", session)

    # Test keyword common to both experiments
    r = querying.get_experiments(session, keywords="cosima")
    df = pd.DataFrame.from_dict({
        "experiment": ["keywords", "keywords2"],
        "ncfiles": [1, 1]
    })
    assert_frame_equal(r, df)

    # Test keyword common to both experiments using wildcard
    r = querying.get_experiments(session, keywords="cos%")
    df = pd.DataFrame.from_dict({
        "experiment": ["keywords", "keywords2"],
        "ncfiles": [1, 1]
    })
    assert_frame_equal(r, df)

    r = querying.get_experiments(session, keywords="%-%")
    df = pd.DataFrame.from_dict({
        "experiment": ["keywords", "keywords2"],
        "ncfiles": [1, 1]
    })
    assert_frame_equal(r, df)

    r = querying.get_experiments(session, keywords="access-om2%")
    df = pd.DataFrame.from_dict({"experiment": ["keywords"], "ncfiles": [1]})
    assert_frame_equal(r, df)

    # Test keyword in only one experiment
    r = querying.get_experiments(session, keywords="another-keyword")
    df = pd.DataFrame.from_dict({"experiment": ["keywords2"], "ncfiles": [1]})
    assert_frame_equal(r, df)

    r = querying.get_experiments(session, keywords="ryf9091")
    df = pd.DataFrame.from_dict({"experiment": ["keywords"], "ncfiles": [1]})
    assert_frame_equal(r, df)

    # Test passing an array of keywords that match only one experiment
    r = querying.get_experiments(session,
                                 keywords=["cosima", "another-keyword"])
    df = pd.DataFrame.from_dict({"experiment": ["keywords2"], "ncfiles": [1]})
    assert_frame_equal(r, df)

    # Test passing an array of keywords that will not match any one experiment
    r = querying.get_experiments(session,
                                 keywords=["another-keyword", "ryf9091"])
    df = pd.DataFrame(columns=["experiment", "ncfiles"])
    assert_frame_equal(r, df)

    # Test passing a non-existent keyword along with one present. Should return
    # nothing as no experiment contains it
    r = querying.get_experiments(session,
                                 keywords=["ryf9091", "not-a-keyword"])
    df = pd.DataFrame(columns=["experiment", "ncfiles"])
    assert_frame_equal(r, df)

    # Test passing only a non-existent keyword
    r = querying.get_experiments(session, keywords=["not-a-keyword"])
    df = pd.DataFrame(columns=["experiment", "ncfiles"])
    assert_frame_equal(r, df)

    # Test passing only a non-existent wildcard keyword
    r = querying.get_experiments(session, keywords=["z%"])
    df = pd.DataFrame(columns=["experiment", "ncfiles"])
    assert_frame_equal(r, df)