Esempio n. 1
0
def test_get_provenance_document_for_id(tmpdir):
    """
    Tests looking up the stored provenance document that contains an id.

    Known ids must yield the name and document they were stored under;
    an unknown id and a string that is not a qualified name must both raise
    ``ASDFValueError`` with the expected message.
    """
    data_set = ASDFDataSet(os.path.join(tmpdir.strpath, "test.h5"))

    doc = prov.read(
        os.path.join(data_dir, "example_schematic_processing_chain.xml"))
    data_set.provenance["test_provenance"] = doc

    # Both ids are expected to resolve to the same name/document pair.
    known_ids = (
        '{http://seisprov.org/seis_prov/0.1/#}sp002_dt_f87sf7sf78',
        '{http://seisprov.org/seis_prov/0.1/#}sp004_lp_f87sf7sf78')
    for known_id in known_ids:
        found = data_set.provenance.get_provenance_document_for_id(known_id)
        assert found == {"name": "test_provenance", "document": doc}

    # Id not found.
    with pytest.raises(ASDFValueError) as excinfo:
        data_set.provenance.get_provenance_document_for_id(
            '{http://seisprov.org/seis_prov/0.1/#}bogus_id')

    not_found_message = (
        "Document containing id "
        "'{http://seisprov.org/seis_prov/0.1/#}bogus_id'"
        " not found in the data set.")
    assert excinfo.value.args[0] == not_found_message

    # Not a qualified id.
    with pytest.raises(ASDFValueError) as excinfo:
        data_set.provenance.get_provenance_document_for_id("bla")

    assert excinfo.value.args[0] == "Not a valid qualified name."

    # Explicitly finalize the data set, as the other tests in this file do.
    data_set.__del__()
Esempio n. 2
0
def __validate_seis_prov(file_object):
    """
    Core validation function.

    Runs the validation steps in order: load the JSON schema, determine
    whether the input is JSON or XML, parse it with the prov package,
    validate it against the PROV XML XSD scheme, locate the SEIS-PROV
    namespace, validate the root document and every bundle against the JSON
    schema, and finally check the collected ids for duplicates. Problems are
    reported through ``_log_error`` and ``_log_warning``.

    :param file_object: Open file or file-like object. It must support
        ``tell()`` and ``seek()`` because it is rewound to its starting
        position after each file type check.
    :raises NotImplementedError: If the input is detected as both JSON and
        XML.
    """
    # Imported locally so the block does not depend on a file-level import.
    from collections import Counter

    original_position = file_object.tell()
    # Step 1: Check and read the JSON schema.
    json_schema = _check_json_schema()

    # Determine file type. Rewind after each check so that the next reader
    # starts from the same position.
    is_json = _is_json_file(file_object)
    file_object.seek(original_position, 0)
    is_xml = _is_xml_file(file_object)
    file_object.seek(original_position, 0)

    # NOTE(review): the code below relies on ``_log_error`` not returning
    # (presumably it raises); otherwise ``fileformat`` and later ``doc``
    # would be unbound - confirm against its definition.
    if is_json is False and is_xml is False:
        _log_error("File is neither a valid JSON nor a valid XML file.")
    elif is_json is True and is_xml is True:
        # Should not happen for obvious reasons...
        raise NotImplementedError
    elif is_json:
        fileformat = "json"
    elif is_xml:
        fileformat = "xml"

    # Step 2: Attempt to read the provenance file with the prov Python package.
    try:
        doc = prov.read(file_object, format=fileformat)
    except Exception as e:
        _log_error("Could not parse the file with the prov Python library due"
                   " to: the following PROV error message: %s" % (repr(e)))

    # Step 3: Validate against the PROV XML XSD Scheme.
    _validate_against_xsd_scheme(doc)

    # Find the seis prov namespace. The loop variable ``ns`` is reused below,
    # so the ``break`` leaves it bound to the matching namespace.
    for ns in doc.namespaces:
        if ns.uri == SEIS_PROV_NAMESPACE:
            break
    else:
        _log_error("SEIS-PROV namespace not found in document!")

    # Step 4: Custom validation against the JSON schema. Validate the root
    # document as well as any bundles.
    seis_prov_ids = _validate_prov_bundle(doc, json_schema, ns=ns)
    for bundle in doc.bundles:
        seis_prov_ids.extend(
            _validate_prov_bundle(bundle, json_schema, ns=ns))

    if not seis_prov_ids:
        _log_warning("The document is a valid W3C PROV document but not a "
                     "single SEIS-PROV record has been found.")

    # Find duplicate ids with a single counting pass instead of rescanning
    # the whole list for every id.
    id_counts = Counter(seis_prov_ids)
    duplicates = [_i for _i, count in id_counts.items() if count > 1]
    if duplicates:
        _log_error("One or more ids have been used more than once: %s" %
                   ", ".join(["'%s'" % _i for _i in duplicates]))
def init():
    """
    Read the PROV document in ``plan.json`` and register extra namespaces.

    The file is opened relative to the current working directory.

    :returns: The document returned by ``prov.read`` with the ``alg``,
        ``dat``, ``ont``, ``log`` and ``bdp`` namespaces added.
    """
    # The context manager closes the file even when parsing fails.
    with open('plan.json', 'r') as f:
        doc = prov.read(f)
    # The scripts in <folder>/<filename> format.
    doc.add_namespace('alg', 'https://data-mechanics.s3.amazonaws.com/linshan_luoty/algorithm/')
    # The data sets in <user>/<collection> format.
    doc.add_namespace('dat', 'https://data-mechanics.s3.amazonaws.com/linshan_luoty/data/')
    # 'Extension', 'DataResource', 'DataSet', 'Retrieval', 'Query', or 'Computation'.
    doc.add_namespace('ont', 'https://data-mechanics.s3.amazonaws.com/ontology#')
    # The event log.
    doc.add_namespace('log', 'https://data-mechanics.s3.amazonaws.com/log#')
    doc.add_namespace('bdp', 'https://data.cityofboston.gov/resource/')
    return doc
Esempio n. 4
0
def test_get_ids_from_prov_document():
    """
    Tests that all ids of the example document are returned in order.
    """
    expected_ids = [
        '{http://seisprov.org/seis_prov/0.1/#}sp001_wf_a34j4didj3',
        '{http://seisprov.org/seis_prov/0.1/#}sp002_dt_f87sf7sf78',
        '{http://seisprov.org/seis_prov/0.1/#}sp003_wf_js83hf34aj',
        '{http://seisprov.org/seis_prov/0.1/#}sp004_lp_f87sf7sf78',
        '{http://seisprov.org/seis_prov/0.1/#}sp005_wf_378f8ks8kd',
        '{http://seisprov.org/seis_prov/0.1/#}sp006_dc_f87sf7sf78',
        '{http://seisprov.org/seis_prov/0.1/#}sp007_wf_jude89du8l']

    xml_path = os.path.join(data_dir, "example_schematic_processing_chain.xml")
    document = prov.read(xml_path, format="xml")

    # A list comparison, so the order of the returned ids matters as well.
    assert get_all_ids_for_prov_document(document) == expected_ids
Esempio n. 5
0
def test_provenance_list_command(tmpdir):
    """
    Tests that a stored provenance document is reported by
    ``provenance.list()`` under the name it was added with.
    """
    data_set = ASDFDataSet(os.path.join(tmpdir.strpath, "test.h5"))

    # Read the example file and add it as a document.
    xml_path = os.path.join(data_dir, "example_schematic_processing_chain.xml")
    document = prov.read(xml_path, format="xml")
    data_set.add_provenance_document(document, name="test_provenance")

    stored_names = data_set.provenance.list()
    assert stored_names == ["test_provenance"]
Esempio n. 6
0
def test_get_ids_from_prov_document():
    """
    Tests the id extraction for the example processing chain document.
    """
    document = prov.read(
        os.path.join(data_dir, "example_schematic_processing_chain.xml"),
        format="xml")
    found_ids = get_all_ids_for_prov_document(document)

    # Compared as a list: both the content and the order must match.
    wanted_ids = [
        '{http://seisprov.org/seis_prov/0.1/#}sp001_wf_a34j4didj3',
        '{http://seisprov.org/seis_prov/0.1/#}sp002_dt_f87sf7sf78',
        '{http://seisprov.org/seis_prov/0.1/#}sp003_wf_js83hf34aj',
        '{http://seisprov.org/seis_prov/0.1/#}sp004_lp_f87sf7sf78',
        '{http://seisprov.org/seis_prov/0.1/#}sp005_wf_378f8ks8kd',
        '{http://seisprov.org/seis_prov/0.1/#}sp006_dc_f87sf7sf78',
        '{http://seisprov.org/seis_prov/0.1/#}sp007_wf_jude89du8l']
    assert found_ids == wanted_ids
Esempio n. 7
0
def test_wps_subset_cmip6_prov():
    """
    Run the ``subset`` process on a CMIP6 collection with a time and area
    selection and check the provenance document referenced by the response.

    The PROV-N text must record the requested time and area and a
    ``wasDerivedFrom`` relation from the output file to the collection.
    """
    client = client_for(Service(processes=[Subset()], cfgfiles=[PYWPS_CFG]))
    datainputs = "collection=CMIP6.CMIP.IPSL.IPSL-CM6A-LR.historical.r1i1p1f1.Amon.rlds.gr.v20180803"
    datainputs += ";time=1860-01-01/1900-12-30;area=1,1,300,89"
    resp = client.get(
        "?service=WPS&request=Execute&version=1.0.0&identifier=subset&datainputs={}"
        .format(datainputs))
    assert_response_success(resp)
    # Drop the leading "file://" from the output reference to get a path.
    doc = prov.read(get_output(resp.xml)["prov"][len("file://"):])
    # Serialize once; the document does not change between the assertions.
    provn = doc.get_provn()
    assert ('roocs:time="1860-01-01/1900-12-30", roocs:area="1,1,300,89"'
            in provn)
    assert (
        "wasDerivedFrom(roocs:rlds_Amon_IPSL-CM6A-LR_historical_r1i1p1f1_gr_18600116-19001216.nc, roocs:CMIP6.CMIP.IPSL.IPSL-CM6A-LR.historical.r1i1p1f1.Amon.rlds.gr.v20180803"  # noqa
        in provn)
def test_prov(path):
    """
    Compare the PROV-N rendering of a PROV JSON file with a stored fixture.

    The fixture ``expected.prov`` is looked up in the parent of the directory
    that contains ``path``. Both sides are compared as sets of non-blank
    lines, so line order and repeated lines do not affect the result.

    :param path: Location of the PROV JSON file to read.
    """
    def non_blank(lines):
        # Keep only the lines that contain something besides whitespace.
        return {text for text in lines if text.strip()}

    document = prov.read(path, format='json')

    # In production, we won't output PROV-N,
    # but for tests, it's easy for a human to read, and easy to compare.
    buffer = StringIO()
    prov.serializers.provn.ProvNSerializer(document).serialize(buffer)
    actual_lines = non_blank(buffer.getvalue().split('\n'))

    fixture_path = Path(path).parent.parent / 'expected.prov'
    with open(fixture_path) as prov_fixture:
        expected_lines = non_blank(prov_fixture.read().split('\n'))

    assert actual_lines == expected_lines
Esempio n. 9
0
def test_adding_a_provenance_record(tmpdir):
    """
    Tests that an added provenance document can be read back, and compares
    equal to the original, after the file is opened again.
    """
    asdf_path = os.path.join(tmpdir.strpath, "test.h5")
    xml_path = os.path.join(data_dir, "example_schematic_processing_chain.xml")

    # Add it as a document, then drop the data set object.
    writer = ASDFDataSet(asdf_path)
    doc = prov.read(xml_path, format="xml")
    writer.add_provenance_document(doc, name="test_provenance")
    del writer

    # Read it again through a fresh data set object.
    reader = ASDFDataSet(asdf_path)
    assert reader.provenance.test_provenance == doc
Esempio n. 10
0
def test_wps_subset_cmip6_multiple_files_prov():
    """
    Run the ``subset`` process on a CMIP6 collection with a time selection
    and check the provenance document referenced by the response.

    The PROV-N text must record the requested time and a ``wasDerivedFrom``
    relation from each of the two expected output files to the collection.
    """
    client = client_for(Service(processes=[Subset()], cfgfiles=[PYWPS_CFG]))
    datainputs = "collection=CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.historical.r1i1p1f1.SImon.siconc.gn.latest"
    datainputs += ";time=1850-01-01/2013-12-30"
    resp = client.get(
        "?service=WPS&request=Execute&version=1.0.0&identifier=subset&datainputs={}"
        .format(datainputs))
    assert_response_success(resp)
    # Drop the leading "file://" from the output reference to get a path.
    doc = prov.read(get_output(resp.xml)["prov"][len("file://"):])
    # Serialize once; the document does not change between the checks below.
    provn = doc.get_provn()
    # Printed so the PROV-N text is available in the captured test output.
    print(provn)
    assert 'roocs:time="1850-01-01/2013-12-30"' in provn
    assert (
        "wasDerivedFrom(roocs:siconc_SImon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_18500116-18960316.nc, roocs:CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.historical.r1i1p1f1.SImon.siconc.gn.latest"  # noqa
        in provn)
    assert (
        "wasDerivedFrom(roocs:siconc_SImon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_18960416-19420616.nc, roocs:CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.historical.r1i1p1f1.SImon.siconc.gn.latest"  # noqa
        in provn)
def init():
    """
    Read the PROV document in ``plan.json`` and register extra namespaces.

    The file is opened relative to the current working directory.

    :returns: The document returned by ``prov.read`` with the ``alg``,
        ``dat``, ``ont``, ``log`` and ``bdp`` namespaces added.
    """
    # The context manager closes the file even when parsing fails.
    with open('plan.json', 'r') as f:
        doc = prov.read(f)
    doc.add_namespace(
        'alg',
        'https://data-mechanics.s3.amazonaws.com/linshan_luoty/algorithm/'
    )  # The scripts in <folder>/<filename> format.
    doc.add_namespace(
        'dat', 'https://data-mechanics.s3.amazonaws.com/linshan_luoty/data/'
    )  # The data sets in <user>/<collection> format.
    doc.add_namespace(
        'ont', 'https://data-mechanics.s3.amazonaws.com/ontology#'
    )  # 'Extension', 'DataResource', 'DataSet', 'Retrieval', 'Query', or 'Computation'.
    doc.add_namespace(
        'log',
        'https://data-mechanics.s3.amazonaws.com/log#')  # The event log.
    doc.add_namespace('bdp', 'https://data.cityofboston.gov/resource/')
    return doc
Esempio n. 12
0
def test_trying_to_add_provenance_record_with_invalid_name_fails(tmpdir):
    """
    Tests that adding a provenance document under a name that does not
    match the required regular expression raises ``ASDFValueError``.
    """
    data_set = ASDFDataSet(os.path.join(tmpdir.strpath, "test.h5"))
    xml_path = os.path.join(data_dir, "example_schematic_processing_chain.xml")

    # First try adding it as a prov document.
    doc = prov.read(xml_path, format="xml")
    with pytest.raises(ASDFValueError) as excinfo:
        data_set.add_provenance_document(doc, name="a-b-c")

    expected_message = (
        "Name 'a-b-c' is invalid. It must validate against the regular "
        "expression '^[0-9a-z][0-9a-z_]*[0-9a-z]$'.")
    assert excinfo.value.args[0] == expected_message

    # Must sometimes be called to get around some bugs.
    data_set.__del__()
Esempio n. 13
0
def test_provenance_dicionary_behaviour(tmpdir):
    """
    Tests the dictionary-style interface of the provenance accessor.

    A document stored via item assignment must be retrievable from a newly
    opened data set by item lookup, by attribute lookup, and through
    ``list()``, ``keys()``, ``values()`` and ``items()``.
    """
    asdf_path = os.path.join(tmpdir.strpath, "test.h5")
    xml_path = os.path.join(data_dir, "example_schematic_processing_chain.xml")

    data_set = ASDFDataSet(asdf_path)

    # Read the example document and store it via setitem.
    doc = prov.read(xml_path, format="xml")
    data_set.provenance["test_provenance"] = doc

    # Finalize and drop the first data set before opening the file again.
    data_set.__del__()
    del data_set

    reopened = ASDFDataSet(asdf_path)
    assert reopened.provenance.list() == ["test_provenance"]

    # Item access and attribute access must return the same document.
    assert reopened.provenance["test_provenance"] == doc
    assert reopened.provenance.test_provenance == doc

    # The mapping-style views.
    assert list(reopened.provenance.keys()) == ["test_provenance"]
    assert list(reopened.provenance.values()) == [doc]
    assert list(reopened.provenance.items()) == [("test_provenance", doc)]
	elif o in ("-o", "--outfile"):
		outfile = a
	elif o in ("-i", "--infile"):
		infile = a
	elif o in ("-b", "--bindings"):
		bindings = a
	elif o in ("-3", "--bindver3"):
		v3=True
	else:
		assert False, "unhandled option"

if not infile or not bindings:
	sys.exit()


template=prov.read(infile)

bindings_dict=None


if v3:
	bindings_dict=dict()
	v3_dict=json.load(open(bindings, "r"))
	if "context" in v3_dict:
		print v3_dict["context"]
		namespaces=set()
		for k in  v3_dict["context"]:
			namespaces.add(prov.model.Namespace(k, v3_dict["context"][k]))	
		template=provconv.set_namespaces(namespaces, template)
	if "var" in v3_dict:	
		for v in v3_dict["var"]: