def __init__(self, default_ns, namespaces=None):
    """Specify a default namespace and associated sub namespaces.

    :param default_ns: URI used as the document's default namespace.
    :param namespaces: optional iterable of sub-namespace names to register
        under the default namespace. Defaults to none.

    Fix: the original used a mutable default argument (``namespaces=[]``),
    which is shared across all calls that omit the parameter.
    """
    self.document = prov.ProvDocument()
    self.default_ns = default_ns
    self.document.set_default_namespace(self.default_ns)
    # Preserve the original aliasing behavior when a list is passed in.
    self.namespaces = namespaces if namespaces is not None else []
    self.subspaces = {}
    for namespace in self.namespaces:
        # add_namespace is provided by the enclosing class (not visible here).
        self.subspaces[namespace] = self.add_namespace(
            self.default_ns, namespace)
def compatible_operators(draw, args):
    """Draw a FakeOperator wired to a mocked session.

    The session's PROV document uses the CCFE default namespace, and the
    operator's ARGUMENT_TYPES is drawn to be compatible with *args*.
    """
    provenance = prov.ProvDocument()
    provenance.set_default_namespace("https://ccfe.ukaea.uk/")
    mock_session = MagicMock(
        prov=provenance,
        agent=provenance.agent("session_agent"),
        session=provenance.activity("session_activity"),
    )
    result = FakeOperator(mock_session)
    result.ARGUMENT_TYPES = draw(compatible_datatype_lists(args))
    return result
def instantiate_template(prov_doc, instance_dict):
    '''
    Instantiate a prov template based on a dictionary setting for
    the prov template variables.

    Supported:
        entity and attribute var: matching; multiple entity expansion

    Unsupported by now:
        linked entities; multiple attribute expansion

    To Do: Handle core template expansion rules as described in
    https://ieeexplore.ieee.org/document/7909036/ and maybe add additional
    expansion/composition rules for templates useful to compose ENES
    community workflow templates.

    Args:
        prov_doc (ProvDocument): input prov document template
        instance_dict (dict): match dictionary
    '''
    global GLOBAL_UUID_DEF_NS
    # If the template itself declares the vargen-UUID namespace, prefer it
    # over the module-level default.
    for namespace in prov_doc.namespaces:
        if namespace.prefix == GLOBAL_UUID_DEF_NS_PREFIX:
            GLOBAL_UUID_DEF_NS = namespace
    # Expand the top-level records, then each bundle under its matched id.
    new_doc = set_namespaces(prov_doc.namespaces, prov.ProvDocument())
    new_doc = add_records(prov_doc, new_doc, instance_dict)
    for bundle in list(prov_doc.bundles):
        bundle_id = match(bundle.identifier, instance_dict, True)
        expanded = new_doc.bundle(bundle_id)
        add_records(bundle, expanded, instance_dict)
    return new_doc
def test_serialization_example_7(self):
    """Serialize example 7 (a basic activity) and compare against the
    reference XML file."""
    doc = prov.ProvDocument()
    doc.add_namespace(*EX_NS)
    attributes = [
        (prov.PROV_TYPE, prov.Literal("ex:edit", prov.XSD_QNAME)),
        ("ex:host", "server.example.org"),
    ]
    doc.activity("ex:a1", "2011-11-16T16:05:00", "2011-11-16T16:06:00",
                 attributes)
    with io.BytesIO() as buffer:
        doc.serialize(format='xml', destination=buffer)
        compare_xml(os.path.join(DATA_PATH, "example_07.xml"), buffer)
def export_provo(trial: Trial, args, extension):
    """Export the provenance of *trial* as a PROV-O document.

    Sections (definitions, deployment, execution) are emitted into bundles
    according to the flags on *args*; each bundle is then linked to the
    trial collection and its generating activity, and the document is
    persisted via persist_document.
    """
    document = provo.ProvDocument()
    document.set_default_namespace(args.defaultns)

    print_msg("Exporting provenance of trial {} in PROV-O format"
              .format(trial.id), force=True)

    document.collection(
        "trial{}Prov".format(trial.id),
        [(provo.PROV_LABEL, "provenance collected by noworfklow")])
    basic_info.export(trial, document)

    def _bundle(existing, section):
        # Reuse an already-created bundle for this section, otherwise open
        # a new one; the default namespace is (re)applied either way, as in
        # the original per-branch code.
        if existing is None:
            existing = document.bundle(
                "trial{}{}Prov".format(trial.id, section))
        existing.set_default_namespace(args.defaultns)
        return existing

    bundle_def = bundle_dep = bundle_exec = None

    if args.function_defs:
        bundle_def = _bundle(bundle_def, "Definition")
        function_defs.export(trial, bundle_def)
    if args.modules:
        bundle_dep = _bundle(bundle_dep, "Deployment")
        module_deps.export(trial, bundle_dep)
    if args.environment:
        bundle_dep = _bundle(bundle_dep, "Deployment")
        environment_attrs.export(trial, bundle_dep)
    if args.function_activations:
        bundle_exec = _bundle(bundle_exec, "Execution")
        function_activations.export(trial, bundle_exec, args.recursion_depth)
    if args.file_accesses:
        bundle_exec = _bundle(bundle_exec, "Execution")
        file_accesses.export(trial, bundle_exec)

    # Link every bundle that was actually created.
    for bundle in (bundle_def, bundle_dep, bundle_exec):
        if bundle:
            document.hadMember("trial{}Prov".format(trial.id),
                               bundle.identifier)
            document.wasGeneratedBy(bundle.identifier,
                                    "trial{}Execution".format(trial.id), None)

    persist_document(document, args.file, args.format, extension,
                     args.hide_elem_attr, args.hide_rel_attr, args.graph_dir)
def deserialize(self, stream, rdf_format='trig', **kwargs):
    """
    Deserialize from the `PROV-O <https://www.w3.org/TR/prov-o/>`_
    representation to a :class:`~prov.model.ProvDocument` instance.

    :param stream: Input data.
    :param rdf_format: The RDF format of the input data, default: TRiG.
    """
    # Forward all caller kwargs, overriding/setting the RDF format.
    parse_args = dict(kwargs, format=rdf_format)
    graph = ConjunctiveGraph()
    graph.parse(stream, **parse_args)
    result = pm.ProvDocument()
    self.document = result
    self.decode_document(graph, result)
    return result
def test_serialization_example_6(self):
    """Serialize example 6 (a simple entity description) and compare
    against the reference XML file."""
    doc = prov.ProvDocument()
    ex = doc.add_namespace(*EX_NS)
    doc.add_namespace(*EX_TR)
    doc.entity(
        "tr:WD-prov-dm-20111215",
        ((prov.PROV_TYPE, ex["Document"]), ("ex:version", "2")))
    with io.BytesIO() as buffer:
        doc.serialize(format='xml', destination=buffer)
        compare_xml(os.path.join(DATA_PATH, "example_06.xml"), buffer)
def test_deserialization_example_6(self):
    """Deserialize example 6 (a simple entity description) and compare
    against a document built in code."""
    parsed = prov.ProvDocument.deserialize(
        source=os.path.join(DATA_PATH, "example_06.xml"), format="xml")
    expected = prov.ProvDocument()
    ex = expected.add_namespace(*EX_NS)
    expected.add_namespace(*EX_TR)
    expected.entity(
        "tr:WD-prov-dm-20111215",
        ((prov.PROV_TYPE, ex["Document"]), ("ex:version", "2")))
    self.assertEqual(parsed, expected)
def test_deserialization_example_7(self):
    """Deserialize example 7 (a simple activity description) and compare
    against a document built in code."""
    parsed = prov.ProvDocument.deserialize(
        source=os.path.join(DATA_PATH, "example_07.xml"), format="xml")
    expected = prov.ProvDocument()
    ex = Namespace(*EX_NS)
    expected.add_namespace(ex)
    expected.activity(
        "ex:a1",
        "2011-11-16T16:05:00",
        "2011-11-16T16:06:00",
        [(prov.PROV_TYPE, QualifiedName(ex, "edit")),
         ("ex:host", "server.example.org")])
    self.assertEqual(parsed, expected)
def instantiate_template(prov_doc, instance_dict):
    '''
    Instantiate a prov template based on a dictionary
    setting for the prov template variables

    Supported:
        entity and attribute var: matching
        multiple entity expansion

    Unsupported by now:
        linked entities
        multiple attribute expansion

    To Do: Handle core template expansion rules as described in
        https://ieeexplore.ieee.org/document/7909036/
        and maybe add additional expansion/composition rules for
        templates useful to compose ENES community workflow templates

    Args:
        prov_doc (ProvDocument): input prov document template
        instance_dict (dict): match dictionary

    Returns:
        ProvDocument: the instantiated document
    '''
    # Fix: the original used Python 2 print statements
    # (e.g. ``print "iterating bundles"``), which are a SyntaxError in
    # Python 3; converted to print() calls with identical output.
    new_doc = set_namespaces(prov_doc.namespaces, prov.ProvDocument())
    new_doc = add_records(prov_doc, new_doc, instance_dict)
    blist = list(prov_doc.bundles)
    print("iterating bundles")
    # Expand each bundle of the template under its matched identifier.
    for bundle in blist:
        id1 = match(bundle.identifier, instance_dict, True)
        print(id1)
        print("---")
        new_bundle = new_doc.bundle(id1)
        new_bundle = add_records(bundle, new_bundle, instance_dict)
    return new_doc
def export_diff(diff: DiffModel, args, extension):
    """Export the comparison provenance of two trials in PROV-O format.

    The optional sections (modules, environment, file accesses) are
    included according to the flags on *args*, then the document is
    persisted via persist_document.
    """
    document = provo.ProvDocument()
    document.set_default_namespace(args.defaultns)
    print_msg(
        "Exporting comparison provenance of trials {} and {} in PROV-O format".
        format(diff.trial1.id, diff.trial2.id),
        force=True)
    basic_info.diff(diff, document)
    # Optional sections, mirroring the command-line flags.
    sections = (
        (args.modules, module_deps),
        (args.environment, environment_attrs),
        (args.file_accesses, file_accesses),
    )
    for enabled, exporter in sections:
        if enabled:
            exporter.diff(diff, document)
    persist_document(document, args.file, args.format, extension,
                     args.hide_elem_attr, args.hide_rel_attr, args.graph_dir)
def __init__(self, attributes=None, empty_graph=False, uuid=None):
    """
    Default constructor, creates document and adds Project activity to
    graph with optional attributes.

    :param attributes: optional dictionary of attributes to add
    :param empty_graph: if set to True, creates empty graph with no
        namespaces besides Prov defaults
    :param uuid: if uuid is not None then use supplied uuid for project
        instead of generating one (for reading nidm docs)
    """
    if empty_graph:
        self.graph = pm.ProvDocument()
    else:
        self.graph = Constants.NIDMDocument(namespaces=Constants.namespaces)

    # Use the supplied UUID when re-loading an existing document,
    # otherwise mint a fresh one.
    self._uuid = getUUID() if uuid is None else uuid

    # Fix: the parent-constructor call was duplicated verbatim in both
    # branches of the uuid check; a single call after setting self._uuid
    # is equivalent.
    super(Project, self).__init__(
        self.graph,
        pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI),
                         self.get_uuid()),
        attributes)

    # add record to graph
    self.graph._add_record(self)
    # create empty sessions list
    self._sessions = []

    # prov toolbox doesn't like 2 attributes with PROV_TYPE in 1
    # add_attributes call so split them...
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT})
def gen_prov_file():
    """Creates provenance file for the data schema.

    Builds a small PROV document (entity, activity, agent and their
    relations) and prints its PROV-N serialization.
    """
    doc = prov.ProvDocument()
    doc.set_default_namespace('http://dapsi-example.org/')
    doc.add_namespace('ex', 'http://dapsi-example.org/')
    schema_entity = doc.entity('e2', (
        (prov.PROV_TYPE, "File"),
        ('ex:path', "/amazon.sql"),
        ('ex:creator', "Miguel-Angel Sicilia"),
        ('ex:content', "Amazon user profile schema"),
    ))
    edit_activity = doc.activity('a1', datetime.datetime.now(), None,
                                 {prov.PROV_TYPE: "edit"})
    # References can be qnames or ProvRecord objects themselves
    doc.wasGeneratedBy(schema_entity, edit_activity, None,
                       {'ex:fct': "run_transformer"})
    doc.wasAssociatedWith('a1', 'ag2', None, None,
                          {prov.PROV_ROLE: "author"})
    doc.agent('ag2', {
        prov.PROV_TYPE: 'prov:Person',
        'ex:name': "Miguel-Angel Sicilia"
    })
    print(doc.get_provn())
def test_deserialization_example_04_and_05(self):
    """
    Example 4 and 5 have a different type specification. They use an
    xsi:type as an attribute on an entity. This can be read but if written
    again it will become an XML child element. This is semantically
    identical but cannot be tested with a round trip.
    """
    # Example 4: a single prov:type child plus an xsi:type attribute on
    # the entity element.
    xml_string = """
    <prov:document
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns:xsd="http://www.w3.org/2001/XMLSchema"
        xmlns:prov="http://www.w3.org/ns/prov#"
        xmlns:ex="http://example.com/ns/ex#"
        xmlns:tr="http://example.com/ns/tr#">
      <prov:entity prov:id="tr:WD-prov-dm-20111215" xsi:type="prov:Plan">
        <prov:type xsi:type="xsd:QName">ex:Workflow</prov:type>
      </prov:entity>
    </prov:document>
    """
    with io.StringIO() as xml:
        xml.write(xml_string)
        xml.seek(0, 0)
        actual_document = prov.ProvDocument.deserialize(source=xml,
                                                        format="xml")
    expected_document = prov.ProvDocument()
    ex_ns = Namespace(*EX_NS)
    expected_document.add_namespace(ex_ns)
    expected_document.add_namespace(*EX_TR)
    # The xsi:type attribute is mapped to a proper PROV attribute.
    expected_document.entity(
        "tr:WD-prov-dm-20111215",
        (
            (prov.PROV_TYPE, QualifiedName(ex_ns, "Workflow")),
            (prov.PROV_TYPE, PROV["Plan"]),
        ),
    )
    self.assertEqual(actual_document, expected_document, "example_04")
    # Example 5: same entity, but the inferred prov:Plan and prov:Entity
    # types are also spelled out as prov:type children.
    xml_string = """
    <prov:document
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns:xsd="http://www.w3.org/2001/XMLSchema"
        xmlns:prov="http://www.w3.org/ns/prov#"
        xmlns:ex="http://example.com/ns/ex#"
        xmlns:tr="http://example.com/ns/tr#">
      <prov:entity prov:id="tr:WD-prov-dm-20111215" xsi:type="prov:Plan">
        <prov:type xsi:type="xsd:QName">ex:Workflow</prov:type>
        <prov:type xsi:type="xsd:QName">prov:Plan</prov:type> <!-- inferred -->
        <prov:type xsi:type="xsd:QName">prov:Entity</prov:type> <!-- inferred -->
      </prov:entity>
    </prov:document>
    """
    with io.StringIO() as xml:
        xml.write(xml_string)
        xml.seek(0, 0)
        actual_document = prov.ProvDocument.deserialize(source=xml,
                                                        format="xml")
    expected_document = prov.ProvDocument()
    expected_document.add_namespace(*EX_NS)
    expected_document.add_namespace(*EX_TR)
    # The xsi:type attribute is mapped to a proper PROV attribute.
    # NOTE: ex_ns is reused from the example-4 section above.
    expected_document.entity(
        "tr:WD-prov-dm-20111215",
        (
            (prov.PROV_TYPE, QualifiedName(ex_ns, "Workflow")),
            (prov.PROV_TYPE, PROV["Entity"]),
            (prov.PROV_TYPE, PROV["Plan"]),
        ),
    )
    self.assertEqual(actual_document, expected_document, "example_05")
def test_assign_dataset_provenance(args, operator, dataset, replace_equilib):
    """End-to-end check of Operator.assign_provenance on a dataset.

    Builds a PROV document with a mocked session, pre-assigns entities to
    the operator arguments and dataset variables, runs assign_provenance,
    and then verifies every PROV record type the call should create:
    generation, communication, association, attribution, derivation,
    usage, and membership.
    """
    doc = prov.ProvDocument()
    doc.set_default_namespace("https://ccfe.ukaea.uk/")
    session = MagicMock(
        prov=doc,
        agent=doc.agent("session_agent"),
        session=doc.activity("session_activity"),
    )
    operator._session = session
    # Replace each argument's provenance with a known entity so we can
    # trace derivations/usages back to them later.
    for i, arg in enumerate(args):
        if "provenance" in arg.attrs:
            arg.attrs["provenance"] = doc.entity(f"arg{i}")
    contents_prov = {}
    equilib = MagicMock(provenance=doc.entity("equilibrium"),
                        _session=session)
    # Seed the dataset variables with known entities; optionally attach
    # the mocked equilibrium per the replace_equilib fixture.
    for key, var in dataset.data_vars.items():
        if "provenance" in var.attrs:
            var.attrs["provenance"] = var.attrs[
                "partial_provenance"] = doc.entity(f"variable_{key}")
        del var.indica.equilibrium
        if replace_equilib[key]:
            var.indica.equilibrium = equilib
        if "provenance" in var.attrs:
            contents_prov[key] = var.attrs["provenance"]
    operator.validate_arguments(*args)
    # Bracket the call so the recorded generation time can be checked.
    before = datetime.now()
    operator.assign_provenance(dataset)
    after = datetime.now()
    entity = dataset.attrs["provenance"]
    assert "Dataset" in entity.get_attribute(prov.PROV_TYPE)
    # Exactly one generation record for the dataset entity.
    generated_candidates = list(
        filter(
            lambda x: {entity.identifier} == x.get_attribute("prov:entity"),
            doc.get_records(prov.ProvGeneration),
        ))
    assert len(generated_candidates) == 1
    generated = generated_candidates[0]
    assert {entity.identifier} == generated.get_attribute("prov:entity")
    activity_id = next(iter(generated.get_attribute("prov:activity")))
    end_time = next(iter(generated.get_attribute("prov:time")))
    assert before < end_time < after
    # The generating activity must be informed by the session activity.
    comms = list(
        filter(
            lambda x: x.get_attribute("prov:informed") == {activity_id},
            doc.get_records(prov.ProvCommunication),
        ))
    assert len(comms) == 1
    informed = comms[0]
    assert {session.session.identifier
            } == informed.get_attribute("prov:informant")
    # Association: the activity is associated with exactly the session
    # agent and the operator agent (removal checks no duplicates/extras).
    expected_agents = [session.agent.identifier, operator.agent.identifier]
    for a in filter(
            lambda a: a.get_attribute("prov:activity") == {activity_id},
            doc.get_records(prov.ProvAssociation),
    ):
        agent_id = next(iter(a.get_attribute("prov:agent")))
        assert agent_id in expected_agents
        expected_agents.remove(agent_id)
    assert len(expected_agents) == 0
    # Attribution: the dataset entity is attributed to the same two agents.
    expected_agents = [session.agent.identifier, operator.agent.identifier]
    for a in filter(
            lambda x: {entity.identifier} == x.get_attribute("prov:entity"),
            doc.get_records(prov.ProvAttribution),
    ):
        agent_id = next(iter(a.get_attribute("prov:agent")))
        assert agent_id in expected_agents
        expected_agents.remove(agent_id)
    assert len(expected_agents) == 0
    # Derivation: the dataset derives from exactly the argument entities.
    data = [
        arg.attrs["provenance"].identifier for arg in args
        if "provenance" in arg.attrs
    ]
    data2 = list(data)
    for d in filter(
            lambda d: {entity.identifier} == d.get_attribute(
                "prov:generatedEntity"),
            doc.get_records(prov.ProvDerivation),
    ):
        used_id = next(iter(d.get_attribute("prov:usedEntity")))
        assert used_id in data
        data.remove(used_id)
    assert len(data) == 0
    # Usage: the activity used exactly the same argument entities.
    data = data2
    for u in doc.get_records(prov.ProvUsage):
        assert {activity_id} == u.get_attribute("prov:activity")
        entity_id = next(iter(u.get_attribute("prov:entity")))
        assert entity_id in data
        data.remove(entity_id)
    assert len(data) == 0
    # Variables that had provenance keep it; the rest must have gained it.
    for key, var in dataset.data_vars.items():
        if key in contents_prov:
            assert contents_prov[key] == var.attrs["provenance"]
        else:
            assert "provenance" in var.attrs
            contents_prov[key] = var.attrs["provenance"]
    # Check provenance of dataset is collection of provenance of contents
    contents = [c.identifier for c in contents_prov.values()]
    for e in filter(
            lambda e: {entity.identifier} == e.get_attribute("prov:collection"
                                                             ),
            doc.get_records(prov.ProvMembership),
    ):
        contents_id = next(iter(e.get_attribute("prov:entity")))
        assert contents_id in contents
        contents.remove(contents_id)
    assert len(contents) == 0
def __init__(self):
    """Create the PROV document and register all namespaces used in the
    graph (foaf, dcterms, nipype, niiri)."""
    self.g = pm.ProvDocument()
    for namespace in (foaf, dcterms, nipype_ns, niiri):
        self.g.add_namespace(namespace)
#outval='"'+outval+'"^^xsd:datetime' #prepare data for bindings file if ID not in bindfile_dict: bindfile_dict[ID] = dict() bindfile_dict[ID]["value"] = list() bindfile_dict[ID]["type"] = bindmap[col]["type"] if not bindmap[col]["uniqueOnly"] or outval not in bindfile_dict[ ID]["value"]: bindfile_dict[ID]["value"].append(outval) tmpl_NS = prov.Namespace("tmpl", "http://openprovenance.org/tmpl#") bindDoc = prov.ProvDocument() bindDoc.add_namespace("tmpl", "http://openprovenance.org/tmpl#") bindDoc.add_namespace("var", "http://openprovenance.org/var#") #make bindings file, use prov library for this for ID in bindfile_dict: cnt1 = 0 attrs = dict() for a in bindfile_dict[ID]["value"]: #print ID #print a if bindfile_dict[ID]["type"] == "attr": cnt2 = 0 if isinstance(a, list): for b in a: attr = prov.QualifiedName( tmpl_NS, "2dvalue_" + str(cnt1) + "_" + str(cnt2))
class Provenance:
    """Holds the shared PROV document pre-registered with the domain
    namespaces (art, auction, transport, gallery).

    NOTE(review): the document is built once at class-definition time and
    is therefore shared by every user of this class — confirm intentional.
    """

    document = prov.ProvDocument()
    _register = document.add_namespace
    _register('art', 'http://art.com/')
    _register('auction', 'http://auction.com/')
    _register('transport', 'http://transport.com/')
    _register('gallery', 'http://gallery.com/')
    # Drop the helper so the class namespace matches the original exactly.
    del _register
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 20 17:45:33 2017

@author: Mehmet Suzen
"""
import prov.model as prov

document = prov.ProvDocument()

#
# narciprov ontology
#
document.add_namespace('narciprov', 'http://www.purl.org/narci/prov#')

#
# Activities
#
for _activity_id in (
        'narciprov:CraniotomyProcedure',
        'narciprov:TwoPhotonImaging',
        'narciprov:VisualStimulation',
        'narciprov:RegionClassification',
        'narciprov:TimeSeriesExtract'):
    document.activity(_activity_id)

#
# Entities
#
for _entity_id in (
        'narciprov:Subject',
        'narciprov:CraniotomyProtocol',
        'narciprov:Craniotomy',
        'narciprov:AnaesthesiaProtocol'):
    document.entity(_entity_id)
#print col + " " + bindmap[col]["varname"] + " " + outval bind_dict["var:" + bindmap[col]["varname"]] = outval #outval=row[col] if bindmap[col]["val"] == "literal": outval = '"' + outval + '"' if bindmap[col]["type"] == "attr": outstatement = outstatement + "\ttmpl:2dvalue_0_0 " + str( outval) + " .\n" else: outstatement = outstatement + "\ttmpl:value_0 " + str( outval) + " .\n" rtemplate = rtemplate + outstatement bind_dicts.append(bind_dict) print(rtemplate) provtemplate = prov.ProvDocument() res = provtemplate.deserialize(source="excelProvTemplate.rdf", format="rdf", rdf_format="xml") print(bind_dicts[0]) print(res.serialize(format="rdf")) exp = provconv.instantiate_template(res, bind_dicts[0]) outfile = open("excelProvTemplate_exp.provn", "w") outfile.write(exp.serialize(format="provn")) outfile.close()
def __init__(self, state, filename=None, handle=None, debug=False):
    """Open a rearrangements file for reading or writing.

    :param state: truthy to open for writing, falsy for reading.
    :param filename: path to the tab-separated data file; a companion
        ``<filename>.meta.json`` metadata file is used alongside it.
    :param handle: an already-open file object, used when *filename* is
        not given (its ``.name`` locates the metadata file).
    :param debug: enable warning output (True/False only for now).
    """
    if not filename and not handle:
        # Fix: the original had an unreachable ``return None`` after this
        # call; sys.exit raises SystemExit, so the return never executed.
        sys.exit(
            "Error: filename or handle must be provided to RearrangementsFile\n"
        )

    # set logging level. for now, just True/False to issue warnings.
    self.debug = debug

    # define fields from the rearrangements spec.
    self.mandatoryFieldNames = []
    self.optionalSpecFieldNames = []
    self.additionalFieldNames = []
    self._inputFieldNames = []
    for f in rearrangements['fields']:
        if f['mandatory']:
            self.mandatoryFieldNames.append(f['name'])
        else:
            self.optionalSpecFieldNames.append(f['name'])

    # writing or reading
    self.writableState = state
    if state:
        # writing: open data + metadata files, start a fresh PROV document.
        if filename:
            self.dataFile = open(filename, 'w')
            self.metaFile = open(filename + '.meta.json', 'w')
        else:
            self.dataFile = handle
            self.metaFile = open(handle.name + '.meta.json', 'w')
        self.metadata = model.ProvDocument()
        self.metadata.set_default_namespace('http://airr-community.org/')
        self.wroteMetadata = False
        self.dictWriter = None
    else:
        # reading: the metadata file is optional.
        if filename:
            self.dataFile = open(filename, 'r')
            try:
                self.metaFile = open(filename + '.meta.json', 'r')
            except IOError:
                self.metaFile = None
        else:
            self.dataFile = handle
            try:
                self.metaFile = open(handle.name + '.meta.json', 'r')
            except IOError:
                self.metaFile = None
        self.wroteMetadata = None

        # read metadata
        self.metadata = None
        if self.metaFile:
            text = self.metaFile.read()
            self.metaFile.close()
            # NOTE(review): deserialize is called with None for the stream
            # argument and the content as text — confirm this matches the
            # prov model API version in use.
            self.metadata = model.ProvDocument.deserialize(
                None, text, 'json')

        # data reader, collect field names beyond the mandatory ones.
        self.dictReader = csv.DictReader(self.dataFile, dialect='excel-tab')
        self._inputFieldNames = self.dictReader.fieldnames
        for f in self._inputFieldNames:
            if f in self.mandatoryFieldNames:
                continue
            if f not in self.additionalFieldNames:
                self.additionalFieldNames.append(f)