Example #1
    def process_tags(self, result):
        tags = []
        tagsDict = {}
        # Build a dictionary of the tags
        for doc in result:
            # Get Anotar data from Solr data
            doc = JsonSimple(doc.get("jsonString"))
            # Get actual tag text
            tag = doc.getString(None, ["content", "literal"])
            # Find out if they have locators
            locs = doc.getJsonSimpleList(["annotates", "locators"]).size()
            if locs == 0:
                # Basic tags, just aggregate counts
                if tag in tagsDict:
                    # We've seen it before, just increment the counter
                    existing = tagsDict[tag]
                    count = existing.getInteger(0, ["tagCount"])
                    existing.getJsonObject().put("tagCount", str(count + 1))
                else:
                    # First time, store this object
                    doc.getJsonObject().put("tagCount", str(1))
                    tagsDict[tag] = doc
            else:
                # Tags with a locator, special case for images etc.
                tags.append(doc.toString())

        # Push all the 'basic' counts into the list to return
        for tag in tagsDict:
            tags.append(tagsDict[tag].toString())
        return "[" + ",".join(tags) + "]"
Example #2
 def __messages(self):
     if self.message_list is not None and len(self.message_list) > 0:
         msg = JsonSimple()
         msg.getJsonObject().put("oid", self.oid)
         message = msg.toString()
         for target in self.message_list:
             self.utils.sendMessage(target, message)
Example #4
    def __activate__(self, context):
        self.request = context["request"]
        self.response = context["response"]
        self.formData = context["formData"]
        self.log = context["log"]

        # Basic response text
        message = JsonSimple()
        self.metadata = message.writeObject(["metadata"])
        self.results  = message.writeArray(["results"])

        # Prepare response Object
        format = self.formData.get("format")
        if format == "json":
            out = self.response.getPrintWriter("application/json; charset=UTF-8")
        else:
            out = self.response.getPrintWriter("text/plain; charset=UTF-8")

        # Success Response
        try:
            self.searchNla()
            out.println(message.toString(True))
            out.close()

        except Exception, ex:
            self.log.error("Error during search: ", ex)

            self.response.setStatus(500)
            message = JsonSimple()
            message.getJsonObject().put("error", ex.getMessage())
            out.println(message.toString(True))
            out.close()
Example #5
    def process_tags(self, result):
        tags = []
        tagsDict = {}
        # Build a dictionary of the tags
        for doc in result:
            # Get Anotar data from Solr data
            doc = JsonSimple(doc.get("jsonString"))
            # Get actual tag text
            tag = doc.getString(None, ["content", "literal"])
            # Find out if they have locators
            locs = doc.getJsonSimpleList(["annotates", "locators"]).size()
            if locs == 0:
                # Basic tags, just aggregate counts
                if tag in tagsDict:
                    # We've seen it before, just increment the counter
                    existing = tagsDict[tag]
                    count = existing.getInteger(0, ["tagCount"])
                    existing.getJsonObject().put("tagCount", str(count + 1))
                else:
                    # First time, store this object
                    doc.getJsonObject().put("tagCount", str(1))
                    tagsDict[tag] = doc
            else:
                # Tags with a locator, special case for images etc.
                tags.append(doc.toString())

        # Push all the 'basic' counts into the list to return
        for tag in tagsDict:
            tags.append(tagsDict[tag].toString())
        return "[" + ",".join(tags) + "]"
 def __checkMetadataPayload(self, identifier):
     # We are just going to confirm the existence of
     # 'metadata.json', or create an empty one if it
     # doesn't exist. This keeps curation working for this
     # option and removes some log errors on the details
     # screen.
     try:
         self.object.getPayload("metadata.json")
         # all is good, the above will throw an exception if it doesn't exist
         return
     except Exception:
         self.log.info("Creating 'metadata.json' payload for object '{}'", self.oid)
         # Prep data
         metadata = JsonSimple()
         metadata.getJsonObject().put("recordIDPrefix", "")
         metadata.writeObject("data")
         # The only real data we require is the ID for curation
         idHolder = metadata.writeObject("metadata")
         idHolder.put("dc.identifier", identifier)
         # Store it
         inStream = IOUtils.toInputStream(metadata.toString(True), "UTF-8")
         try:
             StorageUtils.createOrUpdatePayload(self.object, "metadata.json", inStream)
         except StorageException, e:
             self.log.error("Error creating 'metadata.json' payload for object '{}'", self.oid, e)
         return
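For reference, the skeleton 'metadata.json' created above serialises (via toString(True)) to roughly the following, with an invented identifier value:

    {"recordIDPrefix": "", "data": {}, "metadata": {"dc.identifier": "invented-id-1"}}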
Example #7
    def updateRelationships(self, relationship,pid,identifier):
        oid = self.findOidByIdentifier(relationship.get("identifier"))
        self.writer.println(oid)
        digitalObject = StorageUtils.getDigitalObject(self.storage, oid)
        metadataJsonPayload = digitalObject.getPayload("metadata.json")
        metadataJsonInstream = metadataJsonPayload.open()
        metadataJson = JsonSimple(metadataJsonInstream)
        metadataJsonPayload.close()
        relationships = metadataJson.getArray("relationships")


        found = False
        if relationships is None:
            relationships = JSONArray()
            metadataJson.getJsonObject().put("relationships",relationships)

        for relationship1 in relationships:
             if relationship1.get("identifier") == identifier:
                 relationship1.put("isCurated",True)
                 relationship1.put("curatedPid",pid)
                 found = True

        if not found:
            newRelationship = JsonObject()
            newRelationship.put("isCurated",True)
            newRelationship.put("curatedPid",pid)
            newRelationship.put("relationship",relationship.get("relationship"))
            newRelationship.put("identifier",identifier)
            relationships.add(newRelationship)


        istream = ByteArrayInputStream(String(metadataJson.toString(True)).getBytes())
        StorageUtils.createOrUpdatePayload(digitalObject,"metadata.json",istream)
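Example #7 shows the read-modify-write cycle that recurs throughout these snippets: open a payload, parse it with JsonSimple, mutate the underlying JsonObject, then write the serialised result back to storage. A minimal sketch of that cycle, assuming the same Fascinator/Jython environment (JsonSimple, IOUtils and StorageUtils already available) and an invented 'lastTouched' field:

    # Sketch of the recurring payload read-modify-write pattern (illustrative only).
    def touch_metadata(digitalObject):
        payload = digitalObject.getPayload("metadata.json")
        json = JsonSimple(payload.open())
        payload.close()
        # 'lastTouched' is a hypothetical field, added purely for illustration
        json.getJsonObject().put("lastTouched", "true")
        inStream = IOUtils.toInputStream(json.toString(True), "UTF-8")
        StorageUtils.createOrUpdatePayload(digitalObject, "metadata.json", inStream)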
Example #8
 def get_image(self):
     self.type = "http://www.purl.org/anotar/ns/type/0.1#Tag"
     mediaFragType = "http://www.w3.org/TR/2009/WD-media-frags-20091217"
     result = '{"result":' + self.search_solr() + "}"
     if result:
         imageTagList = []
         imageTags = JsonSimple(result).getJsonSimpleList(["result"])
         for imageTag in imageTags:
             imageAno = JsonSimple()
             # We only want tags with locators, not basic tags
             locators = imageTag.getJsonSimpleList(["annotates", "locators"])
             if locators and not locators.isEmpty():
                 locatorValue = locators.get(0).getString(None, ["value"])
                 locatorType = locators.get(0).get(None, ["type"])
                 if locatorValue and locatorValue.find("#xywh=") > -1 and locatorType == mediaFragType:
                     _, locatorValue = locatorValue.split("#xywh=")
                     left, top, width, height = locatorValue.split(",")
                     object = imageAno.getJsonObject()
                     object.put("top", top)
                     object.put("left", left)
                     object.put("width", width)
                     object.put("height", height)
                     object.put("creator", imageTag.getString(None, ["creator", "literal"]))
                     object.put("creatorUri", imageTag.getString(None, ["creator", "uri"]))
                     object.put("id", imageTag.getString(None, ["id"]))
                     # tagCount = imageTag.getString(None, ["tagCount"])
                     object.put("text", imageTag.getString(None, ["content", "literal"]))
                     object.put("editable", "true")
                     imageTagList.append(imageAno.toString())
         result = "[" + ",".join(imageTagList) + "]"
     return result
Example #9
    def __activate__(self, context):
        self.velocityContext = context
        formData = self.vc("formData")

        # build the URL and query parameters to retrieve
        proxyUrls = JsonSimple(self.vc("systemConfig").getObject("proxy-urls"))
        url = ""
        key = formData.get("ns", "")
        if proxyUrls.getJsonObject().containsKey(key):
            url = proxyUrls.getString("", [key])
        queryStr = formData.get("qs")
        if queryStr == "searchTerms={searchTerms}":
            queryStr = None
        if queryStr:
            if formData.get("jaffa2autocomplete", "false") == "true":
                url += "?searchTerms=%s" % queryStr.lower()
            else:
                url += "?%s" % queryStr
        self.vc("log").debug("Proxy URL = '{}'", url)

        data = None
        try:
            data = self.__wget(url)
        except Exception, e:
            data = '{"error":"%s"}' % str(e)
            self.vc("log").error("ERROR accessing URL:", e)
Example #10
    def __activate__(self, context):
        self.velocityContext = context
        formData = self.vc("formData")

        # build the URL and query parameters to retrieve
        proxyUrls = JsonSimple(self.vc("systemConfig").getObject("proxy-urls"))
        url = ""
        key = formData.get("ns", "")
        if proxyUrls.getJsonObject().containsKey(key):
            url = proxyUrls.getString("", [key])
        queryStr = formData.get("qs")
        if queryStr == "searchTerms={searchTerms}":
            queryStr = None
        if queryStr:
            if formData.get("jaffa2autocomplete", "false") == "true":
                url += "?searchTerms=%s" % queryStr
            else:
                url += "?%s" % queryStr
        self.vc("log").debug("Proxy URL = '{}'", url)

        data = None
        try:
            data = self.__wget(url)
        except Exception, e:
            data = '{"error":"%s"}' % str(e)
            self.vc("log").error("ERROR accessing URL:", e)
Example #11
 def __activate__(self, context):
     response = context["response"]
     writer = response.getPrintWriter("text/plain; charset=UTF-8")
     auth = context["page"].authentication
     result = JsonSimple()
     obj = result.getJsonObject()
     obj.put("status", "error")
     obj.put("message", "An unknown error has occurred")
     if auth.is_logged_in():
         services = context["Services"]
         formData = context["formData"]
         sessionState = context["sessionState"]
         urlBase = context["urlBase"]
         if urlBase.endswith("/"):
             urlBase = urlBase[:-1]
         func = formData.get("func")
         portalManager = services.portalManager
         if func == "create-view":
             try:
                 fq = [q for q in sessionState.get("fq") if q != 'item_type:"object"']
                 id = formData.get("id")
                 description = formData.get("description")
                 print "Creating view '%s': '%s'" % (id, description)
                 portal = Portal(id)
                 portal.setDescription(formData.get("description"))
                 portal.setQuery(" OR ".join(fq))
                 portal.setSearchQuery(sessionState.get("searchQuery"))
                 portal.setFacetFields(portalManager.default.facetFields)
                 portalManager.add(portal)
                 portalManager.save(portal)
                 obj.put("status", "ok")
                 obj.put("message", "View '%s' successfully created" % id)
                 obj.put("url", "%s/%s/home" % (urlBase, id))
             except Exception, e:
                 response.setStatus(500)
                 obj.put("message", str(e))
         elif func == "delete-view":
             defaultPortal = context["defaultPortal"]
             portalId = formData.get("view")
             if auth.is_admin():
                 if not portalId:
                     response.setStatus(500)
                     obj.put("message", "No view specified to be deleted")
                 elif portalId != defaultPortal:
                     # sanity check: don't delete default portal
                     print "Deleting view '%s'" % portalId
                     try:
                         portalManager.remove(portalId)
                         obj.put("status", "ok")
                         obj.put("message", "View '%s' successfully removed" % portalId)
                         obj.put("url", "%s/%s/home" % (urlBase, defaultPortal))
                     except Exception, e:
                         obj.put("message", str(e))
                 else:
                     response.setStatus(500)
                     obj.put("message", "The default view cannot be deleted")
             else:
                 response.setStatus(403)
                 obj.put("message", "Only administrative users can access this API")
Example #12
    def __activate__(self, context):
        self.request = context["request"]
        self.response = context["response"]
        self.formData = context["formData"]
        self.log = context["log"]

        oid = self.formData.get("oid")
        self.log.debug("Curation request recieved: '{}'", oid)
        message = JsonSimple()
        message.getJsonObject().put("task", "curation")
        message.getJsonObject().put("oid", oid)

        out = self.response.getPrintWriter("text/plain; charset=UTF-8")
        if self.queueMessage(message.toString()):
            out.println("Request successful. The system will now process.")
        else:
            self.response.setStatus(500)
            out.println("Error sending message, see system logs.")
        out.close()
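The queued curation message above serialises to a small JSON document of this form (oid value invented):

    {"task": "curation", "oid": "invented-oid-1234"}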
    def __updateMetadataPayload(self, data):
        # Get and parse
        payload = self.object.getPayload("formData.tfpackage")
        json = JsonSimple(payload.open())
        payload.close()

        # Basic test for a mandatory field
        title = json.getString(None, ["dc:title"])
        if title is not None:
            # We've done this before
            return

        # Merge
        json.getJsonObject().putAll(data)

        # Store it
        inStream = IOUtils.toInputStream(json.toString(True), "UTF-8")
        try:
            self.object.updatePayload("formData.tfpackage", inStream)
        except StorageException, e:
            self.log.error("Error updating 'formData.tfpackage' payload for object '{}'", self.oid, e)
Example #14
    def modify_json(self):
        # print "**** anotar.py : add_json() : adding json : " + json
        jsonSimple = JsonSimple(self.json)
        jsonObj = jsonSimple.getJsonObject()
        jsonObj.put("id", self.pid)
        rootUri = jsonSimple.getString(None, ["annotates", "rootUri"])
        if rootUri is not None:
            baseUrl = "http://%s:%s/" % (self.vc("request").serverName, self.vc("serverPort"))
            myUri = baseUrl + rootUri + "#" + self.pid
            jsonObj.put("uri", myUri)

        jsonObj.put("schemaVersionUri", "http://www.purl.org/anotar/schema/0.1")
        self.json = jsonSimple.toString()
    def __updateMetadataPayload(self, data):
        # Get and parse
        payload = self.object.getPayload("formData.tfpackage")
        json = JsonSimple(payload.open())
        payload.close()

        # Basic test for a mandatory field
        title = json.getString(None, ["dc:title"])
        if title is not None:
            # We've done this before
            return

        # Merge
        json.getJsonObject().putAll(data)

        # Store it
        inStream = IOUtils.toInputStream(json.toString(True), "UTF-8")
        try:
            self.object.updatePayload("formData.tfpackage", inStream)
        except StorageException, e:
            self.log.error(
                "Error updating 'formData.tfpackage' payload for object '{}'",
                self.oid, e)
Example #16
    def __activate__(self, context):
        request = context["request"]
        response = context["response"]
        writer = response.getPrintWriter("text/javascript; charset=UTF-8")
        result = JsonSimple()

        ## Look for the JSONP callback to use
        jsonpCallback = request.getParameter("callback")
        if jsonpCallback is None:
            jsonpCallback = request.getParameter("jsonp_callback")
            if jsonpCallback is None:
                response.setStatus(403)
                writer.println("Error: This interface only responds to JSONP")
                writer.close()
                return

        if context["page"].authentication.is_logged_in():
            result.getJsonObject().put("isAuthenticated", "true")
        else:
            result.getJsonObject().put("isAuthenticated", "false")

        writer.println(jsonpCallback + "(" + result.toString() + ")")
        writer.close()
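On success, the JSONP handler above wraps the JsonSimple result in the caller's callback, so the response body looks like this (callback name invented):

    jsonp_cb({"isAuthenticated": "true"})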
Example #18
    def modify_json(self):
        #print "**** anotar.py : add_json() : adding json : " + json
        jsonSimple = JsonSimple(self.json)
        jsonObj = jsonSimple.getJsonObject()
        jsonObj.put("id", self.pid)
        rootUri = jsonSimple.getString(None, ["annotates", "rootUri"])
        if rootUri is not None:
            baseUrl = "http://%s:%s/" % (self.vc("request").serverName,
                                         self.vc("serverPort"))
            myUri = baseUrl + rootUri + "#" + self.pid
            jsonObj.put("uri", myUri)

        jsonObj.put("schemaVersionUri",
                    "http://www.purl.org/anotar/schema/0.1")
        self.json = jsonSimple.toString()
Example #19
    def __upgrade(self, formData):
        # These fields are handled specially
        ignoredFields = ["metaList", "redbox:formVersion", "redbox:newForm"]

        # Prepare a new JSON setup for upgraded data
        newJsonSimple = JsonSimple()
        newJsonObject = newJsonSimple.getJsonObject()
        metaList = newJsonSimple.writeArray(["metaList"])

        oldJsonObject = formData.getJsonObject()
        for key in oldJsonObject.keySet():
            oldField = str(key)
            if oldField not in ignoredFields:
                newField = self.__parseFieldName(oldField)
                metaList.add(newField)
                newJsonObject.put(newField, oldJsonObject.get(key))

        # Form management
        newJsonObject.put("redbox:formVersion", self.redboxVersion)
        newForm = oldJsonObject.get("redbox:newForm")
        if newForm is not None:
            newJsonObject.put("redbox:newForm", newForm)

        #########
        # Some final custom modifications more complicated than most fields
        #########

        # Old URL checkbox 'on' equals new ID Origin 'internal'
        urlOrigin = oldJsonObject.get("url_useRecordId")
        if urlOrigin is not None and urlOrigin == "on":
            newJsonObject.put("dc:identifier.redbox:origin", "internal")

        # Related data should default to being unlinked if from legacy forms
        counter = 1
        template = "dc:relation.vivo:Dataset"
        newIdField = "%s.%s.dc:identifier" % (template, counter)
        while newJsonObject.containsKey(newIdField):
            newOriginField = "%s.%s.redbox:origin" % (template, counter)
            newJsonObject.put(newOriginField, "external")
            newPublishField = "%s.%s.redbox:publish" % (template, counter)
            newJsonObject.put(newPublishField, "off")
            counter += 1
            newIdField = "%s.%s.dc:identifier" % (template, counter)

        self.audit.add(
            "Migration tool. Version upgrade performed '%s' => '%s'" %
            (self.version, self.redboxVersion))
        return newJsonSimple
    def __activate__(self, context):
        writer = context["response"].getPrintWriter("application/json; charset=UTF-8")
        jsonResponse = "{}"
        try:

            oid = context["formData"].get("oid")
            object = context["Services"].getStorage().getObject(oid);
            payload = object.getPayload("metadata.json")
            json = JsonSimple(payload.open())
            payload.close()
            object.close()

            # We are only going to send the 'data' node though
            data = JsonSimple(json.getJsonObject().get("data"))
            jsonResponse = data.toString(True)

        except Exception, e:
            jsonResponse = '{"message": "%s"}' % e.getMessage()
Example #22
 def getJson(self, state="open"):
     title = "%s (%s)" % (self.getName(), self.getCount())
     json = JsonSimple()
     jsonObj = json.getJsonObject()
     attributes = JsonObject()
     attributes.put("id", self.getId())
     attributes.put("fq", self.getFacetQuery())
     attributes.put("title", title)
     jsonObj.put("data", title)
     jsonObj.put("attributes", attributes)
     hasSubFacets = not self.getSubFacets().isEmpty()
     if hasSubFacets:
         jsonObj.put("state", state)
         subFacetList = ArrayList()
         for subFacet in self.getSubFacets():
             subFacetList.add(subFacet.getJson("closed"))
         children = JSONArray()
         children.addAll(subFacetList)
         jsonObj.put("children", children)
     return json
Example #23
 def getJson(self, state="open"):
     title = "%s (%s)" % (self.getName(), self.getCount())
     json = JsonSimple()
     jsonObj = json.getJsonObject()
     attributes = JsonObject()
     attributes.put("id", self.getId())
     attributes.put("fq", self.getFacetQuery())
     attributes.put("title", title)
     jsonObj.put("data", title)
     jsonObj.put("attributes", attributes)
     hasSubFacets = not self.getSubFacets().isEmpty()
     if hasSubFacets:
         jsonObj.put("state", state)
         subFacetList = ArrayList()
         for subFacet in self.getSubFacets():
             subFacetList.add(subFacet.getJson("closed"))
         children = JSONArray()
         children.addAll(subFacetList)
         jsonObj.put("children", children)
     return json
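Examples #22 and #23 build one node of a facet tree: a "data" label, an "attributes" object carrying the id, facet query and title, and, when sub-facets exist, a "state" flag plus a "children" array of recursively built nodes. An illustrative node with invented values:

    {"data": "Open Access (42)",
     "attributes": {"id": "f1", "fq": "access:open", "title": "Open Access (42)"},
     "state": "open",
     "children": [{"data": "2024 (7)",
                   "attributes": {"id": "f1.1", "fq": "access:open AND year:2024", "title": "2024 (7)"}}]}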
Example #24
 def get_image(self):
     self.type = "http://www.purl.org/anotar/ns/type/0.1#Tag"
     mediaFragType = "http://www.w3.org/TR/2009/WD-media-frags-20091217"
     result = '{"result":' + self.search_solr() + '}'
     if result:
         imageTagList = []
         imageTags = JsonSimple(result).getJsonSimpleList(["result"])
         for imageTag in imageTags:
             imageAno = JsonSimple()
             # We only want tags with locators, not basic tags
             locators = imageTag.getJsonSimpleList(
                 ["annotates", "locators"])
             if locators and not locators.isEmpty():
                 locatorValue = locators.get(0).getString(None, ["value"])
                 locatorType = locators.get(0).get(None, ["type"])
                 if locatorValue and locatorValue.find(
                         "#xywh=") > -1 and locatorType == mediaFragType:
                     _, locatorValue = locatorValue.split("#xywh=")
                     left, top, width, height = locatorValue.split(",")
                     object = imageAno.getJsonObject()
                     object.put("top", top)
                     object.put("left", left)
                     object.put("width", width)
                     object.put("height", height)
                     object.put(
                         "creator",
                         imageTag.getString(None, ["creator", "literal"]))
                     object.put(
                         "creatorUri",
                         imageTag.getString(None, ["creator", "uri"]))
                     object.put("id", imageTag.getString(None, ["id"]))
                     #tagCount = imageTag.getString(None, ["tagCount"])
                     object.put(
                         "text",
                         imageTag.getString(None, ["content", "literal"]))
                     object.put("editable", "true")
                     imageTagList.append(imageAno.toString())
         result = "[" + ",".join(imageTagList) + "]"
     return result
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "servicesUI2"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        stages = self.config.getJsonSimpleList(["stages"])
        pageTitle = "Services Record"
        displayType = "package-service"
        initialStep = 0
        try:
            wfMeta = self.__getJsonPayload("workflow.metadata")
            wfMeta.getJsonObject().put("pageTitle", pageTitle)

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")
            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])
        except StorageException:
            # No workflow payload, time to create
            initialStage = stages.get(initialStep).getString(None, ["name"])
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", initialStage)
            wfMetaObj.put("pageTitle", pageTitle)
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == initialStage:
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])

        # Has the workflow metadata changed?
        if wfChanged == True:
            inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
            except StorageException:
                print " ERROR updating dataset payload"

        # Form processing
        coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
            # Core fields
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            # Non-core fields
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])

        # Manifest processing (formData not present in wfMeta)
        manifest = self.__getJsonPayload(self.packagePid)
        formTitles = manifest.getStringList(["title"])
        if formTitles:
            for formTitle in formTitles:
                if self.title is None:
                    self.title = formTitle
        self.descriptionList = [manifest.getString("", ["description"])]
        formData = manifest.getJsonObject()
        for field in formData.keySet():
            if field not in coreFields:
                value = formData.get(field)
                if value is not None and value.strip() != "":
                    self.utils.add(self.index, field, value)
                    # We want to sort by date of creation, so it
                    # needs to be indexed as a date (ie. 'date_*')
                    if field == "dc:created":
                        parsedTime = time.strptime(value, "%Y-%m-%d")   
                        solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                        self.utils.add(self.index, "date_created", solrTime)
                    # try to extract some common fields for faceting
                    if field.startswith("dc:") and \
                            not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                              or field.endswith(".dc:identifier") \
                              or field.endswith(".rdf:resource")):
                        # index dublin core fields for faceting
                        basicField = field.replace("dc:", "dc_")
                        dot = field.find(".")
                        if dot > 0:
                            facetField = basicField[:dot]
                        else:
                            facetField = basicField
                        #print "Indexing DC field '%s':'%s'" % (field, facetField)
                        if facetField == "dc_title":
                            if self.title is None:
                                self.title = value
                        elif facetField == "dc_type":
                            if self.dcType is None:
                                self.dcType = value
                        elif facetField == "dc_creator":
                            if basicField.endswith("foaf_name"):
                                self.utils.add(self.index, "dc_creator", value)
                        else:
                            self.utils.add(self.index, facetField, value)
                        # index keywords for lookup
                        if field.startswith("dc:subject.vivo:keyword."):
                            self.utils.add(self.index, "keywords", value)

        self.utils.add(self.index, "display_type", displayType)

        # Workflow processing
        wfStep = wfMeta.getString(None, ["step"])
        self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
        self.utils.add(self.index, "workflow_step", wfStep)
        self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
        for group in workflow_security:
            self.utils.add(self.index, "workflow_security", group)
            if self.owner is not None:
                self.utils.add(self.index, "workflow_security", self.owner)
        # set OAI-PMH status to deleted
        if wfStep == "retired":
            self.utils.add(self.index, "oai_deleted", "true")
Example #26
    def __activate__(self, context):

         try:
             self.log = context["log"]
             self.response = context["response"]
             self.request = context["request"]
             self.systemConfig = context["systemConfig"]
             self.storage = context["Services"].getStorage()
             self.indexer = context["Services"].getIndexer()
             self.sessionState = context["sessionState"]
             self.sessionState.set("username", "admin")

             out = self.response.getPrintWriter("text/plain; charset=UTF-8")
             relationshipMapper = ApplicationContextProvider.getApplicationContext().getBean("relationshipMapper")
             externalCurationMessageBuilder = ApplicationContextProvider.getApplicationContext().getBean("externalCurationMessageBuilder")

             oid = self.request.getParameter("oid")

             if oid is None :
                 identifier = self.request.getParameter("identifier")
                 oid = self.findOidByIdentifier(identifier)

             relationshipType = self.request.getParameter("relationship")
             curatedPid = self.request.getParameter("curatedPid")
             sourceId = self.request.getParameter("sourceIdentifier")
             system = self.request.getParameter("system")

             digitalObject = StorageUtils.getDigitalObject(self.storage, oid)
             metadataJsonPayload = digitalObject.getPayload("metadata.json")
             metadataJsonInstream = metadataJsonPayload.open()
             metadataJson = JsonSimple(metadataJsonInstream)
             metadataJsonPayload.close()

             relationships = metadataJson.getArray("relationships")
             found = False
             if relationships is None:
                relationships = JSONArray()
                metadataJson.getJsonObject().put("relationships",relationships)

             for relationship in relationships:
                 if relationship.get("identifier") == sourceId:
                     relationship.put("isCurated",True)
                     relationship.put("curatedPid",curatedPid)
                     found = True

             if not found:
                 relationship = JsonObject()
                 relationship.put("isCurated",True)
                 relationship.put("curatedPid",curatedPid)
                 relationship.put("relationship",relationshipType)
                 relationship.put("identifier",sourceId)
                 relationship.put("system",system)
                 relationships.add(relationship)

             out.println(metadataJson.toString(True))
             istream = ByteArrayInputStream(String(metadataJson.toString(True)).getBytes())
             StorageUtils.createOrUpdatePayload(digitalObject,"metadata.json",istream)

             out.close()
         finally:
             self.sessionState.remove("username")
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "Parties_People"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        stages = self.config.getJsonSimpleList(["stages"])
        initialStep = 0

        try:
            wfMeta = self.__getJsonPayload("workflow.metadata")

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")
            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])
        except StorageException:
            # No workflow payload, time to create
            initialStage = stages.get(initialStep).getString(None, ["name"])
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", initialStage)
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == initialStage:
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])

        # Has the workflow metadata changed?
        if wfChanged == True:
            inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
            except StorageException:
                print " ERROR updating dataset payload"

        # Form processing
        coreFields = ["title", "description"]
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
            # Core fields
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            # Non-core fields
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])

        # Workflow processing
        wfStep = wfMeta.getString(None, ["step"])
        self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
        self.utils.add(self.index, "workflow_step", wfStep)
        self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
        for group in workflow_security:
            self.utils.add(self.index, "workflow_security", group)
            if self.owner is not None:
                self.utils.add(self.index, "workflow_security", self.owner)
        # set OAI-PMH status to deleted
        if wfStep == "retired":
            self.utils.add(self.index, "oai_deleted", "true")
Example #28
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "dataset"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        stages = self.config.getJsonSimpleList(["stages"])
        if self.owner == "guest":
            pageTitle = "Submission Request"
            displayType = "submission-request"
            initialStep = 0
        else:
            pageTitle = "Metadata Record"
            displayType = "package-dataset"
            initialStep = 1
        try:
            wfMeta = self.__getJsonPayload("workflow.metadata")
            wfMeta.getJsonObject().put("pageTitle", pageTitle)

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")
            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])
        except StorageException:
            # No workflow payload, time to create
            initialStage = stages.get(initialStep).getString(None, ["name"])
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", initialStage)
            wfMetaObj.put("pageTitle", pageTitle)
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == initialStage:
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])

        # Has the workflow metadata changed?
        if wfChanged == True:
            inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
            except StorageException:
                print " ERROR updating dataset payload"

        # Form processing
        coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
            # Core fields
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            # Non-core fields
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])

        # Manifest processing (formData not present in wfMeta)
        manifest = self.__getJsonPayload(self.packagePid)
        formTitles = manifest.getStringList(["title"])
        if formTitles:
            for formTitle in formTitles:
                if self.title is None:
                    self.title = formTitle
        self.descriptionList = [manifest.getString("", ["description"])]
        
        #Used to make sure we have a created date
        createdDateFlag  = False
        
        formData = manifest.getJsonObject()
        
        for field in formData.keySet():
            if field not in coreFields:
                value = formData.get(field)
                if value is not None and value.strip() != "":
                    self.utils.add(self.index, field, value)
                    # We want to sort by date of creation, so it
                    # needs to be indexed as a date (ie. 'date_*')
                    if field == "dc:created":
                        parsedTime = time.strptime(value, "%Y-%m-%d")
                        solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                        self.utils.add(self.index, "date_created", solrTime)
                        self.log.debug("Set created date to :%s" % solrTime)
                        createdDateFlag = True
                    elif field == "redbox:embargo.dc:date":
                        self.embargoedDate = value
                    # try to extract some common fields for faceting
                    if field.startswith("dc:") and \
                            not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                              or field.endswith(".dc:identifier") \
                              or field.endswith(".rdf:resource")):
                        # index dublin core fields for faceting
                        basicField = field.replace("dc:", "dc_")
                        dot = field.find(".")
                        if dot > 0:
                            facetField = basicField[:dot]
                        else:
                            facetField = basicField
                        #print "Indexing DC field '%s':'%s'" % (field, facetField)
                        if facetField == "dc_title":
                            if self.title is None:
                                self.title = value
                        elif facetField == "dc_type":
                            if self.dcType is None:
                                self.dcType = value
                        elif facetField == "dc_creator":
                            if basicField.endswith("foaf_name"):
                                self.utils.add(self.index, "dc_creator", value)
                        else:
                            self.utils.add(self.index, facetField, value)
                        # index keywords for lookup
                        if field.startswith("dc:subject.vivo:keyword."):
                            self.utils.add(self.index, "keywords", value)
                    # check if this is an array field
                    fnameparts = field.split(":")
                    if fnameparts is not None and len(fnameparts) >= 3:
                        if field.startswith("bibo") or field.startswith("skos"):
                            arrParts = fnameparts[1].split(".")
                        else:    
                            arrParts = fnameparts[2].split(".")
                        # we're not interested in: Relationship, Type and some redbox:origin 
                        if arrParts is not None and len(arrParts) >= 2 and field.find(":Relationship.") == -1 and field.find("dc:type") == -1 and field.find("redbox:origin") == -1 and arrParts[1].isdigit():
                            # we've got an array field
                            fldPart = ":%s" % arrParts[0]
                            prefixEndIdx = field.find(fldPart) + len(fldPart)
                            suffixStartIdx = prefixEndIdx+len(arrParts[1])+1
                            arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx] + field[suffixStartIdx:]
                            if field.endswith("Name"):
                                arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx]
                            self.log.debug("Array Field name is:%s  from: %s, with value:%s" % (arrFldName, field, value))
                            
                            if field.endswith("Name"):
                                fullFieldMap = self.arrayBucket.get(arrFldName)
                                if fullFieldMap is None:
                                    fullFieldMap = HashMap()
                                    self.arrayBucket.put(arrFldName, fullFieldMap)
                                idx = arrParts[1]
                                fullField = fullFieldMap.get(idx)
                                if (fullField is None):
                                    fullField = ""
                                if (field.endswith("givenName")):
                                    fullField = "%s, %s" % (fullField, value)
                                if (field.endswith("familyName")):
                                    fullField = "%s%s" % (value, fullField) 
                                self.log.debug("fullname now is :%s" % fullField)
                                fullFieldMap.put(idx, fullField)
                            else:
                                fieldlist = self.arrayBucket.get(arrFldName)
                                if fieldlist is None:
                                    fieldlist = []
                                    self.arrayBucket.put(arrFldName, fieldlist)
                                fieldlist.append(value)
                                
                    for compfield in self.compFields:
                        if field.startswith(compfield):    
                            arrFldName = self.reportingFieldPrefix +compfield
                            fullFieldMap = self.arrayBucket.get(arrFldName)
                            if fullFieldMap is None:
                                fullFieldMap = HashMap()
                                self.arrayBucket.put(arrFldName, fullFieldMap)
                            fullField = fullFieldMap.get("1")
                            if fullField is None:
                                fullField = ""
                            if field.endswith(self.compFieldsConfig[compfield]["end"]):
                                fullField = "%s%s%s" % (fullField, self.compFieldsConfig[compfield]["delim"] ,value)
                            if field.endswith(self.compFieldsConfig[compfield]["start"]):
                                fullField = "%s%s" % (value, fullField) 
                            self.log.debug("full field now is :%s" % fullField)
                            fullFieldMap.put("1", fullField)     

        self.utils.add(self.index, "display_type", displayType) 
        
        # Make sure we have a creation date
        if not createdDateFlag:
            self.utils.add(self.index, "date_created", self.last_modified)
            self.log.debug("Forced creation date to %s because it was not explicitly set." % self.last_modified)

        # Workflow processing
        wfStep = wfMeta.getString(None, ["step"])
        self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
        self.utils.add(self.index, "workflow_step", wfStep)
        self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
        for group in workflow_security:
            self.utils.add(self.index, "workflow_security", group)
            if self.owner is not None:
                self.utils.add(self.index, "workflow_security", self.owner)
        # set OAI-PMH status to deleted
        if wfStep == "retired":
            self.utils.add(self.index, "oai_deleted", "true")
    def __metadata(self):
        self.title = None
        self.dcType = None

        self.__checkMetadataPayload()

        jsonPayload = self.object.getPayload("metadata.json")
        json = self.utils.getJsonObject(jsonPayload.open())
        jsonPayload.close()

        metadata = json.getObject("metadata")

        identifier  = metadata.get("dc.identifier")
        self.utils.add(self.index, "dc:identifier", identifier)
        self.__storeIdentifier(identifier)
        self.utils.add(self.index, "institution", "James Cook University")
        self.utils.add(self.index, "source", "http://spatialecology.jcu.edu.au/Edgar/")
                
        data = json.getObject("data")

        ####Global settings for processing data
        ####These will need to be changed based on your system installation.
        theMintHost = java.lang.System.getProperty("mint.proxy.url")
        collectionRelationTypesFilePath = FascinatorHome.getPath() + "/../portal/default/redbox/workflows/forms/data/"
        servicesRelationTypesFilePath = FascinatorHome.getPath() + "/../portal/default/redbox/workflows/forms/data/"
        descriptionTypesFilePath = FascinatorHome.getPath() + "/../portal/default/local/workflows/forms/data/"
        relationshipTypesFilePath = FascinatorHome.getPath() + "/../portal/default/local/workflows/forms/data/"

        ###Allocating space to create the formData.tfpackage
        tfpackageData = {}

        ###Using the species name, obtained from the directory name, to replace the text in the Title
        species = data.get("species")
        title = data.get("title")
        title = title.replace("%NAME_OF_FOLDER%", species)
        self.utils.add(self.index, "dc_title", title)
        tfpackageData["dc:title"] = title
        tfpackageData["title"] = title

        self.utils.add(self.index, "dc_type", data.get("type"))
        tfpackageData["dc:type.rdf:PlainLiteral"] = data.get("type")
        tfpackageData["dc:type.skos:prefLabel"] = data.get("type")
        tfpackageData["dc:created"] = time.strftime("%Y-%m-%d", time.gmtime())
        tfpackageData["dc:modified"] = ""
        tfpackageData["dc:language.skos:prefLabel"] = "English"
        tfpackageData["dc:coverage.vivo:DateTimeInterval.vivo:start"] = data.get("temporalCoverage").get("dateFrom")
        
        dateTo = data.get("temporalCoverage").get("dateTo")
        if dateTo is not None:
            tfpackageData["dc:coverage.vivo:DateTimeInterval.vivo:end"] = dateTo
        
        tfpackageData["dc:coverage.redbox:timePeriod"] = ""

        ###Processing the 'spatialCoverage' metadata.
        spatialCoverage = data.get("spatialCoverage")
        for i in range(len(spatialCoverage)):
            location = spatialCoverage[i]
            if  location["type"] == "text":
                tfpackageData["dc:coverage.vivo:GeographicLocation." + str(i + 1) + ".dc:type"] = location["type"]
                if  (location["value"].startswith("POLYGON")):
                    tfpackageData["dc:coverage.vivo:GeographicLocation." + str(i + 1) + ".redbox:wktRaw"] = location["value"]
                tfpackageData["dc:coverage.vivo:GeographicLocation." + str(i + 1) + ".rdf:PlainLiteral"] = location["value"]

        ###Processing the 'description' metadata.
        #Reading the file here, so we only do it once.
        file = open(descriptionTypesFilePath + "descriptionTypes.json")
        descriptionData = file.read()
        file.close()
        description = data.get("description")
        for i in range(len(description)):
            desc = description[i]
            tempDesc = desc.get("value")
            tempDesc = tempDesc.replace("%NAME_OF_FOLDER%", species)
            if  (desc["type"] == "brief"):
                tfpackageData["dc:description"] = tempDesc
            tfpackageData["rif:description." + str(i + 1) + ".type"] = desc["type"]
            tfpackageData["rif:description." + str(i + 1) + ".value"] = tempDesc
            jsonSimple = JsonSimple(descriptionData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")
            #ensuring the Description Type exists
            if  results:
                for j in range(len(results)):
                    descriptionType = results[j]
                    if  (desc["type"] == descriptionType.get("id")):
                        tfpackageData["rif:description." + str(i + 1) + ".label"] = descriptionType.get("label")

        ###Processing the 'relatedPublication' metadata
        relatedPublication = data.get("relatedPublication")
        if relatedPublication is not None:
            for i in range(len(relatedPublication)):
                publication = relatedPublication[i]
                tfpackageData["dc:relation.swrc:Publication." + str(i + 1) + ".dc:identifier"] = publication["doi"]
                tfpackageData["dc:relation.swrc:Publication." + str(i + 1) + ".dc:title"] = publication["title"]

        ###Processing the 'relatedWebsite' metadata
        relatedWebsite = data.get("relatedWebsite")
        count = 0
        for i in range(len(relatedWebsite)):
            website = relatedWebsite[i]
            tfpackageData["dc:relation.bibo:Website." + str(i + 1) + ".dc:identifier"] = website["url"]
            tfpackageData["dc:relation.bibo:Website." + str(i + 1) + ".dc:title"] = website["notes"]
            count = i + 1

        ###Processing the 'data_source_website' metadata (override metadata)
        dataSourceWebsites = data.get("data_source_website")
        if  dataSourceWebsites is not None:
            for i in range(len(dataSourceWebsites)):
                website = dataSourceWebsites[i]
                type = website.get("identifier").get("type")
                if type == "uri":
                    count += 1 
                    tfpackageData["dc:relation.bibo:Website." + str(count) + ".dc:identifier"] = website.get("identifier").get("value")
                    tfpackageData["dc:relation.bibo:Website." + str(count) + ".dc:title"] = website["notes"]

        ###Processing the 'relatedCollection' metadata
        #Reading the file here, so we only do it once.
        file = open(collectionRelationTypesFilePath + "collectionRelationTypes.json")
        collectionData = file.read()
        file.close()
        relatedCollection = data.get("relatedCollection")
        recordIdentifier = ""
        if relatedCollection is not None:
            for i in range(len(relatedCollection)):
                collection = relatedCollection[i]
                tempIdentifier = collection["identifier"]
                if tempIdentifier is not None:
                    tempIdentifier = tempIdentifier.replace("%NAME_OF_FOLDER%", species)
                    recordIdentifier = tempIdentifier
                else:
                    tempIdentifier = ""
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".dc:identifier"] = tempIdentifier
                tempTitle = collection.get("title")
                tempTitle = tempTitle.replace("%NAME_OF_FOLDER%", species)
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".dc:title"] = tempTitle
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".vivo:Relationship.rdf:PlainLiteral"] = collection["relationship"]
                if  tempIdentifier == "":
                    tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".redbox:origin"] = "on"
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".redbox:publish"] =  "on"
                #Using the collection data as a lookup to obtain the 'label'
                relationShip = collection.get("relationship")
                jsonSimple = JsonSimple(collectionData)
                jsonObj = jsonSimple.getJsonObject()
                results = jsonObj.get("results")
                #ensuring the Collection Relation Types exist
                if  results:
                    for j in range(len(results)):
                        relation = results[j]
                        if  (relationShip == relation.get("id")):
                            tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".vivo:Relationship.skos:prefLabel"] = relation.get("label")

        ###Processing the 'relatedService' metadata
        #Reading the file here, so we only do it once.
        file = open(servicesRelationTypesFilePath + "serviceRelationTypes.json")
        servicesData = file.read()
        file.close()
        relatedServices = data.get("relatedService")
        recordIdentifier = ""
        if relatedServices is not None:
            for i in range(len(relatedServices)):
                service = relatedServices[i]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".dc:identifier"] = service["identifier"]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".dc:title"] = service["title"]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".vivo:Relationship.rdf:PlainLiteral"] = service["relationship"]
                #Using the services data as a lookup to obtain the 'label'
                relationShip = service.get("relationship")
                jsonSimple = JsonSimple(servicesData)
                jsonObj = jsonSimple.getJsonObject()
                results = jsonObj.get("results")
                #ensuring the Service Relation Types exist
                if  results:
                    for j in range(len(results)):
                        relation = results[j]
                        if  (relationShip == relation.get("id")):
                            tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".vivo:Relationship.skos:prefLabel"] = relation.get("label")

        ###Processing the 'associatedParty' metadata
        #Reading the file here so we only read it once.
        file = open(relationshipTypesFilePath + "relationshipTypes.json")
        relationshipData = file.read()
        file.close()
        associatedParty = data.get("associatedParty")
        for i in range(len(associatedParty)):
            party = associatedParty[i]
            email = party.get("who").get("value")
            if email is not None:
                whoType = party.get("who").get("type")
                if (whoType == 'people'):
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".dc:identifier"] = party.get("who").get("identifier")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:name"] = party.get("who").get("name")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:title"] = party.get("who").get("title")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".redbox:isCoPrimaryInvestigator"] = "off"
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".redbox:isPrimaryInvestigator"] = "on"
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:givenName"] = party.get("who").get("givenName")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:familyName"] = party.get("who").get("familyName")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".jcu:relationshipType"] = party.get("relationship")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:Organization.dc:identifier"] = party.get("affiliation").get("id")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:Organization.skos:prefLabel"] = party.get("affiliation").get("label")
                    jsonSimple = JsonSimple(relationshipData)
                    jsonObj = jsonSimple.getJsonObject()
                    results = jsonObj.get("results")
                    #ensuring the Relationship Type exists
                    if  results:
                        for j in range(len(results)):
                            relationshipType = results[j]
                            if  (party.get("relationship") == relationshipType.get("id")):
                                tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".jcu:relationshipLabel"] = relationshipType.get("label")
                    
        ###Processing 'contactInfo.email' metadata
        tfpackageData["locrel:prc.foaf:Person.dc:identifier"] = data.get("contactInfo").get("identifier")
        tfpackageData["locrel:prc.foaf:Person.foaf:name"] = data.get("contactInfo").get("name")
        tfpackageData["locrel:prc.foaf:Person.foaf:title"] = data.get("contactInfo").get("title")
        tfpackageData["locrel:prc.foaf:Person.foaf:givenName"] = data.get("contactInfo").get("givenName")
        tfpackageData["locrel:prc.foaf:Person.foaf:familyName"] = data.get("contactInfo").get("familyName")
        tfpackageData["locrel:prc.foaf:Person.foaf:email"] = data.get("contactInfo").get("email")

        ##Stored At (on the Data Management page)
        tfpackageData["vivo:Location.vivo:GeographicLocation.gn:name"] = data.get("contactInfo").get("streetAddress")                 

        ###Processing 'coinvestigators' metadata
        coinvestigators = data.get("coinvestigators")
        for i in range(len(coinvestigators)):
            tfpackageData["dc:contributor.locrel:clb." + str(i + 1) + ".foaf:Agent"] = coinvestigators[i]

        ###Processing 'anzsrcFOR' metadata
        anzsrcFOR = data.get("anzsrcFOR")
        for i in range(len(anzsrcFOR)):
            anzsrc = anzsrcFOR[i]
            tfpackageData["dc:subject.anzsrc:for." + str(i + 1) + ".skos:prefLabel"] = anzsrc.get("prefLabel")
            tfpackageData["dc:subject.anzsrc:for." + str(i + 1) + ".rdf:resource"] = anzsrc.get("resource")

        ###Processing 'anzsrcSEO' metadata                        
        anzsrcSEO = data.get("anzsrcSEO")
        for i in range(len(anzsrcSEO)):
            anzsrc = anzsrcSEO[i]
            tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) + ".skos:prefLabel"] = anzsrc.get("prefLabel")
            tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) + ".rdf:resource"] = anzsrc.get("resource")

        ###Processing 'keyword' metadata                        
        keyword = data.get("keyword")
        for i in range(len(keyword)):
            tfpackageData["dc:subject.vivo:keyword." + str(i + 1) + ".rdf:PlainLiteral"] = keyword[i]

        ###Research Themes
        theme = data.get("researchTheme")
        if  (theme == "Tropical Ecosystems, Conservation and Climate Change"):
            tfpackageData["jcu:research.themes.tropicalEcoSystems"] = "true"
        elif (theme == "Industries and Economies in the Tropics"):
            tfpackageData["jcu:research.themes.industriesEconomies"] = "true"
        elif (theme == "People and Societies in the Tropics"):
            tfpackageData["jcu:research.themes.peopleSocieties"] = "true"
        elif (theme == "Tropical Health, Medicine and Biosecurity"):
            tfpackageData["jcu:research.themes.tropicalHealth"] = "true"
        elif (theme == "Not aligned to a University theme"):
            tfpackageData["jcu:research.themes.notAligned"] = "true"
            
        tfpackageData["dc:accessRights.skos:prefLabel"] = data.get("accessRights")
        tfpackageData["dc:license.dc:identifier"] = data.get("license").get("url")
        tfpackageData["dc:license.skos:prefLabel"] = data.get("license").get("label")

        #identifier
        additionalId = data.get("additionalIdentifier")
        if additionalId is not None:
            additionalId = additionalId.replace("%NAME_OF_FOLDER%", species)
            tfpackageData["dc:identifier.rdf:PlainLiteral"] = additionalId
            tfpackageData["dc:identifier.redbox:origin"] = "external"
            tfpackageData["dc:identifier.dc:type.rdf:PlainLiteral"] = "local"
            tfpackageData["dc:identifier.dc:type.skos:prefLabel"] = "Local Identifier"
        else:
            tfpackageData["dc:identifier.redbox:origin"] = "internal"            

        dataLocation = data.get("dataLocation")
        dataLocation = dataLocation.replace("%NAME_OF_FOLDER%", species)
        tfpackageData["bibo:Website.1.dc:identifier"] = dataLocation

        #Most of the following are intentionally left blank; no mapping is required for these fields.
        tfpackageData["redbox:retentionPeriod"] = data.get("retentionPeriod")
        tfpackageData["dc:extent"] = "unknown"
        tfpackageData["redbox:disposalDate"] = ""
        tfpackageData["locrel:own.foaf:Agent.1.foaf:name"] = ""
        tfpackageData["locrel:dtm.foaf:Agent.foaf:name"] = ""

        ###Processing 'organizationalGroup' metadata
        organisationalGroup = data.get("organizationalGroup")
        for i in range(len(organisationalGroup)):
            organisation = organisationalGroup[i]
            tfpackageData["foaf:Organization.dc:identifier"] = organisation.get("identifier")
            tfpackageData["foaf:Organization.skos:prefLabel"] = organisation.get("prefLabel")

        tfpackageData["swrc:ResearchProject.dc:title"] = ""
        tfpackageData["locrel:dpt.foaf:Person.foaf:name"] = ""
        tfpackageData["dc:SizeOrDuration"] = ""
        tfpackageData["dc:Policy"] = ""

        #Citations
        citations = data.get("citations")
        for i in range(len(citations)):
            citation = citations[i]
            tfpackageData["dc:biblioGraphicCitation.redbox:sendCitation"] = citation.get("sendCitation")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:identifier.skos:note"] = citation.get("curationIdentifier") 
            paperTitle = citation.get("paperTitle")
            paperTitle = paperTitle.replace("%NAME_OF_FOLDER%", species)
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:title"] = paperTitle
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1) + ".foaf:familyName"] = citation.get("familyName")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1) + ".foaf:givenName"] = citation.get("givenName")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1) + ".foaf:title"] = title = citation.get("title")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:publisher.rdf:PlainLiteral"] = citation.get("publisher")
            url = citation.get("url")
            url = url.replace("%NAME_OF_FOLDER%", species)
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.bibo:Website.dc:identifier"] = url 
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.rdf:PlainLiteral"] = tfpackageData["dc:created"] 
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.rdf:PlainLiteral"] = "publicationDate"
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.skos:prefLabel"] = "Publication Date"
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.rdf:PlainLiteral"] = "created"
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.skos:prefLabel"] = "Date Created"
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.rdf:PlainLiteral"] = tfpackageData["dc:created"]
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.jcu:dataType"] = citation.get("dataType")
            tfpackageData["dc:biblioGraphicCitation.skos:prefLabel"] = citation.get("familyName") + ", " + citation.get("givenName") + ". (" + time.strftime("%Y", time.gmtime()) + "). " + paperTitle + ". " + citation.get("publisher") + ". [" + citation.get("dataType") + "]  {ID_WILL_BE_HERE}"

        self.__updateMetadataPayload(tfpackageData)
        self.__workflow()
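
    # A hedged sketch, not part of the original script: the label lookups above against
    # descriptionTypes.json, collectionRelationTypes.json and serviceRelationTypes.json all
    # parse the same {"results": [{"id": ..., "label": ...}, ...]} shape. A helper like the
    # following (the name __lookupLabel is hypothetical) would capture that pattern.
    def __lookupLabel(self, lookupJsonText, targetId):
        # Parse the lookup file contents and scan its "results" array for a matching id
        jsonObj = JsonSimple(lookupJsonText).getJsonObject()
        results = jsonObj.get("results")
        if results:
            for j in range(len(results)):
                entry = results[j]
                if entry.get("id") == targetId:
                    return entry.get("label")
        return None
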
    def __metadata(self):
        self.title = None
        self.dcType = None

        self.__checkMetadataPayload()

        jsonPayload = self.object.getPayload("metadata.json")
        json = self.utils.getJsonObject(jsonPayload.open())
        jsonPayload.close()

        metadata = json.getObject("metadata")

        identifier = metadata.get("dc.identifier")
        self.utils.add(self.index, "dc:identifier", identifier)
        self.__storeIdentifier(identifier)
        self.utils.add(self.index, "institution", "James Cook University")
        self.utils.add(self.index, "source",
                       "http://spatialecology.jcu.edu.au/Edgar/")

        data = json.getObject("data")

        ####Global settings for processing data
        ####These will need to be changed based on your system installation.
        theMintHost = java.lang.System.getProperty("mint.proxy.url")
        collectionRelationTypesFilePath = FascinatorHome.getPath(
        ) + "/../portal/default/redbox/workflows/forms/data/"
        servicesRelationTypesFilePath = FascinatorHome.getPath(
        ) + "/../portal/default/redbox/workflows/forms/data/"
        descriptionTypesFilePath = FascinatorHome.getPath(
        ) + "/../portal/default/local/workflows/forms/data/"
        relationshipTypesFilePath = FascinatorHome.getPath(
        ) + "/../portal/default/local/workflows/forms/data/"

        ###Allocating space to create the formData.tfpackage
        tfpackageData = {}

        ###Using the species name, obtained from the directory name, to replace the text in the Title
        species = data.get("species")
        title = data.get("title")
        title = title.replace("%NAME_OF_FOLDER%", species)
        self.utils.add(self.index, "dc_title", title)
        tfpackageData["dc:title"] = title
        tfpackageData["title"] = title

        self.utils.add(self.index, "dc_type", data.get("type"))
        tfpackageData["dc:type.rdf:PlainLiteral"] = data.get("type")
        tfpackageData["dc:type.skos:prefLabel"] = data.get("type")
        tfpackageData["dc:created"] = time.strftime("%Y-%m-%d", time.gmtime())
        tfpackageData["dc:modified"] = ""
        tfpackageData["dc:language.skos:prefLabel"] = "English"
        tfpackageData[
            "dc:coverage.vivo:DateTimeInterval.vivo:start"] = data.get(
                "temporalCoverage").get("dateFrom")

        dateTo = data.get("temporalCoverage").get("dateTo")
        if dateTo is not None:
            tfpackageData[
                "dc:coverage.vivo:DateTimeInterval.vivo:end"] = dateTo

        tfpackageData["dc:coverage.redbox:timePeriod"] = ""

        ###Processing the 'spatialCoverage' metadata.
        spatialCoverage = data.get("spatialCoverage")
        for i in range(len(spatialCoverage)):
            location = spatialCoverage[i]
            if location["type"] == "text":
                tfpackageData["dc:coverage.vivo:GeographicLocation." +
                              str(i + 1) + ".dc:type"] = location["type"]
                tfpackageData["dc:coverage.vivo:GeographicLocation." +
                              str(i + 1) + ".dc:typeLabel"] = "Free Text"
                if (location["value"].startswith("POLYGON")):
                    tfpackageData["dc:coverage.vivo:GeographicLocation." +
                                  str(i + 1) +
                                  ".redbox:wktRaw"] = location["value"]
                tfpackageData["dc:coverage.vivo:GeographicLocation." +
                              str(i + 1) +
                              ".rdf:PlainLiteral"] = location["value"]

        ###Processing the 'description' metadata.
        #Reading the file here, so we only do it once.
        file = open(descriptionTypesFilePath + "descriptionTypes.json")
        descriptionData = file.read()
        file.close()
        description = data.get("description")
        for i in range(len(description)):
            desc = description[i]
            tempDesc = desc.get("value")
            tempDesc = tempDesc.replace("%NAME_OF_FOLDER%", species)
            if (desc["type"] == "brief"):
                tfpackageData["dc:description"] = tempDesc
            tfpackageData["rif:description." + str(i + 1) +
                          ".type"] = desc["type"]
            tfpackageData["rif:description." + str(i + 1) +
                          ".value"] = tempDesc
            jsonSimple = JsonSimple(descriptionData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")
            #ensuring the Description Type exists
            if results:
                for j in range(len(results)):
                    descriptionType = results[j]
                    if (desc["type"] == descriptionType.get("id")):
                        tfpackageData["rif:description." + str(i + 1) +
                                      ".label"] = descriptionType.get("label")

        ###Processing the 'relatedPublication' metadata
        relatedPublication = data.get("relatedPublication")
        if relatedPublication is not None:
            for i in range(len(relatedPublication)):
                publication = relatedPublication[i]
                tfpackageData["dc:relation.swrc:Publication." + str(i + 1) +
                              ".dc:identifier"] = publication["doi"]
                tfpackageData["dc:relation.swrc:Publication." + str(i + 1) +
                              ".dc:title"] = publication["title"]

        ###Processing the 'relatedWebsite' metadata
        relatedWebsite = data.get("relatedWebsite")
        count = 0
        for i in range(len(relatedWebsite)):
            website = relatedWebsite[i]
            tfpackageData["dc:relation.bibo:Website." + str(i + 1) +
                          ".dc:identifier"] = website["url"]
            tfpackageData["dc:relation.bibo:Website." + str(i + 1) +
                          ".dc:title"] = website["notes"]
            count = i + 1

        ###Processing the 'data_source_website' metadata (override metadata)
        dataSourceWebsites = data.get("data_source_website")
        if dataSourceWebsites is not None:
            for i in range(len(dataSourceWebsites)):
                website = dataSourceWebsites[i]
                type = website.get("identifier").get("type")
                if type == "uri":
                    count += 1
                    tfpackageData["dc:relation.bibo:Website." + str(count) +
                                  ".dc:identifier"] = website.get(
                                      "identifier").get("value")
                    tfpackageData["dc:relation.bibo:Website." + str(count) +
                                  ".dc:title"] = website["notes"]

        ###Processing the 'relatedCollection' metadata
        #Reading the file here, so we only do it once.
        file = open(collectionRelationTypesFilePath +
                    "collectionRelationTypes.json")
        collectionData = file.read()
        file.close()
        relatedCollection = data.get("relatedCollection")
        recordIdentifier = ""
        if relatedCollection is not None:
            for i in range(len(relatedCollection)):
                collection = relatedCollection[i]
                tempIdentifier = collection["identifier"]
                if tempIdentifier is not None:
                    tempIdentifier = tempIdentifier.replace(
                        "%NAME_OF_FOLDER%", species)
                    recordIdentifier = tempIdentifier
                else:
                    tempIdentifier = ""
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                              ".dc:identifier"] = tempIdentifier
                tempTitle = collection.get("title")
                tempTitle = tempTitle.replace("%NAME_OF_FOLDER%", species)
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                              ".dc:title"] = tempTitle
                tfpackageData[
                    "dc:relation.vivo:Dataset." + str(i + 1) +
                    ".vivo:Relationship.rdf:PlainLiteral"] = collection[
                        "relationship"]
                if tempIdentifier == "":
                    tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                                  ".redbox:origin"] = "on"
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                              ".redbox:publish"] = "on"
                #Using the collection data as a lookup to obtain the 'label'
                relationShip = collection.get("relationship")
                jsonSimple = JsonSimple(collectionData)
                jsonObj = jsonSimple.getJsonObject()
                results = jsonObj.get("results")
                #ensuring the Collection Relation Types exist
                if results:
                    for j in range(len(results)):
                        relation = results[j]
                        if (relationShip == relation.get("id")):
                            tfpackageData[
                                "dc:relation.vivo:Dataset." + str(i + 1) +
                                ".vivo:Relationship.skos:prefLabel"] = relation.get(
                                    "label")

        ###Processing the 'relatedService' metadata
        #Reading the file here, so we only do it once.
        file = open(servicesRelationTypesFilePath +
                    "serviceRelationTypes.json")
        servicesData = file.read()
        file.close()
        relatedServices = data.get("relatedService")
        recordIdentifier = ""
        if relatedServices is not None:
            for i in range(len(relatedServices)):
                service = relatedServices[i]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) +
                              ".dc:identifier"] = service["identifier"]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) +
                              ".dc:title"] = service["title"]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) +
                              ".vivo:Relationship.rdf:PlainLiteral"] = service[
                                  "relationship"]
                #Using the services data as a lookup to obtain the 'label'
                relationShip = service.get("relationship")
                jsonSimple = JsonSimple(servicesData)
                jsonObj = jsonSimple.getJsonObject()
                results = jsonObj.get("results")
                #ensuring the Service Relation Types exist
                if results:
                    for j in range(len(results)):
                        relation = results[j]
                        if (relationShip == relation.get("id")):
                            tfpackageData[
                                "dc:relation.vivo:Service." + str(i + 1) +
                                ".vivo:Relationship.skos:prefLabel"] = relation.get(
                                    "label")

        ###Processing the 'associatedParty' metadata
        #Reading the file here so we only read it once.
        file = open(relationshipTypesFilePath + "relationshipTypes.json")
        relationshipData = file.read()
        file.close()
        associatedParty = data.get("associatedParty")
        if associatedParty is not None:
            for i in range(len(associatedParty)):
                party = associatedParty[i]
                email = party.get("who").get("value")
                if email is not None:
                    whoType = party.get("who").get("type")
                    if (whoType == 'people'):
                        tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                      ".dc:identifier"] = party.get("who").get(
                                          "identifier")
                        tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                      ".foaf:name"] = party.get("who").get(
                                          "name")
                        tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                      ".foaf:title"] = party.get("who").get(
                                          "title")
                        tfpackageData[
                            "dc:creator.foaf:Person." + str(i + 1) +
                            ".redbox:isCoPrimaryInvestigator"] = "off"
                        tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                      ".redbox:isPrimaryInvestigator"] = "on"
                        tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                      ".foaf:givenName"] = party.get(
                                          "who").get("givenName")
                        tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                      ".foaf:familyName"] = party.get(
                                          "who").get("familyName")
                        tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                      ".jcu:relationshipType"] = party.get(
                                          "relationship")
                        tfpackageData[
                            "dc:creator.foaf:Person." + str(i + 1) +
                            ".foaf:Organization.dc:identifier"] = party.get(
                                "affiliation").get("id")
                        tfpackageData[
                            "dc:creator.foaf:Person." + str(i + 1) +
                            ".foaf:Organization.skos:prefLabel"] = party.get(
                                "affiliation").get("label")
                        jsonSimple = JsonSimple(relationshipData)
                        jsonObj = jsonSimple.getJsonObject()
                        results = jsonObj.get("results")
                        #ensuring the Relationship Type exists
                        if results:
                            for j in range(len(results)):
                                relationshipType = results[j]
                                if (party.get("relationship") ==
                                        relationshipType.get("id")):
                                    tfpackageData[
                                        "dc:creator.foaf:Person." +
                                        str(i + 1) +
                                        ".jcu:relationshipLabel"] = relationshipType.get(
                                            "label")

        ###Processing 'contactInfo.email' metadata
        tfpackageData["locrel:prc.foaf:Person.dc:identifier"] = data.get(
            "contactInfo").get("identifier")
        tfpackageData["locrel:prc.foaf:Person.foaf:name"] = data.get(
            "contactInfo").get("name")
        tfpackageData["locrel:prc.foaf:Person.foaf:title"] = data.get(
            "contactInfo").get("title")
        tfpackageData["locrel:prc.foaf:Person.foaf:givenName"] = data.get(
            "contactInfo").get("givenName")
        tfpackageData["locrel:prc.foaf:Person.foaf:familyName"] = data.get(
            "contactInfo").get("familyName")
        tfpackageData["locrel:prc.foaf:Person.foaf:email"] = data.get(
            "contactInfo").get("email")

        ##Stored At (on the Data Management page)
        tfpackageData[
            "vivo:Location.vivo:GeographicLocation.gn:name"] = data.get(
                "contactInfo").get("streetAddress")

        ###Processing 'coinvestigators' metadata
        coinvestigators = data.get("coinvestigators")
        for i in range(len(coinvestigators)):
            tfpackageData["dc:contributor.locrel:clb." + str(i + 1) +
                          ".foaf:Agent"] = coinvestigators[i]

        ###Processing 'anzsrcFOR' metadata
        anzsrcFOR = data.get("anzsrcFOR")
        for i in range(len(anzsrcFOR)):
            anzsrc = anzsrcFOR[i]
            tfpackageData["dc:subject.anzsrc:for." + str(i + 1) +
                          ".skos:prefLabel"] = anzsrc.get("prefLabel")
            tfpackageData["dc:subject.anzsrc:for." + str(i + 1) +
                          ".rdf:resource"] = anzsrc.get("resource")

        ###Processing 'anzsrcSEO' metadata
        anzsrcSEO = data.get("anzsrcSEO")
        for i in range(len(anzsrcSEO)):
            anzsrc = anzsrcSEO[i]
            tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) +
                          ".skos:prefLabel"] = anzsrc.get("prefLabel")
            tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) +
                          ".rdf:resource"] = anzsrc.get("resource")

        ###Processing 'keyword' metadata
        keyword = data.get("keyword")
        for i in range(len(keyword)):
            tfpackageData["dc:subject.vivo:keyword." + str(i + 1) +
                          ".rdf:PlainLiteral"] = keyword[i]

        ###Research Themes
        theme = data.get("researchTheme")
        if (theme == "Tropical Ecosystems, Conservation and Climate Change"):
            tfpackageData["jcu:research.themes.tropicalEcoSystems"] = "true"
        elif (theme == "Industries and Economies in the Tropics"):
            tfpackageData["jcu:research.themes.industriesEconomies"] = "true"
        elif (theme == "People and Societies in the Tropics"):
            tfpackageData["jcu:research.themes.peopleSocieties"] = "true"
        elif (theme == "Tropical Health, Medicine and Biosecurity"):
            tfpackageData["jcu:research.themes.tropicalHealth"] = "true"
        elif (theme == "Not aligned to a University theme"):
            tfpackageData["jcu:research.themes.notAligned"] = "true"

        tfpackageData["dc:accessRights.skos:prefLabel"] = data.get(
            "accessRights")
        tfpackageData["dc:license.dc:identifier"] = data.get("license").get(
            "url")
        tfpackageData["dc:license.skos:prefLabel"] = data.get("license").get(
            "label")

        #identifier
        additionalId = data.get("additionalIdentifier")
        if additionalId is not None:
            additionalId = additionalId.replace("%NAME_OF_FOLDER%", species)
            tfpackageData["dc:identifier.rdf:PlainLiteral"] = additionalId
            tfpackageData["dc:identifier.redbox:origin"] = "external"
            tfpackageData["dc:identifier.dc:type.rdf:PlainLiteral"] = "local"
            tfpackageData[
                "dc:identifier.dc:type.skos:prefLabel"] = "Local Identifier"
        else:
            tfpackageData["dc:identifier.redbox:origin"] = "internal"

        dataLocation = data.get("dataLocation")
        dataLocation = dataLocation.replace("%NAME_OF_FOLDER%", species)
        tfpackageData["bibo:Website.1.dc:identifier"] = dataLocation

        #Most of the following are intentionally left blank; no mapping is required for these fields.
        tfpackageData["redbox:retentionPeriod"] = data.get("retentionPeriod")
        tfpackageData["dc:extent"] = "unknown"
        tfpackageData["redbox:disposalDate"] = ""
        tfpackageData["locrel:own.foaf:Agent.1.foaf:name"] = ""
        tfpackageData["locrel:dtm.foaf:Agent.foaf:name"] = ""

        ###Processing 'organizationalGroup' metadata
        organisationalGroup = data.get("organizationalGroup")
        for i in range(len(organisationalGroup)):
            organisation = organisationalGroup[i]
            tfpackageData[
                "foaf:Organization.dc:identifier"] = organisation.get(
                    "identifier")
            tfpackageData[
                "foaf:Organization.skos:prefLabel"] = organisation.get(
                    "prefLabel")

        tfpackageData["swrc:ResearchProject.dc:title"] = ""
        tfpackageData["locrel:dpt.foaf:Person.foaf:name"] = ""
        tfpackageData["dc:SizeOrDuration"] = ""
        tfpackageData["dc:Policy"] = ""

        #Citations
        citations = data.get("citations")
        for i in range(len(citations)):
            citation = citations[i]
            tfpackageData[
                "dc:biblioGraphicCitation.redbox:sendCitation"] = citation.get(
                    "sendCitation")
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:identifier.skos:note"] = citation.get(
                    "curationIdentifier")
            paperTitle = citation.get("paperTitle")
            paperTitle = paperTitle.replace("%NAME_OF_FOLDER%", species)
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:title"] = paperTitle
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." +
                          str(i + 1) +
                          ".foaf:familyName"] = citation.get("familyName")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." +
                          str(i + 1) +
                          ".foaf:givenName"] = citation.get("givenName")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." +
                          str(i + 1) +
                          ".foaf:title"] = title = citation.get("title")
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:publisher.rdf:PlainLiteral"] = citation.get(
                    "publisher")
            url = citation.get("url")
            url = url.replace("%NAME_OF_FOLDER%", species)
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.bibo:Website.dc:identifier"] = url
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.1.rdf:PlainLiteral"] = tfpackageData[
                    "dc:created"]
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.rdf:PlainLiteral"] = "publicationDate"
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.skos:prefLabel"] = "Publication Date"
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.rdf:PlainLiteral"] = "created"
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.skos:prefLabel"] = "Date Created"
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.2.rdf:PlainLiteral"] = tfpackageData[
                    "dc:created"]
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.jcu:dataType"] = citation.get(
                    "dataType")
            tfpackageData[
                "dc:biblioGraphicCitation.skos:prefLabel"] = citation.get(
                    "familyName"
                ) + ", " + citation.get("givenName") + ". (" + time.strftime(
                    "%Y",
                    time.gmtime()) + "). " + paperTitle + ". " + citation.get(
                        "publisher") + ". [" + citation.get(
                            "dataType") + "]  {ID_WILL_BE_HERE}"

        self.__updateMetadataPayload(tfpackageData)
        self.__workflow()
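
    # A minimal sketch only, not from the original transform: many fields above (title,
    # descriptions, identifiers, dataLocation, citation fields) substitute the species name
    # into the "%NAME_OF_FOLDER%" placeholder. A small helper such as this (the name
    # __substituteSpecies is hypothetical) would avoid repeating the replace call.
    def __substituteSpecies(self, value, species):
        # Swap the folder-name placeholder for the species, leaving missing values untouched
        if value is None:
            return None
        return value.replace("%NAME_OF_FOLDER%", species)
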
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "packaging"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        try:
            wfPayload = self.object.getPayload("workflow.metadata")
            wfMeta = self.utils.getJsonObject(wfPayload.open())
            wfPayload.close()

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")

            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])

            # Form processing
            formData = wfMeta.getObject(["formData"])
            if formData is not None:
                formData = JsonSimple(formData)
            else:
                formData = None

            coreFields = ["title", "creator", "contributor", "description", "format", "creationDate"]
            if formData is not None:
                # Core fields
                formTitles = formData.getStringList(["title"])
                if formTitles:
                    for formTitle in formTitles:
                        if self.title is None:
                            self.title = formTitle
                creator = formData.getStringList(["creator"])
                if creator:
                    self.creatorList = creator
                contributor = formData.getStringList(["contributor"])
                if contributor:
                    self.contributorList = contributor
                description = formData.getStringList(["description"])
                if description:
                    self.descriptionList = description
                format = formData.getStringList(["format"])
                if format:
                    self.formatList = format
                creation = formData.getStringList(["creationDate"])
                if creation:
                    self.creationDate = creation
                # Non-core fields
                data = formData.getJsonObject()
                for field in data.keySet():
                    if field not in coreFields:
                        self.customFields[field] = formData.getStringList([field])

        except StorageException, e:
            # No workflow payload, time to create
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", "pending")
            wfMetaObj.put("pageTitle", "Uploaded Files - Management")
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == "pending":
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])
    def __metadata(self):
        jsonPayload = self.object.getPayload("metadata.json")
        json = self.utils.getJsonObject(jsonPayload.open())
        jsonPayload.close()

        metadata = json.getObject("metadata")
        self.utils.add(self.index, "dc_identifier", metadata.get("dc.identifier"))
                
        data = json.getObject("data")

        ####Global settings for processing data
        ####These will need to be changed based on your system installation.
        theMintHost = "http://*****:*****@example.edu.au"
                sock = urllib.urlopen(theMintHost + "/mint/default/opensearch/lookup?count=999&searchTerms=Email:" + email)
                mintData = sock.read()
                sock.close()
                jsonSimple = JsonSimple(mintData)
                jsonObj = jsonSimple.getJsonObject()
                results = jsonObj.get("results")
                #Ensuring that the Email identified a Party from The Mint
                if  results:
                    resultMetadata = JsonObject(results.get(0))
                    allData = resultMetadata.get("result-metadata")
                    creator = allData.get("all")
                    whoType = party.get("who").get("type")
                    if ((creator is not None) and (whoType == 'people')):
                        self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".dc_identifier", creator.get("dc_identifier")[0])
                        self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".foaf_name", creator.get("dc_title"))
                        self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".foaf_title", creator.get("Honorific")[0])
                        self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".redbox_isCoPrimaryInvestigator", "off")
                        self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".redbox_isPrimaryInvestigator", "on")
                        self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".foaf_givenName", creator.get("Given_Name")[0])
                        self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".foaf_familyName", creator.get("Family_Name")[0])

        ###Processing 'contactInfo.email' metadata
        contactInfoEmail = data.get("contactInfo").get("email")
        #Using the email address to obtain details from The Mint
        #For testing, hard coded email address
        #contactInfoEmail = "*****@*****.**"
        sock = urllib.urlopen(theMintHost + "/mint/default/opensearch/lookup?count=999&searchTerms=Email:" + contactInfoEmail)
        mintData = sock.read()
        sock.close()
        jsonSimple = JsonSimple(mintData)
        jsonObj = jsonSimple.getJsonObject()
        results = jsonObj.get("results")
        #Ensuring that the Email identified a Party from The Mint
        if  results:
            resultMetadata = JsonObject(results.get(0))
            allData = resultMetadata.get("result-metadata")
            creator = allData.get("all")
            if (creator is not None):
                self.utils.add(self.index, "locrel_prc.foaf_Person.dc_identifier", creator.get("dc_identifier").toString())
                self.utils.add(self.index, "locrel_prc.foaf_Person.foaf_name", creator.get("dc_title"))
                self.utils.add(self.index, "locrel_prc.foaf_Person.foaf_title", creator.get("Honorific").toString())
                self.utils.add(self.index, "locrel_prc.foaf_Person.foaf_givenName", creator.get("Given_Name").toString())
                self.utils.add(self.index, "locrel_prc.foaf_Person.foaf_familyName", creator.get("Family_Name").toString())

        ###Processing 'coinvestigators' metadata
        coinvestigators = data.get("coinvestigators")
        for i in range(len(coinvestigators)):
            self.utils.add(self.index, "dc_contributor.loclrel_clb." + str(i) + ".foaf_Agent" , coinvestigators[i])            

        ###Processing 'anzsrcFOR' metadata
        anzsrcFOR = data.get("anzsrcFOR")
        for i in range(len(anzsrcFOR)):
            anzsrc = anzsrcFOR[i]
            #Querying against The Mint using only the first 4 digits of the ANZSRC code; this ensures a result
            sock = urllib.urlopen(theMintHost + "/mint/ANZSRC_FOR/opensearch/lookup?count=999&level=http://purl.org/asc/1297.0/2008/for/" + anzsrc[:4])
            mintData = sock.read()
            sock.close()
            jsonSimple = JsonSimple(mintData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")      
            #ensuring that anzsrc identified a record in The Mint
            if  results:
                for j in range(len(results)):
                    result = JsonObject(results.get(j))
                    rdfAbout = result.get("rdf:about")
                    target = "http://purl.org/asc/1297.0/2008/for/" + anzsrc
                    if  (rdfAbout == target):
                        self.utils.add(self.index, "dc_subject.anzsrc_for." + str(i) + ".skos_prefLabel" , result.get("skos:prefLabel"))            
                        self.utils.add(self.index, "dc_subject.anzsrc_for." + str(i) + ".rdf:resource" , rdfAbout)            

        ###Processing 'anzsrcSEO' metadata                        
        anzsrcSEO = data.get("anzsrcSEO")
        for i in range(len(anzsrcSEO)):
            anzsrc = anzsrcSEO[i]
            #Querying against The Mint using only the first 4 digits of the ANZSRC code; this ensures a result
            sock = urllib.urlopen(theMintHost + "/mint/ANZSRC_SEO/opensearch/lookup?count=999&level=http://purl.org/asc/1297.0/2008/seo/" + anzsrc[:4])
            mintData = sock.read()
            sock.close()
            jsonSimple = JsonSimple(mintData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")      
            #ensuring that anzsrc identified a record in The Mint
            if  results:
                for j in range(len(results)):
                    result = JsonObject(results.get(j))
                    rdfAbout = result.get("rdf:about")
                    target = "http://purl.org/asc/1297.0/2008/seo/" + anzsrc
                    if  (rdfAbout == target):
                        self.utils.add(self.index, "dc_subject.anzsrc_seo." + str(i) + ".skos_prefLabel" , result.get("skos:prefLabel"))            
                        self.utils.add(self.index, "dc_subject.anzsrc_seo." + str(i) + ".rdf:resource" , rdfAbout)            

        ###Processing 'keyword' metadata                        
        keyword = data.get("keyword")
        for i in range(len(keyword)):
            self.utils.add(self.index, "dc_subject.vivo_keyword." + str(i) + ".rdf_PlainLiteral", keyword[i])

        self.utils.add(self.index, "dc_accessRights.skos_prefLabel", data.get("accessRights"))
        self.utils.add(self.index, "dc_license.dc_identifier", data.get("license").get("url"))
        self.utils.add(self.index, "dc_license.skos_prefLabel", data.get("license").get("label"))
        self.utils.add(self.index, "dc_identifier.redbox_origin", "internal")

        dataLocation = data.get("dataLocation")
        dataLocation = dataLocation.replace("%NAME_OF_FOLDER%", species)
        self.utils.add(self.index, "bibo_Website.1.dc_identifier", dataLocation)

        #Most of the following are intentionally left blank; no mapping is required for these fields.
        self.utils.add(self.index, "vivo_Location", "")
        self.utils.add(self.index, "redbox_retentionPeriod", data.get("retentionPeriod"))
        self.utils.add(self.index, "dc_extent", "unknown")
        self.utils.add(self.index, "redbox_disposalDate", "")
        self.utils.add(self.index, "locrel_own.foaf_Agent.1_foaf_name", "")
        self.utils.add(self.index, "locrel_dtm.foaf_Agent.foaf_name", "")

        ###Processing 'organizationalGroup' metadata
        organisationalGroup = data.get("organizationalGroup")
        for i in range(len(organisationalGroup)):
            organisation = organisationalGroup[i]
            #Querying against The Mint
            sock = urllib.urlopen(theMintHost + "/mint/Parties_Groups/opensearch/lookup?count=9999&searchTerms=ID:" + organisation)
            mintData = sock.read()
            sock.close()
            jsonSimple = JsonSimple(mintData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")      
            #ensuring that the organisation ID identified a record in The Mint
            if  results:
                resultMetadata = JsonObject(results.get(0))
                allData = resultMetadata.get("result-metadata")
                orgGroup = allData.get("all")
                self.utils.add(self.index, "foaf_Organization.dc_identifier", orgGroup.get("dc_identifier")[0])
                self.utils.add(self.index, "foaf_Organization.skos_prefLabel", orgGroup.get("Name")[0])


        self.utils.add(self.index, "foaf_fundedBy.foaf_Agent", "")
        self.utils.add(self.index, "foaf_fundedBy.vivo_Grant", "")
        self.utils.add(self.index, "swrc_ResearchProject.dc_title", "")
        self.utils.add(self.index, "locrel_dpt.foaf_Person.foaf_name", "")
        self.utils.add(self.index, "dc_SizeOrDuration", "")
        self.utils.add(self.index, "dc_Policy", "")
        self.utils.add(self.index, "redbox_ManagementPlan", "")
Beispiel #33
0
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "dataset"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        stages = self.config.getJsonSimpleList(["stages"])
        if self.owner == "guest":
            pageTitle = "Submission Request"
            displayType = "submission-request"
            initialStep = 0
        else:
            pageTitle = "Metadata Record"
            displayType = "package-dataset"
            initialStep = 1
        try:
            wfMeta = self.__getJsonPayload("workflow.metadata")
            wfMeta.getJsonObject().put("pageTitle", pageTitle)

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(
                    None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")
            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put(
                        "label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])
        except StorageException:
            # No workflow payload, time to create
            initialStage = stages.get(initialStep).getString(None, ["name"])
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", initialStage)
            wfMetaObj.put("pageTitle", pageTitle)
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == initialStage:
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])

        # Has the workflow metadata changed?
        if wfChanged == True:
            inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(self.object,
                                                   "workflow.metadata",
                                                   inStream)
            except StorageException:
                print " ERROR updating dataset payload"

        # Form processing
        coreFields = [
            "title", "description", "manifest", "metaList", "relationships",
            "responses"
        ]
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
            # Core fields
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            # Non-core fields
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])

        # Manifest processing (formData not present in wfMeta)
        manifest = self.__getJsonPayload(self.packagePid)
        formTitles = manifest.getStringList(["title"])
        if formTitles:
            for formTitle in formTitles:
                if self.title is None:
                    self.title = formTitle
        self.descriptionList = [manifest.getString("", ["description"])]

        #Used to make sure we have a created date
        createdDateFlag = False

        formData = manifest.getJsonObject()

        for field in formData.keySet():
            if field not in coreFields:
                value = formData.get(field)
                if value is not None and value.strip() != "":
                    self.utils.add(self.index, field, value)
                    # We want to sort by date of creation, so it
                    # needs to be indexed as a date (ie. 'date_*')
                    if field == "dc:created":
                        parsedTime = time.strptime(value, "%Y-%m-%d")
                        solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                                 parsedTime)
                        self.utils.add(self.index, "date_created", solrTime)
                        self.log.debug("Set created date to :%s" % solrTime)
                        createdDateFlag = True
                    elif field == "redbox:embargo.dc:date":
                        self.embargoedDate = value
                    elif field == "create_timestamp":
                        self.createTimeStamp = value
                    # try to extract some common fields for faceting
                    if field.startswith("dc:") and \
                            not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                              or field.endswith(".dc:identifier") \
                              or field.endswith(".rdf:resource")):
                        # index dublin core fields for faceting
                        basicField = field.replace("dc:", "dc_")
                        dot = field.find(".")
                        if dot > 0:
                            facetField = basicField[:dot]
                        else:
                            facetField = basicField
                        #print "Indexing DC field '%s':'%s'" % (field, facetField)
                        if facetField == "dc_title":
                            if self.title is None:
                                self.title = value
                        elif facetField == "dc_type":
                            if self.dcType is None:
                                self.dcType = value
                        elif facetField == "dc_creator":
                            if basicField.endswith("foaf_name"):
                                self.utils.add(self.index, "dc_creator", value)
                        else:
                            self.utils.add(self.index, facetField, value)
                        # index keywords for lookup
                        if field.startswith("dc:subject.vivo:keyword."):
                            self.utils.add(self.index, "keywords", value)
                    # check if this is an array field
                    fnameparts = field.split(":")
                    if fnameparts is not None and len(fnameparts) >= 3:
                        if field.startswith("bibo") or field.startswith(
                                "skos"):
                            arrParts = fnameparts[1].split(".")
                        else:
                            arrParts = fnameparts[2].split(".")
                        # we're not interested in: Relationship, Type and some redbox:origin
                        if arrParts is not None and len(
                                arrParts) >= 2 and field.find(
                                    ":Relationship.") == -1 and field.find(
                                        "dc:type") == -1 and field.find(
                                            "redbox:origin"
                                        ) == -1 and arrParts[1].isdigit():
                            # we've got an array field
                            fldPart = ":%s" % arrParts[0]
                            prefixEndIdx = field.find(fldPart) + len(fldPart)
                            suffixStartIdx = prefixEndIdx + len(
                                arrParts[1]) + 1
                            arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx] + field[
                                suffixStartIdx:]
                            if field.endswith("Name"):
                                arrFldName = self.reportingFieldPrefix + field[:
                                                                               prefixEndIdx]
                            self.log.debug(
                                "Array Field name is:%s  from: %s, with value:%s"
                                % (arrFldName, field, value))

                            if field.endswith("Name"):
                                fullFieldMap = self.arrayBucket.get(arrFldName)
                                if fullFieldMap is None:
                                    fullFieldMap = HashMap()
                                    self.arrayBucket.put(
                                        arrFldName, fullFieldMap)
                                idx = arrParts[1]
                                fullField = fullFieldMap.get(idx)
                                if (fullField is None):
                                    fullField = ""
                                if (field.endswith("givenName")):
                                    fullField = "%s, %s" % (fullField, value)
                                if (field.endswith("familyName")):
                                    fullField = "%s%s" % (value, fullField)
                                self.log.debug("fullname now is :%s" %
                                               fullField)
                                fullFieldMap.put(idx, fullField)
                            else:
                                fieldlist = self.arrayBucket.get(arrFldName)
                                if fieldlist is None:
                                    fieldlist = []
                                    self.arrayBucket.put(arrFldName, fieldlist)
                                fieldlist.append(value)
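                    # Worked example (hypothetical field name): for
                    # "dc:creator.foaf:Person.1.foaf:givenName" the index ".1" is
                    # stripped out above, so each "...Person.N.foaf:givenName" /
                    # "...familyName" pair is merged into one "familyName, givenName"
                    # string stored under reportingFieldPrefix + "dc:creator.foaf:Person",
                    # keyed by N in arrayBucket.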

                    for compfield in self.compFields:
                        if field.startswith(compfield):
                            arrFldName = self.reportingFieldPrefix + compfield
                            fullFieldMap = self.arrayBucket.get(arrFldName)
                            if fullFieldMap is None:
                                fullFieldMap = HashMap()
                                self.arrayBucket.put(arrFldName, fullFieldMap)
                            fullField = fullFieldMap.get("1")
                            if fullField is None:
                                fullField = ""
                            if field.endswith(
                                    self.compFieldsConfig[compfield]["end"]):
                                fullField = "%s%s%s" % (
                                    fullField,
                                    self.compFieldsConfig[compfield]["delim"],
                                    value)
                            if field.endswith(
                                    self.compFieldsConfig[compfield]["start"]):
                                fullField = "%s%s" % (value, fullField)
                            self.log.debug("full field now is :%s" % fullField)
                            fullFieldMap.put("1", fullField)

        self.utils.add(self.index, "display_type", displayType)

        # Make sure we have a creation date
        if not createdDateFlag:
            self.utils.add(self.index, "date_created", self.last_modified)
            self.log.debug(
                "Forced creation date to %s because it was not explicitly set."
                % self.last_modified)

        # Workflow processing
        wfStep = wfMeta.getString(None, ["step"])
        self.utils.add(self.index, "workflow_id",
                       wfMeta.getString(None, ["id"]))
        self.utils.add(self.index, "workflow_step", wfStep)
        self.utils.add(self.index, "workflow_step_label",
                       wfMeta.getString(None, ["label"]))
        for group in workflow_security:
            self.utils.add(self.index, "workflow_security", group)
            if self.owner is not None:
                self.utils.add(self.index, "workflow_security", self.owner)
        # set OAI-PMH status to deleted
        if wfStep == "retired":
            self.utils.add(self.index, "oai_deleted", "true")
Beispiel #34
0
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "workflow1"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        try:
            wfPayload = self.object.getPayload("workflow.metadata")
            wfMeta = self.utils.getJsonObject(wfPayload.open())
            wfPayload.close()

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(
                    None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")

            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])
            # Security change
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put(
                        "label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])

            # Form processing
            formData = wfMeta.getObject(["formData"])
            if formData is not None:
                formData = JsonSimple(formData)
            else:
                formData = None

            coreFields = [
                "title", "creator", "contributor", "description", "format",
                "creationDate"
            ]
            if formData is not None:
                # Core fields
                formTitles = formData.getStringList(["title"])
                if formTitles:
                    for formTitle in formTitles:
                        if self.title is None:
                            self.title = formTitle
                creator = formData.getStringList(["creator"])
                if creator:
                    self.creatorList = creator
                contributor = formData.getStringList(["contributor"])
                if contributor:
                    self.contributorList = contributor
                description = formData.getStringList(["description"])
                if description:
                    self.descriptionList = description
                format = formData.getStringList(["format"])
                if format:
                    self.formatList = format
                creation = formData.getStringList(["creationDate"])
                if creation:
                    self.creationDate = creation
                # Non-core fields
                data = formData.getJsonObject()
                for field in data.keySet():
                    if field not in coreFields:
                        self.customFields[field] = formData.getStringList(
                            [field])

        except StorageException, e:
            # No workflow payload, time to create
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", "pending")
            wfMetaObj.put("pageTitle", "Uploaded Files - Management")
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == "pending":
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])
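For reference, the workflow metadata object built by the fallback branch above would serialise to roughly the following. This is only a sketch: the "label" value is assumed, since it is copied from whatever the matching "pending" stage defines in the workflow configuration.

    # Approximate shape of the freshly created workflow.metadata (values illustrative)
    newWorkflowMetadata = {
        "id": "workflow1",
        "step": "pending",
        "pageTitle": "Uploaded Files - Management",
        "label": "Pending",  # assumed stage label from the workflow config
    }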
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "dataset"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        stages = self.config.getJsonSimpleList(["stages"])
        #if self.owner == "guest":
        #    pageTitle = "Submission Request"
        #    displayType = "submission-request"
        #    initialStep = 0
        #else:
        #    pageTitle = "Metadata Record"
        #    displayType = "package-dataset"
        #    initialStep = 1

        ## Harvesting straight into the 'Published' stage
        pageTitle = "Metadata Record"
        displayType = "package-dataset"
        #initialStep = 4
        initialStep = 3

        try:
            wfMeta = self.__getJsonPayload("workflow.metadata")
            wfMeta.getJsonObject().put("pageTitle", pageTitle)

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(
                    None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")
            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put(
                        "label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])
        except StorageException:
            # No workflow payload, time to create

            initialStage = stages.get(initialStep).getString(None, ["name"])
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", initialStage)
            wfMetaObj.put("pageTitle", pageTitle)
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == initialStage:
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])

        # Has the workflow metadata changed?
        if wfChanged == True:
            inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(self.object,
                                                   "workflow.metadata",
                                                   inStream)
            except StorageException:
                print(" ERROR updating dataset payload")

        # Form processing
        coreFields = [
            "title", "description", "manifest", "metaList", "relationships",
            "responses"
        ]
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
            # Core fields
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            # Non-core fields
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])

        # Manifest processing (formData not present in wfMeta)
        manifest = self.__getJsonPayload(self.packagePid)
        formTitles = manifest.getStringList(["title"])
        if formTitles:
            for formTitle in formTitles:
                if self.title is None:
                    self.title = formTitle
        self.descriptionList = [manifest.getString("", ["description"])]
        formData = manifest.getJsonObject()
        for field in formData.keySet():
            if field not in coreFields:
                value = formData.get(field)
                if value is not None and value.strip() != "":
                    self.utils.add(self.index, field, value)
                    # We want to sort by date of creation, so it
                    # needs to be indexed as a date (ie. 'date_*')
                    if field == "dc:created":
                        parsedTime = time.strptime(value, "%Y-%m-%d")
                        solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                                 parsedTime)
                        self.utils.add(self.index, "date_created", solrTime)
                    # try to extract some common fields for faceting
                    if field.startswith("dc:") and \
                            not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                              or field.endswith(".dc:identifier") \
                              or field.endswith(".rdf:resource")):
                        # index dublin core fields for faceting
                        basicField = field.replace("dc:", "dc_")
                        dot = field.find(".")
                        if dot > 0:
                            facetField = basicField[:dot]
                        else:
                            facetField = basicField
                        #print "Indexing DC field '%s':'%s'" % (field, facetField)
                        if facetField == "dc_title":
                            if self.title is None:
                                self.title = value
                        elif facetField == "dc_type":
                            if self.dcType is None:
                                self.dcType = value
                        elif facetField == "dc_creator":
                            if basicField.endswith("foaf_name"):
                                self.utils.add(self.index, "dc_creator", value)
                        else:
                            self.utils.add(self.index, facetField, value)
                        # index keywords for lookup
                        if field.startswith("dc:subject.vivo:keyword."):
                            self.utils.add(self.index, "keywords", value)

        self.utils.add(self.index, "display_type", displayType)

        # Workflow processing
        wfStep = wfMeta.getString(None, ["step"])
        self.utils.add(self.index, "workflow_id",
                       wfMeta.getString(None, ["id"]))
        self.utils.add(self.index, "workflow_step", wfStep)
        self.utils.add(self.index, "workflow_step_label",
                       wfMeta.getString(None, ["label"]))
        for group in workflow_security:
            self.utils.add(self.index, "workflow_security", group)
            if self.owner is not None:
                self.utils.add(self.index, "workflow_security", self.owner)
        # set OAI-PMH status to deleted
        if wfStep == "retired":
            self.utils.add(self.index, "oai_deleted", "true")
    def __metadata(self):
        self.title = None
        self.dcType = None

        self.__checkMetadataPayload()

        jsonPayload = self.object.getPayload("metadata.json")
        json = self.utils.getJsonObject(jsonPayload.open())
        jsonPayload.close()

        metadata = json.getObject("metadata")

        identifier = metadata.get("dc.identifier")
        self.utils.add(self.index, "dc:identifier", identifier)
        self.__storeIdentifier(identifier)
        self.utils.add(self.index, "institution", "James Cook University")
        self.utils.add(self.index, "source",
                       "http://spatialecology.jcu.edu.au/Edgar/")

        data = json.getObject("data")

        ####Global settings for processing data
        ####These will need to be changed based on your system installation.
        theMintHost = java.lang.System.getProperty("mint.proxy.url")
        collectionRelationTypesFilePath = FascinatorHome.getPath(
        ) + "/../portal/default/redbox/workflows/forms/data/"
        servicesRelationTypesFilePath = FascinatorHome.getPath(
        ) + "/../portal/default/redbox/workflows/forms/data/"
        descriptionTypesFilePath = FascinatorHome.getPath(
        ) + "/../portal/default/local/workflows/forms/data/"
        relationshipTypesFilePath = FascinatorHome.getPath(
        ) + "/../portal/default/local/workflows/forms/data/"

        ###Allocating space to create the formData.tfpackage
        tfpackageData = {}

        # We will do string substitutions on data that we get from the default json.
        # We always replace ${NAME_OF_FOLDER} with the name of the folder; if the
        # override json contains a key "DATA_SUBSTITUTIONS", then we also substitute
        # stuff we find there.

        # so: start with just wanting ${NAME_OF_FOLDER} replaced with the actual directory name
        dirName = data.get("harvest_dir_name")
        replacements = {'NAME_OF_FOLDER': dirName}

        # is there a DATA_SUBSTITUTIONS key?  If so, add those in.
        additionalReplacements = data.get("DATA_SUBSTITUTIONS")
        if additionalReplacements:
            replacements.update(additionalReplacements)

        # now there's a replacements dictionary with the replacements we want
        # to do on our incoming JSON strings.

        # FANCY PART---------------------------------------------
        # Now it gets a bit fancy: Here's a method that does a
        # get-and-replace all in one go.  That makes the rest of
        # this __metadata() method much simpler and more readable.
        #
        # Because this method is defined inside this __metadata()
        # method, it already knows about the replacements var we
        # just made.

        # dataBucket is the thing that has the data.  key is the
        # name of the field you want to get.
        def getAndReplace(dataBucket, key):
            temp = dataBucket.get(key)  # fetch the value
            if isinstance(temp, basestring):  # if the value is a string, do our replacements
                return Template(temp).safe_substitute(replacements)
            else:  # not a string (or missing), just hand it back
                return temp

        # END OF FANCY PART -------------------------------------
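        # Illustration with hypothetical values: safe_substitute fills in the
        # placeholders it knows about and leaves unknown ones untouched rather
        # than raising KeyError. With replacements = {'NAME_OF_FOLDER': 'koala_tracking'}:
        #   Template("Survey of ${NAME_OF_FOLDER}").safe_substitute(replacements)
        #     -> "Survey of koala_tracking"
        #   Template("${NOT_DEFINED}").safe_substitute(replacements)
        #     -> "${NOT_DEFINED}"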

        title = getAndReplace(data, "title")
        self.utils.add(self.index, "dc_title", title)
        tfpackageData["dc:title"] = title
        tfpackageData["title"] = title

        self.utils.add(self.index, "dc_type", data.get("type"))
        tfpackageData["dc:type.rdf:PlainLiteral"] = data.get("type")
        tfpackageData["dc:type.skos:prefLabel"] = data.get("type")
        tfpackageData["dc:created"] = time.strftime("%Y-%m-%d", time.gmtime())
        tfpackageData["dc:modified"] = ""
        tfpackageData["dc:language.skos:prefLabel"] = "English"
        tfpackageData[
            "dc:coverage.vivo:DateTimeInterval.vivo:start"] = data.get(
                "temporalCoverage").get("dateFrom")

        dateTo = data.get("temporalCoverage").get("dateTo")
        if dateTo is not None:
            tfpackageData[
                "dc:coverage.vivo:DateTimeInterval.vivo:end"] = dateTo

        tfpackageData["dc:coverage.redbox:timePeriod"] = ""

        ###Processing the 'spatialCoverage' metadata.
        spatialCoverage = data.get("spatialCoverage")
        for i in range(len(spatialCoverage)):
            location = spatialCoverage[i]
            if location["type"] == "text":
                tfpackageData["dc:coverage.vivo:GeographicLocation." +
                              str(i + 1) + ".dc:type"] = location["type"]
                location_value = getAndReplace(location, "value")
                if location_value.startswith("POLYGON"):
                    tfpackageData["dc:coverage.vivo:GeographicLocation." +
                                  str(i + 1) +
                                  ".redbox:wktRaw"] = location_value
                tfpackageData["dc:coverage.vivo:GeographicLocation." +
                              str(i + 1) +
                              ".rdf:PlainLiteral"] = location_value

        ###Processing the 'description' metadata.
        #Reading the file here, so we only do it once.
        file = open(descriptionTypesFilePath + "descriptionTypes.json")
        descriptionData = file.read()
        file.close()
        description = data.get("description")
        for i in range(len(description)):
            desc = description[i]
            tempDesc = getAndReplace(desc, "value")
            if (desc["type"] == "brief"):
                tfpackageData["dc:description"] = tempDesc
            tfpackageData["rif:description." + str(i + 1) +
                          ".type"] = desc["type"]
            tfpackageData["rif:description." + str(i + 1) +
                          ".value"] = tempDesc
            jsonSimple = JsonSimple(descriptionData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")
            #ensuring the Description Type exists
            if results:
                for j in range(len(results)):
                    descriptionType = results[j]
                    if (desc["type"] == descriptionType.get("id")):
                        tfpackageData["rif:description." + str(i + 1) +
                                      ".label"] = descriptionType.get("label")

        ###Processing the 'relatedPublication' metadata
        relatedPublication = data.get("relatedPublication")
        if relatedPublication is not None:
            for i in range(len(relatedPublication)):
                publication = relatedPublication[i]
                tfpackageData["dc:relation.swrc:Publication." + str(i + 1) +
                              ".dc:identifier"] = publication["doi"]
                tfpackageData["dc:relation.swrc:Publication." + str(i + 1) +
                              ".dc:title"] = publication["title"]

        ###Processing the 'relatedWebsite' metadata
        relatedWebsite = data.get("relatedWebsite")
        count = 0
        for i in range(len(relatedWebsite)):
            website = relatedWebsite[i]
            tfpackageData["dc:relation.bibo:Website." + str(i + 1) +
                          ".dc:identifier"] = getAndReplace(website, "url")
            tfpackageData["dc:relation.bibo:Website." + str(i + 1) +
                          ".dc:title"] = getAndReplace(website, "notes")
            count = i + 1

        ###Processing the 'data_source_website' metadata (override metadata)
        dataSourceWebsites = data.get("data_source_website")
        if dataSourceWebsites is not None:
            for i in range(len(dataSourceWebsites)):
                website = dataSourceWebsites[i]
                type = website.get("identifier").get("type")
                if type == "uri":
                    count += 1
                    tfpackageData["dc:relation.bibo:Website." + str(count) +
                                  ".dc:identifier"] = getAndReplace(
                                      website.get("identifier"), "value")
                    tfpackageData["dc:relation.bibo:Website." + str(count) +
                                  ".dc:title"] = getAndReplace(
                                      website, "notes")

        ###Processing the 'relatedCollection' metadata
        #Reading the file here, so we only do it once.
        file = open(collectionRelationTypesFilePath +
                    "collectionRelationTypes.json")
        collectionData = file.read()
        file.close()
        relatedCollection = data.get("relatedCollection")
        recordIdentifier = ""
        if relatedCollection is not None:
            for i in range(len(relatedCollection)):
                collection = relatedCollection[i]
                tempIdentifier = collection["identifier"]
                if tempIdentifier is not None:
                    tempIdentifier = Template(tempIdentifier).safe_substitute(
                        replacements)
                    recordIdentifier = tempIdentifier
                else:
                    tempIdentifier = ""
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                              ".dc:identifier"] = tempIdentifier
                tempTitle = collection.get("title")
                tempTitle = Template(tempTitle).safe_substitute(replacements)
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                              ".dc:title"] = tempTitle
                tfpackageData[
                    "dc:relation.vivo:Dataset." + str(i + 1) +
                    ".vivo:Relationship.rdf:PlainLiteral"] = collection[
                        "relationship"]
                if tempIdentifier == "":
                    tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                                  ".redbox:origin"] = "on"
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                              ".redbox:publish"] = "on"
                #Using the collection data as a lookup to obtain the 'label'
                relationShip = collection.get("relationship")
                jsonSimple = JsonSimple(collectionData)
                jsonObj = jsonSimple.getJsonObject()
                results = jsonObj.get("results")
                #ensuring the Collection Relation Types exist
                if results:
                    for j in range(len(results)):
                        relation = results[j]
                        if (relationShip == relation.get("id")):
                            tfpackageData[
                                "dc:relation.vivo:Dataset." + str(i + 1) +
                                ".vivo:Relationship.skos:prefLabel"] = relation.get(
                                    "label")

        ###Processing the 'relatedService' metadata
        #Reading the file here, so we only do it once.
        file = open(servicesRelationTypesFilePath +
                    "serviceRelationTypes.json")
        servicesData = file.read()
        file.close()
        relatedServices = data.get("relatedService")
        recordIdentifier = ""
        if relatedServices is not None:
            for i in range(len(relatedServices)):
                service = relatedServices[i]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) +
                              ".dc:identifier"] = service["identifier"]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) +
                              ".dc:title"] = service["title"]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) +
                              ".vivo:Relationship.rdf:PlainLiteral"] = service[
                                  "relationship"]
                #Using the services data as a lookup to obtain the 'label'
                relationShip = service.get("relationship")
                jsonSimple = JsonSimple(servicesData)
                jsonObj = jsonSimple.getJsonObject()
                results = jsonObj.get("results")
                #ensuring the Service Relation Types exist
                if results:
                    for j in range(len(results)):
                        relation = results[j]
                        if (relationShip == relation.get("id")):
                            tfpackageData[
                                "dc:relation.vivo:Service." + str(i + 1) +
                                ".vivo:Relationship.skos:prefLabel"] = relation.get(
                                    "label")

        ###Processing the 'associatedParty' metadata
        #Reading the file here so we only read it once.
        file = open(relationshipTypesFilePath + "relationshipTypes.json")
        relationshipData = file.read()
        file.close()
        associatedParty = data.get("associatedParty")
        for i in range(len(associatedParty)):
            party = associatedParty[i]
            email = party.get("who").get("value")
            if email is not None:
                whoType = party.get("who").get("type")
                if (whoType == 'people'):
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".dc:identifier"] = party.get("who").get(
                                      "identifier")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".foaf:name"] = party.get("who").get("name")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".foaf:title"] = party.get("who").get(
                                      "title")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".redbox:isCoPrimaryInvestigator"] = "off"
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".redbox:isPrimaryInvestigator"] = "on"
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".foaf:givenName"] = party.get("who").get(
                                      "givenName")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".foaf:familyName"] = party.get("who").get(
                                      "familyName")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".jcu:relationshipType"] = party.get(
                                      "relationship")
                    tfpackageData[
                        "dc:creator.foaf:Person." + str(i + 1) +
                        ".foaf:Organization.dc:identifier"] = party.get(
                            "affiliation").get("id")
                    tfpackageData[
                        "dc:creator.foaf:Person." + str(i + 1) +
                        ".foaf:Organization.skos:prefLabel"] = party.get(
                            "affiliation").get("label")
                    jsonSimple = JsonSimple(relationshipData)
                    jsonObj = jsonSimple.getJsonObject()
                    results = jsonObj.get("results")
                    #ensuring the Relationship Type exists
                    if results:
                        for j in range(len(results)):
                            relationshipType = results[j]
                            if (party.get("relationship") ==
                                    relationshipType.get("id")):
                                tfpackageData[
                                    "dc:creator.foaf:Person." + str(i + 1) +
                                    ".jcu:relationshipLabel"] = relationshipType.get(
                                        "label")

        ###Processing 'contactInfo.email' metadata
        tfpackageData["locrel:prc.foaf:Person.dc:identifier"] = data.get(
            "contactInfo").get("identifier")
        tfpackageData["locrel:prc.foaf:Person.foaf:name"] = data.get(
            "contactInfo").get("name")
        tfpackageData["locrel:prc.foaf:Person.foaf:title"] = data.get(
            "contactInfo").get("title")
        tfpackageData["locrel:prc.foaf:Person.foaf:givenName"] = data.get(
            "contactInfo").get("givenName")
        tfpackageData["locrel:prc.foaf:Person.foaf:familyName"] = data.get(
            "contactInfo").get("familyName")
        tfpackageData["locrel:prc.foaf:Person.foaf:email"] = data.get(
            "contactInfo").get("email")

        ##Stored At (on the Data Management page)
        tfpackageData[
            "vivo:Location.vivo:GeographicLocation.gn:name"] = data.get(
                "contactInfo").get("streetAddress")

        ###Processing 'coinvestigators' metadata
        coinvestigators = data.get("coinvestigators")
        for i in range(len(coinvestigators)):
            tfpackageData["dc:contributor.locrel:clb." + str(i + 1) +
                          ".foaf:Agent"] = coinvestigators[i]

        ###Processing 'anzsrcFOR' metadata
        anzsrcFOR = data.get("anzsrcFOR")
        for i in range(len(anzsrcFOR)):
            anzsrc = anzsrcFOR[i]
            tfpackageData["dc:subject.anzsrc:for." + str(i + 1) +
                          ".skos:prefLabel"] = anzsrc.get("prefLabel")
            tfpackageData["dc:subject.anzsrc:for." + str(i + 1) +
                          ".rdf:resource"] = anzsrc.get("resource")

        ###Processing 'anzsrcSEO' metadata
        anzsrcSEO = data.get("anzsrcSEO")
        for i in range(len(anzsrcSEO)):
            anzsrc = anzsrcSEO[i]
            tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) +
                          ".skos:prefLabel"] = anzsrc.get("prefLabel")
            tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) +
                          ".rdf:resource"] = anzsrc.get("resource")

        ###Processing 'keyword' metadata
        keyword = data.get("keyword")
        for i in range(len(keyword)):
            tfpackageData["dc:subject.vivo:keyword." + str(i + 1) +
                          ".rdf:PlainLiteral"] = keyword[i]

        ###Research Themes
        theme = data.get("researchTheme")
        if (theme == "Tropical Ecosystems, Conservation and Climate Change"):
            tfpackageData["jcu:research.themes.tropicalEcoSystems"] = "true"
        elif (theme == "Industries and Economies in the Tropics"):
            tfpackageData["jcu:research.themes.industriesEconomies"] = "true"
        elif (theme == "People and Societies in the Tropics"):
            tfpackageData["jcu:research.themes.peopleSocieties"] = "true"
        elif (theme == "Tropical Health, Medicine and Biosecurity"):
            tfpackageData["jcu:research.themes.tropicalHealth"] = "true"
        elif (theme == "Not aligned to a University theme"):
            tfpackageData["jcu:research.themes.notAligned"] = "true"

        tfpackageData["dc:accessRights.skos:prefLabel"] = data.get(
            "accessRights")
        tfpackageData["dc:license.dc:identifier"] = data.get("license").get(
            "url")
        tfpackageData["dc:license.skos:prefLabel"] = data.get("license").get(
            "label")

        #identifier
        additionalId = data.get("additionalIdentifier")
        if additionalId is not None:
            additionalId = Template(additionalId).safe_substitute(replacements)
            tfpackageData["dc:identifier.rdf:PlainLiteral"] = additionalId
            tfpackageData["dc:identifier.redbox:origin"] = "external"
            tfpackageData["dc:identifier.dc:type.rdf:PlainLiteral"] = "local"
            tfpackageData[
                "dc:identifier.dc:type.skos:prefLabel"] = "Local Identifier"
        else:
            tfpackageData["dc:identifier.redbox:origin"] = "internal"

        dataLocation = getAndReplace(data, "dataLocation")
        tfpackageData["bibo:Website.1.dc:identifier"] = dataLocation

        #The following have been intentionally set to blank. No mapping is required for these fields.
        tfpackageData["redbox:retentionPeriod"] = data.get("retentionPeriod")
        tfpackageData["dc:extent"] = "unknown"
        tfpackageData["redbox:disposalDate"] = ""
        tfpackageData["locrel:own.foaf:Agent.1.foaf:name"] = ""
        tfpackageData["locrel:dtm.foaf:Agent.foaf:name"] = ""

        ###Processing 'organizationalGroup' metadata
        organisationalGroup = data.get("organizationalGroup")
        for i in range(len(organisationalGroup)):
            organisation = organisationalGroup[i]
            tfpackageData[
                "foaf:Organization.dc:identifier"] = organisation.get(
                    "identifier")
            tfpackageData[
                "foaf:Organization.skos:prefLabel"] = organisation.get(
                    "prefLabel")

        tfpackageData["swrc:ResearchProject.dc:title"] = ""
        tfpackageData["locrel:dpt.foaf:Person.foaf:name"] = ""
        tfpackageData["dc:SizeOrDuration"] = ""
        tfpackageData["dc:Policy"] = ""

        #Citations
        citations = data.get("citations")
        for i in range(len(citations)):
            citation = citations[i]
            tfpackageData[
                "dc:biblioGraphicCitation.redbox:sendCitation"] = citation.get(
                    "sendCitation")
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:identifier.skos:note"] = citation.get(
                    "curationIdentifier")
            paperTitle = getAndReplace(citation, "paperTitle")
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:title"] = paperTitle
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." +
                          str(i + 1) +
                          ".foaf:familyName"] = citation.get("familyName")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." +
                          str(i + 1) +
                          ".foaf:givenName"] = citation.get("givenName")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." +
                          str(i + 1) +
                          ".foaf:title"] = title = citation.get("title")
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:publisher.rdf:PlainLiteral"] = getAndReplace(
                    citation, "publisher")
            url = getAndReplace(citation, "url")
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.bibo:Website.dc:identifier"] = url
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.1.rdf:PlainLiteral"] = tfpackageData[
                    "dc:created"]
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.rdf:PlainLiteral"] = "publicationDate"
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.skos:prefLabel"] = "Publication Date"
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.rdf:PlainLiteral"] = "created"
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.skos:prefLabel"] = "Date Created"
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.dc:date.2.rdf:PlainLiteral"] = tfpackageData[
                    "dc:created"]
            tfpackageData[
                "dc:biblioGraphicCitation.dc:hasPart.jcu:dataType"] = citation.get(
                    "dataType")
            tfpackageData[
                "dc:biblioGraphicCitation.skos:prefLabel"] = citation.get(
                    "familyName"
                ) + ", " + citation.get("givenName") + ". (" + time.strftime(
                    "%Y",
                    time.gmtime()) + "). " + paperTitle + ". " + citation.get(
                        "publisher") + ". [" + citation.get(
                            "dataType") + "]  {ID_WILL_BE_HERE}"

        self.__updateMetadataPayload(tfpackageData)
        self.__workflow()
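The skos:prefLabel citation string assembled near the end of __metadata above is one of the more involved strings in the mapping; a quick standalone check with hypothetical citation values shows the shape of the result (the trailing "{ID_WILL_BE_HERE}" placeholder is presumably resolved later in the workflow, not by this script):

    import time

    # Hypothetical citation record mirroring the keys read above
    citation = {"familyName": "Smith", "givenName": "Jane",
                "publisher": "James Cook University", "dataType": "dataset"}
    paperTitle = "Koala occurrence records"
    prefLabel = (citation["familyName"] + ", " + citation["givenName"] + ". (" +
                 time.strftime("%Y", time.gmtime()) + "). " + paperTitle + ". " +
                 citation["publisher"] + ". [" + citation["dataType"] +
                 "]  {ID_WILL_BE_HERE}")
    print(prefLabel)
    # -> "Smith, Jane. (<year>). Koala occurrence records. James Cook University. [dataset]  {ID_WILL_BE_HERE}"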
    def __metadata(self):
        self.title = None
        self.dcType = None

        self.__checkMetadataPayload()

        jsonPayload = self.object.getPayload("metadata.json")
        json = self.utils.getJsonObject(jsonPayload.open())
        jsonPayload.close()

        metadata = json.getObject("metadata")

        identifier  = metadata.get("dc.identifier")
        self.utils.add(self.index, "dc:identifier", identifier)
        self.__storeIdentifier(identifier)
        self.utils.add(self.index, "institution", "James Cook University")
        self.utils.add(self.index, "source", "http://spatialecology.jcu.edu.au/Edgar/")

        data = json.getObject("data")

        ####Global settings for processing data
        ####These will need to be changed based on your system installation.
        theMintHost = java.lang.System.getProperty("mint.proxy.url")
        collectionRelationTypesFilePath = FascinatorHome.getPath() + "/../portal/default/redbox/workflows/forms/data/"
        servicesRelationTypesFilePath = FascinatorHome.getPath() + "/../portal/default/redbox/workflows/forms/data/"
        descriptionTypesFilePath = FascinatorHome.getPath() + "/../portal/default/local/workflows/forms/data/"
        relationshipTypesFilePath = FascinatorHome.getPath() + "/../portal/default/local/workflows/forms/data/"

        ###Allocating space to create the formData.tfpackage
        tfpackageData = {}

        # We will do string substitutions on data that we get from the default json.
        # We always replace ${NAME_OF_FOLDER} with the name of the folder; if the
        # override json contains a key "DATA_SUBSTITUTIONS", then we also substitute
        # stuff we find there.

        # so: start with just wanting ${NAME_OF_FOLDER} replaced with the actual directory name
        dirName = data.get("harvest_dir_name")
        replacements = { 'NAME_OF_FOLDER': dirName }

        # is there a DATA_SUBSTITUTIONS key?  If so, add those in.
        additionalReplacements = data.get("DATA_SUBSTITUTIONS")
        if additionalReplacements:
            replacements.update(additionalReplacements)

        # now there's a replacements dictionary with the replacements we want
        # to do on our incoming JSON strings.

        # FANCY PART---------------------------------------------
        # Now it gets a bit fancy: Here's a method that does a
        # get-and-replace all in one go.  That makes the rest of
        # this __metadata() method much simpler and more readable.
        #
        # Because this method is defined inside this __metadata()
        # method, it already knows about the replacements var we
        # just made.

        # dataBucket is the thing that has the data.  key is the
        # name of the field you want to get.
        def getAndReplace(dataBucket, key):
            temp = dataBucket.get(key) # fetch the value
            if isinstance(temp, basestring):  # if the value is a string, do our replacements
                return Template(temp).safe_substitute(replacements)
            else:                      # not a string (or missing), just hand it back
                return temp
        # END OF FANCY PART -------------------------------------

        title = getAndReplace(data, "title")
        self.utils.add(self.index, "dc_title", title)
        tfpackageData["dc:title"] = title
        tfpackageData["title"] = title

        self.utils.add(self.index, "dc_type", data.get("type"))
        tfpackageData["dc:type.rdf:PlainLiteral"] = data.get("type")
        tfpackageData["dc:type.skos:prefLabel"] = data.get("type")
        tfpackageData["dc:created"] = time.strftime("%Y-%m-%d", time.gmtime())
        tfpackageData["dc:modified"] = ""
        tfpackageData["dc:language.skos:prefLabel"] = "English"
        tfpackageData["dc:coverage.vivo:DateTimeInterval.vivo:start"] = data.get("temporalCoverage").get("dateFrom")

        dateTo = data.get("temporalCoverage").get("dateTo")
        if dateTo is not None:
            tfpackageData["dc:coverage.vivo:DateTimeInterval.vivo:end"] = dateTo

        tfpackageData["dc:coverage.redbox:timePeriod"] = ""

        ###Processing the 'spatialCoverage' metadata.
        spatialCoverage = data.get("spatialCoverage")
        for i in range(len(spatialCoverage)):
            location = spatialCoverage[i]
            if location["type"] == "text":
                tfpackageData["dc:coverage.vivo:GeographicLocation." + str(i + 1) + ".dc:type"] = location["type"]
                location_value = getAndReplace(location, "value")
                if location_value.startswith("POLYGON"):
                    tfpackageData["dc:coverage.vivo:GeographicLocation." + str(i + 1) + ".redbox:wktRaw"] = location_value
                tfpackageData["dc:coverage.vivo:GeographicLocation." + str(i + 1) + ".rdf:PlainLiteral"] = location_value

        ###Processing the 'description' metadata.
        #Reading the file here, so we only do it once.
        file = open(descriptionTypesFilePath + "descriptionTypes.json")
        descriptionData = file.read()
        file.close()
        description = data.get("description")
        for i in range(len(description)):
            desc = description[i]
            tempDesc = getAndReplace(desc, "value")
            if  (desc["type"] == "brief"):
                tfpackageData["dc:description"] = tempDesc
            tfpackageData["rif:description." + str(i + 1) + ".type"] = desc["type"]
            tfpackageData["rif:description." + str(i + 1) + ".value"] = tempDesc
            jsonSimple = JsonSimple(descriptionData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")
            #ensuring the Description Type exists
            if  results:
                for j in range(len(results)):
                    descriptionType = results[j]
                    if  (desc["type"] == descriptionType.get("id")):
                        tfpackageData["rif:description." + str(i + 1) + ".label"] = descriptionType.get("label")

        ###Processing the 'relatedPublication' metadata
        relatedPublication = data.get("relatedPublication")
        if relatedPublication is not None:
            for i in range(len(relatedPublication)):
                publication = relatedPublication[i]
                tfpackageData["dc:relation.swrc:Publication." + str(i + 1) + ".dc:identifier"] = publication["doi"]
                tfpackageData["dc:relation.swrc:Publication." + str(i + 1) + ".dc:title"] = publication["title"]

        ###Processing the 'relatedWebsite' metadata
        relatedWebsite = data.get("relatedWebsite")
        count = 0
        for i in range(len(relatedWebsite)):
            website = relatedWebsite[i]
            tfpackageData["dc:relation.bibo:Website." + str(i + 1) + ".dc:identifier"] = getAndReplace(website, "url")
            tfpackageData["dc:relation.bibo:Website." + str(i + 1) + ".dc:title"] = getAndReplace(website, "notes")
            count = i + 1

        ###Processing the 'data_source_website' metadata (override metadata)
        dataSourceWebsites = data.get("data_source_website")
        if  dataSourceWebsites is not None:
            for i in range(len(dataSourceWebsites)):
                website = dataSourceWebsites[i]
                type = website.get("identifier").get("type")
                if type == "uri":
                    count += 1
                    tfpackageData["dc:relation.bibo:Website." + str(count) + ".dc:identifier"] = getAndReplace(website.get("identifier"), "value")
                    tfpackageData["dc:relation.bibo:Website." + str(count) + ".dc:title"] = getAndReplace(website, "notes")

        ###Processing the 'relatedCollection' metadata
        #Reading the file here, so we only do it once.
        file = open(collectionRelationTypesFilePath + "collectionRelationTypes.json")
        collectionData = file.read()
        file.close()
        relatedCollection = data.get("relatedCollection")
        recordIdentifier = ""
        if relatedCollection is not None:
            for i in range(len(relatedCollection)):
                collection = relatedCollection[i]
                tempIdentifier = collection["identifier"]
                if tempIdentifier is not None:
                    tempIdentifier = Template( tempIdentifier ).safe_substitute(replacements)
                    recordIdentifier = tempIdentifier
                else:
                    tempIdentifier = ""
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".dc:identifier"] = tempIdentifier
                tempTitle = collection.get("title")
                tempTitle = Template( tempTitle ).safe_substitute(replacements)
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".dc:title"] = tempTitle
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".vivo:Relationship.rdf:PlainLiteral"] = collection["relationship"]
                if  tempIdentifier == "":
                    tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".redbox:origin"] = "on"
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".redbox:publish"] =  "on"
                #Using the collection data as a lookup to obtain the 'label'
                relationShip = collection.get("relationship")
                jsonSimple = JsonSimple(collectionData)
                jsonObj = jsonSimple.getJsonObject()
                results = jsonObj.get("results")
                #ensuring the Collection Relation Types exist
                if  results:
                    for j in range(len(results)):
                        relation = results[j]
                        if  (relationShip == relation.get("id")):
                            tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".vivo:Relationship.skos:prefLabel"] = relation.get("label")

        ###Processing the 'relatedService' metadata
        #Reading the file here, so we only do it once.
        file = open(servicesRelationTypesFilePath + "serviceRelationTypes.json")
        servicesData = file.read()
        file.close()
        relatedServices = data.get("relatedService")
        recordIdentifier = ""
        if relatedServices is not None:
            for i in range(len(relatedServices)):
                service = relatedServices[i]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".dc:identifier"] = service["identifier"]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".dc:title"] = service["title"]
                tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".vivo:Relationship.rdf:PlainLiteral"] = service["relationship"]
                #Using the services data as a lookup to obtain the 'label'
                relationShip = service.get("relationship")
                jsonSimple = JsonSimple(servicesData)
                jsonObj = jsonSimple.getJsonObject()
                results = jsonObj.get("results")
                #ensuring the Service Relation Types exist
                if  results:
                    for j in range(len(results)):
                        relation = results[j]
                        if  (relationShip == relation.get("id")):
                            tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".vivo:Relationship.skos:prefLabel"] = relation.get("label")

        ###Processing the 'associatedParty' metadata
        #Reading the file here so we only read it once.
        file = open(relationshipTypesFilePath + "relationshipTypes.json")
        relationshipData = file.read()
        file.close()
        associatedParty = data.get("associatedParty")
        # Parse the relationship types lookup once rather than on every iteration
        jsonSimple = JsonSimple(relationshipData)
        jsonObj = jsonSimple.getJsonObject()
        results = jsonObj.get("results")
        for i in range(len(associatedParty)):
            party = associatedParty[i]
            who = party.get("who")
            email = who.get("value")
            if email is not None:
                whoType = who.get("type")
                if whoType == 'people':
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".dc:identifier"] = who.get("identifier")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:name"] = who.get("name")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:title"] = who.get("title")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".redbox:isCoPrimaryInvestigator"] = "off"
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".redbox:isPrimaryInvestigator"] = "on"
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:givenName"] = who.get("givenName")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:familyName"] = who.get("familyName")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".jcu:relationshipType"] = party.get("relationship")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:Organization.dc:identifier"] = party.get("affiliation").get("id")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:Organization.skos:prefLabel"] = party.get("affiliation").get("label")
                    #ensuring the Relationship Type exists before mapping its label
                    if results:
                        for j in range(len(results)):
                            relationshipType = results[j]
                            if party.get("relationship") == relationshipType.get("id"):
                                tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".jcu:relationshipLabel"] = relationshipType.get("label")

        ###Processing 'contactInfo' metadata
        contactInfo = data.get("contactInfo")
        tfpackageData["locrel:prc.foaf:Person.dc:identifier"] = contactInfo.get("identifier")
        tfpackageData["locrel:prc.foaf:Person.foaf:name"] = contactInfo.get("name")
        tfpackageData["locrel:prc.foaf:Person.foaf:title"] = contactInfo.get("title")
        tfpackageData["locrel:prc.foaf:Person.foaf:givenName"] = contactInfo.get("givenName")
        tfpackageData["locrel:prc.foaf:Person.foaf:familyName"] = contactInfo.get("familyName")
        tfpackageData["locrel:prc.foaf:Person.foaf:email"] = contactInfo.get("email")

        ##Stored At (on the Data Management page) - taken from the contact's street address
        tfpackageData["vivo:Location.vivo:GeographicLocation.gn:name"] = contactInfo.get("streetAddress")

        ###Processing 'coinvestigators' metadata
        coinvestigators = data.get("coinvestigators")
        for i in range(len(coinvestigators)):
            tfpackageData["dc:contributor.locrel:clb." + str(i + 1) + ".foaf:Agent"] = coinvestigators[i]

        ###Processing 'anzsrcFOR' metadata
        anzsrcFOR = data.get("anzsrcFOR")
        for i in range(len(anzsrcFOR)):
            anzsrc = anzsrcFOR[i]
            tfpackageData["dc:subject.anzsrc:for." + str(i + 1) + ".skos:prefLabel"] = anzsrc.get("prefLabel")
            tfpackageData["dc:subject.anzsrc:for." + str(i + 1) + ".rdf:resource"] = anzsrc.get("resource")

        ###Processing 'anzsrcSEO' metadata
        anzsrcSEO = data.get("anzsrcSEO")
        for i in range(len(anzsrcSEO)):
            anzsrc = anzsrcSEO[i]
            tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) + ".skos:prefLabel"] = anzsrc.get("prefLabel")
            tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) + ".rdf:resource"] = anzsrc.get("resource")

        ###Processing 'keyword' metadata
        keyword = data.get("keyword")
        for i in range(len(keyword)):
            tfpackageData["dc:subject.vivo:keyword." + str(i + 1) + ".rdf:PlainLiteral"] = keyword[i]

        ###Research Themes
        theme = data.get("researchTheme")
        if theme == "Tropical Ecosystems, Conservation and Climate Change":
            tfpackageData["jcu:research.themes.tropicalEcoSystems"] = "true"
        elif theme == "Industries and Economies in the Tropics":
            tfpackageData["jcu:research.themes.industriesEconomies"] = "true"
        elif theme == "People and Societies in the Tropics":
            tfpackageData["jcu:research.themes.peopleSocieties"] = "true"
        elif theme == "Tropical Health, Medicine and Biosecurity":
            tfpackageData["jcu:research.themes.tropicalHealth"] = "true"
        elif theme == "Not aligned to a University theme":
            tfpackageData["jcu:research.themes.notAligned"] = "true"

        tfpackageData["dc:accessRights.skos:prefLabel"] = data.get("accessRights")
        tfpackageData["dc:license.dc:identifier"] = data.get("license").get("url")
        tfpackageData["dc:license.skos:prefLabel"] = data.get("license").get("label")

        #Additional identifier (mapped to dc:identifier)
        additionalId = data.get("additionalIdentifier")
        if additionalId is not None:
            additionalId = Template( additionalId ).safe_substitute(replacements)
            tfpackageData["dc:identifier.rdf:PlainLiteral"] = additionalId
            tfpackageData["dc:identifier.redbox:origin"] = "external"
            tfpackageData["dc:identifier.dc:type.rdf:PlainLiteral"] = "local"
            tfpackageData["dc:identifier.dc:type.skos:prefLabel"] = "Local Identifier"
        else:
            tfpackageData["dc:identifier.redbox:origin"] = "internal"

        dataLocation = getAndReplace(data, "dataLocation")
        tfpackageData["bibo:Website.1.dc:identifier"] = dataLocation

        tfpackageData["redbox:retentionPeriod"] = data.get("retentionPeriod")
        tfpackageData["dc:extent"] = "unknown"
        #The following have been intentionally set to blank. No mapping is required for these fields.
        tfpackageData["redbox:disposalDate"] = ""
        tfpackageData["locrel:own.foaf:Agent.1.foaf:name"] = ""
        tfpackageData["locrel:dtm.foaf:Agent.foaf:name"] = ""

        ###Processing 'organizationalGroup' metadata
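        #Note: these keys are not indexed, so if more than one organisational group is supplied
        #only the last entry is retained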
        organisationalGroup = data.get("organizationalGroup")
        for i in range(len(organisationalGroup)):
            organisation = organisationalGroup[i]
            tfpackageData["foaf:Organization.dc:identifier"] = organisation.get("identifier")
            tfpackageData["foaf:Organization.skos:prefLabel"] = organisation.get("prefLabel")

        tfpackageData["swrc:ResearchProject.dc:title"] = ""
        tfpackageData["locrel:dpt.foaf:Person.foaf:name"] = ""
        tfpackageData["dc:SizeOrDuration"] = ""
        tfpackageData["dc:Policy"] = ""

        #Citations
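        #Note: only the locrel:ctb.* (contributor) keys are indexed per citation; the remaining
        #citation keys are overwritten on each pass, so the last citation's values win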
        citations = data.get("citations")
        for i in range(len(citations)):
            citation = citations[i]
            tfpackageData["dc:biblioGraphicCitation.redbox:sendCitation"] = citation.get("sendCitation")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:identifier.skos:note"] = citation.get("curationIdentifier")
            paperTitle = getAndReplace(citation, "paperTitle")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:title"] = paperTitle
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1) + ".foaf:familyName"] = citation.get("familyName")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1) + ".foaf:givenName"] = citation.get("givenName")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1) + ".foaf:title"] = title = citation.get("title")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:publisher.rdf:PlainLiteral"] = getAndReplace(citation, "publisher")
            url = getAndReplace(citation, "url")
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.bibo:Website.dc:identifier"] = url
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.rdf:PlainLiteral"] = tfpackageData["dc:created"]
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.rdf:PlainLiteral"] = "publicationDate"
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.skos:prefLabel"] = "Publication Date"
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.rdf:PlainLiteral"] = "created"
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.skos:prefLabel"] = "Date Created"
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.rdf:PlainLiteral"] = tfpackageData["dc:created"]
            tfpackageData["dc:biblioGraphicCitation.dc:hasPart.jcu:dataType"] = citation.get("dataType")
            tfpackageData["dc:biblioGraphicCitation.skos:prefLabel"] = citation.get("familyName") + ", " + citation.get("givenName") + ". (" + time.strftime("%Y", time.gmtime()) + "). " + paperTitle + ". " + citation.get("publisher") + ". [" + citation.get("dataType") + "]  {ID_WILL_BE_HERE}"

        self.__updateMetadataPayload(tfpackageData)
        self.__workflow()