def process_tags(self, result):
    """Aggregate Anotar tag documents from a Solr result set.

    Tags without locators are merged by tag text with a running
    'tagCount'; tags carrying locators (eg. image region tags) are
    passed through untouched. Returns a JSON array string.
    """
    merged = {}
    output = []
    for solrDoc in result:
        # Get Anotar data from Solr data
        anotarDoc = JsonSimple(solrDoc.get("jsonString"))
        # Get actual tag text
        tagText = anotarDoc.getString(None, ["content", "literal"])
        # Tags with a locator are a special case (images etc.); keep verbatim
        locatorCount = anotarDoc.getJsonSimpleList(["annotates", "locators"]).size()
        if locatorCount != 0:
            output.append(anotarDoc.toString())
            continue
        # Basic tags: just aggregate counts
        if tagText not in merged:
            # First time, store this object
            anotarDoc.getJsonObject().put("tagCount", str(1))
            merged[tagText] = anotarDoc
        else:
            # We've seen it before, just increment the counter
            seen = merged[tagText]
            soFar = seen.getInteger(0, ["tagCount"])
            seen.getJsonObject().put("tagCount", str(soFar + 1))
    # Push all the 'basic' counts into the list to return
    for tagText in merged:
        output.append(merged[tagText].toString())
    return "[" + ",".join(output) + "]"
def __messages(self):
    """Send an OID notification message to every configured target."""
    # Nothing to do without targets
    if self.message_list is None or len(self.message_list) == 0:
        return
    payload = JsonSimple()
    payload.getJsonObject().put("oid", self.oid)
    body = payload.toString()
    for destination in self.message_list:
        self.utils.sendMessage(destination, body)
def __activate__(self, context): self.request = context["request"] self.response = context["response"] self.formData = context["formData"] self.log = context["log"] # Basic response text message = JsonSimple() self.metadata = message.writeObject(["metadata"]) self.results = message.writeArray(["results"]) # Prepare response Object format = self.formData.get("format") if format == "json": out = self.response.getPrintWriter("application/json; charset=UTF-8") else: out = self.response.getPrintWriter("text/plain; charset=UTF-8") # Success Response try: self.searchNla() out.println(message.toString(True)) out.close() except Exception, ex: self.log.error("Error during search: ", ex) self.response.setStatus(500) message = JsonSimple() message.getJsonObject().put("error", ex.getMessage()) out.println(message.toString(True)) out.close()
def __checkMetadataPayload(self, identifier): # We are just going to confirm the existance of # 'metadata.json', or create an empty one if it # doesn't exist. Makes curation function for this # option and removes some log errors on the details # screen. try: self.object.getPayload("metadata.json") # all is good, the above will throw an exception if it doesn't exist return except Exception: self.log.info("Creating 'metadata.json' payload for object '{}'", self.oid) # Prep data metadata = JsonSimple() metadata.getJsonObject().put("recordIDPrefix", "") metadata.writeObject("data") # The only real data we require is the ID for curation idHolder = metadata.writeObject("metadata") idHolder.put("dc.identifier", identifier) # Store it inStream = IOUtils.toInputStream(metadata.toString(True), "UTF-8") try: StorageUtils.createOrUpdatePayload(self.object, "metadata.json", inStream) except StorageException, e: self.log.error("Error creating 'metadata.json' payload for object '{}'", self.oid, e) return
def updateRelationships(self, relationship, pid, identifier):
    """Record a curated PID against a relationship in the related object's metadata.json.

    Resolves the related object via its identifier, then either marks
    the matching relationship entry as curated or appends a new entry,
    and persists the updated metadata payload.
    """
    oid = self.findOidByIdentifier(relationship.get("identifier"))
    self.writer.println(oid)
    digitalObject = StorageUtils.getDigitalObject(self.storage, oid)

    # Load and parse the current metadata.json
    metadataJsonPayload = digitalObject.getPayload("metadata.json")
    metadataJsonInstream = metadataJsonPayload.open()
    metadataJson = JsonSimple(metadataJsonInstream)
    metadataJsonPayload.close()

    # Guarantee a relationships array exists
    relationships = metadataJson.getArray("relationships")
    if relationships is None:
        relationships = JSONArray()
        metadataJson.getJsonObject().put("relationships", relationships)

    # Mark any existing entry for this identifier as curated
    matched = False
    for entry in relationships:
        if entry.get("identifier") == identifier:
            entry.put("isCurated", True)
            entry.put("curatedPid", pid)
            matched = True

    # Otherwise append a brand-new curated relationship
    if not matched:
        fresh = JsonObject()
        fresh.put("isCurated", True)
        fresh.put("curatedPid", pid)
        fresh.put("relationship", relationship.get("relationship"))
        fresh.put("identifier", identifier)
        relationships.add(fresh)

    # Persist
    istream = ByteArrayInputStream(String(metadataJson.toString(True)).getBytes())
    StorageUtils.createOrUpdatePayload(digitalObject, "metadata.json", istream)
def get_image(self):
    """Return image annotation tags (media-fragment locators only) as a JSON array string."""
    self.type = "http://www.purl.org/anotar/ns/type/0.1#Tag"
    mediaFragType = "http://www.w3.org/TR/2009/WD-media-frags-20091217"
    result = '{"result":' + self.search_solr() + "}"
    if not result:
        return result

    tagJsonList = []
    for imageTag in JsonSimple(result).getJsonSimpleList(["result"]):
        # We only want tags with locators, not basic tags
        locators = imageTag.getJsonSimpleList(["annotates", "locators"])
        if not locators or locators.isEmpty():
            continue
        locatorValue = locators.get(0).getString(None, ["value"])
        locatorType = locators.get(0).get(None, ["type"])
        hasRegion = locatorValue and locatorValue.find("#xywh=") > -1
        if not hasRegion or locatorType != mediaFragType:
            continue
        # Unpack the media-fragment region: "...#xywh=left,top,width,height"
        _, region = locatorValue.split("#xywh=")
        left, top, width, height = region.split(",")
        annotation = JsonSimple()
        node = annotation.getJsonObject()
        node.put("top", top)
        node.put("left", left)
        node.put("width", width)
        node.put("height", height)
        node.put("creator", imageTag.getString(None, ["creator", "literal"]))
        node.put("creatorUri", imageTag.getString(None, ["creator", "uri"]))
        node.put("id", imageTag.getString(None, ["id"]))
        node.put("text", imageTag.getString(None, ["content", "literal"]))
        node.put("editable", "true")
        tagJsonList.append(annotation.toString())
    result = "[" + ",".join(tagJsonList) + "]"
    return result
def __activate__(self, context): self.velocityContext = context formData = self.vc("formData") # build the URL and query parameters to retrieve proxyUrls = JsonSimple(self.vc("systemConfig").getObject("proxy-urls")) url = "" key = formData.get("ns", "") if proxyUrls.getJsonObject().containsKey(key): url = proxyUrls.getString("", [key]) queryStr = formData.get("qs") if queryStr == "searchTerms={searchTerms}": queryStr = None if queryStr: if formData.get("jaffa2autocomplete", "false") == "true": url += "?searchTerms=%s" % queryStr.lower() else: url += "?%s" % queryStr self.vc("log").debug("Proxy URL = '{}'", url) data = None try: data = self.__wget(url) except Exception, e: data = '{"error":"%s"}' % str(e) self.vc("log").error("ERROR accessing URL:", e)
def __activate__(self, context):
    """Proxy endpoint: fetch a whitelisted remote URL with the supplied query string.

    NOTE(review): near-identical to the sibling proxy script in this file,
    except the jaffa2autocomplete branch does NOT lowercase the search
    terms here — confirm whether that divergence is intentional.
    """
    self.velocityContext = context
    formData = self.vc("formData")
    # build the URL and query parameters to retrieve
    proxyUrls = JsonSimple(self.vc("systemConfig").getObject("proxy-urls"))
    url = ""
    key = formData.get("ns", "")
    # Only whitelisted namespaces resolve to a URL; unknown keys leave url empty
    if proxyUrls.getJsonObject().containsKey(key):
        url = proxyUrls.getString("", [key])
    queryStr = formData.get("qs")
    # Ignore the unexpanded OpenSearch template placeholder
    if queryStr == "searchTerms={searchTerms}":
        queryStr = None
    if queryStr:
        if formData.get("jaffa2autocomplete", "false") == "true":
            url += "?searchTerms=%s" % queryStr
        else:
            url += "?%s" % queryStr
    self.vc("log").debug("Proxy URL = '{}'", url)
    data = None
    try:
        data = self.__wget(url)
    except Exception, e:
        # Failure is reported as a JSON error body rather than raised
        data = '{"error":"%s"}' % str(e)
        self.vc("log").error("ERROR accessing URL:", e)
def __activate__(self, context):
    """API endpoint to create or delete a portal 'view'.

    Builds a JSON status object: 'create-view' derives a new portal from
    the current session's facet query; 'delete-view' removes a portal
    (admin only, never the default portal).

    NOTE(review): 'writer' is obtained but this block never writes
    'result' to it — confirm against the original file whether the
    response-write was truncated from this view.
    """
    response = context["response"]
    writer = response.getPrintWriter("text/plain; charset=UTF-8")
    auth = context["page"].authentication
    # Default response: error until a branch below succeeds
    result = JsonSimple()
    obj = result.getJsonObject()
    obj.put("status", "error")
    obj.put("message", "An unknown error has occurred")
    if auth.is_logged_in():
        services = context["Services"]
        formData = context["formData"]
        sessionState = context["sessionState"]
        urlBase = context["urlBase"]
        # Normalise: strip any trailing slash before composing URLs
        if urlBase.endswith("/"):
            urlBase = urlBase[:-1]
        func = formData.get("func")
        portalManager = services.portalManager
        if func == "create-view":
            try:
                # Current facet selections, minus the generic object filter
                fq = [q for q in sessionState.get("fq") if q != 'item_type:"object"']
                id = formData.get("id")
                description = formData.get("description")
                print "Creating view '%s': '%s'" % (id, description)
                portal = Portal(id)
                portal.setDescription(formData.get("description"))
                # The new view's query is the OR of the selected facets
                portal.setQuery(" OR ".join(fq))
                portal.setSearchQuery(sessionState.get("searchQuery"))
                portal.setFacetFields(portalManager.default.facetFields)
                portalManager.add(portal)
                portalManager.save(portal)
                obj.put("status", "ok")
                obj.put("message", "View '%s' successfully created" % id)
                obj.put("url", "%s/%s/home" % (urlBase, id))
            except Exception, e:
                response.setStatus(500)
                obj.put("message", str(e))
        elif func == "delete-view":
            defaultPortal = context["defaultPortal"]
            portalId = formData.get("view")
            # Deletion is restricted to administrators
            if auth.is_admin():
                if not portalId:
                    response.setStatus(500)
                    obj.put("message", "No view specified to be deleted")
                elif portalId != defaultPortal:
                    # sanity check: don't delete default portal
                    print "Deleting view '%s'" % portalId
                    try:
                        portalManager.remove(portalId)
                        obj.put("status", "ok")
                        obj.put("message", "View '%s' successfully removed" % portalId)
                        # Redirect callers back to the default portal's home
                        obj.put("url", "%s/%s/home" % (urlBase, defaultPortal))
                    except Exception, e:
                        obj.put("message", str(e))
                else:
                    response.setStatus(500)
                    obj.put("message", "The default view cannot be deleted")
            else:
                # NOTE(review): pairing of this else inferred from the message
                # text ("administrative users") — confirm against the original
                response.setStatus(403)
                obj.put("message", "Only administrative users can access this API")
def __activate__(self, context):
    """Queue a curation task for the OID supplied in form data."""
    self.request = context["request"]
    self.response = context["response"]
    self.formData = context["formData"]
    self.log = context["log"]

    oid = self.formData.get("oid")
    self.log.debug("Curation request recieved: '{}'", oid)

    # Build the curation task message
    task = JsonSimple()
    task.getJsonObject().put("task", "curation")
    task.getJsonObject().put("oid", oid)

    out = self.response.getPrintWriter("text/plain; charset=UTF-8")
    if not self.queueMessage(task.toString()):
        # Queueing failed; report server error
        self.response.setStatus(500)
        out.println("Error sending message, see system logs.")
    else:
        out.println("Request successful. The system will now process.")
    out.close()
def __updateMetadataPayload(self, data): # Get and parse payload = self.object.getPayload("formData.tfpackage") json = JsonSimple(payload.open()) payload.close() # Basic test for a mandatory field title = json.getString(None, ["dc:title"]) if title is not None: # We've done this before return # Merge json.getJsonObject().putAll(data) # Store it inStream = IOUtils.toInputStream(json.toString(True), "UTF-8") try: self.object.updatePayload("formData.tfpackage", inStream) except StorageException, e: self.log.error("Error updating 'formData.tfpackage' payload for object '{}'", self.oid, e)
def modify_json(self):
    """Stamp the working annotation JSON with its id, URI and schema version."""
    jsonSimple = JsonSimple(self.json)
    jsonObj = jsonSimple.getJsonObject()
    jsonObj.put("id", self.pid)

    # Derive an absolute URI from the annotated root, when one exists
    rootUri = jsonSimple.getString(None, ["annotates", "rootUri"])
    if rootUri is not None:
        baseUrl = "http://%s:%s/" % (self.vc("request").serverName, self.vc("serverPort"))
        jsonObj.put("uri", baseUrl + rootUri + "#" + self.pid)

    jsonObj.put("schemaVersionUri", "http://www.purl.org/anotar/schema/0.1")
    self.json = jsonSimple.toString()
def __updateMetadataPayload(self, data):
    """Merge 'data' into the object's formData.tfpackage payload, once only.

    The presence of 'dc:title' is used as the marker that the merge has
    already happened; in that case this is a no-op.
    """
    # Get and parse
    payload = self.object.getPayload("formData.tfpackage")
    json = JsonSimple(payload.open())
    payload.close()
    # Basic test for a mandatory field
    title = json.getString(None, ["dc:title"])
    if title is not None:
        # We've done this before
        return
    # Merge the incoming map into the package
    json.getJsonObject().putAll(data)
    # Store it
    inStream = IOUtils.toInputStream(json.toString(True), "UTF-8")
    try:
        self.object.updatePayload("formData.tfpackage", inStream)
    except StorageException, e:
        # Best-effort: log the failure rather than propagate
        self.log.error(
            "Error updating 'formData.tfpackage' payload for object '{}'", self.oid, e)
def __activate__(self, context):
    """JSONP endpoint reporting whether the current user is authenticated."""
    request = context["request"]
    response = context["response"]
    writer = response.getPrintWriter("text/javascript; charset=UTF-8")
    result = JsonSimple()

    ## Look for the JSONP callback to use; reject non-JSONP requests
    jsonpCallback = request.getParameter("callback")
    if jsonpCallback is None:
        jsonpCallback = request.getParameter("jsonp_callback")
    if jsonpCallback is None:
        response.setStatus(403)
        writer.println("Error: This interface only responds to JSONP")
        writer.close()
        return

    loggedIn = context["page"].authentication.is_logged_in()
    result.getJsonObject().put("isAuthenticated", "true" if loggedIn else "false")

    writer.println(jsonpCallback + "(" + result.toString() + ")")
    writer.close()
def modify_json(self):
    """Stamp self.json with the annotation's id, absolute URI and schema version."""
    jsonSimple = JsonSimple(self.json)
    jsonObj = jsonSimple.getJsonObject()
    jsonObj.put("id", self.pid)
    # Build an absolute URI only when the annotation names a root resource
    rootUri = jsonSimple.getString(None, ["annotates", "rootUri"])
    if rootUri is not None:
        # Host/port come from the current request context
        baseUrl = "http://%s:%s/" % (self.vc("request").serverName, self.vc("serverPort"))
        myUri = baseUrl + rootUri + "#" + self.pid
        jsonObj.put("uri", myUri)
    jsonObj.put("schemaVersionUri", "http://www.purl.org/anotar/schema/0.1")
    # Write the modified document back to the working string
    self.json = jsonSimple.toString()
def __upgrade(self, formData):
    """Upgrade legacy form data to the current form version.

    Copies every field (renamed via __parseFieldName) into a fresh JSON
    document, rebuilds 'metaList', stamps the new version, and applies
    legacy-specific fix-ups. Returns the new JsonSimple document.
    """
    # These fields are handled specially
    ignoredFields = ["metaList", "redbox:formVersion", "redbox:newForm"]

    # Prepare a new JSON setup for upgraded data
    newJsonSimple = JsonSimple()
    newJsonObject = newJsonSimple.getJsonObject()
    metaList = newJsonSimple.writeArray(["metaList"])
    oldJsonObject = formData.getJsonObject()

    # Copy fields across under their upgraded names
    for key in oldJsonObject.keySet():
        oldField = str(key)
        if oldField in ignoredFields:
            continue
        newField = self.__parseFieldName(oldField)
        metaList.add(newField)
        newJsonObject.put(newField, oldJsonObject.get(key))

    # Form management
    newJsonObject.put("redbox:formVersion", self.redboxVersion)
    newForm = oldJsonObject.get("redbox:newForm")
    if newForm is not None:
        newJsonObject.put("redbox:newForm", newForm)

    # Custom fix-ups more complicated than a simple rename:
    # 1) Old URL checkbox 'on' equals new ID Origin 'internal'
    urlOrigin = oldJsonObject.get("url_useRecordId")
    if urlOrigin is not None and urlOrigin == "on":
        newJsonObject.put("dc:identifier.redbox:origin", "internal")

    # 2) Related data defaults to being unlinked if from legacy forms
    template = "dc:relation.vivo:Dataset"
    counter = 1
    while True:
        newIdField = "%s.%s.dc:identifier" % (template, counter)
        if not newJsonObject.containsKey(newIdField):
            break
        newJsonObject.put("%s.%s.redbox:origin" % (template, counter), "external")
        newJsonObject.put("%s.%s.redbox:publish" % (template, counter), "off")
        counter += 1

    self.audit.add(
        "Migration tool. Version upgrade performed '%s' => '%s'" %
        (self.version, self.redboxVersion))
    return newJsonSimple
def __upgrade(self, formData):
    """Upgrade legacy form data to the current redbox form version.

    Returns a new JsonSimple with every field copied under its upgraded
    name, a rebuilt 'metaList', the new version stamp, and legacy
    fix-ups applied; records the migration in the audit trail.
    """
    # These fields are handled specially
    ignoredFields = ["metaList", "redbox:formVersion", "redbox:newForm"]
    # Prepare a new JSON setup for upgraded data
    newJsonSimple = JsonSimple()
    newJsonObject = newJsonSimple.getJsonObject()
    metaList = newJsonSimple.writeArray(["metaList"])
    oldJsonObject = formData.getJsonObject()
    # Copy each field across under its upgraded name
    for key in oldJsonObject.keySet():
        oldField = str(key)
        if oldField not in ignoredFields:
            newField = self.__parseFieldName(oldField)
            metaList.add(newField)
            newJsonObject.put(newField, oldJsonObject.get(key))
    # Form management
    newJsonObject.put("redbox:formVersion", self.redboxVersion)
    newForm = oldJsonObject.get("redbox:newForm")
    if newForm is not None:
        newJsonObject.put("redbox:newForm", newForm)
    #########
    # Some final custom modifications more complicated than most fields
    #########
    # Old URL checkbox 'on' equals new ID Origin 'internal'
    urlOrigin = oldJsonObject.get("url_useRecordId")
    if urlOrigin is not None and urlOrigin == "on":
        newJsonObject.put("dc:identifier.redbox:origin", "internal")
    # Related data should default to being unlinked if from legacy forms;
    # walk the numbered dataset relations until a gap is found
    counter = 1
    template = "dc:relation.vivo:Dataset"
    newIdField = "%s.%s.dc:identifier" % (template, counter)
    while newJsonObject.containsKey(newIdField):
        newOriginField = "%s.%s.redbox:origin" % (template, counter)
        newJsonObject.put(newOriginField, "external")
        newPublishField = "%s.%s.redbox:publish" % (template, counter)
        newJsonObject.put(newPublishField, "off")
        counter += 1
        newIdField = "%s.%s.dc:identifier" % (template, counter)
    # Leave an audit trail entry for the migration
    self.audit.add("Migration tool. Version upgrade performed '%s' => '%s'" % (self.version, self.redboxVersion))
    return newJsonSimple
def __activate__(self, context): writer = context["response"].getPrintWriter("application/json; charset=UTF-8") jsonResponse = "{}" try: oid = context["formData"].get("oid") object = context["Services"].getStorage().getObject(oid); payload = object.getPayload("metadata.json") json = JsonSimple(payload.open()) payload.close() object.close() # We are only going to send the 'data' node though data = JsonSimple(json.getJsonObject().get("data")) jsonResponse = data.toString(True) except Exception, e: jsonResponse = '{"message": "%s"}' % e.getMessage()
def getJson(self, state="open"):
    """Build the tree-widget JSON node for this facet, recursing into sub-facets.

    'state' controls whether a node with children renders expanded
    ("open") or collapsed ("closed"); children are always "closed".
    """
    title = "%s (%s)" % (self.getName(), self.getCount())
    json = JsonSimple()
    jsonObj = json.getJsonObject()

    # Node attributes consumed by the tree widget
    attributes = JsonObject()
    attributes.put("id", self.getId())
    attributes.put("fq", self.getFacetQuery())
    attributes.put("title", title)

    jsonObj.put("data", title)
    jsonObj.put("attributes", attributes)

    # Sub-facets become child nodes
    subFacets = self.getSubFacets()
    if not subFacets.isEmpty():
        jsonObj.put("state", state)
        subFacetList = ArrayList()
        for subFacet in subFacets:
            subFacetList.add(subFacet.getJson("closed"))
        children = JSONArray()
        children.addAll(subFacetList)
        jsonObj.put("children", children)
    return json
def get_image(self):
    """Return image annotation tags as a JSON array string.

    Queries Solr for annotations, keeps only those whose first locator
    is a W3C media-fragment region ("#xywh=..."), and flattens each into
    a small JSON object with position, creator, id and text.
    """
    self.type = "http://www.purl.org/anotar/ns/type/0.1#Tag"
    mediaFragType = "http://www.w3.org/TR/2009/WD-media-frags-20091217"
    result = '{"result":' + self.search_solr() + '}'
    if result:
        imageTagList = []
        imageTags = JsonSimple(result).getJsonSimpleList(["result"])
        for imageTag in imageTags:
            imageAno = JsonSimple()
            # We only want tags with locators, not basic tags
            locators = imageTag.getJsonSimpleList(
                ["annotates", "locators"])
            if locators and not locators.isEmpty():
                locatorValue = locators.get(0).getString(None, ["value"])
                locatorType = locators.get(0).get(None, ["type"])
                # Accept only media-fragment region locators ("#xywh=l,t,w,h")
                if locatorValue and locatorValue.find(
                        "#xywh=") > -1 and locatorType == mediaFragType:
                    _, locatorValue = locatorValue.split("#xywh=")
                    left, top, width, height = locatorValue.split(",")
                    object = imageAno.getJsonObject()
                    object.put("top", top)
                    object.put("left", left)
                    object.put("width", width)
                    object.put("height", height)
                    object.put(
                        "creator",
                        imageTag.getString(None, ["creator", "literal"]))
                    object.put(
                        "creatorUri",
                        imageTag.getString(None, ["creator", "uri"]))
                    object.put("id", imageTag.getString(None, ["id"]))
                    #tagCount = imageTag.getString(None, ["tagCount"])
                    object.put(
                        "text",
                        imageTag.getString(None, ["content", "literal"]))
                    object.put("editable", "true")
                    imageTagList.append(imageAno.toString())
        result = "[" + ",".join(imageTagList) + "]"
    return result
def __workflow(self):
    """Index workflow state for a Services record.

    Loads (or creates) the 'workflow.metadata' payload, applies any
    pending step transition, indexes form/manifest fields, and adds
    workflow/security fields to the Solr index.

    NOTE(review): indentation reconstructed from a flattened source —
    confirm block nesting against the original file.
    """
    # Workflow data
    WORKFLOW_ID = "servicesUI2"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    pageTitle = "Services Record"
    displayType = "package-service"
    initialStep = 0
    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMeta.getJsonObject().put("pageTitle", pageTitle)
        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])
        # Security change: pick up visibility/security from the current stage
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create one at the initial stage
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed? If so, persist it
    if wfChanged == True:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            print " ERROR updating dataset payload"

    # Form processing
    coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields go into the custom field map
        data = formData.getJsonObject()
        for field in data.keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            # First title wins
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]

    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field not in coreFields:
            value = formData.get(field)
            if value is not None and value.strip() != "":
                self.utils.add(self.index, field, value)
                # We want to sort by date of creation, so it
                # needs to be indexed as a date (ie. 'date_*')
                if field == "dc:created":
                    parsedTime = time.strptime(value, "%Y-%m-%d")
                    solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                    self.utils.add(self.index, "date_created", solrTime)
                # try to extract some common fields for faceting
                if field.startswith("dc:") and \
                        not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                        or field.endswith(".dc:identifier") \
                        or field.endswith(".rdf:resource")):
                    # index dublin core fields for faceting
                    basicField = field.replace("dc:", "dc_")
                    dot = field.find(".")
                    if dot > 0:
                        facetField = basicField[:dot]
                    else:
                        facetField = basicField
                    #print "Indexing DC field '%s':'%s'" % (field, facetField)
                    if facetField == "dc_title":
                        if self.title is None:
                            self.title = value
                    elif facetField == "dc_type":
                        if self.dcType is None:
                            self.dcType = value
                    elif facetField == "dc_creator":
                        # Only the foaf:name component is indexed as creator
                        if basicField.endswith("foaf_name"):
                            self.utils.add(self.index, "dc_creator", value)
                    else:
                        self.utils.add(self.index, facetField, value)
                # index keywords for lookup
                if field.startswith("dc:subject.vivo:keyword."):
                    self.utils.add(self.index, "keywords", value)

    self.utils.add(self.index, "display_type", displayType)

    # Workflow processing: expose the current step and security in the index
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
    if self.owner is not None:
        self.utils.add(self.index, "workflow_security", self.owner)
    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __activate__(self, context):
    """External curation callback: record a curated PID on an object's relationship.

    Runs as the 'admin' session user (restored in the finally block).
    Locates the object by oid or identifier, marks the relationship
    matching 'sourceIdentifier' as curated (or appends a new entry),
    echoes the updated metadata to the response, and persists it.
    """
    try:
        self.log = context["log"]
        self.response = context["response"]
        self.request = context["request"]
        self.systemConfig = context["systemConfig"]
        self.storage = context["Services"].getStorage()
        self.indexer = context["Services"].getIndexer()
        self.sessionState = context["sessionState"]
        # Elevate to admin for the duration of this request
        self.sessionState.set("username", "admin")
        out = self.response.getPrintWriter("text/plain; charset=UTF-8")
        # NOTE(review): these beans are fetched but not used in this block
        relationshipMapper = ApplicationContextProvider.getApplicationContext().getBean("relationshipMapper")
        externalCurationMessageBuilder = ApplicationContextProvider.getApplicationContext().getBean("externalCurationMessageBuilder")
        # Resolve the target object: by oid, else via its identifier
        oid = self.request.getParameter("oid")
        if oid is None:
            identifier = self.request.getParameter("identifier")
            oid = self.findOidByIdentifier(identifier)
        relationshipType = self.request.getParameter("relationship")
        curatedPid = self.request.getParameter("curatedPid")
        sourceId = self.request.getParameter("sourceIdentifier")
        system = self.request.getParameter("system")
        # Load and parse the object's metadata.json
        digitalObject = StorageUtils.getDigitalObject(self.storage, oid)
        metadataJsonPayload = digitalObject.getPayload("metadata.json")
        metadataJsonInstream = metadataJsonPayload.open()
        metadataJson = JsonSimple(metadataJsonInstream)
        metadataJsonPayload.close()
        # Guarantee a relationships array exists
        relationships = metadataJson.getArray("relationships")
        found = False
        if relationships is None:
            relationships = JSONArray()
            metadataJson.getJsonObject().put("relationships", relationships)
        # Mark any existing entry for this source identifier as curated
        for relationship in relationships:
            if relationship.get("identifier") == sourceId:
                relationship.put("isCurated", True)
                relationship.put("curatedPid", curatedPid)
                found = True
        # Otherwise append a brand-new curated relationship entry
        if not found:
            relationship = JsonObject()
            relationship.put("isCurated", True)
            relationship.put("curatedPid", curatedPid)
            relationship.put("relationship", relationshipType)
            relationship.put("identifier", sourceId)
            relationship.put("system", system)
            relationships.add(relationship)
        # Echo the updated metadata and persist it
        out.println(metadataJson.toString(True))
        istream = ByteArrayInputStream(String(metadataJson.toString(True)).getBytes())
        StorageUtils.createOrUpdatePayload(digitalObject, "metadata.json", istream)
        out.close()
    finally:
        # Always drop the elevated session user
        self.sessionState.remove("username")
def __workflow(self):
    """Index workflow state for a Parties_People record.

    Loads (or creates) the 'workflow.metadata' payload, applies any
    pending step transition, indexes form data, and adds
    workflow/security fields to the Solr index.

    NOTE(review): indentation reconstructed from a flattened source —
    confirm block nesting against the original file.
    """
    # Workflow data
    WORKFLOW_ID = "Parties_People"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    initialStep = 0
    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])
        # Security change: pick up visibility/security from the current stage
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create one at the initial stage
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed? If so, persist it
    if wfChanged == True:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            print " ERROR updating dataset payload"

    # Form processing
    coreFields = ["title", "description"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields go into the custom field map
        data = formData.getJsonObject()
        for field in data.keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Workflow processing: expose the current step and security in the index
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
    if self.owner is not None:
        self.utils.add(self.index, "workflow_security", self.owner)
    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __workflow(self):
    """Index workflow state for a dataset record.

    Loads (or creates) the 'workflow.metadata' payload, applies any
    pending step transition, indexes form and manifest fields (dates,
    Dublin Core facets, keywords, reporting array/composite fields),
    and adds workflow/security fields to the Solr index.

    NOTE(review): indentation reconstructed from a flattened source —
    confirm block nesting against the original file.
    """
    # Workflow data
    WORKFLOW_ID = "dataset"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    # Guest-owned records enter the workflow one step earlier
    if self.owner == "guest":
        pageTitle = "Submission Request"
        displayType = "submission-request"
        initialStep = 0
    else:
        pageTitle = "Metadata Record"
        displayType = "package-dataset"
        initialStep = 1

    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMeta.getJsonObject().put("pageTitle", pageTitle)
        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])
        # Security change: pick up visibility/security from the current stage
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create one at the initial stage
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed? If so, persist it
    if wfChanged == True:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            print " ERROR updating dataset payload"

    # Form processing
    coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields go into the custom field map
        data = formData.getJsonObject()
        for field in data.keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            # First title wins
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]

    #Used to make sure we have a created date
    createdDateFlag = False

    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field not in coreFields:
            value = formData.get(field)
            if value is not None and value.strip() != "":
                self.utils.add(self.index, field, value)
                # We want to sort by date of creation, so it
                # needs to be indexed as a date (ie. 'date_*')
                if field == "dc:created":
                    parsedTime = time.strptime(value, "%Y-%m-%d")
                    solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                    self.utils.add(self.index, "date_created", solrTime)
                    self.log.debug("Set created date to :%s" % solrTime)
                    createdDateFlag = True
                elif field == "redbox:embargo.dc:date":
                    self.embargoedDate = value
                # try to extract some common fields for faceting
                if field.startswith("dc:") and \
                        not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                        or field.endswith(".dc:identifier") \
                        or field.endswith(".rdf:resource")):
                    # index dublin core fields for faceting
                    basicField = field.replace("dc:", "dc_")
                    dot = field.find(".")
                    if dot > 0:
                        facetField = basicField[:dot]
                    else:
                        facetField = basicField
                    #print "Indexing DC field '%s':'%s'" % (field, facetField)
                    if facetField == "dc_title":
                        if self.title is None:
                            self.title = value
                    elif facetField == "dc_type":
                        if self.dcType is None:
                            self.dcType = value
                    elif facetField == "dc_creator":
                        # Only the foaf:name component is indexed as creator
                        if basicField.endswith("foaf_name"):
                            self.utils.add(self.index, "dc_creator", value)
                    else:
                        self.utils.add(self.index, facetField, value)
                # index keywords for lookup
                if field.startswith("dc:subject.vivo:keyword."):
                    self.utils.add(self.index, "keywords", value)
                # check if this is an array field (numbered sub-entries)
                fnameparts = field.split(":")
                if fnameparts is not None and len(fnameparts) >= 3:
                    if field.startswith("bibo") or field.startswith("skos"):
                        arrParts = fnameparts[1].split(".")
                    else:
                        arrParts = fnameparts[2].split(".")
                    # we're not interested in: Relationship, Type and some redbox:origin
                    if arrParts is not None and len(arrParts) >= 2 and field.find(":Relationship.") == -1 and field.find("dc:type") == -1 and field.find("redbox:origin") == -1 and arrParts[1].isdigit():
                        # we've got an array field; strip the numeric index
                        # out of the field name to build the reporting name
                        fldPart = ":%s" % arrParts[0]
                        prefixEndIdx = field.find(fldPart) + len(fldPart)
                        suffixStartIdx = prefixEndIdx + len(arrParts[1]) + 1
                        arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx] + field[suffixStartIdx:]
                        if field.endswith("Name"):
                            arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx]
                        self.log.debug("Array Field name is:%s from: %s, with value:%s" % (arrFldName, field, value))
                        if field.endswith("Name"):
                            # Name parts are assembled as "familyName, givenName"
                            # keyed by the entry's numeric index
                            fullFieldMap = self.arrayBucket.get(arrFldName)
                            if fullFieldMap is None:
                                fullFieldMap = HashMap()
                                self.arrayBucket.put(arrFldName, fullFieldMap)
                            idx = arrParts[1]
                            fullField = fullFieldMap.get(idx)
                            if (fullField is None):
                                fullField = ""
                            if (field.endswith("givenName")):
                                fullField = "%s, %s" % (fullField, value)
                            if (field.endswith("familyName")):
                                fullField = "%s%s" % (value, fullField)
                            self.log.debug("fullname now is :%s" % fullField)
                            fullFieldMap.put(idx, fullField)
                        else:
                            # Plain array values are collected into a list
                            fieldlist = self.arrayBucket.get(arrFldName)
                            if fieldlist is None:
                                fieldlist = []
                                self.arrayBucket.put(arrFldName, fieldlist)
                            fieldlist.append(value)
                # Composite fields are assembled via start/end/delim config
                for compfield in self.compFields:
                    if field.startswith(compfield):
                        arrFldName = self.reportingFieldPrefix + compfield
                        fullFieldMap = self.arrayBucket.get(arrFldName)
                        if fullFieldMap is None:
                            fullFieldMap = HashMap()
                            self.arrayBucket.put(arrFldName, fullFieldMap)
                        fullField = fullFieldMap.get("1")
                        if fullField is None:
                            fullField = ""
                        if field.endswith(self.compFieldsConfig[compfield]["end"]):
                            fullField = "%s%s%s" % (fullField, self.compFieldsConfig[compfield]["delim"], value)
                        if field.endswith(self.compFieldsConfig[compfield]["start"]):
                            fullField = "%s%s" % (value, fullField)
                        self.log.debug("full field now is :%s" % fullField)
                        fullFieldMap.put("1", fullField)

    self.utils.add(self.index, "display_type", displayType)

    # Make sure we have a creation date
    if not createdDateFlag:
        self.utils.add(self.index, "date_created", self.last_modified)
        self.log.debug("Forced creation date to %s because it was not explicitly set." % self.last_modified)

    # Workflow processing: expose the current step and security in the index
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
    if self.owner is not None:
        self.utils.add(self.index, "workflow_security", self.owner)
    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __metadata(self):
    """Map an Edgar dataset's "metadata.json" payload into tfpackage form data.

    Builds the formData.tfpackage dictionary from the harvested JSON
    (replacing every %NAME_OF_FOLDER% placeholder with the species name),
    indexes the core fields (identifier, title, type, institution, source),
    then persists the package via __updateMetadataPayload() and triggers
    __workflow().  Side effects only; returns None.
    """
    self.title = None
    self.dcType = None
    self.__checkMetadataPayload()

    # Load the harvested JSON payload
    jsonPayload = self.object.getPayload("metadata.json")
    json = self.utils.getJsonObject(jsonPayload.open())
    jsonPayload.close()

    # Record identifier and fixed institutional fields
    metadata = json.getObject("metadata")
    identifier = metadata.get("dc.identifier")
    self.utils.add(self.index, "dc:identifier", identifier)
    self.__storeIdentifier(identifier)
    self.utils.add(self.index, "institution", "James Cook University")
    self.utils.add(self.index, "source", "http://spatialecology.jcu.edu.au/Edgar/")
    data = json.getObject("data")

    #### Global settings for processing data
    #### These will need to be changed based on your system installation.
    theMintHost = java.lang.System.getProperty("mint.proxy.url")  # NOTE(review): unused in this method
    collectionRelationTypesFilePath = FascinatorHome.getPath() + "/../portal/default/redbox/workflows/forms/data/"
    servicesRelationTypesFilePath = FascinatorHome.getPath() + "/../portal/default/redbox/workflows/forms/data/"
    descriptionTypesFilePath = FascinatorHome.getPath() + "/../portal/default/local/workflows/forms/data/"
    relationshipTypesFilePath = FascinatorHome.getPath() + "/../portal/default/local/workflows/forms/data/"

    ### Allocating space to create the formData.tfpackage
    tfpackageData = {}

    ### Using the species name, obtained from the directory name, to replace the text in the Title
    species = data.get("species")
    title = data.get("title")
    title = title.replace("%NAME_OF_FOLDER%", species)
    self.utils.add(self.index, "dc_title", title)
    tfpackageData["dc:title"] = title
    tfpackageData["title"] = title

    # Type, dates and language
    self.utils.add(self.index, "dc_type", data.get("type"))
    tfpackageData["dc:type.rdf:PlainLiteral"] = data.get("type")
    tfpackageData["dc:type.skos:prefLabel"] = data.get("type")
    tfpackageData["dc:created"] = time.strftime("%Y-%m-%d", time.gmtime())
    tfpackageData["dc:modified"] = ""
    tfpackageData["dc:language.skos:prefLabel"] = "English"

    # Temporal coverage; the end date is optional
    tfpackageData["dc:coverage.vivo:DateTimeInterval.vivo:start"] = data.get("temporalCoverage").get("dateFrom")
    dateTo = data.get("temporalCoverage").get("dateTo")
    if dateTo is not None:
        tfpackageData["dc:coverage.vivo:DateTimeInterval.vivo:end"] = dateTo
    tfpackageData["dc:coverage.redbox:timePeriod"] = ""

    ### Processing the 'spatialCoverage' metadata.
    spatialCoverage = data.get("spatialCoverage")
    for i in range(len(spatialCoverage)):
        location = spatialCoverage[i]
        if location["type"] == "text":
            tfpackageData["dc:coverage.vivo:GeographicLocation." + str(i + 1) + ".dc:type"] = location["type"]
            # Keep the raw WKT as well when the value is a polygon
            if location["value"].startswith("POLYGON"):
                tfpackageData["dc:coverage.vivo:GeographicLocation." + str(i + 1) + ".redbox:wktRaw"] = location["value"]
            tfpackageData["dc:coverage.vivo:GeographicLocation." + str(i + 1) + ".rdf:PlainLiteral"] = location["value"]

    ### Processing the 'description' metadata.
    # Reading the file here, so we only do it once.
    lookupFile = open(descriptionTypesFilePath + "descriptionTypes.json")
    descriptionData = lookupFile.read()
    lookupFile.close()
    description = data.get("description")
    for i in range(len(description)):
        desc = description[i]
        tempDesc = desc.get("value")
        tempDesc = tempDesc.replace("%NAME_OF_FOLDER%", species)
        # The 'brief' description doubles as the main dc:description
        if desc["type"] == "brief":
            tfpackageData["dc:description"] = tempDesc
        tfpackageData["rif:description." + str(i + 1) + ".type"] = desc["type"]
        tfpackageData["rif:description." + str(i + 1) + ".value"] = tempDesc
        jsonSimple = JsonSimple(descriptionData)
        jsonObj = jsonSimple.getJsonObject()
        results = jsonObj.get("results")
        # Ensuring the Description Type exists; resolve its display label
        if results:
            for j in range(len(results)):
                descriptionType = results[j]
                if desc["type"] == descriptionType.get("id"):
                    tfpackageData["rif:description." + str(i + 1) + ".label"] = descriptionType.get("label")

    ### Processing the 'relatedPublication' metadata
    relatedPublication = data.get("relatedPublication")
    if relatedPublication is not None:
        for i in range(len(relatedPublication)):
            publication = relatedPublication[i]
            tfpackageData["dc:relation.swrc:Publication." + str(i + 1) + ".dc:identifier"] = publication["doi"]
            tfpackageData["dc:relation.swrc:Publication." + str(i + 1) + ".dc:title"] = publication["title"]

    ### Processing the 'relatedWebsite' metadata
    relatedWebsite = data.get("relatedWebsite")
    count = 0
    for i in range(len(relatedWebsite)):
        website = relatedWebsite[i]
        tfpackageData["dc:relation.bibo:Website." + str(i + 1) + ".dc:identifier"] = website["url"]
        tfpackageData["dc:relation.bibo:Website." + str(i + 1) + ".dc:title"] = website["notes"]
        count = i + 1

    ### Processing the 'data_source_website' metadata (override metadata)
    dataSourceWebsites = data.get("data_source_website")
    if dataSourceWebsites is not None:
        for i in range(len(dataSourceWebsites)):
            website = dataSourceWebsites[i]
            identifierType = website.get("identifier").get("type")  # renamed from 'type' (shadowed builtin)
            if identifierType == "uri":
                # Continue numbering after the relatedWebsite entries
                count += 1
                tfpackageData["dc:relation.bibo:Website." + str(count) + ".dc:identifier"] = website.get("identifier").get("value")
                tfpackageData["dc:relation.bibo:Website." + str(count) + ".dc:title"] = website["notes"]

    ### Processing the 'relatedCollection' metadata
    # Reading the file here, so we only do it once.
    lookupFile = open(collectionRelationTypesFilePath + "collectionRelationTypes.json")
    collectionData = lookupFile.read()
    lookupFile.close()
    relatedCollection = data.get("relatedCollection")
    recordIdentifier = ""  # NOTE(review): write-only within this method
    if relatedCollection is not None:
        for i in range(len(relatedCollection)):
            collection = relatedCollection[i]
            tempIdentifier = collection["identifier"]
            if tempIdentifier is not None:
                tempIdentifier = tempIdentifier.replace("%NAME_OF_FOLDER%", species)
                recordIdentifier = tempIdentifier
            else:
                tempIdentifier = ""
            tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".dc:identifier"] = tempIdentifier
            tempTitle = collection.get("title")
            tempTitle = tempTitle.replace("%NAME_OF_FOLDER%", species)
            tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".dc:title"] = tempTitle
            tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".vivo:Relationship.rdf:PlainLiteral"] = collection["relationship"]
            # Datasets without an identifier are flagged as locally curated/published
            if tempIdentifier == "":
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".redbox:origin"] = "on"
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".redbox:publish"] = "on"
            # Using the collection data as a lookup to obtain the 'label'
            relationShip = collection.get("relationship")
            jsonSimple = JsonSimple(collectionData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")
            # Ensuring the Collection Relation Types exist
            if results:
                for j in range(len(results)):
                    relation = results[j]
                    if relationShip == relation.get("id"):
                        tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) + ".vivo:Relationship.skos:prefLabel"] = relation.get("label")

    ### Processing the 'relatedService' metadata
    # Reading the file here, so we only do it once.
    lookupFile = open(servicesRelationTypesFilePath + "serviceRelationTypes.json")
    servicesData = lookupFile.read()
    lookupFile.close()
    relatedServices = data.get("relatedService")
    recordIdentifier = ""
    if relatedServices is not None:
        for i in range(len(relatedServices)):
            service = relatedServices[i]
            tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".dc:identifier"] = service["identifier"]
            tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".dc:title"] = service["title"]
            tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".vivo:Relationship.rdf:PlainLiteral"] = service["relationship"]
            # Using the services data as a lookup to obtain the 'label'
            relationShip = service.get("relationship")
            jsonSimple = JsonSimple(servicesData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")
            # Ensuring the Service Relation Types exist
            if results:
                for j in range(len(results)):
                    relation = results[j]
                    if relationShip == relation.get("id"):
                        tfpackageData["dc:relation.vivo:Service." + str(i + 1) + ".vivo:Relationship.skos:prefLabel"] = relation.get("label")

    ### Processing the 'associatedParty' metadata
    # Reading the file here so we only read it once.
    lookupFile = open(relationshipTypesFilePath + "relationshipTypes.json")
    relationshipData = lookupFile.read()
    lookupFile.close()
    associatedParty = data.get("associatedParty")
    # FIX: guard against a missing 'associatedParty' section -- previously
    # len(None) raised a TypeError when the harvested JSON had no parties.
    # (The later revision of this method carries the same guard.)
    if associatedParty is not None:
        for i in range(len(associatedParty)):
            party = associatedParty[i]
            email = party.get("who").get("value")
            if email is not None:
                whoType = party.get("who").get("type")
                if whoType == 'people':
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".dc:identifier"] = party.get("who").get("identifier")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:name"] = party.get("who").get("name")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:title"] = party.get("who").get("title")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".redbox:isCoPrimaryInvestigator"] = "off"
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".redbox:isPrimaryInvestigator"] = "on"
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:givenName"] = party.get("who").get("givenName")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:familyName"] = party.get("who").get("familyName")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".jcu:relationshipType"] = party.get("relationship")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:Organization.dc:identifier"] = party.get("affiliation").get("id")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".foaf:Organization.skos:prefLabel"] = party.get("affiliation").get("label")
                    jsonSimple = JsonSimple(relationshipData)
                    jsonObj = jsonSimple.getJsonObject()
                    results = jsonObj.get("results")
                    # Ensuring the Relationship Type exists; resolve its label
                    if results:
                        for j in range(len(results)):
                            relationshipType = results[j]
                            if party.get("relationship") == relationshipType.get("id"):
                                tfpackageData["dc:creator.foaf:Person." + str(i + 1) + ".jcu:relationshipLabel"] = relationshipType.get("label")

    ### Processing 'contactInfo.email' metadata
    tfpackageData["locrel:prc.foaf:Person.dc:identifier"] = data.get("contactInfo").get("identifier")
    tfpackageData["locrel:prc.foaf:Person.foaf:name"] = data.get("contactInfo").get("name")
    tfpackageData["locrel:prc.foaf:Person.foaf:title"] = data.get("contactInfo").get("title")
    tfpackageData["locrel:prc.foaf:Person.foaf:givenName"] = data.get("contactInfo").get("givenName")
    tfpackageData["locrel:prc.foaf:Person.foaf:familyName"] = data.get("contactInfo").get("familyName")
    tfpackageData["locrel:prc.foaf:Person.foaf:email"] = data.get("contactInfo").get("email")
    ## Stored At (on the Data Management page)
    tfpackageData["vivo:Location.vivo:GeographicLocation.gn:name"] = data.get("contactInfo").get("streetAddress")

    ### Processing 'coinvestigators' metadata
    coinvestigators = data.get("coinvestigators")
    for i in range(len(coinvestigators)):
        tfpackageData["dc:contributor.locrel:clb." + str(i + 1) + ".foaf:Agent"] = coinvestigators[i]

    ### Processing 'anzsrcFOR' metadata
    anzsrcFOR = data.get("anzsrcFOR")
    for i in range(len(anzsrcFOR)):
        anzsrc = anzsrcFOR[i]
        tfpackageData["dc:subject.anzsrc:for." + str(i + 1) + ".skos:prefLabel"] = anzsrc.get("prefLabel")
        tfpackageData["dc:subject.anzsrc:for." + str(i + 1) + ".rdf:resource"] = anzsrc.get("resource")

    ### Processing 'anzsrcSEO' metadata
    anzsrcSEO = data.get("anzsrcSEO")
    for i in range(len(anzsrcSEO)):
        anzsrc = anzsrcSEO[i]
        tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) + ".skos:prefLabel"] = anzsrc.get("prefLabel")
        tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) + ".rdf:resource"] = anzsrc.get("resource")

    ### Processing 'keyword' metadata
    keyword = data.get("keyword")
    for i in range(len(keyword)):
        tfpackageData["dc:subject.vivo:keyword." + str(i + 1) + ".rdf:PlainLiteral"] = keyword[i]

    ### Research Themes: map the theme name onto its checkbox flag
    theme = data.get("researchTheme")
    if theme == "Tropical Ecosystems, Conservation and Climate Change":
        tfpackageData["jcu:research.themes.tropicalEcoSystems"] = "true"
    elif theme == "Industries and Economies in the Tropics":
        tfpackageData["jcu:research.themes.industriesEconomies"] = "true"
    elif theme == "People and Societies in the Tropics":
        tfpackageData["jcu:research.themes.peopleSocieties"] = "true"
    elif theme == "Tropical Health, Medicine and Biosecurity":
        tfpackageData["jcu:research.themes.tropicalHealth"] = "true"
    elif theme == "Not aligned to a University theme":
        tfpackageData["jcu:research.themes.notAligned"] = "true"

    # Access rights and licence
    tfpackageData["dc:accessRights.skos:prefLabel"] = data.get("accessRights")
    tfpackageData["dc:license.dc:identifier"] = data.get("license").get("url")
    tfpackageData["dc:license.skos:prefLabel"] = data.get("license").get("label")

    # Identifier: flagged 'external' when an additional identifier is supplied
    additionalId = data.get("additionalIdentifier")
    if additionalId is not None:
        additionalId = additionalId.replace("%NAME_OF_FOLDER%", species)
        tfpackageData["dc:identifier.rdf:PlainLiteral"] = additionalId
        tfpackageData["dc:identifier.redbox:origin"] = "external"
        tfpackageData["dc:identifier.dc:type.rdf:PlainLiteral"] = "local"
        tfpackageData["dc:identifier.dc:type.skos:prefLabel"] = "Local Identifier"
    else:
        tfpackageData["dc:identifier.redbox:origin"] = "internal"

    dataLocation = data.get("dataLocation")
    dataLocation = dataLocation.replace("%NAME_OF_FOLDER%", species)
    tfpackageData["bibo:Website.1.dc:identifier"] = dataLocation

    # The following have been intentionally set to blank. No mapping is
    # required for these fields.
    tfpackageData["redbox:retentionPeriod"] = data.get("retentionPeriod")
    tfpackageData["dc:extent"] = "unknown"
    tfpackageData["redbox:disposalDate"] = ""
    tfpackageData["locrel:own.foaf:Agent.1.foaf:name"] = ""
    tfpackageData["locrel:dtm.foaf:Agent.foaf:name"] = ""

    ### Processing 'organizationalGroup' metadata
    # NOTE(review): the keys are not numbered, so only the last group survives
    organisationalGroup = data.get("organizationalGroup")
    for i in range(len(organisationalGroup)):
        organisation = organisationalGroup[i]
        tfpackageData["foaf:Organization.dc:identifier"] = organisation.get("identifier")
        tfpackageData["foaf:Organization.skos:prefLabel"] = organisation.get("prefLabel")

    tfpackageData["swrc:ResearchProject.dc:title"] = ""
    tfpackageData["locrel:dpt.foaf:Person.foaf:name"] = ""
    tfpackageData["dc:SizeOrDuration"] = ""
    tfpackageData["dc:Policy"] = ""

    # Citations
    # NOTE(review): most citation keys are un-numbered, so the last citation
    # wins; only the locrel:ctb.* contributor keys carry an index.
    citations = data.get("citations")
    for i in range(len(citations)):
        citation = citations[i]
        tfpackageData["dc:biblioGraphicCitation.redbox:sendCitation"] = citation.get("sendCitation")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:identifier.skos:note"] = citation.get("curationIdentifier")
        paperTitle = citation.get("paperTitle")
        paperTitle = paperTitle.replace("%NAME_OF_FOLDER%", species)
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:title"] = paperTitle
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1) + ".foaf:familyName"] = citation.get("familyName")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1) + ".foaf:givenName"] = citation.get("givenName")
        # FIX: dropped the chained assignment that clobbered the 'title' local
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1) + ".foaf:title"] = citation.get("title")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:publisher.rdf:PlainLiteral"] = citation.get("publisher")
        url = citation.get("url")
        url = url.replace("%NAME_OF_FOLDER%", species)
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.bibo:Website.dc:identifier"] = url
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.rdf:PlainLiteral"] = tfpackageData["dc:created"]
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.rdf:PlainLiteral"] = "publicationDate"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.skos:prefLabel"] = "Publication Date"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.rdf:PlainLiteral"] = "created"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.skos:prefLabel"] = "Date Created"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.rdf:PlainLiteral"] = tfpackageData["dc:created"]
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.jcu:dataType"] = citation.get("dataType")
        tfpackageData["dc:biblioGraphicCitation.skos:prefLabel"] = citation.get("familyName") + ", " + citation.get("givenName") + ". (" + time.strftime("%Y", time.gmtime()) + "). " + paperTitle + ". " + citation.get("publisher") + ". [" + citation.get("dataType") + "] {ID_WILL_BE_HERE}"

    # Persist the package and progress the workflow
    self.__updateMetadataPayload(tfpackageData)
    self.__workflow()
def __metadata(self):
    """Map an Edgar dataset's "metadata.json" payload into tfpackage form data.

    Builds the formData.tfpackage dictionary from the harvested JSON
    (replacing every %NAME_OF_FOLDER% placeholder with the species name),
    indexes the core fields (identifier, title, type, institution, source),
    then persists the package via __updateMetadataPayload() and triggers
    __workflow().  Side effects only; returns None.
    """
    self.title = None
    self.dcType = None
    self.__checkMetadataPayload()

    # Load the harvested JSON payload
    jsonPayload = self.object.getPayload("metadata.json")
    json = self.utils.getJsonObject(jsonPayload.open())
    jsonPayload.close()

    # Record identifier and fixed institutional fields
    metadata = json.getObject("metadata")
    identifier = metadata.get("dc.identifier")
    self.utils.add(self.index, "dc:identifier", identifier)
    self.__storeIdentifier(identifier)
    self.utils.add(self.index, "institution", "James Cook University")
    self.utils.add(self.index, "source", "http://spatialecology.jcu.edu.au/Edgar/")
    data = json.getObject("data")

    ####Global setting for processing data
    ####These will need to be changed based on you system installation.
    theMintHost = java.lang.System.getProperty("mint.proxy.url")  # NOTE(review): unused in this method
    collectionRelationTypesFilePath = FascinatorHome.getPath(
    ) + "/../portal/default/redbox/workflows/forms/data/"
    servicesRelationTypesFilePath = FascinatorHome.getPath(
    ) + "/../portal/default/redbox/workflows/forms/data/"
    descriptionTypesFilePath = FascinatorHome.getPath(
    ) + "/../portal/default/local/workflows/forms/data/"
    relationshipTypesFilePath = FascinatorHome.getPath(
    ) + "/../portal/default/local/workflows/forms/data/"

    ###Allocating space to create the formData.tfpackage
    tfpackageData = {}

    ###Using the species name, obtained from the directory name, to replace the text in the Title
    species = data.get("species")
    title = data.get("title")
    title = title.replace("%NAME_OF_FOLDER%", species)
    self.utils.add(self.index, "dc_title", title)
    tfpackageData["dc:title"] = title
    tfpackageData["title"] = title

    # Type, dates and language
    self.utils.add(self.index, "dc_type", data.get("type"))
    tfpackageData["dc:type.rdf:PlainLiteral"] = data.get("type")
    tfpackageData["dc:type.skos:prefLabel"] = data.get("type")
    tfpackageData["dc:created"] = time.strftime("%Y-%m-%d", time.gmtime())
    tfpackageData["dc:modified"] = ""
    tfpackageData["dc:language.skos:prefLabel"] = "English"

    # Temporal coverage; the end date is optional
    tfpackageData[
        "dc:coverage.vivo:DateTimeInterval.vivo:start"] = data.get(
            "temporalCoverage").get("dateFrom")
    dateTo = data.get("temporalCoverage").get("dateTo")
    if dateTo is not None:
        tfpackageData[
            "dc:coverage.vivo:DateTimeInterval.vivo:end"] = dateTo
    tfpackageData["dc:coverage.redbox:timePeriod"] = ""

    ###Processing the 'spatialCoverage' metadata.
    spatialCoverage = data.get("spatialCoverage")
    for i in range(len(spatialCoverage)):
        location = spatialCoverage[i]
        if location["type"] == "text":
            tfpackageData["dc:coverage.vivo:GeographicLocation." +
                          str(i + 1) + ".dc:type"] = location["type"]
            tfpackageData["dc:coverage.vivo:GeographicLocation." +
                          str(i + 1) + ".dc:typeLabel"] = "Free Text"
            # Keep the raw WKT as well when the value is a polygon
            if (location["value"].startswith("POLYGON")):
                tfpackageData["dc:coverage.vivo:GeographicLocation." +
                              str(i + 1) + ".redbox:wktRaw"] = location["value"]
            tfpackageData["dc:coverage.vivo:GeographicLocation." +
                          str(i + 1) + ".rdf:PlainLiteral"] = location["value"]

    ###Processing the 'description' metadata.
    #Reading the file here, so we only do it once.
    file = open(descriptionTypesFilePath + "descriptionTypes.json")  # NOTE(review): shadows builtin 'file'
    descriptionData = file.read()
    file.close()
    description = data.get("description")
    for i in range(len(description)):
        desc = description[i]
        tempDesc = desc.get("value")
        tempDesc = tempDesc.replace("%NAME_OF_FOLDER%", species)
        # The 'brief' description doubles as the main dc:description
        if (desc["type"] == "brief"):
            tfpackageData["dc:description"] = tempDesc
        tfpackageData["rif:description." + str(i + 1) +
                      ".type"] = desc["type"]
        tfpackageData["rif:description." + str(i + 1) +
                      ".value"] = tempDesc
        jsonSimple = JsonSimple(descriptionData)
        jsonObj = jsonSimple.getJsonObject()
        results = jsonObj.get("results")
        #ensuring the Description Type exist
        if results:
            for j in range(len(results)):
                descriptionType = results[j]
                if (desc["type"] == descriptionType.get("id")):
                    tfpackageData["rif:description." + str(i + 1) +
                                  ".label"] = descriptionType.get("label")

    ###Processing the 'relatedPublication' metadata
    relatedPublication = data.get("relatedPublication")
    if relatedPublication is not None:
        for i in range(len(relatedPublication)):
            publication = relatedPublication[i]
            tfpackageData["dc:relation.swrc:Publication." + str(i + 1) +
                          ".dc:identifier"] = publication["doi"]
            tfpackageData["dc:relation.swrc:Publication." + str(i + 1) +
                          ".dc:title"] = publication["title"]

    ###Processing the 'relatedWebsite' metadata
    relatedWebsite = data.get("relatedWebsite")
    count = 0
    for i in range(len(relatedWebsite)):
        website = relatedWebsite[i]
        tfpackageData["dc:relation.bibo:Website." + str(i + 1) +
                      ".dc:identifier"] = website["url"]
        tfpackageData["dc:relation.bibo:Website." + str(i + 1) +
                      ".dc:title"] = website["notes"]
        count = i + 1

    ###Processing the 'data_source_website' metadata (override metadata)
    dataSourceWebsites = data.get("data_source_website")
    if dataSourceWebsites is not None:
        for i in range(len(dataSourceWebsites)):
            website = dataSourceWebsites[i]
            type = website.get("identifier").get("type")  # NOTE(review): shadows builtin 'type'
            if type == "uri":
                # Continue numbering after the relatedWebsite entries
                count += 1
                tfpackageData["dc:relation.bibo:Website." + str(count) +
                              ".dc:identifier"] = website.get(
                                  "identifier").get("value")
                tfpackageData["dc:relation.bibo:Website." + str(count) +
                              ".dc:title"] = website["notes"]

    ###Processing the 'relatedCollection' metadata
    #Reading the file here, so we only do it once.
    file = open(collectionRelationTypesFilePath +
                "collectionRelationTypes.json")
    collectionData = file.read()
    file.close()
    relatedCollection = data.get("relatedCollection")
    recordIdentifier = ""  # NOTE(review): write-only within this method
    if relatedCollection is not None:
        for i in range(len(relatedCollection)):
            collection = relatedCollection[i]
            tempIdentifier = collection["identifier"]
            if tempIdentifier is not None:
                tempIdentifier = tempIdentifier.replace(
                    "%NAME_OF_FOLDER%", species)
                recordIdentifier = tempIdentifier
            else:
                tempIdentifier = ""
            tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                          ".dc:identifier"] = tempIdentifier
            tempTitle = collection.get("title")
            tempTitle = tempTitle.replace("%NAME_OF_FOLDER%", species)
            tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                          ".dc:title"] = tempTitle
            tfpackageData[
                "dc:relation.vivo:Dataset." + str(i + 1) +
                ".vivo:Relationship.rdf:PlainLiteral"] = collection[
                    "relationship"]
            # Datasets without an identifier are flagged as locally curated/published
            if tempIdentifier == "":
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                              ".redbox:origin"] = "on"
                tfpackageData["dc:relation.vivo:Dataset." + str(i + 1) +
                              ".redbox:publish"] = "on"
            #Using the collection data as a lookup to obtain the 'label'
            relationShip = collection.get("relationship")
            jsonSimple = JsonSimple(collectionData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")
            #ensuring the Collection Relation Types exist
            if results:
                for j in range(len(results)):
                    relation = results[j]
                    if (relationShip == relation.get("id")):
                        tfpackageData[
                            "dc:relation.vivo:Dataset." + str(i + 1) +
                            ".vivo:Relationship.skos:prefLabel"] = relation.get(
                                "label")

    ###Processing the 'relatedService' metadata
    #Reading the file here, so we only do it once.
    file = open(servicesRelationTypesFilePath +
                "serviceRelationTypes.json")
    servicesData = file.read()
    file.close()
    relatedServices = data.get("relatedService")
    recordIdentifier = ""
    if relatedServices is not None:
        for i in range(len(relatedServices)):
            service = relatedServices[i]
            tfpackageData["dc:relation.vivo:Service." + str(i + 1) +
                          ".dc:identifier"] = service["identifier"]
            tfpackageData["dc:relation.vivo:Service." + str(i + 1) +
                          ".dc:title"] = service["title"]
            tfpackageData[
                "dc:relation.vivo:Service." + str(i + 1) +
                ".vivo:Relationship.rdf:PlainLiteral"] = service[
                    "relationship"]
            #Using the services data as a lookup to obtain the 'label'
            relationShip = service.get("relationship")
            jsonSimple = JsonSimple(servicesData)
            jsonObj = jsonSimple.getJsonObject()
            results = jsonObj.get("results")
            #ensuring the Service Relation Types exist
            if results:
                for j in range(len(results)):
                    relation = results[j]
                    if (relationShip == relation.get("id")):
                        tfpackageData[
                            "dc:relation.vivo:Service." + str(i + 1) +
                            ".vivo:Relationship.skos:prefLabel"] = relation.get(
                                "label")

    ###Processing the 'associatedParty' metadata
    #Reading the file here so we only read it once.
    file = open(relationshipTypesFilePath + "relationshipTypes.json")
    relationshipData = file.read()
    file.close()
    associatedParty = data.get("associatedParty")
    # Guard: the 'associatedParty' section may be absent from the harvest
    if associatedParty is not None:
        for i in range(len(associatedParty)):
            party = associatedParty[i]
            email = party.get("who").get("value")
            if email is not None:
                whoType = party.get("who").get("type")
                if (whoType == 'people'):
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".dc:identifier"] = party.get("who").get(
                                      "identifier")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".foaf:name"] = party.get("who").get(
                                      "name")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".foaf:title"] = party.get("who").get(
                                      "title")
                    tfpackageData[
                        "dc:creator.foaf:Person." + str(i + 1) +
                        ".redbox:isCoPrimaryInvestigator"] = "off"
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".redbox:isPrimaryInvestigator"] = "on"
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".foaf:givenName"] = party.get(
                                      "who").get("givenName")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".foaf:familyName"] = party.get(
                                      "who").get("familyName")
                    tfpackageData["dc:creator.foaf:Person." + str(i + 1) +
                                  ".jcu:relationshipType"] = party.get(
                                      "relationship")
                    tfpackageData[
                        "dc:creator.foaf:Person." + str(i + 1) +
                        ".foaf:Organization.dc:identifier"] = party.get(
                            "affiliation").get("id")
                    tfpackageData[
                        "dc:creator.foaf:Person." + str(i + 1) +
                        ".foaf:Organization.skos:prefLabel"] = party.get(
                            "affiliation").get("label")
                    jsonSimple = JsonSimple(relationshipData)
                    jsonObj = jsonSimple.getJsonObject()
                    results = jsonObj.get("results")
                    #ensuring the Relationship Type exists
                    if results:
                        for j in range(len(results)):
                            relationshipType = results[j]
                            if (party.get("relationship") ==
                                    relationshipType.get("id")):
                                tfpackageData[
                                    "dc:creator.foaf:Person." + str(i + 1) +
                                    ".jcu:relationshipLabel"] = relationshipType.get(
                                        "label")

    ###Processing 'contactInfo.email' metadata
    tfpackageData["locrel:prc.foaf:Person.dc:identifier"] = data.get(
        "contactInfo").get("identifier")
    tfpackageData["locrel:prc.foaf:Person.foaf:name"] = data.get(
        "contactInfo").get("name")
    tfpackageData["locrel:prc.foaf:Person.foaf:title"] = data.get(
        "contactInfo").get("title")
    tfpackageData["locrel:prc.foaf:Person.foaf:givenName"] = data.get(
        "contactInfo").get("givenName")
    tfpackageData["locrel:prc.foaf:Person.foaf:familyName"] = data.get(
        "contactInfo").get("familyName")
    tfpackageData["locrel:prc.foaf:Person.foaf:email"] = data.get(
        "contactInfo").get("email")
    ##Stored At (on the Data Management page)
    tfpackageData[
        "vivo:Location.vivo:GeographicLocation.gn:name"] = data.get(
            "contactInfo").get("streetAddress")

    ###Processing 'coinvestigators' metadata
    coinvestigators = data.get("coinvestigators")
    for i in range(len(coinvestigators)):
        tfpackageData["dc:contributor.locrel:clb." + str(i + 1) +
                      ".foaf:Agent"] = coinvestigators[i]

    ###Processing 'anzsrcFOR' metadata
    anzsrcFOR = data.get("anzsrcFOR")
    for i in range(len(anzsrcFOR)):
        anzsrc = anzsrcFOR[i]
        tfpackageData["dc:subject.anzsrc:for." + str(i + 1) +
                      ".skos:prefLabel"] = anzsrc.get("prefLabel")
        tfpackageData["dc:subject.anzsrc:for." + str(i + 1) +
                      ".rdf:resource"] = anzsrc.get("resource")

    ###Processing 'anzsrcSEO' metadata
    anzsrcSEO = data.get("anzsrcSEO")
    for i in range(len(anzsrcSEO)):
        anzsrc = anzsrcSEO[i]
        tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) +
                      ".skos:prefLabel"] = anzsrc.get("prefLabel")
        tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) +
                      ".rdf:resource"] = anzsrc.get("resource")

    ###Processing 'keyword' metadata
    keyword = data.get("keyword")
    for i in range(len(keyword)):
        tfpackageData["dc:subject.vivo:keyword." + str(i + 1) +
                      ".rdf:PlainLiteral"] = keyword[i]

    ###Research Themes: map the theme name onto its checkbox flag
    theme = data.get("researchTheme")
    if (theme == "Tropical Ecosystems, Conservation and Climate Change"):
        tfpackageData["jcu:research.themes.tropicalEcoSystems"] = "true"
    elif (theme == "Industries and Economies in the Tropics"):
        tfpackageData["jcu:research.themes.industriesEconomies"] = "true"
    elif (theme == "People and Societies in the Tropics"):
        tfpackageData["jcu:research.themes.peopleSocieties"] = "true"
    elif (theme == "Tropical Health, Medicine and Biosecurity"):
        tfpackageData["jcu:research.themes.tropicalHealth"] = "true"
    elif (theme == "Not aligned to a University theme"):
        tfpackageData["jcu:research.themes.notAligned"] = "true"

    # Access rights and licence
    tfpackageData["dc:accessRights.skos:prefLabel"] = data.get(
        "accessRights")
    tfpackageData["dc:license.dc:identifier"] = data.get("license").get(
        "url")
    tfpackageData["dc:license.skos:prefLabel"] = data.get("license").get(
        "label")

    #identifier: flagged 'external' when an additional identifier is supplied
    additionalId = data.get("additionalIdentifier")
    if additionalId is not None:
        additionalId = additionalId.replace("%NAME_OF_FOLDER%", species)
        tfpackageData["dc:identifier.rdf:PlainLiteral"] = additionalId
        tfpackageData["dc:identifier.redbox:origin"] = "external"
        tfpackageData["dc:identifier.dc:type.rdf:PlainLiteral"] = "local"
        tfpackageData[
            "dc:identifier.dc:type.skos:prefLabel"] = "Local Identifier"
    else:
        tfpackageData["dc:identifier.redbox:origin"] = "internal"

    dataLocation = data.get("dataLocation")
    dataLocation = dataLocation.replace("%NAME_OF_FOLDER%", species)
    tfpackageData["bibo:Website.1.dc:identifier"] = dataLocation

    #The following have been intentionally set to blank. No mapping is required for these fields.
    tfpackageData["redbox:retentionPeriod"] = data.get("retentionPeriod")
    tfpackageData["dc:extent"] = "unknown"
    tfpackageData["redbox:disposalDate"] = ""
    tfpackageData["locrel:own.foaf:Agent.1.foaf:name"] = ""
    tfpackageData["locrel:dtm.foaf:Agent.foaf:name"] = ""

    ###Processing 'organizationalGroup' metadata
    # NOTE(review): the keys are not numbered, so only the last group survives
    organisationalGroup = data.get("organizationalGroup")
    for i in range(len(organisationalGroup)):
        organisation = organisationalGroup[i]
        tfpackageData[
            "foaf:Organization.dc:identifier"] = organisation.get(
                "identifier")
        tfpackageData[
            "foaf:Organization.skos:prefLabel"] = organisation.get(
                "prefLabel")

    tfpackageData["swrc:ResearchProject.dc:title"] = ""
    tfpackageData["locrel:dpt.foaf:Person.foaf:name"] = ""
    tfpackageData["dc:SizeOrDuration"] = ""
    tfpackageData["dc:Policy"] = ""

    #Citations
    # NOTE(review): most citation keys are un-numbered, so the last citation
    # wins; only the locrel:ctb.* contributor keys carry an index.
    citations = data.get("citations")
    for i in range(len(citations)):
        citation = citations[i]
        tfpackageData[
            "dc:biblioGraphicCitation.redbox:sendCitation"] = citation.get(
                "sendCitation")
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.dc:identifier.skos:note"] = citation.get(
                "curationIdentifier")
        paperTitle = citation.get("paperTitle")
        paperTitle = paperTitle.replace("%NAME_OF_FOLDER%", species)
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.dc:title"] = paperTitle
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." +
                      str(i + 1) +
                      ".foaf:familyName"] = citation.get("familyName")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." +
                      str(i + 1) +
                      ".foaf:givenName"] = citation.get("givenName")
        # NOTE(review): chained assignment rebinds the 'title' local here
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." +
                      str(i + 1) +
                      ".foaf:title"] = title = citation.get("title")
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.dc:publisher.rdf:PlainLiteral"] = citation.get(
                "publisher")
        url = citation.get("url")
        url = url.replace("%NAME_OF_FOLDER%", species)
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.bibo:Website.dc:identifier"] = url
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.dc:date.1.rdf:PlainLiteral"] = tfpackageData[
                "dc:created"]
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.rdf:PlainLiteral"] = "publicationDate"
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.skos:prefLabel"] = "Publication Date"
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.rdf:PlainLiteral"] = "created"
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.skos:prefLabel"] = "Date Created"
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.dc:date.2.rdf:PlainLiteral"] = tfpackageData[
                "dc:created"]
        tfpackageData[
            "dc:biblioGraphicCitation.dc:hasPart.jcu:dataType"] = citation.get(
                "dataType")
        tfpackageData[
            "dc:biblioGraphicCitation.skos:prefLabel"] = citation.get(
                "familyName"
            ) + ", " + citation.get("givenName") + ". (" + time.strftime(
                "%Y", time.gmtime()) + "). " + paperTitle + ". " + citation.get(
                    "publisher") + ". [" + citation.get(
                        "dataType") + "] {ID_WILL_BE_HERE}"

    # Persist the package and progress the workflow
    self.__updateMetadataPayload(tfpackageData)
    self.__workflow()
def __workflow(self):
    """Index workflow state for the 'packaging' workflow.

    Reads the object's 'workflow.metadata' payload, progresses the step
    when a pending 'targetStep' differs from the current 'step', applies
    the matching stage's visibility/security settings from config, and
    copies the embedded form data into the indexer's field attributes.
    If no workflow payload exists yet, a new one is created in the
    initial 'pending' step.

    Side effects (all on self): title, creatorList, contributorList,
    descriptionList, formatList, creationDate, customFields,
    item_security and message_list may be (re)assigned.
    """
    # Workflow data
    WORKFLOW_ID = "packaging"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    try:
        # Load existing workflow metadata; getPayload() raising
        # StorageException is the "payload missing" path handled below.
        wfPayload = self.object.getPayload("workflow.metadata")
        wfMeta = self.utils.getJsonObject(wfPayload.open())
        wfPayload.close()
        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])
        # Security change: take label/visibility/security from the
        # configured stage matching the (possibly just-advanced) step.
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    # Messages are only queued on a real step change
                    self.message_list = stage.getStringList(["message"])
        # Form processing
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
        else:
            formData = None
        coreFields = ["title", "creator", "contributor", "description", "format", "creationDate"]
        if formData is not None:
            # Core fields
            formTitles = formData.getStringList(["title"])
            if formTitles:
                for formTitle in formTitles:
                    # Only the first title encountered wins
                    if self.title is None:
                        self.title = formTitle
            creator = formData.getStringList(["creator"])
            if creator:
                self.creatorList = creator
            contributor = formData.getStringList(["contributor"])
            if contributor:
                self.contributorList = contributor
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            format = formData.getStringList(["format"])
            if format:
                self.formatList = format
            creation = formData.getStringList(["creationDate"])
            if creation:
                self.creationDate = creation
            # Non-core fields: everything else becomes a custom field
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])
    except StorageException, e:
        # No workflow payload, time to create one in the initial
        # 'pending' step, with that stage's security settings.
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", "pending")
        wfMetaObj.put("pageTitle", "Uploaded Files - Management")
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == "pending":
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])
def __metadata(self):
    """Index the object's 'metadata.json' payload into Solr.

    Reads the harvested metadata, resolves people, groups and ANZSRC
    subject codes against The Mint name-authority service over HTTP
    (urllib), and adds the resulting fields to self.index via
    self.utils.add().
    """
    jsonPayload = self.object.getPayload("metadata.json")
    json = self.utils.getJsonObject(jsonPayload.open())
    jsonPayload.close()
    metadata = json.getObject("metadata")
    self.utils.add(self.index, "dc_identifier", metadata.get("dc.identifier"))
    data = json.getObject("data")
    ####Global setting for processing data
    ####These will need to be changed based on you system installation.
    theMintHost = "http://*****:*****@example.edu.au"
    # NOTE(review): 'email', 'i' and 'party' are used below but never
    # defined in this method -- presumably they were set by an enclosing
    # loop over the record's creators that is not visible in this copy
    # of the file; confirm against the original source.
    sock = urllib.urlopen(theMintHost + "/mint/default/opensearch/lookup?count=999&searchTerms=Email:" + email)
    mintData = sock.read()
    sock.close()
    jsonSimple = JsonSimple(mintData)
    jsonObj = jsonSimple.getJsonObject()
    results = jsonObj.get("results")
    #Ensuring that the Email identified a Party from The Mint
    if results:
        resultMetadata = JsonObject(results.get(0))
        allData = resultMetadata.get("result-metadata")
        creator = allData.get("all")
        whoType = party.get("who").get("type")
        # Only index the creator when the lookup found data and the
        # party is a person (not a group).
        if ((creator is not None) and (whoType == 'people')):
            self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".dc_identifier", creator.get("dc_identifier")[0])
            self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".foaf_name", creator.get("dc_title"))
            self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".foaf_title", creator.get("Honorific")[0])
            self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".redbox_isCoPrimaryInvestigator", "off")
            self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".redbox_isPrimaryInvestigator", "on")
            self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".foaf_givenName", creator.get("Given_Name")[0])
            self.utils.add(self.index, "dc_creator.foaf_Person." + str(i) + ".foaf_familyName", creator.get("Family_Name")[0])
    ###Processing 'contactInfo.email' metadata
    contactInfoEmail = data.get("contactInfo").get("email")
    #Using the email address to obtain details from The Mint
    #For testing, hard coded email address
    #contactInfoEmail = "*****@*****.**"
    sock = urllib.urlopen(theMintHost + "/mint/default/opensearch/lookup?count=999&searchTerms=Email:" + contactInfoEmail)
    mintData = sock.read()
    sock.close()
    jsonSimple = JsonSimple(mintData)
    jsonObj = jsonSimple.getJsonObject()
    results = jsonObj.get("results")
    #Ensuring that the Email identified a Party from The Mint
    if results:
        resultMetadata = JsonObject(results.get(0))
        allData = resultMetadata.get("result-metadata")
        creator = allData.get("all")
        if (creator is not None):
            self.utils.add(self.index, "locrel_prc.foaf_Person.dc_identifier", creator.get("dc_identifier").toString())
            self.utils.add(self.index, "locrel_prc.foaf_Person.foaf_name", creator.get("dc_title"))
            self.utils.add(self.index, "locrel_prc.foaf_Person.foaf_title", creator.get("Honorific").toString())
            self.utils.add(self.index, "locrel_prc.foaf_Person.foaf_givenName", creator.get("Given_Name").toString())
            self.utils.add(self.index, "locrel_prc.foaf_Person.foaf_familyName", creator.get("Family_Name").toString())
    ###Processing 'coinvestigators' metadata
    coinvestigators = data.get("coinvestigators")
    for i in range(len(coinvestigators)):
        self.utils.add(self.index, "dc_contributor.loclrel_clb." + str(i) + ".foaf_Agent", coinvestigators[i])
    ###Processing 'anzsrcFOR' metadata
    anzsrcFOR = data.get("anzsrcFOR")
    for i in range(len(anzsrcFOR)):
        anzsrc = anzsrcFOR[i]
        #Querying against The Mint, but only using the first 4 numbers from anzsrc, this ensure a result
        sock = urllib.urlopen(theMintHost + "/mint/ANZSRC_FOR/opensearch/lookup?count=999&level=http://purl.org/asc/1297.0/2008/for/" + anzsrc[:4])
        mintData = sock.read()
        sock.close()
        jsonSimple = JsonSimple(mintData)
        jsonObj = jsonSimple.getJsonObject()
        results = jsonObj.get("results")
        #ensuring that anzsrc identified a record in The Mint
        if results:
            for j in range(len(results)):
                result = JsonObject(results.get(j))
                rdfAbout = result.get("rdf:about")
                target = "http://purl.org/asc/1297.0/2008/for/" + anzsrc
                # Keep only the result whose URI matches the full code
                if (rdfAbout == target):
                    self.utils.add(self.index, "dc_subject.anzsrc_for." + str(i) + ".skos_prefLabel", result.get("skos:prefLabel"))
                    self.utils.add(self.index, "dc_subject.anzsrc_for." + str(i) + ".rdf:resource", rdfAbout)
    ###Processing 'anzsrcSEO' metadata
    anzsrcSEO = data.get("anzsrcSEO")
    for i in range(len(anzsrcSEO)):
        anzsrc = anzsrcSEO[i]
        #Querying against The Mint, but only using the first 4 numbers from anzsrc, this ensure a result
        sock = urllib.urlopen(theMintHost + "/mint/ANZSRC_SEO/opensearch/lookup?count=999&level=http://purl.org/asc/1297.0/2008/seo/" + anzsrc[:4])
        mintData = sock.read()
        sock.close()
        jsonSimple = JsonSimple(mintData)
        jsonObj = jsonSimple.getJsonObject()
        results = jsonObj.get("results")
        #ensuring that anzsrc identified a record in The Mint
        if results:
            for j in range(len(results)):
                result = JsonObject(results.get(j))
                rdfAbout = result.get("rdf:about")
                target = "http://purl.org/asc/1297.0/2008/seo/" + anzsrc
                # Keep only the result whose URI matches the full code
                if (rdfAbout == target):
                    self.utils.add(self.index, "dc_subject.anzsrc_seo." + str(i) + ".skos_prefLabel", result.get("skos:prefLabel"))
                    self.utils.add(self.index, "dc_subject.anzsrc_seo." + str(i) + ".rdf:resource", rdfAbout)
    ###Processing 'keyword' metadata
    keyword = data.get("keyword")
    for i in range(len(keyword)):
        self.utils.add(self.index, "dc_subject.vivo_keyword." + str(i) + ".rdf_PlainLiteral", keyword[i])
    self.utils.add(self.index, "dc_accessRights.skos_prefLabel", data.get("accessRights"))
    self.utils.add(self.index, "dc_license.dc_identifier", data.get("license").get("url"))
    self.utils.add(self.index, "dc_license.skos_prefLabel", data.get("license").get("label"))
    self.utils.add(self.index, "dc_identifier.redbox_origin", "internal")
    dataLocation = data.get("dataLocation")
    # NOTE(review): 'species' is not defined in this method either --
    # presumably set alongside 'email'/'party' above; verify upstream.
    dataLocation = dataLocation.replace("%NAME_OF_FOLDER%", species)
    self.utils.add(self.index, "bibo_Website.1.dc_identifier", dataLocation)
    #The following have been intentionally set to blank. No mapping is required for these fields.
    self.utils.add(self.index, "vivo_Location", "")
    self.utils.add(self.index, "redbox_retentionPeriod", data.get("retentionPeriod"))
    self.utils.add(self.index, "dc_extent", "unknown")
    self.utils.add(self.index, "redbox_disposalDate", "")
    self.utils.add(self.index, "locrel_own.foaf_Agent.1_foaf_name", "")
    self.utils.add(self.index, "locrel_dtm.foaf_Agent.foaf_name", "")
    ###Processing 'organizationalGroup' metadata
    organisationalGroup = data.get("organizationalGroup")
    for i in range(len(organisationalGroup)):
        organisation = organisationalGroup[i]
        #Querying against The Mint
        sock = urllib.urlopen(theMintHost + "/mint/Parties_Groups/opensearch/lookup?count=9999&searchTerms=ID:" + organisation)
        mintData = sock.read()
        sock.close()
        jsonSimple = JsonSimple(mintData)
        jsonObj = jsonSimple.getJsonObject()
        results = jsonObj.get("results")
        #Ensuring that the ID identified a group record in The Mint
        if results:
            resultMetadata = JsonObject(results.get(0))
            allData = resultMetadata.get("result-metadata")
            orgGroup = allData.get("all")
            self.utils.add(self.index, "foaf_Organization.dc_identifier", orgGroup.get("dc_identifier")[0])
            self.utils.add(self.index, "foaf_Organization.skos_prefLabel", orgGroup.get("Name")[0])
    #The following have been intentionally set to blank. No mapping is required for these fields.
    self.utils.add(self.index, "foaf_fundedBy.foaf_Agent", "")
    self.utils.add(self.index, "foaf_fundedBy.vivo_Grant", "")
    self.utils.add(self.index, "swrc_ResearchProject.dc_title", "")
    self.utils.add(self.index, "locrel_dpt.foaf_Person.foaf_name", "")
    self.utils.add(self.index, "dc_SizeOrDuration", "")
    self.utils.add(self.index, "dc_Policy", "")
    self.utils.add(self.index, "redbox_ManagementPlan", "")
def __workflow(self):
    """Index workflow state for the 'dataset' workflow.

    Chooses page title / display type / initial step based on whether
    the owner is 'guest', progresses the workflow step if a 'targetStep'
    is pending, (re)writes the 'workflow.metadata' payload when the
    state changed, then indexes form data and the package manifest --
    including dc:created date handling, Dublin Core faceting, keyword
    extraction, and accumulation of array/composite fields into
    self.arrayBucket for reporting.
    """
    # Workflow data
    WORKFLOW_ID = "dataset"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    # Guest-owned objects start as submission requests; everything else
    # starts one step further along as a metadata record.
    if self.owner == "guest":
        pageTitle = "Submission Request"
        displayType = "submission-request"
        initialStep = 0
    else:
        pageTitle = "Metadata Record"
        displayType = "package-dataset"
        initialStep = 1
    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMeta.getJsonObject().put("pageTitle", pageTitle)
        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])
        # Security change: apply the matching stage's settings
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    # Messages only go out on a real step change
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create one at the initial step
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])
    # Has the workflow metadata changed? If so persist it back to storage
    if wfChanged == True:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            print " ERROR updating dataset payload"
    # Form processing
    coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        data = formData.getJsonObject()
        for field in data.keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])
    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            # Only the first title encountered wins
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]
    #Used to make sure we have a created date
    createdDateFlag = False
    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field not in coreFields:
            value = formData.get(field)
            if value is not None and value.strip() != "":
                self.utils.add(self.index, field, value)
                # We want to sort by date of creation, so it
                # needs to be indexed as a date (ie. 'date_*')
                if field == "dc:created":
                    parsedTime = time.strptime(value, "%Y-%m-%d")
                    solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                    self.utils.add(self.index, "date_created", solrTime)
                    self.log.debug("Set created date to :%s" % solrTime)
                    createdDateFlag = True
                elif field == "redbox:embargo.dc:date":
                    self.embargoedDate = value
                elif field == "create_timestamp":
                    self.createTimeStamp = value
                # try to extract some common fields for faceting
                if field.startswith("dc:") and \
                        not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                        or field.endswith(".dc:identifier") \
                        or field.endswith(".rdf:resource")):
                    # index dublin core fields for faceting
                    basicField = field.replace("dc:", "dc_")
                    dot = field.find(".")
                    if dot > 0:
                        facetField = basicField[:dot]
                    else:
                        facetField = basicField
                    #print "Indexing DC field '%s':'%s'" % (field, facetField)
                    if facetField == "dc_title":
                        if self.title is None:
                            self.title = value
                    elif facetField == "dc_type":
                        if self.dcType is None:
                            self.dcType = value
                    elif facetField == "dc_creator":
                        if basicField.endswith("foaf_name"):
                            self.utils.add(self.index, "dc_creator", value)
                    else:
                        self.utils.add(self.index, facetField, value)
                # index keywords for lookup
                if field.startswith("dc:subject.vivo:keyword."):
                    self.utils.add(self.index, "keywords", value)
                # check if this is an array field
                fnameparts = field.split(":")
                if fnameparts is not None and len(fnameparts) >= 3:
                    if field.startswith("bibo") or field.startswith("skos"):
                        arrParts = fnameparts[1].split(".")
                    else:
                        arrParts = fnameparts[2].split(".")
                    # we're not interested in: Relationship, Type and some redbox:origin
                    if arrParts is not None and len(arrParts) >= 2 and field.find(":Relationship.") == -1 and field.find("dc:type") == -1 and field.find("redbox:origin") == -1 and arrParts[1].isdigit():
                        # we've got an array field: strip the numeric
                        # index out of the field name for reporting
                        fldPart = ":%s" % arrParts[0]
                        prefixEndIdx = field.find(fldPart) + len(fldPart)
                        suffixStartIdx = prefixEndIdx + len(arrParts[1]) + 1
                        arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx] + field[suffixStartIdx:]
                        if field.endswith("Name"):
                            arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx]
                        self.log.debug("Array Field name is:%s from: %s, with value:%s" % (arrFldName, field, value))
                        if field.endswith("Name"):
                            # Person-name parts are merged per index
                            # into "family, given" strings
                            fullFieldMap = self.arrayBucket.get(arrFldName)
                            if fullFieldMap is None:
                                fullFieldMap = HashMap()
                                self.arrayBucket.put(arrFldName, fullFieldMap)
                            idx = arrParts[1]
                            fullField = fullFieldMap.get(idx)
                            if (fullField is None):
                                fullField = ""
                            if (field.endswith("givenName")):
                                fullField = "%s, %s" % (fullField, value)
                            if (field.endswith("familyName")):
                                fullField = "%s%s" % (value, fullField)
                            self.log.debug("fullname now is :%s" % fullField)
                            fullFieldMap.put(idx, fullField)
                        else:
                            # Other array fields just accumulate values
                            fieldlist = self.arrayBucket.get(arrFldName)
                            if fieldlist is None:
                                fieldlist = []
                                self.arrayBucket.put(arrFldName, fieldlist)
                            fieldlist.append(value)
                # Composite fields are stitched together from their
                # configured start/end parts with a delimiter
                for compfield in self.compFields:
                    if field.startswith(compfield):
                        arrFldName = self.reportingFieldPrefix + compfield
                        fullFieldMap = self.arrayBucket.get(arrFldName)
                        if fullFieldMap is None:
                            fullFieldMap = HashMap()
                            self.arrayBucket.put(arrFldName, fullFieldMap)
                        fullField = fullFieldMap.get("1")
                        if fullField is None:
                            fullField = ""
                        if field.endswith(self.compFieldsConfig[compfield]["end"]):
                            fullField = "%s%s%s" % (fullField, self.compFieldsConfig[compfield]["delim"], value)
                        if field.endswith(self.compFieldsConfig[compfield]["start"]):
                            fullField = "%s%s" % (value, fullField)
                        self.log.debug("full field now is :%s" % fullField)
                        fullFieldMap.put("1", fullField)
    self.utils.add(self.index, "display_type", displayType)
    # Make sure we have a creation date
    if not createdDateFlag:
        self.utils.add(self.index, "date_created", self.last_modified)
        self.log.debug("Forced creation date to %s because it was not explicitly set." % self.last_modified)
    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
    if self.owner is not None:
        self.utils.add(self.index, "workflow_security", self.owner)
    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __workflow(self):
    """Index workflow state for the 'workflow1' workflow.

    Mirrors the 'packaging' workflow handler: reads the object's
    'workflow.metadata' payload, progresses the step when a pending
    'targetStep' differs from the current 'step', applies the matching
    stage's visibility/security settings from config, and copies the
    embedded form data into the indexer's field attributes.  Creates a
    new payload in the 'pending' step if none exists.

    Side effects (all on self): title, creatorList, contributorList,
    descriptionList, formatList, creationDate, customFields,
    item_security and message_list may be (re)assigned.
    """
    # Workflow data
    WORKFLOW_ID = "workflow1"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    try:
        # Load existing workflow metadata; a StorageException here
        # means the payload does not exist yet (handled below).
        wfPayload = self.object.getPayload("workflow.metadata")
        wfMeta = self.utils.getJsonObject(wfPayload.open())
        wfPayload.close()
        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])
        # Security change: apply the matching stage's settings
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    # Messages only go out on a real step change
                    self.message_list = stage.getStringList(["message"])
        # Form processing
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
        else:
            formData = None
        coreFields = ["title", "creator", "contributor", "description", "format", "creationDate"]
        if formData is not None:
            # Core fields
            formTitles = formData.getStringList(["title"])
            if formTitles:
                for formTitle in formTitles:
                    # Only the first title encountered wins
                    if self.title is None:
                        self.title = formTitle
            creator = formData.getStringList(["creator"])
            if creator:
                self.creatorList = creator
            contributor = formData.getStringList(["contributor"])
            if contributor:
                self.contributorList = contributor
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            format = formData.getStringList(["format"])
            if format:
                self.formatList = format
            creation = formData.getStringList(["creationDate"])
            if creation:
                self.creationDate = creation
            # Non-core fields: everything else becomes a custom field
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])
    except StorageException, e:
        # No workflow payload, time to create one in the initial
        # 'pending' step, with that stage's security settings.
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", "pending")
        wfMetaObj.put("pageTitle", "Uploaded Files - Management")
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == "pending":
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])
def __workflow(self):
    """Index workflow state for the 'dataset' workflow (harvest variant).

    Unlike the interactive dataset handler, this variant ignores the
    owner and drops every harvested record straight into the stage at
    index 3 of the configured stages (the owner-based selection is kept
    below, commented out, to document the original behaviour).  It then
    persists the workflow payload when changed and indexes the form
    data and package manifest, including dc:created, Dublin Core
    faceting and keyword extraction.
    """
    # Workflow data
    WORKFLOW_ID = "dataset"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    #if self.owner == "guest":
    #    pageTitle = "Submission Request"
    #    displayType = "submission-request"
    #    initialStep = 0
    #else:
    #    pageTitle = "Metadata Record"
    #    displayType = "package-dataset"
    #    initialStep = 1
    ## Harvesting straight into the 'Published' stage
    pageTitle = "Metadata Record"
    displayType = "package-dataset"
    #initialStep = 4
    initialStep = 3
    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMeta.getJsonObject().put("pageTitle", pageTitle)
        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])
        # Security change: apply the matching stage's settings
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    # Messages only go out on a real step change
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create one at the initial step
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])
    # Has the workflow metadata changed? If so persist it back to storage
    if wfChanged == True:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            print(" ERROR updating dataset payload")
    # Form processing
    coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        data = formData.getJsonObject()
        for field in data.keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])
    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            # Only the first title encountered wins
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]
    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field not in coreFields:
            value = formData.get(field)
            if value is not None and value.strip() != "":
                self.utils.add(self.index, field, value)
                # We want to sort by date of creation, so it
                # needs to be indexed as a date (ie. 'date_*')
                if field == "dc:created":
                    parsedTime = time.strptime(value, "%Y-%m-%d")
                    solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                    self.utils.add(self.index, "date_created", solrTime)
                # try to extract some common fields for faceting
                if field.startswith("dc:") and \
                        not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                        or field.endswith(".dc:identifier") \
                        or field.endswith(".rdf:resource")):
                    # index dublin core fields for faceting
                    basicField = field.replace("dc:", "dc_")
                    dot = field.find(".")
                    if dot > 0:
                        facetField = basicField[:dot]
                    else:
                        facetField = basicField
                    #print "Indexing DC field '%s':'%s'" % (field, facetField)
                    if facetField == "dc_title":
                        if self.title is None:
                            self.title = value
                    elif facetField == "dc_type":
                        if self.dcType is None:
                            self.dcType = value
                    elif facetField == "dc_creator":
                        if basicField.endswith("foaf_name"):
                            self.utils.add(self.index, "dc_creator", value)
                    else:
                        self.utils.add(self.index, facetField, value)
                # index keywords for lookup
                if field.startswith("dc:subject.vivo:keyword."):
                    self.utils.add(self.index, "keywords", value)
    self.utils.add(self.index, "display_type", displayType)
    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
    if self.owner is not None:
        self.utils.add(self.index, "workflow_security", self.owner)
    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __metadata(self):
    """Build the formData.tfpackage metadata for a harvested Edgar dataset.

    Reads "metadata.json" from the storage object, maps its "data" section
    onto ReDBox tfpackage field names (dc:*, rif:*, foaf:*, ...), indexes a
    handful of fields along the way, then stores the package and advances
    the workflow via __updateMetadataPayload() / __workflow().

    String values pulled from the JSON are passed through
    Template.safe_substitute() so placeholders such as ${NAME_OF_FOLDER}
    are replaced (see `replacements` below).
    """
    self.title = None
    self.dcType = None
    self.__checkMetadataPayload()

    jsonPayload = self.object.getPayload("metadata.json")
    json = self.utils.getJsonObject(jsonPayload.open())
    jsonPayload.close()

    metadata = json.getObject("metadata")
    identifier = metadata.get("dc.identifier")
    self.utils.add(self.index, "dc:identifier", identifier)
    self.__storeIdentifier(identifier)
    self.utils.add(self.index, "institution", "James Cook University")
    self.utils.add(self.index, "source", "http://spatialecology.jcu.edu.au/Edgar/")
    data = json.getObject("data")

    # Global settings for processing data.
    # These will need to be changed based on your system installation.
    # NOTE(review): the original also read the unused system property
    # "mint.proxy.url" (theMintHost); dropped as dead code.
    collectionRelationTypesFilePath = FascinatorHome.getPath() + "/../portal/default/redbox/workflows/forms/data/"
    servicesRelationTypesFilePath = FascinatorHome.getPath() + "/../portal/default/redbox/workflows/forms/data/"
    descriptionTypesFilePath = FascinatorHome.getPath() + "/../portal/default/local/workflows/forms/data/"
    relationshipTypesFilePath = FascinatorHome.getPath() + "/../portal/default/local/workflows/forms/data/"

    # The tfpackage payload we are about to assemble.
    tfpackageData = {}

    # String substitutions applied to incoming JSON values:
    # ${NAME_OF_FOLDER} is always replaced with the harvest directory name;
    # an optional "DATA_SUBSTITUTIONS" object in the override JSON adds more.
    dirName = data.get("harvest_dir_name")
    replacements = {'NAME_OF_FOLDER': dirName}
    additionalReplacements = data.get("DATA_SUBSTITUTIONS")
    if additionalReplacements:
        replacements.update(additionalReplacements)

    def getAndReplace(dataBucket, key):
        # Fetch dataBucket[key]; when the value is a string, run the
        # ${...} substitutions over it, otherwise hand it back untouched.
        # BUG FIX: the original tested isinstance(key, str) -- the *key* is
        # always a string literal, so non-string values (None, lists, ...)
        # were fed straight into Template() and blew up. Test the value.
        temp = dataBucket.get(key)
        if isinstance(temp, str):
            return Template(temp).safe_substitute(replacements)
        return temp

    def readFile(path):
        # Slurp a small lookup file; closed even if read() fails.
        f = open(path)
        try:
            return f.read()
        finally:
            f.close()

    def lookupLabel(lookupJson, targetId):
        # Find the "label" of the entry whose "id" matches targetId in a
        # {"results": [...]} lookup document; None when there is no match.
        results = JsonSimple(lookupJson).getJsonObject().get("results")
        if results:
            for j in range(len(results)):
                entry = results[j]
                if targetId == entry.get("id"):
                    return entry.get("label")
        return None

    # Core descriptive fields.
    title = getAndReplace(data, "title")
    self.utils.add(self.index, "dc_title", title)
    tfpackageData["dc:title"] = title
    tfpackageData["title"] = title
    self.utils.add(self.index, "dc_type", data.get("type"))
    tfpackageData["dc:type.rdf:PlainLiteral"] = data.get("type")
    tfpackageData["dc:type.skos:prefLabel"] = data.get("type")
    tfpackageData["dc:created"] = time.strftime("%Y-%m-%d", time.gmtime())
    tfpackageData["dc:modified"] = ""
    tfpackageData["dc:language.skos:prefLabel"] = "English"

    # Temporal coverage; the end date is optional.
    tfpackageData["dc:coverage.vivo:DateTimeInterval.vivo:start"] = data.get("temporalCoverage").get("dateFrom")
    dateTo = data.get("temporalCoverage").get("dateTo")
    if dateTo is not None:
        tfpackageData["dc:coverage.vivo:DateTimeInterval.vivo:end"] = dateTo
    tfpackageData["dc:coverage.redbox:timePeriod"] = ""

    # 'spatialCoverage' metadata: only textual locations are mapped; WKT
    # polygons additionally keep the raw WKT string.
    spatialCoverage = data.get("spatialCoverage")
    for i in range(len(spatialCoverage)):
        location = spatialCoverage[i]
        if location["type"] == "text":
            prefix = "dc:coverage.vivo:GeographicLocation." + str(i + 1)
            tfpackageData[prefix + ".dc:type"] = location["type"]
            location_value = getAndReplace(location, "value")
            if location_value.startswith("POLYGON"):
                tfpackageData[prefix + ".redbox:wktRaw"] = location_value
            tfpackageData[prefix + ".rdf:PlainLiteral"] = location_value

    # 'description' metadata; the lookup file is read once, outside the loop.
    descriptionData = readFile(descriptionTypesFilePath + "descriptionTypes.json")
    description = data.get("description")
    for i in range(len(description)):
        desc = description[i]
        tempDesc = getAndReplace(desc, "value")
        if desc["type"] == "brief":
            tfpackageData["dc:description"] = tempDesc
        tfpackageData["rif:description." + str(i + 1) + ".type"] = desc["type"]
        tfpackageData["rif:description." + str(i + 1) + ".value"] = tempDesc
        label = lookupLabel(descriptionData, desc["type"])
        if label is not None:
            tfpackageData["rif:description." + str(i + 1) + ".label"] = label

    # 'relatedPublication' metadata (optional).
    relatedPublication = data.get("relatedPublication")
    if relatedPublication is not None:
        for i in range(len(relatedPublication)):
            publication = relatedPublication[i]
            tfpackageData["dc:relation.swrc:Publication." + str(i + 1) + ".dc:identifier"] = publication["doi"]
            tfpackageData["dc:relation.swrc:Publication." + str(i + 1) + ".dc:title"] = publication["title"]

    # 'relatedWebsite' metadata; `count` carries the numbering on into the
    # override websites below.
    relatedWebsite = data.get("relatedWebsite")
    for i in range(len(relatedWebsite)):
        website = relatedWebsite[i]
        tfpackageData["dc:relation.bibo:Website." + str(i + 1) + ".dc:identifier"] = getAndReplace(website, "url")
        tfpackageData["dc:relation.bibo:Website." + str(i + 1) + ".dc:title"] = getAndReplace(website, "notes")
    count = len(relatedWebsite)

    # 'data_source_website' metadata (override metadata); only 'uri'
    # identifiers are mapped, continuing the bibo:Website numbering.
    dataSourceWebsites = data.get("data_source_website")
    if dataSourceWebsites is not None:
        for i in range(len(dataSourceWebsites)):
            website = dataSourceWebsites[i]
            idType = website.get("identifier").get("type")  # renamed: shadowed builtin `type`
            if idType == "uri":
                count += 1
                tfpackageData["dc:relation.bibo:Website." + str(count) + ".dc:identifier"] = getAndReplace(website.get("identifier"), "value")
                tfpackageData["dc:relation.bibo:Website." + str(count) + ".dc:title"] = getAndReplace(website, "notes")

    # 'relatedCollection' metadata (optional).
    # NOTE(review): the original tracked `recordIdentifier` here (and reset
    # it before the services section) but never read it; dropped.
    collectionData = readFile(collectionRelationTypesFilePath + "collectionRelationTypes.json")
    relatedCollection = data.get("relatedCollection")
    if relatedCollection is not None:
        for i in range(len(relatedCollection)):
            collection = relatedCollection[i]
            tempIdentifier = collection["identifier"]
            if tempIdentifier is not None:
                tempIdentifier = Template(tempIdentifier).safe_substitute(replacements)
            else:
                tempIdentifier = ""
            prefix = "dc:relation.vivo:Dataset." + str(i + 1)
            tfpackageData[prefix + ".dc:identifier"] = tempIdentifier
            tempTitle = Template(collection.get("title")).safe_substitute(replacements)
            tfpackageData[prefix + ".dc:title"] = tempTitle
            tfpackageData[prefix + ".vivo:Relationship.rdf:PlainLiteral"] = collection["relationship"]
            # A blank identifier marks a collection curated by this ReDBox.
            if tempIdentifier == "":
                tfpackageData[prefix + ".redbox:origin"] = "on"
                tfpackageData[prefix + ".redbox:publish"] = "on"
            # Use the collection lookup data to obtain the 'label'.
            label = lookupLabel(collectionData, collection.get("relationship"))
            if label is not None:
                tfpackageData[prefix + ".vivo:Relationship.skos:prefLabel"] = label

    # 'relatedService' metadata (optional).
    servicesData = readFile(servicesRelationTypesFilePath + "serviceRelationTypes.json")
    relatedServices = data.get("relatedService")
    if relatedServices is not None:
        for i in range(len(relatedServices)):
            service = relatedServices[i]
            prefix = "dc:relation.vivo:Service." + str(i + 1)
            tfpackageData[prefix + ".dc:identifier"] = service["identifier"]
            tfpackageData[prefix + ".dc:title"] = service["title"]
            tfpackageData[prefix + ".vivo:Relationship.rdf:PlainLiteral"] = service["relationship"]
            # Use the services lookup data to obtain the 'label'.
            label = lookupLabel(servicesData, service.get("relationship"))
            if label is not None:
                tfpackageData[prefix + ".vivo:Relationship.skos:prefLabel"] = label

    # 'associatedParty' metadata: only 'people' entries with a contact value.
    relationshipData = readFile(relationshipTypesFilePath + "relationshipTypes.json")
    associatedParty = data.get("associatedParty")
    for i in range(len(associatedParty)):
        party = associatedParty[i]
        who = party.get("who")
        email = who.get("value")
        if email is not None and who.get("type") == 'people':
            prefix = "dc:creator.foaf:Person." + str(i + 1)
            tfpackageData[prefix + ".dc:identifier"] = who.get("identifier")
            tfpackageData[prefix + ".foaf:name"] = who.get("name")
            tfpackageData[prefix + ".foaf:title"] = who.get("title")
            tfpackageData[prefix + ".redbox:isCoPrimaryInvestigator"] = "off"
            tfpackageData[prefix + ".redbox:isPrimaryInvestigator"] = "on"
            tfpackageData[prefix + ".foaf:givenName"] = who.get("givenName")
            tfpackageData[prefix + ".foaf:familyName"] = who.get("familyName")
            tfpackageData[prefix + ".jcu:relationshipType"] = party.get("relationship")
            tfpackageData[prefix + ".foaf:Organization.dc:identifier"] = party.get("affiliation").get("id")
            tfpackageData[prefix + ".foaf:Organization.skos:prefLabel"] = party.get("affiliation").get("label")
            # Ensure the Relationship Type exists before labelling it.
            label = lookupLabel(relationshipData, party.get("relationship"))
            if label is not None:
                tfpackageData[prefix + ".jcu:relationshipLabel"] = label

    # 'contactInfo' metadata (primary contact).
    contactInfo = data.get("contactInfo")
    tfpackageData["locrel:prc.foaf:Person.dc:identifier"] = contactInfo.get("identifier")
    tfpackageData["locrel:prc.foaf:Person.foaf:name"] = contactInfo.get("name")
    tfpackageData["locrel:prc.foaf:Person.foaf:title"] = contactInfo.get("title")
    tfpackageData["locrel:prc.foaf:Person.foaf:givenName"] = contactInfo.get("givenName")
    tfpackageData["locrel:prc.foaf:Person.foaf:familyName"] = contactInfo.get("familyName")
    tfpackageData["locrel:prc.foaf:Person.foaf:email"] = contactInfo.get("email")
    # Stored At (on the Data Management page).
    tfpackageData["vivo:Location.vivo:GeographicLocation.gn:name"] = contactInfo.get("streetAddress")

    # 'coinvestigators' metadata.
    coinvestigators = data.get("coinvestigators")
    for i in range(len(coinvestigators)):
        tfpackageData["dc:contributor.locrel:clb." + str(i + 1) + ".foaf:Agent"] = coinvestigators[i]

    # 'anzsrcFOR' metadata.
    anzsrcFOR = data.get("anzsrcFOR")
    for i in range(len(anzsrcFOR)):
        anzsrc = anzsrcFOR[i]
        tfpackageData["dc:subject.anzsrc:for." + str(i + 1) + ".skos:prefLabel"] = anzsrc.get("prefLabel")
        tfpackageData["dc:subject.anzsrc:for." + str(i + 1) + ".rdf:resource"] = anzsrc.get("resource")

    # 'anzsrcSEO' metadata.
    anzsrcSEO = data.get("anzsrcSEO")
    for i in range(len(anzsrcSEO)):
        anzsrc = anzsrcSEO[i]
        tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) + ".skos:prefLabel"] = anzsrc.get("prefLabel")
        tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) + ".rdf:resource"] = anzsrc.get("resource")

    # 'keyword' metadata.
    keyword = data.get("keyword")
    for i in range(len(keyword)):
        tfpackageData["dc:subject.vivo:keyword." + str(i + 1) + ".rdf:PlainLiteral"] = keyword[i]

    # Research themes: map the free-text theme onto its checkbox flag.
    themeFlags = {
        "Tropical Ecosystems, Conservation and Climate Change": "jcu:research.themes.tropicalEcoSystems",
        "Industries and Economies in the Tropics": "jcu:research.themes.industriesEconomies",
        "People and Societies in the Tropics": "jcu:research.themes.peopleSocieties",
        "Tropical Health, Medicine and Biosecurity": "jcu:research.themes.tropicalHealth",
        "Not aligned to a University theme": "jcu:research.themes.notAligned",
    }
    theme = data.get("researchTheme")
    if theme in themeFlags:
        tfpackageData[themeFlags[theme]] = "true"

    tfpackageData["dc:accessRights.skos:prefLabel"] = data.get("accessRights")
    tfpackageData["dc:license.dc:identifier"] = data.get("license").get("url")
    tfpackageData["dc:license.skos:prefLabel"] = data.get("license").get("label")

    # Identifier: an explicit additional identifier is external; otherwise
    # ReDBox mints one internally.
    additionalId = data.get("additionalIdentifier")
    if additionalId is not None:
        additionalId = Template(additionalId).safe_substitute(replacements)
        tfpackageData["dc:identifier.rdf:PlainLiteral"] = additionalId
        tfpackageData["dc:identifier.redbox:origin"] = "external"
        tfpackageData["dc:identifier.dc:type.rdf:PlainLiteral"] = "local"
        tfpackageData["dc:identifier.dc:type.skos:prefLabel"] = "Local Identifier"
    else:
        tfpackageData["dc:identifier.redbox:origin"] = "internal"

    dataLocation = getAndReplace(data, "dataLocation")
    tfpackageData["bibo:Website.1.dc:identifier"] = dataLocation

    # The following have been intentionally set to blank. No mapping is
    # required for these fields.
    tfpackageData["redbox:retentionPeriod"] = data.get("retentionPeriod")
    tfpackageData["dc:extent"] = "unknown"
    tfpackageData["redbox:disposalDate"] = ""
    tfpackageData["locrel:own.foaf:Agent.1.foaf:name"] = ""
    tfpackageData["locrel:dtm.foaf:Agent.foaf:name"] = ""

    # 'organizationalGroup' metadata.
    # NOTE(review): these keys are not numbered, so with more than one group
    # only the last entry survives -- confirm this is intended.
    organisationalGroup = data.get("organizationalGroup")
    for i in range(len(organisationalGroup)):
        organisation = organisationalGroup[i]
        tfpackageData["foaf:Organization.dc:identifier"] = organisation.get("identifier")
        tfpackageData["foaf:Organization.skos:prefLabel"] = organisation.get("prefLabel")

    tfpackageData["swrc:ResearchProject.dc:title"] = ""
    tfpackageData["locrel:dpt.foaf:Person.foaf:name"] = ""
    tfpackageData["dc:SizeOrDuration"] = ""
    tfpackageData["dc:Policy"] = ""

    # Citations. Only the locrel:ctb.* contributor keys are numbered per
    # citation; the remaining keys share the same un-numbered-key caveat as
    # organizationalGroup.
    citations = data.get("citations")
    for i in range(len(citations)):
        citation = citations[i]
        tfpackageData["dc:biblioGraphicCitation.redbox:sendCitation"] = citation.get("sendCitation")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:identifier.skos:note"] = citation.get("curationIdentifier")
        paperTitle = getAndReplace(citation, "paperTitle")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:title"] = paperTitle
        ctb = "dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1)
        tfpackageData[ctb + ".foaf:familyName"] = citation.get("familyName")
        tfpackageData[ctb + ".foaf:givenName"] = citation.get("givenName")
        # NOTE(review): the original also rebound the outer `title` variable
        # here (`... = title = citation.get("title")`); the rebind was never
        # read again, so it is dropped.
        tfpackageData[ctb + ".foaf:title"] = citation.get("title")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:publisher.rdf:PlainLiteral"] = getAndReplace(citation, "publisher")
        url = getAndReplace(citation, "url")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.bibo:Website.dc:identifier"] = url
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.rdf:PlainLiteral"] = tfpackageData["dc:created"]
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.rdf:PlainLiteral"] = "publicationDate"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.skos:prefLabel"] = "Publication Date"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.rdf:PlainLiteral"] = "created"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.skos:prefLabel"] = "Date Created"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.rdf:PlainLiteral"] = tfpackageData["dc:created"]
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.jcu:dataType"] = citation.get("dataType")
        tfpackageData["dc:biblioGraphicCitation.skos:prefLabel"] = citation.get("familyName") + ", " + citation.get("givenName") + ". (" + time.strftime("%Y", time.gmtime()) + "). " + paperTitle + ". " + citation.get("publisher") + ". [" + citation.get("dataType") + "] {ID_WILL_BE_HERE}"

    self.__updateMetadataPayload(tfpackageData)
    self.__workflow()
def __metadata(self):
    """Build the formData.tfpackage metadata for a harvested Edgar dataset.

    Reads "metadata.json" from the storage object, maps its "data" section
    onto ReDBox tfpackage field names (dc:*, rif:*, foaf:*, ...), indexes a
    handful of fields along the way, then stores the package and advances
    the workflow via __updateMetadataPayload() / __workflow().

    String values pulled from the JSON are passed through
    Template.safe_substitute() so placeholders such as ${NAME_OF_FOLDER}
    are replaced (see `replacements` below).
    """
    self.title = None
    self.dcType = None
    self.__checkMetadataPayload()

    jsonPayload = self.object.getPayload("metadata.json")
    json = self.utils.getJsonObject(jsonPayload.open())
    jsonPayload.close()

    metadata = json.getObject("metadata")
    identifier = metadata.get("dc.identifier")
    self.utils.add(self.index, "dc:identifier", identifier)
    self.__storeIdentifier(identifier)
    self.utils.add(self.index, "institution", "James Cook University")
    self.utils.add(self.index, "source", "http://spatialecology.jcu.edu.au/Edgar/")
    data = json.getObject("data")

    # Global settings for processing data.
    # These will need to be changed based on your system installation.
    # NOTE(review): the original also read the unused system property
    # "mint.proxy.url" (theMintHost); dropped as dead code.
    collectionRelationTypesFilePath = FascinatorHome.getPath() + "/../portal/default/redbox/workflows/forms/data/"
    servicesRelationTypesFilePath = FascinatorHome.getPath() + "/../portal/default/redbox/workflows/forms/data/"
    descriptionTypesFilePath = FascinatorHome.getPath() + "/../portal/default/local/workflows/forms/data/"
    relationshipTypesFilePath = FascinatorHome.getPath() + "/../portal/default/local/workflows/forms/data/"

    # The tfpackage payload we are about to assemble.
    tfpackageData = {}

    # String substitutions applied to incoming JSON values:
    # ${NAME_OF_FOLDER} is always replaced with the harvest directory name;
    # an optional "DATA_SUBSTITUTIONS" object in the override JSON adds more.
    dirName = data.get("harvest_dir_name")
    replacements = {'NAME_OF_FOLDER': dirName}
    additionalReplacements = data.get("DATA_SUBSTITUTIONS")
    if additionalReplacements:
        replacements.update(additionalReplacements)

    def getAndReplace(dataBucket, key):
        # Fetch dataBucket[key]; when the value is a string, run the
        # ${...} substitutions over it, otherwise hand it back untouched.
        # BUG FIX: the original tested isinstance(key, str) -- the *key* is
        # always a string literal, so non-string values (None, lists, ...)
        # were fed straight into Template() and blew up. Test the value.
        temp = dataBucket.get(key)
        if isinstance(temp, str):
            return Template(temp).safe_substitute(replacements)
        return temp

    def readFile(path):
        # Slurp a small lookup file; closed even if read() fails.
        f = open(path)
        try:
            return f.read()
        finally:
            f.close()

    def lookupLabel(lookupJson, targetId):
        # Find the "label" of the entry whose "id" matches targetId in a
        # {"results": [...]} lookup document; None when there is no match.
        results = JsonSimple(lookupJson).getJsonObject().get("results")
        if results:
            for j in range(len(results)):
                entry = results[j]
                if targetId == entry.get("id"):
                    return entry.get("label")
        return None

    # Core descriptive fields.
    title = getAndReplace(data, "title")
    self.utils.add(self.index, "dc_title", title)
    tfpackageData["dc:title"] = title
    tfpackageData["title"] = title
    self.utils.add(self.index, "dc_type", data.get("type"))
    tfpackageData["dc:type.rdf:PlainLiteral"] = data.get("type")
    tfpackageData["dc:type.skos:prefLabel"] = data.get("type")
    tfpackageData["dc:created"] = time.strftime("%Y-%m-%d", time.gmtime())
    tfpackageData["dc:modified"] = ""
    tfpackageData["dc:language.skos:prefLabel"] = "English"

    # Temporal coverage; the end date is optional.
    tfpackageData["dc:coverage.vivo:DateTimeInterval.vivo:start"] = data.get("temporalCoverage").get("dateFrom")
    dateTo = data.get("temporalCoverage").get("dateTo")
    if dateTo is not None:
        tfpackageData["dc:coverage.vivo:DateTimeInterval.vivo:end"] = dateTo
    tfpackageData["dc:coverage.redbox:timePeriod"] = ""

    # 'spatialCoverage' metadata: only textual locations are mapped; WKT
    # polygons additionally keep the raw WKT string.
    spatialCoverage = data.get("spatialCoverage")
    for i in range(len(spatialCoverage)):
        location = spatialCoverage[i]
        if location["type"] == "text":
            prefix = "dc:coverage.vivo:GeographicLocation." + str(i + 1)
            tfpackageData[prefix + ".dc:type"] = location["type"]
            location_value = getAndReplace(location, "value")
            if location_value.startswith("POLYGON"):
                tfpackageData[prefix + ".redbox:wktRaw"] = location_value
            tfpackageData[prefix + ".rdf:PlainLiteral"] = location_value

    # 'description' metadata; the lookup file is read once, outside the loop.
    descriptionData = readFile(descriptionTypesFilePath + "descriptionTypes.json")
    description = data.get("description")
    for i in range(len(description)):
        desc = description[i]
        tempDesc = getAndReplace(desc, "value")
        if desc["type"] == "brief":
            tfpackageData["dc:description"] = tempDesc
        tfpackageData["rif:description." + str(i + 1) + ".type"] = desc["type"]
        tfpackageData["rif:description." + str(i + 1) + ".value"] = tempDesc
        label = lookupLabel(descriptionData, desc["type"])
        if label is not None:
            tfpackageData["rif:description." + str(i + 1) + ".label"] = label

    # 'relatedPublication' metadata (optional).
    relatedPublication = data.get("relatedPublication")
    if relatedPublication is not None:
        for i in range(len(relatedPublication)):
            publication = relatedPublication[i]
            tfpackageData["dc:relation.swrc:Publication." + str(i + 1) + ".dc:identifier"] = publication["doi"]
            tfpackageData["dc:relation.swrc:Publication." + str(i + 1) + ".dc:title"] = publication["title"]

    # 'relatedWebsite' metadata; `count` carries the numbering on into the
    # override websites below.
    relatedWebsite = data.get("relatedWebsite")
    for i in range(len(relatedWebsite)):
        website = relatedWebsite[i]
        tfpackageData["dc:relation.bibo:Website." + str(i + 1) + ".dc:identifier"] = getAndReplace(website, "url")
        tfpackageData["dc:relation.bibo:Website." + str(i + 1) + ".dc:title"] = getAndReplace(website, "notes")
    count = len(relatedWebsite)

    # 'data_source_website' metadata (override metadata); only 'uri'
    # identifiers are mapped, continuing the bibo:Website numbering.
    dataSourceWebsites = data.get("data_source_website")
    if dataSourceWebsites is not None:
        for i in range(len(dataSourceWebsites)):
            website = dataSourceWebsites[i]
            idType = website.get("identifier").get("type")  # renamed: shadowed builtin `type`
            if idType == "uri":
                count += 1
                tfpackageData["dc:relation.bibo:Website." + str(count) + ".dc:identifier"] = getAndReplace(website.get("identifier"), "value")
                tfpackageData["dc:relation.bibo:Website." + str(count) + ".dc:title"] = getAndReplace(website, "notes")

    # 'relatedCollection' metadata (optional).
    # NOTE(review): the original tracked `recordIdentifier` here (and reset
    # it before the services section) but never read it; dropped.
    collectionData = readFile(collectionRelationTypesFilePath + "collectionRelationTypes.json")
    relatedCollection = data.get("relatedCollection")
    if relatedCollection is not None:
        for i in range(len(relatedCollection)):
            collection = relatedCollection[i]
            tempIdentifier = collection["identifier"]
            if tempIdentifier is not None:
                tempIdentifier = Template(tempIdentifier).safe_substitute(replacements)
            else:
                tempIdentifier = ""
            prefix = "dc:relation.vivo:Dataset." + str(i + 1)
            tfpackageData[prefix + ".dc:identifier"] = tempIdentifier
            tempTitle = Template(collection.get("title")).safe_substitute(replacements)
            tfpackageData[prefix + ".dc:title"] = tempTitle
            tfpackageData[prefix + ".vivo:Relationship.rdf:PlainLiteral"] = collection["relationship"]
            # A blank identifier marks a collection curated by this ReDBox.
            if tempIdentifier == "":
                tfpackageData[prefix + ".redbox:origin"] = "on"
                tfpackageData[prefix + ".redbox:publish"] = "on"
            # Use the collection lookup data to obtain the 'label'.
            label = lookupLabel(collectionData, collection.get("relationship"))
            if label is not None:
                tfpackageData[prefix + ".vivo:Relationship.skos:prefLabel"] = label

    # 'relatedService' metadata (optional).
    servicesData = readFile(servicesRelationTypesFilePath + "serviceRelationTypes.json")
    relatedServices = data.get("relatedService")
    if relatedServices is not None:
        for i in range(len(relatedServices)):
            service = relatedServices[i]
            prefix = "dc:relation.vivo:Service." + str(i + 1)
            tfpackageData[prefix + ".dc:identifier"] = service["identifier"]
            tfpackageData[prefix + ".dc:title"] = service["title"]
            tfpackageData[prefix + ".vivo:Relationship.rdf:PlainLiteral"] = service["relationship"]
            # Use the services lookup data to obtain the 'label'.
            label = lookupLabel(servicesData, service.get("relationship"))
            if label is not None:
                tfpackageData[prefix + ".vivo:Relationship.skos:prefLabel"] = label

    # 'associatedParty' metadata: only 'people' entries with a contact value.
    relationshipData = readFile(relationshipTypesFilePath + "relationshipTypes.json")
    associatedParty = data.get("associatedParty")
    for i in range(len(associatedParty)):
        party = associatedParty[i]
        who = party.get("who")
        email = who.get("value")
        if email is not None and who.get("type") == 'people':
            prefix = "dc:creator.foaf:Person." + str(i + 1)
            tfpackageData[prefix + ".dc:identifier"] = who.get("identifier")
            tfpackageData[prefix + ".foaf:name"] = who.get("name")
            tfpackageData[prefix + ".foaf:title"] = who.get("title")
            tfpackageData[prefix + ".redbox:isCoPrimaryInvestigator"] = "off"
            tfpackageData[prefix + ".redbox:isPrimaryInvestigator"] = "on"
            tfpackageData[prefix + ".foaf:givenName"] = who.get("givenName")
            tfpackageData[prefix + ".foaf:familyName"] = who.get("familyName")
            tfpackageData[prefix + ".jcu:relationshipType"] = party.get("relationship")
            tfpackageData[prefix + ".foaf:Organization.dc:identifier"] = party.get("affiliation").get("id")
            tfpackageData[prefix + ".foaf:Organization.skos:prefLabel"] = party.get("affiliation").get("label")
            # Ensure the Relationship Type exists before labelling it.
            label = lookupLabel(relationshipData, party.get("relationship"))
            if label is not None:
                tfpackageData[prefix + ".jcu:relationshipLabel"] = label

    # 'contactInfo' metadata (primary contact).
    contactInfo = data.get("contactInfo")
    tfpackageData["locrel:prc.foaf:Person.dc:identifier"] = contactInfo.get("identifier")
    tfpackageData["locrel:prc.foaf:Person.foaf:name"] = contactInfo.get("name")
    tfpackageData["locrel:prc.foaf:Person.foaf:title"] = contactInfo.get("title")
    tfpackageData["locrel:prc.foaf:Person.foaf:givenName"] = contactInfo.get("givenName")
    tfpackageData["locrel:prc.foaf:Person.foaf:familyName"] = contactInfo.get("familyName")
    tfpackageData["locrel:prc.foaf:Person.foaf:email"] = contactInfo.get("email")
    # Stored At (on the Data Management page).
    tfpackageData["vivo:Location.vivo:GeographicLocation.gn:name"] = contactInfo.get("streetAddress")

    # 'coinvestigators' metadata.
    coinvestigators = data.get("coinvestigators")
    for i in range(len(coinvestigators)):
        tfpackageData["dc:contributor.locrel:clb." + str(i + 1) + ".foaf:Agent"] = coinvestigators[i]

    # 'anzsrcFOR' metadata.
    anzsrcFOR = data.get("anzsrcFOR")
    for i in range(len(anzsrcFOR)):
        anzsrc = anzsrcFOR[i]
        tfpackageData["dc:subject.anzsrc:for." + str(i + 1) + ".skos:prefLabel"] = anzsrc.get("prefLabel")
        tfpackageData["dc:subject.anzsrc:for." + str(i + 1) + ".rdf:resource"] = anzsrc.get("resource")

    # 'anzsrcSEO' metadata.
    anzsrcSEO = data.get("anzsrcSEO")
    for i in range(len(anzsrcSEO)):
        anzsrc = anzsrcSEO[i]
        tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) + ".skos:prefLabel"] = anzsrc.get("prefLabel")
        tfpackageData["dc:subject.anzsrc:seo." + str(i + 1) + ".rdf:resource"] = anzsrc.get("resource")

    # 'keyword' metadata.
    keyword = data.get("keyword")
    for i in range(len(keyword)):
        tfpackageData["dc:subject.vivo:keyword." + str(i + 1) + ".rdf:PlainLiteral"] = keyword[i]

    # Research themes: map the free-text theme onto its checkbox flag.
    themeFlags = {
        "Tropical Ecosystems, Conservation and Climate Change": "jcu:research.themes.tropicalEcoSystems",
        "Industries and Economies in the Tropics": "jcu:research.themes.industriesEconomies",
        "People and Societies in the Tropics": "jcu:research.themes.peopleSocieties",
        "Tropical Health, Medicine and Biosecurity": "jcu:research.themes.tropicalHealth",
        "Not aligned to a University theme": "jcu:research.themes.notAligned",
    }
    theme = data.get("researchTheme")
    if theme in themeFlags:
        tfpackageData[themeFlags[theme]] = "true"

    tfpackageData["dc:accessRights.skos:prefLabel"] = data.get("accessRights")
    tfpackageData["dc:license.dc:identifier"] = data.get("license").get("url")
    tfpackageData["dc:license.skos:prefLabel"] = data.get("license").get("label")

    # Identifier: an explicit additional identifier is external; otherwise
    # ReDBox mints one internally.
    additionalId = data.get("additionalIdentifier")
    if additionalId is not None:
        additionalId = Template(additionalId).safe_substitute(replacements)
        tfpackageData["dc:identifier.rdf:PlainLiteral"] = additionalId
        tfpackageData["dc:identifier.redbox:origin"] = "external"
        tfpackageData["dc:identifier.dc:type.rdf:PlainLiteral"] = "local"
        tfpackageData["dc:identifier.dc:type.skos:prefLabel"] = "Local Identifier"
    else:
        tfpackageData["dc:identifier.redbox:origin"] = "internal"

    dataLocation = getAndReplace(data, "dataLocation")
    tfpackageData["bibo:Website.1.dc:identifier"] = dataLocation

    # The following have been intentionally set to blank. No mapping is
    # required for these fields.
    tfpackageData["redbox:retentionPeriod"] = data.get("retentionPeriod")
    tfpackageData["dc:extent"] = "unknown"
    tfpackageData["redbox:disposalDate"] = ""
    tfpackageData["locrel:own.foaf:Agent.1.foaf:name"] = ""
    tfpackageData["locrel:dtm.foaf:Agent.foaf:name"] = ""

    # 'organizationalGroup' metadata.
    # NOTE(review): these keys are not numbered, so with more than one group
    # only the last entry survives -- confirm this is intended.
    organisationalGroup = data.get("organizationalGroup")
    for i in range(len(organisationalGroup)):
        organisation = organisationalGroup[i]
        tfpackageData["foaf:Organization.dc:identifier"] = organisation.get("identifier")
        tfpackageData["foaf:Organization.skos:prefLabel"] = organisation.get("prefLabel")

    tfpackageData["swrc:ResearchProject.dc:title"] = ""
    tfpackageData["locrel:dpt.foaf:Person.foaf:name"] = ""
    tfpackageData["dc:SizeOrDuration"] = ""
    tfpackageData["dc:Policy"] = ""

    # Citations. Only the locrel:ctb.* contributor keys are numbered per
    # citation; the remaining keys share the same un-numbered-key caveat as
    # organizationalGroup.
    citations = data.get("citations")
    for i in range(len(citations)):
        citation = citations[i]
        tfpackageData["dc:biblioGraphicCitation.redbox:sendCitation"] = citation.get("sendCitation")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:identifier.skos:note"] = citation.get("curationIdentifier")
        paperTitle = getAndReplace(citation, "paperTitle")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:title"] = paperTitle
        ctb = "dc:biblioGraphicCitation.dc:hasPart.locrel:ctb." + str(i + 1)
        tfpackageData[ctb + ".foaf:familyName"] = citation.get("familyName")
        tfpackageData[ctb + ".foaf:givenName"] = citation.get("givenName")
        # NOTE(review): the original also rebound the outer `title` variable
        # here (`... = title = citation.get("title")`); the rebind was never
        # read again, so it is dropped.
        tfpackageData[ctb + ".foaf:title"] = citation.get("title")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:publisher.rdf:PlainLiteral"] = getAndReplace(citation, "publisher")
        url = getAndReplace(citation, "url")
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.bibo:Website.dc:identifier"] = url
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.rdf:PlainLiteral"] = tfpackageData["dc:created"]
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.rdf:PlainLiteral"] = "publicationDate"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.1.dc:type.skos:prefLabel"] = "Publication Date"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.rdf:PlainLiteral"] = "created"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.dc:type.skos:prefLabel"] = "Date Created"
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.dc:date.2.rdf:PlainLiteral"] = tfpackageData["dc:created"]
        tfpackageData["dc:biblioGraphicCitation.dc:hasPart.jcu:dataType"] = citation.get("dataType")
        tfpackageData["dc:biblioGraphicCitation.skos:prefLabel"] = citation.get("familyName") + ", " + citation.get("givenName") + ". (" + time.strftime("%Y", time.gmtime()) + "). " + paperTitle + ". " + citation.get("publisher") + ". [" + citation.get("dataType") + "] {ID_WILL_BE_HERE}"

    self.__updateMetadataPayload(tfpackageData)
    self.__workflow()