def __activate__(self, context):
    # Jython servlet entry point: run an NLA search and stream the result
    # to the client as JSON (or plain text, depending on the 'format' field).
    self.request = context["request"]
    self.response = context["response"]
    self.formData = context["formData"]
    self.log = context["log"]

    # Basic response text
    message = JsonSimple()
    # Pre-create the nodes searchNla() will populate
    self.metadata = message.writeObject(["metadata"])
    self.results = message.writeArray(["results"])

    # Prepare response Object
    format = self.formData.get("format")
    if format == "json":
        out = self.response.getPrintWriter("application/json; charset=UTF-8")
    else:
        out = self.response.getPrintWriter("text/plain; charset=UTF-8")

    # Success Response
    try:
        self.searchNla()
        out.println(message.toString(True))
        out.close()
    except Exception, ex:
        # Failure: replace the whole response with a single 'error' node
        self.log.error("Error during search: ", ex)
        self.response.setStatus(500)
        message = JsonSimple()
        message.getJsonObject().put("error", ex.getMessage())
        out.println(message.toString(True))
        out.close()
def __activate__(self, context):
    # Jython servlet entry point: mark a relationship on an object's
    # metadata.json as curated, storing the curated PID supplied by the
    # caller, and echo the updated JSON back to the client.
    try:
        self.log = context["log"]
        self.response = context["response"]
        self.request = context["request"]
        self.systemConfig = context["systemConfig"]
        self.storage = context["Services"].getStorage()
        self.indexer = context["Services"].getIndexer()
        self.sessionState = context["sessionState"]
        # Temporarily elevate the session so storage writes are allowed;
        # removed again in the 'finally' below.
        self.sessionState.set("username", "admin")
        out = self.response.getPrintWriter("text/plain; charset=UTF-8")
        # NOTE(review): these two beans are fetched but not used in this
        # function — confirm whether they are needed.
        relationshipMapper = ApplicationContextProvider.getApplicationContext().getBean("relationshipMapper")
        externalCurationMessageBuilder = ApplicationContextProvider.getApplicationContext().getBean("externalCurationMessageBuilder")
        oid = self.request.getParameter("oid")
        if oid is None:
            # No OID given: resolve it from the 'identifier' parameter
            identifier = self.request.getParameter("identifier")
            oid = self.findOidByIdentifier(identifier)
        relationshipType = self.request.getParameter("relationship")
        curatedPid = self.request.getParameter("curatedPid")
        sourceId = self.request.getParameter("sourceIdentifier")
        system = self.request.getParameter("system")
        digitalObject = StorageUtils.getDigitalObject(self.storage, oid)
        metadataJsonPayload = digitalObject.getPayload("metadata.json")
        metadataJsonInstream = metadataJsonPayload.open()
        metadataJson = JsonSimple(metadataJsonInstream)
        metadataJsonPayload.close()
        relationships = metadataJson.getArray("relationships")
        found = False
        # Update the matching relationship in place, if one exists
        for relationship in relationships:
            if relationship.get("identifier") == sourceId:
                relationship.put("isCurated", True)
                relationship.put("curatedPid", curatedPid)
                found = True
        if not found:
            # No existing entry: append a new curated relationship record
            relationship = JsonObject()
            relationship.put("isCurated", True)
            relationship.put("curatedPid", curatedPid)
            relationship.put("relationship", relationshipType)
            relationship.put("identifier", sourceId)
            relationship.put("system", system)
            relationships.add(relationship)
        out.println(metadataJson.toString(True))
        # Persist the updated metadata back to storage
        istream = ByteArrayInputStream(String(metadataJson.toString(True)).getBytes())
        StorageUtils.createOrUpdatePayload(digitalObject, "metadata.json", istream)
        out.close()
    finally:
        # Always drop the temporary admin elevation
        self.sessionState.remove("username")
def process_tags(self, result):
    """Serialise tag annotations from Solr documents into a JSON array string.

    Plain tags (no locators) are de-duplicated and aggregated with a
    'tagCount'; tags carrying locators (eg. image regions) are emitted
    individually, untouched.
    """
    serialised = []
    counts = {}
    for entry in result:
        # Parse the raw Anotar JSON stored in the Solr document
        annotation = JsonSimple(entry.get("jsonString"))
        text = annotation.getString(None, ["content", "literal"])
        locatorCount = annotation.getJsonSimpleList(["annotates", "locators"]).size()
        if locatorCount != 0:
            # Locator-based tag: pass through verbatim
            serialised.append(annotation.toString())
            continue
        # Plain tag: count repeats of the same text
        seen = counts.get(text)
        if seen is None:
            annotation.getJsonObject().put("tagCount", str(1))
            counts[text] = annotation
        else:
            tally = seen.getInteger(0, ["tagCount"])
            seen.getJsonObject().put("tagCount", str(tally + 1))
    # Append the aggregated plain tags after the locator tags
    for text in counts:
        serialised.append(counts[text].toString())
    return "[" + ",".join(serialised) + "]"
def get_image(self):
    """Return a JSON array of image annotations whose locator is a W3C
    media-fragment region ("#xywh=left,top,width,height")."""
    self.type = "http://www.purl.org/anotar/ns/type/0.1#Tag"
    fragSchema = "http://www.w3.org/TR/2009/WD-media-frags-20091217"
    result = '{"result":' + self.search_solr() + "}"
    if result:
        rendered = []
        for hit in JsonSimple(result).getJsonSimpleList(["result"]):
            annotation = JsonSimple()
            # Skip basic tags — only locator-bearing tags are wanted here
            locators = hit.getJsonSimpleList(["annotates", "locators"])
            if not (locators and not locators.isEmpty()):
                continue
            value = locators.get(0).getString(None, ["value"])
            kind = locators.get(0).get(None, ["type"])
            if not (value and value.find("#xywh=") > -1 and kind == fragSchema):
                continue
            # "...#xywh=left,top,width,height" -> region geometry
            _, geometry = value.split("#xywh=")
            left, top, width, height = geometry.split(",")
            target = annotation.getJsonObject()
            target.put("top", top)
            target.put("left", left)
            target.put("width", width)
            target.put("height", height)
            target.put("creator", hit.getString(None, ["creator", "literal"]))
            target.put("creatorUri", hit.getString(None, ["creator", "uri"]))
            target.put("id", hit.getString(None, ["id"]))
            target.put("text", hit.getString(None, ["content", "literal"]))
            target.put("editable", "true")
            rendered.append(annotation.toString())
        result = "[" + ",".join(rendered) + "]"
    return result
def process_tags(self, result):
    """Serialise tag annotations from Solr docs into a JSON array string.

    Plain tags (no locators) are aggregated with a 'tagCount'; tags with
    locators (eg. image regions) are passed through individually.
    """
    tags = []
    tagsDict = {}
    # Build a dictionary of the tags
    for doc in result:
        # Get Anotar data from Solr data
        doc = JsonSimple(doc.get("jsonString"))
        # Get actual tag text
        tag = doc.getString(None, ["content", "literal"])
        # Find out if they have locators
        locs = doc.getJsonSimpleList(["annotates", "locators"]).size()
        if locs == 0:
            # Basic tags, just aggregate counts
            if tag in tagsDict:
                # We've seen it before, just increment the counter
                existing = tagsDict[tag]
                count = existing.getInteger(0, ["tagCount"])
                existing.getJsonObject().put("tagCount", str(count + 1))
            else:
                # First time, store this object
                doc.getJsonObject().put("tagCount", str(1))
                tagsDict[tag] = doc
        else:
            # Tags with a locator, special case for images etc.
            tags.append(doc.toString())
    # Push all the 'basic' counts into the list to return
    for tag in tagsDict:
        tags.append(tagsDict[tag].toString())
    return "[" + ",".join(tags) + "]"
def updateRelationships(self, relationship, pid, identifier):
    # Record a curated PID against the matching relationship in the
    # metadata.json of the object at the other end of 'relationship'.
    oid = self.findOidByIdentifier(relationship.get("identifier"))
    # NOTE(review): writes the OID to the response writer — looks like
    # leftover debug output; confirm before removing.
    self.writer.println(oid)
    digitalObject = StorageUtils.getDigitalObject(self.storage, oid)
    metadataJsonPayload = digitalObject.getPayload("metadata.json")
    metadataJsonInstream = metadataJsonPayload.open()
    metadataJson = JsonSimple(metadataJsonInstream)
    metadataJsonPayload.close()
    relationships = metadataJson.getArray("relationships")
    found = False
    if relationships is None:
        # First relationship for this object: create the array
        relationships = JSONArray()
        metadataJson.getJsonObject().put("relationships", relationships)
    # Update the matching relationship in place, if one exists
    for relationship1 in relationships:
        if relationship1.get("identifier") == identifier:
            relationship1.put("isCurated", True)
            relationship1.put("curatedPid", pid)
            found = True
    if not found:
        # No matching entry: append a new curated relationship record
        newRelationship = JsonObject()
        newRelationship.put("isCurated", True)
        newRelationship.put("curatedPid", pid)
        newRelationship.put("relationship", relationship.get("relationship"))
        newRelationship.put("identifier", identifier)
        relationships.add(newRelationship)
    # Persist the updated metadata back to storage
    istream = ByteArrayInputStream(String(metadataJson.toString(True)).getBytes())
    StorageUtils.createOrUpdatePayload(digitalObject, "metadata.json", istream)
def __checkMetadataPayload(self):
    # Ensure the object carries a 'formData.tfpackage' payload, creating a
    # minimal package on first harvest. Sets self.firstHarvest accordingly.
    try:
        # Simple check for its existance
        self.object.getPayload("formData.tfpackage")
        self.firstHarvest = False
    except Exception:
        self.firstHarvest = True
        # We need to create it
        self.log.info(
            "Creating 'formData.tfpackage' payload for object '{}'",
            self.oid)
        # Prep data
        data = {
            "viewId": "default",
            "workflow_source": "Edgar Import",
            "packageType": "dataset",
            "redbox:formVersion": self.redboxVersion,
            "redbox:newForm": "true"
        }
        package = JsonSimple(JsonObject(data))
        # Store it
        inStream = IOUtils.toInputStream(package.toString(True), "UTF-8")
        try:
            self.object.createStoredPayload("formData.tfpackage", inStream)
            self.packagePid = "formData.tfpackage"
        except StorageException, e:
            self.log.error(
                "Error creating 'formData.tfpackage' payload for object '{}'",
                self.oid, e)
            raise Exception("Error creating package payload: ", e)
def __checkMetadataPayload(self, identifier):
    # We are just going to confirm the existance of
    # 'metadata.json', or create an empty one if it
    # doesn't exist. Makes curation function for this
    # option and removes some log errors on the details
    # screen.
    try:
        self.object.getPayload("metadata.json")
        # all is good, the above will throw an exception if it doesn't exist
        return
    except Exception:
        self.log.info("Creating 'metadata.json' payload for object '{}'", self.oid)
        # Prep data
        metadata = JsonSimple()
        metadata.getJsonObject().put("recordIDPrefix", "")
        metadata.writeObject("data")
        # The only real data we require is the ID for curation
        idHolder = metadata.writeObject("metadata")
        idHolder.put("dc.identifier", identifier)
        # Store it
        inStream = IOUtils.toInputStream(metadata.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "metadata.json", inStream)
        except StorageException, e:
            # Creation failure is logged but deliberately not fatal
            self.log.error("Error creating 'metadata.json' payload for object '{}'", self.oid, e)
        return
def updateLocalRecordRelations(self, jobItems):
    """Mark relationships on local (redbox) records as curated.

    For each job item whose type maps to the "redbox" system, inspect the
    relationships stored in its package; for any relationship curated by an
    external system, ask that system which OID backs the related identifier
    and stamp the relationship with the curated PID of the matching local
    record, then persist the package.
    """
    # Map each job item's OID to its primary required identifier
    oidIdentifierMap = HashMap()
    for jobItem in jobItems:
        oidIdentifierMap.put(jobItem.get("oid"), jobItem.get("required_identifiers")[0].get("identifier"))
    for jobItem in jobItems:
        itemType = jobItem.get("type")
        targetSystem = self.systemConfig.getString(None, "curation", "supported-types", itemType)
        if targetSystem != "redbox":
            continue
        oid = jobItem.get("oid")
        digitalObject = StorageUtils.getDigitalObject(self.services.getStorage(), oid)
        tfPackagePid = self.getPackageData(digitalObject)
        metadataJsonPayload = digitalObject.getPayload(tfPackagePid)
        metadataJsonInstream = metadataJsonPayload.open()
        metadataJson = JsonSimple(metadataJsonInstream)
        metadataJsonPayload.close()
        relationships = metadataJson.getArray("relationships")
        if relationships is not None:
            for relationship in relationships:
                system = relationship.get("system")
                # BUG FIX: the original condition used 'or', which is a
                # tautology (every value fails at least one inequality).
                # The intent is: only relationships curated by an external,
                # non-redbox system.
                if system != "redbox" and system is not None:
                    url = self.systemConfig.getString("can't find it", "curation", "external-system-urls", "get-oid-for-identifier", system)
                    client = BasicHttpClient(url + "&identifier=" + relationship.get("identifier"))
                    get = GetMethod(url + "&identifier=" + relationship.get("identifier"))
                    client.executeMethod(get)
                    if get.getStatusCode() == 200:
                        response = JsonSimple(get.getResponseBodyAsString())
                        # Translate the external OID to our local curated PID
                        relationship.put("curatedPid", oidIdentifierMap.get(response.getString(None, "oid")))
                        relationship.put("isCurated", True)
                    # Now update the relationship on Mint's side
                    break
        # Persist the (possibly updated) package back to storage
        istream = ByteArrayInputStream(String(metadataJson.toString(True)).getBytes())
        StorageUtils.createOrUpdatePayload(digitalObject, tfPackagePid, istream)
def __formData(self):
    # Locate this object's workflow form-data payload, upgrade or hotfix it
    # if the recorded form version is stale, and save it back after taking
    # a backup of the original JSON.

    # Find our workflow form data
    packagePid = None
    try:
        self.pidList = self.object.getPayloadIdList()
        for pid in self.pidList:
            if pid.endswith(self.packagePidSuffix):
                packagePid = pid
    except StorageException:
        self.log.error("Error accessing object PID list for object '{}' ", self.oid)
        return
    if packagePid is None:
        self.log.debug("Object '{}' has no form data", self.oid)
        return

    # Retrieve our form data
    workflowData = None
    try:
        payload = self.object.getPayload(packagePid)
        try:
            workflowData = JsonSimple(payload.open())
        except Exception:
            self.log.error("Error parsing JSON '{}'", packagePid)
        finally:
            # Always release the payload stream
            payload.close()
    except StorageException:
        self.log.error("Error accessing '{}'", packagePid)
        return

    # Test our version data
    self.version = workflowData.getString("{NO VERSION}", ["redbox:formVersion"])
    # Keep a pristine copy for the backup step below
    oldData = String(workflowData.toString(True))
    if self.version != self.redboxVersion:
        self.log.info("OID '{}' requires an upgrade: '{}' => '{}'", [self.oid, self.version, self.redboxVersion])
        # The version data is old, run our upgrade
        # function to see if any alterations are
        # required. Most likely at least the
        # version number will change.
        newWorkflowData = self.__upgrade(workflowData)
    else:
        newWorkflowData = self.__hotfix(workflowData)
        if newWorkflowData is not None:
            self.log.debug("OID '{}' was hotfixed for v1.2 'dc:type' bug", self.oid)
        else:
            self.log.debug("OID '{}' requires no work, skipping", self.oid)
            return

    # Backup our data first
    backedUp = self.__backup(oldData)
    if not backedUp:
        self.log.error("Upgrade aborted, data backup failed!")
        return

    # Save the newly modified data
    jsonString = String(newWorkflowData.toString(True))
    inStream = ByteArrayInputStream(jsonString.getBytes("UTF-8"))
    try:
        self.object.updatePayload(packagePid, inStream)
    except StorageException, e:
        self.log.error("Error updating workflow payload: ", e)
def __checkMetadataPayload(self):
    # Ensure the object carries a 'formData.tfpackage' payload, creating a
    # minimal package on first harvest. Sets self.firstHarvest accordingly.
    try:
        # Simple check for its existance
        self.object.getPayload("formData.tfpackage")
        self.firstHarvest = False
    except Exception:
        self.firstHarvest = True
        # We need to create it
        self.log.info("Creating 'formData.tfpackage' payload for object '{}'", self.oid)
        # Prep data
        data = {
            "viewId": "default",
            "workflow_source": "Edgar Import",
            "packageType": "dataset",
            "redbox:formVersion": self.redboxVersion,
            "redbox:newForm": "true"
        }
        package = JsonSimple(JsonObject(data))
        # Store it
        inStream = IOUtils.toInputStream(package.toString(True), "UTF-8")
        try:
            self.object.createStoredPayload("formData.tfpackage", inStream)
            self.packagePid = "formData.tfpackage"
        except StorageException, e:
            self.log.error("Error creating 'formData.tfpackage' payload for object '{}'", self.oid, e)
            raise Exception("Error creating package payload: ", e)
def __messages(self):
    """Broadcast this object's OID to every configured messaging target."""
    if self.message_list is None or len(self.message_list) == 0:
        # Nothing configured — nothing to send
        return
    envelope = JsonSimple()
    envelope.getJsonObject().put("oid", self.oid)
    payload = envelope.toString()
    for destination in self.message_list:
        self.utils.sendMessage(destination, payload)
def __messages(self):
    # Send this object's OID (as a small JSON envelope) to every
    # configured messaging target.
    if self.message_list is not None and len(self.message_list) > 0:
        msg = JsonSimple()
        msg.getJsonObject().put("oid", self.oid)
        message = msg.toString()
        for target in self.message_list:
            self.utils.sendMessage(target, message)
def updateWorkFlowMetadata(self, workflowMetadata, toWorkflowId, toWorkflowStage): workflowMetaDataJson = JsonSimple(workflowMetadata.open()).getJsonObject() workflowMetaDataJson.put("id", toWorkflowId) workflowMetaDataJson.put("step", toWorkflowStage) inStream = IOUtils.toInputStream(workflowMetaDataJson.toString(), "UTF-8") try: StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream) except StorageException: print " ERROR updating dataset payload"
def updatePackageType(self, tfPackage, toWorkflowId): tfPackageJson = JsonSimple(tfPackage.open()).getJsonObject() tfPackageJson.put("packageType", toWorkflowId) inStream = IOUtils.toInputStream(tfPackageJson.toString(), "UTF-8") try: StorageUtils.createOrUpdatePayload(self.object, tfPackage.getId(), inStream) except StorageException: print " ERROR updating dataset payload"
def updatePackageType(self, tfPackage, toWorkflowId):
    # Rewrite the tfpackage payload with its packageType set to the
    # target workflow id.
    tfPackageJson = JsonSimple(tfPackage.open()).getJsonObject()
    tfPackageJson.put("packageType", toWorkflowId)
    inStream = IOUtils.toInputStream(tfPackageJson.toString(), "UTF-8")
    try:
        StorageUtils.createOrUpdatePayload(self.object, tfPackage.getId(), inStream)
    except StorageException:
        print " ERROR updating dataset payload"
def modify_json(self):
    """Stamp the annotation JSON with its pid, an absolute URI and the anotar schema version."""
    wrapper = JsonSimple(self.json)
    payload = wrapper.getJsonObject()
    payload.put("id", self.pid)
    rootUri = wrapper.getString(None, ["annotates", "rootUri"])
    if rootUri is not None:
        # Qualify the root URI with the current server's host and port
        base = "http://%s:%s/" % (self.vc("request").serverName, self.vc("serverPort"))
        payload.put("uri", base + rootUri + "#" + self.pid)
    payload.put("schemaVersionUri", "http://www.purl.org/anotar/schema/0.1")
    self.json = wrapper.toString()
def updateWorkFlowMetadata(self, workflowMetadata, toWorkflowId, toWorkflowStage):
    # Rewrite workflow.metadata so it points at the target workflow id
    # and stage.
    workflowMetaDataJson = JsonSimple(
        workflowMetadata.open()).getJsonObject()
    workflowMetaDataJson.put("id", toWorkflowId)
    workflowMetaDataJson.put("step", toWorkflowStage)
    inStream = IOUtils.toInputStream(workflowMetaDataJson.toString(), "UTF-8")
    try:
        StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
    except StorageException:
        print " ERROR updating dataset payload"
def modify_json(self):
    # Stamp the annotation JSON with its pid, a fully-qualified URI and
    # the anotar schema version before it is stored.
    #print "**** anotar.py : add_json() : adding json : " + json
    jsonSimple = JsonSimple(self.json)
    jsonObj = jsonSimple.getJsonObject()
    jsonObj.put("id", self.pid)
    rootUri = jsonSimple.getString(None, ["annotates", "rootUri"])
    if rootUri is not None:
        # Qualify the root URI with the current server's host and port
        baseUrl = "http://%s:%s/" % (self.vc("request").serverName, self.vc("serverPort"))
        myUri = baseUrl + rootUri + "#" + self.pid
        jsonObj.put("uri", myUri)
    jsonObj.put("schemaVersionUri", "http://www.purl.org/anotar/schema/0.1")
    self.json = jsonSimple.toString()
def __activate__(self, context): writer = context["response"].getPrintWriter("application/json; charset=UTF-8") jsonResponse = "{}" try: oid = context["formData"].get("oid") object = context["Services"].getStorage().getObject(oid); payload = object.getPayload("metadata.json") json = JsonSimple(payload.open()) payload.close() object.close() # We are only going to send the 'data' node though data = JsonSimple(json.getJsonObject().get("data")) jsonResponse = data.toString(True) except Exception, e: jsonResponse = '{"message": "%s"}' % e.getMessage()
def __activate__(self, context):
    """Queue a curation task for the OID supplied in the form data and
    report the outcome as plain text."""
    self.request = context["request"]
    self.response = context["response"]
    self.formData = context["formData"]
    self.log = context["log"]

    oid = self.formData.get("oid")
    self.log.debug("Curation request recieved: '{}'", oid)

    # Build the curation task envelope
    task = JsonSimple()
    body = task.getJsonObject()
    body.put("task", "curation")
    body.put("oid", oid)

    writer = self.response.getPrintWriter("text/plain; charset=UTF-8")
    if self.queueMessage(task.toString()):
        writer.println("Request successful. The system will now process.")
    else:
        self.response.setStatus(500)
        writer.println("Error sending message, see system logs.")
    writer.close()
def process(self):
    """Serve the log listing, or one log file's contents, on a CSRF-validated page."""
    if not self.vc("page").csrfSecurePage():
        self.throw_error("Invalid request")
        return
    source = self.vc("formData").get("source")
    path = self.vc("formData").get("path")
    # Dispatch table: unrecognised sources fall through to unknown_action
    handlers = {"logs": self.show_logs}
    result = handlers.get(source, self.unknown_action)()
    if path is not None:
        if result.containsKey(path):
            result = self.get_file_contents_as_json(path)
        else:
            self.log.info("requested log: %s but the path does not exist" % path)
            result = JsonSimple().getJsonObject()
    self.writer.println(result.toString())
    self.writer.close()
def __updateMetadataPayload(self, data):
    # Merge 'data' into formData.tfpackage, but only on the first pass —
    # the presence of dc:title marks a previously-processed package.
    # Get and parse
    payload = self.object.getPayload("formData.tfpackage")
    json = JsonSimple(payload.open())
    payload.close()
    # Basic test for a mandatory field
    title = json.getString(None, ["dc:title"])
    if title is not None:
        # We've done this before
        return
    # Merge
    json.getJsonObject().putAll(data)
    # Store it
    inStream = IOUtils.toInputStream(json.toString(True), "UTF-8")
    try:
        self.object.updatePayload("formData.tfpackage", inStream)
    except StorageException, e:
        self.log.error("Error updating 'formData.tfpackage' payload for object '{}'", self.oid, e)
def process(self):
    # Validate the CSRF token, dispatch on the 'source' form field, and —
    # if a 'path' is supplied — return that log file's contents instead.
    valid = self.vc("page").csrfSecurePage()
    if not valid:
        self.throw_error("Invalid request")
        return
    source = self.vc("formData").get("source")
    path = self.vc("formData").get("path")
    switch = {
        "logs": self.show_logs
    }
    result = switch.get(source, self.unknown_action)()
    if path is not None:
        if result.containsKey(path):
            result = self.get_file_contents_as_json(path)
        else:
            # Unknown path: log it and return an empty JSON object
            self.log.info("requested log: %s but the path does not exist" % path)
            result = JsonSimple().getJsonObject()
    self.writer.println(result.toString())
    self.writer.close()
def get_image(self):
    # Build a JSON array of image annotations: only tags whose locator is
    # a W3C media-fragment region ("#xywh=left,top,width,height") are kept.
    self.type = "http://www.purl.org/anotar/ns/type/0.1#Tag"
    mediaFragType = "http://www.w3.org/TR/2009/WD-media-frags-20091217"
    result = '{"result":' + self.search_solr() + '}'
    if result:
        imageTagList = []
        imageTags = JsonSimple(result).getJsonSimpleList(["result"])
        for imageTag in imageTags:
            imageAno = JsonSimple()
            # We only want tags with locators, not basic tags
            locators = imageTag.getJsonSimpleList(["annotates", "locators"])
            if locators and not locators.isEmpty():
                locatorValue = locators.get(0).getString(None, ["value"])
                locatorType = locators.get(0).get(None, ["type"])
                if locatorValue and locatorValue.find("#xywh=") > -1 and locatorType == mediaFragType:
                    # Split the region geometry out of the fragment URI
                    _, locatorValue = locatorValue.split("#xywh=")
                    left, top, width, height = locatorValue.split(",")
                    object = imageAno.getJsonObject()
                    object.put("top", top)
                    object.put("left", left)
                    object.put("width", width)
                    object.put("height", height)
                    object.put("creator", imageTag.getString(None, ["creator", "literal"]))
                    object.put("creatorUri", imageTag.getString(None, ["creator", "uri"]))
                    object.put("id", imageTag.getString(None, ["id"]))
                    #tagCount = imageTag.getString(None, ["tagCount"])
                    object.put("text", imageTag.getString(None, ["content", "literal"]))
                    object.put("editable", "true")
                    imageTagList.append(imageAno.toString())
        result = "[" + ",".join(imageTagList) + "]"
    return result
def __activate__(self, context):
    """JSONP endpoint reporting whether the current session is authenticated."""
    request = context["request"]
    response = context["response"]
    writer = response.getPrintWriter("text/javascript; charset=UTF-8")
    result = JsonSimple()

    # Accept either of the two recognised callback parameter names
    callback = request.getParameter("callback")
    if callback is None:
        callback = request.getParameter("jsonp_callback")
    if callback is None:
        # Plain (non-JSONP) access is refused outright
        response.setStatus(403)
        writer.println("Error: This interface only responds to JSONP")
        writer.close()
        return

    if context["page"].authentication.is_logged_in():
        flag = "true"
    else:
        flag = "false"
    result.getJsonObject().put("isAuthenticated", flag)
    writer.println(callback + "(" + result.toString() + ")")
    writer.close()
def __activate__(self, context):
    # JSONP endpoint: report whether the current user is authenticated.
    request = context["request"]
    response = context["response"]
    writer = response.getPrintWriter("text/javascript; charset=UTF-8")
    result = JsonSimple()

    ## Look for the JSONP callback to use
    jsonpCallback = request.getParameter("callback")
    if jsonpCallback is None:
        jsonpCallback = request.getParameter("jsonp_callback")
    if jsonpCallback is None:
        # No callback supplied: refuse plain (non-JSONP) requests
        response.setStatus(403)
        writer.println("Error: This interface only responds to JSONP")
        writer.close()
        return

    if context["page"].authentication.is_logged_in():
        result.getJsonObject().put("isAuthenticated", "true")
    else:
        result.getJsonObject().put("isAuthenticated", "false")
    writer.println(jsonpCallback + "(" + result.toString() + ")")
    writer.close()
def __updateMetadataPayload(self, data):
    # Merge 'data' into formData.tfpackage, but only on the first pass —
    # the presence of dc:title marks a previously-processed package.
    # Get and parse
    payload = self.object.getPayload("formData.tfpackage")
    json = JsonSimple(payload.open())
    payload.close()
    # Basic test for a mandatory field
    title = json.getString(None, ["dc:title"])
    if title is not None:
        # We've done this before
        return
    # Merge
    json.getJsonObject().putAll(data)
    # Store it
    inStream = IOUtils.toInputStream(json.toString(True), "UTF-8")
    try:
        self.object.updatePayload("formData.tfpackage", inStream)
    except StorageException, e:
        self.log.error(
            "Error updating 'formData.tfpackage' payload for object '{}'",
            self.oid, e)
organizations = organizations.findall(ns+"organization") organizations = [o for o in organizations if o.attrib.get("identifier")==defaultName] organization = organizations[0] title = organization.find(ns+"title").text rvtMap.put("title", title) items = organization.findall(ns+"item") rvtMap.put("toc", self.__getJsonItems(ns, items, resources)) except Exception, e: data["error"] = "Error - %s" % str(e) print data["error"] object.close() except StorageException, e: data["DEBUG"] = str(e.getMessage()) rvtManifest = JsonSimple(rvtMap) return rvtManifest.toString() def __getJsonItems(self, ns, items, resources): rvtNodes = ArrayList() for item in items: attr = item.attrib isvisible = attr.get("isvisible") == "true" idref = attr.get("identifierref") id = resources.get(idref) title = item.find(ns+"title").text if isvisible and id and id.endswith(".htm"): rvtNode = HashMap() rvtNode.put("visible", True) rvtNode.put("relPath", id) rvtNode.put("title", title) rvtNode.put("children", self.__getJsonItems(ns, item.findall(ns+"item"), resources))
def __workflow(self):
    """Index workflow state and form/manifest fields for a services record.

    Loads (or creates) the workflow.metadata payload, applies any pending
    step transition, persists it when changed, then indexes form data,
    manifest fields and workflow security for Solr.
    """
    # Workflow data
    WORKFLOW_ID = "servicesUI2"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    pageTitle = "Services Record"
    displayType = "package-service"
    initialStep = 0
    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMeta.getJsonObject().put("pageTitle", pageTitle)
        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])
        # Security change
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed?
    if wfChanged == True:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            print " ERROR updating dataset payload"

    # Form processing
    coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        data = formData.getJsonObject()
        for field in data.keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]

    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field not in coreFields:
            value = formData.get(field)
            if value is not None and value.strip() != "":
                self.utils.add(self.index, field, value)
                # We want to sort by date of creation, so it
                # needs to be indexed as a date (ie. 'date_*')
                if field == "dc:created":
                    parsedTime = time.strptime(value, "%Y-%m-%d")
                    solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                    self.utils.add(self.index, "date_created", solrTime)
                # try to extract some common fields for faceting
                if field.startswith("dc:") and \
                        not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                        or field.endswith(".dc:identifier") \
                        or field.endswith(".rdf:resource")):
                    # index dublin core fields for faceting
                    basicField = field.replace("dc:", "dc_")
                    dot = field.find(".")
                    if dot > 0:
                        facetField = basicField[:dot]
                    else:
                        facetField = basicField
                    #print "Indexing DC field '%s':'%s'" % (field, facetField)
                    if facetField == "dc_title":
                        if self.title is None:
                            self.title = value
                    elif facetField == "dc_type":
                        if self.dcType is None:
                            self.dcType = value
                    elif facetField == "dc_creator":
                        if basicField.endswith("foaf_name"):
                            self.utils.add(self.index, "dc_creator", value)
                    else:
                        self.utils.add(self.index, facetField, value)
                    # index keywords for lookup
                    if field.startswith("dc:subject.vivo:keyword."):
                        self.utils.add(self.index, "keywords", value)

    self.utils.add(self.index, "display_type", displayType)

    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
    if self.owner is not None:
        self.utils.add(self.index, "workflow_security", self.owner)
    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __workflow(self):
    """Index workflow state and form data for a dataset record.

    Reads (or creates) the 'workflow.metadata' payload, applies any pending
    workflow step transition, captures per-stage security settings, then
    indexes form/manifest fields (creation date, embargo date, Dublin Core
    facets, keywords, array and composite reporting fields) plus the
    workflow id/step/label and security groups.
    """
    # Workflow data
    WORKFLOW_ID = "dataset"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    # Guest-owned objects are treated as submission requests and start at
    # the first configured stage; everything else starts one stage in.
    if self.owner == "guest":
        pageTitle = "Submission Request"
        displayType = "submission-request"
        initialStep = 0
    else:
        pageTitle = "Metadata Record"
        displayType = "package-dataset"
        initialStep = 1

    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMeta.getJsonObject().put("pageTitle", pageTitle)

        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])

        # Security change
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    # Stage messages are only picked up on a real transition.
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create one at the initial stage.
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed? If so, persist it back to storage.
    if wfChanged == True:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            # FIX: was a bare 'print' to stdout; route through the logger
            # (this method already uses self.log.debug below) so the
            # failure is actually visible in the application log.
            self.log.error(" ERROR updating dataset payload")

    # Form processing
    coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        data = formData.getJsonObject()
        for field in data.keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]

    # Used to make sure we have a created date
    createdDateFlag = False

    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field not in coreFields:
            value = formData.get(field)
            if value is not None and value.strip() != "":
                self.utils.add(self.index, field, value)
                # We want to sort by date of creation, so it
                # needs to be indexed as a date (ie. 'date_*')
                if field == "dc:created":
                    parsedTime = time.strptime(value, "%Y-%m-%d")
                    solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                    self.utils.add(self.index, "date_created", solrTime)
                    self.log.debug("Set created date to :%s" % solrTime)
                    createdDateFlag = True
                elif field == "redbox:embargo.dc:date":
                    self.embargoedDate = value
                # try to extract some common fields for faceting
                if field.startswith("dc:") and \
                   not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                        or field.endswith(".dc:identifier") \
                        or field.endswith(".rdf:resource")):
                    # index dublin core fields for faceting
                    basicField = field.replace("dc:", "dc_")
                    dot = field.find(".")
                    if dot > 0:
                        facetField = basicField[:dot]
                    else:
                        facetField = basicField
                    #print "Indexing DC field '%s':'%s'" % (field, facetField)
                    if facetField == "dc_title":
                        if self.title is None:
                            self.title = value
                    elif facetField == "dc_type":
                        if self.dcType is None:
                            self.dcType = value
                    elif facetField == "dc_creator":
                        # only the foaf:name sub-field is indexed as creator
                        if basicField.endswith("foaf_name"):
                            self.utils.add(self.index, "dc_creator", value)
                    else:
                        self.utils.add(self.index, facetField, value)
                    # index keywords for lookup
                    if field.startswith("dc:subject.vivo:keyword."):
                        self.utils.add(self.index, "keywords", value)
                # check if this is an array field
                fnameparts = field.split(":")
                if fnameparts is not None and len(fnameparts) >= 3:
                    if field.startswith("bibo") or field.startswith("skos"):
                        arrParts = fnameparts[1].split(".")
                    else:
                        arrParts = fnameparts[2].split(".")
                    # we're not interested in: Relationship, Type and some redbox:origin
                    if arrParts is not None and len(arrParts) >= 2 and field.find(":Relationship.") == -1 and field.find("dc:type") == -1 and field.find("redbox:origin") == -1 and arrParts[1].isdigit():
                        # we've got an array field: strip the numeric index
                        # out of the field name for reporting purposes
                        fldPart = ":%s" % arrParts[0]
                        prefixEndIdx = field.find(fldPart) + len(fldPart)
                        suffixStartIdx = prefixEndIdx + len(arrParts[1]) + 1
                        arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx] + field[suffixStartIdx:]
                        if field.endswith("Name"):
                            arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx]
                        self.log.debug("Array Field name is:%s from: %s, with value:%s" % (arrFldName, field, value))
                        if field.endswith("Name"):
                            # accumulate "family, given" per array index
                            fullFieldMap = self.arrayBucket.get(arrFldName)
                            if fullFieldMap is None:
                                fullFieldMap = HashMap()
                                self.arrayBucket.put(arrFldName, fullFieldMap)
                            idx = arrParts[1]
                            fullField = fullFieldMap.get(idx)
                            if (fullField is None):
                                fullField = ""
                            if (field.endswith("givenName")):
                                fullField = "%s, %s" % (fullField, value)
                            if (field.endswith("familyName")):
                                fullField = "%s%s" % (value, fullField)
                            self.log.debug("fullname now is :%s" % fullField)
                            fullFieldMap.put(idx, fullField)
                        else:
                            fieldlist = self.arrayBucket.get(arrFldName)
                            if fieldlist is None:
                                fieldlist = []
                                self.arrayBucket.put(arrFldName, fieldlist)
                            fieldlist.append(value)
                # composite fields are stitched together from their
                # configured start/end sub-fields with a delimiter
                for compfield in self.compFields:
                    if field.startswith(compfield):
                        arrFldName = self.reportingFieldPrefix + compfield
                        fullFieldMap = self.arrayBucket.get(arrFldName)
                        if fullFieldMap is None:
                            fullFieldMap = HashMap()
                            self.arrayBucket.put(arrFldName, fullFieldMap)
                        fullField = fullFieldMap.get("1")
                        if fullField is None:
                            fullField = ""
                        if field.endswith(self.compFieldsConfig[compfield]["end"]):
                            fullField = "%s%s%s" % (fullField, self.compFieldsConfig[compfield]["delim"], value)
                        if field.endswith(self.compFieldsConfig[compfield]["start"]):
                            fullField = "%s%s" % (value, fullField)
                        self.log.debug("full field now is :%s" % fullField)
                        fullFieldMap.put("1", fullField)

    self.utils.add(self.index, "display_type", displayType)

    # Make sure we have a creation date
    if not createdDateFlag:
        self.utils.add(self.index, "date_created", self.last_modified)
        self.log.debug("Forced creation date to %s because it was not explicitly set." % self.last_modified)

    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
    if self.owner is not None:
        self.utils.add(self.index, "workflow_security", self.owner)
    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __workflow(self):
    """Index workflow state for a Parties_People record.

    Reads (or creates) the 'workflow.metadata' payload, applies any pending
    workflow step transition, captures per-stage security settings, copies
    non-core form fields into ``self.customFields``, and indexes the
    workflow id/step/label and security groups.
    """
    # Workflow data
    WORKFLOW_ID = "Parties_People"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    # Stage definitions come from the indexer configuration.
    stages = self.config.getJsonSimpleList(["stages"])
    initialStep = 0
    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])
        # Security change
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    # Stage messages are only picked up on a real transition.
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create one at the initial stage.
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed? If so, persist it back to storage.
    if wfChanged == True:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            # NOTE(review): error goes to stdout only; consider the
            # script logger if one is available on this class.
            print " ERROR updating dataset payload"

    # Form processing
    coreFields = ["title", "description"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        data = formData.getJsonObject()
        for field in data.keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
    if self.owner is not None:
        self.utils.add(self.index, "workflow_security", self.owner)
    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __formData(self):
    """Locate, version-check and (if necessary) upgrade an object's form data.

    Finds the package payload (by ``self.packagePidSuffix``), parses its
    JSON, compares its 'redbox:formVersion' against ``self.redboxVersion``,
    runs the private upgrade or hotfix routine, backs up the original data,
    and writes the modified payload back to storage. Returns early (with a
    log entry) at any failure point.
    """
    # Find our workflow form data
    packagePid = None
    try:
        self.pidList = self.object.getPayloadIdList()
        for pid in self.pidList:
            if pid.endswith(self.packagePidSuffix):
                packagePid = pid
    except StorageException:
        self.log.error("Error accessing object PID list for object '{}' ", self.oid)
        return

    if packagePid is None:
        self.log.debug("Object '{}' has no form data", self.oid)
        return

    # Retrieve our form data
    workflowData = None
    try:
        payload = self.object.getPayload(packagePid)
        try:
            workflowData = JsonSimple(payload.open())
        except Exception:
            self.log.error("Error parsing JSON '{}'", packagePid)
        finally:
            # Always release the payload stream, even on parse failure.
            payload.close()
    except StorageException:
        self.log.error("Error accessing '{}'", packagePid)
        return

    # Test our version data
    self.version = workflowData.getString("{NO VERSION}", ["redbox:formVersion"])
    # Keep a pre-modification snapshot for the backup below.
    oldData = String(workflowData.toString(True))
    if self.version != self.redboxVersion:
        self.log.info("OID '{}' requires an upgrade: '{}' => '{}'", [self.oid, self.version, self.redboxVersion])
        # The version data is old, run our upgrade
        # function to see if any alterations are
        # required. Most likely at least the
        # version number will change.
        newWorkflowData = self.__upgrade(workflowData)
    else:
        # Up-to-date version: only the v1.2 'dc:type' hotfix may apply.
        newWorkflowData = self.__hotfix(workflowData)
        if newWorkflowData is not None:
            self.log.debug("OID '{}' was hotfixed for v1.2 'dc:type' bug", self.oid)
        else:
            self.log.debug("OID '{}' requires no work, skipping", self.oid)
            return

    # Backup our data first
    backedUp = self.__backup(oldData)
    if not backedUp:
        self.log.error("Upgrade aborted, data backup failed!")
        return

    # Save the newly modified data
    jsonString = String(newWorkflowData.toString(True))
    inStream = ByteArrayInputStream(jsonString.getBytes("UTF-8"))
    try:
        self.object.updatePayload(packagePid, inStream)
    except StorageException, e:
        self.log.error("Error updating workflow payload: ", e)
def __activate__(self, context):
    """Record a committee member's approval response against an object.

    Reads the 'oid', 'approval' and 'approval_comment' request parameters,
    merges the authenticated user's response into the object's
    'committee-responses.metadata' payload (keyed by username, so a user's
    later response overwrites their earlier one), saves the payload, and
    redirects back to the object's detail page.
    """
    request = context["request"]
    storage = context["Services"].getStorage()
    auth = context["page"].authentication
    log = context["log"]

    username = auth.get_name()
    oid = request.getParameter("oid")
    approval = request.getParameter("approval")
    approval_comment = request.getParameter("approval_comment")

    storedObj = storage.getObject(oid)
    committeeResponses = None
    payloadList = storedObj.getPayloadIdList()
    if payloadList.contains("committee-responses.metadata"):
        committeeResponsePayload = storedObj.getPayload("committee-responses.metadata")
        # FIX: close the payload after parsing instead of leaking the
        # open stream (matches the payload-handling pattern used by the
        # other scripts in this file).
        try:
            committeeResponses = JsonSimple(committeeResponsePayload.open()).getJsonObject()
        finally:
            committeeResponsePayload.close()
    else:
        # First response for this object: start a fresh container.
        committeeResponses = JsonObject()

    committeeResponse = JsonObject()
    committeeResponse.put("approval", approval)
    committeeResponse.put("approval_comment", approval_comment)
    committeeResponses.put(username, committeeResponse)
    log.debug(" %s: Committee %s, approval = %s, comment = %s" % (oid, username, approval, approval_comment))
    StorageUtils.createOrUpdatePayload(storedObj, "committee-responses.metadata", IOUtils.toInputStream(committeeResponses.toString(), "UTF-8"))
    context["response"].sendRedirect(context["portalPath"] + "/detail/" + oid)
def __workflow(self):
    """Index workflow state and form data for a harvested dataset record.

    Variant of the dataset workflow indexer used when harvesting straight
    into the 'Published' stage (see the commented-out owner branch below,
    which the standard variant uses); otherwise mirrors the usual dataset
    workflow handling: step transition, stage security, manifest field
    indexing and workflow id/step/label indexing.
    """
    # Workflow data
    WORKFLOW_ID = "dataset"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    #if self.owner == "guest":
    #    pageTitle = "Submission Request"
    #    displayType = "submission-request"
    #    initialStep = 0
    #else:
    #    pageTitle = "Metadata Record"
    #    displayType = "package-dataset"
    #    initialStep = 1
    ## Harvesting straight into the 'Published' stage
    pageTitle = "Metadata Record"
    displayType = "package-dataset"
    #initialStep = 4
    initialStep = 3
    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMeta.getJsonObject().put("pageTitle", pageTitle)

        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])

        # Security change
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    # Stage messages are only picked up on a real transition.
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create one at the initial stage.
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed? If so, persist it back to storage.
    if wfChanged == True:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            # NOTE(review): error goes to stdout only; consider the
            # script logger if one is available on this class.
            print(" ERROR updating dataset payload")

    # Form processing
    coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        data = formData.getJsonObject()
        for field in data.keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]

    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field not in coreFields:
            value = formData.get(field)
            if value is not None and value.strip() != "":
                self.utils.add(self.index, field, value)
                # We want to sort by date of creation, so it
                # needs to be indexed as a date (ie. 'date_*')
                if field == "dc:created":
                    parsedTime = time.strptime(value, "%Y-%m-%d")
                    solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                    self.utils.add(self.index, "date_created", solrTime)
                # try to extract some common fields for faceting
                if field.startswith("dc:") and \
                   not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                        or field.endswith(".dc:identifier") \
                        or field.endswith(".rdf:resource")):
                    # index dublin core fields for faceting
                    basicField = field.replace("dc:", "dc_")
                    dot = field.find(".")
                    if dot > 0:
                        facetField = basicField[:dot]
                    else:
                        facetField = basicField
                    #print "Indexing DC field '%s':'%s'" % (field, facetField)
                    if facetField == "dc_title":
                        if self.title is None:
                            self.title = value
                    elif facetField == "dc_type":
                        if self.dcType is None:
                            self.dcType = value
                    elif facetField == "dc_creator":
                        # only the foaf:name sub-field is indexed as creator
                        if basicField.endswith("foaf_name"):
                            self.utils.add(self.index, "dc_creator", value)
                    else:
                        self.utils.add(self.index, facetField, value)
                    # index keywords for lookup
                    if field.startswith("dc:subject.vivo:keyword."):
                        self.utils.add(self.index, "keywords", value)

    self.utils.add(self.index, "display_type", displayType)

    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
    if self.owner is not None:
        self.utils.add(self.index, "workflow_security", self.owner)
    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __workflow(self):
    """Index workflow state and form data for a dataset record.

    Reads (or creates) the 'workflow.metadata' payload, applies any pending
    workflow step transition, captures per-stage security settings, then
    indexes form/manifest fields (creation date, embargo date, creation
    timestamp, Dublin Core facets, keywords, array and composite reporting
    fields) plus the workflow id/step/label and security groups.
    """
    # Workflow data
    WORKFLOW_ID = "dataset"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    # Guest-owned objects are treated as submission requests and start at
    # the first configured stage; everything else starts one stage in.
    if self.owner == "guest":
        pageTitle = "Submission Request"
        displayType = "submission-request"
        initialStep = 0
    else:
        pageTitle = "Metadata Record"
        displayType = "package-dataset"
        initialStep = 1

    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMeta.getJsonObject().put("pageTitle", pageTitle)

        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            wfChanged = True
            # Step change
            wfMeta.getJsonObject().put("step", targetStep)
            wfMeta.getJsonObject().remove("targetStep")
        # This must be a re-index then
        else:
            targetStep = wfMeta.getString(None, ["step"])

        # Security change
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged == True:
                    # Stage messages are only picked up on a real transition.
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create one at the initial stage.
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        stages = self.config.getJsonSimpleList(["stages"])
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed? If so, persist it back to storage.
    if wfChanged == True:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            # FIX: was a bare 'print' to stdout; route through the logger
            # (this method already uses self.log.debug below) so the
            # failure is actually visible in the application log.
            self.log.error(" ERROR updating dataset payload")

    # Form processing
    coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        data = formData.getJsonObject()
        for field in data.keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]

    # Used to make sure we have a created date
    createdDateFlag = False

    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field not in coreFields:
            value = formData.get(field)
            if value is not None and value.strip() != "":
                self.utils.add(self.index, field, value)
                # We want to sort by date of creation, so it
                # needs to be indexed as a date (ie. 'date_*')
                if field == "dc:created":
                    parsedTime = time.strptime(value, "%Y-%m-%d")
                    solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                    self.utils.add(self.index, "date_created", solrTime)
                    self.log.debug("Set created date to :%s" % solrTime)
                    createdDateFlag = True
                elif field == "redbox:embargo.dc:date":
                    self.embargoedDate = value
                elif field == "create_timestamp":
                    self.createTimeStamp = value
                # try to extract some common fields for faceting
                if field.startswith("dc:") and \
                   not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                        or field.endswith(".dc:identifier") \
                        or field.endswith(".rdf:resource")):
                    # index dublin core fields for faceting
                    basicField = field.replace("dc:", "dc_")
                    dot = field.find(".")
                    if dot > 0:
                        facetField = basicField[:dot]
                    else:
                        facetField = basicField
                    #print "Indexing DC field '%s':'%s'" % (field, facetField)
                    if facetField == "dc_title":
                        if self.title is None:
                            self.title = value
                    elif facetField == "dc_type":
                        if self.dcType is None:
                            self.dcType = value
                    elif facetField == "dc_creator":
                        # only the foaf:name sub-field is indexed as creator
                        if basicField.endswith("foaf_name"):
                            self.utils.add(self.index, "dc_creator", value)
                    else:
                        self.utils.add(self.index, facetField, value)
                    # index keywords for lookup
                    if field.startswith("dc:subject.vivo:keyword."):
                        self.utils.add(self.index, "keywords", value)
                # check if this is an array field
                fnameparts = field.split(":")
                if fnameparts is not None and len(fnameparts) >= 3:
                    if field.startswith("bibo") or field.startswith("skos"):
                        arrParts = fnameparts[1].split(".")
                    else:
                        arrParts = fnameparts[2].split(".")
                    # we're not interested in: Relationship, Type and some redbox:origin
                    if arrParts is not None and len(arrParts) >= 2 and field.find(":Relationship.") == -1 and field.find("dc:type") == -1 and field.find("redbox:origin") == -1 and arrParts[1].isdigit():
                        # we've got an array field: strip the numeric index
                        # out of the field name for reporting purposes
                        fldPart = ":%s" % arrParts[0]
                        prefixEndIdx = field.find(fldPart) + len(fldPart)
                        suffixStartIdx = prefixEndIdx + len(arrParts[1]) + 1
                        arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx] + field[suffixStartIdx:]
                        if field.endswith("Name"):
                            arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx]
                        self.log.debug("Array Field name is:%s from: %s, with value:%s" % (arrFldName, field, value))
                        if field.endswith("Name"):
                            # accumulate "family, given" per array index
                            fullFieldMap = self.arrayBucket.get(arrFldName)
                            if fullFieldMap is None:
                                fullFieldMap = HashMap()
                                self.arrayBucket.put(arrFldName, fullFieldMap)
                            idx = arrParts[1]
                            fullField = fullFieldMap.get(idx)
                            if (fullField is None):
                                fullField = ""
                            if (field.endswith("givenName")):
                                fullField = "%s, %s" % (fullField, value)
                            if (field.endswith("familyName")):
                                fullField = "%s%s" % (value, fullField)
                            self.log.debug("fullname now is :%s" % fullField)
                            fullFieldMap.put(idx, fullField)
                        else:
                            fieldlist = self.arrayBucket.get(arrFldName)
                            if fieldlist is None:
                                fieldlist = []
                                self.arrayBucket.put(arrFldName, fieldlist)
                            fieldlist.append(value)
                # composite fields are stitched together from their
                # configured start/end sub-fields with a delimiter
                for compfield in self.compFields:
                    if field.startswith(compfield):
                        arrFldName = self.reportingFieldPrefix + compfield
                        fullFieldMap = self.arrayBucket.get(arrFldName)
                        if fullFieldMap is None:
                            fullFieldMap = HashMap()
                            self.arrayBucket.put(arrFldName, fullFieldMap)
                        fullField = fullFieldMap.get("1")
                        if fullField is None:
                            fullField = ""
                        if field.endswith(self.compFieldsConfig[compfield]["end"]):
                            fullField = "%s%s%s" % (fullField, self.compFieldsConfig[compfield]["delim"], value)
                        if field.endswith(self.compFieldsConfig[compfield]["start"]):
                            fullField = "%s%s" % (value, fullField)
                        self.log.debug("full field now is :%s" % fullField)
                        fullFieldMap.put("1", fullField)

    self.utils.add(self.index, "display_type", displayType)

    # Make sure we have a creation date
    if not createdDateFlag:
        self.utils.add(self.index, "date_created", self.last_modified)
        self.log.debug("Forced creation date to %s because it was not explicitly set." % self.last_modified)

    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
    if self.owner is not None:
        self.utils.add(self.index, "workflow_security", self.owner)
    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")