def _readReviewers(self, storedObj, tfpackage):
    """Read the reviewer's recommendation from the TFPACKAGE payload.

    The two reviewer entries are copied into a new JSON object under
    short keys:
        reviewer-recommend-for       : for
        reviewer-recommended-storage : storage

    storedObj -- storage object that holds the payload
    tfpackage -- ID of the payload to read
    Returns a JsonObject holding the keys "for" and "storage".
    """
    reviewersPayload = storedObj.getPayload(tfpackage)
    try:
        # The JSON is parsed while the payload stream is open ...
        reviewersRecommends = JsonSimple(reviewersPayload.open()).getJsonObject()
    finally:
        # ... and the payload is always released afterwards, even when
        # opening or parsing fails.
        reviewersPayload.close()

    reviewers = JsonObject()
    reviewers.put("for", reviewersRecommends.get("reviewer-recommend-for"))
    reviewers.put("storage", reviewersRecommends.get("reviewer-recommended-storage"))
    return reviewers
def process_tags(self, result):
    """Serialise Anotar tag documents into a JSON array string.

    Each entry of ``result`` is expected to expose the annotation JSON
    under "jsonString". Tags without locators are aggregated per tag text:
    the first document seen for a text is kept and its "tagCount" entry is
    incremented for every further occurrence. Tags that do have locators
    are emitted individually, ahead of the aggregated ones.
    """
    locatedTags = []
    countedTags = {}

    for solrDoc in result:
        # Get Anotar data from Solr data
        anotar = JsonSimple(solrDoc.get("jsonString"))
        # Actual tag text
        text = anotar.getString(None, ["content", "literal"])
        # Number of locators attached to this tag
        locatorCount = anotar.getJsonSimpleList(["annotates", "locators"]).size()

        if locatorCount != 0:
            # Tags with a locator, special case for images etc.
            locatedTags.append(anotar.toString())
            continue

        if text not in countedTags:
            # First time, store this object
            anotar.getJsonObject().put("tagCount", str(1))
            countedTags[text] = anotar
            continue

        # We've seen it before, just increment the counter
        seen = countedTags[text]
        total = seen.getInteger(0, ["tagCount"])
        seen.getJsonObject().put("tagCount", str(total + 1))

    # Push all the 'basic' counts into the list to return
    serialised = locatedTags + [countedTags[key].toString() for key in countedTags]
    return "[" + ",".join(serialised) + "]"
def __workflow(self):
    """Maintain the workflow payload and index workflow and form fields.

    Steps, in order:
      1. Read "workflow.metadata"; when storage raises StorageException a
         new metadata object is built at the initial stage instead.
      2. Apply a pending "targetStep" change and pick up the label,
         visibility, security and messages of the matching stage.
      3. Store the metadata back when it changed.
      4. Copy non-core "formData" entries into self.customFields.
      5. Index the non-empty, non-core fields of the package manifest.
      6. Index display type, workflow id/step/label and security.
    """
    # Workflow data
    WORKFLOW_ID = "servicesUI2"
    pageTitle = "Services Record"
    displayType = "package-service"
    initialStep = 0

    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])

    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("pageTitle", pageTitle)

        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            # Step change
            wfChanged = True
            wfMetaObj.put("step", targetStep)
            wfMetaObj.remove("targetStep")
        else:
            # This must be a re-index then
            targetStep = wfMeta.getString(None, ["step"])

        # Security change
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged:
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed?
    if wfChanged:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            # Best effort: indexing continues even if the payload
            # could not be stored.
            print(" ERROR updating dataset payload")
        finally:
            inStream.close()

    # Form processing
    coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        for field in formData.getJsonObject().keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]

    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field in coreFields:
            continue
        value = formData.get(field)
        if value is None or value.strip() == "":
            continue

        self.utils.add(self.index, field, value)

        # We want to sort by date of creation, so it needs to be indexed
        # as a date (ie. 'date_*'). strptime raises ValueError when the
        # value is not in YYYY-MM-DD form.
        if field == "dc:created":
            parsedTime = time.strptime(value, "%Y-%m-%d")
            solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
            self.utils.add(self.index, "date_created", solrTime)

        # try to extract some common fields for faceting
        if field.startswith("dc:") and not (
                field.endswith(".dc:identifier.rdf:PlainLiteral")
                or field.endswith(".dc:identifier")
                or field.endswith(".rdf:resource")):
            # index dublin core fields for faceting; only "dc:" is
            # rewritten, so the string length (and the dot offset) is
            # the same for field and basicField
            basicField = field.replace("dc:", "dc_")
            dot = field.find(".")
            if dot > 0:
                facetField = basicField[:dot]
            else:
                facetField = basicField

            if facetField == "dc_title":
                if self.title is None:
                    self.title = value
            elif facetField == "dc_type":
                if self.dcType is None:
                    self.dcType = value
            elif facetField == "dc_creator":
                # NOTE(review): only "dc:" is replaced above, so a key
                # ending in "foaf:name" does not satisfy this check -
                # confirm whether "foaf:name" was intended.
                if basicField.endswith("foaf_name"):
                    self.utils.add(self.index, "dc_creator", value)
            else:
                self.utils.add(self.index, facetField, value)

            # index keywords for lookup
            if field.startswith("dc:subject.vivo:keyword."):
                self.utils.add(self.index, "keywords", value)

    self.utils.add(self.index, "display_type", displayType)

    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
        # The owner is added alongside every security group.
        if self.owner is not None:
            self.utils.add(self.index, "workflow_security", self.owner)

    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __workflow(self):
    """Maintain the workflow payload and index workflow and form fields.

    Steps, in order:
      1. Read "workflow.metadata"; when storage raises StorageException a
         new metadata object is built at the initial stage instead
         (stage 0 for the "guest" owner, stage 1 otherwise).
      2. Apply a pending "targetStep" change and pick up the label,
         visibility, security and messages of the matching stage.
      3. Store the metadata back when it changed.
      4. Copy non-core "formData" entries into self.customFields.
      5. Index the non-empty, non-core fields of the package manifest,
         collecting numbered ("array") fields and composite fields into
         self.arrayBucket on the way.
      6. Index display type, a creation date (self.last_modified when no
         "dc:created" was indexed), workflow id/step/label and security.
    """
    # Workflow data
    WORKFLOW_ID = "dataset"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    if self.owner == "guest":
        pageTitle = "Submission Request"
        displayType = "submission-request"
        initialStep = 0
    else:
        pageTitle = "Metadata Record"
        displayType = "package-dataset"
        initialStep = 1

    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("pageTitle", pageTitle)

        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            # Step change
            wfChanged = True
            wfMetaObj.put("step", targetStep)
            wfMetaObj.remove("targetStep")
        else:
            # This must be a re-index then
            targetStep = wfMeta.getString(None, ["step"])

        # Security change
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged:
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed?
    if wfChanged:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            # Best effort: indexing continues even if the payload
            # could not be stored.
            print(" ERROR updating dataset payload")
        finally:
            inStream.close()

    # Form processing
    coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        for field in formData.getJsonObject().keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]

    # Used to make sure we have a created date
    createdDateFlag = False

    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field in coreFields:
            continue
        value = formData.get(field)
        if value is None or value.strip() == "":
            continue

        self.utils.add(self.index, field, value)

        # We want to sort by date of creation, so it needs to be indexed
        # as a date (ie. 'date_*'). strptime raises ValueError when the
        # value is not in YYYY-MM-DD form.
        if field == "dc:created":
            parsedTime = time.strptime(value, "%Y-%m-%d")
            solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
            self.utils.add(self.index, "date_created", solrTime)
            self.log.debug("Set created date to :%s" % solrTime)
            createdDateFlag = True
        elif field == "redbox:embargo.dc:date":
            self.embargoedDate = value

        # try to extract some common fields for faceting
        if field.startswith("dc:") and not (
                field.endswith(".dc:identifier.rdf:PlainLiteral")
                or field.endswith(".dc:identifier")
                or field.endswith(".rdf:resource")):
            # index dublin core fields for faceting; only "dc:" is
            # rewritten, so the string length (and the dot offset) is
            # the same for field and basicField
            basicField = field.replace("dc:", "dc_")
            dot = field.find(".")
            if dot > 0:
                facetField = basicField[:dot]
            else:
                facetField = basicField

            if facetField == "dc_title":
                if self.title is None:
                    self.title = value
            elif facetField == "dc_type":
                if self.dcType is None:
                    self.dcType = value
            elif facetField == "dc_creator":
                # NOTE(review): only "dc:" is replaced above, so a key
                # ending in "foaf:name" does not satisfy this check -
                # confirm whether "foaf:name" was intended.
                if basicField.endswith("foaf_name"):
                    self.utils.add(self.index, "dc_creator", value)
            else:
                self.utils.add(self.index, facetField, value)

            # index keywords for lookup
            if field.startswith("dc:subject.vivo:keyword."):
                self.utils.add(self.index, "keywords", value)

        # check if this is an array field
        fnameparts = field.split(":")
        if len(fnameparts) >= 3:
            if field.startswith("bibo") or field.startswith("skos"):
                arrParts = fnameparts[1].split(".")
            else:
                arrParts = fnameparts[2].split(".")
            # we're not interested in: Relationship, Type and some redbox:origin
            if (len(arrParts) >= 2
                    and field.find(":Relationship.") == -1
                    and field.find("dc:type") == -1
                    and field.find("redbox:origin") == -1
                    and arrParts[1].isdigit()):
                # we've got an array field: drop the ".<index>" part
                # from the key to get the bucket name
                fldPart = ":%s" % arrParts[0]
                prefixEndIdx = field.find(fldPart) + len(fldPart)
                suffixStartIdx = prefixEndIdx + len(arrParts[1]) + 1
                isNameField = field.endswith("Name")
                if isNameField:
                    # name parts share one bucket keyed by the prefix only
                    arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx]
                else:
                    arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx] + field[suffixStartIdx:]
                self.log.debug("Array Field name is:%s from: %s, with value:%s" % (arrFldName, field, value))

                if isNameField:
                    # Assemble "<familyName>, <givenName>" per array
                    # index, whichever part arrives first.
                    fullFieldMap = self.arrayBucket.get(arrFldName)
                    if fullFieldMap is None:
                        fullFieldMap = HashMap()
                        self.arrayBucket.put(arrFldName, fullFieldMap)
                    idx = arrParts[1]
                    fullField = fullFieldMap.get(idx)
                    if fullField is None:
                        fullField = ""
                    if field.endswith("givenName"):
                        fullField = "%s, %s" % (fullField, value)
                    if field.endswith("familyName"):
                        fullField = "%s%s" % (value, fullField)
                    self.log.debug("fullname now is :%s" % fullField)
                    fullFieldMap.put(idx, fullField)
                else:
                    fieldlist = self.arrayBucket.get(arrFldName)
                    if fieldlist is None:
                        fieldlist = []
                        self.arrayBucket.put(arrFldName, fieldlist)
                    fieldlist.append(value)

        # Composite fields: join the configured start and end parts with
        # the configured delimiter, stored under the fixed key "1".
        for compfield in self.compFields:
            if field.startswith(compfield):
                compConfig = self.compFieldsConfig[compfield]
                arrFldName = self.reportingFieldPrefix + compfield
                fullFieldMap = self.arrayBucket.get(arrFldName)
                if fullFieldMap is None:
                    fullFieldMap = HashMap()
                    self.arrayBucket.put(arrFldName, fullFieldMap)
                fullField = fullFieldMap.get("1")
                if fullField is None:
                    fullField = ""
                if field.endswith(compConfig["end"]):
                    fullField = "%s%s%s" % (fullField, compConfig["delim"], value)
                if field.endswith(compConfig["start"]):
                    fullField = "%s%s" % (value, fullField)
                self.log.debug("full field now is :%s" % fullField)
                fullFieldMap.put("1", fullField)

    self.utils.add(self.index, "display_type", displayType)

    # Make sure we have a creation date
    if not createdDateFlag:
        self.utils.add(self.index, "date_created", self.last_modified)
        self.log.debug("Forced creation date to %s because it was not explicitly set." % self.last_modified)

    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
        # The owner is added alongside every security group.
        if self.owner is not None:
            self.utils.add(self.index, "workflow_security", self.owner)

    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def __workflow(self):
    """Maintain the workflow payload and index workflow and form fields.

    Steps, in order:
      1. Read "workflow.metadata"; when storage raises StorageException a
         new metadata object is built at the stage with index 3.
      2. Apply a pending "targetStep" change and pick up the label,
         visibility, security and messages of the matching stage.
      3. Store the metadata back when it changed.
      4. Copy non-core "formData" entries into self.customFields.
      5. Index the non-empty, non-core fields of the package manifest.
      6. Index display type, workflow id/step/label and security.
    """
    # Workflow data
    WORKFLOW_ID = "dataset"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])

    # Harvesting straight into the 'Published' stage (no owner-dependent
    # "Submission Request" handling in this variant).
    pageTitle = "Metadata Record"
    displayType = "package-dataset"
    initialStep = 3

    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("pageTitle", pageTitle)

        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            # Step change
            wfChanged = True
            wfMetaObj.put("step", targetStep)
            wfMetaObj.remove("targetStep")
        else:
            # This must be a re-index then
            targetStep = wfMeta.getString(None, ["step"])

        # Security change
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged:
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed?
    if wfChanged:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            # Best effort: indexing continues even if the payload
            # could not be stored.
            print(" ERROR updating dataset payload")
        finally:
            inStream.close()

    # Form processing
    coreFields = [
        "title", "description", "manifest", "metaList", "relationships",
        "responses"
    ]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        for field in formData.getJsonObject().keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]

    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field in coreFields:
            continue
        value = formData.get(field)
        if value is None or value.strip() == "":
            continue

        self.utils.add(self.index, field, value)

        # We want to sort by date of creation, so it needs to be indexed
        # as a date (ie. 'date_*'). strptime raises ValueError when the
        # value is not in YYYY-MM-DD form.
        if field == "dc:created":
            parsedTime = time.strptime(value, "%Y-%m-%d")
            solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
            self.utils.add(self.index, "date_created", solrTime)

        # try to extract some common fields for faceting
        if field.startswith("dc:") and not (
                field.endswith(".dc:identifier.rdf:PlainLiteral")
                or field.endswith(".dc:identifier")
                or field.endswith(".rdf:resource")):
            # index dublin core fields for faceting; only "dc:" is
            # rewritten, so the string length (and the dot offset) is
            # the same for field and basicField
            basicField = field.replace("dc:", "dc_")
            dot = field.find(".")
            if dot > 0:
                facetField = basicField[:dot]
            else:
                facetField = basicField

            if facetField == "dc_title":
                if self.title is None:
                    self.title = value
            elif facetField == "dc_type":
                if self.dcType is None:
                    self.dcType = value
            elif facetField == "dc_creator":
                # NOTE(review): only "dc:" is replaced above, so a key
                # ending in "foaf:name" does not satisfy this check -
                # confirm whether "foaf:name" was intended.
                if basicField.endswith("foaf_name"):
                    self.utils.add(self.index, "dc_creator", value)
            else:
                self.utils.add(self.index, facetField, value)

            # index keywords for lookup
            if field.startswith("dc:subject.vivo:keyword."):
                self.utils.add(self.index, "keywords", value)

    self.utils.add(self.index, "display_type", displayType)

    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
        # The owner is added alongside every security group.
        if self.owner is not None:
            self.utils.add(self.index, "workflow_security", self.owner)

    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")
def getResponses(self, storedObj):
    """Load the committee responses stored on ``storedObj``.

    Reads the payload named by self.PAYLOAD, parses it as JSON and
    remembers the entry of the current assessor (self.assessor) in
    self.myResponse.

    storedObj -- storage object that holds the responses payload
    Returns the parsed JSON object with all committee responses.
    """
    committeeResponsePayload = storedObj.getPayload(self.PAYLOAD)
    try:
        committeeResponses = JsonSimple(committeeResponsePayload.open()).getJsonObject()
    finally:
        # Always release the payload, even when opening or parsing fails.
        committeeResponsePayload.close()

    # get current assessor's assessment
    self.myResponse = committeeResponses.get(self.assessor)
    return committeeResponses
def __activate__(self, context):
    """Copy the TFPACKAGE of one stored object onto another.

    Form fields read: "fromOid", "toOid", "relatedData" and
    "tfMetaPropertyValue". The caller must be logged in and be either an
    admin or the owner of the source object. A small JSON status document
    is written to the response in every case.
    """
    services = context["Services"]
    self.auth = context["page"].authentication
    self.errorMsg = ""
    self.request = context["request"]
    self.response = context["response"]
    self.formData = context["formData"]
    self.storage = services.getStorage()
    self.log = context["log"]
    self.reportManager = services.getService("reportManager")

    fromOid = self.formData.get("fromOid")
    fromObject = self.storage.getObject(fromOid)

    # Access check: the ownership test only runs for non-admin users.
    if not self.auth.is_logged_in():
        self.errorMsg = "Please login."
    elif not (self.auth.is_admin() == True or self.__isOwner(fromObject)):
        self.errorMsg = "Requires Admin / Librarian / Reviewer / owner access."

    if self.errorMsg != "":
        result = '{"status": "err", "message": "%s"}' % self.errorMsg
    else:
        toOid = self.formData.get("toOid")
        toObject = self.storage.getObject(toOid)
        storeRelatedData = self.formData.get("relatedData")
        fromTFPackage = self._getTFPackage(fromObject)
        toTFPackage = self._getTFPackage(toObject)

        # Get the relevant dc:description from the new object before it is
        # overwritten with the 'from' data, as this should exist from form
        # data created in initial object packaging (packaging.py).
        toTFPackageJson = JsonSimple(toTFPackage.open()).getJsonObject()
        relevant_description = toTFPackageJson.get("dc:description")

        # Overwrite the target package with the source package content.
        fromInputStream = fromTFPackage.open()
        try:
            StorageUtils.createOrUpdatePayload(toObject, toTFPackage.getId(), fromInputStream)
        except StorageException:
            print("error setting tfPackage")
        finally:
            fromTFPackage.close()

        tfMetaPropertyValue = self.formData.get("tfMetaPropertyValue")
        if tfMetaPropertyValue == 'dmpToSelfSub':
            # Fetch the freshly written 'to' package from storage to get
            # all data, then restore the saved description into it.
            toTFPackage = self._getTFPackage(toObject)
            toTFPackageJson = JsonSimple(toTFPackage.open()).getJsonObject()
            self.setMultiDescription(toTFPackageJson, relevant_description)
            descriptionStream = IOUtils.toInputStream(toTFPackageJson.toJSONString(), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(toObject, toTFPackage.getId(), descriptionStream)
            except StorageException:
                print("error setting description text in tfPackage")
            finally:
                descriptionStream.close()
            # NOTE(review): these two log calls are placed after the
            # try/finally; confirm against the original layout.
            self.log.info(
                "Completed migrating 'dc:description' to dc:decription.1.text' for oid: %s." % toTFPackage.getId())
            self.log.debug("Result: %r" % toTFPackageJson)

        fromTFPackageJson = JsonSimple(fromTFPackage.open()).getJsonObject()
        self.log.debug('from json is: %r' % fromTFPackageJson)
        if storeRelatedData != "false":
            # add relatedOid info
            fromTFPackageJson = self._addRelatedOid(JsonSimple(fromTFPackage.open()), toOid)
            # NOTE(review): assumed to belong to this branch; confirm
            # against the original layout.
            self.log.debug('from tfPackage json is now: %r' % fromTFPackageJson)

        # Write the (possibly extended) source package back.
        sourceStream = IOUtils.toInputStream(fromTFPackageJson.toJSONString(), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(fromObject, fromTFPackage.getId(), sourceStream)
        except StorageException:
            print("error setting tfPackage")
        finally:
            sourceStream.close()

        self._addPropertyValueToTFMeta(toObject, tfMetaPropertyValue)
        self._reharvestPackage()
        result = '{"status": "ok", "url": "%s/workflow/%s", "oid": "%s" }' % (context["portalPath"], toOid, toOid)

    writer = self.response.getPrintWriter("application/json; charset=UTF-8")
    writer.println(result)
    writer.close()
def __workflow(self):
    """Maintain the workflow payload and index workflow and form fields.

    Steps, in order:
      1. Read "workflow.metadata"; when storage raises StorageException a
         new metadata object is built at the initial stage instead
         (stage 0 for the "guest" owner, stage 1 otherwise).
      2. Apply a pending "targetStep" change and pick up the label,
         visibility, security and messages of the matching stage.
      3. Store the metadata back when it changed.
      4. Copy non-core "formData" entries into self.customFields.
      5. Index the non-empty, non-core fields of the package manifest,
         remembering the embargo date and create timestamp and collecting
         numbered ("array") fields and composite fields into
         self.arrayBucket on the way.
      6. Index display type, a creation date (self.last_modified when no
         "dc:created" was indexed), workflow id/step/label and security.
    """
    # Workflow data
    WORKFLOW_ID = "dataset"
    wfChanged = False
    workflow_security = []
    self.message_list = None
    stages = self.config.getJsonSimpleList(["stages"])
    if self.owner == "guest":
        pageTitle = "Submission Request"
        displayType = "submission-request"
        initialStep = 0
    else:
        pageTitle = "Metadata Record"
        displayType = "package-dataset"
        initialStep = 1

    try:
        wfMeta = self.__getJsonPayload("workflow.metadata")
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("pageTitle", pageTitle)

        # Are we indexing because of a workflow progression?
        targetStep = wfMeta.getString(None, ["targetStep"])
        if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
            # Step change
            wfChanged = True
            wfMetaObj.put("step", targetStep)
            wfMetaObj.remove("targetStep")
        else:
            # This must be a re-index then
            targetStep = wfMeta.getString(None, ["step"])

        # Security change
        for stage in stages:
            if stage.getString(None, ["name"]) == targetStep:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                if wfChanged:
                    self.message_list = stage.getStringList(["message"])
    except StorageException:
        # No workflow payload, time to create
        initialStage = stages.get(initialStep).getString(None, ["name"])
        wfChanged = True
        wfMeta = JsonSimple()
        wfMetaObj = wfMeta.getJsonObject()
        wfMetaObj.put("id", WORKFLOW_ID)
        wfMetaObj.put("step", initialStage)
        wfMetaObj.put("pageTitle", pageTitle)
        for stage in stages:
            if stage.getString(None, ["name"]) == initialStage:
                wfMetaObj.put("label", stage.getString(None, ["label"]))
                self.item_security = stage.getStringList(["visibility"])
                workflow_security = stage.getStringList(["security"])
                self.message_list = stage.getStringList(["message"])

    # Has the workflow metadata changed?
    if wfChanged:
        inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
        except StorageException:
            # Best effort: indexing continues even if the payload
            # could not be stored.
            print(" ERROR updating dataset payload")
        finally:
            inStream.close()

    # Form processing
    coreFields = [
        "title", "description", "manifest", "metaList", "relationships",
        "responses"
    ]
    formData = wfMeta.getObject(["formData"])
    if formData is not None:
        formData = JsonSimple(formData)
        # Core fields
        description = formData.getStringList(["description"])
        if description:
            self.descriptionList = description
        # Non-core fields
        for field in formData.getJsonObject().keySet():
            if field not in coreFields:
                self.customFields[field] = formData.getStringList([field])

    # Manifest processing (formData not present in wfMeta)
    manifest = self.__getJsonPayload(self.packagePid)
    formTitles = manifest.getStringList(["title"])
    if formTitles:
        for formTitle in formTitles:
            if self.title is None:
                self.title = formTitle
    self.descriptionList = [manifest.getString("", ["description"])]

    # Used to make sure we have a created date
    createdDateFlag = False

    formData = manifest.getJsonObject()
    for field in formData.keySet():
        if field in coreFields:
            continue
        value = formData.get(field)
        if value is None or value.strip() == "":
            continue

        self.utils.add(self.index, field, value)

        # We want to sort by date of creation, so it needs to be indexed
        # as a date (ie. 'date_*'). strptime raises ValueError when the
        # value is not in YYYY-MM-DD form.
        if field == "dc:created":
            parsedTime = time.strptime(value, "%Y-%m-%d")
            solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
            self.utils.add(self.index, "date_created", solrTime)
            self.log.debug("Set created date to :%s" % solrTime)
            createdDateFlag = True
        elif field == "redbox:embargo.dc:date":
            self.embargoedDate = value
        elif field == "create_timestamp":
            self.createTimeStamp = value

        # try to extract some common fields for faceting
        if field.startswith("dc:") and not (
                field.endswith(".dc:identifier.rdf:PlainLiteral")
                or field.endswith(".dc:identifier")
                or field.endswith(".rdf:resource")):
            # index dublin core fields for faceting; only "dc:" is
            # rewritten, so the string length (and the dot offset) is
            # the same for field and basicField
            basicField = field.replace("dc:", "dc_")
            dot = field.find(".")
            if dot > 0:
                facetField = basicField[:dot]
            else:
                facetField = basicField

            if facetField == "dc_title":
                if self.title is None:
                    self.title = value
            elif facetField == "dc_type":
                if self.dcType is None:
                    self.dcType = value
            elif facetField == "dc_creator":
                # NOTE(review): only "dc:" is replaced above, so a key
                # ending in "foaf:name" does not satisfy this check -
                # confirm whether "foaf:name" was intended.
                if basicField.endswith("foaf_name"):
                    self.utils.add(self.index, "dc_creator", value)
            else:
                self.utils.add(self.index, facetField, value)

            # index keywords for lookup
            if field.startswith("dc:subject.vivo:keyword."):
                self.utils.add(self.index, "keywords", value)

        # check if this is an array field
        fnameparts = field.split(":")
        if len(fnameparts) >= 3:
            if field.startswith("bibo") or field.startswith("skos"):
                arrParts = fnameparts[1].split(".")
            else:
                arrParts = fnameparts[2].split(".")
            # we're not interested in: Relationship, Type and some redbox:origin
            if (len(arrParts) >= 2
                    and field.find(":Relationship.") == -1
                    and field.find("dc:type") == -1
                    and field.find("redbox:origin") == -1
                    and arrParts[1].isdigit()):
                # we've got an array field: drop the ".<index>" part
                # from the key to get the bucket name
                fldPart = ":%s" % arrParts[0]
                prefixEndIdx = field.find(fldPart) + len(fldPart)
                suffixStartIdx = prefixEndIdx + len(arrParts[1]) + 1
                isNameField = field.endswith("Name")
                if isNameField:
                    # name parts share one bucket keyed by the prefix only
                    arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx]
                else:
                    arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx] + field[suffixStartIdx:]
                self.log.debug(
                    "Array Field name is:%s from: %s, with value:%s" %
                    (arrFldName, field, value))

                if isNameField:
                    # Assemble "<familyName>, <givenName>" per array
                    # index, whichever part arrives first.
                    fullFieldMap = self.arrayBucket.get(arrFldName)
                    if fullFieldMap is None:
                        fullFieldMap = HashMap()
                        self.arrayBucket.put(arrFldName, fullFieldMap)
                    idx = arrParts[1]
                    fullField = fullFieldMap.get(idx)
                    if fullField is None:
                        fullField = ""
                    if field.endswith("givenName"):
                        fullField = "%s, %s" % (fullField, value)
                    if field.endswith("familyName"):
                        fullField = "%s%s" % (value, fullField)
                    self.log.debug("fullname now is :%s" % fullField)
                    fullFieldMap.put(idx, fullField)
                else:
                    fieldlist = self.arrayBucket.get(arrFldName)
                    if fieldlist is None:
                        fieldlist = []
                        self.arrayBucket.put(arrFldName, fieldlist)
                    fieldlist.append(value)

        # Composite fields: join the configured start and end parts with
        # the configured delimiter, stored under the fixed key "1".
        for compfield in self.compFields:
            if field.startswith(compfield):
                compConfig = self.compFieldsConfig[compfield]
                arrFldName = self.reportingFieldPrefix + compfield
                fullFieldMap = self.arrayBucket.get(arrFldName)
                if fullFieldMap is None:
                    fullFieldMap = HashMap()
                    self.arrayBucket.put(arrFldName, fullFieldMap)
                fullField = fullFieldMap.get("1")
                if fullField is None:
                    fullField = ""
                if field.endswith(compConfig["end"]):
                    fullField = "%s%s%s" % (fullField, compConfig["delim"], value)
                if field.endswith(compConfig["start"]):
                    fullField = "%s%s" % (value, fullField)
                self.log.debug("full field now is :%s" % fullField)
                fullFieldMap.put("1", fullField)

    self.utils.add(self.index, "display_type", displayType)

    # Make sure we have a creation date
    if not createdDateFlag:
        self.utils.add(self.index, "date_created", self.last_modified)
        self.log.debug(
            "Forced creation date to %s because it was not explicitly set."
            % self.last_modified)

    # Workflow processing
    wfStep = wfMeta.getString(None, ["step"])
    self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
    self.utils.add(self.index, "workflow_step", wfStep)
    self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
    for group in workflow_security:
        self.utils.add(self.index, "workflow_security", group)
        # The owner is added alongside every security group.
        if self.owner is not None:
            self.utils.add(self.index, "workflow_security", self.owner)

    # set OAI-PMH status to deleted
    if wfStep == "retired":
        self.utils.add(self.index, "oai_deleted", "true")