Example #1
    def __activate__(self, context):
        self.auth = context["page"].authentication
        self.errorMsg = "" 
        self.request = context["request"]
        self.response = context["response"]
        self.formData = context["formData"]
        self.storage = context["Services"].getStorage()
        
        self.log = context["log"]
        self.reportManager = context["Services"].getService("reportManager")
            
        fromOid = self.formData.get("fromOid")
        fromObject = self.storage.getObject(fromOid)

        if (self.auth.is_logged_in()):
            if (self.auth.is_admin() == True):
                pass
            elif (self.__isOwner(fromObject)):
                pass
            else:
                self.errorMsg = "Requires Admin / Librarian / Reviewer / owner access." 
        else:
            self.errorMsg = "Please login."
        if self.errorMsg == "": 
             toOid = self.formData.get("toOid")
             toObject = self.storage.getObject(toOid)
             storeRelatedData = self.formData.get("relatedData")
             fromTFPackage = self._getTFPackage(fromObject)
             toTFPackage = self._getTFPackage(toObject)
             fromInputStream = fromTFPackage.open()
             
             try:
                 StorageUtils.createOrUpdatePayload(toObject, toTFPackage.getId(), fromInputStream)
             except StorageException:
                 print "error setting tfPackage"
                 
             fromTFPackage.close()
             fromTFPackageJson = JsonSimple(fromTFPackage.open()).getJsonObject()
             if storeRelatedData != "false" :
                # add relatedOid info
                fromTFPackageJson = self._addRelatedOid(JsonSimple(fromTFPackage.open()), toOid)
             
             inStream = IOUtils.toInputStream(fromTFPackageJson.toJSONString(), "UTF-8")
             
             try:
                 StorageUtils.createOrUpdatePayload(fromObject, fromTFPackage.getId(), inStream)
             except StorageException:
                 print "error setting tfPackage"
             
             tfMetaPropertyValue = self.formData.get("tfMetaPropertyValue")
             self._addPropertyValueToTFMeta(toObject, tfMetaPropertyValue)
             
             self._reharvestPackage()
                 
             result = '{"status": "ok", "url": "%s/workflow/%s", "oid": "%s" }' % (context["portalPath"], toOid , toOid)
        else:
            result = '{"status": "err", "message": "%s"}' % self.errorMsg
        writer = self.response.getPrintWriter("application/json; charset=UTF-8")
        writer.println(result)
        writer.close()
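The examples above and below all follow the same Fascinator/ReDBox pattern: open an InputStream over some JSON, then hand it to StorageUtils.createOrUpdatePayload, which creates the payload on the target object or overwrites an existing payload with the same id. A minimal sketch of that copy step, assuming the usual Fascinator package locations and the same Jython scripting context as these examples (the helper name copyPayload is illustrative only):

    from com.googlecode.fascinator.common.storage import StorageUtils
    from com.googlecode.fascinator.api.storage import StorageException

    def copyPayload(fromObject, toObject, payloadId):
        # Stream the source payload straight into the target object;
        # createOrUpdatePayload overwrites a payload of the same id if it exists.
        inStream = fromObject.getPayload(payloadId).open()
        try:
            StorageUtils.createOrUpdatePayload(toObject, payloadId, inStream)
        except StorageException, e:
            print "error copying payload %s: %s" % (payloadId, e)
        finally:
            inStream.close()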
Example #2
    def __activate__(self, context):
        request = context["request"]
        storage = context["Services"].getStorage()
        auth = context["page"].authentication
        log = context["log"]
        
        username = auth.get_name()
        
        oid = request.getParameter("oid")
        approval = request.getParameter("approval")
        approval_comment = request.getParameter("approval_comment")
        
        storedObj = storage.getObject(oid)
        committeeResponses = None
        
        payloadList = storedObj.getPayloadIdList()
        if payloadList.contains("committee-responses.metadata"):
            committeeResponsePayload = storedObj.getPayload("committee-responses.metadata")
            committeeResponses = JsonSimple(committeeResponsePayload.open()).getJsonObject()
        else:
            committeeResponses = JsonObject()
        
        committeeResponse = JsonObject()
        committeeResponse.put("approval",approval)
        committeeResponse.put("approval_comment",approval_comment)
        
        committeeResponses.put(username,committeeResponse)

        log.debug(" %s: Committee %s, approval = %s, comment = %s"  % ( oid, username, approval, approval_comment))
        StorageUtils.createOrUpdatePayload(storedObj,"committee-responses.metadata",IOUtils.toInputStream(committeeResponses.toString(), "UTF-8"))
        context["response"].sendRedirect(context["portalPath"] +"/detail/"+oid)
Example #3
    def saveResponse(self, context):
        """ Save into object storage key to assessor's name
            It has four keys: status, recommendation, size-agreement and comments
            when status == "submitted", reviewer sees it
        """
        oid = self.request.getParameter("oid")
        action = self.request.getParameter("action")
        if action and re.match("submit", action, re.I):
            status = "submitted"
        else:
            status = "draft"

        recommendation = self.request.getParameter("recommendation")
        sizeAgreement = self.request.getParameter("size-agreement")
        comments = self.request.getParameter("comments")

        storedObj, fileExisted = self.hasResponses(oid)
        if fileExisted:
            committeeResponses = self.getResponses(storedObj)
        else:
            committeeResponses = JsonObject()

        assessorResponse = JsonObject()
        assessorResponse.put("status", status)
        if status == 'submitted':
            assessorResponse.put("date",self._getToday())
        assessorResponse.put("recommendation",recommendation)
        assessorResponse.put("size-agreement",sizeAgreement)
        assessorResponse.put("comments",comments)

        committeeResponses.put(self.assessor,assessorResponse)

        StorageUtils.createOrUpdatePayload(storedObj,self.PAYLOAD,IOUtils.toInputStream(committeeResponses.toString(), "UTF-8"))
        context["response"].sendRedirect(context["portalPath"] +"/detail/"+oid)
Example #4
 def __checkMetadataPayload(self):
     try:
         # Simple check for its existence
         self.object.getPayload("formData.tfpackage")
         self.firstHarvest = False
     except Exception:
         self.firstHarvest = True
         # We need to create it
         self.log.info("Creating 'formData.tfpackage' payload for object '{}'", self.oid)
         # Prep data
         data = {
             "viewId": "default",
             "workflow_source": "Edgar Import",
             "packageType": "dataset",
             "redbox:formVersion": self.redboxVersion,
             "redbox:newForm": "true"
         }
         package = JsonSimple(JsonObject(data))
         # Store it
         inStream = IOUtils.toInputStream(package.toString(True), "UTF-8")
         try:
             self.object.createStoredPayload("formData.tfpackage", inStream)
             self.packagePid = "formData.tfpackage"
         except StorageException, e:
             self.log.error("Error creating 'formData.tfpackage' payload for object '{}'", self.oid, e)
             raise Exception("Error creating package payload: ", e)
Example #5
 def __checkMetadataPayload(self):
     try:
         # Simple check for its existence
         self.object.getPayload("formData.tfpackage")
         self.firstHarvest = False
     except Exception:
         self.firstHarvest = True
         # We need to create it
         self.log.info(
             "Creating 'formData.tfpackage' payload for object '{}'",
             self.oid)
         # Prep data
         data = {
             "viewId": "default",
             "workflow_source": "Edgar Import",
             "packageType": "dataset",
             "redbox:formVersion": self.redboxVersion,
             "redbox:newForm": "true"
         }
         package = JsonSimple(JsonObject(data))
         # Store it
         inStream = IOUtils.toInputStream(package.toString(True), "UTF-8")
         try:
             self.object.createStoredPayload("formData.tfpackage", inStream)
             self.packagePid = "formData.tfpackage"
         except StorageException, e:
             self.log.error(
                 "Error creating 'formData.tfpackage' payload for object '{}'",
                 self.oid, e)
             raise Exception("Error creating package payload: ", e)
Example #6
 def __checkMetadataPayload(self, identifier):
     # We are just going to confirm the existence of
     # 'metadata.json', or create an empty one if it
     # doesn't exist. This lets curation work for this
     # option and removes some log errors on the details
     # screen.
     try:
         self.object.getPayload("metadata.json")
         # all is good, the above will throw an exception if it doesn't exist
         return
     except Exception:
         self.log.info("Creating 'metadata.json' payload for object '{}'", self.oid)
         # Prep data
         metadata = JsonSimple()
         metadata.getJsonObject().put("recordIDPrefix", "")
         metadata.writeObject("data")
         # The only real data we require is the ID for curation
         idHolder = metadata.writeObject("metadata")
         idHolder.put("dc.identifier", identifier)
         # Store it
         inStream = IOUtils.toInputStream(metadata.toString(True), "UTF-8")
         try:
             StorageUtils.createOrUpdatePayload(self.object, "metadata.json", inStream)
         except StorageException, e:
             self.log.error("Error creating 'metadata.json' payload for object '{}'", self.oid, e)
         return
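After this runs, the metadata.json payload has the following shape; the dc.identifier value is whatever identifier was passed in (the one shown here is made up):

    {
        "recordIDPrefix": "",
        "data": {},
        "metadata": {
            "dc.identifier": "http://example.com/record/1"
        }
    }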
Example #7
 def updateWorkFlowMetadata(self, workflowMetadata, toWorkflowId, toWorkflowStage):
     workflowMetaDataJson = JsonSimple(workflowMetadata.open()).getJsonObject()
     workflowMetaDataJson.put("id", toWorkflowId)
     workflowMetaDataJson.put("step", toWorkflowStage)
     inStream = IOUtils.toInputStream(workflowMetaDataJson.toString(), "UTF-8")
     try:
         StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
     except StorageException:
         print " ERROR updating dataset payload"
Example #8
 def updatePackageType(self, tfPackage, toWorkflowId):
     tfPackageJson = JsonSimple(tfPackage.open()).getJsonObject()
     tfPackageJson.put("packageType", toWorkflowId)
     
     inStream = IOUtils.toInputStream(tfPackageJson.toString(), "UTF-8")
     try:
         StorageUtils.createOrUpdatePayload(self.object, tfPackage.getId(), inStream)
     except StorageException:
         print " ERROR updating dataset payload"            
Example #9
    def updatePackageType(self, tfPackage, toWorkflowId):
        tfPackageJson = JsonSimple(tfPackage.open()).getJsonObject()
        tfPackageJson.put("packageType", toWorkflowId)

        inStream = IOUtils.toInputStream(tfPackageJson.toString(), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(self.object, tfPackage.getId(),
                                               inStream)
        except StorageException:
            print " ERROR updating dataset payload"
Example #10
 def __createOrUpdateArrayPayload(self, oid, payloadName, newObject):
     """
         Create or update a payload object in storage defined by oid
         The content of this digital object is a JsonArray of objects
         payloadName: name of the payload
         newObject: new object to be appended, e.g. a JsonObject
     """
     objList = self.__getPayloadJsonArray(oid, payloadName)
     objList.add(newObject)
     storedObj = self.Services.getStorage().getObject(oid)
     StorageUtils.createOrUpdatePayload(storedObj, payloadName,IOUtils.toInputStream(objList.toString(), "UTF-8"))
Example #11
 def __createOrUpdateArrayPayload(self, oid, payloadName, newObject):
     """
         Create or update a payload object in storage defined by oid
         The content of this digital object is a JsonArray of objects
         payloadName: name of the payload
         newObject: new object to be appended, e.g. a JsonObject
     """
     objList = self.__getPayloadJsonArray(oid, payloadName)
     objList.add(newObject)
     storedObj = self.Services.getStorage().getObject(oid)
     StorageUtils.createOrUpdatePayload(
         storedObj, payloadName,
         IOUtils.toInputStream(objList.toString(), "UTF-8"))
Example #12
 def updateWorkFlowMetadata(self, workflowMetadata, toWorkflowId,
                            toWorkflowStage):
     workflowMetaDataJson = JsonSimple(
         workflowMetadata.open()).getJsonObject()
     workflowMetaDataJson.put("id", toWorkflowId)
     workflowMetaDataJson.put("step", toWorkflowStage)
     inStream = IOUtils.toInputStream(workflowMetaDataJson.toString(),
                                      "UTF-8")
     try:
         StorageUtils.createOrUpdatePayload(self.object,
                                            "workflow.metadata", inStream)
     except StorageException:
         print " ERROR updating dataset payload"
Example #13
 def __tidy(self, content):
     tidy = Tidy()
     tidy.setIndentAttributes(False)
     tidy.setIndentContent(False)
     tidy.setPrintBodyOnly(True)
     tidy.setSmartIndent(False)
     tidy.setWraplen(0)
     tidy.setXHTML(False)
     tidy.setNumEntities(True)
     out = ByteArrayOutputStream()
     doc = tidy.parseDOM(IOUtils.toInputStream(content, "UTF-8"), out)
     content = out.toString("UTF-8")
     return content, doc
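A short usage sketch for __tidy, assuming it is called from another method of the same class and that JTidy's Tidy class is importable as in the example (the markup is illustrative):

    # Returns the cleaned, body-only markup plus the DOM that JTidy parsed.
    cleanHtml, dom = self.__tidy("<p>Unclosed paragraph<br>")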
Example #14
    def __updateMetadataPayload(self, data):
        # Get and parse
        payload = self.object.getPayload("formData.tfpackage")
        json = JsonSimple(payload.open())
        payload.close()

        # Basic test for a mandatory field
        title = json.getString(None, ["dc:title"])
        if title is not None:
            # We've done this before
            return

        # Merge
        json.getJsonObject().putAll(data)

        # Store it
        inStream = IOUtils.toInputStream(json.toString(True), "UTF-8")
        try:
            self.object.updatePayload("formData.tfpackage", inStream)
        except StorageException, e:
            self.log.error("Error updating 'formData.tfpackage' payload for object '{}'", self.oid, e)
Example #15
    def __updateMetadataPayload(self, data):
        # Get and parse
        payload = self.object.getPayload("formData.tfpackage")
        json = JsonSimple(payload.open())
        payload.close()

        # Basic test for a mandatory field
        title = json.getString(None, ["dc:title"])
        if title is not None:
            # We've done this before
            return

        # Merge
        json.getJsonObject().putAll(data)

        # Store it
        inStream = IOUtils.toInputStream(json.toString(True), "UTF-8")
        try:
            self.object.updatePayload("formData.tfpackage", inStream)
        except StorageException, e:
            self.log.error(
                "Error updating 'formData.tfpackage' payload for object '{}'",
                self.oid, e)
Example #16
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "Parties_People"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        stages = self.config.getJsonSimpleList(["stages"])
        initialStep = 0

        try:
            wfMeta = self.__getJsonPayload("workflow.metadata")

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")
            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])
        except StorageException:
            # No workflow payload, time to create
            initialStage = stages.get(initialStep).getString(None, ["name"])
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", initialStage)
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == initialStage:
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])

        # Has the workflow metadata changed?
        if wfChanged == True:
            inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
            except StorageException:
                print " ERROR updating dataset payload"

        # Form processing
        coreFields = ["title", "description"]
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
            # Core fields
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            # Non-core fields
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])

        # Workflow processing
        wfStep = wfMeta.getString(None, ["step"])
        self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
        self.utils.add(self.index, "workflow_step", wfStep)
        self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
        for group in workflow_security:
            self.utils.add(self.index, "workflow_security", group)
            if self.owner is not None:
                self.utils.add(self.index, "workflow_security", self.owner)
        # set OAI-PMH status to deleted
        if wfStep == "retired":
            self.utils.add(self.index, "oai_deleted", "true")
Example #17
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "dataset"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        stages = self.config.getJsonSimpleList(["stages"])
        if self.owner == "guest":
            pageTitle = "Submission Request"
            displayType = "submission-request"
            initialStep = 0
        else:
            pageTitle = "Metadata Record"
            displayType = "package-dataset"
            initialStep = 1
        try:
            wfMeta = self.__getJsonPayload("workflow.metadata")
            wfMeta.getJsonObject().put("pageTitle", pageTitle)

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(
                    None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")
            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put(
                        "label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])
        except StorageException:
            # No workflow payload, time to create
            initialStage = stages.get(initialStep).getString(None, ["name"])
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", initialStage)
            wfMetaObj.put("pageTitle", pageTitle)
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == initialStage:
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])

        # Has the workflow metadata changed?
        if wfChanged == True:
            inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(self.object,
                                                   "workflow.metadata",
                                                   inStream)
            except StorageException:
                print " ERROR updating dataset payload"

        # Form processing
        coreFields = [
            "title", "description", "manifest", "metaList", "relationships",
            "responses"
        ]
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
            # Core fields
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            # Non-core fields
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])

        # Manifest processing (formData not present in wfMeta)
        manifest = self.__getJsonPayload(self.packagePid)
        formTitles = manifest.getStringList(["title"])
        if formTitles:
            for formTitle in formTitles:
                if self.title is None:
                    self.title = formTitle
        self.descriptionList = [manifest.getString("", ["description"])]

        #Used to make sure we have a created date
        createdDateFlag = False

        formData = manifest.getJsonObject()

        for field in formData.keySet():
            if field not in coreFields:
                value = formData.get(field)
                if value is not None and value.strip() != "":
                    self.utils.add(self.index, field, value)
                    # We want to sort by date of creation, so it
                    # needs to be indexed as a date (ie. 'date_*')
                    if field == "dc:created":
                        parsedTime = time.strptime(value, "%Y-%m-%d")
                        solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                                 parsedTime)
                        self.utils.add(self.index, "date_created", solrTime)
                        self.log.debug("Set created date to :%s" % solrTime)
                        createdDateFlag = True
                    elif field == "redbox:embargo.dc:date":
                        self.embargoedDate = value
                    elif field == "create_timestamp":
                        self.createTimeStamp = value
                    # try to extract some common fields for faceting
                    if field.startswith("dc:") and \
                            not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                              or field.endswith(".dc:identifier") \
                              or field.endswith(".rdf:resource")):
                        # index dublin core fields for faceting
                        basicField = field.replace("dc:", "dc_")
                        dot = field.find(".")
                        if dot > 0:
                            facetField = basicField[:dot]
                        else:
                            facetField = basicField
                        #print "Indexing DC field '%s':'%s'" % (field, facetField)
                        if facetField == "dc_title":
                            if self.title is None:
                                self.title = value
                        elif facetField == "dc_type":
                            if self.dcType is None:
                                self.dcType = value
                        elif facetField == "dc_creator":
                            if basicField.endswith("foaf_name"):
                                self.utils.add(self.index, "dc_creator", value)
                        else:
                            self.utils.add(self.index, facetField, value)
                        # index keywords for lookup
                        if field.startswith("dc:subject.vivo:keyword."):
                            self.utils.add(self.index, "keywords", value)
                    # check if this is an array field
                    fnameparts = field.split(":")
                    if fnameparts is not None and len(fnameparts) >= 3:
                        if field.startswith("bibo") or field.startswith(
                                "skos"):
                            arrParts = fnameparts[1].split(".")
                        else:
                            arrParts = fnameparts[2].split(".")
                        # we're not interested in: Relationship, Type and some redbox:origin
                        if arrParts is not None and len(
                                arrParts) >= 2 and field.find(
                                    ":Relationship.") == -1 and field.find(
                                        "dc:type") == -1 and field.find(
                                            "redbox:origin"
                                        ) == -1 and arrParts[1].isdigit():
                            # we've got an array field
                            fldPart = ":%s" % arrParts[0]
                            prefixEndIdx = field.find(fldPart) + len(fldPart)
                            suffixStartIdx = prefixEndIdx + len(
                                arrParts[1]) + 1
                            arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx] + field[
                                suffixStartIdx:]
                            if field.endswith("Name"):
                                arrFldName = self.reportingFieldPrefix + field[:
                                                                               prefixEndIdx]
                            self.log.debug(
                                "Array Field name is:%s  from: %s, with value:%s"
                                % (arrFldName, field, value))

                            if field.endswith("Name"):
                                fullFieldMap = self.arrayBucket.get(arrFldName)
                                if fullFieldMap is None:
                                    fullFieldMap = HashMap()
                                    self.arrayBucket.put(
                                        arrFldName, fullFieldMap)
                                idx = arrParts[1]
                                fullField = fullFieldMap.get(idx)
                                if (fullField is None):
                                    fullField = ""
                                if (field.endswith("givenName")):
                                    fullField = "%s, %s" % (fullField, value)
                                if (field.endswith("familyName")):
                                    fullField = "%s%s" % (value, fullField)
                                self.log.debug("fullname now is :%s" %
                                               fullField)
                                fullFieldMap.put(idx, fullField)
                            else:
                                fieldlist = self.arrayBucket.get(arrFldName)
                                if fieldlist is None:
                                    fieldlist = []
                                    self.arrayBucket.put(arrFldName, fieldlist)
                                fieldlist.append(value)

                    for compfield in self.compFields:
                        if field.startswith(compfield):
                            arrFldName = self.reportingFieldPrefix + compfield
                            fullFieldMap = self.arrayBucket.get(arrFldName)
                            if fullFieldMap is None:
                                fullFieldMap = HashMap()
                                self.arrayBucket.put(arrFldName, fullFieldMap)
                            fullField = fullFieldMap.get("1")
                            if fullField is None:
                                fullField = ""
                            if field.endswith(
                                    self.compFieldsConfig[compfield]["end"]):
                                fullField = "%s%s%s" % (
                                    fullField,
                                    self.compFieldsConfig[compfield]["delim"],
                                    value)
                            if field.endswith(
                                    self.compFieldsConfig[compfield]["start"]):
                                fullField = "%s%s" % (value, fullField)
                            self.log.debug("full field now is :%s" % fullField)
                            fullFieldMap.put("1", fullField)

        self.utils.add(self.index, "display_type", displayType)

        # Make sure we have a creation date
        if not createdDateFlag:
            self.utils.add(self.index, "date_created", self.last_modified)
            self.log.debug(
                "Forced creation date to %s because it was not explicitly set."
                % self.last_modified)

        # Workflow processing
        wfStep = wfMeta.getString(None, ["step"])
        self.utils.add(self.index, "workflow_id",
                       wfMeta.getString(None, ["id"]))
        self.utils.add(self.index, "workflow_step", wfStep)
        self.utils.add(self.index, "workflow_step_label",
                       wfMeta.getString(None, ["label"]))
        for group in workflow_security:
            self.utils.add(self.index, "workflow_security", group)
            if self.owner is not None:
                self.utils.add(self.index, "workflow_security", self.owner)
        # set OAI-PMH status to deleted
        if wfStep == "retired":
            self.utils.add(self.index, "oai_deleted", "true")
Example #18
    def __activate__(self, context):
        self.auth = context["page"].authentication
        self.errorMsg = ""
        self.request = context["request"]
        self.response = context["response"]
        self.formData = context["formData"]
        self.storage = context["Services"].getStorage()

        self.log = context["log"]
        self.reportManager = context["Services"].getService("reportManager")

        fromOid = self.formData.get("fromOid")
        fromObject = self.storage.getObject(fromOid)

        if (self.auth.is_logged_in()):
            if (self.auth.is_admin() == True):
                pass
            elif (self.__isOwner(fromObject)):
                pass
            else:
                self.errorMsg = "Requires Admin / Librarian / Reviewer / owner access."
        else:
            self.errorMsg = "Please login."
        if self.errorMsg == "":
            toOid = self.formData.get("toOid")
            toObject = self.storage.getObject(toOid)
            storeRelatedData = self.formData.get("relatedData")
            fromTFPackage = self._getTFPackage(fromObject)
            toTFPackage = self._getTFPackage(toObject)
            # get relevant dc:description from new object before you overwrite it with 'from' data, as this should exist from form data created in initial object packaging (packaging.py)
            toTFPackageJson = JsonSimple(toTFPackage.open()).getJsonObject()
            relevant_description = toTFPackageJson.get("dc:description")
            fromInputStream = fromTFPackage.open()
            try:
                StorageUtils.createOrUpdatePayload(toObject, toTFPackage.getId(), fromInputStream)
            except StorageException:
                print "error setting tfPackage"
            finally:
                fromTFPackage.close()

            tfMetaPropertyValue = self.formData.get("tfMetaPropertyValue")
            if tfMetaPropertyValue == 'dmpToSelfSub':
                # fetch recently created new 'to' object from storage to get all data
                toTFPackage = self._getTFPackage(toObject)
                toTFPackageJson = JsonSimple(toTFPackage.open()).getJsonObject()
                self.setMultiDescription(toTFPackageJson, relevant_description)
                inStream = IOUtils.toInputStream(toTFPackageJson.toJSONString(), "UTF-8")
                try:
                    StorageUtils.createOrUpdatePayload(toObject, toTFPackage.getId(), inStream)
                except StorageException:
                    print "error setting description text in tfPackage"
                finally:
                    inStream.close()
                self.log.info(
                    "Completed migrating 'dc:description' to dc:decription.1.text' for oid: %s." % toTFPackage.getId())
                self.log.debug("Result: %r" % toTFPackageJson)

            fromTFPackageJson = JsonSimple(fromTFPackage.open()).getJsonObject()
            self.log.debug('from json is: %r' % fromTFPackageJson)
            if storeRelatedData != "false":
                # add relatedOid info
                fromTFPackageJson = self._addRelatedOid(JsonSimple(fromTFPackage.open()), toOid)

            self.log.debug('from tfPackage json is now: %r' % fromTFPackageJson)
            inStream = IOUtils.toInputStream(fromTFPackageJson.toJSONString(), "UTF-8")

            try:
                StorageUtils.createOrUpdatePayload(fromObject, fromTFPackage.getId(), inStream)
            except StorageException:
                print "error setting tfPackage"
            finally:
                inStream.close()

            self._addPropertyValueToTFMeta(toObject, tfMetaPropertyValue)

            self._reharvestPackage()

            result = '{"status": "ok", "url": "%s/workflow/%s", "oid": "%s" }' % (context["portalPath"], toOid, toOid)
        else:
            result = '{"status": "err", "message": "%s"}' % self.errorMsg
        writer = self.response.getPrintWriter("application/json; charset=UTF-8")
        writer.println(result)
        writer.close()
Example #19
    def __getDigitalItems(self, manifest):
        for itemHash in manifest.keySet():
            payloadDict = {}
            item = manifest.get(itemHash)
            id = item.getString(None, "id")
            title = item.getString(None, "title")
            hidden = item.getBoolean(False, "hidden")
            if hidden:
                print "Skipping hidden item: %s (%s)" % (title, id)
                continue
            children = item.getJsonSimpleMap("children")

            isImage = False
            object = Services.storage.getObject(id)
            pid = object.getSourceId()
            htmlFileName = pid[:pid.rfind(".")] + ".htm"
            nodeHtm = "%s.htm" % itemHash  #.replace("-", "_")
            sourcePayload = object.getPayload(pid)
            if sourcePayload and hidden != 'true':
                payloadType = sourcePayload.contentType
                htmlPayload = object.getPayload(htmlFileName)
                process = True
                if htmlPayload:
                    #gather all the related payload
                    payloadDict[nodeHtm] = htmlPayload, "application/xhtml+xml"
                    payloadList = object.getPayloadIdList()
                    for payloadid in payloadList:
                        payload = object.getPayload(payloadid)
                        if payloadid.find("_files") > -1:
                            if payload.contentType.startswith("image"):
                                #hash the name here....
                                filepath, filename = os.path.split(payload.id)
                                filename, ext = os.path.splitext(filename)
                                filename = hashlib.md5(filename).hexdigest()
                                payloadid = os.path.join(
                                    "%s" % filepath.lower().replace(" ", "_"),
                                    "node-%s%s" % (filename, ext))
                            payloadDict[
                                payloadid] = payload, payload.contentType
                #elif sourcePayload:
                #for now only works for images
                elif payloadType.startswith("image") and sourcePayload:
                    #hash the file name to avoid invalid id in epub...
                    isImage = True
                    # use thumbnail if it exists
                    ext = os.path.splitext(id)[1]
                    filename = id[id.rfind("/") + 1:-len(ext)]  #+ ".thumb.jpg"
                    hashedFileName = hashlib.md5(filename).hexdigest()
                    thumbNailPayload = object.getPayload("%s_thumbnail.jpg" %
                                                         filename)
                    htmlString = """<html xmlns="http://www.w3.org/1999/xhtml"><head><title>%s</title>
                                        <link rel="stylesheet" href="epub.css"/>
                                        </head><body><div><span style="display: block"><img src="%s" alt="%s"/></span></div></body></html>"""
                    if thumbNailPayload:
                        htmlString = htmlString % (
                            pid, "node-%s_thumbnail.jpg" % hashedFileName, pid)
                        payloadDict[
                            "node-%s_thumbnail.jpg" %
                            hashedFileName] = thumbNailPayload, "image/jpeg"
                    else:
                        htmlString = htmlString % (pid, pid.lower().replace(
                            " ", "_"), pid)
                        payloadDict[pid] = sourcePayload, payloadType
                    payloadDict[nodeHtm] = IOUtils.toInputStream(
                        htmlString, "UTF-8"), "application/xhtml+xml"
                else:
                    process = False

                if process:
                    self.__itemRefDict[
                        itemHash] = id, title, nodeHtm, payloadDict, isImage
                    self.__orderedItem.append(itemHash)
                    if children:
                        self.__getDigitalItems(children)

            object.close()
Example #20
 def __copyString(self, s, out):
     IOUtils.copy(IOUtils.toInputStream(String(s), "UTF-8"), out)
Example #21
 def __copyString(self, s, out):
     IOUtils.copy(IOUtils.toInputStream(String(s), "UTF-8"), out)
Example #22
 def __getDigitalItems(self, manifest):
     for itemHash in manifest.keySet():
         payloadDict = {}
         item = manifest[itemHash]
         id = item.get("id")
         title = item.get("title")
         hidden = item.get("hidden", "False")
         if hidden == "True":
             print "Skipping hidden item: %s (%s)" % (title, id)
             continue
         children = item.getJsonMap("children")
         
         isImage=False
         object = Services.storage.getObject(id)
         pid = object.getSourceId()
         htmlFileName = pid[:pid.rfind(".")] + ".htm"
         nodeHtm = "%s.htm" % itemHash #.replace("-", "_")
         sourcePayload = object.getPayload(pid)
         if sourcePayload and hidden != 'true':
             payloadType = sourcePayload.contentType
             htmlPayload = object.getPayload(htmlFileName)
             process = True
             if htmlPayload:
                 #gather all the related payload
                 payloadDict[nodeHtm] = htmlPayload, "application/xhtml+xml"
                 payloadList = object.getPayloadIdList()
                 for payloadid in payloadList:
                     payload = object.getPayload(payloadid)
                     if payloadid.find("_files") > -1:
                         if payload.contentType.startswith("image"):
                             #hash the name here....
                             filepath, filename = os.path.split(payload.id)
                             filename, ext = os.path.splitext(filename)
                             filename = hashlib.md5(filename).hexdigest()
                             payloadid = os.path.join("%s" % filepath.lower().replace(" ", "_"), "node-%s%s" % (filename, ext))
                         payloadDict[payloadid] = payload, payload.contentType
             #elif sourcePayload:
                 #for now only works for images
             elif payloadType.startswith("image") and sourcePayload:
                     #hash the file name to avoid invalid id in epub...
                     isImage=True
                      # use thumbnail if it exists
                     ext = os.path.splitext(id)[1]
                     filename = id[id.rfind("/")+1:-len(ext)] #+ ".thumb.jpg"
                     hashedFileName = hashlib.md5(filename).hexdigest()
                     thumbNailPayload = object.getPayload("%s_thumbnail.jpg" % filename)
                     htmlString = """<html xmlns="http://www.w3.org/1999/xhtml"><head><title>%s</title>
                                     <link rel="stylesheet" href="epub.css"/>
                                     </head><body><div><span style="display: block"><img src="%s" alt="%s"/></span></div></body></html>"""
                     if thumbNailPayload:
                         htmlString = htmlString % (pid, "node-%s_thumbnail.jpg" % hashedFileName, pid)
                         payloadDict["node-%s_thumbnail.jpg" % hashedFileName] = thumbNailPayload, "image/jpeg"
                     else:
                         htmlString = htmlString % (pid, pid.lower().replace(" ", "_"), pid)
                         payloadDict[pid] = sourcePayload, payloadType
                     payloadDict[nodeHtm] = IOUtils.toInputStream(htmlString, "UTF-8"), "application/xhtml+xml"
             else:
                 process = False
         
             if process:
                 self.__itemRefDict[itemHash] = id, title, nodeHtm, payloadDict, isImage
                 self.__orderedItem.append(itemHash)
                 if children:
                     self.__getDigitalItems(children)
         
         object.close()
Example #23
 def __authorize_with_service_account(service_account_json):
     credential = GoogleCredential.fromStream(IOUtils.toInputStream(service_account_json, StandardCharsets.UTF_8)).createScoped(Collections.singleton(AndroidPublisherScopes.ANDROIDPUBLISHER))
     return credential
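A hedged sketch of how the returned credential could be used, assuming the helper above is in scope and the standard google-api-java-client builder classes are available from Jython; the application name is made up and none of this is shown in the snippet above:

    from com.google.api.client.googleapis.javanet import GoogleNetHttpTransport
    from com.google.api.client.json.jackson2 import JacksonFactory
    from com.google.api.services.androidpublisher import AndroidPublisher

    # serviceAccountJson: the service-account key file contents, read elsewhere
    credential = __authorize_with_service_account(serviceAccountJson)
    publisher = AndroidPublisher.Builder(
        GoogleNetHttpTransport.newTrustedTransport(),
        JacksonFactory.getDefaultInstance(),
        credential).setApplicationName("example-publisher").build()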
Example #24
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "dataset"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        stages = self.config.getJsonSimpleList(["stages"])
        if self.owner == "guest":
            pageTitle = "Submission Request"
            displayType = "submission-request"
            initialStep = 0
        else:
            pageTitle = "Metadata Record"
            displayType = "package-dataset"
            initialStep = 1
        try:
            wfMeta = self.__getJsonPayload("workflow.metadata")
            wfMeta.getJsonObject().put("pageTitle", pageTitle)

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")
            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])
        except StorageException:
            # No workflow payload, time to create
            initialStage = stages.get(initialStep).getString(None, ["name"])
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", initialStage)
            wfMetaObj.put("pageTitle", pageTitle)
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == initialStage:
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])

        # Has the workflow metadata changed?
        if wfChanged == True:
            inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
            except StorageException:
                print " ERROR updating dataset payload"

        # Form processing
        coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
            # Core fields
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            # Non-core fields
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])

        # Manifest processing (formData not present in wfMeta)
        manifest = self.__getJsonPayload(self.packagePid)
        formTitles = manifest.getStringList(["title"])
        if formTitles:
            for formTitle in formTitles:
                if self.title is None:
                    self.title = formTitle
        self.descriptionList = [manifest.getString("", ["description"])]
        
        #Used to make sure we have a created date
        createdDateFlag  = False
        
        formData = manifest.getJsonObject()
        
        for field in formData.keySet():
            if field not in coreFields:
                value = formData.get(field)
                if value is not None and value.strip() != "":
                    self.utils.add(self.index, field, value)
                    # We want to sort by date of creation, so it
                    # needs to be indexed as a date (ie. 'date_*')
                    if field == "dc:created":
                        parsedTime = time.strptime(value, "%Y-%m-%d")
                        solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                        self.utils.add(self.index, "date_created", solrTime)
                        self.log.debug("Set created date to :%s" % solrTime)
                        createdDateFlag = True
                    elif field == "redbox:embargo.dc:date":
                        self.embargoedDate = value
                    # try to extract some common fields for faceting
                    if field.startswith("dc:") and \
                            not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                              or field.endswith(".dc:identifier") \
                              or field.endswith(".rdf:resource")):
                        # index dublin core fields for faceting
                        basicField = field.replace("dc:", "dc_")
                        dot = field.find(".")
                        if dot > 0:
                            facetField = basicField[:dot]
                        else:
                            facetField = basicField
                        #print "Indexing DC field '%s':'%s'" % (field, facetField)
                        if facetField == "dc_title":
                            if self.title is None:
                                self.title = value
                        elif facetField == "dc_type":
                            if self.dcType is None:
                                self.dcType = value
                        elif facetField == "dc_creator":
                            if basicField.endswith("foaf_name"):
                                self.utils.add(self.index, "dc_creator", value)
                        else:
                            self.utils.add(self.index, facetField, value)
                        # index keywords for lookup
                        if field.startswith("dc:subject.vivo:keyword."):
                            self.utils.add(self.index, "keywords", value)
                    # check if this is an array field
                    fnameparts = field.split(":")
                    if fnameparts is not None and len(fnameparts) >= 3:
                        if field.startswith("bibo") or field.startswith("skos"):
                            arrParts = fnameparts[1].split(".")
                        else:    
                            arrParts = fnameparts[2].split(".")
                        # we're not interested in: Relationship, Type and some redbox:origin 
                        if arrParts is not None and len(arrParts) >= 2 and field.find(":Relationship.") == -1 and field.find("dc:type") == -1 and field.find("redbox:origin") == -1 and arrParts[1].isdigit():
                            # we've got an array field
                            fldPart = ":%s" % arrParts[0]
                            prefixEndIdx = field.find(fldPart) + len(fldPart)
                            suffixStartIdx = prefixEndIdx+len(arrParts[1])+1
                            arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx] + field[suffixStartIdx:]
                            if field.endswith("Name"):
                                arrFldName = self.reportingFieldPrefix + field[:prefixEndIdx]
                            self.log.debug("Array Field name is:%s  from: %s, with value:%s" % (arrFldName, field, value))
                            
                            if field.endswith("Name"):
                                fullFieldMap = self.arrayBucket.get(arrFldName)
                                if fullFieldMap is None:
                                    fullFieldMap = HashMap()
                                    self.arrayBucket.put(arrFldName, fullFieldMap)
                                idx = arrParts[1]
                                fullField = fullFieldMap.get(idx)
                                if (fullField is None):
                                    fullField = ""
                                if (field.endswith("givenName")):
                                    fullField = "%s, %s" % (fullField, value)
                                if (field.endswith("familyName")):
                                    fullField = "%s%s" % (value, fullField) 
                                self.log.debug("fullname now is :%s" % fullField)
                                fullFieldMap.put(idx, fullField)
                            else:
                                fieldlist = self.arrayBucket.get(arrFldName)
                                if fieldlist is None:
                                    fieldlist = []
                                    self.arrayBucket.put(arrFldName, fieldlist)
                                fieldlist.append(value)
                                
                    for compfield in self.compFields:
                        if field.startswith(compfield):    
                            arrFldName = self.reportingFieldPrefix +compfield
                            fullFieldMap = self.arrayBucket.get(arrFldName)
                            if fullFieldMap is None:
                                fullFieldMap = HashMap()
                                self.arrayBucket.put(arrFldName, fullFieldMap)
                            fullField = fullFieldMap.get("1")
                            if fullField is None:
                                fullField = ""
                            if field.endswith(self.compFieldsConfig[compfield]["end"]):
                                fullField = "%s%s%s" % (fullField, self.compFieldsConfig[compfield]["delim"] ,value)
                            if field.endswith(self.compFieldsConfig[compfield]["start"]):
                                fullField = "%s%s" % (value, fullField) 
                            self.log.debug("full field now is :%s" % fullField)
                            fullFieldMap.put("1", fullField)     

        self.utils.add(self.index, "display_type", displayType) 
        
        # Make sure we have a creation date
        if not createdDateFlag:
            self.utils.add(self.index, "date_created", self.last_modified)
            self.log.debug("Forced creation date to %s because it was not explicitly set." % self.last_modified)

        # Workflow processing
        wfStep = wfMeta.getString(None, ["step"])
        self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
        self.utils.add(self.index, "workflow_step", wfStep)
        self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
        for group in workflow_security:
            self.utils.add(self.index, "workflow_security", group)
            if self.owner is not None:
                self.utils.add(self.index, "workflow_security", self.owner)
        # set OAI-PMH status to deleted
        if wfStep == "retired":
            self.utils.add(self.index, "oai_deleted", "true")
Example #25
class AnotarData:
    def __init__(self):
        pass

    def __activate__(self, context):
        self.velocityContext = context

        self.__auth = context["page"].authentication
        # This gets called a lot
        self.fd = self.vc("formData").get

        self.action = self.fd("action")
        self.rootUri = self.fd("rootUri")
        self.json = self.fd("json")
        self.type = self.fd("type")
        self.rootUriList = self.vc("formData").getValues("rootUriList")
        if self.rootUriList is None:
            self.rootUriList = self.vc("formData").getValues("rootUriList[]")
        self.portalPath = self.vc("portalPath")
        #print "action:'%s' formData:'%s'" % (self.action, formData)

        # used so that ajax requests don't cache
        if self.rootUri and self.rootUri.find("?ticks") > -1:
            self.rootUri = self.rootUri[:self.rootUri.find("?ticks")]

        # Portal path info
        portalPath = self.portalPath + "/"
        self.oid = self.rootUri
        if self.oid and self.oid.startswith(portalPath):
            self.oid = self.oid[len(portalPath):]

        # oid for packaged items
        if self.oid:
            hashIndex = self.oid.find("#")
            if hashIndex > -1:
                self.oid = self.oid[hashIndex + 1:]

        result = ""
        if self.action == "getList":
            # Response is a list of object (nested)
            #print "**** anotar.py : GET_SOLR : " + self.rootUri
            result = self.search_solr()
        elif self.action == "put":
            result = self.__authenticate()
            if result is None:
                # Response is an ID
                #print "**** anotar.py : PUT : " + self.rootUri
                result = self.put()
        elif self.action == "delete":
            result = self.__authenticate()
            if result is None:
                # Response is empty
                result = self.delete()
                if result != "":
                    self.vc("response").setStatus(500)
        elif self.action == "get-image":
            # Response is the JSON format expected by image annotation plugin
            result = self.get_image()
        elif self.action == "save-image":
            result = self.__authenticate()
            if result is None:
                # Response is anotar JSON
                result = self.save_image()
        elif self.action == "delete-image":
            result = self.__authenticate()
            if result is None:
                result = self.delete_image()

        writer = self.vc("response").getPrintWriter(
            "text/plain; charset=UTF-8")
        writer.println(result)
        writer.close()

    def __authenticate(self):
        if not self.__auth.is_logged_in():
            self.vc("response").setStatus(500)
            return "Only registered users can access this API"
        return None

    # Get from velocity context
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            log.error("ERROR: Requested context entry '" + index +
                      "' doesn't exist")
            return None

    def generate_id(self):
        counter = 0
        fileName = "anotar." + str(counter)
        payloadList = self.obj.getPayloadIdList()
        while fileName in payloadList:
            counter = counter + 1
            fileName = "anotar." + str(counter)
        self.pid = fileName
        print "New ID (" + self.pid + ")"

    def modify_json(self):
        #print "**** anotar.py : add_json() : adding json : " + json
        jsonSimple = JsonSimple(self.json)
        jsonObj = jsonSimple.getJsonObject()
        jsonObj.put("id", self.pid)
        rootUri = jsonSimple.getString(None, ["annotates", "rootUri"])
        if rootUri is not None:
            baseUrl = "http://%s:%s/" % (self.vc("request").serverName,
                                         self.vc("serverPort"))
            myUri = baseUrl + rootUri + "#" + self.pid
            jsonObj.put("uri", myUri)

        jsonObj.put("schemaVersionUri",
                    "http://www.purl.org/anotar/schema/0.1")
        self.json = jsonSimple.toString()
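
A hedged stand-in for what modify_json() does to the submitted annotation, using Python's standard json module instead of JsonSimple; the server name and port here are invented for the example:

import json

def modify_annotation(raw_json, pid, server_name="localhost", server_port="9997"):
    # server_name/server_port are hypothetical; the script reads them from the request context
    obj = json.loads(raw_json)
    obj["id"] = pid
    root_uri = obj.get("annotates", {}).get("rootUri")
    if root_uri is not None:
        obj["uri"] = "http://%s:%s/%s#%s" % (server_name, server_port, root_uri, pid)
    obj["schemaVersionUri"] = "http://www.purl.org/anotar/schema/0.1"
    return json.dumps(obj)

print(modify_annotation('{"annotates": {"rootUri": "detail/abc123"}}', "anotar.0"))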

    def process_response(self, result):
        #print " ******** result =", result
        docs = []
        rootDocs = []
        docsDict = {}
        # Build a dictionary of the annotations
        for doc in result:
            #hack is done here to replace [] with null as json.py does not properly parse
            jsonStr = unicode(doc.get("jsonString").replace(
                "[]", "null")).encode("utf-8")
            doc = json.read(jsonStr)
            doc["replies"] = []
            docs.append(doc)
            docsDict[doc["uri"]] = doc
            if doc["annotates"]["uri"] == doc["annotates"]["rootUri"]:
                rootDocs.append(doc)

        # Now process the dictionary
        for doc in docs:
            # If we are NOT a top level annotation
            if doc["annotates"]["uri"] != doc["annotates"]["rootUri"]:
                # Find what we are annotating
                try:
                    d = docsDict[doc["annotates"]["uri"]]
                    d["replies"].append(doc)  # Add ourselves to its reply list
                except KeyError:
                    # The annotated target is not in this result set; skip it
                    pass
        return json.write(rootDocs)
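
process_response() threads replies under the annotation they target and returns only the top-level annotations. A self-contained illustration of that nesting with plain dicts standing in for the Solr results:

import json

def thread_annotations(docs):
    docs_by_uri = {}
    roots = []
    for doc in docs:
        doc["replies"] = []
        docs_by_uri[doc["uri"]] = doc
        # Top-level annotations annotate the rootUri itself
        if doc["annotates"]["uri"] == doc["annotates"]["rootUri"]:
            roots.append(doc)
    for doc in docs:
        target = doc["annotates"]["uri"]
        if target != doc["annotates"]["rootUri"] and target in docs_by_uri:
            docs_by_uri[target]["replies"].append(doc)
    return json.dumps(roots)

sample = [
    {"uri": "obj#anotar.0", "annotates": {"uri": "obj", "rootUri": "obj"}},
    {"uri": "obj#anotar.1", "annotates": {"uri": "obj#anotar.0", "rootUri": "obj"}},
]
print(thread_annotations(sample))  # the second annotation is nested under the first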

    def process_tags(self, result):
        tags = []
        tagsDict = {}
        # Build a dictionary of the tags
        for doc in result:
            # Get Anotar data from Solr data
            doc = JsonSimple(doc.get("jsonString"))
            # Get actual tag text
            tag = doc.getString(None, ["content", "literal"])
            # Find out if they have locators
            locs = doc.getJsonSimpleList(["annotates", "locators"]).size()
            if locs == 0:
                # Basic tags, just aggregate counts
                if tag in tagsDict:
                    # We've seen it before, just increment the counter
                    existing = tagsDict[tag]
                    count = existing.getInteger(0, ["tagCount"])
                    existing.getJsonObject().put("tagCount", str(count + 1))
                else:
                    # First time, store this object
                    doc.getJsonObject().put("tagCount", str(1))
                    tagsDict[tag] = doc
            else:
                # Tags with a locator, special case for images etc.
                tags.append(doc.toString())

        # Push all the 'basic' counts into the list to return
        for tag in tagsDict:
            tags.append(tagsDict[tag].toString())
        return "[" + ",".join(tags) + "]"

    def put(self, pid=None):
        try:
            self.obj = Services.storage.getObject(self.oid)
        except StorageException, e:
            print " * anotar.py : Error creating object : ", e
            return e.getMessage()

        if pid:
            self.pid = pid
        else:
            self.generate_id()
        self.modify_json()

        try:
            p = StorageUtils.createOrUpdatePayload(
                self.obj, self.pid, IOUtils.toInputStream(self.json, "UTF-8"))
        except StorageException, e:
            print " * anotar.py : Error creating payload :", e
            return e.getMessage()
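
The put() flow above amounts to: fetch the object, find a free "anotar.N" payload id, stamp it into the JSON, and write the payload back. A rough stand-in using an in-memory object instead of the Fascinator storage API; every name here is hypothetical:

import json

class FakeStoredObject(object):
    def __init__(self):
        self.payloads = {}
    def getPayloadIdList(self):
        return list(self.payloads.keys())

def next_anotar_id(obj):
    # Probe "anotar.0", "anotar.1", ... until an unused payload id is found
    counter = 0
    existing = obj.getPayloadIdList()
    while "anotar.%d" % counter in existing:
        counter += 1
    return "anotar.%d" % counter

obj = FakeStoredObject()
obj.payloads["anotar.0"] = "{}"
pid = next_anotar_id(obj)                      # -> "anotar.1"
obj.payloads[pid] = json.dumps({"id": pid, "content": {"literal": "example"}})
print("%s -> %s" % (pid, sorted(obj.payloads.keys())))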
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "servicesUI2"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        stages = self.config.getJsonSimpleList(["stages"])
        pageTitle = "Services Record"
        displayType = "package-service"
        initialStep = 0
        try:
            wfMeta = self.__getJsonPayload("workflow.metadata")
            wfMeta.getJsonObject().put("pageTitle", pageTitle)

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")
            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])
        except StorageException:
            # No workflow payload, time to create
            initialStage = stages.get(initialStep).getString(None, ["name"])
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", initialStage)
            wfMetaObj.put("pageTitle", pageTitle)
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == initialStage:
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])

        # Has the workflow metadata changed?
        if wfChanged == True:
            inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", inStream)
            except StorageException:
                print " ERROR updating dataset payload"

        # Form processing
        coreFields = ["title", "description", "manifest", "metaList", "relationships", "responses"]
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
            # Core fields
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            # Non-core fields
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])

        # Manifest processing (formData not present in wfMeta)
        manifest = self.__getJsonPayload(self.packagePid)
        formTitles = manifest.getStringList(["title"])
        if formTitles:
            for formTitle in formTitles:
                if self.title is None:
                    self.title = formTitle
        self.descriptionList = [manifest.getString("", ["description"])]
        formData = manifest.getJsonObject()
        for field in formData.keySet():
            if field not in coreFields:
                value = formData.get(field)
                if value is not None and value.strip() != "":
                    self.utils.add(self.index, field, value)
                    # We want to sort by date of creation, so it
                    # needs to be indexed as a date (ie. 'date_*')
                    if field == "dc:created":
                        parsedTime = time.strptime(value, "%Y-%m-%d")   
                        solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ", parsedTime)
                        self.utils.add(self.index, "date_created", solrTime)
                    # try to extract some common fields for faceting
                    if field.startswith("dc:") and \
                            not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                              or field.endswith(".dc:identifier") \
                              or field.endswith(".rdf:resource")):
                        # index dublin core fields for faceting
                        basicField = field.replace("dc:", "dc_")
                        dot = field.find(".")
                        if dot > 0:
                            facetField = basicField[:dot]
                        else:
                            facetField = basicField
                        #print "Indexing DC field '%s':'%s'" % (field, facetField)
                        if facetField == "dc_title":
                            if self.title is None:
                                self.title = value
                        elif facetField == "dc_type":
                            if self.dcType is None:
                                self.dcType = value
                        elif facetField == "dc_creator":
                            if basicField.endswith("foaf_name"):
                                self.utils.add(self.index, "dc_creator", value)
                        else:
                            self.utils.add(self.index, facetField, value)
                        # index keywords for lookup
                        if field.startswith("dc:subject.vivo:keyword."):
                            self.utils.add(self.index, "keywords", value)

        self.utils.add(self.index, "display_type", displayType)

        # Workflow processing
        wfStep = wfMeta.getString(None, ["step"])
        self.utils.add(self.index, "workflow_id", wfMeta.getString(None, ["id"]))
        self.utils.add(self.index, "workflow_step", wfStep)
        self.utils.add(self.index, "workflow_step_label", wfMeta.getString(None, ["label"]))
        for group in workflow_security:
            self.utils.add(self.index, "workflow_security", group)
            if self.owner is not None:
                self.utils.add(self.index, "workflow_security", self.owner)
        # set OAI-PMH status to deleted
        if wfStep == "retired":
            self.utils.add(self.index, "oai_deleted", "true")
    def __workflow(self):
        # Workflow data
        WORKFLOW_ID = "dataset"
        wfChanged = False
        workflow_security = []
        self.message_list = None
        stages = self.config.getJsonSimpleList(["stages"])
        #if self.owner == "guest":
        #    pageTitle = "Submission Request"
        #    displayType = "submission-request"
        #    initialStep = 0
        #else:
        #    pageTitle = "Metadata Record"
        #    displayType = "package-dataset"
        #    initialStep = 1

        ## Harvesting straight into the 'Published' stage
        pageTitle = "Metadata Record"
        displayType = "package-dataset"
        #initialStep = 4
        initialStep = 3

        try:
            wfMeta = self.__getJsonPayload("workflow.metadata")
            wfMeta.getJsonObject().put("pageTitle", pageTitle)

            # Are we indexing because of a workflow progression?
            targetStep = wfMeta.getString(None, ["targetStep"])
            if targetStep is not None and targetStep != wfMeta.getString(
                    None, ["step"]):
                wfChanged = True
                # Step change
                wfMeta.getJsonObject().put("step", targetStep)
                wfMeta.getJsonObject().remove("targetStep")
            # This must be a re-index then
            else:
                targetStep = wfMeta.getString(None, ["step"])

            # Security change
            for stage in stages:
                if stage.getString(None, ["name"]) == targetStep:
                    wfMeta.getJsonObject().put(
                        "label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    if wfChanged == True:
                        self.message_list = stage.getStringList(["message"])
        except StorageException:
            # No workflow payload, time to create

            initialStage = stages.get(initialStep).getString(None, ["name"])
            wfChanged = True
            wfMeta = JsonSimple()
            wfMetaObj = wfMeta.getJsonObject()
            wfMetaObj.put("id", WORKFLOW_ID)
            wfMetaObj.put("step", initialStage)
            wfMetaObj.put("pageTitle", pageTitle)
            stages = self.config.getJsonSimpleList(["stages"])
            for stage in stages:
                if stage.getString(None, ["name"]) == initialStage:
                    wfMetaObj.put("label", stage.getString(None, ["label"]))
                    self.item_security = stage.getStringList(["visibility"])
                    workflow_security = stage.getStringList(["security"])
                    self.message_list = stage.getStringList(["message"])

        # Has the workflow metadata changed?
        if wfChanged == True:
            inStream = IOUtils.toInputStream(wfMeta.toString(True), "UTF-8")
            try:
                StorageUtils.createOrUpdatePayload(self.object,
                                                   "workflow.metadata",
                                                   inStream)
            except StorageException:
                print(" ERROR updating dataset payload")

        # Form processing
        coreFields = [
            "title", "description", "manifest", "metaList", "relationships",
            "responses"
        ]
        formData = wfMeta.getObject(["formData"])
        if formData is not None:
            formData = JsonSimple(formData)
            # Core fields
            description = formData.getStringList(["description"])
            if description:
                self.descriptionList = description
            # Non-core fields
            data = formData.getJsonObject()
            for field in data.keySet():
                if field not in coreFields:
                    self.customFields[field] = formData.getStringList([field])

        # Manifest processing (formData not present in wfMeta)
        manifest = self.__getJsonPayload(self.packagePid)
        formTitles = manifest.getStringList(["title"])
        if formTitles:
            for formTitle in formTitles:
                if self.title is None:
                    self.title = formTitle
        self.descriptionList = [manifest.getString("", ["description"])]
        formData = manifest.getJsonObject()
        for field in formData.keySet():
            if field not in coreFields:
                value = formData.get(field)
                if value is not None and value.strip() != "":
                    self.utils.add(self.index, field, value)
                    # We want to sort by date of creation, so it
                    # needs to be indexed as a date (ie. 'date_*')
                    if field == "dc:created":
                        parsedTime = time.strptime(value, "%Y-%m-%d")
                        solrTime = time.strftime("%Y-%m-%dT%H:%M:%SZ",
                                                 parsedTime)
                        self.utils.add(self.index, "date_created", solrTime)
                    # try to extract some common fields for faceting
                    if field.startswith("dc:") and \
                            not (field.endswith(".dc:identifier.rdf:PlainLiteral") \
                              or field.endswith(".dc:identifier") \
                              or field.endswith(".rdf:resource")):
                        # index dublin core fields for faceting
                        basicField = field.replace("dc:", "dc_")
                        dot = field.find(".")
                        if dot > 0:
                            facetField = basicField[:dot]
                        else:
                            facetField = basicField
                        #print "Indexing DC field '%s':'%s'" % (field, facetField)
                        if facetField == "dc_title":
                            if self.title is None:
                                self.title = value
                        elif facetField == "dc_type":
                            if self.dcType is None:
                                self.dcType = value
                        elif facetField == "dc_creator":
                            if basicField.endswith("foaf_name"):
                                self.utils.add(self.index, "dc_creator", value)
                        else:
                            self.utils.add(self.index, facetField, value)
                        # index keywords for lookup
                        if field.startswith("dc:subject.vivo:keyword."):
                            self.utils.add(self.index, "keywords", value)

        self.utils.add(self.index, "display_type", displayType)

        # Workflow processing
        wfStep = wfMeta.getString(None, ["step"])
        self.utils.add(self.index, "workflow_id",
                       wfMeta.getString(None, ["id"]))
        self.utils.add(self.index, "workflow_step", wfStep)
        self.utils.add(self.index, "workflow_step_label",
                       wfMeta.getString(None, ["label"]))
        for group in workflow_security:
            self.utils.add(self.index, "workflow_security", group)
            if self.owner is not None:
                self.utils.add(self.index, "workflow_security", self.owner)
        # set OAI-PMH status to deleted
        if wfStep == "retired":
            self.utils.add(self.index, "oai_deleted", "true")