def store_zipfile(silo, target_item_uri, POSTED_file, ident):
    """Store an uploaded zip file as a new zipfile item in *silo*.

    Allocates the next free zipfile id, creates a new item for it, links the
    stored file to *target_item_uri* via dcterms:hasVersion, writes the
    uploaded stream into the item and syncs it.

    Args:
        silo: storage silo object (provides ``state``, ``exists``).
        target_item_uri: URI of the dataset this zipfile is a version of.
        POSTED_file: uploaded file object with ``filename`` and ``file`` attrs.
        ident: identity (username) recorded as the creator of the new item.

    Returns:
        The newly created and synced zipfile item.
    """
    zipfile_id = get_next_zipfile_id(silo.state['storage_dir'])
    # Skip over ids that are already taken (e.g. left behind by a previous run)
    while silo.exists("%s%s" % (zipfile_root, zipfile_id)):
        zipfile_id = get_next_zipfile_id(silo.state['storage_dir'])

    zip_item = create_new(silo, "%s%s" % (zipfile_root, zipfile_id), ident)
    zip_item.add_triple(
        "%s/%s" % (zip_item.uri, POSTED_file.filename.lstrip(os.sep)),
        "dcterms:hasVersion",
        target_item_uri,
    )
    zip_item.put_stream(POSTED_file.filename, POSTED_file.file)
    try:
        POSTED_file.file.close()
    except Exception:
        # Best-effort close only: the stream may already have been consumed
        # or closed by put_stream. (Was a bare except; narrowed so that
        # SystemExit/KeyboardInterrupt are not swallowed.)
        pass
    zip_item.sync()
    return zip_item
def datasetview(self, silo, id):
    """Get a list of zipfiles in dataset 'id' within the silo 'silo' (GET)
    and unpack one of them into a new or existing dataset (POST)."""
    if not ag.granary.issilo(silo):
        abort(404)
    rdfsilo = ag.granary.get_rdf_silo(silo)
    if not rdfsilo.exists(id):
        abort(404)

    # tmpl_context variables needed: c.silo_name, c.zipfiles, c.ident, c.id, c.path
    c.silo_name = silo
    c.id = id
    ident = request.environ.get("repoze.who.identity")
    c.ident = ident
    dataset = rdfsilo.get_item(id)

    # Resolve the dataset creator (used for the editor/authorisation checks)
    creator = None
    if (
        dataset.manifest
        and dataset.manifest.state
        and "metadata" in dataset.manifest.state
        and dataset.manifest.state["metadata"]
        and "createdby" in dataset.manifest.state["metadata"]
        and dataset.manifest.state["metadata"]["createdby"]
    ):
        creator = dataset.manifest.state["metadata"]["createdby"]

    http_method = request.environ["REQUEST_METHOD"]

    if http_method == "GET":
        c.editor = False
        if ag.metadata_embargoed:
            # Metadata embargoed: viewing requires membership of the silo
            if not ident:
                abort(401, "Not Authorised")
            silos = ag.authz(ident)
            if silo not in silos:
                abort(403, "Forbidden")
            silos_admin = ag.authz(ident, permission="administrator")
            silos_manager = ag.authz(ident, permission="manager")
            if ident["repoze.who.userid"] == creator or silo in silos_admin or silo in silos_manager:
                c.editor = True
        elif ident:
            silos = ag.authz(ident)
            if silo in silos:
                silos_admin = ag.authz(ident, permission="administrator")
                silos_manager = ag.authz(ident, permission="manager")
                if ident["repoze.who.userid"] == creator or silo in silos_admin or silo in silos_manager:
                    c.editor = True
    else:
        # identity management of item: mutating methods need creator/admin/manager
        if not ident:
            abort(401, "Not Authorised")
        silos = ag.authz(ident)
        if silo not in silos:
            abort(403, "Forbidden")
        silos_admin = ag.authz(ident, permission="administrator")
        silos_manager = ag.authz(ident, permission="manager")
        if not (ident["repoze.who.userid"] == creator or silo in silos_admin or silo in silos_manager):
            abort(403, "Forbidden")

    if http_method == "GET":
        c.zipfiles = get_zipfiles_in_dataset(dataset)
        # conneg return
        accept_list = None
        if "HTTP_ACCEPT" in request.environ:
            try:
                accept_list = conneg_parse(request.environ["HTTP_ACCEPT"])
            except Exception:
                accept_list = [MT("text", "html")]
        if not accept_list:
            accept_list = [MT("text", "html")]
        mimetype = accept_list.pop(0)
        while mimetype:
            if str(mimetype).lower() in ["text/html", "text/xhtml"]:
                return render("/list_of_zipfiles.html")
            elif str(mimetype).lower() in ["text/plain", "application/json"]:
                response.content_type = 'application/json; charset="UTF-8"'
                response.status_int = 200
                response.status = "200 OK"
                return simplejson.dumps(list(c.zipfiles.keys()))
            try:
                mimetype = accept_list.pop(0)
            except IndexError:
                mimetype = None
        # Whoops nothing satisfies - return text/html
        return render("/list_of_zipfiles.html")

    elif http_method == "POST":
        params = request.POST
        # was params.has_key(...): removed in Python 3, 'in' is the idiom
        if not ("filename" in params and params["filename"]):
            abort(400, "You must supply a filename to unpack")

        item_real_filepath = dataset.to_dirpath()
        target_filepath = "%s/%s" % (item_real_filepath, params["filename"])
        if not os.path.isfile(target_filepath):
            abort(404, "File to unpack not found")
        if not check_file_mimetype(target_filepath, "application/zip"):
            abort(415, "File is not of type application/zip")

        if "id" in params and params["id"]:
            target_dataset_name = params["id"]
        else:
            # default: unpack back into the source dataset
            target_dataset_name = id

        # step 1: Create / initialize target dataset
        if not rdfsilo.exists(target_dataset_name):
            if not allowable_id2(target_dataset_name):
                response.content_type = "text/plain"
                response.status_int = 400
                response.status = "400 Bad request. Data package name not valid"
                return (
                    "Data package name can contain only the following characters - %s and has to be more than 1 character"
                    % ag.naming_rule_humanized
                )
            target_dataset = create_new(rdfsilo, target_dataset_name, ident["repoze.who.userid"])
            response.status_int = 201
            response.status = "201 Created"
            response.headers["Content-Location"] = url(
                controller="datasets", action="datasetview", silo=silo, id=target_dataset_name
            )
            response_message = "201 Created"
        else:
            target_dataset = rdfsilo.get_item(target_dataset_name)
            response.status = "204 Updated"
            response.status_int = 204
            response_message = None

        # step 2: Unpack zip item
        try:
            unpack_zip_item(target_dataset, dataset, params["filename"], rdfsilo, ident["repoze.who.userid"])
        except BadZipfile:
            abort(400, "BadZipfile: Couldn't unpack zipfile")

        # NOTE(review): three consecutive syncs preserved from the original -
        # looks like a workaround for an eventual-consistency issue; confirm
        # before collapsing to a single call.
        target_dataset.sync()
        target_dataset.sync()
        target_dataset.sync()

        # Broadcast the creation/change; best-effort only, never fail the request
        if response.status_int == 201:
            try:
                ag.b.creation(silo, id, ident=ident["repoze.who.userid"])
            except Exception:
                pass
        else:
            try:
                ag.b.change(silo, id, ident=ident["repoze.who.userid"])
            except Exception:
                pass

        # conneg return
        accept_list = None
        if "HTTP_ACCEPT" in request.environ:
            try:
                accept_list = conneg_parse(request.environ["HTTP_ACCEPT"])
            except Exception:
                accept_list = [MT("text", "html")]
        if not accept_list:
            accept_list = [MT("text", "html")]
        mimetype = accept_list.pop(0)
        while mimetype:
            if str(mimetype).lower() in ["text/html", "text/xhtml"]:
                redirect(url(controller="datasets", action="datasetview", silo=silo, id=target_dataset_name))
            elif str(mimetype).lower() in ["text/plain", "application/json"]:
                response.content_type = "text/plain"
                return response_message
            try:
                mimetype = accept_list.pop(0)
            except IndexError:
                mimetype = None
        # Whoops - nothing satisfies - return text/plain
        response.content_type = "text/plain"
        return response_message
def itemview(self, silo, id, path):
    """API call to
    GET - read the contents of a zip-file (without having to unpack)
    POST - unpack a zip file into a new / existing dataset
    PUT - Add the zipfile and unpack it onto the existing dataset"""
    # tmpl_context variables needed: c.silo_name, c.zipfile_contents, c.ident, c.id, c.path
    if not path:
        abort(400, "You must supply a filename to unpack")
    if not ag.granary.issilo(silo):
        abort(404)
    rdfsilo = ag.granary.get_rdf_silo(silo)
    if not rdfsilo.exists(id):
        abort(404)

    c.silo_name = silo
    c.id = id
    c.path = path
    ident = request.environ.get("repoze.who.identity")
    c.ident = ident
    dataset = rdfsilo.get_item(id)

    # Resolve the dataset creator (used for the authorisation checks below)
    creator = None
    if (
        dataset.manifest
        and dataset.manifest.state
        and "metadata" in dataset.manifest.state
        and dataset.manifest.state["metadata"]
        and "createdby" in dataset.manifest.state["metadata"]
        and dataset.manifest.state["metadata"]["createdby"]
    ):
        creator = dataset.manifest.state["metadata"]["createdby"]

    http_method = request.environ["REQUEST_METHOD"]

    if http_method == "GET":
        # Embargoed data requires silo membership to read
        if dataset.metadata.get("embargoed") not in ["false", 0, False]:
            if not ident:
                abort(401, "Not Authorised")
            silos = ag.authz(ident)
            if silo not in silos:
                abort(403, "Forbidden")
    else:
        # Mutating methods need creator/admin/manager rights on the silo
        if not ident:
            abort(401, "Not Authorised")
        silos = ag.authz(ident)
        if silo not in silos:
            abort(403, "Forbidden")
        silos_admin = ag.authz(ident, permission="administrator")
        silos_manager = ag.authz(ident, permission="manager")
        if not (ident["repoze.who.userid"] == creator or silo in silos_admin or silo in silos_manager):
            abort(403, "Forbidden")

    item_real_filepath = dataset.to_dirpath()
    target_filepath = "%s/%s" % (item_real_filepath, path)

    if http_method in ["GET", "POST"]:
        # GET/POST operate on an already-stored file; PUT uploads it first
        if not dataset.isfile(path):
            abort(404, "File not found")
        if not os.path.isfile(target_filepath):
            abort(404, "File not found")
        if not check_file_mimetype(target_filepath, "application/zip"):
            abort(415, "File is not of type application/zip")

    if http_method == "GET":
        try:
            c.zipfile_contents = read_zipfile(target_filepath)
        except BadZipfile:
            abort(400, "Could not read zipfile")
        # conneg return
        accept_list = None
        if "HTTP_ACCEPT" in request.environ:
            try:
                accept_list = conneg_parse(request.environ["HTTP_ACCEPT"])
            except Exception:
                accept_list = [MT("text", "html")]
        if not accept_list:
            accept_list = [MT("text", "html")]
        mimetype = accept_list.pop(0)
        while mimetype:
            if str(mimetype).lower() in ["text/html", "text/xhtml"]:
                return render("/zipfileview.html")
            elif str(mimetype).lower() in ["text/plain", "application/json"]:
                response.content_type = 'application/json; charset="UTF-8"'
                response.status_int = 200
                response.status = "200 OK"
                return simplejson.dumps(c.zipfile_contents)
            try:
                mimetype = accept_list.pop(0)
            except IndexError:
                mimetype = None
        # Whoops - nothing satisfies - return text/html
        return render("/zipfileview.html")

    elif http_method == "POST":
        params = request.POST
        if "id" in params and params["id"]:
            target_dataset_name = params["id"]
        else:
            # default: unpack back into the source dataset
            target_dataset_name = id

        # step 1: Create / initialize target dataset
        if not rdfsilo.exists(target_dataset_name):
            if not allowable_id2(target_dataset_name):
                response.content_type = "text/plain"
                response.status_int = 400
                response.status = "400 Bad request. Data package name not valid"
                return (
                    "Data package name can contain only the following characters - %s and has to be more than 1 character"
                    % ag.naming_rule_humanized
                )
            target_dataset = create_new(rdfsilo, target_dataset_name, ident["repoze.who.userid"])
            response.status_int = 201
            response.status = "201 Created"
            response.headers["Content-Location"] = url(
                controller="datasets", action="datasetview", silo=silo, id=target_dataset_name
            )
            response_message = "201 Created"
        else:
            target_dataset = rdfsilo.get_item(target_dataset_name)
            response.status = "204 Updated"
            response.status_int = 204
            response_message = None

        # step 2: Unpack zip item
        # BUG FIX: original passed target_dataset_name (a string) as the first
        # argument; every other call site (datasetview POST, itemview PUT)
        # passes the dataset object itself.
        try:
            unpack_zip_item(target_dataset, dataset, path, rdfsilo, ident["repoze.who.userid"])
        except BadZipfile:
            abort(400, "Couldn't unpack zipfile")

        # NOTE(review): three consecutive syncs preserved from the original -
        # confirm before collapsing to a single call.
        target_dataset.sync()
        target_dataset.sync()
        target_dataset.sync()

        # Broadcast the creation/change; best-effort only, never fail the request
        if response.status_int == 201:
            try:
                ag.b.creation(silo, id, ident=ident["repoze.who.userid"])
            except Exception:
                pass
        else:
            try:
                ag.b.change(silo, id, ident=ident["repoze.who.userid"])
            except Exception:
                pass

        # conneg return
        accept_list = None
        if "HTTP_ACCEPT" in request.environ:
            try:
                accept_list = conneg_parse(request.environ["HTTP_ACCEPT"])
            except Exception:
                accept_list = [MT("text", "html")]
        if not accept_list:
            accept_list = [MT("text", "html")]
        mimetype = accept_list.pop(0)
        while mimetype:
            if str(mimetype).lower() in ["text/html", "text/xhtml"]:
                redirect(url(controller="datasets", action="datasetview", silo=silo, id=target_dataset_name))
            elif str(mimetype).lower() in ["text/plain", "application/json"]:
                response.content_type = "text/plain"
                return response_message
            try:
                mimetype = accept_list.pop(0)
            except IndexError:
                mimetype = None
        # Whoops - nothing satisfies - return text/plain
        response.content_type = "text/plain"
        return response_message

    elif http_method == "PUT":
        # Pylons loads the request body into request.body...
        # This is not going to work for large files... ah well
        # POST will handle large files as they are pushed to disc, but this won't
        content = request.body
        if JAILBREAK.search(path) is not None:
            abort(400, "'..' cannot be used in the path")

        # Step 1: Put zipfile in dataset
        if dataset.isdir(path):
            response.content_type = "text/plain"
            response.status_int = 403
            response.status = "403 Forbidden"
            return "Cannot PUT a file on to an existing directory"

        # 204 = overwrite of an existing file, 201 = brand-new file
        if dataset.isfile(path):
            code = 204
        else:
            code = 201
        if code == 204:
            dataset.increment_version_delta(clone_previous_version=True, copy_filenames=["manifest.rdf", path])
        else:
            dataset.increment_version_delta(clone_previous_version=True, copy_filenames=["manifest.rdf"])
        dataset.put_stream(path, content)
        dataset.del_triple(dataset.uri, u"dcterms:modified")
        dataset.add_triple(dataset.uri, u"dcterms:modified", datetime.now())
        dataset.del_triple(dataset.uri, u"oxds:currentVersion")
        dataset.add_triple(dataset.uri, u"oxds:currentVersion", dataset.currentversion)
        dataset.sync()
        target_dataset = rdfsilo.get_item(id)

        # step 2: Unpack zip item
        if not check_file_mimetype(target_filepath, "application/zip"):
            abort(415, "File is not of type application/zip")
        try:
            unpack_zip_item(target_dataset, dataset, path, rdfsilo, ident["repoze.who.userid"])
        except BadZipfile:
            abort(400, "Couldn't unpack zipfile")

        # NOTE(review): three consecutive syncs preserved from the original -
        # confirm before collapsing to a single call.
        target_dataset.sync()
        target_dataset.sync()
        target_dataset.sync()

        response.status = "204 Updated"
        response.status_int = 204
        response_message = None
        try:
            ag.b.change(silo, id, path, ident=ident["repoze.who.userid"])
        except Exception:
            pass

        # conneg return
        accept_list = None
        if "HTTP_ACCEPT" in request.environ:
            try:
                accept_list = conneg_parse(request.environ["HTTP_ACCEPT"])
            except Exception:
                accept_list = [MT("text", "html")]
        if not accept_list:
            accept_list = [MT("text", "html")]
        mimetype = accept_list.pop(0)
        while mimetype:
            if str(mimetype).lower() in ["text/html", "text/xhtml"]:
                redirect(url(controller="datasets", action="datasetview", silo=silo, id=id))
            elif str(mimetype).lower() in ["text/plain", "application/json"]:
                response.content_type = "text/plain"
                return response_message
            try:
                mimetype = accept_list.pop(0)
            except IndexError:
                mimetype = None
        # Whoops - nothing satisfies - return text/plain
        response.content_type = "text/plain"
        return response_message
def deposit_new(self, silo, deposit):
    """
    Take the supplied deposit and treat it as a new container with content
    to be created in the specified collection
    Args:
    -collection: the ID of the collection to be deposited into
    -deposit: the DepositRequest object to be processed
    Returns a DepositResponse object which will contain the Deposit Receipt
    or a SWORD Error

    Raises:
        SwordError: if the dataset name already exists or is not allowable.
    """
    # check against the authorised list of silos
    rdf_silo = self._get_authorised_rdf_silo(silo)

    # ensure that we have a slug
    if deposit.slug is None:
        deposit.slug = str(uuid.uuid4())

    # weed out unacceptable deposits
    if rdf_silo.exists(deposit.slug):
        raise SwordError(
            error_uri=DataBankErrors.dataset_conflict,
            msg="A Dataset with the name " + deposit.slug + " already exists",
        )
    if not allowable_id2(deposit.slug):
        raise SwordError(
            error_uri=Errors.bad_request,
            msg="Dataset name can contain only the following characters - "
            + ag.naming_rule_humanized
            + " and has to be more than 1 character",
        )

    # NOTE: we pass in an empty dictionary of metadata on create, and then run
    # _ingest_metadata to augment the item from the deposit
    item = create_new(rdf_silo, deposit.slug, self.auth_credentials.username, {})
    add_dataset(silo, deposit.slug)
    self._ingest_metadata(item, deposit)

    # NOTE: deposit_new only supports entry-only deposits in databank.
    # Content-file storage, package ingest and derived-resource handling will
    # need to be re-introduced for full sword support (the original reference
    # snippets were removed as dead commented-out code).

    # the aggregation uri
    agg_uri = self.um.agg_uri(silo, deposit.slug)
    # the Edit-URI
    edit_uri = self.um.edit_uri(silo, deposit.slug)

    # create the initial statement
    s = Statement(aggregation_uri=agg_uri, rem_uri=edit_uri, states=[DataBankStates.initial_state])

    # FIXME: need to sort out authentication before we can augment the
    # statement with original-deposit details; not relevant unless we take a
    # binary-only deposit, which we currently don't.
    # User already authorized to deposit in this silo (_get_authorised_rdf_silo).

    # In creating the statement we use the existing manifest.rdf file in the
    # item. (Was an unclosed open(); the with-block guarantees the handle is
    # released.)
    manifest = item.get_rdf_manifest()
    with open(manifest.filepath, "r") as f:
        rdf_string = f.read()

    # create the new manifest and store it
    # Serialize rdf adds the sword statement - state, depositedOn, by, onBehalfOf, stateDesc
    new_manifest = s.serialise_rdf(rdf_string)
    item.put_stream("manifest.rdf", new_manifest)

    # now generate a receipt for the deposit
    # TODO: Add audit log from item.manifest in place of "created new item"
    receipt = self.deposit_receipt(silo, deposit.slug, item, "created new item")
    # FIXME: while we don't have full text deposit, we don't need to augment
    # the deposit receipt with deposit_uri / derived_resource_uris

    # finally, assemble the deposit response and return
    dr = DepositResponse()
    dr.receipt = receipt.serialise()
    dr.location = receipt.edit_uri
    # Broadcast change as message
    ag.b.creation(silo, deposit.slug, ident=self.auth_credentials.username)
    return dr
def deposit_new(self, silo, deposit):
    """
    Take the supplied deposit and treat it as a new container with content
    to be created in the specified collection
    Args:
    -collection: the ID of the collection to be deposited into
    -deposit: the DepositRequest object to be processed
    Returns a DepositResponse object which will contain the Deposit Receipt
    or a SWORD Error

    Raises:
        SwordError: if the dataset name already exists or is not allowable.

    NOTE(review): this method appears to be a byte-for-byte duplicate of an
    earlier deposit_new in this file - consider consolidating into a shared
    helper.
    """
    # check against the authorised list of silos
    rdf_silo = self._get_authorised_rdf_silo(silo)

    # ensure that we have a slug
    if deposit.slug is None:
        deposit.slug = str(uuid.uuid4())

    # weed out unacceptable deposits
    if rdf_silo.exists(deposit.slug):
        raise SwordError(
            error_uri=DataBankErrors.dataset_conflict,
            msg="A Dataset with the name " + deposit.slug + " already exists",
        )
    if not allowable_id2(deposit.slug):
        raise SwordError(
            error_uri=Errors.bad_request,
            msg="Dataset name can contain only the following characters - "
            + ag.naming_rule_humanized
            + " and has to be more than 1 character",
        )

    # NOTE: we pass in an empty dictionary of metadata on create, and then run
    # _ingest_metadata to augment the item from the deposit
    item = create_new(rdf_silo, deposit.slug, self.auth_credentials.username, {})
    add_dataset(silo, deposit.slug)
    self._ingest_metadata(item, deposit)

    # NOTE: deposit_new only supports entry-only deposits in databank.
    # Content-file storage, package ingest and derived-resource handling will
    # need to be re-introduced for full sword support (the original reference
    # snippets were removed as dead commented-out code).

    # the aggregation uri
    agg_uri = self.um.agg_uri(silo, deposit.slug)
    # the Edit-URI
    edit_uri = self.um.edit_uri(silo, deposit.slug)

    # create the initial statement
    s = Statement(aggregation_uri=agg_uri, rem_uri=edit_uri, states=[DataBankStates.initial_state])

    # FIXME: need to sort out authentication before we can augment the
    # statement with original-deposit details; not relevant unless we take a
    # binary-only deposit, which we currently don't.
    # User already authorized to deposit in this silo (_get_authorised_rdf_silo).

    # In creating the statement we use the existing manifest.rdf file in the
    # item. (Was an unclosed open(); the with-block guarantees the handle is
    # released.)
    manifest = item.get_rdf_manifest()
    with open(manifest.filepath, "r") as f:
        rdf_string = f.read()

    # create the new manifest and store it
    # Serialize rdf adds the sword statement - state, depositedOn, by, onBehalfOf, stateDesc
    new_manifest = s.serialise_rdf(rdf_string)
    item.put_stream("manifest.rdf", new_manifest)

    # now generate a receipt for the deposit
    # TODO: Add audit log from item.manifest in place of "created new item"
    receipt = self.deposit_receipt(silo, deposit.slug, item, "created new item")
    # FIXME: while we don't have full text deposit, we don't need to augment
    # the deposit receipt with deposit_uri / derived_resource_uris

    # finally, assemble the deposit response and return
    dr = DepositResponse()
    dr.receipt = receipt.serialise()
    dr.location = receipt.edit_uri
    # Broadcast change as message
    ag.b.creation(silo, deposit.slug, ident=self.auth_credentials.username)
    return dr