def _getFirstFileByHash(self, algo, hash, user=None):
    """
    Look up the first file matching a checksum that the user may read.

    :param algo: Name of the hashsum algorithm ``hash`` was computed with.
        It is also the file document field that is queried.
    :param hash: Hash value of the file to find.
    :param user: User to test read access against. When falsy, the current
        user is used instead.
    :return: The first readable file document, or None if there is none.
    """
    self._validateAlgo(algo)

    fileModel = FileModel()
    candidates = fileModel.find({algo: hash})
    if not user:
        user = self.getCurrentUser()

    # Lazily filter so that access checks stop at the first readable match.
    readable = (
        doc for doc in candidates
        if fileModel.hasAccess(doc, user, AccessType.READ)
    )
    return next(readable, None)
def _checkUploadSize(self, upload):
    """
    Decide whether an upload fits inside the applicable file size quota.

    :param upload: an upload document.
    :returns: None if the upload is allowed, otherwise a dictionary
        describing the quota restriction (``fileSizeQuota``,
        ``sizeNeeded``, ``quotaLeft`` and ``quotaUsed``).
    """
    if 'fileId' in upload:
        # The upload replaces an existing file, so the size of that file is
        # credited back when computing the resulting usage.
        existing = File().load(id=upload['fileId'], force=True)
        origSize = int(existing.get('size', 0))
        model, resource = self._getBaseResource('file', existing)
    else:
        origSize = 0
        model, resource = self._getBaseResource(
            upload['parentType'], upload['parentId'])
    if resource is None:
        return None

    fileSizeQuota = self._getFileSizeQuota(model, resource)
    if not fileSizeQuota:
        return None

    used = resource['size']
    requested = upload['size']
    fits = used + requested - origSize <= fileSizeQuota
    # always allow replacement with a smaller object
    shrinks = requested < origSize
    if fits or shrinks:
        return None

    return {
        'fileSizeQuota': fileSizeQuota,
        'sizeNeeded': requested - origSize,
        # Never report a negative amount of remaining quota.
        'quotaLeft': max(fileSizeQuota - used, 0),
        'quotaUsed': used
    }
def _computeHash(file, progress=noProgress):
    """
    Compute every supported checksum that the file document lacks.

    Algorithms whose name is already a key of the file document are
    skipped. If none remain, nothing is read and None is returned.
    Otherwise the file is opened through the file model, read in chunks of
    ``_CHUNK_LEN`` and fed to all remaining hashers at once. The resulting
    hex digests are then written to the stored file document.

    :param file: The file document to hash.
    :param progress: Progress context, updated with the number of bytes
        consumed after each chunk.
    :returns: A dict mapping algorithm name to hex digest for the newly
        computed hashes, or None if there was nothing to compute.
    """
    missing = SUPPORTED_ALGORITHMS - set(file)
    hashers = {}
    for alg in missing:
        hashers[alg] = getattr(hashlib, alg)()
    if not hashers:
        return

    fileModel = FileModel()
    with fileModel.open(file) as fh:
        chunk = fh.read(_CHUNK_LEN)
        while chunk:
            for hasher in six.viewvalues(hashers):
                hasher.update(chunk)
            progress.update(increment=len(chunk))
            chunk = fh.read(_CHUNK_LEN)

    digests = dict(
        (alg, hasher.hexdigest()) for alg, hasher in six.viewitems(hashers))
    fileModel.update(
        {'_id': file['_id']}, update={'$set': digests}, multi=False)

    return digests
def importFile(self, item, path, user, name=None, mimeType=None, **kwargs):
    """
    Import a single file from the filesystem into the assetstore.

    The file's bytes are not copied. A file document is created that
    records the absolute, user-expanded path together with the size and
    modification time found at that path, and is flagged as imported.

    :param item: The parent item for the file.
    :type item: dict
    :param path: The path on the local filesystem.
    :type path: str
    :param user: The user to list as the creator of the file.
    :type user: dict
    :param name: Name for the file. Defaults to the basename of ``path``.
    :type name: str
    :param mimeType: MIME type of the file if known.
    :type mimeType: str
    :returns: The file document that was created.
    :raises OSError: if the resolved path cannot be stat'ed.
    """
    logger.debug('Importing file %s to item %s on filesystem assetstore %s',
                 path, item['_id'], self.assetstore['_id'])

    # Resolve the path once and stat that same location, so the recorded
    # size and mtime always describe the file whose path is stored (the
    # raw ``path`` may contain ``~``, which os.stat does not expand).
    resolvedPath = os.path.abspath(os.path.expanduser(path))
    stat = os.stat(resolvedPath)
    name = name or os.path.basename(path)

    file = File().createFile(
        name=name, creator=user, item=item, reuseExisting=True,
        assetstore=self.assetstore, mimeType=mimeType, size=stat.st_size,
        saveFile=False)
    file['path'] = resolvedPath
    file['mtime'] = stat.st_mtime
    file['imported'] = True
    file = File().save(file)

    logger.debug('Imported file %s to item %s on filesystem assetstore %s',
                 path, item['_id'], self.assetstore['_id'])
    return file
def remove(self, item, **kwargs):
    """
    Remove an item along with its files and pending uploads.

    :param item: The item document to delete.
    :type item: dict
    :param kwargs: Forwarded to the upload model's ``remove`` unchanged and
        to the file model's ``remove`` with ``updateItemSize`` forced to
        False.
    """
    from girderformindlogger.models.file import File
    from girderformindlogger.models.upload import Upload

    # The item itself is going away, so per-file item size updates are
    # switched off regardless of what the caller passed.
    fileKwargs = {
        key: value for key, value in kwargs.items()
        if key != 'updateItemSize'
    }
    fileModel = File()
    for file in fileModel.find({'itemId': item['_id']}):
        fileModel.remove(file, updateItemSize=False, **fileKwargs)

    # Pending uploads that target this item are removed as well.
    uploadModel = Upload()
    pending = uploadModel.find({
        'parentId': item['_id'],
        'parentType': 'item'
    })
    for upload in pending:
        uploadModel.remove(upload, **kwargs)

    # Finally drop the item document itself.
    Model.remove(self, item)
def _onDownloadFileRequest(event):
    """
    Bump the download counters of the file named in a download request.

    ``downloadStatistics.requested`` is incremented on every request;
    ``downloadStatistics.started`` is incremented first when the request
    begins at byte 0.

    :param event: Event whose ``info`` holds ``startByte`` and ``file``.
    """
    info = event.info

    fields = []
    if info['startByte'] == 0:
        fields.append('downloadStatistics.started')
    fields.append('downloadStatistics.requested')

    for field in fields:
        File().increment(
            query={'_id': info['file']['_id']}, field=field, amount=1)
def moveFileToAssetstore(self, file, user, assetstore, progress=noProgress):
    """
    Move a file from whatever assetstore it is located in to a different
    assetstore. This is done by downloading and re-uploading the file.

    :param file: the file to move.
    :param user: the user that is authorizing the move.
    :param assetstore: the destination assetstore.
    :param progress: optional progress context. It is advanced by the
        number of bytes handed to each upload chunk.
    :returns: the original file if it is not moved, or the newly 'uploaded'
        file if it is.
    :raises GirderException: if a ``model.upload.movefile`` listener
        prevents the move.
    """
    from girderformindlogger.models.file import File

    if file['assetstoreId'] == assetstore['_id']:
        return file

    # Allow an event to cancel the move.  This could be done, for instance,
    # on files that could change dynamically.
    event = events.trigger('model.upload.movefile', {
        'file': file, 'assetstore': assetstore
    })
    if event.defaultPrevented:
        raise GirderException(
            'The file %s could not be moved to assetstore %s' % (
                file['_id'], assetstore['_id']))

    # Create a new upload record into the existing file
    upload = self.createUploadToFile(
        file=file, user=user, size=int(file['size']), assetstore=assetstore)
    if file['size'] == 0:
        return File().filter(self.finalizeUpload(upload), user)

    # Uploads need to be chunked for some assetstores
    chunkSize = self._getChunkSize()

    # Collect downloaded pieces and join them once per upload chunk. The
    # previous repeated ``chunk += data`` re-copied the growing buffer for
    # every piece, which is quadratic in the chunk size.
    pieces = []
    buffered = 0
    for data in File().download(file, headers=False)():
        pieces.append(data)
        buffered += len(data)
        if buffered >= chunkSize:
            upload = self.handleChunk(
                upload,
                RequestBodyStream(six.BytesIO(b''.join(pieces)), buffered))
            progress.update(increment=buffered)
            pieces = []
            buffered = 0

    # Flush whatever is left over after the download is exhausted.
    if pieces:
        upload = self.handleChunk(
            upload,
            RequestBodyStream(six.BytesIO(b''.join(pieces)), buffered))
        progress.update(increment=buffered)

    return upload
def getByHash(self, algo, hash):
    """
    List every file with the given checksum that the current user may read.

    :param algo: Name of the hashsum algorithm. It is validated and then
        used as the file document field to query.
    :param hash: Hash value to look for.
    :returns: A list of readable file documents (possibly empty).
    """
    self._validateAlgo(algo)

    model = FileModel()
    user = self.getCurrentUser()

    readable = []
    for doc in model.find({algo: hash}):
        if model.hasAccess(doc, user, AccessType.READ):
            readable.append(doc)
    return readable
def copyItem(self, srcItem, creator, name=None, folder=None, description=None):
    """
    Duplicate an item together with its metadata and files.

    :param srcItem: the item to copy.
    :type srcItem: dict
    :param creator: the user who will own the copied item.
    :param name: The name of the new item. None to copy the original name.
    :type name: str
    :param folder: The parent folder of the new item. None to store in the
        same folder as the original item.
    :param description: Description for the new item. None to copy the
        original description.
    :type description: str
    :returns: the new item, reloaded after its files were copied.
    """
    from girderformindlogger.models.file import File
    from girderformindlogger.models.folder import Folder

    # Fall back to the source item's values for anything not supplied.
    name = srcItem['name'] if name is None else name
    if folder is None:
        folder = Folder().load(srcItem['folderId'], force=True)
    description = (
        srcItem['description'] if description is None else description)

    newItem = self.createItem(
        folder=folder, name=name, creator=creator, description=description)

    # copy metadata and other extension values
    newItem['meta'] = copy.deepcopy(srcItem['meta'])
    visible = self.filter(newItem, creator)
    for key in srcItem:
        if key in visible or key in newItem:
            continue
        newItem[key] = copy.deepcopy(srcItem[key])

    # add a reference to the original item
    newItem['copyOfItem'] = srcItem['_id']
    newItem = self.save(newItem, triggerEvents=False)

    # Give listeners a chance to change things
    events.trigger('model.item.copy.prepare', (srcItem, newItem))

    # copy files
    fileModel = File()
    for srcFile in self.childFiles(item=srcItem):
        fileModel.copyFile(srcFile, creator=creator, item=newItem)

    # Reload to get updated size value
    newItem = self.load(newItem['_id'], force=True)
    events.trigger('model.item.copy.after', newItem)
    return newItem
def __init__(self, file):
    """
    Build a handle around a file document stored in girderformindlogger.

    The underlying data stream is opened immediately through the file
    model and kept on the instance.

    :param file: The file object being opened.
    :type file: dict
    """
    super(_FileHandle, self).__init__()
    self.file = file

    fileModel = File()
    self._handle = fileModel.open(file)
def removeThumbnails(event):
    """
    Delete every thumbnail file attached to a resource that is about to be
    removed.

    :param event: Event whose ``info`` is the resource document. Its
        optional ``_thumbnails`` entry lists thumbnail file ids.
    """
    thumbnailIds = event.info.get('_thumbnails', ())
    fileModel = File()

    for fileId in thumbnailIds:
        thumbnail = fileModel.load(fileId, force=True)
        if not thumbnail:
            # Nothing was loaded for this id, so there is nothing to remove.
            continue
        fileModel.remove(thumbnail)
def remove(self, assetstore, **kwargs):
    """
    Delete an assetstore that holds no files.

    :param assetstore: The assetstore document to delete.
    :type assetstore: dict
    :raises ValidationException: if any file still references this
        assetstore.
    """
    from girderformindlogger.models.file import File

    if File().findOne({'assetstoreId': assetstore['_id']}) is not None:
        raise ValidationException(
            'You may not delete an assetstore that contains files.')

    # delete partial uploads before we delete the store.
    adapter = assetstore_utilities.getAssetstoreAdapter(assetstore)
    try:
        adapter.untrackedUploads([], delete=True)
    except ValidationException:
        # this assetstore is currently unreachable, so skip this step
        pass

    # now remove the assetstore
    Model.remove(self, assetstore)

    # If some assetstore is still marked current there is nothing to fix.
    if self.findOne({'current': True}) is not None:
        return

    # Otherwise promote another assetstore (sorted by creation time,
    # descending) to be the current one, if any is left.
    replacement = self.findOne(sort=[('created', SortDir.DESCENDING)])
    if replacement is not None:
        replacement['current'] = True
        self.save(replacement)
def open(self, path, flags):
    """
    Open a path within the fuse and hand back a descriptor for it.

    Paths that do not resolve to a file are delegated to the parent class.
    Files are opened through the file model and tracked in
    ``self.openFiles`` under a newly allocated descriptor number.

    :param path: path within the fuse.
    :param flags: a combination of O_* flags.  This will fail if it is not
        read only.
    :returns: a file descriptor.
    :raises fuse.FuseOSError: with ``EROFS`` if any disallowed flag is set.
    """
    resource = self._getPath(path)
    if resource['model'] != 'file':
        return super(ServerFuse, self).open(path, flags)

    disallowed = (
        os.O_APPEND | os.O_ASYNC | os.O_CREAT | os.O_DIRECTORY
        | os.O_EXCL | os.O_RDWR | os.O_TRUNC | os.O_WRONLY)
    if flags & disallowed:
        raise fuse.FuseOSError(errno.EROFS)

    info = {
        'path': path,
        'handle': File().open(resource['document']),
        'lock': threading.Lock(),
    }
    # Allocate the descriptor and register the handle atomically.
    with self.openFilesLock:
        fh = self.nextFH
        self.nextFH = fh + 1
        self.openFiles[fh] = info
    return fh
def testManualComputeHashes(self):
    """
    With auto-compute disabled, the hashsum endpoint computes only the
    missing checksums, and a second call is a no-op.
    """
    Setting().set(hashsum_download.PluginSettings.AUTO_COMPUTE, False)

    old = hashsum_download.SUPPORTED_ALGORITHMS
    hashsum_download.SUPPORTED_ALGORITHMS = {'sha512', 'sha256'}
    # Restore the module-level setting even when an assertion fails, so the
    # patched value cannot leak into other tests.
    try:
        self.assertNotIn('sha256', self.privateFile)

        expected = hashlib.sha256()
        expected.update(self.userData)

        # Running the compute endpoint should only compute the missing ones
        resp = self.request(
            '/file/%s/hashsum' % self.privateFile['_id'], method='POST',
            user=self.user)
        self.assertStatusOk(resp)
        self.assertEqual(resp.json, {
            'sha256': expected.hexdigest()
        })

        # Running again should be a no-op
        resp = self.request(
            '/file/%s/hashsum' % self.privateFile['_id'], method='POST',
            user=self.user)
        self.assertStatusOk(resp)
        self.assertEqual(resp.json, None)

        file = File().load(self.privateFile['_id'], force=True)
        self.assertEqual(file['sha256'], expected.hexdigest())
    finally:
        hashsum_download.SUPPORTED_ALGORITHMS = old
def testAutoComputeHashes(self):
    """
    With auto-compute enabled, an uploaded file eventually carries every
    supported checksum. An invalid setting value is rejected.
    """
    with self.assertRaises(ValidationException):
        Setting().set(hashsum_download.PluginSettings.AUTO_COMPUTE, 'bad')

    old = hashsum_download.SUPPORTED_ALGORITHMS
    hashsum_download.SUPPORTED_ALGORITHMS = {'sha512', 'sha256'}
    # Restore the module-level setting even when an assertion fails, so the
    # patched value cannot leak into other tests.
    try:
        Setting().set(hashsum_download.PluginSettings.AUTO_COMPUTE, True)

        file = Upload().uploadFromFile(
            obj=six.BytesIO(self.userData), size=len(self.userData),
            name='Another file', parentType='folder',
            parent=self.privateFolder, user=self.user)

        # Poll for up to 15 seconds until the sha256 shows up on the file.
        start = time.time()
        while time.time() < start + 15:
            file = File().load(file['_id'], force=True)
            if 'sha256' in file:
                break
            time.sleep(0.2)

        expected = hashlib.sha256()
        expected.update(self.userData)
        self.assertIn('sha256', file)
        self.assertEqual(file['sha256'], expected.hexdigest())

        expected = hashlib.sha512()
        expected.update(self.userData)
        self.assertIn('sha512', file)
        self.assertEqual(file['sha512'], expected.hexdigest())
    finally:
        hashsum_download.SUPPORTED_ALGORITHMS = old
def load(self, info):
    """
    Hook the download statistics handlers into the file download events
    and expose the ``downloadStatistics`` field on the file model.

    :param info: Plugin load information (not used here).
    """
    # Bind REST events
    handlers = (
        ('model.file.download.request', _onDownloadFileRequest),
        ('model.file.download.complete', _onDownloadFileComplete),
    )
    for eventName, handler in handlers:
        events.bind(eventName, 'download_statistics', handler)

    # Add download count fields to file model
    File().exposeFields(level=AccessType.READ, fields='downloadStatistics')
def findInvalidFiles(self, progress=progress.noProgress, filters=None,
                     checkSize=True, **kwargs):
    """
    Scan the files of this assetstore for missing or mis-sized data.

    This is a generator function -- for each invalid file found, a
    dictionary is yielded to the caller that contains the file, its
    absolute path on disk, and a reason for invalid, e.g. "missing" or
    "size".

    :param progress: Pass a progress context to record progress.
    :type progress: :py:class:`girderformindlogger.utility.progress.ProgressContext`
    :param filters: Additional query dictionary to restrict the search for
        files. There is no need to set the ``assetstoreId`` in the filters,
        since that is done automatically.
    :type filters: dict or None
    :param checkSize: Whether to make sure the size of the underlying
        data matches the size of the file.
    :type checkSize: bool
    """
    filters = filters or {}
    q = dict({'assetstoreId': self.assetstore['_id']}, **filters)

    cursor = File().find(q)
    progress.update(total=cursor.count(), current=0)

    for file in cursor:
        progress.update(increment=1, message=file['name'])
        path = self.fullPath(file)

        if not os.path.isfile(path):
            reason = 'missing'
        elif checkSize and os.path.getsize(path) != file['size']:
            reason = 'size'
        else:
            # The file is present and, if checked, has the expected size.
            continue

        yield {
            'reason': reason,
            'file': file,
            'path': path
        }
def _pruneOrphans(self, progress):
    """
    Remove every file, folder and item document that its model reports as
    an orphan.

    :param progress: Progress context. Its total is set to the number of
        documents across the three models and it is advanced once per
        document inspected.
    :returns: The number of documents removed.
    """
    models = [File(), Folder(), Item()]

    steps = 0
    for model in models:
        steps += model.find().count()
    progress.update(total=steps, current=0)

    count = 0
    for model in models:
        for doc in model.find():
            progress.update(increment=1)
            if not model.isOrphan(doc):
                continue
            model.remove(doc)
            count += 1
    return count
class _FileHandle(paramiko.SFTPHandle):
    """SFTP handle that reads from a file opened through the file model."""

    def __init__(self, file):
        """
        Build a handle around a file document stored in girderformindlogger.

        :param file: The file object being opened.
        :type file: dict
        """
        super(_FileHandle, self).__init__()
        self.file = file

        fileModel = File()
        self._handle = fileModel.open(file)

    def read(self, offset, length):
        """
        Read up to ``length`` bytes starting at ``offset``.

        :param offset: Position to read from.
        :param length: Number of bytes requested. Must not exceed
            ``MAX_BUF_LEN``.
        :returns: Whatever the underlying handle's ``read`` returns.
        :raises IOError: if ``length`` is larger than ``MAX_BUF_LEN``.
        """
        if length > MAX_BUF_LEN:
            raise IOError(
                'Requested chunk length (%d) is larger than the maximum allowed.' % length)

        # Only reposition when the handle is not already at the offset and
        # the offset lies inside the file.
        needsSeek = (
            offset != self._handle.tell() and offset < self.file['size'])
        if needsSeek:
            self._handle.seek(offset)

        return self._handle.read(length)

    def stat(self):
        """Return the ``_stat`` result for the wrapped file document."""
        return _stat(self.file, 'file')

    def close(self):
        """Close the underlying handle and report ``SFTP_OK``."""
        self._handle.close()
        return paramiko.SFTP_OK
def updateSize(self, doc):
    """
    Recompute the size of an item from its files and repair the stored
    value when it is wrong.

    :param doc: The item.
    :type doc: dict
    :returns: A ``(size, fixes)`` tuple: the summed size of the child files
        and the number of fixes made, including those reported by the file
        model for each child file.
    """
    from girderformindlogger.models.file import File

    fileModel = File()

    # get correct size from child files
    results = [fileModel.updateSize(file) for file in self.childFiles(doc)]
    size = sum(fileSize for fileSize, _ in results)
    fixes = sum(fileFixes for _, fileFixes in results)

    # fix value if incorrect
    if size != doc.get('size'):
        self.update({'_id': doc['_id']}, update={'$set': {'size': size}})
        fixes += 1

    return size, fixes
def deleteFile(self, file):
    """
    Remove the chunks belonging to a file from the chunk collection, but
    only when exactly one file in this assetstore references them.

    :param file: The file document being deleted. Its ``chunkUuid``
        identifies the chunks.
    """
    q = {
        'chunkUuid': file['chunkUuid'],
        'assetstoreId': self.assetstore['_id']
    }
    matching = File().find(q, limit=2, projection=[])
    if matching.count(True) != 1:
        return

    # If we can't reach the database, we return anyway.  A system check
    # will be necessary to remove the abandoned file.  Since we already
    # can handle that case, tell Mongo to use a 0 write concern -- we
    # don't need to know that the chunks have been deleted, and this
    # can be faster.
    try:
        unacknowledged = self.chunkColl.with_options(
            write_concern=pymongo.WriteConcern(w=0))
        unacknowledged.delete_many({'uuid': file['chunkUuid']})
    except pymongo.errors.AutoReconnect:
        pass
def childFiles(self, item, limit=0, offset=0, sort=None, **kwargs):
    """
    Query the files whose ``itemId`` is the given item's id.

    :param item: The parent item.
    :param limit: Result limit.
    :param offset: Result offset.
    :param sort: The sort structure to pass to pymongo.
    :param kwargs: Passed through to the file model's ``find``.
    :returns: Whatever the file model's ``find`` returns for that query.
    """
    from girderformindlogger.models.file import File

    query = {'itemId': item['_id']}
    fileModel = File()
    return fileModel.find(
        query, limit=limit, offset=offset, sort=sort, **kwargs)
def _validateLogo(doc):
    """
    Validate a logo setting by loading the referenced file anonymously.

    On success ``doc['value']`` is replaced with the loaded file's ``_id``.

    :param doc: The setting document. Its ``value`` is the file id.
    :raises ValidationException: if loading the file fails validation or
        the file is not readable without a user.
    """
    try:
        logoFile = File().load(
            doc['value'], level=AccessType.READ, user=None, exc=True)
    except ValidationException as e:
        # Invalid ObjectId, or non-existent document
        raise ValidationException(str(e), 'value')
    except AccessException:
        raise ValidationException('Logo must be publicly readable', 'value')
    else:
        # Store this field natively as an ObjectId
        doc['value'] = logoFile['_id']
def _parseFile(f):
    """
    Try to read DICOM metadata from a file.

    :param f: The file document to open and parse.
    :returns: The result of ``_coerceMetadata`` on the parsed dataset, or
        None if pydicom reports the data as invalid DICOM.
    """
    readOptions = {
        # don't read huge fields, esp. if this isn't even really dicom
        'defer_size': 1024,
        # don't read image data, just metadata
        'stop_before_pixels': True,
    }
    try:
        # download file and try to parse dicom
        with File().open(f) as fp:
            dataset = pydicom.dcmread(fp, **readOptions)
            # Coerce while the file is still open.
            return _coerceMetadata(dataset)
    except pydicom.errors.InvalidDicomError:
        # if this error occurs, probably not a dicom file
        return None
def deleteFile(self, file):
    """
    Unlink a file's data from disk when no other file or upload in this
    assetstore shares its sha512.

    Imported files, and file documents without a ``path``, are left
    untouched. A failure to unlink is logged rather than raised.

    :param file: The file document being deleted.
    """
    from girderformindlogger.models.file import File

    if file.get('imported') or 'path' not in file:
        return

    q = {
        'sha512': file['sha512'],
        'assetstoreId': self.assetstore['_id']
    }
    path = os.path.join(self.assetstore['root'], file['path'])
    if not os.path.isfile(path):
        return

    # Hold a lock file next to the data while checking for other
    # references and unlinking.
    with filelock.FileLock(path + '.deleteLock'):
        matching = File().find(q, limit=2, fields=[])
        matchingUpload = Upload().findOne(q)
        soleReference = matching.count(True) == 1 and matchingUpload is None
        if not soleReference:
            return
        try:
            os.unlink(path)
        except Exception:
            logger.exception('Failed to delete file %s' % path)
def download(self, item, offset, format, contentDisposition, extraParameters):
    """
    Download the contents of an item.

    An item with exactly one file is served as that file unless the zip
    format was requested. Every other case goes through the multi-file
    download.

    :param item: The item to download.
    :param offset: Offset passed on to the single-file download.
    :param format: One of None, '' or 'zip'.
    :param contentDisposition: One of None, 'inline' or 'attachment'. It is
        only checked for the single-file download.
    :param extraParameters: Passed on to the single-file download.
    :raises RestException: for an unsupported format or content
        disposition.
    """
    user = self.getCurrentUser()
    # Two results are enough to tell "exactly one file" from "several".
    files = list(self._model.childFiles(item=item, limit=2))

    if format not in (None, '', 'zip'):
        raise RestException('Unsupported format: %s.' % format)

    if format == 'zip' or len(files) != 1:
        return self._downloadMultifileItem(item, user)

    if contentDisposition not in {None, 'inline', 'attachment'}:
        raise RestException(
            'Unallowed contentDisposition type "%s".' % contentDisposition)
    return File().download(
        files[0], offset, contentDisposition=contentDisposition,
        extraParameters=extraParameters)
def __init__(self):
    """Set up the ``file`` resource and register its REST routes."""
    super(File, self).__init__()
    self._model = FileModel()
    self.resourceName = 'file'

    # (HTTP method, route path, handler), registered in this order.
    routes = (
        ('DELETE', (':id', ), self.deleteFile),
        ('DELETE', ('upload', ':id'), self.cancelUpload),
        ('GET', ('offset', ), self.requestOffset),
        ('GET', (':id', ), self.getFile),
        ('GET', (':id', 'download'), self.download),
        ('GET', (':id', 'download', ':name'), self.downloadWithName),
        ('POST', (), self.initUpload),
        ('POST', ('chunk', ), self.readChunk),
        ('POST', ('completion', ), self.finalizeUpload),
        ('POST', (':id', 'copy'), self.copy),
        ('PUT', (':id', ), self.updateFile),
        ('PUT', (':id', 'contents'), self.updateFileContents),
        ('PUT', (':id', 'move'), self.moveFileToAssetstore),
    )
    for method, path, handler in routes:
        self.route(method, path, handler)
def addComputedInfo(self, assetstore):
    """
    Attach runtime-computed properties to an assetstore document.

    Sets ``capacity`` from the assetstore adapter and ``hasFiles`` to
    whether any file references this assetstore.

    :param assetstore: The assetstore object.
    :type assetstore: dict
    """
    from girderformindlogger.models.file import File

    try:
        adapter = assetstore_utilities.getAssetstoreAdapter(assetstore)
    except NoAssetstoreAdapter:
        # If the adapter doesn't exist, use the abstract adapter, since
        # this will just give the default capacity information
        adapter = AbstractAssetstoreAdapter(assetstore)
    assetstore['capacity'] = adapter.capacityInfo()

    anyFile = File().findOne({'assetstoreId': assetstore['_id']})
    assetstore['hasFiles'] = anyFile is not None
def handleChunk(self, upload, chunk, filter=False, user=None):
    """
    Process one uploaded chunk, finalizing the upload if it is the last.

    This method will return EITHER an upload or a file document. While
    bytes are still outstanding, the updated upload document is returned.
    Once the received byte count equals the upload size, the upload is
    finalized and the created file document is returned.

    :param upload: The upload document to update.
    :type upload: dict
    :param chunk: The file object representing the chunk that was uploaded.
    :type chunk: file
    :param filter: Whether the model should be filtered. Only affects
        behavior when returning a file model, not the upload model.
    :type filter: bool
    :param user: The current user. Only affects behavior if filter=True.
    :type user: dict or None
    """
    from girderformindlogger.models.assetstore import Assetstore
    from girderformindlogger.models.file import File
    from girderformindlogger.utility import assetstore_utilities

    assetstore = Assetstore().load(upload['assetstoreId'])
    adapter = assetstore_utilities.getAssetstoreAdapter(assetstore)
    upload = adapter.uploadChunk(upload, chunk)

    # Save the upload unless it completed without ever having been saved.
    if '_id' in upload or upload['received'] != upload['size']:
        upload = self.save(upload)

    if upload['received'] != upload['size']:
        return upload

    # If upload is finished, we finalize it
    file = self.finalizeUpload(upload, assetstore)
    return File().filter(file, user=user) if filter else file
def attachThumbnail(file, thumbnail, attachToType, attachToId, width, height):
    """
    Link a thumbnail file to the resource it belongs to and save both.

    The thumbnail's id is appended to the parent's ``_thumbnails`` list.
    The thumbnail itself records what it is attached to and which file it
    was derived from.

    :param file: The file from which the thumbnail was derived.
    :type file: dict
    :param thumbnail: The newly generated thumbnail file document.
    :type thumbnail: dict
    :param attachToType: The type to which the thumbnail is being attached.
    :type attachToType: str
    :param attachToId: The ID of the document to attach the thumbnail to.
    :type attachToId: str or ObjectId
    :param width: Thumbnail width.
    :type width: int
    :param height: Thumbnail height.
    :type height: int
    :returns: The updated thumbnail file document.
    """
    parentModel = ModelImporter.model(attachToType)
    parent = parentModel.load(attachToId, force=True)
    parent.setdefault('_thumbnails', []).append(thumbnail['_id'])
    parentModel.save(parent)

    thumbnail.update({
        'attachedToType': attachToType,
        'attachedToId': parent['_id'],
        'isThumbnail': True,
        'derivedFrom': {
            'type': 'file',
            'id': file['_id'],
            'process': 'thumbnail',
            'width': width,
            'height': height
        }
    })
    return File().save(thumbnail)