def __init__(self, assetstore): """ :param assetstore: The assetstore to act on. """ self.assetstore = assetstore try: self.chunkColl = getDbConnection( assetstore.get('mongohost', None), assetstore.get('replicaset', None))[assetstore['db']]['chunk'] except pymongo.errors.ConnectionFailure: logger.error('Failed to connect to GridFS assetstore %s', assetstore['db']) self.chunkColl = 'Failed to connect' self.unavailable = True return except pymongo.errors.ConfigurationError: logger.exception('Failed to configure GridFS assetstore %s', assetstore['db']) self.chunkColl = 'Failed to configure' self.unavailable = True return self.chunkColl.ensure_index([ ('uuid', pymongo.ASCENDING), ('n', pymongo.ASCENDING) ], unique=True)
def _itemFromEvent(event, identifierEnding, itemAccessLevel=AccessType.READ):
    """
    If an event has a reference and an associated identifier that ends with a
    specific string, return the associated item, user, and image file.

    :param event: the data.process event.
    :param identifierEnding: the required end of the identifier.
    :returns: a dictionary with item, user, and file if there was a match.
    """
    info = event.info
    identifier = None
    reference = info.get('reference', None)
    if reference is not None:
        try:
            reference = json.loads(reference)
            if (isinstance(reference, dict) and isinstance(
                    reference.get('identifier'), six.string_types)):
                identifier = reference['identifier']
        except (ValueError, TypeError):
            logger.debug('Failed to parse data.process reference: %r', reference)
    if identifier is not None and identifier.endswith(identifierEnding):
        if 'userId' not in reference or 'itemId' not in reference or 'fileId' not in reference:
            logger.error('Reference does not contain required information.')
            return

        userId = reference['userId']
        imageId = reference['fileId']

        # load models from the database
        user = User().load(userId, force=True)
        image = File().load(imageId, level=AccessType.READ, user=user)
        item = Item().load(image['itemId'], level=itemAccessLevel, user=user)
        return {'item': item, 'user': user, 'file': image}
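# Hypothetical usage sketch (not part of the original snippet): a data.process
# handler built on _itemFromEvent. The plugin name and identifier suffix are
# illustrative assumptions; the reference fields the caller must supply
# (identifier, userId, itemId, fileId) mirror the checks in the function above.
from girder import events


def _handleAnnotationUpload(event):
    results = _itemFromEvent(event, 'AnnotationFile', AccessType.WRITE)
    if not results:
        return
    # results now holds the resolved 'item', 'user', and 'file' documents
    ...


events.bind('data.process', 'example_plugin', _handleAnnotationUpload)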
def results(self, params):
    offset = int(params['offset'] if 'offset' in params else 0)
    limit = int(params['limit'] if 'limit' in params else 20)

    resp = requests.get(self.search_url + '/get_results', params={
        'sid': params['sid'],
        'i': offset,
        'j': offset + limit
    }).json()  # @todo handle errors

    documents = solr_documents_from_field(
        'sha1sum_s_md', [sha for (sha, _) in resp['results']])

    # The documents from Solr (since shas map to >= 1 document) may not be in
    # the order of confidence returned by IQR, sort the documents to match the
    # confidence values.
    # Sort by confidence values first, then sha checksums second so duplicate
    # images are grouped together.
    confidenceValues = dict(resp['results'])  # Mapping of sha -> confidence values

    if len(documents) < len(resp['results']):
        logger.error('SID %s: There are SMQTK descriptors that have no '
                     'corresponding Solr document(s).' % params['sid'])

    for document in documents:
        document['smqtk_iqr_confidence'] = confidenceValues[document['sha1sum_s_md']]

    return {
        'numFound': resp['total_results'],
        'docs': sorted(documents,
                       key=lambda x: (x['smqtk_iqr_confidence'], x['sha1sum_s_md']),
                       reverse=True)
    }
def run(self):
    while True:
        try:
            logger.info('Running DM file GC')
            self.collect()
        except Exception:
            logger.error('File collection failure', exc_info=1)
        time.sleep(self.settings.get(constants.PluginSettings.GC_RUN_INTERVAL))
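# Hypothetical wiring sketch: run() blocks forever, so the collector is
# presumably started on a background thread. The helper name and the
# collector argument here are illustrative assumptions, not part of the
# snippet above.
import threading


def start_gc(collector):
    """Start the DM file garbage collector loop on a daemon thread."""
    thread = threading.Thread(target=collector.run, daemon=True)
    thread.start()
    return thread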
def _proxiedUploadChunk(self, upload, chunk): """ Clients that do not support direct-to-S3 upload behavior will go through this method by sending the chunk data as they normally would for other assetstore types. Girder will send the data to S3 on behalf of the client. """ bucket = self._getBucket() if upload['s3']['chunked']: if 'uploadId' not in upload['s3']: # Initiate a new multipart upload mp = bucket.initiate_multipart_upload( upload['s3']['key'], headers=self._getRequestHeaders(upload)) upload['s3']['uploadId'] = mp.id upload['s3']['keyName'] = mp.key_name upload['s3']['partNumber'] = 0 upload['s3']['partNumber'] += 1 s3Info = upload['s3'] size = chunk.getSize() queryParams = { 'partNumber': s3Info['partNumber'], 'uploadId': s3Info['uploadId'] } headers = { 'Content-Length': str(size) } url = self._botoGenerateUrl( method='PUT', key=s3Info['key'], queryParams=queryParams, headers=headers) resp = requests.request(method='PUT', url=url, data=chunk, headers=headers) if resp.status_code not in (200, 201): logger.error('S3 multipart upload failure %d (uploadId=%s):\n%s' % ( resp.status_code, upload['_id'], resp.text)) raise GirderException('Upload failed (bad gateway)') upload['received'] += size else: size = chunk.getSize() if size < upload['size']: raise ValidationException('Uploads of this length must be sent in a single chunk.') reqInfo = upload['s3']['request'] resp = requests.request( method=reqInfo['method'], url=reqInfo['url'], data=chunk, headers=dict(reqInfo['headers'], **{'Content-Length': str(size)})) if resp.status_code not in (200, 201): logger.error('S3 upload failure %d (uploadId=%s):\n%s' % ( resp.status_code, upload['_id'], resp.text)) raise GirderException('Upload failed (bad gateway)') upload['received'] = size return upload
def refine(self, params):
    sid = str(params['item']['_id'])
    pos_uuids = params['pos_uuids']
    neg_uuids = params['neg_uuids'] if params['neg_uuids'] is not None else []

    if len(pos_uuids) == 0:
        raise RestException('No positive UUIDs given.')

    with self.controller:
        if not self.controller.has_session_uuid(sid):
            raise RestException('Session ID %s not found.' % sid, 404)
        iqrs = self.controller.get_session(sid)
        iqrs.lock.acquire()  # lock BEFORE releasing controller

    descriptor_index = self._descriptorIndexFromSessionId(sid)
    neighbor_index = self._nearestNeighborIndex(sid, descriptor_index)
    if descriptor_index is None or neighbor_index is None:
        logger.error(
            'Unable to compute descriptor or neighbor index from sid %s.' % sid)
        raise RestException(
            'Unable to compute descriptor or neighbor index from sid %s.' % sid, 500)

    # Get appropriate descriptor elements from index for
    # setting new adjudication state.
    try:
        pos_descrs = set(descriptor_index.get_many_descriptors(pos_uuids))
        neg_descrs = set(descriptor_index.get_many_descriptors(neg_uuids))
    except KeyError as ex:
        logger.warn(traceback.format_exc())
        raise RestException('Descriptor UUID %s not found in index.' % ex, 404)

    # if a new classifier should be made upon the next
    # classification request.
    diff_pos = pos_descrs.symmetric_difference(iqrs.positive_descriptors)
    diff_neg = neg_descrs.symmetric_difference(iqrs.negative_descriptors)
    if diff_pos or diff_neg:
        logger.debug("[%s] session Classifier dirty", sid)
        self.session_classifier_dirty[sid] = True

    logger.info("[%s] Setting adjudications", sid)
    iqrs.positive_descriptors = pos_descrs
    iqrs.negative_descriptors = neg_descrs

    logger.info("[%s] Updating working index", sid)
    iqrs.update_working_index(neighbor_index)

    logger.info("[%s] Refining", sid)
    iqrs.refine()

    iqrs.lock.release()
    return sid
def refine(self, params):
    sid = str(params['item']['_id'])
    pos_uuids = params['pos_uuids']
    neg_uuids = params['neg_uuids'] if params['neg_uuids'] is not None else []

    if len(pos_uuids) == 0:
        raise RestException('No positive UUIDs given.')

    with self.controller:
        if not self.controller.has_session_uuid(sid):
            raise RestException('Session ID %s not found.' % sid, 404)
        iqrs = self.controller.get_session(sid)
        iqrs.lock.acquire()  # lock BEFORE releasing controller

    try:
        descriptor_index = self._descriptorIndexFromSessionId(sid)
        neighbor_index = self._nearestNeighborIndex(sid, descriptor_index)
        if descriptor_index is None or neighbor_index is None:
            logger.error(
                'Unable to compute descriptor or neighbor index from sid %s.' % sid)
            raise RestException(
                'Unable to compute descriptor or neighbor index from sid %s.' % sid, 500)

        # Get appropriate descriptor elements from index for
        # setting new adjudication state.
        try:
            pos_descrs = set(descriptor_index.get_many_descriptors(pos_uuids))
            neg_descrs = set(descriptor_index.get_many_descriptors(neg_uuids))
        except KeyError as ex:
            logger.warn(traceback.format_exc())
            raise RestException('Descriptor UUID %s not found in index.' % ex, 404)

        # if a new classifier should be made upon the next
        # classification request.
        diff_pos = pos_descrs.symmetric_difference(iqrs.positive_descriptors)
        diff_neg = neg_descrs.symmetric_difference(iqrs.negative_descriptors)
        if diff_pos or diff_neg:
            logger.debug("[%s] session Classifier dirty", sid)
            self.session_classifier_dirty[sid] = True

        logger.info("[%s] Setting adjudications", sid)
        iqrs.positive_descriptors = pos_descrs
        iqrs.negative_descriptors = neg_descrs

        logger.info("[%s] Updating working index", sid)
        iqrs.update_working_index(neighbor_index)

        logger.info("[%s] Refining", sid)
        iqrs.refine()
    finally:
        iqrs.lock.release()

    return sid
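# Design note (assumption): if iqrs.lock is a plain threading.Lock/RLock, the
# explicit acquire()/try/finally/release() pattern in the variant above can be
# expressed with a context manager, which also guarantees release when
# refine() raises. Minimal illustration with a stand-in lock:
import threading

lock = threading.RLock()  # stand-in for iqrs.lock

with lock:  # equivalent to acquire() ... finally: release()
    pass    # critical section (adjudication update + refine in the snippet)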
def testOneFile(self):
    self.configureLogging({'log_max_info_level': 'CRITICAL'}, oneFile=True)

    logger.info(self.infoMessage)
    infoSize = os.path.getsize(self.infoFile)
    errorSize = os.path.getsize(self.errorFile)
    self.assertEqual(infoSize, errorSize)

    logger.error(self.errorMessage)
    newInfoSize = os.path.getsize(self.infoFile)
    newErrorSize = os.path.getsize(self.errorFile)
    self.assertEqual(newInfoSize, newErrorSize)
    self.assertGreater(newInfoSize, infoSize)
def testOneFile(tempLog):
    tempLog = configureLogging({'log_max_info_level': 'CRITICAL'}, oneFile=True)

    logger.info(INFO_MSG)
    infoSize = os.path.getsize(tempLog['info_log_file'])
    errorSize = os.path.getsize(tempLog['error_log_file'])
    assert infoSize == errorSize

    logger.error(ERROR_MSG)
    newInfoSize = os.path.getsize(tempLog['info_log_file'])
    newErrorSize = os.path.getsize(tempLog['error_log_file'])
    assert newInfoSize == newErrorSize
    assert newInfoSize > infoSize
def computeDescriptor(self, params):
    # @todo Naively assuming we will always be able to retrieve the URL
    image, _type = base64FromUrl(params['url'])

    r = requests.post('%(base_url)s/compute/base64://%(image)s?content_type=%(type)s' % {
        'base_url': self.search_url,
        'image': image,
        'type': _type})

    if not r.ok:
        logger.error('Failed to compute SMQTK descriptor for image %s.' % params['url'])
        raise GirderException(
            'Failed to compute descriptor',
            'girder.plugins.imagespace_smqtk.smqtk.computeDescriptor')
    else:
        return r.json()
def testInfoMaxLevel(tempLog):
    tempLog = configureLogging({'log_max_info_level': 'CRITICAL'})

    infoSize1 = os.path.getsize(tempLog['info_log_file'])
    errorSize1 = os.path.getsize(tempLog['error_log_file'])

    logger.info(INFO_MSG)
    infoSize2 = os.path.getsize(tempLog['info_log_file'])
    errorSize2 = os.path.getsize(tempLog['error_log_file'])
    assert infoSize2 > infoSize1
    assert errorSize2 == errorSize1

    logger.error(ERROR_MSG)
    infoSize3 = os.path.getsize(tempLog['info_log_file'])
    errorSize3 = os.path.getsize(tempLog['error_log_file'])
    assert infoSize3 > infoSize2
    assert errorSize3 > errorSize2
def testInfoMaxLevel(self):
    self.configureLogging({'log_max_info_level': 'CRITICAL'})

    infoSize1 = os.path.getsize(self.infoFile)
    errorSize1 = os.path.getsize(self.errorFile)

    logger.info(self.infoMessage)
    infoSize2 = os.path.getsize(self.infoFile)
    errorSize2 = os.path.getsize(self.errorFile)
    self.assertGreater(infoSize2, infoSize1)
    self.assertEqual(errorSize2, errorSize1)

    logger.error(self.errorMessage)
    infoSize3 = os.path.getsize(self.infoFile)
    errorSize3 = os.path.getsize(self.errorFile)
    self.assertGreater(infoSize3, infoSize2)
    self.assertGreater(errorSize3, errorSize2)
def process_annotations(event): """Add annotations to an image on a ``data.process`` event""" info = event.info identifier = None reference = info.get('reference', None) if reference is not None: try: reference = json.loads(reference) if (isinstance(reference, dict) and isinstance( reference.get('identifier'), six.string_types)): identifier = reference['identifier'] except (ValueError, TypeError): logger.warning('Failed to parse data.process reference: %r', reference) if identifier is not None and identifier.endswith('AnnotationFile'): if 'userId' not in reference or 'itemId' not in reference: logger.error( 'Annotation reference does not contain required information.') return userId = reference['userId'] imageId = reference['itemId'] # load model classes Item = ModelImporter.model('item') File = ModelImporter.model('file') User = ModelImporter.model('user') Annotation = ModelImporter.model('annotation', plugin='large_image') # load models from the database user = User.load(userId, force=True) image = File.load(imageId, level=AccessType.READ, user=user) item = Item.load(image['itemId'], level=AccessType.READ, user=user) file = File.load(info.get('file', {}).get('_id'), level=AccessType.READ, user=user) if not (item and user and file): logger.error('Could not load models from the database') return try: data = json.loads(b''.join(File.download(file)()).decode('utf8')) except Exception: logger.error('Could not parse annotation file') raise if not isinstance(data, list): data = [data] for annotation in data: try: Annotation.createAnnotation(item, user, annotation) except Exception: logger.error('Could not create annotation object from data') raise
def __init__(self, assetstore):
    super(FilesystemAssetstoreAdapter, self).__init__(assetstore)

    # If we can't create the temp directory, the assetstore still needs to
    # be initialized so that it can be deleted or modified. The validation
    # prevents invalid new assetstores from being created, so this only
    # happens to existing assetstores that no longer can access their temp
    # directories.
    self.tempDir = os.path.join(self.assetstore['root'], 'temp')
    try:
        mkdir(self.tempDir)
    except OSError:
        self.unavailable = True
        logger.exception('Failed to create filesystem assetstore '
                         'directories %s' % self.tempDir)
    if not os.access(self.assetstore['root'], os.W_OK):
        self.unavailable = True
        logger.error('Could not write to assetstore root: %s',
                     self.assetstore['root'])
def __init__(self, assetstore):
    super().__init__(assetstore)

    # If we can't create the temp directory, the assetstore still needs to
    # be initialized so that it can be deleted or modified. The validation
    # prevents invalid new assetstores from being created, so this only
    # happens to existing assetstores that no longer can access their temp
    # directories.
    self.tempDir = os.path.join(self.assetstore['root'], 'temp')
    try:
        mkdir(self.tempDir)
    except OSError:
        self.unavailable = True
        logger.exception('Failed to create filesystem assetstore '
                         'directories %s' % self.tempDir)
    if not os.access(self.assetstore['root'], os.W_OK):
        self.unavailable = True
        logger.error('Could not write to assetstore root: %s',
                     self.assetstore['root'])
def update_status(event):
    simulation_model = Simulation()

    job = event.info['job']
    if job['type'] != NLI_JOB_TYPE:
        return

    simulation_id = job['kwargs'].get('simulation_id')
    simulation = simulation_model.load(simulation_id, force=True)
    if simulation is None:
        logger.error(f'Could not find simulation for job {job["_id"]}')
        return

    progress = job['progress']
    simulation['nli']['progress'] = 100 * (progress['current'] / progress['total'])
    simulation['nli']['status'] = job['status']
    simulation_model.save(simulation)
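# Hypothetical wiring sketch: update_status reads event.info['job'], so it is
# presumably registered against the job-update event emitted by Girder's jobs
# plugin. The event name and handler name below are assumptions for
# illustration, not taken from the snippet above.
from girder import events

events.bind('jobs.job.update.after', 'nli', update_status)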
def __init__(self, assetstore): """ :param assetstore: The assetstore to act on. """ super(GridFsAssetstoreAdapter, self).__init__(assetstore) recent = False try: # Guard in case the connectionArgs is unhashable key = (self.assetstore.get('mongohost'), self.assetstore.get('replicaset'), self.assetstore.get('shard')) if key in _recentConnections: recent = (time.time() - _recentConnections[key]['created'] < RECENT_CONNECTION_CACHE_TIME) except TypeError: key = None try: # MongoClient automatically reuses connections from a pool, but we # want to avoid redoing ensureChunkIndices each time we get such a # connection. client = getDbConnection(self.assetstore.get('mongohost'), self.assetstore.get('replicaset'), quiet=recent) self.chunkColl = MongoProxy(client[self.assetstore['db']].chunk) if not recent: _ensureChunkIndices(self.chunkColl) if self.assetstore.get('shard') == 'auto': _setupSharding(self.chunkColl) if key is not None: if len(_recentConnections) >= RECENT_CONNECTION_CACHE_MAX_SIZE: _recentConnections.clear() _recentConnections[key] = { 'created': time.time() } except pymongo.errors.ConnectionFailure: logger.error('Failed to connect to GridFS assetstore %s', self.assetstore['db']) self.chunkColl = 'Failed to connect' self.unavailable = True except pymongo.errors.ConfigurationError: logger.exception('Failed to configure GridFS assetstore %s', self.assetstore['db']) self.chunkColl = 'Failed to configure' self.unavailable = True
def process_metadata(event):
    """Add metadata to an item on a ``data.process`` event"""
    results = _itemFromEvent(event, 'ItemMetadata', AccessType.WRITE)
    if not results:
        return
    file = File().load(
        event.info.get('file', {}).get('_id'),
        level=AccessType.READ, user=results['user'])

    if not file:
        logger.error('Could not load models from the database')
        return
    try:
        data = json.loads(b''.join(File().download(file)()).decode('utf8'))
    except Exception:
        logger.error('Could not parse metadata file')
        raise

    item = results['item']
    Item().setMetadata(item, data, allowNull=False)
def __init__(self, assetstore): """ :param assetstore: The assetstore to act on. """ self.assetstore = assetstore try: self.chunkColl = getDbConnection( assetstore.get('mongohost', None), assetstore.get('replicaset', None))[assetstore['db']].chunk except pymongo.errors.ConnectionFailure: logger.error('Failed to connect to GridFS assetstore %s', assetstore['db']) self.chunkColl = 'Failed to connect' self.unavailable = True return except pymongo.errors.ConfigurationError: logger.exception('Failed to configure GridFS assetstore %s', assetstore['db']) self.chunkColl = 'Failed to configure' self.unavailable = True return
def process_annotations(event): """Add annotations to an image on a ``data.process`` event""" results = _itemFromEvent(event, 'AnnotationFile') if not results: return item = results['item'] user = results['user'] file = File().load(event.info.get('file', {}).get('_id'), level=AccessType.READ, user=user) if not file: logger.error('Could not load models from the database') return try: data = json.loads(b''.join(File().download(file)()).decode('utf8')) except Exception: logger.error('Could not parse annotation file') raise if not isinstance(data, list): data = [data] for annotation in data: try: Annotation().createAnnotation(item, user, annotation) except Exception: logger.error('Could not create annotation object from data') raise
def results(self, params):
    offset = int(params['offset'] if 'offset' in params else 0)
    limit = int(params['limit'] if 'limit' in params else 20)

    resp = requests.get(self.search_url + '/get_results', params={
        'sid': params['sid'],
        'i': offset,
        'j': offset + limit
    }).json()  # @todo handle errors

    documents = solr_documents_from_field(
        'sha1sum_s_md', [sha for (sha, _) in resp['results']])

    # The documents from Solr (since shas map to >= 1 document) may not be in
    # the order of confidence returned by IQR, sort the documents to match the
    # confidence values.
    # Sort by confidence values first, then sha checksums second so duplicate
    # images are grouped together.
    confidenceValues = dict(resp['results'])  # Mapping of sha -> confidence values

    if len(documents) < len(resp['results']):
        logger.error('SID %s: There are SMQTK descriptors that have no '
                     'corresponding Solr document(s).' % params['sid'])

    for document in documents:
        document['smqtk_iqr_confidence'] = confidenceValues[document['sha1sum_s_md']]

    return {
        'numFound': resp['total_results'],
        'docs': sorted(documents,
                       key=lambda x: (x['smqtk_iqr_confidence'], x['sha1sum_s_md']),
                       reverse=True)
    }
def __init__(self, assetstore): """ :param assetstore: The assetstore to act on. """ super(GridFsAssetstoreAdapter, self).__init__(assetstore) try: self.chunkColl = getDbConnection( self.assetstore.get('mongohost', None), self.assetstore.get('replicaset', None) )[self.assetstore['db']].chunk _ensureChunkIndices(self.chunkColl) except pymongo.errors.ConnectionFailure: logger.error('Failed to connect to GridFS assetstore %s', self.assetstore['db']) self.chunkColl = 'Failed to connect' self.unavailable = True return except pymongo.errors.ConfigurationError: logger.exception('Failed to configure GridFS assetstore %s', self.assetstore['db']) self.chunkColl = 'Failed to configure' self.unavailable = True return
def __init__(self, assetstore): """ :param assetstore: The assetstore to act on. """ super(GridFsAssetstoreAdapter, self).__init__(assetstore) try: self.chunkColl = getDbConnection( self.assetstore.get('mongohost', None), self.assetstore.get('replicaset', None))[self.assetstore['db']].chunk _ensureChunkIndices(self.chunkColl) except pymongo.errors.ConnectionFailure: logger.error('Failed to connect to GridFS assetstore %s', self.assetstore['db']) self.chunkColl = 'Failed to connect' self.unavailable = True return except pymongo.errors.ConfigurationError: logger.exception('Failed to configure GridFS assetstore %s', self.assetstore['db']) self.chunkColl = 'Failed to configure' self.unavailable = True return
def testFileRotation(tempLog):
    tempLog = configureLogging({
        'log_access': ['screen', 'info'],
        'log_quiet': True,
        'log_max_size': '1 kb',
        'log_backup_count': 2,
        'log_level': 'DEBUG',
    })

    logger.info(INFO_MSG)
    logger.error(ERROR_MSG)
    infoSize = os.path.getsize(tempLog['info_log_file'])
    errorSize = os.path.getsize(tempLog['error_log_file'])
    assert os.path.exists(tempLog['info_log_file'] + '.1') is False
    assert os.path.exists(tempLog['error_log_file'] + '.1') is False

    logger.info(INFO_MSG)
    logger.error(ERROR_MSG)
    newInfoSize = os.path.getsize(tempLog['info_log_file'])
    newErrorSize = os.path.getsize(tempLog['error_log_file'])
    deltaInfo = newInfoSize - infoSize
    deltaError = newErrorSize - errorSize
    assert deltaInfo > len(INFO_MSG)
    assert deltaError > len(ERROR_MSG)

    while newInfoSize < 1024 * 1.5:
        logger.info(INFO_MSG)
        newInfoSize += deltaInfo
    while newErrorSize < 1024 * 1.5:
        logger.error(ERROR_MSG)
        newErrorSize += deltaError

    assert os.path.exists(tempLog['info_log_file'] + '.1') is True
    assert os.path.exists(tempLog['error_log_file'] + '.1') is True
    assert os.path.exists(tempLog['info_log_file'] + '.2') is False
    assert os.path.exists(tempLog['error_log_file'] + '.2') is False

    while newInfoSize < 1024 * 3.5:
        logger.info(INFO_MSG)
        newInfoSize += deltaInfo
    while newErrorSize < 1024 * 3.5:
        logger.error(ERROR_MSG)
        newErrorSize += deltaError

    assert os.path.exists(tempLog['info_log_file'] + '.1') is True
    assert os.path.exists(tempLog['error_log_file'] + '.1') is True
    assert os.path.exists(tempLog['info_log_file'] + '.2') is True
    assert os.path.exists(tempLog['error_log_file'] + '.2') is True
    assert os.path.exists(tempLog['info_log_file'] + '.3') is False
    assert os.path.exists(tempLog['error_log_file'] + '.3') is False
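# Assumption-flagged sketch: the rotation settings exercised by
# testFileRotation presumably translate, inside Girder's logging setup, into
# stdlib rotating handlers roughly like this. The file path is an illustrative
# stand-in (the tests read the real one from tempLog).
import logging.handlers

info_handler = logging.handlers.RotatingFileHandler(
    '/tmp/girder-info.log',  # assumed path
    maxBytes=1024,           # 'log_max_size': '1 kb'
    backupCount=2)           # 'log_backup_count': 2
info_handler.setLevel(logging.DEBUG)  # 'log_level': 'DEBUG'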
def testFileRotation(self):
    self.configureLogging({
        'log_access': ['screen', 'info'],
        'log_quiet': True,
        'log_max_size': '1 kb',
        'log_backup_count': 2,
        'log_level': 'DEBUG',
    })

    logger.info(self.infoMessage)
    logger.error(self.errorMessage)
    infoSize = os.path.getsize(self.infoFile)
    errorSize = os.path.getsize(self.errorFile)
    self.assertFalse(os.path.exists(self.infoFile + '.1'))
    self.assertFalse(os.path.exists(self.errorFile + '.1'))

    logger.info(self.infoMessage)
    logger.error(self.errorMessage)
    newInfoSize = os.path.getsize(self.infoFile)
    newErrorSize = os.path.getsize(self.errorFile)
    deltaInfo = newInfoSize - infoSize
    deltaError = newErrorSize - errorSize
    self.assertGreater(deltaInfo, len(self.infoMessage))
    self.assertGreater(deltaError, len(self.errorMessage))

    while newInfoSize < 1024 * 1.5:
        logger.info(self.infoMessage)
        newInfoSize += deltaInfo
    while newErrorSize < 1024 * 1.5:
        logger.error(self.errorMessage)
        newErrorSize += deltaError

    self.assertTrue(os.path.exists(self.infoFile + '.1'))
    self.assertTrue(os.path.exists(self.errorFile + '.1'))
    self.assertFalse(os.path.exists(self.infoFile + '.2'))
    self.assertFalse(os.path.exists(self.errorFile + '.2'))

    while newInfoSize < 1024 * 3.5:
        logger.info(self.infoMessage)
        newInfoSize += deltaInfo
    while newErrorSize < 1024 * 3.5:
        logger.error(self.errorMessage)
        newErrorSize += deltaError

    self.assertTrue(os.path.exists(self.infoFile + '.1'))
    self.assertTrue(os.path.exists(self.errorFile + '.1'))
    self.assertTrue(os.path.exists(self.infoFile + '.2'))
    self.assertTrue(os.path.exists(self.errorFile + '.2'))
    self.assertFalse(os.path.exists(self.infoFile + '.3'))
    self.assertFalse(os.path.exists(self.errorFile + '.3'))
def testRotation(self):
    from girder import logger

    infoMessage = 'Log info message'
    errorMessage = 'Log error message'
    logger.info(infoMessage)
    logger.error(errorMessage)
    infoSize = os.path.getsize(self.infoFile)
    errorSize = os.path.getsize(self.errorFile)
    self.assertFalse(os.path.exists(self.infoFile + '.1'))
    self.assertFalse(os.path.exists(self.errorFile + '.1'))

    logger.info(infoMessage)
    logger.error(errorMessage)
    newInfoSize = os.path.getsize(self.infoFile)
    newErrorSize = os.path.getsize(self.errorFile)
    deltaInfo = newInfoSize - infoSize
    deltaError = newErrorSize - errorSize
    self.assertGreater(deltaInfo, len(infoMessage))
    self.assertGreater(deltaError, len(errorMessage))

    while newInfoSize < 1024 * 1.5:
        logger.info(infoMessage)
        newInfoSize += deltaInfo
    while newErrorSize < 1024 * 1.5:
        logger.error(errorMessage)
        newErrorSize += deltaError

    self.assertTrue(os.path.exists(self.infoFile + '.1'))
    self.assertTrue(os.path.exists(self.errorFile + '.1'))
    self.assertFalse(os.path.exists(self.infoFile + '.2'))
    self.assertFalse(os.path.exists(self.errorFile + '.2'))

    while newInfoSize < 1024 * 3.5:
        logger.info(infoMessage)
        newInfoSize += deltaInfo
    while newErrorSize < 1024 * 3.5:
        logger.error(errorMessage)
        newErrorSize += deltaError

    self.assertTrue(os.path.exists(self.infoFile + '.1'))
    self.assertTrue(os.path.exists(self.errorFile + '.1'))
    self.assertTrue(os.path.exists(self.infoFile + '.2'))
    self.assertTrue(os.path.exists(self.errorFile + '.2'))
    self.assertFalse(os.path.exists(self.infoFile + '.3'))
    self.assertFalse(os.path.exists(self.errorFile + '.3'))
def process_annotations(event): """Add annotations to an image on a ``data.process`` event""" results = _itemFromEvent(event, 'AnnotationFile') if not results: return item = results['item'] user = results['user'] file = File().load(event.info.get('file', {}).get('_id'), level=AccessType.READ, user=user) if not file: logger.error('Could not load models from the database') return try: data = json.loads(b''.join(File().download(file)()).decode('utf8')) except Exception: logger.error('Could not parse annotation file') raise if not isinstance(data, list): data = [data] # Check some of the early elements to see if there are any girderIds # that need resolution. if 'uuid' in results: girderIds = [ element for annotation in data for element in annotation.get('elements', [])[:100] if 'girderId' in element ] if len(girderIds): if not resolveAnnotationGirderIds(event, results, data, girderIds): return for annotation in data: try: Annotation().createAnnotation(item, user, annotation) except Exception: logger.error('Could not create annotation object from data') raise
def _proxiedUploadChunk(self, upload, chunk): """ Clients that do not support direct-to-S3 upload behavior will go through this method by sending the chunk data as they normally would for other assetstore types. Girder will send the data to S3 on behalf of the client. """ bucket = self._getBucket() if upload['s3']['chunked']: if 'uploadId' not in upload['s3']: # Initiate a new multipart upload mp = bucket.initiate_multipart_upload( upload['s3']['key'], headers=self._getRequestHeaders(upload)) upload['s3']['uploadId'] = mp.id upload['s3']['keyName'] = mp.key_name upload['s3']['partNumber'] = 0 upload['s3']['partNumber'] += 1 s3Info = upload['s3'] size = chunk.getSize() queryParams = { 'partNumber': s3Info['partNumber'], 'uploadId': s3Info['uploadId'] } headers = {'Content-Length': str(size)} url = self._botoGenerateUrl(method='PUT', key=s3Info['key'], queryParams=queryParams, headers=headers) resp = requests.request(method='PUT', url=url, data=chunk, headers=headers) if resp.status_code not in (200, 201): logger.error( 'S3 multipart upload failure %d (uploadId=%s):\n%s' % (resp.status_code, upload['_id'], resp.text)) raise GirderException('Upload failed (bad gateway)') upload['received'] += size else: size = chunk.getSize() if size < upload['size']: raise ValidationException( 'Uploads of this length must be sent in a single chunk.') reqInfo = upload['s3']['request'] resp = requests.request(method=reqInfo['method'], url=reqInfo['url'], data=chunk, headers=dict( reqInfo['headers'], **{'Content-Length': str(size)})) if resp.status_code not in (200, 201): logger.error('S3 upload failure %d (uploadId=%s):\n%s' % (resp.status_code, upload['_id'], resp.text)) raise GirderException('Upload failed (bad gateway)') upload['received'] = size return upload
def update_status(event): simulation_model = Simulation() job = event.info['job'] if job['type'] != NLI_JOB_TYPE: return simulation_id = job['kwargs'].get('simulation_id') simulation = simulation_model.load(simulation_id, force=True) if simulation is None: logger.error(f'Could not find simulation for job {job["_id"]}') return progress = job['progress'] simulation['nli']['progress'] = 100 * (progress['current'] / progress['total']) simulation['nli']['status'] = job['status'] simulation_model.save(simulation) # update the progress for the experiment, if this is part of one if job['kwargs'].get('in_experiment'): experiment_model = Experiment() experiment = experiment_model.load(job['kwargs'].get('experiment_id'), force=True) # update the individual progress experiment['nli']['per_sim_progress'][str(simulation_id)] = simulation['nli']['progress'] per_sim_progress = experiment['nli']['per_sim_progress'] # update the total progress (defining this as the mean progress) experiment['nli']['progress'] = sum(per_sim_progress.values()) / len(per_sim_progress) # update job status experiment['nli']['per_sim_status'][str(simulation_id)] = job['status'] # any errors or cancellations count as an error or cancellation of the experiment, # experiment doesn't become active until all of the sims are active. if any( status == JobStatus.ERROR for status in experiment['nli']['per_sim_status'].values() ): experiment['nli']['status'] = JobStatus.ERROR elif any( status == JobStatus.CANCELED for status in experiment['nli']['per_sim_status'].values() ): experiment['nli']['status'] = JobStatus.CANCELED elif any( status == JobStatus.INACTIVE for status in experiment['nli']['per_sim_status'].values() ): experiment['nli']['status'] = JobStatus.INACTIVE else: # in this case, all statuses must be QUEUED, RUNNING, or SUCCESS # we take the "minimum" for the experiment's status. if any( status == JobStatus.QUEUED for status in experiment['nli']['per_sim_status'].values() ): experiment['nli']['status'] = JobStatus.QUEUED elif any( status == JobStatus.RUNNING for status in experiment['nli']['per_sim_status'].values() ): experiment['nli']['status'] = JobStatus.RUNNING else: experiment['nli']['status'] = JobStatus.SUCCESS experiment_model.save(experiment)
def _proxiedUploadChunk(self, upload, chunk): """ Clients that do not support direct-to-S3 upload behavior will go through this method by sending the chunk data as they normally would for other assetstore types. Girder will send the data to S3 on behalf of the client. """ if upload['s3']['chunked']: if 'uploadId' not in upload['s3']: # Initiate a new multipart upload if this is the first chunk disp = 'attachment; filename="%s"' % upload['name'] mime = upload.get('mimeType', '') mp = self.client.create_multipart_upload( Bucket=self.assetstore['bucket'], Key=upload['s3']['key'], ACL='private', ContentDisposition=disp, ContentType=mime, Metadata={ 'uploader-id': str(upload['userId']), 'uploader-ip': str(cherrypy.request.remote.ip) }) upload['s3']['uploadId'] = mp['UploadId'] upload['s3']['keyName'] = mp['Key'] upload['s3']['partNumber'] = 0 upload['s3']['partNumber'] += 1 size = chunk.getSize() headers = { 'Content-Length': str(size) } # We can't just call upload_part directly because they require a # seekable file object, and ours isn't. url = self._generatePresignedUrl(ClientMethod='upload_part', Params={ 'Bucket': self.assetstore['bucket'], 'Key': upload['s3']['key'], 'ContentLength': size, 'UploadId': upload['s3']['uploadId'], 'PartNumber': upload['s3']['partNumber'] }) resp = requests.request(method='PUT', url=url, data=chunk, headers=headers) if resp.status_code not in (200, 201): logger.error('S3 multipart upload failure %d (uploadId=%s):\n%s' % ( resp.status_code, upload['_id'], resp.text)) raise GirderException('Upload failed (bad gateway)') upload['received'] += size else: size = chunk.getSize() if size < upload['size']: raise ValidationException('Uploads of this length must be sent in a single chunk.') reqInfo = upload['s3']['request'] resp = requests.request( method=reqInfo['method'], url=reqInfo['url'], data=chunk, headers=dict(reqInfo['headers'], **{'Content-Length': str(size)})) if resp.status_code not in (200, 201): logger.error('S3 upload failure %d (uploadId=%s):\n%s' % ( resp.status_code, upload['_id'], resp.text)) raise GirderException('Upload failed (bad gateway)') upload['received'] = size return upload
def genRESTEndPointsForSlicerCLIsInDocker(info, restResource, dockerImages):
    """Generates REST end points for slicer CLIs placed in subdirectories of a
    given root directory and attaches them to a REST resource with the given
    name.

    For each CLI, it creates:
    * a GET Route (<apiURL>/`restResourceName`/<cliRelativePath>/xmlspec)
      that returns the xml spec of the CLI
    * a POST Route (<apiURL>/`restResourceName`/<cliRelativePath>/run)
      that runs the CLI

    It also creates a GET route (<apiURL>/`restResourceName`) that returns a
    list of relative routes to all CLIs attached to the generated REST resource

    Parameters
    ----------
    info
    restResource : str or girder.api.rest.Resource
        REST resource to which the end-points should be attached
    dockerImages : a list of docker image names

    """
    # validate restResource argument
    if not isinstance(restResource, (str, Resource)):
        raise Exception('restResource must either be a string or '
                        'an object of girder.api.rest.Resource')

    # validate dockerImages arguments
    if not isinstance(dockerImages, (str, list)):
        raise Exception('dockerImages must either be a single docker image '
                        'string or a list of docker image strings')

    if isinstance(dockerImages, list):
        for img in dockerImages:
            if not isinstance(img, str):
                raise Exception('dockerImages must either be a single '
                                'docker image string or a list of docker '
                                'image strings')
    else:
        dockerImages = [dockerImages]

    # create REST resource if given a name
    if isinstance(restResource, str):
        restResource = type(restResource,
                            (Resource, ),
                            {'resourceName': restResource})()

    restResourceName = type(restResource).__name__

    # Add REST routes for slicer CLIs in each docker image
    cliList = []

    for dimg in dockerImages:
        # check if the docker image exists
        getDockerImage(dimg, True)

        # get CLI list
        cliListSpec = getDockerImageCLIList(dimg)
        cliListSpec = json.loads(cliListSpec)

        # Add REST end-point for each CLI
        for cliRelPath in cliListSpec.keys():
            cliXML = getDockerImageCLIXMLSpec(dimg, cliRelPath)

            # create a POST REST route that runs the CLI
            try:
                cliRunHandler = genHandlerToRunDockerCLI(
                    dimg, cliRelPath, cliXML, restResource)
            except Exception:
                logger.exception('Failed to create REST endpoints for %s',
                                 cliRelPath)
                continue

            cliSuffix = os.path.normpath(cliRelPath).replace(os.sep, '_')

            cliRunHandlerName = 'run_' + cliSuffix
            setattr(restResource, cliRunHandlerName, cliRunHandler)
            restResource.route('POST',
                               (cliRelPath, 'run'),
                               getattr(restResource, cliRunHandlerName))

            # create GET REST route that returns the xml of the CLI
            try:
                cliGetXMLSpecHandler = genHandlerToGetDockerCLIXmlSpec(
                    cliRelPath, cliXML, restResource)
            except Exception:
                logger.exception('Failed to create REST endpoints for %s',
                                 cliRelPath)
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                logger.error('%r' % [exc_type, fname, exc_tb.tb_lineno])
                continue

            cliGetXMLSpecHandlerName = 'get_xml_' + cliSuffix
            setattr(restResource,
                    cliGetXMLSpecHandlerName,
                    cliGetXMLSpecHandler)
            restResource.route('GET',
                               (cliRelPath, 'xmlspec',),
                               getattr(restResource, cliGetXMLSpecHandlerName))

            cliList.append(cliRelPath)

    # create GET route that returns a list of relative routes to all CLIs
    @boundHandler(restResource)
    @access.user
    @describeRoute(
        Description('Get list of relative routes to all CLIs')
    )
    def getCLIListHandler(self, *args, **kwargs):
        return cliList

    getCLIListHandlerName = 'get_cli_list'
    setattr(restResource, getCLIListHandlerName, getCLIListHandler)
    restResource.route('GET', (), getattr(restResource, getCLIListHandlerName))

    # expose the generated REST resource via apiRoot
    setattr(info['apiRoot'], restResourceName, restResource)

    return restResource
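# Hypothetical usage sketch: wiring the generated REST resource into a
# plugin's load() hook. The resource name and docker image name are
# illustrative assumptions.
def load(info):
    genRESTEndPointsForSlicerCLIsInDocker(
        info, 'slicer_cli', ['example/slicer_cli_image:latest'])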
def genRESTEndPointsForSlicerCLIsInDockerCache(restResource, dockerCache):
    """Generates REST end points for slicer CLIs placed in subdirectories of a
    given root directory and attaches them to a REST resource with the given
    name.

    For each CLI, it creates:
    * a GET Route (<apiURL>/`restResourceName`/<cliRelativePath>/xmlspec)
      that returns the xml spec of the CLI
    * a POST Route (<apiURL>/`restResourceName`/<cliRelativePath>/run)
      that runs the CLI

    It also creates a GET route (<apiURL>/`restResourceName`) that returns a
    list of relative routes to all CLIs attached to the generated REST resource

    Parameters
    ----------
    restResource : a dockerResource
        REST resource to which the end-points should be attached
    dockerCache : DockerCache object representing data stored in settings

    """
    dockerImages = dockerCache.getImageNames()
    # validate restResource argument
    if not isinstance(restResource, Resource):
        raise Exception('restResource must be a Docker Resource')

    for dimg in dockerImages:
        docker_image = dockerCache.getImageByName(dimg)
        # get CLI list
        cliListSpec = docker_image.getCLIListSpec()

        # Add REST end-point for each CLI
        for cliRelPath in cliListSpec.keys():
            restPath = dimg.replace(':', '_').replace('/', '_').replace('@', '_')

            # create a POST REST route that runs the CLI
            try:
                cliXML = docker_image.getCLIXML(cliRelPath)
                cliRunHandler = genHandlerToRunDockerCLI(
                    dimg, cliRelPath, cliXML, restResource)
            except Exception:
                logger.exception('Failed to create REST endpoints for %r',
                                 cliRelPath)
                continue

            cliSuffix = os.path.normpath(cliRelPath).replace(os.sep, '_')

            cliRunHandlerName = restPath + '_run_' + cliSuffix
            setattr(restResource, cliRunHandlerName, cliRunHandler)
            restResource.route('POST',
                               (restPath, cliRelPath, 'run'),
                               getattr(restResource, cliRunHandlerName))

            # store new rest endpoint
            restResource.storeEndpoints(
                dimg, cliRelPath, 'run',
                ['POST', (restPath, cliRelPath, 'run'), cliRunHandlerName])

            # create GET REST route that returns the xml of the CLI
            try:
                cliGetXMLSpecHandler = genHandlerToGetDockerCLIXmlSpec(
                    cliRelPath, cliXML, restResource)
            except Exception:
                logger.exception('Failed to create REST endpoints for %s',
                                 cliRelPath)
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                logger.error('%r', [exc_type, fname, exc_tb.tb_lineno])
                continue

            cliGetXMLSpecHandlerName = restPath + '_get_xml_' + cliSuffix
            setattr(restResource,
                    cliGetXMLSpecHandlerName,
                    cliGetXMLSpecHandler)
            restResource.route('GET',
                               (restPath, cliRelPath, 'xmlspec',),
                               getattr(restResource, cliGetXMLSpecHandlerName))

            restResource.storeEndpoints(
                dimg, cliRelPath, 'xmlspec',
                ['GET', (restPath, cliRelPath, 'xmlspec'),
                 cliGetXMLSpecHandlerName])
            logger.debug('Created REST endpoints for %s', cliRelPath)

    return restResource
def results(self, params):
    def sid_exists(sid):
        """
        Determine if a session ID already exists in SMQTK.
        This currently creates the session if it doesn't already exist.
        """
        return not requests.post(self.search_url + '/session', data={
            'sid': params['sid']
        }).ok

    offset = int(params['offset'] if 'offset' in params else 0)
    limit = int(params['limit'] if 'limit' in params else 20)

    if not sid_exists(params['sid']):
        # Get pos/neg uuids from current session
        session = self.model('item').findOne({'meta.sid': params['sid']})

        if session:
            self._refine({
                'sid': params['sid'],
                'pos_uuids': session['meta']['pos_uuids'],
                'neg_uuids': session['meta']['neg_uuids']
            })

    resp = requests.get(self.search_url + '/get_results', params={
        'sid': params['sid'],
        'i': offset,
        'j': offset + limit
    }).json()  # @todo handle errors

    try:
        documents = solr_documents_from_field(
            'sha1sum_s_md', [sha for (sha, _) in resp['results']])
    except KeyError:
        return {'numFound': 0, 'docs': []}

    # The documents from Solr (since shas map to >= 1 document) may not be in
    # the order of confidence returned by IQR, sort the documents to match the
    # confidence values.
    # Sort by confidence values first, then sha checksums second so duplicate
    # images are grouped together.
    confidenceValues = dict(resp['results'])  # Mapping of sha -> confidence values

    if len(documents) < len(resp['results']):
        logger.error('SID %s: There are SMQTK descriptors that have no '
                     'corresponding Solr document(s).' % params['sid'])

    for document in documents:
        document['smqtk_iqr_confidence'] = confidenceValues[document['sha1sum_s_md']]

    return {
        'numFound': resp['total_results'],
        'docs': sorted(documents,
                       key=lambda x: (x['smqtk_iqr_confidence'], x['sha1sum_s_md']),
                       reverse=True)
    }