Example No. 1
    def create_dataset_record(self, folder_id):
        """
        Creates a record that describes a Dataset
        :param folder_id: Folder that represents a dataset
        :return: Dictionary that describes a dataset
        """
        try:
            folder = self.folderModel.load(folder_id,
                                           user=self.user,
                                           exc=True,
                                           level=AccessType.READ)
            provider = folder['meta']['provider']
            if provider in {'HTTP', 'HTTPS'}:
                return None
            identifier = folder['meta']['identifier']
            return {
                "@id": identifier,
                "@type": "Dataset",
                "name": folder['name'],
                "identifier": identifier,
                # "publisher": self.publishers[provider]
            }

        except (KeyError, TypeError, ValidationException):
            msg = 'While creating a manifest for Tale "{}" '.format(
                str(self.tale['_id']))
            msg += 'encountered the following error:\n'
            logger.warning(msg)
            raise  # We don't want broken manifests, do we?
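For reference, the dictionary returned for a registered external dataset has roughly this shape; the DOI and folder name below are placeholders for illustration, not values taken from the code above:

    record = {
        "@id": "doi:10.0000/EXAMPLE",
        "@type": "Dataset",
        "name": "Example dataset folder",
        "identifier": "doi:10.0000/EXAMPLE",
    }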
Example No. 2
    def _loadTileSource(cls, item, **kwargs):
        if 'largeImage' not in item:
            raise TileSourceError('No large image file in this item.')
        if item['largeImage'].get('expected'):
            raise TileSourceError('The large image file for this item is '
                                  'still pending creation.')

        sourceName = item['largeImage']['sourceName']
        try:
            # First try to use the tilesource we recorded as the preferred one.
            # This is faster than trying to find the best source each time.
            tileSource = girder_tilesource.AvailableGirderTileSources[sourceName](item, **kwargs)
        except TileSourceError as exc:
            # We could try any source
            # tileSource = girder_tilesource.getGirderTileSource(item, **kwargs)
            # but, instead, log that the original source no longer works and
            # reraise the exception
            logger.warning('The original tile source for item %s is not working' % item['_id'])
            try:
                file = File().load(item['largeImage']['fileId'], force=True)
                localPath = File().getLocalFilePath(file)
                open(localPath, 'rb').read(1)
            except IOError:
                logger.warning(
                    'Is the original data reachable and readable (it fails via %r)?', localPath)
                raise IOError(localPath) from None
            except Exception:
                pass
            raise exc
        return tileSource
Example No. 3
    def restartInterruptedTransfers(self):
        # transfers and item.dm.transferInProgress are not atomically
        # set, so use both to figure out what needs to be re-started
        activeTransfersFromItem = Models.lockModel.listDownloadingItems()
        activeTransfers = Models.transferModel.listAll()

        ids = set()
        data = []

        for item in activeTransfersFromItem:
            ids.add(item['_id'])
            data.append({
                'itemId': item['_id'],
                'ownerId': item['dm']['transfer']['userId'],
                'sessionId': item['dm']['transfer']['sessionId']
            })
        for transfer in activeTransfers:
            if not transfer['itemId'] in ids:
                data.append(transfer)

        for item in data:
            print('Restarting transfer for item ' + str(item))
            try:
                user = self.getUser(item['ownerId'])
                self.startTransfer(user, item['itemId'], item['sessionId'])
            except Exception as ex:
                logger.warning(
                    'Failed to start transfer for itemId %s. Reason: %s' %
                    (item['itemId'], str(ex)))
Example No. 4
def bind(eventName, handlerName, handler):
    """
    Bind a listener (handler) to the event identified by eventName. It is
    convention that plugins will use their own name as the handlerName, so that
    the trigger() caller can see which plugin(s) responded to the event.

    :param eventName: The name that identifies the event.
    :type eventName: str
    :param handlerName: The name that identifies the handler calling bind().
    :type handlerName: str
    :param handler: The function that will be called when the event is fired.
                    It must accept a single argument, which is the Event that
                    was created by trigger(). This function should not return
                    a value; any data that it needs to pass back to the
                    triggerer should be passed via the addResponse() method of
                    the Event.
    :type handler: function
    """
    if eventName in _deprecated:
        logger.warning('event "%s" is deprecated; %s'
                       % (eventName, _deprecated[eventName]))

    if eventName not in _mapping:
        _mapping[eventName] = []

    _mapping[eventName].append({
        'name': handlerName,
        'handler': handler
    })
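As a rough usage sketch, a plugin registers a handler by passing its own name as handlerName. The event name and plugin name below are illustrative, not taken from the code above:

    from girder import events

    def _onUserSave(event):
        # event.info carries the document that triggered the event
        print('Saved user:', event.info.get('login'))

    # 'my_plugin' identifies which plugin registered this handler
    events.bind('model.user.save.after', 'my_plugin', _onUserSave)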
Example No. 5
    def create_regex(self):
        url = self.get_base_url_setting()
        if not url.endswith('json'):
            url = urlunparse(urlparse(url)._replace(path='/api/info/version'))
        try:
            resp = urlopen(url, timeout=1)
            resp_body = resp.read()
            data = json.loads(resp_body.decode('utf-8'))
        except Exception:
            logger.warning(
                "[dataverse] failed to fetch installations, using a local copy."
            )
            with open(
                    os.path.join(os.path.dirname(__file__),
                                 "installations.json"), "r") as fp:
                data = json.load(fp)

        # in case DATAVERSE_URL points to a specific instance rather than an installation JSON
        # we need to add its domain to the regex
        single_hostname = urlparse(self.get_base_url_setting()).netloc
        domains = [
            _["hostname"]
            for _ in data.get("installations", [{
                "hostname": single_hostname
            }])
        ]
        domains += self.get_extra_hosts_setting()
        if domains:
            return re.compile("^https?://(" + "|".join(domains) + ").*$")
        else:
            return re.compile("^$")
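A minimal sketch of how the resulting pattern behaves, assuming the installations list yields two hostnames (both chosen purely for illustration):

    import re

    domains = ["dataverse.harvard.edu", "demo.dataverse.org"]
    regex = re.compile("^https?://(" + "|".join(domains) + ").*$")

    # URLs on a registered hostname match; others do not
    assert regex.match("https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.0000/EXAMPLE")
    assert regex.match("http://demo.dataverse.org/api/datasets/1")
    assert not regex.match("https://example.org/dataset.xhtml")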
Example No. 6
def process_annotations(event):
    """Add annotations to an image on a ``data.process`` event"""
    info = event.info
    identifier = None
    reference = info.get('reference', None)
    if reference is not None:
        try:
            reference = json.loads(reference)
            if (isinstance(reference, dict) and isinstance(
                    reference.get('identifier'), six.string_types)):
                identifier = reference['identifier']
        except (ValueError, TypeError):
            logger.warning('Failed to parse data.process reference: %r',
                           reference)
    if identifier is not None and identifier.endswith('AnnotationFile'):
        if 'userId' not in reference or 'itemId' not in reference:
            logger.error(
                'Annotation reference does not contain required information.')
            return

        userId = reference['userId']
        imageId = reference['itemId']

        # load model classes
        Item = ModelImporter.model('item')
        File = ModelImporter.model('file')
        User = ModelImporter.model('user')
        Annotation = ModelImporter.model('annotation', plugin='large_image')

        # load models from the database
        user = User.load(userId, force=True)
        image = File.load(imageId, level=AccessType.READ, user=user)
        item = Item.load(image['itemId'], level=AccessType.READ, user=user)
        file = File.load(info.get('file', {}).get('_id'),
                         level=AccessType.READ,
                         user=user)

        if not (item and user and file):
            logger.error('Could not load models from the database')
            return

        try:
            data = json.loads(b''.join(File.download(file)()).decode('utf8'))
        except Exception:
            logger.error('Could not parse annotation file')
            raise

        if not isinstance(data, list):
            data = [data]
        for annotation in data:
            try:
                Annotation.createAnnotation(item, user, annotation)
            except Exception:
                logger.error('Could not create annotation object from data')
                raise
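As a hedged illustration, a data.process reference that this handler would pick up could be built as follows; the identifier suffix 'AnnotationFile' is what the code checks for, and the ID strings are placeholders:

    import json

    reference = json.dumps({
        # must end with 'AnnotationFile' for the handler to process it
        'identifier': 'ExampleAnnotationFile',
        'userId': '000000000000000000000001',   # placeholder ObjectId strings
        'itemId': '000000000000000000000002',
    })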
Example No. 7
def import_recursive(job):
    try:
        root = job['kwargs']['root']
        token = job['kwargs']['token']

        user = User().load(job['userId'], force=True)

        children = list(Folder().childFolders(root, 'collection', user=user))
        count = len(children)
        progress = 0

        job = Job().updateJob(job,
                              log='Started TCGA import\n',
                              status=JobStatus.RUNNING,
                              progressCurrent=progress,
                              progressTotal=count)
        logger.info('Starting recursive TCGA import')

        for child in children:
            progress += 1
            try:
                msg = 'Importing "%s"' % child.get('name', '')
                job = Job().updateJob(job,
                                      log=msg,
                                      progressMessage=msg + '\n',
                                      progressCurrent=progress)
                logger.debug(msg)
                Cohort().importDocument(child,
                                        recurse=True,
                                        user=user,
                                        token=token,
                                        job=job)
                job = Job().load(id=job['_id'], force=True)

                # handle any request to stop execution
                if (not job or job['status']
                        in (JobStatus.CANCELED, JobStatus.ERROR)):
                    logger.info('TCGA import job halted')
                    return

            except ValidationException:
                logger.warning('Failed to import %s' % child.get('name', ''))

        logger.info('Finished recursive TCGA import')
        job = Job().updateJob(job,
                              log='Finished TCGA import\n',
                              status=JobStatus.SUCCESS,
                              progressCurrent=count,
                              progressMessage='Finished TCGA import')
    except Exception as e:
        logger.exception('Importing TCGA failed with %s' % str(e))
        job = Job().updateJob(job,
                              log='Import failed with %s\n' % str(e),
                              status=JobStatus.ERROR)
Example No. 8
def cleanup_failed_taskflows():
    queues = list(Queue().find(limit=sys.maxsize, force=True))
    for queue in queues:
        user = UserModel().load(queue['userId'], force=True)
        if user is None:
            continue

        for taskflow_id, status in queue['taskflows'].items():
            if status == TaskStatus.RUNNING:
                taskflow = TaskflowModel().load(taskflow_id, force=True)
                if taskflow['status'] in TASKFLOW_NON_RUNNING_STATES:
                    logger.warning("Removing non-running taskflow {} from the queue {}".format(taskflow_id, queue["_id"]))
                    Queue().finish(queue, taskflow, user)
Example No. 9
def _onUpload(event):
    """
    Histogram creation can be requested on file upload by passing a reference
    'histogram' that is a JSON object of the following form:

        {
          "histogram": {
            "bins": 255,
            "label": True,
            "bitmask": False
          }
        }

    bins, label, and bitmask arguments are optional
    """
    file_ = event.info['file']
    user = event.info['currentUser']
    token = event.info['currentToken']
    if 'itemId' not in file_:
        return

    try:
        ref = json.loads(event.info.get('reference', ''))
    except (TypeError, ValueError):
        return

    if not isinstance(ref, dict):
        return

    if ref.get('isHistogram'):
        # jobId = ref.get('jobId')
        fakeId = ref.get('fakeId')
        if not fakeId:
            msg = 'Histogram file %s uploaded without fakeId reference.'
            logger.warning(msg % file_['_id'])
            return
        histograms = list(Histogram().find({'fakeId': fakeId}, limit=2))
        if len(histograms) == 1:
            histogram = histograms[0]
            del histogram['expected']
            histogram['fileId'] = file_['_id']
            Histogram().save(histogram)
        else:
            msg = 'Failed to retrieve histogram for file %s using fakeId %s.'
            logger.warning(msg % (file_['_id'], fakeId))
            return
    elif isinstance(ref.get('histogram'), dict):
        item = Item().load(file_['itemId'], force=True)
        Histogram().createHistogram(item, file_, user, token,
                                    **ref['histogram'])
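Following the docstring above, the upload reference that requests histogram creation would be serialized roughly like this (bins, label, and bitmask are optional):

    import json

    reference = json.dumps({
        'histogram': {
            'bins': 255,
            'label': True,
            'bitmask': False,
        }
    })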
Example No. 10
    def createRestDataForImageVersion(self, dockerImage):
        """
        Creates a dictionary with rest endpoint information for the given
        DockerImage object

        :param dockerImage: DockerImage object

        :returns: structured dictionary documenting clis and rest
            endpoints for this image version
        """

        name = dockerImage.name

        # print name
        if name in self.currentEndpoints:
            # print name
            endpointData = self.currentEndpoints[name]

            if ':' in name:
                imageAndTag = name.split(':')
            else:
                imageAndTag = name.split('@')
            userAndRepo = imageAndTag[0]
            tag = imageAndTag[1]

            data = {}
            cli_dict = dockerImage.getCLIListSpec()

            for (cli, val) in six.iteritems(cli_dict):
                if cli not in endpointData:
                    logger.warning('"%s" not present in endpoint data.' % cli)
                    continue
                data[cli] = {}

                data[cli][DockerImage.type] = val
                cli_endpoints = endpointData[cli]

                for (operation, endpointRoute) in six.iteritems(cli_endpoints):
                    cli_list = endpointRoute[1]
                    if cli in cli_list:
                        data[cli][operation] = '/' + self.resourceName + \
                                               '/' + '/'.join(cli_list)
            return userAndRepo, tag, data
        else:
            return 'skip', 'skip', 'skip'
Example No. 11
    def create(
        self,
        tale: dict,
        name: Optional[str],
        versionsDir: Path,
        versionsRoot: dict,
        user=None,
        force=False,
    ) -> dict:
        last = self.getLastVersion(versionsRoot)
        last_restore = Folder().load(tale.get("restoredFrom", ObjectId()),
                                     force=True)
        workspace = Folder().load(tale["workspaceId"], force=True)
        crtWorkspace = Path(workspace["fsPath"])

        # NOTE: order is important, we want oldWorkspace -> last.workspace
        for version in (last_restore, last):
            oldWorkspace = (None if version is None else
                            Path(version["fsPath"]) / "workspace")
            if (not force and self.is_same(tale, version, user)
                    and self.sameTree(oldWorkspace, crtWorkspace)):
                assert version is not None
                raise RestException("Not modified",
                                    code=303,
                                    extra=str(version["_id"]))

        new_version = self.createSubdir(versionsDir,
                                        versionsRoot,
                                        name,
                                        user=user)

        try:
            self.snapshot(last, tale, new_version, user=user, force=force)
            return new_version
        except Exception:  # NOQA
            try:
                shutil.rmtree(new_version["fsPath"])
                Folder().remove(new_version)
            except Exception as ex:  # NOQA
                logger.warning(
                    "Exception caught while rolling back version ckeckpoint.",
                    ex)
            raise
Example No. 12
def _savePDF(event):
    """
    Extract PDF from submission ZIP file and save to a subfolder of the submission folder.

    Event info should contain the following fields:
    - submission: The submission document.
    - folder: The submission folder document.
    - file: The submission ZIP file document.
    """
    submission = event.info['submission']
    folder = event.info['folder']
    file = event.info['file']

    # Read submission ZIP file data into an in-memory buffer.
    # Reading into memory avoids managing temporary files and directories.
    zipData = _readFile(file)

    # Parse ZIP data to get PDF file name and data
    try:
        with zipfile.ZipFile(zipData) as zipFile:
            pdfItems = [
                zipItem for zipItem in zipFile.infolist() if _isPDF(zipItem)
            ]
            if not pdfItems or len(pdfItems) > 1:
                logger.warning(
                    'Submission ZIP file does not contain exactly one PDF file (FileId=%s)'
                    % file['_id'])
                return
            pdfItem = pdfItems[0]
            pdfFileName = os.path.basename(pdfItem.filename)
            pdfData = zipFile.read(pdfItem)
            if not pdfData:
                logger.warning(
                    'Submission ZIP file contains empty PDF file (FileId=%s)' %
                    file['_id'])
                return
    except zipfile.BadZipfile:
        logger.warning('Failed to process submission ZIP file (FileId=%s)' %
                       file['_id'])
        return

    # Save PDF file to a subfolder of the submission folder
    user = User().load(submission['creatorId'], force=True)
    abstractFolder = Folder().createFolder(parent=folder,
                                           name='Abstract',
                                           creator=user)
    abstractFile = Upload().uploadFromFile(obj=io.BytesIO(pdfData),
                                           size=len(pdfData),
                                           name=pdfFileName,
                                           parentType='folder',
                                           parent=abstractFolder,
                                           user=user,
                                           mimeType='application/pdf')

    # Set submission documentation URL
    submission['documentationUrl'] = \
        'https://challenge.kitware.com/api/v1/file/%s/download?contentDisposition=inline' % \
        abstractFile['_id']
    ModelImporter.model('submission', 'covalic').save(submission)
Example No. 13
    def _migrateACL(self, annotation):
        """
        Add access control information to an annotation model.

        Originally annotation models were not access controlled.  This function
        performs the migration for annotations created before this change was
        made.  The access object is copied from the folder containing the image
        the annotation is attached to.   In addition, the creator is given
        admin access.
        """
        if annotation is None or 'access' in annotation:
            return annotation

        item = Item().load(annotation['itemId'], force=True)
        if item is None:
            logger.warning(
                'Could not generate annotation ACL due to missing item %s',
                annotation['_id'])
            return annotation

        folder = Folder().load(item['folderId'], force=True)
        if folder is None:
            logger.warning(
                'Could not generate annotation ACL due to missing folder %s',
                annotation['_id'])
            return annotation

        user = User().load(annotation['creatorId'], force=True)
        if user is None:
            logger.warning(
                'Could not generate annotation ACL %s due to missing user',
                annotation['_id'])
            return annotation

        self.copyAccessPolicies(item, annotation, save=False)
        self.setUserAccess(annotation,
                           user,
                           AccessType.ADMIN,
                           force=True,
                           save=False)
        self.setPublic(annotation, folder.get('public'), save=False)

        # call the super class save method to avoid messing with elements
        super().save(annotation)
        logger.info('Generated annotation ACL for %s', annotation['_id'])
        return annotation
Example No. 14
def find_resource_pid(pid, base_url):
    """
    Find the PID of the resource map for a given PID, which may be a resource map.
    :param pid: The pid of the object on DataONE
    :param base_url: The base url of the node endpoint that will be used for the search
    :type pid: str
    :type base_url: str
    :return: The PID of the resource map that contains the object
    """

    result = query(
        q="identifier:\"{}\"".format(esc(pid)),
        base_url=base_url,
        fields=["identifier", "formatType", "formatId", "resourceMap"])
    result_len = int(result['response']['numFound'])

    if result_len == 0:
        error_msg = 'No object was found in the index for {}.'.format(pid)
        logger.warning(error_msg)
        raise RestException(error_msg)
    elif result_len > 1:
        error_msg = 'More than one object was found in the index for the identifier ' \
                    '{} which is an unexpected state.'.format(pid)
        logger.warning(error_msg)
        raise RestException(error_msg)

    # Find out if the PID is an OAI-ORE PID and return early if so
    try:
        if result['response']['docs'][0]['formatType'] == 'RESOURCE':
            return result['response']['docs'][0]['identifier']
    except KeyError:
        error_msg = 'Unable to find a resource file in the data package'
        logger.warning(error_msg)
        raise RestException(error_msg)

    try:
        if len(result['response']['docs'][0]['resourceMap']) == 1:
            return result['response']['docs'][0]['resourceMap'][0]
    except KeyError:
        raise RestException(
            'Unable to find a resource map for the data package')

    if len(result['response']['docs'][0]['resourceMap']) > 1:
        # Extract all of the candidate resource map PIDs (list of lists)
        resmaps = [doc['resourceMap'] for doc in result['response']['docs']]

        # Flatten the above result out and query
        # Flattening is required because the above 'resourceMap' field is a
        # Solr array type so the result is a list of lists
        nonobs = find_nonobsolete_resmaps(
            [item for items in resmaps for item in items], base_url=base_url)

        # Only return if one non-obsolete Resource Map was found
        # If we find multiple, that implies the original PID we queried for
        # is a member of multiple packages and what to do isn't implemented
        if len(nonobs) == 1:
            return nonobs[0]

    # Error out if the document passed in has multiple resource maps. What I can
    # still do here is determine the most likely resource map given the set.
    # Usually we do this by rejecting any obsoleted resource maps and that
    # usually leaves us with one.

    # If I look up
    # https://cn.dataone.org/cn/v2/resolve/urn:uuid:9266a118-78b3-48e3-a675-b3dfcc5d0fc4 the code
    # gets here. Typing that in the browser correctly results in the file being downloaded.  The
    # lookup above returns both the file
    # ('resource_map_urn:uuid:7e4586c0-9812-4355-8f3b-1445b9a8ca53') and the parent dataset
    # ('resource_map_doi:10.5063/F1JM27VG'). Shouldn't it be possible to look up single files?
    # [Mihael]
    raise RestException(
        "Multiple resource maps were for the data package, which isn't supported."
    )
Example No. 15
            continue
        # For each of our sources, try to import the named class from the
        # source module
        className = source['className']
        sourceModule = __import__(
            source['moduleName'].lstrip('.'), globals(), locals(), [className],
            len(source['moduleName']) - len(source['moduleName'].lstrip('.')))
        sourceClass = getattr(sourceModule, className)
        # Add the source class to the locals name so that it can be reached by
        # importing the tilesource module
        locals().update({className: sourceClass})
        # add it to our list of exports
        all.append(sourceClass)
        # add it to our dictionary of available sources if it has a name
        if getattr(sourceClass, 'name', None):
            AvailableTileSources[sourceClass.name] = sourceClass
    except ImportError:
        if girder:
            print(TerminalColor.error('Error: Could not import %s' % className))
            logger.exception('Error: Could not import %s' % className)
        else:
            logger.warning('Error: Could not import %s' % className)

# Create a partial function that will work through the known functions to get a
# tile source.
getTileSource = functools.partial(getTileSourceFromDict,
                                  AvailableTileSources)
all.append(getTileSource)

__all__ = all
Example No. 16
    def _getAndCacheImageOrData(
            self, item, imageFunc, checkAndCreate, keydict, pickleCache=False, **kwargs):
        """
        Get a file associated with an image that can be generated by a
        function.

        :param item: the item to process.
        :param imageFunc: the function to call to generate a file.
        :param checkAndCreate: False to return the data, creating and caching
            it if needed.  True to return True if the data is already in cache,
            or to create the data, cache, and return it if not.  'nosave' to
            return data from the cache if present, or generate the data but do
            not return it if not in the cache.  'check' to just return True or
            False to report if it is in the cache.
        :param keydict: a dictionary of values to use for the cache key.
        :param pickleCache: if True, the results of the function are pickled to
            preserve them.  If False, the results can be saved as a file
            directly.
        :param **kwargs: passed to the tile source and to the imageFunc.  May
            contain contentDisposition to determine how results are returned.
        :returns: the image data and its mime type, or a boolean when
            checkAndCreate only asks whether the data is already cached.
        """
        if 'fill' in keydict and (keydict['fill']).lower() == 'none':
            del keydict['fill']
        keydict = {k: v for k, v in keydict.items() if v is not None and not k.startswith('_')}
        key = json.dumps(keydict, sort_keys=True, separators=(',', ':'))
        existing = File().findOne({
            'attachedToType': 'item',
            'attachedToId': item['_id'],
            'isLargeImageThumbnail' if not pickleCache else 'isLargeImageData': True,
            'thumbnailKey': key,
        })
        if existing:
            if checkAndCreate and checkAndCreate != 'nosave':
                return True
            if kwargs.get('contentDisposition') != 'attachment':
                contentDisposition = 'inline'
            else:
                contentDisposition = kwargs['contentDisposition']
            if pickleCache:
                data = File().open(existing).read()
                return pickle.loads(data), 'application/octet-stream'
            return File().download(existing, contentDisposition=contentDisposition)
        if checkAndCreate == 'check':
            return False
        tileSource = self._loadTileSource(item, **kwargs)
        result = getattr(tileSource, imageFunc)(**kwargs)
        if result is None:
            imageData, imageMime = b'', 'application/octet-stream'
        elif pickleCache:
            imageData, imageMime = result, 'application/octet-stream'
        else:
            imageData, imageMime = result
        saveFile = True
        if not pickleCache:
            # The logic on which files to save could be more sophisticated.
            maxThumbnailFiles = int(Setting().get(
                constants.PluginSettings.LARGE_IMAGE_MAX_THUMBNAIL_FILES))
            saveFile = maxThumbnailFiles > 0
            # Make sure we don't exceed the desired number of thumbnails
            self.removeThumbnailFiles(
                item, maxThumbnailFiles - 1, imageKey=keydict.get('imageKey') or 'none')
        if (saveFile and checkAndCreate != 'nosave' and (
                pickleCache or isinstance(imageData, bytes))):
            dataStored = imageData if not pickleCache else pickle.dumps(imageData, protocol=4)
            # Save the data as a file
            try:
                datafile = Upload().uploadFromFile(
                    io.BytesIO(dataStored), size=len(dataStored),
                    name='_largeImageThumbnail', parentType='item', parent=item,
                    user=None, mimeType=imageMime, attachParent=True)
                if not len(dataStored) and 'received' in datafile:
                    datafile = Upload().finalizeUpload(
                        datafile, Assetstore().load(datafile['assetstoreId']))
                datafile.update({
                    'isLargeImageThumbnail' if not pickleCache else 'isLargeImageData': True,
                    'thumbnailKey': key,
                })
                # Ideally, we would check that the file is still wanted before
                # we save it.  This is probably impossible without true
                # transactions in Mongo.
                File().save(datafile)
            except (GirderException, PermissionError):
                logger.warning('Could not cache data for large image')
        return imageData, imageMime
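To illustrate how the cache key is derived, here is a small sketch; the keydict values are made up, and only the filtering and serialization mirror the code above:

    import json

    keydict = {'width': 256, 'height': 256, 'encoding': 'JPEG',
               'fill': None, '_user': 'admin'}
    # drop None values and private keys, as the method does
    keydict = {k: v for k, v in keydict.items()
               if v is not None and not k.startswith('_')}
    key = json.dumps(keydict, sort_keys=True, separators=(',', ':'))
    print(key)  # {"encoding":"JPEG","height":256,"width":256}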
Example No. 17
def _updateJob(event):
    """
    Called when a job is saved, updated, or removed.  If this is a histogram
    job and it is ended, clean up after it.
    """
    if event.name == 'jobs.job.update.after':
        job = event.info['job']
    else:
        job = event.info
    meta = job.get('meta', {})
    if (meta.get('creator') != 'histogram'
            or meta.get('task') != 'createHistogram'):
        return
    status = job['status']
    if event.name == 'model.job.remove' and status not in (JobStatus.ERROR,
                                                           JobStatus.CANCELED,
                                                           JobStatus.SUCCESS):
        status = JobStatus.CANCELED
    if status not in (JobStatus.ERROR, JobStatus.CANCELED, JobStatus.SUCCESS):
        return
    histograms = list(Histogram().find({'fakeId': meta.get('fakeId')},
                                       limit=2))
    if len(histograms) != 1:
        msg = 'Failed to retrieve histogram using fakeId %s.'
        logger.warning(msg % meta.get('fakeId'))
        return
    histogram = histograms[0]
    if histogram.get('expected'):
        # We can get a SUCCESS message before we get the upload message, so
        # don't clear the expected status on success.
        if status != JobStatus.SUCCESS:
            del histogram['expected']
    notify = histogram.get('notify')
    msg = None
    if notify:
        del histogram['notify']
        if status == JobStatus.SUCCESS:
            msg = 'Histogram created'
        elif status == JobStatus.CANCELED:
            msg = 'Histogram creation canceled'
        else:  # ERROR
            msg = 'FAILED: Histogram creation failed'
        msg += ' for item %s' % histogram['itemId']
        msg += ', file %s' % histogram['fileId']
    if status == JobStatus.SUCCESS:
        Histogram().save(histogram)
    else:
        Histogram().remove(histogram)
    if msg and event.name != 'model.job.remove':
        Job().updateJob(job, progressMessage=msg)
    if notify:
        Notification().createNotification(type='histogram.finished_histogram',
                                          data={
                                              'histogram_id': histogram['_id'],
                                              'item_id': histogram['itemId'],
                                              'file_id': histogram['fileId'],
                                              'fakeId': histogram['fakeId'],
                                              'success':
                                              status == JobStatus.SUCCESS,
                                              'status': status
                                          },
                                          user={'_id': job.get('userId')},
                                          expires=datetime.datetime.utcnow() +
                                          datetime.timedelta(seconds=30))
Example No. 18
    def _parse_dataSet(self, dataSet=None, relpath=''):
        """
        Get the basic info about the contents of `dataSet`

        Returns:
            external_objects: A list of objects that represent externally defined data
            dataset_top_identifiers: A set of DOIs for top-level packages that contain
                objects from external_objects

        """
        if dataSet is None:
            dataSet = self.tale['dataSet']

        dataset_top_identifiers = set()
        external_objects = []
        for obj in dataSet:
            try:
                doc = ModelImporter.model(obj['_modelType']).load(
                    obj['itemId'],
                    user=self.user,
                    level=AccessType.READ,
                    exc=True)
                provider_name = doc['meta']['provider']
                if provider_name.startswith('HTTP'):
                    provider_name = 'HTTP'  # TODO: handle HTTPS to make it unnecessary
                provider = IMPORT_PROVIDERS.providerMap[provider_name]
                top_identifier = provider.getDatasetUID(doc, self.user)
                if top_identifier:
                    dataset_top_identifiers.add(top_identifier)

                ext_obj = {
                    'dataset_identifier': top_identifier,
                    'provider': provider_name,
                    '_modelType': obj['_modelType'],
                    'relpath': relpath
                }

                if obj['_modelType'] == 'folder':

                    if provider_name == 'HTTP' or self.expand_folders:
                        external_objects += self._expand_folder_into_items(
                            doc, self.user)
                        continue

                    ext_obj['name'] = doc['name']
                    if doc['meta'].get('identifier') == top_identifier:
                        ext_obj['uri'] = top_identifier
                    else:
                        ext_obj['uri'] = provider.getURI(doc, self.user)
                        #  Find path to root?
                    ext_obj['size'] = 0
                    for _, f in self.folderModel.fileList(doc,
                                                          user=self.user,
                                                          subpath=False,
                                                          data=False):
                        ext_obj['size'] += f['size']

                elif obj['_modelType'] == 'item':
                    fileObj = self.itemModel.childFiles(doc)[0]
                    ext_obj.update({
                        'name': fileObj['name'],
                        'uri': fileObj['linkUrl'],
                        'size': fileObj['size']
                    })
                external_objects.append(ext_obj)
            except (ValidationException, KeyError):
                msg = 'While creating a manifest for Tale "{}" '.format(
                    str(self.tale['_id']))
                msg += 'encountered the following error:\n'
                logger.warning(msg)
                raise  # We don't want broken manifests, do we?

        return external_objects, dataset_top_identifiers
Example No. 19
def afterPostScore(event):
    """
    Post-process submissions that were successfully scored.

    In test phases, users are required to submit an abstract in PDF format that describes
    their approach. This function extracts the PDF file from the submission ZIP file and
    saves it to a subfolder of the submission folder.

    This processing runs asynchronously to avoid delaying the scoring endpoint response.
    """
    submission = ModelImporter.model('submission',
                                     'covalic').load(event.info['id'])
    phase = ModelImporter.model('phase', 'covalic').load(submission['phaseId'],
                                                         force=True)

    # Handle only submissions to ISIC 2018 Final Test phases
    isicMeta = phase.get('meta', {}).get('isic')
    if not isicMeta:
        return

    if not (isicMeta['challengeYear'] in ['2018', 'live']
            and isicMeta['phaseType'] == 'final'):
        return

    # Load submission folder
    folder = Folder().load(submission['folderId'], force=True)
    if not folder:
        logger.warning(
            'afterPostScore: Failed to load submission folder; aborting (FolderId=%s)'
            % submission['folderId'])
        return

    # Expect only one item in the folder
    items = list(Folder().childItems(folder, limit=2))
    if not items or len(items) > 1:
        logger.warning(
            'afterPostScore: Did not find exactly one item in submission folder; aborting (FolderId=%s)'
            % folder['_id'])
        return

    # Expect only one file in the item
    item = items[0]
    files = list(Item().childFiles(item, limit=2))
    if not files or len(files) > 1:
        logger.warning(
            'afterPostScore: Did not find exactly one file in submission item; aborting (ItemId=%s)'
            % item['_id'])
        return

    # Abort if submission folder already contains an 'Abstract' folder
    abstractFolder = Folder().findOne(query={
        'parentId': folder['_id'],
        'parentCollection': 'folder',
        'name': 'Abstract'
    },
                                      fields=['_id'])
    if abstractFolder is not None:
        logger.warning(
            'afterPostScore: Abstract folder already exists in submission folder; aborting '
            '(FolderId=%s)' % folder['_id'])
        return

    # Process asynchronously
    events.daemon.trigger(info={
        'submission': submission,
        'folder': folder,
        'file': files[0]
    },
                          callback=_savePDF)
Example No. 20
    def itemSortKey(self, item):
        if Lock.FIELD_LAST_UNLOCKED in item:
            return item[Lock.FIELD_LAST_UNLOCKED]
        else:
            logger.warning('Item %s does not have a dm.lastUnlocked field.' % item['_id'])
            return BEGINNING_OF_TIME
Example No. 21
    from girder.utility.model_importer import ModelImporter
    from ..models.base import TileGeneralException
    from girder.models.model_base import AccessType
except ImportError:
    import logging as logger
    girder = None

    class TileGeneralException(Exception):
        pass


# Not having PIL disables thumbnail creation, but isn't fatal
try:
    import PIL
    if int(PIL.PILLOW_VERSION.split('.')[0]) < 3:
        logger.warning('Error: Pillow v3.0 or later is required')
        PIL = None
except ImportError:
    logger.warning('Error: Could not import PIL')
    PIL = None


class TileSourceException(TileGeneralException):
    pass


class TileSourceAssetstoreException(TileSourceException):
    pass


class TileSource(object):
Example No. 22
import math
import os
import six

import PIL.Image

try:
    from girder import logger
except ImportError:
    import logging as logger
    logger.getLogger().setLevel(logger.INFO)

try:
    import numpy
except ImportError:
    logger.warning('Error: Could not import numpy')
    numpy = None

from .base import FileTileSource, TileSourceException
from ..cache_util import LruCacheMetaclass, strhash, methodcache

try:
    import girder
    from girder.models.setting import Setting
    from .base import GirderTileSource
    from .. import constants
    import cherrypy
except ImportError:
    girder = None

Example No. 23
    from girder.utility import assetstore_utilities
    from girder.utility.model_importer import ModelImporter
    from ..models.base import TileGeneralException
    from girder.models.model_base import AccessType
except ImportError:
    import logging as logger
    girder = None

    class TileGeneralException(Exception):
        pass

# Not having PIL disables thumbnail creation, but isn't fatal
try:
    import PIL
    if int(PIL.PILLOW_VERSION.split('.')[0]) < 3:
        logger.warning('Error: Pillow v3.0 or later is required')
        PIL = None
except ImportError:
    logger.warning('Error: Could not import PIL')
    PIL = None


class TileSourceException(TileGeneralException):
    pass


class TileSourceAssetstoreException(TileSourceException):
    pass


class TileSource(object):