Example #1
def get_results_dir(result, request):
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)

    # Use swift only if it is a remote dataset. For blob and multi-species
    # datasets, store locally. For any other dataset type, store in swift
    # if possible.
    do_swift = IRemoteDataset.providedBy(result) or \
               (not IMultiSpeciesDataset.providedBy(result) and \
                not IBlobDataset.providedBy(result) and \
                swiftsettings.storage_url)

    if do_swift:
        if swiftsettings.storage_url:
            results_dir = 'swift+{storage_url}/{container}/{path}/'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=IUUID(result)
            )
        else:
            raise Exception("Remote dataset requires swift url to be set")
    else:
        # if swift is not setup we use local storage
        results_dir = 'scp://{uid}@{ip}:{port}{path}/'.format(
            uid=pwd.getpwuid(os.getuid()).pw_name,
            # FIXME: hostname from request is not good enough...
            #        need to get ip or host from plone_worker that does actual
            #        import
            #        store in registry?
            #        (is ok for testing)
            # ip=get_public_ip(),
            ip=get_hostname(request),
            port=os.environ.get('SSH_PORT', 22),
            path=tempfile.mkdtemp(prefix='result_import_')
        )

    return results_dir
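
Example #1 builds one of two URL shapes: a swift+ URL when object storage is configured, otherwise an scp URL pointing at a local temp directory. A standalone sketch of the swift branch (the storage URL, container, and UUID below are made-up placeholders, not real BCCVL values):

def swift_results_url(storage_url, container, uuid):
    # same string formatting as above, isolated for illustration
    return 'swift+{storage_url}/{container}/{path}/'.format(
        storage_url=storage_url, container=container, path=uuid)

print(swift_results_url('https://swift.example.org/v1/AUTH_abc',
                        'results', 'a1b2c3d4'))
# swift+https://swift.example.org/v1/AUTH_abc/results/a1b2c3d4/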
Example #2
def get_results_dir(result, request, childSpecies=False):
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)

    # Use swift only if it is a remote dataset. For blob and multi-species
    # datasets, store locally. For any other dataset type (including the
    # child species of a multi-species dataset), store in swift if possible.
    do_swift = IRemoteDataset.providedBy(result) or \
               ((childSpecies or (not IMultiSpeciesDataset.providedBy(result))) and \
                not IBlobDataset.providedBy(result) and \
                swiftsettings.storage_url)

    if do_swift:
        if swiftsettings.storage_url:
            results_dir = 'swift+{storage_url}/{container}/{path}/'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=IUUID(result))
        else:
            raise Exception("Remote dataset requires swift url to be set")
    else:
        # if swift is not setup we use local storage
        results_dir = 'scp://{uid}@{ip}:{port}{path}/'.format(
            uid=pwd.getpwuid(os.getuid()).pw_name,
            # FIXME: hostname from request is not good enough...
            #        need to get ip or host from plone_worker that does actual
            #        import
            #        store in registry?
            #        (is ok for testing)
            # ip=get_public_ip(),
            ip=get_hostname(request),
            port=os.environ.get('SSH_PORT', 22),
            path=tempfile.mkdtemp(prefix='result_import_'))

    return results_dir
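
The only difference from Example #1 is the childSpecies flag: a child species of a multi-species dataset may now go to swift even though the parent stays local. Restated as a pure function over plain booleans (a sketch, not the BCCVL API):

def use_swift(is_remote, is_multispecies, is_blob, has_storage_url,
              child_species=False):
    return is_remote or ((child_species or not is_multispecies)
                         and not is_blob and has_storage_url)

# child species of a multi-species dataset: swift
assert use_swift(False, True, False, True, child_species=True)
# the multi-species parent itself: local storage
assert not use_swift(False, True, False, True)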
Example #3
    def getGenreSchemata(self):
        schemata = []
        md = IBCCVLMetadata(self.context)
        genre = md.get('genre')
        if genre in self.genre_interface_map:
            schemata.append(self.genre_interface_map[genre])
        if IBlobDataset.providedBy(self.context):
            schemata.append(IBlobDataset)
        if IRemoteDataset.providedBy(self.context):
            schemata.append(IRemoteDataset)
        return schemata
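
getGenreSchemata relies on zope.interface's providedBy checks. A minimal self-contained illustration of that pattern (IFoo and Foo are throwaway names, not BCCVL interfaces):

from zope.interface import Interface, implementer

class IFoo(Interface):
    pass

@implementer(IFoo)
class Foo(object):
    pass

assert IFoo.providedBy(Foo())         # instances of Foo provide IFoo
assert not IFoo.providedBy(object())  # arbitrary objects do not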
Example #5
    def __call__(self, **kw):
        jt = IJobTracker(self.context)
        # TODO: if state is empty check if there is a downloadable file
        #       Yes: COMPLETED
        #       No: FAILED
        state = jt.state
        if not state:
            if IBlobDataset.providedBy(self.context):
                # we have no state; this may happen for imported datasets,
                # so check whether we have a file
                if self.context.file is not None:
                    state = 'COMPLETED'
                else:
                    state = 'FAILED'
            elif IRemoteDataset.providedBy(self.context):
                if self.context.remoteUrl:
                    state = 'COMPLETED'
                else:
                    state = 'FAILED'
        return state
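
The state fallback can be summarised as a pure helper (illustrative only; the real code reads the job tracker first and only infers a state when none is recorded):

def fallback_state(is_blob, has_file, is_remote, has_remote_url):
    if is_blob:
        return 'COMPLETED' if has_file else 'FAILED'
    if is_remote:
        return 'COMPLETED' if has_remote_url else 'FAILED'
    return None  # nothing to infer a state from

assert fallback_state(True, True, False, False) == 'COMPLETED'
assert fallback_state(False, False, True, False) == 'FAILED'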
Example #6
    def _download_results(self, context, zfile):
        # FIXME: This is a rather lengthy process and should probably be
        #        turned into a background task (maybe as part of a
        #        datamanager service?)

        # 1. find all IBlobDataset / IRemoteDataset objects within context
        pc = getToolByName(context, 'portal_catalog')
        brains = pc.searchResults(path='/'.join(context.getPhysicalPath()),
                                  object_provides=[
                                      IBlobDataset.__identifier__,
                                      IRemoteDataset.__identifier__
                                  ])
        metadata = {}

        # the file/folder name for the zip
        zfilename = context.title
        # iterate over files and add to zip
        for brain in brains:
            content = brain.getObject()
            if IBlobDataset.providedBy(content):
                # If data is stored locally:
                arcname = '/'.join((zfilename, 'data', content.file.filename))
                # ob.file should be a NamedFile ... need to get fs name for that
                blobfile = content.file.openDetached()

                zfile.write(blobfile.name, arcname)
                blobfile.close()

            elif IRemoteDataset.providedBy(content):
                # TODO: duplicate code from
                remoteUrl = getattr(content, 'remoteUrl', None)
                if remoteUrl is None:
                    raise NotFound(self, 'remoteUrl', self.request)
                # get arcname from remoteUrl
                arcname = '/'.join(
                    (zfilename, 'data', os.path.basename(remoteUrl)))
                # FIXME: should check the dataset downloadable flag here,
                #        but the assumption is that this function can only
                #        be called on an experiment result folder...
                # TODO: duplicate code in browser/dataset.py:RemoteDatasetDownload.__call__
                # TODO: may not work in general... it always uses swift as remote url
                tool = getUtility(ISwiftUtility)
                try:
                    url = tool.generate_temp_url(url=remoteUrl)
                except Exception:
                    # fall back to the raw URL if a temp URL can't be generated
                    url = remoteUrl
                # url is now the location from which we can fetch the file
                temp_file, _ = urlretrieve(url)
                zfile.write(temp_file, arcname)
                os.remove(temp_file)
            else:
                # unknown type of Dataset
                # just skip it
                # TODO: Log warning or debug?
                continue
            metadata[arcname] = getdsmetadata(content)
        # all files are in ....
        # TODO: add experiment result metadata

        # put metadata into zip
        # provenance data stored on result container
        provdata = IProvenanceData(context)
        if provdata.data is not None:
            zfile.writestr('/'.join((zfilename, 'prov.ttl')),
                           provdata.data.encode('utf-8'))

        # add experiment metadata
        expmetadata = IExperimentMetadata(context)
        if expmetadata.data is not None:
            zfile.writestr('/'.join((zfilename, 'expmetadata.txt')),
                           expmetadata.data.encode('utf-8'))

        # add mets.xml
        metsview = getMultiAdapter((context, self.request), name="mets.xml")
        zfile.writestr('/'.join((zfilename, 'mets.xml')),
                       metsview.render().encode('utf-8'))

        # add experiment parameters
        params = IExperimentParameter(context)
        if params.data is not None:
            zfile.writestr('/'.join((zfilename, 'params.json')),
                           params.data.encode('utf-8'))
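
The method mixes the two ZipFile write calls: write() copies a file from disk into the archive, writestr() adds in-memory bytes under a chosen archive name. A self-contained illustration (all names and content here are made up):

import zipfile

with zipfile.ZipFile('example.zip', 'w') as zf:
    zf.writestr('results/prov.ttl',
                u'@prefix ex: <http://example.org/> .'.encode('utf-8'))
    zf.writestr('results/params.json', b'{"algorithm": "demo"}')

print(zipfile.ZipFile('example.zip').namelist())
# ['results/prov.ttl', 'results/params.json']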
Example #8
    def __call__(self):

        # FIXME: This is a rather lengthy process and should probably be
        #        turned into a background task (maybe as part of a
        #        datamanager service?)

        # 1. find all IBlobDataset / IRemoteDataset objects within context
        pc = getToolByName(self.context, 'portal_catalog')
        brains = pc.searchResults(path='/'.join(self.context.getPhysicalPath()),
                                  object_provides=[IBlobDataset.__identifier__,
                                                   IRemoteDataset.__identifier__])
        fname = None
        try:
            # create tmp file; open in binary mode since zip data is binary
            fd, fname = tempfile.mkstemp()
            fo = os.fdopen(fd, 'wb')
            zfile = zipfile.ZipFile(fo, 'w')

            metadata = {}

            # the file/folder name for the zip
            zfilename = self.context.title
            # iterate over files and add to zip
            for brain in brains:
                content = brain.getObject()
                if IBlobDataset.providedBy(content):
                    # If data is stored locally:
                    arcname = '/'.join((zfilename, 'data', content.file.filename))
                    # ob.file should be a NamedFile ... need to get fs name for that
                    blobfile = content.file.openDetached()

                    zfile.write(blobfile.name, arcname)
                    blobfile.close()

                elif IRemoteDataset.providedBy(content):
                    # TODO: duplicate code from
                    remoteUrl = getattr(content, 'remoteUrl', None)
                    if remoteUrl is None:
                        raise NotFound(self, 'remoteUrl', self.request)
                    # get arcname from remoteUrl
                    arcname = '/'.join((zfilename, 'data', os.path.basename(remoteUrl)))
                    # FIXME: should check the dataset downloadable flag here,
                    #        but the assumption is that this function can
                    #        only be called on an experiment result folder...
                    # TODO: duplicate code in browser/dataset.py:RemoteDatasetDownload.__call__
                    # TODO: may not work in general... it always uses swift as remote url
                    tool = getUtility(ISwiftUtility)
                    try:
                        url = tool.generate_temp_url(url=remoteUrl)
                    except Exception:
                        # fall back to the raw URL if a temp URL can't be
                        # generated
                        url = remoteUrl
                    # url is now the location from which we can fetch the file
                    temp_file, _ = urlretrieve(url)
                    zfile.write(temp_file, arcname)
                    os.remove(temp_file)
                else:
                    # unknown type of Dataset
                    # just skip it
                    # TODO: Log warning or debug?
                    continue
                metadata[arcname] = getdsmetadata(content)
            # all files are in ....
            # TODO: add experiment result metadata

            # put metadata into zip
            # provenance data stored on result container
            provdata = IProvenanceData(self.context)
            if provdata.data is not None:
                zfile.writestr('/'.join((zfilename, 'prov.ttl')),
                               provdata.data)
            # add mets.xml
            metsview = getMultiAdapter((self.context, self.request), name="mets.xml")
            zfile.writestr('/'.join((zfilename, 'mets.xml')),
                           metsview.render())
            # finish zip file
            zfile.close()

            fo.close()

            # create response
            self.request.response.setHeader('Content-Type', 'application/zip')
            self.request.response.setHeader('Content-Disposition', 'attachment; filename="{}.zip"'.format(zfilename))
            self.request.response.setHeader('Content-Length', '{}'.format(os.path.getsize(fname)))
            return tmpfile_stream_iterator(fname)
        except Exception:
            # something went wrong ...
            # clean up the temp file and re-raise (fname may still be None
            # if mkstemp itself failed)
            if fname and os.path.exists(fname):
                os.remove(fname)
            raise
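
The try/except around the temp file is a standard cleanup pattern: if anything fails after mkstemp(), remove the partial file before re-raising. In miniature (standalone, nothing BCCVL-specific):

import os
import tempfile

fname = None
try:
    fd, fname = tempfile.mkstemp()
    os.close(fd)
    # ... write the archive to fname here ...
except Exception:
    if fname and os.path.exists(fname):
        os.remove(fname)  # drop the partial file
    raise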