Beispiel #1
0
def set_progress(state, statusmsg, rusage, context):
    """Submit the plone ``set_progress`` task immediately.

    A signature for the task registered as
    ``org.bccvl.tasks.plone.set_progress`` is built and sent off with
    ``delay()``. Nothing is returned to the caller.

    :param state: forwarded to the task as its ``state`` keyword
    :param statusmsg: forwarded to the task as its ``message`` keyword
    :param rusage: forwarded to the task as its ``rusage`` keyword
    :param context: forwarded to the task as its ``context`` keyword
    """
    task_kwargs = {
        'state': state,
        'message': statusmsg,
        'rusage': rusage,
        'context': context,
    }
    progress_sig = app.signature("org.bccvl.tasks.plone.set_progress",
                                 kwargs=task_kwargs)
    progress_sig.delay()
Beispiel #2
0
def import_cleanup_job(results_dir, context):
    """Build the plone ``import_cleanup`` task signature without sending it.

    :param results_dir: forwarded to the task as its ``path`` keyword
    :param context: forwarded to the task as its ``context`` keyword
    :returns: signature for ``org.bccvl.tasks.plone.import_cleanup`` created
        with ``immutable=True``
    """
    task_kwargs = {'path': results_dir, 'context': context}
    return app.signature("org.bccvl.tasks.plone.import_cleanup",
                         kwargs=task_kwargs,
                         immutable=True)
Beispiel #3
0
def import_ala_job(items, params, context):
    """Build the plone ``import_ala`` task signature without sending it.

    :param items: forwarded to the task as its ``items`` keyword
    :param params: forwarded to the task under the ``results_dir`` keyword
        (the parameter name and the task keyword differ)
    :param context: forwarded to the task as its ``context`` keyword
    :returns: signature for ``org.bccvl.tasks.plone.import_ala`` created
        with ``immutable=True``
    """
    task_kwargs = {
        'items': items,
        'results_dir': params,
        'context': context,
    }
    return app.signature("org.bccvl.tasks.plone.import_ala",
                         kwargs=task_kwargs,
                         immutable=True)
Beispiel #4
0
def set_progress_job(state, statusmsg, rusage, context):
    """Build the plone ``set_progress`` task signature without sending it.

    :param state: forwarded to the task as its ``state`` keyword
    :param statusmsg: forwarded to the task as its ``message`` keyword
    :param rusage: forwarded to the task as its ``rusage`` keyword
    :param context: forwarded to the task as its ``context`` keyword
    :returns: signature for ``org.bccvl.tasks.plone.set_progress`` created
        with ``immutable=True``
    """
    task_kwargs = {
        'state': state,
        'message': statusmsg,
        'rusage': rusage,
        'context': context,
    }
    return app.signature("org.bccvl.tasks.plone.set_progress",
                         kwargs=task_kwargs,
                         immutable=True)
Beispiel #5
0
def import_multi_species_csv_job(url, results_dir, import_context, context):
    """Build the datamover ``import_multi_species_csv`` task signature.

    The signature is only created here; it is not submitted.

    :param url: forwarded to the task as its ``url`` keyword
    :param results_dir: forwarded to the task as its ``results_dir`` keyword
    :param import_context: forwarded to the task as its ``import_context``
        keyword
    :param context: forwarded to the task as its ``context`` keyword
    :returns: signature for
        ``org.bccvl.tasks.datamover.tasks.import_multi_species_csv`` created
        with ``immutable=True``
    """
    task_name = "org.bccvl.tasks.datamover.tasks.import_multi_species_csv"
    task_kwargs = {
        'url': url,
        'results_dir': results_dir,
        'import_context': import_context,
        'context': context,
    }
    return app.signature(task_name, kwargs=task_kwargs, immutable=True)
Beispiel #6
0
    def update_metadata(self):
        """Schedule a background metadata update for a dataset.

        The dataset is looked up by the ``uuid`` form value when one is
        given, otherwise ``self.context`` is used. A download url is derived
        from the object's portal type, an ``update_metadata`` datamover task
        is handed to ``after_commit_task`` and a job is registered through
        ``IJobTracker``.

        :returns: the id of the newly created job
        :raises NotFound: whenever anything goes wrong (unknown uuid, user
            without id, unsupported content type, ...); the underlying
            exception is only logged
        """
        uuid = self.request.form.get('uuid', None)
        try:
            if not uuid:
                obj = self.context
            else:
                brain = uuidToCatalogBrain(uuid)
                if brain is None:
                    raise Exception("Brain not found")
                obj = brain.getObject()

            # the task context carries the details of the current user
            member = ploneapi.user.get_current()
            if not member.getId():
                raise Exception("Invalid user")
            user = {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            }

            # build the download url, starting from the object's own url
            obj_url = obj.absolute_url()
            portal_type = obj.portal_type
            if portal_type in ('org.bccvl.content.dataset',
                               'org.bccvl.content.multispeciesdataset'):
                # both types keep their data in the ``file`` field
                filename = obj.file.filename
                obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
            elif portal_type == 'org.bccvl.content.remotedataset':
                filename = os.path.basename(obj.remoteUrl)
                obj_url = '{}/@@download/{}'.format(obj_url, filename)
            else:
                raise Exception("Wrong content type")

            from org.bccvl.tasks.celery import app
            task_kwargs = {
                'url': obj_url,
                'filename': filename,
                'contenttype': obj.format,
                'context': {
                    'context': '/'.join(obj.getPhysicalPath()),
                    'user': user,
                }
            }
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs=task_kwargs,
                immutable=True)

            from org.bccvl.tasks.plone import after_commit_task
            after_commit_task(update_task)

            # track background job state
            tracker = IJobTracker(obj)
            job = tracker.new_job('TODO: generate id',
                                  'generate taskname: update_metadata',
                                  function=obj.dataSource,
                                  type=portal_type)
            tracker.set_progress('PENDING', 'Metadata update pending')
            return job.id
        except Exception as e:
            LOG.error('Caught exception %s', e)
        # only reached when the block above failed
        raise NotFound(self, 'update_metadata', self.request)
Beispiel #7
0
    def export_result(self, serviceid):
        """Queue an export of this result's files to an external service.

        Collects the download urls of all contained datasets and remote
        datasets, adds the result's ``mets.xml`` and ``prov.ttl`` urls,
        hands an ``export_result`` task to ``after_commit_task``, adds a
        status message and redirects (307) back to the referring page, or
        to the parent of the result when no referrer is available.

        :param serviceid: id of the target service; passed to the task
            unvalidated
        :returns: ``(status, message)`` tuple describing the submission
        :raises NotFound: if ``self.context`` has no ``job_params``
            attribute, i.e. is not a result
        """
        # self.context should be a result
        if not hasattr(self.context, 'job_params'):
            raise NotFound(self.context, self.context.title, self.request)
        # TODO: validate serviceid

        # start export job
        context_path = '/'.join(self.context.getPhysicalPath())
        member = api.user.get_current()

        # collect list of files to export (non datasets are skipped)
        dataset_types = ('org.bccvl.content.dataset',
                         'org.bccvl.content.remotedataset')
        urllist = [IDownloadInfo(content)['url']
                   for content in self.context.values()
                   if content.portal_type in dataset_types]
        result_url = self.context.absolute_url()
        # add mets.xml
        urllist.append('{}/mets.xml'.format(result_url))
        # add prov.ttl
        urllist.append('{}/prov.ttl'.format(result_url))

        from org.bccvl.tasks.celery import app
        from org.bccvl.tasks.plone import after_commit_task
        # FIXME: Do mapping from serviceid to service type? based on interface
        #        background task will need serviceid and type, but it may resolve
        #        servicetype via API with serviceid
        export_task = app.signature(
            "org.bccvl.tasks.export_services.export_result",
            kwargs={
                'siteurl': api.portal.get().absolute_url(),
                'fileurls': urllist,
                'serviceid': serviceid,
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)

        # queue job submission
        after_commit_task(export_task)

        # self.new_job('TODO: generate id', 'generate taskname: export_result')
        # self.set_progress('PENDING', u'Result export pending')

        status = 'info'
        message = u'Export request for "{}" succesfully submitted! Please check the service and any associated email accounts to confirm the data\'s availability'.format(
            self.context.title)

        IStatusMessage(self.request).add(message, type=status)
        # Request headers are exposed under CGI-style keys, so the referrer
        # lives under 'HTTP_REFERER'; the former 'HTTP-REFERER' spelling
        # never matched and the fallback below was always taken.
        nexturl = self.request.get('HTTP_REFERER')
        if not nexturl:
            # this method should only be called on a result folder
            # we should be able to safely redirect back to the parent
            # experiment
            nexturl = self.context.__parent__.absolute_url()
        self.request.response.redirect(nexturl, 307)
        return (status, message)
Beispiel #8
0
    def export_result(self, serviceid):
        """Queue an export of this result's files to an external service.

        Collects the download urls of all contained datasets and remote
        datasets, adds the result's ``mets.xml``, ``prov.ttl`` and
        ``expmetadata.txt`` urls, hands an ``export_result`` task to
        ``after_commit_task``, adds a status message and redirects (307)
        back to the referring page, or to the parent of the result when no
        referrer is available.

        :param serviceid: id of the target service; passed to the task
            unvalidated
        :returns: ``(status, message)`` tuple describing the submission
        :raises NotFound: if ``self.context`` has no ``job_params``
            attribute, i.e. is not a result
        """
        # self.context should be a result
        if not hasattr(self.context, 'job_params'):
            raise NotFound(self.context, self.context.title, self.request)
        # TODO: validate serviceid

        # start export job
        context_path = '/'.join(self.context.getPhysicalPath())
        member = api.user.get_current()

        # collect list of files to export (non datasets are skipped)
        dataset_types = ('org.bccvl.content.dataset',
                         'org.bccvl.content.remotedataset')
        urllist = [IDownloadInfo(content)['url']
                   for content in self.context.values()
                   if content.portal_type in dataset_types]
        result_url = self.context.absolute_url()
        # add mets.xml
        urllist.append('{}/mets.xml'.format(result_url))
        # add prov.ttl
        urllist.append('{}/prov.ttl'.format(result_url))
        # add experiment metadata
        urllist.append('{}/expmetadata.txt'.format(result_url))

        from org.bccvl.tasks.celery import app
        from org.bccvl.tasks.plone import after_commit_task
        # FIXME: Do mapping from serviceid to service type? based on interface
        #        background task will need serviceid and type, but it may resolve
        #        servicetype via API with serviceid
        export_task = app.signature(
            "org.bccvl.tasks.export_services.export_result",
            kwargs={
                'siteurl': api.portal.get().absolute_url(),
                'fileurls': urllist,
                'serviceid': serviceid,
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)

        # queue job submission
        after_commit_task(export_task)

        # self.new_job('TODO: generate id', 'generate taskname: export_result')
        # self.set_progress('PENDING', u'Result export pending')

        status = 'info'
        message = u'Export request for "{}" succesfully submitted! Please check the service and any associated email accounts to confirm the data\'s availability'.format(
            self.context.title)

        IStatusMessage(self.request).add(message, type=status)
        # Request headers are exposed under CGI-style keys, so the referrer
        # lives under 'HTTP_REFERER'; the former 'HTTP-REFERER' spelling
        # never matched and the fallback below was always taken.
        nexturl = self.request.get('HTTP_REFERER')
        if not nexturl:
            # this method should only be called on a result folder
            # we should be able to safely redirect back to the parent
            # experiment
            nexturl = self.context.__parent__.absolute_url()
        self.request.response.redirect(nexturl, 307)
        return (status, message)
    def add(self, object):
        """Add ``object`` to the container and schedule its background import.

        The content is created in ``self.context``, or in the sub folder
        named by ``self.subpath`` when that can be traversed. Genre and
        categories are stored through ``IBCCVLMetadata`` and the domain and
        time period end up in ``subject``. Afterwards:

        * multi species datasets get an ``import_multi_species_csv`` task
        * all other datasets get an ``update_metadata`` task; when the form
          carries an ``_upload``, the uploaded file is first pushed to swift
          from a temporary directory (synchronously, inside this request)

        In both cases the task is handed to ``after_commit_task``, a job is
        registered through ``IJobTracker`` and the object is reindexed.

        :param object: the content object to add to the container
        """
        # FIXME: this is a workaround, which is fine for small uploaded files.
        #        large uploads should go through another process anyway
        # TODO: re implementing this method is the only way to know
        #       the full path of the object. We need the path to apply
        #       the transmogrifier chain.
        # fti = getUtility(IDexterityFTI, name=self.portal_type)
        container = aq_inner(self.context)
        try:
            # traverse to subfolder if possible
            container = container.restrictedTraverse('/'.join(self.subpath))
        except Exception:
            # best effort: fall back to the original container
            LOG.warning('Could not traverse to %s/%s',
                        '/'.join(container.getPhysicalPath()),
                        '/'.join(self.subpath))
        new_object = addContentToContainer(container, object)
        # set data genre:
        if self.datagenre:
            IBCCVLMetadata(new_object)['genre'] = self.datagenre
        if self.categories:
            IBCCVLMetadata(new_object)['categories'] = self.categories

        new_object.subject = []
        if self.domain:
            new_object.subject = [self.domain]
        if self.timeperiod:
            new_object.subject += self.timeperiod

        # rdf commit should happens in transmogrifier step later on
        # if fti.immediate_view:
        #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
        # else:
        #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
        # start background import process (just a metadata update)

        # run transmogrify md extraction here
        context_path = '/'.join(new_object.getPhysicalPath())
        member = api.user.get_current()

        def user_info():
            # a fresh dict per task context, so no payload shares state
            return {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            }

        # species extract task
        if IMultiSpeciesDataset.providedBy(new_object):
            # kick off csv split import tasks
            import_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
                kwargs={
                    'url': '{}/@@download/file/{}'.format(
                        new_object.absolute_url(),
                        new_object.file.filename),
                    'results_dir': get_results_dir(new_object,
                                                   self.request,
                                                   childSpecies=True),
                    'import_context': {
                        'context': '/'.join(container.getPhysicalPath()),
                        'user': user_info()
                    },
                    'context': {
                        'context': context_path,
                        'genre': self.datagenre,
                        'dataSource': new_object.dataSource,
                        'user': user_info()
                    }
                },
                immutable=True)
            after_commit_task(import_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            jt.new_job('TODO: generate id',
                       'generate taskname: import_multi_species_csv',
                       function=new_object.dataSource,
                       type=new_object.portal_type)
            jt.set_progress('PENDING', u'Multi species import pending')
        else:
            has_upload = hasattr(self, '_upload')
            if has_upload:
                file = self._upload['file']
                new_object.format = file.contentType
                uid = IUUID(new_object)
                swiftsettings = getUtility(IRegistry).forInterface(
                    ISwiftSettings)
                # NOTE: this local import makes ``os`` a function-local name;
                #       it is only used inside ``has_upload`` branches.
                import os.path
                swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                    storage_url=swiftsettings.storage_url,
                    container=swiftsettings.result_container,
                    path=uid,
                    name=os.path.basename(file.filename))
                new_object.remoteUrl = swift_url
            else:
                file = new_object.file
                new_object.format = file.contentType

            dlinfo = IDownloadInfo(new_object)

            # single species upload
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    'url': dlinfo['url'],
                    'filename': dlinfo['filename'],
                    'contenttype': dlinfo['contenttype'],
                    'context': {
                        'context': context_path,
                        'user': user_info()
                    }
                },
                immutable=True)
            # create upload task in case we upload to external store
            if has_upload:
                # FIXME: we can't use ssh here.... we don't know which container we are in... and
                #        sshing here is bad as well....
                # There is an upload ... we have to make sure the uploaded data ends up in external storage
                # 3. put temp file aside
                tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
                # everything after mkdtemp runs under try/finally so the
                # temp directory is removed on every failure path as well
                try:
                    tmpfile = os.path.join(tmpdir,
                                           os.path.basename(file.filename))
                    blobf = file.open()
                    try:
                        try:
                            # try rename
                            os.rename(blobf.name, tmpfile)
                        except OSError:
                            # try copy
                            shutil.copy(blobf.name, tmpfile)
                    finally:
                        # the handle was only needed for its file name
                        blobf.close()

                    # TODO: we push the uploaded file directly to swift here..
                    #       this really should be a background process
                    #       best solution: ...
                    #           user uploads to some temporary upload service (file never ends up here)
                    #           we have a remote url here, and tell the datamover to pull it from there
                    #           and move it to final destination. (or something like this)
                    #       other good way: ...
                    #           let user upload directly to swift (what about large file uploads?)
                    #           and take care of clean up if necessary

                    # 4. move file to swift
                    # TODO: do we have enough information to upload to swift?
                    #       need a temp url?
                    swiftopts = app.conf.get('bccvl', {}).get('swift', {})
                    swift_keys = ('os_auth_url', 'os_username', 'os_password',
                                  'os_project_name', 'os_storage_url',
                                  'os_user_domain_name',
                                  'os_project_domain_name', 'auth_version')
                    src_url = build_source('file://{}'.format(tmpfile))
                    dest_url = build_destination(
                        'swift+{}'.format(new_object.remoteUrl),
                        settings={
                            'swift': {key: swiftopts.get(key)
                                      for key in swift_keys}
                        })
                    # errors propagate to the caller unchanged
                    movelib.move(src_url, dest_url)
                finally:
                    # clean up temp location
                    shutil.rmtree(tmpdir)

            # queue job submission
            after_commit_task(update_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            jt.new_job('TODO: generate id',
                       'generate taskname: update_metadata',
                       function=new_object.dataSource,
                       type=new_object.portal_type)
            jt.set_progress('PENDING', u'Metadata update pending')

        # We have to reindex after updating the object
        new_object.reindexObject()
Beispiel #10
0
def import_cleanup(results_dir, context):
    """Submit the plone ``import_cleanup`` task immediately.

    A signature for the task registered as
    ``org.bccvl.tasks.plone.import_cleanup`` is built and sent off with
    ``delay()``. Nothing is returned to the caller.

    :param results_dir: forwarded to the task as its ``path`` keyword
    :param context: forwarded to the task as its ``context`` keyword
    """
    task_kwargs = {'path': results_dir, 'context': context}
    cleanup_sig = app.signature("org.bccvl.tasks.plone.import_cleanup",
                                kwargs=task_kwargs)
    cleanup_sig.delay()
Beispiel #11
0
    def add(self, object):
        """Add ``object`` to the container and schedule its background import.

        The content is created in ``self.context``, or in the sub folder
        named by ``self.subpath`` when that can be traversed. Genre and
        categories are stored through ``IBCCVLMetadata``. Afterwards:

        * multi species datasets get an ``import_multi_species_csv`` task
        * all other datasets get an ``update_metadata`` task; when the form
          carries an ``_upload``, the uploaded file is put aside in a
          temporary directory and the task becomes the chain
          ``move | update_metadata | import_cleanup``

        In both cases the task is handed to ``after_commit_task``, a job is
        registered through ``IJobTracker`` and the object is reindexed.

        :param object: the content object to add to the container
        """
        # FIXME: this is a workaround, which is fine for small uploaded files.
        #        large uploads should go through another process anyway
        # TODO: re implementing this method is the only way to know
        #       the full path of the object. We need the path to apply
        #       the transmogrifier chain.
        # fti = getUtility(IDexterityFTI, name=self.portal_type)
        container = aq_inner(self.context)
        try:
            # traverse to subfolder if possible
            container = container.restrictedTraverse('/'.join(self.subpath))
        except Exception:
            # best effort: fall back to the original container
            LOG.warning('Could not traverse to %s/%s',
                        '/'.join(container.getPhysicalPath()),
                        '/'.join(self.subpath))
        new_object = addContentToContainer(container, object)
        # set data genre:
        if self.datagenre:
            IBCCVLMetadata(new_object)['genre'] = self.datagenre
        if self.categories:
            IBCCVLMetadata(new_object)['categories'] = self.categories
        # rdf commit should happens in transmogrifier step later on
        # if fti.immediate_view:
        #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
        # else:
        #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
        # start background import process (just a metadata update)

        # run transmogrify md extraction here
        context_path = '/'.join(new_object.getPhysicalPath())
        member = api.user.get_current()

        def user_info():
            # a fresh dict per task context, so no payload shares state
            return {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            }

        def task_context(path):
            # context payload handed to every background task
            return {'context': path, 'user': user_info()}

        # species extract task
        if IMultiSpeciesDataset.providedBy(new_object):
            # kick off csv split import tasks
            import_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
                kwargs={
                    'url': '{}/@@download/file/{}'.format(
                        new_object.absolute_url(),
                        new_object.file.filename),
                    'results_dir': get_results_dir(container, self.request),
                    'import_context': task_context(
                        '/'.join(container.getPhysicalPath())),
                    'context': task_context(context_path)
                },
                immutable=True)
            after_commit_task(import_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            job = jt.new_job('TODO: generate id',
                             'generate taskname: import_multi_species_csv')
            job.type = new_object.portal_type
            jt.set_progress('PENDING', u'Multi species import pending')
        else:
            has_upload = hasattr(self, '_upload')
            if has_upload:
                file = self._upload['file']
                new_object.format = file.contentType
                uid = IUUID(new_object)
                swiftsettings = getUtility(
                    IRegistry).forInterface(ISwiftSettings)
                # NOTE: this local import makes ``os`` a function-local name;
                #       it is only used inside ``has_upload`` branches.
                import os.path
                swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                    storage_url=swiftsettings.storage_url,
                    container=swiftsettings.result_container,
                    path=uid,
                    name=os.path.basename(file.filename))
                new_object.remoteUrl = swift_url
            else:
                file = new_object.file
                new_object.format = file.contentType

            dlinfo = IDownloadInfo(new_object)

            # single species upload
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    'url': dlinfo['url'],
                    'filename': dlinfo['filename'],
                    'contenttype': dlinfo['contenttype'],
                    'context': task_context(context_path)
                },
                immutable=True)
            # create upload task in case we upload to external store
            if has_upload:
                # There is an upload ... we have to make sure the uploaded data ends up in external storage
                # 3. put temp file aside
                # (the directory is left in place on purpose: the queued
                #  move task reads from it and the cleanup task gets its path)
                tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
                tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
                blobf = file.open()
                try:
                    try:
                        # try rename
                        os.rename(blobf.name, tmpfile)
                    except OSError:
                        # try copy
                        shutil.copy(blobf.name, tmpfile)
                finally:
                    # the handle was only needed for its file name
                    blobf.close()
                # 4. update task chain
                src_url = 'scp://{uid}@{ip}:{port}{file}'.format(
                    uid=pwd.getpwuid(os.getuid()).pw_name,
                    ip=get_hostname(self.request),
                    port=os.environ.get('SSH_PORT', 22),
                    file=tmpfile)
                dest_url = 'swift+{}'.format(new_object.remoteUrl)
                move_task = app.signature(
                    'org.bccvl.tasks.datamover.tasks.move',
                    kwargs={
                        'move_args': [(src_url, dest_url)],
                        'context': task_context(context_path)
                    },
                    immutable=True)
                cleanup_task = app.signature(
                    'org.bccvl.tasks.plone.import_cleanup',
                    kwargs={
                        'path': os.path.dirname(tmpfile),
                        'context': task_context(context_path)
                    },
                    immutable=True)

                update_task = move_task | update_task | cleanup_task

                # need some more workflow states here to support e.g. zip file upload (multiple rasters),
                #      give user a chance to better define metadata
                # make sure update_metadata does not change user edited metadata
                #      -> layer, unit, projection, whatever

                # FIXME: clean up tmp upload directory as well

            # queue job submission
            after_commit_task(update_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            job = jt.new_job('TODO: generate id',
                             'generate taskname: update_metadata')
            job.type = new_object.portal_type
            jt.set_progress('PENDING', u'Metadata update pending')

        # We have to reindex after updating the object
        new_object.reindexObject()
Beispiel #12
0
    def update_metadata(self):
        """Schedule a background metadata update for a dataset.

        The dataset is looked up by the ``uuid`` form value when one is
        given, otherwise ``self.context`` is used. A download url is derived
        from the object's portal type, an ``update_metadata`` datamover task
        is handed to ``after_commit_task`` and a job is registered through
        ``IJobTracker``.

        :returns: the id of the newly created job
        :raises NotFound: whenever anything goes wrong (unknown uuid, user
            without id, unsupported content type, ...); the underlying
            exception is only logged
        """
        uuid = self.request.form.get("uuid", None)
        try:
            if not uuid:
                obj = self.context
            else:
                brain = uuidToCatalogBrain(uuid)
                if brain is None:
                    raise Exception("Brain not found")
                obj = brain.getObject()

            # the task context carries the details of the current user
            member = ploneapi.user.get_current()
            if not member.getId():
                raise Exception("Invalid user")
            user = {
                "id": member.getUserName(),
                "email": member.getProperty("email"),
                "fullname": member.getProperty("fullname"),
            }

            # build the download url, starting from the object's own url
            obj_url = obj.absolute_url()
            portal_type = obj.portal_type
            if portal_type in (
                "org.bccvl.content.dataset",
                "org.bccvl.content.multispeciesdataset",
            ):
                # both types keep their data in the ``file`` field
                filename = obj.file.filename
                obj_url = "{}/@@download/file/{}".format(obj_url, filename)
            elif portal_type == "org.bccvl.content.remotedataset":
                filename = os.path.basename(obj.remoteUrl)
                obj_url = "{}/@@download/{}".format(obj_url, filename)
            else:
                raise Exception("Wrong content type")

            from org.bccvl.tasks.celery import app

            task_kwargs = {
                "url": obj_url,
                "filename": filename,
                "contenttype": obj.format,
                "context": {
                    "context": "/".join(obj.getPhysicalPath()),
                    "user": user,
                },
            }
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs=task_kwargs,
                immutable=True,
            )

            from org.bccvl.tasks.plone import after_commit_task

            after_commit_task(update_task)

            # track background job state
            tracker = IJobTracker(obj)
            job = tracker.new_job(
                "TODO: generate id", "generate taskname: update_metadata"
            )
            job.type = portal_type
            tracker.set_progress("PENDING", "Metadata update pending")
            return job.id
        except Exception as e:
            LOG.error("Caught exception %s", e)
        # only reached when the block above failed
        raise NotFound(self, "update_metadata", self.request)
    def add(self, object):
        """Add ``object`` to the target container and queue its import job.

        The object is added to ``self.context`` (or to the sub folder named
        by ``self.subpath`` when that can be traversed).  Genre, categories
        and subject keywords are set from the form state, and then one of
        two background tasks is scheduled to run after the transaction
        commits:

        * ``import_multi_species_csv`` when the new object provides
          ``IMultiSpeciesDataset``;
        * ``update_metadata`` otherwise.  If the form carries an
          ``_upload`` attribute, the uploaded file is additionally pushed
          to swift synchronously and the object's ``remoteUrl`` is pointed
          at it.

        A job tracking entry is created in state ``PENDING`` and the object
        is reindexed at the end.

        :param object: the content object to add (name kept for
            compatibility with the overridden add-form API, although it
            shadows the builtin).
        :raises: whatever ``movelib.move`` raises if the swift upload
            fails; the temporary upload directory is removed in any case.
        """
        # FIXME: this is a workaround, which is fine for small uploaded files.
        #        large uploads should go through another process anyway
        # TODO: re implementing this method is the only way to know
        #       the full path of the object. We need the path to apply
        #       the transmogrifier chain.

        # Function-scope import kept from the original, but hoisted to the
        # top so that the local name ``os`` is bound on every code path.
        import os.path

        container = aq_inner(self.context)
        try:
            # traverse to subfolder if possible
            container = container.restrictedTraverse('/'.join(self.subpath))
        except Exception:
            # deliberate best effort: fall back to the context itself
            LOG.warning('Could not traverse to %s/%s',
                        '/'.join(container.getPhysicalPath()),
                        '/'.join(self.subpath))
        new_object = addContentToContainer(container, object)

        # set data genre / categories
        if self.datagenre:
            IBCCVLMetadata(new_object)['genre'] = self.datagenre
        if self.categories:
            IBCCVLMetadata(new_object)['categories'] = self.categories

        # subject keywords: domain first, then the time period entries
        new_object.subject = []
        if self.domain:
            new_object.subject = [self.domain]
        if self.timeperiod:
            new_object.subject += self.timeperiod
        # rdf commit should happen in a transmogrifier step later on

        # start background import process (just a metadata update)
        context_path = '/'.join(new_object.getPhysicalPath())
        member = api.user.get_current()
        # user details handed to every background task
        user_info = {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname'),
        }

        if IMultiSpeciesDataset.providedBy(new_object):
            # kick off csv split import tasks
            import_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
                kwargs={
                    'url': '{}/@@download/file/{}'.format(
                        new_object.absolute_url(),
                        new_object.file.filename),
                    'results_dir': get_results_dir(
                        new_object, self.request, childSpecies=True),
                    'import_context': {
                        'context': '/'.join(container.getPhysicalPath()),
                        # separate copy so the two contexts share no state
                        'user': dict(user_info),
                    },
                    'context': {
                        'context': context_path,
                        'genre': self.datagenre,
                        'dataSource': new_object.dataSource,
                        'user': user_info,
                    },
                },
                immutable=True)
            after_commit_task(import_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            jt.new_job('TODO: generate id',
                       'generate taskname: import_multi_species_csv',
                       function=new_object.dataSource,
                       type=new_object.portal_type)
            jt.set_progress('PENDING', u'Multi species import pending')
        else:
            has_upload = hasattr(self, '_upload')
            if has_upload:
                # the data will live in swift; point remoteUrl at its
                # final location before the download info is computed
                upload_file = self._upload['file']
                new_object.format = upload_file.contentType
                uid = IUUID(new_object)
                swiftsettings = getUtility(
                    IRegistry).forInterface(ISwiftSettings)
                new_object.remoteUrl = (
                    '{storage_url}/{container}/{path}/{name}'.format(
                        storage_url=swiftsettings.storage_url,
                        container=swiftsettings.result_container,
                        path=uid,
                        name=os.path.basename(upload_file.filename)))
            else:
                upload_file = new_object.file
                new_object.format = upload_file.contentType

            dlinfo = IDownloadInfo(new_object)

            # single species upload
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    'url': dlinfo['url'],
                    'filename': dlinfo['filename'],
                    'contenttype': dlinfo['contenttype'],
                    'context': {
                        'context': context_path,
                        'user': user_info,
                    },
                },
                immutable=True)

            # create upload task in case we upload to external store
            if has_upload:
                # FIXME: we can't use ssh here.... we don't know which
                #        container we are in... and sshing here is bad as
                #        well....
                # There is an upload ... we have to make sure the uploaded
                # data ends up in external storage
                # 3. put temp file aside
                tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
                try:
                    tmpfile = os.path.join(
                        tmpdir, os.path.basename(upload_file.filename))
                    blobf = upload_file.open()
                    try:
                        try:
                            # try rename
                            os.rename(blobf.name, tmpfile)
                        except OSError:
                            # try copy (e.g. rename across file systems)
                            shutil.copy(blobf.name, tmpfile)
                    finally:
                        # the handle was only needed for its file name
                        blobf.close()

                    # TODO: we push the uploaded file directly to swift
                    #       here.. this really should be a background
                    #       process
                    #       best solution: ...
                    #           user uploads to some temporary upload
                    #           service (file never ends up here); we have
                    #           a remote url here, and tell the datamover
                    #           to pull it from there and move it to final
                    #           destination. (or something like this)
                    #       other good way: ...
                    #           let user upload directly to swift (what
                    #           about large file uploads?) and take care
                    #           of clean up if necessary

                    # 4. move file to swift
                    # TODO: do we have enough information to upload to
                    #       swift? need a temp url?
                    swiftopts = app.conf.get('bccvl', {}).get('swift', {})
                    src_url = build_source('file://{}'.format(tmpfile))
                    dest_url = build_destination(
                        'swift+{}'.format(new_object.remoteUrl),
                        settings={'swift': {
                            'os_auth_url': swiftopts.get('os_auth_url'),
                            'os_username': swiftopts.get('os_username'),
                            'os_password': swiftopts.get('os_password'),
                            'os_tenant_name': swiftopts.get('os_tenant_name'),
                            'os_storage_url': swiftopts.get('os_storage_url'),
                        }}
                    )
                    # errors propagate to the caller unchanged
                    movelib.move(src_url, dest_url)
                finally:
                    # clean up temp location, whatever happened above
                    shutil.rmtree(tmpdir)

            # queue job submission
            after_commit_task(update_task)
            # create job tracking object
            jt = IJobTracker(new_object)
            jt.new_job('TODO: generate id',
                       'generate taskname: update_metadata',
                       function=new_object.dataSource,
                       type=new_object.portal_type)
            jt.set_progress('PENDING', u'Metadata update pending')

        # We have to reindex after updating the object
        new_object.reindexObject()
Beispiel #14
0
    def update_metadata(self):
        """Queue a background metadata update for a dataset.

        The dataset is looked up through the ``uuid`` request form value
        when one is given; otherwise the view's context is used.  On
        success an ``update_metadata`` task is registered to run after
        commit, a job tracking entry is put into state ``PENDING`` and the
        id of the new job is returned.

        Any exception raised along the way is logged and the call ends
        with ``NotFound``.
        """
        requested_uuid = self.request.form.get('uuid', None)
        try:
            # resolve the dataset to work on
            if not requested_uuid:
                target = self.context
            else:
                brain = uuidToCatalogBrain(requested_uuid)
                if brain is None:
                    raise Exception("Brain not found")
                target = brain.getObject()

            # the task runs on behalf of the current, identifiable user
            member = ploneapi.user.get_current()
            if not member.getId():
                raise Exception("Invalid user")
            user = {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            }

            # build the download url depending on where the data lives
            download_url = target.absolute_url()
            portal_type = target.portal_type
            if portal_type in ('org.bccvl.content.dataset',
                               'org.bccvl.content.multispeciesdataset'):
                # data stored in the object's file field
                filename = target.file.filename
                download_url = '{}/@@download/file/{}'.format(
                    download_url, filename)
            elif portal_type == 'org.bccvl.content.remotedataset':
                # data stored at a remote location
                filename = os.path.basename(target.remoteUrl)
                download_url = '{}/@@download/{}'.format(
                    download_url, filename)
            else:
                raise Exception("Wrong content type")

            task_context = {
                'context': '/'.join(target.getPhysicalPath()),
                'user': user,
            }
            from org.bccvl.tasks.celery import app
            update_task = app.signature(
                "org.bccvl.tasks.datamover.tasks.update_metadata",
                kwargs={
                    'url': download_url,
                    'filename': filename,
                    'contenttype': target.format,
                    'context': task_context,
                },
                immutable=True)

            from org.bccvl.tasks.plone import after_commit_task
            after_commit_task(update_task)

            # track background job state
            tracker = IJobTracker(target)
            job = tracker.new_job('TODO: generate id',
                                  'generate taskname: update_metadata',
                                  function=target.dataSource,
                                  type=target.portal_type)
            tracker.set_progress('PENDING', 'Metadata update pending')
            return job.id
        except Exception as e:
            LOG.error('Caught exception %s', e)
        # only reached when something above failed
        raise NotFound(self, 'update_metadata', self.request)