Example #1
File: core.py Project: DDMAL/Rodan
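    # Expiry handler: removes the packaged zip from disk and marks the
    # ResultsPackage as EXPIRED, clearing its stored celery task id.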
    def run(self, rp_id):
        rp_query = ResultsPackage.objects.filter(uuid=rp_id)
        package_path = get_package_path(rp_id)
        os.remove(package_path)
        rp_query.update(status=task_status.EXPIRED, celery_task_id=None)
        return True
Example #2
    def run(self, rp_id):
        rp_query = ResultsPackage.objects.filter(uuid=rp_id)
        rp_query.update(status=task_status.PROCESSING,
                        celery_task_id=self.request.id)
        rp = rp_query.first()
        mode = rp.packaging_mode
        package_path = get_package_path(rp_id)

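        # Collect every Output of this WorkflowRun and flag the "endpoint" ones:
        # resources (or resource lists) that are not consumed as inputs by any
        # run job in the same workflow run.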
        output_objs = (
            Output.objects.filter(
                run_job__workflow_run=rp.workflow_run).select_related(
                    "resource", "resource__resource_type", "resource_list",
                    "run_job").prefetch_related("resource_list__resources").
            annotate(is_endpoint=Case(
                When(
                    condition=(
                        Q(resource__isnull=False)
                        & (Q(resource__inputs__isnull=True)
                           | ~Q(resource__inputs__run_job__workflow_run=rp.
                                workflow_run)))
                    | (Q(resource_list__isnull=False)
                       & (Q(resource_list__inputs__isnull=True)
                          | ~Q(resource_list__inputs__run_job__workflow_run=rp.
                               workflow_run))),
                    then=Value(True),
                ),
                default=Value(False),
                output_field=BooleanField(),
            )))

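        # Each output handled in the loop below advances the progress counter;
        # the copy phase accounts for 70% of the total, and the remainder is
        # reported once the package has been bagged and zipped.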
        if len(output_objs) > 0:
            percentage_increment = 70.00 / len(output_objs)
        else:
            percentage_increment = 0
        completed = 0.0

        with TemporaryDirectory() as td:
            tmp_dir = os.path.join(
                td, rp_id)  # because rp_id will be the name of the packaged zip
            bag = BagIt(tmp_dir)

            job_namefinder = self._NameFinder()
            res_namefinder = self._NameFinder()

            for output in output_objs:
                if mode == 0:
                    # only endpoint resources, grouped into subdirectories by output
                    # skip outputs that are not endpoints
                    if output.is_endpoint is False:
                        continue

                    j_name = job_namefinder.find(
                        output.run_job.workflow_job_id,
                        output.run_job.job_name)
                    opt_name = output.output_port_type_name
                    op_dir = os.path.join(tmp_dir,
                                          "{0} - {1}".format(j_name, opt_name))

                    rj_status = output.run_job.status
                    if rj_status == task_status.FINISHED:
                        if output.resource is not None:
                            filepath = output.resource.resource_file.path
                            ext = os.path.splitext(filepath)[1]

                            res_name = res_namefinder.find(
                                output.resource_id, output.resource.name
                            )  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                            result_filename = "{0}{1}".format(res_name, ext)
                            if not os.path.exists(op_dir):
                                os.makedirs(op_dir)
                            shutil.copyfile(
                                filepath, os.path.join(op_dir,
                                                       result_filename))
                        elif output.resource_list is not None:
                            res_name = res_namefinder.find(
                                output.resource_list_id,
                                output.resource_list.name
                            )  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                            result_foldername = "{0}.list".format(res_name)
                            result_folder = os.path.join(
                                op_dir, result_foldername)
                            if not os.path.exists(result_folder):
                                os.makedirs(result_folder)

                            cnt = output.resource_list.resources.count()
                            zfills = len(str(cnt))
                            for idx, r in enumerate(
                                    output.resource_list.resources.all()):
                                filepath = r.resource_file.path
                                ext = os.path.splitext(filepath)[1]
                                new_filename = "{0}{1}".format(
                                    str(idx).zfill(zfills), ext)
                                shutil.copyfile(
                                    filepath,
                                    os.path.join(result_folder, new_filename))

                elif mode == 1:
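                    # one subdirectory per resource; each copied file is named
                    # "<job name> - <output port name>"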
                    res_name = res_namefinder.find(
                        output.resource_id, output.resource.name
                    )  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                    res_dir = os.path.join(tmp_dir, res_name)

                    j_name = job_namefinder.find(
                        output.run_job.workflow_job_id,
                        output.run_job.job_name)
                    opt_name = output.output_port_type_name

                    rj_status = output.run_job.status
                    if rj_status == task_status.FINISHED:
                        if output.resource is not None:
                            filepath = output.resource.resource_file.path
                            ext = os.path.splitext(filepath)[1]
                            result_filename = "{0} - {1}{2}".format(
                                j_name, opt_name, ext)
                            if not os.path.exists(res_dir):
                                os.makedirs(res_dir)
                            shutil.copyfile(
                                filepath, os.path.join(res_dir,
                                                       result_filename))
                        elif output.resource_list is not None:
                            result_foldername = "{0} - {1}.list".format(
                                j_name, opt_name)
                            result_folder = os.path.join(
                                res_dir, result_foldername)
                            if not os.path.exists(result_folder):
                                os.makedirs(result_folder)

                            cnt = output.resource_list.resources.count()
                            zfills = len(str(cnt))
                            for idx, r in enumerate(
                                    output.resource_list.resources.all()):
                                filepath = r.resource_file.path
                                ext = os.path.splitext(filepath)[1]
                                new_filename = "{0}{1}".format(
                                    str(idx).zfill(zfills), ext)
                                shutil.copyfile(
                                    filepath,
                                    os.path.join(result_folder, new_filename))

                    elif rj_status == task_status.FAILED:
                        result_filename = "{0} - {1} - ERROR.txt".format(
                            j_name, opt_name)
                        if not os.path.exists(res_dir):
                            os.makedirs(res_dir)
                        with open(os.path.join(res_dir, result_filename),
                                  "w") as f:
                            f.write("Error Summary: ")
                            f.write(output.run_job.error_summary)
                            f.write("\n\nError Details:\n")
                            f.write(output.run_job.error_details)
                elif mode == 2:
                    raise NotImplementedError()  # [TODO]
                else:
                    raise ValueError("mode {0} is not supported".format(mode))

                completed += percentage_increment
                rp_query.update(percent_completed=int(completed))

            # print([os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(tmp_dir)) for f in fn])   # DEBUG
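            # Update and validate the BagIt bag, then zip it into the target
            # directory of the final package path.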
            bag.update()
            errors = bag.validate()
            if not bag.is_valid:
                rp_query.update(
                    status=task_status.FAILED,
                    error_summary="The bag failed validation.",
                    error_details=str(errors),
                )

            target_dir_name = os.path.dirname(package_path)
            if not os.path.isdir(target_dir_name):
                os.makedirs(target_dir_name)
            bag.package(target_dir_name, method="zip")

        rp_query.update(status=task_status.FINISHED, percent_completed=100)
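        # If an expiry time was requested, schedule rodan.core.expire_package to
        # run at that time and store its task id on the ResultsPackage.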
        expiry_time = rp_query.values_list("expiry_time", flat=True)[0]
        if expiry_time:
            async_task = registry.tasks[
                "rodan.core.expire_package"].apply_async((rp_id, ),
                                                         eta=expiry_time,
                                                         queue="celery")
            expire_task_id = async_task.task_id
        else:
            expire_task_id = None

        rp_query.update(celery_task_id=expire_task_id)
        return True
Example #3
File: core.py Project: DDMAL/Rodan
    def run(self, rp_id):
        rp_query = ResultsPackage.objects.filter(uuid=rp_id)
        rp_query.update(status=task_status.PROCESSING, celery_task_id=self.request.id)
        rp = rp_query.first()
        mode = rp.packaging_mode
        package_path = get_package_path(rp_id)

        output_objs = Output.objects.filter(
            run_job__workflow_run=rp.workflow_run
        ).select_related(
            'resource', 'resource__resource_type', 'resource_list', 'run_job'
        ).prefetch_related(
            'resource_list__resources'
        ).annotate(
            is_endpoint=Case(
                When(
                    condition=(
                        Q(resource__isnull=False)
                        & (
                            Q(resource__inputs__isnull=True)
                            | ~Q(resource__inputs__run_job__workflow_run=rp.workflow_run)
                        )
                    ) | (
                        Q(resource_list__isnull=False)
                        & (
                            Q(resource_list__inputs__isnull=True)
                            | ~Q(resource_list__inputs__run_job__workflow_run=rp.workflow_run)
                        )
                    ),
                    then=Value(True)
                ),
                default=Value(False),
                output_field=BooleanField()
            )
        )

        if len(output_objs) > 0:
            percentage_increment = 70.00 / len(output_objs)
        else:
            percentage_increment = 0
        completed = 0.0

        with TemporaryDirectory() as td:
            tmp_dir = os.path.join(td, rp_id)  # because rp_id will be the name of the packaged zip
            bag = BagIt(tmp_dir)

            job_namefinder = self._NameFinder()
            res_namefinder = self._NameFinder()

            for output in output_objs:
                if mode == 0:  # only endpoint resources, grouped into subdirectories by output
                    # skip outputs that are not endpoints
                    if output.is_endpoint is False:
                        continue

                    j_name = job_namefinder.find(output.run_job.workflow_job_id, output.run_job.job_name)
                    opt_name = output.output_port_type_name
                    op_dir = os.path.join(tmp_dir, "{0} - {1}".format(j_name, opt_name))

                    rj_status = output.run_job.status
                    if rj_status == task_status.FINISHED:
                        if output.resource is not None:
                            filepath = output.resource.resource_file.path
                            ext = os.path.splitext(filepath)[1]

                            res_name = res_namefinder.find(output.resource_id, output.resource.name)  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                            result_filename = "{0}{1}".format(res_name, ext)
                            if not os.path.exists(op_dir):
                                os.makedirs(op_dir)
                            shutil.copyfile(filepath, os.path.join(op_dir, result_filename))
                        elif output.resource_list is not None:
                            res_name = res_namefinder.find(output.resource_list_id, output.resource_list.name)  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                            result_foldername = "{0}.list".format(res_name)
                            result_folder = os.path.join(op_dir, result_foldername)
                            if not os.path.exists(result_folder):
                                os.makedirs(result_folder)

                            cnt = output.resource_list.resources.count()
                            zfills = len(str(cnt))
                            for idx, r in enumerate(output.resource_list.resources.all()):
                                filepath = r.resource_file.path
                                ext = os.path.splitext(filepath)[1]
                                new_filename = "{0}{1}".format(str(idx).zfill(zfills), ext)
                                shutil.copyfile(filepath, os.path.join(result_folder, new_filename))

                elif mode == 1:
                    res_name = res_namefinder.find(output.resource_id, output.resource.name)  # [TODO]: or... find the modified resource name if the resource_uuid still exists?
                    res_dir = os.path.join(tmp_dir, res_name)

                    j_name = job_namefinder.find(output.run_job.workflow_job_id, output.run_job.job_name)
                    opt_name = output.output_port_type_name

                    rj_status = output.run_job.status
                    if rj_status == task_status.FINISHED:
                        if output.resource is not None:
                            filepath = output.resource.resource_file.path
                            ext = os.path.splitext(filepath)[1]
                            result_filename = "{0} - {1}{2}".format(j_name, opt_name, ext)
                            if not os.path.exists(res_dir):
                                os.makedirs(res_dir)
                            shutil.copyfile(filepath, os.path.join(res_dir, result_filename))
                        elif output.resource_list is not None:
                            result_foldername = "{0} - {1}.list".format(j_name, opt_name)
                            result_folder = os.path.join(res_dir, result_foldername)
                            if not os.path.exists(result_folder):
                                os.makedirs(result_folder)

                            cnt = output.resource_list.resources.count()
                            zfills = len(str(cnt))
                            for idx, r in enumerate(output.resource_list.resources.all()):
                                filepath = r.resource_file.path
                                ext = os.path.splitext(filepath)[1]
                                new_filename = "{0}{1}".format(str(idx).zfill(zfills), ext)
                                shutil.copyfile(filepath, os.path.join(result_folder, new_filename))

                    elif rj_status == task_status.FAILED:
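                        # a failed run job gets an "<job> - <port> - ERROR.txt" file
                        # containing the error summary and details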
                        result_filename = "{0} - {1} - ERROR.txt".format(j_name, opt_name)
                        if not os.path.exists(res_dir):
                            os.makedirs(res_dir)
                        with open(os.path.join(res_dir, result_filename), 'w') as f:
                            f.write("Error Summary: ")
                            f.write(output.run_job.error_summary)
                            f.write("\n\nError Details:\n")
                            f.write(output.run_job.error_details)
                elif mode == 2:
                    raise NotImplementedError() # [TODO]
                else:
                    raise ValueError("mode {0} is not supported".format(mode))

                completed += percentage_increment
                rp_query.update(percent_completed=int(completed))

            #print [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(tmp_dir)) for f in fn]   # DEBUG
            bag.update()
            errors = bag.validate()
            if not bag.is_valid:
                rp_query.update(status=task_status.FAILED,
                                error_summary="The bag failed validation.",
                                error_details=str(errors))

            target_dir_name = os.path.dirname(package_path)
            if not os.path.isdir(target_dir_name):
                os.makedirs(target_dir_name)
            bag.package(target_dir_name, method='zip')


        rp_query.update(status=task_status.FINISHED,
                        percent_completed=100)
        expiry_time = rp_query.values_list('expiry_time', flat=True)[0]
        if expiry_time:
            async_task = registry.tasks['rodan.core.expire_package'].apply_async((rp_id, ), eta=expiry_time)
            expire_task_id = async_task.task_id
        else:
            expire_task_id = None

        rp_query.update(celery_task_id=expire_task_id)
        return True