Example no. 1
0
    def upload_input_ports(self, port_list=None, exclude_list=None):
        """
        Upload local filesystem input-port data to account storage (S3).

        For each of the task's input ports:
            * If the port value is an existing local directory -> upload its files to:
                  gbd-customer-data/<acct_id>/<run_name>/<port_name>/
              and rewrite the port value to the resulting S3 url.
            * Otherwise (e.g. the value is already an S3 url) -> leave the port untouched.

        :param port_list: optional whitelist of port names; when given, only these ports are processed.
        :param exclude_list: optional blacklist of port names to skip.
        :returns: None. Ports are mutated in place with S3 urls.
        """

        input_ports = self._task.input_ports

        for port in input_ports:

            # If a whitelist was given, only allow port names in the list.
            if port_list and port.name not in port_list:
                continue

            # Skip ports explicitly excluded by the caller.
            if exclude_list and port.name in exclude_list:
                continue

            # Only upload ports whose value is an existing absolute directory on
            # the local filesystem; anything else is assumed to already be remote.
            if not port.value or not os.path.isabs(port.value) or not os.path.isdir(port.value):
                continue

            # The prefix for each key that is uploaded, not including the acct id.
            prefix = '{run_name}/{port}'.format(
                run_name=self._task.run_name,
                port=port.name
            )

            port_files = self._get_port_files(port.value, prefix)

            # Rewrite the port value with an S3 url even when the directory is
            # empty, so the task always ends up with a remote location.
            port.value = '%s/%s' % (self.s3_root, prefix)

            if not port_files:
                printer('Port %s is empty, push to S3 skipped' % port.name)
            else:
                self.storage.upload(port_files)
                printer('Port %s pushed to account storage, %s files' % (port.name, len(port_files)))
Example no. 2
0
    def _create_app(self):
        """
        Create a new Application Template directory from the bundled template.

        USAGE: cloud-harness create <dir_name> [--destination=<path>]

        :raises ValueError: if the destination is empty or not an existing
            directory, or if <dir_name> is not a bare directory name.
        """
        package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        template_path = os.path.join(package_root, self.TEMPLATE_FOLDER, self.TEMPLATE_FILENAME)

        dir_name = self._arguments['<dir_name>']
        destination = self._arguments.get('--destination', None)

        if destination is None:
            # No override: the name must be bare (no separators), created in the cwd.
            if os.path.isabs(dir_name) or os.path.sep in dir_name:
                raise ValueError("Directory name is invalid")
            new_dir = os.path.join(os.getcwd(), dir_name)
        else:
            if destination == '':
                raise ValueError('Destination path is empty')
            if os.path.isabs(destination) and os.path.isdir(destination):
                # Absolute, existing destination: use it directly.
                new_dir = os.path.join(destination, dir_name)
            else:
                # Otherwise treat the destination as relative to the cwd; the
                # resolved path must be an existing directory.
                candidate = os.path.join(os.getcwd(), destination)
                if not os.path.isabs(candidate) or not os.path.isdir(candidate):
                    raise ValueError('New path parameter %s is not a directory' % destination)
                new_dir = os.path.join(candidate, dir_name)

        os.makedirs(new_dir)

        new_file_path = os.path.join(new_dir, self.DEFAULT_NEW_APP_FILENAME)

        # Copy the template into the new application location.
        shutil.copyfile(template_path, new_file_path)

        printer('New Application created at %s' % new_file_path)
 def __upload_file(self, file_tuple, folder):
     """
     Upload a single local file to the account bucket.

     :param file_tuple: (local_path, file_name) pair; file_name becomes the
         final component of the S3 key.
     :param folder: key prefix (folder) under which the file is stored.
     """
     key_name = '%s/%s' % (folder, file_tuple[1])
     self.s3.upload_file(file_tuple[0], self.bucket, key_name)
     # Log only after the upload succeeded, and report the full key so the
     # message reflects the real S3 location (not just the bare filename).
     printer('%s uploaded to location %s' % (file_tuple[0], key_name))
Example no. 4
0
    def _run_app(self):
        """
        Run a custom Application Template, locally or remotely on the GBDX platform.

        NOTES:
            * The default name of the application is app.py. So this function is going to look
            for app.py, unless the --file option is provided with a different file name.
            * The generated source bundle will package everything in the work_path. If large files
            are not required for the application source, they need to be ignored. Use a file called
            "pkg_ignore" to identify folders and files to ignore.
        USAGE: cloud-harness run <file_name> [--remote] [--verbose] [--upload] [--download] [--dry-run]

        :raises NotImplementedError: if --download is requested (not implemented yet).
        :raises ValueError: if the template file cannot be found.
        """
        is_remote_run = self._arguments.get('--remote')
        filename = self._arguments.get('<file_name>')
        upload_ports = self._arguments.get('--upload')
        download_ports = self._arguments.get('--download')
        is_verbose = self._arguments.get('--verbose')
        # A dry run allows ports to be pushed up, but skips execution and monitoring.
        is_dry_run = self._arguments.get('--dry-run')

        if download_ports:  # TODO temporary until implemented.
            raise NotImplementedError("Downloading of output ports is not implemented yet.")

        # Check if the filename passed is actually a class object (gbdxtools functionality)
        if not isinstance(filename, str) and issubclass(filename, TaskTemplate):
            template_class = filename
            template_file = inspect.getfile(template_class)
            config_file = self._write_config_file(template_file)

        else:
            template_file = self._get_template_abs_path(filename)

            if not os.path.isfile(template_file):
                raise ValueError('The location %s does not exist' % template_file)

            config_file = self._write_config_file(template_file)

            template_class = self._get_class(template_file)

        with template_class() as template:
            if is_remote_run:
                task = template.task

                # Set the source bundle directory to where the template_file is.
                task.source_bundle.value = os.path.join(os.path.dirname(template_file), 'tmp_%s' % str(uuid.uuid4()))

                # Create a task service object and (re)register the task definition.
                task_service = TaskService()
                task_service.delete_task(task.name)
                printer(task_service.register_task(task.json()))

                task.run_name = '{task_name}_src'.format(
                    task_name=task.name,
                )

                src_bundle_dir = task.source_bundle.value

                # Create source bundle to be executed on the GBDX platform
                self._archive_source(os.path.dirname(src_bundle_dir), src_bundle_dir)

                port_service = PortService(task)

                if upload_ports:
                    # Push all port data to S3
                    port_service.upload_input_ports()
                else:
                    # Only push source bundle port
                    port_service.upload_input_ports(port_list=[self.SOURCE_BUNDLE_PORT])

                # Delete source bundle directory and config after upload.
                shutil.rmtree(src_bundle_dir)
                os.remove(config_file)

                # Build task json to run remotely
                self.task = port_service.task

                # Validate task
                task.is_valid(remote=True)

                workflow = Workflow(self.task)

                if is_verbose:
                    printer(template.task.json())

                    temp_wf = workflow.json

                    printer(temp_wf)

                if not is_dry_run:
                    try:
                        workflow.execute()
                        printer(workflow.id)
                    except Exception as e:
                        # BUG FIX: exceptions have no .message attribute on
                        # Python 3; format the exception object itself.
                        printer(e)
                        template.reason = "Execution Failed: %s" % e
                        return

                    # Monitor events of workflow
                    is_done = workflow.monitor_run()

                    if is_done:
                        template.reason = "Execution Completed"
                    else:
                        template.reason = "Execution Failed during Run"

                    if download_ports:
                        # port_service.download_output_port()
                        pass

                    # Note: This may be temporary while working with gbdxtools
                    # Delete task after run
                    task_service.delete_task(task.name)

            else:
                # Validate task
                template.task.is_valid()

                if is_verbose:
                    printer(template.task.json())
                    all_ports = template.task.ports[0] + template.task.ports[1]
                    printer([port.__str__() for port in all_ports])

                if not is_dry_run:
                    # Run Task Locally
                    try:
                        template.invoke()
                    except Exception as e:
                        template.reason = "Failed Exception: %s" % e
                    else:
                        # BUG FIX: only report success when invoke() did not
                        # raise; previously this line ran unconditionally and
                        # overwrote the failure reason set in the except clause.
                        template.reason = "Execution Completed"
                else:
                    template.reason = "Execution Skipped"