Esempio n. 1
0
def _ValidateAndMergeArgInputs(args):
    """Merge args.inputs and args.inputs_from_file into a single dict.

    Args:
      args: The parsed command-line arguments. Only `inputs` and
        `inputs_from_file` are read.

    Returns:
      A tuple (arg_inputs, is_local_file):
        arg_inputs: args.inputs itself (unchanged, possibly None) when
          args.inputs_from_file is empty; otherwise a new dict holding every
          entry of args.inputs plus, for each inputs-from-file key, the
          contents read from the file it names.
        is_local_file: dict mapping each key taken from
          args.inputs_from_file to True; empty when no file inputs exist.

    Raises:
      exceptions.GenomicsError: The same key is present in both args.inputs
        and args.inputs_from_file.
      files.Error: Propagated from files.ReadFileContents.
    """
    is_local_file = {}

    # If no inputs from file, then no validation or merge needed.
    if not args.inputs_from_file:
        return args.inputs, is_local_file

    arg_inputs = {}

    if args.inputs:
        # A key may be given literally or via a file, never both.
        overlap = set(args.inputs).intersection(args.inputs_from_file)
        if overlap:
            # Sort so the error message is stable across runs.
            raise exceptions.GenomicsError(
                '--{0} and --{1} may not specify overlapping values: {2}'.
                format('inputs', 'inputs-from-file',
                       ', '.join(sorted(overlap))))

        arg_inputs.update(args.inputs)

    # Replace each file path by the file's contents and remember that the
    # value originated from a local file.
    for key, value in args.inputs_from_file.items():
        arg_inputs[key] = files.ReadFileContents(value)
        is_local_file[key] = True

    return arg_inputs, is_local_file
Esempio n. 2
0
    def Run(self, args):
        """Build a pipeline run request from the arguments and submit it.

        The pipeline is either loaded from --pipeline-file or synthesized
        from --command-line. A pipeline file is first parsed with the
        v1alpha2 messages; when the parsed pipeline has no `docker` value it
        is re-parsed with the v2alpha1 messages and handled on the v2 path.
        Pipelines built from --command-line always use v2alpha1.

        Args:
          args: argparse.Namespace, All the arguments that were provided to
            this command invocation.

        Raises:
          files.Error: A file argument could not be read.
          GenomicsError: User input was invalid.
          HttpException: An http error response was received while executing
            api request.

        Returns:
          Operation representing the running pipeline.
        """
        v2 = False
        apitools_client = genomics_util.GetGenomicsClient('v1alpha2')
        genomics_messages = genomics_util.GetGenomicsMessages('v1alpha2')
        if args.pipeline_file:
            if args.command_line:
                # TODO(b/79982664): Use a mutex argument group instead.
                raise exceptions.GenomicsError(
                    '--command-line cannot be used with --pipeline-file.')

            pipeline = genomics_util.GetFileAsMessage(
                args.pipeline_file, genomics_messages.Pipeline,
                self.context[lib.STORAGE_V1_CLIENT_KEY])
            pipeline.projectId = genomics_util.GetProjectId()

            if not pipeline.docker:
                # No `docker` value after the v1alpha2 parse: switch to the
                # v2alpha1 client/messages and parse the file again.
                v2 = True
                apitools_client = genomics_util.GetGenomicsClient('v2alpha1')
                genomics_messages = genomics_util.GetGenomicsMessages(
                    'v2alpha1')
                pipeline = genomics_util.GetFileAsMessage(
                    args.pipeline_file, genomics_messages.Pipeline,
                    self.context[lib.STORAGE_V1_CLIENT_KEY])
        elif args.command_line:
            v2 = True
            apitools_client = genomics_util.GetGenomicsClient('v2alpha1')
            genomics_messages = genomics_util.GetGenomicsMessages('v2alpha1')
            pipeline = genomics_messages.Pipeline(actions=[
                genomics_messages.Action(imageUri=args.docker_image,
                                         commands=['-c', args.command_line],
                                         entrypoint='bash')
            ])
        else:
            raise exceptions.GenomicsError(
                'Either --pipeline-file or --command-line is required.')

        arg_inputs, is_local_file = _ValidateAndMergeArgInputs(args)

        if v2:
            # Create messages up front to avoid checking for None everywhere.
            if not pipeline.resources:
                pipeline.resources = genomics_messages.Resources()
            resources = pipeline.resources

            if not resources.virtualMachine:
                resources.virtualMachine = genomics_messages.VirtualMachine(
                    machineType='n1-standard-1')
            virtual_machine = resources.virtualMachine

            if not virtual_machine.serviceAccount:
                virtual_machine.serviceAccount = (
                    genomics_messages.ServiceAccount())

            # Always set the project id.
            resources.projectId = genomics_util.GetProjectId()

            # Update the pipeline based on arguments.
            if args.memory or args.cpus:
                # Custom machine types take memory in MB, so convert from GB
                # with 1024 (1000 yields sizes such as 4000 MB that are not a
                # multiple of 256 MB). Defaults mirror n1-standard-1.
                virtual_machine.machineType = 'custom-%d-%d' % (
                    args.cpus or 1, (args.memory or 3.75) * 1024)

            if args.preemptible:
                virtual_machine.preemptible = args.preemptible

            if args.zones:
                resources.zones = args.zones
            elif not resources.zones and properties.VALUES.compute.zone.Get():
                resources.zones = [properties.VALUES.compute.zone.Get()]

            if args.regions:
                resources.regions = args.regions
            elif (not resources.regions
                  and properties.VALUES.compute.region.Get()):
                resources.regions = [properties.VALUES.compute.region.Get()]

            if args.service_account_email != 'default':
                virtual_machine.serviceAccount.email = (
                    args.service_account_email)

            if args.service_account_scopes:
                virtual_machine.serviceAccount.scopes = (
                    args.service_account_scopes)

            # Always add a scope for GCS in case any arguments need it.
            virtual_machine.serviceAccount.scopes.append(
                'https://www.googleapis.com/auth/devstorage.read_write')

            # Generate paths for inputs and outputs in a shared location and
            # put them into the environment for actions based on their name.
            env = {}
            if arg_inputs:
                input_generator = _SharedPathGenerator('input')
                for name, value in arg_inputs.items():
                    if genomics_util.IsGcsPath(value):
                        # GCS input: prepend an action copying the object to
                        # the generated shared path.
                        env[name] = input_generator.Generate()
                        pipeline.actions.insert(
                            0,
                            genomics_messages.Action(
                                imageUri=CLOUD_SDK_IMAGE,
                                commands=[
                                    '/bin/sh', '-c',
                                    'gsutil -q cp %s ${%s}' % (value, name)
                                ]))
                    elif name in is_local_file:
                        # Local file contents travel inside the command
                        # itself. b64encode needs bytes and returns bytes, so
                        # encode first and decode the result; formatting the
                        # bytes directly would embed a b'...' repr.
                        encoded = base64.b64encode(value.encode()).decode()
                        env[name] = input_generator.Generate()
                        pipeline.actions.insert(
                            0,
                            genomics_messages.Action(
                                imageUri=CLOUD_SDK_IMAGE,
                                commands=[
                                    '/bin/sh', '-c',
                                    'echo "%s" | base64 -d > ${%s}' %
                                    (encoded, name)
                                ]))
                    else:
                        # Plain literal: exposed as-is through the
                        # environment.
                        env[name] = value

            if args.outputs:
                output_generator = _SharedPathGenerator('output')
                for name, value in args.outputs.items():
                    env[name] = output_generator.Generate()
                    pipeline.actions.append(
                        genomics_messages.Action(
                            imageUri=CLOUD_SDK_IMAGE,
                            commands=[
                                '/bin/sh', '-c',
                                'gsutil -q cp ${%s} %s' % (name, value)
                            ]))

            # Merge any existing pipeline arguments into the generated
            # environment and update the pipeline. Generated entries win.
            if pipeline.environment:
                for val in pipeline.environment.additionalProperties:
                    if val.key not in env:
                        env[val.key] = val.value

            env_value_cls = genomics_messages.Pipeline.EnvironmentValue
            pipeline.environment = env_value_cls(
                additionalProperties=(
                    genomics_util.ArgDictToAdditionalPropertiesList(
                        env, env_value_cls.AdditionalProperty)))

            if arg_inputs or args.outputs:
                virtual_machine.disks.append(
                    genomics_messages.Disk(name=SHARED_DISK))

                for action in pipeline.actions:
                    action.mounts.append(
                        genomics_messages.Mount(disk=SHARED_DISK,
                                                path='/' + SHARED_DISK))

            if args.logging:
                # Appended after the mount loop above, so this action does
                # not get the shared disk mounted.
                pipeline.actions.append(
                    genomics_messages.Action(
                        imageUri=CLOUD_SDK_IMAGE,
                        commands=[
                            '/bin/sh', '-c',
                            'gsutil -q cp /google/logs/output ' + args.logging
                        ],
                        flags=[(genomics_messages.Action.
                                FlagsValueListEntryValuesEnum.ALWAYS_RUN)]))

            # Update disk sizes if specified, potentially including the
            # shared disk.
            if args.disk_size:
                disk_sizes = {}
                for disk_encoding in args.disk_size.split(','):
                    parts = disk_encoding.split(':', 1)
                    try:
                        disk_sizes[parts[0]] = int(parts[1])
                    except (IndexError, ValueError):
                        # Missing ':SIZE' part or non-integer size.
                        raise exceptions.GenomicsError('Invalid --disk-size.')

                for disk in virtual_machine.disks:
                    # Disks not named in --disk-size keep their current size
                    # instead of triggering a KeyError.
                    size = disk_sizes.get(disk.name)
                    if size:
                        disk.sizeGb = size

            request = genomics_messages.RunPipelineRequest(
                pipeline=pipeline,
                labels=labels_util.ParseCreateArgs(
                    args, genomics_messages.RunPipelineRequest.LabelsValue))
        else:
            inputs = genomics_util.ArgDictToAdditionalPropertiesList(
                arg_inputs, genomics_messages.RunPipelineArgs.InputsValue.
                AdditionalProperty)
            outputs = genomics_util.ArgDictToAdditionalPropertiesList(
                args.outputs, genomics_messages.RunPipelineArgs.OutputsValue.
                AdditionalProperty)

            # Set "overrides" on the resources. If the user did not pass
            # anything on the command line, do not set anything in the
            # resource: preserve the user-intent "did not set" vs. "set an
            # empty value/list"
            resources = genomics_messages.PipelineResources(
                preemptible=args.preemptible)
            if args.memory:
                resources.minimumRamGb = args.memory
            if args.cpus:
                resources.minimumCpuCores = args.cpus
            if args.disk_size:
                resources.disks = []
                for disk_encoding in args.disk_size.split(','):
                    disk_args = disk_encoding.split(':', 1)
                    try:
                        size_gb = int(disk_args[1])
                    except (IndexError, ValueError):
                        # Report malformed input the same way the v2 path
                        # does rather than leaking a raw traceback.
                        raise exceptions.GenomicsError('Invalid --disk-size.')
                    resources.disks.append(
                        genomics_messages.Disk(name=disk_args[0],
                                               sizeGb=size_gb))

            # Progression for picking the right zones...
            #   If specified on the command line, use them.
            #   If specified in the Pipeline definition, use them.
            #   If there is a GCE default zone in the local configuration,
            #   use it.
            #   Else let the API select a zone
            if args.zones:
                resources.zones = args.zones
            elif pipeline.resources and pipeline.resources.zones:
                pass
            elif properties.VALUES.compute.zone.Get():
                resources.zones = [properties.VALUES.compute.zone.Get()]

            request = genomics_messages.RunPipelineRequest(
                ephemeralPipeline=pipeline,
                pipelineArgs=genomics_messages.RunPipelineArgs(
                    inputs=genomics_messages.RunPipelineArgs.InputsValue(
                        additionalProperties=inputs),
                    outputs=genomics_messages.RunPipelineArgs.OutputsValue(
                        additionalProperties=outputs),
                    clientId=args.run_id,
                    logging=genomics_messages.LoggingOptions(
                        gcsPath=args.logging),
                    labels=labels_util.ParseCreateArgs(
                        args, genomics_messages.RunPipelineArgs.LabelsValue),
                    projectId=genomics_util.GetProjectId(),
                    serviceAccount=genomics_messages.ServiceAccount(
                        email=args.service_account_email,
                        scopes=args.service_account_scopes),
                    resources=resources))

        result = apitools_client.pipelines.Run(request)
        log.status.Print('Running [{0}].'.format(result.name))
        return result
    def Run(self, args):
        """Build a v2alpha1 pipeline run request from the arguments and submit it.

        The pipeline is either loaded from --pipeline-file or synthesized
        from --command-line; it is then updated in place with resources,
        input/output copy actions, environment values, mounts, logging and
        disk sizes derived from the remaining arguments.

        Args:
          args: argparse.Namespace, All the arguments that were provided to
            this command invocation.

        Raises:
          files.Error: A file argument could not be read.
          GenomicsError: User input was invalid.
          HttpException: An http error response was received while executing
            api request.

        Returns:
          Operation representing the running pipeline.
        """
        apitools_client = genomics_util.GetGenomicsClient('v2alpha1')
        genomics_messages = genomics_util.GetGenomicsMessages('v2alpha1')
        if args.pipeline_file:
            if args.command_line:
                # TODO(b/79982664): Use a mutex argument group instead.
                raise exceptions.GenomicsError(
                    '--command-line cannot be used with --pipeline-file.')

            pipeline = genomics_util.GetFileAsMessage(
                args.pipeline_file, genomics_messages.Pipeline,
                self.context[lib.STORAGE_V1_CLIENT_KEY])
        elif args.command_line:
            pipeline = genomics_messages.Pipeline(actions=[
                genomics_messages.Action(imageUri=args.docker_image,
                                         commands=['-c', args.command_line],
                                         entrypoint='bash')
            ])
        else:
            raise exceptions.GenomicsError(
                'Either --pipeline-file or --command-line is required.')

        arg_inputs, is_local_file = _ValidateAndMergeArgInputs(args)

        # Create messages up front to avoid checking for None everywhere.
        if not pipeline.resources:
            pipeline.resources = genomics_messages.Resources()
        resources = pipeline.resources

        if not resources.virtualMachine:
            resources.virtualMachine = genomics_messages.VirtualMachine(
                machineType='n1-standard-1')
        virtual_machine = resources.virtualMachine

        if not virtual_machine.serviceAccount:
            virtual_machine.serviceAccount = genomics_messages.ServiceAccount()

        # Always set the project id.
        resources.projectId = genomics_util.GetProjectId()

        # Update the pipeline based on arguments.
        if args.memory or args.cpus:
            # Default to n1-standard1 sizes (1 CPU, 3.75 * 1024 memory units).
            virtual_machine.machineType = 'custom-%d-%d' % (
                args.cpus or 1, (args.memory or 3.75) * 1024)

        if args.preemptible:
            virtual_machine.preemptible = args.preemptible

        if args.zones:
            resources.zones = args.zones
        elif not resources.zones and properties.VALUES.compute.zone.Get():
            resources.zones = [properties.VALUES.compute.zone.Get()]

        if args.regions:
            resources.regions = args.regions
        elif not resources.regions and properties.VALUES.compute.region.Get():
            resources.regions = [properties.VALUES.compute.region.Get()]

        if args.service_account_email != 'default':
            virtual_machine.serviceAccount.email = args.service_account_email

        if args.service_account_scopes:
            virtual_machine.serviceAccount.scopes = args.service_account_scopes

        # Always add a scope for GCS in case any arguments need it.
        virtual_machine.serviceAccount.scopes.append(
            'https://www.googleapis.com/auth/devstorage.read_write')

        # Attach custom network/subnetwork (if set).
        if args.network or args.subnetwork:
            if not virtual_machine.network:
                virtual_machine.network = genomics_messages.Network()
            if args.network:
                virtual_machine.network.name = args.network
            if args.subnetwork:
                virtual_machine.network.subnetwork = args.subnetwork

        if args.boot_disk_size is not None:
            if args.boot_disk_size <= 0:
                raise exceptions.GenomicsError(
                    'Boot disk size must be greater than zero.')
            virtual_machine.bootDiskSizeGb = args.boot_disk_size

        # Generate paths for inputs and outputs in a shared location and put
        # them into the environment for actions based on their name.
        env = {}
        if arg_inputs:
            input_generator = _SharedPathGenerator('input')
            for name, value in arg_inputs.items():
                if genomics_util.IsGcsPath(value):
                    # GCS input: prepend an action copying the object to the
                    # generated shared path.
                    env[name] = input_generator.Generate()
                    pipeline.actions.insert(
                        0,
                        genomics_messages.Action(
                            imageUri=CLOUD_SDK_IMAGE,
                            commands=[
                                '/bin/sh', '-c',
                                'gsutil -m -q cp %s ${%s}' % (value, name)
                            ]))
                elif name in is_local_file:
                    # TODO(b/183206325): Get test coverage to 100%.
                    # Local file contents are shipped base64-encoded inside
                    # the command and decoded into the shared path.
                    encoded = base64.b64encode(value.encode()).decode()
                    env[name] = input_generator.Generate()
                    pipeline.actions.insert(
                        0,
                        genomics_messages.Action(
                            imageUri=CLOUD_SDK_IMAGE,
                            commands=[
                                '/bin/sh', '-c',
                                'echo "%s" | base64 -d > ${%s}' %
                                (encoded, name)
                            ]))
                else:
                    # Plain literal: exposed as-is through the environment.
                    env[name] = value

        if args.outputs:
            output_generator = _SharedPathGenerator('output')
            for name, value in args.outputs.items():
                env[name] = output_generator.Generate()
                pipeline.actions.append(
                    genomics_messages.Action(
                        imageUri=CLOUD_SDK_IMAGE,
                        commands=[
                            '/bin/sh', '-c',
                            'gsutil -m -q cp ${%s} %s' % (name, value)
                        ]))

        # Explicit --env-vars override generated input/output entries of the
        # same name.
        if args.env_vars:
            env.update(args.env_vars)

        # Merge any existing pipeline arguments into the generated
        # environment and update the pipeline. Entries already in env win.
        if pipeline.environment:
            for val in pipeline.environment.additionalProperties:
                if val.key not in env:
                    env[val.key] = val.value

        env_value_cls = genomics_messages.Pipeline.EnvironmentValue
        pipeline.environment = env_value_cls(
            additionalProperties=(
                genomics_util.ArgDictToAdditionalPropertiesList(
                    env, env_value_cls.AdditionalProperty)))

        if arg_inputs or args.outputs:
            virtual_machine.disks.append(
                genomics_messages.Disk(name=SHARED_DISK))

            for action in pipeline.actions:
                action.mounts.append(
                    genomics_messages.Mount(disk=SHARED_DISK,
                                            path='/' + SHARED_DISK))

        if args.logging:
            # Appended after the mount loop above, so this action does not
            # get the shared disk mounted.
            pipeline.actions.append(
                genomics_messages.Action(
                    imageUri=CLOUD_SDK_IMAGE,
                    commands=[
                        '/bin/sh', '-c',
                        'gsutil -m -q cp /google/logs/output ' + args.logging
                    ],
                    flags=[(genomics_messages.Action.
                            FlagsValueListEntryValuesEnum.ALWAYS_RUN)]))

        # Update disk sizes if specified, potentially including the shared
        # disk.
        if args.disk_size:
            disk_sizes = {}
            for disk_encoding in args.disk_size.split(','):
                parts = disk_encoding.split(':', 1)
                try:
                    disk_sizes[parts[0]] = int(parts[1])
                except (IndexError, ValueError):
                    # Missing ':SIZE' part or non-integer size. Catching only
                    # these keeps KeyboardInterrupt/SystemExit from being
                    # reported as a bad flag value.
                    raise exceptions.GenomicsError('Invalid --disk-size.')

            for disk in virtual_machine.disks:
                if disk.name in disk_sizes:
                    disk.sizeGb = disk_sizes[disk.name]

        request = genomics_messages.RunPipelineRequest(
            pipeline=pipeline,
            labels=labels_util.ParseCreateArgs(
                args, genomics_messages.RunPipelineRequest.LabelsValue))

        result = apitools_client.pipelines.Run(request)
        log.status.Print('Running [{0}].'.format(result.name))
        return result