예제 #1
0
    def Run(self, args):
        """Runs a pipeline described by --pipeline-file or --command_line.

        A pipeline file that parses with a `docker` section is submitted
        through the v1alpha2 API. Any other pipeline file, and every
        --command_line invocation, is submitted through the v2alpha1 API.

        Args:
          args: argparse.Namespace, All the arguments that were provided to
            this command invocation.

        Raises:
          files.Error: A file argument could not be read.
          GenomicsError: User input was invalid.
          HttpException: An http error response was received while executing
            api request.

        Returns:
          Operation representing the running pipeline.
        """
        v2 = False
        pipeline = None
        apitools_client = genomics_util.GetGenomicsClient('v1alpha2')
        genomics_messages = genomics_util.GetGenomicsMessages('v1alpha2')
        if args.pipeline_file:
            if args.command_line:
                raise exceptions.GenomicsError(
                    '--command_line cannot be used with --pipeline-file.')

            pipeline = genomics_util.GetFileAsMessage(
                args.pipeline_file, genomics_messages.Pipeline,
                self.context[lib.STORAGE_V1_CLIENT_KEY])
            pipeline.projectId = genomics_util.GetProjectId()

            if not pipeline.docker:
                # No v1 `docker` section: re-read the same file using the
                # v2alpha1 message definitions and switch clients.
                v2 = True
                apitools_client = genomics_util.GetGenomicsClient('v2alpha1')
                genomics_messages = genomics_util.GetGenomicsMessages(
                    'v2alpha1')
                pipeline = genomics_util.GetFileAsMessage(
                    args.pipeline_file, genomics_messages.Pipeline,
                    self.context[lib.STORAGE_V1_CLIENT_KEY])
        elif args.command_line:
            # A bare command line is wrapped in a single-action v2 pipeline
            # that runs it through bash.
            v2 = True
            apitools_client = genomics_util.GetGenomicsClient('v2alpha1')
            genomics_messages = genomics_util.GetGenomicsMessages('v2alpha1')
            pipeline = genomics_messages.Pipeline(actions=[
                genomics_messages.Action(imageUri=args.docker_image,
                                         commands=['-c', args.command_line],
                                         entrypoint='bash')
            ])
        else:
            raise exceptions.GenomicsError(
                'Either --pipeline-file or --command_line is required.')

        arg_inputs = _ValidateAndMergeArgInputs(args)

        request = None
        if v2:
            # Create messages up front to avoid checking for None everywhere.
            if not pipeline.resources:
                pipeline.resources = genomics_messages.Resources()
            resources = pipeline.resources

            if not resources.virtualMachine:
                resources.virtualMachine = genomics_messages.VirtualMachine(
                    machineType='n1-standard-1')
            virtual_machine = resources.virtualMachine

            if not virtual_machine.serviceAccount:
                virtual_machine.serviceAccount = (
                    genomics_messages.ServiceAccount())

            # Always set the project id.
            resources.projectId = genomics_util.GetProjectId()

            # Update the pipeline based on arguments.
            if args.memory or args.cpus:
                # Default to n1-standard1 sizes.
                virtual_machine.machineType = 'custom-%d-%d' % (
                    args.cpus or 1, (args.memory or 3.84) * 1000)

            if args.preemptible:
                virtual_machine.preemptible = args.preemptible

            if args.zones:
                resources.zones = args.zones
            elif (not resources.zones and
                  properties.VALUES.compute.zone.Get()):
                resources.zones = [properties.VALUES.compute.zone.Get()]

            if args.regions:
                resources.regions = args.regions
            elif (not resources.regions and
                  properties.VALUES.compute.region.Get()):
                resources.regions = [properties.VALUES.compute.region.Get()]

            if args.service_account_email != 'default':
                virtual_machine.serviceAccount.email = (
                    args.service_account_email)

            if args.service_account_scopes:
                virtual_machine.serviceAccount.scopes = (
                    args.service_account_scopes)

            # Always add a scope for GCS in case any arguments need it.
            virtual_machine.serviceAccount.scopes.append(
                'https://www.googleapis.com/auth/devstorage.read_write')

            # Generate paths for inputs and outputs in a shared location and
            # put them into the environment for actions based on their name.
            env = {}
            if arg_inputs:
                input_generator = _SharedPathGenerator('input')
                for name, value in arg_inputs.items():
                    if genomics_util.IsGcsPath(value):
                        env[name] = input_generator.Generate()
                        # Copy actions are inserted at the front so they run
                        # before the user's own actions.
                        pipeline.actions.insert(
                            0,
                            genomics_messages.Action(
                                imageUri=CLOUD_SDK_IMAGE,
                                commands=[
                                    '/bin/sh', '-c',
                                    'gsutil -q cp %s ${%s}' % (value, name)
                                ]))
                    else:
                        env[name] = value

            if args.outputs:
                output_generator = _SharedPathGenerator('output')
                for name, value in args.outputs.items():
                    env[name] = output_generator.Generate()
                    pipeline.actions.append(
                        genomics_messages.Action(
                            imageUri=CLOUD_SDK_IMAGE,
                            commands=[
                                '/bin/sh', '-c',
                                'gsutil -q cp ${%s} %s' % (name, value)
                            ]))

            # Merge any existing pipeline arguments into the generated
            # environment and update the pipeline. Generated values win over
            # values already present in the pipeline file.
            if pipeline.environment:
                for val in pipeline.environment.additionalProperties:
                    if val.key not in env:
                        env[val.key] = val.value

            pipeline.environment = genomics_messages.Pipeline.EnvironmentValue(
                additionalProperties=(
                    genomics_util.ArgDictToAdditionalPropertiesList(
                        env,
                        genomics_messages.Pipeline.EnvironmentValue.
                        AdditionalProperty)))

            if arg_inputs or args.outputs:
                virtual_machine.disks.append(
                    genomics_messages.Disk(name=SHARED_DISK))

                for action in pipeline.actions:
                    action.mounts.append(
                        genomics_messages.Mount(disk=SHARED_DISK,
                                                path='/' + SHARED_DISK))

            if args.logging:
                # Appended after the shared-disk mounts are added, so this
                # action does not get the shared disk mounted.
                pipeline.actions.append(
                    genomics_messages.Action(
                        imageUri=CLOUD_SDK_IMAGE,
                        commands=[
                            '/bin/sh', '-c',
                            'gsutil -q cp /google/logs/output ' + args.logging
                        ],
                        flags=[(genomics_messages.Action.
                                FlagsValueListEntryValuesEnum.ALWAYS_RUN)]))

            # Update disk sizes if specified, potentially including the shared
            # disk.
            if args.disk_size:
                disk_sizes = dict(self._ParseDiskSizes(args.disk_size))

                for disk in virtual_machine.disks:
                    # Disks not named in --disk-size keep their current size.
                    size = disk_sizes.get(disk.name)
                    if size:
                        disk.sizeGb = size

            request = genomics_messages.RunPipelineRequest(
                pipeline=pipeline,
                labels=labels_util.ParseCreateArgs(
                    args, genomics_messages.RunPipelineRequest.LabelsValue))
        else:
            inputs = genomics_util.ArgDictToAdditionalPropertiesList(
                arg_inputs, genomics_messages.RunPipelineArgs.InputsValue.
                AdditionalProperty)
            outputs = genomics_util.ArgDictToAdditionalPropertiesList(
                args.outputs, genomics_messages.RunPipelineArgs.OutputsValue.
                AdditionalProperty)

            # Set "overrides" on the resources. If the user did not pass
            # anything on the command line, do not set anything in the
            # resource: preserve the user-intent "did not set" vs. "set an
            # empty value/list"
            resources = genomics_messages.PipelineResources(
                preemptible=args.preemptible)
            if args.memory:
                resources.minimumRamGb = args.memory
            if args.cpus:
                resources.minimumCpuCores = args.cpus
            if args.disk_size:
                resources.disks = [
                    genomics_messages.Disk(name=name, sizeGb=size_gb)
                    for name, size_gb in self._ParseDiskSizes(args.disk_size)
                ]

            # Progression for picking the right zones...
            #   If specified on the command line, use them.
            #   If specified in the Pipeline definition, use them.
            #   If there is a GCE default zone in the local configuration, use
            #   it.
            #   Else let the API select a zone
            if args.zones:
                resources.zones = args.zones
            elif pipeline.resources and pipeline.resources.zones:
                pass
            elif properties.VALUES.compute.zone.Get():
                resources.zones = [properties.VALUES.compute.zone.Get()]

            request = genomics_messages.RunPipelineRequest(
                ephemeralPipeline=pipeline,
                pipelineArgs=genomics_messages.RunPipelineArgs(
                    inputs=genomics_messages.RunPipelineArgs.InputsValue(
                        additionalProperties=inputs),
                    outputs=genomics_messages.RunPipelineArgs.OutputsValue(
                        additionalProperties=outputs),
                    clientId=args.run_id,
                    logging=genomics_messages.LoggingOptions(
                        gcsPath=args.logging),
                    labels=labels_util.ParseCreateArgs(
                        args, genomics_messages.RunPipelineArgs.LabelsValue),
                    projectId=genomics_util.GetProjectId(),
                    serviceAccount=genomics_messages.ServiceAccount(
                        email=args.service_account_email,
                        scopes=args.service_account_scopes),
                    resources=resources))

        result = apitools_client.pipelines.Run(request)
        log.status.Print('Running [{0}].'.format(result.name))
        return result

    @staticmethod
    def _ParseDiskSizes(disk_size):
        """Parses a --disk-size value into (name, size) pairs.

        Args:
          disk_size: str, comma-separated entries of the form NAME:SIZE.

        Returns:
          A list of (name, size) tuples in the order the entries were given,
          where size is an int.

        Raises:
          GenomicsError: An entry has no ':' separator or its size is not an
            integer.
        """
        parsed = []
        for disk_encoding in disk_size.split(','):
            parts = disk_encoding.split(':', 1)
            try:
                parsed.append((parts[0], int(parts[1])))
            except (IndexError, ValueError):
                # IndexError: no ':' in the entry; ValueError: non-integer
                # size.
                raise exceptions.GenomicsError('Invalid --disk-size.')
        return parsed
예제 #2
0
  def Run(self, args):
    """Submits the pipeline read from --pipeline-file to the v1alpha2 API.

    Args:
      args: argparse.Namespace, the parsed arguments of this command
        invocation.

    Raises:
      files.Error: A file argument could not be read.
      GenomicsError: User input was invalid.
      HttpException: An http error response was received while executing api
          request.

    Returns:
      Operation representing the running pipeline.
    """
    client = genomics_util.GetGenomicsClient('v1alpha2')
    messages = genomics_util.GetGenomicsMessages('v1alpha2')

    pipeline = genomics_util.GetFileAsMessage(
        args.pipeline_file, messages.Pipeline,
        self.context[lib.STORAGE_V1_CLIENT_KEY])
    pipeline.projectId = genomics_util.GetProjectId()

    merged_inputs = _ValidateAndMergeArgInputs(args)
    run_args = messages.RunPipelineArgs

    input_props = genomics_util.ArgDictToAdditionalPropertiesList(
        merged_inputs, run_args.InputsValue.AdditionalProperty)
    output_props = genomics_util.ArgDictToAdditionalPropertiesList(
        args.outputs, run_args.OutputsValue.AdditionalProperty)

    # Only fields the user actually supplied are set on the override message,
    # so "not set" stays distinguishable from "set to an empty value/list".
    overrides = messages.PipelineResources(preemptible=args.preemptible)
    if args.memory:
      overrides.minimumRamGb = args.memory
    if args.disk_size:
      # Each comma-separated entry is NAME:SIZE.
      overrides.disks = [
          messages.Disk(name=fields[0], sizeGb=int(fields[1]))
          for fields in (entry.split(':', 1)
                         for entry in args.disk_size.split(','))
      ]

    # Zone precedence: command line, then the pipeline definition, then the
    # locally configured GCE default zone; otherwise the API chooses.
    if args.zones:
      overrides.zones = args.zones
    elif not (pipeline.resources and pipeline.resources.zones):
      default_zone = properties.VALUES.compute.zone.Get()
      if default_zone:
        overrides.zones = [default_zone]

    pipeline_args = run_args(
        inputs=run_args.InputsValue(additionalProperties=input_props),
        outputs=run_args.OutputsValue(additionalProperties=output_props),
        clientId=args.run_id,
        logging=messages.LoggingOptions(gcsPath=args.logging),
        labels=labels_util.Diff.FromCreateArgs(args).Apply(
            run_args.LabelsValue),
        projectId=genomics_util.GetProjectId(),
        serviceAccount=messages.ServiceAccount(
            email=args.service_account_email,
            scopes=args.service_account_scopes),
        resources=overrides)

    operation = client.pipelines.Run(
        messages.RunPipelineRequest(
            ephemeralPipeline=pipeline, pipelineArgs=pipeline_args))
    log.status.Print('Running [{0}].'.format(operation.name))
    return operation