def _ValidateAndMergeArgInputs(args): """Turn args.inputs and args.inputs_from_file dicts into a single dict. Args: args: The parsed command-line arguments Returns: A dict that is the merge of args.inputs and args.inputs_from_file Raises: files.Error """ is_local_file = {} # If no inputs from file, then no validation or merge needed if not args.inputs_from_file: return args.inputs, is_local_file # Initialize the merged dictionary arg_inputs = {} if args.inputs: # Validate args.inputs and args.inputs-from-file do not overlap overlap = set(args.inputs.keys()).intersection( set(args.inputs_from_file.keys())) if overlap: raise exceptions.GenomicsError( '--{0} and --{1} may not specify overlapping values: {2}'. format('inputs', 'inputs-from-file', ', '.join(overlap))) # Add the args.inputs arg_inputs.update(args.inputs) # Read up the inputs-from-file and add the values from the file for key, value in six.iteritems(args.inputs_from_file): arg_inputs[key] = files.ReadFileContents(value) is_local_file[key] = True return arg_inputs, is_local_file
def Run(self, args):
    """This is what gets called when the user runs this command.

    Args:
      args: argparse.Namespace, All the arguments that were provided to this
        command invocation.

    Raises:
      files.Error: A file argument could not be read.
      GenomicsError: User input was invalid.
      HttpException: An http error response was received while executing api
        request.

    Returns:
      Operation representing the running pipeline.
    """
    v2 = False
    pipeline = None
    apitools_client = genomics_util.GetGenomicsClient('v1alpha2')
    genomics_messages = genomics_util.GetGenomicsMessages('v1alpha2')

    if args.pipeline_file:
      if args.command_line:
        # TODO(b/79982664): Use a mutex argument group instead.
        raise exceptions.GenomicsError(
            '--command-line cannot be used with --pipeline-file.')

      pipeline = genomics_util.GetFileAsMessage(
          args.pipeline_file,
          genomics_messages.Pipeline,
          self.context[lib.STORAGE_V1_CLIENT_KEY])
      pipeline.projectId = genomics_util.GetProjectId()

      # A v1alpha2 pipeline file must set a docker section; its absence
      # signals a v2alpha1 pipeline, so re-parse the file with the v2alpha1
      # message classes and switch clients.
      if not pipeline.docker:
        v2 = True
        apitools_client = genomics_util.GetGenomicsClient('v2alpha1')
        genomics_messages = genomics_util.GetGenomicsMessages('v2alpha1')
        pipeline = genomics_util.GetFileAsMessage(
            args.pipeline_file,
            genomics_messages.Pipeline,
            self.context[lib.STORAGE_V1_CLIENT_KEY])
    elif args.command_line:
      # --command-line implies the v2alpha1 API: wrap the command in a single
      # bash action.
      v2 = True
      apitools_client = genomics_util.GetGenomicsClient('v2alpha1')
      genomics_messages = genomics_util.GetGenomicsMessages('v2alpha1')
      pipeline = genomics_messages.Pipeline(
          actions=[
              genomics_messages.Action(
                  imageUri=args.docker_image,
                  commands=['-c', args.command_line],
                  entrypoint='bash')])
    else:
      raise exceptions.GenomicsError(
          'Either --pipeline-file or --command-line is required.')

    arg_inputs, is_local_file = _ValidateAndMergeArgInputs(args)

    request = None
    if v2:
      # Create messages up front to avoid checking for None everywhere.
      if not pipeline.resources:
        pipeline.resources = genomics_messages.Resources()
      resources = pipeline.resources

      if not resources.virtualMachine:
        resources.virtualMachine = genomics_messages.VirtualMachine(
            machineType='n1-standard-1')
      virtual_machine = resources.virtualMachine

      if not virtual_machine.serviceAccount:
        virtual_machine.serviceAccount = genomics_messages.ServiceAccount()

      # Always set the project id.
      resources.projectId = genomics_util.GetProjectId()

      # Update the pipeline based on arguments.
      if args.memory or args.cpus:
        # Default to n1-standard1 sizes.
        virtual_machine.machineType = 'custom-%d-%d' % (
            args.cpus or 1, (args.memory or 3.84) * 1000)

      if args.preemptible:
        virtual_machine.preemptible = args.preemptible

      if args.zones:
        resources.zones = args.zones
      elif not resources.zones and properties.VALUES.compute.zone.Get():
        resources.zones = [properties.VALUES.compute.zone.Get()]

      if args.regions:
        resources.regions = args.regions
      elif not resources.regions and properties.VALUES.compute.region.Get():
        resources.regions = [properties.VALUES.compute.region.Get()]

      if args.service_account_email != 'default':
        virtual_machine.serviceAccount.email = args.service_account_email

      if args.service_account_scopes:
        virtual_machine.serviceAccount.scopes = args.service_account_scopes

      # Always add a scope for GCS in case any arguments need it.
      virtual_machine.serviceAccount.scopes.append(
          'https://www.googleapis.com/auth/devstorage.read_write')

      # Generate paths for inputs and outputs in a shared location and put
      # them into the environment for actions based on their name.
      env = {}
      if arg_inputs:
        input_generator = _SharedPathGenerator('input')
        for name, value in arg_inputs.items():
          if genomics_util.IsGcsPath(value):
            # GCS input: prepend an action that copies it to the shared path.
            env[name] = input_generator.Generate()
            pipeline.actions.insert(
                0,
                genomics_messages.Action(
                    imageUri=CLOUD_SDK_IMAGE,
                    commands=['/bin/sh', '-c',
                              'gsutil -q cp %s ${%s}' % (value, name)]))
          elif name in is_local_file:
            # Local file contents: embed them base64-encoded in the command.
            # b64encode requires bytes on Python 3, and the resulting command
            # string must be str, hence encode()/decode() (matches the
            # v2alpha1-only Run command in this file).
            env[name] = input_generator.Generate()
            pipeline.actions.insert(
                0,
                genomics_messages.Action(
                    imageUri=CLOUD_SDK_IMAGE,
                    commands=['/bin/sh', '-c',
                              'echo "%s" | base64 -d > ${%s}' %
                              (base64.b64encode(value.encode()).decode(),
                               name)]))
          else:
            # Plain value: pass through the environment untouched.
            env[name] = value

      if args.outputs:
        output_generator = _SharedPathGenerator('output')
        for name, value in args.outputs.items():
          env[name] = output_generator.Generate()
          pipeline.actions.append(
              genomics_messages.Action(
                  imageUri=CLOUD_SDK_IMAGE,
                  commands=['/bin/sh', '-c',
                            'gsutil -q cp ${%s} %s' % (name, value)]))

      # Merge any existing pipeline arguments into the generated environment
      # and update the pipeline (generated entries win on conflict).
      if pipeline.environment:
        for val in pipeline.environment.additionalProperties:
          if val.key not in env:
            env[val.key] = val.value

      pipeline.environment = genomics_messages.Pipeline.EnvironmentValue(
          additionalProperties=genomics_util.
          ArgDictToAdditionalPropertiesList(
              env,
              genomics_messages.Pipeline.EnvironmentValue.AdditionalProperty))

      # Inputs/outputs were staged on a shared disk; attach it and mount it
      # into every action.
      if arg_inputs or args.outputs:
        virtual_machine.disks.append(genomics_messages.Disk(name=SHARED_DISK))
        for action in pipeline.actions:
          action.mounts.append(
              genomics_messages.Mount(
                  disk=SHARED_DISK, path='/' + SHARED_DISK))

      if args.logging:
        # ALWAYS_RUN so logs get copied out even when an action fails.
        pipeline.actions.append(
            genomics_messages.Action(
                imageUri=CLOUD_SDK_IMAGE,
                commands=['/bin/sh', '-c',
                          'gsutil -q cp /google/logs/output ' + args.logging],
                flags=[(genomics_messages.Action
                        .FlagsValueListEntryValuesEnum.ALWAYS_RUN)]))

      # Update disk sizes if specified, potentially including the shared disk.
      if args.disk_size:
        disk_sizes = {}
        for disk_encoding in args.disk_size.split(','):
          parts = disk_encoding.split(':', 1)
          try:
            disk_sizes[parts[0]] = int(parts[1])
          except (IndexError, ValueError):
            # Missing ':' separator or non-integer size.
            raise exceptions.GenomicsError('Invalid --disk-size.')

        for disk in virtual_machine.disks:
          # Only resize disks mentioned in --disk-size; unconditional
          # indexing would raise KeyError for any other attached disk.
          if disk.name in disk_sizes:
            disk.sizeGb = disk_sizes[disk.name]

      request = genomics_messages.RunPipelineRequest(
          pipeline=pipeline,
          labels=labels_util.ParseCreateArgs(
              args,
              genomics_messages.RunPipelineRequest.LabelsValue))
    else:
      inputs = genomics_util.ArgDictToAdditionalPropertiesList(
          arg_inputs,
          genomics_messages.RunPipelineArgs.InputsValue.AdditionalProperty)
      outputs = genomics_util.ArgDictToAdditionalPropertiesList(
          args.outputs,
          genomics_messages.RunPipelineArgs.OutputsValue.AdditionalProperty)

      # Set "overrides" on the resources. If the user did not pass anything
      # on the command line, do not set anything in the resource: preserve
      # the user-intent "did not set" vs. "set an empty value/list"
      resources = genomics_messages.PipelineResources(
          preemptible=args.preemptible)
      if args.memory:
        resources.minimumRamGb = args.memory
      if args.cpus:
        resources.minimumCpuCores = args.cpus
      if args.disk_size:
        resources.disks = []
        for disk_encoding in args.disk_size.split(','):
          disk_args = disk_encoding.split(':', 1)
          resources.disks.append(
              genomics_messages.Disk(
                  name=disk_args[0],
                  sizeGb=int(disk_args[1])))

      # Progression for picking the right zones...
      #   If specified on the command line, use them.
      #   If specified in the Pipeline definition, use them.
      #   If there is a GCE default zone in the local configuration, use it.
      #   Else let the API select a zone
      if args.zones:
        resources.zones = args.zones
      elif pipeline.resources and pipeline.resources.zones:
        pass
      elif properties.VALUES.compute.zone.Get():
        resources.zones = [properties.VALUES.compute.zone.Get()]

      request = genomics_messages.RunPipelineRequest(
          ephemeralPipeline=pipeline,
          pipelineArgs=genomics_messages.RunPipelineArgs(
              inputs=genomics_messages.RunPipelineArgs.InputsValue(
                  additionalProperties=inputs),
              outputs=genomics_messages.RunPipelineArgs.OutputsValue(
                  additionalProperties=outputs),
              clientId=args.run_id,
              logging=genomics_messages.LoggingOptions(gcsPath=args.logging),
              labels=labels_util.ParseCreateArgs(
                  args,
                  genomics_messages.RunPipelineArgs.LabelsValue),
              projectId=genomics_util.GetProjectId(),
              serviceAccount=genomics_messages.ServiceAccount(
                  email=args.service_account_email,
                  scopes=args.service_account_scopes),
              resources=resources))

    result = apitools_client.pipelines.Run(request)
    log.status.Print('Running [{0}].'.format(result.name))
    return result
def Run(self, args):
    """This is what gets called when the user runs this command.

    Args:
      args: argparse.Namespace, All the arguments that were provided to this
        command invocation.

    Raises:
      files.Error: A file argument could not be read.
      GenomicsError: User input was invalid.
      HttpException: An http error response was received while executing api
        request.

    Returns:
      Operation representing the running pipeline.
    """
    pipeline = None
    apitools_client = genomics_util.GetGenomicsClient('v2alpha1')
    genomics_messages = genomics_util.GetGenomicsMessages('v2alpha1')

    if args.pipeline_file:
      if args.command_line:
        # TODO(b/79982664): Use a mutex argument group instead.
        raise exceptions.GenomicsError(
            '--command-line cannot be used with --pipeline-file.')

      pipeline = genomics_util.GetFileAsMessage(
          args.pipeline_file,
          genomics_messages.Pipeline,
          self.context[lib.STORAGE_V1_CLIENT_KEY])
    elif args.command_line:
      # Wrap the raw command line in a single bash action.
      pipeline = genomics_messages.Pipeline(
          actions=[
              genomics_messages.Action(
                  imageUri=args.docker_image,
                  commands=['-c', args.command_line],
                  entrypoint='bash')])
    else:
      raise exceptions.GenomicsError(
          'Either --pipeline-file or --command-line is required.')

    arg_inputs, is_local_file = _ValidateAndMergeArgInputs(args)

    request = None
    # Create messages up front to avoid checking for None everywhere.
    if not pipeline.resources:
      pipeline.resources = genomics_messages.Resources()
    resources = pipeline.resources

    if not resources.virtualMachine:
      resources.virtualMachine = genomics_messages.VirtualMachine(
          machineType='n1-standard-1')
    virtual_machine = resources.virtualMachine

    if not virtual_machine.serviceAccount:
      virtual_machine.serviceAccount = genomics_messages.ServiceAccount()

    # Always set the project id.
    resources.projectId = genomics_util.GetProjectId()

    # Update the pipeline based on arguments.
    if args.memory or args.cpus:
      # Default to n1-standard1 sizes (1 vCPU, 3.75 GB expressed in MB).
      virtual_machine.machineType = 'custom-%d-%d' % (
          args.cpus or 1, (args.memory or 3.75) * 1024)
    if args.preemptible:
      virtual_machine.preemptible = args.preemptible
    if args.zones:
      resources.zones = args.zones
    elif not resources.zones and properties.VALUES.compute.zone.Get():
      resources.zones = [properties.VALUES.compute.zone.Get()]
    if args.regions:
      resources.regions = args.regions
    elif not resources.regions and properties.VALUES.compute.region.Get():
      resources.regions = [properties.VALUES.compute.region.Get()]
    if args.service_account_email != 'default':
      virtual_machine.serviceAccount.email = args.service_account_email
    if args.service_account_scopes:
      virtual_machine.serviceAccount.scopes = args.service_account_scopes

    # Always add a scope for GCS in case any arguments need it.
    virtual_machine.serviceAccount.scopes.append(
        'https://www.googleapis.com/auth/devstorage.read_write')

    # Attach custom network/subnetwork (if set).
    if args.network or args.subnetwork:
      if not virtual_machine.network:
        virtual_machine.network = genomics_messages.Network()
      if args.network:
        virtual_machine.network.name = args.network
      if args.subnetwork:
        virtual_machine.network.subnetwork = args.subnetwork

    if args.boot_disk_size is not None:
      if args.boot_disk_size <= 0:
        raise exceptions.GenomicsError(
            'Boot disk size must be greater than zero.')
      virtual_machine.bootDiskSizeGb = args.boot_disk_size

    # Generate paths for inputs and outputs in a shared location and put them
    # into the environment for actions based on their name.
    env = {}
    if arg_inputs:
      input_generator = _SharedPathGenerator('input')
      for name, value in arg_inputs.items():
        if genomics_util.IsGcsPath(value):
          # GCS input: prepend an action that copies it to the shared path.
          env[name] = input_generator.Generate()
          pipeline.actions.insert(
              0,
              genomics_messages.Action(
                  imageUri=CLOUD_SDK_IMAGE,
                  commands=['/bin/sh', '-c',
                            'gsutil -m -q cp %s ${%s}' % (value, name)]))
        elif name in is_local_file:
          # TODO(b/183206325): Get test coverage to 100%.
          # Local file contents: embed them base64-encoded in the command.
          env[name] = input_generator.Generate()
          pipeline.actions.insert(
              0,
              genomics_messages.Action(
                  imageUri=CLOUD_SDK_IMAGE,
                  commands=['/bin/sh', '-c',
                            'echo "%s" | base64 -d > ${%s}' %
                            (base64.b64encode(value.encode()).decode(),
                             name)]))
        else:
          # Plain value: pass through the environment untouched.
          env[name] = value

    if args.outputs:
      output_generator = _SharedPathGenerator('output')
      for name, value in args.outputs.items():
        env[name] = output_generator.Generate()
        pipeline.actions.append(
            genomics_messages.Action(
                imageUri=CLOUD_SDK_IMAGE,
                commands=['/bin/sh', '-c',
                          'gsutil -m -q cp ${%s} %s' % (name, value)]))

    if args.env_vars:
      for name, value in args.env_vars.items():
        env[name] = value

    # Merge any existing pipeline arguments into the generated environment and
    # update the pipeline (generated entries win on conflict).
    if pipeline.environment:
      for val in pipeline.environment.additionalProperties:
        if val.key not in env:
          env[val.key] = val.value

    pipeline.environment = genomics_messages.Pipeline.EnvironmentValue(
        additionalProperties=genomics_util.ArgDictToAdditionalPropertiesList(
            env,
            genomics_messages.Pipeline.EnvironmentValue.AdditionalProperty))

    # Inputs/outputs were staged on a shared disk; attach it and mount it
    # into every action.
    if arg_inputs or args.outputs:
      virtual_machine.disks.append(genomics_messages.Disk(name=SHARED_DISK))
      for action in pipeline.actions:
        action.mounts.append(
            genomics_messages.Mount(
                disk=SHARED_DISK, path='/' + SHARED_DISK))

    if args.logging:
      # ALWAYS_RUN so logs get copied out even when an action fails.
      pipeline.actions.append(
          genomics_messages.Action(
              imageUri=CLOUD_SDK_IMAGE,
              commands=['/bin/sh', '-c',
                        'gsutil -m -q cp /google/logs/output ' + args.logging],
              flags=[(genomics_messages.Action
                      .FlagsValueListEntryValuesEnum.ALWAYS_RUN)]))

    # Update disk sizes if specified, potentially including the shared disk.
    if args.disk_size:
      disk_sizes = {}
      for disk_encoding in args.disk_size.split(','):
        parts = disk_encoding.split(':', 1)
        try:
          disk_sizes[parts[0]] = int(parts[1])
        except (IndexError, ValueError):
          # Missing ':' separator or non-integer size; a bare except here
          # would also swallow KeyboardInterrupt/SystemExit.
          raise exceptions.GenomicsError('Invalid --disk-size.')

      for disk in virtual_machine.disks:
        if disk.name in disk_sizes:
          disk.sizeGb = disk_sizes[disk.name]

    request = genomics_messages.RunPipelineRequest(
        pipeline=pipeline,
        labels=labels_util.ParseCreateArgs(
            args,
            genomics_messages.RunPipelineRequest.LabelsValue))

    result = apitools_client.pipelines.Run(request)
    log.status.Print('Running [{0}].'.format(result.name))
    return result