def test_list_of_tuples(self):
    """map_version() should treat (LooseVersion, value) pairs as minimums."""
    version_map = [
        (LooseVersion("1"), "foo"),
        (LooseVersion("2"), "bar"),
        (LooseVersion("3"), "baz"),
    ]

    # each case is (requested version, expected mapped value)
    cases = [
        ("1.1", "foo"),    # between 1 and 2: lower bound applies
        ("2", "bar"),      # exact match
        ("4.5", "baz"),    # above the highest known version
        ("11.11", "baz"),  # compared as versions, not strings
        ("0.1", "foo"),    # below the lowest: falls back to it
    ]
    for requested, expected in cases:
        self.assertEqual(map_version(requested, version_map), expected)
def test_list_of_tuples(self):
    """map_version() accepts an explicit list of (LooseVersion, value)."""
    version_map = [
        (LooseVersion('1'), 'foo'),
        (LooseVersion('2'), 'bar'),
        (LooseVersion('3'), 'baz'),
    ]

    # (requested version, expected value); order mirrors the scenarios:
    # in-between, exact, above max, version-vs-string compare, below min
    for requested, expected in (
            ('1.1', 'foo'),
            ('2', 'bar'),
            ('4.5', 'baz'),
            ('11.11', 'baz'),
            ('0.1', 'foo')):
        self.assertEqual(map_version(requested, version_map), expected)
def test_dict(self):
    """map_version() should accept a dict of version string -> value."""
    version_map = {"1": "foo", "2": "bar", "3": "baz"}

    cases = [
        ("1.1", "foo"),    # versions are just minimums
        ("2", "bar"),      # exact match
        ("4.5", "baz"),    # above the highest known version
        ("11.11", "baz"),  # compared as versions, not strings
        ("0.1", "foo"),    # falls back to the lowest version
    ]
    for requested, expected in cases:
        self.assertEqual(map_version(requested, version_map), expected)
def test_dict(self):
    """map_version() accepts {version string: value} mappings."""
    version_map = {'1': 'foo', '2': 'bar', '3': 'baz'}

    # in-between versions map to the nearest lower entry
    self.assertEqual(map_version('1.1', version_map), 'foo')
    # exact matches work too
    self.assertEqual(map_version('2', version_map), 'bar')
    # entries act as minimums, so anything >= 3 maps to 'baz'
    self.assertEqual(map_version('4.5', version_map), 'baz')
    # '11.11' > '3' numerically even though it sorts lower as a string
    self.assertEqual(map_version('11.11', version_map), 'baz')
    # below every entry: fall back to the lowest version
    self.assertEqual(map_version('0.1', version_map), 'foo')
def _store_cluster_info(self):
    """Cache self._image_version and self._hadoop_version.

    Requires the cluster to exist already.
    """
    if not self._cluster_id:
        raise ValueError('cluster has not yet been created')

    config = self._get_cluster(self._cluster_id).config
    self._image_version = config.software_config.image_version

    # protect against new versions, including patch versions
    # we didn't explicitly request. See #1428
    self._hadoop_version = map_version(
        self._image_version, _DATAPROC_IMAGE_TO_HADOOP_VERSION)
def _store_cluster_info(self):
    """Cache self._image_version and self._hadoop_version.

    Requires the cluster to exist already.
    """
    if not self._cluster_id:
        raise AssertionError('cluster has not yet been created')

    software_config = self._api_cluster_get(
        self._cluster_id)['config']['softwareConfig']
    self._image_version = software_config['imageVersion']

    # protect against new versions, including patch versions
    # we didn't explicitly request. See #1428
    self._hadoop_version = map_version(
        self._image_version, _DATAPROC_IMAGE_TO_HADOOP_VERSION)
def _store_cluster_info(self):
    """Look up the cluster and record its image and Hadoop versions
    on self (self._image_version, self._hadoop_version).
    """
    if not self._cluster_id:
        raise AssertionError('cluster has not yet been created')

    cluster = self._api_cluster_get(self._cluster_id)

    image_version = cluster['config']['softwareConfig']['imageVersion']
    self._image_version = image_version

    # protect against new versions, including patch versions
    # we didn't explicitly request. See #1428
    self._hadoop_version = map_version(
        image_version, _DATAPROC_IMAGE_TO_HADOOP_VERSION)
def _store_cluster_info(self):
    """Look up the cluster and record its image and Hadoop versions
    on self (self._image_version, self._hadoop_version).
    """
    if not self._cluster_id:
        raise ValueError('cluster has not yet been created')

    cluster = self._get_cluster(self._cluster_id)

    image_version = cluster.config.software_config.image_version
    self._image_version = image_version

    # protect against new versions, including patch versions
    # we didn't explicitly request. See #1428
    self._hadoop_version = map_version(
        image_version, _DATAPROC_IMAGE_TO_HADOOP_VERSION)
def test_version_may_not_be_None(self):
    """Passing None as the version should raise TypeError."""
    version_map = {'1': 'foo'}

    # sanity check: a real version string works
    self.assertEqual(map_version('1', version_map), 'foo')
    # but None is rejected outright
    self.assertRaises(TypeError, map_version, None, version_map)
def _add_steps(self, operation_name, Steps, cluster, now=None):
    """Validate *Steps* and append them to *cluster*'s step list.

    Mimics the real EMR API's validation: rejects terminated clusters,
    enforces the step-count limit, and requires Name/HadoopJarStep on
    each step. Returns the list of newly assigned step IDs.

    :param operation_name: API operation name (e.g. 'RunJobFlow'),
        used in validation error messages
    :param Steps: list/tuple of step dicts as passed to the API
    :param cluster: mock cluster dict (mutated: '_Steps' is extended)
    :param now: creation timestamp; defaults to the current time
    :raises _ValidationException: on a terminated cluster or too many steps
    :raises NotImplementedError: for step params this mock doesn't support
    """
    if now is None:
        now = _boto3_now()

    _validate_param_type(Steps, (list, tuple))

    # only active job flows allowed
    if cluster['Status']['State'].startswith('TERMINAT'):
        raise _ValidationException(
            operation_name,
            'A job flow that is shutting down, terminated, or finished'
            ' may not be modified.')

    # no more than 256 steps allowed
    if cluster.get('RunningAmiVersion') and map_version(
            cluster['RunningAmiVersion'], LIFETIME_STEP_LIMIT_AMI_VERSIONS):
        # for very old AMIs, *all* steps count
        if len(cluster['_Steps']) + len(Steps) > STEP_ADD_LIMIT:
            raise _ValidationException(
                operation_name,
                'Maximum number of steps for job flow exceeded')
    else:
        # otherwise, only active and pending steps count
        num_active_steps = sum(
            1 for step in cluster['_Steps']
            if step['Status']['State'] in
            ('PENDING', 'PENDING_CANCELLED', 'RUNNING'))

        if num_active_steps + len(Steps) > STEP_ADD_LIMIT:
            raise _ValidationException(
                operation_name,
                "Maximum number of active steps(State = 'Running',"
                " 'Pending' or 'Cancel_Pending') for cluster exceeded.")

    new_steps = []

    for i, Step in enumerate(Steps):
        # copy so the pop() calls below don't mutate the caller's dict
        Step = dict(Step)

        # skeleton step record with API defaults; fields validated
        # below overwrite these
        new_step = dict(
            ActionOnFailure='TERMINATE_CLUSTER',
            Config=dict(
                Args=[],
                Jar={},
                Properties={},
            ),
            # IDs continue from the steps already on the cluster
            Id='s-MOCKSTEP%d' % (len(cluster['_Steps']) + i),
            Name='',
            Status=dict(
                State='PENDING',
                StateChangeReason={},
                Timeline=dict(CreationDateTime=now),
            ),
        )

        # Name (required)
        _validate_param(Step, 'Name', string_types)
        new_step['Name'] = Step.pop('Name')

        # ActionOnFailure
        if 'ActionOnFailure' in Step:
            _validate_param_enum(Step['ActionOnFailure'], [
                'CANCEL_AND_WAIT', 'CONTINUE', 'TERMINATE_JOB_FLOW',
                'TERMINATE_CLUSTER'
            ])
            new_step['ActionOnFailure'] = Step.pop('ActionOnFailure')

        # HadoopJarStep (required)
        _validate_param(Step, 'HadoopJarStep', dict)
        # copy for the same reason as Step above
        HadoopJarStep = dict(Step.pop('HadoopJarStep'))

        _validate_param(HadoopJarStep, 'Jar', string_types)
        new_step['Config']['Jar'] = HadoopJarStep.pop('Jar')

        if 'Args' in HadoopJarStep:
            Args = HadoopJarStep.pop('Args')
            _validate_param_type(Args, (list, tuple))
            for arg in Args:
                _validate_param_type(arg, string_types)
            new_step['Config']['Args'].extend(Args)

        if 'MainClass' in HadoopJarStep:
            _validate_param(HadoopJarStep, 'MainClass', string_types)
            new_step['Config']['MainClass'] = HadoopJarStep.pop(
                'MainClass')

        # we don't currently support Properties
        if HadoopJarStep:
            raise NotImplementedError(
                "mock_boto3 doesn't support these HadoopJarStep params: %s" %
                ', '.join(sorted(HadoopJarStep)))

        if Step:
            raise NotImplementedError(
                "mock_boto3 doesn't support these step params: %s" %
                ', '.join(sorted(Step)))

        new_steps.append(new_step)

    cluster['_Steps'].extend(new_steps)

    # add_job_flow_steps() needs to return step IDs
    return [new_step['Id'] for new_step in new_steps]
def run_job_flow(self, **kwargs):
    """Mock of the EMR RunJobFlow API call.

    Validates params, builds a mock cluster dict, stores it in
    self.mock_emr_clusters, and returns ``dict(JobFlowId=...)``.

    :raises _ValidationException: on invalid/missing params
    :raises NotImplementedError: for params this mock doesn't support
    """
    # going to pop params from kwargs as we process them, and raise
    # NotImplementedError at the end if any params are left
    now = kwargs.pop('_Now', _boto3_now())

    # our newly created cluster, as described by describe_cluster(), plus:
    #
    # _BootstrapActions: as described by list_bootstrap_actions()
    # _InstanceGroups: as described by list_instance_groups()
    # _Steps: as described by list_steps(), but not reversed
    #
    # TODO: at some point when we implement instance fleets,
    # _InstanceGroups will become optional
    cluster = dict(
        _BootstrapActions=[],
        _InstanceGroups=[],
        _Steps=[],
        Applications=[],
        AutoTerminate=True,
        Configurations=[],
        Ec2InstanceAttributes=dict(
            EmrManagedMasterSecurityGroup='sg-mockmaster',
            EmrManagedSlaveSecurityGroup='sg-mockslave',
            IamInstanceProfile='',
        ),
        Id='j-MOCKCLUSTER%d' % len(self.mock_emr_clusters),
        Name='',
        NormalizedInstanceHours=0,
        ScaleDownBehavior='TERMINATE_AT_TASK_COMPLETION',
        ServiceRole='',
        Status=dict(
            State='STARTING',
            StateChangeReason={},
            Timeline=dict(CreationDateTime=now),
        ),
        Tags=[],
        TerminationProtected=False,
        VisibleToAllUsers=False,
    )

    def _error(message):
        return _ValidationException('RunJobFlow', message)

    # Name (required)
    _validate_param(kwargs, 'Name', string_types)
    cluster['Name'] = kwargs.pop('Name')

    # LogUri
    if 'LogUri' in kwargs:
        _validate_param(kwargs, 'LogUri', string_types)
        cluster['LogUri'] = kwargs.pop('LogUri')

    # JobFlowRole and ServiceRole (required)
    _validate_param(kwargs, 'JobFlowRole', string_types)
    cluster['Ec2InstanceAttributes']['IamInstanceProfile'] = kwargs.pop(
        'JobFlowRole')

    if 'ServiceRole' not in kwargs:  # required by API, not boto3
        raise _error('ServiceRole is required for creating cluster.')
    _validate_param(kwargs, 'ServiceRole', string_types)
    cluster['ServiceRole'] = kwargs.pop('ServiceRole')

    # AmiVersion and ReleaseLabel
    for version_param in ('AmiVersion', 'ReleaseLabel'):
        if version_param in kwargs:
            _validate_param(kwargs, version_param, string_types)

    if 'AmiVersion' in kwargs:
        if 'ReleaseLabel' in kwargs:
            raise _error(
                'Only one AMI version and release label may be specified.'
                ' Provided AMI: %s, release label: %s.' % (
                    kwargs['AmiVersion'], kwargs['ReleaseLabel']))

        AmiVersion = kwargs.pop('AmiVersion')

        # aliases like 'latest' resolve to a real version number
        running_ami_version = AMI_VERSION_ALIASES.get(
            AmiVersion, AmiVersion)

        # 4.x+ must use ReleaseLabel; 1.x isn't supported at all
        if version_gte(running_ami_version, '4'):
            raise _error('The supplied ami version is invalid.')
        elif not version_gte(running_ami_version, '2'):
            raise _error(
                'Job flow role is not compatible with the supplied'
                ' AMI version')

        cluster['RequestedAmiVersion'] = AmiVersion
        cluster['RunningAmiVersion'] = running_ami_version

    elif 'ReleaseLabel' in kwargs:
        ReleaseLabel = kwargs.pop('ReleaseLabel')

        # strip the 'emr-' prefix to get the version number. Don't use
        # str.lstrip('emr-') here: it strips a *character set*, not a
        # prefix, and would mangle labels with extra leading e/m/r/-
        if ReleaseLabel.startswith('emr-'):
            running_ami_version = ReleaseLabel[len('emr-'):]
        else:
            running_ami_version = ReleaseLabel

        if not version_gte(running_ami_version, '4'):
            raise _error('The supplied release label is invalid: %s.' %
                         ReleaseLabel)

        cluster['ReleaseLabel'] = ReleaseLabel
    else:
        # note: you can't actually set Hadoop version through boto3
        raise _error('Must specify exactly one of the following:'
                     ' release label, AMI version, or Hadoop version.')

    # Applications
    hadoop_version = map_version(
        running_ami_version, AMI_HADOOP_VERSION_UPDATES)

    if version_gte(running_ami_version, '4'):
        application_names = set(
            a['Name'] for a in kwargs.pop('Applications', []))

        # if Applications is set but doesn't include Hadoop, the
        # cluster description won't either! (Even though Hadoop is
        # in fact installed.)
        if not application_names:
            application_names = set(['Hadoop'])

        for app_name in sorted(application_names):
            if app_name == 'Hadoop':
                version = hadoop_version
            else:
                version = DUMMY_APPLICATION_VERSION

            cluster['Applications'].append(
                dict(Name=app_name, Version=version))
    else:
        if kwargs.get('Applications'):
            raise _error(
                'Cannot specify applications when AMI version is used.'
                ' Specify supported products or new supported products'
                ' instead.')

        # 'hadoop' is lowercase if AmiVersion specified
        cluster['Applications'].append(
            dict(Name='hadoop', Version=hadoop_version))

    # Configurations
    if 'Configurations' in kwargs:
        _validate_param(kwargs, 'Configurations', (list, tuple))

        if kwargs['Configurations'] and not version_gte(
                running_ami_version, '4'):
            raise _ValidationException(
                'RunJobFlow',
                'Cannot specify configurations when AMI version is used.')

        cluster['Configurations'] = _normalized_configurations(
            kwargs.pop('Configurations'))

    # VisibleToAllUsers
    if 'VisibleToAllUsers' in kwargs:
        _validate_param(kwargs, 'VisibleToAllUsers', bool)
        cluster['VisibleToAllUsers'] = kwargs.pop('VisibleToAllUsers')

    # pass BootstrapActions off to helper
    if 'BootstrapActions' in kwargs:
        self._add_bootstrap_actions(
            'RunJobFlow', kwargs.pop('BootstrapActions'), cluster)

    # pass Instances (required) off to helper
    _validate_param(kwargs, 'Instances')
    self._add_instances(
        'RunJobFlow', kwargs.pop('Instances'), cluster, now=now)

    # pass Steps off to helper
    if 'Steps' in kwargs:
        self._add_steps('RunJobFlow', kwargs.pop('Steps'), cluster)

    # pass Tags off to helper
    if 'Tags' in kwargs:
        self._add_tags('RunJobFlow', kwargs.pop('Tags'), cluster)

    # save AdditionalInfo
    if 'AdditionalInfo' in kwargs:
        cluster['_AdditionalInfo'] = kwargs.pop('AdditionalInfo')

    # catch extra params
    if kwargs:
        raise NotImplementedError(
            'mock RunJobFlow does not support these parameters: %s' %
            ', '.join(sorted(kwargs)))

    self.mock_emr_clusters[cluster['Id']] = cluster

    return dict(JobFlowId=cluster['Id'])
def test_version_may_not_be_None(self):
    """map_version() must reject None as a version."""
    version_map = {"1": "foo"}

    # a proper version string maps normally...
    self.assertEqual(map_version("1", version_map), "foo")
    # ...but None raises TypeError
    self.assertRaises(TypeError, map_version, None, version_map)