def test_list_of_tuples(self):
    """map_version() should treat (LooseVersion, value) pairs as minimums."""
    version_map = [
        (LooseVersion("1"), "foo"),
        (LooseVersion("2"), "bar"),
        (LooseVersion("3"), "baz"),
    ]

    # each case is (requested version, expected mapped value)
    cases = [
        ("1.1", "foo"),    # between 1 and 2: lower bound applies
        ("2", "bar"),      # exact match
        ("4.5", "baz"),    # above the highest known version
        ("11.11", "baz"),  # compared as versions, not strings
        ("0.1", "foo"),    # below the lowest: falls back to it
    ]
    for requested, expected in cases:
        self.assertEqual(map_version(requested, version_map), expected)
def test_list_of_tuples(self):
    """map_version() accepts an explicit list of (LooseVersion, value)."""
    version_map = [
        (LooseVersion('1'), 'foo'),
        (LooseVersion('2'), 'bar'),
        (LooseVersion('3'), 'baz'),
    ]

    # (requested version, expected value); order mirrors the scenarios:
    # in-between, exact, above max, version-vs-string compare, below min
    for requested, expected in (
            ('1.1', 'foo'),
            ('2', 'bar'),
            ('4.5', 'baz'),
            ('11.11', 'baz'),
            ('0.1', 'foo')):
        self.assertEqual(map_version(requested, version_map), expected)
def test_dict(self):
    """map_version() should accept a dict of version string -> value."""
    version_map = {"1": "foo", "2": "bar", "3": "baz"}

    cases = [
        ("1.1", "foo"),    # versions are just minimums
        ("2", "bar"),      # exact match
        ("4.5", "baz"),    # above the highest known version
        ("11.11", "baz"),  # compared as versions, not strings
        ("0.1", "foo"),    # falls back to the lowest version
    ]
    for requested, expected in cases:
        self.assertEqual(map_version(requested, version_map), expected)
def test_dict(self):
    """map_version() accepts {version string: value} mappings."""
    version_map = {'1': 'foo', '2': 'bar', '3': 'baz'}

    # in-between versions map to the nearest lower entry
    self.assertEqual(map_version('1.1', version_map), 'foo')
    # exact matches work too
    self.assertEqual(map_version('2', version_map), 'bar')
    # entries act as minimums, so anything >= 3 maps to 'baz'
    self.assertEqual(map_version('4.5', version_map), 'baz')
    # '11.11' > '3' numerically even though it sorts lower as a string
    self.assertEqual(map_version('11.11', version_map), 'baz')
    # below every entry: fall back to the lowest version
    self.assertEqual(map_version('0.1', version_map), 'foo')
def _store_cluster_info(self):
    """Cache self._image_version and self._hadoop_version.

    Requires the cluster to exist already.
    """
    if not self._cluster_id:
        raise ValueError('cluster has not yet been created')

    config = self._get_cluster(self._cluster_id).config
    self._image_version = config.software_config.image_version

    # protect against new versions, including patch versions
    # we didn't explicitly request. See #1428
    self._hadoop_version = map_version(
        self._image_version, _DATAPROC_IMAGE_TO_HADOOP_VERSION)
def _store_cluster_info(self):
    """Cache self._image_version and self._hadoop_version.

    Requires the cluster to exist already.
    """
    if not self._cluster_id:
        raise AssertionError('cluster has not yet been created')

    software_config = self._api_cluster_get(
        self._cluster_id)['config']['softwareConfig']
    self._image_version = software_config['imageVersion']

    # protect against new versions, including patch versions
    # we didn't explicitly request. See #1428
    self._hadoop_version = map_version(
        self._image_version, _DATAPROC_IMAGE_TO_HADOOP_VERSION)
def _store_cluster_info(self):
    """Look up the cluster and record its image and Hadoop versions
    on self (self._image_version, self._hadoop_version).
    """
    if not self._cluster_id:
        raise AssertionError('cluster has not yet been created')

    cluster = self._api_cluster_get(self._cluster_id)

    image_version = cluster['config']['softwareConfig']['imageVersion']
    self._image_version = image_version

    # protect against new versions, including patch versions
    # we didn't explicitly request. See #1428
    self._hadoop_version = map_version(
        image_version, _DATAPROC_IMAGE_TO_HADOOP_VERSION)
def _store_cluster_info(self):
    """Look up the cluster and record its image and Hadoop versions
    on self (self._image_version, self._hadoop_version).
    """
    if not self._cluster_id:
        raise ValueError('cluster has not yet been created')

    cluster = self._get_cluster(self._cluster_id)

    image_version = cluster.config.software_config.image_version
    self._image_version = image_version

    # protect against new versions, including patch versions
    # we didn't explicitly request. See #1428
    self._hadoop_version = map_version(
        image_version, _DATAPROC_IMAGE_TO_HADOOP_VERSION)
def test_version_may_not_be_None(self):
    """Passing None as the version should raise TypeError."""
    version_map = {'1': 'foo'}

    # sanity check: a real version string works
    self.assertEqual(map_version('1', version_map), 'foo')
    # but None is rejected outright
    self.assertRaises(TypeError, map_version, None, version_map)
def _add_steps(self, operation_name, Steps, cluster, now=None):
    """Validate *Steps* and append them to *cluster*'s step list.

    Mimics the real EMR API's validation: rejects terminated clusters,
    enforces the step-count limit, and requires Name/HadoopJarStep on
    each step. Returns the list of newly assigned step IDs.

    :param operation_name: API operation name (e.g. 'RunJobFlow'),
        used in validation error messages
    :param Steps: list/tuple of step dicts as passed to the API
    :param cluster: mock cluster dict (mutated: '_Steps' is extended)
    :param now: creation timestamp; defaults to the current time
    :raises _ValidationException: on a terminated cluster or too many steps
    :raises NotImplementedError: for step params this mock doesn't support
    """
    if now is None:
        now = _boto3_now()

    _validate_param_type(Steps, (list, tuple))

    # only active job flows allowed
    if cluster['Status']['State'].startswith('TERMINAT'):
        raise _ValidationException(
            operation_name,
            'A job flow that is shutting down, terminated, or finished'
            ' may not be modified.')

    # no more than 256 steps allowed
    if cluster.get('RunningAmiVersion') and map_version(
            cluster['RunningAmiVersion'], LIFETIME_STEP_LIMIT_AMI_VERSIONS):
        # for very old AMIs, *all* steps count
        if len(cluster['_Steps']) + len(Steps) > STEP_ADD_LIMIT:
            raise _ValidationException(
                operation_name,
                'Maximum number of steps for job flow exceeded')
    else:
        # otherwise, only active and pending steps count
        num_active_steps = sum(
            1 for step in cluster['_Steps']
            if step['Status']['State'] in
            ('PENDING', 'PENDING_CANCELLED', 'RUNNING'))

        if num_active_steps + len(Steps) > STEP_ADD_LIMIT:
            raise _ValidationException(
                operation_name,
                "Maximum number of active steps(State = 'Running',"
                " 'Pending' or 'Cancel_Pending') for cluster exceeded.")

    new_steps = []

    for i, Step in enumerate(Steps):
        # copy so the pop() calls below don't mutate the caller's dict
        Step = dict(Step)

        # skeleton step record with API defaults; fields validated
        # below overwrite these
        new_step = dict(
            ActionOnFailure='TERMINATE_CLUSTER',
            Config=dict(
                Args=[],
                Jar={},
                Properties={},
            ),
            # IDs continue from the steps already on the cluster
            Id='s-MOCKSTEP%d' % (len(cluster['_Steps']) + i),
            Name='',
            Status=dict(
                State='PENDING',
                StateChangeReason={},
                Timeline=dict(CreationDateTime=now),
            ),
        )

        # Name (required)
        _validate_param(Step, 'Name', string_types)
        new_step['Name'] = Step.pop('Name')

        # ActionOnFailure
        if 'ActionOnFailure' in Step:
            _validate_param_enum(Step['ActionOnFailure'], [
                'CANCEL_AND_WAIT', 'CONTINUE', 'TERMINATE_JOB_FLOW',
                'TERMINATE_CLUSTER'
            ])
            new_step['ActionOnFailure'] = Step.pop('ActionOnFailure')

        # HadoopJarStep (required)
        _validate_param(Step, 'HadoopJarStep', dict)
        # copy for the same reason as Step above
        HadoopJarStep = dict(Step.pop('HadoopJarStep'))

        _validate_param(HadoopJarStep, 'Jar', string_types)
        new_step['Config']['Jar'] = HadoopJarStep.pop('Jar')

        if 'Args' in HadoopJarStep:
            Args = HadoopJarStep.pop('Args')
            _validate_param_type(Args, (list, tuple))
            for arg in Args:
                _validate_param_type(arg, string_types)
            new_step['Config']['Args'].extend(Args)

        if 'MainClass' in HadoopJarStep:
            _validate_param(HadoopJarStep, 'MainClass', string_types)
            new_step['Config']['MainClass'] = HadoopJarStep.pop(
                'MainClass')

        # we don't currently support Properties
        if HadoopJarStep:
            raise NotImplementedError(
                "mock_boto3 doesn't support these HadoopJarStep params: %s" %
                ', '.join(sorted(HadoopJarStep)))

        if Step:
            raise NotImplementedError(
                "mock_boto3 doesn't support these step params: %s" %
                ', '.join(sorted(Step)))

        new_steps.append(new_step)

    cluster['_Steps'].extend(new_steps)

    # add_job_flow_steps() needs to return step IDs
    return [new_step['Id'] for new_step in new_steps]
def run_job_flow(self, **kwargs):
    """Mock of the EMR RunJobFlow API call.

    Validates params, builds a mock cluster dict, stores it in
    self.mock_emr_clusters, and returns ``dict(JobFlowId=...)``.

    :raises _ValidationException: on invalid/missing params
    :raises NotImplementedError: for params this mock doesn't support
    """
    # going to pop params from kwargs as we process them, and raise
    # NotImplementedError at the end if any params are left
    now = kwargs.pop('_Now', _boto3_now())

    # our newly created cluster, as described by describe_cluster(), plus:
    #
    # _BootstrapActions: as described by list_bootstrap_actions()
    # _InstanceGroups: as described by list_instance_groups()
    # _Steps: as described by list_steps(), but not reversed
    #
    # TODO: at some point when we implement instance fleets,
    # _InstanceGroups will become optional
    cluster = dict(
        _BootstrapActions=[],
        _InstanceGroups=[],
        _Steps=[],
        Applications=[],
        AutoTerminate=True,
        Configurations=[],
        Ec2InstanceAttributes=dict(
            EmrManagedMasterSecurityGroup='sg-mockmaster',
            EmrManagedSlaveSecurityGroup='sg-mockslave',
            IamInstanceProfile='',
        ),
        Id='j-MOCKCLUSTER%d' % len(self.mock_emr_clusters),
        Name='',
        NormalizedInstanceHours=0,
        ScaleDownBehavior='TERMINATE_AT_TASK_COMPLETION',
        ServiceRole='',
        Status=dict(
            State='STARTING',
            StateChangeReason={},
            Timeline=dict(CreationDateTime=now),
        ),
        Tags=[],
        TerminationProtected=False,
        VisibleToAllUsers=False,
    )

    def _error(message):
        return _ValidationException('RunJobFlow', message)

    # Name (required)
    _validate_param(kwargs, 'Name', string_types)
    cluster['Name'] = kwargs.pop('Name')

    # LogUri
    if 'LogUri' in kwargs:
        _validate_param(kwargs, 'LogUri', string_types)
        cluster['LogUri'] = kwargs.pop('LogUri')

    # JobFlowRole and ServiceRole (required)
    _validate_param(kwargs, 'JobFlowRole', string_types)
    cluster['Ec2InstanceAttributes']['IamInstanceProfile'] = kwargs.pop(
        'JobFlowRole')

    if 'ServiceRole' not in kwargs:  # required by API, not boto3
        raise _error('ServiceRole is required for creating cluster.')
    _validate_param(kwargs, 'ServiceRole', string_types)
    cluster['ServiceRole'] = kwargs.pop('ServiceRole')

    # AmiVersion and ReleaseLabel
    for version_param in ('AmiVersion', 'ReleaseLabel'):
        if version_param in kwargs:
            _validate_param(kwargs, version_param, string_types)

    if 'AmiVersion' in kwargs:
        if 'ReleaseLabel' in kwargs:
            raise _error(
                'Only one AMI version and release label may be specified.'
                ' Provided AMI: %s, release label: %s.' % (
                    kwargs['AmiVersion'], kwargs['ReleaseLabel']))

        AmiVersion = kwargs.pop('AmiVersion')

        # aliases like 'latest' resolve to a real version number
        running_ami_version = AMI_VERSION_ALIASES.get(
            AmiVersion, AmiVersion)

        # 4.x+ must use ReleaseLabel; 1.x isn't supported at all
        if version_gte(running_ami_version, '4'):
            raise _error('The supplied ami version is invalid.')
        elif not version_gte(running_ami_version, '2'):
            raise _error(
                'Job flow role is not compatible with the supplied'
                ' AMI version')

        cluster['RequestedAmiVersion'] = AmiVersion
        cluster['RunningAmiVersion'] = running_ami_version

    elif 'ReleaseLabel' in kwargs:
        ReleaseLabel = kwargs.pop('ReleaseLabel')

        # strip the 'emr-' prefix to get the version number. Don't use
        # str.lstrip('emr-') here: it strips a *character set*, not a
        # prefix, and would mangle labels with extra leading e/m/r/-
        if ReleaseLabel.startswith('emr-'):
            running_ami_version = ReleaseLabel[len('emr-'):]
        else:
            running_ami_version = ReleaseLabel

        if not version_gte(running_ami_version, '4'):
            raise _error('The supplied release label is invalid: %s.' %
                         ReleaseLabel)

        cluster['ReleaseLabel'] = ReleaseLabel
    else:
        # note: you can't actually set Hadoop version through boto3
        raise _error('Must specify exactly one of the following:'
                     ' release label, AMI version, or Hadoop version.')

    # Applications
    hadoop_version = map_version(
        running_ami_version, AMI_HADOOP_VERSION_UPDATES)

    if version_gte(running_ami_version, '4'):
        application_names = set(
            a['Name'] for a in kwargs.pop('Applications', []))

        # if Applications is set but doesn't include Hadoop, the
        # cluster description won't either! (Even though Hadoop is
        # in fact installed.)
        if not application_names:
            application_names = set(['Hadoop'])

        for app_name in sorted(application_names):
            if app_name == 'Hadoop':
                version = hadoop_version
            else:
                version = DUMMY_APPLICATION_VERSION

            cluster['Applications'].append(
                dict(Name=app_name, Version=version))
    else:
        if kwargs.get('Applications'):
            raise _error(
                'Cannot specify applications when AMI version is used.'
                ' Specify supported products or new supported products'
                ' instead.')

        # 'hadoop' is lowercase if AmiVersion specified
        cluster['Applications'].append(
            dict(Name='hadoop', Version=hadoop_version))

    # Configurations
    if 'Configurations' in kwargs:
        _validate_param(kwargs, 'Configurations', (list, tuple))

        if kwargs['Configurations'] and not version_gte(
                running_ami_version, '4'):
            raise _ValidationException(
                'RunJobFlow',
                'Cannot specify configurations when AMI version is used.')

        cluster['Configurations'] = _normalized_configurations(
            kwargs.pop('Configurations'))

    # VisibleToAllUsers
    if 'VisibleToAllUsers' in kwargs:
        _validate_param(kwargs, 'VisibleToAllUsers', bool)
        cluster['VisibleToAllUsers'] = kwargs.pop('VisibleToAllUsers')

    # pass BootstrapActions off to helper
    if 'BootstrapActions' in kwargs:
        self._add_bootstrap_actions(
            'RunJobFlow', kwargs.pop('BootstrapActions'), cluster)

    # pass Instances (required) off to helper
    _validate_param(kwargs, 'Instances')
    self._add_instances(
        'RunJobFlow', kwargs.pop('Instances'), cluster, now=now)

    # pass Steps off to helper
    if 'Steps' in kwargs:
        self._add_steps('RunJobFlow', kwargs.pop('Steps'), cluster)

    # pass Tags off to helper
    if 'Tags' in kwargs:
        self._add_tags('RunJobFlow', kwargs.pop('Tags'), cluster)

    # save AdditionalInfo
    if 'AdditionalInfo' in kwargs:
        cluster['_AdditionalInfo'] = kwargs.pop('AdditionalInfo')

    # catch extra params
    if kwargs:
        raise NotImplementedError(
            'mock RunJobFlow does not support these parameters: %s' %
            ', '.join(sorted(kwargs)))

    self.mock_emr_clusters[cluster['Id']] = cluster

    return dict(JobFlowId=cluster['Id'])
def test_version_may_not_be_None(self):
    """map_version() must reject None as a version."""
    version_map = {"1": "foo"}

    # a proper version string maps normally...
    self.assertEqual(map_version("1", version_map), "foo")
    # ...but None raises TypeError
    self.assertRaises(TypeError, map_version, None, version_map)