Example #1
 def test_missing_id_field(self):
     # Note that the 'id' key is missing.
     definition = self.load_def("""{
           "objects": [
             {
               "name": "OVERRIDE-NAME",
               "type": "Schedule",
               "startDateTime": "2013-08-18T00:00:00",
               "endDateTime": "2013-08-19T00:00:00",
               "period": "1 day"
             }
         ]}""")
     with self.assertRaises(translator.PipelineDefinitionError):
         translator.definition_to_api_objects(definition)
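The test above relies on the translator raising PipelineDefinitionError when an object has no 'id'. A minimal sketch of a pre-check a caller might add for a friendlier error message (the helper name and message are illustrative, not from the source):

import awscli.customizations.datapipeline.translator as translator

def translate_with_id_check(definition):
    # Fail early with a readable message if any object lacks an 'id',
    # instead of relying only on translator.PipelineDefinitionError.
    missing = [obj for obj in definition.get('objects', []) if 'id' not in obj]
    if missing:
        raise ValueError('objects without an "id": %r' % missing)
    return translator.definition_to_api_objects(definition)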
Example #2
    def define_data_pipeline(self, client, pipe_id, emr_core_instances):
        import awscli.customizations.datapipeline.translator as trans
        base = self.get_package_path()

        if emr_core_instances != 0:
            # See syntax in datapipeline-dg.pdf p285.
            # To add in the JSON later (to match EMR mode): "AdditionalMasterSecurityGroups": "#{}"
            definition_file = base + 'yaetos/definition.json'
        else:
            definition_file = base + 'yaetos/definition_standalone_cluster.json'
            # TODO: have 1 json for both to avoid having to track duplication.

        # Note: Data Pipeline doesn't support emr-6.0.0 yet.
        with open(definition_file, 'r') as f:
            definition = json.load(f)

        pipelineObjects = trans.definition_to_api_objects(definition)
        parameterObjects = trans.definition_to_api_parameters(definition)
        parameterValues = trans.definition_to_parameter_values(definition)
        parameterValues = self.update_params(parameterValues)
        logger.info('Filled pipeline with data from ' + definition_file)

        response = client.put_pipeline_definition(
            pipelineId=pipe_id,
            pipelineObjects=pipelineObjects,
            parameterObjects=parameterObjects,
            parameterValues=parameterValues)
        logger.info('put_pipeline_definition response: ' + str(response))
        return parameterValues
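The update_params call above is a project-specific helper that is not shown in this example. A hypothetical standalone sketch of what it could do, assuming the {'id': ..., 'stringValue': ...} format that definition_to_parameter_values produces for put_pipeline_definition (the overrides argument and the merge logic are assumptions, not taken from the source):

def update_params(parameter_values, overrides=None):
    # Hypothetical: replace the stringValue of any parameter whose id appears
    # in `overrides`; leave all other entries untouched.
    overrides = overrides or {}
    return [dict(p, stringValue=overrides.get(p['id'], p['stringValue']))
            for p in parameter_values]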
Example #3
def deploy(config_file='config.yaml'):
    path_to_config_file = os.path.join(os.getcwd(), config_file)
    cfg = read_cfg(path_to_config_file)

    profile = cfg.get('aws_profile')
    access_key_id = cfg.get('aws_access_key_id')
    secret_access_key = cfg.get('aws_secret_access_key')
    region = cfg.get('region')

    client = get_client(access_key_id, secret_access_key, region, profile)

    name = cfg.get('name')
    unique_id = cfg.get('unique_id')
    description = cfg.get('description', '')

    create_response = client.create_pipeline(name, unique_id, description)

    pipeline_id = create_response.get('pipelineId')
    parameter_objects = translator.definition_to_api_parameters(
        read_json_file(cfg.get('parameter_objects')))
    parameter_values = translator.definition_to_parameter_values(
        read_json_file(cfg.get('parameter_values')))
    pipeline_definition = translator.definition_to_api_objects(
        read_json_file(cfg.get('pipeline_definition')))

    return client.put_pipeline_definition(pipeline_id, pipeline_definition,
                                          parameter_objects, parameter_values)
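For reference, a sketch of the mapping read_cfg() is assumed to return for deploy() above; the keys mirror the cfg.get(...) calls and the values are placeholders:

example_cfg = {
    'aws_profile': 'default',          # or aws_access_key_id / aws_secret_access_key
    'region': 'us-east-1',
    'name': 'my-pipeline',
    'unique_id': 'my-pipeline-token',
    'description': 'Example pipeline',
    'pipeline_definition': 'pipeline_definition.json',  # JSON file with the pipeline objects
    'parameter_objects': 'parameter_objects.json',      # JSON file with the parameter definitions
    'parameter_values': 'parameter_values.json',        # JSON file with the parameter values
}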
Example #4
    def set_pipeline_definition(self):
        """Translates the json definition and puts it on created pipeline

        Returns:
                dict: the response of the Boto3 command
        """

        if not self.pipeline_id:
            self.get_pipeline_id()

        json_def = self.datapipeline_data['json_definition']
        try:
            pipelineobjects = translator.definition_to_api_objects(json_def)
            parameterobjects = translator.definition_to_api_parameters(
                json_def)
            parametervalues = translator.definition_to_parameter_values(
                json_def)
        except translator.PipelineDefinitionError as error:
            LOG.warning(error)
            raise DataPipelineDefinitionError

        response = self.client.put_pipeline_definition(
            pipelineId=self.pipeline_id,
            pipelineObjects=pipelineobjects,
            parameterObjects=parameterobjects,
            parameterValues=parametervalues)
        LOG.debug(response)
        LOG.info("Successfully applied pipeline definition")
        return response
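A minimal sketch of a json_definition the three translator calls above can consume; the 'objects' key appears throughout the tests in this listing and 'values' is used by to_new_definition_objects below, while the 'parameters' section and the field names inside it are assumptions:

json_def = {
    "objects": [
        {"id": "Default", "name": "Default", "scheduleType": "ondemand"}
    ],
    "parameters": [
        {"id": "myS3Bucket", "type": "String"}      # assumed parameter definition
    ],
    "values": {
        "myS3Bucket": "s3://my-bucket"              # becomes a parameterValues entry
    }
}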
Example #5
 def test_value_with_refs(self):
     definition = self.load_def("""{"objects": [
         {
           "id" : "emrActivity",
           "type" : "EmrActivity",
           "name" : "Foo",
           "step" : ["s3://foo1", {"ref": "otherValue"}, "s3://foo3"]
         }
     ]}""")
     actual = translator.definition_to_api_objects(definition)
     api = [{"name": "Foo", "id": "emrActivity",
             "fields": [
                 {"key": "step", "stringValue": "s3://foo1"},
                 {"key": "step", "refValue": "otherValue"},
                 {"key": "step", "stringValue": "s3://foo3"},
                 {"key": "type", "stringValue": "EmrActivity"}
             ]}]
     self.assertEqual(actual, api)
Example #6
def to_new_definition_objects(new_definition, new_parameter_values):
    if new_parameter_values is not None:
        new_definition['values'] = new_parameter_values
    return dict(
        pipelineObjects=translator.definition_to_api_objects(new_definition),
        parameterObjects=translator.definition_to_api_parameters(
            new_definition),
        parameterValues=translator.definition_to_parameter_values(
            new_definition))
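The returned dict mirrors the keyword arguments of put_pipeline_definition, so a caller can unpack it directly (client and pipeline_id are illustrative names):

kwargs = to_new_definition_objects(new_definition, new_parameter_values)
client.put_pipeline_definition(pipelineId=pipeline_id, **kwargs)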
Example #7
 def add_to_params(self, parameters, value):
     if value is None:
         return
     parsed = json.loads(value)
     api_objects = translator.definition_to_api_objects(parsed)
     parameter_objects = translator.definition_to_api_parameters(parsed)
     parameter_values = translator.definition_to_parameter_values(parsed)
     parameters["pipelineObjects"] = api_objects
     # Use Parameter objects and values from def if not already provided
     if "parameterObjects" not in parameters and parameter_objects is not None:
         parameters["parameterObjects"] = parameter_objects
     if "parameterValues" not in parameters and parameter_values is not None:
         parameters["parameterValues"] = parameter_values
Example #8
 def add_to_params(self, parameters, value):
     if value is None:
         return
     parsed = json.loads(value)
     api_objects = translator.definition_to_api_objects(parsed)
     parameter_objects = translator.definition_to_api_parameters(parsed)
     parameter_values = translator.definition_to_parameter_values(parsed)
     parameters['pipelineObjects'] = api_objects
     # Use Parameter objects and values from def if not already provided
     if 'parameterObjects' not in parameters \
             and parameter_objects is not None:
         parameters['parameterObjects'] = parameter_objects
     if 'parameterValues' not in parameters \
             and parameter_values is not None:
         parameters['parameterValues'] = parameter_values
Example #9
 def test_value_with_refs(self):
     definition = self.load_def("""{"objects": [
         {
           "id" : "emrActivity",
           "type" : "EmrActivity",
           "name" : "Foo",
           "step" : ["s3://foo1", {"ref": "otherValue"}, "s3://foo3"]
         }
     ]}""")
     actual = translator.definition_to_api_objects(definition)
     api = [{"name": "Foo", "id": "emrActivity",
             "fields": [
                 {"key": "step", "stringValue": "s3://foo1"},
                 {"key": "step", "refValue": "otherValue"},
                 {"key": "step", "stringValue": "s3://foo3"},
                 {"key": "type", "stringValue": "EmrActivity"}
             ]}]
     self.assertEqual(actual, api)
Example #10
 def test_convert_schedule_df_to_api(self):
     definition = self.load_def("""{"objects": [
         {
           "id" : "S3ToS3Copy",
           "type" : "CopyActivity",
           "schedule" : { "ref" : "CopyPeriod" },
           "input" : { "ref" : "InputData" },
           "output" : { "ref" : "OutputData" }
         }
         ]}""")
     actual = translator.definition_to_api_objects(definition)
     api = [{"name": "S3ToS3Copy", "id": "S3ToS3Copy",
             "fields": [
                 {"key": "input", "refValue": "InputData"},
                 {"key": "output", "refValue": "OutputData"},
                 {"key": "schedule", "refValue": "CopyPeriod"},
                 {"key": "type", "stringValue": "CopyActivity"},
             ]}]
     self.assertEqual(actual, api)
Example #11
 def test_convert_df_to_api_schedule(self):
     definition = self.load_def("""{
           "objects": [
             {
               "id": "MySchedule",
               "type": "Schedule",
               "startDateTime": "2013-08-18T00:00:00",
               "endDateTime": "2013-08-19T00:00:00",
               "period": "1 day"
             }
         ]}""")
     actual = translator.definition_to_api_objects(definition)
     api = [{"name": "MySchedule", "id": "MySchedule",
             "fields": [
                 {"key": "endDateTime",
                  "stringValue": "2013-08-19T00:00:00"},
                 {"key": "period", "stringValue": "1 day"},
                 {"key": "startDateTime",
                  "stringValue": "2013-08-18T00:00:00"},
                 {"key": "type", "stringValue": "Schedule"},
             ]}]
     self.assertEqual(actual, api)
Example #12
 def test_convert_df_to_api_with_name(self):
     definition = self.load_def("""{
           "objects": [
             {
               "id": "MySchedule",
               "name": "OVERRIDE-NAME",
               "type": "Schedule",
               "startDateTime": "2013-08-18T00:00:00",
               "endDateTime": "2013-08-19T00:00:00",
               "period": "1 day"
             }
         ]}""")
     actual = translator.definition_to_api_objects(definition)
     api = [{"name": "OVERRIDE-NAME", "id": "MySchedule",
             "fields": [
                 {"key": "endDateTime",
                  "stringValue": "2013-08-19T00:00:00"},
                 {"key": "period", "stringValue": "1 day"},
                 {"key": "startDateTime",
                  "stringValue": "2013-08-18T00:00:00"},
                 {"key": "type", "stringValue": "Schedule"},
             ]}]
     self.assertEqual(actual, api)
Example #13
    def run_aws_data_pipeline(self):
        self.s3_ops(self.session)
        # TODO: fix privileges to get creds in dev env
        self.push_secrets(creds_or_file=self.app_args['connection_file'])

        # DataPipeline ops
        import awscli.customizations.datapipeline.translator as trans
        client = self.session.client('datapipeline')

        pipe_id = self.create_date_pipeline(client)

        # See syntax in datapipeline-dg.pdf p285.
        # To add in the JSON later (to match EMR mode): "AdditionalMasterSecurityGroups": "#{}"
        definition_file = eu.LOCAL_APP_FOLDER + 'core/definition.json'
        # Note: Data Pipeline doesn't support emr-6.0.0 yet.
        with open(definition_file, 'r') as f:
            definition = json.load(f)

        pipelineObjects = trans.definition_to_api_objects(definition)
        parameterObjects = trans.definition_to_api_parameters(definition)
        parameterValues = trans.definition_to_parameter_values(definition)
        parameterValues = self.update_params(parameterValues)
        logger.info('Filled pipeline with data from ' + definition_file)

        response = client.put_pipeline_definition(
            pipelineId=pipe_id,
            pipelineObjects=pipelineObjects,
            parameterObjects=parameterObjects,
            parameterValues=parameterValues)
        logger.info('put_pipeline_definition response: ' + str(response))

        response = client.activate_pipeline(
            pipelineId=pipe_id,
            # Optional. If set, all params must be specified as per the json.
            parameterValues=parameterValues,
            # startTimestamp=datetime(2018, 12, 1)  # optional
        )
        logger.info('activate_pipeline response: ' + str(response))
        logger.info('Activated pipeline ' + pipe_id)
Example #14
 def test_convert_schedule_df_to_api(self):
     definition = self.load_def("""{"objects": [
         {
           "id" : "S3ToS3Copy",
           "type" : "CopyActivity",
           "schedule" : { "ref" : "CopyPeriod" },
           "input" : { "ref" : "InputData" },
           "output" : { "ref" : "OutputData" }
         }
         ]}""")
     actual = translator.definition_to_api_objects(definition)
     api = [{"name": "S3ToS3Copy", "id": "S3ToS3Copy",
             "fields": [
                 {"key": "input", "refValue": "InputData"},
                 {"key": "output", "refValue": "OutputData"},
                 {"key": "schedule", "refValue": "CopyPeriod"},
                 {"key": "type", "stringValue": "CopyActivity"},
             ]}]
     self.assertEqual(actual, api)
Example #15
 def test_objects_key_is_missing_raise_error(self):
     definition = self.load_def("""{"not-objects": []}""")
     with self.assertRaises(translator.PipelineDefinitionError):
         translator.definition_to_api_objects(definition)