def fulfil_bindings(binding_data, fulfilled_promises):
    """
    Substitutes promise values in binding_data with model Literal values built from python std values in
    fulfilled_promises

    Scalar bindings are wrapped as-is, collection and map bindings are resolved recursively, and promise
    bindings are looked up in ``fulfilled_promises`` first by node id and then by output variable name.

    :param _interface.BindingData binding_data: the binding to resolve.
    :param dict[Text,T] fulfilled_promises: node outputs keyed by node id. Each value is expected to support
        membership tests and indexing by output variable name; a value of None is treated as "this node
        produced no outputs".
    :return: a Literal for scalar/collection/map bindings, the result of ``sdk_type.from_python_std`` for a
        promise binding, or None when none of the four binding fields is set.
    :raises _system_exception.FlyteSystemAssertion: if the promised node, or the promised output variable of
        that node, is missing from ``fulfilled_promises``.
    """
    if binding_data.scalar:
        return _literals.Literal(scalar=binding_data.scalar)

    if binding_data.collection:
        resolved_items = [
            DynamicTask.fulfil_bindings(sub_binding_data, fulfilled_promises)
            for sub_binding_data in binding_data.collection.bindings
        ]
        return _literals.Literal(collection=_literals.LiteralCollection(resolved_items))

    if binding_data.promise:
        promise = binding_data.promise
        if promise.node_id not in fulfilled_promises:
            raise _system_exception.FlyteSystemAssertion(
                "Expecting output of node [{}] but that hasn't been produced.".format(promise.node_id))

        node_output = fulfilled_promises[promise.node_id]
        # A node may be registered with no outputs at all (None). Report that through the same assertion
        # as a missing variable instead of failing with a TypeError on the membership test.
        if node_output is None or promise.var not in node_output:
            raise _system_exception.FlyteSystemAssertion(
                "Expecting output [{}] of node [{}] but that hasn't been produced.".format(
                    promise.var, promise.node_id))

        return promise.sdk_type.from_python_std(node_output[promise.var])

    if binding_data.map:
        resolved_entries = {
            k: DynamicTask.fulfil_bindings(sub_binding_data, fulfilled_promises)
            for k, sub_binding_data in _six.iteritems(binding_data.map.bindings)
        }
        return _literals.Literal(map=_literals.LiteralMap(resolved_entries))

    # None of the binding fields is set: keep returning None, as callers may rely on it.
    return None
def get_sdk_type_from_literal_type(self, literal_type):
    """
    Resolve the SDK type that corresponds to a literal type model.

    The fields of ``literal_type`` are checked in this order: collection, map, schema, blob, simple.
    For simple types, BINARY and STRUCT types carrying a protobuf tag in their metadata are resolved
    through the proto helpers; everything else goes through ``self._SIMPLE_TYPE_LOOKUP_TABLE``.

    :param flytekit.models.types.LiteralType literal_type:
    :rtype: flytekit.common.types.base_sdk_types.FlyteSdkType
    :raises NotImplementedError: for map types, and for simple types absent from the lookup table.
    :raises _system_exceptions.FlyteSystemAssertion: when none of the recognized fields is set.
    """
    if literal_type.collection_type is not None:
        return _container_types.List(
            _helpers.get_sdk_type_from_literal_type(literal_type.collection_type))
    if literal_type.map_value_type is not None:
        raise NotImplementedError("TODO: Implement map")
    if literal_type.schema is not None:
        return _schema.schema_instantiator_from_proto(literal_type.schema)
    if literal_type.blob is not None:
        return self._get_blob_impl_from_type(literal_type.blob)
    if literal_type.simple is None:
        raise _system_exceptions.FlyteSystemAssertion(
            "An unrecognized literal type was received: {}".format(literal_type))

    simple = literal_type.simple
    metadata = literal_type.metadata
    # Metadata may be empty or None. Check it before the membership test for BOTH proto-backed kinds,
    # so that a BINARY type without metadata falls through to the lookup table instead of raising
    # a TypeError on `in None`.
    if metadata and _proto.Protobuf.PB_FIELD_KEY in metadata:
        if simple == _literal_type_models.SimpleType.BINARY:
            return _proto_sdk_type_from_tag(metadata[_proto.Protobuf.PB_FIELD_KEY])
        if simple == _literal_type_models.SimpleType.STRUCT:
            return _generic_proto_sdk_type_from_tag(metadata[_proto.Protobuf.PB_FIELD_KEY])

    sdk_type = self._SIMPLE_TYPE_LOOKUP_TABLE.get(simple)
    if sdk_type is None:
        raise NotImplementedError(
            "We haven't implemented this type yet:  Simple type={}".format(simple))
    return sdk_type
def execute_array_task(root_input_path, task, array_inputs):
    """
    Execute every sub-job of an array task and merge the results into a single dict.

    For each job index in ``[0, array_job.size)`` the inputs document is looked up in ``array_inputs``
    under ``<root_input_path>/<job_index>/<INPUT_FILE_NAME>`` and run through a ``ReturnOutputsTask``.

    :param root_input_path: path prefix under which the per-job inputs documents are keyed.
    :param task: array task whose ``custom`` field describes the array job.
    :param array_inputs: mapping from inputs-document path to the inputs to execute with.
    :return: dict keyed ``"[<job_index>].<var_name>"`` (e.g. ``[1].out1``) holding each sub-job output.
    :raises _system_exception.FlyteSystemAssertion: if an expected inputs document is missing.
    """
    job_spec = _array_job.ArrayJob.from_dict(task.custom)
    merged_outputs = {}

    for job_index in _six_moves.range(0, job_spec.size):
        document_path = _os.path.join(
            root_input_path,
            _six.text_type(job_index),
            _sdk_constants.INPUT_FILE_NAME,
        )
        if document_path not in array_inputs:
            raise _system_exception.FlyteSystemAssertion(
                "dynamic task hasn't generated expected inputs document [{}].".format(document_path))

        job_inputs = array_inputs[document_path]
        # TODO: This is weird -- the array task is re-typed as a plain python task before being run.
        runnable = ReturnOutputsTask(
            task.assign_type_and_return(_sdk_constants.SdkTaskType.PYTHON_TASK))
        job_outputs = runnable.execute(job_inputs)

        # Sub-jobs of one array job share a node, so their outputs are told apart by prefixing
        # each variable name with the job index.
        merged_outputs.update({
            "[{}].{}".format(job_index, var_name): value
            for var_name, value in _six.iteritems(job_outputs)
        })

    return merged_outputs
def test_flyte_system_assertion():
    """FlyteSystemAssertion keeps its message, exposes its error code and has both expected bases."""
    expected_message = "I assert that the system messed up."
    caught = None
    try:
        raise system.FlyteSystemAssertion(expected_message)
    except Exception as err:
        # Keep a reference: the `as` name is cleared when the except block ends.
        caught = err

    assert str(caught) == expected_message
    assert type(caught).error_code == "SYSTEM:AssertionError"
    assert isinstance(caught, system.FlyteSystemException)
    assert isinstance(caught, AssertionError)
def _execute_user_code(self, inputs):
    """
    Run the user code and, if it produced a futures document, execute the generated sub-tasks so the
    declared outputs can be resolved.

    Each node of the futures document is handled according to its kind:

    * workflow nodes and Spark tasks are not executed; ``self._has_workflow_node`` is set and the
      parent task's results are returned as they are;
    * container array tasks go through ``DynamicTask.execute_array_task``;
    * Hive jobs record ``None`` as their output;
    * any other task is executed through the unit test engine using its inputs document.

    :param flytekit.models.literals.LiteralMap inputs:
    :rtype: dict[Text,flytekit.models.common.FlyteIdlEntity]
    :raises _system_exception.FlyteSystemAssertion: if a sub-task's inputs document is not in the results.
    """
    results = super(DynamicTask, self)._execute_user_code(inputs)
    if _sdk_constants.FUTURES_FILE_NAME not in results:
        return results

    futures = results[_sdk_constants.FUTURES_FILE_NAME]
    sub_task_outputs = {}
    tasks_map = {task.id: task for task in futures.tasks}

    for future_node in futures.nodes:
        if future_node.workflow_node is not None:
            # TODO: implement proper unit testing for launchplan and subworkflow nodes somehow
            _logging.warning(
                "A workflow node has been detected in the output of the dynamic task. The "
                "Flytekit unit test engine is incomplete for dynamic tasks that return launch "
                "plans or subworkflows. The generated dynamic job spec will be returned but "
                "they will not be run."
            )
            # For now, just return the output of the parent task
            self._has_workflow_node = True
            return results

        task = tasks_map[future_node.task_node.reference_id]
        if task.type == _sdk_constants.SdkTaskType.CONTAINER_ARRAY_TASK:
            sub_task_output = DynamicTask.execute_array_task(future_node.id, task, results)
        elif task.type == _sdk_constants.SdkTaskType.SPARK_TASK:
            # This is required because the `_transform_for_user_output` function above is invoked,
            # which checks for outputs
            self._has_workflow_node = True
            return results
        elif task.type == _sdk_constants.SdkTaskType.HIVE_JOB:
            # TODO: futures.outputs should have the Schema instances.
            # After schema is implemented, fill out random data into the random locations
            # then check output in test function
            # Even though we recommend people use typed schemas, they might not always do so...
            # in which case it'll be impossible to predict the actual schema, we should support a
            # way for unit test authors to provide fake data regardless
            sub_task_output = None
        else:
            inputs_path = _os.path.join(future_node.id, _sdk_constants.INPUT_FILE_NAME)
            if inputs_path not in results:
                # Report the document path that was actually looked up, not just the node id.
                raise _system_exception.FlyteSystemAssertion(
                    "dynamic task hasn't generated expected inputs document [{}] found {}".format(
                        inputs_path, list(results.keys())
                    )
                )
            sub_task_output = UnitTestEngineFactory().get_task(task).execute(results[inputs_path])

        sub_task_outputs[future_node.id] = sub_task_output

    results[_sdk_constants.OUTPUT_FILE_NAME] = _literals.LiteralMap(
        literals={
            binding.var: DynamicTask.fulfil_bindings(binding.binding, sub_task_outputs)
            for binding in futures.outputs
        }
    )
    return results
def infer_sdk_type_from_literal(self, literal):  # noqa
    """
    Work out which SDK type a literal value corresponds to by inspecting which of its fields is set.

    Collections are typed from their first element (``Void`` elements when empty). For scalars the
    fields are probed in a fixed order: blob, none_type, schema, error, generic, binary, then the
    primitive fields.

    :param flytekit.models.literals.Literal literal:
    :rtype: flytekit.common.types.base_sdk_types.FlyteSdkType
    :raises NotImplementedError: for map literals, error scalars and non-protobuf binary scalars.
    :raises _system_exceptions.FlyteSystemAssertion: when no known field is set.
    """
    if literal.collection is not None:
        members = literal.collection.literals
        if len(members) > 0:
            return _container_types.List(_helpers.infer_sdk_type_from_literal(members[0]))
        return _container_types.List(_base_sdk_types.Void)

    if literal.map is not None:
        raise NotImplementedError("TODO: Implement map")

    scalar = literal.scalar
    if scalar.blob is not None:
        return self._get_blob_impl_from_type(scalar.blob.metadata.type)
    if scalar.none_type is not None:
        return _base_sdk_types.Void
    if scalar.schema is not None:
        return _schema.schema_instantiator_from_proto(scalar.schema.type)
    if scalar.error is not None:
        raise NotImplementedError("TODO: Implement error from literal map")
    if scalar.generic is not None:
        return _primitive_types.Generic
    if scalar.binary is not None:
        tag = scalar.binary.tag
        prefix = _proto.Protobuf.TAG_PREFIX
        if not tag.startswith(prefix):
            raise NotImplementedError(
                "TODO: Binary is only supported for protobuf types currently"
            )
        # The remainder of the tag after the prefix identifies the protobuf type.
        return _proto_sdk_type_from_tag(tag[len(prefix):])

    # Only primitives are left; the first populated field decides the type.
    primitive = scalar.primitive
    primitive_fields = (
        ("boolean", _primitive_types.Boolean),
        ("datetime", _primitive_types.Datetime),
        ("duration", _primitive_types.Timedelta),
        ("float_value", _primitive_types.Float),
        ("integer", _primitive_types.Integer),
        ("string_value", _primitive_types.String),
    )
    for field_name, sdk_type in primitive_fields:
        if getattr(primitive, field_name) is not None:
            return sdk_type

    raise _system_exceptions.FlyteSystemAssertion(
        "Received unknown literal: {}".format(literal))
def test_base_scope():
    """
    Each exception handed to ``_user_func`` or ``_system_func`` is raised as an instance of the
    expected type and still carries its message.
    """
    cases = (
        (ValueError, "Bad value", _user_func),
        (ValueError, "Bad value", _system_func),
        (user.FlyteAssertion, "Bad assert", _user_func),
        (system.FlyteSystemAssertion, "Bad assert", _user_func),
        (user.FlyteAssertion, "Bad assert", _system_func),
        (system.FlyteSystemAssertion, "Bad assert", _system_func),
    )
    for exc_type, message, scoped_func in cases:
        with pytest.raises(exc_type) as exc_info:
            scoped_func(exc_type(message))
        assert message in str(exc_info.value)
def _map_job_index_to_child_index(local_input_dir, datadir, index):
    """
    Translate an array job index into the child index recorded in ``indexlookup.pb``.

    If no ``indexlookup.pb`` exists under ``datadir`` the given index is returned unchanged. Otherwise
    the file is downloaded to a named temp file in ``local_input_dir``, parsed as a LiteralCollection,
    and the integer primitive stored at position ``index`` is returned.

    :param local_input_dir: object providing ``get_named_tempfile`` for the local copy of the lookup file.
    :param datadir: location that may contain ``indexlookup.pb``.
    :param int index: position to look up in the mapping.
    :return: ``index`` itself when there is no lookup file, else the mapped integer value.
    :raises _system_exceptions.FlyteSystemAssertion: if ``index`` is past the end of the lookup array.
    """
    local_lookup_file = local_input_dir.get_named_tempfile('indexlookup.pb')
    idx_lookup_file = _os.path.join(datadir, 'indexlookup.pb')

    # if the indexlookup.pb does not exist, then just return the index
    if not _data_proxy.Data.data_exists(idx_lookup_file):
        return index

    _data_proxy.Data.get_data(idx_lookup_file, local_lookup_file)
    mapping_proto = _utils.load_proto_from_file(_literals_pb2.LiteralCollection, local_lookup_file)

    # Valid positions are 0 .. len - 1, so an index equal to the length is already out of range.
    # Checking with `>=` reports it through the assertion below rather than an IndexError.
    if index >= len(mapping_proto.literals):
        raise _system_exceptions.FlyteSystemAssertion(
            "dynamic task index lookup array size: {} is smaller than lookup index {}".format(
                len(mapping_proto.literals), index))
    return mapping_proto.literals[index].scalar.primitive.integer
def _get_blob_impl_from_type(self, blob_type):
    """
    Pick the blob SDK type matching a blob type's dimensionality and format.

    The ``"csv"`` format selects the CSV variant; any other format selects the generic blob variant
    of the same dimensionality.

    :param flytekit.models.core.types.BlobType blob_type:
    :rtype: flytekit.common.types.base_sdk_types.FlyteSdkType
    :raises _system_exceptions.FlyteSystemAssertion: if the dimensionality is neither SINGLE nor MULTIPART.
    """
    dimensionality = blob_type.dimensionality

    if dimensionality == _core_types.BlobType.BlobDimensionality.SINGLE:
        return _blobs.CSV if blob_type.format == "csv" else _blobs.Blob

    if dimensionality == _core_types.BlobType.BlobDimensionality.MULTIPART:
        return _blobs.MultiPartCSV if blob_type.format == "csv" else _blobs.MultiPartBlob

    raise _system_exceptions.FlyteSystemAssertion(
        "Flyte's base type engine does not support this type of blob. Value: {}".format(blob_type))
def test_intercepted_scope_flyte_system_exception():
    """
    A FlyteSystemAssertion passed through ``_user_func`` or ``_system_func`` surfaces as a
    FlyteScopedSystemException that wraps the original error and is marked RECOVERABLE.
    """
    assertion_error = system.FlyteSystemAssertion("Bad assert")

    # Both helpers are expected to produce the same wrapped result for a system-level error.
    for scoped_func in (_user_func, _system_func):
        with pytest.raises(scopes.FlyteScopedSystemException) as exc_info:
            scoped_func(assertion_error)

        scoped_error = exc_info.value
        assert scoped_error.value == assertion_error
        assert "Bad assert" in scoped_error.verbose_message
        assert "SYSTEM ERROR!" in scoped_error.verbose_message
        assert scoped_error.kind == _error_models.ContainerError.Kind.RECOVERABLE
def _execute_user_code(self, inputs):
    """
    Run the user code and, if it produced a futures document, execute the generated sub-tasks so the
    declared outputs can be resolved.

    Container array tasks go through ``DynamicTask.execute_array_task``, Hive jobs record ``None`` as
    their output, and every other task is executed through the unit test engine using its inputs
    document. The resolved output bindings are stored in the results under ``OUTPUT_FILE_NAME``.

    :param flytekit.models.literals.LiteralMap inputs:
    :rtype: dict[Text,flytekit.models.common.FlyteIdlEntity]
    :raises _system_exception.FlyteSystemAssertion: if a sub-task's inputs document is not in the results.
    """
    results = super(DynamicTask, self)._execute_user_code(inputs)
    if _sdk_constants.FUTURES_FILE_NAME not in results:
        return results

    futures = results[_sdk_constants.FUTURES_FILE_NAME]
    sub_task_outputs = {}
    tasks_map = {task.id: task for task in futures.tasks}

    for future_node in futures.nodes:
        task = tasks_map[future_node.task_node.reference_id]
        if task.type == _sdk_constants.SdkTaskType.CONTAINER_ARRAY_TASK:
            sub_task_output = DynamicTask.execute_array_task(future_node.id, task, results)
        elif task.type == _sdk_constants.SdkTaskType.HIVE_JOB:
            # TODO: futures.outputs should have the Schema instances.
            # After schema is implemented, fill out random data into the random locations
            # then check output in test function
            # Even though we recommend people use typed schemas, they might not always do so...
            # in which case it'll be impossible to predict the actual schema, we should support a
            # way for unit test authors to provide fake data regardless
            sub_task_output = None
        else:
            inputs_path = _os.path.join(future_node.id, _sdk_constants.INPUT_FILE_NAME)
            if inputs_path not in results:
                # Report the document path that was actually looked up, not just the node id.
                raise _system_exception.FlyteSystemAssertion(
                    "dynamic task hasn't generated expected inputs document [{}] found {}".format(
                        inputs_path, list(results.keys())))
            sub_task_output = UnitTestEngineFactory().get_task(task).execute(results[inputs_path])

        sub_task_outputs[future_node.id] = sub_task_output

    results[_sdk_constants.OUTPUT_FILE_NAME] = _literals.LiteralMap(
        literals={
            binding.var: DynamicTask.fulfil_bindings(binding.binding, sub_task_outputs)
            for binding in futures.outputs
        })
    return results