def test_single_step_entrypoint_in_proc():
    """Run ``add_one`` through ``_execute_task`` in-process and verify the written output.

    The literal map for ``{'a': 9}`` is serialized to ``inputs.pb`` in a temporary
    directory, the task is executed, and the output file is read back and unpacked.
    The unpacked result must contain exactly one entry, ``b == 10``.
    """
    config_path = os.path.join(os.path.dirname(__file__), 'fake.config')
    overrides = {'project': 'test', 'domain': 'development'}

    with _TemporaryConfiguration(config_path, internal_overrides=overrides):
        with _utils.AutoDeletingTempDir("in") as input_dir:
            # Serialize the task inputs to disk.
            input_types = _type_map_from_variable_map(_task_defs.add_one.interface.inputs)
            packed_inputs = _type_helpers.pack_python_std_map_to_literal_map({'a': 9}, input_types)
            input_file = os.path.join(input_dir.name, "inputs.pb")
            _utils.write_proto_to_file(packed_inputs.to_flyte_idl(), input_file)

            with _utils.AutoDeletingTempDir("out") as output_dir:
                _execute_task(
                    _task_defs.add_one.task_module,
                    _task_defs.add_one.task_function_name,
                    input_file,
                    output_dir.name,
                    False,
                )

                # Read the outputs back and convert them to plain Python values.
                output_file = os.path.join(output_dir.name, _constants.OUTPUT_FILE_NAME)
                output_proto = _utils.load_proto_from_file(_literals_pb2.LiteralMap, output_file)
                output_types = _type_map_from_variable_map(_task_defs.add_one.interface.outputs)
                raw_map = _type_helpers.unpack_literal_map_to_sdk_python_std(
                    _literal_models.LiteralMap.from_flyte_idl(output_proto),
                    output_types,
                )
                assert raw_map['b'] == 10
                assert len(raw_map) == 1
def evaluate_on_datasets(
    wf_params,
    model,
    evaluation_clean_mpblob,
    evaluation_dirty_mpblob,
    ground_truths_out,
    predictions_out,
):
    """Download the evaluation blobs and the model, run prediction, and publish the results.

    The "clean" and "dirty" blobs are fetched into a temporary evaluation
    directory via ``download_data``, the model is downloaded, and
    ``predict_with_resnet50_model`` is run with the module-level default batch
    and image sizes. Its two return values are stored in ``ground_truths_out``
    and ``predictions_out``. ``wf_params`` is not used by this function.
    """
    # The "results" directory is created (and cleaned up) but nothing in this function writes to it.
    with flytekit_utils.AutoDeletingTempDir("results"), \
            flytekit_utils.AutoDeletingTempDir("evaluation") as evaluation_dir:
        blobs_by_label = {
            "clean": evaluation_clean_mpblob,
            "dirty": evaluation_dirty_mpblob,
        }
        download_data(evaluation_dir.name, blobs_by_label)
        model.download()

        truths, preds = predict_with_resnet50_model(
            model_path=model.local_path,
            evaluation_dataset=evaluation_dir.name,
            batch_size=DEFAULT_BATCH_SIZE,
            img_size=DEFAULT_IMG_SIZE,
        )

        ground_truths_out.set(truths)
        predictions_out.set(preds)
def test_fetch(value_type_pair):
    """Write three parquet files, fetch them as a Schema, and verify what the reader yields.

    ``value_type_pair`` is a ``(column_name, flyte_type, values)`` triple. Each
    parquet file holds the same single-column records, so every chunk must
    match ``values`` and the concatenated read must repeat ``values`` cyclically.
    """
    column_name, flyte_type, values = value_type_pair
    values = [(value,) for value in values]
    schema_type = _schema_impl.SchemaType(columns=[(column_name, flyte_type)])

    with _utils.AutoDeletingTempDir("test") as tmpdir:
        # Three identically-populated parquet files named 000000, 000001, 000002.
        for file_index in _six_moves.range(3):
            frame = _pd.DataFrame.from_records(values, columns=[column_name])
            target = tmpdir.get_named_tempfile(str(file_index).zfill(6))
            frame.to_parquet(target, coerce_timestamps='us')

        with _utils.AutoDeletingTempDir("test2") as local_dir:
            schema_obj = _schema_impl.Schema.fetch(
                tmpdir.name,
                local_path=local_dir.get_named_tempfile('schema_test'),
                schema_type=schema_type,
            )
            with schema_obj as reader:
                # Chunk-by-chunk: every chunk mirrors the original records.
                for chunk in reader.iter_chunks():
                    column_values = chunk[column_name].tolist()
                    for expected_row, actual in _six_moves.zip(values, column_values):
                        assert expected_row[0] == actual

                # Once the chunks are exhausted there is nothing left to read.
                assert reader.read() is None

                # After rewinding, a concatenated read cycles through the records.
                reader.seek(0)
                combined = reader.read(concat=True)
                for position, actual in enumerate(combined[column_name].tolist()):
                    assert values[position % len(values)][0] == actual
def test_single_step_entrypoint_out_of_proc():
    """Run ``add_one`` through the ``execute_task_cmd`` CLI and verify the written output.

    The literal map for ``{'a': 9}`` is serialized to ``inputs.pb``, the command
    is invoked with a ``CliRunner``, and the output file is read back. The
    command must exit with code 0 and the unpacked result must be exactly
    ``b == 10``.
    """
    config_path = os.path.join(os.path.dirname(__file__), 'fake.config')
    overrides = {'project': 'test', 'domain': 'development'}

    with _TemporaryConfiguration(config_path, internal_overrides=overrides):
        with _utils.AutoDeletingTempDir("in") as input_dir:
            # Serialize the task inputs to disk.
            input_types = _type_map_from_variable_map(_task_defs.add_one.interface.inputs)
            packed_inputs = _type_helpers.pack_python_std_map_to_literal_map({'a': 9}, input_types)
            input_file = os.path.join(input_dir.name, "inputs.pb")
            _utils.write_proto_to_file(packed_inputs.to_flyte_idl(), input_file)

            with _utils.AutoDeletingTempDir("out") as output_dir:
                cmd = [
                    "--task-module", _task_defs.add_one.task_module,
                    "--task-name", _task_defs.add_one.task_function_name,
                    "--inputs", input_file,
                    "--output-prefix", output_dir.name,
                ]
                result = CliRunner().invoke(execute_task_cmd, cmd)
                assert result.exit_code == 0

                # Read the outputs back and convert them to plain Python values.
                output_file = os.path.join(output_dir.name, _constants.OUTPUT_FILE_NAME)
                output_proto = _utils.load_proto_from_file(_literals_pb2.LiteralMap, output_file)
                output_types = _type_map_from_variable_map(_task_defs.add_one.interface.outputs)
                raw_map = _type_helpers.unpack_literal_map_to_sdk_python_std(
                    _literal_models.LiteralMap.from_flyte_idl(output_proto),
                    output_types,
                )
                assert raw_map['b'] == 10
                assert len(raw_map) == 1
def convert_to_sagemaker_csv(ctx, x_train, y_train, x_test, y_test, train, validation):
    """Merge label/feature inputs and export them as header-less, index-less CSV files.

    ``read_and_merge`` is called with the labels first and the features second
    for both the training and the test pair. The training data is written to
    ``train.csv`` and the test data to ``validate.csv``, each inside its own
    temporary directory; the ``train`` and ``validation`` outputs are set to the
    corresponding directory while it still exists. ``ctx`` is not used here.
    """
    merged_train = read_and_merge(y_train, x_train)
    merged_validate = read_and_merge(y_test, x_test)

    # (temp-dir prefix, csv file name, data to write, output to set)
    exports = (
        ("train", "train.csv", merged_train, train),
        ("validate", "validate.csv", merged_validate, validation),
    )
    for dir_prefix, csv_name, frame, output in exports:
        with utils.AutoDeletingTempDir(dir_prefix) as tmp:
            csv_path = tmp.get_named_tempfile(csv_name)
            frame.to_csv(csv_path, header=False, index=False)
            # Must happen before the directory is cleaned up on exit.
            output.set(tmp.name)
def luminance_select_collection_worker(
    wf_params,
    raw_frames_mpblob,
    n_clusters,
    sample_size,
    random_seed,
    selected_image_mpblob,
    selected_file_names,
):
    """Sample frames with ``luminance_sample_collection`` and publish the selection.

    The raw frames blob is downloaded, sampling writes into a temporary output
    directory, and the outputs are then set: ``selected_image_mpblob`` to that
    directory and ``selected_file_names`` to the sorted names of the regular
    files directly inside it.
    """
    with flytekit_utils.AutoDeletingTempDir("output_images") as local_output_dir:
        output_path = local_output_dir.name

        raw_frames_mpblob.download()
        luminance_sample_collection(
            raw_frames_dir=raw_frames_mpblob.local_path,
            sampled_frames_out_dir=output_path,
            n_clusters=n_clusters,
            sample_size=sample_size,
            logger=wf_params.logging,
            random_seed=random_seed,
        )

        # Collect the names (sorted) of plain files in the output folder, skipping sub-folders.
        selected_file_names_in_folder = []
        for entry in sorted(listdir(output_path)):
            if isfile(join(output_path, entry)):
                selected_file_names_in_folder.append(entry)

        selected_image_mpblob.set(output_path)
        selected_file_names.set(selected_file_names_in_folder)
def inputs(self) -> Dict[str, Any]:
    """
    Returns the inputs to the execution in the standard python format as dictated by the type engine.

    The result is computed on first access and cached on ``self._inputs``. The literal map is
    taken inline from the execution data when present, otherwise downloaded from the inputs
    URL. It is then converted to Python values using types guessed from the interface of the
    workflow fetched with the launch plan identifier in ``self.spec.launch_plan``.
    """
    if self._inputs is None:
        client = _flyte_engine.get_client()
        execution_data = client.get_execution_data(self.id)

        # Inputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
        input_map: _literal_models.LiteralMap = _literal_models.LiteralMap({})
        if bool(execution_data.full_inputs.literals):
            input_map = execution_data.full_inputs
        elif execution_data.inputs.bytes > 0:
            with _common_utils.AutoDeletingTempDir() as tmp_dir:
                tmp_name = _os.path.join(tmp_dir.name, "inputs.pb")
                _data_proxy.Data.get_data(execution_data.inputs.url, tmp_name)
                # The protobuf message class is ``LiteralMap``; the previous spelling
                # ``Literalmap`` raised AttributeError whenever this branch was taken.
                input_map = _literal_models.LiteralMap.from_flyte_idl(
                    _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, tmp_name)
                )

        lp_id = self.spec.launch_plan
        workflow = _workflow.FlyteWorkflow.fetch(lp_id.project, lp_id.domain, lp_id.name, lp_id.version)
        self._inputs = TypeEngine.literal_map_to_kwargs(
            ctx=FlyteContextManager.current_context(),
            lm=input_map,
            python_types=TypeEngine.guess_python_types(workflow.interface.inputs),
        )
    return self._inputs
def get_traintest_splitdatabase(ctx, dataset, seed, test_split_ratio, x_train, x_test, y_train, y_test):
    """
    Fetches the dataset from the given blob location, splits it into train and test sets using the
    given ratio and seed, and stores the four resulting frames in the provided outputs.

    The CSV is read with the column names of ``DATASET_SCHEMA``. The first eight columns are used
    as the features and the last column as the class.
    """
    with flytekit_utils.AutoDeletingTempDir("dataset_dir"):
        dataset_blob = Types.Blob.fetch(remote_path=dataset)
        column_names = list(DATASET_SCHEMA.columns.keys())
        frame = pd.read_csv(dataset_blob.local_path, names=column_names)

        # Features: the first eight columns. Classes: the last column, kept as a one-column frame.
        feature_columns = column_names[:8]
        class_columns = [column_names[-1]]
        features = frame[feature_columns]
        classes = frame[class_columns]

        # split data into train and test sets
        splits = train_test_split(features, classes, test_size=test_split_ratio, random_state=seed)
        _x_train, _x_test, _y_train, _y_test = splits

        # TODO also add support for Spark dataframe, but make the pyspark dependency optional
        x_train.set(_x_train)
        x_test.set(_x_test)
        y_train.set(_y_train)
        y_test.set(_y_test)
def object_detection(wf_params, url, result, parsed_image):
    """Download an image, run detection on it, and publish the labels and the output image path.

    :param wf_params: workflow parameters; only ``wf_params.logging`` is used here.
    :param url: location of the image to download (requested with ``HEADERS``).
    :param result: output set to a newline-joined list of ``"<class name>: <score>%"`` entries,
        one per detection whose score is above ``MIN_SCORE``.
    :param parsed_image: output set to the path that was handed to ``download.detect`` as its
        output file.
    """
    with utils.AutoDeletingTempDir('tmp') as tmpdir:
        request = urllib2.Request(url, headers=HEADERS)
        fname = '{}/image.jpg'.format(tmpdir.name)

        response = urllib2.urlopen(request)
        try:
            with open(fname, 'wb') as opfile:
                opfile.write(response.read())
        finally:
            # Always release the HTTP connection, even if reading or writing fails.
            response.close()
        wf_params.logging.info("downloaded image")

        output_file = '{}/output.jpg'.format(tmpdir.name)
        output = download.detect(fname, output_file)
        scores = output["detection_scores"]
        classes = output["detection_classes"]
        category_index = output["category_index"]

        results = []
        for i, score in enumerate(scores):
            if score > MIN_SCORE:
                # Detections whose class id is missing from the category index are labelled 'N/A'.
                if classes[i] in six.viewkeys(category_index):
                    class_name = category_index[classes[i]]['name']
                else:
                    class_name = 'N/A'
                results.append('{}: {}%'.format(str(class_name), int(100 * score)))

        # Outputs are set while the temporary directory (and the output image in it) still exists.
        parsed_image.set(output_file)
        result.set("\n".join(results))
def test_datetime_coercion():
    """Write microsecond-precision datetimes through a Schema writer, with and without coercion.

    Five datetimes (one per day going back from 2017-01-01 01:01:01.000001) are
    written five times with millisecond coercion and truncation explicitly
    allowed, then five more times through a second writer with default settings.
    """
    base = _datetime.datetime(day=1, month=1, year=2017, hour=1, minute=1, second=1, microsecond=1)
    values = [(base - _datetime.timedelta(days=x),) for x in _six_moves.range(5)]
    schema_type = _schema_impl.SchemaType(columns=[("testname", _primitives.Datetime)])

    with _test_utils.LocalTestFileSystem():
        with _utils.AutoDeletingTempDir("test") as t:
            coercing_schema = _schema_impl.Schema.create_at_known_location(
                t.name, mode="wb", schema_type=schema_type
            )
            with coercing_schema as writer:
                for _ in _six_moves.range(5):
                    # us to ms coercion segfaults unless we explicitly allow truncation.
                    frame = _pd.DataFrame.from_records(values, columns=["testname"])
                    writer.write(
                        frame,
                        coerce_timestamps="ms",
                        allow_truncated_timestamps=True,
                    )

                    # TODO: Uncomment when segfault bug is resolved
                    # with _pytest.raises(Exception):
                    #    writer.write(
                    #        _pd.DataFrame.from_records(values, columns=['testname']),
                    #        coerce_timestamps='ms')

            default_schema = _schema_impl.Schema.create_at_known_location(
                t.name, mode="wb", schema_type=schema_type
            )
            with default_schema as writer:
                for _ in _six_moves.range(5):
                    frame = _pd.DataFrame.from_records(values, columns=["testname"])
                    writer.write(frame)
def test_simple_read_and_write_with_different_types(value_type_pair):
    """Round-trip single-column records through a Schema writer and reader.

    ``value_type_pair`` is a ``(column_name, flyte_type, values)`` triple. The
    records are written five times and then read back both chunk-by-chunk and
    concatenated. ``local_path`` must be ``None`` outside of the context manager
    and must live under the sandbox directory while inside it.
    """
    column_name, flyte_type, values = value_type_pair
    values = [(value,) for value in values]
    schema_type = _schema_impl.SchemaType(columns=[(column_name, flyte_type)])

    with _test_utils.LocalTestFileSystem() as sandbox:
        with _utils.AutoDeletingTempDir("test") as t:
            # --- write side ---
            write_schema = _schema_impl.Schema.create_at_known_location(
                t.name, mode='wb', schema_type=schema_type
            )
            assert write_schema.local_path is None
            with write_schema as writer:
                for _ in _six_moves.range(5):
                    frame = _pd.DataFrame.from_records(values, columns=[column_name])
                    writer.write(frame)
                assert write_schema.local_path.startswith(sandbox.name)
            assert write_schema.local_path is None

            # --- read side ---
            read_schema = _schema_impl.Schema.create_at_known_location(
                t.name, mode='rb', schema_type=schema_type
            )
            assert read_schema.local_path is None
            with read_schema as reader:
                for chunk in reader.iter_chunks():
                    column_values = chunk[column_name].tolist()
                    for expected_row, actual in _six_moves.zip(values, column_values):
                        assert expected_row[0] == actual

                # Nothing is left once all chunks were consumed.
                assert reader.read() is None

                reader.seek(0)
                combined = reader.read(concat=True)
                for position, actual in enumerate(combined[column_name].tolist()):
                    assert values[position % len(values)][0] == actual
                assert read_schema.local_path.startswith(sandbox.name)
            assert read_schema.local_path is None
def outputs(self):
    """
    Returns the outputs of the task execution, if available, in the standard Python format that is produced by
    the type engine. If not available, perhaps due to execution being in progress or an error being produced,
    this will raise an exception.

    The result is computed on first successful access and cached on ``self._outputs``.

    :rtype: dict[Text, T]
    :raises: ``FlyteAssertion`` if the execution is not complete or ended with an error.
    """
    if not self.is_complete:
        # Message previously read "Please what until ..."; corrected wording.
        raise _user_exceptions.FlyteAssertion(
            "Please wait until the task execution has completed before requesting the outputs."
        )
    if self.error:
        raise _user_exceptions.FlyteAssertion("Outputs could not be found because the execution ended in failure.")

    if self._outputs is None:
        client = _flyte_engine.get_client()
        execution_data = client.get_task_execution_data(self.id)

        # Outputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
        if bool(execution_data.full_outputs.literals):
            output_map = execution_data.full_outputs
        elif execution_data.outputs.bytes > 0:
            with _common_utils.AutoDeletingTempDir() as t:
                tmp_name = _os.path.join(t.name, "outputs.pb")
                _data_proxy.Data.get_data(execution_data.outputs.url, tmp_name)
                output_map = _literal_models.LiteralMap.from_flyte_idl(
                    _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, tmp_name)
                )
        else:
            output_map = _literal_models.LiteralMap({})

        self._outputs = _type_helpers.unpack_literal_map_to_sdk_python_std(output_map)
    return self._outputs
def test_hive_task_query_generation():
    """Generate plugin objects for ``two_queries`` and check the queries and output references.

    Two hive jobs are expected, each with a single-query collection, and each
    output reference URI must be non-empty and appear in the matching query text.
    """
    with _common_utils.AutoDeletingTempDir("user_dir") as user_working_directory:
        execution_id = WorkflowExecutionIdentifier(project="unit_test", domain="unit_test", name="unit_test")
        context = _common_engine.EngineContext(
            execution_id=execution_id,
            execution_date=_datetime.utcnow(),
            stats=None,  # TODO: A mock stats object that we can read later.
            logging=_logging,  # TODO: A mock logging object that we can read later.
            tmp_dir=user_working_directory,
        )

        references = {}
        for name, variable in _six.iteritems(two_queries.interface.outputs):
            sdk_type = _type_helpers.get_sdk_type_from_literal_type(variable.type)
            references[name] = _task_output.OutputReference(sdk_type)

        qubole_hive_jobs = two_queries._generate_plugin_objects(context, references)
        assert len(qubole_hive_jobs) == 2

        first_job, second_job = qubole_hive_jobs[0], qubole_hive_jobs[1]

        # deprecated, collection is only here for backwards compatibility
        assert len(first_job.query_collection.queries) == 1
        assert len(second_job.query_collection.queries) == 1

        # The output references should now have the same fake S3 path as the formatted queries
        hive_results = references["hive_results"]
        assert hive_results.value[0].uri != ""
        assert hive_results.value[1].uri != ""
        assert hive_results.value[0].uri in first_job.query.query
        assert hive_results.value[1].uri in second_job.query.query
def test_task_system_failure():
    """Execute a task whose ``execute`` raises a system exception and inspect the error document.

    The error file written under the output prefix must report code
    ``SYSTEM:Unknown``, kind ``RECOVERABLE``, and a message that contains
    ``errorERRORerror``.
    """
    config_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        '../../../common/configs/local.config',
    )
    overrides = {
        'image': 'myflyteimage:{}'.format(os.environ.get('IMAGE_VERSION', 'sha')),
        'project': 'myflyteproject',
        'domain': 'development',
    }

    with TemporaryConfiguration(config_path, internal_overrides=overrides):
        failing_task = MagicMock()
        failing_task.execute = _raise_system_exception

        with utils.AutoDeletingTempDir("test") as tmp:
            engine.FlyteTask(failing_task).execute(None, {'output_prefix': tmp.name})

            error_file = os.path.join(tmp.name, constants.ERROR_FILE_NAME)
            error_proto = utils.load_proto_from_file(errors_pb2.ErrorDocument, error_file)
            doc = errors.ErrorDocument.from_flyte_idl(error_proto)

            assert doc.error.code == "SYSTEM:Unknown"
            assert doc.error.kind == errors.ContainerError.Kind.RECOVERABLE
            assert "errorERRORerror" in doc.error.message
def outputs(self) -> Dict[str, Any]:
    """
    Returns the outputs to the execution. The value is fetched once and cached on ``self._outputs``.

    Currently the raw literal map is returned; conversion to Python types is still a TODO below.

    :raises: ``FlyteAssertion`` error if execution is in progress or execution ended in error.
    """
    if not self.is_complete:
        # NOTE(review): the message mentions a "node execution" while the data below is fetched
        # with ``get_execution_data`` - confirm which wording is intended.
        raise _user_exceptions.FlyteAssertion(
            "Please wait until the node execution has completed before requesting the outputs."
        )
    if self.error:
        raise _user_exceptions.FlyteAssertion("Outputs could not be found because the execution ended in failure.")

    # Serve the cached value when it has already been resolved.
    if self._outputs is not None:
        return self._outputs

    execution_data = _flyte_engine.get_client().get_execution_data(self.id)

    # Outputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
    output_map: _literal_models.LiteralMap = _literal_models.LiteralMap({})
    if bool(execution_data.full_outputs.literals):
        output_map = execution_data.full_outputs
    elif execution_data.outputs.bytes > 0:
        with _common_utils.AutoDeletingTempDir() as tmp_dir:
            local_copy = _os.path.join(tmp_dir.name, "outputs.pb")
            _data_proxy.Data.get_data(execution_data.outputs.url, local_copy)
            output_proto = _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, local_copy)
            output_map = _literal_models.LiteralMap.from_flyte_idl(output_proto)

    # TODO: need to convert flyte literals to python types. For now just use literals
    # self._outputs = TypeEngine.literal_map_to_kwargs(ctx=FlyteContext.current_context(), lm=output_map)
    self._outputs = output_map
    return self._outputs
def inputs(self):
    """
    Returns the inputs of the task execution in the standard Python format that is produced by
    the type engine. The value is fetched once and cached on ``self._inputs``.

    :rtype: dict[Text, T]
    """
    # Serve the cached value when it has already been resolved.
    if self._inputs is not None:
        return self._inputs

    execution_data = _flyte_engine.get_client().get_task_execution_data(self.id)

    # Inputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
    if bool(execution_data.full_inputs.literals):
        input_map = execution_data.full_inputs
    elif execution_data.inputs.bytes > 0:
        with _common_utils.AutoDeletingTempDir() as t:
            local_copy = _os.path.join(t.name, "inputs.pb")
            _data_proxy.Data.get_data(execution_data.inputs.url, local_copy)
            input_proto = _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, local_copy)
            input_map = _literal_models.LiteralMap.from_flyte_idl(input_proto)
    else:
        # Neither inline literals nor a remote blob: treat as no inputs.
        input_map = _literal_models.LiteralMap({})

    self._inputs = _type_helpers.unpack_literal_map_to_sdk_python_std(input_map)
    return self._inputs
def inputs(self) -> Dict[str, Any]:
    """
    Returns the inputs to the node execution. The value is fetched once and cached on ``self._inputs``.

    Currently the raw literal map is returned; conversion to Python types is still a TODO below.
    """
    # Serve the cached value when it has already been resolved.
    if self._inputs is not None:
        return self._inputs

    execution_data = _flyte_engine.get_client().get_node_execution_data(self.id)

    # Inputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
    input_map: _literal_models.LiteralMap = _literal_models.LiteralMap({})
    if bool(execution_data.full_inputs.literals):
        input_map = execution_data.full_inputs
    elif execution_data.inputs.bytes > 0:
        with _common_utils.AutoDeletingTempDir() as tmp_dir:
            local_copy = _os.path.join(tmp_dir.name, "inputs.pb")
            _data_proxy.Data.get_data(execution_data.inputs.url, local_copy)
            input_proto = _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, local_copy)
            input_map = _literal_models.LiteralMap.from_flyte_idl(input_proto)

    # TODO: need to convert flyte literals to python types. For now just use literals
    # self._inputs = TypeEngine.literal_map_to_kwargs(ctx=FlyteContext.current_context(), lm=input_map)
    self._inputs = input_map
    return self._inputs
def test_module_loading():
    """Build a three-level package tree on disk and check ``module_loader.iterate_modules``.

    The tree is ``top/middle/bottom``, each level holding ``__init__.py`` and
    ``a.py``. Iterating ``["top"]`` must yield six modules, and iterating the
    three ``a`` modules explicitly must yield them in the given order.
    """
    with _utils.AutoDeletingTempDir("mypackage") as pkg:
        path = pkg.name

        # Create directories
        top_level = os.path.join(path, "top")
        middle_level = os.path.join(top_level, "middle")
        bottom_level = os.path.join(middle_level, "bottom")
        os.makedirs(bottom_level)

        # Create init files (and one empty module per package level)
        empty_files = [os.path.join(path, "__init__.py")]
        for level in (top_level, middle_level, bottom_level):
            empty_files.append(os.path.join(level, "__init__.py"))
            empty_files.append(os.path.join(level, "a.py"))
        for file_path in empty_files:
            with open(file_path, "w"):
                pass

        sys.path.append(path)
        try:
            # Not a sufficient test but passes for now
            assert sum(1 for _ in module_loader.iterate_modules(["top"])) == 6
            assert [
                mod.__file__
                for mod in module_loader.iterate_modules(["top.a", "top.middle.a", "top.middle.bottom.a"])
            ] == [os.path.join(lvl, "a.py") for lvl in (top_level, middle_level, bottom_level)]
        finally:
            # Do not leave the soon-to-be-deleted temp dir on sys.path for later tests.
            if path in sys.path:
                sys.path.remove(path)
def execute_task(task_module, task_name, inputs, output_prefix, test):
    """Load a user task and, unless ``test`` is truthy, run it against its downloaded inputs.

    :param task_module: importable module path that contains the task.
    :param task_name: attribute name of the task inside that module.
    :param inputs: location of the serialized input literal map. When the
        ``BATCH_JOB_ARRAY_INDEX_VAR_NAME`` environment variable is set this is treated as a
        prefix and ``<child index>/inputs.pb`` is appended.
    :param output_prefix: handed to the task as ``context['output_prefix']``. For array jobs the
        child index is appended.
    :param test: when truthy the task is only imported and resolved; nothing is executed.
    """
    with _TemporaryConfiguration(_internal_config.CONFIGURATION_PATH.get()):
        with _utils.AutoDeletingTempDir('input_dir') as input_dir:
            # Load user code
            user_module = _importlib.import_module(task_module)
            task_def = getattr(user_module, task_name)

            if test:
                return

            local_inputs_file = input_dir.get_named_tempfile('inputs.pb')

            # Handle inputs/outputs for array job.
            if _os.environ.get('BATCH_JOB_ARRAY_INDEX_VAR_NAME'):
                job_index = _compute_array_job_index()

                # TODO: Perhaps remove.  This is a workaround to an issue we perceived with limited entropy in
                # TODO: AWS batch array jobs.
                seed = "{} {} {}".format(_random.random(), _datetime.datetime.utcnow(), job_index)
                _flyte_random.seed_flyte_random(seed)

                # If an ArrayTask is discoverable, the original job index may be different than the one specified in
                # the environment variable. Look up the correct input/outputs in the index lookup mapping file.
                job_index = _map_job_index_to_child_index(input_dir, inputs, job_index)

                child_dir = str(job_index)
                inputs = _os.path.join(inputs, child_dir, 'inputs.pb')
                output_prefix = _os.path.join(output_prefix, child_dir)

            _data_proxy.Data.get_data(inputs, local_inputs_file)
            input_proto = _utils.load_proto_from_file(_literals_pb2.LiteralMap, local_inputs_file)
            task = _engine_loader.get_engine().get_task(task_def)
            task.execute(
                _literal_models.LiteralMap.from_flyte_idl(input_proto),
                context={'output_prefix': output_prefix},
            )
def test_hive_task_dynamic_job_spec_generation():
    """Produce the dynamic job spec for ``two_queries`` and check its bindings and custom field.

    The first output must bind a collection of two ``Schema`` scalars, and the
    first task's ``custom`` field must be non-empty.
    """
    with _common_utils.AutoDeletingTempDir("user_dir") as user_working_directory:
        execution_id = WorkflowExecutionIdentifier(project="unit_test", domain="unit_test", name="unit_test")
        context = _common_engine.EngineContext(
            execution_id=execution_id,
            execution_date=_datetime.utcnow(),
            stats=None,  # TODO: A mock stats object that we can read later.
            logging=_logging,  # TODO: A mock logging object that we can read later.
            tmp_dir=user_working_directory,
        )
        empty_inputs = _literals.LiteralMap(literals={})
        dj_spec = two_queries._produce_dynamic_job_spec(context, empty_inputs)

        # Bindings
        assert len(dj_spec.outputs[0].binding.collection.bindings) == 2
        assert isinstance(dj_spec.outputs[0].binding.collection.bindings[0].scalar.schema, Schema)
        assert isinstance(dj_spec.outputs[0].binding.collection.bindings[1].scalar.schema, Schema)

        # Custom field is filled in
        assert len(dj_spec.tasks[0].custom) > 0
def inputs(self) -> Dict[str, Any]:
    """
    Returns the inputs of the task execution in the standard Python format that is produced by
    the type engine. The value is fetched once and cached on ``self._inputs``.

    The literal map is converted using Python types guessed from the interface of the task
    fetched with the identifier in ``self.id.task_id``.
    """
    from flytekit.control_plane.tasks.task import FlyteTask

    # Serve the cached value when it has already been resolved.
    if self._inputs is not None:
        return self._inputs

    execution_data = _flyte_engine.get_client().get_task_execution_data(self.id)

    # Inputs are returned inline unless they are too big, in which case a url blob pointing to them is returned.
    input_map = _literal_models.LiteralMap({})
    if bool(execution_data.full_inputs.literals):
        input_map = execution_data.full_inputs
    elif execution_data.inputs.bytes > 0:
        with _common_utils.AutoDeletingTempDir() as tmp_dir:
            local_copy = os.path.join(tmp_dir.name, "inputs.pb")
            _data_proxy.Data.get_data(execution_data.inputs.url, local_copy)
            input_proto = _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, local_copy)
            input_map = _literal_models.LiteralMap.from_flyte_idl(input_proto)

    task_id = self.id.task_id
    task = FlyteTask.fetch(task_id.project, task_id.domain, task_id.name, task_id.version)
    self._inputs = TypeEngine.literal_map_to_kwargs(
        ctx=FlyteContextManager.current_context(),
        lm=input_map,
        python_types=TypeEngine.guess_python_types(task.interface.inputs),
    )
    return self._inputs
def test_module_loading():
    """Build a three-level package tree on disk and check ``module_loader.iterate_modules``.

    The tree is ``top/middle/bottom``, each level holding ``__init__.py`` and
    ``a.py``. Iterating ``['top']`` must yield six modules.
    """
    with _utils.AutoDeletingTempDir("mypackage") as pkg:
        path = pkg.name

        # Create directories
        top_level = os.path.join(path, 'top')
        middle_level = os.path.join(top_level, 'middle')
        bottom_level = os.path.join(middle_level, 'bottom')
        os.makedirs(bottom_level)

        # Create init files (and one empty module per package level)
        empty_files = [os.path.join(path, '__init__.py')]
        for level in (top_level, middle_level, bottom_level):
            empty_files.append(os.path.join(level, '__init__.py'))
            empty_files.append(os.path.join(level, 'a.py'))
        for file_path in empty_files:
            with open(file_path, 'w'):
                pass

        sys.path.append(path)
        try:
            # Not a sufficient test but passes for now
            assert sum(1 for _ in module_loader.iterate_modules(['top'])) == 6
        finally:
            # Do not leave the soon-to-be-deleted temp dir on sys.path for later tests.
            if path in sys.path:
                sys.path.remove(path)
def test_datetime_coercion_explicitly():
    """
    Sanity check that we're using a version of pyarrow that allows us to truncate timestamps.

    A datetime with one microsecond is written to parquet with millisecond coercion and
    truncation allowed; reading it back must give the same instant without the microsecond.
    """
    original = _datetime.datetime(day=1, month=1, year=2017, hour=1, minute=1, second=1, microsecond=1)
    truncated = _datetime.datetime(day=1, month=1, year=2017, hour=1, minute=1, second=1)

    frame = _pd.DataFrame.from_records([(original,)], columns=['testname'])
    assert frame['testname'][0] == original

    with _utils.AutoDeletingTempDir('test') as tmpdir:
        parquet_path = tmpdir.get_named_tempfile('repro.parquet')
        frame.to_parquet(parquet_path, coerce_timestamps='ms', allow_truncated_timestamps=True)
        reloaded = _pd.read_parquet(parquet_path)

    assert reloaded['testname'][0] == truncated
def download_video_worker(
    wf_params,
    video_external_path,
    video_blob,
):
    """Fetch the video at ``video_external_path`` into a temporary directory and set it on ``video_blob``.

    The local file keeps the base name of the external path. ``wf_params`` is
    not used by this function.
    """
    # avi_local = wf_params.working_directory.get_named_tempfile("input.avi")
    with flytekit_utils.AutoDeletingTempDir("stream") as download_dir:
        file_name = basename(video_external_path)
        destination = join(download_dir.name, file_name)
        fetched = Types.Blob.fetch(remote_path=video_external_path, local_path=destination)
        video_blob.set(fetched)
def test_arrayjob_entrypoint_in_proc():
    """Run ``add_one`` through ``execute_task`` as a faked array job and verify its output.

    Inputs are written to ``<dir>/1/inputs.pb`` and ``indexlookup.pb`` holds the
    collection ``[1]``. With the batch array index set to ``0`` the outputs are
    expected under ``<dir>/1`` and must be exactly ``b == 10``.

    The two environment variables used to fake the array job are always put
    back to their previous state, including being removed again when they were
    not set before, even if the test body fails.
    """
    config_path = os.path.join(os.path.dirname(__file__), 'fake.config')
    overrides = {'project': 'test', 'domain': 'development'}

    with _TemporaryConfiguration(config_path, internal_overrides=overrides):
        with _utils.AutoDeletingTempDir("dir") as work_dir:
            literal_map = _type_helpers.pack_python_std_map_to_literal_map(
                {'a': 9}, _type_map_from_variable_map(_task_defs.add_one.interface.inputs)
            )

            input_dir = os.path.join(work_dir.name, "1")
            os.mkdir(input_dir)  # auto cleanup will take this subdir into account

            input_file = os.path.join(input_dir, "inputs.pb")
            _utils.write_proto_to_file(literal_map.to_flyte_idl(), input_file)

            # construct indexlookup.pb which has array: [1]
            mapped_index = _literals.Literal(_literals.Scalar(primitive=_literals.Primitive(integer=1)))
            index_lookup_collection = _literals.LiteralCollection([mapped_index])
            index_lookup_file = os.path.join(work_dir.name, "indexlookup.pb")
            _utils.write_proto_to_file(index_lookup_collection.to_flyte_idl(), index_lookup_file)

            # fake arrayjob task by setting environment variables
            fake_env = {
                'BATCH_JOB_ARRAY_INDEX_VAR_NAME': 'AWS_BATCH_JOB_ARRAY_INDEX',
                'AWS_BATCH_JOB_ARRAY_INDEX': '0',
            }
            previous_env = {key: os.environ.get(key) for key in fake_env}
            os.environ.update(fake_env)
            try:
                execute_task(
                    _task_defs.add_one.task_module,
                    _task_defs.add_one.task_function_name,
                    work_dir.name,
                    work_dir.name,
                    False,
                )

                raw_map = _type_helpers.unpack_literal_map_to_sdk_python_std(
                    _literal_models.LiteralMap.from_flyte_idl(
                        _utils.load_proto_from_file(
                            _literals_pb2.LiteralMap,
                            os.path.join(input_dir, _constants.OUTPUT_FILE_NAME),
                        )
                    ),
                    _type_map_from_variable_map(_task_defs.add_one.interface.outputs),
                )
                assert raw_map['b'] == 10
                assert len(raw_map) == 1
            finally:
                # reset the env vars: restore old values, remove the ones that did not exist before
                for key, old_value in previous_env.items():
                    if old_value is None:
                        os.environ.pop(key, None)
                    else:
                        os.environ[key] = old_value
def confusion_matrix(wf_params, y_true, y_pred, title, normalize, classes, matrix, visual):
    """Plot a confusion matrix to ``visual.png`` and set the ``visual`` and ``matrix`` outputs.

    ``_plot_confusion_matrix`` is called with array versions of ``y_true``,
    ``y_pred`` and ``classes`` and writes the plot into a temporary file whose
    path is stored in ``visual``. ``matrix`` receives a nested list with the
    same shape as the returned matrix. ``wf_params`` is not used here.
    """
    with utils.AutoDeletingTempDir('test') as tmpdir:
        f_path = tmpdir.get_named_tempfile("visual.png")
        cm = _plot_confusion_matrix(
            np.asarray(y_true),
            np.asarray(y_pred),
            classes=np.asarray(classes),
            title=title,
            normalize=normalize,
            to_file_path=f_path,
        )

        row_count, column_count = cm.shape[0], cm.shape[1]
        # NOTE(review): every row is filled with the column indices 0..column_count-1, not with
        # the values of ``cm``. This looks unintended (``cm[i][j]`` was probably meant) but is
        # kept as-is here - confirm against the declared type of the ``matrix`` output.
        m = [list(range(column_count)) for _ in range(row_count)]

        visual.set(f_path)
        matrix.set(m)
def get_outputs(self):
    """
    Downloads the outputs file referenced by the task execution's closure into a temporary
    directory and returns its content as a literal map.

    :rtype: flytekit.models.literals.LiteralMap
    """
    output_uri = self.sdk_task_execution.closure.output_uri
    with _common_utils.AutoDeletingTempDir() as t:
        local_copy = _os.path.join(t.name, "outputs.pb")
        _data_proxy.Data.get_data(output_uri, local_copy)
        output_proto = _common_utils.load_proto_from_file(_literals_pb2.LiteralMap, local_copy)
        return _literals.LiteralMap.from_flyte_idl(output_proto)
def test_backwards_compatible_replacement(mock_execute_task):
    """Invoke ``execute_task_cmd`` with a literal ``{{.rawOutputDataPrefix}}`` template value.

    The mocked execute function asserts that its fifth positional argument is
    ``None``, and the command must exit with code 0.
    """

    def _expect_fifth_arg_none(*args, **kwargs):
        assert args[4] is None

    mock_execute_task.side_effect = _expect_fifth_arg_none

    config_path = os.path.join(os.path.dirname(__file__), "fake.config")
    overrides = {"project": "test", "domain": "development"}

    with _TemporaryConfiguration(config_path, internal_overrides=overrides):
        with _utils.AutoDeletingTempDir("in"):
            with _utils.AutoDeletingTempDir("out"):
                cmd = [
                    "--task-module", "fake",
                    "--task-name", "fake",
                    "--inputs", "fake",
                    "--output-prefix", "fake",
                    "--raw-output-data-prefix", "{{.rawOutputDataPrefix}}",
                ]
                result = CliRunner().invoke(execute_task_cmd, cmd)
                assert result.exit_code == 0
def __enter__(self):
    """
    Enters the exit stack and registers, in order, a temporary directory, a local data context
    rooted at that directory, and a local working directory context wrapping it.

    :rtype: flytekit.common.utils.AutoDeletingTempDir
    """
    stack = self._exit_stack
    stack.__enter__()

    temp_dir = stack.enter_context(_utils.AutoDeletingTempDir("local_test_filesystem"))
    stack.enter_context(_data_proxy.LocalDataContext(temp_dir.name))
    stack.enter_context(_data_proxy.LocalWorkingDirectoryContext(temp_dir))
    return temp_dir
def test_download(value_type_pair):
    """Write three parquet files and verify ``Schema.download`` with and without a target path.

    ``value_type_pair`` is a ``(column_name, flyte_type, values)`` triple. With
    an explicit local path the download must succeed and the data must read
    back correctly. Without a path it must raise outside of a
    ``LocalTestFileSystem`` and succeed inside one.
    """
    column_name, flyte_type, values = value_type_pair
    values = [(value,) for value in values]
    schema_type = _schema_impl.SchemaType(columns=[(column_name, flyte_type)])

    def _assert_reader_matches(reader):
        # Every chunk mirrors the original records.
        for chunk in reader.iter_chunks():
            for expected_row, actual in _six_moves.zip(values, chunk[column_name].tolist()):
                assert expected_row[0] == actual
        # Nothing is left once all chunks were consumed.
        assert reader.read() is None
        # After rewinding, a concatenated read cycles through the records.
        reader.seek(0)
        combined = reader.read(concat=True)
        for position, actual in enumerate(combined[column_name].tolist()):
            assert values[position % len(values)][0] == actual

    with _utils.AutoDeletingTempDir("test") as tmpdir:
        # Three identically-populated parquet files named 000000, 000001, 000002.
        for file_index in _six_moves.range(3):
            frame = _pd.DataFrame.from_records(values, columns=[column_name])
            frame.to_parquet(tmpdir.get_named_tempfile(str(file_index).zfill(6)), coerce_timestamps="us")

        # Download to an explicitly provided local path.
        with _utils.AutoDeletingTempDir("test2") as local_dir:
            schema_obj = _schema_impl.Schema(tmpdir.name, schema_type=schema_type)
            schema_obj.download(local_dir.get_named_tempfile(_uuid.uuid4().hex))
            with schema_obj as reader:
                _assert_reader_matches(reader)

        # Download without a path and without a local test filesystem must fail.
        with _pytest.raises(Exception):
            schema_obj = _schema_impl.Schema(tmpdir.name, schema_type=schema_type)
            schema_obj.download()

        # Inside a local test filesystem the path-less download works.
        with _test_utils.LocalTestFileSystem():
            schema_obj = _schema_impl.Schema(tmpdir.name, schema_type=schema_type)
            schema_obj.download()
            with schema_obj as reader:
                _assert_reader_matches(reader)