def test_set_nested():
    """Nested ``set`` contexts merge dotted keys and unwind one level at a time."""
    with set({"abc": {"x": 123}}):
        outer_only = {"x": 123}
        assert config["abc"] == outer_only
        with set({"abc.y": 456}):
            merged = {"x": 123, "y": 456}
            assert config["abc"] == merged
        # Leaving the inner context drops only the key it added.
        assert config["abc"] == outer_only
    # Leaving the outer context removes the whole section.
    assert "abc" not in config
def test_set_nested():
    """Check that an inner ``set`` extends, then restores, the outer value."""
    with set({'abc': {'x': 123}}):
        before_inner = {'x': 123}
        assert config['abc'] == before_inner
        with set({'abc.y': 456}):
            during_inner = {'x': 123, 'y': 456}
            assert config['abc'] == during_inner
        # The inner context has exited: only its own key may be gone.
        assert config['abc'] == before_inner
    # The outer context has exited: nothing of 'abc' may remain.
    assert 'abc' not in config
def test_set_nested():
    """A dotted key set inside another ``set`` is merged and later undone."""
    with set({'abc': {'x': 123}}):
        base_value = {'x': 123}
        assert config['abc'] == base_value
        with set({'abc.y': 456}):
            extended_value = {'x': 123, 'y': 456}
            assert config['abc'] == extended_value
        # Back to the value installed by the outer context.
        assert config['abc'] == base_value
    # Both contexts have unwound, so the key is absent again.
    assert 'abc' not in config
def test_get_context_using_python3_posix():
    """
    get_context() respects configuration.

    If default context is changed this test will need to change too.
    """
    # With nothing configured the default multiprocessing context is returned.
    assert get_context() is multiprocessing.get_context(None)

    # Each configured start method must be handed back as the matching context.
    for method in ("forkserver", "spawn"):
        with config.set({"multiprocessing.context": method}):
            assert get_context() is multiprocessing.get_context(method)
def initialize(self) -> Client:
    """Return the dask ``Client``, creating it on the first call only.

    The first call sets the dask ``temporary_directory`` config value, builds
    a ``Client`` from the ``ask_parallel_actions_*`` settings, and remembers
    it on ``self``; subsequent calls return the stored client unchanged.
    """
    if self.initialized:
        return self.client

    config.set(temporary_directory=ask_parallel_actions_dask_temp_dir())
    client_options = dict(
        processes=ask_parallel_actions_dask_use_process(),
        threads_per_worker=1,
        n_workers=ask_parallel_actions_count(),
    )
    self.client = Client(**client_options)
    self.initialized = True
    return self.client
def test_set_kwargs():
    """``set`` accepts ``a__b`` keyword arguments, alone or mixed with a dict."""
    with set(foo__bar=1, foo__baz=2):
        assert config["foo"] == {"bar": 1, "baz": 2}
    assert "foo" not in config

    # Mix kwargs with a dotted-key dict, then with a nested dict.
    # In both cases the kwargs override the dict.
    dict_forms = (
        {"foo.bar": 1, "foo.baz": 2},
        {"foo": {"bar": 1, "baz": 2}},
    )
    for base in dict_forms:
        with set(base, foo__buzz=3, foo__bar=4):
            assert config["foo"] == {"bar": 4, "baz": 2, "buzz": 3}
        assert "foo" not in config
def test_files_per_partition():
    """``read_text`` groups input files according to ``files_per_partition``.

    Twenty one-line files are read ten per partition, with and without
    ``include_path``. Taking 100 items from a single partition is expected
    to emit a ``UserWarning`` in both cases.
    """
    # Interpolate the file number into the content as well as the name; the
    # placeholder was previously written to disk verbatim.
    files3 = {f"{n:02}.txt": f"line from {n:02}" for n in range(20)}
    with filetexts(files3):
        # single-threaded scheduler to ensure the warning happens in the
        # same thread as the pytest.warns
        with config.set({"scheduler": "single-threaded"}):
            with pytest.warns(UserWarning):
                b = read_text("*.txt", files_per_partition=10)
                n_taken = len(b.take(100, npartitions=1))

            assert n_taken == 10, "10 files should be grouped into one partition"

            assert b.count().compute() == 20, "All 20 lines should be read"

            with pytest.warns(UserWarning):
                b = read_text("*.txt", files_per_partition=10, include_path=True)
                p = b.take(100, npartitions=1)

            # Each item is a (line, path) pair; keep only the paths.
            p_paths = tuple(zip(*p))[1]
            p_unique_paths = set(p_paths)
            assert len(p_unique_paths) == 10

            b_paths = tuple(zip(*b.compute()))[1]
            b_unique_paths = set(b_paths)
            assert len(b_unique_paths) == 20
def test_arg_reductions(dfunc, func):
    """Compare a dask arg-reduction ``dfunc`` against its reference ``func``."""
    x = np.random.random((10, 10, 10))
    a = da.from_array(x, chunks=(3, 4, 5))

    def check_flat_and_each_axis():
        # Flattened reduction first, then every individual axis.
        assert_eq(dfunc(a), func(x))
        for axis in range(3):
            assert_eq(dfunc(a, axis), func(x, axis))

    check_flat_and_each_axis()
    with config.set(split_every=2):
        check_flat_and_each_axis()

    if _numpy_122:
        assert_eq(dfunc(a, keepdims=True), func(x, keepdims=True))

    # Out-of-range axis and tuple axis are both rejected.
    with pytest.raises(ValueError):
        dfunc(a, 3)
    with pytest.raises(TypeError):
        dfunc(a, (0, 1))

    # One-dimensional input.
    x2 = np.arange(10)
    a2 = da.from_array(x2, chunks=3)
    assert_eq(dfunc(a2), func(x2))
    assert_eq(dfunc(a2, 0), func(x2, 0))
    assert_eq(dfunc(a2, 0, split_every=2), func(x2, 0))

    # Zero-dimensional input.
    x3 = np.array(1)
    a3 = da.from_array(x3)
    assert_eq(dfunc(a3), func(x3))
def fix_dask_settings(self):
    """
    Fix "standard" dask behaviour for time+space testing.

    Currently this is single-threaded mode, with known chunksize,
    which is optimised for space saving so we can test largest data.

    """
    import dask.config as dcfg

    dcfg.set(
        {
            # Use single-threaded, to avoid process-switching costs and
            # minimise memory usage.
            # N.B. generally may be slower, but use less memory ?
            "scheduler": "single-threaded",
            # Configure iris._lazy_data.as_lazy_data to aim for 128MiB chunks
            "array.chunk-size": "128Mib",
        }
    )
def test_take_uses_config():
    """``take`` reads ``array.chunk-size`` from config when planning chunks."""
    itemsize = 8
    index = np.array([0, 1] + [2] * 101 + [3])
    chunks = ((1, 1, 1, 1), (500,), (500,))

    overrides = {"array.chunk-size": "10GB"}
    with config.set(**overrides):
        chunks2, dsk = take("a", "b", chunks, index, itemsize)

    expected_chunks = ((1, 1, 101, 1), (500,), (500,))
    assert chunks2 == expected_chunks
    assert len(dsk) == 4
def test_take_uses_config():
    """``take`` honours ``array.chunk-size`` while large-chunk splitting is on."""
    chunks = ((1, 1, 1, 1), (500, ), (500, ))
    index = np.array([0, 1] + [2] * 101 + [3])
    itemsize = 8

    # Same nesting order as two stacked ``with`` statements: splitting first,
    # then the chunk-size override.
    with dask.config.set({"array.slicing.split-large-chunks": True}), \
            config.set({"array.chunk-size": "10GB"}):
        chunks2, dsk = take("a", "b", chunks, index, itemsize)

    expected_chunks = ((1, 1, 101, 1), (500, ), (500, ))
    assert chunks2 == expected_chunks
    assert len(dsk) == 4
def setup(self, resource, steal_interval):
    """Start a two-worker local cluster and store a client on ``self.client``.

    ``steal_interval`` is written to the work-stealing-interval config key.
    When ``resource`` is truthy the first worker is created with it as its
    ``"resource"`` resource, and the spec used for the added worker has its
    resources removed.
    """
    config.set(
        {"distributed.scheduler.work-stealing-interval": steal_interval})

    if resource:
        rdict = {"resource": resource}
    else:
        rdict = None

    cluster = LocalCluster(
        n_workers=1,
        threads_per_worker=1,
        resources=rdict,
        worker_class=Worker,
    )

    # Copy the spec for the next worker so it can be edited before scaling.
    spec = copy.deepcopy(cluster.new_worker_spec())
    if resource:
        del spec[1]['options']['resources']
    cluster.worker_spec.update(spec)
    cluster.scale(2)

    self.client = Client(cluster)
def test_custom_context_ignored_elsewhere():
    """
    On Python 2/Windows, setting 'multiprocessing.context' doesn't explode.

    Presumption is it's not used since unsupported, but mostly we care about
    not breaking anything.
    """
    # Baseline without any context configured.
    assert get({'x': (inc, 1)}, 'x') == 2

    # Configuring a context must only warn; the result is unchanged.
    with pytest.warns(UserWarning), \
            config.set({"multiprocessing.context": "forkserver"}):
        assert get({'x': (inc, 1)}, 'x') == 2
def test_meta_commands(c, client, capsys):
    """Exercise the meta commands and check what each one prints or changes."""
    # Commands that only print a listing, paired with the heading expected
    # somewhere in their output.
    listing_commands = (
        ("?", "Commands"),
        ("help", "Commands"),
        ("\\d?", "Commands"),
        ("\\l", "Schemas"),
        ("\\dt", "Tables"),
        ("\\dm", "Models"),
        ("\\df", "Functions"),
        ("\\de", "Experiments"),
    )
    for command, heading in listing_commands:
        _meta_commands(command, context=c, client=client)
        captured = capsys.readouterr()
        assert heading in captured.out

    # Switching to an existing schema updates the context.
    c.create_schema("test_schema")
    _meta_commands("\\dss test_schema", context=c, client=client)
    assert c.schema_name == "test_schema"

    # Switching to an unknown schema prints exactly one message.
    _meta_commands("\\dss not_exists", context=c, client=client)
    captured = capsys.readouterr()
    assert "Schema not_exists not available\n" == captured.out

    # Connecting to a scheduler is expected to time out with the configured
    # 5 s connect timeout.
    expected_timeout = "Timed out .* to tcp://localhost:8787 after 5 s"
    with pytest.raises(OSError, match=expected_timeout):
        with dask_config.set({"distributed.comm.timeouts.connect": 5}):
            client = _meta_commands("\\dsc localhost:8787", context=c, client=client)
            assert client.scheduler.__dict__["addr"] == "localhost:8787"
def test_custom_context_used_python3_posix():
    """
    The 'multiprocessing.context' config is used to create the pool.

    We assume default is 'fork', and therefore test for 'spawn'.  If default
    context is changed this test will need to be modified to be different than
    that.
    """
    import sys

    # We check for spawn by ensuring subprocess doesn't have modules only
    # parent process should have:
    def check_for_pytest():
        import sys

        return "FAKE_MODULE_FOR_TEST" in sys.modules

    sys.modules["FAKE_MODULE_FOR_TEST"] = 1
    try:
        with config.set({"multiprocessing.context": "spawn"}):
            marker_seen_in_worker = get({"x": (check_for_pytest,)}, "x")
        assert not marker_seen_in_worker
    finally:
        # Always remove the marker so it cannot leak into other tests.
        del sys.modules["FAKE_MODULE_FOR_TEST"]
def test_arg_reductions(dfunc, func):
    """Check dask arg-reduction ``dfunc`` against the reference ``func``."""
    x = np.random.random((10, 10, 10))
    a = da.from_array(x, chunks=(3, 4, 5))
    axes = (0, 1, 2)

    # Default reduction settings.
    assert_eq(dfunc(a), func(x))
    for axis in axes:
        assert_eq(dfunc(a, axis), func(x, axis))

    # Same checks with the tree-reduction width taken from config.
    with config.set(split_every=2):
        assert_eq(dfunc(a), func(x))
        for axis in axes:
            assert_eq(dfunc(a, axis), func(x, axis))

    # Invalid axis arguments.
    with pytest.raises(ValueError):
        dfunc(a, 3)
    with pytest.raises(TypeError):
        dfunc(a, (0, 1))

    # One-dimensional input, including an explicit ``split_every``.
    x2 = np.arange(10)
    a2 = da.from_array(x2, chunks=3)
    assert_eq(dfunc(a2), func(x2))
    assert_eq(dfunc(a2, 0), func(x2, 0))
    assert_eq(dfunc(a2, 0, split_every=2), func(x2, 0))
def test_set():
    """``set`` works with kwargs, flat dicts, dotted dicts and a custom target."""
    # Keyword form, nested: the inner value shadows and is then restored.
    with set(abc=123):
        assert config['abc'] == 123
        with set(abc=456):
            assert config['abc'] == 456
        assert config['abc'] == 123
    assert 'abc' not in config

    # Mapping form with a plain key.
    flat = {'abc': 123}
    with set(flat):
        assert config['abc'] == 123

    # Mapping form with dotted keys expands into nested dictionaries.
    dotted = {'abc.x': 1, 'abc.y': 2, 'abc.z.a': 3}
    with set(dotted):
        assert config['abc'] == {'x': 1, 'y': 2, 'z': {'a': 3}}

    # An explicit ``config`` target is written to directly.
    target = {}
    set({'abc.x': 123}, config=target)
    assert target['abc']['x'] == 123
def test_set():
    """``set`` applies values inside the context and removes them afterwards."""
    # Keyword form, nested: the inner value shadows and is then restored.
    with set(abc=123):
        assert config["abc"] == 123
        with set(abc=456):
            assert config["abc"] == 456
        assert config["abc"] == 123
    assert "abc" not in config

    # Mapping form with a plain key.
    flat = {"abc": 123}
    with set(flat):
        assert config["abc"] == 123
    assert "abc" not in config

    # Mapping form with dotted keys expands into nested dictionaries.
    dotted = {"abc.x": 1, "abc.y": 2, "abc.z.a": 3}
    with set(dotted):
        assert config["abc"] == {"x": 1, "y": 2, "z": {"a": 3}}
    assert "abc" not in config

    # An explicit ``config`` target is written to directly.
    target = {}
    set({"abc.x": 123}, config=target)
    assert target["abc"]["x"] == 123
def test_tree_reduce_set_options():
    """A per-axis ``split_every`` from config bounds the reduction fan-in."""
    data = np.arange(242).reshape((11, 22))
    x = da.from_array(data, chunks=(3, 4))

    per_axis_split = {0: 2, 1: 3}
    with config.set(split_every=per_axis_split):
        # Full reduction combines both axes' limits; axis 0 alone uses its own.
        assert_max_deps(x.sum(), 2 * 3)
        assert_max_deps(x.sum(axis=0), 2)
def test_set_hard_to_copyables():
    """Setting a lock as a value must not break a nested ``set``."""
    import threading

    # Same order as two stacked ``with`` statements: lock first, then ``y``.
    with set(x=threading.Lock()), set(y=1):
        pass
def annotate(**annotations):
    """Context Manager for setting HighLevelGraph Layer annotations.

    Annotations are metadata or soft constraints associated with
    tasks that dask schedulers may choose to respect: They signal intent
    without enforcing hard constraints. As such, they are
    primarily designed for use with the distributed scheduler.

    Almost any object can serve as an annotation, but small Python objects
    are preferred, while large objects such as NumPy arrays are discouraged.

    Callables supplied as an annotation should take a single *key* argument and
    produce the appropriate annotation. Individual task keys in the annotated collection
    are supplied to the callable.

    Parameters
    ----------
    **annotations : key-value pairs

    Raises
    ------
    TypeError
        If ``workers``, ``priority``, ``retries``, ``resources`` or
        ``allow_other_workers`` is given with an unsupported type.

    Examples
    --------

    All tasks within array A should have priority 100 and be retried 3 times
    on failure.

    >>> import dask
    >>> import dask.array as da
    >>> with dask.annotate(priority=100, retries=3):
    ...     A = da.ones((10000, 10000))

    Prioritise tasks within Array A on flattened block ID.

    >>> nblocks = (10, 10)
    >>> with dask.annotate(priority=lambda k: k[1]*nblocks[1] + k[2]):
    ...     A = da.ones((1000, 1000), chunks=(100, 100))

    Annotations may be nested.

    >>> with dask.annotate(priority=1):
    ...     with dask.annotate(retries=3):
    ...         A = da.ones((1000, 1000))
    ...     B = A + 1
    """

    # Sanity check annotations used in place of
    # legacy distributed Client.{submit, persist, compute} keywords
    #
    # The offending value is always wrapped in a one-element tuple before
    # %-formatting: a bare tuple value would otherwise be unpacked as the
    # format arguments and raise "not all arguments converted" instead of
    # the intended message.
    if "workers" in annotations:
        workers = annotations["workers"]
        if isinstance(workers, (list, set, tuple)):
            annotations["workers"] = list(workers)
        elif isinstance(workers, str):
            annotations["workers"] = [workers]
        elif callable(workers):
            pass
        else:
            raise TypeError(
                "'workers' annotation must be a sequence of str, a str or a callable, but got %s."
                % (workers,)
            )

    if (
        "priority" in annotations
        and not isinstance(annotations["priority"], Number)
        and not callable(annotations["priority"])
    ):
        raise TypeError(
            "'priority' annotation must be a Number or a callable, but got %s"
            % (annotations["priority"],)
        )

    if (
        "retries" in annotations
        and not isinstance(annotations["retries"], Number)
        and not callable(annotations["retries"])
    ):
        raise TypeError(
            "'retries' annotation must be a Number or a callable, but got %s"
            % (annotations["retries"],)
        )

    if (
        "resources" in annotations
        and not isinstance(annotations["resources"], dict)
        and not callable(annotations["resources"])
    ):
        raise TypeError(
            "'resources' annotation must be a dict, but got %s"
            % (annotations["resources"],)
        )

    if (
        "allow_other_workers" in annotations
        and not isinstance(annotations["allow_other_workers"], bool)
        and not callable(annotations["allow_other_workers"])
    ):
        raise TypeError(
            "'allow_other_workers' annotations must be a bool or a callable, but got %s"
            % (annotations["allow_other_workers"],)
        )

    # Only the new keys are set, each under the "annotations." prefix.
    # config.set merges dotted keys into the existing "annotations" section,
    # so annotations from an enclosing context stay visible without being
    # re-set. Re-setting them under their bare names would write them to the
    # top level of the config for the duration of this context.
    with config.set({f"annotations.{k}": v for k, v in annotations.items()}):
        yield
def test_tree_reduce_set_options():
    """A per-axis ``split_every`` from config bounds the reduction fan-in."""
    data = np.arange(242).reshape((11, 22))
    x = da.from_array(data, chunks=(3, 4))

    per_axis_split = {0: 2, 1: 3}
    with config.set(split_every=per_axis_split):
        # Reduce over all axes, over no axes, and over axis 0 only.
        assert_max_deps(x.sum(), 2 * 3)
        assert_max_deps(x.sum(axis=()), 1)
        assert_max_deps(x.sum(axis=0), 2)
def setup_temp_root(**kwargs):
    """Create a temporary directory and register it as ``geomodeling.root``.

    Keyword arguments are forwarded to ``tempfile.mkdtemp``. The config value
    is set without a context manager, so it stays in effect after the call.
    Returns the path of the new directory.
    """
    root = tempfile.mkdtemp(**kwargs)
    overrides = {"geomodeling.root": root}
    config.set(overrides)
    return root
def generate_scripts(self):
    """Queue and run the script-generation tasks for every SMX file found.

    For each file returned by ``funcs.get_smx_files`` this method queues dask
    ``delayed`` tasks on the ``self.parallel_*`` lists: folder creation, a copy
    of the SMX file, and one template call per script. Per Teradata source row
    it queues the UDI, Testing and Source smx groups that are enabled in
    ``self.scripts_flag``. The queued tasks are then computed under
    ``config.set(scheduler=...)``, a summary is written to ``self.log_file``,
    the output folder is opened with the platform's file opener and the log
    file is closed.

    Failures are logged through ``funcs.SMXFilesLogError`` at three levels
    (source row, SMX file, whole run) and do not propagate; the matching
    counter is decremented for a failed source or file.

    NOTE(review): the source this was recovered from had lost its indentation;
    the nesting of the final summary / ProgressBar section was reconstructed
    and should be confirmed against the original file.
    """
    self.log_file.write("Reading from: \t" + self.cf.smx_path)
    self.log_file.write("Output folder: \t" + self.cf.output_path)
    self.log_file.write("SMX files:")
    print("Reading from: \t" + self.cf.smx_path)
    print("Output folder: \t" + self.cf.output_path)
    print("Scripts to be generated: \t" + self.scripts_flag)
    print("SMX files:")
    filtered_sources = []
    self.start_time = dt.datetime.now()
    try:
        smx_files = funcs.get_smx_files(self.cf.smx_path, self.smx_ext,
                                        self.sheets)
        for smx in smx_files:
            try:
                self.count_smx = self.count_smx + 1
                smx_file_path = self.cf.smx_path + "/" + smx
                smx_file_name = os.path.splitext(smx)[0]
                print("\t" + smx_file_name)
                self.log_file.write("\t" + smx_file_name)
                home_output_path = self.cf.output_path + "/" + smx_file_name
                # self.parallel_remove_output_home_path.append(delayed(md.remove_folder)(home_output_path))
                self.parallel_create_output_home_path.append(
                    delayed(md.create_folder)(home_output_path))
                # COPY SMX USED INTO PATH OF ITS UDI SCRIPTS
                smx_file_path_destination = os.path.join(
                    home_output_path, "USED_SMX_FILE")
                self.parallel_create_smx_copy_path.append(
                    delayed(md.create_folder)(smx_file_path_destination))
                smx_file_path_destination += '/' + smx_file_name + '.xlsx'
                self.parallel_used_smx_copy.append(
                    delayed(shutil.copy)(smx_file_path,
                                         smx_file_path_destination))
                self.parallel_templates.append(
                    delayed(gcfr.gcfr)(self.cf, home_output_path))
                ##################################### end of read_smx_folder ################################
                if self.cf.source_names:
                    System_sht_filter = [[
                        'Source system name', self.cf.source_names
                    ]]
                else:
                    System_sht_filter = None
                # The System sheet is read eagerly (not delayed) because its
                # rows drive the per-source loop below.
                System = funcs.read_excel(smx_file_path,
                                          sheet_name=self.System_sht)
                teradata_sources = System[System['Source type'] == 'TERADATA']
                teradata_sources = funcs.df_filter(teradata_sources,
                                                   System_sht_filter, False)
                self.count_sources = self.count_sources + len(
                    teradata_sources.index)
                # The remaining sheets are read lazily as delayed tasks.
                Supplements = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.Supplements_sht)
                Data_types = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.Data_types_sht)
                Column_mapping = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.Column_mapping_sht)
                BMAP_values = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.BMAP_values_sht)
                BMAP = delayed(funcs.read_excel)(smx_file_path,
                                                 sheet_name=self.BMAP_sht)
                BKEY = delayed(funcs.read_excel)(smx_file_path,
                                                 sheet_name=self.BKEY_sht)
                Core_tables = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.Core_tables_sht)
                Core_tables = delayed(funcs.rename_sheet_reserved_word)(
                    Core_tables, Supplements, 'TERADATA',
                    ['Column name', 'Table name'])
                RI_relations = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.RI_relations_sht)
                ##################################### end of read_smx_sheet ################################
                for system_index, system_row in teradata_sources.iterrows(
                ):
                    try:
                        Loading_Type = system_row['Loading type'].upper()
                        # Sources with an empty loading type are skipped.
                        if Loading_Type != "":
                            source_name = system_row['Source system name']
                            filtered_sources.append(source_name)
                            source_name_filter = [[
                                'Source', [source_name]
                            ]]
                            core_layer_filter = [['Layer', ["CORE"]]]
                            stg_layer_filter = [['Layer', ["STG"]]]
                            stg_source_name_filter = [[
                                'Source system name', [source_name]
                            ]]
                            Table_mapping = delayed(funcs.read_excel)(
                                smx_file_path, self.Table_mapping_sht,
                                source_name_filter)
                            core_Table_mapping = delayed(funcs.df_filter)(
                                Table_mapping, core_layer_filter, False)
                            stg_Table_mapping = delayed(funcs.df_filter)(
                                Table_mapping, stg_layer_filter, False)
                            STG_tables = delayed(funcs.read_excel)(
                                smx_file_path, self.STG_tables_sht,
                                stg_source_name_filter)
                            # Second read of the same sheet, kept without the
                            # reserved-word renaming applied just below.
                            STG_tables_export = delayed(funcs.read_excel)(
                                smx_file_path, self.STG_tables_sht,
                                stg_source_name_filter)
                            STG_tables = delayed(
                                funcs.rename_sheet_reserved_word)(
                                    STG_tables, Supplements, 'TERADATA',
                                    ['Column name', 'Table name'])
                            # Output folder layout for this source.
                            main_output_path = home_output_path + "/" + Loading_Type + "/" + source_name
                            source_output_path = os.path.join(
                                main_output_path, "UDI")
                            source_smx_output_path = os.path.join(
                                source_output_path, "Source smx")
                            output_path_testing = os.path.join(
                                main_output_path, "TestCases_scripts")
                            process_check_output_path_testing = os.path.join(
                                output_path_testing,
                                "PROCESS_CHECK_Cases_scripts")
                            cso_output_path_testing = os.path.join(
                                output_path_testing, "CSO_Cases_scripts")
                            nulls_output_path_testing = os.path.join(
                                output_path_testing, "NULLS_Cases_scripts")
                            duplicate_output_path_testing = os.path.join(
                                output_path_testing,
                                "DUPLICATE_Cases_scripts")
                            data_src_output_path_testing = os.path.join(
                                output_path_testing,
                                "DATA_SRC_Cases_scripts")
                            bmaps_output_path_testing = os.path.join(
                                output_path_testing, "BMAPS_Cases_scripts")
                            history_output_path_testing = os.path.join(
                                output_path_testing, "HISTORY_Cases_scripts")
                            ri_output_path_testing = os.path.join(
                                output_path_testing, "RI_Cases_scripts")
                            input_view_output_path_testing = os.path.join(
                                output_path_testing,
                                "SMX QUALITY TESTING SCRIPTS")
                            compare_stg_counts_output_path_testing = os.path.join(
                                output_path_testing, "STG TESTING SCRIPTS")
                            self.parallel_create_output_source_path.append(
                                delayed(
                                    md.create_folder)(main_output_path))
                            #UDI SCRIPTS
                            if 'UDI' in self.scripts_flag:
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        source_output_path))
                                self.parallel_templates.append(
                                    delayed(D000.d000)(self.cf,
                                                       source_output_path,
                                                       source_name,
                                                       core_Table_mapping,
                                                       STG_tables, BKEY))
                                self.parallel_templates.append(
                                    delayed(D001.d001)(self.cf,
                                                       source_output_path,
                                                       source_name,
                                                       STG_tables))
                                self.parallel_templates.append(
                                    delayed(D002.d002)(self.cf,
                                                       source_output_path,
                                                       Core_tables,
                                                       core_Table_mapping))
                                self.parallel_templates.append(
                                    delayed(D003.d003)(self.cf,
                                                       source_output_path,
                                                       source_name,
                                                       STG_tables,
                                                       BMAP_values, BMAP))
                                self.parallel_templates.append(
                                    delayed(D110.d110)(self.cf,
                                                       source_output_path,
                                                       stg_Table_mapping,
                                                       STG_tables,
                                                       Loading_Type))
                                self.parallel_templates.append(
                                    delayed(D200.d200)(self.cf,
                                                       source_output_path,
                                                       STG_tables,
                                                       Loading_Type))
                                self.parallel_templates.append(
                                    delayed(D210.d210)(self.cf,
                                                       source_output_path,
                                                       STG_tables,
                                                       Loading_Type))
                                self.parallel_templates.append(
                                    delayed(D215.d215)(self.cf,
                                                       source_output_path,
                                                       source_name,
                                                       system_row,
                                                       STG_tables))
                                self.parallel_templates.append(
                                    delayed(D320.d320)(self.cf,
                                                       source_output_path,
                                                       STG_tables, BKEY))
                                self.parallel_templates.append(
                                    delayed(D330.d330)(self.cf,
                                                       source_output_path,
                                                       STG_tables, BKEY))
                                self.parallel_templates.append(
                                    delayed(D340.d340)(self.cf,
                                                       source_output_path,
                                                       STG_tables, BKEY))
                                self.parallel_templates.append(
                                    delayed(D300.d300)(self.cf,
                                                       source_output_path,
                                                       STG_tables, BKEY))
                                self.parallel_templates.append(
                                    delayed(D400.d400)(self.cf,
                                                       source_output_path,
                                                       STG_tables))
                                self.parallel_templates.append(
                                    delayed(D410.d410)(self.cf,
                                                       source_output_path,
                                                       STG_tables))
                                self.parallel_templates.append(
                                    delayed(D415.d415)(self.cf,
                                                       source_output_path,
                                                       STG_tables))
                                self.parallel_templates.append(
                                    delayed(D420.d420)(self.cf,
                                                       source_output_path,
                                                       STG_tables, BKEY,
                                                       BMAP, Loading_Type))
                                self.parallel_templates.append(
                                    delayed(D600.d600)(self.cf,
                                                       source_output_path,
                                                       core_Table_mapping,
                                                       Core_tables))
                                self.parallel_templates.append(
                                    delayed(D607.d607)(self.cf,
                                                       source_output_path,
                                                       Core_tables,
                                                       BMAP_values))
                                self.parallel_templates.append(
                                    delayed(D608.d608)(self.cf,
                                                       source_output_path,
                                                       source_name,
                                                       STG_tables,
                                                       Core_tables,
                                                       BMAP_values))
                                self.parallel_templates.append(
                                    delayed(D609.d609)(self.cf,
                                                       source_output_path,
                                                       core_Table_mapping,
                                                       Core_tables))
                                self.parallel_templates.append(
                                    delayed(D610.d610)(self.cf,
                                                       source_output_path,
                                                       core_Table_mapping,
                                                       STG_tables,
                                                       source_name))
                                self.parallel_templates.append(
                                    delayed(D615.d615)(self.cf,
                                                       source_output_path,
                                                       Core_tables))
                                self.parallel_templates.append(
                                    delayed(D620.d620)(
                                        self.cf, source_output_path,
                                        core_Table_mapping, Column_mapping,
                                        Core_tables, Loading_Type, 'UDI',
                                        STG_tables))
                                self.parallel_templates.append(
                                    delayed(D630.d630)(self.cf,
                                                       source_output_path,
                                                       core_Table_mapping))
                                self.parallel_templates.append(
                                    delayed(D640.d640)(self.cf,
                                                       source_output_path,
                                                       source_name,
                                                       core_Table_mapping))
                            #TESTING SCRIPTS
                            if 'Testing' in self.scripts_flag:
                                #CREATING PATHS FOR THE OUTPUT SCRIPTS
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        output_path_testing))
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        process_check_output_path_testing))
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        cso_output_path_testing))
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        nulls_output_path_testing))
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        duplicate_output_path_testing))
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        data_src_output_path_testing))
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        bmaps_output_path_testing))
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        history_output_path_testing))
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        ri_output_path_testing))
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        input_view_output_path_testing))
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)
                                    (compare_stg_counts_output_path_testing
                                     ))
                                #
                                self.parallel_templates.append(
                                    delayed(testing_script_01.
                                            source_testing_script)(
                                                self.cf, output_path_testing,
                                                source_name,
                                                core_Table_mapping,
                                                Column_mapping, STG_tables,
                                                BKEY))
                                self.parallel_templates.append(
                                    delayed(testing_script_02.
                                            source_testing_script)(
                                                self.cf, output_path_testing,
                                                source_name,
                                                core_Table_mapping,
                                                Core_tables))
                                self.parallel_templates.append(
                                    delayed(PROCESS_CHECK_TEST_SHEET.
                                            process_check)
                                    (self.cf,
                                     process_check_output_path_testing,
                                     source_name, core_Table_mapping,
                                     Core_tables))
                                self.parallel_templates.append(
                                    delayed(CSO_TEST_SHEET.cso_check)(
                                        self.cf, cso_output_path_testing,
                                        source_name, core_Table_mapping,
                                        Column_mapping))
                                self.parallel_templates.append(
                                    delayed(NULLS_TEST_SHEET.nulls_check)(
                                        self.cf, nulls_output_path_testing,
                                        core_Table_mapping, Core_tables))
                                self.parallel_templates.append(
                                    delayed(
                                        DUP_TEST_SHEET.duplicates_check)(
                                            self.cf,
                                            duplicate_output_path_testing,
                                            core_Table_mapping, Core_tables))
                                self.parallel_templates.append(
                                    delayed(
                                        DATA_SRC_TEST_SHEET.data_src_check)
                                    (self.cf, data_src_output_path_testing,
                                     source_name, core_Table_mapping,
                                     Column_mapping))
                                self.parallel_templates.append(
                                    delayed(
                                        BMAP_CHECK_TEST_SHEET.bmap_check)(
                                            self.cf,
                                            bmaps_output_path_testing,
                                            core_Table_mapping, Core_tables,
                                            BMAP_values))
                                self.parallel_templates.append(
                                    delayed(BMAP_DUP_CD_TEST_SHEET.
                                            bmap_dup_check)(
                                                self.cf,
                                                bmaps_output_path_testing,
                                                core_Table_mapping,
                                                Core_tables, BMAP_values))
                                self.parallel_templates.append(
                                    delayed(BMAP_DUP_DESC_TEST_SHEET.
                                            bmap_dup_desc_check)(
                                                self.cf,
                                                bmaps_output_path_testing,
                                                core_Table_mapping,
                                                Core_tables, BMAP_values))
                                self.parallel_templates.append(
                                    delayed(BMAP_NULL_TEST_SHEET.
                                            bmap_null_check)(
                                                self.cf,
                                                bmaps_output_path_testing,
                                                core_Table_mapping,
                                                Core_tables, BMAP_values))
                                self.parallel_templates.append(
                                    delayed(BMAP_UNMATCHED_TEST_SHEET.
                                            bmap_unmatched_values_check)(
                                                self.cf,
                                                bmaps_output_path_testing,
                                                core_Table_mapping,
                                                Core_tables, BMAP,
                                                BMAP_values))
                                self.parallel_templates.append(
                                    delayed(
                                        HIST_STRT_END_NULL_TEST_SHEET.
                                        hist_start_end_null_check)(
                                            self.cf,
                                            history_output_path_testing,
                                            core_Table_mapping, Core_tables))
                                self.parallel_templates.append(
                                    delayed(
                                        HIST_DUP_TEST_SHEET.hist_dup_check)
                                    (self.cf, history_output_path_testing,
                                     core_Table_mapping, Core_tables))
                                # NOTE(review): this calls a function named
                                # hist_start_end_null_check on the
                                # HIST_STRT_GRT_END module, the same function
                                # name as the HIST_STRT_END_NULL call above.
                                # Confirm this is the intended entry point.
                                self.parallel_templates.append(
                                    delayed(
                                        HIST_STRT_GRT_END_TEST_SHEET.
                                        hist_start_end_null_check)(
                                            self.cf,
                                            history_output_path_testing,
                                            core_Table_mapping, Core_tables))
                                self.parallel_templates.append(
                                    delayed(
                                        HIST_TIME_GAP_TEST_SHEET.
                                        hist_timegap_check)(
                                            self.cf,
                                            history_output_path_testing,
                                            core_Table_mapping, Core_tables))
                                self.parallel_templates.append(
                                    delayed(
                                        HIST_STRT_NULL_TEST_SHEET.
                                        hist_start_null_check)(
                                            self.cf,
                                            history_output_path_testing,
                                            core_Table_mapping, Core_tables))
                                self.parallel_templates.append(
                                    delayed(RI_TEST_SHEET.ri_check)(
                                        self.cf, ri_output_path_testing,
                                        core_Table_mapping, RI_relations))
                                self.parallel_templates.append(
                                    delayed(D620.d620)(
                                        self.cf,
                                        input_view_output_path_testing,
                                        core_Table_mapping, Column_mapping,
                                        Core_tables, Loading_Type, 'TESTING',
                                        STG_tables))
                                self.parallel_templates.append(
                                    delayed(
                                        compare_testing_inputview.
                                        compare_views_check)(
                                            self.cf,
                                            input_view_output_path_testing,
                                            core_Table_mapping,
                                            'FROM_TESTING_TO_UDI'))
                                self.parallel_templates.append(
                                    delayed(
                                        compare_testing_inputview.
                                        compare_views_check)(
                                            self.cf,
                                            input_view_output_path_testing,
                                            core_Table_mapping,
                                            'FROM_UDI_TO_TESTING'))
                                self.parallel_templates.append(
                                    delayed(stgCounts.stgCounts)
                                    (self.cf,
                                     compare_stg_counts_output_path_testing,
                                     system_row, STG_tables, Loading_Type,
                                     'Accepted'))
                                self.parallel_templates.append(
                                    delayed(stgCounts.stgCounts)
                                    (self.cf,
                                     compare_stg_counts_output_path_testing,
                                     system_row, STG_tables, Loading_Type,
                                     'All'))
                                self.parallel_templates.append(
                                    delayed(dataValidation.dataValidation)
                                    (self.cf,
                                     compare_stg_counts_output_path_testing,
                                     source_name, system_row, STG_tables,
                                     Loading_Type, 'Accepted'))
                                self.parallel_templates.append(
                                    delayed(dataValidation.dataValidation)
                                    (self.cf,
                                     compare_stg_counts_output_path_testing,
                                     source_name, system_row, STG_tables,
                                     Loading_Type, 'All'))
                            # SOURCE SMX SCRIPTS
                            if 'Source smx' in self.scripts_flag:
                                self.parallel_create_output_source_path.append(
                                    delayed(md.create_folder)(
                                        source_smx_output_path))
                                self.parallel_templates.append(
                                    delayed(
                                        generate_source_smx.source_smx)(
                                            STG_tables_export, Table_mapping,
                                            Column_mapping, System, BKEY,
                                            BMAP, BMAP_values, Supplements,
                                            Core_tables, Data_types,
                                            source_smx_output_path))
                    except Exception as e_source:
                        # A failing source is logged and un-counted; the
                        # remaining sources are still processed.
                        # print(error)
                        # log: smx_file_name, source_name
                        print(system_row.to_dict())
                        funcs.SMXFilesLogError(
                            self.cf.output_path, smx,
                            str(system_row.to_dict()),
                            traceback.format_exc()).log_error()
                        self.count_sources = self.count_sources - 1
            except Exception as e_smx_file:
                # A failing SMX file is logged and un-counted; the remaining
                # files are still processed.
                # print(error)
                funcs.SMXFilesLogError(self.cf.output_path, smx, None,
                                       traceback.format_exc()).log_error()
                self.count_smx = self.count_smx - 1
    except Exception as e1:
        # print(error)
        # traceback.print_exc()
        self.elapsed_time = dt.datetime.now() - self.start_time
        funcs.SMXFilesLogError(self.cf.output_path, None, None,
                               traceback.format_exc()).log_error()
    if len(self.parallel_templates) > 0:
        sources = funcs.list_to_string(filtered_sources, ', ')
        print("Sources:", sources)
        self.log_file.write("Sources:" + sources)
        scheduler_value = 'processes' if self.cf.read_sheets_parallel == 1 else ''
        with config.set(scheduler=scheduler_value):
            # Folders are created before the copies and templates that write
            # into them.
            compute(*self.parallel_create_output_home_path)
            compute(*self.parallel_create_smx_copy_path)
            compute(*self.parallel_used_smx_copy)
            compute(*self.parallel_create_output_source_path)
            compute(*self.parallel_templates)
        self.error_message = ""
    else:
        self.error_message = "No SMX Files Found!"
    # NOTE(review): every compute() call has already finished above, so this
    # ProgressBar context presumably displays nothing; confirm the intent.
    with ProgressBar():
        smx_files = " smx files" if self.count_smx > 1 else " smx file"
        smx_file_sources = " sources" if self.count_sources > 1 else " source"
        print("Start generating " + str(len(self.parallel_templates)) +
              " script for " + str(self.count_sources) + smx_file_sources +
              " from " + str(self.count_smx) + smx_files)
        self.log_file.write(
            str(len(self.parallel_templates)) + " script generated for " +
            str(self.count_sources) + smx_file_sources + " from " +
            str(self.count_smx) + smx_files)
    self.elapsed_time = dt.datetime.now() - self.start_time
    self.log_file.write("Elapsed Time: " + str(self.elapsed_time))
    # Open the output folder with the platform's file opener.
    if sys.platform == "win32":
        os.startfile(self.cf.output_path)
    else:
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        subprocess.call([opener, self.cf.output_path])
    self.log_file.close()
def test_set_hard_to_copyables():
    """A lock stored through ``set`` must not prevent a further nested ``set``."""
    import threading

    # Equivalent to entering the lock-valued context, then the ``y`` context.
    with set(x=threading.Lock()), set(y=1):
        pass
def dask_linear_operator(self):
    """Assemble the sensitivity matrix as a dask array and store or evaluate it.

    One delayed call to ``self.evaluate_integral`` is created per receiver
    location; the resulting blocks are stacked vertically and rechunked
    according to ``self.chunk_format``. What happens next depends on
    ``self.store_sensitivities``:

    * ``"disk"``: reuse or write ``sensitivity.zarr`` under
      ``self.sensitivity_path`` and return the kernel;
    * ``"forward_only"``: return the product of the stack with ``self.model``;
    * anything else: compute the stack and return it as a dask array.

    Side effects: sets ``self.nC`` and, for the auto-chunked formats, sets the
    dask ``array.chunk-size`` config value without a context manager, so it
    stays in effect after the call.

    NOTE(review): the source this was recovered from had lost its indentation;
    the nesting of the ``else`` / ``with ProgressBar()`` pair in the "disk"
    branch was reconstructed and should be confirmed against the original.
    """
    self.nC = self.modelMap.shape[0]
    n_data_comp = len(self.survey.components)
    components = np.array(list(self.survey.components.keys()))
    active_components = np.hstack(
        [np.c_[values] for values in self.survey.components.values()]
    ).tolist()
    # Wrap the integral once; each call below creates one lazy row block.
    row = delayed(self.evaluate_integral, pure=True)
    rows = [
        array.from_delayed(
            row(receiver_location, components[component]),
            dtype=np.float32,
            shape=(n_data_comp, self.nC),
        )
        for receiver_location, component in zip(
            self.survey.receiver_locations.tolist(), active_components
        )
    ]
    stack = array.vstack(rows)
    # Chunking options
    if self.chunk_format == "row" or self.store_sensitivities == "forward_only":
        config.set({"array.chunk-size": f"{self.max_chunk_size}MiB"})
        # Autochunking by rows is faster and more memory efficient for
        # very large problems sensitivity and forward calculations
        stack = stack.rechunk({0: "auto", 1: -1})
    elif self.chunk_format == "equal":
        # Manual chunks for equal number of blocks along rows and columns.
        # Optimal for Jvec and Jtvec operations
        row_chunk, col_chunk = compute_chunk_sizes(*stack.shape, self.max_chunk_size)
        stack = stack.rechunk((row_chunk, col_chunk))
    else:
        # Auto chunking by columns is faster for Inversions
        config.set({"array.chunk-size": f"{self.max_chunk_size}MiB"})
        stack = stack.rechunk({0: -1, 1: "auto"})
    if self.store_sensitivities == "disk":
        sens_name = self.sensitivity_path + "sensitivity.zarr"
        if os.path.exists(sens_name):
            kernel = array.from_zarr(sens_name)
            if np.all(
                np.r_[
                    np.any(np.r_[kernel.chunks[0]] == stack.chunks[0]),
                    np.any(np.r_[kernel.chunks[1]] == stack.chunks[1]),
                    np.r_[kernel.shape] == np.r_[stack.shape],
                ]
            ):
                # Check that loaded kernel matches supplied data and mesh
                print("Zarr file detected with same shape and chunksize ... re-loading")
                return kernel
        else:
            print("Writing Zarr file to disk")
            with ProgressBar():
                print("Saving kernel to zarr: " + sens_name)
                kernel = array.to_zarr(
                    stack, sens_name, compute=True, return_stored=True, overwrite=True
                )
    elif self.store_sensitivities == "forward_only":
        with ProgressBar():
            print("Forward calculation: ")
            pred = (stack @ self.model).compute()
        return pred
    else:
        print(stack.chunks)
        with ProgressBar():
            print("Computing sensitivities to local ram")
            kernel = array.asarray(stack.compute())
    return kernel
def test_dask_setconfig():
    """A permanent ``set`` persists and a scoped ``set`` is rolled back.

    ``sql.foo.bar`` is set without a context manager and must survive;
    ``sql.foo.baz`` is set inside a ``with`` block and must disappear when the
    block exits.
    """
    dask_config.set({"sql.foo.bar": 1})
    try:
        with dask_config.set({"sql.foo.baz": "2"}):
            assert dask_config.get("sql.foo") == {"bar": 1, "baz": "2"}
        assert dask_config.get("sql.foo") == {"bar": 1}
    finally:
        # Refresh the global config even when an assertion fails, so the
        # permanent "sql.foo.bar" value is not left behind for other tests.
        dask_config.refresh()
def generate_scripts(self) -> None:
    """Queue folder-creation and template tasks for every SMX file, then run them.

    For each workbook name returned by ``funcs.get_smx_files`` the method
    appends dask ``delayed`` calls to three lists on ``self``:
    ``parallel_create_output_home_path``, ``parallel_create_output_source_path``
    and ``parallel_templates``.  Which templates are queued depends on
    ``self.scripts_generation_flag`` (``'Staging Tables'`` or ``'SMX'``).
    Afterwards the queued work is computed, a summary and the elapsed time
    are written to ``self.log_file``, the output folder is opened with the
    platform's file opener, and the log file is closed.

    Errors are not raised to the caller: they are recorded through
    ``funcs.SMXFilesLogError(...).log_error()``.

    Attributes written: ``start_time``, ``elapsed_time``, ``error_message``,
    ``count_smx``, ``count_sources``.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source; the nesting of the final section (what sits inside
    ``with config.set`` / after the ``else``) should be confirmed against
    the original file.
    """
    self.log_file.write("Reading from: \t" + self.cf.smx_path)
    self.log_file.write("Output folder: \t" + self.cf.output_path)
    self.log_file.write("SMX files:")
    print("Reading from: \t" + self.cf.smx_path)
    print("Output folder: \t" + self.cf.output_path)
    print("SMX files:")
    # Never appended to in this variant, so the "Sources:" line below is
    # built from an empty list.
    filtered_sources = []
    self.start_time = dt.datetime.now()
    try:
        smx_files = funcs.get_smx_files(self.cf.smx_path, self.smx_ext,
                                        self.staging_sheets, self.smx_sheets,
                                        self.scripts_generation_flag)
        for smx in smx_files:
            try:
                self.count_smx = self.count_smx + 1
                # Fixed at 1 on every iteration (not accumulated).
                self.count_sources = 1
                smx_file_path = self.cf.smx_path + "/" + smx
                smx_file_name = os.path.splitext(smx)[0]
                print("\t" + smx_file_name)
                self.log_file.write("\t" + smx_file_name)
                # One output folder per workbook, named after the file stem.
                home_output_path = self.cf.output_path + "/" + smx_file_name + "/"
                self.parallel_create_output_home_path.append(
                    delayed(md.create_folder)(home_output_path))
                if self.scripts_generation_flag == 'Staging Tables':
                    main_output_path = home_output_path + "/" + "DDLs"
                    bteq_stg_dm_scripts_output_path = home_output_path + "/" + "BTEQ_Scrtipts" + "/" + "BTEQ_STG_TO_DATAMARAT_SCRIPTS"
                    bteq_stg_oi_scripts_output_path = home_output_path + "/" + "BTEQ_Scrtipts" + "/" + "BTEQ_STG_TO_OI_SCRIPTS"
                    self.parallel_create_output_source_path.append(
                        delayed(md.create_folder)(main_output_path))
                    self.parallel_create_output_source_path.append(
                        delayed(md.create_folder)(
                            bteq_stg_dm_scripts_output_path))
                    self.parallel_create_output_source_path.append(
                        delayed(md.create_folder)(
                            bteq_stg_oi_scripts_output_path))
                    # Sheets are read lazily; they are only loaded when the
                    # templates that depend on them are computed.
                    Data_Types = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.Data_types_sht)
                    STG_tables = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.STG_tables_sht)
                    # Same DDL template queued once per target layer name.
                    self.parallel_templates.append(
                        delayed(Staging_DDL.stg_temp_DDL)(self.cf, main_output_path, STG_tables, Data_Types, 'Staging'))
                    self.parallel_templates.append(
                        delayed(Staging_DDL.stg_temp_DDL)(self.cf, main_output_path, STG_tables, Data_Types, 'Data_mart'))
                    self.parallel_templates.append(
                        delayed(Staging_DDL.stg_temp_DDL)(self.cf, main_output_path, STG_tables, Data_Types, 'OI_staging'))
                    self.parallel_templates.append(
                        delayed(Staging_DDL.stg_temp_DDL)(self.cf, main_output_path, STG_tables, Data_Types, 'UV_staging'))
                    self.parallel_templates.append(
                        delayed(Staging_DDL.stg_temp_DDL)(self.cf, main_output_path, STG_tables, Data_Types, 'LOG_staging'))
                    self.parallel_templates.append(
                        delayed(BTEQ_Scripts.bteq_temp_script)(
                            self.cf, bteq_stg_dm_scripts_output_path, STG_tables, 'from stg to datamart'))
                    self.parallel_templates.append(
                        delayed(BTEQ_Scripts.bteq_temp_script)(
                            self.cf, bteq_stg_oi_scripts_output_path, STG_tables, 'from stg to oi'))
                elif self.scripts_generation_flag == 'SMX':
                    main_output_path_apply = home_output_path + "/" + "APPLY_SCRIPTS"
                    main_output_path_sgk = home_output_path + "/" + "SGK"
                    main_output_path_TFN = home_output_path + "/" + "TFN"
                    secondary_output_path_TFN = home_output_path + "/" + "SPECIAL_ATTENTION" + "/" + "TFN"
                    secondary_output_path_HIST = home_output_path + "/" + "SPECIAL_ATTENTION"
                    historyLegacy_subsequent_histLoads_path = home_output_path + "/" + "APPLY_SCRIPTS" + "/" + "Apply_History_Legacy/SUBSEQUENT_LOADS"
                    #historyLegacy_subsequent_histLoads_path_secondary = home_output_path + "/" + "SPECIAL_ATTENTION"
                    source_name = self.cf.sgk_source
                    self.parallel_create_output_source_path.append(
                        delayed(md.create_folder)(main_output_path_apply))
                    self.parallel_create_output_source_path.append(
                        delayed(md.create_folder)(main_output_path_sgk))
                    self.parallel_create_output_source_path.append(
                        delayed(md.create_folder)(main_output_path_TFN))
                    self.parallel_create_output_source_path.append(
                        delayed(
                            md.create_folder)(secondary_output_path_TFN))
                    self.parallel_create_output_source_path.append(
                        delayed(
                            md.create_folder)(secondary_output_path_HIST))
                    self.parallel_create_output_source_path.append(
                        delayed(md.create_folder)(
                            historyLegacy_subsequent_histLoads_path))
                    smx_sheet = delayed(funcs.read_excel)(
                        smx_file_path, sheet_name=self.smx_sheet)
                    # 'ALL' means no filtering by staging schema.  smx_sheet
                    # is a delayed object here, so the filters below are
                    # applied to that lazy value rather than to loaded data.
                    if source_name != 'ALL':
                        smx_sheet = smx_sheet[smx_sheet['Stg_Schema'] == source_name]
                    Rid_list = self.cf.Rid_List
                    print("RIDLIST", Rid_list)
                    if not Rid_list:
                        # Empty/falsy record-id list: keep the sheet as is.
                        smx_sheet = smx_sheet
                    else:
                        smx_sheet = smx_sheet[smx_sheet.Record_ID.isin(
                            Rid_list)]
                    # NOTE(review): called directly (not via delayed) on the
                    # lazy sheet - confirm the helper accepts a delayed object.
                    hist_legacy_subsheet = funcs.histLegacy_To_hist_for_subsequent_runs_df(
                        smx_sheet)
                    self.parallel_templates.append(
                        delayed(History_Legacy_Apply.history_legacy_apply)(
                            self.cf, main_output_path_apply, secondary_output_path_HIST, smx_sheet))
                    self.parallel_templates.append(
                        delayed(History_Apply.history_apply)(
                            self.cf, main_output_path_apply, secondary_output_path_HIST, hist_legacy_subsheet, False))
                    self.parallel_templates.append(
                        delayed(Apply_Insert_Upsert.apply_insert_upsert)(
                            self.cf, main_output_path_apply, smx_sheet, "Apply_Insert"))
                    self.parallel_templates.append(
                        delayed(Apply_Insert_Upsert.apply_insert_upsert)(
                            self.cf, main_output_path_apply, smx_sheet, "Apply_Upsert"))
                    self.parallel_templates.append(
                        delayed(Apply_Insert_Upsert.apply_insert_upsert)(
                            self.cf, main_output_path_apply, smx_sheet, "Apply_Delete_Insert"))
                    self.parallel_templates.append(
                        delayed(History_Apply.history_apply)(
                            self.cf, main_output_path_apply, secondary_output_path_HIST, smx_sheet, True))
                    self.parallel_templates.append(
                        delayed(History_Delete_Insert_Apply.
                                history_delete_insert_apply)(
                            self.cf, main_output_path_apply, secondary_output_path_HIST, smx_sheet))
                    self.parallel_templates.append(
                        delayed(SGK_insertion.sgk_insertion)(
                            self.cf, main_output_path_sgk, smx_sheet))
                    self.parallel_templates.append(
                        delayed(TFN_insertion.TFN_insertion)(
                            self.cf, main_output_path_TFN, secondary_output_path_TFN, smx_sheet))
            except Exception as e_smx_file:
                # A failure while queueing one workbook is logged with its
                # traceback and the workbook is removed from the count; the
                # loop continues with the next file.
                # print(error)
                funcs.SMXFilesLogError(self.cf.output_path, smx, None,
                                       traceback.format_exc()).log_error()
                self.count_smx = self.count_smx - 1
    except Exception as e1:
        # Failure outside the per-file handler (e.g. listing the SMX files).
        self.elapsed_time = dt.datetime.now() - self.start_time
        funcs.SMXFilesLogError(self.cf.output_path, None, None,
                               traceback.format_exc()).log_error()
    if len(self.parallel_templates) > 0:
        sources = funcs.list_to_string(filtered_sources, ', ')
        print("Sources:", sources)
        self.log_file.write("Sources:" + sources)
        # Empty string when parallel sheet reading is off.
        # NOTE(review): presumably '' makes dask fall back to its default
        # scheduler - confirm.
        scheduler_value = 'processes' if self.cf.read_sheets_parallel == 1 else ''
        with config.set(scheduler=scheduler_value):
            # Home folders are created before the per-source folders.
            compute(*self.parallel_create_output_home_path)
            compute(*self.parallel_create_output_source_path)
        self.error_message = ""
    else:
        self.error_message = "No SMX Files Found!"
    with ProgressBar():
        smx_files = " smx files" if self.count_smx > 1 else " smx file"
        smx_file_sources = " sources" if self.count_sources > 1 else " source"
        print("Start generating " + str(len(self.parallel_templates)) +
              " script for " + str(self.count_sources) + smx_file_sources +
              " from " + str(self.count_smx) + smx_files)
        compute(*self.parallel_templates)
        self.log_file.write(
            str(len(self.parallel_templates)) + " script generated for " +
            str(self.count_sources) + smx_file_sources + " from " +
            str(self.count_smx) + smx_files)
        self.elapsed_time = dt.datetime.now() - self.start_time
        self.log_file.write("Elapsed Time: " + str(self.elapsed_time))
    # Open the output folder with the platform's opener: os.startfile on
    # Windows, "open" on macOS, "xdg-open" elsewhere.
    if sys.platform == "win32":
        os.startfile(self.cf.output_path)
    else:
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        subprocess.call([opener, self.cf.output_path])
    self.log_file.close()
def test_safe_file_url(self):
    """Check ``utils.safe_file_url`` against a per-platform table of cases.

    Each case is ``(call arguments, expected URL)``.  ``within_start`` holds
    inputs that resolve normally; ``outside_start`` holds inputs that resolve
    to a location outside the given start directory.  The latter are accepted
    by default and must raise ``IOError`` once
    ``geomodeling.strict-file-paths`` is switched on.
    """
    to_url = utils.safe_file_url

    if sys.platform.startswith("win"):
        within_start = [
            # prepends file:// if necessary
            (("C:\\tmp",), "file://C:\\tmp"),
            (("C:\\tmp", "C:\\"), "file://C:\\tmp"),
            # absolute input
            (("file://C:\\tmp",), "file://C:\\tmp"),
            (("file://C:\\tmp", "C:\\"), "file://C:\\tmp"),
            (("file://tmp", "C:\\"), "file://C:\\tmp"),
            # relative input
            (("path", "C:\\tmp\\abs"), "file://C:\\tmp\\abs\\path"),
            (("..\\abs\\path", "C:\\tmp\\abs"), "file://C:\\tmp\\abs\\path"),
        ]
        outside_start = [
            (("file://..\\x", "C:\\tmp"), "file://C:\\x"),
            (("D:\\tmp", "C:\\tmp"), "file://D:\\tmp"),
            (("..\\", "C:\\tmp"), "file://C:\\"),
        ]
    else:
        within_start = [
            # prepends file:// if necessary
            (("/tmp",), "file:///tmp"),
            (("/tmp", "/"), "file:///tmp"),
            # absolute input
            (("file:///tmp",), "file:///tmp"),
            (("file:///tmp", "/"), "file:///tmp"),
            (("file://tmp", "/"), "file:///tmp"),
            # relative input
            (("path", "/tmp/abs"), "file:///tmp/abs/path"),
            (("../abs/path", "/tmp/abs"), "file:///tmp/abs/path"),
        ]
        outside_start = [
            (("file://../x", "/tmp"), "file:///x"),
            (("/etc/abs", "/tmp"), "file:///etc/abs"),
            (("../", "/tmp"), "file:///"),
        ]

    for call_args, expected in within_start:
        assert to_url(*call_args) == expected

    # raise on unknown protocol
    with pytest.raises(NotImplementedError):
        to_url("unknown://tmp")

    # paths outside of 'start' are resolved when strict mode is off
    for call_args, expected in outside_start:
        assert to_url(*call_args) == expected

    # raise on path outside start when strict-file-paths=True
    with config.set({"geomodeling.strict-file-paths": True}):
        for call_args, _ in outside_start:
            with pytest.raises(IOError):
                to_url(*call_args)
# NOTE(review): the statements down to ``self.write_results()`` are the tail
# of a method whose ``def`` line lies outside this chunk; they rely on
# ``self``, ``x`` and ``logger`` from that scope.  Their original indentation
# was lost and must be restored when merging.
del X, Y

# --- 'SB' model: build the dataset, duplicate it, then fit/load the model.
X, Y = self.make_dataset(table=self.t, var='SB', additional=['L_MAX', 'LAYER', 'SOL_Z'])
X, Y = self.duplicate_dataset(X, Y)
self.SB = continuous.model('SB', X, Y, x, logger=logger, load_save=True)
# Drop the training arrays before building the next dataset.
del X, Y

# --- 'CS' model: same steps, with 'SOL_SAND' as an additional column.
X, Y = self.make_dataset(table=self.t, var='CS', additional=['L_MAX', 'LAYER', 'SOL_Z', 'SOL_SAND'])
X, Y = self.duplicate_dataset(X, Y)
# NOTE(review): the series added under the name 'SOL_SAND' is taken from
# self.SOL_Z.Y_mod - confirm this is the intended source.
x = self.modify_dataset(x, series=self.SOL_Z.Y_mod, name='SOL_SAND')
self.CS = continuous.model('CS', X, Y, x, logger=logger, load_save=True)
del X, Y

# --- 'FS' model: reuses the ``x`` already extended for 'CS'.
X, Y = self.make_dataset(table=self.t, var='FS', additional=['L_MAX', 'LAYER', 'SOL_Z', 'SOL_SAND'])
X, Y = self.duplicate_dataset(X, Y)
self.FS = continuous.model('FS', X, Y, x, logger=logger, load_save=True)
del X, Y, x
self.write_results()


if __name__ == '__main__':
    # Script entry point: run the whole pipeline with a dask progress bar
    # and the process-based scheduler.
    from dask.diagnostics import ProgressBar
    from dask import config
    from multiprocessing import freeze_support

    # Called first thing under the main guard, before any dask work starts.
    freeze_support()
    pbar = ProgressBar()
    # register() enables the bar globally until unregister() is called.
    pbar.register()
    # Applied globally (no ``with``): stays in effect for the rest of the run.
    config.set(scheduler='processes')
    # Input paths are hard-coded to one developer machine.
    main(r"C:\Users\putzr\Documents\GitHub\sleepy\model\training.txt",
         r"C:\Users\putzr\Documents\GitHub\sleepy\model\modelling.txt").run()
    pbar.unregister()
def generate_scripts(self):
    """Queue the script templates for every Teradata source, then run them.

    For each workbook returned by ``funcs.get_smx_files`` the System sheet is
    read eagerly to find the rows whose ``'Source type'`` is ``'TERADATA'``
    (optionally restricted to ``self.cf.source_names``); all other sheets are
    read through dask ``delayed``.  For every such source with a non-empty
    ``'Loading type'`` a folder-creation task and the template tasks are
    queued.  The queues are then computed, a summary and the elapsed time
    are logged, and the output folder is opened.

    Errors are not raised to the caller: they are recorded through
    ``funcs.SMXFilesLogError(...).log_error()`` and the affected file or
    source is removed from the counters.

    Attributes written: ``start_time``, ``elapsed_time``, ``error_message``,
    ``count_smx``, ``count_sources``.

    The output folder is opened with ``os.startfile`` on Windows and with
    ``open`` / ``xdg-open`` elsewhere; ``os.startfile`` does not exist on
    other platforms, so calling it unconditionally raised ``AttributeError``
    there and skipped closing the log file.
    """
    # Local imports: only needed for the portable "open folder" step.
    import subprocess
    import sys

    self.log_file.write("Reading from: \t" + self.cf.smx_path)
    self.log_file.write("Output folder: \t" + self.cf.output_path)
    self.log_file.write("SMX files:")
    print("Reading from: \t" + self.cf.smx_path)
    print("Output folder: \t" + self.cf.output_path)
    print("SMX files:")
    filtered_sources = []
    self.start_time = dt.datetime.now()
    try:
        smx_files = funcs.get_smx_files(self.cf.smx_path, self.smx_ext, self.sheets)
        for smx in smx_files:
            try:
                self.count_smx = self.count_smx + 1
                smx_file_path = self.cf.smx_path + "/" + smx
                smx_file_name = os.path.splitext(smx)[0]
                print("\t" + smx_file_name)
                self.log_file.write("\t" + smx_file_name)
                home_output_path = self.cf.output_path + "/" + smx_file_name + "/"
                # self.parallel_remove_output_home_path.append(delayed(md.remove_folder)(home_output_path))
                self.parallel_create_output_home_path.append(
                    delayed(md.create_folder)(home_output_path))
                self.parallel_templates.append(
                    delayed(gcfr.gcfr)(self.cf, home_output_path))
                ##################################### end of read_smx_folder ################################
                if self.cf.source_names:
                    System_sht_filter = [['Source system name', self.cf.source_names]]
                else:
                    System_sht_filter = None

                # Read eagerly: the rows drive the loop below.
                System = funcs.read_excel(smx_file_path, sheet_name=self.System_sht)
                teradata_sources = System[System['Source type'] == 'TERADATA']
                teradata_sources = funcs.df_filter(teradata_sources, System_sht_filter, False)
                self.count_sources = self.count_sources + len(teradata_sources.index)

                # Workbook-wide sheets, read lazily and shared by all sources.
                Supplements = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.Supplements_sht)
                Column_mapping = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.Column_mapping_sht)
                BMAP_values = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.BMAP_values_sht)
                BMAP = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.BMAP_sht)
                BKEY = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.BKEY_sht)
                Core_tables = delayed(funcs.read_excel)(
                    smx_file_path, sheet_name=self.Core_tables_sht)
                Core_tables = delayed(funcs.rename_sheet_reserved_word)(
                    Core_tables, Supplements, 'TERADATA',
                    ['Column name', 'Table name'])
                ##################################### end of read_smx_sheet ################################
                for _, system_row in teradata_sources.iterrows():
                    try:
                        Loading_Type = system_row['Loading type'].upper()
                        if Loading_Type != "":
                            source_name = system_row['Source system name']
                            filtered_sources.append(source_name)

                            source_name_filter = [['Source', [source_name]]]
                            core_layer_filter = [['Layer', ["CORE"]]]
                            stg_layer_filter = [['Layer', ["STG"]]]
                            stg_source_name_filter = [['Source system name', [source_name]]]

                            Table_mapping = delayed(funcs.read_excel)(
                                smx_file_path, self.Table_mapping_sht, source_name_filter)
                            core_Table_mapping = delayed(funcs.df_filter)(
                                Table_mapping, core_layer_filter, False)
                            stg_Table_mapping = delayed(funcs.df_filter)(
                                Table_mapping, stg_layer_filter, False)
                            STG_tables = delayed(funcs.read_excel)(
                                smx_file_path, self.STG_tables_sht, stg_source_name_filter)
                            STG_tables = delayed(funcs.rename_sheet_reserved_word)(
                                STG_tables, Supplements, 'TERADATA',
                                ['Column name', 'Table name'])

                            source_output_path = home_output_path + "/" + Loading_Type + "/" + source_name
                            self.parallel_create_output_source_path.append(
                                delayed(md.create_folder)(source_output_path))

                            # (template, arguments after cf and output path),
                            # queued in this order.
                            source_templates = [
                                (D000.d000, (source_name, core_Table_mapping, STG_tables, BKEY)),
                                (D001.d001, (source_name, STG_tables)),
                                (D002.d002, (Core_tables, core_Table_mapping)),
                                (D003.d003, (BMAP_values, BMAP)),
                                (D110.d110, (stg_Table_mapping, STG_tables, Loading_Type)),
                                (D200.d200, (STG_tables, Loading_Type)),
                                (D210.d210, (STG_tables, Loading_Type)),
                                (D300.d300, (STG_tables, BKEY)),
                                (D320.d320, (STG_tables, BKEY)),
                                (D330.d330, (STG_tables, BKEY)),
                                (D340.d340, (STG_tables, BKEY)),
                                # self.parallel_templates.append(delayed(D400.d400)(self.cf, source_output_path, STG_tables))
                                # self.parallel_templates.append(delayed(D410.d410)(self.cf, source_output_path, STG_tables))
                                # self.parallel_templates.append(delayed(D415.d415)(self.cf, source_output_path, STG_tables))
                                (D420.d420, (STG_tables, BKEY, BMAP, Loading_Type)),
                                (D600.d600, (core_Table_mapping, Core_tables)),
                                (D607.d607, (Core_tables, BMAP_values)),
                                (D608.d608, (Core_tables, BMAP_values)),
                                (D610.d610, (core_Table_mapping,)),
                                (D615.d615, (Core_tables,)),
                                (D620.d620, (core_Table_mapping, Column_mapping, Core_tables, Loading_Type)),
                                (D630.d630, (core_Table_mapping,)),
                                (D640.d640, (source_name, core_Table_mapping)),
                                (testing_script_01.source_testing_script,
                                 (source_name, core_Table_mapping, Column_mapping, STG_tables, BKEY)),
                                (testing_script_02.source_testing_script,
                                 (source_name, Table_mapping, Core_tables)),
                            ]
                            for template, extra_args in source_templates:
                                self.parallel_templates.append(
                                    delayed(template)(self.cf, source_output_path, *extra_args))
                    except Exception:
                        # Log the offending System row with the traceback and
                        # take the source out of the count; other sources of
                        # the same workbook are still processed.
                        print(system_row.to_dict())
                        funcs.SMXFilesLogError(
                            self.cf.output_path, smx,
                            str(system_row.to_dict()),
                            traceback.format_exc()).log_error()
                        self.count_sources = self.count_sources - 1
            except Exception:
                # Whole-workbook failure: log it and un-count the file.
                funcs.SMXFilesLogError(self.cf.output_path, smx, None,
                                       traceback.format_exc()).log_error()
                self.count_smx = self.count_smx - 1
    except Exception:
        # Failure outside the per-file handler (e.g. listing the SMX files).
        funcs.SMXFilesLogError(self.cf.output_path, None, None,
                               traceback.format_exc()).log_error()

    if len(self.parallel_templates) > 0:
        sources = funcs.list_to_string(filtered_sources, ', ')
        print("Sources:", sources)
        self.log_file.write("Sources:" + sources)
        scheduler_value = 'processes' if self.cf.read_sheets_parallel == 1 else ''
        with config.set(scheduler=scheduler_value):
            # compute(*self.parallel_remove_output_home_path)
            # Folders first, so the templates have somewhere to write.
            compute(*self.parallel_create_output_home_path)
            compute(*self.parallel_create_output_source_path)
            with ProgressBar():
                smx_files = " smx files" if self.count_smx > 1 else " smx file"
                smx_file_sources = " sources" if self.count_sources > 1 else " source"
                print("Start generating " + str(len(self.parallel_templates)) +
                      " script for " + str(self.count_sources) +
                      smx_file_sources + " from " + str(self.count_smx) +
                      smx_files)
                compute(*self.parallel_templates)
                self.log_file.write(
                    str(len(self.parallel_templates)) +
                    " script generated for " + str(self.count_sources) +
                    smx_file_sources + " from " + str(self.count_smx) +
                    smx_files)
                self.elapsed_time = dt.datetime.now() - self.start_time
                self.log_file.write("Elapsed Time: " + str(self.elapsed_time))
        self.error_message = ""
        # os.startfile is Windows-only; use the desktop opener elsewhere.
        if sys.platform == "win32":
            os.startfile(self.cf.output_path)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.call([opener, self.cf.output_path])
    else:
        self.error_message = "No SMX Files Found!"
    self.log_file.close()
def test_get_context_always_default():
    """On Python 2/Windows, get_context() always returns same context.

    Asking for another context through the config must emit a UserWarning
    and still yield the plain ``multiprocessing`` module.
    """
    assert get_context() is multiprocessing

    requested = {"multiprocessing.context": "forkserver"}
    with pytest.warns(UserWarning), config.set(requested):
        assert get_context() is multiprocessing
def sql(
    self,
    sql: str,
    return_futures: bool = True,
    dataframes: Dict[str, Union[dd.DataFrame, pd.DataFrame]] = None,
    gpu: bool = False,
    config_options: Dict[str, Any] = None,
) -> Union[dd.DataFrame, pd.DataFrame]:
    """
    Run a SQL query against the tables registered on this context.

    The dialect is close to postgreSQL, but not every operation is
    implemented; in general only select statements (no data
    manipulation) work. For more information, see :ref:`sql`.

    Example:
        Query a registered table and trigger the computation with dask.

        .. code-block:: python

            result = c.sql("SELECT a, b FROM my_table")
            print(result.compute())

    Args:
        sql (:obj:`str`): The query string to execute
        return_futures (:obj:`bool`): When true (the default) the lazy
            dask dataframe is returned; otherwise it is computed first.
        dataframes (:obj:`Dict[str, dask.dataframe.DataFrame]`): extra Dask
            or pandas dataframes, registered as tables (name -> frame)
            before the query is run
        gpu (:obj:`bool`): Whether or not to load the additional Dask or
            pandas dataframes (if any) on GPU; requires cuDF / dask-cuDF
            if enabled. Defaults to False.
        config_options (:obj:`Dict[str,Any]`): dask configuration values
            applied while the query is processed

    Returns:
        :obj:`dask.dataframe.DataFrame`: the created data frame of this
        query, or ``None`` when the converter yields no result.
    """
    with dask_config.set(config_options):
        extra_tables = {} if dataframes is None else dataframes
        for table_name, frame in extra_tables.items():
            self.create_table(table_name, frame, gpu=gpu)

        rel, select_names, _ = self._get_ral(sql)
        container = RelConverter.convert(rel, context=self)
        if container is None:
            return None

        if select_names:
            # Rename any columns named EXPR$* to a more human readable name
            columns = container.column_container
            renamed = columns.rename(dict(zip(columns.columns, select_names)))
            container = DataContainer(container.df, renamed)

        result = container.assign()
        return result if return_futures else result.compute()
def teardown_temp_root(path):
    """Delete the temporary file root and restore the default root setting.

    ``geomodeling.root`` is reset to ``defaults["root"]`` even when removing
    ``path`` fails, so a broken cleanup does not leave the configuration
    pointing at the temporary directory.  Any error raised by
    ``shutil.rmtree`` still propagates to the caller.
    """
    try:
        shutil.rmtree(path)
    finally:
        config.set({"geomodeling.root": defaults["root"]})