def distribute_headers(self, headers_paths):
    """
    Includes the C++ headers to be declared before execution.

    Args:
        headers_paths (str, iter): A string or an iterable (such as a
            list, set...) containing the paths to all necessary C++ headers
            as strings. This function accepts both paths to the headers
            themselves and paths to directories containing the headers.
    """
    # Normalize the single-string case to a one-element collection so both
    # inputs flow through the same expansion loop below.
    path_entries = [headers_paths] if isinstance(headers_paths, str) else headers_paths

    collected = set()
    for entry in path_entries:
        # Each entry may be a header file or a directory of headers;
        # the helper expands it into a set of concrete paths.
        collected |= Utils.get_paths_set_from_string(entry)

    # Distribute header files to the workers
    self.distribute_unique_paths(collected)
    # Declare headers locally
    Utils.declare_headers(collected)
    # Finally, add everything to the includes set
    self.headers.update(collected)
def spark_mapper(current_range):
    """
    Gets the paths to the file(s) in the current executor, then
    declares the headers found.

    Args:
        current_range (tuple): A pair that contains the starting and
            ending values of the current range.

    Returns:
        function: The map function to be executed on each executor,
        complete with all headers needed for the analysis.
    """

    def _resolve_on_executor(paths):
        # Map each distributed file's base name to the location Spark
        # materialized it at on this executor.
        return [pyspark.SparkFiles.get(ntpath.basename(p)) for p in paths]

    # Get and declare headers on each worker
    Utils.declare_headers(_resolve_on_executor(headers))

    # Get and declare shared libraries on each worker
    Utils.declare_shared_libraries(_resolve_on_executor(shared_libraries))

    return mapper(current_range)
def dask_mapper(current_range):
    """
    Gets the paths to the file(s) in the current executor, then
    declares the headers found.

    Args:
        current_range (tuple): The current range of the dataset being
            processed on the executor.

    Returns:
        function: The map function to be executed on each executor,
        complete with all headers needed for the analysis.
    """
    # Retrieve the current worker local directory
    workdir = get_worker().local_directory

    def _localize(paths):
        # Distributed files end up flattened into the worker's local
        # directory under their base names.
        return [os.path.join(workdir, os.path.basename(p)) for p in paths]

    # Get and declare headers on each worker
    Utils.declare_headers(_localize(headers))

    # Get and declare shared libraries on each worker
    Utils.declare_shared_libraries(_localize(shared_libraries))

    return mapper(current_range)
def test_header_declaration_on_current_session(self):
    """Header has to be declared on the current session."""
    # Before the header declaration the function b is not present on the
    # ROOT interpreter, so the attribute lookup itself must fail.
    # Fix: the original wrapped the call in a spurious nested
    # `self.assertRaises(ROOT.b(1))`; the AttributeError was raised while
    # evaluating the argument, so it only worked by accident. The bare
    # call inside the context manager is the intended form.
    with self.assertRaises(AttributeError):
        ROOT.b(1)
    Utils.declare_headers(["test_headers/header4.hxx"])
    self.assertEqual(ROOT.b(1), True)
def test_multiple_headers_declare(self):
    """'declare_headers' with multiple headers to be included."""
    header_files = [
        "test_headers/header2.hxx",
        "test_headers/header3.hxx",
    ]
    Utils.declare_headers(header_files)
    # Each declared header contributes its own function to the interpreter.
    self.assertEqual(ROOT.a(1), True)
    self.assertEqual(ROOT.f1(2), 2)
    self.assertEqual(ROOT.f2("myString"), "myString")
def test_single_header_declare(self):
    """'declare_headers' with a single header to be included."""
    header_path = "test_headers/header1.hxx"
    Utils.declare_headers([header_path])
    # The function defined in the header must now be callable via ROOT.
    self.assertEqual(ROOT.f(1), True)