def test_add_sequence_to_pipeline():
    """Adding a Sequence to a Pipeline registers it as a named attribute."""
    pipeline = Pipeline("testpipe")
    pipeline += Sequence("testseq")
    assert len(pipeline.sequences) == 1
    seq = pipeline.testseq
    assert seq
    assert seq.name == "testseq"
    assert seq.parent is pipeline
def test_add_module_to_sequence():
    """Adding a module to a Sequence exposes it under '<seq>_<Class>'."""
    seq = Sequence("testseq")
    seq += BaseModule()
    assert len(seq.modules) == 1
    module = seq.testseq_BaseModule
    assert module
    assert module.parent is seq
    assert module.name == "testseq_BaseModule"
    assert not module.is_configured
def test_configure_sequence():
    """configure() sets is_configured and derives the output location."""
    seq = Sequence("testseq")
    assert not seq.is_configured
    seq.coords = [1.23, 4.56]
    seq.date_range = ["2001-01-01", "2020-01-01"]
    seq.configure()
    assert seq.is_configured
    assert seq.output_location == "gee_1.23_4.56_testseq"
def test_configure_pipeline():
    """Configuring a Pipeline cascades down to each sequence's modules."""
    pipeline = Pipeline("testpipe")
    pipeline.coords = [1.23, 4.56]
    pipeline.date_range = ["2001-01-01", "2020-01-01"]
    pipeline.output_location = "/tmp"
    pipeline.output_location_type = "local"
    pipeline += Sequence("testseq")
    pipeline.testseq += BaseModule()
    pipeline.configure()
    assert pipeline.testseq.testseq_BaseModule.is_configured
def test_get_rainfall():
    """End-to-end: download ERA5 weather data and convert it to JSON.

    NOTE(review): integration test — hits the download service and writes
    to test_out_dir, which is removed afterwards.
    """
    from pyveg.src.download_modules import WeatherDownloader

    seq = Sequence("weather")
    seq.set_config(data_collections["ERA5"])
    seq.output_location = test_out_dir
    seq.output_location_type = "local"
    seq.coords = coordinates
    seq.date_range = date_range
    seq.time_per_point = time_per_point
    seq += WeatherDownloader()
    seq += WeatherImageToJSON()
    seq.configure()
    seq.run()
    expected_json = os.path.join(
        test_out_dir, "2016-01-16", "JSON", "WEATHER", "weather_data.json")
    assert os.path.exists(expected_json)
    shutil.rmtree(test_out_dir, ignore_errors=True)
def test_get_vegetation():
    """End-to-end: download Copernicus imagery, process it, and compute
    network centralities.

    NOTE(review): integration test — hits the download service and writes
    to test_out_dir, which is removed afterwards.
    """
    from pyveg.src.download_modules import VegetationDownloader

    seq = Sequence("vegetation")
    seq.set_config(data_collections["Copernicus"])
    seq.output_location = test_out_dir
    seq.output_location_type = "local"
    seq.coords = coordinates
    seq.date_range = date_range
    seq.n_sub_images = 1
    seq.time_per_point = time_per_point
    seq += VegetationDownloader()
    seq += VegetationImageProcessor()
    seq += NetworkCentralityCalculator()
    seq.configure()
    seq.run()
    expected_json = os.path.join(
        test_out_dir, "2016-01-16", "JSON", "NC", "network_centralities.json")
    assert os.path.exists(expected_json)
    shutil.rmtree(test_out_dir, ignore_errors=True)
def test_configure_sequence_from_dict():
    """set_config() copies each dict entry onto the Sequence as an attribute."""
    seq = Sequence("testseq")
    seq.set_config({"collection_name": "TESTCOLL", "some_param": "TESTVAL"})
    assert seq.collection_name == "TESTCOLL"
    assert seq.some_param == "TESTVAL"
def test_instantiate_sequence():
    """A Sequence can be constructed and remembers its name."""
    seq = Sequence("testseq")
    assert isinstance(seq, Sequence)
    assert seq.name == "testseq"
def _find_module_class(class_name):
    """Return the class named *class_name* visible in this module, or None.

    Mirrors the original inspect.getmembers scan: names that do not
    resolve are silently skipped rather than raising.
    """
    for name, obj in inspect.getmembers(sys.modules[__name__]):
        if name == class_name:
            return obj
    return None


def build_pipeline(config_file, from_cache=False):
    """
    Load a python config file and instantiate a Pipeline from it.

    Parameters
    ----------
    config_file : str
        Path to a python config module expected to define: name,
        output_location, output_location_type, coordinates, date_range,
        collections_to_use, data_collections, modules_to_use, and
        optionally special_config.
    from_cache : bool
        If True, reuse the timestamp embedded in the config filename
        (from a previous caching run) instead of appending a fresh one,
        and skip re-caching the config.

    Returns
    -------
    Pipeline
        Fully assembled (but not yet configured or run) pipeline.
    """
    print("Configuring from cached config? {}".format(from_cache))
    current_time = time.strftime("%Y-%m-%d_%H-%M-%S")
    # execute the config file as a python module named "myconfig"
    spec = importlib.util.spec_from_file_location("myconfig", config_file)
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)

    # instantiate and setup the pipeline
    p = Pipeline(config.name)
    p.output_location = config.output_location
    if not from_cache:
        p.output_location += '__' + current_time
    else:
        # use the time from the filename
        # (raw string: the pattern contains backslash escapes)
        time_match = re.search(
            r"([\d]{4}-[\d]{2}-[\d]{2}_[\d]{2}-[\d]{2}-[\d]{2})",
            os.path.basename(config_file))
        if time_match:
            p.output_location += '__' + time_match.groups()[0]
        else:
            print(
                "Wasn't able to infer timestamp from config filename.",
                "Will use original output_location from {}.".format(
                    config_file))
    p.output_location_type = config.output_location_type
    p.coords = config.coordinates
    p.date_range = config.date_range

    if not from_cache:
        # before we run anything, save the current config to the configs dir
        config_cache_dir = os.path.join(os.path.dirname(config_file),
                                        "cached_config")
        os.makedirs(config_cache_dir, exist_ok=True)
        # strip the extension robustly (was a hard-coded [:-3] '.py' slice)
        config_stem = os.path.splitext(os.path.basename(config_file))[0]
        cached_config_file = config_stem + '__' + current_time + ".py"
        copyfile(config_file,
                 os.path.join(config_cache_dir, cached_config_file))

    if config.output_location_type == "local" and not os.path.exists(
            p.output_location):
        os.makedirs(p.output_location, exist_ok=True)

    # add sequences to the pipeline to deal with different data types
    for coll in config.collections_to_use:
        s = Sequence(coll)
        coll_dict = config.data_collections[coll]
        s.set_config(coll_dict)
        # overwrite the date range with one that takes into account
        # the limits of this collection
        s.date_range = get_date_range_for_collection(config.date_range,
                                                     coll_dict)
        # add modules to the sequence
        for module_name in config.modules_to_use[coll]:
            module_class = _find_module_class(module_name)
            if module_class is None:
                # original behavior: unknown module names are skipped
                continue
            module = module_class()
            # apply per-module parameter overrides, if the config has any
            if "special_config" in vars(config) and \
                    module_name in config.special_config:
                module.set_parameters(config.special_config[module_name])
            s += module
        # add the sequence to the pipeline
        p += s

    if len(config.collections_to_use) > 1:
        # now add the combiner module in its own sequence
        s = Sequence("combine")
        # Combiner needs the previous sequences to finish (in case we ever
        # try to parallelize further)
        s.depends_on = config.collections_to_use
        for module_name in config.modules_to_use["combine"]:
            module_class = _find_module_class(module_name)
            if module_class is not None:
                s += module_class()
        # and add this combiner sequence to the pipeline.
        p += s
    return p