# Example #1
def test_add_sequence_to_pipeline():
    """A Sequence added via += is registered on the pipeline and linked back."""
    pipeline = Pipeline("testpipe")
    pipeline += Sequence("testseq")
    assert len(pipeline.sequences) == 1
    seq = pipeline.testseq
    assert seq
    assert seq.name == "testseq"
    assert seq.parent is pipeline
# Example #2
def test_add_module_to_sequence():
    """Adding a BaseModule registers it under a '<seqname>_<classname>' attribute."""
    seq = Sequence("testseq")
    seq += BaseModule()
    assert len(seq.modules) == 1
    module = seq.testseq_BaseModule
    assert module
    assert module.parent is seq
    assert module.name == "testseq_BaseModule"
    assert not module.is_configured
# Example #3
def test_configure_sequence():
    """configure() flips is_configured and derives output_location from
    the coordinates and the sequence name."""
    s = Sequence("testseq")
    # PEP 8 (E712): use truthiness, not `== False` / `== True` comparisons.
    assert not s.is_configured
    s.coords = [1.23, 4.56]
    s.date_range = ["2001-01-01", "2020-01-01"]
    s.configure()
    assert s.is_configured
    assert s.output_location == "gee_1.23_4.56_testseq"
# Example #4
def test_configure_pipeline():
    """Configuring the pipeline cascades configuration down to its modules."""
    pipeline = Pipeline("testpipe")
    pipeline.coords = [1.23, 4.56]
    pipeline.date_range = ["2001-01-01", "2020-01-01"]
    pipeline.output_location = "/tmp"
    pipeline.output_location_type = "local"
    pipeline += Sequence("testseq")
    pipeline.testseq += BaseModule()
    pipeline.configure()
    assert pipeline.testseq.testseq_BaseModule.is_configured
def test_get_rainfall():
    """End-to-end: download ERA5 weather data and convert it to JSON on disk."""
    from pyveg.src.download_modules import WeatherDownloader

    seq = Sequence("weather")
    seq.set_config(data_collections["ERA5"])
    # Configure the sequence from the module-level test fixtures.
    settings = {
        "output_location": test_out_dir,
        "output_location_type": "local",
        "coords": coordinates,
        "date_range": date_range,
        "time_per_point": time_per_point,
    }
    for attr, value in settings.items():
        setattr(seq, attr, value)
    seq += WeatherDownloader()
    seq += WeatherImageToJSON()
    seq.configure()
    seq.run()
    expected = os.path.join(test_out_dir, "2016-01-16", "JSON", "WEATHER",
                            "weather_data.json")
    assert os.path.exists(expected)
    # Clean up the downloaded output regardless of partial state.
    shutil.rmtree(test_out_dir, ignore_errors=True)
def test_get_vegetation():
    """End-to-end: download Copernicus imagery, process it, and compute
    network centralities, asserting the JSON output exists."""
    from pyveg.src.download_modules import VegetationDownloader

    seq = Sequence("vegetation")
    seq.set_config(data_collections["Copernicus"])
    # Configure the sequence from the module-level test fixtures.
    settings = {
        "output_location": test_out_dir,
        "output_location_type": "local",
        "coords": coordinates,
        "date_range": date_range,
        "n_sub_images": 1,
        "time_per_point": time_per_point,
    }
    for attr, value in settings.items():
        setattr(seq, attr, value)
    seq += VegetationDownloader()
    seq += VegetationImageProcessor()
    seq += NetworkCentralityCalculator()
    seq.configure()
    seq.run()
    expected = os.path.join(test_out_dir, "2016-01-16", "JSON", "NC",
                            "network_centralities.json")
    assert os.path.exists(expected)
    # Clean up the downloaded output regardless of partial state.
    shutil.rmtree(test_out_dir, ignore_errors=True)
# Example #7
def test_configure_sequence_from_dict():
    """set_config copies every key/value pair onto the sequence as attributes."""
    seq = Sequence("testseq")
    cfg = {"collection_name": "TESTCOLL", "some_param": "TESTVAL"}
    seq.set_config(cfg)
    for key, expected in cfg.items():
        assert getattr(seq, key) == expected
# Example #8
def test_instantiate_sequence():
    """A Sequence can be constructed and remembers its name."""
    seq = Sequence("testseq")
    assert isinstance(seq, Sequence)
    assert seq.name == "testseq"
# Example #9
def build_pipeline(config_file, from_cache=False):
    """
    Load a python config file and instantiate a fully populated Pipeline.

    Parameters
    ----------
    config_file : str
        Path to a python module defining (at least) ``name``,
        ``output_location``, ``output_location_type``, ``coordinates``,
        ``date_range``, ``collections_to_use``, ``data_collections``
        and ``modules_to_use``.
    from_cache : bool, optional
        If True, reuse the timestamp embedded in the cached config
        filename (instead of generating a new one) and skip re-caching
        the config file.

    Returns
    -------
    Pipeline
        Populated pipeline; caller is expected to configure()/run() it.
    """
    print("Configuring from cached config? {}".format(from_cache))
    current_time = time.strftime("%Y-%m-%d_%H-%M-%S")

    # Import the config file as a throwaway python module.
    spec = importlib.util.spec_from_file_location("myconfig", config_file)
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)

    # instantiate and setup the pipeline
    p = Pipeline(config.name)
    p.output_location = config.output_location
    if not from_cache:
        p.output_location += '__' + current_time
    else:
        # use the time from the filename.  Raw string so `\d` is not an
        # invalid escape sequence (SyntaxWarning in modern python).
        time_match = re.search(
            r"([\d]{4}-[\d]{2}-[\d]{2}_[\d]{2}-[\d]{2}-[\d]{2})",
            os.path.basename(config_file))
        if time_match:
            p.output_location += '__' + time_match.group(1)
        else:
            print(
                "Wasn't able to infer timestamp from config filename.",
                "Will use original output_location from {}.".format(
                    config_file))
    p.output_location_type = config.output_location_type
    p.coords = config.coordinates
    p.date_range = config.date_range
    if not from_cache:
        # before we run anything, save the current config to the configs dir
        config_cache_dir = os.path.join(os.path.dirname(config_file),
                                        "cached_config")
        os.makedirs(config_cache_dir, exist_ok=True)
        # splitext is safer than slicing off a hard-coded 3 characters.
        base_name, _ = os.path.splitext(os.path.basename(config_file))
        cached_config_file = base_name + '__' + current_time + ".py"
        copyfile(config_file, os.path.join(config_cache_dir,
                                           cached_config_file))

    if config.output_location_type == "local":
        # exist_ok makes a separate os.path.exists() pre-check redundant
        os.makedirs(p.output_location, exist_ok=True)

    # this module's namespace, used to resolve module classes by name
    this_module = sys.modules[__name__]
    # per-module parameter overrides, if the config provides any
    special_config = getattr(config, "special_config", {})

    # add sequences to the pipeline to deal with different data types
    for coll in config.collections_to_use:
        s = Sequence(coll)
        coll_dict = config.data_collections[coll]
        s.set_config(coll_dict)
        # overwrite the date range with one that takes into account
        # the limits of this collection
        s.date_range = get_date_range_for_collection(config.date_range,
                                                     coll_dict)
        # add modules to the sequence; direct getattr lookup instead of
        # scanning every member of the module for a matching name
        for module_name in config.modules_to_use[coll]:
            module_class = getattr(this_module, module_name, None)
            if module_class is None:
                continue
            module = module_class()
            if module_name in special_config:
                module.set_parameters(special_config[module_name])
            s += module
        # add the sequence to the pipeline
        p += s
    if len(config.collections_to_use) > 1:
        # now add the combiner module in its own sequence
        s = Sequence("combine")
        # Combiner needs the previous sequences to finish (in case we ever try to
        # parallelize further)
        s.depends_on = config.collections_to_use

        for module_name in config.modules_to_use["combine"]:
            module_class = getattr(this_module, module_name, None)
            if module_class is not None:
                s += module_class()

        # and add this combiner sequence to the pipeline.
        p += s
    return p