Ejemplo n.º 1
0
    def _set_workflow_config(self, yaml_file):
        """
        Load workflow settings from a YAML file and apply them to this instance.

        Sets ``self._source`` and ``self._destination`` from the ``source`` and
        ``destination`` entries of the file, builds the I/O reader and writer
        through ``Factory``, and copies every other top-level key that is not
        listed in ``self.DEFAULT_PARAMS`` onto the instance as an attribute.

        :param yaml_file: path of the YAML workflow configuration file
        :raises Exception: any error raised while reading, parsing or applying
            the configuration is logged and then re-raised unchanged
        """
        log.info("Setting configurations from config file {0}".format(yaml_file))
        try:
            with open(yaml_file, "r") as ymlfile:
                # yaml.load() without an explicit Loader is deprecated since
                # PyYAML 5.1 and rejected by PyYAML 6; safe_load also refuses to
                # construct arbitrary Python objects from the file.
                # NOTE(review): if any workflow config relies on Python-specific
                # YAML tags, use yaml.load(ymlfile, Loader=yaml.FullLoader) instead.
                config = yaml.safe_load(ymlfile)
            self._source = config["source"]
            self._destination = config["destination"]
            self._io_reader = Factory.get_reader(self._source["type"], self._source)
            self._io_writer = Factory.get_writer(
                self._destination["type"], self._destination
            )
            # Set attributes for custom workflow properties
            for key, value in config.items():
                if key not in self.DEFAULT_PARAMS:
                    setattr(self, key, value)

        except Exception:
            log.error(
                "Error creating I/O reader and writer. Please check configurations in workflow config file at {0}".format(
                    yaml_file
                )
            )
            raise
Ejemplo n.º 2
0
    def __init__(self, name, source=None, destination=None):
        """
        Create a workflow, configuring it from a YAML file when one is found.

        The default config path is checked first, then the backup path; the
        first one that exists is applied through ``_set_workflow_config``.

        :param name: workflow name, passed to the config file path helpers
        :param source: optional dict of reader configuration; when truthy it
            replaces any source taken from a config file
        :param destination: optional dict of writer configuration; when truthy
            it replaces any destination taken from a config file
        """
        self._source = None
        self._destination = None
        self._name = name

        # Both candidate paths are resolved up front; the default one wins.
        candidate_paths = (
            self._get_default_filepath(name),
            self._get_backup_filepath(name),
        )
        for config_path in candidate_paths:
            if os.path.exists(config_path):
                log.info("Config file detected: {0}".format(config_path))
                self._set_workflow_config(config_path)
                break
        else:
            # Loop finished without a break: neither file exists.
            log.info("No config file detected.")

        # Explicit arguments override whatever the config file provided.
        if source:
            self._source = source
            reader_type = self._source["type"]
            self._io_reader = Factory.get_reader(reader_type, self._source)
        if destination:
            self._destination = destination
            writer_type = self._destination["type"]
            self._io_writer = Factory.get_writer(writer_type, self._destination)
Ejemplo n.º 3
0
    def set_source(self, source):
        """
        Replace the data source configuration and rebuild the I/O reader.

        :param source: dict of configuration parameters for data source (reader);
            its "type" entry selects the reader built by ``Factory``
        """
        self._source = source
        # NOTE(review): self.source is presumably a property exposing _source — confirm.
        reader_type = self.source["type"]
        self._io_reader = Factory.get_reader(reader_type, self.source)
Ejemplo n.º 4
0
def test_get_reader_orc(test_input_base_path, expected_df):
    """Fetch person.orc from the test input directory through a Factory-built "fs" reader."""
    orc_path = "%s/person.orc" % (test_input_base_path)
    reader_config = {
        "type": "fs",
        "input_path": orc_path,
        "usecols": ["firstname", "lastname", "gender"],
        "input_format": "orc",
    }
    reader = Factory.get_reader("fs", reader_config)
    result_df = reader.fetch_data()

    # The fetched frame must match the expected_df fixture.
    assert result_df.equals(expected_df)
Ejemplo n.º 5
0
def test_get_reader_json(tmpdir, expected_df):
    """Write expected_df as record-oriented JSON, then read it back through a Factory-built "fs" reader."""
    json_dir = tmpdir.mkdir("tmp_test_fs_reader")
    json_path = str(json_dir.join("person.json"))
    cudf.io.json.to_json(expected_df, json_path, orient="records")

    reader_config = {
        "type": "fs",
        "input_path": json_path,
        "orient": "records",
        "input_format": "json",
    }
    reader = Factory.get_reader("fs", reader_config)
    result_df = reader.fetch_data()

    # Round trip must reproduce the frame that was written.
    assert result_df.equals(expected_df)
Ejemplo n.º 6
0
def test_get_reader_orc(tmpdir, expected_df):
    """Write expected_df as ORC, then read it back through a Factory-built "fs" reader."""
    orc_dir = tmpdir.mkdir("tmp_test_fs_reader")
    orc_path = str(orc_dir.join("person.orc"))
    cudf.io.orc.to_orc(expected_df, orc_path)

    reader_config = {
        "type": "fs",
        "input_path": orc_path,
        "usecols": ["firstname", "lastname", "gender"],
        "input_format": "orc",
    }
    reader = Factory.get_reader("fs", reader_config)
    result_df = reader.fetch_data()

    # Round trip must reproduce the frame that was written.
    assert result_df.equals(expected_df)
Ejemplo n.º 7
0
def test_get_reader_text(test_input_base_path, expected_df):
    """Fetch person.csv with the "text" input format through a Factory-built "fs" reader."""
    column_names = ["firstname", "lastname", "gender"]
    csv_path = "%s/person.csv" % (test_input_base_path)
    reader_config = {
        "type": "fs",
        "input_path": csv_path,
        # Separate list objects for "names" and "usecols", as in a literal config.
        "names": list(column_names),
        "delimiter": ",",
        "usecols": list(column_names),
        "dtype": ["str", "str", "str"],
        "header": 0,
        "input_format": "text",
    }
    reader = Factory.get_reader("fs", reader_config)
    result_df = reader.fetch_data()

    # The fetched frame must match the expected_df fixture.
    assert result_df.equals(expected_df)
Ejemplo n.º 8
0
def test_get_reader_csv(tmpdir, expected_df):
    """Write expected_df as CSV, then read it back through a Factory-built "fs" reader."""
    # Convert the py.path.local object to a plain string, as the other
    # tmpdir-based reader tests do, so both the writer and the reader config
    # receive an ordinary path string.
    fname = str(tmpdir.mkdir("tmp_test_factory").join("person.csv"))
    expected_df.to_csv(fname, index=False)
    config = {
        "type": "fs",
        "input_path": fname,
        "names": ["firstname", "lastname", "gender"],
        "delimiter": ",",
        "usecols": ["firstname", "lastname", "gender"],
        "dtype": ["str", "str", "str"],
        "header": 0,
        "input_format": "csv",
    }
    reader_from_factory = Factory.get_reader("fs", config)
    fetched_df = reader_from_factory.fetch_data()

    # Round trip must reproduce the frame that was written.
    assert fetched_df.equals(expected_df)
Ejemplo n.º 9
0
def test_get_io_reader_fs(fs_reader_config):
    """Factory.get_reader returns a FileSystemReader for the "fs" reader type."""
    fs_reader = Factory.get_reader("fs", fs_reader_config)
    assert isinstance(fs_reader, FileSystemReader)
Ejemplo n.º 10
0
def test_get_io_reader_kafka(kafka_config):
    """Factory.get_reader returns a KafkaReader for the "kafka" reader type."""
    kafka_reader = Factory.get_reader("kafka", kafka_config)
    assert isinstance(kafka_reader, KafkaReader)
Ejemplo n.º 11
0
 def set_source(self, source):
     """
     Replace the data source configuration and rebuild the I/O reader.

     :param source: dict of reader configuration; its "type" entry selects
         the reader built by ``Factory``
     """
     self._source = source
     # NOTE(review): self.source is presumably a property exposing _source — confirm.
     reader_type = self.source["type"]
     self._io_reader = Factory.get_reader(reader_type, self.source)