Code example #1
def test_mixed_db_sql_spec(tmp_pipeline_sql, add_current_to_sys_path,
                           pg_client_and_schema, monkeypatch):
    _, schema = pg_client_and_schema

    with open('pipeline-multiple-dbs.yaml') as f:
        dag_spec = yaml.load(f, Loader=yaml.SafeLoader)

    # clients for this pipeline are initialized without custom create_engine
    # args, but we need to set the default schema; mock the call so it
    # includes that info
    monkeypatch.setattr(db, 'create_engine', create_engine_with_schema(schema))

    dates = _random_date_from(datetime(2016, 1, 1), 365, 100)
    df = pd.DataFrame({
        'customer_id': np.random.randint(0, 5, 100),
        'value': np.random.rand(100),
        'purchase_date': dates
    })
    # make sales data for pg
    loader = load_dotted_path(dag_spec['clients']['PostgresRelation'])
    client = loader()
    df.to_sql('sales', client.engine, if_exists='replace')
    client.engine.dispose()

    # make sales data for sqlite
    loader = load_dotted_path(dag_spec['clients']['SQLiteRelation'])
    client = loader()
    df.to_sql('sales', client.engine)
    client.engine.dispose()

    dag = DAGSpec(dag_spec).to_dag()

    # FIXME: this does not show the custom Upstream key missing error
    dag.build()
Code example #2
File: test_dotted_path.py  Project: cxz/ploomber
def test_load_dotted_path_if_attribute_not_found(path, err_msg, root,
                                                 tmp_directory, tmp_imports):
    Path('my_module.py').write_text('')

    Path('another').mkdir()
    Path('another', 'sub.py').touch()

    with pytest.raises(AttributeError) as excinfo:
        dotted_path.load_dotted_path(path)

    expected = err_msg.format(repr(os.path.abspath(root)))
    assert str(excinfo.value) == expected
Code example #3
File: test_dotted_path.py  Project: cxz/ploomber
def test_load_dotted_path_if_import_fails(path, err_msg, tmp_directory,
                                          tmp_imports):

    Path('my_module.py').write_text('import something')

    mod_name = path.split('.')[0]
    spec = importlib.util.find_spec(mod_name)

    if spec:
        err_msg = err_msg + f' (loaded {mod_name!r} from {spec.origin!r})'

    with pytest.raises(ModuleNotFoundError) as excinfo:
        dotted_path.load_dotted_path(path)

    assert str(excinfo.value) == err_msg
Code example #4
    def process_factory_dotted_path(self, dotted_path):
        """Parse a factory entry point, returns initialized dag and parsed args
        """
        entry = load_dotted_path(str(dotted_path), raise_=True)

        # add args using the function's signature
        required, _ = _add_args_from_callable(self, entry)

        # if entry point was decorated with @with_env, add arguments
        # to replace declared variables in env.yaml
        if hasattr(entry, '_env_dict'):
            _add_cli_args_from_env_dict_keys(self, entry._env_dict)

        args = self.parse_args()

        if hasattr(args, 'log'):
            if args.log is not None:
                logging.basicConfig(level=args.log.upper())

        # extract required (by using function signature) params from the cli
        # args
        kwargs = {key: getattr(args, key) for key in required}

        # env and function defaults replaced
        replaced = _env_keys_to_override(args, self.static_args)

        # TODO: add a way to test this via the parameters it will use to
        # call the function: have an aux function to get those, then another
        # to execute, and test using the first one
        dag = entry(**{**kwargs, **replaced})

        return dag, args
Code example #5
    def _to_dag(self):
        """
        Internal method to manage the different cases to convert to a DAG
        object
        """
        if 'location' in self:
            return dotted_path.call_dotted_path(self['location'])

        dag = DAG()

        if 'config' in self:
            dag._params = DAGConfiguration.from_dict(self['config'])

        clients = self.get('clients')

        if clients:
            for class_name, dotted_path_spec in clients.items():
                dag.clients[class_name] = dotted_path.call_spec(
                    dotted_path_spec)

        # FIXME: this violates lazy_import, we must change DAG's implementation
        # to accept strings as attributes and load them only when called
        for attr in ['serializer', 'unserializer']:
            if attr in self:
                setattr(dag, attr, dotted_path.load_dotted_path(self[attr]))

        process_tasks(dag, self, root_path=self._parent_path)

        return dag
Code example #6
    def exists(self):
        if self.type == self.Pattern:
            return True
        elif self.type in {self.Directory, self.File}:
            return Path(self.value).exists()
        elif self.type == self.DottedPath:
            return load_dotted_path(self.value, raise_=False) is not None
Code example #7
def test_postgres_sql_spec(tmp_pipeline_sql, pg_client_and_schema,
                           add_current_to_sys_path, monkeypatch):
    _, schema = pg_client_and_schema

    with open('pipeline-postgres.yaml') as f:
        dag_spec = yaml.load(f, Loader=yaml.SafeLoader)

    # clients for this pipeline are initialized without custom create_engine
    # args, but we need to set the default schema; mock the call so it
    # includes that info
    monkeypatch.setattr(db, 'create_engine', create_engine_with_schema(schema))

    dates = _random_date_from(datetime(2016, 1, 1), 365, 100)
    df = pd.DataFrame({
        'customer_id': np.random.randint(0, 5, 100),
        'value': np.random.rand(100),
        'purchase_date': dates
    })
    loader = load_dotted_path(dag_spec['clients']['SQLScript'])
    client = loader()
    df.to_sql('sales', client.engine, if_exists='replace')
    client.engine.dispose()

    dag = DAGSpec(dag_spec).to_dag()

    # FIXME: this does not show the custom Upstream key missing error
    dag.build()

    assert not dag['load'].upstream
    assert list(dag['filter'].upstream.keys()) == ['load']
    assert list(dag['transform'].upstream.keys()) == ['filter']
Code example #8
File: pythoncallablesource.py  Project: cxz/ploomber
    def load(self):
        if self._from_dotted_path:
            return load_dotted_path(self._primitive)
        else:
            if self.hot_reload:
                module = importlib.import_module(self.module_name)
                importlib.reload(module)
                return getattr(module, self.fn_name)
            else:
                return self._primitive
Code example #9
File: test_dotted_path.py  Project: cxz/ploomber
def test_lazily_located_dotted_path(dotted_path_str, tmp_imports):
    loc, source = dotted_path.lazily_locate_dotted_path(dotted_path_str)

    obj = dotted_path.load_dotted_path(dotted_path_str)

    loc_real = getfile(obj)
    lines, line = inspect.getsourcelines(obj)
    source_expected = ''.join(lines)
    loc_expected = f'{loc_real}:{line}'

    assert loc == loc_expected
    assert source == source_expected
Code example #10
def test_load_dotted_path_with_reload(tmp_directory, add_current_to_sys_path):
    # write a sample module
    Path('dotted_path_with_reload.py').write_text("""
def x():
    pass
""")

    # load the module
    dotted_path.load_dotted_path('dotted_path_with_reload.x')

    # add a new function
    Path('dotted_path_with_reload.py').write_text("""
def x():
    pass

def y():
    pass
""")

    # the new function should be importable since we are using reload=True
    assert dotted_path.load_dotted_path('dotted_path_with_reload.y',
                                        reload=True)
Code example #11
File: test_sources.py  Project: ploomber/ploomber
def test_defined_name_twice(tmp_directory, add_current_to_sys_path,
                            no_sys_modules_cache):

    Path('a.py').write_text("""

def b():
    pass

def b():
    pass
""")

    loc = PythonCallableSource(dotted_path.load_dotted_path('a.b')).loc
    out = PythonCallableSource('a.b').loc

    assert str(Path(out).resolve()) == str(Path(loc).resolve())
Code example #12
def test_sql_spec_w_products_in_source(tmp_pipeline_sql_products_in_source,
                                       add_current_to_sys_path):
    with open('pipeline.yaml') as f:
        dag_spec = yaml.load(f, Loader=yaml.SafeLoader)

    dates = _random_date_from(datetime(2016, 1, 1), 365, 100)
    df = pd.DataFrame({
        'customer_id': np.random.randint(0, 5, 100),
        'value': np.random.rand(100),
        'purchase_date': dates
    })
    loader = load_dotted_path(dag_spec['clients']['SQLScript'])
    client = loader()
    df.to_sql('sales', client.engine, if_exists='replace')
    client.engine.dispose()

    dag = DAGSpec(dag_spec).to_dag()
    dag.build()
Code example #13
File: TaskSpec.py  Project: israelrico007/ploomber
def source_for_task_class(source_str, task_class, project_root, lazy_import,
                          make_absolute):
    if task_class is tasks.PythonCallable:
        if lazy_import:
            return source_str
        else:
            return dotted_path.load_dotted_path(source_str)
    else:
        path = Path(source_str)

        # NOTE: there is some inconsistent behavior here. project_root
        # will be None if DAGSpec was initialized with a dictionary, hence
        # this won't resolve to absolute paths, which is a bit confusing.
        # maybe always convert to absolute?
        if project_root and not path.is_absolute() and make_absolute:
            return Path(project_root, source_str)
        else:
            return path
Code example #14
def test_sqlite_sql_spec(spec, tmp_pipeline_sql, add_current_to_sys_path):
    with open(spec) as f:
        dag_spec = yaml.load(f, Loader=yaml.SafeLoader)

    dates = _random_date_from(datetime(2016, 1, 1), 365, 100)
    df = pd.DataFrame({
        'customer_id': np.random.randint(0, 5, 100),
        'value': np.random.rand(100),
        'purchase_date': dates
    })
    loader = load_dotted_path(dag_spec['clients']['SQLScript'])
    client = loader()
    df.to_sql('sales', client.engine)
    client.engine.dispose()

    dag = DAGSpec(dag_spec).to_dag()

    # FIXME: this does not show the custom Upstream key missing error
    dag.build()

    assert not dag['load'].upstream
    assert list(dag['filter'].upstream.keys()) == ['load']
    assert list(dag['transform'].upstream.keys()) == ['filter']
Code example #15
File: test_sources.py  Project: ploomber/ploomber
def test_loc_is_consisent_when_initialized_from_str(tmp_directory,
                                                    add_current_to_sys_path,
                                                    no_sys_modules_cache,
                                                    target_file,
                                                    dotted_path_str):
    target_parent = Path(target_file).parent
    target_parent.mkdir(parents=True, exist_ok=True)

    for parent in Path(target_file).parents:
        (parent / '__init__.py').touch()

    Path(target_file).write_text("""
def symbol():
    pass
""")

    out = PythonCallableSource(dotted_path_str).loc
    # check that a.py hasn't been imported
    assert 'a' not in sys.modules

    loc = PythonCallableSource(
        dotted_path.load_dotted_path(dotted_path_str)).loc

    assert str(Path(out).resolve()) == str(Path(loc).resolve())
Code example #16
def test_load_dotted_path_custom_error_message():
    with pytest.raises(AttributeError) as excinfo:
        dotted_path.load_dotted_path('test_pkg.not_a_function')

    assert ('Could not get "not_a_function" from module "test_pkg"'
            in str(excinfo.value))