def test_searches_in_default_locations(monkeypatch, tmp_nbs, root_path):
    """_auto_load must delegate the search to entry_point(root_path=...)."""
    root_path = Path(root_path).resolve()

    # extra directory to make sure the search is anchored at root_path,
    # not at whatever subdirectories exist
    Path('subdir').mkdir()

    # spy on the real implementation so behavior is unchanged but calls
    # are recorded
    entry_point_spy = Mock(wraps=dagspec.entry_point)
    monkeypatch.setattr(dagspec, 'entry_point', entry_point_spy)

    DAGSpec._auto_load(starting_dir=root_path)

    entry_point_spy.assert_called_once_with(root_path=root_path)
def add():
    """Add scaffold templates for tasks whose source does not exist
    """
    # lazy_import=True makes sources come back as paths instead of
    # placeholders
    spec, path_to_spec = DAGSpec._auto_load(to_dag=False, lazy_import=True)

    # guard clause: nothing to scaffold without a spec
    if not path_to_spec:
        print('Error: No pipeline.yaml spec found...')
        return

    loader = ScaffoldLoader('ploomber_add')

    # TODO: when the dag has a source loader, the argument passed to
    # ploomber_add should take that into account to place the new file
    # in the appropriate location (instead of doing it relative to
    # pipeline.yaml)
    # TODO: raise an error if the location is inside the site-packages folder
    # NOTE: lazy loading from the source loader will give errors because
    # initializing a source with a path only loses the information from the
    # jinja environment needed to make macros work. I have to test this. The
    # best solution is to add a lazy_load param to Placeholder, so it can be
    # initialized with a path for a file that does not exist

    print('Found spec at {}'.format(path_to_spec))

    # make sure the current working dir is on the path, otherwise we might
    # not be able to import the PythonCallable functions, which we need to
    # do to locate the modules
    with add_to_sys_path(path_to_spec, chdir=False):
        for task in spec['tasks']:
            loader.create(source=task['source'],
                          params=spec['meta'],
                          class_=task['class'])
def load_dag(self, starting_dir=None):
    """Load (or hot-reload) the DAG used to map notebooks to tasks.

    Loads from the ENTRY_POINT env var when set, otherwise auto-discovers
    a pipeline.yaml starting at ``starting_dir``. On success it renders the
    dag and builds ``self.dag_mapping`` (resolved source path -> task); on
    DAGSpecInitializationError it resets state and logs the failure.
    """
    # re-load when there is no dag yet, or on every call if hot reload
    # is enabled
    if self.dag is None or self.spec['meta']['jupyter_hot_reload']:
        self.log.info('[Ploomber] Loading dag...')

        # fix: message previously read "occured" and "won\' be" (broken
        # contraction); corrected to valid English
        msg = ('[Ploomber] An error occurred when trying to initialize '
               'the pipeline. Cells won\'t be injected until your '
               'pipeline processes correctly. See error details below.')

        if self.spec and not self.spec['meta']['jupyter_hot_reload']:
            msg += self.restart_msg

        env_var = os.environ.get('ENTRY_POINT')

        try:
            if env_var:
                (self.spec, self.dag,
                 self.path) = parsers.load_entry_point(env_var)
            else:
                hot_reload = (self.spec
                              and self.spec['meta']['jupyter_hot_reload'])
                (self.spec, self.dag,
                 self.path) = DAGSpec._auto_load(starting_dir=starting_dir,
                                                 reload=hot_reload)
        except DAGSpecInitializationError:
            self.reset_dag()
            self.log.exception(msg)
        else:
            if self.dag is not None:
                current = os.getcwd()

                if self.spec['meta'][
                        'jupyter_hot_reload'] and current not in sys.path:
                    # jupyter does not add the current working dir by
                    # default, if using hot reload and the dag loads
                    # functions from local files, importlib.reload will
                    # fail
                    # NOTE: might be better to only add this when the dag
                    # is actually loading from local files but that means
                    # we have to run some logic and increases load_dag
                    # running time, which we need to be fast
                    sys.path.append(current)

                base_path = Path(self.path).resolve()

                with chdir(base_path):
                    # this dag object won't be executed, forcing speeds
                    # rendering up
                    self.dag.render(force=True)

                if self.spec['meta']['jupyter_functions_as_notebooks']:
                    self.manager = JupyterDAGManager(self.dag)
                else:
                    self.manager = None

                # map each task's resolved source location to the task,
                # skipping tasks without a source location on disk
                tuples = [(resolve_path(base_path, t.source.loc), t)
                          for t in self.dag.values()
                          if t.source.loc is not None]
                self.dag_mapping = {
                    t[0]: t[1]
                    for t in tuples if t[0] is not None
                }

                self.log.info('[Ploomber] Initialized dag from '
                              'pipeline.yaml at'
                              ': {}'.format(base_path))
                # NOTE(review): if `pprint` here is pprint.pprint, it
                # returns None so this logs "Pipeline mapping: None" —
                # pprint.pformat is likely intended; confirm the import
                self.log.info('[Ploomber] Pipeline mapping: {}'.format(
                    pprint(self.dag_mapping)))
            else:
                # no pipeline.yaml found...
                self.log.info('[Ploomber] No pipeline.yaml found, '
                              'skipping DAG initialization...')
                self.dag_mapping = None