def get_pipeline(self, pipe=None, overwrite=False):
    """ Return a :ref:`yatsm.pipeline.Pipeline`

    The pipeline is built from ``self.tasks`` via
    ``Pipeline.from_config``.

    Args:
        pipe (yatsm.pipeline.Pipe): Pipe data object. When omitted
            (``None``), a new ``Pipe()`` is created.
        overwrite (bool): Allow overwriting

    Returns:
        yatsm.pipeline.Pipeline: YATSM pipeline
    """
    # Compare against None explicitly: a truthiness test (`pipe or Pipe()`)
    # would replace a caller-supplied Pipe that happens to evaluate falsy
    # (e.g. an empty container) with a different object.
    if pipe is None:
        pipe = Pipe()
    return Pipeline.from_config(self.tasks, pipe, overwrite=overwrite)
def batch_block(config, readers, window, overwrite=False):
    """ Run the configured pipeline over every pixel of one window

    Data for ``window`` are read and preprocessed, the eager part of the
    pipeline is run once on the whole block, and the remaining tasks are
    then run pixel by pixel. The per-pixel ``record`` results are
    concatenated by name and written to an HDF5 results store.

    Args:
        config: Configuration object. It is indexed for
            ``['data']['datasets']``, ``['results']['output']`` and
            ``['results']['output_prefix']``, and must provide
            ``primary_reader`` and ``get_pipeline``.
        readers: Dataset readers passed to ``io.read_and_preprocess``
        window: Window to read and process; also used to locate the
            results store
        overwrite (bool): Allow overwriting existing results. When False
            and the store reports the pipeline as completed, nothing is
            processed.

    Returns:
        Filename of the results store (``store.filename``), or ``None``
        when the window was skipped because it was already completed.
    """
    # Dependencies are imported inside the function, as in the original,
    # so the block does not rely on module-level names.
    import logging
    from collections import defaultdict
    from itertools import product

    import numpy as np

    from yatsm import io
    from yatsm.results import HDF5ResultsStore
    from yatsm.pipeline import Pipe

    logger = logging.getLogger('yatsm')

    def sel_pix(pipe, y, x):
        # Narrow the block-level pipe down to a single y/x location,
        # carrying along any existing 'record' entry (None if absent).
        return Pipe(data=pipe['data'].sel(y=y, x=x),
                    record=pipe.get('record', None))

    logger.info('Working on window: {}'.format(window))
    data = io.read_and_preprocess(config['data']['datasets'],
                                  readers,
                                  window,
                                  out=None)

    store_kwds = {
        'window': window,
        'reader': config.primary_reader,
        'root': config['results']['output'],
        'pattern': config['results']['output_prefix'],
    }

    # TODO: guess for number of records to store
    with HDF5ResultsStore.from_window(**store_kwds) as store:
        # TODO: read this from pre-existing results
        pipe = Pipe(data=data)
        pipeline = config.get_pipeline(pipe, overwrite=overwrite)

        # TODO: finish checking for resume
        if store.completed(pipeline) and not overwrite:
            logger.info('Already completed: {}'.format(store.filename))
            return

        # Run the eager tasks once on the whole block before going
        # pixel by pixel.
        pipe = pipeline.run_eager(pipe)

        record_results = defaultdict(list)
        n_ = data.y.shape[0] * data.x.shape[0]
        for i, (y, x) in enumerate(product(data.y.values, data.x.values)):
            # float() keeps the percentage right under integer division
            logger.debug('Processing pixel {pct:>4.2f}%: y/x {y}/{x}'.format(
                pct=float(i) / n_ * 100, y=y, x=x))

            pix_pipe = sel_pix(pipe, y, x)
            result = pipeline.run(pix_pipe, check_eager=False)

            # TODO: figure out what to do with 'data' results
            for k, v in result['record'].items():
                record_results[k].append(v)

        # Collapse each per-pixel list into one array per result name.
        # Only existing keys are reassigned, so iterating is safe.
        for name, result in record_results.items():
            record_results[name] = np.concatenate(result)

        if record_results:
            store.write_result(pipeline, record_results,
                               overwrite=overwrite)
        # TODO: write out cached data
        return store.filename
def sel_pix(pipe, y, x):
    """ Build a new ``Pipe`` restricted to a single y/x location

    Args:
        pipe: Mapping-like pipe; ``pipe['data']`` must support
            ``.sel(y=..., x=...)`` (presumably an xarray object --
            confirm against the caller)
        y: Value passed as ``y`` to ``.sel``
        x: Value passed as ``x`` to ``.sel``

    Returns:
        Pipe: New pipe holding the selected data and the original
        ``'record'`` entry (``None`` when the key is absent)
    """
    pixel_data = pipe['data'].sel(y=y, x=x)
    existing_record = pipe.get('record', None)
    return Pipe(data=pixel_data, record=existing_record)