Example no. 1 (score: 0)
File: api.py — Project: aniucd/yatsm
    def get_pipeline(self, pipe=None, overwrite=False):
        """ Return a :ref:`yatsm.pipeline.Pipeline`

        Builds the pipeline from this configuration's tasks; a fresh
        ``Pipe`` is created when no (or a falsy) pipe is supplied.

        Args:
            pipe (yatsm.pipeline.Pipe): Pipe data object
            overwrite (bool): Allow overwriting

        Returns:
            yatsm.pipeline.Pipeline: YATSM pipeline
        """
        pipe_data = pipe if pipe else Pipe()
        return Pipeline.from_config(self.tasks, pipe_data,
                                    overwrite=overwrite)
Example no. 2 (score: 0)
def batch_block(config, readers, window, overwrite=False):
    """ Run the configured pipeline over one raster window and store results

    Reads and preprocesses the datasets for ``window``, runs the eager
    (window-wide) pipeline steps once, then runs the remaining pipeline
    per-pixel, concatenating each named record result before writing it
    to an HDF5 results store.

    Args:
        config: YATSM configuration object (provides ``get_pipeline``,
            ``primary_reader``, and dict-style access to sections)
        readers: Dataset readers passed to ``io.read_and_preprocess``
        window: Raster window to process
        overwrite (bool): Allow overwriting previously computed results

    Returns:
        str: Filename of the results store, or ``None`` if the window
        was already completed and ``overwrite`` is False
    """
    import logging
    # `defaultdict` and `product` were used below without being imported;
    # import them locally like the function's other dependencies
    from collections import defaultdict
    from itertools import product

    import numpy as np

    from yatsm import io
    from yatsm.results import HDF5ResultsStore
    from yatsm.pipeline import Pipe

    logger = logging.getLogger('yatsm')

    def sel_pix(pipe, y, x):
        # Slice the window-wide Pipe down to a single (y, x) pixel
        return Pipe(data=pipe['data'].sel(y=y, x=x),
                    record=pipe.get('record', None))

    logger.info('Working on window: {}'.format(window))
    data = io.read_and_preprocess(config['data']['datasets'],
                                  readers,
                                  window,
                                  out=None)

    store_kwds = {
        'window': window,
        'reader': config.primary_reader,
        'root': config['results']['output'],
        'pattern': config['results']['output_prefix'],
    }

    # TODO: guess for number of records to store
    with HDF5ResultsStore.from_window(**store_kwds) as store:
        # TODO: read this from pre-existing results
        pipe = Pipe(data=data)
        pipeline = config.get_pipeline(pipe, overwrite=overwrite)
        # NOTE: removed a leftover interactive debugger breakpoint
        # (Pdb().set_trace()) that would halt every batch run here

        # TODO: finish checking for resume
        if store.completed(pipeline) and not overwrite:
            logger.info('Already completed: {}'.format(store.filename))
            return

        # Run eager (window-wide) tasks once, before the per-pixel loop
        pipe = pipeline.run_eager(pipe)

        record_results = defaultdict(list)
        n_ = data.y.shape[0] * data.x.shape[0]
        for i, (y, x) in enumerate(product(data.y.values, data.x.values)):
            logger.debug('Processing pixel {pct:>4.2f}%: y/x {y}/{x}'.format(
                pct=i / n_ * 100, y=y, x=x))
            pix_pipe = sel_pix(pipe, y, x)

            # Eager tasks already ran window-wide, so skip them per-pixel
            result = pipeline.run(pix_pipe, check_eager=False)

            # TODO: figure out what to do with 'data' results
            for k, v in result['record'].items():
                record_results[k].append(v)

        # Stack each named result's per-pixel arrays into one array
        for name, result in record_results.items():
            record_results[name] = np.concatenate(result)

        if record_results:
            store.write_result(pipeline, record_results, overwrite=overwrite)
        # TODO: write out cached data
        return store.filename
Example no. 3 (score: 0)
 def sel_pix(pipe, y, x):
     """Return a per-pixel Pipe sliced from a window-wide one at (y, x)."""
     pixel_data = pipe['data'].sel(y=y, x=x)
     existing_record = pipe.get('record', None)
     return Pipe(data=pixel_data, record=existing_record)