import threading
from http.server import HTTPServer

# run_pipelines, SaveHooks, called_hooks and progresses come from the
# surrounding test module; a sketch of a possible SaveHooks handler
# follows the test below.


def test_pipeline():
    """Runs a handful of test pipelines and verifies that webhooks fire."""
    global progresses

    # Serve hook callbacks on port 9000; SaveHooks records each received
    # payload in the module-level called_hooks list.
    server = HTTPServer(('', 9000), SaveHooks)
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()

    results = run_pipelines('./tests/env/dummy/pipeline-test%', '.',
                            use_cache=False,
                            dirty=False,
                            force=False,
                            concurrency=1,
                            verbose_logs=True)
    failed_results = [result for result in results if not result.success]
    assert not failed_results, "Failed results: {}".format(
        ["{} {}".format(result.pipeline_id, ", ".join(result.errors))
         for result in failed_results])
    # Exactly three hooks should have fired: queue, start and finish.
    assert len(called_hooks) == 3
    assert called_hooks == [
        {"pipeline_id": "./tests/env/dummy/pipeline-test-hooks", "event": "queue"},
        {"pipeline_id": "./tests/env/dummy/pipeline-test-hooks", "event": "start"},
        {"pipeline_id": "./tests/env/dummy/pipeline-test-hooks", "event": "finish", "success": True,
         'stats': {'.dpp': {'out-datapackage-url': 'hooks-outputs/datapackage.json'},
                   'bytes': 258, 'count_of_rows': None,
                   'dataset_name': 'hook-tests', 'hash': '1871cf2829406983b5785b03bde91aa9'}}
    ]
    assert progresses >= 1
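# For reference, a minimal sketch of what a SaveHooks handler might look
# like, assuming the runner POSTs each hook as a JSON body (the wire
# format here is an assumption, not the library's confirmed behaviour):
import json
from http.server import BaseHTTPRequestHandler

called_hooks = []   # hook payloads received by the server
progresses = 0      # bumped by progress callbacks elsewhere in the tests


class SaveHooks(BaseHTTPRequestHandler):
    """Stores every hook payload POSTed by the pipeline runner."""

    def do_POST(self):
        length = int(self.headers.get('Content-Length', 0))
        called_hooks.append(json.loads(self.rfile.read(length)))
        self.send_response(200)
        self.end_headers()

    def log_message(self, *args):
        pass  # keep test output quiet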
Example #2
    def _run_in_background(self, uid, dirname, verbosity=0, status_cb=None):
        """Run all pipelines under dirname, recording per-pipeline progress
        in self.running[uid] and reporting state changes via status_cb."""
        # If the callback object supports it, hand it the full list of
        # pipeline specs up front so it can display overall progress.
        if hasattr(status_cb, 'init_progress'):
            specs = self.specs('all', dirname)
            status_cb.init_progress(specs)

        def progress_cb(pr: ProgressReport):
            with self.rlock:
                # ProgressReport unpacks as a 5-tuple; success stays None
                # while the pipeline is still running.
                pipeline_id, row_count, success, errors, stats = pr
                if verbosity > 0:
                    logging.info(
                        'Callback %s #%d (success: %s, errors: %r, stats: %s)',
                        pipeline_id, row_count, success, errors, stats)
                current = self.running[uid]['progress'].get(pipeline_id)
                self.running[uid]['progress'][pipeline_id] = dict(
                    done=success is not None,
                    success=success,
                    rows=row_count,
                    errors=errors,
                    stats=stats)
                if status_cb:
                    if current is None:
                        # First report for this pipeline: mark it started.
                        status_cb(pipeline_id, 'INPROGRESS')
                    elif success is not None:
                        status_cb(pipeline_id,
                                  'SUCCESS' if success else 'FAILED',
                                  errors=errors,
                                  stats=stats)

        try:
            if verbosity > 0:
                logging.info('Running all pipelines')
            results = run_pipelines('all',
                                    dirname,
                                    use_cache=False,
                                    dirty=False,
                                    force=False,
                                    concurrency=999,
                                    verbose_logs=verbosity > 1,
                                    progress_cb=progress_cb)
            if verbosity > 0:
                logging.info('Running complete')
            with self.rlock:
                self.running[uid]['results'] = [p._asdict() for p in results]
                if verbosity > 0:
                    logging.info('Results %r', self.running[uid])
        except Exception:
            logging.exception('Failed to run pipelines')
        finally:
            # Always clean up the run's working directory, even on failure.
            self.running[uid]['dir'].cleanup()
            del self.running[uid]['dir']
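# The callback above unpacks pr into five fields, which implies a
# namedtuple-like record. A minimal sketch of the assumed shape, plus a
# trivial status_cb compatible with how _run_in_background calls it
# (both are illustrative assumptions, not the library's confirmed API):
import logging
from collections import namedtuple

ProgressReport = namedtuple(
    'ProgressReport',
    ['pipeline_id', 'row_count', 'success', 'errors', 'stats'])


def print_status_cb(pipeline_id, state, errors=None, stats=None):
    """Logs each state transition reported by _run_in_background."""
    logging.info('%s -> %s (errors=%r)', pipeline_id, state, errors)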