def test_remap_lineage_doc():
    """remap_lineage_doc must accept either a SimpleDocNav or its raw
    document and produce the same remapped lineage tree from both."""

    def to_node(node, sources):
        # Collapse a dataset node down to its id plus its remapped sources.
        return dict(id=node.id, **sources)

    dag = SimpleDocNav(gen_dataset_test_dag(3, force_tree=True))

    # The navigator object and its underlying doc are interchangeable inputs.
    for src in (dag, dag.doc):
        remapped = remap_lineage_doc(src, to_node)
        assert remapped['id'] == dag.id
        assert remapped['ac']['id'] == dag.sources['ac'].id
def resolve(main_ds, uri):
    """Resolve a dataset document (plus its lineage tree) into a Dataset.

    Returns a ``(dataset, error)`` pair: exactly one of the two is ``None``.
    NOTE(review): this reads ``index``, ``fail_on_missing_lineage`` and
    ``verify_lineage`` from an enclosing scope not visible in this chunk —
    it is a closure, not a standalone function.

    :param main_ds: dataset document (raw dict or SimpleDocNav)
    :param uri: location URI to attach to the top-level dataset only
    """
    # De-duplicate repeated lineage sub-trees; a malformed document is
    # reported back to the caller rather than raised.
    try:
        main_ds = SimpleDocNav(dedup_lineage(main_ds))
    except InvalidDocException as e:
        return None, e

    main_uuid = main_ds.id
    # Map id -> first occurrence of each dataset node in the lineage DAG.
    ds_by_uuid = toolz.valmap(toolz.first, flatten_datasets(main_ds))
    all_uuid = list(ds_by_uuid)
    # Fetch whatever the index already knows about these datasets in one call.
    # NOTE(review): keys are str(ds.id) here but looked up with ds.id below —
    # assumes SimpleDocNav ids are already strings; verify for this version.
    db_dss = {str(ds.id): ds for ds in index.datasets.bulk_get(all_uuid)}

    # Lineage = everything except the top-level dataset itself.
    lineage_uuids = set(filter(lambda x: x != main_uuid, all_uuid))
    missing_lineage = lineage_uuids - set(db_dss)

    if missing_lineage and fail_on_missing_lineage:
        return None, "Following lineage datasets are missing from DB: %s" % (','.join(missing_lineage))

    # Consistency check only applies to pre-EO3 documents (EO3 carries no
    # embedded lineage docs to compare).
    if verify_lineage and not is_doc_eo3(main_ds.doc):
        bad_lineage = []
        for uuid in lineage_uuids:
            if uuid in db_dss:
                # Embedded lineage doc (minus its own sources) must agree
                # with what is already stored in the index.
                ok, err = check_consistent(jsonify_document(ds_by_uuid[uuid].doc_without_lineage_sources),
                                           db_dss[uuid].metadata_doc)
                if not ok:
                    bad_lineage.append((uuid, err))
        if len(bad_lineage) > 0:
            error_report = '\n'.join('Inconsistent lineage dataset {}:\n> {}'.format(uuid, err)
                                     for uuid, err in bad_lineage)
            return None, error_report

    def with_cache(v, k, cache):
        # Store v under k, returning v so it can be used inline.
        cache[k] = v
        return v

    def resolve_ds(ds, sources, cache=None):
        # Per-node constructor passed to remap_lineage_doc; `cache` is the
        # shared dict supplied below, so repeated nodes resolve once.
        cached = cache.get(ds.id)
        if cached is not None:
            return cached

        # Only the top-level dataset gets the location URI attached.
        uris = [uri] if ds.id == main_uuid else []

        doc = ds.doc

        # Prefer the product recorded in the index; otherwise match by doc.
        db_ds = db_dss.get(ds.id)
        if db_ds:
            product = db_ds.type
        else:
            product = match_product(doc)

        return with_cache(Dataset(product, doc, uris=uris, sources=sources), ds.id, cache)

    try:
        # Walk the lineage DAG bottom-up, building Dataset objects.
        return remap_lineage_doc(main_ds, resolve_ds, cache={}), None
    except BadMatch as e:
        return None, e