Esempio n. 1
0
def dump_to_odc(data_stream, dc: Datacube, products: list,
                **kwargs) -> Tuple[int, int]:
    """Index the documents carried by ``data_stream`` into ``dc``.

    Each item of ``data_stream`` is read through its ``url`` and ``data``
    attributes; items whose ``data`` is ``None`` are dropped and not counted.
    The remaining ``(url, data)`` pairs are passed to ``from_yaml_doc_stream``
    along with ``dc.index``, ``products`` and any extra ``kwargs``.

    Returns:
        ``(added, failed)``: how many datasets were added to the index, and
        how many either came back from the stream with an error or raised
        while being added.
    """
    # TODO: Get right combination of flags for **kwargs in low validation/no-lineage mode
    url_doc_pairs = ((item.url, item.data) for item in data_stream
                     if item.data is not None)

    resolved = from_yaml_doc_stream(url_doc_pairs,
                                    dc.index,
                                    products=products,
                                    **kwargs)

    added = 0
    failed = 0
    # Pulling from the resolved stream drives the whole lazy chain
    for ds, err in resolved:
        if err is not None:
            logging.error(err)
            failed += 1
            continue

        logging.info(ds)
        # TODO: Potentially wrap this in transactions and batch to DB
        # TODO: Capture UUID's from YAML and perform a bulk has
        try:
            dc.index.datasets.add(ds)
        except Exception as exc:
            # Best effort: one bad dataset must not stop the rest
            logging.error(exc)
            failed += 1
        else:
            added += 1

    return added, failed
Esempio n. 2
0
def dump_list_to_odc(
    account_url,
    container_name,
    yaml_content_list: List[Tuple[bytes, str, str]],
    dc: Datacube,
    products: List[str],
    **kwargs,
):
    """Index a list of YAML documents into ``dc``.

    For every entry of ``yaml_content_list`` element 0 is the document and
    element 1 is a path. Entries whose document is ``None`` are dropped and
    not counted. The URI handed to ``from_yaml_doc_stream`` is
    ``account_url + "/" + container_name + "/"`` followed by the part of the
    path up to and including its last ``"/"`` (nothing if it has no ``"/"``).

    Returns:
        ``(added, failed)``: how many datasets were added to the index, and
        how many either came back from the stream with an error or raised
        while being added.
    """
    def _folder_url(blob_path):
        # Keep everything up to and including the final "/"; when there is
        # no "/" both parts are empty, so nothing is appended.
        head, sep, _ = blob_path.rpartition("/")
        return "/".join([account_url, container_name, head + sep])

    url_doc_pairs = ((_folder_url(entry[1]), entry[0])
                     for entry in yaml_content_list
                     if entry[0] is not None)

    resolved = from_yaml_doc_stream(url_doc_pairs,
                                    dc.index,
                                    products=products,
                                    **kwargs)

    added = 0
    failed = 0
    # Pulling from the resolved stream drives the whole lazy chain
    for ds, err in resolved:
        if err is not None:
            logging.error(err)
            failed += 1
            continue

        logging.info(ds)
        try:
            dc.index.datasets.add(ds)
        except Exception as exc:
            # Best effort: one bad dataset must not stop the rest
            logging.error(exc)
            failed += 1
        else:
            added += 1

    return added, failed
Esempio n. 3
0
def dump_list_to_odc(
    yaml_content_list: List[Tuple[bytes, str, str]],
    dc: Datacube,
    products: List[str],
    **kwargs,
):
    """Index a list of YAML documents into ``dc``.

    For every entry of ``yaml_content_list`` element 0 is the document and
    element 1 is prefixed with ``"https://"`` to form its URI. Entries whose
    document is ``None`` are dropped and not counted. The pairs are passed to
    ``from_yaml_doc_stream`` with ``transform=None``.

    Returns:
        ``(added, failed)``: how many datasets were added to the index, and
        how many either came back from the stream with an error or raised
        while being added.
    """
    url_doc_pairs = (("https://" + entry[1], entry[0])
                     for entry in yaml_content_list
                     if entry[0] is not None)

    resolved = from_yaml_doc_stream(url_doc_pairs,
                                    dc.index,
                                    transform=None,
                                    products=products,
                                    **kwargs)

    added = 0
    failed = 0
    # Pulling from the resolved stream drives the whole lazy chain
    for ds, err in resolved:
        if err is not None:
            logging.error(err)
            failed += 1
            continue

        logging.info(ds)
        # TODO: Potentially wrap this in transactions and batch to DB
        # TODO: Capture UUID's from YAML and perform a bulk has
        try:
            dc.index.datasets.add(ds)
        except Exception as exc:
            # Best effort: one bad dataset must not stop the rest
            logging.error(exc)
            failed += 1
        else:
            added += 1

    return added, failed
Esempio n. 4
0
def from_tar_file(tarfname, index, mk_uri, mode, doc_transform=None, **kwargs):
    """Feed the documents of a tar archive through ``from_yaml_doc_stream``.

    ``tar_doc_stream(tarfname, mode=mode)`` supplies ``(name, document)``
    pairs; each name is converted to a URI with ``mk_uri`` before the pair is
    forwarded. ``doc_transform`` is passed on as ``transform`` and any extra
    ``kwargs`` are forwarded unchanged.

    Returns a sequence of tuples where each tuple is either

        (ds, None) or (None, error_message)
    """
    def _uri_doc_pairs():
        # Generator function, so the archive is not touched until the
        # result is actually iterated.
        for member_name, document in tar_doc_stream(tarfname, mode=mode):
            yield mk_uri(member_name), document

    return from_yaml_doc_stream(
        _uri_doc_pairs(),
        index,
        transform=doc_transform,
        **kwargs
    )
Esempio n. 5
0
def dump_to_odc(
    data_stream,
    dc: Datacube,
    products: list,
    transform=None,
    update=False,
    allow_unsafe=False,
    **kwargs,
) -> Tuple[int, int]:
    """Add or update the documents carried by ``data_stream`` in ``dc``.

    Each item of ``data_stream`` is read through its ``url`` and ``data``
    attributes; items whose ``data`` is ``None`` are dropped and not counted.
    The remaining ``(url, data)`` pairs are passed to ``from_yaml_doc_stream``
    along with ``dc.index``, ``products``, ``transform`` and extra ``kwargs``.

    Args:
        data_stream: iterable of objects exposing ``url`` and ``data``.
        dc: datacube whose index receives the datasets.
        products: forwarded to ``from_yaml_doc_stream`` as ``products``.
        transform: forwarded to ``from_yaml_doc_stream`` as ``transform``.
        update: when true, call ``dc.index.datasets.update`` instead of
            ``dc.index.datasets.add``.
        allow_unsafe: only used when ``update`` is true; passes
            ``{(): changes.allow_any}`` as ``updates_allowed`` instead of an
            empty mapping.
        **kwargs: forwarded to ``from_yaml_doc_stream``.

    Returns:
        ``(added, failed)``. ``added`` counts every dataset that was added
        or, in update mode, updated without raising. ``failed`` counts
        datasets that came back from the stream with an error or raised
        while being written.
    """
    # TODO: Get right combination of flags for **kwargs in low validation/no-lineage mode
    expand_stream = ((d.url, d.data) for d in data_stream
                     if d.data is not None)

    ds_stream = from_yaml_doc_stream(expand_stream,
                                     dc.index,
                                     products=products,
                                     transform=transform,
                                     **kwargs)

    # The permitted-changes mapping depends only on the arguments, so build
    # it once instead of once per dataset.
    updates_allowed = {}
    if update and allow_unsafe:
        updates_allowed = {(): changes.allow_any}

    ds_added = 0
    ds_failed = 0
    # Consume chained streams to DB
    for ds, err in ds_stream:
        if err is not None:
            logging.error(err)
            ds_failed += 1
            continue

        logging.info(ds)
        # TODO: Potentially wrap this in transactions and batch to DB
        # TODO: Capture UUID's from dataset doc and perform a bulk has
        try:
            if update:
                dc.index.datasets.update(ds, updates_allowed=updates_allowed)
            else:
                dc.index.datasets.add(ds)
        except Exception as e:
            # Best effort: one bad dataset must not stop the rest
            logging.error(e)
            ds_failed += 1
        else:
            ds_added += 1

    return ds_added, ds_failed