Example #1
def cli(index, all_, collections_):
    """
    Find duplicate datasets for a collection.

    (e.g. if a dataset has been reprocessed but both versions are indexed and active)

    This uses the unique fields defined in a collection to try to group them.

    Note that this is still a prototype: it won't report all duplicates, because the defined unique fields aren't precise enough:

      - Scenes are grouped by "day", not "solar day".

      - Tiled products should be grouped by tile_index, but it isn't in the metadata.

    """
    collections.init_nci_collections(index)

    if all_:
        collection_names = collections.registered_collection_names()
    else:
        collection_names = collections_

    write_duplicates_csv(
        index, [collections.get_collection(name) for name in collection_names],
        sys.stdout)
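
The grouping described in the docstring is essentially a dictionary keyed on the collection's unique fields. A minimal, self-contained sketch of that idea (the find_duplicate_groups name, the record layout, and the field names are assumptions; this is not the project's write_duplicates_csv):

# Standard-library defaultdict; unrelated to the project's `collections` module used above.
from collections import defaultdict


def find_duplicate_groups(datasets, unique_fields):
    """Group dataset records by their unique-field values; keep groups with more than one member."""
    groups = defaultdict(list)
    for dataset in datasets:
        key = tuple(dataset[field] for field in unique_fields)
        groups[key].append(dataset)
    return {key: group for key, group in groups.items() if len(group) > 1}


# Two records sharing the same (platform, day) key are reported as duplicates.
records = [
    {'id': 'a', 'platform': 'LANDSAT_8', 'day': '2018-01-01'},
    {'id': 'b', 'platform': 'LANDSAT_8', 'day': '2018-01-01'},
    {'id': 'c', 'platform': 'LANDSAT_8', 'day': '2018-01-02'},
]
print(find_duplicate_groups(records, ('platform', 'day')))
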
Example #2
def cli(index: Index, collection_specifiers: Iterable[str], cache_folder: str,
        format_: str, output_file: str, min_trash_age_hours: int, jobs: int,
        **fix_settings):
    """
    Update a datacube index to the state of the filesystem.

    This will update locations, trash or index new datasets, depending on the chosen options.
    """
    uiutil.init_logging()

    if fix_settings['index_missing'] and fix_settings['trash_missing']:
        click.echo(
            'Can either index missing datasets (--index-missing) or trash them (--trash-missing), '
            'but not both at the same time.',
            err=True)
        sys.exit(1)

    cs.init_nci_collections(index)

    mismatches = get_mismatches(cache_folder, collection_specifiers, format_,
                                jobs)

    out_f = sys.stdout
    try:
        if output_file:
            out_f = open(output_file, 'w')

        fixes.fix_mismatches(mismatches,
                             index,
                             min_trash_age_hours=min_trash_age_hours,
                             **fix_settings)
    finally:
        if output_file:
            out_f.close()
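
The try/finally block above is the classic way to write to either a named file or stdout. As an alternative sketch (not the project's code), contextlib can manage the optional file so the close happens automatically:

import contextlib
import sys


def open_output(output_file):
    """Return a context manager yielding the named file, or stdout if none was given."""
    if output_file:
        return open(output_file, 'w')
    # nullcontext (Python 3.7+) leaves stdout open when the block exits.
    return contextlib.nullcontext(sys.stdout)


with open_output(None) as out_f:
    print('mismatch report goes here', file=out_f)
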
Example #3
def main(folders: Iterable[str],
         dry_run: bool,
         queue: str,
         project: str,
         work_folder: str,
         cache_folder: str,
         max_jobs: int,
         concurrent_jobs: int,
         submit_limit: int):
    """
    Submit PBS jobs to run dea-sync.

    Note that this is currently specific to tiled products, as it expects their folder naming conventions
    when splitting up jobs. TODO: generalise the task_split() function.

    Example usage: dea-submit-sync 5fc /g/data/fk4/datacube/002/LS5_TM_FC

    5fc is just the name for the job: subsequent resubmissions will not rerun jobs with the same name
    if output files exist.

    A run folder (defaulting to `runs` in the current directory) is used for storing output status.
    """
    input_paths = [Path(folder).absolute() for folder in folders]

    with index_connect(application_name='sync-submit') as index:
        collections.init_nci_collections(index)
        submitter = SyncSubmission(cache_folder, project, queue, dry_run, verbose=True, workers=4)
        click.echo(
            "{} input path(s)".format(len(input_paths))
        )
        tasks = _paths_to_tasks(input_paths)
        click.echo(
            "Found {} tasks across collection(s): {}".format(
                len(tasks),
                ', '.join(set(t.collection.name for t in tasks))
            )
        )

        if len(tasks) > max_jobs:
            click.echo(
                "Grouping (max_jobs={})".format(max_jobs)
            )
        tasks = group_tasks(tasks, maximum=max_jobs)

        total_datasets = sum(t.dataset_count for t in tasks)
        click.secho(
            "Submitting {} total jobs with {} datasets (avg {:.2f} each)...".format(
                len(tasks),
                total_datasets,
                total_datasets / len(tasks)
            ),
            bold=True
        )

        _find_and_submit(tasks, work_folder, concurrent_jobs, submit_limit, submitter)
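
The docstring notes that job splitting relies on the tiled products' folder naming conventions; the grouping step itself only has to cap the number of submitted jobs at max_jobs. A rough, hypothetical illustration of that capping alone (chunk_tasks is an invented name, not the real group_tasks):

def chunk_tasks(tasks, maximum):
    """Split a task list into at most `maximum` roughly even chunks."""
    if not tasks:
        return []
    size = -(-len(tasks) // maximum)  # ceiling division
    return [tasks[i:i + size] for i in range(0, len(tasks), size)]


print(chunk_tasks(list(range(10)), maximum=3))
# -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
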
Example #4
def cli(index, dry_run, paths, destination, checksum):
    """
    Move the given folder of datasets into the given destination folder.

    This will checksum the data, copy it to the destination, and mark the original as archived in the DEA index.


    Notes:

    * An operator can later run dea-clean to trash the archived original locations.

    * Source datasets with failing checksums will be left as-is, with a warning logged.

    * Both the source path(s) and the destination are expected to contain existing DEA collections.
    (See collections.py and paths.py)
    """
    init_logging()
    init_nci_collections(index)

    if not is_base_directory(destination):
        raise click.BadArgumentUsage(
            'Not a known DEA base directory: {}\nExpected one of:\n\t{}'.
            format(destination, '\n\t'.join(BASE_DIRECTORIES)))

    # We want to iterate all datasets in the given input folder, so we find collections that exist in
    # that folder and then iterate through all the collection datasets within that folder. Simple :)

    # We do this aggressively to find errors in arguments immediately. (with the downside of `paths` memory usage)
    resulting_paths = []
    for input_path in map(Path, paths):
        collections = list(get_collections_in_path(input_path))
        if not collections:
            raise click.BadArgumentUsage(
                f"Directory doesn't match any known collections: {input_path}")

        for collection in collections:
            resulting_paths.extend(
                list(collection.iter_fs_paths_within(input_path)))

    _LOG.info("dataset.count",
              input_count=len(paths),
              dataset_count=len(resulting_paths))

    # TODO: @ui.executor_cli_options
    move_all(index,
             resulting_paths,
             Path(destination),
             dry_run=dry_run,
             checksum=checksum)
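
For context, a hedged sketch of the base-directory guard used above (both BASE_DIRECTORIES and the exact rule, exact match versus contained-within, are assumptions here; only the /g/data/fk4 path appears elsewhere in these examples):

from pathlib import Path

# Hypothetical stand-in for the project's known DEA base directories.
BASE_DIRECTORIES = [Path('/g/data/fk4/datacube/002')]


def is_base_directory(destination):
    """True if `destination` is, or sits inside, one of the known base directories."""
    dest = Path(destination).absolute()
    return any(dest == base or base in dest.parents for base in BASE_DIRECTORIES)


print(is_base_directory('/g/data/fk4/datacube/002/LS5_TM_FC'))  # True
print(is_base_directory('/tmp/somewhere-else'))                 # False
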
Example #5
    return ' '.join(printable(v) for v in val)


@printable.register(UUID)
def printable_uuid(val):
    return str(val)


def _write_csv(unique_fields, dicts, stream, append=False):
    writer = csv.DictWriter(stream, _get_headers(unique_fields))
    if not append:
        writer.writeheader()
    writer.writerows(({k: printable(v) for k, v in d.items()} for d in dicts))
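# Usage sketch (added for illustration; not part of the original module): any
# file-like stream can be passed in, e.g.
#
#     with open('duplicates.csv', 'w') as out:
#         _write_csv(unique_fields, duplicate_rows, out)
#
# where `unique_fields` and `duplicate_rows` would come from the collection's
# duplicate query (both names here are assumptions).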


collections.init_nci_collections(None)


@click.command('duplicates')
@global_cli_options
@click.option('-a', '--all_', is_flag=True)
@click.argument('collections_',
                type=click.Choice(collections.registered_collection_names()),
                nargs=-1)
@pass_index(app_name="find-duplicates")
def cli(index, all_, collections_):
    """
    Find duplicate datasets for a collection.

    (e.g. if a dataset has been reprocessed but both versions are indexed and active)