Example #1
# Imports assumed from renku-python's service layer; exact paths may vary by version.
from git import GitCommandError, Repo
from requests import HTTPError

from renku.core.commands.dataset import import_dataset
from renku.core.errors import ParameterError, RenkuException
from renku.core.utils.contexts import chdir
from renku.service.logger import worker_log
from renku.service.utils import repo_sync

# DatasetImportJobProcess is assumed to be defined alongside this job.


def dataset_import(
    cache, user, user_job_id, project_id, dataset_uri, name=None, extract=False, timeout=None,
):
    """Job for dataset import."""
    user = cache.ensure_user(user)
    worker_log.debug(f"executing dataset import job for {user.user_id}:{user.fullname}")

    user_job = cache.get_job(user, user_job_id)
    user_job.in_progress()

    try:
        worker_log.debug(f"retrieving metadata for project {project_id}")
        project = cache.get_project(user, project_id)
        with chdir(project.abs_path):
            worker_log.debug(f"project found in cache - importing dataset {dataset_uri}")
            import_dataset(
                dataset_uri,
                name,
                extract,
                commit_message=f"service: dataset import {dataset_uri}",
                progress=DatasetImportJobProcess(cache, user_job),
            )

            worker_log.debug("operation successful - syncing with remote")
            _, remote_branch = repo_sync(Repo(project.abs_path), remote="origin")
            user_job.update_extras("remote_branch", remote_branch)

            user_job.complete()
            worker_log.debug("job completed")
    except (HTTPError, ParameterError, RenkuException, GitCommandError) as exp:
        user_job.fail_job(str(exp))

        # Re-raise the exception so the traceback shows up in the job metadata
        # and the job is counted as failed in the metrics.
        raise exp
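A job like this is normally dispatched to a background worker rather than called directly. Below is a minimal dispatch sketch, assuming an RQ (python-rq) worker queue backed by Redis; the queue name, the `enqueue_dataset_import` helper, and the argument values are illustrative assumptions, not part of the original code.

from redis import Redis
from rq import Queue


def enqueue_dataset_import(cache, user, user_job_id, project_id, dataset_uri):
    """Dispatch dataset_import to a worker (queue name is illustrative)."""
    queue = Queue("datasets.jobs", connection=Redis())
    return queue.enqueue(
        dataset_import,
        cache,  # arguments are pickled by RQ, so they must be serializable
        user,
        user_job_id,
        project_id,
        dataset_uri,
        name="my-dataset",  # hypothetical values for the optional parameters
        extract=False,
        job_timeout=600,  # consumed by enqueue() itself, not passed to the job
    )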
Example #2
def import_(uri, name, extract, yes):
    """Import data from a 3rd party provider or another renku project.

    Supported providers: [Dataverse, Renku, Zenodo]
    """
    import_dataset(uri=uri, name=name, extract=extract, with_prompt=True, yes=yes, progress=_DownloadProgressbar)
    click.secho(" " * 79 + "\r", nl=False)
    click.secho("OK", fg="green")
Example #3
def import_(uri, short_name, extract):
    """Import data from a 3rd party provider.

    Supported providers: [Zenodo, Dataverse]
    """
    import_dataset(
        uri=uri,
        short_name=short_name,
        extract=extract,
        with_prompt=True,
        progress=_DownloadProgressbar
    )
    click.secho('OK', fg='green')
Example #4
def import_(uri, short_name, extract, yes):
    """Import data from a 3rd party provider or another renku project.

    Supported providers: [Dataverse, Renku, Zenodo]
    """
    import_dataset(
        uri=uri,
        short_name=short_name,
        extract=extract,
        with_prompt=True,
        yes=yes,
        progress=_DownloadProgressbar
    )
    click.secho(' ' * 79 + '\r', nl=False)
    click.secho('OK', fg='green')
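The CLI variants hand `_DownloadProgressbar` to `import_dataset` as a class, then overwrite the progress line with 79 spaces plus a carriage return before printing "OK". The callback's interface is not shown in these snippets; one plausible shape, assuming the importer constructs it with a description and a total size and calls `update()` as bytes arrive, is:

import sys


class _DownloadProgressbar:
    """Hypothetical progress callback; the real interface is assumed."""

    def __init__(self, description, total_size):
        self._description = description
        self._total = total_size
        self._seen = 0

    def update(self, size):
        self._seen += size
        percent = 100 * self._seen / self._total if self._total else 0
        # \r keeps the bar on one line, which is why the caller clears it later.
        sys.stdout.write(f"\r{self._description}: {percent:3.0f}%")
        sys.stdout.flush()

    def finalize(self):
        sys.stdout.write("\n")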
Example #5
def dataset_import(
    cache,
    user,
    user_job_id,
    project_id,
    dataset_uri,
    short_name=None,
    extract=False,
    timeout=None,
):
    """Job for dataset import."""
    user = cache.ensure_user(user)
    user_job = cache.get_job(user, user_job_id)
    project = cache.get_project(user, project_id)

    with chdir(project.abs_path):
        try:
            user_job.in_progress()

            import_dataset(
                dataset_uri,
                short_name,
                extract,
                commit_message=f'service: dataset import {dataset_uri}',
                progress=DatasetImportJobProcess(cache, user_job))

            _, remote_branch = repo_sync(Repo(project.abs_path),
                                         remote='origin')
            user_job.update_extras('remote_branch', remote_branch)

            user_job.complete()
        except (HTTPError, ParameterError, DatasetExistsError,
                GitCommandError) as exp:
            user_job.fail_job(str(exp))

            # Re-raise the exception so the traceback shows up in the job metadata.
            raise exp
Example #6
def dataset_import(
    cache,
    user,
    user_job_id,
    project_id,
    dataset_uri,
    short_name=None,
    extract=False,
    timeout=None,
):
    """Job for dataset import."""
    user = cache.ensure_user(user)
    user_job = cache.get_job(user, user_job_id)
    project = cache.get_project(user, project_id)

    with chdir(project.abs_path):
        try:
            user_job.in_progress()
            import_dataset(
                dataset_uri,
                short_name,
                extract,
                commit_message=f'service: dataset import {dataset_uri}',
                progress=DatasetImportJobProcess(cache, user_job))
            user_job.complete()
        except (HTTPError, ParameterError, DatasetExistsError) as exp:
            user_job.fail_job(str(exp))

            # Re-raise the exception so the traceback shows up in the job metadata.
            raise exp

    # Push the new commits to the remote; this older repo_sync variant takes a
    # path and returns only a success flag.
    if not repo_sync(project.abs_path):
        error = 'failed to push refs'
        user_job.fail_job(error)

        raise RuntimeError(error)
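`repo_sync` itself is not shown. Judging from the call sites, the variant in Example #6 takes a path and returns a bare success flag, while the one in Examples #1 and #5 takes a `git.Repo` and returns a pair whose second element is the remote branch. A minimal sketch of the newer signature, built on GitPython purely as an illustration of those call sites, not the actual renku implementation:

from git import GitCommandError, Repo


def repo_sync(repo, remote="origin"):
    """Push the active branch; return (success, branch name or None)."""
    branch = repo.active_branch.name
    try:
        repo.remotes[remote].push(branch)
    except GitCommandError:
        return False, None
    return True, branch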