Beispiel #1
0
    def _prepare_git_repo(self, url, ref):
        """Return a cached clone of ``url`` checked out at ``ref``.

        The clone is kept below ``self.renku_path / 'cache'`` in a directory
        derived from the URL's hostname and path. An existing cache directory
        whose ``origin`` remote matches ``url`` is refreshed in place; if the
        remote differs, or refreshing fails with a ``GitError``, the directory
        is deleted and the repository is cloned again.

        :param url: URL of the Git repository. For ``localhost`` URLs the
            resolved filesystem path is used in its place.
        :param ref: reference to check out; a falsy value selects the
            ``renku-default-branch`` alias.
        :returns: tuple ``(repo, repo_path)``.
        :raises errors.InvalidFileOperation: the stale cache directory cannot
            be deleted because of a ``PermissionError``.
        :raises errors.ParameterError: ``ref`` cannot be checked out in the
            fresh clone.
        :raises errors.GitError: a Git command fails while creating the
            default-branch alias.
        """
        default_alias = 'renku-default-branch'
        if not ref:
            ref = default_alias

        parsed = GitURL.parse(url)
        location = parsed.pathname
        if parsed.hostname == 'localhost':
            # Local repositories are addressed by their resolved path.
            url = location = str(Path(location).resolve())

        repo_name, _ = os.path.splitext(os.path.basename(location))
        parent_dir = os.path.dirname(location).lstrip('/')
        repo_path = (self.renku_path / 'cache' / parsed.hostname /
                     parent_dir / repo_name)

        if repo_path.exists():
            cached = Repo(str(repo_path))
            if cached.remotes.origin.url == url:
                refreshed = True
                try:
                    cached.git.fetch(all=True)
                    cached.git.checkout(ref)
                except GitError:
                    # Give up on the cached copy and re-clone below.
                    refreshed = False

                if refreshed:
                    try:
                        cached.git.pull()
                    except GitError:
                        # Pulling raises when ref is not a branch.
                        pass
                    return cached, repo_path

            try:
                shutil.rmtree(str(repo_path))
            except PermissionError:
                raise errors.InvalidFileOperation(
                    f'Cannot delete files in {repo_path}: Permission denied')

        repo = clone(url, path=str(repo_path), install_githooks=False)

        # The default branch is not always called 'master', so an alias of
        # the default branch is created right after cloning; later calls use
        # that alias to refer to the default branch.
        alias_ref = f'refs/heads/{default_alias}'
        try:
            repo.git.execute(
                ['git', 'symbolic-ref', alias_ref, repo.head.reference.path])
            try:
                repo.git.checkout(ref)
            except GitCommandError:
                raise errors.ParameterError(
                    f'Cannot find reference "{ref}" in Git repository: {url}')
        except GitCommandError as e:
            raise errors.GitError(
                f'Cannot clone remote Git repo: {url}') from e

        return repo, repo_path
Beispiel #2
0
    def init_repository(self, force=False):
        """Initialize an empty Renku repository.

        Runs ``Repo.init`` on the absolute form of ``self.path`` and stores
        the resulting repository in ``self.repo``.

        :param force: proceed even when ``self.repo`` is already set.
        :raises errors.InvalidFileOperation: ``self.repo`` is already set and
            ``force`` is false.
        """
        from git import Repo
        from renku.core.models.provenance.agents import Person

        # refuse to re-initialize an existing repository unless forced
        already_initialized = self.repo is not None
        if already_initialized and not force:
            raise errors.InvalidFileOperation(
                f'Folder {self.repo.git_dir} already contains file. '
                'Use --force to overwrite')

        # initialize repo
        repo_dir = str(self.path.absolute())
        self.repo = Repo.init(repo_dir)

        # verify if author information is available
        Person.from_git(self.repo)
Beispiel #3
0
    def init_repository(self, force=False, user=None):
        """Initialize an empty Renku repository.

        Runs ``Repo.init`` on the absolute form of ``self.path``, stores the
        resulting repository in ``self.repo`` and optionally writes entries
        to the ``user`` section of its Git configuration.

        :param force: proceed even when ``self.repo`` is already set.
        :param user: optional mapping of keys to values that are written to
            the ``user`` section of the repository configuration.
        :raises errors.InvalidFileOperation: ``self.repo`` is already set and
            ``force`` is false.
        """
        from git import Repo

        from renku.core.models.provenance.agents import Person

        # verify if folder is empty
        if self.repo is not None and not force:
            raise errors.InvalidFileOperation(
                "Folder {0} already contains file. Use --force to overwrite".
                format(self.repo.git_dir))

        # initialize repo and set user data
        path = self.path.absolute()
        self.repo = Repo.init(str(path))
        if user:
            # The context manager releases the configuration writer even
            # when writing one of the values raises.
            with self.repo.config_writer() as config_writer:
                for key, value in user.items():
                    config_writer.set_value("user", key, value)

        # verify if author information is available
        Person.from_git(self.repo)
def _migrate_submodule_based_datasets(client):
    """Replace dataset symlinks into vendored submodules with real files.

    Dataset files that are symlinks resolving into ``/.renku/vendors/`` are
    moved out of the submodule's working tree to the symlink's location.
    For each moved file the function records a ``based_on`` entry and a URL
    and writes them back to the dataset metadata. Submodules located below
    ``.renku/vendors/`` are removed afterwards.

    Returns early without changes when the repository has no submodules or
    when no dataset file is such a symlink.

    :param client: client whose repository and datasets are migrated.
    :raises errors.InvalidFileOperation: a symlink target is missing when it
        is about to be moved.
    """
    from renku.core.management import LocalClient
    from renku.core.management.migrate import is_project_unsupported

    submodules = client.repo.submodules
    if not submodules:
        return

    # Best effort: a submodule that cannot be updated is skipped.
    for submodule in submodules:
        try:
            submodule.update()
        except GitError:
            pass

    submodules_urls = {
        submodule.path: submodule.url
        for submodule in submodules
    }

    vendor_repos = []
    symlinks = []

    # Collect every dataset file that links into a vendored repository.
    for dataset in client.datasets.values():
        for file_ in dataset.files:
            link = client.path / file_.path
            if link.is_symlink():
                target = link.resolve()
                if '/.renku/vendors/' in str(target):
                    vendor_repo = Repo(target.parent,
                                       search_parent_directories=True)
                    working_dir = vendor_repo.working_dir
                    if working_dir not in vendor_repos:
                        vendor_repos.append(working_dir)

                    symlinks.append((file_.path, target, working_dir))

    if not symlinks:
        return

    remote_clients = {
        working_dir: LocalClient(working_dir)
        for working_dir in vendor_repos
    }

    # Migrate the vendored projects first, unless they are unsupported.
    for remote_client in remote_clients.values():
        if is_project_unsupported(remote_client):
            continue
        migrate(remote_client)

    metadata = {}

    for relative_path, target, working_dir in symlinks:
        remote_client = remote_clients[working_dir]
        path_within_repo = target.relative_to(working_dir)
        submodule_path = Path(working_dir).relative_to(client.path)
        url = submodules_urls.get(str(submodule_path), '')

        based_on = None
        if '.renku/vendors/local' in working_dir:
            # Local vendored repository: only the URL is rewritten.
            if url:
                full_path = Path(url) / path_within_repo
                rel_path = os.path.relpath(full_path, client.path)
                url = f'file://{rel_path}'
        else:
            based_on = _fetch_file_metadata(remote_client, path_within_repo)
            if based_on:
                based_on.url = url
                based_on.based_on = None
            else:
                based_on = DatasetFile.from_revision(remote_client,
                                                     path=path_within_repo,
                                                     url=url)

        metadata[relative_path] = (based_on, url)

        # Swap the symlink for the file it pointed to.
        destination = client.path / relative_path
        destination.unlink()

        try:
            shutil.move(target, destination)
        except FileNotFoundError:
            raise errors.InvalidFileOperation(f'File was not found: {target}')

    for submodule in submodules:
        if not submodule.path.startswith('.renku/vendors/'):
            continue
        try:
            submodule.remove(force=True)
        except ValueError:
            pass

    # Write the collected provenance back to the dataset metadata.
    for dataset in client.datasets.values():
        for file_ in dataset.files:
            recorded = metadata.get(file_.path)
            if recorded is None:
                continue
            based_on, url = recorded
            file_.based_on = based_on
            file_.url = remove_credentials(url)

        dataset.to_yaml()
Beispiel #5
0
def init(ctx, client, use_external_storage, path, name, template,
         template_source, template_ref, template_variables, description,
         print_manifest, force):
    """Initialize a project in PATH. Default is current path.

    A template is selected from ``template_source`` when it is given, and
    otherwise from the templates bundled with the ``renku`` package. With
    ``print_manifest`` the available template(s) are printed and the function
    returns; otherwise the chosen template is rendered into ``path`` and the
    Git hooks are installed.

    :param ctx: click context; ``ctx.obj`` is replaced by the evolved client.
    :param client: client that is evolved with ``path`` and the storage flag.
    :param use_external_storage: requested storage flag; forced to ``False``
        when ``client.use_external_storage`` is falsy.
    :param path: target directory of the project.
    :param name: forwarded to ``create_from_template``.
    :param template: name of the template to use; the user is prompted when
        it is missing or does not match exactly one manifest entry.
    :param template_source: location to fetch templates from; the bundled
        templates are used when falsy.
    :param template_ref: reference of ``template_source`` to fetch.
    :param template_variables: forwarded to ``create_from_template``.
    :param description: forwarded to ``create_from_template``.
    :param print_manifest: only print the available template(s) and return.
    :param force: allow initializing a non-empty folder.
    :raises errors.InvalidFileOperation: ``path`` is not empty and neither
        ``force`` nor ``print_manifest`` is set.
    :raises errors.ConfigurationError: the Git user configuration check
        fails.
    :raises click.UsageError: saving the existing data fails with a
        ``GitCommandError`` or the template cannot be rendered because of a
        ``FileExistsError``.
    """
    # verify dirty path
    if not is_path_empty(path) and not force and not print_manifest:
        raise errors.InvalidFileOperation(
            'Folder "{0}" is not empty. Please add --force '
            'flag to transform it into a Renku repository.'.format(str(path)))

    if not check_git_user_config():
        raise errors.ConfigurationError(
            'The user name and email are not configured. '
            'Please use the "git config" command to configure them.\n\n'
            '\tgit config --global --add user.name "John Doe"\n'
            '\tgit config --global --add user.email '
            '"*****@*****.**"\n')

    # select template source
    if template_source:
        click.echo('Fetching template from {0}@{1}... '.format(
            template_source, template_ref),
                   nl=False)
        # NOTE(review): this temporary directory is not removed anywhere in
        # this function.
        template_folder = Path(mkdtemp())
        fetch_template(template_source, template_ref, template_folder)
        template_manifest = read_template_manifest(template_folder,
                                                   checkout=True)
        click.secho('OK', fg='green')
    else:
        template_folder = Path(
            pkg_resources.resource_filename('renku', 'templates'))
        template_manifest = read_template_manifest(template_folder)

    # select specific template
    repeat = False
    template_data = None
    if template:
        template_filtered = [
            template_elem for template_elem in template_manifest
            if template_elem['name'] == template
        ]
        if len(template_filtered) == 1:
            template_data = template_filtered[0]
        else:
            click.echo('The template "{0}" is not available.'.format(template))
            repeat = True

    if print_manifest:
        if template_data:
            click.echo(create_template_sentence([template_data]))
        else:
            click.echo(create_template_sentence(template_manifest))
        return

    if not template or repeat:
        templates = list(template_manifest)
        if len(templates) == 1:
            template_data = templates[0]
        else:
            template_num = click.prompt(text=create_template_sentence(
                templates, True),
                                        type=click.IntRange(1, len(templates)),
                                        show_default=False,
                                        show_choices=False)
            template_data = templates[template_num - 1]

    # set local path and storage
    store_directory(path)
    if not client.use_external_storage:
        use_external_storage = False
    ctx.obj = client = attr.evolve(client,
                                   path=path,
                                   use_external_storage=use_external_storage)
    if not is_path_empty(path):
        from git import GitCommandError
        try:
            # Keep the pre-existing content on a dedicated branch before the
            # template is rendered over it.
            commit = client.find_previous_commit('*')
            branch_name = 'pre_renku_init_{0}'.format(commit.hexsha[:7])
            with client.worktree(path=path,
                                 branch_name=branch_name,
                                 commit=commit,
                                 merge_args=[
                                     '--no-ff', '-s', 'recursive', '-X',
                                     'ours', '--allow-unrelated-histories'
                                 ]):
                click.echo(
                    'Saving current data in branch {0}'.format(branch_name))
        except AttributeError:
            click.echo('Warning! Overwriting non-empty folder.')
        except GitCommandError as e:
            # The exception used to be constructed but never raised, which
            # silently discarded the Git failure.
            raise click.UsageError(str(e)) from e

    # clone the repo
    template_path = template_folder / template_data['folder']
    click.echo('Initializing new Renku repository... ', nl=False)
    with client.lock:
        try:
            create_from_template(template_path, client, name, description,
                                 template_variables, force)
        except FileExistsError as e:
            raise click.UsageError(e)

    # Install git hooks
    from .githooks import install
    ctx.invoke(install, force=force)
Beispiel #6
0
def init(
    ctx,
    client,
    external_storage_requested,
    path,
    name,
    template_id,
    template_index,
    template_source,
    template_ref,
    metadata,
    list_templates,
    force,
    describe,
    data_dir,
):
    """Initialize a project in PATH. Default is the current path.

    A template is selected from ``template_source`` when it is given, and
    otherwise from the templates bundled with the ``renku`` package. With
    ``list_templates`` the available template(s) are printed and the function
    returns; otherwise the chosen template is rendered into ``path`` and the
    Git hooks are installed.

    :param ctx: click context; ``ctx.obj`` is replaced by the evolved client.
    :param client: client that is evolved with ``path``, ``data_dir`` and the
        storage flag.
    :param external_storage_requested: requested storage flag; forced to
        ``False`` when ``client.external_storage_requested`` is falsy.
    :param path: target directory of the project.
    :param name: forwarded to ``create_from_template``.
    :param template_id: ``folder`` value of the template to use.
    :param template_index: 1-based position of the template in the manifest.
    :param template_source: location to fetch templates from; the bundled
        templates are used (and recorded as ``"renku"``) when falsy.
    :param template_ref: reference of ``template_source`` to fetch.
    :param metadata: mapping of template variable values; it is modified in
        place (missing values are prompted for, unused keys are dropped and
        ``__...__`` bookkeeping keys are added).
    :param list_templates: only print the available template(s) and return.
    :param force: allow initializing a non-empty folder.
    :param describe: forwarded to ``create_template_sentence``.
    :param data_dir: data directory, resolved through
        ``resolve_data_directory``.
    :raises errors.InvalidFileOperation: ``path`` is not empty and neither
        ``force`` nor ``list_templates`` is set.
    :raises errors.ConfigurationError: the Git user configuration check
        fails.
    :raises errors.ParameterError: both ``template_id`` and
        ``template_index`` are given.
    :raises click.UsageError: saving the existing data fails with a
        ``GitCommandError`` or the template cannot be rendered because of a
        ``FileExistsError``.
    """
    # verify dirty path
    if not is_path_empty(path) and not force and not list_templates:
        existing_paths = sorted(
            str(p.relative_to(path)) for p in Path(path).iterdir())
        raise errors.InvalidFileOperation(
            f'Folder "{str(path)}" is not empty and contains the following files/directories:'
            + "".join(f"\n\t{e}" for e in existing_paths) +
            "\nPlease add --force flag to transform it into a Renku repository."
        )

    data_dir = resolve_data_directory(data_dir, path)

    if not check_git_user_config():
        raise errors.ConfigurationError(
            "The user name and email are not configured. "
            'Please use the "git config" command to configure them.\n\n'
            '\tgit config --global --add user.name "John Doe"\n'
            "\tgit config --global --add user.email "
            '"*****@*****.**"\n')

    # select template source
    if template_source:
        click.echo("Fetching template from {0}@{1}... ".format(
            template_source, template_ref),
                   nl=False)
        # NOTE(review): this temporary directory is not removed anywhere in
        # this function.
        template_folder = Path(mkdtemp())
        fetch_template(template_source, template_ref, template_folder)
        template_manifest = read_template_manifest(template_folder,
                                                   checkout=True)
        click.secho("OK", fg="green")
    else:
        template_folder = Path(
            pkg_resources.resource_filename("renku", "templates"))
        template_manifest = read_template_manifest(template_folder)
        template_source = "renku"

    # select specific template
    repeat = False
    template_data = None
    if template_id:
        if template_index:
            raise errors.ParameterError(
                "Use either --template-id or --template-index, not both",
                '"--template-index"')
        template_filtered = [
            template_elem for template_elem in template_manifest
            if template_elem["folder"] == template_id
        ]
        if len(template_filtered) == 1:
            template_data = template_filtered[0]
        else:
            click.echo(
                f'The template with id "{template_id}" is not available.')
            repeat = True

    if template_index or template_index == 0:
        # The index is 1-based, so 0 is explicitly reported as unavailable.
        if 0 < template_index <= len(template_manifest):
            template_data = template_manifest[template_index - 1]
        else:
            click.echo(
                f"The template at index {template_index} is not available.")
            repeat = True

    if list_templates:
        if template_data:
            click.echo(
                create_template_sentence([template_data], describe=describe))
        else:
            click.echo(
                create_template_sentence(template_manifest, describe=describe))
        return

    if repeat or not (template_id or template_index):
        templates = list(template_manifest)
        if len(templates) == 1:
            template_data = templates[0]
        else:
            template_index = click.prompt(
                text=create_template_sentence(templates,
                                              describe=describe,
                                              instructions=True),
                type=click.IntRange(1, len(templates)),
                show_default=False,
                show_choices=False,
            )
            template_data = templates[template_index - 1]

    # Derive the id from the selected template in every case; previously it
    # stayed unset when the template was chosen through --template-index.
    template_id = template_data["folder"]

    # verify variables have been passed
    template_variables = template_data.get("variables", {})
    template_variables_keys = set(template_variables.keys())
    input_parameters_keys = set(metadata.keys())
    for key in template_variables_keys - input_parameters_keys:
        value = click.prompt(
            text=(f'The template requires a value for "{key}" '
                  f"({template_variables[key]})"),
            default="",
            show_default=False,
        )
        metadata[key] = value
    useless_variables = input_parameters_keys - template_variables_keys
    if len(useless_variables) > 0:
        click.echo(INFO +
                   "These parameters are not used by the template and were "
                   "ignored:\n\t{}".format("\n\t".join(useless_variables)))
        for key in useless_variables:
            del metadata[key]

    # set local path and storage
    store_directory(path)
    if not client.external_storage_requested:
        external_storage_requested = False
    ctx.obj = client = attr.evolve(
        client,
        path=path,
        data_dir=data_dir,
        external_storage_requested=external_storage_requested)
    if not is_path_empty(path):
        from git import GitCommandError

        try:
            # Keep the pre-existing content on a dedicated branch before the
            # template is rendered over it.
            commit = client.find_previous_commit("*")
            branch_name = "pre_renku_init_{0}".format(commit.hexsha[:7])
            with client.worktree(
                    path=path,
                    branch_name=branch_name,
                    commit=commit,
                    merge_args=[
                        "--no-ff", "-s", "recursive", "-X", "ours",
                        "--allow-unrelated-histories"
                    ],
            ):
                click.echo(
                    "Saving current data in branch {0}".format(branch_name))
        except AttributeError:
            click.echo("Warning! Overwriting non-empty folder.")
        except GitCommandError as e:
            # The exception used to be constructed but never raised, which
            # silently discarded the Git failure.
            raise click.UsageError(str(e)) from e

    # supply additional metadata
    metadata["__template_source__"] = template_source
    metadata["__template_ref__"] = template_ref
    metadata["__template_id__"] = template_id
    metadata["__namespace__"] = ""
    metadata["__sanitized_project_name__"] = ""
    metadata["__repository__"] = ""
    metadata["__project_slug__"] = ""

    # clone the repo
    template_path = template_folder / template_data["folder"]
    click.echo("Initializing new Renku repository... ", nl=False)
    with client.lock:
        try:
            create_from_template(
                template_path=template_path,
                client=client,
                name=name,
                metadata=metadata,
                force=force,
                data_dir=data_dir,
            )
        except FileExistsError as e:
            raise click.UsageError(e)

    # Install git hooks
    from .githooks import install

    ctx.invoke(install, force=force)
Beispiel #7
0
def init(ctx, client, external_storage_requested, path, name, template_id,
         template_index, template_source, template_ref, parameter,
         list_templates, force, describe, data_dir):
    """Initialize a project in PATH. Default is the current path.

    A template is selected from ``template_source`` when it is given, and
    otherwise from the templates bundled with the ``renku`` package. With
    ``list_templates`` the available template(s) are printed and the function
    returns; otherwise the chosen template is rendered into ``path`` and the
    Git hooks are installed.

    :param ctx: click context; ``ctx.obj`` is replaced by the evolved client.
    :param client: client that is evolved with ``path``, ``data_dir`` and the
        storage flag.
    :param external_storage_requested: requested storage flag; forced to
        ``False`` when ``client.external_storage_requested`` is falsy.
    :param path: target directory of the project.
    :param name: forwarded to ``create_from_template``.
    :param template_id: ``folder`` value of the template to use.
    :param template_index: 1-based position of the template in the manifest.
    :param template_source: location to fetch templates from; the bundled
        templates are used when falsy.
    :param template_ref: reference of ``template_source`` to fetch.
    :param parameter: mapping of template variable values; it is modified in
        place (missing values are prompted for, unused keys are dropped) and
        forwarded to ``create_from_template`` as ``metadata``.
    :param list_templates: only print the available template(s) and return.
    :param force: allow initializing a non-empty folder.
    :param describe: forwarded to ``create_template_sentence``.
    :param data_dir: data directory, resolved through
        ``resolve_data_directory``.
    :raises errors.InvalidFileOperation: ``path`` is not empty and neither
        ``force`` nor ``list_templates`` is set.
    :raises errors.ConfigurationError: the Git user configuration check
        fails.
    :raises errors.ParameterError: both ``template_id`` and
        ``template_index`` are given.
    :raises click.UsageError: saving the existing data fails with a
        ``GitCommandError`` or the template cannot be rendered because of a
        ``FileExistsError``.
    """
    # verify dirty path
    if not is_path_empty(path) and not force and not list_templates:
        raise errors.InvalidFileOperation(
            'Folder "{0}" is not empty. Please add --force '
            'flag to transform it into a Renku repository.'.format(str(path)))

    data_dir = resolve_data_directory(data_dir, path)

    if not check_git_user_config():
        raise errors.ConfigurationError(
            'The user name and email are not configured. '
            'Please use the "git config" command to configure them.\n\n'
            '\tgit config --global --add user.name "John Doe"\n'
            '\tgit config --global --add user.email '
            '"*****@*****.**"\n')

    # select template source
    if template_source:
        click.echo('Fetching template from {0}@{1}... '.format(
            template_source, template_ref),
                   nl=False)
        # NOTE(review): this temporary directory is not removed anywhere in
        # this function.
        template_folder = Path(mkdtemp())
        fetch_template(template_source, template_ref, template_folder)
        template_manifest = read_template_manifest(template_folder,
                                                   checkout=True)
        click.secho('OK', fg='green')
    else:
        template_folder = Path(
            pkg_resources.resource_filename('renku', 'templates'))
        template_manifest = read_template_manifest(template_folder)

    # select specific template
    repeat = False
    template_data = None
    if template_id:
        if template_index:
            raise errors.ParameterError(
                'Use either --template-id or --template-index, not both',
                '"--template-index"')
        template_filtered = [
            template_elem for template_elem in template_manifest
            if template_elem['folder'] == template_id
        ]
        if len(template_filtered) == 1:
            template_data = template_filtered[0]
        else:
            click.echo(
                f'The template with id "{template_id}" is not available.')
            repeat = True

    if template_index or template_index == 0:
        # The index is 1-based, so 0 is explicitly reported as unavailable.
        if 0 < template_index <= len(template_manifest):
            template_data = template_manifest[template_index - 1]
        else:
            click.echo(
                f'The template at index {template_index} is not available.')
            repeat = True

    if list_templates:
        if template_data:
            click.echo(
                create_template_sentence([template_data], describe=describe))
        else:
            click.echo(
                create_template_sentence(template_manifest, describe=describe))
        return

    if repeat or not (template_id or template_index):
        templates = list(template_manifest)
        if len(templates) == 1:
            template_data = templates[0]
        else:
            template_num = click.prompt(text=create_template_sentence(
                templates, describe=describe, instructions=True),
                                        type=click.IntRange(1, len(templates)),
                                        show_default=False,
                                        show_choices=False)
            template_data = templates[template_num - 1]

    # verify variables have been passed
    template_variables = template_data.get('variables', {})
    template_variables_keys = set(template_variables.keys())
    input_parameters_keys = set(parameter.keys())
    for key in template_variables_keys - input_parameters_keys:
        value = click.prompt(
            text=(f'The template requires a value for "{key}" '
                  f'({template_variables[key]})'),
            default='',
            show_default=False)
        parameter[key] = value
    useless_variables = input_parameters_keys - template_variables_keys
    if useless_variables:
        click.echo(INFO +
                   'These parameters are not used by the template and were '
                   'ignored:\n\t{}'.format('\n\t'.join(useless_variables)))
        for key in useless_variables:
            del parameter[key]

    # set local path and storage
    store_directory(path)
    if not client.external_storage_requested:
        external_storage_requested = False
    ctx.obj = client = attr.evolve(
        client,
        path=path,
        data_dir=data_dir,
        external_storage_requested=external_storage_requested)
    if not is_path_empty(path):
        from git import GitCommandError
        try:
            # Keep the pre-existing content on a dedicated branch before the
            # template is rendered over it.
            commit = client.find_previous_commit('*')
            branch_name = 'pre_renku_init_{0}'.format(commit.hexsha[:7])
            with client.worktree(path=path,
                                 branch_name=branch_name,
                                 commit=commit,
                                 merge_args=[
                                     '--no-ff', '-s', 'recursive', '-X',
                                     'ours', '--allow-unrelated-histories'
                                 ]):
                click.echo(
                    'Saving current data in branch {0}'.format(branch_name))
        except AttributeError:
            click.echo('Warning! Overwriting non-empty folder.')
        except GitCommandError as e:
            # The exception used to be constructed but never raised, which
            # silently discarded the Git failure.
            raise click.UsageError(str(e)) from e

    # clone the repo
    template_path = template_folder / template_data['folder']
    click.echo('Initializing new Renku repository... ', nl=False)
    with client.lock:
        try:
            create_from_template(template_path=template_path,
                                 client=client,
                                 name=name,
                                 metadata=parameter,
                                 force=force,
                                 data_dir=data_dir)
        except FileExistsError as e:
            raise click.UsageError(e)

    # Install git hooks
    from .githooks import install
    ctx.invoke(install, force=force)