def _pull(repository: Repository,
          branch_name: str,
          override: str,
          feedback_cb: Callable,
          username: Optional[str] = None) -> None:
    # TODO(billvb) Refactor to BranchManager
    feedback_cb(f"Pulling from remote branch \"{branch_name}\"...")
    cp = repository.git.commit_hash
    try:
        call_subprocess('git pull'.split(), cwd=repository.root_dir)

    except subprocess.CalledProcessError as cp_error:
        if 'Automatic merge failed' in cp_error.stdout.decode():
            feedback_cb(
                f"Detected merge conflict, resolution method = {override}")
            bm = BranchManager(repository, username='')
            conflicted_files = bm._infer_conflicted_files(
                cp_error.stdout.decode())
            if 'abort' == override:
                call_subprocess(f'git reset --hard {cp}'.split(),
                                cwd=repository.root_dir)
                raise MergeConflict('Merge conflict pulling upstream',
                                    conflicted_files)
            call_subprocess(
                f'git checkout --{override} {" ".join(conflicted_files)}'.
                split(),
                cwd=repository.root_dir)
            call_subprocess('git add .'.split(), cwd=repository.root_dir)
            call_subprocess('git commit -m "Merge"'.split(),
                            cwd=repository.root_dir)
            feedback_cb("Resolved merge conflict")
        else:
            raise
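
The conflict-handling branch above shells out to git and relies on BranchManager._infer_conflicted_files to parse the merge output. Below is a minimal standalone sketch of the same "theirs" resolution, assuming only that the git CLI is on PATH; it lists conflicted paths with `git diff --name-only --diff-filter=U` instead of parsing the merge output, so it is an illustration of the technique rather than the project's implementation.

import os
import subprocess
import tempfile


def _git(args, cwd):
    """Run a git command and return its stdout (raises CalledProcessError on failure)."""
    return subprocess.run(['git'] + args, cwd=cwd, check=True,
                          capture_output=True, text=True).stdout


def demo_theirs_resolution() -> None:
    with tempfile.TemporaryDirectory() as repo:
        _git(['init'], repo)
        _git(['checkout', '-b', 'main'], repo)
        _git(['config', 'user.email', 'demo@example.com'], repo)
        _git(['config', 'user.name', 'Demo'], repo)

        # Base commit shared by both branches
        with open(os.path.join(repo, 'file.txt'), 'w') as f:
            f.write('base\n')
        _git(['add', '.'], repo)
        _git(['commit', '-m', 'base'], repo)

        # Conflicting edit on a feature branch
        _git(['checkout', '-b', 'feature'], repo)
        with open(os.path.join(repo, 'file.txt'), 'w') as f:
            f.write('feature\n')
        _git(['commit', '-am', 'feature change'], repo)

        # Conflicting edit back on main
        _git(['checkout', 'main'], repo)
        with open(os.path.join(repo, 'file.txt'), 'w') as f:
            f.write('main\n')
        _git(['commit', '-am', 'main change'], repo)

        try:
            # Fails with "Automatic merge failed" in its output
            _git(['merge', 'feature'], repo)
        except subprocess.CalledProcessError:
            conflicted = _git(['diff', '--name-only', '--diff-filter=U'], repo).split()
            # Resolve in favour of the incoming branch (the 'theirs' override)
            _git(['checkout', '--theirs'] + conflicted, repo)
            _git(['add', '.'], repo)
            _git(['commit', '-m', 'Merge'], repo)


if __name__ == '__main__':
    demo_theirs_resolution()
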
def publish_to_remote(repository: Repository, username: str, remote: str,
                      feedback_callback: Callable) -> None:
    # TODO(billvb) - Refactor all (or part) to BranchManager
    bm = BranchManager(repository, username=username)
    if bm.workspace_branch != bm.active_branch:
        raise ValueError(f'Must be on branch {bm.workspace_branch} to publish')

    feedback_callback(f"Preparing to publish {repository.name}")
    git_garbage_collect(repository)

    # Make up to five fetch attempts - the remote repo could have been created just
    # milliseconds ago and may need a moment to settle before it supports all git operations.
    for tr in range(5):
        try:
            repository.git.fetch(remote=remote)
            break
        except Exception as e:
            logger.warning(
                f"Fetch attempt {tr+1}/5 failed for {str(repository)}: {e}")
            time.sleep(1)
    else:
        raise ValueError(
            f"Timed out trying to fetch repo for {str(repository)}")

    feedback_callback("Pushing up regular objects...")
    call_subprocess(
        ['git', 'push', '--set-upstream', 'origin', bm.workspace_branch],
        cwd=repository.root_dir)
    feedback_callback(f"Publish complete.")
    repository.git.clear_checkout_context()
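
The fetch loop above relies on Python's for/else: the else clause runs only if the loop finishes without a break. A generic sketch of that retry-then-raise pattern follows; the `retry` helper is hypothetical and not taken from this codebase.

import logging
import time
from typing import Callable, TypeVar

logger = logging.getLogger(__name__)
T = TypeVar('T')


def retry(operation: Callable[[], T], attempts: int = 5, delay: float = 1.0) -> T:
    """Call `operation` until it succeeds, up to `attempts` times, sleeping between failures."""
    for attempt in range(attempts):
        try:
            return operation()
        except Exception as e:
            logger.warning(f"Attempt {attempt + 1}/{attempts} failed: {e}")
            time.sleep(delay)
    raise ValueError(f"Operation failed after {attempts} attempts")


# Usage (hypothetical): retry(lambda: repository.git.fetch(remote=remote))
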
def clone_repo(remote_url: str,
               username: str,
               owner: str,
               load_repository: Callable[[str], Any],
               put_repository: Callable[[str, str, str], Any],
               make_owner: bool = False) -> Repository:

    # Clone into a temporary directory, such that if anything
    # gets messed up, then this directory will be cleaned up.
    tempdir = os.path.join(
        Configuration().upload_dir,
        f"{username}_{owner}_clone_{uuid.uuid4().hex[0:10]}")
    os.makedirs(tempdir)
    path = _clone(remote_url=remote_url, working_dir=tempdir)
    candidate_repo = load_repository(path)

    if os.environ.get('WINDOWS_HOST'):
        logger.warning("Imported on Windows host - set fileMode to false")
        call_subprocess("git config core.fileMode false".split(),
                        cwd=candidate_repo.root_dir)

    repository = put_repository(candidate_repo.root_dir, username, owner)
    shutil.rmtree(tempdir)

    return repository
def _set_upstream_branch(repository: Repository, branch_name: str,
                         feedback_cb: Callable):
    # TODO(billvb) - Refactor to BranchManager
    set_upstream_tokens = [
        'git', 'push', '--set-upstream', 'origin', branch_name
    ]
    call_subprocess(set_upstream_tokens, cwd=repository.root_dir)
Example #5
def _clone(remote_url: str, working_dir: str) -> str:

    clone_tokens = f"git clone {remote_url}".split()
    call_subprocess(clone_tokens, cwd=working_dir)

    # Confirm the clone created exactly one directory
    dirs = os.listdir(working_dir)
    if len(dirs) != 1:
        raise GigantumException('Git clone produced extra directories')

    p = os.path.join(working_dir, dirs[0])
    if not os.path.exists(p):
        raise GigantumException(
            'Could not find expected path of repo after clone')

    try:
        # This is for backward compatibility -- old projects clone to the
        # "gm.workspace" branch by default, even if they have already been migrated.
        # This checkout puts the user on the proper branch if the project has been
        # migrated, and has no effect if it hasn't.
        call_subprocess("git checkout master".split(), cwd=p)
    except Exception as e:
        logger.error(e)

    return p
def migrate_labbook_schema(labbook: LabBook) -> None:
    # Fallback point in case of a problem
    initial_commit = labbook.git.commit_hash

    try:
        migrate_schema_to_current(labbook.root_dir)
    except Exception as e:
        logger.exception(e)
        call_subprocess(f'git reset --hard {initial_commit}'.split(),
                        cwd=labbook.root_dir)
        raise

    msg = f"Migrate schema to {CURRENT_LABBOOK_SCHEMA}"
    labbook.git.add(labbook.config_path)
    cmt = labbook.git.commit(msg,
                             author=labbook.author,
                             committer=labbook.author)
    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                               show=True,
                               importance=100,
                               action=ActivityAction.EDIT)

    adr.add_value('text/plain', msg)
    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=msg,
                        show=True,
                        importance=255,
                        linked_commit=cmt.hexsha,
                        tags=['schema', 'update', 'migration'])
    ar.add_detail_object(adr)
    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)
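
migrate_labbook_schema (and merge_from further below) both use the same checkpoint-and-rollback pattern: record the current commit hash, attempt a risky operation, and hard-reset on failure. A minimal generic sketch of the pattern, assuming a git work tree at `repo_dir` (the helper name is hypothetical):

import subprocess
from typing import Callable


def run_with_git_rollback(repo_dir: str, operation: Callable[[], None]) -> None:
    """Run `operation`; if it raises, hard-reset the work tree to the commit it started on."""
    checkpoint = subprocess.run(['git', 'rev-parse', 'HEAD'], cwd=repo_dir,
                                check=True, capture_output=True,
                                text=True).stdout.strip()
    try:
        operation()
    except Exception:
        subprocess.run(['git', 'reset', '--hard', checkpoint],
                       cwd=repo_dir, check=True)
        raise
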
    def _export_zip(
        cls,
        repo: Repository,
        export_directory: str,
        config_file: Optional[str] = None,
    ) -> str:
        if not os.path.isdir(export_directory):
            os.makedirs(export_directory, exist_ok=True)

        repo_dir, _ = repo.root_dir.rsplit(os.path.sep, 1)

        repo_zip_name = f'{repo.name}-' \
                      f'{repo.git.log()[0]["commit"][:6]}'
        zip_path = f'{repo_zip_name}.zip'
        exported_path = os.path.join(export_directory, zip_path)

        try:
            # zip data using subprocess - NOTE! Python zipfile library does not work correctly.
            call_subprocess(
                ['zip', '-r', exported_path,
                 os.path.basename(repo.root_dir)],
                cwd=repo_dir,
                check=True)
            assert os.path.exists(exported_path)
            return exported_path
        except:
            try:
                os.remove(exported_path)
            except OSError:
                pass
            raise
Example #8
def sync_branch(repository: Repository, username: Optional[str], override: str,
                pull_only: bool, feedback_callback: Callable) -> int:
    """"""
    if not repository.has_remote:
        return 0

    repository.sweep_uncommitted_changes()
    repository.git.fetch()

    bm = BranchManager(repository)
    branch_name = bm.active_branch

    if pull_only and branch_name not in bm.branches_remote:
        # Cannot pull when remote branch doesn't exist.
        feedback_callback("Pull complete - nothing to pull")
        return 0

    if branch_name not in bm.branches_remote:
        # Branch does not exist, so push it to remote.
        _set_upstream_branch(repository, bm.active_branch, feedback_callback)
        return 0
    else:
        pulled_updates_count = bm.get_commits_behind()
        _pull(repository, branch_name, override, feedback_callback)
        should_push = not pull_only
        if should_push:
            # Skip pushing back up if set to pull_only
            push_tokens = f'git push origin {branch_name}'.split()
            if branch_name not in bm.branches_remote:
                push_tokens.insert(2, "--set-upstream")
            call_subprocess(push_tokens, cwd=repository.root_dir)
            feedback_callback("Sync complete")
        else:
            feedback_callback("Pull complete")
        return pulled_updates_count
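
get_commits_behind is not shown in this snippet; one way to compute the same count with plain git (a sketch under that assumption, not necessarily BranchManager's implementation) is `git rev-list --count` against the remote-tracking branch:

import subprocess


def commits_behind(repo_dir: str, branch_name: str, remote: str = 'origin') -> int:
    """Count commits on the remote-tracking branch that are not yet in the local HEAD."""
    subprocess.run(['git', 'fetch', remote], cwd=repo_dir, check=True)
    out = subprocess.run(
        ['git', 'rev-list', '--count', f'HEAD..{remote}/{branch_name}'],
        cwd=repo_dir, check=True, capture_output=True, text=True).stdout
    return int(out.strip())
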
Example #9
    def complete_batch(cls,
                       labbook: LabBook,
                       txid: str,
                       cancel: bool = False,
                       rollback: bool = False) -> None:
        """
        Indicate a batch upload is finished and sweep all new files.

        Args:
            labbook: Subject labbook
            txid: Transaction id (correlator)
            cancel: Indicate transaction finished but due to cancellation
            rollback: Undo all local changes if cancelled (default False)

        Returns:
            None
        """

        if cancel and rollback:
            logger.warning(f"Cancelled tx {txid}, doing git reset")
            call_subprocess(['git', 'reset', '--hard'], cwd=labbook.root_dir)
        else:
            logger.info(f"Done batch upload {txid}, cancelled={cancel}")
            if cancel:
                logger.warning("Sweeping aborted batch upload.")
            m = "Cancelled upload `{txid}`. " if cancel else ''
            labbook.sweep_uncommitted_changes(upload=True,
                                              extra_msg=m,
                                              show=True)
def git_garbage_collect(repository: Repository) -> None:
    """Run "git gc" (garbage collect) over the repo. If run frequently enough, this only takes a short time
    even on large repos.

    Note!! This method assumes the subject repository has already been locked!

    TODO(billvb): Refactor into BranchManager

    Args:
        repository: Subject Repository

    Returns:
        None

    Raises:
        subprocess.CalledProcessError when git gc fails.
        """
    logger.info(f"Running git gc (Garbage Collect) in {str(repository)}...")
    if os.environ.get('WINDOWS_HOST'):
        logger.warning(
            f"Avoiding `git gc` in {str(repository)} on Windows host fs")
        return

    try:
        call_subprocess(['git', 'gc'], cwd=repository.root_dir)
    except subprocess.CalledProcessError:
        logger.warning(
            f"Ignore `git gc` error - {str(repository)} repo remains unpruned")
Example #11
    def remove_lfs_remotes(self) -> None:
        """Remove all LFS endpoints.

        Each LFS endpoint has its own entry in the git config, of the following form:

        ```
        [lfs "https://repo.location.whatever"]
            access = basic
        ```

        To remove an endpoint we need its section name (e.g. "lfs.https://repo.location.whatever"),
        so we list all LFS config variables and strip the trailing variable name from each one to
        recover the section to delete.

        Returns:
            None
        """
        lfs_sections = call_subprocess(
            ['git', 'config', '--get-regexp', 'lfs.http*'],
            cwd=self.root_dir).split('\n')
        logger.info(f"LFS entries to delete are {lfs_sections}")
        for lfs_sec in set([n for n in lfs_sections if n]):
            var = lfs_sec.split(' ')[0]
            section = '.'.join(var.split('.')[:-1])
            call_subprocess(['git', 'config', '--remove-section', section],
                            cwd=self.root_dir)
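
For reference, the section-name extraction above operates on `git config --get-regexp` lines of the form `lfs.<url>.access basic`. A standalone sketch over an assumed sample of that output:

# Assumed sample output of `git config --get-regexp lfs.http*`
sample_output = ('lfs.https://repo.location.whatever.access basic\n'
                 'lfs.https://another.host.example.access basic\n')

sections = set()
for line in sample_output.split('\n'):
    if not line:
        continue
    var = line.split(' ')[0]                   # e.g. lfs.https://repo.location.whatever.access
    section = '.'.join(var.split('.')[:-1])    # strip the variable name -> lfs.https://repo.location.whatever
    sections.add(section)

print(sections)
# Each section could then be removed with: git config --remove-section <section>
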
Example #12
    def merge_from(self, other_branch: str) -> None:
        """Pulls/merges `other_branch` into current branch. If in the event of a
        conflict, it resets to the point prior to merge.

        Args:
            other_branch: Name of other branch to merge from
        """
        if other_branch not in self.branches_local:
            raise InvalidBranchName(f'Branch {other_branch} not found')

        checkpoint = self.repository.git.commit_hash
        try:
            self.repository.sweep_uncommitted_changes()
            try:
                call_subprocess(f'git merge {other_branch}'.split(),
                                cwd=self.repository.root_dir)
            except subprocess.CalledProcessError as merge_error:
                logger.warning(
                    f"Merge conflict syncing {str(self.repository)}")
                # TODO - This should be cleaned up (The UI attempts to match on the token "Merge conflict")
                conflicted_files = self._infer_conflicted_files(
                    merge_error.stdout.decode())
                raise MergeConflict(f"Merge conflict - {merge_error}",
                                    file_conflicts=conflicted_files)
            self.repository.git.commit(f'Merged from branch `{other_branch}`')
        except Exception as e:
            call_subprocess(f'git reset --hard {checkpoint}'.split(),
                            cwd=self.repository.root_dir)
            raise e
Example #13
    def update_linked_dataset(labbook: LabBook,
                              username: str,
                              init: bool = False) -> None:
        """

        Args:
            labbook:
            username:
            init:

        Returns:

        """
        # List all existing linked datasets IN this repository
        existing_dataset_abs_paths = glob.glob(
            os.path.join(labbook.root_dir, '.gigantum', 'datasets', "*/*"))

        if len(labbook.git.repo.submodules) > 0:
            for submodule in labbook.git.list_submodules():
                try:
                    namespace, dataset_name = submodule['name'].split("&")
                    rel_submodule_dir = os.path.join('.gigantum', 'datasets',
                                                     namespace, dataset_name)
                    submodule_dir = os.path.join(labbook.root_dir,
                                                 rel_submodule_dir)

                    # If submodule is currently present, init/update it, don't remove it!
                    if submodule_dir in existing_dataset_abs_paths:
                        existing_dataset_abs_paths.remove(submodule_dir)

                    if init:
                        # Optionally Init submodule
                        call_subprocess(
                            ['git', 'submodule', 'init', rel_submodule_dir],
                            cwd=labbook.root_dir,
                            check=True)
                    # Update submodule
                    call_subprocess(
                        ['git', 'submodule', 'update', rel_submodule_dir],
                        cwd=labbook.root_dir,
                        check=True)

                    ds = InventoryManager().load_dataset_from_directory(
                        submodule_dir)
                    ds.namespace = namespace
                    manifest = Manifest(ds, username)
                    manifest.link_revision()

                except Exception as err:
                    logger.error(
                        f"Failed to initialize linked Dataset (submodule reference): {submodule['name']}. "
                        f"This may be an actual error or simply due to repository permissions"
                    )
                    logger.exception(err)
                    continue

        # Clean out lingering dataset files if you previously had a dataset linked, but now don't
        for submodule_dir in existing_dataset_abs_paths:
            shutil.rmtree(submodule_dir)
Example #14
    def merge_use_theirs(self, other_branch: str):
        """Merge `other_branch` into the current branch, resolving any
        conflicts in favor of the incoming branch."""
        self.repository.sweep_uncommitted_changes()
        ot = call_subprocess(f'git merge {other_branch}'.split(),
                             cwd=self.repository.root_dir, check=False)
        conf_files = self._infer_conflicted_files(ot)
        if conf_files:
            call_subprocess(f'git checkout --theirs {" ".join(conf_files)}'.split(),
                            cwd=self.repository.root_dir)
        self.repository.sweep_uncommitted_changes(
            extra_msg=f"Merged {other_branch} using theirs.")

    def reset(self, username: str):
        """ Perform a Git reset to undo all local changes"""
        bm = BranchManager(self.repository, username)
        if self.remote and bm.active_branch in bm.branches_remote:
            self.repository.git.fetch()
            self.repository.sweep_uncommitted_changes()
            call_subprocess(
                ['git', 'reset', '--hard', f'origin/{bm.active_branch}'],
                cwd=self.repository.root_dir)
            call_subprocess(['git', 'clean', '-fd'],
                            cwd=self.repository.root_dir)
    def publish(self,
                username: str,
                access_token: Optional[str] = None,
                remote: str = "origin",
                public: bool = False,
                feedback_callback: Callable = lambda _: None,
                id_token: Optional[str] = None) -> None:
        """ Publish this repository to the remote GitLab instance.

        Args:
            username: Subject username
            access_token: Temp token/password to gain permissions on GitLab instance
            remote: Name of Git remote (always "origin" for now).
            public: Allow public read access
            feedback_callback: Callback to give user-facing realtime feedback
            id_token: Dataset credentials
        Returns:
            None
        """

        logger.info(
            f"Publishing {str(self.repository)} for user {username} to remote {remote}"
        )
        if self.remote:
            raise GitWorkflowException(
                "Cannot publish Labbook when remote already set.")

        branch_mgr = BranchManager(self.repository, username=username)
        if branch_mgr.active_branch != branch_mgr.workspace_branch:
            raise GitWorkflowException(
                f"Must be on branch {branch_mgr.workspace_branch} to publish")

        try:
            self.repository.sweep_uncommitted_changes()
            vis = "public" if public is True else "private"
            gitworkflows_utils.create_remote_gitlab_repo(
                repository=self.repository,
                username=username,
                access_token=access_token,
                visibility=vis)
            gitworkflows_utils.publish_to_remote(
                repository=self.repository,
                username=username,
                remote=remote,
                feedback_callback=feedback_callback)
        except Exception as e:
            # Unsure what specific exception add_remote creates, so make a catchall.
            logger.error(
                f"Publish failed {e}: {str(self.repository)} may be in corrupted Git state!"
            )
            call_subprocess(['git', 'reset', '--hard'],
                            cwd=self.repository.root_dir)
            raise e
    def test_merge_conflict_basic(self, mock_labbook_lfs_disabled):
        """ Test a basic merge-conflict scenario with a conflict on one file.
            First, assert that a MergeConflict is raised when the conflict is detected
            Second, test the force flag to overwrite the conflict using the incoming branch."""
        lb = mock_labbook_lfs_disabled[2]

        # Insert a text file into the master branch of lb
        with open('/tmp/s1.txt', 'w') as s1:
            s1.write('original-file\ndata')
        FileOperations.insert_file(lb, section='code', src_file=s1.name)

        # Create a new branch from this point and make a change to s1.txt
        bm = BranchManager(lb, username=TEST_USER)
        feature_name = bm.create_branch("example-feature-branch")
        with open('/tmp/s1.txt', 'w') as s1:
            s1.write('new-changes-in\nfeature-branch')
        FileOperations.insert_file(lb, section='code', src_file=s1.name)

        # Switch back to the main branch and make a new, conflicting change.
        bm.workon_branch(bm.workspace_branch)
        assert lb.is_repo_clean
        assert not os.path.exists(os.path.join(lb.root_dir, 'output/sample'))
        with open('/tmp/s1.txt', 'w') as s1:
            s1.write('upstream-changes-from-workspace')
        FileOperations.insert_file(lb,
                                   section='code',
                                   src_file=s1.name,
                                   dst_path='')

        # Switch back to feature branch -- make sure that failed merges rollback to state before merge.
        bm.workon_branch(feature_name)
        cp = bm.repository.git.commit_hash
        try:
            bm.merge_from(bm.workspace_branch)
            assert False, "merge_from should have thrown conflict"
        except MergeConflict as m:
            # Assert that the conflicted file(s) are as expected
            assert m.file_conflicts == ['code/s1.txt']
        assert lb.is_repo_clean

        # Now force the merge with "ours": the current (feature) branch's changes are kept
        bm.merge_use_ours(bm.workspace_branch)
        assert open(os.path.join(lb.root_dir, 'code', 's1.txt')).read(1000) == \
            'new-changes-in\nfeature-branch'
        assert lb.is_repo_clean

        # Reset this branch
        call_subprocess(f'git reset --hard {cp}'.split(),
                        cwd=bm.repository.root_dir)
        bm.merge_use_theirs(bm.workspace_branch)
        assert open(os.path.join(lb.root_dir, 'code', 's1.txt')).read(1000) == \
               'upstream-changes-from-workspace'
        assert lb.is_repo_clean
Example #18
    def remove_remote_branch(self, target_branch: str) -> None:
        # If no remote, do nothing.
        if not self.repository.has_remote:
            return

        if target_branch not in self.branches_remote:
            raise InvalidBranchName(f'Cannot delete `{target_branch}`; does not exist')

        if target_branch == self.active_branch:
            raise BranchWorkflowViolation(f'Cannot delete current active branch `{target_branch}`')

        logger.info(f'Removing remote branch {target_branch} from {str(self.repository)}')
        call_subprocess(f'git push origin --delete {target_branch}'.split(), cwd=self.repository.root_dir)
Example #19
    def put_labbook(self, path: str, username: str, owner: str) -> LabBook:
        """ Take given path to a candidate labbook and insert it
        into its proper place in the file system.

        Args:
            path: Path to a given labbook
            username: Active username
            owner: Intended owner of labbook

        Returns:
            LabBook
        """
        try:
            lb = self._put_labbook(path, username, owner)

            # Init dataset submodules if present
            if len(lb.git.repo.submodules) > 0:

                # Link datasets
                for submodule in lb.git.list_submodules():
                    try:

                        namespace, dataset_name = submodule['name'].split("&")
                        rel_submodule_dir = os.path.join(
                            '.gigantum', 'datasets', namespace, dataset_name)
                        submodule_dir = os.path.join(lb.root_dir,
                                                     rel_submodule_dir)
                        call_subprocess(
                            ['git', 'submodule', 'init', rel_submodule_dir],
                            cwd=lb.root_dir,
                            check=True)
                        call_subprocess(
                            ['git', 'submodule', 'update', rel_submodule_dir],
                            cwd=lb.root_dir,
                            check=True)

                        ds = InventoryManager().load_dataset_from_directory(
                            submodule_dir)
                        ds.namespace = namespace
                        manifest = Manifest(ds, username)
                        manifest.link_revision()

                    except Exception as err:
                        logger.exception(
                            f"Failed to import submodule: {submodule['name']}")
                        continue

            return lb
        except Exception as e:
            logger.error(e)
            raise InventoryException(e)
Example #20
    def unlink_dataset_from_labbook(self, dataset_namespace: str,
                                    dataset_name: str,
                                    labbook: LabBook) -> None:
        """Method to removed a dataset reference from a labbook

        Args:
            dataset_namespace: Namespace (owner) of the linked dataset
            dataset_name: Name of the linked dataset
            labbook: LabBook to unlink the dataset from

        Returns:
            None
        """
        submodule_dir = os.path.join('.gigantum', 'datasets',
                                     dataset_namespace, dataset_name)
        call_subprocess(['git', 'rm', '-f', submodule_dir],
                        cwd=labbook.root_dir)

        git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules',
                                      f"{dataset_namespace}&{dataset_name}")
        if os.path.exists(git_module_dir):
            shutil.rmtree(git_module_dir)

        absolute_submodule_dir = os.path.join(labbook.root_dir, '.gigantum',
                                              'datasets', dataset_namespace,
                                              dataset_name)
        if os.path.exists(absolute_submodule_dir):
            shutil.rmtree(absolute_submodule_dir)

        labbook.git.add_all()
        commit = labbook.git.commit("removing submodule ref")

        # Add Activity Record
        adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                   show=False,
                                   action=ActivityAction.DELETE)
        adr.add_value(
            'text/markdown',
            f"Unlinked Dataset `{dataset_namespace}/{dataset_name}` from project"
        )
        ar = ActivityRecord(
            ActivityType.DATASET,
            message=
            f"Unlinked Dataset {dataset_namespace}/{dataset_name} from project.",
            linked_commit=commit.hexsha,
            tags=["dataset"],
            show=True)
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)
    def _import_zip(cls,
                    archive_path: str,
                    username: str,
                    owner: str,
                    fetch_method: Callable,
                    put_method: Callable,
                    update_meta: Callable = lambda _: None) -> Repository:

        if not os.path.isfile(archive_path):
            raise ValueError(
                f'Archive at {archive_path} is not a file or does not exist')

        if '.zip' not in archive_path and '.lbk' not in archive_path:
            raise ValueError(
                f'Archive at {archive_path} does not have .zip (or legacy .lbk) extension'
            )

        statusmsg = 'Unzipping Repository Archive...'
        update_meta(statusmsg)

        # Unzip into a temporary directory and cleanup if fails
        with TemporaryDirectory() as temp_dir:
            call_subprocess(['unzip', archive_path, '-d', 'project'],
                            cwd=temp_dir,
                            check=True)

            pdirs = os.listdir(os.path.join(temp_dir, 'project'))
            if len(pdirs) != 1:
                raise ValueError("Expected only one directory unzipped")
            unzipped_path = os.path.join(temp_dir, 'project', pdirs[0])

            repo = fetch_method(unzipped_path)
            statusmsg = f'{statusmsg}\nCreating workspace branch...'
            update_meta(statusmsg)

            # Also, remove any lingering remotes.
            # If it gets re-published, it will be to a new remote.
            if repo.has_remote:
                repo.git.remove_remote('origin')

            # Ignore execution bit changes (due to moving between windows/mac/linux)
            call_subprocess("git config core.fileMode false".split(),
                            cwd=repo.root_dir)

            repo = put_method(unzipped_path, username=username, owner=owner)

            statusmsg = f'{statusmsg}\nImport Complete'
            update_meta(statusmsg)

            return repo
    def test_reset__reset_local_change_same_owner(self,
                                                  mock_labbook_lfs_disabled,
                                                  mock_config_file):
        """ test reset performs no operation when there's nothing to do """
        username = '******'
        lb = mock_labbook_lfs_disabled[2]
        wf = LabbookWorkflow(lb)
        wf.publish(username=username)
        commit_to_check = lb.git.commit_hash

        # Make some change locally and commit
        fpath = os.path.join(lb.root_dir, 'input', 'testfile')
        with open(fpath, 'w') as f:
            f.write('filedata')
        lb.sweep_uncommitted_changes()
        assert lb.git.commit_hash != commit_to_check

        # Make an UNTRACKED change locally, make sure it gets cleaned up
        untracked_file = os.path.join(lb.root_dir, 'output', 'untracked-file')
        with open(untracked_file, 'w') as f:
            f.write('untracked data')

        # Do a reset and make sure state resets appropriately
        wf.reset(username=username)
        assert lb.git.commit_hash == commit_to_check
        assert not os.path.exists(fpath)
        assert not os.path.exists(untracked_file)
        remote_hash = call_subprocess('git log -n 1 --oneline'.split(),
                                      cwd=wf.remote).split()[0]
        assert remote_hash in lb.git.commit_hash
Example #23
    def run_fetch(key: str):
        # Get identifying info from key
        repository_type, username, owner_name, repository_name = key.split('&')
        if repository_type == 'labbook':
            repo = InventoryManager().load_labbook(username, owner_name,
                                                   repository_name)
        elif repository_type == 'dataset':
            repo = InventoryManager().load_dataset(username, owner_name,
                                                   repository_name)
        else:
            raise ValueError(f"Unsupported repository type: {repository_type}")

        # Fetching is only possible when a remote is configured
        if repo.remote:
            call_subprocess(['git', 'fetch'], cwd=repo.root_dir)

        return None
Example #24
    def reset(self, username: str):
        """ Perform a Git reset to undo all local changes"""
        bm = BranchManager(self.repository, username)
        if self.remote and bm.active_branch in bm.branches_remote:
            self.repository.git.fetch()
            self.repository.sweep_uncommitted_changes()
            call_subprocess(
                ['git', 'reset', '--hard', f'origin/{bm.active_branch}'],
                cwd=self.repository.root_dir)
            call_subprocess(['git', 'clean', '-fd'],
                            cwd=self.repository.root_dir)
            self.repository.git.clear_checkout_context()

            # update dataset references on reset
            if isinstance(self.repository, LabBook):
                InventoryManager().update_linked_dataset(self.repository,
                                                         username,
                                                         init=True)
Example #25
def clone_repo(remote_url: str, username: str, owner: str,
               load_repository: Callable[[str], Any],
               put_repository: Callable[[str, str, str], Any],
               make_owner: bool = False) -> Repository:

    with tempfile.TemporaryDirectory() as tempdir:
        # Clone into a temporary directory, such that if anything
        # gets messed up, then this directory will be cleaned up.
        path = _clone(remote_url=remote_url, working_dir=tempdir)
        candidate_repo = load_repository(path)

        if os.environ.get('WINDOWS_HOST'):
            logger.warning("Imported on Windows host - set fileMode to false")
            call_subprocess("git config core.fileMode false".split(),
                            cwd=candidate_repo.root_dir)

        repository = put_repository(candidate_repo.root_dir, username, owner)

    return repository
    def test_migrate(self, mock_config_file):
        p = resource_filename('gtmcore', 'labbook')
        p2 = os.path.join(p, 'tests', 'test.zip')

        with tempfile.TemporaryDirectory() as td:
            call_subprocess(f"unzip {p2} -d {td}".split(), cwd=td)
            temp_lb_path = os.path.join(td, 'test')

            # Tests backwards compatibility (test.zip is a very old schema 1 LabBook)
            lb = InventoryManager(
                mock_config_file[0]).load_labbook_from_directory(temp_lb_path)
            assert lb.schema < CURRENT_SCHEMA

            # Test schema migration -- migrate and then refresh.
            migrate_schema_to_current(lb.root_dir)
            lb = InventoryManager(
                mock_config_file[0]).load_labbook_from_directory(lb.root_dir)
            assert validate_labbook_schema(CURRENT_SCHEMA, lb_data=lb.data)
            assert lb.schema == CURRENT_SCHEMA
Example #27
    def put_file(cls,
                 labbook: LabBook,
                 section: str,
                 src_file: str,
                 dst_path: str,
                 txid: Optional[str] = None) -> Dict[str, Any]:
        """Move the file at `src_file` to `dst_dir`. Filename removes
        upload ID if present. This operation does NOT commit or create an
        activity record.

        Args:
            labbook: Subject LabBook
            section: Section name (code, input, output)
            src_file: Full path of file to insert into
            dst_path: Path within section to insert `src_file`
            txid: Optional transaction id

        Returns:
           Full path to inserted file.
        """
        if not os.path.isabs(src_file):
            raise ValueError(f"Source file `{src_file}` not an absolute path")

        if not os.path.isfile(src_file):
            raise ValueError(f"Source file does not exist at `{src_file}`")

        labbook.validate_section(section)
        r = call_subprocess(
            ['git', 'check-ignore',
             os.path.basename(dst_path)],
            cwd=labbook.root_dir,
            check=False)
        if dst_path and r and os.path.basename(dst_path) in r:
            logger.warning(f"File {dst_path} matches gitignore; "
                           f"not put into {str(labbook)}")
            raise FileOperationsException(f"`{dst_path}` matches "
                                          f"ignored pattern")

        mdst_dir = _make_path_relative(dst_path)
        full_dst = os.path.join(labbook.root_dir, section, mdst_dir)
        full_dst = full_dst.replace('..', '')
        full_dst = full_dst.replace('~', '')

        # Force overwrite if file already exists
        if os.path.isfile(os.path.join(full_dst, os.path.basename(src_file))):
            os.remove(os.path.join(full_dst, os.path.basename(src_file)))

        if not os.path.isdir(os.path.dirname(full_dst)):
            os.makedirs(os.path.dirname(full_dst), exist_ok=True)

        fdst = shutil.move(src_file, full_dst)
        relpath = fdst.replace(os.path.join(labbook.root_dir, section), '')
        return cls.get_file_info(labbook, section, relpath)
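
_make_path_relative is not shown in this snippet; the subsequent `replace('..', '')` / `replace('~', '')` calls exist to keep the destination inside the section directory. A hedged sketch of equivalent sanitization follows (a hypothetical helper, not the project's implementation):

import os


def sanitize_dst_path(section_root: str, dst_path: str) -> str:
    """Join dst_path under section_root, refusing anything that would escape it."""
    # Drop traversal and home-expansion tokens, then force the path to be relative
    cleaned = dst_path.replace('..', '').replace('~', '').lstrip(os.path.sep)
    root = os.path.normpath(section_root)
    full = os.path.normpath(os.path.join(root, cleaned))
    if full != root and not full.startswith(root + os.path.sep):
        raise ValueError(f"Destination `{dst_path}` escapes `{section_root}`")
    return full


# Example: sanitize_dst_path('/repo/code', '../../etc') -> '/repo/code/etc'
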
Example #28
def _calc_disk_free() -> Tuple[float, float]:
    disk_results = call_subprocess("df -h /".split(), cwd='/').split('\n')
    _, disk_size, disk_used, disk_avail, use_pct, _ = disk_results[1].split()

    disk_size_num, disk_size_unit = float(disk_used[:-1]), disk_used[-1]
    if disk_size_unit == 'M':
        disk_size_num /= 1000.0

    disk_avail_num, disk_avail_unit = float(disk_avail[:-1]), disk_avail[-1]
    if disk_avail_unit == 'M':
        disk_avail_num /= 1000.0

    return disk_size_num, disk_avail_num
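
The parsing above is positional over `df -h` output (filesystem, size, used, available, use%, mount point), and only the `M` suffix is converted to GB. A standalone illustration against an assumed sample of that output:

# Assumed sample output of `df -h /`
sample_df = ('Filesystem      Size  Used Avail Use% Mounted on\n'
             'overlay         196G   67G  120G  36% /\n')

_, disk_size, disk_used, disk_avail, use_pct, _ = sample_df.split('\n')[1].split()
print(disk_size, disk_used, disk_avail, use_pct)   # -> 196G 67G 120G 36%
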
Example #29
        def _clean_submodule():
            """Helper method to clean a submodule reference from a repository"""
            if os.path.exists(absolute_submodule_dir):
                logger.warning(
                    f"Cleaning {relative_submodule_dir} from parent git repo")
                try:
                    call_subprocess([
                        'git', 'rm', '-f', '--cached', relative_submodule_dir
                    ],
                                    cwd=labbook.root_dir)
                except subprocess.CalledProcessError:
                    logger.warning(
                        f"git rm on {relative_submodule_dir} failed. Continuing..."
                    )

            if os.path.exists(absolute_submodule_dir):
                logger.warning(f"Removing {absolute_submodule_dir} directory")
                shutil.rmtree(absolute_submodule_dir)

            if os.path.exists(git_module_dir):
                logger.warning(f"Removing {git_module_dir} directory")
                shutil.rmtree(git_module_dir)
    def test_migrate_old_schema_1_project(self, mock_config_file):
        """ Test migrating a very old schema 1/gm.workspace LabBook """
        p = resource_filename('gtmcore', 'workflows')
        p2 = os.path.join(p, 'tests', 'snappy.zip')

        with tempfile.TemporaryDirectory() as td:
            call_subprocess(f"unzip {p2} -d {td}".split(), cwd=td)
            temp_lb_path = os.path.join(td, 'snappy')

            # Tests backwards compatibility (snappy.zip is a very old schema 1 LabBook)
            lb = InventoryManager(
                mock_config_file[0]).load_labbook_from_directory(temp_lb_path)
            wf = LabbookWorkflow(lb)

            wf.labbook.remove_remote()
            wf.migrate()

            # Test that current branch is as appropriate
            assert wf.labbook.active_branch == 'master'

            # Test that there is an activity record indicating the migration
            assert any([
                'Migrate schema to 2' in c['message']
                for c in wf.labbook.git.log()[:5]
            ])

            # Test schema has successfully rolled to 2
            assert wf.labbook.schema == 2

            # Test that the untracked space exists (writing to it does not dirty the repo)
            assert wf.labbook.is_repo_clean
            with open(
                    os.path.join(lb.root_dir, 'output/untracked',
                                 'untracked-file'), 'wb') as fb:
                fb.write(b'cat' * 100)
            assert wf.labbook.is_repo_clean