Ejemplo n.º 1
0
    def complete_batch(cls,
                       labbook: LabBook,
                       txid: str,
                       cancel: bool = False,
                       rollback: bool = False) -> None:
        """
        Indicate a batch upload is finished and sweep all new files.

        Args:
            labbook: Subject labbook
            txid: Transaction id (correlator)
            cancel: Indicate transaction finished but due to cancellation
            rollback: Undo all local changes if cancelled (default False)

        Returns:
            None
        """

        if cancel and rollback:
            logger.warning(f"Cancelled tx {txid}, doing git reset")
            call_subprocess(['git', 'reset', '--hard'], cwd=labbook.root_dir)
        else:
            logger.info(f"Done batch upload {txid}, cancelled={cancel}")
            if cancel:
                logger.warning("Sweeping aborted batch upload.")
            m = "Cancelled upload `{txid}`. " if cancel else ''
            labbook.sweep_uncommitted_changes(upload=True,
                                              extra_msg=m,
                                              show=True)
Ejemplo n.º 2
0
    def delete_files(cls, labbook: LabBook, section: str,
                     relative_paths: List[str]) -> None:
        """Delete file (or directory) from inside lb section.

        The list of paths is deleted in series. Only provide "parent" nodes in the file tree. This is because deletes
        on directories will remove all child objects, so subsequent deletes of individual files will then fail.


        Args:
            labbook(LabBook): Subject LabBook
            section(str): Section name (code, input, output)
            relative_paths(list(str)): a list of relative paths from labbook root to target

        Returns:
            None
        """
        labbook.validate_section(section)

        if not isinstance(relative_paths, list):
            raise ValueError("Must provide list of paths to remove")

        for file_path in relative_paths:
            relative_path = LabBook.make_path_relative(file_path)
            target_path = os.path.join(labbook.root_dir, section,
                                       relative_path)
            if not os.path.exists(target_path):
                raise ValueError(
                    f"Attempted to delete non-existent path at `{target_path}`"
                )
            else:
                labbook.git.remove(target_path, force=True, keep_file=False)
                if os.path.exists(target_path):
                    raise IOError(f"Failed to delete path: {target_path}")
        labbook.sweep_uncommitted_changes(show=True)
Ejemplo n.º 3
0
    def stop_container(cls,
                       labbook: LabBook,
                       username: Optional[str] = None) -> Tuple[LabBook, bool]:
        """ Stop the given labbook. Returns True in the second field if stopped,
            otherwise False (False can simply imply no container was running).

        Args:
            labbook: Subject labbook
            username: Optional username of active user

        Returns:
            A tuple of (Labbook, boolean indicating whether a container was successfully stopped).
        """
        owner = InventoryManager().query_owner(labbook)
        n = infer_docker_image_name(labbook_name=labbook.name,
                                    owner=owner,
                                    username=username)
        logger.info(f"Stopping {str(labbook)} ({n})")

        try:
            stopped = stop_labbook_container(n)
        finally:
            # Save state of LB when container turned off.
            labbook.sweep_uncommitted_changes()

        return labbook, stopped
Ejemplo n.º 4
0
    def process(self, result_obj: ActivityRecord, data: List[ExecutionData],
                status: Dict[str, Any], metadata: Dict[str, Any]) -> ActivityRecord:
        """Method to update a result object based on code and result data

        Args:
            result_obj(ActivityNote): An object containing the note
            data(list): A list of ExecutionData instances containing the data for this record
            status(dict): A dict containing the result of git status from gitlib
            metadata(str): A dictionary containing Dev Env specific or other developer defined data

        Returns:
            ActivityRecord
        """
        for cnt, filename in enumerate(status['untracked']):
            # skip any file in .git or .gigantum dirs
            if ".git" in filename or ".gigantum" in filename:
                continue

            activity_type, activity_detail_type, section = LabBook.infer_section_from_relative_path(filename)

            adr = ActivityDetailRecord(activity_detail_type, show=False, importance=max(255-cnt, 0),
                                       action=ActivityAction.CREATE)
            # We use a "private" attribute here, but it's better than the silent breakage that happened before
            # cf. https://github.com/gigantum/gigantum-client/issues/436
            if section == LabBook._default_activity_section:
                msg = f'Created new file `{filename}` in the Project Root. Note, it is best practice to use the Code, ' \
                    'Input, and Output sections exclusively. '
            else:
                msg = f"Created new {section} file `{filename}`"
            adr.add_value('text/markdown', msg)
            result_obj.add_detail_object(adr)

        cnt = 0
        for filename, change in status['unstaged']:
            # skip any file in .git or .gigantum dirs
            if ".git" in filename or ".gigantum" in filename:
                continue

            activity_type, activity_detail_type, section = LabBook.infer_section_from_relative_path(filename)

            if change == "deleted":
                action = ActivityAction.DELETE
            elif change == "added":
                action = ActivityAction.CREATE
            elif change == "modified":
                action = ActivityAction.EDIT
            elif change == "renamed":
                action = ActivityAction.EDIT
            else:
                action = ActivityAction.NOACTION

            adr = ActivityDetailRecord(activity_detail_type, show=False, importance=max(255-cnt, 0), action=action)
            adr.add_value('text/markdown', f"{change[0].upper() + change[1:]} {section} file `{filename}`")
            result_obj.add_detail_object(adr)
            cnt += 1

        return result_obj
Ejemplo n.º 5
0
    def walkdir(cls,
                labbook: LabBook,
                section: str,
                show_hidden: bool = False) -> List[Dict[str, Any]]:
        """Return a list of all files and directories in a section of the labbook. Never includes the .git or
         .gigantum directory.

        Args:
            labbook: Subject LabBook
            section(str): The labbook section (code, input, output) to walk
            show_hidden(bool): If True, include hidden directories (EXCLUDING .git and .gigantum)

        Returns:
            List[Dict[str, str]]: List of dictionaries containing file and directory metadata
        """
        labbook.validate_section(section)

        keys: List[str] = list()
        # base_dir is the root directory to search, to account for relative paths inside labbook.
        base_dir = os.path.join(labbook.root_dir, section)
        if not os.path.isdir(base_dir):
            raise ValueError(
                f"Labbook walkdir base_dir {base_dir} not an existing directory"
            )

        for root, dirs, files in os.walk(base_dir):
            # Remove directories we ignore so os.walk does not traverse into them during future iterations
            if '.git' in dirs:
                del dirs[dirs.index('.git')]
            if '.gigantum' in dirs:
                del dirs[dirs.index('.gigantum')]

            # For more deterministic responses, sort resulting paths alphabetically.
            # Store directories then files, so pagination loads things in an intuitive order
            dirs.sort()
            keys.extend(
                sorted([
                    os.path.join(root.replace(base_dir, ''), d) for d in dirs
                ]))
            keys.extend(
                sorted([
                    os.path.join(root.replace(base_dir, ''), f) for f in files
                ]))

        # Create stats
        stats: List[Dict[str, Any]] = list()
        for f_p in keys:
            if not show_hidden and any(
                [len(p) and p[0] == '.' for p in f_p.split(os.path.sep)]):
                continue
            stats.append(cls.get_file_info(labbook, section, f_p))

        return stats
Ejemplo n.º 6
0
    def put_file(cls,
                 labbook: LabBook,
                 section: str,
                 src_file: str,
                 dst_path: str,
                 txid: Optional[str] = None) -> Dict[str, Any]:
        """Move the file at `src_file` to `dst_dir`. Filename removes
        upload ID if present. This operation does NOT commit or create an
        activity record.

        Args:
            labbook: Subject LabBook
            section: Section name (code, input, output)
            src_file: Full path of file to insert into
            dst_path: Path within section to insert `src_file`
            txid: Optional transaction id

        Returns:
           Full path to inserted file.
        """
        if not os.path.abspath(src_file):
            raise ValueError(f"Source file `{src_file}` not an absolute path")

        if not os.path.isfile(src_file):
            raise ValueError(f"Source file does not exist at `{src_file}`")

        labbook.validate_section(section)
        r = call_subprocess(
            ['git', 'check-ignore',
             os.path.basename(dst_path)],
            cwd=labbook.root_dir,
            check=False)
        if dst_path and r and os.path.basename(dst_path) in r:
            logger.warning(f"File {dst_path} matches gitignore; "
                           f"not put into {str(labbook)}")
            raise FileOperationsException(f"`{dst_path}` matches "
                                          f"ignored pattern")

        mdst_dir = _make_path_relative(dst_path)
        full_dst = os.path.join(labbook.root_dir, section, mdst_dir)
        full_dst = full_dst.replace('..', '')
        full_dst = full_dst.replace('~', '')

        # Force overwrite if file already exists
        if os.path.isfile(os.path.join(full_dst, os.path.basename(src_file))):
            os.remove(os.path.join(full_dst, os.path.basename(src_file)))

        if not os.path.isdir(os.path.dirname(full_dst)):
            os.makedirs(os.path.dirname(full_dst), exist_ok=True)

        fdst = shutil.move(src_file, full_dst)
        relpath = fdst.replace(os.path.join(labbook.root_dir, section), '')
        return cls.get_file_info(labbook, section, relpath)
Ejemplo n.º 7
0
def migrate_labbook_untracked_space(labbook: LabBook) -> None:

    gitignore_path = os.path.join(labbook.root_dir, '.gitignore')
    gitignored_lines = open(gitignore_path).readlines()
    has_untracked_dir = any(
        ['output/untracked' in l.strip() for l in gitignored_lines])
    if not has_untracked_dir:
        with open(gitignore_path, 'a') as gi_file:
            gi_file.write('\n\n# Migrated - allow untracked area\n'
                          'output/untracked\n')
        labbook.sweep_uncommitted_changes(extra_msg="Added untracked area")

    # Make the untracked directory -- makedirs is no-op if already exists
    untracked_path = os.path.join(labbook.root_dir, 'output/untracked')
    if not os.path.exists(untracked_path):
        os.makedirs(untracked_path, exist_ok=True)
Ejemplo n.º 8
0
    def insert_file(cls,
                    labbook: LabBook,
                    section: str,
                    src_file: str,
                    dst_path: str = '') -> Dict[str, Any]:
        """ Move the file at `src_file` into the `dst_dir`, overwriting
        if a file already exists there. This calls `copy_into_container()` under-
        the-hood, but will create an activity record.

        Args:
            labbook: Subject labbook
            section: Section name (code, input, output)
            src_file: Full path of file to insert into
            dst_path: Relative path within labbook where `src_file`
                      should be copied to

        Returns:
            dict: The inserted file's info
        """

        finfo = FileOperations.put_file(labbook=labbook,
                                        section=section,
                                        src_file=src_file,
                                        dst_path=dst_path)

        rel_path = os.path.join(section, finfo['key'])

        # If we are setting this section to be untracked
        activity_type, activity_detail_type, section_str = \
            labbook.get_activity_type_from_section(section)

        commit_msg = f"Added new {section_str} file {rel_path}"
        try:
            labbook.git.add(rel_path)
            commit = labbook.git.commit(commit_msg)
        except Exception as x:
            logger.error(x)
            os.remove(dst_path)
            raise FileOperationsException(x)

        # Create Activity record and detail
        _, ext = os.path.splitext(rel_path) or 'file'
        adr = ActivityDetailRecord(activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain', commit_msg)
        ar = ActivityRecord(activity_type,
                            message=commit_msg,
                            show=True,
                            importance=255,
                            linked_commit=commit.hexsha,
                            tags=[ext])
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

        return finfo
Ejemplo n.º 9
0
def write_function(filename: str, delay: int, value: str, labbook: LabBook) -> None:
    """
    A test function that appends to a file after a delay
    """
    time.sleep(delay)
    with labbook.lock():
        with open(filename, 'at') as f:
            f.write(value)
            time.sleep(2)
Ejemplo n.º 10
0
    def listdir(cls,
                labbook: LabBook,
                section: str,
                base_path: Optional[str] = None,
                show_hidden: bool = False) -> List[Dict[str, Any]]:
        """Return a list of all files and directories in a directory. Never includes the .git or
         .gigantum directory.

        Args:
            labbook: Subject labbook
            section(str): the labbook section to start from
            base_path(str): Relative base path, if not listing from labbook's root.
            show_hidden(bool): If True, include hidden directories (EXCLUDING .git and .gigantum)

        Returns:
            List[Dict[str, str]]: List of dictionaries containing file and directory metadata
        """
        labbook.validate_section(section)
        # base_dir is the root directory to search, to account for relative paths inside labbook.
        base_dir = os.path.join(labbook.root_dir, section, base_path or '')
        if not os.path.isdir(base_dir):
            raise ValueError(
                f"Labbook listdir base_dir {base_dir} not an existing directory"
            )

        stats: List[Dict[str, Any]] = list()
        for item in os.listdir(base_dir):
            if item in ['.git', '.gigantum']:
                # Never include .git or .gigantum
                continue

            if not show_hidden and any(
                [len(p) and p[0] == '.' for p in item.split('/')]):
                continue

            # Create tuple (isDir, key)
            stats.append(
                cls.get_file_info(labbook, section,
                                  os.path.join(base_path or "", item)))

        # For more deterministic responses, sort resulting paths alphabetically.
        return sorted(stats, key=lambda a: a['key'])
Ejemplo n.º 11
0
 def test_make_path_relative(self):
     vectors = [
         # In format of input: expected output
         (None, None),
         ('', ''),
         ('/', ''),
         ('//', ''),
         ('/////cats', 'cats'),
         ('//cats///', 'cats///'),
         ('cats', 'cats'),
         ('/cats/', 'cats/'),
         ('complex/.path/.like/this', 'complex/.path/.like/this'),
         ('//complex/.path/.like/this', 'complex/.path/.like/this')
     ]
     for sample_input, expected_output in vectors:
         assert LabBook.make_path_relative(sample_input) == expected_output
Ejemplo n.º 12
0
    def _update_branch_description(cls, lb: LabBook, description: str):
        # Update the description on branch creation
        lb.description = description
        lb.git.add(lb.config_path)
        commit = lb.git.commit('Updating description')

        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
        adr.add_value('text/plain', description)
        ar = ActivityRecord(ActivityType.LABBOOK,
                            message="Updated description of Project",
                            linked_commit=commit.hexsha,
                            tags=["labbook"],
                            show=False)
        ar.add_detail_object(adr)
        ars = ActivityStore(lb)
        ars.create_activity_record(ar)
Ejemplo n.º 13
0
 def test_makedir_simple(self, mock_labbook):
     # Note that "score" refers to the count of .gitkeep files.
     lb = mock_labbook[2]
     long_dir = "code/non/existant/dir/should/now/be/made"
     dirs = ["code/cat_dir", "code/dog_dir", "code/mouse_dir/", "code/mouse_dir/new_dir", long_dir]
     for d in dirs:
         FO.makedir(lb, d)
         assert os.path.isdir(os.path.join(lb.root_dir, d))
         assert os.path.isfile(os.path.join(lb.root_dir, d, '.gitkeep'))
     score = 0
     for root, dirs, files in os.walk(os.path.join(lb.root_dir, 'code', 'non')):
         for f in files:
             if f == '.gitkeep':
                 score += 1
     # Ensure that count of .gitkeep files equals the number of subdirs, excluding the code dir.
     assert score == len(LabBook.make_path_relative(long_dir).split(os.sep)) - 1
Ejemplo n.º 14
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               labbook_name,
                               remote_url,
                               client_mutation_id=None):
        username = get_logged_in_username()
        logger.info(f"Importing remote labbook from {remote_url}")
        lb = LabBook(author=get_logged_in_author())
        default_remote = lb.client_config.config['git']['default_remote']
        admin_service = None
        for remote in lb.client_config.config['git']['remotes']:
            if default_remote == remote:
                admin_service = lb.client_config.config['git']['remotes'][
                    remote]['admin_service']
                break

        # Extract valid Bearer token
        if hasattr(info.context, 'headers'
                   ) and "HTTP_AUTHORIZATION" in info.context.headers.environ:
            token = parse_token(
                info.context.headers.environ["HTTP_AUTHORIZATION"])
        else:
            raise ValueError(
                "Authorization header not provided. Must have a valid session to query for collaborators"
            )

        gl_mgr = GitLabManager(default_remote,
                               admin_service=admin_service,
                               access_token=token)
        gl_mgr.configure_git_credentials(default_remote, username)

        job_metadata = {'method': 'import_labbook_from_remote'}
        job_kwargs = {'remote_url': remote_url, 'username': username}

        dispatcher = Dispatcher()
        job_key = dispatcher.dispatch_task(jobs.import_labbook_from_remote,
                                           metadata=job_metadata,
                                           kwargs=job_kwargs)
        logger.info(
            f"Dispatched import_labbook_from_remote({remote_url}) to Job {job_key}"
        )

        return ImportRemoteLabbook(job_key=job_key.key_str)
Ejemplo n.º 15
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               labbook_name,
                               remote_url,
                               client_mutation_id=None):
        username = get_logged_in_username()
        logger.info(f"Importing remote labbook from {remote_url}")
        lb = LabBook(author=get_logged_in_author())
        default_remote = lb.client_config.config['git']['default_remote']
        admin_service = None
        for remote in lb.client_config.config['git']['remotes']:
            if default_remote == remote:
                admin_service = lb.client_config.config['git']['remotes'][
                    remote]['admin_service']
                break

        # Extract valid Bearer token
        if hasattr(info.context, 'headers'
                   ) and "HTTP_AUTHORIZATION" in info.context.headers.environ:
            token = parse_token(
                info.context.headers.environ["HTTP_AUTHORIZATION"])
        else:
            raise ValueError(
                "Authorization header not provided. Must have a valid session to query for collaborators"
            )

        gl_mgr = GitLabManager(default_remote,
                               admin_service=admin_service,
                               access_token=token)
        gl_mgr.configure_git_credentials(default_remote, username)

        wf = LabbookWorkflow.import_from_remote(remote_url, username=username)
        import_owner = InventoryManager().query_owner(wf.labbook)
        # TODO: Fix cursor implementation, this currently doesn't make sense
        cursor = base64.b64encode(f"{0}".encode('utf-8'))
        lbedge = LabbookConnection.Edge(node=Labbook(owner=import_owner,
                                                     name=wf.labbook.name),
                                        cursor=cursor)
        return ImportRemoteLabbook(new_labbook_edge=lbedge)
Ejemplo n.º 16
0
    def _make_move_activity_record(cls, labbook: LabBook, section: str,
                                   dst_abs_path: str, commit_msg: str) -> None:
        if os.path.isdir(dst_abs_path):
            labbook.git.add_all(dst_abs_path)
        else:
            labbook.git.add(dst_abs_path)

        commit = labbook.git.commit(commit_msg)
        activity_type, activity_detail_type, section_str = labbook.get_activity_type_from_section(
            section)
        adr = ActivityDetailRecord(activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=ActivityAction.EDIT)
        adr.add_value('text/markdown', commit_msg)
        ar = ActivityRecord(activity_type,
                            message=commit_msg,
                            linked_commit=commit.hexsha,
                            show=True,
                            importance=255,
                            tags=['file-move'])
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)
Ejemplo n.º 17
0
    def move_file(cls, labbook: LabBook, section: str, src_rel_path: str, dst_rel_path: str) \
            -> List[Dict[str, Any]]:
        """Move a file or directory within a labbook, but not outside of it. Wraps
        underlying "mv" call.

        Args:
            labbook: Subject LabBook
            section(str): Section name (code, input, output)
            src_rel_path(str): Source file or directory
            dst_rel_path(str): Target file name and/or directory
        """

        # Start with Validations
        labbook.validate_section(section)
        if not src_rel_path:
            raise ValueError("src_rel_path cannot be None or empty")

        if dst_rel_path is None:
            raise ValueError("dst_rel_path cannot be None or empty")

        src_rel_path = LabBook.make_path_relative(src_rel_path)
        dst_rel_path = LabBook.make_path_relative(dst_rel_path)

        src_abs_path = os.path.join(labbook.root_dir, section,
                                    src_rel_path.replace('..', ''))
        dst_abs_path = os.path.join(labbook.root_dir, section,
                                    dst_rel_path.replace('..', ''))

        if not os.path.exists(src_abs_path):
            raise ValueError(f"No src file exists at `{src_abs_path}`")

        try:
            src_type = 'directory' if os.path.isdir(src_abs_path) else 'file'
            logger.info(
                f"Moving {src_type} `{src_abs_path}` to `{dst_abs_path}`")
            labbook.git.remove(src_abs_path, keep_file=True)
            final_dest = shutil.move(src_abs_path, dst_abs_path)
            commit_msg = f"Moved {src_type} `{src_rel_path}` to `{dst_rel_path}`"
            cls._make_move_activity_record(labbook, section, dst_abs_path,
                                           commit_msg)

            if os.path.isfile(final_dest):
                t = final_dest.replace(os.path.join(labbook.root_dir, section),
                                       '')
                return [cls.get_file_info(labbook, section, t or "/")]
            else:
                moved_files = list()
                t = final_dest.replace(os.path.join(labbook.root_dir, section),
                                       '')
                moved_files.append(
                    cls.get_file_info(labbook, section, t or "/"))
                for root, dirs, files in os.walk(final_dest):
                    rt = root.replace(os.path.join(labbook.root_dir, section),
                                      '')
                    rt = _make_path_relative(rt)
                    for d in sorted(dirs):
                        dinfo = cls.get_file_info(labbook, section,
                                                  os.path.join(rt, d))
                        moved_files.append(dinfo)
                    for f in filter(lambda n: n != '.gitkeep', sorted(files)):
                        finfo = cls.get_file_info(labbook, section,
                                                  os.path.join(rt, f))
                        moved_files.append(finfo)
                return moved_files

        except Exception as e:
            logger.critical(
                "Failed moving file in labbook. Repository may be in corrupted state."
            )
            logger.exception(e)
            raise
Ejemplo n.º 18
0
    def makedir(cls,
                labbook: LabBook,
                relative_path: str,
                make_parents: bool = True,
                create_activity_record: bool = False) -> None:
        """Make a new directory inside the labbook directory.

        Args:
            labbook: Subject LabBook
            relative_path(str): Path within the labbook to make directory
            make_parents(bool): If true, create intermediary directories
            create_activity_record(bool): If true, create commit and activity record

        Returns:
            str: Absolute path of new directory
        """
        if not relative_path:
            raise ValueError("relative_path argument cannot be None or empty")

        relative_path = LabBook.make_path_relative(relative_path)
        new_directory_path = os.path.join(labbook.root_dir, relative_path)
        if os.path.exists(new_directory_path):
            return
        else:
            logger.info(f"Making new directory in `{new_directory_path}`")
            os.makedirs(new_directory_path, exist_ok=make_parents)
            new_dir = ''
            for d in relative_path.split(os.sep):
                new_dir = os.path.join(new_dir, d)
                full_new_dir = os.path.join(labbook.root_dir, new_dir)
                gitkeep_path = os.path.join(full_new_dir, '.gitkeep')
                if not os.path.exists(gitkeep_path):
                    with open(gitkeep_path, 'w') as gitkeep:
                        gitkeep.write(
                            "This file is necessary to keep this directory tracked by Git"
                            " and archivable by compression tools. Do not delete or modify!"
                        )
                    labbook.git.add(gitkeep_path)

            if create_activity_record:
                # Create detail record
                activity_type, activity_detail_type, section_str = labbook.infer_section_from_relative_path(
                    relative_path)
                adr = ActivityDetailRecord(activity_detail_type,
                                           show=False,
                                           importance=0,
                                           action=ActivityAction.CREATE)

                msg = f"Created new {section_str} directory `{relative_path}`"
                commit = labbook.git.commit(msg)
                adr.add_value('text/markdown', msg)

                # Create activity record
                ar = ActivityRecord(activity_type,
                                    message=msg,
                                    linked_commit=commit.hexsha,
                                    show=True,
                                    importance=255,
                                    tags=['directory-create'])
                ar.add_detail_object(adr)

                # Store
                ars = ActivityStore(labbook)
                ars.create_activity_record(ar)