def push(self) -> None:
     if self.size_only:
         flags = ["--size-only"]
     else:
         flags = []
     flags.append("--verbose")
     if self.master == "local":
         if self.sync_mode == "copy":
             ret = self.rclone.copy(self.local_path,
                                    self.remote_origin,
                                    flags=flags)
             if ret["code"] != 0:
                 raise ConfigurationError(
                     "rclone copy raised error %d: %s" %
                     (ret["code"], ret["error"]))
         else:
             ret = self.rclone.sync(self.local_path,
                                    self.remote_origin,
                                    flags=flags)
             if ret["code"] != 0:
                 raise ConfigurationError(
                     "rclone sync raised error %d: %s" %
                     (ret["code"], ret["error"]))
     else:
         click.echo("Skiping push of resource %s, master is %s" %
                    (self.name, self.master))
Example No. 2
def validate_template(template):
    if not template.startswith("snapshots/"):
        raise ConfigurationError("Templates must start with 'snapshots/'")
    for mo in TEMPLATE_VAR_RE.finditer(template):
        tvar = mo.group(0)[1:-1]
        if tvar not in VALID_TEMPLATE_VARS:
            raise ConfigurationError(
                "Unknown variable '%s' in results directory template '%s'" %
                (tvar, template))
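
A minimal usage sketch for validate_template. TEMPLATE_VAR_RE and VALID_TEMPLATE_VARS are not shown in this listing, so the values below are illustrative stand-ins (assuming template variables are written as {NAME}), not the library's actual definitions.

import re

TEMPLATE_VAR_RE = re.compile(r"\{\w+\}")   # stand-in: variables written as {NAME}
VALID_TEMPLATE_VARS = frozenset(["USERNAME", "HOSTNAME", "SNAPSHOT_NO", "TAG"])  # stand-in list

validate_template("snapshots/{USERNAME}-{SNAPSHOT_NO}")   # passes silently
try:
    validate_template("snapshots/{BOGUS}")
except ConfigurationError as e:
    print(e)   # Unknown variable 'BOGUS' in results directory template 'snapshots/{BOGUS}'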
    def from_command_line(
        self,
        role,
        name,
        workspace,
        remote_origin,
        local_path,
        config,
        compute_hash,
        export,
        imported,
        master,
        sync_mode,
        size_only,
    ):
        rclone = self._add_prechecks(local_path, remote_origin, config)
        self._copy_from_remote(name, local_path, remote_origin, rclone, master,
                               sync_mode, size_only, workspace.verbose)
        setup_path_for_hashes(role, name, workspace, local_path)
        if imported:
            lineage_path = os.path.join(local_path, "lineage.json")
            if not os.path.exists(lineage_path):
                raise ConfigurationError(
                    "--imported was specified, but missing exported lineage file %s"
                    % lineage_path)
            if (not isinstance(workspace, SnapshotWorkspaceMixin)
                    or not workspace.supports_lineage()):
                raise ConfigurationError(
                    "--imported was specified, but this workspace does not support lineage"
                )
            with open(lineage_path, "r") as f:
                lineage_data = json.load(f)
            if lineage_data["resource_name"] != name:
                raise ConfigurationError(
                    "Resource name in imported lineage '%s' does not match '%s'"
                    % (lineage_data["resource_name"], name))
            cast(SnapshotWorkspaceMixin,
                 workspace).get_lineage_store().import_lineage_file(
                     name, lineage_data["lineages"])

        return RcloneResource(
            name,
            role,
            workspace,
            remote_origin,
            global_local_path=local_path,
            my_local_path=None,
            config=config,
            compute_hash=compute_hash,
            export=export,
            imported=imported,
            master=master,
            sync_mode=sync_mode,
            size_only=size_only,
        )
 def validate_subpath_exists(self, subpath: str) -> None:
     if self.current_snapshot is not None:
         assert self.snapshot_fs is not None
         if not self.snapshot_fs.exists(subpath):
             raise ConfigurationError(
                 f"Subpath {subpath} does not existing in bucket {self.bucket_name} as of snapshot {self.current_snapshot}"
             )
     elif not self.fs.exists(subpath):
         raise ConfigurationError(
             f"Subpath {subpath} does not currently exist in bucket {self.bucket_name}"
         )
def echo_git_status_for_user(cwd):
    """Run git status and echo to the user.
    """
    if GIT_EXE_PATH is None:
        raise ConfigurationError("git executable not found")
    cmd = [GIT_EXE_PATH, "status"]
    # p = run(cmd, cwd=cwd, stdout=PIPE, encoding="utf-8")
    p = run(cmd, cwd=cwd, encoding="utf-8")
    # for line in p.stdout.split("\n"):
    #     click.echo(line)
    if p.returncode != 0:
        raise ConfigurationError("Problem invoking %s status on %s" %
                                 (GIT_EXE_PATH, cwd))
Example No. 6
 def _add_prechecks(self, local_path, remote_path, config) -> RClone:
     if os.path.exists(local_path) and not (os.access(local_path, os.W_OK)):
         raise ConfigurationError(local_path +
                                  " does not have write permission")
     if config:
         rclone = RClone(cfgfile=config)
     else:
         rclone = RClone()
     known_remotes = rclone.listremotes()
     (remote_name, _) = remote_path.split(":")
     if remote_name not in known_remotes:
         raise ConfigurationError("Remote '" + remote_name +
                                  "' not found by rclone")
     return rclone
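
For context, a small sketch (not from the library) of the remote-path form that _add_prechecks expects: the text before the first colon must name a remote that rclone already knows about. The remote and path names below are made up.

remote_path = "backup:my-bucket/datasets"
(remote_name, rest) = remote_path.split(":")
print(remote_name)   # "backup" -- this is what gets checked against rclone.listremotes()
print(rest)          # "my-bucket/datasets"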
Example No. 7
 def load_workspace(
         batch: bool, verbose: bool,
         parsed_uri: ParseResult) -> ws.Workspace:  # type: ignore
     path = parsed_uri.path
     if not isabs(path):
         path = abspath(expanduser(path))
     if not isdir(path):
         raise ConfigurationError("Workspace directory %s does not exist" %
                                  path)
     metadata_path = join(path, ".dataworkspace")
     if not isdir(metadata_path):
         raise ConfigurationError(
             "Workspace directory %s does not correspond to an initialized git-backend workspace"
             % path)
     return Workspace(path, batch, verbose)
Example No. 8
def lineage_graph_command(
    workspace: Workspace,
    output_file: str,
    resource_name: Optional[str],
    snapshot: Optional[str],
    format="html",
    width: int = 1024,
    height: int = 800,
) -> None:
    if not isinstance(workspace, SnapshotWorkspaceMixin):
        raise ConfigurationError(
            "Workspace %s does not support snapshots and lineage" %
            workspace.name)
    if not workspace.supports_lineage():
        raise ConfigurationError("Workspace %s does not support lineage" %
                                 workspace.name)
    store = workspace.get_lineage_store()

    snapshot_hash = None  # type: Optional[str]
    if snapshot is not None:
        md = workspace.get_snapshot_by_tag_or_hash(snapshot)
        snapshot_hash = md.hashval
    if resource_name is not None:
        workspace.validate_resource_name(resource_name)
    else:
        for r in workspace.get_resource_names():
            if workspace.get_resource_role(r) == ResourceRoles.RESULTS:
                resource_name = r
                break
        if resource_name is None:
            raise ConfigurationError(
                "Did not find a results resource in workspace. If you want to graph the lineage of a non-results resource, use the --resource option."
            )
    make_simplified_lineage_graph_for_resource(
        workspace.get_instance(),
        store,
        resource_name,
        output_file,
        snapshot_hash=snapshot_hash,
        format=format,
        width=width,
        height=height,
    )
    if snapshot is None:
        click.echo("Wrote lineage for %s to %s" % (resource_name, output_file))
    else:
        click.echo("Wrote lineage for %s as of snapshot %s to %s" %
                   (resource_name, snapshot, output_file))
Example No. 9
def build_resource_list(
    workspace: Workspace, only: Optional[List[str]], skip: Optional[List[str]]
) -> List[str]:
    """Build up our resource name list for either push or pull commands.
    """
    if (only is not None) and (skip is not None):
        raise ConfigurationError("Cannot specify both --only and --skip")
    all_resource_names_set = frozenset(workspace.get_resource_names())
    local_state_names_set = frozenset(workspace.get_names_of_resources_with_local_state())
    if only is not None:
        only_set = frozenset(only)
        invalid = only_set.difference(all_resource_names_set)
        if len(invalid) > 0:
            raise ConfigurationError(
                "Invalid resource names were included with --only: %s" % ", ".join(sorted(invalid))
            )
        nonsync_rnames = only_set.difference(local_state_names_set)
        if len(nonsync_rnames) > 0:
            click.echo(
                "Skipping the following resources, which do not have local state: %s"
                % ", ".join(sorted(nonsync_rnames))
            )
        return [rn for rn in only if rn in local_state_names_set]
    elif skip is not None:
        skip_set = frozenset(skip)
        invalid = skip_set.difference(all_resource_names_set)
        if len(invalid) > 0:
            raise ConfigurationError(
                "Invalid resource names were included with --skip: %s" % ", ".join(sorted(invalid))
            )
        nonsync_rnames = all_resource_names_set.difference(skip_set).difference(
            local_state_names_set
        )
        if len(nonsync_rnames) > 0:
            click.echo(
                "Skipping the following resources, which do not have local state: %s"
                % ", ".join(sorted(nonsync_rnames))
            )
        skip_set = skip_set.union(nonsync_rnames)
        return [rn for rn in workspace.get_resource_names() if rn not in skip_set]
    else:
        nonsync_rnames = all_resource_names_set.difference(local_state_names_set)
        if len(nonsync_rnames) > 0:
            click.echo(
                "Skipping the following resources, which do not have local state: %s"
                % ", ".join(sorted(nonsync_rnames))
            )
        return [rn for rn in workspace.get_resource_names() if rn not in nonsync_rnames]
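
A worked sketch (not from the library) of how build_resource_list filters names. The stub workspace below implements only the two methods the function calls, and the resource names are made up.

class _StubWorkspace:
    """Hypothetical stand-in implementing just what build_resource_list needs."""
    def get_resource_names(self):
        return ["code", "raw-data", "results"]
    def get_names_of_resources_with_local_state(self):
        return ["code", "results"]

ws = _StubWorkspace()
print(build_resource_list(ws, only=None, skip=None))                  # ['code', 'results']
print(build_resource_list(ws, only=["code", "raw-data"], skip=None))  # ['code']
print(build_resource_list(ws, only=None, skip=["results"]))           # ['code']
# each call also echoes that 'raw-data' is skipped because it has no local state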
Example No. 10
def clone_scratch_directory(
    workspace_dir: str,
    global_params: Dict[str, Any],
    local_params: Dict[str, Any],
    batch: bool = False,
) -> str:
    """Set the scratch directory parameters for a cloned copy of the workspace,
    updating local_params if needed.
    Returns the absolute path for the scratch directory on this system.
    """
    if SCRATCH_DIRECTORY in global_params:
        return join(workspace_dir, global_params[SCRATCH_DIRECTORY])
    elif not batch:
        local_path = cast(
            str,
            click.prompt(
                "Please specify a location for this workspace's scratch directory (must be outside of workspace)",
                type=LocalPathType(exists=False, must_be_outside_of_workspace=workspace_dir),
            ),
        )
        local_params[LOCAL_SCRATCH_DIRECTORY] = local_path
        return local_path
    else:
        # TODO: come up with a standard way of handling this when called from the API - either by
        # letting the user provide values in advance or by having some standard defaults.
        raise ConfigurationError(
            "Scratch directory was not within workspaces and we are running in batch mode. No way to ask user for location."
        )
Example No. 11
def init_scratch_directory(
    scratch_dir: str,
    workspace_dir: str,
    global_params: Dict[str, Any],
    local_params: Dict[str, Any],
) -> Tuple[str, Optional[str]]:
    """Given the user-provided or default scratch directory, set the SCRATCH_DIRECTORY
    and LOCAL_SCRATCH_DIRECTORY parameters accordingly. Only one will be set, with preference
    given to the global parameter, which is relative to the workspace. Returns a tuple of the
    absolute scratch directory path and the gitignore entry (if any) for the scratch directory.
    """
    abs_scratch_dir = abspath(expanduser(scratch_dir)) if not isabs(scratch_dir) else scratch_dir
    scratch_dir_gitignore = None  # type: Optional[str]
    if abs_scratch_dir.startswith(workspace_dir):
        rel_scratch_dir = get_subpath_from_absolute(workspace_dir, abs_scratch_dir)
        global_params[SCRATCH_DIRECTORY] = rel_scratch_dir  # always store a relative directory
        # scratch dir gitignore should start with / to indicate that this should only
        # match the exact path relative to the git repo root.
        if rel_scratch_dir is None:
            raise ConfigurationError(
                "Scratch directory cannot be equal to workspace directory. "
                + "It should either be a subdirectory or completely outside it."
            )
        if rel_scratch_dir.startswith("./"):
            scratch_dir_gitignore = rel_scratch_dir[1:]
        else:
            scratch_dir_gitignore = "/" + rel_scratch_dir
    else:
        local_params[LOCAL_SCRATCH_DIRECTORY] = abs_scratch_dir
    return (abs_scratch_dir, scratch_dir_gitignore)
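
An illustrative sketch of the two outcomes of init_scratch_directory. get_subpath_from_absolute and the parameter-key constants are not shown in this listing, so simplified stand-ins are defined here; the helper is assumed to return a "./relative/path" form for paths under the workspace and None when the two paths are equal. All paths are made up.

import os

SCRATCH_DIRECTORY = "scratch_directory"              # hypothetical parameter keys
LOCAL_SCRATCH_DIRECTORY = "local_scratch_directory"

def get_subpath_from_absolute(workspace_dir, abs_path):
    # stand-in for the library helper, for illustration only
    if abs_path == workspace_dir:
        return None
    return "./" + os.path.relpath(abs_path, workspace_dir)

globals_, locals_ = {}, {}
print(init_scratch_directory("/home/me/ws/scratch", "/home/me/ws", globals_, locals_))
# -> ('/home/me/ws/scratch', '/scratch'); globals_ now holds {'scratch_directory': './scratch'}

globals_, locals_ = {}, {}
print(init_scratch_directory("/tmp/scratch", "/home/me/ws", globals_, locals_))
# -> ('/tmp/scratch', None); locals_ now holds {'local_scratch_directory': '/tmp/scratch'}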
Example No. 12
def pull_command(
    workspace: Workspace,
    only: Optional[List[str]] = None,
    skip: Optional[List[str]] = None,
    only_workspace: bool = False,
) -> int:

    if isinstance(workspace, SyncedWorkspaceMixin):
        # first, sync the workspace
        click.echo("Syncing workspace")
        mixin = workspace.pull_workspace()
        workspace = cast(Workspace, mixin)
        if not only_workspace:
            rcount = _pull_and_clone_resources(workspace, only, skip)
        else:
            rcount = 0
    elif isinstance(workspace, CentralWorkspaceMixin):
        if only_workspace:
            raise ConfigurationError(
                "--only-workspace not valid for central workspace %s" %
                workspace.name)
        rcount = _pull_and_clone_resources(workspace, only, skip)
    else:
        raise InternalError(
            "Workspace %s is neither a SyncedWorkspaceMixin nor a CentralWorkspaceMixin"
            % workspace.name)

    workspace.save("Pull command")
    return rcount
 def clone(self, params: JSONDict,
           workspace: Workspace) -> LocalStateResourceMixin:
     """Instantiate a resource that was created remotely. We need to verify that
     the local copy of the data exists -- we are not responsible for making certain
     it is in the correct place.
     """
     name = params["name"]
     # check local_path, too for backward compatibility
     global_local_path = (params["global_local_path"] if "global_local_path"
                          in params else params["local_path"])  # type: str
     local_params = {}  # type: JSONDict
     if exists(global_local_path):
         local_path = global_local_path
     else:
         if not workspace.batch:
             local_path = cast(
                 str,
                 click.prompt(
                     "Local files resource '%s' was located at '%s' on the original system. W\here is it located on this system?"
                     % (name, global_local_path),
                     type=LocalPathType(exists=True),
                 ),
             )
             local_params["my_local_path"] = local_path
         else:
             raise ConfigurationError(
                 "Local files resource %s is missing from %s." %
                 (name, global_local_path))
     if not isinstance(workspace, git_backend.Workspace):
         non_git_hashes = join(local_path, ".hashes")
         if not exists(non_git_hashes):
             os.mkdir(non_git_hashes)
     return self.from_json(params, local_params, workspace)
Example No. 14
 def _load_json_file(self, relative_path):
     f_path = join(self.workspace_dir, relative_path)
     if not exists(f_path):
         raise ConfigurationError(
             "Did not find workspace metadata file %s" % f_path)
     with open(f_path, "r") as f:
         return json.load(f)
Example No. 15
def add_command(scheme: str, role: str, name: str, workspace: Workspace,
                *args):
    current_names = set(workspace.get_resource_names())
    if workspace.batch:
        if name is None:
            name = workspace.suggest_resource_name(scheme, role, *args)
        else:
            if name in current_names:
                raise ConfigurationError("Resource name '%s' already in use" %
                                         name)
    else:
        suggested_name = None
        while (name is None) or (name in current_names):
            if suggested_name is None:
                suggested_name = workspace.suggest_resource_name(
                    scheme, role, *args)
            name = click.prompt(
                "Please enter a short, unique name for this resource",
                default=suggested_name)
            if name in current_names:
                click.echo("Resource name '%s' already in use." % name,
                           err=True)

    workspace.add_resource(name, scheme, role, *args)
    workspace.save("add of %s" % name)
    click.echo("Successful added resource '%s' to workspace." % name)
Example No. 16
def deploy_run_command(workspace: Workspace, image_name: Optional[str],
                       no_mount_ssh_keys: bool) -> None:
    try:
        from repo2docker.__main__ import make_r2d  # type: ignore
    except ImportError as e:
        raise ConfigurationError(R2D_IMPORT_ERROR) from e
    target_repo_dir = "/home/%s/%s" % (os.environ["USER"], workspace.name)
    if image_name is None:
        image_name = workspace.name
    argv = [
        "--target-repo-dir",
        target_repo_dir,
        "--image-name",
        image_name,
    ]
    if not no_mount_ssh_keys:
        dot_ssh = abspath(expanduser("~/.ssh"))
        argv.append("-v")
        argv.append("%s:/home/%s/.ssh" % (dot_ssh, os.environ["USER"]))
    if isinstance(workspace, git_backend.Workspace):
        workspace_dir = workspace.get_workspace_local_path_if_any()
        assert workspace_dir is not None
        argv.append(
            "dws+" +
            get_remote_origin_url(workspace_dir, verbose=workspace.verbose))
    else:
        # need to figure out how the clone url works for a non-git workspace
        assert 0, "run build not yet implemented for non-git workspaces"
    if workspace.verbose:
        click.echo("Command args for repo2docker: %s" % repr(argv))
    r2d = make_r2d(argv=argv)
    r2d.initialize()
    r2d.run_image()
    click.echo("Run of image %s was successful." % image_name)
Example No. 17
    def __init__(self, cfgfile=None, cfgstring=None):
        self.log = logging.getLogger("RClone")
        self._ensure_rclone_exists()
        self.cfgstring = ''
        self.cfgfile = None
        if cfgstring:
            self.cfgstring = cfgstring.replace("\\n", "\n")
        elif cfgfile:
            self.cfgfile = cfgfile
        else:
            # find the default config file used by the rclone installation
            ret = self._execute(['rclone', 'config', 'file'])
            self.log.debug(ret)
            if ret['code'] == 0:
                # rclone config file output looks like:
                #
                # Configuration file is stored at:
                # filename
                # so we skip until the '\n'
                self.cfgfile = ret['out'].splitlines()[1].decode('utf_8')
            else:
                print(ret)
                raise ConfigurationError(
                    "RClone requires either a configuration file or a configuration string"
                )

        assert (self.cfgstring or self.cfgfile
                ), "Either a config string or a config file must be provided"
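
For reference, a tiny sketch of the parse in the default-config branch above. The sample output and path are illustrative, matching the two-line format described in the comment; they are not captured from a real run.

sample_out = b"Configuration file is stored at:\n/home/me/.config/rclone/rclone.conf\n"
print(sample_out.splitlines()[1].decode("utf_8"))   # /home/me/.config/rclone/rclone.conf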
Example No. 18
 def get_scratch_directory(self) -> str:
     if self.scratch_dir is not None:
         return self.scratch_dir
     else:
         raise ConfigurationError(
             "Neither the %s nor %s parameters are set, so cannot find scratch directory. Please set one using 'dws config'."
             % (SCRATCH_DIRECTORY, LOCAL_SCRATCH_DIRECTORY))
Example No. 19
 def read_results_file(self, subpath: str) -> JSONDict:
     """Read and parse json results data from the specified path
     in the resource. If the path does not exist or is not a file
     throw a ConfigurationError.
     """
     path = os.path.join(self.local_path, subpath)
     if not os.path.isfile(path):
         raise ConfigurationError(
             "subpath %s does not exist or is not a file in resource %s" %
             (subpath, self.name))
     with open(path, "r") as f:
         try:
             return json.load(f)
         except Exception as e:
             raise ConfigurationError(
                 "Parse error when reading %s in resource %s" %
                 (subpath, self.name)) from e
Example No. 20
def setup_git_fat_for_repo(
    repo_dir: str,
    git_fat_remote: str,
    git_fat_user: Optional[str] = None,
    git_fat_port: Optional[int] = None,
    git_fat_attributes: Optional[str] = None,
    verbose: bool = False,
) -> None:
    """Setup git fat and all the associated configuration files
    for a repository
    """
    validate_git_fat_in_path()
    dot_git_fat_fpath = get_dot_gitfat_file_path(repo_dir)
    files_to_add = [
        ".gitfat",
    ]
    dot_git_attributes_fpath = None  # type: Optional[str]
    if git_fat_attributes:
        dot_git_attributes_fpath = join(repo_dir, ".gitattributes")
        files_to_add.append(".gitattributes")
    if (RSYNC_RE.match(git_fat_remote) is
            None) and (FPATH_RE.match(git_fat_remote) is None):
        raise ConfigurationError(
            ("'%s' is not a valid remote address for rsync (used by git-fat). "
             + "Please use the format HOSTNAME:/PATH") % git_fat_remote)
    if git_fat_user is not None and USERNAME_RE.match(git_fat_user) is None:
        raise ConfigurationError(
            "'%s' is not a valid remote username for git-fat" % git_fat_user)
    import dataworkspaces.third_party.git_fat as git_fat

    python2_exe = git_fat.find_python2_exe()
    # click.echo("Initializing git-fat with remote %s" % git_fat_remote)
    with open(dot_git_fat_fpath, "w") as f:
        f.write("[rsync]\nremote = %s\n" % git_fat_remote)
        if git_fat_user:
            f.write("sshuser = %s\n" % git_fat_user)
        if git_fat_port:
            f.write("sshport = %s\n" % git_fat_port)
    if git_fat_attributes is not None:
        with open(cast(str, dot_git_attributes_fpath), "w") as f:
            for extn in git_fat_attributes.split(","):
                f.write("%s filter=fat -crlf\n" % extn)
    git_fat.run_git_fat(python2_exe, ["init"], cwd=repo_dir, verbose=verbose)
    git_add(repo_dir, files_to_add, verbose)
    git_commit(repo_dir, "initialized git-fat with remote %s" % git_fat_remote,
               verbose)
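
For concreteness, a sketch of the files this function writes, using made-up remote, user, port, and extension values; the layout follows the write calls above.

# Illustrative only: with git_fat_remote="fileserver:/data/fat-store", git_fat_user="alice",
# git_fat_port=2222, and git_fat_attributes="*.npz,*.h5", the files written would be:
#
# .gitfat
#   [rsync]
#   remote = fileserver:/data/fat-store
#   sshuser = alice
#   sshport = 2222
#
# .gitattributes
#   *.npz filter=fat -crlf
#   *.h5 filter=fat -crlf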
def switch_git_branch(local_path, branch, verbose):
    try:
        call_subprocess([GIT_EXE_PATH, "checkout", branch],
                        cwd=local_path,
                        verbose=verbose)
    except Exception as e:
        raise ConfigurationError(
            "Unable to switch git repo at %s to branch %s" %
            (local_path, branch)) from e
Example No. 22
 def _get_snapshot_manifest_as_bytes(self, hash_val: str) -> bytes:
     snapshot_dir = join(self.workspace_dir, SNAPSHOT_DIR_PATH)
     snapshot_file = join(snapshot_dir,
                          "snapshot-%s.json" % hash_val.lower())
     if not exists(snapshot_file):
         raise ConfigurationError("No snapshot found for hash value %s" %
                                  hash_val)
     with open(snapshot_file, "rb") as f:
         return f.read()
Example No. 23
 def get_snapshot_by_tag(self, tag: str) -> SnapshotMetadata:
     """Given a tag, return the asssociated snapshot metadata.
     This lookup could be slower ,if a reverse index is not kept."""
     md_dir = join(self.workspace_dir, SNAPSHOT_METADATA_DIR_PATH)
     if not exists(md_dir):
         raise ConfigurationError(f"Snapshot for tag {tag} not found")
     regexp = re.compile(re.escape(tag))
     for fname in os.listdir(md_dir):
         if not fname.endswith("_md.json"):
             continue
         fpath = join(md_dir, fname)
         with open(fpath, "r") as f:
             raw_data = f.read()
         if regexp.search(raw_data) is not None:
             md = SnapshotMetadata.from_json(json.loads(raw_data))
             if md.has_tag(tag):
                 return md
     raise ConfigurationError("Snapshot for tag %s not found" % tag)
    def __init__(
        self,
        model_name: str,
        monitor: str = "val_loss",
        save_best_only: bool = False,
        mode: str = "auto",
        save_freq: Union[str, int] = "epoch",
        results_resource: Optional[Union[str, ResourceRef]] = None,
        workspace_dir: Optional[str] = None,
        verbose: Union[int, bool] = 0,
    ):
        """
        model_name is used to create the checkpoint filenames. The checkpoints
        will be saved as MODEL_NAME_{epoch}.

        Currently, only the save_weights_only mode is supported.

        verbose can be either 0/1 in the style of TensorFlow or True/False
        in the style of Data Workspaces.
        """
        self.dws_model_name = model_name
        if verbose == 0 or verbose == False:
            tf_verbose = 0
            dws_verbose = False
        else:
            tf_verbose = 1
            dws_verbose = True

        self.workspace = find_and_load_workspace(
            batch=True, verbose=dws_verbose, uri_or_local_path=workspace_dir)

        results_ref = _find_resource(self.workspace, ResourceRoles.RESULTS,
                                     results_resource)
        self.results_resource = self.workspace.get_resource(results_ref.name)
        if not isinstance(self.results_resource, FileResourceMixin):
            raise ConfigurationError(
                "Resource %s is not a file-based resource" % results_ref.name)
        self.results_subdir = results_ref.subpath  # type: Optional[str]
        scratch_dir = self.workspace.get_scratch_directory()
        assert isdir(scratch_dir), "missing scratch directory %s" % scratch_dir
        self.dws_checkpoint_path = join(scratch_dir,
                                        "checkpoints")  # type: str
        if not isdir(self.dws_checkpoint_path):
            os.mkdir(self.dws_checkpoint_path)
        self.checkpoint_filepath_template = join(self.dws_checkpoint_path,
                                                 model_name + "_{epoch}")
        super().__init__(
            filepath=self.checkpoint_filepath_template,
            monitor=monitor,
            save_best_only=save_best_only,
            mode=mode,
            save_freq=save_freq,
            save_weights_only=True,
            verbose=tf_verbose,
        )
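
A hedged usage sketch for the checkpoint callback whose constructor is shown above. The class name and import path are not visible in this fragment, so DwsModelCheckpoint is used as a stand-in, and the sketch assumes it runs inside an initialized Data Workspaces workspace that has a results resource and a configured scratch directory.

import numpy as np
import tensorflow as tf

# toy data so the snippet runs end to end
x = np.random.rand(64, 4).astype("float32")
y = np.random.rand(64, 1).astype("float32")
model = tf.keras.Sequential([tf.keras.Input(shape=(4,)), tf.keras.layers.Dense(1)])
model.compile(optimizer="adam", loss="mse")

checkpoint_cb = DwsModelCheckpoint(      # stand-in name for the class defined above
    model_name="my-model",               # checkpoints land in <scratch>/checkpoints/my-model_{epoch}
    monitor="val_loss",
    save_best_only=True,
    results_resource="results",          # hypothetical name of a results resource
)
model.fit(x, y, validation_split=0.25, epochs=3, callbacks=[checkpoint_cb])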
Example No. 25
def publish_command(workspace: Workspace, remote_repository: str) -> None:
    if isinstance(workspace, SyncedWorkspaceMixin):
        workspace.publish(remote_repository)
    else:
        raise ConfigurationError(
            "Workspace %s does not support publish command; only supported for synced workspaces"
            % workspace.name
        )

    click.echo("Set remote origin to %s" % remote_repository)
def is_git_dirty(cwd):
    """See if the git repo is dirty. We are looking for untracked
    files, changes in staging, and changes in the working directory.
    """
    if GIT_EXE_PATH is None:
        raise ConfigurationError("git executable not found")
    cmd = [GIT_EXE_PATH, "status", "--porcelain"]
    p = run(cmd, cwd=cwd, stdout=PIPE, encoding="utf-8")
    for line in p.stdout.split("\n"):
        if len(line) < 2:
            continue
        if (line[0] in ("?", "D", "M", "A")) or (line[1]
                                                 in ("?", "D", "M", "A")):
            return True
    if p.returncode == 0:
        return False
    else:
        raise ConfigurationError("Problem invoking %s status on %s" %
                                 (GIT_EXE_PATH, cwd))
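
To make the porcelain parsing above concrete, a small sketch of typical `git status --porcelain` lines and how the two status columns are read; the file names are made up.

samples = [
    " M dataworkspaces/utils/git_utils.py",   # modified in the working tree -> dirty
    "A  new_module.py",                       # staged addition              -> dirty
    "?? scratch/notes.txt",                   # untracked file               -> dirty
]
for line in samples:
    dirty = (line[0] in ("?", "D", "M", "A")) or (line[1] in ("?", "D", "M", "A"))
    print(repr(line[:2]), dirty)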
 def restore_precheck(self, hashval):
     snapshot_file = hashval + '.json.gz'
     snapshot_local_path = join(self.snapshot_cache_dir, snapshot_file)
     if not exists(snapshot_local_path):
         snapshot_s3_path = join(join(self.bucket_name, '.snapshots'),
                                 snapshot_file)
         if not self.fs.exists(snapshot_s3_path):
             raise ConfigurationError(
                 f"File s3://{snapshot_s3_path} not found for snapshot {hashval}"
             )
Example No. 28
 def _get_resource_params(self, resource_name) -> JSONDict:
     """Get the parameters for this resource from the workspace's
     metadata store - used when instantiating resources. Should
     throw an exception if the resource does not exist.
     """
     if resource_name not in self.resource_params_by_name:
         raise ConfigurationError(
             "A resource by the name '%s' does not exist in this workspace"
             % resource_name)
     return self.resource_params_by_name[resource_name]
def get_remote_origin_url(repo_dir: str, verbose: bool) -> str:
    try:
        url = call_subprocess(
            [GIT_EXE_PATH, "config", "--get", "remote.origin.url"],
            cwd=repo_dir,
            verbose=verbose)
        return url.strip()
    except Exception as e:
        raise ConfigurationError(
            "Problem getting remote origin from repository at %s. Do you have a remote origin configured?"
            % repo_dir) from e
Example No. 30
def make_lineage_table(
    workspace_uri_or_path: Optional[str] = None,
    tag_or_hash: Optional[str] = None,
    verbose: bool = False,
) -> Iterable[Tuple[str, str, str, Optional[List[str]]]]:
    """Make a table of the lineage for each resource.
    The columns are: ref, lineage type, details, inputs
    """
    workspace = find_and_load_workspace(True, verbose, workspace_uri_or_path)
    if not isinstance(workspace, SnapshotWorkspaceMixin):
        raise ConfigurationError("Workspace %s does not support lineage" % workspace.name)
    if not workspace.supports_lineage():
        raise ConfigurationError("Workspace %s does not support lineage" % workspace.name)
    snapshot_hash = None  # type: Optional[str]
    if tag_or_hash is not None:
        md = workspace.get_snapshot_by_tag_or_hash(tag_or_hash)
        snapshot_hash = md.hashval
    return lu.make_lineage_table(
        workspace.get_instance(), workspace.get_lineage_store(), snapshot_hash
    )
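
A hedged usage sketch for make_lineage_table: it assumes an already-initialized workspace at the given path (the path is made up) and simply iterates the rows, whose columns are named in the docstring above.

for (ref, lineage_type, details, inputs) in make_lineage_table("./my-workspace", verbose=False):
    print(ref, lineage_type, details, inputs or [])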