Example #1
    def install_packages(self, session=None):
        session = session or get_local_session()
        for env in self.environments:
            # TODO: Deal with system and editable packages.
            to_install = ["{p.name}=={p.version}".format(p=p)
                          for p in env.packages
                          if p.local and not p.editable]
            if not to_install:
                lgr.info("No local, non-editable packages found")
                continue

            # TODO: Right now we just use the python to invoke "virtualenv
            # --python=..." when the directory doesn't exist, but we should
            # eventually use the yet-to-exist "satisfies" functionality to
            # check whether an existing virtual environment has the right
            # python (and maybe other things).
            pyver = "{v.major}.{v.minor}".format(
                v=parse_semantic_version(env.python_version))

            if not session.exists(env.path):
                # The location and version of virtualenv are recorded at the
                # time of tracing, but should we use these values?  For now,
                # use a plain "virtualenv" below on the basis that we just use
                # "apt-get" and "git" elsewhere.
                session.execute_command(["virtualenv",
                                         "--python=python{}".format(pyver),
                                         env.path])
            # consume the generator returned by execute_command_batch so the
            # batched "pip install" commands actually run
            list(execute_command_batch(session,
                                       [env.path + "/bin/pip", "install"],
                                       to_install))
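A minimal invocation sketch for the method above; `dist` is a hypothetical name for a previously traced virtualenv distribution object, and `get_local_session` is the helper already used throughout these snippets (its import is omitted because the module path is not shown in the source):

# Sketch only: `dist` is hypothetical; import of get_local_session omitted on purpose.
session = get_local_session()
dist.install_packages(session=session)  # creates env.path if missing, then pip-installs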
Example #2
    def __call__(path=None, spec=None, output_file=None):
        # heavy import -- should be delayed until actually used

        if not (spec or path):
            raise InsufficientArgumentsError(
                "Need at least a single --spec or a file")

        paths = assure_list(path)
        if spec:
            lgr.info("reading spec file %s", spec)
            # TODO: generic loader to auto-detect formats etc
            from niceman.formats.reprozip import ReprozipProvenance
            spec = ReprozipProvenance(spec)
            paths += spec.get_files() or []

        # Convert paths to unicode
        paths = list(map(to_unicode, paths))

        session = get_local_session()

        # TODO: at the moment assumes just a single distribution etc.
        #       Generalize
        # TODO: RF so that only the above portion is reprozip specific.
        # If we are to reuse their layout largely -- the rest should stay as is
        (distributions, files) = identify_distributions(paths, session=session)
        from niceman.distributions.base import EnvironmentSpec
        spec = EnvironmentSpec(distributions=distributions)
        if files:
            spec.files = sorted(files)

        # TODO: generic writer!
        from niceman.formats.niceman import NicemanProvenance
        NicemanProvenance.write(output_file or sys.stdout, spec)
Example #3
def identify_distributions(files, session=None):
    """Identify the packages that the given files belong to

    Parameters
    ----------
    files : iterable
      Files to consider
    session : Session, optional
      Session to operate in; defaults to a local session

    Returns
    -------
    distributions : list of Distribution
    unknown_files : list of str
      Files which were not determined to belong to any specific distribution
    """
    # TODO: automate discovery of available tracers
    from niceman.distributions.debian import DebTracer
    from niceman.distributions.conda import CondaTracer
    from niceman.distributions.vcs import VCSTracer

    session = session or get_local_session()
    # TODO: create a list of tracers appropriate for the `environment` OS;
    #       if no environment is given, use the current one
    # TODO: should operate in the session and might be given additional
    #       information, not just files
    Tracers = [DebTracer, CondaTracer, VCSTracer]

    # .identify_ functions will have a side-effect of shrinking this list in-place
    # as they identify files belonging to them
    files_to_consider = files[:]

    # Identify directories from the files_to_consider
    dirs = set(filter(session.isdir, files_to_consider))

    distributions = []
    for Tracer in Tracers:
        lgr.info("Tracing using %s", Tracer)

        # Pull out directories if the tracer can't handle them
        if Tracer.HANDLES_DIRS:
            files_to_trace = files_to_consider
            files_skipped = []
        else:
            files_to_trace = [x for x in files_to_consider if x not in dirs]
            files_skipped = [x for x in files_to_consider if x in dirs]

        tracer = Tracer(session=session)
        begin = time.time()
        if files_to_trace:
            # the tracer yields (distribution, remaining files); rebinding
            # files_to_trace keeps only the files that are still unassigned
            for env, files_to_trace in tracer.identify_distributions(
                    files_to_trace):
                distributions.append(env)

        # Re-combine any files that were skipped
        files_to_consider = files_to_trace + files_skipped

        lgr.debug("Assigning files to packages by %s took %f seconds", tracer,
                  time.time() - begin)

    return distributions, files_to_consider
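A minimal usage sketch for the function above; both import paths below are assumptions, not taken from the source:

# Sketch only: the import locations are assumptions.
from niceman.retrace import identify_distributions
from niceman.resource.session import get_local_session

session = get_local_session()
distributions, unknown_files = identify_distributions(
    ["/usr/bin/python", "/home/me/project/script.py"], session=session)
for dist in distributions:
    print(type(dist).__name__)
print("files not assigned to any distribution:", unknown_files)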
Example #4
    def __init__(self, session=None):
        # will be (re)used to run external commands; let's hardcode the LC_ALL
        # locale just in case, since we might want to comprehend error
        # messages
        self._session = session or get_local_session()
        # to simplify _init in derived classes, which should not take
        # additional parameters anyway
        self._init()
Example #5
    def __call__(path=None,
                 spec=None,
                 output_file=None,
                 resref=None,
                 resref_type="auto"):
        # heavy import -- should be delayed until actually used

        if not (spec or path):
            raise InsufficientArgumentsError(
                "Need at least a single --spec or a file")

        paths = assure_list(path)
        if spec:
            lgr.info("reading spec file %s", spec)
            # TODO: generic loader to auto-detect formats etc
            from niceman.formats.reprozip import ReprozipProvenance
            spec = ReprozipProvenance(spec)
            paths += spec.get_files() or []

        # Convert paths to unicode
        paths = map(to_unicode, paths)
        # The tracers assume normalized paths.
        paths = list(map(normpath, paths))

        if isinstance(resref, Session):
            # TODO: Special case for Python callers.  Is this something we want
            # to handle more generally at the interface level?
            session = resref
        elif resref:
            resource = get_manager().get_resource(resref, resref_type)
            session = resource.get_session()
        else:
            session = get_local_session()

        # TODO: at the moment assumes just a single distribution etc.
        #       Generalize
        # TODO: RF so that only the above portion is reprozip specific.
        # If we are to reuse their layout largely -- the rest should stay as is
        (distributions, files) = identify_distributions(paths, session=session)
        from niceman.distributions.base import EnvironmentSpec
        spec = EnvironmentSpec(distributions=distributions)
        if files:
            spec.files = sorted(files)

        # TODO: generic writer!
        from niceman.formats.niceman import NicemanProvenance
        stream = open(output_file, "w") if output_file else sys.stdout
        NicemanProvenance.write(stream, spec)
        if stream is not sys.stdout:
            stream.close()
Example #6
def identify_distributions(files, session=None, tracer_classes=None):
    """Identify the packages that the given files belong to

    Parameters
    ----------
    files : iterable
      Files to consider
    session : Session, optional
      Session to operate in; defaults to a local session
    tracer_classes : list of type, optional
      Tracer classes to use; defaults to the result of get_tracer_classes()

    Returns
    -------
    distributions : list of Distribution
    unknown_files : list of str
      Files which were not determined to belong to any specific distribution
    """
    if tracer_classes is None:
        tracer_classes = get_tracer_classes()

    session = session or get_local_session()
    # TODO: create a list of tracers appropriate for the `environment` OS;
    #       if no environment is given, use the current one
    # TODO: should operate in the session and might be given additional
    #       information, not just files


    # .identify_ functions will have a side-effect of shrinking this list in-place
    # as they identify files belonging to them
    files_to_consider = set(files)

    distributions = []
    files_processed = set()
    files_to_trace = files_to_consider

    niter = 0
    max_niter = 10
    while True:
        niter += 1
        nfiles_processed = len(files_processed)
        nfiles_to_trace = len(files_to_trace)
        lgr.info("Entering iteration #%d over Tracers", niter)
        if niter > max_niter:
            lgr.error(
                "We did %d iterations already, something is not right",
                max_niter)
            break

        for Tracer in tracer_classes:
            lgr.debug("Tracing using %s", Tracer.__name__)
            # TODO: memoize across all loops
            # Identify directories among files_to_trace
            dirs = set(filter(session.isdir, files_to_trace))

            # Pull out directories if the tracer can't handle them
            if Tracer.HANDLES_DIRS:
                files_to_trace = files_to_consider
                files_skipped = set()
            else:
                files_to_trace = files_to_consider - dirs
                files_skipped = files_to_consider - files_to_trace

            tracer = Tracer(session=session)
            begin = time.time()
            # yoh thinks the idea was that a tracer might trace even without
            #     files, so we should not just 'continue' the loop if there are
            #     no files_to_trace
            if files_to_trace:
                remaining_files_to_trace = files_to_trace
                nenvs = 0
                for env, remaining_files_to_trace in tracer.identify_distributions(
                        files_to_trace):
                    distributions.append(env)
                    nenvs += 1
                files_processed |= files_to_trace - remaining_files_to_trace
                files_to_trace = remaining_files_to_trace
                lgr.info("%s: %d envs with %d other files remaining",
                         Tracer.__name__,
                         nenvs,
                         len(files_to_trace))

            # Re-combine any files that were skipped
            files_to_consider = files_to_trace | files_skipped

            lgr.debug("Assigning files to packages by %s took %f seconds",
                      tracer, time.time() - begin)
        if len(files_to_trace) == 0 or (
            nfiles_processed == len(files_processed) and
            nfiles_to_trace == len(files_to_trace)):
            lgr.info("No more changes or files to track.  Exiting the loop")
            break

    return distributions, files_to_consider
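The loop above stops at a fixed point: it exits when a full pass over the tracers neither assigns any new files nor shrinks the set left to trace, or when the hard cap of max_niter iterations is hit. A self-contained sketch of that termination pattern in plain Python (illustrative names only, not niceman API):

# Generic "stop when a full pass makes no progress" pattern, mirroring the
# niter/max_niter loop above.
def run_until_fixed_point(step, state, max_iterations=10):
    """Apply `step` repeatedly until the state stops changing or the cap is hit."""
    for _ in range(max_iterations):
        new_state = step(state)
        if new_state == state:  # no progress during this pass -- we are done
            return new_state
        state = new_state
    return state  # cap reached; the caller may want to log an error here

def strip_archive_suffix(name):
    # one "pass": peel off a single compression suffix, if any
    return name[:-3] if name.endswith((".gz", ".xz")) else name

print(run_until_fixed_point(strip_archive_suffix, "data.tar.gz.xz"))  # -> data.tar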
Example #7
    def install_packages(self, session=None):
        """
        Install the packages associated with this distribution, as recorded in
        the provenance, into the environment.

        Parameters
        ----------
        session : object
            Environment sub-class instance.

        Raises
        ------
        ValueError
            Unexpected conda platform or python version
        CommandError
            If an unexpected error occurs in the install commands
        """

        if not self.path:  # Permit empty conda config entry
            return

        if not session:
            session = get_local_session()

        # Use the session to make a temporary directory for our install files
        tmp_dir = session.mktmpdir()
        try:
            # Install Conda
            # See if Conda root path exists and if not, install Conda
            if not session.isdir(self.path):
                # TODO: Determine if we can detect miniconda vs anaconda
                miniconda_url = get_miniconda_url(self.platform,
                                                  self.python_version)
                session.execute_command("curl %s -o %s/miniconda.sh" %
                                        (miniconda_url, tmp_dir))
                # NOTE: miniconda.sh makes parent directories automatically
                session.execute_command("bash %s/miniconda.sh -b -p %s" %
                                        (tmp_dir, self.path))
            # Update the root version of conda
            session.execute_command(
                "%s/bin/conda install -y conda=%s python=%s" %
                (self.path, self.conda_version,
                 self.get_simple_python_version(self.python_version)))

            # Loop through non-root packages, creating the conda-env config
            for env in self.environments:
                export_contents = self.create_conda_export(env)
                with make_tempfile(export_contents) as local_config:
                    remote_config = os.path.join(tmp_dir, env.name)
                    session.put(local_config, remote_config)
                    if not session.isdir(env.path):
                        try:
                            session.execute_command(
                                "%s/bin/conda-env create -p %s -f %s " %
                                (self.path, env.path, remote_config))
                        except CommandError:
                            # Some conda versions seg fault so try to update
                            session.execute_command(
                                "%s/bin/conda-env update -p %s -f %s " %
                                (self.path, env.path, remote_config))
                    else:
                        session.execute_command(
                            "%s/bin/conda-env update -p %s -f %s " %
                            (self.path, env.path, remote_config))

        finally:
            if tmp_dir:
                # Remove the tmp dir
                session.execute_command(["rm", "-R", tmp_dir])

        return
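The create-then-update fallback above (conda-env create, retried as conda-env update on CommandError) is a reusable pattern: try the strict command first and fall back to the idempotent one on failure. A small self-contained sketch using the standard library rather than the niceman session API; the commented-out invocation is a hypothetical placeholder, not a command taken from the source:

# Generic illustration with subprocess.
import subprocess

def create_or_update(create_cmd, update_cmd):
    """Run `create_cmd`; if it fails, fall back to `update_cmd`."""
    try:
        subprocess.run(create_cmd, check=True)
    except subprocess.CalledProcessError:
        subprocess.run(update_cmd, check=True)

# create_or_update(["conda-env", "create", "-p", "/opt/env", "-f", "env.yml"],
#                  ["conda-env", "update", "-p", "/opt/env", "-f", "env.yml"])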
Example #8
    def install_packages(self, session=None, use_version=True):
        session = session or get_local_session()
        for repo in self.packages:
            self._install_repo(session, repo)