def install_packages(self, session=None):
    session = session or get_local_session()
    for env in self.environments:
        # TODO: Deal with system and editable packages.
        to_install = ["{p.name}=={p.version}".format(p=p)
                      for p in env.packages
                      if p.local and not p.editable]
        if not to_install:
            lgr.info("No local, non-editable packages found")
            continue

        # TODO: Right now we just use the python version to invoke
        # "virtualenv --python=..." when the directory doesn't exist, but we
        # should eventually use the yet-to-exist "satisfies" functionality to
        # check whether an existing virtual environment has the right python
        # (and maybe other things).
        pyver = "{v.major}.{v.minor}".format(
            v=parse_semantic_version(env.python_version))
        if not session.exists(env.path):
            # The location and version of virtualenv are recorded at the
            # time of tracing, but should we use these values?  For now,
            # use a plain "virtualenv" below on the basis that we just use
            # "apt-get" and "git" elsewhere.
            session.execute_command(["virtualenv",
                                     "--python=python{}".format(pyver),
                                     env.path])
        list(execute_command_batch(session,
                                   [env.path + "/bin/pip", "install"],
                                   to_install))
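# Hedged, standalone sketch (not part of NICEMAN): the pin-building logic
# above, reproduced with a stand-in namedtuple instead of the traced package
# objects, to show which packages end up in the pip invocation.
from collections import namedtuple

_Pkg = namedtuple("_Pkg", ["name", "version", "local", "editable"])

_example_packages = [
    _Pkg("numpy", "1.24.2", local=True, editable=False),      # pinned
    _Pkg("mypkg", "0.1.0", local=True, editable=True),        # editable: skipped
    _Pkg("requests", "2.31.0", local=False, editable=False),  # non-local: skipped
]
_to_install = ["{p.name}=={p.version}".format(p=p)
               for p in _example_packages if p.local and not p.editable]
# _to_install == ['numpy==1.24.2']; execute_command_batch above appends these
# pins to "<env.path>/bin/pip install".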
def __call__(path=None, spec=None, output_file=None):
    # heavy import -- should be delayed until actually used
    if not (spec or path):
        raise InsufficientArgumentsError(
            "Need at least a single --spec or a file")

    paths = assure_list(path)
    if spec:
        lgr.info("reading spec file %s", spec)
        # TODO: generic loader to auto-detect formats etc
        from niceman.formats.reprozip import ReprozipProvenance
        spec = ReprozipProvenance(spec)
        paths += spec.get_files() or []

    # Convert paths to unicode
    paths = list(map(to_unicode, paths))

    session = get_local_session()
    # TODO: at the moment assumes just a single distribution etc.
    #       Generalize
    # TODO: RF so that only the above portion is reprozip-specific.
    #       If we are to reuse their layout largely -- the rest should stay
    #       as is
    (distributions, files) = identify_distributions(paths, session=session)
    from niceman.distributions.base import EnvironmentSpec
    spec = EnvironmentSpec(distributions=distributions)
    if files:
        spec.files = sorted(files)

    # TODO: generic writer!
    from niceman.formats.niceman import NicemanProvenance
    NicemanProvenance.write(output_file or sys.stdout, spec)
def identify_distributions(files, session=None):
    """Identify packages files belong to

    Parameters
    ----------
    files : iterable
      Files to consider

    Returns
    -------
    distributions : list of Distribution
    unknown_files : list of str
      Files which were not determined to belong to any specific distribution
    """
    # TODO: automate discovery of available tracers
    from niceman.distributions.debian import DebTracer
    from niceman.distributions.conda import CondaTracer
    from niceman.distributions.vcs import VCSTracer

    session = session or get_local_session()
    # TODO: create a list of tracers appropriate for the `environment` OS;
    #       in case of no environment -- get the current one
    # TODO: should operate in the session, might be given additional
    #       information, not just files
    Tracers = [DebTracer, CondaTracer, VCSTracer]

    # .identify_ functions will have a side-effect of shrinking this list
    # in-place as they identify files belonging to them
    files_to_consider = files[:]

    # Identify directories from the files_to_consider
    dirs = set(filter(session.isdir, files_to_consider))

    distributions = []
    for Tracer in Tracers:
        lgr.info("Tracing using %s", Tracer)

        # Pull out directories if the tracer can't handle them
        if Tracer.HANDLES_DIRS:
            files_to_trace = files_to_consider
            files_skipped = []
        else:
            files_to_trace = [x for x in files_to_consider if x not in dirs]
            files_skipped = [x for x in files_to_consider if x in dirs]

        tracer = Tracer(session=session)
        begin = time.time()
        if files_to_trace:
            for env, files_to_trace in tracer.identify_distributions(
                    files_to_trace):
                distributions.append(env)

        # Re-combine any files that were skipped
        files_to_consider = files_to_trace + files_skipped

        lgr.debug("Assigning files to packages by %s took %f seconds",
                  tracer, time.time() - begin)

    return distributions, files_to_consider
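# A minimal, hedged usage sketch (illustrative paths; the output depends
# entirely on what the default tracers find on the local host):
if __name__ == "__main__":
    distributions, unknown_files = identify_distributions(
        ["/usr/bin/python3", "/etc/hosts"])
    for dist in distributions:
        print(type(dist).__name__)
    print("Unassigned files:", unknown_files)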
def __init__(self, session=None):
    # will be (re)used to run external commands, and let's hardcode the
    # LC_ALL codepage just in case, since we might want to comprehend error
    # messages
    self._session = session or get_local_session()
    # to ease _init within derived classes, which should not be parametrized
    # any further anyway
    self._init()
def __call__(path=None, spec=None, output_file=None,
             resref=None, resref_type="auto"):
    # heavy import -- should be delayed until actually used
    if not (spec or path):
        raise InsufficientArgumentsError(
            "Need at least a single --spec or a file")

    paths = assure_list(path)
    if spec:
        lgr.info("reading spec file %s", spec)
        # TODO: generic loader to auto-detect formats etc
        from niceman.formats.reprozip import ReprozipProvenance
        spec = ReprozipProvenance(spec)
        paths += spec.get_files() or []

    # Convert paths to unicode
    paths = map(to_unicode, paths)
    # The tracers assume normalized paths.
    paths = list(map(normpath, paths))

    if isinstance(resref, Session):
        # TODO: Special case for Python callers.  Is this something we want
        # to handle more generally at the interface level?
        session = resref
    elif resref:
        resource = get_manager().get_resource(resref, resref_type)
        session = resource.get_session()
    else:
        session = get_local_session()

    # TODO: at the moment assumes just a single distribution etc.
    #       Generalize
    # TODO: RF so that only the above portion is reprozip-specific.
    #       If we are to reuse their layout largely -- the rest should stay
    #       as is
    (distributions, files) = identify_distributions(paths, session=session)
    from niceman.distributions.base import EnvironmentSpec
    spec = EnvironmentSpec(distributions=distributions)
    if files:
        spec.files = sorted(files)

    # TODO: generic writer!
    from niceman.formats.niceman import NicemanProvenance
    stream = open(output_file, "w") if output_file else sys.stdout
    NicemanProvenance.write(stream, spec)
    if stream is not sys.stdout:
        stream.close()
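# Descriptive summary of the session selection above (names as used in this
# function, nothing new introduced):
#   * resref is already a Session instance -> use it directly (the special
#     case for Python callers noted in the TODO)
#   * resref is a resource name/id         -> resolve it via
#     get_manager().get_resource(resref, resref_type) and use its session
#   * resref is None                       -> fall back to get_local_session()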
def identify_distributions(files, session=None, tracer_classes=None):
    """Identify packages files belong to

    Parameters
    ----------
    files : iterable
      Files to consider

    Returns
    -------
    distributions : list of Distribution
    unknown_files : list of str
      Files which were not determined to belong to any specific distribution
    """
    if tracer_classes is None:
        tracer_classes = get_tracer_classes()

    session = session or get_local_session()
    # TODO: create a list of tracers appropriate for the `environment` OS;
    #       in case of no environment -- get the current one
    # TODO: should operate in the session, might be given additional
    #       information, not just files

    # .identify_ functions will have a side-effect of shrinking this set
    # in-place as they identify files belonging to them
    files_to_consider = set(files)

    distributions = []
    files_processed = set()
    files_to_trace = files_to_consider

    niter = 0
    max_niter = 10
    while True:
        niter += 1
        nfiles_processed = len(files_processed)
        nfiles_to_trace = len(files_to_trace)
        lgr.info("Entering iteration #%d over Tracers", niter)
        if niter > max_niter:
            lgr.error(
                "We did %s iterations already, something is not right",
                max_niter)
            break

        for Tracer in tracer_classes:
            lgr.debug("Tracing using %s", Tracer.__name__)

            # TODO: memoize across all loops
            # Identify directories among the files to trace
            dirs = set(filter(session.isdir, files_to_trace))

            # Pull out directories if the tracer can't handle them
            if Tracer.HANDLES_DIRS:
                files_to_trace = files_to_consider
                files_skipped = set()
            else:
                files_to_trace = files_to_consider - dirs
                files_skipped = files_to_consider - files_to_trace

            tracer = Tracer(session=session)
            begin = time.time()
            # yoh thinks the idea was that a tracer might trace even without
            # files, so we should not just 'continue' the loop if there are
            # no files_to_trace
            if files_to_trace:
                remaining_files_to_trace = files_to_trace
                nenvs = 0
                for env, remaining_files_to_trace \
                        in tracer.identify_distributions(files_to_trace):
                    distributions.append(env)
                    nenvs += 1
                files_processed |= files_to_trace - remaining_files_to_trace
                files_to_trace = remaining_files_to_trace
                lgr.info("%s: %d envs with %d other files remaining",
                         Tracer.__name__, nenvs, len(files_to_trace))

            # Re-combine any files that were skipped
            files_to_consider = files_to_trace | files_skipped

            lgr.debug("Assigning files to packages by %s took %f seconds",
                      tracer, time.time() - begin)

        if len(files_to_trace) == 0 or (
                nfiles_processed == len(files_processed)
                and nfiles_to_trace == len(files_to_trace)):
            lgr.info("No more changes or files to track.  Exiting the loop")
            break

    return distributions, files_to_consider
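# Descriptive summary of the loop above: each outer iteration runs every
# tracer over the not-yet-assigned files; the while-loop exits when (a) no
# files are left to trace, (b) a full pass neither grew files_processed nor
# changed the number of files left to trace, or (c) max_niter (10) passes
# have been made, which is logged as an error.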
def install_packages(self, session=None):
    """
    Install the packages associated with this distribution by the
    provenance into the environment.

    Parameters
    ----------
    session : object
        Environment sub-class instance.

    Raises
    ------
    ValueError
        Unexpected conda platform or python version
    CommandError
        If an unexpected error occurs in the install commands
    """
    if not self.path:  # Permit empty conda config entry
        return

    if not session:
        session = get_local_session()

    # Use the session to make a temporary directory for our install files
    tmp_dir = session.mktmpdir()
    try:
        # Install Conda
        # See if the Conda root path exists and, if not, install Conda
        if not session.isdir(self.path):
            # TODO: Determine if we can detect miniconda vs anaconda
            miniconda_url = get_miniconda_url(self.platform,
                                              self.python_version)
            session.execute_command(
                "curl %s -o %s/miniconda.sh" % (miniconda_url, tmp_dir))
            # NOTE: miniconda.sh makes parent directories automatically
            session.execute_command(
                "bash %s/miniconda.sh -b -p %s" % (tmp_dir, self.path))
            # Update the root version of conda
            session.execute_command(
                "%s/bin/conda install -y conda=%s python=%s" % (
                    self.path, self.conda_version,
                    self.get_simple_python_version(self.python_version)))

        # Loop through non-root packages, creating the conda-env config
        for env in self.environments:
            export_contents = self.create_conda_export(env)
            with make_tempfile(export_contents) as local_config:
                remote_config = os.path.join(tmp_dir, env.name)
                session.put(local_config, remote_config)
                if not session.isdir(env.path):
                    try:
                        session.execute_command(
                            "%s/bin/conda-env create -p %s -f %s" % (
                                self.path, env.path, remote_config))
                    except CommandError:
                        # Some conda versions segfault, so try to update
                        session.execute_command(
                            "%s/bin/conda-env update -p %s -f %s" % (
                                self.path, env.path, remote_config))
                else:
                    session.execute_command(
                        "%s/bin/conda-env update -p %s -f %s" % (
                            self.path, env.path, remote_config))
    finally:
        if tmp_dir:
            # Remove the tmp dir
            session.execute_command(["rm", "-R", tmp_dir])
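# Illustrative command sequence issued by install_packages when the conda
# root is absent (hypothetical paths, versions, and environment name; the
# actual installer URL is whatever get_miniconda_url returns for the traced
# platform and python version):
#
#   curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
#       -o <tmp_dir>/miniconda.sh
#   bash <tmp_dir>/miniconda.sh -b -p <self.path>
#   <self.path>/bin/conda install -y conda=4.3.31 python=2.7
#   <self.path>/bin/conda-env create -p <env.path> -f <tmp_dir>/<env.name>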
def install_packages(self, session=None, use_version=True):
    session = session or get_local_session()
    for repo in self.packages:
        self._install_repo(session, repo)