Example #1
0
def conflicts_with(distkey, distkey_set):
    """
    Find the dists in distkey_set that immediately conflict with distkey.

    A dist in distkey_set conflicts with distkey when it is a dist of the same
    package (as determined by find_dists_matching_packname) whose version is
    not equal to distkey's version according to depdata.versions_are_equal
    (so that, e.g., version '2' is not treated as conflicting with '2.0').

    Args:
      - distkey: the distribution key to check, e.g. 'django(1.5)'
      - distkey_set: the collection of distkeys to check against

    Returns:
      A list of the conflicting distkeys from distkey_set; an empty list if
      there is no immediate conflict.

    e.g. conflicts_with('django(1.5)', ['django(1.3)', 'potato(2.5)']) returns
    ['django(1.3)'], indicating a conflict between django(1.5) and django(1.3).

    Runtime is presumably O(N) in the size of distkey_set (depends on the
    helpers called here).
    """
    (packname, version) = depdata.get_pack_and_version(distkey)

    # Same package, not the same literal distkey, and not merely a different
    # spelling of the same version.
    return [
        competitor_dist
        for competitor_dist in find_dists_matching_packname(
            packname, distkey_set)
        if competitor_dist != distkey
        and not depdata.versions_are_equal(
            version, depdata.get_version(competitor_dist))
    ]
def main():
  """
  Command-line entry point: choose a set of distkeys, run the (forked) pip on
  each to record dependency/conflict data, and write the collected data to
  disk via depdata.

  Arguments are read from sys.argv[1:]:
    --n=N          maximum number of sdists to pick when choosing from a local
                   mirror (default 0)
    --cm1 | --cm2 | --cm3
                   conflict model to use (default 3)
    --noskip       never skip a dist, even if blacklisted / already processed
    --carefulskip  do not skip a non-blacklisted dist that has conflict data
                   but no dependency data
    --local-old    pick dists by walking BANDERSNATCH_MIRROR_SDIST_DIR
                   (bandersnatch <= 1.8 layout)
    --local        pick dists from data/versions_by_package.json and point pip
                   at BANDERSNATCH_MIRROR_INDEX_DIR (bandersnatch 1.11 layout)
    anything else  treated as a distkey to inspect, e.g. 'motorengine(0.7.4)'

  Raises:
    - AssertionError if WORKING_DIRECTORY does not exist or a given distkey is
      not of the form 'name(version)'
    - ValueError if neither distkeys nor a local-mirror flag were given
    - Exception if --local-old is used and BANDERSNATCH_MIRROR_SDIST_DIR does
      not exist
  """
  # Some defaults:
  n_sdists_to_process = 0 # debug; max packages to explore during debug - overriden by --n=N argument.
  conflict_model = 3
  no_skip = False
  careful_skip = False
  use_local_index = False
  use_local_index_old = False
  #run_all_conflicting = False

  # Files and directories.
  assert(os.path.exists(WORKING_DIRECTORY)), 'Working dir does not exist...??'

  # Ensure that appropriate directory for downloaded distros exists.
  # This would be terrible to duplicate if scraping a large number of packages.
  # One such sdist cache per system! Gets big.
  if not os.path.exists(TEMPDIR_FOR_DOWNLOADED_DISTROS):
    os.makedirs(TEMPDIR_FOR_DOWNLOADED_DISTROS)

  logger.info("scrape_deps_and_detect_conflicts - Version 0.5")
  distkeys_to_inspect_not_normalized = [] # not-yet-normalized user input, potentially filled with distkeys to check, from arguments
  distkeys_to_inspect = [] # list after argument normalization

  # Argument processing.
  # If we have arguments coming in, treat those as the packages to inspect.
  for arg in sys.argv[1:]:
    if arg.startswith("--n="):
      n_sdists_to_process = int(arg[4:])
    elif arg == "--cm1":
      conflict_model = 1
    elif arg == "--cm2":
      conflict_model = 2
    elif arg == "--cm3":
      conflict_model = 3
    elif arg == "--noskip":
      no_skip = True
    elif arg == '--carefulskip':
      careful_skip = True
    elif arg == "--local-old":
      # without ='<directory>' means we pull alphabetically from local PyPI
      # mirror at /srv/pypi/
      # Parse .tar.gz files as they appear in bandersnatch version <= 1.8
      # For newer versions of bandersnatch, the sdist files are stored
      # differently (not in project-based directories) and so the argument
      # --local should be used instead.
      use_local_index_old = True
    elif arg == "--local":
      # without ='<directory>' means we pull from local PyPI mirror at
      # /srv/pypi/
      # Parse .tar.gz files as they appear in bandersnatch version 1.11
      # For bandersnatch 1.11, the sdist files are stored differently than in
      # <1.8. They are no longer kept in project-based directories).
      # If you are using a version of bandersnatch <=1.8, the argument
      # --local-old should be used instead.
      use_local_index = True
    #elif arg == '--conflicting':
    #  # Operate locally and run on the distkeys provided in the indicated
    #  # file, each on its own line.
    #  use_local_index = True
    #  run_all_conflicting = True
    else:
      distkeys_to_inspect_not_normalized.append(arg) # e.g. 'motorengine(0.7.4)'
      # For simplicity right now, I'll use one mode or another, not both.
      # Last arg has it if both.


  # Normalize any input distkeys we were given.
  for distkey in distkeys_to_inspect_not_normalized:
    assert '(' in distkey and distkey.endswith(')'), 'Invalid input.'
    distkey = depdata.normalize_distkey(distkey)
    distkeys_to_inspect.append(distkey)


  # Were we not given any distkeys to inspect?
  if not distkeys_to_inspect:# and not run_all_conflicting:

    if not use_local_index and not use_local_index_old:
      # If we're not using a local index, we have nothing to do.
      raise ValueError('You neither specified distributions to scrape nor '
          '(alternatively) indicated that they should be chosen from a local '
          'mirror.')

    elif use_local_index_old:
      # If we were told to work with a local mirror, but weren't given specific
      # sdists to inspect, we'll scan everything in
      # BANDERSNATCH_MIRROR_SDIST_DIR until we have n_sdists_to_process sdists.
      # There is a better way to do this, but I'll leave this as is for now.

      # Ensure that the local PyPI mirror directory exists first.
      if not os.path.exists(BANDERSNATCH_MIRROR_SDIST_DIR):
        raise Exception('--- Exception. Expecting a bandersnatched mirror of '
            'PyPI at ' + BANDERSNATCH_MIRROR_SDIST_DIR + ' but that directory '
            'does not exist.')
      i = 0
      # (dirpath rather than dir, to avoid shadowing the builtin.)
      for dirpath, _subdirs, files in os.walk(BANDERSNATCH_MIRROR_SDIST_DIR):
        for fname in files:
          if is_sdist(fname):
            tarfilename_full = os.path.join(dirpath, fname)
            # Deduce package names and versions from sdist filename.
            distkey = get_distkey_from_full_filename(tarfilename_full)
            distkeys_to_inspect.append(distkey)
            i += 1
            # awkward control structures, but saving debug run time. tidy later
            if i >= n_sdists_to_process:
              break
        if i >= n_sdists_to_process:
          break

    else: # use_local_index (modern bandersnatch version)
      assert use_local_index, 'Programming error.'
      # # sdists live here: /srv/pypi/web/packages/??/??/*/*.tar.gz
      # # Can implement this such that it checks those places.
      # for name1 in os.listdir(BANDERSNATCH_NEW_MIRROR_SDIST_DIR):
      #   if len(name1) != 2:
      #     continue
      #   for name2 in os.listdir(os.path.join(
      #       BANDERSNATCH_NEW_MIRROR_SDIST_DIR, name1)):
      #     if len(name2) != 2:
      #       continue
      #     for name3 in os.listdir(os.path.join(
      #         BANDERSNATCH_NEW_MIRROR_SDIST_DIR, name1, name2)):
      #       if len(name3) != 60:
      #         continue
      #       for fname in os.listdir():
      #  #.... No, this is not going to unambiguously get me the package name
      #  # in the way that it used to in older versions of bandersnatch.
      #  # Rather than dealing with unexpected naming consequences, I'll go
      #  # with the following even more annoying hack....

      # A dictionary of all versions of all packages on the mirror,
      # collected out-of-band (via xml-rpc at same time as mirroring occurred).
      # Use a context manager so the file handle is closed promptly.
      with open('data/versions_by_package.json', 'r') as vbp_file:
        vbp_mirror = json.load(vbp_file)

      i = 0
      for package in vbp_mirror:
        if i >= n_sdists_to_process:
          break

        for version in vbp_mirror[package]:

          if i >= n_sdists_to_process:
            break

          distkey = depdata.distkey_format(package, version)
          distkeys_to_inspect.append(distkey)

          i += 1



  # We should now have distkeys to inspect (unless run_all_conflicting is True).


  # Load the dependencies, conflicts, and blacklist databases.
  # The blacklist is a list of runs that resulted in errors or runs that were
  # manually added because, for example, they hang seemingly forever or take an
  # inordinate length of time.
  depdata.ensure_data_loaded([conflict_model])

  # Alias depdata.conflicts_db to the relevant conflicts db. (Ugly)
  depdata.set_conflict_model_legacy(conflict_model) # should remove this


  #if run_all_conflicting:
  #  distkeys_to_inspect = [distkey for distkey in depdata.conflicts_3_db if
  #      depdata.conflicts_3_db[distkey]]


  n_inspected = 0
  n_successfully_processed = 0
  last_wrote_at = 0

  # Now take all of the distkeys ( e.g. 'python-twitter(0.2.1)' ) indicated and
  # run on them.
  for distkey in distkeys_to_inspect:

    # To avoid losing too much data, make sure we at least write data to disk
    # about every 100 successfully processed or 10000 inspected dists. Avoid
    # writing repeatedly in edge cases (e.g. when we write after 100
    # successfully processed and then have to keep writing for every skip that
    # occurs after that.
    progress = n_inspected + n_successfully_processed * 100
    if progress > last_wrote_at + 10000:
      last_wrote_at = progress
      logger.info("Writing early.")
      depdata.write_data_to_files([conflict_model])


    # The skip conditions.

    # If dist is in the blacklist for the same version of python we're running.
    blacklisted = distkey in depdata.blacklist \
        and sys.version_info.major in depdata.blacklist[distkey]

    # If dist has conflict info saved already
    already_in_conflicts = distkey in depdata.conflicts_db

    # Do we have dep info for the dist? Not a skip condition, but part of
    # careful_skip tests.
    already_in_dependencies = distkey in depdata.dependencies_by_dist


    # If we're not in no_skip mode, perform the skip checks.
    # Skip checks. If the dist is blacklisted or we already have conflict
    # data, then skip it - unless we're in careful skip mode and we don't
    # have dependency data for the dist.
    if not no_skip and (blacklisted or already_in_conflicts):

      # If dist isn't blacklisted, we already have conflict info, there's no
      # dependency info, and careful skip is on, don't actually skip.
      if careful_skip and not already_in_dependencies and not blacklisted:
        print('---    Not skipping ' + distkey + ': ' +
            'Already have conflict data, however there is no dependency info '
            'for the dist, the dist is not blacklisted, and we are in '
            'careful_skip mode.')

      else: # Skip, since we don't have a reason not to.
        n_inspected += 1
        print('---    SKIP -- ' + distkey + ': ' +
            'Blacklisted. '*blacklisted +
            'Already have conflict data. '*already_in_conflicts +
            '(Finished ' + str(n_inspected) + ' out of ' +
            str(len(distkeys_to_inspect)) + ')')
        continue


    # If we didn't skip, process the dist.

    packagename = depdata.get_packname(distkey)
    version_string = depdata.get_version(distkey)
    #assert(distkey.rfind(')') == len(distkey) - 1)
    formatted_requirement = packagename + "==" + version_string
    assert(conflict_model in [1, 2, 3])

    # Construct the argument list.
    # Include argument to pass to pip to tell it not to prod users about our
    # strange pip version (lest they follow that instruction and install a
    # standard pip version):
    pip_arglist = [
      'install',
      '-d', TEMPDIR_FOR_DOWNLOADED_DISTROS,
      '--disable-pip-version-check',
      '--find-dep-conflicts', str(conflict_model),
      '--quiet']

    if use_local_index:
      pip_arglist.extend(['-i', BANDERSNATCH_MIRROR_INDEX_DIR])

    pip_arglist.append(formatted_requirement)

    # With arg list constructed, call pip.main with it to run a modified pip
    # install attempt (will not install).
    # This assumes that we're dealing with my pip fork version 8.0.0.dev0seb).
    print('---    Sending ' + distkey + ' to pip.')
    logger.debug('Scraper says: before pip call, len(deps) is ' +
        str(len(depdata.dependencies_by_dist)))

    # Call pip, with a 5 minute timeout.
    exitcode = None # scoping paranoia
    try:
      exitcode = _call_pip_with_timeout(pip_arglist)
    except timeout.TimeoutException as e: # This catch is not likely. See below
      logger.warning('pip timed out on dist ' + distkey + '(5min)!'
          ' Will treat as error. Exception follows: ' + str(e.args))
      # Set the exit code to something other than 2 or 0 and it'll be treated
      # like any old pip error below, resulting in a blacklist.
      exitcode = 1000

    # However, unfortunately, we cannot assume that pip will let that exception
    # pass up to us. It seems to take the signal, stop and clean up, and then
    # return exit code 2. This is fine, except that then we can't really
    # blacklist the process. I'd have to add a timer here, detect something
    # very close to the timeout, and guess that it timed out. /: That sucks.
    # In any case, we'll not learn that it's a process that times out, but
    # we'll just look at it as a possible conflict case. (The data recorded
    # will not list it as a conflict. Hopefully, that data is not corrupted.
    # It's unlikely that it would have been, though, so I judge this OK.)

    # Count this dist as inspected before reporting, so that the
    # "(Finished N out of M)" figure includes it - consistent with the SKIP
    # message above, which also counts the current dist.
    n_inspected += 1

    # Process the output of the pip command.
    if exitcode == 2:
      print('--- X  SDist ' + distkey + ' : pip errored out (code=' +
        str(exitcode) + '). Possible DEPENDENCY CONFLICT. Result recorded in '
        'conflicts_<...>.json. (Finished ' +
        str(n_inspected) + ' out of ' + str(len(distkeys_to_inspect)) +
        ')')
    elif exitcode == 0:
      print('--- .  SDist ' + distkey + ' : pip completed successfully. '
        'No dependency conflicts observed. (Finished ' + str(n_inspected)
        + ' out of ' + str(len(distkeys_to_inspect)) + ')')
    else:
      print('--- .  SDist ' + distkey + ': pip errored out (code=' +
        str(exitcode) + '), but it seems to have been unrelated to any dep '
        'conflict.... (Finished ' + str(n_inspected) + ' out of ' +
        str(len(distkeys_to_inspect)) + ')')
      # Store in the list of failing packages along with the python version
      # we're running. (sys.version_info.major yields int 2 or 3)
      # Contents are to eventually be a list of the major versions in which it
      # fails. We should never get here if the dist is already in the blacklist
      # for this version of python, but let's keep going even if so.
      if distkey in depdata.blacklist and sys.version_info.major in \
        depdata.blacklist[distkey] and not no_skip:
        logger.warning('  WARNING! This should not happen! ' + distkey + ' was'
          ' already in the blacklist for python ' + str(sys.version_info.major)
          + ', thus it should not have been run unless we have --noskip on '
          '(which it is not)!')
      else:
        # Either the dist is not in the blacklist or it's not in the blacklist
        # for this version of python. (Sensible)
        if distkey not in depdata.blacklist:
          depdata.blacklist[distkey] = [sys.version_info.major]
          logger.info("  Added entry to blacklist for " + distkey)
        else:
          assert(no_skip or sys.version_info.major not in depdata.blacklist[distkey])
          depdata.blacklist[distkey].append(sys.version_info.major)
          logger.info("  Added additional entry to blacklist for " + distkey)


    # end of exit code processing
    # Every dist that was sent to pip counts here, whatever the exit code.
    n_successfully_processed += 1

  # end of for each tarfile/sdist

  # We're done with all packages. Write the collected data back to file.
  logger.debug("Writing.")
  depdata.write_data_to_files([conflict_model])
Example #3
0
def install_into_venv(distkeys, venv_dir, local=False):
  """
  Given a list of distkeys, install those distributions into the given virtual
  environment with a single `pip install` command.

  Args:
    - distkeys: list of distribution keys (see depdata.py, e.g. 'Django(1.8)')
    - venv_dir: the directory of a virtual environment, e.g. 'venvs/v3_zzzzzz',
      if venvs/v3_zzzzzz/bin/activate is the virtual environment's activate
      script.
    - local: Default False. If False, get packages from PyPI; if True, get
      packages from index on local filesystem: /srv/pypi/web/simple (local
      mirror); if a non-empty string, use the given string as the index
      location - e.g. 'file:///srv/pypi/web/simple'.

  Returns:
    - stdout_install: stdout from the install command, in whatever form
      popen_wrapper returns it.
    - stderr_install: Likewise, but stderr instead of stdout.
  """

  # One 'name==version' requirement per distkey, separated by spaces so that
  # pip receives each requirement as its own argument.
  requirements = ' '.join(
      depdata.get_packname(distkey) + '==' + depdata.get_version(distkey)
      for distkey in distkeys)

  # Put together the pip command.

  # First, are we using PyPI or a specified (local) mirror?
  # (Equality rather than identity is kept deliberately, so that callers
  # passing 1 continue to get the default local mirror.)
  if local == True:
    index_optional_args = '-i file:///srv/pypi/web/simple'

  elif local: # If local is a specific string, assume it's the index location.
    index_optional_args = '-i ' + local

  else: # Proceed normally, using PyPI, not adding index arguments.
    index_optional_args = ''

  # Would love to be able to just call
  # scraper._call_pip_with_timeout(pip_arglist), but can't because we have to
  # do this in a virtual environment, so doing it this way instead:
  cmd_sourcevenv = get_source_venv_cmd_str(venv_dir)
  cmd_install_dist = cmd_sourcevenv + \
      '; pip install --disable-pip-version-check --quiet ' + \
      index_optional_args + ' ' + requirements

  logger.info('Using pip to install a list of distkeys into venv ' + venv_dir)

  # Run the install through popen_wrapper, which hands back stdout and stderr.
  stdout_install, stderr_install = popen_wrapper(cmd_install_dist)

  # Log output, if there is any.
  if stdout_install:
    logger.info('Installation process using pip yields stdout: ' +
        stdout_install)

  if stderr_install:
    logger.warning('Installation process using pip yields stderr: ' +
        stderr_install)

  return stdout_install, stderr_install
Example #4
0
def install_into_venv(distkeys, venv_dir, local=False):
    """
    Given a list of distkeys, install those distributions into the given
    virtual environment with a single `pip install` command.

    Args:
      - distkeys: list of distribution keys (see depdata.py, e.g.
        'Django(1.8)')
      - venv_dir: the directory of a virtual environment, e.g.
        'venvs/v3_zzzzzz', if venvs/v3_zzzzzz/bin/activate is the virtual
        environment's activate script.
      - local: Default False. If False, get packages from PyPI; if True, get
        packages from index on local filesystem: /srv/pypi/web/simple (local
        mirror); if a non-empty string, use the given string as the index
        location - e.g. 'file:///srv/pypi/web/simple'.

    Returns:
      - stdout_install: stdout from the install command, in whatever form
        popen_wrapper returns it.
      - stderr_install: Likewise, but stderr instead of stdout.
    """

    # One 'name==version' requirement per distkey, separated by spaces so that
    # pip receives each requirement as its own argument.
    requirements = ' '.join(
        depdata.get_packname(distkey) + '==' + depdata.get_version(distkey)
        for distkey in distkeys)

    # Put together the pip command.

    # First, are we using PyPI or a specified (local) mirror?
    # (Equality rather than identity is kept deliberately, so that callers
    # passing 1 continue to get the default local mirror.)
    if local == True:
        index_optional_args = '-i file:///srv/pypi/web/simple'

    elif local:  # If local is a specific string, assume it's the index location.
        index_optional_args = '-i ' + local

    else:  # Proceed normally, using PyPI, not adding index arguments.
        index_optional_args = ''

    # Would love to be able to just call
    # scraper._call_pip_with_timeout(pip_arglist), but can't because we have to
    # do this in a virtual environment, so doing it this way instead:
    cmd_sourcevenv = get_source_venv_cmd_str(venv_dir)
    cmd_install_dist = cmd_sourcevenv + \
        '; pip install --disable-pip-version-check --quiet ' + \
        index_optional_args + ' ' + requirements

    logger.info('Using pip to install a list of distkeys into venv ' +
                venv_dir)

    # Run the install through popen_wrapper, which hands back stdout and
    # stderr.
    stdout_install, stderr_install = popen_wrapper(cmd_install_dist)

    # Log output, if there is any.
    if stdout_install:
        logger.info('Installation process using pip yields stdout: ' +
                    stdout_install)

    if stderr_install:
        logger.warning('Installation process using pip yields stderr: ' +
                       stderr_install)

    return stdout_install, stderr_install
Example #5
0
def _backtracking_satisfy(distkey_to_satisfy,
                          edeps,
                          versions_by_package,
                          _depth=0,
                          _candidates=None,
                          _conflicting_distkeys=None):
    """
    Recursive helper to backtracking_satisfy. See comments there.

    Args:
      - distkey_to_satisfy: the distkey whose dependencies are to be satisfied
      - edeps: elaborated dependencies, indexed by distkey. Each dependency is
        indexed here as [0] package name, [1] satisfying versions,
        [2] specifier string.
      - versions_by_package: not used directly here; passed on unchanged to
        recursive calls.

    The ADDITIONAL arguments, for recursion state, are:
      - _depth: recursion depth, used to indent debugging output
      - _candidates: used in recursion: the list of candidates already
        chosen, both to avoid circular dependencies and also to select sane
        choices and force early conflicts (to catch all solutions).
        None (the default) means no candidates yet.
      - _conflicting_distkeys: similar to _candidates, but lists dists that
        we've established conflict with accepted members of _candidates.
        Candidate versions found in this list are skipped. None (the default)
        means none known.

    Returns a 3-tuple:
      - the satisfying candidate set: _candidates plus distkey_to_satisfy plus
        whatever was chosen to satisfy its dependencies
      - the list of distkeys found to conflict during this call, for internal
        use in recursion
      - str of dot graph statements (node label and edges) describing the
        dependencies satisfied here; '' if distkey_to_satisfy has no
        dependencies

    Raises:
      - depresolve.NoSatisfyingVersionError if a dependency lists no
        satisfying versions at all
      - depresolve.ConflictingVersionError if a dependency cannot be met by
        the dist of that package already in the candidate set
      - depresolve.UnresolvableConflictError if every satisfying version of
        some dependency was skipped or led to a conflict
    """
    # Fresh lists per call instead of shared mutable default arguments.
    if _candidates is None:
        _candidates = []
    if _conflicting_distkeys is None:
        _conflicting_distkeys = []

    # (Not sure this check is necessary yet, but we'll see.)
    if conflicts_with(distkey_to_satisfy, _candidates):
        assert False, "This should be impossible now...."

    # I think this should also be impossible now due to checks before this call
    # would be made?
    if distkey_to_satisfy in _candidates:
        assert False, "This should also be impossible now, I think."
        # You've already got me, bud. Whatchu doin'? (Terminate recursion on
        # circular dependencies, since we're already covered.)
        # Only reachable when asserts are disabled (python -O).
        return [], [], ''

    # Start the set of candidates to install with what our parent (depender)
    # already needs to install, plus ourselves.
    satisfying_candidate_set = _candidates + [
        distkey_to_satisfy,
    ]

    # Start a list of distkeys that conflict with us while we try to fulfil our
    # dependencies. (Prevents duplicating work)
    my_conflicting_distkeys = []

    # Identify the version of the package to install on the dotgraph. /:
    dotgraph = dot_sanitize(depdata.get_packname(distkey_to_satisfy)) + \
        '[label = "' + distkey_to_satisfy + '"];\n'

    depdata.assume_dep_data_exists_for(distkey_to_satisfy, edeps)

    my_edeps = edeps[distkey_to_satisfy]  # my elaborated dependencies

    if not my_edeps:  # if no dependencies, return only what's already listed
        logger.debug('    ' * _depth + distkey_to_satisfy +
                     ' had no dependencies. '
                     'Returning just it.')
        return satisfying_candidate_set, [], ''

    for edep in my_edeps:

        satisfying_packname = edep[0]
        satisfying_versions = sort_versions(edep[1])
        chosen_version = None

        if not satisfying_versions:
            raise depresolve.NoSatisfyingVersionError(
                'Dependency of ' + distkey_to_satisfy + ' on ' +
                satisfying_packname + ' with '
                'specstring ' + edep[2] +
                ' cannot be satisfied: no versions found '
                'in elaboration attempt.')

        logger.debug('    ' * _depth + 'Dependency of ' + distkey_to_satisfy +
                     ' on ' + satisfying_packname + ' with specstring ' +
                     edep[2] + ' is '
                     'satisfiable with these versions: ' +
                     str(satisfying_versions))

        # Is there already a dist of this package in the candidate set?
        preexisting_dist_of_this_package = find_dists_matching_packname(
            satisfying_packname, satisfying_candidate_set)

        if preexisting_dist_of_this_package:
            assert 1 == len(preexisting_dist_of_this_package), \
                "Programming error." # Can't have more than 1 to begin with!
            # Set of 1 item -> 1 item.
            preexisting_dist_of_this_package = preexisting_dist_of_this_package[
                0]

            preexisting_version = \
                depdata.get_version(preexisting_dist_of_this_package)

            if preexisting_version in satisfying_versions:
                logger.debug(
                    '    ' * _depth + 'Dependency of ' + distkey_to_satisfy +
                    ' on ' + satisfying_packname + ' with specstring ' +
                    edep[2] +
                    ' is already satisfied by pre-existing candidate ' +
                    preexisting_dist_of_this_package + '. Next dependency.')
                continue

            else:
                raise depresolve.ConflictingVersionError(
                    'Dependency of ' + distkey_to_satisfy + ' on ' +
                    satisfying_packname + ' with '
                    'specstring ' + edep[2] +
                    ' conflicts with a pre-existing distkey in'
                    ' the list of candidates to install: ' +
                    preexisting_dist_of_this_package)

        for candidate_version in sort_versions(satisfying_versions):

            candidate_distkey = depdata.distkey_format(satisfying_packname,
                                                       candidate_version)

            if candidate_distkey in _conflicting_distkeys:
                logger.debug('    ' * _depth + '  Skipping version ' +
                             candidate_version + '(' + candidate_distkey +
                             '): already in _conflicting_distkeys.')
                continue

            # else try this version.
            logger.debug('    ' * _depth + '  Trying version ' +
                         candidate_version)

            # Would the addition of this candidate result in a conflict?
            # Recurse and test result. Detect UnresolvableConflictError.
            # Because we're detecting such an error in the child, there's no reason
            # to still do detection of the combined set here in the parent, but I
            # will leave in an assert in case.
            try:
                (candidate_satisfying_candidate_set, new_conflicts, child_dotgraph) = \
                    _backtracking_satisfy(candidate_distkey, edeps,
                    versions_by_package, _depth+1, satisfying_candidate_set)

            # I don't know that I should be catching both. Let's see what happens.
            except (depresolve.ConflictingVersionError,
                    depresolve.UnresolvableConflictError):
                logger.debug('    ' * _depth + '  ' + candidate_version +
                             ' conflicted. '
                             'Trying next.')
                my_conflicting_distkeys.append(candidate_distkey)
                continue

            else:  # Could design it so child adds to this set, but won't yet.
                combined_satisfying_candidate_set = combine_candidate_sets(
                    satisfying_candidate_set,
                    candidate_satisfying_candidate_set)

                assert not detect_direct_conflict(combined_satisfying_candidate_set), \
                    "Programming error. See comments adjacent."

                # save the new candidates (could be designed away, but for now, keeping)
                chosen_version = candidate_version
                satisfying_candidate_set = combined_satisfying_candidate_set
                my_conflicting_distkeys.extend(new_conflicts)

                # Save the graph visualization output for the new candidate:
                # an edge from us to the chosen dependency, followed by
                # whatever the child produced.
                dotgraph += dot_sanitize(depdata.get_packname(distkey_to_satisfy)) + \
                    ' -> ' + dot_sanitize(satisfying_packname) + ';\n' + child_dotgraph

                logger.debug('    ' * _depth + '  ' + candidate_version +
                             ' fits. Next '
                             'dependency.')
                break

        if chosen_version is None:
            raise depresolve.UnresolvableConflictError(
                'Dependency of ' + distkey_to_satisfy + ' on ' +
                satisfying_packname + ' with specstring ' + edep[2] +
                ' cannot be satisfied: versions '
                'found, but none had 0 conflicts.')

    return satisfying_candidate_set, my_conflicting_distkeys, dotgraph
Example #6
0
def is_dep_satisfied(edep,
                     candidates,
                     disregard_setuptools=False,
                     report_issue=False):
    """
    Decide whether one elaborated dependency is met by a candidate list.

    Args:
      - edep: elaborated dependency (see depresolve/depdata.py); edep[0] is
        the package name and edep[1] the acceptable versions.
      - candidates: list of candidate distkeys; expected to hold at most one
        dist per package.
      - disregard_setuptools: if True, dependencies on setuptools, pip, wheel
        and argparse (which pip list would not report) count as satisfied
        without looking at the candidates.
      - report_issue: if True, return a (satisfied, problem) tuple instead of
        a bare boolean.

    Returns:
      True/False, or (True/False, problem string) when report_issue is True.
      The problem string is only filled in when no dist of the package is
      among the candidates; otherwise it is ''.

    Raises:
      ValueError if candidates holds more than one dist of the package.
    """
    wanted_package = edep[0]
    acceptable_versions = edep[1]

    def _answer(is_satisfied, issue):
        # Shape the return value according to report_issue.
        if report_issue:
            return is_satisfied, issue
        return is_satisfied

    # This is unfortunately a necessary hack, as pip list doesn't report
    # these packages.
    if disregard_setuptools and wanted_package in (
            'setuptools', 'pip', 'wheel', 'argparse'):
        return _answer(True, '')

    matching_dists = find_dists_matching_packname(wanted_package, candidates)

    # No dist of the package at all: definitely not satisfied.
    if not matching_dists:
        issue = ('Not satisfied: No version of ' + wanted_package +
                 ' in candidate list.')
        logger.info(issue)
        return _answer(False, issue)

    # The caller should never hand us several dists of one package.
    if len(matching_dists) != 1:
        raise ValueError(
            'Programming error. Multiple dists of the same package '
            'provided in a candidate list. N=' + str(len(matching_dists)))

    present_version = depdata.get_version(matching_dists[0])

    # Cheap literal membership test first; fall back to a real version
    # comparison so that e.g. 2.0 matches 2.0.0.
    version_ok = present_version in acceptable_versions or any(
        depdata.versions_are_equal(present_version, acceptable)
        for acceptable in acceptable_versions)

    return _answer(version_ok, '')
Example #7
0
def _distkeys_from_piplist(piplist_lines):
    """
  Convert lines of legacy-format `pip list` output into distkey-like strings.

  pip list outputs almost-distkeys, like: 'pbr (0.11.1)'. Spaces are removed
  and the result is lowercased, yielding e.g. 'pbr(0.11.1)'.

  Entries for wheel, pip and setuptools are dropped, and blank lines are
  skipped so that no empty distkey ends up in the result.

  Args:
    - piplist_lines: iterable of strings, one per line of `pip list` output.

  Returns:
    - list of distkey-like strings, in the order they appeared.
  """
    # These distributions are installed when a new virtual environment is
    # created, so ignore them. This is an unpleasant hack: some packages
    # actually declare dependencies on these, and so the stored solutions may
    # be incomplete, and there's a hack in is_dep_satisfied to disregard these
    # when given disregard_setuptools=True.
    # Also note that because pip here is installed using -e option, it'll show
    # up as having more than just the version string in the ()s where its
    # version string is expected: 'pip (8.0.0.dev0, /Users/s/w/pipcollins)'
    # Since we're excluding pip here anyway, we don't have to deal with that.
    ignored_prefixes = ('wheel(', 'pip(', 'setuptools(')

    solution = []
    for line in piplist_lines:
        installed_distkey = line.replace(' ', '').strip().lower()

        # A blank line is not a distribution; don't record '' as a distkey.
        if not installed_distkey:
            continue

        if installed_distkey.startswith(ignored_prefixes):
            continue

        solution.append(installed_distkey)

    return solution


def rbt_backtracking_satisfy(distkey,
                             edeps=None,
                             versions_by_package=None,
                             local=False,
                             dir_rbt_pip='../pipcollins'):
    """
  Determine correct install candidates by using rbtcollins' pip branch
  issue-988.

  Steps:
    1. Sets up a random-name new virtual environment
    2. Installs rbtcollins' pip patch on that virtual environment
    3. Installs the given distribution using rbt pip
    4. Runs `pip list` and harvests the solution set

  Args & output modeled after resolver.resolvability.backtracking_satisfy().
  edeps and versions_by_package are not used, however, but simply taken
  optionally so that the two functions can be called the same way.

  Args:
   - distkey: lowercase distkey of the distribution to install. It is split
     into package name and version via depdata and handed to pip as
     'packname==version'.
   - edeps (optional): unused.
   - versions_by_package (optional): unused.
   - local (optional):
        - if not provided, we connect to PyPI
        - if simply set to 'True', we use the default local bandersnatch
          location for the simple listing of packages,
          'file:///srv/pypi/web/simple'.
        - if another value is provided, we interpret it as a string indicating
          the location of the simple index listing of packages on the mirror
          to use.
   - dir_rbt_pip (optional): directory holding the checkout of rbtcollins'
     pip branch; `pip install -e .` is run from inside it.

  Side effects:
    - Records distkey -> virtual environment name in the module-level
      venv_catalog (loaded from VENV_CATALOG_JSON_FNAME on first use) and
      rewrites that JSON file.
    - Creates a new virtual environment under VENVS_DIR. It is not removed
      by this function.

  Returns:
    A 2-tuple (solution, std_err):
    - solution: the list of distributions to install to satisfy all of the
      given distkey's dependencies (and all their dependencies and so on). In
      other words, an install candidate set that should include the given
      distkey and provide for a functioning environment.
    - std_err: the stderr string returned by popen_wrapper for the pip
      install command for the distribution, using rbtcollins' pip branch.
      This is potentially helpful in the case of errors.

  Raises:
    - UnrelatedInstallFailure if creation of a virtualenv fails (before we even
      get to the point of trying to install the dist). Should probably just be
      retried right away.
    - AssertionError if distkey is not all lowercase.

  """
    # Remember not to use distkey.islower(). Bug.
    assert distkey == distkey.lower(), \
        'distkeys should always be lowercase! ' + distkey + ' is not!'

    ###############
    # Steps 1 and 2: Create venv and install rbt pip.
    venv_name = 'v3_' + ''.join(
        random.choice(string.ascii_lowercase + string.digits)
        for _ in range(7))

    # Save a map of this virtual environment name to distkey for later auditing
    # if interesting things happen.
    global venv_catalog
    if venv_catalog is None:
        venv_catalog = depdata.load_json_db(VENV_CATALOG_JSON_FNAME)
    venv_catalog[distkey] = venv_name
    # Use a context manager so the catalog is flushed and closed right away
    # rather than whenever the file object happens to be garbage collected.
    with open(VENV_CATALOG_JSON_FNAME, 'w') as catalog_file:
        json.dump(venv_catalog, catalog_file)

    # NOTE(review): all commands below are assembled as shell strings from
    # distkey, local and dir_rbt_pip without quoting. If any of these can come
    # from untrusted input, they should be shell-quoted - confirm against
    # popen_wrapper and the callers.
    venv_path = VENVS_DIR + '/' + venv_name
    cmd_venvcreate = 'virtualenv -p python3 --no-site-packages ' + venv_path
    cmd_sourcevenv = 'source ' + venv_path + '/bin/activate'
    cmd_piplist = cmd_sourcevenv + '; pip list -l --disable-pip-version-check'
    cmd_install_rbt_pip = cmd_sourcevenv + '; cd ' + dir_rbt_pip + \
        '; pip install -e . --disable-pip-version-check'

    # Create venv
    logger.info('For %s, creating virtual environment %s', distkey, venv_name)
    popen_wrapper(cmd_venvcreate)

    # Validate the venv by trying to source it. Sometimes this goes wrong....
    # I don't know why yet.
    _, stderr_source = popen_wrapper(cmd_sourcevenv)
    if 'No such file or directory' in stderr_source:
        raise UnrelatedInstallFailure(
            'Failed to create the virtual environment ' + venv_name +
            ' for dist ' + distkey + ' installation. bin/activate is '
            'missing.')

    logger.info('For %s, venv %s looks OK.', distkey, venv_name)

    # Install rbtcollins' issue_988 pip branch
    # (pip version should then be 8.0.0dev0)
    logger.info('For %s, installing rbt_pip in %s', distkey, venv_name)
    popen_wrapper(cmd_install_rbt_pip)

    ###############
    # Step 3: Install given dist using rbt pip.

    # Deconstruct distkey into package and version for pip.
    packname = depdata.get_packname(distkey)
    version_string = depdata.get_version(distkey)
    # Construct as a requirement for pip install command.
    requirement = packname + '==' + version_string

    # Put together the pip command.

    # First, are we using PyPI or a specified (local) mirror?
    # The comparison against True is deliberate (not plain truthiness): a
    # non-empty string is truthy too, and must be treated as an index location.
    if local == True:  # noqa: E712
        index_optional_args = '-i file:///srv/pypi/web/simple'
    elif local:  # If local is a specific string, assume it's the index location.
        index_optional_args = '-i ' + local
    else:
        index_optional_args = ''  # Use PyPI; no index arguments.

    # Would love to be able to just call
    # scraper._call_pip_with_timeout(pip_arglist), but can't because we have to
    # do this in a virtual environment, so doing it this way instead:
    cmd_install_dist = cmd_sourcevenv + \
        '; pip install --disable-pip-version-check --quiet ' + \
        index_optional_args + ' ' + requirement

    logger.info('For %s, using rbtpip to install in %s', distkey, venv_name)

    # Install using rbtcollins pip and capture its output. Any timeout and the
    # decoding of the output are presumably handled inside popen_wrapper -
    # TODO confirm.
    stdout_installation, stderr_installation = popen_wrapper(cmd_install_dist)

    # Print output, if there is any.
    if stdout_installation:
        logger.info('Installation process for %s using rbtpip yields '
                    'stdout: %s', distkey, stdout_installation)

    if stderr_installation:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning('Installation process for %s using rbtpip yields '
                       'stderr: %s', distkey, stderr_installation)

    ###############
    # Step 4: Run `pip list` and harvest the solution set
    stdout_list, _ = popen_wrapper(cmd_piplist)
    solution = _distkeys_from_piplist(stdout_list.splitlines())

    return solution, stderr_installation