def test_cached_dependency_pinned_unpinned_resolution_multi_run():
  # This exercises the issue described here: https://github.com/pantsbuild/pex/issues/178
  project1_0_0 = make_sdist(name='project', version='1.0.0')
  project1_1_0 = make_sdist(name='project', version='1.1.0')

  with temporary_dir() as td:
    for sdist in (project1_0_0, project1_1_0):
      safe_copy(sdist, os.path.join(td, os.path.basename(sdist)))
    fetchers = [Fetcher([td])]

    with temporary_dir() as cd:
      # First run, pinning 1.0.0 in the cache
      dists = resolve(['project', 'project==1.0.0'], fetchers=fetchers, cache=cd, cache_ttl=1000)
      assert len(dists) == 1
      assert dists[0].version == '1.0.0'

      # This simulates separate invocations of pex but allows us to keep the same tmp cache dir
      Crawler.reset_cache()

      # Second run, the unbounded 'project' req will find the 1.0.0 in the cache. But it should
      # also return SourcePackages found in td
      dists = resolve(['project', 'project==1.1.0'], fetchers=fetchers, cache=cd, cache_ttl=1000)
      assert len(dists) == 1
      assert dists[0].version == '1.1.0'

      # Third run, if exact resolvable and inexact resolvable, and cache_ttl is expired, exact
      # resolvable should pull from pypi as well since inexact will and the resulting
      # resolvable_set.merge() would fail.
      Crawler.reset_cache()
      time.sleep(1)
      dists = resolve(['project', 'project==1.1.0'], fetchers=fetchers, cache=cd, cache_ttl=1)
      assert len(dists) == 1
      assert dists[0].version == '1.1.0'
def test_crawler_local():
  FL = ('a.txt', 'b.txt', 'c.txt')
  with temporary_dir() as td:
    for fn in FL:
      with open(os.path.join(td, fn), 'w'):
        pass
    for dn in (1, 2):
      os.mkdir(os.path.join(td, 'dir%d' % dn))
      for fn in FL:
        with open(os.path.join(td, 'dir%d' % dn, fn), 'w'):
          pass

    # basic file / dir rel splitting
    links, rels = Crawler.crawl_local(Link.wrap(td))
    assert set(links) == set(Link.wrap(os.path.join(td, fn)) for fn in FL)
    assert set(rels) == set(Link.wrap(os.path.join(td, 'dir%d' % n)) for n in (1, 2))

    # recursive crawling, single vs multi-threaded
    for caching in (False, True):
      for threads in (1, 2, 3):
        links = Crawler(threads=threads).crawl([td], follow_links=True)
        expect_links = (set(Link.wrap(os.path.join(td, fn)) for fn in FL) |
                        set(Link.wrap(os.path.join(td, 'dir1', fn)) for fn in FL) |
                        set(Link.wrap(os.path.join(td, 'dir2', fn)) for fn in FL))
        assert set(links) == expect_links
def test_crawler_remote_redirect():
  Crawler.reset_cache()
  mock_context = mock.create_autospec(Context, spec_set=True)
  mock_context.resolve = lambda link: Link('http://url2.test.com')
  mock_context.content.side_effect = [MOCK_INDEX_A]
  expected_output = set([Link('http://url2.test.com/3to2-1.0.tar.gz')])

  c = Crawler(mock_context)
  test_links = [Link('http://url1.test.com')]
  assert c.crawl(test_links) == expected_output
def test_crawler_remote():
  Crawler.reset_cache()
  mock_context = mock.create_autospec(Context, spec_set=True)
  mock_context.content.side_effect = [MOCK_INDEX_A, MOCK_INDEX_B, Exception('shouldnt get here')]
  expected_output = set([Link('http://url1.test.com/3to2-1.0.tar.gz'),
                         Link('http://url2.test.com/APScheduler-2.1.0.tar.gz')])

  c = Crawler(mock_context)
  test_links = [Link('http://url1.test.com'), Link('http://url2.test.com')]
  assert c.crawl(test_links) == expected_output

  # Test memoization of Crawler.crawl().
  assert c.crawl(test_links) == expected_output
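# The MOCK_INDEX_A / MOCK_INDEX_B fixtures consumed by the remote-crawl tests above are not shown
# in these snippets. A minimal, hypothetical sketch of their shape: small HTML index pages whose
# anchor hrefs the Crawler extracts and resolves against the page url, producing the
# expected_output links asserted above. The exact fixture contents here are assumptions, not the
# originals.
MOCK_INDEX_A = '''
<html><body>
  <a href="3to2-1.0.tar.gz">3to2-1.0.tar.gz</a>
</body></html>
'''

MOCK_INDEX_B = '''
<html><body>
  <a href="APScheduler-2.1.0.tar.gz">APScheduler-2.1.0.tar.gz</a>
</body></html>
'''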
def _resolve_and_link(config, requirement, target_link, installer_provider, logger=print):
  # Short-circuit if there is a local copy
  if os.path.exists(target_link) and os.path.exists(os.path.realpath(target_link)):
    egg = EggPackage(os.path.realpath(target_link))
    if egg.satisfies(requirement):
      return egg

  fetchers = fetchers_from_config(config)
  context = context_from_config(config)
  iterator = Iterator(fetchers=fetchers, crawler=Crawler(context))
  links = [link for link in iterator.iter(requirement) if isinstance(link, SourcePackage)]

  for link in links:
    logger(' fetching %s' % link.url)
    sdist = context.fetch(link)
    logger(' installing %s' % sdist)
    installer = installer_provider(sdist)
    dist_location = installer.bdist()
    target_location = os.path.join(os.path.dirname(target_link),
                                   os.path.basename(dist_location))
    shutil.move(dist_location, target_location)
    _safe_link(target_location, target_link)
    logger(' installed %s' % target_location)
    return EggPackage(target_location)
def _resolve_and_link_interpreter(requirement, fetchers, target_link, installer_provider):
  # Short-circuit if there is a local copy
  if os.path.exists(target_link) and os.path.exists(os.path.realpath(target_link)):
    egg = EggPackage(os.path.realpath(target_link))
    if egg.satisfies(requirement):
      return egg

  context = Context.get()
  iterator = Iterator(fetchers=fetchers, crawler=Crawler(context))
  links = [link for link in iterator.iter(requirement) if isinstance(link, SourcePackage)]

  with TRACER.timed('Interpreter cache resolving %s' % requirement, V=2):
    for link in links:
      with TRACER.timed('Fetching %s' % link, V=3):
        sdist = context.fetch(link)

      with TRACER.timed('Installing %s' % link, V=3):
        installer = installer_provider(sdist)
        dist_location = installer.bdist()
        target_location = os.path.join(os.path.dirname(target_link),
                                       os.path.basename(dist_location))
        shutil.move(dist_location, target_location)
        _safe_link(target_location, target_link)
        return EggPackage(target_location)
def test_inexact_requirements():
  with plugin_resolution(plugins=[('jake', '1.2.3'), 'jane']) as results:
    working_set, chroot, repo_dir, cache_dir = results

    assert 2 == len(working_set.entries)

    # Kill the cache and the repo source dir and wait past our 1s test TTL. If the PluginResolver
    # truly detects inexact plugin requirements, it should skip perma-caching and fall through to
    # pex for a TTL-expired resolve, which then fails.
    safe_rmtree(repo_dir)
    safe_rmtree(cache_dir)
    Crawler.reset_cache()
    time.sleep(1.5)

    with pytest.raises(Unsatisfiable):
      with plugin_resolution(chroot=chroot, plugins=[('jake', '1.2.3'), 'jane']):
        assert False, 'Should not reach here, should raise first.'
def test_inexact_requirements(self, unused_test_name, packager_cls):
  with self.plugin_resolution(plugins=[('jake', '1.2.3'), 'jane'],
                              packager_cls=packager_cls) as results:
    working_set, chroot, repo_dir, cache_dir = results

    self.assertEqual(2, len(working_set.entries))

    # Kill the cache and the repo source dir and wait past our 1s test TTL. If the PluginResolver
    # truly detects inexact plugin requirements, it should skip perma-caching and fall through to
    # pex for a TTL-expired resolve, which then fails.
    safe_rmtree(repo_dir)
    safe_rmtree(cache_dir)
    Crawler.reset_cache()
    time.sleep(1.5)

    with self.assertRaises(Unsatisfiable):
      with self.plugin_resolution(chroot=chroot, plugins=[('jake', '1.2.3'), 'jane']):
        self.fail('Should not reach here, should raise first.')
def __init__(self, fetchers=None, crawler=None, follow_links=False, allow_prereleases=None):
  self._crawler = crawler or Crawler()
  self._fetchers = fetchers if fetchers is not None else [PyPIFetcher()]
  self.__follow_links = follow_links
  self.__allow_prereleases = allow_prereleases
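# A minimal usage sketch tying the constructor above to the iterator.iter(...) calls in the
# surrounding snippets. The import paths and the pkg_resources.Requirement argument are
# assumptions based on the pex 1.x layout; they are not taken from these snippets.
from pkg_resources import Requirement

from pex.crawler import Crawler
from pex.fetcher import PyPIFetcher
from pex.iterator import Iterator
from pex.package import SourcePackage

iterator = Iterator(fetchers=[PyPIFetcher()], crawler=Crawler(), allow_prereleases=False)
# Keep only source distributions for the requirement, as _resolve_and_link does above.
sdists = [pkg for pkg in iterator.iter(Requirement.parse('project==1.0.0'))
          if isinstance(pkg, SourcePackage)]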
def test_cached_dependency_pinned_unpinned_resolution_multi_run():
  # This exercises the issue described here: https://github.com/pantsbuild/pex/issues/178
  project1_0_0 = make_sdist(name='project', version='1.0.0')
  project1_1_0 = make_sdist(name='project', version='1.1.0')

  with temporary_dir() as td:
    for sdist in (project1_0_0, project1_1_0):
      safe_copy(sdist, os.path.join(td, os.path.basename(sdist)))
    fetchers = [Fetcher([td])]

    with temporary_dir() as cd:
      # First run, pinning 1.0.0 in the cache
      dists = list(
        resolve_multi(['project', 'project==1.0.0'], fetchers=fetchers, cache=cd, cache_ttl=1000)
      )
      assert len(dists) == 1
      assert dists[0].version == '1.0.0'

      # This simulates separate invocations of pex but allows us to keep the same tmp cache dir
      Crawler.reset_cache()

      # Second run, the unbounded 'project' req will find the 1.0.0 in the cache. But it should
      # also return SourcePackages found in td
      dists = list(
        resolve_multi(['project', 'project==1.1.0'], fetchers=fetchers, cache=cd, cache_ttl=1000)
      )
      assert len(dists) == 1
      assert dists[0].version == '1.1.0'

      # Third run, if exact resolvable and inexact resolvable, and cache_ttl is expired, exact
      # resolvable should pull from pypi as well since inexact will and the resulting
      # resolvable_set.merge() would fail.
      Crawler.reset_cache()
      time.sleep(1)
      dists = list(
        resolve_multi(['project', 'project==1.1.0'], fetchers=fetchers, cache=cd, cache_ttl=1)
      )
      assert len(dists) == 1
      assert dists[0].version == '1.1.0'
def test_resolve_prereleases_cached():
  stable_dep = make_sdist(name='dep', version='2.0.0')
  prerelease_dep = make_sdist(name='dep', version='3.0.0rc3')

  with temporary_dir() as td:
    for sdist in (stable_dep, prerelease_dep):
      safe_copy(sdist, os.path.join(td, os.path.basename(sdist)))
    fetchers = [Fetcher([td])]

    with temporary_dir() as cd:
      def assert_resolve(dep, expected_version, **resolve_kwargs):
        dists = list(
          resolve_multi([dep], cache=cd, cache_ttl=1000, **resolve_kwargs)
        )
        assert 1 == len(dists)
        dist = dists[0]
        assert expected_version == dist.version

      Crawler.reset_cache()

      # First do a run to load it into the cache.
      assert_resolve('dep>=1,<4', '3.0.0rc3', allow_prereleases=True, fetchers=fetchers)

      # This simulates running from another pex command. Without this reset, the Crawler's
      # in-process cache would serve an empty cached result and the resolve below would fail in
      # the same "process".
      Crawler.reset_cache()

      # Now assert that we can get it from the cache by removing the source.
      assert_resolve('dep>=1,<4', '3.0.0rc3', allow_prereleases=True, fetchers=[])

      # It should also be able to resolve without allow_prereleases, if explicitly requested.
      Crawler.reset_cache()
      assert_resolve('dep>=1.rc1,<4', '3.0.0rc3', fetchers=[])
def test_crawler_remote():
  Crawler.reset_cache()
  mock_context = mock.create_autospec(Context, spec_set=True)
  mock_context.resolve = lambda link: link
  mock_context.content.side_effect = [MOCK_INDEX_A, MOCK_INDEX_B, Exception('shouldnt get here')]
  expected_output = set([Link('http://url1.test.com/3to2-1.0.tar.gz'),
                         Link('http://url2.test.com/APScheduler-2.1.0.tar.gz')])

  c = Crawler(mock_context)
  test_links = [Link('http://url1.test.com'), Link('http://url2.test.com')]
  assert c.crawl(test_links) == expected_output

  # Test memoization of Crawler.crawl().
  assert c.crawl(test_links) == expected_output
def _resolve_and_link(self, requirement, target_link, installer_provider):
  # Short-circuit if there is a local copy.
  if os.path.exists(target_link) and os.path.exists(os.path.realpath(target_link)):
    egg = EggPackage(os.path.realpath(target_link))
    if egg.satisfies(requirement):
      return egg

  fetchers = self._python_repos.get_fetchers()
  context = self._python_repos.get_network_context()
  iterator = Iterator(fetchers=fetchers, crawler=Crawler(context))
  links = [link for link in iterator.iter(requirement) if isinstance(link, SourcePackage)]

  for link in links:
    self._logger(' fetching {}'.format(link.url))
    sdist = context.fetch(link)
    self._logger(' installing {}'.format(sdist))
    installer = installer_provider(sdist)
    dist_location = installer.bdist()
    target_location = os.path.join(os.path.dirname(target_link),
                                   os.path.basename(dist_location))
    shutil.move(dist_location, target_location)
    _safe_link(target_location, target_link)
    self._logger(' installed {}'.format(target_location))
    return EggPackage(target_location)
def test_crawler_unknown_scheme():
  # Skips unknown url schemes, yielding no links.
  assert Crawler().crawl('ftp://ftp.cdrom.com') == set()
def get_crawler(self):
  return Crawler(self.get_context())