def test_get_infos_from_url(self):
    # Test that the URLs are parsed the right way
    url_list = {
        'FooBar-1.1.0.tar.gz': {
            'name': 'foobar',  # lowercase the name
            'version': '1.1.0',
        },
        'Foo-Bar-1.1.0.zip': {
            'name': 'foo-bar',  # keep the dash
            'version': '1.1.0',
        },
        'foobar-1.1b2.tar.gz#md5=123123123123123': {
            'name': 'foobar',
            'version': '1.1b2',
            'url': 'http://example.org/foobar-1.1b2.tar.gz',  # no hash
            'hashval': '123123123123123',
            'hashname': 'md5',
        },
        'foobar-1.1-rc2.tar.gz': {  # use the suggested (normalized) version
            'name': 'foobar',
            'version': '1.1c2',
            'url': 'http://example.org/foobar-1.1-rc2.tar.gz',
        },
    }

    for url, attributes in url_list.items():
        infos = get_infos_from_url("http://example.org/" + url)
        for attribute, expected in attributes.items():
            got = infos.get(attribute)
            if attribute == "version":
                # compare string representations so version objects match
                self.assertEqual("%s" % got, expected)
            else:
                self.assertEqual(got, expected)
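
# A minimal sketch of the parsing behaviour the test above exercises, for
# readers unfamiliar with get_infos_from_url. This is illustrative only:
# the helper name, regexes and extension list are assumptions, and the real
# function also normalizes versions (e.g. "1.1-rc2" -> "1.1c2"), which this
# sketch does not attempt.
import re
import posixpath
from urllib.parse import urlparse

def sketch_parse_archive_url(url):  # hypothetical helper
    # split off an optional "#md5=..." style fragment
    url, _, fragment = url.partition('#')
    archive = posixpath.basename(urlparse(url).path)
    # strip the archive extension
    stem = re.sub(r'\.(tar\.gz|tar\.bz2|zip|tgz)$', '', archive)
    # split name from version at the first dash followed by a digit,
    # so dashes inside the project name ("Foo-Bar") are preserved
    name, version = re.split(r'-(?=\d)', stem, maxsplit=1)
    infos = {'name': name.lower(), 'version': version, 'url': url}
    if fragment:
        hashname, _, hashval = fragment.partition('=')
        infos.update({'hashname': hashname, 'hashval': hashval})
    return infos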
def _process_url(self, url, project_name=None, follow_links=True):
    """Process a URL and search for distribution packages.

    For each link found, if it points to a download, create a
    PyPIdistribution object. If it's a homepage and we are allowed to
    follow links, process it too.

    :param url: the URL to process
    :param project_name: the name of the project we are searching for
    :param follow_links: whether to follow the links found on the page
        (i.e. run this method recursively on them). Links are only
        followed one level deep, so recursive calls pass False.
    """
    with self._open_url(url) as f:
        base_url = f.url
        if url not in self._processed_urls:
            self._processed_urls.append(url)
            link_matcher = self._get_link_matcher(url)
            for link, is_download in link_matcher(f.read().decode(),
                                                  base_url):
                if link not in self._processed_urls:
                    if self._is_distribution(link) or is_download:
                        self._processed_urls.append(link)
                        # it's a distribution, so create a dist object
                        try:
                            infos = get_infos_from_url(link, project_name,
                                is_external=self.index_url not in url)
                        except CantParseArchiveName as e:
                            logger.warning(
                                "version has not been parsed: %s", e)
                        else:
                            self._register_release(release_info=infos)
                    else:
                        if self._is_browsable(link) and follow_links:
                            self._process_url(link, project_name,
                                              follow_links=False)
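
# Illustrative only: a minimal sketch of the link-matcher contract consumed
# by _process_url above. Given the decoded page content and its base URL, a
# matcher yields (absolute_link, is_download) pairs. The real matchers
# returned by _get_link_matcher are more careful (e.g. they honour rel
# attributes on index pages); the regex and extension list here are
# assumptions for demonstration.
import re
from urllib.parse import urljoin

_HREF_RE = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)

def sketch_link_matcher(content, base_url):  # hypothetical matcher
    for match in _HREF_RE.finditer(content):
        link = urljoin(base_url, match.group(1))
        # heuristically flag archive-looking links as direct downloads
        is_download = link.endswith(('.tar.gz', '.tar.bz2', '.zip', '.tgz'))
        yield link, is_download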