def __fetch_data(self, url, all_ver):
    """Scrape release version links from ``url`` and add them to ``all_ver``.

    The page at ``url`` is loaded through ``Scraper`` and the ``href`` of
    every ``<a class="vbtn release">`` element is collected.  When an href
    contains a ``/``, only the segment after the first slash is kept;
    otherwise the href is stored unchanged.

    :param url: address of the page listing the versions.
    :param all_ver: list that is extended in place with the versions found.
    :return: the same ``all_ver`` list that was passed in.
    """
    page = Scraper(url)
    hrefs = page.get_value_from_list(
        'a', None, {'class': 'vbtn release'}, None, 'href')
    # Nothing scraped (empty or falsy result): hand the accumulator back as is.
    if not hrefs:
        return all_ver
    for href in hrefs:
        version = href.split('/')[1] if '/' in href else href
        all_ver.append(version)
    return all_ver
def get_versions_from_other_source(self, package_name):
    """Collect every version of a package by scraping mvnrepository.

    ``package_name`` has its ``:`` separators replaced by ``/`` to build the
    artifact URL.  The links found under the ``div#snippets`` element are
    treated as data tabs; for each one the ``?repo=`` value (empty when the
    link carries none) selects the page handed to ``__fetch_data``.

    :param package_name: package identifier using ``:`` as separator.
    :return: set of the version strings gathered across all tabs.
    """
    artifact_path = package_name.replace(":", "/")
    pkg_url = "https://mvnrepository.com/artifact/{}".format(artifact_path)

    page = Scraper(pkg_url)
    snippets = page.get_sub_data('div', {'id': 'snippets'})
    tab_links = page.get_value_from_list(
        'li', 'a', None, None, 'href', snippets)
    _logger.info('Collecting data for {p} from {n} data tabs'.format(
        p=package_name, n=len(tab_links)))

    collected = []
    for link in tab_links:
        # Links without a repo query fall back to an empty repo value.
        repo_name = link.split("?repo=")[1] if "?repo=" in link else ""
        repo_url = pkg_url + "?repo={}".format(repo_name)
        collected = self.__fetch_data(repo_url, collected)
    # The set removes versions that were listed under more than one tab.
    return set(collected)
def get_license(self):
    """Return the declared license of the package, scraping it on demand.

    Returns ``None`` straight away when ``self.mode`` is ``"Not Found"``.
    If ``self.license`` already holds a truthy value it is returned without
    any scraping.  Otherwise the overview tab of ``self.url`` is loaded and
    both ``self.gh_link`` and ``self.license`` are filled in from it.

    :return: the value stored in ``self.license``, or ``None``.
    """
    if self.mode == "Not Found":
        return None
    if self.license:
        return self.license

    # The tab name is lower-case in "pkg" mode and capitalised otherwise.
    tab_query = "?tab=overview" if self.mode == "pkg" else "?tab=Overview"
    overview = Scraper(self.url + tab_query)
    self.gh_link = self.__fetch_gh_link(overview)
    self.license = self.__fetch_license(overview)
    return self.license
def __populate_data(self, pkg):
    """Fill in the object's fields for the golang package ``pkg``.

    The versions tab of ``https://pkg.go.dev/<pkg>`` is tried first.  When it
    yields no versions, the ``https://pkg.go.dev/mod/<pkg>`` page is tried
    instead.  ``self.mode`` ends up as ``"pkg"``, ``"mod"`` or
    ``"Not Found"`` accordingly, and ``self.url`` / ``self.version_list``
    reflect the last page queried.  ``self.latest_version`` and
    ``self.module`` are only set when a version list was found.

    :param pkg: package path appended to the pkg.go.dev URLs.
    """
    _logger.info("Populating the data object for {}".format(pkg))
    pkg_url = "https://pkg.go.dev/{}".format(pkg)
    mod_url = "https://pkg.go.dev/mod/{}".format(pkg)

    # First attempt: the package page.
    page = Scraper(pkg_url + "?tab=versions")
    self.mode = "pkg"
    self.url = pkg_url
    self.version_list = self.__fetch_all_versions(page)
    if len(self.version_list) != 0:
        _logger.info("Fetching the details from pkg.")
        self.latest_version = self.__fetch_latest_version()
        self.module = self.__fetch_module(page)
        return

    # Second attempt: the module page.
    _logger.info("Fetching the details from mod.")
    page = Scraper(mod_url + "?tab=versions")
    self.mode = "mod"
    self.url = mod_url
    self.version_list = self.__fetch_all_versions(page)
    if len(self.version_list) == 0:
        # Neither page listed any version.
        self.mode = "Not Found"
        return
    self.latest_version = self.__fetch_latest_version()
    self.module = self.__fetch_module(page, pkg)