class Web(object):
    """Collect the packages owned by the users attached to a PyPI package.

    Attributes:
        cache: set of user names whose packages have already been fetched.
        store: dict mapping a user name to the set of package names returned
            for that user.
        extra: set of every package name passed to :meth:`get_users`.
        server: XML-RPC proxy used for all lookups.
    """

    def __init__(self):
        self.cache = set()
        self.store = dict()
        self.extra = set()
        self.server = ServerProxy("https://pypi.python.org/pypi")

    def get_users(self, package="pip"):
        """Fetch and record the packages of every user with a role on *package*.

        Users already present in ``self.cache`` are skipped, so repeated calls
        do not repeat the ``user_packages`` lookup. Each newly fetched user is
        printed together with the tuple of package names.

        Args:
            package: name of the package whose role holders are looked up.

        Returns:
            None. Results are stored in ``self.store`` and ``self.cache``.
        """
        results = self.server.package_roles(package)
        self.extra.add(package)
        # Index each entry instead of unpacking ``zip(*results)``: the zip
        # form raises ValueError as soon as the server returns an empty list.
        users = [entry[1] for entry in results]
        for user in users:
            if user in self.cache:
                continue
            user_result = self.server.user_packages(user)
            packages = tuple(entry[1] for entry in user_result)
            self.store[user] = set(packages)
            self.cache.add(user)
            print(user, packages)
class PyPiPackage(object):
    """Metadata for one PyPI project, loaded once at construction time.

    The constructor downloads the project's JSON document (falling back to
    the XML-RPC ``release_data`` call when the response is not valid JSON),
    keeps it in ``self.data`` and copies the commonly used ``info`` fields
    onto the instance as plain attributes.
    """

    # Keys of the ``info`` mapping that ``compact_info`` extracts.
    _COMPACT_KEYS = (
        'downloads', 'release_url', 'keywords', 'author', 'classifiers',
        'maintainer_email', 'home_page', 'license', 'name', 'platform',
        '_pypi_ordering', 'download_url', 'docs_url', 'author_email',
        'summary', 'maintainer', 'description', 'version', 'package_url',
        'bugtrack_url',
    )

    def __init__(self, name, json_link=None):
        """Fetch and unpack the metadata of the project called *name*.

        Args:
            name: project name used for both the JSON URL and XML-RPC calls.
            json_link: optional URL to use instead of the default JSON URL.
        """
        self.name = self.package = name
        if not json_link:
            self.json_link = "http://pypi.python.org/pypi/{}/json".format(self.package)
        else:
            self.json_link = json_link
        self.proxy = ServerProxy('https://pypi.python.org/pypi')

        # A single role holder is stored bare, several as a list.
        roles = [role[1] for role in self.proxy.package_roles(self.name)]
        self.roles = roles[0] if len(roles) == 1 else roles

        # ``_data`` performs the network fetch; keep the result so that the
        # other properties do not trigger it again.
        self.data = self._data

        info = self.info
        self.keywords = info.get('keywords')
        self.author = info.get('author')
        self.classifiers = info.get('classifiers')
        self.maintainer_email = info.get('maintainer_email')
        self.home_page = info.get('home_page')
        self.platform = info.get('platform')
        self._pypi_ordering = info.get('_pypi_ordering')
        self.downloads_url = info.get('download_url')
        self.docs_url = info.get('docs_url')
        self.author_email = info.get('author_email')
        self.summary = info.get('summary', ".")
        self.maintainer = info.get('maintainer')
        self.description = info.get('description')
        self.package_url = info.get('package_url')
        self.bugtrack_url = info.get('bugtrack_url')

        urls = self.urls
        self.packagetype = urls.get('packagetype')
        self.size = urls.get('size')
        # "{:,}".format(None) raises TypeError; __repr__ already treats an
        # empty size_fmt as "no size known".
        self.size_fmt = "{:,}".format(self.size) if self.size is not None else ""

        self.max_downloads = self._max_downloads
        self._compacted = self.compact_info
        # Truthy compact values become attributes (this is what adds e.g.
        # ``version`` and ``license``); falsy ones leave existing values alone.
        for key, value in self._compacted.items():
            if value:
                setattr(self, key, value)

    def from_dict(self, d):
        """Return a shallow copy of this object with the items of *d* set as attributes."""
        alias = self.copy()
        for k, v in d.items():
            setattr(alias, k, v)
        return alias

    def clean_Nones(self):
        """Return a shallow copy with every instance attribute equal to None removed."""
        alias = self.copy()
        # Collect the names first: attributes cannot be deleted while the
        # instance dict is being iterated.
        empty = [key for key, value in vars(alias).items() if value is None]
        for key in empty:
            delattr(alias, key)
        return alias

    def __repr__(self):
        name = self.name
        dls = "{:,} downloads, ".format(self.max_downloads) if self.max_downloads else ''
        size = "size: {}, ".format(self.size_fmt) if self.size_fmt else ''
        # Always bind ``summary`` so a missing summary cannot raise.
        summary = self.summary or ''
        if len(summary) > 40:
            # Long summaries are lower-cased and stripped of stop words.
            summary = " ".join(w for w in summary.lower().split() if w not in STOPWORDS)
        return "<Package {}: {}{};{}>".format(name, dls, size, summary)

    @property
    def _data(self):
        """Download the project document; every access performs network I/O.

        Falls back to XML-RPC ``release_data`` when the JSON cannot be decoded.
        """
        try:
            data = requests.get(self.json_link).json()
        except JSONDecodeError:
            # NOTE(review): max() compares version strings lexicographically,
            # so "9.0" beats "10.0" -- confirm this is acceptable.
            version = max(self.proxy.package_releases(self.name))
            data = self.proxy.release_data(self.name, version)
        return data

    @property
    def _max_downloads(self):
        """Sum the ``downloads`` value of the first file of every release."""
        releases = self.releases or {}
        return sum(files[0].get('downloads', 0) for files in releases.values() if files)

    @property
    def info(self):
        """The ``info`` mapping of the document, or an empty dict when absent."""
        return self.data.get('info') or {}

    @property
    def compact_info(self):
        """Dict of the ``_COMPACT_KEYS`` entries of ``info``; missing keys map to None."""
        info = self.info
        return {key: info.get(key) for key in self._COMPACT_KEYS}

    @property
    def releases(self):
        """The ``releases`` entry of the document (None when absent)."""
        return self.data.get('releases')

    @property
    def urls(self):
        """First entry of the ``urls`` list, or an empty dict when there is none.

        A non-list ``urls`` value is returned unchanged.
        """
        urls = self.data.get('urls')
        if isinstance(urls, list):
            return urls[0] if urls else {}
        return urls if urls is not None else {}

    def copy(self):
        """Return a shallow copy of this object."""
        from copy import copy
        return copy(self)

    def __pickle_safe__(self):
        """Build a plain dict describing the package from one release.

        The first file of the release with the greatest key is merged with
        ``info``: where both define a key with different values the result
        holds the tuple ``(info_value, release_value)``. ``max_downloads``,
        ``size_fmt`` and ``roles`` are added last.
        """
        from copy import copy
        releases = self.releases or {}
        first_file = {}
        if releases:
            # just do 1 release, most recent
            # NOTE(review): lexicographic max of version strings -- confirm.
            fresh_release = releases[max(releases)]
            if isinstance(fresh_release, (list, tuple)):
                first_file = fresh_release[0] if fresh_release else {}
            else:
                first_file = fresh_release
        # Work on a copy: writing into the cached release dict would make
        # every later call wrap the values in another tuple layer.
        pickled = dict(first_file) if isinstance(first_file, dict) else {}
        for k, v in copy(self.info).items():
            if k in pickled and pickled[k] != v:
                pickled[k] = (v, copy(pickled[k]))
            else:
                pickled[k] = v
        pickled['max_downloads'] = str(copy(self.max_downloads))
        pickled['size_fmt'] = copy(self.size_fmt)
        pickled['roles'] = copy(self.roles)
        return pickled
class JSON_PYPI(object):
    """Lazy view over the JSON document of one PyPI project.

    The document is fetched on first use of :attr:`data` and then reused, so
    building an instance costs one download rather than one per attribute.
    """

    def __init__(self, name=None, json_link=None):
        """Record the project name and copy its ``info`` fields onto the instance.

        Args:
            name: project name used for the JSON URL and XML-RPC calls.
            json_link: optional URL to use instead of the default JSON URL.
        """
        self.name = name
        self.package = name
        if not json_link:
            self.json_link = "http://pypi.python.org/pypi/{}/json".format(self.package)
        else:
            self.json_link = json_link
        self.proxy = ServerProxy('https://pypi.python.org/pypi')

        info = self.info
        self.keywords = info.get('keywords')
        self.author = info.get('author')
        self.classifiers = info.get('classifiers')
        self.maintainer_email = info.get('maintainer_email')
        self.home_page = info.get('home_page')
        self.platform = info.get('platform')
        self._pypi_ordering = info.get('_pypi_ordering')
        self.downloads_url = info.get('download_url')
        self.docs_url = info.get('docs_url')
        self.author_email = info.get('author_email')
        self.summary = info.get('summary', ".")
        self.maintainer = info.get('maintainer')
        self.description = info.get('description')
        self.package_url = info.get('package_url')
        self.bugtrack_url = info.get('bugtrack_url')

        urls = self.urls
        self.packagetype = urls.get('packagetype')
        self.size = urls.get('size')
        # "{:,}".format(None) raises TypeError, so an unknown size is shown
        # as an empty string instead.
        self.size_fmt = "{:,}".format(self.size) if self.size is not None else ""

    @property
    def data(self):
        """The project document, downloaded once and cached on the instance.

        Falls back to XML-RPC ``release_data`` when the JSON cannot be decoded.
        """
        cached = self.__dict__.get('_cached_data')
        if cached is None:
            try:
                cached = get_json(self.json_link)
            except simplejson.scanner.JSONDecodeError:
                # NOTE(review): max() compares version strings
                # lexicographically ("9.0" > "10.0") -- confirm.
                version = max(self.proxy.package_releases(self.name))
                cached = self.proxy.release_data(self.name, version)
            self._cached_data = cached
        return cached

    @property
    def info(self):
        """The ``info`` mapping of the document, or an empty dict when absent."""
        return self.data.get('info') or {}

    @property
    def releases(self):
        """The ``releases`` entry of the document (None when absent)."""
        return self.data.get('releases')

    @property
    def urls(self):
        """First entry of the ``urls`` list, or an empty dict when there is none.

        A non-list ``urls`` value is returned unchanged.
        """
        urls = self.data.get('urls')
        if isinstance(urls, list):
            return urls[0] if urls else {}
        return urls if urls is not None else {}

    @property
    def upload_time(self):
        """``upload_time`` of the first file as a datetime, or None when missing.

        Accepts ``YYYY-MM-DDTHH:MM:SS`` with optional fractional seconds.
        """
        raw = self.urls.get('upload_time')
        if not raw:
            return None
        stamp = ' '.join(raw.split('T'))
        try:
            return datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            # Second attempt for timestamps carrying fractional seconds.
            return datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S.%f")

    @property
    def get_max_downloads(self):
        """Sum the ``downloads`` value of the first file of every release.

        ``releases`` may be a URL string (fetched with ``requests``), a dict
        wrapping the mapping under a ``'releases'`` key, or the mapping
        itself; anything else counts as zero downloads.
        """
        releases = self.releases
        if isinstance(releases, str):
            releases = requests.get(releases).json()['releases']
        elif isinstance(releases, dict):
            if 'releases' in releases:
                releases = releases['releases']
        else:
            releases = {}
        return sum(files[0].get('downloads', 0) for files in releases.values() if files)

    def roles(self):
        """Return the raw ``package_roles`` answer for this project."""
        return self.proxy.package_roles(self.name)

    def __repr__(self):
        name = self.name
        # Both conditions must hold; with ``or`` a None summary reached len().
        if self.summary is not None and len(self.summary) > 2:
            summary = self.summary
        else:
            summary = " "
        size = self.size_fmt
        dls = "{:,}".format(self.get_max_downloads)
        if isinstance(summary, str) and len(summary) > 30:
            # Long summaries are stripped of stop words.
            summary = " ".join(w for w in summary.split() if w not in STOPWORDS)
        return "<Package {}: {}, {}, {}>".format(name, dls, size, summary)
class Client(object):
    """Thin convenience wrapper around the PyPI XML-RPC proxy.

    Attributes:
        uri: endpoint the proxy talks to.
        proxy: the XML-RPC proxy itself.
        cache: set replaced by :meth:`deep_search` when a cache is passed in.
    """

    def __init__(self):
        self.uri = 'https://pypi.python.org/pypi'
        self.proxy = ServerProxy(self.uri)
        self.cache = set()

    def all_distributions(self):
        """Return the server's ``list_packages`` answer."""
        return self.proxy.list_packages()

    def roles(self, package):
        """Return the second item of every ``package_roles`` entry for *package*."""
        return [cl[1] for cl in self.proxy.package_roles(package)]

    def user_packages(self, user):
        """Return the second item of every ``user_packages`` entry for *user*."""
        return [cl[1] for cl in self.proxy.user_packages(user)]

    def release_downloads(self, package, version=None):
        """Return ``release_downloads`` for *package*.

        When *version* is None the greatest entry of ``package_releases`` is
        used; a list is reduced to its first element.
        """
        if version is None:
            # NOTE(review): lexicographic max of version strings -- confirm.
            version = max(self.proxy.package_releases(package))
        if isinstance(version, list):
            version = version[0]
        return self.proxy.release_downloads(package, version)

    def package_releases(self, package):
        """Return the server's ``package_releases`` answer for *package*."""
        return self.proxy.package_releases(package)

    def release_data(self, package, version=None):
        """Return ``release_data`` for *package*.

        An explicit *version* is honoured; only when it is None is the
        greatest entry of ``package_releases`` looked up.
        """
        if version is None:
            # NOTE(review): lexicographic max of version strings -- confirm.
            version = max(self.proxy.package_releases(package))
        if isinstance(version, list):
            version = version[0]
        return self.proxy.release_data(package, version)

    # NOTE: ``('django')`` is the plain string 'django', not a tuple; the
    # default is kept as-is because the parameter is not used.
    def simple_search(self, spec, operator='or', getter=None, omit=('django')):
        """Search package names and summaries for *spec*.

        Args:
            spec: text matched against both the ``name`` and ``summary`` fields.
            operator: ``'or'`` or ``'and'``, forwarded to the server.
            getter: unused; accepted for backward compatibility.
            omit: unused; accepted for backward compatibility.

        Returns:
            The hits as grouped by pip's ``transform_hits``, each given a
            ``version`` key holding its highest version and with any
            ``score`` key removed.
        """
        try:
            from pip.commands.search import transform_hits, highest_version
        except ImportError:
            # presumably the location of the same helpers in newer pip
            # releases -- verify against the installed pip.
            from pip._internal.commands.search import transform_hits, highest_version
        # Both fields go in one spec dict; the operator is the second argument.
        hits = self.proxy.search({'name': spec, 'summary': spec}, operator)
        results = transform_hits(hits)
        for result in results:
            result['version'] = highest_version(result['versions'])
            # Tolerate hits that carry no score.
            result.pop('score', None)
        return results

    def deep_search(self, spec, operator='and', cache=None, limit=20):
        """Search for *spec* and return the most downloaded matches.

        Every hit is loaded as a ``PyPiPackage``, which costs network
        requests per hit.

        Args:
            spec: text forwarded to :meth:`simple_search`.
            operator: ``'or'`` or ``'and'``, forwarded to :meth:`simple_search`.
            cache: optional set; when given, ``self.cache`` becomes its union
                with the packages found here.
            limit: maximum number of packages returned (None for all).

        Returns:
            List of ``PyPiPackage`` objects, highest ``max_downloads`` first.
        """
        initial_results = self.simple_search(spec, operator=operator, getter=None, omit=['django'])
        # Each hit is a dict; PyPiPackage expects the project name.
        results = [PyPiPackage(result['name']) for result in initial_results]
        if cache is not None:
            self.cache = cache | set(results)
        ranked = sorted(results, key=lambda pkg: pkg.max_downloads, reverse=True)
        return ranked[:limit]

    def browse(self, classifiers):
        """Return the server's ``browse`` answer for *classifiers*."""
        return self.proxy.browse(classifiers)

    def get_json_url(self, package):
        """Return the JSON document URL for *package*."""
        return "http://pypi.python.org/pypi/{}/json".format(package)
class Package(object): def __init__(self, name, json_link=None): if isinstance(name, dict): for key, value in name.items(): self.name = self.package = key if len(value) > -1: self.dist = value[0] else: self.name = self.package = name self.json_link = "http://pypi.python.org/pypi/{}/json".format(self.name) print(self.json_link) # self.proxy = ServerProxy("https://pypi.python.org/pypi") roles = [role[1] for role in self.proxy.package_roles(self.name)] if len(roles) == 1: self.roles = roles[0] else: self.roles = roles self.data = self._data if self.data: print("data found") if self.info is not None: print("info found") self.keywords = self._keywords self.author = self.info.get("author", "") self.classifiers = self.info.get("classifiers", "") self.maintainer_email = self.info.get("maintainer_email", "") self.home_page = self.info.get("home_page", "") self.platform = self.info.get("platform", "") self._pypi_ordering = self.info.get("_pypi_ordering", "") self.downloads_url = self.info.get("download_url", "") self.docs_url = self.info.get("docs_url", "") self.author_email = self.info.get("author_email", "") self.summary = self.info.get("summary", "") self.maintainer = self.info.get("maintainer", "") self.description = self.info.get("description", "") self.package_url = self.info.get("package_url", "") self.bugtrack_url = self.info.get("bugtrack_url", "") try: self._compacted = self.compact_info for key, value in self.compact_info.items(): if value: self.__setattr__(key, value) print("compact value") else: self.__setattr__(key, None) except AttributeError: pass try: self.packagetype = self.urls.get("packagetype", None) except AttributeError: self.packagetype = None try: self.size = str(self.urls.get("size", None)) except AttributeError: self.size = "0" self.size_fmt = "{:,}".format(int(self.size)) self.max_downloads = str(self._max_downloads) def from_dict(self, d): from copy import copy alias = self.copy() for k, v in d.items(): alias.__setattr__(k, v) return alias def 
__repr__(self): name = self.name if self.max_downloads: dls = "{:,} downloads, ".format(self.max_downloads) else: dls = "" if self.size_fmt: size = "size: {}, ".format(self.size_fmt) else: size = "" if self.summary: if len(self.summary) > 40: summary = " ".join(w for w in self.summary.lower().split() if w not in STOPWORDS) elif len(self.summary) <= 40: summary = self.summary else: summary = "" return "<Package {}: {}{};{}>".format(name, dls, size, summary) def highest_version(self, versions): return next(iter(sorted(versions, key=pkg_resources.parse_version, reverse=True))) @property def _data(self): try: data = requests.get(self.json_link).json() print(bool(data)) ## except simplejson.JSONDecodeError: version = self.proxy.package_releases(self.name) if len(version) == 1: version = version[0] print(version) ### data = self.proxy.release_data(self.name, version) else: data = dict(info="", releases="", urls="") return data @property def _max_downloads(self): counts = [] releases = self.data.get("releases") for key in releases.keys(): if len(releases[key]) > 0: counts.append(releases[key][0].get("downloads", 0)) return sum(counts) @property def _keywords(self): if self.info is not None: kws = self.info.get("keywords", None) if kws: if "," in kws: keywords = [kw.strip().lower() for kw in kws.split(",")] else: keywords = [kw.strip().lower() for kw in kws.split()] return keywords else: return kws else: return None def extract_metadata(self): if isinstance(self.dist, DistInfoDistribution): if hasattr(self.dist, "_parsed_pkg_info"): name = self.dist._parsed_pkg_info.get("name") version = self.dist._parsed_pkg_info.get("version") summary = self.dist._parsed_pkg_info.get("summary") homepage = self.dist._parsed_pkg_info.get("homepage") author = self.dist._parsed_pkg_info.get("author") author_email = self.dist._parsed_pkg_info.get("Author-email") platform = self.dist._parsed_pkg_info.get("platform") classifiers = self.dist._parsed_pkg_info.get_all("classifier") @property def 
info(self): if self._data and isinstance(self._data, dict): print(self.data.get("info", {})) return self.data.get("info", {}) else: return dict().fromkeys( [ "docs_url", "keywords", "_pypi_ordering", "requires_python", "version", "license", "maintainer_email", "cheesecake_documentation_id", "release_url", "author", "home_page", "classifiers", "cheesecake_installability_id", "platform", "summary", "name", "download_url", "author_email", "downloads", "cheesecake_code_kwalitee_id", "bugtrack_url", "package_url", "description", "maintainer", "_pypi_hidden", ], "", ) @property def compact_info(self): compacter = itemgetter( "downloads", "release_url", "keywords", "author", "classifiers", "maintainer_email", "home_page", "license", "name", "platform", "_pypi_ordering", "download_url", "docs_url", "author_email", "summary", "maintainer", "description", "version", "package_url", "bugtrack_url", ) compacted_info = compacter(self.info) compact_dict = dict( zip( ( "downloads", "release_url", "keywords", "author", "classifiers", "maintainer_email", "home_page", "license", "name", "platform", "_pypi_ordering", "download_url", "docs_url", "author_email", "summary", "maintainer", "description", "version", "package_url", "bugtrack_url", ), compacted_info, ) ) return compact_dict @property def releases(self): return self.data.get("releases") @property def urls(self): urls = self.data.get("urls") if len(urls) > 0 and isinstance(urls, list): result = urls[0] else: result = urls return result def copy(self): from copy import copy return copy(self) def __pickle_safe__(self): from copy import copy # just do 1 release, most recent max_release = max(self.releases.keys()) fresh_release = self.releases[max_release] if type(fresh_release) == list: pickled = fresh_release[0] elif type(fresh_release) == tuple: pickled = fresh_release[0] elif type(fresh_release) == dict: pickled = fresh_release else: pickled = fresh_release info = copy(self.info) for k, v in info.items(): if k in pickled: v2 = 
copy(pickled[k]) if v != v2: pickled[k] = (v, v2) else: pickled[k] = v pickled["max_downloads"] = str(copy(self.max_downloads)) pickled["size_fmt"] = copy(self.size_fmt) pickled["roles"] = copy(self.roles) pickled["summary"] = copy(self.summary) pickled["keywords"] = copy(self.keywords) pickled["classifiers"] = copy(self.classifiers) pickled["name"] = copy(self.name) return pickled @property def _pickle(self): return self.__pickle_safe__()