def get_sphinx_inventory(url):
    """Fetch and parse the Sphinx object inventory published under *url*.

    (https://gist.github.com/dahlia/9e3a988a52de6319cd5d)

    Parameters
    ----------
    url : base URL of the package where the objects.inv file lives

    Returns
    -------
    dict
        The parsed inventory.

    Raises
    ------
    ValueError
        If the header line names an unknown inventory version.
    """
    inventory_url = urllib.parse.urljoin(url, 'objects.inv')
    with urllib.request.urlopen(inventory_url) as stream:
        header = stream.readline().rstrip().decode('utf-8')
        if header == '# Sphinx inventory version 1':
            return read_inventory_v1(stream, url, urllib.parse.urljoin)
        if header == '# Sphinx inventory version 2':
            return read_inventory_v2(stream, url, urllib.parse.urljoin)
        raise ValueError(header)
def test_read_inventory_v2():
    """Check that a v2 inventory parses correctly, chunked or not."""
    buf = BytesIO(inventory_v2)
    buf.readline()
    parsed = read_inventory_v2(buf, "/util", posixpath.join)

    # Re-parse with a tiny buffer to exercise the chunking algorithm.
    buf = BytesIO(inventory_v2)
    buf.readline()
    parsed_chunked = read_inventory_v2(buf, "/util", posixpath.join, bufsize=5)
    assert parsed == parsed_chunked

    modules = parsed["py:module"]
    assert len(modules) == 2
    assert modules["module1"] == ("foo", "2.0", "/util/foo.html#module-module1", "Long Module desc")
    assert modules["module2"] == ("foo", "2.0", "/util/foo.html#module-module2", "-")
    assert parsed["py:function"]["module1.func"][2] == "/util/sub/foo.html#module1.func"
    assert parsed["c:function"]["CFunc"][2] == "/util/cfunc.html#CFunc"
    assert parsed["std:term"]["a term"][2] == "/util/glossary.html#term-a-term"
def parse_sphinx_inventory(self, version):
    """Download and parse the Django docs intersphinx inventory for *version*.

    Parameters
    ----------
    version : version string interpolated into the docs URL

    Returns
    -------
    The parsed inventory dict, or [] when the inventory is unavailable.
    """
    # Parsing objects.inv is strange.
    urlpattern = 'http://docs.djangoproject.com/en/%s/%%s' % version
    sphinx_index = urlpattern % '_objects/'
    req = requests.get(sphinx_index)
    if req.status_code in [200, 301]:
        # Reuse the body requests already downloaded instead of fetching
        # the same URL a second time with urllib (the original did both).
        import io
        fp = io.BytesIO(req.content)
        fp.readline()  # skip the "# Sphinx inventory version N" header
        return intersphinx.read_inventory_v2(fp, urlpattern, operator.mod)
    return []
def get_inventory(url):
    """Fetch and parse the Sphinx objects.inv published under *url*.

    Returns
    -------
    dict
        The parsed inventory.

    Raises
    ------
    ValueError
        If the header line names an unknown inventory version.
    """
    inv_url = urljoin(url, 'objects.inv')
    from contextlib import closing
    # Legacy urllib.urlopen responses are not context managers; closing()
    # guarantees the connection is released even when parsing fails
    # (the original leaked the response entirely).
    with closing(urllib.urlopen(inv_url)) as f:
        line = f.readline().rstrip().decode('utf-8')
        if line == '# Sphinx inventory version 1':
            invdata = read_inventory_v1(f, url, urljoin)
        elif line == '# Sphinx inventory version 2':
            invdata = read_inventory_v2(f, url, urljoin)
        else:
            raise ValueError(line)
    return invdata
def get_inventory(url):
    """Download and parse the Sphinx inventory (objects.inv) below *url*.

    Returns the parsed inventory dict; raises ValueError if the header
    line names an unsupported inventory version.
    """
    # Dispatch the header line to the matching parser.
    parsers = {
        '# Sphinx inventory version 1': read_inventory_v1,
        '# Sphinx inventory version 2': read_inventory_v2,
    }
    inventory_url = urllib.parse.urljoin(url, 'objects.inv')
    with urllib.request.urlopen(inventory_url) as response:
        header = response.readline().rstrip().decode('utf-8')
        if header not in parsers:
            raise ValueError(header)
        return parsers[header](response, url, urllib.parse.urljoin)
def test_read_inventory_v2():
    """Parse a v2 inventory twice (default and tiny buffer) and compare."""
    stream = BytesIO(inventory_v2)
    stream.readline()
    full_read = read_inventory_v2(stream, '/util', posixpath.join)

    # try again with a small buffer size to test the chunking algorithm
    stream = BytesIO(inventory_v2)
    stream.readline()
    chunked_read = read_inventory_v2(stream, '/util', posixpath.join, bufsize=5)

    assert full_read == chunked_read
    assert len(full_read['py:module']) == 2
    assert full_read['py:module']['module1'] == (
        'foo', '2.0', '/util/foo.html#module-module1', 'Long Module desc')
    assert full_read['py:module']['module2'] == (
        'foo', '2.0', '/util/foo.html#module-module2', '-')
    assert full_read['py:function']['module1.func'][2] == (
        '/util/sub/foo.html#module1.func')
    assert full_read['c:function']['CFunc'][2] == '/util/cfunc.html#CFunc'
def parse(self):
    """Parse sphinx docs at self.doc_path.

    yield `ParserEntry`s.
    """
    inv_path = os.path.join(self.doc_path, "objects.inv")
    with open(inv_path, "rb") as inv_f:
        # skip version line that is verified in detection
        inv_f.readline()
        entries = _inv_to_entries(read_inventory_v2(inv_f, "", os.path.join))
        # this is what Guido gave us `yield from` for :-|
        for entry in entries:
            yield entry
def read_inventory(fp, url):
    """Read a Sphinx inventory file from *fp*, resolving links against *url*.

    Parameters
    ----------
    fp : binary file-like object positioned at the inventory header line
    url : base URL joined onto each entry's relative location

    Returns
    -------
    dict
        The parsed inventory.

    Raises
    ------
    ValueError
        If the header line names an unknown inventory version.
    """
    join = posixpath.join
    line = fp.readline().rstrip().decode('utf-8')
    if line == '# Sphinx inventory version 1':
        from sphinx.ext import intersphinx
        invdata = intersphinx.read_inventory_v1(fp, url, join)
    elif line == '# Sphinx inventory version 2':
        from sphinx.ext import intersphinx
        invdata = intersphinx.read_inventory_v2(fp, url, join)
    else:
        # Previously an unknown header fell through and surfaced as an
        # opaque UnboundLocalError; fail explicitly with the bad line.
        raise ValueError(line)
    return invdata
def parse(self):
    """Parse sphinx docs at self.docpath.

    yield tuples of symbol name, type and path
    """
    log.info('Creating database...')
    inventory_path = os.path.join(self.docpath, "objects.inv")
    with open(inventory_path, "rb") as inv_f:
        # skip version line that is verified in detection
        inv_f.readline()
        # this is what Guido gave us `yield from` for :-|
        for entry in _inv_to_entries(read_inventory_v2(inv_f, "", os.path.join)):
            yield entry
def fetch_data(url, inv):
    """Print every entry of the Sphinx inventory file *inv*.

    Parameters
    ----------
    url : base URL prefix for entries; '' is used when falsy
    inv : path of the objects.inv file to read
    """
    # Close the inventory file deterministically (the original leaked it).
    with open(inv, 'rb') as f:
        f.readline()  # burn the "# Sphinx inventory version N" header line
        invdata = read_inventory_v2(f, url or '', join)
    if args.html:
        print("<dl>")
    for role in invdata:
        start_role(role)
        for item in invdata[role]:
            # item_url is the entry's own resolved URL; renamed so it no
            # longer shadows the *url* parameter.
            (domain, version, item_url, title) = invdata[role][item]
            start_item(role, item)
            print_link(role, item, domain, title)
            print_meta(role, item, domain, version, item_url, title)
            end_item(role, item)
    if args.html:
        print("</dl>\n")
def fetch_data(url, f):
    """Print every entry of the already-opened inventory stream *f*.

    Parameters
    ----------
    url : base URL prefix for entries; '' is used when falsy
    f : binary file-like object positioned at the inventory header line
    """
    f.readline()  # burn a line (the version header)
    invdata = read_inventory_v2(f, url or '', join)
    # Single-argument print() behaves identically on Python 2 and 3,
    # unlike the former `print "<dl>"` statement (a py3 syntax error).
    if args.html:
        print("<dl>")
    for role in invdata:
        start_role(role)
        for item in invdata[role]:
            # item_url renamed to avoid shadowing the *url* parameter.
            (domain, version, item_url, title) = invdata[role][item]
            print("{}:{} :: {}\n".format(role, item, invdata[role][item]))
            start_item(role, item)
            print_link(role, item, domain, title)
            print_meta(role, item, domain, version, item_url, title)
            end_item(role, item)
    if args.html:
        print("</dl>\n")
def fetch_data(url, f):
    """Dump all entries of the Sphinx inventory read from stream *f*.

    Parameters
    ----------
    url : base URL prefix for entries; '' is used when falsy
    f : binary file-like object positioned at the inventory header line
    """
    f.readline()  # burn a line (the version header)
    invdata = read_inventory_v2(f, url or '', join)
    # `print "<dl>"` is Python-2-only syntax; single-arg print() prints
    # the same text on both Python 2 and 3.
    if args.html:
        print("<dl>")
    for role in invdata:
        start_role(role)
        for item in invdata[role]:
            # item_url renamed to stop shadowing the *url* parameter.
            (domain, version, item_url, title) = invdata[role][item]
            print("{}:{} :: {}\n".format(role, item, invdata[role][item]))
            start_item(role, item)
            print_link(role, item, domain, title)
            print_meta(role, item, domain, version, item_url, title)
            end_item(role, item)
    if args.html:
        print("</dl>\n")
def fetch_data(url, inv):
    """Print every entry of the Sphinx inventory file *inv*.

    Parameters
    ----------
    url : base URL prefix for entries; "" is used when falsy
    inv : path of the objects.inv file to read
    """
    # Close the inventory file deterministically (the original leaked it).
    with open(inv, 'rb') as f:
        f.readline()  # burn a line (the version header)
        invdata = read_inventory_v2(f, url or "", join)
    if args.html:
        print("<dl>")
    for role in invdata:
        start_role(role)
        for item in invdata[role]:
            # item_url renamed to stop shadowing the *url* parameter.
            (domain, version, item_url, title) = invdata[role][item]
            start_item(role, item)
            print_link(role, item, domain, title)
            print_meta(role, item, domain, version, item_url, title)
            end_item(role, item)
    if args.html:
        print("</dl>\n")
def update_intersphinx(version_pk, api=None):
    """Parse a version's Sphinx objects.inv and store its terms.

    Parameters
    ----------
    version_pk : primary key of the version to process
    api : optional API client; defaults to tastyapi.api

    Returns
    -------
    None
    """
    if api is None:
        api = tastyapi.api
    version_data = api.version(version_pk).get()
    version = make_api_version(version_data)
    project = version.project
    try:
        object_file = version.project.find('objects.inv', version.slug)[0]
    except IndexError:
        # Single-arg print() is valid on both Python 2 and 3, unlike the
        # former `print "..."` statement.
        print("Failed to find objects file")
        return None
    urlpattern = "http://%s/en/%s/%%s" % (project.subdomain, version.slug)
    # Close the inventory file deterministically (the original leaked it).
    with open(object_file) as f:
        f.readline()  # skip the version header
        data = intersphinx.read_inventory_v2(f, urlpattern, operator.mod)
    for top_key in data:
        for inner_key in data[top_key]:
            _project, sphinx_version, url, title = data[top_key][inner_key]
            try:
                url_key = url.split('#')[1]
            except IndexError:
                # No anchor fragment -> invalid entry, skip it.
                continue
            if ":" in url_key:
                # Colon-qualified anchors dump junk data into the url
                # namespace we don't need; store only the full inner key.
                save_term(version, inner_key, url)
            else:
                last_key = url_key.split('.')[-1]
                if last_key != url_key:
                    # Only save the trailing name when it differs.
                    save_term(version, last_key, url)
                save_term(version, url_key, url)
def read_intersphinx(project, file, urlpattern):
    """
    Reads file as intersphinx format. Prepends the url pattern on the front
    of URLs. URL Pattern should have a %s in it for string formatting.

    Only supports intersphinx v2. It parses down into an effective set of
    data that is:

    {'<id>': [
        "<project>",   # From conf.py
        "<version>",   # From conf.py
        "<url>",       # With anchor
        "<title>"      # Usually blank
    ]}

    We then smartly parse the anchor tag and add it into Redis.
    """
    # Close the inventory file deterministically (the original leaked it).
    with open(file) as f:
        f.readline()  # skip the version header
        data = intersphinx.read_inventory_v2(f, urlpattern, operator.mod)
    for top_key in data:
        # Single-arg print() behaves identically on Python 2 and 3,
        # unlike the former `print "..."` statements.
        print("KEY: %s" % top_key)
        for inner_key in data[top_key]:
            print("INNER KEY: %s" % inner_key)
            _project, version, url, title = data[top_key][inner_key]
            try:
                url_key = url.split('#')[1]
            except IndexError:
                # No anchor fragment -> invalid entry; skip instead of
                # crashing on malformed inventory data.
                continue
            if ":" in url_key:
                # This dumps junk data into the url namespace we don't need
                print("INNER: %s->%s" % (inner_key, url))
                safe_save(project, inner_key, url)
            else:
                last_key = url_key.split('.')[-1]
                if last_key != url_key:
                    # Only save last key if it differs
                    print("LAST: %s->%s" % (last_key, url))
                    safe_save(project, last_key, url)
                print("URL: %s->%s" % (url_key, url))
                safe_save(project, url_key, url)
@task
def update_intersphinx(version_pk):
    """Parse a version's Sphinx objects.inv and store its terms.

    Parameters
    ----------
    version_pk : primary key of the version to process
    """
    version_data = api.version(version_pk).get()
    version = make_api_version(version_data)
    project = version.project
    try:
        object_file = version.project.find("objects.inv", version.slug)[0]
    except IndexError:
        # `except IndexError, e` is Python-2-only syntax and `e` was
        # unused; this form is valid on both Python 2 and 3, as is the
        # single-argument print() call below.
        print("Failed to find objects file")
        return None
    urlpattern = "http://%s/en/%s/%%s" % (project.subdomain, version.slug)
    # Close the inventory file deterministically (the original leaked it).
    with open(object_file) as f:
        f.readline()  # skip the version header
        data = intersphinx.read_inventory_v2(f, urlpattern, operator.mod)
    for top_key in data:
        for inner_key in data[top_key]:
            _project, sphinx_version, url, title = data[top_key][inner_key]
            try:
                url_key = url.split("#")[1]
            except IndexError:
                # Invalid data (no anchor fragment), skip it.
                continue
            if ":" in url_key:
                # This dumps junk data into the url namespace we don't need
                save_term(version, inner_key, url)
def parse_sphinx_inventory(self, version):
    """Download and parse the Django docs intersphinx inventory for *version*.

    Returns the parsed inventory dict.
    """
    # Parsing objects.inv is strange.
    urlpattern = 'http://docs.djangoproject.com/en/%s/%%s' % version
    from contextlib import closing
    # closing() releases the HTTP response even when parsing raises;
    # the legacy urllib.urlopen result is not a context manager and the
    # original never closed it.
    with closing(urllib.urlopen(urlpattern % '_objects/')) as fp:
        fp.readline()  # skip the version header
        return intersphinx.read_inventory_v2(fp, urlpattern, operator.mod)
del project_data['resource_uri'] del project_data['absolute_url'] project = Project(**project_data) version_data['project'] = project version = Version(**version_data) try: object_file = version.project.find('objects.inv', version.slug)[0] except IndexError, e: print "Failed to find objects file" return None f = open(object_file) f.readline() urlpattern = "http://%s/en/%s/%%s" % (project.subdomain, version.slug) data = intersphinx.read_inventory_v2(f, urlpattern, operator.mod) for top_key in data.keys(): #print "KEY: %s" % top_key inner_keys = data[top_key].keys() for inner_key in inner_keys: #print "INNER KEY: %s" % inner_key _project, sphinx_version, url, title = data[top_key][inner_key] url_key = url.split('#')[1] if ":" in url_key: #This dumps junk data into the url namespace we don't need #print "INNER: %s->%s" % (inner_key, url) save_term(version, inner_key, url) else: last_key = url_key.split('.')[-1] if last_key != url_key: #Only save last key if it differes
def getInventory(self):
    """Load the Sphinx inventory from self.inventory_uri.

    Populates self.inventory with the parsed data and
    self.inventory_items with its 'std:label' section (or {}).
    """
    from contextlib import closing
    # closing() guarantees the response is released even when
    # read_inventory_v2 raises; the original only closed on success.
    with closing(urllib.urlopen(self.inventory_uri)) as f:
        f.readline()  # burn a line (the version header)
        self.inventory = read_inventory_v2(f, self.uri, join)
    self.inventory_items = self.inventory.get('std:label', {})
from sphinx.ext.intersphinx import read_inventory_v2
from posixpath import join

url = "https://raw.github.com/xuru/vixDiskLib/gh-pages/"
inv = "objects.inv"

# Close the inventory file deterministically (the original leaked it).
with open(inv, 'rb') as f:
    f.readline()  # skip the "# Sphinx inventory version N" header line
    invdata = read_inventory_v2(f, url, join)

# Single-argument print() prints the same text on Python 2 and 3,
# unlike the former `print invdata.keys()` statement.
print(invdata.keys())