def Fetch(self, statepath, update=True, logger=NoopLogger()):
    tmppath = statepath + '.tmp'

    if os.path.isfile(statepath) and not update:
        logger.Log('no update requested, skipping')
        return

    # Get and parse repomd.xml
    repomd_url = self.url + 'repodata/repomd.xml'
    logger.Log('fetching metadata from ' + repomd_url)
    repomd_content = Get(repomd_url, check_status=True).text
    repomd_xml = xml.etree.ElementTree.fromstring(repomd_content)

    repodata_url = self.url + repomd_xml.find(
        '{http://linux.duke.edu/metadata/repo}data[@type="primary"]/{http://linux.duke.edu/metadata/repo}location'
    ).attrib['href']

    logger.Log('fetching ' + repodata_url)
    data = Get(repodata_url).content

    logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

    logger.GetIndented().Log('decompressing with gzip')
    data = gzip.decompress(data)

    logger.GetIndented().Log('size after decompression is {} byte(s)'.format(len(data)))

    logger.GetIndented().Log('saving')
    with open(tmppath, 'wb') as statefile:
        statefile.write(data)

    os.replace(tmppath, statepath)
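# The XPath above selects the 'primary' metadata entry of repomd.xml. For
# reference, the relevant part of a repomd.xml looks roughly like this
# (illustrative excerpt, not taken from any particular repository):
#
#   <repomd xmlns="http://linux.duke.edu/metadata/repo">
#     <data type="primary">
#       <location href="repodata/...-primary.xml.gz"/>
#     </data>
#   </repomd>
#
# The href is relative to the repository root, hence the concatenation with
# self.url.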
def DoFetch(self, statepath, update, logger):
    packages_url = self.url + 'packages.gz'
    logger.GetIndented().Log('fetching package list from ' + packages_url)
    data = Get(packages_url).text  # autogunzipped?

    package_names = []

    for line in data.split('\n'):
        line = line.strip()
        if line.startswith('#') or line == '':
            continue
        package_names.append(line)

    logger.GetIndented().Log('{} package name(s) parsed'.format(len(package_names)))

    pagesize = 100

    for page in range(0, len(package_names) // pagesize + 1):
        ifrom = page * pagesize
        ito = (page + 1) * pagesize
        url = '&'.join([
            'arg[]=' + urllib.parse.quote(name)
            for name in package_names[ifrom:ito]
        ])
        url = self.url + '/rpc/?v=5&type=info&' + url

        logger.GetIndented().Log('fetching page {}/{}'.format(
            page + 1, len(package_names) // pagesize + 1))

        with open(os.path.join(statepath, '{}.json'.format(page)), 'wb') as statefile:
            statefile.write(Get(url).content)
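# For illustration, with self.url pointing at an AUR-style root (an assumed
# value) and a page containing the names 'foo' and 'bar', the request URL
# built above would look like:
#
#   <self.url>/rpc/?v=5&type=info&arg[]=foo&arg[]=bar
#
# i.e. one batched RPC info request per page of up to `pagesize` packages.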
def Fetch(self, statepath, update=True, logger=NoopLogger()):
    tmppath = statepath + '.tmp'

    if os.path.isfile(statepath) and not update:
        logger.Log('no update requested, skipping')
        return

    with open(tmppath, 'wb') as statefile:
        logger.Log('fetching ' + self.url)
        data = Get(self.url).content

        logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

        if self.compression == 'gz':
            logger.GetIndented().Log('decompressing with gzip')
            data = gzip.decompress(data)
        elif self.compression == 'bz2':
            logger.GetIndented().Log('decompressing with bz2')
            data = bz2.decompress(data)
        elif self.compression == 'xz':
            logger.GetIndented().Log('decompressing with xz')
            data = lzma.LZMADecompressor().decompress(data)

        if self.compression:
            logger.GetIndented().Log('size after decompression is {} byte(s)'.format(len(data)))

        logger.GetIndented().Log('saving')
        statefile.write(data)

    os.replace(tmppath, statepath)
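# Hypothetical usage sketch (the class name and constructor are assumptions;
# the method only relies on `self.url` and `self.compression` being set):
#
#   fetcher = SomeFileFetcher('https://example.org/packages.txt.xz')  # compression='xz' assumed stored
#   fetcher.Fetch('state/packages.txt')
#
# Writing to a '.tmp' sibling and then calling os.replace() keeps the state
# file consistent even if the fetch is interrupted.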
def DoFetch(self, statepath, update, logger):
    # locate the 'primary' metadata file referenced by repomd.xml and
    # delegate the actual download to FileFetcher
    root = xml.etree.ElementTree.fromstring(
        Get(self.url + "repodata/repomd.xml", check_status=True).text)

    location = root.find(
        "{http://linux.duke.edu/metadata/repo}data[@type='primary']/{http://linux.duke.edu/metadata/repo}location"
    )

    return FileFetcher(self.url + location.attrib['href'], gz=True).Fetch(statepath, update, logger)
def DoFetch(self, statepath, update, logger):
    with open(statepath, "wb") as statefile:
        for source in self.sources:
            logger.Log("fetching " + source)
            data = Get(source).content

            logger.GetIndented().Log("size is {} byte(s)".format(len(data)))

            if self.gz:
                logger.GetIndented().Log("decompressing with gzip")
                data = gzip.decompress(data)
            elif self.bz2:
                logger.GetIndented().Log("decompressing with bz2")
                data = bz2.decompress(data)
            elif self.xz:
                logger.GetIndented().Log("decompressing with xz")
                data = lzma.LZMADecompressor().decompress(data)

            if self.gz or self.bz2 or self.xz:
                logger.GetIndented().Log(
                    "size after decompression is {} byte(s)".format(len(data)))

            logger.GetIndented().Log("saving")
            statefile.write(data)
def LoadSpec(self, package, statepath, logger):
    specurl = self.giturl + '/{0}.git/plain/{0}.spec'.format(package)

    logger.GetIndented().Log('getting spec from {}'.format(specurl))

    r = Get(specurl, check_status=False)
    if r.status_code != 200:
        deadurl = self.giturl + '/{0}.git/plain/dead.package'.format(package)
        dr = Get(deadurl, check_status=False)
        if dr.status_code == 200:
            logger.GetIndented(2).Log('dead: ' + ';'.join(dr.text.split('\n')))
        else:
            # XXX: check .dead.package, instead throw
            logger.GetIndented(2).Log('failed: {}'.format(r.status_code))
        return

    with open(os.path.join(statepath, package + '.spec'), 'wb') as file:
        file.write(r.content)
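# For a hypothetical package name 'foo', the URLs built above expand to
#
#   <giturl>/foo.git/plain/foo.spec
#   <giturl>/foo.git/plain/dead.package
#
# A missing spec plus a present dead.package marks a retired package; this
# path layout is typical of cgit's 'plain' view of per-package git
# repositories (an assumption about the remote side).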
def DoFetch(self, statepath, update, logger):
    pages = [chr(x) for x in range(ord('a'), ord('z') + 1)]  # a..z
    pages.append('0-9')

    for page in pages:
        logger.Log('fetching page ' + page)
        pageurl = self.url + '/' + page + '.html'
        with open(os.path.join(statepath, page + '.html'), 'w', encoding='utf-8') as pagefile:
            pagefile.write(Get(pageurl).text)
def ParsePackages(self, statepath, logger):
    page = 1
    while True:
        pageurl = self.apiurl + 'packages/?page={}'.format(page)
        logger.Log('getting page {} from {}'.format(page, pageurl))
        pagedata = json.loads(Get(pageurl).text)

        for package in pagedata['packages']:
            self.LoadSpec(package['name'], statepath, logger)

        page += 1
        if page > pagedata['page_total']:
            break
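# Based on the keys accessed above, each API page is assumed to have the shape
#
#   {
#       "packages": [{"name": "..."}, ...],
#       "page_total": <total number of pages>
#   }
#
# and pagination stops once `page` exceeds `page_total`.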
def Fetch(self, statepath, update=True, logger=NoopLogger()):
    if os.path.isfile(statepath) and not update:
        logger.Log("no update requested, skipping")
        return

    # Get and parse repomd.xml
    repomd_url = self.repourl + "repodata/repomd.xml"
    logger.Log("fetching metadata from " + repomd_url)
    repomd_content = Get(repomd_url, check_status=True).text
    repomd_xml = xml.etree.ElementTree.fromstring(repomd_content)

    repodata_url = self.repourl + repomd_xml.find(
        "{http://linux.duke.edu/metadata/repo}data[@type='primary']/{http://linux.duke.edu/metadata/repo}location"
    ).attrib['href']

    return FileFetcher(repodata_url, gz=True).Fetch(statepath, update, logger)
def Fetch(self, statepath, update=True, logger=NoopLogger()):
    if os.path.isfile(statepath) and not update:
        logger.Log('no update requested, skipping')
        return

    state = {}

    if os.path.isfile(statepath):
        with open(statepath, 'r', encoding='utf-8') as oldstatefile:
            state = json.load(oldstatefile)
        logger.Log('loaded old state, {} entries'.format(len(state)))
    else:
        logger.Log('starting with empty state')

    newdata = json.loads(Get(self.url).text)

    # add new entries in reversed order, oldest first so newest
    # have higher priority; may also compare versions here
    for entry in newdata['releases']:
        if 'name' not in entry:
            logger.Log('skipping entry with no name')
            continue

        if entry['name'] in state:
            oldentry = state[entry['name']]
            if VersionCompare(entry['version'], oldentry['version']) > 0:
                logger.Log('replacing entry "{}", version changed {} -> {}'.format(
                    entry['name'], oldentry['version'], entry['version']))
                state[entry['name']] = entry
        else:
            logger.Log('adding entry "{}", version {}'.format(
                entry['name'], entry['version']))
            state[entry['name']] = entry

    temppath = statepath + '.tmp'
    with open(temppath, 'w', encoding='utf-8') as newstatefile:
        json.dump(state, newstatefile)

    os.replace(temppath, statepath)

    logger.Log('saved new state, {} entries'.format(len(state)))
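# Based on the fields accessed above, the upstream feed is assumed to look like
#
#   {"releases": [{"name": "...", "version": "..."}, ...]}
#
# Entries are merged into the persistent state: a name already present in the
# state is only overwritten when VersionCompare() reports a strictly newer
# version, so the state accumulates the latest known version per package.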
def DoFetch(self, statepath, update, logger):
    numpage = 0
    nextpageurl = self.url + 'Packages()?$filter=IsLatestVersion'
    while True:
        logger.Log('getting ' + nextpageurl)

        text = Get(nextpageurl).text
        with open(os.path.join(statepath, '{}.xml'.format(numpage)), 'w', encoding='utf-8') as pagefile:
            pagefile.write(text)

        # parse next page
        logger.Log('parsing ' + nextpageurl)
        root = xml.etree.ElementTree.fromstring(text)

        next_link = root.find('{http://www.w3.org/2005/Atom}link[@rel="next"]')
        if next_link is None:
            break

        nextpageurl = next_link.attrib['href']
        numpage += 1
def DoFetch(self, statepath, update, logger):
    numpage = 0
    nextpageurl = self.apiurl + "Packages()?$filter=IsLatestVersion"
    while True:
        logger.Log("getting " + nextpageurl)

        text = Get(nextpageurl).text
        with open(os.path.join(statepath, "{}.xml".format(numpage)), "w", encoding="utf-8") as pagefile:
            pagefile.write(text)

        # parse next page
        logger.Log("parsing " + nextpageurl)
        root = xml.etree.ElementTree.fromstring(text)

        next_link = root.find("{http://www.w3.org/2005/Atom}link[@rel='next']")
        if next_link is None:
            break

        nextpageurl = next_link.attrib['href']
        numpage += 1
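# Both DoFetch variants above page through an OData feed: each response is an
# Atom document, and a
#
#   <link rel="next" href="..."/>
#
# element in the http://www.w3.org/2005/Atom namespace points at the next
# page. Iteration stops when no such link is present, and each page is saved
# verbatim as <numpage>.xml for later parsing.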