def fetch(self, statepath: str, update: bool = True, logger: Logger = NoopLogger()) -> bool:
    if os.path.isfile(statepath) and not update:
        logger.log('no update requested, skipping')
        return False

    # text or binary mode depending on fetcher settings
    args = {'mode': 'wb'} if self.binary else {'mode': 'w', 'encoding': 'utf-8'}

    # load persistent data (e.g. checksums, last-modified) from the previous run
    persdata: Dict[str, Any] = {}

    perspath = statepath + '.persdata'

    if os.path.exists(perspath):
        with open(perspath, 'rb') as rpersfile:
            persdata = pickle.load(rpersfile)

    with AtomicFile(statepath, **args) as statefile:
        have_changes = self._do_fetch(statefile, persdata, logger)

        if persdata:
            with AtomicFile(perspath, 'wb') as wpersfile:
                pickle.dump(persdata, wpersfile.get_file())

        if not have_changes:
            # discard the new state, keeping the previous one intact
            statefile.cancel()

        return have_changes
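# AtomicFile itself is not shown in this excerpt. A minimal sketch of the
# interface the fetchers above rely on (write to a temporary file, commit by
# rename on successful exit, discard on error or cancel()); the project's
# real implementation may differ:
import os
from typing import IO, Any


class AtomicFileSketch:
    """Hypothetical stand-in for AtomicFile: commit-on-success file writer."""

    def __init__(self, path: str, mode: str = 'w', **kwargs: Any) -> None:
        self._path = path
        self._tmppath = path + '.tmp'
        self._file: IO[Any] = open(self._tmppath, mode, **kwargs)
        self._cancelled = False

    def get_file(self) -> IO[Any]:
        return self._file

    def get_path(self) -> str:
        # points at the temporary file until the state is committed
        return self._tmppath

    def cancel(self) -> None:
        self._cancelled = True

    def __enter__(self) -> 'AtomicFileSketch':
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        self._file.close()
        if exc_type is None and not self._cancelled:
            os.replace(self._tmppath, self._path)  # atomic commit
        elif os.path.exists(self._tmppath):
            os.remove(self._tmppath)  # discard partial output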
def _do_fetch(self, statefile: AtomicFile, persdata: PersistentData, logger: Logger) -> bool:
    # fetch and parse repomd.xml
    repomd_url = self.url + 'repodata/repomd.xml'
    logger.log('fetching metadata from ' + repomd_url)
    repomd_content = do_http(repomd_url, check_status=True, timeout=self.fetch_timeout).text
    repomd = xml.etree.ElementTree.fromstring(repomd_content)

    repomd_elt_primary = repomd.find('{http://linux.duke.edu/metadata/repo}data[@type="primary"]')

    if repomd_elt_primary is None:
        raise RuntimeError('Cannot find <primary> element in repomd.xml')

    repomd_elt_primary_location = repomd_elt_primary.find('./{http://linux.duke.edu/metadata/repo}location')
    repomd_elt_primary_checksum = repomd_elt_primary.find('./{http://linux.duke.edu/metadata/repo}open-checksum[@type="sha256"]')

    # skip refetching if the checksum of primary data has not changed
    if repomd_elt_primary_checksum is None:
        logger.log('no supported checksum', Logger.WARNING)
    elif repomd_elt_primary_checksum.text == persdata.get('open-checksum-sha256'):
        logger.log('checksum not changed: {}'.format(repomd_elt_primary_checksum.text))
        return False

    if repomd_elt_primary_location is None:
        raise RuntimeError('Cannot find <location> element in repomd.xml')

    repodata_url = self.url + repomd_elt_primary_location.attrib['href']

    # fetch actual repo data, decompressing by file extension
    compression = None
    if repodata_url.endswith('gz'):
        compression = 'gz'
    elif repodata_url.endswith('xz'):
        compression = 'xz'

    logger.log('fetching {}'.format(repodata_url))
    save_http_stream(repodata_url, statefile.get_file(), compression=compression, timeout=self.fetch_timeout)

    # remember the checksum so the next run can skip an unchanged repository
    if repomd_elt_primary_checksum is not None and repomd_elt_primary_checksum.text:
        persdata['open-checksum-sha256'] = repomd_elt_primary_checksum.text
        logger.log('saving checksum: {}'.format(persdata['open-checksum-sha256']))

    logger.log('size is {} byte(s)'.format(os.path.getsize(statefile.get_path())))

    return True
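# For reference, the repomd.xml structure the XPath queries above expect is
# roughly the following (namespace http://linux.duke.edu/metadata/repo);
# this is an illustrative excerpt, not taken from any specific repository:
#
#   <repomd xmlns="http://linux.duke.edu/metadata/repo">
#     <data type="primary">
#       <location href="repodata/...-primary.xml.gz"/>
#       <open-checksum type="sha256">...</open-checksum>
#     </data>
#   </repomd>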
def fetch(self, statepath: str, update: bool = True, logger: Logger = NoopLogger()) -> bool:
    if os.path.isdir(statepath) and not update:
        logger.log('no update requested, skipping')
        return False

    persdata: Dict[str, Any] = {}

    perspath = statepath + '.persdata'

    # tolerate a missing or corrupt persistent data file; start afresh
    try:
        with open(perspath, 'rb') as rpersfile:
            persdata = pickle.load(rpersfile)
    except (EOFError, FileNotFoundError, pickle.UnpicklingError):
        pass

    with AtomicDir(statepath) as statedir:
        have_changes = self._do_fetch(statedir, persdata, logger)

        if persdata:
            with AtomicFile(perspath, 'wb') as wpersfile:
                pickle.dump(persdata, wpersfile.get_file())
                # make sure persistent data hits the disk before we commit
                wpersfile.get_file().flush()
                os.fsync(wpersfile.get_file().fileno())

        if not have_changes:
            # discard the new state, keeping the previous one intact
            statedir.cancel()

        return have_changes
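# AtomicDir is likewise not shown; the code above assumes it is the directory
# counterpart of AtomicFile, committing a temporary directory over statepath
# on successful exit. A rough sketch under that assumption:
import os
import shutil
from typing import Any


class AtomicDirSketch:
    """Hypothetical stand-in for AtomicDir: commit-on-success directory."""

    def __init__(self, path: str) -> None:
        self._path = path
        self._tmppath = path + '.tmp'
        os.makedirs(self._tmppath, exist_ok=True)
        self._cancelled = False

    def get_path(self) -> str:
        return self._tmppath

    def cancel(self) -> None:
        self._cancelled = True

    def __enter__(self) -> 'AtomicDirSketch':
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        if exc_type is None and not self._cancelled:
            shutil.rmtree(self._path, ignore_errors=True)  # drop previous state
            os.replace(self._tmppath, self._path)  # commit new state
        else:
            shutil.rmtree(self._tmppath, ignore_errors=True)  # discard partial state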
def fetch(self, statepath: str, update: bool = True, logger: Logger = NoopLogger()) -> bool:
    if os.path.isdir(statepath) and not update:
        logger.log('no update requested, skipping')
        return False

    persdata: Dict[str, Any] = {}

    perspath = statepath + '.persdata'

    if os.path.exists(perspath):
        with open(perspath, 'rb') as rpersfile:
            persdata = pickle.load(rpersfile)

    with AtomicDir(statepath) as statedir:
        have_changes = self._do_fetch(statedir, persdata, logger)

        if persdata:
            with AtomicFile(perspath, 'wb') as wpersfile:
                pickle.dump(persdata, wpersfile.get_file())

        if not have_changes:
            statedir.cancel()

        return have_changes
def _do_fetch(self, statefile: AtomicFile, persdata: PersistentData, logger: Logger) -> bool:
    fetching_what = [self.url]

    headers = self.headers.copy() if self.headers else {}

    if isinstance(self.post, dict):
        fetching_what.append('{} fields of form data'.format(len(self.post)))

    if headers:
        fetching_what.append('{} extra headers'.format(len(headers)))

    logger.log('fetching ' + ', with '.join(fetching_what))

    # ask the server to skip the transfer if the file has not changed
    if 'last-modified' in persdata:
        headers['if-modified-since'] = persdata['last-modified']
        logger.log('using if-modified-since: {}'.format(headers['if-modified-since']))

    try:
        response = save_http_stream(self.url, statefile.get_file(), compression=self.compression, data=self.post, headers=headers, timeout=self.fetch_timeout)
    except NotModifiedException:
        logger.log('got 304 not modified')
        return False

    size = os.path.getsize(statefile.get_path())

    logger.log('size is {} byte(s)'.format(size))

    if size == 0 and not self.allow_zero_size:
        raise RuntimeError('refusing zero size file')

    # remember last-modified for the next conditional request
    if response.headers.get('last-modified'):
        persdata['last-modified'] = response.headers['last-modified']
        logger.log('storing last-modified: {}'.format(persdata['last-modified']))

    return True
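# save_http_stream and NotModifiedException come from the project's HTTP
# helpers and are not shown here. The behavior assumed by the code above:
# the call streams the response body into the given file object, returns the
# response, and raises NotModifiedException when the server answers
# 304 Not Modified to the if-modified-since header. A rough sketch with the
# requests library (names are hypothetical, compression and text-mode
# handling omitted):
import requests


class NotModifiedExceptionSketch(Exception):
    pass


def save_http_stream_sketch(url, fileobj, compression=None, data=None, headers=None, timeout=60):
    method = 'POST' if data else 'GET'
    response = requests.request(method, url, data=data, headers=headers, timeout=timeout, stream=True)
    if response.status_code == 304:
        raise NotModifiedExceptionSketch()
    response.raise_for_status()
    for chunk in response.iter_content(chunk_size=65536):
        fileobj.write(chunk)  # fileobj assumed to be opened in binary mode
    return response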
def fetch(self, statepath: str, update: bool = True, logger: Logger = NoopLogger()) -> bool:
    if os.path.isfile(statepath) and not update:
        logger.log('no update requested, skipping')
        return False

    args = {'mode': 'wb'} if self.binary else {'mode': 'w', 'encoding': 'utf-8'}

    persdata: dict[str, Any] = {}

    perspath = statepath + '.persdata'

    # tolerate a missing or corrupt persistent data file; start afresh
    try:
        with open(perspath, 'rb') as rpersfile:
            persdata = pickle.load(rpersfile)
    except (EOFError, FileNotFoundError, pickle.UnpicklingError):
        pass

    with AtomicFile(statepath, **args) as statefile:
        have_changes = self._do_fetch(statefile, persdata, logger)

        if persdata:
            with AtomicFile(perspath, 'wb') as wpersfile:
                pickle.dump(persdata, wpersfile.get_file())
                # make sure persistent data hits the disk before we commit
                wpersfile.get_file().flush()
                os.fsync(wpersfile.get_file().fileno())

        # flush the state to disk before deciding its fate; flushing after
        # cancel() would operate on a discarded file
        statefile.get_file().flush()
        os.fsync(statefile.get_file().fileno())

        if not have_changes:
            statefile.cancel()

        return have_changes
def fetch(self, statepath: str, update: bool = True, logger: Logger = NoopLogger()) -> bool:
    if os.path.isfile(statepath) and not update:
        logger.log('no update requested, skipping')
        return False

    state: Dict[str, Any] = {}

    if os.path.isfile(statepath):
        with open(statepath, 'r', encoding='utf-8') as oldstatefile:
            state = json.load(oldstatefile)
        logger.log('loaded old state, {} entries'.format(len(state)))
    else:
        logger.log('starting with empty state')

    newdata = json.loads(do_http(self.url).text)

    if not newdata['releases']:
        raise RuntimeError('Empty freshcode package list received, refusing to go on')

    # add new entries in reversed order, oldest first so newest
    # have higher priority; may also compare versions here
    for entry in reversed(newdata['releases']):
        if 'name' not in entry:
            logger.log('skipping entry with no name')
            continue

        if entry['name'] in state:
            oldentry = state[entry['name']]

            if version_compare(entry['version'], oldentry['version']) > 0:
                logger.log('replacing entry "{}", version changed {} -> {}'.format(entry['name'], oldentry['version'], entry['version']))
                state[entry['name']] = entry
        else:
            logger.log('adding entry "{}", version {}'.format(entry['name'], entry['version']))
            state[entry['name']] = entry

    with AtomicFile(statepath, 'w', encoding='utf-8') as statefile:
        json.dump(state, statefile.get_file())
        logger.log('saved new state, {} entries'.format(len(state)))

    return True
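# The freshcode feed parsed above is assumed to look roughly like this (only
# the fields the loop reads are shown; entries newest first, hence the
# reversed iteration):
#
#   {
#     "releases": [
#       {"name": "somepackage", "version": "1.2.3", ...},
#       ...
#     ]
#   }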
def _do_fetch(self, statefile: AtomicFile, persdata: PersistentData, logger: Logger) -> bool:
    # connect first, then log in explicitly; passing credentials to the FTP
    # constructor would trigger an implicit login, making a subsequent
    # bare login() call fail
    ftp = ftplib.FTP(host=self.url.hostname, timeout=self.fetch_timeout)
    ftp.login(user=self.url.username or '', passwd=self.url.password or '')
    ftp.cwd(self.url.path)

    # dump the directory listing into the state file, one line per entry
    ftp.retrlines('LIST', callback=lambda line: print(line, file=statefile.get_file()))

    ftp.quit()

    return True
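# Hypothetical driver code; the fetcher class name and constructor argument
# are illustrative, only the fetch() signature comes from the code above:
fetcher = FileFetcher('https://example.com/packages.txt')  # hypothetical class
if fetcher.fetch('state/packages.txt', update=True, logger=NoopLogger()):
    print('state updated')
else:
    print('no changes')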