Example #1
0
    def fetch(self,
              statepath: str,
              update: bool = True,
              logger: Logger = NoopLogger()) -> bool:
        """Fetch data into the state file.

        Args:
            statepath: path of the state file to (re)create.
            update: when False and the state file already exists, skip.
            logger: destination for progress messages.

        Returns:
            True if the state was updated, False if fetching was skipped
            or produced no changes.
        """
        if os.path.isfile(statepath) and not update:
            logger.log('no update requested, skipping')
            return False

        # binary fetchers write raw bytes, text fetchers write utf-8
        args = {
            'mode': 'wb'
        } if self.binary else {
            'mode': 'w',
            'encoding': 'utf-8'
        }

        persdata: Dict[str, Any] = {}

        perspath = statepath + '.persdata'

        # EAFP: a missing, truncated or corrupt persistent-data file just
        # means we start from scratch; this also avoids the race between
        # an exists() check and open()
        try:
            with open(perspath, 'rb') as rpersfile:
                persdata = pickle.load(rpersfile)
        except (EOFError, FileNotFoundError, pickle.UnpicklingError):
            pass

        with AtomicFile(statepath, **args) as statefile:
            have_changes = self._do_fetch(statefile, persdata, logger)

            if persdata:
                with AtomicFile(perspath, 'wb') as wpersfile:
                    pickle.dump(persdata, wpersfile.get_file())

            # discard the temporary file if nothing changed
            if not have_changes:
                statefile.cancel()

            return have_changes
Example #2
0
    def _do_fetch(self, statefile: AtomicFile, persdata: PersistentData,
                  logger: Logger) -> bool:
        """Download the repodata primary index into the state file.

        Returns False without downloading when the sha256 open-checksum
        remembered in persdata matches the one advertised by repomd.xml,
        True after a successful download.
        """
        ns = '{http://linux.duke.edu/metadata/repo}'

        # fetch and parse repomd.xml
        repomd_url = self.url + 'repodata/repomd.xml'
        logger.log('fetching metadata from ' + repomd_url)
        root = xml.etree.ElementTree.fromstring(
            do_http(repomd_url,
                    check_status=True,
                    timeout=self.fetch_timeout).text)

        primary = root.find(ns + 'data[@type="primary"]')
        if primary is None:
            raise RuntimeError('Cannot find <primary> element in repomd.xml')

        location = primary.find('./' + ns + 'location')
        checksum = primary.find('./' + ns + 'open-checksum[@type="sha256"]')

        if checksum is None:
            logger.log('no supported checksum', Logger.WARNING)
        elif checksum.text == persdata.get('open-checksum-sha256'):
            # nothing changed since last run - skip the heavy download
            logger.log('checksum not changed: {}'.format(checksum.text))
            return False

        if location is None:
            raise RuntimeError('Cannot find <location> element in repomd.xml')

        repodata_url = self.url + location.attrib['href']

        # pick decompression mode from the url suffix
        compression = None
        for candidate in ('gz', 'xz'):
            if repodata_url.endswith(candidate):
                compression = candidate
                break

        logger.log('fetching {}'.format(repodata_url))

        save_http_stream(repodata_url,
                         statefile.get_file(),
                         compression=compression,
                         timeout=self.fetch_timeout)

        # remember the checksum so the next run can short-circuit
        if checksum is not None and checksum.text:
            persdata['open-checksum-sha256'] = checksum.text
            logger.log('saving checksum: {}'.format(
                persdata['open-checksum-sha256']))

        logger.log('size is {} byte(s)'.format(
            os.path.getsize(statefile.get_path())))

        return True
Example #3
0
    def fetch(self,
              statepath: str,
              update: bool = True,
              logger: Logger = NoopLogger()) -> bool:
        """Fetch data into the state directory.

        Returns True if the state was updated, False when skipped or when
        fetching produced no changes.
        """
        if os.path.isdir(statepath) and not update:
            logger.log('no update requested, skipping')
            return False

        perspath = statepath + '.persdata'

        # a missing, truncated or corrupt persistent-data file just means
        # we start from scratch
        persdata: Dict[str, Any] = {}
        try:
            with open(perspath, 'rb') as persfile:
                persdata = pickle.load(persfile)
        except (EOFError, FileNotFoundError, pickle.UnpicklingError):
            persdata = {}

        with AtomicDir(statepath) as statedir:
            have_changes = self._do_fetch(statedir, persdata, logger)

            if persdata:
                with AtomicFile(perspath, 'wb') as persout:
                    outfile = persout.get_file()
                    pickle.dump(persdata, outfile)
                    # make sure persistent data actually hits the disk
                    outfile.flush()
                    os.fsync(outfile.fileno())

            # discard the temporary directory if nothing changed
            if not have_changes:
                statedir.cancel()

            return have_changes
Example #4
0
    def fetch(self,
              statepath: str,
              update: bool = True,
              logger: Logger = NoopLogger()) -> bool:
        """Fetch data into the state directory.

        Args:
            statepath: path of the state directory to (re)create.
            update: when False and the state directory already exists, skip.
            logger: destination for progress messages.

        Returns:
            True if the state was updated, False if fetching was skipped
            or produced no changes.
        """
        if os.path.isdir(statepath) and not update:
            logger.log('no update requested, skipping')
            return False

        persdata: Dict[str, Any] = {}

        perspath = statepath + '.persdata'

        # EAFP: tolerate a missing, truncated or corrupt persistent-data
        # file instead of racing an exists() check against open()
        try:
            with open(perspath, 'rb') as rpersfile:
                persdata = pickle.load(rpersfile)
        except (EOFError, FileNotFoundError, pickle.UnpicklingError):
            pass

        with AtomicDir(statepath) as statedir:
            have_changes = self._do_fetch(statedir, persdata, logger)

            if persdata:
                with AtomicFile(perspath, 'wb') as wpersfile:
                    pickle.dump(persdata, wpersfile.get_file())

            # discard the temporary directory if nothing changed
            if not have_changes:
                statedir.cancel()

            return have_changes
Example #5
0
    def _do_fetch(self, statefile: AtomicFile, persdata: PersistentData,
                  logger: Logger) -> bool:
        """Download self.url into the state file, honoring conditional requests.

        Sends If-Modified-Since based on the last-modified value stored in
        persdata and remembers the server's Last-Modified for the next run.

        Returns:
            False when the server reports the resource unchanged,
            True after a successful download.

        Raises:
            RuntimeError: on a zero-size download unless allow_zero_size.
        """
        fetching_what = [self.url]
        headers = self.headers.copy() if self.headers else {}

        if isinstance(self.post, dict):
            fetching_what.append('{} fields of form data'.format(len(
                self.post)))

        if headers:
            fetching_what.append('{} extra headers'.format(len(headers)))

        logger.log('fetching ' + ', with '.join(fetching_what))

        if 'last-modified' in persdata:
            headers['if-modified-since'] = persdata['last-modified']
            logger.log('using if-modified-since: {}'.format(
                headers['if-modified-since']))

        try:
            response = save_http_stream(self.url,
                                        statefile.get_file(),
                                        compression=self.compression,
                                        data=self.post,
                                        headers=headers,
                                        timeout=self.fetch_timeout)
        except NotModifiedException:
            # conditional request answered with HTTP 304 Not Modified
            # (the previous log message incorrectly said 403)
            logger.log('got 304 not modified')
            return False

        size = os.path.getsize(statefile.get_path())

        logger.log('size is {} byte(s)'.format(size))

        if size == 0 and not self.allow_zero_size:
            raise RuntimeError('refusing zero size file')

        # remember Last-Modified for the next conditional request
        if response.headers.get('last-modified'):
            persdata['last-modified'] = response.headers['last-modified']
            logger.log('storing last-modified: {}'.format(
                persdata['last-modified']))

        return True
Example #6
0
    def fetch(self,
              statepath: str,
              update: bool = True,
              logger: Logger = NoopLogger()) -> bool:
        """Fetch data into the state file, syncing it to disk.

        Args:
            statepath: path of the state file to (re)create.
            update: when False and the state file already exists, skip.
            logger: destination for progress messages.

        Returns:
            True if the state was updated, False if fetching was skipped
            or produced no changes.
        """
        if os.path.isfile(statepath) and not update:
            logger.log('no update requested, skipping')
            return False

        # binary fetchers write raw bytes, text fetchers write utf-8
        args = {
            'mode': 'wb'
        } if self.binary else {
            'mode': 'w',
            'encoding': 'utf-8'
        }

        persdata: dict[str, Any] = {}

        perspath = statepath + '.persdata'

        # tolerate a missing, truncated or corrupt persistent-data file -
        # just start from scratch
        try:
            with open(perspath, 'rb') as rpersfile:
                persdata = pickle.load(rpersfile)
        except (EOFError, FileNotFoundError, pickle.UnpicklingError):
            pass

        with AtomicFile(statepath, **args) as statefile:
            have_changes = self._do_fetch(statefile, persdata, logger)

            if persdata:
                with AtomicFile(perspath, 'wb') as wpersfile:
                    pickle.dump(persdata, wpersfile.get_file())
                    # make sure persistent data actually hits the disk
                    wpersfile.get_file().flush()
                    os.fsync(wpersfile.get_file().fileno())

            # sync the new state to disk BEFORE deciding its fate;
            # previously this ran after cancel(), syncing data that was
            # about to be discarded and relying on cancel() leaving the
            # file open
            statefile.get_file().flush()
            os.fsync(statefile.get_file().fileno())

            # discard the temporary file if nothing changed
            if not have_changes:
                statefile.cancel()

            return have_changes
Example #7
0
    def fetch(self,
              statepath: str,
              update: bool = True,
              logger: Logger = NoopLogger()) -> bool:
        """Merge the latest freshcode release list into the saved json state.

        Keeps the highest version seen for each package name and rewrites
        the state file atomically.
        """
        if os.path.isfile(statepath) and not update:
            logger.log('no update requested, skipping')
            return False

        state: Dict[str, Any] = {}

        if not os.path.isfile(statepath):
            logger.log('starting with empty state')
        else:
            with open(statepath, 'r', encoding='utf-8') as prevfile:
                state = json.load(prevfile)
            logger.log('loaded old state, {} entries'.format(len(state)))

        newdata = json.loads(do_http(self.url).text)

        if not newdata['releases']:
            raise RuntimeError(
                'Empty freshcode package list received, refusing to go on')

        # add new entries in reversed order, oldest first so newest
        # have higher priority; may also compare versions here
        for entry in newdata['releases']:
            if 'name' not in entry:
                logger.log('skipping entry with no name')
                continue

            name = entry['name']

            if name not in state:
                logger.log('adding entry "{}", version {}'.format(
                    name, entry['version']))
                state[name] = entry
                continue

            previous = state[name]

            # keep whichever version compares higher
            if version_compare(entry['version'], previous['version']) > 0:
                logger.log(
                    'replacing entry "{}", version changed {} -> {}'.
                    format(name, previous['version'], entry['version']))
                state[name] = entry

        with AtomicFile(statepath, 'w', encoding='utf-8') as statefile:
            json.dump(state, statefile.get_file())

        logger.log('saved new state, {} entries'.format(len(state)))

        return True
Example #8
0
    def _do_fetch(self, statefile: AtomicFile, persdata: PersistentData,
                  logger: Logger) -> bool:
        """Write the FTP directory listing of self.url into the state file.

        Returns:
            True unconditionally: a plain LIST gives no way to detect
            "no changes".
        """
        # Bug fix: passing user/passwd to the FTP() constructor makes it
        # log in during connect, so the old extra bare login() call
        # attempted a second (anonymous) login and failed with
        # "503 already logged in" on servers when credentials were set.
        # Connect first, then log in exactly once; the context manager
        # guarantees quit/close even if listing fails.
        with ftplib.FTP(host=self.url.hostname,
                        timeout=self.fetch_timeout) as ftp:
            ftp.login(user=self.url.username or '',
                      passwd=self.url.password or '')

            ftp.cwd(self.url.path)

            # LIST lines go into the state file verbatim, one per line
            ftp.retrlines(
                'LIST',
                callback=lambda line: print(line, file=statefile.get_file()))

        return True