Example 1
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        command = ['rsync', '--verbose', '--archive', '--compress', '--delete', '--delete-excluded', '--timeout=60', self.url, statepath]
        RunSubprocess(command, logger)
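
The RunSubprocess helper used above is not included in these examples. A minimal sketch of what such a wrapper might look like, assuming a logger with a Log method and treating the exact signature as a guess:

    import subprocess

    def RunSubprocess(command, logger, cwd=None):
        # hypothetical sketch: run a command and stream its output to the logger
        logger.Log('running ' + ' '.join(command))
        with subprocess.Popen(command, cwd=cwd, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, encoding='utf-8') as proc:
            for line in proc.stdout:
                logger.Log(line.rstrip())
        if proc.returncode != 0:
            raise RuntimeError('command exited with code {}'.format(proc.returncode))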
Example 2
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with StateDir(statepath) as statedir:
            for letter in ['0-9'] + [l for l in ascii_uppercase]:
                page = 1
                numpages = 1
                while True:
                    logger.Log('fetching {} page {}'.format(letter, page))

                    pageurl = '{}/{}/page/{}/'.format(self.url, letter, page)

                    # fetch HTML
                    response = Fetch(pageurl)
                    response.encoding = 'utf-8'  # is not detected properly
                    text = response.text

                    # get the number of pages, if there is more than one
                    if numpages == 1:
                        for pagebutton in lxml.html.document_fromstring(text).xpath('.//nav[@class="page-selector"]/a'):
                            numpages = max(numpages, int(pagebutton.text))

                    # save HTML
                    with open(os.path.join(statedir, '{}-{}.html'.format(letter, page)), 'w', encoding='utf-8') as pagefile:
                        pagefile.write(text)

                    # stop if that was the last (or only) page
                    if page >= numpages:
                        break

                    # proceed with the next page
                    page += 1
Example 3
    def Parse(self, reponame, transformer, logger=NoopLogger()):
        repository = self.__GetRepository(reponame)

        packages = self.__Parse(repository, logger)
        packages = self.__Transform(packages, transformer, repository, logger)

        return packages
Example 4
 def Fetch(self, statepath, update=True, logger=NoopLogger()):
     if not os.path.isdir(statepath):
         RunSubprocess([
             'git', 'clone', '--progress', '--no-checkout', '--depth=1',
             '--branch', self.branch, self.url, statepath
         ],
                       logger=logger)
         self.__SetupSparseCheckout(statepath, logger)
         RunSubprocess(['git', 'checkout'], cwd=statepath, logger=logger)
     elif update:
         RunSubprocess(
             ['timeout', '10m', 'git', 'fetch', '--progress', '--depth=1'],
             cwd=statepath,
             logger=logger)
         RunSubprocess(
             ['git', 'checkout'], cwd=statepath, logger=logger
         )  # needed for reset to not fail on changed sparse checkout
         self.__SetupSparseCheckout(statepath, logger)
         RunSubprocess(['git', 'reset', '--hard', 'origin/' + self.branch],
                       cwd=statepath,
                       logger=logger)
         RunSubprocess(['git', 'reflog', 'expire', '--expire=0', '--all'],
                       cwd=statepath,
                       logger=logger)
         RunSubprocess(['git', 'prune'], cwd=statepath, logger=logger)
     else:
         logger.Log('no update requested, skipping')
Example 5
    def fetch(self,
              statepath: str,
              update: bool = True,
              logger: Logger = NoopLogger()) -> bool:
        if os.path.exists(statepath) and not update:
            logger.log('no update requested, skipping')
            return False

        args = [
            '--info=stats2',
            '--archive',
            '--compress',
            '--delete',
            '--delete-excluded',
            '--safe-links',
        ]

        if self.fetch_timeout is not None:
            args += ['--timeout', str(self.fetch_timeout)]

        if self.rsync_include is not None:
            args += ['--include', self.rsync_include]

        if self.rsync_exclude is not None:
            args += ['--exclude', self.rsync_exclude]

        run_subprocess(['rsync'] + args + [self.url, statepath], logger)

        return True
Example 6
    def fetch(self,
              statepath: str,
              update: bool = True,
              logger: Logger = NoopLogger()) -> bool:
        if os.path.isdir(statepath) and not update:
            logger.log('no update requested, skipping')
            return False

        persdata: Dict[str, Any] = {}

        perspath = statepath + '.persdata'

        if os.path.exists(perspath):
            with open(perspath, 'rb') as rpersfile:
                persdata = pickle.load(rpersfile)

        with AtomicDir(statepath) as statedir:
            have_changes = self._do_fetch(statedir, persdata, logger)

            if persdata:
                with AtomicFile(perspath, 'wb') as wpersfile:
                    pickle.dump(persdata, wpersfile.get_file())

            if not have_changes:
                statedir.cancel()

            return have_changes
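
AtomicDir and AtomicFile above are project helpers whose implementations are not shown on this page. A minimal sketch of the write-to-a-temporary-location-then-rename pattern an AtomicDir-style context manager presumably follows (names and behavior are assumptions):

    import os
    import shutil

    class AtomicDir:
        # hypothetical sketch: fill a temporary directory, then swap it
        # into place on successful exit; discard it on error or cancel()
        def __init__(self, path):
            self.path = path
            self.tmppath = path + '.tmp'
            self.cancelled = False

        def __enter__(self):
            if os.path.exists(self.tmppath):
                shutil.rmtree(self.tmppath)
            os.makedirs(self.tmppath)
            return self

        def cancel(self):
            self.cancelled = True

        def __exit__(self, exc_type, exc_value, traceback):
            if exc_type is not None or self.cancelled:
                shutil.rmtree(self.tmppath, ignore_errors=True)
            else:
                if os.path.exists(self.path):
                    shutil.rmtree(self.path)
                os.replace(self.tmppath, self.path)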
Example 7
    def StreamDeserializeMulti(self,
                               processor,
                               reponames=None,
                               logger=NoopLogger()):
        deserializers = []
        for repo in self.__GetRepositories(reponames):
            deserializers.append(
                self.__StreamDeserializer(self.__GetSerializedPath(repo)))

        while deserializers:
            # find lowest key (effname)
            thiskey = deserializers[0].Peek().effname
            for ds in deserializers[1:]:
                thiskey = min(thiskey, ds.Peek().effname)

            # fetch all packages with given key from all deserializers
            packageset = []
            for ds in deserializers:
                while not ds.EOF() and ds.Peek().effname == thiskey:
                    packageset.append(ds.Get())

            processor(packageset)

            # remove EOFed repos
            deserializers = [ds for ds in deserializers if not ds.EOF()]
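
The loop above performs a k-way merge: every deserializer yields packages sorted by effname, and all packages sharing the lowest current effname are collected into one set. Assuming each stream could be exposed as a plain iterator, an equivalent formulation with the standard library might be:

    import heapq
    from itertools import groupby
    from operator import attrgetter

    def stream_merged(iterators, processor):
        # merge several effname-sorted package streams and hand each
        # group of same-named packages to the processor
        merged = heapq.merge(*iterators, key=attrgetter('effname'))
        for _, group in groupby(merged, key=attrgetter('effname')):
            processor(list(group))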
Example 8
    def fetch(self,
              statepath: str,
              update: bool = True,
              logger: Logger = NoopLogger()) -> bool:
        if os.path.isdir(statepath) and not update:
            logger.log('no update requested, skipping')
            return False

        persdata: Dict[str, Any] = {}

        perspath = statepath + '.persdata'

        try:
            with open(perspath, 'rb') as rpersfile:
                persdata = pickle.load(rpersfile)
        except (EOFError, FileNotFoundError, pickle.UnpicklingError):
            pass

        with AtomicDir(statepath) as statedir:
            have_changes = self._do_fetch(statedir, persdata, logger)

            if persdata:
                with AtomicFile(perspath, 'wb') as wpersfile:
                    pickle.dump(persdata, wpersfile.get_file())
                    wpersfile.get_file().flush()
                    os.fsync(wpersfile.get_file().fileno())

            if not have_changes:
                statedir.cancel()

            return have_changes
Example 9
    def fetch(self,
              statepath: str,
              update: bool = True,
              logger: Logger = NoopLogger()) -> bool:
        if os.path.isfile(statepath) and not update:
            logger.log('no update requested, skipping')
            return False

        args = {
            'mode': 'wb'
        } if self.binary else {
            'mode': 'w',
            'encoding': 'utf-8'
        }

        persdata: Dict[str, Any] = {}

        perspath = statepath + '.persdata'

        if os.path.exists(perspath):
            with open(perspath, 'rb') as rpersfile:
                persdata = pickle.load(rpersfile)

        with AtomicFile(statepath, **args) as statefile:
            have_changes = self._do_fetch(statefile, persdata, logger)

            if persdata:
                with AtomicFile(perspath, 'wb') as wpersfile:
                    pickle.dump(persdata, wpersfile.get_file())

            if not have_changes:
                statefile.cancel()

            return have_changes
Example 10
    def DeserializeMulti(self, reponames=None, logger=NoopLogger()):
        packages = []

        for repo in self.repoman.GetRepositories(reponames):
            packages += self.Deserialize(repo['name'], logger=logger.GetPrefixed(repo['name'] + ': '))

        return packages
Example 11
    def ParseMulti(self, reponames=None, transformer=None, logger=NoopLogger()):
        packages = []

        for repo in self.repoman.GetRepositories(reponames):
            packages += self.Parse(repo['name'], transformer=transformer, logger=logger.GetPrefixed(repo['name'] + ': '))

        return packages
Example 12
    def StreamDeserializeMulti(self, reponames=None, logger=NoopLogger()):
        deserializers = []
        for repo in self.repomgr.GetRepositories(reponames):
            deserializers.append(
                self.StreamDeserializer(self.__GetSerializedPath(repo),
                                        logger))

        while True:
            # remove EOFed repos
            deserializers = [ds for ds in deserializers if not ds.EOF()]

            # stop when all deserializers are empty
            if not deserializers:
                break

            # find lowest key (effname)
            thiskey = deserializers[0].Peek().effname
            for ds in deserializers[1:]:
                thiskey = min(thiskey, ds.Peek().effname)

            # fetch all packages with given key from all deserializers
            packageset = []
            for ds in deserializers:
                while not ds.EOF() and ds.Peek().effname == thiskey:
                    packageset.append(ds.Get())

            yield packageset
Example 13
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        tmppath = statepath + '.tmp'

        if os.path.isfile(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with open(tmppath, 'wb') as statefile:
            logger.Log('fetching ' + self.url)
            data = Get(self.url).content

            logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

            if self.compression == 'gz':
                logger.GetIndented().Log('decompressing with gzip')
                data = gzip.decompress(data)
            elif self.compression == 'bz2':
                logger.GetIndented().Log('decompressing with bz2')
                data = bz2.decompress(data)
            elif self.compression == 'xz':
                logger.GetIndented().Log('decompressing with xz')
                data = lzma.LZMADecompressor().decompress(data)

            if self.compression:
                logger.GetIndented().Log('size after decompression is {} byte(s)'.format(len(data)))

            logger.GetIndented().Log('saving')
            statefile.write(data)

        os.replace(tmppath, statepath)
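
The write-to-.tmp-then-os.replace sequence above is what makes the update atomic: readers either see the old state file or the complete new one, never a partial write. The same pattern as a standalone helper (a sketch, not part of this codebase):

    import os

    def write_atomically(path, data):
        # write to a sibling temporary file, then rename over the target;
        # os.replace is atomic on POSIX when both paths are on one filesystem
        tmppath = path + '.tmp'
        with open(tmppath, 'wb') as tmpfile:
            tmpfile.write(data)
            tmpfile.flush()
            os.fsync(tmpfile.fileno())  # make the data durable before the rename
        os.replace(tmppath, path)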
Example 14
    def test_all_fields(self):
        factory = PackageFactory(NoopLogger())

        maker = factory.begin()
        maker.set_name_and_version('foo-1.0')
        maker.set_origin('/foo')
        maker.set_summary('foo package')
        maker.add_maintainers(None, 'a@com', [None, ['b@com']], None, 'c@com')
        maker.add_maintainers('d@com')
        maker.add_categories(None, 'foo', 'bar')
        maker.add_categories('baz')
        maker.add_homepages('http://foo', 'http://bar')
        maker.add_licenses(['GPLv2', 'GPLv3'])
        maker.add_licenses('MIT')
        maker.add_downloads(None, [None, 'http://baz'], ['ftp://quux'])
        pkg = maker.unwrap()

        self.assertEqual(pkg.name, 'foo')
        self.assertEqual(pkg.version, '1.0')
        self.assertEqual(pkg.extrafields['origin'], '/foo')
        self.assertEqual(pkg.maintainers, ['a@com', 'b@com', 'c@com', 'd@com'])
        self.assertEqual(pkg.category, 'foo')  # XXX: convert to array
        self.assertEqual(pkg.homepage, 'http://foo')  # XXX: convert to array
        self.assertEqual(pkg.licenses, ['GPLv2', 'GPLv3', 'MIT'])
        self.assertEqual(pkg.downloads, ['http://baz', 'ftp://quux'])
Example 15
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with StateDir(statepath) as statedir:
            numpage = 1
            while True:
                url = self.url + '?page={}&per_page={}&sort=alpha'.format(
                    numpage, self.per_page)
                logger.Log('getting ' + url)

                text = Fetch(url, timeout=self.fetch_timeout).text
                with open(os.path.join(statedir, '{}.json'.format(numpage)),
                          'w',
                          encoding='utf-8') as pagefile:
                    pagefile.write(text)

                # stop if the page is empty (last page reached)
                if not json.loads(text)['crates']:
                    logger.Log('last page detected')
                    return

                numpage += 1
                time.sleep(1)
Example 16
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with StateDir(statepath) as statedir:
            numpage = 0
            nextpageurl = self.url + 'Packages()?$filter=IsLatestVersion'
            while True:
                logger.Log('getting ' + nextpageurl)

                text = Fetch(nextpageurl, timeout=5).text
                with open(os.path.join(statedir, '{}.xml'.format(numpage)),
                          'w',
                          encoding='utf-8') as pagefile:
                    pagefile.write(text)

                # parse the page to find the link to the next one
                logger.Log('parsing ' + nextpageurl)
                root = xml.etree.ElementTree.fromstring(text)

                next_link = root.find(
                    '{http://www.w3.org/2005/Atom}link[@rel="next"]')
                if next_link is None:
                    break

                nextpageurl = next_link.attrib['href']
                numpage += 1
Example 17
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isfile(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        # Get and parse repomd.xml
        repomd_url = self.url + 'repodata/repomd.xml'
        logger.Log('fetching metadata from ' + repomd_url)
        repomd_content = Fetch(repomd_url, check_status=True).text
        repomd_xml = xml.etree.ElementTree.fromstring(repomd_content)

        repodata_url = self.url + repomd_xml.find(
            '{http://linux.duke.edu/metadata/repo}data[@type="primary"]/{http://linux.duke.edu/metadata/repo}location'
        ).attrib['href']

        logger.Log('fetching ' + repodata_url)
        data = Fetch(repodata_url).content

        logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

        if repodata_url.endswith('gz'):
            logger.GetIndented().Log('decompressing with gzip')
            data = gzip.decompress(data)
        elif repodata_url.endswith('xz'):
            logger.GetIndented().Log('decompressing with xz')
            data = lzma.LZMADecompressor().decompress(data)

        logger.GetIndented().Log(
            'size after decompression is {} byte(s)'.format(len(data)))

        logger.GetIndented().Log('saving')

        with StateFile(statepath, 'wb') as statefile:
            statefile.write(data)
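
The Clark-notation XPath above can be shortened with ElementTree's namespace mapping; a rewrite of the same lookup, reusing the example's repomd_xml variable:

    ns = {'repo': 'http://linux.duke.edu/metadata/repo'}
    location = repomd_xml.find('repo:data[@type="primary"]/repo:location', ns)
    repodata_url = self.url + location.attrib['href']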
Example 18
    def fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with atomic_dir(statepath) as statedir:
            self.do_fetch(statedir, logger)
Example 19
    def test_strip(self):
        factory = PackageFactory(NoopLogger())

        maker = factory.begin()
        maker.set_summary('       some package foo      ')
        pkg = maker.unwrap()

        self.assertEqual(pkg.comment, 'some package foo')
Example 20
 def fetch(self, statepath, update=True, logger=NoopLogger()):
     if not os.path.isdir(statepath):
         with atomic_dir(statepath) as statedir:
             self.do_fetch(statedir, logger)
     elif update:
         self.do_update(statepath, logger)
     else:
         logger.Log('no update requested, skipping')
Example 21
 def parse(
     self,
     reponames: RepositoryNameList,
     transformer: Optional[PackageTransformer] = None,
     logger: Logger = NoopLogger()
 ) -> None:
     for repository in self.repomgr.get_repositories(reponames):
         self._parse(repository, transformer, logger)
Example 22
    def Reprocess(self, reponame, transformer=None, logger=NoopLogger()):
        repository = self.repoman.GetRepository(reponame)

        packages = self.__Deserialize(self.__GetSerializedPath(repository), repository, logger)
        packages = self.__Transform(packages, transformer, repository, logger)
        self.__Serialize(packages, self.__GetSerializedPath(repository), repository, logger)

        return packages
Example 23
    def ParseAndSerialize(self, reponame, transformer, logger=NoopLogger()):
        repository = self.repoman.GetRepository(reponame)

        packages = self.__Parse(repository, logger)
        packages = self.__Transform(packages, transformer, repository, logger)
        self.__Serialize(packages, self.__GetSerializedPath(repository), repository, logger)

        return packages
Example 24
    def test_unicalization_with_order_preserved(self):
        factory = PackageFactory(NoopLogger())

        maker = factory.begin()
        maker.add_maintainers('z@com', 'y@com', 'x@com', 'z@com', 'y@com', 'x@com')
        maker.add_maintainers('z@com', 'y@com', 'x@com')
        pkg = maker.unwrap()

        self.assertEqual(pkg.maintainers, ['z@com', 'y@com', 'x@com'])
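
Order-preserving deduplication, which this test verifies, can be expressed with a plain dict (insertion-ordered since Python 3.7); a minimal illustration of the idea, separate from the PackageFactory internals:

    def unicalize(items):
        # drop duplicates while keeping first-seen order
        return list(dict.fromkeys(items))

    assert unicalize(['z@com', 'y@com', 'x@com', 'z@com']) == ['z@com', 'y@com', 'x@com']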
Example 25
 def iter_parse(
     self,
     reponames: RepositoryNameList,
     transformer: PackageTransformer | None = None,
     maintainermgr: MaintainerManager | None = None,
     logger: Logger = NoopLogger()
 ) -> Iterator[Package]:
     for repository in self.repomgr.get_repositories(reponames):
         yield from self._iter_parse_all_sources(repository, transformer, maintainermgr, logger)
Example 26
    def fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isfile(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        args = {'mode': 'wb'} if self.binary else {'mode': 'w', 'encoding': 'utf-8'}

        with atomic_file(statepath, **args) as statefile:
            self.do_fetch(statefile, logger)
Example 27
 def parse(
     self,
     reponames: RepositoryNameList,
     transformer: PackageTransformer | None = None,
     maintainermgr: MaintainerManager | None = None,
     logger: Logger = NoopLogger()
 ) -> None:
     for repository in self.repomgr.get_repositories(reponames):
         self._parse(repository, transformer, maintainermgr, logger)
Example 28
 def iter_parse(
     self,
     reponames: RepositoryNameList,
     transformer: Optional[PackageTransformer] = None,
     logger: Logger = NoopLogger()
 ) -> Iterator[Package]:
     for repository in self.repomgr.get_repositories(reponames):
         yield from self._iter_parse_all_sources(repository, transformer,
                                                 logger)
Example 29
    def test_normalize_urls(self):
        factory = PackageFactory(NoopLogger())

        maker = factory.begin()
        maker.add_homepages('Http://Foo.coM')
        maker.add_downloads('Http://Foo.coM')
        pkg = maker.unwrap()

        self.assertEqual(pkg.homepage, 'http://foo.com/')
        self.assertEqual(pkg.downloads, ['http://foo.com/'])
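
A sketch of the normalization this test expects: lowercase the scheme and host, and default an empty path to '/'. The real PackageFactory logic is not shown here, so this is an illustration with urllib.parse, not the project's implementation:

    from urllib.parse import urlsplit, urlunsplit

    def normalize_url(url):
        # lowercase scheme and host, give an empty path a trailing slash;
        # simplified: lowercasing the whole netloc would also affect userinfo
        parts = urlsplit(url)
        return urlunsplit((parts.scheme.lower(), parts.netloc.lower(),
                           parts.path or '/', parts.query, parts.fragment))

    assert normalize_url('Http://Foo.coM') == 'http://foo.com/'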
Example 30
    def fetch(self,
              reponames: RepositoryNameList,
              update: bool = True,
              logger: Logger = NoopLogger()) -> bool:
        have_changes = False

        for repository in self.repomgr.get_repositories(reponames):
            have_changes |= self._fetch(repository, update, logger)

        return have_changes