Example #1
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isfile(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        # Get and parse repomd.xml
        repomd_url = self.url + 'repodata/repomd.xml'
        logger.Log('fetching metadata from ' + repomd_url)
        repomd_content = Fetch(repomd_url, check_status=True).text
        repomd_xml = xml.etree.ElementTree.fromstring(repomd_content)

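        # locate the primary package metadata file referenced by repomd.xml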
        repodata_url = self.url + repomd_xml.find(
            '{http://linux.duke.edu/metadata/repo}data[@type="primary"]/{http://linux.duke.edu/metadata/repo}location'
        ).attrib['href']

        logger.Log('fetching ' + repodata_url)
        data = Fetch(repodata_url).content

        logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

        if repodata_url.endswith('gz'):
            logger.GetIndented().Log('decompressing with gzip')
            data = gzip.decompress(data)
        elif repodata_url.endswith('xz'):
            logger.GetIndented().Log('decompressing with xz')
            data = lzma.LZMADecompressor().decompress(data)

        logger.GetIndented().Log(
            'size after decompression is {} byte(s)'.format(len(data)))

        logger.GetIndented().Log('saving')

        with StateFile(statepath, 'wb') as statefile:
            statefile.write(data)
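
All of these examples call a module-level Fetch() helper rather than an HTTP library directly. Judging by the parameters passed across the examples (check_status, post, headers, timeout) and the attributes read from the result (.text, .content, .status_code, .encoding), it behaves like a thin wrapper around requests. The sketch below is an assumption made for readability, not the project's actual implementation:

import requests


def Fetch(url, check_status=True, timeout=60, post=None, headers=None):
    # hypothetical wrapper; the real repology helper may differ
    if post is not None:
        response = requests.post(url, data=post, headers=headers, timeout=timeout)
    else:
        response = requests.get(url, headers=headers, timeout=timeout)

    if check_status:
        response.raise_for_status()

    # callers use .text, .content, .status_code and .encoding on the result
    return response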
Example #2
File: guix.py Project: yzgyyang/repology
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with StateDir(statepath) as statedir:
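            # the package index is split by first letter ('0-9' plus 'A'..'Z'), each possibly spanning several pages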
            for letter in ['0-9'] + [l for l in ascii_uppercase]:
                page = 1
                numpages = 1
                while True:
                    logger.Log('fetching {} page {}'.format(letter, page))

                    pageurl = '{}/{}/page/{}/'.format(self.url, letter, page)

                    # fetch HTML
                    response = Fetch(pageurl)
                    response.encoding = 'utf-8'  # is not detected properly
                    text = response.text

                    # get the number of pages, if there is more than one
                    if numpages == 1:
                        for pagebutton in lxml.html.document_fromstring(text).xpath('.//nav[@class="page-selector"]/a'):
                            numpages = max(numpages, int(pagebutton.text))

                    # save HTML
                    with open(os.path.join(statedir, '{}-{}.html'.format(letter, page)), 'w', encoding='utf-8') as pagefile:
                        pagefile.write(text)

                    # stop if that was the last (or only) page
                    if page >= numpages:
                        break

                    # proceed with the next page
                    page += 1
Example #3
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with StateDir(statepath) as statedir:
            numpage = 0
            nextpageurl = self.url + 'Packages()?$filter=IsLatestVersion'
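            # the feed is paged; each response is an Atom document with a <link rel="next"> pointing to the following page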
            while True:
                logger.Log('getting ' + nextpageurl)

                text = Fetch(nextpageurl, timeout=5).text
                with open(os.path.join(statedir, '{}.xml'.format(numpage)),
                          'w',
                          encoding='utf-8') as pagefile:
                    pagefile.write(text)

                # parse the page just fetched to find the link to the next one
                logger.Log('parsing ' + nextpageurl)
                root = xml.etree.ElementTree.fromstring(text)

                next_link = root.find(
                    '{http://www.w3.org/2005/Atom}link[@rel="next"]')
                if next_link is None:
                    break

                nextpageurl = next_link.attrib['href']
                numpage += 1
Example #4
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isfile(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        logger.Log('fetching ' + self.url)
        data = Fetch(self.url).content

        logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

        if self.compression == 'gz':
            logger.GetIndented().Log('decompressing with gzip')
            data = gzip.decompress(data)
        elif self.compression == 'bz2':
            logger.GetIndented().Log('decompressing with bz2')
            data = bz2.decompress(data)
        elif self.compression == 'xz':
            logger.GetIndented().Log('decompressing with xz')
            data = lzma.LZMADecompressor().decompress(data)

        if self.compression:
            logger.GetIndented().Log('size after decompression is {} byte(s)'.format(len(data)))

        logger.GetIndented().Log('saving')

        with StateFile(statepath, 'wb') as statefile:
            statefile.write(data)
Example #5
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with StateDir(statepath) as statedir:
            numpage = 1
            while True:
                url = self.url + '?page={}&per_page={}&sort=alpha'.format(
                    numpage, self.per_page)
                logger.Log('getting ' + url)

                text = Fetch(url, timeout=self.fetch_timeout).text
                with open(os.path.join(statedir, '{}.json'.format(numpage)),
                          'w',
                          encoding='utf-8') as pagefile:
                    pagefile.write(text)

                # an empty 'crates' list means the last page has been reached
                if not json.loads(text)['crates']:
                    logger.Log('last page detected')
                    return

                numpage += 1
                time.sleep(1)
Example #6
    def LoadSpec(self, package, statedir, logger):
        specurl = self.giturl + '/{0}.git/plain/{0}.spec'.format(package)

        logger.GetIndented().Log('getting spec from {}'.format(specurl))

        r = Fetch(specurl, check_status=False)
        if r.status_code != 200:
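            # no spec file at the expected location; check whether the package was retired (dead.package marker)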
            deadurl = self.giturl + '/{0}.git/plain/dead.package'.format(package)
            dr = Fetch(deadurl, check_status=False)
            if dr.status_code == 200:
                logger.GetIndented(2).Log('dead: ' + ';'.join(dr.text.split('\n')))
            else:
                logger.GetIndented(2).Log('failed: {}'.format(r.status_code))  # XXX: check .dead.package, instead throw
            return

        with open(os.path.join(statedir, package + '.spec'), 'wb') as file:
            file.write(r.content)
Example #7
File: aur.py Project: yzgyyang/repology
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with StateDir(statepath) as statedir:
            packages_url = self.url + 'packages.gz'
            logger.GetIndented().Log('fetching package list from ' +
                                     packages_url)
            data = Fetch(packages_url).text  # autogunzipped?

            package_names = []

            for line in data.split('\n'):
                line = line.strip()
                if line.startswith('#') or line == '':
                    continue
                package_names.append(line)

            if not package_names:
                raise RuntimeError(
                    'Empty package list received, refusing to continue')

            logger.GetIndented().Log('{} package name(s) parsed'.format(
                len(package_names)))

            pagesize = 100

            for page in range(0, len(package_names) // pagesize + 1):
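                # query the AUR RPC for package info in batches of 'pagesize' names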
                ifrom = page * pagesize
                ito = (page + 1) * pagesize
                url = '&'.join([
                    'arg[]=' + urllib.parse.quote(name)
                    for name in package_names[ifrom:ito]
                ])
                url = self.url + '/rpc/?v=5&type=info&' + url

                logger.GetIndented().Log('fetching page {}/{}'.format(
                    page + 1,
                    len(package_names) // pagesize + 1))

                with open(os.path.join(statedir, '{}.json'.format(page)),
                          'wb') as statefile:
                    statefile.write(
                        Fetch(url, timeout=self.fetch_timeout).content)
Example #8
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with StateDir(statepath) as statedir:
            pages = [chr(x) for x in range(ord('a'), ord('z') + 1)]  # a..z
            pages.append('0-9')

            for page in pages:
                logger.Log('fetching page ' + page)
                pageurl = self.url + '/' + page + '.html'
                with open(os.path.join(statedir, page + '.html'),
                          'w',
                          encoding='utf-8') as pagefile:
                    pagefile.write(Fetch(pageurl).text)
Example #9
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isfile(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        state = {}

        if os.path.isfile(statepath):
            with open(statepath, 'r', encoding='utf-8') as oldstatefile:
                state = json.load(oldstatefile)
            logger.Log('loaded old state, {} entries'.format(len(state)))
        else:
            logger.Log('starting with empty state')

        newdata = json.loads(Fetch(self.url).text)

        if not newdata['releases']:
            raise RuntimeError(
                'Empty freshcode package list received, refusing to go on')

        # add new entries in reversed order, oldest first so newest
        # have higher priority; may also compare versions here
        for entry in newdata['releases']:
            if 'name' not in entry:
                logger.Log('skipping entry with no name')
                continue

            if entry['name'] in state:
                oldentry = state[entry['name']]

                if version_compare(entry['version'], oldentry['version']) > 0:
                    logger.Log(
                        'replacing entry "{}", version changed {} -> {}'.
                        format(entry['name'], oldentry['version'],
                               entry['version']))
                    state[entry['name']] = entry
            else:
                logger.Log('adding entry "{}", version {}'.format(
                    entry['name'], entry['version']))
                state[entry['name']] = entry

        with StateFile(statepath, 'w', encoding='utf-8') as statefile:
            json.dump(state, statefile)

        logger.Log('saved new state, {} entries'.format(len(state)))
Example #10
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isdir(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        with StateDir(statepath) as statedir:
            page = 1

            while True:
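                # fetch one page of the package index and load each listed package's spec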
                pageurl = self.apiurl + 'packages/?page={}'.format(page)
                logger.Log('getting page {} from {}'.format(page, pageurl))
                pagedata = json.loads(Fetch(pageurl).text)

                for package in pagedata['packages']:
                    self.LoadSpec(package['name'], statedir, logger)

                page += 1

                if page > pagedata['page_total']:
                    break
Example #11
File: file.py Project: yzgyyang/repology
    def Fetch(self, statepath, update=True, logger=NoopLogger()):
        if os.path.isfile(statepath) and not update:
            logger.Log('no update requested, skipping')
            return

        fetching_what = [self.url]
        if isinstance(self.post, dict):
            fetching_what.append('{} fields of form data'.format(len(
                self.post)))
        elif self.post:
            fetching_what.append('{} bytes of post data'.format(len(
                self.post)))

        if self.headers:
            fetching_what.append('{} extra headers'.format(len(self.headers)))

        logger.Log('fetching ' + ', with '.join(fetching_what))

        data = Fetch(self.url, post=self.post, headers=self.headers).content

        logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

        if self.compression == 'gz':
            logger.GetIndented().Log('decompressing with gzip')
            data = gzip.decompress(data)
        elif self.compression == 'bz2':
            logger.GetIndented().Log('decompressing with bz2')
            data = bz2.decompress(data)
        elif self.compression == 'xz':
            logger.GetIndented().Log('decompressing with xz')
            data = lzma.LZMADecompressor().decompress(data)

        if self.compression:
            logger.GetIndented().Log(
                'size after decompression is {} byte(s)'.format(len(data)))

        logger.GetIndented().Log('saving')

        with StateFile(statepath, 'wb') as statefile:
            statefile.write(data)
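
StateFile and StateDir are context managers from the surrounding project and are not shown in these examples. The way they are used (write everything to the state path, keep the result only when the block completes) suggests write-to-temporary-then-rename semantics; a minimal sketch of a StateFile-like helper under that assumption (StateDir would presumably do the same for a directory tree):

import os


class StateFile:
    # hypothetical atomic file writer; the real repology helper may differ
    def __init__(self, path, mode='w', **kwargs):
        self.path = path
        self.tmppath = path + '.tmp'
        self.mode = mode
        self.kwargs = kwargs

    def __enter__(self):
        self.file = open(self.tmppath, self.mode, **self.kwargs)
        return self.file

    def __exit__(self, exc_type, exc_value, traceback):
        self.file.close()
        if exc_type is None:
            os.replace(self.tmppath, self.path)  # keep the new state
        else:
            os.unlink(self.tmppath)  # discard partial output on error
        return False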