Python urijoin Exemples, grimoirelab.toolkit.uris.urijoin Python Exemples

Exemple #1

0

Afficher le fichier

    def fetch_items(self, path, payload):
        """Return the items from the GitHub API using links pagination.

        Generator that requests `path` under the repository URL and then
        follows the `next` relation found in the response `links` until
        no further page is advertised.

        :param path: resource path appended to the repository URL
        :param payload: parameters sent with every request

        :returns: a generator of raw response bodies (`response.text`),
            one per page
        """
        # Function-scope import: the module's import block is not visible
        # from this method, so the dependency is kept local.
        from urllib.parse import parse_qs, urlsplit

        page = 0  # current page
        last_page = None  # stays None when the server sends no 'last' link
        url_next = urijoin(self.base_url, 'repos', self.owner, self.repository, path)

        logger.debug("Get GitHub paginated items from %s", url_next)

        response = self.fetch(url_next, payload=payload)

        items = response.text
        page += 1

        if 'last' in response.links:
            last_url = response.links['last']['url']
            # Parse the query string instead of splitting on '&page=',
            # which breaks when 'page' is the first query parameter.
            page_values = parse_qs(urlsplit(last_url).query).get('page')
            if page_values:
                last_page = int(page_values[0])
            logger.debug("Page: %s/%s", page, last_page)

        while items:
            yield items

            items = None

            if 'next' in response.links:
                url_next = response.links['next']['url']
                response = self.fetch(url_next, payload=payload)
                page += 1

                items = response.text
                # '%s' with lazy arguments: 'last_page' may still be None
                # here, which the former eager '%i' formatting rejected
                # with a TypeError.
                logger.debug("Page: %s/%s", page, last_page)

Exemple #2

0

Afficher le fichier

Fichier : meetup.py Projet : eduranf/perceval

    def events(self, group, from_date=DEFAULT_DATETIME):
        """Fetch the events pages of a given group.

        :param group: group whose events resource is requested
        :param from_date: date converted to UTC and sent as the
            `since:` scroll value

        :returns: a generator of the pages produced by `_fetch`

        :raises RepositoryError: when the request fails with a
            410 HTTP status code
        """
        date = datetime_to_utc(from_date)
        date = date.strftime("since:%Y-%m-%dT%H:%M:%S.000Z")

        resource = urijoin(group, self.REVENTS)

        # Hack required because the Meetup API does not support list
        # values with the format `?param=value1&param=value2`.
        # It only works with `?param=value1,value2`.
        # Moreover, urllib3 encodes comma characters when values
        # are given using a params dict, which does not work
        # with Meetup either.
        fixed_params = '?' + self.PFIELDS + '=' + ','.join(self.VEVENT_FIELDS)
        fixed_params += '&' + self.PSTATUS + '=' + ','.join(self.VSTATUS)
        resource += fixed_params

        params = {
            self.PORDER: self.VUPDATED,
            self.PSCROLL: date,
            self.PPAGE: self.max_items
        }

        try:
            for page in self._fetch(resource, params):
                yield page
        except requests.exceptions.HTTPError as error:
            if error.response.status_code == 410:
                msg = "Group is no longer accessible: {}".format(error)
                # Chain explicitly so the HTTP error is kept as the cause
                raise RepositoryError(cause=msg) from error
            # Any other status: re-raise the active exception untouched
            raise

Exemple #3

0

Afficher le fichier

    def _fetch(self, resource, params):
        """Fetch a resource.

        Method to fetch and to iterate over the contents of a
        type of resource. The method returns a generator of
        pages for that resource and parameters.

        The API key and the sign flag are added to a copy of `params`,
        so the dict given by the caller is left untouched.

        :param resource: type of the resource
        :param params: parameters to filter

        :returns: a generator of pages for the requested resource
        """
        url = urijoin(self.base_url, resource)

        # Work on a copy: the credentials must not leak into the
        # dict owned by the caller.
        params = dict(params)
        params[self.PKEY] = self.api_key
        # Plain string, as in the follow-up requests below; a stray
        # trailing comma used to turn this value into a 1-tuple.
        params[self.PSIGN] = 'true'

        do_fetch = True

        while do_fetch:
            logger.debug("Meetup client calls resource: %s params: %s",
                         resource, str(params))

            self.sleep_for_rate_limit()
            r = self.fetch(url, payload=params)
            self.update_rate_limit(r)

            yield r.text

            if r.links and 'next' in r.links:
                # Only the key and sign flag are sent with the 'next' URL
                url = r.links['next']['url']
                params = {self.PKEY: self.api_key, self.PSIGN: 'true'}
            else:
                do_fetch = False

Exemple #4

0

Afficher le fichier

    def _call(self, resource, params):
        """Retrieve the given resource, page by page.

        Generator that yields the body of every response and keeps
        requesting while the JSON document has a `next` entry under
        `_links`.

        :param resource: resource to retrieve
        :param params: dict with the HTTP parameters needed to retrieve
            the given resource; follow-up requests use an empty dict
        """
        target = self.URL % {'base': self.base_url, 'resource': resource}

        logger.debug("Confluence client requests: %s params: %s", resource,
                     str(params))

        has_next = True

        while has_next:
            response = self.fetch(target, payload=params)
            yield response.text

            # Pagination is available when 'next' link exists
            body = response.json()
            has_next = '_links' in body and 'next' in body['_links']

            if has_next:
                target = urijoin(self.base_url, body['_links']['next'])
                params = {}

Exemple #5

0

Afficher le fichier

    def events(self):
        """Collect the public events of the user.

        :returns: the result of `fetch_items` for the user's public
            events path, requested with a `per_page` value of 30
        """
        events_path = urijoin("users", self.user, "events", "public")
        return self.fetch_items(events_path, {'per_page': 30})

Exemple #6

0

Afficher le fichier

    def fetch_items(self, category, **kwargs):
        """Fetch the historical contents.

        The content summaries are collected first; then, for each of
        them, its historical contents are retrieved and yielded with
        an extra `content_url` entry.

        :param category: the category of items to fetch
        :param kwargs: backend arguments; `from_date` is required

        :returns: a generator of items
        """
        since = kwargs['from_date']

        logger.info("Fetching historical contents of '%s' from %s", self.url,
                    str(since))

        fetched = 0

        # Consume the whole summary before asking for any history
        summaries = list(self.__fetch_contents_summary(since))

        for summary in summaries:
            content_id = summary['id']
            webui_url = urijoin(self.origin, summary['_links']['webui'])

            for historical in self.__fetch_historical_contents(content_id, since):
                historical['content_url'] = webui_url
                yield historical
                fetched += 1

        logger.info("Fetch process completed: %s historical contents fetched",
                    fetched)

Exemple #7

0

Afficher le fichier

    def get_issues(self, from_date):
        """Retrieve all the issues from a given date.

        Generator that yields the raw body of each search response.
        Further pages are requested while `startAt` plus `maxResults`
        stays below the `total` reported by the first response.

        :param from_date: obtain issues updated since this date
        """
        offset = 0

        search_url = urijoin(self.base_url, self.RESOURCE, self.VERSION_API, 'search')
        first_payload = self.__build_payload(offset, from_date)
        response = self.fetch(search_url, payload=first_payload)
        raw_page = response.text

        page_info = response.json()
        total = page_info['total']
        page_size = page_info['maxResults']

        offset += min(page_size, total)
        self.__log_status(offset, total)

        while raw_page:
            yield raw_page

            # Stop as soon as the last received page reaches the total
            if page_info['startAt'] + page_size >= total:
                break

            next_payload = self.__build_payload(offset, from_date)
            response = self.fetch(search_url, payload=next_payload)

            page_info = response.json()
            offset += page_size
            raw_page = response.text
            self.__log_status(offset, total)

Exemple #8

0

Afficher le fichier

    def get_fields(self):
        """Retrieve all the fields available.

        :returns: the body text of the response for the 'field' resource
        """
        fields_url = urijoin(self.base_url, self.RESOURCE, self.VERSION_API, 'field')
        return self.fetch(fields_url).text

Exemple #9

0

Afficher le fichier

Fichier : pipermail.py Projet : kmn5409/grimoirelab-perceval

    def _parse_archive_links(self, raw_html):
        """Extract the links to archive files from an HTML page.

        Every `href` found in an anchor is a candidate. A candidate is
        kept when its last extension, or the one right before it, is
        listed in `PIPERMAIL_TYPES`; kept candidates are joined with
        `self.url`.

        :param raw_html: HTML contents to parse

        :returns: a list with the URLs of the recognized archives
        """
        bs = bs4.BeautifulSoup(raw_html, 'html.parser')

        candidates = [a['href'] for a in bs.find_all('a', href=True)]
        links = []

        for candidate in candidates:
            # Links from Apache's 'mod_mbox' plugin contain
            # trailing "/thread" substrings. Remove them to get
            # the links where mbox files are stored.
            if candidate.endswith(MOD_MBOX_THREAD_STR):
                candidate = candidate[:-len(MOD_MBOX_THREAD_STR)]

            # Ignore links with not recognized extension.
            # The second extension is taken from the root returned by
            # 'splitext'; 'str.rstrip' must not be used for this because
            # it removes a set of characters, not a suffix, and can eat
            # into the previous extension.
            root, ext1 = os.path.splitext(candidate)
            ext2 = os.path.splitext(root)[-1]

            if ext1 in PIPERMAIL_TYPES or ext2 in PIPERMAIL_TYPES:
                links.append(urijoin(self.url, candidate))
            else:
                logger.debug(
                    "Ignoring %s archive because its extension was not recognized",
                    candidate)

        logger.debug("%s archives found", len(links))

        return links

Exemple #10

0

Afficher le fichier

    def summary(self):
        """Get the Crates.io summary.

        :returns: the result of `self.fetch` for the summary URL
        """
        summary_url = urijoin(CRATES_API_URL, SUMMARY_CATEGORY)
        return self.fetch(summary_url)

Exemple #11

0

Afficher le fichier

    def crate_attribute(self, crate_id, attribute):
        """Get an attribute of a crate.

        :param crate_id: identifier of the crate
        :param attribute: name of the attribute, used as last URL segment

        :returns: the result of `self.fetch` for the attribute URL
        """
        attribute_url = urijoin(CRATES_API_URL, CRATES_CATEGORY, crate_id, attribute)
        return self.fetch(attribute_url)

Exemple #12

0

Afficher le fichier

    def crate(self, crate_id):
        """Get a crate by its ID.

        :param crate_id: identifier of the crate

        :returns: the result of `self.fetch` for the crate URL
        """
        crate_url = urijoin(CRATES_API_URL, CRATES_CATEGORY, crate_id)
        return self.fetch(crate_url)

Exemple #13

0

Afficher le fichier

    def crates(self, from_page=1):
        """Get crates in alphabetical order.

        :param from_page: page passed on to the items fetcher

        :returns: the result of the private items fetcher for the
            crates URL
        """
        crates_url = urijoin(CRATES_API_URL, CRATES_CATEGORY)
        return self.__fetch_items(crates_url, from_page)

Exemple #14

0

Afficher le fichier

    def events(self, group, from_date=DEFAULT_DATETIME):
        """Fetch the events pages of a given group.

        :param group: group whose events resource is requested
        :param from_date: date converted to UTC and sent as the
            `since:` scroll value

        :returns: a generator of the pages produced by `_fetch`
        """
        since = datetime_to_utc(from_date).strftime("since:%Y-%m-%dT%H:%M:%S.000Z")

        resource = urijoin(group, self.REVENTS)

        # List values cannot go through the params dict: Meetup only
        # accepts them as `?param=value1,value2` (not as
        # `?param=value1&param=value2`), and commas given in a params
        # dict get encoded, which Meetup does not accept either. They
        # are therefore appended to the resource by hand.
        fields = ','.join(self.VEVENT_FIELDS)
        statuses = ','.join(self.VSTATUS)
        resource += '?' + self.PFIELDS + '=' + fields
        resource += '&' + self.PSTATUS + '=' + statuses

        query = {
            self.PORDER: self.VUPDATED,
            self.PSCROLL: since,
            self.PPAGE: self.max_items
        }

        yield from self._fetch(resource, query)

Exemple #15

0

Afficher le fichier

Fichier : jenkins.py Projet : chubbymaggie/perceval

    def get_jobs(self):
        """Retrieve all jobs.

        :returns: the body text of the response for the `api/json` URL
        """
        jobs_url = urijoin(self.base_url, "api", "json")
        return self.fetch(jobs_url).text

Exemple #16

0

Afficher le fichier

Fichier : hyperkitty.py Projet : Gemarodri/perceval

    def fetch(self, from_date=DEFAULT_DATETIME):
        """Fetch the mbox files from the remote archiver.

        This method stores the archives in the path given during the
        initialization of this object.

        HyperKitty archives are accessed month by month and stored following
        the schema year-month. Archives are fetched from the given month
        till the current month.

        The mailing list URL is requested once before downloading and
        `raise_for_status` is called on that response.

        :param from_date: fetch archives that store messages
            equal or after the given date; only year and month values
            are compared

        :returns: a list of tuples, storing the links and paths of the
            fetched archives
        """
        logger.info("Downloading mboxes from '%s' to since %s", self.url,
                    str(from_date))
        logger.debug("Storing mboxes in '%s'", self.dirpath)

        # Check mailing list URL
        r = requests.get(self.url)
        r.raise_for_status()

        from_date = datetime_to_utc(from_date)
        to_end = datetime_utcnow()
        to_end += dateutil.relativedelta.relativedelta(months=1)

        months = months_range(from_date, to_end)

        fetched = []

        # Create the directory in a single call; checking for existence
        # first leaves a window where another process can create it and
        # make 'makedirs' fail.
        os.makedirs(self.dirpath, exist_ok=True)

        tmbox = 0  # number of archives requested

        for dts in months:
            tmbox += 1
            start, end = dts[0], dts[1]
            filename = start.strftime("%Y-%m.mbox.gz")
            filepath = os.path.join(self.dirpath, filename)

            url = urijoin(self.url, 'export', filename)

            params = {
                'start': start.strftime("%Y-%m-%d"),
                'end': end.strftime("%Y-%m-%d")
            }

            success = self._download_archive(url, params, filepath)

            if success:
                fetched.append((url, filepath))

        logger.info("%s/%s MBoxes downloaded", len(fetched), tmbox)

        return fetched

Exemple #17

0

Afficher le fichier

Fichier : gitlab.py Projet : skcse/grimoirelab-perceval

    def issue_notes(self, issue_id):
        """Get the issue notes from pagination.

        :param issue_id: identifier of the issue

        :returns: the result of `fetch_items` for the notes path,
            requested with ascending `updated_at` ordering
        """
        notes_path = urijoin("issues", str(issue_id), "notes")
        ordering = {'order_by': 'updated_at', 'sort': 'asc'}

        return self.fetch_items(notes_path, ordering)

Exemple #18

0

Afficher le fichier

    def crate_attribute(self, crate_id, attribute):
        """Get an attribute of a crate.

        :param crate_id: identifier of the crate
        :param attribute: name of the attribute, used as last URL segment

        :returns: the result of the private request sender for the
            attribute URL
        """
        attribute_url = urijoin(CRATES_API_URL, CRATES_CATEGORY, crate_id, attribute)
        request_headers = self.__set_headers()

        return self.__send_request(attribute_url, headers=request_headers)

Exemple #19

0

Afficher le fichier

Fichier : jenkins.py Projet : smitthakkar96/perceval

    def get_jobs(self):
        """Retrieve all jobs.

        :returns: the body text of the response; `raise_for_status`
            is called on the response before returning
        """
        response = requests.get(urijoin(self.url, "/api/json"))
        response.raise_for_status()

        return response.text

Exemple #20

0

Afficher le fichier

    def issue(self, issue_id):
        """Get the issue data by its ID.

        :param issue_id: identifier of the issue

        :returns: the result of the private request sender for the
            issue URL
        """
        issue_url = self.__get_url(urijoin("bugs", str(issue_id)))
        return self.__send_request(issue_url)

Exemple #21

0

Afficher le fichier

Fichier : gitlab.py Projet : skcse/grimoirelab-perceval

    def issue_emojis(self, issue_id):
        """Get emojis of an issue.

        :param issue_id: identifier of the issue

        :returns: the result of `fetch_items` for the `award_emoji`
            path, requested with ascending `updated_at` ordering
        """
        emojis_path = urijoin("issues", str(issue_id), "award_emoji")
        ordering = {'order_by': 'updated_at', 'sort': 'asc'}

        return self.fetch_items(emojis_path, ordering)

Exemple #22

0

Afficher le fichier

    def __init__(self, bot, bot_token, tag=None, cache=None, archive=None):
        """Initialize the backend for a bot.

        The origin given to the parent class is `TELEGRAM_URL` joined
        with the bot name. The client attribute starts as None.
        """
        super().__init__(urijoin(TELEGRAM_URL, bot),
                         tag=tag, cache=cache, archive=archive)

        self.bot = bot
        self.bot_token = bot_token
        self.client = None

Exemple #23

0

Afficher le fichier

Fichier : github.py Projet : dandanwei/grimoirelab-perceval

    def pull_commits(self, pr_number):
        """Get pull request commits.

        :param pr_number: number of the pull request

        :returns: the result of `fetch_items` for the commits path,
            requested with a `per_page` value of 30
        """
        commits_path = urijoin("pulls", str(pr_number), "commits")
        return self.fetch_items(commits_path, {'per_page': 30})

Exemple #24

0

Afficher le fichier

    def comments(self, group, event_id):
        """Fetch the comments of a given event.

        :param group: group the event belongs to
        :param event_id: identifier of the event

        :returns: a generator of the pages produced by `_fetch`
        """
        comments_resource = urijoin(group, self.REVENTS, event_id, self.RCOMMENTS)

        yield from self._fetch(comments_resource, {self.PPAGE: self.max_items})

Exemple #25

0

Afficher le fichier

    def __init__(self, channel, api_token, max_items=MAX_ITEMS,
                 tag=None, cache=None):
        """Initialize the backend for a channel.

        The origin given to the parent class is `SLACK_URL` joined with
        the channel. A `SlackClient` is created with the API token and
        the users mapping starts empty.
        """
        super().__init__(urijoin(SLACK_URL, channel), tag=tag, cache=cache)

        self.channel = channel
        self.max_items = max_items
        self.client = SlackClient(api_token, max_items=max_items)
        self._users = {}

Exemple #26

0

Afficher le fichier

    def issue_collection(self, issue_id, collection_name):
        """Get a collection list of a given issue.

        :param issue_id: identifier of the issue
        :param collection_name: name of the collection, used as the
            last path segment

        :returns: the result of the private items fetcher for the
            collection URL
        """
        collection_url = self.__get_url(
            urijoin("bugs", str(issue_id), collection_name))
        query = {
            'ws.size': self.items_per_page,
            'ws.start': 0,
            'order_by': 'date_last_updated',
        }

        return self.__fetch_items(path=collection_url, payload=query)

Exemple #27

0

Afficher le fichier

    def __init__(self, owner, repository, tag=None, cache=None):
        """Initialize the backend for a repository.

        `DOCKER_SHORTCUT_OWNER` is replaced by `DOCKER_OWNER` before the
        origin is built from `DOCKERHUB_URL`, the owner and the
        repository. A `DockerHubClient` is created as client.
        """
        owner = DOCKER_OWNER if owner == DOCKER_SHORTCUT_OWNER else owner

        super().__init__(urijoin(DOCKERHUB_URL, owner, repository),
                         tag=tag, cache=cache)

        self.owner = owner
        self.repository = repository
        self.client = DockerHubClient()

Exemple #28

0

Afficher le fichier

Fichier : askbot.py Projet : smitthakkar96/perceval

    def get_html_question(self, question_id, page=1):
        """Retrieve a raw HTML question and all its information.

        :param question_id: question identifier
        :param page: page to retrieve

        :returns: the result of the private call helper for the
            question path
        """
        question_path = urijoin(self.HTML_QUESTION, question_id)
        query = {'page': page, 'sort': self.ORDER_HTML}

        return self.__call(question_path, query)

Exemple #29

0

Afficher le fichier

    def repository(self, owner, repository):
        """Fetch information about a repository.

        :param owner: owner of the repository
        :param repository: name of the repository

        :returns: the body text of the response
        """
        repo_url = urijoin(self.base_url, self.RREPOSITORY, owner, repository)

        logger.debug("DockerHub client requests: %s", repo_url)

        return self.fetch(repo_url).text

Exemple #30

0

Afficher le fichier

    def __init__(self, owner, repository, tag=None, archive=None):
        """Initialize the backend for a repository.

        `DOCKER_SHORTCUT_OWNER` is replaced by `DOCKER_OWNER` before the
        origin is built from `DOCKERHUB_URL`, the owner and the
        repository. The client attribute starts as None.
        """
        owner = DOCKER_OWNER if owner == DOCKER_SHORTCUT_OWNER else owner

        super().__init__(urijoin(DOCKERHUB_URL, owner, repository),
                         tag=tag, archive=archive)

        self.owner = owner
        self.repository = repository
        self.client = None