def test_dlurljoin():
    """dlurljoin must resolve references against a base URL like an RFC 3986 join."""
    # (base, reference, expected resolved URL)
    cases = [
        ('http://a.b/', 'f', 'http://a.b/f'),
        ('http://a.b/page', 'f', 'http://a.b/f'),
        ('http://a.b/dir/', 'f', 'http://a.b/dir/f'),
        ('http://a.b/dir/', 'http://url', 'http://url'),
        ('http://a.b/dir/', '/', 'http://a.b/'),
        ('http://a.b/dir/', '/x/y', 'http://a.b/x/y'),
    ]
    for base, ref, expected in cases:
        eq_(dlurljoin(base, ref), expected)
def _select_and_extract(self, selector, query, data):
    """Yield ``(anchor_element, enriched_data)`` for matching ``<a>`` links.

    Iterates every ``<a>`` element in *selector*, resolves its ``@href``
    against ``data['url']`` (when present), and matches *query* against
    either the resolved URL or the anchor text, depending on
    ``self._TARGET`` (``'href'`` or ``'text'``).  For each match, a copy
    of *data* is enriched with the regex's named groups plus the keys
    ``'url'``, ``'url_href'`` and ``'url_text'``.

    Parameters
    ----------
    selector : object
        XPath-capable selector (parsel/scrapy style — TODO confirm) whose
        ``xpath(...).extract_first()`` yields strings.
    query : str
        Regex with named groups, matched from the start of the target.
    data : dict
        Carry-over data; ``data.get('url')`` is used as the base URL.

    Raises
    ------
    ValueError
        If ``self._TARGET`` is neither ``'href'`` nor ``'text'``.
    """
    base_url = data.get('url', None)
    pattern = re.compile(query)
    for anchor in selector.xpath('//a'):
        href = anchor.xpath('@href').extract_first()
        # an <a> without an href attribute carries no link to follow
        if not href:
            continue
        # resolve relative references against the originating URL, if known
        full_url = dlurljoin(base_url, href) if base_url else href
        if self._TARGET == 'href':
            candidate = full_url
        elif self._TARGET == 'text':
            candidate = anchor.xpath('text()').extract_first()
        else:
            raise ValueError("Unknown _TARGET=%r" % (self._TARGET, ))
        matched = pattern.match(candidate)
        if not matched:
            continue
        # enrich a copy of data with the regex's named groups
        enriched = data.copy()
        enriched.update(matched.groupdict())
        # TODO: such actions we might want to perform also in other cases,
        # e.g. operating on some extracted with XPATH content
        enriched['url'] = full_url
        enriched['url_href'] = href
        enriched['url_text'] = anchor.xpath('text()').extract_first()
        lgr.log(5, "Matched %(url)s" % enriched)
        yield anchor, enriched