Example #1
    def _get_filesize(self, url):

        _res = None
        count = 0
        try:
            while count < 3:
                try:
                    res = httpx.head(url)
                    if res.status_code >= 400:  # treat 4xx/5xx as retryable
                        time.sleep(1)
                        count += 1
                    else:
                        _res = int_or_none(res.headers.get('content-length'))
                        break
                except Exception:
                    count += 1
        except Exception:
            pass

        return _res
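The hand-rolled retry loop above (which recurs in Examples 2 and 5) can be expressed as a small reusable helper. A minimal sketch, assuming any 4xx/5xx status is retryable; the helper name is hypothetical, not part of the original code:

import time
import httpx

def head_with_retries(url, retries=3, **kwargs):
    # Retry a HEAD request a few times, returning the response or None.
    for _ in range(retries):
        try:
            res = httpx.head(url, **kwargs)
            if res.status_code < 400:
                return res
        except httpx.HTTPError:
            pass
        time.sleep(1)
    return None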
Example #2
    def _get_info(self, url):

        _res = None
        count = 0
        try:
            while count < 3:
                try:
                    res = httpx.head(url,
                                     headers=self.get_param('http_headers'))
                    if res.status_code >= 400:  # treat 4xx/5xx as retryable
                        time.sleep(1)
                        count += 1
                    else:
                        _size = int_or_none(res.headers.get('content-length'))
                        _url = unquote(str(res.url))
                        if _size and _url:
                            _res = {'url': _url, 'filesize': _size}
                            break
                        else:
                            count += 1
                except Exception:
                    count += 1
        except Exception:
            pass

        return _res
Example #3
    def test_create_upload(self):
        user = User.objects.create(username="******", email="*****@*****.**")
        workflow = Workflow.create_and_init(owner=user)
        create_module_zipfile(
            "x", spec_kwargs={"parameters": [{"id_name": "foo", "type": "file"}]}
        )
        step = workflow.tabs.first().steps.create(
            order=0, slug="step-1", module_id_name="x"
        )
        with self.assertLogs("httpx._client", level=logging.DEBUG):
            response = self.run_handler(
                create_upload,
                user=user,
                workflow=workflow,
                stepSlug="step-1",
                filename="test.csv",
                size=1234,
            )
        self.assertEqual(response.error, "")
        # Test that response has tusUploadUrl
        tus_upload_url = response.data["tusUploadUrl"]
        self.assertRegex(tus_upload_url, "http://testtusd:8080/files/[0-9a-z]+")

        # Upload was created on tusd
        with self.assertLogs("httpx._client", level=logging.DEBUG):
            response = httpx.head(tus_upload_url, headers={"Tus-Resumable": "1.0.0"})
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.headers["Tus-Resumable"], "1.0.0")
        self.assertEqual(response.headers["Upload-Length"], "1234")
        # "dGVzdC5jc3Y=" = "test.csv"
        self.assertIn("filename dGVzdC5jc3Y=", response.headers["Upload-Metadata"])
        # "c3RlcC0x": "step-1"
        self.assertIn("stepSlug c3RlcC0x", response.headers["Upload-Metadata"])
        # apiToken should be empty
        self.assertRegex(response.headers["Upload-Metadata"], "apiToken ?(?:$|,)")
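The assertions above rely on the tus Upload-Metadata wire format: comma-separated "key base64value" pairs, where the value may be empty (as with apiToken here). A small decoding sketch; the helper is hypothetical, for illustration only:

import base64

def parse_upload_metadata(header: str) -> dict:
    # Split "filename dGVzdC5jc3Y=,stepSlug c3RlcC0x" into decoded pairs.
    pairs = {}
    for item in header.split(","):
        key, _, value = item.strip().partition(" ")
        pairs[key] = base64.b64decode(value).decode() if value else ""
    return pairs

# parse_upload_metadata("filename dGVzdC5jc3Y=,stepSlug c3RlcC0x")
# -> {"filename": "test.csv", "stepSlug": "step-1"}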
Example #4
    def add_track_rss_entry(
        self,
        feed: FeedGenerator,
        track: dict,
        username: str,
        tz: str = 'America/New_York',
    ):
        """
        Add a new RSS entry for the track to the feed.

        track is the Last.fm response to
        user.getRecentTracks(...)['recenttracks']['track'][i].
        """
        entry = feed.add_entry()
        title = f"{track['artist']['#text']} - {track['name']}"
        playcount = self.get_playcount(username, track['name'],
                                       track['artist']['#text'])
        if playcount:
            title += f" ({playcount} play{'s' if playcount > 1 else ''})"
        entry.title(title)
        entry.guid(mkguid(username, track))
        entry.link(href=track['url'])
        entry.published(
            delorean.epoch(int(track['date']['uts'])).shift(tz).datetime)
        if 'image' in track and len(track['image']) >= 1:
            url = track['image'][-1]['#text'].strip()
            if url:
                r = head(url)
                entry.enclosure(url, r.headers['Content-Length'],
                                r.headers['Content-Type'])
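For reference, the shape of the track dict this method reads, inferred from the fields accessed above (illustrative values only, not an exhaustive Last.fm schema):

track = {
    "name": "Song Title",
    "artist": {"#text": "Artist Name"},
    "url": "https://www.last.fm/music/...",        # hypothetical values
    "date": {"uts": "1613500000"},
    "image": [{"#text": "https://example.com/cover.jpg"}],
}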
Example #5
    def _get_filesize(self, url):

        _res = None
        count = 0
        try:
            while count < 10:
                try:
                    res = httpx.head(
                        url,
                        headers={
                            'Referer': 'https://vidoza.net',
                            'User-Agent': self.get_param('http_headers')['User-agent'],
                        })
                    if res.status_code >= 400:  # treat 4xx/5xx as retryable
                        time.sleep(1)
                        count += 1
                    else:
                        _res = int_or_none(res.headers.get('content-length'))
                        break
                except Exception:
                    count += 1
        except Exception:
            pass

        return _res
Example #6
async def test_http_methods(client):
    async with respx.mock:
        url = "https://foo.bar"
        route = respx.get(url, path="/") % 404
        respx.post(url, path="/").respond(200)
        respx.post(url, path="/").respond(201)
        respx.put(url, path="/").respond(202)
        respx.patch(url, path="/").respond(500)
        respx.delete(url, path="/").respond(204)
        respx.head(url, path="/").respond(405)
        respx.options(url, path="/").respond(status_code=501)
        respx.request("GET", url, path="/baz/").respond(status_code=204)
        url += "/"

        response = httpx.get(url)
        assert response.status_code == 404
        response = await client.get(url)
        assert response.status_code == 404

        response = httpx.get(url + "baz/")
        assert response.status_code == 204
        response = await client.get(url + "baz/")
        assert response.status_code == 204

        response = httpx.post(url)
        assert response.status_code == 201
        response = await client.post(url)
        assert response.status_code == 201

        response = httpx.put(url)
        assert response.status_code == 202
        response = await client.put(url)
        assert response.status_code == 202

        response = httpx.patch(url)
        assert response.status_code == 500
        response = await client.patch(url)
        assert response.status_code == 500

        response = httpx.delete(url)
        assert response.status_code == 204
        response = await client.delete(url)
        assert response.status_code == 204

        response = httpx.head(url)
        assert response.status_code == 405
        response = await client.head(url)
        assert response.status_code == 405

        response = httpx.options(url)
        assert response.status_code == 501
        response = await client.options(url)
        assert response.status_code == 501

        assert route.called is True
        assert respx.calls.call_count == 8 * 2
Example #7
def check_url_response(url: str, **kwargs: Dict) -> str:
    """
    Shortcut to `raise_for_status` instead of fetching the whole content.

    One should only use this if passing URLs that are known to work is necessary.
    Otherwise let it fail later and avoid fetching the head.

    """
    r = httpx.head(url, **kwargs)
    r.raise_for_status()
    return url
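A hedged usage sketch: raise_for_status() raises httpx.HTTPStatusError on 4xx/5xx responses, so a caller can filter candidate URLs (the URL list is hypothetical):

import httpx

candidates = ["https://example.com/a.nc", "https://example.com/b.nc"]
working = []
for candidate in candidates:
    try:
        working.append(check_url_response(candidate))
    except httpx.HTTPStatusError:
        pass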
Example #8
async def test_http_methods(client):
    async with respx.HTTPXMock() as httpx_mock:
        url = "https://foo.bar/"
        m = httpx_mock.get(url, status_code=404)
        httpx_mock.post(url, status_code=201)
        httpx_mock.put(url, status_code=202)
        httpx_mock.patch(url, status_code=500)
        httpx_mock.delete(url, status_code=204)
        httpx_mock.head(url, status_code=405)
        httpx_mock.options(url, status_code=501)

        response = httpx.get(url)
        assert response.status_code == 404
        response = await client.get(url)
        assert response.status_code == 404

        response = httpx.post(url)
        assert response.status_code == 201
        response = await client.post(url)
        assert response.status_code == 201

        response = httpx.put(url)
        assert response.status_code == 202
        response = await client.put(url)
        assert response.status_code == 202

        response = httpx.patch(url)
        assert response.status_code == 500
        response = await client.patch(url)
        assert response.status_code == 500

        response = httpx.delete(url)
        assert response.status_code == 204
        response = await client.delete(url)
        assert response.status_code == 204

        response = httpx.head(url)
        assert response.status_code == 405
        response = await client.head(url)
        assert response.status_code == 405

        response = httpx.options(url)
        assert response.status_code == 501
        response = await client.options(url)
        assert response.status_code == 501

        assert m.called is True
        assert httpx_mock.stats.call_count == 7 * 2
Example #9
def is_really_active(url, code, retries=2, retry_num=0):
    if not url:
        # no point in checking if API returns it as 'oldcourse'
        return None
    print(f"Trying {url} -> ", end=" ")
    time.sleep(0.1)  # lame rate limiting
    try:
        result = httpx.head(url, allow_redirects=True)
    except Exception as e:
        print("failed (%s)" % e, end=" ")
        retry_num += 1
        if retry_num <= retries:
            print(f"- retrying ({retry_num} / {retries})")
            return is_really_active(url, code, retries, retry_num=retry_num)
        really_active = False
    else:
        correct_redirect = code.lower() in str(result.url).lower()
        really_active = correct_redirect and result.status_code == 200
        print(f"{really_active} ({result.url}, {result.status_code})")
    return really_active
Example #10
    def test_create(self):
        workflow = Workflow.create_and_init()
        _init_module("x")
        step = workflow.tabs.first().steps.create(
            order=0,
            slug="step-123",
            module_id_name="x",
            file_upload_api_token="abc123",
            params={"file": None},
        )
        with self.assertLogs("httpx._client", level=logging.DEBUG):
            response = self.client.post(
                f"/api/v1/workflows/{workflow.id}/steps/step-123/files",
                HTTP_AUTHORIZATION="Bearer abc123",
                content_type="application/json",
                data={
                    "filename": "foo bar.csv",
                    "size": 12345
                },
            )
        self.assertEqual(response.status_code, 200)

        tus_upload_url = response.json()["tusUploadUrl"]

        # Upload was created on tusd
        with self.assertLogs("httpx._client", level=logging.DEBUG):
            response = httpx.head(tus_upload_url,
                                  headers={"Tus-Resumable": "1.0.0"})
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.headers["Tus-Resumable"], "1.0.0")
        self.assertEqual(response.headers["Upload-Length"], "12345")
        # "Zm9vIGJhci5jc3Y=": "foo bar.csv"
        self.assertIn("filename Zm9vIGJhci5jc3Y=",
                      response.headers["Upload-Metadata"])
        # "c3RlcC0xMjM=": "step-123"
        self.assertIn("stepSlug c3RlcC0xMjM=",
                      response.headers["Upload-Metadata"])
        # "YWJjMTIz": "abc123"
        self.assertIn("apiToken YWJjMTIz", response.headers["Upload-Metadata"])
Example #11
    def add_core_fields(self, s, filename, object_data):
        graph = self.graph
        if not httpx.head(s).status_code == 200:
            log.error(f"The resource at {s} is not available.")
            raise SystemExit(1)
        media_type = self.config.media_types[Path(filename).suffix[1:]]
        creation_uuid = uuid.uuid5(self.creation_uuid_ns, media_type)
        creation_iri = URIRef(f"{ENTITIES_NAMESPACE}{creation_uuid}")
        self.creation_iris.add(creation_iri)

        for p, o in [
            (RDF.type, crmdig["D1.Digital_Object"]),
            (m4p0.fileName, Literal(filename)),
            (edm.dataProvider, self.data_provider),
            (m4p0.hasMediaType, URIRef(media_type)),
            (crm.P94i_was_created_by, creation_iri,),
        ]:
            graph.add((s, p, o))
        if "Rechtehinweis" in object_data:
            graph.add((s, dc.rights, Literal(object_data["Rechtehinweis"])))
        elif "Lizenz" in object_data:
            graph.add((s, dcterms.license, URIRef(object_data["Lizenz"])))
            graph.add((s, m4p0.licensor, Literal(object_data["Lizenzgeber"])))
        else:
            raise AssertionError
        if "Bezugsentität" in object_data:
            graph.add(
                (
                    s,
                    m4p0.refersToMuseumObject,
                    self.create_related_entity_iri(object_data["Bezugsentität"]),
                )
            )
        if "URL" in object_data:
            graph.add(
                (s, edm.shownAt, Literal(object_data["URL"], datatype=XSD.anyURI))
            )
Example #12
    def get_info_for_format(self, url, client=None, headers=None, verify=True):

        res = None
        try:
            if client:
                res = client.head(url, headers=headers)
            else:
                _config = SeleniumInfoExtractor._CLIENT_CONFIG.copy()
                if not verify and _config['verify']:
                    if headers:
                        _config['headers'].update(headers)
                    res = httpx.head(url, verify=False,
                                     timeout=_config['timeout'],
                                     headers=_config['headers'],
                                     follow_redirects=_config['follow_redirects'])
                else:
                    res = SeleniumInfoExtractor._CLIENT.head(url, headers=headers)

            res.raise_for_status()

            _filesize = int_or_none(res.headers.get('content-length'))
            _url = unquote(str(res.url))
            return {'url': _url, 'filesize': _filesize}

        except Exception as e:
            if not res:
                self.logger_debug(f"{repr(e)}")
            else:
                self.logger_debug(f"{repr(e)} {res.request} \n{res.request.headers}")
                # Re-raise the HTTP status error on 404 to differentiate it from
                # the ExtractorError this method raises for the calling extractor.
                if res.status_code == 404:
                    res.raise_for_status()

            raise ExtractorError(repr(e))
Example #13
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   1_request_method.py
@Time    :   2021-02-23
@Author  :   EvilRecluse
@Contact :   https://github.com/RecluseXU
@Desc    :   Common request methods: GET, POST, PUT, DELETE, HEAD, OPTIONS
'''

# here put the import lib
import httpx

# Common request methods: GET, POST, PUT, DELETE, HEAD, OPTIONS
r = httpx.get('https://httpbin.org/get')
r = httpx.post('https://httpbin.org/post', data={'key': 'value'})
r = httpx.put('https://httpbin.org/put', data={'key': 'value'})
r = httpx.delete('https://httpbin.org/delete')
r = httpx.head('https://httpbin.org/get')
r = httpx.options('https://httpbin.org/get')

# Set headers
headers = {'user-agent': 'my-app/0.0.1'}
r = httpx.get('http://httpbin.org/headers', headers=headers)
print(r.json())
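Each module-level call above opens and closes its own connection. For repeated requests against the same host, the same calls can go through a reusable client with connection pooling; a minimal sketch:

with httpx.Client(headers={'user-agent': 'my-app/0.0.1'}) as client:
    r = client.get('https://httpbin.org/get')
    r = client.post('https://httpbin.org/post', data={'key': 'value'})
    r = client.head('https://httpbin.org/get')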
Example #14
requests.delete('https://gmail.com', timeout=30, verify=True)
requests.delete('https://gmail.com', timeout=30, verify=False)
requests.patch('https://gmail.com', timeout=30, verify=True)
requests.patch('https://gmail.com', timeout=30, verify=False)
requests.options('https://gmail.com', timeout=30, verify=True)
requests.options('https://gmail.com', timeout=30, verify=False)
requests.head('https://gmail.com', timeout=30, verify=True)
requests.head('https://gmail.com', timeout=30, verify=False)

httpx.request('GET', 'https://gmail.com', verify=True)
httpx.request('GET', 'https://gmail.com', verify=False)
httpx.get('https://gmail.com', verify=True)
httpx.get('https://gmail.com', verify=False)
httpx.options('https://gmail.com', verify=True)
httpx.options('https://gmail.com', verify=False)
httpx.head('https://gmail.com', verify=True)
httpx.head('https://gmail.com', verify=False)
httpx.post('https://gmail.com', verify=True)
httpx.post('https://gmail.com', verify=False)
httpx.put('https://gmail.com', verify=True)
httpx.put('https://gmail.com', verify=False)
httpx.patch('https://gmail.com', verify=True)
httpx.patch('https://gmail.com', verify=False)
httpx.delete('https://gmail.com', verify=True)
httpx.delete('https://gmail.com', verify=False)
httpx.stream('GET', 'https://gmail.com', verify=True)
httpx.stream('GET', 'https://gmail.com', verify=False)
httpx.Client()
httpx.Client(verify=False)
httpx.AsyncClient()
httpx.AsyncClient(verify=False)
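verify=False disables TLS certificate verification, which is what security linters flag in listings like this one. Where a custom trust configuration is needed, httpx also accepts an explicit ssl.SSLContext; a sketch:

import ssl
import httpx

ctx = ssl.create_default_context()  # verifies against the default CA bundle
r = httpx.get('https://gmail.com', verify=ctx)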
Example #15
def from_html(url, suffix=None, suffix_depth=0, url_depth=0):
    '''parse urls from an html website

    Parameters:
    -----------
    url: str
        the website that contains the data
    suffix: list, optional
        data format. suffix should be a list containing multiple parts.
        if suffix_depth is 0, all '.' suffixes are parsed.
        Examples:
            when 'suffix_depth=0' is set:
                suffix of 'xxx8.1_GLOBAL.nc' should be ['.1_GLOBAL', '.nc']
                suffix of 'xxx.tar.gz' should be ['.tar', '.gz']
            when 'suffix_depth=1' is set:
                suffix of 'xxx8.1_GLOBAL.nc' should be ['.nc']
                suffix of 'xxx.tar.gz' should be ['.gz']
    suffix_depth: integer
        number of suffixes to match
    url_depth: integer
        depth of links within the website to parse recursively

    Return:
    -------
    a list of urls

    Example:
    --------
    >>> from downloader import parse_urls

    >>> url = 'https://cds-espri.ipsl.upmc.fr/espri/pubipsl/iasib_CH4_2014_uk.jsp'
    >>> urls = parse_urls.from_html(url, suffix=['.nc'], suffix_depth=1)
    >>> urls_all = parse_urls.from_html(url, suffix=['.nc'], suffix_depth=1, url_depth=1)
    >>> print(len(urls_all)-len(urls))
    '''
    def match_suffix(href, suffix):
        if suffix:
            sf = Path(href).suffixes[-suffix_depth:]
            return suffix == sf
        else:
            return True

    r_h = httpx.head(url)
    if 'text/html' in r_h.headers['Content-Type']:
        r = httpx.get(url)
        soup = BeautifulSoup(r.text, 'html.parser')

        a = soup.find_all('a')
        urls_all = [urljoin(url, i['href']) for i in a if i.has_attr('href')]
        urls = [i for i in urls_all if match_suffix(i, suffix)]
        if url_depth > 0:
            urls_notdata = sorted(set(urls_all) - set(urls))
            urls_depth = [
                from_html(_url, suffix, suffix_depth, url_depth - 1)
                for _url in urls_notdata
            ]

            for u in urls_depth:
                if isinstance(u, list):
                    urls.extend(u)

        return sorted(set(urls))
Example #16
def test_head(server):
    response = httpx.head(server.url)
    assert response.status_code == 200
    assert response.reason_phrase == "OK"
Example #17
    def _get_filesize(self, _vurl):

        res = httpx.head(_vurl, follow_redirects=True)
        res.raise_for_status()

        return int_or_none(res.headers.get('content-length'))
Example #18
def test_head(server):
    response = httpx.head("http://127.0.0.1:8000/")
    assert response.status_code == 200
    assert response.reason_phrase == "OK"
Example #19
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   2_6_redirection.py
@Time    :   2021-02-23
@Author  :   EvilRecluse
@Contact :   https://github.com/RecluseXU
@Desc    :   Redirection
'''

# here put the import lib
import httpx

# Redirected from http to https
r = httpx.get('http://github.com/')
print(r.url)
print(r.status_code)
print(r.history)


# If you don't want to follow the redirect, you can set
r = httpx.get('http://github.com/', allow_redirects=False)
print(r.status_code)
print(r.history)

# When sending a HEAD request, this parameter can also be used to enable redirects
r = httpx.head('http://github.com/', allow_redirects=True)
print(r.url)
print(r.history)
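Note that this script targets an older httpx: since httpx 0.20 the parameter is named follow_redirects (and it defaults to False). An equivalent sketch for current versions:

r = httpx.get('http://github.com/', follow_redirects=True)
r = httpx.head('http://github.com/', follow_redirects=True)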
Example #20
def test_erddap2_10():
    """Check regression for ERDDAP 2.10."""
    e = ERDDAP(server="http://erddap.ioos.us/erddap/")
    url = e.get_search_url(search_for="NOAA", response="csv")
    r = httpx.head(url)
    assert r.raise_for_status() is None