Example #1
0
 def test_url_titles_without_ssl_verification(self):
     """Check expected titles for URLs with bad SSL certificates, reading with SSL verification disabled."""
     title_reader = URLTitleReader(verify_ssl=False)
     for test_url, known_title in TEST_CASES_WITH_BAD_SSL.items():
         # Honor the optional substring filter; run every case when it is unset.
         if (not URL_FILTER) or (URL_FILTER in test_url):
             with self.subTest(url=test_url):
                 self.assertEqual(known_title, title_reader.title(test_url))
Example #2
0
 def test_url_titles(self):
     """Check that each test-case URL yields its expected title."""
     title_reader = URLTitleReader()
     for test_url, known_title in TEST_CASES.items():
         # Honor the optional substring filter; run every case when it is unset.
         if (not URL_FILTER) or (URL_FILTER in test_url):
             with self.subTest(url=test_url):
                 self.assertEqual(known_title, title_reader.title(test_url))
Example #3
0
from urltitle import config, URLTitleReader

config.configure_logging()

TEST_URL = 'https://www.google.com/'

# Fetch the same URL twice; the second call should be served from the reader's cache.
title_reader = URLTitleReader()
title_reader.title(TEST_URL)
title_reader.title(TEST_URL)  # Should use cache.
Example #4
0
from urltitle import config, URLTitleReader

config.configure_logging()

TEST_URL = 'https://www.google.com/'

# Assorted Amazon product URLs, including ones with ref segments and query strings.
TEST_URLS = [
    'https://www.amazon.com/Natures-Plus-Chewable-Iron-Supplement/dp/B00014DAFM',
    'https://www.amazon.com/Bluebonnet-Earth-Vitamin-Chewable-Tablets/dp/B00ENYUIO2/',
    'https://www.amazon.com/dp/B0749WVS7J/ref=ods_gw_ha_h1_d_rr_021519?pf_rd_p=8bf51e9c-a499-47ad-829e-a0b4afcae72e&pf_rd_r=9SHQNHFS1W35WG02P75M',
    'https://www.amazon.com/dp/B0794W1SKP/ref=ods_mccc_lr',
    'https://www.amazon.com/ProsourceFit-Tri-Fold-Folding-Exercise-Carrying/dp/B07NCJDHBM?',
]

# Read the title of each URL in turn (titles are logged by the reader).
title_reader = URLTitleReader()
for test_url in TEST_URLS:
    title_reader.title(test_url)
Example #5
0
"""Read and log the title of a URL."""
import logging

from urltitle import URLTitleReader, config

config.configure_logging()
log = logging.getLogger(f"{config.PACKAGE_NAME}.{__name__}")

URL = "https://www.google.com"

reader = URLTitleReader()  # pylint: disable=invalid-name
log.info(f"{URL} has title: {reader.title(URL)}")
log.info("Testing cache.")
log.info(f"{URL} has title: {reader.title(URL)}")  # Should use cache.
# NOTE(review): this looks like a fragment of a larger header-probing script;
# TEST_URL, Dict (typing), and URLTitleError are referenced but not defined or
# imported in the visible lines — confirm they come from earlier in the file.
config.configure_logging()
log = logging.getLogger(__name__)

# Candidate HTTP request headers to be added one at a time in the loop below.
EXTRA_HEADERS = {
    "Accept": "*/*",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip",
    "Referer": "https://google.com/",
    "DNT": 1,  # NOTE(review): int value — header values are usually strings; confirm intended.
    "Connection": "keep-alive",
    "Cookie": "",
    "Upgrade-Insecure-Requests": 1,  # NOTE(review): int value — see DNT note above.
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
}
# Network location (host) of the test URL, as resolved by the reader.
NETLOC = URLTitleReader().netloc(TEST_URL)
log.info("Netloc for %s is %s.", TEST_URL, NETLOC)

titles: Dict[str, str] = {}  # Result accumulator; presumably filled past the visible lines.
# Start with no extra headers configured for this netloc, then grow the set.
config.NETLOC_OVERRIDES[NETLOC] = {"extra_headers": {}}
EXTRA_CONFIG_HEADERS = config.NETLOC_OVERRIDES[NETLOC]["extra_headers"]
for h_key, h_val in EXTRA_HEADERS.items():
    log.debug("Adding header: %s=%s", h_key, h_val)
    # Cumulative: headers added in earlier iterations remain in effect.
    EXTRA_CONFIG_HEADERS[h_key] = h_val
    reader = URLTitleReader()  # Fresh instance avoids cache.
    try:
        title = reader.title(TEST_URL)
    except URLTitleError as exc:
        # Best-effort probe: log the failure for this header and move on.
        log.error("Ignoring exception after adding header %s=%s: %s", h_key,
                  h_val, exc)
        continue