def test_illegal_urls(self):
    """Check that non-string and malformed inputs are rejected.

    Non-string values must make ``helper.is_empty_string`` raise
    ``TypeError``; malformed strings must make ``helper.is_url`` return a
    falsy value.
    """
    # NOTE(review): the TypeError checks target is_empty_string rather than
    # is_url -- presumably because is_url delegates to it; confirm.
    for non_string in (None, {}, 42, []):
        self.assertRaises(TypeError, helper.is_empty_string, non_string)

    rejected = (
        ("", "Empty string"),
        ("42", "Unrecognized URL"),
        ("www.example.com", "Missing protocol"),
    )
    for candidate, reason in rejected:
        self.assertFalse(helper.is_url(candidate), reason)
def _by_pdfplumber(stream: Union[str, object]) -> Union[None, object]:
    '''
    Yield the result of ``pdfplumber.open`` for a URL, local path, or
    BytesIO object.

    When ``stream`` is a string recognised by ``is_url``, the bytes come from
    ``_byte_obj_from_url`` (used as a context manager) and the object is
    yielded while that context is still active. Anything else is handed to
    ``pdfplumber.open`` unchanged.

    NOTE(review): this is a single-yield generator, presumably meant to be
    wrapped with ``contextlib.contextmanager`` -- confirm at the definition
    site.
    '''
    is_remote = isinstance(stream, str) and is_url(stream)
    if not is_remote:
        # Local paths and file-like objects go straight to pdfplumber.
        yield pdfplumber.open(stream)
        return

    with _byte_obj_from_url(stream) as remote_bytes:
        yield pdfplumber.open(remote_bytes)
def byte_obj_from_url(url: str) -> object:
    '''
    Download ``url`` and return its body wrapped in a BytesIO object.

    Returns None when ``is_url`` rejects ``url``. A failing HTTP status is
    propagated through ``response.raise_for_status()``.
    '''
    from io import BytesIO
    import requests

    if is_url(url):
        reply = requests.get(url)
        reply.raise_for_status()
        return BytesIO(reply.content)
    return None
def _byte_obj_from_url(url: str) -> object:
    '''
    Yield a BytesIO object holding the body downloaded from ``url``.

    The buffer is closed once the generator is resumed or torn down, even if
    the consumer raised while using it.

    NOTE(review): this is a single-yield generator, presumably meant to be
    wrapped with ``contextlib.contextmanager`` -- confirm at the definition
    site.

    Raises:
        ValueError: if ``is_url`` rejects ``url``.
        Whatever ``response.raise_for_status()`` raises on an HTTP error.
    '''
    # Test truthiness rather than ``is None``: a boolean False from is_url
    # would otherwise slip past the validation.
    if not is_url(url):
        raise ValueError('Invalid url.')

    with requests.get(url) as response:
        response.raise_for_status()
        byte_obj = BytesIO(response.content)
        try:
            yield byte_obj
        finally:
            # Runs on normal exit and when an exception is thrown into the
            # generator at the yield, so the buffer is never leaked.
            byte_obj.close()
def by_pypdf(stream: Union[str, object]) -> Union[None, object]:
    '''
    Build a ``PyPDF2.PdfFileReader`` from a URL, local path, or BytesIO
    object.

    A URL string is downloaded through ``byte_obj_from_url``; an existing
    file path is opened in binary mode. Any other value, including a string
    that is neither, is passed to the reader unchanged.
    '''
    import os
    import PyPDF2

    source = stream
    if isinstance(stream, str):
        if is_url(stream):
            source = byte_obj_from_url(stream)
        elif os.path.isfile(stream):
            # The handle is not closed here; presumably the reader still
            # needs it after this function returns -- confirm.
            source = open(stream, 'rb')
    return PyPDF2.PdfFileReader(source, strict=False)
def by_pdfplumber(stream: Union[str, object]) -> Union[None, object]:
    '''
    Return the result of ``pdfplumber.open`` for a URL, local path, or
    BytesIO object.

    A URL string is first downloaded through ``byte_obj_from_url``. A string
    that is neither a URL nor an existing file is logged at critical level
    and still passed on to ``pdfplumber.open``.
    '''
    import os
    import pdfplumber

    if isinstance(stream, str):
        if is_url(stream):
            stream = byte_obj_from_url(stream)
        elif not os.path.isfile(stream):
            logging.critical(f'Invaild url or local path.')
    # Every path ends in the same call, so it is made once here.
    return pdfplumber.open(stream)
def _by_pypdf(stream: Union[str, object]) -> Union[None, object]:
    '''
    Yield a ``PyPDF2.PdfFileReader`` for a URL, local path, or BytesIO
    object.

    For a URL or an existing file path the underlying byte source is opened
    in a ``with`` block, so it is closed once the generator is resumed after
    the yield. A string that is neither a URL nor an existing file yields
    nothing.

    NOTE(review): this is a single-yield generator, presumably meant to be
    wrapped with ``contextlib.contextmanager`` -- confirm at the definition
    site.
    '''
    if not isinstance(stream, str):
        # Already a file-like object: hand it over untouched.
        yield PyPDF2.PdfFileReader(stream, strict=False)
        return

    if is_url(stream):
        with byte_obj_from_url(stream) as remote_bytes:
            yield PyPDF2.PdfFileReader(remote_bytes, strict=False)
    elif os.path.isfile(stream):
        with open(stream, 'rb') as local_file:
            yield PyPDF2.PdfFileReader(local_file, strict=False)
def is_project_repository(url):
    """Validates the specified project repository URL.

    Args:
        url (string): A URL to validate.

    Returns:
        <bool, str|None>: A pair whose first item is True if the URL is
            valid and False otherwise, and whose second item is an error
            message when the URL is invalid (None when it is valid).
    """
    try:
        is_valid = is_url(url)
    except TypeError:
        # A TypeError from the URL check is reported as a non-string
        # argument, which by definition cannot be a valid URL.
        return (False, "The 'url' argument must be a string.")

    if is_valid:
        return (True, None)
    return (False, "A repository configuration must be a valid URL and include the URL protocol.")
def __init__(self, stream, company=None, stock_num=None, release_date=None, report_title=None):
    '''
    Initialise the report from a PDF source and optional metadata.

    Args:
        stream: URL, or any other source accepted downstream, of the PDF.
        company: optional company name, stored as given.
        stock_num: optional stock number; converted with ``int`` when
            provided, stored as None when omitted.
        release_date: release date string, parsed by ``str_to_date`` with
            the pattern ``'%d/%m/%Y %H:%M'``.
        report_title: optional report title, stored as given.

    The attribute assignments below are kept in their original order;
    presumably the helper methods read attributes set before them --
    confirm before reordering.
    '''
    logging.warning(f'Initialising {type(self).__name__}("{stream}")')
    # NOTE(review): both branches of this conditional yield ``stream``;
    # presumably the else branch was meant to differ -- confirm.
    self.url = stream if is_url(stream) else stream
    # Fall back to the raw stream when no byte object could be downloaded.
    self.pdf_obj = byte_obj_from_url(self.url) or stream
    self.toc = self._toc()
    self.auditor = self.valid_auditor() or self.raw_auditor()
    self.report_title = report_title
    self.company = company
    # ``stock_num`` defaults to None and int(None) raises TypeError, so only
    # convert when a value was actually supplied.
    self.stock_num = int(stock_num) if stock_num is not None else None
    self.audit_fee = self._audit_fee()
    self.release_date = str_to_date(
        release_date,
        str_time_pattern='%d/%m/%Y %H:%M',
        format_pattern='%d %b %Y',
    )
def set_buttons(self, enable=True):
    """Enable or disable the dialog's controls as a group.

    The cancel button, browse button and filename field simply follow
    ``enable``. The OK button is enabled only when ``enable`` and
    ``self.ok`` are both truthy. The retrieve button is enabled only when
    ``enable`` is truthy and the stripped URL field passes
    ``helper.is_url``.
    """
    def toggle(widget, active):
        if active:
            widget.Enable()
        else:
            widget.Disable()

    for attr_name in ("cancel_btn", "browse_btn", "filename_text"):
        toggle(getattr(self, attr_name), enable)

    toggle(self.ok_btn, enable and self.ok)
    # Short-circuits, so the URL field is only read when enabling.
    toggle(
        self.retrieve_btn,
        enable and helper.is_url(self.url_text.GetValue().strip()),
    )
def test_valid_urls(self):
    """Check that well-formed URLs are accepted by ``helper.is_url``."""
    accepted = (
        ("http://www.example.com", "Simple URL"),
        ("https://github.com/geotagx/geotagx-tool-sanitizer.git", "Github repository URL"),
    )
    for candidate, label in accepted:
        self.assertTrue(helper.is_url(candidate), label)
def test_valid_urls(self):
    """Assert that ``helper.is_url`` returns a truthy value for valid URLs."""
    cases = [
        ("Simple URL", "http://www.example.com"),
        ("Github repository URL", "https://github.com/geotagx/geotagx-tool-sanitizer.git"),
    ]
    for description, url in cases:
        outcome = helper.is_url(url)
        self.assertTrue(outcome, description)
def on_url_change(self, event):
    """React to an edit of the URL field.

    Clears ``self.ok``, then enables the retrieve button when the stripped
    field content passes ``helper.is_url`` and disables it otherwise.
    ``event`` is not used.
    """
    self.ok = False
    typed_url = self.url_text.GetValue().strip()
    if helper.is_url(typed_url):
        update_button = self.retrieve_btn.Enable
    else:
        update_button = self.retrieve_btn.Disable
    update_button()