예제 #1
0
 def test_illegal_urls(self):
     """Check that non-string arguments raise and malformed URLs are rejected."""
     # NOTE(review): the TypeError checks target helper.is_empty_string, not
     # helper.is_url, even though this test is about URLs -- confirm intent.
     for non_string in (None, {}, 42, []):
         self.assertRaises(TypeError, helper.is_empty_string, non_string)

     # Each entry pairs a string that must not count as a URL with the
     # message reported when the assertion fails.
     rejected = (
         ("", "Empty string"),
         ("42", "Unrecognized URL"),
         ("www.example.com", "Missing protocol"),
     )
     for candidate, reason in rejected:
         self.assertFalse(helper.is_url(candidate), reason)
예제 #2
0
 def test_illegal_urls(self):
     """Verify TypeError on non-string input and rejection of invalid URL strings."""
     # NOTE(review): these four checks call helper.is_empty_string rather than
     # helper.is_url -- confirm that is what the test is meant to cover.
     for bad_argument in (None, {}, 42, []):
         with self.assertRaises(TypeError):
             helper.is_empty_string(bad_argument)

     empty, digits_only, no_protocol = "", "42", "www.example.com"
     self.assertFalse(helper.is_url(empty), "Empty string")
     self.assertFalse(helper.is_url(digits_only), "Unrecognized URL")
     self.assertFalse(helper.is_url(no_protocol), "Missing protocol")
예제 #3
0
def _by_pdfplumber(stream: Union[str, object]) -> Union[None, object]:
    '''
    Yield a pdfplumber object opened from ``stream``.

    ``stream`` may be a URL string, or anything ``pdfplumber.open`` accepts
    directly (the original notes mention a local path or a BytesIO object).
    This is a generator that yields exactly once.
    NOTE(review): presumably wrapped by a context-manager decorator that is
    not visible here -- confirm at the definition site.
    '''
    # Anything that is not a URL string is handed to pdfplumber unchanged.
    if not (isinstance(stream, str) and is_url(stream)):
        yield pdfplumber.open(stream)
        return

    # URL: download into memory first; the buffer stays open while the
    # consumer holds the yielded object.
    with _byte_obj_from_url(stream) as downloaded:
        yield pdfplumber.open(downloaded)
예제 #4
0
def byte_obj_from_url(url: str) -> object:
    '''
    Download ``url`` and return its body wrapped in a ``BytesIO`` object.

    Returns ``None`` when ``is_url(url)`` is falsy. An HTTP error status is
    raised by ``response.raise_for_status()``.
    '''
    from io import BytesIO
    import requests

    if is_url(url):
        reply = requests.get(url)
        reply.raise_for_status()
        return BytesIO(reply.content)
    return None
예제 #5
0
def _byte_obj_from_url(url: str) -> object:
    '''
    Download ``url`` and yield its body as an in-memory ``BytesIO`` object.

    This is a generator that yields exactly once. Both the buffer and the
    HTTP response are closed when the generator is resumed, closed, or has an
    exception thrown into it at the ``yield``.
    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` where it
    is defined (callers use it in a ``with`` statement) -- confirm.

    Raises:
        ValueError: if ``is_url(url)`` is falsy (raised when the generator is
            first advanced, not when it is created).
        Whatever ``response.raise_for_status()`` raises on an HTTP error status.
    '''
    # Test truthiness rather than ``is None``: a validator returning False
    # would otherwise slip through, and the sibling byte_obj_from_url uses
    # the same ``not is_url(url)`` check.
    if not is_url(url):
        raise ValueError('Invalid url.')
    with requests.get(url) as response:
        response.raise_for_status()
        # The nested ``with`` closes the buffer even when the consumer raises
        # while holding it; a trailing ``close()`` call would be skipped.
        with BytesIO(response.content) as byte_obj:
            yield byte_obj
예제 #6
0
def by_pypdf(stream: Union[str, object]) -> Union[None, object]:
    '''
    Build a PyPDF2 reader from a URL, a local file path, or a binary stream.

    - A URL string is downloaded with ``byte_obj_from_url``.
    - A string naming an existing file is read fully into memory.
    - Any other value (including a string that is neither) is passed to
      ``PyPDF2.PdfFileReader`` unchanged.

    Returns the ``PyPDF2.PdfFileReader`` created with ``strict=False``.
    '''
    import os
    from io import BytesIO

    import PyPDF2

    if isinstance(stream, str):
        if is_url(stream):
            stream = byte_obj_from_url(stream)
        elif os.path.isfile(stream):
            # Read the file into memory so no file handle is left open: the
            # reader is returned to the caller, who has no way to close a
            # handle opened here.
            with open(stream, 'rb') as pdf_file:
                stream = BytesIO(pdf_file.read())
    return PyPDF2.PdfFileReader(stream, strict=False)
예제 #7
0
def by_pdfplumber(stream: Union[str, object]) -> Union[None, object]:
    '''
    Open ``stream`` with pdfplumber and return the resulting object.

    ``stream`` may be a URL string (downloaded via ``byte_obj_from_url``), a
    local path string, or a non-string object handed straight to
    ``pdfplumber.open``. A string that is neither a URL nor an existing file
    is logged as critical and then still passed to ``pdfplumber.open``.
    '''
    import os
    import pdfplumber

    # Non-string input goes to pdfplumber untouched.
    if not isinstance(stream, str):
        return pdfplumber.open(stream)
    if is_url(stream):
        return pdfplumber.open(byte_obj_from_url(stream))
    if not os.path.isfile(stream):
        logging.critical(f'Invaild url or local path.')
    return pdfplumber.open(stream)
예제 #8
0
def _by_pypdf(stream: Union[str, object]) -> Union[None, object]:
    '''
    Yield a PyPDF2 reader built from a URL, a local file path, or a stream.

    This is a generator that yields at most once. The downloaded buffer or
    the opened file stays open only while the consumer holds the reader.
    NOTE(review): presumably wrapped by a context-manager decorator that is
    not visible here -- confirm at the definition site.
    '''
    if not isinstance(stream, str):
        # Already a stream-like object: hand it to PyPDF2 as-is.
        yield PyPDF2.PdfFileReader(stream, strict=False)
    elif is_url(stream):
        with byte_obj_from_url(stream) as downloaded:
            yield PyPDF2.PdfFileReader(downloaded, strict=False)
    elif os.path.isfile(stream):
        with open(stream, 'rb') as pdf_file:
            yield PyPDF2.PdfFileReader(pdf_file, strict=False)
    # NOTE(review): a string that is neither a URL nor an existing file makes
    # this generator finish without yielding -- confirm that is intended.
예제 #9
0
def is_project_repository(url):
    """Check whether the given project repository URL is acceptable.

    Args:
        url (string): The URL to check.

    Returns:
        <bool, str|None>: (True, None) when is_url accepts the URL, otherwise
            (False, message) where the message explains the rejection.
    """
    try:
        if is_url(url):
            return (True, None)
    except TypeError:
        # Per the original notes, is_empty_string raises a TypeError for a
        # non-string argument; a non-string cannot be a valid URL either.
        return (False, "The 'url' argument must be a string.")
    return (False, "A repository configuration must be a valid URL and include the URL protocol.")
예제 #10
0
def is_project_repository(url):
    """Validate a project repository URL and report why it was rejected.

    Args:
        url (string): The repository URL to validate.

    Returns:
        <bool, str|None>: A pair whose first item is True when the URL passes
            is_url and False otherwise; the second item is None on success or
            an error message on failure.
    """
    invalid_url = (False, "A repository configuration must be a valid URL and include the URL protocol.")
    not_a_string = (False, "The 'url' argument must be a string.")
    try:
        return (True, None) if is_url(url) else invalid_url
    except TypeError:
        # The original notes say is_empty_string raises a TypeError when the
        # argument is not a string; such a value is reported as invalid.
        return not_a_string
예제 #11
0
 def __init__(self,
              stream,
              company=None,
              stock_num=None,
              release_date=None,
              report_title=None):
     '''
     Set up the report from a URL or an already-available PDF source.

     stream: the PDF source; stored as ``self.url`` and downloaded with
         ``byte_obj_from_url`` when that returns a truthy value, otherwise
         used directly as ``self.pdf_obj``.
     company, report_title: stored unchanged.
     stock_num: converted with ``int``; ``None`` (the default) is kept as
         ``None``.
     release_date: passed to ``str_to_date`` with the input pattern
         '%d/%m/%Y %H:%M' and the output pattern '%d %b %Y'.
     '''
     logging.warning(f'Initialising {type(self).__name__}("{stream}")')
     # The original conditional returned ``stream`` on both branches, so the
     # attribute is assigned directly.
     # NOTE(review): possibly meant to be None for non-URL input -- confirm.
     self.url = stream
     self.pdf_obj = byte_obj_from_url(self.url) or stream
     # Assignment order is kept: the helper methods below may read attributes
     # that were set earlier.
     self.toc = self._toc()
     self.auditor = self.valid_auditor() or self.raw_auditor()
     self.report_title = report_title
     self.company = company
     # int(None) raises TypeError, which made the declared default unusable.
     self.stock_num = int(stock_num) if stock_num is not None else None
     self.audit_fee = self._audit_fee()
     self.release_date = str_to_date(release_date,
                                     str_time_pattern='%d/%m/%Y %H:%M',
                                     format_pattern='%d %b %Y')
예제 #12
0
    def set_buttons(self, enable=True):
        """Enable or disable the dialog controls as a group.

        The cancel button, browse button and filename field follow ``enable``
        directly. The OK button is enabled only when ``self.ok`` is also
        truthy, and the retrieve button only when the stripped URL field
        value passes ``helper.is_url``.
        """
        def toggle(widget, active):
            # Call the explicit Enable()/Disable() pair on the widget.
            if active:
                widget.Enable()
            else:
                widget.Disable()

        for widget in (self.cancel_btn, self.browse_btn, self.filename_text):
            toggle(widget, enable)

        # ``and`` short-circuits, so self.ok and the URL field are only read
        # when the controls are being enabled.
        toggle(self.ok_btn, enable and self.ok)
        toggle(self.retrieve_btn,
               enable and helper.is_url(self.url_text.GetValue().strip()))
예제 #13
0
 def test_valid_urls(self):
     """Check that URLs including a protocol are accepted by helper.is_url."""
     # Each entry pairs a URL with the message shown if the assertion fails.
     accepted = (
         ("http://www.example.com", "Simple URL"),
         ("https://github.com/geotagx/geotagx-tool-sanitizer.git",
          "Github repository URL"),
     )
     for candidate, label in accepted:
         self.assertTrue(helper.is_url(candidate), label)
예제 #14
0
 def test_valid_urls(self):
     """Verify that helper.is_url accepts a plain and a repository URL."""
     simple_url = "http://www.example.com"
     repository_url = "https://github.com/geotagx/geotagx-tool-sanitizer.git"
     self.assertTrue(helper.is_url(simple_url), "Simple URL")
     self.assertTrue(helper.is_url(repository_url), "Github repository URL")
예제 #15
0
 def on_url_change(self, event):
     """React to an edit of the URL field.

     Clears ``self.ok`` and enables the retrieve button only when the
     stripped field value passes ``helper.is_url``. ``event`` is not used.
     """
     self.ok = False
     typed_url = self.url_text.GetValue().strip()
     if not helper.is_url(typed_url):
         self.retrieve_btn.Disable()
         return
     self.retrieve_btn.Enable()