Example 1
    def _get_uri_from_label(self, def_text):
        """
        Fetches a URI given a label by searching
        all term labels in braces ('{' and '}').  For
        example, if we encounter "{whole plant phenological stage}",
        it will be converted to "http://purl.obolibrary.org/obo/PPO_0000001".
        """
        labelre = re.compile(r'(\{[A-Za-z0-9\- _]+\})')
        defparts = labelre.split(def_text)

        newdef = ''
        for defpart in defparts:
            if labelre.match(defpart):
                label = defpart.strip("{}")

                # Get the class IRI associated with this label.
                try:
                    labelIRI = self.__label_map.lookupIRI(label)
                except KeyError:
                    raise RuntimeError(
                        'The class label, "' + label +
                        '", could not be matched to a term IRI.')

                newdef = str(labelIRI)
            else:
                newdef += defpart

        if len(defparts) == 0:
            newdef = def_text

        if len(newdef) != 0:
            # Attempt to parse with the rfc3987 library; raises a ValueError if the result is not a valid IRI.
            rfc3987.parse(newdef, rule='IRI')

        return newdef
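A minimal standalone sketch of the same brace-label expansion, using a plain dict in place of self.__label_map (the label and IRI below come from the docstring; everything else is illustrative):

import re
import rfc3987

label_map = {'whole plant phenological stage': 'http://purl.obolibrary.org/obo/PPO_0000001'}

def expand_labels(def_text):
    labelre = re.compile(r'(\{[A-Za-z0-9\- _]+\})')
    newdef = ''
    for part in labelre.split(def_text):
        if labelre.match(part):
            newdef += label_map[part.strip('{}')]   # KeyError if the label is unknown
        else:
            newdef += part
    if newdef:
        rfc3987.parse(newdef, rule='IRI')           # ValueError if the result is not a valid IRI
    return newdef

expand_labels('{whole plant phenological stage}')
# -> 'http://purl.obolibrary.org/obo/PPO_0000001'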
Example 2
def clientfunc(connection, addr):
    print "Got connection from ", addr
    connection.send("Server up and running")
    while True:
        connection.send("Ready for another url")
        valid = True
        url_recieved = connection.recv(1024)
        try:
            parse(url_recieved, rule="IRI")
        except Exception as e:
            valid = False
        if valid:
            url_recieved = url_recieved.split('/')[2]
            print "Do you want to write URL:", url_recieved, "to config file"
            print "for the node", addr
            response = raw_input("(yes/no):")
            if response == 'yes':
                fp = open("user.action", 'a')
                fp.write(url_recieved)
                fp.write("\n")
                print "URL written to the file user.action"
                print "Done"
                connection.send("The URL is accepted")
                fp.close()
            else:
                print "URL not added"
                print "Message sent to client"
                connection.send("The URL is blocked by admin")
        else:
            connection.send("The URL is not valid")
            pass
    connection.close()
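For context, a handler like this is normally handed each accepted socket; a minimal accept loop (address, port and threading model invented here) might look like:

import socket
import threading

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('0.0.0.0', 8000))   # hypothetical address/port
server.listen(5)
while True:
    conn, addr = server.accept()
    # one thread per client so several nodes can submit URLs concurrently
    threading.Thread(target=clientfunc, args=(conn, addr)).start()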
Example 3
def webCheck(website):

        try:
                print parse(website, rule='IRI')
                return True        
        except:
                return False
Example 4
    def expandURL(self, url_pattern, row, datatype=False):
        """Takes a Jinja or Python formatted string, applies it to the row values, and returns it as a URIRef"""

        try:
            unicode_url_pattern = unicode(url_pattern)
        except NameError:
            unicode_url_pattern = str(url_pattern).split(')')[0].split('(')[-1]
        # print(unicode_url_pattern)

        url = self.render_pattern(unicode_url_pattern, row)

        # DEPRECATED
        # for ns, nsuri in namespaces.items():
        #     if url.startswith(ns):
        #         url = url.replace(ns + ':', nsuri)
        #         break

        try:
            iri = iribaker.to_iri(url)
            rfc3987.parse(iri, rule='IRI')
        except:
            raise Exception(u"Cannot convert `{}` to valid IRI".format(url))

        # print(iri)
        return URIRef(iri)
Example 5
 def is_iri(value):
     try:
         import rfc3987
         rfc3987.parse(value, rule="URI")
         return True
     except Exception as e:
         return False
Example 6
def url_remapper(src, dest):
    src_parts = parse(src, 'URI')
    dest_parts = parse(dest, 'URI')

    src_path = Path(unquote(src_parts['path'])).resolve()
    dest_path = Path(unquote(dest_parts['path'])).resolve()

    def remap(url):
        url_parts = parse(url, 'URI')
        if not (url_parts['scheme'] == src_parts['scheme']
                and url_parts['authority'] == src_parts['authority']):
            return False, url

        url_path = Path(unquote(url_parts['path'])).resolve()
        if src_path != url_path and src_path not in url_path.parents:
            return False, url

        result_path = dest_path / url_path.relative_to(src_path)

        # Use a trailing slash if the incoming path had one. This facilitates
        # further URI resolution operations.
        if url_parts['path'].endswith('/'):
            final_path = f'{result_path}/'
        else:
            final_path = str(result_path)

        return True, (compose(scheme=dest_parts['scheme'],
                              authority=dest_parts['authority'],
                              path=quote(final_path),
                              query=url_parts['query'],
                              fragment=url_parts['fragment']))

    return remap
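A hypothetical use of the remapper above (hosts and paths invented; results shown for a POSIX filesystem, since Path.resolve() is platform dependent):

remap = url_remapper('http://old.example/data/', 'https://new.example/archive/')
remap('http://old.example/data/a/b.txt')
# -> (True, 'https://new.example/archive/a/b.txt')
remap('http://other.example/data/a/b.txt')
# -> (False, 'http://other.example/data/a/b.txt')   # different authority, left untouched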
Example 7
 def __is_url(cls, url_string):
     from rfc3987 import parse
     try:
         parse(url_string, rule='IRI')
         return True
     except ValueError:
         return False
Example 8
def checker(url):

    try:
        parse(url)
        return True
    except ValueError:
        return False
    return False
Example 9
def check_uri(data):
    if not isinstance(data, str) and not isinstance(data, string_types):
        raise TypeError("URL must be a string, not a {}".format(data))
    try:
        rfc3987.parse(data, rule="URI")
    except Exception as e:
        print(e)
        raise TypeError('expected URL type, found {}'.format(data))
Example 10
def test_against_legacy_ref(url):
    legacy = True
    try:
        rfc3987.parse(url, rule='URI_reference')
    except ValueError:
        legacy = False
    new = validate_rfc3986(url, rule='URI_reference')
    assert legacy == bool(new)
Example 11
    def validate_format(self, value: str, *args: ty.Any,
                        **context: ty.Any) -> ValidationResult:
        try:
            rfc3987.parse(value, rule='URI')
        except ValueError:
            return self.messages['format']

        return None
Example 12
def set_from_url(record, return_data):
    if len(record['SOURCEURL']) > 0:
        try:
            parse(record['SOURCEURL'], rule='IRI')
            return_data['fromURL'] = record['SOURCEURL']
        except:
            pass

    return return_data
Example 13
def get_links_from_url(url, domain, params):

    tree = request_page_tree(url)
    links = []
    for a in tree.xpath('//a'):
        ignore_link = False
        try:
            link = str(a.get('href'))
        except:
            #Link is somehow broken
            link = ''
            ignore_link = True
            pass

        if 'ignore_urls_with' in params:
            if any(pattern in link for pattern in params['ignore_urls_with']):
                ignore_link = True

        if 'ignore_urls_without' in params:
            found = False
            if any(pattern in link
                   for pattern in params['ignore_urls_without']):
                found = True

            if not found:
                ignore_link = True

        if ignore_link:
            continue

        try:
            if '#' in link:
                link = link[:link.find('#')]

            if 'retain_params' not in params and '?' in link:
                link = link[:link.find('?')]

            if parse(link, rule='IRI') and (domain + '/') in link:
                links.append(str(link).encode('ASCII'))

        except ValueError as e:
            if 'http' not in link and 'java' not in link and len(
                    link) > 1 and link != 'None' and link[0] not in ['?', '#']:
                if str(link).encode('ASCII', 'ignore')[0] != '/':
                    link = '/' + link
                rel_link = 'http://' + domain + link
                try:
                    if parse(rel_link, rule='IRI'):
                        links.append(str(rel_link).encode('ASCII'))
                except:
                    pass
            pass

        except:
            pass

    return list(set(links))
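A hypothetical invocation (request_page_tree is assumed to fetch and parse the page, e.g. with lxml; the URL and filter patterns are invented):

links = get_links_from_url('http://example.com/', 'example.com',
                           {'ignore_urls_with': ['logout', 'signup']})
# Returns de-duplicated absolute links on example.com as ASCII byte strings,
# with fragments and (unless retain_params is set) query strings stripped.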
Example 14
 def insert_concept(self, data):
     try:
         parse(data['id'], rule='IRI')
         a = URIRef(data['id'])
     except ValueError:
         a = BNCF[data['id']]
     self.sparql.add((a, RDF.type, SKOS.Concept))
     self.sparql.add((a, RDFS.label, Literal(data['label'])))
     return 'OK'
Example 15
 def is_html(self, message):
     for suffix in self.urlExtensions:
         if suffix in message:
             try:
                 parse(message, rule="URI")
                 return True
             except ValueError:
                 pass
     return False
Example 16
 def insert_organization(self, data):
     try:
         parse(data['id'], rule='IRI')
         a = URIRef(data['id'])
     except ValueError:
         a = AOP[data['id']]
     self.sparql.add((a, RDF.type, FOAF.Organization))
     self.sparql.add((a, FOAF.name, Literal(data['label'])))
     return 'OK'
Example 17
def handle_profile_image(user_id, image_url):
    try:
        parse(image_url, rule="IRI")
    except ValueError:
        return False
    query = (db.session.query(User).filter(User.id == user_id).first())
    query.image_url = image_url
    db.session.commit()
    return True
Example 18
def get_message_type(message):
    try:
        parse(message, rule="IRI")
        if is_url_image(message):
            return "image"
        else:
            return "url"
    except ValueError:
        return "text"
Example 19
def set_from_url(record, return_data):
    if len(record['SOURCEURL']) > 0:
        try:
            parse(record['SOURCEURL'], rule='IRI')
            return_data['fromURL'] = record['SOURCEURL']
        except:
            pass

    return return_data
Example 20
 def insert_organization(self, data):
     try:
         parse(data['id'], rule='IRI')
         a = URIRef(data['id'])
     except ValueError:
         a = AOP[data['id']]
     self.sparql.add((a, RDF.type, FOAF.Organization))
     self.sparql.add((a, FOAF.name, Literal(data['label'])))
     return 'OK'
Example 21
 def insert_place(self, data):
     try:
         parse(data['id'], rule='IRI')
         a = URIRef(data['id'])
     except ValueError:
         a = DBPEDIA[data['id']]
     self.sparql.add((a, RDF.type, DBPEDIA.Place))
     self.sparql.add((a, RDFS.label, Literal(data['label'])))
     return 'OK'
Example 22
 def insert_place(self, data):
     try:
         parse(data['id'], rule='IRI')
         a = URIRef(data['id'])
     except ValueError:
         a = DBPEDIA[data['id']]
     self.sparql.add((a, RDF.type, DBPEDIA.Place))
     self.sparql.add((a, RDFS.label, Literal(data['label'])))
     return 'OK'
Example 23
    def __new__(cls, *args, **kwargs):
        if cls.nullable and args[0] is None:
            return None

        value = super().__new__(cls, *args, **kwargs)

        if cls.trim_whitespace:
            value = value.strip()

        if cls.min_length is not None:
            if len(value) < cls.min_length:
                if cls.min_length == 1:
                    raise TypeSystemError(cls=cls, code='blank')
                else:
                    raise TypeSystemError(cls=cls, code='min_length')

        if cls.max_length is not None:
            if len(value) > cls.max_length:
                raise TypeSystemError(cls=cls, code='max_length')

        if cls.pattern is not None:
            if not re.search(cls.pattern, value):
                raise TypeSystemError(cls=cls, code='pattern')

        # Validate format, if specified
        if cls.format == 'date':
            try:
                value = datetime.strptime(value, "%Y-%m-%d").date()
            except ValueError as e:
                raise TypeSystemError(str(e), cls=cls)
        elif cls.format == 'date-time':
            try:
                value = isodate.parse_datetime(value)
            except (ValueError, isodate.ISO8601Error) as e:
                raise TypeSystemError(str(e), cls=cls)
        elif cls.format == 'email':
            if '@' not in value:
                raise TypeSystemError('Not a valid email address.', cls=cls)
        elif cls.format == 'time':
            try:
                value = datetime.strptime(value, "%H:%M:%S")
            except ValueError as e:
                raise TypeSystemError(str(e), cls=cls)
        elif cls.format == 'uri':
            try:
                rfc3987.parse(value, rule='URI')
            except ValueError as e:
                raise TypeSystemError(str(e), cls=cls)

        # Coerce value to the native str type.  We only do this if the value
        # is an instance of the class.  It could be a datetime instance or
        # a str already if `trim_whitespace` is True.
        if isinstance(value, cls):
            value = cls.native_type(value)

        cls.validate(value)
        return value
Example 24
 def insert_concept(self, data):
     try:
         parse(data['id'], rule='IRI')
         a = URIRef(data['id'])
     except ValueError:
         a = BNCF[data['id']]
     self.sparql.add((a, RDF.type, SKOS.Concept))
     self.sparql.add((a, RDFS.label, Literal(data['label'])))
     return 'OK'
Example 25
def test_against_legacy_hypothesis(url):
    print(url)
    legacy = True
    try:
        rfc3987.parse(url, rule='URI')
    except ValueError:
        legacy = False
    new = validate_rfc3986(url)
    assert legacy == bool(new)
Example 26
def checker(url):
    '''
    Check if the url is a valid one or not.
    '''
    try:
        parse(url)
        return True
    except ValueError:
        return False
    return False
Example 27
def validate_url(url):
    """
    Validates URL (actually, IRIs).
    """
    try:
        rfc3987.parse(url, rule="IRI")
    except:
        return False

    return True
Example 28
 def insert_author(self, author):
     try:
         parse(author['author_id'], rule='IRI')
         a = author['author_id']
     except ValueError:
         a = AOP[author['author_id']]
     self.sparql.add((a, RDF.type, FOAF.Person))
     self.sparql.add((a, FOAF.name, Literal(author['author_fullname'])))
     if 'author_email' in author:
         self.sparql.add((a, SCHEMA.email, Literal(author['author_email'])))
     return 'OK'
Example 29
 def insert_author(self, author):
     try:
         parse(author['author_id'], rule='IRI')
         a = author['author_id']
     except ValueError:
         a = AOP[author['author_id']]
     self.sparql.add((a, RDF.type, FOAF.Person))
     self.sparql.add((a, FOAF.name, Literal(author['author_fullname'])))
     if 'author_email' in author:
         self.sparql.add((a, SCHEMA.email, Literal(author['author_email'])))
     return 'OK'
Example 30
def is_url(url):
    """
    Returns True if `url` is an IRI as specified in the RFC 3987
    (https://www.ietf.org/rfc/rfc3987.txt)
    """
    try:
        rfc3987.parse(url, rule="IRI")
        return True
    except ValueError:
        logger.warning("%s is not a valid url.", url)
        return False
Example 31
def guess_server_url(
    url: str,
    login_page: str = Options.startup_page,
    proxy: "Proxy" = None,
    timeout: int = 5,
) -> str:
    """
    Guess the complete server URL given a URL (an IP address,
    a simple domain name, or an already complete URL).

    Note: this function cannot be decorated with lru_cache().

    :param url: The server URL (IP, domain name, full URL).
    :param login_page: The Drive login page.
    :param int timeout: Timeout for each and every request.
    :return: The complete URL.
    """
    import requests
    import rfc3987

    from requests.exceptions import SSLError

    kwargs: Dict[str, Any] = {
        "timeout": timeout,
        "verify": Options.ca_bundle or not Options.ssl_no_verify,
    }
    for new_url in compute_urls(url):
        try:
            rfc3987.parse(new_url, rule="URI")
            log.debug(f"Testing URL {new_url!r}")
            full_url = f"{new_url}/{login_page}"
            if proxy:
                kwargs["proxies"] = proxy.settings(url=full_url)
            with requests.get(full_url, **kwargs) as resp:
                resp.raise_for_status()
                if resp.status_code == 200:  # Happens when JSF is installed
                    log.debug(f"Found URL: {new_url}")
                    return new_url
        except SSLError as exc:
            if "CERTIFICATE_VERIFY_FAILED" in str(exc):
                raise InvalidSSLCertificate()
        except requests.HTTPError as exc:
            if exc.response.status_code in (401, 403):
                # When there is only Web-UI installed, the code is 401.
                log.debug(f"Found URL: {new_url}")
                return new_url
        except (ValueError, requests.RequestException):
            log.debug(f"Bad URL: {new_url}")
        except Exception:
            log.exception("Unhandled error")

    if not url.lower().startswith("http"):
        return ""
    return url
Example 32
def guess_server_url(
    url: str,
    login_page: str = Options.startup_page,
    proxy: "Proxy" = None,
    timeout: int = 5,
) -> str:
    """
    Guess the complete server URL given a URL (an IP address,
    a simple domain name, or an already complete URL).

    :param url: The server URL (IP, domain name, full URL).
    :param login_page: The Drive login page.
    :param int timeout: Timeout for each and every request.
    :return: The complete URL.
    """
    import requests
    import rfc3987

    from requests.exceptions import SSLError

    kwargs: Dict[str, Any] = {
        "timeout": timeout,
        "verify": Options.ca_bundle or not Options.ssl_no_verify,
    }
    for new_url in compute_urls(url):
        try:
            rfc3987.parse(new_url, rule="URI")
            log.debug(f"Testing URL {new_url!r}")
            full_url = f"{new_url}/{login_page}"
            if proxy:
                kwargs["proxies"] = proxy.settings(url=full_url)
            with requests.get(full_url, **kwargs) as resp:
                resp.raise_for_status()
                if resp.status_code == 200:
                    log.debug(f"Found URL: {new_url}")
                    return new_url
        except requests.HTTPError as exc:
            if exc.response.status_code in {401, 403}:
                # When there is only Web-UI installed, the code is 401.
                log.debug(f"Found URL: {new_url}")
                return new_url
        except SSLError as exc:
            if "CERTIFICATE_VERIFY_FAILED" in str(exc):
                raise InvalidSSLCertificate()
        except (ValueError, requests.RequestException):
            log.debug(f"Bad URL: {new_url}")
        except Exception:
            log.exception("Unhandled error")

    if not url.lower().startswith("http"):
        return ""
    return url
Example 33
def getThumbnailContent(metadata):
    if 'image' not in metadata:
        return 'no preview'
    images = metadata['image']
    if len(images) < 1:
        return 'no preview'
    try:
        img_uri = images[0].strip()
        rfc3987.parse(img_uri, "URI")
        return '<img src="https://steemitimages.com/128x256/{img_uri}" />'.format(
            **locals())
    except Exception:
        return 'no preview'
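For illustration, with a well-formed image URL in the post metadata (URL invented):

getThumbnailContent({'image': ['https://example.com/pic.png']})
# -> '<img src="https://steemitimages.com/128x256/https://example.com/pic.png" />'
getThumbnailContent({'image': ['not a uri']})
# -> 'no preview'   (rfc3987.parse raises ValueError, caught above)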
Example 34
    def validate(self, value):
        """Check that the URL is valid, and optionally accessible."""
        try:
            parse(value)
        except ValueError:
            print value
            self.error("Value is not a valid URL")

        if self.verify_exists:
            try:
                get(value)
            except:
                self.error("The URL appears to be inaccessible")
Example 35
def escape_url(url):
    try:
        rfc3987.parse(url, rule="URI")
        return url
    except ValueError:
        if url.lower().startswith('https://'):
            scheme = 'https://'
        elif url.lower().startswith('http://'):
            scheme = 'http://'
        else:
            scheme = ''

        url = quote_url(url[len(scheme):])
        return scheme + url
Example 36
 def check_url(instance):
     # See https://github.com/Julian/jsonschema/blob/master/jsonschema/_format.py
     if not isinstance(instance, str_types):
         return True
     rfc3987.parse(instance, rule='URI')  # raises ValueError
     try:
         response = requests.get(instance, timeout=self.args.timeout)
         result = response.status_code in (200,)
         if not result:
             print('HTTP {} on GET {}'.format(response.status_code, instance))
         return result
     except requests.exceptions.Timeout as e:
         print('Timedout on GET {}'.format(instance))
         return False
Example 37
def spider(base_urls, target):
    '''
        Loop through the initial links found on the given page. Each newly
        discovered link is added to the list if it's not already there, and is
        then crawled as well, looking for more links.

        The wannabe list is the placeholder for the URLs that are yet to be
        crawled; base_urls is the list of all already crawled URLs.
    '''
    global target_
    target_ = parse(target)
    p = Pool(arguments.process)
    wannabe = [
        url for url in base_urls
        if target_['authority'] in parse(url)['authority']
    ]

    while True:
        #retrieve all the urls returned by the workers
        new_urls = p.map(worker, wannabe)
        #flatten them and remove repeated ones
        new_urls = list(set(itertools.chain(*new_urls)))
        wannabe = []
        i = 0

        #if new_urls is empty meaning no more urls are being discovered, exit the loop
        if new_urls == []:
            break

        else:
            for url in new_urls:
                if url not in base_urls:
                    '''
                    For each new url, check if it hasn't been crawled. If it's
                    indeed new and contains the target domain it gets appended to
                    the wannabe list so in the next iteration it will be crawled.
                    '''
                    i += 1
                    if target_['authority'] in parse(url)['authority']:
                        wannabe.append(url)
                    base_urls.append(url)

        print(
            colored('\nNew urls appended: {}\n'.format(i),
                    'green',
                    attrs=['bold']))

    #once all the links for the given depth have been analyzed, execute the parser
    parser(base_urls)
Example 38
 def is_valid_uri(instance):
     if not isinstance(instance, six.string_types):
         return True
     uri = urlparse(instance)
     query = urlencode(parse_qsl(unquote(uri.query.encode('utf-8'))))
     return rfc3987.parse(uri._replace(query=query).geturl(),
                          rule='URI')
Example 39
def serialize_fe(fe, reified, wiki_title, add_triple, format):
    # The FE predicate takes the FE label
    p1 = _uri_for('FE', 'predicate', fe['FE'])

    # The FE object takes the linked entity URI and/or the literal
    le_uri = fe.get('uri')
    literal = fe.get('literal')
    
    if le_uri:  # It's a URI
        wiki_title = quote(le_uri.split('/')[-1].encode('utf8'))
        o1 = NAMESPACES['resource'] + wiki_title
        parsed = parse(o1, rule='URI_reference')  # URI sanity check
        assert add_triple(reified, p1, o1)

    if literal:  # It's a literal
        if type(literal) in {str, unicode}:
            assert add_triple(reified, p1, literal)

        elif type(literal) == dict:

            if 'duration' in literal:
                assert add_triple(reified, p1, literal['duration'])

            if 'start' in literal:
                assert add_triple(reified, '%sstartYear' % NAMESPACES['ontology'],
                                  literal['start'])

            if 'end' in literal:
                assert add_triple(reified, '%sendYear' % NAMESPACES['ontology'],
                                  literal['end'])

        else:
            raise Exception("Don't know how to serialize: " + repr(literal))
Example 40
def launch_the_stream():
    parser = create_parser()
    if len(argv) == 1:
        parser.print_help()
        exit(1)

    arguments = parser.parse_args()

    rv = 0
    statistics = UserStat(APPLICATION_NAME, STAT_FILE_NAME)
    statistics.load()
    nicknames = Nicknames(APPLICATION_NAME, ALIAS_FILE_NAME)
    nicknames.load()
    if arguments.stat:
        print(str(statistics))
    elif arguments.aliases:
        print(str(nicknames))
    elif arguments.clear:
        trimmed = statistics.fltr(lambda key, value: value > int(arguments.clear))
        statistics.save()
        print("Statistics cleared: {0}".format(trimmed))
    elif (arguments.let and len(arguments.let) == 2):
        (nick, URL) = arguments.let
        nicknames.assign(nick, URL)
        # Extract the last part of URL path as a streamer nick
        streamer = [x for x in parse(URL)['path'].split('/') if x][-1]
        trimmed = statistics.fltr(lambda key, value: streamer not in key)
        statistics.save()
        nicknames.save()
        print("{0} was assigned to {1}; Statistics cleared: {2}".format(nick, URL, trimmed))
    else:
        rv = assemble_command(arguments, statistics, nicknames)

    return rv
Example 41
def generate_validator_from_schema(schema_uri):

    #download the schema to a string
    schema = None
    #handle http and file
    uri_split = rfc3987.parse(schema_uri)
    if uri_split['scheme'] in ("http", "https"):
        # it's an http or https URI, use requests
        schema = requests.get(schema_uri).json()
    elif uri_split['scheme'] == "file":
        # it's a file, open it as normal
        # reconstitute the file path from the uri
        with open(
                os.path.abspath(
                    os.path.join(uri_split['authority'], uri_split['path'])),
                'r') as schema_file:
            schema = json.load(schema_file)
    else:
        raise ValueError("schema uri must have file or url scheme")

    #Create a refresolver to allow resolution
    #of relative schema links
    #This is required to use git branches / versions and
    #local development correctly
    # Don't use from_schema because it uses the $id baked
    # into the schema, and we want to avoid baking that in
    handlers = dict(file=file_handler)
    resolver = jss.RefResolver(schema_uri, schema, handlers=handlers, store={})

    validator = jss.Draft7Validator(
        schema=schema,
        resolver=resolver,
    )

    return validator
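A hypothetical call (the schema URL is invented; file_handler and the jss alias for jsonschema come from the surrounding module):

validator = generate_validator_from_schema('https://example.com/schemas/record.json')
validator.validate({'id': 1})   # raises jsonschema.ValidationError on failure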
Example 42
    def parse(ark_str):
        """Parse an ARK URL or an ARK ID string into an Ark oject

        Args:
            ark_str (str): The string to parse.

        Returns:
            Ark: The parsed ARK.

        Raises:
            ArkParsingError: If parsing fails.
        """
        try:
            parts = rfc3987.parse(ark_str, rule="URI")  # Ensure ark is a URI
            parser = Lark(_GRAMMAR, start='arkid')

            # Extract an ARK ID from ark_str if ark_str is a full ARK URL.
            if parts["scheme"] != _ARKID_SCHEME:
                arkid_str = parts["path"].lstrip("/")
                if not parts["authority"]:  # NMA is required
                    msg = 'Name Mapping Authority cannot be null.'
                    raise ArkParsingError(msg, ark_str)
            else:
                arkid_str = ark_str

            tree = parser.parse(arkid_str)
            ark_parts = ArkIdTransformer().transform(tree)
            ark_parts.update(parts)
            ark = Ark(**ark_parts)
            return Either.pure(ark)

        except (TypeError, ValueError, ParseError, UnexpectedCharacters) as ex:
            return Left(ArkParsingError(str(ex), ark_str))
Example 43
 def to_b64(self, image_filename, *args):
     """ Returns a tuple with (b64content, imgtype) where:
         - b64content is a base64 representation of the input file
         - imgtype is the image type as detected by imghdr
     """
     self.logger.debug('Converting image %s to base64', image_filename)
     self.logger.debug('Current directory %s', os.path.abspath(os.curdir))
     try:
         img_info = parse(image_filename, rule='IRI')
         extension = img_info['path'].split('.')[-1]
         content = urlopen(image_filename)
     except ValueError:  # not a valid IRI, assume local file
         self.logger.debug("Image '%s' doesn't have a valid URL, "
                           "assuming local", image_filename)
         try:
             extension = imghdr.what(image_filename)
             if extension is None:
                 self.logger.debug('Image extension not detected, skipping')
                 return ''
             content = open(image_filename, 'rb')
         except (IOError, AttributeError, TypeError):
             return ''
     except (HTTPError, URLError, TypeError):
         return ''
     txt = 'data:image/{};base64,\n{}'.format(extension,
                                              content.read().encode('base64'
                                                                    )
                                              )
     content.close()
     return txt
Example 44
    def execute(self, obj):
        if not isinstance(obj, str):
            raise TypeError('\'{}\' is not of type str.'.format(obj))

        parsed = self._parse(obj)
        parsed = self._process(**parsed)
        return rfc3987.parse(rfc3987.compose(**parsed))
Example 45
def validate_url(url):
    try:
        p = parse(url, rule='URI_reference')
        r = all((p['scheme'], p['authority'], p['path']))
    except Exception as e:
        print e
        r = False 
    return r
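Note that rule='URI_reference' accepts almost anything, so the all() check does the real work here: scheme, authority and a non-empty path must all be present. For example:

validate_url('http://example.com/')    # True
validate_url('http://example.com')     # False -- empty path is falsy
validate_url('/relative/path')         # False -- no scheme or authority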
Example 46
def uri_validator(value, **kwargs):
    try:
        parts = rfc3987.parse(value, rule="URI")
    except ValueError:
        raise ValidationError(MESSAGES["format"]["invalid_uri"].format(value))

    if not parts["scheme"] or not parts["authority"]:
        raise ValidationError(MESSAGES["format"]["invalid_uri"].format(value))
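For illustration (ValidationError and MESSAGES come from the surrounding, unshown module): a scheme-only reference parses under rule="URI" but has no authority, so the second check rejects it.

uri_validator('https://example.org/x')     # passes (returns None)
uri_validator('mailto:user@example.org')   # raises ValidationError: no authority component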
Example 47
def test_urn_link(urn, result):
    if isinstance(result, Exception):
        with pytest.raises(type(result)) as e:
            URNLink().execute(urn)
        assert e.value.args == result.args
    else:
        assert rfc3987.parse(result)  # Extra URL validation
        assert URNLink().execute(urn)['IRI'] == result
Example 48
def test_ark_link(ark_id, result):
    if isinstance(result, Exception):
        with pytest.raises(type(result)) as e:
            ARKLink().execute(ark_id)
        assert e.value.args == result.args
    else:
        assert rfc3987.parse(result)  # Extra URL validation
        assert ARKLink().execute(ark_id)['IRI'] == result
Example 49
def test_doi_link(doi, result):
    if isinstance(result, Exception):
        with pytest.raises(type(result)) as e:
            DOILink().execute(doi)
        assert e.value.args == result.args
    else:
        assert rfc3987.parse(result)  # Extra URL validation
        assert DOILink().execute(doi)['IRI'] == result
Example 50
def uri_validator(value, **kwargs):
    try:
        parts = rfc3987.parse(value, rule='URI')
    except ValueError:
        raise ValidationError(MESSAGES['format']['invalid_uri'].format(value))

    if not parts['scheme'] or not parts['authority']:
        raise ValidationError(MESSAGES['format']['invalid_uri'].format(value))
Example 51
 def __init__(self, parent, format):
     self.format = format
     self.uri = parent.uri + FORMATS[format]
     self.valid = rfc3987.parse(self.uri, rule="absolute_URI")
     if self.valid:
         l.debug("Initialized serialization (%s: %s)." % ((self.format, self.uri)))
     else:
         l.warning("%s is not a valid absolute URI, so this serialization will not be retrieved.", self.uri)
Example 52
  def getLayerData(self):

    # Is this a proper URL?
    try:
      rfc3987.parse(self.location, rule='IRI')
      isUrl = True
    except:
      isUrl = False

    # Download layer from URL.
    if isUrl:
      logging.info('Downloading {0} from {1}'.format(self.name, self.location))
      try:
        response = urllib2.urlopen(self.location)
        data = response.read()
      except Exception, e:
        logging.exception(e)
        return False
Example 53
 def check_api_url(self, url):
     try:
         p = parse(url, rule="IRI")
         if p != None:
             return True
         else:
             return False
     except ValueError:
         return False
Example 54
 def try_host(self, hostname):
     success = False
     try:
         result = parse(hostname, rule="URI")
         if result['authority'] != '':
             return True
     except ValueError, e:
         logging.error("address given does not match URI definition")
         logging.exception(e)
Example 55
    def expandURL(self, url_pattern, row, datatype=False):
        """Takes a Jinja or Python formatted string, applies it to the row values, and returns it as a URIRef"""
        url = self.render_pattern(unicode(url_pattern), row)

        # DEPRECATED
        # for ns, nsuri in namespaces.items():
        #     if url.startswith(ns):
        #         url = url.replace(ns + ':', nsuri)
        #         break

        try:
            iri = iribaker.to_iri(url)
            rfc3987.parse(iri, rule='IRI')
        except:
            raise Exception(u"Cannot convert `{}` to valid IRI".format(url))

        # print "Baked: ", iri
        return URIRef(iri)
Example 56
def parse_url(url):
    try:
        matches = rfc3987.parse(url, rule='URI')
    except ValueError:
        raise HTTPBadRequest(detail=Messages.invalid_uri)
    if matches['scheme'] not in ['http', 'https']:
        raise HTTPBadRequest(detail=Messages.invalid_uri)
    matches['path'] = matches['path'] or '/'
    matches['fragment'] = None
    return rfc3987.compose(**matches)
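The net effect is a normalised http(s) URL: the fragment is dropped and an empty path becomes '/'. For example (HTTPBadRequest and Messages come from the surrounding web-framework code):

parse_url('https://example.com')          # -> 'https://example.com/'
parse_url('https://example.com/x?q=1#f')  # -> 'https://example.com/x?q=1'
parse_url('ftp://example.com/x')          # raises HTTPBadRequest (scheme not http/https)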
Example 57
def parse_streamer_url(url, nicknames):
    if match(url, 'absolute_URI'):
        rv1 = [x for x in parse(url)['path'].split('/') if x][-1]
        rv2 = url
        return rv1, rv2
    elif nicknames.find(url):
        return url, nicknames.get(url)
    else:
        print("Nickname \"{0}\" has not been defined yet".format(url))
        return None, None
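For example (assuming match and parse are rfc3987's, and nicknames is the Nicknames store from the earlier example; the URL is invented):

parse_streamer_url('https://stream.example/somestreamer', nicknames)
# -> ('somestreamer', 'https://stream.example/somestreamer')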
Example 58
 def build_domain(self, url):
     """
     :param url: page URL whose second-level domain name selects the domains.<name> fetcher module
     :return:
     """
     d = parse(url, rule='IRI')
     li = d['authority'].split('.')
     domain = li[len(li) - 2]
     self.is_valid(domain)
     module = self.my_import('domains.'+domain)
     self.fetcher = module.Fetcher(requester.Requester())
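A short illustration of the authority split above (the URL is invented, and domains.example would be a hypothetical fetcher module):

from rfc3987 import parse

authority = parse('https://news.example.com/story', rule='IRI')['authority']
labels = authority.split('.')
labels[len(labels) - 2]   # -> 'example', so the loader would import domains.example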