コード例 #1
0
ファイル: config.py プロジェクト: jdeck88/ppo-data-pipeline
    def _get_uri_from_label(self, def_text):
        """
        Fetches a URI given a label by searching
        all term labels in braces ('{' and '}').  For
        example, if we encounter "{whole plant phenological stage}",
        it will be converted to "http://purl.obolibrary.org/obo/PPO_0000001".

        :param def_text: Text that may contain "{label}" references.
        :return: The resolved text (validated as an IRI when non-empty).
        :raises RuntimeError: If a label cannot be matched to a term IRI.
        :raises ValueError: If the final result is not a valid IRI.
        """
        # The capturing group keeps the "{...}" tokens in the split output.
        labelre = re.compile(r'(\{[A-Za-z0-9\- _]+\})')
        defparts = labelre.split(def_text)

        newdef = ''
        for defpart in defparts:
            if labelre.match(defpart):
                label = defpart.strip("{}")

                # Get the class IRI associated with this label.
                try:
                    labelIRI = self.__label_map.lookupIRI(label)
                except KeyError:
                    raise RuntimeError(
                        'The class label, "' + label +
                        '", could not be matched to a term IRI.')

                # NOTE(review): plain assignment (not '+=') discards any
                # text accumulated so far.  Looks intentional given the
                # whole-string IRI validation below, but confirm behavior
                # for definitions mixing prose and {label} tokens.
                newdef = str(labelIRI)
            else:
                newdef += defpart

        # NOTE(review): re.split never returns an empty list for a str
        # input, so this branch appears unreachable.
        if len(defparts) == 0:
            newdef = def_text

        if len(newdef) != 0:
            # Attempt parsing with the rfc3987 library; raises ValueError
            # if the result is not a valid IRI.
            rfc3987.parse(newdef, rule='IRI')

        return newdef
コード例 #2
0
def clientfunc(connection, addr):
    print "Got connection from ", addr
    connection.send("Server up and running")
    while True:
        connection.send("Ready for another url")
        valid = True
        url_recieved = c.recv(1024)
        try:
            parse(url_recieved, rule="IRI")
        except Exception as e:
            valid = False
        if valid:
            url_recieved = url_recieved.split('/')[2]
            print "Do you want to write URL:", url_recieved, "to config file"
            print "for the node", addr
            response = raw_input("(yes/no):")
            if response == 'yes':
                fp = open("user.action", 'a')
                fp.write(url_recieved)
                fp.write("\n")
                print "URL written to the file user.action"
                print "Done"
                connection.send("The URL is accepted")
                fp.close()
            else:
                print "URL not added"
                print "Message sent to client"
                connection.send("The URL is blocked by admin")
        else:
            connection.send("The URL is not valid")
            pass
    connection.close()
コード例 #3
0
ファイル: scraper.py プロジェクト: bgheneti/PythonPictoscrape
def webCheck(website):
    """Return True when *website* is a syntactically valid IRI.

    Prints the parsed IRI components as a side effect (preserved from
    the original implementation).

    :param website: Candidate IRI string.
    :return: True on success, False otherwise.
    """
    try:
        # Single-argument print() behaves identically on Python 2 and 3,
        # replacing the py2-only `print expr` statement.
        print(parse(website, rule='IRI'))
        return True
    except (TypeError, ValueError):
        # rfc3987 raises ValueError for invalid IRIs and TypeError for
        # non-string input; the bare `except:` also swallowed Ctrl-C.
        return False
コード例 #4
0
ファイル: csvw.py プロジェクト: RinkeHoekstra/COW
    def expandURL(self, url_pattern, row, datatype=False):
        """Takes a Jinja or Python formatted string, applies it to the row
        values, and returns it as a URIRef.

        :param url_pattern: Jinja/Python format string for the URL.
        :param row: Mapping of values substituted into the pattern.
        :param datatype: Unused here; kept for interface compatibility.
        :return: URIRef built from the validated IRI.
        :raises Exception: If the rendered URL cannot be made a valid IRI.
        """
        try:
            # Python 2: normalise the pattern to unicode.
            unicode_url_pattern = unicode(url_pattern)
        except NameError:
            # Python 3 path (no `unicode` builtin).
            # NOTE(review): the split(')')/split('(') extraction looks like
            # it strips a wrapper such as "u('...')" — confirm against
            # actual callers.
            unicode_url_pattern = str(url_pattern).split(')')[0].split('(')[-1]

        url = self.render_pattern(unicode_url_pattern, row)

        try:
            iri = iribaker.to_iri(url)
            rfc3987.parse(iri, rule='IRI')
        except Exception:
            # Narrowed from a bare `except:` (which would also swallow
            # KeyboardInterrupt/SystemExit).
            raise Exception(u"Cannot convert `{}` to valid IRI".format(url))

        return URIRef(iri)
コード例 #5
0
ファイル: types.py プロジェクト: vsavenkov/pyyacp
 def is_iri(value):
     """Return True when *value* parses as an RFC 3986 URI, else False."""
     try:
         import rfc3987
         rfc3987.parse(value, rule="URI")
     except Exception:
         # Any failure (import error, bad type, invalid URI) means "no".
         return False
     return True
コード例 #6
0
def url_remapper(src, dest):
    """Build a callable that rewrites URLs under *src* to live under *dest*.

    The returned ``remap(url)`` yields ``(True, new_url)`` when *url*
    shares *src*'s scheme and authority and its path is at or below
    *src*'s path; otherwise it yields ``(False, url)`` unchanged.
    """
    src_parts = parse(src, 'URI')
    dest_parts = parse(dest, 'URI')

    # Resolve both paths once, up front, for prefix comparison below.
    src_path = Path(unquote(src_parts['path'])).resolve()
    dest_path = Path(unquote(dest_parts['path'])).resolve()

    def remap(url):
        # Reject URLs on a different scheme or host outright.
        url_parts = parse(url, 'URI')
        if not (url_parts['scheme'] == src_parts['scheme']
                and url_parts['authority'] == src_parts['authority']):
            return False, url

        # The URL's path must be src itself or located beneath it.
        url_path = Path(unquote(url_parts['path'])).resolve()
        if src_path != url_path and src_path not in url_path.parents:
            return False, url

        result_path = dest_path / url_path.relative_to(src_path)

        # Use a trailing slash if the incoming path had one. This facilitates
        # further URI resolution operations.
        if url_parts['path'].endswith('/'):
            final_path = f'{result_path}/'
        else:
            final_path = str(result_path)

        return True, (compose(scheme=dest_parts['scheme'],
                              authority=dest_parts['authority'],
                              path=quote(final_path),
                              query=url_parts['query'],
                              fragment=url_parts['fragment']))

    return remap
コード例 #7
0
 def __is_url(cls, url_string):
     """Return True if *url_string* is a syntactically valid IRI."""
     from rfc3987 import parse
     try:
         parse(url_string, rule='IRI')
     except ValueError:
         # rfc3987 signals an invalid IRI with ValueError.
         return False
     return True
コード例 #8
0
def checker(url):
    """Return True if *url* parses as a URI reference, else False.

    :param url: Candidate URL string.
    """
    try:
        parse(url)
        return True
    except ValueError:
        # rfc3987 raises ValueError for malformed input.
        return False
    # Removed: a trailing `return False` after the try/except was
    # unreachable (both paths already return).
コード例 #9
0
def check_uri(data):
    """Validate that *data* is a string containing an RFC 3986 URI.

    :param data: Candidate URI string.
    :raises TypeError: If *data* is not a string, or not a valid URI.
    """
    # six.string_types already covers str on both Python 2 and 3, so the
    # original double isinstance test collapses to one check.
    if not isinstance(data, string_types):
        # BUG FIX: the message claimed a type but formatted the value.
        raise TypeError("URL must be a string, not a {}".format(type(data).__name__))
    try:
        rfc3987.parse(data, rule="URI")
    except Exception as e:
        print(e)
        # BUG FIX: message read "except URL type" (typo for "expected").
        raise TypeError('expected URL type, found {}'.format(data))
コード例 #10
0
def test_against_legacy_ref(url):
    """Legacy rfc3987 and validate_rfc3986 must agree on URI references."""
    try:
        rfc3987.parse(url, rule='URI_reference')
        legacy = True
    except ValueError:
        legacy = False
    new = validate_rfc3986(url, rule='URI_reference')
    assert legacy == bool(new)
コード例 #11
0
ファイル: formats.py プロジェクト: NotJustAToy/falcon-heavy
    def validate_format(self, value: str, *args: ty.Any,
                        **context: ty.Any) -> ValidationResult:
        """Return the configured format error unless *value* is a URI."""
        try:
            rfc3987.parse(value, rule='URI')
        except ValueError:
            # Invalid URI -> report the configured 'format' message.
            return self.messages['format']
        else:
            return None
コード例 #12
0
ファイル: gdelt.py プロジェクト: davidcurtis/suckapy
def set_from_url(record, return_data):
    """Copy record['SOURCEURL'] into return_data['fromURL'] when it is a
    valid IRI.

    :param record: Mapping with a 'SOURCEURL' string (possibly empty).
    :param return_data: Mapping updated in place and returned.
    """
    if len(record['SOURCEURL']) > 0:
        try:
            parse(record['SOURCEURL'], rule='IRI')
            return_data['fromURL'] = record['SOURCEURL']
        except ValueError:
            # Invalid IRIs are skipped.  Narrowed from a bare `except:`
            # that also hid KeyboardInterrupt and real bugs.
            pass

    return return_data
コード例 #13
0
ファイル: mylib.py プロジェクト: edezhic/game-news-summarizer
def get_links_from_url(url, domain, params):
    """Collect unique, domain-internal links from the page at *url*.

    :param url: Page to fetch and scan for <a href> targets.
    :param domain: Domain string that accepted links must contain.
    :param params: Options dict: 'ignore_urls_with' /
        'ignore_urls_without' (substring filters) and 'retain_params'
        (keep query strings when present).
    :return: De-duplicated list of ASCII-encoded link byte strings.
    """

    tree = request_page_tree(url)
    links = []
    for a in tree.xpath('//a'):
        ignore_link = False
        try:
            link = str(a.get('href'))
        except:
            #Link is somehow broken
            link = ''
            ignore_link = True
            pass

        # Substring blacklist: drop links containing any listed pattern.
        if 'ignore_urls_with' in params:
            if any(pattern in link for pattern in params['ignore_urls_with']):
                ignore_link = True

        # Substring whitelist: drop links matching none of the patterns.
        if 'ignore_urls_without' in params:
            found = False
            if any(pattern in link
                   for pattern in params['ignore_urls_without']):
                found = True

            if not found:
                ignore_link = True

        if ignore_link:
            continue

        try:
            # Strip the fragment (and, unless retained, the query string).
            if '#' in link:
                link = link[:link.find('#')]

            if 'retain_params' not in params and '?' in link:
                link = link[:link.find('?')]

            # Keep only valid IRIs that stay on the target domain.
            if parse(link, rule='IRI') and (domain + '/') in link:
                links.append(str(link).encode('ASCII'))

        except ValueError as e:
            # Not a valid absolute IRI: try to rebuild it as a path
            # relative to the domain, unless it is clearly unusable.
            if 'http' not in link and 'java' not in link and len(
                    link) > 1 and link != 'None' and link[0] not in ['?', '#']:
                if str(link).encode('ASCII', 'ignore')[0] != '/':
                    link = '/' + link
                rel_link = 'http://' + domain + link
                try:
                    if parse(rel_link, rule='IRI'):
                        links.append(str(rel_link).encode('ASCII'))
                except:
                    pass
            pass

        except:
            pass

    return list(set(links))
コード例 #14
0
 def insert_concept(self, data):
     """Add SKOS Concept type and label triples for *data* to the store."""
     concept_id = data['id']
     try:
         parse(concept_id, rule='IRI')
         subject = URIRef(concept_id)
     except ValueError:
         # Not a full IRI -> mint one in the BNCF namespace.
         subject = BNCF[concept_id]
     self.sparql.add((subject, RDF.type, SKOS.Concept))
     self.sparql.add((subject, RDFS.label, Literal(data['label'])))
     return 'OK'
コード例 #15
0
ファイル: html_strings.py プロジェクト: Triple1996/ratchat
 def is_html(self, message):
     """Return True when *message* contains a known URL extension and
     parses as a valid URI."""
     for suffix in self.urlExtensions:
         if suffix not in message:
             continue
         try:
             parse(message, rule="URI")
         except ValueError:
             # Contains the extension but is not a URI; keep scanning.
             continue
         return True
     return False
コード例 #16
0
 def insert_organization(self, data):
     """Add FOAF Organization type and name triples for *data*."""
     org_id = data['id']
     try:
         parse(org_id, rule='IRI')
         subject = URIRef(org_id)
     except ValueError:
         # Not a full IRI -> mint one in the AOP namespace.
         subject = AOP[org_id]
     self.sparql.add((subject, RDF.type, FOAF.Organization))
     self.sparql.add((subject, FOAF.name, Literal(data['label'])))
     return 'OK'
コード例 #17
0
ファイル: item_helper.py プロジェクト: mhk8/ticket-and-game
def handle_profile_image(user_id, image_url):
    """Set *image_url* as the profile image for *user_id*.

    :param user_id: Primary key of the User row to update.
    :param image_url: Candidate image URL (validated as an IRI).
    :return: True on success; False when the URL is invalid or the user
        does not exist.
    """
    try:
        parse(image_url, rule="IRI")
    except ValueError:
        return False
    user = db.session.query(User).filter(User.id == user_id).first()
    # BUG FIX: .first() returns None for an unknown user_id; the original
    # then crashed with AttributeError instead of reporting failure.
    if user is None:
        return False
    user.image_url = image_url
    db.session.commit()
    return True
コード例 #18
0
def get_message_type(message):
    """Classify *message* as 'image', 'url', or plain 'text'."""
    try:
        parse(message, rule="IRI")
        # Valid IRI: distinguish image links from ordinary URLs.
        return "image" if is_url_image(message) else "url"
    except ValueError:
        # Not a parseable IRI at all.
        return "text"
コード例 #19
0
def set_from_url(record, return_data):
    """Copy record['SOURCEURL'] into return_data['fromURL'] when it is a
    valid IRI.

    :param record: Mapping with a 'SOURCEURL' string (possibly empty).
    :param return_data: Mapping updated in place and returned.
    """
    if len(record['SOURCEURL']) > 0:
        try:
            parse(record['SOURCEURL'], rule='IRI')
            return_data['fromURL'] = record['SOURCEURL']
        except ValueError:
            # Invalid IRIs are skipped.  Narrowed from a bare `except:`
            # that also hid KeyboardInterrupt and real bugs.
            pass

    return return_data
コード例 #20
0
ファイル: store.py プロジェクト: ciromattia/annotaria
 def insert_organization(self, data):
     """Add FOAF Organization type and name triples for *data*.

     data['id'] is used directly when it is a valid IRI; otherwise an
     IRI is minted in the AOP namespace.
     """
     try:
         parse(data['id'], rule='IRI')
         a = URIRef(data['id'])
     except ValueError:
         a = AOP[data['id']]
     self.sparql.add((a, RDF.type, FOAF.Organization))
     self.sparql.add((a, FOAF.name, Literal(data['label'])))
     return 'OK'
コード例 #21
0
 def insert_place(self, data):
     """Add DBPedia Place type and label triples for *data*."""
     place_id = data['id']
     try:
         parse(place_id, rule='IRI')
         subject = URIRef(place_id)
     except ValueError:
         # Not a full IRI -> mint one in the DBPEDIA namespace.
         subject = DBPEDIA[place_id]
     self.sparql.add((subject, RDF.type, DBPEDIA.Place))
     self.sparql.add((subject, RDFS.label, Literal(data['label'])))
     return 'OK'
コード例 #22
0
ファイル: store.py プロジェクト: ciromattia/annotaria
 def insert_place(self, data):
     """Add DBPedia Place type and label triples for *data*.

     data['id'] is used directly when it is a valid IRI; otherwise an
     IRI is minted in the DBPEDIA namespace.
     """
     try:
         parse(data['id'], rule='IRI')
         a = URIRef(data['id'])
     except ValueError:
         a = DBPEDIA[data['id']]
     self.sparql.add((a, RDF.type, DBPEDIA.Place))
     self.sparql.add((a, RDFS.label, Literal(data['label'])))
     return 'OK'
コード例 #23
0
    def __new__(cls, *args, **kwargs):
        """Validate and coerce a string value against the class's declared
        constraints (nullability, length, pattern, format), returning the
        native value or raising TypeSystemError.
        """
        # Nullable types pass None straight through.
        if cls.nullable and args[0] is None:
            return None

        value = super().__new__(cls, *args, **kwargs)

        if cls.trim_whitespace:
            value = value.strip()

        if cls.min_length is not None:
            if len(value) < cls.min_length:
                # min_length == 1 is reported as "blank" rather than a
                # generic length violation.
                if cls.min_length == 1:
                    raise TypeSystemError(cls=cls, code='blank')
                else:
                    raise TypeSystemError(cls=cls, code='min_length')

        if cls.max_length is not None:
            if len(value) > cls.max_length:
                raise TypeSystemError(cls=cls, code='max_length')

        if cls.pattern is not None:
            if not re.search(cls.pattern, value):
                raise TypeSystemError(cls=cls, code='pattern')

        # Validate format, if specified.  Note that 'date', 'date-time'
        # and 'time' replace `value` with a non-str object.
        if cls.format == 'date':
            try:
                value = datetime.strptime(value, "%Y-%m-%d").date()
            except ValueError as e:
                raise TypeSystemError(str(e), cls=cls)
        elif cls.format == 'date-time':
            try:
                value = isodate.parse_datetime(value)
            except (ValueError, isodate.ISO8601Error) as e:
                raise TypeSystemError(str(e), cls=cls)
        elif cls.format == 'email':
            # Intentionally minimal email check.
            if '@' not in value:
                raise TypeSystemError('Not a valid email address.', cls=cls)
        elif cls.format == 'time':
            try:
                value = datetime.strptime(value, "%H:%M:%S")
            except ValueError as e:
                raise TypeSystemError(str(e), cls=cls)
        elif cls.format == 'uri':
            try:
                rfc3987.parse(value, rule='URI')
            except ValueError as e:
                raise TypeSystemError(str(e), cls=cls)

        # Coerce value to the native str type.  We only do this if the value
        # is an instance of the class.  It could be a datetime instance or
        # a str already if `trim_whitespace` is True.
        if isinstance(value, cls):
            value = cls.native_type(value)

        cls.validate(value)
        return value
コード例 #24
0
ファイル: store.py プロジェクト: ciromattia/annotaria
 def insert_concept(self, data):
     """Add SKOS Concept type and label triples for *data*.

     data['id'] is used directly when it is a valid IRI; otherwise an
     IRI is minted in the BNCF namespace.
     """
     try:
         parse(data['id'], rule='IRI')
         a = URIRef(data['id'])
     except ValueError:
         a = BNCF[data['id']]
     self.sparql.add((a, RDF.type, SKOS.Concept))
     self.sparql.add((a, RDFS.label, Literal(data['label'])))
     return 'OK'
コード例 #25
0
def test_against_legacy_hypothesis(url):
    """Property test: legacy rfc3987 and validate_rfc3986 agree on URIs."""
    print(url)
    try:
        rfc3987.parse(url, rule='URI')
        legacy = True
    except ValueError:
        legacy = False
    new = validate_rfc3986(url)
    assert legacy == bool(new)
コード例 #26
0
ファイル: CloudScraper.py プロジェクト: bbhunter/CloudScraper
def checker(url):
    '''
    Check if the url is a valid one or not.

    :param url: Candidate URL string.
    :return: True when rfc3987 accepts it, False otherwise.
    '''
    try:
        parse(url)
        return True
    except ValueError:
        # rfc3987 raises ValueError for malformed input.
        return False
    # Removed: a trailing `return False` after the try/except was
    # unreachable (both paths already return).
コード例 #27
0
ファイル: urlhelpers.py プロジェクト: lrei/canonical_urls
def validate_url(url):
    """
    Validates URL (actually, IRIs).

    :param url: Candidate IRI string.
    :return: True when valid, False otherwise.
    """
    try:
        rfc3987.parse(url, rule="IRI")
    except (TypeError, ValueError):
        # rfc3987 raises ValueError for malformed IRIs and TypeError for
        # non-string input; narrowed from a bare `except:` which also
        # swallowed KeyboardInterrupt/SystemExit.
        return False

    return True
コード例 #28
0
 def insert_author(self, author):
     """Add FOAF Person triples for *author*; email is optional."""
     author_id = author['author_id']
     try:
         parse(author_id, rule='IRI')
         # Valid IRI: the raw string is used as the subject (as in the
         # original implementation).
         subject = author_id
     except ValueError:
         subject = AOP[author_id]
     self.sparql.add((subject, RDF.type, FOAF.Person))
     self.sparql.add((subject, FOAF.name, Literal(author['author_fullname'])))
     if 'author_email' in author:
         self.sparql.add((subject, SCHEMA.email, Literal(author['author_email'])))
     return 'OK'
コード例 #29
0
ファイル: store.py プロジェクト: ciromattia/annotaria
 def insert_author(self, author):
     """Add FOAF Person triples for *author*; email is optional.

     NOTE(review): unlike the sibling insert_* methods, the valid-IRI
     branch keeps the raw string instead of wrapping it in URIRef —
     confirm this is intentional.
     """
     try:
         parse(author['author_id'], rule='IRI')
         a = author['author_id']
     except ValueError:
         a = AOP[author['author_id']]
     self.sparql.add((a, RDF.type, FOAF.Person))
     self.sparql.add((a, FOAF.name, Literal(author['author_fullname'])))
     if 'author_email' in author:
         self.sparql.add((a, SCHEMA.email, Literal(author['author_email'])))
     return 'OK'
コード例 #30
0
def is_url(url):
    """
    Returns True if `url` is an IRI as specified in the RFC 3987
    (https://www.ietf.org/rfc/rfc3987.txt); logs a warning and returns
    False otherwise.
    """
    try:
        rfc3987.parse(url, rule="IRI")
    except ValueError:
        logger.warning("%s is not a valid url.", url)
        return False
    return True
コード例 #31
0
def guess_server_url(
    url: str,
    login_page: str = Options.startup_page,
    proxy: "Proxy" = None,
    timeout: int = 5,
) -> str:
    """
    Guess the complete server URL given an URL (either an IP address,
    a simple domain name or an already complete URL).

    Note: this function cannot be decorated with lru_cache().

    :param url: The server URL (IP, domain name, full URL).
    :param login_page: The Drive login page.
    :param proxy: Optional proxy whose settings are applied per request.
    :param int timeout: Timeout for each and every request.
    :return: The complete URL, or "" when nothing responded and *url*
        is not already an http(s) URL.
    """
    import requests
    import rfc3987

    from requests.exceptions import SSLError

    kwargs: Dict[str, Any] = {
        "timeout": timeout,
        "verify": Options.ca_bundle or not Options.ssl_no_verify,
    }
    # Probe each candidate URL derived from the raw user input.
    for new_url in compute_urls(url):
        try:
            # Syntax check first; raises ValueError for malformed URLs.
            rfc3987.parse(new_url, rule="URI")
            log.debug(f"Testing URL {new_url!r}")
            full_url = f"{new_url}/{login_page}"
            if proxy:
                kwargs["proxies"] = proxy.settings(url=full_url)
            with requests.get(full_url, **kwargs) as resp:
                resp.raise_for_status()
                if resp.status_code == 200:  # Happens when JSF is installed
                    log.debug(f"Found URL: {new_url}")
                    return new_url
        except SSLError as exc:
            # Certificate failures abort the whole guess immediately.
            if "CERTIFICATE_VERIFY_FAILED" in str(exc):
                raise InvalidSSLCertificate()
        except requests.HTTPError as exc:
            if exc.response.status_code in (401, 403):
                # When there is only Web-UI installed, the code is 401.
                log.debug(f"Found URL: {new_url}")
                return new_url
        except (ValueError, requests.RequestException):
            log.debug(f"Bad URL: {new_url}")
        except Exception:
            log.exception("Unhandled error")

    # Fall back to the original URL only if it already looks like http(s).
    if not url.lower().startswith("http"):
        return ""
    return url
コード例 #32
0
ファイル: utils.py プロジェクト: nuxeo/nuxeo-drive
def guess_server_url(
    url: str,
    login_page: str = Options.startup_page,
    proxy: "Proxy" = None,
    timeout: int = 5,
) -> str:
    """
    Guess the complete server URL given an URL (either an IP address,
    a simple domain name or an already complete URL).

    :param url: The server URL (IP, domain name, full URL).
    :param login_page: The Drive login page.
    :param proxy: Optional proxy whose settings are applied per request.
    :param int timeout: Timeout for each and every request.
    :return: The complete URL, or "" when nothing responded and *url*
        is not already an http(s) URL.
    """
    import requests
    import rfc3987

    from requests.exceptions import SSLError

    kwargs: Dict[str, Any] = {
        "timeout": timeout,
        "verify": Options.ca_bundle or not Options.ssl_no_verify,
    }
    # Probe each candidate URL derived from the raw user input.
    for new_url in compute_urls(url):
        try:
            # Syntax check first; raises ValueError for malformed URLs.
            rfc3987.parse(new_url, rule="URI")
            log.debug(f"Testing URL {new_url!r}")
            full_url = f"{new_url}/{login_page}"
            if proxy:
                kwargs["proxies"] = proxy.settings(url=full_url)
            with requests.get(full_url, **kwargs) as resp:
                resp.raise_for_status()
                if resp.status_code == 200:
                    log.debug(f"Found URL: {new_url}")
                    return new_url
        except requests.HTTPError as exc:
            if exc.response.status_code in {401, 403}:
                # When there is only Web-UI installed, the code is 401.
                log.debug(f"Found URL: {new_url}")
                return new_url
        except SSLError as exc:
            # Certificate failures abort the whole guess immediately.
            if "CERTIFICATE_VERIFY_FAILED" in str(exc):
                raise InvalidSSLCertificate()
        except (ValueError, requests.RequestException):
            log.debug(f"Bad URL: {new_url}")
        except Exception:
            log.exception("Unhandled error")

    # Fall back to the original URL only if it already looks like http(s).
    if not url.lower().startswith("http"):
        return ""
    return url
コード例 #33
0
def getThumbnailContent(metadata):
    """Return an <img> HTML snippet for the first image in *metadata*,
    or the literal string 'no preview' when unavailable or invalid.

    :param metadata: Mapping possibly containing an 'image' list.
    """
    if 'image' not in metadata:
        return 'no preview'
    images = metadata['image']
    if len(images) < 1:
        return 'no preview'
    try:
        img_uri = images[0].strip()
        # Raises ValueError when the entry is not a syntactically valid URI.
        rfc3987.parse(img_uri, "URI")
        # Explicit keyword beats the original format(**locals()), which
        # silently exposed every local name to the template.
        return '<img src="https://steemitimages.com/128x256/{img_uri}" />'.format(
            img_uri=img_uri)
    except Exception:
        # Deliberate best-effort: any failure falls back to no preview.
        return 'no preview'
コード例 #34
0
    def validate(self, value):
        """Check that the URL is valid, and optionally accessible.

        :param value: Candidate URL string.
        """
        try:
            parse(value)
        except ValueError:
            # Single-argument print() behaves identically on Python 2
            # and 3, replacing the py2-only `print value` statement.
            print(value)
            self.error("Value is not a valid URL")

        if self.verify_exists:
            try:
                get(value)
            except Exception:
                # Best-effort reachability check; narrowed from a bare
                # `except:` so Ctrl-C still interrupts.
                self.error("The URL appears to be inaccessible")
コード例 #35
0
def escape_url(url):
    """Return *url* unchanged when it is already a valid URI; otherwise
    percent-quote everything after the (http/https) scheme prefix."""
    try:
        rfc3987.parse(url, rule="URI")
    except ValueError:
        lowered = url.lower()
        if lowered.startswith('https://'):
            scheme = 'https://'
        elif lowered.startswith('http://'):
            scheme = 'http://'
        else:
            scheme = ''
        # Quote only the part after the scheme so '://' survives.
        return scheme + quote_url(url[len(scheme):])
    return url
コード例 #36
0
ファイル: validate.py プロジェクト: timgdavies/ocdskit
 def check_url(instance):
     """Validate that *instance* is a URI and answers HTTP 200 to a GET.

     Non-string instances are accepted (type checks happen elsewhere).
     NOTE(review): relies on the enclosing scope's ``self.args.timeout``.
     """
     # See https://github.com/Julian/jsonschema/blob/master/jsonschema/_format.py
     if not isinstance(instance, str_types):
         return True
     rfc3987.parse(instance, rule='URI')  # raises ValueError
     try:
         response = requests.get(instance, timeout=self.args.timeout)
         # Only a plain 200 counts as success.
         result = response.status_code in (200,)
         if not result:
             print('HTTP {} on GET {}'.format(response.status_code, instance))
         return result
     except requests.exceptions.Timeout as e:
         print('Timedout on GET {}'.format(instance))
         return False
コード例 #37
0
ファイル: Sourcespade.py プロジェクト: 5l1v3r1/sourcespade
def spider(base_urls, target):
    '''
        Loop through the initial links found in the given page. Each new link
        discovered will be added to the list if it's not already there, and thus
        crawled aswell looking for more links.

        wannabe list works as the placeholder for the urls that are yet to crawl.
        base_urls is a list with all the already crawled urls.
    '''
    # target_ is published as a module-level global for the worker pool.
    global target_
    target_ = parse(target)
    p = Pool(arguments.process)
    # Seed the crawl with URLs sharing the target's authority (host).
    wannabe = [
        url for url in base_urls
        if target_['authority'] in parse(url)['authority']
    ]

    while True:
        #retrieve all the urls returned by the workers
        new_urls = p.map(worker, wannabe)
        #flatten them and remove repeated ones
        new_urls = list(set(itertools.chain(*new_urls)))
        wannabe = []
        i = 0

        #if new_urls is empty meaning no more urls are being discovered, exit the loop
        if new_urls == []:
            break

        else:
            for url in new_urls:
                if url not in base_urls:
                    '''
                    For each new url, check if it hasn't been crawled. If it's
                    indeed new and contains the target domain it gets appended to
                    the wannabe list so in the next iteration it will be crawled.
                    '''
                    i += 1
                    if target_['authority'] in parse(url)['authority']:
                        wannabe.append(url)
                    base_urls.append(url)

        print(
            colored('\nNew urls appended: {}\n'.format(i),
                    'green',
                    attrs=['bold']))

    #once all the links for the given depth have been analyzed, execute the parser
    parser(base_urls)
コード例 #38
0
ファイル: schema.py プロジェクト: PrasannaVenkadesh/portia
 def is_valid_uri(instance):
     """Validate *instance* as a URI; non-string values pass trivially."""
     if not isinstance(instance, six.string_types):
         return True
     # Re-encode the query string so rfc3987 sees a normalised URL.
     parsed = urlparse(instance)
     encoded_query = urlencode(parse_qsl(unquote(parsed.query.encode('utf-8'))))
     rebuilt = parsed._replace(query=encoded_query).geturl()
     return rfc3987.parse(rebuilt, rule='URI')
コード例 #39
0
def serialize_fe(fe, reified, wiki_title, add_triple, format):
    """Serialize one frame element *fe* as triples attached to *reified*.

    :param fe: Dict with an 'FE' label and optional 'uri' / 'literal'.
    :param reified: Subject node the triples attach to.
    :param wiki_title: NOTE(review): shadowed on the URI path (recomputed
        from the URI) — confirm the incoming value is meant to be unused.
    :param add_triple: Callable(subject, predicate, object) -> bool.
    :param format: Unused here; kept for interface compatibility.
    :raises Exception: For literal payloads of unsupported type.
    """
    # The FE predicate takes the FE label
    p1 = _uri_for('FE', 'predicate', fe['FE'])

    # The FE object takes the linked entity URI and/or the literal
    le_uri = fe.get('uri')
    literal = fe.get('literal')

    if le_uri:  # It's a URI
        wiki_title = quote(le_uri.split('/')[-1].encode('utf8'))
        o1 = NAMESPACES['resource'] + wiki_title
        parsed = parse(o1, rule='URI_reference')  # URI sanity check
        assert add_triple(reified, p1, o1)

    if literal:  # It's a literal
        if type(literal) in {str, unicode}:
            assert add_triple(reified, p1, literal)

        elif type(literal) == dict:

            if 'duration' in literal:
                assert add_triple(reified, p1, literal['duration'])

            if 'start' in literal:
                assert add_triple(reified, '%sstartYear' % NAMESPACES['ontology'],
                                  literal['start'])

            if 'end' in literal:
                assert add_triple(reified, '%sendYear' % NAMESPACES['ontology'],
                                  literal['end'])

        else:
            raise Exception("Don't know how to serialize: " + repr(literal))
コード例 #40
0
def launch_the_stream():
    """Entry point: parse CLI arguments and dispatch the requested action
    (show stats, show aliases, clear stats, assign an alias, or assemble
    the stream command).

    :return: Exit status from the dispatched action (0 for the pure
        bookkeeping paths).
    """
    parser = create_parser()
    # No arguments at all -> show usage and bail out.
    if len(argv) == 1:
        parser.print_help()
        exit(1)

    arguments = parser.parse_args()

    rv = 0
    statistics = UserStat(APPLICATION_NAME, STAT_FILE_NAME)
    statistics.load()
    nicknames = Nicknames(APPLICATION_NAME, ALIAS_FILE_NAME)
    nicknames.load()
    if arguments.stat:
        print(str(statistics))
    elif arguments.aliases:
        print(str(nicknames))
    elif arguments.clear:
        # Drop every entry whose count is at or below the threshold.
        trimmed = statistics.fltr(lambda key, value: value > int(arguments.clear))
        statistics.save()
        print("Statistics cleared: {0}".format(trimmed))
    elif (arguments.let and len(arguments.let) == 2):
        (nick, URL) = arguments.let
        nicknames.assign(nick, URL)
        # Extract the last part of URL path as a streamer nick
        streamer = [x for x in parse(URL)['path'].split('/') if x][-1]
        trimmed = statistics.fltr(lambda key, value: streamer not in key)
        statistics.save()
        nicknames.save()
        print("{0} was assigned to {1}; Statistics cleared: {2}".format(nick, URL, trimmed))
    else:
        rv = assemble_command(arguments, statistics, nicknames)

    return rv
コード例 #41
0
def generate_validator_from_schema(schema_uri):
    """Build a jsonschema Draft 7 validator for the schema at *schema_uri*.

    :param schema_uri: http(s) or file URI pointing at a JSON schema.
    :return: Configured ``jss.Draft7Validator``.
    :raises ValueError: If the URI scheme is neither http(s) nor file.
    """

    #download the schema to a string
    schema = None
    #handle http and file
    uri_split = rfc3987.parse(schema_uri)
    if uri_split['scheme'] in ("http", "https"):
        #its a http or https use requests
        schema = requests.get(schema_uri).json()
    elif uri_split['scheme'] == "file":
        #its a file, open as normal
        #reconstiture the file path from the uri
        with open(
                os.path.abspath(
                    os.path.join(uri_split['authority'], uri_split['path'])),
                'r') as schema_file:
            schema = json.load(schema_file)
    else:
        raise ValueError("schema uri must have file or url scheme")

    #Create a refresolver to allow resolution
    #of relative schema links
    #This is required to use git branches / versions and
    #local development correctly
    #Don't use from_schema because it uses the $id baked
    #into the schema, and we want to avoid baking
    handlers = dict(file=file_handler)
    resolver = jss.RefResolver(schema_uri, schema, handlers=handlers, store={})

    validator = jss.Draft7Validator(
        schema=schema,
        resolver=resolver,
    )

    return validator
コード例 #42
0
ファイル: ark.py プロジェクト: GeoHistoricalData/gallipy
    def parse(ark_str):
        """Parse an ARK URL or an ARK ID string into an Ark object

        Args:
            ark_str (str): The string to parse.

        Returns:
            Ark: The parsed ARK, wrapped in Either (Left on failure).

        Raises:
            ArkParsingError: If parsing fails.
        """
        try:
            parts = rfc3987.parse(ark_str, rule="URI")  # Ensure ark is a URI
            parser = Lark(_GRAMMAR, start='arkid')

            # Extract an ARK ID from ark_str if ark_str is a full ARK URL.
            if parts["scheme"] != _ARKID_SCHEME:
                arkid_str = parts["path"].lstrip("/")
                if not parts["authority"]:  # NMA is required
                    msg = 'Name Mapping Authority cannot be null.'
                    raise ArkParsingError(msg, ark_str)
            else:
                arkid_str = ark_str

            tree = parser.parse(arkid_str)
            # URI components (scheme, authority, ...) complement the
            # grammar-derived ARK parts in the constructor kwargs.
            ark_parts = ArkIdTransformer().transform(tree)
            ark_parts.update(parts)
            ark = Ark(**ark_parts)
            return Either.pure(ark)

        except (TypeError, ValueError, ParseError, UnexpectedCharacters) as ex:
            # All parse-level failures are folded into Left(ArkParsingError).
            return Left(ArkParsingError(str(ex), ark_str))
コード例 #43
0
ファイル: md2html.py プロジェクト: fernandezcuesta/md2html
 def to_b64(self, image_filename, *args):
     """Return a data-URI string ('data:image/<type>;base64,...') for the
     given image, or '' when the image cannot be read.

     *image_filename* may be a URL (fetched with urlopen) or a local
     file path; the image type comes from the URL extension or imghdr.
     (The previous docstring claimed a tuple was returned; the function
     actually returns a single string.)
     """
     self.logger.debug('Converting image %s to base64', image_filename)
     self.logger.debug('Current directory %s', os.path.abspath(os.curdir))
     try:
         img_info = parse(image_filename, rule='IRI')
         extension = img_info['path'].split('.')[-1]
         content = urlopen(image_filename)
     except ValueError:  # not a valid IRI, assume local file
         self.logger.debug("Image '%s' doesn't have a valid URL, "
                           "assuming local", image_filename)
         try:
             extension = imghdr.what(image_filename)
             if extension is None:
                 self.logger.debug('Image extension not detected, skipping')
                 return ''
             content = open(image_filename, 'rb')
         except (IOError, AttributeError, TypeError):
             return ''
     except (HTTPError, URLError, TypeError):
         return ''
     # Python 2 only: bytes.encode('base64') does not exist on Python 3.
     txt = 'data:image/{};base64,\n{}'.format(extension,
                                              content.read().encode('base64'
                                                                    )
                                              )
     content.close()
     return txt
コード例 #44
0
ファイル: links.py プロジェクト: leb2dg/SHARE
    def execute(self, obj):
        """Parse *obj* as an IRI, run subclass processing, and return the
        re-parsed canonical components.

        :param obj: IRI string to process.
        :raises TypeError: If *obj* is not a str.
        """
        if not isinstance(obj, str):
            raise TypeError('\'{}\' is not of type str.'.format(obj))

        # Parse -> transform -> recompose -> reparse for a canonical result.
        parsed = self._parse(obj)
        parsed = self._process(**parsed)
        return rfc3987.parse(rfc3987.compose(**parsed))
コード例 #45
0
ファイル: shortening.py プロジェクト: Yuanye/MatrixUtils
def validate_url(url):
    """Return True when *url* is a URI reference carrying a scheme, an
    authority and a path; False otherwise.

    :param url: Candidate URL string.
    """
    try:
        p = parse(url, rule='URI_reference')
        r = all((p['scheme'], p['authority'], p['path']))
    except Exception as e:
        # Single-argument print() behaves identically on Python 2 and 3,
        # replacing the py2-only `print e` statement.
        print(e)
        r = False
    return r
コード例 #46
0
ファイル: formats.py プロジェクト: Arable/flex
def uri_validator(value, **kwargs):
    """Raise ValidationError unless *value* is a URI with both a scheme
    and an authority."""
    try:
        parts = rfc3987.parse(value, rule="URI")
    except ValueError:
        raise ValidationError(MESSAGES["format"]["invalid_uri"].format(value))

    # De Morgan rewrite of: not scheme or not authority.
    if not (parts["scheme"] and parts["authority"]):
        raise ValidationError(MESSAGES["format"]["invalid_uri"].format(value))
コード例 #47
0
ファイル: test_links.py プロジェクト: Stevenholloway/SHARE
def test_urn_link(urn, result):
    """URNLink must raise the expected error or produce the expected IRI."""
    if isinstance(result, Exception):
        with pytest.raises(type(result)) as excinfo:
            URNLink().execute(urn)
        assert excinfo.value.args == result.args
    else:
        assert rfc3987.parse(result)  # Extra URL validation
        assert URNLink().execute(urn)['IRI'] == result
コード例 #48
0
ファイル: test_links.py プロジェクト: Stevenholloway/SHARE
def test_ark_link(ark_id, result):
    """ARKLink must raise the expected error or produce the expected IRI."""
    if isinstance(result, Exception):
        with pytest.raises(type(result)) as excinfo:
            ARKLink().execute(ark_id)
        assert excinfo.value.args == result.args
    else:
        assert rfc3987.parse(result)  # Extra URL validation
        assert ARKLink().execute(ark_id)['IRI'] == result
コード例 #49
0
ファイル: test_links.py プロジェクト: Stevenholloway/SHARE
def test_doi_link(doi, result):
    """Check DOILink.execute against either an expected IRI or an expected exception."""
    if not isinstance(result, Exception):
        assert rfc3987.parse(result)  # Extra URL validation
        assert DOILink().execute(doi)['IRI'] == result
    else:
        with pytest.raises(type(result)) as excinfo:
            DOILink().execute(doi)
        assert excinfo.value.args == result.args
コード例 #50
0
ファイル: formats.py プロジェクト: pipermerriam/flex
def uri_validator(value, **kwargs):
    """Raise ValidationError unless *value* is a URI with both a scheme
    and an authority component."""
    try:
        parts = rfc3987.parse(value, rule='URI')
    except ValueError:
        raise ValidationError(MESSAGES['format']['invalid_uri'].format(value))
    # De Morgan: reject when either required component is missing.
    if not (parts['scheme'] and parts['authority']):
        raise ValidationError(MESSAGES['format']['invalid_uri'].format(value))
コード例 #51
0
ファイル: grabber.py プロジェクト: paregorios/pleiadesgrabber
 def __init__(self, parent, format):
     """Set up one serialization of *parent* in the given *format*.

     Builds the target URI from the parent's URI plus the FORMATS suffix
     and records whether it is a valid absolute URI in ``self.valid``.
     """
     self.format = format
     self.uri = parent.uri + FORMATS[format]
     # rfc3987.parse raises ValueError on invalid input instead of
     # returning a falsy value, so the original `if self.valid:` could
     # never reach its else branch; catch the error explicitly.
     try:
         self.valid = rfc3987.parse(self.uri, rule="absolute_URI")
     except ValueError:
         self.valid = None
     if self.valid:
         l.debug("Initialized serialization (%s: %s)." % ((self.format, self.uri)))
     else:
         # Loggers expose warning(), not WARNING(); also actually supply
         # the URI for the %s placeholder that was previously left empty.
         l.warning("%s is not a valid absolute URI, so this serialization will not be retrieved.", self.uri)
コード例 #52
0
ファイル: unKML.py プロジェクト: ua-snap/unKML
  def getLayerData(self):
    """Fetch this layer's data from ``self.location``.

    If the location parses as an IRI it is downloaded over HTTP; download
    failures are logged and False is returned.  NOTE(review): Python 2
    code (``urllib2``, ``except Exception, e`` syntax); this snippet also
    appears truncated — the success path after the download is not visible.
    """

    # Is this a proper URL?  rfc3987.parse raises on invalid input, so a
    # clean call means self.location is a syntactically valid IRI.
    try:
      rfc3987.parse(self.location, rule='IRI')
      isUrl = True
    except:
      # Bare except: any parse failure means "treat as a non-URL location".
      isUrl = False

    # Download layer from URL.
    if isUrl:
      logging.info('Downloading {0} from {1}'.format(self.name, self.location))
      try:
        response = urllib2.urlopen(self.location)
        data = response.read()
      except Exception, e:
        # Log the full traceback and signal failure to the caller.
        logging.exception(e)
        return False
コード例 #53
0
ファイル: abiquo-firstboot.py プロジェクト: abiquo/rpms
 def check_api_url(self, url):
     """Return True if *url* parses as an IRI, False otherwise.

     Fix: rfc3987's parse() never returns None — it raises ValueError on
     invalid input — so the original ``if p != None`` test was always
     True and its else branch was dead code.  The only meaningful signal
     is whether parse() raised.
     """
     try:
         parse(url, rule="IRI")
         return True
     except ValueError:
         return False
コード例 #54
0
ファイル: rmt_server.py プロジェクト: RossBarnie/rmt
 def try_host(self, hostname):
     """Return True when *hostname* parses as a URI with a non-empty
     authority component; False otherwise (logging parse failures).

     Fixes: Python 2 ``except ValueError, e`` syntax (a syntax error in
     Python 3); removed the unused ``success`` local; the function now
     returns an explicit False instead of falling through to an implicit
     None (both falsy, so callers are unaffected).
     """
     try:
         result = parse(hostname, rule="URI")
         if result['authority'] != '':
             return True
     except ValueError as e:
         logging.error("address given does not match URI definition")
         logging.exception(e)
     return False
コード例 #55
0
ファイル: csvw.py プロジェクト: CLARIAH/wp4-converters
    def expandURL(self, url_pattern, row, datatype=False):
        """Takes a Jinja or Python formatted string, applies it to the row
        values, and returns it as a URIRef.

        NOTE(review): Python 2 code (uses ``unicode``).  The ``datatype``
        parameter is accepted but unused in the visible body — presumably
        kept for interface compatibility; confirm against callers.
        Raises a generic Exception when the rendered URL cannot be coerced
        into a valid IRI.
        """
        url = self.render_pattern(unicode(url_pattern), row)

        # DEPRECATED
        # for ns, nsuri in namespaces.items():
        #     if url.startswith(ns):
        #         url = url.replace(ns + ':', nsuri)
        #         break

        try:
            # iribaker percent-encodes/repairs the URL into an IRI;
            # rfc3987.parse then acts purely as validation (raises if invalid).
            iri = iribaker.to_iri(url)
            rfc3987.parse(iri, rule='IRI')
        except:
            # Bare except: any coercion/validation failure is reported
            # uniformly with the offending URL embedded in the message.
            raise Exception(u"Cannot convert `{}` to valid IRI".format(url))

        # print "Baked: ", iri
        return URIRef(iri)
コード例 #56
0
ファイル: query.py プロジェクト: hel-repo/hel
def parse_url(url):
    """Normalize *url*: require an http(s) scheme, default the path to '/',
    strip the fragment, and return the recomposed URI.

    Raises:
        HTTPBadRequest: when *url* is not a valid http(s) URI.
    """
    try:
        parts = rfc3987.parse(url, rule='URI')
    except ValueError:
        raise HTTPBadRequest(detail=Messages.invalid_uri)
    if parts['scheme'] not in ('http', 'https'):
        raise HTTPBadRequest(detail=Messages.invalid_uri)
    if not parts['path']:
        parts['path'] = '/'
    parts['fragment'] = None
    return rfc3987.compose(**parts)
コード例 #57
0
def parse_streamer_url(url, nicknames):
    """Resolve *url* to a ``(name, target)`` pair.

    An absolute URI yields its last non-empty path segment and the URL
    itself; otherwise *url* is treated as a nickname to look up in
    *nicknames*.  Unknown nicknames print a message and yield (None, None).
    """
    if match(url, 'absolute_URI'):
        segments = [seg for seg in parse(url)['path'].split('/') if seg]
        return segments[-1], url
    if nicknames.find(url):
        return url, nicknames.get(url)
    print("Nickname \"{0}\" has not been defined yet".format(url))
    return None, None
コード例 #58
0
ファイル: videoFetcher.py プロジェクト: Faianca/video-spider
 def build_domain(self, url):
     """
     :param domaininfo:
     :return:
     """
     d = parse(url, rule='IRI')
     li = d['authority'].split('.')
     domain = li[len(li) - 2]
     self.is_valid(domain)
     module = self.my_import('domains.'+domain)
     self.fetcher = module.Fetcher(requester.Requester())