Ejemplo n.º 1
0
class IbanType(PropertyType):
    """Property type for IBAN values."""

    name = "iban"
    group = "ibans"
    label = _("IBAN")
    plural = _("IBANs")
    matchable = True
    pivot = True

    def validate(self, text, **kwargs):
        """Run ``iban.validate`` on the sanitized text.

        Returns whatever ``iban.validate`` returns, or ``False`` when it
        raises ``ValidationError``.
        """
        candidate = sanitize_text(text)
        try:
            outcome = iban.validate(candidate)
        except ValidationError:
            return False
        return outcome

    def clean_text(self, text, **kwargs):
        """Strip all space characters from the value and upper-case it."""
        without_spaces = text.replace(" ", "")
        return without_spaces.upper()

    def country_hint(self, value):
        """Return the first two characters of the value in lower case."""
        prefix = value[:2]
        return prefix.lower()

    def rdf(self, value):
        """Wrap the node ID of the value in a ``URIRef``."""
        node = self.node_id(value)
        return URIRef(node)

    def node_id(self, value):
        """Build the ``iban:``-prefixed node ID from the upper-cased value."""
        return f"iban:{value.upper()}"

    def caption(self, value):
        """Return the value as rendered by ``iban.format``."""
        formatted = iban.format(value)
        return formatted
Ejemplo n.º 2
0
class EntityType(PropertyType):
    """Property type whose values are entity IDs matching ``ID_RE``."""

    # Alphanumeric start and end; dots and dashes allowed in between.
    ID_RE = re.compile(r'^[0-9a-zA-Z]([0-9a-zA-Z\.\-]*[0-9a-zA-Z])?$')
    name = 'entity'
    group = 'entities'
    label = _('Entity')
    plural = _('Entities')
    matchable = True
    pivot = True

    def validate(self, text, **kwargs):
        """Return True if the sanitized text is a well-formed entity ID."""
        candidate = sanitize_text(text)
        return candidate is not None and self.ID_RE.match(candidate) is not None

    def clean(self, text, **kwargs):
        """Extract the entity ID from ``text``; return None if it is invalid."""
        entity_id = get_entity_id(text)
        if not self.validate(entity_id):
            return None
        return entity_id

    def rdf(self, value):
        """Return an ``entity:``-prefixed ``URIRef`` for the value."""
        uri = 'entity:%s' % value
        return URIRef(uri)

    def caption(self, value):
        """Entity references have no caption; always returns None."""
        return None
Ejemplo n.º 3
0
class IpType(PropertyType):
    """Internet protocol addresses. Both protocol version 4 addresses
    (e.g. ``192.168.1.143``) and version 6 addresses
    (e.g. ``0:0:0:0:0:ffff:c0a8:18f``) are supported."""

    name = "ip"
    group = "ips"
    label = _("IP-Address")
    plural = _("IP-Addresses")
    matchable = True
    pivot = True

    def validate(self, ip, **kwargs):
        """Return True if ``ip_address`` accepts the value, else False."""
        try:
            ip_address(ip)
        except ValueError:
            return False
        return True

    def clean_text(self, text, **kwargs):
        """Return the string form of the parsed address, or None when
        ``ip_address`` rejects the input."""
        try:
            parsed = ip_address(text)
        except ValueError:
            return None
        return str(parsed)

    def rdf(self, value):
        """Return an ``ip:``-prefixed ``URIRef`` for the value."""
        uri = "ip:%s" % value
        return URIRef(uri)
Ejemplo n.º 4
0
class IpType(PropertyType):
    """Property type for IP addresses, parsed via ``ip_address``."""

    name = "ip"
    group = "ips"
    label = _("IP-Address")
    plural = _("IP-Addresses")
    matchable = True
    pivot = True

    def validate(self, ip, **kwargs):
        """Tell whether ``ip`` can be parsed as an IP address."""
        try:
            ip_address(ip)
        except ValueError:
            is_valid = False
        else:
            is_valid = True
        return is_valid

    def clean_text(self, text, **kwargs):
        """Re-serialise the address through ``ip_address``; None if the
        input cannot be parsed."""
        try:
            address = ip_address(text)
        except ValueError:
            return None
        else:
            return str(address)

    def rdf(self, value):
        """Return an ``ip:``-prefixed ``URIRef`` for the value."""
        identifier = "ip:%s" % value
        return URIRef(identifier)
Ejemplo n.º 5
0
class IdentifierType(PropertyType):
    """Used for registration numbers, codes etc."""
    # Runs of non-word characters and underscores are dropped before comparing.
    COMPARE_CLEAN = re.compile(r'[\W_]+')
    name = 'identifier'
    group = 'identifiers'
    label = _('Identifier')
    plural = _('Identifiers')
    matchable = True

    def normalize(self, text, **kwargs):
        """Apply the module-level ``normalize`` to each value produced by
        the parent class' ``normalize``."""
        normalized = []
        for ident in super(IdentifierType, self).normalize(text, **kwargs):
            normalized.append(normalize(ident))
        return normalized

    def clean_compare(self, value):
        """Strip ``COMPARE_CLEAN`` matches from the value and lower-case it."""
        # TODO: should this be used for normalization?
        return self.COMPARE_CLEAN.sub('', value).lower()

    def compare(self, left, right):
        """Score two identifiers.

        Equal cleaned values score the specificity of the shorter one,
        containment scores 0.8 times that, anything else scores 0.
        """
        a = self.clean_compare(left)
        b = self.clean_compare(right)
        weight = self.specificity(shortest(a, b))
        if a == b:
            return weight
        contained = a in b or b in a
        if contained:
            return .8 * weight
        return 0

    def _specificity(self, value):
        """Delegate to ``dampen`` with the bounds 4 and 10."""
        return dampen(4, 10, value)

    def node_id(self, value):
        """Return the ``id:``-prefixed node ID for the value."""
        return 'id:%s' % value
Ejemplo n.º 6
0
class IdentifierType(PropertyType):
    """Used for registration numbers, codes etc."""

    # Runs of non-word characters and underscores are dropped before comparing.
    COMPARE_CLEAN = re.compile(r"[\W_]+")
    name = "identifier"
    group = "identifiers"
    label = _("Identifier")
    plural = _("Identifiers")
    matchable = True
    pivot = True

    def clean_compare(self, value):
        """Remove ``COMPARE_CLEAN`` matches and lower-case the remainder."""
        # TODO: should this be used for normalization?
        stripped = self.COMPARE_CLEAN.sub("", value)
        lowered = stripped.lower()
        return lowered

    def compare(self, left, right):
        """Score two identifiers: full specificity for equal cleaned
        values, 0.8 of it when one contains the other, otherwise 0."""
        first_id = self.clean_compare(left)
        second_id = self.clean_compare(right)
        base_score = self.specificity(shortest(first_id, second_id))
        if first_id == second_id:
            return base_score
        if first_id in second_id or second_id in first_id:
            return 0.8 * base_score
        return 0

    def _specificity(self, value):
        """Delegate to ``dampen`` with the bounds 6 and 10."""
        return dampen(6, 10, value)

    def node_id(self, value):
        """Return the ``id:``-prefixed node ID for the value."""
        return "id:%s" % value
Ejemplo n.º 7
0
class EntityType(PropertyType):
    """Property type whose values are entity IDs matching ``ID_RE``."""

    # Alphanumeric start and end; dots and dashes allowed in between.
    ID_RE = re.compile(r"^[0-9a-zA-Z]([0-9a-zA-Z\.\-]*[0-9a-zA-Z])?$")
    name = "entity"
    group = "entities"
    label = _("Entity")
    plural = _("Entities")
    matchable = True
    pivot = True

    def validate(self, text, **kwargs):
        """Return True if the sanitized text is a well-formed entity ID."""
        candidate = sanitize_text(text)
        return candidate is not None and self.ID_RE.match(candidate) is not None

    def clean(self, text, **kwargs):
        """Extract the entity ID from ``text`` and return it as a string,
        or None when no ID is found or it does not match ``ID_RE``."""
        raw_id = get_entity_id(text)
        if raw_id is not None:
            candidate = str(raw_id)
            if self.ID_RE.match(candidate) is not None:
                return candidate
        return None

    def rdf(self, value):
        """Return an ``entity:``-prefixed ``URIRef`` for the value."""
        uri = "entity:%s" % value
        return URIRef(uri)

    def caption(self, value):
        """Entity references have no caption; always returns None."""
        return None
Ejemplo n.º 8
0
class AddressType(PropertyType):
    """A geographic address describing the location of a residence or post
    box. The sub-parts of an address (e.g. street, city, postal code) have no
    specified order; introducing an Address schema type should be considered
    to retain fidelity in cases where address parts are specified."""

    # Separators that are rewritten to ", " during clean-up.
    LINE_BREAKS = re.compile(r"(\r\n|\n|<BR/>|<BR>|\t|ESQ\.,|ESQ,|;)")
    # A comma followed by another comma or a period (optional space between).
    COMMATA = re.compile(r"(,\s?[,\.])")
    name = "address"
    group = "addresses"
    label = _("Address")
    plural = _("Addresses")
    matchable = True
    pivot = True

    def clean_text(self, address, **kwargs):
        """Normalise separators and whitespace; return None if nothing
        remains."""
        cleaned = self.LINE_BREAKS.sub(", ", address)
        cleaned = self.COMMATA.sub(", ", cleaned)
        cleaned = collapse_spaces(cleaned)
        if not len(cleaned):
            return None
        return cleaned

    def _specificity(self, value: str) -> float:
        """Delegate to ``dampen`` with the bounds 10 and 60."""
        return dampen(10, 60, value)

    def node_id(self, value: str) -> str:
        """Return the ``addr:``-prefixed node ID built from the slugified
        value."""
        slug = slugify(value)
        return "addr:%s" % slug
Ejemplo n.º 9
0
class MimeType(PropertyType):
    """A MIME media type is a specification of a content type on a network.
    Each MIME type is assigned by IANA and consists of two parts: the type
    and sub-type. Common examples are: ``text/plain``, ``application/json`` and
    ``application/pdf``.

    MIME type properties do not contain parameters as used in HTTP headers,
    like ``charset=UTF-8``."""

    name = "mimetype"
    group = "mimetypes"
    label = _("MIME-Type")
    plural = _("MIME-Types")
    matchable = False

    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Normalise the MIME type; the ``DEFAULT`` type is mapped to None."""
        normalized = normalize_mimetype(text)
        return normalized if normalized != DEFAULT else None

    def rdf(self, value: str) -> Identifier:
        """Return a ``urn:mimetype:`` ``URIRef`` for the value."""
        return URIRef(f"urn:mimetype:{value}")

    def caption(self, value: str) -> str:
        """Return the parsed type's label, or the raw value if the label is
        falsy."""
        parsed = parse_mimetype(value)
        return parsed.label or value
Ejemplo n.º 10
0
class NameType(PropertyType):
    """Property type for names."""

    name = 'name'
    group = 'names'
    label = _('Name')
    plural = _('Names')
    matchable = True

    def clean_text(self, name, **kwargs):
        """Strip quotes, then collapse whitespace."""
        return collapse_spaces(strip_quotes(name))

    def pick(self, values):
        """Choose one representative name.

        Values that sanitize to None are ignored. Returns None when nothing
        is left, the only candidate when there is one, and otherwise the
        result of ``setmedian`` over the candidates.
        """
        candidates = []
        for raw in ensure_list(values):
            text = sanitize_text(raw)
            if text is not None:
                candidates.append(text)
        if not candidates:
            return None
        if len(candidates) == 1:
            return candidates[0]
        return setmedian(candidates)

    def _specificity(self, value):
        """Delegate to ``dampen`` with the bounds 3 and 50."""
        # TODO: insert artificial intelligence here.
        return dampen(3, 50, value)

    def compare(self, left, right):
        """Score two names with ``jaro_winkler``."""
        score = jaro_winkler(left, right)
        return score
Ejemplo n.º 11
0
class UrlType(PropertyType):
    """A uniform resource locator (URL). Values are normalised so that they
    use valid encoding/quoting and carry a schema (e.g. 'http', 'https',
    ...)."""

    name = "url"
    group = "urls"
    label = _("URL")
    plural = _("URLs")
    matchable = True
    pivot = True

    def validate(self, url, **kwargs):
        """Return the verdict of ``is_valid_url`` for ``url``."""
        verdict = is_valid_url(url)
        return verdict

    def clean_text(self, url, **kwargs):
        """Normalise the URL via ``normalize_url``, keeping fragments."""
        normalized = normalize_url(url, drop_fragments=False)
        return normalized

    def _specificity(self, value):
        """Delegate to ``dampen`` with the bounds 10 and 120."""
        return dampen(10, 120, value)

    def rdf(self, value):
        """Wrap the URL itself in a ``URIRef``."""
        return URIRef(value)

    def node_id(self, value):
        """Return the ``url:``-prefixed node ID for the value."""
        return "url:%s" % value
Ejemplo n.º 12
0
class AddressType(PropertyType):
    """Property type for postal addresses."""

    # Separators that are rewritten to ", " during clean-up.
    LINE_BREAKS = re.compile(r'(\r\n|\n|<BR/>|<BR>|\t|ESQ\.,|ESQ,|;)')
    # A comma followed by another comma or a period (optional space between).
    COMMATA = re.compile(r'(,\s?[,\.])')
    name = 'address'
    group = 'addresses'
    label = _('Address')
    plural = _('Addresses')
    matchable = True

    def clean_text(self, address, **kwargs):
        """Normalise separators and whitespace; return None if nothing
        remains."""
        cleaned = self.LINE_BREAKS.sub(', ', address)
        cleaned = self.COMMATA.sub(', ', cleaned)
        cleaned = collapse_spaces(cleaned)
        if not len(cleaned):
            return None
        return cleaned

    # TODO: normalize well-known parts like "Street", "Road", etc.
    # TODO: consider using https://github.com/openvenues/pypostal
    # A ``normalize(self, address, **kwargs)`` override that makes addresses
    # more comparable is not implemented yet; the parent implementation is
    # used as-is.

    def _specificity(self, value):
        """Delegate to ``dampen`` with the bounds 10 and 60."""
        return dampen(10, 60, value)
Ejemplo n.º 13
0
class MimeType(PropertyType):
    """A MIME media type is a specification of a content type on a network.
    Each MIME type is assigned by IANA and consists of two parts: the type
    and sub-type. Common examples are: ``text/plain``, ``application/json`` and
    ``application/pdf``.

    MIME type properties do not contain parameters as used in HTTP headers,
    like ``charset=UTF-8``."""

    name = "mimetype"
    group = "mimetypes"
    label = _("MIME-Type")
    plural = _("MIME-Types")
    matchable = False

    def clean_text(self, text, **kwargs):
        """Normalise a MIME type string.

        Returns the normalised type, or None when normalisation yields the
        ``DEFAULT`` type (i.e. nothing more specific could be determined).
        """
        text = normalize_mimetype(text)
        if text != DEFAULT:
            return text
        return None

    def rdf(self, value):
        """Return a ``urn:mimetype:`` ``URIRef`` for the value."""
        return URIRef("urn:mimetype:%s" % value)

    def caption(self, value):
        """Return a human-readable label for the MIME type.

        Falls back to the raw value when the parsed type has no label, so
        callers always get a displayable string instead of None.
        """
        return parse_mimetype(value).label or value
Ejemplo n.º 14
0
class NameType(PropertyType):
    """Property type for names."""

    name = "name"
    group = "names"
    label = _("Name")
    plural = _("Names")
    matchable = True
    pivot = True

    def clean_text(self, name, **kwargs):
        """Strip quotes, then collapse whitespace."""
        unquoted = strip_quotes(name)
        collapsed = collapse_spaces(unquoted)
        return collapsed

    def pick(self, values):
        """Choose one representative name.

        Values that sanitize to None are ignored. With zero or one candidate
        the result of ``first`` is returned; otherwise ``setmedian`` is
        applied to the sorted candidates.
        """
        sanitized = (sanitize_text(v) for v in ensure_list(values))
        candidates = [text for text in sanitized if text is not None]
        if len(candidates) > 1:
            return setmedian(sorted(candidates))
        return first(candidates)

    def _specificity(self, value):
        """Delegate to ``dampen`` with the bounds 3 and 50."""
        # TODO: insert artificial intelligence here.
        return dampen(3, 50, value)

    def compare(self, left, right):
        """Score two names with ``jaro_winkler``."""
        score = jaro_winkler(left, right)
        return score

    def node_id(self, value):
        """Return the ``name:``-prefixed node ID built from the slugified
        value."""
        slug = slugify(value)
        return "name:%s" % slug
Ejemplo n.º 15
0
class StringType(PropertyType):
    """Property type registered under the name ``string``.

    Matching is switched off (``matchable = False``) and no node ID is
    produced for its values.
    """

    name = "string"
    label = _("Label")
    plural = _("Labels")
    matchable = False

    def node_id(self, value):
        """Return None for every value."""
        return None
Ejemplo n.º 16
0
class EntityType(PropertyType):
    """A reference to another entity via its ID. This is how entities in FtM
    become a graph: by pointing at each other using :ref:`references`.

    Entity IDs can either be `namespaced` or `plain`, depending on the context.
    When setting properties of this type, you can pass in an entity proxy or
    dict of the entity, the ID will then be extracted and stored.
    """

    # Alphanumeric start and end; dots and dashes allowed in between.
    REGEX_RAW = r"^[0-9a-zA-Z]([0-9a-zA-Z\.\-]*[0-9a-zA-Z])?$"
    REGEX = re.compile(REGEX_RAW)
    name = "entity"
    group = "entities"
    label = _("Entity")
    plural = _("Entities")
    matchable = True
    pivot = True

    def validate(self, value: str) -> bool:
        """Return True if the sanitized value is a well-formed entity ID."""
        candidate = sanitize_text(value)
        return candidate is not None and self.REGEX.match(candidate) is not None

    def clean(
        self,
        raw: Any,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Extract an entity ID from ``raw`` and pass it through
        ``clean_text``; returns None when no ID can be extracted."""
        extracted = get_entity_id(raw)
        if extracted is not None:
            return self.clean_text(
                extracted, fuzzy=fuzzy, format=format, proxy=proxy
            )
        return None

    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Check an entity ID string.

        Raises ``InvalidData`` when the ID equals the ID of the given proxy
        (an entity referring to itself). Otherwise returns the ID if it
        matches ``REGEX`` and None if it does not.
        """
        refers_to_itself = proxy is not None and text == proxy.id
        if refers_to_itself:
            template = gettext("Self-relationship (%s): %s")
            raise InvalidData(template % (proxy.schema, text))
        return text if self.REGEX.match(text) is not None else None

    def rdf(self, value: str) -> Identifier:
        """Return an ``entity:``-prefixed ``URIRef`` for the value."""
        return URIRef(f"entity:{value}")

    def caption(self, value: str) -> None:
        """Entity references have no caption; always returns None."""
        return None
Ejemplo n.º 17
0
class CountryType(EnumType):
    """Properties to define countries and territories. This is completely
    descriptive and needs to deal with data from many origins, so a number
    of unusual and controversial designations are supported (e.g. the Soviet
    Union, Transnistria, Somaliland, Kosovo)."""

    name = "country"
    group = "countries"
    label = _("Country")
    plural = _("Countries")
    matchable = True

    def _locale_names(self, locale):
        """Build the code-to-label mapping for ``locale``.

        Starts from a fixed set of extra designations and adds every
        territory of the locale whose lower-cased code is not already
        present and cannot be parsed as an integer.
        """
        # extra territories that OCCRP is interested in.
        names = {
            "zz": gettext("Global"),
            "eu": gettext("European Union"),
            # Overwrite "Czechia" label:
            "cz": gettext("Czech Republic"),
            "xk": gettext("Kosovo"),
            "yucs": gettext("Yugoslavia"),
            "csxx": gettext("Serbia and Montenegro"),
            "suhh": gettext("Soviet Union"),
            "ge-ab": gettext("Abkhazia"),
            "x-so": gettext("South Ossetia"),
            "so-som": gettext("Somaliland"),
            "gb-wls": gettext("Wales"),
            "gb-sct": gettext("Scotland"),
            "gb-nir": gettext("Northern Ireland"),
            "md-pmr": gettext("Transnistria"),
        }
        for territory, territory_label in locale.territories.items():
            key = territory.lower()
            if key not in names:
                try:
                    int(key)
                except ValueError:
                    # Only codes that are not plain integers are kept.
                    names[key] = territory_label
        return names

    def clean_text(self, country, fuzzy=False, **kwargs):
        """Determine a country code based on an input.

        The input may already be a known code; otherwise it is resolved via
        ``countrynames.to_code``. Returns None when that yields nothing.
        """
        candidate = country.lower().strip()
        if candidate in self.codes:
            return candidate
        resolved = countrynames.to_code(country, fuzzy=fuzzy)
        if resolved is None:
            return None
        return resolved.lower()

    def country_hint(self, value: str) -> str:
        """Return the value itself as the country hint."""
        return value

    def rdf(self, value: str) -> Identifier:
        """Return an ``iso-3166-1:``-prefixed ``URIRef`` for the value."""
        uri = "iso-3166-1:%s" % value
        return URIRef(uri)
Ejemplo n.º 18
0
class TextType(StringType):
    """Longer text fragments, such as descriptions or document text. Unlike
    string properties, it might make sense to treat properties of this type as
    full-text search material."""

    name = "text"
    label = _("Text")
    plural = _("Texts")
    # Size limit for values of this type: 30 times the MEGABYTE constant
    # (defined elsewhere; presumably a byte count - confirm).
    max_size = 30 * MEGABYTE
Ejemplo n.º 19
0
class PhoneType(PropertyType):
    """Property type for phone numbers.

    Numbers are parsed with python-phonenumbers
    (https://github.com/daviddrysdale/python-phonenumbers) and cleaned into
    E.164 format.
    """

    name = "phone"
    group = "phones"
    label = _("Phone number")
    plural = _("Phone numbers")
    matchable = True
    pivot = True

    def _clean_countries(self, proxy):
        """Yield the region hints to try when parsing: first None (no hint),
        then the upper-cased countries of ``proxy`` if one is given."""
        yield None
        if proxy is not None:
            for country in proxy.countries:
                yield country.upper()

    def _parse_number(self, number, proxy=None):
        """Yield every successful parse of ``number``.

        One parse is attempted per region hint from ``_clean_countries``;
        hints for which parsing raises ``NumberParseException`` are skipped.
        If no hint produces a parse, nothing is yielded.

        https://github.com/daviddrysdale/python-phonenumbers
        """
        for code in self._clean_countries(proxy):
            try:
                yield parse_number(number, code)
            except NumberParseException:
                pass

    def clean_text(self, number, proxy=None, **kwargs):
        """Return the first valid parse of ``number`` in E.164 format, or
        None if no parse is valid."""
        for num in self._parse_number(number, proxy=proxy):
            if is_valid_number(num):
                return format_number(num, PhoneNumberFormat.E164)
        return None

    def validate(self, number, proxy=None, **kwargs):
        """Return True if any parse of ``number`` is a valid phone number."""
        return any(
            is_valid_number(num)
            for num in self._parse_number(number, proxy=proxy)
        )

    def country_hint(self, value):
        """Return the lower-cased region code of the number, or None.

        None is returned both when the value cannot be parsed and when no
        region can be determined for the parsed number.
        """
        try:
            number = parse_number(value)
        except NumberParseException:
            return None
        code = geocoder.region_code_for_number(number)
        # The region lookup can come back empty; calling .lower() on None
        # would raise AttributeError.
        if code is None:
            return None
        return code.lower()

    def _specificity(self, value):
        """Delegate to ``dampen`` with the bounds 6 and 11."""
        # TODO: insert artificial intelligence here.
        return dampen(6, 11, value)

    def rdf(self, value):
        """Return a ``tel:``-prefixed ``URIRef`` for the value."""
        return URIRef("tel:%s" % value)

    def caption(self, value):
        """Return the number in international format.

        Falls back to the raw value when it cannot be parsed, so that
        rendering a caption never raises ``NumberParseException``.
        """
        try:
            number = parse_number(value)
        except NumberParseException:
            return value
        return format_number(number, PhoneNumberFormat.INTERNATIONAL)
Ejemplo n.º 20
0
class HTMLType(PropertyType):
    """Property type registered under the name ``html``.

    Matching is switched off (``matchable = False``) and no node ID is
    produced for its values.
    """

    name = "html"
    label = _("HTML")
    plural = _("HTMLs")
    matchable = False
    # Size limit for values of this type: 30 times the MEGABYTE constant
    # (defined elsewhere; presumably a byte count - confirm).
    max_size = 30 * MEGABYTE

    def node_id(self, value):
        """Return None for every value."""
        return None
Ejemplo n.º 21
0
class TextType(PropertyType):
    """Property type registered under the name ``text``.

    Matching is switched off (``matchable = False``) and no node ID is
    produced for its values.
    """

    name = "text"
    label = _("Text")
    plural = _("Texts")
    matchable = False
    # Size limit for values of this type: 30 times the MEGABYTE constant
    # (defined elsewhere; presumably a byte count - confirm).
    max_size = 30 * MEGABYTE

    def node_id(self, value):
        """Return None for every value."""
        return None
Ejemplo n.º 22
0
class DateType(PropertyType):
    """A date or time stamp. This is based on ISO 8601, but meant to allow for different
    degrees of precision by specifying a prefix. This means that ``2021``, ``2021-02``,
    ``2021-02-16``, ``2021-02-16T21``, ``2021-02-16T21:48`` and ``2021-02-16T21:48:52``
    are all valid values, with an implied precision.

    The timezone is always expected to be UTC and cannot be specified otherwise. There is
    no support for calendar weeks (``2021-W7``) and date ranges (``2021-2024``)."""

    name = "date"
    group = "dates"
    label = _("Date")
    plural = _("Dates")
    matchable = True

    def validate(self, value: str) -> bool:
        """Return True unless parsing the value yields an empty precision."""
        return parse(value).precision != Precision.EMPTY

    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Parse the text and return the ``text`` of the result.

        ``parse_format`` is used when a ``format`` is given, plain ``parse``
        otherwise.
        """
        if format is None:
            return parse(text).text
        return parse_format(text, format).text

    def _specificity(self, value: str) -> float:
        """Delegate to ``dampen`` with the bounds 5 and 13."""
        return dampen(5, 13, value)

    def compare(self, left: str, right: str) -> float:
        """Score two dates by dampening their common string prefix."""
        return dampen(4, 10, os.path.commonprefix([left, right]))

    def rdf(self, value: str) -> Identifier:
        """Return the value as an ``XSD.dateTime`` literal."""
        return Literal(value, datatype=XSD.dateTime)

    def node_id(self, value: str) -> str:
        """Return the ``date:``-prefixed node ID for the value."""
        return f"date:{value}"

    def to_datetime(self, value: str) -> Optional[datetime]:
        """Return the ``dt`` attribute of the parsed value."""
        parsed = parse(value)
        return parsed.dt

    def to_number(self, value: str) -> Optional[float]:
        """Return the value as a POSIX timestamp, or None when it has no
        datetime form."""
        moment = self.to_datetime(value)
        if moment is None:
            return None
        # We make a best effort all over the app to ensure all times are in UTC.
        if moment.tzinfo is None:
            moment = moment.replace(tzinfo=timezone.utc)
        return moment.timestamp()
Ejemplo n.º 23
0
class LanguageType(PropertyType):
    """Property type for languages, restricted to the codes in
    ``LANGUAGES``."""

    name = 'language'
    group = 'languages'
    label = _('Language')
    plural = _('Languages')
    matchable = False

    # Language whitelist
    LANGUAGES = [
        'eng', 'fra', 'deu', 'rus', 'spa', 'nld', 'ron', 'kat', 'ara', 'tur',
        'ltz', 'ell', 'lit', 'ukr', 'zho', 'bel', 'bul', 'bos', 'jpn', 'ces',
        'lav', 'por', 'pol', 'hye', 'hrv', 'hin', 'heb', 'uzb', 'mon', 'urd',
        'sqi', 'kor', 'isl', 'ita', 'est', 'nor', 'fas', 'swa', 'slv', 'slk',
        'aze', 'tgk', 'kaz', 'tuk', 'kir', 'hun', 'dan', 'afr', 'swe', 'srp',
        'ind', 'kan', 'mkd', 'mlt', 'msa', 'fin', 'cat'
    ]
    # The list above is only the fallback passed to get_env_list for
    # FTM_LANGUAGES.
    LANGUAGES = get_env_list('FTM_LANGUAGES', LANGUAGES)
    LANGUAGES = [code.lower().strip() for code in LANGUAGES]

    def __init__(self, *args):
        # Per-locale cache of code -> label mappings, filled by ``names``.
        self._names = {}

    @property
    def names(self):
        """Mapping of language code to label for the current locale.

        Built on first access per locale: each whitelisted code maps to
        itself, then to the locale's label where one is available.
        """
        locale = get_locale()
        if locale not in self._names:
            labels = self._names[locale] = {}
            labels.update((lang, lang) for lang in self.LANGUAGES)
            for raw_code, text in locale.languages.items():
                alpha3 = iso_639_alpha3(raw_code)
                if alpha3 in self.LANGUAGES:
                    labels[alpha3] = text
        return self._names[locale]

    def validate(self, text, **kwargs):
        """Return True if the sanitized text is a whitelisted code."""
        candidate = sanitize_text(text)
        return candidate is not None and candidate in self.LANGUAGES

    def clean_text(self, text, **kwargs):
        """Convert the text via ``iso_639_alpha3``; return None unless the
        result is whitelisted."""
        alpha3 = iso_639_alpha3(text)
        return alpha3 if alpha3 in self.LANGUAGES else None

    def rdf(self, value):
        """Return an ``iso-639:``-prefixed ``URIRef`` for the value."""
        uri = 'iso-639:%s' % value
        return URIRef(uri)

    def caption(self, value):
        """Return the label for the code, or the code itself if unknown."""
        return self.names.get(value, value)

    def to_dict(self):
        """Extend the parent's dict form with the ``values`` mapping."""
        payload = super(LanguageType, self).to_dict()
        payload['values'] = self.names
        return payload
Ejemplo n.º 24
0
class ChecksumType(PropertyType):
    """Used for content hashes, usually SHA1 (I know, I know)."""

    name = 'checksum'
    group = 'checksums'
    label = _('Checksum')
    plural = _('Checksums')
    matchable = True

    def rdf(self, value):
        """Return a ``hash:``-prefixed ``URIRef`` for the value."""
        uri = 'hash:%s' % value
        return URIRef(uri)
Ejemplo n.º 25
0
class StringType(PropertyType):
    """A simple string property with no additional semantics.

    Matching is switched off (``matchable = False``) and no node ID is
    produced for its values.
    """

    name = "string"
    label = _("Label")
    plural = _("Labels")
    matchable = False

    def node_id(self, value):
        """Return None for every value."""
        return None
Ejemplo n.º 26
0
class GenderType(EnumType):
    """A human gender. This is not meant to be a comprehensive model of
    the social realities of gender but a way to capture data from (mostly)
    government databases and represent it in a way that can be used by
    structured tools. I'm not sure this justifies the simplification."""

    MALE = "male"
    FEMALE = "female"
    OTHER = "other"

    # Aliases that are translated to one of the three codes above.
    LOOKUP = {
        "m": MALE,
        "man": MALE,
        "masculin": MALE,
        "männlich": MALE,
        "мужской": MALE,
        "f": FEMALE,
        "woman": FEMALE,
        "féminin": FEMALE,
        "weiblich": FEMALE,
        "женский": FEMALE,
        "o": OTHER,
        "d": OTHER,
        "divers": OTHER,
    }

    name = "gender"
    group = "genders"
    label = _("Gender")
    plural = _("Genders")
    matchable = False

    def _locale_names(self, locale: Locale) -> EnumValues:
        """Return the translated label for each of the three codes."""
        names = {}
        names[self.MALE] = gettext("male")
        names[self.FEMALE] = gettext("female")
        names[self.OTHER] = gettext("other")
        return names

    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Map the text to a gender code.

        The text is lower-cased and stripped, translated through ``LOOKUP``
        when it is an alias, and returned only if it is one of
        ``self.codes``; otherwise None.
        """
        key = text.lower().strip()
        code = self.LOOKUP.get(key, key)
        return code if code in self.codes else None

    def rdf(self, value: str) -> Identifier:
        """Return a ``gender:``-prefixed ``URIRef`` for the value."""
        return URIRef(f"gender:{value}")
Ejemplo n.º 27
0
class ChecksumType(PropertyType):
    """Used for content hashes, usually SHA1 (I know, I know)."""

    name = "checksum"
    group = "checksums"
    label = _("Checksum")
    plural = _("Checksums")
    matchable = True

    def rdf(self, value):
        """Return a ``hash:``-prefixed ``URIRef`` for the value."""
        identifier = "hash:%s" % value
        return URIRef(identifier)
Ejemplo n.º 28
0
class UrlType(PropertyType):
    """A uniform resource locator (URL). Values are normalised so that they
    use valid encoding/quoting and carry a schema (e.g. 'http', 'https',
    ...)."""

    SCHEMES = ("http", "https", "ftp", "mailto")
    DEFAULT_SCHEME = "http"

    name = "url"
    group = "urls"
    label = _("URL")
    plural = _("URLs")
    matchable = True
    pivot = True

    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Make sure a URL has a scheme and a host name.

        A missing scheme is replaced by ``DEFAULT_SCHEME``; a given scheme is
        lower-cased and must be one of ``SCHEMES``. An empty path becomes
        ``/``. Returns None when the text cannot be parsed, has no host, or
        uses another scheme.
        """
        try:
            parts = urlparse(text)
        except (TypeError, ValueError):
            return None
        if len(parts.netloc) == 0:
            # This is a pretty weird rule meant to catch things like
            # 'www.google.com', but it'll likely backfire in some
            # really creative ways.
            looks_like_host = "." in parts.path and not text.startswith("//")
            if not looks_like_host:
                return None
            return self.clean_text(f"//{text}")
        if len(parts.scheme):
            scheme = parts.scheme.lower()
        else:
            scheme = self.DEFAULT_SCHEME
        if scheme not in self.SCHEMES:
            return None
        parts = parts._replace(scheme=scheme)
        if len(parts.path) == 0:
            parts = parts._replace(path="/")
        return parts.geturl()

    def _specificity(self, value: str) -> float:
        """Delegate to ``dampen`` with the bounds 10 and 120."""
        return dampen(10, 120, value)

    def rdf(self, value: str) -> Identifier:
        """Wrap the URL itself in a ``URIRef``."""
        return URIRef(value)

    def node_id(self, value: str) -> Optional[str]:
        """Return the ``url:``-prefixed node ID for the value."""
        return f"url:{value}"
Ejemplo n.º 29
0
class EntityType(PropertyType):
    """Property type whose values are entity IDs."""

    name = 'entity'
    group = 'entities'
    label = _('Entity')
    plural = _('Entities')
    matchable = True

    def clean(self, text, **kwargs):
        """Return whatever ``get_entity_id`` extracts from ``text``."""
        entity_id = get_entity_id(text)
        return entity_id

    def rdf(self, value):
        """Return a ``urn:entity:``-prefixed ``URIRef`` for the value."""
        uri = 'urn:entity:%s' % value
        return URIRef(uri)
Ejemplo n.º 30
0
class HTMLType(StringType):
    """Properties that contain raw hypertext markup (HTML).

    User interfaces rendering properties of this type need to take extreme
    care not to allow attacks such as cross-site scripting. It is recommended
    to perform server-side sanitisation, or to not render this property at all.
    """

    name = "html"
    label = _("HTML")
    plural = _("HTMLs")
    # Size limit for values of this type: 30 times the MEGABYTE constant
    # (defined elsewhere; presumably a byte count - confirm).
    max_size = 30 * MEGABYTE