Exemple #1
0
class SerializationDoc(Document):
    """Document declaring one field for each of five scalar field types.

    The inner ``Index`` class names the index this document is bound to.

    NOTE(review): ``Document`` and the field classes are imported outside
    this view; they look like the elasticsearch-dsl API -- confirm.
    """
    i = Long()  # long (integer) field
    b = Boolean()  # boolean field
    d = Double()  # double (floating point) field
    bin = Binary()  # binary field; the name shadows the builtin only inside this class body
    ip = Ip()  # IP address field

    class Index:
        # Index the document is stored in.
        name = 'test-serialization'
class SerializationDoc(DocType):
    """Document declaring one field for each of five scalar field types.

    The target index is configured through the inner ``Meta`` class.

    NOTE(review): ``DocType`` looks like the older elasticsearch-dsl base
    class that newer releases replace with ``Document`` -- confirm which
    library version this file targets before touching the base class.
    """
    i = Long()  # long (integer) field
    b = Boolean()  # boolean field
    d = Double()  # double (floating point) field
    bin = Binary()  # binary field; the name shadows the builtin only inside this class body
    ip = Ip()  # IP address field

    class Meta:
        # Index the document is stored in.
        index = 'test-serialization'
Exemple #3
0
    class Type(DocType):
        """Page-like document: a URL plus three analysed text fields.

        ``indexName`` is read from the enclosing scope, which is not part of
        this view, so the target index is decided by the surrounding code.

        NOTE(review): ``ik_smart`` is presumably an analyzer provided by a
        plugin that must be installed on the cluster -- confirm.
        """
        url = Keyword()  # keyword field holding the URL
        title = Text(analyzer="ik_smart")  # text field analysed with "ik_smart"
        content = Text(analyzer="ik_smart")  # text field analysed with "ik_smart"
        description = Text(analyzer="ik_smart")  # text field analysed with "ik_smart"
        err = Binary()  # binary field; its purpose is not visible here -- TODO confirm

        class Meta:
            # Index name comes from the enclosing scope.
            index = indexName
            # Mapping type name used for these documents.
            doc_type = "_doc"
Exemple #4
0
    class Type(DocType):
        """Article-like document: classification keywords, a date and text fields.

        ``indexName`` is read from the enclosing scope, which is not part of
        this view, so the target index is decided by the surrounding code.

        NOTE(review): ``ik_smart`` is presumably an analyzer provided by a
        plugin that must be installed on the cluster -- confirm.
        """
        column_classify = Keyword()  # keyword field
        channel = Keyword()  # keyword field
        datetime = Date()  # date field; the name shadows nothing outside this class body
        source = Keyword()  # keyword field
        url = Keyword()  # keyword field holding the URL
        title = Text(analyzer="ik_smart")  # text field analysed with "ik_smart"
        content = Text(analyzer="ik_smart")  # text field analysed with "ik_smart"
        description = Text(analyzer="ik_smart")  # text field analysed with "ik_smart"
        err = Binary()  # binary field; its purpose is not visible here -- TODO confirm
        main = Text(analyzer="ik_smart")  # text field analysed with "ik_smart"

        class Meta:
            # Index name comes from the enclosing scope.
            index = indexName
            # Mapping type name used for these documents.
            doc_type = "_doc"
Exemple #5
0
class Schema(Document):
    '''
    A top-level schema document.
    https://schema.org/docs/schemas.html

    The document _id is the schema namespace, supplied by the front-end
    when the schema is registered. It is reachable through the ``id``
    constructor argument or through ``schema.meta.id``.
    '''
    _meta = Object(DocumentMeta, required=True)
    context = Object(enabled=False)
    url = Keyword(required=True)
    # '~raw' is not a valid Python identifier, so the field is declared by
    # writing into the class-body namespace instead of a plain assignment.
    locals()['~raw'] = Binary()

    class Index:
        '''
        Elasticsearch index that stores these documents.
        '''
        name = 'discover_schema'
        settings = {"number_of_replicas": 0}

    @classmethod
    def gather_contexts(cls):
        '''
        Merge the ``context`` of every stored schema into one mapping.

        The ``schema`` prefix is always forced to http://schema.org/ and
        entries whose value is falsy are left out of the result.
        '''
        merged = {}
        for document in cls.search().scan():
            merged.update(document.context.to_dict())

        # Applied last so a stored context can never override it.
        merged["schema"] = "http://schema.org/"

        return {prefix: target for prefix, target in merged.items() if target}

    @classmethod
    def exists(cls, url_or_ns):
        '''Tell whether a schema is stored under the given url or namespace.'''
        by_namespace = Q('term', _id=url_or_ns)
        by_url = Q('term', url=url_or_ns)
        either = Q('bool', should=[by_namespace, by_url])
        # Only the hit list matters, so the document source is not fetched.
        response = cls.search().source(False).query(either).execute()
        return bool(response.hits)

    def encode_raw(self, text):
        '''
        Gzip the original schema text and store it in the ~raw field.
        Also sets _meta.timestamp to the current time.
        Return the compressed bytes.
        '''
        assert isinstance(text, str)
        compressed = gzip.compress(text.encode())
        self['~raw'] = compressed
        self._meta.timestamp = datetime.now()
        return compressed

    def decode_raw(self):
        '''
        Decompress the ~raw field and parse it as JSON.
        Return the parsed value, or None when ~raw is not set.
        '''
        if '~raw' not in self:
            return None
        payload = gzip.decompress(self['~raw'])
        return json.loads(payload.decode())

    def save(self, *args, **kwargs):
        '''Save the document; an id must already have been assigned.'''
        assert self.meta.id
        return super().save(*args, **kwargs)
Exemple #6
0
class Schema(Document):
    """
    A discovery-app schema object.
    The es backend is a collection of objects of this type.

    The document _id is a hash of ``_meta.url`` (see ``encode_url``) and the
    gzip-compressed schema source is kept in the ``~raw`` binary field.
    """
    # '~raw' is not a valid Python identifier, so the field is declared by
    # writing into the class-body namespace instead of a plain assignment.
    locals()['~raw'] = Binary()
    clses = Keyword(multi=True)
    props = Keyword(multi=True)
    _meta = Object(Metadata, required=True)

    #pylint:disable=too-few-public-methods
    class Index:
        ''' Associated ES index information '''
        name = 'discovery'
        doc_type = 'schema'
        settings = {"number_of_shards": 1, "number_of_replicas": 0}

    class Meta:
        ''' Meta-Fields for Schema document '''
        doc_type = 'schema'

    def encode_url(self):
        ''' Generate the URL hash used as the document _id,
        automatically invoked when a Schema is saved.

        Raises ValueError when ``_meta.url`` is missing or empty. '''
        url = getattr(self._meta, 'url', None)
        if not url:
            raise ValueError("Missing required _meta.url field.")
        return blake2b(url.encode('utf8'), digest_size=16).hexdigest()

    def encode_raw(self, timeout=30):
        ''' Download the schema from ``_meta.url`` and gzip it,
        automatically invoked during saving if ~raw is not set.

        ``timeout`` (seconds) is handed to ``requests.get`` so that an
        unresponsive server cannot block the caller forever.

        Returns the compressed bytes, or None when the download fails
        (connection error, timeout or non-success HTTP status). '''
        try:
            res = requests.get(self._meta.url, timeout=timeout)
            res.raise_for_status()
            text = res.text
        except requests.exceptions.RequestException:
            # Best effort: the caller decides what a missing body means.
            return None
        return gzip.compress(text.encode())

    def decode_raw(self):
        ''' Decode the saved ~raw field, or return an empty string when
        ~raw is not set or holds no data. '''
        raw = self['~raw'] if '~raw' in self else None
        if not raw:
            return ''
        return gzip.decompress(raw).decode()

    #pylint: disable=arguments-differ
    def save(self, ref_raw=False, **kwargs):
        '''
        Save the Schema document into elasticsearch.
        If the document does not exist it is created, otherwise it is
        overwritten.

        The _id is set to a hash of the url field and ``_meta.stamp()`` is
        called before writing. When ``ref_raw`` is true, or ~raw is not set,
        the schema source is downloaded again; if that download fails the
        ~raw field is left unset so that a later save retries it.

        ``refresh`` defaults to True but may be overridden by the caller.
        Returns the result of the parent ``save``.
        Raises ValueError when ``_meta.url`` is missing.
        '''
        self.meta.id = self.encode_url()
        if ref_raw or '~raw' not in self:
            raw = self.encode_raw()
            if raw is not None:
                self['~raw'] = raw
            elif '~raw' in self:
                # Never keep None in the field: it would make ~raw look set
                # and break decode_raw.
                del self['~raw']
        self._meta.stamp()
        # setdefault avoids a duplicate-keyword TypeError when the caller
        # passes refresh explicitly.
        kwargs.setdefault('refresh', True)
        return super().save(**kwargs)