Esempio n. 1
0
class Manga(Document):
    """Document for a scanned manga, stored in the ``nhentai__mangas`` index."""

    # Multi-valued title with a ``space`` sub-field (``titles_space``
    # analyzer) and a ``keyword`` sub-field. The earlier bare
    # ``field.Text()`` assignment was dead code: this one replaced it.
    title = field.Text(analyzer=titles,
                       multi=True,
                       fields={
                           'space': field.Text(analyzer=titles_space,
                                               multi=True),
                           'keyword': field.Keyword(multi=True),
                       })
    tags = field.Object(Tag)
    upload_at = field.Date()
    scan_at = field.Date()

    url = field.Keyword()
    cover_url = field.Keyword()
    images_urls = field.Keyword(multi=True)
    images_len = field.Integer()

    class Index:
        name = 'nhentai__mangas'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        """
        Tell whether a manga with this exact ``url`` is already stored.

        Runs a ``term`` filter on the ``url`` field and returns ``True``
        when the count of matching documents is greater than zero.
        """
        logger.info(f"buscando manga {url}")
        return cls.search().filter("term", url=url).count() > 0
Esempio n. 2
0
class ForumDocument(SumoDocument):
    """
    ES document for a single forum post.

    Fields whose name starts with ``thread_`` are read from the post's
    thread, so the same thread data is repeated on every post of a thread.
    """

    # Values read from ``instance.thread`` (see get_field_value).
    thread_title = field.Text()
    thread_forum_id = field.Keyword()
    thread_created = field.Date()
    thread_creator_id = field.Keyword()
    thread_is_locked = field.Boolean()
    thread_is_sticky = field.Boolean()

    # Values read from the post itself.
    content = field.Text()
    author_id = field.Keyword()
    created = field.Date()
    updated = field.Date()
    updated_by_id = field.Keyword()

    class Index:
        name = config.FORUM_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    @classmethod
    def get_model(cls):
        """Return the model class this document is built from."""
        return Post

    @classmethod
    def get_queryset(cls):
        """Return the posts queryset with each post's thread joined in."""
        return Post.objects.select_related("thread")

    def get_field_value(self, field, instance, *args):
        """
        Resolve ``field`` for ``instance``.

        A ``thread_`` prefix is stripped and the lookup is done on
        ``instance.thread``; any other field is looked up on ``instance``.
        """
        prefix = "thread_"
        if not field.startswith(prefix):
            return super().get_field_value(field, instance, *args)
        thread = instance.thread
        return super().get_field_value(field[len(prefix):], thread, *args)
Esempio n. 3
0
class Inner_sub_profile(InnerDoc):
    """Inner document holding a sub profile id together with a status."""

    sub_profile_id = field.Keyword()
    status = field.Keyword()

    @property
    def sub_profile(self):
        """Fetch the ``Sub_profile`` identified by ``sub_profile_id``."""
        profile_id = self.sub_profile_id
        return Sub_profile.get(profile_id)
class User(InnerDoc):
    """Inner document with a user's ``name`` and ``url``."""

    # ``name`` is analysed with ``titles`` and has two sub-fields:
    # ``space`` (``titles_space`` analyzer) and ``keyword``.
    name = field.Text(
        analyzer=titles,
        fields={
            'space': field.Text(analyzer=titles_space),
            'keyword': field.Keyword(),
        },
    )
    url = field.Keyword()
class Metadata(InnerDoc):
    """Inner document of metadata fields; all keywords except ``published``."""
    language = field.Keyword()
    # "fuente" is Spanish for "source" -- presumably the data source; confirm.
    fuente = field.Keyword()
    frequency = field.Keyword()
    name_publisher = field.Keyword()
    email_publisher = field.Keyword()
    published = field.Date()
Esempio n. 6
0
class Index_inner(InnerDoc):
    """Inner document pairing an index name with an exported class path."""

    index = field.Keyword()
    klass = field.Keyword()

    @property
    def map(self):
        """
        Return the document class this entry refers to.

        The class is ``import_(self.klass)`` when ``klass`` is set and
        ``Dweller`` otherwise. When ``index`` is set, the class's
        ``_index._name`` is overwritten with it before the class is returned.
        """
        document_class = import_(self.klass) if self.klass else Dweller
        if self.index:
            # This rebinds the index name on the class object itself.
            document_class._index._name = self.index
        return document_class

    @map.setter
    def map(self, value):
        """Store the exported form of ``value`` and its current index name."""
        self.klass = export(value)
        self.index = value._index._name

    @property
    def exists(self):
        """Return the result of ``exists()`` on the mapped class's index."""
        return self.map._index.exists()

    def purge(self):
        """
        Delete the mapped class's index.

        Raises ``Dangerous_purge`` when the index name contains ``*``.
        """
        index_name = self.map._index._name
        if '*' in index_name:
            raise Dangerous_purge(index_name)
        self.map._index.delete()
Esempio n. 7
0
class Document(BaseDocument):
    """Document declaring url, referer, title, html, text and timestamp fields."""
    # ``url`` is stored as a keyword and, separately, as text in ``url_text``.
    url = field.Keyword()
    url_text = field.Text()
    referer = field.Keyword()
    title = field.Text()
    html = field.Text()
    text = field.Text()
    # The default timezone is taken from the project settings.
    timestamp = field.Date(default_timezone=settings.TIME_ZONE)
 def _schema2doc_map(self):
     """
     Build the mapping from schema type names to Elasticsearch fields.

     Every value is a ``CustomObject`` with two properties: ``val``, the
     type-specific field, and ``repr``, a keyword field.
     """
     def analyzed_text():
         # Text analysed with ``polish_analyzer`` plus ``raw`` and
         # ``keyword`` sub-fields; a fresh instance on every call.
         return dsl_field.Text(analyzer=polish_analyzer,
                               fields={
                                   'raw': dsl_field.Text(),
                                   'keyword': dsl_field.Keyword(),
                               })

     def text_with_date(sub_field, formats):
         # Text field with a ``text`` sub-field and a date sub-field named
         # ``sub_field`` that uses the given formats.
         return dsl_field.Text(fields={
             'text': dsl_field.Text(),
             sub_field: dsl_field.Date(format=formats),
         })

     value_fields = {
         'integer': dsl_field.Long(),
         'number': dsl_field.ScaledFloat(scaling_factor=100),
         'string': analyzed_text(),
         'any': analyzed_text(),
         'boolean': dsl_field.Boolean(),
         'time': text_with_date('time', constance_config.TIME_FORMATS),
         'duration': dsl_field.DateRange(),
         'default': dsl_field.Text(),
         'date': text_with_date('date', constance_config.DATE_FORMATS),
         'datetime': text_with_date('datetime',
                                    constance_config.DATE_FORMATS),
     }
     return {
         type_name: CustomObject(properties={
             'val': value_field,
             'repr': dsl_field.Keyword(),
         })
         for type_name, value_field in value_fields.items()
     }
class Data_set_resource(InnerDoc):
    """Inner document for one resource entry: title, description, link, kind."""

    # ``title`` and ``description`` share the same shape: ``titles``
    # analyzer with ``space`` and ``keyword`` sub-fields.
    title = field.Text(
        analyzer=titles,
        fields={
            'space': field.Text(analyzer=titles_space),
            'keyword': field.Keyword(),
        },
    )
    description = field.Text(
        analyzer=titles,
        fields={
            'space': field.Text(analyzer=titles_space),
            'keyword': field.Keyword(),
        },
    )
    download_link = field.Keyword()
    kind = field.Keyword()
Esempio n. 10
0
class ProfileDocument(SumoDocument):
    """ES document built from a user profile instance."""

    username = field.Keyword(normalizer="lowercase")
    name = field.Text(fields={"keyword": field.Keyword()})
    email = field.Keyword()
    # The avatar url is kept in the document so user searches need no db
    # lookup; enabled=False keeps ES from parsing it.
    avatar = field.Object(enabled=False)

    timezone = field.Keyword()
    country = field.Keyword()
    locale = field.Keyword()

    involved_from = field.Date()

    product_ids = field.Keyword(multi=True)
    group_ids = field.Keyword(multi=True)

    class Index:
        name = config.USER_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    def prepare_username(self, instance):
        """Return the username of the profile's user."""
        user = instance.user
        return user.username

    def prepare_email(self, instance):
        """Return the user's email, or ``None`` when ``public_email`` is falsy."""
        if not instance.public_email:
            return None
        return instance.user.email

    def prepare_avatar(self, instance):
        """Wrap ``fxa_avatar`` in an ``InnerDoc``; ``None`` when it is falsy."""
        avatar_url = instance.fxa_avatar
        if not avatar_url:
            return None
        return InnerDoc(url=avatar_url)
Esempio n. 11
0
    class D(document.Document):
        """Document with a single keyword field, a custom doc type and index name."""
        kw = field.Keyword()
        # Sets the doc type to 'not-doc'.
        class Meta:
            doc_type = 'not-doc'

        class Index:
            name = 'test-not-doc-index'
Esempio n. 12
0
    def doc(self):
        """
        Return the document class for this schema, building it on first use.

        Each entry of ``self.schema['fields']`` becomes a ``colN`` field
        (numbered from 1) whose type is looked up in ``_schema2doc_map``.
        The ``colN`` -> original name mapping is stored under
        ``_meta['headers']`` of the mapping. The class is cached in
        ``self._doc_cache`` and reused on later calls.
        """
        if self._doc_cache:
            return self._doc_cache

        _fields, _map = {}, {}
        # Start at 1 so column names are col1, col2, ... as before.
        for idx, _f in enumerate(self.schema['fields'], 1):
            field_name = 'col{}'.format(idx)
            # ``_schema2doc_map`` is resolved as a module-level name here.
            _fields[field_name] = _schema2doc_map[_f['type']]
            _map[field_name] = _f['name']

        _fields['resource'] = dsl_field.Nested(
            properties={
                'id': dsl_field.Integer(),
                'title': dsl_field.Text(
                    analyzer=polish_analyzer,
                    fields={'raw': dsl_field.Keyword()})
            }
        )

        _fields['updated_at'] = dsl_field.Date()
        _fields['row_no'] = dsl_field.Long()

        doc = type(self.idx_name, (DocType,), _fields)
        doc._doc_type.index = self.idx_name
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        # A bare ``..._meta['_meta']`` expression statement used to follow
        # here; it had no effect and was removed.
        self._doc_cache = doc
        return self._doc_cache
    def prepare_doc(self):
        """
        Build and return a ``Document`` subclass for this schema.

        Schema fields become ``colN`` attributes (numbered from 1); geo
        fields are added when ``self.has_geo_data`` is truthy. The
        ``colN`` -> original name mapping is stored under
        ``_meta['headers']`` of the mapping.
        """
        headers = {}
        attrs = {}
        for position, column in enumerate(self.schema['fields'], 1):
            column_key = f'col{position}'
            headers[column_key] = column['name']
            attrs[column_key] = self._schema2doc_map[column['type']]

        if self.has_geo_data:
            attrs.update(
                shape=dsl_field.GeoShape(),
                point=dsl_field.GeoPoint(),
                label=dsl_field.Text(),
                shape_type=dsl_field.Integer(),
            )

        resource_title = dsl_field.Text(analyzer=polish_analyzer,
                                        fields={'raw': dsl_field.Keyword()})
        attrs['resource'] = dsl_field.Nested(properties={
            'id': dsl_field.Integer(),
            'title': resource_title,
        })
        attrs['updated_at'] = dsl_field.Date()
        attrs['row_no'] = dsl_field.Long()
        # Inner ``Index`` class carrying the index name.
        attrs['Index'] = type('Index', (type, ), {'name': self.idx_name})

        document_class = type(self.idx_name, (Document, ), attrs)
        document_class._doc_type.mapping._meta['_meta'] = {'headers': headers}
        return document_class
Esempio n. 14
0
class DocWithNested(document.DocType):
    """Doc type with a nested ``comments`` field (``title`` and ``tags``)."""

    comments = field.Nested(properties={
        'title': field.Text(),
        'tags': field.Keyword(multi=True),
    })
Esempio n. 15
0
class Ssn_trace(InnerDoc):
    """Inner document declaring two boolean flags, an ssn, a message and issue data."""
    is_valid = field.Boolean()
    is_deceased = field.Boolean()

    ssn = field.Keyword()
    human_message = field.Text()
    # Object field whose inner document class is ``Ssn_issued``.
    issued = field.Object(Ssn_issued)
Esempio n. 16
0
class Activity(InnerDoc):
    """Inner document with an ``action``, its ``date`` and a ``user`` object."""

    # ``action`` uses the ``titles`` analyzer with ``space`` and
    # ``keyword`` sub-fields.
    action = field.Text(
        analyzer=titles,
        fields={
            'space': field.Text(analyzer=titles_space),
            'keyword': field.Keyword(),
        },
    )
    date = field.Date()
    user = field.Object(User)
Esempio n. 17
0
class MessageIndex(DocType):
    """Doc type for a chat message: room, user, creation date, text, status, tags."""

    room = field.Keyword()
    user = field.Text()
    created = field.Date()
    message = field.Text()
    status = field.Text()
    # Nested objects, each with a single text property called ``tags``.
    tags = Nested(properties={'tags': field.Text()})

    class Meta:
        # Elasticsearch only accepts lowercase index names, so the former
        # 'Message' could not be created as an index.
        index = 'message'
Esempio n. 18
0
def get_document_fields(document, excluding=None):
    """
    Return the document's mapped fields as a name-sorted ``OrderedDict``.

    An ``_id`` keyword field is always included as a starting entry. Names
    listed in ``excluding`` are left out of the mapped fields; a falsy
    ``excluding`` excludes nothing.
    """
    skipped = excluding if excluding else []
    fields_by_name = {"_id": elasticsearch_fields.Keyword()}
    mapped = document._doc_type.mapping.properties.properties._d_
    fields_by_name.update(
        (name, mapped_field)
        for name, mapped_field in mapped.items()
        if name not in skipped
    )
    return OrderedDict(sorted(fields_by_name.items()))
Esempio n. 19
0
class Image(Dweller):
    """
    Dweller whose value is an image.

    Assigning ``value`` fills the keyword fields below from the image and
    creates a thumbnail for it.
    """
    mime = field.Keyword()
    extension = field.Keyword()
    file = field.Keyword()
    dir = field.Keyword()
    album = field.Keyword()
    base_path = field.Keyword()
    thumbnail_dir = field.Keyword()
    thumbnail_path = field.Keyword()

    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)
        # Only log a warning when a stored path does not exist.
        if self.value_raw and not self.value.exists:
            logger.warning("cannot find the file '{}'".format(self.value.path))

    # Reuse the ``value`` property of Dweller, replacing only its getter.
    @Dweller.value.getter
    def value(self):
        """Return ``value_raw`` wrapped in a ``Chibi_image``."""
        return Chibi_image(self.value_raw)

    @value.setter
    def value(self, value):
        """
        Store the image and derive the other fields from it.

        ``None`` falls back to the current ``value_raw``; a ``str`` is
        wrapped in ``Chibi_image``. Any other object is used as is and must
        offer ``path``, ``properties``, ``file_name``, ``dir`` and
        ``thumbnail``.
        """
        if value is None:
            value = self.value_raw

        if isinstance(value, str):
            value = Chibi_image(value)

        self.value_raw = value.path
        self.mime = value.properties.mime
        self.extension = value.properties.extension
        self.file = value.file_name
        self.dir = value.dir
        # The album is the last component of the image's directory.
        self.album = os.path.split(self.dir)[-1]

        # Drop that last component and join what is left back together.
        self.base_path = list(os.path.split(self.dir))
        self.base_path.pop()
        self.base_path = join(*self.base_path)

        # Use the configured thumbnail dir when set, otherwise derive one
        # from base_path via add_extensions(..., "thumbnail").
        if self.thumbnail_dir:
            self.thumbnail_path = Chibi_path(self.thumbnail_dir)
        else:
            self.thumbnail_path = add_extensions(self.base_path, "thumbnail")
        # NOTE(review): presumably ``+`` appends the album as a sub-path;
        # confirm against Chibi_path.
        self.thumbnail_path = self.thumbnail_path + self.album

        mkdir(self.thumbnail_path)
        thumbnail = value.thumbnail(self.thumbnail_path)
        # Replace the thumbnail directory with the thumbnail's own path.
        self.thumbnail_path = thumbnail.path

    @property
    def thumbnail(self):
        """Return ``thumbnail_path`` wrapped in a ``Chibi_image``."""
        return Chibi_image(self.thumbnail_path)
Esempio n. 20
0
class Resource(Document):
    """Document for a dataset resource, stored in ``chibi_gob__open_data__dataset__resource``."""

    # ``title`` and ``description`` use the ``titles`` analyzer with
    # ``space`` and ``keyword`` sub-fields.
    title = field.Text(analyzer=titles,
                       fields={
                           'space': field.Text(analyzer=titles_space),
                           'keyword': field.Keyword(),
                       })
    description = field.Text(analyzer=titles,
                             fields={
                                 'space': field.Text(analyzer=titles_space),
                                 'keyword': field.Keyword(),
                             })
    kind = field.Keyword()
    url = field.Keyword()
    created_at = field.Date()

    # Same shape as ``title`` but multi-valued.
    tags = field.Text(analyzer=titles,
                      multi=True,
                      fields={
                          'space': field.Text(analyzer=titles_space,
                                              multi=True),
                          'keyword': field.Keyword(multi=True),
                      })

    metadata = field.Object(Metadata)

    class Index:
        name = 'chibi_gob__open_data__dataset__resource'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        """
        Tell whether a resource with this exact ``url`` is already stored.

        Runs a ``term`` filter on the ``url`` field and returns ``True``
        when the count of matching documents is greater than zero.
        """
        logger.info(f"buscando dataset {url}")
        return cls.search().filter("term", url=url).count() > 0

    def save(self, *args, **kw):
        """
        Save the document and return whatever the parent ``save`` returns.

        The override used to drop that return value, so callers always got
        ``None``.
        """
        return super().save(*args, **kw)
Esempio n. 21
0
class Dataset(Document):
    """Document for a dataset, stored in ``chibi_gob__open_data__dataset``."""

    resources = field.Object(Data_set_resource, multi=True)
    # Multi-valued tags with ``space`` and ``keyword`` sub-fields.
    tags = field.Text(analyzer=titles,
                      multi=True,
                      fields={
                          'space': field.Text(analyzer=titles_space,
                                              multi=True),
                          'keyword': field.Keyword(multi=True),
                      })

    metadata = field.Object(Metadata)
    activity = field.Object(Activity, multi=True)
    url = field.Keyword()
    status = field.Keyword()
    created_at = field.Date()

    class Index:
        name = 'chibi_gob__open_data__dataset'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        """
        Tell whether a dataset with this exact ``url`` is already stored.

        Runs a ``term`` filter on the ``url`` field and returns ``True``
        when the count of matching documents is greater than zero.
        """
        logger.info(f"buscando dataset {url}")
        return cls.search().filter("term", url=url).count() > 0

    @classmethod
    def get_by_url(cls, url):
        """
        Return the first dataset whose ``url`` matches exactly, else ``None``.

        Only one hit is requested from the search.
        """
        logger.info(f"get dataset {url}")
        result = cls.search().filter("term", url=url)[:1].execute()
        if result:
            return result[0]
        return None

    def save(self, *args, **kw):
        """
        Save the document and return whatever the parent ``save`` returns.

        The override used to drop that return value, so callers always got
        ``None``.
        """
        return super().save(*args, **kw)
class Article( Document ):
    """Document for a scanned article, stored in ``somos_kudasai__articles``."""

    # ``title`` and ``text`` are multi-valued, analysed with ``titles`` and
    # carry ``space`` and ``keyword`` sub-fields.
    title = field.Text(
        analyzer=titles, multi=True,
        fields={
            'space': field.Text( analyzer=titles_space, multi=True ),
            'keyword': field.Keyword( multi=True ),
        } )
    text = field.Text(
        analyzer=titles, multi=True,
        fields={
            'space': field.Text( analyzer=titles_space, multi=True ),
            'keyword': field.Keyword( multi=True ),
        } )
    # ``category`` uses its own analyzer and only a ``keyword`` sub-field.
    category = field.Text(
        analyzer=category, multi=True,
        fields={
            'keyword': field.Keyword( multi=True ),
        } )
    create_at = field.Date()
    upload_at = field.Date()
    scan_at = field.Date()
    url = field.Keyword()

    class Index:
        name = 'somos_kudasai__articles'
        settings = { 'number_of_shards': 2, 'number_of_replicas': 1 }

    @classmethod
    def url_is_scaned( cls, url ):
        """
        Tell whether an article with this exact ``url`` is already stored.

        Runs a ``term`` filter on the ``url`` field and returns ``True``
        when the count of matching documents is greater than zero.
        """
        logger.info( f"buscando articulo {url}" )
        return cls.search().filter( "term", url=url ).count() > 0

    def save( self, *args, **kw ):
        """
        Save the document and return whatever the parent ``save`` returns.

        The override used to drop that return value, so callers always got
        ``None``.
        """
        return super().save( *args, **kw )
Esempio n. 23
0
class CaseStudyInnerDoc(InnerDoc):
    """Inner document for a case study; most fields name ``wildcard`` as their copy_to target."""
    # Target named by the ``copy_to='wildcard'`` fields below.
    wildcard = field.Text()
    # ``pk``, ``image``, ``company_number`` and ``slug`` are declared with
    # index=False.
    pk = field.Integer(index=False)
    title = field.Text(copy_to='wildcard')
    short_summary = field.Text(copy_to='wildcard')
    description = field.Text(copy_to='wildcard')
    sector = field.Text(copy_to='wildcard')
    keywords = field.Text(copy_to='wildcard')
    image = field.Text(index=False)
    company_number = field.Text(index=False)
    image_one_caption = field.Text(copy_to='wildcard')
    image_two_caption = field.Text(copy_to='wildcard')
    image_three_caption = field.Text(copy_to='wildcard')
    testimonial = field.Text(copy_to='wildcard')
    # The only keyword field that also copies into ``wildcard``.
    testimonial_name = field.Keyword(copy_to='wildcard')
    testimonial_job_title = field.Text(copy_to='wildcard')
    slug = field.Text(index=False)
Esempio n. 24
0
class ProfileDocument(SumoDocument):
    """ES document built from a user profile instance."""

    username = field.Keyword(normalizer="lowercase")
    name = field.Text(fields={"keyword": field.Keyword()})
    email = field.Keyword()
    # The avatar url is kept in the document so user searches need no db
    # lookup; enabled=False keeps ES from parsing it.
    avatar = field.Object(enabled=False)

    timezone = field.Keyword()
    country = field.Keyword()
    locale = field.Keyword()

    involved_from = field.Date()

    product_ids = field.Keyword(multi=True)
    group_ids = field.Keyword(multi=True)

    class Index:
        name = config.USER_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    @classmethod
    def prepare(cls, instance):
        """
        Prepare ``instance`` via the parent class, flagging inactive users.

        When the profile's user is not active, ``es_discard_doc`` is set to
        ``"unindex_me"`` on the instance before delegating.
        """
        user_is_active = instance.user.is_active
        if not user_is_active:
            instance.es_discard_doc = "unindex_me"

        return super().prepare(instance)

    def prepare_username(self, instance):
        """Return the username of the profile's user."""
        user = instance.user
        return user.username

    def prepare_email(self, instance):
        """Return the user's email, or ``None`` when ``public_email`` is falsy."""
        if not instance.public_email:
            return None
        return instance.user.email

    def prepare_avatar(self, instance):
        """Wrap ``fxa_avatar`` in an ``InnerDoc``; ``None`` when it is falsy."""
        avatar_url = instance.fxa_avatar
        if not avatar_url:
            return None
        return InnerDoc(url=avatar_url)
Esempio n. 25
0
class Population(Document):
    """Document stored in the ``population`` index: a dweller plus samples."""

    name = field.Text(fields={'raw': field.Keyword()})
    description = field.Text()
    dweller = field.Object(Dweller_inner)
    samples = field.Object(Dweller_inner, multi=True)

    class Index:
        name = "population"

    def add_sample(self, sample_class=None, index=None):
        """
        Append a sample entry to ``samples``.

        The entry holds the exported ``sample_class`` (``Sample`` when
        ``None``) under ``klass`` and, only when ``index`` is not ``None``,
        the index under ``index``.
        """
        chosen_class = Sample if sample_class is None else sample_class
        entry = {'klass': export(chosen_class)}
        if index is not None:
            entry['index'] = index
        self.samples.append(entry)
Esempio n. 26
0
class Dweller(Document):
    """Document with a raw keyword value and a raw float, exposed as properties."""

    value_raw = field.Keyword()
    real_raw = field.Float()

    def __init__(self, *args, value=None, **kw):
        """Build the document; a non-``None`` ``value`` goes through the setter."""
        super().__init__(*args, **kw)
        if value is None:
            return
        self.value = value

    @property
    def value(self):
        """Return ``value_raw``."""
        return self.value_raw

    @value.setter
    def value(self, value):
        """Store ``str(value)`` in ``value_raw``."""
        as_text = str(value)
        self.value_raw = as_text

    @property
    def real(self):
        """Return ``real_raw``."""
        return self.real_raw
    def prepare_doc(self):
        """
        Build and return a ``Document`` subclass for this schema.

        The class always has the geo fields, ``resource``, ``updated_at``,
        ``row_no`` and an inner ``Index`` class named after
        ``self.idx_name``. Each schema field whose type is in
        ``self._schema2doc_map`` adds a ``colN`` field, where N is the
        field's 1-based position in the schema; other fields are skipped.
        The ``colN`` -> original name mapping is stored under
        ``_meta['headers']`` of the mapping.
        """
        _fields = {
            'shape':
            dsl_field.GeoShape(),
            'point':
            dsl_field.GeoPoint(),
            'shape_type':
            dsl_field.Integer(),
            'label':
            dsl_field.Text(),
            'resource':
            dsl_field.Nested(
                properties={
                    'id':
                    dsl_field.Integer(),
                    'title':
                    dsl_field.Text(analyzer=polish_analyzer,
                                   fields={'raw': dsl_field.Keyword()})
                }),
            'updated_at':
            dsl_field.Date(),
            'row_no':
            dsl_field.Long()
        }
        _map = {}

        for idx, _f in enumerate(self.schema, 1):
            if _f.type not in self._schema2doc_map:
                continue
            field_name = f'col{idx}'
            _map[field_name] = _f.name
            _fields[field_name] = self._schema2doc_map[_f.type]

        # Set once, outside the loop: it does not depend on the schema field
        # and must exist even when no schema field is supported.
        _fields['Index'] = type('Index', (type, ), {'name': self.idx_name})

        doc = type(self.idx_name, (Document, ), _fields)
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        return doc
Esempio n. 28
0
class Tag(InnerDoc):
    """
    Inner document grouping tags by kind.

    Every field is multi-valued text analysed with ``tag`` and has a
    multi-valued ``keyword`` sub-field.
    """

    artists = field.Text(
        analyzer=tag,
        multi=True,
        fields={'keyword': field.Keyword(multi=True)},
    )
    categories = field.Text(
        analyzer=tag,
        multi=True,
        fields={'keyword': field.Keyword(multi=True)},
    )
    characters = field.Text(
        analyzer=tag,
        multi=True,
        fields={'keyword': field.Keyword(multi=True)},
    )
    groups = field.Text(
        analyzer=tag,
        multi=True,
        fields={'keyword': field.Keyword(multi=True)},
    )
    languages = field.Text(
        analyzer=tag,
        multi=True,
        fields={'keyword': field.Keyword(multi=True)},
    )
    parodies = field.Text(
        analyzer=tag,
        multi=True,
        fields={'keyword': field.Keyword(multi=True)},
    )
    tags = field.Text(
        analyzer=tag,
        multi=True,
        fields={'keyword': field.Keyword(multi=True)},
    )
Esempio n. 29
0
class ForumDocument(SumoDocument):
    """
    ES document for a single forum post.

    Fields whose name starts with ``thread_`` are read from the post's
    thread, so the same thread data is repeated on every post of a thread.
    """

    thread_title = field.Text()
    thread_forum_id = field.Keyword()
    # Filled by prepare_forum_slug rather than by the ``thread_`` lookup.
    forum_slug = field.Keyword()
    thread_id = field.Keyword()
    thread_created = field.Date()
    thread_creator_id = field.Keyword()
    thread_is_locked = field.Boolean()
    thread_is_sticky = field.Boolean()

    # Values read from the post itself.
    content = field.Text()
    author_id = field.Keyword()
    created = field.Date()
    updated = field.Date()
    updated_by_id = field.Keyword()

    class Index:
        pass

    @classmethod
    def get_model(cls):
        """Return the model class this document is built from."""
        return Post

    @classmethod
    def get_queryset(cls):
        """Return the posts queryset with thread and forum prefetched."""
        return Post.objects.prefetch_related("thread", "thread__forum")

    def prepare_forum_slug(self, instance):
        """Return the slug of the forum that owns the post's thread."""
        forum = instance.thread.forum
        return forum.slug

    def get_field_value(self, field, instance, *args):
        """
        Resolve ``field`` for ``instance``.

        A ``thread_`` prefix is stripped and the lookup is done on
        ``instance.thread``; any other field is looked up on ``instance``.
        """
        prefix = "thread_"
        if not field.startswith(prefix):
            return super().get_field_value(field, instance, *args)
        thread = instance.thread
        return super().get_field_value(field[len(prefix):], thread, *args)
class OptionalObjectWithRequiredField(document.Document):
    """Document whose nested ``comments`` field declares a required ``title`` keyword."""
    # ``comments`` itself is not marked required; only its ``title`` is.
    comments = field.Nested(properties={'title': field.Keyword(required=True)})