class ProductIndex(RealTimeSearchIndex):
    """Index for inventory.models.Product."""
    # summary template will include title, description, category name and
    # subcategory name in order to make it easy to search
    summary = CharField(document=True, use_template=True, stored=False)
    title = CharField(model_attr="title")
    description = CharField(model_attr="description", indexed=False, null=True)
    product_id = IntegerField(model_attr="id")
    category = CharField(model_attr="category__name")  # , faceted=True)
    category_id = IntegerField(model_attr="category__id")
    subcategory = CharField(model_attr="subcategory__name", null=True)  # , faceted=True, null=True)
    subcategory_id = IntegerField(model_attr="subcategory__id", null=True)
    price = FloatField()
    # price = DecimalField()
    image_url = CharField(null=True, indexed=True)
    marketplace_id = IntegerField(model_attr="shop__marketplace__id")
    shop_id = IntegerField(model_attr="shop__id")
    shop_name = CharField(model_attr="shop__name", indexed=False)
    shop_default_dns = CharField(model_attr="shop__default_dns", indexed=False)
    shop_currency = CharField(indexed=False)
    added_at = DateTimeField(model_attr="date_time")

    def prepare_price(self, obj):
        """Resolve the price of the concrete child object.

        A Lot exposes ``price`` as an instance method while an Item
        exposes it as a plain attribute, so call it only when callable.
        """
        raw_price = obj.child().price
        return raw_price() if callable(raw_price) else raw_price

    def prepare_image_url(self, obj):
        """Return the child's 100x100 thumbnail URL, or None when it has no image."""
        child_image = obj.child().image()
        return child_image.image.url_100x100 if child_image else None

    def prepare_shop_currency(self, obj):
        """Index the checkout currency configured in the shop's preferences."""
        return obj.shop.preference().checkout_currency
class LotIndex(RealTimeSearchIndex): """ Indexes lots.models.Lot instances """ # remember to respect haystack primary field convention name! # by default, stored and indexed parameters are True summary = CharField(document=True, use_template=True) title = CharField(model_attr="title") description = CharField(model_attr="description", indexed=False, null=True) state = CharField(model_attr="get_state_display") shop_id = IntegerField(model_attr="shop__id", stored=False) shop_name = CharField(model_attr="shop__name") category = CharField(model_attr="category") subcategory = CharField(model_attr="subcategory", null=True) added_at = DateTimeField(model_attr="date_time", stored=False) # def get_queryset(self): def index_queryset(self): # allow to search only active lots return Lot.objects.filter(state="A")
class ItemIndex(RealTimeSearchIndex): """ Indexes for_sale.models.Item instances """ # remember to respect haystack primary field convention name! # by default, stored and indexed parameters are True summary = CharField(document=True, use_template=True) title = CharField(model_attr="title") description = CharField(model_attr="description", indexed=False, null=True) price = FloatField(model_attr="price", indexed=False) # price = DecimalField(model_attr="price", indexed=False) added_at = DateTimeField(model_attr="date_time", stored=False) shop_id = IntegerField(model_attr="shop__id", stored=False) shop_name = CharField(model_attr="shop__name") category = CharField(model_attr="category") subcategory = CharField(model_attr="subcategory", null=True) # order by: # * relevance == score function? # * added_at ASC / DESC # * price ASC / DESC # * title ASC / DESC # def get_queryset(self): def index_queryset(self): return Item.objects.filter(qty__gt=0)
class TargetValueIndex(CelerySearchIndex, Indexable):
    '''Base search index which will be shared between various search indexes
    in targeting. It's strongly related to django-haystack'''
    # text field with document=True is required
    text = CharField(document=True)
    value = AutocompleteField(model_attr='value')
    category = CharField(model_attr='category')
    rank = IntegerField()

    def get_model(self):
        '''Model class whose instances are indexed here.'''
        return TargetValue

    def prepare_rank(self, target_value):
        '''Rank location values by hierarchy depth; everything else gets 1.'''
        if target_value.category != dimensions.g_location:
            return 1
        present = target_value.value_dict
        # Return rank of lowest hierarchy only
        for dimension, depth_rank in ((dimensions.city, 3),
                                      (dimensions.region, 2),
                                      (dimensions.country, 1)):
            if dimension in present:
                return depth_rank

    def prepare_value(self, target_value):
        '''
        Add hierarchy name to location target values, to enable filtering
        by hierarchy

        Example:
        'USA' -> 'USA country'
        'USA;Wisconsin' -> 'USA;Wisconsin state'
        'USA;Pensylvania;California' -> ''USA;Pensylvania;California city'
        '''
        if target_value.category != dimensions.g_location:
            return target_value.value
        # Hierarchy names which are appended to index values
        hierarchy_names = ['country', 'state', 'city']
        parts = list(target_value.value_list)
        tail = len(parts) - 1
        parts[tail] = "%s %s" % (parts[tail], hierarchy_names[tail])
        return TargetMap.pack(parts)

    def index_queryset(self, **kwargs):
        '''Returns query on which haystack will perform indexing'''
        return self.get_model().objects.representants().exclude(
            category=dimensions.g_publisher)
class SiteIndex(CelerySearchIndex, Indexable): '''Base search index which will be shared between various search indexes in targeting. It's strongly related to django-haystack''' #: defines text field on contents of which search will be performed. text = CharField(document=True, use_template=True) url = NgramField() owner_id = IntegerField(model_attr='owner_id') def get_model(self): return Site def prepare_url(self, site): return quote(site.url) def index_queryset(self, **kwargs): '''Returns query on which haystack will perform indexing''' return self.get_model().objects.all()
class AuctionSessionIndex(RealTimeSearchIndex):
    """ Indexes auctions.models.AuctionSession instances """
    # remember to respect haystack primary field convention name!
    # by default, stored and indexed parameters are True
    summary = CharField(document=True, use_template=True)
    title = CharField(model_attr="title")
    description = CharField(model_attr="description", indexed=False, null=True)
    shop_id = IntegerField(model_attr="shop__id", stored=False)
    shop_name = CharField(model_attr="shop__name")
    starts_at = DateTimeField(model_attr="start", stored=False)
    ends_at = DateTimeField(model_attr="end", stored=False)

    # def get_queryset(self):
    def index_queryset(self):
        """Index only sessions running right now (start <= now < end).

        The reference time is captured once so both bounds compare
        against the exact same instant; the original called
        ``datetime.now()`` twice, letting the two conditions observe
        slightly different moments.
        """
        now = datetime.now()
        return AuctionSession.objects.filter(start__lte=now, end__gt=now)
class ContainerIndex(SearchIndex):
    text = CharField(document=True, use_template=True)
    channel_id = IntegerField(model_attr="channel_id")
    date_available = DateTimeField(model_attr='date_available')
    date_update = DateTimeField(model_attr='date_update')
    tags = MultiValueField(null=True)

    def prepare_tags(self, obj):
        """Flatten each tag into its slug and its name, in that order.

        Returns None when the object carries no tags at all.
        """
        if not obj.tags:
            return None
        return [value
                for tag in (obj.get_tags() or [])
                for value in (tag.slug, tag.name)]

    def get_updated_field(self):
        """Field haystack consults to decide whether a doc is stale."""
        return 'date_update'

    def index_queryset(self, using=None):
        """Index only published containers."""
        return self.get_model().objects.all_published()
class SpeciesComponentIndex(EntryIndex):
    """Extends the base entry index with the owning species' identifiers."""
    # numeric PK and wid of the related species, for filtering by species
    species_id = IntegerField(model_attr='species__id')
    species_wid = CharField(model_attr='species__wid')

    class Meta:
        pass
class TeamVideoLanguagesIndex(SearchIndex):
    """Search index for team videos and their subtitle-language state."""
    text = CharField(
        document=True, use_template=True,
        template_name="teams/teamvideo_languages_for_search.txt")
    team_id = IntegerField()
    team_video_pk = IntegerField(indexed=False)
    video_pk = IntegerField(indexed=False)
    video_id = CharField(indexed=False)
    video_title = CharField(faceted=True)
    video_url = CharField(indexed=False)
    original_language = CharField()
    original_language_display = CharField(indexed=False)
    absolute_url = CharField(indexed=False)
    project_pk = IntegerField(indexed=True)
    task_count = IntegerField()
    # never store an absolute url with solr
    # since the url changes according to the user
    # one cannot construct the url at index time
    # video_absolute_url = CharField(indexed=False)
    thumbnail = CharField(indexed=False)
    title = CharField()
    project_name = CharField(indexed=False)
    project_slug = CharField(indexed=False)
    description = CharField(indexed=True)
    is_complete = BooleanField()
    video_complete_date = DateTimeField(null=True)
    # list of completed language codes
    video_completed_langs = MultiValueField()
    # list of completed language absolute urls. should have 1-1 mapping to
    # video_completed_langs
    video_completed_lang_urls = MultiValueField(indexed=False)
    latest_submission_date = DateTimeField(null=True)
    team_video_create_date = DateTimeField()
    # possible values for visibility:
    # is_public=True anyone can see
    # is_public=False and owned_by_team_id=None -> a regular user owns,
    #     no teams can list this video
    # is_public=False and owned_by_team_id=X -> only team X can see this video
    is_public = BooleanField()
    owned_by_team_id = IntegerField(null=True)
    # All subtitle languages containing at least one version are included in
    # the total count.
    num_total_langs = IntegerField()
    # Completed languages are languages which have at least one version that is:
    #
    # * Public
    # * Covers all dialog
    # * Fully synced
    # * Fully translated, if a translation
    num_completed_langs = IntegerField()

    def prepare(self, obj):
        """Build the complete search document for a TeamVideo instance.

        Pulls identifying data from the video, its project and team,
        counts subtitle languages and incomplete tasks, and resolves
        visibility from the video's policy.
        """
        self.prepared_data = super(TeamVideoLanguagesIndex, self).prepare(obj)
        self.prepared_data['team_id'] = obj.team.id
        self.prepared_data['team_video_pk'] = obj.id
        self.prepared_data['video_pk'] = obj.video.id
        self.prepared_data['video_id'] = obj.video.video_id
        self.prepared_data['video_title'] = obj.video.title.strip()
        self.prepared_data['video_url'] = obj.video.get_video_url()
        original_sl = obj.video.subtitle_language()
        if original_sl:
            self.prepared_data['original_language_display'] = \
                original_sl.get_language_display()
            self.prepared_data['original_language'] = original_sl.language
        else:
            # video has no original subtitle language yet
            self.prepared_data['original_language_display'] = ''
            self.prepared_data['original_language'] = ''
        self.prepared_data['absolute_url'] = obj.get_absolute_url()
        self.prepared_data['thumbnail'] = obj.get_thumbnail()
        self.prepared_data['title'] = unicode(obj).strip()
        self.prepared_data['description'] = obj.description
        self.prepared_data['is_complete'] = obj.video.complete_date is not None
        self.prepared_data['video_complete_date'] = obj.video.complete_date
        self.prepared_data['project_pk'] = obj.project.pk
        self.prepared_data['project_name'] = obj.project.name
        self.prepared_data['project_slug'] = obj.project.slug
        self.prepared_data['team_video_create_date'] = obj.created
        completed_sls = obj.video.completed_subtitle_languages()
        # languages with at least one subtitle version...
        all_sls = (obj.video.subtitlelanguage_set.annotate(
            num_versions=Count('subtitleversion')).filter(num_versions__gt=0))
        # ...whose latest version is not entirely blank
        all_sls = [sl for sl in all_sls
                   if not sl.latest_version(public_only=False).is_all_blank()]
        self.prepared_data['num_total_langs'] = len(all_sls)
        self.prepared_data['num_completed_langs'] = len(completed_sls)
        self.prepared_data['video_completed_langs'] = \
            [sl.language for sl in completed_sls]
        self.prepared_data['video_completed_lang_urls'] = \
            [sl.get_absolute_url() for sl in completed_sls]
        self.prepared_data['task_count'] = models.Task.objects.incomplete(
            ).filter(team_video=obj).count()
        policy = obj.video.policy
        owned_by = None
        if policy and policy.belongs_to_team:
            owned_by = policy.object_id
        self.prepared_data[
            'is_public'] = VideoVisibilityPolicy.objects.video_is_public(
                obj.video)
        self.prepared_data["owned_by_team_id"] = owned_by
        return self.prepared_data

    # NOTE: these classmethods previously named their first parameter
    # `self`, which shadowed the conventional `cls` and was misleading --
    # the argument received is the class, not an instance.
    @classmethod
    def results_for_members(cls, team):
        """Results a member of `team` may see: all public videos plus the
        team's own private ones."""
        base_qs = SearchQuerySet().models(models.TeamVideo)
        public = SQ(is_public=True)
        mine = SQ(is_public=False, owned_by_team_id=team.pk)
        return base_qs.filter(public | mine)

    @classmethod
    def results(cls):
        """Results anyone may see: publicly visible team videos only."""
        return SearchQuerySet().models(models.TeamVideo).filter(is_public=True)
class CartItemIndex(RealTimeSearchIndex):
    """Index for cart items, searchable by the related product's title."""
    # the product title is the document field, indexed with edge n-grams
    # so that prefix (autocomplete-style) queries match
    summary = EdgeNgramField(model_attr='product__title', document=True)
    shop_id = IntegerField(model_attr="cart__shop__id", stored=False)
    cart_id = IntegerField(model_attr="cart__id")
    category_id = CharField(model_attr="product__category_id")
    subcategory_id = CharField(model_attr="product__subcategory_id", null=True)
class PublisherTargetValueIndex(CelerySearchIndex, Indexable):
    '''
    Specific publisher TargetValue search index. Aim is to store
    PublisherTargetValue objects as docs within searchengine itself

    .. note::

        When querying search engine, remember to pass such query:

        .. code-block:: python

            SearchQuerySet().filter(
                django_ct='targeting.publishertargetvalue'
            ).all()
    '''
    text = CharField(document=True)
    """
    Text name was previously NgramField, but due to problems with
    haystack/whoosh support for this field, we've migrated NgramField to
    CharField. NgramField(document=True) caused strange behaviours in
    SearchQuerySet(), e.g. all() returned empty set but filtering worked
    fine. An after-life of this dicovery can be tracked at:
    https://github.com/toastdriven/django-haystack/issues/913
    """
    name = AutocompleteField()
    network = AutocompleteField()
    inventory = CharField()
    publisher_id = IntegerField(null=True)
    network_id = IntegerField()
    pubkey = CharField()
    # Restrictions
    sizes = MultiValueField(indexed=False)
    positions = MultiValueField(indexed=False)
    segments = MultiValueField(indexed=False)

    def get_model(self):
        # model class whose instances are indexed here
        return PublisherTargetValue

    def index_queryset(self, **kwargs):
        '''Returns query on which haystack will perform indexing'''
        return self.get_model().objects.representants()

    def should_update(self, publisher_tv, **kwargs):
        # skip objects flagged as not indexable
        return publisher_tv.indexable

    def prepare_name(self, publisher_tv):
        # only publisher-name entries carry a searchable name
        if publisher_tv.key == dimensions.publisher_name:
            return publisher_tv.value_dict[dimensions.publisher_name]
        return ''

    def prepare_text(self, publisher_tv):
        # raw value doubles as the full-text document
        return publisher_tv.value

    def prepare_inventory(self, publisher_tv):
        return publisher_tv.inventory_key

    def prepare_pubkey(self, publisher_tv):
        return publisher_tv.key

    def prepare_sizes(self, publisher_tv):
        return publisher_tv.sizes

    def prepare_positions(self, publisher_tv):
        '''Translates position value and saves to list'''
        translated_positions = []
        for position in publisher_tv.positions:
            # map each raw position to its representant value list,
            # scoped to this publisher's exchange
            representant = TargetValue.objects.get_representant_value_list(
                exchange=publisher_tv.exchange,
                category=dimensions.position,
                value=[position],
            )
            if representant is not None:
                translated_positions.extend(representant)
        return translated_positions

    def prepare_segments(self, publisher_tv):
        # index segment display names, not raw segment objects
        return [seg.display_name for seg in publisher_tv.segments.all()]

    def prepare(self, publisher_tv):
        # enrich the default document with network data, and with the
        # publisher id when the entry is a publisher-name entry
        self.prepared_data = super(PublisherTargetValueIndex,
                                   self).prepare(publisher_tv)
        network_name = publisher_tv.value_dict[dimensions.network]
        network_id = publisher_tv.network_id
        self.prepared_data['network'] = network_name
        self.prepared_data['network_id'] = network_id
        if publisher_tv.key == dimensions.publisher_name:
            self.prepared_data['publisher_id'] = publisher_tv.id
        return self.prepared_data
class FossilIndex(SearchIndex):
    """Search index over fossilized (versioned) clinical-trial records."""
    text = CharField(document=True, use_template=False, stored=False)
    trial_id = CharField()
    rev_seq = CharField()
    is_most_recent = BooleanField()
    main_title = MultiValueField()
    rec_status = CharField(faceted=True)
    date_registration = DateTimeField()
    outdated = BooleanField()
    status = CharField()
    rec_country = MultiValueField()
    is_observational = BooleanField()
    i_type = MultiValueField(faceted=True)
    gender = CharField()
    minimum_recruitment_age = IntegerField()  # in hours
    maximum_recruitment_age = IntegerField()  # in hours

    def prepare_minimum_recruitment_age(self, obj):
        """Minimum recruitment age normalized to hours; '-' unit means 0.

        Previously used a bare ``except:`` which swallowed everything
        (including KeyboardInterrupt); narrowed to AttributeError, the
        failure mode every sibling prepare_* method handles.
        """
        fossil_ct = obj.get_object_fossil()
        try:
            unit = fossil_ct.agemin_unit
            value = fossil_ct.agemin_value
        except AttributeError:
            return None
        age = normalize_age(value, unit) if unit != '-' else 0
        return age

    def prepare_maximum_recruitment_age(self, obj):
        """Maximum recruitment age in hours; '-' unit falls back to 200 years.

        Bare ``except:`` narrowed to AttributeError, as above.
        """
        fossil_ct = obj.get_object_fossil()
        try:
            unit = fossil_ct.agemax_unit
            value = fossil_ct.agemax_value
        except AttributeError:
            return None
        age = normalize_age(value, unit) if unit != '-' else normalize_age(
            200, 'Y')
        return age

    def prepare_trial_id(self, obj):
        fossil_ct = obj.get_object_fossil()
        try:
            return fossil_ct.trial_id
        except AttributeError:
            return None

    def prepare_rev_seq(self, obj):
        return obj.revision_sequential

    def prepare_is_most_recent(self, obj):
        return obj.is_most_recent

    def prepare_rec_status(self, obj):
        try:
            return obj.get_object_fossil().recruitment_status.label
        except AttributeError:
            return None

    def prepare_date_registration(self, obj):
        try:
            return obj.get_object_fossil().date_registration
        except AttributeError:
            return None

    def prepare_outdated(self, obj):
        try:
            return obj.get_object_fossil().outdated
        except AttributeError:
            return None

    def prepare_status(self, obj):
        try:
            return obj.get_object_fossil().status
        except AttributeError:
            return None

    def prepare_main_title(self, obj):
        """Collect the main title in every available translation."""
        fossil_ct = obj.get_object_fossil()
        fossil_ct._load_translations()
        try:
            main_titles = []
            for lang in fossil_ct._translations.keys():
                fossil_ct._language = lang
                main_titles.append(fossil_ct.main_title())
            return main_titles
        except AttributeError:
            return None

    def prepare_text(self, obj):
        """Assemble the free-text document from every searchable field,
        across all available translations."""
        fossil_ct = obj.get_object_fossil()
        fossil_ct._load_translations()
        retrieve_data_from = [
            'scientific_contacts', 'utrn_number', 'secondary_ids', 'trial_id',
            'scientific_title', 'public_title', 'acronym',
            'scientific_acronym', 'scientific_acronym_expansion',
            'hc_freetext', 'i_freetext'
        ]
        retrieve_data_from_multilanguage = [
            'scientific_title', 'public_title', 'acronym',
            'scientific_acronym', 'scientific_acronym_expansion',
            'hc_freetext', 'i_freetext'
        ]
        all_text = set()
        for fossil_method in retrieve_data_from:
            try:
                all_text.add(getattr(fossil_ct, fossil_method))
            except AttributeError:
                pass
        # index content in all available languages
        for lang in fossil_ct._translations.keys():
            fossil_ct._language = lang
            for fossil_method in retrieve_data_from_multilanguage:
                try:
                    all_text.add(getattr(fossil_ct, fossil_method))
                except AttributeError:
                    pass
        primary_sponsor = getattr(fossil_ct, 'primary_sponsor', '')
        if primary_sponsor:
            for v in primary_sponsor.values():
                if isinstance(v, basestring):
                    all_text.add(v)
        all_text.discard(None)
        return ' '.join(all_text).strip()

    def prepare_rec_country(self, obj):
        """Labels of every recruitment country."""
        fossil_ct = obj.get_object_fossil()
        return [country['label'] for country in fossil_ct.recruitment_country]

    def prepare_is_observational(self, obj):
        fossil_ct = obj.get_object_fossil()
        return getattr(fossil_ct, 'is_observational', False)

    def prepare_i_type(self, obj):
        """Institution-type labels of all support sources that carry one."""
        fossil_ct = obj.get_object_fossil()
        sources = []
        for source in fossil_ct.support_sources:
            try:
                sources.append(source['institution']['i_type']['label'])
            except KeyError:  # field doesnt exist
                pass
        return sources

    def prepare_gender(self, obj):
        """Map the single-letter gender code to its label; default 'both'."""
        fossil_ct = obj.get_object_fossil()
        if fossil_ct.gender == 'M':
            return 'male'
        elif fossil_ct.gender == 'F':
            return 'female'
        else:
            return 'both'
class resourceInfoType_modelIndex(SearchIndex, indexes.Indexable): """ The `SearchIndex` which indexes `resourceInfoType_model`s. """ # in the text field we list all resource model field that shall be searched # search fields are defined in templates/search/indexes/repository/resourceinfotype_model_text.txt text = CharField(document=True, use_template=True, stored=False) # view and download counts of the resource dl_count = IntegerField(stored=False) view_count = IntegerField(stored=False) # list of sorting results # the access to the sorting results is made in the MetashareFacetedSearchView function of views.py resourceNameSort = CharField(indexed=True, faceted=True) resourceTypeSort = CharField(indexed=True, faceted=True) mediaTypeSort = CharField(indexed=True, faceted=True) languageNameSort = CharField(indexed=True, faceted=True) # list of filters # # filter fields are described using: # - label: the display of the filter in the interface, # - facet_id: a unique id per filter, # - parent_id: used for sub filters, indicates which filter is the parent of a sub filter # (parent_id=0 is mandatory for top filters) # - faceted=True: mandatory to indicate the field is a filter # # notice: variable names must end by "Filter" # # important notice: the definition of the variable name is important for sub filters: # The item name of the sub filter must be lower cased for (e.g. textngram), # then followed by the sub filter name with the first character upper cased (e.g. textngramOrder), # and finalised with "Filter" (e.g. textngramOrderFilter). Otherwise, another item of the same top filter # could be considered as parent (here, for instance, "text" instead of "textngram") # # for each filter, a facet function must be added to "SearchQuerySet()" in urls.py # (e.g. 
.facet("textngramOrderFilter"), the function parameter corresponding to the variable name of the filter # # the creation of the filter structure is made in the _create_filters_structure function of views.py languageNameFilter = LabeledMultiValueField( label=_('Language'), facet_id=1, parent_id=0, faceted=True) resourceTypeFilter = LabeledMultiValueField( label=_('Resource Type'), facet_id=2, parent_id=0, faceted=True) mediaTypeFilter = LabeledMultiValueField( label=_('Media Type'), facet_id=3, parent_id=0, faceted=True) # availabilityFilter = LabeledMultiValueField( # label=_('Availability'), facet_id=4, parent_id=0, # faceted=True) licenceFilter = LabeledMultiValueField( label=_('Licence'), facet_id=5, parent_id=0, faceted=True) restrictionsOfUseFilter = LabeledMultiValueField( label=_('Conditions of Use'), facet_id=6, parent_id=0, faceted=True) validatedFilter = LabeledMultiValueField( label=_('Validated'), facet_id=7, parent_id=0, faceted=True) useNlpSpecificFilter = LabeledMultiValueField( label=_('Use Is NLP Specific'), facet_id=9, parent_id=0, faceted=True) lingualityTypeFilter = LabeledMultiValueField( label=_('Linguality Type'), facet_id=10, parent_id=0, faceted=True) multilingualityTypeFilter = LabeledMultiValueField( label=_('Multilinguality Type'), facet_id=11, parent_id=0, faceted=True) modalityTypeFilter = LabeledMultiValueField( label=_('Modality Type'), facet_id=12, parent_id=0, faceted=True) dataFormatFilter = LabeledMultiValueField( label=_('Data Format'), facet_id=13, parent_id=0, faceted=True) # bestPracticesFilter = LabeledMultiValueField( # label=_('Conformance to Standards/Best Practices'), facet_id=14, parent_id=0, # faceted=True) domainFilter = LabeledMultiValueField( label=_('Domain'), facet_id=15, parent_id=0, faceted=True) corpusAnnotationTypeFilter = LabeledMultiValueField( label=_('Annotation Type'), facet_id=19, parent_id=2, faceted=True) languageDescriptionLDTypeFilter = LabeledMultiValueField( label=_('Language Description Type'), 
facet_id=21, parent_id=2, faceted=True) languageDescriptionEncodingLevelFilter = LabeledMultiValueField( label=_('Encoding Level'), facet_id=22, parent_id=2, faceted=True) languageDescriptionGrammaticalPhenomenaCoverageFilter = LabeledMultiValueField( label=_('Grammatical Phenomena Coverage'), facet_id=23, parent_id=2, faceted=True) lexicalConceptualResourceLRTypeFilter = LabeledMultiValueField( label=_('Lexical/Conceptual Resource Type'), facet_id=24, parent_id=2, faceted=True) lexicalConceptualResourceEncodingLevelFilter = LabeledMultiValueField( label=_('Encoding Level'), facet_id=25, parent_id=2, faceted=True) lexicalConceptualResourceLinguisticInformationFilter = LabeledMultiValueField( label=_('Linguistic Information'), facet_id=26, parent_id=2, faceted=True) toolServiceToolServiceTypeFilter = LabeledMultiValueField( label=_('Tool/Service Type'), facet_id=27, parent_id=2, faceted=True) toolServiceToolServiceSubTypeFilter = LabeledMultiValueField( label=_('Tool/Service Subtype'), facet_id=28, parent_id=2, faceted=True) toolServiceLanguageDependentTypeFilter = LabeledMultiValueField( label=_('Language Dependent'), facet_id=29, parent_id=2, faceted=True) toolServiceInputOutputResourceTypeFilter = LabeledMultiValueField( label=_('InputInfo/OutputInfo Resource Type'), facet_id=30, parent_id=2, faceted=True) toolServiceInputOutputMediaTypeFilter = LabeledMultiValueField( label=_('InputInfo/OutputInfo Media Type'), facet_id=31, parent_id=2, faceted=True) toolServiceAnnotationTypeFilter = LabeledMultiValueField( label=_('Annotation Type'), facet_id=32, parent_id=2, faceted=True) # toolServiceAnnotationFormatFilter = LabeledMultiValueField( # label=_('Annotation Format'), facet_id=33, parent_id=2, # faceted=True) # toolServiceEvaluatedFilter = LabeledMultiValueField( # label=_('Evaluated'), facet_id=34, parent_id=2, # faceted=True) appropriatenessForDSIFilter = LabeledMultiValueField( label=_('Appropriateness For DSI'), facet_id=56, parent_id=0, faceted=True) 
publicationStatusFilter = LabeledCharField( label=_('Publication Status'), facet_id=57, parent_id=0, faceted=True) processabilityFilter = LabeledCharField( label=_('Processable by ELRC'), facet_id=58, parent_id=0, faceted=True) # Start sub filters textTextGenreFilter = LabeledMultiValueField( label=_('Text Genre'), facet_id=35, parent_id=3, faceted=True) textTextTypeFilter = LabeledMultiValueField( label=_('Text Type'), facet_id=36, parent_id=3, faceted=True) # languageVarietyFilter = LabeledMultiValueField( # label=_('Language Variety'), facet_id=55, parent_id=0, # faceted=True) # we create all items that may appear in the search results list already at # index time rendered_result = CharField(use_template=True, indexed=False) def get_model(self): """ Returns the model class of which instances are indexed here. """ return resourceInfoType_model def index_queryset(self, using=None): """ Returns the default QuerySet to index when doing a full index update. In our case this is a QuerySet containing only published resources that have not been deleted, yet. """ return self.read_queryset() def read_queryset(self, using=None): """ Returns the default QuerySet for read actions. In our case this is a QuerySet containing only published resources that have not been deleted, yet. """ return self.get_model().objects.filter(storage_object__deleted=False, storage_object__publication_status__in=[INGESTED, PUBLISHED]) def should_update(self, instance, **kwargs): ''' Only index resources that are at least ingested. In other words, do not index internal resources. ''' return instance.storage_object.publication_status in (INGESTED, PUBLISHED) def update_object(self, instance, using=None, **kwargs): """ Updates the index for a single object instance. In this implementation we do not only handle instances of the model as returned by get_model(), but we also support the models that are registered in our own _setup_save() method. 
""" if os.environ.get('DISABLE_INDEXING_DURING_IMPORT', False) == 'True': return if isinstance(instance, StorageObject): LOGGER.debug("StorageObject changed for resource #{0}." \ .format(instance.id)) related_resource_qs = instance.resourceinfotype_model_set if (not related_resource_qs.count()): # no idea why this happens, but it does; there are storage # objects which are not attached to any # resourceInfoType_model return related_resource = related_resource_qs.iterator().next() if instance.deleted: # if the resource has been flagged for deletion, then we # don't want to keep/have it in the index LOGGER.info("Resource #{0} scheduled for deletion from " \ "the index.".format(related_resource.id)) self.remove_object(related_resource, using=using) return instance = related_resource elif not isinstance(instance, self.get_model()): assert False, "Unexpected sender: {0}".format(instance) LOGGER.error("Unexpected sender: {0}".format(instance)) return # we better recreate our resource instance from the DB as otherwise it # has happened for some reason that the instance was not up-to-date instance = self.get_model().objects.get(pk=instance.id) LOGGER.info("Resource #{0} scheduled for reindexing." \ .format(instance.id)) super(resourceInfoType_modelIndex, self) \ .update_object(instance, using=using, **kwargs) def remove_object(self, instance, using=None, **kwargs): """ Removes a single object instance from the index. """ if os.environ.get('DISABLE_INDEXING_DURING_IMPORT', False) == 'True': return super(resourceInfoType_modelIndex, self).remove_object(instance, using=using, **kwargs) def prepare_dl_count(self, obj): """ Returns the download count for the given resource object. """ return model_utils.get_lr_stat_action_count( obj.storage_object.identifier, DOWNLOAD_STAT) def prepare_view_count(self, obj): """ Returns the view count for the given resource object. 
""" return model_utils.get_lr_stat_action_count( obj.storage_object.identifier, VIEW_STAT) def prepare_resourceNameSort(self, obj): """ Collect the data to sort the Resource Names """ # get the Resource Name resourceNameSort = obj.identificationInfo.get_default_resourceName() resourceNameSort = unidecode(resourceNameSort) # keep alphanumeric characters only resourceNameSort = re.sub('[\W_]', '', resourceNameSort) # set Resource Name to lower case resourceNameSort = resourceNameSort.lower() return resourceNameSort def prepare_resourceTypeSort(self, obj): """ Collect the data to sort the Resource Types """ # get the list of Resource Types resourceTypeSort = self.prepare_resourceTypeFilter(obj) # render unique list of Resource Types resourceTypeSort = list(set(resourceTypeSort)) # sort Resource Types resourceTypeSort.sort() # join Resource Types into a string resourceTypeSort = ",".join(resourceTypeSort) # keep alphanumeric characters only resourceTypeSort = re.sub('[\W_]', '', resourceTypeSort) # set list of Resource Types to lower case resourceTypeSort = resourceTypeSort.lower() return resourceTypeSort def prepare_mediaTypeSort(self, obj): """ Collect the data to sort the Media Types """ # get the list of Media Types mediaTypeSort = self.prepare_mediaTypeFilter(obj) # render unique list of Media Types mediaTypeSort = list(set(mediaTypeSort)) # sort Media Types mediaTypeSort.sort() # join Media Types into a string mediaTypeSort = ",".join(mediaTypeSort) # keep alphanumeric characters only mediaTypeSort = re.sub('[\W_]', '', mediaTypeSort) # set list of Media Types to lower case mediaTypeSort = mediaTypeSort.lower() return mediaTypeSort def prepare_languageNameSort(self, obj): """ Collect the data to sort the Language Names """ # get the list of languages languageNameSort = self.prepare_languageNameFilter(obj) # render unique list of languages languageNameSort = list(set(languageNameSort)) # sort languages languageNameSort.sort() # join languages into a string 
languageNameSort = ",".join(languageNameSort) # keep alphanumeric characters only languageNameSort = re.sub('[\W_]', '', languageNameSort) # set list of languages to lower case languageNameSort = languageNameSort.lower() return languageNameSort def prepare_languageNameFilter(self, obj): """ Collect the data to filter the resources on Language Name """ result = [] corpus_media = obj.resourceComponentType.as_subclass() if isinstance(corpus_media, corpusInfoType_model): media_type = corpus_media.corpusMediaType for corpus_info in media_type.corpustextinfotype_model_set.all(): result.extend([lang.languageName for lang in corpus_info.languageinfotype_model_set.all()]) elif isinstance(corpus_media, lexicalConceptualResourceInfoType_model): lcr_media_type = corpus_media.lexicalConceptualResourceMediaType if lcr_media_type.lexicalConceptualResourceTextInfo: result.extend([lang.languageName for lang in lcr_media_type \ .lexicalConceptualResourceTextInfo.languageinfotype_model_set.all()]) elif isinstance(corpus_media, languageDescriptionInfoType_model): ld_media_type = corpus_media.languageDescriptionMediaType if ld_media_type.languageDescriptionTextInfo: result.extend([lang.languageName for lang in ld_media_type \ .languageDescriptionTextInfo.languageinfotype_model_set.all()]) elif isinstance(corpus_media, toolServiceInfoType_model): if corpus_media.inputInfo: result.extend([lang.languageName for lang in corpus_media.inputInfo.languagesetinfotype_model_set.all()]) if corpus_media.outputInfo: result.extend([lang.languageName for lang in corpus_media.outputInfo.languagesetinfotype_model_set.all()]) return result def prepare_resourceTypeFilter(self, obj): """ Collect the data to filter the resources on Resource Type """ resType = obj.resourceComponentType.as_subclass().resourceType if resType: return [resType] return [] def prepare_mediaTypeFilter(self, obj): """ Collect the data to filter the resources on Media Type """ return model_utils.get_resource_media_types(obj) def 
prepare_availabilityFilter(self, obj): """ Collect the data to filter the resources on Availability """ return [distributionInfo.get_availability_display() for distributionInfo in obj.distributioninfotype_model_set.all()] def prepare_licenceFilter(self, obj): """ Collect the data to filter the resources on Licence """ return model_utils.get_resource_license_types(obj) def prepare_restrictionsOfUseFilter(self, obj): """ Collect the data to filter the resources on Restrictions Of USe """ return [restr for distributionInfo in obj.distributioninfotype_model_set.all() for licence_info in distributionInfo.licenceInfo.all() for restr in licence_info.get_restrictionsOfUse_display_list()] def prepare_validatedFilter(self, obj): """ Collect the data to filter the resources on Validated """ return [validation_info.validated for validation_info in obj.validationinfotype_model_set.all()] def prepare_lingualityTypeFilter(self, obj): """ Collect the data to filter the resources on Linguality Type """ return model_utils.get_resource_linguality_infos(obj) def prepare_multilingualityTypeFilter(self, obj): """ Collect the data to filter the resources on Multilinguality Type """ result = [] corpus_media = obj.resourceComponentType.as_subclass() if isinstance(corpus_media, corpusInfoType_model): media_type = corpus_media.corpusMediaType for corpus_info in media_type.corpustextinfotype_model_set.all(): mtf = corpus_info.lingualityInfo \ .get_multilingualityType_display() if mtf != '': result.append(mtf) elif isinstance(corpus_media, lexicalConceptualResourceInfoType_model): lcr_media_type = corpus_media.lexicalConceptualResourceMediaType if lcr_media_type.lexicalConceptualResourceTextInfo: mtf = lcr_media_type.lexicalConceptualResourceTextInfo \ .lingualityInfo.get_multilingualityType_display() if mtf != '': result.append(mtf) elif isinstance(corpus_media, languageDescriptionInfoType_model): ld_media_type = corpus_media.languageDescriptionMediaType if 
ld_media_type.languageDescriptionTextInfo: mtf = ld_media_type.languageDescriptionTextInfo \ .lingualityInfo.get_multilingualityType_display() if mtf != '': result.append(mtf) return result def prepare_dataFormatFilter(self, obj): """ Collect the data to filter the resources on Mime Type """ dataFormat_list = [] corpus_media = obj.resourceComponentType.as_subclass() if isinstance(corpus_media, corpusInfoType_model): media_type = corpus_media.corpusMediaType for corpus_info in media_type.corpustextinfotype_model_set.all(): dataFormat_list.extend([MIMETYPEVALUE_TO_MIMETYPELABEL[dataFormat.dataFormat] if dataFormat.dataFormat in MIMETYPEVALUE_TO_MIMETYPELABEL else dataFormat.dataFormat for dataFormat in corpus_info.textformatinfotype_model_set.all()]) elif isinstance(corpus_media, lexicalConceptualResourceInfoType_model): lcr_media_type = corpus_media.lexicalConceptualResourceMediaType if lcr_media_type.lexicalConceptualResourceTextInfo: dataFormat_list.extend([MIMETYPEVALUE_TO_MIMETYPELABEL[dataFormat.dataFormat] if dataFormat.dataFormat in MIMETYPEVALUE_TO_MIMETYPELABEL else dataFormat.dataFormat for dataFormat in lcr_media_type.lexicalConceptualResourceTextInfo \ .textformatinfotype_model_set.all()]) elif isinstance(corpus_media, languageDescriptionInfoType_model): ld_media_type = corpus_media.languageDescriptionMediaType if ld_media_type.languageDescriptionTextInfo: dataFormat_list.extend([MIMETYPEVALUE_TO_MIMETYPELABEL[dataFormat.dataFormat] if dataFormat.dataFormat in MIMETYPEVALUE_TO_MIMETYPELABEL else dataFormat.dataFormat for dataFormat in ld_media_type.languageDescriptionTextInfo \ .textformatinfotype_model_set.all()]) elif isinstance(corpus_media, toolServiceInfoType_model): if corpus_media.inputInfo: dataFormat_list.extend(corpus_media.inputInfo.dataFormat) if corpus_media.outputInfo: dataFormat_list.extend(corpus_media.outputInfo.dataFormat) return dataFormat_list def prepare_bestPracticesFilter(self, obj): """ Collect the data to filter the resources on 
Best Practices """ result = [] corpus_media = obj.resourceComponentType.as_subclass() if isinstance(corpus_media, corpusInfoType_model): media_type = corpus_media.corpusMediaType for corpus_info in media_type.corpustextinfotype_model_set.all(): for annotation_info in corpus_info.annotationinfotype_model_set.all(): result.extend(annotation_info.get_conformanceToStandardsBestPractices_display_list()) elif isinstance(corpus_media, lexicalConceptualResourceInfoType_model): if corpus_media.lexicalConceptualResourceEncodingInfo: result.extend(corpus_media.lexicalConceptualResourceEncodingInfo \ .get_conformanceToStandardsBestPractices_display_list()) elif isinstance(corpus_media, languageDescriptionInfoType_model): if corpus_media.languageDescriptionEncodingInfo: result.extend(corpus_media.languageDescriptionEncodingInfo \ .get_conformanceToStandardsBestPractices_display_list()) elif isinstance(corpus_media, toolServiceInfoType_model): if corpus_media.inputInfo: result.extend(corpus_media.inputInfo \ .get_conformanceToStandardsBestPractices_display_list()) if corpus_media.outputInfo: result.extend(corpus_media.outputInfo \ .get_conformanceToStandardsBestPractices_display_list()) return result def prepare_domainFilter(self, obj): """ Collect the data to filter the resources on Domain """ result = [] corpus_media = obj.resourceComponentType.as_subclass() if isinstance(corpus_media, corpusInfoType_model): media_type = corpus_media.corpusMediaType for corpus_info in media_type.corpustextinfotype_model_set.all(): result.extend([domain_info.domain for domain_info in corpus_info.domaininfotype_model_set.all()]) elif isinstance(corpus_media, lexicalConceptualResourceInfoType_model): lcr_media_type = corpus_media.lexicalConceptualResourceMediaType if lcr_media_type.lexicalConceptualResourceTextInfo: result.extend([domain_info.domain for domain_info in lcr_media_type.lexicalConceptualResourceTextInfo \ .domaininfotype_model_set.all()]) elif isinstance(corpus_media, 
languageDescriptionInfoType_model): ld_media_type = corpus_media.languageDescriptionMediaType if ld_media_type.languageDescriptionTextInfo: result.extend([domain_info.domain for domain_info in ld_media_type.languageDescriptionTextInfo \ .domaininfotype_model_set.all()]) return result def prepare_corpusAnnotationTypeFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() # Filter for corpus if isinstance(corpus_media, corpusInfoType_model): media_type = corpus_media.corpusMediaType for corpus_info in media_type.corpustextinfotype_model_set.all(): for annotation_info in corpus_info.annotationinfotype_model_set.all(): result.append(annotation_info.get_annotationType_display()) return result def prepare_languageDescriptionLDTypeFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ corpus_media = obj.resourceComponentType.as_subclass() if isinstance(corpus_media, languageDescriptionInfoType_model): return [corpus_media.get_languageDescriptionType_display()] return [] def prepare_languageDescriptionEncodingLevelFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ corpus_media = obj.resourceComponentType.as_subclass() if isinstance(corpus_media, languageDescriptionInfoType_model) \ and corpus_media.languageDescriptionEncodingInfo: return corpus_media.languageDescriptionEncodingInfo \ .get_encodingLevel_display_list() return [] def prepare_lexicalConceptualResourceLRTypeFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() # Filter for lexicalConceptual if isinstance(corpus_media, lexicalConceptualResourceInfoType_model): result.append(corpus_media.get_lexicalConceptualResourceType_display()) return result def prepare_lexicalConceptualResourceEncodingLevelFilter(self, obj): """ Collect 
the data to filter the resources on Resource Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() # Filter for lexicalConceptual if isinstance(corpus_media, lexicalConceptualResourceInfoType_model): if corpus_media.lexicalConceptualResourceEncodingInfo: result.extend(corpus_media.lexicalConceptualResourceEncodingInfo. \ get_encodingLevel_display_list()) return result def prepare_lexicalConceptualResourceLinguisticInformationFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() # Filter for lexicalConceptual if isinstance(corpus_media, lexicalConceptualResourceInfoType_model): if corpus_media.lexicalConceptualResourceEncodingInfo: result.extend(corpus_media.lexicalConceptualResourceEncodingInfo. \ get_linguisticInformation_display_list()) return result def prepare_toolServiceToolServiceTypeFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() # Filter for toolService if isinstance(corpus_media, toolServiceInfoType_model): result.append(corpus_media.get_toolServiceType_display()) return result # TODO: edit to Function field # def prepare_toolServiceToolServiceSubTypeFilter(self, obj): # """ # Collect the data to filter the resources on Resource Type children # """ # result = [] # # corpus_media = obj.resourceComponentType.as_subclass() # # # Filter for toolService # if isinstance(corpus_media, toolServiceInfoType_model): # result.extend(corpus_media.toolServiceSubtype) # # return result def prepare_toolServiceLanguageDependentTypeFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() # Filter for toolService if isinstance(corpus_media, toolServiceInfoType_model): result.append(corpus_media.get_languageDependent_display()) 
return result def prepare_toolServiceInputOutputResourceTypeFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() # Filter for toolService if isinstance(corpus_media, toolServiceInfoType_model): if corpus_media.inputInfo: result.extend( corpus_media.inputInfo.get_resourceType_display_list()) if corpus_media.outputInfo: result.extend( corpus_media.outputInfo.get_resourceType_display_list()) return result def prepare_toolServiceInputOutputMediaTypeFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() # Filter for toolService if isinstance(corpus_media, toolServiceInfoType_model): if corpus_media.inputInfo: result.append(corpus_media.inputInfo.get_mediaType_display()) if corpus_media.outputInfo: result.append(corpus_media.outputInfo.get_mediaType_display()) return result def prepare_toolServiceAnnotationTypeFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() if isinstance(corpus_media, toolServiceInfoType_model): if corpus_media.inputInfo: result.extend(corpus_media.inputInfo.get_annotationType_display_list()) if corpus_media.outputInfo: result.extend(corpus_media.outputInfo.get_annotationType_display_list()) return result def prepare_toolServiceEvaluatedFilter(self, obj): """ Collect the data to filter the resources on Resource Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() # Filter for toolService if isinstance(corpus_media, toolServiceInfoType_model): if corpus_media.toolServiceEvaluationInfo: result.append(corpus_media.toolServiceEvaluationInfo.get_evaluated_display()) return result def prepare_textTextGenreFilter(self, obj): """ Collect the data to filter the resources on Media Type children """ result = [] 
corpus_media = obj.resourceComponentType.as_subclass() # Filter for corpus if isinstance(corpus_media, corpusInfoType_model): media_type = corpus_media.corpusMediaType for corpus_info in media_type.corpustextinfotype_model_set.all(): result.extend([text_classification_info.textGenre \ for text_classification_info in corpus_info.textclassificationinfotype_model_set.all()]) return result def prepare_textTextTypeFilter(self, obj): """ Collect the data to filter the resources on Media Type children """ result = [] corpus_media = obj.resourceComponentType.as_subclass() # Filter for corpus if isinstance(corpus_media, corpusInfoType_model): media_type = corpus_media.corpusMediaType for corpus_info in media_type.corpustextinfotype_model_set.all(): result.extend([text_classification_info.textType \ for text_classification_info in corpus_info.textclassificationinfotype_model_set.all()]) return result def prepare_languageVarietyFilter(self, obj): """ Collect the data to filter the resources on Language Variety """ result = [] corpus_media = obj.resourceComponentType.as_subclass() if isinstance(corpus_media, corpusInfoType_model): media_type = corpus_media.corpusMediaType for corpus_info in media_type.corpustextinfotype_model_set.all(): for lang in corpus_info.languageinfotype_model_set.all(): result.extend([variety.languageVarietyName for variety in lang.languageVarietyInfo.all()]) elif isinstance(corpus_media, lexicalConceptualResourceInfoType_model): lcr_media_type = corpus_media.lexicalConceptualResourceMediaType if lcr_media_type.lexicalConceptualResourceTextInfo: for lang in lcr_media_type.lexicalConceptualResourceTextInfo. \ languageinfotype_model_set.all(): result.extend([variety.languageVarietyName for variety in lang.languageVarietyInfo.all()]) elif isinstance(corpus_media, languageDescriptionInfoType_model): ld_media_type = corpus_media.languageDescriptionMediaType if ld_media_type.languageDescriptionTextInfo: for lang in ld_media_type.languageDescriptionTextInfo. 
\ languageinfotype_model_set.all(): result.extend([variety.languageVarietyName for variety in lang.languageVarietyInfo.all()]) elif isinstance(corpus_media, toolServiceInfoType_model): if corpus_media.inputInfo: result.extend(corpus_media.inputInfo.languageVarietyName) if corpus_media.outputInfo: result.extend(corpus_media.outputInfo.languageVarietyName) return result def prepare_appropriatenessForDSIFilter(self, obj): """ Collect the data to filter the resources on appropriatenessForDSIFilter """ return obj.identificationInfo.get_appropriatenessForDSI_display_list() def prepare_publicationStatusFilter(self, obj): """ Collect the data to filter the resources on publication status """ return obj.publication_status() def prepare_processabilityFilter(self, obj): return is_processable(obj)[0]