class QuestionIndex(SearchIndex, Indexable):
    """ Thread proxy model index """
    title = CharField(model_attr="title", boost=5)
    text = CharField(document=True)
    ###################################
    node_visibility = CharField(model_attr="node__visibility", indexed=False)
    external_access = BooleanField(model_attr="external_access", indexed=False)
    node_users = MultiValueField(indexed=False)
    ###################################
    text_for_highlighting = CharField(indexed=False)
    node_title = CharField(indexed=False)
    thread_type = CharField(model_attr="thread_type", indexed=False)
    url = CharField(model_attr="get_absolute_url", indexed=False)
    last_changed = CharField(model_attr="render_last_changed", indexed=False)
    main_post_text = CharField(indexed=False)
    display_title = CharField(model_attr="node__title", indexed=False)
    tags = MultiValueField()
    tags_data = MultiValueField(indexed=False)

    def get_model(self):
        return QuestionProxy

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        qs = self.get_model().objects.filter(
            is_deleted=False,
            thread_type=THREAD_TYPE_QUESTION,
            # posts__post_type__in=[
            #     POST_TYPE_QUESTION,  # _main_post
            #     POST_TYPE_COMMENT
            # ]
        ).select_related(
            "node"
        )
        return qs

    ###################################
    def prepare_title(self, obj):
        return obj.title

    def prepare_text(self, obj):
        try:
            main_post = obj._main_post()
        except ObjectDoesNotExist as e:
            logging.error("FulltextIndex error | %s" % repr({
                "pk": obj.pk,
                "error": str(e),
                "type": type(obj),
            }))
            main_post = None

        data = []
        data.append(obj.title)
        if main_post:
            data.append(obj.node.title)
            data.append(main_post.text)
            for text in main_post.comments.all().values_list("text", flat=True):
                data.append(text)
        return " ".join([strip_tags(ch).strip() for ch in data])
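# Usage sketch (not part of the original module): the permission fields above
# (node_visibility, external_access, node_users) are stored with indexed=False,
# so they cannot be filtered in the search backend; the assumption here is that
# hits are checked in Python after the query. `request_user` and the helper
# name are illustrative; NODE_VISIBILITY_PRIVATE and QuestionProxy come from
# the surrounding project.
from haystack.query import SearchQuerySet


def visible_question_results(query, request_user):
    """Run a question search and drop hits the user may not see (sketch)."""
    results = SearchQuerySet().models(QuestionProxy).auto_query(query)
    allowed = []
    for result in results:
        if result.node_visibility != NODE_VISIBILITY_PRIVATE:
            allowed.append(result)
        elif result.node_users and request_user.pk in [int(u) for u in result.node_users]:
            # node_users is only populated for private nodes
            # (see prepare_node_users above)
            allowed.append(result)
    return allowed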
class DocumentIndex(SearchIndex, Indexable):
    """ Document index """
    title = CharField(boost=5)
    text = CharField(document=True)
    # PERM FIELDS
    external_access = BooleanField(indexed=False)
    node_users = MultiValueField(indexed=False)
    node_visibility = CharField(indexed=False)

    icon = CharField(indexed=False)
    node_title = CharField(indexed=False)
    url = CharField(indexed=False)
    last_changed = CharField(indexed=False)

    def get_model(self):
        return Document

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        qs = self.get_model().objects.filter(
            is_deleted=False,
            thread__is_deleted=False
        ).select_related(
            "thread"
        )
        return qs

    def prepare_title(self, obj):
        return obj.thread.title

    def prepare_text(self, obj):
        thread = obj.thread
        main_post = thread._main_post()
        data = []
        data.append(thread.node.title)
        data.append(thread.title)
        data.append(main_post.text)
        return " ".join([strip_tags(ch).strip() for ch in data])

    def prepare_icon(self, obj):
        return obj.get_icon()

    def prepare_node_title(self, obj):
        return obj.thread.node.title_with_status()

    def prepare_url(self, obj):
        return obj.get_absolute_url()

    def prepare_last_changed(self, obj):
        return obj.thread.render_last_changed()

    ###################################
    def prepare_node_users(self, obj):
        if obj.thread.node.visibility == NODE_VISIBILITY_PRIVATE:
            # store users only for private nodes
            return list(obj.thread.node.node_users.values_list("user_id", flat=True))
        return []

    def prepare_external_access(self, obj):
        return obj.thread.external_access

    def prepare_node_visibility(self, obj):
        return obj.thread.node.visibility
class DocumentPageIndex(SearchIndex, Indexable):
    """ Post proxy model """
    title = CharField(boost=5)
    text = CharField(document=True)
    # PERM FIELDS
    external_access = BooleanField(indexed=False)
    node_users = MultiValueField(indexed=False)
    node_visibility = CharField(indexed=False)

    url = CharField(indexed=False)
    document_icon = CharField(indexed=False)
    node_title = CharField(indexed=False)
    page = CharField(indexed=False)

    def get_model(self):
        return Page

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        qs = self.get_model().objects.filter(
            document_revision__document__thread__is_deleted=False,
        ).exclude(
            Q(plaintext=DOCUMENT_PAGE_CONTENT_IF_ERROR) | Q(plaintext="")
        )
        return qs

    def prepare_title(self, obj):
        return obj.document_revision.document.thread.title

    def prepare_text(self, obj):
        return obj.plaintext

    def prepare_node_title(self, obj):
        return obj.document_revision.document.thread.node.title_with_status()

    def prepare_document_icon(self, obj):
        return obj.document_revision.document.get_icon()

    def prepare_page(self, obj):
        return "%s/%s" % (
            obj.number,
            obj.document_revision.pages.aggregate(_max=Max("number")).get("_max", obj.number)
        )

    def prepare_url(self, obj):
        # TODO: go to selected page
        return obj.document_revision.document.get_absolute_url()

    ###################################
    def prepare_node_users(self, obj):
        if obj.document_revision.document.thread.node.visibility == NODE_VISIBILITY_PRIVATE:
            # store users only for private nodes
            return list(obj.document_revision.document.thread.node.node_users.values_list("user_id", flat=True))
        return []

    def prepare_external_access(self, obj):
        return obj.document_revision.document.thread.external_access

    def prepare_node_visibility(self, obj):
        return obj.document_revision.document.thread.node.visibility
class AnswerIndex(SearchIndex, Indexable):
    """ Post proxy model """
    title = CharField(boost=5)
    text = CharField(document=True)
    url = CharField(indexed=False)
    last_changed = CharField(indexed=False)
    # tags = MultiValueField()
    # tags_data = MultiValueField(indexed=False)
    # PERM FIELDS
    external_access = BooleanField(indexed=False)
    node_users = MultiValueField(indexed=False)
    node_visibility = CharField(indexed=False)

    def get_model(self):
        return AnswerProxy

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        qs = self.get_model().objects.filter(
            deleted=False,
            post_type=POST_TYPE_THREAD_POST,
            thread__thread_type=THREAD_TYPE_QUESTION
        )
        return qs

    def prepare_title(self, obj):
        return obj.thread.title

    def prepare_text(self, obj):
        # TODO
        data = []
        data.append(obj.thread.node.title)
        data.append(obj.text)
        for c in obj.comments.all():
            data.append(c.text)
        return " ".join([strip_tags(ch).strip() for ch in data])

    def prepare_url(self, obj):
        return obj.get_absolute_url()

    # def prepare_tags(self, obj):
    #     return list(
    #         obj.thread.tags.order_by("name").values_list("name", flat=True)
    #     )

    # def prepare_tags_data(self, obj):
    #     return list(
    #         obj.thread.tags.order_by("name").values_list("id", "name")
    #     )

    ###################################
    def prepare_node_users(self, obj):
        if obj.thread.node.visibility == NODE_VISIBILITY_PRIVATE:
            # store users only for private nodes
            return list(obj.thread.node.node_users.values_list("user_id", flat=True))
        return []

    def prepare_external_access(self, obj):
        return obj.thread.external_access

    def prepare_node_visibility(self, obj):
        return obj.thread.node.visibility

    def prepare_last_changed(self, obj):
        return obj.thread.render_last_changed()
class PublisherTargetValueIndex(CelerySearchIndex, Indexable):
    '''
    Specific publisher TargetValue search index.
    The aim is to store PublisherTargetValue objects as docs within the
    search engine itself.

    .. note::
        When querying the search engine, remember to pass such a query:

        .. code-block:: python

            SearchQuerySet().filter(
                django_ct='targeting.publishertargetvalue'
            ).all()
    '''
    text = CharField(document=True)
    """
    The `text` field was previously an NgramField, but due to problems with
    haystack/whoosh support for this field type, we've migrated from
    NgramField to CharField.

    NgramField(document=True) caused strange behaviours in SearchQuerySet(),
    e.g. all() returned an empty set but filtering worked fine. An after-life
    of this discovery can be tracked at:
    https://github.com/toastdriven/django-haystack/issues/913
    """
    name = AutocompleteField()
    network = AutocompleteField()
    inventory = CharField()
    publisher_id = IntegerField(null=True)
    network_id = IntegerField()
    pubkey = CharField()
    # Restrictions
    sizes = MultiValueField(indexed=False)
    positions = MultiValueField(indexed=False)
    segments = MultiValueField(indexed=False)

    def get_model(self):
        return PublisherTargetValue

    def index_queryset(self, **kwargs):
        '''Returns the queryset on which haystack will perform indexing'''
        return self.get_model().objects.representants()

    def should_update(self, publisher_tv, **kwargs):
        return publisher_tv.indexable

    def prepare_name(self, publisher_tv):
        if publisher_tv.key == dimensions.publisher_name:
            return publisher_tv.value_dict[dimensions.publisher_name]
        return ''

    def prepare_text(self, publisher_tv):
        return publisher_tv.value

    def prepare_inventory(self, publisher_tv):
        return publisher_tv.inventory_key

    def prepare_pubkey(self, publisher_tv):
        return publisher_tv.key

    def prepare_sizes(self, publisher_tv):
        return publisher_tv.sizes

    def prepare_positions(self, publisher_tv):
        '''Translates position values and collects them into a list'''
        translated_positions = []
        for position in publisher_tv.positions:
            representant = TargetValue.objects.get_representant_value_list(
                exchange=publisher_tv.exchange,
                category=dimensions.position,
                value=[position],
            )
            if representant is not None:
                translated_positions.extend(representant)
        return translated_positions

    def prepare_segments(self, publisher_tv):
        return [seg.display_name for seg in publisher_tv.segments.all()]

    def prepare(self, publisher_tv):
        self.prepared_data = super(PublisherTargetValueIndex, self).prepare(publisher_tv)
        network_name = publisher_tv.value_dict[dimensions.network]
        network_id = publisher_tv.network_id
        self.prepared_data['network'] = network_name
        self.prepared_data['network_id'] = network_id
        if publisher_tv.key == dimensions.publisher_name:
            self.prepared_data['publisher_id'] = publisher_tv.id
        return self.prepared_data
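# Usage sketch (assumption, not from the original module): `name` and
# `network` are AutocompleteField instances, presumed to be ngram-backed
# haystack fields, so SearchQuerySet.autocomplete() can match partial
# publisher names. The django_ct filter follows the class docstring above;
# the helper name and `limit` parameter are illustrative.
from haystack.query import SearchQuerySet


def suggest_publishers(prefix, limit=10):
    """Return up to `limit` publisher suggestions for a typed prefix (sketch)."""
    return SearchQuerySet().filter(
        django_ct='targeting.publishertargetvalue'
    ).autocomplete(name=prefix)[:limit]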
class TeamVideoLanguagesIndex(SearchIndex):
    text = CharField(
        document=True, use_template=True,
        template_name="teams/teamvideo_languages_for_search.txt")
    team_id = IntegerField()
    team_video_pk = IntegerField(indexed=False)
    video_pk = IntegerField(indexed=False)
    video_id = CharField(indexed=False)
    video_title = CharField(faceted=True)
    video_url = CharField(indexed=False)
    original_language = CharField()
    original_language_display = CharField(indexed=False)
    absolute_url = CharField(indexed=False)
    project_pk = IntegerField(indexed=True)
    task_count = IntegerField()
    # never store an absolute url with solr
    # since the url changes according to the user
    # one cannot construct the url at index time
    # video_absolute_url = CharField(indexed=False)
    thumbnail = CharField(indexed=False)
    title = CharField()
    project_name = CharField(indexed=False)
    project_slug = CharField(indexed=False)
    description = CharField(indexed=True)
    is_complete = BooleanField()
    video_complete_date = DateTimeField(null=True)
    # list of completed language codes
    video_completed_langs = MultiValueField()
    # list of completed language absolute urls; should have a 1-1 mapping to
    # video_completed_langs
    video_completed_lang_urls = MultiValueField(indexed=False)
    latest_submission_date = DateTimeField(null=True)
    team_video_create_date = DateTimeField()
    # possible values for visibility:
    # is_public=True -> anyone can see
    # is_public=False and owned_by_team_id=None -> a regular user owns it, no teams can list this video
    # is_public=False and owned_by_team_id=X -> only team X can see this video
    is_public = BooleanField()
    owned_by_team_id = IntegerField(null=True)
    # All subtitle languages containing at least one version are included in
    # the total count.
    num_total_langs = IntegerField()
    # Completed languages are languages which have at least one version that is:
    #
    #   * Public
    #   * Covers all dialog
    #   * Fully synced
    #   * Fully translated, if a translation
    num_completed_langs = IntegerField()

    def prepare(self, obj):
        self.prepared_data = super(TeamVideoLanguagesIndex, self).prepare(obj)
        self.prepared_data['team_id'] = obj.team.id
        self.prepared_data['team_video_pk'] = obj.id
        self.prepared_data['video_pk'] = obj.video.id
        self.prepared_data['video_id'] = obj.video.video_id
        self.prepared_data['video_title'] = obj.video.title.strip()
        self.prepared_data['video_url'] = obj.video.get_video_url()

        original_sl = obj.video.subtitle_language()
        if original_sl:
            self.prepared_data['original_language_display'] = \
                original_sl.get_language_code_display()
            self.prepared_data['original_language'] = original_sl.language_code
        else:
            self.prepared_data['original_language_display'] = ''
            self.prepared_data['original_language'] = ''

        self.prepared_data['absolute_url'] = obj.get_absolute_url()
        self.prepared_data['thumbnail'] = obj.get_thumbnail()
        self.prepared_data['title'] = obj.video.title_display()
        self.prepared_data['description'] = obj.description
        self.prepared_data['is_complete'] = obj.video.complete_date is not None
        self.prepared_data['video_complete_date'] = obj.video.complete_date
        self.prepared_data['project_pk'] = obj.project.pk
        self.prepared_data['project_name'] = obj.project.name
        self.prepared_data['project_slug'] = obj.project.slug
        self.prepared_data['team_video_create_date'] = obj.created

        completed_sls = list(obj.video.completed_subtitle_languages())
        all_sls = obj.video.newsubtitlelanguage_set.having_nonempty_tip()
        self.prepared_data['num_total_langs'] = all_sls.count()
        self.prepared_data['num_completed_langs'] = len(completed_sls)
        self.prepared_data['video_completed_langs'] = \
            [sl.language_code for sl in completed_sls]
        self.prepared_data['video_completed_lang_urls'] = \
            [sl.get_absolute_url() for sl in completed_sls]
        self.prepared_data['task_count'] = models.Task.objects.incomplete(
            ).filter(team_video=obj).count()

        team_video = obj.video.get_team_video()
        self.prepared_data['is_public'] = team_video.team.is_visible
        self.prepared_data['owned_by_team_id'] = \
            team_video.team.id if team_video else None

        return self.prepared_data

    @classmethod
    def results_for_members(cls, team):
        base_qs = SearchQuerySet().models(models.TeamVideo)
        public = SQ(is_public=True)
        mine = SQ(is_public=False, owned_by_team_id=team.pk)
        return base_qs.filter(public | mine)

    @classmethod
    def results(cls):
        return SearchQuerySet().models(models.TeamVideo).filter(is_public=True)
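# Usage sketch (assumption): how the two class methods above might be combined
# with further SearchQuerySet filtering. `team` is any models.Team instance;
# the helper name and the optional `query` parameter are illustrative.
def videos_visible_to_member(team, query=None):
    """A team member's browsable videos, optionally narrowed by a text query (sketch)."""
    qs = TeamVideoLanguagesIndex.results_for_members(team).filter(team_id=team.pk)
    if query:
        qs = qs.auto_query(query)
    return qs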
class FossilIndex(SearchIndex):
    text = CharField(document=True, use_template=False, stored=False)
    trial_id = CharField()
    rev_seq = CharField()
    is_most_recent = BooleanField()
    main_title = MultiValueField()
    rec_status = CharField(faceted=True)
    date_registration = DateTimeField()
    outdated = BooleanField()
    status = CharField()
    rec_country = MultiValueField()
    is_observational = BooleanField()
    i_type = MultiValueField(faceted=True)
    gender = CharField()
    minimum_recruitment_age = IntegerField()  # in hours
    maximum_recruitment_age = IntegerField()  # in hours

    def prepare_minimum_recruitment_age(self, obj):
        fossil_ct = obj.get_object_fossil()
        try:
            unit = fossil_ct.agemin_unit
            value = fossil_ct.agemin_value
        except AttributeError:
            return None
        age = normalize_age(value, unit) if unit != '-' else 0
        return age

    def prepare_maximum_recruitment_age(self, obj):
        fossil_ct = obj.get_object_fossil()
        try:
            unit = fossil_ct.agemax_unit
            value = fossil_ct.agemax_value
        except AttributeError:
            return None
        age = normalize_age(value, unit) if unit != '-' else normalize_age(200, 'Y')
        return age

    def prepare_trial_id(self, obj):
        fossil_ct = obj.get_object_fossil()
        try:
            return fossil_ct.trial_id
        except AttributeError:
            return None

    def prepare_rev_seq(self, obj):
        return obj.revision_sequential

    def prepare_is_most_recent(self, obj):
        return obj.is_most_recent

    def prepare_rec_status(self, obj):
        try:
            return obj.get_object_fossil().recruitment_status.label
        except AttributeError:
            return None

    def prepare_date_registration(self, obj):
        try:
            return obj.get_object_fossil().date_registration
        except AttributeError:
            return None

    def prepare_outdated(self, obj):
        try:
            return obj.get_object_fossil().outdated
        except AttributeError:
            return None

    def prepare_status(self, obj):
        try:
            return obj.get_object_fossil().status
        except AttributeError:
            return None

    def prepare_main_title(self, obj):
        fossil_ct = obj.get_object_fossil()
        fossil_ct._load_translations()
        try:
            main_titles = []
            for lang in fossil_ct._translations.keys():
                fossil_ct._language = lang
                main_titles.append(fossil_ct.main_title())
            return main_titles
        except AttributeError:
            return None

    def prepare_text(self, obj):
        fossil_ct = obj.get_object_fossil()
        fossil_ct._load_translations()

        retrieve_data_from = [
            'scientific_contacts', 'utrn_number', 'secondary_ids', 'trial_id',
            'scientific_title', 'public_title', 'acronym', 'scientific_acronym',
            'scientific_acronym_expansion', 'hc_freetext', 'i_freetext'
        ]
        retrieve_data_from_multilanguage = [
            'scientific_title', 'public_title', 'acronym', 'scientific_acronym',
            'scientific_acronym_expansion', 'hc_freetext', 'i_freetext'
        ]

        all_text = set()
        for fossil_method in retrieve_data_from:
            try:
                all_text.add(getattr(fossil_ct, fossil_method))
            except AttributeError:
                pass

        # index content in all available languages
        for lang in fossil_ct._translations.keys():
            fossil_ct._language = lang
            for fossil_method in retrieve_data_from_multilanguage:
                try:
                    all_text.add(getattr(fossil_ct, fossil_method))
                except AttributeError:
                    pass

        primary_sponsor = getattr(fossil_ct, 'primary_sponsor', '')
        if primary_sponsor:
            for v in primary_sponsor.values():
                if isinstance(v, basestring):
                    all_text.add(v)

        all_text.discard(None)
        return ' '.join(all_text).strip()

    def prepare_rec_country(self, obj):
        fossil_ct = obj.get_object_fossil()
        return [country['label'] for country in fossil_ct.recruitment_country]

    def prepare_is_observational(self, obj):
        fossil_ct = obj.get_object_fossil()
        return getattr(fossil_ct, 'is_observational', False)

    def prepare_i_type(self, obj):
        fossil_ct = obj.get_object_fossil()
        sources = []
        for source in fossil_ct.support_sources:
            try:
                sources.append(source['institution']['i_type']['label'])
            except KeyError:  # field doesn't exist
                pass
        return sources

    def prepare_gender(self, obj):
        fossil_ct = obj.get_object_fossil()
        if fossil_ct.gender == 'M':
            return 'male'
        elif fossil_ct.gender == 'F':
            return 'female'
        else:
            return 'both'
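# normalize_age() is referenced above but not defined in this excerpt. The
# sketch below is purely illustrative of one plausible implementation: it
# converts an age value plus a unit code into hours, which matches the
# "# in hours" comments on the two IntegerFields. Only 'Y' appears in the
# original code; the other unit codes here are assumptions.
def normalize_age(value, unit):
    """Convert an age `value` expressed in `unit` into whole hours (sketch)."""
    hours_per_unit = {
        'H': 1,          # hours
        'D': 24,         # days
        'W': 24 * 7,     # weeks
        'M': 24 * 30,    # months (approximate)
        'Y': 24 * 365,   # years (approximate)
    }
    return int(value) * hours_per_unit.get(unit, 0)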
class TrialIndex(SearchIndex, Indexable):
    text = CharField(document=True, use_template=True)
    title = CharField(model_attr='title')
    session_title = CharField(model_attr='session__title')
    experiment_title = CharField(model_attr='session__experiment__title')
    study_title = CharField(model_attr='session__experiment__study__title')
    taxon = CharField(model_attr='session__experiment__subject__taxon', faceted=True)
    #accession = CharField(model_attr='accession', null=True)
    food_type = CharField(model_attr='food_type', null=True, faceted=True)
    food_size = CharField(model_attr='food_size', null=True)
    food_property = CharField(model_attr='food_property', null=True)
    created_at = DateTimeField(model_attr='created_at')
    updated_at = DateTimeField(model_attr='updated_at')
    behaviorowl_primary = CharField(model_attr='behaviorowl_primary', null=True)
    behaviorowl_primary_ancestors = MultiValueField(faceted=True)
    behaviorowl_primary_part_of = MultiValueField(faceted=True)
    techniques = MultiValueField(indexed=False, stored=True, faceted=True)

    # List of muscle labels for EMG and Sono sensors and non-muscle anatomical
    # locations on other sensors
    analoc_direct = MultiValueField()

    # `analoc_direct` along with the ancestors of each MuscleOwl in the list
    analoc = MultiValueField(faceted=True)

    # Muscles which members of `analoc_direct` are a part of, except when the
    # same muscle is already listed in `analoc`
    #
    # This is currently just muscles, but could be extended to include any
    # other OwlTerms if they are added later.
    analoc_part_of = MultiValueField(faceted=True)

    def prepare_techniques(self, obj):
        technique_dict = dict(Techniques.CHOICES)
        techniques = set()
        for channel in obj.session.channels.all():
            try:
                techniques.add(technique_dict[channel.setup.technique])
            except KeyError:
                print "Unknown technique #%d for channel %s on trial %s" % (
                    channel.setup.technique, channel, obj)
        if settings.DEBUG:
            print "Techniques: %s" % sorted(techniques)
        return sorted(techniques)

    @fail_with_return_value(None)
    def prepare_behaviorowl_primary(self, obj):
        return obj.behaviorowl_primary.label_with_synonyms()

    @fail_with_return_value([])
    def prepare_behaviorowl_primary_ancestors(self, obj):
        # TODO: should we be including the original behaviorowl here?
        ancestors = obj.behaviorowl_primary.ancestor_classes_inclusive()
        return [b.label_with_synonyms() for b in ancestors]

    @fail_with_return_value([])
    def prepare_behaviorowl_primary_part_of(self, obj):
        # TODO: should we be including the original behaviorowl here?
        part_ofs = obj.behaviorowl_primary.part_of_classes_inclusive()
        return [b.label_with_synonyms() for b in part_ofs]

    def prepare(self, obj):
        """
        Prepare the list of muscles for the index by traversing all channels
        on this trial and including all muscles listed on channels which
        include a muscle term. These channels are just EMG and Sono types.

        We also prepare the list of muscles which have the target muscle as a
        part, but only store the muscles which are distinct from the muscles
        we are already storing in the subClassOf field.

        This arrangement enables a search to be broadened from just subClass
        relationships to both subClass and part_of relationships by adding an
        OR filter on the `analoc_part_of` field.

        An alternate implementation might instead store two independent fields
        for indexing; one uses just the subClassOf relationship and the other
        might use both. A search would be broadened by switching from one
        field to the other.
        """
        muscles_ancestors = set()
        muscles_part_of = set()
        muscles_direct = set()
        for m in trial_muscles(obj):
            if m is not None:
                m_label = m.label_with_synonyms()
                if len(m_label):
                    muscles_direct.add(m_label)
                for m_ancestor in m.ancestor_classes():
                    muscles_ancestors.add(m_ancestor.label_with_synonyms())
                for m_part_of in m.part_of_classes():
                    muscles_part_of.add(m_part_of.label_with_synonyms())
        muscles_part_of = muscles_part_of.difference(muscles_ancestors)

        self.prepared_data = super(TrialIndex, self).prepare(obj)

        if settings.DEBUG:
            print "MUSCLES DIRECT %s" % muscles_direct
            print "MUSCLES ANCESTORS %s" % muscles_ancestors
            print "MUSCLES PART OF %s" % muscles_part_of

        # Now add the anatomical location terms to the set
        analocs_direct = set()
        for al in trial_analocs(obj):
            if al is not None and len(unicode(al)):
                analocs_direct.add(unicode(al))

        # Store all AnatomicalLocations, MuscleOwls, and ancestors of MuscleOwls
        self.prepared_data['analoc'] = list(
            analocs_direct | muscles_direct | muscles_ancestors)

        # Store muscles that the muscle is part of, but isn't already a
        # subclass of. We don't have "part_of" relationships for analocs, so
        # don't include them.
        self.prepared_data['analoc_part_of'] = list(
            muscles_part_of - muscles_ancestors)

        # Store MuscleOwls and AnatomicalLocations applied directly to the
        # trial's session's channel's sensors.
        self.prepared_data['analoc_direct'] = list(
            muscles_direct | analocs_direct)

        return self.prepared_data

    def load_all_queryset(self):
        return Trial.objects.all().prefetch_related('bucket_set')

    def get_model(self):
        return Trial

    def build_queryset(self, using=None, start_date=None, end_date=None):
        qs = super(TrialIndex, self).build_queryset(using=using)

        # We count any modification to the containers containing this trial,
        # because information from all these containers is included in the
        # search index.
        #
        # We don't include sensors and channels explicitly because, at the
        # time of writing, it is not possible to edit sensors or channels
        # without editing the containing setup.
        #
        # These could probably be enhanced by including units and other CvTerm
        # fields, but for now I hope that only Taxon is likely to change.
        updated_at_fields = (
            'updated_at',
            'session__updated_at',
            'experiment__updated_at',
            'experiment__setup__updated_at',
            'experiment__subject__updated_at',
            'experiment__subject__taxon__updated_at',
            'study__updated_at',
        )

        def to_q(fieldname, op, rhs):
            from django.db.models import Q
            kwargs = {fieldname + '__' + op: rhs}
            return Q(**kwargs)

        def build_conditions(op, rhs):
            fields = iter(updated_at_fields)
            conditions = to_q(next(fields), op, rhs)
            for field in fields:
                conditions |= to_q(field, op, rhs)
            return conditions

        if start_date:
            qs = qs.filter(build_conditions('gte', start_date))
        if end_date:
            qs = qs.filter(build_conditions('lte', end_date))
        return qs