class LinkIndex(SearchIndex, Indexable):
    """Haystack search index for ``Link`` objects."""

    text = CharField(document=True, use_template=True)
    date_available = DateTimeField(model_attr='date_available')
    date_update = DateTimeField(model_attr='date_update')
    tags = MultiValueField(null=True)

    def get_model(self):
        """Return the model class covered by this index (``Link``)."""
        return Link

    def get_updated_field(self):
        """Return the name of the model field holding the last-update time."""
        return 'date_update'

    def index_queryset(self, using=None):
        """Return links marked published whose ``date_available`` has passed."""
        return Link.objects.filter(
            date_available__lte=timezone.now(),
            published=True,
        )

    def prepare_tags(self, obj):
        """Build the value of the ``tags`` field.

        Returns ``None`` when ``obj.tags`` is falsy; otherwise a flat list
        holding, for every tag from ``obj.get_tags()``, its slug followed by
        its name.
        """
        if not obj.tags:
            return None
        collected = []
        for tag in obj.get_tags() or []:
            collected.extend((tag.slug, tag.name))
        return collected
class EventIndex(SearchIndex):
    """Haystack search index for ``Event`` objects."""

    text = CharField(document=True, use_template=True)
    creator = CharField(model_attr='creator')
    datetime = DateTimeField(model_attr='time')
    created_at = DateTimeField(model_attr='created_at')
    market = CharField()

    def prepare_market(self, obj):
        """Return the name of the market attached to the event being indexed.

        :param obj: the ``Event`` instance currently being prepared.
        :returns: ``obj.market.name``.
        """
        # Read the relation from the instance, not from the ``Event`` model
        # class: the class attribute is not a market row and carries no
        # per-event name.
        return obj.market.name
class PostIndex(SearchIndex):
    """Haystack search index for ``Post`` objects."""

    text = CharField(document=True, use_template=True)
    date_available = DateTimeField(model_attr='date_available')
    date_update = DateTimeField(model_attr='date_update')

    def get_updated_field(self):
        """Return the name of the model field holding the last-update time."""
        return 'date_update'

    def index_queryset(self):
        """Return posts marked published whose ``date_available`` has passed."""
        cutoff = datetime.now()
        return Post.objects.filter(date_available__lte=cutoff, published=True)
class ChannelIndex(SearchIndex, Indexable):
    """Haystack search index for ``Channel`` objects."""

    text = CharField(document=True, use_template=True)
    date_available = DateTimeField(model_attr='date_available')
    date_update = DateTimeField(model_attr='date_update')

    def get_model(self):
        """Return the model class covered by this index (``Channel``)."""
        return Channel

    def get_updated_field(self):
        """Return the name of the model field holding the last-update time."""
        return 'date_update'

    def index_queryset(self, using=None):
        """Return channels marked published whose ``date_available`` has passed."""
        cutoff = timezone.now()
        return Channel.objects.filter(date_available__lte=cutoff, published=True)
class ItemIndex(RealTimeSearchIndex):
    """Search index over ``for_sale.models.Item`` instances."""

    # Keep to Haystack's naming convention for the primary (document) field.
    # Unless stated otherwise, fields are both stored and indexed.
    summary = CharField(document=True, use_template=True)
    title = CharField(model_attr='title')
    description = CharField(model_attr='description', indexed=False, null=True)
    # A DecimalField was considered for the price; FloatField is what is used.
    price = FloatField(model_attr='price', indexed=False)
    added_at = DateTimeField(model_attr='date_time', stored=False)
    shop_id = IntegerField(model_attr='shop__id', stored=False)
    shop_name = CharField(model_attr='shop__name')
    category = CharField(model_attr='category')
    subcategory = CharField(model_attr='subcategory', null=True)

    # Intended orderings for search results:
    #   * relevance (score function?)
    #   * added_at ASC / DESC
    #   * price ASC / DESC
    #   * title ASC / DESC

    # Alternative hook name kept for reference: get_queryset
    def index_queryset(self):
        """Return only items with a positive quantity (``qty > 0``)."""
        in_stock = Item.objects.filter(qty__gt=0)
        return in_stock
class LotIndex(RealTimeSearchIndex):
    """Search index over ``lots.models.Lot`` instances."""

    # Keep to Haystack's naming convention for the primary (document) field.
    # Unless stated otherwise, fields are both stored and indexed.
    summary = CharField(document=True, use_template=True)
    title = CharField(model_attr='title')
    description = CharField(model_attr='description', indexed=False, null=True)
    state = CharField(model_attr='get_state_display')
    shop_id = IntegerField(model_attr='shop__id', stored=False)
    shop_name = CharField(model_attr='shop__name')
    category = CharField(model_attr='category')
    subcategory = CharField(model_attr='subcategory', null=True)
    added_at = DateTimeField(model_attr='date_time', stored=False)

    # Alternative hook name kept for reference: get_queryset
    def index_queryset(self):
        """Return only lots whose ``state`` is ``"A"`` (active lots)."""
        active_lots = Lot.objects.filter(state='A')
        return active_lots
class StoryIndex(SearchIndex):
    """Haystack search index for ``Story`` objects."""

    text = CharField(document=True, use_template=True)
    author = CharField(faceted=True)
    pub_date = DateTimeField(model_attr='pub_date', faceted=True)
    tags = MultiValueField(model_attr='tags', faceted=True)

    def get_queryset(self):
        """Return stories whose ``pub_date`` is not in the future."""
        return Story.objects.filter(pub_date__lte=datetime.datetime.now())

    def prepare_author(self, obj):
        """Return the author of the first ``StoryAuthor`` row for ``obj``.

        :param obj: the ``Story`` being indexed.
        :returns: that row's ``author``, or ``u'Journal Staff'`` when the
            story has no ``StoryAuthor`` rows.
        """
        # Match on the story itself. Matching on ``pub_date`` (as was done
        # before) picks an arbitrary story's author whenever two stories
        # share a publication timestamp.
        try:
            return StoryAuthor.objects.filter(story=obj)[0].author
        except IndexError:
            return u'Journal Staff'

    def prepare_tags(self, obj):
        """Return the tags attached to ``obj``.

        :param obj: the ``Story`` being indexed.
        :returns: the result of ``Tag.objects.get_for_object(obj)``, or
            ``u'No Tags'`` if that call returns ``None``.
        """
        # ``obj`` is already the story; re-fetching it by ``pub_date`` cost an
        # extra query and could return a different story with the same date.
        tags = Tag.objects.get_for_object(obj)
        if tags is not None:
            return tags
        return u'No Tags'
class BasicPostIndex(SearchIndex):
    """Haystack search index for ``BasicPost`` objects."""

    text = CharField(document=True, use_template=True)
    pub_date = DateTimeField(model_attr='date_published')

    def index_queryset(self):
        """Return the published posts; used when the whole index is rebuilt."""
        published_posts = BasicPost.objects.published()
        return published_posts
class ThreadIndex(SearchIndex):
    """Haystack search index for discussion threads.

    Both date fields are derived from the thread's ``Post`` rows rather than
    from attributes of the thread itself.
    """

    text = CharField(document=True, use_template=True)
    creator = CharField(model_attr='creator')
    datetime = DateTimeField()
    creation_date = DateTimeField()

    # TODO: both prepare methods query the database on every call, which
    # could be avoided most of the time: leave the creation date alone and
    # update ``datetime`` manually.
    # NOTE(review): both index with [0], so a thread without any posts
    # presumably raises IndexError here; confirm that cannot happen.

    def prepare_datetime(self, obj):
        """Return ``created_at`` of the most recent post in thread ``obj``."""
        newest_first = Post.objects.filter(thread=obj).order_by('-created_at')
        return newest_first[0].created_at

    def prepare_creation_date(self, obj):
        """Return ``created_at`` of the earliest post in thread ``obj``."""
        oldest_first = Post.objects.filter(thread=obj).order_by('created_at')
        return oldest_first[0].created_at
class BlogIndex(SearchIndex, Indexable):
    """Haystack search index for ``Blog`` objects."""

    text = CharField(document=True, use_template=True)
    author = CharField(model_attr='author', faceted=True)
    title = CharField(model_attr='title', use_template=True)  # , indexed=False)
    body = EdgeNgramField(model_attr='body', use_template=True)
    timestamp = DateTimeField(model_attr='created')

    def get_model(self):
        """Return the model class covered by this index (``Blog``)."""
        return Blog

    def index_queryset(self, using=None):
        """Return blogs whose ``created`` value is at or before ``now``."""
        # NOTE(review): ``now`` is passed as-is, not called. If it is a
        # function (e.g. django.utils.timezone.now) this relies on the ORM
        # accepting callables; if it is a module-level datetime it is frozen
        # at import time. Confirm against the file's imports.
        manager = self.get_model().objects
        return manager.filter(created__lte=now)

    def prepare_body(self, obj):
        """Return ``obj.body`` as the value of the ``body`` field."""
        return obj.body

    def prepare_text(self, obj):
        """Return ``obj.text`` as the value of the document field."""
        return obj.text
class PageIndex(SearchIndex):
    """Haystack search index covering the content of pages."""

    text = CharField(document=True, use_template=True)
    title = CharField(model_attr='title')
    url = CharField(model_attr='get_absolute_url')
    publication_date = DateTimeField(model_attr='publication_date')

    def get_queryset(self):
        """Return the published pages; used when the whole index is rebuilt."""
        published_pages = Page.objects.published()
        return published_pages
class BaseIndex(SearchIndex):
    """
    Common base class for the search indexes of all models.

    The ``text``, ``created`` and ``modified`` fields are shared by every
    model. The Haystack documentation explains the role of the ``text``
    field and of ``document=True``. The matching templates must be placed
    under templates/search/indexes/freeform_data.

    ``model_type`` is ``None`` here; ``get_model`` returns whatever value it
    holds.
    """

    text = CharField(document=True, use_template=True)
    created = DateTimeField(model_attr='created')
    modified = DateTimeField(model_attr='modified')

    model_type = None

    def get_model(self):
        """Return the model class stored in ``model_type``."""
        return self.model_type

    def index_queryset(self, using=None):
        """Return every row of the model; used when the whole index is rebuilt."""
        manager = self.get_model().objects
        return manager.all()
class MemberIndex(SearchIndex, Indexable):
    """Haystack search index for ``Member`` objects."""

    text = CharField(document=True, use_template=True)
    identifier = CharField(model_attr='identifier')
    email = CharField(model_attr='email', use_template=True)
    cellular = EdgeNgramField(model_attr='cellular', use_template=True)
    # rendered = CharField(use_template=True, indexed=False)
    timestamp = DateTimeField(model_attr='created')

    def get_model(self):
        """Return the model class covered by this index (``Member``)."""
        return Member

    def index_queryset(self, using=None):
        """Return members whose ``created`` value is at or before ``now``."""
        # NOTE(review): ``now`` is passed as-is, not called. If it is a
        # function (e.g. django.utils.timezone.now) this relies on the ORM
        # accepting callables; if it is a module-level datetime it is frozen
        # at import time. Confirm against the file's imports.
        manager = self.get_model().objects
        return manager.filter(created__lte=now)

    def prepare_cellular(self, obj):
        """Return ``obj.cellular`` as the value of the ``cellular`` field."""
        return obj.cellular
class AuctionSessionIndex(RealTimeSearchIndex):
    """Search index over ``auctions.models.AuctionSession`` instances."""

    # Keep to Haystack's naming convention for the primary (document) field.
    # Unless stated otherwise, fields are both stored and indexed.
    summary = CharField(document=True, use_template=True)
    title = CharField(model_attr="title")
    description = CharField(model_attr="description", indexed=False, null=True)
    shop_id = IntegerField(model_attr="shop__id", stored=False)
    shop_name = CharField(model_attr="shop__name")
    starts_at = DateTimeField(model_attr="start", stored=False)
    ends_at = DateTimeField(model_attr="end", stored=False)

    # Alternative hook name kept for reference: get_queryset
    def index_queryset(self):
        """Return the sessions running right now (``start <= now < end``)."""
        # Read the clock once so both bounds are compared against the same
        # instant instead of two slightly different ones.
        now = datetime.now()
        return AuctionSession.objects.filter(start__lte=now, end__gt=now)
class ContainerIndex(SearchIndex):
    """Haystack search index base for container-like models.

    ``index_queryset`` calls ``self.get_model()``, which is not defined in
    this class.
    """

    text = CharField(document=True, use_template=True)
    channel_id = IntegerField(model_attr='channel_id')
    date_available = DateTimeField(model_attr='date_available')
    date_update = DateTimeField(model_attr='date_update')
    tags = MultiValueField(null=True)

    def get_updated_field(self):
        """Return the name of the model field holding the last-update time."""
        return 'date_update'

    def index_queryset(self, using=None):
        """Return ``all_published()`` from the model's default manager."""
        manager = self.get_model().objects
        return manager.all_published()

    def prepare_tags(self, obj):
        """Build the value of the ``tags`` field.

        Returns ``None`` when ``obj.tags`` is falsy; otherwise a flat list
        holding, for every tag from ``obj.get_tags()``, its slug followed by
        its name.
        """
        if not obj.tags:
            return None
        collected = []
        for tag in obj.get_tags() or []:
            collected.extend((tag.slug, tag.name))
        return collected
class ProductIndex(RealTimeSearchIndex):
    """Search index for ``inventory.models.Product``."""

    # The summary template combines title, description, category name and
    # subcategory name so that a single field is enough for searching.
    summary = CharField(document=True, use_template=True, stored=False)

    title = CharField(model_attr='title')
    description = CharField(model_attr='description', indexed=False, null=True)

    product_id = IntegerField(model_attr='id')
    category = CharField(model_attr='category__name')  # , faceted=True)
    category_id = IntegerField(model_attr='category__id')
    subcategory = CharField(model_attr='subcategory__name', null=True)  # , faceted=True, null=True)
    subcategory_id = IntegerField(model_attr='subcategory__id', null=True)

    # A DecimalField was considered for the price; FloatField is what is used.
    price = FloatField()
    image_url = CharField(null=True, indexed=True)

    marketplace_id = IntegerField(model_attr='shop__marketplace__id')
    shop_id = IntegerField(model_attr='shop__id')
    shop_name = CharField(model_attr='shop__name', indexed=False)
    shop_default_dns = CharField(model_attr='shop__default_dns', indexed=False)
    shop_currency = CharField(indexed=False)

    added_at = DateTimeField(model_attr='date_time')

    def prepare_price(self, obj):
        """Return the price of the product's child object.

        A Lot exposes ``price`` as an instance method, an Item as a plain
        attribute, so the value is called only when it is callable.
        """
        child_price = obj.child().price
        return child_price() if callable(child_price) else child_price

    def prepare_image_url(self, obj):
        """Return the 100x100 image URL of the child object, or ``None``."""
        picture = obj.child().image()
        if not picture:
            return None
        return picture.image.url_100x100

    def prepare_shop_currency(self, obj):
        """Return ``checkout_currency`` from the shop's preference object."""
        shop_preference = obj.shop.preference()
        return shop_preference.checkout_currency
class PageIndex(SearchIndex, Indexable):
    """Haystack search index covering the content of pages."""

    text = CharField(document=True, use_template=True)
    title = CharField(model_attr='title')
    url = CharField(model_attr='get_absolute_url')
    publication_date = DateTimeField(model_attr='publication_date')

    def get_model(self):
        """Return the model class covered by this index (``Page``)."""
        return Page

    def index_queryset(self, using=None):
        """Return ``published()`` from the model's default manager."""
        manager = self.get_model().objects
        return manager.published()

    def should_update(self, instance, **kwargs):
        """Return ``True`` only when ``instance.status`` is ``Page.PUBLISHED``."""
        is_published = instance.status == Page.PUBLISHED
        return is_published
class PageIndex(SearchIndex, Indexable):
    """Haystack search index for ``Page`` objects."""

    name = CharField(model_attr='name')
    text = CharField(document=True, use_template=True)
    updated_at = DateTimeField(model_attr='updated_at')

    def get_model(self):
        """Return the model class covered by this index (``Page``)."""
        return Page

    def index_queryset(self, *args, **kwargs):
        """
        Used when the entire index for the model is updated.

        Returns all pages through ``select_related()``; positional and
        keyword arguments are accepted and ignored.

        TODO: Check this
        """
        pages = Page.objects.select_related()
        return pages.all()
class TorrentIndex(SearchIndex, Indexable):
    """Haystack search index for ``Torrent`` objects.

    The document field ``text`` has no template; ``prepare`` fills it with
    the prepared title.
    """

    text = CharField(document=True)
    title = CharField(model_attr='title')
    team_name = CharField(model_attr='team_name', null=True)
    pub_date = DateTimeField(model_attr='pub_date', null=True)
    author = CharField(model_attr='author', null=True)
    category = CharField(model_attr='category', null=True)

    def get_model(self):
        """Return the model class covered by this index (``Torrent``)."""
        return Torrent

    def index_queryset(self, using=None):
        """Return every row of the model."""
        manager = self.get_model().objects
        return manager.all()

    def prepare(self, obj):
        """Return the prepared data with ``text`` set to the prepared title."""
        prepared = super(TorrentIndex, self).prepare(obj)
        prepared['text'] = prepared['title']
        return prepared
class RealTimePageIndex(RealTimeSearchIndex, Indexable):
    """Real-time Haystack search index covering the content of pages."""

    text = CharField(document=True, use_template=True)
    title = CharField(model_attr='title')
    url = CharField(model_attr='get_absolute_url')
    publication_date = DateTimeField(model_attr='publication_date')

    def get_model(self):
        """Return the model class covered by this index (``Page``)."""
        return Page

    def index_queryset(self, using=None):
        """Return the published pages (hook required as of Haystack 2.0)."""
        manager = self.get_model().objects
        return manager.published()

    def get_queryset(self):
        """Return the published pages; used when the whole index is rebuilt."""
        published_pages = Page.objects.published()
        return published_pages

    def should_update(self, instance, **kwargs):
        """Return ``True`` only when ``instance.status`` is ``Page.PUBLISHED``."""
        is_published = instance.status == Page.PUBLISHED
        return is_published
class FossilIndex(SearchIndex):
    """Haystack search index whose fields are all computed from a fossil.

    No field declares ``model_attr``; every value comes from a
    ``prepare_<field>`` method, most of which read the object returned by
    ``obj.get_object_fossil()``.
    """

    text = CharField(document=True, use_template=False, stored=False)
    trial_id = CharField()
    rev_seq = CharField()
    is_most_recent = BooleanField()
    main_title = MultiValueField()
    rec_status = CharField(faceted=True)
    date_registration = DateTimeField()
    outdated = BooleanField()
    status = CharField()
    rec_country = MultiValueField()
    is_observational = BooleanField()
    i_type = MultiValueField(faceted=True)
    gender = CharField()
    minimum_recruitment_age = IntegerField()  # in hours
    maximum_recruitment_age = IntegerField()  # in hours

    def prepare_minimum_recruitment_age(self, obj):
        """Return the normalized minimum recruitment age.

        Returns ``None`` when the age attributes cannot be read, ``0`` when
        the unit is ``'-'``, and ``normalize_age(value, unit)`` otherwise.
        """
        fossil_ct = obj.get_object_fossil()
        try:
            unit = fossil_ct.agemin_unit
            value = fossil_ct.agemin_value
        except Exception:
            # Still best-effort, but no longer a bare ``except:``, which also
            # trapped KeyboardInterrupt and SystemExit.
            return None
        if unit == '-':
            return 0
        return normalize_age(value, unit)

    def prepare_maximum_recruitment_age(self, obj):
        """Return the normalized maximum recruitment age.

        Returns ``None`` when the age attributes cannot be read. A unit of
        ``'-'`` is replaced by ``normalize_age(200, 'Y')``.
        """
        fossil_ct = obj.get_object_fossil()
        try:
            unit = fossil_ct.agemax_unit
            value = fossil_ct.agemax_value
        except Exception:
            # Still best-effort, but no longer a bare ``except:``, which also
            # trapped KeyboardInterrupt and SystemExit.
            return None
        if unit == '-':
            return normalize_age(200, 'Y')
        return normalize_age(value, unit)

    def prepare_trial_id(self, obj):
        """Return the fossil's ``trial_id``, or ``None`` if it is missing."""
        fossil_ct = obj.get_object_fossil()
        try:
            return fossil_ct.trial_id
        except AttributeError:
            return None

    def prepare_rev_seq(self, obj):
        """Return ``obj.revision_sequential``."""
        return obj.revision_sequential

    def prepare_is_most_recent(self, obj):
        """Return ``obj.is_most_recent``."""
        return obj.is_most_recent

    def prepare_rec_status(self, obj):
        """Return the recruitment status label, or ``None`` if unavailable."""
        try:
            return obj.get_object_fossil().recruitment_status.label
        except AttributeError:
            return None

    def prepare_date_registration(self, obj):
        """Return the fossil's ``date_registration``, or ``None`` if missing."""
        try:
            return obj.get_object_fossil().date_registration
        except AttributeError:
            return None

    def prepare_outdated(self, obj):
        """Return the fossil's ``outdated`` value, or ``None`` if missing."""
        try:
            return obj.get_object_fossil().outdated
        except AttributeError:
            return None

    def prepare_status(self, obj):
        """Return the fossil's ``status``, or ``None`` if missing."""
        try:
            return obj.get_object_fossil().status
        except AttributeError:
            return None

    def prepare_main_title(self, obj):
        """Return the main title in every available translation.

        The fossil's ``_language`` is switched to each key of
        ``_translations`` in turn and ``main_title()`` is collected.
        Returns ``None`` if an ``AttributeError`` occurs while doing so.
        """
        fossil_ct = obj.get_object_fossil()
        fossil_ct._load_translations()
        try:
            main_titles = []
            for lang in fossil_ct._translations.keys():
                fossil_ct._language = lang
                main_titles.append(fossil_ct.main_title())
            return main_titles
        except AttributeError:
            return None

    def prepare_text(self, obj):
        """Return the document text: distinct fossil values joined by spaces.

        Values are gathered in a set, so duplicates collapse and the order of
        the joined pieces is not fixed.
        """
        fossil_ct = obj.get_object_fossil()
        fossil_ct._load_translations()

        retrieve_data_from = [
            'scientific_contacts', 'utrn_number', 'secondary_ids', 'trial_id',
            'scientific_title', 'public_title', 'acronym',
            'scientific_acronym', 'scientific_acronym_expansion',
            'hc_freetext', 'i_freetext',
        ]
        retrieve_data_from_multilanguage = [
            'scientific_title', 'public_title', 'acronym',
            'scientific_acronym', 'scientific_acronym_expansion',
            'hc_freetext', 'i_freetext',
        ]

        all_text = set()
        for fossil_method in retrieve_data_from:
            try:
                all_text.add(getattr(fossil_ct, fossil_method))
            except AttributeError:
                pass

        # Index content in all available languages.
        for lang in fossil_ct._translations.keys():
            fossil_ct._language = lang
            for fossil_method in retrieve_data_from_multilanguage:
                try:
                    all_text.add(getattr(fossil_ct, fossil_method))
                except AttributeError:
                    pass

        primary_sponsor = getattr(fossil_ct, 'primary_sponsor', '')
        if primary_sponsor:
            # Only the string values of the sponsor mapping are indexed.
            for v in primary_sponsor.values():
                if isinstance(v, basestring):
                    all_text.add(v)

        all_text.discard(None)
        return ' '.join(all_text).strip()

    def prepare_rec_country(self, obj):
        """Return the ``'label'`` of every recruitment country."""
        fossil_ct = obj.get_object_fossil()
        return [country['label'] for country in fossil_ct.recruitment_country]

    def prepare_is_observational(self, obj):
        """Return the fossil's ``is_observational``, defaulting to ``False``."""
        fossil_ct = obj.get_object_fossil()
        return getattr(fossil_ct, 'is_observational', False)

    def prepare_i_type(self, obj):
        """Return the institution type label of each support source.

        Sources missing any of the nested keys are skipped.
        """
        fossil_ct = obj.get_object_fossil()
        sources = []
        for source in fossil_ct.support_sources:
            try:
                sources.append(source['institution']['i_type']['label'])
            except KeyError:
                # The field does not exist for this source.
                pass
        return sources

    def prepare_gender(self, obj):
        """Map gender ``'M'`` to ``'male'``, ``'F'`` to ``'female'``, else ``'both'``."""
        fossil_ct = obj.get_object_fossil()
        if fossil_ct.gender == 'M':
            return 'male'
        if fossil_ct.gender == 'F':
            return 'female'
        return 'both'
class TeamVideoLanguagesIndex(SearchIndex):
    """Haystack search index for team videos and their subtitle languages.

    Apart from the templated ``text`` field, every field is filled in by
    ``prepare``.
    """

    text = CharField(
        document=True,
        use_template=True,
        template_name="teams/teamvideo_languages_for_search.txt",
    )
    team_id = IntegerField()
    team_video_pk = IntegerField(indexed=False)
    video_pk = IntegerField(indexed=False)
    video_id = CharField(indexed=False)
    video_title = CharField(faceted=True)
    video_url = CharField(indexed=False)
    original_language = CharField()
    original_language_display = CharField(indexed=False)
    absolute_url = CharField(indexed=False)
    project_pk = IntegerField(indexed=True)
    task_count = IntegerField()
    # Never store an absolute URL with Solr: the URL changes according to the
    # user, so it cannot be constructed at index time.
    # video_absolute_url = CharField(indexed=False)
    thumbnail = CharField(indexed=False)
    title = CharField()
    project_name = CharField(indexed=False)
    project_slug = CharField(indexed=False)
    description = CharField(indexed=True)
    is_complete = BooleanField()
    video_complete_date = DateTimeField(null=True)
    # List of completed language codes.
    video_completed_langs = MultiValueField()
    # List of completed language absolute URLs; should map 1-1 onto
    # video_completed_langs.
    video_completed_lang_urls = MultiValueField(indexed=False)

    latest_submission_date = DateTimeField(null=True)
    team_video_create_date = DateTimeField()

    # Possible values for visibility:
    #   is_public=True                            -> anyone can see
    #   is_public=False and owned_by_team_id=None -> a regular user owns it,
    #                                                no team can list this video
    #   is_public=False and owned_by_team_id=X    -> only team X can see this video
    is_public = BooleanField()
    owned_by_team_id = IntegerField(null=True)

    # All subtitle languages containing at least one version are included in
    # the total count.
    num_total_langs = IntegerField()

    # Completed languages are languages which have at least one version that is:
    #
    #   * Public
    #   * Covers all dialog
    #   * Fully synced
    #   * Fully translated, if a translation
    num_completed_langs = IntegerField()

    def prepare(self, obj):
        """Fill and return ``self.prepared_data`` for the team video ``obj``."""
        self.prepared_data = super(TeamVideoLanguagesIndex, self).prepare(obj)
        data = self.prepared_data

        data['team_id'] = obj.team.id
        data['team_video_pk'] = obj.id

        video = obj.video
        data['video_pk'] = video.id
        data['video_id'] = video.video_id
        data['video_title'] = video.title.strip()
        data['video_url'] = video.get_video_url()

        # Both language fields fall back to '' without an original language.
        original_sl = video.subtitle_language()
        if not original_sl:
            language_display, language_code = '', ''
        else:
            language_display = original_sl.get_language_display()
            language_code = original_sl.language
        data['original_language_display'] = language_display
        data['original_language'] = language_code

        data['absolute_url'] = obj.get_absolute_url()
        data['thumbnail'] = obj.get_thumbnail()
        data['title'] = unicode(obj).strip()
        data['description'] = obj.description
        data['is_complete'] = video.complete_date is not None
        data['video_complete_date'] = video.complete_date

        project = obj.project
        data['project_pk'] = project.pk
        data['project_name'] = project.name
        data['project_slug'] = project.slug
        data['team_video_create_date'] = obj.created

        completed_sls = video.completed_subtitle_languages()

        # Languages with at least one version whose latest version (public or
        # not) is not entirely blank.
        versioned_sls = video.subtitlelanguage_set.annotate(
            num_versions=Count('subtitleversion')
        ).filter(num_versions__gt=0)
        non_blank_sls = [
            sl for sl in versioned_sls
            if not sl.latest_version(public_only=False).is_all_blank()
        ]

        data['num_total_langs'] = len(non_blank_sls)
        data['num_completed_langs'] = len(completed_sls)
        data['video_completed_langs'] = [sl.language for sl in completed_sls]
        data['video_completed_lang_urls'] = [
            sl.get_absolute_url() for sl in completed_sls
        ]

        incomplete_tasks = models.Task.objects.incomplete()
        data['task_count'] = incomplete_tasks.filter(team_video=obj).count()

        # The owner is recorded only when the policy belongs to a team.
        policy = video.policy
        if policy and policy.belongs_to_team:
            owner_team_id = policy.object_id
        else:
            owner_team_id = None

        data['is_public'] = VideoVisibilityPolicy.objects.video_is_public(video)
        data['owned_by_team_id'] = owner_team_id

        return self.prepared_data

    @classmethod
    def results_for_members(cls, team):
        """Return team-video results that are public or owned by ``team``."""
        team_videos = SearchQuerySet().models(models.TeamVideo)
        visible_to_all = SQ(is_public=True)
        owned_by_team = SQ(is_public=False, owned_by_team_id=team.pk)
        return team_videos.filter(visible_to_all | owned_by_team)

    @classmethod
    def results(cls):
        """Return the team-video results whose ``is_public`` is true."""
        team_videos = SearchQuerySet().models(models.TeamVideo)
        return team_videos.filter(is_public=True)
class PostIndex(SearchIndex):
    """Haystack search index for posts.

    Purely declarative: every field is filled from a template or from the
    model attribute named in ``model_attr``.
    """

    # Primary document field, rendered from a template.
    text = CharField(
        document=True,
        use_template=True,
    )
    creator = CharField(model_attr="creator")
    # Indexed under the name ``datetime`` but read from ``created_at``.
    datetime = DateTimeField(model_attr="created_at")
class TrialIndex(SearchIndex, Indexable):
    """Haystack search index for ``Trial`` objects.

    Besides plain model attributes, the index stores ontology-derived terms
    (behaviors, muscles, anatomical locations) computed in ``prepare`` and
    in the ``prepare_<field>`` methods.
    """

    text = CharField(document=True, use_template=True)
    title = CharField(model_attr='title')
    session_title = CharField(model_attr='session__title')
    experiment_title = CharField(model_attr='session__experiment__title')
    study_title = CharField(model_attr='session__experiment__study__title')
    taxon = CharField(model_attr='session__experiment__subject__taxon',
                      faceted=True)
    # accession = CharField(model_attr='accession', null=True)
    food_type = CharField(model_attr='food_type', null=True, faceted=True)
    food_size = CharField(model_attr='food_size', null=True)
    food_property = CharField(model_attr='food_property', null=True)
    created_at = DateTimeField(model_attr='created_at')
    updated_at = DateTimeField(model_attr='updated_at')

    behaviorowl_primary = CharField(model_attr='behaviorowl_primary', null=True)
    behaviorowl_primary_ancestors = MultiValueField(faceted=True)
    behaviorowl_primary_part_of = MultiValueField(faceted=True)

    techniques = MultiValueField(indexed=False, stored=True, faceted=True)

    # List of muscle labels for EMG and Sono sensors and non-muscle anatomical
    # locations on other sensors.
    analoc_direct = MultiValueField()

    # `analoc_direct` along with the ancestors of each MuscleOwl in the list.
    analoc = MultiValueField(faceted=True)

    # Muscles which members of `analoc_direct` are a part of, except when the
    # same muscle is already listed in `analoc`.
    #
    # This is currently just muscles, but could be extended to include any
    # other OwlTerms if they are added later.
    analoc_part_of = MultiValueField(faceted=True)

    def prepare_techniques(self, obj):
        """Return the sorted, de-duplicated technique names of the trial.

        Each channel of ``obj.session`` contributes the display name that
        ``Techniques.CHOICES`` maps its setup's technique to. A technique
        missing from ``Techniques.CHOICES`` is reported and skipped.
        """
        technique_dict = dict(Techniques.CHOICES)
        techniques = set()
        for channel in obj.session.channels.all():
            try:
                techniques.add(technique_dict[channel.setup.technique])
            except KeyError:
                # A failed dict lookup raises KeyError; the IndexError caught
                # before never fired, so an unknown technique aborted
                # indexing. %s rather than %d keeps the message from raising
                # on a non-integer technique value.
                print("Unknown technique #%s for channel %s on trial %s" % (
                    channel.setup.technique, channel, obj))
        ordered = sorted(techniques)
        if settings.DEBUG:
            print("Techniques: %s" % ordered)
        return ordered

    @fail_with_return_value(None)
    def prepare_behaviorowl_primary(self, obj):
        """Return the primary behavior's label with synonyms."""
        return obj.behaviorowl_primary.label_with_synonyms()

    @fail_with_return_value([])
    def prepare_behaviorowl_primary_ancestors(self, obj):
        """Return labels of the primary behavior's ancestor classes (inclusive)."""
        # TODO: should we be including the original behaviorowl here?
        ancestors = obj.behaviorowl_primary.ancestor_classes_inclusive()
        return [b.label_with_synonyms() for b in ancestors]

    @fail_with_return_value([])
    def prepare_behaviorowl_primary_part_of(self, obj):
        """Return labels of the primary behavior's part-of classes (inclusive)."""
        # TODO: should we be including the original behaviorowl here?
        part_ofs = obj.behaviorowl_primary.part_of_classes_inclusive()
        return [b.label_with_synonyms() for b in part_ofs]

    def prepare(self, obj):
        """
        Prepare the list of muscles for the index by traversing all channels on
        this trial and including all muscles listed on channels which include a
        muscle term. These channels are just EMG and Sono types.

        We also prepare the list of muscles which have the target muscle as a
        part, but only store the muscles which are distinct from the muscles
        we are already storing in the subClassOf field.

        This arrangement enables a search to be broadened from just subClass
        relationships to both subClass and part_of relationships by adding an
        OR filter on the `analoc_part_of` field.

        An alternate implementation might instead store two independent
        fields for indexing; one uses just the subClassOf relationship and
        the other might use both. A search would be broadened by switching
        from one field to the other.

        Returns ``self.prepared_data`` with `analoc`, `analoc_part_of` and
        `analoc_direct` filled in.
        """
        muscles_ancestors = set()
        muscles_part_of = set()
        muscles_direct = set()

        for m in trial_muscles(obj):
            if m is None:
                continue
            m_label = m.label_with_synonyms()
            if m_label:
                muscles_direct.add(m_label)
            # NOTE(review): ancestors and part-of terms are collected even
            # when the muscle's own label is empty -- confirm this is the
            # intended nesting.
            for m_ancestor in m.ancestor_classes():
                muscles_ancestors.add(m_ancestor.label_with_synonyms())
            for m_part_of in m.part_of_classes():
                muscles_part_of.add(m_part_of.label_with_synonyms())

        # Keep only the part-of muscles not already covered as ancestors.
        muscles_part_of = muscles_part_of.difference(muscles_ancestors)

        self.prepared_data = super(TrialIndex, self).prepare(obj)

        if settings.DEBUG:
            print("MUSCLES DIRECT %s" % muscles_direct)
            print("MUSCLES ANCESTORS %s" % muscles_ancestors)
            print("MUSCLES PART OF %s" % muscles_part_of)

        # Now add the anatomical location terms to the set.
        analocs_direct = set()
        for al in trial_analocs(obj):
            if al is not None and len(unicode(al)):
                analocs_direct.add(unicode(al))

        # Store all AnatomicalLocations, MuscleOwls, and ancestors of MuscleOwls.
        self.prepared_data['analoc'] = list(
            analocs_direct | muscles_direct | muscles_ancestors)

        # Store muscles that the muscle is part of, but isn't already a
        # subclass of. We don't have "part_of" relationships for analocs, so
        # don't include them.
        self.prepared_data['analoc_part_of'] = list(
            muscles_part_of - muscles_ancestors)

        # Store MuscleOwls and AnatomicalLocations applied directly to the
        # trial's session's channel's sensors.
        self.prepared_data['analoc_direct'] = list(
            muscles_direct | analocs_direct)

        return self.prepared_data

    def load_all_queryset(self):
        """Return all trials with their ``bucket_set`` prefetched."""
        return Trial.objects.all().prefetch_related('bucket_set')

    def get_model(self):
        """Return the model class covered by this index (``Trial``)."""
        return Trial

    def build_queryset(self, using=None, start_date=None, end_date=None):
        """Return the trials to index, optionally limited by update time.

        A trial matches a date bound when the trial itself *or* any of the
        listed related containers satisfies it.

        :param using: passed through to the parent ``build_queryset``.
        :param start_date: if truthy, keep rows with any ``updated_at`` field
            greater than or equal to this value.
        :param end_date: if truthy, keep rows with any ``updated_at`` field
            less than or equal to this value.
        """
        qs = super(TrialIndex, self).build_queryset(using=using)

        # We count any modification to the containers containing this trial,
        # because information from all these containers is included in the
        # search index.
        #
        # We don't include sensors and channels explicitly because, at the time
        # of writing, it is not possible to edit sensors or channels without
        # editing the containing setup.
        #
        # These could probably be enhanced by including units and other CvTerm
        # fields, but for now I hope that only Taxon is likely to change.
        updated_at_fields = (
            'updated_at',
            'session__updated_at',
            'experiment__updated_at',
            'experiment__setup__updated_at',
            'experiment__subject__updated_at',
            'experiment__subject__taxon__updated_at',
            'study__updated_at',
        )

        def to_q(fieldname, op, rhs):
            # Build Q(<fieldname>__<op>=rhs).
            from django.db.models import Q
            return Q(**{fieldname + '__' + op: rhs})

        def build_conditions(op, rhs):
            # OR together the same comparison over every updated_at field.
            fields = iter(updated_at_fields)
            conditions = to_q(next(fields), op, rhs)
            for field in fields:
                conditions |= to_q(field, op, rhs)
            return conditions

        if start_date:
            qs = qs.filter(build_conditions('gte', start_date))
        if end_date:
            qs = qs.filter(build_conditions('lte', end_date))

        return qs