def add_faceted_document(index):
    """Puts a sample faceted document into ``index``.

    The document has the id 'doc1', a single atom field ('name') and two
    facets: an atom facet ('type') and a number facet ('ram_size_gb').

    Args:
      index: object whose ``put`` method receives the built document.
    """
    name_field = search.AtomField(name='name', value='x86')
    sample_facets = [
        search.AtomFacet(name='type', value='computer'),
        search.NumberFacet(name='ram_size_gb', value=8),
    ]
    sample_doc = search.Document(
        doc_id='doc1',
        fields=[name_field],
        facets=sample_facets)
    index.put(sample_doc)
def add_to_search_index(object_id, labels, metadata, mapped_category=None,
                        most_similar_category=None, thumb_url=None,
                        preview_url=None):
    """Adds document to the search index.

    Builds a document for the 'imagesearch' index whose id is
    ``hashing.hash_value(object_id)`` and puts it into the index.

    Args:
      object_id: value stored in the 'image_id' text field and hashed to
        form the document id.
      labels: iterable of mappings; each one's 'description' entry is added
        both as a 'label' text field and as a 'label_facet' atom facet.
      metadata: mapping whose items are each added as a text field
        (key -> field name, value -> field value).
      mapped_category: optional; when truthy it is added as a text field
        and as a facet.
      most_similar_category: optional; when truthy it is added as a text
        field and as a facet.
      thumb_url: optional; when truthy it is stored as the 'thumb_url'
        field.
      preview_url: optional; when truthy it is stored as the 'preview_url'
        field.

    Returns:
      The id of the first put result, or None when a ``search.Error`` was
      raised (the exception is logged, not propagated).
    """
    try:
        # Define document search fields - these can be queried using keyword
        # search.
        fields = [
            search.TextField(name='image_id', value=object_id),
        ]

        # Define document facet fields.
        facets = []

        # Add label descriptions into search and facet fields. Search API
        # allows multiple values for the same field.
        for label in labels:
            description = label['description']
            fields.append(search.TextField(name='label', value=description))
            facets.append(
                search.AtomFacet(name='label_facet', value=description))

        # Add mapped category and most similar category as fields and facets.
        if mapped_category:
            fields.append(
                search.TextField(name='mapped_category',
                                 value=mapped_category))
            facets.append(
                search.AtomFacet(name='mapped_category_facet',
                                 value=mapped_category))

        if most_similar_category:
            fields.append(
                search.TextField(name='most_similar_category',
                                 value=most_similar_category))
            facets.append(
                search.AtomFacet(name='most_similar_category_facet',
                                 value=most_similar_category))

        # We're not using a database, so store the image URLs to the index.
        # We don't need to search on the image URL, but we will need them to
        # display images in the user interface.
        if thumb_url:
            fields.append(search.TextField(name='thumb_url', value=thumb_url))

        # Guard on preview_url itself: the previous check tested thumb_url,
        # which dropped the preview when no thumbnail was supplied and stored
        # a None preview when only a thumbnail was supplied.
        if preview_url:
            fields.append(
                search.TextField(name='preview_url', value=preview_url))

        # Add any other object metadata as document search fields.
        # items() behaves the same on Python 2 and Python 3 mappings.
        for key, value in metadata.items():
            fields.append(search.TextField(name=key, value=value))

        # Add the document to the search index.
        document = search.Document(
            doc_id=hashing.hash_value(object_id),
            fields=fields,
            facets=facets)
        add_result = search.Index(name='imagesearch').put(document)
        return add_result[0].id
    except search.Error:
        logging.exception('Something went wrong in add_to_search_index()')
        # Best-effort indexing: callers get None when the put failed.
        return None
def item_data(item):
    """Builds the search fields and facets describing ``item``.

    Returns:
      A ``(fields, facets)`` tuple: ``fields`` is a list of search fields
      (identity, title, urls, timestamps, optional custom text, categories,
      price information and a status tag); ``facets`` is a list holding one
      'category' atom facet per id on the item's category path.
    """
    item_key = item.key
    fields = [
        search.AtomField('store', item_key.parent().id()),
        search.AtomField('sku', item_key.id()),
        search.TextField('title', item.title),
        # A title prefix field was tried here but rejected with
        # "Unsupported field type TOKENIZED_PREFIX":
        # search.TokenizedPrefixField('title_prefix', item.title),
        search.AtomField('url', item.url),
        search.AtomField('image', item.image),
        # DateField supports only date accuracy (ie. not second), so the
        # timestamps are stored as numbers instead.
        search.NumberField('added', to_unix(item.added)),
        search.NumberField('checked', to_unix(item.checked)),
    ]

    # Custom values: strings are kept as they are, integers are stringified,
    # everything else is ignored. Duplicates collapse through the set.
    if item.custom:
        custom_terms = {
            raw if isinstance(raw, basestring) else str(raw)
            for raw in item.custom.itervalues()
            if isinstance(raw, (basestring, int, long))
        }
        if custom_terms:
            custom_text = " ".join(sorted(custom_terms))
            fields.append(search.TextField('custom', custom_text))

    # Category path: one space-separated text field plus one facet per id.
    facets = []
    if item.category:
        path_ids = []
        for cat_key in cat_path(item_key, item.category):
            path_ids.append("%d" % cat_key.id())
        if path_ids:
            fields.append(search.TextField('categories', " ".join(path_ids)))
            # NumberFacet is 30 bit, hence atom facets for the ids.
            facets.extend(
                search.AtomFacet('category', path_id) for path_id in path_ids)

    # Price history, ordered by descending timestamp.
    price_query = Price.query(ancestor=item_key)
    price_query = price_query.order(-Price.timestamp)
    prices = price_query.fetch()
    if prices:
        cents = [to_us_cents(price) for price in prices]
        current_cents = cents[0]
        history_text = " ".join(
            [format_history_price(price) for price in prices])
        fields.append(search.NumberField('us_cents', current_cents))
        fields.append(search.TextField('price_history', history_text))

        if len(cents) > 1:
            highest_cents = max(cents)
            discount_usc = highest_cents - current_cents
            if discount_usc:
                fields.append(
                    search.NumberField('discount_us_cents', discount_usc))
                # NOTE(review): with integer cents under Python 2 this `/`
                # is floor division - confirm what to_us_cents returns.
                discount_pc = discount_usc * 100 / highest_cents
                if discount_pc:
                    fields.append(
                        search.NumberField('discount_pc', discount_pc))

    # NOT queries are expensive, thus providing information in both forms:
    # every item carries either the 'removed' or the 'active' tag.
    if item.removed:
        fields.append(search.NumberField('removed', to_unix(item.removed)))
        status = 'removed'
    else:
        status = 'active'
    fields.append(search.AtomField('tags', "#%s" % status))

    return fields, facets