Example #1
0
def add_faceted_document(index):
    """Put a fixed sample document carrying two facets into ``index``.

    The document uses the id ``'doc1'``, a single atom field
    ``name='x86'``, an atom facet ``type='computer'`` and a number facet
    ``ram_size_gb=8``.

    Args:
        index: object whose ``put(document)`` method is called once with
            the document built here.
    """
    name_field = search.AtomField(name='name', value='x86')
    type_facet = search.AtomFacet(name='type', value='computer')
    ram_facet = search.NumberFacet(name='ram_size_gb', value=8)

    sample = search.Document(
        doc_id='doc1',
        fields=[name_field],
        facets=[type_facet, ram_facet])
    index.put(sample)
Example #2
0
def add_to_search_index(object_id,
                        labels,
                        metadata,
                        mapped_category=None,
                        most_similar_category=None,
                        thumb_url=None,
                        preview_url=None):
    """Adds document to the search index.

    Builds one document from the image id, its labels, the optional
    categories, the optional image URLs and any extra metadata, then puts
    it into the 'imagesearch' index.

    Args:
        object_id: value stored in the 'image_id' text field; the document
            id is ``hashing.hash_value(object_id)``.
        labels: iterable of mappings, each providing a 'description' key.
            Every description is stored both as a 'label' text field and
            as a 'label_facet' facet.
        metadata: dict of additional name -> value pairs, each stored as a
            text field.
        mapped_category: optional category; when truthy it is stored as a
            text field and as a facet.
        most_similar_category: optional category; when truthy it is stored
            as a text field and as a facet.
        thumb_url: optional thumbnail URL; stored when truthy.
        preview_url: optional preview URL; stored when truthy,
            independently of ``thumb_url``.

    Returns:
        The id of the first result returned by the index ``put`` call, or
        None when a ``search.Error`` was raised (the error is logged).
    """
    try:
        # Define document search fields - these can be queried using keyword
        # search.
        fields = [
            search.TextField(name='image_id', value=object_id),
        ]

        # Define document facet fields
        facets = []

        # Add label descriptions into search and facet fields. Search API
        # allows multiple values for the same field.
        for label in labels:
            description = label['description']
            fields.append(search.TextField(name='label', value=description))
            facets.append(
                search.AtomFacet(name='label_facet', value=description))

        # Add mapped category and most similar category as facets
        if mapped_category:
            fields.append(
                search.TextField(name='mapped_category',
                                 value=mapped_category))
            facets.append(
                search.AtomFacet(name='mapped_category_facet',
                                 value=mapped_category))

        if most_similar_category:
            fields.append(
                search.TextField(name='most_similar_category',
                                 value=most_similar_category))
            facets.append(
                search.AtomFacet(name='most_similar_category_facet',
                                 value=most_similar_category))

        # We're not using a database, so store the image URLs to the index.
        # We don't need to search on the image URL, but we will need them to
        # display images in the user interface.

        # Add thumbnail url
        if thumb_url:
            fields.append(search.TextField(name='thumb_url', value=thumb_url))

        # Add preview url. Each URL is gated on its own value so that a
        # preview without a thumbnail is still stored, and a missing preview
        # never reaches TextField as None.
        if preview_url:
            fields.append(
                search.TextField(name='preview_url', value=preview_url))

        # Add any other object metadata as document search fields
        for k, v in metadata.iteritems():
            fields.append(search.TextField(name=k, value=v))

        # Add the document to the search index
        d = search.Document(doc_id=hashing.hash_value(object_id),
                            fields=fields,
                            facets=facets)
        add_result = search.Index(name='imagesearch').put(d)
        return add_result[0].id

    except search.Error:
        logging.exception('Something went wrong in add_to_search_index()')
        return None
Example #3
0
    def item_data(item):
        """Build the search fields and facets that describe ``item``.

        Reads ``item.key``, ``title``, ``url``, ``image``, ``added``,
        ``checked``, ``custom``, ``category`` and ``removed``, and queries
        the ``Price`` entities stored under ``item.key`` (newest first).

        Args:
            item: the entity to describe.

        Returns:
            A ``(fields, facets)`` tuple of lists holding ``search`` field
            and facet objects.
        """
        fields = [
            search.AtomField('store',
                             item.key.parent().id()),
            search.AtomField('sku', item.key.id()),
            search.TextField('title', item.title),
            # "Unsupported field type TOKENIZED_PREFIX"
            # search.TokenizedPrefixField('title_prefix', item.title),
            search.AtomField('url', item.url),
            search.AtomField('image', item.image),
            # DateField supports only date accuracy (ie. not second)
            search.NumberField('added', to_unix(item.added)),
            search.NumberField('checked', to_unix(item.checked))
        ]

        if item.custom:
            # Collect the distinct string / integer values of the custom
            # mapping; values of any other type are ignored.
            custom = set()
            for val in item.custom.itervalues():
                if isinstance(val, basestring):
                    custom.add(val)
                elif isinstance(val, (int, long)):
                    custom.add(str(val))
            if custom:
                # Sorted so the stored text is independent of set ordering.
                fields.append(
                    search.TextField('custom', " ".join(sorted(custom))))

        facets = []
        if item.category:
            id_path = [
                "%d" % ck.id() for ck in cat_path(item.key, item.category)
            ]
            if id_path:
                fields.append(search.TextField('categories',
                                               " ".join(id_path)))
                # NumberFacet is 30 bit
                facets += [
                    search.AtomFacet('category', cat_id) for cat_id in id_path
                ]

        # Ordered by descending timestamp, so index 0 is the latest price.
        prices = Price.query(ancestor=item.key) \
                      .order(-Price.timestamp) \
                      .fetch()
        if prices:
            us_cents = [to_us_cents(price) for price in prices]
            fields += [
                search.NumberField('us_cents', us_cents[0]),
                search.TextField('price_history',
                                 " ".join(map(format_history_price, prices))),
            ]
            if len(us_cents) > 1:
                highest = max(us_cents)
                discount_usc = highest - us_cents[0]
                # A zero discount implies a zero percentage, so the
                # percentage is only computed for a non-zero discount. This
                # also keeps the division from running when every recorded
                # price is 0.
                if discount_usc:
                    fields.append(
                        search.NumberField('discount_us_cents', discount_usc))
                    # highest can only be 0 here if the latest price is
                    # negative; skip the percentage rather than divide by 0.
                    if highest:
                        discount_pc = discount_usc * 100 / highest
                        if discount_pc:
                            fields.append(
                                search.NumberField('discount_pc',
                                                   discount_pc))

        tags = []
        if item.removed:
            fields.append(search.NumberField('removed', to_unix(item.removed)))
            tags.append('removed')
        else:
            # NOT queries are expensive, thus providing information in both forms
            tags.append('active')

        fields += [search.AtomField('tags', "#%s" % t) for t in tags]

        return fields, facets