def _post_put_hook(self, future):
    """After a put, (re)index this business for autocomplete search.

    Uses self.url as the search document id; entities without a url are
    not indexed. Indexing errors are swallowed so a search outage never
    breaks the datastore write.
    """
    if self.url:
        name = ','.join(text_utils.tokenize_autocomplete(self.name))
        registeredName = ','.join(
            text_utils.tokenize_autocomplete(self.registeredName))
        fields = [
            search.TextField(name="name", value=name),
            search.TextField(name="registeredName", value=registeredName)
        ]
        d = search.Document(doc_id=self.url, fields=fields)
        try:
            # FIX: the put() result was bound to an unused local
            # (add_result); dropped as dead code.
            search.Index(name="Businesses_Index").put(d)
        except search.Error:
            # Best-effort indexing: never fail the write path.
            pass
def index_artifact(index_, id_, fields):
    """Index one document built from a flat (type, name, value) triple list.

    Args:
        index_: search.Index to put the document into. Its name is expected
            to end in a 4-digit numeric suffix so a new index can be rolled
            over to when this one is out of space.
        id_: the document id.
        fields: flat list laid out as [type, name, value, type, name,
            value, ...] where each type is one of the *FIELD constants.

    Raises:
        search.Error: if the put still fails after three index rollovers.
    """
    # Map field-type constants to their search field classes.
    field_classes = {
        ATOMFIELD: search.AtomField,
        TEXTFIELD: search.TextField,
        HTMLFIELD: search.HtmlField,
        NUMBERFIELD: search.NumberField,
        DATEFIELD: search.DateField,
        GEOPOINTFIELD: search.GeoField,
    }
    f = []
    for i in xrange(0, len(fields), 3):
        field_cls = field_classes.get(fields[i])
        if field_cls is not None:
            f.append(field_cls(name=fields[i + 1], value=fields[i + 2]))
    doc = search.Document(doc_id=id_, fields=f)
    retry_count = 0
    while True:
        try:
            index_.put(doc)
            break
        except search.Error as e:
            if retry_count < 3:
                log.warning(
                    'Error put doc into index, could be out of space. Creating new index'
                )
                # BUG FIX: the suffix was parsed and re-formatted without
                # incrementing, so every retry targeted the same (full)
                # index. Bump the 4-digit suffix to roll over.
                index_ = search.Index(
                    index_.name[:-4] + str(int(index_.name[-4:]) + 1).zfill(4),
                    namespace=index_.namespace)
                retry_count += 1
            else:
                raise e
def _post_put_hook(self, future):
    """Index this establishment for autocomplete search after each put."""
    if not self.url:
        return
    tokenized_name = ','.join(text_utils.tokenize_autocomplete(self.name))
    tokenized_address = ','.join(
        text_utils.tokenize_autocomplete(self.location.address))
    document = search.Document(
        doc_id=self.url,
        fields=[
            search.TextField(name="name", value=tokenized_name),
            search.TextField(name="address", value=tokenized_address),
        ])
    try:
        search.Index(name="Establishments_Index").put(document)
    except search.Error:
        # Indexing is best-effort; never fail the datastore write.
        pass
def createPositionDocument(doc_id, gaepos):
    """Build a search document holding an id text field and a geo position."""
    position = search.GeoPoint(gaepos.lat, gaepos.lon)
    fields = [
        search.TextField(name="id", value=doc_id),
        search.GeoField(name="position", value=position),
    ]
    return search.Document(fields=fields)
def _post_put_hook(self, future):
    """Keep the 'search-post' index in sync: index published posts, remove others."""
    index = search.Index('search-post')
    doc_id = self.key.urlsafe()
    if self.status == self.STATUS_PUBLISHED:
        document = search.Document(
            doc_id=doc_id,
            fields=[
                search.TextField(name='text', value=self.text),
                search.TextField(name='title', value=self.title),
                search.TextField(name="slug", value=self.slug),
            ])
        index.put(document)
    else:
        index.delete(doc_id)
def test_ids_only(self):
    """An ids_only search returns every matching document id."""
    corpus = [
        ('doc1', 'abc def ghi'),
        ('doc2', 'abc jkl mno'),
        ('doc3', 'abc jkl ghi'),
    ]
    documents = [
        search.Document(
            doc_id=doc_id,
            fields=[search.TextField(name='k', value=text)])
        for doc_id, text in corpus
    ]
    index = search.Index('my_index')
    index.put(documents)
    result, cursor = gae_search_services.search(
        'k:abc', 'my_index', ids_only=True)
    for doc_id, _ in corpus:
        self.assertIn(doc_id, result)
def create_romanized_name_fields(romanize_method, **kwargs):
    """
    Creates romanized name fields (romanized by romanize_method)
    for full text search.
    """
    fields = []
    all_romanized_names = []
    method_name = romanize_method.__name__
    for field_name, field_value in kwargs.iteritems():
        romanized_names = romanize_method(field_value)
        for index, romanized_name in enumerate(romanized_names):
            rank_field_name = '%s_romanized_by_%s_%d' % (
                field_name, method_name, index)
            fields.extend(
                create_fields_for_rank(rank_field_name, romanized_name))
        all_romanized_names.extend(romanized_names)
    # Full names with spaces stripped, built from given + family name.
    full_name_fields, romanized_full_names = (
        create_full_name_without_space_fields(
            romanize_method, kwargs['given_name'], kwargs['family_name']))
    fields.extend(full_name_fields)
    all_romanized_names.extend(romanized_full_names)
    # One catch-all field joining every non-empty romanization.
    joined_names = ':'.join([name for name in all_romanized_names if name])
    fields.append(
        appengine_search.TextField(
            name='names_romanized_by_' + method_name,
            value=joined_names))
    return fields
def index(self): doc = search.Document( doc_id = self.key.urlsafe(), fields = [ search.TextField(name='email', value=self.email), search.TextField(name='first_name', value=self.first_name), search.TextField(name='last_name', value=self.last_name), search.DateField(name='created', value=self.created), search.TextField(name='hash', value=self.get_hash()), ]) try: index = search.Index(name="user") index.put(doc) except search.Error: print 'Failed to index user %s' % self.email
def test_respect_limit(self):
    """Search honours the limit argument even with more matches available."""
    corpus = [
        ('doc1', 'abc def ghi'),
        ('doc2', 'abc jkl mno'),
        ('doc3', 'abc jkl ghi'),
    ]
    documents = [
        search.Document(
            doc_id=doc_id,
            fields=[search.TextField(name='k', value=text)])
        for doc_id, text in corpus
    ]
    index = search.Index('my_index')
    index.put(documents)
    result, cursor = gae_search_services.search('k:abc', 'my_index', limit=2)
    self.assertEqual(len(result), 2)
def put(model):
    """Mirror an ndb model into its configured search index.

    The per-kind mapping in _searchable drives which properties are
    indexed and with which search field class. Falsy property values are
    skipped; ndb.GeoPt values are converted to search.GeoPoint first.
    """
    if not _searchable:
        return
    kind = model.key.kind()
    mapping = _searchable.get(kind)
    if not mapping:
        return
    index_name = mapping.get("_index", _INDEX_NAME)
    index = search.Index(name=index_name)
    fields = []
    for prop_name, field_cls_name in mapping.iteritems():
        # Keys starting with "_" (e.g. "_index") are config, not properties.
        if prop_name[0] == "_":
            continue
        field_cls = getattr(search, field_cls_name)
        value = getattr(model, prop_name, None)
        if not value:
            continue
        # GeoPt values must be converted to search.GeoPoint values
        # before adding to the search index.
        if isinstance(value, ndb.GeoPt):
            value = search.GeoPoint(value.lat, value.lon)
        fields.append(field_cls(name=prop_name, value=value))
    # Everything in the shared default index also gets tagged with its kind.
    if index_name == _INDEX_NAME:
        fields.append(search.TextField(name="Kind", value=kind))
    doc = search.Document(doc_id=model.key.urlsafe(), fields=fields)
    try:
        index.put(doc)
    except search.Error:
        logging.error("Failed to put document {}".format(doc))
def create_player(player):
    """Build and return a search document from a player entity."""
    # logging.debug(player)
    document = search.Document(
        doc_id=str(player.key.id()),
        fields=[
            search.TextField(name='name', value=player.name),
            search.NumberField(name='age', value=player.age),
            search.TextField(name='national', value=player.national),
            search.TextField(name='position', value=player.position),
            search.NumberField(name='salary', value=player.salary),
        ])
    return document
def update_public_index(obj):
    """Index a User/Message/Group's public text (plus location if known).

    Indexing is best-effort: any failure is logged and swallowed so the
    caller's write path is never broken. Objects of other types, or with
    empty text/name, are ignored.
    """
    try:
        location = None
        data = ''
        if isinstance(obj, User) and obj.name:
            data = obj.name
            location = obj.last_location
        elif isinstance(obj, Message) and obj.text:
            data = obj.text
        elif isinstance(obj, Group) and obj.name:
            data = obj.name
        else:
            return
        index = search.Index(name=PUBLIC_SEARCH_INDEX)
        fields = [search.TextField(name='text', value=data)]
        if location and location.latlon:
            latlon = location.latlon
            fields.append(
                search.GeoField(name='location',
                                value=search.GeoPoint(latlon.lat, latlon.lon)))
        index.put(search.Document(doc_id=obj.key.urlsafe(), fields=fields))
    except Exception:
        # BUG FIX: was a bare "except:", which also swallowed SystemExit
        # and KeyboardInterrupt. Catch only genuine errors.
        logging.warn('Adding object %s to search index failed.' % (str(obj)))
        logging.warn(sys.exc_info()[0])
def _indexForumThread(forum_thread, new_forum_post = None):
    """Does the actual work of indexing the given forum thread. We expect
    to be called in a deferred handler.

    Args:
        forum_thread: the thread entity to (re)index as one search document.
        new_forum_post: optional just-written post that may not yet be
            visible in the ancestor query below; its content is merged in
            manually so the index is immediately up to date.
    """
    forum= forum_thread.forum
    # Colons in slugs are replaced with "_" for the atom fields —
    # presumably ":" conflicts with the search query syntax (TODO confirm).
    fields = [search.TextField(name="subject", value=forum_thread.subject),
              search.DateField(name="posted", value=forum_thread.posted),
              search.DateField(name="last_post", value=forum_thread.last_post),
              search.AtomField(name="forum", value=forum.slug.replace(":", "_"))]
    if forum.alliance:
        fields.append(search.AtomField(name="alliance", value=forum.alliance.replace(":", "_")))
    else:
        # Sentinel value so queries can still filter on the alliance field.
        fields.append(search.AtomField(name="alliance", value="NA"))
    content = ""
    for forum_post in model.forum.ForumPost.all().ancestor(forum_thread).order("posted"):
        # If the new post already shows up in the query results, clear the
        # reference so we don't prepend its content a second time below.
        if new_forum_post and str(forum_post.key()) == str(new_forum_post.key()):
            new_forum_post = None
        content += "\r\n<hr />\r\n" + forum_post.content
    if new_forum_post:
        # The (eventually-consistent) query missed the just-written post;
        # include its content at the front, without a separator.
        content = new_forum_post.content + content
    fields.append(search.HtmlField(name="content", value=content))
    doc = search.Document(
        doc_id = str(forum_thread.key()),
        fields = fields)
    index = search.Index(name="forum")
    index.put(doc)
def get_field(self, key, value):
    """Build the search field for (key, value) per this object's mapping.

    Returns:
        A search.*Field instance, or None when key is unmapped, the mapped
        type is falsy, or the type name is unrecognised.
    """
    # Dispatch table replaces the semicolon-laden if/elif chain.
    field_classes = {
        'TEXTFIELD': search.TextField,
        'ATOMFIELD': search.AtomField,
        'NUMBERFIELD': search.NumberField,
        'HTMLFIELD': search.HtmlField,
        'GEOFIELD': search.GeoField,
    }
    try:
        field_type = self.mapping[key]
        if not field_type:
            return None
        if field_type == 'DATEFIELD':
            # value is a millisecond timestamp; negative values are folded
            # to their absolute value (behavior preserved from the original
            # implementation -- TODO confirm that is really intended).
            timestamp = value if value > 0 else -value
            return search.DateField(
                name=key, value=datetime.fromtimestamp(timestamp / 1000.0))
        field_cls = field_classes.get(field_type)
        if field_cls is None:
            return None
        return field_cls(name=key, value=value)
    except KeyError as key_error:
        # BUG FIX: "except KeyError, keyError" is Python-2-only syntax;
        # "as" works on both Python 2.6+ and Python 3.
        print(key_error)
        return None
def put_one_document(self, msg):
    """Index a single chat message, retrying on transient put errors.

    The document id is "<channel_id>_<user>_<int ts>" so re-indexing the
    same message overwrites instead of duplicating.
    """
    doc_id = '{channel_id}_{user}_{ts}'.format(
        channel_id=msg.channel_id, user=msg.user, ts=int(msg.ts))
    doc = search.Document(doc_id=doc_id, fields=[
        search.TextField(name='text', value=msg.text),
        search.AtomField(name='user_name', value=msg.get_user_name()),
        search.AtomField(name='channel_id', value=msg.channel_id),
        search.AtomField(name='msg_key', value=str(msg.key.id())),
        search.DateField(name='ts', value=msg.get_datetime()),
    ])
    # Index the document.
    try:
        self.index.put(doc)
    except search.PutError as e:
        # BUG FIX: "except search.PutError, e" is Python-2-only syntax;
        # "as" works on both Python 2.6+ and Python 3.
        result = e.results[0]
        if result.code == search.OperationResult.TRANSIENT_ERROR:
            # Possibly retry indexing result.object_id (recursive retry;
            # unbounded by design -- NOTE(review): consider a retry cap).
            return self.put_one_document(msg)
def _make_fields(key, value):
    """Returns the fields corresponding to the key value pair according to
    the type of value.

    Args:
        key: str. The name of the field.
        value: *. The field value.

    Returns:
        list(*). A list of fields.

    Raises:
        ValueError. The type of field value is not list, str, Number or
            datetime.
    """
    if isinstance(value, list):
        _validate_list(key, value)
        return [_make_fields(key, item)[0] for item in value]

    # Scalar values: pick the field class from the value's type.
    field_cls = None
    if isinstance(value, python_utils.BASESTRING):
        field_cls = gae_search.TextField
    elif isinstance(value, numbers.Number):
        field_cls = gae_search.NumberField
    elif isinstance(value, (datetime.datetime, datetime.date)):
        field_cls = gae_search.DateField
    if field_cls is not None:
        return [field_cls(name=key, value=value)]

    raise ValueError(
        'Value for document field %s should be a (unicode) string, numeric '
        'type, datetime.date, datetime.datetime or list of such types, got %s'
        % (key, type(value)))
def loadStoreLocationData():
    """Create search documents from the static store location info.

    Currently logs but otherwise swallows search errors, so one bad
    record does not abort the load.
    """
    for s in stores.stores:
        logging.info("s: %s", s)
        # s layout: (doc_id, name, address, (lat, lon)).
        geopoint = search.GeoPoint(s[3][0], s[3][1])
        fields = [
            search.TextField(name=docs.Store.STORE_NAME, value=s[1]),
            search.TextField(name=docs.Store.STORE_ADDRESS, value=s[2]),
            search.GeoField(name=docs.Store.STORE_LOCATION, value=geopoint),
        ]
        d = search.Document(doc_id=s[0], fields=fields)
        try:
            # FIX: the put() result was bound to an unused local
            # (add_result); dropped as dead code.
            search.Index(config.STORE_INDEX_NAME).put(d)
        except search.Error:
            logging.exception("Error adding document:")
def put_index(self):
    """Index this lead status in the global search index."""
    organization = str(self.organization.id())
    # Falsy values are stored as empty strings (search fields reject None).
    document = search.Document(
        doc_id=str(self.key.id()),
        fields=[
            search.TextField(name=u'type', value=u'Leadstatus'),
            search.TextField(name='organization', value=organization or ""),
            search.TextField(name='owner', value=self.owner or ""),
            search.TextField(name='title', value=self.status or ""),
            search.DateField(name='created_at', value=self.created_at),
        ])
    search.Index(name="GlobalIndex").put(document)
def getSearchDocumentFields(self, valuesCache, name):
    """
    Returns a list of search-fields (GAE search API) for this bone.
    """
    text_value = unicode(valuesCache[name])
    return [search.TextField(name=name, value=text_value)]
def re_index_prospect(prospect):
    """Delete then re-add the prospect's document in the prospect index."""
    index = search.Index(name=PROSPECT_INDEX)
    azzert(prospect, 'Prospect not found')
    try:
        index.delete(prospect.id)
    except ValueError:
        # Nothing indexed yet for this prospect; that's fine.
        pass
    document = search.Document(
        doc_id=prospect.id,
        fields=[
            search.AtomField(name='key', value=prospect.id),
            search.TextField(name='name', value=prospect.name),
            search.TextField(name='address', value=prospect.address),
            search.TextField(name='phone', value=prospect.phone),
            search.TextField(name='email', value=prospect.email),
        ])
    index.put(document)
def test_arguments_are_preserved_in_retries(self):
    """Retried deletes keep their original arguments and eventually succeed."""
    index = search.Index('index')
    index.put([
        search.Document(
            doc_id='doc',
            fields=[search.TextField(name='prop', value='val')])
    ])
    exception = self._get_delete_error(1, transient=0)
    failing_delete = test_utils.FailingFunction(
        search.Index.delete, exception, 3)
    delete_docs_counter = test_utils.CallCounter(
        gae_search_services.delete_documents_from_index)
    index_ctx = self.swap(search.Index, 'delete', failing_delete)
    delete_docs_ctx = self.swap(
        gae_search_services, 'delete_documents_from_index',
        delete_docs_counter)
    with index_ctx, delete_docs_ctx:
        gae_search_services.delete_documents_from_index(
            ['doc'], 'index', retries=4)
    self.assertEqual(delete_docs_counter.times_called, 4)
    # BUG FIX: this previously queried 'my_index', which never contained
    # the document, so assertIsNone passed vacuously. Query the index the
    # document was actually put into.
    result = search.Index('index').get('doc')
    self.assertIsNone(result)
def test_delete_error_with_transient_result(self):
    """A transient delete error is retried until all documents are gone."""
    error = self._get_delete_error(3, transient=1)
    failing_delete = test_utils.FailingFunction(
        search.Index.delete, error, 4)
    delete_docs_counter = test_utils.CallCounter(
        gae_search_services.delete_documents_from_index)
    index = search.Index('my_index')
    for i in python_utils.RANGE(3):
        index.put(
            search.Document(
                doc_id='d' + python_utils.STR(i),
                fields=[search.TextField(name='prop', value='value')]))
    delete_ctx = self.swap(search.Index, 'delete', failing_delete)
    delete_docs_ctx = self.swap(
        gae_search_services, 'delete_documents_from_index',
        delete_docs_counter)
    with delete_ctx, delete_docs_ctx:
        gae_search_services.delete_documents_from_index(
            ['d0', 'd1', 'd2'], 'my_index', retries=5)
    self.assertEqual(delete_docs_counter.times_called, 5)
    for i in python_utils.RANGE(3):
        # BUG FIX: documents were put with ids 'd0'..'d2' but previously
        # looked up as 'doc0'.., so assertIsNone passed vacuously. Look up
        # the ids that were actually deleted.
        result = search.Index('my_index').get('d' + python_utils.STR(i))
        self.assertIsNone(result)
def _build_event_date(i, event, ed, venue, start, end, is_hours=False):
    """Helper to create a specific date - yields one search doc."""
    category = CATEGORY.HOURS if is_hours else ed.category
    utc = timezone('UTC')
    fields = [
        search.TextField(name='name', value=event.name),
        search.AtomField(name='slug', value=event.slug),
        search.AtomField(name='event_keystr', value=str(event.key.urlsafe())),
        # Bits specific to this event date (unix timestamps in UTC).
        search.NumberField(name='start', value=unix_time(utc.localize(start))),
        search.NumberField(name='end', value=unix_time(utc.localize(end))),
        search.AtomField(name='category', value=category),
        # Venue information.
        search.AtomField(name='venue_slug', value=ed.venue_slug),
    ]
    if venue.geo:
        geo_pt = venue.geo
        if isinstance(geo_pt, list):
            geo_pt = geo_pt[0]
        venue_geo = search.GeoPoint(geo_pt.lat, geo_pt.lon)
        fields.append(search.GeoField(name='venue_geo', value=venue_geo))
    doc_id = '%s-%s' % (event.slug, i)
    return search.Document(doc_id=doc_id, fields=fields)
def _buildProductFields(cls, pid=None, category=None, name=None, user_id=None,
                        description=None, category_name=None, image_url=None,
                        price=None, ppacc=None, **params):
    """Build all the additional non-core fields for a document of the given
    product type (category), using the given params dict, and the
    already-constructed list of 'core' fields. All such additional
    category-specific fields are treated as required.

    Raises:
        errors.OperationFailedError: when a required field is missing from
            params or a numeric field value cannot be parsed as a float.
    """
    fields = cls._buildCoreProductFields(
        pid, name, user_id, description, category, category_name,
        image_url, price, ppacc)
    # get the specification of additional (non-'core') fields for this category
    pdict = categories.product_dict.get(category_name)
    if pdict:
        # For each declared field, look up its value in params, build the
        # right search field, and append it to the document field list.
        for k, field_type in pdict.iteritems():
            if k in params:
                v = params[k]
                if field_type == search.NumberField:
                    try:
                        val = float(v)
                        fields.append(search.NumberField(name=k, value=val))
                    except ValueError:
                        # BUG FIX: the name/value arguments were swapped in
                        # the interpolation (was (k, v, field_type)).
                        error_message = (
                            'bad value %s for field %s of type %s' %
                            (v, k, field_type))
                        logging.error(error_message)
                        raise errors.OperationFailedError(error_message)
                elif field_type == search.TextField:
                    fields.append(search.TextField(name=k, value=str(v)))
                else:
                    # you may want to add handling of other field types for
                    # generality. Not needed for our current sample data.
                    logging.warn('not processed: %s, %s, of type %s',
                                 k, v, field_type)
            else:
                error_message = (
                    'value not given for field "%s" of field type "%s"' %
                    (k, field_type))
                logging.warn(error_message)
                raise errors.OperationFailedError(error_message)
    else:
        # No entry in the category spec for the given category.
        # BUG FIX: previously read params['category_name'], which raises
        # KeyError since category_name is a named parameter, not in params.
        logging.warn(
            'product field information not found for category name %s',
            category_name)
    return fields
def test_delete_single_document(self):
    """Deleting one document by id removes it from the index."""
    index = search.Index('my_index')
    document = search.Document(
        doc_id='doc_id',
        fields=[search.TextField(name='k', value='v')])
    index.put([document])
    gae_search_services.delete_documents_from_index(['doc_id'], 'my_index')
    self.assertIsNone(index.get('doc_id'))
def create_document(algorithm_id, algorithm_summary, display_name, link_url):
    """creates a search.Document.

    :param algorithm_id:
    :param algorithm_summary: HTML
    :param display_name:
    :param link_url:
    :rtype : google.appengine.api.search.Document
    """
    fields = [
        search.TextField(name='algorithmId', value=algorithm_id),
        search.HtmlField(name='algorithmSummary', value=algorithm_summary),
        search.TextField(name='displayName', value=display_name),
        search.TextField(name='linkURL', value=link_url),
        # Stamp the document with the creation time.
        search.DateField(name='date', value=datetime.now()),
    ]
    return search.Document(doc_id=algorithm_id, fields=fields)
def _create_doc_event(cls, source):
    """Build a search document for a Facebook fan-page source, or None.

    Returns None for sources without FB info, non-fanpage sources, and
    sources without a usable location (the lat/long number fields cannot
    be None).
    """
    fb_info = source.fb_info
    if not fb_info:
        return None
    # Only index fan pages for now:
    # - Profiles are not public or indexable.
    # - Groups don't have a location. (but they do!?)
    # - Fan Pages are both.
    if source.graph_type != thing_db.GRAPH_TYPE_FANPAGE:
        return None
    # TODO(lambert): find a way to index no-location sources.
    # As of now, the lat/long number fields cannot be None.
    # In what radius/location should no-location sources show up
    # and how do we want to return them
    # Perhaps a separate index that is combined at search-time?
    if fb_info.get('location', None) is None:
        return None
    if not source.latitude:
        return None
    # Map the FB country name to an ISO 3166 alpha-2 code when possible.
    country = fb_info['location'].get('country', '').upper()
    if country in iso3166.countries_by_name:
        country_code = iso3166.countries_by_name[country].alpha2
    else:
        country_code = None
    # Rank by real-event count so busier pages sort first.
    doc_event = search.Document(
        doc_id=source.graph_id,
        fields=[
            search.TextField(name='name', value=source.name),
            search.TextField(name='description', value=fb_info.get('general_info', '')),
            search.NumberField(name='like_count', value=fb_info['likes']),
            search.TextField(name='category', value=fb_info['category']),
            search.TextField(name='category_list', value=', '.join(str(x['name']) for x in fb_info.get('category_list', []))),
            search.NumberField(name='latitude', value=source.latitude),
            search.NumberField(name='longitude', value=source.longitude),
            #search.TextField(name='categories', value=' '.join(source.auto_categories)),
            search.TextField(name='country', value=country_code),
            search.NumberField(name='num_real_events', value=source.num_real_events or 0),
        ],
        #language=XX, # We have no good language detection
        rank=source.num_real_events or 0,
    )
    return doc_event
def test_use_cursor(self):
    """A cursor from a partial result set fetches the remaining documents."""
    corpus = [
        ('doc1', 'abc def ghi'),
        ('doc2', 'abc jkl mno'),
        ('doc3', 'abc jkl ghi'),
    ]
    documents = [
        search.Document(
            doc_id=doc_id, language='en', rank=1,
            fields=[search.TextField(name='k', value=text)])
        for doc_id, text in corpus
    ]
    index = search.Index('my_index')
    index.put(documents)
    result1, result1_cursor = gae_search_services.search(
        'k:abc', 'my_index', size=2)
    result2, _ = gae_search_services.search(
        'k:abc', 'my_index', cursor=result1_cursor)
    self.assertEqual(len(result1), 2)
    self.assertEqual(len(result2), 1)
    combined = result1 + result2
    for doc_id, text in corpus:
        expected = {
            'id': doc_id,
            'k': text,
            'language_code': 'en',
            'rank': 1
        }
        self.assertIn(expected, combined)
def create_document(person):
    """
    Creates document for full text search.
    It should be called in add_record_to_index method.
    """
    repo = person.repo
    record_id = person.record_id
    # Document id is "<repo>:<record_id>".
    doc_id = repo + ':' + record_id
    fields = [
        appengine_search.TextField(name='repo', value=repo),
        appengine_search.TextField(name='record_id', value=record_id),
    ]
    fields.extend(create_non_romanized_fields(
        given_name=person.given_name,
        family_name=person.family_name,
        full_name=person.full_name,
        alternate_names=person.alternate_names,
        home_street=person.home_street,
        home_city=person.home_city,
        home_state=person.home_state,
        home_postal_code=person.home_postal_code,
        home_neighborhood=person.home_neighborhood,
        home_country=person.home_country))
    # Applies two methods because kanji is used in Chinese and Japanese,
    # and romanizing in chinese and japanese is different.
    for romanize_method in ROMANIZE_METHODS:
        fields.extend(create_romanized_name_fields(
            romanize_method,
            given_name=person.given_name,
            family_name=person.family_name,
            full_name=person.full_name,
            alternate_names=person.alternate_names))
        fields.extend(create_romanized_location_fields(
            romanize_method,
            home_street=person.home_street,
            home_city=person.home_city,
            home_state=person.home_state,
            home_postal_code=person.home_postal_code,
            home_neighborhood=person.home_neighborhood,
            home_country=person.home_country))
    return appengine_search.Document(doc_id=doc_id, fields=fields)
def create_test_documents_list(data_list, documents, length):
    """ Prepare test documents"""
    for i in range(length):
        entry = data_list[i]
        document = search.Document(
            doc_id=entry['algorithmId'],
            fields=[
                search.TextField(name='algorithmId', value=entry['algorithmId']),
                search.HtmlField(name='algorithmSummary', value=entry['algorithmSummary']),
                search.TextField(name='displayName', value=entry['displayName']),
                search.TextField(name='linkURL', value=entry['linkURL']),
                # Stamp each test document with the creation time.
                search.DateField(name='date', value=datetime.now()),
            ])
        documents.append(document)