def delete_documents_from_index(doc_ids, index, retries=DEFAULT_NUM_RETRIES):
    """Deletes documents from an index.

    Args:
        - doc_ids: a list of document ids of documents to be deleted from the
          index.
        - index: the name of the index to delete the document from, a string.
        - retries: the number of times to retry deleting the documents.

    Raises:
        - SearchFailureError: raised when the deletion fails. If it fails for
          any document, none will be deleted.
    """
    # Validate argument types up-front so the caller gets a clear error
    # rather than an opaque failure from the search service.
    if not isinstance(index, python_utils.BASESTRING):
        raise ValueError(
            'Index must be the unicode/str name of an index, got %s' %
            type(index))
    for ind, doc_id in enumerate(doc_ids):
        if not isinstance(doc_id, python_utils.BASESTRING):
            raise ValueError('all doc_ids must be string, got %s at index %d' %
                             (type(doc_id), ind))
    # NOTE: `index` is rebound from the index *name* to an Index object here;
    # the recursive retry below passes `index.name` to keep the interface
    # (a string) consistent.
    index = gae_search.Index(index)
    try:
        logging.debug('Attempting to delete documents from index %s, ids: %s' %
                      (index.name, ', '.join(doc_ids)))
        index.delete(doc_ids, deadline=5)
    except gae_search.DeleteError as e:
        logging.exception('Something went wrong during deletion.')
        if retries > 1:
            # Only retry when at least one per-document result indicates a
            # transient error; permanent errors fall through to the raise.
            for res in e.results:
                if res.code == gae_search.OperationResult.TRANSIENT_ERROR:
                    new_retries = retries - 1
                    logging.debug('%d tries left, retrying.' % (new_retries))
                    delete_documents_from_index(
                        doc_ids=doc_ids, index=index.name,
                        retries=new_retries)
                    # The recursive call either succeeded or raised; either
                    # way this frame must not re-raise below.
                    return
        raise SearchFailureError(e)
def get(self):
    """Show a list of reviews for the product indicated by the 'pid'
    request parameter.

    Runs a geo search against the store index, sorted by distance from
    the (latitude, longitude) supplied by the client, and renders the
    matching stores as JSON.
    """
    query = self.request.get('location_query')
    lat = self.request.get('latitude')
    lon = self.request.get('longitude')
    # the location query from the client will have this form:
    # distance(store_location, geopoint(37.7899528, -122.3908226)) < 40000
    # logging.info('location query: %s, lat %s, lon %s', query, lat, lon)
    try:
        index = search.Index(config.STORE_INDEX_NAME)
        # search using simply the query string:
        # results = index.search(query)
        # alternately: sort results by distance
        loc_expr = 'distance(store_location, geopoint(%s, %s))' % (lat, lon)
        sortexpr = search.SortExpression(
            expression=loc_expr,
            direction=search.SortExpression.ASCENDING, default_value=0)
        sortopts = search.SortOptions(expressions=[sortexpr])
        search_query = search.Query(
            query_string=query.strip(),
            options=search.QueryOptions(sort_options=sortopts,))
        results = index.search(search_query)
    except search.Error:
        # On any search failure, degrade gracefully to an empty result list.
        logging.exception("There was a search error:")
        self.render_json([])
        return
    # logging.info("geo search results: %s", results)
    response_obj2 = []
    for res in results:
        # Wrap each raw search document in the Store accessor to pull out
        # the named fields.
        gdoc = docs.Store(res)
        geopoint = gdoc.getFieldVal(gdoc.STORE_LOCATION)
        resp = {'addr': gdoc.getFieldVal(gdoc.STORE_ADDRESS),
                'storename': gdoc.getFieldVal(gdoc.STORE_NAME),
                'lat': geopoint.latitude, 'lon': geopoint.longitude}
        response_obj2.append(resp)
    logging.info("resp: %s", response_obj2)
    self.render_json(response_obj2)
def delete_post(self, post_id):
    """ Delete the post.

    Admins may delete any post; other users may only delete posts they
    authored (matched on the indexed 'author' field).

    :param post_id: id of the search document to delete.
    :return: True / False
    """
    if g.user is None or post_id is None:
        return False
    index = search.Index(name=config.text_search_name)
    if g.user['is_admin']:
        # delete require from admin
        index.delete([post_id])
        return True
    # check post from the same author
    document = index.get(post_id)
    # Fix: index.get() returns None for an unknown id; previously this
    # raised AttributeError on document.field(...).
    if document is None:
        return False
    if document.field('author').value == g.user['email']:
        index.delete([post_id])
        return True
    else:
        return False
def save_activity_search_document(a):
    """Build the search document for activity `a` and store it in the
    'activities' index. Indexing failures are logged, not raised."""
    restricted = "yes" if a.access.domain_restricted else "no"
    author_email = a.actor.get().user.get().primary_email
    document_fields = [
        search.HtmlField(name='content', value=a.object_.content),
        search.DateField(name='published', value=a.published.date()),
        search.AtomField(name='visibility', value=a.access.visibility),
        search.AtomField(name='restricted', value=restricted),
        search.AtomField(name='community', value=a.access.community_name),
        search.AtomField(name='provider', value=a.provider),
        search.AtomField(name='verb', value=a.verb),
        search.AtomField(name='author', value=author_email),
        search.AtomField(name='google_id', value=a.key.id()),
    ]
    doc = search.Document(doc_id=a.key.urlsafe(), fields=document_fields)
    try:
        search.Index(name="activities").put(doc)
    except search.Error:
        logging.exception('PUT of Activity Document FAILED')
def search_posts(terms=None):
    """Full-text search over published posts; renders the search page.

    When `terms` is not supplied, it is read from the request's 'terms'
    query argument. Search errors are printed and yield empty results.
    """
    if not terms:
        terms = request.args.get('terms', '')
    results = None
    try:
        # very naive rough impl
        opts = search.QueryOptions(
            returned_fields=['title', 'slug'],
            snippeted_fields=['content'],
        )
        posts_query = search.Query(
            query_string='published: True AND (' + terms + ')',
            options=opts)
        results = search.Index(name='posts').search(posts_query)
    except search.Error as err:
        print(err)
    return render_template('search.html', terms=terms, results=results,
                           now=date.today())
def test_del_algorithm_Found(self):
    """Tests if algorithm is deleted from an 101 algorithms database
    while searching for existent algorithmId 'algorithmId63' and if
    function returns '0'"""
    searched_id = 'algorithmId63'
    algorithms = []
    create_test_algorithm_list(algorithms, 101)
    docs = []
    create_test_documents_list(algorithms, docs, 101)
    index = search.Index(name=search_algorithm._INDEX_STRING)
    index.put(docs)
    # Data prepared; exercise the deletion under test.
    result = search_algorithm.del_algorithm(index, searched_id)
    self.assertEqual(0, result, msg='Algorithm was not deleted properly')
    self.assertNotEqual(
        1, result,
        msg='Algorithm was there but was not deleted properly')
    self.assertNotEqual(
        2, result, msg='Algorithm was not there before delete')
    self.assertIsNone(
        index.get(searched_id),
        msg='Algorithm is still there after "successful" deletion')
def createFacets(self, date_now):
    """
    **Description**: Method fetches top 20 tags from search database.

    Runs a facet-discovery query against the dataset index, persists each
    discovered tag as a FacetsTagsModel stamped with `date_now`, and
    returns the tags as a list of {'name', 'count'} dicts.
    """
    index = search.Index('dataset_index')
    tags = []
    facets_query = search.Query(
        query_string="",
        options=search.QueryOptions(returned_fields=[], limit=50),
        facet_options=search.FacetOptions(
            discovery_limit=100, discovery_value_limit=20, depth=900),
        enable_facet_discovery=True)
    for facet_result in index.search(facets_query).facets:
        for value in facet_result.values:
            record = FacetsTagsModel(
                tag_name=value.label, tag_count=value.count,
                timestamp=date_now)
            record.save()
            tags.append({'name': value.label, 'count': value.count})
    return tags
def add():
    """Create a User entity from the POSTed form and index its name and
    email for search; responds 204 No Content."""
    form = request.form
    user = User(
        name=form['name'],
        email=form['email'],
        admin=False,
        photo_url='',
        money=0,
        rfid=form['rfid'])
    user.put()
    doc = search.Document(fields=[
        search.TextField(name='name', value=user.name),
        search.TextField(name='email', value=user.email),
    ])
    search.Index(name='user').put(doc)
    return '', 204
def post(self):
    """Accept one crowdsourced report, buffer it, and flush the buffer to
    the 'crowdsourced_mumbai_monsoon' search index.

    NOTE(review): state lives in module-level globals (documents_counter,
    total_docs_counter, documents_list), which is not safe under
    concurrent requests — confirm this handler is single-threaded.
    """
    global documents_counter
    global total_docs_counter
    global documents_list
    documents_counter += 1
    total_docs_counter += 1
    request_body = self.request.body
    # person_doc presumably appends a parsed document to documents_list —
    # TODO confirm; its definition is not visible here.
    person_doc(self, total_docs_counter, request_body)
    # With a threshold of 1 the buffer is flushed on every request;
    # raising it would batch multiple reports per index.put().
    if documents_counter >= 1:
        try:
            documents_counter = 0
            index = search.Index(name="crowdsourced_mumbai_monsoon")
            index.put(documents_list)
            # NOTE(review): TRUE is assumed to be a module-level constant
            # (not the builtin True) — verify it is defined.
            if DEBUG == TRUE:
                self.response.write("PERSON CAR location objects")
            documents_list = []
        except search.Error:
            if DEBUG == TRUE:
                self.response.write('Put failed')
def test_arguments_are_preserved_in_retries(self):
    """add_documents_to_index must retry with its original arguments when
    Index.put raises transient errors, and eventually succeed."""
    doc = {'id': 'doc', 'prop': 'val'}
    exception = self._get_put_error(1, transient=0)
    # Index.put fails three times, then succeeds on the fourth call.
    failing_put = test_utils.FailingFunction(search.Index.put, exception, 3)
    # Wrap the function under test so the number of (re)invocations can
    # be asserted afterwards.
    add_docs_counter = test_utils.CallCounter(
        gae_search_services.add_documents_to_index)
    put_ctx = self.swap(search.Index, 'put', failing_put)
    add_docs_ctx = self.swap(gae_search_services, 'add_documents_to_index',
                             add_docs_counter)
    with put_ctx, add_docs_ctx:
        gae_search_services.add_documents_to_index([doc], 'my_index',
                                                   retries=4)
    # Initial call plus three retries.
    self.assertEqual(add_docs_counter.times_called, 4)
    # The document made it into the index with its field intact, proving
    # the retried calls carried the original arguments.
    result = search.Index('my_index').get('doc')
    self.assertEqual(result.field('prop').value, 'val')
def delete_index():
    """Delete every document in the _INDEX_NAME search index.

    index.get_range returns up to 100 documents at a time, so we must
    loop until we've deleted all items.
    https://cloud.google.com/appengine/docs/standard/python/search/
    """
    index = search.Index(name=_INDEX_NAME)
    while True:
        # Use ids_only to get the list of document IDs in the index without
        # the overhead of getting the entire document.
        document_ids = [
            document.doc_id
            for document in index.get_range(ids_only=True)]
        # If no IDs were returned, we've deleted everything.
        if not document_ids:
            break
        # Delete the documents for the given IDs
        index.delete(document_ids)
    # Fix: this log line previously ran *before* any deletion, claiming
    # success prematurely; it now reports only after the index is empty.
    logging.info("index " + _INDEX_NAME + " deleted")
def test_AlgorithmsHandler_GETOneAlgorithm(self):
    """Tests if only one algorithm is returned from database containing
    only one algorithm"""
    wrong_list = []
    right_list = []
    create_test_algorithm_list(wrong_list, 1)
    create_test_algorithm_list(right_list, 1)
    wrong_list[0]['linkURL'] = 'wrongLinkURL'
    algorithm = right_list[0]
    document = search_algorithm.create_document(
        algorithm['algorithmId'], algorithm['algorithmSummary'],
        algorithm['displayName'], algorithm['linkURL'])
    search.Index(name=search_algorithm._INDEX_STRING).put(document)
    response = self.testapp.get('/algorithms/')
    self.assertEqual(200, response.status_int)
    self.assertIsNotNone(response.charset)
    # Decode the body once and run both membership checks against it.
    body = json.loads(
        response.normal_body.decode(encoding=response.charset))
    self.assertListEqual(right_list, body)
    self.assertNotIn(wrong_list[0], body)
    self.assertEqual('application/json', response.content_type)
def test_respect_search_query(self):
    """Only documents whose 'k' field matches the query are returned."""
    corpus = [
        ('doc1', 'abc def ghi'),
        ('doc2', 'abc jkl mno'),
        ('doc3', 'abc jkl ghi'),
    ]
    documents = [
        search.Document(
            doc_id=doc_id, rank=1, language='en',
            fields=[search.TextField(name='k', value=text)])
        for doc_id, text in corpus]
    search.Index('my_index').put(documents)
    result = gae_search_services.search('k:jkl', 'my_index')[0]

    def expected(doc_id, text):
        return {'id': doc_id, 'k': text, 'language_code': 'en', 'rank': 1}

    # 'doc1' lacks the token 'jkl' and must be filtered out.
    self.assertNotIn(expected('doc1', 'abc def ghi'), result)
    self.assertIn(expected('doc2', 'abc jkl mno'), result)
    self.assertIn(expected('doc3', 'abc jkl ghi'), result)
def post(self):
    """Set the current user's userName (rejecting special characters other
    than '.' and '_') and index name/email tokens for autocomplete search.
    """
    user = users.get_current_user()
    # Hoisted: previously self.request.get("userName").strip() was
    # re-evaluated in four places.
    user_name = self.request.get("userName").strip()
    is_special_char_present = False
    for char in SPECIAL_CHAR.split():
        if char in user_name:
            is_special_char_present = True
            break
    if user:
        if is_special_char_present:
            self.add_message('Only . and _ are allowed in user name',
                             'danger')
            self.redirect('/', abort=False)
            return
        rec = User.query(User.userName == user_name).fetch(1)
        # Idiom fix: `if rec:` instead of `if len(rec):`.
        if rec:
            self.add_message('User name already exists.', 'danger')
        else:
            key_user = ndb.Key(User, user.email())
            key_user = key_user.get()
            key_user.userName = user_name
            u_key = key_user.put()
            index = search.Index(name='search_user')
            doc_id = u_key.urlsafe()
            emaildoc = user.email().split("@")[0]
            emaildoc = ','.join(self.tokenize_autocomplete(emaildoc))
            uNameDoc = ','.join(self.tokenize_autocomplete(user_name))
            document = search.Document(
                doc_id=doc_id,
                fields=[
                    search.TextField(name='name', value=uNameDoc),
                    search.TextField(name='email', value=emaildoc)
                ])
            index.put(document)
        # NOTE(review): redirect assumed to apply to both branches (the
        # 'already exists' path also needs a response) — confirm against
        # the original file's indentation.
        self.redirect('/', abort=False)
def test_search_all_documents(self):
    """An empty query string matches every document in the index."""
    corpus = [
        ('doc1', 'abc def ghi', 1),
        ('doc2', 'abc jkl mno', 2),
        ('doc3', 'abc jkl ghi', 3),
    ]
    documents = [
        search.Document(
            doc_id=doc_id, language='en', rank=rank,
            fields=[search.TextField(name='category', value=text)])
        for doc_id, text, rank in corpus]
    search.Index('my_index').put(documents)
    result = gae_search_services.search('', 'my_index', [], [])[0]
    # Every indexed document must appear in the result set.
    for doc_id, text, rank in corpus:
        self.assertIn(
            {
                'id': doc_id,
                'category': text,
                'rank': rank,
                'language_code': 'en'
            }, result)
def post(self):
    """Handle tweet creation: validate the 280-character limit, persist
    the TweetsModel (with optional uploaded image), index the tweet for
    search, and redirect home. Unauthenticated users get the main page.
    """
    template_values = UserAuth(self.request.uri).userTemplateVals()
    if template_values['user'] == '' or template_values['user'] is None:
        template = JINJA_ENVIRONMENT.get_template('templates/main.html')
        self.response.write(template.render(template_values))
    else:
        action = self.request.get('button')
        tweets_content = self.request.get('tweet_content')
        if action.lower() == 'tweet':
            tweets_content = self.request.get('tweet_content')
            if len(tweets_content) > 280:
                # Over the limit: re-render home with an error and the
                # user's draft content preserved.
                template_values['tweets_content'] = tweets_content
                template_values['tweets'] = self.getTweets(
                    template_values['username'])
                template_values['err_msg'] = (
                    'Content can not be more than 280 characters.')
                template = JINJA_ENVIRONMENT.get_template(
                    'templates/home.html')
                self.response.write(template.render(template_values))
            else:
                tweets_model = TweetsModel()
                tweets_model.user_name = template_values['username']
                tweets_model.tweet_content = tweets_content
                img_url = ''
                if len(self.get_uploads()) > 0:
                    upload = self.get_uploads()[0]
                    tweets_model.tweet_image = upload.key()
                    # Fix: get_serving_url was called twice for the same
                    # key; call once and reuse. Also removed the unused
                    # blobinfo/filename locals.
                    img_url = get_serving_url(upload.key())
                    tweets_model.tweet_image_url = img_url
                tweets_model.put()
                document = search.Document(
                    fields=[
                        search.TextField('tweets_content', tweets_content),
                        search.TextField('username',
                                         template_values['username']),
                        search.TextField('image_url', img_url)
                    ])
                index = search.Index('tweets')
                index.put(document)
                self.redirect('/home')
class InternalHome(basehandler.BaseHandler):
    """ handles the get and post requests for '/admin/internal'

    get request:
    - display the wikipages published for internal
    """

    def get(self):
        """Render the internal home page (admins only)."""
        if self.useradmin:
            pages = getPageContent(getRecentPages(internal=True))
            self.render("internalhome.html", pages=pages)

    def post(self):
        """Full-text search over the wiki index; newest pages first."""
        query = self.request.get('search').strip()
        if query:
            # sort results by date descending
            expr_list = [
                search.SortExpression(
                    expression='date',
                    # Fix: was datetime(1999, 01, 01) — leading-zero
                    # integer literals are a SyntaxError on Python 3.
                    default_value=datetime(1999, 1, 1),
                    direction=search.SortExpression.DESCENDING)
            ]
            # construct the sort options
            sort_opts = search.SortOptions(expressions=expr_list)
            query_options = search.QueryOptions(
                limit=10,
                snippeted_fields=['content'],
                sort_options=sort_opts,
                returned_fields=['path_link'])
            query_obj = search.Query(query_string=query,
                                     options=query_options)
            results = search.Index(name=config.__INDEX_NAME__).search(
                query=query_obj)
            len_results = len(results.results)
            self.render('internalhome.html', results=results,
                        len_results=len_results, query=query)
def execute_search(message):
    """Run a snippeted full-text search for `message.q` and return
    (docs, cursor), where docs is a list of DocumentMessage results.

    Args:
        message: request message with `q`, optional `cursor` and `limit`.
    """
    # Fix: the original set `optons.limit` (typo → NameError whenever a
    # limit was supplied) and assigned to QueryOptions attributes after
    # construction, which the search API exposes as read-only properties.
    # Build the kwargs first and construct QueryOptions exactly once.
    option_kwargs = {'snippeted_fields': ['html']}
    if message.cursor:
        option_kwargs['cursor'] = message.cursor
    if message.limit:
        option_kwargs['limit'] = message.limit
    options = search.QueryOptions(**option_kwargs)
    query = search.Query(message.q, options=options)
    index = search.Index(INDEX, namespace=NAMESPACE)
    results = index.search(query)
    cursor = results.cursor
    docs = []
    if results.results:
        for doc in results.results:
            doc_message = DocumentMessage()
            doc_message.language = doc.language
            doc_message.locale = _get_field(doc, 'locale')
            doc_message.title = _get_field(doc, 'title')
            doc_message.path = _get_field(doc, 'path')
            doc_message.snippet = _get_expression(doc, 'html')
            doc_message.fields = _get_all_fields(doc)
            docs.append(doc_message)
    return docs, cursor
def get(self):
    """Search recipes for the 'q' parameter and render the results page;
    redirects home when no query is given."""
    query = self.request.get('q')
    if not query:
        self.redirect('/')
        return
    index = search.Index('recipes')
    # NOTE(review): `query` is interpolated unescaped into the snippet
    # expression; a query containing '"' could break the expression —
    # consider sanitizing.
    snippet = 'snippet("%s", directions, 140)' % query
    options = search.QueryOptions(returned_expressions=[
        search.FieldExpression(name='snippet', expression=snippet)
    ])
    results = index.search(
        query=search.Query(query_string=query, options=options))
    docs = results.results if results else []
    self.render('serp/serp.html', recipes=docs, query=query)
def search_search_text(text, the_type, the_values):  # forum_key_urlsafe):
    """Search the gigomatic text index for `text`, optionally restricted
    to documents with the given type and value(s).

    Args:
        text: the phrase to search for.
        the_type: value for the 'type' field filter.
        the_values: list of acceptable 'value' field values (OR-ed).

    Returns:
        The search results, or [] if the search raised an error.
    """
    index = search.Index(name="gigomatic_searchtext_index")
    if the_values:
        if len(the_values) == 1:
            query = search.Query('"{0}" type:{1} value:{2}'.format(
                text, the_type, the_values[0]))
        else:
            # Fix: the OR terms must be parenthesized so they all bind to
            # the 'value' field; 'value:a OR b' matches b anywhere.
            query = search.Query('"{0}" type:{1} value:({2})'.format(
                text, the_type, ' OR '.join(the_values)))
    else:
        query = search.Query('{0}'.format(text))
    try:
        results = index.search(query)
    except search.Error:
        logging.exception('Search failed')
        return []
    return results
def update_search_index(self, owner, repo, version_key, library, bower):
    """Build and store the search document for a library version in the
    'repo' index, combining GitHub metadata, bower metadata, and (when
    ready) the analysis results for the version."""
    metadata = json.loads(library.metadata)
    fields = [
        search.AtomField(name='owner', value=owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description',
                         value=metadata.get('description', '')),
        search.TextField(name='bower_description',
                         value=bower.get('description', '')),
        search.TextField(name='bower_keywords',
                         value=' '.join(bower.get('keywords', []))),
        # Prefix tokens enable typeahead matching on the repo name and
        # both description texts.
        search.TextField(
            name='prefix_matches',
            value=' '.join(
                util.generate_prefixes_from_list(
                    [repo] + util.safesplit(metadata.get('description')) +
                    util.safesplit(bower.get('description')) +
                    repo.replace("_", " ").replace("-", " ").split()))),
    ]
    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
        # `analysis` is rebound from the Content entity to its parsed
        # JSON payload here.
        analysis = json.loads(analysis.content)
        # NOTE(review): the element/behavior extraction below is assumed
        # to belong inside this branch (outside it, `analysis` could be
        # None) — confirm against the original indentation.
        elements = analysis.get('elementsByTagName', {}).keys()
        if elements != []:
            fields.append(
                search.TextField(name='element', value=' '.join(elements)))
        behaviors = analysis.get('behaviorsByName', {}).keys()
        if behaviors != []:
            fields.append(
                search.TextField(name='behavior',
                                 value=' '.join(behaviors)))
    document = search.Document(doc_id=Library.id(owner, repo),
                               fields=fields)
    index = search.Index('repo')
    index.put(document)
def post(self):
    """Store a Photo entity and a geo-indexed search document for an
    incoming JSON payload of {url, stream_id, lat, lon}."""
    json_string = self.request.body
    dict_object = json.loads(json_string)
    url = dict_object['url']
    stream_id = dict_object['stream_id']
    lat = dict_object['lat']
    lon = dict_object['lon']
    photo_result = Photo.from_url(url)
    # Only create records when this URL is not already known.
    # NOTE(review): the search-document creation is assumed to be inside
    # this branch (skipping re-indexing of known photos) — confirm
    # against the original indentation.
    if not photo_result:
        photo = Photo(url=url, stream_id=stream_id, lat=lat, lon=lon)
        photo_key = photo.put()
        geopoint = search.GeoPoint(lat, lon)
        search_index = search.Document(
            doc_id=str(stream_id),
            fields=[search.TextField(name='url', value=url),
                    search.GeoField(name='geopoint', value=geopoint)
                    ])
        result = search.Index(name='photo').put(search_index)
def delete_all():
    """Deletes all documents from the index."""
    response_dict = {}
    try:
        index = search.Index(name='imagesearch')
        # get_range caps each batch, so loop until the index is drained.
        while True:
            batch_ids = [doc.doc_id
                         for doc in index.get_range(ids_only=True)]
            if not batch_ids:
                break
            index.delete(batch_ids)
        index.delete_schema()
        response_dict['result'] = 'ok'
    except search.DeleteError:
        # On failure the response stays empty rather than reporting 'ok'.
        logging.exception('Something went wrong in delete_all()')
    return jsonify(response_dict)
def test_AlgorithmsIdHandler_GET_NotFound(self):
    """Tests if nothing is found in an 101 algorithms long database
    while searching for nonexistent algorithmId xyz1"""
    searchedId = 'xyz1'
    algorithms = []
    create_test_algorithm_list(algorithms, 101)
    docs = []
    create_test_documents_list(algorithms, docs, 101)
    index = search.Index(name=search_algorithm._INDEX_STRING)
    index.put(docs)
    # Sanity check: the index really contains data before querying.
    stored = index.get_range(ids_only=True)
    self.assertLess(0, len(stored.results), msg='The database is empty')
    response = self.testapp.get('/algorithms/' + searchedId,
                                expect_errors=True)
    self.assertEqual(
        404, response.status_int,
        msg='Non existent Algorithm was found in empty database')
    self.assertEqual('application/json', response.content_type)
    self.assertIn('Algorithm Not Found',
                  response.normal_body.decode(encoding='UTF-8'))
def list(query):
    """Search the customers_test index for `query`, sorting results by
    company name then surname (both ascending).

    NOTE(review): the function name shadows the builtin `list`; kept for
    caller compatibility.
    """
    by_company = search.SortExpression(
        expression='company_name',
        direction=search.SortExpression.ASCENDING,
        default_value='ZZZZZZZZZZ')
    by_surname = search.SortExpression(
        expression='last_name',
        direction=search.SortExpression.ASCENDING,
        default_value="")
    options = search.QueryOptions(
        sort_options=search.SortOptions(
            expressions=[by_company, by_surname]),
        limit=1000)
    customer_query = search.Query(query_string=query, options=options)
    return search.Index(name="customers_test").search(customer_query)
def get_all_document_ids(index_name):
    """ Gets all document IDs within a search index """
    index = search_api.Index(name=index_name)
    collected = set()
    cursor_id = None
    # Page through the index 1000 ids at a time, resuming each batch
    # just after the last id seen.
    while True:
        batch = index.get_range(
            start_id=cursor_id,
            ids_only=True,
            limit=1000,
            include_start_object=False).results
        batch_ids = [doc.doc_id for doc in batch]
        if not batch_ids:
            return collected
        cursor_id = batch_ids[-1]
        collected.update(batch_ids)
def update_edge_indexes(cls, parent_key, kind, indexed_edge):
    """Merge `indexed_edge` into the parent entity's global search index
    under the field named by `kind`, preserving existing field values.

    Errors are reported to stdout and swallowed (best-effort update).
    """
    try:
        parent = parent_key.get()
        if parent:
            empty_string = lambda x: smart_str(x) if x else ""
            search_index = search.Index(name="GlobalIndex")
            search_document = search_index.get(str(parent_key.id()))
            data = {'id': parent_key.id()}
            if search_document:
                for e in search_document.fields:
                    if e.name == kind:
                        # Append the new edge to the existing value so
                        # earlier edges stay searchable.
                        if isinstance(indexed_edge, basestring):
                            indexed_edge = '%s %s' % (
                                empty_string(e.value), indexed_edge)
                        else:
                            indexed_edge = '%s %s' % (
                                empty_string(e.value), str(indexed_edge))
                    data[e.name] = e.value
            # NOTE(review): placement assumed from the collapsed source —
            # the new edge is recorded even when no prior document exists.
            data[kind] = indexed_edge
            parent.put_index(data)
    except Exception as exc:
        # Fix: was a bare `except:` with a message that discarded the
        # actual error; narrowed and the exception is now reported.
        print('ERROR: an error had occurred when updating edges index: '
              '%s' % exc)
def search_features(query_string):
    """Return up to 50 'feature' values matching `query_string`, sorted
    ascending; an empty list is returned on search errors."""
    features = []
    sort_options = search.SortOptions(expressions=[
        search.SortExpression(
            expression='feature',
            direction=search.SortExpression.ASCENDING,
            default_value="")
    ])
    options = search.QueryOptions(
        limit=50,
        returned_fields=['feature'],
        sort_options=sort_options)
    query = search.Query(query_string=query_string, options=options)
    try:
        result = search.Index(name="featureIndex").search(query)
        for document in result.results:
            # Only 'feature' is returned, so it is the sole field.
            features.append(document.fields[0].value)
    except search.Error:
        logging.error("There was an error running the search on \"%s\"."
                      % query_string)
    return features
def del_quote(quote_id, user):
    """Delete a post along with its comments, votes, image blob, and
    search document. Allowed only for the creator or an admin."""
    q = Post.get_by_id(quote_id)
    # Guard clause: silently do nothing when the post is missing or the
    # caller is neither admin nor creator.
    if q is None or not (users.is_current_user_admin() or
                         q.creator == user):
        return
    for comment in get_comments(quote_id):
        comment.key.delete()
    for vote in get_vote(quote_id):
        vote.key.delete()
    if q.image is not None:
        blobstore.delete(q.image)
    q.key.delete()
    doc_index = search.Index(name=_INDEX_SEARCH)
    if doc_index.get(doc_id=str(q.key.id())):
        doc_index.delete(document_ids=str(q.key.id()))
    logging.debug('Deleting the post!: %s and all the comments!' % quote_id)
def search_location_reports(query_string):
    # TODO: trying to implement google search api for future awesomeness
    # Searches the 'pieces' field of location reports, newest first, and
    # returns {'results': [{field: value, ...}, ...]}.
    sort = search.SortOptions(expressions=[
        search.SortExpression(
            expression='date',
            direction=search.SortExpression.DESCENDING)
    ])
    wanted_fields = ['address', 'date', 'sport_theme', 'user_email',
                     'image_url', 'tags']
    options = search.QueryOptions(sort_options=sort,
                                  returned_fields=wanted_fields)
    query = search.Query(
        query_string=''.join(['pieces:', query_string]),
        options=options)
    results = search.Index('api-location-reports').search(query)
    out = {'results': []}
    if results:
        out['results'] = [{f.name: f.value for f in item.fields}
                          for item in results]
    return out