def delete_item( self, pid ):
    """ Removes the record for `pid` from the custom bell index.
        Reads the solr root url from the BELL_I_SOLR_ROOT environment variable, runs a
        committed delete-by-query on the pid field, logs the response status and returns it.
        Raises Exception when the response status is not 200.
        Called by one_offs.rebuild_custom_index(). """
    solr_root = os.environ.get( 'BELL_I_SOLR_ROOT' )
    self.logger.info(
        'in tasks.indexer.delete_item() [for custom-solr]; SOLR_ROOT_URL, %s' % solr_root )
    client = Solr( solr_root )
    status = client.delete_by_query( 'pid:"%s"' % pid, commit=True ).status
    self.logger.info(
        'in tasks.indexer.delete_item() [for custom-solr]; pid, %s; response_status, %s' % (pid, status) )
    if status != 200:
        raise Exception( 'custom-solr delete problem logged' )
    return status
def delete_target_custom_solr_pids( self ):
    """ Deletes every pid listed in the deletions json file from the custom solr index.
        Loads the pid list from self.PIDS_TO_DELETE_SOURCE_DATA_JSON_PATH, issues one committed
        delete-by-query per pid against self.CUSTOM_INDEX_SOLR_URL_ROOT, and passes each pid and
        its response status to self.update_tracker().
        A non-200 status is logged as an error; the loop then continues with the next pid. """
    ## load pids to be deleted
    with open( self.PIDS_TO_DELETE_SOURCE_DATA_JSON_PATH ) as f:
        deletion_pid_lst = json.loads( f.read() )
    ## run deletion loop, tracking along way
    for pid in deletion_pid_lst:
        solr = Solr( self.CUSTOM_INDEX_SOLR_URL_ROOT )
        response = solr.delete_by_query( 'pid:"%s"' % pid, commit=True )
        response_status = response.status
        self.update_tracker( pid, response_status )
        if not response_status == 200:
            ## the keyword has to match the `{response}` placeholder; a mismatch makes .format() raise KeyError
            logger.error( 'custom-solr delete problem-response for pid `{pid}`: ```{response}```'.format(pid=pid, response=response_status) )
    return
class QueryResultTestCase(unittest.TestCase):
    """Integration checks for the Solr client against a server on localhost:8983.

    Several assertions expect the index to contain exactly four documents.
    """

    def setUp(self):
        self.solr = Solr('http://localhost:8983/solr')

    def tearDown(self):
        pass

    def test_search(self):
        """A match-all search reports four results and returns four documents."""
        result = self.solr.search(q='*:*')
        self.assertEqual(result.status, 200)
        self.assertEqual(result.total_results, 4)
        self.assertEqual(len(result.documents), 4)

    def test_search_cursor(self):
        """Fetching 1 row at a time yields 4 responses; 4 rows at a time yields 1."""
        for rows, expected_pages in ((1, 4), (4, 1)):
            cursor = self.solr.search_cursor(q='*:*')
            pages = 0
            for page in cursor.fetch(rows):
                self.assertEqual(page.status, 200)
                pages += 1
            self.assertEqual(pages, expected_pages)

    def test_commit(self):
        self.assertEqual(self.solr.commit().status, 200)

    def test_optimize(self):
        self.assertEqual(self.solr.optimize().status, 200)

    def test_ping(self):
        self.assertEqual(self.solr.ping().status, 200)

    def test_is_up(self):
        self.assertEqual(self.solr.is_up(), True)

    def test_update_delete(self):
        """Add two documents, then delete them, checking the totals each time."""
        # Get total results
        before = self.solr.search(q='*:*')
        self.assertEqual(before.status, 200)
        total_results = before.total_results

        # Post one document using json, then another document using xml
        for doc_id, input_type in ((1, 'json'), (2, 'xml')):
            posted = self.solr.update([{'id': doc_id}], input_type=input_type)
            self.assertEqual(posted.status, 200)

        # Compare total results
        after_insert = self.solr.search(q='*:*')
        self.assertEqual(after_insert.status, 200)
        self.assertEqual(after_insert.total_results, total_results + 2)

        # Now delete the two documents posted above
        removed = self.solr.delete_by_query('id:1')
        self.assertEqual(removed.status, 200)
        removed = self.solr.delete_by_key(2)
        self.assertEqual(removed.status, 200)

        after_delete = self.solr.search(q='*:*')
        self.assertEqual(after_delete.status, 200)
        self.assertEqual(after_delete.total_results, total_results)

    def test_query(self):
        pass
class eBsolr:
    """Convenience wrapper around a ``Solr`` client held in ``self.cursor``.

    Offers update/delete pass-throughs, a search helper that re-queries when
    the default row cap truncates the result, and helpers for facet data.
    """

    # Solr client; replaced per instance in __init__.
    cursor = None

    # ``basestring`` only exists on Python 2. Fall back to ``str`` elsewhere so
    # the string check in getResponse() cannot raise NameError.
    try:
        _STRING_TYPES = basestring
    except NameError:
        _STRING_TYPES = str

    def __init__(self, urls, config, version=4):
        """Create the underlying client.

        urls: passed to ``Solr`` as its first argument.
        config: accepted for interface compatibility; not used by this class.
        version: forwarded to ``Solr`` as ``version``.
        """
        self.cursor = Solr(urls, version=version)

    def update(self, documents, input_type='json', commit=False):
        """Send ``documents`` to the index and return the client's response."""
        # Return the response like the delete wrappers do, so callers can
        # inspect the status.
        return self.cursor.update(documents, input_type, commit)

    def deleteById(self, tid, commit=False):
        """Delete the document whose key is ``tid``; return the client's response."""
        return self.cursor.delete_by_key(tid, commit=commit)

    def deleteByQuery(self, query, commit=False):
        """Delete every document matching ``query``; return the client's response."""
        return self.cursor.delete_by_query(query=query, commit=commit)

    def deleteAll(self, commit=False):
        """Delete every document (query ``*:*``); return the client's response."""
        return self.cursor.delete_by_query("*:*", commit=commit)

    def _search(self, query):
        """Run one search and raise Exception when the status is 400 or above."""
        response = self.cursor.search(**query)
        if int(response.status) >= 400:
            raise Exception('Error Solr {}: {}'.format(response.status, response.extract_errmessage()))
        return response

    def getResponse(self, search, fields=None, start=0, rows=None, sort=None, fq=None):
        """Search the index and return the raw response object.

        search: value for the ``q`` parameter.
        fields: field list, either a comma-separated string or an iterable of names.
        start: offset of the first row.
        rows: maximum number of rows; ``None`` means "all matches".
        sort: value for the ``sort`` parameter.
        fq: value for the ``fq`` parameter.

        Raises Exception when a search comes back with a status of 400 or above.
        """
        query = {'q': search}
        if fields:
            if isinstance(fields, self._STRING_TYPES):
                query['fl'] = fields
            else:
                query['fl'] = ",".join(fields)
        if sort:
            query['sort'] = sort
        if fq:
            query['fq'] = fq
        # Without an explicit row count, start with the module-level _MAXROWS cap.
        limit = _MAXROWS if rows is None else rows
        query['start'] = start
        query['rows'] = limit
        response = self._search(query)
        if rows is None and response.total_results > limit:
            # The cap truncated the result: ask again for every match.
            query['rows'] = response.total_results
            response = self._search(query)
        return response

    def get_language_query(self, language):
        """Turn ``"en;fr"`` into ``"language:en OR language:fr"``.

        Returns ``None`` when ``language`` is ``None`` or an empty string.
        """
        if language is None or language == "":
            return None
        return " OR ".join("language:%s" % lang for lang in language.split(';'))

    def getDocs(self, search, fields=None, start=0, rows=None, sort=None, fq=None):
        """Search and return ``{"docs": <documents>, "count": <total results>}``.

        search: query syntax, e.g. "field:keys,field2:keys2"
        fields: fields to retrieve (list), e.g. ['field', 'field2']
        start: start row
        rows: max / limit of rows
        sort: row ordering, e.g. "field asc, field2 desc"
        """
        response = self.getResponse(search, fields, start, rows, sort, fq)
        return {"docs": response.documents, "count": response.total_results}

    def getFacetList(self, facets, facetField):
        """Pick the requested entries out of ``facets['facet_fields']``.

        facetField: list of field names or a comma-separated string; empty
        names are skipped.
        """
        if not isinstance(facetField, list):
            facetField = facetField.split(",")
        return {facet: facets['facet_fields'][facet] for facet in facetField if facet}

    def getFacetPivotGeneral(self, query, facetField, pivotField, limit=None, fq=None):
        """Request a ``facetField,pivotField`` facet pivot over HTTP.

        Returns the pivot entry from the JSON response. On any error the
        problem is printed and ``None`` is returned (best effort).
        """
        try:
            pivot_key = '{},{}'.format(facetField, pivotField)
            url = '{}select'.format(self.cursor.base_url)
            params = {
                'q': query,
                'rows': 0,
                'wt': 'json',
                'indent': 'true',
                'facet': 'true',
                'facet.pivot': pivot_key,
            }
            if limit:
                params['facet.limit'] = limit
            if fq:
                params['fq'] = fq
            http_response = requests.get(url, params=params)
            return http_response.json()['facet_counts']['facet_pivot'][pivot_key]
        except Exception as e:
            print("Error parsing facet pivot...")
            print(e)
            return None
class QueryResultTestCase(unittest.TestCase):
    """Live-server tests for the Solr client (expects four indexed documents)."""

    def setUp(self):
        self.solr = Solr('http://localhost:8983/solr')

    def tearDown(self):
        pass

    def _assert_ok(self, response):
        """Assert that ``response`` carries HTTP status 200."""
        self.assertEqual(response.status, 200)

    def _count_pages(self, rows):
        """Walk a fresh match-all cursor ``rows`` at a time; return the page count."""
        seen = 0
        for page in self.solr.search_cursor(q='*:*').fetch(rows):
            self._assert_ok(page)
            seen += 1
        return seen

    def test_search(self):
        found = self.solr.search(q='*:*')
        self._assert_ok(found)
        self.assertEqual(found.total_results, 4)
        self.assertEqual(len(found.documents), 4)

    def test_search_cursor(self):
        # One row per page -> four pages; four rows per page -> a single page.
        self.assertEqual(self._count_pages(1), 4)
        self.assertEqual(self._count_pages(4), 1)

    def test_commit(self):
        self._assert_ok(self.solr.commit())

    def test_optimize(self):
        self._assert_ok(self.solr.optimize())

    def test_ping(self):
        self._assert_ok(self.solr.ping())

    def test_is_up(self):
        alive = self.solr.is_up()
        self.assertEqual(alive, True)

    def test_update_delete(self):
        # Get total results
        baseline = self.solr.search(q='*:*')
        self._assert_ok(baseline)
        total_results = baseline.total_results

        # Post one document using json
        self._assert_ok(self.solr.update([{'id': 1}], input_type='json'))

        # Post another document using xml
        self._assert_ok(self.solr.update([{'id': 2}], input_type='xml'))

        # Compare total results
        grown = self.solr.search(q='*:*')
        self._assert_ok(grown)
        self.assertEqual(grown.total_results, total_results + 2)

        # Now delete the two documents posted above
        self._assert_ok(self.solr.delete_by_query('id:1'))
        self._assert_ok(self.solr.delete_by_key(2))

        restored = self.solr.search(q='*:*')
        self._assert_ok(restored)
        self.assertEqual(restored.total_results, total_results)

    def test_query(self):
        pass
# Make a connection to Mongo. try: db_conn = Connection() # db_conn = Connection("emo2.trinity.duke.edu", 27017) except ConnectionFailure: print "couldn't connect: be sure that Mongo is running on localhost:27017" sys.exit(1) db = db_conn['fashion_ip'] # create a connection to a solr server solr = Solr('http://localhost:8080/solr') # DELETE ALL DOCS FIRST!! solr.delete_by_query(query='*:*', commit=True) total_docs = db.docs.find().count() count = 0 documents = [] for doc in db.docs.find({},{'_id':True,'year':True,'court':True,'court_level':True,'url':True,'name':True,'content':True,'tags':True,'subjects':True}): if count%100 == 0: print count # don't know how else to get solr to take IDs... doc['_id'] = str(doc['_id']) # include subject tag in list of strings if weigth greater than 0.01 if 'subjects' in doc: sub_tmp = [k for k,v in doc['subjects'].items() if v >= 0.05] doc['subjects'] = sub_tmp
def del_all_index(server):
    """Delete every document from the Solr index at ``server`` and commit.

    server: value handed to ``Solr`` to build the client.
    """
    client = Solr(server)
    client.delete_by_query('*:*')
    client.commit()
# Make a connection to Mongo. try: db_conn = Connection() # db_conn = Connection("emo2.trinity.duke.edu", 27017) except ConnectionFailure: print "couldn't connect: be sure that Mongo is running on localhost:27017" sys.exit(1) db = db_conn['fashion_ip'] # create a connection to a solr server solr = Solr('http://localhost:8080/solr') # DELETE ALL DOCS FIRST!! solr.delete_by_query(query='*:*', commit=True) total_docs = db.docs.find().count() count = 0 documents = [] for doc in db.docs.find({}, { '_id': True, 'year': True, 'court': True, 'court_level': True, 'url': True, 'name': True, 'content': True, 'tags': True, 'subjects': True
# delete
from mysolr import Solr

# update lixiaoyao
# db.baike.update({_id:"http://baike.baidu.com/view/4488.htm"},{$set:{gameURL:["http://baike.baidu.com/subview/2188/5215542.htm" ]}})

solr_url = 'http://10.76.0.137:8983/solr/'

# Keys of the documents to remove from the index.
iden = [
    "http://baike.baidu.com/view/760101.htm",
    "http://baike.baidu.com/subview/10941/5236539.htm",
    "http://baike.baidu.com/subview/3049782/11262117.htm",
    "http://baike.baidu.com/subview/10786/6081536.htm",
    "http://baike.baidu.com/subview/533601/8190340.htm",
    "http://baike.baidu.com/view/1016334.htm",
    "http://baike.baidu.com/view/2106174.htm",
    "http://baike.baidu.com/subview/187895/6353044.htm",
    "http://baike.baidu.com/subview/187895/6353038.htm",
    "http://baike.baidu.com/view/123796.htm",
    "http://baike.baidu.com/subview/187895/6353044.htm",
    "http://baike.baidu.com/subview/168512/7010262.htm",
]

solr = Solr(solr_url)

# Delete the listed keys one by one, committing after each delete.
for doc_key in iden:
    solr.delete_by_key(doc_key, commit=True)

'''
query = {'q':'*:*'}
solr.delete_by_query(query,commit=True)
'''
class QueryResultTestCase(unittest.TestCase):
    """Tests for the Solr client against the server named by ``SOLR_URL``.

    Several assertions expect the index to contain exactly four documents.
    """

    MATCH_ALL = "*:*"

    def setUp(self):
        self.solr = Solr(os.getenv("SOLR_URL"))

    def tearDown(self):
        pass

    def test_search(self):
        reply = self.solr.search(q=self.MATCH_ALL)
        self.assertEqual(reply.status, 200)
        self.assertEqual(reply.total_results, 4)
        self.assertEqual(len(reply.documents), 4)

    def test_search_cursor(self):
        # (rows per fetch, number of responses expected)
        expectations = [(1, 4), (4, 1)]
        for page_size, expected in expectations:
            cursor = self.solr.search_cursor(q=self.MATCH_ALL)
            responses_seen = 0
            for chunk in cursor.fetch(page_size):
                self.assertEqual(chunk.status, 200)
                responses_seen += 1
            self.assertEqual(responses_seen, expected)

    def test_commit(self):
        reply = self.solr.commit()
        self.assertEqual(reply.status, 200)

    def test_optimize(self):
        reply = self.solr.optimize()
        self.assertEqual(reply.status, 200)

    def test_ping(self):
        reply = self.solr.ping()
        self.assertEqual(reply.status, 200)

    def test_is_up(self):
        self.assertEqual(self.solr.is_up(), True)

    def test_update_delete(self):
        # Get total results
        reply = self.solr.search(q=self.MATCH_ALL)
        self.assertEqual(reply.status, 200)
        total_results = reply.total_results

        # Post one document using json
        json_docs = [{"id": 1}]
        reply = self.solr.update(json_docs, input_type="json")
        self.assertEqual(reply.status, 200)

        # Post another document using xml
        xml_docs = [{"id": 2}]
        reply = self.solr.update(xml_docs, input_type="xml")
        self.assertEqual(reply.status, 200)

        # Compare total results
        reply = self.solr.search(q=self.MATCH_ALL)
        self.assertEqual(reply.status, 200)
        self.assertEqual(reply.total_results, total_results + 2)

        # Now delete the two documents posted above
        reply = self.solr.delete_by_query("id:1")
        self.assertEqual(reply.status, 200)
        reply = self.solr.delete_by_key(2)
        self.assertEqual(reply.status, 200)

        reply = self.solr.search(q=self.MATCH_ALL)
        self.assertEqual(reply.status, 200)
        self.assertEqual(reply.total_results, total_results)

    def test_query(self):
        pass