import json

from SolrClient import SolrClient


def index_data():
    docs = get_data()  # get_data() is defined elsewhere in the source module
    client = SolrClient('http://localhost:8983/solr')
    client.index_json('stocks', json.dumps(docs))
    client.commit('stocks')
import json

from SolrClient import SolrClient


def index_json():
    client = SolrClient('http://localhost:8983/solr')
    docs = [
        {'id': '8', 'field8': 'value8'},
    ]
    client.index_json('test', json.dumps(docs))
    client.commit('test')
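# A minimal end-to-end sketch of the pattern the two functions above use:
# index a JSON batch, commit, then verify with a query. The URL, collection
# name, and docs here are placeholder assumptions, not from the examples.
import json

from SolrClient import SolrClient

client = SolrClient('http://localhost:8983/solr')  # assumed local Solr
docs = [{'id': '1', 'field1': 'value1'}, {'id': '2', 'field1': 'value2'}]
client.index_json('test', json.dumps(docs))  # index_json takes a JSON string
client.commit('test', openSearcher=True)     # open a searcher so docs are visible
res = client.query('test', {'q': '*:*'})
print(res.get_num_found())                   # expect 2 once the commit is visible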
def update_solr(self, task=None):
    solr = SolrClient(SOLR_URI + '/solr/')
    collection = 'listmanager'
    if not task:
        task = self.task
    document = {}
    document['id'] = task.id
    document['title'] = task.title
    document['note'] = task.note if task.note else ''
    #document['tag'] = [t for t in task.tag.split(',')] if task.tag else []
    document['tag'] = [k.name for k in task.keywords]  # better this than relying on tag
    document['completed'] = task.completed is not None
    document['star'] = task.star  # haven't used this yet and schema doesn't currently reflect it
    # note that I didn't think there was any value in indexing or storing context and folder
    document['context'] = task.context.title
    document['folder'] = task.folder.title
    json_docs = json.dumps([document])
    response = solr.index_json(collection, json_docs)
    # response = solr.commit(collection, waitSearcher=False)  # doesn't actually seem to work
    # Since solr.commit didn't seem to work, substituted the below, which works
    url = SOLR_URI + '/solr/' + collection + '/update'
    r = requests.post(url, data={"commit": "true"})
    #print(r.text)
    root = ET.fromstring(r.text)
    if root[0][0].text == '0':
        print(self.colorize("solr update successful", 'yellow'))
    else:
        print(self.colorize("there was a problem with the solr update", 'yellow'))
def update_solr():
    def now():
        return datetime.now().isoformat(' ').split('.')[0]

    solr = SolrClient(SOLR_URI + '/solr/')
    collection = 'listmanager'
    solr_sync = remote_session.query(Sync).get('solr')
    last_solr_sync = solr_sync.timestamp
    log = f"{now()}: last Solr sync = {last_solr_sync.isoformat(' ').split('.')[0]}\n"
    tasks = remote_session.query(Task).filter(Task.modified > last_solr_sync)
    log = f"{now()}: number of tasks modified since " \
          f"last sync = {tasks.count()}\n" + log
    # round the count to the nearest hundred and add a 200-doc cushion so the
    # range below covers every modified task
    max_n = round(tasks.count(), -2) + 200
    i = -1
    s = 0
    for n in range(100, max_n, 100):
        documents = []
        for i, task in enumerate(tasks[s:n]):
            document = {}
            document['id'] = task.id
            document['title'] = task.title
            document['note'] = task.note if task.note else ''
            document['tag'] = [t for t in task.tag.split(',')] if task.tag else []
            document['completed'] = task.completed is not None
            document['star'] = task.star  # haven't used this yet and schema doesn't currently reflect it
            # note that I didn't think there was any value in indexing or storing context and folder
            document['context'] = task.context.title
            document['folder'] = task.folder.title
            documents.append(document)
        json_docs = json.dumps(documents)
        response = solr.index_json(collection, json_docs)
        # response = solr.commit(collection, waitSearcher=False)  # doesn't actually seem to work
        # Since solr.commit didn't seem to work, substituted the below, which works
        url = SOLR_URI + '/solr/' + collection + '/update'
        r = requests.post(url, data={"commit": "true"})
        #print(r.text)
        #print("Tasks {} to {}".format(s, n))
        s = n
    solr_sync.timestamp = datetime.now() + timedelta(seconds=2)
    remote_session.commit()
    log = f"{now()}: new Solr sync = " \
          f"{solr_sync.timestamp.isoformat(' ').split('.')[0]}\n" + log
    return log, i
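# Several examples in this collection bypass solr.commit() ("doesn't actually
# seem to work" per the comments) and POST directly to the collection's update
# handler instead. A small helper for that recurring pattern might look like
# this sketch; the status check mirrors the XML parsing in the first
# update_solr() above, and solr_uri/collection are caller-supplied.
import requests
import xml.etree.ElementTree as ET


def commit_collection(solr_uri, collection):
    """Trigger a commit through Solr's update handler; True on status 0."""
    url = solr_uri + '/solr/' + collection + '/update'
    r = requests.post(url, data={"commit": "true"})
    root = ET.fromstring(r.text)
    # Solr's XML response reports status 0 on success
    return root[0][0].text == '0'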
class ClientTestIndexing(unittest.TestCase):
    # High-level client tests
    @classmethod
    def setUpClass(self):
        self.solr = SolrClient(test_config['SOLR_SERVER'][0],
                               devel=True,
                               auth=test_config['SOLR_CREDENTIALS'])
        self.rand_docs = RandomTestData()
        self.docs = self.rand_docs.get_docs(50)
        for field in test_config['collections']['copy_fields']:
            try:
                self.solr.schema.delete_copy_field(
                    test_config['SOLR_COLLECTION'], field)
            except:
                pass
        for field in test_config['collections']['fields']:
            try:
                self.solr.schema.create_field(
                    test_config['SOLR_COLLECTION'], field)
            except:
                pass

    def setUp(self):
        self.delete_docs()
        self.commit()

    def delete_docs(self):
        self.solr.delete_doc_by_id(test_config['SOLR_COLLECTION'], '*')
        self.commit()

    def commit(self):
        self.solr.commit(test_config['SOLR_COLLECTION'], openSearcher=True)
        sleep(5)

    @unittest.skip("Skipping for now")
    def test_access_without_auth(self):
        if not test_config['SOLR_CREDENTIALS'][0]:
            return
        solr = SolrClient(test_config['SOLR_SERVER'], devel=True)
        with self.assertRaises(ConnectionError) as cm:
            solr.query('SolrClient_unittest', {'q': 'not_gonna_happen'})

    def test_indexing_json(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.commit()
        sleep(5)
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(
                self.solr.query(test_config['SOLR_COLLECTION'], {
                    'q': 'id:{}'.format(doc['id'])
                }).get_num_found(), 1)
        self.delete_docs()
        self.commit()

    def test_indexing_conn_log(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.commit()
        sleep(5)
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(
                self.solr.query(test_config['SOLR_COLLECTION'], {
                    'q': 'id:{}'.format(doc['id'])
                }).get_num_found(), 1)
        logging.info(self.solr.transport._action_log)
        self.delete_docs()
        self.commit()

    def test_index_json_file(self):
        self.docs = self.rand_docs.get_docs(55)
        with open('temp_file.json', 'w') as f:
            json.dump(self.docs, f)
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    def test_stream_file_gzip_file(self):
        self.docs = self.rand_docs.get_docs(60)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    @unittest.skip("Don't test remote indexing in travis")
    def test_local_index_json_file(self):
        # named distinctly so it doesn't shadow test_index_json_file above
        self.docs = self.rand_docs.get_docs(61)
        with open('temp_file.json', 'w') as f:
            json.dump(self.docs, f)
        r = self.solr.local_index(test_config['SOLR_COLLECTION'],
                                  'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    def test_paging_query_with_rows(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}, rows=50):
            self.assertTrue(len(res.docs) == 50)
            docs.extend(res.docs)
            queries += 1
        self.assertEqual(
            [x['id'] for x in sorted(docs, key=lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key=lambda x: x['id'])])
        self.assertTrue(1000 / 50 == queries)
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    def test_paging_query(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}):
            self.assertTrue(len(res.docs) == 1000)
            docs.extend(res.docs)
            queries += 1
        self.assertTrue(queries == 1)
        self.assertEqual(
            [x['id'] for x in sorted(docs, key=lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key=lambda x: x['id'])])
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    def test_paging_query_with_max(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}, rows=50, max_start=502):
            self.assertTrue(len(res.docs) == 50)
            queries += 1
            docs.extend(res.docs)
        ids = [x['id'] for x in docs]
        for item in docs:
            self.assertTrue(item['id'] in ids)
        self.assertEqual(11, queries)
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass
    #document = {'id': item['id'].lower()}
    # apparently ran the first time to transfer to raspi without track in the list;
    # the reason so few tracks actually have a track number (I did a few starting 08072016)
    #document.update({k: item[k] for k in item
    #                 if k in ('id', 'album', 'artist', 'title', 'uri', 'track')})
    document = {
        k: item[k]
        for k in item
        if k in ('id', 'album', 'artist', 'title', 'uri', 'track')
    }
    documents.append(document)

#print(documents)
n = 0
while True:
    # there are limits on how many docs can be uploaded in one batch, but it's more than 100
    cur_documents = documents[n:n + 100]
    if not cur_documents:
        break
    cur_documents = json.dumps(cur_documents)
    response = solr_new.index_json(collection, cur_documents)
    print(response)
    # Since solr.commit didn't seem to work, substituted the below, which works
    url = ec_uri + ":8983/solr/" + collection + "/update"
    r = requests.post(url, data={"commit": "true"})
    print(r.text)
    n += 100
documents = []
for task in tasks[s:n]:
    document = {}
    document['id'] = task.id
    document['title'] = task.title
    document['note'] = task.note if task.note else ''
    document['tag'] = [t for t in task.tag.split(',')] if task.tag else []
    document['completed'] = task.completed is not None
    document['star'] = task.star  # haven't used this yet and schema doesn't currently reflect it
    # note that I didn't think there was any value in indexing or storing context and folder
    document['context'] = task.context.title
    document['folder'] = task.folder.title
    documents.append(document)

json_docs = json.dumps(documents)
response = solr.index_json(collection, json_docs)
#response = solr.commit(collection, waitSearcher=False)  # doesn't actually seem to work
# Since solr.commit didn't seem to work, substituted the below, which works
url = SOLR_URI + '/solr/' + collection + '/update'
r = requests.post(url, data={"commit": "true"})
print(r.text)
print("Tasks {} to {}".format(s, n))
s = n
    title = track.title
    uri = track.uri
    id_ = album + ' ' + title
    id_ = id_.replace(' ', '_')
    id_ = id_.lower()
    document = {"id": id_, "title": title, "uri": uri,
                "album": album, "artist": artist, "track": n}
    print(repr(document).encode('cp1252', errors='replace'))
    for k in document:
        print(str(k + ':' + str(document[k])).encode('cp1252', errors='ignore'))
    documents.append(document)
    n += 1

solr = SolrClient(solr_uri + '/solr')
collection = 'sonos_companion'
response = solr.index_json(collection, json.dumps(documents))
print(response)
# Since solr.commit didn't seem to work, substituted the below, which works
url = solr_uri + "/solr/" + collection + "/update"
r = requests.post(url, data={"commit": "true"})
print(r.text)

resp = input("Do you want to continue? (y or n) ")
if resp not in ('y', 'yes'):
    cont = False

######################################################################
# The below would be if you had a lot of documents
#n = 0
#while True:
""" import json from collections import defaultdict from SolrClient import SolrClient solr = SolrClient('http://localhost:8983/solr') j = defaultdict(dict) with open('DOI.json', 'r') as f: for k, v in json.load(f).items(): j[k]['doi'] = v with open('grobid.json', 'r') as f: for doc in json.load(f): for i, pub in enumerate(doc['relatedPublications']): for k, v in pub.items(): j[doc['id']]['relatedPublications_' + k + '_' + str(i)] = v with open('geotopic.json', 'r') as f: for k, v in json.load(f).items(): j[k].update(v) with open('measurements.json', 'rb') as f: for doc in json.loads(f.read().decode(errors='ignore')): j[doc['id']]['units'] = doc['units'] with open('sweet.json', 'r') as f: for doc in json.load(f): for k, v in doc.items(): if k.startswith('NER_Sweet_'): j[doc['id']][k] = v for k, v in j.items(): v['id'] = k solr.index_json('collection1', json.dumps(list(j.values())))
"album": album, "artist": artist, "track": n } print(repr(document).encode('cp1252', errors='replace')) for k in document: print( str(k + ':' + str(document[k])).encode('cp1252', errors='ignore')) documents.append(document) n += 1 solr = SolrClient(solr_uri + '/solr') collection = 'sonos_companion' response = solr.index_json(collection, json.dumps(documents)) print(response) # Since solr.commit didn't seem to work, substituted the below, which works url = solr_uri + "/solr/" + collection + "/update" r = requests.post(url, data={"commit": "true"}) print(r.text) resp = input("Do you want to continue? (y or n) ") if resp not in ('y', 'yes'): cont = False ###################################################################### # The below would be if you had a lot of documents #n = 0 #while True:
    document = {}
    # We create a unique id by concatenating the album and the song title
    id_ = item['album'] + ' ' + item['title']
    id_ = id_.replace(' ', '_')
    document['id'] = id_
    document.update({k: item[k] for k in item
                     if k in ('artist', 'album', 'title', 'uri', 'album_art')})
    documents.append(document)

n = 0
while True:
    # there are limits on how many docs can be uploaded in one batch, but it's more than 100
    cur_documents = documents[n:n + 100]
    if not cur_documents:
        break
    cur_documents = json.dumps(cur_documents)
    response = solr.index_json(collection, cur_documents)
    print(response)
    # The commit from SolrClient is not working
    #response = solr.commit(collection, waitSearcher=False)
    #print(response)
    # Since solr.commit didn't seem to work, substituted the below, which works
    url = ec_uri + ":8983/solr/" + collection + "/update"
    r = requests.post(url, data={"commit": "true"})
    print(r.text)
    n += 100
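# The two upload loops above share one shape: slice the document list into
# chunks of 100, index each chunk, and commit. A generic sketch of that
# pattern (the chunk size and the commit-by-POST workaround both follow the
# examples above; solr, base_url, and collection are caller-supplied):
import json

import requests


def index_in_batches(solr, base_url, collection, documents, batch_size=100):
    """Index documents in fixed-size batches, committing after each batch."""
    for n in range(0, len(documents), batch_size):
        batch = json.dumps(documents[n:n + batch_size])
        print(solr.index_json(collection, batch))
        # commit through the update handler, as the examples above do
        requests.post(base_url + "/solr/" + collection + "/update",
                      data={"commit": "true"})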
class SOLRDocumentManager(IDocumentManager):
    def __init__(self, server_address: str, index_name: str) -> None:
        self.client = SolrClient(server_address)
        self.index = index_name
        self._serializer = json_serializer()

    def add(self, document: IndexDocument) -> BaseResponse:
        response = BaseResponse()
        try:
            document.id = document.unique_id
            doc_body = self._serializer.serialize([document])
            solr_response = self.client.index_json(self.index, doc_body)
            if not solr_response:
                return response.set_error(
                    Error("IntegrationError", 500, "Index failed to add index!"))
            self.client.commit(self.index, openSearcher=True, waitSearcher=False)
            response = BaseResponse(True)
        except BasicException as e:
            response.set_error(Error("InternalServerError", 500, e.message))
        except Exception as e:
            response.set_error(
                Error("InternalServerError", 500, 'Unknown error occurred!'))
        return response

    def delete(self, unique_id: str) -> BaseResponse:
        response = BaseResponse()
        try:
            solr_response = self.client.delete_doc_by_id(self.index, unique_id)
            if not solr_response:
                return response.set_error(
                    Error("IntegrationError", 500, "Index failed to delete index!"))
            self.client.commit(self.index, openSearcher=True, waitSearcher=False)
            response = BaseResponse(True)
        except BasicException as e:
            response.set_error(Error("InternalServerError", 500, e.message))
        except Exception as e:
            print(e)
            response.set_error(
                Error("InternalServerError", 500, 'Unknown error occurred!'))
        return response

    def get(self, unique_id: str) -> DocumentResponse:
        pass

    def search(self, query: SearchQuery) -> SearchResult:
        solr_query = ""
        solr_field_query = ""
        solr_range_query = []
        for criteria in query.searchCriteria:
            solr_field_query += criteria.field + '^' + str(criteria.weight) + " "
            words = criteria.term.split(" ")
            for word in words:
                word = word.lower()
                solr_query += " " + word
        for range_criteria in query.rangeCriteria:
            solr_range_query.append(range_criteria.field + ":[" +
                                    str(range_criteria.minimum) + " TO " +
                                    str(range_criteria.maximum) + "]")
        data = {
            "q": solr_query.strip(),
            "offset": query.page * query.items,
            "limit": query.items,
            "filter": solr_range_query,
            "defType": "edismax",
            "qf": solr_field_query
        }
        result = SearchResult(0, False)
        try:
            response = self.client.query_raw(self.index, data)
            result = SearchResult(response['response']['numFound'], True)
            for document in response['response']['docs']:
                result.add_result(
                    self._serializer.deserialize(document, self.index_object_type))
        except Exception as e:
            result.set_error(
                Error("InternalServerError", 500, 'Unknown error occurred!'))
        return result
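# For reference, this is roughly what search() above sends to query_raw for a
# hypothetical query: one criterion weighting "title" at 2.0 with the term
# "solar panels", plus a year range. (The SearchQuery/SearchCriteria shapes
# are inferred from the attribute names search() reads.)
example_request = {
    "q": "solar panels",                # terms lower-cased and joined
    "offset": 0,                        # query.page * query.items
    "limit": 10,                        # query.items
    "filter": ["year:[2000 TO 2020]"],  # one entry per range criterion
    "defType": "edismax",
    "qf": "title^2.0 ",                 # field^weight pairs, space-separated
}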
class ClientTestQuery(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        self.solr = SolrClient(test_config['SOLR_SERVER'][0],
                               devel=True,
                               auth=test_config['SOLR_CREDENTIALS'])
        self.rand_docs = RandomTestData()
        self.docs = self.rand_docs.get_docs(50)
        self.solr.delete_doc_by_id(test_config['SOLR_COLLECTION'], '*')
        for field in test_config['collections']['copy_fields']:
            try:
                self.solr.collections.delete_copy_field(
                    test_config['SOLR_COLLECTION'], field)
            except:
                pass
        for field in test_config['collections']['fields']:
            try:
                self.solr.collections.create_field(
                    test_config['SOLR_COLLECTION'], field)
            except:
                pass
        # Index some data
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.solr.commit(test_config['SOLR_COLLECTION'], openSearcher=True)

    def test_basic_query(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))

    def test_facet(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
            'facet': 'true',
            'facet.field': 'facet_test',
        })
        local_facets = {}
        for doc in self.docs:
            try:
                local_facets[doc['facet_test']] += 1
            except:
                local_facets[doc['facet_test']] = 1
        try:
            self.assertDictEqual(local_facets, r.get_facets()['facet_test'])
        except Exception as e:
            logging.info("local")
            logging.info(local_facets)
            logging.info("facets")
            logging.info(r.get_facets())
            raise

    def test_facet_with_fq(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
            'facet': True,
            'facet.field': 'facet_test',
        })
        first_facet_field = list(r.get_facets()['facet_test'].keys())[0]
        first_facet_field_count = r.get_facets()['facet_test'][first_facet_field]
        r = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': True,
                'facet.field': 'facet_test',
                'fq': 'facet_test:{}'.format(first_facet_field)
            })
        self.assertEqual(r.get_num_found(), first_facet_field_count)

    def test_facet_range(self):
        res = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': True,
                'facet.range': 'price',
                'facet.range.start': 0,
                'facet.range.end': 100,
                'facet.range.gap': 10
            })
        prices = [doc['price'] for doc in self.docs]
        div = lambda x: str(x // 10 * 10)
        out = {}
        for k, g in itertools.groupby(sorted(prices), div):
            out[k] = len(list(g)) or 0
        self.assertDictEqual(out, res.get_facets_ranges()['price'])

    def test_facet_pivot(self):
        res = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': True,
                'facet.pivot': ['facet_test,price', 'facet_test,id']
            })
        out = {}
        for doc in self.docs:
            if doc['facet_test'] not in out:
                out[doc['facet_test']] = {}
            if doc['price'] not in out[doc['facet_test']]:
                out[doc['facet_test']][doc['price']] = 1
            else:
                out[doc['facet_test']][doc['price']] += 1
        self.assertDictEqual(out, res.get_facet_pivot()['facet_test,price'])

    def test_get_field_values_as_list(self):
        res = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
        })
        results = res.get_field_values_as_list('product_name_exact')
        docs = res.docs
        temp = []
        for doc in docs:
            if 'product_name_exact' in doc:
                temp.append(doc['product_name_exact'])
        self.assertEqual(results, temp)

    def test_get_facet_values_as_list(self):
        r = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': 'true',
                'facet.limit': -1,
                'facet.field': 'facet_test',
            })
        self.assertEqual(
            sorted(r.data['facet_counts']['facet_fields']['facet_test'][1::2]),
            sorted(r.get_facet_values_as_list('facet_test')))

    def test_grouped_count_1(self):
        ''' Get ngroups counts for grouped docs '''
        r = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'group': True,
                'group.field': 'id',
                'group.ngroups': True,
            })
        self.assertEqual(r.get_ngroups(), 50)
        self.assertEqual(r.get_ngroups('id'), 50)

    def test_grouped_docs(self):
        ''' Get a dict of grouped docs '''
        r = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'group': True,
                'group.field': 'id',
                'group.ngroups': True,
            })
        self.assertEqual(len(r.docs), 10)
        self.assertTrue('doclist' in r.docs[0])

    def test_flat_groups(self):
        ''' Get a flat list of grouped docs '''
        r = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
            'group': True,
            'group.field': 'id'
        })
        flats = r.get_flat_groups()
        self.assertEqual(len(flats), 10)
        self.assertTrue('date' in flats[0])

    def test_json_facet(self):
        ''' Transform raw json.facet buckets into a nested dict '''
        # Just lazy getting a new response object
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        a = r.get_jsonfacet_counts_as_dict(
            'test', {
                'count': 50,
                'test': {
                    'buckets': [{
                        'count': 10,
                        'pr': {
                            'buckets': [{'count': 2, 'unique': 1, 'val': 79},
                                        {'count': 1, 'unique': 1, 'val': 9}]
                        },
                        'pr_sum': 639.0,
                        'val': 'consectetur'
                    }, {
                        'count': 8,
                        'pr': {
                            'buckets': [{'count': 1, 'unique': 1, 'val': 9},
                                        {'count': 1, 'unique': 1, 'val': 31},
                                        {'count': 1, 'unique': 1, 'val': 33}]
                        },
                        'pr_sum': 420.0,
                        'val': 'auctor'
                    }, {
                        'count': 8,
                        'pr': {
                            'buckets': [{'count': 2, 'unique': 1, 'val': 94},
                                        {'count': 1, 'unique': 1, 'val': 25}]
                        },
                        'pr_sum': 501.0,
                        'val': 'nulla'
                    }]
                }
            })
        b = {
            'test': {
                'auctor': {
                    'count': 8,
                    'pr': {9: {'count': 1, 'unique': 1},
                           31: {'count': 1, 'unique': 1},
                           33: {'count': 1, 'unique': 1}},
                    'pr_sum': 420.0
                },
                'consectetur': {
                    'count': 10,
                    'pr': {9: {'count': 1, 'unique': 1},
                           79: {'count': 2, 'unique': 1}},
                    'pr_sum': 639.0
                },
                'nulla': {
                    'count': 8,
                    'pr': {25: {'count': 1, 'unique': 1},
                           94: {'count': 2, 'unique': 1}},
                    'pr_sum': 501.0
                }
            }
        }
        self.assertEqual(a, b)
class ClientTestIndexing(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        self.solr = SolrClient(test_config['SOLR_SERVER'][0],
                               devel=True,
                               auth=test_config['SOLR_CREDENTIALS'])
        self.rand_docs = RandomTestData()
        self.docs = self.rand_docs.get_docs(50)
        for field in test_config['collections']['copy_fields']:
            try:
                self.solr.schema.delete_copy_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception as e:
                pass
        for field in test_config['collections']['fields']:
            try:
                self.solr.schema.create_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception as e:
                pass

    def setUp(self):
        self.delete_docs()
        self.commit()

    def delete_docs(self):
        self.solr.delete_doc_by_id(test_config['SOLR_COLLECTION'], '*')
        self.commit()

    def commit(self):
        # softCommit because we don't care about data on disk
        self.solr.commit(test_config['SOLR_COLLECTION'],
                         openSearcher=True,
                         softCommit=True)

    def test_down_solr_exception(self):
        # connect to "down" solr host
        s = SolrClient('http://*****:*****')  # credentials masked in source; rest of test elided

    @unittest.skip("Skipping for now")
    def test_access_without_auth(self):
        if not test_config['SOLR_CREDENTIALS'][0]:
            return
        solr = SolrClient(test_config['SOLR_SERVER'], devel=True)
        with self.assertRaises(ConnectionError) as cm:
            solr.query('SolrClient_unittest', {'q': 'not_gonna_happen'})

    def test_indexing_json(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.commit()
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(
                self.solr.query(test_config['SOLR_COLLECTION'], {
                    'q': 'id:{}'.format(doc['id'])
                }).get_num_found(), 1)
        self.delete_docs()
        self.commit()

    def test_get(self):
        doc_id = '1'
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps([{'id': doc_id}]))
        # this returns the doc!
        self.solr.get(test_config['SOLR_COLLECTION'], doc_id)
        with self.assertRaises(NotFoundError):
            self.solr.get(test_config['SOLR_COLLECTION'], '5')

    def test_mget(self):
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps([{'id': '1'}]))
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps([{'id': '5'}]))
        docs = self.solr.mget(test_config['SOLR_COLLECTION'], ('5', '1'))
        self.assertEqual(len(docs), 2)

    def test_indexing_conn_log(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.commit()
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(
                self.solr.query(test_config['SOLR_COLLECTION'], {
                    'q': 'id:{}'.format(doc['id'])
                }).get_num_found(), 1)
        logging.info(self.solr.transport._action_log)
        self.delete_docs()
        self.commit()

    def test_index_json_file(self):
        self.docs = self.rand_docs.get_docs(55)
        with open('temp_file.json', 'w') as f:
            json.dump(self.docs, f)
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    def test_stream_file_gzip_file(self):
        self.docs = self.rand_docs.get_docs(60)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    @unittest.skip("Don't test remote indexing in travis")
    def test_local_index_json_file(self):
        # named distinctly so it doesn't shadow test_index_json_file above
        self.docs = self.rand_docs.get_docs(61)
        with open('temp_file.json', 'w') as f:
            json.dump(self.docs, f)
        r = self.solr.local_index(test_config['SOLR_COLLECTION'],
                                  'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    def test_paging_query_with_rows(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}, rows=50):
            self.assertTrue(len(res.docs) == 50)
            docs.extend(res.docs)
            queries += 1
        self.assertEqual(
            [x['id'] for x in sorted(docs, key=lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key=lambda x: x['id'])])
        self.assertTrue(1000 / 50 == queries)
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    def test_paging_query(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}):
            self.assertTrue(len(res.docs) == 1000)
            docs.extend(res.docs)
            queries += 1
        self.assertTrue(queries == 1)
        self.assertEqual(
            [x['id'] for x in sorted(docs, key=lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key=lambda x: x['id'])])
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    def test_paging_query_with_max(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}, rows=50, max_start=502):
            self.assertTrue(len(res.docs) == 50)
            queries += 1
            docs.extend(res.docs)
        ids = [x['id'] for x in docs]
        for item in docs:
            self.assertTrue(item['id'] in ids)
        self.assertEqual(11, queries)
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass

    def test_cursor_query(self):
        self.docs = self.rand_docs.get_docs(2000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.cursor_query(test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'rows': 100
        }):
            self.assertTrue(len(res.docs) == 100)
            queries += 1
            docs.extend(res.docs)
        ids = [x['id'] for x in docs]
        for item in docs:
            self.assertTrue(item['id'] in ids)
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except:
            pass
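# cursor_query, exercised in test_cursor_query above, pages through a full
# result set using Solr's cursorMark, which stays cheap even deep into the
# results. A minimal usage sketch (server URL and collection name assumed):
from SolrClient import SolrClient

solr = SolrClient('http://localhost:8983/solr')
all_docs = []
for res in solr.cursor_query('SolrClient_unittest', {'q': '*:*', 'rows': 100}):
    all_docs.extend(res.docs)  # each res holds one page of up to 100 docs
print(len(all_docs))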