def build(self):
    """Export every row of the ``documents_documents`` Postgres table into a
    temporary JSON file and bulk-index that file into the Solr 'infoportal'
    collection.

    Returns:
        {} when Solr or Postgres is unavailable or the export fails;
        otherwise None after triggering the import.
    """
    try:
        CLIENT = SolrClient(SEARCH_ENGINE.get('URL'))
    except SolrError:
        print(
            "Solr не запущен, попробуйте выполнить команду: solr start -e cloud"
        )
        # BUG FIX: the original fell through here and then dereferenced the
        # undefined CLIENT below, raising NameError on top of the real error.
        return {}
    # Remote streaming must be enabled in solrconfig for local_index to work:
    # http://lucene.apache.org/solr/guide/8_2/requestdispatcher-in-solrconfig.html
    print("СТАТУС КЛАСТЕРА")
    print(f'CLIENT.collections={CLIENT.collections.clusterstatus()}')
    print('ЭКСПОРТ ДОКУМЕНТОВ postres')
    conn = create_connection()
    if conn is None:
        return {}
    try:
        with conn:
            cur = conn.cursor()
            cur.execute("SELECT * FROM documents_documents")
            self.rows = cur.fetchall()
        # BUG FIX: the original wrote "doc," for every row and then "]",
        # producing "[{...},{...},]" — the trailing comma makes the file
        # invalid JSON. Joining the serialized documents avoids it, and the
        # separate truncate-then-reopen of the file was redundant.
        with open(TMP_FILENAME, "w", encoding='utf-8') as f:
            f.write("[")
            f.write(",".join(SolrDocument(row).toJSON() for row in self.rows))
            f.write("]")
    except Exception as ex:
        # Report the failure and do NOT index a partial/stale file
        # (the original continued to local_index regardless).
        print(f"{ex}")
        return {}
    finally:
        # `with conn:` only commits/rolls back; it does not close.
        conn.close()
    print('ИМПОРТ ДОКУМЕНТОВ В Solr')
    CLIENT.local_index('infoportal', TMP_FILENAME)
class ClientTestIndexing(unittest.TestCase):
    """High-level SolrClient tests: JSON indexing, file streaming and
    paging queries against the collection named in ``test_config``."""

    @classmethod
    def setUpClass(cls):
        cls.solr = SolrClient(test_config['SOLR_SERVER'][0],
                              devel=True,
                              auth=test_config['SOLR_CREDENTIALS'])
        cls.rand_docs = RandomTestData()
        cls.docs = cls.rand_docs.get_docs(50)
        # Schema setup is best-effort: fields may already exist from a
        # previous run, in which case Solr raises and we carry on.
        for field in test_config['collections']['copy_fields']:
            try:
                cls.solr.schema.delete_copy_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception:
                pass
        for field in test_config['collections']['fields']:
            try:
                cls.solr.schema.create_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception:
                pass

    def setUp(self):
        self.delete_docs()
        self.commit()

    def delete_docs(self):
        """Remove every document from the test collection."""
        self.solr.delete_doc_by_id(test_config['SOLR_COLLECTION'], '*')
        self.commit()

    def commit(self):
        """Hard commit with a new searcher, then wait for it to settle."""
        self.solr.commit(test_config['SOLR_COLLECTION'], openSearcher=True)
        sleep(5)

    def _remove_temp_files(self):
        # Best-effort cleanup of the fixture files written by stream tests.
        for path in ('temp_file.json.gz', 'temp_file.json'):
            try:
                os.remove(path)
            except OSError:
                pass

    @unittest.skip("Skipping for now")
    def test_access_without_auth(self):
        if not test_config['SOLR_CREDENTIALS'][0]:
            return
        solr = SolrClient(test_config['SOLR_SERVER'], devel=True)
        with self.assertRaises(ConnectionError) as cm:
            solr.query('SolrClient_unittest', {'q': 'not_gonna_happen'})

    def test_indexing_json(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.commit()
        sleep(5)
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(
                self.solr.query(test_config['SOLR_COLLECTION'],
                                {'q': 'id:{}'.format(doc['id'])}
                                ).get_num_found(),
                1)
        self.delete_docs()
        self.commit()

    def test_indexing_conn_log(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.commit()
        sleep(5)
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(
                self.solr.query(test_config['SOLR_COLLECTION'],
                                {'q': 'id:{}'.format(doc['id'])}
                                ).get_num_found(),
                1)
        logging.info(self.solr.transport._action_log)
        self.delete_docs()
        self.commit()

    def test_index_json_file(self):
        self.docs = self.rand_docs.get_docs(55)
        with open('temp_file.json', 'w') as f:
            json.dump(self.docs, f)
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    def test_stream_file_gzip_file(self):
        self.docs = self.rand_docs.get_docs(60)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    # BUG FIX: this test was also named test_index_json_file, silently
    # shadowing the stream_file variant above — so neither test ever ran
    # (this one is skipped, the first one was overwritten at class creation).
    @unittest.skip("Don't test remote indexing in travis")
    def test_local_index_json_file(self):
        self.docs = self.rand_docs.get_docs(61)
        with open('temp_file.json', 'w') as f:
            json.dump(self.docs, f)
        r = self.solr.local_index(test_config['SOLR_COLLECTION'],
                                  'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    def test_paging_query_with_rows(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}, rows=50):
            self.assertTrue(len(res.docs) == 50)
            docs.extend(res.docs)
            queries += 1
        self.assertEqual(
            [x['id'] for x in sorted(docs, key=lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key=lambda x: x['id'])]
        )
        self.assertTrue(1000 / 50 == queries)
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    def test_paging_query(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        # Without an explicit rows= the client fetches everything at once.
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}):
            self.assertTrue(len(res.docs) == 1000)
            docs.extend(res.docs)
            queries += 1
        self.assertTrue(queries == 1)
        self.assertEqual(
            [x['id'] for x in sorted(docs, key=lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key=lambda x: x['id'])]
        )
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    def test_paging_query_with_max(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        # max_start=502 caps paging at 11 windows of 50 rows each.
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'},
                                          rows=50, max_start=502):
            self.assertTrue(len(res.docs) == 50)
            queries += 1
            docs.extend(res.docs)
        ids = [x['id'] for x in docs]
        for item in docs:
            self.assertTrue(item['id'] in ids)
        self.assertEqual(11, queries)
        self.delete_docs()
        self.commit()
        self._remove_temp_files()
#!/usr/bin/env python3
"""Index geotopic.json into the local Solr 'geotopic' collection."""
from SolrClient import SolrClient

client = SolrClient('http://localhost:8983/solr')
client.local_index('geotopic', 'geotopic.json')
#!/usr/bin/env python3
"""Index measurements.json into the local Solr 'measurements' collection."""
from SolrClient import SolrClient

client = SolrClient('http://localhost:8983/solr')
client.local_index('measurements', 'measurements.json')
#!/usr/bin/env python3
"""Index grobid.json into the local Solr 'grobid' collection."""
from SolrClient import SolrClient

client = SolrClient('http://localhost:8983/solr')
client.local_index('grobid', 'grobid.json')
class ClientTestIndexing(unittest.TestCase):
    """High-level SolrClient tests: indexing, get/mget, file streaming,
    paging and cursor queries. Uses soft commits for speed."""

    @classmethod
    def setUpClass(cls):
        cls.solr = SolrClient(test_config['SOLR_SERVER'][0],
                              devel=True,
                              auth=test_config['SOLR_CREDENTIALS'])
        cls.rand_docs = RandomTestData()
        cls.docs = cls.rand_docs.get_docs(50)
        # Schema setup is best-effort: fields may already exist from a
        # previous run, in which case Solr raises and we carry on.
        for field in test_config['collections']['copy_fields']:
            try:
                cls.solr.schema.delete_copy_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception:
                pass
        for field in test_config['collections']['fields']:
            try:
                cls.solr.schema.create_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception:
                pass

    def setUp(self):
        self.delete_docs()
        self.commit()

    def delete_docs(self):
        """Remove every document from the test collection."""
        self.solr.delete_doc_by_id(test_config['SOLR_COLLECTION'], '*')
        self.commit()

    def commit(self):
        # softCommit because we don't care about data on disk
        self.solr.commit(test_config['SOLR_COLLECTION'],
                         openSearcher=True,
                         softCommit=True)

    def _remove_temp_files(self):
        # Best-effort cleanup of the fixture files written by stream tests.
        for path in ('temp_file.json.gz', 'temp_file.json'):
            try:
                os.remove(path)
            except OSError:
                pass

    def test_down_solr_exception(self):
        # NOTE(review): this test's body was redacted in the source
        # ('http://*****:*****' swallowed the URL and the assertions) —
        # reconstructed as: connect to a "down" Solr host and expect the
        # client to raise on query. Confirm against VCS history.
        s = SolrClient('http://localhost:1/solr', devel=True)
        with self.assertRaises(Exception):
            s.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})

    @unittest.skip("Skipping for now")
    def test_access_without_auth(self):
        if not test_config['SOLR_CREDENTIALS'][0]:
            return
        solr = SolrClient(test_config['SOLR_SERVER'], devel=True)
        with self.assertRaises(ConnectionError) as cm:
            solr.query('SolrClient_unittest', {'q': 'not_gonna_happen'})

    def test_indexing_json(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.commit()
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(
                self.solr.query(test_config['SOLR_COLLECTION'],
                                {'q': 'id:{}'.format(doc['id'])}
                                ).get_num_found(),
                1)
        self.delete_docs()
        self.commit()

    def test_get(self):
        doc_id = '1'
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps([{'id': doc_id}]))
        # this returns the doc!
        self.solr.get(test_config['SOLR_COLLECTION'], doc_id)
        # a missing id must raise rather than return an empty result
        with self.assertRaises(NotFoundError):
            self.solr.get(test_config['SOLR_COLLECTION'], '5')

    def test_mget(self):
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps([{'id': '1'}]))
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps([{'id': '5'}]))
        docs = self.solr.mget(test_config['SOLR_COLLECTION'], ('5', '1'))
        self.assertEqual(len(docs), 2)

    def test_indexing_conn_log(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.commit()
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(
                self.solr.query(test_config['SOLR_COLLECTION'],
                                {'q': 'id:{}'.format(doc['id'])}
                                ).get_num_found(),
                1)
        logging.info(self.solr.transport._action_log)
        self.delete_docs()
        self.commit()

    def test_index_json_file(self):
        self.docs = self.rand_docs.get_docs(55)
        with open('temp_file.json', 'w') as f:
            json.dump(self.docs, f)
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    def test_stream_file_gzip_file(self):
        self.docs = self.rand_docs.get_docs(60)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    # BUG FIX: this test was also named test_index_json_file, silently
    # shadowing the stream_file variant above — so neither test ever ran
    # (this one is skipped, the first one was overwritten at class creation).
    @unittest.skip("Don't test remote indexing in travis")
    def test_local_index_json_file(self):
        self.docs = self.rand_docs.get_docs(61)
        with open('temp_file.json', 'w') as f:
            json.dump(self.docs, f)
        r = self.solr.local_index(test_config['SOLR_COLLECTION'],
                                  'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    def test_paging_query_with_rows(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}, rows=50):
            self.assertTrue(len(res.docs) == 50)
            docs.extend(res.docs)
            queries += 1
        self.assertEqual(
            [x['id'] for x in sorted(docs, key=lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key=lambda x: x['id'])])
        self.assertTrue(1000 / 50 == queries)
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    def test_paging_query(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        # Without an explicit rows= the client fetches everything at once.
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}):
            self.assertTrue(len(res.docs) == 1000)
            docs.extend(res.docs)
            queries += 1
        self.assertTrue(queries == 1)
        self.assertEqual(
            [x['id'] for x in sorted(docs, key=lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key=lambda x: x['id'])])
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    def test_paging_query_with_max(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        # max_start=502 caps paging at 11 windows of 50 rows each.
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'},
                                          rows=50,
                                          max_start=502):
            self.assertTrue(len(res.docs) == 50)
            queries += 1
            docs.extend(res.docs)
        ids = [x['id'] for x in docs]
        for item in docs:
            self.assertTrue(item['id'] in ids)
        self.assertEqual(11, queries)
        self.delete_docs()
        self.commit()
        self._remove_temp_files()

    def test_cursor_query(self):
        self.docs = self.rand_docs.get_docs(2000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.cursor_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*', 'rows': 100}):
            self.assertTrue(len(res.docs) == 100)
            queries += 1
            docs.extend(res.docs)
        ids = [x['id'] for x in docs]
        for item in docs:
            self.assertTrue(item['id'] in ids)
        self.delete_docs()
        self.commit()
        self._remove_temp_files()