Example 1
def index_data():

    docs = get_data()

    client = SolrClient('http://localhost:8983/solr')

    client.index_json('stocks', json.dumps(docs))

    client.commit('stocks')
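If an explicit commit call is not wanted, Solr's update handler also accepts a commitWithin parameter. A minimal sketch under that assumption, posting the JSON array straight to the same local 'stocks' collection with requests (as several of the later examples do):

import json
import requests

docs = [{'id': '1', 'price': 10}]
# commitWithin asks Solr itself to commit within 5 seconds,
# so no separate commit request is needed
r = requests.post('http://localhost:8983/solr/stocks/update?commitWithin=5000',
                  data=json.dumps(docs),
                  headers={'Content-Type': 'application/json'})
print(r.status_code)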
Example 2
def index_json():

    client = SolrClient('http://localhost:8983/solr')

    docs = [
        {'id' : '8', 'field8' : 'value8'},
    ]

    client.index_json('test', json.dumps(docs))
    client.commit('test')
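Note that index_json takes the already-serialized JSON string, which is why every example wraps the document list in json.dumps first. A small sketch reusing the client and 'test' collection from above:

docs = [{'id': '9', 'field9': 'value9'}]
client.index_json('test', json.dumps(docs))  # a JSON string, not the list itself
client.commit('test')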
Example 3
    def update_solr(self, task=None):
        solr = SolrClient(SOLR_URI + '/solr/')
        collection = 'listmanager'

        if not task:
            task = self.task

        document = {}
        document['id'] = task.id
        document['title'] = task.title
        document['note'] = task.note if task.note else ''
        #document['tag'] = [t for t in task.tag.split(',')] if task.tag else []
        document['tag'] = [k.name for k in task.keywords] # better than relying on the tag string

        document['completed'] = task.completed is not None
        document['star'] = task.star # haven't used this yet and schema doesn't currently reflect it

        # note: I didn't think there was any value in indexing or storing context and folder
        document['context'] = task.context.title
        document['folder'] = task.folder.title

        json_docs = json.dumps([document])
        response = solr.index_json(collection, json_docs)

        # response = solr.commit(collection, waitSearcher=False) # doesn't actually seem to work
        # Since solr.commit didn't seem to work, substituted the below, which works
        url = SOLR_URI + '/solr/' + collection + '/update'
        r = requests.post(url, data={"commit":"true"})
        #print(r.text)
        root = ET.fromstring(r.text)
        if root[0][0].text == '0':
            print(self.colorize("solr update successful", 'yellow'))
        else:
            print(self.colorize("there was a problem with the solr update", 'yellow'))
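The status check above indexes positionally into the XML response (root[0][0]), which silently depends on element order. A hedged alternative sketch that looks the status element up by name, assuming Solr's standard XML response layout:

import xml.etree.ElementTree as ET

def solr_update_ok(response_text):
    # <response><lst name="responseHeader"><int name="status">0</int>...</lst></response>
    status = ET.fromstring(response_text).find(
        "./lst[@name='responseHeader']/int[@name='status']")
    return status is not None and status.text == '0'

# usage at the call site above: if solr_update_ok(r.text): ...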
Example 4
def update_solr():
    def now():
        return datetime.now().isoformat(' ').split('.')[0]

    solr = SolrClient(SOLR_URI + '/solr/')
    collection = 'listmanager'
    solr_sync = remote_session.query(Sync).get('solr')
    last_solr_sync = solr_sync.timestamp
    log = f"{now()}: last Solr sync = {last_solr_sync.isoformat(' ').split('.')[0]}\n"
    tasks = remote_session.query(Task).filter(Task.modified > last_solr_sync)
    log = f"{now()}: number of tasks modified since "\
          f"last sync = {str(tasks.count())}\n" + log
    # round up to the next multiple of 100 past the count; max_n avoids shadowing the built-in max
    max_n = round(tasks.count(), -2) + 200
    i = -1
    s = 0
    for n in range(100, max_n, 100):

        documents = []
        for i, task in enumerate(tasks[s:n]):
            document = {}
            document['id'] = task.id
            document['title'] = task.title
            document['note'] = task.note if task.note else ''
            document['tag'] = [t for t in task.tag.split(',')] if task.tag else []

            document['completed'] = task.completed is not None
            document['star'] = task.star  # haven't used this yet and schema doesn't currently reflect it

            # note: I didn't think there was any value in indexing or storing context and folder
            document['context'] = task.context.title
            document['folder'] = task.folder.title

            documents.append(document)

        json_docs = json.dumps(documents)
        response = solr.index_json(collection, json_docs)

        # response = solr.commit(collection, waitSearcher=False) # doesn't actually seem to work
        # Since solr.commit didn't seem to work, substituted the below, which works
        url = SOLR_URI + '/solr/' + collection + '/update'
        r = requests.post(url, data={"commit": "true"})
        #print(r.text)

        #print("Tasks {} to {}".format(s,n))
        s = n

    solr_sync.timestamp = datetime.now() + timedelta(seconds=2)
    remote_session.commit()
    log = f"{now()}: new Solr sync = "\
           f"{solr_sync.timestamp.isoformat(' ').split('.')[0]}\n" + log
    return log, i
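The rounded max_n arithmetic above exists only to walk tasks in batches of 100. A simpler batching sketch (assuming the sliceable query object used above; slices of a SQLAlchemy query return plain lists):

def batches(items, size=100):
    # yield consecutive slices of at most `size` items until one comes back empty
    start = 0
    while True:
        chunk = items[start:start + size]
        if not chunk:
            break
        yield chunk
        start += size

Each yielded chunk can then go through json.dumps and solr.index_json exactly as above.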
Example 5
class ClientTestIndexing(unittest.TestCase):
    #High Level Client Tests
    
    @classmethod
    def setUpClass(self):
        self.solr = SolrClient(test_config['SOLR_SERVER'][0], devel=True, auth=test_config['SOLR_CREDENTIALS'])
        self.rand_docs = RandomTestData()
        self.docs = self.rand_docs.get_docs(50)
        
        for field in test_config['collections']['copy_fields']:
            try:
                self.solr.schema.delete_copy_field(test_config['SOLR_COLLECTION'],field)
            except Exception:
                pass
        for field in test_config['collections']['fields']:
            try:
                self.solr.schema.create_field(test_config['SOLR_COLLECTION'],field)
            except Exception:
                pass
                
    def setUp(self):
        self.delete_docs()
        self.commit()
    
    def delete_docs(self):
        self.solr.delete_doc_by_id(test_config['SOLR_COLLECTION'],'*')
        self.commit()
        
    def commit(self):
        self.solr.commit(test_config['SOLR_COLLECTION'],openSearcher=True)
        sleep(5)
    
    @unittest.skip("Skipping for now")
    def test_access_without_auth(self):
        if not test_config['SOLR_CREDENTIALS'][0]:
            return
        solr = SolrClient(test_config['SOLR_SERVER'],devel=True)
        with self.assertRaises(ConnectionError) as cm:
            solr.query('SolrClient_unittest',{'q':'not_gonna_happen'})
            
    
    def test_indexing_json(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],json.dumps(self.docs))
        self.commit()
        sleep(5)
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(self.solr.query(test_config['SOLR_COLLECTION'],{'q':'id:{}'.format(doc['id'])}).get_num_found(),1)
        self.delete_docs()
        self.commit()
    
    def test_indexing_conn_log(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],json.dumps(self.docs))
        self.commit()
        sleep(5)
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(self.solr.query(test_config['SOLR_COLLECTION'],{'q':'id:{}'.format(doc['id'])}).get_num_found(),1)
        logging.info(self.solr.transport._action_log)
        self.delete_docs()
        self.commit()
    
    def test_index_json_file(self):
        self.docs = self.rand_docs.get_docs(55)
        with open('temp_file.json','w') as f:
            json.dump(self.docs,f)
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'],{'q':'*:*'})
        self.assertEqual(r.get_num_found(),len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass
            
    
    def test_stream_file_gzip_file(self):
        self.docs = self.rand_docs.get_docs(60)
        with gzip.open('temp_file.json.gz','wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],'temp_file.json.gz')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'],{'q':'*:*'})
        self.assertEqual(r.get_num_found(),len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass
            
    @unittest.skip("Don't test remote indexing in travis")
    def test_local_index_json_file(self):
        self.docs = self.rand_docs.get_docs(61)
        with open('temp_file.json','w') as f:
            json.dump(self.docs,f)
        r = self.solr.local_index(test_config['SOLR_COLLECTION'],'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'],{'q':'*:*'})
        self.assertEqual(r.get_num_found(),len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass

    def test_paging_query_with_rows(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz','wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],{'q':'*:*'}, rows=50):
            self.assertTrue(len(res.docs) == 50)
            docs.extend(res.docs)
            queries +=1
        self.assertEqual(
            [x['id'] for x in sorted(docs, key= lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key= lambda x: x['id'])]
            )
        self.assertTrue(1000/50 == queries)
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass   

    def test_paging_query(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz','wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],{'q':'*:*'}):
            self.assertTrue(len(res.docs) == 1000)
            docs.extend(res.docs)
            queries +=1
        self.assertTrue(queries == 1)
        self.assertEqual(
            [x['id'] for x in sorted(docs, key= lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key= lambda x: x['id'])]
            )
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass              
            
    def test_paging_query_with_max(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz','wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'], {'q':'*:*'}, rows = 50, max_start = 502):
            self.assertTrue(len(res.docs) == 50)
            queries +=1
            docs.extend(res.docs)
        ids = [x['id'] for x in docs]

        for item in docs:
            self.assertTrue(item['id'] in ids)

        self.assertEqual(11, queries)
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass    
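The fixed sleep(5) after each commit is a blunt way to wait for the new searcher. A hedged alternative, assuming the commit keyword arguments used elsewhere in these examples (openSearcher, waitSearcher) behave as their names suggest:

    def commit(self):
        # block until the new searcher is registered instead of sleeping
        self.solr.commit(test_config['SOLR_COLLECTION'],
                         openSearcher=True,
                         waitSearcher=True)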
Example 6
        #document = {'id':item['id'].lower()}
        # apparently this ran the first time, for the transfer to the raspi, without 'track' in the list,
        # which is why so few tracks actually have a track number (I did a few starting 08072016)
        #document.update({k:item[k] for k in item if k in ('id','album','artist','title','uri','track')})
        document = {
            k: item[k]
            for k in item
            if k in ('id', 'album', 'artist', 'title', 'uri', 'track')
        }
        documents.append(document)
    #print(documents)

    n = 0
    while True:
        # there are limitations in how many docs can be uploaded in a batch but it's more than 100
        cur_documents = documents[n:n + 100]

        if not cur_documents:
            break

        cur_documents = json.dumps(cur_documents)
        response = solr_new.index_json(collection, cur_documents)
        print(response)

        # Since solr.commit didn't seem to work, substituted the below, which works
        url = ec_uri + ":8983/solr/" + collection + "/update"
        r = requests.post(url, data={"commit": "true"})
        print(r.text)

        n += 100
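Examples 6 through 11 all commit by POSTing data={"commit": "true"} to the update handler because solr.commit reportedly did not work. An equivalent sketch with the commit flag carried as a URL parameter instead (assuming the ec_uri and collection variables from above):

url = ec_uri + ':8983/solr/' + collection + '/update?commit=true'
r = requests.post(url, json={})  # empty JSON update body; the commit rides on the URL
print(r.status_code)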
Example 7
    documents = []
    for task in tasks[s:n]:
        document = {}
        document['id'] = task.id
        document['title'] = task.title
        document['note'] = task.note if task.note else ''
        document['tag'] = [t for t in task.tag.split(',')] if task.tag else []

        document['completed'] = task.completed is not None
        document['star'] = task.star  # haven't used this yet and schema doesn't currently reflect it

        # note: I didn't think there was any value in indexing or storing context and folder
        document['context'] = task.context.title
        document['folder'] = task.folder.title

        documents.append(document)

    json_docs = json.dumps(documents)
    response = solr.index_json(collection, json_docs)

    #response = solr.commit(collection, waitSearcher=False) # doesn't actually seem to work

    # Since solr.commit didn't seem to work, substituted the below, which works
    url = SOLR_URI + '/solr/' + collection + '/update'
    r = requests.post(url, data={"commit": "true"})
    print(r.text)

    print("Tasks {} to {}".format(s, n))
    s = n
Example 8
        title = track.title
        uri = track.uri
        id_ = album + ' ' + title
        id_ = id_.replace(' ', '_')
        id_ = id_.lower()
        document = {"id":id_, "title":title, "uri":uri, "album":album, "artist":artist, "track":n}
        print(repr(document).encode('cp1252', errors='replace')) 
        for k in document:
            print(str(k+':'+str(document[k])).encode('cp1252', errors='ignore'))
        documents.append(document)
        n+=1

    solr = SolrClient(solr_uri+'/solr')
    collection = 'sonos_companion'

    response = solr.index_json(collection, json.dumps(documents))
    print(response)

    # Since solr.commit didn't seem to work, substituted the below, which works
    url = solr_uri+"/solr/"+collection+"/update"
    r = requests.post(url, data={"commit":"true"})
    print(r.text)

    resp = input("Do you want to continue? (y or n) ")
    if resp not in ('y', 'yes'):
        cont = False

######################################################################
# The below would be if you had a lot of documents
#n = 0
#while True:
Example 9
import json
from collections import defaultdict
from SolrClient import SolrClient

solr = SolrClient('http://localhost:8983/solr')
j = defaultdict(dict)
with open('DOI.json', 'r') as f:
    for k, v in json.load(f).items():
        j[k]['doi'] = v
with open('grobid.json', 'r') as f:
    for doc in json.load(f):
        for i, pub in enumerate(doc['relatedPublications']):
            for k, v in pub.items():
                j[doc['id']]['relatedPublications_' + k + '_' + str(i)] = v
with open('geotopic.json', 'r') as f:
    for k, v in json.load(f).items():
        j[k].update(v)
with open('measurements.json', 'rb') as f:
    for doc in json.loads(f.read().decode(errors='ignore')):
        j[doc['id']]['units'] = doc['units']
with open('sweet.json', 'r') as f:
    for doc in json.load(f):
        for k, v in doc.items():
            if k.startswith('NER_Sweet_'):
                j[doc['id']][k] = v
for k, v in j.items():
    v['id'] = k
solr.index_json('collection1', json.dumps(list(j.values())))
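One caveat worth adding: index_json on its own leaves the documents uncommitted, so they only become searchable after a commit, e.g. (assuming the commit signature used in the test examples):

solr.commit('collection1', openSearcher=True)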
Example 10
            "album": album,
            "artist": artist,
            "track": n
        }
        print(repr(document).encode('cp1252', errors='replace'))
        for k in document:
            print(
                str(k + ':' + str(document[k])).encode('cp1252',
                                                       errors='ignore'))
        documents.append(document)
        n += 1

    solr = SolrClient(solr_uri + '/solr')
    collection = 'sonos_companion'

    response = solr.index_json(collection, json.dumps(documents))
    print(response)

    # Since solr.commit didn't seem to work, substituted the below, which works
    url = solr_uri + "/solr/" + collection + "/update"
    r = requests.post(url, data={"commit": "true"})
    print(r.text)

    resp = input("Do you want to continue? (y or n) ")
    if resp not in ('y', 'yes'):
        cont = False

######################################################################
# The below would be if you had a lot of documents
#n = 0
#while True:
Example 11
    document = {}
    # We create a unique id by concatenating the album and the song title
    id_ = item['album'] + ' ' + item['title']
    id_ = id_.replace(' ', '_')
    document['id'] = id_

    document.update({k:item[k] for k in item if k in ('artist','album','title','uri', 'album_art')})
    documents.append(document)

n = 0
while True:
    # there are limitations in how many docs can be uploaded in a batch but it's more than 100
    cur_documents = documents[n:n+100]

    if not cur_documents:
        break

    cur_documents = json.dumps(cur_documents) 
    response = solr.index_json(collection, cur_documents) 
    print(response)
    #The commit from SolrClient is not working
    #response = solr.commit(collection, waitSearcher=False)
    #print(response)

    # Since solr.commit didn't seem to work, substituted the below, which works
    url = ec_uri+":8983/solr/"+collection+"/update"
    r = requests.post(url, data={"commit":"true"})
    print(r.text)

    n+=100
Example 12
class SOLRDocumentManager(IDocumentManager):
    def __init__(self, server_address: str, index_name: str) -> None:
        self.client = SolrClient(server_address)
        self.index = index_name
        self._serializer = json_serializer()

    def add(self, document: IndexDocument) -> BaseResponse:
        response = BaseResponse()
        try:
            document.id = document.unique_id
            doc_body = self._serializer.serialize([document])
            solr_response = self.client.index_json(self.index, doc_body)
            if not solr_response:
                return response.set_error(
                    Error("IntegrationError", 500,
                          "Index failed to add index!"))
            self.client.commit(self.index,
                               openSearcher=True,
                               waitSearcher=False)
            response = BaseResponse(True)
        except BasicException as e:
            response.set_error(Error("InternalServerError", 500, e.message))
        except Exception as e:
            response.set_error(
                Error("InternalServerError", 500, 'Unknown error occurred!'))
        return response

    def delete(self, unique_id: str) -> BaseResponse:
        response = BaseResponse()
        try:
            solr_response = self.client.delete_doc_by_id(self.index, unique_id)
            if not solr_response:
                return response.set_error(
                    Error("IntegrationError", 500,
                          "Index failed to delete index!"))
            self.client.commit(self.index,
                               openSearcher=True,
                               waitSearcher=False)
            response = BaseResponse(True)
        except BasicException as e:
            response.set_error(Error("InternalServerError", 500, e.message))
        except Exception as e:
            print(e)
            response.set_error(
                Error("InternalServerError", 500, 'Unknown error occurred!'))
        return response

    def get(self, unique_id: str) -> DocumentResponse:
        pass

    def search(self, query: SearchQuery) -> SearchResult:
        solr_query = ""
        solr_field_query = ""
        solr_range_query = []
        for criteria in query.searchCriteria:
            solr_field_query += criteria.field + '^' + str(
                criteria.weight) + " "
            words = criteria.term.split(" ")
            for word in words:
                word = word.lower()
                solr_query += " " + word
        for range_criteria in query.rangeCriteria:
            solr_range_query.append(range_criteria.field + ":[" +
                                    str(range_criteria.minimum) + " TO " +
                                    str(range_criteria.maximum) + "]")
        data = {
            "q": solr_query.strip(),
            "offset": query.page * query.items,
            "limit": query.items,
            "filter": solr_range_query,
            "defType": "edismax",
            "qf": solr_field_query
        }
        result = SearchResult(0, False)
        try:
            response = self.client.query_raw(self.index, data)
            result = SearchResult(response['response']['numFound'], True)
            for document in response['response']['docs']:
                result.add_result(
                    self._serializer.deserialize(document,
                                                 self.index_object_type))
        except Exception as e:
            result.set_error(
                Error("InternalServerError", 500, 'Unknown error occurred!'))
        return result
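A hypothetical usage sketch for the manager above; IndexDocument and SearchQuery are project types assumed from the signatures. Note also that search() reads self.index_object_type, which __init__ never sets, so it has to be assigned before querying:

manager = SOLRDocumentManager('http://localhost:8983/solr', 'documents')
manager.index_object_type = IndexDocument  # assumption: the deserialization target type

add_response = manager.add(doc)            # doc: an IndexDocument instance
delete_response = manager.delete(doc.unique_id)
results = manager.search(query)            # query: a SearchQuery instance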
Example 13
class ClientTestQuery(unittest.TestCase):
    
    @classmethod
    def setUpClass(self):
        self.solr = SolrClient(test_config['SOLR_SERVER'][0],devel=True,auth=test_config['SOLR_CREDENTIALS'])
        self.rand_docs = RandomTestData()
        self.docs = self.rand_docs.get_docs(50)
        self.solr.delete_doc_by_id(test_config['SOLR_COLLECTION'],'*')
        
        for field in test_config['collections']['copy_fields']:
            try:
                self.solr.collections.delete_copy_field(test_config['SOLR_COLLECTION'],field)
            except Exception:
                pass
        for field in test_config['collections']['fields']:
            try:
                self.solr.collections.create_field(test_config['SOLR_COLLECTION'],field)
            except Exception:
                pass
        
        #Index Some data
        self.solr.index_json(test_config['SOLR_COLLECTION'],json.dumps(self.docs))
        self.solr.commit(test_config['SOLR_COLLECTION'],openSearcher=True)
    
    def test_basic_query(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'],{'q':'*:*'})
        self.assertEqual(r.get_num_found(),len(self.docs))
        
    def test_facet(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'],{
            'q':'*:*',
            'facet':'true',
            'facet.field':'facet_test',
        })
        local_facets = {}
        for doc in self.docs:
            try:
                local_facets[doc['facet_test']] +=1
            except KeyError:
                local_facets[doc['facet_test']] = 1
        try:
            self.assertDictEqual(local_facets,r.get_facets()['facet_test'])
        except Exception as e:
            logging.info("local")
            logging.info(local_facets)
            logging.info("facets")
            logging.info(r.get_facets())
            raise
    
    def test_facet_with_fq(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'],{
            'q':'*:*',
            'facet':True,
            'facet.field':'facet_test',
        })
        first_facet_field = list(r.get_facets()['facet_test'].keys())[0]
        first_facet_field_count = r.get_facets()['facet_test'][first_facet_field]
        r = self.solr.query(test_config['SOLR_COLLECTION'],{
            'q':'*:*',
            'facet':True,
            'facet.field':'facet_test',
            'fq':'facet_test:{}'.format(first_facet_field)
        })
        self.assertEqual(r.get_num_found(),first_facet_field_count)
        
    def test_facet_range(self):
        res = self.solr.query(test_config['SOLR_COLLECTION'],{
            'q':'*:*',
            'facet':True,
            'facet.range':'price',
            'facet.range.start':0,
            'facet.range.end':100,
            'facet.range.gap':10
            })
        
        prices = [doc['price'] for doc in self.docs]
        div = lambda x: str(x//10 * 10)
        out = {}
        for k,g in itertools.groupby(sorted(prices),div):
            out[k] = len(list(g)) or 0
        self.assertDictEqual(out,res.get_facets_ranges()['price'])
    
    def test_facet_pivot(self):
        res = self.solr.query(test_config['SOLR_COLLECTION'],{
            'q':'*:*',
            'facet':True,
            'facet.pivot':['facet_test,price','facet_test,id']
        })
        out = {}
        for doc in self.docs:
            if doc['facet_test'] not in out:
                out[doc['facet_test']] = {}
            if doc['price'] not in out[doc['facet_test']]:
                out[doc['facet_test']][doc['price']]=1
            else:
                out[doc['facet_test']][doc['price']]+=1
        self.assertDictEqual(out,res.get_facet_pivot()['facet_test,price'])
        
    def test_get_field_values_as_list(self):
        res = self.solr.query(test_config['SOLR_COLLECTION'],{
            'q':'*:*',
            })
        results = res.get_field_values_as_list('product_name_exact')
        docs = res.docs
        temp = []
        for doc in docs:
            if 'product_name_exact' in doc:
                temp.append(doc['product_name_exact'])
        self.assertEqual(results,temp)
        
    def test_get_facet_values_as_list(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'],{
            'q':'*:*',
            'facet':'true',
            'facet.field':'facet_test',
        })
"""

import json
from collections import defaultdict
from SolrClient import SolrClient

solr = SolrClient('http://localhost:8983/solr')
j = defaultdict(dict)
with open('DOI.json', 'r') as f:
    for k, v in json.load(f).items():
        j[k]['doi'] = v
with open('grobid.json', 'r') as f:
    for doc in json.load(f):
        for i, pub in enumerate(doc['relatedPublications']):
            for k, v in pub.items():
                j[doc['id']]['relatedPublications_' + k + '_' + str(i)] = v
with open('geotopic.json', 'r') as f:
    for k, v in json.load(f).items():
        j[k].update(v)
with open('measurements.json', 'rb') as f:
    for doc in json.loads(f.read().decode(errors='ignore')):
        j[doc['id']]['units'] = doc['units']
with open('sweet.json', 'r') as f:
    for doc in json.load(f):
        for k, v in doc.items():
            if k.startswith('NER_Sweet_'):
                j[doc['id']][k] = v
for k, v in j.items():
    v['id'] = k
solr.index_json('collection1', json.dumps(list(j.values())))
Example 14
class ClientTestQuery(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        self.solr = SolrClient(test_config['SOLR_SERVER'][0],
                               devel=True,
                               auth=test_config['SOLR_CREDENTIALS'])
        self.rand_docs = RandomTestData()
        self.docs = self.rand_docs.get_docs(50)
        self.solr.delete_doc_by_id(test_config['SOLR_COLLECTION'], '*')

        for field in test_config['collections']['copy_fields']:
            try:
                self.solr.collections.delete_copy_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception:
                pass
        for field in test_config['collections']['fields']:
            try:
                self.solr.collections.create_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception:
                pass

        #Index Some data
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.solr.commit(test_config['SOLR_COLLECTION'], openSearcher=True)

    def test_basic_query(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))

    def test_facet(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
            'facet': 'true',
            'facet.field': 'facet_test',
        })
        local_facets = {}
        for doc in self.docs:
            try:
                local_facets[doc['facet_test']] += 1
            except KeyError:
                local_facets[doc['facet_test']] = 1
        try:
            self.assertDictEqual(local_facets, r.get_facets()['facet_test'])
        except Exception as e:
            logging.info("local")
            logging.info(local_facets)
            logging.info("facets")
            logging.info(r.get_facets())
            raise

    def test_facet_with_fq(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
            'facet': True,
            'facet.field': 'facet_test',
        })
        first_facet_field = list(r.get_facets()['facet_test'].keys())[0]
        first_facet_field_count = r.get_facets()['facet_test'][first_facet_field]
        r = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': True,
                'facet.field': 'facet_test',
                'fq': 'facet_test:{}'.format(first_facet_field)
            })
        self.assertEqual(r.get_num_found(), first_facet_field_count)

    def test_facet_range(self):
        res = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': True,
                'facet.range': 'price',
                'facet.range.start': 0,
                'facet.range.end': 100,
                'facet.range.gap': 10
            })

        prices = [doc['price'] for doc in self.docs]
        div = lambda x: str(x // 10 * 10)
        out = {}
        for k, g in itertools.groupby(sorted(prices), div):
            out[k] = len(list(g)) or 0
        self.assertDictEqual(out, res.get_facets_ranges()['price'])

    def test_facet_pivot(self):
        res = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': True,
                'facet.pivot': ['facet_test,price', 'facet_test,id']
            })
        out = {}
        for doc in self.docs:
            if doc['facet_test'] not in out:
                out[doc['facet_test']] = {}
            if doc['price'] not in out[doc['facet_test']]:
                out[doc['facet_test']][doc['price']] = 1
            else:
                out[doc['facet_test']][doc['price']] += 1
        self.assertDictEqual(out, res.get_facet_pivot()['facet_test,price'])

    def test_get_field_values_as_list(self):
        res = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
        })
        results = res.get_field_values_as_list('product_name_exact')
        docs = res.docs
        temp = []
        for doc in docs:
            if 'product_name_exact' in doc:
                temp.append(doc['product_name_exact'])
        self.assertEqual(results, temp)

    def test_get_facet_values_as_list(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
            'facet': 'true',
            'facet.field': 'facet_test',
        })
Example 15
class ClientTestQuery(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        self.solr = SolrClient(test_config['SOLR_SERVER'][0],
                               devel=True,
                               auth=test_config['SOLR_CREDENTIALS'])
        self.rand_docs = RandomTestData()
        self.docs = self.rand_docs.get_docs(50)
        self.solr.delete_doc_by_id(test_config['SOLR_COLLECTION'], '*')

        for field in test_config['collections']['copy_fields']:
            try:
                self.solr.collections.delete_copy_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception:
                pass
        for field in test_config['collections']['fields']:
            try:
                self.solr.collections.create_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception:
                pass

        #Index Some data
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.solr.commit(test_config['SOLR_COLLECTION'], openSearcher=True)

    def test_basic_query(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))

    def test_facet(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
            'facet': 'true',
            'facet.field': 'facet_test',
        })
        local_facets = {}
        for doc in self.docs:
            try:
                local_facets[doc['facet_test']] += 1
            except KeyError:
                local_facets[doc['facet_test']] = 1
        try:
            self.assertDictEqual(local_facets, r.get_facets()['facet_test'])
        except Exception as e:
            logging.info("local")
            logging.info(local_facets)
            logging.info("facets")
            logging.info(r.get_facets())
            raise

    def test_facet_with_fq(self):
        r = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
            'facet': True,
            'facet.field': 'facet_test',
        })
        first_facet_field = list(r.get_facets()['facet_test'].keys())[0]
        first_facet_field_count = r.get_facets()['facet_test'][first_facet_field]
        r = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': True,
                'facet.field': 'facet_test',
                'fq': 'facet_test:{}'.format(first_facet_field)
            })
        self.assertEqual(r.get_num_found(), first_facet_field_count)

    def test_facet_range(self):
        res = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': True,
                'facet.range': 'price',
                'facet.range.start': 0,
                'facet.range.end': 100,
                'facet.range.gap': 10
            })

        prices = [doc['price'] for doc in self.docs]
        div = lambda x: str(x // 10 * 10)
        out = {}
        for k, g in itertools.groupby(sorted(prices), div):
            out[k] = len(list(g)) or 0
        self.assertDictEqual(out, res.get_facets_ranges()['price'])

    def test_facet_pivot(self):
        res = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': True,
                'facet.pivot': ['facet_test,price', 'facet_test,id']
            })
        out = {}
        for doc in self.docs:
            if doc['facet_test'] not in out:
                out[doc['facet_test']] = {}
            if doc['price'] not in out[doc['facet_test']]:
                out[doc['facet_test']][doc['price']] = 1
            else:
                out[doc['facet_test']][doc['price']] += 1
        self.assertDictEqual(out, res.get_facet_pivot()['facet_test,price'])

    def test_get_field_values_as_list(self):
        res = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
        })
        results = res.get_field_values_as_list('product_name_exact')
        docs = res.docs
        temp = []
        for doc in docs:
            if 'product_name_exact' in doc:
                temp.append(doc['product_name_exact'])
        self.assertEqual(results, temp)

    def test_get_facet_values_as_list(self):
        r = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'facet': 'true',
                'facet.limit': -1,
                'facet.field': 'facet_test',
            })
        self.assertEqual(
            sorted(r.data['facet_counts']['facet_fields']['facet_test'][1::2]),
            sorted(r.get_facet_values_as_list('facet_test')))

    def test_grouped_count_1(self):
        '''
        Get the number of groups (ngroups) for grouped docs
        '''
        r = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'group': True,
                'group.field': 'id',
                'group.ngroups': True,
            })
        self.assertEqual(r.get_ngroups(), 50)
        self.assertEqual(r.get_ngroups('id'), 50)

    def test_grouped_docs(self):
        '''
        Get a dict of grouped docs
        '''
        r = self.solr.query(
            test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'group': True,
                'group.field': 'id',
                'group.ngroups': True,
            })
        self.assertEqual(len(r.docs), 10)
        self.assertTrue('doclist' in r.docs[0])

    def test_flat_groups(self):
        '''
        Get a flattened list of grouped docs
        '''
        r = self.solr.query(test_config['SOLR_COLLECTION'], {
            'q': '*:*',
            'group': True,
            'group.field': 'id'
        })
        flats = r.get_flat_groups()
        self.assertEqual(len(flats), 10)
        self.assertTrue('date' in flats[0])

    def test_json_facet(self):
        '''
        Convert JSON facet buckets into a nested dict
        '''
        # just a lazy way of getting a new response object
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})

        a = r.get_jsonfacet_counts_as_dict(
            'test', {
                'count': 50,
                'test': {
                    'buckets': [{
                        'count': 10,
                        'pr': {
                            'buckets': [{
                                'count': 2,
                                'unique': 1,
                                'val': 79
                            }, {
                                'count': 1,
                                'unique': 1,
                                'val': 9
                            }]
                        },
                        'pr_sum': 639.0,
                        'val': 'consectetur'
                    }, {
                        'count': 8,
                        'pr': {
                            'buckets': [
                                {
                                    'count': 1,
                                    'unique': 1,
                                    'val': 9
                                },
                                {
                                    'count': 1,
                                    'unique': 1,
                                    'val': 31
                                },
                                {
                                    'count': 1,
                                    'unique': 1,
                                    'val': 33
                                },
                            ]
                        },
                        'pr_sum': 420.0,
                        'val': 'auctor'
                    }, {
                        'count': 8,
                        'pr': {
                            'buckets': [
                                {
                                    'count': 2,
                                    'unique': 1,
                                    'val': 94
                                },
                                {
                                    'count': 1,
                                    'unique': 1,
                                    'val': 25
                                },
                            ]
                        },
                        'pr_sum': 501.0,
                        'val': 'nulla'
                    }]
                }
            })

        b = {
            'test': {
                'auctor': {
                    'count': 8,
                    'pr': {
                        9: {
                            'count': 1,
                            'unique': 1
                        },
                        31: {
                            'count': 1,
                            'unique': 1
                        },
                        33: {
                            'count': 1,
                            'unique': 1
                        }
                    },
                    'pr_sum': 420.0
                },
                'consectetur': {
                    'count': 10,
                    'pr': {
                        9: {
                            'count': 1,
                            'unique': 1
                        },
                        79: {
                            'count': 2,
                            'unique': 1
                        }
                    },
                    'pr_sum': 639.0
                },
                'nulla': {
                    'count': 8,
                    'pr': {
                        25: {
                            'count': 1,
                            'unique': 1
                        },
                        94: {
                            'count': 2,
                            'unique': 1
                        }
                    },
                    'pr_sum': 501.0
                }
            }
        }

        self.assertEqual(a, b)
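The expected dict b above illustrates the reshaping that get_jsonfacet_counts_as_dict performs: Solr's list-of-buckets JSON facet format becomes a plain dict keyed by each bucket's val. A minimal sketch of that reshaping for a single level:

buckets = [{'val': 'auctor', 'count': 8}, {'val': 'nulla', 'count': 8}]
as_dict = {bucket['val']: {'count': bucket['count']} for bucket in buckets}
# -> {'auctor': {'count': 8}, 'nulla': {'count': 8}}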
Example 16
class ClientTestIndexing(unittest.TestCase):
    @classmethod
    def setUpClass(self):

        self.solr = SolrClient(test_config['SOLR_SERVER'][0],
                               devel=True,
                               auth=test_config['SOLR_CREDENTIALS'])
        self.rand_docs = RandomTestData()
        self.docs = self.rand_docs.get_docs(50)

        for field in test_config['collections']['copy_fields']:
            try:
                self.solr.schema.delete_copy_field(
                    test_config['SOLR_COLLECTION'], field)
            except Exception as e:
                pass

        for field in test_config['collections']['fields']:
            try:
                self.solr.schema.create_field(test_config['SOLR_COLLECTION'],
                                              field)
            except Exception as e:
                pass

    def setUp(self):
        self.delete_docs()
        self.commit()

    def delete_docs(self):
        self.solr.delete_doc_by_id(test_config['SOLR_COLLECTION'], '*')
        self.commit()

    def commit(self):
        # softCommit because we don't care about data on disk
        self.solr.commit(test_config['SOLR_COLLECTION'],
                         openSearcher=True,
                         softCommit=True)

    def test_down_solr_exception(self):
        # connect to a "down" solr host (credentials masked in the source)
        s = SolrClient('http://*****:*****')

    @unittest.skip("Skipping for now")
    def test_access_without_auth(self):
        if not test_config['SOLR_CREDENTIALS'][0]:
            return
        solr = SolrClient(test_config['SOLR_SERVER'], devel=True)
        with self.assertRaises(ConnectionError) as cm:
            solr.query('SolrClient_unittest', {'q': 'not_gonna_happen'})

    def test_indexing_json(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.commit()
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(
                self.solr.query(test_config['SOLR_COLLECTION'], {
                    'q': 'id:{}'.format(doc['id'])
                }).get_num_found(), 1)
        self.delete_docs()
        self.commit()

    def test_get(self):
        doc_id = '1'
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps([{
                                 'id': doc_id
                             }]))
        # this returns the doc!
        self.solr.get(test_config['SOLR_COLLECTION'], doc_id)
        with self.assertRaises(NotFoundError):
            self.solr.get(test_config['SOLR_COLLECTION'], '5')

    def test_mget(self):
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps([{
                                 'id': '1'
                             }]))
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps([{
                                 'id': '5'
                             }]))
        docs = self.solr.mget(test_config['SOLR_COLLECTION'], ('5', '1'))
        self.assertEqual(len(docs), 2)

    def test_indexing_conn_log(self):
        self.docs = self.rand_docs.get_docs(53)
        self.solr.index_json(test_config['SOLR_COLLECTION'],
                             json.dumps(self.docs))
        self.commit()
        for doc in self.docs:
            logging.debug("Checking {}".format(doc['id']))
            self.assertEqual(
                self.solr.query(test_config['SOLR_COLLECTION'], {
                    'q': 'id:{}'.format(doc['id'])
                }).get_num_found(), 1)
        logging.info(self.solr.transport._action_log)
        self.delete_docs()
        self.commit()

    def test_index_json_file(self):
        self.docs = self.rand_docs.get_docs(55)
        with open('temp_file.json', 'w') as f:
            json.dump(self.docs, f)
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass

    def test_stream_file_gzip_file(self):
        self.docs = self.rand_docs.get_docs(60)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass

    @unittest.skip("Don't test remote indexing in travis")
    def test_local_index_json_file(self):
        self.docs = self.rand_docs.get_docs(61)
        with open('temp_file.json', 'w') as f:
            json.dump(self.docs, f)
        r = self.solr.local_index(test_config['SOLR_COLLECTION'],
                                  'temp_file.json')
        self.commit()
        r = self.solr.query(test_config['SOLR_COLLECTION'], {'q': '*:*'})
        self.assertEqual(r.get_num_found(), len(self.docs))
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass

    def test_paging_query_with_rows(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'},
                                          rows=50):
            self.assertTrue(len(res.docs) == 50)
            docs.extend(res.docs)
            queries += 1
        self.assertEqual(
            [x['id'] for x in sorted(docs, key=lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key=lambda x: x['id'])])
        self.assertTrue(1000 / 50 == queries)
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass

    def test_paging_query(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'}):
            self.assertTrue(len(res.docs) == 1000)
            docs.extend(res.docs)
            queries += 1
        self.assertTrue(queries == 1)
        self.assertEqual(
            [x['id'] for x in sorted(docs, key=lambda x: x['id'])],
            [x['id'] for x in sorted(self.docs, key=lambda x: x['id'])])
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass

    def test_paging_query_with_max(self):
        self.docs = self.rand_docs.get_docs(1000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []
        for res in self.solr.paging_query(test_config['SOLR_COLLECTION'],
                                          {'q': '*:*'},
                                          rows=50,
                                          max_start=502):
            self.assertTrue(len(res.docs) == 50)
            queries += 1
            docs.extend(res.docs)
        ids = [x['id'] for x in docs]

        for item in docs:
            self.assertTrue(item['id'] in ids)

        self.assertEqual(11, queries)
        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass

    def test_cursor_query(self):
        self.docs = self.rand_docs.get_docs(2000)
        with gzip.open('temp_file.json.gz', 'wb') as f:
            f.write(json.dumps(self.docs).encode('utf-8'))
        r = self.solr.stream_file(test_config['SOLR_COLLECTION'],
                                  'temp_file.json.gz')
        self.commit()
        queries = 0
        docs = []

        for res in self.solr.cursor_query(test_config['SOLR_COLLECTION'], {
                'q': '*:*',
                'rows': 100
        }):
            self.assertTrue(len(res.docs) == 100)
            queries += 1
            docs.extend(res.docs)

        ids = [x['id'] for x in docs]

        for item in docs:
            self.assertTrue(item['id'] in ids)

        self.delete_docs()
        self.commit()
        try:
            os.remove('temp_file.json.gz')
            os.remove('temp_file.json')
        except OSError:
            pass
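Unlike paging_query, cursor_query pages with Solr's cursorMark deep paging, which requires a sort on the unique key. A hedged sketch of the raw protocol the client wraps (assuming a local collection named 'test'):

import requests

url = 'http://localhost:8983/solr/test/select'
params = {'q': '*:*', 'rows': 100, 'sort': 'id asc',
          'cursorMark': '*', 'wt': 'json'}
while True:
    data = requests.get(url, params=params).json()
    for doc in data['response']['docs']:
        pass  # process each page of documents here
    next_cursor = data['nextCursorMark']
    if next_cursor == params['cursorMark']:
        break  # the cursor stopped advancing: no more results
    params['cursorMark'] = next_cursor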