예제 #1
0
 def delete_item( self, pid ):
     """ Removes the document matching `pid` from the custom bell index.
         Returns the solr response-status when it is 200; raises an Exception for any other status.
         Called by one_offs.rebuild_custom_index(). """
     solr_root = os.environ.get( 'BELL_I_SOLR_ROOT' )
     self.logger.info( 'in tasks.indexer.delete_item() [for custom-solr]; SOLR_ROOT_URL, %s' % solr_root )
     solr = Solr( solr_root )
     ## commit right away so the deletion is visible as soon as the call returns
     status = solr.delete_by_query( 'pid:"%s"' % pid, commit=True ).status
     self.logger.info( 'in tasks.indexer.delete_item() [for custom-solr]; pid, %s; response_status, %s' % (pid, status) )
     if status == 200:
         return status
     ## the status was already logged above; the exception just halts the caller
     raise Exception( 'custom-solr delete problem logged' )
예제 #2
0
 def delete_target_custom_solr_pids( self ):
     """ Deletes every pid listed in the json file at PIDS_TO_DELETE_SOURCE_DATA_JSON_PATH from the custom solr index.
         Each pid's response-status is passed to update_tracker(); a non-200 status is logged as an error
         and the loop continues with the next pid.
         Returns None. """
     ## load pids to be deleted
     with open( self.PIDS_TO_DELETE_SOURCE_DATA_JSON_PATH ) as f:
         deletion_pid_lst = json.load( f )
     ## the target url is the same for every pid, so a single client is enough
     solr = Solr( self.CUSTOM_INDEX_SOLR_URL_ROOT )
     ## run deletion loop, tracking along way
     for pid in deletion_pid_lst:
         response_status = solr.delete_by_query( 'pid:"%s"' % pid, commit=True ).status
         self.update_tracker( pid, response_status )
         if response_status != 200:
             ## the keyword must match the `{response}` placeholder, otherwise format() raises KeyError instead of logging
             logger.error( 'custom-solr delete problem-response for pid `{pid}`: ```{response}```'.format(pid=pid, response=response_status) )
     return
예제 #3
0
class QueryResultTestCase(unittest.TestCase):
    """Integration tests for the Solr client against a running server.

    The search tests expect the index at http://localhost:8983/solr to
    contain exactly four documents.
    """

    def setUp(self):
        self.solr = Solr('http://localhost:8983/solr')

    def tearDown(self):
        pass

    def test_search(self):
        """A match-all search reports and returns four documents."""
        result = self.solr.search(q='*:*')
        self.assertEqual(result.status, 200)
        self.assertEqual(result.total_results, 4)
        self.assertEqual(len(result.documents), 4)

    def test_search_cursor(self):
        """The cursor yields one successful response per fetched page."""
        # (argument given to fetch, number of responses expected)
        for page_size, expected_pages in ((1, 4), (4, 1)):
            cursor = self.solr.search_cursor(q='*:*')
            pages = 0
            for page in cursor.fetch(page_size):
                self.assertEqual(page.status, 200)
                pages += 1
            self.assertEqual(pages, expected_pages)

    def test_commit(self):
        self.assertEqual(self.solr.commit().status, 200)

    def test_optimize(self):
        self.assertEqual(self.solr.optimize().status, 200)

    def test_ping(self):
        self.assertEqual(self.solr.ping().status, 200)

    def test_is_up(self):
        self.assertEqual(self.solr.is_up(), True)

    def test_update_delete(self):
        """Posting two documents and deleting them restores the count."""
        # Remember how many documents exist before the test touches the index
        baseline = self.solr.search(q='*:*')
        self.assertEqual(baseline.status, 200)
        total_results = baseline.total_results

        # Post one document as json, then another one as xml
        for doc_id, input_type in ((1, 'json'), (2, 'xml')):
            posted = self.solr.update([{'id': doc_id}], input_type=input_type)
            self.assertEqual(posted.status, 200)

        # Both documents must now be counted
        after_insert = self.solr.search(q='*:*')
        self.assertEqual(after_insert.status, 200)
        self.assertEqual(after_insert.total_results, total_results + 2)

        # Remove the first document by query and the second one by key
        self.assertEqual(self.solr.delete_by_query('id:1').status, 200)
        self.assertEqual(self.solr.delete_by_key(2).status, 200)

        # The count is back where it started
        after_delete = self.solr.search(q='*:*')
        self.assertEqual(after_delete.status, 200)
        self.assertEqual(after_delete.total_results, total_results)

    def test_query(self):
        pass
예제 #4
0
class eBsolr:
    """Convenience wrapper around a ``Solr`` client.

    The client is kept on ``self.cursor``; the methods cover updates,
    deletes, searches and facet look-ups.
    """

    # Wrapped Solr client; set per instance by __init__.
    cursor = None

    def __init__(self, urls, config, version=4):
        """Create the wrapped client from ``urls`` and ``version``.

        ``config`` is accepted but not used by this class.
        """
        self.cursor = Solr(urls, version=version)

    def update(self, documents, input_type='json', commit=False):
        """Send ``documents`` to Solr and return the client's response."""
        return self.cursor.update(documents, input_type, commit)

    def deleteById(self, tid, commit=False):
        """Delete the document whose key is ``tid``; return the response."""
        return self.cursor.delete_by_key(tid, commit=commit)

    def deleteByQuery(self, query, commit=False):
        """Delete every document matching ``query``; return the response."""
        return self.cursor.delete_by_query(query=query, commit=commit)

    def deleteAll(self, commit=False):
        """Delete every document in the index; return the response."""
        return self.cursor.delete_by_query("*:*", commit=commit)

    def _search_checked(self, query):
        """Run one search and raise if Solr answers with status >= 400."""
        response = self.cursor.search(**query)
        if int(response.status) >= 400:
            raise Exception('Error Solr {}: {}'.format(response.status, response.extract_errmessage()))
        return response

    def getResponse(self, search, fields=None, start=0, rows=None, sort=None, fq=None):
        """Run a search and return the raw response object.

        search: value of the ``q`` parameter.
        fields: field list, either a ready-made string or an iterable of
            names that is joined with commas.
        start:  offset of the first row.
        rows:   maximum number of rows; ``None`` asks for ``_MAXROWS`` and,
            when more documents match, repeats the search asking for all.
        sort:   value of the ``sort`` parameter.
        fq:     value of the ``fq`` parameter.

        Raises Exception when Solr answers with a status of 400 or above.
        """
        query = {'q': search}
        if fields:
            # basestring only exists on Python 2; fall back to str elsewhere
            try:
                string_types = basestring
            except NameError:
                string_types = str
            if isinstance(fields, string_types):
                query['fl'] = fields
            else:
                query['fl'] = ",".join(fields)
        if sort:
            query['sort'] = sort
        if fq:
            query['fq'] = fq

        # Default to _MAXROWS rows when the caller sets no limit
        limit = _MAXROWS if rows is None else rows
        query['start'] = start
        query['rows'] = limit

        response = self._search_checked(query)
        if rows is None and response.total_results > limit:
            # More matches than the default limit: ask again for all of them
            query['rows'] = response.total_results
            response = self._search_checked(query)

        return response

    def get_language_query(self, language):
        """Build an OR-query on the ``language`` field.

        ``language`` is a ';'-separated list such as "en;fr", which yields
        "language:en OR language:fr".  Returns None for None or "".
        """
        if language is None or language == "":
            return None
        return " OR ".join("language:%s" % lang for lang in language.split(';'))

    def getDocs(self, search, fields=None, start=0, rows=None, sort=None, fq=None):
        """Search and return ``{"docs": documents, "count": total_results}``.

        search: query syntax, ex: "field:keys,field2:keys2"
        fields: fields to fetch (list), ex: ['field', 'field2']
        start:  start row
        rows:   max / limit of rows
        sort:   row order, ex: field asc, field2 desc
        """
        response = self.getResponse(search, fields, start, rows, sort, fq)

        return {"docs": response.documents, "count": response.total_results}

    def getFacetList(self, facets, facetField):
        """Pick the requested entries out of ``facets['facet_fields']``.

        ``facetField`` is a list of names or a comma-separated string;
        empty names are skipped.
        """
        if not isinstance(facetField, list):
            facetField = facetField.split(",")
        return {facet: facets['facet_fields'][facet] for facet in facetField if facet}

    def getFacetPivotGeneral(self, query, facetField, pivotField, limit=None, fq=None):
        """Fetch the pivot facet of ``facetField`` over ``pivotField``.

        Requests ``<base_url>select`` directly with ``rows=0`` and returns
        the ``facet_pivot`` entry for "facetField,pivotField".  Any error is
        printed and None is returned instead.
        """
        try:
            pivot = '{},{}'.format(facetField, pivotField)
            params = {'q': query,
                      'rows': 0,
                      'wt': 'json',
                      'indent': 'true',
                      'facet': 'true',
                      'facet.pivot': pivot}
            if limit:
                params['facet.limit'] = limit
            if fq:
                params['fq'] = fq

            # requests takes care of url-encoding the parameters
            http_response = requests.get('{}select'.format(self.cursor.base_url), params=params)

            return http_response.json()['facet_counts']['facet_pivot'][pivot]
        except Exception as e:
            # Best effort: report the problem and let the caller handle None
            print("Error parsing facet pivot...")
            print(e)
        return None
예제 #5
0
class QueryResultTestCase(unittest.TestCase):
    """Exercises the Solr client against a live server.

    The search tests expect the index at http://localhost:8983/solr to
    contain exactly four documents.
    """

    def setUp(self):
        self.solr = Solr('http://localhost:8983/solr')

    def tearDown(self):
        pass

    def _ok(self, response):
        """Assert that ``response`` has status 200 and hand it back."""
        self.assertEqual(response.status, 200)
        return response

    def test_search(self):
        found = self._ok(self.solr.search(q='*:*'))
        self.assertEqual(found.total_results, 4)
        self.assertEqual(len(found.documents), 4)

    def test_search_cursor(self):
        # fetch(1) is expected to produce four responses
        cursor = self.solr.search_cursor(q='*:*')
        pages = [self._ok(page) for page in cursor.fetch(1)]
        self.assertEqual(len(pages), 4)

        # fetch(4) is expected to produce a single response
        cursor = self.solr.search_cursor(q='*:*')
        pages = [self._ok(page) for page in cursor.fetch(4)]
        self.assertEqual(len(pages), 1)

    def test_commit(self):
        self._ok(self.solr.commit())

    def test_optimize(self):
        self._ok(self.solr.optimize())

    def test_ping(self):
        self._ok(self.solr.ping())

    def test_is_up(self):
        is_up = self.solr.is_up()
        self.assertEqual(is_up, True)

    def test_update_delete(self):
        # Starting point: number of documents currently indexed
        total_results = self._ok(self.solr.search(q='*:*')).total_results

        # Add one document through json and one through xml
        self._ok(self.solr.update([{'id': 1}], input_type='json'))
        self._ok(self.solr.update([{'id': 2}], input_type='xml'))

        # Two more documents than before
        grown = self._ok(self.solr.search(q='*:*'))
        self.assertEqual(grown.total_results, total_results + 2)

        # Clean up: first document by query, second document by key
        self._ok(self.solr.delete_by_query('id:1'))
        self._ok(self.solr.delete_by_key(2))

        # Back to the starting point
        restored = self._ok(self.solr.search(q='*:*'))
        self.assertEqual(restored.total_results, total_results)

    def test_query(self):
        pass
예제 #6
0
# Script excerpt: empties the Solr index and walks the `docs` collection of
# the `fashion_ip` Mongo database, normalising each document in place.
# NOTE(review): nothing in this excerpt appends to `documents`, increments
# `count`, or sends anything to Solr -- presumably that happens further down.

# Make a connection to Mongo.
try:
    db_conn = Connection()
    # db_conn = Connection("emo2.trinity.duke.edu", 27017)
except ConnectionFailure:
    # Without a database there is nothing to do: report and exit with status 1
    print "couldn't connect: be sure that Mongo is running on localhost:27017"
    sys.exit(1)

db = db_conn['fashion_ip']

# create a connection to a solr server
solr = Solr('http://localhost:8080/solr')

# DELETE ALL DOCS FIRST!!
# (match-all query, committed immediately)
solr.delete_by_query(query='*:*', commit=True)

# Bookkeeping: `total_docs` and `documents` are not used in this excerpt
total_docs = db.docs.find().count()
count = 0
documents = []

# The second argument to find() lists the fields to return for each document
for doc in db.docs.find({},{'_id':True,'year':True,'court':True,'court_level':True,'url':True,'name':True,'content':True,'tags':True,'subjects':True}):
	# Progress output whenever count is a multiple of 100
	if count%100 == 0:
		print count
		
	# don't know how else to get solr to take IDs...
	# (so the Mongo _id is replaced by its string form)
	doc['_id'] = str(doc['_id'])
	# replace the subject->weight mapping with the list of subject names
	# whose weight is at least 0.05
	if 'subjects' in doc:
		sub_tmp = [k for k,v in doc['subjects'].items() if v >= 0.05]
		doc['subjects'] = sub_tmp
예제 #7
0
def del_all_index(server):
    """Delete every document from the Solr index at ``server``, then commit.

    ``server`` is handed unchanged to the ``Solr`` constructor.
    """
    client = Solr(server)
    client.delete_by_query('*:*')
    client.commit()
예제 #8
0
# Script excerpt: connects to Mongo and Solr, empties the Solr index and
# initialises the bookkeeping used by the loop that follows.

# Make a connection to Mongo.
try:
    db_conn = Connection()
    # db_conn = Connection("emo2.trinity.duke.edu", 27017)
except ConnectionFailure:
    # Without a database there is nothing to do: report and exit with status 1
    print "couldn't connect: be sure that Mongo is running on localhost:27017"
    sys.exit(1)

db = db_conn['fashion_ip']

# create a connection to a solr server
solr = Solr('http://localhost:8080/solr')

# DELETE ALL DOCS FIRST!!
# (match-all query, committed immediately)
solr.delete_by_query(query='*:*', commit=True)

# Number of documents in the `docs` collection, plus a counter and a list
# that start out empty
total_docs = db.docs.find().count()
count = 0
documents = []

for doc in db.docs.find({}, {
        '_id': True,
        'year': True,
        'court': True,
        'court_level': True,
        'url': True,
        'name': True,
        'content': True,
        'tags': True,
        'subjects': True
예제 #9
0
# delete 
from mysolr import Solr
#update lixiaoyao
#db.baike.update({_id:"http://baike.baidu.com/view/4488.htm"},{$set:{gameURL:["http://baike.baidu.com/subview/2188/5215542.htm" ]}})

# Script: deletes a fixed list of documents from a Solr index by key.

# Root url of the Solr server to clean up
solr_url = 'http://10.76.0.137:8983/solr/'
# Keys of the documents to delete (each key is the page's url)
# NOTE: .../subview/187895/6353044.htm is listed twice
iden = [
	"http://baike.baidu.com/view/760101.htm",
	"http://baike.baidu.com/subview/10941/5236539.htm",
	"http://baike.baidu.com/subview/3049782/11262117.htm",
	"http://baike.baidu.com/subview/10786/6081536.htm",
	"http://baike.baidu.com/subview/533601/8190340.htm",
	"http://baike.baidu.com/view/1016334.htm",
	"http://baike.baidu.com/view/2106174.htm",
	"http://baike.baidu.com/subview/187895/6353044.htm",
	"http://baike.baidu.com/subview/187895/6353038.htm",
	"http://baike.baidu.com/view/123796.htm",
	"http://baike.baidu.com/subview/187895/6353044.htm",
	"http://baike.baidu.com/subview/168512/7010262.htm",	
]


solr = Solr(solr_url)
# One delete request per key, each committed immediately
for x in iden:
	solr.delete_by_key(x,commit=True)

# The string literal below is never executed; it keeps a disabled
# "delete everything" variant around for reference.
'''
query = {'q':'*:*'}
solr.delete_by_query(query,commit=True)
'''
예제 #10
0
class QueryResultTestCase(unittest.TestCase):
    """Live-server tests for the Solr client.

    The server url is read from the SOLR_URL environment variable; the
    search tests expect that index to contain exactly four documents.
    """

    def setUp(self):
        self.solr = Solr(os.getenv("SOLR_URL"))

    def tearDown(self):
        pass

    def _count_pages(self, page_size):
        """Walk a match-all cursor and return how many responses it gave.

        Every response is checked for status 200 along the way.
        """
        seen = 0
        for page in self.solr.search_cursor(q="*:*").fetch(page_size):
            self.assertEqual(page.status, 200)
            seen += 1
        return seen

    def test_search(self):
        everything = self.solr.search(q="*:*")
        self.assertEqual(everything.status, 200)
        self.assertEqual(everything.total_results, 4)
        self.assertEqual(len(everything.documents), 4)

    def test_search_cursor(self):
        self.assertEqual(self._count_pages(1), 4)
        self.assertEqual(self._count_pages(4), 1)

    def test_commit(self):
        outcome = self.solr.commit()
        self.assertEqual(outcome.status, 200)

    def test_optimize(self):
        outcome = self.solr.optimize()
        self.assertEqual(outcome.status, 200)

    def test_ping(self):
        outcome = self.solr.ping()
        self.assertEqual(outcome.status, 200)

    def test_is_up(self):
        alive = self.solr.is_up()
        self.assertEqual(alive, True)

    def test_update_delete(self):
        # Count the documents before anything is added
        before = self.solr.search(q="*:*")
        self.assertEqual(before.status, 200)
        total_results = before.total_results

        # First document goes in as json
        sent = self.solr.update([{"id": 1}], input_type="json")
        self.assertEqual(sent.status, 200)
        # Second document goes in as xml
        sent = self.solr.update([{"id": 2}], input_type="xml")
        self.assertEqual(sent.status, 200)

        # The index must have grown by exactly two documents
        during = self.solr.search(q="*:*")
        self.assertEqual(during.status, 200)
        self.assertEqual(during.total_results, total_results + 2)

        # Take both documents out again: one by query, one by key
        removed = self.solr.delete_by_query("id:1")
        self.assertEqual(removed.status, 200)
        removed = self.solr.delete_by_key(2)
        self.assertEqual(removed.status, 200)

        # And the count must be back to the initial value
        after = self.solr.search(q="*:*")
        self.assertEqual(after.status, 200)
        self.assertEqual(after.total_results, total_results)

    def test_query(self):
        pass