def __init__(self, exit_on_error=True, solr_host=settings.SOLR_HOST,
             solr_port=settings.SOLR_PORT, solr_collection=settings.SOLR_COLLECTION):
    # Be forgiving of configurations that omit the scheme
    if 'http://' not in solr_host and 'https://' not in solr_host:
        solr_host = 'http://' + solr_host
    self.session = requests.Session()
    if solr_collection:  # was `len(solr_collection) > 1`, which skipped one-char names
        solr_collection = '/' + solr_collection
    if solr_port == 80:
        solr_connection_string = solr_host + '/solr' + solr_collection
    else:
        solr_connection_string = solr_host + ':' + str(solr_port) \
            + '/solr' + solr_collection
    try:
        # print(solr_connection_string)
        self.connection = Solr(solr_connection_string,
                               make_request=self.session, version=4)
    except requests.ConnectionError:
        print('\nError: Could not connect to Solr at: ' + solr_connection_string +
              '\nPlease verify your Solr instance and configuration.\n')
        if exit_on_error:
            sys.exit(1)
        else:
            self.connection = False
def __init__(self, context, request):
    self.context = context
    self.request = request
    self.create_count = 0
    self.update_count = 0
    self.messages = []
    self.to_index = []
    solr_uri = request.registry.settings.get('push.solr_uri', None)
    if solr_uri is None:
        raise AttributeError(u'A push.solr_uri is required')
    # XXX: We import Solr here so it can be mocked in the tests
    from mysolr import Solr
    self.solr = Solr(solr_uri)
    self.shared = context.shared
def solr_search(self, query):
    """Do the Solr search and pass back the results."""
    output_dict = {}
    # Set up the connection
    solr = Solr(self.server, version=4)
    # UNLIMITED_ROWS = 10000000  # necessary because the default in mysolr is a mere 10
    # Run the search
    search_results = solr.search(**query)
    # Format the results
    for pdb in search_results.documents:
        output_dict[pdb.get('pdb_id').upper()] = {
            'description': pdb.get('molecule_name')[0]
        }
    return output_dict
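The commented-out UNLIMITED_ROWS constant above points at a real mysolr pitfall: search() returns only 10 rows unless told otherwise. A minimal caller sketch, assuming a hypothetical searcher instance and that requesting a very large rows value is acceptable here:

# Hypothetical usage: pass rows explicitly, since mysolr defaults to 10.
MAX_ROWS = 10000000  # assumption: comfortably larger than any result set
query = {'q': 'pdb_id:*', 'rows': MAX_ROWS}
results = searcher.solr_search(query)  # 'searcher' is a hypothetical instance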
def update_deletions(context, request):
    """Receive a UID from the request vars and remove the associated
    object from the deleted feed.
    """
    uid = request.POST.get('uid')
    if not uid:
        return
    solr_uri = request.registry.settings.get('push.solr_uri', None)
    if solr_uri is None:
        raise AttributeError(u'A push.solr_uri is required')
    from mysolr import Solr
    solr = Solr(solr_uri)
    logger.debug('Remove deleted status')
    remove_deleted_status(uid, context.shared, solr)
    return HTTPOk(body="Item no longer marked as deleted")
def _readLabcasSolr(self, labcasurl, labcas_sourceurl_prefix):
    u'''Read the statements made in the RDF at ``labcasurl`` and return a
    dictionary of {s → [{p → [o]}]} where ``s`` is a subject URI mapping to a
    sequence of dictionaries whose keys ``p`` are predicate URIs mapping to a
    sequence of ``o`` objects, which may be literal values or reference URIs.'''
    solr_conn = Solr(base_url=labcasurl, version=4)
    solr_query = {'q': '*:*'}
    solr_response = solr_conn.search(**solr_query)
    results = {}
    for obj in solr_response.documents:
        obj['sourceurl'] = labcas_sourceurl_prefix + obj.get("id")
        results[obj.get("id")] = obj
    return results
def run(self):
    df = pd.read_csv(self.input().open('r'), sep='\t')
    df['id'] = df['url']
    solr = Solr('SOLR_HOST')
    # Index 10 docs at a time
    start = 0
    increment = 10
    while len(df[start:start + increment]) > 0:
        sliced = df[start:start + increment]
        docs = []
        for index, row in sliced.iterrows():
            doc = json.loads(row.to_json())
            docs.append(doc)
        solr.update(docs, 'json')
        if start % 1000 == 0:
            # Just to see that it is working
            print(start)
        start += increment
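mysolr's update() commits by default, so the loop above triggers a commit for every 10-document batch. A sketch of a cheaper variant under that assumption, deferring to a single commit at the end:

# Sketch: send each batch with commit=False, then commit once.
start = 0
while len(df[start:start + increment]) > 0:
    sliced = df[start:start + increment]
    docs = [json.loads(row.to_json()) for _, row in sliced.iterrows()]
    solr.update(docs, 'json', commit=False)  # defer the commit
    start += increment
solr.commit()  # one commit makes all batches searchable together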
def delete_items(context, request):
    """Delete the given items from the index."""
    # If the request isn't an RSS feed, bail out
    if request.content_type not in ALLOWED_CONTENT:
        body_msg = (
            "The content-type of the request must be one of the "
            "following: %s"
        ) % ", ".join(ALLOWED_CONTENT)
        return HTTPBadRequest(body=body_msg)
    solr_uri = request.registry.settings.get('push.solr_uri', None)
    if solr_uri is None:
        raise AttributeError(u'A push.solr_uri is required')
    # XXX: We import Solr here so it can be mocked in the tests
    from mysolr import Solr
    solr = Solr(solr_uri)
    shared_content = feedparser.parse(request.body)
    missing = []
    removed = 0
    for item in shared_content.entries:
        uid = item['id']
        uid = normalize_uid(uid)
        logger.debug('Deleting %s' % uid)
        if uid not in context.shared:
            missing.append(uid)
            solr.delete_by_key(uid)
            continue
        del context.shared[uid]
        solr.delete_by_key(uid)
        removed += 1
    body_msg = "Removed %s items." % removed
    if missing:
        msg_str = " %s items could not be found for deletion: %s"
        args = (len(missing), ', '.join(missing))
        msg = msg_str % args
        logger.warn(msg)
        body_msg += msg
    return HTTPOk(body=body_msg)
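Likewise, delete_by_key() defaults to commit=True in mysolr, so the loop above commits once per item. If that proves slow, a hedged alternative is to defer the commit until the loop finishes:

# Sketch: batch the deletes, then commit once after the loop.
for item in shared_content.entries:
    uid = normalize_uid(item['id'])
    solr.delete_by_key(uid, commit=False)
solr.commit()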
from mysolr import Solr
import requests

# Set up the connection through a requests session
session = requests.Session()
solr_handle = Solr('http://localhost:8080/solr/search', make_request=session)
from mysolr import Solr
import requests

import localConfig

# Set up the connection through a requests session
session = requests.Session()
solr_handle = Solr(localConfig.solr_URL, make_request=session)
def setUp(self):
    self.solr = Solr('http://localhost:8983/solr')
def setUp(self):
    self.solr = Solr(os.getenv('SOLR_URL'))
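Neither fixture cleans the index between tests. A minimal tearDown sketch, assuming the core being pointed at holds only disposable test data:

def tearDown(self):
    # Assumption: this core holds only test documents.
    self.solr.delete_by_query('*:*')  # mysolr commits the delete by default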
import time
import socket
import xml.parsers.expat
# import sunburnt
from mysolr import Solr

from Resource.ResourceHelper import ResourceHelper
from Resource.Resource import Resource
from Util.PathTool import PathTool
from Digester.FeedDictFactory import FeedDictFactory

solrBase = "http://localhost:8983/solr/"
updateUrl = solrBase + 'update/'
solr = Solr(solrBase)

_pt = PathTool.PathTool()
_rh = ResourceHelper()
feeds = _rh.getAllFeedPaths()

for feed in feeds:
    try:
        feedDictFactory = FeedDictFactory()
        feedDict = feedDictFactory.getFeedDict(feed)
        if feedDict is not None and feedDict != {}:
            feedDict['id'] = Resource(feed, 'feed').get_id()
            print(feedDict['id'])
            print("Indexing", feedDict)
            solr.update([feedDict], 'json', commit=True)
            print('Indexed.')
    except (xml.parsers.expat.ExpatError, socket.error):
        # Assumption: the except clause was truncated in the original;
        # skip feeds that fail to parse or to reach the server.
        continue
def __init__(self, url):
    self.url = url
    self.conn = Solr(url)
import sys

from pymongo import Connection
from pymongo.errors import ConnectionFailure
from mysolr import Solr

database = 'fashion_ip'
collection = 'docs'

# Make a connection to Mongo.
try:
    db_conn = Connection("localhost")
    # db_conn = Connection("emo2.trinity.duke.edu", 27017)
except ConnectionFailure:
    print("couldn't connect: be sure that Mongo is running on localhost:27017")
    sys.exit(1)
db = db_conn[database]

solr = Solr('http://emo2.trinity.duke.edu:8080/solr/')

# query = {'q': '*:*', 'fl': '_id', 'tv.tf': 'true', 'qt': 'tvrh', 'rows': 10, 'start': 0}
# response = solr.search(**query)
# tv = response.raw_response['termVectors']
# The termVectors section comes back as a flat list of alternating names and values:
# tv[0] == 'warnings'
# tv[1] == [...]
# tv[2] == 'doc-0'
# tv[3] == [...]
# tv[3][0] == 'uniqueKey'
# tv[3][1] == '4f406d8347b2301618000000'
# tv[3][2] == 'content'
# tv[3][3] == ['1', ['tf', 2], '151', ['tf', 1], '157', ['tf', 1], '182', ['tf', 1], '186', ['tf', 2], ...
# tv[4] == 'uniqueKeyFieldName'
# tv[5] == '_id'
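The commented walkthrough above shows how Solr serializes term vectors: a flat list in which names and values alternate. A small helper to fold such a list into a dict, written against that assumed structure:

def namedlist_to_dict(flat):
    """Fold Solr's alternating [name, value, name, value, ...] list into a dict.
    Assumption: even indices are names, each followed by its value."""
    return {flat[i]: flat[i + 1] for i in range(0, len(flat), 2)}

# e.g. namedlist_to_dict(tv)['uniqueKeyFieldName'] == '_id'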
def __init__(self, urls, config, version=4):
    self.cursor = Solr(urls, version=version)
    return self._stemmer.stem(word).lower()

# Make a connection to Mongo.
try:
    # db_conn = Connection("localhost", 27017)
    db_conn = Connection("emo2.trinity.duke.edu", 27017)
except ConnectionFailure:
    print("couldn't connect: be sure that Mongo is running on localhost:27017")
    # sys.stdout.flush()
    sys.exit(1)
db = db_conn['fashion_ip']

# Connection to Solr for faster full-text searching
solr = Solr('http://localhost:8080/solr')

qstring = sys.argv[1]
pir_re = re.compile(r'.* ' + qstring + '.*', re.IGNORECASE)
porter = nltk.PorterStemmer()

for year in range(1900, 2013):
    print('\nYEAR: ', year)
    response = solr.search(q=qstring + ' year:' + str(year),
                           fl='_id,score', rows=10000, start=0)
    documents = response.documents
#!/usr/bin/env python
import sys
import os
import json

from mysolr import Solr

PDBE_SOLR_URL = "http://www.ebi.ac.uk/pdbe/search/pdb"
solr = Solr(PDBE_SOLR_URL)

PY3 = sys.version > '3'
if PY3:
    import urllib.request as urllib2
else:
    import urllib2

SERVER_URL = "https://www.ebi.ac.uk/pdbe/api"


def join_with_AND(query_params):
    '''Convenience function to create a query string joined with AND.'''
    return " AND ".join(["%s:%s" % (k, v) for k, v in query_params.items()])


def execute_solr_query(query, query_fields):
    '''Convenience function: build q from query_fields and run the search.'''
    query["q"] = join_with_AND(query_fields)  # add q
    response = solr.search(**query)
    documents = response.documents
    # NOTE: the original snippet is truncated here; a plausible completion
    # (counting entries via pdb_id is an assumption):
    print("Found %d matching entities in %d entries." %
          (len(documents), len({doc.get("pdb_id") for doc in documents})))
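A hedged usage sketch for the two helpers above; the field names below are assumptions modeled on PDBe's search fields, not taken from the original:

# Hypothetical fields; substitute real PDBe schema fields as needed.
# Values containing spaces must be quoted for the Solr query parser.
query_fields = {'molecule_type': 'Protein', 'organism_name': '"Homo sapiens"'}
query = {'rows': 100, 'fl': 'pdb_id,molecule_name'}
execute_solr_query(query, query_fields)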
from mysolr import Solr

# Connect to the 'barcore' core on localhost:8983
solr = Solr("http://localhost:8983/solr/barcore")

# All Solr params are supported!
query = {'q': '*:*', 'facet': 'true', 'facet.field': 'zip'}
response = solr.search(**query)

# Do stuff with the documents
for document in response.documents:
    # modify the 'rating' field
    document['rating'] = 2.0

# Update the index with the modified documents
solr.update(response.documents, commit=True)
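The query above asks for facets but never reads them back; mysolr parses facet counts onto the response object. A sketch assuming mysolr's documented facets attribute:

# Read the zip-code facet counts back from the parsed response.
zip_counts = response.facets['facet_fields']['zip']
for zip_code, count in zip_counts.items():
    print(zip_code, count)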
from flask import (Flask, request, session, g, redirect, url_for, abort,
                   render_template, flash)
import sqlite3
import pdb
from mysolr import Solr
import requests
from contextlib import closing
from flask_sqlalchemy import SQLAlchemy  # flask.ext.* imports are deprecated

# Configuration must use the full path
DATABASE = 'c:/Users/Alicia/PycharmProjects/WorldValues/worldvalues.db'
DEBUG = True
SECRET_KEY = 'development key'
USERNAME = '******'
PASSWORD = '******'

# Point at the collection endpoint, not the admin UI URL
# ('http://localhost:8983/solr/#/collection1' is the browser view).
solr = Solr('http://localhost:8983/solr/collection1')

app = Flask(__name__)
app.debug = True
app.config.from_object(__name__)


def connect_db():
    return sqlite3.connect(app.config['DATABASE'])


@app.before_request
def before_request():
    g.db = connect_db()


@app.teardown_request
def teardown_request(exception):
    # Assumption: the handler was truncated in the original; this mirrors
    # the standard Flask pattern of closing the per-request connection.
    db = getattr(g, 'db', None)
    if db is not None:
        db.close()