Example #1
0
 def __init__(self,
              exit_on_error=True,
              solr_host=settings.SOLR_HOST,
              solr_port=settings.SOLR_PORT,
              solr_collection=settings.SOLR_COLLECTION):
     """Open a mysolr connection to the configured Solr collection.

     exit_on_error -- when True, exit the process if the connection
         cannot be established; otherwise set ``self.connection`` to
         False and carry on.
     solr_host -- host name, with or without an http(s):// scheme.
     solr_port -- port number; 80 is treated as implicit and left out
         of the URL.
     solr_collection -- collection name appended to the '/solr' path.
     """
     # Be forgiving of configurations that omit the scheme. startswith()
     # replaces the old substring test, which would wrongly accept a
     # host string merely *containing* 'http://' somewhere inside it.
     if not solr_host.startswith(('http://', 'https://')):
         solr_host = 'http://' + solr_host
     self.session = requests.Session()
     # Prefix any non-empty collection name with '/'. The previous
     # `len(solr_collection) > 1` check silently dropped one-character
     # collection names.
     if solr_collection:
         solr_collection = '/' + solr_collection
     if solr_port == 80:
         solr_connection_string = solr_host \
             + '/solr' + solr_collection
     else:
         solr_connection_string = solr_host + ':' + str(solr_port) \
             + '/solr' + solr_collection
     try:
         self.connection = Solr(solr_connection_string,
                                make_request=self.session,
                                version=4)
     except requests.ConnectionError:
         print('\nError: Could not connect to Solr at: ' + solr_connection_string +\
               '\nPlease verify your Solr instance and configuration.\n')
         if exit_on_error:
             sys.exit(1)
         else:
             self.connection = False
Example #2
0
 def __init__(self, context, request):
     """Track indexing state for a push request and connect to Solr.

     Raises AttributeError when the ``push.solr_uri`` registry setting
     is absent.
     """
     self.context = context
     self.request = request
     # Counters and buffers for this indexing run.
     self.create_count = 0
     self.update_count = 0
     self.messages = []
     self.to_index = []
     registry_settings = request.registry.settings
     solr_uri = registry_settings.get('push.solr_uri', None)
     if solr_uri is None:
         raise AttributeError(u'A push.solr_uri is required')
     # XXX: We are importing solr here to be able to mock it in the tests
     from mysolr import Solr
     self.solr = Solr(solr_uri)
     self.shared = context.shared
Example #3
0
 def solr_search(self, query):
     """Run *query* against Solr and map PDB ids to their descriptions."""
     # One connection per search; mysolr's Solr object is cheap to build.
     conn = Solr(self.server, version=4)
     response = conn.search(**query)
     # Key each hit on its upper-cased pdb_id; the description is the
     # first entry of the molecule_name field.
     return {
         doc.get('pdb_id').upper(): {'description': doc.get('molecule_name')[0]}
         for doc in response.documents
     }
Example #4
0
def update_deletions(context, request):
    """Remove the object named by the posted ``uid`` from the deleted feed.

    Returns ``None`` when no uid was posted; raises ``AttributeError``
    when the ``push.solr_uri`` setting is missing.
    """
    uid = request.POST.get('uid')
    if not uid:
        return
    registry_settings = request.registry.settings
    solr_uri = registry_settings.get('push.solr_uri', None)
    if solr_uri is None:
        raise AttributeError(u'A push.solr_uri is required')
    from mysolr import Solr
    solr = Solr(solr_uri)
    logger.debug('Remove deleted status')
    remove_deleted_status(uid, context.shared, solr)
    return HTTPOk(body="Item no longer marked as deleted")
Example #5
0
    def _readLabcasSolr(self, labcasurl, labcas_sourceurl_prefix):
        u'''Fetch every document from the LabCAS Solr endpoint at
        ``labcasurl`` and return a dict mapping each document's id to
        the document itself, after stamping each one with a
        ``sourceurl`` built from ``labcas_sourceurl_prefix`` + id.'''
        connection = Solr(base_url=labcasurl, version=4)
        response = connection.search(q='*:*')
        catalog = {}
        for document in response.documents:
            identifier = document.get("id")
            document['sourceurl'] = labcas_sourceurl_prefix + identifier
            catalog[identifier] = document
        return catalog
Example #6
0
    def run(self):
        # Load the tab-separated input; each row's URL doubles as the
        # Solr document id.
        df = pd.read_csv(self.input().open('r'), sep='\t')
        df['id'] = df['url']

        # NOTE(review): 'SOLR_HOST' is a literal string, not a URL —
        # presumably meant to be a configured host; confirm.
        solr = Solr('SOLR_HOST')

        # Index 10 docs at a time
        start = 0
        increment = 10
        while len(df[start:start + increment]) > 0:
            sliced = df[start:start + increment]
            docs = []
            for index, row in sliced.iterrows():
                # Round-trip through JSON to turn the row into a plain dict.
                doc = json.loads(row.to_json())
                docs.append(doc)

            solr.update(docs, 'json')
            if start % 1000 == 0:
                # Just to see that is working
                print start
            start += increment
Example #7
0
def delete_items(context, request):
    """Delete the items listed in the posted RSS feed from the shared
    store and the Solr index.

    Returns HTTPBadRequest for unsupported content types, otherwise
    HTTPOk with a summary of removals and any uids that could not be
    found locally.  Raises AttributeError when the ``push.solr_uri``
    setting is missing.
    """
    # If the request isn't an RSS feed, bail out
    if request.content_type not in ALLOWED_CONTENT:
        body_msg = (
            "The content-type of the request must be one of the "
            "following: %s"
        ) % ", ".join(ALLOWED_CONTENT)
        return HTTPBadRequest(body=body_msg)
    solr_uri = request.registry.settings.get('push.solr_uri', None)
    if solr_uri is None:
        raise AttributeError(u'A push.solr_uri is required')
    # XXX: We are importing solr here to be able to mock it in the tests
    from mysolr import Solr
    solr = Solr(solr_uri)
    shared_content = feedparser.parse(request.body)
    missing = []
    removed = 0
    for item in shared_content.entries:
        uid = normalize_uid(item['id'])
        logger.debug('Deleting %s' % uid)
        # Delete from Solr even when the uid is unknown locally, so the
        # index never keeps a document the store has already dropped.
        solr.delete_by_key(uid)
        if uid not in context.shared:
            missing.append(uid)
            continue
        del context.shared[uid]
        removed += 1
    body_msg = "Removed %s items." % removed
    if missing:
        msg_str = " %s items could not be found for deletion: %s"
        args = (len(missing), ', '.join(missing))
        msg = msg_str % args
        # logging.Logger.warn is a deprecated alias; use warning().
        logger.warning(msg)
        body_msg += msg
    return HTTPOk(body=body_msg)
Example #8
0
from mysolr import Solr
import requests

# set connection through requests
# Sharing a requests Session lets mysolr reuse HTTP connections.
session = requests.Session()
solr_handle = Solr('http://localhost:8080/solr/search', make_request=session)

Example #9
0
from mysolr import Solr
import requests

import localConfig

# set connection through requests
# The Solr endpoint URL comes from the local configuration module.
session = requests.Session()
solr_handle = Solr(localConfig.solr_URL, make_request=session)

Example #10
0
 def setUp(self):
     # Fresh connection to the local Solr instance for each test case.
     self.solr = Solr('http://localhost:8983/solr')
Example #11
0
 def setUp(self):
     # Connect to the Solr instance named by the SOLR_URL environment
     # variable — assumes it is set; os.getenv returns None otherwise.
     self.solr = Solr(os.getenv('SOLR_URL'))
Example #12
0
import time
import socket
import xml.parsers.expat

#import sunburnt
from mysolr import Solr

from Resource.ResourceHelper import ResourceHelper
from Resource.Resource import Resource
from Util.PathTool import PathTool
from Digester.FeedDictFactory import FeedDictFactory

solrBase = "http://localhost:8983/solr/"
updateUrl = solrBase + 'update/'

solr = Solr(solrBase)

_pt = PathTool.PathTool()
_rh = ResourceHelper()
# Index every known feed into Solr.
feeds = _rh.getAllFeedPaths()
for feed in feeds:   
    try:
        feedDictFactory = FeedDictFactory()
        feedDict = feedDictFactory.getFeedDict(feed)
        # Skip feeds that produced no data.
        if feedDict != None and feedDict != {}:
            feedDict['id'] = Resource(feed, 'feed').get_id()
            print(feedDict['id'])
            print("Indexing", feedDict)
            
            # Commit immediately so each feed is searchable right away.
            solr.update([feedDict], 'json', commit=True)
            print('Indexed.')
            # NOTE(review): the except clause for this try is not visible
            # in this excerpt; as shown, the block is incomplete.
Example #13
0
 def __init__(self, url):
     # Remember the endpoint and open a mysolr connection to it.
     self.url = url
     self.conn = Solr(url)
Example #14
0
import sys

# Mongo database and collection holding the documents.
database = 'fashion_ip'
collection = 'docs'

# Make a connection to Mongo.
try:
    db_conn = Connection("localhost")
    # db_conn = Connection("emo2.trinity.duke.edu", 27017)
except ConnectionFailure:
    print "couldn't connect: be sure that Mongo is running on localhost:27017"
    sys.exit(1)

db = db_conn[database]

# Solr endpoint used for full-text / term-vector queries.
solr = Solr('http://emo2.trinity.duke.edu:8080/solr/')

# query = {'q':'*:*','fl':'_id','tv.tf':'true','qt':'tvrh','rows':10,'start':0}
# response = solr.search(**query)
# tv = response.raw_response['termVectors']
# tv[0] == 'warnings'
# tv[1] == [...]

# tv[2] == 'doc-0'
# tv[3] == [...]
# tv[3][0] == 'uniqueKey'
# tv[3][1] == '4f406d8347b2301618000000'
# tv[3][2] == 'content'
# tv[3][3] == ['1', ['tf', 2], '151', ['tf', 1], '157', ['tf', 1], '182', ['tf', 1], '186', ['tf', 2], ...
# tv[4] == 'uniqueKeyFieldName'
# tv[5] == '_id'
Example #15
0
 def __init__(self, urls, config, version=4):
     # Open the Solr cursor for the given endpoint(s).
     # NOTE(review): `config` is accepted but unused in this method.
     self.cursor = Solr(urls, version=version)
        return self._stemmer.stem(word).lower()


# Make a connection to Mongo.
try:
    # db_conn = Connection("localhost", 27017)
    db_conn = Connection("emo2.trinity.duke.edu", 27017)
except ConnectionFailure:
    print "couldn't connect: be sure that Mongo is running on localhost:27017"
    # sys.stdout.flush()
    sys.exit(1)

db = db_conn['fashion_ip']

# Connection to Solr for faster full text searching
solr = Solr('http://localhost:8080/solr')

# First CLI argument is the query term.
qstring = sys.argv[1]

# Case-insensitive pattern: the query term preceded by a space.
pir_re = re.compile(r'.* ' + qstring + '.*', re.IGNORECASE)
porter = nltk.PorterStemmer()

# Query Solr one year at a time to keep result sets manageable.
for year in range(1900, 2013):
    print '\nYEAR: ', year

    response = solr.search(q=qstring + ' year:' + str(year),
                           fl='_id,score',
                           rows=10000,
                           start=0)
    documents = response.documents
    # NOTE(review): the rest of this loop body is truncated in this excerpt.
#!/usr/bin/env python

import sys
import os
import json
from mysolr import Solr

# Public PDBe Solr search endpoint.
PDBE_SOLR_URL = "http://www.ebi.ac.uk/pdbe/search/pdb"
solr = Solr(PDBE_SOLR_URL)

# True on Python 3; selects the right urllib import below.
# NOTE(review): string comparison on sys.version is fragile;
# sys.version_info >= (3,) is the robust check.
PY3 = sys.version > '3'

if PY3:
    import urllib.request as urllib2
else:
    import urllib2

SERVER_URL = "https://www.ebi.ac.uk/pdbe/api"


def join_with_AND(query_params):
    '''Build a Solr query string by AND-joining field:value pairs.'''
    clauses = ["%s:%s" % (field, value)
               for field, value in query_params.items()]
    return " AND ".join(clauses)


def execute_solr_query(query, query_fields):
    '''convenience function'''
    query["q"] = join_with_AND(query_fields)  # add q
    response = solr.search(**query)
    documents = response.documents
    print("Found %d matching entities in %d entries." %
Example #18
0
from mysolr import Solr

# Default connection to localhost:8080
# NOTE(review): the URL actually targets port 8983 (core 'barcore'),
# not 8080 as the comment above claims.
solr = Solr("http://localhost:8983/solr/barcore")

# All solr params are supported!
query = {'q': '*:*', 'facet': 'true', 'facet.field': 'zip'}
response = solr.search(**query)

# do stuff with documents
for document in response.documents:
    # modify field 'foo'
    # NOTE(review): despite the comment, the field set here is 'rating'.
    document['rating'] = 2.0

# update index with modified documents
solr.update(response.documents, commit=True)
Example #19
0
from flask import Flask, request, session, g, redirect, url_for, abort, render_template, flash
import sqlite3
import pdb
from mysolr import Solr
import requests
from contextlib import closing
from flask.ext.sqlalchemy import SQLAlchemy

#configuration must have the full path
DATABASE = 'c:/Users/Alicia/PycharmProjects/WorldValues/worldvalues.db'
DEBUG = True
SECRET_KEY = 'development key'
USERNAME = '******'
PASSWORD = '******'
# NOTE(review): the '#/collection1' fragment looks like the Solr admin-UI
# route; the API endpoint is usually '.../solr/collection1' — confirm.
solr = Solr('http://localhost:8983/solr/#/collection1')

app = Flask(__name__)
app.debug = True
# Pull the upper-case settings defined above into Flask's config.
app.config.from_object(__name__)


def connect_db():
    """Open a connection to the app-configured SQLite database file."""
    db_path = app.config['DATABASE']
    return sqlite3.connect(db_path)


@app.before_request
def before_request():
    # Open a per-request database handle on Flask's request globals.
    g.db = connect_db()


@app.teardown_request