예제 #1
0
def main(args):
    """Index every ``*.json`` file in ``dataDir`` into the ``newsdai`` Solr core.

    Each JSON file is loaded and iterated; every element is treated as a
    mapping with a ``Headline`` key. When ``Article.getSVO`` returns more
    than one item for the headline, the first one is stored under ``SVO``
    before the document is added to Solr.

    Args:
        args: Unused by this function.
    """
    s = solr.SolrConnection('http://' + solrUrl + '/solr/newsdai')
    json_files = [
        name for name in os.listdir(dataDir) if name.endswith('.json')
    ]

    class Options:
        dataDir = '..'

    a = Article("", Options())
    icnt = 0
    for ii, js in enumerate(json_files):
        if ii % 21 == 0:
            print('processing {}'.format(js))
        # Only keep the file open while parsing it, not while indexing.
        with open(os.path.join(dataDir, js)) as json_file:
            json_txt = json.load(json_file)
        for doc in json_txt:
            try:
                svo = a.getSVO(doc['Headline'])
                if svo and len(svo) > 1:
                    doc['SVO'] = svo[0]
                s.add(**doc, commit=True)
            except Exception:
                # Best effort: report the failing document and keep going.
                # ``Exception`` (not a bare except) so Ctrl-C still stops
                # the run.
                traceback.print_exc()
            if icnt % 1001 == 0:
                print('adding {} doc'.format(icnt), end='\r', flush=True)
            icnt += 1
예제 #2
0
class searchManager:
    """Small helper around one shared solrpy connection to the local Solr."""

    # Shared by all instances; created once when the class body is executed.
    connection = solr.SolrConnection('http://localhost:8983/solr')

    def addDocuments(self):
        """Add one hard-coded sample document to the index."""
        doc = {"id": 34567, "title": "Mbuoe"}
        # SolrConnection.add() takes the document's fields as keyword
        # arguments; a positional dict would be bound to ``_commit`` and no
        # fields would be sent.
        searchManager.connection.add(**doc)

    def searchDocuments(self, keyword):
        """Print the ``title`` of every hit for ``title:<keyword>``.

        Args:
            keyword: Text appended verbatim to the ``title:`` field query.
        """
        # The keyword belongs in the query string; the second positional
        # argument of query() is the field list, not a search term.
        response = searchManager.connection.query('title:' + keyword)
        for hit in response.results:
            print(hit['title'])
예제 #3
0
def main(argv):
    """Parse the command line and run ``buildChunkFile`` against a Solr server.

    Args:
        argv: Command-line arguments, parsed with ``getopt``. Recognised
            options are ``-f/--chunkFile``, ``-s/--solrUrl``,
            ``-o/--outputFile`` and ``-h``.

    Exits with status 2 on an unknown option. Prints the usage line and exits
    with the default status when ``-h`` is given or any of the three required
    options is missing.
    """
    usage = 'check_failed.py -f <chunk file> -s <solr url> -o <output file> '
    chunkFile = None
    solrUrl = None
    outputFile = None

    try:
        opts, _ = getopt.getopt(argv, "hf:s:o:",
                                ["chunkFile=", "solrUrl=", "outputFile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-f", "--chunkFile"):
            chunkFile = arg
        elif opt in ("-s", "--solrUrl"):
            solrUrl = arg
        elif opt in ("-o", "--outputFile"):
            outputFile = arg

    if chunkFile is None or solrUrl is None or outputFile is None:
        print(usage)
        sys.exit()

    # Single-argument, parenthesised prints behave the same on Python 2 and 3.
    print("Chunk File  : [" + str(chunkFile) + "]")
    print("Solr URL    : [" + str(solrUrl) + "]")
    print("Output File : [" + str(outputFile) + "]")

    s = solr.SolrConnection(solrUrl)
    buildChunkFile(outputFile, chunkFile, s)
예제 #4
0
def solr_add(**data_dict):
    """Add one document to the product Solr index.

    The document's fields are passed as keyword arguments. The reserved key
    ``__commit__`` (default ``True``) is removed from the fields and decides
    whether an explicit commit is issued after the add.
    """
    commit = data_dict.pop('__commit__', True)
    s = solr.SolrConnection(settings.PRODUCT_SOLR)
    try:
        s.add(**data_dict)
        if commit:
            s.commit()
    finally:
        # Release the connection even when the add or commit fails.
        s.close()
예제 #5
0
def searchApiSolr(URLToSolr,pathToData,search_string,psc_pattern,limit=ppApiConfig.LIMIT_NUM_MATCHING_TRANSACTIONS):
    """Query Solr for transactions and hand the response to ``processSolrResults``.

    Args:
        URLToSolr: URL passed to ``solr.SolrConnection``.
        pathToData: Unused by this function.
        search_string: Text searched in ``AGGREGATED_TEXT_FIELD``.
        psc_pattern: ``"*"`` means no filter; any other value is applied as a
            filter query (``fq``) on the ``Transaction.PSC`` field.
        limit: Maximum number of rows requested from Solr.

    Returns:
        Whatever ``processSolrResults`` returns for the Solr response.
    """
    # create a connection to a solr server
    solrCon = solr.SolrConnection(URLToSolr)

    print("Not using cache.")
    logger.info("Not using cache.")
    # NOTE(review): the director is never used afterwards; the call is kept
    # only in case construction has side effects -- confirm and remove.
    Transaction.TransactionDirector()

    logger.info("Searching for search_string,psc" + search_string+","+psc_pattern)
    # NOTE(review): same message again at error level; looks like a debugging
    # aid -- confirm before removing.
    logger.error("Searching for search_string,psc" + search_string+","+psc_pattern)

    mainSearch = AGGREGATED_TEXT_FIELD+':'+search_string
    pscSearch = Transaction.PSC+':'+psc_pattern

    # the magic happens here...
    # you can add q_op='AND' here, but it seems to shut down all instances.  I'm afraid
    # I either need to use ediscmax or do something else.
    print("rows = " + str(limit))
    # NOTE(review): Solr documents this parameter as ``defType``; verify that
    # the lower-case ``deftype`` spelling is actually honoured.
    query_args = {'rows': limit, 'fl': '*,score', 'deftype': 'edismax'}
    if psc_pattern != "*":
        query_args['fq'] = pscSearch
    transactionDicts = solrCon.query(mainSearch, **query_args)
    return processSolrResults(transactionDicts)
예제 #6
0
def facetResults(facet):
    """Return ``{facet_value: entry_count}`` for the given facet field.

    Runs a zero-row ``*:*`` select with faceting enabled on ``facet`` (no
    facet limit, minimum count 1).

    Args:
        facet: Name of the Solr field to facet on.

    Returns:
        The facet counts for ``facet`` as read from the response, or ``None``
        when any exception occurs (the exception is logged, not raised).
    """
    log.debug('facet, `%s`' % facet)
    params = {
        u'facet': u'true',
        u'facet.field': facet,
        u'rows': u'0',
        u'facet.limit': u'-1',
        u'facet.mincount': u'1'
    }
    try:
        s = solr.SolrConnection(settings_app.SOLR_URL)
        q = s.select(u'*:*', **params)
        return q.facet_counts[u'facet_fields'][facet]
    except Exception as e:
        log.error('in common.facetResults(); exception, %s' % str(e))
        return None
0
    def feedback(self):
        """Placeholder for recording feedback on a search result.

        Per the original note, ``request.params`` is meant to carry a guid and
        a type ("up" or "down"). For now the method only opens and closes a
        Solr connection.
        """
        solr_conn = solr.SolrConnection('http://localhost:8983/solr')
        # TODO: have solrpy perform the actual update here.
        solr_conn.close()
예제 #8
0
    def get(self, request, *args, **kwargs):
        """Return folio data from Solr for the manuscript given by ``pk``.

        Without ``image_uri`` in ``kwargs`` the whole Solr response for up to
        1000 folios (sorted by number) is wrapped in a ``Response``. With
        ``image_uri`` only the first matching folio is returned.

        Raises:
            Http404: ``image_uri`` was given but no folio matched.
        """
        solrconn = solr.SolrConnection(settings.SOLR_SERVER)
        manuscript_id = kwargs['pk']
        shared_options = dict(sort="number asc", fields=FOLIO_FIELDS, score=False)

        if 'image_uri' not in kwargs:
            all_folios_query = u'type:"cantusdata_folio" AND manuscript_id:{0}'.format(
                manuscript_id)
            folios = solrconn.query(all_folios_query, rows=1000, **shared_options)
            return Response(folios)

        single_folio_query = (
            u'type:"cantusdata_folio" AND manuscript_id:{0} AND image_uri:"{1}"'
            .format(manuscript_id, kwargs['image_uri']))
        match = solrconn.query(single_folio_query, rows=1, **shared_options)

        # We only want the single result!
        # TODO: Figure out the best way to handle this
        if not match.results:
            raise Http404("No data for a folio with that number")
        return Response(match.results[0])
예제 #9
0
    def search(self):
        """Run the request's ``query`` against Solr and render the result page.

        Reads ``query`` (required) and ``start`` (optional, default 0) from
        ``request.params``. A blank query renders the index page, an empty
        result set renders the no-results page, and anything else renders the
        results page with ``c.start``, ``c.results`` and ``c.query`` set.
        """
        c.service = "search"
        query = request.params['query']
        # ``get`` replaces the Python-2-only ``has_key`` check.
        start = request.params.get('start', 0)

        if query.strip() == "":
            return render('/index.mako')

        conn = solr.SolrConnection('http://localhost:8983/solr')
        try:
            # Grab data from Solr
            results = conn.query(q=sanitize(query), rows=10, start=start)
        finally:
            # Close the connection even when the query fails.
            conn.close()

        if len(results) == 0:
            c.query = query
            return render('/noresults.mako')

        # Send params to context
        c.start = start
        c.results = results
        c.query = query

        return render('/results.mako')
예제 #10
0
    def merge_article_results(self, responses, query):
        """Return the results whose article appears in every term's result set.

        For the i-th entry of ``responses`` (1-based) the query
        ``text:"<query['term<i>']>" AND journal:"<query['journal']>"`` is
        re-run with ``rows`` set to that response's ``numFound``. Results from
        all queries are pooled, and only those whose ``journal_art_id`` occurs
        in every query's result set are kept.

        Args:
            responses: Sequence of earlier Solr responses; only ``numFound``
                is read from each.
            query: Mapping with ``term1`` .. ``termN`` and ``journal`` keys.

        Returns:
            List of result documents (one entry per pooled hit, so an article
            can appear more than once); ``[]`` when ``responses`` is empty.
        """
        # Nothing to intersect; avoids indexing into an empty list below.
        if not responses:
            return []

        # One connection is enough for all the sequential queries.
        conn = solr.SolrConnection(self.solr_url)
        all_results = []
        key_sets = []

        for position, earlier in enumerate(responses, 1):
            s_query = "text:\"%s\" AND journal:\"%s\"" % (
                query['term' + str(position)], query['journal'])
            params = {'start': '0', 'rows': str(earlier.numFound)}
            refreshed = conn.query(s_query, **params)

            hits = list(refreshed.results)
            all_results.extend(hits)
            key_sets.append(set(hit['journal_art_id'] for hit in hits))

        final_keys = set.intersection(*key_sets)
        return [r for r in all_results if r['journal_art_id'] in final_keys]
예제 #11
0
def run(filename):
    """Load articles from a CSV file into the local Solr index in batches.

    Rows are grouped by their first column and only the first row of each
    group is kept. The kept rows are turned into ``Article`` dicts (with
    ``keywords`` split on ``', '``) and sent to Solr in batches of up to
    10000 via ``add_many``. A single commit is issued at the end.

    NOTE: this block relies on Python 2 iteration semantics (``xrange``,
    ``itertools.imap``, ``it.next()`` and ``StopIteration`` ending the
    enclosing generators).

    Args:
        filename: Path of the CSV file to load.
    """

    def group(it, size):
        # Yield successive lazy sub-iterators of at most ``size`` items. Each
        # one must be fully consumed before the next is requested.
        it = iter(it)

        def subit(it, first, size):
            yield first
            for i in xrange(size):
                yield it.next()

        while True:
            yield subit(it, it.next(), size - 1)

    def quorum(articles):
        # ``articles`` is a (key, rows) pair from groupby; keep the first row.
        articles = list(articles[1])
        return articles[0]

    def get_article(article):
        article = Article(*(f.decode('utf-8') for f in article))._asdict()
        article['keywords'] = article['keywords'].split(u', ')
        return article

    s = solr.SolrConnection('http://localhost:8983/solr')

    # ``with`` makes sure the CSV file is closed once loading is finished.
    with open(filename, 'r') as csv_file:
        reader = csv.reader(csv_file)
        reader = itertools.imap(quorum,
                                itertools.groupby(reader, key=lambda x: x[0]))

        for i, g in enumerate(group(reader, 10000)):
            # Same output as the Python 2 statement ``print "Loading", i``.
            print("Loading %s" % i)
            s.add_many(itertools.imap(get_article, g))

    s.commit()
예제 #12
0
파일: index.py 프로젝트: zfadhlee/linksSDGs
def data(rows_returned):
    """Render ``data.json`` with the Solr response for the request's ``search`` arg.

    Args:
        rows_returned: Passed to Solr as the ``rows`` parameter.

    Returns:
        The rendered ``data.json`` template, given the Solr response as
        ``response``.
    """
    user_query = request.args.get("search")
    collection_name = 'linksdgs'
    server_url = 'http://solr4-jmmnn-1.c9users.io/solr/' + str(collection_name)
    solr_conn = solr.SolrConnection(server_url)
    solr_response = solr_conn.select(user_query, rows=rows_returned)
    return render_template("data.json", response=solr_response)
예제 #13
0
def solr_delete(sender, instance, created, **kwargs):
    """Delete the Solr record found for ``instance`` and commit the change.

    The signature suggests this is wired up as a Django signal receiver
    (presumably -- confirm against the ``connect`` call).

    Args:
        sender: Unused.
        instance: Object whose ``id`` is searched for as ``item_id``.
        created: Unused.
    """
    from django.conf import settings
    import solr
    solrconn = solr.SolrConnection(settings.SOLR_SERVER)
    # NOTE(review): the two clauses are not joined with AND, so the match
    # depends on the core's default query operator -- confirm this is intended.
    record = solrconn.query("type:goudimel_book item_id:{0}".format(
        instance.id))
    if not record.results:
        # Nothing indexed for this instance; indexing results[0] would raise.
        return
    solrconn.delete(record.results[0]['id'])
    solrconn.commit()
예제 #14
0
def order_solr_add(**data_dict):
    """Add one document to the order Solr index without an explicit commit.

    The document's fields are passed as keyword arguments. The reserved key
    ``__commit__`` is accepted and stripped from the fields, but explicit
    commits are currently disabled, so its value is ignored.
    """
    # Strip the control key so it is not indexed as a document field.
    data_dict.pop('__commit__', True)
    s = solr.SolrConnection(settings.ORDER_SOLR)
    try:
        s.add(**data_dict)
    finally:
        # Release the connection even when the add fails.
        s.close()
예제 #15
0
 def run_task(self):
     """Delete every document from each Solr instance attached to the task.

     For each ``str_connection`` value of ``self.task.solr_instances`` a
     connection is opened, a ``*:*`` delete-by-query is issued and committed,
     and the connection is closed again.
     """
     task_solrs = self.task.solr_instances.all()
     solrs_connections = list(
         task_solrs.values_list('str_connection', flat=True))
     for conn in solrs_connections:
         client = solr.SolrConnection(conn)
         try:
             client.delete_query("*:*")
             client.commit()
         finally:
             # Do not leave one open connection behind per Solr instance.
             client.close()
예제 #16
0
 def update_search_index(self, request, queryset):
     """Reindex the selected products in the Solr instance chosen in the form.

     Nothing happens unless ``ExtraActionForm`` validates against
     ``request.POST`` and provides a non-empty ``solr_instances`` value.

     Args:
         request: Request whose POST data feeds ``ExtraActionForm``.
         queryset: Selected objects; passed to ``reindex_by_qset`` together
             with its ``availability=True`` subset.
     """
     from .search_indexes import ProductIndex
     ex_form = ExtraActionForm(request.POST)
     if not ex_form.is_valid():
         return
     solr_url = ex_form.cleaned_data.get('solr_instances')
     if not solr_url:
         return

     client = solr.SolrConnection(solr_url)
     try:
         solr_inst = ProductIndex(client)
         solr_inst.reindex_by_qset(queryset, queryset.filter(availability=True))
     finally:
         # Close the connection even when reindexing fails.
         client.close()
예제 #17
0
def search(request, category, source, search_value, sort="", page=""):
    """Search the CZ4034 core and render ``home2.html`` with hits and suggestions.

    Args:
        request: Current request; its session receives ``status_list``.
        category: Passed through to the template.
        source: ``"All2"`` searches all sources (100 rows); any other value is
            added to the query as a ``name:`` clause.
        search_value: Search text; ``%20`` sequences are turned into spaces.
        sort: ``"Popularity"`` sorts by ``like``, ``"Retweet"`` by
            ``retweet_count``, anything else by ``time`` (always descending).
        page: Passed to ``getStatusList`` to select the page of results.

    Returns:
        The rendered ``home2.html`` response.
    """
    search_value = search_value.replace("%20", " ")
    connection = solr.SolrConnection('http://localhost:8983/solr/CZ4034',
                                     debug=True)

    # NOTE(review): search_value and source are spliced into the Solr query
    # unescaped; a quote or query operator in them changes the query.
    content_clause = 'content:"' + search_value + '"'
    if source == "All2":
        status_list = connection.query(content_clause, rows=100).results
    else:
        status_list = connection.query(content_clause +
                                       ' AND name:' + source).results

    conn = urlopen('http://localhost:8983/solr/CZ4034/suggest?q=' +
                   search_value.replace(" ", "%20") + '&wt=json')
    try:
        suggestion_json = json.load(conn)["spellcheck"]["suggestions"]
    finally:
        # Release the HTTP response even if the payload is malformed.
        conn.close()

    # When suggestions exist, entry 0 is the original word and entry 1 holds
    # its replacements; each replacement is substituted into the search text.
    suggestion_list = []
    if len(suggestion_json) > 1:
        original_word = suggestion_json[0]
        suggestion_list = [
            search_value.replace(original_word, suggestion)
            for suggestion in suggestion_json[1]["suggestion"]
        ]

    for status in status_list:
        status["retweet_count"] = status["retweet_count"][0]

    sort_field = {
        "Popularity": "like",
        "Retweet": "retweet_count",
    }.get(sort, "time")
    status_list = sorted(status_list,
                         key=lambda status: status[sort_field],
                         reverse=True)

    request.session.status_list = status_list
    pages = getPage(request)
    request.session.status_list = getStatusList(status_list, page)
    return render(
        request, 'home2.html', {
            'sort': sort,
            'status_list': request.session.status_list,
            'search_value': search_value,
            'source': source,
            'category': category,
            'suggestion_list': suggestion_list,
            'pages': pages
        })
예제 #18
0
def order_solr_suggest(q):
    """Return the raw terms response for suggestions starting with ``q``.

    Args:
        q: Prefix sent as ``terms_prefix``.

    Returns:
        The value returned by ``raw_query`` (JSON output is requested).
    """
    conn = solr.SolrConnection(settings.ORDER_SOLR, operation='/terms')
    term_params = {
        'terms_fl': 'suggest',
        'wt': 'json',
        'omitHeaders': 'true',
        'terms_prefix': q,
    }
    return conn.raw_query(**term_params)
예제 #19
0
def order_solr_search(q, fields=None, highlight=None, score=True,
    sort=None, sort_order='asc', operation='/select', **kw):
    """Query the order Solr index, returning ``None`` on a Solr error.

    Args:
        q: Query string.
        fields, highlight, score, sort, sort_order: Forwarded positionally to
            ``SolrConnection.query``.
        operation: Accepted for interface compatibility; unused here.
        **kw: Extra parameters forwarded to ``query``.

    Returns:
        The Solr response, or ``None`` if ``solr.SolrException`` was raised.
    """
    s = solr.SolrConnection(settings.ORDER_SOLR)
    try:
        return s.query(q, fields, highlight, score, sort, sort_order, **kw)
    except solr.SolrException:
        # Not logging for now
        return None
예제 #20
0
def solr_tags(fields, q='*:*'):
    """Return facet counts for ``fields`` merged into a single dict.

    The ``facet_fields`` lists of the JSON response are concatenated, then
    consecutive elements are paired up as ``{element: next_element}``.

    Args:
        fields: Sent to Solr as ``facet_field``.
        q: Query restricting the faceted documents (default: everything).
    """
    conn = solr.SolrConnection(settings.PRODUCT_SOLR)
    raw = conn.raw_query(q=q, wt='json', facet='true', facet_field=fields)
    facet_fields = simplejson.loads(raw)['facet_counts']['facet_fields']

    flat = []
    for counts in facet_fields.values():
        flat.extend(counts)

    # Even positions become keys, the following odd positions their values;
    # a trailing unpaired element is dropped.
    return dict(zip(flat[::2], flat[1::2]))
예제 #21
0
 def getBlog(self, filter):
     """Return the ``link`` of every Solr hit matching the encoded filter.

     Args:
         filter: Encoded filter, decoded with ``SearchFilter.decode`` and
             converted to Solr query keyword arguments via ``toSolr``.

     Returns:
         List of ``link`` values, one per hit.
     """
     print(filter)
     parsedFilter = SearchFilter()
     parsedFilter.decode(filter)
     solr_params = parsedFilter.toSolr()
     print(solr_params)

     conn = solr.SolrConnection('http://localhost:8983/solr')
     try:
         response = conn.query(**solr_params)
     finally:
         # Close the connection even when the query fails.
         conn.close()
     return [hit['link'] for hit in response.results]
예제 #22
0
    def execute(self):
        """Apply the queued deletions and additions to Solr, then commit.

        Deletions run first, each via its own ``delete_from_solr``. The Solr
        records of all additions are then sent in one ``add_many`` call.
        """
        solr_conn = solr.SolrConnection(self.solr_server_url)

        for stale in self._deletions:
            stale.delete_from_solr(solr_conn)

        new_records = [item.create_solr_record() for item in self._additions]
        solr_conn.add_many(new_records)
        solr_conn.commit()
예제 #23
0
def main(argv):
    """Parse the command line and publish a documentation build to Solr.

    Recognised options: ``-i/--inputpath`` (directory of files to publish),
    ``-s/--solraddr`` (Solr URL), ``-b/--buildid`` and ``-h``. All three
    value options are required; otherwise ``usage()`` is shown and the
    process exits.

    The input path is published first, followed by each of its
    sub-directories whose name starts with ``detail``.

    Raises:
        PublishException: Re-raised after the summary of published and
            skipped counts has been printed.
    """
    # Example values:
    #   solrUrl = 'http://127.0.0.1:8080/solr/apidocs'
    #   path = "D:/Git/wrdoclet/wrdoclet/target/doc/"
    solrUrl = ''
    path = ''
    buildID = ''

    try:
        opts, _ = getopt.getopt(argv, "hi:s:b:",
                                ["inputpath=", "solraddr=", "buildid="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            usage()
            sys.exit()
        elif opt in ("-i", "--inputpath"):
            path = arg
        elif opt in ("-s", "--solraddr"):
            solrUrl = arg
        elif opt in ("-b", "--buildid"):
            buildID = arg

    if solrUrl == '' or path == '' or buildID == '':
        usage()
        sys.exit()

    # The double space reproduces the separator that the Python 2
    # ``print "label: ", value`` statement inserted.
    print("input path of docs:  %s" % path)
    print("solr address url:  %s" % solrUrl)
    print("doc build ID:  %s" % buildID)

    solrInstance = solr.SolrConnection(solrUrl)  # Solr Connection object
    publishedCount = 0
    skippedCount = 0
    processedSet = set()

    try:
        pCount, sCount, pSet = publishToSolr(solrInstance, path, buildID)
        publishedCount += pCount
        skippedCount += sCount
        processedSet |= pSet
        # just for migration of old type users, official wrdoclet user will
        # not hit this code path.
        pathlist = [
            join(path, f) for f in listdir(path)
            if isdir(join(path, f)) and f.startswith('detail')
        ]
        for p in pathlist:
            pCount, sCount, pSet = publishToSolr(solrInstance, p, buildID)
            publishedCount += pCount
            skippedCount += sCount
            processedSet |= pSet
    except PublishException as ex:
        # Include the partial counts carried by the exception in the summary.
        printSummary(publishedCount + ex.publishedCount,
                     skippedCount + ex.skippedCount)
        raise
예제 #24
0
    def __init__(self, url):
        """Set up the wrapper: store the URL, connect, and seed the weights.

        Args:
            url (string): The Solr URL for the searchparty collection.
        """
        self.url = url
        self.conn = solr.SolrConnection(url)
        self.weightage = {}
        # Initial weights handed to setWeigtage: 10 for the first entry and
        # -1 for each of the remaining ones.
        initial_weights = [10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
        self.setWeigtage(initial_weights)
예제 #25
0
파일: index.py 프로젝트: zfadhlee/linksSDGs
def data2():
    """Render ``data2.json`` with Solr results for the request's query.

    Request arguments:
        query: Search text; falls back to ``'india'`` when missing or empty.
        row: Number of rows to return; falls back to ``10`` when missing or
            not an integer.

    Previously both arguments were read and then ignored in favour of the
    hard-coded fallback values.
    """
    query = request.args.get("query") or 'india'
    rows = request.args.get("row", 10, type=int)

    search_collection = 'linksdgs'
    search_server = 'http://solr4-jmmnn-1.c9users.io/solr/' + str(search_collection)
    s = solr.SolrConnection(search_server)
    response = s.select(query, rows=rows)

    return render_template("data2.json", response=response)
예제 #26
0
    def query_journal(self, query, search_type):
        """Run the journal search registered for ``search_type``.

        The handler is looked up in ``self.journal_func_map`` by
        ``search_type.name`` and called with this object, a new Solr
        connection, ``query`` and ``self.journal_facet_params``.

        Returns:
            The result of ``process_journal_results``, or ``None`` when a
            ``KeyError`` is raised anywhere during lookup, search or
            processing.
        """
        conn = solr.SolrConnection(self.solr_url)

        try:
            handler = self.journal_func_map[search_type.name]
            raw_response = handler(self, conn, query, self.journal_facet_params)
            return self.process_journal_results(raw_response, search_type)
        except KeyError:
            return None
예제 #27
0
def solr_delete(sender, instance, **kwargs):
    """Delete the Solr record whose ``id`` equals ``instance.id``, if any.

    No commit is issued here.

    Args:
        sender: Unused.
        instance: Object whose ``id`` is looked up in Solr.
    """
    from django.conf import settings
    import solr

    solrconn = solr.SolrConnection(settings.SOLR_SERVER)
    record = solrconn.query("id:{0}".format(instance.id))
    if record:
        # the record already exists, so we'll remove it first.
        stale_id = record.results[0]['id']
        # The message needs a placeholder, otherwise the id is never shown.
        print("Deleting {0}".format(stale_id))
        solrconn.delete(stale_id)
예제 #28
0
    def test_solr_deletion(self):
        """After delete_from_solr and a commit, the manuscript is not in Solr."""
        manuscript = self.first_manuscript
        manuscript_pk = manuscript.pk

        conn = solr.SolrConnection(settings.SOLR_SERVER)
        manuscript.delete_from_solr(conn)
        conn.commit()

        lookup = 'type:cantusdata_manuscript AND item_id:{}'.format(manuscript_pk)
        remaining = conn.query(lookup)
        self.assertEqual(remaining.numFound, 0)
예제 #29
0
def SolrOptimize(indexing):
    """Optimize each listed stage index, then the ``'all'`` stage index.

    Args:
        indexing: Iterable of index names; each must be a key of
            ``solrconfig.solr_urls_stage``.
    """

    def optimize(index):
        # Optimize one stage index and always release its connection.
        sol = solr.SolrConnection(solrconfig.solr_urls_stage[index])
        try:
            sol.optimize()
        finally:
            sol.close()
        print("   - " + index + " optimized.")

    print(" - Optimize Solr data")

    for index in indexing:
        optimize(index)

    optimize('all')
예제 #30
0
def brewers(request, brewer_name):
    """Render the search-results page listing beers of one brewery.

    Args:
        request: Unused by this function.
        brewer_name: Brewery name, matched as a quoted phrase against the
            ``brewery_name`` field and shown on the page.
    """
    conn = solr.SolrConnection('http://localhost:8983/solr')
    brewery_query = 'brewery_name:"%s"' % brewer_name
    context = {
        'searchresults': conn.query(brewery_query),
        'title': 'Beers from',
        'em': brewer_name,
        'form': SearchForm(),
    }
    return render_to_response('searchresults.html', context)