def test_keyword(self):
    """Index three small documents and check keyword search by document id.

    Talks to the Solr core at http://localhost:8983/solr/unittest.
    """
    client = pysolr.Solr('http://localhost:8983/solr/unittest')

    documents = [
        {"id": "Belfast", "description": "This is about Belfast"},
        {"id": "Lisburn", "description": "This is about Lisburn"},
        {"id": "Titanic", "description": "This is about Titanic"},
    ]
    client.add(documents)

    # "about" occurs in every description; "Titanic" only in one document.
    about_hits = client.search("about")
    titanic_hits = client.search("Titanic")

    expected_about_ids = ['Belfast', 'Lisburn', 'Titanic']
    expected_titanic_ids = ['Titanic']

    titanic_ids = [hit['id'] for hit in titanic_hits]
    about_ids = [hit['id'] for hit in about_hits]

    self.assertEqual(about_ids, expected_about_ids)
    self.assertEqual(titanic_ids, expected_titanic_ids)
def search(request):
    """Render the gene search page, or its results when a query was given.

    When ``request.GET`` contains ``search_query``, the query is passed to
    ``solr.search`` and each returned document is converted into a
    ``SolrGene`` named tuple; the ``search_results.html`` template is then
    rendered. Without a query, ``search.html`` is rendered instead.

    Args:
        request: The incoming request; only ``request.GET`` is read.

    Returns:
        Whatever ``render_to_response`` returns for the chosen template.

    Raises:
        KeyError: If a document lacks one of ``species``,
            ``ncbi_taxonomy_id``, ``sys_name``, ``name`` or ``description``.
    """
    # NOTE: locals() is the template context in both branches, so the local
    # variable names in this function are part of its contract with the
    # templates. Do not rename them or add new ones casually.
    SolrGene = collections.namedtuple(
        'SolrGene',
        ['species', 'species_taxid', 'sys_name', 'name', 'description',
         'num_conditions', 'num_corems', 'num_gres'])

    if 'search_query' not in request.GET:
        return render_to_response('search.html', locals())

    query = request.GET['search_query']
    solr_docs, facets = solr.search(query)
    # Single-argument print(...) is valid on both Python 2 and Python 3 and
    # emits the same text the old `print "FACETS: ", facets` statement did.
    print("FACETS:  %s" % (facets,))
    search_terms = query
    docs = []
    for doc in solr_docs:
        # The count fields are optional in a document; default them to 0.
        num_conds = doc['num_conds'] if 'num_conds' in doc else 0
        num_corems = doc['num_corems'] if 'num_corems' in doc else 0
        num_gres = doc['num_gres'] if 'num_gres' in doc else 0
        docs.append(SolrGene(doc['species'], doc['ncbi_taxonomy_id'],
                             doc['sys_name'], doc['name'],
                             doc['description'],
                             num_conds, num_corems, num_gres))
    return render_to_response('search_results.html', locals())
def sample(limit, trainingSetSize, *args):
    """Fetch up to ``limit`` documents from Solr and split them train/test.

    Documents are requested page by page, each page holding at most
    ``__defaultInc__`` rows, with a ``publication_date asc`` sort and a field
    list of ``id`` plus the names in ``args``. Documents that lack one of
    those fields, or hold an empty value for one, are discarded before the
    split.

    Args:
        limit: Maximum number of documents to request. A value of zero or
            less requests nothing and yields two empty lists.
        trainingSetSize: Number of accepted documents placed in ``'train'``;
            the remaining accepted documents go to ``'test'``.
        *args: Names of the fields every accepted document must provide.

    Returns:
        A dict with the keys ``'train'`` and ``'test'``, each a list of
        documents.
    """
    def accept(doc):
        """Return True when ``doc`` has a non-empty value for every field."""
        for field in args:
            if field not in doc:
                return False
            value = doc[field]
            if isinstance(value, list):
                # A list value is judged by its first entry; an empty list
                # counts as an empty value instead of raising IndexError.
                if not value or len(value[0]) == 0:
                    return False
            elif len(value) == 0:
                return False
        return True

    # range() rejects a zero step, so a non-positive limit is answered
    # directly rather than raising ValueError.
    if limit <= 0:
        return {'train': [], 'test': []}

    fields = 'id,%s' % (','.join(args))
    sort = quote('publication_date asc')
    increment = min(limit, __defaultInc__)

    documents = []
    for start in range(0, limit, increment):
        # The final page may be shorter: request only what is left up to
        # `limit` (previously `limit - increment`, which over-fetched).
        rows = min(increment, limit - start)
        documents.extend(solr.search({
            'fl': fields,
            'sort': sort,
            'rows': rows,
            'start': start,
        }))

    # A list (not a lazy filter object) so the slices below work on Python 3.
    accepted = [doc for doc in documents if accept(doc)]
    return {
        'train': accepted[:trainingSetSize],
        'test': accepted[trainingSetSize:],
    }
def id_ind_to_ind_name(id_industry):
    """Look up the industry name stored in Solr for ``id_industry``.

    Returns the first entry of the ``industry`` field of the first hit, or
    an empty string when the query yields no hits.
    """
    lookup = 'id_industry:' + str(id_industry)
    client = pysolr.Solr('http://85.31.219.96:7183/solr/')
    for first_hit in client.search(lookup):
        # Only the first hit is consulted.
        return first_hit['industry'][0]
    return ''
def record_has_mult_topics(id_chart):
    """Tell whether any Solr hit for ``id_chart`` lists more than one topic.

    The search asks for the ``topic`` field only, with ``rows='100'``. Hits
    that carry no ``topic`` field are ignored.
    """
    lookup = 'id_chart:' + str(id_chart)
    client = pysolr.Solr('http://85.31.219.96:7183/solr/')
    hits = client.search(lookup, fl='topic', rows='100')

    # Measure every hit that has a topic field before deciding, so each
    # returned hit is inspected.
    topic_counts = [len(hit['topic']) for hit in hits if 'topic' in hit]
    return any(count > 1 for count in topic_counts)
def industry_and_topics(id_industry):
    """Return the topic facet counts for one industry.

    NOTE: this uses the pysolr package, not solrpy.

    Input : id_industry
    Output: the ``topic`` facet data of the search response, converted by
            ``list_to_dict`` (topics and their faceted counts)
    """
    lookup = 'id_industry:' + str(id_industry)
    client = pysolr.Solr('http://85.31.219.96:7183/solr/')
    # 'facet.field' contains a dot, so it has to be passed via ** unpacking.
    facet_options = {'facet.field': 'topic'}
    response = client.search(lookup, facet='on', **facet_options)
    return list_to_dict(response.facets['facet_fields']['topic'])
def test_solr_return_100(self):
    """Load the image JSON fixture into Solr and expect 100 "Belfast" hits.

    Talks to the Solr core at http://localhost:8983/solr/keyword.
    """
    client = pysolr.Solr('http://localhost:8983/solr/keyword')

    # The fixture lives two directory levels above this file.
    this_file = os.path.abspath(__file__)
    project_root = os.path.dirname(os.path.dirname(this_file))
    fixture_path = project_root + '/python/data/image_json.json'

    with open(fixture_path) as fixture:
        documents = json.load(fixture)
    client.add(documents)

    search_options = {
        'hl': 'true',
        'hl.fragsize': 100,
        'rows': 100,
    }
    hits = client.search("Belfast", **search_options)
    self.assertEqual(len(hits), 100)
def sample(limit, trainingSetSize, *args):
    """Fetch up to ``limit`` documents from Solr and split them train/test.

    Documents are requested page by page, each page holding at most
    ``__defaultInc__`` rows, with a ``publication_date asc`` sort and a field
    list of ``id`` plus the names in ``args``. Documents that lack one of
    those fields, or hold an empty value for one, are discarded before the
    split.

    Args:
        limit: Maximum number of documents to request. A value of zero or
            less requests nothing and yields two empty lists.
        trainingSetSize: Number of accepted documents placed in ``'train'``;
            the remaining accepted documents go to ``'test'``.
        *args: Names of the fields every accepted document must provide.

    Returns:
        A dict with the keys ``'train'`` and ``'test'``, each a list of
        documents.
    """
    def accept(doc):
        """Return True when ``doc`` has a non-empty value for every field."""
        for field in args:
            if field not in doc:
                return False
            value = doc[field]
            if isinstance(value, list):
                # A list value is judged by its first entry; an empty list
                # counts as an empty value instead of raising IndexError.
                if not value or len(value[0]) == 0:
                    return False
            elif len(value) == 0:
                return False
        return True

    # range() rejects a zero step, so a non-positive limit is answered
    # directly rather than raising ValueError.
    if limit <= 0:
        return {'train': [], 'test': []}

    fields = 'id,%s' % (','.join(args))
    sort = quote('publication_date asc')
    increment = min(limit, __defaultInc__)

    documents = []
    for start in range(0, limit, increment):
        # The final page may be shorter: request only what is left up to
        # `limit` (previously `limit - increment`, which over-fetched).
        rows = min(increment, limit - start)
        documents.extend(solr.search({
            'fl': fields,
            'sort': sort,
            'rows': rows,
            'start': start,
        }))

    # A list (not a lazy filter object) so the slices below work on Python 3.
    accepted = [doc for doc in documents if accept(doc)]
    return {
        'train': accepted[:trainingSetSize],
        'test': accepted[trainingSetSize:],
    }