예제 #1
0
 def test_keyword(self):
     """Index three documents and check which ids two keyword searches return."""
     client = pysolr.Solr('http://localhost:8983/solr/unittest')
     client.add([
         {"id": "Belfast", "description": "This is about Belfast"},
         {"id": "Lisburn", "description": "This is about Lisburn"},
         {"id": "Titanic", "description": "This is about Titanic"},
     ])

     # Both searches are issued up front (generic term first); the ids are
     # collected from the responses afterwards.
     about_hits = client.search("about")
     titanic_hits = client.search("Titanic")

     titanic_ids = [hit['id'] for hit in titanic_hits]
     about_ids = [hit['id'] for hit in about_hits]

     # The generic term appears in every description, the specific one in a
     # single document only.
     self.assertEqual(about_ids, ['Belfast', 'Lisburn', 'Titanic'])
     self.assertEqual(titanic_ids, ['Titanic'])
예제 #2
0
def search(request):
    """Render gene search results for the ``search_query`` GET parameter.

    When ``search_query`` is present in ``request.GET`` it is passed to
    ``solr.search`` and every returned document is converted into a
    ``SolrGene`` namedtuple before ``search_results.html`` is rendered.
    Without that parameter ``search.html`` is rendered instead.

    NOTE(review): both branches hand ``locals()`` to ``render_to_response``
    (presumably as the template context), so the local variable names in this
    function may be referenced by the templates -- check them before renaming
    or removing any local.
    """
    # Lightweight record type for one search hit, built from the Solr document.
    SolrGene = collections.namedtuple('SolrGene', ['species', 'species_taxid', 'sys_name', 'name', 'description', 'num_conditions', 'num_corems', 'num_gres'])
    if 'search_query' in request.GET:
        query = request.GET['search_query']
        # solr.search is unpacked into exactly two values: documents and facets.
        solr_docs, facets = solr.search(query)
        # NOTE(review): debug output written with a Python 2 print statement.
        print "FACETS: ", facets
        search_terms = query
        docs = []
        for doc in solr_docs:
            # The three counters fall back to 0 when the document lacks the key.
            num_corems = 0
            num_conds = 0
            num_gres = 0

            if 'num_conds' in doc:
                num_conds = doc['num_conds']
            if 'num_corems' in doc:
                num_corems = doc['num_corems']
            if 'num_gres' in doc:
                num_gres = doc['num_gres']

            # Positional fill: 'species_taxid' comes from doc['ncbi_taxonomy_id']
            # and 'num_conditions' from num_conds; the remaining fields share
            # their name with the document key.
            docs.append(SolrGene(doc['species'],
                                 doc['ncbi_taxonomy_id'],
                                 doc['sys_name'],
                                 doc['name'],
                                 doc['description'],
                                 num_conds,
                                 num_corems,
                                 num_gres))
        return render_to_response('search_results.html', locals())
    else:
        return render_to_response('search.html', locals())
예제 #3
0
def sample(limit, trainingSetSize, *args):
    """Fetch up to ``limit`` documents from Solr and split them into two sets.

    Documents are requested page by page with ``sort`` set to
    ``publication_date asc`` and ``fl`` set to ``id`` plus the fields named
    in ``args``. Documents that lack a requested field, or whose value for
    it is empty, are dropped before the split.

    Args:
        limit: Maximum number of documents to request. Zero or less yields
            two empty sets without querying Solr.
        trainingSetSize: Number of accepted documents placed in ``'train'``;
            the remaining ones go to ``'test'``.
        *args: Names of the fields every accepted document must provide.

    Returns:
        A dict with the keys ``'train'`` and ``'test'``, each holding a list
        of accepted documents.
    """
    def accept(doc):
        # Keep a document only when every requested field is present and
        # non-empty.
        for field in args:
            if field not in doc:
                return False
            value = doc[field]
            if isinstance(value, list):
                # List-valued field: judged by its first element. An empty
                # list counts as empty instead of raising IndexError.
                if not value or len(value[0]) == 0:
                    return False
            elif len(value) == 0:
                return False
        return True

    if limit <= 0:
        # Nothing to fetch; this also avoids range() being called with a
        # zero step, which raises ValueError.
        return {'train': [], 'test': []}

    fields = 'id,%s' % (','.join(args))
    sort = quote('publication_date asc')
    documents = []
    increment = __defaultInc__ if limit > __defaultInc__ else limit
    for f in range(0, limit, increment):
        # The final page may be short: ask only for what is left up to
        # ``limit`` (the remainder is limit - f, not limit - increment).
        rows = min(increment, limit - f)
        documents += solr.search({
            'fl': fields,
            'sort': sort,
            'rows': rows,
            'start': f
        })

    # A list comprehension keeps the result sliceable on Python 2 and 3.
    documents = [doc for doc in documents if accept(doc)]
    return {
        'train': documents[:trainingSetSize],
        'test': documents[trainingSetSize:]
    }
예제 #4
0
def id_ind_to_ind_name(id_industry):
    """Look up the industry name stored in Solr for ``id_industry``.

    Searches for ``id_industry:<id>`` and returns the first element of the
    ``industry`` field of the first hit, or an empty string when the search
    yields no hits.
    """
    solr_query = 'id_industry:' + str(id_industry)
    client = pysolr.Solr('http://85.31.219.96:7183/solr/')
    for first_hit in client.search(solr_query):
        # Only the first hit is of interest; return as soon as it is read.
        return first_hit['industry'][0]
    return ''
예제 #5
0
def record_has_mult_topics(id_chart):
    """Tell whether any Solr hit for ``id_chart`` carries more than one topic.

    Searches for ``id_chart:<id>`` requesting only the ``topic`` field and
    returns True when at least one hit has a ``topic`` value of length
    greater than one. Hits without a ``topic`` key are ignored.
    """
    solr_query = 'id_chart:' + str(id_chart)
    client = pysolr.Solr('http://85.31.219.96:7183/solr/')
    hits = client.search(solr_query, fl='topic', rows='100')

    has_multiple = False
    for hit in hits:
        try:
            topics = hit['topic']
        except KeyError:
            # This hit has no topic field at all; move on to the next one.
            continue
        if len(topics) > 1:
            has_multiple = True
    return has_multiple
예제 #6
0
def industry_and_topics(id_industry):
    """Return the topic facets of an industry.

    NOTE: this now uses the pysolr package instead of solrpy.

    Input : id_industry
    Output: the ``topic`` entry of the response's ``facet_fields``, passed
            through ``list_to_dict`` (per the original note: a dictionary
            of topics and their faceted counts).
    """
    solr_query = 'id_industry:' + str(id_industry)
    client = pysolr.Solr('http://85.31.219.96:7183/solr/')

    # Faceting is switched on for the ``topic`` field only.
    facet_options = {'facet': 'on', 'facet.field': 'topic'}
    response = client.search(solr_query, **facet_options)

    topic_facets = response.facets['facet_fields']['topic']
    return list_to_dict(topic_facets)
예제 #7
0
 def test_solr_return_100(self):
     """Load the image JSON fixture into Solr and expect 100 hits for 'Belfast'."""
     client = pysolr.Solr('http://localhost:8983/solr/keyword')

     # The fixture path is built from the parent of this file's directory.
     here = os.path.abspath(__file__)
     project_root = os.path.dirname(os.path.dirname(here))
     fixture_path = project_root + '/python/data/image_json.json'

     with open(fixture_path) as fixture:
         documents = json.load(fixture)

     client.add(documents)

     # Highlighting is enabled and up to 100 rows are requested.
     search_options = {
         'hl': 'true',
         'hl.fragsize': 100,
         'rows': 100,
     }
     hits = client.search("Belfast", **search_options)

     self.assertEqual(len(hits), 100)
예제 #8
0
def sample(limit, trainingSetSize, *args):
    """Fetch up to ``limit`` documents from Solr and split them into two sets.

    Documents are requested page by page with ``sort`` set to
    ``publication_date asc`` and ``fl`` set to ``id`` plus the fields named
    in ``args``. Documents that lack a requested field, or whose value for
    it is empty, are dropped before the split.

    Args:
        limit: Maximum number of documents to request. Zero or less yields
            two empty sets without querying Solr.
        trainingSetSize: Number of accepted documents placed in ``'train'``;
            the remaining ones go to ``'test'``.
        *args: Names of the fields every accepted document must provide.

    Returns:
        A dict with the keys ``'train'`` and ``'test'``, each holding a list
        of accepted documents.
    """
    def accept(doc):
        # Keep a document only when every requested field is present and
        # non-empty.
        for field in args:
            if field not in doc:
                return False
            value = doc[field]
            if isinstance(value, list):
                # List-valued field: judged by its first element. An empty
                # list counts as empty instead of raising IndexError.
                if not value or len(value[0]) == 0:
                    return False
            elif len(value) == 0:
                return False
        return True

    if limit <= 0:
        # Nothing to fetch; this also avoids range() being called with a
        # zero step, which raises ValueError.
        return {'train': [], 'test': []}

    fields = 'id,%s' % (','.join(args))
    sort = quote('publication_date asc')
    documents = []
    increment = __defaultInc__ if limit > __defaultInc__ else limit
    for f in range(0, limit, increment):
        # The final page may be short: ask only for what is left up to
        # ``limit`` (the remainder is limit - f, not limit - increment).
        rows = min(increment, limit - f)
        documents += solr.search(
            {'fl': fields, 'sort': sort,
             'rows': rows, 'start': f})

    # A list comprehension keeps the result sliceable on Python 2 and 3.
    documents = [doc for doc in documents if accept(doc)]
    return {'train': documents[:trainingSetSize],
            'test': documents[trainingSetSize:]}