def search(query, params, off, num_results_wanted):
    """Run an ARTstor search and return the hits with the parameters used.

    ``query`` and ``params`` both come from the sidebar, so exactly one of
    them is expected to be set.

    Args:
        query: Query string; translated into search terms with
            ``Query_Language(identifier).searcher_translator``.
        params: Sidebar parameters; parsed with ``parse_parameters`` when
            ``query`` is empty.
        off: Offset of the first wanted result; coerced with ``int()``.
        num_results_wanted: Page size handed to ``_get_url``.

    Returns:
        A ``(Result, parameters)`` tuple. The result is empty when there is
        nothing to search for, when the response cannot be parsed, or when
        the response reports no records.

    Raises:
        NotImplementedError: If both ``query`` and ``params`` are supplied.
    """
    off = int(off)

    # query and params both come from sidebar, so should have exactly one.
    if not query and not params:
        return Result(0, off), get_empty_parameters()
    if query and params:
        print("artstor 34, shouldn't have reached here... "
              "Have both query (%s) and params (%s)" % (query, params))
        raise NotImplementedError
    if query:
        query_terms = Query_Language(identifier).searcher_translator(query)
    else:
        query_terms = parse_parameters(params)
    print("artstor 48 query_terms %s" % query_terms)

    # Return empty result if no search terms (submitting an empty query
    # breaks artstor). Compare by value: `is 0` only worked by accident of
    # CPython's small-integer caching.
    if len(query_terms) == 0:
        return Result(0, 0), get_empty_parameters()

    # Caching of results, uncomment and fix if supported in other searchers
    # TODO
    # cached, created = HitCount.current_objects.get_or_create(
    #     source=self.get_source_id(),
    #     query='%s [%s:%s]' % (keyword, page, pagesize),
    #     defaults=dict(hits=0, valid_until=datetime.datetime.now() + datetime.timedelta(1)))
    # if not created and cached.results:
    #     return simplejson.loads(cached.results)

    pagesize = num_results_wanted
    url = _get_url(query_terms, pagesize, off)
    html_page = _get_html_page(url)

    try:
        results = ElementTree(file=html_page)
    except ExpatError:
        # XML parsing error
        # NOTE(review): newer ElementTree versions raise ParseError instead
        # of ExpatError - confirm which one the imported ElementTree uses.
        return Result(0, off), _build_returnable_parameters(query_terms)
    print("artstor 61 \n\tfile %s\n\tresults %s\n" % (html_page, results))

    # findtext() returns None when the element is missing; fall back to 0
    # *before* int() so a response without a count is treated as "no
    # results" instead of raising TypeError.
    count_text = results.findtext(
        '{http://www.loc.gov/zing/srw/}numberOfRecords')
    num_results = int(count_text or 0)
    if not num_results:
        # other type of error or no results found
        return Result(0, off), _build_returnable_parameters(query_terms)

    # pages = int(math.ceil(float(total) / pagesize))
    # NOTE(review): the second Result argument is total hits + offset here;
    # confirm that is the value Result expects.
    result = Result(num_results, num_results + off)
    image_divs = results.findall('.//{info:srw/schema/1/dc-v1.1}dc')
    print("artstor 77 query_terms %s" % query_terms)
    for div in image_divs:
        image_url, thumb, image_identifier, title = _get_image(div)
        result.addImage(ResultImage(image_url, thumb, title, image_identifier))
    # TODO cope with full image not actually giving result (timeout error)
    return result, _build_returnable_parameters(query_terms)
def search(query, params, off, num_results_wanted):
    """Search ARTstor and return the matching images and search parameters.

    ``query`` and ``params`` both come from the sidebar, so exactly one of
    them is expected to be set.

    Args:
        query: Query string; translated into search terms with
            ``Query_Language(identifier).searcher_translator``.
        params: Sidebar parameters; parsed with ``parse_parameters`` when
            ``query`` is empty.
        off: Offset of the first wanted result; coerced with ``int()``.
        num_results_wanted: Page size handed to ``_get_url``.

    Returns:
        A ``(Result, parameters)`` tuple. The result is empty when there is
        nothing to search for, when the response cannot be parsed, or when
        the response reports no records.

    Raises:
        NotImplementedError: If both ``query`` and ``params`` are supplied.
    """
    off = int(off)

    # query and params both come from sidebar, so should have exactly one.
    if not query and not params:
        return Result(0, off), get_empty_parameters()
    if query and params:
        print("artstor 34, shouldn't have reached here... "
              "Have both query (%s) and params (%s)" % (query, params))
        raise NotImplementedError
    if query:
        query_terms = Query_Language(identifier).searcher_translator(query)
    else:
        query_terms = parse_parameters(params)
    print("artstor 48 query_terms %s" % query_terms)

    # Return empty result if no search terms (submitting an empty query
    # breaks artstor). Value comparison replaces `is 0`, which relied on
    # CPython caching small integers.
    if len(query_terms) == 0:
        return Result(0, 0), get_empty_parameters()

    # Caching of results, uncomment and fix if supported in other searchers
    # TODO
    # cached, created = HitCount.current_objects.get_or_create(
    #     source=self.get_source_id(),
    #     query='%s [%s:%s]' % (keyword, page, pagesize),
    #     defaults=dict(hits=0, valid_until=datetime.datetime.now() + datetime.timedelta(1)))
    # if not created and cached.results:
    #     return simplejson.loads(cached.results)

    pagesize = num_results_wanted
    url = _get_url(query_terms, pagesize, off)
    html_page = _get_html_page(url)

    try:
        results = ElementTree(file=html_page)
    except ExpatError:
        # XML parsing error
        # NOTE(review): newer ElementTree versions raise ParseError instead
        # of ExpatError - confirm which one the imported ElementTree uses.
        return Result(0, off), _build_returnable_parameters(query_terms)
    print("artstor 61 \n\tfile %s\n\tresults %s\n" % (html_page, results))

    # A response without a numberOfRecords element makes findtext() return
    # None; default to 0 before converting so it counts as "no results"
    # rather than raising TypeError.
    count_text = results.findtext(
        '{http://www.loc.gov/zing/srw/}numberOfRecords')
    num_results = int(count_text or 0)
    if not num_results:
        # other type of error or no results found
        return Result(0, off), _build_returnable_parameters(query_terms)

    # pages = int(math.ceil(float(total) / pagesize))
    # NOTE(review): the second Result argument is total hits + offset here;
    # confirm that is the value Result expects.
    result = Result(num_results, num_results + off)
    image_divs = results.findall('.//{info:srw/schema/1/dc-v1.1}dc')
    print("artstor 77 query_terms %s" % query_terms)
    for div in image_divs:
        image_url, thumb, image_identifier, title = _get_image(div)
        result.addImage(ResultImage(image_url, thumb, title, image_identifier))
    # TODO cope with full image not actually giving result (timeout error)
    return result, _build_returnable_parameters(query_terms)
def search(query, params, off, num_results_wanted):
    """Search ARTstor by keyword and return the hits with the parameters used.

    ``query`` and ``params`` both come from the sidebar, so exactly one of
    them is expected to be set. Modifiers and advanced search are disabled
    for now: only the search term stored under the empty-string key is sent.

    Args:
        query: Query string; translated into search terms with
            ``Query_Language(identifier).searcher_translator``.
        params: Sidebar parameters; parsed with ``parse_parameters`` when
            ``query`` is empty.
        off: Offset of the first wanted result; coerced with ``int()``.
        num_results_wanted: Page size handed to ``_get_url``.

    Returns:
        A ``(Result, parameters)`` tuple. The result is empty when there is
        nothing to search for, when the server returns no data, when the
        request or parsing fails, or when the response reports no records.

    Raises:
        NotImplementedError: If both ``query`` and ``params`` are supplied.
    """
    off = int(off)

    # query and params both come from sidebar, so should have exactly one.
    if not query and not params:
        return Result(0, off), get_empty_parameters()
    if query and params:
        raise NotImplementedError
    if query:
        query_terms = Query_Language(identifier).searcher_translator(query)
    else:
        query_terms = parse_parameters(params)

    for key in query_terms:
        query_terms[key] = list_to_str(query_terms[key])

    # Disable modifiers and adv search for now
    # Todo: Work out if artstor can process adv search and modifiers
    # Adding Adv sidebar if possible
    #
    # Only the entry under the empty-string key is kept. Keys are collected
    # first because the mapping must not change size while it is iterated.
    # This also removes "query_string", so no separate deletion is needed.
    for key in [term for term in query_terms if term != ""]:
        del query_terms[key]

    # Return empty result if no search terms (submitting an empty query
    # breaks artstor). Compare by value: `is 0` only worked by accident of
    # CPython's small-integer caching.
    if len(query_terms) == 0:
        return Result(0, 0), get_empty_parameters()

    # Caching of results, uncomment and fix if supported in other searchers
    # TODO
    # cached, created = HitCount.current_objects.get_or_create(
    #     source=self.get_source_id(),
    #     query='%s [%s:%s]' % (keyword, page, pagesize),
    #     defaults=dict(hits=0, valid_until=datetime.datetime.now() + datetime.timedelta(1)))
    # if not created and cached.results:
    #     return simplejson.loads(cached.results)

    pagesize = num_results_wanted
    url = _get_url(query_terms, pagesize, off)

    try:
        html_page = _get_html_page(url)
        if not html_page:
            print("ArtStor did not get any data from server, "
                  "make sure MDID can reach the server through the firewall")
            return Result(0, off), _build_returnable_parameters(query_terms)
        results = ElementTree(file=html_page)
        # findtext() returns None when the element is missing; default to 0
        # before converting so that case is a plain "no results".
        count_text = results.findtext(
            '{http://www.loc.gov/zing/srw/}numberOfRecords')
        num_results = int(count_text or 0)
    except Exception:
        # Best-effort: any fetch or parse failure yields an empty result.
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        print("ArtStor XML parsing error")
        num_results = 0

    if not num_results:
        # other type of error or no results found
        return Result(0, off), _build_returnable_parameters(query_terms)

    # pages = int(math.ceil(float(total) / pagesize))
    # NOTE(review): the 50 is hard-coded rather than derived from pagesize;
    # confirm what the second Result argument is meant to be.
    result = Result(num_results, off + 50)
    image_divs = results.findall('.//{info:srw/schema/1/dc-v1.1}dc')
    for div in image_divs:
        image_url, thumb, image_identifier, title = _get_image(div)
        result.addImage(ResultImage(image_url, thumb, title, image_identifier))
    # TODO cope with full image not actually giving result (timeout error)
    return result, _build_returnable_parameters(query_terms)