def build_parameters(query, params):
    """Assemble the search parameters and the advanced-search URL.

    When ``params`` is empty it is produced by translating ``query`` with
    ``Query_Language(identifier).searcher_translator``.  Terms stored under
    '-' are folded into the 'exclude words' value (joined with "+"), and the
    merged value is written back into ``params`` -- so the object passed in
    is modified.

    Returns a ``(params, url_base)`` tuple; every space in the URL is
    replaced by "+".
    """
    if not params:
        params = Query_Language(identifier).searcher_translator(query)

    # Values are fetched in a fixed order: the three word filters first,
    # then '-', and only afterwards the remaining fields.
    leading = [
        ("all_words", getValue(params, 'all words')),
        ("exact_phrase", getValue(params, 'exact phrase')),
    ]
    excluded = getValue(params, 'exclude words')
    negated = getValue(params, '-')
    if negated:
        if excluded:
            excluded += "+" + negated
        else:
            excluded = negated
    if excluded:
        params.update({"exclude words": [excluded]})

    # (url argument name, key looked up in params)
    trailing_fields = (
        ("artist_last_name", 'artist'),
        ("keywords_in_title", 'title'),
        ("accession_num", 'accession number'),
        ("school", 'school'),
        ("classification", 'classification'),
        ("medium", 'medium'),
        ("year", 'start date'),
        ("year2", 'end date'),
        ("open_access", 'access'),
    )
    pairs = leading + [("exclude_words", excluded)]
    pairs += [(url_key, getValue(params, param_key))
              for url_key, param_key in trailing_fields]

    # build up the url
    url_base = BASE_ADVANCED_SEARCH_URL + "".join(
        "&" + url_key + "=" + value for url_key, value in pairs)
    # replace every space left in the url with "+"
    return params, url_base.replace(" ", "+")
def search(term, params, off, len):
    """Run a search and wrap the hits in a ``Result``.

    ``term`` is only used when ``params`` is empty: it is then translated
    with ``Query_Language(identifier).searcher_translator``.  ``params``,
    ``off`` and ``len`` are forwarded to ``build_url``.  Note that the
    ``len`` parameter shadows the builtin inside this function.

    Returns ``(result, {})``.  With neither ``term`` nor ``params`` the
    result is ``Result(0, 1)`` and no request is made.
    """
    if DEBUG:
        print("CA Search")
    if not term and not params:
        return Result(0, 1), {}
    if not params:
        params = Query_Language(identifier).searcher_translator(term)

    url = build_url(params, off, len)
    raw_data = get_data(url)
    data, count, num_results = parse_data(raw_data)

    # The next offset never runs past the total number of hits.
    next_off = int(off) + int(num_results)
    result = Result(count, next_off if next_off < count else count)

    for image in data.values():
        # Per-image dump is debug output; keep it behind the same switch as
        # the banner above instead of writing to stdout on every search.
        if DEBUG:
            print(image)
        caption = (image["name"] + ", by: " + image["artist"] + ". "
                   + image["description"])
        result.addImage(
            ResultImage(
                image["url"],
                image["thumb"],
                rebuild(caption),
                json.dumps(image),
            )
        )
    return result, {}
def build_parameters(query, params):
    """Return ``(params, url_base)`` for an advanced search.

    An empty ``params`` is replaced by the translation of ``query`` obtained
    from ``Query_Language(identifier).searcher_translator``.  The '-' value,
    when present, is appended to (or becomes) the 'exclude words' value and
    the result is stored back into ``params`` as a one-element list.
    """
    if not params:
        params = Query_Language(identifier).searcher_translator(query)

    words_all = getValue(params, 'all words')
    phrase = getValue(params, 'exact phrase')
    words_excluded = getValue(params, 'exclude words')
    words_negated = getValue(params, '-')
    if words_negated:
        if words_excluded:
            words_excluded += "+" + words_negated
        else:
            words_excluded = words_negated
    if words_excluded:
        params.update({"exclude words": [words_excluded]})

    artist_name = getValue(params, 'artist')
    title_keywords = getValue(params, 'title')
    accession = getValue(params, 'accession number')
    school_name = getValue(params, 'school')
    classification_name = getValue(params, 'classification')
    medium_name = getValue(params, 'medium')
    year_from = getValue(params, 'start date')
    year_to = getValue(params, 'end date')
    open_access = getValue(params, 'access')

    # build up the url: each argument is an "&name=" label followed by its value
    segments = [
        "&all_words=", words_all,
        "&exact_phrase=", phrase,
        "&exclude_words=", words_excluded,
        "&artist_last_name=", artist_name,
        "&keywords_in_title=", title_keywords,
        "&accession_num=", accession,
        "&school=", school_name,
        "&classification=", classification_name,
        "&medium=", medium_name,
        "&year=", year_from,
        "&year2=", year_to,
        "&open_access=", open_access,
    ]
    url_base = BASE_ADVANCED_SEARCH_URL + "".join(segments)

    # turn every space in the assembled url into "+"
    url_base = url_base.replace(" ", "+")
    return params, url_base
def count(keyword):
    """Return the count that ``parse_data`` reports for ``keyword``.

    The keyword is translated with
    ``Query_Language(identifier).searcher_translator`` and fetched through
    ``build_url(terms, 0, 1)`` (presumably offset 0 and a single result --
    confirm against ``build_url``).
    """
    terms = Query_Language(identifier).searcher_translator(keyword)
    response = get_data(build_url(terms, 0, 1))
    # Only the middle element of the parsed triple is needed.
    _data, total, _num_results = parse_data(response)
    return total
def build_URL(query, query_terms):
    """Build the request URL and argument set for a query.

    If ``query_terms`` is empty, ``query`` is translated with
    ``Query_Language(identifier).searcher_translator`` first.  The
    "TROVE_KEY" placeholder in ``API_URL`` is replaced by ``TROVE_KEY`` and
    the result is handed to ``parse_trove_query`` together with a fresh
    ``empty_arg()``.

    Returns the ``(url, arg)`` pair produced by ``parse_trove_query``.
    """
    if not query_terms:
        query_terms = Query_Language(identifier).searcher_translator(query)

    keyed_url = API_URL.replace("TROVE_KEY", TROVE_KEY)
    blank_arg = empty_arg()
    final_url, final_arg = parse_trove_query(keyed_url, query_terms, blank_arg)
    return final_url, final_arg
def build_URL(query, query_terms):
    """Return ``(url, arg)`` for the given query or pre-built query terms.

    ``query`` is only consulted when ``query_terms`` is empty; it is then
    translated through ``Query_Language(identifier).searcher_translator``.
    """
    terms = query_terms
    if not terms:
        translator = Query_Language(identifier)
        terms = translator.searcher_translator(query)

    # Substitute the key placeholder before the terms are applied to the url.
    base = API_URL.replace("TROVE_KEY", TROVE_KEY)
    url, arg = parse_trove_query(base, terms, empty_arg())
    return url, arg
def build_URL(query, params): """Decide whether the search url should be simple or advanced and delegate to build_simple_url(query) or build_advanced_url(params). A query given without params is first translated with Query_Language(identifier).searcher_translator; parse_gallica(params) supplies the (query, params) pair used for the decision. Returns (None, get_empty_params()) when both query and params are empty, otherwise whatever the chosen builder returns. NOTE(review): the line structure of this function was lost, so confirm whether parse_gallica runs only on the translated path or on every call.""" if query and not params: ql = Query_Language(identifier) params = ql.searcher_translator(query) query, params = parse_gallica(params) if not query and not params: return None, get_empty_params() if query : return build_simple_url(query) return build_advanced_url(params)
def search(term, params, off, len):
    """Run a search and wrap the hits in a ``Result``.

    ``term`` is only used when ``params`` is empty: it is then translated
    with ``Query_Language(identifier).searcher_translator``.  ``params``,
    ``off`` and ``len`` are forwarded to ``build_url``.  Note that the
    ``len`` parameter shadows the builtin inside this function.

    Returns ``(result, {})``.  With neither ``term`` nor ``params`` the
    result is ``Result(0, 1)`` and no request is made.
    """
    if DEBUG:
        print("CA Search")
    if not term and not params:
        return Result(0, 1), {}
    if not params:
        params = Query_Language(identifier).searcher_translator(term)

    url = build_url(params, off, len)
    raw_data = get_data(url)
    data, count, num_results = parse_data(raw_data)

    # The next offset never runs past the total number of hits.
    next_off = int(off) + int(num_results)
    result = Result(count, next_off if next_off < count else count)

    for image in data.values():
        # Per-image dump is debug output; keep it behind the same switch as
        # the banner above instead of writing to stdout on every search.
        if DEBUG:
            print(image)
        caption = (image['name'] + ", by: " + image['artist'] + ". "
                   + image['description'])
        result.addImage(ResultImage(image["url"], image['thumb'],
                                    rebuild(caption), json.dumps(image)))
    return result, {}
def search(query, params, off, num_results_wanted):
    """Run an artstor search from either a query string or sidebar params.

    Exactly one of ``query`` and ``params`` is expected:

    * neither given -> ``(Result(0, off), get_empty_parameters())``
    * both given    -> ``NotImplementedError`` is raised
    * ``query``     -> translated with ``Query_Language(identifier)``
    * ``params``    -> converted with ``parse_parameters``

    ``off`` is converted with ``int()``; ``num_results_wanted`` is passed to
    ``_get_url`` as the page size.

    Returns ``(Result, parameters)``.  When the response cannot be parsed
    (``ExpatError``) or reports no records, the result is ``Result(0, off)``
    with ``_build_returnable_parameters(query_terms)``.
    """
    off = int(off)

    # query and params both come from sidebar, so should have exactly one.
    if not query and not params:
        return Result(0, off), get_empty_parameters()
    elif query and params:
        print("artstor 34, shouldn't have reached here... Have both query (%s) and params (%s)" % (
            query, params))
        raise NotImplementedError
    elif query:
        query_terms = Query_Language(identifier).searcher_translator(query)
    else:
        query_terms = parse_parameters(params)
    print("artstor 48 query_terms %s" % query_terms)

    # return empty result if no search terms (submitting an empty query breaks artstor)
    # Truthiness test instead of ``len(...) is 0``: identity comparison with
    # an int literal only works by accident of small-int caching.
    if not query_terms:
        return Result(0, 0), get_empty_parameters()

    # Caching of results, uncomment and fix if supported in other searchers
    # TODO
    # cached, created = HitCount.current_objects.get_or_create(
    #     source=self.get_source_id(),
    #     query='%s [%s:%s]' % (keyword, page, pagesize),
    #     defaults=dict(hits=0, valid_until=datetime.datetime.now() + datetime.timedelta(1)))
    # if not created and cached.results:
    #     return simplejson.loads(cached.results)

    url = _get_url(query_terms, num_results_wanted, off)
    html_page = _get_html_page(url)
    try:
        results = ElementTree(file=html_page)
        print("artstor 61 \n\tfile %s\n\tresults %s\n" % (html_page, results))
        # findtext() yields None when the element is missing; default to 0
        # *before* int() so a missing count is treated as "no results".
        num_results = int(
            results.findtext(
                '{http://www.loc.gov/zing/srw/}numberOfRecords') or 0)
    except ExpatError:
        # XML parsing error
        num_results = 0

    if not num_results:
        # other type of error or no results found
        return Result(0, off), _build_returnable_parameters(query_terms)

    # pages = int(math.ceil(float(total) / pagesize))
    # NOTE(review): the second argument is total records + offset; confirm
    # that this is the value Result expects here.
    result = Result(num_results, num_results + off)
    image_divs = results.findall('.//{info:srw/schema/1/dc-v1.1}dc')
    print("artstor 77 query_terms %s" % query_terms)
    for div in image_divs:
        image_url, thumb, image_identifier, title = _get_image(div)
        result.addImage(ResultImage(image_url, thumb, title, image_identifier))
        # TODO cope with full image not actually giving result (timeout error)
    return result, _build_returnable_parameters(query_terms)
def _translate_query(query):
    """Convert a universal-query-language query into dnz query terms."""
    return Query_Language(identifier).searcher_translator(query)
def _translate_query(query):
    """Return the terms ``Query_Language(identifier)`` derives from ``query``."""
    language = Query_Language(identifier)
    return language.searcher_translator(query)