def __init__(self):
    """Create the search request and register the ``search.html`` template."""
    from medin.dws import SearchRequest

    self.request = SearchRequest()

    templates = ['%s', 'search.html']
    super(Search, self).__init__(templates, check_etag=False)

    # ensure emails are obfuscated when rendered
    email_filter = ObfuscateEmails()
    self.filters.append(email_filter)
class MetadataHTML(Metadata):
    """HTML view of a metadata record, rendered with ``metadata.html``."""

    def __init__(self):
        """Create the search request, compile the URL pattern and add the email filter."""
        import re
        from medin.dws import SearchRequest

        self.search_request = SearchRequest()

        templates = ['%s', 'metadata.html']
        super(MetadataHTML, self).__init__(templates)

        # URL-matching pattern, see
        # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        url_regex = r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))"""
        self.url_pattern = re.compile(url_regex)

        # ensure emails are obfuscated when rendered
        self.filters.append(ObfuscateEmails())

    def setup(self, environ):
        """
        Build the template context for the metadata page

        The query is taken from the HTTP referrer; its string form is
        handed to the base ``setup`` as etag data and is also exposed to
        the template (prefixed with ``?`` when non-empty).
        """
        from medin.dws import RESULT_SIMPLE

        query = get_query(environ, True)  # get the query from the HTTP referrer
        referrer_query_string = str(query)

        # call the base setup, using the referrer query string as etag data
        base = super(MetadataHTML, self)
        parser, headers = base.setup(environ, referrer_query_string)

        criteria = query.asDict(False)

        # Re-run the referring search; its hit count goes to the template.
        logger = environ['logging.logger']
        self.search_request.prepareCaller(query, RESULT_SIMPLE, logger)
        result = self.search_request()

        if referrer_query_string:
            referrer_query_string = '?' + referrer_query_string

        metadata = parser.parse()
        title = 'Metadata: %s' % metadata.title

        # urlify strings
        if metadata.additional_info:
            metadata.additional_info = self.urlify(metadata.additional_info)

        # One label per custodian contact: organisation if set, else name;
        # contacts with neither are left out.
        custodians = []
        for contact in metadata.responsible_party.getContactsForRole('custodian'):
            if not contact:
                continue
            label = contact.organisation or contact.name
            if label:
                custodians.append(label)

        tvars = {
            'metadata': metadata,
            'criteria': criteria,
            'referrer_query_string': referrer_query_string,
            'custodians': custodians,
            'hits': result.hits,
        }
        return TemplateContext(title, tvars=tvars, headers=headers)

    def urlify(self, text):
        """
        Create active HTML links out of plain text URLs
        """
        # see http://stackoverflow.com/questions/520031/whats-the-cleanest-way-to-extract-urls-from-a-string-using-python
        # NOTE(review): the matched URL is inserted into the markup as-is;
        # confirm that callers pass text which is already safe for HTML.
        def as_anchor(match):
            url = str(match.group())
            return '<a href="%(url)s">%(url)s</a>' % {'url': url}

        return self.url_pattern.sub(as_anchor, text)
def __init__(self):
    """Create the search request, compile the URL pattern and add the email filter."""
    import re
    from medin.dws import SearchRequest

    self.search_request = SearchRequest()

    templates = ['%s', 'metadata.html']
    super(MetadataHTML, self).__init__(templates)

    # URL-matching pattern, see
    # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
    url_regex = r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))"""
    self.url_pattern = re.compile(url_regex)

    # ensure emails are obfuscated when rendered
    email_filter = ObfuscateEmails()
    self.filters.append(email_filter)
class ResultSummary(object):
    """WSGI application that reports a summary of a search as JSON."""

    def __init__(self):
        """Create the search request that is reused for every call."""
        from medin.dws import SearchRequest

        self.request = SearchRequest()

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller
        """
        from medin.dws import RESULT_SIMPLE

        q = get_query(environ)
        q.setCount(0)  # we don't need any results
        return self.request.prepareCaller(q, RESULT_SIMPLE, environ['logging.logger'])

    def __call__(self, environ, start_response):
        """
        Run the search and answer with a JSON summary

        The JSON object has three keys: ``status`` (truthiness of the
        result), ``hits`` (the result's hit count) and ``time`` (the
        runtime reported by ``environ['portal.timer']``).

        Returns a one-element list holding the encoded response body.
        """
        from json import dumps as tojson

        self.prepareSOAP(environ)
        r = self.request()

        body = tojson({
            'status': bool(r),
            'hits': r.hits,
            'time': environ['portal.timer'].runtime()
        })
        # WSGI response bodies must be bytestrings (PEP 3333). On Python 2
        # the dumped value already is one, so only encode when it is text.
        if not isinstance(body, bytes):
            body = body.encode('utf-8')

        headers = [('Content-Type', 'application/json')]
        start_response('200 OK', headers)
        return [body]
class ResultSummary(object):
    """WSGI application that reports a summary of a search as JSON."""

    def __init__(self):
        """Create the search request that is reused for every call."""
        from medin.dws import SearchRequest

        self.request = SearchRequest()

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller
        """
        from medin.dws import RESULT_SIMPLE

        q = get_query(environ)
        q.setCount(0)  # we don't need any results
        return self.request.prepareCaller(q, RESULT_SIMPLE, environ['logging.logger'])

    def __call__(self, environ, start_response):
        """
        Run the search and answer with a JSON summary

        The JSON object carries ``status`` (truthiness of the result),
        ``hits`` (the result's hit count) and ``time`` (the runtime
        reported by ``environ['portal.timer']``).

        Returns a one-element list holding the encoded response body.
        """
        from json import dumps as tojson

        self.prepareSOAP(environ)
        r = self.request()

        summary = {'status': bool(r),
                   'hits': r.hits,
                   'time': environ['portal.timer'].runtime()}
        payload = tojson(summary)
        # PEP 3333 requires the response iterable to yield bytestrings;
        # encode only when the dumped value is text (i.e. on Python 3).
        if not isinstance(payload, bytes):
            payload = payload.encode('utf-8')

        headers = [('Content-Type', 'application/json')]
        start_response('200 OK', headers)
        return [payload]
def __init__(self):
    """Set up the search request, the ``search.html`` template and the email filter."""
    from medin.dws import SearchRequest

    self.request = SearchRequest()

    template_names = ['%s', 'search.html']
    super(Search, self).__init__(template_names, check_etag=False)

    # ensure emails are obfuscated when rendered
    obfuscator = ObfuscateEmails()
    self.filters.append(obfuscator)
def __init__(self):
    """Set up the search request, the URL pattern and the email filter."""
    import re
    from medin.dws import SearchRequest

    self.search_request = SearchRequest()

    template_names = ['%s', 'metadata.html']
    super(MetadataHTML, self).__init__(template_names)

    # Pattern used to find URLs in plain text, see
    # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
    pattern_source = r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))"""
    self.url_pattern = re.compile(pattern_source)

    # ensure emails are obfuscated when rendered
    obfuscator = ObfuscateEmails()
    self.filters.append(obfuscator)
class ResultsRequest(object):
    """
    Perform a request to obtain results from the DWS for a search

    This class is needed as it encapsulates the request. The result of
    the request can then be formatted as required, e.g. either by
    MakoAppResults or CSVResults.
    """

    def __init__(self, result_type):
        """Store *result_type* and create the underlying search request."""
        from medin.dws import SearchRequest

        self.request = SearchRequest()
        self.result_type = result_type

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller
        """
        query = get_query(environ)

        # Report each verification error; the caller is prepared regardless.
        for problem in query.verify() or ():
            msg_error(environ, problem)

        logger = environ['logging.logger']
        return self.request.prepareCaller(query, self.result_type, logger)

    def __call__(self, environ):
        """
        Run the request and return a ``(result, etag)`` pair

        The etag comes from ``check_etag``, fed with the result's
        last-modified time formatted as a date string.
        """
        self.prepareSOAP(environ)
        result = self.request()

        date_format = "%a, %d %b %Y %H:%M:%S GMT"
        timestamp = result.lastModified().strftime(date_format)

        return result, check_etag(environ, timestamp)
def __init__(self):
    """Create the search request held on ``self.request``."""
    from medin.dws import SearchRequest

    search_request = SearchRequest()
    self.request = search_request
def __init__(self, result_type):
    """Store *result_type* and create the search request held on ``self.request``."""
    from medin.dws import SearchRequest

    self.request = SearchRequest()
    self.result_type = result_type
class Search(MakoApp):
    """Search page: runs the current query and fills the ``search.html`` template."""

    def __init__(self):
        """Create the search request, register the template and add the email filter."""
        from medin.dws import SearchRequest

        self.request = SearchRequest()

        templates = ['%s', 'search.html']
        super(Search, self).__init__(templates, check_etag=False)

        # ensure emails are obfuscated when rendered
        self.filters.append(ObfuscateEmails())

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller
        """
        from medin.dws import RESULT_SIMPLE

        query = get_query(environ)

        # Report each verification error; the caller is prepared regardless.
        for problem in query.verify() or ():
            msg_error(environ, problem)

        # Original note: "we only need one result".
        # NOTE(review): the count is set to 0 here - confirm the intent.
        query.setCount(0)

        # NOTE(review): this value is read but never used in this method.
        unused_sort = query.getSort(cast=False)

        # Get the results in descending order so the result can be
        # used in an etag (as it is the latest).
        query.setSort('updated,0')

        # generate the soap caller
        logger = environ['logging.logger']
        return self.request.prepareCaller(query, RESULT_SIMPLE, logger)

    def setup(self, environ):
        """
        Run the search and build the template context for the page

        Returns a ``TemplateContext`` carrying the template variables plus
        ``Etag`` and ``Cache-Control`` headers.
        """
        db = get_db(environ)
        vocab = get_vocab(environ)

        # run the query
        self.prepareSOAP(environ)
        result = self.request()

        # check the etag: the first result is the etag data, 'none' if empty
        ids = list(result)
        docid = ids[0] if ids else 'none'
        etag = check_etag(environ, docid)

        areas = get_areas(environ)
        query = get_query(environ)

        # We need to get the number of hits for the query.
        count = query.getCount(default=None)
        search_term = query.getSearchTerm(cast=False)
        sort = query.getSort(cast=False)
        bboxes = query.getBoxes()
        start_date = query.getStartDate(cast=False)
        end_date = query.getEndDate(cast=False)
        area = query.getArea(cast=False)
        criteria = query.asDict(False)

        area_type = areas.getAreaType(area)
        area_ids = {'british-isles': areas.britishIsles(),
                    'countries': areas.countries(),
                    'sea-areas': areas.seaAreas(),
                    'progress-areas': areas.chartingProgressAreas(),
                    'ices-rectangles': areas.icesRectangles()}

        def selected(name):
            # First element of every criteria entry stored under *name*.
            return set(entry[0] for entry in criteria[name])

        # get the themes for the dropdowns; each level depends on the
        # selection made at the level above it
        data_themes = vocab.getDataThemeIds()
        selected_data_themes = selected('data_themes')
        sub_themes = vocab.getSubThemeIdsForDataThemeIds(selected_data_themes)
        selected_sub_themes = selected('sub_themes')
        parameters = vocab.getParameterIdsForSubThemeIds(selected_sub_themes)
        selected_parameters = selected('parameters')

        # get the data formats
        data_formats = vocab.getDataFormatIds()
        selected_data_formats = selected('data_formats')

        # get the access types
        access = vocab.getAccessTypeIds()
        selected_access_types = selected('access_types')

        # get the data holders (their ids are converted to integers)
        data_holders = db.getDataHolders()
        selected_data_holders = set(int(entry[0]) for entry in criteria['data_holders'])

        tvars = {
            'search_term': search_term,
            'hits': result.hits,
            'criteria': criteria,
            'count': count,
            'sort': sort,
            'start_date': start_date,
            'end_date': end_date,
            'area': area,
            'area_type': area_type,
            'area_ids': area_ids,
            'data_formats': data_formats,
            'selected_data_formats': selected_data_formats,
            'access_types': access,
            'selected_access_types': selected_access_types,
            'data_themes': data_themes,
            'selected_data_themes': selected_data_themes,
            'sub_themes': sub_themes,
            'selected_sub_themes': selected_sub_themes,
            'parameters': parameters,
            'selected_parameters': selected_parameters,
            'data_holders': data_holders,
            'selected_data_holders': selected_data_holders,
            'bboxes': bboxes,
        }

        headers = [
            # propagate the result update time to the HTTP layer
            ('Etag', etag),
            ('Cache-Control', 'no-cache, must-revalidate'),
        ]

        return TemplateContext('Search the MEDIN Data Archive Centres',
                               tvars=tvars,
                               headers=headers)
class MetadataHTML(Metadata):
    """HTML rendering of a metadata record via the ``metadata.html`` template."""

    def __init__(self):
        """Set up the search request, the URL pattern and the email filter."""
        import re
        from medin.dws import SearchRequest

        self.search_request = SearchRequest()

        template_names = ['%s', 'metadata.html']
        super(MetadataHTML, self).__init__(template_names)

        # Pattern used to find URLs in plain text, see
        # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        pattern_source = r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))"""
        self.url_pattern = re.compile(pattern_source)

        # ensure emails are obfuscated when rendered
        self.filters.append(ObfuscateEmails())

    def setup(self, environ):
        """
        Assemble the template context for the metadata page

        The query comes from the HTTP referrer. Its string form serves as
        etag data for the base ``setup`` and is passed to the template,
        prefixed with ``?`` when it is not empty.
        """
        from medin.dws import RESULT_SIMPLE

        referrer_query = get_query(environ, True)  # get the query from the HTTP referrer
        referrer_query_string = str(referrer_query)

        # call the base setup, using the referrer query string as etag data
        parent = super(MetadataHTML, self)
        parser, headers = parent.setup(environ, referrer_query_string)

        criteria = referrer_query.asDict(False)

        # Re-run the referring search; its hit count goes to the template.
        self.search_request.prepareCaller(referrer_query,
                                          RESULT_SIMPLE,
                                          environ['logging.logger'])
        search_result = self.search_request()

        if referrer_query_string:
            referrer_query_string = '?' + referrer_query_string

        metadata = parser.parse()
        title = 'Metadata: %s' % metadata.title

        # urlify strings
        if metadata.additional_info:
            metadata.additional_info = self.urlify(metadata.additional_info)

        # Label every custodian contact by organisation, falling back to
        # name; contacts providing neither are skipped.
        custodians = []
        for party in metadata.responsible_party.getContactsForRole('custodian'):
            if not party:
                continue
            display_name = party.organisation or party.name
            if display_name:
                custodians.append(display_name)

        tvars = {
            'metadata': metadata,
            'criteria': criteria,
            'referrer_query_string': referrer_query_string,
            'custodians': custodians,
            'hits': search_result.hits,
        }
        return TemplateContext(title, tvars=tvars, headers=headers)

    def urlify(self, text):
        """
        Create active HTML links out of plain text URLs
        """
        # see http://stackoverflow.com/questions/520031/whats-the-cleanest-way-to-extract-urls-from-a-string-using-python
        # NOTE(review): the matched URL is placed into the markup unchanged;
        # confirm that *text* is already safe to embed in HTML.
        def make_link(found):
            target = str(found.group())
            return '<a href="%(url)s">%(url)s</a>' % {'url': target}

        return self.url_pattern.sub(make_link, text)
class Search(MakoApp):
    """Search form page rendered with ``search.html`` from the current query."""

    def __init__(self):
        """Set up the search request, the template and the email filter."""
        from medin.dws import SearchRequest

        self.request = SearchRequest()

        template_names = ['%s', 'search.html']
        super(Search, self).__init__(template_names, check_etag=False)

        # ensure emails are obfuscated when rendered
        self.filters.append(ObfuscateEmails())

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller
        """
        from medin.dws import RESULT_SIMPLE

        current = get_query(environ)

        # Every verification error is reported; preparation continues anyway.
        for failure in current.verify() or ():
            msg_error(environ, failure)

        # Original note: "we only need one result".
        # NOTE(review): the count is set to 0 here - confirm the intent.
        current.setCount(0)

        # NOTE(review): the sort value is fetched but not used afterwards.
        ignored_sort = current.getSort(cast=False)

        # Get the results in descending order so the result can be
        # used in an etag (as it is the latest).
        current.setSort('updated,0')

        # generate the soap caller
        return self.request.prepareCaller(current,
                                          RESULT_SIMPLE,
                                          environ['logging.logger'])

    def setup(self, environ):
        """
        Execute the search and collect everything the template needs

        Returns a ``TemplateContext`` with the template variables and the
        ``Etag`` / ``Cache-Control`` response headers.
        """
        db = get_db(environ)
        vocab = get_vocab(environ)

        # run the query
        self.prepareSOAP(environ)
        response = self.request()

        # check the etag: use the first result, or 'none' without results
        found = list(response)
        docid = found[0] if found else 'none'
        etag = check_etag(environ, docid)

        areas = get_areas(environ)
        current = get_query(environ)

        # We need to get the number of hits for the query.
        count = current.getCount(default=None)
        search_term = current.getSearchTerm(cast=False)
        sort = current.getSort(cast=False)
        bboxes = current.getBoxes()
        start_date = current.getStartDate(cast=False)
        end_date = current.getEndDate(cast=False)
        area = current.getArea(cast=False)
        criteria = current.asDict(False)

        area_type = areas.getAreaType(area)
        area_ids = {
            'british-isles': areas.britishIsles(),
            'countries': areas.countries(),
            'sea-areas': areas.seaAreas(),
            'progress-areas': areas.chartingProgressAreas(),
            'ices-rectangles': areas.icesRectangles(),
        }

        def leading_values(key):
            # First element of each criteria entry found under *key*.
            return set(row[0] for row in criteria[key])

        # get the themes for the dropdowns; sub themes follow the selected
        # data themes, parameters follow the selected sub themes
        data_themes = vocab.getDataThemeIds()
        selected_data_themes = leading_values('data_themes')
        sub_themes = vocab.getSubThemeIdsForDataThemeIds(selected_data_themes)
        selected_sub_themes = leading_values('sub_themes')
        parameters = vocab.getParameterIdsForSubThemeIds(selected_sub_themes)
        selected_parameters = leading_values('parameters')

        # get the data formats
        data_formats = vocab.getDataFormatIds()
        selected_data_formats = leading_values('data_formats')

        # get the access types
        access = vocab.getAccessTypeIds()
        selected_access_types = leading_values('access_types')

        # get the data holders (ids are cast to int)
        data_holders = db.getDataHolders()
        selected_data_holders = set(int(row[0]) for row in criteria['data_holders'])

        tvars = {
            'search_term': search_term,
            'hits': response.hits,
            'criteria': criteria,
            'count': count,
            'sort': sort,
            'start_date': start_date,
            'end_date': end_date,
            'area': area,
            'area_type': area_type,
            'area_ids': area_ids,
            'data_formats': data_formats,
            'selected_data_formats': selected_data_formats,
            'access_types': access,
            'selected_access_types': selected_access_types,
            'data_themes': data_themes,
            'selected_data_themes': selected_data_themes,
            'sub_themes': sub_themes,
            'selected_sub_themes': selected_sub_themes,
            'parameters': parameters,
            'selected_parameters': selected_parameters,
            'data_holders': data_holders,
            'selected_data_holders': selected_data_holders,
            'bboxes': bboxes,
        }

        # propagate the result update time to the HTTP layer
        etag_header = ('Etag', etag)
        cache_header = ('Cache-Control', 'no-cache, must-revalidate')
        headers = [etag_header, cache_header]

        return TemplateContext('Search the MEDIN Data Archive Centres',
                               tvars=tvars,
                               headers=headers)