Ejemplo n.º 1
0
    def __init__(self):
        """
        Create the search request and initialise the base class

        The base initialiser receives the ['%s', 'search.html'] path
        spec with check_etag=False; afterwards an ObfuscateEmails
        filter is appended to self.filters.
        """
        from medin import dws

        self.request = dws.SearchRequest()

        template_spec = ['%s', 'search.html']
        super(Search, self).__init__(template_spec, check_etag=False)

        # ensure emails are obfuscated when rendered
        email_filter = ObfuscateEmails()
        self.filters.append(email_filter)
Ejemplo n.º 2
0
class MetadataHTML(Metadata):
    """
    Metadata view initialised with the ['%s', 'metadata.html'] path spec

    On top of the base Metadata behaviour this runs the query taken from
    the HTTP referrer to obtain its hit count, turns plain text URLs in
    the additional information into links and registers the email
    obfuscation filter.
    """

    def __init__(self):
        """
        Create the search request, the URL pattern and the output filter
        """
        import re
        from medin.dws import SearchRequest
        self.search_request = SearchRequest()

        super(MetadataHTML, self).__init__(['%s', 'metadata.html'])

        # see http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        self.url_pattern = re.compile(r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))""")

        # ensure emails are obfuscated when rendered
        self.filters.append(ObfuscateEmails())

    def setup(self, environ):
        """
        Build the template context for the metadata page

        environ is the request environment mapping; 'logging.logger' is
        read from it. Returns a TemplateContext titled after the
        metadata, carrying the headers produced by the base setup and
        the template variables metadata, criteria,
        referrer_query_string, custodians and hits.
        """
        from medin.dws import RESULT_SIMPLE

        q = get_query(environ, True)    # get the query from the HTTP referrer
        referrer_query_string = str(q)

        # call the base setup, using the referrer query string as etag data
        parser, headers = super(MetadataHTML, self).setup(environ, referrer_query_string)

        criteria = q.asDict(False)
        self.search_request.prepareCaller(q, RESULT_SIMPLE, environ['logging.logger'])
        r = self.search_request()

        # only a non-empty query string gets the leading '?'
        if referrer_query_string:
            referrer_query_string = '?' + referrer_query_string
        metadata = parser.parse()
        title = 'Metadata: %s' % metadata.title

        # urlify strings
        if metadata.additional_info:
            metadata.additional_info = self.urlify(metadata.additional_info)

        # prefer the organisation over the personal name; contacts that
        # provide neither are left out
        custodians = [
            contact.organisation or contact.name
            for contact in metadata.responsible_party.getContactsForRole('custodian')
            if contact and (contact.organisation or contact.name)
        ]

        tvars = dict(metadata=metadata,
                     criteria=criteria,
                     referrer_query_string=referrer_query_string,
                     custodians=custodians,
                     hits=r.hits)

        return TemplateContext(title, tvars=tvars, headers=headers)

    def urlify(self, text):
        """
        Create active HTML links out of plain text URLs

        Every match of self.url_pattern in text is replaced by an anchor
        element whose href and label are both the matched URL; the rest
        of text is returned untouched.
        """

        def as_link(match):
            # The match is interpolated directly: wrapping it in str() is
            # a no-op for native strings and raises UnicodeEncodeError
            # for non-ASCII unicode matches on Python 2.
            return '<a href="%(url)s">%(url)s</a>' % dict(url=match.group())

        # NOTE(review): neither the URL nor the surrounding text is
        # HTML-escaped here -- confirm that escaping is handled elsewhere.
        # see http://stackoverflow.com/questions/520031/whats-the-cleanest-way-to-extract-urls-from-a-string-using-python
        return self.url_pattern.sub(as_link, text)
Ejemplo n.º 3
0
    def __init__(self):
        """
        Create the search request, initialise the base class with the
        ['%s', 'metadata.html'] path spec, compile the URL pattern and
        register the email obfuscation filter
        """
        import re
        from medin import dws

        self.search_request = dws.SearchRequest()

        template_spec = ['%s', 'metadata.html']
        super(MetadataHTML, self).__init__(template_spec)

        # see http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        url_regex = r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))"""
        self.url_pattern = re.compile(url_regex)

        # ensure emails are obfuscated when rendered
        email_filter = ObfuscateEmails()
        self.filters.append(email_filter)
Ejemplo n.º 4
0
class ResultSummary(object):
    """
    WSGI callable reporting the hit count of a query as JSON

    The response body is a JSON object with the keys 'status', 'hits'
    and 'time'.
    """

    def __init__(self):
        """
        Create the search request used by every call
        """
        from medin.dws import SearchRequest

        self.request = SearchRequest()

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller

        The query obtained from environ has its count set to 0 before
        the caller is prepared; 'logging.logger' is read from environ.
        """
        from medin.dws import RESULT_SIMPLE

        q = get_query(environ)
        q.setCount(0)  # we don't need any results

        return self.request.prepareCaller(q, RESULT_SIMPLE,
                                          environ['logging.logger'])

    def __call__(self, environ, start_response):
        """
        Run the query and answer with its summary

        environ must provide 'portal.timer' (its runtime() value is
        reported as 'time'). start_response is called with '200 OK' and
        a JSON content type. Returns a one-item list holding the UTF-8
        encoded JSON document.
        """
        from json import dumps as tojson

        self.prepareSOAP(environ)
        r = self.request()

        summary = {
            'status': bool(r),
            'hits': r.hits,
            'time': environ['portal.timer'].runtime()
        }

        # PEP 3333 requires the response body to be bytes. Encoding is
        # harmless for the ASCII str produced on Python 2 and converts
        # the text str produced on Python 3.
        body = tojson(summary).encode('utf-8')

        headers = [('Content-Type', 'application/json')]

        start_response('200 OK', headers)
        return [body]
Ejemplo n.º 5
0
class ResultSummary(object):
    """
    WSGI callable that answers with a JSON summary of a query

    The JSON object holds 'status', 'hits' and 'time'.
    """

    def __init__(self):
        """
        Create the search request reused by each call
        """
        from medin.dws import SearchRequest

        self.request = SearchRequest()

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller

        Sets the count of the query obtained from environ to 0 and
        prepares the caller using the 'logging.logger' entry of environ.
        """
        from medin.dws import RESULT_SIMPLE

        q = get_query(environ)
        q.setCount(0)                   # we don't need any results

        return self.request.prepareCaller(q, RESULT_SIMPLE, environ['logging.logger'])

    def __call__(self, environ, start_response):
        """
        Execute the query and return the JSON summary as the response

        Reads 'portal.timer' from environ for the reported run time and
        calls start_response with '200 OK' and a JSON content type.
        Returns a list with a single UTF-8 encoded bytes item.
        """
        from json import dumps as tojson

        self.prepareSOAP(environ)
        r = self.request()

        document = tojson({'status': bool(r),
                           'hits': r.hits,
                           'time': environ['portal.timer'].runtime()})

        headers = [('Content-Type', 'application/json')]

        start_response('200 OK', headers)

        # WSGI (PEP 3333) bodies must be bytes: a no-op for the ASCII str
        # of Python 2, a real conversion for the text str of Python 3.
        return [document.encode('utf-8')]
Ejemplo n.º 6
0
    def __init__(self):
        """
        Create the search request, then initialise the base class with
        the ['%s', 'search.html'] path spec and check_etag=False
        """
        from medin import dws

        self.request = dws.SearchRequest()

        search_template = ['%s', 'search.html']
        super(Search, self).__init__(search_template, check_etag=False)

        # ensure emails are obfuscated when rendered
        obfuscator = ObfuscateEmails()
        self.filters.append(obfuscator)
Ejemplo n.º 7
0
    def __init__(self):
        """
        Set up the instance: search request, base class initialisation
        with the ['%s', 'metadata.html'] path spec, compiled URL pattern
        and the email obfuscation filter
        """
        import re
        from medin import dws

        self.search_request = dws.SearchRequest()

        metadata_template = ['%s', 'metadata.html']
        super(MetadataHTML, self).__init__(metadata_template)

        # see http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        url_regex = r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))"""
        self.url_pattern = re.compile(url_regex)

        # ensure emails are obfuscated when rendered
        obfuscator = ObfuscateEmails()
        self.filters.append(obfuscator)
Ejemplo n.º 8
0
class ResultsRequest(object):
    """
    Run a search against the DWS and hand back its result

    The request is encapsulated here so that the result can be
    formatted as required afterwards, e.g. either by MakoAppResults
    or CSVResults.
    """

    def __init__(self, result_type):
        """
        Remember the requested result type and create the search request
        """
        from medin import dws

        self.result_type = result_type
        self.request = dws.SearchRequest()

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller

        Errors found while verifying the query are reported one by one
        through msg_error before the caller is prepared.
        """
        query = get_query(environ)

        # a falsy verification result means there is nothing to report
        for problem in query.verify() or ():
            msg_error(environ, problem)

        return self.request.prepareCaller(
            query, self.result_type, environ['logging.logger'])

    def __call__(self, environ):
        """
        Execute the search

        Returns a (result, etag) pair; the etag comes from check_etag,
        fed with the formatted last modification time of the result.
        """
        self.prepareSOAP(environ)
        result = self.request()

        last_modified = result.lastModified()
        stamp = last_modified.strftime("%a, %d %b %Y %H:%M:%S GMT")

        return result, check_etag(environ, stamp)
Ejemplo n.º 9
0
class ResultsRequest(object):
    """
    Obtain the results of a search from the DWS

    Keeping the request in its own class lets the result be formatted
    in whichever way is required afterwards, e.g. either by
    MakoAppResults or CSVResults.
    """

    def __init__(self, result_type):
        """
        Store result_type and create the search request
        """
        from medin import dws

        self.result_type = result_type
        self.request = dws.SearchRequest()

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller

        Every error returned by verifying the query is passed to
        msg_error; the caller is prepared regardless.
        """
        q = get_query(environ)

        # nothing is reported when verification returns a falsy value
        for issue in q.verify() or ():
            msg_error(environ, issue)

        caller = self.request.prepareCaller(q, self.result_type, environ['logging.logger'])
        return caller

    def __call__(self, environ):
        """
        Run the request and return the result together with its etag

        The etag is whatever check_etag returns for the formatted last
        modification time of the result.
        """
        self.prepareSOAP(environ)
        response = self.request()

        timestamp = response.lastModified().strftime("%a, %d %b %Y %H:%M:%S GMT")
        etag = check_etag(environ, timestamp)

        return response, etag
Ejemplo n.º 10
0
    def __init__(self):
        """
        Create the medin.dws SearchRequest held as self.request
        """
        # function-scope import of the DWS module
        from medin import dws

        self.request = dws.SearchRequest()
Ejemplo n.º 11
0
    def __init__(self, result_type):
        """
        Store result_type and create the medin.dws SearchRequest held
        as self.request
        """
        # function-scope import of the DWS module
        from medin import dws

        self.result_type = result_type
        self.request = dws.SearchRequest()
Ejemplo n.º 12
0
class Search(MakoApp):
    """
    The search form page

    Runs the current query to obtain the hit count and etag data, then
    gathers the area, vocabulary and data holder listings passed to the
    template as variables.
    """

    def __init__(self):
        """
        Create the search request and initialise the base class with
        the ['%s', 'search.html'] path spec and check_etag=False
        """
        from medin.dws import SearchRequest

        self.request = SearchRequest()
        super(Search, self).__init__(['%s', 'search.html'], check_etag=False)

        # ensure emails are obfuscated when rendered
        self.filters.append(ObfuscateEmails())

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller

        Errors found while verifying the query are reported through
        msg_error. The query is then given a count of 0 and the sort
        'updated,0' before the caller is prepared; 'logging.logger' is
        read from environ.
        """
        from medin.dws import RESULT_SIMPLE

        q = get_query(environ)

        # a falsy verification result means there is nothing to report
        for error in q.verify() or ():
            msg_error(environ, error)

        # NOTE(review): the comment below says one result is needed and
        # setup() reads the first result for the etag, yet the count is
        # set to 0 -- confirm that 0 is the intended value.
        q.setCount(0)  # we only need one result

        # Get the results in descending order so the result can be
        # used in an etag (as it is the latest). The previous sort was
        # fetched into a local that was never used, so that lookup has
        # been dropped.
        q.setSort('updated,0')

        # generate the soap caller
        return self.request.prepareCaller(q, RESULT_SIMPLE,
                                          environ['logging.logger'])

    def setup(self, environ):
        """
        Collect the template variables and headers for the search form

        Returns a TemplateContext whose headers carry the Etag returned
        by check_etag (fed with the first item of the result, or 'none'
        when the result is empty) and a no-cache Cache-Control value.
        """
        db = get_db(environ)
        vocab = get_vocab(environ)

        # run the query
        self.prepareSOAP(environ)
        r = self.request()

        # check the etag
        try:
            docid = list(r)[0]
        except IndexError:
            docid = 'none'
        etag = check_etag(environ, docid)

        areas = get_areas(environ)
        q = get_query(environ)

        # We need to get the number of hits for the query.
        count = q.getCount(default=None)
        search_term = q.getSearchTerm(cast=False)
        sort = q.getSort(cast=False)
        bboxes = q.getBoxes()
        start_date = q.getStartDate(cast=False)
        end_date = q.getEndDate(cast=False)
        area = q.getArea(cast=False)
        criteria = q.asDict(False)
        area_type = areas.getAreaType(area)

        area_ids = {
            'british-isles': areas.britishIsles(),
            'countries': areas.countries(),
            'sea-areas': areas.seaAreas(),
            'progress-areas': areas.chartingProgressAreas(),
            'ices-rectangles': areas.icesRectangles()
        }

        # The selected_* sets hold the first element of each criteria
        # entry; generator expressions avoid building throwaway lists.

        # get the themes for the dropdowns
        data_themes = vocab.getDataThemeIds()
        selected_data_themes = set(
            theme[0] for theme in criteria['data_themes'])
        sub_themes = vocab.getSubThemeIdsForDataThemeIds(selected_data_themes)
        selected_sub_themes = set(
            theme[0] for theme in criteria['sub_themes'])
        parameters = vocab.getParameterIdsForSubThemeIds(selected_sub_themes)
        selected_parameters = set(
            theme[0] for theme in criteria['parameters'])

        # get the data formats
        data_formats = vocab.getDataFormatIds()
        selected_data_formats = set(
            item[0] for item in criteria['data_formats'])

        # get the access types
        access = vocab.getAccessTypeIds()
        selected_access_types = set(
            item[0] for item in criteria['access_types'])

        # get the data holders (their ids are converted to int)
        data_holders = db.getDataHolders()
        selected_data_holders = set(
            int(item[0]) for item in criteria['data_holders'])

        tvars = dict(search_term=search_term,
                     hits=r.hits,
                     criteria=criteria,
                     count=count,
                     sort=sort,
                     start_date=start_date,
                     end_date=end_date,
                     area=area,
                     area_type=area_type,
                     area_ids=area_ids,
                     data_formats=data_formats,
                     selected_data_formats=selected_data_formats,
                     access_types=access,
                     selected_access_types=selected_access_types,
                     data_themes=data_themes,
                     selected_data_themes=selected_data_themes,
                     sub_themes=sub_themes,
                     selected_sub_themes=selected_sub_themes,
                     parameters=parameters,
                     selected_parameters=selected_parameters,
                     data_holders=data_holders,
                     selected_data_holders=selected_data_holders,
                     bboxes=bboxes)

        headers = [
            # propagate the result update time to the HTTP layer
            ('Etag', etag),
            ('Cache-Control', 'no-cache, must-revalidate')
        ]

        return TemplateContext('Search the MEDIN Data Archive Centres',
                               tvars=tvars,
                               headers=headers)
Ejemplo n.º 13
0
class MetadataHTML(Metadata):
    """
    Metadata view initialised with the ['%s', 'metadata.html'] path spec

    Adds to the base Metadata behaviour: the hit count of the query
    taken from the HTTP referrer, links for plain text URLs in the
    additional information and the email obfuscation filter.
    """

    def __init__(self):
        """
        Create the search request, the URL pattern and the output filter
        """
        import re
        from medin.dws import SearchRequest
        self.search_request = SearchRequest()

        super(MetadataHTML, self).__init__(['%s', 'metadata.html'])

        # see http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        self.url_pattern = re.compile(
            r"""(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))"""
        )

        # ensure emails are obfuscated when rendered
        self.filters.append(ObfuscateEmails())

    def setup(self, environ):
        """
        Build the template context for the metadata page

        Reads 'logging.logger' from environ. Returns a TemplateContext
        titled after the metadata, with the headers from the base setup
        and the template variables metadata, criteria,
        referrer_query_string, custodians and hits.
        """
        from medin.dws import RESULT_SIMPLE

        q = get_query(environ, True)  # get the query from the HTTP referrer
        referrer_query_string = str(q)

        # call the base setup, using the referrer query string as etag data
        parser, headers = super(MetadataHTML,
                                self).setup(environ, referrer_query_string)

        criteria = q.asDict(False)
        self.search_request.prepareCaller(q, RESULT_SIMPLE,
                                          environ['logging.logger'])
        r = self.search_request()

        # an empty query string stays empty, otherwise it gains a '?'
        if referrer_query_string:
            referrer_query_string = '?' + referrer_query_string
        metadata = parser.parse()
        title = 'Metadata: %s' % metadata.title

        # urlify strings
        if metadata.additional_info:
            metadata.additional_info = self.urlify(metadata.additional_info)

        # organisation wins over personal name; contacts with neither
        # are skipped
        custodians = [
            contact.organisation or contact.name for contact in
            metadata.responsible_party.getContactsForRole('custodian')
            if contact and (contact.organisation or contact.name)
        ]

        tvars = dict(metadata=metadata,
                     criteria=criteria,
                     referrer_query_string=referrer_query_string,
                     custodians=custodians,
                     hits=r.hits)

        return TemplateContext(title, tvars=tvars, headers=headers)

    def urlify(self, text):
        """
        Create active HTML links out of plain text URLs

        Each match of self.url_pattern in text becomes an anchor element
        with the matched URL as both href and label; everything else in
        text is returned as it was.
        """
        link_template = '<a href="%(url)s">%(url)s</a>'

        # The match is used as-is: a str() cast adds nothing for native
        # strings and raises UnicodeEncodeError for non-ASCII unicode
        # matches on Python 2.
        # NOTE(review): the URL and the surrounding text are not
        # HTML-escaped here -- confirm that escaping is handled elsewhere.
        # see http://stackoverflow.com/questions/520031/whats-the-cleanest-way-to-extract-urls-from-a-string-using-python
        return self.url_pattern.sub(
            lambda match: link_template % dict(url=match.group()), text)
Ejemplo n.º 14
0
    def __init__(self):
        """
        Attach a new medin.dws SearchRequest to the instance
        """
        # the DWS module is imported at function scope
        from medin import dws

        search_request = dws.SearchRequest()
        self.request = search_request
Ejemplo n.º 15
0
    def __init__(self, result_type):
        """
        Keep result_type and attach a new medin.dws SearchRequest to
        the instance
        """
        # the DWS module is imported at function scope
        from medin import dws

        self.result_type = result_type

        search_request = dws.SearchRequest()
        self.request = search_request
Ejemplo n.º 16
0
class Search(MakoApp):
    """
    The search form page

    Executes the current query for its hit count and etag data and
    collects the area, vocabulary and data holder listings handed to
    the template.
    """

    def __init__(self):
        """
        Create the search request and initialise the base class with
        the ['%s', 'search.html'] path spec and check_etag=False
        """
        from medin.dws import SearchRequest

        self.request = SearchRequest()
        super(Search, self).__init__(['%s', 'search.html'], check_etag=False)

        # ensure emails are obfuscated when rendered
        self.filters.append(ObfuscateEmails())

    def prepareSOAP(self, environ):
        """
        The interface for generating a SOAPCaller

        Query verification errors are passed to msg_error. The query
        then gets a count of 0 and the sort 'updated,0' before the
        caller is prepared with the 'logging.logger' entry of environ.
        """
        from medin.dws import RESULT_SIMPLE

        q = get_query(environ)

        # nothing is reported when verification returns a falsy value
        for error in q.verify() or ():
            msg_error(environ, error)

        # NOTE(review): a count of 0 is requested although the comment
        # below asks for one result and setup() reads the first result
        # for the etag -- confirm that 0 is the intended value.
        q.setCount(0)                   # we only need one result

        # Get the results in descending order so the result can be
        # used in an etag (as it is the latest). The lookup of the
        # previous sort was removed: its value was never used.
        q.setSort('updated,0')

        # generate the soap caller
        return self.request.prepareCaller(q, RESULT_SIMPLE, environ['logging.logger'])

    def setup(self, environ):
        """
        Gather the template variables and headers for the search form

        Returns a TemplateContext with an Etag header from check_etag
        (fed with the first item of the result, or 'none' when the
        result is empty) and a no-cache Cache-Control header.
        """
        db = get_db(environ)
        vocab = get_vocab(environ)

        # run the query
        self.prepareSOAP(environ)
        r = self.request()

        # check the etag
        try:
            docid = list(r)[0]
        except IndexError:
            docid = 'none'
        etag = check_etag(environ, docid)

        areas = get_areas(environ)
        q = get_query(environ)

        count = q.getCount(default=None) # We need to get the number of hits for the query.
        search_term = q.getSearchTerm(cast=False)
        sort = q.getSort(cast=False)
        bboxes = q.getBoxes()
        start_date = q.getStartDate(cast=False)
        end_date = q.getEndDate(cast=False)
        area = q.getArea(cast=False)
        criteria = q.asDict(False)
        area_type = areas.getAreaType(area)

        area_ids = {'british-isles': areas.britishIsles(),
                    'countries': areas.countries(),
                    'sea-areas': areas.seaAreas(),
                    'progress-areas': areas.chartingProgressAreas(),
                    'ices-rectangles': areas.icesRectangles()}

        # Each selected_* set is built from the first element of every
        # criteria entry, using a generator expression rather than an
        # intermediate list.

        # get the themes for the dropdowns
        data_themes = vocab.getDataThemeIds()
        selected_data_themes = set(theme[0] for theme in criteria['data_themes'])
        sub_themes = vocab.getSubThemeIdsForDataThemeIds(selected_data_themes)
        selected_sub_themes = set(theme[0] for theme in criteria['sub_themes'])
        parameters = vocab.getParameterIdsForSubThemeIds(selected_sub_themes)
        selected_parameters = set(theme[0] for theme in criteria['parameters'])

        # get the data formats
        data_formats = vocab.getDataFormatIds()
        selected_data_formats = set(item[0] for item in criteria['data_formats'])

        # get the access types
        access = vocab.getAccessTypeIds()
        selected_access_types = set(item[0] for item in criteria['access_types'])

        # get the data holders (their ids are converted to int)
        data_holders = db.getDataHolders()
        selected_data_holders = set(int(item[0]) for item in criteria['data_holders'])

        tvars = dict(search_term=search_term,
                     hits=r.hits,
                     criteria=criteria,
                     count=count,
                     sort=sort,
                     start_date=start_date,
                     end_date=end_date,
                     area=area,
                     area_type=area_type,
                     area_ids=area_ids,
                     data_formats=data_formats,
                     selected_data_formats=selected_data_formats,
                     access_types=access,
                     selected_access_types=selected_access_types,
                     data_themes=data_themes,
                     selected_data_themes=selected_data_themes,
                     sub_themes=sub_themes,
                     selected_sub_themes=selected_sub_themes,
                     parameters=parameters,
                     selected_parameters=selected_parameters,
                     data_holders=data_holders,
                     selected_data_holders=selected_data_holders,
                     bboxes=bboxes)

        headers = [('Etag', etag), # propagate the result update time to the HTTP layer
                   ('Cache-Control', 'no-cache, must-revalidate')]

        return TemplateContext('Search the MEDIN Data Archive Centres', tvars=tvars, headers=headers)