Beispiel #1
0
def set_favorite_item(ctrl, request):
    """
    Build a favorite item from the submitted form and store it for the user.

    Form fields read: 'corpora' (list of corpus ids; the first one is
    treated as the main corpus), 'subcorpus_id' and 'subcorpus_orig_id'.
    The subcorpus is applied only when opening the first corpus and the
    item size is taken from that corpus' search_size().

    Returns the stored item as produced by its to_dict() method.
    """
    entries = []
    primary_size = None
    for position, corpus_id in enumerate(request.form.getlist('corpora')):
        is_primary = position == 0
        subcname = request.form['subcorpus_id'] if is_primary else None
        corpus = ctrl.cm.get_Corpus(corpus_id, subcname=subcname)
        if is_primary:
            primary_size = corpus.search_size()
        label = l10n.import_string(corpus.get_conf('NAME'), corpus.get_conf('ENCODING'))
        entries.append({'id': corpus_id, 'name': label})

    subcorpus_id = request.form['subcorpus_id']
    subcorpus_orig_id = request.form['subcorpus_orig_id']

    # corpus names joined by ' || ', optionally followed by ' / <subcorpus>'
    title = u' || '.join(entry['name'] for entry in entries)
    if subcorpus_orig_id:
        title += u' / ' + subcorpus_orig_id

    item = FavoriteItem({
        'name': title,
        'corpora': entries,
        'subcorpus_id': subcorpus_id,
        'subcorpus_orig_id': subcorpus_orig_id,
        'size': primary_size,
        'size_info': l10n.simplify_num(primary_size),
    })
    with plugins.runtime.USER_ITEMS as uit:
        uit.add_user_item(ctrl._plugin_api, item)
        return item.to_dict()
Beispiel #2
0
    def set_favorite_item(self, request, conc_args):
        """
        Store a new favorite item described by the submitted form.

        Form fields read: 'corpus_id', 'subcorpus_id', 'canonical_id',
        'name', 'type' and the list 'corpora[]' (aligned corpora). The size
        is taken from the main corpus opened with the given subcorpus.

        conc_args is accepted but not used by this method.

        Returns a dict holding the id of the created item.
        """
        form = request.form
        corp_size = self.cm.get_Corpus(form['corpus_id'], form['subcorpus_id']).search_size()
        data = dict(
            corpora=[],
            canonical_id=form['canonical_id'],
            corpus_id=form['corpus_id'],
            subcorpus_id=form['subcorpus_id'],
            name=form['name'],
            size=corp_size,
            size_info=l10n.simplify_num(corp_size),
            type=form['type'],
        )

        # aligned corpora are stored under their id; no display name is
        # looked up here (TODO fetch real name??)
        data['corpora'].extend(
            dict(name=aligned,
                 corpus_id=aligned,
                 canonical_id=self._canonical_corpname(aligned),
                 type='corpus')
            for aligned in request.form.getlist('corpora[]'))

        item = plugins.get('user_items').from_dict(data)
        user_id = None
        storage = plugins.get('user_items')
        user_id = self._session_get('user', 'id')
        storage.add_user_item(user_id, item)
        return dict(id=item.id)
Beispiel #3
0
    def _export_featured(self, plugin_api):
        """
        Export the featured corpora the current user is permitted to access.

        A corpus is exported when its id is among the permitted corpora and
        its metadata has a truthy 'featured' flag.

        Arguments:
        plugin_api -- provides user_dict and user_lang read here

        Returns a list of dicts with keys 'id', 'corpus_id', 'name', 'size',
        'size_info' and 'description'.
        """
        permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)

        featured = []
        for x in self._raw_list(plugin_api.user_lang).values():
            corpus_id = x['id']
            if corpus_id not in permitted_corpora or not x['metadata'].get('featured', False):
                continue
            # look the corpus info up once instead of once per exported field
            cinfo = self._manatee_corpora.get_info(corpus_id)
            featured.append({
                # on client-side, this may contain also subc. id, aligned ids
                'id': corpus_id,
                'corpus_id': corpus_id,
                'name': cinfo.name,
                'size': cinfo.size,
                'size_info': l10n.simplify_num(cinfo.size),
                'description': self._export_untranslated_label(plugin_api, cinfo.description)
            })
        return featured
Beispiel #4
0
def set_favorite_item(ctrl, request):
    """
    Build a favorite item from the submitted form and store it for the user.

    Form fields read: 'corpora' (list of corpus ids; the first one is
    treated as the main corpus) and 'subcorpus_id'. The subcorpus is applied
    only when opening the first corpus and the item size is taken from that
    corpus' search_size().

    Returns a dict holding the identifier of the stored item.
    """
    entries = []
    primary_size = None
    for position, corpus_id in enumerate(request.form.getlist('corpora')):
        is_primary = position == 0
        subcname = request.form['subcorpus_id'] if is_primary else None
        corpus = ctrl.cm.get_Corpus(corpus_id, subcname)
        if is_primary:
            primary_size = corpus.search_size()
        entries.append({
            'id': corpus_id,
            'canonical_id': ctrl._canonical_corpname(corpus_id),
            'name': corpus.get_conf('NAME'),
        })

    subcorpus_id = request.form['subcorpus_id']

    # corpus names joined by ' + ', optionally followed by ' : <subcorpus>'
    title = ' + '.join(entry['name'] for entry in entries)
    if subcorpus_id:
        title += ' : ' + subcorpus_id

    item = FavoriteItem({
        'name': title,
        'corpora': entries,
        'subcorpus_id': request.form['subcorpus_id'],
        'size': primary_size,
        'size_info': l10n.simplify_num(primary_size),
    })
    with plugins.runtime.USER_ITEMS as uit:
        uit.add_user_item(ctrl._plugin_api, item)
        return {'id': item.ident}
Beispiel #5
0
    def set_favorite_item(self, request, conc_args):
        """
        Store a new favorite item described by the submitted form.

        Form fields read: 'corpus_id', 'subcorpus_id', 'canonical_id',
        'name', 'type' and the list 'corpora[]' (aligned corpora). The size
        is taken from the main corpus opened with the given subcorpus.

        conc_args is accepted but not used by this method.

        Returns a dict holding the id of the created item.
        """
        fields = request.form
        primary = self.cm.get_Corpus(fields['corpus_id'], fields['subcorpus_id'])
        total = primary.search_size()

        aligned_entries = []
        data = dict(
            corpora=aligned_entries,
            canonical_id=fields['canonical_id'],
            corpus_id=fields['corpus_id'],
            subcorpus_id=fields['subcorpus_id'],
            name=fields['name'],
            size=total,
            size_info=l10n.simplify_num(total),
            type=fields['type'],
        )

        for corpus_id in request.form.getlist('corpora[]'):
            entry = dict(
                name=corpus_id,  # TODO fetch real name??
                corpus_id=corpus_id,
                canonical_id=self._canonical_corpname(corpus_id),
                type='corpus',
            )
            aligned_entries.append(entry)

        item = plugins.get('user_items').from_dict(data)
        storage = plugins.get('user_items')
        storage.add_user_item(self._session_get('user', 'id'), item)
        return dict(id=item.id)
Beispiel #6
0
    def _export_featured(self, plugin_ctx):
        """
        Export the featured corpora the current user is permitted to access.

        A corpus is exported when its id is among the permitted corpora and
        its metadata.featured attribute is truthy.

        Returns a list of dicts with keys 'id', 'corpus_id', 'name', 'size',
        'size_info' and 'description'.
        """
        def as_client_item(corpus_id):
            info = plugin_ctx.corpus_manager.get_info(corpus_id)
            # on client-side, 'id' may contain also subc. id, aligned ids
            return dict(
                id=corpus_id,
                corpus_id=corpus_id,
                name=info.name,
                size=info.size,
                size_info=l10n.simplify_num(info.size),
                description=self._export_untranslated_label(plugin_ctx, info.description),
            )

        allowed = self._auth.permitted_corpora(plugin_ctx.user_dict)
        entries = list(self._raw_list(plugin_ctx).values())
        return [
            as_client_item(entry.id)
            for entry in entries
            if entry.id in allowed and entry.metadata.featured
        ]
Beispiel #7
0
 def __init__(self, id=None, corpus_id=None, name=None, description=None, size=0, path=None,
              featured=False, keywords=None):
     """
     Initialize the item from the given values.

     size_info is derived from size via l10n.simplify_num(), found_in starts
     as an empty list and a missing keywords argument becomes a new empty
     list.
     """
     if keywords is None:
         keywords = []
     self.id, self.corpus_id = id, corpus_id
     self.name, self.description = name, description
     self.size = size
     self.size_info = l10n.simplify_num(size)
     self.path, self.featured = path, featured
     self.found_in = []
     self.keywords = keywords
Beispiel #8
0
 def __init__(self, data=None):
     """
     Initialize the item from a dict of values (all keys are optional).

     Keys read: 'name', 'corpora', 'size', 'subcorpus_id',
     'subcorpus_orig_id' (defaults to the subcorpus id) and 'id'. When no
     'id' is given, the identifier is the MD5 hex digest of the instance's
     sort_key, which is defined outside this method.
     """
     source = {} if data is None else data
     self.name = source.get('name', 'New item')
     self.corpora = source.get('corpora', [])
     self.size = source.get('size', None)
     if self.size:
         self.size_info = l10n.simplify_num(self.size)
     else:
         # no size (or zero) means there is nothing to format
         self.size_info = None
     self.subcorpus_id = source.get('subcorpus_id', None)
     self.subcorpus_orig_id = source.get('subcorpus_orig_id', self.subcorpus_id)
     # the digest is computed up front, even when an explicit id is present
     fallback_ident = hashlib.md5(self.sort_key.encode()).hexdigest()
     self.ident = source.get('id', fallback_ident)
Beispiel #9
0
 def __init__(self, id=None, corpus_id=None, name=None, description=None, size=0, path=None,
              featured=False, keywords=None):
     """
     Initialize the item from the given values.

     size_info is derived from size via l10n.simplify_num(), found_in starts
     as an empty list and a missing keywords argument becomes a new empty
     list.
     """
     if keywords is None:
         keywords = []
     self.id, self.corpus_id = id, corpus_id
     self.name, self.description = name, description
     self.size = size
     self.size_info = l10n.simplify_num(size)
     self.path, self.featured = path, featured
     self.found_in = []
     self.keywords = keywords
Beispiel #10
0
 def _export_featured(self, user_id):
     """
     Export the featured corpora the given user is permitted to access.

     A corpus is exported when its id is a key of the permitted corpora and
     its metadata has a truthy "featured" flag. The exported "id" is the
     value stored for the corpus in the permitted corpora mapping and
     "size" is the simplified (formatted) size.

     Returns a list of dicts with keys "id", "name", "size" and
     "description".
     """
     permitted_corpora = self._auth.permitted_corpora(user_id)
     featured = []
     for x in self._raw_list().values():
         corpus_id = x["id"]
         if corpus_id not in permitted_corpora or not x["metadata"].get("featured", False):
             continue
         # look the corpus info up once instead of once per exported field
         cinfo = self._manatee_corpora.get_info(corpus_id)
         featured.append(
             {
                 "id": permitted_corpora[corpus_id],
                 "name": cinfo.name,
                 "size": l10n.simplify_num(cinfo.size),
                 "description": cinfo.description,
             }
         )
     return featured
Beispiel #11
0
    def _export_featured(self, plugin_api):
        """
        Export the featured corpora the current user is permitted to access.

        A corpus is exported when its id is among the permitted corpora and
        its metadata has a truthy 'featured' flag.

        Arguments:
        plugin_api -- provides user_dict and user_lang read here

        Returns a list of dicts with keys 'id', 'corpus_id', 'name', 'size',
        'size_info' and 'description'.
        """
        permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)

        featured = []
        for x in self._raw_list(plugin_api.user_lang).values():
            corpus_id = x['id']
            if corpus_id not in permitted_corpora or not x['metadata'].get('featured', False):
                continue
            # look the corpus info up once instead of once per exported field
            cinfo = self._manatee_corpora.get_info(corpus_id)
            featured.append({
                # on client-side, this may contain also subc. id, aligned ids
                'id': corpus_id,
                'corpus_id': corpus_id,
                'name': cinfo.name,
                'size': cinfo.size,
                'size_info': l10n.simplify_num(cinfo.size),
                'description': self._export_untranslated_label(plugin_api, cinfo.description)
            })
        return featured
Beispiel #12
0
    def search(self, plugin_api, query, offset=0, limit=None, filter_dict=None):
        """
        Search the corpora available to the current user.

        A corpus matches when it carries every queried keyword, every query
        substring occurs in its name or description (case-insensitive), its
        size passes matches_size() and the corparch custom_filter() accepts
        it.

        Arguments:
        plugin_api -- provides user_dict and user_lang read here
        query -- query string parsed by parse_query(); False means
                 'use default values' and is treated as an empty query
        offset -- index of the first row to return (None is treated as 0)
        limit -- page size (None means the corparch max_page_size)
        filter_dict -- optional filter values; 'minSize' and 'maxSize' are
                       read here (None is treated as no filters)

        Returns a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
        'current_keywords' and 'filters'.
        """
        if query is False:  # False means 'use default values'
            query = ''
        if filter_dict is None:
            # the declared default would otherwise fail on the first .get()
            filter_dict = {}
        ans = {'rows': []}
        permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)
        used_keywords = set()
        all_keywords_map = dict(self._corparch.all_keywords(plugin_api.user_lang))
        if filter_dict.get('minSize'):
            min_size = l10n.desimplify_num(filter_dict.get('minSize'), strict=False)
        else:
            min_size = 0
        if filter_dict.get('maxSize'):
            max_size = l10n.desimplify_num(filter_dict.get('maxSize'), strict=False)
        else:
            max_size = None

        offset = 0 if offset is None else int(offset)
        limit = int(self._corparch.max_page_size) if limit is None else int(limit)

        user_items = self._corparch.user_items.get_user_items(plugin_api)

        def fav_id(corpus_id):
            # identifier of the first single-corpus favorite item for the corpus
            for item in user_items:
                if item.is_single_corpus and item.main_corpus_id == corpus_id:
                    return item.ident
            return None

        query_substrs, query_keywords = parse_query(self._tag_prefix, query)

        normalized_query_substrs = [s.lower() for s in query_substrs]
        for corp in self._corparch.get_list(plugin_api, permitted_corpora):
            full_data = self._corparch.get_corpus_info(plugin_api.user_lang, corp['id'])
            if isinstance(full_data, BrokenCorpusInfo):
                continue
            keywords = list(full_data['metadata']['keywords'].keys())
            tests = [k in keywords for k in query_keywords]
            found_in = []

            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp['name'].lower():
                    tests.append(True)
                elif s in (corp['desc'].lower() if corp['desc'] else ''):
                    tests.append(True)
                    found_in.append('defaultCorparch__found_in_desc')
                else:
                    tests.append(False)
            tests.append(self.matches_size(corp, min_size, max_size))
            # NOTE(review): this passes self._plugin_api rather than the
            # plugin_api argument - confirm this is intended
            tests.append(self._corparch.custom_filter(
                self._plugin_api, full_data, permitted_corpora))

            if self.matches_all(tests):
                corp['size_info'] = l10n.simplify_num(corp['size']) if corp['size'] else None
                corp['keywords'] = [(k, all_keywords_map[k]) for k in keywords]
                corp['found_in'] = found_in
                corp['fav_id'] = fav_id(corp['id'])
                # because of client-side fav/feat/search items compatibility
                corp['corpus_id'] = corp['id']
                ans['rows'].append(corp)
                used_keywords.update(keywords)
                if not self.should_fetch_next(ans, offset, limit):
                    break
        ans['rows'], ans['nextOffset'] = self.cut_result(
            self.sort(plugin_api, ans['rows']), offset, limit)
        ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
        ans['query'] = query
        ans['current_keywords'] = query_keywords
        ans['filters'] = dict(filter_dict)
        return ans
Beispiel #13
0
    def v1(self, req):
        """
        Serve an FCS (version 1.x) request and return the data for the template.

        The 'operation' argument (default 'explain') selects one of 'explain',
        'scan' or 'searchRetrieve'. Failures inside the processing part are
        signalled by raising Exception(code, details, message); they are
        caught at the end and stored under 'code', 'details' and 'msg' of the
        returned dict. Any other exception is reported there as code 1,
        'General system error'.

        Arguments:
        req -- the request; host_url, path and args are read here

        Returns a dict with the values collected for the response.
        """
        self._response.set_header('Content-Type', 'application/xml')
        current_version = 1.2

        default_corp_list = settings.get('corpora', 'default_corpora', [])
        corpname = None
        if default_corp_list:
            corpname = default_corp_list[0]
        else:
            _logger.critical(
                'FCS cannot work properly without a default_corpora set')

        pr = urllib.parse.urlparse(req.host_url)
        # None values should be filled in later
        data = {
            'corpname': corpname,
            'corppid': None,
            'version': current_version,
            'recordPacking': 'xml',
            'result': [],
            'operation': None,
            'numberOfRecords': 0,
            'server_name': pr.hostname,
            'server_port': pr.port or 80,
            'database': req.path,
            'maximumRecords': None,
            'maximumTerms': None,
            'startRecord': None,
            'responsePosition': None,
        }
        # supported parameters for all operations
        supported_args = [
            'operation', 'stylesheet', 'version', 'extraRequestData'
        ]

        def int_arg(name, default, positive=False):
            # Read an optional integer request argument. A value which is not
            # an integer (or not > 0 when 'positive' is set) raises the
            # 'Unsupported parameter value' diagnostic (code 6).
            if name not in req.args:
                return default
            try:
                value = int(req.args.get(name))
            except (TypeError, ValueError):
                raise Exception(6, name, 'Unsupported parameter value')
            if positive and value <= 0:
                raise Exception(6, name, 'Unsupported parameter value')
            return value

        try:
            # check operation
            operation = req.args.get('operation', 'explain')
            data['operation'] = operation

            # check version
            version = req.args.get('version', None)
            if version is not None and current_version < float(version):
                raise Exception(5, version, 'Unsupported version')

            # check integer parameters
            maximumRecords = int_arg('maximumRecords', 250, positive=True)
            data['maximumRecords'] = maximumRecords

            maximumTerms = int_arg('maximumTerms', 100)
            data['maximumTerms'] = maximumTerms

            startRecord = int_arg('startRecord', 1, positive=True)
            data['startRecord'] = startRecord

            responsePosition = int_arg('responsePosition', 0)
            data['responsePosition'] = responsePosition

            # set content-type in HTTP header
            recordPacking = req.args.get('recordPacking', 'xml')
            if recordPacking == 'string':
                # TODO(jm)!!!
                self._response.set_header('Content-Type',
                                          'text/plain; charset=utf-8')
            elif recordPacking != 'xml':
                raise Exception(71, 'recordPacking',
                                'Unsupported record packing')

            # provide info about service
            # (must match the default value of the 'operation' argument,
            # otherwise a plain request ends up as 'Unsupported operation')
            if operation == 'explain':
                self._check_args(
                    req, supported_args,
                    ['recordPacking', 'x-fcs-endpoint-description'])
                corpus = self.cm.get_corpus(corpname)
                data['result'] = corpus.get_posattrs()
                data['numberOfRecords'] = len(data['result'])
                data['corpus_desc'] = 'Corpus {0} ({1} tokens)'.format(
                    corpus.get_conf('NAME'), l10n.simplify_num(corpus.size))
                data['corpus_lang'] = Languages.get_iso_code(
                    corpus.get_conf('LANGUAGE'))
                data['show_endpoint_desc'] = req.args.get(
                    'x-fcs-endpoint-description', 'false') == 'true'

            # wordlist for a given attribute
            elif operation == 'scan':
                self._check_args(req, supported_args, [
                    'scanClause', 'responsePosition', 'maximumTerms',
                    'x-cmd-resource-info'
                ])
                data['resourceInfoRequest'] = req.args.get(
                    'x-cmd-resource-info', '') == 'true'
                scanClause = req.args.get('scanClause', '')
                if scanClause.startswith('fcs.resource='):
                    value = scanClause.split('=')[1]
                    data['result'] = self._corpora_info(value, maximumTerms)
                else:
                    data['result'] = conclib.fcs_scan(corpname, scanClause,
                                                      maximumTerms,
                                                      responsePosition)

            # simple concordancer
            elif operation == 'searchRetrieve':
                # TODO we should review the args here (especially x-cmd-context, resultSetTTL)
                self._check_args(req, supported_args, [
                    'query', 'startRecord', 'maximumRecords', 'recordPacking',
                    'recordSchema', 'resultSetTTL', 'x-cmd-context',
                    'x-fcs-context'
                ])
                if 'x-cmd-context' in req.args:
                    req_corpname = req.args['x-cmd-context']
                    user_corpora = plugins.runtime.AUTH.instance.permitted_corpora(
                        self.session_get('user'))
                    if req_corpname in user_corpora:
                        corpname = req_corpname
                    else:
                        _logger.warning(
                            'Requested unavailable corpus [%s], defaulting to [%s]',
                            req_corpname, corpname)
                    data['corpname'] = corpname

                corp_conf_info = plugins.runtime.CORPARCH.instance.get_corpus_info(
                    self._plugin_ctx, corpname)
                data['corppid'] = corp_conf_info.get('web', '')
                query = req.args.get('query', '')
                corpus = self.cm.get_corpus(corpname)
                if 0 == len(query):
                    raise Exception(7, 'fcs_query',
                                    'Mandatory parameter not supplied')
                data['result'], data['numberOfRecords'] = self.fcs_search(
                    corpus, corpname, query, maximumRecords, startRecord)

            # unsupported operation
            else:
                # show within explain template
                data['operation'] = 'explain'
                raise Exception(4, '', 'Unsupported operation')

        # catch exception and amend diagnostics in template
        except Exception as e:
            data['message'] = ('error', repr(e))
            try:
                # an exception object is not iterable in Python 3; the
                # (code, details, message) triple lives in its args
                data['code'], data['details'], data['msg'] = e.args
            except (ValueError, TypeError):
                data['code'], data['details'] = 1, repr(e)
                data['msg'] = 'General system error'

        return data
Beispiel #14
0
    def search(self,
               plugin_api,
               query,
               offset=0,
               limit=None,
               filter_dict=None):
        """
        Search the available corpora, optionally driven by external keywords.

        When filter_dict carries 'keyword' values, they are mapped via
        map_external_keywords() and used (together with the default label)
        instead of the query. Otherwise the keywords parsed from the query
        are stored in the session; if the query is False and yields no
        keywords, the keywords stored in the session are used.

        Arguments:
        plugin_api -- provides session, user_dict and user_lang read here
        query -- query string; False means 'use the stored/default values'
        offset -- index of the first row to return (None is treated as 0)
        limit -- page size (None means the corparch max_page_size)
        filter_dict -- optional filter values; 'keyword' (via getlist),
                       'minSize', 'maxSize' and 'sortBySize' are read here
                       (None is treated as no filters)

        Returns a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
        'current_keywords' and 'filters'.
        """
        if filter_dict is None:
            # the declared default would otherwise fail on .getlist()
            external_keywords = []
            filter_dict = {}
        else:
            external_keywords = filter_dict.getlist('keyword')
        external_keywords = self._corparch.map_external_keywords(
            external_keywords, plugin_api.user_lang)
        if len(external_keywords) != 0:
            query_substrs = []
            query_keywords = external_keywords + [self.default_label]
        else:

            if self.SESSION_KEYWORDS_KEY not in plugin_api.session:
                plugin_api.session[self.SESSION_KEYWORDS_KEY] = [
                    self.default_label
                ]
            initial_query = query
            if query is False:
                query = ''
            query_substrs, query_keywords = parse_query(
                self._tag_prefix, query)
            if len(query_keywords) == 0 and initial_query is False:
                query_keywords = plugin_api.session[self.SESSION_KEYWORDS_KEY]
            else:
                plugin_api.session[self.SESSION_KEYWORDS_KEY] = query_keywords
        # rebuild the query string from the effective substrings and keywords
        query = ' '.join(query_substrs) \
                + ' ' + ' '.join('%s%s' % (self._tag_prefix, s) for s in query_keywords)

        ans = {'rows': []}
        permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)

        if filter_dict.get('minSize'):
            min_size = l10n.desimplify_num(filter_dict.get('minSize'),
                                           strict=False)
        else:
            min_size = 0
        if filter_dict.get('maxSize'):
            max_size = l10n.desimplify_num(filter_dict.get('maxSize'),
                                           strict=False)
        else:
            max_size = None

        sorting_field = filter_dict.get('sortBySize', 'name')

        offset = 0 if offset is None else int(offset)
        limit = int(self._corparch.max_page_size) if limit is None else int(limit)

        user_items = self._corparch.user_items.get_user_items(plugin_api)

        def fav_id(corpus_id):
            # identifier of the first single-corpus favorite item for the corpus
            for item in user_items:
                if item.is_single_corpus and item.main_corpus_id == corpus_id:
                    return item.ident
            return None

        query_substrs, query_keywords = parse_query(self._tag_prefix, query)
        all_keywords_map = dict(
            self._corparch.all_keywords(plugin_api.user_lang))
        normalized_query_substrs = [s.lower() for s in query_substrs]
        used_keywords = set()

        for corp in self._corparch.get_list(plugin_api):
            full_data = self._corparch.get_corpus_info(plugin_api.user_lang,
                                                       corp['id'])
            if isinstance(full_data, BrokenCorpusInfo):
                continue
            keywords = [k for k, _ in full_data.metadata.keywords]
            tests = [k in keywords for k in query_keywords]
            found_in = []
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp['name'].lower():
                    tests.append(True)
                elif s in (corp['desc'].lower() if corp['desc'] else ''):
                    tests.append(True)
                    found_in.append('defaultCorparch__found_in_desc')
                else:
                    tests.append(False)
            tests.append(self.matches_size(corp, min_size, max_size))
            # NOTE(review): this passes self._plugin_api rather than the
            # plugin_api argument - confirm this is intended
            tests.append(
                self._corparch.custom_filter(self._plugin_api, full_data,
                                             permitted_corpora))

            if all(tests):
                corp['size_info'] = l10n.simplify_num(
                    corp['size']) if corp['size'] else None
                corp['keywords'] = [(k, all_keywords_map[k])
                                    for k in keywords]
                corp['found_in'] = found_in
                corp['fav_id'] = fav_id(corp['id'])
                # because of client-side fav/feat/search items compatibility
                corp['corpus_id'] = corp['id']
                corp['pmltq'] = full_data['pmltq']
                corp['repo'] = full_data['web']
                corp['access'] = full_data['access']
                corp['tokenConnect'] = full_data['token_connect'][
                    'providers']
                ans['rows'].append(corp)
                used_keywords.update(keywords)
                if not self.should_fetch_next(ans, offset, limit):
                    break

        ans['rows'], ans['nextOffset'] = self.cut_result(
            self.sort(plugin_api, ans['rows'], field=sorting_field), offset,
            limit)
        ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
        ans['query'] = query
        ans['current_keywords'] = query_keywords
        ans['filters'] = dict(filter_dict)
        return ans
Beispiel #15
0
    def search(self,
               plugin_api,
               query,
               offset=0,
               limit=None,
               filter_dict=None):
        """
        Search the corpora available to the current user.

        A corpus matches when it carries every queried keyword, every query
        substring occurs in its name or description (case-insensitive), its
        size passes matches_size() and the corparch custom_filter() accepts
        it. With the 'favOnly' filter set, corpora without a single-corpus
        favorite item are skipped.

        Arguments:
        plugin_api -- provides user_dict and user_lang read here
        query -- query string parsed by parse_query(); False means
                 'use default values' and is treated as an empty query
        offset -- index of the first row to return (None is treated as 0)
        limit -- page size (None means the corparch max_page_size)
        filter_dict -- optional filter values; 'minSize', 'maxSize' and
                       'favOnly' are read here (None is treated as no
                       filters)

        Returns a dict with keys 'rows', 'nextOffset', 'keywords', 'query',
        'current_keywords' and 'filters'.
        """
        if query is False:  # False means 'use default values'
            query = ''
        if filter_dict is None:
            # the declared default would otherwise fail on the first .get()
            filter_dict = {}
        ans = {'rows': []}
        permitted_corpora = self._auth.permitted_corpora(plugin_api.user_dict)
        used_keywords = set()
        all_keywords_map = dict(
            self._corparch.all_keywords(plugin_api.user_lang))
        if filter_dict.get('minSize'):
            min_size = l10n.desimplify_num(filter_dict.get('minSize'),
                                           strict=False)
        else:
            min_size = 0
        if filter_dict.get('maxSize'):
            max_size = l10n.desimplify_num(filter_dict.get('maxSize'),
                                           strict=False)
        else:
            max_size = None
        if filter_dict.get('favOnly'):
            favourite_only = bool(int(filter_dict.get('favOnly')))
        else:
            favourite_only = False

        offset = 0 if offset is None else int(offset)
        limit = int(self._corparch.max_page_size) if limit is None else int(limit)

        user_items = self._corparch.user_items.get_user_items(plugin_api)

        def fav_id(corpus_id):
            # identifier of the first single-corpus favorite item for the corpus
            for item in user_items:
                if item.is_single_corpus and item.main_corpus_id == corpus_id:
                    return item.ident
            return None

        query_substrs, query_keywords = parse_query(self._tag_prefix, query)

        normalized_query_substrs = [s.lower() for s in query_substrs]
        for corp in self._corparch.get_list(plugin_api, permitted_corpora):
            full_data = self._corparch.get_corpus_info(plugin_api.user_lang,
                                                       corp['id'])
            if isinstance(full_data, BrokenCorpusInfo):
                continue
            if favourite_only and fav_id(corp['id']) is None:
                continue

            keywords = [k for k, _ in full_data.metadata.keywords]
            tests = [k in keywords for k in query_keywords]
            found_in = []

            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in corp['name'].lower():
                    tests.append(True)
                elif s in (corp['desc'].lower() if corp['desc'] else ''):
                    tests.append(True)
                    found_in.append('defaultCorparch__found_in_desc')
                else:
                    tests.append(False)
            tests.append(self.matches_size(corp, min_size, max_size))
            # NOTE(review): this passes self._plugin_api rather than the
            # plugin_api argument - confirm this is intended
            tests.append(
                self._corparch.custom_filter(self._plugin_api, full_data,
                                             permitted_corpora))

            if self.matches_all(tests):
                corp['size_info'] = l10n.simplify_num(
                    corp['size']) if corp['size'] else None
                corp['keywords'] = [(k, all_keywords_map[k])
                                    for k in keywords]
                corp['found_in'] = found_in
                corp['fav_id'] = fav_id(corp['id'])
                # because of client-side fav/feat/search items compatibility
                corp['corpus_id'] = corp['id']
                ans['rows'].append(corp)
                used_keywords.update(keywords)
                if not self.should_fetch_next(ans, offset, limit):
                    break
        ans['rows'], ans['nextOffset'] = self.cut_result(
            self.sort(plugin_api, ans['rows']), offset, limit)
        ans['keywords'] = l10n.sort(used_keywords, loc=plugin_api.user_lang)
        ans['query'] = query
        ans['current_keywords'] = query_keywords
        ans['filters'] = dict(filter_dict)
        return ans
Beispiel #16
0
    def search(self, user_id, query, offset=0, limit=None, filter_dict=None):
        """
        Search the corpora permitted to a user and return one page of results.

        A corpus is included only if ALL of the following hold:
        * every keyword parsed from the query is among the corpus keywords,
        * every substring parsed from the query occurs (case-insensitively)
          in the corpus name or in its description,
        * its size is known and lies within the minSize/maxSize filter,
        * self.custom_filter() accepts it.

        arguments:
        user_id -- ID of the user whose permitted corpora and user items
                   are used
        query -- raw query string; it is split by self._parse_query() into
                 substrings and keywords
        offset -- index of the first row to return (None is treated as 0)
        limit -- max. number of rows to return; if None then
                 self._max_page_size is used (if that is None too, the
                 result is not cut)
        filter_dict -- optional dict-like object; the keys 'minSize' and
                       'maxSize' are read here; None means "no filters"

        returns:
        a dict with keys 'rows' (matching corpora, sorted by name),
        'nextOffset' (offset of the following page or None if there is none),
        'keywords' (sorted keywords of all the matching corpora), 'query'
        and 'filters'
        """
        # the signature advertises None as the default, so it must not crash
        if filter_dict is None:
            filter_dict = {}

        ans = {"rows": []}
        permitted_corpora = self._auth.permitted_corpora(user_id)
        user_items = self._user_items.get_user_items(user_id)
        used_keywords = set()
        all_keywords_map = dict(self.all_keywords)

        raw_min_size = filter_dict.get("minSize")
        min_size = l10n.desimplify_num(raw_min_size, strict=False) if raw_min_size else 0
        raw_max_size = filter_dict.get("maxSize")
        max_size = l10n.desimplify_num(raw_max_size, strict=False) if raw_max_size else None

        corplist = self.get_list(permitted_corpora)

        offset = 0 if offset is None else int(offset)
        if limit is None:
            limit = self._max_page_size

        def cut_result(res):
            # returns (page, next_offset); next_offset is None on the last page
            if limit is None:
                return res, None
            right_lim = offset + int(limit)
            page = res[offset:right_lim]
            if right_lim >= len(res):
                right_lim = None
            return page, right_lim

        def is_fav(corpus_id):
            return any(
                isinstance(item, CorpusItem) and item.corpus_id == corpus_id
                for item in user_items
            )

        def matches_size(d):
            # items without a size never match, even if no size filter is set
            item_size = d.get("size", None)
            return (
                item_size is not None
                and (not min_size or int(item_size) >= int(min_size))
                and (not max_size or int(item_size) <= int(max_size))
            )

        query_substrs, query_keywords = self._parse_query(query)
        normalized_query_substrs = [s.lower() for s in query_substrs]

        for corp in corplist:
            full_data = self.get_corpus_info(corp["id"], self.getlocal("lang"))
            if isinstance(full_data, BrokenCorpusInfo):
                continue
            keywords = list(full_data["metadata"]["keywords"].keys())
            found_in = []
            hits = [k in keywords for k in query_keywords]

            name_lc = corp["name"].lower()
            desc_lc = corp["desc"].lower() if corp["desc"] else ""
            for s in normalized_query_substrs:
                # the name must be tested first to prevent the list 'found_in'
                # to be filled in case item matches both name and description
                if s in name_lc:
                    hits.append(True)
                elif s in desc_lc:
                    hits.append(True)
                    found_in.append(_("description"))
                else:
                    hits.append(False)
            hits.append(matches_size(corp))
            hits.append(self.custom_filter(full_data, permitted_corpora))

            if all(hits):
                # note: the item from corplist is extended in place
                corp["raw_size"] = l10n.simplify_num(corp["size"]) if corp["size"] else None
                corp["keywords"] = [(k, all_keywords_map[k]) for k in keywords]
                corp["found_in"] = found_in
                corp["user_item"] = is_fav(corp["id"])
                self.customize_search_result_item(corp, full_data)
                ans["rows"].append(corp)
                used_keywords.update(keywords)

        def corp_cmp_key(c):
            # a missing name sorts as an empty string
            name = c.get("name")
            return name if name is not None else ""

        sorted_rows = l10n.sort(ans["rows"], loc=self._lang(), key=corp_cmp_key)
        ans["rows"], ans["nextOffset"] = cut_result(sorted_rows)
        ans["keywords"] = l10n.sort(used_keywords, loc=self._lang())
        ans["query"] = query
        ans["filters"] = dict(filter_dict)
        return ans
Beispiel #17
0
 def __post_init__(self):
     """
     Fill in 'size_info' with the value l10n.simplify_num() produces
     for 'size'.

     NOTE(review): presumably a dataclass post-init hook; the enclosing
     class is not visible here - confirm.
     """
     simplified_size = l10n.simplify_num(self.size)
     self.size_info = simplified_size
Beispiel #18
0
    def v1(self, req):
        """
        Handle an FCS request (protocol version up to 1.2) and prepare
        data for the response template.

        Supported operations (URL argument 'operation', default 'explain'):
        * 'explain' -- provides info about the default corpus
        * 'scan' -- a word list for an attribute or a list of resources
          (if scanClause starts with 'fcs.resource=')
        * 'searchRetrieve' -- a simple concordance search

        arguments:
        req -- a request object; the attributes 'host_url', 'path' and
               'args' (URL arguments with a dict-like interface) are used

        returns:
        a dict of template data; in case of an error it additionally
        contains the keys 'message', 'code', 'details' and 'msg'. Errors
        are never propagated to the caller - they are always converted
        to these diagnostics.
        """
        self._headers['Content-Type'] = 'application/xml'
        current_version = 1.2

        default_corp_list = settings.get('corpora', 'default_corpora', [])
        corpname = None
        if not default_corp_list:
            _logger.critical('FCS cannot work properly without a default_corpora set')
        else:
            corpname = default_corp_list[0]

        pr = urlparse.urlparse(req.host_url)
        # None values should be filled in later
        data = {
            'corpname': corpname,
            'corppid': None,
            'version': current_version,
            'recordPacking': 'xml',
            'result': [],
            'operation': None,
            'numberOfRecords': 0,
            'server_name': pr.hostname,
            'server_port': pr.port or 80,
            'database': req.path,
            'maximumRecords': None,
            'maximumTerms': None,
            'startRecord': None,
            'responsePosition': None,
        }
        # supported parameters for all operations
        supported_args = ['operation', 'stylesheet', 'version', 'extraRequestData']

        def int_arg(name, default, positive=False):
            # Return the default if the argument is not present; otherwise
            # convert it to int and raise diagnostic no. 6 in case the value
            # is not an integer (or not a positive one if 'positive' is set)
            if name not in req.args:
                return default
            try:
                value = int(req.args.get(name, default))
            except (TypeError, ValueError):
                raise Exception(6, name, 'Unsupported parameter value')
            if positive and value <= 0:
                raise Exception(6, name, 'Unsupported parameter value')
            return value

        try:
            # check operation
            operation = req.args.get('operation', 'explain')
            data['operation'] = operation

            # check version
            version = req.args.get('version', None)
            if version is not None and current_version < float(version):
                raise Exception(5, version, 'Unsupported version')

            # check integer parameters
            maximumRecords = int_arg('maximumRecords', 250, positive=True)
            data['maximumRecords'] = maximumRecords

            maximumTerms = int_arg('maximumTerms', 100)
            data['maximumTerms'] = maximumTerms

            startRecord = int_arg('startRecord', 1, positive=True)
            data['startRecord'] = startRecord

            responsePosition = int_arg('responsePosition', 0)
            data['responsePosition'] = responsePosition

            # set content-type in HTTP header
            recordPacking = req.args.get('recordPacking', 'xml')
            if recordPacking == 'string':
                # TODO(jm)!!!
                self._headers['Content-Type'] = 'text/plain; charset=utf-8'
            elif recordPacking != 'xml':
                raise Exception(71, 'recordPacking', 'Unsupported record packing')

            # provide info about service
            if operation == 'explain':
                self._check_args(
                    req, supported_args,
                    ['recordPacking', 'x-fcs-endpoint-description']
                )
                corpus = self.cm.get_Corpus(corpname)
                import_str = partial(l10n.import_string, from_encoding=corpus.get_conf('ENCODING'))
                data['result'] = corpus.get_conf('ATTRLIST').split(',')
                data['numberOfRecords'] = len(data['result'])
                data['corpus_desc'] = u'Corpus {0} ({1} tokens)'.format(
                    import_str(corpus.get_conf('NAME')), l10n.simplify_num(corpus.size()))
                data['corpus_lang'] = Languages.get_iso_code(corpus.get_conf('LANGUAGE'))
                data['show_endpoint_desc'] = (
                    req.args.get('x-fcs-endpoint-description', 'false') == 'true')

            # wordlist for a given attribute
            elif operation == 'scan':
                self._check_args(
                    req, supported_args,
                    ['scanClause', 'responsePosition', 'maximumTerms', 'x-cmd-resource-info']
                )
                data['resourceInfoRequest'] = req.args.get('x-cmd-resource-info', '') == 'true'
                scanClause = req.args.get('scanClause', '')
                if scanClause.startswith('fcs.resource='):
                    # split only on the first '=' so a value containing '='
                    # is passed on as a whole
                    value = scanClause.split('=', 1)[1]
                    data['result'] = self._corpora_info(value, maximumTerms)
                else:
                    data['result'] = conclib.fcs_scan(
                        corpname, scanClause, maximumTerms, responsePosition)

            # simple concordancer
            elif operation == 'searchRetrieve':
                # TODO we should review the args here (especially x-cmd-context, resultSetTTL)
                self._check_args(
                    req, supported_args,
                    ['query', 'startRecord', 'maximumRecords', 'recordPacking',
                        'recordSchema', 'resultSetTTL', 'x-cmd-context', 'x-fcs-context']
                )
                if 'x-cmd-context' in req.args:
                    req_corpname = req.args['x-cmd-context']
                    user_corpora = plugins.runtime.AUTH.instance.permitted_corpora(
                        self.session_get('user'))
                    if req_corpname in user_corpora:
                        corpname = req_corpname
                    else:
                        _logger.warning(
                            'Requested unavailable corpus [%s], defaulting to [%s]', req_corpname, corpname)
                    data['corpname'] = corpname

                corp_conf_info = plugins.runtime.CORPARCH.instance.get_corpus_info('en_US',
                                                                                   corpname)
                data['corppid'] = corp_conf_info.get('web', '')
                query = req.args.get('query', '')
                corpus = self.cm.get_Corpus(corpname)
                if not query:
                    raise Exception(7, 'fcs_query', 'Mandatory parameter not supplied')
                data['result'], data['numberOfRecords'] = self.fcs_search(
                    corpus, corpname, query, maximumRecords, startRecord)

            # unsupported operation
            else:
                # show within explain template
                data['operation'] = 'explain'
                raise Exception(4, '', 'Unsupported operation')

        # catch exception and amend diagnostics in template
        except Exception as e:
            data['message'] = ('error', repr(e))
            try:
                # diagnostics raised above carry (code, details, msg);
                # 'args' is unpacked because an exception instance itself
                # is not iterable in Python 3
                data['code'], data['details'], data['msg'] = e.args
            except ValueError:
                # any other error (a different number of arguments)
                data['code'], data['details'] = 1, repr(e)
                data['msg'] = 'General system error'

        return data