Ejemplos de ValidationException en Python, ejemplos de gargantext.util.http.ValidationException en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: ngramlists.py Proyecto: project-renard-survey/gargantext

    def initial(self, request):
        """
        Before dispatching to put(), delete()...

        1) Checks current user authentication to prevent remote DB manipulation
        2) Prepares self.params, self.base_list and self.change_list from the
           request parameters; when the parameters carry no ngram ids, the
           "ngrams" entry of the request payload is used instead.

        Raises Http404 for unauthenticated users and ValidationException when
        neither the parameters nor the payload provide usable ngram ids.
        """

        if not request.user.is_authenticated():
            raise Http404()
            # can't use return in initial() (although 401 maybe better than 404)
            # can't use @requires_auth because of positional 'self' within class

        # get validated params
        self.params = get_parameters(request)

        (self.base_list, self.change_list) = ListChange._validate(self.params)

        if not len(self.change_list.items):
            # change_list can be in payload too
            ngrams_error = 'The "ngrams" parameter requires one or more ngram_ids separated by comma'

            try:
                payload_ngrams = request.data['ngrams']
            except (KeyError, TypeError):
                # no inline ngrams and none in the payload either
                raise ValidationException(ngrams_error)

            try:
                # empty tokens (eg trailing comma) are skipped so that the
                # emptiness check below is actually reachable
                change_ngram_ids = [
                    int(token) for token in payload_ngrams.split(',')
                    if token.strip()
                ]
            except (AttributeError, ValueError):
                raise ValidationException(ngrams_error)

            if not change_ngram_ids:
                raise ValidationException(ngrams_error)

            self.change_list = UnweightedList(change_ngram_ids)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: ngramlists.py Proyecto: fabelier/gargantext

    def get(self, request):
        """
        Returns the ngrams of a maplist with their details and scores.

        The maplist is designated either by:
          - a 'corpus' parameter (the MAPLIST child of the corpus is used and
            'scoring' is optional, defaulting to the OCCURRENCES child)
          - or both 'maplist' and 'scoring' parameters (explicit node ids)

        Raises ValidationException when neither combination is present.
        """
        parameters = get_parameters(request)

        maplist_id = None
        scores_id = None

        if "corpus" in parameters:
            corpus_id = parameters['corpus']
            corpus = cache.Node[corpus_id]
            maplist_id = corpus.children('MAPLIST').first().id
            # with a corpus_id, the explicit scoring pointer is optional
            if "scoring" in parameters:
                scores_id = parameters['scoring']
            else:
                scores_id = corpus.children('OCCURRENCES').first().id

        elif "maplist" in parameters and "scoring" in parameters:
            # read the same 'maplist' key that the condition just tested
            maplist_id = int(parameters['maplist'])
            scores_id = int(parameters['scoring'])
        else:
            raise ValidationException("A 'corpus' id or 'maplist' id is required, and a 'scoring' for occurences counts")

        # infos for all ngrams from maplist
        map_ngrams = query_list(maplist_id, details=True,
                                scoring_metric_id=scores_id).all()

        # ex:  [(8805, 'mean age', 4.0),
        #        (1632, 'activity', 4.0),
        #        (8423, 'present', 2.0),
        #        (2928, 'objective', 2.0)]

        # ngram details sorted per ngram id
        ngraminfo = {ng[0]: ng[1:] for ng in map_ngrams}

        # ngram ids sorted per list name
        # (maplist ngrams will already be <=> ngraminfos
        #  but the client side expects a membership lookup
        #  as when there are multiple lists or some groupings)
        listmembers = {'maplist': [ng[0] for ng in map_ngrams]}

        return JsonHttpResponse({
            'ngraminfos' : ngraminfo,
            'listmembers' : listmembers,
            'links' : {},   # no grouping links sent during glance (for speed)
            'nodeids' : {
                'mainlist':  None,
                'maplist' :  maplist_id,
                'stoplist':  None,
                'groups':  None,
                'scores':  None,
            }
        })

Ejemplo n.º 3

0

Mostrar archivo

Archivo: nodes.py Proyecto: project-renard-survey/gargantext

    def get(self, request, corpus_id):
        """
        Returns the documents of a corpus that contain the requested ngrams,
        ordered by decreasing summed score (read from the TFIDF-CORPUS node).

        Expects an "ngram_ids" parameter: integers separated by comma.

        The response has the form {'count': <total>, 'records': [...]},
        with at most DEFAULT_N_DOCS_HAVING_NGRAM records.

        Raises ValidationException on malformed "ngram_ids", on an unknown
        corpus id or when the corpus has no TFIDF-CORPUS node.
        """

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters = get_parameters(request)
        parameters = validate(parameters, {'score': str, 'ngram_ids': list})

        try:
            ngram_ids = [int(n) for n in parameters['ngram_ids'].split(',')]
        except (KeyError, AttributeError, TypeError, ValueError):
            # missing key, non-string value or non-integer token
            raise ValidationException(
                '"ngram_ids" needs integers separated by comma.')

        limit = DEFAULT_N_DOCS_HAVING_NGRAM
        nodes_list = []

        corpus = session.query(Node).filter(Node.id == corpus_id).first()
        if corpus is None:
            # .first() found no row: fail explicitly instead of crashing below
            raise ValidationException("%s is not a valid corpus node id." %
                                      corpus_id)

        tfidf_row = (session.query(Node.id).filter(
            Node.typename == "TFIDF-CORPUS",
            Node.parent_id == corpus.id).first())
        if tfidf_row is None:
            raise ValidationException(
                "No TFIDF-CORPUS node is defined for this corpus (%s)" %
                corpus_id)

        tfidf_id = tfidf_row[0]
        print(tfidf_id)
        # request data
        nodes_query = (session.query(Node, func.sum(NodeNodeNgram.score)).join(
            NodeNodeNgram, NodeNodeNgram.node2_id == Node.id).filter(
                NodeNodeNgram.node1_id == tfidf_id).filter(
                    Node.typename == 'DOCUMENT',
                    Node.parent_id == corpus.id).filter(
                        or_(*[
                            NodeNodeNgram.ngram_id == ngram_id
                            for ngram_id in ngram_ids
                        ])).group_by(Node))

        # get the total count before applying limit
        nodes_count = nodes_query.count()

        # now the query with the limit
        nodes_results_query = (nodes_query.order_by(
            func.sum(NodeNodeNgram.score).desc()).limit(limit))

        for node, score in nodes_results_query:
            print(node, score)
            print("\t corpus:", corpus_id, "\t", node.name)
            node_dict = {
                'id': node.id,
                'score': score,
            }
            # copy only the hyperdata fields that exist for this document
            for key in ('title', 'publication_date', 'source', 'authors',
                        'fields'):
                if key in node.hyperdata:
                    node_dict[key] = node.hyperdata[key]
            nodes_list.append(node_dict)

        return JsonHttpResponse({'count': nodes_count, 'records': nodes_list})

Ejemplo n.º 4

0

Mostrar archivo

Archivo: nodes.py Proyecto: project-renard-survey/gargantext

    def put(self, request, corpus_id, check_each_doc=True):
        """
        Adds the documents listed in the "docs" parameter (integers separated
        by comma) to the favorites node of the corpus.

        When check_each_doc is true, every requested id must be a DOCUMENT
        node whose parent is corpus_id, otherwise a ValidationException is
        raised and nothing is inserted.

        Returns a JSON response with 'count_added' (and a 'warning' when the
        corpus has no favorites node).
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        # user is ok
        fav_node = self._get_fav_node(corpus_id)

        if fav_node is None:
            return JsonHttpResponse({
                'warning':
                'No favorites node is defined for this corpus (\'%s\')' %
                self.corpus.name,
                'count_added':
                0
            })

        req_params = validate(get_parameters(request), {
            'docs': list,
            'default': ""
        })
        try:
            nodeids_to_add = [
                int(did) for did in req_params['docs'].split(',')
            ]
        except (KeyError, AttributeError, TypeError, ValueError):
            # report malformed input as a validation error, not a server crash
            raise ValidationException(
                '"docs" needs integers separated by comma.')

        if check_each_doc:
            # check that these really are documents of the right corpus
            # (a bit slow => disable by default ?)
            known_docs_q = (session.query(
                Node.id).filter(Node.parent_id == corpus_id).filter(
                    Node.typename == 'DOCUMENT'))
            known_ids = {known_doc.id for known_doc in known_docs_q.all()}

            rejected_list = [
                doc_node_id for doc_node_id in nodeids_to_add
                if doc_node_id not in known_ids
            ]
            if rejected_list:
                raise ValidationException(
                    "Error on some requested docs: %s (Only nodes of type 'doc' AND belonging to corpus %i can be added to favorites.)"
                    % (str(rejected_list), int(corpus_id)))

        # add them
        bulk_insert(NodeNode, ('node1_id', 'node2_id', 'score'),
                    ((fav_node.id, doc_node_id, 1.0)
                     for doc_node_id in nodeids_to_add))

        # todo count really added (here: counts input param not result)
        return JsonHttpResponse({'count_added': len(nodeids_to_add)})

Ejemplo n.º 5

0

Mostrar archivo

Archivo: ngramlists.py Proyecto: project-renard-survey/gargantext

    def _validate(params):
        """
        Checks "list" and "ngrams" parameters for their:
          - presence
          - type

        "list" is mandatory for any ListChange methods; "ngrams" is optional
        here (the caller may still find the ngram ids in the request payload).

        Returns a tuple (base_list, change_list) of UnweightedList objects:
          - base_list is built from the integer id found in params['list']
          - change_list is built from the ngram ids of params['ngrams']
            (from an empty list if they are absent or unparsable)

        Raises ValidationException if "list" is missing or is not an integer.
        """
        if 'list' not in params:
            # the example url uses the same key as the one checked above
            raise ValidationException(
                'The route /api/ngramlists/change requires a "list" '
                'parameter, for instance /api/ngramlists/change?list=42'
            )

        # 2 x retrieval => 2 x UnweightedLists
        # ------------------------------------
        try:
            base_list_id = int(params['list'])
        except (TypeError, ValueError):
            raise ValidationException(
                'The "list" parameter requires an existing list id.')
        # UnweightedList retrieved by id
        base_list = UnweightedList(base_list_id)

        try:
            change_ngram_ids = [int(n) for n in params['ngrams'].split(',')]
        except (KeyError, AttributeError, TypeError, ValueError):
            # ngrams no longer mandatory inline, see payload check afterwards
            change_ngram_ids = []
        # UnweightedList created from items
        change_list = UnweightedList(change_ngram_ids)

        return (base_list, change_list)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: nodes.py Proyecto: project-renard-survey/gargantext

    def get(self, request, node_id):
        """
        Facet query: counts the documents of a corpus grouped by the values
        of the hyperdata subfield named in the "hyperfield" parameter.

        Responds 401 to unauthenticated users and raises ValidationException
        when node_id does not designate a CORPUS node.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        # check that the node is a corpus
        #   ? faster from cache than: corpus = session.query(Node)...
        node = cache.Node[node_id]
        if node.typename != 'CORPUS':
            raise ValidationException(
                "Only nodes of type CORPUS can accept facet queries" +
                " (but this node has type %s)..." % node.typename)
        self.corpus = node

        # the only subfields that make sense for the hyperfield parameter
        allowed_subfields = [
            'source', 'publication_year', 'rubrique', 'language_iso2',
            'language_iso3', 'language_name', 'authors'
        ]
        raw_parameters = get_parameters(request)

        # validate() triggers an info message if subfield not in range
        facet_schema = {
            'type': dict,
            'items': {
                'hyperfield': {
                    'type': str,
                    'range': allowed_subfields
                }
            }
        }
        checked_parameters = validate(raw_parameters, facet_schema)
        subfield = checked_parameters['hyperfield']

        # do the aggregated sum
        counts_by_value, n_docs = self._ndocs_by_facet(subfield)

        # response
        payload = {'doc_count': n_docs, 'by': {subfield: counts_by_value}}
        return JsonHttpResponse(payload)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: nodes.py Proyecto: project-renard-survey/gargantext

    def _get_fav_node(self, corpus_id):
        """
        Looks up the first FAVORITES child of the corpus and stores the
        corpus node in self.corpus along the way.

        NB: the returned node can be None if no favorites node is defined

        Raises ValidationException if corpus_id is not a CORPUS node.

        this query could be faster if we didn't check that corpus_id is a CORPUS
        ie: session.query(Node)
            .filter(Node.parent_id==corpus_id)
            .filter(Node.typename =='FAVORITES')
        """
        node = cache.Node[corpus_id]

        if node.typename == 'CORPUS':
            self.corpus = node
            return self.corpus.children('FAVORITES').first()

        raise ValidationException(
            "Only nodes of type CORPUS can accept favorites queries" +
            " (but this node has type %s)..." % node.typename)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: nodes.py Proyecto: fabelier/gargantext

    def delete(self, request):
        """Removes the list of nodes corresponding to the query.

        Expects an "ids" parameter: integers separated by comma.
        Returns a JSON response {'deleted': <number of deleted rows>}.
        Raises ValidationException when "ids" cannot be parsed.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters = get_parameters(request)
        parameters = validate(parameters, {'ids': list})
        try:
            node_ids = [int(n) for n in parameters['ids'].split(',')]
        except (KeyError, AttributeError, TypeError, ValueError):
            # only input-related failures are turned into a validation error
            raise ValidationException(
                '"ids" needs integers separated by comma.')

        result = session.execute(delete(Node).where(Node.id.in_(node_ids)))
        session.commit()

        return JsonHttpResponse({'deleted': result.rowcount})

Ejemplo n.º 9

0

Mostrar archivo

    def patch(self, request, corpusnode_id):
        """
        PATCH triggers recount of metrics for the specified corpus.

        ex PATCH http://localhost:8000/api/metrics/14072
                                                   -----
                                                 corpus_id

        Returns a JSON response with the corpus id and the time spent in the
        scheduled(recount) call. Raises ValidationException when the id does
        not lead to a node.
        """
        print("==> update metrics request on ", corpusnode_id)

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        try:
            corpus = cache.Node[int(corpusnode_id)]
        except Exception:
            # any lookup/parsing failure means "no such corpus", but
            # SystemExit and KeyboardInterrupt are no longer swallowed
            corpus = None

        if corpus is None:
            raise ValidationException("%s is not a valid corpus node id." %
                                      corpusnode_id)

        t_before = datetime.now()
        # =============
        scheduled(recount)(corpus.id)
        # =============
        t_after = datetime.now()

        return JsonHttpResponse({
            'corpus_id':
            corpusnode_id,
            'took':
            "%f s." % (t_after - t_before).total_seconds()
        })

Ejemplo n.º 10

0

Mostrar archivo

Archivo: nodes.py Proyecto: project-renard-survey/gargantext

def _query_nodes(request, node_id=None):
    """
    Builds the query on the nodes of the requesting user, filtered and
    paginated according to the request parameters.

    Optional filters: node_id (argument), 'types' and 'parent_id' (params).
    Pagination: 'pagination_offset' is the index of the first result and
    'pagination_limit' the maximum number of results (-1 means no limit).

    Returns a tuple (parameters, query, count) where parameters are the
    validated parameters, query is the paginated result and count is the
    number of matching nodes before pagination.

    Raises TypeError if the request has no authenticated user, and
    ValidationException if 'hyperdata_filter' is used without 'hyperdata'
    among the requested fields.
    """

    if request.user.id is None:
        raise TypeError(
            "This API request must come from an authenticated user.")

    # we query among the nodes that belong to this user
    user = cache.User[request.user.id]

    # parameters validation
    # fixme: this validation does not allow custom keys in url (eg '?name=' for rename action)
    parameters = get_parameters(request)

    parameters = validate(
        parameters,
        {
            'type': dict,
            'items': {
                'formated': {
                    'type': str,
                    'required': False,
                    'default': 'json'
                },
                'pagination_limit': {
                    'type': int,
                    'default': 10
                },
                'pagination_offset': {
                    'type': int,
                    'default': 0
                },
                'fields': {
                    'type': list,
                    'default': _node_default_fields,
                    'items': {
                        'type': str,
                        'range': _node_available_fields,
                    }
                },
                # choice of hyperdata fields
                'hyperdata_filter': {
                    'type': list,
                    'required': False,
                    'items': {
                        'type': str,
                        'range': _hyperdata_available_fields,
                    }
                },
                # optional filtering parameters
                'types': {
                    'type': list,
                    'required': False,
                    'items': {
                        'type': str,
                        'range': _node_available_types,
                    }
                },
                'parent_id': {
                    'type': int,
                    'required': False
                },
            }
        })

    # debug
    # print('PARAMS', parameters)

    # additional validation for hyperdata_filter
    if (('hyperdata_filter' in parameters)
            and (not ('hyperdata' in parameters['fields']))):
        raise ValidationException(
            "Using the hyperdata_filter filter requires fields[]=hyperdata")

    # start the query
    query = user.nodes()

    # filter by id
    if node_id is not None:
        query = query.filter(Node.id == node_id)
    # filter by type
    if 'types' in parameters:
        query = query.filter(Node.typename.in_(parameters['types']))
    # filter by parent
    if 'parent_id' in parameters:
        query = query.filter(Node.parent_id == parameters['parent_id'])
    # count
    count = query.count()
    # order
    query = query.order_by(Node.hyperdata['publication_date'], Node.id)

    # paginate the query
    offset = parameters['pagination_offset']
    limit = parameters['pagination_limit']
    if limit == -1:
        query = query[offset:]
    else:
        # the slice end is an absolute index: it must be offset + limit,
        # otherwise every page after the first one is truncated or empty
        query = query[offset:offset + limit]
    # return the result!
    # (the receiver function does the filtering of fields and hyperdata_filter)
    return parameters, query, count

Ejemplo n.º 11

0

Mostrar archivo

def validate(value, expected, path='input'):
    """
    Recursively checks (and when possible coerces) value against the
    expected specification, and returns the resulting value.

    Keys of the expected dict that are taken into account:
      - 'type': required type; bool, int, float, str and datetime values
        are coerced from the input when it has another type
      - 'range': a tuple (min[, max]) bounding the value (or the length for
        str, list and dict), or a list/set of allowed values
      - 'translate': a callable or a mapping applied to the value, with
        'translate_fallback_keep', 'required' and 'default' driving what
        happens when the mapping has no entry for the value
      - 'items': specification of the elements (list) or of the keys (dict),
        where each key may declare 'required' and 'default'

    path is only used to locate the faulty value in error messages.
    Lists and dicts are updated in place with their validated elements.

    Raises ValidationException whenever the value does not comply.
    """
    # Is the expected type respected?
    if 'type' in expected:
        expected_type = expected['type']
        if not isinstance(value, expected_type):
            if expected_type in (bool, int, float, str, datetime, ):
                try:
                    if expected_type == bool:
                        value = value not in {0, 0.0, '', '0', 'false'}
                    elif expected_type == datetime:
                        # complete partial dates (eg '2015') with defaults
                        value = value + '2000-01-01T00:00:00Z'[len(value):]
                        value = datetime.strptime(value, '%Y-%m-%dT%H:%M:%SZ')
                    else:
                        value = expected_type(value)
                except (TypeError, ValueError):
                    # TypeError: eg int(None) or a non-string date
                    raise ValidationException('%s should be a JSON %s, but could not be parsed as such' % (path, _types_names[expected_type], ))
            else:
                raise ValidationException('%s should be a JSON %s' % (path, _types_names[expected_type], ))
    else:
        expected_type = type(value)

    # Is the value in the expected range?
    if 'range' in expected:
        expected_range = expected['range']
        if isinstance(expected_range, tuple):
            if expected_type in (str, list, dict):
                # sized types are bounded by their length
                tested_value = len(value)
                tested_name = 'length'
            else:
                # numbers and any other comparable type are bounded directly
                tested_value = value
                tested_name = 'value'
            if len(expected_range) > 0 and tested_value < expected_range[0]:
                raise ValidationException('%s should have a minimum %s of %s' % (path, tested_name, expected_range[0], ))
            if len(expected_range) > 1 and tested_value > expected_range[1]:
                raise ValidationException('%s should have a maximum %s of %s' % (path, tested_name, expected_range[1], ))
        elif isinstance(expected_range, (list, set, )) and value not in expected_range:
            expected_values = [str(allowed) for allowed in expected_range if isinstance(allowed, expected_type)]
            if len(expected_values) <= 16:
                expected_values_str = '", "'.join(expected_values)
                expected_values_str = '"' + expected_values_str + '"'
            else:
                # only the first values are shown when there are too many
                expected_values_str = '", "'.join(expected_values[:16])
                expected_values_str = '"' + expected_values_str + '"...'

            raise ValidationException('%s should take one of the following values: %s' % (path, expected_values_str, ))

    # Do we have to translate through a dictionary?
    if 'translate' in expected:
        translate = expected['translate']
        if callable(translate):
            value = translate(value)
            if value is None and expected.get('required', False):
                raise ValidationException('%s has been given an invalid value' % (path, ))
            return value
        try:
            value = translate[value]
        except KeyError:
            if expected.get('translate_fallback_keep', False):
                return value
            if expected.get('required', False):
                raise ValidationException('%s has been given an invalid value' % (path, ))
            else:
                return expected.get('default', value)

    # Are we handling an iterable?
    if expected_type in (list, dict):
        if 'items' in expected:
            expected_items = expected['items']
            if expected_type == list:
                for i, element in enumerate(value):
                    value[i] = validate(element, expected_items, '%s[%d]' % (path, i, ))
            elif expected_type == dict:
                if expected_items:
                    # unknown keys are rejected
                    for key in value:
                        if key not in expected_items:
                            raise ValidationException('%s should not have a "%s" key.' % (path, key, ))
                for expected_key, expected_value in expected_items.items():
                    if expected_key in value:
                        value[expected_key] = validate(value[expected_key], expected_value, '%s["%s"]' % (path, expected_key, ))
                    elif 'required' in expected_value and expected_value['required']:
                        raise ValidationException('%s should have a "%s" key.' % (path, expected_key, ))
                    elif 'default' in expected_value:
                        value[expected_key] = expected_value['default']

    # Let's return the proper value!
    return value

Ejemplo n.º 12

0

Mostrar archivo

Archivo: ngramlists.py Proyecto: project-renard-survey/gargantext

    def get(self, request):
        """
        Returns the ngrams of a family of lists with their details, the list
        memberships and the grouping links.

        The lists are designated either by:
          - a 'corpus' parameter (all lists are the children of the corpus,
            'scoring' is optional and defaults to the OCCURRENCES child)
          - or 'mainlist' and 'scoring' parameters, optionally completed by
            'groups', 'stoplist' and 'maplist' node ids

        With a 'head' parameter only the top ngrams of the mainlist are sent
        (no memberships, no links).

        Raises ValidationException when neither combination is present.
        """

        parameters = get_parameters(request)
        glance_limit = None
        mainlist_id = None
        scores_id = None
        groups_id = None
        other_list_ids = {'maplist': None, 'stoplist': None}

        # 1) retrieve a mainlist_id and other lists
        ##########################################

        # simple request: just refers to the parent corpus
        # ------------------------------------------------
        if "corpus" in parameters:
            corpus_id = parameters['corpus']
            corpus = cache.Node[corpus_id]
            # with a corpus_id, the explicit scoring pointer is optional
            if "scoring" in parameters:
                scores_id = parameters['scoring']
            else:
                scores_id = corpus.children('OCCURRENCES').first().id
            # retrieve the family of lists that have corpus as parent
            mainlist_id = corpus.children('MAINLIST').first().id
            groups_id = corpus.children('GROUPLIST').first().id
            other_list_ids['stoplist'] = corpus.children('STOPLIST').first().id
            other_list_ids['maplist'] = corpus.children('MAPLIST').first().id

        # custom request: refers to each list individually
        # -------------------------------------------------
        elif "mainlist" in parameters and "scoring" in parameters:
            mainlist_id = parameters['mainlist']
            scores_id = parameters['scoring']
            groups_id = None
            if 'groups' in parameters:
                # the groupings node has its own key (not the scoring one)
                groups_id = parameters['groups']
            for k in ['stoplist', 'maplist']:
                if k in parameters:
                    other_list_ids[k] = parameters[k]

        # or request has an error
        # -----------------------
        else:
            raise ValidationException(
                "Either a 'corpus' parameter or 'mainlist' & 'scoring' params are required"
            )

        # 2) get the infos for each list
        ################################
        ngraminfo = {}  # ngram details sorted per ngram id
        linkinfo = {}  # ngram groups sorted per ngram id
        listmembers = {}  # ngram ids sorted per list name

        if "head" in parameters:
            # head <=> only mainlist AND only k top ngrams
            glance_limit = int(parameters['head'])
            mainlist_query = query_list(mainlist_id,
                                        details=True,
                                        pagination_limit=glance_limit,
                                        scoring_metric_id=scores_id)

            # head triggered simplified form: just the top of the mainlist
            # TODO add maplist membership
            ngrams_which_need_detailed_info = mainlist_query.all()
        else:
            # infos for all ngrams from mainlist
            mainlist_query = query_list(mainlist_id,
                                        details=True,
                                        scoring_metric_id=scores_id)
            # infos for grouped ngrams, absent from mainlist
            hidden_ngrams_query = query_grouped_ngrams(groups_id, details=True)

            # infos for stoplist terms, absent from mainlist
            stop_ngrams_query = query_list(other_list_ids['stoplist'],
                                           details=True,
                                           scoring_metric_id=scores_id)

            # and for the other lists (stop and map)
            # no details needed here, just the member ids
            for li in other_list_ids:
                li_elts = query_list(other_list_ids[li], details=False).all()
                # simple array of ngram_ids
                listmembers[li] = [ng[0] for ng in li_elts]

            # and the groupings
            if groups_id:
                links = Translations(groups_id)
                linkinfo = links.groups

            ngrams_which_need_detailed_info = (mainlist_query.all()
                                               + hidden_ngrams_query.all()
                                               + stop_ngrams_query.all())

        # the output form of details is:
        # ngraminfo[id] => [term, weight]
        for ng in ngrams_which_need_detailed_info:
            ng_id = ng[0]
            ngraminfo[ng_id] = ng[1:]

            # NB the client js will sort mainlist ngs from hidden ngs after ajax
            #    using linkinfo (otherwise needs redundant listmembers for main)

        return JsonHttpResponse({
            'ngraminfos': ngraminfo,
            'listmembers': listmembers,
            'links': linkinfo,
            'nodeids': {
                'mainlist': mainlist_id,
                'maplist': other_list_ids['maplist'],
                'stoplist': other_list_ids['stoplist'],
                'groups': groups_id,
                'scores': scores_id,
            }
        })

Ejemplo n.º 13

0

Mostrar archivo

Archivo: ngrams.py Proyecto: project-renard-survey/gargantext

    def put(self, request):
        """
        Basic external access for *creating an ngram*
        ---------------------------------------------

         1 - checks user authentication before any changes

         2 - checks if the ngram is already in the Ngram table in DB
              if yes reuses its ngram_id (and optionally finds its mainform_id)
              otherwise adds the ngram to the Ngram table in DB

         3 - (if corpus param is present)
             indexes the ngram through the docs of the corpus

         4 - returns json with:
             'msg'   => a success msg
             'text'  => the initial text content
             'term'  => the normalized text content
             'id'    => the ngram_id (new or preexisting)
             'count' => the value returned by the indexation
                        (if corpus param is present)
             'group' => the mainform_id if applicable

        Any exception during steps 2 and 3 is reported as a 400 json response
        with the error message.

        possible inline parameters
        --------------------------
        @param    text=<ngram_string>         [required]
        @param    corpus=<CORPUS_ID>          [optional]
        @param    testgroup (true if present) [optional, requires corpus]
        """

        # 1 - check user authentication
        if not request.user.is_authenticated():
            res = HttpResponse("Unauthorized")
            res.status_code = 401
            return res

        # the params
        params = get_parameters(request)

        print("PARAMS", [(i,v) for (i,v) in params.items()])

        if 'text' in params:
            original_text = str(params.pop('text'))
            ngram_str = normalize_forms(normalize_chars(original_text))
        else:
            # adjacent literals (no backslash continuation) keep the message
            # free of the source indentation
            raise ValidationException(
                'The route PUT /api/ngrams/ is used to create a new ngram. '
                'It requires a "text" parameter, '
                'for instance /api/ngrams?text=hydrometallurgy')

        if ('testgroup' in params) and (not ('corpus' in params)):
            raise ValidationException("'testgroup' param requires 'corpus' param")

        # if we have a 'corpus' param (to do the indexing)...
        do_indexation = False
        if 'corpus' in params:
            # we retrieve the corpus...
            corpus_id = int(params.pop('corpus'))
            corpus_node = cache.Node[corpus_id]
            # and the user must also have rights on the corpus
            if request.user.id == corpus_node.user_id:
                do_indexation = True
            else:
                res = HttpResponse("Unauthorized")
                res.status_code = 401
                return res

        # number of "words" in the ngram
        ngram_size = len(findall(r' +', ngram_str)) + 1

        # do the additions
        try:
            log_msg = ""
            ngram_id = None
            mainform_id = None
            n_added = None

            preexisting = session.query(Ngram).filter(Ngram.terms==ngram_str).first()

            if preexisting is not None:
                ngram_id = preexisting.id
                log_msg += "ngram already existed (id %i)\n" % ngram_id

                # in the context of a corpus we can also check if has mainform
                if 'testgroup' in params:
                    groupings_row = (session.query(Node.id)
                                            .filter(Node.parent_id == corpus_id)
                                            .filter(Node.typename == 'GROUPLIST')
                                            .first()
                                     )
                    had_mainform = None
                    if groupings_row is not None:
                        # .first() yields a one-column row: compare with the
                        # scalar id, not with the row itself
                        groupings_id = groupings_row[0]
                        had_mainform = (session.query(NodeNgramNgram.ngram1_id)
                                              .filter(NodeNgramNgram.node_id == groupings_id)
                                              .filter(NodeNgramNgram.ngram2_id == preexisting.id)
                                              .first()
                                        )
                    if had_mainform:
                        mainform_id = had_mainform[0]
                        log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
                    else:
                        log_msg += "ngram was not in any group for this corpus"

            else:
                # 2 - insert into Ngrams
                new_ngram = Ngram(terms=ngram_str, n=ngram_size)
                session.add(new_ngram)
                session.commit()
                ngram_id = new_ngram.id
                log_msg += "ngram was added with new id %i\n" % ngram_id

            # 3 - index the term
            if do_indexation:
                n_added = index_new_ngrams([ngram_id], corpus_node)
                log_msg += 'ngram indexed in corpus %i\n' % corpus_id

            return JsonHttpResponse({
                'msg': log_msg,
                'text': original_text,
                'term': ngram_str,
                'id' : ngram_id,
                'group' : mainform_id,
                'count': n_added if do_indexation else 'no corpus provided for indexation'
                }, 200)

        # just in case
        except Exception as e:
            return JsonHttpResponse({
                'msg': str(e),
                'text': original_text
                }, 400)