Exemple #1
0
    def put(self, request):
        """Merge the request payload into the hyperdata of the caller's USER node.

        Every key/value pair of ``request.data`` is written into the node's
        ``hyperdata``; the node is then saved, committed and read back.

        Returns:
            A 202 response carrying the re-read hyperdata, or a 401 response
            when the user has no USER node.

        Raises:
            TypeError: if the request carries no user id.
        """
        if request.user.id is None:
            raise TypeError(
                "This API request must come from an authenticated user.")

        # we only look among the nodes that belong to this user
        user = cache.User[request.user.id]

        def fetch_user_node():
            # The same lookup is needed before and after the commit.
            return session.query(Node).filter(
                Node.user_id == user.id, Node.typename == "USER").first()

        node_user = fetch_user_node()
        if node_user is None:
            return Response({"detail": "Not Allowed"},
                            status=HTTP_401_UNAUTHORIZED)

        for key, value in request.data.items():
            node_user.hyperdata[key] = value
        node_user.save_hyperdata()
        session.add(node_user)
        session.commit()

        # Read the node back so the response reflects what was stored.
        node_user = fetch_user_node()
        print(node_user.hyperdata)
        payload = {
            "detail": "Updated user parameters",
            "hyperdata": node_user.hyperdata
        }
        return Response(payload, status=HTTP_202_ACCEPTED)
Exemple #2
0
def query_grouped_ngrams(groupings_id, details=False, scoring_metric_id=None):
    """
    Build the query listing the "hidden" ngram ids (ngram2_id) of a grouplist.

    Works only for grouplists.

    Parameters:
      - groupings_id: id of the node whose NodeNgramNgram rows are listed
      - details: if False, select only ngram2_id
                 if True, select couples (ngram2_id, terms)
      - scoring_metric_id: accepted but not used by this function

    Returns the (unexecuted) query object.
    """
    if details:
        # detailed contents (id + terms)
        listing = session.query(NodeNgramNgram.ngram2_id, Ngram.terms)
        listing = listing.join(Ngram, NodeNgramNgram.ngram2_id == Ngram.id)
    else:
        # simple contents
        listing = session.query(NodeNgramNgram.ngram2_id)

    # main filter: restrict to the requested grouping node
    return listing.filter(NodeNgramNgram.node_id == groupings_id)
Exemple #3
0
def query_groups(groupings_id, details=False):
    """
    Build the query listing couples (mainform,  subform)
                            aka     (ngram1_id, ngram2_id)

    Parameters:
      - groupings_id: id of the node whose NodeNgramNgram rows are listed
      - details: if False, select only the couples of ids
                 if True, select quadruplets
                 (ngram1_id, term1, ngram2_id, term2)

    Returns the (unexecuted) query object.
    """
    if details:
        # detailed contents (id + terms): Ngram is joined twice, so each
        # side of the couple needs its own alias
        main_form = aliased(Ngram)
        sub_form = aliased(Ngram)
        couples = session.query(
            NodeNgramNgram.ngram1_id,
            main_form.terms,
            NodeNgramNgram.ngram2_id,
            sub_form.terms,
        )
        couples = couples.join(main_form,
                               NodeNgramNgram.ngram1_id == main_form.id)
        couples = couples.join(sub_form,
                               NodeNgramNgram.ngram2_id == sub_form.id)
    else:
        # simple contents
        couples = session.query(NodeNgramNgram.ngram1_id,
                                NodeNgramNgram.ngram2_id)

    # main filter: restrict to the requested grouping node
    return couples.filter(NodeNgramNgram.node_id == groupings_id)
Exemple #4
0
    def get(self, request, project_id, corpus_id, view = "DOCUMENT"):
        '''GET corpus detail.

        Lists the children of the corpus whose typename equals the selected
        view (full documents by default). The view can be overridden with
        the "view" request parameter, which is upper-cased and only honoured
        when it is one of the known views.

        Returns a 404 response when the project or the corpus is not found.
        '''
        params = get_parameters(request)
        if "view" in params:
            filter_view = params["view"].upper()
            # Validate the *requested* view, not the current default.
            if filter_view in ["DOCUMENT", "JOURNAL", "TITLE", "ANALYTICS", "RESSOURCE"]:
                view = filter_view

        project = session.query(Node).filter(Node.id == project_id, Node.typename == "PROJECT").first()
        # The existence check must come first: project.id on None would raise.
        if project is None:
            return Response({'detail' : "PROJECT Node #%s not found" %(project_id) },
                                  status = status.HTTP_404_NOT_FOUND)
        check_rights(request, project.id)

        corpus = session.query(Node).filter(Node.id == corpus_id, Node.typename == "CORPUS").first()
        if corpus is None:
            return Response({'detail' : "CORPUS Node #%s not found" %(corpus_id) },
                                  status = status.HTTP_404_NOT_FOUND)

        documents = session.query(Node).filter(Node.parent_id == corpus_id, Node.typename == view).all()

        context = format_response(corpus, documents)
        return Response(context)
Exemple #5
0
    def get(self, request, doc_id):
        """Return the main hyperdata fields of a document, looked up by ID.

        The response also carries the first workflow status of the parent
        corpus. The publication date is reformatted with "%x" when it parses
        as "%Y-%m-%d %H:%M:%S"; otherwise the stored value is returned as is.

        Raises:
            APIException: if no node has this id.
        """
        node = session.query(Node).filter(Node.id == doc_id).first()
        # Check existence before touching any attribute of the node.
        if node is None:
            raise APIException('This node does not exist', 404)

        # NOTE(review): a missing parent corpus or an empty 'statuses' list
        # would still raise here -- confirm whether that can happen.
        corpus = session.query(Node).filter(Node.id == node.parent_id).first()
        corpus_workflow_status = corpus.hyperdata['statuses'][0]

        raw_date = node.hyperdata.get('publication_date')
        try:
            pub_date = datetime.datetime.strptime(
                raw_date, "%Y-%m-%d %H:%M:%S").strftime("%x")
        except (TypeError, ValueError):
            # TypeError: the date is absent (None) or not a string;
            # ValueError: it does not match the expected format.
            pub_date = raw_date

        data = {
            'corpus_status': corpus_workflow_status,
            'title': node.hyperdata.get('title'),
            'authors': node.hyperdata.get('authors'),
            'source': node.hyperdata.get('source'),
            'publication_date': pub_date,
            'full_text': node.hyperdata.get('full_text'),
            'abstract_text': node.hyperdata.get('abstract'),
            'id': node.id
        }
        return Response(data)
Exemple #6
0
    def put(self, request, project_id, corpus_id, view="DOCUMENT"):
        '''UPDATE corpus.

        Copies the "name" and "date" entries of the request payload onto the
        corpus node and commits. The "username" (change of owner) and
        "hyperdata" keys are recognised but not implemented yet, so they are
        ignored. The ``view`` parameter is not used.

        Returns a 404 response when the project or the corpus is not found,
        otherwise a 202 response.
        '''
        project = session.query(Node).filter(Node.id == project_id, Node.typename == "PROJECT").first()
        # The existence check must come first: project.id on None would raise.
        if project is None:
            return Response({'detail' : "PROJECT Node #%s not found" %(project_id) },
                                  status = status.HTTP_404_NOT_FOUND)
        # Do not rebind `project` to the return value of check_rights.
        check_rights(request, project.id)

        corpus = session.query(Node).filter(Node.id == corpus_id, Node.typename == "CORPUS").first()
        if corpus is None:
            return Response({'detail' : "CORPUS Node #%s not found" %(corpus_id) },
                                  status = status.HTTP_404_NOT_FOUND)

        for key, val in request.data.items():
            # "username" and "hyperdata" are accepted keys whose handling is
            # still to be written; only plain attributes are updated.
            if key in ("name", "date"):
                setattr(corpus, key, val)
        session.add(corpus)
        session.commit()
        return Response({"detail":"Updated corpus #%s" %str(corpus.id)}, status=HTTP_202_ACCEPTED)
Exemple #7
0
    def put(self, request):
        '''UPDATE EVERY project of a given user.

        For each PROJECT node owned by the requesting user, applies the
        accepted keys of the request payload: "name" and "date" are set as
        attributes, "username" reassigns the node's ``user_id``. Each node is
        committed individually.

        Returns a 202 response with the number of projects processed.
        '''
        # Kept from the original: the lookup fails early for an unknown user.
        user = cache.User[request.user.id]
        projects = session.query(Node).filter(
            Node.typename == "PROJECT", Node.user_id == request.user.id).all()
        uids = []
        for node in projects:
            for key, val in request.data.items():
                # only keys from the accepted list may be modified
                if key == "username":
                    # change of owner
                    # NOTE(review): the lookup below filters PROJECT nodes on
                    # Node.username and then uses the match's id as user_id;
                    # this looks like it should target the user table --
                    # confirm the intended model before relying on it.
                    new_owner = session.query(Node).filter(
                        Node.typename == "PROJECT",
                        Node.username == val).first()
                    if new_owner is not None:
                        setattr(node, "user_id", new_owner.id)
                elif key in ("name", "date"):
                    setattr(node, key, val)

            session.add(node)
            session.commit()
            uids.append(node.id)
        return Response({"detail": "Updated %s projects" % len(uids)},
                        status=HTTP_202_ACCEPTED)
    def get(self, request, corpus_id):
        """List the documents of a corpus that best match a set of ngrams.

        Reads the comma-separated ``ngram_ids`` request parameter, sums the
        scores recorded under the corpus' TFIDF-CORPUS node for those ngrams
        and returns the DOCUMENT nodes ordered by decreasing summed score,
        limited to DEFAULT_N_DOCS_HAVING_NGRAM records, together with the
        total number of matching documents.

        Returns:
            A 401 response for unauthenticated users, otherwise a JSON
            response ``{'count': ..., 'records': [...]}``.

        Raises:
            ValidationException: if ``ngram_ids`` cannot be read as integers
                separated by commas.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters = get_parameters(request)
        parameters = validate(parameters, {'score': str, 'ngram_ids': list})

        try:
            ngram_ids = [int(n) for n in parameters['ngram_ids'].split(',')]
        except (KeyError, AttributeError, TypeError, ValueError):
            # Missing parameter, non-string value or non-integer item; a bare
            # except would also have swallowed KeyboardInterrupt/SystemExit.
            raise ValidationException(
                '"ngram_ids" needs integers separated by comma.')

        limit = DEFAULT_N_DOCS_HAVING_NGRAM
        nodes_list = []

        # NOTE(review): .first() yields None when nothing matches, so an
        # unknown corpus or a corpus without TFIDF-CORPUS node raises below.
        corpus = session.query(Node).filter(Node.id == corpus_id).first()

        tfidf_id = (session.query(Node.id).filter(
            Node.typename == "TFIDF-CORPUS",
            Node.parent_id == corpus.id).first())

        tfidf_id = tfidf_id[0]
        print(tfidf_id)

        # documents of the corpus scored for any of the requested ngrams
        nodes_query = (
            session.query(Node, func.sum(NodeNodeNgram.score))
            .join(NodeNodeNgram, NodeNodeNgram.node2_id == Node.id)
            .filter(NodeNodeNgram.node1_id == tfidf_id)
            .filter(Node.typename == 'DOCUMENT', Node.parent_id == corpus.id)
            .filter(or_(*[
                NodeNodeNgram.ngram_id == ngram_id for ngram_id in ngram_ids
            ]))
            .group_by(Node))

        # get the total count before applying limit
        nodes_count = nodes_query.count()

        # now the query with the limit
        nodes_results_query = (nodes_query.order_by(
            func.sum(NodeNodeNgram.score).desc()).limit(limit))

        for node, score in nodes_results_query:
            print(node, score)
            print("\t corpus:", corpus_id, "\t", node.name)
            node_dict = {
                'id': node.id,
                'score': score,
            }
            # copy only the hyperdata fields that are present on the node
            for key in ('title', 'publication_date', 'source', 'authors',
                        'fields'):
                if key in node.hyperdata:
                    node_dict[key] = node.hyperdata[key]
            nodes_list.append(node_dict)

        return JsonHttpResponse({'count': nodes_count, 'records': nodes_list})
def do_stoplist(corpus, overwrite_id=None):
    '''
    Build and save the stop list of a corpus.

    The ngrams attached to the corpus' DOCUMENT children are tested with
    is_stop_word against the stop words of the "gargantua" user's STOPLIST;
    those that match are saved with weight -1.

    TODO do a function to get all stop words with social scores

    Parameters:
        - corpus: corpus node whose documents are scanned
        - overwrite_id: optional preexisting STOPLIST node to overwrite

    Returns the id of the STOPLIST node that was written.
    '''
    if not overwrite_id:
        # At this step of development, a new StopList should be created
        stoplist = corpus.add_child(name="Stoplist (in:%s)" % corpus.id,
                                    typename="STOPLIST")
        session.add(stoplist)
        session.commit()
        stoplist_id = stoplist.id
    else:
        # Reuse the preexisting StopList given by the caller
        stoplist_id = overwrite_id

    # Common resources: the StopList of the Gargantua super user
    gargantua_id = session.query(
        User.id).filter(User.username == "gargantua").first()
    rootStopList_id = session.query(Node.id).filter(
        Node.user_id == gargantua_id, Node.typename == "STOPLIST").first()

    ## stop_words :: [String]
    stop_words_query = session.query(Ngram.terms).join(
        NodeNgram, NodeNgram.ngram_id == Ngram.id)
    stop_words = stop_words_query.filter(
        NodeNgram.node_id == rootStopList_id).all()

    ## ngrams of the corpus' documents, one row per ngram id
    ngrams_query = session.query(Ngram.id, Ngram.terms)
    ngrams_query = ngrams_query.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
    ngrams_query = ngrams_query.join(Node, Node.id == NodeNgram.node_id)
    ngrams_query = ngrams_query.filter(Node.parent_id == corpus.id,
                                       Node.typename == "DOCUMENT")
    ngrams = ngrams_query.group_by(Ngram.id).all()

    # ngram id -> -1 for every ngram recognised as a stop word
    weights = {}
    for ngram in ngrams:
        if is_stop_word(ngram, stop_words=stop_words):
            weights[ngram[0]] = -1

    stop = LISTTYPES["STOPLIST"](weights)
    stop.save(stoplist_id)
    return stoplist_id
    def get(self, request, corpus_id):
        """
        2 possibilities with/without param

        1) GET http://localhost:8000/api/nodes/2/favorites
        (returns the full list of fav docs within corpus 2)

        2) GET http://localhost:8000/api/nodes/2/favorites?docs=53,54
        (will test if docs 53 and 54 are among the favorites of corpus 2)
        (returns the intersection of fav docs with [53,54], plus the
        requested ids that are not favorites under 'missing')

        Returns a 401 response for unauthenticated users. When the corpus
        has no favorites node, the response carries a warning and an empty
        'favdocs' list.
        """

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        fav_node = self._get_fav_node(corpus_id)

        req_params = validate(get_parameters(request), {
            'docs': list,
            'default': ""
        })

        if fav_node is None:
            # NOTE(review): relies on self.corpus having been set elsewhere
            # (presumably by _get_fav_node) -- confirm.
            response = {
                'warning':
                'No favorites node is defined for this corpus (\'%s\')' %
                self.corpus.name,
                'favdocs': []
            }
        elif 'docs' not in req_params:
            # each docnode associated to the favnode of this corpusnode
            q = (session.query(
                NodeNode.node2_id).filter(NodeNode.node1_id == fav_node.id))
            all_doc_ids = [row.node2_id for row in q.all()]
            response = {'favdocs': all_doc_ids}
        else:
            nodeids_to_check = [
                int(did) for did in req_params['docs'].split(',')
            ]

            # each docnode from the input list, if it is associated to the favnode
            q = (session.query(NodeNode.node2_id).filter(
                NodeNode.node1_id == fav_node.id).filter(
                    NodeNode.node2_id.in_(nodeids_to_check)))
            present_doc_ids = [row.node2_id for row in q.all()]
            # set lookup keeps the difference linear in the number of ids
            present_lookup = set(present_doc_ids)
            absent_doc_ids = [
                did for did in nodeids_to_check if did not in present_lookup
            ]
            response = {'favdocs': present_doc_ids, 'missing': absent_doc_ids}

        return JsonHttpResponse(response)
Exemple #11
0
    def post(self, request, project_id, corpus_id):
        '''ADD a new RESOURCE to CORPUS.

        As visible here, the method only validates that the project and the
        corpus exist (404 response otherwise) and checks the caller's rights
        on the project; nothing is added yet.
        '''
        project = session.query(Node).filter(Node.id == project_id, Node.typename == "PROJECT").first()
        # The existence check must come first: project.id on None would raise.
        if project is None:
            return Response({'detail' : "PROJECT Node #%s not found" %(project_id) },
                                  status = status.HTTP_404_NOT_FOUND)
        check_rights(request, project.id)

        corpus = session.query(Node).filter(Node.id == corpus_id, Node.typename == "CORPUS").first()
        if corpus is None:
            return Response({'detail' : "CORPUS Node #%s not found" %(corpus_id) },
                                  status = status.HTTP_404_NOT_FOUND)
Exemple #12
0
 def save(self, node_id=None):
     """Replace the NodeNgram rows of a node with the items of this object.

     Parameters:
       - node_id: target node id; defaults to ``self.id`` when available

     Raises ValueError when no id is given and the object has no ``id``.
     """
     from gargantext.models import NodeNgram
     if node_id is None:
         if not hasattr(self, 'id'):
             raise ValueError('Please mention an ID to save the node.')
         node_id = self.id
     # delete previous data
     previous_rows = session.query(NodeNgram).filter(
         NodeNgram.node_id == node_id)
     previous_rows.delete()
     session.commit()
     # insert new data: every item gets the weight 1.0
     new_rows = ((node_id, ngram_id, 1.0) for ngram_id in self.items)
     bulk_insert(NodeNgram, ('node_id', 'ngram_id', 'weight'), new_rows)
Exemple #13
0
    def post(self, request):
        '''CREATE a new project for a given user.

        Expects a non-empty "name" field in the request payload.

        Returns:
            406 when "name" is missing or empty, 409 (with the URL of the
            existing project) when a PROJECT node already has this name,
            otherwise 201 with the URL of the new project.
        '''
        # Kept from the original: the lookup fails early for an unknown user.
        user = cache.User[request.user.id]
        try:
            # project name
            name = request.data["name"]
        except (KeyError, AttributeError):
            # A missing key raises KeyError, which the original did not catch.
            return Response(
                {"detail": "Invalid POST method: \"name\" field is required "},
                status=HTTP_406_NOT_ACCEPTABLE)

        if name == "":
            return Response(
                {"detail": "Invalid POST method: \"name\" field is empty "},
                status=HTTP_406_NOT_ACCEPTABLE)

        project = session.query(Node).filter(Node.typename == "PROJECT",
                                             Node.name == name).first()
        if project is not None:
            return Response(
                {
                    "detail": "Project with this name already exists",
                    "url": "/projects/%s" % str(project.id)
                },
                status=HTTP_409_CONFLICT)

        user_node = session.query(UserNode).filter_by(
            user_id=request.user.id).one_or_none()

        if user_node is None:
            print(
                "??? Can't find UserNode for %r to create ProjectNode with name %r ???"
                % (request.user, name))

        # Without a UserNode the project is created with no parent.
        new_project = Node(
            user_id=request.user.id,
            typename='PROJECT',
            name=name,
            parent_id=user_node.id if user_node is not None else None,
        )

        session.add(new_project)
        session.commit()
        return Response(
            {
                "detail": "Created",
                "url": "/projects/%s" % str(new_project.id)
            },
            status=HTTP_201_CREATED)
Exemple #14
0
def notify_owner(corpus, cooc_id, distance, bridgeness):
    """Mail the owner of `corpus` a link to the graph that was just computed.

    The message is built from the corpus name, BASE_URL, the corpus ids and
    the graph parameters. When the owner's email is the empty string, a
    line is printed instead of sending a mail.
    """
    owner = session.query(User).filter(User.id == corpus.user_id).first()

    template = '''
    Bonjour,
    votre graph vient de se terminer dans votre corpus intitulé:
                        %s

    Vous pouvez accéder et renommer votre Graph à l'adresse:
    http://%s/projects/%d/corpora/%d/explorer?cooc_id=%d&distance=%s&bridgeness=%d

    Nous restons à votre disposition pour tout complément d'information.
    Cordialement
    --
        L'équipe de Gargantext (CNRS)

    '''
    fields = (corpus.name, BASE_URL, corpus.parent_id, corpus.id, cooc_id,
              distance, bridgeness)
    message = template % fields

    if owner.email == "":
        print("User %s (%d), has no email" % (owner.username, owner.id))
        return

    send_mail('[Gargantext] Votre Graph est calculé',
              message,
              '*****@*****.**', [owner.email],
              fail_silently=False)
def query_groups(groupings_id, details=False, sort=False):
    """
    Build the query listing couples (mainform,  subform)
                            aka     (ngram1_id, ngram2_id)

    Parameters:
      - groupings_id: id of the node whose NodeNgramNgram rows are listed
      - details: if False, select only the couples of ids
                 if True, select quadruplets
                 (ngram1_id, term1, ngram2_id, term2)
      - sort: order results by terms of ngram1 then ngram2

    Returns the (unexecuted) query object.
    """
    # The Ngram table is only joined when terms are selected or sorted on.
    with_terms = details or sort
    if with_terms:
        Ngram1 = Ngram
        Ngram2 = aliased(Ngram)

    if details:
        # detailed contents (id + terms)
        query = session.query(Ngram1.id, Ngram1.terms, Ngram2.id,
                              Ngram2.terms)
    else:
        # simple contents
        query = session.query(NodeNgramNgram.ngram1_id,
                              NodeNgramNgram.ngram2_id)

    if with_terms:
        query = query.join(Ngram1, NodeNgramNgram.ngram1_id == Ngram1.id)
        query = query.join(Ngram2, NodeNgramNgram.ngram2_id == Ngram2.id)

    if sort:
        query = query.order_by(Ngram1.terms, Ngram2.terms)

    # main filter: restrict to the requested grouping node
    return query.filter(NodeNgramNgram.node_id == groupings_id)
    def form_valid(self, form):
        """Authenticate the submitted credentials and log the user in.

        On success, makes sure a USER node exists for the account (creating
        one with hyperdata {"language": "fr"} when missing) before delegating
        to the parent class. Inactive or unknown users are handled by
        form_invalid.
        """
        credentials = form.cleaned_data
        user = authenticate(username=credentials['username'],
                            password=credentials['password'])

        if user is None or not user.is_active:
            return self.form_invalid(form)

        login(self.request, user)

        node_user = session.query(Node).filter(
            Node.user_id == user.id, Node.typename == "USER").first()
        if node_user is None:
            # The account exists in the auth table but not in the node
            # table: create the node (node.name <= user.username).
            node_user = Node(typename='USER',
                             name=user.username,
                             user_id=user.id)
            node_user.hyperdata = {"language":"fr"}
            session.add(node_user)
            session.commit()

        return super(LoginView, self).form_valid(form)
def notify_owner(corpus):
    """Mail the owner of `corpus` a link to the corpus once analysis is done.

    The message is built from the corpus name, BASE_URL and the corpus ids.
    When the owner's email is the empty string, a line is printed instead
    of sending a mail.
    """
    owner = session.query(User).filter(User.id == corpus.user_id).first()

    template = '''
    Bonjour,
    votre analyse sur Gargantext vient de se terminer.

    Vous pouvez accéder à votre corpus intitulé
        \"%s\"
    à l'adresse:

    http://%s/projects/%d/corpora/%d

    Nous restons à votre disposition pour tout complément d'information.
    Cordialement
    --
        L'équipe de Gargantext (CNRS)

    '''
    message = template % (corpus.name, BASE_URL, corpus.parent_id, corpus.id)

    if owner.email == "":
        print("User %s (%d), has no email" % (owner.username, owner.id))
        return

    send_mail('[Gargantext] Votre analyse est terminée',
              message,
              '*****@*****.**',
              [owner.email],
              fail_silently=False)
def nodes(parent=None,
          group_by='typename',
          order_by='typename',
          has_child='check'):
    """Build a query listing the nodes whose parent_id matches `parent`.

    Each row exposes the labelled columns id, name, typename, cnt and
    children.

    Parameters:
      - parent: a parent id, or an object whose ``id`` attribute is used
      - group_by: name of the Node attribute to group on; when falsy the
                  query groups on 'id'
      - order_by: passed unchanged to the query's order_by
      - has_child: when not None, child nodes are outer-joined and counted
                   in the 'children' column; when None that column is NULL

    Returns the (unexecuted) query object.
    """
    # Parsed as `group_by or (has_child is not None)`: aggregates are
    # selected as soon as either grouping or child counting is requested.
    if group_by or has_child is not None:
        select = [
            func.min(Node.id).label('id'),
            func.min(Node.name).label('name'),
            func.min(Node.typename).label('typename'),
            func.count(Node.id).label('cnt')
        ]
    else:
        # Plain listing: one row per node, with a constant count of 1.
        select = [
            Node.id.label('id'),
            Node.name.label('name'),
            Node.typename.label('typename'),
            literal_column('1').label('cnt')
        ]

    if has_child is not None:
        # A second alias of Node stands for the children of each node.
        N = aliased(Node)
        select.append(func.count(N.id).label('children'))
    else:
        select.append(literal_column('NULL').label('children'))

    # Accept either a node-like object or a raw id (None included).
    parent_id = getattr(parent, 'id', parent)
    q = session.query(*select).filter_by(parent_id=parent_id) \
               .group_by(getattr(Node, group_by if group_by else 'id'))

    if has_child is not None:
        # Outer join, so nodes without matching children are not filtered out.
        q = q.outerjoin(N, N.parent_id == Node.id).group_by(N.parent_id)

    return q.order_by(order_by)
def myProject_fromUrl(url):
    """
    myProject :: String -> Project

    Fetch the ProjectNode whose id is the fifth "/"-separated segment
    (index 4) of `url`.
    """
    segments = url.split("/")
    return session.query(ProjectNode).get(segments[4])
Exemple #20
0
def active_user(username, active=True):
    '''
    Set the is_active flag of the user named `username` and save it.

    Parameters:
      - username: username to look up
      - active: value stored in is_active; pass active=False to deactivate

    NOTE(review): an unknown username still raises AttributeError, because
    the lookup then returns None.
    '''
    user = session.query(User).filter(User.username == username).first()
    # Honour the parameter: the flag used to be forced to True.
    user.is_active = active
    user.save()
Exemple #21
0
def mass_account_creation(csv_file=None, init=False, test=False, notify=False):
    '''
    Create (or update) one account per line of a CSV file.

    CSV file as parameter, one account per line with a trailing comma:
    if partner:
        username,email,group_iscpif,password,
    else:
        username,email,group_others,password,

    Parameters:
      - csv_file: path of the CSV file to read
      - init: if True, existing users are updated through create_user
      - test: if True, del_user is called on the username and on the group
              right after each line is processed
      - notify: forwarded to create_user
    '''
    # The context manager closes the file even if a line fails to process.
    with open(csv_file, "r") as accounts:
        for line in accounts:
            if not line.strip():
                # a blank line cannot be unpacked into the five fields
                continue
            username, email, group, password, end = line.split(',')
            user = session.query(User).filter(User.username == username).first()

            if user is None:
                create_user(username, email, group=group, password=password, notify=notify)
                print("User %s is created" % (username))
            elif init:
                create_user(username, email, user=user, group=group, password=password, notify=notify)
                print("User %s exists and updated" % (username))
            else:
                print("User %s exists and not updated" % (username))

            if test:
                del_user(username)
                del_user(group)
Exemple #22
0
 def _copy(self, corpus, form):
     """Copy the documents (and their children) of an existing corpus into `corpus`.

     The source corpus is looked up from ``form["corpus_id"]``; its resource
     is tagged as cloned, its children are re-parented and added to
     `corpus`, and `corpus` is marked as complete and committed.

     Returns a 202 response listing the id of `corpus`.

     NOTE(review): only the undefined names were fixed here. Several
     assignments still look suspicious and should be confirmed against the
     intended data model: ``new_corpus.id = corpus.id`` overwrites the
     source's id, and ``doc.user_id`` receives a corpus id.
     """
     #find the target corpus
     new_corpus = session.query(Node).filter(
         Node.typename == "CORPUS",
         Node.corpus_id == form["corpus_id"]).first()
     #get the resource of this corpus and copy it too
     new_resource = self._find_resource_hyperdata(new_corpus, form)
     #copy new_corpus to previously created corpus
     new_resource.method = "cloned CORPUS #%i" % (new_corpus.id)
     new_corpus.id = corpus.id
     # change new_corpus ownership
     new_corpus.parent_id = corpus.parent_id
     new_corpus.user_id = corpus.user_id
     #get the documents of the existing corpus
     for doc in new_corpus.get_children():
         doc.parent_id = new_corpus.parent_id
         doc.user_id = new_corpus.id
         #store it into corpus
         new_doc = corpus.add_child(doc)
         for ngrams in doc.get_children():
             # re-parent each child of the document under its copy
             ngrams.parent_id = new_doc.id
             ngrams.user_id = new_corpus.user_id
             #store it into corpus
             new_doc.add_child(ngrams)
     #save the corpus
     corpus.status(action="copy", progress=1, complete=True)
     session.add(corpus)
     session.commit()
     return Response({"log": "Corpus created", "uids": [corpus.id]}, 202)
Exemple #23
0
def get_user_node(user):
    """Return the first USER node whose user_id is `user.id`.

    Returns None when `user` is None or when no such node is found.
    """
    if user is None:
        return None
    matching = session.query(Node).filter(
        Node.user_id == user.id, Node.typename == "USER")
    return matching.first()
    def delete(self, request):
        """
        Within a groupnode, deletes some group elements from some groups

        Data format just like in POST, everything in the url: the 'node'
        parameter names the group node, the remaining parameters are turned
        into (ngram1_id, ngram2_id) couples by links_to_couples.

        Returns a JSON response with the number of rows removed.
        """
        url_params = get_parameters(request)
        # the node param is unique; what is left are the links to remove
        group_node = url_params.pop('node')
        couples_to_remove = self.links_to_couples(url_params)

        # remove selectively group_couples
        # using IN is correct in this case: list of ids is short and external
        # see stackoverflow.com/questions/444475/
        couple_columns = tuple_(NodeNgramNgram.ngram1_id,
                                NodeNgramNgram.ngram2_id)
        db_rows = session.query(NodeNgramNgram).filter(
            NodeNgramNgram.node_id == group_node)
        db_rows = db_rows.filter(couple_columns.in_(couples_to_remove))

        n_removed = db_rows.delete(synchronize_session=False)
        session.commit()

        return JsonHttpResponse({'count_removed': n_removed}, 200)
Exemple #25
0
def mass_account_creation(fichier=None, init=False):
    """Create (or update) one account per line of a CSV file.

    Each line holds ``username,email,password,`` (note the trailing comma).

    Parameters:
      - fichier: path of the CSV file; defaults to "/tmp/comptes.csv"
      - init: if True, users that already exist are updated through
              create_user; otherwise they are left untouched

    Users that do not exist yet are always created.
    """
    if fichier is None:
        fichier = "/tmp/comptes.csv"
    # The context manager closes the file even if a line fails to process.
    with open(fichier, "r") as accounts:
        for line in accounts:
            if not line.strip():
                # a blank line cannot be unpacked into the four fields
                continue
            username, email, password, fin = line.split(',')
            # .first() returns None for an unknown user instead of raising,
            # so existence is tested explicitly rather than with try/except.
            user = session.query(User).filter(
                User.username == username).first()
            if user is None:
                print("User %s does not exist already" % (username))
                create_user(username,
                            email,
                            password=password,
                            active=True,
                            notify=True)
            else:
                print("User %s does exist already" % (username))
                if init:
                    create_user(username,
                                email,
                                user=user,
                                password=password,
                                active=True,
                                notify=True)
                    print("User %s updated" % (username))
    def get(self, request, node_id):
        """Return the "statuses" entry of a node owned by the requesting user.

        Returns:
            A 401 response for unauthenticated users, a 404 response when
            the user owns no node with this id, otherwise a response
            ``{"statuses": ...}`` where the value is None if the node's
            hyperdata has no "statuses" key.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        user = cache.User[request.user.id]
        # check_rights(request, node_id) is not called here: the filter on
        # user_id below does the job
        owned_node = session.query(Node).filter(
            Node.id == node_id, Node.user_id == user.id)
        node = owned_node.first()
        if node is None:
            return Response({"detail": "Node not Found for this user"},
                            status=HTTP_404_NOT_FOUND)

        # FIXME using the more generic strategy ---------------------------
        # context = format_response(node, [n for n in node.children()])
        # or perhaps ? context = format_response(None, [node])
        # -----------------------------------------------------------------

        # using a more direct strategy
        try:
            statuses = node.hyperdata["statuses"]
        except KeyError:
            statuses = None
        return Response({"statuses": statuses})
def corpus_list(corpus_id,
                list_types=ALL_LIST_TYPES,
                with_synonyms=False,
                with_count=False):
    """
    Build the query listing the ngrams found in the lists of a corpus.

    Parameters:
      - corpus_id: id of the corpus node whose lists are read
      - list_types: wanted list types, passed as-is to `_ngrams`
                    (rows are labelled 'main', 'map' or 'stop' according to
                    the typename of the list node)
      - with_synonyms: if True, also add the grouped forms (ngram2) recorded
                       in the corpus GROUPLIST
      - with_count: if True, each row also carries the score stored for the
                    ngram under the corpus OCCURRENCES node (outer join, so
                    the score may be missing)

    Returns:
      - without count: a query of (type, ng) rows ordered by type, then ng
      - with count: a query of (type, ng, score) rows, with no ordering
    """
    # Link between a GROUPLIST, a normal form (ngram1), and a synonym (ngram2)
    NNN = NodeNgramNgram

    # Get the list type from the Node type -- as in CSV export
    list_type = (case([(Node.typename == 'MAINLIST', 'main'),
                       (Node.typename == 'MAPLIST', 'map'),
                       (Node.typename == 'STOPLIST', 'stop')]).label('type'))

    # We will retrieve each ngram as the following tuple:
    entities = (list_type, Ngram.terms.label('ng'))

    # The id is only needed to join scores afterwards
    if with_count:
        entities += (Ngram.id.label('id'), )

    # First, get ngrams from wanted lists
    ngrams = _ngrams(corpus_id, list_types, entities)

    # Secondly, exclude "synonyms" (grouped ngrams that are not normal forms).
    # We have to exclude synonyms first because data is inconsistent and some
    # of them can be both in GROUPLIST and in MAIN/MAP/STOP lists. We want to
    # take synonyms from GROUPLIST only -- see below.
    Groups = aliased(Node, name='groups')
    query = (ngrams.outerjoin(
        Groups, (Groups.parent_id == corpus_id) &
        (Groups.typename == 'GROUPLIST')).outerjoin(
            NNN, (NNN.node_id == Groups.id) &
            (NNN.ngram2_id == Ngram.id)).filter(NNN.ngram1_id.is_(None)))

    # If `with_synonyms` is True, add them from GROUPLIST: this is the reliable
    # source for them
    if with_synonyms:
        Synonym = aliased(Ngram)
        # Both sides of the UNION below must select the same number of
        # columns, so the id column is added here only when `entities` has it.
        ent = (list_type, Synonym.terms.label('ng'))
        if with_count:
            ent += (Synonym.id.label('id'), )
        synonyms = (ngrams.with_entities(*ent).filter(
            NNN.ngram1_id == Ngram.id, NNN.ngram2_id == Synonym.id,
            NNN.node_id == Groups.id, Groups.parent_id == corpus_id,
            Groups.typename == 'GROUPLIST'))
        query = query.union(synonyms)

    # Again, data is inconsistent: MAINLIST may intersect with MAPLIST and
    # we don't want that
    if 'main' in list_types and 'map' not in list_types:
        # Exclude MAPLIST ngrams from MAINLIST
        query = query.except_(_ngrams(corpus_id, 'map', entities))

    if with_count:
        # Wrap the ngram listing in a subquery and attach the score of each
        # ngram; the outer join keeps ngrams that have no score row.
        N = query.subquery()
        return (session.query(N.c.type, N.c.ng, NodeNodeNgram.score).join(
            Node, (Node.parent_id == corpus_id) &
            (Node.typename == 'OCCURRENCES')).outerjoin(
                NodeNodeNgram, (NodeNodeNgram.ngram_id == N.c.id) &
                (NodeNodeNgram.node1_id == Node.id) &
                (NodeNodeNgram.node2_id == corpus_id)))

    # Return found ngrams sorted by list type, and then alphabetically
    return query.order_by('type', 'ng')
Exemple #28
0
 def get(self, request):
     """
     Return the hyperdata of the requesting user's USER node.

     Responds with 404 and {"detail": "Not Found"} when the user has no
     node of typename "USER".
     """
     node_user = session.query(Node).filter(
         Node.user_id == request.user.id, Node.typename == "USER").first()
     if node_user is None:
         # same status constant as the other handlers of this file
         return Response({"detail": "Not Found"},
                         status=HTTP_404_NOT_FOUND)
     return Response(node_user.hyperdata)
Exemple #29
0
    def put(self, request, project_id):
        '''UPDATE project

        Reads the request parameters and updates the "name" and/or "date"
        attributes of the project node. A "username" parameter is accepted
        but changing ownership is not implemented; any other key is ignored.

        Responds with:
          - 404 when no node has this id
          - 406 when any parameter has an empty value
          - 409 when a node with the requested name already exists
          - 206 once the changes are committed

        Every parameter is validated before the node is touched, so a
        rejected request leaves no pending change on the shared session.
        '''
        project = session.query(Node).filter(Node.id == project_id).first()

        if project is None:
            return Response(
                {'detail': "PROJECT Node #%s not found" % (project_id)},
                status=HTTP_404_NOT_FOUND)
        check_rights(request, project_id)
        params = get_parameters(request)

        # First pass: validate, and collect what has to be written.
        updates = {}
        for key, val in params.items():
            if len(val) == 0:
                return Response(
                    {
                        "detail":
                        "Invalid POST method: \"%s\" field is empty " % key
                    },
                    status=HTTP_406_NOT_ACCEPTABLE)
            if key == "name":
                # the name must not be used by any existing node
                other = session.query(Node).filter(
                    Node.name == val).count()
                if other != 0:
                    return Response(
                        {
                            "detail":
                            "Project with this name already exists"
                        },
                        status=HTTP_409_CONFLICT)
                updates[key] = val
            elif key == "date":
                updates[key] = val
            # key == "username": change of ownership, not implemented yet

        # Second pass: everything is valid, apply and persist.
        for key, val in updates.items():
            setattr(project, key, val)
        session.add(project)
        session.commit()
        return Response({"detail": "Updated PROJECT #%s" % str(project_id)},
                        status=HTTP_206_PARTIAL_CONTENT)
    def put(self, request, corpus_id, check_each_doc=True):
        """
        Add documents to the favorites node of a corpus.

        The request parameter "docs" is read as a comma-separated string of
        node ids.

        Parameters:
          - corpus_id: id of the corpus whose favorites are modified
          - check_each_doc: if True, first verify that every requested id is
                            a DOCUMENT node whose parent is this corpus

        Responds with:
          - 401 when the requester is not authenticated
          - a JSON warning with 'count_added': 0 when the corpus has no
            favorites node
          - otherwise JSON {'count_added': n}, n being the number of ids
            requested (not the number of rows actually inserted)

        Raises ValidationException when check_each_doc is set and some id is
        not a document of this corpus.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        # user is ok
        fav_node = self._get_fav_node(corpus_id)

        if fav_node is None:
            # NOTE(review): self.corpus is presumably set by _get_fav_node
            # -- confirm
            return JsonHttpResponse({
                'warning':
                'No favorites node is defined for this corpus (\'%s\')' %
                self.corpus.name,
                'count_added':
                0
            })

        req_params = validate(get_parameters(request), {
            'docs': list,
            'default': ""
        })
        nodeids_to_add = [
            int(did) for did in req_params['docs'].split(',')
        ]

        if check_each_doc:
            # check that these really are documents of the right corpus
            # (somewhat slow: disable by default?)
            known_docs_q = (session.query(
                Node.id).filter(Node.parent_id == corpus_id).filter(
                    Node.typename == 'DOCUMENT'))
            known_ids = {known_doc.id for known_doc in known_docs_q.all()}
            rejected_list = [
                doc_node_id for doc_node_id in nodeids_to_add
                if doc_node_id not in known_ids
            ]
            if rejected_list:
                raise ValidationException(
                    "Error on some requested docs: %s (Only nodes of type 'doc' AND belonging to corpus %i can be added to favorites.)"
                    % (str(rejected_list), int(corpus_id)))

        # add them
        bulk_insert(NodeNode, ('node1_id', 'node2_id', 'score'),
                    ((fav_node.id, doc_node_id, 1.0)
                     for doc_node_id in nodeids_to_add))

        # todo count really added (here: counts input param not result)
        return JsonHttpResponse({'count_added': len(nodeids_to_add)})