def form_valid(self, form):
    """Authenticate the submitted credentials and log the user in.

    When authentication succeeds for an active account, the user is logged
    in and a ``USER`` node mirroring the auth account is created if the node
    table does not hold one yet. Any other outcome is handled as an invalid
    form.
    """
    credentials = form.cleaned_data
    username = credentials['username']
    password = credentials['password']
    user = authenticate(username=username, password=password)

    # Guard clause: unknown credentials or a deactivated account.
    if user is None or not user.is_active:
        return self.form_invalid(form)

    login(self.request, user)

    user_node_query = session.query(Node).filter(
        Node.user_id == user.id,
        Node.typename == "USER",
    )
    node_user = user_node_query.first()
    if node_user is None:
        # The account exists in the auth table but has no counterpart in the
        # node table: create it (node "name" <- auth "username").
        node_user = Node(
            typename='USER',
            name=user.username,
            user_id=user.id,
        )
        node_user.hyperdata = {"language": "fr"}
        session.add(node_user)
        session.commit()

    return super(LoginView, self).form_valid(form)
def create_corpus(self):
    """Create the CORPUS node for this crawl, attach its resources and index it.

    One resource is added per entry of ``self.paths``; when that list is
    empty the single ``self.path`` is used instead. The corpus is then
    committed and handed to ``parse_extract_indexhyperdata``.

    Side effects:
        Sets ``self.corpus_id`` to the id of the committed corpus.

    Returns:
        The committed corpus node.
    """
    corpus = Node(
        name=self.query,
        user_id=self.user_id,
        parent_id=self.project_id,
        typename='CORPUS',
        hyperdata={
            "action": "Scrapping data",
            "language_id": self.type["default_language"],
        },
    )

    # Both original branches did the same work; only the list of paths differs.
    paths = self.paths if len(self.paths) > 0 else [self.path]
    for path in paths:
        corpus.add_resource(
            type=self.type["type"],
            name=self.type["name"],
            path=path,
        )

    session.add(corpus)
    session.commit()

    # Read the id only after the commit: it was previously copied right after
    # construction, before the node had been added to the session.
    self.corpus_id = corpus.id

    # NOTE(review): this calls parse_extract_indexhyperdata immediately and
    # passes its *result* to scheduled(); if scheduled() is meant to wrap the
    # function itself, this should read
    # scheduled(parse_extract_indexhyperdata)(corpus.id) - confirm.
    scheduled(parse_extract_indexhyperdata(corpus.id))
    return corpus
def put(self, request):
    """Merge the request payload into the hyperdata of the caller's USER node.

    Raises:
        TypeError: if the request carries no user id.

    Returns:
        A 401 response when the caller has no USER node, otherwise a 202
        response carrying the hyperdata as re-read after the commit.
    """
    if request.user.id is None:
        raise TypeError(
            "This API request must come from an authenticated user.")

    # Only nodes belonging to this user are considered.
    user = cache.User[request.user.id]

    def fetch_user_node():
        # Same lookup is needed before the update and after the commit.
        return session.query(Node).filter(
            Node.user_id == user.id, Node.typename == "USER").first()

    node_user = fetch_user_node()
    if node_user is None:
        return Response({"detail": "Not Allowed"},
                        status=HTTP_401_UNAUTHORIZED)

    for key, value in request.data.items():
        node_user.hyperdata[key] = value
    node_user.save_hyperdata()
    session.add(node_user)
    session.commit()

    # Read the node back so the response shows what the session now returns.
    node_user = fetch_user_node()
    print(node_user.hyperdata)
    body = {
        "detail": "Updated user parameters",
        "hyperdata": node_user.hyperdata,
    }
    return Response(body, status=HTTP_202_ACCEPTED)
def delete(self, request):
    """
    Within a groupnode, deletes some group elements from some groups

    Data format just like in POST, everything in the url
    """
    params = get_parameters(request)

    # 'node' is the unique group node; every remaining parameter describes
    # links to remove.
    group_node = params.pop('node')
    couples_to_remove = self.links_to_couples(params)

    # Selective removal with IN on the (ngram1_id, ngram2_id) pair: the list
    # of ids is short and comes from the request
    # (see stackoverflow.com/questions/444475/).
    ngram_pair = tuple_(NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id)
    matching_rows = (
        session.query(NodeNgramNgram)
        .filter(NodeNgramNgram.node_id == group_node)
        .filter(ngram_pair.in_(couples_to_remove))
    )
    n_removed = matching_rows.delete(synchronize_session=False)
    session.commit()

    return JsonHttpResponse({'count_removed': n_removed}, 200)
def put(self, request, project_id, corpus_id, view="DOCUMENT"):
    '''UPDATE corpus

    Copies the "name" and "date" entries of the request payload onto the
    corpus node. "username" (ownership change) and "hyperdata" are accepted
    but not implemented yet, and every other key is ignored.

    Returns:
        404 when the project or the corpus cannot be found, 202 once the
        corpus has been committed.
    '''
    project = session.query(Node).filter(
        Node.id == project_id, Node.typename == "PROJECT").first()
    # Check for a missing project *before* dereferencing it: the previous
    # code read project.id first and crashed on unknown ids.
    if project is None:
        return Response(
            {'detail': "PROJECT Node #%s not found" % (project_id)},
            status=status.HTTP_404_NOT_FOUND)
    project = check_rights(request, project.id)
    if project is None:
        return Response(
            {'detail': "PROJECT Node #%s not found" % (project_id)},
            status=status.HTTP_404_NOT_FOUND)

    corpus = session.query(Node).filter(
        Node.id == corpus_id, Node.typename == "CORPUS").first()
    if corpus is None:
        return Response(
            {'detail': "CORPUS Node #%s not found" % (corpus_id)},
            status=status.HTTP_404_NOT_FOUND)

    for key, val in request.data.items():
        # "username" and "hyperdata" are recognised keys with no
        # implementation yet; they are deliberately left untouched.
        if key in ("name", "date"):
            # The update targets the corpus (the old code used an undefined
            # name `node` here and in session.add below).
            setattr(corpus, key, val)

    session.add(corpus)
    session.commit()

    # NOTE(review): this method mixes `status.HTTP_*` and the bare
    # `HTTP_202_ACCEPTED` name, exactly as before - confirm both are imported.
    return Response({"detail": "Updated corpus #%s" % str(corpus.id)},
                    status=HTTP_202_ACCEPTED)
def put(self, request):
    '''UPDATE EVERY projects of a given user

    Accepted payload keys:
      - "name", "date": copied onto every project of the caller;
      - "username": ownership change. The new owner is looked up as a node
        with typename "USER" whose name equals the given username; when no
        such node exists the owner is left unchanged.
    Every other key is ignored.
    '''
    user = cache.User[request.user.id]
    projects = session.query(Node).filter(
        Node.typename == "PROJECT",
        Node.user_id == request.user.id).all()

    # Resolve the payload once: it does not depend on the project being
    # updated.
    updates = {}
    for key, val in request.data.items():
        if key == "username":
            # The previous code referenced undefined names (`username`,
            # `user_id`) and called the builtin set() instead of setattr().
            # NOTE(review): relies on USER nodes carrying the auth username
            # in `name` and the auth id in `user_id` - confirm.
            owner_node = session.query(Node).filter(
                Node.typename == "USER",
                Node.name == val).first()
            if owner_node is not None:
                updates["user_id"] = owner_node.user_id
        elif key in ("name", "date"):
            updates[key] = val

    uids = []
    for node in projects:
        for attr, val in updates.items():
            setattr(node, attr, val)
        session.add(node)
        uids.append(node.id)
    session.commit()

    return Response({"detail": "Updated %s projects" % len(uids)},
                    status=HTTP_202_ACCEPTED)
def _parse(self, corpus, form):
    '''Internal method: create a RESOURCE node under `corpus` and run the
    workflow on it.

    The resource takes its method and uploaded file path from `form`, plus
    the default attributes returned by get_resource() for the selected
    source. Errors raised by the workflow are printed, not propagated.

    Returns True in every case.
    '''
    # 1. create the resource
    resource = Node(
        user_id=corpus.user_id,
        parent_id=corpus.id,
        typename="RESOURCE",
    )
    resource.method = form["method"]
    resource.path = upload(form['file'])

    # Map the default attributes of the selected source onto the resource.
    source_defaults = get_resource(int(form["source"]))
    for attr, value in source_defaults.items():
        setattr(resource, attr, value)

    resource.status(action="parse", progress=1, complete=False)
    session.add(resource)
    session.commit()

    try:
        workflow(resource)
    except Exception as e:
        # Best effort: report the failure on the console and carry on.
        from traceback import print_tb
        print("=======except dans _parse===========")
        print(e)
        print_tb(e.__traceback__)
        print("====================================")
    return True
def _copy(self, corpus, form):
    """Copy the corpus designated by form["corpus_id"] into `corpus`.

    The source corpus, its documents and their children are re-parented
    under `corpus` and take over its owner.

    Returns:
        A 202 response listing the id of `corpus`.
    """
    # Find the corpus to copy from.
    # NOTE(review): the filter uses `Node.corpus_id`; other queries in this
    # code base select nodes by `Node.id` - confirm the column exists.
    new_corpus = session.query(Node).filter(
        Node.typename == "CORPUS",
        Node.corpus_id == form["corpus_id"]).first()

    # Get the resource of this corpus and tag it as a clone.
    new_resource = self._find_resource_hyperdata(new_corpus, form)
    # Fixed: the assignment used the misspelled name `new_resouce`.
    new_resource.method = "cloned CORPUS #%i" % (new_corpus.id)

    # NOTE(review): this overwrites the id of the source corpus with the id
    # of the target one - kept as is, but confirm it is intended.
    new_corpus.id = corpus.id
    # Change ownership of the copied corpus.
    new_corpus.parent_id = corpus.parent_id
    new_corpus.user_id = corpus.user_id

    # Move the documents of the existing corpus.
    for doc in new_corpus.get_children():
        doc.parent_id = new_corpus.parent_id
        # Fixed: user_id was given a node id (new_corpus.id); use the owner,
        # as the child loop below already does.
        doc.user_id = new_corpus.user_id
        new_doc = corpus.add_child(doc)
        for ngrams in doc.get_children():
            # Fixed: the loop body used an undefined name `new_ngrams`.
            ngrams.parent_id = new_doc.id
            ngrams.user_id = new_corpus.user_id
            new_doc.add_child(ngrams)

    # Save the corpus.
    corpus.status(action="copy", progress=1, complete=True)
    session.add(corpus)
    session.commit()
    return Response({"log": "Corpus created", "uids": [corpus.id]}, 202)
def test_011_node_write(self):
    '''Write new_node to the DB, commit, and check it became attached.'''
    from gargantext.util.db import session

    node = self.new_node
    # Before the write the instance state must not be attached.
    self.assertFalse(node._sa_instance_state._attached)

    session.add(node)
    session.commit()

    self.assertTrue(node._sa_instance_state._attached)
def create_project(self):
    """Create and commit a PROJECT node, then keep it on ``self.project``."""
    project_fields = {'typename': 'PROJECT', 'name': "My project"}
    project = Node(**project_fields)
    session.add(project)
    session.commit()
    # Only exposed once the commit went through.
    self.project = project
def create_corpus(self):
    """Add a default CORPUS child to ``self.project`` and commit it.

    The new node is stored on ``self.corpus``.
    """
    default_corpus = self.project.add_child(
        name="My Corpus",
        typename='CORPUS',
    )
    self.corpus = default_corpus
    session.add(default_corpus)
    session.commit()
def _sample(self, resource):
    """Sample records with the resource's crawler and mark the corpus as sampled.

    Returns a 200 response listing the corpus id.
    """
    # NOTE(review): `corpus` and `form` are neither parameters nor locals of
    # this method; unless they exist at module level this line raises
    # NameError. The `resource` argument is also overwritten here - confirm
    # the intended signature.
    resource = self._find_resource_hyperdata(corpus, form)

    # NOTE(review): eval() on a value stored on the resource - confirm that
    # `resource.crawler` can never carry untrusted input.
    crawler_class = eval(resource.crawler)
    crawlbot = crawler_class(resource)

    sampled = crawlbot.sample()
    resource.ids = sampled

    corpus.status(action="sample", progress=1, complete=True)
    session.add(corpus)
    session.commit()

    payload = {"uids": [corpus.id]}
    return Response(payload, status=HTTP_200_OK)
def _fetch(self, resource):
    '''Internal method: fetch the resource with its crawler and mark the
    corpus status for the "fetch" action.

    Returns a 200 response listing the corpus id.
    '''
    # NOTE(review): `corpus` and `form` are neither parameters nor locals of
    # this method; unless they exist at module level this line raises
    # NameError. The `resource` argument is also overwritten here - confirm
    # the intended signature.
    resource = self._find_resource_hyperdata(corpus, form)
    resource.status(action="fetch", progress=1, complete=False)

    # NOTE(review): eval() on a value stored on the resource - confirm that
    # `resource.typecrawler` can never carry untrusted input.
    crawler_class = eval(resource.typecrawler)
    crawlbot = crawler_class(resource)

    # Hand the job over to the scheduler.
    # NOTE(review): crawlbot.fetch() is called right here and its result is
    # what scheduled() receives - confirm this is the intended usage.
    fetch_job = crawlbot.fetch()
    scheduled(fetch_job)

    corpus.status(action="fetch", progress=1, complete=True)
    session.add(corpus)
    session.commit()

    payload = {"uids": [corpus.id]}
    return Response(payload, 200)
def create_gargantua_resources():
    """Create the "Resources" project of user "gargantua" and its STOPLIST.

    Both nodes are owned by the user whose username is "gargantua"; the
    STOPLIST node is created as a child of the project.
    """
    # .first() on a single-column query yields a one-element row (or None),
    # not the bare id: unpack it before using it as a column value.
    owner_row = session.query(User.id).filter(
        User.username == "gargantua").first()
    gargantua_id = owner_row[0] if owner_row is not None else None

    project = Node(name="Resources", user_id=gargantua_id, typename="PROJECT")
    session.add(project)
    # Flush so that project.id is populated before it is used as parent_id;
    # it was previously read before the project had been added to the session.
    session.flush()

    stopList = Node(name="STOPLIST",
                    parent_id=project.id,
                    user_id=gargantua_id,
                    typename="STOPLIST")
    session.add(stopList)
    session.commit()
def do_stoplist(corpus, overwrite_id=None):
    '''
    Create list of stop words.
    TODO do a function to get all stop words with social scores

    Parameters:
        - corpus: node whose DOCUMENT children provide the candidate ngrams
        - overwrite_id: optional preexisting STOPLIST node to overwrite

    Returns the id of the STOPLIST node the result was saved to.
    '''
    # Use the preexisting StopList when one is given, otherwise create a new
    # STOPLIST child of the corpus.
    if overwrite_id:
        stoplist_id = overwrite_id
    else:
        stoplist = corpus.add_child(name="Stoplist (in:%s)" % corpus.id,
                                    typename="STOPLIST")
        session.add(stoplist)
        session.commit()
        stoplist_id = stoplist.id

    # Common resources: the StopList owned by the "gargantua" user.
    # .first() on a single-column query yields a one-element row (or None);
    # unpack it so the filters below compare against a plain id.
    gargantua_row = session.query(User.id).filter(
        User.username == "gargantua").first()
    gargantua_id = gargantua_row[0] if gargantua_row is not None else None

    root_row = session.query(Node.id).filter(
        Node.user_id == gargantua_id,
        Node.typename == "STOPLIST").first()
    rootStopList_id = root_row[0] if root_row is not None else None

    # All stop words of that root list.
    # NOTE(review): .all() returns one-column rows; confirm is_stop_word()
    # expects rows rather than plain strings.
    stop_words = (
        session.query(Ngram.terms)
        .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
        .filter(NodeNgram.node_id == rootStopList_id)
        .all()
    )

    # (id, terms) of every ngram attached to a DOCUMENT child of the corpus.
    ngrams = (
        session.query(Ngram.id, Ngram.terms)
        .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
        .join(Node, Node.id == NodeNgram.node_id)
        .filter(Node.parent_id == corpus.id, Node.typename == "DOCUMENT")
        .group_by(Ngram.id)
        .all()
    )

    # Every ngram recognised as a stop word is stored with the value -1.
    stop = LISTTYPES["STOPLIST"]({
        ngram[0]: -1
        for ngram in ngrams
        if is_stop_word(ngram, stop_words=stop_words)
    })
    stop.save(stoplist_id)
    return stoplist_id
def delete(self, request, node_id):
    """Delete the node `node_id` and report how many rows were removed.

    Answers 401 to unauthenticated users and raises Http404 when
    _query_nodes() finds nothing for this request.
    """
    # can't use @requires_auth because of positional 'self' within class
    if not request.user.is_authenticated():
        return HttpResponse('Unauthorized', status=401)

    parameters, query, count = _query_nodes(request, node_id)
    if len(query) == 0:
        raise Http404()

    statement = delete(Node).where(Node.id == node_id)
    result = session.execute(statement)
    session.commit()

    return JsonHttpResponse({'deleted': result.rowcount})
def run_moissonneur(moissonneur, project, name, query):
    """ Run moissonneur and return resulting corpus

    A minimal stand-in request object is built and passed to the
    moissonneur's query() and save(). Returns None when the query reports
    no result, otherwise the saved corpus (renamed to `name` if needed).
    """

    # XXX Kludge: the moissonneur entry points expect a request, so a bare
    # object carrying the attributes set below plays that role.
    class Dummy(object):
        pass

    request = Dummy()
    request.method = 'POST'
    request.path = 'nowhere'
    request.META = {}

    # XXX 'string' only has an effect on moissonneurs.pubmed; its value is
    #     added when processing the request client-side, see
    #     templates/projects/project.html for more details.
    request.POST = {'string': name, 'query': query, 'N': QUERY_SIZE_N_MAX}

    request.user = Dummy()
    request.user.id = project.user_id
    request.user.is_authenticated = lambda: True

    if moissonneur.name == 'istex':
        # Replace ALL spaces by plus signs (empty chunks are dropped).
        words = [chunk for chunk in query.split(' ') if chunk]
        request.POST['query'] = '+'.join(words)

    try:
        import json

        response = moissonneur.query(request)
        raw_json = response.content.decode('utf-8')
        data = json.loads(raw_json)

        if moissonneur.name == 'pubmed':
            count = sum(entry['count'] for entry in data)
            request.POST['query'] = raw_json
        elif moissonneur.name == 'istex':
            count = data.get('total', 0)
        else:
            count = data.get('results_nb', 0)

        if not count > 0:
            return None
        corpus = moissonneur.save(request, project.id, return_corpus=True)

    except (ValueError, Http404) as e:
        raise e

    # Sometimes the saved corpus does not carry the requested name.
    if corpus.name != name:
        corpus.name = name
        session.commit()

    return corpus
def save(self, node_id=None):
    """Replace the NodeNgram rows of a node with the items of this list.

    Parameters:
        node_id: target node; defaults to ``self.id`` when that attribute
            exists.

    Raises:
        ValueError: when no node id is given and ``self`` has no ``id``.
    """
    from gargantext.models import NodeNgram

    if node_id is None:
        if not hasattr(self, 'id'):
            raise ValueError('Please mention an ID to save the node.')
        node_id = self.id

    # Delete previous data; the removal is committed before the insert.
    previous_rows = session.query(NodeNgram).filter(
        NodeNgram.node_id == node_id)
    previous_rows.delete()
    session.commit()

    # Insert new data: one row per item, each with weight 1.0.
    columns = ('node_id', 'ngram_id', 'weight')
    rows = ((node_id, key, 1.0) for key in self.items)
    bulk_insert(NodeNgram, columns, rows)
def post(self, request):
    '''CREATE a new project for a given user

    Expects a non-empty "name" entry in the request payload.

    Returns:
        406 when "name" is missing or empty, 409 when a PROJECT node with
        that name already exists, 201 with the new project URL otherwise.
    '''
    # Unused below; the lookup is kept as in the original.
    user = cache.User[request.user.id]

    try:
        name = request.data["name"]
    except (KeyError, AttributeError):
        # A missing key raises KeyError; only AttributeError used to be
        # caught, so a payload without "name" crashed instead of
        # answering 406.
        return Response(
            {"detail": "Invalid POST method: \"name\" field is required "},
            status=HTTP_406_NOT_ACCEPTABLE)

    if name == "":
        return Response(
            {"detail": "Invalid POST method: \"name\" field is empty "},
            status=HTTP_406_NOT_ACCEPTABLE)

    # NOTE(review): the duplicate check spans PROJECT nodes of every user,
    # not only the caller's - confirm this is intended.
    project = session.query(Node).filter(Node.typename == "PROJECT",
                                         Node.name == name).first()
    if project is not None:
        return Response(
            {
                "detail": "Project with this name already exists",
                "url": "/projects/%s" % str(project.id)
            },
            status=HTTP_409_CONFLICT)

    user_node = session.query(UserNode).filter_by(
        user_id=request.user.id).one_or_none()
    if user_node is None:
        # Not fatal: the project is then created without a parent.
        print(
            "??? Can't find UserNode for %r to create ProjectNode with name %r ???"
            % (request.user, name))

    new_project = Node(
        user_id=request.user.id,
        typename='PROJECT',
        name=name,
        parent_id=user_node.id if user_node is not None else None,
    )
    session.add(new_project)
    session.commit()
    return Response(
        {
            "detail": "Created",
            "url": "/projects/%s" % str(new_project.id)
        },
        status=HTTP_201_CREATED)
def delete(self, request):
    ''' DELETE the projects of a given user

    Every PROJECT node owned by the caller is deleted in one transaction.
    Returns a 204 response whose detail reports the number of deletions.
    '''
    user = cache.User[request.user.id]
    projects = session.query(Node).filter(Node.typename == "PROJECT",
                                          Node.user_id == user.id).all()
    # NOTE(review): a per-project check_rights() call was commented out in
    # the original; ownership is only enforced by the user_id filter above.

    uids = []
    for node in projects:
        # Record the id *before* deleting: it used to be read after the row
        # had been deleted and committed.
        uids.append(node.id)
        session.delete(node)
    # Single commit: either all the projects are removed or none.
    session.commit()

    return Response({"detail": "Deleted %i projects" % len(uids)},
                    status=HTTP_204_NO_CONTENT)
def create_user(username, email, user=None, password=None, group=None, notify=False):
    '''
    create_user :
        - create user
        - create its group if needed
        - create relation between user and its group

    Parameters:
        - user: existing User object to update instead of creating one
        - password: replaced by a random one when missing or shorter than
          3 characters
        - group: when not None, a contact with the "group_iscpif" user is
          recorded; it is blocked unless group == "group_iscpif"
        - notify: when True, a mail is sent to the user

    Returns the committed user.
    '''
    if user is None:
        user = User()
    user.username = username
    user.email = email
    user.is_active = True

    # Creating the password
    if password is None or len(password) < 3:
        password = make_random_password()
    user.password = make_password(password)

    session.add(user)
    session.commit()

    if group is not None:
        # Get or create group_iscpif.
        # .first() on a single-column query yields a one-element row: unpack
        # it so that block() receives a plain id in both branches (it got a
        # row here and an int in the creation branch before).
        group_row = session.query(User.id).filter(
            User.username == "group_iscpif").first()
        if group_row is None:
            group_iscpif = create_user("group_iscpif", "*****@*****.**",
                                       group=None, notify=False)
            group_iscpif_id = group_iscpif.id
        else:
            group_iscpif_id = group_row[0]

        # The link is left unblocked only for members of group_iscpif.
        block(user.id, group_iscpif_id, group != "group_iscpif")

    # `== True` is kept on purpose: only a value equal to True triggers a
    # mail, exactly as before.
    if notify == True:
        draft_key = 'partnerOk' if group == "group_iscpif" else 'partnerKo'
        mail2user(username, email, password, drafts[draft_key])
    else:
        print("User %s created, no notification" % username)

    return user
def put(self, request, project_id):
    '''UPDATE project

    Applies the "name" and "date" request parameters to the project.
    "username" (ownership change) is accepted but not implemented, and
    other parameters are ignored.

    Returns 404 for an unknown project, 406 as soon as a parameter is
    empty, 409 when another node already carries the requested name, and
    206 once the project has been committed.
    '''
    project = session.query(Node).filter(Node.id == project_id).first()
    if project is None:
        return Response(
            {'detail': "PROJECT Node #%s not found" % project_id},
            status=HTTP_404_NOT_FOUND)

    check_rights(request, project_id)
    params = get_parameters(request)

    for key, val in params.items():
        if len(val) == 0:
            return Response(
                {"detail": 'Invalid POST method: "%s" field is empty ' % key},
                status=HTTP_406_NOT_ACCEPTABLE)

        if key not in ["name", "date", "username"]:
            continue
        if key == "username":
            # Ownership change: not implemented.
            continue

        if key == "name":
            # Refuse a name that any existing node already carries.
            same_name_count = session.query(Node).filter(
                Node.name == val).count()
            if same_name_count != 0:
                return Response(
                    {"detail": "Project with this name already exists"},
                    status=HTTP_409_CONFLICT)

        setattr(project, key, val)

    session.add(project)
    session.commit()
    return Response({"detail": "Updated PROJECT #%s" % str(project_id)},
                    status=HTTP_206_PARTIAL_CONTENT)
def save(self, node_id=None):
    """Replace the NodeNgramNgram rows of a node with this matrix's entries.

    Parameters:
        node_id: target node; defaults to ``self.id`` when that attribute
            exists.

    Raises:
        ValueError: when no node id is given and ``self`` has no ``id``.
    """
    from gargantext.models import NodeNgramNgram

    if node_id is None:
        if not hasattr(self, 'id'):
            raise ValueError('Please mention an ID to save the node.')
        node_id = self.id

    # Delete previous data; the removal is committed before the insert.
    previous_rows = session.query(NodeNgramNgram).filter(
        NodeNgramNgram.node_id == node_id)
    previous_rows.delete()
    session.commit()

    # Insert new data: one row per (key1, key2, value) yielded by self.
    columns = ('node_id', 'ngram1_id', 'ngram2_id', 'weight')
    rows = ((node_id, key1, key2, value) for key1, key2, value in self)
    print("WeightedMatrix bulk_insert start")
    bulk_insert(NodeNgramNgram, columns, rows)
    print("WeightedMatrix bulk_insert stop")
def delete(self, request, node_id=None):
    '''delete status for node

    Parameters:
        - node_id: id of the node whose status history is reset. The body
          always used `node_id` but the signature did not declare it, so
          the method could only fail with NameError; it is now an optional
          argument, which keeps existing call sites valid.

    Returns 401 for unauthenticated users, 404 when the caller owns no node
    with this id, 204 once the status list has been emptied.
    '''
    # can't use @requires_auth because of positional 'self' within class
    if not request.user.is_authenticated():
        return HttpResponse('Unauthorized', status=401)

    user = cache.User[request.user.id]
    # Restricting on user_id keeps the reset limited to the caller's nodes.
    node = session.query(Node).filter(Node.id == node_id,
                                      Node.user_id == user.id).first()
    if node is None:
        return Response({"detail": "Node not Found"},
                        status=HTTP_404_NOT_FOUND)

    # NOTE(review): hyperdata is mutated in place; confirm the column tracks
    # in-place changes (other code calls node.save_hyperdata() after editing).
    node.hyperdata["status"] = []
    session.add(node)
    session.commit()
    return Response({"detail": "Deleted status for NODE #%i " % node.id},
                    status=HTTP_204_NO_CONTENT)
def post(self, request, node_id):
    """
    For the moment, only used to rename a node

    params in request.GET:
        none (not allowed by _query_nodes validation)

    params in request.DATA:
        ["name": the_new_name_str]

    Returns 401 for unauthenticated users, 404 when _query_nodes() finds no
    node, 400 without a 'name', 409 when a node already has that name and
    200 once renamed.

    TODO 1 factorize with .projects.ProjectView.put and .post (thx c24b)
    TODO 2 allow other changes than name
    """
    # can't use @requires_auth because of positional 'self' within class
    if not request.user.is_authenticated():
        return HttpResponse('Unauthorized', status=401)

    # contains a check on user.id (within _query_nodes)
    parameters, query, count = _query_nodes(request, node_id)
    if not query:
        # query.pop() on an empty result used to raise IndexError.
        return JsonHttpResponse({"detail": "Node not found"}, 404)
    the_node = query.pop()

    # retrieve the name
    if 'name' not in request.data:
        return JsonHttpResponse(
            {"detail": "A 'name' parameter is required in data payload"},
            400)
    new_name = request.data['name']

    # check for conflicts
    # NOTE(review): the count spans every node, including the_node itself,
    # so re-submitting the current name answers 409 - confirm intended.
    other = session.query(Node).filter(Node.name == new_name).count()
    if other > 0:
        return JsonHttpResponse(
            {"detail": "A node with this name already exists"}, 409)

    # normal case: do the renaming
    the_node.name = new_name
    session.commit()
    return JsonHttpResponse({'renamed': new_name}, 200)
def block(user1_id, user2_id, bool_):
    '''
    user_group :: Int -> Int -> Bool

    Record a link between user1 and user2; `bool_` is stored as the
    contact's is_blocked flag.

    Returns the committed Contact.
    '''
    contact = Contact()
    field_values = (
        ("user1_id", user1_id),
        ("user2_id", user2_id),
        ("is_blocked", bool_),
    )
    for field, value in field_values:
        setattr(contact, field, value)

    session.add(contact)
    session.commit()
    return contact
def delete(self, request):
    """Removes the list of nodes corresponding to the query.

    The ids come from the 'ids' request parameter, as comma-separated
    integers.

    Raises:
        ValidationException: when 'ids' cannot be parsed as integers.

    Returns 401 for unauthenticated users, otherwise the number of deleted
    rows.
    """
    # can't use @requires_auth because of positional 'self' within class
    if not request.user.is_authenticated():
        return HttpResponse('Unauthorized', status=401)

    parameters = get_parameters(request)
    parameters = validate(parameters, {'ids': list})

    try:
        node_ids = [int(n) for n in parameters['ids'].split(',')]
    except (AttributeError, KeyError, TypeError, ValueError) as exc:
        # Narrowed from a bare `except:` so that KeyboardInterrupt and
        # SystemExit are no longer turned into validation errors; the cause
        # is chained for debugging.
        raise ValidationException(
            '"ids" needs integers separated by comma.') from exc

    # NOTE(review): nothing here restricts the deletion to nodes owned by
    # the caller - confirm ownership is enforced elsewhere.
    result = session.execute(delete(Node).where(Node.id.in_(node_ids)))
    session.commit()

    return JsonHttpResponse({'deleted': result.rowcount})
def delete(self, request, project_id):
    '''DELETE project

    Returns 404 when no node has this id, 403 when check_rights() raises,
    204 once the node has been deleted and committed.
    '''
    node = session.query(Node).filter(Node.id == project_id).first()
    if node is None:
        return Response(
            {'detail': "PROJECT Node #%s not found" % (project_id)},
            status=HTTP_404_NOT_FOUND)

    try:
        check_rights(request, project_id)
    except Exception:
        # The message used to be built with `"Unauthorized" % (project_id)`,
        # which has no placeholder and raised TypeError instead of
        # answering 403.
        return Response({'detail': "Unauthorized"}, status=403)

    session.delete(node)
    session.commit()
    return Response(
        {"detail": "Successfully deleted Node #%s" % project_id},
        status=204)
def delete(self, request, corpus_id):
    """
    DELETE http://localhost:8000/api/nodes/2/favorites?docs=53,54
    (will delete docs 53 and 54 from the favorites of corpus 2)

    Only the favorites links are removed; the documents stay in the DB.
    """
    # can't use @requires_auth because of positional 'self' within class
    if not request.user.is_authenticated():
        return HttpResponse('Unauthorized', status=401)

    fav_node = self._get_fav_node(corpus_id)
    if fav_node is None:
        # Nothing to remove: answer with a warning instead of an error.
        warning = ("No favorites node is defined for this corpus ('%s')"
                   % self.corpus.name)
        return JsonHttpResponse({'warning': warning, 'count_removed': 0})

    req_params = validate(get_parameters(request),
                          {'docs': list, 'default': ""})
    nodeids_to_delete = [int(did) for did in req_params['docs'].split(',')]

    try:
        unlink = (
            delete(NodeNode)
            .where(NodeNode.node1_id == fav_node.id)
            .where(NodeNode.node2_id.in_(nodeids_to_delete))
        )
        result = session.execute(unlink)
        session.commit()
        response = {'count_removed': result.rowcount}
    finally:
        session.close()

    return JsonHttpResponse(response)
def create_user(username, email, user=None, password=None, active=False, notify=True):
    """Create (or update) a user, commit it and optionally notify it.

    Parameters:
        user: existing User object to update instead of creating one.
        password: replaced by a random one when None or empty.
        active: NOTE(review): never read, the account is always saved with
            is_active = True - confirm intended.
        notify: when equal to True, notify_user() is called with the clear
            password.

    Returns the committed user.
    """
    account = User() if user is None else user
    account.username = username
    account.email = email
    account.is_active = True

    if password is None or password == "":
        password = make_random_password()
    account.password = make_password(password)

    session.add(account)
    session.commit()

    if notify == True:
        notify_user(username, email, password)

    return account