def get(self, request):
    """Display the list of nodes corresponding to the query.

    The nodes, the parsed parameters and the total count all come from
    ``_query_nodes(request)``. The output format is selected by
    ``parameters['formated']``:

    * ``'json'``: a JSON object holding the parameters, the count and the
      records (each node passed through ``_filter_node_fields``);
    * ``'csv'``: a tab-separated attachment named ``Gargantext_Corpus.csv``
      with a fixed set of hyperdata columns; a value that cannot be read
      from a node's hyperdata is written as an empty string.

    Returns a 401 response when the user is not authenticated.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    parameters, query, count = _query_nodes(request)

    if parameters['formated'] == 'json':
        # FIXME filter in rawsql in _query_nodes
        records_array = [
            _filter_node_fields(node, parameters) for node in query
        ]
        return JsonHttpResponse({
            'parameters': parameters,
            'count': count,
            'records': records_array
        })

    elif parameters['formated'] == 'csv':
        # TODO add support for fields and hyperdata_filter
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="Gargantext_Corpus.csv"'
        writer = csv.writer(response,
                            delimiter='\t',
                            quoting=csv.QUOTE_MINIMAL)
        keys = [
            'title', 'source', 'publication_year', 'publication_month',
            'publication_day', 'abstract', 'authors'
        ]
        writer.writerow(keys)
        for node in query:
            row = []
            for key in keys:
                try:
                    row.append(node.hyperdata[key])
                except (KeyError, TypeError):
                    # key absent, or hyperdata is not subscriptable (None):
                    # keep the column aligned with an empty cell
                    row.append("")
            writer.writerow(row)
        return response

    # NOTE(review): any other 'formated' value falls through and returns
    # None; presumably _query_nodes restricts it to json/csv -- confirm.
def get(self, request, node_id):
    """Return the ``statuses`` entry of a node owned by the requesting user.

    Responds with 401 when the user is not authenticated, with 404 when no
    node with this id belongs to the user, and otherwise with
    ``{"statuses": ...}`` where the value is ``None`` if the node's
    hyperdata has no ``"statuses"`` key.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    user = cache.User[request.user.id]

    # check_rights(request, node_id) is deliberately not called:
    # the filter on user_id below does the job
    owned_node_query = session.query(Node).filter(Node.id == node_id,
                                                  Node.user_id == user.id)
    node = owned_node_query.first()

    if node is None:
        return Response({"detail": "Node not Found for this user"},
                        status=HTTP_404_NOT_FOUND)

    # FIXME using the more generic strategy ---------------------------
    # context = format_response(node, [n for n in node.children()])
    # or perhaps ? context = format_response(None, [node])
    # -----------------------------------------------------------------
    # using a more direct strategy
    try:
        statuses = node.hyperdata["statuses"]
    except KeyError:
        statuses = None
    return Response({"statuses": statuses})
def post(self, request, data):
    """Create a new status for node (not implemented yet).

    Returns a 401 response for unauthenticated users; raises
    NotImplementedError for authenticated ones.
    """
    if request.user.is_authenticated():
        raise NotImplementedError
    # can't use @requires_auth because of positional 'self' within class
    return HttpResponse('Unauthorized', status=401)
def CsvHttpResponse(data, headers=None, status=200):
    """Build a comma-separated ``text/csv`` HTTP response.

    ``data`` is an iterable of rows; ``headers``, when truthy, is written
    as the first row. ``status`` is the HTTP status code of the response.
    """
    csv_response = HttpResponse(content_type="text/csv", status=status)
    csv_writer = csv.writer(csv_response, delimiter=',')
    if headers:
        csv_writer.writerow(headers)
    csv_writer.writerows(data)
    return csv_response
def get(self, request, corpus_id):
    """List the best-scored documents of a corpus that contain given ngrams.

    Query parameters:
        ngram_ids: comma-separated integer ngram ids (required).

    Documents are the DOCUMENT children of the corpus; each one is scored
    by the sum of its NodeNodeNgram scores under the corpus' TFIDF-CORPUS
    node for the requested ngrams. At most DEFAULT_N_DOCS_HAVING_NGRAM
    records are returned, best score first, while ``count`` is the total
    number of matching documents before the limit.

    Raises:
        ValidationException: ``ngram_ids`` is missing or not a list of
            integers.
        Http404: the corpus, or its TFIDF-CORPUS node, does not exist.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    parameters = get_parameters(request)
    parameters = validate(parameters, {'score': str, 'ngram_ids': list})
    try:
        ngram_ids = [int(n) for n in parameters['ngram_ids'].split(',')]
    except (KeyError, AttributeError, ValueError):
        raise ValidationException(
            '"ngram_ids" needs integers separated by comma.')

    limit = DEFAULT_N_DOCS_HAVING_NGRAM

    corpus = session.query(Node).filter(Node.id == corpus_id).first()
    if corpus is None:
        raise Http404()

    tfidf_row = (session.query(Node.id)
                 .filter(Node.typename == "TFIDF-CORPUS",
                         Node.parent_id == corpus.id)
                 .first())
    if tfidf_row is None:
        # no TFIDF node for this corpus: nothing to score against
        raise Http404()
    tfidf_id = tfidf_row[0]

    # request data
    nodes_query = (session.query(Node, func.sum(NodeNodeNgram.score))
                   .join(NodeNodeNgram, NodeNodeNgram.node2_id == Node.id)
                   .filter(NodeNodeNgram.node1_id == tfidf_id)
                   .filter(Node.typename == 'DOCUMENT',
                           Node.parent_id == corpus.id)
                   .filter(NodeNodeNgram.ngram_id.in_(ngram_ids))
                   .group_by(Node))

    # get the total count before applying limit
    nodes_count = nodes_query.count()

    # now the query with the limit
    nodes_results_query = (nodes_query
                           .order_by(func.sum(NodeNodeNgram.score).desc())
                           .limit(limit))

    exported_keys = ('title', 'publication_date', 'source', 'authors',
                     'fields')
    nodes_list = []
    for node, score in nodes_results_query:
        node_dict = {
            'id': node.id,
            'score': score,
        }
        # only copy the hyperdata entries this document actually has
        for key in exported_keys:
            if key in node.hyperdata:
                node_dict[key] = node.hyperdata[key]
        nodes_list.append(node_dict)

    return JsonHttpResponse({'count': nodes_count, 'records': nodes_list})
def put(self, request, corpus_id, check_each_doc=True):
    """Add documents to the favorites of a corpus.

    Query parameters:
        docs: comma-separated integer ids of the documents to add.

    When ``check_each_doc`` is true, every requested id must be a DOCUMENT
    node whose parent is ``corpus_id``; otherwise the whole request is
    rejected. The response is ``{'count_added': n}`` where ``n`` counts the
    requested ids (not the rows really inserted), or a warning with
    ``count_added: 0`` when the corpus has no favorites node.

    Raises:
        ValidationException: ``docs`` is missing or malformed, or some ids
            are not documents of this corpus.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    # user is ok
    fav_node = self._get_fav_node(corpus_id)

    if fav_node is None:
        # NOTE(review): self.corpus is presumably set by _get_fav_node
        # -- confirm.
        return JsonHttpResponse({
            'warning':
            'No favorites node is defined for this corpus (\'%s\')' %
            self.corpus.name,
            'count_added': 0
        })

    req_params = validate(get_parameters(request), {
        'docs': list,
        'default': ""
    })
    try:
        nodeids_to_add = [int(did) for did in req_params['docs'].split(',')]
    except (KeyError, AttributeError, ValueError):
        raise ValidationException(
            '"docs" needs integers separated by comma.')

    if check_each_doc:
        # check that these really are documents of the right corpus
        # (a bit slow => disable by default?)
        known_docs_q = (session.query(Node.id)
                        .filter(Node.parent_id == corpus_id)
                        .filter(Node.typename == 'DOCUMENT'))
        known_ids = {known_doc.id for known_doc in known_docs_q.all()}

        rejected_list = [
            doc_node_id for doc_node_id in nodeids_to_add
            if doc_node_id not in known_ids
        ]
        if rejected_list:
            raise ValidationException(
                "Error on some requested docs: %s (Only nodes of type 'doc' AND belonging to corpus %i can be added to favorites.)"
                % (str(rejected_list), int(corpus_id)))

    # add them
    bulk_insert(NodeNode, ('node1_id', 'node2_id', 'score'),
                ((fav_node.id, doc_node_id, 1.0)
                 for doc_node_id in nodeids_to_add))

    # todo count really added (here: counts input param not result)
    return JsonHttpResponse({'count_added': len(nodeids_to_add)})
def get(self, request, corpus_id):
    """List, or test membership in, the favorite documents of a corpus.

    2 possibilities with/without param

    1) GET http://localhost:8000/api/nodes/2/favorites
       (returns the full list of fav docs within corpus 2)

    2) GET http://localhost:8000/api/nodes/2/favorites?docs=53,54
       (will test if docs 53 and 54 are among the favorites of corpus 2)
       (returns the intersection of fav docs with [53,54] as ``favdocs``
       and the requested ids that are not favorites as ``missing``)

    Raises:
        ValidationException: ``docs`` is present but is not a list of
            comma-separated integers.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    fav_node = self._get_fav_node(corpus_id)

    req_params = validate(get_parameters(request), {
        'docs': list,
        'default': ""
    })

    if fav_node is None:
        # NOTE(review): self.corpus is presumably set by _get_fav_node
        # -- confirm.
        response = {
            'warning':
            'No favorites node is defined for this corpus (\'%s\')' %
            self.corpus.name,
            'favdocs': []
        }
    elif 'docs' not in req_params:
        # each docnode associated to the favnode of this corpusnode
        q = (session.query(NodeNode.node2_id)
             .filter(NodeNode.node1_id == fav_node.id))
        all_doc_ids = [row.node2_id for row in q.all()]
        response = {'favdocs': all_doc_ids}
    else:
        try:
            nodeids_to_check = [
                int(did) for did in req_params['docs'].split(',')
            ]
        except (AttributeError, ValueError):
            raise ValidationException(
                '"docs" needs integers separated by comma.')

        # each docnode from the input list, if it is associated to the favnode
        q = (session.query(NodeNode.node2_id)
             .filter(NodeNode.node1_id == fav_node.id)
             .filter(NodeNode.node2_id.in_(nodeids_to_check)))
        present_doc_ids = [row.node2_id for row in q.all()]

        # set lookup keeps the "missing" computation linear
        present_lookup = set(present_doc_ids)
        absent_doc_ids = [
            did for did in nodeids_to_check if did not in present_lookup
        ]
        response = {'favdocs': present_doc_ids, 'missing': absent_doc_ids}

    return JsonHttpResponse(response)
def delete(self, request, node_id):
    """Delete the node ``node_id`` and report how many rows were removed.

    Responds with 401 when the user is not authenticated and raises
    Http404 when ``_query_nodes`` finds no node for this request and id.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    _parameters, matching_nodes, _count = _query_nodes(request, node_id)
    if len(matching_nodes) == 0:
        raise Http404()

    deletion = delete(Node).where(Node.id == node_id)
    outcome = session.execute(deletion)
    session.commit()
    return JsonHttpResponse({'deleted': outcome.rowcount})
def get(self, request, node_id):
    """Return the node ``node_id`` as JSON.

    The node comes from ``_query_nodes`` and is passed through
    ``_filter_node_fields`` with the request parameters. Responds with 401
    when the user is not authenticated and raises Http404 when no node is
    found.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    parameters, matching_nodes, _count = _query_nodes(request, node_id)
    if len(matching_nodes) == 0:
        raise Http404()

    first_node = matching_nodes[0]
    payload = _filter_node_fields(first_node, parameters)
    return JsonHttpResponse(payload)
def get(self, request):
    """Export the ngram lists of a corpus as a CSV attachment.

    Query parameters:
        corpus: integer id of the corpus whose lists are exported.

    The response is a ``text/csv`` attachment filled in by
    ``export_ngramlists`` (the response object is passed as its ``fname``
    target, with titles enabled).

    Returns a 401 response when the user is not authenticated.

    Raises:
        ValidationException: ``corpus`` is missing or is not an integer.
    """
    if not request.user.is_authenticated():
        # same guard as the other views of this API
        return HttpResponse('Unauthorized', status=401)

    params = get_parameters(request)
    try:
        corpus_id = int(params.pop("corpus"))
    except (KeyError, TypeError, ValueError):
        raise ValidationException('"corpus" needs an integer corpus id.')

    # NOTE(review): unlike the list-merging views, no check is made that
    # the corpus belongs to request.user -- confirm whether exports are
    # meant to be open to every authenticated user.
    corpus_node = cache.Node[corpus_id]

    # response is file-like + headers
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="corpus-%i_gargantext_term_list.csv"' % corpus_id

    # fill the response with the data
    export_ngramlists(corpus_node, fname=response, titles=True)
    return response
def patch(self, request):
    """Merge the lists of one corpus onto the lists of another corpus.

    A copy of POST (merging list) but with the source == just an internal
    corpus_id.

    params in request.GET:
        onto_corpus:  the corpus whose lists are getting patched
        from_corpus:  the corpus from which we take the source lists
                      to merge in
        todo:         comma-separated list types ("map", "main", "stop")
                      to merge in

    The GROUPLIST of the source corpus is always merged too (as
    'groupings'). Responds with 401 when the user is not authenticated or
    does not own the target corpus, with ``{'log': ...}`` (200) on success
    and with ``{'err': ...}`` (400) when the merge itself fails.

    Raises:
        ValidationException: ``todo`` names an unknown list type, or the
            source corpus lacks one of the requested lists.
    """
    if not request.user.is_authenticated():
        return HttpResponse("Unauthorized", status=401)

    params = get_parameters(request)

    # the corpus with the target lists to be patched
    corpus_id = int(params.pop("onto_corpus"))
    corpus_node = cache.Node[corpus_id]

    if request.user.id != corpus_node.user_id:
        return HttpResponse("Unauthorized", status=401)

    list_types = {'map': 'MAPLIST', 'main': 'MAINLIST', 'stop': 'STOPLIST'}

    # internal DB retrieve source_lists
    # NOTE(review): ownership of the *source* corpus is not checked
    # -- confirm this is intended.
    source_corpus_id = int(params.pop("from_corpus"))
    source_node = cache.Node[source_corpus_id]

    todo_lists = params.pop("todo").split(',')  # ex: ['map', 'stop']
    unknown_types = [key for key in todo_lists if key not in list_types]
    if unknown_types:
        raise ValidationException(
            '"todo" only accepts %s (got %s)' %
            (sorted(list_types), unknown_types))

    source_lists = {}
    for key in todo_lists:
        list_node = source_node.children(list_types[key]).first()
        if list_node is None:
            raise ValidationException(
                'source corpus %i has no %s' %
                (source_corpus_id, list_types[key]))
        source_lists[key] = UnweightedList(list_node.id)

    # add the groupings too
    group_node = source_node.children("GROUPLIST").first()
    if group_node is None:
        raise ValidationException(
            'source corpus %i has no GROUPLIST' % source_corpus_id)
    source_lists['groupings'] = Translations(group_node.id)

    # attempt to merge and send response
    try:
        # merge the source_lists onto those of the target corpus
        log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node)
        return JsonHttpResponse({
            'log': log_msg,
        }, 200)
    except Exception as e:
        # deliberate catch-all: any merge failure is reported to the client
        return JsonHttpResponse({
            'err': str(e),
        }, 400)
def put(self, request, data):
    """Update status for node (not implemented yet).

    Returns a 401 response for unauthenticated users.

    Raises:
        NotImplementedError: always, for authenticated users.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    # TODO: look up the node owned by the user and update its status.
    # The previous draft queried on `node_id`, a name this method never
    # receives, so it failed with NameError before reaching the
    # NotImplementedError below.
    raise NotImplementedError
def get(self, request, node_id):
    """Count the documents of a corpus grouped by one hyperdata subfield.

    The ``hyperfield`` request parameter selects the subfield; the counts
    and the total come from ``self._ndocs_by_facet``. The corpus node is
    stored on ``self.corpus``.

    Responds with 401 when the user is not authenticated and raises
    ValidationException when the node is not of type CORPUS.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    # check that the node is a corpus
    #   ? faster from cache than: corpus = session.query(Node)...
    corpus = cache.Node[node_id]
    if corpus.typename != 'CORPUS':
        type_hint = " (but this node has type %s)..." % corpus.typename
        raise ValidationException(
            "Only nodes of type CORPUS can accept facet queries" + type_hint)
    self.corpus = corpus

    # check that the hyperfield parameter makes sense
    _facet_available_subfields = [
        'source', 'publication_year', 'rubrique', 'language_iso2',
        'language_iso3', 'language_name', 'authors'
    ]
    hyperfield_schema = {
        'type': str,
        'range': _facet_available_subfields
    }
    # validate() triggers an info message if subfield not in range
    parameters = validate(get_parameters(request), {
        'type': dict,
        'items': {
            'hyperfield': hyperfield_schema
        }
    })
    subfield = parameters['hyperfield']

    # do the aggregated sum
    xcounts, total = self._ndocs_by_facet(subfield)

    # response
    payload = {'doc_count': total, 'by': {subfield: xcounts}}
    return JsonHttpResponse(payload)
def delete(self, request, node_id=None):
    """Delete (empty) the statuses of a node owned by the requesting user.

    ``node_id`` identifies the node. It defaults to ``None`` only to stay
    compatible with the former ``delete(self, request)`` signature, which
    could not work because the body relied on a ``node_id`` it never
    received.

    Responds with 401 when the user is not authenticated, with 404 when no
    node with this id belongs to the user, and with 204 once the node's
    ``"statuses"`` entry has been reset to an empty list and committed.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    user = cache.User[request.user.id]
    # check_rights(request, node_id)
    # the filter on user_id restricts the lookup to the user's own nodes
    node = session.query(Node).filter(Node.id == node_id,
                                      Node.user_id == user.id).first()
    if node is None:
        return Response({"detail": "Node not Found"},
                        status=HTTP_404_NOT_FOUND)

    # same key as the one read by the GET handler ("statuses");
    # the previous code wrote to "status"
    node.hyperdata["statuses"] = []
    session.add(node)
    session.commit()
    return Response({"detail": "Deleted status for NODE #%i " % node.id},
                    status=HTTP_204_NO_CONTENT)
def post(self, request):
    """Merge the lists of a corpus with lists read from an uploaded CSV.

    params in request.GET:
        onto_corpus:  the corpus whose lists are getting patched
        overwrite:    optional; passed on as ``bool(value)``

    params in request.data:
        csvfile:      the csv file

    WARNING: we assume the file size was checked client-side before upload.

    The import itself is handed to ``scheduled(import_and_merge_ngramlists)``
    and the response only acknowledges it with ``{'log': "Async generation"}``.
    Responds with 401 when the user is not authenticated or does not own
    the target corpus.

    Raises:
        ValidationException: no ``csvfile`` was uploaded.
    """
    if not request.user.is_authenticated():
        return HttpResponse("Unauthorized", status=401)

    # the corpus with the target lists to be patched
    params = get_parameters(request)
    corpus_id = int(params.pop("onto_corpus"))
    corpus_node = cache.Node[corpus_id]

    if request.user.id != corpus_node.user_id:
        return HttpResponse("Unauthorized", status=401)

    # request also contains the file
    # csv_file has type django.core.files.uploadedfile.InMemoryUploadedFile
    # ----------------------
    try:
        csv_file = request.data['csvfile']
    except KeyError:
        raise ValidationException(
            'A "csvfile" upload is required in the request data.')

    try:
        csv_contents = csv_file.read().decode("UTF-8").split("\n")
    finally:
        # release the upload even when reading/decoding fails
        csv_file.close()

    # import the csv
    log_msg = "Async generation"
    corpus_node_id = corpus_node.id
    # NOTE(review): bool() of any non-empty string is True, so
    # "overwrite=false" also enables overwriting -- confirm intended.
    scheduled(import_and_merge_ngramlists)(
        csv_contents,
        corpus_node_id,
        overwrite=bool(params.get('overwrite')))

    return JsonHttpResponse({
        'log': log_msg,
    }, 200)
def post(self, request, node_id):
    """Rename a node (for the moment, the only supported change).

    params in request.GET:
        none (not allowed by _query_nodes validation)

    params in request.DATA:
        ["name": the_new_name_str]

    Responses: 401 when not authenticated, 400 when no ``name`` is given,
    409 when a node with the new name already exists, and
    ``{'renamed': new_name}`` (200) on success.

    Raises:
        Http404: ``_query_nodes`` found no node for this request and id.

    TODO 1 factorize with .projects.ProjectView.put and .post (thx c24b)
    TODO 2 allow other changes than name
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    # contains a check on user.id (within _query_nodes)
    parameters, query, count = _query_nodes(request, node_id)
    if not len(query):
        # same answer as the GET/DELETE handlers instead of an IndexError
        raise Http404()
    the_node = query.pop()

    # retrieve the name
    if 'name' not in request.data:
        return JsonHttpResponse(
            {"detail": "A 'name' parameter is required in data payload"},
            400)
    new_name = request.data['name']

    # check for conflicts
    # NOTE(review): the lookup is on Node.name alone, so it counts every
    # node with that name, not only this user's -- confirm intended.
    other = session.query(Node).filter(Node.name == new_name).count()
    if other > 0:
        return JsonHttpResponse(
            {"detail": "A node with this name already exists"}, 409)

    # normal case: do the renaming
    the_node.name = new_name
    session.commit()
    return JsonHttpResponse({'renamed': new_name}, 200)
def delete(self, request):
    """Remove the nodes whose ids are listed in the ``ids`` parameter.

    Query parameters:
        ids: comma-separated integer node ids.

    Responds with 401 when the user is not authenticated, otherwise with
    ``{'deleted': n}`` where ``n`` is the row count reported for the
    delete statement.

    Raises:
        ValidationException: ``ids`` is missing or is not a list of
            comma-separated integers.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    parameters = get_parameters(request)
    parameters = validate(parameters, {'ids': list})
    try:
        node_ids = [int(n) for n in parameters['ids'].split(',')]
    except (KeyError, AttributeError, ValueError):
        raise ValidationException(
            '"ids" needs integers separated by comma.')

    # NOTE(review): the statement is not restricted to nodes owned by
    # request.user, unlike the single-node delete which goes through
    # _query_nodes first -- confirm whether an ownership filter is needed.
    result = session.execute(delete(Node).where(Node.id.in_(node_ids)))
    session.commit()
    return JsonHttpResponse({'deleted': result.rowcount})
def delete(self, request, corpus_id):
    """Remove documents from the favorites of a corpus.

    DELETE http://localhost:8000/api/nodes/2/favorites?docs=53,54
    (will delete docs 53 and 54 from the favorites of corpus 2)

    Only the favorite links (NodeNode rows) are deleted, not the documents
    themselves. The response is ``{'count_removed': n}``, or a warning with
    ``count_removed: 0`` when the corpus has no favorites node.

    Raises:
        ValidationException: ``docs`` is missing or is not a list of
            comma-separated integers.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    # user is ok
    fav_node = self._get_fav_node(corpus_id)

    if fav_node is None:
        # NOTE(review): self.corpus is presumably set by _get_fav_node
        # -- confirm.
        return JsonHttpResponse({
            'warning':
            'No favorites node is defined for this corpus (\'%s\')' %
            self.corpus.name,
            'count_removed': 0
        })

    req_params = validate(get_parameters(request), {
        'docs': list,
        'default': ""
    })
    try:
        nodeids_to_delete = [
            int(did) for did in req_params['docs'].split(',')
        ]
    except (KeyError, AttributeError, ValueError):
        raise ValidationException(
            '"docs" needs integers separated by comma.')

    try:
        # it deletes from favourites but not from DB
        result = session.execute(
            delete(NodeNode)
            .where(NodeNode.node1_id == fav_node.id)
            .where(NodeNode.node2_id.in_(nodeids_to_delete)))
        session.commit()
        response = {'count_removed': result.rowcount}
    finally:
        session.close()

    return JsonHttpResponse(response)
def patch(self, request, corpusnode_id):
    """Trigger a recount of the metrics of the specified corpus.

    ex PATCH http://localhost:8000/api/metrics/14072
                                               -----
                                             corpus_id

    The recount is submitted through ``scheduled(recount)``; ``took`` in
    the response is the time spent in that call.

    Responds with 401 when the user is not authenticated.

    Raises:
        ValidationException: ``corpusnode_id`` cannot be resolved to a
            node through the cache.
    """
    print("==> update metrics request on ", corpusnode_id)

    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    try:
        corpus = cache.Node[int(corpusnode_id)]
    except Exception:
        # bad id format or failed cache lookup; the exact exception type
        # raised by the cache is not visible here, hence the broad catch
        # (but no longer a bare except that also traps SystemExit etc.)
        corpus = None

    if corpus is None:
        raise ValidationException("%s is not a valid corpus node id." %
                                  corpusnode_id)

    t_before = datetime.now()
    # =============
    scheduled(recount)(corpus.id)
    # =============
    t_after = datetime.now()

    return JsonHttpResponse({
        'corpus_id': corpusnode_id,
        'took': "%f s." % (t_after - t_before).total_seconds()
    })
def DebugHttpResponse(data):
    """Return an HTML page showing ``str(data)`` in a white-on-black <pre>.

    The text is HTML-escaped so that markup characters in the dumped data
    are displayed literally instead of being interpreted by the browser.
    """
    # local import: the file's import block is not touched by this change
    from html import escape

    return HttpResponse(
        '<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>'
        % (escape(str(data)), ))
def JsonHttpResponse(data, status=200):
    """Encode ``data`` with ``json_encoder`` and wrap it in an HTTP response.

    The response carries the ``application/json; charset=utf-8`` content
    type and the given ``status`` code.
    """
    encoded_body = json_encoder.encode(data)
    return HttpResponse(
        content=encoded_body,
        content_type='application/json; charset=utf-8',
        status=status,
    )
def put(self, request):
    """
    Basic external access for *creating an ngram*
    ---------------------------------------------

     1 - checks user authentication before any changes

     2 - checks if the ngram is already in the Ngram table in DB
           if yes returns ngram_id and optionally mainform_id
           otherwise continues

     3 - adds the ngram to Ngram table in DB

     4 - (if corpus param is present)
         adds the ngram doc counts to NodeNgram table in DB
         (aka "index the ngram" through the docs of the corpus)

     5 - returns json with:
         'msg'   => a success msg
         'text'  => the initial text content
         'term'  => the normalized text content
         'id'    => the new ngram_id
         'count' => the number of docs with the ngram in the corpus
                    (if corpus param is present)
         'group' => the mainform_id if applicable

    possible inline parameters
    --------------------------
    @param    text=<ngram_string>         [required]
    @param    corpus=<CORPUS_ID>          [optional]
    @param    testgroup (true if present) [optional, requires corpus]

    Responds with 401 when the user is not authenticated or does not own
    the given corpus, and with a 400 JSON body ('msg', 'text') when any
    step of the creation/indexing fails.

    Raises:
        ValidationException: 'text' is missing, or 'testgroup' is given
            without 'corpus'.
    """
    # 1 - check user authentication
    if not request.user.is_authenticated():
        return HttpResponse("Unauthorized", status=401)

    # the params
    params = get_parameters(request)

    if 'text' not in params:
        raise ValidationException(
            'The route PUT /api/ngrams/ is used to create a new ngram. '
            'It requires a "text" parameter, '
            'for instance /api/ngrams?text=hydrometallurgy')
    original_text = str(params.pop('text'))
    ngram_str = normalize_forms(normalize_chars(original_text))

    if 'testgroup' in params and 'corpus' not in params:
        raise ValidationException("'testgroup' param requires 'corpus' param")

    # if we have a 'corpus' param (to do the indexing)...
    do_indexation = False
    if 'corpus' in params:
        # we retrieve the corpus...
        corpus_id = int(params.pop('corpus'))
        corpus_node = cache.Node[corpus_id]
        # and the user must also have rights on the corpus
        if request.user.id != corpus_node.user_id:
            return HttpResponse("Unauthorized", status=401)
        do_indexation = True

    # number of "words" in the ngram
    ngram_size = len(findall(r' +', ngram_str)) + 1

    # do the additions
    try:
        log_msg = ""
        ngram_id = None
        mainform_id = None

        preexisting = (session.query(Ngram)
                       .filter(Ngram.terms == ngram_str)
                       .first())

        if preexisting is not None:
            ngram_id = preexisting.id
            log_msg += "ngram already existed (id %i)\n" % ngram_id

            # in the context of a corpus we can also check if has mainform
            if 'testgroup' in params:
                grouplist_row = (session.query(Node.id)
                                 .filter(Node.parent_id == corpus_id)
                                 .filter(Node.typename == 'GROUPLIST')
                                 .first())
                had_mainform = None
                if grouplist_row is not None:
                    # compare against the id itself, not the result row
                    had_mainform = (
                        session.query(NodeNgramNgram.ngram1_id)
                        .filter(NodeNgramNgram.node_id == grouplist_row[0])
                        .filter(NodeNgramNgram.ngram2_id == preexisting.id)
                        .first())
                if had_mainform:
                    mainform_id = had_mainform[0]
                    log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
                else:
                    log_msg += "ngram was not in any group for this corpus"
        else:
            # 2 - insert into Ngrams
            new_ngram = Ngram(terms=ngram_str, n=ngram_size)
            session.add(new_ngram)
            session.commit()
            ngram_id = new_ngram.id
            log_msg += "ngram was added with new id %i\n" % ngram_id

        # 3 - index the term
        if do_indexation:
            n_added = index_new_ngrams([ngram_id], corpus_node)
            log_msg += 'ngram indexed in corpus %i\n' % corpus_id

        return JsonHttpResponse({
            'msg': log_msg,
            'text': original_text,
            'term': ngram_str,
            'id': ngram_id,
            'group': mainform_id,
            'count': n_added if do_indexation else 'no corpus provided for indexation'
        }, 200)

    # just in case
    except Exception as e:
        # deliberate catch-all: any failure is reported to the client
        return JsonHttpResponse({
            'msg': str(e),
            'text': original_text
        }, 400)