def search(request):
    """Perform a search request and return the hits.

    The search parameters are read from ``request.GET``. An invalid date
    range, or a query that could not be parsed, results in an 'error'
    message; otherwise the 'ok' message carries ``result['hits']``.
    """
    logger.info('services/search/ - user: {}'.format(request.user.username))

    params = get_search_parameters(request.GET)

    if not validate_dates(params['dates']):
        msg = 'You entered an invalid date range. Please check your date filters.'
        return json_response_message('error', msg)

    valid_q, result = do_search(
        settings.ES_INDEX,
        settings.ES_DOCTYPE,
        params['query'],
        params['start'] - 1,  # Zero based counting
        params['result_size'],
        params['dates'],
        params['distributions'],
        params['article_types'],
        params['pillars'],
        sort_order=params['sort_order'])

    if valid_q:
        return json_response_message('ok', 'Search completed',
                                     {'hits': result['hits']})

    # The error message is HTML (it contains <br /> tags), so the
    # user-supplied query is escaped just like the backend error text.
    details = escape(result).replace('\n', '<br />')
    msg = 'Unable to parse query "{q}"<br /><br />'.format(
        q=escape(params['query']))
    return json_response_message('error', msg + details)
def doc_count(request):
    """Return the number of documents matched by the current query.

    The search parameters are read from ``request.GET``. A count of zero is
    a valid result and is reported as 'ok'; only a missing count is
    reported as an error.
    """
    logger.info('services/doc_count/ - user: {}'.format(request.user.username))
    if settings.DEBUG:
        print >> stderr, "doc_count()"

    params = get_search_parameters(request.GET)

    result = count_search_results(settings.ES_INDEX,
                                  settings.ES_DOCTYPE,
                                  params['query'],
                                  params['dates'],
                                  params['distributions'],
                                  params['article_types'],
                                  params['pillars'])

    count = result.get('count', None)

    # Compare against None explicitly: a plain truthiness test would treat
    # a legitimate count of 0 as a failure.
    if count is not None:
        logger.info('services/doc_count/ - returned calculated count.')
        return json_response_message('ok', 'Retrieved document count.',
                                     {'doc_count': count})

    logger.info('services/doc_count/ - returned "unable to retrieve".')
    return json_response_message('error', 'Unable to retrieve document count')
def user_login(request):
    """Authenticate the posted credentials and log the user in.

    Responds with 'SUCCESS' plus some user details when the credentials
    match an active account, and with 'ERROR' for a disabled account or
    for wrong credentials.
    """
    username = request.POST.get('username')
    password = request.POST.get('password')
    next_url = request.POST.get('next_url')

    user = authenticate(username=username, password=password)

    if user is None:
        return json_response_message('ERROR', 'Oops, that is not correct!')

    if not user.is_active:
        return json_response_message(
            'ERROR',
            'Account disabled.\nPlease contact the system administrator.')

    login(request, user)

    # TODO: are these date_limits really necessary?
    limits = daterange2dates('')
    bounds = (limits['lower'], limits['upper'])
    daterange = [int(bound.replace('-', '')) for bound in bounds]

    return json_response_message('SUCCESS', '', {
        "user_id": user.id,
        "user_name": user.username,
        "daterange": daterange,
        # TODO: what is timestamp used for? Is it really necessary
        "timestamp": TIMESTAMP,
        "next_url": next_url,
    })
def search(request):
    """Perform a search request and return an html string with results.

    The search parameters are read from ``request.REQUEST``. On success the
    'ok' message carries the HTML produced by ``elasticsearch_htmlresp``; a
    query that could not be parsed results in an 'error' message.
    """
    logger.info('services/search/ - user: {}'.format(request.user.username))

    params = get_search_parameters(request.REQUEST)

    valid_q, result = do_search(settings.ES_INDEX,
                                settings.ES_DOCTYPE,
                                params['query'],
                                params['start'] - 1,  # Zero based counting
                                params['result_size'],
                                params['dates'],
                                params['distributions'],
                                params['article_types'],
                                params['pillars'],
                                sort_order=params['sort_order'])

    if valid_q:
        html_str = elasticsearch_htmlresp(settings.ES_INDEX,
                                          params['start'],
                                          params['result_size'],
                                          result)
        return json_response_message('ok', 'Search completed',
                                     {'html': html_str})

    # The error message is HTML (it contains <br /> tags), so the
    # user-supplied query is escaped just like the backend error text.
    details = escape(result).replace('\n', '<br />')
    msg = 'Unable to parse query "{q}"<br /><br />'.format(
        q=escape(params['query']))
    return json_response_message('error', msg + details)
def user_login(request):
    """Authenticate the posted credentials and log the user in.

    Responds with 'SUCCESS' plus some user details when the credentials
    match an active account, and with 'ERROR' for a disabled account or
    for wrong credentials.
    """
    username = request.POST.get('username')
    password = request.POST.get('password')
    next_url = request.POST.get('next_url')

    user = authenticate(username=username, password=password)

    if user is None:
        return json_response_message('ERROR', 'Oops, that is not correct!')

    if not user.is_active:
        return json_response_message(
            'ERROR',
            'Account disabled.\nPlease contact the system administrator.')

    login(request, user)

    # TODO: are these date_limits really necessary?
    limits = daterange2dates('')
    bounds = (limits['lower'], limits['upper'])
    daterange = [int(bound.replace('-', '')) for bound in bounds]

    return json_response_message('SUCCESS', '', {
        "user_id": user.id,
        "user_name": user.username,
        "daterange": daterange,
        # TODO: what is timestamp used for? Is it really necessary
        "timestamp": TIMESTAMP,
        "next_url": next_url,
    })
def check_status_by_task_id(request, task_id):
    """Report the status of a generate_tv_cloud task.

    Non-AJAX requests are refused. A finished, successful task yields its
    result; a finished, failed task yields an error; an unfinished task
    yields 'WAITING' together with the task status and intermediate result.
    """
    if not request.is_ajax():
        return json_response_message('ERROR', 'No access.')

    # TODO: use generic AsyncResult (from celery.result import AsyncResult)
    # so this function can be used to check the status of all asynchronous
    # tasks. However, when this import statement is put in this module, an
    # error is produced (celery module has no attribute result).
    # When typing this import statement in the Python
    # console or in the Django interactive shell, there is no error message.
    task = generate_tv_cloud.AsyncResult(task_id)

    try:
        if not task.ready():
            return json_response_message('WAITING', task.status, task.result)
        if task.successful():
            return json_response_message('ok', '', task.get())
        return json_response_message('ERROR', 'Generating word cloud failed.')
    except AttributeError as err:
        return json_response_message('ERROR',
                                     'Other error: {}'.format(str(err)))
def search(request):
    """Perform a search request and return the hits.

    The search parameters are read from ``request.GET``. An invalid date
    range, or a query that could not be parsed, results in an 'error'
    message; otherwise the 'ok' message carries ``result['hits']``.
    """
    logger.info('services/search/ - user: {}'.format(request.user.username))

    params = get_search_parameters(request.GET)

    if not validate_dates(params['dates']):
        msg = 'You entered an invalid date range. Please check your date filters.'
        return json_response_message('error', msg)

    valid_q, result = do_search(settings.ES_INDEX,
                                settings.ES_DOCTYPE,
                                params['query'],
                                params['start'] - 1,  # Zero based counting
                                params['result_size'],
                                params['dates'],
                                params['distributions'],
                                params['article_types'],
                                params['pillars'],
                                sort_order=params['sort_order'])

    if valid_q:
        return json_response_message('ok', 'Search completed',
                                     {'hits': result['hits']})

    # The error message is HTML (it contains <br /> tags), so the
    # user-supplied query is escaped just like the backend error text.
    details = escape(result).replace('\n', '<br />')
    msg = 'Unable to parse query "{q}"<br /><br />'.format(
        q=escape(params['query']))
    return json_response_message('error', msg + details)
def get_query(request, query_id):
    """Return a single Query, provided it belongs to the requesting user.

    An unknown ``query_id`` is handled by ``get_object_or_404``.
    """
    query = get_object_or_404(Query, pk=query_id)

    if request.user == query.user:
        payload = {'query': query.get_query_dict()}
        return json_response_message('OK', '', payload)

    return json_response_message('ERROR', 'Query does not belong to user.')
def retrieve_document(request, doc_id):
    """Look up a single document in the ES index by its id."""
    logger.info('services/retrieve/{}'.format(doc_id))

    document = get_document(settings.ES_INDEX, settings.ES_DOCTYPE, doc_id)

    if not document:
        return json_response_message('ERROR', 'Document not found.')

    return json_response_message('SUCCESS', '', document)
def query(request, query_id):
    """Return the requested query if it is owned by the requesting user."""
    query = get_object_or_404(Query, pk=query_id)

    if request.user == query.user:
        return json_response_message('OK', '',
                                     {'query': query.get_query_dict()})

    return json_response_message('ERROR', 'Query does not belong to user.')
def delete_stopword(request, stopword_id):
    """Delete a Stopword owned by the requesting user.

    An unknown ``stopword_id`` is handled by ``get_object_or_404``; a
    stopword owned by somebody else results in an 'ERROR' message.
    """
    # get_object_or_404 is a shortcut function taking the model as its first
    # argument; it is not a method on the model manager.
    stopword = get_object_or_404(StopWord, pk=stopword_id)

    if request.user != stopword.user:
        return json_response_message('ERROR', 'Stopword does not belong to this user.')

    # Build the message before deleting the object.
    msg = 'Stopword {} deleted.'.format(stopword.word)
    stopword.delete()

    return json_response_message('SUCCESS', msg)
def query(request, query_id):
    """Return the requested query if it is owned by the requesting user."""
    query = get_object_or_404(Query, pk=query_id)

    is_owner = request.user == query.user
    if not is_owner:
        return json_response_message('ERROR', 'Query does not belong to user.')

    return json_response_message('OK', '', {'query': query.get_query_dict()})
def delete_stopword(request, stopword_id):
    """Delete a stopword from the stopword list.

    Responds with 'ERROR' when the stopword does not exist or is owned by
    another user, and with 'SUCCESS' once it has been deleted.
    """
    # objects.get() raises DoesNotExist instead of returning None, so the
    # "not found" case has to be handled as an exception.
    try:
        stopword = StopWord.objects.get(pk=stopword_id)
    except StopWord.DoesNotExist:
        return json_response_message('ERROR', 'Stopword not found.')

    if not request.user == stopword.user:
        return json_response_message('ERROR', 'Stopword does not belong to '
                                              'user.')

    stopword.delete()

    return json_response_message('SUCCESS', 'Stopword deleted.')
def update(request, query_id):
    """Update an existing Query from the posted search parameters.

    The query text, title and comment are overwritten, the Periods are
    replaced, the excluded distributions / article types and the selected
    pillars are rebuilt, and the number of results is recalculated.
    Responds with 'ERROR' when the query does not exist, belongs to another
    user, or when anything fails while saving.
    """
    # objects.get() raises DoesNotExist instead of returning None, so the
    # "not found" case has to be handled as an exception.
    try:
        query = Query.objects.get(pk=query_id)
    except Query.DoesNotExist:
        return json_response_message('ERROR', 'Query not found.')

    if not request.user == query.user:
        return json_response_message('ERROR', 'Query does not belong to user.')

    params = get_search_parameters(request.POST)

    try:
        query.query = params['query']
        query.title = request.POST.get('title')
        query.comment = request.POST.get('comment')
        query.save()

        # Replace all stored Periods by the posted date ranges.
        Period.objects.filter(query__pk=query_id).delete()
        for date_range in params['dates']:
            date_lower = datetime.strptime(date_range['lower'], '%Y-%m-%d')
            date_upper = datetime.strptime(date_range['upper'], '%Y-%m-%d')
            p = Period(query=query, date_lower=date_lower,
                       date_upper=date_upper)
            p.save()

        query.exclude_distributions.clear()
        for distr in Distribution.objects.all():
            if distr.id in params['distributions']:
                query.exclude_distributions.add(distr)

        query.exclude_article_types.clear()
        for art_type in ArticleType.objects.all():
            if art_type.id in params['article_types']:
                query.exclude_article_types.add(art_type)

        query.selected_pillars.clear()
        for pillar in Pillar.objects.all():
            if pillar.id in params['pillars']:
                query.selected_pillars.add(pillar)

        # The count depends on the relations set above, hence the second save.
        query.nr_results = count_results(query)
        query.save()
    except Exception as e:
        return json_response_message('ERROR', str(e))

    return json_response_message('SUCCESS', 'Query updated.')
def delete(request, query_id):
    """Delete a query owned by the requesting user.

    Responds with 'ERROR' when the query does not exist or is owned by
    another user, and with 'SUCCESS' once it has been deleted.
    """
    # objects.get() raises DoesNotExist instead of returning None, so the
    # "not found" case has to be handled as an exception.
    try:
        query = Query.objects.get(pk=query_id)
    except Query.DoesNotExist:
        return json_response_message('ERROR', 'Query not found.')

    if not request.user == query.user:
        return json_response_message('ERROR', 'Query does not belong to user.')

    # Remember the title for the confirmation message before deleting.
    q = query.title
    query.delete()

    return json_response_message('SUCCESS', 'Query "{}" deleted.'.format(q))
def metadata(request):
    """Return the metadata aggregations for the current search parameters.

    Besides the aggregations delivered by ``metadata_aggregation``, a
    'pillar' aggregation is added by summing the document counts of the
    newspaper buckets per Pillar.
    """
    params = get_search_parameters(request.REQUEST)
    result = metadata_aggregation(settings.ES_INDEX,
                                  settings.ES_DOCTYPE,
                                  params['query'],
                                  params['dates'],
                                  params['distributions'],
                                  params['article_types'],
                                  params['pillars'])
    aggregations = result['aggregations']

    # Categorize newspaper_ids per Pillar
    per_pillar = Counter()
    for bucket in aggregations['newspaper_ids']['buckets']:
        try:
            paper = Newspaper.objects.get(pk=bucket['key'])
        except Newspaper.DoesNotExist:
            # TODO: this means there's a paper_dc_identifier in ElasticSearch
            # without a corresponding Newspaper.
            paper = None

        if paper and paper.pillar:
            pillar_name = paper.pillar.name
        else:
            pillar_name = 'None'
        per_pillar[pillar_name] += bucket['doc_count']

    # Mimic the result of the other aggregations
    aggregations['pillar'] = [{'key': name, 'doc_count': total}
                              for (name, total) in per_pillar.iteritems()]

    return json_response_message('success', 'Complete', aggregations)
def index(request):
    """List the Queries of the current User, newest first."""
    own_queries = Query.objects.filter(user=request.user)
    newest_first = own_queries.order_by('-date_created')

    payload = {'queries': [item.get_query_dict() for item in newest_first]}
    return json_response_message('OK', '', payload)
def retrieve_stopwords(request):
    """List the Stopwords of the logged in User, ordered by query and word."""
    own_stopwords = StopWord.objects.filter(user=request.user)

    as_dicts = []
    for stopword in own_stopwords.order_by('query', 'word'):
        as_dicts.append(stopword.get_stopword_dict())

    return json_response_message('SUCCESS', '', {'stopwords': as_dicts})
def cancel_by_task_id(request, task_id):
    """Revoke (and terminate) the Celery task with the given id."""
    logger.info('services/cancel_task/{}'.format(task_id))

    task = AsyncResult(task_id)
    task.revoke(terminate=True)

    return json_response_message('ok', '')
def index(request):
    """Return the JSON-serialized queries of the user, newest first."""
    own_queries = Query.objects.filter(user=request.user)
    newest_first = own_queries.order_by('-date_created')

    serialized = serializers.serialize('json', newest_first)
    return json_response_message('OK', '', {'lexicon_items': serialized})
def retrieve_kb_resolver(request, doc_id):
    """Fetch the record for a document from the KB resolver.

    Responds with 'success' and the raw response content, or with 'error'
    when the request fails or the resolver answers with an HTTP error
    status.
    """
    logger.info('services/kb/resolver/')

    params = {
        'identifier': 'DDD:{}'.format(doc_id),
        'verb': 'GetRecord',
        'metadataPrefix': 'didl'
    }

    if settings.DEBUG:
        print >> stderr, 'url: {}, params: {}'.format(
            settings.KB_RESOLVER_URL, params)

    try:
        response = requests.get(
            settings.KB_RESOLVER_URL + settings.KB_API_KEY, params=params)
        # requests.get() does not raise for 4xx/5xx answers by itself;
        # raise_for_status() turns them into an HTTPError.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # RequestException is the base class of HTTPError and also covers
        # connection errors and timeouts.
        msg = 'KB Resolver request failed: {}'.format(str(e))
        if settings.DEBUG:
            print >> stderr, msg
        return json_response_message('error', msg)

    return json_response_message('success', 'Resolving successful',
                                 {'text': response.content})
def add_stopword(request):
    """Add a stopword for the current user, optionally tied to a query.

    When the posted ``query_id`` does not match an existing Query, the
    stopword is stored without a query. Any other lookup failure is
    reported as an 'ERROR' message.
    """
    posted = request.POST
    query_id = posted.get('query_id')
    word = posted.get('stopword')

    try:
        linked_query = Query.objects.get(pk=query_id)
    except Query.DoesNotExist:
        linked_query = None
    except Exception as err:
        return json_response_message('ERROR', str(err))

    StopWord.objects.get_or_create(user=request.user,
                                   query=linked_query,
                                   word=word)

    return json_response_message('SUCCESS', 'Stopword added.')
def retrieve_timeframes(request):
    """Return every entry of Term.TIMEFRAME_CHOICES as an id/name pair."""
    timeframes = []
    for choice in Term.TIMEFRAME_CHOICES:
        timeframes.append({'id': choice[0], 'name': choice[1]})

    return json_response_message('ok', '', {'result': timeframes})
def retrieve_pillars(request):
    """Return the id and name of every Pillar."""
    listing = []
    for pillar in Pillar.objects.all():
        listing.append({'id': pillar.id, 'name': pillar.name})

    return json_response_message('ok', '', {'result': listing})
def retrieve_stopwords(request):
    """List the Stopwords of the logged in User, ordered by query and word."""
    own_stopwords = StopWord.objects.filter(user=request.user)
    ordered = own_stopwords.order_by('query', 'word')

    payload = {'stopwords': [entry.get_stopword_dict() for entry in ordered]}
    return json_response_message('SUCCESS', '', payload)
def index(request):
    """Return the JSON-serialized queries of the user, newest first."""
    newest_first = (Query.objects
                    .filter(user=request.user)
                    .order_by('-date_created'))

    payload = {'lexicon_items': serializers.serialize('json', newest_first)}
    return json_response_message('OK', '', payload)
def update_nr_results(request, query_id):
    """Recalculate and store the number of results of a query.

    When the query cannot be retrieved, the error response supplied by
    ``get_query_object`` is returned instead.
    """
    stored_query, error_response = get_query_object(query_id)
    if not stored_query:
        return error_response

    stored_query.nr_results = count_results(stored_query)
    stored_query.save()

    payload = {'count': stored_query.nr_results}
    return json_response_message('SUCCESS', 'Number of results updated.',
                                 payload)
def update(request, query_id):
    """Update a query from the posted search parameters.

    The query text, title, comment and date range are overwritten and the
    excluded distributions / article types and selected pillars are
    rebuilt. Responds with 'ERROR' when the query does not exist, belongs
    to another user, or when anything fails while updating.
    """
    # objects.get() raises DoesNotExist instead of returning None, so the
    # "not found" case has to be handled as an exception.
    try:
        query = Query.objects.get(pk=query_id)
    except Query.DoesNotExist:
        return json_response_message('ERROR', 'Query not found.')

    if not request.user == query.user:
        return json_response_message('ERROR', 'Query does not belong to user.')

    params = get_search_parameters(request.POST)
    title = request.POST.get('title')
    comment = request.POST.get('comment')

    try:
        # Parsed inside the try block so that a malformed date is reported
        # as an 'ERROR' message like every other failure.
        date_lower = datetime.strptime(params['dates']['lower'], '%Y-%m-%d')
        date_upper = datetime.strptime(params['dates']['upper'], '%Y-%m-%d')

        Query.objects.filter(pk=query_id).update(query=params['query'],
                                                 title=title,
                                                 comment=comment,
                                                 date_lower=date_lower,
                                                 date_upper=date_upper)

        query.exclude_distributions.clear()
        for distr in Distribution.objects.all():
            if distr.id in params['distributions']:
                query.exclude_distributions.add(distr)

        query.exclude_article_types.clear()
        for art_type in ArticleType.objects.all():
            if art_type.id in params['article_types']:
                query.exclude_article_types.add(art_type)

        query.selected_pillars.clear()
        for pillar in Pillar.objects.all():
            if pillar.id in params['pillars']:
                query.selected_pillars.add(pillar)
    except Exception as e:
        return json_response_message('ERROR', str(e))

    return json_response_message('SUCCESS', 'Query saved.')
def get_query_object(query_id):
    """Look up the Query stored under ``query_id``.

    Returns a ``(query, response)`` pair: ``response`` holds an error
    message when the query could not be retrieved and is None otherwise.
    """
    found = None
    error_response = None

    try:
        found = Query.objects.get(pk=query_id)
    except Query.DoesNotExist:
        error_response = json_response_message(
            "error", "Query with id %s cannot be found." % query_id)
    except DatabaseError:
        error_response = json_response_message(
            "error", "Database error while retrieving query.")

    # Fallback for the case that neither a query nor an error was produced.
    if not (found or error_response):
        error_response = json_response_message("error", "No query found.")

    return found, error_response
def check_status_by_task_id(request, task_id):
    """Report the status of the generate_tv_cloud task.

    Non-AJAX requests are refused. A finished, successful task yields its
    result; a finished, failed task yields an error; an unfinished task
    yields 'WAITING' together with the task status and intermediate result.
    """
    if not request.is_ajax():
        return json_response_message('ERROR', 'No access.')

    task = AsyncResult(task_id)

    try:
        if not task.ready():
            return json_response_message('WAITING', task.status, task.result)
        if task.successful():
            return json_response_message('ok', '', task.get())
        return json_response_message('ERROR', 'Generating word cloud failed.')
    except AttributeError as err:
        return json_response_message('ERROR',
                                     'Other error: {}'.format(str(err)))
def get_query_object(query_id):
    """Look up the Query stored under ``query_id``.

    Returns a ``(query, response)`` pair: ``response`` holds an error
    message when the query could not be retrieved and is None otherwise.
    """
    found = None
    error_response = None

    try:
        found = Query.objects.get(pk=query_id)
    except Query.DoesNotExist:
        error_response = json_response_message(
            'error', 'Query with id %s cannot be found.' % query_id)
    except DatabaseError:
        error_response = json_response_message(
            'error', 'Database error while retrieving query.')

    # Fallback for the case that neither a query nor an error was produced.
    if not (found or error_response):
        error_response = json_response_message('error', 'No query found.')

    return found, error_response
def retrieve_kb_resolver(request):
    """Resolve the urn passed as request parameter 'id' via the KB resolver.

    Responds with 'success' and the raw response content, or with 'error'
    when the request to the resolver fails.
    """
    logger.info('services/kb/resolver/')

    host = 'resolver.kb.nl'
    port = 80
    path = 'resolve'

    logger.debug('retrieve_kb_resolver: %s', request.META["QUERY_STRING"])

    kb_resolver_url = ("http://" + host + ':' + str(port) + '/' + path +
                       '?urn=' + request.REQUEST["id"])

    try:
        response = requests.get(kb_resolver_url)
    except requests.exceptions.RequestException as e:
        # Only failures of the request itself are caught; a bare except
        # would also hide programming errors and KeyboardInterrupt.
        if settings.DEBUG:
            print >> stderr, "url: %s" % kb_resolver_url
        msg = "KB Resolver request failed: %s" % str(e)
        if settings.DEBUG:
            print >> stderr, msg
        return json_response_message('error', msg)

    return json_response_message('success', 'Resolving successful',
                                 {'text': response.content})
def create_query(request):
    """Create a new Query for the current user from the posted parameters.

    Stores the Query, its Periods, the excluded distributions / article
    types and the selected pillars, then records the number of results.
    An IntegrityError is reported as a duplicate title; any other failure
    is reported with its own message.
    """
    params = get_search_parameters(request.POST)
    date_format = '%Y-%m-%d'

    try:
        q = Query(query=params['query'],
                  title=request.POST.get('title'),
                  comment=request.POST.get('comment'),
                  user=request.user)
        q.save()

        for date_range in params['dates']:
            Period(
                query=q,
                date_lower=datetime.strptime(date_range['lower'], date_format),
                date_upper=datetime.strptime(date_range['upper'], date_format),
            ).save()

        # (model, relation on the Query, key in the search parameters)
        relations = (
            (Distribution, 'exclude_distributions', 'distributions'),
            (ArticleType, 'exclude_article_types', 'article_types'),
            (Pillar, 'selected_pillars', 'pillars'),
        )
        for model, relation, key in relations:
            for candidate in model.objects.all():
                if candidate.id in params[key]:
                    getattr(q, relation).add(candidate)

        q.nr_results = count_results(q)
        q.save()
    except IntegrityError:
        return json_response_message(
            'ERROR', 'A query with this title already exists.')
    except Exception as err:
        return json_response_message('ERROR', str(err))

    return json_response_message('SUCCESS', '')
def timeline(request, query_id, resolution):
    """Generate a timeline for a query.

    For every date delivered by ``query2docidsdate`` the result holds a
    tuple ``(value, number of documents, documents)``. ``value`` is the
    number of documents, or - when the GET parameter ``normalize`` equals
    '1' - that number divided by the summed DayStatistic counts for the
    date. ``resolution`` 'year' and 'day' are handled explicitly when
    grouping the statistics; any other value groups them per month.

    TODO: the timeline view should be moved to a separate app
    """
    logger.info('query/timeline/ - user: {}'.format(request.user.username))

    normalize = request.GET.get('normalize') == '1'

    if settings.DEBUG:
        print >> stderr, "query/bursts() query_id:", query_id
        print >> stderr, "resolution:", resolution
        print >> stderr, "query_id:", query_id
        print >> stderr, "normalize:", normalize
        print >> stderr, "resolution:", resolution

    query = get_object_or_404(Query, pk=query_id)

    # Retrieve the min/max date
    periods = Period.objects.filter(query=query)
    _, begindate = periods.aggregate(Min('date_lower')).popitem()
    _, enddate = periods.aggregate(Max('date_upper')).popitem()

    # Retrieve a dictionary with date and corresponding documents for this Query
    date2doc = query2docidsdate(query, resolution)

    # Retrieve normalization values (if necessary)
    if normalize:
        values = DayStatistic.objects.\
            filter(date__range=(begindate, enddate)).\
            exclude(article_type__in=query.exclude_article_types.all()).\
            exclude(distribution__in=query.exclude_distributions.all()).\
            values('date').\
            annotate(count=Sum('count'))

        # Sum the daily counts per bucket of the requested resolution.
        date2normalize = defaultdict(int)
        for v in values:
            y = v['date'].year
            m = v['date'].month if resolution != 'year' else 1
            d = v['date'].day if resolution == 'day' else 1
            date2normalize[datetime(y, m, d)] += v['count']

    # Calculate relative frequencies
    result = dict()
    for d, docs in date2doc.items():
        value = len(docs)
        if normalize:
            # A date without statistics has a total of 0; report 0.0 for it
            # instead of failing with a ZeroDivisionError.
            total = date2normalize.get(d, 0)
            value = round(value / float(total), 10) if total else 0.0
        result[d.isoformat()] = (value, len(docs), docs)

    # Return the result
    return json_response_message('SUCCESS', 'Timeline retrieved',
                                 {'result': result})
def create_query(request):
    """Create a new Query for the current user from the posted parameters.

    Stores the Query, its Periods, the excluded distributions / article
    types and the selected pillars, then records the number of results.
    An IntegrityError is reported as a duplicate title; any other failure
    is reported with its own message.
    """
    params = get_search_parameters(request.POST)
    date_format = '%Y-%m-%d'

    try:
        q = Query(query=params['query'],
                  title=request.POST.get('title'),
                  comment=request.POST.get('comment'),
                  user=request.user)
        q.save()

        for date_range in params['dates']:
            Period(
                query=q,
                date_lower=datetime.strptime(date_range['lower'], date_format),
                date_upper=datetime.strptime(date_range['upper'], date_format),
            ).save()

        # (model, relation on the Query, key in the search parameters)
        relations = (
            (Distribution, 'exclude_distributions', 'distributions'),
            (ArticleType, 'exclude_article_types', 'article_types'),
            (Pillar, 'selected_pillars', 'pillars'),
        )
        for model, relation, key in relations:
            for candidate in model.objects.all():
                if candidate.id in params[key]:
                    getattr(q, relation).add(candidate)

        q.nr_results = count_results(q)
        q.save()
    except IntegrityError:
        return json_response_message('ERROR', 'A query with this title already exists.')
    except Exception as err:
        return json_response_message('ERROR', str(err))

    return json_response_message('SUCCESS', '')
def retrieve_kb_resolver(request, doc_id):
    """Fetch the record for a document from the KB resolver.

    Responds with 'success' and the raw response content, or with 'error'
    when the request fails or the resolver answers with an HTTP error
    status.
    """
    logger.info('services/kb/resolver/')

    params = {
        'identifier': 'DDD:{}'.format(doc_id),
        'verb': 'GetRecord',
        'metadataPrefix': 'didl'
    }

    if settings.DEBUG:
        print >> stderr, 'url: {}, params: {}'.format(
            settings.KB_RESOLVER_URL, params)

    try:
        response = requests.get(
            settings.KB_RESOLVER_URL + settings.KB_API_KEY, params=params)
        # requests.get() does not raise for 4xx/5xx answers by itself;
        # raise_for_status() turns them into an HTTPError.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # RequestException is the base class of HTTPError and also covers
        # connection errors and timeouts.
        msg = 'KB Resolver request failed: {}'.format(str(e))
        if settings.DEBUG:
            print >> stderr, msg
        return json_response_message('error', msg)

    return json_response_message('success', 'Resolving successful',
                                 {'text': response.content})
def stopwords(request):
    """Return the stopword list of the current user.

    The stopwords are ordered by query first and by word second.
    """
    # A single order_by() with both fields: chaining two order_by() calls
    # keeps only the last one, which silently dropped the ordering on word.
    stopwords = StopWord.objects.select_related() \
                        .filter(user=request.user) \
                        .order_by('query', 'word')

    stopwordlist = [word.get_stopword_dict() for word in stopwords]

    params = {'stopwords': stopwordlist, 'editglob': False}

    return json_response_message('SUCCESS', '', params)
def doc_count(request):
    """Return the number of documents matched by a stored query.

    The query is identified by the request parameter 'queryID'. Responds
    with 'error' when the id is missing, the query cannot be retrieved, or
    no count could be obtained.
    """
    logger.info('services/doc_count/ - user: {}'.format(request.user.username))
    if settings.DEBUG:
        print >> stderr, "doc_count()"

    query_id = request.REQUEST.get('queryID')
    if not query_id:
        return json_response_message('error', 'Missing query id.')

    query, response = get_query_object(query_id)
    if not query:
        return response

    params = query.get_query_dict()

    result = count_search_results(settings.ES_INDEX,
                                  settings.ES_DOCTYPE,
                                  params['query'],
                                  params['dates'],
                                  params['exclude_distributions'],
                                  params['exclude_article_types'],
                                  params['selected_pillars'])

    # 'error' is the sentinel for a missing count, so that a count of 0 is
    # still reported as a valid result.
    count = result.get('count', 'error')

    if not count == 'error':
        return json_response_message('ok', 'Retrieved document count.',
                                     {'doc_count': str(count)})

    # The message uses a {query} placeholder, so it has to be filled in with
    # str.format(); applying the % operator to it raises a TypeError.
    msg = 'Unable to retrieve document count for query "{query}"'.format(
        query=query)
    return json_response_message('error', msg)
def user_login(request):
    """Authenticate the posted credentials and log the user in.

    Responds with 'SUCCESS' plus some user details when the credentials
    match an active account, and with 'ERROR' for a disabled account or
    for wrong credentials.
    """
    username = request.POST.get("username")
    password = request.POST.get("password")
    next_url = request.POST.get("next_url")

    user = authenticate(username=username, password=password)

    if user is None:
        return json_response_message("ERROR", "Oops, that is not correct!")

    if not user.is_active:
        return json_response_message(
            "ERROR",
            "Account disabled.\nPlease contact the system administrator.")

    login(request, user)

    return json_response_message("SUCCESS", "", {
        "user_id": user.id,
        "user_name": user.username,
        # TODO: what is timestamp used for? Is it really necessary
        "timestamp": TIMESTAMP,
        "next_url": next_url,
    })
def stopwords(request):
    """Return the stopword list of the current user.

    The stopwords are ordered by query first and by word second.
    """
    # A single order_by() with both fields: chaining two order_by() calls
    # keeps only the last one, which silently dropped the ordering on word.
    stopwords = StopWord.objects.select_related() \
                        .filter(user=request.user) \
                        .order_by('query', 'word')

    stopwordlist = [word.get_stopword_dict() for word in stopwords]

    params = {
        'stopwords': stopwordlist,
        'editglob': False
    }

    return json_response_message('SUCCESS', '', params)
def add_stopword(request):
    """Create a Stopword for the current user, optionally tied to a Query.

    When the posted ``query_id`` does not match an existing Query, the
    Stopword is stored without a query.
    """
    posted = request.POST
    query_id = posted.get('query_id')
    word = posted.get('stopword')

    # Retrieve the Query, fall back to None if not found
    try:
        linked_query = Query.objects.get(pk=query_id)
    except Query.DoesNotExist:
        linked_query = None

    StopWord.objects.create(user=request.user, query=linked_query, word=word)

    confirmation = 'Stopword "{}" added.'.format(word)
    return json_response_message('SUCCESS', confirmation)