def tv_cloud(request):
    """Generate termvector word cloud using the termvector approach.

    Returns word cloud data for a single document word cloud (based on a
    single document id) and multiple document word clouds (either based on a
    list of document ids (i.e., timeline burst cloud) or a query with
    metadata). For multiple document word clouds, a celery task generates the
    cloud data.
    """
    if settings.DEBUG:
        # Use the configured logger instead of the Python-2-only
        # ``print >> stderr`` statement; respects logging configuration.
        logger.debug('termvector cloud()')
    logger.info('services/cloud/ - termvector word cloud')
    logger.info('services/cloud/ - user: {}'.format(request.user.username))

    # Retrieve the cloud settings
    query_id = request.GET.get('queryID')
    min_length = int(request.GET.get('min_length', 2))
    use_stopwords = request.GET.get('stopwords') == "1"
    use_default_stopwords = request.GET.get('stopwords_default') == "1"
    stems = request.GET.get('stems') == "1"

    # Retrieve the stopwords
    stopwords = []
    if use_stopwords:
        # Stop words the user defined for all of his/her queries
        stopwords_user = list(StopWord.objects
                              .filter(user=request.user)
                              .filter(query=None)
                              .values_list('word', flat=True))

        # Stop words attached to this specific query (if any)
        stopwords_query = []
        if query_id:
            stopwords_query = list(StopWord.objects
                                   .filter(user=request.user)
                                   .filter(query__id=query_id)
                                   .values_list('word', flat=True))

        # System-wide default stop words (no user, no query)
        stopwords_default = []
        if use_default_stopwords:
            stopwords_default = list(StopWord.objects
                                     .filter(user=None)
                                     .filter(query=None)
                                     .values_list('word', flat=True))

        stopwords = stopwords_user + stopwords_query + stopwords_default

    record_id = request.GET.get('record_id')
    logger.info('services/cloud/ - record_id: {}'.format(record_id))
    idf_timeframe = request.GET.get('idf_timeframe')

    if record_id:
        # Cloud for a single document: computed synchronously.
        t_vector = single_document_word_cloud(settings.ES_INDEX,
                                              settings.ES_DOCTYPE,
                                              record_id,
                                              min_length,
                                              stopwords,
                                              stems)
        normalized = normalize_cloud(t_vector['result'], idf_timeframe)
        return json_response_message('ok', 'Word cloud generated',
                                     {'result': normalized})

    # Cloud for a query: delegated to a celery task.
    # get_object_or_404 raises Http404 when query_id is missing or unknown.
    logger.info('services/cloud/ - multiple document word cloud')
    query = get_object_or_404(Query, pk=query_id)
    params = query.get_query_dict()

    # If we're creating a timeline cloud, set the min/max dates
    date_range = None
    if request.GET.get('is_timeline'):
        date_range = daterange2dates(request.GET.get('date_range'))

    task = generate_tv_cloud.delay(params, min_length, stopwords, date_range,
                                   stems, idf_timeframe)
    logger.info('services/cloud/ - Celery task id: {}'.format(task.id))
    return json_response_message('ok', '', {'task': task.id})
def tv_cloud(request):
    """Generate termvector word cloud using the termvector approach.

    Returns word cloud data for a single document word cloud (based on a
    single document id) and multiple document word clouds (either based on a
    list of document ids (i.e., timeline burst cloud) or a query with
    metadata). For multiple document word clouds, a celery task generates the
    cloud data.
    """
    if settings.DEBUG:
        # Use the configured logger instead of the Python-2-only
        # ``print >> stderr`` statement; respects logging configuration.
        logger.debug('termvector cloud()')
    logger.info('services/cloud/ - termvector word cloud')
    logger.info('services/cloud/ - user: {}'.format(request.user.username))

    # NOTE(review): request.REQUEST is deprecated (removed in Django 1.9);
    # switch to request.GET / request.POST when upgrading.
    params = get_search_parameters(request.REQUEST)
    ids = request.REQUEST.get('ids')

    # Retrieve the cloud settings
    query_id = request.GET.get('queryID')
    min_length = int(request.GET.get('min_length', 2))
    use_stopwords = request.GET.get('stopwords') == "1"
    use_default_stopwords = request.GET.get('stopwords_default') == "1"
    stems = request.GET.get('stems') == "1"

    # Retrieve the stopwords
    stopwords = []
    if use_stopwords:
        # Stop words the user defined for all of his/her queries
        stopwords_user = list(StopWord.objects
                              .filter(user=request.user)
                              .filter(query=None)
                              .values_list('word', flat=True))

        # Stop words attached to this specific query (if any)
        stopwords_query = []
        if query_id:
            stopwords_query = list(StopWord.objects
                                   .filter(user=request.user)
                                   .filter(query__id=query_id)
                                   .values_list('word', flat=True))

        # System-wide default stop words (no user, no query)
        stopwords_default = []
        if use_default_stopwords:
            stopwords_default = list(StopWord.objects
                                     .filter(user=None)
                                     .filter(query=None)
                                     .values_list('word', flat=True))

        stopwords = stopwords_user + stopwords_query + stopwords_default

    # Cloud by ids
    if ids:
        # Drop empty fragments (e.g. from a trailing comma: "12," would
        # otherwise yield ['12', ''] and send an empty id to Elasticsearch).
        ids = [doc_id for doc_id in ids.split(',') if doc_id]

    if ids and len(ids) == 1:
        # Word cloud for single document: computed synchronously.
        logger.info('services/cloud/ - single document word cloud')

        t_vector = single_document_word_cloud(settings.ES_INDEX,
                                              settings.ES_DOCTYPE,
                                              ids[0],
                                              min_length,
                                              stopwords,
                                              stems)
        return json_response_message('ok', 'Word cloud generated', t_vector)

    # Cloud by queryID or multiple ids: delegated to a celery task.
    logger.info('services/cloud/ - multiple document word cloud')

    task = generate_tv_cloud.delay(params, min_length, stopwords, ids, stems)
    logger.info('services/cloud/ - Celery task id: {}'.format(task.id))
    return json_response_message('ok', '', {'task': task.id})
def tv_cloud(request):
    """Generate termvector word cloud using the termvector approach.

    Produces word cloud data either for a single document (identified by the
    ``record_id`` GET parameter) or for a whole query (identified by
    ``queryID``, optionally restricted to a timeline date range). The
    single-document cloud is computed synchronously; the query cloud is
    handed off to a celery task whose id is returned to the client.
    """
    if settings.DEBUG:
        print >> stderr, "termvector cloud()"
    logger.info('services/cloud/ - termvector word cloud')
    logger.info('services/cloud/ - user: {}'.format(request.user.username))

    get_param = request.GET.get

    # Cloud settings supplied by the client
    query_id = get_param('queryID')
    min_length = int(get_param('min_length', 2))
    use_stopwords = get_param('stopwords') == "1"
    use_default_stopwords = get_param('stopwords_default') == "1"
    stems = get_param('stems') == "1"

    def _stopword_list(**filters):
        """Return the stop words matching the given user/query filters."""
        return list(StopWord.objects.filter(**filters)
                    .values_list('word', flat=True))

    # Gather the applicable stop words: user-wide, then query-specific,
    # then system-wide defaults (same order as before).
    stopwords = []
    if use_stopwords:
        collected = _stopword_list(user=request.user, query=None)
        if query_id:
            collected += _stopword_list(user=request.user,
                                        query__id=query_id)
        if use_default_stopwords:
            collected += _stopword_list(user=None, query=None)
        stopwords = collected

    record_id = get_param('record_id')
    logger.info('services/cloud/ - record_id: {}'.format(record_id))
    idf_timeframe = get_param('idf_timeframe')

    if record_id:
        # Single-document cloud: computed synchronously and returned at once.
        t_vector = single_document_word_cloud(
            settings.ES_INDEX, settings.ES_DOCTYPE, record_id, min_length,
            stopwords, stems)
        normalized = normalize_cloud(t_vector['result'], idf_timeframe)
        return json_response_message('ok', 'Word cloud generated',
                                     {'result': normalized})

    # Query-based cloud: delegated to a celery task.
    logger.info('services/cloud/ - multiple document word cloud')
    params = get_object_or_404(Query, pk=query_id).get_query_dict()

    # Timeline clouds are restricted to an explicit min/max date range.
    date_range = None
    if get_param('is_timeline'):
        date_range = daterange2dates(get_param('date_range'))

    task = generate_tv_cloud.delay(params, min_length, stopwords, date_range,
                                   stems, idf_timeframe)
    logger.info('services/cloud/ - Celery task id: {}'.format(task.id))
    return json_response_message('ok', '', {'task': task.id})