def test_daterange2dates():
    """Check daterange2dates() for each supported input form.

    All four inputs are expected to produce the same lower/upper pair.
    """
    expected = {'lower': '1850-01-01', 'upper': '1990-12-31'}
    inputs = [
        TEXCAVATOR_DATE_RANGE,
        # First date after second date
        "19901231,18500101",
        # Input single date
        "19901231",
        # Empty input
        "",
    ]
    for daterange in inputs:
        assert_equals(daterange2dates(daterange), expected)
def handle(self, *args, **options):
    """Time multiple_document_word_cloud() on random sets of documents.

    Positional arguments (both optional):
        args[0] -- number of documents per run (default 100000)
        args[1] -- number of repetitions (default 10)

    The ``took`` value of every response is written to ``self.stdout``
    and the average over all repetitions is printed at the end.
    """
    query_size = 100000
    n_repetitions = 10

    if len(args) > 0:
        query_size = int(args[0])
    if len(args) > 1:
        n_repetitions = int(args[1])

    response_times = []

    for _ in range(n_repetitions):
        # select random documents
        document_set = DocID.objects.order_by('?')[0:query_size]
        doc_ids = [doc.doc_id for doc in document_set]

        aggr_resp = multiple_document_word_cloud(settings.ES_INDEX,
                                                 settings.ES_DOCTYPE,
                                                 None,
                                                 daterange2dates(''),
                                                 [],
                                                 [],
                                                 doc_ids)
        took = aggr_resp.get('took')
        response_times.append(int(took))
        self.stdout.write(str(took))
        self.stdout.flush()

    # Zero repetitions would otherwise end in a ZeroDivisionError.
    if not response_times:
        print('No measurements collected; nothing to average.')
        return

    # Convert to float *before* dividing: on Python 2, int / int floors,
    # so float(sum / len) silently dropped the fractional part.
    avg = float(sum(response_times)) / len(response_times)
    print('Average response time for aggregating over {num} documents: '
          '{avg} miliseconds'.format(num=query_size, avg=avg))
def handle(self, *args, **options):
    """Benchmark word cloud aggregation over randomly chosen documents.

    Optional positional arguments:
        args[0] -- how many documents to aggregate per run (default 100000)
        args[1] -- how many runs to perform (default 10)

    Each run writes the response's ``took`` value to ``self.stdout``;
    the mean of those values is printed once all runs are done.
    """
    query_size = int(args[0]) if len(args) > 0 else 100000
    n_repetitions = int(args[1]) if len(args) > 1 else 10

    response_times = []

    for _ in range(n_repetitions):
        # select random documents
        document_set = DocID.objects.order_by('?')[0:query_size]
        doc_ids = [doc.doc_id for doc in document_set]

        aggr_resp = multiple_document_word_cloud(settings.ES_INDEX,
                                                 settings.ES_DOCTYPE,
                                                 None,
                                                 daterange2dates(''),
                                                 [],
                                                 [],
                                                 doc_ids)
        took = aggr_resp.get('took')
        response_times.append(int(took))
        self.stdout.write(str(took))
        self.stdout.flush()

    # Without any run there is nothing to divide by.
    if not response_times:
        print('No measurements collected; nothing to average.')
        return

    # float() is applied to the sum, not to the quotient: on Python 2 the
    # quotient of two ints is already floored by the time float() sees it.
    avg = float(sum(response_times)) / len(response_times)
    print('Average response time for aggregating over {num} documents: '
          '{avg} miliseconds'.format(num=query_size, avg=avg))
def user_login(request):
    """Authenticate the posted credentials and reply with a JSON message.

    Reads ``username``, ``password`` and ``next_url`` from ``request.POST``.
    Returns a 'SUCCESS' message with user and date range parameters when
    the credentials belong to an active user, an 'ERROR' message otherwise.
    """
    username = request.POST.get('username')
    password = request.POST.get('password')
    next_url = request.POST.get('next_url')

    user = authenticate(username=username, password=password)

    # Unknown user or wrong password.
    if user is None:
        return json_response_message('ERROR', 'Oops, that is not correct!')

    # Valid credentials, but the account has been switched off.
    if not user.is_active:
        return json_response_message(
            'ERROR',
            'Account disabled.\n'
            'Please contact the system '
            'administrator.')

    login(request, user)

    # TODO: are these date_limits really necessary?
    date_limits = daterange2dates('')
    daterange = [
        int(date_limits[bound].replace('-', ''))
        for bound in ('lower', 'upper')
    ]

    params = {
        "user_id": user.id,
        "user_name": user.username,
        "daterange": daterange,
        # TODO: what is timestamp used for? Is it really necessary
        "timestamp": TIMESTAMP,
        "next_url": next_url
    }
    return json_response_message('SUCCESS', '', params)
def user_login(request):
    """Log a user in from POSTed credentials; respond with a JSON message.

    ``username``, ``password`` and ``next_url`` are taken from
    ``request.POST``. An active, authenticated user is logged in and gets
    a 'SUCCESS' message; every other outcome gets an 'ERROR' message.
    """
    username = request.POST.get('username')
    password = request.POST.get('password')
    next_url = request.POST.get('next_url')

    user = authenticate(username=username, password=password)

    if user is None:
        # Authentication failed.
        return json_response_message('ERROR', 'Oops, that is not correct!')

    if not user.is_active:
        disabled_msg = ('Account disabled.\n'
                        'Please contact the system '
                        'administrator.')
        return json_response_message('ERROR', disabled_msg)

    login(request, user)

    # TODO: are these date_limits really necessary?
    date_limits = daterange2dates('')
    lower = int(date_limits['lower'].replace('-', ''))
    upper = int(date_limits['upper'].replace('-', ''))

    params = {
        "user_id": user.id,
        "user_name": user.username,
        "daterange": [lower, upper],
        # TODO: what is timestamp used for? Is it really necessary
        "timestamp": TIMESTAMP,
        "next_url": next_url
    }
    return json_response_message('SUCCESS', '', params)
def test_daterange2dates():
    """Verify that daterange2dates() returns the expected bounds.

    The configured range, a reversed range, a single date and an empty
    string must all give the same 'lower' and 'upper' values.
    """
    expected = {
        'lower': '1850-01-01',
        'upper': '1990-12-31'
    }

    assert_equals(daterange2dates(TEXCAVATOR_DATE_RANGE), expected)

    # First date after second date
    reversed_range = "19901231,18500101"
    assert_equals(daterange2dates(reversed_range), expected)

    # Input single date
    single_date = "19901231"
    assert_equals(daterange2dates(single_date), expected)

    # Empty input
    assert_equals(daterange2dates(""), expected)
def index(request):
    """Render main page."""
    date_limits = daterange2dates('')
    # Turn 'YYYY-MM-DD' strings into YYYYMMDD integers for the template.
    sru_date_limits = [
        int(date_limits[bound].replace('-', ''))
        for bound in ('lower', 'upper')
    ]

    context = {
        "PROJECT_NAME": settings.PROJECT_NAME,
        "SRU_DATE_LIMITS": sru_date_limits,
        "QUERY_DATA_DOWNLOAD_ALLOW": settings.QUERY_DATA_DOWNLOAD_ALLOW,
        "ES_INDEX": settings.ES_INDEX,
        "ILPS_LOGGING": settings.ILPS_LOGGING
    }

    return render_to_response('index.html', context, RequestContext(request))
def handle(self, *args, **options):
    """Empty the DayStatistic table and refill it year by year.

    The year span comes from settings.TEXCAVATOR_DATE_RANGE and can be
    overridden with positional arguments: args[0] is the first year,
    args[1] the last year (inclusive).
    """
    print('Emptying table...')
    DayStatistic.objects.all().delete()

    dates = daterange2dates(settings.TEXCAVATOR_DATE_RANGE)
    year_lower = datetime.strptime(dates['lower'], '%Y-%m-%d').date().year
    year_upper = datetime.strptime(dates['upper'], '%Y-%m-%d').date().year

    if len(args) > 0:
        year_lower = int(args[0])
    if len(args) > 1:
        year_upper = int(args[1])

    print('Gathering statistics from %s until %s.'
          % (year_lower, year_upper))

    agg_name = 'daystatistic'
    key_format = '%Y-%m-%dT00:00:00.000Z'

    for year in range(year_lower, year_upper + 1):
        date_range = {
            'lower': '{y}-01-01'.format(y=year),
            'upper': '{y}-12-31'.format(y=year)
        }

        print(year)
        results = day_statistics(settings.ES_INDEX,
                                 settings.ES_DOCTYPE,
                                 date_range,
                                 agg_name)
        if not results:
            continue

        # save results to database
        buckets = results['aggregations'][agg_name]['buckets']
        for bucket in buckets:
            try:
                day = datetime.strptime(bucket['key_as_string'],
                                        key_format).date()
                DayStatistic.objects.create(date=str(day),
                                            count=bucket['doc_count'])
            except DatabaseError as exc:
                # Failed rows are skipped; only reported in DEBUG mode.
                msg = "Database Error: %s" % exc
                if settings.DEBUG:
                    print(msg)
def handle(self, *args, **options):
    """Benchmark do_search() with random weighted OR-queries.

    Positional arguments (both optional):
        args[0] -- number of terms per query (default 10)
        args[1] -- number of repetitions (default 10)

    Exits with status 1 when no QueryTerm objects are stored. For every
    valid query the response's ``took`` value is written to
    ``self.stdout``; afterwards the average ``took`` value and the
    average wall clock time around the do_search() call are printed.
    """
    if QueryTerm.objects.all().count() == 0:
        # The original message was missing the opening quote.
        print("No query terms stored in the database. Please run "
              "'python manage.py gatherqueryterms' first.")
        sys.exit(1)

    query_size = 10
    n_repetitions = 10

    if len(args) > 0:
        query_size = int(args[0])
    if len(args) > 1:
        n_repetitions = int(args[1])

    response_times = []
    es_wall_clock = []

    for _ in range(n_repetitions):
        # generate random weighted query
        query_terms = QueryTerm.objects.order_by('?')[0:query_size]
        query_list = [
            '{}^{}'.format(t.term, randint(1, 40)) for t in query_terms
        ]
        q = ' OR '.join(query_list)

        t1 = time.time()
        valid_q, result = do_search(settings.ES_INDEX,
                                    settings.ES_DOCTYPE,
                                    q,
                                    0,
                                    20,
                                    daterange2dates(''),
                                    [],
                                    [])
        t2 = time.time()

        if not valid_q:
            print('Invalid query: {}'.format(q))
            continue

        # time.time() is in seconds; store milliseconds.
        es_wall_clock.append((t2 - t1) * 1000)
        took = result.get('took')
        response_times.append(int(took))
        self.stdout.write(str(took))
        self.stdout.flush()

    # Both lists stay empty when every query was invalid or no
    # repetition ran; dividing would raise ZeroDivisionError.
    if not response_times:
        print('No valid queries were executed; no averages to report.')
        return

    # Convert to float *before* dividing: on Python 2, int / int floors,
    # so float(sum / len) silently dropped the fractional part.
    avg = float(sum(response_times)) / len(response_times)
    avg_wall_clock = float(sum(es_wall_clock)) / len(es_wall_clock)

    print('Average response time for queries of size {}: {} miliseconds'
          .format(query_size, avg))
    print('Average wall clock time for queries of size {}: {} '
          'miliseconds'.format(query_size, avg_wall_clock))
def handle(self, *args, **options):
    """Rebuild the DayStatistic table from day_statistics() results.

    All existing rows are deleted first. The first and last year default
    to the bounds of settings.TEXCAVATOR_DATE_RANGE; args[0] and args[1],
    when given, replace the first and last year respectively.
    """
    print('Emptying table...')
    DayStatistic.objects.all().delete()

    date_range_str = settings.TEXCAVATOR_DATE_RANGE
    dates = daterange2dates(date_range_str)
    date_format = '%Y-%m-%d'
    year_lower = datetime.strptime(dates['lower'], date_format).date().year
    year_upper = datetime.strptime(dates['upper'], date_format).date().year

    if len(args) > 0:
        year_lower = int(args[0])
    if len(args) > 1:
        year_upper = int(args[1])

    print('Gathering statistics from %s until %s.'
          % (year_lower, year_upper))

    agg_name = 'daystatistic'

    for year in range(year_lower, year_upper + 1):
        date_range = {
            'lower': '{y}-01-01'.format(y=year),
            'upper': '{y}-12-31'.format(y=year)
        }

        print(year)
        results = day_statistics(settings.ES_INDEX,
                                 settings.ES_DOCTYPE,
                                 date_range,
                                 agg_name)

        if results:
            # save results to database
            agg_data = results['aggregations'][agg_name]['buckets']
            for entry in agg_data:
                try:
                    parsed = datetime.strptime(entry['key_as_string'],
                                               '%Y-%m-%dT00:00:00.000Z')
                    DayStatistic.objects.create(date=str(parsed.date()),
                                                count=entry['doc_count'])
                except DatabaseError as exc:
                    # The row is skipped; the error shows only in DEBUG.
                    msg = "Database Error: %s" % exc
                    if settings.DEBUG:
                        print(msg)