def search_task(parameters, search_process_id, background=True):
    """Run a collection search and store the summary on the given SearchProcess."""
    from bims.utils.celery import memcache_lock
    from bims.api_views.search import CollectionSearch
    from bims.models.search_process import (
        SearchProcess,
        SEARCH_PROCESSING,
        SEARCH_FINISHED,
        SEARCH_FAILED
    )

    try:
        search_process = SearchProcess.objects.get(id=search_process_id)
    except SearchProcess.DoesNotExist:
        return

    if background:
        lock_id = '{0}-lock-{1}'.format(
            search_process.file_path,
            search_process.process_id
        )
        oid = '{0}'.format(search_process.process_id)
        with memcache_lock(lock_id, oid) as acquired:
            if acquired:
                search_process.set_status(SEARCH_PROCESSING)

                search = CollectionSearch(parameters)
                search_results = search.get_summary_data()
                if search_results:
                    search_process.set_search_raw_query(
                        search.location_sites_raw_query
                    )
                    search_process.create_view()
                    search_process.set_status(SEARCH_FINISHED, False)
                    search_results['status'] = SEARCH_FINISHED
                    search_results['extent'] = search.extent()
                    search_process.save_to_file(search_results)
                else:
                    search_process.set_status(SEARCH_FAILED)
                return
        logger.info(
            'Search %s is already being processed by another worker',
            search_process.process_id)
    else:
        search = CollectionSearch(parameters)
        search_results = search.get_summary_data()
        if search_results:
            search_process.set_search_raw_query(
                search.location_sites_raw_query
            )
            search_process.create_view()
            search_process.set_status(SEARCH_FINISHED, False)
            search_results['status'] = SEARCH_FINISHED
            search_results['extent'] = search.extent()
            search_process.save_to_file(search_results)
        else:
            search_process.set_status(SEARCH_FAILED)
        return search_results
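# Every task in this module serialises its work through
# ``bims.utils.celery.memcache_lock``, which is imported but not shown in this
# excerpt. The sketch below is an assumption of what that helper likely looks
# like, following the "ensure a task is only executed one at a time" recipe
# from the Celery documentation: ``cache.add()`` is atomic, so only one worker
# can acquire a given lock id until it expires or is released.
from contextlib import contextmanager
from time import monotonic

from django.core.cache import cache

LOCK_EXPIRE = 60 * 10  # assumed lock lifetime: 10 minutes


@contextmanager
def memcache_lock(lock_id, oid):
    # cache.add only succeeds when the key does not exist yet, which makes it
    # a cheap distributed lock across Celery workers sharing the same cache.
    timeout_at = monotonic() + LOCK_EXPIRE - 3
    status = cache.add(lock_id, oid, LOCK_EXPIRE)
    try:
        yield status
    finally:
        if monotonic() < timeout_at and status:
            # Release the lock only if we acquired it and it has not expired,
            # so a lock taken over by another worker is never deleted.
            cache.delete(lock_id)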
def download_data_to_csv(path_file, request):
    """Write collection records matching the search/filter request to a CSV file."""
    from bims.serializers.bio_collection_serializer import \
        BioCollectionOneRowSerializer
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock
    from bims.models import BiologicalCollectionRecord

    path_file_hexdigest = md5(path_file).hexdigest()
    lock_id = '{0}-lock-{1}'.format(download_data_to_csv.name,
                                    path_file_hexdigest)
    oid = '{0}'.format(path_file_hexdigest)
    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            query_value = request.get('search', '')
            filters = request
            is_using_filters = GetCollectionAbstract.is_using_filters(filters)
            site_results = None
            if is_using_filters or query_value:
                collection_results, \
                    site_results, \
                    fuzzy_search = GetCollectionAbstract.apply_filter(
                        query_value,
                        filters,
                        ignore_bbox=True)
            else:
                collection_results = GetCollectionAbstract.get_all_validated()
            if not collection_results and site_results:
                site_ids = site_results.values_list('id', flat=True)
                collection_results = \
                    BiologicalCollectionRecord.objects.filter(
                        site__id__in=site_ids).distinct()
            serializer = BioCollectionOneRowSerializer(collection_results,
                                                       many=True)
            headers = serializer.data[0].keys()
            rows = serializer.data

            with open(path_file, 'wb') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=headers)
                writer.writeheader()
                for row in rows:
                    writer.writerow(row)
            return
    logger.info('Csv %s is already being processed by another worker',
                path_file)
def download_chemical_data_to_csv(path_file, site_id):
    """Write the chemical records of a location site to a CSV file."""
    from bims.models.chemical_record import ChemicalRecord
    from bims.serializers.chemical_records_serializer import (
        ChemicalRecordsOneRowSerializer
    )
    from bims.utils.celery import memcache_lock

    path_file_hexdigest = sha256(path_file.encode('utf-8')).hexdigest()
    lock_id = '{}-lock-{}'.format(download_chemical_data_to_csv.name,
                                  path_file_hexdigest)
    oid = '{0}'.format(path_file_hexdigest)
    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            queryset = ChemicalRecord.objects.filter(
                Q(location_site_id=site_id) |
                Q(survey__site_id=site_id))
            serializer = ChemicalRecordsOneRowSerializer(queryset, many=True)
            headers = serializer.data[0].keys()
            rows = serializer.data

            formatted_headers = []
            # Rename headers
            for header in headers:
                if header == 'class_name':
                    header = 'class'
                header = header.replace('_or_', '/')
                header = header.replace('_', ' ').capitalize()
                formatted_headers.append(header)

            with open(path_file, 'w') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=formatted_headers)
                writer.writeheader()
                writer.fieldnames = headers
                for row in rows:
                    writer.writerow(row)
            return
    logger.info('Csv %s is already being processed by another worker',
                path_file)
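# Several of the CSV exports in this module (the chemical, SASS data and SASS
# summary tasks) rely on the same DictWriter trick: build the writer with the
# human-readable column titles, write the header row, then swap
# ``writer.fieldnames`` back to the raw serializer keys so the data rows can
# still be matched by key. A minimal, self-contained illustration of that
# pattern (the field names and values below are made up for the example):
import csv
import io

example_rows = [{'site_code': 'R1', 'ph_or_acidity': 7.1}]
raw_headers = list(example_rows[0].keys())
pretty_headers = [
    header.replace('_or_', '/').replace('_', ' ').capitalize()
    for header in raw_headers
]

output = io.StringIO()
writer = csv.DictWriter(output, fieldnames=pretty_headers)
writer.writeheader()             # header row uses the pretty titles
writer.fieldnames = raw_headers  # data rows are keyed by the raw field names
for row in example_rows:
    writer.writerow(row)

print(output.getvalue())
# Site code,Ph/acidity
# R1,7.1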
def download_sass_data_site_task(filename, filters, path_file):
    """Write SASS taxon data for the filtered site visits to a CSV file."""
    from bims.utils.celery import memcache_lock

    lock_id = '{0}-lock-{1}'.format(filename, len(filters))
    oid = '{0}'.format(filename)
    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            search = Search(filters)
            context = {'filters': filters}
            collection_records = search.process_search()
            site_visit_taxon = SiteVisitTaxon.objects.filter(
                id__in=collection_records).order_by(
                'site_visit__site_visit_date')
            serializer = SassDataSerializer(site_visit_taxon,
                                            many=True,
                                            context=context)
            headers = serializer.data[0].keys()
            rows = serializer.data

            formatted_headers = []
            # Rename headers
            for header in headers:
                formatted_headers.append(
                    header.replace('_', ' ').capitalize())

            with open(path_file, 'wb') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=formatted_headers)
                writer.writeheader()
                writer.fieldnames = headers
                for row in rows:
                    try:
                        writer.writerow(row)
                    except ValueError:
                        writer.fieldnames = row.keys()
                        writer.writerow(row)
            return
    logger.info('Csv %s is already being processed by another worker',
                filename)
def download_data_to_csv(path_file, request):
    """Write the filtered collection records to a CSV file."""
    from bims.serializers.bio_collection_serializer import \
        BioCollectionOneRowSerializer
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock

    path_file_hexdigest = md5(path_file).hexdigest()
    lock_id = '{0}-lock-{1}'.format(
        download_data_to_csv.name,
        path_file_hexdigest
    )
    oid = '{0}'.format(path_file_hexdigest)
    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            queryset = GetCollectionAbstract.apply_filter(request,
                                                          ignore_bbox=True)
            serializer = BioCollectionOneRowSerializer(
                queryset,
                many=True
            )
            headers = serializer.data[0].keys()
            rows = serializer.data

            with open(path_file, 'wb') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=headers)
                writer.writeheader()
                for row in rows:
                    writer.writerow(row)
            return
    logger.info(
        'Csv %s is already being processed by another worker',
        path_file)
def download_sass_summary_data_task(filename, filters, path_file):
    """Write a per-site-visit SASS summary (taxa count, SASS score, ASPT) to a CSV file."""
    from bims.utils.celery import memcache_lock
    import random

    lock_id = '{0}-lock-{1}'.format(filename, len(filters))
    oid = random.randint(1, 101)
    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            search = Search(filters)
            context = {'filters': filters}
            collection_records = search.process_search()
            collection_ids = list(
                collection_records.values_list('id', flat=True))
            # Get SASS data
            site_visit_taxa = SiteVisitTaxon.objects.filter(
                id__in=collection_ids)
            summary = site_visit_taxa.annotate(
                sampling_date=F('site_visit__site_visit_date'),
            ).values('sampling_date').annotate(
                count=Count('sass_taxon'),
                sass_score=Sum(
                    Case(When(condition=Q(
                        site_visit__sass_version=5,
                        sass_taxon__sass_5_score__isnull=False),
                        then='sass_taxon__sass_5_score'),
                        default='sass_taxon__score')),
                sass_id=F('site_visit__id'),
                FBIS_site_code=Case(When(
                    site_visit__location_site__site_code__isnull=False,
                    then='site_visit__location_site__site_code'),
                    default='site_visit__location_site__name'),
                site_id=F('site_visit__location_site__id'),
                assessor=F('site_visit__assessor__username'),
                accredited=F('site_visit__assessor__'
                             'bims_profile__sass_accredited'),
                sass_version=F('site_visit__sass_version'),
                site_description=F(
                    'site_visit__location_site__site_description'),
                river_name=Case(When(
                    site_visit__location_site__river__isnull=False,
                    then='site_visit__location_site__river__name'),
                    default=Value('-')),
                latitude=F('site_visit__location_site__latitude'),
                longitude=F('site_visit__location_site__longitude'),
                time_of_day=F('site_visit__time'),
                reference=F('reference'),
                reference_category=F('reference_category'),
            ).annotate(
                # ASPT = total SASS score divided by the number of scoring taxa
                aspt=Cast(F('sass_score'), FloatField()) /
                Cast(F('count'), FloatField()),
            ).order_by('sampling_date')
            serializer = SassSummaryDataSerializer(summary,
                                                   many=True,
                                                   context=context)
            headers = serializer.data[0].keys()
            rows = serializer.data

            formatted_headers = []
            # Rename headers
            for header in headers:
                formatted_headers.append(
                    header.replace('_', ' ').capitalize())

            with open(path_file, 'wb') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=formatted_headers)
                writer.writeheader()
                writer.fieldnames = headers
                for row in rows:
                    try:
                        writer.writerow(row)
                    except ValueError:
                        writer.fieldnames = row.keys()
                        writer.writerow(row)
            return
    logger.info('Csv %s is already being processed by another worker',
                filename)
def download_sass_summary_data_task(filename, filters, path_file):
    """Write a per-site-visit SASS summary, including owner and ecological category, to a CSV file."""
    from bims.utils.celery import memcache_lock
    import random

    lock_id = '{0}-lock-{1}'.format(filename, len(filters))
    oid = random.randint(1, 101)
    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            search = CollectionSearch(filters)
            context = {'filters': filters}
            collection_records = search.process_search()
            collection_ids = list(
                collection_records.values_list('id', flat=True))
            # Get SASS data
            site_visit_taxa = SiteVisitTaxon.objects.filter(
                id__in=collection_ids)
            summary = site_visit_taxa.annotate(
                date=F('collection_date'),
            ).values('date').annotate(
                sampling_date=F('site_visit__site_visit_date'),
                full_name=Concat('site_visit__owner__first_name',
                                 Value(' '),
                                 'site_visit__owner__last_name',
                                 output_field=CharField())
            ).values('sampling_date', 'full_name').annotate(
                count=Count('sass_taxon'),
                sass_score=Sum(
                    Case(When(condition=Q(
                        site_visit__sass_version=5,
                        sass_taxon__sass_5_score__isnull=False),
                        then='sass_taxon__sass_5_score'),
                        default='sass_taxon__score')),
                sass_id=F('site_visit__id'),
                FBIS_site_code=Case(
                    When(
                        site_visit__location_site__site_code__isnull=False,
                        then='site_visit__location_site__site_code'),
                    default='site_visit__location_site__name'),
                site_id=F('site_visit__location_site__id'),
                sass_version=F('site_visit__sass_version'),
                site_description=F(
                    'site_visit__location_site__site_description'),
                river_name=Case(When(
                    site_visit__location_site__river__isnull=False,
                    then='site_visit__location_site__river__name'),
                    default=Value('-')),
                latitude=F('site_visit__location_site__latitude'),
                longitude=F('site_visit__location_site__longitude'),
                source_reference=F('source_reference'),
                ecological_category=F(
                    'site_visit__'
                    'sitevisitecologicalcondition__'
                    'ecological_condition__category')).annotate(
                aspt=Cast(F('sass_score'), FloatField()) /
                Cast(F('count'), FloatField()),
            ).order_by('sampling_date')
            context['location_contexts'] = LocationContext.objects.filter(
                site__in=site_visit_taxa.values('site_visit__location_site'))
            serializer = SassSummaryDataSerializer(summary,
                                                   many=True,
                                                   context=context)
            headers = serializer.data[0].keys()
            rows = serializer.data

            formatted_headers = []
            # Rename headers
            for header in headers:
                header_split = [
                    word[0].upper() + word[1:] for word in header.split('_')
                ]
                header = ' '.join(header_split)
                formatted_headers.append(header)

            with open(path_file, 'wb') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=formatted_headers)
                writer.writeheader()
                writer.fieldnames = headers
                for row in rows:
                    try:
                        writer.writerow(row)
                    except ValueError:
                        writer.fieldnames = row.keys()
                        writer.writerow(row)
            return
    logger.info('Csv %s is already being processed by another worker',
                filename)
def search_collection(query_value, filters, path_file, process):
    """Run a collection search in the background and write the results to a JSON file."""
    from bims.utils.celery import memcache_lock
    from bims.api_views.collection import GetCollectionAbstract
    from bims.api_views.search import SearchObjects
    from bims.models.search_process import SearchProcess

    lock_id = '{0}-lock-{1}'.format(path_file, process)
    oid = '{0}'.format(process)
    category = 'search_results'
    processing_label = 'processing'
    finished_label = 'finish'
    max_result = 100
    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            collection_results, \
                site_results, \
                fuzzy_search = GetCollectionAbstract.apply_filter(
                    query_value,
                    filters,
                    ignore_bbox=True)

            search_process, created = SearchProcess.objects.get_or_create(
                category=category,
                process_id=process)

            search_results = dict()
            search_results['status'] = {
                'current_status': processing_label,
                'process': process
            }
            with open(path_file, 'wb') as status_file:
                status_file.write(json.dumps(search_results))
            search_process.file_path = path_file
            search_process.save()

            all_record_results = {}
            all_site_results = {}

            search_results['fuzzy_search'] = fuzzy_search
            search_results['records'] = []
            search_results['sites'] = []

            # Process collection results page by page and flush progress to
            # the result file after each page.
            collection_paginator = Paginator(collection_results, max_result)
            for num_page in range(1, collection_paginator.num_pages + 1):
                collection_page = collection_paginator.page(num_page)
                if not collection_page.object_list:
                    break
                collection_result = collection_page.object_list
                all_record_results, all_site_results = \
                    SearchObjects.process_search(
                        collection_result,
                        query_value,
                        all_record_results,
                        all_site_results)
                search_results['total_records'] = len(all_record_results)
                search_results['total_sites'] = len(all_site_results)
                with open(path_file, 'wb') as result_file:
                    result_file.write(json.dumps(search_results))

            sites_paginator = Paginator(site_results, max_result)
            for num_page in range(1, sites_paginator.num_pages + 1):
                site_page = sites_paginator.page(num_page)
                if not site_page.object_list:
                    break
                all_site_results = SearchObjects.process_sites_search(
                    site_page.object_list,
                    all_site_results,
                    query_value)
                search_results['total_sites'] = len(all_site_results)
                with open(path_file, 'wb') as result_file:
                    result_file.write(json.dumps(search_results))

            if search_results:
                search_results['records'] = all_record_results
                search_results['sites'] = all_site_results
                search_results['status']['current_status'] = finished_label
                search_process.finished = True
                search_process.save()
                with open(path_file, 'wb') as result_file:
                    result_file.write(json.dumps(search_results))
            return
    logger.info('Search %s is already being processed by another worker',
                process)
def generate_search_cluster(query_value, filters, filename, path_file):
    """Generate distinct site cluster data for a search and write it to a JSON file."""
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock
    from bims.models.search_process import SearchProcess

    lock_id = '{0}-lock-{1}'.format(path_file, filename)
    oid = '{0}'.format(filename)
    name_label = 'n'
    coordinates_label = 'o'
    id_label = 'id'
    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            collection_results, \
                site_results, \
                fuzzy_search = GetCollectionAbstract.apply_filter(
                    query_value,
                    filters,
                    ignore_bbox=True)
            search_process, created = SearchProcess.objects.get_or_create(
                category='cluster_generation',
                process_id=filename)

            status = {'current_status': 'processing', 'process': filename}
            with open(path_file, 'wb') as status_file:
                status_file.write(json.dumps({'status': status}))
            search_process.file_path = path_file
            search_process.save()

            collection_sites = []
            if collection_results:
                collection_sites += list(
                    collection_results.values('location_site_id',
                                              'location_coordinates',
                                              'location_site_name'))
            if site_results:
                collection_sites += list(
                    site_results.values('location_site_id',
                                        'location_coordinates',
                                        'location_site_name'))

            collection_distinct = {}
            all_sites = []
            paginator = Paginator(collection_sites, 100)
            response_data = dict()
            response_data['status'] = status
            response_data['data'] = []

            # Get location site distinct values
            for num_page in range(1, paginator.num_pages + 1):
                object_list = paginator.page(num_page).object_list
                for site in object_list:
                    location_site_id = int(site['location_site_id'])
                    if location_site_id not in collection_distinct:
                        collection_distinct[location_site_id] = site
                        all_sites.append({
                            id_label: site['location_site_id'],
                            coordinates_label: site['location_coordinates'],
                            name_label: site['location_site_name']
                        })
            response_data['data'] = all_sites
            with open(path_file, 'wb') as cluster_file:
                cluster_file.write(json.dumps(response_data))

            response_data['status']['current_status'] = 'finish'
            search_process.finished = True
            search_process.save()
            with open(path_file, 'wb') as cluster_file:
                cluster_file.write(json.dumps(response_data))
            return
    logger.info(
        'Cluster search %s is already being processed by another worker',
        path_file)
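# The lock ids used by the CSV tasks above are built from task names such as
# ``download_data_to_csv.name``, an attribute that only exists when these
# functions are registered as Celery tasks; the ``@shared_task`` decorators are
# not shown in this excerpt. Assuming that registration, a Django view could
# hand an export off to a worker roughly as sketched below. The view name, the
# target path and the response payload are illustrative only.
from django.http import JsonResponse


def download_csv_view(request):
    csv_path = '/tmp/bims-collection-download.csv'  # illustrative target path
    filters = request.GET.dict()  # plain dict, safe to serialise for Celery
    # .delay() enqueues the task; the memcache lock inside the task keeps two
    # workers from writing the same file at the same time.
    download_data_to_csv.delay(csv_path, filters)
    return JsonResponse({'status': 'processing', 'file': csv_path})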