def download_data_to_csv(path_file, request):
    from bims.serializers.bio_collection_serializer import \
        BioCollectionOneRowSerializer
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock
    from bims.models import BiologicalCollectionRecord

    # Hash the target path so concurrent workers writing the same file
    # share one lock (encode first: hashlib.md5 requires bytes on Python 3).
    path_file_hexdigest = md5(path_file.encode('utf-8')).hexdigest()
    lock_id = '{0}-lock-{1}'.format(
        download_data_to_csv.name,
        path_file_hexdigest
    )
    oid = '{0}'.format(path_file_hexdigest)

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            query_value = request.get('search', '')
            filters = request
            is_using_filters = GetCollectionAbstract.is_using_filters(filters)
            site_results = None

            if is_using_filters or query_value:
                collection_results, site_results, fuzzy_search = \
                    GetCollectionAbstract.apply_filter(
                        query_value,
                        filters,
                        ignore_bbox=True)
            else:
                collection_results = GetCollectionAbstract.get_all_validated()

            # Fall back to records attached to the matched sites when the
            # collection search itself returned nothing.
            if not collection_results and site_results:
                site_ids = site_results.values_list('id', flat=True)
                collection_results = \
                    BiologicalCollectionRecord.objects.filter(
                        site__id__in=site_ids).distinct()

            serializer = BioCollectionOneRowSerializer(
                collection_results, many=True)
            rows = serializer.data
            if not rows:
                # Nothing to write; avoid the IndexError on rows[0].
                return
            headers = rows[0].keys()

            # Text mode with newline='' is what the csv module expects on
            # Python 3 (the original 'wb' mode only worked on Python 2).
            with open(path_file, 'w', newline='') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=headers)
                writer.writeheader()
                for row in rows:
                    writer.writerow(row)
            return
    logger.info(
        'Csv %s is already being processed by another worker',
        path_file)
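# A minimal usage sketch, assuming download_data_to_csv is registered as a
# Celery task (the body reads download_data_to_csv.name, which Celery sets).
# The view function name and the temp path below are illustrative
# assumptions, not part of the original code.
def request_csv_download(request):
    from django.http import JsonResponse
    path_file = '/tmp/example-download.csv'  # hypothetical output path
    # .delay() queues the task; request.GET.dict() passes the filters as a
    # plain dict, since the task only calls .get() on it.
    download_data_to_csv.delay(path_file, request.GET.dict())
    return JsonResponse({'status': 'processing'})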
def get(self, request):
    results = GetCollectionAbstract.apply_filter(request, True)
    bio_ids = results.values_list('model_pk', flat=True)
    taxon_ids = list(set(results.values_list('taxon_gbif', flat=True)))

    # Annotate each taxon with the number of matching occurrence records.
    taxons = Taxon.objects.filter(
        id__in=taxon_ids).annotate(
        num_occurrences=Count(Case(When(
            biologicalcollectionrecord__id__in=bio_ids,
            then=1)))).order_by('species')

    # Same annotation for the sites those records belong to.
    location_site_ids = list(
        set(results.values_list('location_site_id', flat=True)))
    location_sites = LocationSite.objects.filter(
        id__in=location_site_ids).annotate(
        num_occurrences=Count(Case(When(
            biological_collection_record__id__in=bio_ids,
            then=1)))).order_by('name')

    search_result = dict()
    search_result['records'] = TaxonOccurencesSerializer(
        taxons, many=True).data
    search_result['sites'] = LocationOccurrencesSerializer(
        location_sites, many=True).data
    return Response(search_result)
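# A sketch of the response shape the view above returns. The outer keys come
# from the code; the inner fields depend on the two serializers, so the keys
# shown inside each item are assumptions:
#
#     {
#         "records": [{"id": 1, "num_occurrences": 10, ...}],
#         "sites": [{"id": 5, "num_occurrences": 3, ...}]
#     }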
def get(self, request):
    query_value = request.GET.get('search')
    filters = request.GET

    # Search collection
    collection_results, site_results, fuzzy_search = \
        GetCollectionAbstract.apply_filter(
            query_value,
            filters,
            ignore_bbox=True)
    try:
        collection_ids = list(collection_results.values_list(
            'model_pk', flat=True
        ))
        records = BiologicalCollectionRecord.objects.filter(
            pk__in=collection_ids
        )
        serializer = BioCollectionSerializer(records, many=True)
        return Response(serializer.data)
    except BiologicalCollectionRecord.DoesNotExist:
        return HttpResponse(
            'Object Does Not Exist',
            status=status.HTTP_400_BAD_REQUEST
        )
def get(self, request):
    query_value = request.GET.get('search')
    filters = request.GET
    search_result = dict()
    search_result['sites'] = []
    search_result['records'] = []
    search_uri = request.build_absolute_uri()
    folder = 'search_results'
    status = {'current_status': 'processing'}

    search_process, created = SearchProcess.objects.get_or_create(
        category=folder,
        query=search_uri)

    # Serve the cached result file if this exact query was processed before;
    # otherwise reset a stale 'finished' flag and reprocess.
    if not created and search_process.file_path:
        if os.path.exists(search_process.file_path):
            with open(search_process.file_path) as raw_data:
                return Response(json.load(raw_data))
        elif search_process.finished:
            search_process.finished = False
            search_process.save()

    # Search collection
    collection_results, site_results, fuzzy_search = \
        GetCollectionAbstract.apply_filter(
            query_value,
            filters,
            ignore_bbox=True)

    # Derive a stable process id from the query and result sizes so that
    # identical searches map to the same result file (encode for Python 3).
    data_for_filename = dict()
    data_for_filename['search_uri'] = search_uri
    data_for_filename['collection_results_length'] = len(collection_results)
    data_for_filename['site_results_length'] = len(site_results)
    process_id = hashlib.md5(
        json.dumps(data_for_filename, sort_keys=True).encode('utf-8')
    ).hexdigest()

    path_folder = os.path.join(settings.MEDIA_ROOT, folder)
    path_file = os.path.join(path_folder, process_id)
    status['process'] = process_id
    search_process.process_id = process_id
    search_process.save()

    try:
        os.mkdir(path_folder)
    except OSError as exc:
        # The folder already existing is fine; re-raise anything else.
        if exc.errno != errno.EEXIST:
            raise

    search_collection(query_value, filters, path_file, process_id)
    if os.path.exists(path_file):
        with open(path_file) as raw_data:
            return Response(json.load(raw_data))
    return Response({'status': status})
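# A worked example of the process id derivation above: hashing the
# sort_keys JSON dump keeps the id stable regardless of dict ordering.
# The concrete URI and counts are illustrative.
import hashlib
import json

payload = {
    'search_uri': 'https://example.com/api/search?search=fish',  # assumed
    'collection_results_length': 120,
    'site_results_length': 14,
}
process_id = hashlib.md5(
    json.dumps(payload, sort_keys=True).encode('utf-8')).hexdigest()
print(process_id)  # deterministic 32-character hex digest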
def download_csv_site_taxa_records(request):
    taxon_id = request.GET.get('taxon')
    query_value = request.GET.get('search')
    filters = request.GET

    # Search collection
    collection_results, site_results, fuzzy_search = \
        GetCollectionAbstract.apply_filter(
            query_value,
            filters,
            ignore_bbox=True)
    records = [q.object for q in collection_results]

    # Use the concrete child model of the first record, if any, so the CSV
    # columns match the subclass fields (narrowed from a bare except).
    current_model = BiologicalCollectionRecord
    try:
        current_model = records[0].get_children()
    except (IndexError, AttributeError):
        pass

    # Drop internal bookkeeping fields from the exported columns; guard
    # each removal so a missing field cannot raise ValueError.
    fields = [f.name for f in current_model._meta.get_fields()]
    for internal_field in (
            'ready_for_validation',
            'validated',
            'biologicalcollectionrecord_ptr'):
        if internal_field in fields:
            fields.remove(internal_field)

    taxon = Taxon.objects.get(pk=taxon_id)

    # Create the HttpResponse object with the appropriate CSV header.
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = \
        'attachment; filename="%s.csv"' % taxon.common_name

    writer = csv.writer(response)
    writer.writerow(['Taxon', taxon.common_name])
    writer.writerow(['Total records', len(records)])
    writer.writerow(['GBIF ID', taxon.gbif_id])
    writer.writerow([''])
    writer.writerow(fields + ['coordinates'])

    for record in records:
        try:
            record = record.get_children()
        except AttributeError:
            pass
        row_object = [getattr(record, field) for field in fields]
        # Latitude, longitude of the site centroid.
        row_object.append('%s,%s' % (
            record.site.get_centroid().coords[1],
            record.site.get_centroid().coords[0],
        ))
        writer.writerow(row_object)
    return response
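# A minimal sketch of wiring this download view, assuming a URL pattern like
# the one below exists (the route path and name are assumptions):
#
#     # urls.py
#     from django.urls import path
#     urlpatterns = [
#         path('download-csv-taxa-records/',
#              download_csv_site_taxa_records,
#              name='download-csv-taxa-records'),
#     ]
#
# The client then requests e.g.
#     /download-csv-taxa-records/?taxon=12&search=fish
# and receives a CSV attachment named after the taxon's common name.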
def get_queryset(self):
    query_value = self.request.GET.get('search')
    filters = self.request.GET
    (collection_results,
     site_results,
     fuzzy_search) = GetCollectionAbstract.apply_filter(
        query_value,
        filters,
        ignore_bbox=True,
        only_site=True)
    return collection_results
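# get_queryset is the standard DRF hook; a minimal sketch of the list view
# it might plug into, assuming a ListAPIView subclass. The class name is
# hypothetical and the serializer is assumed from the other views here.
from rest_framework.generics import ListAPIView


class SiteCollectionList(ListAPIView):  # hypothetical name
    serializer_class = BioCollectionSerializer  # assumed serializer

    def get_queryset(self):
        query_value = self.request.GET.get('search')
        filters = self.request.GET
        collection_results, site_results, fuzzy_search = \
            GetCollectionAbstract.apply_filter(
                query_value, filters, ignore_bbox=True, only_site=True)
        return collection_results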
def get(self, request):
    site_id = request.GET.get('siteId')
    query_value = request.GET.get('search')
    filters = request.GET

    # Search collection
    (collection_results,
     site_results,
     fuzzy_search) = GetCollectionAbstract.apply_filter(
        query_value,
        filters,
        ignore_bbox=True)

    collection_ids = []
    if collection_results:
        collection_ids = list(
            collection_results.values_list('model_pk', flat=True))
    context = {'collection_ids': collection_ids}
    location_site = self.get_object(site_id)
    serializer = LocationSiteDetailSerializer(
        location_site,
        context=context)
    return Response(serializer.data)
def download_data_to_csv(path_file, request):
    from bims.serializers.bio_collection_serializer import \
        BioCollectionOneRowSerializer
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock

    # Encode before hashing: hashlib.md5 requires bytes on Python 3.
    path_file_hexdigest = md5(path_file.encode('utf-8')).hexdigest()
    lock_id = '{0}-lock-{1}'.format(
        download_data_to_csv.name,
        path_file_hexdigest
    )
    oid = '{0}'.format(path_file_hexdigest)

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            queryset = GetCollectionAbstract.apply_filter(
                request,
                ignore_bbox=True)
            serializer = BioCollectionOneRowSerializer(
                queryset,
                many=True
            )
            rows = serializer.data
            if not rows:
                # Nothing to write; avoid the IndexError on rows[0].
                return
            headers = rows[0].keys()

            # Text mode with newline='' for the csv module on Python 3
            # (the original 'wb' mode was a Python 2 leftover).
            with open(path_file, 'w', newline='') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=headers)
                writer.writeheader()
                for row in rows:
                    writer.writerow(row)
            return
    logger.info(
        'Csv %s is already being processed by another worker',
        path_file)
def search_collection(query_value, filters, path_file, process):
    from bims.utils.celery import memcache_lock
    from bims.api_views.collection import GetCollectionAbstract
    from bims.api_views.search import SearchObjects
    from bims.models.search_process import SearchProcess

    lock_id = '{0}-lock-{1}'.format(path_file, process)
    oid = '{0}'.format(process)
    category = 'search_results'
    processing_label = 'processing'
    finished_label = 'finish'
    max_result = 100

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            collection_results, site_results, fuzzy_search = \
                GetCollectionAbstract.apply_filter(
                    query_value,
                    filters,
                    ignore_bbox=True)
            search_process, created = SearchProcess.objects.get_or_create(
                category=category,
                process_id=process)

            # Write an initial status file so clients polling the path see
            # that processing has started. json.dumps returns str, so the
            # file is opened in text mode (the original 'wb' was Python 2).
            search_results = dict()
            search_results['status'] = {
                'current_status': processing_label,
                'process': process
            }
            with open(path_file, 'w') as status_file:
                status_file.write(json.dumps(search_results))
            search_process.file_path = path_file
            search_process.save()

            all_record_results = {}
            all_site_results = {}
            search_results['fuzzy_search'] = fuzzy_search
            search_results['records'] = []
            search_results['sites'] = []

            # Process collection results page by page, flushing running
            # totals to the file so progress is visible before completion.
            collection_paginator = Paginator(collection_results, max_result)
            for num_page in range(1, collection_paginator.num_pages + 1):
                collection_page = collection_paginator.page(num_page)
                if not collection_page.object_list:
                    break
                collection_result = collection_page.object_list
                all_record_results, all_site_results = \
                    SearchObjects.process_search(
                        collection_result,
                        query_value,
                        all_record_results,
                        all_site_results)
                search_results['total_records'] = len(all_record_results)
                search_results['total_sites'] = len(all_site_results)
                with open(path_file, 'w') as result_file:
                    result_file.write(json.dumps(search_results))

            # Then fold the site-only results in the same way.
            sites_paginator = Paginator(site_results, max_result)
            for num_page in range(1, sites_paginator.num_pages + 1):
                site_page = sites_paginator.page(num_page)
                if not site_page.object_list:
                    break
                all_site_results = SearchObjects.process_sites_search(
                    site_page.object_list,
                    all_site_results,
                    query_value)
                search_results['total_sites'] = len(all_site_results)
                with open(path_file, 'w') as result_file:
                    result_file.write(json.dumps(search_results))

            if search_results:
                search_results['records'] = all_record_results
                search_results['sites'] = all_site_results
                search_results['status']['current_status'] = finished_label
                search_process.finished = True
                search_process.save()
                with open(path_file, 'w') as result_file:
                    result_file.write(json.dumps(search_results))
            return
    logger.info(
        'Search %s is already being processed by another worker',
        process)
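# A minimal client-side sketch of polling the result file written above,
# assuming the caller knows path_file; the function name, timeout, and
# sleep interval are illustrative assumptions.
import json
import time


def wait_for_search(path_file, timeout=60, interval=2):
    """Poll the search result file until its status flips to 'finish'."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with open(path_file) as result_file:
                results = json.load(result_file)
        except (IOError, ValueError):
            # File not written yet, or caught mid-write; retry.
            time.sleep(interval)
            continue
        if results.get('status', {}).get('current_status') == 'finish':
            return results
        time.sleep(interval)
    return None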
def generate_search_cluster(query_value, filters, filename, path_file):
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock
    from bims.models.search_process import SearchProcess

    lock_id = '{0}-lock-{1}'.format(path_file, filename)
    oid = '{0}'.format(filename)

    # Short keys keep the serialized cluster payload small.
    name_label = 'n'
    coordinates_label = 'o'
    id_label = 'id'

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            collection_results, site_results, fuzzy_search = \
                GetCollectionAbstract.apply_filter(
                    query_value,
                    filters,
                    ignore_bbox=True)
            search_process, created = SearchProcess.objects.get_or_create(
                category='cluster_generation',
                process_id=filename)

            status = {'current_status': 'processing', 'process': filename}
            # Text mode: json.dumps returns str (the original 'wb' mode was
            # a Python 2 leftover).
            with open(path_file, 'w') as status_file:
                status_file.write(json.dumps({'status': status}))
            search_process.file_path = path_file
            search_process.save()

            collection_sites = []
            if collection_results:
                collection_sites += list(
                    collection_results.values(
                        'location_site_id',
                        'location_coordinates',
                        'location_site_name'))
            if site_results:
                collection_sites += list(
                    site_results.values(
                        'location_site_id',
                        'location_coordinates',
                        'location_site_name'))

            collection_distinct = {}
            all_sites = []
            paginator = Paginator(collection_sites, 100)
            response_data = dict()
            response_data['status'] = status
            response_data['data'] = []

            # Get location site distinct values; only the first occurrence
            # of each site id is appended to the output.
            for num_page in range(1, paginator.num_pages + 1):
                object_list = paginator.page(num_page).object_list
                for site in object_list:
                    location_site_id = int(site['location_site_id'])
                    if location_site_id not in collection_distinct:
                        collection_distinct[location_site_id] = site
                        all_sites.append({
                            id_label: site['location_site_id'],
                            coordinates_label: site['location_coordinates'],
                            name_label: site['location_site_name']
                        })

            response_data['data'] = all_sites
            with open(path_file, 'w') as cluster_file:
                cluster_file.write(json.dumps(response_data))

            response_data['status']['current_status'] = 'finish'
            search_process.finished = True
            search_process.save()
            with open(path_file, 'w') as cluster_file:
                cluster_file.write(json.dumps(response_data))
            return
    logger.info(
        'Cluster search %s is already being processed by another worker',
        path_file)
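# A sketch of the cluster payload the task writes, using the compact keys
# defined above ('id' for the site id, 'o' for coordinates, 'n' for name);
# the concrete values and coordinate format are illustrative.
#
#     {
#         "status": {"current_status": "finish", "process": "<filename>"},
#         "data": [
#             {"id": 42, "o": "23.5,-30.1", "n": "Example Site"}
#         ]
#     }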
def get(self, request):
    query_value = request.GET.get('search')
    filters = request.GET

    # Search collection
    (collection_results,
     site_results,
     fuzzy_search) = GetCollectionAbstract.apply_filter(
        query_value,
        filters,
        ignore_bbox=True)

    search_process, created = get_or_create_search_process(
        SITES_SUMMARY,
        query=json.dumps(filters))

    # Serve the cached summary if a readable file already exists.
    if search_process.file_path:
        if os.path.exists(search_process.file_path):
            try:
                with open(search_process.file_path) as raw_data:
                    return Response(json.load(raw_data))
            except ValueError:
                pass

    # Aggregate per-year counts by category and taxonomy, plus an
    # occurrence tally per taxonomy.
    records_graph_data = {}
    records_occurrence = {}
    for collection in collection_results:
        collection_year = collection.collection_date_year
        if collection_year not in records_graph_data:
            records_graph_data[collection_year] = {}
        if collection.category not in records_graph_data[collection_year]:
            records_graph_data[collection_year][collection.category] = 1
        else:
            records_graph_data[collection_year][collection.category] += 1
        if collection.taxonomy not in records_graph_data[collection_year]:
            records_graph_data[collection_year][collection.taxonomy] = 1
        else:
            records_graph_data[collection_year][collection.taxonomy] += 1
        if collection.taxonomy not in records_occurrence:
            records_occurrence[collection.taxonomy] = {
                self.COUNT: 0,
                self.ORIGIN: collection.category,
                self.TAXONOMY_NAME: collection.taxon_canonical_name
            }
        # Use the same class constant as the initializer; the original
        # literal 'count' key would break if self.COUNT differs.
        records_occurrence[collection.taxonomy][self.COUNT] += 1

    response_data = {
        self.TOTAL_RECORDS: len(collection_results),
        self.RECORDS_GRAPH_DATA: records_graph_data,
        self.RECORDS_OCCURRENCE: records_occurrence
    }

    file_path = create_search_process_file(
        data=response_data,
        search_process=search_process,
        finished=True)
    try:
        with open(file_path) as file_data:
            return Response(json.load(file_data))
    except ValueError:
        return Response(response_data)