Example #1
def download_data_to_csv(path_file, request):
    from bims.serializers.bio_collection_serializer import \
        BioCollectionOneRowSerializer
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock
    from bims.models import BiologicalCollectionRecord

    path_file_hexdigest = md5(path_file).hexdigest()

    lock_id = '{0}-lock-{1}'.format(download_data_to_csv.name,
                                    path_file_hexdigest)

    oid = '{0}'.format(path_file_hexdigest)

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
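            # Note: 'request' here is expected to be a dict-like set of
            # filters (e.g. the contents of request.GET), not an
            # HttpRequest object.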
            query_value = request.get('search', '')
            filters = request
            is_using_filters = GetCollectionAbstract.is_using_filters(filters)
            site_results = None

            if is_using_filters or query_value:
                collection_results, \
                    site_results, \
                    fuzzy_search = GetCollectionAbstract.\
                    apply_filter(
                        query_value,
                        filters,
                        ignore_bbox=True)
            else:
                collection_results = GetCollectionAbstract.get_all_validated()

            if not collection_results and site_results:
                site_ids = site_results.values_list('id', flat=True)
                collection_results = BiologicalCollectionRecord.objects.filter(
                    site__id__in=site_ids).distinct()

            serializer = BioCollectionOneRowSerializer(collection_results,
                                                       many=True)
            headers = serializer.data[0].keys()
            rows = serializer.data

            with open(path_file, 'wb') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=headers)
                writer.writeheader()
                for row in rows:
                    writer.writerow(row)

            return

    logger.info('Csv %s is already being processed by another worker',
                path_file)
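
Several of these examples wrap the heavy work in memcache_lock from bims.utils.celery. That helper is not shown on this page; a typical implementation, adapted from the Celery documentation's recipe for ensuring a task only runs once at a time, looks roughly like the sketch below (an approximation, not the actual bims code):

import time
from contextlib import contextmanager

from django.core.cache import cache

LOCK_EXPIRE = 60 * 10  # let a stale lock expire after ten minutes


@contextmanager
def memcache_lock(lock_id, oid):
    timeout_at = time.time() + LOCK_EXPIRE - 3
    # cache.add() is atomic: it only succeeds if the key is not set yet,
    # so exactly one worker acquires the lock for a given lock_id.
    acquired = cache.add(lock_id, oid, LOCK_EXPIRE)
    try:
        yield acquired
    finally:
        if acquired and time.time() < timeout_at:
            # Release only a lock we took ourselves and that has not yet
            # expired; an expired key may already belong to another worker.
            cache.delete(lock_id)
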
Example #2
    def get(self, request):
        results = GetCollectionAbstract.apply_filter(request, True)
        bio_ids = results.values_list('model_pk', flat=True)
        taxon_ids = list(set(results.values_list('taxon_gbif', flat=True)))
        taxons = Taxon.objects.filter(
            id__in=taxon_ids).annotate(
            num_occurrences=Count(
                Case(When(
                    biologicalcollectionrecord__id__in=bio_ids,
                    then=1)))).order_by('species')

        location_site_ids = list(
            set(results.values_list('location_site_id', flat=True)))
        location_sites = LocationSite.objects.filter(
            id__in=location_site_ids).annotate(
            num_occurrences=Count(Case(When(
                biological_collection_record__id__in=bio_ids,
                then=1)))).order_by('name')

        search_result = dict()
        search_result['records'] = TaxonOccurencesSerializer(
            taxons, many=True).data
        search_result['sites'] = LocationOccurrencesSerializer(
            location_sites, many=True).data
        return Response(search_result)
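
The Count(Case(When(...))) chains above perform a conditional count: only collection records whose id is in bio_ids contribute to num_occurrences, while everything else evaluates to NULL and is ignored by Count. The same pattern as a generic helper (names are illustrative, this is not part of bims):

from django.db.models import Case, Count, When


def annotate_num_occurrences(queryset, relation_name, record_ids):
    """Annotate each row with how many related records are in record_ids.

    relation_name is the reverse relation used above, for example
    'biologicalcollectionrecord' or 'biological_collection_record'.
    """
    condition = {'%s__id__in' % relation_name: record_ids}
    return queryset.annotate(
        num_occurrences=Count(Case(When(then=1, **condition))))
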
Example #3
    def get(self, request):
        query_value = request.GET.get('search')
        filters = request.GET

        # Search collection
        collection_results, \
            site_results, \
            fuzzy_search = GetCollectionAbstract.apply_filter(
                query_value,
                filters,
                ignore_bbox=True)

        try:
            collection_ids = list(collection_results.values_list(
                'model_pk', flat=True
            ))
            records = BiologicalCollectionRecord.objects.filter(
                pk__in=collection_ids
            )
            serializer = BioCollectionSerializer(
                    records,
                    many=True)
            return Response(serializer.data)
        except BiologicalCollectionRecord.DoesNotExist:
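            # Defensive only: .filter() never raises DoesNotExist (only
            # .get() does), so this branch is not normally reached.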
            return HttpResponse(
                'Object Does Not Exist',
                status=status.HTTP_400_BAD_REQUEST
            )
Example #4
    def get(self, request):
        query_value = request.GET.get('search')
        filters = request.GET
        search_result = dict()
        search_result['sites'] = []
        search_result['records'] = []
        search_uri = request.build_absolute_uri()
        folder = 'search_results'
        status = {'current_status': 'processing'}

        search_process, created = SearchProcess.objects.get_or_create(
            category=folder, query=search_uri)

        if not created and search_process.file_path:
            if os.path.exists(search_process.file_path):
                with open(search_process.file_path) as raw_data:
                    return Response(json.load(raw_data))
            else:
                if search_process.finished:
                    search_process.finished = False
                    search_process.save()

        # Search collection
        collection_results, \
            site_results, \
            fuzzy_search = GetCollectionAbstract.apply_filter(
                query_value,
                filters,
                ignore_bbox=True)

        # Check if filename exists
        data_for_filename = dict()
        data_for_filename['search_uri'] = search_uri
        data_for_filename['collection_results_length'] = len(
            collection_results)
        data_for_filename['site_results_length'] = len(site_results)
        process_id = hashlib.md5(json.dumps(data_for_filename,
                                            sort_keys=True)).hexdigest()
        path_folder = os.path.join(settings.MEDIA_ROOT, folder)
        path_file = os.path.join(path_folder, process_id)

        status['process'] = process_id
        search_process.process_id = process_id
        search_process.save()

        try:
            os.mkdir(path_folder)
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
        search_collection(query_value, filters, path_file, process_id)

        if os.path.exists(path_file):
            with open(path_file) as raw_data:
                json_data = json.load(raw_data)
                return Response(json_data)

        return Response({'status': status})
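
While the background search is still running, this view answers with a payload of the form {'status': {'current_status': 'processing', 'process': ...}}; once the result file exists it returns the finished data instead. A caller can therefore poll the same URL until the processing marker disappears. A client-side sketch using the requests package (URL, interval and timeout are assumptions):

import time

import requests


def poll_search_results(url, interval=2, timeout=120):
    """Poll the search endpoint until it stops reporting 'processing'."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        payload = requests.get(url).json()
        current_status = payload.get('status', {}).get('current_status')
        if current_status != 'processing':
            # Either the finished result payload or an unexpected answer.
            return payload
        time.sleep(interval)
    raise RuntimeError('search did not finish within %s seconds' % timeout)
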
Example #5
def download_csv_site_taxa_records(request):
    taxon_id = request.GET.get('taxon')
    query_value = request.GET.get('search')
    filters = request.GET

    # Search collection
    collection_results, \
    site_results, \
    fuzzy_search = GetCollectionAbstract.apply_filter(
            query_value,
            filters,
            ignore_bbox=True)

    records = [q.object for q in collection_results]

    current_model = BiologicalCollectionRecord

    try:
        # Prefer the child record's fields when the record is subclassed.
        current_model = records[0].get_children()
    except Exception:
        # No records or no child record: fall back to the base model.
        pass

    fields = [f.name for f in current_model._meta.get_fields()]
    fields.remove('ready_for_validation')
    fields.remove('validated')

    if 'biologicalcollectionrecord_ptr' in fields:
        fields.remove('biologicalcollectionrecord_ptr')

    taxon = Taxon.objects.get(pk=taxon_id)
    # Create the HttpResponse object with the appropriate CSV header.
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = \
        'attachment; filename="' + taxon.common_name + '.csv"'

    writer = csv.writer(response)
    writer.writerow(['Taxon', taxon.common_name])
    writer.writerow(['Total records', len(records)])
    writer.writerow(['GBIF ID', taxon.gbif_id])
    writer.writerow([''])
    writer.writerow(fields + ['coordinates'])

    for record in records:
        try:
            record = record.get_children()
        except Exception:
            pass
        row_object = []
        for field in fields:
            row_object.append(getattr(record, field))
        row_object.append('%s,%s' % (
            record.site.get_centroid().coords[1],
            record.site.get_centroid().coords[0],
        ))
        writer.writerow(row_object)

    return response
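
The CSV above is assembled row by row into an in-memory HttpResponse. For very large exports the same data can be streamed with Django's StreamingHttpResponse and the pseudo-buffer pattern from the Django documentation, sketched here independently of the bims models:

import csv

from django.http import StreamingHttpResponse


class Echo(object):
    """Pseudo-buffer: write() just hands the rendered row back."""
    def write(self, value):
        return value


def stream_csv_rows(rows, filename='records.csv'):
    """Stream an iterable of row lists/tuples as a CSV attachment."""
    writer = csv.writer(Echo())
    response = StreamingHttpResponse(
        (writer.writerow(row) for row in rows),
        content_type='text/csv')
    response['Content-Disposition'] = (
        'attachment; filename="%s"' % filename)
    return response
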
Example #6
    def get_queryset(self):
        query_value = self.request.GET.get('search')
        filters = self.request.GET
        collection_results, site_results, fuzzy_search = \
            GetCollectionAbstract.apply_filter(
                query_value,
                filters,
                ignore_bbox=True,
                only_site=True)
        return collection_results
Example #7
    def get(self, request):
        site_id = request.GET.get('siteId')
        query_value = request.GET.get('search')
        filters = request.GET

        # Search collection
        (collection_results, site_results,
         fuzzy_search) = GetCollectionAbstract.apply_filter(query_value,
                                                            filters,
                                                            ignore_bbox=True)

        collection_ids = []
        if collection_results:
            collection_ids = list(
                collection_results.values_list('model_pk', flat=True))
        context = {'collection_ids': collection_ids}
        location_site = self.get_object(site_id)
        serializer = LocationSiteDetailSerializer(location_site,
                                                  context=context)
        return Response(serializer.data)
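
Passing context={'collection_ids': ...} is the standard way to hand extra data to a Django REST Framework serializer; inside the serializer it is available as self.context. A generic sketch of a serializer consuming it (field and class names are illustrative, not the real LocationSiteDetailSerializer):

from rest_framework import serializers


class SiteWithRecordCountSerializer(serializers.Serializer):
    name = serializers.CharField()
    filtered_record_count = serializers.SerializerMethodField()

    def get_filtered_record_count(self, obj):
        # Read the ids the view placed in the serializer context.
        return len(self.context.get('collection_ids', []))
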
Example #8
def download_data_to_csv(path_file, request):
    from bims.serializers.bio_collection_serializer import \
        BioCollectionOneRowSerializer
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock

    path_file_hexdigest = md5(path_file).hexdigest()

    lock_id = '{0}-lock-{1}'.format(
            download_data_to_csv.name,
            path_file_hexdigest
    )

    oid = '{0}'.format(path_file_hexdigest)

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            queryset = GetCollectionAbstract.apply_filter(request,
                                                          ignore_bbox=True)
            serializer = BioCollectionOneRowSerializer(
                    queryset,
                    many=True
            )
            headers = serializer.data[0].keys()
            rows = serializer.data

            with open(path_file, 'wb') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=headers)
                writer.writeheader()
                for row in rows:
                    writer.writerow(row)

            return

    logger.info(
            'Csv %s is already being processed by another worker',
            path_file)
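
Both download_data_to_csv variants read download_data_to_csv.name and take a memcache lock, which suggests they run as Celery tasks. Below is a sketch of how a view could queue the example #1 variant, which expects a plain dict of filters rather than an HttpRequest (the import path, URL wiring and file layout are assumptions, not shown in the original code):

import os

from django.conf import settings
from django.http import JsonResponse

# Assumption: the task is registered with Celery (e.g. via @shared_task);
# the import path below is made up for this sketch.
from bims.tasks import download_data_to_csv


def request_csv_download(request):
    filename = 'collection-records.csv'  # illustrative file name
    path_folder = os.path.join(settings.MEDIA_ROOT, 'processed_csv')
    if not os.path.exists(path_folder):
        os.makedirs(path_folder)
    path_file = os.path.join(path_folder, filename)

    # Hand the worker a plain dict of filters, as example #1 expects,
    # instead of the HttpRequest object itself.
    download_data_to_csv.delay(path_file, request.GET.dict())
    return JsonResponse({'status': 'processing', 'filename': filename})
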
Example #9
def search_collection(query_value, filters, path_file, process):
    from bims.utils.celery import memcache_lock
    from bims.api_views.collection import GetCollectionAbstract
    from bims.api_views.search import SearchObjects
    from bims.models.search_process import SearchProcess

    lock_id = '{0}-lock-{1}'.format(path_file, process)

    oid = '{0}'.format(process)

    category = 'search_results'
    processing_label = 'processing'
    finished_label = 'finish'
    max_result = 100

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:

            collection_results, \
                site_results, \
                fuzzy_search = GetCollectionAbstract.apply_filter(
                    query_value,
                    filters,
                    ignore_bbox=True)

            search_process, created = SearchProcess.objects.get_or_create(
                category=category, process_id=process)

            search_results = dict()
            search_results['status'] = {
                'current_status': processing_label,
                'process': process
            }

            with open(path_file, 'wb') as status_file:
                status_file.write(json.dumps(search_results))
                search_process.file_path = path_file
                search_process.save()

            all_record_results = {}
            all_site_results = {}
            search_results['fuzzy_search'] = fuzzy_search
            search_results['records'] = []
            search_results['sites'] = []

            collection_paginator = Paginator(collection_results, max_result)
            for num_page in range(1, collection_paginator.num_pages + 1):
                collection_page = collection_paginator.page(num_page)
                if not collection_page.object_list:
                    break
                collection_result = collection_page.object_list
                all_record_results, all_site_results = \
                    SearchObjects.process_search(
                            collection_result,
                            query_value,
                            all_record_results,
                            all_site_results)

                search_results['total_records'] = len(all_record_results)
                search_results['total_sites'] = len(all_site_results)
                with open(path_file, 'wb') as result_file:
                    result_file.write(json.dumps(search_results))

            sites_paginator = Paginator(site_results, max_result)
            for num_page in range(1, sites_paginator.num_pages + 1):
                site_page = sites_paginator.page(num_page)
                if not site_page.object_list:
                    break
                all_site_results = SearchObjects.process_sites_search(
                    site_page.object_list, all_site_results, query_value)
                search_results['total_sites'] = len(all_site_results)
                with open(path_file, 'wb') as result_file:
                    result_file.write(json.dumps(search_results))

            if search_results:
                search_results['records'] = all_record_results
                search_results['sites'] = all_site_results
                search_results['status']['current_status'] = finished_label
                search_process.finished = True
                search_process.save()
                with open(path_file, 'wb') as result_file:
                    result_file.write(json.dumps(search_results))

            return

    logger.info('Search %s is already being processed by another worker',
                process)
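
The progress file is rewritten several times through open(path_file, 'wb') combined with json.dumps(), which relies on Python 2 byte strings. A small helper that keeps the same behaviour on both Python 2 and 3 could replace those blocks (an assumption, not part of the original task):

import json


def write_json(path_file, payload):
    """Overwrite path_file with the JSON representation of payload."""
    with open(path_file, 'w') as handle:
        json.dump(payload, handle)

Each open(path_file, 'wb') block above, and the same pattern in example #10, would then become a single write_json(path_file, search_results) call.
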
Example #10
def generate_search_cluster(query_value, filters, filename, path_file):
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock
    from bims.models.search_process import SearchProcess

    lock_id = '{0}-lock-{1}'.format(path_file, filename)

    oid = '{0}'.format(filename)

    name_label = 'n'
    coordinates_label = 'o'
    id_label = 'id'

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            collection_results, \
                site_results, \
                fuzzy_search = GetCollectionAbstract.apply_filter(
                    query_value,
                    filters,
                    ignore_bbox=True)
            search_process, created = SearchProcess.objects.get_or_create(
                category='cluster_generation', process_id=filename)
            status = {'current_status': 'processing', 'process': filename}
            with open(path_file, 'wb') as status_file:
                status_file.write(json.dumps({'status': status}))
                search_process.file_path = path_file
                search_process.save()

            collection_sites = []
            if collection_results:
                collection_sites += list(
                    collection_results.values('location_site_id',
                                              'location_coordinates',
                                              'location_site_name'))
            if site_results:
                collection_sites += list(
                    site_results.values('location_site_id',
                                        'location_coordinates',
                                        'location_site_name'))

            collection_distinct = {}
            all_sites = []

            paginator = Paginator(collection_sites, 100)
            response_data = dict()
            response_data['status'] = status
            response_data['data'] = []

            # Get location site distinct values
            for num_page in range(1, paginator.num_pages + 1):
                object_list = paginator.page(num_page).object_list
                for site in object_list:
                    location_site_id = int(site['location_site_id'])
                    if location_site_id not in collection_distinct:
                        collection_distinct[location_site_id] = site
                        all_sites.append({
                            id_label: site['location_site_id'],
                            coordinates_label: site['location_coordinates'],
                            name_label: site['location_site_name']
                        })

                response_data['data'] = all_sites
                with open(path_file, 'wb') as cluster_file:
                    cluster_file.write(json.dumps(response_data))

            response_data['status']['current_status'] = 'finish'
            search_process.finished = True
            search_process.save()
            with open(path_file, 'wb') as cluster_file:
                cluster_file.write(json.dumps(response_data))

            return

    logger.info(
        'Cluster search %s is already being processed by another worker',
        path_file)
Example #11
    def get(self, request):
        query_value = request.GET.get('search')
        filters = request.GET

        # Search collection
        (collection_results, site_results,
         fuzzy_search) = GetCollectionAbstract.apply_filter(query_value,
                                                            filters,
                                                            ignore_bbox=True)

        search_process, created = get_or_create_search_process(
            SITES_SUMMARY, query=json.dumps(filters))

        if search_process.file_path:
            if os.path.exists(search_process.file_path):
                try:
                    with open(search_process.file_path) as raw_data:
                        return Response(json.load(raw_data))
                except ValueError:
                    pass

        records_graph_data = {}
        records_occurrence = {}

        for collection in collection_results:
            collection_year = collection.collection_date_year
            if collection_year not in records_graph_data:
                records_graph_data[collection_year] = {}
            if collection.category not in records_graph_data[collection_year]:
                records_graph_data[collection_year][collection.category] = 1
            else:
                records_graph_data[collection_year][collection.category] += 1
            if collection.taxonomy not in records_graph_data[collection_year]:
                records_graph_data[collection_year][collection.taxonomy] = 1
            else:
                records_graph_data[collection_year][collection.taxonomy] += 1

            if collection.taxonomy not in records_occurrence:
                records_occurrence[collection.taxonomy] = {
                    self.COUNT: 0,
                    self.ORIGIN: collection.category,
                    self.TAXONOMY_NAME: collection.taxon_canonical_name
                }

            records_occurrence[collection.taxonomy][self.COUNT] += 1

        response_data = {
            self.TOTAL_RECORDS: len(collection_results),
            self.RECORDS_GRAPH_DATA: records_graph_data,
            self.RECORDS_OCCURRENCE: records_occurrence
        }

        file_path = create_search_process_file(data=response_data,
                                               search_process=search_process,
                                               finished=True)
        with open(file_path) as file_data:
            try:
                return Response(json.load(file_data))
            except ValueError:
                return Response(response_data)
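
The per-year tallies above grow nested dicts by hand. The counting part can be written more compactly with collections.defaultdict and Counter; the sketch below covers only the counts (the original also stores the origin and canonical name per taxonomy) and is not the bims implementation:

from collections import Counter, defaultdict


def build_graph_data(collection_results):
    """Count categories and taxonomies per collection year."""
    records_graph_data = defaultdict(Counter)
    records_occurrence = Counter()
    for collection in collection_results:
        year = collection.collection_date_year
        records_graph_data[year][collection.category] += 1
        records_graph_data[year][collection.taxonomy] += 1
        records_occurrence[collection.taxonomy] += 1
    return records_graph_data, records_occurrence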