Example #1
def search_task(parameters, search_process_id, background=True):
    from bims.utils.celery import memcache_lock
    from bims.api_views.search import CollectionSearch
    from bims.models.search_process import (
        SearchProcess,
        SEARCH_PROCESSING,
        SEARCH_FINISHED,
        SEARCH_FAILED
    )

    try:
        search_process = SearchProcess.objects.get(id=search_process_id)
    except SearchProcess.DoesNotExist:
        return

    if background:
        lock_id = '{0}-lock-{1}'.format(
            search_process.file_path,
            search_process.process_id
        )
        oid = '{0}'.format(search_process.process_id)
        with memcache_lock(lock_id, oid) as acquired:
            if acquired:
                search_process.set_status(SEARCH_PROCESSING)

                search = CollectionSearch(parameters)
                search_results = search.get_summary_data()
                if search_results:
                    search_process.set_search_raw_query(
                        search.location_sites_raw_query
                    )
                    search_process.create_view()
                    search_process.set_status(SEARCH_FINISHED, False)
                    search_results['status'] = SEARCH_FINISHED
                    search_results['extent'] = search.extent()
                    search_process.save_to_file(search_results)
                else:
                    search_process.set_status(SEARCH_FAILED)
                return
        logger.info(
            'Search %s is already being processed by another worker',
            search_process.process_id)
    else:
        search = CollectionSearch(parameters)
        search_results = search.get_summary_data()
        if search_results:
            search_process.set_search_raw_query(
                search.location_sites_raw_query
            )
            search_process.create_view()
            search_process.set_status(SEARCH_FINISHED, False)
            search_results['status'] = SEARCH_FINISHED
            search_results['extent'] = search.extent()
            search_process.save_to_file(search_results)
        else:
            search_process.set_status(SEARCH_FAILED)
        return search_results
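
All of these tasks guard their work with bims.utils.celery.memcache_lock, which is imported but never shown on this page. A minimal sketch of such a cache-backed lock, assuming it follows the Celery cookbook pattern for ensuring a job runs only once at a time (the helper name is real, but the body and the timeout below are assumptions, not the bims implementation):

from contextlib import contextmanager
from django.core.cache import cache

LOCK_EXPIRE = 60 * 10  # assumed lock lifetime of 10 minutes

@contextmanager
def memcache_lock(lock_id, oid):
    # cache.add is atomic: it stores the key and returns True only if the
    # key does not already exist, so exactly one worker acquires the lock.
    acquired = cache.add(lock_id, oid, LOCK_EXPIRE)
    try:
        yield acquired
    finally:
        # Release only a lock we own, so a worker that failed to acquire
        # it cannot delete another worker's lock.
        if acquired:
            cache.delete(lock_id)

Each caller builds lock_id from something that identifies the job (the search process id, a hash of the output path, or the filename and filter count), so identical requests collide on the same key while unrelated jobs run in parallel.
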
Example #2
def download_data_to_csv(path_file, request):
    from bims.serializers.bio_collection_serializer import \
        BioCollectionOneRowSerializer
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock
    from bims.models import BiologicalCollectionRecord

    path_file_hexdigest = md5(path_file.encode('utf-8')).hexdigest()

    lock_id = '{0}-lock-{1}'.format(download_data_to_csv.name,
                                    path_file_hexdigest)

    oid = '{0}'.format(path_file_hexdigest)

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            query_value = request.get('search', '')
            filters = request
            is_using_filters = GetCollectionAbstract.is_using_filters(filters)
            site_results = None

            if is_using_filters or query_value:
                collection_results, \
                    site_results, \
                    fuzzy_search = GetCollectionAbstract.\
                    apply_filter(
                        query_value,
                        filters,
                        ignore_bbox=True)
            else:
                collection_results = GetCollectionAbstract.get_all_validated()

            if not collection_results and site_results:
                site_ids = site_results.values_list('id', flat=True)
                collection_results = BiologicalCollectionRecord.objects.filter(
                    site__id__in=site_ids).distinct()

            serializer = BioCollectionOneRowSerializer(collection_results,
                                                       many=True)
            headers = serializer.data[0].keys()
            rows = serializer.data

            with open(path_file, 'w') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=headers)
                writer.writeheader()
                for row in rows:
                    writer.writerow(row)

            return

    logger.info('Csv %s is already being processed by another worker',
                path_file)
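
Example #2 reads download_data_to_csv.name, which only exists once the function is registered as a Celery task; the decorator has been stripped from these snippets. A hypothetical registration and call, with the task name, output path and search value invented purely for illustration:

from celery import shared_task

@shared_task(name='bims.tasks.download_data_to_csv')  # task name is an assumption
def download_data_to_csv(path_file, request):
    ...  # body as shown in Example #2

# Queue the export asynchronously; a worker then acquires the memcache lock
# and writes the CSV to path_file.
download_data_to_csv.delay('/tmp/collections.csv', {'search': 'Barbus'})
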
Example #3
def download_chemical_data_to_csv(path_file, site_id):
    from bims.models.chemical_record import ChemicalRecord
    from bims.serializers.chemical_records_serializer import (
        ChemicalRecordsOneRowSerializer)
    from bims.utils.celery import memcache_lock

    path_file_hexdigest = sha256(path_file.encode('utf-8')).hexdigest()

    lock_id = '{}-lock-{}'.format(download_chemical_data_to_csv.name,
                                  path_file_hexdigest)

    oid = '{0}'.format(path_file_hexdigest)

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            queryset = ChemicalRecord.objects.filter(
                Q(location_site_id=site_id) | Q(survey__site_id=site_id))
            serializer = ChemicalRecordsOneRowSerializer(queryset, many=True)
            headers = serializer.data[0].keys()
            rows = serializer.data

            formatted_headers = []
            # Rename headers
            for header in headers:
                if header == 'class_name':
                    header = 'class'
                header = header.replace('_or_', '/')
                header = header.replace('_', ' ').capitalize()
                formatted_headers.append(header)

            with open(path_file, 'w') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=formatted_headers)
                writer.writeheader()
                writer.fieldnames = headers
                for row in rows:
                    writer.writerow(row)

            return

    logger.info('Csv %s is already being processed by another worker',
                path_file)
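
Examples #3, #4, #6 and #7 all rely on the same csv.DictWriter trick: the header row is written with prettified column names, then writer.fieldnames is switched back to the raw serializer keys so every row dict can still be looked up by its original keys. A standalone sketch of the trick, with made-up data:

import csv

# Hypothetical rows standing in for serializer.data
rows = [{'class_name': 'Insecta', 'temperature_or_ph': '7.1'}]

raw_headers = list(rows[0].keys())
pretty_headers = [h.replace('_or_', '/').replace('_', ' ').capitalize()
                  for h in raw_headers]

with open('/tmp/example.csv', 'w') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=pretty_headers)
    writer.writeheader()             # header row uses the pretty names
    writer.fieldnames = raw_headers  # data rows are keyed by the raw names
    writer.writerows(rows)
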
Example #4
def download_sass_data_site_task(filename, filters, path_file):
    from bims.utils.celery import memcache_lock

    lock_id = '{0}-lock-{1}'.format(filename, len(filters))
    oid = '{0}'.format(filename)

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            search = Search(filters)
            context = {'filters': filters}
            collection_records = search.process_search()
            site_visit_taxon = SiteVisitTaxon.objects.filter(
                id__in=collection_records).order_by(
                    'site_visit__site_visit_date')
            serializer = SassDataSerializer(site_visit_taxon,
                                            many=True,
                                            context=context)
            headers = serializer.data[0].keys()
            rows = serializer.data

            formatted_headers = []
            # Rename headers
            for header in headers:
                formatted_headers.append(header.replace('_', ' ').capitalize())

            with open(path_file, 'w') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=formatted_headers)
                writer.writeheader()
                writer.fieldnames = headers
                for row in rows:
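                    # DictWriter raises ValueError when a row contains keys
                    # that are not in fieldnames; widen fieldnames to the
                    # row's own keys and retry.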
                    try:
                        writer.writerow(row)
                    except ValueError:
                        writer.fieldnames = row.keys()
                        writer.writerow(row)
            return
    logger.info('Csv %s is already being processed by another worker',
                filename)
Example #5
def download_data_to_csv(path_file, request):
    from bims.serializers.bio_collection_serializer import \
        BioCollectionOneRowSerializer
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock

    path_file_hexdigest = md5(path_file.encode('utf-8')).hexdigest()

    lock_id = '{0}-lock-{1}'.format(
            download_data_to_csv.name,
            path_file_hexdigest
    )

    oid = '{0}'.format(path_file_hexdigest)

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            queryset = GetCollectionAbstract.apply_filter(request,
                                                          ignore_bbox=True)
            serializer = BioCollectionOneRowSerializer(
                    queryset,
                    many=True
            )
            headers = serializer.data[0].keys()
            rows = serializer.data

            with open(path_file, 'w') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=headers)
                writer.writeheader()
                for row in rows:
                    writer.writerow(row)

            return

    logger.info(
            'Csv %s is already being processed by another worker',
            path_file)
Example #6
def download_sass_summary_data_task(filename, filters, path_file):
    from bims.utils.celery import memcache_lock
    import random

    lock_id = '{0}-lock-{1}'.format(filename, len(filters))
    oid = random.randint(1, 101)
    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            search = Search(filters)
            context = {'filters': filters}

            collection_records = search.process_search()
            collection_ids = list(
                collection_records.values_list('id', flat=True))
            # Get SASS data
            site_visit_taxa = SiteVisitTaxon.objects.filter(
                id__in=collection_ids)
            summary = site_visit_taxa.annotate(
                sampling_date=F('site_visit__site_visit_date'),
            ).values('sampling_date').annotate(
                count=Count('sass_taxon'),
                sass_score=Sum(
                    Case(When(condition=Q(
                        site_visit__sass_version=5,
                        sass_taxon__sass_5_score__isnull=False),
                              then='sass_taxon__sass_5_score'),
                         default='sass_taxon__score')),
                sass_id=F('site_visit__id'),
                FBIS_site_code=Case(When(
                    site_visit__location_site__site_code__isnull=False,
                    then='site_visit__location_site__site_code'),
                                    default='site_visit__location_site__name'),
                site_id=F('site_visit__location_site__id'),
                assessor=F('site_visit__assessor__username'),
                accredited=F('site_visit__assessor__'
                             'bims_profile__sass_accredited'),
                sass_version=F('site_visit__sass_version'),
                site_description=F(
                    'site_visit__location_site__site_description'),
                river_name=Case(When(
                    site_visit__location_site__river__isnull=False,
                    then='site_visit__location_site__river__name'),
                                default=Value('-')),
                latitude=F('site_visit__location_site__latitude'),
                longitude=F('site_visit__location_site__longitude'),
                time_of_day=F('site_visit__time'),
                reference=F('reference'),
                reference_category=F('reference_category'),
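                # ASPT (average score per taxon) = total SASS score / taxon count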
            ).annotate(
                aspt=Cast(F('sass_score'), FloatField()) /
                Cast(F('count'), FloatField()), ).order_by('sampling_date')

            serializer = SassSummaryDataSerializer(summary,
                                                   many=True,
                                                   context=context)
            headers = serializer.data[0].keys()
            rows = serializer.data
            formatted_headers = []

            # Rename headers
            for header in headers:
                formatted_headers.append(header.replace('_', ' ').capitalize())

            with open(path_file, 'w') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=formatted_headers)
                writer.writeheader()
                writer.fieldnames = headers
                for row in rows:
                    try:
                        writer.writerow(row)
                    except ValueError:
                        writer.fieldnames = row.keys()
                        writer.writerow(row)
            return
    logger.info('Csv %s is already being processed by another worker',
                filename)
Example #7
def download_sass_summary_data_task(filename, filters, path_file):
    from bims.utils.celery import memcache_lock
    import random

    lock_id = '{0}-lock-{1}'.format(filename, len(filters))
    oid = random.randint(1, 101)
    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            search = CollectionSearch(filters)
            context = {'filters': filters}

            collection_records = search.process_search()
            collection_ids = list(
                collection_records.values_list('id', flat=True))
            # Get SASS data
            site_visit_taxa = SiteVisitTaxon.objects.filter(
                id__in=collection_ids)
            summary = site_visit_taxa.annotate(
                date=F('collection_date'), ).values('date').annotate(
                    sampling_date=F('site_visit__site_visit_date'),
                    full_name=Concat('site_visit__owner__first_name',
                                     Value(' '),
                                     'site_visit__owner__last_name',
                                     output_field=CharField())
                ).values('sampling_date', 'full_name').annotate(
                    count=Count('sass_taxon'),
                    sass_score=Sum(
                        Case(When(condition=Q(
                            site_visit__sass_version=5,
                            sass_taxon__sass_5_score__isnull=False),
                                  then='sass_taxon__sass_5_score'),
                             default='sass_taxon__score')),
                    sass_id=F('site_visit__id'),
                    FBIS_site_code=Case(
                        When(
                            site_visit__location_site__site_code__isnull=False,
                            then='site_visit__location_site__site_code'),
                        default='site_visit__location_site__name'),
                    site_id=F('site_visit__location_site__id'),
                    sass_version=F('site_visit__sass_version'),
                    site_description=F(
                        'site_visit__location_site__site_description'),
                    river_name=Case(When(
                        site_visit__location_site__river__isnull=False,
                        then='site_visit__location_site__river__name'),
                                    default=Value('-')),
                    latitude=F('site_visit__location_site__latitude'),
                    longitude=F('site_visit__location_site__longitude'),
                    source_reference=F('source_reference'),
                    ecological_category=F(
                        'site_visit__'
                        'sitevisitecologicalcondition__'
                        'ecological_condition__category')).annotate(
                            aspt=Cast(F('sass_score'), FloatField()) /
                            Cast(F('count'),
                                 FloatField()), ).order_by('sampling_date')
            context['location_contexts'] = LocationContext.objects.filter(
                site__in=site_visit_taxa.values('site_visit__location_site'))

            serializer = SassSummaryDataSerializer(summary,
                                                   many=True,
                                                   context=context)
            headers = serializer.data[0].keys()
            rows = serializer.data
            formatted_headers = []

            # Rename headers
            for header in headers:
                header_split = [
                    word[0].upper() + word[1:] for word in header.split('_')
                ]
                header = ' '.join(header_split)
                formatted_headers.append(header)

            with open(path_file, 'w') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=formatted_headers)
                writer.writeheader()
                writer.fieldnames = headers
                for row in rows:
                    try:
                        writer.writerow(row)
                    except ValueError:
                        writer.fieldnames = row.keys()
                        writer.writerow(row)
            return
    logger.info('Csv %s is already being processed by another worker',
                filename)
Example #8
def search_collection(query_value, filters, path_file, process):
    from bims.utils.celery import memcache_lock
    from bims.api_views.collection import GetCollectionAbstract
    from bims.api_views.search import SearchObjects
    from bims.models.search_process import SearchProcess

    lock_id = '{0}-lock-{1}'.format(path_file, process)

    oid = '{0}'.format(process)

    category = 'search_results'
    processing_label = 'processing'
    finished_label = 'finish'
    max_result = 100

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:

            collection_results, \
                site_results, \
                fuzzy_search = GetCollectionAbstract.apply_filter(
                    query_value,
                    filters,
                    ignore_bbox=True)

            search_process, created = SearchProcess.objects.get_or_create(
                category=category, process_id=process)

            search_results = dict()
            search_results['status'] = {
                'current_status': processing_label,
                'process': process
            }

            with open(path_file, 'w') as status_file:
                status_file.write(json.dumps(search_results))
                search_process.file_path = path_file
                search_process.save()

            all_record_results = {}
            all_site_results = {}
            search_results['fuzzy_search'] = fuzzy_search
            search_results['records'] = []
            search_results['sites'] = []

            collection_paginator = Paginator(collection_results, max_result)
            for num_page in range(1, collection_paginator.num_pages + 1):
                collection_page = collection_paginator.page(num_page)
                if not collection_page.object_list:
                    break
                collection_result = collection_page.object_list
                all_record_results, all_site_results = \
                    SearchObjects.process_search(
                            collection_result,
                            query_value,
                            all_record_results,
                            all_site_results)

                search_results['total_records'] = len(all_record_results)
                search_results['total_sites'] = len(all_site_results)
                with open(path_file, 'w') as result_file:
                    result_file.write(json.dumps(search_results))

            sites_paginator = Paginator(site_results, max_result)
            for num_page in range(1, sites_paginator.num_pages + 1):
                site_page = sites_paginator.page(num_page)
                if not site_page.object_list:
                    break
                all_site_results = SearchObjects.process_sites_search(
                    site_page.object_list, all_site_results, query_value)
                search_results['total_sites'] = len(all_site_results)
                with open(path_file, 'w') as result_file:
                    result_file.write(json.dumps(search_results))

            if search_results:
                search_results['records'] = all_record_results
                search_results['sites'] = all_site_results
                search_results['status']['current_status'] = finished_label
                search_process.finished = True
                search_process.save()
                with open(path_file, 'w') as result_file:
                    result_file.write(json.dumps(search_results))

            return

    logger.info('Search %s is already being processed by another worker',
                process)
Example #9
def generate_search_cluster(query_value, filters, filename, path_file):
    from bims.api_views.collection import GetCollectionAbstract
    from bims.utils.celery import memcache_lock
    from bims.models.search_process import SearchProcess

    lock_id = '{0}-lock-{1}'.format(path_file, filename)

    oid = '{0}'.format(filename)

    name_label = 'n'
    coordinates_label = 'o'
    id_label = 'id'

    with memcache_lock(lock_id, oid) as acquired:
        if acquired:
            collection_results, \
                site_results, \
                fuzzy_search = GetCollectionAbstract.apply_filter(
                    query_value,
                    filters,
                    ignore_bbox=True)
            search_process, created = SearchProcess.objects.get_or_create(
                category='cluster_generation', process_id=filename)
            status = {'current_status': 'processing', 'process': filename}
            with open(path_file, 'w') as status_file:
                status_file.write(json.dumps({'status': status}))
                search_process.file_path = path_file
                search_process.save()

            collection_sites = []
            if collection_results:
                collection_sites += list(
                    collection_results.values('location_site_id',
                                              'location_coordinates',
                                              'location_site_name'))
            if site_results:
                collection_sites += list(
                    site_results.values('location_site_id',
                                        'location_coordinates',
                                        'location_site_name'))

            collection_distinct = {}
            all_sites = []

            paginator = Paginator(collection_sites, 100)
            response_data = dict()
            response_data['status'] = status
            response_data['data'] = []

            # Get location site distinct values
            for num_page in range(1, paginator.num_pages + 1):
                object_list = paginator.page(num_page).object_list
                for site in object_list:
                    location_site_id = int(site['location_site_id'])
                    if location_site_id not in collection_distinct:
                        collection_distinct[location_site_id] = site
                        all_sites.append({
                            id_label:
                            site['location_site_id'],
                            coordinates_label:
                            site['location_coordinates'],
                            name_label:
                            site['location_site_name']
                        })

                response_data['data'] = all_sites
                with open(path_file, 'w') as cluster_file:
                    cluster_file.write(json.dumps(response_data))

            response_data['status']['current_status'] = 'finish'
            search_process.finished = True
            search_process.save()
            with open(path_file, 'w') as cluster_file:
                cluster_file.write(json.dumps(response_data))

            return

    logger.info(
        'Cluster search %s is already being processed by another worker',
        path_file)