Example #1
def generate_source_reference_filter(file_path=None):
    references = SourceReference.objects.filter(
        biologicalcollectionrecord__isnull=False
    ).distinct('id')
    results = []
    reference_source_list = []
    for reference in references:
        if (reference.reference_type == 'Peer-reviewed scientific article'
                or reference.reference_type == 'Published report or thesis'):
            source = u'{authors} | {year} | {title}'.format(
                authors=reference.authors,
                year=reference.year,
                title=reference.title)
        else:
            source = str(reference.source)
        if source in reference_source_list:
            continue
        reference_source_list.append(source)
        results.append({
            'id': reference.id,
            'reference': source,
            'type': reference.reference_type
        })
    if not file_path:
        file_path = os.path.join(settings.MEDIA_ROOT,
                                 SOURCE_REFERENCE_FILTER_FILE)
    log(file_path)
    with open(file_path, 'w') as file_handle:
        json.dump(results, file_handle)
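The generated file is plain JSON, so consumers can read it straight back. A minimal sketch of the reverse operation, reusing the MEDIA_ROOT setting and the SOURCE_REFERENCE_FILTER_FILE module constant from the example above (the helper name is illustrative):

import json
import os

from django.conf import settings


def read_source_reference_filter(file_path=None):
    # Mirrors the default path used by generate_source_reference_filter
    if not file_path:
        file_path = os.path.join(settings.MEDIA_ROOT,
                                 SOURCE_REFERENCE_FILTER_FILE)
    if not os.path.exists(file_path):
        return []
    with open(file_path) as file_handle:
        # Each entry has the shape {'id': ..., 'reference': ..., 'type': ...}
        return json.load(file_handle)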
Example #2
    def process_row(self, row, index):
        valid_from = self.get_row_value('validfrom')
        valid_to = self.get_row_value('validto')
        user = self.get_object_from_uuid(column='userid',
                                         model=get_user_model())
        status = self.get_row_value('status')
        if user:
            profile, created = Profile.objects.get_or_create(user=user)
            if not profile.sass_accredited_date_from:
                profile.sass_accredited_date_from = valid_from
            else:
                if valid_from.date() < profile.sass_accredited_date_from:
                    profile.sass_accredited_date_from = valid_from
            if not profile.sass_accredited_date_to:
                profile.sass_accredited_date_to = valid_to
            else:
                if valid_to.date() > profile.sass_accredited_date_to:
                    profile.sass_accredited_date_to = valid_to
            try:
                json_data = json.loads(profile.data)
                json_data['sass_accredited_status'] = status
                profile.data = json_data
            except ValueError:
                pass
            profile.save()

            log('{user}-{valid_from}-{valid_to}-{status}'.format(
                user=profile,
                valid_from=valid_from,
                valid_to=valid_to,
                status=status))
Example #3
    def handle(self, *args, **options):
        signals.post_save.disconnect(location_site_post_save_handler)
        signals.post_save.disconnect(collection_post_save_handler)

        biobase_collection = BiologicalCollectionRecord.objects.filter(
            additional_data__BioBaseData=True)
        index = 0
        for biobase in biobase_collection:
            index += 1
            print('Processing -- %s/%s' % (index, biobase_collection.count()))
            if not biobase.source_reference:
                continue
            authors = biobase.source_reference.source.get_authors()
            try:
                author = authors[0]
                if not author.user:
                    author.save()
                author_user = (
                    biobase.source_reference.source.get_authors()[0].user)
                if biobase.owner != author_user:
                    biobase.owner = author_user
                    biobase.save()
                    log('Collection {id} - new owner : {owner}'.format(
                        id=biobase.id, owner=biobase.owner))
                if biobase.site.owner != author_user:
                    biobase.site.owner = author_user
                    biobase.site.save()
                    log('Site {id} - new owner : {owner}'.format(
                        id=biobase.site.id, owner=biobase.site.owner))
            except IndexError:
                continue
Example #4
    def handle(self, *args, **options):
        signals.post_save.disconnect(
            collection_post_save_handler,
            sender=BiologicalCollectionRecord
        )

        # Get all collections that came from gbif and have reference
        collections = BiologicalCollectionRecord.objects.filter(
            additional_data__fetch_from_gbif=True
        ).exclude(reference__iexact='')

        index = 0
        for collection in collections:
            index += 1
            log('Processing : {index}/{len}'.format(
                index=index,
                len=collections.count()
            ))

            if collection.collector and not collection.collector_user:
                users = create_users_from_string(collection.collector)
                if len(users) > 0:
                    log('Update owner and collector to {}'.format(
                        users[0].username
                    ))
                    collection.collector_user = users[0]
                    collection.owner = users[0]

            collection.save()
Example #5
    def handle(self, *args, **options):
        log('Deleting all search results...')
        all_search_process = SearchProcess.objects.all()
        all_search_process.delete()
        csv_path = os.path.join(settings.MEDIA_ROOT, 'csv_processed')
        shutil.rmtree(csv_path)
        os.mkdir(csv_path)
Example #6
    def wrap(*args, **kwargs):
        time1 = time.time()
        ret = f(*args, **kwargs)
        time2 = time.time()
        # f.__name__ replaces the Python 2-only f.func_name
        log('%s function took %0.3f ms' % (
            f.__name__, (time2 - time1) * 1000.0))
        return ret
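The wrap function above is the inner closure of a timing decorator; only its body survives in this excerpt. A sketch of the presumed enclosing decorator (the name timing is an assumption), using the same log helper imported elsewhere in these examples:

import time
from functools import wraps

from bims.utils.logger import log


def timing(f):
    # Logs the wall-clock duration of the wrapped function in milliseconds.
    @wraps(f)
    def wrap(*args, **kwargs):
        time1 = time.time()
        ret = f(*args, **kwargs)
        time2 = time.time()
        log('%s function took %0.3f ms' % (
            f.__name__, (time2 - time1) * 1000.0))
        return ret
    return wrap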
Example #7
    def handle(self, *args, **options):
        self.init(options)
        csv_file_name = self.csv_file_name(options)
        csv_file_path = os.path.join(self.csv_root_folder, csv_file_name)
        if not csv_file_name or not os.path.exists(csv_file_path):
            log('Csv file name not provided / csv file does not exist')
            return False

        with open(csv_file_path) as csv_file:
            self.csv_dict_reader(csv.DictReader(csv_file))
Example #8
def site_visit_post_save_handler(**kwargs):
    from sass.scripts.site_visit_ecological_condition_generator import (
        generate_site_visit_ecological_condition)
    try:
        site_visit = kwargs['instance']
    except KeyError:
        return
    log('Generate site visit ecological condition')
    site_visits = [site_visit]
    generate_site_visit_ecological_condition(site_visits)
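The handler only runs once it is connected to a signal. A hedged sketch of the registration, assuming a SiteVisit model as the sender (the sender name is inferred from the handler's name and is not shown in this excerpt):

from django.db.models import signals

# Assumption: SiteVisit is the model whose saves should trigger the handler.
signals.post_save.connect(
    site_visit_post_save_handler,
    sender=SiteVisit
)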
Example #9
    def csv_dict_reader(self, csv_reader):
        errors = []
        success = []
        units = []

        index = 2

        for row in csv_reader:
            if row[UNIT] not in units:
                units.append(row[UNIT])
            try:
                chems = Chem.objects.filter(chem_code__iexact=row[CODE])
                if chems.exists():
                    print('exist')
                else:
                    chems = Chem.objects.filter(chem_code__iexact=row[NAME])
                    if not chems.exists():
                        chem = Chem.objects.create(
                            chem_code=row[CODE] if row[CODE] else row[NAME],
                            chem_description=row[DESCRIPTION],
                        )
                        chems = Chem.objects.filter(id=chem.id)
                if chems.count() > 1:
                    chem_id = chems[0].id
                    # Change unit of chemical records to use the first one
                    ChemicalRecord.objects.filter(chem__in=chems).update(
                        chem=chem_id)
                    # Delete chemical units except the first one
                    chems.exclude(id=chem_id).delete()
                    chems = Chem.objects.filter(id=chem_id)

                if chems:
                    chem_unit = None
                    for unit in ChemUnit:
                        if unit.value == row[UNIT]:
                            chem_unit = unit
                            break
                    chems.update(
                        minimum=row[MIN] if row[MIN] else None,
                        maximum=row[MAX] if row[MAX] else None,
                        show_in_abiotic_list=row[RETAIN_IN_LIST] == 'Yes',
                        chem_code=row[CODE] if row[CODE] else row[NAME],
                        chem_description=row[DESCRIPTION],
                        # Guard: the unit may be absent from the ChemUnit enum
                        chem_unit=chem_unit.name if chem_unit else None)

            except Exception as e:  # noqa
                errors.append({'row': index, 'error': str(e)})
            index += 1

        if len(errors) > 0:
            logger.debug(errors)
        log('----')
        if len(success) > 0:
            logger.debug(success)

        print(units)
Example #10
    def handle(self, *args, **options):
        collections = BiologicalCollectionRecord.objects.filter(
            module_group__isnull=True
        )
        index = 0
        for bio in collections:
            index += 1
            log('Processing {current}/{total}'.format(
                current=index,
                total=collections.count()
            ))
            bio.save()
Example #11
    def handle(self, *args, **options):
        location_sites = LocationSite.objects.filter(
            legacy_site_code__iregex=r'([A-Za-z0-9]){1,6}-([A-Za-z]*)$'
        ).exclude(site_code=F('legacy_site_code'))

        log('Update {} location site(s)'.format(location_sites.count()))

        signals.post_save.disconnect(location_site_post_save_handler)

        location_sites.update(site_code=F('legacy_site_code'))

        signals.post_save.connect(location_site_post_save_handler)
Example #12
def collections_upload(session_id):
    from bims.utils.logger import log
    from bims.models import UploadSession as TaxaUploadSession
    from bims.scripts.collections_upload import CollectionsCSVUpload
    try:
        upload_session = (
            TaxaUploadSession.objects.get(id=session_id)
        )
    except TaxaUploadSession.DoesNotExist:
        log('Session does not exist')
        return

    # - Check the headers
    upload_session.progress = 'Checking header row'
    upload_session.save()

    def check_header(_csv_file):
        reader = csv.DictReader(_csv_file)
        headers = reader.fieldnames
        for header in FILE_HEADERS:
            if header not in headers:
                error_message = (
                    'Header row does not follow the correct format'
                )
                upload_session.progress = error_message
                upload_session.error_file = (
                    upload_session.process_file
                )
                upload_session.processed = True
                upload_session.save()
                return False
        return True

    try:
        with open(upload_session.process_file.path) as csv_file:
            checked = check_header(csv_file)
    except UnicodeDecodeError:
        with open(
            upload_session.process_file.path,
            encoding='ISO-8859-1'
        ) as csv_file:
            checked = check_header(csv_file)

    if not checked:
        return

    upload_session.progress = 'Processing'
    upload_session.save()
    taxa_csv_upload = CollectionsCSVUpload()
    taxa_csv_upload.upload_session = upload_session
    taxa_csv_upload.start()
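The deferred imports and the session-id argument suggest collections_upload runs as a background job. A hedged sketch of how it might be queued, assuming it is registered as a Celery task (the decorator is not shown in this excerpt):

# Assumption: the function is registered elsewhere, e.g. with @shared_task.
# It can then be queued for a given UploadSession id:
collections_upload.delay(upload_session.id)

# Or called synchronously, which the plain function signature also allows:
collections_upload(upload_session.id)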
Example #13
    def handle(self, *args, **options):
        date_and_sites = ChemicalRecord.objects.filter(
            survey__isnull=True).annotate(site=F('location_site')).values(
                'site', 'date')
        index = 1
        for date_and_site in date_and_sites:
            log('{index}/{count}'.format(index=index,
                                         count=date_and_sites.count()))
            index += 1
            site = LocationSite.objects.get(id=date_and_site['site'])
            survey, survey_created = Survey.objects.get_or_create(
                site=site, date=date_and_site['date'])
            ChemicalRecord.objects.filter(
                location_site=site,
                date=date_and_site['date']).update(survey=survey)
Example #14
    def add_to_error_summary(self,
                             error_message,
                             row,
                             add_to_error=True,
                             only_log=False):
        error_message = '{id} : {error}'.format(id=row + 2,
                                                error=error_message)
        log(error_message)
        if only_log:
            return
        if add_to_error:
            self.errors.append(error_message)
            self.data_failed += 1
        else:
            self.warnings.append(error_message)
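A hedged usage sketch for the helper above, inside a row-processing loop of the same uploader class (the column names and messages are illustrative):

        for row, record in enumerate(records):
            if not record.get('site_code'):
                # Counted as a failed row and surfaced in self.errors
                self.add_to_error_summary('Missing site code', row=row)
            elif not record.get('river'):
                # Logged and kept in self.warnings only
                self.add_to_error_summary(
                    'Missing river name', row=row, add_to_error=False)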
Example #15
    def handle(self, *args, **options):
        sites = LocationSite.objects.filter(
            biological_collection_record__notes__icontains='sass',
            biological_collection_record__source_collection__icontains='fbis',
        ).distinct()
        index = 0
        models.signals.post_save.disconnect(location_site_post_save_handler)
        for site in sites:
            index += 1
            log('Processing {0}/{1}'.format(index, sites.count()))
            # Guard against sites whose additional_data is still empty
            additional_data = json.loads(site.additional_data or '{}')
            additional_data['comment'] = site.site_description
            site.additional_data = additional_data
            site.site_description = site.name
            site.save()
        models.signals.post_save.connect(location_site_post_save_handler)
Example #16
def update_location_context(location_site_id):
    from bims.utils.logger import log
    from bims.models import LocationSite
    from bims.utils.location_context import get_location_context_data
    if isinstance(location_site_id, str) and ',' in location_site_id:
        # Multiple ids: pass the comma-separated string straight through.
        get_location_context_data(site_id=location_site_id,
                                  only_empty=False)
        return
    try:
        LocationSite.objects.get(id=location_site_id)
    except LocationSite.DoesNotExist:
        log('Location site does not exist')
        return

    get_location_context_data(site_id=str(location_site_id), only_empty=False)
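A hedged sketch of the two call forms the task above supports; a comma-separated string bypasses the single-site existence check and is handed to get_location_context_data as-is (the ids are illustrative):

# Single site: validated against LocationSite before processing.
update_location_context(42)

# Several sites at once: passed through as a comma-separated string.
update_location_context('42,43,44')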
Example #17
def generate_spatial_scale_filter(file_path=None):
    spatial_tree = []
    location_context_filters = LocationContextFilter.objects.all().order_by(
        'display_order', )
    for location_context_filter in location_context_filters:
        spatial_tree_data = {
            'name': location_context_filter.title,
            'key': slugify(location_context_filter.title),
            'children': []
        }
        filter_groups = (
            location_context_filter.location_context_groups.all().order_by(
                'locationcontextfiltergrouporder__group_display_order'))
        for group in filter_groups:
            location_contexts = LocationContext.objects.filter(
                group=group).distinct('value').order_by('value').exclude(
                    value='None')
            if not location_contexts:
                continue
            spatial_tree_value = list(
                location_contexts.annotate(
                    query=F('value'),
                    key=F('group__key')).values('query', 'key'))

            def sort_key(item):
                # Sort values with a numeric prefix numerically
                # ('2 ...' before '10 ...'), the rest alphabetically after
                # them; mixed int/str keys would raise TypeError on Python 3.
                first_word = item['query'].split(' ')[0]
                if first_word.isdigit():
                    return (0, int(first_word), item['query'])
                return (1, 0, item['query'])

            spatial_tree_value_sorted = sorted(
                spatial_tree_value, key=sort_key)
            layer_name = group.layer_name
            spatial_tree_children = {
                'key': group.key,
                'name': group.name,
                'value': spatial_tree_value_sorted,
                'layer_name': layer_name,
                'wms_url': group.wms_url,
                'wms_format': group.wms_format,
                'layer_identifier': group.layer_identifier,
            }
            spatial_tree_data['children'].append(spatial_tree_children)

        spatial_tree.append(spatial_tree_data)

    if spatial_tree:
        if not file_path:
            file_name = 'spatial_scale_filter_list.txt'
            file_path = os.path.join(settings.MEDIA_ROOT, file_name)
        log(file_path)
        with open(file_path, 'w') as file_handle:
            json.dump(spatial_tree, file_handle)
Example #18
    def handle(self, *args, **options):
        location_sites = LocationSite.objects.exclude(
            site_code__iregex=r'([A-Za-z0-9]){1,6}-([A-Za-z0-9]*)$')

        log('Update {} location site(s)'.format(location_sites.count()))

        signals.post_save.disconnect(location_site_post_save_handler)

        index = 0
        for location_site in location_sites:
            index += 1
            log('processing %s of %s' % (index, location_sites.count()))
            # Allocate site code
            allocate_site_codes_from_river(update_site_code=True,
                                           location_id=location_site.id)

        signals.post_save.connect(location_site_post_save_handler)
Example #19
    def handle(self, *args, **options):
        site_ids = options.get('location_sites', None)
        if site_ids:
            site_ids = site_ids.split(',')

        location_sites = LocationSite.objects.filter(
            location_context__isnull=False)

        if site_ids:
            location_sites = location_sites.filter(id__in=site_ids)

        if not location_sites:
            log('Location site does not exist')
            return

        site_count = 1
        for site in location_sites:
            log('Migrating (%s) %s/%s' %
                (site.id, site_count, location_sites.count()))
            site_count += 1
            context_json = json.loads(site.location_context)
            try:
                # .items() replaces the Python 2-only iteritems()
                for key, group in (
                        context_json['context_group_values'].items()):
                    group_key = group['key']
                    if isinstance(group['service_registry_values'], list):
                        continue
                    for k, context_value in (
                            group['service_registry_values'].items()):
                        context_key = context_value['key']
                        context_name = context_value['name']
                        context_value = str(context_value['value'])
                        LocationContext.objects.get_or_create(
                            site=site,
                            group_key=group_key,
                            key=context_key,
                            name=context_name,
                            value=context_value)
            except (KeyError, UnicodeEncodeError):
                continue
            site.location_context = None
            site.location_context_document = None
            site.save()
Example #20
    def handle(self, *args, **options):
        found_all = True
        sass_version = options.get('sass_version', 5)  # noqa
        sass_taxa = SassTaxon.objects.filter(sass_5_score__isnull=False,
                                             taxon_sass_5__isnull=False)

        current_index = 1
        for taxa in TAXON_LIST:
            sass_taxon = sass_taxa.filter(taxon_sass_5__icontains=taxa)
            if not sass_taxon.exists():
                log('Taxon not found : {}'.format(taxa))
                found_all = False
                continue
            sass_taxon = sass_taxon[0]
            sass_taxon.display_order_sass_5 = current_index
            sass_taxon.save()
            current_index += 1

        log('Found all : {}'.format(found_all))
Example #21
def get_location_context_data(group_keys=None, site_id=None, only_empty=False):
    # Get location context data from GeoContext

    if not group_keys:
        group_keys = preferences.SiteSetting.geocontext_keys.split(',')
    else:
        if not isinstance(group_keys, list):
            group_keys = group_keys.split(',')

    if site_id:
        location_sites = LocationSite.objects.filter(id__in=site_id.split(','))
    else:
        location_sites = LocationSite.objects.all()

    if only_empty:
        location_sites = location_sites.exclude(
            reduce(operator.and_,
                   (Q(locationcontext__group__geocontext_group_key=x)
                    for x in group_keys)))
    num = len(location_sites)
    i = 1

    if num == 0:
        log('No locations with applied filters were found')
        return

    for location_site in location_sites:
        log('Updating %s of %s, %s' % (i, num, location_site.name))
        i += 1
        all_context = None
        if only_empty:
            try:
                all_context = list(
                    LocationContext.objects.filter(
                        site=location_site).values_list(
                            'group__geocontext_group_key', flat=True))
            except (ValueError, TypeError):
                pass
        for group_key in group_keys:
            if all_context and group_key in all_context:
                log('Context data already exists for {}'.format(group_key))
                continue
            current_outcome, message = (
                location_site.add_context_group(group_key))
            log('[{status}] [{site_id}] [{group}] - {message}'.format(
                status='SUCCESS' if current_outcome else 'FAILED',
                site_id=location_site.id,
                message=message,
                group=group_key))
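A hedged sketch of invoking the updater above (the site ids are illustrative; river_ecoregion_group is a group key that appears elsewhere in these examples):

# Refresh every configured GeoContext group for two specific sites.
get_location_context_data(site_id='12,15')

# Fetch a single group, and only for sites where it is still missing.
get_location_context_data(group_keys='river_ecoregion_group',
                          only_empty=True)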
Example #22
    def handle(self, *args, **options):
        clear_site_code = options.get('clear_site_code')
        restore_legacy_site_code = options.get('restore_legacy_site_code')

        if clear_site_code:
            location_site_to_clear = LocationSite.objects.filter(
                river__isnull=False)
            log('Clear site code for {} sites'.format(
                location_site_to_clear.count()))
            location_site_to_clear.update(site_code='')

        if restore_legacy_site_code:
            sites_with_legacy_site_code = LocationSite.objects.filter(
                legacy_site_code__isnull=False).exclude(legacy_site_code='')
            log('Restoring legacy site code for {} sites'.format(
                sites_with_legacy_site_code.count()))
            sites_with_legacy_site_code.update(site_code=F('legacy_site_code'))

        location_sites = LocationSite.objects.filter(site_code__exact='',
                                                     river__isnull=False)

        index = 0
        for location_site in location_sites:
            index += 1
            log('processing %s of %s' % (index, location_sites.count()))
            # Allocate site code
            allocate_site_codes_from_river(update_site_code=True,
                                           location_id=location_site.id)
Example #23
def harvest_collections(session_id):
    from bims.utils.logger import log
    from bims.models import HarvestSession
    from bims.scripts.import_gbif_occurrences import (
        import_gbif_occurrences
    )
    try:
        harvest_session = (
            HarvestSession.objects.get(id=session_id)
        )
    except HarvestSession.DoesNotExist:
        log('Session does not exist')
        return

    harvest_session.status = 'Processing'
    harvest_session.save()

    taxonomies = harvest_session.module_group.taxonomies.all()
    index = 1

    for taxon in taxonomies:
        if HarvestSession.objects.get(id=session_id).canceled:
            print('Canceled')
            return
        harvest_session.status = 'Fetching gbif data for {c} ({i}/{t})'.format(
            c=taxon.canonical_name,
            i=index,
            t=taxonomies.count()
        )
        index += 1
        harvest_session.save()
        import_gbif_occurrences(
            taxonomy=taxon,
            log_file_path=harvest_session.log_file.path,
            session_id=session_id
        )

    harvest_session.status = 'Finished'
    harvest_session.finished = True
    harvest_session.save()
Example #24
    def handle(self, *args, **options):
        dev_folder = '/home/web/django_project'
        folder_name = 'data'
        if os.path.exists(dev_folder):
            root = dev_folder
        else:
            root = '/usr/src/bims'
        csv_file_path = os.path.join(
            root, 'scripts/static/{folder}/{filename}'.format(
                folder=folder_name, filename=self.file_name))

        if not os.path.exists(csv_file_path):
            log('File not found')
            return

        with open(csv_file_path) as csv_file:
            csv_reader = csv.DictReader(csv_file)
            current_taxon_group = None
            for row in csv_reader:
                taxon_group_name = row[TAXON_GROUP]
                if taxon_group_name:
                    current_taxon_group = TaxonGroup.objects.get(
                        name__iexact=taxon_group_name)
                sass_taxa = SassTaxon.objects.filter(
                    Q(taxon_sass_5__iexact=row[TAXON])
                    | Q(taxon_sass_4__iexact=row[TAXON]))
                if sass_taxa.exists():
                    if not current_taxon_group.taxonomies.filter(
                            id__in=sass_taxa.values_list('taxon_id')).exists():
                        log('Sass taxon is not in the correct group')
                        current_taxon_group.taxonomies.add(sass_taxa[0].taxon)
                else:
                    log('Sass Taxon does not exist')
Example #25
    def handle(self, *args, **options):
        sass_taxon_4 = SassTaxon.objects.filter(taxon_sass_4__isnull=False)
        for sass_taxon in sass_taxon_4:
            sass_taxon_name = sass_taxon.taxon_sass_4.lower().replace(
                '1 sp', '1')
            taxon_5 = SassTaxon.objects.filter(
                taxon_sass_4__isnull=True,
                taxon_sass_5__isnull=False,
                taxon_sass_5__icontains=sass_taxon_name)
            if taxon_5.count() > 0:
                taxon_5 = taxon_5[0]
                log('Found taxon 5 {0}'.format(taxon_5.taxon_sass_5))
                site_visit_taxon = SiteVisitTaxon.objects.filter(
                    sass_taxon=taxon_5)
                site_visit_taxon.update(sass_taxon=sass_taxon)
                site_visit_biotope_taxon = (
                    SiteVisitBiotopeTaxon.objects.filter(sass_taxon=taxon_5))
                site_visit_biotope_taxon.update(sass_taxon=sass_taxon)
                sass_taxon.sass_5_score = taxon_5.sass_5_score
                sass_taxon.taxon_sass_5 = taxon_5.taxon_sass_5
                sass_taxon.save()
                taxon_5.delete()
Example #26
def merge_context_group(excluded_group=None, group_list=None):
    """
    Merge multiple location context groups
    """
    if not excluded_group:
        return
    if not group_list:
        return
    groups = group_list.exclude(id=excluded_group.id)

    if groups.count() < 1:
        return

    log('Merging %s data' % groups.count())

    links = [
        rel.get_accessor_name() for rel in excluded_group._meta.get_fields()
        if issubclass(type(rel), ForeignObjectRel)
    ]

    if links:
        for group in groups:
            log('----- {} -----'.format(str(group)))
            for link in links:
                try:
                    objects = getattr(group, link).all()
                    if objects.count() > 0:
                        print('Updating {obj} for : {taxon}'.format(
                            obj=str(objects.model._meta.label),
                            taxon=str(group)))
                        update_dict = {
                            getattr(group, link).field.name: excluded_group
                        }
                        objects.update(**update_dict)
                except Exception as e:  # noqa
                    continue
            log('-' * (len(str(group)) + 12))

    groups.delete()
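A hedged usage sketch for the merge utility above: keep one canonical group and fold its duplicates into it (LocationContextGroup as the model and the key value are assumptions based on identifiers in these examples):

# Assumption: LocationContextGroup is the model behind these groups.
duplicates = LocationContextGroup.objects.filter(key='geo_class_recoded')
canonical = duplicates.first()

# Re-points every related object at canonical, then deletes the rest.
merge_context_group(excluded_group=canonical, group_list=duplicates)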
Example #27
def format_location_context(location_site_id, force_update=False):
    try:
        location_site = LocationSite.objects.get(id=location_site_id)
    except LocationSite.DoesNotExist:
        log('LocationSite Does Not Exist', 'debug')
        return

    if not location_site.location_context_document:
        log('LocationSite context document does not exist', 'debug')
        return

    location_context = json.loads(location_site.location_context_document)
    # md5 requires bytes on Python 3, so encode the document first
    hash_string = hashlib.md5(
        location_site.location_context_document.encode('utf-8')).hexdigest()
    formatted = {}

    if location_site.location_context and not force_update:
        formatted_location_context = json.loads(location_site.location_context)

        if not location_site.original_geomorphological:
            try:
                context_geo = formatted_location_context[
                    'context_group_values']['geomorphological_group'][
                        'service_registry_values']['geo_class_recoded'][
                            'value']
                models.signals.post_save.disconnect(
                    location_site_post_save_handler, )
                location_site.original_geomorphological = context_geo
                location_site.save()
                models.signals.post_save.connect(
                    location_site_post_save_handler, )
            except (KeyError, TypeError):
                pass

        if 'hash' in formatted_location_context:
            if formatted_location_context['hash'] == hash_string:
                process_spatial_scale_data(
                    formatted_location_context['context_group_values'])
                if location_site.refined_geomorphological:
                    # Update geo value in geocontext data
                    try:
                        context_geo = formatted_location_context[
                            'context_group_values']['geomorphological_group'][
                                'service_registry_values'][
                                    'geo_class_recoded']['value']
                        if (context_geo ==
                                location_site.refined_geomorphological):
                            log('Formatted location context already exists')
                            return
                    except KeyError:
                        log('Formatted location context already exists')
                        return
                else:
                    log('Formatted location context already exists')
                    return

    if not isinstance(location_context, dict):
        return
    for context_key, context_value in location_context.items():
        if isinstance(context_value, list):
            formatted[context_key] = array_to_dict(context_value,
                                                   key_name='key')
        else:
            formatted[context_key] = context_value

    models.signals.post_save.disconnect(location_site_post_save_handler, )

    if not location_site.original_geomorphological:
        try:
            context_geo = formatted['context_group_values'][
                'geomorphological_group']['service_registry_values'][
                    'geo_class_recoded']['value']
            location_site.original_geomorphological = context_geo
        except KeyError:
            pass

    if location_site.refined_geomorphological:
        try:
            formatted['context_group_values']['geomorphological_group'][
                'service_registry_values']['geo_class_recoded']['value'] = (
                    location_site.refined_geomorphological)
        except KeyError:
            pass

    process_spatial_scale_data(formatted['context_group_values'])
    formatted['hash'] = hash_string
    location_site.location_context = formatted
    location_site.save()
    log('Location context formatted', 'info')

    models.signals.post_save.connect(location_site_post_save_handler, )
Example #28
    def handle(self, *args, **options):
        folder_name = 'data'

        file_path = os.path.join(
            os.path.abspath(os.path.dirname(__name__)),
            'scripts/static/{folder}/{filename}'.format(
                folder=folder_name,
                filename=self.file_name
            ))

        found = 0
        not_found = []
        data_error = []

        with open(file_path, 'r') as csvfile:
            csv_reader = csv.DictReader(csvfile)
            for index, record in enumerate(csv_reader):
                collection_records = BiologicalCollectionRecord.objects.none()
                try:
                    record_point = Point(
                        float(record[LONGITUDE]),
                        float(record[LATITUDE]))

                    location_sites = LocationSite.objects.filter(
                        geometry_point=record_point,
                        name=record[LOCATION_SITE]
                    )

                    if not location_sites.exists():
                        log('no location site')
                        continue
                    else:
                        location_site = location_sites[0]

                    if record[SAMPLING_DATE].lower() == 'unspecified':
                        log('Unspecified date -> Next row')
                        continue
                    uuid_value = uuid.UUID(record[UUID])
                    collection_records = (
                        BiologicalCollectionRecord.objects.filter(
                            uuid=uuid_value
                        )
                    )
                    if not collection_records.exists():
                        if record[ORIGIN] == 'Native':
                            category = 'indigenous'
                        else:
                            category = 'alien'
                        collection_records = (
                            BiologicalCollectionRecord.objects.filter(
                                site=location_site,
                                original_species_name=record[
                                    SPECIES_NAME
                                ],
                                collection_date=datetime.strptime(
                                    record[SAMPLING_DATE], '%Y/%m/%d'),
                                category=category,
                                collector=record[COLLECTOR],
                                notes=record['Notes']
                            )
                        )
                    if collection_records.count() != 1:
                        print('multiple collection records or zero')
                        if collection_records.count() == 0:
                            not_found.append(-99)
                        else:
                            not_found.extend(
                                list(
                                    collection_records.values_list(
                                        'id', flat=True)))
                        continue

                    print(
                        'found collection record %s' %
                        collection_records[0].id)
                    found += 1
                    collection_record = collection_records[0]
                    collection_record.uuid = str(uuid_value)
                    collection_record.save()

                    reference_category = record['Reference category']
                    document = None
                    document_link = record['Document Upload Link']
                    document_id = document_link.split('/')[-1]
                    if document_id:
                        document_id = int(document_id)
                        try:
                            document = Document.objects.get(
                                id=document_id
                            )
                            bims_document, b_created = (
                                BimsDocument.objects.get_or_create(
                                    document=document))
                            author = record['Reference']
                            if (
                                    bims_document.author and
                                    bims_document.author != author
                            ):
                                bims_document.author = author
                                bims_document.save()
                        except Document.DoesNotExist:
                            pass
                    if (
                            reference_category ==
                            'Peer-reviewed scientific article'):
                        # peer-reviewed
                        doi = record[DOI].strip()
                        if not doi and not document:
                            continue
                        if doi:
                            # Add doi
                            try:
                                entry = Entry.objects.get(doi__iexact=doi)
                            except Entry.DoesNotExist:
                                doi_loader = DOILoader()
                                try:
                                    doi_loader.load_records(DOIs=[doi])
                                except DOILoaderError as e:
                                    log('DOILoaderError, skipping')
                                    continue
                                except HTTPError:
                                    log('Could not fetch the doi, skipping')
                                    continue
                                doi_loader.save_records()
                                try:
                                    entry = Entry.objects.get(doi__iexact=doi)
                                except Entry.DoesNotExist:
                                    log('Entry does not exist, skipping')
                                    continue
                            source_reference = (
                                SourceReference.create_source_reference(
                                    category='bibliography',
                                    source_id=entry.id,
                                    note=None
                                )
                            )
                            print('Add DOI to %s' % collection_record.id)
                            collection_record.source_reference = (
                                source_reference
                            )
                            collection_record.save()
                        else:
                            source_reference, sr_created = (
                                SourceReferenceBibliography.objects
                                .get_or_create(document=document)
                            )
                            collection_record.source_reference = (
                                source_reference
                            )
                            collection_record.save()
                            print('Add Bibliography Document to %s'
                                  % collection_record.id)
                    elif reference_category == 'Database':
                        # Database
                        if not document:
                            continue
                        database_name = record['Reference']
                        database, created = (
                            DatabaseRecord.objects.get_or_create(
                                name=database_name,))
                        source_reference = (
                            SourceReference.create_source_reference(
                                category='database',
                                source_id=database.id,
                                note=None
                            )
                        )
                        source_reference.document = document
                        source_reference.save()
                        collection_record.source_reference = source_reference
                        collection_record.save()

                        print('Add Database Document to %s' %
                              collection_record.id)
                    elif (
                            reference_category == 'Thesis' or
                            reference_category == 'Published report'):
                        # published
                        if not document:
                            continue
                        source_reference = (
                            SourceReference.create_source_reference(
                                category='document',
                                source_id=document.id,
                                note=None
                            )
                        )
                        collection_record.source_reference = source_reference
                        collection_record.save()
                        print('Add Published Document to %s'
                              % collection_record.id)
                    else:
                        # unpublished
                        source_reference, created = (
                            SourceReference.objects.get_or_create(
                                note=record['Reference']
                            ))
                        collection_record.source_reference = source_reference
                        collection_record.save()
                        print('Add Unpublished to %s'
                              % collection_record.id)

                except KeyError as e:
                    print('KeyError')
                    data_error.extend(
                        list(collection_records.values_list('id', flat=True)))
                    continue
                except ValueError as e:
                    print('ValueError')
                    data_error.extend(
                        list(collection_records.values_list('id', flat=True)))
                    continue

            log('Summary')
            log('Total found : %s' % found)
            log('Total not found: %s' % len(not_found))
            log('Total data error: %s' % len(data_error))
Example #29
    def csv_dict_reader(self, csv_reader):
        signals.pre_save.disconnect(taxonomy_pre_save_handler, sender=Taxonomy)
        errors = []
        success = []
        csv_data = []
        ids = []  # taxonomy ids seen so far, used to flag duplicate rows

        index = 1

        for row in csv_reader:
            index += 1
            taxon_name = self.row_value(row, TAXON)
            if SCIENTIFIC_NAME in row:
                scientific_name = (self.row_value(
                    row, SCIENTIFIC_NAME) if self.row_value(
                        row, SCIENTIFIC_NAME) else taxon_name)
            else:
                scientific_name = taxon_name
            scientific_name = scientific_name.strip()
            # Get rank
            rank = self.row_value(row, 'Taxon Rank')
            if not rank:
                rank = self.row_value(row, 'Taxon rank')
            if not rank:
                if self.row_value(row, SUBSPECIES):
                    rank = SUBSPECIES
                elif self.row_value(row, SPECIES):
                    rank = SPECIES
                elif self.row_value(row, GENUS):
                    rank = GENUS
                elif self.row_value(row, SUBFAMILY):
                    rank = SUBFAMILY
                elif self.row_value(row, FAMILY):
                    rank = FAMILY
                elif self.row_value(row, ORDER):
                    rank = ORDER
                elif self.row_value(row, CLASS):
                    rank = CLASS
                elif self.row_value(row, PHYLUM):
                    rank = PHYLUM
                else:
                    rank = KINGDOM
            taxa = Taxonomy.objects.filter(canonical_name__iexact=taxon_name,
                                           rank=rank.upper())
            print('---------')

            if self.check_only:
                print('Checking data {}'.format(taxon_name))
                if not taxa.exists():
                    errors.append('Missing taxon {taxon} - {row}'.format(
                        taxon=taxon_name, row=index))
                else:
                    if taxa.count() > 1:
                        errors.append(
                            'Duplicate taxa for {taxon} - {row}'.format(
                                taxon=taxon_name, row=index))
                        check_taxa_duplicates(taxon_name, rank)
                    if taxa[0].id not in ids:
                        ids.append(taxa[0].id)
                    else:
                        errors.append(
                            'Duplicate ids for {taxon} - {row}'.format(
                                taxon=taxon_name, row=index))
                continue

            try:
                taxonomy = None
                if self.missing_only and taxa.exists():
                    logger.debug(
                        'Skip ingesting existing data {}'.format(taxon_name))
                    continue
                if taxa.exists():
                    taxonomy = taxa[0]
                    logger.debug('{} already in the system'.format(taxon_name))
                if not taxonomy:
                    # Fetch from gbif
                    taxonomy = fetch_all_species_from_gbif(
                        species=taxon_name,
                        taxonomic_rank=rank,
                        should_get_children=False,
                        fetch_vernacular_names=False,
                        use_name_lookup=True,
                        **self.rank_classifier())
                if taxonomy:
                    success.append(taxonomy.id)
                else:
                    # Try again with lookup
                    logger.debug('Use different method')
                    taxonomy = fetch_all_species_from_gbif(
                        species=taxon_name,
                        taxonomic_rank=rank,
                        should_get_children=False,
                        fetch_vernacular_names=False,
                        use_name_lookup=False,
                        **self.rank_classifier())
                    if not taxonomy:
                        errors.append({
                            'row': index,
                            'error': 'Taxonomy not found'
                        })
                    else:
                        success.append(taxonomy.id)

                # Validate data
                if taxonomy:
                    if (taxon_name not in taxonomy.scientific_name
                            and taxon_name.lower().strip() !=
                            taxonomy.canonical_name.lower().strip()
                            and taxon_name.lower()
                            not in taxonomy.legacy_canonical_name.lower()):
                        taxonomy = None
                    else:
                        if not taxonomy.parent:
                            taxonomy.parent = self.get_parent(row, rank)

                # Data from GBIF couldn't be found, so add it manually
                if not taxonomy:
                    parent = self.get_parent(row, rank)
                    if not parent:
                        errors.append({
                            'row': index,
                            'error': 'Parent not found {}'.format(taxon_name)
                        })
                    else:
                        # Taxonomy not found, create one
                        taxonomy, _ = Taxonomy.objects.get_or_create(
                            scientific_name=scientific_name,
                            canonical_name=taxon_name,
                            rank=TaxonomicRank[rank.upper()].name,
                            parent=parent)
                        success.append(taxonomy.id)

                # -- Finish
                if taxonomy:
                    # Merge taxon with same canonical name
                    legacy_canonical_name = taxonomy.legacy_canonical_name
                    legacy_canonical_name = legacy_canonical_name.replace(
                        '\\xa0', '')
                    if FORMER_SPECIES_NAME in row:
                        former_species_name = self.row_value(
                            row, FORMER_SPECIES_NAME)
                        if len(former_species_name) > 500:
                            former_species_name = former_species_name[:500]
                        if former_species_name not in legacy_canonical_name:
                            legacy_canonical_name += ';' + former_species_name
                    taxonomy.legacy_canonical_name = (
                        legacy_canonical_name[:700])
                    # -- Import date
                    if self.import_date:
                        taxonomy.import_date = parse_date(self.import_date)
                    self.additional_data(taxonomy, row)

                    # Add to csv data
                    if self.csv_name:
                        csv_data.append(self.process_csv_data(taxonomy))

                # -- Validate parents
                self.validate_parents(taxon=taxonomy, row=row)

                # taxonomy can still be None when no parent was found above
                if taxonomy and taxonomy.canonical_name != taxon_name:
                    taxonomy.canonical_name = taxon_name
                    taxonomy.save()

            except Exception as e:  # noqa
                print(str(e))
                errors.append({'row': index, 'error': str(e)})

        if len(errors) > 0:
            logger.debug(errors)
        log('----')
        if len(success) > 0:
            logger.debug(success)
        if self.csv_name:
            self.export_to_csv(csv_data)
Example #30
def generate_site_visit_ecological_condition(site_visits):
    """
    Generate site visit ecological condition from list of site visit
    :param site_visits: list of site visit query object
    """
    for site_visit in site_visits:
        log('Generate ecological condition for site visit : {}'.format(
            site_visit.id
        ))

        site_visit_taxa = SiteVisitTaxon.objects.filter(
            site_visit=site_visit
        )

        summary = site_visit_taxa.annotate(
            count=Count('sass_taxon'),
            sass_score=Coalesce(Sum(Case(
                When(
                    condition=Q(site_visit__sass_version=5,
                                sass_taxon__sass_5_score__isnull=False,
                                taxon_abundance__isnull=False),
                    then='sass_taxon__sass_5_score'),
                When(
                    condition=Q(site_visit__sass_version=4,
                                sass_taxon__score__isnull=False,
                                taxon_abundance__isnull=False),
                    then='sass_taxon__score'),
                default=0),
            ), 0),
            sass_id=F('site_visit__id')
        ).annotate(
            aspt=Cast(F('sass_score'), FloatField()) / Cast(F('count'),
                                                            FloatField()),
        ).values('sass_score', 'aspt', 'count')

        if not summary:
            continue

        aspt_score = summary[0]['aspt']
        sass_score = summary[0]['sass_score']

        site_visit_ecological, created = (
            SiteVisitEcologicalCondition.objects.get_or_create(
                site_visit=site_visit,
                sass_score=sass_score,
                aspt_score=aspt_score
            )
        )

        try:
            location_context = json.loads(
                site_visit.location_site.location_context
            )
            # Keep these values as text: they are split and matched below,
            # and encoding them to bytes would break that on Python 3.
            eco_region = (
                location_context['context_group_values'][
                    'river_ecoregion_group'][
                    'service_registry_values']['eco_region_1']['value']
            )
            geo_class = (
                location_context['context_group_values'][
                    'geomorphological_group'][
                    'service_registry_values']['geo_class']['value']
            )
            # Fix eco_region name
            eco_region_splits = eco_region.split(' ')
            if eco_region_splits[0].isdigit():
                eco_region_splits.pop(0)
                eco_region = ' '.join(eco_region_splits)
        except (TypeError, ValueError, KeyError):
            continue

        sass_ecological_conditions = (
            SassEcologicalCondition.objects.filter(
                ecoregion_level_1__icontains=eco_region.strip(),
                geomorphological_zone__icontains=geo_class.strip()
            )
        )

        found_ecological_condition = False
        for sass_ecological_condition in sass_ecological_conditions:
            if (
                    sass_score >
                    sass_ecological_condition.sass_score_precentile or
                    aspt_score >
                    sass_ecological_condition.aspt_score_precentile
            ):
                site_visit_ecological.ecological_condition = (
                    sass_ecological_condition.ecological_category
                )
                site_visit_ecological.save()
                found_ecological_condition = True
                log(
                    'Found ecological condition : {}'.format(
                        sass_ecological_condition.ecological_category
                    ))
                break

        if found_ecological_condition:
            continue
        # Set to lowest category
        lowest_category = SassEcologicalCategory.objects.filter(
            Q(category__icontains='e') | Q(category__icontains='f')
        ).order_by('category')
        if not lowest_category:
            continue

        log(
            'Set to lowest ecological category : {}'.format(
                lowest_category[0].category
            ))
        site_visit_ecological.ecological_condition = lowest_category[0]
        site_visit_ecological.save()