Ejemplo n.º 1
0
    def import_additional_data(self, collection_record, record):
        """
        Store algae measurements and the analyst for an imported record.

        The AlgaeData row linked to ``self.survey`` is fetched (or created)
        and filled from the csv record. When the analyst column resolves to
        at least one user, the first one gets the analyst institute as its
        organization and is assigned to ``collection_record``.

        :param collection_record: BiologicalCollectionRecord object
        :param record: csv record
        """
        # -- Algae data
        algae_lookup = {'survey': self.survey}
        try:
            algae_data = AlgaeData.objects.get_or_create(**algae_lookup)[0]
        except AlgaeData.MultipleObjectsReturned:
            # More than one row already exists for this survey; reuse the
            # first one and leave a trace on stdout.
            algae_data = AlgaeData.objects.filter(**algae_lookup)[0]
            print('Duplicated algae data')

        algae_data.curation_process = record[CURATION_PROCESS]
        algae_data.indicator_chl_a = record[INDICATOR_CHL_A]
        algae_data.indicator_afdm = record[INDICATOR_AFDM]
        ai_value = record[AI]
        if ai_value:
            # An empty AI column keeps whatever value is already stored.
            algae_data.ai = ai_value
        algae_data.save()

        # -- Analyst
        analysts = create_users_from_string(record[ANALYST])
        if not analysts:
            return
        first_analyst = analysts[0]
        first_analyst.organization = record[ANALYST_INSTITUTE]
        first_analyst.save()
        collection_record.analyst = first_analyst
        collection_record.save()
    def handle(self, *args, **options):
        """
        Assign collector/owner users to GBIF records that lack them.

        The ``collection_post_save_handler`` receiver is disconnected from
        ``post_save`` first, so the saves below do not invoke it. Every
        selected record is saved, whether or not a user was assigned.
        """
        signals.post_save.disconnect(
            collection_post_save_handler,
            sender=BiologicalCollectionRecord
        )

        # Get all collections that came from gbif and have reference
        gbif_records = BiologicalCollectionRecord.objects.filter(
            additional_data__fetch_from_gbif=True
        ).exclude(reference__iexact='')

        for position, collection in enumerate(gbif_records, start=1):
            progress = 'Processing : {index}/{len}'.format(
                index=position,
                len=gbif_records.count()
            )
            log(progress)

            # Only records with a collector name but no linked user
            # need a user to be resolved.
            needs_user = (
                collection.collector and not collection.collector_user
            )
            if needs_user:
                users = create_users_from_string(collection.collector)
                if len(users) > 0:
                    first_user = users[0]
                    log('Update owner and collector to {}'.format(
                        first_user.username
                    ))
                    collection.collector_user = first_user
                    collection.owner = first_user

            collection.save()
def _reference_name_with_year(reference, source_year):
    """Return ``reference`` with ``source_year`` appended when a year is set.

    Without a year the reference is returned unchanged. With a year but no
    reference only the year is returned. Formatting (instead of ``+``)
    keeps a numeric year or a missing reference from raising ``TypeError``.
    """
    if not source_year:
        return reference
    if not reference:
        return str(source_year)
    return '{}, {}'.format(reference, source_year)


def process_source_reference(reference=None,
                             reference_category=None,
                             doi=None,
                             document_link=None,
                             document_url=None,
                             document_title=None,
                             document_author=None,
                             source_year=None):
    """Processing source reference data from csv.

    Resolves (or creates) the source reference described by one csv row.
    A DOI takes precedence; otherwise the reference category decides
    whether a bibliography, document, database or unpublished (note only)
    reference is produced.

    :param reference: reference text; falls back to ``document_title``
    :param reference_category: category text from the csv, may be None
    :param doi: DOI looked up in the bibliography entries, fetched through
        ``DOILoader`` when no entry exists yet
    :param document_link: link whose last path segment is a Document id
    :param document_url: url used to get or create a Document
    :param document_title: title of the document
    :param document_author: author string, mandatory
    :param source_year: publication year
    :return: tuple ``(message, source_reference)``; ``source_reference`` is
        None whenever the reference could not be produced
    :raises ValueError: for a peer-reviewed category without url, title,
        author and year, or when ``source_year`` is not an integer value
    """
    source_reference = None
    document = None
    source_reference_found = False

    if not document_author:
        return 'Missing author', None

    # NOTE(review): this branch cannot be reached while the author guard
    # above comes first (it requires an empty author) - confirm which of
    # the two behaviors is intended before removing either.
    if (not reference and not reference_category and not doi
            and not document_link and not document_url and not document_title
            and not document_author and not source_year):
        return '', None

    # Reference and document title fall back to each other.
    if not reference:
        if document_title:
            reference = document_title
    if not document_title and reference:
        document_title = reference

    # if there is document link, get the id of the document
    if document_link:
        document_ref = document_link.split('/')[-1]
        try:
            document = Document.objects.get(id=int(document_ref))
        except (ValueError, Document.DoesNotExist):
            # Report what the link actually contained, also when it is not
            # a number at all.
            return 'Document {} does not exist'.format(document_ref), None

    # if there is document url, get or create document based on url
    if document_url:
        document_fields = {'doc_url': document_url}
        if source_year:
            document_fields['date'] = date(year=int(source_year),
                                           month=1,
                                           day=1)
        authors = create_users_from_string(document_author)
        document_fields['owner'] = authors[0] if len(authors) > 0 else None
        document = get_or_create_data_from_model(Document, document_fields)
        try:
            bims_document, _ = BimsDocument.objects.get_or_create(
                document=document)
        except BimsDocument.MultipleObjectsReturned:
            bims_document = BimsDocument.objects.filter(document=document)[0]
        for author in authors:
            bims_document.authors.add(author)
        if document.title != document_title:
            document.title = document_title
            document.save()

    # if DOI provided, check in bibliography records
    if doi:
        entry = get_or_create_data_from_model(model=Entry,
                                              fields={'doi': doi},
                                              create=False)
        if not entry:
            doi_loader = DOILoader()
            try:
                doi_loader.load_records(DOIs=[doi])
                doi_loader.save_records()
                entry_fields = {'doi__iexact': doi}
                entry = get_or_create_data_from_model(Entry,
                                                      entry_fields,
                                                      create=False)
                if entry:
                    source_reference = (
                        SourceReference.create_source_reference(
                            category='bibliography',
                            source_id=entry.id,
                            note=None))
                    source_reference_found = True
            except (DOILoaderError, requests.exceptions.HTTPError) as e:
                print(e)
            finally:
                # NOTE: returning from ``finally`` also discards any other
                # exception raised while fetching; kept as best effort.
                if not entry:
                    return 'Error Fetching DOI : {doi}'.format(doi=doi), None
        # Applies to freshly fetched entries and to entries that were
        # already stored, so a known DOI always yields its bibliography
        # reference instead of falling through to the category handling.
        if entry and not source_reference:
            SourceReference.create_source_reference(
                category='bibliography', source_id=entry.id, note=None)
            source_reference, _ = (
                SourceReferenceBibliography.objects.get_or_create(
                    source=entry))
            source_reference_found = True

    if not source_reference_found:
        # A missing category is handled like an unrecognised one.
        category = (reference_category or '').lower()
        if 'peer-reviewed' in category:
            # Peer reviewed
            # should be bibliography type
            # If url, title, year, and author(s) exists, create new entry
            if (document_url and document_title and document_author
                    and source_year):
                optional_values = {}
                if doi:
                    optional_values['doi'] = doi
                entry, _ = Entry.objects.get_or_create(
                    url=document_url,
                    title=document_title,
                    publication_date=date(int(source_year), 1, 1),
                    is_partial_publication_date=True,
                    type='article',
                    **optional_values)
                authors = create_users_from_string(document_author)
                for rank, author in enumerate(authors, start=1):
                    _author, _ = Author.objects.get_or_create(
                        first_name=author.first_name,
                        last_name=author.last_name,
                        user=author)
                    AuthorEntryRank.objects.get_or_create(author=_author,
                                                          entry=entry,
                                                          rank=rank)
                try:
                    source_reference = SourceReferenceBibliography.objects.get(
                        source=entry)
                except SourceReferenceBibliography.DoesNotExist:
                    source_reference = (
                        SourceReferenceBibliography.objects.create(
                            source=entry))
            else:
                raise ValueError('Peer reviewed should have a DOI')
        elif category.startswith('published') or 'thesis' in category:
            # Document
            if document:
                source_reference = (SourceReference.create_source_reference(
                    category='document', source_id=document.id, note=None))
        elif 'database' in category:
            reference_name = _reference_name_with_year(reference, source_year)
            database_record, _ = (
                DatabaseRecord.objects.get_or_create(name=reference_name))
            source_reference = (SourceReference.create_source_reference(
                category='database', source_id=database_record.id, note=None))
        else:
            # Unpublished data
            reference_name = _reference_name_with_year(reference, source_year)
            source_reference = (SourceReference.create_source_reference(
                category=None, source_id=None, note=reference_name))

    # Attach the document unless the reference's source is the document.
    if (document and source_reference
            and not isinstance(source_reference.source, Document)):
        source_reference.document = document
        source_reference.save()

    if reference and source_reference:
        source_reference.source_name = reference
        source_reference.save()
    elif reference and not source_reference:
        return 'Reference {} is not created'.format(reference), None

    return 'Reference created', source_reference
Ejemplo n.º 4
0
    def process_row(self, row):
        """
        Import one csv row as a BiologicalCollectionRecord.

        The row is resolved step by step (site, uuid, source reference,
        sampling date, taxonomy, collectors, survey, optional fields,
        chemical records) and then the record is fetched by uuid or
        created. Failures detected here are reported through
        ``self.error_file`` and a successful import through
        ``self.success_file``. When ``self.location_site``,
        ``self.parse_date`` or ``self.taxonomy`` return nothing the row is
        abandoned without a message from this method (presumably those
        helpers report their own errors - verify).

        :param row: csv row; receives the record uuid under ``UUID`` when
            the row did not provide one
        :return: None
        """
        optional_data = {}
        # -- Location site
        location_site = self.location_site(row)
        if not location_site:
            return

        # -- UUID
        # If no uuid provided then it will be generated after collection record
        # saved
        uuid_value = ''
        if self.row_value(row, UUID):
            try:
                uuid_value = uuid.UUID(self.row_value(row, UUID)[0:36]).hex
            except ValueError:
                self.error_file(error_row=row, error_message='Bad UUID format')
                return

        # -- Source reference
        message, source_reference = process_source_reference(
            reference=self.row_value(row, SOURCE),
            reference_category=self.row_value(row, REFERENCE_CATEGORY),
            doi=self.row_value(row, DOI),
            document_title=self.row_value(row, DOCUMENT_TITLE),
            document_link=self.row_value(row, DOCUMENT_UPLOAD_LINK),
            document_url=self.row_value(row, DOCUMENT_URL),
            document_author=self.row_value(row, DOCUMENT_AUTHOR),
            source_year=self.row_value(row, SOURCE_YEAR))
        if message and not source_reference:
            # Source reference data from csv exists but not created
            self.error_file(error_row=row, error_message=message)
            return
        optional_data['source_reference'] = source_reference

        # -- Sampling date
        sampling_date = self.parse_date(row)
        if not sampling_date:
            return

        # -- Processing Taxonomy
        taxonomy = self.taxonomy(row)
        if not taxonomy:
            return

        # -- Processing collectors
        custodian = self.row_value(row, CUSTODIAN)
        collectors = create_users_from_string(
            self.row_value(row, COLLECTOR_OR_OWNER))
        if not collectors:
            self.error_file(error_row=row,
                            error_message='Missing collector/owner')
            return
        # At least one collector exists from here on; the first one is
        # used as collector user and owner.
        collector = collectors[0]
        optional_data['collector'] = self.row_value(row, COLLECTOR_OR_OWNER)
        optional_data['collector_user'] = collector
        optional_data['owner'] = collector
        # Add owner and creator to location site
        # if it doesnt exist yet
        if not location_site.owner:
            location_site.owner = collector
        if not location_site.creator:
            location_site.creator = collector
        location_site.save()
        if custodian:
            for _collector in collectors:
                _collector.organization = custodian
                _collector.save()

        # -- Get or create a survey
        self.process_survey(
            row,
            location_site,
            sampling_date,
            collector=collector,
        )

        # -- Optional data - Present
        if PRESENT in row:
            optional_data['present'] = bool(self.row_value(row, PRESENT))

        # -- Process origin
        category = None
        if CATEGORY in row:
            category = self.row_value(row, CATEGORY).lower()
        if ORIGIN in row and self.row_value(row, ORIGIN):
            # A filled origin column overrides the category column.
            origin = self.row_value(row, ORIGIN).lower()
            if 'translocated' in origin or 'non-native' in origin:
                category = 'alien'
            elif origin == 'native':
                category = 'native'
            else:
                category = None
        if not category:
            category = taxonomy.origin
        optional_data['category'] = category

        # -- Optional data - Habitat
        if HABITAT in row and self.row_value(row, HABITAT):
            habitat = self.row_value(row, HABITAT)
            # The csv holds the display label; the record stores the key.
            habitat_choices = {
                v: k
                for k, v in BiologicalCollectionRecord.HABITAT_CHOICES
            }
            if habitat not in habitat_choices:
                # Report the row instead of aborting with a KeyError.
                self.error_file(
                    error_row=row,
                    error_message='Unknown habitat : {}'.format(habitat))
                return
            optional_data['collection_habitat'] = habitat_choices[habitat]

        # -- Optional data - Sampling method
        sampling_method = None
        if SAMPLING_METHOD in row and self.row_value(row, SAMPLING_METHOD):
            if self.row_value(row, SAMPLING_METHOD).lower() != 'unspecified':
                try:
                    sampling_method, _ = (
                        SamplingMethod.objects.get_or_create(
                            sampling_method=self.row_value(
                                row, SAMPLING_METHOD)))
                except SamplingMethod.MultipleObjectsReturned:
                    sampling_method = (SamplingMethod.objects.filter(
                        sampling_method=self.row_value(row, SAMPLING_METHOD))
                                       )[0]
        if sampling_method:
            optional_data['sampling_method'] = sampling_method

        # -- Optional data - Sampling effort
        sampling_effort = ''
        if SAMPLING_EFFORT_VALUE in row and self.row_value(
                row, SAMPLING_EFFORT_VALUE):
            sampling_effort += self.row_value(row, SAMPLING_EFFORT_VALUE) + ' '
        if self.row_value(row, SAMPLING_EFFORT):
            sampling_effort += self.row_value(row, SAMPLING_EFFORT)
        optional_data['sampling_effort'] = sampling_effort

        # -- Optional data - Processing biotope
        # Broad biotope
        optional_data['biotope'] = self.biotope(row, BROAD_BIOTOPE,
                                                BIOTOPE_TYPE_BROAD)
        # -- Optional data - Specific biotope
        optional_data['specific_biotope'] = self.biotope(
            row, SPECIFIC_BIOTOPE, BIOTOPE_TYPE_SPECIFIC)
        # -- Optional data - Substratum
        optional_data['substratum'] = self.biotope(row, SUBSTRATUM,
                                                   BIOTOPE_TYPE_SUBSTRATUM)

        # -- Optional data - Abundance
        abundance_type = ''
        abundance_number = None

        if self.row_value(row, ABUNDANCE_MEASURE):
            abundance_type = self.row_value(row, ABUNDANCE_MEASURE).lower()
            if 'count' in abundance_type:
                abundance_type = 'number'
            elif 'density' in abundance_type:
                abundance_type = 'density'
            elif 'percentage' in abundance_type:
                abundance_type = 'percentage'
        if self.row_value(row, ABUNDANCE_VALUE):
            try:
                abundance_number = float(self.row_value(row, ABUNDANCE_VALUE))
            except ValueError:
                # A non-numeric abundance is ignored, not a row error.
                pass
        # Compare against None so that a parsed abundance of 0 is stored.
        if abundance_number is not None:
            optional_data['abundance_number'] = abundance_number
        if abundance_type:
            optional_data['abundance_type'] = abundance_type

        # -- Processing chemical records
        self.chemical_records(row, location_site, sampling_date)

        record = None
        fields = {
            'site': location_site,
            'original_species_name': self.row_value(row, SPECIES_NAME),
            'collection_date': sampling_date,
            'taxonomy': taxonomy,
            'category': category,
            'collector_user': collector
        }
        if uuid_value:
            records = BiologicalCollectionRecord.objects.filter(
                uuid=uuid_value)
            if records.exists():
                # Known uuid: overwrite the identifying fields in place.
                records.update(**fields)
                record = records[0]
            else:
                fields['uuid'] = uuid_value

        if not record:
            try:
                record, _ = (BiologicalCollectionRecord.objects.get_or_create(
                    **fields))
            except Exception as e:  # noqa
                self.error_file(error_row=row, error_message=str(e))
                return
        if not uuid_value:
            row[UUID] = record.uuid

        # Update existing data
        if self.survey:
            record.survey = self.survey
        if self.upload_session.module_group:
            record.module_group = self.upload_session.module_group
        for field in optional_data:
            setattr(record, field, optional_data[field])

        # -- Additional data
        record.additional_data = json.dumps(row)
        record.validated = True
        record.save()

        site_id = str(record.site.id)
        if site_id not in self.site_ids:
            self.site_ids.append(site_id)

        self.success_file(success_row=row, data_id=record.id)
Ejemplo n.º 5
0
 def csv_dict_reader(self, csv_reader):
     """
     Reconcile stored source references with the rows of a csv file.

     For every row the first source reference whose bibliography or
     document title contains the row title is looked up. When its
     reference type does not contain the row's reference category, the
     reference is replaced by a document reference built from the upload
     link or, failing that, from the URL. Otherwise only repeated spaces
     in the stored title are collapsed.

     :param csv_reader: iterable of dict rows with the keys 'Title',
         'URL', 'Document Upload Link', 'Reference category', 'Year' and
         'Author(s)'
     """

     def replace_with_document(old_reference, document):
         # Point bio and chemical records at a new document reference,
         # then remove the reference it replaces.
         new_source_reference = (
             SourceReference.create_source_reference(
                 category='document',
                 source_id=document.id,
                 note=None
             )
         )
         print('---Update bio records---')
         BiologicalCollectionRecord.objects.filter(
             source_reference=old_reference
         ).update(
             source_reference=new_source_reference
         )
         ChemicalRecord.objects.filter(
             source_reference=old_reference
         ).update(
             source_reference=new_source_reference
         )
         print('---Delete old source reference---')
         old_reference.delete()

     for row in csv_reader:
         title = row['Title']
         # Collapse runs of spaces into a single space.
         fixed_title = re.sub(' +', ' ', title)
         url = row['URL']
         dul = row['Document Upload Link']
         reference_category = row['Reference category']
         source_references = SourceReference.objects.filter(
             Q(sourcereferencebibliography__source__title__icontains=title) |
             Q(sourcereferencedocument__source__title__icontains=title)
         )
         if not source_references.exists():
             continue
         source_reference = source_references[0]

         category = reference_category.lower()
         if category in source_reference.reference_type.lower():
             # Type already matches; only normalise the stored title.
             if title != fixed_title:
                 print('---Fix title---')
                 print(fixed_title)
                 source_reference.source.title = fixed_title
                 source_reference.source.save()
             continue

         print('---Change to document---')
         replaced = False
         if dul:
             try:
                 # The last path segment of the link is the document id.
                 document_id = int(dul.split('/')[-1])
                 document = Document.objects.get(id=document_id)
             except (ValueError, Document.DoesNotExist):
                 print('Document {} does not exist'.format(dul))
             else:
                 print('---Create new source reference')
                 replace_with_document(source_reference, document)
                 replaced = True
         # The URL is only a fallback: once the upload link replaced the
         # reference, the old one is deleted and must not be used again.
         if url and not replaced:
             document_fields = {
                 'doc_url': url,
                 'title': fixed_title
             }
             if row['Year']:
                 document_fields['date'] = date(
                     year=int(row['Year']),
                     month=1,
                     day=1
                 )
             authors = create_users_from_string(row['Author(s)'])
             document_fields['owner'] = (
                 authors[0] if len(authors) > 0 else None
             )
             document, _ = Document.objects.get_or_create(
                 **document_fields
             )
             replace_with_document(source_reference, document)
         if category == 'unpublished data':
             print(fixed_title)
    def handle(self, *args, **options):
        source_collection = options.get('source_collection')
        file_name = options.get('csv_file')
        json_additional_data = options.get('additional_data')
        only_add = options.get('only_add')
        self.find_taxon = ast.literal_eval(options.get('find_taxon'))
        if not self.group_key:
            self.group_key = options.get('group_key')
        if self.group_key:
            try:
                self.group = (TaxonGroup.objects.get(
                    name__iexact=self.group_key))
            except TaxonGroup.DoesNotExist:
                self.group = None
        try:
            sites_only = ast.literal_eval(options.get('sites_only'))
        except ValueError:
            sites_only = False
        try:
            additional_data = json.loads(json_additional_data)
        except ValueError:
            additional_data = {}

        self.disconnect_signals()

        file_path = os.path.join(settings.MEDIA_ROOT, file_name)

        with open(file_path, 'r') as csvfile:
            csv_reader = csv.DictReader(csvfile)
            for index, record in enumerate(csv_reader):
                try:
                    uuid_value = None

                    # -- Processing LocationSite
                    location_site = self.location_site(record)
                    if sites_only:
                        log('Importing sites data : %s' % str(location_site))
                        continue

                    if UUID in record and self.row_value(record, UUID):
                        try:
                            uuid_value = uuid.UUID(
                                self.row_value(record, UUID)[0:36]).hex
                        except ValueError:
                            self.add_to_error_summary('Bad UUID format', index)
                            continue
                    if uuid_value:
                        if BiologicalCollectionRecord.objects.filter(
                                uuid=uuid_value).exists():
                            if only_add:
                                bio = (
                                    BiologicalCollectionRecord.objects.filter(
                                        uuid=uuid_value)[0])
                                bio_additional_data = bio.additional_data
                                try:
                                    bio_additional_data = json.loads(
                                        bio_additional_data)
                                except ValueError:
                                    bio_additional_data = {}
                                for key in additional_data:
                                    bio_additional_data[key] = additional_data[
                                        key]
                                bio.additional_data = bio_additional_data
                                bio.save()
                                continue

                    log('Processing : %s' %
                        self.row_value(record, SPECIES_NAME))
                    optional_records = {}

                    if self.row_value(record,
                                      SAMPLING_DATE).lower() == 'unspecified':
                        self.add_to_error_summary(
                            'Unspecified date -> Next row', index)
                        continue
                    sampling_date = self.parse_date(
                        self.row_value(record, SAMPLING_DATE))

                    # -- Processing Taxonomy
                    taxonomy = self.taxonomy(record, index)
                    if not taxonomy:
                        continue

                    # -- Processing collectors
                    collectors = create_users_from_string(
                        self.row_value(record, COLLECTOR_OR_OWNER))
                    optional_records['collector'] = self.row_value(
                        record, COLLECTOR_OR_OWNER)
                    if len(collectors) > 0:
                        optional_records['collector_user'] = collectors[0]
                        # Add owner and creator to location site
                        # if it doesnt exist yet
                        if not location_site.owner:
                            location_site.owner = collectors[0]
                        if not location_site.creator:
                            location_site.creator = collectors[0]
                        location_site.save()
                        for collector in collectors:
                            collector.organization = self.row_value(
                                record, CUSTODIAN)
                            collector.save()

                    # -- Get superuser as owner
                    superusers = get_user_model().objects.filter(
                        is_superuser=True)

                    # -- Get or create a survey
                    try:
                        self.survey, _ = Survey.objects.get_or_create(
                            site=location_site,
                            date=sampling_date,
                            collector_user=collectors[0]
                            if len(collectors) > 0 else None,
                            owner=superusers[0])
                    except Survey.MultipleObjectsReturned:
                        self.survey = Survey.objects.filter(
                            site=location_site,
                            date=sampling_date,
                            collector_user=collectors[0]
                            if len(collectors) > 0 else None,
                            owner=superusers[0])[0]

                    all_survey_data = {
                        WATER_LEVEL: 'Water level',
                        WATER_TURBIDITY: 'Water turbidity',
                        EMBEDDEDNESS: 'Embeddedness'
                    }
                    for survey_data_key in all_survey_data:
                        if survey_data_key in record and self.row_value(
                                record, survey_data_key):
                            survey_data, _ = SurveyData.objects.get_or_create(
                                name=all_survey_data[survey_data_key])
                            survey_option = SurveyDataOption.objects.filter(
                                option__iexact=self.row_value(
                                    record, survey_data_key).strip(),
                                survey_data=survey_data)
                            if not survey_option.exists():
                                survey_option = SurveyDataOption.objects.create(
                                    options=self.row_value(
                                        record, survey_data_key).strip(),
                                    survey_data=survey_data)
                            else:
                                survey_option = survey_option[0]
                            if survey_option:
                                SurveyDataValue.objects.get_or_create(
                                    survey=self.survey,
                                    survey_data=survey_data,
                                    survey_data_option=survey_option,
                                )

                    # -- Processing source reference
                    optional_records['source_reference'] = (
                        self.source_reference(record, index))

                    # Custodian field)
                    if PRESENT in record:
                        optional_records['present'] = bool(
                            self.row_value(record, PRESENT))
                    category = ''
                    if CATEGORY in record:
                        category = self.row_value(record, CATEGORY).lower()
                    if ORIGIN in record and self.row_value(record, ORIGIN):
                        origin = self.row_value(record, ORIGIN)
                        if ('translocated' in origin.lower()
                                or 'non-native' in origin.lower()):
                            category = 'alien'
                        elif 'native' == origin.lower():
                            category = 'native'
                        else:
                            category = None

                    if HABITAT in record and self.row_value(record, HABITAT):
                        habitat_choices = {
                            v: k
                            for k, v in
                            BiologicalCollectionRecord.HABITAT_CHOICES
                        }
                        optional_records['collection_habitat'] = (
                            habitat_choices[self.row_value(record, HABITAT)])

                    # Sampling method
                    sampling_method = None
                    if SAMPLING_METHOD in record and self.row_value(
                            record, SAMPLING_METHOD):
                        if self.row_value(
                                record,
                                SAMPLING_METHOD).lower() != 'unspecified':
                            try:
                                sampling_method, sm_created = (
                                    SamplingMethod.objects.get_or_create(
                                        sampling_method=self.row_value(
                                            record, SAMPLING_METHOD)))
                            except SamplingMethod.MultipleObjectsReturned:
                                sampling_method = (
                                    SamplingMethod.objects.filter(
                                        sampling_method=self.row_value(
                                            record, SAMPLING_METHOD)))[0]

                    # Sampling effort
                    sampling_effort = ''
                    if SAMPLING_EFFORT_VALUE in record and self.row_value(
                            record, SAMPLING_EFFORT_VALUE):
                        sampling_effort += self.row_value(
                            record, SAMPLING_EFFORT_VALUE) + ' '
                    if self.row_value(record, SAMPLING_EFFORT):
                        sampling_effort += self.row_value(
                            record, SAMPLING_EFFORT)
                    optional_records['sampling_effort'] = sampling_effort

                    # -- Processing biotope
                    # Broad biotope
                    optional_records['biotope'] = self.biotope(
                        record, BROAD_BIOTOPE, BIOTOPE_TYPE_BROAD)
                    # Specific biotope
                    optional_records['specific_biotope'] = self.biotope(
                        record, SPECIFIC_BIOTOPE, BIOTOPE_TYPE_SPECIFIC)
                    # Substratum
                    optional_records['substratum'] = self.biotope(
                        record, SUBSTRATUM, BIOTOPE_TYPE_SUBSTRATUM)

                    # -- Processing Abundance
                    abundance_type = ''
                    abundance_number = None

                    if self.row_value(record, ABUNDANCE_MEASURE):
                        abundance_type = self.row_value(
                            record, ABUNDANCE_MEASURE).lower()
                        if 'count' in abundance_type:
                            abundance_type = 'number'
                        elif 'density' in abundance_type:
                            abundance_type = 'density'
                        elif 'percentage' in abundance_type:
                            abundance_type = 'percentage'
                    if self.row_value(record, ABUNDANCE_VALUE):
                        try:
                            abundance_number = float(
                                self.row_value(record, ABUNDANCE_VALUE))
                        except ValueError:
                            pass

                    # -- Processing chemical records
                    self.chemical_records(record, location_site, sampling_date)

                    created = False
                    collection_record = None
                    if uuid_value:
                        collection_records = (
                            BiologicalCollectionRecord.objects.filter(
                                uuid=uuid_value))
                        if collection_records.exists():
                            collection_records.update(
                                site=location_site,
                                original_species_name=self.row_value(
                                    record, SPECIES_NAME),
                                collection_date=sampling_date,
                                taxonomy=taxonomy,
                                category=category,
                                collector=self.row_value(record, COLLECTOR),
                                sampling_method=sampling_method,
                                abundance_type=abundance_type,
                                abundance_number=abundance_number)
                            collection_record = collection_records[0]

                    if not collection_record:
                        fields = {
                            'site':
                            location_site,
                            'original_species_name':
                            self.row_value(record, SPECIES_NAME),
                            'collection_date':
                            sampling_date,
                            'taxonomy':
                            taxonomy,
                            'category':
                            category,
                            'sampling_method':
                            sampling_method,
                            'abundance_type':
                            abundance_type,
                            'abundance_number':
                            abundance_number
                        }
                        if uuid_value:
                            fields['uuid'] = uuid_value
                        try:
                            collection_record, created = (
                                BiologicalCollectionRecord.objects.
                                get_or_create(**fields))
                            collection_record.collector = self.row_value(
                                record, COLLECTOR)
                            if not created:
                                if collection_record.uuid and uuid_value:
                                    if collection_record.uuid != uuid_value:
                                        self.data_duplicated += 1
                                        self.add_to_error_summary(
                                            'Duplicated data', index, False)
                                        continue

                        except BiologicalCollectionRecord.MultipleObjectsReturned:
                            BiologicalCollectionRecord.objects.filter(
                                **fields).delete()
                            collection_record = BiologicalCollectionRecord.objects.create(
                                **fields)
                            created = True

                    # More additional data
                    if CATCH_NUMBER in record:
                        additional_data['catch_per_number'] = (self.row_value(
                            record, CATCH_NUMBER))
                    if CATCH_PER_UNIT in record:
                        additional_data['catch_per_unit_effort'] = (
                            self.row_value(record, CATCH_PER_UNIT))
                    if NUMBER_OF_REPLICATES in record:
                        additional_data['number_of_replicates'] = (
                            self.row_value(record, NUMBER_OF_REPLICATES))

                    collection_record.notes = self.row_value(record, NOTES)
                    collection_record.owner = superusers[0]
                    collection_record.additional_data = additional_data
                    collection_record.source_collection = source_collection
                    collection_record.survey = self.survey
                    if self.group:
                        collection_record.module_group = self.group
                    for field in optional_records:
                        setattr(collection_record, field,
                                optional_records[field])
                    collection_record.save()

                    if not created:
                        self.data_updated += 1
                    else:
                        self.data_added += 1

                    # Update common names
                    if COMMON_NAME in record and self.row_value(
                            record, COMMON_NAME):
                        common_name = self.row_value(record, COMMON_NAME)
                        try:
                            vernacular_name, vernacular_created = (
                                VernacularName.objects.get_or_create(
                                    name=common_name, language='eng'))
                        except VernacularName.MultipleObjectsReturned:
                            vernacular_name = VernacularName.objects.filter(
                                name=common_name)[0]
                        collection_record.taxonomy.vernacular_names.clear()
                        collection_record.taxonomy.vernacular_names.add(
                            vernacular_name)

                    # Import more additional data
                    self.import_additional_data(collection_record, record)
                    collection_record.save()

                except KeyError as e:
                    self.add_to_error_summary(
                        'KeyError : {}'.format(e.message), index)
                    continue
                except ValueError as e:
                    self.add_to_error_summary(
                        'ValueError : {}'.format(e.message), index)
                    continue

        self.summary['data_added'] = self.data_added
        self.summary['data_updated'] = self.data_updated
        self.summary['data_failed'] = self.data_failed
        self.summary['data_duplicated'] = self.data_duplicated
        self.summary['total_processed_data'] = (self.data_added +
                                                self.data_updated +
                                                self.data_failed +
                                                self.data_duplicated)
        self.summary['error_list'] = self.errors
        self.summary['warning_list'] = self.warnings
        log(json.dumps(self.summary))
        self.reconnect_signals()
    def source_reference(self, record, index):
        """
        Find or create the source reference described by a csv record.

        A document is resolved first, from the upload link and/or the
        document url. The source reference itself is then resolved in
        this order:

        1. By DOI: an existing bibliography entry is reused, otherwise
           the DOI is fetched with ``DOILoader``.
        2. By reference category, when the DOI did not give a result:
           peer-reviewed (bibliography entry built from url, title,
           author and year), published/thesis (the resolved document),
           database (a database record), anything else (stored as a
           note).

        :param record: csv record
        :param index: value forwarded to ``add_to_error_summary`` when a
            problem is reported for this record
        :return: the source reference, or None if none could be created
        :raises ValueError: if the source year cannot be converted to a
            date, or if a peer-reviewed reference has no usable DOI and
            lacks url, title, author or year
        """
        source_reference = None
        reference = self.row_value(record, SOURCE)
        reference_category = self.row_value(record, REFERENCE_CATEGORY)
        doi = self.row_value(record, DOI)
        document_link = self.row_value(record, DOCUMENT_UPLOAD_LINK)
        document_url = self.row_value(record, DOCUMENT_URL)
        document = None
        source_reference_found = False

        # if there is document link, get the id of the document
        if document_link:
            # The id is the last path segment. Trailing slashes are
            # dropped first, otherwise that segment would be empty and
            # the document could never be found.
            document_id = document_link.rstrip('/').split('/')[-1]
            try:
                document = Document.objects.get(id=int(document_id))
            except (ValueError, Document.DoesNotExist):
                # Report the value taken from the link, also when it is
                # not a number
                log('Document {} does not exist'.format(document_id))

        # if there is document url, get or create document based on url
        if document_url:
            document_fields = {
                'doc_url': document_url,
                'title': self.row_value(record, DOCUMENT_TITLE),
            }
            if self.row_value(record, SOURCE_YEAR):
                # Only the year is known, so the date is pinned to
                # the first of January
                document_fields['date'] = date(
                    year=int(self.row_value(record, SOURCE_YEAR)),
                    month=1,
                    day=1)
            authors = create_users_from_string(
                self.row_value(record, DOCUMENT_AUTHOR))
            document_fields['owner'] = authors[0] if authors else None
            try:
                document, _ = Document.objects.get_or_create(
                    **document_fields)
            except Document.MultipleObjectsReturned:
                # Same fallback as the other lookups in this importer:
                # take the first match instead of aborting
                document = Document.objects.filter(**document_fields)[0]

        # if DOI provided, check in bibliography records
        if doi:
            try:
                try:
                    entry = Entry.objects.get(doi=doi)
                except Entry.MultipleObjectsReturned:
                    entry = Entry.objects.filter(doi=doi)[0]
                source_reference, _ = (
                    SourceReferenceBibliography.objects.get_or_create(
                        source=entry))
                source_reference_found = True
            except Entry.DoesNotExist:
                # No entry with this DOI yet, try to fetch it
                doi_loader = DOILoader()
                try:
                    doi_loader.load_records(DOIs=[doi])
                    doi_loader.save_records()
                    try:
                        entry = Entry.objects.get(doi__iexact=doi)
                    except Entry.MultipleObjectsReturned:
                        entry = Entry.objects.filter(doi__iexact=doi)[0]
                    source_reference = (
                        SourceReference.create_source_reference(
                            category='bibliography',
                            source_id=entry.id,
                            note=None))
                    source_reference_found = True
                except (DOILoaderError, requests.exceptions.HTTPError,
                        Entry.DoesNotExist):
                    # Not fatal: the reference category is tried next
                    self.add_to_error_summary(
                        'Error Fetching DOI : {doi}'.format(doi=doi, ),
                        index,
                        only_log=True)

        if not source_reference_found:
            category = reference_category.lower()
            if 'peer-reviewed' in category:
                # Peer reviewed
                # should be bibliography type
                # If url, title, year, and author(s) exist, create new entry
                if (document_url
                        and self.row_value(record, DOCUMENT_TITLE)
                        and self.row_value(record, DOCUMENT_AUTHOR)
                        and self.row_value(record, SOURCE_YEAR)):
                    optional_values = {}
                    if doi:
                        optional_values['doi'] = doi
                    entry, _ = Entry.objects.get_or_create(
                        url=document_url,
                        title=self.row_value(record, DOCUMENT_TITLE),
                        publication_date=date(
                            int(self.row_value(record, SOURCE_YEAR)), 1, 1),
                        is_partial_publication_date=True,
                        type='article',
                        **optional_values)
                    authors = create_users_from_string(
                        self.row_value(record, DOCUMENT_AUTHOR))
                    # Rank follows the order of the authors, from 1
                    for rank, author in enumerate(authors, 1):
                        _author, _ = Author.objects.get_or_create(
                            first_name=author.first_name,
                            last_name=author.last_name,
                            user=author)
                        AuthorEntryRank.objects.get_or_create(
                            author=_author,
                            entry=entry,
                            rank=rank)
                    source_reference, _ = (
                        SourceReferenceBibliography.objects.get_or_create(
                            source=entry))
                else:
                    raise ValueError('Peer reviewed should have a DOI')
            elif (category.startswith('published')
                  or 'thesis' in category):
                # Document; nothing is created when no document was
                # resolved above
                if document:
                    source_reference = (
                        SourceReference.create_source_reference(
                            category='document',
                            source_id=document.id,
                            note=None))
            else:
                # Database and unpublished data are both named after
                # the reference, followed by the year when there is one
                reference_name = reference
                if self.row_value(record, SOURCE_YEAR):
                    reference_name += ', ' + self.row_value(
                        record, SOURCE_YEAR)
                if 'database' in category:
                    database_record, _ = (
                        DatabaseRecord.objects.get_or_create(
                            name=reference_name))
                    source_reference = (
                        SourceReference.create_source_reference(
                            category='database',
                            source_id=database_record.id,
                            note=None))
                else:
                    # Unpublished data
                    source_reference = (
                        SourceReference.create_source_reference(
                            category=None,
                            source_id=None,
                            note=reference_name))

        if source_reference:
            # Collect the changes first so the record is saved once
            changed = False
            if document and not isinstance(
                    source_reference.source, Document):
                source_reference.document = document
                changed = True
            if reference:
                source_reference.source_name = reference
                changed = True
            if changed:
                source_reference.save()
        elif reference:
            self.add_to_error_summary(
                'Reference {} is not created'.format(reference), index)

        return source_reference
Ejemplo n.º 8
0
    def form_valid(self, form):
        """
        Save the uploaded document and answer with its summary as JSON.

        The document is owned by the requesting user, tagged with the
        ``bims_source_reference`` keyword when that keyword can be found
        or created, and given a ``BimsDocument`` holding the extra
        metadata and the authors taken from the form data.

        :param form: the valid document upload form
        :return: HttpResponse (status 200) whose JSON body has the
            ``id``, ``title``, ``author`` and ``year`` of the document
        """
        import logging

        self.object = form.save(commit=False)
        self.object.owner = self.request.user
        # by default, if RESOURCE_PUBLISHING=True then document.is_published
        # must be set to False
        # RESOURCE_PUBLISHING works in similar way as ADMIN_MODERATE_UPLOADS,
        # but is applied to documents only. ADMIN_MODERATE_UPLOADS has wider
        # usage
        self.object.is_published = not (
            settings.RESOURCE_PUBLISHING
            or settings.ADMIN_MODERATE_UPLOADS)

        # save abstract
        try:
            self.object.abstract = form.data['description']
        except KeyError:
            pass

        # Save document source
        try:
            self.object.supplemental_information = json.dumps(
                {'document_source': form.data['document_source']})
        except KeyError:
            pass

        self.object.save()
        # The parent's response is dropped on purpose, a JSON response
        # is returned below instead
        super(SourceReferenceBimsDocumentUploadView, self).form_valid(form)

        # tag keyword of document as Bims Source Reference
        keyword = None
        try:
            keyword = HierarchicalKeyword.objects.get(
                slug='bims_source_reference')
        except HierarchicalKeyword.DoesNotExist:
            try:
                # The new keyword takes the path following the last
                # keyword of depth 1
                last_keyword = HierarchicalKeyword.objects.filter(
                    depth=1).order_by('path').last()
                path = last_keyword.path if last_keyword else '0000'
                path = "{:04d}".format(int(path) + 1)
                keyword, _ = HierarchicalKeyword.objects.get_or_create(
                    slug='bims_source_reference',
                    name='Bims Source Reference',
                    depth=1,
                    path=path)
            except Exception:
                # Tagging stays best effort so the upload still
                # succeeds, but the failure is recorded instead of
                # being silently discarded
                logging.getLogger(__name__).exception(
                    'Unable to create keyword bims_source_reference')
        if keyword:
            TaggedContentItem.objects.get_or_create(
                content_object=self.object,
                tag=keyword)

        # add additional metadata
        bims_document, _ = BimsDocument.objects.get_or_create(
            document=self.object)
        bims_document.update_metadata(form.data)

        # Update authors; the current authors are kept when the form
        # gives no author or no user could be made from it
        try:
            authors = create_users_from_string(form.data['author'])
            if authors:
                bims_document.authors.clear()
                for author in authors:
                    bims_document.authors.add(author)
        except KeyError:
            pass

        summary = {
            'id': self.object.id,
            'title': self.object.title,
            'author': self.object.bimsdocument.authors_string,
            'year': self.object.bimsdocument.year
        }
        return HttpResponse(json.dumps(summary),
                            content_type='application/json',
                            status=200)