def import_additional_data(self, collection_record, record): """ Override this to import additional data to collection_record. :param collection_record: BiologicalCollectionRecord object :param record: csv record """ # -- Algae data try: algae_data, _ = AlgaeData.objects.get_or_create( survey=self.survey ) except AlgaeData.MultipleObjectsReturned: algae_data = AlgaeData.objects.filter( survey=self.survey )[0] print('Duplicated algae data') algae_data.curation_process = record[CURATION_PROCESS] algae_data.indicator_chl_a = record[INDICATOR_CHL_A] algae_data.indicator_afdm = record[INDICATOR_AFDM] if record[AI]: algae_data.ai = record[AI] algae_data.save() # -- Analyst analyst = create_users_from_string(record[ANALYST]) if analyst: analyst = analyst[0] analyst.organization = record[ANALYST_INSTITUTE] analyst.save() collection_record.analyst = analyst collection_record.save()
def handle(self, *args, **options):
    """
    Assign collector_user/owner for gbif-sourced collection records.

    Looks at every BiologicalCollectionRecord fetched from gbif that has a
    non-empty reference, and for records that have a free-text collector but
    no linked collector_user, creates/looks up users from the collector
    string and assigns the first one as collector_user and owner.
    """
    # Mute the post-save handler so each save below does not trigger the
    # collection post-processing signal.
    # NOTE(review): the signal is never reconnected in this method —
    # confirm that is intentional for this management command.
    signals.post_save.disconnect(
        collection_post_save_handler,
        sender=BiologicalCollectionRecord
    )
    # Get all collections that came from gbif and have reference
    collections = BiologicalCollectionRecord.objects.filter(
        additional_data__fetch_from_gbif=True
    ).exclude(reference__iexact='')
    # Hoist the COUNT out of the loop: the original called
    # collections.count() on every iteration, issuing one extra
    # COUNT query per processed record.
    total = collections.count()
    for index, collection in enumerate(collections, start=1):
        log('Processing : {index}/{len}'.format(
            index=index,
            len=total
        ))
        # Only fill in collector_user when a free-text collector exists
        # and no user has been linked yet.
        if collection.collector and not collection.collector_user:
            users = create_users_from_string(collection.collector)
            if len(users) > 0:
                log('Update owner and collector to {}'.format(
                    users[0].username
                ))
                collection.collector_user = users[0]
                collection.owner = users[0]
                collection.save()
def process_source_reference(
        reference=None,
        reference_category=None,
        doi=None,
        document_link=None,
        document_url=None,
        document_title=None,
        document_author=None,
        source_year=None):
    """Process source reference data from a csv row.

    Depending on what the row provides, resolves or creates a bibliography
    entry (via DOI), a Document, a DatabaseRecord, or an unpublished-data
    note, and wraps it in a SourceReference.

    :return: tuple ``(message, source_reference)`` — ``source_reference`` is
        None when nothing was created; ``message`` explains why (empty
        string means "nothing to do").
    """
    source_reference = None
    document_id = 0
    document = None
    source_reference_found = False
    # Nothing provided at all -> nothing to do.
    # BUGFIX: this check now runs BEFORE the author check. Previously the
    # 'Missing author' return came first, so a completely blank row was
    # reported as a missing-author error and this branch was unreachable.
    if (not reference and not reference_category and not doi and
            not document_link and not document_url and
            not document_title and not document_author and
            not source_year):
        return '', None
    if not document_author:
        return 'Missing author', None
    # Reference and title default to each other when only one is given.
    if not reference:
        if document_title:
            reference = document_title
    if not document_title and reference:
        document_title = reference
    # if there is document link, get the id of the document
    # (the id is the last path segment of the link)
    if document_link:
        try:
            doc_split = document_link.split('/')
            document_id = int(doc_split[len(doc_split) - 1])
            document = Document.objects.get(id=document_id)
        except (ValueError, Document.DoesNotExist):
            return 'Document {} does not exist'.format(document_id), None
    # if there is document url, get or create document based on url
    if document_url:
        document_fields = {'doc_url': document_url}
        if source_year:
            document_fields['date'] = date(
                year=int(source_year), month=1, day=1)
        authors = create_users_from_string(document_author)
        if len(authors) > 0:
            author = authors[0]
        else:
            author = None
        document_fields['owner'] = author
        document = get_or_create_data_from_model(Document, document_fields)
        try:
            bims_document, _ = BimsDocument.objects.get_or_create(
                document=document)
        except BimsDocument.MultipleObjectsReturned:
            bims_document = BimsDocument.objects.filter(document=document)[0]
        for author in authors:
            bims_document.authors.add(author)
        if document.title != document_title:
            document.title = document_title
            document.save()
    # if DOI provided, check in bibliography records;
    # fetch from the DOI service when not found locally
    if doi:
        entry = get_or_create_data_from_model(
            model=Entry,
            fields={'doi': doi},
            create=False)
        if not entry:
            doi_loader = DOILoader()
            try:
                doi_loader.load_records(DOIs=[doi])
                doi_loader.save_records()
                entry_fields = {'doi__iexact': doi}
                entry = get_or_create_data_from_model(
                    Entry, entry_fields, create=False)
                if entry:
                    source_reference = (
                        SourceReference.create_source_reference(
                            category='bibliography',
                            source_id=entry.id,
                            note=None))
                    source_reference_found = True
            except (DOILoaderError, requests.exceptions.HTTPError) as e:
                print(e)
            finally:
                # DOI could not be resolved at all -> hard error for the row.
                if not entry:
                    return 'Error Fetching DOI : {doi}'.format(doi=doi), None
        if entry and not source_reference:
            SourceReference.create_source_reference(
                category='bibliography',
                source_id=entry.id,
                note=None)
            source_reference, _ = (
                SourceReferenceBibliography.objects.get_or_create(
                    source=entry))
            source_reference_found = True
    if not source_reference_found:
        # BUGFIX: guard against reference_category=None — calling .lower()
        # on None raised AttributeError here.
        category_lower = (reference_category or '').lower()
        if 'peer-reviewed' in category_lower:
            # Peer reviewed should be bibliography type.
            # If url, title, year, and author(s) exist, create a new entry
            if (document_url and document_title and
                    document_author and source_year):
                optional_values = {}
                if doi:
                    optional_values['doi'] = doi
                entry, _ = Entry.objects.get_or_create(
                    url=document_url,
                    title=document_title,
                    publication_date=date(int(source_year), 1, 1),
                    is_partial_publication_date=True,
                    type='article',
                    **optional_values)
                authors = create_users_from_string(document_author)
                # Preserve author order as rank 1..n.
                rank = 1
                for author in authors:
                    _author, _ = Author.objects.get_or_create(
                        first_name=author.first_name,
                        last_name=author.last_name,
                        user=author)
                    AuthorEntryRank.objects.get_or_create(
                        author=_author,
                        entry=entry,
                        rank=rank)
                    rank += 1
                try:
                    source_reference = (
                        SourceReferenceBibliography.objects.get(
                            source=entry))
                except SourceReferenceBibliography.DoesNotExist:
                    source_reference = (
                        SourceReferenceBibliography.objects.create(
                            source=entry))
            else:
                raise ValueError('Peer reviewed should have a DOI')
        elif (category_lower.startswith('published') or
              'thesis' in category_lower):
            # Document-backed reference
            if document:
                source_reference = (SourceReference.create_source_reference(
                    category='document',
                    source_id=document.id,
                    note=None))
        elif 'database' in category_lower:
            reference_name = reference
            if source_year:
                reference_name += ', ' + source_year
            database_record, dr_created = (
                DatabaseRecord.objects.get_or_create(name=reference_name))
            source_reference = (SourceReference.create_source_reference(
                category='database',
                source_id=database_record.id,
                note=None))
        else:
            # Unpublished data: the reference text itself is the note.
            reference_name = reference
            if source_year:
                reference_name += ', ' + source_year
            source_reference = (SourceReference.create_source_reference(
                category=None,
                source_id=None,
                note=reference_name))
    # Attach the uploaded document when the reference is not itself
    # document-backed.
    if (document and source_reference and
            not isinstance(source_reference.source, Document)):
        source_reference.document = document
        source_reference.save()
    if reference and source_reference:
        source_reference.source_name = reference
        source_reference.save()
    elif reference and not source_reference:
        return 'Reference {} is not created'.format(reference), None
    return 'Reference created', source_reference
def process_row(self, row):
    """
    Import one csv row as a BiologicalCollectionRecord.

    Resolves the location site, source reference, taxonomy and collectors,
    gathers all optional attributes into ``optional_data``, then updates an
    existing record (matched by UUID) or creates a new one. Rows that fail
    validation are written to the error file and skipped.

    :param row: csv row (dict keyed by the csv header constants)
    """
    optional_data = {}
    # -- Location site
    location_site = self.location_site(row)
    if not location_site:
        return
    # -- UUID
    # If no uuid provided then it will be generated after collection record
    # saved
    uuid_value = ''
    if self.row_value(row, UUID):
        try:
            # Only the first 36 chars are parsed (canonical UUID length).
            uuid_value = uuid.UUID(self.row_value(row, UUID)[0:36]).hex
        except ValueError:
            self.error_file(error_row=row, error_message='Bad UUID format')
            return
    # -- Source reference
    message, source_reference = process_source_reference(
        reference=self.row_value(row, SOURCE),
        reference_category=self.row_value(row, REFERENCE_CATEGORY),
        doi=self.row_value(row, DOI),
        document_title=self.row_value(row, DOCUMENT_TITLE),
        document_link=self.row_value(row, DOCUMENT_UPLOAD_LINK),
        document_url=self.row_value(row, DOCUMENT_URL),
        document_author=self.row_value(row, DOCUMENT_AUTHOR),
        source_year=self.row_value(row, SOURCE_YEAR))
    if message and not source_reference:
        # Source reference data from csv exists but not created
        self.error_file(error_row=row, error_message=message)
        return
    else:
        optional_data['source_reference'] = source_reference
    # -- Sampling date
    sampling_date = self.parse_date(row)
    if not sampling_date:
        return
    # -- Processing Taxonomy
    taxonomy = self.taxonomy(row)
    if not taxonomy:
        return
    # -- Processing collectors
    custodian = self.row_value(row, CUSTODIAN)
    collectors = create_users_from_string(
        self.row_value(row, COLLECTOR_OR_OWNER))
    if not collectors:
        self.error_file(
            error_row=row, error_message='Missing collector/owner')
        return
    collector = collectors
    optional_data['collector'] = self.row_value(row, COLLECTOR_OR_OWNER)
    # The first parsed user becomes collector_user and owner.
    if len(collectors) > 0:
        collector = collectors[0]
        optional_data['collector_user'] = collectors[0]
        optional_data['owner'] = collectors[0]
        # Add owner and creator to location site
        # if it doesnt exist yet
        if not location_site.owner:
            location_site.owner = collectors[0]
        if not location_site.creator:
            location_site.creator = collectors[0]
        location_site.save()
        # Custodian becomes every collector's organization.
        if custodian:
            for _collector in collectors:
                _collector.organization = self.row_value(row, CUSTODIAN)
                _collector.save()
    # -- Get or create a survey
    self.process_survey(
        row,
        location_site,
        sampling_date,
        collector=collectors[0],
    )
    # -- Optional data - Present
    if PRESENT in row:
        optional_data['present'] = bool(self.row_value(row, PRESENT))
    # -- Process origin
    # ORIGIN, when present, overrides CATEGORY; taxonomy.origin is the
    # final fallback.
    category = None
    if CATEGORY in row:
        category = self.row_value(row, CATEGORY).lower()
    if ORIGIN in row and self.row_value(row, ORIGIN):
        origin = self.row_value(row, ORIGIN)
        if ('translocated' in origin.lower() or
                'non-native' in origin.lower()):
            category = 'alien'
        elif 'native' == origin.lower():
            category = 'native'
        else:
            category = None
    if not category:
        category = taxonomy.origin
    optional_data['category'] = category
    # -- Optional data - Habitat
    # Map the human-readable habitat label back to its choice key.
    if HABITAT in row and self.row_value(row, HABITAT):
        habitat_choices = {
            v: k for k, v in BiologicalCollectionRecord.HABITAT_CHOICES
        }
        optional_data['collection_habitat'] = (
            habitat_choices[self.row_value(row, HABITAT)])
    # -- Optional data - Sampling method
    sampling_method = None
    if SAMPLING_METHOD in row and self.row_value(row, SAMPLING_METHOD):
        if self.row_value(row, SAMPLING_METHOD).lower() != 'unspecified':
            try:
                sampling_method, sm_created = (
                    SamplingMethod.objects.get_or_create(
                        sampling_method=self.row_value(
                            row, SAMPLING_METHOD)))
            except SamplingMethod.MultipleObjectsReturned:
                sampling_method = (SamplingMethod.objects.filter(
                    sampling_method=self.row_value(row, SAMPLING_METHOD))
                )[0]
    if sampling_method:
        optional_data['sampling_method'] = sampling_method
    # -- Optional data - Sampling effort
    # Stored as "<value> <unit/description>".
    sampling_effort = ''
    if SAMPLING_EFFORT_VALUE in row and self.row_value(
            row, SAMPLING_EFFORT_VALUE):
        sampling_effort += self.row_value(
            row, SAMPLING_EFFORT_VALUE) + ' '
    if self.row_value(row, SAMPLING_EFFORT):
        sampling_effort += self.row_value(row, SAMPLING_EFFORT)
    optional_data['sampling_effort'] = sampling_effort
    # -- Optional data - Processing biotope
    # Broad biotope
    optional_data['biotope'] = self.biotope(
        row, BROAD_BIOTOPE, BIOTOPE_TYPE_BROAD)
    # -- Optional data - Specific biotope
    optional_data['specific_biotope'] = self.biotope(
        row, SPECIFIC_BIOTOPE, BIOTOPE_TYPE_SPECIFIC)
    # -- Optional data - Substratum
    optional_data['substratum'] = self.biotope(
        row, SUBSTRATUM, BIOTOPE_TYPE_SUBSTRATUM)
    # -- Optional data - Abundance
    # Normalise the free-text measure into one of the known types.
    abundance_type = ''
    abundance_number = None
    if self.row_value(row, ABUNDANCE_MEASURE):
        abundance_type = self.row_value(row, ABUNDANCE_MEASURE).lower()
        if 'count' in abundance_type:
            abundance_type = 'number'
        elif 'density' in abundance_type:
            abundance_type = 'density'
        elif 'percentage' in abundance_type:
            abundance_type = 'percentage'
    if self.row_value(row, ABUNDANCE_VALUE):
        try:
            abundance_number = float(
                self.row_value(row, ABUNDANCE_VALUE))
        except ValueError:
            pass
    if abundance_number:
        optional_data['abundance_number'] = abundance_number
    if abundance_type:
        optional_data['abundance_type'] = abundance_type
    # -- Processing chemical records
    self.chemical_records(row, location_site, sampling_date)
    record = None
    fields = {
        'site': location_site,
        'original_species_name': self.row_value(row, SPECIES_NAME),
        'collection_date': sampling_date,
        'taxonomy': taxonomy,
        'category': category,
        'collector_user': collector
    }
    # Update in place when a record with this UUID already exists,
    # otherwise get_or_create below (with the UUID carried in fields).
    if uuid_value:
        records = BiologicalCollectionRecord.objects.filter(
            uuid=uuid_value)
        if records.exists():
            records.update(**fields)
            record = records[0]
        else:
            fields['uuid'] = uuid_value
    if not record:
        try:
            record, _ = (
                BiologicalCollectionRecord.objects.get_or_create(
                    **fields))
        except Exception as e:  # noqa
            self.error_file(error_row=row, error_message=str(e))
            return
    # Write the generated uuid back into the row so it ends up in the
    # success/additional-data output.
    if not uuid_value:
        row[UUID] = record.uuid
    # Update existing data
    if self.survey:
        record.survey = self.survey
    if self.upload_session.module_group:
        record.module_group = self.upload_session.module_group
    for field in optional_data:
        setattr(record, field, optional_data[field])
    # -- Additional data
    # The whole csv row is kept as the record's additional data.
    record.additional_data = json.dumps(row)
    record.validated = True
    record.save()
    if not str(record.site.id) in self.site_ids:
        self.site_ids.append(str(record.site.id))
    self.success_file(success_row=row, data_id=record.id)
def csv_dict_reader(self, csv_reader):
    """
    Fix source references from a csv listing (Title / URL /
    Document Upload Link / Reference category / Year / Author(s)).

    For each row, finds the matching SourceReference by title. When its
    stored reference type disagrees with the csv category, a new
    document-backed reference is created (from the upload link or URL),
    all bio/chemical records are re-pointed to it and the old reference is
    deleted. Otherwise only whitespace in the title is normalised.

    :param csv_reader: csv.DictReader over the correction file
    """
    for row in csv_reader:
        title = row['Title']
        # Collapse runs of spaces in the title.
        fixed_title = re.sub(' +', ' ', title)
        url = row['URL']
        dul = row['Document Upload Link']
        reference_category = row['Reference category']
        # Match by title against both bibliography- and document-backed
        # references.
        source_references = SourceReference.objects.filter(
            Q(sourcereferencebibliography__source__title__icontains=title) |
            Q(sourcereferencedocument__source__title__icontains=title)
        )
        if source_references.exists():
            source_reference = source_references[0]
            if reference_category.lower() not in \
                    source_reference.reference_type.lower():
                print('---Change to document---')
                # Preferred source: the uploaded document, identified by
                # the last path segment of the upload link.
                if dul:
                    try:
                        doc_split = dul.split('/')
                        document_id = int(doc_split[len(doc_split) - 1])
                        document = Document.objects.get(id=document_id)
                        print('---Create new source reference')
                        new_source_reference = (
                            SourceReference.create_source_reference(
                                category='document',
                                source_id=document.id,
                                note=None
                            )
                        )
                        print('---Update bio records---')
                        BiologicalCollectionRecord.objects.filter(
                            source_reference=source_reference
                        ).update(
                            source_reference=new_source_reference
                        )
                        ChemicalRecord.objects.filter(
                            source_reference=source_reference
                        ).update(
                            source_reference=new_source_reference
                        )
                        print('---Delete old source reference---')
                        source_reference.delete()
                    except (ValueError, Document.DoesNotExist):
                        print('Document {} does not exist'.format(
                            dul))
                # Fallback: build/find a Document from the URL row data.
                if url:
                    document_fields = {
                        'doc_url': url,
                        'title': fixed_title
                    }
                    if row['Year']:
                        document_fields['date'] = date(
                            year=int(row['Year']), month=1, day=1
                        )
                    authors = create_users_from_string(
                        row['Author(s)'])
                    if len(authors) > 0:
                        author = authors[0]
                    else:
                        author = None
                    document_fields['owner'] = author
                    document, document_created = (
                        Document.objects.get_or_create(
                            **document_fields
                        ))
                    new_source_reference = (
                        SourceReference.create_source_reference(
                            category='document',
                            source_id=document.id,
                            note=None
                        )
                    )
                    print('---Update bio records---')
                    BiologicalCollectionRecord.objects.filter(
                        source_reference=source_reference
                    ).update(
                        source_reference=new_source_reference
                    )
                    ChemicalRecord.objects.filter(
                        source_reference=source_reference
                    ).update(
                        source_reference=new_source_reference
                    )
                    print('---Delete old source reference---')
                    source_reference.delete()
                if reference_category.lower() == 'unpublished data':
                    print(fixed_title)
            else:
                # Category already matches: only fix the title whitespace.
                if title != fixed_title:
                    print('---Fix title---')
                    print(fixed_title)
                    source_reference.source.title = fixed_title
                    source_reference.source.save()
def handle(self, *args, **options):
    """
    Import biological collection records (and optional site/survey data)
    from a csv file in MEDIA_ROOT.

    Options used: ``source_collection``, ``csv_file``, ``additional_data``
    (JSON string merged into each record's additional data), ``only_add``
    (skip rows whose UUID already exists, only merging additional data),
    ``find_taxon``, ``group_key``, ``sites_only``.

    Rows that fail are recorded in the error summary; a JSON summary is
    logged at the end and model signals are reconnected.
    """
    source_collection = options.get('source_collection')
    file_name = options.get('csv_file')
    json_additional_data = options.get('additional_data')
    only_add = options.get('only_add')
    self.find_taxon = ast.literal_eval(options.get('find_taxon'))
    if not self.group_key:
        self.group_key = options.get('group_key')
    if self.group_key:
        try:
            self.group = (TaxonGroup.objects.get(
                name__iexact=self.group_key))
        except TaxonGroup.DoesNotExist:
            self.group = None
    try:
        sites_only = ast.literal_eval(options.get('sites_only'))
    except ValueError:
        sites_only = False
    try:
        additional_data = json.loads(json_additional_data)
    except ValueError:
        additional_data = {}
    # Mute post-save handlers while bulk importing.
    self.disconnect_signals()
    file_path = os.path.join(settings.MEDIA_ROOT, file_name)
    with open(file_path, 'r') as csvfile:
        csv_reader = csv.DictReader(csvfile)
        for index, record in enumerate(csv_reader):
            try:
                uuid_value = None
                # -- Processing LocationSite
                location_site = self.location_site(record)
                if sites_only:
                    log('Importing sites data : %s' % str(location_site))
                    continue
                if UUID in record and self.row_value(record, UUID):
                    try:
                        # Only the first 36 chars (canonical UUID length).
                        uuid_value = uuid.UUID(
                            self.row_value(record, UUID)[0:36]).hex
                    except ValueError:
                        self.add_to_error_summary(
                            'Bad UUID format', index)
                        continue
                if uuid_value:
                    if BiologicalCollectionRecord.objects.filter(
                            uuid=uuid_value).exists():
                        # only_add: merge the cli additional data into the
                        # existing record and skip the rest of the row.
                        if only_add:
                            bio = (
                                BiologicalCollectionRecord.objects.filter(
                                    uuid=uuid_value)[0])
                            bio_additional_data = bio.additional_data
                            try:
                                bio_additional_data = json.loads(
                                    bio_additional_data)
                            except ValueError:
                                bio_additional_data = {}
                            for key in additional_data:
                                bio_additional_data[key] = (
                                    additional_data[key])
                            bio.additional_data = bio_additional_data
                            bio.save()
                            continue
                log('Processing : %s' % self.row_value(
                    record, SPECIES_NAME))
                optional_records = {}
                if self.row_value(
                        record, SAMPLING_DATE).lower() == 'unspecified':
                    self.add_to_error_summary(
                        'Unspecified date -> Next row', index)
                    continue
                sampling_date = self.parse_date(
                    self.row_value(record, SAMPLING_DATE))
                # -- Processing Taxonomy
                taxonomy = self.taxonomy(record, index)
                if not taxonomy:
                    continue
                # -- Processing collectors
                collectors = create_users_from_string(
                    self.row_value(record, COLLECTOR_OR_OWNER))
                optional_records['collector'] = self.row_value(
                    record, COLLECTOR_OR_OWNER)
                if len(collectors) > 0:
                    optional_records['collector_user'] = collectors[0]
                    # Add owner and creator to location site
                    # if it doesnt exist yet
                    if not location_site.owner:
                        location_site.owner = collectors[0]
                    if not location_site.creator:
                        location_site.creator = collectors[0]
                    location_site.save()
                    # Custodian becomes every collector's organization.
                    for collector in collectors:
                        collector.organization = self.row_value(
                            record, CUSTODIAN)
                        collector.save()
                # -- Get superuser as owner
                superusers = get_user_model().objects.filter(
                    is_superuser=True)
                # -- Get or create a survey
                try:
                    self.survey, _ = Survey.objects.get_or_create(
                        site=location_site,
                        date=sampling_date,
                        collector_user=(
                            collectors[0] if len(collectors) > 0 else None),
                        owner=superusers[0])
                except Survey.MultipleObjectsReturned:
                    self.survey = Survey.objects.filter(
                        site=location_site,
                        date=sampling_date,
                        collector_user=(
                            collectors[0] if len(collectors) > 0 else None),
                        owner=superusers[0])[0]
                # -- Survey data (water level / turbidity / embeddedness)
                all_survey_data = {
                    WATER_LEVEL: 'Water level',
                    WATER_TURBIDITY: 'Water turbidity',
                    EMBEDDEDNESS: 'Embeddedness'
                }
                for survey_data_key in all_survey_data:
                    if survey_data_key in record and self.row_value(
                            record, survey_data_key):
                        survey_data, _ = (
                            SurveyData.objects.get_or_create(
                                name=all_survey_data[survey_data_key]))
                        survey_option = SurveyDataOption.objects.filter(
                            option__iexact=self.row_value(
                                record, survey_data_key).strip(),
                            survey_data=survey_data)
                        if not survey_option.exists():
                            survey_option = (
                                SurveyDataOption.objects.create(
                                    options=self.row_value(
                                        record, survey_data_key).strip(),
                                    survey_data=survey_data))
                        else:
                            survey_option = survey_option[0]
                        if survey_option:
                            SurveyDataValue.objects.get_or_create(
                                survey=self.survey,
                                survey_data=survey_data,
                                survey_data_option=survey_option,
                            )
                # -- Processing source reference
                optional_records['source_reference'] = (
                    self.source_reference(record, index))
                if PRESENT in record:
                    optional_records['present'] = bool(
                        self.row_value(record, PRESENT))
                # ORIGIN, when present, overrides CATEGORY.
                category = ''
                if CATEGORY in record:
                    category = self.row_value(record, CATEGORY).lower()
                if ORIGIN in record and self.row_value(record, ORIGIN):
                    origin = self.row_value(record, ORIGIN)
                    if ('translocated' in origin.lower() or
                            'non-native' in origin.lower()):
                        category = 'alien'
                    elif 'native' == origin.lower():
                        category = 'native'
                    else:
                        category = None
                # Map the human-readable habitat label to its choice key.
                if HABITAT in record and self.row_value(record, HABITAT):
                    habitat_choices = {
                        v: k for k, v in
                        BiologicalCollectionRecord.HABITAT_CHOICES
                    }
                    optional_records['collection_habitat'] = (
                        habitat_choices[self.row_value(record, HABITAT)])
                # Sampling method
                sampling_method = None
                if SAMPLING_METHOD in record and self.row_value(
                        record, SAMPLING_METHOD):
                    if self.row_value(
                            record,
                            SAMPLING_METHOD).lower() != 'unspecified':
                        try:
                            sampling_method, sm_created = (
                                SamplingMethod.objects.get_or_create(
                                    sampling_method=self.row_value(
                                        record, SAMPLING_METHOD)))
                        except SamplingMethod.MultipleObjectsReturned:
                            sampling_method = (
                                SamplingMethod.objects.filter(
                                    sampling_method=self.row_value(
                                        record, SAMPLING_METHOD)))[0]
                # Sampling effort, stored as "<value> <unit/description>"
                sampling_effort = ''
                if SAMPLING_EFFORT_VALUE in record and self.row_value(
                        record, SAMPLING_EFFORT_VALUE):
                    sampling_effort += self.row_value(
                        record, SAMPLING_EFFORT_VALUE) + ' '
                if self.row_value(record, SAMPLING_EFFORT):
                    sampling_effort += self.row_value(
                        record, SAMPLING_EFFORT)
                optional_records['sampling_effort'] = sampling_effort
                # -- Processing biotope
                optional_records['biotope'] = self.biotope(
                    record, BROAD_BIOTOPE, BIOTOPE_TYPE_BROAD)
                optional_records['specific_biotope'] = self.biotope(
                    record, SPECIFIC_BIOTOPE, BIOTOPE_TYPE_SPECIFIC)
                optional_records['substratum'] = self.biotope(
                    record, SUBSTRATUM, BIOTOPE_TYPE_SUBSTRATUM)
                # -- Processing Abundance
                abundance_type = ''
                abundance_number = None
                if self.row_value(record, ABUNDANCE_MEASURE):
                    abundance_type = self.row_value(
                        record, ABUNDANCE_MEASURE).lower()
                    if 'count' in abundance_type:
                        abundance_type = 'number'
                    elif 'density' in abundance_type:
                        abundance_type = 'density'
                    elif 'percentage' in abundance_type:
                        abundance_type = 'percentage'
                if self.row_value(record, ABUNDANCE_VALUE):
                    try:
                        abundance_number = float(
                            self.row_value(record, ABUNDANCE_VALUE))
                    except ValueError:
                        pass
                # -- Processing chemical records
                self.chemical_records(
                    record, location_site, sampling_date)
                created = False
                collection_record = None
                # Update in place when a record with this UUID exists.
                if uuid_value:
                    collection_records = (
                        BiologicalCollectionRecord.objects.filter(
                            uuid=uuid_value))
                    if collection_records.exists():
                        collection_records.update(
                            site=location_site,
                            original_species_name=self.row_value(
                                record, SPECIES_NAME),
                            collection_date=sampling_date,
                            taxonomy=taxonomy,
                            category=category,
                            collector=self.row_value(record, COLLECTOR),
                            sampling_method=sampling_method,
                            abundance_type=abundance_type,
                            abundance_number=abundance_number)
                        collection_record = collection_records[0]
                if not collection_record:
                    fields = {
                        'site': location_site,
                        'original_species_name': self.row_value(
                            record, SPECIES_NAME),
                        'collection_date': sampling_date,
                        'taxonomy': taxonomy,
                        'category': category,
                        'sampling_method': sampling_method,
                        'abundance_type': abundance_type,
                        'abundance_number': abundance_number
                    }
                    if uuid_value:
                        fields['uuid'] = uuid_value
                    try:
                        collection_record, created = (
                            BiologicalCollectionRecord.objects.
                            get_or_create(**fields))
                        collection_record.collector = self.row_value(
                            record, COLLECTOR)
                        if not created:
                            # Same data but different UUID: treat as a
                            # duplicate row, don't overwrite.
                            if collection_record.uuid and uuid_value:
                                if collection_record.uuid != uuid_value:
                                    self.data_duplicated += 1
                                    self.add_to_error_summary(
                                        'Duplicated data', index, False)
                                    continue
                    except (BiologicalCollectionRecord.
                            MultipleObjectsReturned):
                        BiologicalCollectionRecord.objects.filter(
                            **fields).delete()
                        collection_record = (
                            BiologicalCollectionRecord.objects.create(
                                **fields))
                        created = True
                # More additional data.
                # BUGFIX: work on a per-row copy — previously the single
                # dict parsed from --additional_data was mutated here and
                # shared by reference across every record, leaking
                # catch/replicate values between rows.
                record_additional_data = dict(additional_data)
                if CATCH_NUMBER in record:
                    record_additional_data['catch_per_number'] = (
                        self.row_value(record, CATCH_NUMBER))
                if CATCH_PER_UNIT in record:
                    record_additional_data['catch_per_unit_effort'] = (
                        self.row_value(record, CATCH_PER_UNIT))
                if NUMBER_OF_REPLICATES in record:
                    record_additional_data['number_of_replicates'] = (
                        self.row_value(record, NUMBER_OF_REPLICATES))
                collection_record.notes = self.row_value(record, NOTES)
                collection_record.owner = superusers[0]
                collection_record.additional_data = record_additional_data
                collection_record.source_collection = source_collection
                collection_record.survey = self.survey
                if self.group:
                    collection_record.module_group = self.group
                for field in optional_records:
                    setattr(collection_record, field,
                            optional_records[field])
                collection_record.save()
                if not created:
                    self.data_updated += 1
                else:
                    self.data_added += 1
                # Update common names
                if COMMON_NAME in record and self.row_value(
                        record, COMMON_NAME):
                    common_name = self.row_value(record, COMMON_NAME)
                    try:
                        vernacular_name, vernacular_created = (
                            VernacularName.objects.get_or_create(
                                name=common_name,
                                language='eng'))
                    except VernacularName.MultipleObjectsReturned:
                        vernacular_name = VernacularName.objects.filter(
                            name=common_name)[0]
                    collection_record.taxonomy.vernacular_names.clear()
                    collection_record.taxonomy.vernacular_names.add(
                        vernacular_name)
                # Import more additional data
                self.import_additional_data(collection_record, record)
                collection_record.save()
            except KeyError as e:
                # BUGFIX: Python 3 exceptions have no ``.message``
                # (removed per PEP 352) — formatting the exception itself
                # avoids an AttributeError that crashed the whole import.
                self.add_to_error_summary(
                    'KeyError : {}'.format(e), index)
                continue
            except ValueError as e:
                # BUGFIX: same ``.message`` fix as above.
                self.add_to_error_summary(
                    'ValueError : {}'.format(e), index)
                continue
    # -- Final summary
    self.summary['data_added'] = self.data_added
    self.summary['data_updated'] = self.data_updated
    self.summary['data_failed'] = self.data_failed
    self.summary['data_duplicated'] = self.data_duplicated
    self.summary['total_processed_data'] = (
        self.data_added + self.data_updated +
        self.data_failed + self.data_duplicated)
    self.summary['error_list'] = self.errors
    self.summary['warning_list'] = self.warnings
    log(json.dumps(self.summary))
    self.reconnect_signals()
def source_reference(self, record, index):
    """
    Resolve or create the source reference for a csv record.

    Tries, in order: an uploaded document (by link id), a document from a
    URL, a bibliography entry by DOI (fetching from the DOI service when
    not found locally), then falls back on the reference category
    (peer-reviewed / published / thesis / database / unpublished).

    :param record: csv record (dict keyed by the csv header constants)
    :param index: row index, used for error reporting
    :return: SourceReference instance or None when nothing could be made
    """
    source_reference = None
    reference = self.row_value(record, SOURCE)
    reference_category = self.row_value(record, REFERENCE_CATEGORY)
    doi = self.row_value(record, DOI)
    document_link = self.row_value(record, DOCUMENT_UPLOAD_LINK)
    document_url = self.row_value(record, DOCUMENT_URL)
    document_id = 0
    document = None
    source_reference_found = False
    # if there is document link, get the id of the document
    # (the id is the last path segment of the link)
    if document_link:
        try:
            doc_split = document_link.split('/')
            document_id = int(doc_split[len(doc_split) - 1])
            document = Document.objects.get(id=document_id)
        except (ValueError, Document.DoesNotExist):
            log('Document {} does not exist'.format(document_id))
    # if there is document url, get or create document based on url
    if document_url:
        document_fields = {
            'doc_url': document_url,
            'title': self.row_value(record, DOCUMENT_TITLE),
        }
        if self.row_value(record, SOURCE_YEAR):
            document_fields['date'] = date(
                year=int(self.row_value(record, SOURCE_YEAR)),
                month=1,
                day=1)
        # The first parsed author becomes the document owner.
        authors = create_users_from_string(
            self.row_value(record, DOCUMENT_AUTHOR))
        if len(authors) > 0:
            author = authors[0]
        else:
            author = None
        document_fields['owner'] = author
        document, document_created = Document.objects.get_or_create(
            **document_fields)
    # if DOI provided, check in bibliography records
    if doi:
        try:
            # Look up the entry locally first (first match on duplicates).
            try:
                entry = Entry.objects.get(doi=doi)
            except Entry.MultipleObjectsReturned:
                entry = Entry.objects.filter(doi=doi)[0]
            try:
                source_reference = (
                    SourceReferenceBibliography.objects.get(
                        source=entry))
            except SourceReferenceBibliography.DoesNotExist:
                source_reference = (
                    SourceReferenceBibliography.objects.create(
                        source=entry))
            source_reference_found = True
        except Entry.DoesNotExist:
            # Not stored locally: fetch the record from the DOI service.
            doi_loader = DOILoader()
            try:
                doi_loader.load_records(DOIs=[doi])
                doi_loader.save_records()
                entry = Entry.objects.get(doi__iexact=doi)
                source_reference = (
                    SourceReference.create_source_reference(
                        category='bibliography',
                        source_id=entry.id,
                        note=None))
                source_reference_found = True
            except (DOILoaderError,
                    requests.exceptions.HTTPError,
                    Entry.DoesNotExist):
                self.add_to_error_summary(
                    'Error Fetching DOI : {doi}'.format(doi=doi, ),
                    index,
                    only_log=True)
            except Entry.MultipleObjectsReturned:
                entry = Entry.objects.filter(doi__iexact=doi)[0]
                source_reference = (
                    SourceReference.create_source_reference(
                        category='bibliography',
                        source_id=entry.id,
                        note=None))
                source_reference_found = True
    if not source_reference_found:
        if ('peer-reviewed' in reference_category.lower()):
            # Peer reviewed
            # should be bibliography type
            # If url, title, year, and author(s) exists, crete new entry
            if (self.row_value(record, DOCUMENT_URL) and
                    self.row_value(record, DOCUMENT_TITLE) and
                    self.row_value(record, DOCUMENT_AUTHOR) and
                    self.row_value(record, SOURCE_YEAR)):
                optional_values = {}
                if doi:
                    optional_values['doi'] = doi
                entry, _ = Entry.objects.get_or_create(
                    url=self.row_value(record, DOCUMENT_URL),
                    title=self.row_value(record, DOCUMENT_TITLE),
                    publication_date=date(
                        int(self.row_value(record, SOURCE_YEAR)), 1, 1),
                    is_partial_publication_date=True,
                    type='article',
                    **optional_values)
                # Preserve author order as rank 1..n.
                authors = create_users_from_string(
                    self.row_value(record, DOCUMENT_AUTHOR))
                rank = 1
                for author in authors:
                    _author, _ = Author.objects.get_or_create(
                        first_name=author.first_name,
                        last_name=author.last_name,
                        user=author)
                    AuthorEntryRank.objects.get_or_create(
                        author=_author,
                        entry=entry,
                        rank=rank)
                    rank += 1
                try:
                    source_reference = (
                        SourceReferenceBibliography.objects.get(
                            source=entry))
                except SourceReferenceBibliography.DoesNotExist:
                    source_reference = (
                        SourceReferenceBibliography.objects.create(
                            source=entry))
            else:
                raise ValueError('Peer reviewed should have a DOI')
        elif (reference_category.lower().startswith('published') or
              'thesis' in reference_category.lower()):
            # Document
            if document:
                source_reference = (
                    SourceReference.create_source_reference(
                        category='document',
                        source_id=document.id,
                        note=None))
        elif 'database' in reference_category.lower():
            reference_name = reference
            if self.row_value(record, SOURCE_YEAR):
                reference_name += ', ' + self.row_value(
                    record, SOURCE_YEAR)
            database_record, dr_created = (
                DatabaseRecord.objects.get_or_create(
                    name=reference_name))
            source_reference = (SourceReference.create_source_reference(
                category='database',
                source_id=database_record.id,
                note=None))
        else:
            # Unpublished data: the reference text itself is the note.
            reference_name = reference
            if self.row_value(record, SOURCE_YEAR):
                reference_name += ', ' + self.row_value(
                    record, SOURCE_YEAR)
            source_reference = (SourceReference.create_source_reference(
                category=None,
                source_id=None,
                note=reference_name))
    # Attach the uploaded document when the reference is not itself
    # document-backed.
    if (document and source_reference and
            not isinstance(source_reference.source, Document)):
        source_reference.document = document
        source_reference.save()
    if reference and source_reference:
        source_reference.source_name = reference
        source_reference.save()
    elif reference and not source_reference:
        self.add_to_error_summary(
            'Reference {} is not created'.format(reference), index)
    return source_reference
def form_valid(self, form):
    """
    If the form is valid, save the associated model.

    Saves the document with the request user as owner, applies the
    publication/moderation settings, tags it with the
    'Bims Source Reference' keyword (creating the keyword on first use),
    attaches BimsDocument metadata and authors, and returns a JSON
    payload describing the saved document.
    """
    self.object = form.save(commit=False)
    self.object.owner = self.request.user
    # by default, if RESOURCE_PUBLISHING=True then document.is_published
    # must be set to False
    # RESOURCE_PUBLISHING works in similar way as ADMIN_MODERATE_UPLOADS,
    # but is applied to documents only. ADMIN_MODERATE_UPLOADS has wider
    # usage
    is_published = not (settings.RESOURCE_PUBLISHING or
                        settings.ADMIN_MODERATE_UPLOADS)
    self.object.is_published = is_published
    # save abstract
    try:
        self.object.abstract = form.data['description']
    except KeyError:
        pass
    # Save document source
    try:
        self.object.supplemental_information = json.dumps(
            {'document_source': form.data['document_source']})
    except KeyError:
        pass
    self.object.save()
    super(SourceReferenceBimsDocumentUploadView, self).form_valid(form)
    # tag keyword of document as Bims Source Reference
    keyword = None
    try:
        keyword = HierarchicalKeyword.objects.get(
            slug='bims_source_reference')
    except HierarchicalKeyword.DoesNotExist:
        # The keyword does not exist yet: create it as a new depth-1 node
        # with the next available zero-padded tree path.
        try:
            last_keyword = HierarchicalKeyword.objects.filter(
                depth=1).order_by('path').last()
            if not last_keyword:
                path = '0000'
            else:
                path = last_keyword.path
            path = "{:04d}".format(int(path) + 1)
            keyword, created = HierarchicalKeyword.objects.get_or_create(
                slug='bims_source_reference',
                name='Bims Source Reference',
                depth=1,
                path=path)
        except Exception:
            # Keyword creation is best-effort only; the upload must not
            # fail because tagging failed.
            pass
    if keyword:
        TaggedContentItem.objects.get_or_create(
            content_object=self.object, tag=keyword)
    # add additional metadata
    bims_document, created = BimsDocument.objects.get_or_create(
        document=self.object)
    bims_document.update_metadata(form.data)
    # Update authors: replace the existing author set with users parsed
    # from the submitted author string (no-op when the field is missing).
    try:
        authors = form.data['author']
        authors = create_users_from_string(authors)
        if authors:
            bims_document.authors.clear()
            for author in authors:
                bims_document.authors.add(author)
    except KeyError:
        pass
    return HttpResponse(json.dumps({
        'id': self.object.id,
        'title': self.object.title,
        'author': self.object.bimsdocument.authors_string,
        'year': self.object.bimsdocument.year
    }), content_type='application/json', status=200)