Ejemplo n.º 1
0
 def create_media_file(self):
     """Create and save a new Mediafile from this object's attributes.

     Copies ``uuid``, ``project_uuid``, ``source_id``, ``file_type`` and
     ``file_uri`` from ``self`` onto a fresh ``Mediafile``. ``filesize``
     and ``mime_type_uri`` start out as 0 and '' before the save.
     Any exception raised by ``save()`` propagates to the caller.
     """
     mf = Mediafile()
     mf.uuid = self.uuid
     mf.project_uuid = self.project_uuid
     mf.source_id = self.source_id
     mf.file_type = self.file_type
     mf.file_uri = self.file_uri
     mf.filesize = 0
     # the attribute is ``mime_type_uri``; the former ``mime_type_ur``
     # spelling only created a stray attribute and left the field untouched
     mf.mime_type_uri = ''
     mf.save()
Ejemplo n.º 2
0
 def add_from_merritt_dump_row(self, row):
     """Add missing media file records for one row of a Merritt dump.

     The row is read positionally: column 0 is the ARK, column 1 the item
     URI (its last path segment is used as the uuid) and column 2 the
     Merritt file URI. Nothing is done unless a StableIdentifer links the
     ARK to the uuid and fewer than 3 Mediafile records exist for the uuid.
     For every file type with a path pattern found in the Merritt URI, one
     new Mediafile is saved and the ``updated_uuids`` and
     ``updated_file_count`` tallies on ``self`` are updated.
     """
     raw_ark = row[0]
     ark = raw_ark.replace('ark:/', '')
     uri = row[1]  # 2nd column
     merritt_uri = row[2]  # 3rd column
     uuid = uri.split('/')[-1]
     ark_id_ok = StableIdentifer.objects\
                                .filter(stable_id=ark,
                                        uuid=uuid)[:1]
     if len(ark_id_ok) < 1:
         # the ARK and the uuid do not belong together
         return
     media_id_obj = ark_id_ok[0]
     old_files = Mediafile.objects.filter(uuid=uuid)
     if len(old_files) >= 3:
         # this item already has its files
         return
     print('Missing files for: ' + uuid)
     # NOTE: every pattern needs its own trailing comma; adjacent string
     # literals are silently concatenated into one (unmatchable) pattern
     all_patterns = {
         'oc-gen:thumbnail': ['/thumb',
                              '%20thumbs',
                              '_thumbs'],
         'oc-gen:preview': ['/preview',
                            '%20preview',
                            '_preview'],
         'oc-gen:fullfile': ['/full',
                             '/docs',
                             'opencontext/Domuz/DT Images',
                             'opencontext/geissenklosterle/Geissenklosterle%20scans',
                             'opencontext/geissenklosterle/Geissenklosterle_scans',
                             'opencontext/geissenklosterle/Geissenklosterle scans',
                             'opencontext/hayonim/Hayonim%20photos',
                             'opencontext/hayonim/Hayonim_photos',
                             'opencontext/hayonim/Hayonim photos']}
     for ftype_key, patterns in all_patterns.items():
         if not any(pattern in merritt_uri for pattern in patterns):
             continue
         # found a match, at most one file is made per file type
         new_file = Mediafile()
         new_file.uuid = uuid
         new_file.project_uuid = media_id_obj.project_uuid
         new_file.source_id = 'Merritt-media-manifest'
         new_file.file_type = ftype_key
         new_file.file_uri = merritt_uri.replace(' ', '%20')
         try:
             new_file.save()
         except Exception as error:
             # saving stays best-effort, but the failure is now reported
             print('Could not save ' + ftype_key + ' file for: ' + uuid +
                   ' (' + str(error) + ')')
             continue
         if uuid not in self.updated_uuids:
             self.updated_uuids.append(uuid)
         self.updated_file_count += 1
         output = '\n\n'
         output += 'Saved file: ' + str(self.updated_file_count)
         output += ' of uuid: ' + str(len(self.updated_uuids))
         output += '\n'
         output += merritt_uri
         print(output)
Ejemplo n.º 3
0
 def update_project_hero(self, post_data):
     """Replace the hero picture of a project item.

     ``post_data`` must supply 'source_id' and 'file_uri', the manifest
     item must be of type 'projects', and the file_uri must contain
     "http://" or "https://". When every check passes, the existing
     'oc-gen:hero' Mediafile rows of the item are deleted, a new one is
     saved and the caches are cleared.

     Returns the response dict (action, ok flag, change note), which is
     also stored on ``self.response``.
     """
     problems = []
     for needed in ('source_id', 'file_uri'):
         if needed not in post_data:
             # required data is missing, so no item gets created
             problems.append('Missing paramater: ' + needed + '')
     if self.manifest.item_type != 'projects':
         problems.append('Item type must be a project')
     ok = not problems
     if ok:
         file_uri = post_data['file_uri'].strip()
         source_id = post_data['source_id'].strip()
         ok = 'http://' in file_uri or 'https://' in file_uri
         if not ok:
             problems.append(
                 'Need "http://" or "https://" in file_uri: ' + file_uri)
     note = '; '.join(problems)
     if ok:
         # remove the old hero picture(s) one object at a time; this
         # was done deliberately to trace why hero files disappear
         stale_heroes = Mediafile.objects\
                                 .filter(uuid=self.manifest.uuid,
                                         file_type='oc-gen:hero')
         for med_old in stale_heroes:
             med_old.delete()
         new_hero = Mediafile()
         new_hero.uuid = self.manifest.uuid
         new_hero.project_uuid = self.manifest.project_uuid
         new_hero.source_id = source_id
         new_hero.file_type = 'oc-gen:hero'
         new_hero.file_uri = file_uri
         new_hero.save()
         note = 'Updated hero image for project'
         # a change was made, so cached data is stale
         self.clear_caches()
     self.response = {'action': 'update-project-hero',
                      'ok': ok,
                      'change': {'note': note}}
     return self.response
Ejemplo n.º 4
0
 def save_media_file(self, uuid, source_id, file_type, file_uri):
     """Save a Mediafile for ``uuid`` if ``file_uri`` has a web scheme.

     A file_uri containing neither 'http://' nor 'https://' is not
     saved; an error message is appended to ``self.errors`` instead.
     """
     if 'http://' not in file_uri and 'https://' not in file_uri:
         self.errors.append('Need a valid file_uri: ' + file_uri)
         return
     new_media = Mediafile()
     new_media.uuid = uuid
     new_media.project_uuid = self.project_uuid
     new_media.source_id = source_id
     new_media.file_type = file_type
     new_media.file_uri = file_uri
     new_media.save()
Ejemplo n.º 5
0
 def make_media_file(self, scan_uuid, file_type, url_dir, filename):
     """Save a Mediafile of ``file_type`` for ``scan_uuid``.

     The file URI is built as ``self.base_url + url_dir + '/' + filename``.
     The method sleeps 0.3 seconds before doing anything else.

     Returns True when the record was saved and False when ``save()``
     raised an exception.
     """
     sleep(.3)
     file_uri = self.base_url + url_dir + '/' + filename
     mf = Mediafile()
     mf.uuid = scan_uuid
     mf.project_uuid = self.project_uuid
     mf.source_id = self.source_id
     mf.file_type = file_type
     mf.file_uri = file_uri
     mf.filesize = 0
     # the attribute is ``mime_type_uri`` (was misspelled ``mime_type_ur``)
     mf.mime_type_uri = ''
     try:
         mf.save()
     except Exception:
         # failures are reported through the return value; Exception
         # (not a bare except) lets KeyboardInterrupt/SystemExit through
         return False
     return True
Ejemplo n.º 6
0
 def store_records(self, act_table, recs):
     """
     Saves each record in ``recs`` with the model that belongs to
     ``act_table``. A record is skipped (with a printed notice) when
     ``check_allow_write`` returned False and ``self.update_keep_old``
     is False. Unknown table names are skipped. Exceptions raised by
     ``save()`` are printed, not propagated.
     """
     table_models = {
         'link_annotations': LinkAnnotation,
         'link_entities': LinkEntity,
         'oc_assertions': Assertion,
         'oc_manifest': Manifest,
         'oc_subjects': Subject,
         'oc_mediafiles': Mediafile,
         'oc_documents': OCdocument,
         'oc_persons': Person,
         'oc_projects': Project,
         'oc_strings': OCstring,
         'oc_types': OCtype,
         'oc_geospace': Geospace,
         'oc_events': Event,
         'oc_predicates': Predicate,
         'oc_identifiers': StableIdentifer,
         'oc_obsmetadata': ObsMetadata,
     }
     for position, record in enumerate(recs, 1):
         allow_write = self.check_allow_write(act_table, record)
         record = self.prep_update_keep_old(act_table, record)
         if allow_write is False and self.update_keep_old is False:
             print('\n Not allowed to overwite record.' + str(position))
             continue
         model = table_models.get(act_table)
         if model is None:
             # no model is registered for this table name
             continue
         newr = model(**record)
         try:
             newr.save(force_insert=self.force_insert,
                       force_update=self.update_keep_old)
         except Exception as error:
             print('Something slipped past in ' + act_table +
                   '...' + str(error))
Ejemplo n.º 7
0
 def store_records(self, act_table, recs):
     """
     Saves every record (the values of the ``recs`` dict) with the
     model that belongs to ``act_table``. Unknown table names are
     ignored; exceptions from creating or saving a model propagate.
     """
     table_models = {
         'link_annotations': LinkAnnotation,
         'link_entities': LinkEntity,
         'link_hierarchies': LinkHierarchy,
         'oc_chronology': Chronology,
         'oc_geodata': Geodata,
         'oc_mediafiles': Mediafile,
         'oc_documents': OCdocument,
         'oc_persons': Person,
         'oc_projects': Project,
         'oc_strings': OCstring,
         'oc_types': OCtype,
         'oc_events': Event,
         'oc_predicates': Predicate,
         'oc_identifiers': StableIdentifer,
         'oc_obsmetadata': ObsMetadata,
     }
     model = table_models.get(act_table)
     for record in recs.values():
         if model is not None:
             model(**record).save()
Ejemplo n.º 8
0
 def save_media_file(self,
                     uuid,
                     source_id,
                     file_type,
                     file_uri):
     """Store a Mediafile row for the given UUID.

     Only a ``file_uri`` that contains 'http://' or 'https://' is
     saved; anything else adds a message to ``self.errors``.
     """
     has_web_scheme = any(scheme in file_uri
                          for scheme in ('http://', 'https://'))
     if not has_web_scheme:
         self.errors.append('Need a valid file_uri: ' + file_uri)
         return
     new_media = Mediafile()
     new_media.uuid = uuid
     new_media.project_uuid = self.project_uuid
     new_media.source_id = source_id
     new_media.file_type = file_type
     new_media.file_uri = file_uri
     new_media.save()
Ejemplo n.º 9
0
 def update_media_file(self, post_data):
     """Replace the file of one file type on a media item.

     ``post_data`` must supply 'source_id', 'file_type' and 'file_uri',
     the manifest item must be of type 'media', and the file_uri must
     contain "http://" or "https://". When every check passes, the
     Mediafile rows of that file type for the item are deleted, a new
     row is saved and the caches are cleared.

     Returns the response dict, also stored on ``self.response``. It
     always lists every Mediafile currently stored for the item.
     """
     problems = []
     for needed in ('source_id', 'file_type', 'file_uri'):
         if needed not in post_data:
             # required data is missing, so no item gets created
             problems.append('Missing paramater: ' + needed + '')
     if self.manifest.item_type != 'media':
         problems.append('Item type must be a media item')
     ok = not problems
     if ok:
         file_type = post_data['file_type'].strip()
         file_uri = post_data['file_uri'].strip()
         source_id = post_data['source_id'].strip()
         ok = 'http://' in file_uri or 'https://' in file_uri
         if not ok:
             problems.append(
                 'Need "http://" or "https://" in file_uri: ' + file_uri)
     note = '; '.join(problems)
     if ok:
         # drop the existing file(s) of the same type for this item
         same_type_files = Mediafile.objects\
                                    .filter(uuid=self.manifest.uuid,
                                            file_type=file_type)
         for med_old in same_type_files:
             med_old.delete()
         new_file = Mediafile()
         new_file.uuid = self.manifest.uuid
         new_file.project_uuid = self.manifest.project_uuid
         new_file.source_id = source_id
         new_file.file_type = file_type
         new_file.file_uri = file_uri
         new_file.save()
         note = 'Updated file for this media item'
         # a change was made, so cached data is stale
         self.clear_caches()
     # report the full list of media files for this item
     file_list = [
         {'id': media_file.file_uri,
          'type': media_file.file_type,
          'dcat:size': float(media_file.filesize),
          'dc-terms:hasFormat': media_file.mime_type_uri}
         for media_file in Mediafile.objects.filter(uuid=self.manifest.uuid)
     ]
     self.response = {'action': 'update-media-file',
                      'ok': ok,
                      'file_list': file_list,
                      'change': {'note': note}}
     return self.response
Ejemplo n.º 10
0
 def create_media_file(self):
     """Create and save a Mediafile, retrying other extension capitalizations.

     A Mediafile is filled from this object's attributes and saved. If
     that save raises, ``self.new_entity`` is set to False and nothing
     else happens. If the save works but ``filesize`` is still 0 and the
     URI contains a '.', the URI is re-saved with its trailing extension
     upper-cased and then lower-cased (.JPG vs .jpg). The first attempt
     that yields a filesize above 0, or whose URI ends in '.nxs' or
     '.zip', is kept and written to ``self.imp_cell_obj.record`` so a
     re-run of the import need not repeat the checks.

     NOTE(review): presumably ``Mediafile.save()`` fills in ``filesize``
     from the remote file (the original comment mentions a HEAD
     request); confirm against the model.
     """
     sleep(.1)
     mf = Mediafile()
     mf.uuid = str(self.uuid)
     mf.project_uuid = self.project_uuid
     mf.source_id = self.source_id
     mf.file_type = self.file_type
     mf.file_uri = self.file_uri
     mf.filesize = 0
     mf.mime_type_uri = ''
     try:
         mf.save()
     except Exception:
         # Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit through
         self.new_entity = False
         return
     if mf.filesize != 0 or '.' not in self.file_uri:
         return
     # filesize is still zero, so try again with a different
     # capitalization of the file extension
     f_extension = '.' + self.file_uri.split('.')[-1]
     # swap only the trailing extension; str.replace() would also
     # rewrite the same text wherever else it occurs in the URI
     uri_stem = self.file_uri[:-len(f_extension)]
     f_alt_exts = [uri_stem + f_extension.upper(),
                   uri_stem + f_extension.lower()]
     for f_alt_ext in f_alt_exts:
         # loop, since user provided data sometimes has totally
         # wrong extension capitalizations
         print('Pause before checking extension capitalization...')
         sleep(self.SLEEP_TIME)
         self.file_uri = f_alt_ext
         mf.file_uri = self.file_uri
         mf.save()
         if mf.filesize > 0 or self.file_uri.endswith(('.nxs', '.zip')):
             print('Corrected extension capitalization: ' + str(self.file_uri))
             # found a working capitalization; save the corrected
             # file_uri in the import cell record
             self.imp_cell_obj.record = self.file_uri
             self.imp_cell_obj.save()
             print('Saved corrected extension import cell record')
             break
Ejemplo n.º 11
0
 def archive_image(self, man_obj, file_name=None, mf_file=None):
     """Archive an image and record the archived file URIs.

     Steps:
         1. make the item's JSON-LD and, unless ``file_name`` is given
            as a string, cache the remote file locally
         2. make the metadata (left empty when ``mf_file`` is given)
         3. upload the file to an Internet Archive item
         4. save a Mediafile for the archived full file and, only when
            no ``mf_file`` was passed, another for the IIIF version

     Args:
         man_obj: manifest object of the item being archived.
         file_name: name of an already cached file; when not a string,
             the file is cached first.
         mf_file: existing Mediafile object to update instead of
             making new ones.

     Returns:
         True only if every Mediafile save succeeded; False when any
         earlier step failed or a save raised. Save failures are also
         appended to ``self.errors``.
     """
     self.prep_bin_file_obj()
     json_ld = self.make_oc_item(man_obj)
     if not isinstance(json_ld, dict):
         return False
     # cache the remote file locally to upload it
     item_id = self.id_prefix + '-' + json_ld['slug']
     if not isinstance(file_name, str):
         file_name = self.bin_file_obj.get_cache_full_file(
             json_ld, man_obj)
         self.errors += self.bin_file_obj.errors
     if not isinstance(file_name, str):
         print('Failed to cache file!')
         return False
     sleep(self.delay_before_request)
     print('Ready to upload: ' + file_name)
     # start an internet archive session
     s = self.start_ia_session()
     # get or make an item
     item = get_item(item_id, archive_session=s, debug=True)
     # now make some metadata for the first item to be uploaded
     metadata = self.make_metadata_dict(json_ld, man_obj)
     metadata['collection'] = self.ia_collection
     metadata['mediatype'] = 'image'
     if mf_file is not None:
         metadata = {}
     # now upload the image file
     dir_file = self.bin_file_obj.join_dir_filename(
         file_name, self.cache_file_dir)
     save_ia_files = False
     try:
         r = item.upload_file(dir_file,
                              key=file_name,
                              metadata=metadata)
         if r.status_code == requests.codes.ok or self.save_db:
             save_ia_files = True
         else:
             print('Bad status: ' + str(r.status_code))
     except Exception:
         # Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit through
         print('Problem uploading: ' + dir_file)
         save_ia_files = False
     if not save_ia_files:
         return False
     # set the uris for the media item just uploaded
     ia_file_uri = self.make_ia_image_uri(item_id, file_name)
     iiif_file_uri = self.make_ia_iiif_image_uri(
         item_id, file_name)
     # save the link to the IA full file, in a new media file object
     # unless an existing one was passed
     mf = Mediafile() if mf_file is None else mf_file
     ok = self._save_ia_mediafile(
         mf, man_obj, item_id,
         self.IA_FILE_TYPE, ia_file_uri, 'ia-fullfile')
     if mf_file is None:
         # newly archived image file, so also link the IIIF version
         iiif_ok = self._save_ia_mediafile(
             Mediafile(), man_obj, item_id,
             self.IIIF_FILE_TYPE, iiif_file_uri, 'ia-iiif')
         # a successful IIIF save must not hide a failed full-file save
         ok = ok and iiif_ok
     return ok

 def _save_ia_mediafile(self, mf, man_obj, item_id,
                        file_type, file_uri, label):
     """Fill in and save one Mediafile; log to self.errors and return False on failure."""
     mf.uuid = man_obj.uuid
     mf.project_uuid = man_obj.project_uuid
     mf.source_id = man_obj.source_id
     mf.file_type = file_type
     mf.file_uri = file_uri
     mf.filesize = 0
     try:
         mf.save()
     except Exception:
         error_msg = 'UUID: ' + man_obj.uuid + ' item_id: ' + item_id
         error_msg += ' Cannot save oc_mediafile for ' + label
         self.errors.append(error_msg)
         return False
     return True
Ejemplo n.º 12
0
 def make_media_file_obj(self, media_uuid, file_type, file_uri):
     """Make a new media file object in the database.

     The method sleeps 0.1 seconds, then fills a Mediafile from the
     arguments plus ``self.project_uuid`` and ``self.source_id``.

     Returns True when the record was saved and False when ``save()``
     raised an exception.
     """
     sleep(.1)
     mf = Mediafile()
     mf.uuid = media_uuid
     mf.project_uuid = self.project_uuid
     mf.source_id = self.source_id
     mf.file_type = file_type
     mf.file_uri = file_uri
     mf.filesize = 0
     # the attribute is ``mime_type_uri`` (was misspelled ``mime_type_ur``)
     mf.mime_type_uri = ''
     try:
         mf.save()
     except Exception:
         # failures are reported through the return value; Exception
         # (not a bare except) lets KeyboardInterrupt/SystemExit through
         return False
     return True
Ejemplo n.º 13
0
 def create_media_file(self):
     """Create and save a new Mediafile from this object's attributes.

     Copies ``uuid``, ``project_uuid``, ``source_id``, ``file_type`` and
     ``file_uri`` from ``self`` onto a fresh ``Mediafile``. ``filesize``
     and ``mime_type_uri`` start out as 0 and '' before the save.
     Any exception raised by ``save()`` propagates to the caller.
     """
     mf = Mediafile()
     mf.uuid = self.uuid
     mf.project_uuid = self.project_uuid
     mf.source_id = self.source_id
     mf.file_type = self.file_type
     mf.file_uri = self.file_uri
     mf.filesize = 0
     # the attribute is ``mime_type_uri``; the former ``mime_type_ur``
     # spelling only created a stray attribute and left the field untouched
     mf.mime_type_uri = ''
     mf.save()
Ejemplo n.º 14
0
 def create_media_file(self):
     """Create and save a Mediafile, retrying other extension capitalizations.

     A Mediafile is filled from this object's attributes and saved. If
     that save raises, ``self.new_entity`` is set to False and nothing
     else happens. If the save works but ``filesize`` is still 0 and the
     URI contains a '.', the URI is re-saved with its trailing extension
     upper-cased and then lower-cased (.JPG vs .jpg). The first attempt
     that yields a filesize above 0, or whose URI ends in '.nxs' or
     '.zip', is kept and written to ``self.imp_cell_obj.record`` so a
     re-run of the import need not repeat the checks.

     NOTE(review): presumably ``Mediafile.save()`` fills in ``filesize``
     from the remote file (the original comment mentions a HEAD
     request); confirm against the model.
     """
     sleep(.1)
     mf = Mediafile()
     mf.uuid = str(self.uuid)
     mf.project_uuid = self.project_uuid
     mf.source_id = self.source_id
     mf.file_type = self.file_type
     mf.file_uri = self.file_uri
     mf.filesize = 0
     mf.mime_type_uri = ''
     try:
         mf.save()
     except Exception:
         # Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit through
         self.new_entity = False
         return
     if mf.filesize != 0 or '.' not in self.file_uri:
         return
     # filesize is still zero, so try again with a different
     # capitalization of the file extension
     f_extension = '.' + self.file_uri.split('.')[-1]
     # swap only the trailing extension; str.replace() would also
     # rewrite the same text wherever else it occurs in the URI
     uri_stem = self.file_uri[:-len(f_extension)]
     f_alt_exts = [uri_stem + f_extension.upper(),
                   uri_stem + f_extension.lower()]
     for f_alt_ext in f_alt_exts:
         # loop, since user provided data sometimes has totally
         # wrong extension capitalizations
         print('Pause before checking extension capitalization...')
         sleep(self.SLEEP_TIME)
         self.file_uri = f_alt_ext
         mf.file_uri = self.file_uri
         mf.save()
         if mf.filesize > 0 or self.file_uri.endswith(('.nxs', '.zip')):
             print('Corrected extension capitalization: ' + str(self.file_uri))
             # found a working capitalization; save the corrected
             # file_uri in the import cell record
             self.imp_cell_obj.record = self.file_uri
             self.imp_cell_obj.save()
             print('Saved corrected extension import cell record')
             break
Ejemplo n.º 15
0
 def make_media_file(self, scan_uuid, file_type, url_dir, filename):
     """Save a Mediafile of ``file_type`` for ``scan_uuid``.

     The file URI is built as ``self.base_url + url_dir + '/' + filename``.
     The method sleeps 0.3 seconds before doing anything else.

     Returns True when the record was saved and False when ``save()``
     raised an exception.
     """
     sleep(.3)
     file_uri = self.base_url + url_dir + '/' + filename
     mf = Mediafile()
     mf.uuid = scan_uuid
     mf.project_uuid = self.project_uuid
     mf.source_id = self.source_id
     mf.file_type = file_type
     mf.file_uri = file_uri
     mf.filesize = 0
     # the attribute is ``mime_type_uri`` (was misspelled ``mime_type_ur``)
     mf.mime_type_uri = ''
     try:
         mf.save()
     except Exception:
         # failures are reported through the return value; Exception
         # (not a bare except) lets KeyboardInterrupt/SystemExit through
         return False
     return True