Example #1
 def make_doi_metadata_by_uuid(self, uuid, oc_item=None):
     """ makes metadata for a DOI """
     metadata = None
     if oc_item is None:
         oc_item = OCitem()
         exists = oc_item.check_exists(uuid)
     if oc_item.exists:
         oc_item.generate_json_ld()
         meta_doi = metaDOI()
         if 'dc-terms:title' in oc_item.json_ld:
             meta_doi.title = oc_item.json_ld['dc-terms:title']
         if 'dc-terms:issued' in oc_item.json_ld:
             meta_doi.publicationyear = oc_item.json_ld['dc-terms:issued']
         elif 'dc-terms:modified' in oc_item.json_ld:
             meta_doi.publicationyear = oc_item.json_ld['dc-terms:modified']
         else:
             meta_doi.publicationyear = str(datetime.datetime.now().year)
         creator_list = []
         if 'dc-terms:contributor' in oc_item.json_ld:
             for dc_item in oc_item.json_ld['dc-terms:contributor']:
                 creator_list.append(str(dc_item['label']))
         if 'dc-terms:creator' in oc_item.json_ld and len(creator_list) < 1:
             for dc_item in oc_item.json_ld['dc-terms:creator']:
                 creator_list.append(str(dc_item['label']))
         meta_doi.make_creator_list(creator_list)
         metadata = meta_doi.make_metadata_dict()
         metadata['_target'] = oc_item.json_ld['id']
     return metadata
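The contributor/creator handling above prefers 'dc-terms:contributor' and falls back to 'dc-terms:creator' only when no contributors are present. A minimal standalone sketch of that fallback, using a fabricated json_ld fragment rather than real Open Context data:

# Illustrative only: json_ld here is a made-up fragment, not output of generate_json_ld().
json_ld = {
    'dc-terms:creator': [{'label': 'Alice Example'}],
}
creator_list = [str(d['label']) for d in json_ld.get('dc-terms:contributor', [])]
if not creator_list:
    # creators are used only when the contributor list stays empty
    creator_list = [str(d['label']) for d in json_ld.get('dc-terms:creator', [])]
print(creator_list)  # ['Alice Example']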
Example #2
 def make_ark_metadata_by_uuid(self, uuid, oc_item=None):
     """ makes metadata for an ARK id """
     metadata = None
     if oc_item is None:
         oc_item = OCitem()
         exists = oc_item.check_exists(uuid)
     if oc_item.exists:
         oc_item.generate_json_ld()
         meta_ark = metaARK()
         if 'dc-terms:title' in oc_item.json_ld:
             meta_ark.what = oc_item.json_ld['dc-terms:title']
         if 'dc-terms:issued' in oc_item.json_ld:
             meta_ark.when = oc_item.json_ld['dc-terms:issued']
         elif 'dc-terms:modified' in oc_item.json_ld:
             meta_ark.when = oc_item.json_ld['dc-terms:modified']
         else:
             meta_ark.when = str(datetime.datetime.now().year)
         who_list = []
         if 'dc-terms:contributor' in oc_item.json_ld:
             for who_item in oc_item.json_ld['dc-terms:contributor']:
                 who_list.append(str(who_item['label']))
         if 'dc-terms:creator' in oc_item.json_ld and len(who_list) < 1:
             for who_item in oc_item.json_ld['dc-terms:creator']:
                 who_list.append(str(who_item['label']))
         meta_ark.make_who_list(who_list)
         metadata = meta_ark.make_metadata_dict()
         metadata['_target'] = oc_item.json_ld['id']
     return metadata
Example #3
 def make_ark_metadata_by_uuid(self, uuid, oc_item=None):
     """ makes metadata for an ARK id """
     metadata = None
     if oc_item is None:
         oc_item = OCitem()
         exists = oc_item.check_exists(uuid)
     if oc_item.exists:
         oc_item.generate_json_ld()
         meta_ark = metaARK()
         if 'dc-terms:title' in oc_item.json_ld:
             meta_ark.what = oc_item.json_ld['dc-terms:title']
         if 'dc-terms:issued' in oc_item.json_ld:
             meta_ark.when = oc_item.json_ld['dc-terms:issued']
         elif 'dc-terms:modified' in oc_item.json_ld:
             meta_ark.when = oc_item.json_ld['dc-terms:modified']
         else:
             meta_ark.when = str(datetime.datetime.now().year)
         who_list = []
         if 'dc-terms:contributor' in oc_item.json_ld:
             for who_item in oc_item.json_ld['dc-terms:contributor']:
                 who_list.append(str(who_item['label']))
         if 'dc-terms:creator' in oc_item.json_ld and len(who_list) < 1:
             for who_item in oc_item.json_ld['dc-terms:creator']:
                 who_list.append(str(who_item['label']))
         meta_ark.make_who_list(who_list)
         metadata = meta_ark.make_metadata_dict()
         metadata['_target'] = oc_item.json_ld['id']
     return metadata
Example #4
 def make_doi_metadata_by_uuid(self, uuid, oc_item=None):
     """ makes metadata for a DOI """
     metadata = None
     if oc_item is None:
         oc_item = OCitem()
         exists = oc_item.check_exists(uuid)
     if oc_item.exists:
         oc_item.generate_json_ld()
         meta_doi = metaDOI()
         if 'dc-terms:title' in oc_item.json_ld:
             meta_doi.title = oc_item.json_ld['dc-terms:title']
         if 'dc-terms:issued' in oc_item.json_ld:
             meta_doi.publicationyear = oc_item.json_ld['dc-terms:issued']
         elif 'dc-terms:modified' in oc_item.json_ld:
             meta_doi.publicationyear = oc_item.json_ld['dc-terms:modified']
         else:
             meta_doi.publicationyear = str(datetime.datetime.now().year)
         creator_list = []
         if 'dc-terms:contributor' in oc_item.json_ld:
             for dc_item in oc_item.json_ld['dc-terms:contributor']:
                 creator_list.append(str(dc_item['label']))
         if 'dc-terms:creator' in oc_item.json_ld and len(creator_list) < 1:
             for dc_item in oc_item.json_ld['dc-terms:creator']:
                 creator_list.append(str(dc_item['label']))
         meta_doi.make_creator_list(creator_list)
         metadata = meta_doi.make_metadata_dict()
         metadata['_target'] = oc_item.json_ld['id']
     return metadata
Example #5
 def make_save_ark_by_uuid(self, uuid, metadata=None):
     """ makes and saves an ARK identifier by a uuid """
     ok = False
     oc_uri = None
     arks = StableIdentifer.objects.filter(uuid=uuid, stable_type='ark')[:1]
     if len(arks) < 1:
         # the item doesn't yet have an ARK id, so make one!
         oc_item = OCitem()
         exists = oc_item.check_exists(uuid)
         if oc_item.exists:
             if metadata is None:
                 metadata = self.make_ark_metadata_by_uuid(uuid, oc_item)
             if isinstance(metadata, dict):
                 if '_target' in metadata:
                     oc_uri = metadata['_target']
                 else:
                     oc_uri = URImanagement.make_oc_uri(
                         oc_item.manifest.uuid, oc_item.item_type)
                 if isinstance(oc_uri, str):
                     print('Make ARK id for: ' + oc_uri)
                     ark_id = self.ezid.mint_identifier(
                         oc_uri, metadata, 'ark')
                     if isinstance(ark_id, str):
                         # success! we have an ARK id!
                         stable_id = ark_id.replace('ark:/', '')
                         ok = self.save_oc_item_stable_id(
                             oc_item, stable_id, 'ark')
     return ok
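A usage sketch for the method above: the driver below is hypothetical (the id_manager object and the uuid value are assumptions, not names taken from the examples), but it shows the call pattern, where make_save_ark_by_uuid returns True only when a new ARK was minted and saved.

# Hypothetical driver; id_manager stands in for an instance of whatever class
# defines make_save_ark_by_uuid, and the uuid list is made up.
def mint_arks(uuids, id_manager):
    """ mints and saves ARKs for a list of uuids, returning per-uuid results """
    results = {}
    for uuid in uuids:
        # False means the item already had an ARK, does not exist, or minting failed
        results[uuid] = id_manager.make_save_ark_by_uuid(uuid)
    return results

# example call (constructing id_manager is omitted because its class is not shown above):
# report = mint_arks(['1a2b3c4d-0000-0000-0000-000000000000'], id_manager)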
Example #6
 def make_save_doi_by_uuid(self, uuid, metadata=None):
     """ makes and saves a DOI identifier by a uuid """
     ok = False
     oc_uri = None
     dois = StableIdentifer.objects.filter(uuid=uuid,
                                           stable_type='doi')[:1]
     if len(dois) < 1:
         # the item doesn't yet have a DOI, so make one!
         oc_item = OCitem()
         exists = oc_item.check_exists(uuid)
         if oc_item.exists:
             if metadata is None:
                 metadata = self.make_doi_metadata_by_uuid(uuid, oc_item)
             if isinstance(metadata, dict):
                 if '_target' in metadata:
                     oc_uri = metadata['_target']
                 else:
                     oc_uri = URImanagement.make_oc_uri(oc_item.manifest.uuid,
                                                        oc_item.item_type)
                 if isinstance(oc_uri, str):
                     print('Make DOI id for: ' + oc_uri)
                     ezid_response = self.ezid.mint_identifier(oc_uri, metadata, 'doi')
                     if self.do_test:
                         print('EZID response: ' + str(ezid_response))
                     if isinstance(ezid_response, str):
                         if '|' in ezid_response:
                             resp_ex = ezid_response.split('|')
                             for resp_id in resp_ex:
                                 if 'doi:' in resp_id:
                                     ok = self.save_oc_item_stable_id(oc_item, resp_id, 'doi')
                                 else:
                                     pass
                         else:
                             ok = self.save_oc_item_stable_id(oc_item, ezid_response, 'doi')
     return ok           
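The '|' handling at the end of the method above is easier to follow in isolation. A minimal sketch, using a fabricated response string rather than a real EZID reply, of how a pipe-delimited reply reduces to the DOI part that gets saved:

# Illustrative only: ezid_response is a made-up, pipe-delimited example string.
ezid_response = 'doi:10.5072/FK2XXXXXX | ark:/99999/fk4xxxxxx'
if '|' in ezid_response:
    doi_ids = [part.strip() for part in ezid_response.split('|') if 'doi:' in part]
else:
    doi_ids = [ezid_response]
print(doi_ids)  # ['doi:10.5072/FK2XXXXXX']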
Example #7
 def save_item_and_rels(self, uuid, archive_proj_uuid, do_rels=True):
     """ saves an item based on its uuid,
         and optionally ALSO saves related items

         archive_proj_uuid is the uuid for the project we're
         archiving now. An item in that archive may actually come
         from another project, but is included in this archive
         because of a dependency through referencing (context, people)
     """
     if uuid in self.saved_uuids:
         # we have a memory of this already saved
         item_saved = True
     else:
         item_saved = False
         archive_proj = self.get_proj_manifest_obj(archive_proj_uuid)
         oc_item = OCitem(True)  # use canonical URIs
         exists = oc_item.check_exists(uuid)
         if exists and archive_proj is not None:
             act_dirs = []
             act_dirs.append(archive_proj.slug
                             )  # the archive project slug is the directory
             item_type = oc_item.manifest.item_type
             if item_type != 'projects':
                 act_dirs.append(
                     item_type
                 )  # put items of different types into different directories
             file_name = oc_item.manifest.uuid + '.json'
             file_exists = self.check_exists(act_dirs, file_name)
             if file_exists:
                 # we already saved it
                 item_saved = True
             else:
                 # we have not made the file, so make it now
                 oc_item.generate_json_ld()
                 self.save_serialized_json(act_dirs, file_name,
                                           oc_item.json_ld)
                 new_file_exists = self.check_exists(act_dirs, file_name)
                 if new_file_exists:
                     # we saved the new file!
                     item_saved = True
                     self.saved_uuids.append(oc_item.manifest.uuid)
                 else:
                     # we have a problem with this file
                     item_saved = False
                     print('ERROR! Did not save: ' + oc_item.manifest.uuid)
                     self.error_uuids.append(oc_item.manifest.uuid)
                 if do_rels:
                     rel_uuids = self.get_related_uuids(oc_item.json_ld)
                     for rel_uuid in rel_uuids:
                         rel_saved = self.save_item_and_rels(
                             rel_uuid, archive_proj_uuid, False)
     return item_saved
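The act_dirs logic above decides where each item's JSON-LD file lands: project items sit directly under the archive project's slug, while every other item_type gets its own subdirectory. A standalone sketch with made-up slug, item types, and uuids:

# Illustrative only: reproduces the directory choice made by act_dirs above.
def archive_path(archive_proj_slug, item_type, uuid):
    act_dirs = [archive_proj_slug]
    if item_type != 'projects':
        # non-project items are grouped into a per-item_type subdirectory
        act_dirs.append(item_type)
    return '/'.join(act_dirs + [uuid + '.json'])

print(archive_path('example-project', 'projects', 'aaaa-1111'))  # example-project/aaaa-1111.json
print(archive_path('example-project', 'subjects', 'bbbb-2222'))  # example-project/subjects/bbbb-2222.json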
Example #8
 def save_item_and_rels(self, uuid, archive_proj_uuid, do_rels=True):
     """ saves an item based on its uuid,
         and optionally ALSO saves related items

         archive_proj_uuid is the uuid for the project we're
         archiving now. An item in that archive may actually come
         from another project, but is included in this archive
         because of a dependency through referencing (context, people)
     """
     if uuid in self.saved_uuids:
         # we have a memory of this already saved
         item_saved = True
     else:
         item_saved = False
         archive_proj = self.get_proj_manifest_obj(archive_proj_uuid)
         oc_item = OCitem(True)  # use canonical URIs
         exists = oc_item.check_exists(uuid)
         if exists and archive_proj is not None:
             act_dirs = []
             act_dirs.append(archive_proj.slug)  # the archive project slug is the directory
             item_type = oc_item.manifest.item_type
             if item_type != 'projects':
                 act_dirs.append(item_type)  # put items of different types into different directories
             file_name = oc_item.manifest.uuid + '.json'
             file_exists = self.check_exists(act_dirs, file_name)
             if file_exists:
                 # we already saved it
                 item_saved = True
             else:
                 # we have not made the file, so make it now
                 oc_item.generate_json_ld()
                 self.save_serialized_json(act_dirs,
                                           file_name,
                                           oc_item.json_ld)
                 new_file_exists = self.check_exists(act_dirs, file_name)
                 if new_file_exists:
                     # we saved the new file!
                     item_saved = True
                     self.saved_uuids.append(oc_item.manifest.uuid)
                 else:
                     # we have a problem with this file
                     item_saved = False
                     print('ERROR! Did not save: ' + oc_item.manifest.uuid)
                     self.error_uuids.append(oc_item.manifest.uuid)
                 if do_rels:
                     rel_uuids = self.get_related_uuids(oc_item.json_ld)
                     for rel_uuid in rel_uuids:
                         rel_saved = self.save_item_and_rels(rel_uuid,
                                                             archive_proj_uuid,
                                                             False)
     return item_saved   
Example #9
 def make_save_ark_by_uuid(self, uuid, metadata=None):
     """ makes and saves an ARK identifier by a uuid """
     ok = False
     oc_uri = None
     arks = StableIdentifer.objects.filter(
         uuid=uuid,
         stable_type='ark'
     )[:1]
     if len(arks) > 0:
         print('uuid {} has an ARK'.format(uuid))
         return None
     # the item doesn't yet have an ARK id, so make one!
     oc_item = OCitem()
     exists = oc_item.check_exists(uuid)
     if not oc_item.exists:
         print('uuid {} does not exist'.format(uuid))
         return None
     if metadata is None:
         metadata = self.make_ark_metadata_by_uuid(uuid, oc_item)
     if not isinstance(metadata, dict):
         raise RuntimeError('Cannot make metadata for {}'.format(uuid))
     if '_target' in metadata:
         oc_uri = metadata['_target']
     else:
         oc_uri = URImanagement.make_oc_uri(
             oc_item.manifest.uuid,
             oc_item.item_type
         )
     if not isinstance(oc_uri, str):
         raise RuntimeError(
             'Invalid URI for {} item_type {}'.format(
                 oc_item.manifest.uuid,
                 oc_item.item_type
             )
         )
     print('Make ARK id for: ' + oc_uri)
     ark_id = self.ezid.mint_identifier(oc_uri, metadata, 'ark')
     if not isinstance(ark_id, str):
         raise RuntimeWarning('EZID failed minting an ARK for {}'.format(oc_uri))
     # success! we have an ARK id!
     stable_id = ark_id.replace('ark:/', '')
     ok = self.save_oc_item_stable_id(
         oc_item,
         stable_id,
         'ark'
     )
     return ok           
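This version covers the same ARK-minting flow as Example #5, but flattened with guard clauses: each precondition failure returns or raises immediately instead of adding another level of nesting. A stripped-down skeleton of that structure, with hypothetical callables rather than anything from the codebase:

# Generic guard-clause skeleton; the callable parameters are hypothetical placeholders.
def mint_stable_id(uuid, already_has_id, load_item, mint, save):
    if already_has_id(uuid):
        return None  # nothing to do
    item = load_item(uuid)
    if item is None:
        return None  # no such item
    new_id = mint(item)
    if not isinstance(new_id, str):
        raise RuntimeError('minting failed for {}'.format(uuid))
    return save(item, new_id)  # the single success path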
Example #10
 def add_project_archive_dir_metadata(self, project_uuid, archive_dir, deposition_id):
     """ adds metadata about a project to a Zenodo deposition by deposition_id """
     ok = None
     dir_dict = self.arch_files_obj.get_dict_from_file(archive_dir,
                                                       self.dir_content_file_json)
     oc_item = OCitem(True)  # use canonical URIs
     exists = oc_item.check_exists(project_uuid)
     if exists and isinstance(dir_dict, dict):
         oc_item.generate_json_ld()
         proj_dict = oc_item.json_ld
         arch_meta = ArchiveMetadata()
         meta = arch_meta.make_zenodo_proj_media_files_metadata(proj_dict,
                                                                dir_dict,
                                                                self.dir_content_file_json)
         ok = self.zenodo.update_metadata(deposition_id, meta)
         if ok is not False:
             ok = True
             print('Metadata created and updated for: ' + str(deposition_id))
     return ok
Example #11
def html_view_new(request, uuid):
    request = RequestNegotiation().anonymize_request(request)
    # Handle some content negotiation for the item.
    req_neg = RequestNegotiation('text/html')
    req_neg.supported_types = []
    if 'HTTP_ACCEPT' in request.META:
        req_neg.check_request_support(request.META['HTTP_ACCEPT'])
    if not req_neg.supported:
        # The client may want a non-HTML representation, so
        # use the following function to get it.
        return items_graph(request, uuid, item_type=ITEM_TYPE)
    # Proceed with constructing the HTML item
    ocitem = OCitemNew()
    if 'hashes' in request.GET:
        ocitem.assertion_hashes = True
    exists = ocitem.check_exists(uuid)
    if not exists:
        # Did not find a record for the item, check for redirects
        r_url = RedirectURL()
        r_ok = r_url.get_direct_by_type_id(ITEM_TYPE, uuid)
        if r_ok:
            # found a redirect!!
            return redirect(r_url.redirect, permanent=r_url.permanent)
        # raise Http404
        raise Http404
    # Construct the item JSON-LD
    ocitem.generate_json_ld()
    rp = RootPath()
    base_url = rp.get_baseurl()
    proj_content = ProjectContent(ocitem.manifest.uuid, ocitem.manifest.slug,
                                  ocitem.json_ld)
    html_temp = HTMLtemplate()
    html_temp.proj_context_json_ld = ocitem.proj_context_json_ld
    html_temp.proj_content = proj_content.get_project_content()
    html_temp.read_jsonld_dict(ocitem.json_ld)
    template = loader.get_template('projects/view.html')
    context = {'item': html_temp, 'base_url': base_url, 'user': request.user}
    response = HttpResponse(template.render(context, request))
    patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
    return response
Example #12
 def make_save_doi_by_uuid(self, uuid, metadata=None):
     """ makes and saves a DOI identifier by a uuid """
     ok = False
     oc_uri = None
     dois = StableIdentifer.objects.filter(uuid=uuid, stable_type='doi')[:1]
     if len(dois) < 1:
         # the item doesn't yet have a DOI, so make one!
         oc_item = OCitem()
         exists = oc_item.check_exists(uuid)
         if oc_item.exists:
             if metadata is None:
                 metadata = self.make_doi_metadata_by_uuid(uuid, oc_item)
             if isinstance(metadata, dict):
                 if '_target' in metadata:
                     oc_uri = metadata['_target']
                 else:
                     oc_uri = URImanagement.make_oc_uri(
                         oc_item.manifest.uuid, oc_item.item_type)
                 if isinstance(oc_uri, str):
                     print('Make DOI id for: ' + oc_uri)
                     ezid_response = self.ezid.mint_identifier(
                         oc_uri, metadata, 'doi')
                     if self.do_test:
                         print('EZID response: ' + str(ezid_response))
                     if isinstance(ezid_response, str):
                         if '|' in ezid_response:
                             resp_ex = ezid_response.split('|')
                             for resp_id in resp_ex:
                                 if 'doi:' in resp_id:
                                     ok = self.save_oc_item_stable_id(
                                         oc_item, resp_id, 'doi')
                                 else:
                                     pass
                         else:
                             ok = self.save_oc_item_stable_id(
                                 oc_item, ezid_response, 'doi')
     return ok
Example #13
def items_graph(request, identifier, return_media=None, item_type=None):
    # The new Open Context OCitem generator
    # that better integrates caching
    oc_item = OCitem()
    if 'hashes' in request.GET:
        oc_item.assertion_hashes = True
    if not oc_item.check_exists(identifier):
        # Did not find a record for the item, check for redirects
        r_ok = False
        if item_type:
            r_url = RedirectURL()
            r_ok = r_url.get_direct_by_type_id(item_type, identifier)
        if r_ok:
            # found a redirect!!
            return redirect(r_url.redirect, permanent=r_url.permanent)
        # raise Http404
        raise Http404
    if item_type and item_type != oc_item.manifest.item_type:
        # We have a rare case where the item_type is wrong, even though we found
        # something in the manifest, so throw an error.
        raise Http404
    oc_item.generate_json_ld()
    req_neg = RequestNegotiation('application/json')
    req_neg.supported_types = ['application/ld+json']
    if (not item_type or
        item_type not in ['persons', 'types', 'predicates', 'tables']):
        # We don't have a specified item_type, or the item_type is not
        # one that lacks a geospatial component, so we can also support
        # GeoJSON as a media type.
        req_neg.supported_types.append('application/vnd.geo+json')
    req_neg.supported_types += RDF_SERIALIZATIONS
    if 'HTTP_ACCEPT' in request.META:
        req_neg.check_request_support(request.META['HTTP_ACCEPT'])
    if return_media:
        req_neg.check_request_support(return_media)
        req_neg.use_response_type = return_media
    # Associate the request media type with the request so we can
    # make sure that different representations of this resource get different
    # cache responses.
    request.content_type = req_neg.use_response_type
    if not req_neg.supported:
        # client wanted a mimetype we don't support
        response = HttpResponse(req_neg.error_message,
                                content_type=req_neg.use_response_type + "; charset=utf8",
                                status=415)
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
    # Check first if the output is requested to be an RDF format
    graph_output = None
    if req_neg.use_response_type in RDF_SERIALIZATIONS:
        json_ld = oc_item.json_ld
        # We're making an RDF graph serialization, so consolidate all the
        # context resources so we don't have to make Web requests to generate
        # the graph
        consolidated_contexts = consolidate_contexts(oc_item.json_ld)
        json_ld['@context'] = consolidated_contexts
        # Now make and serialize the graph
        graph_output = graph_serialize(req_neg.use_response_type,
                                       json_ld)
    if graph_output:
        # Return with some sort of graph output
        response = HttpResponse(graph_output,
                                content_type=req_neg.use_response_type + "; charset=utf8")
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
    # We're outputting JSON
    if (req_neg.use_response_type == 'application/ld+json' or
        return_media == 'application/ld+json'):
        # A hack to remove non-point features so JSON-LD will validate.
        json_ld = strip_non_point_features(oc_item.json_ld)
    else:
        json_ld = oc_item.json_ld
    json_output = json.dumps(json_ld,
                             indent=4,
                             ensure_ascii=False)
    if 'callback' in request.GET:
        funct = request.GET['callback']
        response = HttpResponse(funct + '(' + json_output + ');',
                                content_type='application/javascript' + "; charset=utf8")
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
    else:
        response = HttpResponse(json_output,
                                content_type=req_neg.use_response_type + "; charset=utf8")
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
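From the client side, the negotiation in items_graph boils down to sending an Accept header and checking whether the view honors it. A minimal sketch using the requests package; the URL is a placeholder, and 'text/turtle' is assumed (not confirmed) to be among RDF_SERIALIZATIONS:

# Illustrative only: placeholder URL, and the Accept value is an assumption
# about what RDF_SERIALIZATIONS contains.
import requests

resp = requests.get(
    'https://example.org/some-item-identifier',
    headers={'Accept': 'text/turtle'},
)
print(resp.status_code)                  # items_graph answers 415 for unsupported Accept types
print(resp.headers.get('Content-Type'))  # otherwise it echoes the negotiated media type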
Example #14
 def match_california_site(self, site_uuid):
     """ Attempts to match a California site name with a tDAR
         site keyword
     """
     found_matches = 0
     oc_item = OCitem()
     exists = oc_item.check_exists(site_uuid)
     if exists:
         la_check = LinkAnnotation.objects\
                                  .filter(subject=site_uuid,
                                          predicate_uri='dc-terms:subject',
                                          object_uri__contains=self.TDAR_VOCAB)[:1]
     if exists and len(la_check) < 1:
         # we don't already have a tDAR id for this item, continue with matches
         # first, generate the item's JSON-LD
         oc_item.generate_json_ld()
         request_keywords = []
         if 'oc-gen:has-obs' in oc_item.json_ld:
             if isinstance(oc_item.json_ld['oc-gen:has-obs'], list):
                 for obs in oc_item.json_ld['oc-gen:has-obs']:
                     if 'oc-pred:52-alternate-site-or-place-name' in obs:
                         if isinstance(obs['oc-pred:52-alternate-site-or-place-name'], list): 
                             for name_obj in obs['oc-pred:52-alternate-site-or-place-name']:
                                 if 'xsd:string' in name_obj:
                                     if isinstance(name_obj['xsd:string'], str):
                                         name_str = name_obj['xsd:string']
                                         request_keywords.append(name_str)
         print('Checking names in tDAR: ' + '; '.join(request_keywords))
         for keyword in request_keywords:
             tdar_api = tdarAPI()
             results = tdar_api.get_site_keyword(keyword)
             if isinstance(results, list):
                 for result in results[:self.max_results]:
                     # assume it is a spurious match
                     match_real = False
                     lw_result = result['label'].lower()
                     lw_keyword = keyword.lower()
                     if lw_result == lw_keyword:
                         # the trinomial and the tDAR result exactly match
                         match_real = True
                     if match_real:
                         print('FOUND ' + result['label'])
                         found_matches += 1
                         # OK! Found a match, first save the linked entity in the link entity table
                         le_check = False
                         try:
                             le_check = LinkEntity.objects.get(uri=result['id'])
                         except LinkEntity.DoesNotExist:
                             le_check = False
                         if le_check is False:
                             le = LinkEntity()
                             le.uri = result['id']
                             le.label = result['label']
                             le.alt_label = result['label']
                             le.vocab_uri = self.TDAR_VOCAB
                             le.ent_type = 'type'
                             le.save()
                         # Now save the link annotation
                         la = LinkAnnotation()
                         la.subject = oc_item.manifest.uuid
                         la.subject_type = oc_item.manifest.item_type
                         la.project_uuid = oc_item.manifest.project_uuid
                         la.source_id = 'tdar-api-lookup'
                         la.predicate_uri = self.DC_TERMS_SUBJECT
                         la.object_uri = result['id']
                         la.save()
                     else:
                         print('Almost! ' + result['label'] + ' is not exactly: ' + keyword)
             if tdar_api.request_error:
                 self.request_error = True
                 print('HTTP request to tDAR failed!')
                 self.error_wait += self.base_wait
                 if self.error_wait > self.max_wait:
                     print('Too many failures, quitting...')
                     sys.exit('Quitting process')
                 else:
                     # sleep some minutes before trying again
                     print('Will try again in ' + str(self.error_wait) + ' seconds...')
                     sleep(self.error_wait)
             else:
                 self.request_error = False
                 if self.error_wait >= self.base_wait:
                     print('HTTP requests resumed OK, will continue.')
                     self.error_wait = 0
     return found_matches
Example #15
 def save_media_files(self, man_obj, license_uri):
     """ saves media files """
     ok = False
     if isinstance(man_obj, Manifest):
         # first get metadata about the media item, especially creator + contribution information
         oc_item = OCitem(True)  # use canonical URIs
         exists = oc_item.check_exists(man_obj.uuid)
         oc_item.generate_json_ld()
         project_uuid = man_obj.project_uuid
         part_num = self.get_current_part_num(license_uri,
                                              project_uuid)
         act_dir = self.make_act_files_dir_name(part_num,
                                                license_uri,
                                                project_uuid)
         dir_dict = self.dir_contents[project_uuid][act_dir]
         # now get the item media files!
         item_files_dict = self.get_item_media_files(man_obj)
         new_files = []
         files_ok = 0
         for file_uri_key, item_file_dict in item_files_dict.items():
             # print('Checking ' + file_uri_key)
             file_name = item_file_dict['filename']
             found = self.check_file_exists_in_all_project_dirs(project_uuid, file_name)
             if found:
                 files_ok += 1
             else:
                 file_name = item_file_dict['filename']
                 for dir_file in dir_dict['files']:
                     if dir_file['filename'] == file_name:
                         found = True
                         files_ok += 1
                         break
             if found is False:
                 # we have a new file to save
                 # Now set the full path to cache the file
                 self.bin_file_obj.full_path_cache_dir = self.arch_files_obj.prep_directory(act_dir)
                 # now retrieve and save the file
                 # check first if there's a file in the temp-cache directory (from previous attempts)
                 ok = self.copy_file_from_temp_cache(act_dir, file_name)
                 if ok is False:
                     ok = self.bin_file_obj.get_cache_remote_file_content(file_name,
                                                                          file_uri_key)
                 if ok:
                     files_ok += 1
                     dir_dict = self.record_associated_categories(dir_dict,
                                                                  oc_item.json_ld)
                     dir_dict = self.record_citation_people(dir_dict, oc_item.json_ld)
                     new_files.append(item_file_dict)
                 else:
                     self.errors.append(item_file_dict)
         dir_dict['size'] = self.arch_files_obj.get_directory_size(act_dir)
         print('Adding files for ' + man_obj.uuid + ' ' + str(len(new_files)))
         dir_dict['files'] += new_files
         self.arch_files_obj.save_serialized_json(act_dir,
                                                  self.dir_content_file_json,
                                                  dir_dict)
         self.dir_contents[project_uuid][act_dir] = dir_dict
         if len(item_files_dict) == files_ok:
             # we have saved the expected number of files for this item
             ok = True
     return ok
Example #16
 def match_california_site(self, site_uuid):
     """ Attempts to match a California site name with a tDAR
         site keyword
     """
     found_matches = 0
     oc_item = OCitem()
     exists = oc_item.check_exists(site_uuid)
     if exists:
         la_check = LinkAnnotation.objects\
                                  .filter(subject=site_uuid,
                                          predicate_uri='dc-terms:subject',
                                          object_uri__contains=self.TDAR_VOCAB)[:1]
     if exists and len(la_check) < 1:
         # we don't already have a tDAR id for this item, continue with matches
         # first, generate the item's JSON-LD
         oc_item.generate_json_ld()
         request_keywords = []
         if 'oc-gen:has-obs' in oc_item.json_ld:
             if isinstance(oc_item.json_ld['oc-gen:has-obs'], list):
                 for obs in oc_item.json_ld['oc-gen:has-obs']:
                     if 'oc-pred:52-alternate-site-or-place-name' in obs:
                         if isinstance(
                                 obs['oc-pred:52-alternate-site-or-place-name'],
                                 list):
                             for name_obj in obs[
                                     'oc-pred:52-alternate-site-or-place-name']:
                                 if 'xsd:string' in name_obj:
                                     if isinstance(name_obj['xsd:string'],
                                                   str):
                                         name_str = name_obj['xsd:string']
                                         request_keywords.append(name_str)
         print('Checking names in tDAR: ' + '; '.join(request_keywords))
         for keyword in request_keywords:
             tdar_api = tdarAPI()
             results = tdar_api.get_site_keyword(keyword)
             if isinstance(results, list):
                 for result in results[:self.max_results]:
                     # assume it is a spurious match
                     match_real = False
                     lw_result = result['label'].lower()
                     lw_keyword = keyword.lower()
                     if lw_result == lw_keyword:
                         # the trinomial and the tDAR result exactly match
                         match_real = True
                     if match_real:
                         print('FOUND ' + result['label'])
                         found_matches += 1
                         # OK! Found a match, first save the linked entity in the link entity table
                         le_check = False
                         try:
                             le_check = LinkEntity.objects.get(
                                 uri=result['id'])
                         except LinkEntity.DoesNotExist:
                             le_check = False
                         if le_check is False:
                             le = LinkEntity()
                             le.uri = result['id']
                             le.label = result['label']
                             le.alt_label = result['label']
                             le.vocab_uri = self.TDAR_VOCAB
                             le.ent_type = 'type'
                             le.save()
                         # Now save the link annotation
                         la = LinkAnnotation()
                         la.subject = oc_item.manifest.uuid
                         la.subject_type = oc_item.manifest.item_type
                         la.project_uuid = oc_item.manifest.project_uuid
                         la.source_id = 'tdar-api-lookup'
                         la.predicate_uri = self.DC_TERMS_SUBJECT
                         la.object_uri = result['id']
                         la.save()
                     else:
                         print('Almost! ' + result['label'] +
                               ' is not exactly: ' + keyword)
             if tdar_api.request_error:
                 self.request_error = True
                 print('HTTP request to tDAR failed!')
                 self.error_wait += self.base_wait
                 if self.error_wait > self.max_wait:
                     print('Too many failures, quitting...')
                     sys.exit('Quitting process')
                 else:
                     # sleep some minutes before trying again
                     print('Will try again in ' + str(self.error_wait) +
                           ' seconds...')
                     sleep(self.error_wait)
             else:
                 self.request_error = False
                 if self.error_wait >= self.base_wait:
                     print('HTTP requests resumed OK, will continue.')
                     self.error_wait = 0
     return found_matches
Example #17
def items_graph(request, identifier, return_media=None, item_type=None):
    # The new Open Context OCitem generator
    # that better integrates caching
    oc_item = OCitem()
    if 'hashes' in request.GET:
        oc_item.assertion_hashes = True
    if not oc_item.check_exists(identifier):
        # Did not find a record for the item, check for redirects
        r_ok = False
        if item_type:
            r_url = RedirectURL()
            r_ok = r_url.get_direct_by_type_id(item_type, identifier)
        if r_ok:
            # found a redirect!!
            return redirect(r_url.redirect, permanent=r_url.permanent)
        # raise Http404
        raise Http404
    if item_type and item_type != oc_item.manifest.item_type:
        # We have a rare case where the item_type is wrong, even though we found
        # something in the manifest, so throw an error.
        raise Http404
    oc_item.generate_json_ld()
    req_neg = RequestNegotiation('application/json')
    req_neg.supported_types = ['application/ld+json']
    if (not item_type
            or item_type not in ['persons', 'types', 'predicates', 'tables']):
        # We don't have a specified item_type, or the item_type is not
        # one that lacks a geospatial component, so we can also support
        # GeoJSON as a media type.
        req_neg.supported_types.append('application/vnd.geo+json')
    req_neg.supported_types += RDF_SERIALIZATIONS
    if 'HTTP_ACCEPT' in request.META:
        req_neg.check_request_support(request.META['HTTP_ACCEPT'])
    if return_media:
        req_neg.check_request_support(return_media)
        req_neg.use_response_type = return_media
    # Associate the request media type with the request so we can
    # make sure that different representations of this resource get different
    # cache responses.
    request.content_type = req_neg.use_response_type
    if not req_neg.supported:
        # client wanted a mimetype we don't support
        response = HttpResponse(req_neg.error_message,
                                content_type=req_neg.use_response_type +
                                "; charset=utf8",
                                status=415)
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
    # Check first if the output is requested to be an RDF format
    graph_output = None
    if req_neg.use_response_type in RDF_SERIALIZATIONS:
        json_ld = oc_item.json_ld
        # We're making an RDF graph serialization, so consolidate all the
        # context resources so we don't have to make Web requests to generate
        # the graph
        consolidated_contexts = consolidate_contexts(oc_item.json_ld)
        json_ld['@context'] = consolidated_contexts
        # Now make and serialize the graph
        graph_output = graph_serialize(req_neg.use_response_type, json_ld)
    if graph_output:
        # Return with some sort of graph output
        response = HttpResponse(graph_output,
                                content_type=req_neg.use_response_type +
                                "; charset=utf8")
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
    # We're outputting JSON
    if (req_neg.use_response_type == 'application/ld+json'
            or return_media == 'application/ld+json'):
        # A hack to remove non-point features so JSON-LD will validate.
        json_ld = strip_non_point_features(oc_item.json_ld)
    else:
        json_ld = oc_item.json_ld
    json_output = json.dumps(json_ld, indent=4, ensure_ascii=False)
    if 'callback' in request.GET:
        funct = request.GET['callback']
        response = HttpResponse(funct + '(' + json_output + ');',
                                content_type='application/javascript' +
                                "; charset=utf8")
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response
    else:
        response = HttpResponse(json_output,
                                content_type=req_neg.use_response_type +
                                "; charset=utf8")
        patch_vary_headers(response, ['accept', 'Accept', 'content-type'])
        return response