Example #1
0
 def make_list_cite_projects(self, json_ld):
     """ builds the citation string and the list of cited projects

         Each entry under 'dc-terms:source' adds its label to the
         comma separated citation string and a dict with the keys
         uuid, uri, label and count to self.projects_list. The string
         is stored in self.cite_projects and is also returned.
     """
     labels = []
     projects = []
     sources = json_ld['dc-terms:source'] if 'dc-terms:source' in json_ld else []
     for source in sources:
         labels.append(source['label'])
         # prefer the defining URI, fall back on the entry's own id
         uri_key = 'rdfs:isDefinedBy' if 'rdfs:isDefinedBy' in source else 'id'
         projects.append({
             'uuid': URImanagement.get_uuid_from_oc_uri(source[uri_key], False),
             'uri': source[uri_key],
             'label': source['label'],
             'count': source['count'] if 'count' in source else False,
         })
     self.cite_projects = ', '.join(labels)
     self.projects_list = projects
     return self.cite_projects
Example #2
0
 def make_list_cite_projects(self, json_ld):
     """ makes the project citation string for a JSON-LD record

         Labels of the 'dc-terms:source' entries are joined with ', '
         and saved as self.cite_projects (also the return value).
         A dict per entry (uuid, uri, label, count) is saved in
         self.projects_list; count is False when the entry has none.
     """
     if 'dc-terms:source' in json_ld:
         entries = json_ld['dc-terms:source']
     else:
         entries = []
     names = []
     described = []
     for entry in entries:
         names.append(entry['label'])
         if 'rdfs:isDefinedBy' in entry:
             source_uri = entry['rdfs:isDefinedBy']
         else:
             source_uri = entry['id']
         project = {'uuid': URImanagement.get_uuid_from_oc_uri(source_uri, False)}
         project['uri'] = source_uri
         project['label'] = entry['label']
         project['count'] = entry['count'] if 'count' in entry else False
         described.append(project)
     citation = ', '.join(names)
     self.cite_projects = citation
     self.projects_list = described
     return self.cite_projects
Example #3
0
 def get_entity_parents(self, identifier):
     """
     Collects parent concepts for a URI or UUID identified entity

     Two lookups are made, each keeping only the first row ordered by
     'sort' so that a certain hierarchy path is privileged:
     first annotations where the entity is the subject of a
     "sub of" predicate (parent is the object), then annotations where
     the entity is the object of a "super of" predicate (parent is the
     subject). Each parent found is followed recursively while
     self.loop_count is at most 50. Returns self.parent_entities.
     """
     self.loop_count += 1
     equiv = LinkEquivalence()
     id_variants = equiv.get_identifier_list_variants(identifier)
     sub_of_preds = equiv.get_identifier_list_variants(
         LinkAnnotation.PREDS_SBJ_IS_SUB_OF_OBJ)
     super_of_preds = equiv.get_identifier_list_variants(
         LinkAnnotation.PREDS_SBJ_IS_SUPER_OF_OBJ)

     def first_annotation(lookups, excluded, ordering):
         # the first matching annotation (as a sliced queryset) or False
         try:
             rows = LinkAnnotation.objects\
                                  .filter(**lookups)\
                                  .exclude(**excluded)\
                                  .order_by(*ordering)[:1]
             if len(rows) < 1:
                 rows = False
         except LinkAnnotation.DoesNotExist:
             rows = False
         return rows

     def climb(parent_id):
         # only ids that look like URIs are converted and recorded
         if parent_id.count('/') > 1:
             oc_uuid = URImanagement.get_uuid_from_oc_uri(parent_id)
             if oc_uuid is not False:
                 parent_id = oc_uuid
             if parent_id not in self.parent_entities:
                 self.parent_entities.append(parent_id)
         if self.loop_count <= 50:
             self.parent_entities = self.get_entity_parents(parent_id)

     # superior items in the objects of the assertion
     as_subject = first_annotation(
         {'subject__in': id_variants, 'predicate_uri__in': sub_of_preds},
         {'object_uri__in': id_variants},
         ('sort', 'object_uri'))
     if as_subject is not False:
         climb(as_subject[0].object_uri)
     # superior items in the subject, not the object
     as_object = first_annotation(
         {'object_uri__in': id_variants, 'predicate_uri__in': super_of_preds},
         {'subject__in': id_variants},
         ('sort', 'subject'))
     if as_object is not False:
         climb(as_object[0].subject)
     return self.parent_entities
Example #4
0
 def get_entity_parents(self, identifier):
     """
     Finds parent concepts for a given URI or UUID identified entity

     Checks both directions of the hierarchy: annotations where the
     entity is the subject of a "sub of" predicate, then annotations
     where it is the object of a "super of" predicate. Only the first
     row of each lookup is used. Parents are followed recursively
     while self.loop_count is at most 50. Returns self.parent_entities.
     """
     self.loop_count += 1
     equiv = LinkEquivalence()
     id_variants = equiv.get_identifier_list_variants(identifier)
     sub_of_preds = equiv.get_identifier_list_variants(
         LinkAnnotation.PREDS_SBJ_IS_SUB_OF_OBJ)
     super_of_preds = equiv.get_identifier_list_variants(
         LinkAnnotation.PREDS_SBJ_IS_SUPER_OF_OBJ)
     # (field matched, predicates, field excluded, attribute holding the parent)
     directions = (
         ('subject__in', sub_of_preds, 'object_uri__in', 'object_uri'),
         ('object_uri__in', super_of_preds, 'subject__in', 'subject'),
     )
     for match_field, preds, exclude_field, parent_attr in directions:
         try:
             rows = LinkAnnotation.objects\
                                  .filter(**{match_field: id_variants,
                                             'predicate_uri__in': preds})\
                                  .exclude(**{exclude_field: id_variants})[:1]
             if len(rows) < 1:
                 rows = False
         except LinkAnnotation.DoesNotExist:
             rows = False
         if rows is False:
             continue
         parent_id = getattr(rows[0], parent_attr)
         if parent_id.count('/') > 1:
             # looks like a URI, use the Open Context uuid when there is one
             oc_uuid = URImanagement.get_uuid_from_oc_uri(parent_id)
             if oc_uuid is not False:
                 parent_id = oc_uuid
             if parent_id not in self.parent_entities:
                 self.parent_entities.append(parent_id)
         if self.loop_count <= 50:
             self.parent_entities = self.get_entity_parents(parent_id)
     return self.parent_entities
Example #5
0
 def get_identifier_list_variants(self, id_list):
     """ returns every identifier along with its alternate forms

         A non-list argument is treated as a one item list holding its
         string form. Web URIs gain their Open Context uuid and their
         prefixed form (each only when truthy), prefixed ids gain the
         full URI, and anything else is looked up in the memory cache
         to add the entity URI and its prefixed form when it differs.
     """
     if not isinstance(id_list, list):
         id_list = [str(id_list)]
     variants = []
     for identifier in id_list:
         variants.append(identifier)
         if identifier.startswith(('http://', 'https://')):
             oc_uuid = URImanagement.get_uuid_from_oc_uri(identifier)
             if oc_uuid:
                 variants.append(oc_uuid)
             prefix_id = URImanagement.prefix_common_uri(identifier)
             if prefix_id:
                 variants.append(prefix_id)
             continue
         if ':' in identifier:
             variants.append(
                 URImanagement.convert_prefix_to_full_uri(identifier))
             continue
         # probably an open context uuid or a slug
         ent = MemoryCache().get_entity(identifier)
         if not ent:
             continue
         full_uri = ent.uri
         variants.append(full_uri)
         prefix_uri = URImanagement.prefix_common_uri(full_uri)
         if prefix_uri != full_uri:
             variants.append(prefix_uri)
     return variants
Example #6
0
 def get_identifier_list_variants(self, id_list):
     """ returns every identifier along with its alternate forms

         A non-list argument is treated as a one item list holding its
         string form. Web URIs gain their Open Context uuid or, when
         the uuid lookup gives False, their prefixed form. Prefixed ids
         gain the full URI. Anything else is dereferenced as an entity
         and, when found, gains its URI and the prefixed form of that
         URI when the two differ.
     """
     if not isinstance(id_list, list):
         id_list = [str(id_list)]
     variants = []
     for identifier in id_list:
         variants.append(identifier)
         is_web_uri = identifier[:7] == 'http://' or identifier[:8] == 'https://'
         if is_web_uri:
             alternate = URImanagement.get_uuid_from_oc_uri(identifier)
             if alternate is False:
                 # not an Open Context URI, so use the prefixed form
                 alternate = URImanagement.prefix_common_uri(identifier)
             variants.append(alternate)
             continue
         if ':' in identifier:
             variants.append(
                 URImanagement.convert_prefix_to_full_uri(identifier))
             continue
         # probably an open context uuid or a slug
         ent = Entity()
         if not ent.dereference(identifier):
             continue
         full_uri = ent.uri
         variants.append(full_uri)
         prefix_uri = URImanagement.prefix_common_uri(full_uri)
         if prefix_uri != full_uri:
             variants.append(prefix_uri)
     return variants
Example #7
0
 def _get_entity_children_db(self, identifier, recursive=True):
     """
     Looks up the child concepts of a URI or UUID identified entity

     Results are kept in self.child_entities (created when empty), keyed
     by identifier. A stored entry is reused only for recursive
     requests. Children are gathered from annotations in both
     directions, and child URIs are swapped for Open Context uuids when
     one can be parsed. Returns the list stored for the identifier,
     which is empty when it has no children.
     """
     if not self.child_entities:
         self.child_entities = LastUpdatedOrderedDict()
     if identifier in self.child_entities and recursive:
         return self.child_entities[identifier]
     sub_of_preds = LinkAnnotation.PREDS_SBJ_IS_SUB_OF_OBJ
     super_of_preds = LinkAnnotation.PREDS_SBJ_IS_SUPER_OF_OBJ
     id_variants = LinkEquivalence().get_identifier_list_variants(identifier)

     def annotations(**lookups):
         # the matching annotation rows, or an empty list when none match
         try:
             rows = LinkAnnotation.objects.filter(**lookups)
             if len(rows) < 1:
                 return []
         except LinkAnnotation.DoesNotExist:
             return []
         return rows

     # child items in the objects of the assertion
     raw_children = [
         row.object_uri
         for row in annotations(subject__in=id_variants,
                                predicate_uri__in=super_of_preds)
     ]
     # subordinate entities in the subject, not the object
     raw_children += [
         row.subject
         for row in annotations(object_uri__in=id_variants,
                                predicate_uri__in=sub_of_preds)
     ]
     resolved = []
     for child_id in raw_children:
         if child_id.count('/') > 1:
             oc_uuid = URImanagement.get_uuid_from_oc_uri(child_id)
             if oc_uuid:
                 child_id = oc_uuid
         resolved.append(child_id)
         if recursive:
             # recursively get the children of the child
             self.get_entity_children(child_id, recursive)
     # save the children (an empty list when there are none) unless the
     # identifier already has an entry
     if identifier not in self.child_entities:
         self.child_entities[identifier] = resolved
     return self.child_entities[identifier]
Example #8
0
 def get_item_basics(self, solr_rec):
     """ gets basic metadata for an item from a solr record

         Copies 'uuid', 'updated' and 'published' to attributes of the
         same name when present and sets self.human_remains_flagged
         when 'human_remains' is greater than 0. When
         'slug_type_uri_label' is present and parses, sets self.uri,
         self.href and self.label, plus self.item_type when the URI
         can be resolved to an item type.

         Returns True only when 'slug_type_uri_label' was present and
         parsed, otherwise False (also when solr_rec is not a dict).
     """
     output = False
     if not isinstance(solr_rec, dict):
         return output
     if 'uuid' in solr_rec:
         self.uuid = solr_rec['uuid']
     if 'slug_type_uri_label' in solr_rec:
         id_parts = self.parse_solr_value_parts(
             solr_rec['slug_type_uri_label'])
         if id_parts is not False:
             output = True
             self.uri = self.make_url_from_val_string(
                 id_parts['uri'], True)
             self.href = self.make_url_from_val_string(
                 id_parts['uri'], False)
             item_type_output = URImanagement.get_uuid_from_oc_uri(
                 self.uri, True)
             # the lookup does not give a dict for URIs it cannot parse,
             # so only read the item type when a dict came back
             if isinstance(item_type_output, dict):
                 self.item_type = item_type_output['item_type']
             self.label = id_parts['label']
     if 'updated' in solr_rec:
         self.updated = solr_rec['updated']
     if 'published' in solr_rec:
         self.published = solr_rec['published']
     if 'human_remains' in solr_rec:
         # is the record flagged as related to human remains?
         if solr_rec['human_remains'] > 0:
             self.human_remains_flagged = True
     return output
Example #9
0
 def process_id(self, identifier):
     """ makes a solr query dict that matches an identifier

         The single 'fq' entry ORs together the identifier as a
         persistent URI, as a naked DOI, ARK or ORCID completed with
         its resolver, as a uuid and, when the identifier parses as an
         Open Context URI, the uuid found in it.
     """
     escape_id = self.escape_solr_arg(identifier)
     fq_terms = ['persistent_uri:' + escape_id]
     # resolvers for a naked DOI, ARK and ORCID, in that order
     for resolver in ('http://dx.doi.org/',
                      'http://n2t.net/',
                      'http://orcid.org/'):
         fq_terms.append(
             'persistent_uri:' + self.escape_solr_arg(resolver + identifier))
     fq_terms.append('uuid:' + escape_id)
     tcheck = URImanagement.get_uuid_from_oc_uri(identifier, True)
     if tcheck is not False:
         fq_terms.append('uuid:' + tcheck['uuid'])
     return {'fq': ['(' + ' OR '.join(fq_terms) + ')'],
             'facet.field': []}
Example #10
0
 def get_identifier_list_variants(self, id_list):
     """ expands a list of identifiers with their alternate forms

         A non-list argument becomes a one item list of its string
         form. The output keeps each identifier followed by its
         alternates: the Open Context uuid or else the prefixed form
         for web URIs, the full URI for prefixed ids, and for anything
         else the URI of the dereferenced entity plus the prefixed form
         of that URI when different.
     """
     def alternates(identifier):
         if identifier[:7] == 'http://' or identifier[:8] == 'https://':
             oc_uuid = URImanagement.get_uuid_from_oc_uri(identifier)
             if oc_uuid is not False:
                 return [oc_uuid]
             return [URImanagement.prefix_common_uri(identifier)]
         if ':' in identifier:
             return [URImanagement.convert_prefix_to_full_uri(identifier)]
         # probably an open context uuid or a slug
         ent = Entity()
         if not ent.dereference(identifier):
             return []
         full_uri = ent.uri
         prefix_uri = URImanagement.prefix_common_uri(full_uri)
         if prefix_uri != full_uri:
             return [full_uri, prefix_uri]
         return [full_uri]

     if not isinstance(id_list, list):
         id_list = [str(id_list)]
     output_list = []
     for identifier in id_list:
         output_list.append(identifier)
         output_list.extend(alternates(identifier))
     return output_list
Example #11
0
    def dereference(self, identifier, link_entity_slug=False):
        """ Looks up the entity behind an identifier

            Non-string identifiers are never found. Identifiers that do
            not yield an Open Context uuid are first tried as linked
            data entities; in every other case, and when that fails,
            the manifest is checked. Returns True when something was
            found, otherwise False.
        """
        if not isinstance(identifier, str):
            # Only strings can be dereferenced.
            return False
        identifier = URImanagement.convert_prefix_to_full_uri(identifier)
        oc_uuid = URImanagement.get_uuid_from_oc_uri(identifier)
        if not oc_uuid:
            tables_prefix = settings.CANONICAL_HOST + '/tables/'
            if tables_prefix in identifier:
                # Special case for probable open context table item.
                oc_uuid = identifier.replace(tables_prefix, '')
        if oc_uuid:
            # A uuid parsed from the URI is the identifier to look up
            # in the manifest.
            identifier = oc_uuid
        elif self.dereference_linked_data(
                identifier, link_entity_slug=link_entity_slug):
            # Found a linked data entity, no need to check the manifest.
            return True
        return bool(self.dereference_manifest_item(identifier))
Example #12
0
 def _get_parent_id_db(self, identifier):
     """Looks up the parent id of an identifier in the database

     Checks annotations where the identifier is the subject of a
     "sub of" predicate, then annotations where it is the object of a
     "super of" predicate. Each lookup keeps the first row ordered by
     'sort' so a certain hierarchy path is privileged. A parent from
     the second lookup replaces one from the first. Parent URIs are
     swapped for their Open Context uuid when one can be parsed.
     Returns None when no parent is found.
     """
     equiv = LinkEquivalence()
     id_variants = equiv.get_identifier_list_variants(identifier)
     sub_of_preds = equiv.get_identifier_list_variants(
         LinkAnnotation.PREDS_SBJ_IS_SUB_OF_OBJ)
     super_of_preds = equiv.get_identifier_list_variants(
         LinkAnnotation.PREDS_SBJ_IS_SUPER_OF_OBJ)
     # (field matched, predicates, field excluded, attribute holding the parent)
     directions = (
         ('subject__in', sub_of_preds, 'object_uri__in', 'object_uri'),
         ('object_uri__in', super_of_preds, 'subject__in', 'subject'),
     )
     parent_id = None
     for match_field, preds, exclude_field, parent_attr in directions:
         try:
             rows = LinkAnnotation.objects\
                                  .filter(**{match_field: id_variants,
                                             'predicate_uri__in': preds})\
                                  .exclude(**{exclude_field: id_variants})\
                                  .order_by('sort', parent_attr)[:1]
             if len(rows) < 1:
                 rows = False
         except LinkAnnotation.DoesNotExist:
             rows = False
         if not rows:
             continue
         parent_id = getattr(rows[0], parent_attr)
         oc_uuid = URImanagement.get_uuid_from_oc_uri(parent_id)
         if oc_uuid:
             parent_id = oc_uuid
     return parent_id
 def process_equivalent_linked_data(self):
     """ Types are useful for entity reconciliation
         this checks for linked data associated
         with a type

         Entities under each of self.LD_EQUIVALENT_PREDICATES (except
         those with "foaf" in the predicate) are indexed as close
         matches, and entities under "skos:related" as related. Web
         URI entities add their label and id to the "text" field, add
         a solr value to the predicate field and to its "obj_all"
         counterpart, and are passed to self.process_object_uri.
         Under "skos:related", "oc-pred:" entities with an
         "owl:sameAs" are indexed against "/predicates/<uuid>"; they
         are skipped when no uuid can be parsed from "owl:sameAs".
     """
     def ensure_fields(fname, allname, root_args):
         # create the solr fields on first use; the root link data
         # field gets an entry only when the predicate field is new
         if fname not in self.fields:
             self.fields[fname] = []
             if self.ROOT_LINK_DATA_SOLR not in self.fields:
                 self.fields[self.ROOT_LINK_DATA_SOLR] = []
             item = self._concat_solr_string_value(*root_args)
             self.fields[self.ROOT_LINK_DATA_SOLR].append(item)
         if allname not in self.fields:
             self.fields[allname] = []

     def index_entity(fname, allname, entity, uri):
         # add the entity to the full text and to both solr fields
         self.fields["text"] += entity["label"] + "\n"
         self.fields["text"] += entity["id"] + "\n"
         item = self._concat_solr_string_value(entity["slug"], "id", uri, entity["label"])
         self.fields[fname].append(item)
         self.fields[allname].append(item)

     def is_web_uri(entity_id):
         return "http://" in entity_id or "https://" in entity_id

     for equiv_uri in self.LD_EQUIVALENT_PREDICATES:
         if equiv_uri not in self.oc_item.json_ld or "foaf" in equiv_uri:
             continue
         # for now, default to a close match
         fname = "skos_closematch___pred_id"
         allname = "obj_all___skos_closematch___pred_id"
         ensure_fields(
             fname,
             allname,
             ("skos-closematch", "id", "http://www.w3.org/2004/02/skos/core#closeMatch", "Close Match"),
         )
         for entity in self.oc_item.json_ld[equiv_uri]:
             if is_web_uri(entity["id"]):
                 index_entity(fname, allname, entity, entity["id"])
                 self.process_object_uri(entity["id"])
     if "skos:related" in self.oc_item.json_ld:
         fname = "skos_related___pred_id"
         allname = "obj_all___skos_related___pred_id"
         ensure_fields(
             fname,
             allname,
             ("skos-related", "id", "http://www.w3.org/2004/02/skos/core#related", "Related"),
         )
         for entity in self.oc_item.json_ld["skos:related"]:
             if is_web_uri(entity["id"]):
                 index_entity(fname, allname, entity, entity["id"])
                 self.process_object_uri(entity["id"])
             elif "oc-pred:" in entity["id"] and "owl:sameAs" in entity:
                 pred_uuid = URImanagement.get_uuid_from_oc_uri(entity["owl:sameAs"])
                 if not pred_uuid:
                     # no uuid could be parsed, so there is no predicate
                     # path to index; skip rather than fail on the
                     # string concatenation below
                     continue
                 index_entity(fname, allname, entity, "/predicates/" + pred_uuid)
Example #14
0
 def parse_json_record(self, json_rec):
     """ reads a geo-json feature of a record into
         attributes of this object

         The 'properties' member is used when present, otherwise the
         record itself. Nothing is set unless the properties are a
         dict. Dates given as negative numbers become positive
         integers with a 'BCE' suffix; other dates are rounded to
         integers with the suffix set to False. In the snippet, <em>
         tags become <mark> tags and all other markup is removed.
     """
     props = json_rec['properties'] if 'properties' in json_rec else json_rec
     if not isinstance(props, dict):
         return
     if 'id' in props:
         self.id = props['id'].replace('#', '')
     for key in ('label', 'href'):
         if key in props:
             setattr(self, key, props[key])
     if 'uri' in props:
         parsed_uri = URImanagement.get_uuid_from_oc_uri(props['uri'], True)
         if isinstance(parsed_uri, dict):
             self.item_type = parsed_uri['item_type']
             self.uuid = parsed_uri['uuid']
     for key, attr in (('project label', 'project'),
                       ('context label', 'context')):
         if key in props:
             setattr(self, attr, props[key])
     for era in ('early', 'late'):
         key = era + ' bce/ce'
         if key not in props:
             continue
         year = props[key]
         setattr(self, era + '_bce_ce', year)
         if year < 0:
             setattr(self, era + '_bce_ce', int(round(year * -1, 0)))
             setattr(self, era + '_suffix', 'BCE')
         else:
             setattr(self, era + '_bce_ce', int(round(year, 0)))
             setattr(self, era + '_suffix', False)
     if 'item category' in props:
         self.category = props['item category']
     if 'snippet' in props:
         self.snippet = props['snippet']
         # protect the highlighting tags before stripping the markup
         for old, new in (('<em>', '[[[[mark]]]]'),
                          ('</em>', '[[[[/mark]]]]')):
             self.snippet = self.snippet.replace(old, new)
         self.snippet = strip_tags(self.snippet)
         # remove left over tag fragments, then restore the highlighting
         for old, new in (('</', ''),
                          ('<', ''),
                          ('>', ''),
                          ('[[[[mark]]]]', '<mark>'),
                          ('[[[[/mark]]]]', '</mark>')):
             self.snippet = self.snippet.replace(old, new)
     if 'thumbnail' in props:
         self.thumbnail = props['thumbnail']
     for key in ('published', 'updated'):
         if key in props:
             setattr(self, key,
                     QueryMaker().make_human_readable_date(props[key]))
Example #15
0
 def get_project_authors(self, project_uuid):
     """ Collects the creators and contributors of a project

         Creators come from the project's dc creator annotations or,
         when it has none, from those of its parent project. People
         are added to self.creators and self.contributors by uuid when
         one can be parsed from the annotation object, otherwise by
         the object URI. Returns True when either list has entries.
     """
     def dc_links(subject, uri_pred, prefixed_pred):
         # annotations of the subject using either form of the predicate
         return LinkAnnotation.objects\
                              .filter(Q(subject=subject),
                                      Q(predicate_uri=uri_pred)
                                      | Q(predicate_uri=prefixed_pred))\
                              .order_by('sort')

     def person_id(link):
         pid = URImanagement.get_uuid_from_oc_uri(link.object_uri)
         return link.object_uri if pid is False else pid

     creator_links = dc_links(project_uuid,
                              self.URI_DC_CREATE,
                              self.PRF_DC_CREATE)
     if len(creator_links) < 1:
         # look for creators from the parent project
         parents = Project.objects\
                          .filter(uuid=project_uuid)\
                          .exclude(project_uuid=project_uuid)[:1]
         if len(parents) > 0:
             creator_links = dc_links(parents[0].project_uuid,
                                      self.URI_DC_CREATE,
                                      self.PRF_DC_CREATE)
     for creator in creator_links:
         pid = person_id(creator)
         if pid not in self.creators:
             self.creators.append(pid)
     for contrib in dc_links(project_uuid,
                             self.URI_DC_CONTRIB,
                             self.PRF_DC_CONTRIB):
         pid = person_id(contrib)
         if pid in self.contributors:
             continue
         # add to contrib if not a creator, if authorship is not
         # consolidated, or if the annotation has a sort above 0
         if pid not in self.creators \
            or self.consolidate_authorship is False \
            or contrib.sort > 0:
             self.contributors.append(pid)
     return len(self.contributors) > 0 or len(self.creators) > 0
Example #16
0
 def get_project_authors(self, project_uuid):
     """ Gathers author information for a project

         Fills self.creators from the project's dc creator annotations
         (or the parent project's when the project has none) and
         self.contributors from its dc contributor annotations. Each
         person is recorded by the uuid parsed from the annotation
         object, or by the object URI when no uuid is found.
         Returns True when either list ends up with entries.
     """
     is_creator = Q(predicate_uri=self.URI_DC_CREATE) \
         | Q(predicate_uri=self.PRF_DC_CREATE)
     creator_links = LinkAnnotation.objects\
                                   .filter(Q(subject=project_uuid), is_creator)\
                                   .order_by('sort')
     if len(creator_links) < 1:
         # look for creators from the parent project
         par_proj = Project.objects\
                           .filter(uuid=project_uuid)\
                           .exclude(project_uuid=project_uuid)[:1]
         if len(par_proj) > 0:
             creator_links = LinkAnnotation.objects\
                                           .filter(Q(subject=par_proj[0].project_uuid),
                                                   is_creator)\
                                           .order_by('sort')
     for creator in creator_links:
         creator_id = URImanagement.get_uuid_from_oc_uri(creator.object_uri)
         if not creator_id:
             creator_id = creator.object_uri
         if creator_id not in self.creators:
             self.creators.append(creator_id)
     is_contrib = Q(predicate_uri=self.URI_DC_CONTRIB) \
         | Q(predicate_uri=self.PRF_DC_CONTRIB)
     contrib_links = LinkAnnotation.objects\
                                   .filter(Q(subject=project_uuid), is_contrib)\
                                   .order_by('sort')
     for contrib in contrib_links:
         contrib_id = URImanagement.get_uuid_from_oc_uri(contrib.object_uri)
         if not contrib_id:
             contrib_id = contrib.object_uri
         if contrib_id in self.contributors:
             continue
         # creators are listed again as contributors only when
         # authorship is not consolidated or the sort is above 0
         if contrib_id not in self.creators \
            or self.consolidate_authorship is False \
            or contrib.sort > 0:
             self.contributors.append(contrib_id)
     return len(self.contributors) > 0 or len(self.creators) > 0
Example #17
0
 def get_entity_children(self, identifier, recurive=True):
     """
     Gets child concepts for a given URI or UUID identified entity

     Children are collected from annotations in both directions: the
     objects of "super of" assertions made about the entity, and the
     subjects of "sub of" assertions that point at it. Child URIs are
     swapped for Open Context uuids when one can be parsed. The result
     is saved in self.child_entities[identifier] as a list of child
     ids, or as False when there are no children. An existing entry is
     never overwritten. Nothing is returned.

     The 'recurive' parameter (spelling kept for existing callers)
     controls whether the children of each child are collected too.
     """
     act_children = []
     p_for_superobjs = LinkAnnotation.PREDS_SBJ_IS_SUB_OF_OBJ
     p_for_subobjs = LinkAnnotation.PREDS_SBJ_IS_SUPER_OF_OBJ
     lequiv = LinkEquivalence()
     identifiers = lequiv.get_identifier_list_variants(identifier)
     try:
         # look for child items in the objects of the assertion
         subobjs_anno = LinkAnnotation.objects.filter(
             subject__in=identifiers, predicate_uri__in=p_for_subobjs)
         if len(subobjs_anno) < 1:
             subobjs_anno = False
     except LinkAnnotation.DoesNotExist:
         subobjs_anno = False
     if subobjs_anno is not False:
         for sub_obj in subobjs_anno:
             act_children.append(sub_obj.object_uri)
     try:
         # now look for subordinate entities in the subject, not the object
         subsubj_anno = LinkAnnotation.objects.filter(
             object_uri__in=identifiers, predicate_uri__in=p_for_superobjs)
         if len(subsubj_anno) < 1:
             subsubj_anno = False
     except LinkAnnotation.DoesNotExist:
         subsubj_anno = False
     if subsubj_anno is not False:
         for sub_sub in subsubj_anno:
             act_children.append(sub_sub.subject)
     if len(act_children) > 0:
         identifier_children = []
         for child_id in act_children:
             if child_id.count('/') > 1:
                 oc_uuid = URImanagement.get_uuid_from_oc_uri(child_id)
                 if oc_uuid is not False:
                     child_id = oc_uuid
             identifier_children.append(child_id)
             # only descend when asked to; the flag used to be passed
             # along without ever being checked
             if recurive:
                 self.get_entity_children(child_id, recurive)
         # save the list of children of the current identified item
         if identifier not in self.child_entities:
             self.child_entities[identifier] = identifier_children
     else:
         # save a False for the current identified item. it has no children
         if identifier not in self.child_entities:
             self.child_entities[identifier] = False
Example #18
0
 def check_opencontext_uri(self, cell):
     """ Returns the Manifest object for an Open Context URI found in
         a cell, or False if there is no such item
     """
     mentions_oc = ('http://opencontext.' in cell
                    or 'https://opencontext.' in cell)
     if not mentions_oc:
         return False
     uuid = URImanagement.get_uuid_from_oc_uri(cell)
     if uuid is False:
         return False
     # appears to be an Open Context URI, so now check that we
     # actually have that entity in the database
     try:
         return Manifest.objects.get(uuid=uuid)
     except Manifest.DoesNotExist:
         return False
Example #19
0
 def check_opencontext_uri(self, cell):
     """ Checks a cell for an Open Context URI of an item that is in
         the manifest; gives the Manifest object, or False if not found
     """
     oc_prefixes = ('http://opencontext.', 'https://opencontext.')
     found_item = False
     if any(prefix in cell for prefix in oc_prefixes):
         item_uuid = URImanagement.get_uuid_from_oc_uri(cell)
         if item_uuid is not False:
             # the cell has what appears to be an Open Context URI,
             # so see if the entity is actually in the database
             try:
                 found_item = Manifest.objects.get(uuid=item_uuid)
             except Manifest.DoesNotExist:
                 found_item = False
     return found_item
Example #20
0
    def add_json_ld_link_annotations(self, json_ld):
        """
        Adds linked data annotations (typically referencing URIs from
        outside Open Context) to the json_ld dict, which is returned.

        Dublin Core assertions are not added to json_ld here. They are
        collected in self.dc_assertions so they can be added last.
        """
        if not self.link_annotations or not len(self.link_annotations):
            # nothing to add
            return json_ld

        parts = PartsJsonLD()
        parts.proj_context_json_ld = self.proj_context_json_ld
        parts.manifest_obj_dict = self.manifest_obj_dict
        for anno in self.link_annotations:
            oc_check = URImanagement.get_uuid_from_oc_uri(anno.object_uri, True)
            # item_type is False when the object is NOT from Open Context
            item_type = oc_check['item_type'] if oc_check else False
            if item_type == 'persons':
                # add a stable ID to person items, but only if they are ORCID IDs
                parts.stable_id_predicate = ItemKeys.PREDICATES_FOAF_PRIMARYTOPICOF
                parts.stable_id_prefix_limit = StableIdentifer.ID_TYPE_PREFIXES[
                    'orcid']
            # shorten the predicate URI to a compact (prefixed) URI using
            # the item-context declared namespaces
            act_pred = URImanagement.prefix_common_uri(anno.predicate_uri)
            is_dublin_core = (act_pred in self.dc_author_preds
                              or act_pred in self.dc_inherit_preds
                              or act_pred in self.dc_metadata_preds)
            if is_dublin_core:
                # cache dublin core assertions in self.dc_assertions so
                # they get added LAST, after other assertions
                self.dc_assertions = parts.addto_predicate_list(
                    self.dc_assertions, act_pred, anno.object_uri, item_type)
                continue

            if not biological_taxonomy_validation(act_pred, anno.object_uri):
                # the act_pred and object_uri combination is not valid,
                # so skip it
                continue

            # not a dublin core predicate, so OK to add it now
            json_ld = parts.addto_predicate_list(
                json_ld, act_pred, anno.object_uri, item_type)
        return json_ld
Example #21
0
 def add_json_ld_link_annotations(self, json_ld):
     """
     Puts linked data annotations (typically referencing URIs from
     outside Open Context) into the json_ld dict and returns it.

     Annotations with Dublin Core predicates go into self.dc_assertions
     instead, so that they can be added after the other assertions.
     """
     if not self.link_annotations:
         # No link annotations, so skip out.
         return json_ld
     if not len(self.link_annotations):
         return json_ld
     # We have link annotations.
     builder = PartsJsonLD()
     builder.proj_context_json_ld = self.proj_context_json_ld
     builder.manifest_obj_dict = self.manifest_obj_dict
     for link_anno in self.link_annotations:
         object_uri = link_anno.object_uri
         oc_parts = URImanagement.get_uuid_from_oc_uri(object_uri, True)
         if oc_parts:
             # an Open Context item
             item_type = oc_parts['item_type']
         else:
             # this item is NOT from open context
             item_type = False
         if item_type == 'persons':
             # add a stable ID to person items, but only if they are ORCID IDs
             builder.stable_id_predicate = ItemKeys.PREDICATES_FOAF_PRIMARYTOPICOF
             builder.stable_id_prefix_limit = StableIdentifer.ID_TYPE_PREFIXES['orcid']
         # make a compact (prefixed) URI for the predicate, using the
         # item-context declared namespaces
         act_pred = URImanagement.prefix_common_uri(link_anno.predicate_uri)
         if (act_pred in self.dc_author_preds
                 or act_pred in self.dc_inherit_preds
                 or act_pred in self.dc_metadata_preds):
             # a dublin core assertion, cache it in self.dc_assertions
             # so it gets added LAST, after other assertions
             self.dc_assertions = builder.addto_predicate_list(
                 self.dc_assertions, act_pred, object_uri, item_type)
         else:
             # not a dublin core predicate, so we're OK to add it
             # now, not later.
             json_ld = builder.addto_predicate_list(
                 json_ld, act_pred, object_uri, item_type)
     return json_ld
Example #22
0
 def get_solr_record_uuid_type(self, solr_rec):
     """ Gets the item uuid, label, and type from a solr_rec

     Returns False if solr_rec is not a dict. Otherwise returns a dict
     with 'uuid', 'label' and 'item_type' keys. A value stays False
     when it could not be determined from the record.
     """
     if not isinstance(solr_rec, dict):
         return False
     output = {'uuid': False,
               'label': False,
               'item_type': False}
     if 'uuid' in solr_rec:
         output['uuid'] = solr_rec['uuid']
     if 'slug_type_uri_label' not in solr_rec:
         return output
     id_parts = self.parse_solr_value_parts(solr_rec['slug_type_uri_label'])
     if id_parts is False:
         return output
     uri = self.make_url_from_val_string(id_parts['uri'], True)
     item_type_output = URImanagement.get_uuid_from_oc_uri(uri, True)
     if item_type_output:
         # other callers check this result for False, so do not
         # subscript it blindly; item_type stays False in that case
         output['item_type'] = item_type_output['item_type']
     output['label'] = id_parts['label']
     return output
Example #23
0
 def get_solr_record_uuid_type(self, solr_rec):
     """ Gets the item uuid, label, and type from a solr_rec

     Returns False when solr_rec is not a dict. Otherwise returns a
     dict with the keys 'uuid', 'label' and 'item_type', where a value
     is False if the record did not provide it.
     """
     output = False
     if isinstance(solr_rec, dict):
         output = {'uuid': solr_rec.get('uuid', False),
                   'label': False,
                   'item_type': False}
         if 'slug_type_uri_label' in solr_rec:
             id_parts = self.parse_solr_value_parts(solr_rec['slug_type_uri_label'])
             if id_parts is not False:
                 uri = self.make_url_from_val_string(id_parts['uri'], True)
                 oc_parts = URImanagement.get_uuid_from_oc_uri(uri, True)
                 # the lookup result is checked for False elsewhere, so
                 # guard here too instead of raising a TypeError
                 if oc_parts:
                     output['item_type'] = oc_parts['item_type']
                 output['label'] = id_parts['label']
     return output
Example #24
0
 def process_id(self, identifier):
     """ Makes a solr query dict for finding an item by an identifier

     The returned dict has one 'fq' term that ORs together matches on
     persistent_uri and uuid for the identifier and for DOI, ARK and
     ORCID URIs made from it. Its 'facet.field' list is empty.
     """
     fq_terms = []
     # check for the identifier as given (http and https variants)
     for act_id in self.make_http_https_options([identifier]):
         escape_id = self.escape_solr_arg(act_id)
         fq_terms.extend(['persistent_uri:' + escape_id,
                          'uuid:' + escape_id])
     # now make URIs in case we have a naked identifier
     prefix_removes = (
         'doi:',
         'orcid:',
         'http://dx.doi.org/',
         'https://dx.doi.org/',
         'http://doi.org/',
         'https://doi.org/',
     )
     for prefix in prefix_removes:
         # strip ID prefixes, case insensitive
         identifier = re.sub(re.escape(prefix),
                             '',
                             identifier,
                             flags=re.IGNORECASE)
     uri_bases = (
         'http://dx.doi.org/',  # DOI (old)
         'http://doi.org/',  # DOI (new)
         'http://n2t.net/',  # ARK (CDL / Merritt)
         'http://orcid.org/',  # Orcid (people)
     )
     uris = [uri_base + identifier for uri_base in uri_bases]
     # now make https http variants of the URIs
     for uri in self.make_http_https_options(uris):
         fq_terms.append('persistent_uri:' + self.escape_solr_arg(uri))
     oc_parts = URImanagement.get_uuid_from_oc_uri(identifier, True)
     if oc_parts is not False:
         fq_terms.append('uuid:' + oc_parts['uuid'])
     return {'fq': ['(' + ' OR '.join(fq_terms) + ')'],
             'facet.field': []}
Example #25
0
 def get_item_json_ld(self, item):
     """ Gets the JSON-LD of the Open Context item identified by
         item['uri'], or False if it can not be found
     """
     if 'uri' not in item:
         return False
     oc_parts = URImanagement.get_uuid_from_oc_uri(item['uri'], True)
     if oc_parts is False:
         return False
     uuid = oc_parts['uuid']
     item_type = oc_parts['item_type']  # read, but not used further
     ocitem = OCitem()
     ocitem.get_item(uuid)
     if ocitem.manifest is False:
         return False
     return ocitem.json_ld
Example #26
0
 def get_item_json_ld(self, item):
     """ Looks up the item with the Open Context URI in item['uri']
         and returns its JSON-LD. Returns False if there is no 'uri',
         the URI is not recognized, or the item has no manifest.
     """
     json_ld = False
     tcheck = False
     if 'uri' in item:
         tcheck = URImanagement.get_uuid_from_oc_uri(item['uri'], True)
     if tcheck is not False:
         uuid, item_type = tcheck['uuid'], tcheck['item_type']
         ocitem = OCitem()
         ocitem.get_item(uuid)
         json_ld = ocitem.json_ld if ocitem.manifest is not False else False
     return json_ld
Example #27
0
 def load_csv(self, filename, after=0, add_path=False):
     """ Loads a CSV dump from Merritt and records its ARK identifiers

     Rows with 'ark:/' in the first column are counted from 1. Rows
     whose count is below `after` are skipped, so an interrupted load
     can be resumed. For each remaining row, the second column is read
     as an Open Context URI and matched to a not yet archived Manifest
     item. A StableIdentifer is saved for the item and its archived
     time is set from the fourth column, or to the current time when
     that column is missing or does not validate.

     If add_path is True, filename is joined to
     settings.STATIC_ROOT and self.DEFAULT_DIRECTORY.
     """
     if add_path:
         filename_path = os.path.join(settings.STATIC_ROOT,
                                      self.DEFAULT_DIRECTORY,
                                      filename)
     else:
         filename_path = filename
     i = 0
     # newline='' is what the csv module expects for files it reads;
     # the with block makes sure the file gets closed
     with open(filename_path, newline='') as csv_file:
         for row in csv.reader(csv_file):
             if len(row) < 2 or 'ark:/' not in row[0]:
                 # blank line, or no ARK and URI to work with
                 continue
             i += 1
             if i < after:
                 continue
             uuid = URImanagement.get_uuid_from_oc_uri(row[1])
             if uuid is False:
                 continue
             try:
                 manifest = Manifest.objects.get(uuid=uuid,
                                                 archived__isnull=True)
             except Manifest.DoesNotExist:
                 continue
             ok_new = True
             try:
                 sid = StableIdentifer()
                 sid.stable_id = row[0].replace('ark:/', '')
                 sid.stable_type = 'ark'
                 sid.uuid = manifest.uuid
                 sid.project_uuid = manifest.project_uuid
                 sid.item_type = manifest.item_type
                 sid.save()
             except Exception:
                 # best effort: the identifier could not be saved, so
                 # do not count it
                 ok_new = False
             # note when the item was last archived
             try:
                 manifest.archived = self.validate_date(row[3])
                 manifest.archived_save()
             except Exception:
                 # no usable date in the row, fall back to the current time
                 manifest.archived = time.strftime('%Y-%m-%d %H:%M:%S')
                 manifest.archived_save()
             if ok_new:
                 self.id_recorded += 1
             print('Saved ids: ' + str(self.id_recorded))
Example #28
0
 def load_csv(self, filename, after=0, add_path=False):
     """ Loads a CSV dump from Merritt and records its ARK identifiers

     Only rows with 'ark:/' in the first column are used, and these are
     counted starting at 1. Rows with a count below `after` are skipped.
     The second column of a row is read as an Open Context URI and is
     matched to a Manifest item that is not yet archived. For a match,
     a StableIdentifer is saved and the item's archived time is set from
     the fourth column, or to the current time if that fails.

     If add_path is True, filename is joined to
     settings.STATIC_ROOT and self.DEFAULT_DIRECTORY.
     """
     if add_path:
         filename_path = os.path.join(settings.STATIC_ROOT,
                                      self.DEFAULT_DIRECTORY, filename)
     else:
         filename_path = filename
     ark_row_count = 0
     # open with newline='' as the csv module documents, and close the
     # file when done
     with open(filename_path, newline='') as csv_file:
         for row in csv.reader(csv_file):
             if len(row) < 2 or 'ark:/' not in row[0]:
                 # skip blank lines and rows without an ARK and a URI
                 continue
             ark_row_count += 1
             if ark_row_count < after:
                 continue
             manifest = False
             uuid = URImanagement.get_uuid_from_oc_uri(row[1])
             if uuid is not False:
                 try:
                     manifest = Manifest.objects.get(
                         uuid=uuid, archived__isnull=True)
                 except Manifest.DoesNotExist:
                     manifest = False
             if manifest is False:
                 continue
             ok_new = True
             try:
                 sid = StableIdentifer()
                 sid.stable_id = row[0].replace('ark:/', '')
                 sid.stable_type = 'ark'
                 sid.uuid = manifest.uuid
                 sid.project_uuid = manifest.project_uuid
                 sid.item_type = manifest.item_type
                 sid.save()
             except Exception:
                 # best effort: saving the identifier failed, so it is
                 # not counted as recorded
                 ok_new = False
             # note when the item was last archived
             try:
                 manifest.archived = self.validate_date(row[3])
                 manifest.archived_save()
             except Exception:
                 # the row has no usable date, so use the current time
                 manifest.archived = time.strftime(
                     '%Y-%m-%d %H:%M:%S')
                 manifest.archived_save()
             if ok_new:
                 self.id_recorded += 1
             print('Saved ids: ' + str(self.id_recorded))
Example #29
0
 def get_predicate_uuids(self, pred_keys, item_dict):
     """ Lists the unique uuids of the Open Context items that an item
         dict references with the given predicate key, or LIST of
         predicate keys
     """
     found = []
     key_list = pred_keys if isinstance(pred_keys, list) else [pred_keys]
     if isinstance(item_dict, dict):
         for key in key_list:
             candidates = item_dict.get(key, [])
             if isinstance(candidates, list):
                 for candidate in candidates:
                     if isinstance(candidate, dict) and 'id' in candidate:
                         uuid = URImanagement.get_uuid_from_oc_uri(
                             candidate['id'])
                         if uuid and uuid not in found:
                             found.append(uuid)
     return found
Example #30
0
 def get_item_json_ld(self, item):
     """ Gets the JSON-LD for the Open Context item in item['uri']

     The item type and uuid found in the URI are used to build a URL
     under self.base_url, which is requested as JSON. Returns the
     parsed JSON, or False if item has no 'uri', the URI is not
     recognized, or the request or JSON parsing fails.
     """
     if 'uri' not in item:
         return False
     tcheck = URImanagement.get_uuid_from_oc_uri(item['uri'], True)
     if tcheck is False:
         return False
     url = self.base_url + '/' + tcheck['item_type'] + '/' + tcheck['uuid']
     header = {'Accept': 'application/json'}
     try:
         r = requests.get(url, headers=header, timeout=60)
         r.raise_for_status()
         return r.json()
     except (requests.exceptions.RequestException, ValueError):
         # connection problem, HTTP error status, or a body that is
         # not valid JSON; a KeyboardInterrupt is no longer swallowed
         return False
 def get_item_basics(self, solr_rec):
     """ Sets basic metadata attributes for an item from a solr_rec

     Returns True if the uri, href, item_type and label could be set
     from the record's 'slug_type_uri_label' value, otherwise False.
     """
     if not isinstance(solr_rec, dict):
         return False
     found_id_parts = False
     if 'uuid' in solr_rec:
         self.uuid = solr_rec['uuid']
     if 'slug_type_uri_label' in solr_rec:
         id_parts = self.parse_solr_value_parts(solr_rec['slug_type_uri_label'])
         if id_parts is not False:
             found_id_parts = True
             partial_uri = id_parts['uri']
             self.uri = self.make_url_from_val_string(partial_uri, True)
             self.href = self.make_url_from_val_string(partial_uri, False)
             oc_parts = URImanagement.get_uuid_from_oc_uri(self.uri, True)
             self.item_type = oc_parts['item_type']
             self.label = id_parts['label']
     for date_key in ('updated', 'published'):
         if date_key in solr_rec:
             setattr(self, date_key, solr_rec[date_key])
     return found_id_parts
Example #32
0
 def set_record_basic_metadata(self, solr_doc):
     """Sets the record's general metadata attributes from a solr doc.

     The uuid is always set. The href, uri, item_type, label and slug
     are only set if the doc's 'slug_type_uri_label' value parses.
     """
     self.uuid = solr_doc.get('uuid')
     encoded_entity = solr_doc.get('slug_type_uri_label', '')
     item_dict = utilities.parse_solr_encoded_entity_str(
         encoded_entity, solr_slug_format=False)
     if not item_dict:
         return None
     partial_url = item_dict.get('uri', '')
     # The item local url for this deployment
     self.href = make_url_from_partial_url(partial_url,
                                           base_url=self.base_url)
     # The item "canonical" uri
     self.uri = make_url_from_partial_url(
         partial_url,
         base_url=settings.CANONICAL_HOST,
     )
     oc_parts = URImanagement.get_uuid_from_oc_uri(self.uri, True)
     self.item_type = oc_parts['item_type']
     self.label = item_dict.get('label')
     self.slug = item_dict.get('slug')
Example #33
0
 def get_predicate_uuids(self, pred_keys, item_dict):
     """ Gets the unique uuids of Open Context items referenced in an
         item dict by a predicate key, or a LIST of predicate keys
     """
     uuids = []
     if not isinstance(pred_keys, list):
         pred_keys = [pred_keys]
     if not isinstance(item_dict, dict):
         return uuids
     for pred_key in pred_keys:
         if pred_key not in item_dict:
             continue
         items = item_dict[pred_key]
         if not isinstance(items, list):
             continue
         for item in items:
             if not isinstance(item, dict) or 'id' not in item:
                 continue
             uuid = URImanagement.get_uuid_from_oc_uri(item['id'])
             # only keep string uuids, and only keep each one once
             if isinstance(uuid, str) and uuid not in uuids:
                 uuids.append(uuid)
     return uuids
Example #34
0
 def get_item_json_ld(self, item):
     """ Requests the JSON-LD for the Open Context item in item['uri']

     A URL is made from self.base_url and the item type and uuid found
     in the URI, and is requested as JSON. Returns the parsed JSON.
     Returns False if item has no 'uri', the URI is not recognized, or
     the request or the JSON parsing fails.
     """
     output = False
     if 'uri' in item:
         tcheck = URImanagement.get_uuid_from_oc_uri(item['uri'], True)
         if tcheck is not False:
             uuid = tcheck['uuid']
             item_type = tcheck['item_type']
             url = self.base_url + '/' + item_type + '/' + uuid
             header = {'Accept': 'application/json'}
             try:
                 r = requests.get(url,
                                  headers=header,
                                  timeout=60)
                 r.raise_for_status()
                 output = r.json()
             except (requests.exceptions.RequestException, ValueError):
                 # failed request, HTTP error status, or invalid JSON;
                 # interrupts and exits are left to propagate
                 output = False
     return output
Example #35
0
 def add_ids(self, ids):
     """ Adds stable ids to the database

     ids is expected to be a list of dicts with 'stable_id', 'id' and
     optionally 'archived' keys; anything other than a list is ignored.
     For each record that matches an item in the manifest, a
     StableIdentifer is saved and the item's archived time is set from
     'archived', or to the current time if that is missing or does not
     validate.

     Returns self.id_recorded, the running count of saved identifiers.
     """
     if not isinstance(ids, list):
         return self.id_recorded
     for id_rec in ids:
         id_and_type = self.parse_stable_id(id_rec['stable_id'])
         uuid = URImanagement.get_uuid_from_oc_uri(id_rec['id'])
         if uuid is False or id_and_type is False:
             continue
         try:
             manifest = Manifest.objects.get(uuid=uuid)
         except Manifest.DoesNotExist:
             continue
         # we found the archived item in the manifest
         # save the stable identifier in the database
         ok_new = True
         try:
             sid = StableIdentifer()
             sid.stable_id = id_and_type['id']
             sid.stable_type = id_and_type['type']
             sid.uuid = manifest.uuid
             sid.project_uuid = manifest.project_uuid
             sid.item_type = manifest.item_type
             sid.save()
         except Exception:
             # best effort: the identifier could not be saved, so it
             # is not counted
             ok_new = False
         # note when the item was last archived
         try:
             manifest.archived = self.validate_date(
                 id_rec['archived'])
             manifest.archived_save()
         except Exception:
             # no usable archived date, fall back to the current time
             manifest.archived = time.strftime('%Y-%m-%d %H:%M:%S')
             manifest.archived_save()
         if ok_new:
             self.id_recorded += 1
     return self.id_recorded
Example #36
0
 def add_ids(self, ids):
     """ Adds stable ids to the database

     Each record in the ids list needs 'stable_id' and 'id' keys, and
     may have an 'archived' key. If ids is not a list, nothing is done.
     When a record matches an item in the manifest, a StableIdentifer
     is saved for it and the item's archived time is updated, using the
     current time if the record has no valid 'archived' value.

     Returns self.id_recorded, the running count of saved identifiers.
     """
     records = ids if isinstance(ids, list) else []
     for id_rec in records:
         id_and_type = self.parse_stable_id(id_rec['stable_id'])
         manifest = False
         uuid = URImanagement.get_uuid_from_oc_uri(id_rec['id'])
         if uuid is not False and id_and_type is not False:
             try:
                 manifest = Manifest.objects.get(uuid=uuid)
             except Manifest.DoesNotExist:
                 manifest = False
         if manifest is False:
             continue
         # we found the archived item in the manifest
         # save the stable identifier in the database
         ok_new = True
         try:
             sid = StableIdentifer()
             sid.stable_id = id_and_type['id']
             sid.stable_type = id_and_type['type']
             sid.uuid = manifest.uuid
             sid.project_uuid = manifest.project_uuid
             sid.item_type = manifest.item_type
             sid.save()
         except Exception:
             # best effort: do not count an identifier that failed
             # to save
             ok_new = False
         # note when the item was last archived
         try:
             manifest.archived = self.validate_date(id_rec['archived'])
             manifest.archived_save()
         except Exception:
             # missing or invalid archived date, use the current time
             manifest.archived = time.strftime('%Y-%m-%d %H:%M:%S')
             manifest.archived_save()
         if ok_new:
             self.id_recorded += 1
     return self.id_recorded
Example #37
0
 def dereference(self, identifier, link_entity_slug=False):
     """ Dereferences an entity identified by an identifier.

         If the identifier is an http(s) URI, or link_entity_slug is
         set, the entity is first looked up in the LinkEntity table by
         uri or slug. Otherwise, or if that lookup finds nothing, the
         entity is looked up in the OC manifest by uuid or slug.

         Attributes of self (uri, slug, label, item_type, and others
         that depend on the kind of entity) are set from the record
         that is found.

         Returns True if an entity was found, False if not.
     """
     output = False
     try_manifest = True
     identifier = URImanagement.convert_prefix_to_full_uri(identifier)
     if (link_entity_slug or (len(identifier) > 8)):
         if (link_entity_slug or
             (identifier[:7] == 'http://' or identifier[:8] == 'https://')):
             try:
                 # a linked entity lookup is attempted, so the manifest is
                 # only tried if this lookup fails (see the else below)
                 try_manifest = False
                 ld_entity = LinkEntity.objects.get(
                     Q(uri=identifier) | Q(slug=identifier))
             except LinkEntity.DoesNotExist:
                 ld_entity = False
             if (ld_entity is not False):
                 output = True
                 self.uri = ld_entity.uri
                 self.slug = ld_entity.slug
                 self.label = ld_entity.label
                 self.item_type = 'uri'
                 self.alt_label = ld_entity.alt_label
                 self.entity_type = ld_entity.ent_type
                 self.vocab_uri = ld_entity.vocab_uri
                 self.ld_object_ok = True
                 # the vocabulary is itself a linked entity, use its label
                 try:
                     vocab_entity = LinkEntity.objects.get(
                         uri=self.vocab_uri)
                 except LinkEntity.DoesNotExist:
                     vocab_entity = False
                 if (vocab_entity is not False):
                     self.vocabulary = vocab_entity.label
                 if self.get_icon:
                     # an icon annotation may have the full uri, the given
                     # identifier, or the prefixed uri as its subject
                     prefix_uri = URImanagement.prefix_common_uri(
                         ld_entity.uri)
                     icon_anno = LinkAnnotation.objects\
                                               .filter(Q(subject=ld_entity.uri)
                                                       | Q(subject=identifier)
                                                       | Q(subject=prefix_uri),
                                                       predicate_uri='oc-gen:hasIcon')[:1]
                     if len(icon_anno) > 0:
                         self.icon = icon_anno[0].object_uri
             else:
                 try_manifest = True
                 # couldn't find the item in the linked entities table
                 # NOTE(review): other callers check this result for
                 # False; if it is False here, the manifest lookup below
                 # is done with False as the identifier
                 identifier = URImanagement.get_uuid_from_oc_uri(identifier)
     if (try_manifest):
         try:
             manifest_item = Manifest.objects.get(
                 Q(uuid=identifier) | Q(slug=identifier))
         except Manifest.DoesNotExist:
             manifest_item = False
         if (manifest_item is not False):
             output = True
             self.uri = URImanagement.make_oc_uri(manifest_item.uuid,
                                                  manifest_item.item_type)
             self.uuid = manifest_item.uuid
             self.slug = manifest_item.slug
             self.label = manifest_item.label
             self.item_type = manifest_item.item_type
             self.class_uri = manifest_item.class_uri
             self.project_uuid = manifest_item.project_uuid
             # the rest depends on the item type of the manifest item
             if (manifest_item.item_type == 'media' and self.get_thumbnail):
                 # a media item. get information about its thumbnail.
                 try:
                     thumb_obj = Mediafile.objects.get(
                         uuid=manifest_item.uuid,
                         file_type='oc-gen:thumbnail')
                 except Mediafile.DoesNotExist:
                     thumb_obj = False
                 if thumb_obj is not False:
                     self.thumbnail_media = thumb_obj
                     self.thumbnail_uri = thumb_obj.file_uri
             elif (manifest_item.item_type == 'types'):
                 # a types item. get its content from the type lookup
                 tl = TypeLookup()
                 tl.get_octype_without_manifest(identifier)
                 self.content = tl.content
             elif (manifest_item.item_type == 'predicates'):
                 # a predicates item. get its data type
                 try:
                     oc_pred = Predicate.objects.get(
                         uuid=manifest_item.uuid)
                 except Predicate.DoesNotExist:
                     oc_pred = False
                 if (oc_pred is not False):
                     self.data_type = oc_pred.data_type
             elif (manifest_item.item_type == 'subjects'
                   and self.get_context):
                 # a subjects item. get its context
                 try:
                     subj = Subject.objects.get(uuid=manifest_item.uuid)
                 except Subject.DoesNotExist:
                     subj = False
                 if subj is not False:
                     self.context = subj.context
     return output
 def _process_predicate_values(self, predicate_slug, predicate_type):
     """Adds the values of a predicate to the solr fields of the item.

     Reads the values for the "oc-pred:" + predicate_slug key from each
     observation in the item's json_ld "oc-gen:has-obs" list, and adds
     them to self.fields according to predicate_type:

     - "@id": for a predicate_slug other than "link", a value is
       recorded with each entity given by
       LinkRecursion().get_jsonldish_entity_parents. For "link", only
       the media and document counts and the "text" field are updated.
     - "xsd:integer", "xsd:double", "xsd:boolean": appended as is.
     - "xsd:date": appended with "T00:00:00Z" added.
     - "xsd:string": appended, and also added to the "text" field.

     Raises an Exception for any other predicate_type.
     """
     # First generate the solr field name
     solr_field_name = self._convert_slug_to_solr(
         predicate_slug + self._get_predicate_type_string(predicate_type, prefix="___pred_")
     )
     # Then get the predicate values
     if solr_field_name not in self.fields:
         self.fields[solr_field_name] = []
     if self.oc_item.item_type == "media" or self.oc_item.item_type == "documents":
         # we want to make joins easier for these types of items
         make_join_ids = True
     else:
         make_join_ids = False
     predicate_key = "oc-pred:" + predicate_slug
     for obs_list in self.oc_item.json_ld["oc-gen:has-obs"]:
         if predicate_key in obs_list:
             predicate_values = obs_list[predicate_key]
             for value in predicate_values:
                 if predicate_type == "@id":
                     if make_join_ids and "subjects" in value["id"]:
                         # case where we want to make a join field to link
                         # associated subjects items with media or document
                         # items allows join relationships between
                         # 'join___pred_id' and 'uuid' solr fields.
                         if "join___pred_id" not in self.fields:
                             self.fields["join___pred_id"] = []
                         # get subjects UUID from the URI
                         sub_uuid = URImanagement.get_uuid_from_oc_uri(value["id"])
                         # append to the solr field for joins
                         self.fields["join___pred_id"].append(sub_uuid)
                     if predicate_slug != "link":
                         active_solr_field = solr_field_name
                         parents = LinkRecursion().get_jsonldish_entity_parents(value["id"])
                         all_obj_solr_field = "obj_all___" + active_solr_field
                         if all_obj_solr_field not in self.fields:
                             self.fields[all_obj_solr_field] = []
                         for parent in parents:
                             if active_solr_field not in self.fields:
                                 self.fields[active_solr_field] = []
                             active_solr_value = self._concat_solr_string_value(
                                 parent["slug"],
                                 self._get_predicate_type_string(parent["type"]),
                                 parent["id"],
                                 parent["label"],
                             )
                             self.fields["text"] += " " + parent["label"] + " "
                             self.fields[active_solr_field].append(active_solr_value)
                             # also record the value in the "obj_all___" field,
                             # so all items in the hierarchy are present and
                             # can be queried, even if you don't know the parent
                             self.fields[all_obj_solr_field].append(active_solr_value)
                             # the next entity in the list is recorded in a
                             # field named with this entity's slug
                             active_solr_field = self._convert_slug_to_solr(parent["slug"]) + "___" + solr_field_name
                     else:
                         # case of a linking relation, don't bother looking
                         # up hierarchies or recording as a solr field, but
                         # check for image, other media, and document counts
                         if "media" in value["id"] and "image" in value["type"]:
                             self.fields["image_media_count"] += 1
                         elif "media" in value["id"] and "image" not in value["type"]:
                             # other types of media
                             self.fields["other_binary_media_count"] += 1
                         elif "documents" in value["id"]:
                             self.fields["document_count"] += 1
                         self.fields["text"] += value["label"] + " "
                 elif predicate_type in ["xsd:integer", "xsd:double", "xsd:boolean"]:
                     self.fields[solr_field_name].append(value)
                 elif predicate_type == "xsd:date":
                     # append a midnight UTC time to the date value
                     self.fields[solr_field_name].append(value + "T00:00:00Z")
                 elif predicate_type == "xsd:string":
                     self.fields["text"] += value["xsd:string"] + " \n"
                     self.fields[solr_field_name].append(value["xsd:string"])
                 else:
                     raise Exception("Error: Could not get predicate value")
             self.fields["text"] += " \n"
Example #39
0
def get_identifier_query_dict(raw_identifier):
    """Make a solr query dict that finds items by identifier.

    The raw identifier is split into alternatives on
    ``configs.REQUEST_OR_OPERATOR``. Every alternative is queried as a
    persistent URI and, when it has no ':' character, also as a UUID
    and as a slug. Alternatives are additionally stripped of DOI /
    ORCID style prefixes and expanded through the configured URI
    templates, and any Open Context URI among them contributes its
    UUID. All terms are joined with OR into a single filter query.

    :param raw_identifier: identifier value taken from the request.
    :return: ``None`` when raw_identifier is empty, otherwise a dict
        of the form ``{'fq': [<OR-joined query terms>]}``.
    """
    if not raw_identifier:
        return None

    values_list = utilities.infer_multiple_or_hierarchy_paths(
        raw_identifier,
        or_delim=configs.REQUEST_OR_OPERATOR,
        hierarchy_delim=None)
    # Empty alternatives carry no information; drop them once here
    # instead of re-checking in every loop below.
    values = [value for value in values_list if value]

    id_list = []
    for value in values:
        id_list += utilities.make_uri_equivalence_list(value)

    fq_terms = []
    for act_id in id_list:
        # The act_id maybe a persistent URI, escape it and
        # query the persistent_uri string.
        escape_id = utilities.escape_solr_arg(act_id)
        fq_terms.append('persistent_uri:{}'.format(escape_id))
        if ':' in act_id:
            # Skip below, because the act_id has a
            # character that's not in uuids or slugs.
            continue
        # The act_id maybe a UUID.
        fq_terms.append('uuid:{}'.format(act_id))
        # The act_id maybe a slug, so do a prefix query
        # for document slug_type_uri_label.
        fq_terms.append('slug_type_uri_label:{}'.format(
            utilities.fq_slug_value_format(act_id)))

    # Now make URIs in case we have a naked identifier. The patterns
    # do not depend on the value, so compile them once up front
    # rather than once per value and prefix.
    prefix_patterns = [
        re.compile(re.escape(prefix), re.IGNORECASE)
        for prefix in (
            'doi:', 'orcid:', 'http://dx.doi.org/', 'https://dx.doi.org/',
            'http://doi.org/', 'https://doi.org/',
        )
    ]
    for value in values:
        for prefix_pattern in prefix_patterns:
            # strip ID prefixes, case insensitive
            identifier = prefix_pattern.sub('', value)
            if identifier.startswith(('http://', 'https://')):
                # Still a full URI after stripping, so there is no
                # naked identifier to put into a template.
                continue

            # Only loop through URI templates for N2T if
            # we have an ARK identifier.
            if identifier.startswith('ark:'):
                uri_templates = configs.N2T_URI_TEMPLATES
            else:
                uri_templates = configs.PERSISTENT_URI_TEMPLATES
            for uri_template in uri_templates:
                escaped_uri = utilities.escape_solr_arg(
                    uri_template.format(id=identifier))
                fq_term = 'persistent_uri:{}'.format(escaped_uri)
                # Skip terms that we already have.
                if fq_term not in fq_terms:
                    fq_terms.append(fq_term)

        # Now see if there's a UUID in the identifier.
        oc_check = URImanagement.get_uuid_from_oc_uri(value, True)
        if oc_check:
            # We have an identifier we can interpret as an
            # Open Context URI. So extract the uuid part.
            fq_term = 'uuid:{}'.format(oc_check['uuid'])
            if fq_term not in fq_terms:
                fq_terms.append(fq_term)

    # Join the various identifier queries as OR terms.
    return {
        'fq': [utilities.join_solr_query_terms(fq_terms, operator='OR')],
    }
Example #40
0
 def make_datacite_metadata_xml(self, parent_node, json_ld):
     """ makes metadata for the datacite specification, which
         is also used in the oai_datacite specification

         Adds a 'resource' child to parent_node and fills it from
         the item's JSON-LD in this order: identifier, titles,
         creators, contributors, dates, publisher, publicationYear,
         resourceType, formats, subjects and relatedIdentifiers.

         :param parent_node: XML element that receives the new
             'resource' element
         :param json_ld: JSON-LD dict of the item; the 'id' key is
             read unconditionally, all other keys are optional
         :return: None, the XML tree is modified in place
     """
     tcheck = URImanagement.get_uuid_from_oc_uri(json_ld['id'], True)
     if tcheck is False:
         item_type = False
     else:
         item_type = tcheck['item_type']
     # The DataCite root element is named 'resource'.
     resource_xml = etree.SubElement(
         parent_node,
         'resource',
         nsmap={None: self.DATACITE_RESOURCE['ns']},
         attrib={
             '{' + self.XSI_NS + '}schemaLocation':
             self.DATACITE_RESOURCE['schemaLocation']
         })
     identifiers = {}
     if 'id' in json_ld:
         identifiers['URL'] = json_ld['id']
     if 'owl:sameAs' in json_ld:
         if isinstance(json_ld['owl:sameAs'], list):
             for ld_item in json_ld['owl:sameAs']:
                 if 'doi' in ld_item['id']:
                     identifiers['DOI'] = ld_item['id'].replace(
                         'http://dx.doi.org/', '')
                 if 'ark' in ld_item['id']:
                     identifiers['ARK'] = ld_item['id'].replace(
                         'http://n2t.net/', '')
     # Only one identifier is written: prefer DOI, then ARK, then URL.
     for id_type in ('DOI', 'ARK', 'URL'):
         if id_type in identifiers:
             act_xml = etree.SubElement(
                 resource_xml,
                 'identifier',
                 attrib={'identifierType': id_type})
             act_xml.text = identifiers[id_type]
             break
     act_node = etree.SubElement(resource_xml, 'titles')
     dc_title = None
     if 'dc-terms:title' in json_ld:
         dc_title = json_ld['dc-terms:title']
         act_xml = etree.SubElement(act_node, 'title')
         act_xml.text = dc_title
     if 'label' in json_ld:
         # the label is only a second title if it says something new
         if dc_title != json_ld['label']:
             act_xml = etree.SubElement(act_node, 'title')
             act_xml.text = json_ld['label']
     # creators and contributors share one structure: a wrapper
     # element with one child per JSON-LD list item
     for ld_key, group_tag, child_tag in (
             ('dc-terms:creator', 'creators', 'creator'),
             ('dc-terms:contributor', 'contributors', 'contributor')):
         if ld_key in json_ld and isinstance(json_ld[ld_key], list):
             act_node = etree.SubElement(resource_xml, group_tag)
             for ld_item in json_ld[ld_key]:
                 act_xml = etree.SubElement(act_node, child_tag)
                 if 'label' in ld_item:
                     act_xml.text = ld_item['label']
     act_node = etree.SubElement(resource_xml, 'dates')
     # today's date is only the fallback for the publication year
     create_date = time.strftime('%Y-%m-%d')
     if 'dc-terms:issued' in json_ld:
         create_date = json_ld['dc-terms:issued']
         date_xml = etree.SubElement(act_node,
                                     'date',
                                     attrib={'dateType': 'Available'})
         date_xml.text = create_date
     if 'dc-terms:modified' in json_ld:
         date_xml = etree.SubElement(act_node,
                                     'date',
                                     attrib={'dateType': 'Updated'})
         date_xml.text = json_ld['dc-terms:modified']
     act_node = etree.SubElement(resource_xml, 'publisher')
     act_node.text = self.publisher_name
     act_node = etree.SubElement(resource_xml, 'publicationYear')
     act_node.text = create_date[:4]  # the year, first 4 characters
     # now add the Datacite resource type
     if item_type in self.DATACITE_RESOURCE_TYPES:
         act_rt = self.DATACITE_RESOURCE_TYPES[item_type]
     else:
         act_rt = self.DATACITE_RESOURCE_TYPES['other']
     rt_xml = etree.SubElement(
         resource_xml,
         'resourceType',
         attrib={'resourceTypeGeneral': act_rt['ResourceTypeGeneral']})
     rt_xml.text = act_rt['oc']
     # now add relevant mime-types
     if item_type in self.DC_FORMATS:
         # Work on a copy: appending to the list stored in
         # DC_FORMATS would leak this item's mime-types into
         # every later call.
         format_list = list(self.DC_FORMATS[item_type])
         if item_type == 'media' \
            and 'oc-gen:has-files' in json_ld \
            and isinstance(json_ld['oc-gen:has-files'], list):
             for act_f in json_ld['oc-gen:has-files']:
                 if 'type' in act_f and 'dc-terms:hasFormat' in act_f:
                     if act_f['type'] == 'oc-gen:fullfile':
                         mime_uri = act_f['dc-terms:hasFormat']
                         format_list.append(
                             mime_uri.replace(
                                 'http://purl.org/NET/mediatypes/',
                                 ''))
     else:
         format_list = self.DC_FORMATS['other']
     act_node = etree.SubElement(resource_xml, 'formats')
     for mime in format_list:
         act_xml = etree.SubElement(act_node, 'format')
         act_xml.text = mime
     subjects_list = []
     if 'category' in json_ld:
         cat = json_ld['category'][0]
         cat_label = self.get_category_label(cat, json_ld)
         if cat_label is not False:
             subjects_list.append(cat_label)
     if 'dc-terms:subject' in json_ld:
         if isinstance(json_ld['dc-terms:subject'], list):
             for subj in json_ld['dc-terms:subject']:
                 if 'label' in subj:
                     subjects_list.append(subj['label'])
     if subjects_list:
         act_node = etree.SubElement(resource_xml, 'subjects')
         for subject in subjects_list:
             act_xml = etree.SubElement(act_node, 'subject')
             act_xml.text = subject
     related_ids = []
     if 'dc-terms:isPartOf' in json_ld:
         if isinstance(json_ld['dc-terms:isPartOf'], list):
             related_ids = [rel['id']
                            for rel in json_ld['dc-terms:isPartOf']
                            if 'id' in rel]
     if related_ids:
         # DataCite nests lower-camel-case 'relatedIdentifier'
         # elements inside a 'relatedIdentifiers' wrapper.
         act_node = etree.SubElement(resource_xml, 'relatedIdentifiers')
         for related in related_ids:
             act_xml = etree.SubElement(
                 act_node,
                 'relatedIdentifier',
                 attrib={'relatedIdentifierType': 'URL',
                         'relationType': 'IsPartOf'})
             act_xml.text = related
Example #41
0
 def process_equivalent_linked_data(self):
     """ Indexes linked data that is equivalent or related to
         the item, which is useful for entity reconciliation.

         Entities found under the predicates listed in
         LD_EQUIVALENT_PREDICATES (except 'foaf' ones) are all
         recorded as SKOS close matches. Entities found under
         'skos:related' are recorded as SKOS related. In both cases
         labels and ids are also added to the 'text' field.
     """
     def prepare_fields(fname, allname, slug, uri, label):
         # On first use of a predicate field, create it and list
         # the predicate itself in the root linked data field.
         if fname not in self.fields:
             self.fields[fname] = []
             if self.ROOT_LINK_DATA_SOLR not in self.fields:
                 self.fields[self.ROOT_LINK_DATA_SOLR] = []
             root_item = self._concat_solr_string_value(
                 slug, 'id', uri, label)
             self.fields[self.ROOT_LINK_DATA_SOLR].append(root_item)
         if allname not in self.fields:
             self.fields[allname] = []

     def index_web_entity(fname, allname, entity):
         # Record an entity identified by a web URI in the text
         # and predicate fields, then process its URI.
         self.fields['text'] += entity['label'] + '\n'
         self.fields['text'] += entity['id'] + '\n'
         solr_value = self._concat_solr_string_value(
             entity['slug'], 'id', entity['id'], entity['label'])
         self.fields[fname].append(solr_value)
         self.fields[allname].append(solr_value)
         self.process_object_uri(entity['id'])

     # for now, every equivalence predicate defaults to a close match
     close_field = 'skos_closematch___pred_id'
     close_all_field = 'obj_all___skos_closematch___pred_id'
     for equiv_uri in self.LD_EQUIVALENT_PREDICATES:
         if equiv_uri not in self.oc_item.json_ld \
            or 'foaf' in equiv_uri:
             continue
         prepare_fields(
             close_field,
             close_all_field,
             'skos-closematch',
             'http://www.w3.org/2004/02/skos/core#closeMatch',
             'Close Match')
         for entity in self.oc_item.json_ld[equiv_uri]:
             if 'http://' in entity['id'] \
                or 'https://' in entity['id']:
                 index_web_entity(close_field, close_all_field, entity)

     if 'skos:related' not in self.oc_item.json_ld:
         return
     rel_field = 'skos_related___pred_id'
     rel_all_field = 'obj_all___skos_related___pred_id'
     prepare_fields(
         rel_field,
         rel_all_field,
         'skos-related',
         'http://www.w3.org/2004/02/skos/core#related',
         'Related')
     for entity in self.oc_item.json_ld['skos:related']:
         if 'http://' in entity['id'] \
            or 'https://' in entity['id']:
             index_web_entity(rel_field, rel_all_field, entity)
         elif 'oc-pred:' in entity['id'] \
             and 'owl:sameAs' in entity:
             # a predicate of this dataset; it is indexed with a
             # local '/predicates/' path built from its uuid
             # NOTE(review): assumes the owl:sameAs URI always
             # yields a uuid string here -- confirm
             pred_uuid = URImanagement.get_uuid_from_oc_uri(
                 entity['owl:sameAs'])
             self.fields['text'] += entity['label'] + '\n'
             self.fields['text'] += entity['id'] + '\n'
             solr_value = self._concat_solr_string_value(
                 entity['slug'],
                 'id',
                 '/predicates/' + pred_uuid,
                 entity['label'])
             self.fields[rel_field].append(solr_value)
             self.fields[rel_all_field].append(solr_value)
Example #42
0
 def make_datacite_metadata_xml(self, parent_node, json_ld):
     """ makes metadata for the datacite specification, which
         is also used in the oai_datacite specification

         Adds a 'resource' child to parent_node and fills it from
         the item's JSON-LD in this order: identifier, titles,
         creators, contributors, dates, publisher, publicationYear,
         resourceType, formats, subjects and relatedIdentifiers.

         :param parent_node: XML element that receives the new
             'resource' element
         :param json_ld: JSON-LD dict of the item; the 'id' key is
             read unconditionally, all other keys are optional
         :return: None, the XML tree is modified in place
     """
     tcheck = URImanagement.get_uuid_from_oc_uri(json_ld['id'], True)
     if tcheck is False:
         item_type = False
     else:
         item_type = tcheck['item_type']
     # The DataCite root element is named 'resource'.
     resource_xml = etree.SubElement(
         parent_node,
         'resource',
         nsmap={None: self.DATACITE_RESOURCE['ns']},
         attrib={
             '{' + self.XSI_NS + '}schemaLocation':
             self.DATACITE_RESOURCE['schemaLocation']
         })
     identifiers = {}
     if 'id' in json_ld:
         identifiers['URL'] = json_ld['id']
     if 'owl:sameAs' in json_ld:
         if isinstance(json_ld['owl:sameAs'], list):
             for ld_item in json_ld['owl:sameAs']:
                 if 'doi' in ld_item['id']:
                     identifiers['DOI'] = ld_item['id'].replace(
                         'http://dx.doi.org/', '')
                 if 'ark' in ld_item['id']:
                     identifiers['ARK'] = ld_item['id'].replace(
                         'http://n2t.net/', '')
     # Only one identifier is written: prefer DOI, then ARK, then URL.
     for id_type in ('DOI', 'ARK', 'URL'):
         if id_type in identifiers:
             act_xml = etree.SubElement(
                 resource_xml,
                 'identifier',
                 attrib={'identifierType': id_type})
             act_xml.text = identifiers[id_type]
             break
     act_node = etree.SubElement(resource_xml, 'titles')
     dc_title = None
     if 'dc-terms:title' in json_ld:
         dc_title = json_ld['dc-terms:title']
         act_xml = etree.SubElement(act_node, 'title')
         act_xml.text = dc_title
     if 'label' in json_ld:
         # the label is only a second title if it says something new
         if dc_title != json_ld['label']:
             act_xml = etree.SubElement(act_node, 'title')
             act_xml.text = json_ld['label']
     # creators and contributors share one structure: a wrapper
     # element with one child per JSON-LD list item
     for ld_key, group_tag, child_tag in (
             ('dc-terms:creator', 'creators', 'creator'),
             ('dc-terms:contributor', 'contributors', 'contributor')):
         if ld_key in json_ld and isinstance(json_ld[ld_key], list):
             act_node = etree.SubElement(resource_xml, group_tag)
             for ld_item in json_ld[ld_key]:
                 act_xml = etree.SubElement(act_node, child_tag)
                 if 'label' in ld_item:
                     act_xml.text = ld_item['label']
     act_node = etree.SubElement(resource_xml, 'dates')
     # today's date is only the fallback for the publication year
     create_date = time.strftime('%Y-%m-%d')
     if 'dc-terms:issued' in json_ld:
         create_date = json_ld['dc-terms:issued']
         date_xml = etree.SubElement(act_node,
                                     'date',
                                     attrib={'dateType': 'Available'})
         date_xml.text = create_date
     if 'dc-terms:modified' in json_ld:
         date_xml = etree.SubElement(act_node,
                                     'date',
                                     attrib={'dateType': 'Updated'})
         date_xml.text = json_ld['dc-terms:modified']
     act_node = etree.SubElement(resource_xml, 'publisher')
     act_node.text = self.publisher_name
     act_node = etree.SubElement(resource_xml, 'publicationYear')
     act_node.text = create_date[:4]  # the year, first 4 characters
     # now add the Datacite resource type
     if item_type in self.DATACITE_RESOURCE_TYPES:
         act_rt = self.DATACITE_RESOURCE_TYPES[item_type]
     else:
         act_rt = self.DATACITE_RESOURCE_TYPES['other']
     rt_xml = etree.SubElement(
         resource_xml,
         'resourceType',
         attrib={'resourceTypeGeneral': act_rt['ResourceTypeGeneral']})
     rt_xml.text = act_rt['oc']
     # now add relevant mime-types
     if item_type in self.DC_FORMATS:
         # Work on a copy: appending to the list stored in
         # DC_FORMATS would leak this item's mime-types into
         # every later call.
         format_list = list(self.DC_FORMATS[item_type])
         if item_type == 'media' \
            and 'oc-gen:has-files' in json_ld \
            and isinstance(json_ld['oc-gen:has-files'], list):
             for act_f in json_ld['oc-gen:has-files']:
                 if 'type' in act_f and 'dc-terms:hasFormat' in act_f:
                     if act_f['type'] == 'oc-gen:fullfile':
                         mime_uri = act_f['dc-terms:hasFormat']
                         format_list.append(
                             mime_uri.replace(
                                 'http://purl.org/NET/mediatypes/',
                                 ''))
     else:
         format_list = self.DC_FORMATS['other']
     act_node = etree.SubElement(resource_xml, 'formats')
     for mime in format_list:
         act_xml = etree.SubElement(act_node, 'format')
         act_xml.text = mime
     subjects_list = []
     if 'category' in json_ld:
         cat = json_ld['category'][0]
         cat_label = self.get_category_label(cat, json_ld)
         if cat_label is not False:
             subjects_list.append(cat_label)
     if 'dc-terms:subject' in json_ld:
         if isinstance(json_ld['dc-terms:subject'], list):
             for subj in json_ld['dc-terms:subject']:
                 if 'label' in subj:
                     subjects_list.append(subj['label'])
     if subjects_list:
         act_node = etree.SubElement(resource_xml, 'subjects')
         for subject in subjects_list:
             act_xml = etree.SubElement(act_node, 'subject')
             act_xml.text = subject
     related_ids = []
     if 'dc-terms:isPartOf' in json_ld:
         if isinstance(json_ld['dc-terms:isPartOf'], list):
             related_ids = [rel['id']
                            for rel in json_ld['dc-terms:isPartOf']
                            if 'id' in rel]
     if related_ids:
         # DataCite nests lower-camel-case 'relatedIdentifier'
         # elements inside a 'relatedIdentifiers' wrapper.
         act_node = etree.SubElement(resource_xml, 'relatedIdentifiers')
         for related in related_ids:
             act_xml = etree.SubElement(
                 act_node,
                 'relatedIdentifier',
                 attrib={'relatedIdentifierType': 'URL',
                         'relationType': 'IsPartOf'})
             act_xml.text = related
Example #43
0
 def make_dc_metadata_xml(self, parent_node, json_ld):
     """ makes metadata in the dublin core format

         Adds an 'oai_dc' dc element to parent_node and fills it
         from the item's JSON-LD with title, date, creators,
         contributors, identifiers, type, publisher, formats and
         subjects. Nothing is added when no 'oai_dc' metadata
         format attributes are available.

         :param parent_node: XML element that receives the new
             element
         :param json_ld: JSON-LD dict of the item; the 'id' key is
             read unconditionally, all other keys are optional
         :return: None, the XML tree is modified in place
     """
     act_format = self.get_metadata_format_attributes('oai_dc')
     if act_format is False:
         return None
     tcheck = URImanagement.get_uuid_from_oc_uri(json_ld['id'], True)
     if tcheck is False:
         item_type = False
     else:
         item_type = tcheck['item_type']
     dc = 'http://purl.org/dc/elements/1.1/'
     ns = {'dc': dc,
           'oai_dc': act_format['ns'],
           'xsi': self.XSI_NS}
     format_xml = etree.SubElement(
         parent_node,
         '{' + act_format['ns'] + '}dc',
         nsmap=ns,
         attrib={
             '{' + self.XSI_NS + '}schemaLocation':
             act_format['schemaLocation']
         })
     # the title element is always present, even if it stays empty
     title_xml = etree.SubElement(format_xml, '{' + dc + '}title')
     if 'dc-terms:title' in json_ld:
         title_xml.text = json_ld['dc-terms:title']
     elif 'label' in json_ld:
         title_xml.text = json_ld['label']
     if 'dc-terms:issued' in json_ld:
         date_xml = etree.SubElement(format_xml, '{' + dc + '}date')
         date_xml.text = json_ld['dc-terms:issued']
     for ld_key, dc_tag in (('dc-terms:creator', 'creator'),
                            ('dc-terms:contributor', 'contributor')):
         if ld_key in json_ld and isinstance(json_ld[ld_key], list):
             for ld_item in json_ld[ld_key]:
                 act_xml = etree.SubElement(format_xml,
                                            '{' + dc + '}' + dc_tag)
                 if 'label' in ld_item:
                     act_xml.text = ld_item['label']
     if 'owl:sameAs' in json_ld:
         if isinstance(json_ld['owl:sameAs'], list):
             for ld_item in json_ld['owl:sameAs']:
                 act_xml = etree.SubElement(format_xml,
                                            '{' + dc + '}identifier')
                 act_xml.text = ld_item['id']
     if 'id' in json_ld:
         act_xml = etree.SubElement(format_xml, '{' + dc + '}identifier')
         act_xml.text = json_ld['id']
     if item_type in self.DATACITE_RESOURCE_TYPES:
         act_rt = self.DATACITE_RESOURCE_TYPES[item_type]
     else:
         act_rt = self.DATACITE_RESOURCE_TYPES['other']
     rt_xml = etree.SubElement(format_xml, '{' + dc + '}type')
     rt_xml.text = act_rt['ResourceTypeGeneral']
     publisher = etree.SubElement(format_xml, '{' + dc + '}publisher')
     publisher.text = self.publisher_name
     if item_type in self.DC_FORMATS:
         # Work on a copy: appending to the list stored in
         # DC_FORMATS would leak this item's mime-types into
         # every later call.
         format_list = list(self.DC_FORMATS[item_type])
         if item_type == 'media' \
            and 'oc-gen:has-files' in json_ld \
            and isinstance(json_ld['oc-gen:has-files'], list):
             for act_f in json_ld['oc-gen:has-files']:
                 if 'type' in act_f and 'dc-terms:hasFormat' in act_f:
                     if act_f['type'] == 'oc-gen:fullfile':
                         mime_uri = act_f['dc-terms:hasFormat']
                         format_list.append(
                             mime_uri.replace(
                                 'http://purl.org/NET/mediatypes/',
                                 ''))
     else:
         format_list = self.DC_FORMATS['other']
     for mime in format_list:
         act_xml = etree.SubElement(format_xml, '{' + dc + '}format')
         act_xml.text = mime
     subjects_list = []
     if 'category' in json_ld:
         cat = json_ld['category'][0]
         cat_label = self.get_category_label(cat, json_ld)
         if cat_label is not False:
             subjects_list.append(cat_label)
     if 'dc-terms:subject' in json_ld:
         if isinstance(json_ld['dc-terms:subject'], list):
             for subj in json_ld['dc-terms:subject']:
                 if 'label' in subj:
                     subjects_list.append(subj['label'])
     for subject in subjects_list:
         act_xml = etree.SubElement(format_xml, '{' + dc + '}subject')
         act_xml.text = subject
Example #44
0
 def get_description_tree(self,
                          entity_obj,
                          depth=1,
                          first_time=True,
                          item_type=False,
                          class_uri=False):
     """ gets a hierarchy for descriptive
         predicates and types

         :param entity_obj: entity at the root of the tree; its
             item_type picks the kind of children to look up
         :param depth: levels of children to include; values
             above 1 make the lookup recurse
         :param first_time: True for the outermost call, which
             wraps the tree in a list; recursive calls pass False
         :param item_type: for projects, limits the children to
             the classes used with this item_type
         :param class_uri: for projects, together with item_type,
             limits the children to the predicates used with it
         :return: a list holding the tree if first_time is True,
             the tree itself otherwise, or an empty list if the
             entity is not a project, predicate or type
     """
     if entity_obj.item_type == 'projects':
         tree = self.make_containment_item(entity_obj)
         if item_type is not False and class_uri is False:
             # returns the classes associated with an item_type for a project
             tree['label'] = tree['label'] + ', ' + item_type
             tree['children'] = self.get_proj_type_classes_items(
                 entity_obj.uuid, item_type)
         elif item_type is not False and class_uri is not False:
             # returns the predicates associated with an item_type and class_uri
             tree['children'] = self.get_proj_type_class_preds(
                 entity_obj.uuid, item_type, class_uri, True)
         else:
             # project root, returns the item_types for the project
             tree['children'] = self.get_proj_types(entity_obj.uuid)
     elif entity_obj.item_type == 'predicates':
         lr = LinkRecursion()
         tree = self.make_containment_item(entity_obj)
         tree['children'] = []
         child_list = lr.get_entity_children(entity_obj.uuid, False)
         if len(child_list) > 0:
             for child_uuid in child_list:
                 child_ent = Entity()
                 if not child_ent.dereference(child_uuid):
                     continue
                 if depth > 1:
                     # NOTE(review): this recurses through
                     # get_containment_children while the 'types'
                     # branch uses get_description_tree -- confirm
                     # that this is intended
                     child = self.get_containment_children(
                         child_ent, depth - 1, False)
                 else:
                     child = self.make_containment_item(child_ent)
                 tree['children'].append(child)
         elif entity_obj.data_type == 'id':
             # no child predicates, so list the top ranked types
             # used with this predicate instead
             top_types = lr.get_pred_top_rank_types(entity_obj.uuid)
             for top_type in top_types:
                 uuid = URImanagement.get_uuid_from_oc_uri(top_type['id'])
                 if depth > 1:
                     child_ent = Entity()
                     if not child_ent.dereference(uuid):
                         # Skip types that can't be dereferenced,
                         # otherwise a bare False would end up in
                         # the children that get sorted below.
                         continue
                     item = self.get_description_tree(
                         child_ent, depth - 1, False)
                 else:
                     item = LastUpdatedOrderedDict()
                     item['id'] = uuid
                     item['label'] = top_type['label']
                     item['class_uri'] = 'type'
                     item['class_label'] = 'type'
                 tree['children'].append(item)
             tree['children'] = self.sort_children_by_label(
                 tree['children'])
     elif entity_obj.item_type == 'types':
         lr = LinkRecursion()
         tree = self.make_containment_item(entity_obj)
         tree['children'] = []
         act_children = lr.get_entity_children(entity_obj.uuid, False)
         for child_uuid in act_children:
             if child_uuid == entity_obj.uuid:
                 # a type is never listed as its own child
                 continue
             child_ent = Entity()
             if not child_ent.dereference(child_uuid):
                 continue
             if depth > 1:
                 child = self.get_description_tree(
                     child_ent, depth - 1, False)
             else:
                 child = self.make_containment_item(child_ent)
             child['class_uri'] = 'type'
             child['class_label'] = 'type'
             tree['children'].append(child)
         if len(tree['children']) == 0:
             tree.pop('children', None)
         else:
             tree['children'] = self.sort_children_by_label(
                 tree['children'])
     else:
         return []
     # only the outermost call wraps the tree in a list
     if first_time:
         return [tree]
     return tree
Example #45
0
 def dereference(self, identifier, link_entity_slug=False):
     """ Looks up the entity named by an identifier and copies
         what is found onto this object.

         An identifier that looks like an http(s) URI (or any
         identifier if link_entity_slug is set) is first looked up
         among the linked entities. If it is not found there, or
         does not look like a URI, it is looked up in the manifest
         by uuid or slug.

         :param identifier: URI, prefixed URI, uuid or slug
         :param link_entity_slug: if True, always try the linked
             entities first
         :return: True if an entity was found, otherwise False
     """
     found = False
     check_manifest = True
     identifier = URImanagement.convert_prefix_to_full_uri(identifier)
     try_linked = link_entity_slug or (
         len(identifier) > 8
         and (identifier[:7] == 'http://'
              or identifier[:8] == 'https://'))
     if try_linked:
         try:
             ld_entity = LinkEntity.objects.get(
                 Q(uri=identifier) | Q(slug=identifier))
         except LinkEntity.DoesNotExist:
             ld_entity = False
         if ld_entity is False:
             # couldn't find the item in the linked entities table,
             # so fall back to the manifest with the uuid from the URI
             identifier = URImanagement.get_uuid_from_oc_uri(identifier)
         else:
             check_manifest = False
             found = True
             self.uri = ld_entity.uri
             self.slug = ld_entity.slug
             self.label = ld_entity.label
             self.item_type = 'uri'
             self.alt_label = ld_entity.alt_label
             self.entity_type = ld_entity.ent_type
             self.vocab_uri = ld_entity.vocab_uri
             self.ld_object_ok = True
             try:
                 vocab_entity = LinkEntity.objects.get(uri=self.vocab_uri)
             except LinkEntity.DoesNotExist:
                 vocab_entity = False
             if vocab_entity is not False:
                 self.vocabulary = vocab_entity.label
             if self.get_icon:
                 # the icon may be annotated on any form of the URI
                 prefix_uri = URImanagement.prefix_common_uri(ld_entity.uri)
                 subject_q = (Q(subject=ld_entity.uri)
                              | Q(subject=identifier)
                              | Q(subject=prefix_uri))
                 icon_anno = LinkAnnotation.objects.filter(
                     subject_q,
                     predicate_uri='oc-gen:hasIcon')[:1]
                 if len(icon_anno) > 0:
                     self.icon = icon_anno[0].object_uri
     if not check_manifest:
         return found
     try:
         manifest_item = Manifest.objects.get(
             Q(uuid=identifier) | Q(slug=identifier))
     except Manifest.DoesNotExist:
         return found
     found = True
     self.uri = URImanagement.make_oc_uri(manifest_item.uuid,
                                          manifest_item.item_type)
     self.uuid = manifest_item.uuid
     self.slug = manifest_item.slug
     self.label = manifest_item.label
     self.item_type = manifest_item.item_type
     self.class_uri = manifest_item.class_uri
     self.project_uuid = manifest_item.project_uuid
     if manifest_item.item_type == 'media' and self.get_thumbnail:
         # a media item. get information about its thumbnail.
         try:
             thumb_obj = Mediafile.objects.get(
                 uuid=manifest_item.uuid,
                 file_type='oc-gen:thumbnail')
         except Mediafile.DoesNotExist:
             thumb_obj = False
         if thumb_obj is not False:
             self.thumbnail_media = thumb_obj
             self.thumbnail_uri = thumb_obj.file_uri
     elif manifest_item.item_type == 'types':
         # NOTE(review): identifier may be a slug here because the
         # manifest lookup matches uuid or slug -- confirm that
         # get_octype_without_manifest accepts both
         tl = TypeLookup()
         tl.get_octype_without_manifest(identifier)
         self.content = tl.content
     elif manifest_item.item_type == 'predicates':
         try:
             oc_pred = Predicate.objects.get(uuid=manifest_item.uuid)
         except Predicate.DoesNotExist:
             oc_pred = False
         if oc_pred is not False:
             self.data_type = oc_pred.data_type
     elif manifest_item.item_type == 'subjects' and self.get_context:
         try:
             subj = Subject.objects.get(uuid=manifest_item.uuid)
         except Subject.DoesNotExist:
             subj = False
         if subj is not False:
             self.context = subj.context
     return found
Example #46
0
 def _process_predicate_values(self, predicate_slug, predicate_type):
     """ Indexes the values of one predicate into the solr fields.

     Every observation in the item's JSON-LD is checked for the key
     'oc-pred:' + predicate_slug, and each value found there is added
     to self.fields according to predicate_type:
       '@id'       -> hierarchy fields for the value's parents (or,
                      for the 'link' predicate, only media and
                      document counts), with labels added as text
       'xsd:integer', 'xsd:double', 'xsd:boolean' -> the value as is
       'xsd:date'  -> the value with 'T00:00:00Z' appended
       'xsd:string'-> the string, also added to the 'text' field
     Raises an Exception when a value has any other predicate_type.
     """
     # the solr field name is the slug plus a suffix for the data type
     type_suffix = self._get_predicate_type_string(
         predicate_type, prefix='___pred_')
     solr_field_name = self._convert_slug_to_solr(
         predicate_slug + type_suffix)
     if solr_field_name not in self.fields:
         self.fields[solr_field_name] = []
     # media and documents items get an extra field so that solr
     # joins to their associated subjects items are easier
     make_join_ids = (self.oc_item.item_type == 'media'
                      or self.oc_item.item_type == 'documents')
     plain_value_types = ('xsd:integer', 'xsd:double', 'xsd:boolean')
     predicate_key = 'oc-pred:' + predicate_slug
     for observation in self.oc_item.json_ld['oc-gen:has-obs']:
         if predicate_key not in observation:
             # this observation does not use the predicate
             continue
         for obj in observation[predicate_key]:
             if predicate_type == '@id':
                 if make_join_ids and 'subjects' in obj['id']:
                     # record the UUID of the linked subjects item in
                     # 'join___pred_id', which allows joins between
                     # the 'join___pred_id' and 'uuid' solr fields
                     if 'join___pred_id' not in self.fields:
                         self.fields['join___pred_id'] = []
                     subject_uuid = URImanagement.get_uuid_from_oc_uri(
                         obj['id'])
                     self.fields['join___pred_id'].append(subject_uuid)
                 if predicate_slug == 'link':
                     # a linking relation: no hierarchy lookup and no
                     # solr field of its own, only counts of linked
                     # images, other media, and documents
                     if 'media' in obj['id']:
                         if 'image' in obj['type']:
                             self.fields['image_media_count'] += 1
                         else:
                             # other types of media
                             self.fields['other_binary_media_count'] += 1
                     elif 'documents' in obj['id']:
                         self.fields['document_count'] += 1
                     self.fields['text'] += obj['label'] + ' '
                 else:
                     hierarchy = LinkRecursion(
                         ).get_jsonldish_entity_parents(obj['id'])
                     level_field = solr_field_name
                     all_obj_solr_field = 'obj_all___' + solr_field_name
                     if all_obj_solr_field not in self.fields:
                         self.fields[all_obj_solr_field] = []
                     for parent in hierarchy:
                         if level_field not in self.fields:
                             self.fields[level_field] = []
                         solr_value = self._concat_solr_string_value(
                             parent['slug'],
                             self._get_predicate_type_string(parent['type']),
                             parent['id'],
                             parent['label'])
                         self.fields['text'] += ' ' + parent['label'] + ' '
                         self.fields[level_field].append(solr_value)
                         # every item of the hierarchy also goes in the
                         # 'obj_all___' field, so it can be queried
                         # without knowing its parent
                         self.fields[all_obj_solr_field].append(solr_value)
                         # the next level down is indexed in a field
                         # named after this parent
                         level_field = self._convert_slug_to_solr(
                             parent['slug']) + '___' + solr_field_name
             elif predicate_type in plain_value_types:
                 self.fields[solr_field_name].append(obj)
             elif predicate_type == 'xsd:date':
                 self.fields[solr_field_name].append(obj + 'T00:00:00Z')
             elif predicate_type == 'xsd:string':
                 self.fields['text'] += obj['xsd:string'] + ' \n'
                 self.fields[solr_field_name].append(obj['xsd:string'])
             else:
                 raise Exception("Error: Could not get predicate value")
         self.fields['text'] += ' \n'
Example #47
0
 def dereference(self, identifier, link_entity_slug=False):
     """ Dereferences an entity identified by an identifier.

     The identifier is first expanded from a prefixed form to a full
     URI. If it looks like an http(s) URI (or link_entity_slug is
     set), it is looked up in the linked entities table; the OC
     manifest is then checked by uuid or slug. Attributes of whatever
     is found (uri, slug, label, item_type, ...) are copied onto this
     object.

     :param identifier: URI, prefixed URI, uuid or slug; anything that
         is not a str is not dereferenced
     :param link_entity_slug: if true, the identifier is checked
         against the linked entities table even when it is not a URI
     :return: True if a linked entity or a manifest item was found,
         otherwise False
     """
     output = False
     if not isinstance(identifier, str):
         # only try to dereference if the identifier is a string.
         return output
     # NOTE(review): nothing sets try_manifest to False, so the manifest
     # is also queried after a linked entity was found -- confirm that
     # this is intended.
     try_manifest = True
     identifier = URImanagement.convert_prefix_to_full_uri(identifier)
     tables_prefix = settings.CANONICAL_HOST + '/tables/'
     if tables_prefix in identifier:
         identifier = identifier.replace(tables_prefix, '')
     if link_entity_slug or (len(identifier) > 8):
         if link_entity_slug or identifier.startswith(('http://', 'https://')):
             ent_equivs = EntityEquivalents()
             uris = ent_equivs.make_uri_variants(identifier)
             ld_entities = LinkEntity.objects.filter(Q(uri__in=uris) | Q(slug=identifier))[:1]
             if len(ld_entities) > 0:
                 ld_entity = ld_entities[0]
                 output = True
                 self.uri = ld_entity.uri
                 self.slug = ld_entity.slug
                 self.label = ld_entity.label
                 self.item_type = 'uri'
                 self.alt_label = ld_entity.alt_label
                 self.entity_type = ld_entity.ent_type
                 self.vocab_uri = ld_entity.vocab_uri
                 self.ld_object_ok = True
                 # the vocabulary may be stored with either the http or
                 # the https form of its URI
                 if 'https://' in self.vocab_uri:
                     alt_vocab_uri = self.vocab_uri.replace('https://', 'http://')
                 else:
                     alt_vocab_uri = self.vocab_uri.replace('http://', 'https://')
                 # filter()[:1] instead of get(): get() raises
                 # MultipleObjectsReturned when both URI forms exist
                 vocab_entities = LinkEntity.objects\
                                            .filter(uri__in=[self.vocab_uri, alt_vocab_uri])[:1]
                 if len(vocab_entities) > 0:
                     self.vocabulary = vocab_entities[0].label
                 if self.get_icon:
                     prefix_uri = URImanagement.prefix_common_uri(ld_entity.uri)
                     icon_anno = LinkAnnotation.objects\
                                               .filter(Q(subject=ld_entity.uri)
                                                       | Q(subject=identifier)
                                                       | Q(subject=prefix_uri),
                                                       predicate_uri='oc-gen:hasIcon')[:1]
                     if len(icon_anno) > 0:
                         self.icon = icon_anno[0].object_uri
             else:
                 # couldn't find the item in the linked entities table,
                 # so reduce the URI to a uuid for the manifest lookup
                 identifier = URImanagement.get_uuid_from_oc_uri(identifier)
     if not try_manifest:
         return output
     try:
         manifest_item = Manifest.objects.get(Q(uuid=identifier) | Q(slug=identifier))
     except Manifest.DoesNotExist:
         return output
     output = True
     self.uri = URImanagement.make_oc_uri(manifest_item.uuid, manifest_item.item_type)
     self.uuid = manifest_item.uuid
     self.slug = manifest_item.slug
     self.label = manifest_item.label
     self.item_type = manifest_item.item_type
     self.class_uri = manifest_item.class_uri
     self.project_uuid = manifest_item.project_uuid
     # Stable identifiers are looked up independently of the
     # type-specific details below. As one branch of the same elif
     # chain, this lookup made the 'projects' branch unreachable and,
     # with get_stable_ids set, skipped the other branches too.
     if manifest_item.item_type in ['persons', 'projects', 'tables'] \
        or self.get_stable_ids:
         # get stable identifiers for persons, projects or tables by default
         stable_ids = StableIdentifer.objects.filter(uuid=manifest_item.uuid)
         if len(stable_ids) > 0:
             doi_uris = []
             orcid_uris = []
             other_uris = []
             for stable_id in stable_ids:
                 if stable_id.stable_type in StableIdentifer.ID_TYPE_PREFIXES:
                     prefix = StableIdentifer.ID_TYPE_PREFIXES[stable_id.stable_type]
                 else:
                     prefix = ''
                 stable_uri = prefix + stable_id.stable_id
                 if stable_id.stable_type == 'orcid':
                     orcid_uris.append(stable_uri)
                 elif stable_id.stable_type == 'doi':
                     doi_uris.append(stable_uri)
                 else:
                     other_uris.append(stable_uri)
             # now list URIs in order of importance, with ORCIDs and DOIs
             # first, followed by other stable URI types (Arks or something else)
             self.stable_id_uris = orcid_uris + doi_uris + other_uris
     if manifest_item.item_type == 'media' and self.get_thumbnail:
         # a media item. get information about its thumbnail.
         try:
             thumb_obj = Mediafile.objects.get(uuid=manifest_item.uuid, file_type='oc-gen:thumbnail')
         except Mediafile.DoesNotExist:
             thumb_obj = False
         if thumb_obj is not False:
             self.thumbnail_media = thumb_obj
             self.thumbnail_uri = thumb_obj.file_uri
     elif manifest_item.item_type == 'types':
         tl = TypeLookup()
         tl.get_octype_without_manifest(identifier)
         self.content = tl.content
     elif manifest_item.item_type == 'predicates':
         try:
             oc_pred = Predicate.objects.get(uuid=manifest_item.uuid)
         except Predicate.DoesNotExist:
             oc_pred = False
         if oc_pred is not False:
             self.data_type = oc_pred.data_type
             self.sort = oc_pred.sort
             self.slug_uri = 'oc-pred:' + str(self.slug)
     elif manifest_item.item_type == 'projects':
         # get a manifest object for the parent of a project, if it exists
         ch_tab = '"oc_projects" AS "child"'
         filters = 'child.project_uuid=oc_manifest.uuid '\
                   ' AND child.uuid=\'' + self.uuid + '\' ' \
                   ' AND child.project_uuid != \'' + self.uuid + '\' '
         par_rows = Manifest.objects\
                            .filter(item_type='projects')\
                            .exclude(uuid=self.uuid)\
                            .extra(tables=[ch_tab], where=[filters])[:1]
         if len(par_rows) > 0:
             self.par_proj_man_obj = par_rows[0]
     elif manifest_item.item_type == 'subjects' and self.get_context:
         try:
             subj = Subject.objects.get(uuid=manifest_item.uuid)
         except Subject.DoesNotExist:
             subj = False
         if subj is not False:
             self.context = subj.context
     return output
Example #48
0
 def parse_json_record(self, json_rec):
     """ Copies the attributes of a record onto this object.

     json_rec is either a geo-json feature, whose 'properties' are
     used, or the properties themselves. Nothing is set if the
     properties are not a dict. Dates in 'early bce/ce' and
     'late bce/ce' are stored as positive integers, with a suffix
     of 'BCE' for negative years and False otherwise.
     """
     props = json_rec['properties'] if 'properties' in json_rec else json_rec
     if not isinstance(props, dict):
         return
     if 'id' in props:
         self.id = props['id'].replace('#', '')
     if 'label' in props:
         self.label = props['label']
     if 'href' in props:
         self.href = props['href']
     if 'uri' in props:
         # the URI gives both the item type and the uuid
         uri_parts = URImanagement.get_uuid_from_oc_uri(props['uri'],
                                                        True)
         if isinstance(uri_parts, dict):
             self.item_type = uri_parts['item_type']
             self.uuid = uri_parts['uuid']
     if 'project label' in props:
         self.project = props['project label']
     if 'context label' in props:
         self.context = props['context label']
     if 'early bce/ce' in props:
         early = props['early bce/ce']
         early_is_bce = early < 0
         self.early_bce_ce = int(
             round(early * -1 if early_is_bce else early, 0))
         self.early_suffix = 'BCE' if early_is_bce else False
     if 'late bce/ce' in props:
         late = props['late bce/ce']
         late_is_bce = late < 0
         self.late_bce_ce = int(
             round(late * -1 if late_is_bce else late, 0))
         self.late_suffix = 'BCE' if late_is_bce else False
     if 'item category' in props:
         self.category = props['item category']
     if 'snippet' in props:
         # protect the <em> highlighting with placeholders, remove all
         # other markup, then turn the placeholders into <mark> tags
         snippet = props['snippet']
         snippet = snippet.replace('<em>', '[[[[mark]]]]')
         snippet = snippet.replace('</em>', '[[[[/mark]]]]')
         snippet = strip_tags(snippet)
         for leftover in ('</', '<', '>'):
             snippet = snippet.replace(leftover, '')
         snippet = snippet.replace('[[[[mark]]]]', '<mark>')
         self.snippet = snippet.replace('[[[[/mark]]]]', '</mark>')
     if 'thumbnail' in props:
         self.thumbnail = props['thumbnail']
     if 'published' in props:
         self.published = QueryMaker().make_human_readable_date(
             props['published'])
     if 'updated' in props:
         self.updated = QueryMaker().make_human_readable_date(
             props['updated'])
Example #49
0
 def make_dc_metadata_xml(self, parent_node, json_ld):
     """ makes metadata in the dublin core format

     Adds an oai_dc 'dc' element to parent_node and fills it from the
     item's JSON-LD with, in this order: title, date, creators,
     contributors, identifiers, type, publisher, formats and subjects.
     Nothing is added if the 'oai_dc' metadata format is not available.

     :param parent_node: the XML element that gets the 'dc' element
     :param json_ld: dict of the item's JSON-LD
     :return: None
     """
     act_format = self.get_metadata_format_attributes('oai_dc')
     if act_format is False:
         return
     item_type = False
     if 'id' in json_ld:
         # the id is optional further down, so don't require it here
         tcheck = URImanagement.get_uuid_from_oc_uri(json_ld['id'], True)
         if tcheck is not False:
             item_type = tcheck['item_type']
     dc = 'http://purl.org/dc/elements/1.1/'
     ns = {'dc': dc, 'oai_dc': act_format['ns'], 'xsi': self.XSI_NS}
     format_xml = etree.SubElement(
         parent_node,
         '{' + act_format['ns'] + '}dc',
         nsmap=ns,
         attrib={
             '{' + self.XSI_NS + '}schemaLocation':
             act_format['schemaLocation']
         })
     # the title element is always present, even if it stays empty
     title_xml = etree.SubElement(format_xml, '{' + dc + '}title')
     if 'dc-terms:title' in json_ld:
         title_xml.text = json_ld['dc-terms:title']
     elif 'label' in json_ld:
         title_xml.text = json_ld['label']
     if 'dc-terms:issued' in json_ld:
         date_xml = etree.SubElement(format_xml, '{' + dc + '}date')
         date_xml.text = json_ld['dc-terms:issued']
     # creators first, then contributors, one element per person
     for ld_key, dc_tag in (('dc-terms:creator', 'creator'),
                            ('dc-terms:contributor', 'contributor')):
         if ld_key in json_ld and isinstance(json_ld[ld_key], list):
             for ld_item in json_ld[ld_key]:
                 act_xml = etree.SubElement(format_xml,
                                            '{' + dc + '}' + dc_tag)
                 if 'label' in ld_item:
                     act_xml.text = ld_item['label']
     if 'owl:sameAs' in json_ld:
         if isinstance(json_ld['owl:sameAs'], list):
             for ld_item in json_ld['owl:sameAs']:
                 act_xml = etree.SubElement(format_xml,
                                            '{' + dc + '}identifier')
                 act_xml.text = ld_item['id']
     if 'id' in json_ld:
         act_xml = etree.SubElement(format_xml,
                                    '{' + dc + '}identifier')
         act_xml.text = json_ld['id']
     if item_type in self.DATACITE_RESOURCE_TYPES:
         act_rt = self.DATACITE_RESOURCE_TYPES[item_type]
     else:
         act_rt = self.DATACITE_RESOURCE_TYPES['other']
     rt_xml = etree.SubElement(format_xml, '{' + dc + '}type')
     rt_xml.text = act_rt['ResourceTypeGeneral']
     publisher = etree.SubElement(format_xml, '{' + dc + '}publisher')
     publisher.text = self.publisher_name
     if item_type in self.DC_FORMATS:
         # work on a copy: appending to the list stored in DC_FORMATS
         # would add the mime-types of this item's files to the
         # formats of every media item serialized afterwards
         format_list = list(self.DC_FORMATS[item_type])
         if item_type == 'media' \
            and 'oc-gen:has-files' in json_ld \
            and isinstance(json_ld['oc-gen:has-files'], list):
             for act_f in json_ld['oc-gen:has-files']:
                 if 'type' in act_f and 'dc-terms:hasFormat' in act_f:
                     if act_f['type'] == 'oc-gen:fullfile':
                         # only the full file contributes a mime-type
                         mime_uri = act_f['dc-terms:hasFormat']
                         format_list.append(
                             mime_uri.replace(
                                 'http://purl.org/NET/mediatypes/',
                                 ''))
     else:
         format_list = self.DC_FORMATS['other']
     for mime in format_list:
         act_xml = etree.SubElement(format_xml, '{' + dc + '}format')
         act_xml.text = mime
     subjects_list = []
     if 'category' in json_ld and json_ld['category']:
         # only the first category is used; an empty list has none
         cat = json_ld['category'][0]
         cat_label = self.get_category_label(cat, json_ld)
         if cat_label is not False:
             subjects_list.append(cat_label)
     if 'dc-terms:subject' in json_ld:
         if isinstance(json_ld['dc-terms:subject'], list):
             for subj in json_ld['dc-terms:subject']:
                 if 'label' in subj:
                     subjects_list.append(subj['label'])
     for subject in subjects_list:
         act_xml = etree.SubElement(format_xml,
                                    '{' + dc + '}subject')
         act_xml.text = subject
Example #50
0
 def get_description_tree(self,
                          entity_obj,
                          depth=1,
                          first_time=True,
                          item_type=False,
                          class_uri=False):
     """ gets a hierarchy for descriptive
         predicates and types

     :param entity_obj: the dereferenced entity at the root of the
         tree; only 'projects', 'predicates' and 'types' items
         have trees
     :param depth: how many levels of children to follow
     :param first_time: true for the outermost call, which wraps the
         tree in a list; recursive calls pass False
     :param item_type: for projects, limits the children to the
         classes used with this item_type
     :param class_uri: for projects, together with item_type, limits
         the children to predicates used with this class
     :return: a list with the tree if first_time, otherwise the tree
         itself; an empty list for other item types
     """
     if entity_obj.item_type == 'projects':
         tree = self.make_containment_item(entity_obj)
         if item_type is not False and class_uri is False:
             # returns the classes associated with an item_type for a project
             tree['label'] = tree['label'] + ', ' + item_type
             tree['children'] = self.get_proj_type_classes_items(entity_obj.uuid,
                                                                 item_type)
         elif item_type is not False and class_uri is not False:
             # returns the predicates associated with an item_type and class_uri
             tree['children'] = self.get_proj_type_class_preds(entity_obj.uuid,
                                                               item_type,
                                                               class_uri,
                                                               True)
         else:
             # project root, returns the item_types for the project
             tree['children'] = self.get_proj_types(entity_obj.uuid)
     elif entity_obj.item_type == 'predicates':
         tree = self.make_containment_item(entity_obj)
         tree['children'] = []
         # only created in the branches that need hierarchy lookups
         lr = LinkRecursion()
         child_list = lr.get_entity_children(entity_obj.uuid, False)
         if len(child_list) > 0:
             for child_uuid in child_list:
                 child_ent = Entity()
                 if not child_ent.dereference(child_uuid):
                     continue
                 if depth > 1:
                     # NOTE(review): the 'types' branch recurses with
                     # get_description_tree at this point -- confirm
                     # that get_containment_children is intended here.
                     child = self.get_containment_children(child_ent,
                                                           depth - 1,
                                                           False)
                 else:
                     child = self.make_containment_item(child_ent)
                 tree['children'].append(child)
         elif entity_obj.data_type == 'id':
             # no child predicates, so list the top ranked types
             # used with this predicate
             for top_type in lr.get_pred_top_rank_types(entity_obj.uuid):
                 uuid = URImanagement.get_uuid_from_oc_uri(top_type['id'])
                 if depth > 1:
                     child_ent = Entity()
                     if not child_ent.dereference(uuid):
                         # skip types that can't be dereferenced instead
                         # of adding False to the list of children
                         continue
                     item = self.get_description_tree(child_ent,
                                                      depth - 1,
                                                      False)
                 else:
                     item = LastUpdatedOrderedDict()
                     item['id'] = uuid
                     item['label'] = top_type['label']
                     item['class_uri'] = 'type'
                     item['class_label'] = 'type'
                 tree['children'].append(item)
             tree['children'] = self.sort_children_by_label(tree['children'])
     elif entity_obj.item_type == 'types':
         tree = self.make_containment_item(entity_obj)
         tree['children'] = []
         lr = LinkRecursion()
         for child_uuid in lr.get_entity_children(entity_obj.uuid, False):
             if child_uuid == entity_obj.uuid:
                 # an item is not its own child
                 continue
             child_ent = Entity()
             if not child_ent.dereference(child_uuid):
                 continue
             if depth > 1:
                 child = self.get_description_tree(child_ent,
                                                   depth - 1,
                                                   False)
             else:
                 child = self.make_containment_item(child_ent)
             child['class_uri'] = 'type'
             child['class_label'] = 'type'
             tree['children'].append(child)
         if len(tree['children']) == 0:
             # a type without children is a leaf
             tree.pop('children', None)
         else:
             tree['children'] = self.sort_children_by_label(tree['children'])
     else:
         # other item types have no description hierarchy
         return []
     if first_time:
         return [tree]
     return tree