Example #1
0
 def makeEntityId(self, entityType, entity):
     """Build the URN identifier for a schema.org entity.

     The result has the form ``urn:schema-org:<entityType>:<key>:<value>``:

     * ``creative_work`` entities are keyed on their scrubbed first ``url``.
     * ``alignment_object`` entities are keyed on the MD5 hash of the
       pipe-joined values of ``self.alignmentObjectProperties``.
     * Every other entity is keyed on the first usable property named in
       ``self.potentialEntityIdProperties``.

     Args:
         entityType: Entity type name embedded in the URN.
         entity: Dict whose 'properties' entry maps property names to
             lists of values.

     Returns:
         The entity id string.

     Raises:
         Exception: An alignment object property is present but is not a
             one-element list holding a string.
         ValueError: No property could be used to build an id.
     """
     entityIdTemplate = 'urn:schema-org:{0}:{{0}}:{{1}}'.format(entityType)
     if entityType == 'creative_work':
         return entityIdTemplate.format('url', helperfunctions.scrubUrl(entity['properties']['url'][0]))
     # AlignmentObject is unique.  It is essentially a hyperedge and is defined by the combination of its properties.
     if entityType == 'alignment_object':
         # (str, unicode) on Python 2; collapses to (str, str) on Python 3.
         textTypes = (str, type(u''))
         parts = []
         for prop in self.alignmentObjectProperties:
             properties = entity['properties']
             if prop not in properties:
                 # A missing property still occupies its slot in the key.
                 parts.append('')
                 continue
             values = properties[prop]
             if not (isinstance(values, list) and
                     len(values) == 1 and
                     isinstance(values[0], textTypes)):
                 raise Exception('Unexpected property in AlignmentObject.  name: "{0}", value: "{1}"'.format(prop, values))
             parts.append(values[0])
         propCollection = '|'.join(parts)
         # md5 needs bytes: hashing text directly fails for non-ASCII unicode
         # values.  UTF-8 leaves the hash of ASCII-only values unchanged.
         if not isinstance(propCollection, bytes):
             propCollection = propCollection.encode('utf-8')
         return entityIdTemplate.format('hash', hashlib.md5(propCollection).hexdigest())
     # The 'id' field coming in from the JSON is deliberately never trusted,
     # so ids are always derived from the entity's properties instead.
     for prop in self.potentialEntityIdProperties:
         properties = entity['properties']
         if prop not in properties:
             continue
         values = properties[prop]
         if not isinstance(values, list) or not values or len(values[0]) == 0:
             continue
         value = values[0]
         if prop.lower().endswith("url"):
             value = helperfunctions.scrubUrl(value)
         return entityIdTemplate.format(self.makeLowercaseUnderscore(prop), value)
     raise ValueError('unable to create entityId')
Example #2
0
 def insertIntoElasticSearch(self, envelope):
     """Index the first item of an envelope into Elasticsearch.

     Host, port, index and index type are read from ``self.config['lris']``.
     The document id is the URL-quoted, scrubbed first ``url`` property of
     the item.  This is best-effort: any failure is logged with its
     traceback and is not re-raised.

     Args:
         envelope: Dict with a 'doc_ID' entry and a 'resource_data' entry
             whose 'items' list holds the item to index.
     """
     # Captured up front so the error handler never has to touch the
     # envelope again (a missing 'doc_ID' may be the failure being logged).
     docId = None
     try:
         docId = envelope['doc_ID']
         logging.debug('insertIntoElasticSearch with %s', docId)
         item = envelope['resource_data']['items'][0]
         lrisConfig = self.config['lris']
         es = pyes.ES('{0}:{1}'.format(lrisConfig['host'], lrisConfig['port']))
         documentId = urllib.quote_plus(helperfunctions.scrubUrl(item['properties']['url'][0]))
         es.index(item, lrisConfig['index'], lrisConfig['index_type'], documentId)
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit still propagate.
         logging.exception('doc_ID: %s', docId)