def __extract_title__(self):
    """Extract title information from MODS into saved TitleEntity records.

    Every untyped ``<titleInfo>`` child of ``self.mods_xml`` (one without a
    ``type`` attribute) is inspected for ``<title>``, ``<subTitle>`` and
    ``<partName>`` children.  When at least one of them carries text, a
    TitleEntity is created from the collected values, saved, and indexed
    with ``index_title``.

    Returns:
        A list with the ``redis_key`` of each TitleEntity that was saved,
        in the order the ``<titleInfo>`` elements were found.
    """

    def child_text(parent, tag):
        "Returns the non-empty text of the MODS child element, else None"
        child = parent.find('{{{0}}}{1}'.format(MODS_NS, tag))
        # ElementTree leaves ``text`` as None for empty elements such as
        # <title/>, so a truthiness test is used instead of len(), which
        # would raise TypeError on None.
        if child is None or not child.text:
            return None
        return child.text

    title_entities = []
    for titleInfo in self.mods_xml.findall('{{{0}}}titleInfo'.format(MODS_NS)):
        # Typed titleInfo elements are skipped; only untyped ones are used.
        if titleInfo.attrib.get('type') is not None:
            continue
        output = {}
        # Equivalent to MARC 245 $a
        title_text = child_text(titleInfo, 'title')
        if title_text is not None:
            output['titleValue'] = title_text
            output['label'] = title_text
        # Equivalent to MARC 245 $b
        subtitle_text = child_text(titleInfo, 'subTitle')
        if subtitle_text is not None:
            output['subtitle'] = subtitle_text
            if 'label' in output:
                output['label'] = '{0}: {1}'.format(output['label'],
                                                    output['subtitle'])
            else:
                # No main title: use the subtitle alone rather than
                # producing a label that starts with the text "None: ".
                output['label'] = subtitle_text
        # Equivalent to MARC 245 $p
        part_text = child_text(titleInfo, 'partName')
        if part_text is not None:
            output['partTitle'] = part_text
        if output:
            title_entity = TitleEntity(redis_datastore=self.redis_datastore,
                                       **output)
            title_entity.save()
            index_title(title_entity, self.redis_datastore)
            title_entities.append(title_entity.redis_key)
    return title_entities
def __add_title_entity__(self):
    """Create a TitleEntity, populate it from ``self.entity_info``, save it.

    The new entity is stored on ``self.title_entity``.  Each pair in
    ``self.entity_info`` is copied onto the entity as an attribute, except
    pairs whose key or value is None.
    """
    entity = TitleEntity(redis_datastore=self.redis_datastore)
    self.title_entity = entity
    for name, content in self.entity_info.iteritems():
        # Skip incomplete pairs; only fully specified attributes are set.
        if name is None or content is None:
            continue
        setattr(entity, name, content)
    entity.save()
class TestWorkClassifier(unittest.TestCase):
    """Tests simple_fuzzy.WorkClassifier against one Book stored in TEST_REDIS."""

    def setUp(self):
        "Stores an indexed TitleEntity and a Book related to that title"
        self.title_entity = TitleEntity(redis_datastore=TEST_REDIS,
                                        label='Pride and Prejudice',
                                        titleValue='Pride and Prejudice')
        self.title_entity.save()
        index_title(self.title_entity, TEST_REDIS)
        self.work = Book(redis_datastore=TEST_REDIS,
                         associatedAgent=set(["bf:Person:1"]),
                         title=self.title_entity.redis_key)
        # The attribute name contains a colon, so it can only be set
        # through setattr.
        setattr(self.work, 'rda:isCreatedBy', 'bf:Person:1')
        self.work.save()
        # Record the Book in the title's relatedResources set.
        TEST_REDIS.sadd(
            "{0}:relatedResources".format(self.title_entity.redis_key),
            self.work.redis_key)

    def test_init(self):
        "A new classifier is strict, has no work, and keeps its entity_info"
        entity_info = {'title': 'Pride and Prejudice',
                       'rda:isCreatedBy': set(['bf:Person:1'])}
        classifier = simple_fuzzy.WorkClassifier(redis_datastore=TEST_REDIS,
                                                 entity_info=entity_info,
                                                 work_class=Book)
        # assertTrue/assertEqual replace the deprecated assert_/assertEquals
        # aliases, which no longer exist in Python 3.12+.
        self.assertTrue(classifier is not None)
        self.assertTrue(classifier.strict is True)
        self.assertTrue(classifier.creative_work is None)
        self.assertEqual(classifier.entity_info, entity_info)

    def test_exact_match(self):
        "classify() resolves the same creator and title key to the stored Book"
        entity_info = {'rda:isCreatedBy': set(['bf:Person:1']),
                       'title': self.title_entity.redis_key}
        classifier = simple_fuzzy.WorkClassifier(redis_datastore=TEST_REDIS,
                                                 entity_info=entity_info,
                                                 work_class=Book)
        classifier.classify()
        self.assertEqual(classifier.creative_work.redis_key,
                         self.work.redis_key)

    def tearDown(self):
        "Empties the test datastore so each test starts clean"
        TEST_REDIS.flushdb()
def setUp(self):
    """Store an indexed TitleEntity and a Book that refers to it.

    Both objects are saved to TEST_REDIS, and the Book's key is added to
    the title's ``relatedResources`` set.
    """
    self.title_entity = title = TitleEntity(
        redis_datastore=TEST_REDIS,
        label='Pride and Prejudice',
        titleValue='Pride and Prejudice')
    title.save()
    index_title(title, TEST_REDIS)

    self.work = book = Book(
        redis_datastore=TEST_REDIS,
        associatedAgent=set(["bf:Person:1"]),
        title=title.redis_key)
    # The attribute name contains a colon, so setattr is the only way in.
    setattr(book, 'rda:isCreatedBy', 'bf:Person:1')
    book.save()

    related_key = "{0}:relatedResources".format(title.redis_key)
    TEST_REDIS.sadd(related_key, book.redis_key)
class MARC21toTitleEntity(MARCParser):
    "Extracts BIBFRAME TitleEntity info from MARC21 record"

    def __init__(self, **kwargs):
        """Initializes MARC21toTitleEntity object

        Parameters:
        kwargs -- keyword arguments passed through to MARCParser; any
                  'rules_filename' supplied by the caller is replaced with
                  'bibframe-title-entity-map.json'
        """
        kwargs['rules_filename'] = 'bibframe-title-entity-map.json'
        super(MARC21toTitleEntity, self).__init__(**kwargs)
        # Populated by __add_title_entity__
        self.title_entity = None

    def __add_title_entity__(self):
        """Helper method adds a new TitleEntity

        Every key/value pair in self.entity_info where neither side is
        None is set as an attribute on the new entity before it is saved.
        """
        self.title_entity = TitleEntity(redis_datastore=self.redis_datastore)
        for key, value in self.entity_info.iteritems():
            if key is not None and value is not None:
                setattr(self.title_entity, key, value)
        self.title_entity.save()

    def __get_or_add_title_entity__(self):
        """Helper method builds the 'label' entry of self.entity_info

        The label is the 'titleValue' entry followed, when present, by a
        space and the 'subtitle' entry.  Nothing happens when there is no
        'titleValue'.

        NOTE(review): despite the method name, no existing TitleEntity is
        looked up and nothing is returned; ingest() does not call this
        method either.  Confirm whether the lookup still needs writing.
        """
        # The original read an undefined name ``title`` here, which raised
        # NameError as soon as a titleValue was present.
        title_string = self.entity_info.get('titleValue')
        if title_string is None:
            return
        # assumes titleValue and subtitle are strings -- TODO confirm
        # against what MARCParser.parse() stores in entity_info
        if self.entity_info.get('subtitle') is not None:
            title_string += " {0}".format(
                self.entity_info.get('subtitle'))
        self.entity_info['label'] = title_string

    def ingest(self):
        "Method parses the record, then creates and saves a new TitleEntity"
        self.parse()
        self.__add_title_entity__()
def ingest(self):
    """Ingest the MARC record into RLSP as a creative work.

    Steps, in order:
      1. Classify the record and instantiate ``self.work_class``.
      2. Walk the work's ``marc_map``.  The 'title' attribute becomes a new,
         saved and indexed TitleEntity whose key is stored in
         ``self.entity_info``; every other attribute collects the
         de-duplicated results of each of its rules.
      3. Run the extractors that have no rule regex yet (creators, note,
         performerNote).
      4. Call ``get_or_add_work`` and, if a work is present afterwards, add
         its key to each new title's ``relatedResources`` set.
    """
    self.__classify_work_class__()
    self.creative_work = self.work_class(
        redis_datastore=self.redis_datastore)

    title_keys = []
    for field_name, field_rules in self.creative_work.marc_map.iteritems():
        if field_name != 'title':
            # Generic path.  'uniformTitle' has no special handling and is
            # processed here like any other attribute.
            collected = []
            for field_rule in field_rules:
                # set() drops duplicate values produced by a single rule
                collected.extend(set(self.__rule_one__(field_rule)))
            if collected:
                self.entity_info[field_name] = collected
            continue

        #! NEED TitleEntity to check for duplicates
        # Only the first rule is used to build the title value.
        title_text = ' '.join(self.__rule_one__(field_rules[0]))
        new_title = TitleEntity(redis_datastore=self.redis_datastore,
                                titleValue=title_text,
                                label=self.record.title())
        new_title.save()
        index_title(new_title, self.redis_datastore)
        self.entity_info[field_name] = new_title.redis_key
        title_keys.append(new_title.redis_key)

    # List of specific methods that haven't had Rule regex developed
    self.extract_creators()
    self.extract_note()
    self.extract_performerNote()
    self.get_or_add_work()

    # NOTE(review): presumably get_or_add_work may replace or clear
    # self.creative_work, hence this check -- confirm.
    if self.creative_work is None:
        return
    for key in title_keys:
        self.redis_datastore.sadd(
            "{0}:relatedResources".format(key),
            self.creative_work.redis_key)