예제 #1
0
 def __extract_title__(self):
     """Helper function extracts title information from MODS

     Every ``titleInfo`` element without a ``type`` attribute is turned
     into a saved and indexed TitleEntity. Child elements that are
     missing or have no text are skipped.

     Returns:
         list of the redis keys of the TitleEntity objects created
     """
     title_entities = []
     titleInfos = self.mods_xml.findall('{{{0}}}titleInfo'.format(MODS_NS))
     for titleInfo in titleInfos:
         output = {}
         if titleInfo.attrib.get('type') is None:
             # equivalent to MARC 245 $a
             titleValue = titleInfo.find('{{{0}}}title'.format(MODS_NS))
             # .text is None for an empty element, so test truthiness
             # instead of calling len() on it
             if titleValue is not None and titleValue.text:
                 output['titleValue'] = titleValue.text
                 output['label'] = output['titleValue']
             # equivalent to MARC 245 $b
             subtitle = titleInfo.find('{{{0}}}subTitle'.format(MODS_NS))
             if subtitle is not None and subtitle.text:
                 output['subtitle'] = subtitle.text
                 if 'label' in output:
                     output['label'] = '{0}: {1}'.format(output['label'],
                                                         output['subtitle'])
                 else:
                     # No main title: avoid producing a "None: ..." label
                     output['label'] = output['subtitle']
             # equivalent to MARC 245 $p
             partTitle = titleInfo.find('{{{0}}}partName'.format(MODS_NS))
             if partTitle is not None and partTitle.text:
                 output['partTitle'] = partTitle.text
         if output:
             title_entity = TitleEntity(redis_datastore=self.redis_datastore,
                                        **output)
             title_entity.save()
             index_title(title_entity, self.redis_datastore)
             title_entities.append(title_entity.redis_key)
     return title_entities
예제 #2
0
 def __add_title_entity__(self):
     """Helper method adds a new TitleEntity

     Creates a TitleEntity on this object's datastore, copies every
     entity_info pair whose key and value are both not None onto it as
     attributes, then saves it. The entity is kept on self.title_entity.
     """
     entity = TitleEntity(redis_datastore=self.redis_datastore)
     self.title_entity = entity
     for name, val in self.entity_info.iteritems():
         # Skip incomplete pairs
         if name is None or val is None:
             continue
         setattr(entity, name, val)
     entity.save()
예제 #3
0
class TestWorkClassifier(unittest.TestCase):
    "Tests simple_fuzzy.WorkClassifier against one Book stored in TEST_REDIS"

    def setUp(self):
        "Stores an indexed TitleEntity and a Book that references it"
        self.title_entity = TitleEntity(redis_datastore=TEST_REDIS,
                                        label='Pride and Prejudice',
                                        titleValue='Pride and Prejudice')
        self.title_entity.save()
        index_title(self.title_entity, TEST_REDIS)
        self.work = Book(redis_datastore=TEST_REDIS,
                         associatedAgent=set(["bf:Person:1"]),
                         title=self.title_entity.redis_key)
        # Attribute name contains a colon, so it can only be set via setattr
        setattr(self.work, 'rda:isCreatedBy', 'bf:Person:1')
        self.work.save()
        # Register the work in the title's relatedResources set
        TEST_REDIS.sadd(
            "{0}:relatedResources".format(self.title_entity.redis_key),
            self.work.redis_key)

    def test_init(self):
        "A new classifier is strict, has no work, and keeps entity_info"
        entity_info = {'title': 'Pride and Prejudice',
                       'rda:isCreatedBy': set(['bf:Person:1'])}
        classifier = simple_fuzzy.WorkClassifier(redis_datastore=TEST_REDIS,
                                                 entity_info=entity_info,
                                                 work_class=Book)
        # assert_/assertEquals are deprecated aliases; use the current names
        self.assertIsNotNone(classifier)
        self.assertIs(classifier.strict, True)
        self.assertIsNone(classifier.creative_work)
        self.assertEqual(classifier.entity_info,
                         entity_info)

    def test_exact_match(self):
        "classify() resolves matching entity_info to the Book from setUp"
        entity_info = {'rda:isCreatedBy': set(['bf:Person:1']),
                       'title': self.title_entity.redis_key}
        classifier = simple_fuzzy.WorkClassifier(redis_datastore=TEST_REDIS,
                                                 entity_info=entity_info,
                                                 work_class=Book)
        classifier.classify()
        self.assertEqual(classifier.creative_work.redis_key,
                         self.work.redis_key)

    def tearDown(self):
        "Empties the test datastore after each test"
        TEST_REDIS.flushdb()
예제 #4
0
 def setUp(self):
     "Stores an indexed TitleEntity and a Book that references it"
     title = TitleEntity(label='Pride and Prejudice',
                         titleValue='Pride and Prejudice',
                         redis_datastore=TEST_REDIS)
     self.title_entity = title
     title.save()
     index_title(title, TEST_REDIS)
     book = Book(title=title.redis_key,
                 associatedAgent=set(["bf:Person:1"]),
                 redis_datastore=TEST_REDIS)
     self.work = book
     # Attribute name contains a colon, so it can only be set via setattr
     setattr(book, 'rda:isCreatedBy', 'bf:Person:1')
     book.save()
     # Register the work in the title's relatedResources set
     related_key = "{0}:relatedResources".format(title.redis_key)
     TEST_REDIS.sadd(related_key, book.redis_key)
예제 #5
0
class MARC21toTitleEntity(MARCParser):
    "Extracts BIBFRAME TitleEntity info from MARC21 record"

    def __init__(self, **kwargs):
        """Initializes MARC21toTitleEntity object

        Parameters:
        kwargs -- keyword arguments forwarded to MARCParser; any
                  'rules_filename' value is replaced with the
                  TitleEntity mapping file
        """
        kwargs['rules_filename'] = 'bibframe-title-entity-map.json'
        super(MARC21toTitleEntity, self).__init__(**kwargs)
        # Set by __add_title_entity__
        self.title_entity = None

    def __add_title_entity__(self):
        """Helper method adds a new TitleEntity

        Copies every entity_info pair whose key and value are both not
        None onto a new TitleEntity as attributes, then saves it.
        """
        self.title_entity = TitleEntity(redis_datastore=self.redis_datastore)
        for key, value in self.entity_info.iteritems():
            if key is not None and value is not None:
                setattr(self.title_entity,
                        key,
                        value)
        self.title_entity.save()

    def __get_or_add_title_entity__(self):
        """Helper method builds the label for the TitleEntity

        When entity_info has a 'titleValue', sets entity_info['label']
        to that value followed by a space and the 'subtitle', if any.
        NOTE(review): no lookup of existing titles is implemented and
        nothing is returned -- confirm the intended behavior.
        """
        title_string = self.entity_info.get('titleValue')
        if title_string is not None:
            # Previously read the undefined name `title` (NameError)
            subtitle = self.entity_info.get('subtitle')
            if subtitle is not None:
                title_string += " {0}".format(subtitle)
            self.entity_info['label'] = title_string

    def ingest(self):
        "Method parses the record and adds a new TitleEntity to RLSP"
        self.parse()
        self.__add_title_entity__()
예제 #6
0
 def ingest(self):
     """Method ingests MARC Record into RLSP

     Classifies the work class, fills entity_info from the work's
     marc_map rules (the title becomes a saved, indexed TitleEntity),
     runs the extra extractors, gets or adds the work, and finally adds
     the work to each new title's relatedResources set.
     """
     self.__classify_work_class__()
     self.creative_work = self.work_class(
         redis_datastore=self.redis_datastore)
     title_keys = []
     for attribute, rules in self.creative_work.marc_map.iteritems():
         #! NEED TitleEntity to check for duplicates
         # ('uniformTitle' gets no special handling yet)
         if attribute == 'title':
             # Only the first rule is used for the title
             first_rule = rules[0]
             joined_title = ' '.join(self.__rule_one__(first_rule))
             entity = TitleEntity(redis_datastore=self.redis_datastore,
                                  titleValue=joined_title,
                                  label=self.record.title())
             entity.save()
             index_title(entity, self.redis_datastore)
             self.entity_info[attribute] = entity.redis_key
             title_keys.append(entity.redis_key)
             continue
         collected = []
         for rule in rules:
             # Values are de-duplicated per rule before being gathered
             collected.extend(set(self.__rule_one__(rule)))
         if collected:
             self.entity_info[attribute] = collected
     # List of specific methods that haven't had Rule regex developed
     self.extract_creators()
     self.extract_note()
     self.extract_performerNote()
     self.get_or_add_work()
     if self.creative_work is None:
         return
     for key in title_keys:
         related_key = "{0}:relatedResources".format(key)
         self.redis_datastore.sadd(related_key,
                                   self.creative_work.redis_key)