class TestWorkClassifier(unittest.TestCase):
    """Unit tests for WorkClassifier construction and MARC21 tokenization."""

    def setUp(self):
        """Build a classifier backed by the test Redis datastore."""
        self.classifier = WorkClassifier(name="Test", datastore=TEST_REDIS)

    def test_init(self):
        """The classifier created in setUp is truthy."""
        # assertTrue replaces the deprecated alias assert_.
        self.assertTrue(self.classifier)

    def test_tokenize_marc21_one(self):
        """Tokenize a record carrying both an author (100) and a title (245)."""
        marc_record = pymarc.Record()
        # Overwrite leader position 6 with 'a'; the expected tokens below
        # then include 'book' (presumably derived from that leader value --
        # confirm against WorkClassifier.__tokenize_marc21__).
        marc_record.leader = (
            marc_record.leader[0:6] + 'a' + marc_record.leader[7:])
        marc_record.add_field(
            pymarc.Field(tag='100',
                         indicators=['1', ' '],
                         subfields=['a', 'Naslund, Sena Jeter.']))
        marc_record.add_field(
            pymarc.Field(tag='245',
                         indicators=['1', '0'],
                         subfields=['a', "Ahab's wife, or, The star-gazer :",
                                    'b', "a novel /"]))
        self.assertEqual(
            sorted(self.classifier.__tokenize_marc21__(marc_record)),
            ['ahab', 'book', 'gazer', 'jeter', 'naslund', 'novel',
             'sena', 'star', 'wife'])

    def test_tokenize_marc21_title_only(self):
        """Tokenize a record that only has a title (245) field."""
        marc_record = pymarc.Record()
        marc_record.add_field(
            pymarc.Field(tag='245',
                         indicators=['1', '0'],
                         subfields=['a', "Ahab's wife, or, The star-gazer :",
                                    'b', "a novel /"]))
        # Compare in sorted order, as the sibling test does, so the check
        # does not depend on the order in which tokens happen to be returned.
        self.assertEqual(
            sorted(self.classifier.__tokenize_marc21__(marc_record)),
            ['ahab', 'gazer', 'novel', 'star', 'wife', 'work'])

    def tearDown(self):
        """Flush the test Redis database after each test."""
        TEST_REDIS.flushdb()
def create_row_from_marc(marc_record, is_work=False, reason=None):
    """Function creates a table row (``tr`` element) from a MARC21 record

    The row holds five ``td`` cells, in order: record ID (907 $a with its
    first and last characters removed), title, author, the sorted tokenized
    terms, and the is-work flag (optionally followed by ``, <reason>``).

    Keywords:
    marc_record -- pymarc MARC21 record
    is_work -- only the literal ``True`` renders as "True"; any other value
               renders as "False"
    reason -- optional text appended to the is-work cell after a comma

    Returns the ``tr`` element serialized with ``etree.tostring``.
    """
    tr = etree.Element("tr")

    # Record ID
    bib_id_td = etree.SubElement(tr, "td")
    field907 = marc_record['907']
    # The lookup may give None when the record has no 907 field (pymarc
    # versions that return None for a missing tag); leave the cell empty
    # in that case instead of failing on ``None.subfields``.
    if field907 is not None and field907.subfields.count('a') > 0:
        bib_id_td.text = field907['a'][1:-1]

    # Title
    title = marc_record.title()
    title_td = etree.SubElement(tr, "td")
    if title is not None:
        title_td.text = title

    # Author
    author = marc_record.author()
    author_td = etree.SubElement(tr, "td")
    if author is not None:
        author_td.text = author

    # Tokenized terms
    classifier = WorkClassifier(name=title, datastore=None)
    terms = classifier.__tokenize_marc21__(marc_record)
    terms_td = etree.SubElement(tr, "td")
    if terms is not None:
        terms_td.text = str(sorted(terms))

    # Is work?
    is_work_td = etree.SubElement(tr, "td")
    # Identity check is deliberate: truthy non-bool values still render "False".
    is_work_td.text = "True" if is_work is True else "False"
    if reason is not None:
        is_work_td.text += ', {0}'.format(reason)

    return etree.tostring(tr)
def setUp(self):
    """Create the WorkClassifier under test, bound to the test datastore."""
    classifier_under_test = WorkClassifier(name="Test", datastore=TEST_REDIS)
    self.classifier = classifier_under_test