def test_authors(self):
    """Exercise OCLCXMLParser.parse_author_string on varied author strings.

    Covers single authors with and without an explicit role, names in
    non-Latin scripts, pipe-separated lists of several contributors, and
    a few strings that only need to be handled without crashing.
    """
    primary = Contributor.PRIMARY_AUTHOR_ROLE
    missing = self.MISSING

    # Single-author strings: the name, role and life dates are split out.
    self.assert_parse(
        "Carroll, Lewis, 1832-1898",
        "Carroll, Lewis",
        primary,
        birthdate="1832",
        deathdate="1898",
    )
    self.assert_parse(
        "Kent, Rockwell, 1882-1971 [Illustrator]",
        "Kent, Rockwell",
        "Illustrator",
        birthdate="1882",
        deathdate="1971",
    )
    self.assert_parse(
        u"Карролл, Лувис, 1832-1898.",
        u"Карролл, Лувис",
        primary,
        birthdate="1832",
        deathdate="1898",
    )

    # Two authors in one string: only the first is the primary author.
    two_authors = "McSweeney, Kerry, 1941- | Melville, Herman, 1819-1891"
    kerry, melville = OCLCXMLParser.parse_author_string(self._db, two_authors)
    self.assert_author(
        kerry, "McSweeney, Kerry", primary,
        birthdate="1941", deathdate=missing)
    self.assert_author(
        melville, "Melville, Herman", Contributor.AUTHOR_ROLE,
        birthdate="1819", deathdate="1891")

    # Check out this mess: six contributors, most with bracketed roles.
    # Adjacent literals are concatenated into one string.
    messy = (
        "Sunzi, active 6th century B.C."
        " | Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]"
        " | Griffith, Samuel B. [Editor; Author of introduction; Translator]"
        " | Cleary, Thomas F., 1949- [Editor; Translator]"
        " | Sawyer, Ralph D. [Editor; Author of introduction; Translator]"
        " | Clavell, James"
    )
    parsed = OCLCXMLParser.parse_author_string(self._db, messy)
    sunzi, giles, griffith, cleary, sawyer, clavell = parsed

    # This one could be better: the "active ..." text stays in the name.
    self.assert_author(sunzi, "Sunzi, active 6th century B.C.", primary)
    self.assert_author(
        giles, "Giles, Lionel",
        ["Writer of added commentary", "Translator"],
        "1875", "1958")
    self.assert_author(
        griffith, "Griffith, Samuel B.",
        ["Editor", "Author of introduction", "Translator"],
        missing, missing)
    self.assert_author(
        cleary, "Cleary, Thomas F.",
        ["Editor", "Translator"],
        "1949", missing)
    self.assert_author(
        sawyer, "Sawyer, Ralph D.",
        ["Editor", "Author of introduction", "Translator"],
        missing, missing)

    # Once contributors start getting explicit roles, a contributor
    # with no explicit role is treated as 'unknown' rather than
    # 'author'.
    self.assert_author(
        clavell, "Clavell, James",
        [Contributor.UNKNOWN_ROLE],
        missing, missing)

    # These author strings are not parsed as well as they ought to be,
    # but they are handled without crashing.
    self.assert_parse(
        u"梅爾維爾 (Melville, Herman), 1819-1891",
        u"梅爾維爾 (Melville, Herman)",
        primary,
        birthdate="1819",
        deathdate="1891",
    )
    self.assert_parse(
        u"卡洛爾 (Carroll, Lewis), (英), 1832-1898",
        u"卡洛爾 (Carroll, Lewis), (英)",
        primary,
        birthdate="1832",
        deathdate="1898",
    )
    unparsed = u"杜格孫 (Dodgson, Charles Lutwidge,1832-1896)"
    self.assert_parse(unparsed, unparsed, primary)
def test_authors(self):
    """Verify how author strings are split into contributors.

    Each table below pairs an input with the values handed to
    assert_parse / assert_author; the checks run in table order.
    """
    primary = Contributor.PRIMARY_AUTHOR_ROLE
    missing = self.MISSING

    # (input string, expected name, expected role, birthdate, deathdate)
    simple_cases = [
        ("Carroll, Lewis, 1832-1898", "Carroll, Lewis", primary, "1832", "1898"),
        (
            "Kent, Rockwell, 1882-1971 [Illustrator]",
            "Kent, Rockwell",
            "Illustrator",
            "1882",
            "1971",
        ),
        (u"Карролл, Лувис, 1832-1898.", u"Карролл, Лувис", primary, "1832", "1898"),
    ]
    for raw, name, role, born, died in simple_cases:
        self.assert_parse(raw, name, role, birthdate=born, deathdate=died)

    # A pipe-separated pair: the second author gets the plain author role.
    first, second = OCLCXMLParser.parse_author_string(
        self._db, "McSweeney, Kerry, 1941- | Melville, Herman, 1819-1891"
    )
    self.assert_author(first, "McSweeney, Kerry", primary, birthdate="1941", deathdate=missing)
    self.assert_author(
        second, "Melville, Herman", Contributor.AUTHOR_ROLE, birthdate="1819", deathdate="1891"
    )

    # Check out this mess. (Adjacent literals form a single string.)
    tangled = (
        "Sunzi, active 6th century B.C. | "
        "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator] | "
        "Griffith, Samuel B. [Editor; Author of introduction; Translator] | "
        "Cleary, Thomas F., 1949- [Editor; Translator] | "
        "Sawyer, Ralph D. [Editor; Author of introduction; Translator] | "
        "Clavell, James"
    )
    # Unpacking into six names also insists on exactly six contributors.
    sunzi, giles, griffith, cleary, sawyer, clavell = OCLCXMLParser.parse_author_string(
        self._db, tangled
    )

    # (contributor, positional arguments forwarded to assert_author)
    expectations = [
        # This one could be better.
        (sunzi, ("Sunzi, active 6th century B.C.", primary)),
        (giles, ("Giles, Lionel", ["Writer of added commentary", "Translator"], "1875", "1958")),
        (
            griffith,
            ("Griffith, Samuel B.", ["Editor", "Author of introduction", "Translator"], missing, missing),
        ),
        (cleary, ("Cleary, Thomas F.", ["Editor", "Translator"], "1949", missing)),
        (
            sawyer,
            ("Sawyer, Ralph D.", ["Editor", "Author of introduction", "Translator"], missing, missing),
        ),
        # Once contributors start getting explicit roles, a
        # contributor with no explicit role is treated as 'unknown'
        # rather than 'author'.
        (clavell, ("Clavell, James", [Contributor.UNKNOWN_ROLE], missing, missing)),
    ]
    for contributor, expected in expectations:
        self.assert_author(contributor, *expected)

    # These author strings are not parsed as well as they ought to be,
    # but they are handled without crashing.
    dodgson = u"杜格孫 (Dodgson, Charles Lutwidge,1832-1896)"
    lenient_cases = [
        (u"梅爾維爾 (Melville, Herman), 1819-1891", u"梅爾維爾 (Melville, Herman)", "1819", "1891"),
        (u"卡洛爾 (Carroll, Lewis), (英), 1832-1898", u"卡洛爾 (Carroll, Lewis), (英)", "1832", "1898"),
        # Nothing is extracted here: the whole string is the name, and
        # the dates fall back to assert_parse's defaults (None).
        (dodgson, dodgson, None, None),
    ]
    for raw, name, born, died in lenient_cases:
        self.assert_parse(raw, name, primary, birthdate=born, deathdate=died)
def assert_parse(self, string, name, role=Contributor.AUTHOR_ROLE, birthdate=None, deathdate=None):
    """Parse `string` as a single author and check the resulting contributor.

    The string is run through OCLCXMLParser.parse_author_string, which
    must yield exactly one contributor (anything else makes the
    unpacking raise ValueError). That contributor is then checked with
    assert_author against `name`, `role`, `birthdate` and `deathdate`.
    """
    parsed = OCLCXMLParser.parse_author_string(self._db, string)
    # Single-element unpacking doubles as a check on the result count.
    (contributor,) = parsed
    self.assert_author(contributor, name, role, birthdate, deathdate)