def test_findAllCapsName_NameSecured(self):
    # A name wrapped entirely in curly braces is expected to yield no
    # all-caps findings.
    securedNames = ['{ACME Unlimited}']
    authorEntries = self.getEntries4Name(securedNames, FIELD_AUTHOR)
    self.assertEmpty(nanny.findAllCapsName(authorEntries, FIELD_AUTHOR))
def checkConsistency(entries, config):
    """Print a consistency report for ``entries`` to standard output.

    Every check is guarded by a switch on ``config`` and is skipped when
    that switch is falsy. Checks without an implementation print
    ``NOT_IMPLEMENTED_PATTERN`` filled with the name of the check.

    Args:
        entries: Bibliography entries passed on to the ``nanny`` finder
            functions. It is also indexed by entry key to look up the
            title of entries with unsecured uppercase characters.
        config: Object with one attribute per check (``duplicateKeys``,
            ``duplicateTitles``, ``duplicateTitlesIgnoredTypes``,
            ``anyMissingFields``, ``missingRequiredFields``,
            ``missingOptionalFields``, ``unsecuredTitleChars``,
            ``unnecessaryBraces``, ``badPageNumbers``,
            ``inconsistentConferences``, ``incompleteNames``,
            ``ambiguousNames``, ``allcapsNames``,
            ``inconsistentLocations``, ``inconsistentInferrableInfo``).

    Returns:
        None. All findings are printed.
    """
    # NOTE(review): the nesting of the blank-line ``print()`` calls was
    # reconstructed from a flattened source - confirm against the layout
    # the report is expected to have.

    # Check for Duplicates #
    # Duplicate keys
    if config.duplicateKeys:
        print(NOT_IMPLEMENTED_PATTERN.format("Duplicate Keys"))
    # Todo: Implement. Sketch of the intended report:
    # duplicateKeys = nanny.findDuplicateKeys(entries)
    # if duplicateKeys:
    #     print(HEADLINE_PATTERN.format("Duplicate Keys"))
    #     for duplicateKey in duplicateKeys:
    #         print("Found duplicate key: {}".format(duplicateKey))
    #     print()

    # Duplicate titles
    # Todo: Add handling of acceptable cases, such as different editions
    #       of a book, preprints and talks.
    if config.duplicateTitles:
        title2duplicateEntries = nanny.findDuplicateTitles(
            entries, config.duplicateTitlesIgnoredTypes)
        if title2duplicateEntries:
            print(HEADLINE_PATTERN.format("Duplicate Titles"))
            # Only the grouped entries are needed; the title that is
            # printed is read from the first entry of each group.
            for duplicateTitleEntries in title2duplicateEntries.values():
                keysString = getEnumerationString(duplicateTitleEntries)
                firstTitle = duplicateTitleEntries[0][nanny.FIELD_TITLE]
                print("Entries {} have the same title: {}".format(
                    keysString, firstTitle))
            print()

    # Missing fields #
    if config.anyMissingFields:
        key2availability = nanny.getFieldAvailabilities(entries)
        if key2availability:
            print(HEADLINE_PATTERN.format("Missing fields"))
            for key, availability in key2availability.items():
                missingRequiredFields = availability[
                    nanny.FIELD_IS_REQUIRED_MISSING]
                missingOptionalFields = availability[
                    nanny.FIELD_IS_OPTIONAL_MISSING]
                # config.anyMissingFields is already known to be truthy
                # here, so only the per-entry findings are tested.
                if missingRequiredFields or missingOptionalFields:
                    print("Entry {}".format(key))
                    if config.missingRequiredFields and missingRequiredFields:
                        print(" Required missing: ",
                              ', '.join(missingRequiredFields))
                    if config.missingOptionalFields and missingOptionalFields:
                        print(" Optional missing: ",
                              ', '.join(missingOptionalFields))
                    print()

    # Bad Formatting #
    # Unsecured uppercase characters in titles
    # Todo: Identify over-eager use of curly braces, e.g. across multiple
    #       words
    # Todo: Add option to prefer braces around full words instead of
    #       single characters
    # Todo: Improve search of unsecured characters to not break when
    #       double braces are used
    if config.unsecuredTitleChars:
        key2unsecuredChars = nanny.findUnsecuredUppercase(entries,
                                                          field="title")
        if key2unsecuredChars:
            print(HEADLINE_PATTERN.format(
                "Titles with uppercase characters that are not secured by curly braces"))
            for key in key2unsecuredChars:
                title = entries[key][nanny.FIELD_TITLE]
                print("Entry {} has unsecured uppercase characters: {}".format(
                    key, title))
            print()

    # Unnecessary curly braces
    if config.unnecessaryBraces:
        print(NOT_IMPLEMENTED_PATTERN.format("unnecessary curly braces"))

    # Bad page numbers
    if config.badPageNumbers:
        badPageNumberEntries = nanny.findBadPageNumbers(
            entries, tolerateSingleHyphens=False)
        if badPageNumberEntries:
            print(HEADLINE_PATTERN.format(
                "Entries with badly formatted page numbers"))
            for entry in badPageNumberEntries:
                print("Entry {} has bad page number format: {}".format(
                    entry.key, entry[nanny.FIELD_PAGES]))
            print()

    # Inconsistent Formatting #
    # Inconsistent names for conferences
    if config.inconsistentConferences:
        print(NOT_IMPLEMENTED_PATTERN.format(
            "inconsistent names for conferences"))

    # Incomplete name formatting (e.g. first name is initials only or
    # missing middle names found in other entry)
    if config.incompleteNames:
        print(NOT_IMPLEMENTED_PATTERN.format("incomplete name formatting"))

    # Ambiguous name formatting (i.e. not following the
    # "LAST, FIRST and LAST, FIRST" format)
    if config.ambiguousNames:
        print(NOT_IMPLEMENTED_PATTERN.format("ambiguous name formatting"))

    # All-caps name formatting
    if config.allcapsNames:
        for field in nanny.PERSON_NAME_FIELDS:
            entrykey2CapsNames = nanny.findAllCapsName(entries, field)
            if entrykey2CapsNames:
                print(HEADLINE_PATTERN.format(
                    "{}s whose names are all-caps".format(field.capitalize())))
                for key, capsnames in entrykey2CapsNames.items():
                    for capsname in capsnames:
                        print("Entry {} has {}s which are all-caps: {}".format(
                            key, field, capsname.pretty()))
                print()

    # Inconsistent location names
    if config.inconsistentLocations:
        print(NOT_IMPLEMENTED_PATTERN.format("inconsistent location names"))

    # Inconsistent inferrable information
    if config.inconsistentInferrableInfo:
        print(NOT_IMPLEMENTED_PATTERN.format(
            "inconsistent inferrable information"))
def test_findAllCapsName_JuniorCommaSeparated(self):
    # "Last, Jr., First" notation: expects no all-caps findings.
    juniorName = 'Mouse, Jr., Mickey D.'
    authorEntries = self.getEntries4Name([juniorName], FIELD_AUTHOR)
    findings = nanny.findAllCapsName(authorEntries, FIELD_AUTHOR)
    self.assertEmpty(findings)
def test_findAllCapsName_NameSpecialchar(self):
    # Names containing brace-wrapped accent commands: expects no all-caps
    # findings.
    accentedNames = ['M{\\\'i}ckey Mo{\\"u}se']
    authorEntries = self.getEntries4Name(accentedNames, FIELD_AUTHOR)
    self.assertEmpty(nanny.findAllCapsName(authorEntries, FIELD_AUTHOR))
def test_findAllCapsName_LastnameNumerals(self):
    # Names ending in an uppercase Roman numeral: expects no all-caps
    # findings for any of them.
    numeralNames = [
        'Mickey Mouse III',
        'Mickey Mouse VI',
        'Mickey Mouse IX',
        'Mickey Mouse X',
    ]
    authorEntries = self.getEntries4Name(numeralNames, FIELD_AUTHOR)
    findings = nanny.findAllCapsName(authorEntries, FIELD_AUTHOR)
    self.assertEmpty(findings)
def test_findAllCapsName_JuniorBasic(self):
    # A trailing "Jr." without commas: expects no all-caps findings.
    authorEntries = self.getEntries4Name(
        ['Mickey D. Mouse Jr.'],
        FIELD_AUTHOR,
    )
    self.assertEmpty(nanny.findAllCapsName(authorEntries, FIELD_AUTHOR))
def test_findAllCapsName_ThreeInitialsSpaced(self):
    # Three period-terminated, space-separated initials: expects no
    # all-caps findings.
    initialsName = 'M. D. R. Mouse'
    authorEntries = self.getEntries4Name([initialsName], FIELD_AUTHOR)
    findings = nanny.findAllCapsName(authorEntries, FIELD_AUTHOR)
    self.assertEmpty(findings)
def test_findAllCapsName_ThreeInitialsNoperiodNospace(self):
    # Three initials written together without periods or spaces: expects
    # no all-caps findings.
    compactInitials = ['MDR Mouse']
    authorEntries = self.getEntries4Name(compactInitials, FIELD_AUTHOR)
    self.assertEmpty(nanny.findAllCapsName(authorEntries, FIELD_AUTHOR))
def test_findAllCapsName_MiddlenameInitialNoperiod(self):
    # A single-letter middle initial without a period: expects no
    # all-caps findings.
    authorEntries = self.getEntries4Name(['Mickey D Mouse'], FIELD_AUTHOR)
    findings = nanny.findAllCapsName(authorEntries, FIELD_AUTHOR)
    self.assertEmpty(findings)
def test_findAllCapsName_MiddlenameTwoInitialsOneperiodNospace(self):
    # Two middle initials written together ("DR"): expects no all-caps
    # findings.
    twoInitialsName = 'Mickey DR Mouse'
    authorEntries = self.getEntries4Name([twoInitialsName], FIELD_AUTHOR)
    self.assertEmpty(nanny.findAllCapsName(authorEntries, FIELD_AUTHOR))
def test_findAllCapsName_FirstnameIsInitial(self):
    # A first name given only as an initial: expects no all-caps findings.
    abbreviatedNames = ['M. Mouse']
    authorEntries = self.getEntries4Name(abbreviatedNames, FIELD_AUTHOR)
    findings = nanny.findAllCapsName(authorEntries, FIELD_AUTHOR)
    self.assertEmpty(findings)
def test_findAllCapsName_FirstnameAllCaps(self):
    # An all-caps first name is expected to be reported under the entry
    # key 'foobar0' as a parsed algo.Name.
    authorEntries = self.getEntries4Name(['MICKEY Mouse'], FIELD_AUTHOR)
    findings = nanny.findAllCapsName(authorEntries, FIELD_AUTHOR)
    shoutedName = algo.Name(first='MICKEY', von='', last='Mouse', jr='')
    expectedFindings = {'foobar0': [shoutedName]}
    self.assertEqual(findings, expectedFindings)
def test_findAllCapsName_basicTwoNamesAndOthers(self):
    # Two regular names joined by "and", followed by "and others":
    # expects no all-caps findings.
    nameList = 'Mickey Mouse and Minie Mouse and others'
    authorEntries = self.getEntries4Name([nameList], FIELD_AUTHOR)
    self.assertEmpty(nanny.findAllCapsName(authorEntries, FIELD_AUTHOR))
def test_findAllCapsName_basicName(self):
    # Baseline: a plain "First Last" name expects no all-caps findings.
    plainNames = ['Mickey Mouse']
    authorEntries = self.getEntries4Name(plainNames, FIELD_AUTHOR)
    findings = nanny.findAllCapsName(authorEntries, FIELD_AUTHOR)
    self.assertEmpty(findings)
def checkConsistency(entries, config):
    """Print a consistency report for ``entries`` to standard output.

    Every check is guarded by a switch on ``config`` and is skipped when
    that switch is falsy. Checks without an implementation print
    ``NOT_IMPLEMENTED_PATTERN`` filled with the name of the check.

    Args:
        entries: Bibliography entries passed on to the ``nanny`` finder
            functions. It is also indexed by entry key to look up the
            title of entries with unsecured uppercase characters.
        config: Object with one attribute per check (``duplicateKeys``,
            ``duplicateTitles``, ``duplicateTitlesIgnoredTypes``,
            ``anyMissingFields``, ``missingRequiredFields``,
            ``missingOptionalFields``, ``unsecuredTitleChars``,
            ``unnecessaryBraces``, ``badPageNumbers``,
            ``inconsistentConferences``, ``incompleteNames``,
            ``ambiguousNames``, ``allcapsNames``,
            ``inconsistentLocations``, ``inconsistentInferrableInfo``).

    Returns:
        None. All findings are printed.
    """
    # NOTE(review): the nesting of the blank-line ``print()`` calls was
    # reconstructed from a flattened source - confirm against the layout
    # the report is expected to have.

    # Check for Duplicates #
    # Duplicate keys
    if config.duplicateKeys:
        print(NOT_IMPLEMENTED_PATTERN.format("Duplicate Keys"))
    # Todo: Implement. Sketch of the intended report:
    # duplicateKeys = nanny.findDuplicateKeys(entries)
    # if duplicateKeys:
    #     print(HEADLINE_PATTERN.format("Duplicate Keys"))
    #     for duplicateKey in duplicateKeys:
    #         print("Found duplicate key: {}".format(duplicateKey))
    #     print()

    # Duplicate titles
    # Todo: Add handling of acceptable cases, such as different editions
    #       of a book, preprints and talks.
    if config.duplicateTitles:
        title2duplicateEntries = nanny.findDuplicateTitles(
            entries, config.duplicateTitlesIgnoredTypes)
        if title2duplicateEntries:
            print(HEADLINE_PATTERN.format("Duplicate Titles"))
            # Only the grouped entries are needed; the title that is
            # printed is read from the first entry of each group.
            for duplicateTitleEntries in title2duplicateEntries.values():
                keysString = getEnumerationString(duplicateTitleEntries)
                firstTitle = duplicateTitleEntries[0][nanny.FIELD_TITLE]
                print("Entries {} have the same title: {}".format(
                    keysString, firstTitle))
            print()

    # Missing fields #
    if config.anyMissingFields:
        key2availability = nanny.getFieldAvailabilities(entries)
        if key2availability:
            print(HEADLINE_PATTERN.format("Missing fields"))
            for key, availability in key2availability.items():
                missingRequiredFields = availability[
                    nanny.FIELD_IS_REQUIRED_MISSING]
                missingOptionalFields = availability[
                    nanny.FIELD_IS_OPTIONAL_MISSING]
                # config.anyMissingFields is already known to be truthy
                # here, so only the per-entry findings are tested.
                if missingRequiredFields or missingOptionalFields:
                    print("Entry {}".format(key))
                    if config.missingRequiredFields and missingRequiredFields:
                        print(" Required missing: ",
                              ', '.join(missingRequiredFields))
                    if config.missingOptionalFields and missingOptionalFields:
                        print(" Optional missing: ",
                              ', '.join(missingOptionalFields))
                    print()

    # Bad Formatting #
    # Unsecured uppercase characters in titles
    # Todo: Identify over-eager use of curly braces, e.g. across multiple
    #       words
    # Todo: Add option to prefer braces around full words instead of
    #       single characters
    # Todo: Improve search of unsecured characters to not break when
    #       double braces are used
    if config.unsecuredTitleChars:
        key2unsecuredChars = nanny.findUnsecuredUppercase(entries,
                                                          field="title")
        if key2unsecuredChars:
            print(HEADLINE_PATTERN.format(
                "Titles with uppercase characters that are not secured by curly braces"))
            for key in key2unsecuredChars:
                title = entries[key][nanny.FIELD_TITLE]
                print("Entry {} has unsecured uppercase characters: {}".format(
                    key, title))
            print()

    # Unnecessary curly braces
    if config.unnecessaryBraces:
        print(NOT_IMPLEMENTED_PATTERN.format("unnecessary curly braces"))

    # Bad page numbers
    if config.badPageNumbers:
        badPageNumberEntries = nanny.findBadPageNumbers(
            entries, tolerateSingleHyphens=False)
        if badPageNumberEntries:
            print(HEADLINE_PATTERN.format(
                "Entries with badly formatted page numbers"))
            for entry in badPageNumberEntries:
                print("Entry {} has bad page number format: {}".format(
                    entry.key, entry[nanny.FIELD_PAGES]))
            print()

    # Inconsistent Formatting #
    # Inconsistent names for conferences
    if config.inconsistentConferences:
        print(NOT_IMPLEMENTED_PATTERN.format(
            "inconsistent names for conferences"))

    # Incomplete name formatting (e.g. first name is initials only or
    # missing middle names found in other entry)
    if config.incompleteNames:
        print(NOT_IMPLEMENTED_PATTERN.format("incomplete name formatting"))

    # Ambiguous name formatting (i.e. not following the
    # "LAST, FIRST and LAST, FIRST" format)
    if config.ambiguousNames:
        print(NOT_IMPLEMENTED_PATTERN.format("ambiguous name formatting"))

    # All-caps name formatting
    if config.allcapsNames:
        for field in nanny.PERSON_NAME_FIELDS:
            entrykey2CapsNames = nanny.findAllCapsName(entries, field)
            if entrykey2CapsNames:
                print(HEADLINE_PATTERN.format(
                    "{}s whose names are all-caps".format(field.capitalize())))
                for key, capsnames in entrykey2CapsNames.items():
                    for capsname in capsnames:
                        print("Entry {} has {}s which are all-caps: {}".format(
                            key, field, capsname.pretty()))
                print()

    # Inconsistent location names
    if config.inconsistentLocations:
        print(NOT_IMPLEMENTED_PATTERN.format("inconsistent location names"))

    # Inconsistent inferrable information
    if config.inconsistentInferrableInfo:
        print(NOT_IMPLEMENTED_PATTERN.format(
            "inconsistent inferrable information"))