コード例 #1
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_NameSecured(self):
     """The brace-wrapped author '{ACME Unlimited}' must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['{ACME Unlimited}'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #2
0
def checkConsistency(entries, config):
    """Print a consistency report for ``entries`` to stdout.

    Every check is switched on by a boolean attribute of ``config``. Checks
    without an implementation print ``NOT_IMPLEMENTED_PATTERN`` instead of
    results.

    :param entries: bibliography entries; handed to the ``nanny`` finder
        functions and indexed by entry key in the unsecured-title check.
    :param config: object exposing the flags read below (``duplicateKeys``,
        ``duplicateTitles``, ``duplicateTitlesIgnoredTypes``,
        ``anyMissingFields``, ``missingRequiredFields``, ...).
    :return: None; all findings are printed.
    """
    # Check for Duplicates #
    # Duplicate keys
    if config.duplicateKeys:
        print(NOT_IMPLEMENTED_PATTERN.format("Duplicate Keys"))
        # TODO: wire up duplicate-key detection (an earlier draft called
        #       nanny.findDuplicateKeys(entries)); nothing is reported yet.

    # Duplicate titles
    # Todo: Add handling of acceptable cases, such as different editions of a book, preprints and talks.
    if config.duplicateTitles:
        title2duplicateEntries = nanny.findDuplicateTitles(entries, config.duplicateTitlesIgnoredTypes)
        if title2duplicateEntries:
            print(HEADLINE_PATTERN.format("Duplicate Titles"))
            # Only the grouped entries are needed; the title is read from the first entry of each group.
            for duplicateTitleEntries in title2duplicateEntries.values():
                keysString = getEnumerationString(duplicateTitleEntries)
                firstTitle = duplicateTitleEntries[0][nanny.FIELD_TITLE]
                print("Entries {} have the same title: {}".format(keysString, firstTitle))
            print()

    # Missing fields #
    if config.anyMissingFields:
        key2availability = nanny.getFieldAvailabilities(entries)
        if key2availability:
            # Collect the report before printing, so that neither the headline nor an
            # "Entry ..." line appears without at least one detail line beneath it
            # (e.g. when an entry only misses optional fields but optional reporting is off).
            missingFieldReports = []
            for key, availability in key2availability.items():
                missingRequiredFields = availability[nanny.FIELD_IS_REQUIRED_MISSING]
                missingOptionalFields = availability[nanny.FIELD_IS_OPTIONAL_MISSING]

                details = []
                if config.missingRequiredFields and missingRequiredFields:
                    details.append(("  Required missing: ", ', '.join(missingRequiredFields)))
                if config.missingOptionalFields and missingOptionalFields:
                    details.append(("  Optional missing: ", ', '.join(missingOptionalFields)))
                if details:
                    missingFieldReports.append((key, details))

            if missingFieldReports:
                print(HEADLINE_PATTERN.format("Missing fields"))
                for key, details in missingFieldReports:
                    print("Entry {}".format(key))
                    for label, fieldList in details:
                        print(label, fieldList)
                print()

    # Bad Formatting #
    # Unsecured uppercase characters in titles
    # Todo: Identify over-eager use of curly braces, e.g. across multiple words
    # Todo: Add option to prefer braces around full words instead of single characters
    # Todo: Improve search of unsecured characters to not break when double braces are used
    if config.unsecuredTitleChars:
        key2unsecuredChars = nanny.findUnsecuredUppercase(entries, field="title")
        if key2unsecuredChars:
            print(HEADLINE_PATTERN.format("Titles with uppercase characters that are not secured by curly braces"))
            for key in key2unsecuredChars:
                title = entries[key][nanny.FIELD_TITLE]
                print("Entry {} has unsecured uppercase characters: {}".format(key, title))
            print()

    # Unnecessary curly braces
    if config.unnecessaryBraces:
        print(NOT_IMPLEMENTED_PATTERN.format("unnecessary curly braces"))

    # Bad page numbers
    if config.badPageNumbers:
        badPageNumberEntries = nanny.findBadPageNumbers(entries, tolerateSingleHyphens=False)
        if badPageNumberEntries:
            print(HEADLINE_PATTERN.format("Entries with badly formatted page numbers"))
            for entry in badPageNumberEntries:
                print("Entry {} has bad page number format: {}".format(entry.key, entry[nanny.FIELD_PAGES]))
            print()

    # Inconsistent Formatting #
    # Inconsistent names for conferences
    if config.inconsistentConferences:
        print(NOT_IMPLEMENTED_PATTERN.format("inconsistent names for conferences"))

    # Incomplete name formatting (e.g. first name is initials only or missing middle names found in other entry)
    if config.incompleteNames:
        print(NOT_IMPLEMENTED_PATTERN.format("incomplete name formatting"))

    # Ambiguous name formatting (i.e. not following the "LAST, FIRST and LAST, FIRST" format)
    if config.ambiguousNames:
        print(NOT_IMPLEMENTED_PATTERN.format("ambigous name formatting"))

    # All-caps name formatting
    if config.allcapsNames:
        # One report section per person-name field listed in nanny.PERSON_NAME_FIELDS.
        for field in nanny.PERSON_NAME_FIELDS:
            entrykey2CapsNames = nanny.findAllCapsName(entries, field)
            if entrykey2CapsNames:
                print(HEADLINE_PATTERN.format("{}s whose names are all-caps".format(field.capitalize())))
                for key, capsnames in entrykey2CapsNames.items():
                    for capsname in capsnames:
                        print("Entry {} has {}s which are all-caps: {}".format(key, field, capsname.pretty()))
                print()

    # Inconsistent location names
    if config.inconsistentLocations:
        print(NOT_IMPLEMENTED_PATTERN.format("inconsistent location names"))

    # Inconsistent inferrable information
    if config.inconsistentInferrableInfo:
        print(NOT_IMPLEMENTED_PATTERN.format("inconsistent inferrable information"))
コード例 #3
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_JuniorCommaSeparated(self):
     """The comma-separated author 'Mouse, Jr., Mickey D.' must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['Mouse, Jr., Mickey D.'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #4
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_NameSpecialchar(self):
     """An author name containing brace-wrapped accent commands must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['M{\\\'i}ckey Mo{\\"u}se'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #5
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_LastnameNumerals(self):
     """Authors ending in III, VI, IX or X must yield no all-caps findings."""
     numeralNames = ['Mickey Mouse III', 'Mickey Mouse VI', 'Mickey Mouse IX', 'Mickey Mouse X']
     bibEntries = self.getEntries4Name(numeralNames, FIELD_AUTHOR)
     # An empty result means none of the names was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #6
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_JuniorBasic(self):
     """The author 'Mickey D. Mouse Jr.' must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['Mickey D. Mouse Jr.'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #7
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_ThreeInitialsSpaced(self):
     """The author 'M. D. R. Mouse' (three spaced initials) must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['M. D. R. Mouse'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #8
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_ThreeInitialsNoperiodNospace(self):
     """The author 'MDR Mouse' (initials without periods or spaces) must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['MDR Mouse'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #9
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_MiddlenameInitialNoperiod(self):
     """The author 'Mickey D Mouse' (middle initial without period) must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['Mickey D Mouse'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #10
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_MiddlenameTwoInitialsOneperiodNospace(self):
     """The author 'Mickey DR Mouse' (two unspaced middle initials) must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['Mickey DR Mouse'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #11
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_FirstnameIsInitial(self):
     """The author 'M. Mouse' (first name given as an initial) must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['M. Mouse'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #12
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_FirstnameAllCaps(self):
     """The author 'MICKEY Mouse' must be reported under entry key 'foobar0'."""
     bibEntries = self.getEntries4Name(['MICKEY Mouse'], FIELD_AUTHOR)
     foundCaps = nanny.findAllCapsName(bibEntries, FIELD_AUTHOR)
     # Expected mapping: entry key -> list of parsed names flagged as all-caps.
     expectedCaps = {'foobar0': [algo.Name(first='MICKEY', von='', last='Mouse', jr='')]}
     self.assertEqual(foundCaps, expectedCaps)
コード例 #13
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_basicTwoNamesAndOthers(self):
     """The author list 'Mickey Mouse and Minie Mouse and others' must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['Mickey Mouse and Minie Mouse and others'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #14
0
ファイル: nannyTest.py プロジェクト: marcschulder/BibTexNanny
 def test_findAllCapsName_basicName(self):
     """The plain author 'Mickey Mouse' must yield no all-caps findings."""
     bibEntries = self.getEntries4Name(['Mickey Mouse'], FIELD_AUTHOR)
     # An empty result means no author name was reported as all-caps.
     self.assertEmpty(nanny.findAllCapsName(bibEntries, FIELD_AUTHOR))
コード例 #15
0
def checkConsistency(entries, config):
    """Print a consistency report for ``entries`` to stdout.

    Every check is switched on by a boolean attribute of ``config``. Checks
    without an implementation print ``NOT_IMPLEMENTED_PATTERN`` instead of
    results.

    :param entries: bibliography entries; handed to the ``nanny`` finder
        functions and indexed by entry key in the unsecured-title check.
    :param config: object exposing the flags read below (``duplicateKeys``,
        ``duplicateTitles``, ``duplicateTitlesIgnoredTypes``,
        ``anyMissingFields``, ``missingRequiredFields``, ...).
    :return: None; all findings are printed.
    """
    # Check for Duplicates #
    # Duplicate keys
    if config.duplicateKeys:
        print(NOT_IMPLEMENTED_PATTERN.format("Duplicate Keys"))
        # TODO: wire up duplicate-key detection (an earlier draft called
        #       nanny.findDuplicateKeys(entries)); nothing is reported yet.

    # Duplicate titles
    # Todo: Add handling of acceptable cases, such as different editions of a book, preprints and talks.
    if config.duplicateTitles:
        title2duplicateEntries = nanny.findDuplicateTitles(
            entries, config.duplicateTitlesIgnoredTypes)
        if title2duplicateEntries:
            print(HEADLINE_PATTERN.format("Duplicate Titles"))
            # Only the grouped entries are needed; the title is read from
            # the first entry of each group.
            for duplicateTitleEntries in title2duplicateEntries.values():
                keysString = getEnumerationString(duplicateTitleEntries)
                firstTitle = duplicateTitleEntries[0][nanny.FIELD_TITLE]
                print("Entries {} have the same title: {}".format(
                    keysString, firstTitle))
            print()

    # Missing fields #
    if config.anyMissingFields:
        key2availability = nanny.getFieldAvailabilities(entries)
        if key2availability:
            # Collect the report before printing, so that neither the
            # headline nor an "Entry ..." line appears without at least one
            # detail line beneath it (e.g. when an entry only misses optional
            # fields but optional reporting is switched off).
            missingFieldReports = []
            for key, availability in key2availability.items():
                missingRequiredFields = availability[
                    nanny.FIELD_IS_REQUIRED_MISSING]
                missingOptionalFields = availability[
                    nanny.FIELD_IS_OPTIONAL_MISSING]

                details = []
                if config.missingRequiredFields and missingRequiredFields:
                    details.append(("  Required missing: ",
                                    ', '.join(missingRequiredFields)))
                if config.missingOptionalFields and missingOptionalFields:
                    details.append(("  Optional missing: ",
                                    ', '.join(missingOptionalFields)))
                if details:
                    missingFieldReports.append((key, details))

            if missingFieldReports:
                print(HEADLINE_PATTERN.format("Missing fields"))
                for key, details in missingFieldReports:
                    print("Entry {}".format(key))
                    for label, fieldList in details:
                        print(label, fieldList)
                print()

    # Bad Formatting #
    # Unsecured uppercase characters in titles
    # Todo: Identify over-eager use of curly braces, e.g. across multiple words
    # Todo: Add option to prefer braces around full words instead of single characters
    # Todo: Improve search of unsecured characters to not break when double braces are used
    if config.unsecuredTitleChars:
        key2unsecuredChars = nanny.findUnsecuredUppercase(entries,
                                                          field="title")
        if key2unsecuredChars:
            print(
                HEADLINE_PATTERN.format(
                    "Titles with uppercase characters that are not secured by curly braces"
                ))
            for key in key2unsecuredChars:
                title = entries[key][nanny.FIELD_TITLE]
                print("Entry {} has unsecured uppercase characters: {}".format(
                    key, title))
            print()

    # Unnecessary curly braces
    if config.unnecessaryBraces:
        print(NOT_IMPLEMENTED_PATTERN.format("unnecessary curly braces"))

    # Bad page numbers
    if config.badPageNumbers:
        badPageNumberEntries = nanny.findBadPageNumbers(
            entries, tolerateSingleHyphens=False)
        if badPageNumberEntries:
            print(
                HEADLINE_PATTERN.format(
                    "Entries with badly formatted page numbers"))
            for entry in badPageNumberEntries:
                print("Entry {} has bad page number format: {}".format(
                    entry.key, entry[nanny.FIELD_PAGES]))
            print()

    # Inconsistent Formatting #
    # Inconsistent names for conferences
    if config.inconsistentConferences:
        print(
            NOT_IMPLEMENTED_PATTERN.format(
                "inconsistent names for conferences"))

    # Incomplete name formatting (e.g. first name is initials only or missing middle names found in other entry)
    if config.incompleteNames:
        print(NOT_IMPLEMENTED_PATTERN.format("incomplete name formatting"))

    # Ambiguous name formatting (i.e. not following the "LAST, FIRST and LAST, FIRST" format)
    if config.ambiguousNames:
        print(NOT_IMPLEMENTED_PATTERN.format("ambigous name formatting"))

    # All-caps name formatting
    if config.allcapsNames:
        # One report section per person-name field listed in
        # nanny.PERSON_NAME_FIELDS.
        for field in nanny.PERSON_NAME_FIELDS:
            entrykey2CapsNames = nanny.findAllCapsName(entries, field)
            if entrykey2CapsNames:
                print(
                    HEADLINE_PATTERN.format(
                        "{}s whose names are all-caps".format(
                            field.capitalize())))
                for key, capsnames in entrykey2CapsNames.items():
                    for capsname in capsnames:
                        print("Entry {} has {}s which are all-caps: {}".format(
                            key, field, capsname.pretty()))
                print()

    # Inconsistent location names
    if config.inconsistentLocations:
        print(NOT_IMPLEMENTED_PATTERN.format("inconsistent location names"))

    # Inconsistent inferrable information
    if config.inconsistentInferrableInfo:
        print(
            NOT_IMPLEMENTED_PATTERN.format(
                "inconsistent inferrable information"))